#####################
##permutation tests##
#####################
set.seed(891) #setting the seed makes the psuedorandomizer replicable
##############
##simulation## first simulation: tau is zero
##############
#create the potential outcomes.
#when we create y_c (vector of outcomes for control obs) and y_t
#if we do it with non-symmetric differences then we are saying
#the matching process is biased.
n_pairs <- 100 #number of pairs
tau <- 0 #set to 0 if there is no effect
y_c <- rnorm(n_pairs) #create outcomes for obs put through control level
y_t <- rnorm(n_pairs) + tau #if tau not equal to 0 then there is an additive treatment effect
n_sims <- 10000
stor_sim <- matrix(NA,nrow = n_sims,ncol = 2) #we'll run a bunch of simulations and store the test stats in this matrix.
for(i in 1:n_sims){
z_temp <- sample(c(-1,1),size = n_pairs,replace = TRUE) #random permutation of treatment label. if 1 then no change within pair. if -1 then swap y_c and y_t.
stor_sim[i,1] <- mean(z_temp*(y_t - y_c)) #get the difference within pair and then possibly swap the direction based on the random permutation. this is our first test statistic.
#below, create temporary vectors of permuted obs.
#then use wilcox.test() to calculate our second test stat.
y_c_temp <- ifelse(z_temp==1,y_c,y_t)
y_t_temp <- ifelse(z_temp==1,y_t,y_c)
stat_temp <- wilcox.test(y_t_temp,y_c_temp,paired=TRUE)[1] #the "[1]" gets you the test statistic. "[3]" would get you the p-value.
stor_sim[i,2] <- as.numeric(stat_temp)
}
hist(stor_sim[,1],
breaks=100,
main="Histogram of mean within pairs difference",
xlab = "mean(y_t - y_c |within pair)")
abline(v=mean(y_t-y_c),col="red") #shows where the actual test stat is relative to the null distribution
hist(stor_sim[,2],
breaks=100,
main="Histogram of Wilcoxon signed rank statistic",
xlab = "Wilcoxon signed rank statistic")
abline(v=as.numeric(wilcox.test(y_t,y_c,paired=TRUE)[1]),col="red") #shows where the actual test stat is relative to the null distribution
##############
##simulation## second simulation: tau is 0.1
##############
n_pairs <- 100 #number of pairs
tau <- 0.1 #set to 0 if there is no effect
y_c <- rnorm(n_pairs) #create outcomes for obs put through control level
y_t <- rnorm(n_pairs) + tau #if tau not equal to 0 then there is an additive treatment effect
n_sims <- 10000
stor_sim <- matrix(NA,nrow = n_sims,ncol = 2)
for(i in 1:n_sims){
z_temp <- sample(c(-1,1),size = n_pairs,replace = TRUE) #random permutation of treatment label. if 1 then no change within pair. if -1 then swap y_c and y_t.
stor_sim[i,1] <- mean(z_temp*(y_t - y_c)) #get the difference within pair and then possibly swap the direction based on the random permutation. this is our first test statistic.
#below, create temporary vectors of permuted obs.
#then use wilcox.test() to calculate our second test stat.
y_c_temp <- ifelse(z_temp==1,y_c,y_t)
y_t_temp <- ifelse(z_temp==1,y_t,y_c)
stat_temp <- wilcox.test(y_t_temp,y_c_temp,paired=TRUE)[1] #the "[1]" gets you the test statistic. "[3]" would get you the p-value.
stor_sim[i,2] <- as.numeric(stat_temp)
}
hist(c(stor_sim[,1],mean(y_t-y_c)), #note you'll want to include the actual test stat in your null distribution. i didn't do this in simulation 1.
breaks=100,
main="Histogram of mean within pairs difference",
xlab = "mean(y_t - y_c |within pair)")
abline(v=mean(y_t-y_c),col="red")
hist(c(stor_sim[,2],as.numeric(wilcox.test(y_t,y_c,paired=TRUE)[1])),
breaks=100,
main="Histogram of Wilcoxon signed rank statistic",
xlab = "Wilcoxon signed rank statistic")
abline(v=as.numeric(wilcox.test(y_t,y_c,paired=TRUE)[1]),col="red")
##############
##simulation## third simulation: tau is 0, but one "weird" data point
##############
n_pairs <- 100 #number of pairs
tau <- 0 #set to 0 if there is no effect
y_c <- rnorm(n_pairs) #create outcomes for obs put through control level
y_t <- rnorm(n_pairs) + tau #if tau not equal to 0 then there is an additive treatment effect
y_t[1] <- 100*max(y_t) #this data point seems..... suspicious.
n_sims <- 10000
stor_sim <- matrix(NA,nrow = n_sims,ncol = 2)
for(i in 1:n_sims){
z_temp <- sample(c(-1,1),size = n_pairs,replace = TRUE) #random permutation of treatment label. if 1 then no change within pair. if -1 then swap y_c and y_t.
stor_sim[i,1] <- mean(z_temp*(y_t - y_c)) #get the difference within pair and then possibly swap the direction based on the random permutation. this is our first test statistic.
#below, create temporary vectors of permuted obs.
#then use wilcox.test() to calculate our second test stat.
y_c_temp <- ifelse(z_temp==1,y_c,y_t)
y_t_temp <- ifelse(z_temp==1,y_t,y_c)
stat_temp <- wilcox.test(y_t_temp,y_c_temp,paired=TRUE)[1] #the "[1]" gets you the test statistic. "[3]" would get you the p-value.
stor_sim[i,2] <- as.numeric(stat_temp)
}
hist(c(stor_sim[,1],mean(y_t-y_c)),
breaks=100,
main="Histogram of mean within pairs difference",
xlab = "mean(y_t - y_c |within pair)")
abline(v=mean(y_t-y_c),col="red")
hist(c(stor_sim[,2],as.numeric(wilcox.test(y_t,y_c,paired=TRUE)[1])),
breaks=100,
main="Histogram of Wilcoxon signed rank statistic",
xlab = "Wilcoxon signed rank statistic")
abline(v=as.numeric(wilcox.test(y_t,y_c,paired=TRUE)[1]),col="red")
#Moral of the story: the magnitude of the differences within matched pairs
#is lost when we look at the Wilcoxon signed rank. It only uses
#the sign (i.e., is treated or control bigger within matched pair?)
#and the rank (i.e., how does this particular within pair difference compare to other within pair differences?).
#This makes the Wilcoxon less sensitive to particular measurements, but still seems to capture much of the
#information needed to create a null distribution. You can see this claim about "enough info" by looking at the
#the histograms produced in simulations 1 and 2; they look quite similar between the two stats.