所以使用Matching
包裹(Link to package here)
我们可以通过修改后的GenMatch
示例。
library(Matching)
data(lalonde)
#introduce an id vaiable
lalonde$ID <- 1:length(lalonde$age)
X = cbind(lalonde$age, lalonde$educ, lalonde$black, lalonde$hisp,
lalonde$married, lalonde$nodegr, lalonde$u74, lalonde$u75,
lalonde$re75, lalonde$re74)
BalanceMat <- cbind(lalonde$age, lalonde$educ, lalonde$black,
lalonde$hisp, lalonde$married, lalonde$nodegr,
lalonde$u74, lalonde$u75, lalonde$re75, lalonde$re74,
I(lalonde$re74*lalonde$re75))
genout <- GenMatch(Tr=lalonde$treat, X=X, BalanceMatrix=BalanceMat, estimand="ATE",
pop.size=16, max.generations=10, wait.generations=1)
mout <- Match(Y=NULL, Tr=lalonde$treat, X=X,
Weight.matrix=genout,
replace=TRUE, ties=FALSE)
# here we set ties FALSE so we only have 1-1 Matching
summary(mout)
#now lets create our "Matched dataset"
treated <- lalonde[mout$index.treated,]
# and introduce an indetity variable for each pair
treated$Pair_ID <- treated$ID
non.treated <- lalonde[mout$index.control,]
non.treated$Pair_ID <- treated$ID
matched.data <- rbind(treated, non.treated)
matched.data <- matched.data[order(matched.data$Pair_ID),]
#this outputs which of the non-treated ID was paired with the first person
matched.data$ID[matched.data$Pair_ID==1 & matched.data$treat==0]
我们看到,对于数据, ID = 1与ID = 193匹配
现在让我们将一些随机化引入数据的顺序,看看我们是否得到相同的对
n <- 500
P1 <- rep(NA, n)
P2 <- rep(NA, n)
P3 <- rep(NA, n)
P4 <- rep(NA, n)
P5 <- rep(NA, n)
P6 <- rep(NA, n)
P7 <- rep(NA, n)
for (i in 1:n) {
lalonde <- lalonde[sample(1:nrow(lalonde)), ] # randomise order
genout <- GenMatch(Tr=lalonde$treat, X=X, BalanceMatrix=BalanceMat, estimand="ATE",
pop.size=16, max.generations=10, wait.generations=1)
mout <- Match(Y=NULL, Tr=lalonde$treat, X=X,
Weight.matrix=genout,
replace=TRUE, ties=FALSE)
summary(mout)
treated <- lalonde[mout$index.treated,]
treated$Pair_ID <- treated$ID
non.treated <- lalonde[mout$index.control,]
non.treated$Pair_ID <- treated$ID
matched.data <- rbind(treated, non.treated)
matched.data <- matched.data[order(matched.data$Pair_ID),]
P1[i] <- matched.data$ID[matched.data$Pair_ID==1 & matched.data$treat==0]
P2[i] <- matched.data$ID[matched.data$Pair_ID==2 & matched.data$treat==0]
P3[i] <- matched.data$ID[matched.data$Pair_ID==3 & matched.data$treat==0]
P4[i] <- matched.data$ID[matched.data$Pair_ID==4 & matched.data$treat==0]
P5[i] <- matched.data$ID[matched.data$Pair_ID==5 & matched.data$treat==0]
P6[i] <- matched.data$ID[matched.data$Pair_ID==6 & matched.data$treat==0]
P7[i] <- matched.data$ID[matched.data$Pair_ID==7 & matched.data$treat==0]
}
因此loop
将匹配对500次,P1
每次都会保存treat==0
个案例。
然后我们通过以下方式查看哪个P1
最多出现:
plot(1:n, P1, main="P1")
OR
summary(as.factor(P1))
我们发现没有一个treat==0
案例通常是配对的。
我希望有一个案例(可能= 193 ??)通常配对,不依赖于数据的顺序。因此我认为我的循环是错误的。任何人都可以指出在哪里?或者当他们运行循环时,他们发现,与数据的顺序无关,类似的情况是配对的吗?
答案 0 :(得分:1)
问题是您将lalonde
的顺序随机化了,但您对GenMatch
和Match
的输入是X
和BalanceMat
仍然有原始版本订购。当您最后构建matched.data
时,您将使用不再与lalonde
绑定的索引进行子集化。请重试,但在循环中包含X
和BalanceMat
的分配。
即
X = cbind(lalonde$age, lalonde$educ, lalonde$black, lalonde$hisp,
lalonde$married, lalonde$nodegr, lalonde$u74, lalonde$u75,
lalonde$re75, lalonde$re74)
BalanceMat <- cbind(lalonde$age, lalonde$educ, lalonde$black,
lalonde$hisp, lalonde$married, lalonde$nodegr,
lalonde$u74, lalonde$u75, lalonde$re75, lalonde$re74,
I(lalonde$re74*lalonde$re75))