我有一个病例表和一个对照表。我想按年龄和性别进行精确匹配,为病例创建一组匹配的对照。我还想要求对照在病例死亡日期(dod)之前至少有一年的数据。
数据如下:
# Simulate example data: a large pool of controls and a smaller set of cases.
# Fix the RNG seed so the example is reproducible between runs.
set.seed(1)

nControls <- 1e5  # 100,000 controls (same value as the original 10e4)
nCases <- 1e3     # 1,000 cases (same value as the original 10e2)
start_date <- as.Date("2011-04-01")
end_date <- as.Date("2016-04-01")

# Five-year age bands: "0-4", "5-9", ..., "75-79".
ages <- paste0(seq(0, 75, 5), "-", seq(4, 79, 5))
nAges <- length(ages)

# Controls: observation starts on 1 April of a year in the study window and
# lasts 1-5 years; higher age bands and "m" are sampled with higher weight.
controls <- data.frame(
  id = seq_len(nControls),
  start = sample(
    seq(start_date, end_date, by = "year"),
    size = nControls,
    replace = TRUE
  ),
  dur = sample(1:5, nControls, replace = TRUE) * 365.25,
  age = sample(
    ages,
    nControls,
    replace = TRUE,
    prob = seq_len(nAges) / sum(seq_len(nAges))
  ),
  sex = sample(c("m", "f"), nControls, replace = TRUE, prob = c(0.7, 0.3))
)
# End of observation. `dur` is a multiple of 365.25 days, so `end` may carry
# a fractional day.
controls$end <- controls$start + controls$dur

# Cases: the date of death (dod) is drawn uniformly from every day in the
# study window; age band and sex are drawn uniformly.
cases <- data.frame(
  id = seq_len(nCases),
  dod = sample(
    seq(start_date, end_date, by = "day"),
    size = nCases,
    replace = TRUE
  ),
  age = sample(ages, nCases, replace = TRUE),
  sex = sample(c("m", "f"), nCases, replace = TRUE)
)
手动或使用 MatchIt 软件包都可以轻松完成年龄和性别的匹配:
# Exact matching of cases to controls on age band and sex with MatchIt.
library(MatchIt)  # provides matchit(); the call below fails without it

# Treatment indicator: 0 = control, 1 = case.
controls$treat <- 0
cases$treat <- 1

# Stack both groups, keeping only the columns used for matching.
match_cols <- c("treat", "age", "sex")
mt <- rbind(controls[, match_cols], cases[, match_cols])

# Nearest-neighbour matching with a 2:1 control-to-case ratio, restricted to
# units that have identical age band and sex.
m.out <- matchit(
  treat ~ age + sex,
  data = mt,
  exact = c("age", "sex"),
  method = "nearest",
  ratio = 2
)
但是我不知道如何加入这样的条件:`cases$dod` 应早于 `controls$end`,并且至少比 `controls$start` 晚 1 年。
答案 0 :(得分:0)
不确定这是否就是您想要的结果(问题中没有给出期望的输出),但可以把它作为第一步:
下面的代码使用 `data.table` 对病例和对照做左非等值联接(left non-equi join),联接条件为 `sex` 和 `age` 相等,并且 `start.control < dod - 1 year`、`end.control > dod`:
代码如下,输出附在代码之后:
library(data.table)
library(lubridate)

# Work on data.table copies of the inputs (setDT() would convert in place
# instead).
dt.controls <- as.data.table(controls)
dt.cases <- as.data.table(cases)

# Suffix every column with its origin so the columns can be told apart after
# the join. setnames() renames by reference, which is the data.table idiom
# (assigning through `names<-` is not).
setnames(dt.controls, paste0(names(dt.controls), ".control"))
setnames(dt.cases, paste0(names(dt.cases), ".case"))

# Save the final column order (cases first, controls after) before the helper
# join columns are added.
colorder <- c(names(dt.cases), names(dt.controls))

# Helper columns that are used only as join keys.
dt.controls[, `:=`(
  age.join = age.control,
  sex.join = sex.control,
  start.join = start.control,
  end.join = end.control
)]
dt.cases[, `:=`(
  age.join = age.case,
  sex.join = sex.case,
  # %m-% subtracts a calendar year; see the lubridate docs for how it treats
  # dates that do not exist in the target month (e.g. 29 February).
  dod.join.start = dod.case %m-% lubridate::years(1),
  dod.join.end = dod.case
)]

# Left non-equi join: one row per case. A control qualifies when age band and
# sex are equal, its start is before (dod - 1 year) and its end is after dod.
# mult = "first" keeps only the first qualifying control; nomatch = NA keeps
# cases without any qualifying control (control columns are NA).
result <- dt.controls[
  dt.cases,
  on = .(
    age.join,
    sex.join,
    start.join < dod.join.start,
    end.join > dod.join.end
  ),
  mult = "first",
  nomatch = NA
]

# Drop the helper columns. The dot is escaped so that only names ending in a
# literal ".join" are matched.
result[, grep("\\.join$", names(result)) := NULL]

# Set the column order so cases come first, controls after.
setcolorder(result, colorder)
输出:
head(result)
# id.case dod.case age.case sex.case id.control start.control dur.control age.control sex.control end.control
# 1: 1 2012-12-26 5-9 m 318 2011-04-01 1826.25 5-9 m 2016-03-31
# 2: 2 2015-09-19 75-79 f 26 2012-04-01 1461.00 75-79 f 2016-04-01
# 3: 3 2011-08-17 25-29 m NA <NA> NA <NA> <NA> <NA>
# 4: 4 2011-07-23 35-39 f NA <NA> NA <NA> <NA> <NA>
# 5: 5 2013-11-16 30-34 f 112 2012-04-01 730.50 30-34 f 2014-04-01
# 6: 6 2014-09-17 5-9 f 784 2013-04-01 1826.25 5-9 f 2018-04-01
您可以调整联接的 `mult` 和 `nomatch` 参数来“微调”输出……