我正在将一个数据框(df)合并到另一个数据框中。通常我使用下面的写法就能正常工作:
df<-merge(x,y, by=c(variable1, variable2))
实际使用的代码:
merge(mymaindf, mergingdf, by=c('hai_dispense_number','ID'))
我的问题是:对于我现在使用的数据,x 中的一个变量在合并后无法保留原来的值。它是一个二元变量,0 和 1 大约各占一半。当我合并 x 和 y 时,这个变量的值全部变成了 1。我尝试了各种办法,比如把该变量在数值型、字符型和因子型之间转换,但都没有成功。我也尝试过在 y 中创建这个变量,但合并时它的值同样会发生变化。有谁知道这是怎么回事吗?我在下面列出了这两个数据框(x = mymaindf,y = mergingdf)的输入数据。
值发生变化的变量名为 scheme。
mymaindf:
structure(list(hai_dispense_number = c("Patient HAI0000059",
"Patient HAI0000059", "Patient HAI0000059", "Patient HAI0000059",
"Patient HAI0000059", "Patient HAI0000059", "Patient HAI0000059",
"Patient HAI0000059", "Patient HAI0000059", "Patient HAI0000059"
), ID = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"),
variable.x = structure(1:10, .Label = c("month1", "month2",
"month3", "month4", "month5", "month6", "month7", "month8",
"month9", "month10", "month11", "month12", "month13", "month14",
"month15", "month16", "month17", "month18"), class = "factor"),
adherence = c(1, 1, 0.933333333333333, 0.966666666666667,
0.966666666666667, 0.966666666666667, 0.9, 0.966666666666667,
0.633333333333333, 0.866666666666667), time1 = c(-1, -2,
-3, -4, 1, 2, 3, 4, 5, 6), new_numbers = c(-4L, -3L, -2L,
-1L, 1L, 2L, 3L, 4L, 5L, 6L), variable.y = structure(1:10, .Label = c("t1",
"t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "t10", "t11",
"t12", "t13", "t14", "t15", "t16", "t17", "t18"), class = "factor"),
age = c(72, 72.0833333333333, 72.1666666666667, 72.25, 72.3333333333333,
72.4166666666667, 72.5, 72.5833333333333, 72.6666666666667,
72.75), sex = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), post = c(0,
0, 0, 0, 1, 1, 1, 1, 1, 1), time_post = c(0, 0, 0, 0, 0,
1, 2, 3, 4, 5), base = c(3, 4, 5, 6, 7, 8, 9, 10, 11, 12),
scheme = c("1", "1", "1", "1", "1", "1", "1", "1", "1", "1"
)), .Names = c("hai_dispense_number", "ID", "variable.x",
"adherence", "time1", "new_numbers", "variable.y", "age", "sex",
"post", "time_post", "base", "scheme"), row.names = c("1", "9",
"10", "11", "12", "13", "14", "15", "16", "2"), class = "data.frame")
mergingdf:
structure(list(hai_dispense_number = c("Patient HAI0000059",
"Patient HAI0000059", "Patient HAI0000059", "Patient HAI0000059",
"Patient HAI0000059", "Patient HAI0000059", "Patient HAI0000059",
"Patient HAI0000059", "Patient HAI0000059", "Patient HAI0000059"
), aspT1person = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), aspT2person = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1), aspT3person = c(1, 1, 1, 1, 1, 1,
1, 1, 1, 1), aspbaseperson = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
lipidT1person = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), lipidT2person = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1), lipidT3person = c(1, 1, 1, 1,
1, 1, 1, 1, 1, 1), lipidbaseperson = c(1, 1, 1, 1, 1, 1,
1, 1, 1, 1), hyptenT1person = c(1, 1, 1, 1, 1, 1, 1, 1, 1,
1), hyptenT2person = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), hyptenT3person = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1), hyptenbaseperson = c(1, 1, 1,
1, 1, 1, 1, 1, 1, 1), insulinT1person = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0), insulinT2person = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), insulinT3person = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0), insulinbaseperson = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
aspirin = c(7, 7, 7, 7, 7, 7, 7, 7, 7, 7), aspirinbin = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1), hypertension = c(7, 7, 7, 7,
7, 7, 7, 7, 7, 7), hypertensionbin = c(1, 1, 1, 1, 1, 1,
1, 1, 1, 1), lipids = c(7, 7, 7, 7, 7, 7, 7, 7, 7, 7), lipidsbin = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1), insulin = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0), insulinbin = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
scheme = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("0", "1"), class = "factor"), ID = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10")), .Names = c("hai_dispense_number",
"aspT1person", "aspT2person", "aspT3person", "aspbaseperson",
"lipidT1person", "lipidT2person", "lipidT3person", "lipidbaseperson",
"hyptenT1person", "hyptenT2person", "hyptenT3person", "hyptenbaseperson",
"insulinT1person", "insulinT2person", "insulinT3person", "insulinbaseperson",
"aspirin", "aspirinbin", "hypertension", "hypertensionbin", "lipids",
"lipidsbin", "insulin", "insulinbin", "scheme", "ID"), row.names = c(NA,
10L), class = "data.frame")
合并后得到的 df 的数据:
structure(list(hai_dispense_number = c("Patient HAI0000059",
"Patient HAI0000059", "Patient HAI0000059", "Patient HAI0000059",
"Patient HAI0000059", "Patient HAI0000059", "Patient HAI0000059",
"Patient HAI0000059", "Patient HAI0000059", "Patient HAI0000059"
), ID = c("1", "10", "11", "12", "13", "14", "15", "16", "2",
"3"), variable.x = structure(c(1L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 2L, 3L), .Label = c("month1", "month2", "month3", "month4",
"month5", "month6", "month7", "month8", "month9", "month10",
"month11", "month12", "month13", "month14", "month15", "month16",
"month17", "month18"), class = "factor"), adherence = c(1, 0.866666666666667,
0.833333333333333, 0.833333333333333, 0.966666666666667, 0.6,
0.833333333333333, 0.966666666666667, 1, 0.933333333333333),
time1 = c(-1, 6, 7, 8, 9, 10, 11, 12, -2, -3), new_numbers = c(-4L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, -3L, -2L), variable.y = structure(c(1L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 2L, 3L), .Label = c("t1",
"t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "t10", "t11",
"t12", "t13", "t14", "t15", "t16", "t17", "t18"), class = "factor"),
age = c(72, 72.75, 72.8333333333333, 72.9166666666667, 73,
73.0833333333333, 73.1666666666667, 73.25, 72.0833333333333,
72.1666666666667), sex = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
post = c(0, 1, 1, 1, 1, 1, 1, 1, 0, 0), time_post = c(0,
5, 6, 7, 8, 9, 10, 11, 0, 0), base = c(3, 12, 13, 14, 15,
16, 17, 18, 4, 5), scheme = c(1, 1, 1, 1, 1, 1, 1, 1, 1,
1), aspT1person = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), aspT2person = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1), aspT3person = c(1, 1, 1, 1, 1,
1, 1, 1, 1, 1), aspbaseperson = c(1, 1, 1, 1, 1, 1, 1, 1,
1, 1), lipidT1person = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), lipidT2person = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1), lipidT3person = c(1, 1, 1, 1,
1, 1, 1, 1, 1, 1), lipidbaseperson = c(1, 1, 1, 1, 1, 1,
1, 1, 1, 1), hyptenT1person = c(1, 1, 1, 1, 1, 1, 1, 1, 1,
1), hyptenT2person = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), hyptenT3person = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1), hyptenbaseperson = c(1, 1, 1,
1, 1, 1, 1, 1, 1, 1), insulinT1person = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0), insulinT2person = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), insulinT3person = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0), insulinbaseperson = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
aspirin = c(7, 7, 7, 7, 7, 7, 7, 7, 7, 7), aspirinbin = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1), hypertension = c(7, 7, 7, 7,
7, 7, 7, 7, 7, 7), hypertensionbin = c(1, 1, 1, 1, 1, 1,
1, 1, 1, 1), lipids = c(7, 7, 7, 7, 7, 7, 7, 7, 7, 7), lipidsbin = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1), insulin = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0), insulinbin = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), .Names = c("hai_dispense_number",
"ID", "variable.x", "adherence", "time1", "new_numbers", "variable.y",
"age", "sex", "post", "time_post", "base", "scheme", "aspT1person",
"aspT2person", "aspT3person", "aspbaseperson", "lipidT1person",
"lipidT2person", "lipidT3person", "lipidbaseperson", "hyptenT1person",
"hyptenT2person", "hyptenT3person", "hyptenbaseperson", "insulinT1person",
"insulinT2person", "insulinT3person", "insulinbaseperson", "aspirin",
"aspirinbin", "hypertension", "hypertensionbin", "lipids", "lipidsbin",
"insulin", "insulinbin"), row.names = c(NA, 10L), class = "data.frame")
答案 0 :(得分:0)
您的 mergingdf$scheme 是因子(factor),因此它的值并不是您以为的那样。只要在控制台中分别输入 mymaindf$scheme 和 mergingdf$scheme 查看,就很容易确认这一点。