我希望将纵向试验数据从长格式的数据框重塑为宽格式的数据框。这些数据包含每项研究中每个干预组(arm)的基线测量和随访测量。我的最终目标是对数据进行元分析,但要做到这一点,需要把基线数据和随访数据放在同一行中。下面是我正在使用的示例数据:
# Example input in long format (dput() output): a 6-row tibble with one row
# per combination of study, alloc and time_months.
# Rows with time_months == 0 hold NA in the *_change_*, *_chg_* and bgdiff*
# columns; those columns are only filled on the later time point rows.
structure(list(study_cat = c("PSY", "PSY", "PSY", "PSY", "PSY",
"PSY"), study_id = c(1821L, 1821L, 1821L, 1821L, 2017L, 2017L
), study = c("Study1", "Study1", "Study1", "Study1", "Study2",
"Study2"), instr_anxiety = c("HADS-A", "HADS-A", "HADS-A", "HADS-A",
"STAI-Trait", "STAI-Trait"), alloc = c("IG1", "IG1", "IG2", "IG2",
"IG1", "IG1"), time_months = c(0L, 6L, 0L, 6L, 0L, 18L), ig_n = c(172L,
167L, 168L, 155L, 58L, 53L), ig_measure = c("mean", "mean", "mean",
"mean", "mean", NA), ig_value = c(8.1, 5.6, 8.2, 4.3, 41.6, NA
), ig_disp_type = c("sd", "sd", "sd", "sd", "sd", NA), ig_disp_value =
c(4.4, 3.2, 3.8, 1.2, 10.3, NA), cg_n = c(87L, 65L, 87L, 65L, 55L, 50L
), cg_measure = c("mean", "mean", "mean", "mean", "mean", NA),
cg_value = c(9.3, 5.7, 9.3, 5.7, 38.4, NA), cg_disp_type = c("sd",
"sd", "sd", "sd", "sd", NA), cg_disp_value = c(4.3, 4.3,
4.3, 4.3, 11.1, NA), ig_change_measure = c(NA, "meanchg",
NA, "meanchg", NA, "meanchg"), ig_change_value = c(NA, -2.5,
NA, -3.9, NA, -5.2), ig_chg_measure_disp_type = c(NA, "sd",
NA, "sd", NA, "sd"), ig_chg_disp = c(NA, 0.3, NA, 2.1, NA,
4.2), cg_change_measure = c(NA, "meanchg", NA, "meanchg",
NA, "meanchg"), cg_change_value = c(NA, -3.6, NA, -3.6, NA,
-1.3), cg_chg_measure_disp_type = c(NA, "sd", NA, "sd", NA,
"sd"), cg_chg_disp = c(NA, 0.5, NA, 0.5, NA, 2.5), bgdiff_measure = c(NA,
"MDC", NA, "MDC", NA, "MD"), bgdiff = c(NA, 1.1, NA, -0.3,
NA, -3.9), bgdiff_disp_type = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), bgdiff_disp = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), bgdiff_lci = c(NA, -0.2, NA, -0.6, NA,
NA), bgdiff_uci = c(NA, 3.2, NA, 0.1, NA, NA), bgdiff_pval = c(NA,
0.12, NA, 0.08, NA, 0.56)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
我希望能够得到如下数据框(注意:其中的 Study3 并未出现在上面的示例输入中):
# Desired output in wide format (dput() output): a 4-row tibble with one row
# per combination of study and alloc.
# time_months and the ig_*/cg_* n, measure, value and dispersion columns appear
# twice, once prefixed "bl_" and once prefixed "fu_"; the change-score and
# bgdiff* columns appear once, unprefixed.
# NOTE(review): the row for study_id 2047 ("Study3") has no counterpart in the
# 6-row example input shown earlier in this post.
structure(list(study_cat = c("PSY", "PSY", "PSY", "SS"), study_id = c(1821L,
1821L, 2017L, 2047L), study = c("Study1", "Study1", "Study2",
"Study3"), instr_anxiety = c("HADS-A", "HADS-A", "STAI-Trait",
"HADS-A"), alloc = c("IG1", "IG2", "IG1", "IG1"), bl_time_months = c(0L,
0L, 0L, 0L), bl_ig_n = c(172L, 168L, 58L, 116L), bl_ig_measure = c("mean",
"mean", "mean", "LSmean"), bl_ig_value = c(8.1, 8.2, 41.6, 7.55
), bl_ig_disp_type = c("sd", "sd", "sd", "se"), bl_ig_disp_value = c(4.4,
3.8, 10.3, 2.5), bl_cg_n = c(87L, 87L, 55L, 120L), bl_cg_measure = c("mean",
"mean", "mean", "LSmean"), bl_cg_value = c(9.3, 9.3, 38.4, 7.97
), bl_cg_disp_type = c("sd", "sd", "sd", "se"), bl_cg_disp_value = c(4.3,
4.3, 11.1, 2.3), fu_time_months = c(6L, 6L, 18L, 8L), fu_ig_n = c(167L,
155L, 53L, 93L), fu_ig_measure = c("mean", "mean", NA, "LSmean"
), fu_ig_value = c(5.6, 4.3, NA, 5.6), fu_ig_disp_type = c("sd",
"sd", NA, "se"), fu_ig_disp_value = c(3.2, 1.2, NA, 3.7), fu_cg_n = c(65L,
65L, 50L, 100L), fu_cg_measure = c("mean", "mean", NA, "LSmean"
), fu_cg_value = c(5.7, 5.7, NA, 5.5), fu_cg_disp_type = c("sd",
"sd", NA, "se"), fu_cg_disp_value = c(4.3, 4.3, NA, 3.5), ig_change_measure
= c("meanchg", "meanchg", "meanchg", "LSmeanchg"), ig_change_value = c(-2.5,
-3.9, -5.2, -2.3), ig_chg_measure_disp_type = c("sd", "sd", "sd",
"se"), ig_chg_disp = c(0.3, 2.1, 4.2, 0.68), cg_change_measure =
c("meanchg", "meanchg", "meanchg", "LSmeanchg"), cg_change_value = c(-3.6,
-3.6, -1.3, -3.8), cg_chg_measure_disp_type = c("sd", "sd", "sd",
"se"), cg_chg_disp = c(0.5, 0.5, 2.5, 0.34), bgdiff_measure = c("MDC",
"MDC", "MD", "BC"), bgdiff = c(1.1, -0.3, -3.9, -0.1), bgdiff_disp_type =
c(NA, NA, NA, "se"), bgdiff_disp = c(NA, NA, NA, 0.005), bgdiff_lci =
c(-0.2, -0.6, NA, NA), bgdiff_uci = c(3.2, 0.1, NA, NA), bgdiff_pval =
c(0.12, 0.08, 0.56, 0.78)), row.names = c(NA, -4L), class = c("tbl_df",
"tbl", "data.frame"))
在第二个数据框中,每项研究的每个干预组(“alloc”)各占一行,各时间点(“time_months”)的测量值分别放在各自的列中:对于随时间点变化的变量,基线列加上前缀“bl_”,随访列加上前缀“fu_”。因此,关键变量是“study”、“alloc”和“time_months”。我见过一些按单个键变量重塑数据的示例,但很少见到按多个键变量重塑数据的示例。
谢谢。