我在这个问题上遇到了很大的困难:我需要用多个键值对来重塑数据。基本上,每个人都有两个时间点的数据。凡是在这两个时间点之间会发生变化的变量,都以 _1 或 _2 作为后缀。例如,我想将 Age_1 和 Age_2 折叠为时间(时间 1 和时间 2)和年龄这一对键值列。同样,我也想将 Test.date_1 和 Test.date_2 折叠成时间和测试日期的键值对。我估计大约有 40 个变量需要这样处理。基本上,我希望它最后看起来像这样:
这是我的数据的一个子集
structure(list(PARTID = c("AGE004", "AGE005", "AGE007", "AGE012",
"AGE022", "AGE026"), Phase_1 = c("Phase One", "Phase One", "Phase One",
"Phase One", "Phase One", "Phase One"), Age_1 = c(34L, 27L, 34L,
35L, 19L, 19L), Sex = c(2L, 1L, 1L, 2L, 1L, 2L), Handedness = c(1L,
1L, 0L, 1L, 1L, 1L), Test.date_1 = c("14/02/2013", "24/02/2013",
"25/02/2013", "15/04/2013", "23/04/2013", "20/05/2013"), PartID_2 = c("BGE004",
"BGE005", "CGE007", "BGE012", "BGE022", "BGE026"), Phase_2 = c("Phase Two",
"Phase Two", "Phase Three", "Phase Two", "Phase Two", "Phase Two"
), Age_2 = c(37L, 30L, 39L, 38L, 22L, 22L), Test.date_2 = c("30/07/2015",
"28/07/2015", "21/08/2017", "27/05/2016", "31/05/2016", "3/03/2016"
), RART_1 = c(606.045488, 497.252507, 620.9270198, 667.98753,
553.8135176, 609.2147245), RMST_1 = c(606.7195939, 612.5271588,
748.2943148, 723.4190089, 679.2795688, 659.0553977), STRT_1 = c(762.7184264,
672.3305307, 907.0870536, 821.0780768, 829.0058858, 716.4811003
), SART_1 = c(1005.309323, 844.5555086, 955.1771471, 949.6397829,
942.4323483, 897.528926), NIRT_1 = c(1001.370827, 793.4688684,
878.7502713, 898.2766888, 863.6903505, 876.6728154), RNRT_1 = c(915.2199074,
749.2448674, 771.7413651, 752.5620404, 786.4171282, 877.0446777
), SNRT_1 = c(1088.598633, 841.6114517, 998.3484605, 1049.322361,
952.1625906, 876.3100229), RAER_1 = c(0.699300699, 2.857142857,
1.408450704, 2.142857143, 0.699300699, 2.857142857), RMER_1 = c(2.285714286,
1.129943503, 1.142857143, 1.724137931, 1.694915254, 1.704545455
), STER_1 = c(0.568181818, 1.704545455, 4.597701149, 1.734104046,
1.694915254, 2.259887006), SAER_1 = c(0.555555556, 2.89017341,
1.754385965, 2.793296089, 1.704545455, 2.259887006), NIER_1 = c(2.808988764,
2.824858757, 4.545454545, 2.840909091, 2.247191011, 2.298850575
), RNER_1 = c(3.370786517, 0, 1.136363636, 0, 1.123595506, 3.488372093
), SNER_1 = c(2.247191011, 5.617977528, 7.954545455, 5.681818182,
3.370786517, 1.136363636), MixingCostRT_1 = c(0.6741059, 115.2746518,
127.367295, 55.4314789, 125.4660512, 49.8406732), STCostRT_1 = c(155.9988325,
59.8033719, 158.7927388, 97.6590679, 149.726317, 57.4257026),
SACostRT_1 = c(398.5897291, 232.0283498, 206.8828323, 226.220774,
263.1527795, 238.4735283), NICostRT_1 = c(394.6512331, 180.9417096,
130.4559565, 174.8576799, 184.4107817, 217.6174177), RNCostRT_1 = c(308.5003135,
136.7177086, 23.4470503, 29.1430315, 107.1375594, 217.98928
), SNCostRT_1 = c(481.8790391, 229.0842929, 250.0541457,
325.9033521, 272.8830218, 217.2546252), MixingPropRT_1 = c(0.001112302,
0.231823169, 0.20512442, 0.082982805, 0.226549276, 0.08181134
), STPropRT_1 = c(0.257118501, 0.097633829, 0.212206261,
0.134996547, 0.220419285, 0.087133347), SAPropRT_1 = c(0.656958722,
0.378804999, 0.276472543, 0.312710575, 0.387399815, 0.361841401
), NIPropRT_1 = c(0.650467262, 0.295401938, 0.174337762,
0.241710099, 0.271479948, 0.330195942), RNPropRT_1 = c(0.50847264,
0.223202688, 0.031333995, 0.040285134, 0.157722335, 0.330760177
), SNPropRT_1 = c(0.794236817, 0.373998589, 0.334165502,
0.450504269, 0.401724171, 0.329645468), MixingCostER_1 = c(1.586413587,
-1.727199354, -0.265593561, -0.418719212, 0.995614555, -1.152597402
), STCostER_1 = c(-1.717532468, 0.574601952, 3.454844006,
0.009966115, 0, 0.555341551), SACostER_1 = c(-1.73015873,
1.760229907, 0.611528822, 1.069158158, 0.009630201, 0.555341551
), NICostER_1 = c(0.523274478, 1.694915254, 3.402597402,
1.11677116, 0.552275757, 0.59430512), RNCostER_1 = c(1.085072231,
-1.129943503, -0.006493507, -1.724137931, -0.571319748, 1.783826638
), SNCostER_1 = c(-0.038523275, 4.488034025, 6.811688312,
3.957680251, 1.675871263, -0.568181819), MixingPropER_1 = c("2.26857143",
"-0.604519774", "-0.188571428", "-0.195402299", "1.423728814",
"-0.403409091"), STPropER_1 = c(-0.751420455, 0.508522727,
3.022988505, 0.005780347, 0, 0.325800376), SAPropER_1 = c(-0.756944444,
1.557803467, 0.535087719, 0.620111732, 0.005681819, 0.325800376
), NIPropER_1 = c(0.228932584, 1.5, 2.977272726, 0.647727273,
0.325842697, 0.348659004), RNPropER_1 = c(0.474719101, -1,
-0.005681819, -1, -0.337078651, 1.046511627), SNPropER_1 = c(-0.016853933,
3.971910112, 5.960227272, 2.295454546, 0.988764045, -0.333333334
), `_` = c(NA, NA, NA, NA, NA, NA), RART_2 = c(534.6624201,
551.4502338, 708.557581, 766.713627, 595.5418578, 622.2098214
), RMS_T = c(520.2972412, 626.2751518, 743.1781747, 822.0529381,
615.4418945, 683.4996235), STRT_2 = c(739.1043527, 731.2613225,
876.2105908, 911.3537016, 754.9962198, 714.9285414), SART_2 = c(901.0522605,
865.5911959, 900.192916, 1030.688477, 835.2385876, 823.8851417
), NIRT_2 = c(847.4667198, 826.9730689, 929.7489006, 998.631643,
800.8631764, 830.4781627), RNRT_2 = c(682.2760595, 801.8197073,
829.0902071, 948.3103198, 713.0618405, 829.2444741), SNRT_2 = c(1000.111254,
852.1264305, 1036.621094, 1052.059221, 894.7618273, 831.6824777
), RAER_2 = c(2.857142857, 0, 0, 2.857142857, 1.408450704,
0.699300699), RMER_2 = c(1.694915254, 1.754385965, 0, 1.129943503,
1.136363636, 1.129943503), STER_2 = c(2.840909091, 2.824858757,
0, 0, 3.409090909, 0.561797753), SAER_2 = c(0, 2.272727273,
0.558659218, 0.558659218, 1.714285714, 1.685393258), NIER_2 = c(1.117318436,
3.389830508, 1.111111111, 1.111111111, 3.409090909, 1.685393258
), RNER_2 = c(1.123595506, 3.409090909, 1.111111111, 0, 2.272727273,
3.370786517), SNER_2 = c(1.111111111, 3.370786517, 1.111111111,
2.222222222, 4.545454545, 0), MixingCostRT_2 = c(-14.3651789,
74.824918, 34.6205937, 55.3393111, 19.9000367, 61.2898021
), STCostRT_2 = c(218.8071115, 104.9861707, 133.0324161,
89.3007635, 139.5543253, 31.4289179), SACostRT_2 = c(380.7550193,
239.3160441, 157.0147413, 208.6355389, 219.7966931, 140.3855182
), NICostRT_2 = c(327.1694786, 200.6979171, 186.5707259,
176.5787049, 185.4212819, 146.9785392), RNCostRT_2 = c(161.9788183,
175.5445555, 85.9120324, 126.2573817, 97.619946, 145.7448506
), SNCostRT_2 = c(479.8140128, 225.8512787, 293.4429193,
230.0062829, 279.3199328, 148.1828542), MixingPropRT_2 = c(-0.026867755,
0.135687526, 0.048860664, 0.072177289, 0.033415009, 0.098503431
), STPropRT_2 = c(0.420542517, 0.167635855, 0.17900474, 0.108631402,
0.226754673, 0.045982349), SAPropRT_2 = c(0.731802879, 0.382126041,
0.211274694, 0.253798179, 0.357136385, 0.205392239), NIPropRT_2 = c(0.628812634,
0.320462845, 0.251044409, 0.214802109, 0.30128154, 0.215038215
), RNPropRT_2 = c(0.311319772, 0.28029941, 0.115600855, 0.153587897,
0.158617648, 0.213233257), SNPropRT_2 = c(0.922192114, 0.360626281,
0.394848677, 0.279794977, 0.453852647, 0.216800199), MixingCostER_2 = c(-1.162227603,
1.754385965, 0, -1.727199354, -0.272087068, 0.430642804),
STCostER_2 = c(1.145993837, 1.070472792, 0, -1.129943503,
2.272727273, -0.56814575), SACostER_2 = c(-1.694915254, 0.518341308,
0.558659218, -0.571284285, 0.577922078, 0.555449755), NICostER_2 = c(-0.577596818,
1.635444543, 1.111111111, -0.018832392, 2.272727273, 0.555449755
), RNCostER_2 = c(-0.571319748, 1.654704944, 1.111111111,
-1.129943503, 1.136363637, 2.240843014), SNCostER_2 = c(-0.583804143,
1.616400552, 1.111111111, 1.092278719, 3.409090909, -1.129943503
), MixingPropER_2 = c("-0.406779661", "", "", "-0.604519774",
"-0.193181818", "0.61581921"), STPropER_2 = c(0.676136364,
0.610169491, NA, -1, 2.000000001, -0.502808989), SAPropER_2 = c(-1,
0.295454546, NA, -0.505586592, 0.508571429, 0.491573033),
NIPropER_2 = c(-0.340782123, 0.932203389, NA, -0.016666667,
2.000000001, 0.491573033), RNPropER_2 = c(-0.337078651, 0.943181818,
NA, -1, 1.000000001, 1.983146067), SNPropER_2 = c(-0.344444444,
0.921348315, NA, 0.966666666, 3.000000001, -1)), row.names = c(NA,
6L), class = "data.frame")
这是我迄今为止尝试过的:
# Reshape to one row per participant and time point, with one column per
# measured variable (Age, Test.date, RART, ...).
# Assumes the data.table package is attached (melt/dcast/tstrsplit/`:=`).

# `data` is a plain data.frame; `:=` only works on a data.table, so convert
# (this also copies, leaving `data` itself untouched).
DT <- as.data.table(data)

# Only columns carrying a _1/_2 suffix are time-varying. Columns such as
# `_` and `RMS_T` have no valid suffix and are deliberately left out.
time_cols <- grep("_[12]$", names(DT), value = TRUE)

# Dates, labels and numbers must share one `value` column while long, so
# cast them to character up front instead of relying on melt's coercion.
DT[, (time_cols) := lapply(.SD, as.character), .SDcols = time_cols]

DT.m1 <- melt(
  DT,
  id.vars = c("PARTID", "Sex", "Handedness"),
  measure.vars = time_cols,
  variable.factor = FALSE
)

# The text before the underscore is the variable name and the text after it
# is the time point (1 or 2) -- not the other way round.
DT.m1[, c("variable", "Time") := tstrsplit(variable, "_", fixed = TRUE)]

# Time stays on the row side of the formula; each variable becomes a column.
DT.c1 <- dcast(
  DT.m1,
  PARTID + Sex + Handedness + Time ~ variable,
  value.var = "value"
)

# Restore numeric/integer types that were flattened to character above.
DT.c1[, names(DT.c1) := lapply(.SD, type.convert, as.is = TRUE)]
DT.c1
答案 0 :(得分:2)
一种方法是先用 `pivot_longer` 转成长格式,然后再用 `pivot_wider` 转回宽格式:
我们需要将所有列转换为字符,以便它们可以暂时共存于同一列中。这可能会导致精度的细微损失。
另请注意,您的数据不包含 `RMST_2`(只有一个名为 `RMS_T` 的列)
library(tidyverse)
# Reshape so that each participant has one row per time point.
data %>%
  # Cast every column to character so values of different types can
  # temporarily share the single `value` column.
  mutate(across(everything(), as.character)) %>%
  # Split names such as "Age_1" into the variable stem and the time point.
  pivot_longer(
    matches("_[12]$"),
    names_sep = "_",
    names_to = c("Variable", "TimePoint")
  ) %>%
  # Spread the stems back out into one column each.
  pivot_wider(names_from = Variable, values_from = "value") %>%
  # Restore column types. `as.is` is passed explicitly because omitting it
  # triggers a warning on R >= 4.0; TRUE keeps strings as character rather
  # than factor (older R versions returned factors here).
  mutate(across(everything(), function(x) type.convert(x, as.is = TRUE)))
# A tibble: 12 x 48
PARTID Sex Handedness `_` RMS_T TimePoint Phase Age Test.date PartID RART RMST STRT SART NIRT RNRT SNRT RAER RMER STER
<fct> <int> <int> <lgl> <dbl> <int> <fct> <int> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 AGE004 2 1 NA 520. 1 Phas… 34 14/02/20… NA 606. 607. 763. 1005. 1001. 915. 1089. 0.699 2.29 0.568
2 AGE004 2 1 NA 520. 2 Phas… 37 30/07/20… BGE004 535. NA 739. 901. 847. 682. 1000. 2.86 1.69 2.84
3 AGE005 1 1 NA 626. 1 Phas… 27 24/02/20… NA 497. 613. 672. 845. 793. 749. 842. 2.86 1.13 1.70
4 AGE005 1 1 NA 626. 2 Phas… 30 28/07/20… BGE005 551. NA 731. 866. 827. 802. 852. 0 1.75 2.82
5 AGE007 1 0 NA 743. 1 Phas… 34 25/02/20… NA 621. 748. 907. 955. 879. 772. 998. 1.41 1.14 4.60
6 AGE007 1 0 NA 743. 2 Phas… 39 21/08/20… CGE007 709. NA 876. 900. 930. 829. 1037. 0 0 0
7 AGE012 2 1 NA 822. 1 Phas… 35 15/04/20… NA 668. 723. 821. 950. 898. 753. 1049. 2.14 1.72 1.73
8 AGE012 2 1 NA 822. 2 Phas… 38 27/05/20… BGE012 767. NA 911. 1031. 999. 948. 1052. 2.86 1.13 0
9 AGE022 1 1 NA 615. 1 Phas… 19 23/04/20… NA 554. 679. 829. 942. 864. 786. 952. 0.699 1.69 1.69
10 AGE022 1 1 NA 615. 2 Phas… 22 31/05/20… BGE022 596. NA 755. 835. 801. 713. 895. 1.41 1.14 3.41
11 AGE026 2 1 NA 683. 1 Phas… 19 20/05/20… NA 609. 659. 716. 898. 877. 877. 876. 2.86 1.70 2.26
12 AGE026 2 1 NA 683. 2 Phas… 22 3/03/2016 BGE026 622. NA 715. 824. 830. 829. 832. 0.699 1.13 0.562
,也不包含 `PartID_1`。