我需要使用其中一列将数据集从宽格式重整为长格式,以创建其他列。
我的数据如下:
Participant V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 ... V1000
Prob1_1 323.25 325.85
Prob1_2 236.12 455.23
Prob2_3 423.52 526.14 ....
Prob2_4 512.47 426.12
....
Prob2_100 235.14 632.14
Improb1_1 632.12 236.12
我想使用参与者变量来构建以下长数据集。
Participant Probability RT Trial Session
1 Prob 323.25 1 1
1 Prob 325.85 2 1
2 Prob 236.12 1 1
2 Prob 455.23 2 1
3 Prob 423.52 1 2
3 Prob 526.14 2 2
4
5
6...
我尝试过使用mutate函数,但它似乎依赖于列名,而我希望以Participant列中的实际取值为准。例如在“Prob1_1”中,Prob之后的数字代表会话(Session),下划线后的最后一个数字是参与者编号。变量V1、V2……表示试验(Trial)编号。
How do I convert a wide dataframe to a long dataframe for a multilevel structure with 'quadruple nesting'?中提到的解决方案对我不起作用。
答案 0 :(得分:2)
使用 `tidyr::extract`,我们可以根据下面代码中的正则表达式,将 Participant 列拆分为三个组/列(Probability、Session、Participant),然后把数据重整为长格式,并用 `mutate` 去掉试验列名中的前缀 "V":
library(dplyr)
library(tidyr)
# Reshape `df` from wide (one row per participant label, one column per trial)
# to long (one row per participant and trial).
#
# The Participant label has the form "<Probability><Session>_<Participant>",
# e.g. "Prob1_1"; extract() splits it into those three columns.
# convert = TRUE turns the numeric pieces into integers so that arrange()
# orders them numerically (2 before 10) rather than as text ("10" before "2").
extract(df, Participant,
        into = c("Probability", "Session", "Participant"),
        regex = "^(\\D+)(\\d+)_*(\\d+)", convert = TRUE) %>%
  # pivot_longer() supersedes gather(): every remaining column is a trial.
  pivot_longer(cols = -c(Probability, Session, Participant),
               names_to = "Trial", values_to = "RT") %>%
  # Trial columns are named "V1", "V2", ...; keep only the number, as an
  # integer, so that trial 10 sorts after trial 2.
  mutate(Trial = as.integer(sub("^V", "", Trial))) %>%
  select(Participant, Probability, RT, Trial, Session) %>%
  arrange(Participant, Session, Trial)
Participant Probability RT Trial Session
1 1 Prob 323.25 1 1
2 1 Prob 325.85 2 1
3 2 Prob 236.12 1 1
4 2 Prob 455.23 2 1
5 3 Prob 423.52 1 2
6 3 Prob 526.14 2 2
7 4 Prob 512.47 1 2
8 4 Prob 426.12 2 2
数据
# Example input in wide format: one row per participant label and one
# reaction-time column per trial (V1, V2).
df <- data.frame(
  Participant = factor(
    c("Prob1_1", "Prob1_2", "Prob2_3", "Prob2_4"),
    levels = c("Prob1_1", "Prob1_2", "Prob2_3", "Prob2_4")
  ),
  V1 = c(323.25, 236.12, 423.52, 512.47),
  V2 = c(325.85, 455.23, 526.14, 426.12)
)
答案 1 :(得分:1)
这是一种基础 R 方法:先用 `transform()` 和正则表达式拆分参与者标识,再用 `reshape()` 将数据转换为长格式。
# Reshape `dat` from wide to long using base R only.
#
# Participant labels look like "<label><n>_<m>", e.g. "Prob1_1". A single
# anchored pattern captures all three parts, so multi-digit numbers such as
# "Prob2_12" are split correctly (a greedy, unanchored pattern would pick up a
# digit from the wrong part). Labels that do not match the pattern are left
# unchanged by sub() and therefore become NA (with a warning) in as.integer().
#
# NOTE(review): the question describes the number after the label as the
# session and the V columns as trials; the column names below ("trial" for the
# number after the label, "session" for the V-column index) are kept as they
# were - confirm which naming is wanted.
id_pattern <- "^(\\D+)(\\d+)_(\\d+)$"
dat.long <- reshape(
  # transform() evaluates every expression against the original `dat`, so
  # `participant` below is always the untouched label.
  transform(
    dat,
    probability = sub(id_pattern, "\\1", participant),
    participant = as.integer(sub(id_pattern, "\\3", participant)),
    trial = as.integer(sub(id_pattern, "\\2", participant))
  ),
  # Locate the measurement columns by name instead of hard-coding 2:11, so the
  # code keeps working when the number of V columns changes.
  varying = grep("^V", names(dat)),
  idvar = c("participant", "probability", "trial"),
  direction = "long", v.names = "RT", timevar = "session"
)
head(dat.long, 15)
# participant probability trial session RT
# 1.Prob.1.1 1 Prob 1 1 1.28978001
# 2.Prob.1.1 2 Prob 1 1 -1.40316524
# 3.Prob.1.1 3 Prob 1 1 0.51445097
# 1.Prob.2.1 1 Prob 2 1 0.14846476
# 2.Prob.2.1 2 Prob 2 1 0.06879947
# 3.Prob.2.1 3 Prob 2 1 0.02801546
# 1.Improb.1.1 1 Improb 1 1 1.26768662
# 2.Improb.1.1 2 Improb 1 1 -0.87197423
# 3.Improb.1.1 3 Improb 1 1 -1.04835070
# 1.Improb.2.1 1 Improb 2 1 0.65630521
# 2.Improb.2.1 2 Improb 2 1 -0.58099358
# 3.Improb.2.1 3 Improb 2 1 -0.15082366
# 1.Prob.1.2 1 Prob 1 2 0.58457874
# 2.Prob.1.2 2 Prob 1 2 -2.25150269
# 3.Prob.1.2 3 Prob 1 2 1.50887273
# Example input in wide format: 12 participant labels (rows) and ten numeric
# measurement columns V.1 to V.10. The factor levels are listed explicitly so
# that their order does not depend on the locale's sort order.
dat <- data.frame(
  participant = factor(
    c("Prob1_1", "Prob1_2", "Prob1_3", "Prob2_1", "Prob2_2", "Prob2_3",
      "Improb1_1", "Improb1_2", "Improb1_3", "Improb2_1", "Improb2_2",
      "Improb2_3"),
    levels = c("Improb1_1", "Improb1_2", "Improb1_3", "Improb2_1",
               "Improb2_2", "Improb2_3", "Prob1_1", "Prob1_2", "Prob1_3",
               "Prob2_1", "Prob2_2", "Prob2_3")
  ),
  V.1 = c(
    -0.78317903978425, -1.42256802537658, 0.342940938479779,
    -1.31227758139305, -0.134871424882155, 0.372262460142081,
    0.313809235928102, 0.296138275146936, 0.931606411400065,
    0.375142970846081, -1.43961382169779, 0.815799047808872
  ),
  V.2 = c(
    -0.171646399523515, -0.644161162124944, 0.785979607398719,
    0.47705655100109, -0.963859684799095, 0.154552158842357,
    1.72307227079195, 0.986655369736914, -0.32413410098149,
    0.44638843532548, 1.28716761230553, 0.628048242307817
  ),
  V.3 = c(
    0.0521416442312076, -1.31894376808205, -0.40743302087948,
    1.14813283531483, 0.575490018443863, -0.768152373604551,
    0.668394957075075, 0.0652434186965083, 0.796433243461602,
    -0.607367768674947, 1.16046833952821, 0.416012124430193
  ),
  V.4 = c(
    0.109068898771834, 0.310749865844485, -0.578879180813773,
    -0.160584364698438, -0.567827511946429, -0.0762903833505978,
    -0.940099003977588, -0.706132353777999, 0.551756154707779,
    1.21946510393981, 0.0540021849120832, 0.371706858474099
  ),
  V.5 = c(
    0.997271454464248, 0.351789857136835, 0.335620936190577,
    0.115428590188729, -1.02709809154436, 0.640719901786663,
    -0.828218512265051, 1.57701044840292, 0.0587912355165915,
    -0.290236728884489, 0.875871492695704, -0.130491615088836
  ),
  V.6 = c(
    1.29218428325551, -0.60588680898263, 0.403803440305249,
    1.0357840121496, -1.34874665542469, 0.883403082744137,
    2.1083976501382, -0.133455001164623, -0.392764320879111,
    2.45559047947122, -0.836168557148904, 0.542357603414291
  ),
  V.7 = c(
    0.283836115710646, -1.11604617217924, -0.702911947372907,
    1.92979553472645, 1.07991010308695, 1.75404937440206,
    0.477955966827059, -0.64206114456452, -0.401702215242213,
    -1.36264088455225, -0.948291093216559, 0.417484687283255
  ),
  V.8 = c(
    -1.22023326762452, -0.876955844153173, -1.14703776357049,
    -0.0850518753026808, 1.56853089303981, -0.278003253072658,
    -1.07862125797898, 2.03389661648939, 1.25326789139365,
    0.700470424495529, 0.0722915950880813, -1.16225205037457
  ),
  V.9 = c(
    -1.52445924377294, -0.260093122031534, 0.982661963156681,
    -2.6411330557081, 1.0685535833561, -1.27019946336172,
    0.387277102978568, 0.615191170581553, -0.592960414663139,
    -0.0183305342891908, -0.392615477570169, -1.06251098372276
  ),
  V.10 = c(
    -0.295815527371644, 0.243080328002458, 0.517476015205563,
    -1.4243221433541, 0.411574978845139, -0.164274442339443,
    -0.0564129898114199, -1.05954278095433, -0.784089424501994,
    0.422217107186452, -1.71720615045398, 0.482129993001465
  )
)