我有一个df:
group number id
1 A abcd 1
2 A abcd 2
3 A abcd 3
4 A efgh 4
5 A efgh 5
6 B abcd 1
7 B abcd 2
8 B abcd 3
9 B abcd 9
10 B ijkl 10
我想这样做:
group number data1 data2 data3 data4 Length
1 A abcd 1 2 3 3
2 A efgh 4 5 2
3 B abcd 1 2 3 9 4
4 B ijkl 10 1
对不起,我只能这样做到df2:
group number data Length
1 A abcd c(1,2,3) 3
2 A efgh c(4,5) 2
3 B abcd c(1,2,3,9) 4
4 B ijkl 10 1
我的代码在这里:
library(tidyverse)
df <- data.frame (group = c(rep('A',5),rep("B",5)),
number = c(rep('abcd',3),rep('efgh',2),rep('abcd',4),rep('ijkl',1)),
id = c(1,2,3,4,5,1,2,3,9,10))
df2 <- df %>%
group_by(group,number) %>%
nest() %>%
mutate(data=map(data,~unlist(.x, recursive = TRUE, use.names = FALSE)),
Length= map(data, ~length(.x)))
请随意从df或df2开始,(out)任何包都没问题。
答案 0 :(得分:4)
您可以将名称count
更改为length
(另外,我将{&#39;空格&#39;更改为NA
,如果要更改,{{1 }})
选项1
df2[is.na(df2)]=''
选项2
df <- data.frame (group = c(rep('A',5),rep("B",5)),
number = c(rep('abcd',3),rep('efgh',2),rep('abcd',4),rep('ijkl',1)),
id = c(1,2,3,4,5,1,2,3,9,10))
df2 <- df %>%
group_by(group,number) %>%
mutate(data=toString(id),count=n())
library(splitstackshape)
cSplit(df2, 3, drop = TRUE,sep=',')
group number count data_1 data_2 data_3 data_4
1: A abcd 3 1 2 3 NA
2: A efgh 2 4 5 NA NA
3: B abcd 4 1 2 3 9
4: B ijkl 1 10 NA NA NA
答案 1 :(得分:3)
我必须盲目地把它给你,但这应该有效或接近:
library(tidyverse)
df %>%
group_by(group,number) %>%
mutate(key = paste0("data",row_number()),length = n()) %>%
ungroup %>%
spread(key,id,"")
为了使它能够从你的嵌套数据中运行,我认为你必须将这些向量更改为1行data.frames具有相同的col号和名称,然后使用unexst,更复杂! :)
答案 2 :(得分:1)
在基地R
temp = split(df, paste(df$group, df$number))
columns = max(sapply(temp, NROW))
do.call(rbind, lapply(temp, function(a)
cbind(group = a$group[1],
number = a$number[1],
setNames(data.frame(t(a$id[1:columns])), paste0("data", 1:columns)),
length = length(a$id))
))
# group number data1 data2 data3 data4 length
#A abcd A abcd 1 2 3 NA 3
#A efgh A efgh 4 5 NA NA 2
#B abcd B abcd 1 2 3 9 4
#B ijkl B ijkl 10 NA NA NA 1
答案 3 :(得分:0)
以下是使用data.table
library(data.table)
dcast(setDT(df), group + number~ paste0("data", rowid(group, number)),
value.var = 'id', fill = 0)[,
length := Reduce(`+`, lapply(.SD, `>`, 0)), .SDcols = data1:data4][]
# group number data1 data2 data3 data4 length
#1: A abcd 1 2 3 0 3
#2: A efgh 4 5 0 0 2
#3: B abcd 1 2 3 9 4
#4: B ijkl 10 0 0 0 1
答案 4 :(得分:0)
这是[{3}}的变体,它在从长格式转换为宽格式之前计算Length
,并在调用{prefix
时使用rowid()
参数1}}:
library(data.table)
data.table(df)[, Length := .N, by = .(group, number)][
, dcast(.SD, group + number + Length ~ rowid(group, number, prefix = "data"),
value.var = "id")]
group number Length data1 data2 data3 data4 1: A abcd 3 1 2 3 NA 2: A efgh 2 4 5 NA NA 3: B abcd 4 1 2 3 9 4: B ijkl 1 10 NA NA NA
对于漂亮的打印,NA
值可以转换为空格:
data.table(df)[, Length := .N, by = .(group, number)][
, dcast(.SD, group + number + Length ~ rowid(group, number, prefix = "data"),
as.character, value.var = "id", fill = "")]
group number Length data1 data2 data3 data4 1: A abcd 3 1 2 3 2: A efgh 2 4 5 3: B abcd 4 1 2 3 9 4: B ijkl 1 10