我在使用R中的数据操作时遇到问题。我有这个数据集(示例)
# Example data in long format: one row per (GRP, x) pair with its value y.
# Note that GRP 4 only has an "x2" row.
df <- data.frame(
  GRP = c(1, 2, 3, 1, 2, 3, 4),
  x = c("x1", "x1", "x1", "x2", "x2", "x2", "x2"),
  y = c(3, 8, 2, 20, 24, 28, 31)
)
GRP x y
1 x1 3
2 x1 8
3 x1 2
1 x2 20
2 x2 24
3 x2 28
4 x2 31
并希望将此数据集转换为
GRP x1 x2
1 3 20
2 8 24
3 2 28
4 0 31
我试过了:
# Attempt from the question (kept as written; it yields the wrong value for GRP 4).
# Build a skeleton with one row per distinct GRP and x1/x2 initialised to 0.
df1 <- expand.grid(GRP = unique(df$GRP), x1=0, x2=0)
# The left-hand side selects every row of df1 (all of its GRP values occur in df),
# while the right-hand side combines a mask of length nrow(df1) with a mask of
# length nrow(df) and returns only the y values of the "x1" rows of df.
# Fewer values than target rows means the values are recycled, which is why
# GRP 4 receives 3 instead of staying 0 in the result shown below.
df1$x1[df1$GRP %in% df$GRP]<- df$y[df1$GRP %in% df$GRP &
df$x %in% c("x1")]
# Same construction for "x2"; here every GRP has an "x2" row, so the counts
# happen to match and the column comes out as expected.
df1$x2[df1$GRP %in% df$GRP]<- df$y[df1$GRP %in% df$GRP & df$x %in%
c("x2")]
得到这个结果:
GRP x1 x2
1 3 20
2 8 24
3 2 28
4 3 31
有关如何解决此问题的任何建议吗?
答案 0 :(得分:0)
如果您需要将数据从长格式(long)转换为宽格式(wide),以下是一些可选方案:
library(reshape2)
# Cast long -> wide: one row per GRP, one column per level of x, cells taken
# from y. Combinations missing from df are filled with 0.
dcast(df, GRP ~ x, value.var = "y", fill = 0)
# GRP x1 x2
#1 1 3 20
#2 2 8 24
#3 3 2 28
#4 4 0 31
或者
# Base R alternative: cross-tabulate y by GRP (rows) and x (columns);
# GRP/x combinations that do not occur in df show up as 0.
xtabs(y ~ GRP + x, data = df)
或者
library(tidyr)
# spread() is superseded in tidyr; pivot_wider() is its maintained replacement.
# Each distinct value of x becomes a column filled from y, and GRP/x
# combinations absent from df (GRP 4 has no "x1" row) are set to 0.
# values_fill is passed as a named list so the call also works on tidyr 1.0.0.
pivot_wider(df, names_from = x, values_from = y, values_fill = list(y = 0))
另外,如果想用'df'中的值替换'df1'中的值,下面的代码可能会有所帮助:
# Skeleton with one row per distinct GRP and both value columns preset to 0.
df1 <- data.frame(GRP = unique(df$GRP), x1 = 0, x2 = 0)

# Masks over the rows of df: rows of each x level whose GRP exists in df1.
indx1 <- df$GRP %in% df1$GRP & df$x == "x1"
indx2 <- df$GRP %in% df1$GRP & df$x == "x2"

# Overwrite only the df1 rows whose GRP actually has a value for that x level,
# so groups without a value keep their 0. The assignment is positional: it
# relies on df1 and df listing the groups in the same order.
df1[df1$GRP %in% df$GRP[indx1], "x1"] <- df$y[indx1]
df1[df1$GRP %in% df$GRP[indx2], "x2"] <- df$y[indx2]
df1
# GRP x1 x2
#1 1 3 20
#2 2 8 24
#3 3 2 28
#4 4 0 31
假设'df1'中的行顺序不同:
# Same skeleton as before, but with the rows put in descending GRP order so
# that df1 no longer lines up with df.
df1 <- data.frame(GRP = unique(df$GRP), x1 = 0, x2 = 0)
df1 <- df1[order(df1$GRP, decreasing = TRUE), ]

# Masks over the rows of df: rows of each x level whose GRP exists in df1.
indx1 <- df$GRP %in% df1$GRP & df$x == "x1"
indx2 <- df$GRP %in% df1$GRP & df$x == "x2"

# match() looks up the df1 row position of every selected GRP, so each y value
# lands in the row of its own group regardless of how df1 is ordered.
df1[match(df$GRP[indx1], df1$GRP), "x1"] <- df$y[indx1]
df1[match(df$GRP[indx2], df1$GRP), "x2"] <- df$y[indx2]
df1
# GRP x1 x2
#4 4 0 31
#3 3 2 28
#2 2 8 24
#1 1 3 20
它同样适用于其他行顺序。从原始的“df1”开始:
set.seed(28)
# Shuffle the rows of df1 into a random order.
df1 <- df1[sample(nrow(df1)), ]
在运行上面列出的代码后,得到如下输出:
# Print df1 after the match()-based assignment has been re-run on the shuffled
# rows; the commented lines below are the output reported by the author.
df1
# GRP x1 x2
#1 1 3 20
#4 4 0 31
#3 3 2 28
#2 2 8 24