你知道如何一次性创建一系列列,而不必逐一手写吗?例如,我想按州(State)汇总某个国家的数据。假设我的df如下:
head(df)
Time day State Vote
d1-h1 1 state1 5
d1-h2 1 state1 3
d1-h3 1 state1 8
d2-h1 2 state1 4
d2-h2 2 state1 5
d2-h3 2 state1 1
d1-h1 1 state2 5
d1-h2 1 state2 3
d1-h3 1 state2 8
d2-h1 2 state2 3
d2-h2 2 state2 1
d2-h3 2 state2 4
逐个手写的做法如下:
# Hand-written version: one summarise() argument per state. This is
# pseudo-code -- the `......` line stands for the omitted states and is not
# valid R.
# NOTE(review): the sample data's column is `Vote`, while this snippet refers
# to `votes` / `vote` -- presumably a typo; confirm.
df2<- df %>% group_by(Time) %>% summarise(state1 = sum(votes),
state2 = sum(votes),
......
state27 = sum(vote))
我想到的一种解决办法是使用循环,但下面的代码无法运行:
# Attempted loop: build one summary column per distinct state.
states<- unique(df$State)
for(i in 1:length(states))
{
# This line does not parse: an argument name in a call must be a literal name
# or string, so `paste0(...) = value` is a syntax error.
# In addition, df2 is reassigned on every iteration, so even with a valid
# column name only the last state's result would be kept.
df2<- df %>% group_by(Time) %>% summarise( paste0("Vote_",states[i]) = sum(Vote[State == states[i]]))
}
答案 0 :(得分:3)
我想这是一个数据重塑(reshaping)问题。您可以尝试任意一种“长”格式转“宽”格式的函数(reshape2的dcast、tidyr的spread,或base R的reshape)来获得正确的输出。
# Long-to-wide with reshape2: one row per Time, one column per State,
# cells taken from Vote.
library(reshape2)
dcast(data = df, formula = Time ~ State, value.var = "Vote")
# Time state1 state2
#1 d1-h1 5 5
#2 d1-h2 3 3
#3 d1-h3 8 8
#4 d2-h1 4 3
#5 d2-h2 5 1
#6 d2-h3 1 4
或者
# Long-to-wide with tidyr: State supplies the new column names,
# Vote supplies the cell values.
library(tidyr)
spread(data = df, key = State, value = Vote)
或者使用dplyr获得相同的输出(基于所提供的示例):
# Per-Time totals of Vote, with one explicitly named column per state.
library(dplyr)
summarise(
  group_by(df, Time),
  state1 = sum(Vote[State == "state1"]),
  state2 = sum(Vote[State == "state2"])
)
# Time state1 state2
#1 d1-h1 5 5
#2 d1-h2 3 3
#3 d1-h3 8 8
#4 d2-h1 4 3
#5 d2-h2 5 1
#6 d2-h3 1 4
假设有多个'value'列,
# Add a second value column of random integers in 1..10, one per row of df.
# The length is taken from nrow(df) instead of a hard-coded 12 so the column
# always matches the data. The seed is fixed for reproducibility; note that
# R >= 3.6.0 changed the default sampling algorithm, so the drawn values may
# differ from output produced with older R versions.
set.seed(22)
df$Vote2 <- sample(1:10, nrow(df), replace = TRUE)
然后,我们可以使用data.table的开发版(即'v1.9.5'),其中dcast的'value.var'可以接受多个列。该版本可以从原文链接(here)处安装。
# Cast several value columns at once with data.table's dcast.
# setDT() converts df to a data.table by reference, i.e. df itself is changed
# rather than copied.
library(data.table)
dcast(
  setDT(df),
  formula = Time ~ State,
  value.var = c("Vote", "Vote2")
)
# Time state1_Vote state2_Vote state1_Vote2 state2_Vote2
#1: d1-h1 5 5 4 7
#2: d1-h2 3 3 5 8
#3: d1-h3 8 8 10 5
#4: d2-h1 4 3 6 4
#5: d2-h2 5 1 9 10
#6: d2-h3 1 4 8 7
或使用base R中的reshape
# Wide reshape in base R: one row per Time, value columns spread by State.
# `day` is dropped by name rather than by position, and df is coerced to a
# plain data.frame first: if df was turned into a data.table by setDT(),
# `df[-2]` would remove row 2 instead of column 2.
reshape(
  as.data.frame(df)[setdiff(names(df), "day")],
  idvar = "Time",
  timevar = "State",
  direction = "wide"
)
# Time Vote.state1 Vote2.state1 Vote.state2 Vote2.state2
#1 d1-h1 5 4 5 7
#2 d1-h2 3 5 3 8
#3 d1-h3 8 10 8 5
#4 d2-h1 4 6 3 4
#5 d2-h2 5 9 1 10
#6 d2-h3 1 8 4 7
# Example data: 2 states x 2 days x 3 hourly slots, one Vote count per row.
# Time and State are character columns; day and Vote are integer columns.
df <- data.frame(
  Time = rep(
    c("d1-h1", "d1-h2", "d1-h3", "d2-h1", "d2-h2", "d2-h3"),
    times = 2L
  ),
  day = rep(rep(1:2, each = 3L), times = 2L),
  State = rep(c("state1", "state2"), each = 6L),
  Vote = c(5L, 3L, 8L, 4L, 5L, 1L, 5L, 3L, 8L, 3L, 1L, 4L),
  stringsAsFactors = FALSE
)