我正在尝试重组数据,把一个包含多个值的列展开成多列,以得到下面列出的所需输出。我用 tidyr::spread() 和 tidyr::gather() 尝试了几次,都没有成功。有什么想法吗?
# Example input: one row per name; `count` packs several "year=value" pairs
# into a single string, separated by "; ".
dat <- data.frame(
  name = c("a", "b", "c"),
  count = c(
    "2003=22; 2004=32",
    "2003=34; 2005=45",
    "2005=32; 2006=67"
  )
)
name count
a 2003=22; 2004=32
b 2003=34; 2005=45
c 2005=32; 2006=67
所需的输出:
name 2003 2004 2005 2006
a 22 32 NA NA
b 34 NA 45 NA
c NA NA 32 67
答案 0 :(得分:3)
可能有一种更聪明,更简洁的方法,但这可行:
library(tidyr)
library(dplyr) # select() is a dplyr verb; tidyr alone does not provide it

dat %>%
  # Split each row's two "year=value" entries into columns c1 and c2.
  separate(count, sep = "; ", into = c("c1", "c2")) %>%
  # Stack c1/c2 into one long column so every entry sits on its own row.
  gather(Var, Val, -name) %>%
  # Break each "year=value" entry into its two parts.
  separate(Val, sep = "=", into = c("year", "value")) %>%
  # Var only records which slot (c1/c2) the entry came from; drop it.
  select(-Var) %>%
  # One column per year; missing name/year combinations become NA.
  spread(year, value)
name 2003 2004 2005 2006
1 a 22 32 <NA> <NA>
2 b 34 <NA> 45 <NA>
3 c <NA> <NA> 32 67
请注意,这会得到“宽”格式的数据;执行 spread 之前的“长”格式数据可能更易于使用。
答案 1 :(得分:2)
您可以使用 tidyr 中的 separate_rows 和 separate。
library(tidyr)

dat %>%
  # One row per "year=value" entry.
  separate_rows(count, sep = "; ") %>%
  # Split each entry into the year (key) and its value (val).
  separate(count, sep = "=", into = c("key", "val")) %>%
  # Widen to one column per year. convert = TRUE turns the value columns from
  # character into numbers, so gaps print as NA rather than <NA>.
  spread(key, val, convert = TRUE)
#   name 2003 2004 2005 2006
# 1    a   22   32   NA   NA
# 2    b   34   NA   45   NA
# 3    c   NA   NA   32   67
另一种解决方案是将这些字符串解析为 tibble,然后对结果执行 unnest(展开嵌套):
library(tidyverse)

dat %>%
  # Turn every "year=value; year=value" string into a one-row tibble whose
  # columns are the years. The string is split explicitly instead of being
  # pasted into R code and run through eval(parse()), which would execute
  # whatever text happens to be in the data.
  mutate(count = map(as.character(count), function(entry) {
    # "2003=22; 2004=32" -> 2-column character matrix: year | value
    pairs <- str_split_fixed(str_split(entry, "; ")[[1]], "=", 2)
    as_tibble(as.list(set_names(as.numeric(pairs[, 2]), pairs[, 1])))
  })) %>%
  # Row-bind the per-row tibbles; years absent from a row are filled with NA.
  unnest(count)
# name 2003 2004 2005 2006
# 1 a 22 32 NA NA
# 2 b 34 NA 45 NA
# 3 c NA NA 32 67
答案 2 :(得分:1)
这是一个使用基础 R 和 reshape2 的解决方案:
# Base R + reshape2: build a long (name, year, value) table, then cast it wide.
library(reshape2)

# Split every row's string into its "year=value" pieces. Using strsplit()
# handles any number of pieces per row, not just exactly two.
pairs <- strsplit(as.character(dat$count), ";", fixed = TRUE)

# One row per (name, piece); `get1` holds the raw "year=value" text.
df <- data.frame(
  name = rep(dat$name, lengths(pairs)),
  get1 = trimws(unlist(pairs))
)
df$years <- sub("=.*$", "", df$get1)     # text before "="
df$values <- sub("^[^=]*=", "", df$get1) # text after "="

# Wide format: one column per year, missing combinations become NA.
outdf <- dcast(df, name ~ years, value.var = "values")
outdf
# name 2003 2004 2005 2006
# 1 a 22 32 <NA> <NA>
# 2 b 34 <NA> 45 <NA>
# 3 c <NA> <NA> 32 67
答案 3 :(得分:0)
这是一种使用 extract + bind_rows + spread 的方法:
# Approach using extract() + bind_rows() + spread().
# Needs tidyr (extract, spread) and dplyr (select, bind_rows) to be attached.
dat %>%
  # Capture both "year=value" pairs of each row into four columns.
  # POSIX classes must sit inside a bracket expression, i.e. [[:digit:]];
  # a bare [:digit:] is not portable across regex engines. The separator is
  # matched as ";" plus optional whitespace instead of ";" plus any character.
  extract(count, c("year1", "value1", "year2", "value2"),
    regex = paste0(
      "([[:digit:]]+)=([[:digit:]]+);\\s*",
      "([[:digit:]]+)=([[:digit:]]+)"
    )
  ) %>%
  {
    # Stack the first and second pair under common column names.
    bind_rows(
      select(., name, year = year1, value = value1),
      select(., name, year = year2, value = value2)
    )
  } %>%
  # One column per year; missing name/year combinations become NA.
  spread(year, value)
name 2003 2004 2005 2006
1 a 22 32 <NA> <NA>
2 b 34 <NA> 45 <NA>
3 c <NA> <NA> 32 67
另一种方法是使用 extract + spread。这种写法看起来不那么冗长,但我认为上面的方法更可靠,因为此处的第一个 spread 在某些特定情况下可能会失败。
spread
答案 4 :(得分:0)
如果您想要一种不依赖任何软件包、基于 strsplit() 和 reshape() 的基础 R 方法:
# Base R only: split the "year=value" pairs with strsplit() and widen the
# result with reshape().
dat[] <- lapply(dat, as.character) # transform columns to characters

# One list element per row of `dat`, holding that row's "year=value" strings.
pairs <- strsplit(dat$count, "; ", fixed = TRUE)

# Long table with default column names X1 = name, X2 = year, X3 = value.
# The row count follows from the data (ncol = 2) and each name is repeated
# once per pair, so rows may carry any number of pairs.
DF <- data.frame(cbind(
  rep(dat$name, lengths(pairs)),
  matrix(unlist(strsplit(unlist(pairs), "=", fixed = TRUE)),
    ncol = 2, byrow = TRUE
  )
))

# reshape into wide format: one "X3.<year>" column per distinct year
DF <- reshape(DF, timevar = "X2", idvar = "X1", direction = "wide")

# coerce the value columns into numeric form
DF[, -1] <- lapply(DF[, -1], function(x) as.numeric(as.character(x)))

# desired column names (optional): strip the "X3." prefix added by reshape()
names(DF) <- c(names(dat)[1], sub("^X3\\.", "", names(DF)[-1]))
> DF
name 2003 2004 2005 2006
1 a 22 32 NA NA
3 b 34 NA 45 NA
5 c NA NA 32 67