有一种情况
one  two  three  type
x    x           chocolate
x                vanilla
x    x           strawberry
如果我想根据列中的'x'进行复制,例如:
one  two  three  type
x                chocolate
     x           chocolate
x                vanilla
x                strawberry
     x           strawberry
这里的目标是每行中有一个'x',因此如果每行中有多个'x',则复制整行,并为每个副本保留该行的唯一'x'。
重现
# Reproducible example: three marker columns plus the `type` label column.
dat <- data.frame(
  one = c("x", "x", "x"),
  two = c("x", "", "x"),
  three = c("", "", ""),
  type = c("chocolate", "vanilla", "strawberry")
)
答案 0 :(得分:4)
使用tidyr :: gather
与MKR和Anant的解决方案一脉相承。
# Expand every "x" of `dat` into a row of its own while keeping the original
# row order.
library(tidyverse)
dat %>%
# Remember each row's original position so the order can be restored below.
mutate(i=row_number()) %>%
# Long format: one row per (type, i, column) combination.
gather("Key", "Val", -type,-i) %>%
# A unique id per long row, so that spread() yields one wide row per long row.
rowid_to_column %>%
# Back to wide format; cells with no value for a given id are filled with "".
spread(Key, Val,fill = "") %>%
# Keep only the rows that contain at least one "x".
`[`(rowSums(.=="x")>0,) %>%
# Restore the original row order.
arrange(i) %>%
# Drop the helper columns and fix the column order.
select(type,one,two,three)
# type one two three
# 1 chocolate x
# 2 chocolate x
# 3 vanilla x
# 4 strawberry x
# 5 strawberry x
不过,下面这个更简短的版本也许就能满足您的需要(只有当列three
中至少有一个值被填充时才会显示该列,而且行的顺序会被打乱):
# Shorter variant: turn "" into NA, go long while dropping the NA cells, then
# spread back with one row per remaining "x". The original row order is not
# restored, and a marker column without any "x" is absent from the result.
dat %>%
  # na_if() is applied column by column: dplyr >= 1.1.0 rejects a whole data
  # frame as its first argument.
  mutate(across(everything(), ~ na_if(.x, ""))) %>%
  gather("Key", "Val", -type, na.rm = TRUE) %>%
  # Unique id per long row so that spread() produces one wide row per "x".
  rowid_to_column() %>%
  spread(Key, Val, fill = "") %>%
  select(-1) # drop the helper rowid column
# type one two
# 1 chocolate x
# 2 vanilla x
# 3 strawberry x
# 4 chocolate x
# 5 strawberry x
绑定数据框
另一种方法是将数据分成不同的数据框然后绑定它们(看起来像@Acccumulation在他的回答中试图解释的那样):
# Build one small data frame per marker column and stack them.
dat %>%
# For column index 1, 2 and 3: keep that column plus column 4 (`type`), and
# only the rows where that column equals "x". The trailing `.` hands the piped
# data to the formula, where it is referred to as `.y`.
map(1:3,~filter(.y[c(.x,4)],.y[.x]=="x"),.) %>%
# Stack the three pieces into one data frame.
bind_rows %>%
# Convert every column to character.
modify(as.character) %>%
# Replace the NA cells with empty strings.
`[<-`(is.na(.),value="") %>%
# Fix the column order.
select(one,two,three,type)
# one two three type
# 1 x chocolate
# 2 x vanilla
# 3 x strawberry
# 4 x chocolate
# 5 x strawberry
使用合并
# Merge-based variant: pair every marker column with `type`, outer-merge the
# pairs, and keep the combinations that carry exactly one "x".
dat %>%
  # "" -> NA, column by column (dplyr >= 1.1.0 no longer accepts a whole data
  # frame as the first argument of na_if()).
  mutate(across(everything(), ~ na_if(.x, ""))) %>%
  # Append the `type` column once more as extra rows.
  bind_rows(.["type"]) %>%
  # One two-column data frame (marker column i plus column 4, `type`) per
  # marker column; the trailing `.` is passed to the formula as `.y`.
  map(1:3, ~ .y[c(.x, 4)], .) %>%
  # Full outer merge of the three pieces.
  reduce(merge, all = TRUE) %>%
  # NA -> "".
  `[<-`(is.na(.), value = "") %>%
  # Keep the rows with exactly one "x" and drop duplicates.
  `[`(rowSums(. == "x") == 1, ) %>%
  distinct()
# type one two three
# 1 chocolate x
# 2 chocolate x
# 3 strawberry x
# 4 strawberry x
# 5 vanilla x
使用unnest
# unnest-based variant: every marker cell becomes a length-3 character vector
# that is blank except at its own column's position; the list columns are then
# unnested and only rows with exactly one "x" are kept.
dat %>%
# Inside the formula, .x is the column and .y is its name.
imap_dfc(~{
# Position of the current column within `dat`.
i <- match(.y,names(dat))
# Marker columns: each cell is written into position i of character(3).
# The `type` column is returned unchanged.
if(.y != "type") map(.x,~`[<-`(character(3),i,.x)) else .x}) %>%
unnest %>%
# Keep the rows that contain exactly one "x".
`[`(rowSums(.=="x")==1,)
# # A tibble: 5 x 4
# type one two three
# <chr> <chr> <chr> <chr>
# 1 chocolate x
# 2 chocolate x
# 3 vanilla x
# 4 strawberry x
# 5 strawberry x
使用diag
# diag-based variant: per row, the "is this cell an x" flags of one/two/three
# are placed on the diagonal of a matrix, so every flag lands in its own row.
dat %>%
rowwise %>%
# `cols` is a list column holding, per input row, that diagonal matrix as a
# data frame with the columns named one/two/three.
transmute(type,cols = list(setNames(data.frame(diag(c(one,two,three)=="x")),c('one','two','three')))) %>%
unnest %>%
# Columns 2:4 are used as an index into c('', 'x') after adding 1,
# i.e. FALSE/0 -> '' and TRUE/1 -> 'x'.
modify_at(2:4, ~c('','x')[.x+1]) %>%
# Keep the rows that contain exactly one "x".
`[`(rowSums(.=="x")==1,)
# # A tibble: 5 x 4
# type one two three
# <chr> <chr> <chr> <chr>
# 1 chocolate x
# 2 chocolate x
# 3 vanilla x
# 4 strawberry x
# 5 strawberry x
编辑 以在评论中提供OP的请求:
我们采用第一个解决方案并对源数据使用right_join
以确保所有行都在那里,然后用空字符串替换NAs。我们还清理了命令的第一行,因为我们不再需要i
来安排。
# Same approach as the first solution, but types without any "x" are kept.
# NOTE(review): `dat2` is not defined in this snippet; judging by the printed
# result it is the sample data plus a `hazelnut` row without any "x" — confirm.
dat2 %>%
# Long format: one row per (type, column) pair.
gather("Key", "Val", -type) %>%
# Unique id per long row, so that spread() yields one wide row per long row.
rowid_to_column %>%
spread(Key, Val,fill = "") %>%
# Keep only the rows that contain at least one "x".
`[`(rowSums(.=="x")>0,) %>%
# Join against the `type` column of the source data so that every type is
# present again in the result.
right_join(dat2["type"]) %>%
# NA -> "".
`[<-`(is.na(.),value="") %>%
select(type,one,two,three)
# type one two three
# 1 chocolate x
# 2 chocolate x
# 3 vanilla x
# 4 strawberry x
# 5 strawberry x
# 6 hazelnut
答案 1 :(得分:1)
您可以使用tidyr::gather
来完成您需要的内容,这里是一个快速示例,其中空字符串转换为NA:
library(tidyverse)
# Build the example data and expand it so that every row carries exactly one
# "x". The result is stored in `dat` and printed.
dat <- tibble(
  one = c("x", "x", "x"),
  two = c("x", "", "x"),
  three = c("", "", ""),
  type = c("chocolate", "vanilla", "strawberry")
) %>%
  # Empty strings become NA. na_if() is applied per column because
  # dplyr >= 1.1.0 no longer accepts a whole data frame as its first argument.
  mutate(across(everything(), ~ na_if(.x, ""))) %>%
  # Long format: one row per (type, column) pair ...
  gather("number", "value", one:three) %>%
  # ... of which only the non-empty cells are kept.
  filter(!is.na(value)) %>%
  # Rebuild the marker columns: "x" only in the column the row came from.
  mutate(
    one = ifelse(number == "one", "x", ""),
    two = ifelse(number == "two", "x", ""),
    three = ifelse(number == "three", "x", "")
  ) %>%
  select(one:three, type) %>%
  arrange(type)
dat
# A tibble: 5 x 4
one two three type
<chr> <chr> <chr> <chr>
1 x "" "" chocolate
2 "" x "" chocolate
3 x "" "" strawberry
4 "" x "" strawberry
5 x "" "" vanilla
答案 2 :(得分:1)
使用gather
然后spread
可以实现一个解决方案。步骤是:
# Sample data. According to the printout below, the quoted empty cells ('')
# are read as "" in `two` and as NA in `three`.
df <- read.table(text = "one two three type
x x '' chocolate
x '' '' vanilla
x x '' strawberry", header = TRUE, stringsAsFactors = FALSE)
df
# one two three type
#1 x x NA chocolate
#2 x NA vanilla
#3 x x NA strawberry
library(tidyverse)
# Long format: one row per (type, column) pair
df1 <- gather(df, "Key", "Val", -type)
# type Key Val
#1 chocolate 1 one x
#2 vanilla 2 one x
#3 strawberry 3 one x
#4 chocolate 4 two x
#5 vanilla 5 two
#6 strawberry 6 two x
#7 chocolate 7 three <NA>
#8 vanilla 8 three <NA>
#9 strawberry 9 three <NA>
# Make type unique before using spread, so that every long row becomes a wide
# row of its own
df1$type <- paste(df1$type, seq_len(nrow(df1)))
df2 <- spread(df1, Key, Val)
# Remove the numeric suffix from the type values again
df2$type <- gsub("(^\\w+)\\s.+", "\\1", df2$type, perl = TRUE)
# Keep the rows where one, two or three is "x"
df_final <- df2 %>% filter(one == "x" | two == "x" | three == "x")
# Replace the remaining NA cells with empty strings
df_final[is.na(df_final)] <- ""
df_final
# type one three two
#1 chocolate x
#2 chocolate x
#3 strawberry x
#4 strawberry x
#5 vanilla x
答案 3 :(得分:1)
感谢您提出这个有趣的问题。
这是一个使用基础R(base R)的解决方案。思路是:按行ID拆分数据框,统计每一行中x
的个数,按该个数创建一个对角线元素全为1的单位矩阵,将矩阵转换为数据框,并把1替换为x
、把0替换为"",再对各数据框稍作处理,最后把它们合并在一起。
此解决方案适用于x
的任意数量的列,但它假定type
列是唯一与x
没有任何关系的列。如果您有多个列,例如type
列,则可能需要根据需要修改代码。
# Create a row ID so that the data frame can be split into single rows
dat$ID <- seq_len(nrow(dat))
# Split the data frame based on ID
dat_list <- split(dat, f = dat$ID)
# Expand each single-row data frame into one row per "x" it contains
dat_list2 <- lapply(dat_list, function(x) {
  x$ID <- NULL # Remove the ID column
  # Only the marker columns are searched; `type` is left out so that a type
  # value containing the letter "x" is not mistaken for a marker.
  marker_cols <- setdiff(names(x), "type")
  has_x <- vapply(x[marker_cols], function(cell) grepl("x", cell), logical(1))
  cols <- marker_cols[has_x] # Columns with "x"
  cols_n <- marker_cols[!has_x] # Columns without "x"
  if (length(cols) == 0) {
    # No "x" at all: keep the row once, as it is, instead of failing on an
    # empty identity matrix
    return(x[c(marker_cols, "type")])
  }
  # Identity matrix with one row per "x": row k carries only the k-th "x"
  m <- diag(length(cols))
  out <- as.data.frame(ifelse(m == 1, "x", ""), stringsAsFactors = FALSE)
  colnames(out) <- cols # Assign column names
  if (length(cols_n) >= 1) {
    out[, cols_n] <- "" # Add the columns without "x" as blanks
  }
  out$type <- x$type # Add the type column (recycled over the new rows)
  out
})
# Combine all data frames in dat_list2
dat2 <- do.call(rbind, dat_list2)
# Re-assign the row names
rownames(dat2) <- seq_len(nrow(dat2))
dat2
# one two three type
# 1 x chocolate
# 2 x chocolate
# 3 x vanilla
# 4 x strawberry
# 5 x strawberry
答案 4 :(得分:0)
使用data.table
:
# data.table variant. It only handles the `one`/`two` pair; `three` is not
# touched by this snippet.
library(data.table)
# Convert `df` to a data.table. NOTE(review): `df` is not defined in this
# snippet — presumably the one/two/three/type sample data; confirm.
setDT(df)
# Flag the rows that have an "x" in both `one` and `two`.
df[one == "x" & two == "x", id := 1]
# The flagged rows; they become the duplicate set.
df1 <- df[id == 1, ]
# In `df`, blank out `two` for every row ...
df[, two := ""]
# ... and in the duplicates, blank out `one`.
df1[, one := ""]
# Stack the original rows and the duplicates.
df <- rbind(df, df1)
# Drop the helper flag column.
df[, id := NULL]
df
输出:
one two three type
1: x NA chocolate
2: x NA vanilla
3: x NA strawberry
4: x NA chocolate
5: x NA strawberry
答案 5 :(得分:0)
这是使用tidyverse
工具的解决方案。您可能需要调整以包含所有7个列名称,但希望它仍然可以工作。洞察力是使用gather
,添加唯一的rowid
,然后使用spread
强制保持相同的行数,因为现在每一行都与其余行不同(通过type
和rowid
)。其余的只是将空字符串转换为NA
,然后删除全部为NA
的多余行。
答案 6 :(得分:0)
我假设某一列中出现x时,它前面的列中也总是有x。也就是说,如果two
中有x,那么one
中也一定有x。否则,我实际上并不确定你期望得到什么结果。
因此,假设您的数据框名为df
,只需找到列two
和three
中的x并复制这些行,同时清除其他内容。
# Duplicate the rows that have an "x" in `two` or `three`: the original rows
# keep their `one` value, and a fresh row is appended for each cleared "x".
# %in% is used instead of == so that NA cells count as "no match" rather than
# propagating NA into the counts below.
twoMatches <- df$two %in% "x" # Find all the twos with xs
threeMatches <- df$three %in% "x" # Find all the threes with xs
df$two[twoMatches] <- "" # Clear the two field where there were twos
df$three[threeMatches] <- "" # Clear the three field where there were threes
numTwos <- sum(twoMatches) # Number of twos
numThrees <- sum(threeMatches) # Number of threes
# Generate our frame of twos. `df$type[...]` yields a plain vector regardless
# of the data frame class of `df`.
twoFrame <- data.frame(
  one = rep("", numTwos),
  two = rep("x", numTwos),
  three = rep("", numTwos),
  type = df$type[twoMatches],
  stringsAsFactors = FALSE
)
# Generate our frame of threes
threeFrame <- data.frame(
  one = rep("", numThrees),
  two = rep("", numThrees),
  three = rep("x", numThrees),
  type = df$type[threeMatches],
  stringsAsFactors = FALSE
)
# Bind them all together
outdf <- dplyr::bind_rows(list(df, twoFrame, threeFrame))
# Sort them into something more meaningful
outdf <- outdf[
  order(outdf$type, outdf$one, outdf$two, outdf$three, decreasing = TRUE),
]
输出: