我有一个这样的数据框:
df <- data.frame(theme1=c("hello",NA,NA,NA), theme2=c(NA,"world",NA,NA), theme3=c(NA,NA,"good_morning",NA), theme4=c(NA,NA,NA,"good_evening"))
theme1 theme2 theme3 theme4
1 hello NA NA NA
2 NA world NA NA
3 NA NA good_morning NA
4 NA NA NA good_evening
现在我想获得一个保留行顺序的列:
**Theme_merged**
hello
world
good_morning
good_evening
尝试:
merge_themes <- data.frame(cbind(mycol = na.omit(unlist(data2_tst[18:23]))), stringsAsFactors = F)
上面的代码有效但不保留行顺序,因此当我想将向量放回原始数据帧时,它就不再匹配了。
真实数据:
dput(head(data2_tst[18:23], n = 50))
structure(list(Theme1 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, "%Bedrukken%", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, "%Bedrukken%", NA, NA, NA, NA, NA, NA, NA, NA), Theme2 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "%Nieuwste|Nieuwe|201[6:7]%",
"%Nieuwste|Nieuwe|201[6:7]%", "%Nieuwste|Nieuwe|201[6:7]%", NA,
NA, NA, NA, NA, "%Nieuwste|Nieuwe|201[6:7]%", "%Nieuwste|Nieuwe|201[6:7]%",
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "%Nieuwste|Nieuwe|201[6:7]%",
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "%Nieuwste|Nieuwe|201[6:7]%",
"%Nieuwste|Nieuwe|201[6:7]%"), Theme3 = c("%Nodig%", NA, "%Nodig%",
"%Nodig%", "%Nodig%", NA, NA, "%Nodig%", NA, "%Nodig%", NA, NA,
NA, NA, "%Nodig%", "%Nodig%", "%Nodig%", NA, NA, NA, NA, NA,
NA, "%Nodig%", "%Nodig%", NA, NA, "%Nodig%", NA, "%Nodig%", "%Nodig%",
"%Nodig%", NA, "%Nodig%", "%Nodig%", "%Nodig%", NA, NA, NA, "%Nodig%",
"%Nodig%", NA, "%Nodig%", NA, "%Nodig%", "%Nodig%", NA, "%Nodig%",
NA, NA), Theme4 = c(NA, "%Kopen%", NA, NA, NA, "%Kopen%", "%Kopen%",
NA, "%Kopen%", NA, NA, NA, NA, NA, NA, NA, NA, "%Kopen%", "%Kopen%",
NA, NA, "%Kopen%", "%Kopen%", NA, NA, "%Kopen%", "%Kopen%", NA,
"%Kopen%", NA, NA, NA, NA, NA, NA, NA, "%Kopen%", "%Kopen%",
"%Kopen%", NA, NA, NA, NA, "%Kopen%", NA, NA, "%Kopen%", NA,
NA, NA), Theme5 = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_), Theme6 = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_)), .Names = c("Theme1",
"Theme2", "Theme3", "Theme4", "Theme5", "Theme6"), row.names = 3:52, class = "data.frame")
答案 0 :(得分:1)
在SQL中,这将是COALESCE
函数:
apply(df, 1, function(r) c(na.omit(r), NA)[1])
# [1] "hello" "world" "good_morning" "good_evening"
df <- data.frame(
theme1=c("hello",NA,NA,NA),
theme2=c(NA,"world",NA,NA),
theme3=c(NA,NA,"good_morning",NA),
theme4=c(NA,NA,NA,"good_evening"),
stringsAsFactors = FALSE
)
na.omit(unlist(df, use.names = FALSE)) 在您的示例数据上工作正常,但如果存在整行都是 NA 的行,则会失败(结果向量的长度会少于数据框的行数):
df2 <- data.frame(
theme1=c("hello",NA,NA,NA,NA),
theme2=c(NA,"world",NA,NA,NA),
theme3=c(NA,NA,"good_morning",NA,NA),
theme4=c(NA,NA,NA,"good_evening",NA),
theme5=c(NA_character_,NA_character_,NA_character_,
NA_character_,NA_character_),
stringsAsFactors = FALSE
)
df2$X <- na.omit(unlist(df2, use.names = FALSE))
# Error in `$<-.data.frame`(`*tmp*`, "X", value = c("hello", "world", "good_morning", :
# replacement has 4 rows, data has 5
df2$X <- apply(df2, 1, function(r) c(na.omit(r), NA)[1])
# theme1 theme2 theme3 theme4 theme5 X
# 1 hello <NA> <NA> <NA> <NA> hello
# 2 <NA> world <NA> <NA> <NA> world
# 3 <NA> <NA> good_morning <NA> <NA> good_morning
# 4 <NA> <NA> <NA> good_evening <NA> good_evening
# 5 <NA> <NA> <NA> <NA> <NA> <NA>
另一个选项可能是df2$X <- df2[cbind(1:nrow(df2), max.col(!is.na(df2)))]
答案 1 :(得分:1)
dplyr 的 0.5.0 版引入了 coalesce() 函数:
这个版本的dplyr获得了许多受SQL启发的向量函数。两个函数使消除或生成缺失值更容易:
给定一组向量,coalesce()在每个位置找到第一个非缺失值。
要将其应用于示例数据框,您可以使用:
df <- mutate_all(df, .funs = as.character)
df$merged <- with(df, coalesce(theme1, theme2, theme3, theme4))
我发现有必要先将因子转换为字符,以避免出现 'invalid factor level' 错误。
在您的实际数据中,无需转换:
# Row-wise coalesce over the six theme columns: for each row, keep the first
# non-NA value (rows where every theme is NA stay NA).
df$merged <- with(df, coalesce(Theme1, Theme2, Theme3, Theme4, Theme5, Theme6))
答案 2 :(得分:0)
这是一个 tidy 风格的解决方案(使用 dplyr 和 tidyr,或者直接使用 tidyverse):
library(tidyverse)
# pivot_longer() emits the values row by row (all themes of row 1, then all
# themes of row 2, ...), so dropping the NAs afterwards keeps the original row
# order. gather() stacks column by column instead, which only looks correct
# when the non-NA values happen to sit on the diagonal.
# Note: rows in which every theme is NA are dropped by the filter.
df <- df %>%
  pivot_longer(cols = 1:4, names_to = "theme", values_to = "theme_merged") %>%
  filter(!is.na(theme_merged)) %>%
  select(theme_merged) %>%
  as.data.frame()
df
theme_merged
1 hello
2 world
3 good_morning
4 good_evening
答案 3 :(得分:0)
这应该适用于您的数据:
# c() flattens a matrix column by column, so transpose first: flattening
# t(as.matrix(df)) walks the original data row by row, which keeps the values
# in the row order of df.
new_df <- c(t(as.matrix(df)))
此行首先将 df 转换为矩阵,再用 c() 将矩阵的所有元素展开为一个向量。
new_df <- new_df[!is.na(new_df)]
现在我们只保留非NA
条目。如果您愿意,可以将其转换回数据帧:
new_df <- data.frame(new_df);names(new_df) <- "Themes"