我有一个大型数据框(df),已按某一列(col)的各个取值将其拆分成一个数据框列表。我想遍历这个列表,访问每个数据框中的同一列,并对该列中的每一行文本应用get_nrc_sentiment()函数。
library(syuzhet)
# Create the example data frame: `a` is the grouping column, `b` holds the text.
df <- data.frame("a"=c(1:3, 1, 1),
"b"=c("bad", "angry", "joy", "happy", "unhappy"))
df
df_split <- split(df, df$a)
df_split # list of data frames, one per distinct value of `a`
# Iterate over the list of data frames and, for each one, apply the
# sentiment function to every element of its `b` column.
# Each pass assigns the per-row results for element i to df2.
for(i in 1:length(df_split)){
df2 <- lapply(as.vector(df_split[[i]]$b), function(x) get_nrc_sentiment(x))
}
# Observed problem: df2 only holds the result for the last data frame in the list.
df2
# Desired output (shown here for the first data frame only); the goal is to
# get this for the `b` column of every data frame in the list.
df3 <- get_nrc_sentiment(as.vector(df_split[[1]]$b))
df3
# output of df
a b
1 1 bad
2 2 angry
3 3 joy
4 1 happy
5 1 unhappy
# output of df_split
$`1`
a b
1 1 bad
4 1 happy
5 1 unhappy
$`2`
a b
2 2 angry
$`3`
a b
3 3 joy
# output of code within for loop
[[1]]
anger anticipation disgust fear joy sadness surprise trust negative positive
1 0 0 0 0 1 0 0 0 0 1
# output of df3, desired output example for each df within list
anger anticipation disgust fear joy sadness surprise trust negative positive
1 1 0 1 1 0 1 0 0 1 0
2 0 1 0 0 1 0 0 1 0 1
3 1 0 1 0 0 1 0 0 1 0
您可以看到,这似乎只适用于dfs列表中的最后一个df,而不是dfs列表中的所有df。我可能会因为for循环而以错误的方式进行操作,我也在lapply中尝试了lapply,但是对于如何访问dfs列表的每个df中的col没有其他想法。非常感谢您的帮助,并希望我提供的示例足够清楚。
答案 0 :(得分:0)
此for循环不起作用
# df2 is reassigned on every pass, so once the loop finishes it holds only
# the result computed for the last element of df_split.
for(i in 1:length(df_split)){
df2 <- lapply(as.vector(df_split[[i]]$b), function(x) get_nrc_sentiment(x))
}
每次迭代都覆盖df2。
在这种情况下,您可以使用by函数:它会对按分组拆分后的data.frame的每个子集应用指定的函数。
# Apply get_nrc_sentiment() to the `b` column of each subset of df,
# where the subsets are formed by the values of df$a.
by(
  data = df,
  INDICES = df$a,
  FUN = function(group_df) get_nrc_sentiment(as.character(group_df$b))
)
我对get_nrc_sentiment不熟悉,但看起来将它应用于df_split[[1]]$b时,会得到3行结果?
dplyr解决方案:
# Group df by `a` and apply get_nrc_sentiment() to each group's `b` column;
# group_map() returns a list with one result per group.
# as.character() keeps this consistent with the by() solution: if `b` was
# created as a factor (the data.frame() default before R 4.0), it is
# converted to the character vector that get_nrc_sentiment() expects.
df %>%
  group_by(a) %>%
  group_map(~ get_nrc_sentiment(as.character(.x$b)))
答案 1 :(得分:0)
我不完全理解您的代码,但是下面提供了一个示例,说明如何将函数应用于每个df并将它们全部放入列表中。希望对您有所帮助。
# Apply a function to every data frame in list_a and keep all of the results.
# Preallocate one slot per data frame instead of growing the list.
res <- vector("list", length(list_a))
# seq_along() performs zero iterations for an empty list, whereas
# seq(length(list_a)) would yield c(1, 0) and index out of bounds.
for (i in seq_along(list_a)) {
  l <- as.data.frame(list_a[[i]]) # convert every df in list to normal df
  l <- l$b # put your sentiment function here
  # Store this df's result in its own slot rather than overwriting one
  # variable; `res[i] <- list(l)` also keeps the slot when l is NULL.
  res[i] <- list(l)
}
# Print the complete result once, after every data frame has been processed.
print(res)
答案 2 :(得分:0)
您无需在循环内调用lapply,因为lapply本身就会对列表的每个元素应用同一个函数。或者,您也可以使用purrr::map。
library("tidyverse")
library("syuzhet")
# Example data: `a` is the grouping key, `b` holds the text to score.
# stringsAsFactors = FALSE keeps `b` as character rather than a factor.
df <- data.frame(
  a = c(1:3, 1, 1),
  b = c("bad", "angry", "joy", "happy", "unhappy"),
  stringsAsFactors = FALSE
)

# One data frame per distinct value of `a`.
df_split <- split(df, f = df[["a"]])

# Score the `b` column of every data frame in the list; the result is a
# list with one sentiment data frame per element of df_split.
lapply(df_split, function(group_df) get_nrc_sentiment(group_df[["b"]]))
#> $`1`
#> anger anticipation disgust fear joy sadness surprise trust negative
#> 1 1 0 1 1 0 1 0 0 1
#> 2 0 1 0 0 1 0 0 1 0
#> 3 1 0 1 0 0 1 0 0 1
#> positive
#> 1 0
#> 2 1
#> 3 0
#>
#> $`2`
#> anger anticipation disgust fear joy sadness surprise trust negative
#> 1 1 0 1 0 0 0 0 0 1
#> positive
#> 1 0
#>
#> $`3`
#> anger anticipation disgust fear joy sadness surprise trust negative
#> 1 0 0 0 0 1 0 0 0 0
#> positive
#> 1 1
# Same per-group scoring with purrr: map over the list of data frames and
# apply get_nrc_sentiment() to each one's `b` column.
map(df_split, function(group_df) get_nrc_sentiment(group_df$b))
#> $`1`
#> anger anticipation disgust fear joy sadness surprise trust negative
#> 1 1 0 1 1 0 1 0 0 1
#> 2 0 1 0 0 1 0 0 1 0
#> 3 1 0 1 0 0 1 0 0 1
#> positive
#> 1 0
#> 2 1
#> 3 0
#>
#> $`2`
#> anger anticipation disgust fear joy sadness surprise trust negative
#> 1 1 0 1 0 0 0 0 0 1
#> positive
#> 1 0
#>
#> $`3`
#> anger anticipation disgust fear joy sadness surprise trust negative
#> 1 0 0 0 0 1 0 0 0 0
#> positive
#> 1 1
由reprex package(v0.3.0)于2019-11-01创建