Question

我有一个很大的数据框（df），已按某一列的取值把它拆分成了一个由多个数据框组成的列表。我想遍历这个列表，访问每个数据框中的同一列，并对该列中的所有文本行应用 get_nrc_sentiment() 函数。

library(syuzhet)

# Create an example data frame: `a` is the grouping key, `b` holds the text
df <- data.frame("a"=c(1:3, 1, 1), 
"b"=c("bad", "angry", "joy", "happy", "unhappy"))
df

df_split <- split(df, df$a)  
df_split # list of data frames, one per distinct value of `a`    

# Attempt: loop over the data frames in the list and apply the
# sentiment function to every row of column `b`
for(i in 1:length(df_split)){
df2 <- lapply(as.vector(df_split[[i]]$b), function(x) get_nrc_sentiment(x))
}   
# Problem: df2 only holds the result for the last data frame in the list
df2  

# Desired output (shown here for the first data frame only); the same
# result is wanted for every data frame in the list
df3 <- get_nrc_sentiment(as.vector(df_split[[1]]$b))
df3

# output of df
  a       b
1 1     bad
2 2   angry
3 3     joy
4 1   happy
5 1 unhappy

# output of df_split
$`1`
  a       b
1 1     bad
4 1   happy
5 1 unhappy

$`2`
  a     b
2 2 angry

$`3`
  a   b
3 3 joy

# output of code within for loop
[[1]]
  anger anticipation disgust fear joy sadness surprise trust negative positive
1     0            0       0    0   1       0        0     0        0        1

# output of df3, desired output example for each df within list
  anger anticipation disgust fear joy sadness surprise trust negative positive
1     1            0       1    1   0       1        0     0        1        0
2     0            1       0    0   1       0        0     1        0        1
3     1            0       1    0   0       1        0     0        1        0

您可以看到，这似乎只适用于dfs列表中的最后一个df，而不是dfs列表中的所有df。我可能会因为for循环而以错误的方式进行操作，我也在lapply中尝试了lapply，但是对于如何访问dfs列表的每个df中的col没有其他想法。非常感谢您的帮助，并希望我提供的示例足够清楚。

Answer 1

此for循环不起作用

# df2 is reassigned on every pass, so only the last iteration's result is kept
for(i in 1:length(df_split)){
df2 <- lapply(as.vector(df_split[[i]]$b), function(x) get_nrc_sentiment(x))
}

每次迭代都覆盖df2。

在这种情况下，您可以使用 by()：它会按分组拆分 data.frame，并对每个子集应用一个函数。

# Score column `b` separately for each subset of df defined by the values of `a`
by(
  df,
  df$a,
  function(grp) get_nrc_sentiment(as.character(grp$b))
)

我对 get_nrc_sentiment 不熟悉，但把它应用于 df_split[[1]]$b 时，似乎会得到 3 行？

dplyr解决方案：

library(dplyr)

# One sentiment table per group of `a`. as.character() mirrors the by()
# solution: `b` may be a factor (the default for data.frame() before R 4.0),
# while get_nrc_sentiment() is documented to take a character vector.
df %>%
  group_by(a) %>%
  group_map(~ get_nrc_sentiment(as.character(.x$b)))

Answer 2

我不完全理解您的代码，但是下面提供了一个示例，说明如何将函数应用于每个df并将它们全部放入列表中。希望对您有所帮助。

# Collect one result per data frame in `list_a` instead of overwriting a
# single variable on every iteration.
res <- list() # empty list to put into final result

# seq_along() yields an empty sequence for an empty list, whereas
# seq(length(list_a)) would iterate over c(1, 0) and fail on list_a[[1]].
for (i in seq_along(list_a)) {
  l <- as.data.frame(list_a[[i]]) # convert every df in list to normal df
  l <- l$b # put your sentiment function here
  res[[i]] <- l # store this df's result at position i, not just the last one
  print(res) # show the results collected so far
}

Answer 3

您无需在循环内调用 lapply，因为 lapply 本身就会对列表的每个元素应用同一个函数。或者，您也可以使用 purrr::map。


library("tidyverse")
library("syuzhet")

# Create the example data frame
df <- data.frame(
  "a" = c(1:3, 1, 1),
  "b" = c("bad", "angry", "joy", "happy", "unhappy"),
  # Keep `b` as character rather than factor
  stringsAsFactors = FALSE
)

# Split df by the values of `a` into a named list of data frames
df_split <- split(df, df$a)

# lapply() already visits every element of the list, so no outer loop is
# needed: the function receives one data frame at a time and scores its
# `b` column
lapply(
  df_split,
  function(x) get_nrc_sentiment(x$b)
)
#> $`1`
#>   anger anticipation disgust fear joy sadness surprise trust negative
#> 1     1            0       1    1   0       1        0     0        1
#> 2     0            1       0    0   1       0        0     1        0
#> 3     1            0       1    0   0       1        0     0        1
#>   positive
#> 1        0
#> 2        1
#> 3        0
#> 
#> $`2`
#>   anger anticipation disgust fear joy sadness surprise trust negative
#> 1     1            0       1    0   0       0        0     0        1
#>   positive
#> 1        0
#> 
#> $`3`
#>   anger anticipation disgust fear joy sadness surprise trust negative
#> 1     0            0       0    0   1       0        0     0        0
#>   positive
#> 1        1

# Same computation with purrr::map(); `.` is the current data frame
df_split %>%
  map(~ get_nrc_sentiment(.$b))
#> $`1`
#>   anger anticipation disgust fear joy sadness surprise trust negative
#> 1     1            0       1    1   0       1        0     0        1
#> 2     0            1       0    0   1       0        0     1        0
#> 3     1            0       1    0   0       1        0     0        1
#>   positive
#> 1        0
#> 2        1
#> 3        0
#> 
#> $`2`
#>   anger anticipation disgust fear joy sadness surprise trust negative
#> 1     1            0       1    0   0       0        0     0        1
#>   positive
#> 1        0
#> 
#> $`3`
#>   anger anticipation disgust fear joy sadness surprise trust negative
#> 1     0            0       0    0   1       0        0     0        0
#>   positive
#> 1        1

由 reprex 包（v0.3.0）于 2019-11-01 创建

遍历df列表，将fn应用于每个df中的相同col

3 个答案: