我有两个数据帧,df1
和df2
:
df1 <- tribble(~Speaker, ~age, ~word, ~ID,
"Alex", 10, "cat", "Alex_10",
"Alex", 10, "dog", "Alex_10",
"Alex", 10, "car", "Alex_10",
"Alex", 11, "sheep", "Alex_11",
"Alex", 11, "box", "Alex_11",
"Alex", 11, "cup", "Alex_11",
"Bob", 10, "cat", "Bob_10",
"Bob", 10, "dog", "Bob_10",
"Bob", 10, "car", "Bob_10",
"Bob", 11, "sheep", "Bob_11",
"Bob", 11, "box", "Bob_11",
"Bob", 11, "cup", "Bob_11")
df2 <- tribble(~Speaker, ~age, ~word1, ~word2, ~word_pair, ~ID, ~value,
"Alex", 10, "cat", "dog", "cat_dog", "Alex_10", 23,
"Alex", 10, "cat", "car", "car_cat", "Alex_10", 12,
"Alex", 10, "dog", "cat", "cat_dog", "Alex_10", 23,
"Alex", 10, "dog", "car", "car_dog", "Alex_10", 25,
"Alex", 10, "car", "dog", "car_dog", "Alex_10", 25,
"Alex", 10, "car", "cat", "car_cat", "Alex_10", 12,
"Alex", 11, "box", "sheep", "box_sheep", "Alex_11", 56,
"Alex", 11, "box", "cup", "box_cup", "Alex_11", 34,
"Alex", 11, "sheep", "box", "box_sheep", "Alex_11", 56,
"Alex", 11, "sheep", "cup", "cup_sheep", "Alex_11", 21,
"Alex", 11, "cup", "box", "box_cup", "Alex_11", 34,
"Alex", 11, "cup", "sheep", "cup_sheep", "Alex_11", 21,
"Bob", 10, "cat", "dog", "cat_dog", "Bob_10", 11,
"Bob", 10, "cat", "car", "car_cat", "Bob_10", 87,
"Bob", 10, "dog", "cat", "cat_dog", "Bob_10", 11,
"Bob", 10, "dog", "car", "car_dog", "Bob_10", 45,
"Bob", 10, "car", "cat", "car_cat", "Bob_10", 87,
"Bob", 10, "car", "dog", "car_dog", "Bob_10", 45,
"Bob", 11, "sheep", "box", "box_sheep", "Bob_11", 32,
"Bob", 11, "sheep", "cup", "cup_sheep", "Bob_11", 24,
"Bob", 11, "box", "cup", "box_cup", "Bob_11", 65,
"Bob", 11, "box", "sheep", "box_sheep", "Bob_11", 32,
"Bob", 11, "cup", "box", "box_cup", "Bob_11", 65,
"Bob", 11, "cup", "sheep", "cup_sheep", "Bob_11", 24)
我想循环遍历df1
,以便为ID
的每个实例创建一个新的数据框(保存为列表),以提取每个时间点产生的每个单词。每个发言人,并将其与word_pair
中的df2
个值进行匹配。我创建了一个循环,如下所示:
value_list <- vector("list", length(df1))
for (i in unique(df1$ID)) {
value_words <- df2 %>%
group_by(Speaker, age) %>%
filter(Speaker == df1$Speaker[which(df1$ID == i)] &
age == df1$age[which(df1$ID == i)]) %>%
filter((word1 %in% df1$word |
word2 %in% df1$word) &
value <= 50) %>%
distinct(word_pair, .keep_all = T)
value_list[[i]] <- value_words
}
预期输出如下:
value_list[[Alex_10]]
# A tibble: 1 x 9
# Groups: Speaker, age [1]
word1 word2 value Speaker age word_pair ID
<chr> <chr> <dbl> <chr> <dbl> <chr> <chr>
cat dog 23 Alex 10 cat_dog Alex_10
cat car 12 Alex 10 car_cat Alex_10
car dog 25 Alex 10 car_dog Alex_10
因此每个单词与每个单词的每个组合仅列出一次
但是出现以下错误:
错误:
filter()
输入..1
出现问题。 x输入..1
的大小必须为4或1,而不是大小5。 i输入..1
为&...
。 i错误发生在第1组中:说话者=“ Alex”,年龄=“ 10”。
显然,问题出在filter()
上,但我不知道如何更改它并仍然获得所需的输出。
答案 0 :(得分:0)
以下内容可以满足您对left_join
,filter
(来自dplyr
)和最终split
(来自base
)的要求:>
library(dplyr)
df <- df1 %>%
left_join(df2, by = c("Speaker", "age", "ID")) %>%
filter(value <= 50,
word == word1 | word == word2) %>%
group_by(ID) %>%
distinct(word_pair, .keep_all = T) %>%
ungroup()
split(df, df$ID)
哪个给
$Alex_10
# A tibble: 3 x 8
Speaker age word ID word1 word2 word_pair value
<chr> <dbl> <chr> <chr> <chr> <chr> <chr> <dbl>
1 Alex 10 cat Alex_10 cat dog cat_dog 23
2 Alex 10 cat Alex_10 cat car car_cat 12
3 Alex 10 dog Alex_10 dog car car_dog 25
$Alex_11
# A tibble: 2 x 8
Speaker age word ID word1 word2 word_pair value
<chr> <dbl> <chr> <chr> <chr> <chr> <chr> <dbl>
1 Alex 11 sheep Alex_11 sheep cup cup_sheep 21
2 Alex 11 box Alex_11 box cup box_cup 34
$Bob_10
# A tibble: 2 x 8
Speaker age word ID word1 word2 word_pair value
<chr> <dbl> <chr> <chr> <chr> <chr> <chr> <dbl>
1 Bob 10 cat Bob_10 cat dog cat_dog 11
2 Bob 10 dog Bob_10 dog car car_dog 45
$Bob_11
# A tibble: 2 x 8
Speaker age word ID word1 word2 word_pair value
<chr> <dbl> <chr> <chr> <chr> <chr> <chr> <dbl>
1 Bob 11 sheep Bob_11 sheep box box_sheep 32
2 Bob 11 sheep Bob_11 sheep cup cup_sheep 24