Question

在R中实现这一点的最好方法是什么？保存键值对的最佳数据结构是什么？向最终结果添加新键值对的最佳方法又是什么？

Answer 1

假设description列是一个字符串，我们可以计算每个字符串中的单词数，用cost除以该单词数，然后使用separate_rows将每个单词拆分为单独的一行。

library(tidyverse)

# Split each comma-separated description into one row per token and share the
# row's cost equally among its tokens.
df %>%
  mutate(
    # Work on a trimmed character copy: the sample data stores description as
    # a factor with leading padding, which would otherwise survive in the
    # first token of every row.
    description = trimws(as.character(description)),
    # Per-token cost = row cost divided by the number of word-like tokens.
    cost = cost / str_count(description, "\\w+")
  ) %>%
  # Let the separator absorb the blanks around each comma so the resulting
  # tokens carry no stray whitespace and can be used directly as keys.
  separate_rows(description, sep = "\\s*,\\s*")

#        cost  description
#1    600.3333      purchas
#2    600.3333           2 
#3    600.3333        file 
#4    600.3333        safe 
#5    600.3333         cbp 
#6    600.3333        offic
#7  15909.0909           4 
#8  15909.0909          ea 
#9  15909.0909         4x4 
#10 15909.0909        util  
#.....

数据

# Sample data: four purchases, each with a total cost and a comma-separated,
# whitespace-padded description stored as a factor.
df <- data.frame(
  cost = c(3602, 175000, 17529.46, 19794.71),
  description = factor(
    # Integer codes pick, per row, one of the four labels listed below.
    c(4L, 1L, 3L, 2L),
    levels = 1:4,
    labels = c(
      "     4 , ea , 4x4 , util , vehicl, 2007 , v6 , volvo , dhs , cbp , cis",
      "     offic , furnitur , new , cbp , cis , offic , manzanillo, port",
      "     offic , furnitur, new , offic , balboa , port , cbp , csi",
      "     purchas, 2 , file , safe , cbp , offic"
    )
  )
)

Answer 2

假设description为list列，可以将cost除以description的lengths，并把description重命名为word列，然后使用unnest将其展开为每个单词一行。

library(tidyverse)
# Spread each row's cost evenly over its words and return one row per word.
df1 %>%
  transmute(
    cost = cost / lengths(description),
    # Each description element is itself a list of single strings; flatten it
    # to a character vector so unnest() yields a plain character column
    # rather than a list column.
    word = lapply(
      description,
      function(x) as.character(unlist(x, use.names = FALSE))
    )
  ) %>%
  # Name the column explicitly: calling unnest() without `cols` is deprecated
  # since tidyr 1.0.0.
  unnest(cols = word)
#        cost       word
#1    600.3333    purchas
#2    600.3333          2
#3    600.3333       file
#4    600.3333       safe
#5    600.3333        cbp
#6    600.3333      offic
#7  15909.0909          4
#8  15909.0909         ea
#9  15909.0909        4x4
#...

数据

# Sample data: the same four purchases, with description held as a list
# column whose elements are lists of single-word strings.
df1 <- data.frame(cost = c(3602, 175000, 17529.46, 19794.71))
df1$description <- lapply(
  list(
    c("purchas", "2", "file", "safe", "cbp", "offic"),
    c("4", "ea", "4x4", "util", "vehicl", "2007", "v6", "volvo",
      "dhs", "cbp", "cis"),
    c("offic", "furnitur", "new", "offic", "balboa", "port", "cbp", "csi"),
    c("offic", "furnitur", "new", "cbp", "cis", "offic", "manzanillo",
      "port")
  ),
  # Turn each character vector into a list of length-one strings.
  as.list
)

在R中保存键值对的最佳数据结构是什么？

2 个答案:

数据