Question

我有多个数据框。这些数据框中的列名都带有特定的前缀。我想按这些前缀把数据框拆分开，并把结果存入一个列表。

在此示例中，我有两个数据帧。每个都有两个不同的前缀。

dput(head(FEB_gems)) 的输出：

structure(list(GAME1_Class = structure(c(2L, 1L, 5L, 4L, 3L), .Label = c("fighter", 
"paladin", "rouge", "sorcerer", "wizard"), class = "factor"), 
GAME1_Race = structure(c(3L, 1L, 4L, 3L, 2L), .Label = c("elf", 
"gnome", "human", "orc"), class = "factor"), GAME1_Alignment = structure(c(4L, 
2L, 1L, 5L, 3L), .Label = c("CE", "CG", "LG", "NE", "NN"), class = "factor"), 
GAME1_Level = c(6, 7, 6, 7, 7), GAME1_Alive = structure(c(1L, 
1L, 1L, 1L, 1L), .Label = "y", class = "factor"), GAME2_Class = structure(c(3L, 
5L, 2L, 4L, 1L), .Label = c("bard", "cleric", "fighter", 
"monk", "wizard"), class = "factor"), GAME2_Race = structure(c(2L, 
3L, 2L, 4L, 1L), .Label = c("dwarf", "elf", "half-elf", "human"
), class = "factor"), GAME2_Alignment = structure(c(4L, 2L, 
1L, 5L, 3L), .Label = c("CE", "CG", "LG", "NE", "NN"), class = "factor"), 
GAME2_Level = c(5, 5, 5, 5, 5), GAME2_Alive = structure(c(1L, 
2L, 2L, 2L, 2L), .Label = c("n", "y"), class = "factor")), row.names = c(NA, 
5L), class = "data.frame")

dput(head(MAR_gems)) 的输出：

structure(list(GAME3_Class = structure(c(2L, 1L, 5L, 4L, 3L), .Label = c("barbarian", 
"cleric", "monk", "ranger", "warlock"), class = "factor"), GAME3_Race = structure(c(2L, 
3L, 2L, 4L, 1L), .Label = c("dwarf", "elf", "half-elf", "human"
), class = "factor"), GAME3_Alignment = structure(c(2L, 2L, 1L, 
3L, 2L), .Label = c("CE", "LG", "LN"), class = "factor"), GAME3_Level = c(1, 
1, 1, 1, 1), GAME3_Alive = structure(c(2L, 2L, 2L, 1L, 2L), .Label = c("n", 
"y"), class = "factor"), GAME4_Class = structure(c(2L, 1L, 5L, 
4L, 3L), .Label = c("fighter", "paladin", "rouge", "sorcerer", 
"wizard"), class = "factor"), GAME4_Race = structure(c(2L, 3L, 
2L, 4L, 1L), .Label = c("dwarf", "elf", "half-elf", "human"), class = "factor"), 
GAME4_Alignment = structure(c(1L, 2L, 1L, 4L, 3L), .Label = c("CE", 
"CG", "LG", "LN"), class = "factor"), GAME4_Level = c(5, 
5, 5, 5, 5), GAME4_Alive = structure(c(1L, 2L, 2L, 2L, 2L
), .Label = c("n", "y"), class = "factor")), row.names = c(NA, 
5L), class = "data.frame")

为此进行了尝试，包括：

分隔游戏信息

#' List the distinct column-name prefixes of a data frame
#'
#' Strips the part of every column name matched by `pattern` and returns what
#' is left, without duplicates.
#'
#' @param df A data frame (or any object with `names()`).
#' @param pattern Regular expression describing the part of each name to
#'   remove. The default, "_.*", drops everything from the first underscore
#'   onwards ("GAME1_Class" -> "GAME1"). For names such as "GAME_1.Class",
#'   pass "\\..*" to cut at the first dot instead.
#' @return A character vector of unique prefixes, in order of first
#'   appearance; `character(0)` when `df` has no names.
CharecterInfo <- function(df, pattern = "_.*") {
  prefixes <- gsub(pattern = pattern, replacement = "", x = names(df))
  unique(prefixes)
}

# Collect the unique column-name prefixes of each monthly data frame.
# NOTE(review): the dput() samples in this post are labelled FEB_gems and
# MAR_gems, while these calls use FEB_games and MAR_games -- confirm which
# object names actually exist in the session.
FCharInfo <- CharecterInfo(FEB_games)
MCharInfo <- CharecterInfo(MAR_games)

然后尝试通过特定前缀将数据帧分开。

# Attempt to create one object per prefix by looping over the prefix strings.
# NOTE(review): FCharInfo is the value returned by CharecterInfo(), i.e. a
# plain character vector of prefixes rather than a data frame, so the
# two-dimensional subset FCharInfo[, ...] and colnames(FCharInfo) are applied
# to the wrong object -- presumably the source data frame was intended.
for (i in FCharInfo) {
  # The search pattern is the prefix followed by a literal "." (case ignored);
  # the sample column names shown in this post use "_" after the prefix.
  # assign() binds the result to a variable named after the prefix, so every
  # iteration produces a separate object instead of an element of one list.
  assign(i, FCharInfo[, grep(paste0(i, '\\.'), colnames(FCharInfo), ignore.case = T)])
}

这不起作用：代码会因“维数不正确”（incorrect number of dimensions）而报错；而且即便成功，也只会生成许多单独的数据框，而不是一个列表。

我也尝试了R中的Map函数，但没有成功。

我的理想输出是：

1）一个列表，其中“GAME1_”、“GAME2_”、“GAME3_”和“GAME4_”的数据各自作为单独的数据框。

2）最好在函数中，因为我的真实数据比此处显示的数据大得多。

尝试说明理想的列表（我知道缩进/灰色框仅用于代码，但是我不知道如何在stackoverflow上说明列表）

GameInfo

        Game1_
              GAME1_Class
              GAME1_Race
              GAME1_Alignment
              GAME1_Level
              GAME1_Alive
        Game2_
              GAME2_Class
              GAME2_Race
              GAME2_Alignment
              GAME2_Level
              GAME2_Alive
        Game3_
              GAME3_Class
              GAME3_Race
              GAME3_Alignment
              GAME3_Level
              GAME3_Alive
        Game4_
              GAME4_Class
              GAME4_Race
              GAME4_Alignment
              GAME4_Level
              GAME4_Alive

我的前缀有点复杂，其中可能包括：

GAME_1.Class
GAME_10.Class
GAME_100.Class

我尝试过（基于Julian_hn的回答）

# Put both data frames into one named list so they can be processed together.
# NOTE(review): the list elements are named FEB_gems / MAR_gems but are filled
# from objects called FEB_games / MAR_games -- confirm the object names.
Gems <- list(FEB_gems = FEB_games, MAR_gems = MAR_games)
# For each data frame, build a list of column subsets keyed by extracted prefix.
Gems.split <- lapply(Gems, function(df)
{
  # Extract a run of alphanumerics followed by exactly three further
  # characters: each "." in the pattern is a wildcard, not a literal dot, so
  # the tail of the match has a fixed width. E.g. "GAME_10.Class" and
  # "GAME_100.Class" would both appear to yield "GAME_10" -- verify.
  Games <- unique(str_extract(names(df),"[:alnum:]+..."))
  # grep() treats `name` as a regular expression and matches it anywhere in a
  # column name, so one prefix can also select columns of a longer prefix.
  # NOTE(review): when only one column matches, df[, ...] returns a vector
  # rather than a data frame.
  List <- lapply(Games,function(name){return(df[,grep(name,names(df))])})
  names(List) <- Games
  return(List)
})

但是这不能区分 1、100 或 1000。我可以在“.”符号处分隔前缀吗？

Answer 1

如果把 FEB_gems 和 MAR_gems 这两个 data.frame 放进一个列表里，下面的方法对我有效。

library(stringr)
# Collect the monthly data frames in a named list so each one is split the
# same way and the result stays keyed by its source data frame.
Gems <- list(FEB_gems = FEB_gems, MAR_gems = MAR_gems)

# For every data frame, build a named list holding one sub-data-frame per
# column-name prefix (the text up to and including the first ".").
Gems.Split <- lapply(Gems, function(df) {
  # old solution, requires GAME as name
  # Games <- unique(str_extract(names(df), "GAME[0-9]+"))

  # old solution 2: splits at "_"
  # Games <- unique(str_sub(str_extract(names(df), "[:alnum:]+_"), end = -2))

  # new solution: splits at "."
  # Anchor at the start and stop at the FIRST dot, so a name with several
  # dots ("GAME_1.Hit.Points") still belongs to the "GAME_1." group only.
  prefixes <- str_extract(names(df), "^[^.]+\\.")
  # Column names without a "." yield NA; drop them so they do not create a
  # bogus NA group (the result is an empty list if no name contains a dot).
  Games <- unique(prefixes[!is.na(prefixes)])

  List <- lapply(Games, function(name) {
    # startsWith() compares literal prefixes, and the trailing "." keeps
    # "GAME_1." from also selecting "GAME_10." / "GAME_100." columns.
    # drop = FALSE keeps a data frame even when a single column matches.
    df[, startsWith(names(df), name), drop = FALSE]
  })
  names(List) <- Games
  List
})

Answer 2

这是使用 tidyverse 的一种方式：

# Reusable magrittr pipeline (a functional sequence started with `.`) that
# reshapes one wide data frame so that each game prefix gets its own rows.
fseq <- . %>%
  # add a `rowid` column so each original row can be re-identified later
  rowid_to_column() %>%
  # stack every column except `rowid` into key/value pairs; the two empty
  # arguments leave the column names at their defaults, which the following
  # steps refer to as `key` and `value`
  gather(,,-rowid) %>%
  # split each former column name into a game prefix and a variable name
  # (presumably at the "_" via separate()'s default separator -- confirm)
  separate(key,c("game","col")) %>%
  # turn the variable names back into columns
  spread(col,value) %>%
  select(-rowid)

# Apply the pipeline to both data frames and row-bind the results, then drop
# the first column (`.[-1]`, presumably `game`, which is absent from the
# printed output below) and split the remaining columns by `game`.
map_dfr(list(FEB_gems, MAR_gems), fseq) %>%
  {split(.[-1],.$game)}

# $GAME1
# Alignment Alive    Class Level  Race
# 1        NE     y  paladin     6 human
# 3        CG     y  fighter     7   elf
# 5        CE     y   wizard     6   orc
# 7        NN     y sorcerer     7 human
# 9        LG     y    rouge     7 gnome
# 
# $GAME2
# Alignment Alive   Class Level     Race
# 2         NE     n fighter     5      elf
# 4         CG     y  wizard     5 half-elf
# 6         CE     y  cleric     5      elf
# 8         NN     y    monk     5    human
# 10        LG     y    bard     5    dwarf
# 
# $GAME3
# Alignment Alive     Class Level     Race
# 11        LG     y    cleric     1      elf
# 13        LG     y barbarian     1 half-elf
# 15        CE     y   warlock     1      elf
# 17        LN     n    ranger     1    human
# 19        LG     y      monk     1    dwarf
# 
# $GAME4
# Alignment Alive    Class Level     Race
# 12        CE     n  paladin     5      elf
# 14        CG     y  fighter     5 half-elf
# 16        CE     y   wizard     5      elf
# 18        LN     y sorcerer     5    human
# 20        LG     y    rouge     5    dwarf
# 
# Warning messages:
#   1: attributes are not identical across measure variables;
# they will be dropped 
# 2: attributes are not identical across measure variables;
# they will be dropped

Answer 3

另一种可能（嵌套）的tidyverse解决方案：

library(tidyverse)

# Transpose a data frame and return it as a tibble whose first column, `nms`,
# holds the original column names (one row per original column).
t_tib <- function(df) {
  transposed <- t(df)
  labelled <- cbind(nms = names(df), transposed)
  as_tibble(labelled)
}

# Build a nested tibble with one row per game prefix, starting from the
# transposed tables produced by t_tib().
GameInfo <- t_tib(FEB_gems) %>% 
  # stack the transposed February and March tables on top of each other
  bind_rows(t_tib(MAR_gems)) %>% 
  # every column except `nms` is gathered into (rowid, val) pairs
  gather(rowid, val, -nms) %>% 
  # split the original column name into a game prefix and a variable name
  # (presumably at the "_" via separate()'s default separator -- confirm)
  separate(nms, into = c("Game", "var")) %>% 
  # turn the variable names back into columns
  spread(var, val) %>% 
  select(-rowid) %>%
  # positional reorder of the columns; NOTE(review): this relies on the column
  # order produced by spread() -- verify before changing earlier steps
  select(1, 4, 6, 2, 5, 3) %>% 
  # collapse all non-Game columns into one list-column per Game
  nest(-Game)

（根据Moody_Mudskipper的建议删除了bind_cols。）

如何通过特定的前缀将多个数据帧转换为列表

3 个答案: