匹配数据帧列表R中的值

时间:2016-08-19 19:26:22

标签: r match

我想从数据帧列表中提取的值填充当前数据框中的新列。以下是我的数据的样子(下面的完整输入数据):

#Data Frame to be filled
> head(games, 5)
      Season Div       Date    HomeTeam  AwayTeam FTHG FTAG FTR
23053      1  E0 2000-09-10       Derby  Charlton    2    2   D
23054      1  E0 2000-09-11   Tottenham  West Ham    1    0   H
23055      1  E0 2000-09-16     Arsenal  Coventry    2    1   H
23056      1  E0 2000-09-16 Aston Villa  Bradford    2    0   H
23057      1  E0 2000-09-16    Charlton Tottenham    1    0   H

和我的数据框列表,其中应该提取/匹配值:

> head(para, 1)
$`2000-09-10`
$`2000-09-10`$scores
                   Attack     Defense     Success
Man United     2.14912729 -1.25946397  3.31882422
Leicester     -0.49739636 -0.90719409  0.06857911
Newcastle     -0.03265567 -0.42396682  0.46248947
Liverpool      0.90666453  0.32898381  0.12367415
Arsenal        2.06959156 -0.74226622  0.77491185
Leeds         -0.49633686 -1.80494633  0.71006855
Everton        0.94810944  0.02319138  0.31128405
Tottenham     -0.60791873 -0.94621398  0.37125322
Coventry      -0.83692022  0.49515076 -0.94503859
Chelsea        0.49759065  0.26012454 -0.16898841
Man City       0.34567732  1.20600136 -0.88994356
Middlesbrough  0.52869431  1.09634207  0.16495278
Aston Villa   -1.54843810 -0.80686850 -0.23383171
Bradford      -1.59279590  2.17819215 -1.76900051
Charlton      -0.22182631  0.12770046  0.22004781
Ipswich       -0.24578699 -0.11587056 -0.57752117
Sunderland    -1.22434134  1.92507466 -1.37509016
Derby         -0.68685237 -0.01749803  0.07859067
Southampton    0.24417108  0.08377682 -0.32019382
West Ham       0.30164268 -0.70024952 -0.32506796

最终结果将如下所示,其中新列(Home.att,Home.def ...)从“攻击”,“防御”和“成功”列中填充正确的日期(手动填充的第一个游戏):

 > head(result, 5)
      Season Div       Date    HomeTeam  AwayTeam FTHG FTAG FTR Home.att Home.def Home.suc Away.att Away.def
23053      1  E0 2000-09-10       Derby  Charlton    2    2   D  -0.686   -0.0174   0.078   -0.221   0.127
23054      1  E0 2000-09-11   Tottenham  West Ham    1    0   H       NA       NA       NA       NA       NA
23055      1  E0 2000-09-16     Arsenal  Coventry    2    1   H       NA       NA       NA       NA       NA
23056      1  E0 2000-09-16 Aston Villa  Bradford    2    0   H       NA       NA       NA       NA       NA
23057      1  E0 2000-09-16    Charlton Tottenham    1    0   H       NA       NA       NA       NA       NA
      Away.suc
23053       0.220
23054       NA
23055       NA
23056       NA
23057       NA

这是数据:

列表:

> dput(para)
structure(list(`2000-09-10` = structure(list(scores = structure(c(2.14912728720235, 
-0.497396360763537, -0.0326556689197955, 0.906664533290402, 2.06959155533505, 
-0.496336856936472, 0.948109444486475, -0.60791873399096, -0.836920224242405, 
0.497590648317418, 0.345677324643562, 0.528694311129923, -1.5484380984862, 
-1.59279590104225, -0.221826312405977, -0.245786985298441, -1.22434134315465, 
-0.686852371353265, 0.24417107585143, 0.301642676337344, -1.25946396898093, 
-0.907194092428637, -0.423966820565071, 0.328983814921655, -0.742266224892033, 
-1.8049463252895, 0.0231913829306193, -0.946213980365056, 0.49515075621433, 
0.260124536504241, 1.20600135926941, 1.09634207344526, -0.806868498981945, 
2.17819215173134, 0.12770046311082, -0.115870563365191, 1.92507465799405, 
-0.0174980256086261, 0.0837768233442609, -0.700249518989006, 
3.31882421660283, 0.0685791134716462, 0.462489468253112, 0.12367414627459, 
0.774911853045879, 0.710068546448154, 0.311284054769298, 0.371253224469632, 
-0.945038589967606, -0.168988406898258, -0.889943557164662, 0.164952778182874, 
-0.233831713495982, -1.76900051051107, 0.220047810985818, -0.577521172371895, 
-1.37509015572961, 0.0785906712967843, -0.320193820382072, -0.325067957279463
), .Dim = c(20L, 3L), .Dimnames = list(c("Man United", "Leicester", 
"Newcastle", "Liverpool", "Arsenal", "Leeds", "Everton", "Tottenham", 
"Coventry", "Chelsea", "Man City", "Middlesbrough", "Aston Villa", 
"Bradford", "Charlton", "Ipswich", "Sunderland", "Derby", "Southampton", 
"West Ham"), c("Attack", "Defense", "Success")))), .Names = "scores"), 
    `2000-09-11` = structure(list(scores = structure(c(2.0393133346101, 
    -0.540829703243983, -0.112676786866554, 0.737159302036929, 
    2.06330033142029, -0.566884605341195, 0.846093684747277, 
    -0.703918307389998, -0.959042142708014, 0.46674571916184, 
    0.198090956701195, 0.4085303953687, 0.564657150954971, -1.71918010765161, 
    -1.676904352495, -0.0470924017034225, -0.198365096964476, 
    -1.2681656570254, 0.155161529580561, 0.314006756807798, -1.20196548842488, 
    -0.931781220419512, -0.474448866704706, 0.186513840023453, 
    -0.872801458087194, -1.77570529857168, -0.105417478024948, 
    -0.95653279019992, 0.383018139632662, 0.11701681685574, 0.967101862640228, 
    0.84141630595534, 0.995275000232571, -0.906624488611703, 
    2.05071990529631, 1.08136983474088, -0.212336474435291, 1.70999792726233, 
    -0.111513634837269, -0.7833024343224, 3.36317974520892, -0.169706477913325, 
    0.402653854546479, 0.0896055389478296, 0.783966989384827, 
    0.814723517727525, 0.387397457869648, 0.501675202128876, 
    -1.00885846596321, -0.230975799609684, -0.749546449102057, 
    0.227106788590891, -0.0859415270077576, -0.200219271266986, 
    -1.79524576643927, -0.39126646888844, -0.368233314263709, 
    -1.22288543397946, -0.246232196286514, -0.101197923684588
    ), .Dim = c(20L, 3L), .Dimnames = list(c("Man United", "Leicester", 
    "Newcastle", "Liverpool", "Arsenal", "Leeds", "Everton", 
    "Tottenham", "Coventry", "Chelsea", "Man City", "Middlesbrough", 
    "Charlton", "Aston Villa", "Bradford", "Derby", "Ipswich", 
    "Sunderland", "Southampton", "West Ham"), c("Attack", "Defense", 
    "Success")))), .Names = "scores"), `2000-09-16` = structure(list(
        scores = structure(c(1.92322158767458, -0.61891728696854, 
        -0.207035659727411, 0.688128125165125, 0.246315084032231, 
        1.95880174075958, -0.648638451449831, 0.768090739958042, 
        -1.0191583675216, 0.3656920613234, 0.124998067667584, 
        0.323564120241808, 0.542301440975899, -1.76485773883853, 
        -1.7642466815073, -0.066324937731207, -0.350981404691087, 
        -1.38323736267654, 0.0881220808769143, 0.794162842436876, 
        -1.40224189200917, -1.05510108659065, -0.617418626982966, 
        0.0800888050321076, -0.400826740065721, -0.982638622688023, 
        -1.96015135404974, -0.22407473281442, 0.257657853367004, 
        0.0137337322954092, 0.890667127068695, 0.80253512727734, 
        0.928353461448457, -1.01661460471081, 1.93243466881776, 
        1.09289024885474, -0.359958801143917, 1.63993375643849, 
        -0.193560894118028, 0.574292574573439, 3.39626154818425, 
        -0.0293709386209353, 0.513750754643648, 0.28019086215193, 
        0.28019086215193, 0.851507423165214, 0.815035146855537, 
        0.408458215822548, -0.984289615817692, -0.20348373995346, 
        -0.674727815044826, 0.166620914769853, -0.0669389777218656, 
        -0.239956016263137, -1.69324670185382, -0.445321041006155, 
        -0.336971146523261, -1.15012500858924, -0.308776278774561, 
        -0.578808447575956), .Dim = c(20L, 3L), .Dimnames = list(
            c("Man United", "Leicester", "Newcastle", "Liverpool", 
            "Tottenham", "Arsenal", "Leeds", "Everton", "Coventry", 
            "Chelsea", "Man City", "Middlesbrough", "Charlton", 
            "Aston Villa", "Bradford", "Derby", "Ipswich", "Sunderland", 
            "Southampton", "West Ham"), c("Attack", "Defense", 
            "Success")))), .Names = "scores"), `2000-09-17` = structure(list(
        scores = structure(c(2.23950654572345, 2.53534637007243, 
        -1.02814784294904, 0.0758308950162754, 0.18010645884533, 
        -0.0555578804246226, -1.20639453648404, 0.628104564963435, 
        -0.359037143604242, 0.657288957120714, -0.0561769298423526, 
        -0.985662695245933, -0.822656590242715, 0.569002594138633, 
        -0.202384066222586, -0.465304772569995, -0.301679618149048, 
        -1.63709664978296, 0.027857253761329, 0.207055085875938, 
        -1.17481345793862, -1.02190991527362, -1.58188518175494, 
        -0.585777915399439, -0.143360945380226, -0.125183473879223, 
        -0.884351783270966, 0.861181295478697, -1.45949108937662, 
        0.485164154534398, -0.0783646033326118, 1.16399232392728, 
        1.59113167332065, 0.118268772413523, -0.672634459855927, 
        0.125845392474999, 0.0420698284585762, 2.10407379588099, 
        1.40440766427038, -0.168362075297294, 3.21825409955167, 
        0.936728354347971, 0.0735920870226012, 0.780725963847763, 
        0.403380549078277, -0.0824103034776063, -0.0523066073505803, 
        0.0561388304358171, 0.546381831688184, -0.208308997850788, 
        -0.585654412620274, -1.44879067994564, -0.854554389603362, 
        0.0862425265628431, 0.0862425265628431, -0.364311388350996, 
        0.199140113045724, -1.95648693778479, -0.638344606849103, 
        -0.195658558310546), .Dim = c(20L, 3L), .Dimnames = list(
            c("Man United", "Arsenal", "Leicester", "Liverpool", 
            "Tottenham", "Newcastle", "Aston Villa", "Charlton", 
            "Leeds", "Everton", "Ipswich", "Coventry", "Sunderland", 
            "Southampton", "Chelsea", "Man City", "Middlesbrough", 
            "Bradford", "Derby", "West Ham"), c("Attack", "Defense", 
            "Success")))), .Names = "scores")), .Names = c("2000-09-10", 
"2000-09-11", "2000-09-16", "2000-09-17"))

游戏:

> dput(games)
structure(list(Season = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), 
    Div = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L), .Label = "E0", class = "factor"), Date = structure(c(11210, 
    11211, 11216, 11216, 11216, 11216, 11216, 11216, 11216, 11217, 
    11217, 11217), class = "Date"), HomeTeam = structure(c(7L, 
    11L, 1L, 6L, 9L, 3L, 8L, 5L, 10L, 2L, 4L, 12L), .Label = c("Arsenal", 
    "Chelsea", "Everton", "Man City", "Southampton", "Aston Villa", 
    "Derby", "Leeds", "Charlton", "Sunderland", "Tottenham", 
    "West Ham"), class = "factor"), AwayTeam = structure(c(10L, 
    5L, 11L, 9L, 4L, 3L, 7L, 8L, 6L, 1L, 12L, 2L), .Label = c("Leicester", 
    "Liverpool", "Man United", "Tottenham", "West Ham", "Derby", 
    "Ipswich", "Newcastle", "Bradford", "Charlton", "Coventry", 
    "Middlesbrough"), class = "factor"), FTHG = c(2L, 1L, 2L, 
    2L, 1L, 1L, 1L, 2L, 2L, 0L, 1L, 1L), FTAG = c(2L, 0L, 1L, 
    0L, 0L, 3L, 2L, 0L, 1L, 2L, 1L, 1L), FTR = structure(c(2L, 
    3L, 3L, 3L, 3L, 1L, 1L, 3L, 3L, 1L, 2L, 2L), .Label = c("A", 
    "D", "H"), class = "factor")), .Names = c("Season", "Div", 
"Date", "HomeTeam", "AwayTeam", "FTHG", "FTAG", "FTR"), row.names = 23053:23064, class = "data.frame")

1 个答案:

答案 0 :(得分:0)

同样仍然只针对一个专栏,但解决了您的问题:

games_list <- list()
for(p in 1:length(para)) {
  # create the list of the same table over and over
  games_list[[p]] <- games
  # create the new column
  games_list[[p]]$Home.att <- NA
  # loop through
  for (g in 1:nrow(games_list[[p]])) {
    for(r in 1:nrow(data.frame(para[[p]])))
    if(games_list[[p]][g,]$HomeTeam==row.names(data.frame(para[[p]]))[r]) {
      games_list[[p]][g,]$Home.att <- data.frame(para[[p]])[r,]$scores.Attack
    }
  }
}