将数据框的五列合并到两个新列表中

时间:2017-11-26 10:27:24

标签: r list dataframe data.table

我有一个data.frame NOAA_OLR_TEST

# Reproducible ten-row sample. DATE_START / DATE_END are POSIXct values given
# as epoch seconds (consecutive rows are 86400 s apart) with no explicit time
# zone, so they print in the session's local zone.
# NOTE(review): LATITUDE holds -179.5, which is outside [-90, 90]; the two
# coordinate columns may be swapped in the source data -- confirm.
NOAA_OLR_TEST <- structure(
  list(
    DATE_START = .POSIXct(c(
      1170720000, 1170806400, 1170892800, 1170979200, 1171065600,
      1171152000, 1171238400, 1171324800, 1171411200, 1171497600
    )),
    DATE_END = .POSIXct(c(
      1171065600, 1171152000, 1171238400, 1171324800, 1171411200,
      1171497600, 1171584000, 1171670400, 1171756800, 1171843200
    )),
    LONGITUDE = rep(c(-89.5, -88.5), each = 5L),
    LATITUDE = rep(-179.5, 10L),
    OLR_DATA_1 = c(150, 146, 146, 142, NA, 150, 158, 155, 143, 142),
    OLR_DATA_2 = c(146, 146, 142, 141, 150, NA, 155, 143, 142, 138),
    OLR_DATA_3 = c(146, NA, 141, 150, 158, 155, 143, 142, 138, 135),
    OLR_DATA_4 = c(142, 141, 150, 158, 155, 143, 142, 138, 135, NA),
    OLR_DATA_5 = c(141, 150, NA, 155, 143, 142, 138, 135, 140, 139)
  ),
  row.names = c(NA, 10L),
  class = "data.frame"
)

这是我的数据:

# Preview the first six rows (head()'s default count, spelled out).
head(NOAA_OLR_TEST, n = 6L)

 DATE_START   DATE_END LONGITUDE LATITUDE OLR_DATA_1 OLR_DATA_2 OLR_DATA_3 OLR_DATA_4 OLR_DATA_5
1 2007-02-06 2007-02-10     -89.5   -179.5        150        146        146        142        141
2 2007-02-07 2007-02-11     -89.5   -179.5        146        146         NA        141        150
3 2007-02-08 2007-02-12     -89.5   -179.5        146        142        141        150         NA
4 2007-02-09 2007-02-13     -89.5   -179.5        142        141        150        158        155
5 2007-02-10 2007-02-14     -89.5   -179.5         NA        150        158        155        143
6 2007-02-11 2007-02-15     -88.5   -179.5        150         NA        155        143        142

我希望将数据框 NOAA_OLR_TEST 的第 5 到第 9 列(即 NOAA_OLR_TEST[5:9])转换为两个列表列,分别命名为 data_list_1 和 data_list_2:

 DATE_START   DATE_END LONGITUDE LATITUDE        DATA_LIST_1     DATA_LIST_2  
1 2007-02-06 2007-02-10     -89.5   -179.5        (150 ,146)      (146,142,141)
2 2007-02-07 2007-02-11     -89.5   -179.5        (146 ,146)      ( NA,141,150)
3 2007-02-08 2007-02-12     -89.5   -179.5        (146 ,142)      (141,150, NA)
4 2007-02-09 2007-02-13     -89.5   -179.5        (142 ,141)      (150,158,155)
5 2007-02-10 2007-02-14     -89.5   -179.5        ( NA ,150)      (158,155,143)
6 2007-02-11 2007-02-15     -88.5   -179.5        (150 , NA)      (155,143,142)

我尝试过 mapply、Map 和 cbind,但都出现了错误。

2 个答案:

答案 0 :(得分:0)

您可以按照以下方式使用apply

# Keep the four identifier columns, then attach one list-column per group of
# OLR columns. apply() passes each row to list(), so every cell is a
# one-element list wrapping that row's named numeric vector.
NOAA_OLR_TEST_2 <- NOAA_OLR_TEST[
  ,
  c("DATE_START", "DATE_END", "LONGITUDE", "LATITUDE")
]
NOAA_OLR_TEST_2$list1 <- apply(
  as.matrix(NOAA_OLR_TEST[, c("OLR_DATA_1", "OLR_DATA_2")]),
  MARGIN = 1,
  FUN = function(row_values) list(row_values)
)
NOAA_OLR_TEST_2$list2 <- apply(
  as.matrix(NOAA_OLR_TEST[, c("OLR_DATA_3", "OLR_DATA_4", "OLR_DATA_5")]),
  MARGIN = 1,
  FUN = function(row_values) list(row_values)
)
NOAA_OLR_TEST_2

结果:

> NOAA_OLR_TEST_2
            DATE_START            DATE_END LONGITUDE LATITUDE    list1         list2
1  2007-02-06 01:00:00 2007-02-10 01:00:00     -89.5   -179.5 150, 146 146, 142, 141
2  2007-02-07 01:00:00 2007-02-11 01:00:00     -89.5   -179.5 146, 146  NA, 141, 150
3  2007-02-08 01:00:00 2007-02-12 01:00:00     -89.5   -179.5 146, 142  141, 150, NA
4  2007-02-09 01:00:00 2007-02-13 01:00:00     -89.5   -179.5 142, 141 150, 158, 155
5  2007-02-10 01:00:00 2007-02-14 01:00:00     -89.5   -179.5  NA, 150 158, 155, 143
6  2007-02-11 01:00:00 2007-02-15 01:00:00     -88.5   -179.5  150, NA 155, 143, 142
7  2007-02-12 01:00:00 2007-02-16 01:00:00     -88.5   -179.5 158, 155 143, 142, 138
8  2007-02-13 01:00:00 2007-02-17 01:00:00     -88.5   -179.5 155, 143 142, 138, 135
9  2007-02-14 01:00:00 2007-02-18 01:00:00     -88.5   -179.5 143, 142 138, 135, 140
10 2007-02-15 01:00:00 2007-02-19 01:00:00     -88.5   -179.5 142, 138  135, NA, 139

如果要排除 NA 值,可以将代码调整为:

# Same construction as the plain apply() version, but each row's values are
# passed through na.omit() before being wrapped, so NA entries are dropped
# (na.omit() records what it removed in an "na.action" attribute).
NOAA_OLR_TEST_2 <- NOAA_OLR_TEST[
  ,
  c("DATE_START", "DATE_END", "LONGITUDE", "LATITUDE")
]
NOAA_OLR_TEST_2$list1 <- apply(
  as.matrix(NOAA_OLR_TEST[, c("OLR_DATA_1", "OLR_DATA_2")]),
  MARGIN = 1,
  FUN = function(row_values) {
    list(na.omit(row_values))
  }
)
NOAA_OLR_TEST_2$list2 <- apply(
  as.matrix(NOAA_OLR_TEST[, c("OLR_DATA_3", "OLR_DATA_4", "OLR_DATA_5")]),
  MARGIN = 1,
  FUN = function(row_values) {
    list(na.omit(row_values))
  }
)

答案 1 :(得分:0)

这是我的mutate(pmap())方法:

# "name =" isn't necessary.
# Assumes dplyr (mutate, select, %>%, as_tibble) and purrr (pmap) are attached.
# Columns are referenced by name instead of by position (..5, ..6, ...), so the
# result does not silently change if the column order of NOAA_OLR_TEST changes.

# d1: each cell of the new columns is a named numeric vector.
d1 <- NOAA_OLR_TEST %>%
  # as_tibble() %>%
  mutate(
    data_list_1 = pmap(
      list(OLR_DATA_1 = OLR_DATA_1, OLR_DATA_2 = OLR_DATA_2),
      c
    ),
    data_list_2 = pmap(
      list(
        OLR_DATA_3 = OLR_DATA_3,
        OLR_DATA_4 = OLR_DATA_4,
        OLR_DATA_5 = OLR_DATA_5
      ),
      c
    )
  ) %>%
  select(-(OLR_DATA_1:OLR_DATA_5))

# d2: same layout, but each cell is a named list of length-one values.
d2 <- NOAA_OLR_TEST %>%
  # as_tibble() %>%
  mutate(
    data_list_1 = pmap(
      list(OLR_DATA_1 = OLR_DATA_1, OLR_DATA_2 = OLR_DATA_2),
      list
    ),
    data_list_2 = pmap(
      list(
        OLR_DATA_3 = OLR_DATA_3,
        OLR_DATA_4 = OLR_DATA_4,
        OLR_DATA_5 = OLR_DATA_5
      ),
      list
    )
  ) %>%
  select(-(OLR_DATA_1:OLR_DATA_5))

d1
# as_tibble() replaces the deprecated as.tibble(); tibble printing shows the
# type and length of each list-column cell.
d1 %>% as_tibble()
d2 %>% as_tibble()