在 R 中创建一个只包含特定数据的列表

时间:2018-10-02 08:52:42

标签: r list nested

以下是嵌套在较大列表中的两个列表。

我想创建一个新列表(我想这是最好的输出,但是欢迎其他建议!),该列表只包含与疟疾有关的数据。

我没有共享的代码,因为我不知道该怎么做!预先感谢。

# Two character matrices (dput() output) held in one list. Values are stored
# column by column; row 1 of each matrix holds the headers ("Condition", the
# years 2001-2004 and "Total") and every following row is one condition.
# The list is bound to `dat`, the name the answer's code reads, and the outer
# list( call is closed so the snippet parses on its own.
dat <- list(structure(c("Condition", "Malnutrition", "Anaemia", "Pneumonia", 
"Uncomplicated malaria", "Diarrhoea with Blood", "Other diarrhea", 
"Total", "2001", "118", "243", "1592", "4969", "134", "423", 
"7479", "2002", "9927", "18933", "98068", "302891", "21724", 
"48610", "500153", "2003", "7232", "18933", "68418", "227034", 
"13369", "35083", "370069", "2004", "6896", "98068", "74769", 
"215533", "12198", "46265", "453729", "Total", "334156", "136177", 
"242847", "750426", "47425", "130381", "1641412"), .Dim = c(8L, 
6L)), 
structure(c("Condition", "Uncomplicated malaria", "Diarrhoea with Blood", 
"Other diarrhea", "Pneumonia", "Hypertension", "Diabetes", "Cataracts", 
"Asthma", "Scabies", "Mental Disorder", "2001", "209182", "7306", 
"12800", "27832", "13573", "231", "614", "2857", "15596", "1326", 
"2002", "264785", "17662", "27739", "54454", "24759", "1009", 
"1092", "7470", "43741", "2298", "2003", "227034", "8225", "2003", 
"38233", "18790", "551", "526", "5819", "22077", "1627", "2004", 
"215533", "7605", "2004", "41128", "23329", "690", "834", "5583", 
"23784", "1954", "Total", "916534", "40798", "44546", "161647", 
"80451", "2481", "3066", "21729", "105198", "7205"), .Dim = c(11L, 
6L)))

1 个答案:

答案 0 :(得分:0)

Edit based on op feedback:

Looking at your example, we first need to transform each list element into a data.frame with the correct column names:

library(purrr)
library(dplyr)

# Turn every character matrix in `dat` into a data.frame whose column names
# come from the matrix's own first row.

# Row 1 of each matrix holds the headers ("Condition", the years, "Total").
col_names_list <- lapply(dat, function(x) x[1, ])

# Drop the header row and convert to data.frame.
# drop = FALSE keeps a matrix even when only one data row is left (otherwise
# the row collapses to a vector and becomes a single column);
# stringsAsFactors = FALSE keeps the values as character on R < 4.0 as well.
dat <- lapply(dat, function(x) {
  as.data.frame(x[-1, , drop = FALSE], stringsAsFactors = FALSE)
})

# Apply every header to its column (not just "Condition"), so the year and
# "Total" columns are labelled instead of being left as V2..V6.
dat <- map2(dat, col_names_list, function(x, y) {
  colnames(x) <- y
  x
})

With map we can then extract what we need:

# For every table in `dat`, keep only the rows whose first column mentions
# malaria. ignore.case = TRUE also matches labels such as "Malaria";
# drop = FALSE keeps the tabular shape even when a single row matches.
dat %>%
  map(function(x) {
    x[grepl("malaria", x[, 1], ignore.case = TRUE), , drop = FALSE]
  })

# [[1]]
# Condition 2001   2002   2003   2004  Total
# 4 Uncomplicated malaria 4969 302891 227034 215533 750426
# 
# [[2]]
# Condition   2001   2002   2003   2004  Total
# 1 Uncomplicated malaria 209182 264785 227034 215533 916534

Data used:

# Example data: a list of two character matrices. Each matrix is filled column
# by column (one column per source line below); its first row holds the
# headers and every following row is one condition.
dat <- list(
  matrix(
    c(
      "Condition", "Malnutrition", "Anaemia", "Pneumonia", "Uncomplicated malaria", "Diarrhoea with Blood", "Other diarrhea", "Total",
      "2001", "118", "243", "1592", "4969", "134", "423", "7479",
      "2002", "9927", "18933", "98068", "302891", "21724", "48610", "500153",
      "2003", "7232", "18933", "68418", "227034", "13369", "35083", "370069",
      "2004", "6896", "98068", "74769", "215533", "12198", "46265", "453729",
      "Total", "334156", "136177", "242847", "750426", "47425", "130381", "1641412"
    ),
    nrow = 8L,
    ncol = 6L
  ),
  matrix(
    c(
      "Condition", "Uncomplicated malaria", "Diarrhoea with Blood", "Other diarrhea", "Pneumonia", "Hypertension", "Diabetes", "Cataracts", "Asthma", "Scabies", "Mental Disorder",
      "2001", "209182", "7306", "12800", "27832", "13573", "231", "614", "2857", "15596", "1326",
      "2002", "264785", "17662", "27739", "54454", "24759", "1009", "1092", "7470", "43741", "2298",
      "2003", "227034", "8225", "2003", "38233", "18790", "551", "526", "5819", "22077", "1627",
      "2004", "215533", "7605", "2004", "41128", "23329", "690", "834", "5583", "23784", "1954",
      "Total", "916534", "40798", "44546", "161647", "80451", "2481", "3066", "21729", "105198", "7205"
    ),
    nrow = 11L,
    ncol = 6L
  )
)