我想(通过名称或位置)从某个目录下的多个 .rda 文件中选择指定的列,然后只保留所选的列并重新保存这些文件(最好使用 purrr 和 dplyr)。这些 .rda 文件的结构相同:共 38 列,前 28 列的名称相同,后 10 列的名称各不相同,但具有相同的前缀("SC*")。
这是我到目前为止所尝试的:
library(tidyverse)
# List the entries of the working directory whose names match `pattern`,
# returned as a list with one file name per element.
# NOTE(review): `pattern` is a regular expression, not a shell glob, so
# "ASJC*" matches any name containing "ASJ" (followed by zero or more "C").
file_names <- as.list(dir(path=".", pattern="ASJC*"))
我加载了所选的文件
# Call load() on every file name; the second argument is the environment the
# stored objects are restored into.
# NOTE(review): load() returns the *names* of the objects it restored (a
# character vector), so `files` presumably holds names rather than data
# frames -- confirm before passing it to select().
files <- map(file_names,load,environment())
然后我尝试按名称选择指定的列:
# Attempt 1: pick the columns by name.
# NOTE(review): dplyr::select(...) is evaluated right here, before map() runs,
# and without a data frame as its first argument, so SOURCERECORD_ID is looked
# up as an ordinary object. starts_with() also matches a literal prefix, so
# the "*" in "SC*" is not treated as a wildcard.
files_selected <- map(files,dplyr::select(SOURCERECORD_ID,starts_with("SC*")))
我收到以下错误消息:
dplyr::select(SOURCERECORD_ID, starts_with("SC*")) 出错: 找不到对象 'SOURCERECORD_ID'
所以我尝试了以下代码:
# Attempt 2: pick the columns by position.
# NOTE(review): select() is again evaluated immediately, so the number 1 is
# taken as its data argument instead of a column position. The snippet as
# posted is also missing the closing parenthesis of map().
files_selected <- map(files,dplyr::select(1,29:38)
另一条错误消息:
UseMethod("select_") 出错: 没有适用于 'select_' 的方法可应用于类 "c('double', 'numeric')" 的对象
这是.rda文件的结构:
# Example of the data stored in each file, as dput() output: a tibble
# (class "tbl_df") with 6 rows and 38 columns. Columns 1-28 are the shared
# descriptive/metric columns; columns 29-38 are the ten "SC..." columns.
df1 <- structure(list(SOURCERECORD_ID = c("18659", "13951", "5400152705",
"16500154707", "20300195074", "19472"), TITOLO_FONTE = c("ANAIS DA ACADEMIA BRASILEIRA DE CIENCIAS",
"ARABIAN JOURNAL FOR SCIENCE AND ENGINEERING", "ARCHIVES DES SCIENCES",
"ASIAN JOURNAL OF SCIENTIFIC RESEARCH", "ASM SCIENCE JOURNAL",
"BEIJING DAXUE XUEBAO (ZIRAN KEXUE BAN)/ACTA SCIENTIARUM NATURALIUM UNIVERSITATIS PEKINENSIS"
), ISSN_P = c("0001-3765", "1319-8025", "1661-464X", "1992-1454",
"1823-6782", "0479-8023"), ISSN_E = c("1678-2690", NA, NA, NA,
NA, NA), STATUS = c("Active", "Active", "Active", "Active", "Active",
"Active"), COPERTURA = c("1994-ongoing, 1970-1992, 1949", "2003-ongoing, 1981",
"2017-ongoing, 2004-2014", "2009-ongoing", "2011-ongoing", "2001-ongoing"
), LINGUA = c("ENG", "ENG", "ENG, FRE", "ENG", "ENG", "CHI"),
CS2014 = c(0.95, 1.19, 0.64, 0.55, 0.1, 0.24), CS2015 = c(0.89,
0.81, 0.57, 0.36, 0.06, 0.17), CS2016 = c(1.05, 1.02, NA,
0.64, 0.11, 0.35), SJR2014 = c(0.42, 0.332, 0.285, 0.394,
0.107, 0.13), SJR2015 = c(0.332, 0.335, 0.201, 0.163, 0.122,
0.123), SJR2016 = c(0.386, 0.29, 0.149, 0.195, 0.101, 0.157
), SNIP2014 = c(0.756, 1.149, 0.236, 1.021, 0.408, 0.338),
SNIP2015 = c(0.67, 0.51, 0.362, 0.472, 0.082, 0.164), SNIP2016 = c(0.713,
0.657, 0.275, 0.549, 0.595, 0.265), TIPO_FONTE = c("Journal",
"Journal", "Journal", "Journal", "Journal", "Journal"), STORIA_TITOLO = c(NA,
NA, "Formerly known as", NA, NA, NA), TITOLI_COLLEGATI = c(NA,
NA, "Archives des Sciences et Compte Rendu Seances de la Societe",
NA, NA, NA), EDITORE = c("Academia Brasileira de Ciencias",
"Springer Verlag", "Societe de physique et d'histoire naturelle",
"Asian Network for Scientific Information", "Akademi Sains Malaysia",
"Beijing University Press"), PAESE_EDITORE = c("Brazil",
"Germany", "Switzerland", "Pakistan", "Malaysia", "China"
), ASJC = c("1000;", "1000;", "1000;", "1000;", "1000;",
"1000;"), AVG_CS = c(0.963, 1.007, 0.605, 0.517, 0.09, 0.253
), AVG_SJR = c(0.379, 0.319, 0.212, 0.251, 0.11, 0.137),
AVG_SNIP = c(0.713, 0.772, 0.291, 0.681, 0.362, 0.256), ELEGGIBILE = c("Eleggibile",
"Eleggibile", "Eleggibile", "Eleggibile", "Eleggibile", "Eleggibile"
), Percentili_SJR = c(15L, 21L, 47L, 35L, 89L, 77L), Percentili_SNIP = c(25L,
19L, 74L, 28L, 61L, 78L), SC13A_1000_SJR = c("Bottom", "Bottom",
"Bottom", "Bottom", "Bottom", "Bottom"), SC13B_1000_SJR = c("Bottom",
"Bottom", "Bottom", "Bottom", "Bottom", "Bottom"), SC13C_1000_SJR = c("Bottom",
"Bottom", "Bottom", "Bottom", "Bottom", "Bottom"), SC13D_1000_SJR = c("Bottom",
"Bottom", "Bottom", "Bottom", "Bottom", "Bottom"), SC13D4_1000_SJR = c("Bottom",
"Bottom", "Bottom", "Bottom", "Bottom", "Bottom"), SC13A_1000_SNIP = c("Bottom",
"Bottom", "Bottom", "Bottom", "Bottom", "Bottom"), SC13B_1000_SNIP = c("Bottom",
"Bottom", "Bottom", "Bottom", "Bottom", "Bottom"), SC13C_1000_SNIP = c("Bottom",
"Bottom", "Bottom", "Bottom", "Bottom", "Bottom"), SC13D_1000_SNIP = c("Bottom",
"Bottom", "Bottom", "Bottom", "Bottom", "Bottom"), SC13D4_1000_SNIP = c("Bottom",
"Bottom", "Bottom", "Bottom", "Bottom", "Bottom")), .Names = c("SOURCERECORD_ID",
"TITOLO_FONTE", "ISSN_P", "ISSN_E", "STATUS", "COPERTURA", "LINGUA",
"CS2014", "CS2015", "CS2016", "SJR2014", "SJR2015", "SJR2016",
"SNIP2014", "SNIP2015", "SNIP2016", "TIPO_FONTE", "STORIA_TITOLO",
"TITOLI_COLLEGATI", "EDITORE", "PAESE_EDITORE", "ASJC", "AVG_CS",
"AVG_SJR", "AVG_SNIP", "ELEGGIBILE", "Percentili_SJR", "Percentili_SNIP",
"SC13A_1000_SJR", "SC13B_1000_SJR", "SC13C_1000_SJR", "SC13D_1000_SJR",
"SC13D4_1000_SJR", "SC13A_1000_SNIP", "SC13B_1000_SNIP", "SC13C_1000_SNIP",
"SC13D_1000_SNIP", "SC13D4_1000_SNIP"), row.names = c(NA, -6L
), class = c("tbl_df", "tbl", "data.frame"))
答案 0 :(得分:0)
您确定 `files <- map(file_names,load,environment())` 得到的是一个数据框(data frame)列表吗?
如果是,请尝试
# Apply select() to each data frame in `files`: `~` turns the call into a
# function for map(), and `.` stands for the data frame being processed.
# starts_with() matches a literal prefix (not a glob or regex), so the prefix
# is "SC" -- with "SC*" none of the SC... columns would be kept.
# Assumes `files` is a list of data frames.
map(files, ~ dplyr::select(., SOURCERECORD_ID, starts_with("SC")))
我认为缺少的是 select 函数之前的 `~`,以及 select 函数中的 `.`,因为 select 的签名是 `select(.data, ...)`:`.` 表示每个数据集都会被传入这个位置。