Question

这是来自8个不同数据集的列名称的列表。某些列在数据集之间是公用的。例如，学校名称出现在数据集1、4、6、7和8中。但是，其输入方式不同。由于R区分大小写，这将使我很难从这些数据集中提取公共列。在这种情况下应该怎么办？我想到将所有元素都转换为大写。但这没用。

library(xlsx)
# Collect the CSV files in the working directory.
# NOTE(review): `pattern` is a regular expression, not a shell glob;
# "\\.csv$" is probably what was intended -- confirm.
file_names = list.files(pattern = "*.csv")
# Read every file into a data frame; `files` is a list of data frames.
files = lapply(file_names, read.csv )
# Preview the first rows of each data frame.
lapply(files,head)
# Disabled: exact (case-sensitive) intersection of the column names.
#Reduce(intersect, lapply(files,names))
# `p` is a list holding one character vector of column names per data frame.
p = lapply(files,names)
# This is the call that does not work as intended: toupper() expects a
# character vector, so the list `p` is coerced with as.character() first and
# the result is not a list of upper-cased name vectors. Applying it per
# element, e.g. lapply(p, toupper), keeps the list structure.
toupper(p)




 p = lapply(files,names)
> p
[[1]]
 [1] "Demographic"                       
 [2] "DBN"                               
 [3] "School.Name"                       
 [4] "Cohort"                            
 [5] "Total.Cohort"                      
 [6] "Total.Grads...n"                   
 [7] "Total.Grads.....of.cohort"         
 [8] "Total.Regents...n"                 
 [9] "Total.Regents.....of.cohort"       
[10] "Total.Regents.....of.grads"        
[11] "Advanced.Regents...n"              
[12] "Advanced.Regents.....of.cohort"    
[13] "Advanced.Regents.....of.grads"     
[14] "Regents.w.o.Advanced...n"          
[15] "Regents.w.o.Advanced.....of.cohort"
[16] "Regents.w.o.Advanced.....of.grads" 
[17] "Local...n"                         
[18] "Local.....of.cohort"               
[19] "Local.....of.grads"                
[20] "Still.Enrolled...n"                
[21] "Still.Enrolled.....of.cohort"      
[22] "Dropped.Out...n"                   
[23] "Dropped.Out.....of.cohort"         

[[2]]
 [1] "DBN"              "Grade"            "Year"            
 [4] "Category"         "Number.Tested"    "Mean.Scale.Score"
 [7] "Level.1.."        "Level.1...1"      "Level.2.."       
[10] "Level.2...1"      "Level.3.."        "Level.3...1"     
[13] "Level.4.."        "Level.4...1"      "Level.3.4.."     
[16] "Level.3.4...1"   

[[3]]
 [1] "DBN"               "Name"              "schoolyear"       
 [4] "fl_percent"        "frl_percent"       "total_enrollment" 
 [7] "prek"              "k"                 "grade1"           
[10] "grade2"            "grade3"            "grade4"           
[13] "grade5"            "grade6"            "grade7"           
[16] "grade8"            "grade9"            "grade10"          
[19] "grade11"           "grade12"           "ell_num"          
[22] "ell_percent"       "sped_num"          "sped_percent"     
[25] "ctt_num"           "selfcontained_num" "asian_num"        
[28] "asian_per"         "black_num"         "black_per"        
[31] "hispanic_num"      "hispanic_per"      "white_num"        
[34] "white_per"         "male_num"          "male_per"         
[37] "female_num"        "female_per"       

[[4]]
 [1] "CSD"                                 
 [2] "BOROUGH"                             
 [3] "SCHOOL.CODE"                         
 [4] "SCHOOL.NAME"                         
 [5] "GRADE"                               
 [6] "PROGRAM.TYPE"                        
 [7] "CORE.SUBJECT..MS.CORE.and.9.12.ONLY."
 [8] "CORE.COURSE..MS.CORE.and.9.12.ONLY." 
 [9] "SERVICE.CATEGORY.K.9..ONLY."         
[10] "NUMBER.OF.STUDENTS...SEATS.FILLED"   
[11] "NUMBER.OF.SECTIONS"                  
[12] "AVERAGE.CLASS.SIZE"                  
[13] "SIZE.OF.SMALLEST.CLASS"              
[14] "SIZE.OF.LARGEST.CLASS"               
[15] "DATA.SOURCE"                         
[16] "SCHOOLWIDE.PUPIL.TEACHER.RATIO"      

[[5]]
[1] "District"               "YTD...Attendance..Avg."
[3] "YTD.Enrollment.Avg."   

[[6]]
[1] "DBN"                                 
[2] "SchoolName"                          
[3] "AP.Test.Takers"                      
[4] "Total.Exams.Taken"                   
[5] "Number.of.Exams.with.scores.3.4.or.5"

[[7]]
[1] "DBN"                            
[2] "SCHOOL.NAME"                    
[3] "Num.of.SAT.Test.Takers"         
[4] "SAT.Critical.Reading.Avg..Score"
[5] "SAT.Math.Avg..Score"            
[6] "SAT.Writing.Avg..Score"         

[[8]]
 [1] "dbn"                             
 [2] "school_name"                     
 [3] "borough"                         
 [4] "building_code"                   
 [5] "phone_number"                    
 [6] "fax_number"                      
 [7] "grade_span_min"                  
 [8] "grade_span_max"                  
 [9] "expgrade_span_min"               
[10] "expgrade_span_max"               
[11] "bus"                             
[12] "subway"                          
[13] "primary_address_line_1"          
[14] "city"                            
[15] "state_code"                      
[16] "postcode"                        
[17] "website"                         
[18] "total_students"                  
[19] "campus_name"                     
[20] "school_type"                     
[21] "overview_paragraph"              
[22] "program_highlights"              
[23] "language_classes"                
[24] "advancedplacement_courses"       
[25] "online_ap_courses"               
[26] "online_language_courses"         
[27] "extracurricular_activities"      
[28] "psal_sports_boys"                
[29] "psal_sports_girls"               
[30] "psal_sports_coed"                
[31] "school_sports"                   
[32] "partner_cbo"                     
[33] "partner_hospital"                
[34] "partner_highered"                
[35] "partner_cultural"                
[36] "partner_nonprofit"               
[37] "partner_corporate"               
[38] "partner_financial"               
[39] "partner_other"                   
[40] "addtl_info1"                     
[41] "addtl_info2"                     
[42] "start_time"                      
[43] "end_time"                        
[44] "se_services"                     
[45] "ell_programs"                    
[46] "school_accessibility_description"
[47] "number_programs"                 
[48] "priority01"                      
[49] "priority02"                      
[50] "priority03"                      
[51] "priority04"                      
[52] "priority05"                      
[53] "priority06"                      
[54] "priority07"                      
[55] "priority08"                      
[56] "priority09"                      
[57] "priority10"                      
[58] "Location.1"                      
[59] "Community.Board"                 
[60] "Council.District"                
[61] "Census.Tract"                    
[62] "BIN"                             
[63] "BBL"                             
[64] "NTA"

Answer 1

如果要把列表（list）中各数据框的列名统一改为大写，可以用 lapply 遍历列表，对 names 应用 toupper（也可以用 tolower 统一改为小写），并在此过程中用 setNames 重设列名，然后再提取公共列。

先统一列名的大小写：

# For every data frame in `files`, return it with its column names
# upper-cased; setNames() replaces the names and returns the object.
files1 <- lapply(files, function(x) setNames(x, toupper(names(x))))

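接着求出所有数据集共有的列名，并从每个数据集中提取这些列：

nm1 <- Reduce(intersect, lapply(files1, names))
lst2 <- lapply(files1, `[[`, nm1)

（注意：`[[` 只有在 nm1 恰好包含一个列名时才可用；若公共列不止一个，应改用 `[`，即 lapply(files1, `[`, nm1)。）

在 tidyverse 中，也可以完成同样的操作。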

将列表中的所有元素转换为大写

1 个答案: