我怎样才能找到丢失的数据帧?

时间:2012-03-12 00:20:05

标签: r sqldf rbind

我正在处理竞选财务数据,想找出在1989年至2010年期间捐赠过x金额的x公司员工。使用sqldf,我能够筛选出这些信息。数据大于4GB,而我的内存不到2GB,因此我把数据拆分成每个大约100MB的电子表格,总共要加载40个文件才能得到这些信息。有些文件里有来自x公司的捐款人,有些则没有。

有谁知道如何为这些文件做一个“捕手手套”(catcher's mitt)?这个“捕手手套”应当识别出含有相关记录的文件,并把它们的行绑定(rbind)在一起。

如果有人愿意提出一般性的改进建议,我把代码附在下面。

感谢您的帮助

#Individual Donors for Retail Sales Companies 1990-2010

#load file
# Batch of five CSV chunks. The number in each object name is a running index
# over all chunks (indiv00.5 holds indiv02.1.csv), not an election cycle.

    indiv00.1<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv00.1.csv")
    indiv00.2<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv00.2.csv")
    indiv00.3<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv00.3.csv")
    indiv00.4<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv00.4.csv")
    indiv00.5<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv02.1.csv")

#check

#library

    library(sqldf)

#search for name
# Keep the rows whose Orgname matches the pattern.
# NOTE(review): the pattern has a space after the last %, so only values that
# end in a space can match -- confirm that is how Orgname is stored.

    name1<-sqldf("select * from 'indiv00.1' where Orgname like '%Wal-Mart Stores% '")
    name2<-sqldf("select * from 'indiv00.2' where Orgname like '%Wal-Mart Stores% '")
    name3<-sqldf("select * from 'indiv00.3' where Orgname like '%Wal-Mart Stores% '")
    name4<-sqldf("select * from 'indiv00.4' where Orgname like '%Wal-Mart Stores% '")
    name5<-sqldf("select * from 'indiv00.5' where Orgname like '%Wal-Mart Stores% '")

#check

#merge data
# The same column list and grouping is applied to every filtered table, so the
# statement is written once and the table name is filled in with sprintf().
# NOTE(review): the selected columns are not aggregated, so each
# ContribID/Cycle group is presumably reduced to a single row chosen by the
# database -- confirm that dropping the other rows is intended.

    merge_sql<-"select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from %s group by
    ContribID,Cycle"

    na1<-sqldf(sprintf(merge_sql,"name1"))
    na2<-sqldf(sprintf(merge_sql,"name2"))
    na3<-sqldf(sprintf(merge_sql,"name3"))
    na4<-sqldf(sprintf(merge_sql,"name4"))
    na5<-sqldf(sprintf(merge_sql,"name5"))

#check

#save data
# Saved unconditionally so that every na*.Rdata file exists for the later load
# step. The `mode` argument of exists() expects an object mode such as "list",
# not an environment name, so the former !exists(..., mode="global.env") test
# did not check what it appeared to check.

    setwd("C:\\Users\\tbrown11\\Desktop\\Data")

    save(na1,file="na1.Rdata")
    save(na2,file="na2.Rdata")
    save(na3,file="na3.Rdata")
    save(na4,file="na4.Rdata")
    save(na5,file="na5.Rdata")

#check

#clean house
# Drop everything from the workspace to free memory before the next step.

    rm(list = ls(), envir = globalenv())

#check

#load file
# Batch of five CSV chunks. The number in each object name is a running index
# over all chunks (indiv00.6 holds indiv02.2.csv), not an election cycle.

    indiv00.6<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv02.2.csv")
    indiv00.7<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv02.3.csv")
    indiv00.8<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv04.1.csv")
    indiv00.9<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv04.2.csv")
    indiv00.10<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv04.3.csv")

#check

#library

    library(sqldf)

#search for name
# Keep the rows whose Orgname matches the pattern.
# NOTE(review): the pattern has a space after the last %, so only values that
# end in a space can match -- confirm that is how Orgname is stored.

    name6<-sqldf("select * from 'indiv00.6' where Orgname like '%Wal-Mart Stores% '")
    name7<-sqldf("select * from 'indiv00.7' where Orgname like '%Wal-Mart Stores% '")
    name8<-sqldf("select * from 'indiv00.8' where Orgname like '%Wal-Mart Stores% '")
    name9<-sqldf("select * from 'indiv00.9' where Orgname like '%Wal-Mart Stores% '")
    name10<-sqldf("select * from 'indiv00.10' where Orgname like '%Wal-Mart Stores% '")

#check

#merge data
# The same column list and grouping is applied to every filtered table, so the
# statement is written once and the table name is filled in with sprintf().
# NOTE(review): the selected columns are not aggregated, so each
# ContribID/Cycle group is presumably reduced to a single row chosen by the
# database -- confirm that dropping the other rows is intended.

    merge_sql<-"select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from %s group by
    ContribID,Cycle"

    na6<-sqldf(sprintf(merge_sql,"name6"))
    na7<-sqldf(sprintf(merge_sql,"name7"))
    na8<-sqldf(sprintf(merge_sql,"name8"))
    na9<-sqldf(sprintf(merge_sql,"name9"))
    na10<-sqldf(sprintf(merge_sql,"name10"))

#check

#save data
# Saved unconditionally so that every na*.Rdata file exists for the later load
# step. The `mode` argument of exists() expects an object mode such as "list",
# not an environment name, so the former !exists(..., mode="global.env") test
# did not check what it appeared to check.

    setwd("C:\\Users\\tbrown11\\Desktop\\Data")

    save(na6,file="na6.Rdata")
    save(na7,file="na7.Rdata")
    save(na8,file="na8.Rdata")
    save(na9,file="na9.Rdata")
    save(na10,file="na10.Rdata")

#check

#clean house
# Drop everything from the workspace to free memory before the next step.

    rm(list = ls(), envir = globalenv())

#check

#load file
# Batch of five CSV chunks. The number in each object name is a running index
# over all chunks (indiv00.11 holds indiv04.4.csv), not an election cycle.

    indiv00.11<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv04.4.csv")
    indiv00.12<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv04.5.csv")
    indiv00.13<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv04.6.csv")
    indiv00.14<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv06.1.csv")
    indiv00.15<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv06.2.csv")

#check

#library

    library(sqldf)

#search for name
# Keep the rows whose Orgname matches the pattern.
# NOTE(review): the pattern has a space after the last %, so only values that
# end in a space can match -- confirm that is how Orgname is stored.

    name11<-sqldf("select * from 'indiv00.11' where Orgname like '%Wal-Mart Stores% '")
    name12<-sqldf("select * from 'indiv00.12' where Orgname like '%Wal-Mart Stores% '")
    name13<-sqldf("select * from 'indiv00.13' where Orgname like '%Wal-Mart Stores% '")
    name14<-sqldf("select * from 'indiv00.14' where Orgname like '%Wal-Mart Stores% '")
    name15<-sqldf("select * from 'indiv00.15' where Orgname like '%Wal-Mart Stores% '")

#merge
# The same column list and grouping is applied to every filtered table, so the
# statement is written once and the table name is filled in with sprintf().
# NOTE(review): the selected columns are not aggregated, so each
# ContribID/Cycle group is presumably reduced to a single row chosen by the
# database -- confirm that dropping the other rows is intended.

    merge_sql<-"select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from %s group by
    ContribID,Cycle"

    na11<-sqldf(sprintf(merge_sql,"name11"))
    na12<-sqldf(sprintf(merge_sql,"name12"))
    na13<-sqldf(sprintf(merge_sql,"name13"))
    na14<-sqldf(sprintf(merge_sql,"name14"))
    na15<-sqldf(sprintf(merge_sql,"name15"))

#check

#save data
# Saved unconditionally so that every na*.Rdata file exists for the later load
# step. The `mode` argument of exists() expects an object mode such as "list",
# not an environment name, so the former !exists(..., mode="global.env") test
# did not check what it appeared to check.

    setwd("C:\\Users\\tbrown11\\Desktop\\Data")

    save(na11,file="na11.Rdata")
    save(na12,file="na12.Rdata")
    save(na13,file="na13.Rdata")
    save(na14,file="na14.Rdata")
    save(na15,file="na15.Rdata")

#check

#clean house
# Drop everything from the workspace to free memory before the next step.

    rm(list = ls(), envir = globalenv())

#check  

#load file
# Batch of five CSV chunks. The number in each object name is a running index
# over all chunks (indiv00.16 holds indiv06.3.csv), not an election cycle.

    indiv00.16<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv06.3.csv")
    indiv00.17<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv06.4.csv")
    indiv00.18<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv06.5.csv")
    indiv00.19<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.1.csv")
    indiv00.20<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.2.csv")

#check

#library

    library(sqldf)

#search for name
# Keep the rows whose Orgname matches the pattern.
# NOTE(review): the pattern has a space after the last %, so only values that
# end in a space can match -- confirm that is how Orgname is stored.

    name16<-sqldf("select * from 'indiv00.16' where Orgname like '%Wal-Mart Stores% '")
    name17<-sqldf("select * from 'indiv00.17' where Orgname like '%Wal-Mart Stores% '")
    name18<-sqldf("select * from 'indiv00.18' where Orgname like '%Wal-Mart Stores% '")
    name19<-sqldf("select * from 'indiv00.19' where Orgname like '%Wal-Mart Stores% '")
    name20<-sqldf("select * from 'indiv00.20' where Orgname like '%Wal-Mart Stores% '")

#check

#merge
# The same column list and grouping is applied to every filtered table, so the
# statement is written once and the table name is filled in with sprintf().
# NOTE(review): the selected columns are not aggregated, so each
# ContribID/Cycle group is presumably reduced to a single row chosen by the
# database -- confirm that dropping the other rows is intended.

    merge_sql<-"select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from %s group by
    ContribID,Cycle"

    na16<-sqldf(sprintf(merge_sql,"name16"))
    na17<-sqldf(sprintf(merge_sql,"name17"))
    na18<-sqldf(sprintf(merge_sql,"name18"))
    na19<-sqldf(sprintf(merge_sql,"name19"))
    na20<-sqldf(sprintf(merge_sql,"name20"))

#check

#save data
# Saved unconditionally so that every na*.Rdata file exists for the later load
# step. The `mode` argument of exists() expects an object mode such as "list",
# not an environment name, so the former !exists(..., mode="global.env") test
# did not check what it appeared to check.

    setwd("C:\\Users\\tbrown11\\Desktop\\Data")

    save(na16,file="na16.Rdata")
    save(na17,file="na17.Rdata")
    save(na18,file="na18.Rdata")
    save(na19,file="na19.Rdata")
    save(na20,file="na20.Rdata")

#check

#clean house
# Drop everything from the workspace to free memory before the next step.

    rm(list = ls(), envir = globalenv())

#check  

#load file
# Batch of five CSV chunks. The number in each object name is a running index
# over all chunks (indiv00.21 holds indiv08.3.csv), not an election cycle.

    indiv00.21<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.3.csv")
    indiv00.22<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.4.csv")
    indiv00.23<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.5.csv")
    indiv00.24<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.6.csv")
    indiv00.25<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.7.csv")

#check

#library

    library(sqldf)

#search for name
# Keep the rows whose Orgname matches the pattern.
# NOTE(review): the pattern has a space after the last %, so only values that
# end in a space can match -- confirm that is how Orgname is stored.

    name21<-sqldf("select * from 'indiv00.21' where Orgname like '%Wal-Mart Stores% '")
    name22<-sqldf("select * from 'indiv00.22' where Orgname like '%Wal-Mart Stores% '")
    name23<-sqldf("select * from 'indiv00.23' where Orgname like '%Wal-Mart Stores% '")
    name24<-sqldf("select * from 'indiv00.24' where Orgname like '%Wal-Mart Stores% '")
    name25<-sqldf("select * from 'indiv00.25' where Orgname like '%Wal-Mart Stores% '")

#check

#merge
# The same column list and grouping is applied to every filtered table, so the
# statement is written once and the table name is filled in with sprintf().
# NOTE(review): the selected columns are not aggregated, so each
# ContribID/Cycle group is presumably reduced to a single row chosen by the
# database -- confirm that dropping the other rows is intended.

    merge_sql<-"select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from %s group by
    ContribID,Cycle"

    na21<-sqldf(sprintf(merge_sql,"name21"))
    na22<-sqldf(sprintf(merge_sql,"name22"))
    na23<-sqldf(sprintf(merge_sql,"name23"))
    na24<-sqldf(sprintf(merge_sql,"name24"))
    na25<-sqldf(sprintf(merge_sql,"name25"))

#check

#save data
# Saved unconditionally so that every na*.Rdata file exists for the later load
# step. The `mode` argument of exists() expects an object mode such as "list",
# not an environment name, so the former !exists(..., mode="global.env") test
# did not check what it appeared to check.

    setwd("C:\\Users\\tbrown11\\Desktop\\Data")

    save(na21,file="na21.Rdata")
    save(na22,file="na22.Rdata")
    save(na23,file="na23.Rdata")
    save(na24,file="na24.Rdata")
    save(na25,file="na25.Rdata")

#check

#clean house
# Drop everything from the workspace to free memory before the next step.

    rm(list = ls(), envir = globalenv())

#check

#Load file
# Batch of five CSV chunks. The number in each object name is a running index
# over all chunks (indiv00.26 holds indiv08.8.csv), not an election cycle.

    indiv00.26<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.8.csv")
    indiv00.27<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.9.csv")
    indiv00.28<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv10.1.csv")
    indiv00.29<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv10.2.csv")
    indiv00.30<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv10.3.csv")

#check

#library

    library(sqldf)

#search for name
# Keep the rows whose Orgname matches the pattern.
# NOTE(review): the pattern has a space after the last %, so only values that
# end in a space can match -- confirm that is how Orgname is stored.

    name26<-sqldf("select * from 'indiv00.26' where Orgname like '%Wal-Mart Stores% '")
    name27<-sqldf("select * from 'indiv00.27' where Orgname like '%Wal-Mart Stores% '")
    name28<-sqldf("select * from 'indiv00.28' where Orgname like '%Wal-Mart Stores% '")
    name29<-sqldf("select * from 'indiv00.29' where Orgname like '%Wal-Mart Stores% '")
    name30<-sqldf("select * from 'indiv00.30' where Orgname like '%Wal-Mart Stores% '")

#check

#merge
# The same column list and grouping is applied to every filtered table, so the
# statement is written once and the table name is filled in with sprintf().
# NOTE(review): the selected columns are not aggregated, so each
# ContribID/Cycle group is presumably reduced to a single row chosen by the
# database -- confirm that dropping the other rows is intended.

    merge_sql<-"select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from %s group by
    ContribID,Cycle"

    na26<-sqldf(sprintf(merge_sql,"name26"))
    na27<-sqldf(sprintf(merge_sql,"name27"))
    na28<-sqldf(sprintf(merge_sql,"name28"))
    na29<-sqldf(sprintf(merge_sql,"name29"))
    na30<-sqldf(sprintf(merge_sql,"name30"))

#check

#save data
# Saved unconditionally so that every na*.Rdata file exists for the later load
# step. The `mode` argument of exists() expects an object mode such as "list",
# not an environment name, so the former !exists(..., mode="global.env") test
# did not check what it appeared to check.

    setwd("C:\\Users\\tbrown11\\Desktop\\Data")

    save(na26,file="na26.Rdata")
    save(na27,file="na27.Rdata")
    save(na28,file="na28.Rdata")
    save(na29,file="na29.Rdata")
    save(na30,file="na30.Rdata")

#check

#clean house
# Drop everything from the workspace to free memory before the next step.

    rm(list = ls(), envir = globalenv())

#check

#load file
# Batch of five CSV chunks. The number in each object name is a running index
# over all chunks (indiv00.31 holds indiv10.4.csv), not an election cycle.

    indiv00.31<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv10.4.csv")
    indiv00.32<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv10.5.csv")
    indiv00.33<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv96.csv")
    indiv00.34<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv96.2.csv")
    indiv00.35<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv98.csv")

#check

#library

    library(sqldf)

#search for name
# Keep the rows whose Orgname matches the pattern.
# NOTE(review): the pattern has a space after the last %, so only values that
# end in a space can match -- confirm that is how Orgname is stored.

    name31<-sqldf("select * from 'indiv00.31' where Orgname like '%Wal-Mart Stores% '")
    name32<-sqldf("select * from 'indiv00.32' where Orgname like '%Wal-Mart Stores% '")
    name33<-sqldf("select * from 'indiv00.33' where Orgname like '%Wal-Mart Stores% '")
    name34<-sqldf("select * from 'indiv00.34' where Orgname like '%Wal-Mart Stores% '")
    name35<-sqldf("select * from 'indiv00.35' where Orgname like '%Wal-Mart Stores% '")

#check

#merge
# The same column list and grouping is applied to every filtered table, so the
# statement is written once and the table name is filled in with sprintf().
# NOTE(review): the selected columns are not aggregated, so each
# ContribID/Cycle group is presumably reduced to a single row chosen by the
# database -- confirm that dropping the other rows is intended.

    merge_sql<-"select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from %s group by
    ContribID,Cycle"

    na31<-sqldf(sprintf(merge_sql,"name31"))
    na32<-sqldf(sprintf(merge_sql,"name32"))
    na33<-sqldf(sprintf(merge_sql,"name33"))
    na34<-sqldf(sprintf(merge_sql,"name34"))
    na35<-sqldf(sprintf(merge_sql,"name35"))

#check

#save data
# Saved unconditionally so that every na*.Rdata file exists for the later load
# step. The `mode` argument of exists() expects an object mode such as "list",
# not an environment name, so the former !exists(..., mode="global.env") test
# did not check what it appeared to check.

    setwd("C:\\Users\\tbrown11\\Desktop\\Data")

    save(na31,file="na31.Rdata")
    save(na32,file="na32.Rdata")
    save(na33,file="na33.Rdata")
    save(na34,file="na34.Rdata")
    save(na35,file="na35.Rdata")

#check

#clean house
# Drop everything from the workspace to free memory before the next step.

    rm(list = ls(), envir = globalenv())

#check

#load file
# Batch of five CSV chunks. The number in each object name is a running index
# over all chunks (indiv00.36 holds indiv98.2.csv), not an election cycle.

    indiv00.36<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv98.2.csv")
    indiv00.37<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv98.3.csv")
    indiv00.38<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indivs90.csv")
    indiv00.39<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indivs92.csv")
    indiv00.40<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indivs94.csv")

#check

#library

    library(sqldf)

#search for name
# Keep the rows whose Orgname matches the pattern.
# NOTE(review): the pattern has a space after the last %, so only values that
# end in a space can match -- confirm that is how Orgname is stored.

#check
    name36<-sqldf("select * from 'indiv00.36' where Orgname like '%Wal-Mart Stores% '")
    name37<-sqldf("select * from 'indiv00.37' where Orgname like '%Wal-Mart Stores% '")
    name38<-sqldf("select * from 'indiv00.38' where Orgname like '%Wal-Mart Stores% '")
    name39<-sqldf("select * from 'indiv00.39' where Orgname like '%Wal-Mart Stores% '")
    name40<-sqldf("select * from 'indiv00.40' where Orgname like '%Wal-Mart Stores% '")

#check

#merge
# The same column list and grouping is applied to every filtered table, so the
# statement is written once and the table name is filled in with sprintf().
# NOTE(review): the selected columns are not aggregated, so each
# ContribID/Cycle group is presumably reduced to a single row chosen by the
# database -- confirm that dropping the other rows is intended.

    merge_sql<-"select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from %s group by
    ContribID,Cycle"

    na36<-sqldf(sprintf(merge_sql,"name36"))
    na37<-sqldf(sprintf(merge_sql,"name37"))
    na38<-sqldf(sprintf(merge_sql,"name38"))
    na39<-sqldf(sprintf(merge_sql,"name39"))
    na40<-sqldf(sprintf(merge_sql,"name40"))

#check

#save data
# Saved unconditionally so that every na*.Rdata file exists for the later load
# step. The `mode` argument of exists() expects an object mode such as "list",
# not an environment name, so the former !exists(..., mode="global.env") test
# did not check what it appeared to check.

    setwd("C:\\Users\\tbrown11\\Desktop\\Data")

    save(na36,file="na36.Rdata")
    save(na37,file="na37.Rdata")
    save(na38,file="na38.Rdata")
    save(na39,file="na39.Rdata")
    save(na40,file="na40.Rdata")

#check

#clean house
# Drop everything from the workspace to free memory before the next step.

    rm(list = ls(), envir = globalenv())

#check

#load file
# load() returns the names of the restored objects rather than the data, so
# each file is loaded into a scratch environment and the data frame is taken
# out of it. A missing file is reported and skipped instead of stopping the run.

    data_dir<-"C:\\Users\\tbrown11\\Desktop\\Data"
    na_names<-paste0("na",1:40)

    read_saved<-function(object){
        path<-file.path(data_dir,paste0(object,".Rdata"))
        if(!file.exists(path)){
            warning("skipping missing file: ",path,call.=FALSE)
            return(NULL)
        }
        scratch<-new.env()
        load(path,envir=scratch)
        if(!exists(object,envir=scratch,inherits=FALSE))return(NULL)
        get(object,envir=scratch,inherits=FALSE)
    }

    pieces<-lapply(na_names,read_saved)

#check

#catcher's mitt
# Keep only the results that are data frames with at least one row, i.e. the
# files that actually contained matching contributors.

    has_rows<-vapply(pieces,function(p)is.data.frame(p)&&nrow(p)>0,logical(1))
    pieces<-pieces[has_rows]
    if(length(pieces)==0)stop("no matching rows found in any na*.Rdata file",call.=FALSE)

#check

#Row bind
# A single rbind over the whole list replaces the hand-typed list of names.

    name<-do.call(rbind,pieces)

#write table
#library
# NOTE(review): write.table() comes from utils; XML does not appear to be used
# here -- confirm before removing this call.
    library(XML)
    setwd("C:\\Users\\tbrown11\\Desktop\\Data\\Retail")
#writing table to csv
# quote=TRUE keeps text fields that contain commas in a single CSV column.
# NOTE(review): the connection is opened in append mode, so re-running adds a
# second header and copy of the rows to an existing Walmart.csv -- confirm
# whether overwriting is wanted instead.
    out_file <- file("Walmart.csv", open="a")
    write.table(name, file=out_file, sep=",", dec=".", quote=TRUE,
    col.names=NA, row.names=TRUE)
    close(out_file)
#check

#clean house

    rm(list = ls(), envir = globalenv())

#check

2 个答案:

答案 0 :(得分:2)

你似乎有很多重复的read.csv、sqldf('select * from ...')等语句——我想你可以把它们变成一个循环。在循环的每次迭代中,加载一个csv文件,执行查询,然后把结果rbind到一起。

此外,您的查询目前分为两个步骤:先执行SELECT * .. where Orgname like '%Wal-Mart..',然后再从结果中选择各个列。您可以把这两步合并成一个查询(我想可以!):

library(sqldf)

# Build the vector of all 40 input files:
# indiv00.{1-4}, indiv02.{1-3}, indiv04.{1-6}, indiv06.{1-5},
# indiv08.{1-9}, indiv10.{1-5}, indiv96, indiv98, indiv96.2, indiv98.{2,3},
# indivs{90,92,94}
fnames <- file.path('C:','Users','tbrown11','Desktop','indivs',
                    c( sprintf('indiv00.%i.csv', 1:4),
                       sprintf('indiv02.%i.csv', 1:3),
                       sprintf('indiv04.%i.csv', 1:6),
                       sprintf('indiv06.%i.csv', 1:5),
                       sprintf('indiv08.%i.csv', 1:9),
                       sprintf('indiv10.%i.csv', 1:5),
                       sprintf('indiv%i.csv',c(96,98)),
                       'indiv96.2.csv',
                       sprintf('indiv98.%i.csv', 2:3),
                       sprintf('indivs%i.csv',c(90,92,94)) )
                   )

# One slot per file. The per-file results are collected here and row-bound
# once after the loop, instead of growing a data frame on every iteration.
pieces <- vector('list', length(fnames))

# LOOP THROUGH FILES:
for ( i in seq_along(fnames) ) {
    # Only one CSV chunk is held in memory at a time; tbl is overwritten on
    # the next iteration.
    tbl <- read.csv(fnames[i])

    # Filter on Orgname and pick the columns in a single query.
    # NOTE(review): the pattern ends in a space after the last %, so only
    # Orgname values ending in a space can match -- confirm against the data.
    pieces[[i]] <- sqldf("SELECT Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp
    FROM tbl
    WHERE Orgname like '%Wal-Mart Stores% '
    GROUP BY ContribID, Cycle")
}

# Files without a match contribute zero-row data frames, which add nothing.
results <- do.call(rbind, pieces)

# Finished. Now write the combined table as CSV.
# NOTE(review): write.table() comes from utils; XML does not appear to be used
# here -- confirm before removing this call.
library(XML)
out_file <- file( file.path('C:','Users','tbrown11','Desktop','Data','Retail',
                            'Walmart.csv'), open='a' )
# quote=TRUE keeps text fields that contain commas in a single CSV column;
# col.names=NA writes the header row with a blank cell above the row names.
write.table(results, file=out_file, sep=',', dec='.', quote=TRUE,
            col.names=NA, row.names=TRUE)
close(out_file)

看看你的所有重复代码是如何被压缩到一个for循环中的?这就是主意。

您可能需要进行调整以确保:

  1. (合并后的)查询能够正常工作
  2. 你真的想write.table没有列名吗?另一个人如何看待你的CSV文件,知道列实际上是“Cycle”,“FECTransID”,“ContribID”,......等等?

答案 1 :(得分:2)

由于你一遍又一遍地做同样的事情,你应该考虑使用某种循环,例如R中像lapply这样的隐式循环。

所有相关文件的列表:

# Read one CSV file, keep the rows whose Orgname matches the pattern, and
# reduce them to the selected columns grouped by ContribID and Cycle.
#
# f        Path of the CSV file to read.
# out_file File the result is also saved to, or NULL to skip saving. The
#          default keeps the single "na.Rdata" name, which each call
#          overwrites; pass a distinct name per input file to keep them all.
#
# Returns the resulting data frame, so that the results of several calls can
# be combined with rbind.
myFunction <- function(f, out_file = "na.Rdata")
{
  ## require() returns FALSE instead of failing when the package is missing,
  ## so its result is checked explicitly.
  if (!require(sqldf)) stop("package 'sqldf' is required", call. = FALSE)

  indiv <- read.csv(f)
  ## NOTE(review): the pattern ends in a space after the last %, so only
  ## Orgname values ending in a space can match -- confirm against the data.
  name <- sqldf("select * from 'indiv' where Orgname like '%Wal-Mart Stores% '")
  na <- sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
               Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
               RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name group by 
               ContribID,Cycle") 

  if (!is.null(out_file)) save(na, file = out_file)

  ## The locals disappear when the function returns, so no explicit clean-up
  ## is needed; the result is returned for lapply() to collect.
  na
}

setwd("~/relevant_directories")
## Only the CSV inputs: a bare list.files() would also pick up the .Rdata
## file that myFunction writes into this directory.
files <- list.files(pattern = "\\.csv$")
pre.results <- lapply(files, myFunction)
results <- do.call(rbind, pre.results)

##Do whatever you want with your results then!

你需要调整myFunction的代码,使它不会在每次调用时都保存到同一个na.Rdata文件,不过这应该不难。例如,可以把一个计数器传给函数,并把计数器拼接到文件名中。

如果您的数据文件太大而无法加载到内存中,则有R软件包可以帮助解决此问题。看一下ff包,它有很多功能可以访问硬盘上的数据文件,而不必将它们加载到内存中。