我正在进行竞选财务数据,并正在寻找在1989年至2010年期间捐赠x金额的x公司员工。使用sqldf,我能够解析这些信息。数据大于4gb,其中我的内存小于2gb,因此我将数据分成大约100mb的电子表格,并且我加载了40个以获取此信息。有些人有来自公司x的贡献者,有些则没有。
有谁知道如何为文件制作'捕手手套'?这个'catchers mitt'将识别这些相关文件并绑定行。
如果有人想进行一般改进,我附上我的代码?
感谢您的帮助
#Individual Donors for Retail Sales Companies 1990-2010
#load file
indiv00.1<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv00.1.csv")
indiv00.2<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv00.2.csv")
indiv00.3<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv00.3.csv")
indiv00.4<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv00.4.csv")
indiv00.5<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv02.1.csv")
#check
#library
library(sqldf)
#search for name
name1<-sqldf("select * from 'indiv00.1' where Orgname like '%Wal-Mart Stores% '")
name2<-sqldf("select * from 'indiv00.2' where Orgname like '%Wal-Mart Stores% '")
name3<-sqldf("select * from 'indiv00.3' where Orgname like '%Wal-Mart Stores% '")
name4<-sqldf("select * from 'indiv00.4' where Orgname like '%Wal-Mart Stores% '")
name5<-sqldf("select * from 'indiv00.5' where Orgname like '%Wal-Mart Stores% '")
#check
#merge data
na1<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name1 group by
ContribID,Cycle")
na2<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name2 group by
ContribID,Cycle")
na3<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name3 group by
ContribID,Cycle")
na4<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name4 group by
ContribID,Cycle")
na5<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name5 group by
ContribID,Cycle")
#check
#save data
setwd("C:\\Users\\tbrown11\\Desktop\\Data")
if(!exists("na1", mode="global.env"))save(na1,file="na1.Rdata")
if(!exists("na2", mode="global.env"))save(na2,file="na2.Rdata")
if(!exists("na3", mode="global.env"))save(na3,file="na3.Rdata")
if(!exists("na4", mode="global.env"))save(na4,file="na4.Rdata")
if(!exists("na5", mode="global.env"))save(na5,file="na5.Rdata")
#check
#clean house
rm(list = ls(), envir = globalenv())
#check
#load file
indiv00.6<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv02.2.csv")
indiv00.7<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv02.3.csv")
indiv00.8<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv04.1.csv")
indiv00.9<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv04.2.csv")
indiv00.10<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv04.3.csv")
#check
#library
library(sqldf)
#search for name
name6<-sqldf("select * from 'indiv00.6' where Orgname like '%Wal-Mart Stores% '")
name7<-sqldf("select * from 'indiv00.7' where Orgname like '%Wal-Mart Stores% '")
name8<-sqldf("select * from 'indiv00.8' where Orgname like '%Wal-Mart Stores% '")
name9<-sqldf("select * from 'indiv00.9' where Orgname like '%Wal-Mart Stores% '")
name10<-sqldf("select * from 'indiv00.10' where Orgname like '%Wal-Mart Stores% '")
#check
#merge data
na6<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name6 group by
ContribID,Cycle")
na7<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name7 group by
ContribID,Cycle")
na8<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name8 group by
ContribID,Cycle")
na9<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name9 group by
ContribID,Cycle")
na10<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name10 group by
ContribID,Cycle")
#check
#save data
setwd("C:\\Users\\tbrown11\\Desktop\\Data")
if(!exists("na6", mode="global.env"))save(na6,file="na6.Rdata")
if(!exists("na7", mode="global.env"))save(na7,file="na7.Rdata")
if(!exists("na8", mode="global.env"))save(na8,file="na8.Rdata")
if(!exists("na9", mode="global.env"))save(na9,file="na9.Rdata")
if(!exists("na10", mode="global.env"))save(na10,file="na10.Rdata")
#check
#clean house
rm(list = ls(), envir = globalenv())
#check
#load file
indiv00.11<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv04.4.csv")
indiv00.12<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv04.5.csv")
indiv00.13<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv04.6.csv")
indiv00.14<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv06.1.csv")
indiv00.15<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv06.2.csv")
#check
#library
library(sqldf)
#search for name
name11<-sqldf("select * from 'indiv00.11' where Orgname like '%Wal-Mart Stores% '")
name12<-sqldf("select * from 'indiv00.12' where Orgname like '%Wal-Mart Stores% '")
name13<-sqldf("select * from 'indiv00.13' where Orgname like '%Wal-Mart Stores% '")
name14<-sqldf("select * from 'indiv00.14' where Orgname like '%Wal-Mart Stores% '")
name15<-sqldf("select * from 'indiv00.15' where Orgname like '%Wal-Mart Stores% '")
#merge
na11<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name11 group by
ContribID,Cycle")
na12<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name12 group by
ContribID,Cycle")
na13<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name13 group by
ContribID,Cycle")
na14<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name14 group by
ContribID,Cycle")
na15<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name15 group by
ContribID,Cycle")
#check
#save data
setwd("C:\\Users\\tbrown11\\Desktop\\Data")
if(!exists("na11", mode="global.env"))save(na11,file="na11.Rdata")
if(!exists("na12", mode="global.env"))save(na12,file="na12.Rdata")
if(!exists("na13", mode="global.env"))save(na13,file="na13.Rdata")
if(!exists("na14", mode="global.env"))save(na14,file="na14.Rdata")
if(!exists("na15", mode="global.env"))save(na15,file="na15.Rdata")
#check
#clean house
rm(list = ls(), envir = globalenv())
#check
#load file
indiv00.16<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv06.3.csv")
indiv00.17<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv06.4.csv")
indiv00.18<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv06.5.csv")
indiv00.19<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.1.csv")
indiv00.20<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.2.csv")
#check
#library
library(sqldf)
#search for name
name16<-sqldf("select * from 'indiv00.16' where Orgname like '%Wal-Mart Stores% '")
name17<-sqldf("select * from 'indiv00.17' where Orgname like '%Wal-Mart Stores% '")
name18<-sqldf("select * from 'indiv00.18' where Orgname like '%Wal-Mart Stores% '")
name19<-sqldf("select * from 'indiv00.19' where Orgname like '%Wal-Mart Stores% '")
name20<-sqldf("select * from 'indiv00.20' where Orgname like '%Wal-Mart Stores% '")
#check
#merge
na16<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name16 group by
ContribID,Cycle")
na17<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name17 group by
ContribID,Cycle")
na18<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name18 group by
ContribID,Cycle")
na19<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name19 group by
ContribID,Cycle")
na20<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name20 group by
ContribID,Cycle")
#check
#save data
setwd("C:\\Users\\tbrown11\\Desktop\\Data")
if(!exists("na16", mode="global.env"))save(na16,file="na16.Rdata")
if(!exists("na17", mode="global.env"))save(na17,file="na17.Rdata")
if(!exists("na18", mode="global.env"))save(na18,file="na18.Rdata")
if(!exists("na19", mode="global.env"))save(na19,file="na19.Rdata")
if(!exists("na20", mode="global.env"))save(na20,file="na20.Rdata")
#check
#clean house
rm(list = ls(), envir = globalenv())
#check
#load file
indiv00.21<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.3.csv")
indiv00.22<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.4.csv")
indiv00.23<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.5.csv")
indiv00.24<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.6.csv")
indiv00.25<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.7.csv")
#check
#library
library(sqldf)
#search for name
name21<-sqldf("select * from 'indiv00.21' where Orgname like '%Wal-Mart Stores% '")
name22<-sqldf("select * from 'indiv00.22' where Orgname like '%Wal-Mart Stores% '")
name23<-sqldf("select * from 'indiv00.23' where Orgname like '%Wal-Mart Stores% '")
name24<-sqldf("select * from 'indiv00.24' where Orgname like '%Wal-Mart Stores% '")
name25<-sqldf("select * from 'indiv00.25' where Orgname like '%Wal-Mart Stores% '")
#check
#merge
na21<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name21 group by
ContribID,Cycle")
na22<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name22 group by
ContribID,Cycle")
na23<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name23 group by
ContribID,Cycle")
na24<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name24 group by
ContribID,Cycle")
na25<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name25 group by
ContribID,Cycle")
#check
#save data
setwd("C:\\Users\\tbrown11\\Desktop\\Data")
if(!exists("na21", mode="global.env"))save(na21,file="na21.Rdata")
if(!exists("na22", mode="global.env"))save(na22,file="na22.Rdata")
if(!exists("na23", mode="global.env"))save(na23,file="na23.Rdata")
if(!exists("na24", mode="global.env"))save(na24,file="na24.Rdata")
if(!exists("na25", mode="global.env"))save(na25,file="na25.Rdata")
#check
#clean house
rm(list = ls(), envir = globalenv())
#check
#Load file
indiv00.26<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.8.csv")
indiv00.27<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.9.csv")
indiv00.28<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv10.1.csv")
indiv00.29<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv10.2.csv")
indiv00.30<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv10.3.csv")
#check
#library
library(sqldf)
#search for name
name26<-sqldf("select * from 'indiv00.26' where Orgname like '%Wal-Mart Stores% '")
name27<-sqldf("select * from 'indiv00.27' where Orgname like '%Wal-Mart Stores% '")
name28<-sqldf("select * from 'indiv00.28' where Orgname like '%Wal-Mart Stores% '")
name29<-sqldf("select * from 'indiv00.29' where Orgname like '%Wal-Mart Stores% '")
name30<-sqldf("select * from 'indiv00.30' where Orgname like '%Wal-Mart Stores% '")
#check
#merge
na26<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name26 group by
ContribID,Cycle")
na27<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name27 group by
ContribID,Cycle")
na28<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name28 group by
ContribID,Cycle")
na29<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name29 group by
ContribID,Cycle")
na30<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name30 group by
ContribID,Cycle")
#check
#save data
setwd("C:\\Users\\tbrown11\\Desktop\\Data")
if(!exists("na26", mode="global.env"))save(na26,file="na26.Rdata")
if(!exists("na27", mode="global.env"))save(na27,file="na27.Rdata")
if(!exists("na28", mode="global.env"))save(na28,file="na28.Rdata")
if(!exists("na29", mode="global.env"))save(na29,file="na29.Rdata")
if(!exists("na30", mode="global.env"))save(na30,file="na30.Rdata")
#check
#clean house
rm(list = ls(), envir = globalenv())
#check
#load file
indiv00.31<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv10.4.csv")
indiv00.32<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv10.5.csv")
indiv00.33<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv96.csv")
indiv00.34<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv96.2.csv")
indiv00.35<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv98.csv")
#check
#library
library(sqldf)
#search for name
name31<-sqldf("select * from 'indiv00.31' where Orgname like '%Wal-Mart Stores% '")
name32<-sqldf("select * from 'indiv00.32' where Orgname like '%Wal-Mart Stores% '")
name33<-sqldf("select * from 'indiv00.33' where Orgname like '%Wal-Mart Stores% '")
name34<-sqldf("select * from 'indiv00.34' where Orgname like '%Wal-Mart Stores% '")
name35<-sqldf("select * from 'indiv00.35' where Orgname like '%Wal-Mart Stores% '")
#check
#merge
na31<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name31 group by
ContribID,Cycle")
na32<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name32 group by
ContribID,Cycle")
na33<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name33 group by
ContribID,Cycle")
na34<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name34 group by
ContribID,Cycle")
na35<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name35 group by
ContribID,Cycle")
#check
#save data
setwd("C:\\Users\\tbrown11\\Desktop\\Data")
if(!exists("na31", mode="global.env"))save(na31,file="na31.Rdata")
if(!exists("na32", mode="global.env"))save(na32,file="na32.Rdata")
if(!exists("na33", mode="global.env"))save(na33,file="na33.Rdata")
if(!exists("na34", mode="global.env"))save(na34,file="na34.Rdata")
if(!exists("na35", mode="global.env"))save(na35,file="na35.Rdata")
#check
#clean house
rm(list = ls(), envir = globalenv())
#check
#load file
indiv00.36<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv98.2.csv")
indiv00.37<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv98.3.csv")
indiv00.38<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indivs90.csv")
indiv00.39<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indivs92.csv")
indiv00.40<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indivs94.csv")
#check
#library
library(sqldf)
#search for name
#check
name36<-sqldf("select * from 'indiv00.36' where Orgname like '%Wal-Mart Stores% '")
name37<-sqldf("select * from 'indiv00.37' where Orgname like '%Wal-Mart Stores% '")
name38<-sqldf("select * from 'indiv00.38' where Orgname like '%Wal-Mart Stores% '")
name39<-sqldf("select * from 'indiv00.39' where Orgname like '%Wal-Mart Stores% '")
name40<-sqldf("select * from 'indiv00.40' where Orgname like '%Wal-Mart Stores% '")
#check
#merge
na36<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name36 group by
ContribID,Cycle")
na37<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name37 group by
ContribID,Cycle")
na38<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name38 group by
ContribID,Cycle")
na39<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name39 group by
ContribID,Cycle")
na40<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name40 group by
ContribID,Cycle")
#check
#save data
setwd("C:\\Users\\tbrown11\\Desktop\\Data")
if(!exists("na36", mode="global.env"))save(na36,file="na36.Rdata")
if(!exists("na37", mode="global.env"))save(na37,file="na37.Rdata")
if(!exists("na38", mode="global.env"))save(na38,file="na38.Rdata")
if(!exists("na39", mode="global.env"))save(na39,file="na39.Rdata")
if(!exists("na40", mode="global.env"))save(na40,file="na40.Rdata")
#check
#clean house
rm(list = ls(), envir = globalenv())
#check
#load file
a1<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na1.Rdata")
a2<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na2.Rdata")
a3<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na3.Rdata")
a4<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na4.Rdata")
a5<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na5.Rdata")
a6<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na6.Rdata")
a7<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na7.Rdata")
a8<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na8.Rdata")
a9<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na9.Rdata")
a10<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na10.Rdata")
a11<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na11.Rdata")
a12<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na12.Rdata")
a13<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na13.Rdata")
a14<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na14.Rdata")
a15<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na15.Rdata")
a16<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na16.Rdata")
a17<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na17.Rdata")
a18<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na18.Rdata")
a19<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na19.Rdata")
a20<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na20.Rdata")
a21<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na21.Rdata")
a22<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na22.Rdata")
a23<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na23.Rdata")
a24<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na24.Rdata")
a25<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na25.Rdata")
a26<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na26.Rdata")
a27<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na27.Rdata")
a28<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na28.Rdata")
a29<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na29.Rdata")
a30<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na30.Rdata")
a31<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na31.Rdata")
a32<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na32.Rdata")
a33<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na33.Rdata")
a34<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na34.Rdata")
a35<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na35.Rdata")
a36<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na36.Rdata")
a37<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na37.Rdata")
a38<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na38.Rdata")
a39<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na39.Rdata")
a40<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na40.Rdata")
#global environment
b<-c(ls())
B<-paste("n",b)
paste(B,",")
#clean in text editor
#check #check #check #check #check
#check #check #check #check #check
#check #check #check #check #check
#Row bind
name<-rbind(
)
#write table
#library
library(XML)
setwd("C:\\Users\\tbrown11\\Desktop\\Data\\Retail")
#writing table to csv
out_file <- file("Walmart.csv", open="a")
write.table(name, file=out_file, sep=",", dec=".", quote=FALSE,
col.names=NA, row.names=TRUE)
close(out_file)
#check
#clean house
rm(list = ls(), envir = globalenv())
#check
答案 0 :(得分:2)
你似乎有很多重复read.csv
,sql('select * from ...')
等等 - 我想你可以把它变成一个循环。在循环的每次迭代中,您加载csv文件,执行查询,并rbind
结果。
此外,您的查询目前分为两个步骤 - SELECT * .. where Orgname like '%Wal-Mart..'
,然后您可以从中选择各种列。您可以将这些组合成一个查询(我想!):
library(sqldf)
# make a vector of all your files:
# indiv00.{1--4}, invid02.{1,2,3}, indiv04.{1--6}, indiv06.{1--5},
# indiv08.{1--9}, indiv10.{1--5}, indiv96, indiv96.2, indiv98, indiv98.{2,3},
# indivs{90,92,94}
fnames <- file.path('C:','Users','tbrown11','Desktop','indivs',
c( sprintf('indiv00.%i.csv', 1:4),
sprintf('indiv02.%i.csv', 1:3),
sprintf('indiv04.%i.csv', 1:6),
sprintf('indiv06.%i.csv', 1:5),
sprintf('indiv08.%i.csv', 1:9),
sprintf('indiv10.%i.csv', 1:5),
sprintf('indiv%i.csv',c(96,98)),
'indiv96.2.csv',
sprintf('indiv98.%i.csv', 2:3),
sprintf('indivs%i.csv',c(90,92,94)) )
)
# make a data frame to hold results:
results <- NULL
# LOOP THROUGH FILES:
for ( csvfile in fnames ) {
# read.csv:
tbl <- read.csv(csvfile)
# do SELECT * from 'table' where Orgname like ...
# Combine into one query:
dat <- sqldf("SELECT Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp
FROM tbl
WHERE Orgname like '%Wal-Mart Stores% '
GROUP BY ContribID, Cycle")
# store these results:
results <- rbind(results, dat)
}
# finished. now just write your XML table:
library(XML)
out_file <- file( file.path('C:','Users','tbrown11','Desktop','Data','Retail',
'Walmart.csv'), open='a' )
write.table(results, file=out_file, sep=',', dec='.', quote=FALSE,
col.names=NA, row.names=T)
close(out_file)
看看你的所有重复代码是如何被压缩到一个for循环中的?这就是主意。
您可能需要进行调整以确保:
write.table
没有列名吗?另一个人如何看待你的CSV文件,知道列实际上是“Cycle”,“FECTransID”,“ContribID”,......等等?答案 1 :(得分:2)
由于你一遍又一遍地做同样的事情,你应该考虑使用某种循环。 R中的隐式循环像lapply
所有相关文件的列表:
myFunction = function(f)
{
require(sqldf)
##Better to use require, since if the library is already loaded it
##won't try to load it again
indiv <- read.csv(f)
name <- sqldf("select * from 'indiv' where Orgname like '%Wal-Mart Stores% '")
na <- sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID,
Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip,
RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name group by
ContribID,Cycle")
if(!exists("na", mode="global.env"))save(na,file="na.Rdata")
rm(list = ls(), envir = globalenv())
}
setwd("~/relevant_directories")
files = list.files()
pre.results = lapply(files, myFunction)
results = do.call(rbind, pre.results)
##Do whatever you want with your results then!
你必须调整myFunction代码,以便每次调用时都不会保存相同的na.Rdata代码,但这不应该太难。也许将计数器传递给函数并将计数器粘贴到文件名
如果您的数据文件太大而无法加载到内存中,则有R软件包可以帮助解决此问题。看一下ff包,它有很多功能可以访问硬盘上的数据文件,而不必将它们加载到内存中。