
时间:2017-02-20 03:22:13

标签: r aggregate reshape



# Ten-row sample of the vote data in dput() form; evaluating this expression
# rebuilds the data frame shown in the printed table below.
# CandidateLastName needs one level per integer code (1-9). The levels are
# listed alphabetically so that each code maps to the name in the printed table.
structure(list(StateSenatorialDistrict = c(41L, 14L, 30L, 38L, 
43L, 37L, 20L, 45L, 37L, 44L), CandidateOfficeCode = structure(c(2L, 
5L, 2L, 5L, 4L, 3L, 1L, 4L, 1L, 1L), .Label = c("ATT", "AUD", 
"STH", "TRE", "USP"), class = "factor"), CandidateLastName = structure(c(4L, 
2L, 1L, 3L, 9L, 5L, 7L, 8L, 7L, 6L), .Label = c("BROWN", "CASTLE", 
"CLINTON", "DEPASQUALE", "MILLER", "RAFFERTY", "SHAPIRO", "TORSELLA", 
"VOIT"), class = "factor"), CandidateParty = structure(c(2L, 
1L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 3L), .Label = c("CON", "DEM", 
"REP"), class = "factor"), VoteTotal = c(256L, 3L, 202L, 188L, 
18L, 402L, 251L, 383L, 156L, 761L)), .Names = c("StateSenatorialDistrict", 
"CandidateOfficeCode", "CandidateLastName", "CandidateParty", 
"VoteTotal"), row.names = c(30901L, 115192L, 41264L, 1389L, 21982L, 
29827L, 192288L, 20019L, 12803L, 60823L), class = "data.frame")


StateSenatorialDistrict CandidateOfficeCode CandidateLastName CandidateParty VoteTotal
41                 AUD        DEPASQUALE            DEM       256
14                 USP            CASTLE            CON         3
30                 AUD             BROWN            REP       202
38                 USP           CLINTON            DEM       188
43                 TRE              VOIT            REP        18
37                 STH            MILLER            DEM       402
20                 ATT           SHAPIRO            DEM       251
45                 TRE         TORSELLA             DEM       383
37                 ATT           SHAPIRO            DEM       156    
44                 ATT          RAFFERTY            REP       761


我希望对这些数据进行汇总,使每个州参议院选区对应一行,并在每一行中保留选定的其他数据。理想的结果如下所示(这里的数据是虚构的,并非基于上面的数据):

StateSenatorialDistrict SenateRepLastName SenateDemLastName  SenateRepVoteTotal SenateDemVoteTotal ClintonVotes TrumpVotes
41                 BOZO            SMITH            250            300            1000            2000
42                 JOHNSON         CARTER           2012           237            1350            1000
53                 ARCHIBALD       BISHOP           350            500            5000            3000

在任何给定的行上,如果 CandidateOfficeCode 为 STS,则该候选人是州参议员候选人;CandidateParty 表明其所属党派是共和党还是民主党,即 REP 或 DEM。


# Sum VoteTotal per district / office / candidate / party, keeping only the
# presidential ("USP") and state-senate ("STS") rows of votes2016.
senateDistricts2016 <- aggregate(
  VoteTotal ~ StateSenatorialDistrict + CandidateOfficeCode +
    CandidateFirstName + CandidateLastName + CandidateParty,
  data = votes2016[votes2016$CandidateOfficeCode %in% c("USP", "STS"), ],
  FUN = "sum"
)

# Spread to one row per district with one column per candidate last name.
# No value column is named here, so dcast falls back on its own default choice.
wideSenate <- dcast(
  senateDistricts2016,
  StateSenatorialDistrict ~ CandidateLastName
)


提前致谢。如果我的问题没有意义,请告诉我 - 我很乐意编辑。



# State-senate ("STS") votes per district: one table per party (with the
# candidate name) plus the total across all parties.
senateDs <- aggregate(
  VoteTotal ~ StateSenatorialDistrict + CandidateName,
  data = votes2016[votes2016$CandidateOfficeCode == "STS" &
                     votes2016$CandidateParty == "DEM", ],
  FUN = sum
)
senateRs <- aggregate(
  VoteTotal ~ StateSenatorialDistrict + CandidateName,
  data = votes2016[votes2016$CandidateOfficeCode == "STS" &
                     votes2016$CandidateParty == "REP", ],
  FUN = sum
)
senateTotalVotes <- aggregate(
  VoteTotal ~ StateSenatorialDistrict,
  data = votes2016[votes2016$CandidateOfficeCode == "STS", ],
  FUN = sum
)

# now aggregate the presidential votes for Rs, Ds, and Total votes and combine
senatePresD <- aggregate(
  VoteTotal ~ StateSenatorialDistrict,
  data = votes2016[votes2016$CandidateOfficeCode == "USP" &
                     votes2016$CandidateParty == "DEM", ],
  FUN = sum
)
senatePresR <- aggregate(
  VoteTotal ~ StateSenatorialDistrict,
  data = votes2016[votes2016$CandidateOfficeCode == "USP" &
                     votes2016$CandidateParty == "REP", ],
  FUN = sum
)
senatePresTotalVotes <- aggregate(
  VoteTotal ~ StateSenatorialDistrict,
  data = votes2016[votes2016$CandidateOfficeCode == "USP", ],
  FUN = sum
)

# Every table above calls its value column "VoteTotal" (and the two party
# tables share "CandidateName"). Merging them as-is relies on merge()'s
# automatic ".x"/".y" suffixes, which collide on the later merges and leave
# duplicated column names. Give each column its final name first, so the
# merges are unambiguous and no positional rename is needed afterwards.
# The intermediate tables themselves are left unchanged.
senateVotes <- Reduce(
  function(left, right) {
    merge(left, right, by = "StateSenatorialDistrict", all = TRUE)
  },
  list(
    setNames(senateDs, c("StateSenatorialDistrict", "DCandidate", "DVotes")),
    setNames(senateRs, c("StateSenatorialDistrict", "RCandidate", "RVotes")),
    setNames(senateTotalVotes,
             c("StateSenatorialDistrict", "TotalSenatorVotes")),
    setNames(senatePresD, c("StateSenatorialDistrict", "PresDVotes")),
    setNames(senatePresR, c("StateSenatorialDistrict", "PresRVotes")),
    setNames(senatePresTotalVotes,
             c("StateSenatorialDistrict", "TotalPresVotes"))
  )
)

0 个答案:
