这是我目前的手动解决方案。我想知道如何实现我的问题的自动化解决方案。
smarties <- aggregate(Mention_DRGU ~ USC5_CLASS + AGE + year , data = chanko , sum )
keto <- filter(smarties, AGE == "0-2" & year == "2010")[,4]/Actual$ZERO.TWO[1]
keto1 <- filter(smarties, AGE == "0-2" & year == "2011")[,4]/Actual$ZERO.TWO[2]
keto2 <- filter(smarties, AGE == "0-2" & year == "2012")[,4]/Actual$ZERO.TWO[3]
keto3 <- filter(smarties, AGE == "0-2" & year == "2013")[,4]/Actual$ZERO.TWO[4]
blarg <- matrix(c(keto,keto1, keto2,keto3), nrow=9, ncol = 1)
cbind(filter(smarties, AGE == "0-2")[,1:3],blarg)
我一直在阅读与我类似的其他用户问题,我遇到了this我试图实现,但我已经开始使用`tapply函数了。
基本上,如果年龄和年份相似,我希望它与实际data.frame中的相应列分开。
temp <- split(smarties, f = list(smarties$AGE, smarties$year))
dput(Actual)
structure(list(Region = c("Canada", "Canada", "Canada", "Canada"
), Year = c("2010", "2011", "2012", "2013"), Level = c("ID",
"PD", "PR", "PP"), Sex = c("3", "3", "3", "3"), Total = c(34005,
34343, 34754, 35158), X0.2 = c(1139, 1139, 1140, 1145), X3.10 = c(2900,
2935, 2980, 3024), X11.19 = c(3835, 3792, 3740, 3684), X20.39 = c(9247,
9325, 9474, 9618), X40.59 = c(10106, 10150, 10185, 10197), X60.64 = c(1982,
2050, 2071, 2110), X65. = c(4736, 4887, 5099, 5310)), .Names = c("Region",
"Year", "Level", "Sex", "Total", "X0.2", "X3.10", "X11.19", "X20.39",
"X40.59", "X60.64", "X65."), row.names = c(NA, -4L), class = "data.frame")
dput(smarties)
structure(list(USC5_CLASS = structure(c(5L, 6L, 7L, 3L, 5L, 6L,
3L, 5L, 6L, 7L, 12L, 3L, 5L, 6L, 7L, 12L, 2L, 3L, 5L, 6L, 7L,
8L, 11L, 12L, 3L, 5L, 6L, 7L, 8L, 12L, 2L, 5L, 6L, 7L, 12L, 5L,
8L, 5L, 6L, 3L, 4L, 5L, 6L, 7L, 8L, 3L, 5L, 6L, 7L, 3L, 4L, 5L,
6L, 7L, 12L, 2L, 3L, 4L, 5L, 6L, 7L, 9L, 11L, 12L, 3L, 5L, 6L,
7L, 12L, 2L, 3L, 5L, 6L, 7L, 12L, 3L, 6L, 3L, 5L, 6L, 3L, 5L,
6L, 2L, 3L, 5L, 6L, 7L, 9L, 12L, 1L, 2L, 3L, 5L, 6L, 7L, 9L,
12L, 3L, 5L, 6L, 7L, 12L, 2L, 3L, 5L, 6L, 7L, 8L, 12L, 12L, 5L,
6L, 3L, 5L, 6L, 3L, 5L, 6L, 7L, 2L, 3L, 5L, 6L, 7L, 10L, 12L,
2L, 3L, 5L, 6L, 7L, 12L, 3L, 5L, 6L, 7L, 8L, 12L, 3L, 5L, 6L,
7L, 12L, 5L, 6L), .Label = c("15111 TETRACYCLINE", "15112 TETRACYCLINE CONGENERS",
"15130 CEPHALOSPORIN", "15141 ERYTHROMYCIN", "15142 EXTENDED SPEC MACROLIDES",
"15152 AMOXICILLIN", "15153 OTHER BRD SPEC PENICILL", "15180 TRIMETHOPRIM COMBS",
"15190 BRD/MED SPEC OTHER", "15210 PENICILLIN V & VK", "15230 ANTI-STAPH PENICILLIN",
"15810 QUINOLONES, ORAL"), class = "factor"), AGE = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L,
7L, 7L, 8L, 8L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L,
6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 1L, 1L, 2L, 2L, 2L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L,
5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 8L,
8L), .Label = c("0-2", "3-9", "10-19", "20-39", "40-59", "60-64",
"65+", "UNSP"), class = "factor"), year = c(2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2011L, 2011L,
2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L,
2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L,
2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L,
2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L,
2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L,
2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L,
2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L,
2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L,
2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L,
2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L,
2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L,
2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L), Mention_DRGU = c(5760L,
19230L, 5610L, 4700L, 36660L, 16070L, 7600L, 32910L, 39850L,
2230L, 5690L, 49240L, 214790L, 144420L, 27840L, 60320L, 5520L,
98940L, 216040L, 103030L, 27440L, 10840L, 5500L, 145650L, 5610L,
44470L, 16510L, 7910L, 2470L, 38280L, 5520L, 61050L, 22340L,
10390L, 28960L, 5460L, 5460L, 22380L, 2470L, 5980L, 5550L, 11370L,
23830L, 5840L, 5590L, 14680L, 68290L, 47860L, 2110L, 73700L,
1960L, 177290L, 177440L, 47750L, 103110L, 16650L, 57340L, 5530L,
214590L, 143790L, 50590L, 3280L, 5680L, 104760L, 28860L, 47150L,
5550L, 28680L, 5680L, 10970L, 5590L, 50260L, 28380L, 11480L,
28800L, 2260L, 7820L, 13750L, 11120L, 14700L, 16840L, 55900L,
11060L, 5680L, 39920L, 166720L, 156230L, 19990L, 5540L, 60910L,
5540L, 10960L, 16780L, 205220L, 153830L, 30970L, 3500L, 110530L,
5540L, 50360L, 44410L, 18140L, 16760L, 5400L, 11080L, 39180L,
31650L, 16840L, 5530L, 55590L, 1390L, 5860L, 2100L, 2100L, 28660L,
15460L, 5770L, 22890L, 56150L, 5540L, 17010L, 45740L, 114940L,
224230L, 37560L, 5670L, 60150L, 11310L, 39620L, 162890L, 178530L,
57780L, 74110L, 16900L, 15500L, 16850L, 17090L, 5610L, 14490L,
17020L, 11350L, 45100L, 5700L, 17100L, 11340L, 5670L)), .Names = c("USC5_CLASS",
"AGE", "year", "Mention_DRGU"), row.names = c(NA, -146L), class = "data.frame")
答案 0 :(得分:1)
这不是可靠的编程,但似乎有效:
# remove rows not to be analyzed
smarties <- smarties[smarties$AGE != 'UNSP', ]
# get row and column indices
smarties$agenum <- as.numeric(smarties$AGE)
smarties$colnum <- smarties$agenum + 5
smarties$yearnum <- as.numeric(as.factor(smarties$year))
# send col and row index (x and y) to find denominator from Actual df,
# and numerator z
smarties$result <- mapply(FUN = function(x,y,z,df){
z/df[x,y]
},x = smarties$yearnum,y = smarties$colnum,z = smarties$Mention_DRGU,MoreArgs = list(df = Actual))
这取决于按正确顺序出现的年龄和年份,以及按正确顺序排列的列。
<强>更新强>
这很容易打破,因为它使用了名字。您可以使用适用于您的任何*适用。
# remove rows not to be analyzed
smarties <- smarties[smarties$AGE != 'UNSP', ]
# rename variable names and fix weird names
smarties$agecol <- paste0('X',gsub(pattern = '\\-|\\+',replacement = '.',x = smarties$AGE))
smarties[smarties$agecol == 'X3.9','agecol'] <- 'X3.10'
smarties[smarties$agecol == 'X10.19','agecol'] <- 'X11.19'
# mapply
smarties$mresult <- mapply(FUN = function(x,y,z,df){
z/df[df$Year == x,names(df) == y]
},x = smarties$year,y = smarties$agecol,z = smarties$Mention_DRGU,MoreArgs = list(df = Actual))
# or apply
smarties$appresult <- apply(smarties,1,function(x,df){
as.numeric(x['Mention_DRGU'])/df[df$Year == x['year'],names(df) == x['agecol']]
},df = Actual)
更新2 通过将数据更改为一致的格式,这是一种更好的方法。
修复数据集:
# tidy data aka wide to long format
require(tidyr)
smarties <- smarties[smarties$AGE != 'UNSP', ]
Actual <- gather(Actual,age,secretvalue,-c(Region,Year,Sex,Level,Total))
# fix age mismatches (should actual type them in)
Actual$age <- levels(smarties$AGE)[as.numeric(Actual$age)]
# make names in both datasets the same
names(Actual) <- tolower(names(Actual))
names(smarties) <- tolower(names(smarties))
重组数据后,解决方案很简单
# merge appropriate data since it is 1:1
smartiesmerged <- merge(smarties,Actual[ ,c('year','age','secretvalue')],by = c('year','age'),all.x = T)
# do the calculation
smartiesmerged$result <- smartiesmerged$mention_drgu / smartiesmerged$secretvalue
答案 1 :(得分:0)
我想我用dplyr
找到了一种不那么简单的手动方式。
blarb <- melt(Actual)
Lol <- filter(blarb, variable == "X0.2", Year == "2010")
smarties %>%
filter(AGE == "0-2", year == 2010) %>%
mutate(Popul = Mention_DRGU/Lol$value)
不幸的是,我仍然必须为每个年龄和年份做这件事,有人知道我是否可以使用group_by进一步自动化这个?