我想在数据框 df 中添加一个新列,其值是把我的函数 hardfunct 应用于 param 为 "hardness" 的那一行的 value 所得到的结果。然后,我希望用该结果填充新列中 site 和 dates 都相同的所有行。该如何填充其余的行?我已经尝试过 summarise、rowwise 和 mutate。示例数据如下。
# Sample data: two sampling events (site + date), each with the same four
# measured parameters. `value` holds the measurements as strings.
site <- rep(c("River A", "River B"), each = 4)
dates <- as.Date(
  rep(c("01/01/2001", "05/08/2001"), each = 4),
  format = "%m/%d/%Y"
)
param <- rep(c("lead", "hardness", "mercury", "cadmium"), times = 2)
value <- c("0.2", "45", "0.9", "1.2", "0.5", "1800", "0.6", "0.8")
df <- data.frame(site, param, dates, value)
# hardfunct: intended to cap a hardness value at 400 and floor it at 25.
# x: a single value; it is used directly as the condition of `if`.
# NOTE(review): the two capped branches call print(), which writes the number
# to the console as a side effect; only the last branch uses return().
# NOTE(review): `value` in the sample data is built from strings; if x arrives
# as a string, these comparisons are done as text, not numerically -- confirm
# the caller converts to numeric first.
hardfunct=function(x){
if (x>=400) {
print(400)
} else if (x<=25) {
print(25)
} else {
return(x)}
}
# Attempt: group by site and date, then use mutate() to add the capped
# hardness as a new column.
# NOTE(review): the sapply() call indexes the full, ungrouped `df` rather than
# the current group's columns, so it produces one element per "hardness" row
# of the whole data frame instead of a single value per group.
df %>% group_by(site,dates) %>%
mutate(New_Hardness=sapply(df[df$param=="hardness","value"],hardfunct))
site param dates value New_Hardness
River A lead 1/1/2001 0.2 45
River A hardness 1/1/2001 45 45
River A mercury 1/1/2001 0.9 45
River A cadmium 1/1/2001 1.2 45
River B lead 5/8/2001 0.5 400
River B hardness 5/8/2001 1800 400
River B mercury 5/8/2001 0.6 400
River B cadmium 5/8/2001 0.8 400
答案 0 :(得分:2)
# Sample data: two sampling events (site + date), each with four parameters.
site <- c(rep("River A", 4), rep("River B", 4))
dates <- as.Date(
  c(
    "01/01/2001", "01/01/2001", "01/01/2001", "01/01/2001",
    "05/08/2001", "05/08/2001", "05/08/2001", "05/08/2001"
  ),
  format = "%m/%d/%Y"
)
param <- c(
  "lead", "hardness", "mercury", "cadmium",
  "lead", "hardness", "mercury", "cadmium"
)
value <- c("0.2", "45", "0.9", "1.2", "0.5", "1800", "0.6", "0.8")
# Keep strings as character columns (not factors) so that as.numeric(value)
# later yields the measured numbers. Spell out FALSE: `F` is an ordinary
# variable that can be reassigned.
df <- data.frame(site, param, dates, value, stringsAsFactors = FALSE)
# Clamp hardness value(s) to the range [25, 400].
#
# x: numeric vector of hardness values (convert strings with as.numeric()
#    before calling).
# Returns a numeric vector the same length as x: values of 400 or more become
# 400, values of 25 or less become 25, anything in between is unchanged.
hardfunct <- function(x) {
  # Strings would be compared as text ("1800" sorts below "400"), giving a
  # silently wrong result, so reject non-numeric input up front.
  stopifnot(is.numeric(x))
  # pmax()/pmin() work elementwise, unlike `if`, which needs a single
  # TRUE/FALSE condition.
  pmin(pmax(x, 25), 400)
}
library(dplyr)
# For each site/date group, take the value of the "hardness" row, convert it
# to a number, clamp it with hardfunct(), and repeat the result on every row
# of that group. ungroup() returns a plain (ungrouped) tibble.
df %>%
  group_by(site, dates) %>%
  mutate(
    New = value[param == "hardness"] %>%
      as.numeric() %>%
      hardfunct()
  ) %>%
  ungroup()
# # A tibble: 8 x 5
# site param dates value New
# <chr> <chr> <date> <chr> <dbl>
# 1 River A lead 2001-01-01 0.2 45
# 2 River A hardness 2001-01-01 45 45
# 3 River A mercury 2001-01-01 0.9 45
# 4 River A cadmium 2001-01-01 1.2 45
# 5 River B lead 2001-05-08 0.5 400
# 6 River B hardness 2001-05-08 1800 400
# 7 River B mercury 2001-05-08 0.6 400
# 8 River B cadmium 2001-05-08 0.8 400
请注意,您必须将函数中的 `print` 更改为 `return`,否则在输出数据框之前还会额外打印出一个值。
还请注意,`value` 必须是字符型变量而不是因子(factor),因为对因子调用 `as.numeric` 得到的是内部的水平编码,而不是您期望的数值。
答案 1 :(得分:1)
在 base R 中,可以使用拆分/应用/合并(split/apply/combine)策略。
请注意,使用 `pmax` 和 `pmin` 的想法来自 @Frank。
# Base R split/apply/combine: add New_Hardness, the group's hardness value
# clamped to [25, 400], to every row sharing the same site and date.
# drop = TRUE skips site/date combinations that have no rows.
sp <- split(df, list(df$site, df$dates), drop = TRUE)
newdf <- lapply(sp, function(DF) {
  hardness <- DF$value[DF$param == "hardness"]
  # pmin()/pmax() compare strings as text ("1800" sorts below "400"), so
  # convert string or factor values to numbers first. as.character() makes a
  # factor yield its labels rather than its internal codes.
  if (!is.numeric(hardness)) {
    hardness <- as.numeric(as.character(hardness))
  }
  DF$New_Hardness <- pmax(25, pmin(400, hardness))
  DF
})
rm(sp) # tidy up
newdf <- do.call(rbind, newdf)
row.names(newdf) <- NULL
newdf
# site param dates value New_Hardness
#1 River A lead 2001-01-01 0.2 45
#2 River A hardness 2001-01-01 45.0 45
#3 River A mercury 2001-01-01 0.9 45
#4 River A cadmium 2001-01-01 1.2 45
#5 River B lead 2001-05-08 0.5 400
#6 River B hardness 2001-05-08 1800.0 400
#7 River B mercury 2001-05-08 0.6 400
#8 River B cadmium 2001-05-08 0.8 400