Sample data
df <- data.frame(location = rep(1:1000, each = 36),
year = rep(1980:2015,times = 1000),
mu = runif(min = 36.5, max = 43.2, 1000*36),
lambda = runif(min = 4.5, max = 4.8, 1000*36))
This data consits of 1000 locations and 36 years with two variables mu and lambda
For each location X year combination, I have a function which takes a value of lambda and mu and generates a vector of size 12. An example:
library(grofit)
dat <- df[df$location == 1 & df$year == 1980,]
y <- round(gompertz(1:12,100,dat$mu,dat$lambda), digits = 2)
y
[1] 0.00 0.00 0.00 0.72 18.60 56.37 82.26 93.56 97.76 99.23
[11] 99.74 99.91
If I want to add y
as columns to dat
new.col <- 5:16
dat[new.col] <- y
dat
location year mu lambda V5 V6 V7 V8 V9 V10 V11
1 1980 39.60263 4.554095 0 0 0 0.72 18.6 56.37 82.26
V12 V13 V14 V15 V16
1 93.56 97.76 99.23 99.74 99.91
As you see, I have attached y as columns V5 till V16 in the dat
. I want to repeat this for all location and year combination in df
. I hope this is clear.
df %>% group_by(location year) %>% mutate(?? how to I add new columns for y??)
答案 0 :(得分:4)
You could use lapply()
:
library(grofit)
df2 <- do.call(rbind, lapply(1:nrow(df),
function(x) round(gompertz(1:12, 100, df[x, 3], df[x, 4]),
digits = 2)))
df3 <- cbind(df, df2)
result:
> head(df3)
location year mu lambda 1 2 3 4 5 6 7 8 9 10 11 12
1 1 1980 43.04565 4.536717 0 0 0 0.61 20.58 61.23 85.88 95.39 98.54 99.55 99.86 99.96
2 1 1981 39.00524 4.505235 0 0 0 0.96 20.02 57.28 82.45 93.53 97.71 99.20 99.72 99.90
3 1 1982 41.60206 4.619627 0 0 0 0.42 17.07 56.52 83.18 94.23 98.10 99.38 99.80 99.94
4 1 1983 42.01069 4.689058 0 0 0 0.26 14.87 54.43 82.35 93.99 98.04 99.37 99.80 99.94
5 1 1984 40.34275 4.692595 0 0 0 0.30 14.36 52.30 80.54 93.03 97.61 99.20 99.73 99.91
6 1 1985 41.13246 4.641404 0 0 0 0.38 16.20 55.15 82.32 93.84 97.94 99.32 99.78 99.93
data:
set.seed(47) # for sake of reproducibility
df <- data.frame(location = rep(1:1000, each = 36),
year = rep(1980:2015, times = 1000),
mu = runif(min = 36.5, max = 43.2, 1000 * 36),
lambda = runif(min = 4.5, max = 4.8, 1000 * 36))
答案 1 :(得分:3)
Here is a tidyverse
solution :
df <- head(df) # we'll work on a sample
library(tidyverse)
df %>%
mutate(y = map2(mu,lambda,gompertz,time= 1:12,A = 100),
y = map(y,. %>% round(2) %>% t %>% as_tibble)) %>% # we reformat the vectors as one line tibbles for smooth unnesting
unnest %>%
rename_at(5:16,~paste0("y",1:12))
# location year mu lambda y1 y2 y3 y4 y5 y6 y7 y8 y9 y10 y11 y12
# 1 1 1980 38.52133 4.793232 0 0 0 0.20 11.20 46.38 76.37 90.97 96.73 98.84 99.59 99.86
# 2 1 1981 41.05032 4.668713 0 0 0 0.32 15.29 54.04 81.74 93.61 97.86 99.29 99.77 99.92
# 3 1 1982 36.76366 4.687794 0 0 0 0.45 13.67 48.07 76.37 90.55 96.41 98.66 99.51 99.82
# 4 1 1983 42.47994 4.766380 0 0 0 0.14 12.55 51.99 81.37 93.71 97.97 99.36 99.80 99.94
# 5 1 1984 36.58161 4.510503 0 0 0 1.09 18.81 53.90 79.56 91.89 96.92 98.85 99.57 99.84
# 6 1 1985 41.77695 4.705588 0 0 0 0.23 14.29 53.52 81.81 93.75 97.95 99.34 99.79 99.93
and a base version that should run faster:
new_df <- cbind(df,round(t(mapply(gompertz, df$mu, df$lambda,MoreArgs = list(time= 1:12, A = 100))),2))
names(new_df)[5:16] <- paste0("y",1:12)
# location year mu lambda y1 y2 y3 y4 y5 y6 y7 y8 y9 y10 y11 y12
# 1 1 1980 38.52133 4.793232 0 0 0 0.20 11.20 46.38 76.37 90.97 96.73 98.84 99.59 99.86
# 2 1 1981 41.05032 4.668713 0 0 0 0.32 15.29 54.04 81.74 93.61 97.86 99.29 99.77 99.92
# 3 1 1982 36.76366 4.687794 0 0 0 0.45 13.67 48.07 76.37 90.55 96.41 98.66 99.51 99.82
# 4 1 1983 42.47994 4.766380 0 0 0 0.14 12.55 51.99 81.37 93.71 97.97 99.36 99.80 99.94
# 5 1 1984 36.58161 4.510503 0 0 0 1.09 18.81 53.90 79.56 91.89 96.92 98.85 99.57 99.84
# 6 1 1985 41.77695 4.705588 0 0 0 0.23 14.29 53.52 81.81 93.75 97.95 99.34 99.79 99.93
An alternative to mapply
that is not used so often is Vectorize
, in this case I think its use is justified as this function seems like it really should be vectorized in the first place.
gompertz2 <- Vectorize(gompertz,c("mu","lambda"))
new_df <- cbind(df,round(t(gompertz2(1:12, 100, df$mu,df$lambda)),2))
names(new_df)[5:16] <- paste0("y",1:12)
# same output
答案 2 :(得分:1)
With the data that you generated, there is no need to summarise()
with dplyr. Each record is unique. So this seems more like a place to use apply()
.
There are ways to loop through this; I just created twelve statements. We are passing the mu,lamda columns of df
to the apply function and then using your function across each 36000 rows to grab the 12 pieces of that vector into 12 new variables y1:y12.
df$y1 <- apply(df[,3:4], 1, function(x) round(gompertz(1:12,100,x[1],x[2]), digits = 2)[1])
df$y2 <- apply(df[,3:4], 1, function(x) round(gompertz(1:12,100,x[1],x[2]), digits = 2)[2])
df$y3 <- apply(df[,3:4], 1, function(x) round(gompertz(1:12,100,x[1],x[2]), digits = 2)[3])
df$y4 <- apply(df[,3:4], 1, function(x) round(gompertz(1:12,100,x[1],x[2]), digits = 2)[4])
df$y5 <- apply(df[,3:4], 1, function(x) round(gompertz(1:12,100,x[1],x[2]), digits = 2)[5])
df$y6 <- apply(df[,3:4], 1, function(x) round(gompertz(1:12,100,x[1],x[2]), digits = 2)[6])
df$y7 <- apply(df[,3:4], 1, function(x) round(gompertz(1:12,100,x[1],x[2]), digits = 2)[7])
df$y8 <- apply(df[,3:4], 1, function(x) round(gompertz(1:12,100,x[1],x[2]), digits = 2)[8])
df$y9 <- apply(df[,3:4], 1, function(x) round(gompertz(1:12,100,x[1],x[2]), digits = 2)[9])
df$y10 <- apply(df[,3:4], 1, function(x) round(gompertz(1:12,100,x[1],x[2]), digits = 2)[10])
df$y11 <- apply(df[,3:4], 1, function(x) round(gompertz(1:12,100,x[1],x[2]), digits = 2)[11])
df$y12 <- apply(df[,3:4], 1, function(x) round(gompertz(1:12,100,x[1],x[2]), digits = 2)[12])
head(df)
location year mu lambda y1 y2 y3 y4 y5 y6 y7 y8 y9 y10 y11 y12
1 1 1980 38.70790 4.531560 0 0 0 0.86 19.00 56.00 81.67 93.18 97.56 99.14 99.70 99.89
2 1 1981 42.64717 4.765444 0 0 0 0.14 12.60 52.22 81.56 93.81 98.01 99.37 99.80 99.94
3 1 1982 39.19041 4.527792 0 0 0 0.85 19.33 56.75 82.27 93.49 97.71 99.20 99.73 99.91
4 1 1983 37.50859 4.565435 0 0 0 0.79 17.46 53.28 79.68 92.13 97.09 98.94 99.62 99.86
5 1 1984 36.71666 4.779357 0 0 0 0.27 11.29 44.76 74.36 89.65 96.05 98.53 99.45 99.80
6 1 1985 42.11325 4.783322 0 0 0 0.13 11.99 50.91 80.66 93.39 97.85 99.31 99.78 99.93
NOTE: within dplyr
you could also do something like:
df <- df %>% rowwise() %>% mutate(y1 = round(gompertz(1:12,100,mu,lambda), digits = 2)[1],
y2 = round(gompertz(1:12,100,mu,lambda), digits = 2)[2],
y3 = round(gompertz(1:12,100,mu,lambda), digits = 2)[3],
y4 = round(gompertz(1:12,100,mu,lambda), digits = 2)[4],
y5 = round(gompertz(1:12,100,mu,lambda), digits = 2)[5])
and repeat with 6-12, and achieve same result.