我正在运行以下形式的逻辑回归:
# Fit the logistic regression of dependent_variable on var1, var2 and var3
# using the rows of merged_dataset.
# NOTE(review): lrm() is presumably rms::lrm and "na.delete" its missing-value
# handler; no library() call is visible here, so confirm before relying on it.
model_1 <- lrm(dependent_variable ~ var1 + var2 + var3, data = merged_dataset, na.action="na.delete")
我想把上一季度因变量的平均值作为一个自变量加入模型。
我的数据如下:
> print(head(merged_dataset[,c("dependent_variable", "qy")]))
dependent_variable qy
1: 0 2008 Q1
2: 0 2008 Q1
3: 0 2008 Q1
4: 0 2008 Q1
5: 0 2008 Q1
6: 0 2008 Q1
其中因变量的取值为0或1,而qy变量取的值为诸如2008 Q1、2008 Q2,...,2017 Q4。
编辑:
我运行以下代码:
# Return the label of the quarter that precedes each element of `quarter`.
#
# Args:
#   quarter: vector of quarter labels shaped like "2018 Q1" (four-digit year,
#     a space, "Q", and the quarter digit at position 7). Input that is not
#     already character (e.g. a factor) is converted with as.character().
#
# Returns:
#   A character vector of the same length as `quarter`. A "Q1" label maps to
#   "<year - 1> Q4"; any other label keeps its year and has the quarter digit
#   reduced by one. Missing inputs give NA_character_.
PrevQ <- function(quarter = "2018 Q1") {
  # Work on text labels explicitly instead of relying on implicit coercion
  # inside substr() and grepl().
  quarter <- as.character(quarter)
  is_q1 <- grepl("Q1", quarter, fixed = TRUE)

  # Candidate result for first quarters: roll back to Q4 of the previous year.
  prev_if_q1 <- paste0(as.numeric(substr(quarter, 1, 4)) - 1, " Q4")
  # Candidate result for Q2-Q4: same "YYYY Q" prefix, quarter digit minus one.
  prev <- paste0(
    substr(quarter, 1, 6),
    as.numeric(substr(quarter, 7, 7)) - 1
  )
  prev[is_q1] <- prev_if_q1[is_q1]

  # paste0() turns a missing label into the text "NANA"; report it as missing.
  prev[is.na(quarter)] <- NA_character_
  prev
}
# means in current quarters: ave() gives every row the mean of
# dependent_variable taken over the rows that share its qy value
merged_dataset$dep_means = with(merged_dataset, ave(dependent_variable, qy, FUN = mean))
# get names of previous quarters
merged_dataset$prev_qy = PrevQ(merged_dataset$qy)
# merge mean of previous quarter by name of the quarter: match() returns the
# position of the first row whose qy equals prev_qy (NA when there is none),
# and that position is used to pick the row's dep_means
# NOTE(review): prev_qy is built with paste0(), so it is character. The cbind()
# output further down prints qy as numbers (2008, 2017.75), which suggests qy
# is not a plain character column (presumably a zoo yearqtr; TODO confirm with
# class(merged_dataset$qy)). If so, the labels never match and var4 is all NA;
# matching against as.character(qy) would be the likely remedy.
merged_dataset$var4 = with(merged_dataset, dep_means[match(prev_qy, qy)])
# show the first rows of the outcome and quarter columns
print(head(merged_dataset[,c("dependent_variable", "qy")]))
# show var4 next to qy; cbind() combines the two columns into one matrix
print(head(cbind(merged_dataset$var4,merged_dataset$qy)))
这给了我
> print(head(merged_dataset[,c("dependent_variable", "qy")]))
dependent_variable qy
1: 0 2008 Q1
2: 0 2008 Q1
3: 0 2008 Q1
4: 0 2008 Q1
5: 0 2008 Q1
6: 0 2008 Q1
>
>
> print(head(cbind(merged_dataset$var4,merged_dataset$qy)))
[,1] [,2]
[1,] NA 2008
[2,] NA 2008
[3,] NA 2008
[4,] NA 2008
[5,] NA 2008
[6,] NA 2008
qy 变量似乎被改变了,只保留了年份?而且 var4 得到的全是 NA。
> print(head(cbind(merged_dataset$var4,merged_dataset$qy,merged_dataset$dep_means)))
[,1] [,2] [,3]
[1,] NA 2008 0.1292719
[2,] NA 2008 0.1292719
[3,] NA 2008 0.1292719
[4,] NA 2008 0.1292719
[5,] NA 2008 0.1292719
[6,] NA 2008 0.1292719
> print(tail(cbind(merged_dataset$var4,merged_dataset$qy,merged_dataset$dep_means)))
[,1] [,2] [,3]
[32008,] NA 2017.75 0.09802372
[32009,] NA 2017.75 0.09802372
[32010,] NA 2017.75 0.09802372
[32011,] NA 2017.75 0.09802372
[32012,] NA 2017.75 0.09802372
[32013,] NA 2017.75 0.09802372
答案 0 :(得分:2)
# Return the label of the quarter that precedes each element of `quarter`.
#
# Args:
#   quarter: vector of quarter labels shaped like "2018 Q1" (four-digit year,
#     a space, "Q", and the quarter digit at position 7). Input that is not
#     already character (e.g. a factor) is converted with as.character().
#
# Returns:
#   A character vector of the same length as `quarter`. A "Q1" label maps to
#   "<year - 1> Q4"; any other label keeps its year and has the quarter digit
#   reduced by one. Missing inputs give NA_character_.
PrevQ <- function(quarter = "2018 Q1") {
  # Work on text labels explicitly instead of relying on implicit coercion
  # inside substr() and grepl().
  quarter <- as.character(quarter)
  is_q1 <- grepl("Q1", quarter, fixed = TRUE)

  # Candidate result for first quarters: roll back to Q4 of the previous year.
  prev_if_q1 <- paste0(as.numeric(substr(quarter, 1, 4)) - 1, " Q4")
  # Candidate result for Q2-Q4: same "YYYY Q" prefix, quarter digit minus one.
  prev <- paste0(
    substr(quarter, 1, 6),
    as.numeric(substr(quarter, 7, 7)) - 1
  )
  prev[is_q1] <- prev_if_q1[is_q1]

  # paste0() turns a missing label into the text "NANA"; report it as missing.
  prev[is.na(quarter)] <- NA_character_
  prev
}
# sample data: six 0/1 outcomes, three rows in each of two quarters
set.seed(5551)
merged_dataset <- data.frame(
  dependent_variable = sample.int(2, 6, replace = TRUE) - 1,
  qy = rep(c("2017 Q4", "2018 Q1"), each = 3)
)
# Quarter labels as plain text. The lookup below pairs prev_qy (character)
# with qy, so both sides must be text; this is a no-op for the sample data.
# NOTE(review): on real data qy may be a classed, numerically stored quarter
# (presumably a zoo yearqtr; confirm with class(qy)), in which case matching
# against the raw column finds nothing and var4 comes out all NA.
qy_label <- as.character(merged_dataset$qy)
# means in current quarters: each row gets the mean of its own quarter
merged_dataset$dep_means <- ave(
  merged_dataset$dependent_variable, qy_label,
  FUN = mean
)
# get names of previous quarters
merged_dataset$prev_qy <- PrevQ(qy_label)
# merge mean of previous quarter by name of the quarter; rows whose previous
# quarter does not occur in the data get NA
merged_dataset$var4 <- merged_dataset$dep_means[
  match(merged_dataset$prev_qy, qy_label)
]
merged_dataset
# dependent_variable qy dep_means prev_qy var4
# 1 0 2017 Q4 0.33333333 2017 Q3 NA
# 2 1 2017 Q4 0.33333333 2017 Q3 NA
# 3 0 2017 Q4 0.33333333 2017 Q3 NA
# 4 0 2018 Q1 0.00000000 2017 Q4 0.33333333
# 5 0 2018 Q1 0.00000000 2017 Q4 0.33333333
# 6 0 2018 Q1 0.00000000 2017 Q4 0.33333333