我正在尝试逐行运行data.frame上的函数。数据集太大而无法包含,但这里有一瞥:
patientid SBP Smoker DM TotChol HDL HTN Race age gender
1 13 132 0 0 130 45 0 0 56.59954 1
2 21 150 0 1 154 52 1 1 54.19395 0
3 36 154 0 1 141 41 1 1 66.52546 0
4 45 126 0 1 134 48 1 0 67.00228 0
5 49 134 0 1 169 43 1 1 57.70091 1
6 50 136 0 0 168 35 0 0 49.18025 1
这是我的功能:
ASCVD_calc <- function(ASCVD_10, gender_col = "gender", race_col = "Race", HTN_col = "HTN", age_col = "age", totchol_col = "TotChol",
hdl_col = "HDL", sbp_col = "SBP", smoke_col = "Smoker", dm_col = "DM") {
##Coefficients are different for different gender, race or treatment status
if ((ASCVD_10[, gender_col] == 0) & (ASCVD_10[, race_col] == 0) & (ASCVD_10[,htn_col] == 0) & (ASCVD_10[,age_col] >= 40) & (ASCVD_10[,age_col] < 80)) {
param <- c(-29.799, 4.884, 13.540, -3.114, -13.578, 3.149, 1.957, 0, 7.574, -1.665, 0.661, -29.18, 0.9665)
}
if ((ASCVD_10[, gender_col] == 0) & (ASCVD_10[, race_col] == 0) & (ASCVD_10[, htn_col] == 1) & (ASCVD_10[,age_col] >= 40) & (ASCVD_10[,age_col] < 80)) {
param <- c(-29.799, 4.884, 13.540, -3.114, -13.578, 3.149, 2.019, 0, 7.574, -1.665, 0.661, -29.18, 0.9665)
}
if ((ASCVD_10[, gender_col] == 0) & (ASCVD_10[, race_col] == 1) & (ASCVD_10[, htn_col] == 0) & (ASCVD_10[,age_col] >= 40) & (ASCVD_10[,age_col] < 80)) {
param <- c(17.114, 0, 0.94, 0, -18.920, 4.475, 27.820, -6.087, 0.691, 0, 0.874, 86.61, 0.9533)
}
if ((ASCVD_10[, gender_col] == 0) & (ASCVD_10[, race_col] == 1) & (ASCVD_10[, htn_col] == 1) & (ASCVD_10[,age_col] >= 40) & (ASCVD_10[,age_col] < 80)) {
param <- c(17.114, 0, 0.94, 0, -18.920, 4.475, 29.291, -6.432, 0.691, 0, 0.874, 86.61, 0.9533)
}
if ((ASCVD_10[, gender_col] == 1) & (ASCVD_10[, race_col] == 0) & (ASCVD_10[, htn_col] == 0) & (ASCVD_10[,age_col] >= 40) & (ASCVD_10[,age_col] < 80)) {
param <- c(12.344, 0, 11.853, -2.664, -7.990, 1.769, 1.764, 0, 7.837, -1.795, 0.658, 61.18, 0.9144)
}
if ((ASCVD_10[, gender_col] == 1) & (ASCVD_10[, race_col] == 0) & (ASCVD_10[, htn_col] == 1) & (ASCVD_10[,age_col] >= 40) & (ASCVD_10[,age_col] < 80)) {
param <- c(12.344, 0, 11.853, -2.664, -7.990, 1.769, 1.797, 0, 7.837, -1.795, 0.658, 61.18, 0.9144)
}
if ((ASCVD_10[, gender_col] == 1) & (ASCVD_10[, race_col] == 1) & (ASCVD_10[, htn_col] == 0) & (ASCVD_10[,age_col] >= 40) & (ASCVD_10[,age_col] < 80)) {
param <- c(2.469, 0, 0.302, 0, -0.307, 0, 1.809, 0, 0.549, 0, 0.645, 19.54, 0.8954)
}
if ((ASCVD_10[, gender_col] == 1) & (ASCVD_10[, race_col] == 1) & (ASCVD_10[, htn_col] == 1) & (ASCVD_10[,age_col] >= 40) & (ASCVD_10[,age_col] < 80)) {
param <- c(2.469, 0, 0.302, 0, -0.307, 0, 1.916, 0, 0.549, 0, 0.645, 19.54, 0.8954)
}
##Transformed Risk Score Predictors
pred_trans <- c(log(ASCVD_10[, age_col]), (log(ASCVD_10[, age_col])^2), log(ASCVD_10[, totchol_col]),
log(ASCVD_10[, age_col])*log(ASCVD_10[, totchol_col]), log(ASCVD_10[, hdl_col]), log(ASCVD_10[, age_col])*log(ASCVD_10[, hdl_col]),
log(ASCVD_10[, sbp_col]), log(ASCVD_10[, age_col])*log(ASCVD_10[, sbp_col]), ASCVD_10[, smoke_col], log(ASCVD_10[, age])*ASCVD[, smoke_col],
ASCVD_10[, dm_col])
##sum of the transformed variables multiplied by corresponding coefficients
pred_sum <- sum(param[[1:11]]*pred_trans)
##Return ASCVD 10 Year Risk
a <- exp(pred_sum - param[[12]])
ASCVD_10$Risk10 <- paste(round((1 - (param[[13]]^a))*100,1),"%")
return(ASCVD_10)
};
ASCVD_10$Risk10 <- apply(ASCVD_10, 1, ASCVD_calc);
但我不断收到此错误消息:Error in ASCVD_10[, gender_col] : incorrect number of dimensions
我似乎无法找到一种方法来传递它。
我已经尝试过的事情:
提前感谢您提供的任何澄清。
答案 0 :(得分:1)
使用apply()
时,值将作为原子命名向量传递给您的函数;而不是data.frame。 apply()
旨在与矩阵而不是data.frames
一起使用,因此假设所有列/行都具有相同的data.type。由于您是通过
ASCVD_10[, gender_col]
在你的函数中,你得到一个错误,因为ASCVD_10
是一个向量,而不是data.frame。它是一个命名向量,所以你可以做
ASCVD_10[gender_col]
这种策略似乎难以维持或扩展。好像你想为不同的子集执行不同的转换。也许你试图从之前拟合的回归模型预测?您可能最好不要分割数据,进行转换,然后重新组合,而不是进行apply
。这将有助于描述您真正想要做的事情,而不仅仅是展示您是如何尝试这样做的。