我正在尝试在R中构建一个函数,它允许我使用Thomas Lumley的R调查包在数据框中生成命名变量的加权表。我想我遇到了错误,因为调查包中的svytable和svychisq函数没有将我自制函数中的参数识别为我想要分析的数据框中所需变量的名称。
我曾尝试利用此处给出的示例来开发解决方案: lapply with anonymous function call to svytable results in object 'x' not found
我在尝试对svychisq进行问题排查时也看过这篇文章:Error using dynamic variable specification in R survey function svychisq()
这些帖子的所有建议都没有对我有用。我非常感谢人们可以查看我的代码并提供反馈,以便我可以尝试生成一个打印的函数1.总计的加权表2.在该加权表上运行卡方分析的结果。
这是我迄今为止的数据/代码:
#data to make the code reproducible
independent <- c(2,1,1,1,1,1,2,2,1,1,2,2,2,2,1,1,1,2,2,2,2,2,2,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,2,1,2,1,1,1,1,1,1,1,2,1,2,1,2,2,2,2,1,1,1,1,2,2,1,1,2,2,1,1,1,1,2,1,2,1,1,1,1,2,1,2,1,2,2,1,1,1,2,2,1,2,2,2,1,2,1,2,2,2,2,2,1,1,2,2,1,2,1,2,2,2,1,1,2,1,1,2,1,1,1,2,1,2,1,1,2,1,2,2,2,1,1,1,1,1,1,2,2,1,1,1,2,2,1,1,2,2,2,2,1,2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,2,2,1,2,2,1,1,2,2,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,1,2,2,1,2,1,1,2,1)
dependent <-c(1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,1,2,1,1,1,2,1,98,1,1,1,1,1,1,2,1,1,1,1,1,1,2,1,1,1,1,1,2,2,1,1,1,1,3,1,1,2,1,2,1,2,1,1,1,1,1,1,2,1,1,1,1,1,2,1,1,1,4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,4,1,1,1,5,1,2,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,2,1,1,5,1,2,1,1,1,2,1,1,1,1,2,1,1,1,2,1,1,1,2,1,4,3,1,3,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,2,1,1,2,1,2,2,1,2,1)'
weight <- c(2937,9389,7918,3851,8560,7433,6577,4507,5601,1358,7274,4507,5160,2429,618,4938,3734,5177,3683,3988,3534,2364,1979,5879,5131,6119,4321,4425,1302,3563,5133,10947,2183,3988,6607,5637,4507,5598,3086,6341,2553,6040,15050,809,5056,5709,2557,3734,1136,2596,7029,4507,2099,1296,4103,4030,4944,3940,4358,5598,1794,4636,6130,6119,431,4944,8361,5732,2602,6117,3719,2727,1406,6023,15050,3870,6808,3957,4507,3827,3658,7600,4244,4507,1850,4358,3770,4103,4927,4801,4012,2364,5547,4381,4103,4444,1427,5575,4656,3708,2865,4447,5709,477,2617,6232,3160,2745,3103,1604,2925,3950,1679,1341,7420,7600,1655,6307,4041,3882,5058,6412,4332,3415,7913,4330,5604,4027,305,4933,4635,1224,4464,8988,4664,1143,882,4061,6142,5763,9811,882,580,780,6973,1752,4263,3809,3440,4447,4740,443,3438,4076,3948,3438,2429,3582,4397,4283,4399,2028,758,15944,3593,4430,4159,4292,5976,3619,18834,2301,4485,942,5861,4284,2102,2102,8503,942,4747,8503,4358,4348,1794,4783,1899,373,5811,2183,5575,4702,4466,4653,3216,4553,7523,2606,2396,3216)
psu <- c(108,167,224,185,167,151,294,187,274,111,161,187,286,179,228,145,108,248,240,267,253,109,179,240,228,172,278,165,129,153,271,161,141,267,243,278,187,168,108,250,179,194,210,278,114,240,189,108,287,189,154,187,141,251,108,108,187,142,145,168,143,253,172,172,178,187,165,187,267,172,177,165,167,186,210,107,168,108,187,167,186,273,187,187,179,145,174,108,115,115,215,109,253,120,108,240,123,150,115,115,278,174,240,142,109,159,187,185,176,140,220,166,129,129,240,273,111,161,141,141,121,115,120,175,168,472,472,142,142,145,147,145,150,132,186,166,166,169,161,169,163,166,166,166,163,179,178,176,177,174,180,178,171,180,171,171,179,173,192,192,157,154,151,160,154,142,186,186,163,171,160,141,142,143,143,148,141,141,143,143,150,143,145,147,143,141,141,142,148,141,150,141,148,148,136,134,140,136,136,136)
#make the data frame for reproducible example
dat <- as.data.frame(cbind(independent,dependent,weight,psu))
dat$independent <- factor(dat$independent, levels=c("1", "2"))
dat$dependent <- factor(dat$dependent, levels = c("1", "2", "3", "4", "5", "98"))
str(dat)
#Function to print svytables + chi square statistic
library(survey) #opens the survey package in R
x <- independent
y <- dependent
cstbl <- function(x,y) {
dat_w <- svydesign(id=~psu, weights=~weight, data=dat)
tbl <- svytable(bquote(~.(as.name(y)+as.name(x))), dat_w) #should make table
chi <- svychisq(as.formula(paste("~", x, "+", y)), dat_w) #should give test stat
final <- list(tbl, chi)
return(final) #should return table + chi square test stat
}
cstbl(x,y)
这会抛出错误:“as.name(y)+ as.name(x)中的错误: 二元运算符的非数字参数“
如果我用函数的第三行替换,我会得到同样的错误
tbl <- svytable(~y+x, dat_w)
我非常感谢您的指导和耐心,因为我不熟悉编程。