在匿名函数中调用svytable导致"找不到对象'x'"错误

时间:2014-07-29 02:43:08

标签: r

我有一个调查数据集,我正在为其创建列联表。数据框中的每一列都是一个问题,一般而言,问题倾向于组合在一起。所以为了让生活更轻松,我一直在使用lapply循环遍历各个部分并使用以下代码返回列联表:

> out <- lapply(dat[,162:170], function(x) round(prop.table(table(x,dat$seg_2),2),3)*100)
> out
$r3a_1

x               1    2
  Don't Know  1.9  1.4
  No         14.2  4.9
  Yes        83.9 93.7

$r3a_2

x               1    2
  Don't Know  2.7  1.7
  No         14.8  6.6
  Yes        82.4 91.6

etc...

正如您所看到的,我正在遍历第162:170列,并为每一列创建一个比例表(prop.table),显示第1组和第2组之间的不同响应。

但是,我想对这些数据进行加权。因此,我使用调查包创建一个名为dat_weight的简单加权调查设计对象,并使用svytable()而不是table()。我可以手动在单个列上运行更新的代码:

> round(prop.table(svytable(~dat[,162] + dat$seg_2, dat_weight),2),3)*100 
            dat$seg_2
dat[, 162]      1    2
  Don't Know  2.5  2.7
  No         16.5  5.4
  Yes        80.9 91.9

但是,当我尝试使用lapply时,它不起作用:

> out <- lapply(dat[,162:170], function(x) round(prop.table(svytable(~x + dat$seg_2, dat_weight),2),3)*100)

Error in eval(expr, envir, enclos) : object 'x' not found 

显然,匿名函数调用和svytable不能很好地协同工作。我也尝试过改用for循环,但同样不起作用。我猜这与作用域有关,但我不知道如何修复它。

肯定有一种方法可以逐块遍历此调查中的各列,而不必为每一列单独编写一行代码。任何帮助将不胜感激。

编辑以添加一些示例数据:

> library("survey")
> dat <- structure(list(r3a_1 = structure(c(3L, 2L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Don't Know", 
"No", "Yes"), class = "factor"), r3a_2 = structure(c(3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L), .Label = c("Don't Know", "No", "Yes"), class = "factor"), 
    r3a_3 = structure(c(3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L), .Label = c("Don't Know", 
    "No", "Yes"), class = "factor"), r3a_4 = structure(c(3L, 
    2L, 2L, 2L, 3L, 2L, 2L, 3L, 3L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 
    3L, 3L, 3L, 1L), .Label = c("Don't Know", "No", "Yes"), class = "factor"), 
    r3a_5 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 
    2L, 3L, 2L, 3L, 3L, 2L, 3L, 2L, 3L, 1L), .Label = c("Don't Know", 
    "No", "Yes"), class = "factor"), r3a_6 = structure(c(3L, 
    3L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 2L, 2L, 3L, 
    2L, 3L, 3L, 3L), .Label = c("Don't Know", "No", "Yes"), class = "factor"), 
    r3a_7 = structure(c(1L, 2L, 2L, 2L, 3L, 2L, 2L, 3L, 3L, 2L, 
    3L, 3L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L), .Label = c("Don't Know", 
    "No", "Yes"), class = "factor"), r3a_8 = structure(c(3L, 
    2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 
    2L, 2L, 3L, 3L), .Label = c("Don't Know", "No", "Yes"), class = "factor"), 
    r3a_9 = structure(c(1L, 3L, 2L, 2L, 3L, 2L, 2L, 3L, 3L, 3L, 
    3L, 3L, 2L, 2L, 2L, 3L, 2L, 2L, 3L, 3L), .Label = c("Don't Know", 
    "No", "Yes"), class = "factor"), weight = c(0.34, 0.34, 0.34, 
    0.34, 0.34, 0.34, 0.34, 0.34, 0.34, 0.34, 0.34, 0.34, 0.43, 
    0.43, 0.43, 0.34, 0.34, 0.34, 0.34, 0.34), seg_2 = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L), .Label = c("1", "2"), class = "factor")), .Names = c("r3a_1", 
"r3a_2", "r3a_3", "r3a_4", "r3a_5", "r3a_6", "r3a_7", "r3a_8", 
"r3a_9", "weight", "seg_2"), row.names = c(NA, 20L), class = "data.frame")

> dat_weight <- svydesign(ids = ~1, weights = ~weight, data = dat)

从那里你可以得到加权和未加权的表格:

round(prop.table(table(dat[,1],dat$seg_2),2),3)*100  #unweighted

round(prop.table(svytable(~dat[,1] + dat$seg_2, dat_weight),2),3)*100   #weighted

然而,这有效:

lapply(dat[,1:9], function(x) round(prop.table(table(x,dat$seg_2),2),3)*100)

而这个不行:

lapply(dat[,1:9], function(x) round(prop.table(svytable(~x + dat$seg_2, dat_weight),2),3)*100)

1 个答案:

答案 0 :(得分:2)

好吧,似乎svytable函数很挑剔,只会在设计对象中查找数据。它似乎没有在封闭环境中寻找x。因此,另一种方法是动态构建公式。也就是说,我们不是直接传递数据列本身,而是传入data.frame中各列的名称。然后我们将这些名称插入到公式中,再由指向原始data.frame的设计对象来解析它们。以下是使用示例数据的可运行代码:

lapply(names(dat)[1:9], function(x) round(prop.table(
    svytable(bquote(~.(as.name(x)) + seg_2), dat_weight),
2),3)*100)

所以我们在这里使用bquote来构建公式。.()允许我们插入表达式的值,在这里我们获取x中的字符值,并用as.name()将其转换为名称(symbol)对象。因此我们从字符串"r3a_9"得到名称r3a_9。