很高兴看到Charco Hui将Thomas Lumley的实验性sqlsurvey包复活为svydb。可能是处理R中大型调查数据集的好工具。
尽管svydb在计算标准错误时遇到了一些问题。此问题与我在sqlsurvey中遇到的问题非常相似。在运行Mojave,R版本3.5.1和MonetDB 2018年8月-SP1版本的Mac Pro上进行定期调查时,类似的程序可以正常工作
该软件包对于健康研究人员和流行病学家而言可能是一个非常重要的工具,因此希望我做的事情显然是错误的,并且很容易解决。
下面是一个基于BRFSS数据集的随机抽取的可复制示例。
# create data
dat<-data.frame(var1=c(6, 5, 6, 6, 6, 6, 3, 3, 2, 6, 3, 4, 6, 6, 3, 6, 5, 5, 4, 5, 4, 4, 6, 6, 4, 3, 4, 4, 3, 5, 5, 3, 6, 1, 6, 3, 5, 3, 4, 2, 5, 2, 6, 5, 6, 2, 2, 4, 3, 6, 6, 6, 3, 6, 2, 3, 4, 1, 4, 4, 2, 6, 5, 4, 6, 2, 6, 2, 6, 6, 6, 5, 4, 2, 5, 3, 4, 4, 5, 1, 2, 6, 6, 6, 2, 6, 4, 4, 2, 3, 6, 3, 3, 6, 2, 6, 5, 4, 6, 2),
var2=c(NA, NA, 88, 88, 88, NA, 1, 88, 88, NA, NA, 2, NA, NA, 88, NA, NA, 88, NA, 88, 88, 88, 88, NA, 88, 88, 88, NA, 88, NA, NA, 88, NA, NA, NA, 88, NA, NA, 4, NA, 88, NA, NA, 8, NA, NA, NA, 88, 88, NA, NA, NA, NA, NA, 88, NA, 88, 88, NA, 88, NA, 88, NA, 11, NA, 1, NA, 4, 88, 7, NA, 88, NA, NA, 88, 88, NA, 88, NA, 88, 1, 88, 88, NA, 88, NA, NA, 88, 29, 88, 88, NA, 88, NA, 88, NA, 88, NA, NA, 88),
strat=c(25011, 12032, 19101, 22011, 24011, 53311, 8011, 25061, 8012, 35031, 32022, 18022, 34101, 19061, 5011, 24011, 34101, 24011, 19021, 31031, 37111, 5012, 25011, 53201, 15052, 22011, 37261, 27011, 30031, 44031, 26011, 42011, 37121, 25041, 39082, 24032, 38011, 15012, 24012, 35062, 16071, 42042, 27011, 45062, 50011, 25041, 56012, 25041, 9022, 55031, 55021, 19091, 35022, 28011, 34101, 40021, 9021, 23031, 34041, 78021, 2021, 30021, 1011, 9012, 40032, 18042, 20011, 49041, 24011, 53111, 16012, 20011, 22011, 25042, 49122, 53352, 53091, 9021, 19071, 29021, 18021, 37131, 46041, 8011, 42031, 47121, 46051, 17011, 42021, 72061, 34101, 25062, 35062, 37061, 55062, 46031, 45041, 28011, 37211, 12021),
psu=c(2006092024, 2006018204, 2006024879, 2006009844, 2006054713, 2006074840, 2006014252, 2006022459, 2006030518, 2006033744, 2006004188, 2006029456, 2006028186, 2006021413, 2006049813, 2006007831, 2006166137, 2006030867, 2006027180, 2006027225, 2006029188, 2006004573, 2006000994, 2006064527, 2006011204, 2006007744, 2006016792, 2006020891, 2006039237, 2006007268, 2006019976, 2006065941, 2006074498, 2006008616, 2006018805, 2006046669, 2006018600, 2006013675, 2006075607, 2006008605, 2006020846, 2006022428, 2006033687, 2006020260, 2006035555, 2006009957, 2006006278, 2006103150, 2006048853, 2006015663, 2006001191, 2006023333, 2006002411, 2006035682, 2006041137, 2006011947, 2006042893, 2006025836, 2006099337, 2006036076, 2006016300, 2006028942, 2006013850, 2006064497, 2006026877, 2006020508, 2006020272, 2006023852, 2006012831, 2006051597, 2006033700, 2006044908, 2006003444, 2006072406, 2006021862, 2006081285, 2006133751, 2006019507, 2006031458, 2006019504, 2006002030, 2006075571, 2006020267, 2006040619, 2006125145, 2006008496, 2006051043, 2006031048, 2006106217, 2006004721, 2006148449, 2006017795, 2006008827, 2006003243, 2006009762, 2006044530, 2006029068, 2006019002, 2006013326, 2006015464),
wt= c(131.167091, 1135.222016, 462.911082, 237.434588, 222.090249, 8.867523, 367.057462, 635.047502, 734.512583, 168.266313, 237.236438, 794.324159, 896.016179, 453.886381, 516.676601, 222.090249, 370.880284, 266.860031, 178.231943, 127.053275, 286.216069, 212.409224, 196.986648, 52.223519, 13.850239, 176.044166, 290.372567, 517.028007, 79.796161, 174.701770, 829.008988, 2514.492945, 177.300719, 190.845451, 270.146986, 261.739684, 45.144276, 276.447180, 1704.022745, 102.473540, 168.798958, 42.614573, 321.460974, 139.006541, 22.110006, 50.144119, 60.941799, 42.225301, 704.172192, 1094.072745, 423.243864, 424.056478, 256.796474, 222.921805, 940.041214, 191.560779, 280.303505, 959.310457, 401.479694, 27.638152, 84.411858, 111.955653, 661.796967, 72.884007, 118.783933, 1905.071998, 214.932923, 539.094181, 415.230283, 91.327212, 217.747861, 366.950758, 378.620855, 150.430028, 594.726292, 189.966155, 74.510093, 280.303505, 474.297391, 5389.787617, 753.892676, 502.921582, 59.307656, 444.877512, 51.007590, 446.469588, 32.298617, 2518.461812, 76.319144, 819.303078, 396.857545, 476.354501, 97.705354, 168.923737, 1179.296136, 34.909445, 275.003257, 410.288302, 147.335207, 723.221948))
# analysis in survey works
library(survey)
options(survey.lonely.psu="remove")
svyDat1<-
svydesign(
id=~psu,
strata=~strat,
data=dat,
weights=~wt,
)
svymean(~var1, svyDat1, se=T, na.rm=T)
# mean SE
# numVar 79.286 0.3439
svymean(~var2, svyDat1, se=T, na.rm=T)
# mean SE
# var2 70.171 1.0314
# analysis in svydb does not work
library(devtools)
install_github("chrk623/svydb")
library(svydb)
svyDat2 = svydbdesign(st="strat", wt="wt", id="psu", data=dat)
svydbmean(x = var1, design = svyDat2 , num = T)
# Mean SE
# var1 3.6063 NA
svydbmean(x = var2, design = svyDat2 , num = T)
# Mean SE
# var2 70.171 NA
# try with database connection returns more informative (?) error message
require(MonetDBLite)
require(DBI)
require(dbplyr)
con = dbConnect(MonetDBLite())
dbWriteTable(con, "dat", dat)
dat.db = tbl(con, "dat")
svyDat3 = svydbdesign(st = strat , wt = wt,id = psu , data = dat.db)
svydbmean(x = var1, design = svyDat3 , num = T)
# Error in .local(conn, statement, ...) :
# Unable to execute statement 'CREATE TEMPORARY TABLE "auugiyqyip" AS SELECT "sum(scaled)"
# FROM (SELECT SUM("scaled") AS "sum(scale...'.
# Server says 'MALException:batcalc./:22012!division by zero. '.
# In addition: There were 12 warnings (use warnings() to see them)
svydbmean(x = var2, design = svyDat3 , num = T)
# Error in .local(conn, statement, ...) :
# Unable to execute statement 'CREATE TEMPORARY TABLE "yxqvidpdbx" AS SELECT "sum(scaled)"
# FROM (SELECT SUM("scaled") AS "sum(scale...'.
# Server says 'MALException:batcalc./:22012!division by zero. '.
答案 0 :(得分:1)
如该程序的作者Charco Hui所述,svydb已更新为允许svydb.lonely.psu =“ remove”作为选项。这样就解决了问题。