Question

我刚开始使用RSQLite以及Thomas Lumley的survey包，在R中分析非常大的调查数据集。我收到了一条错误消息，这个错误之前在Stack Overflow和R-help存档中都有人问过，但那些解决方案不适用于我的数据（其中一个案例是原提问者使用了POSIX数据类型，而我的数据中没有这种类型）。我不认为这是survey包的问题，而是认为我在创建数据库/表时出了错。有一点可能有助于诊断：当我使用下面给出的数据样本时，SELECT查询不会出错；但当我对完整数据集做同样的操作时，就会得到同样的错误。以下是我的数据示例和一些可重现的代码：

# Five-row sample of the survey data, stored as a data.frame with 28 columns.
# X_PSU, X_STSTR and X_FINALWT are the columns passed to svydesign() further
# down as the cluster id, strata and weights. Several columns are factors whose
# levels are numeric codes stored as strings (CTYCODE carries the longest
# level list), and a few columns are entirely NA in this sample.
test=structure(list(household = c(0, 0, 0, 0, 0), NUMADULT = c(2L, 
1L, 2L, 1L, 1L), CHILDREN = c(NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_), SEX = c(1L, 2L, 1L, 2L, 2L), X_STATE = c(36L, 5L, 
53L, 41L, 10L), X_FINALWT = c(665.97647582, 53.293518032, 72.60538811, 
61.223634396, 5.5921160216), AGE = c(30L, 65L, 9L, 49L, 48L), 
    X_INCOMG = structure(c(6L, 6L, 6L, 6L, 6L), .Label = c("1", 
    "2", "3", "4", "5", "9"), class = "factor"), X_MAM502Y = structure(c(NA, 
    1L, NA, NA, NA), .Label = c("1", "2", "9"), class = "factor"), 
    HLTHPLAN = structure(c(2L, 1L, 1L, 1L, 1L), .Label = c("1", 
    "2"), class = "factor"), MEDCOST = structure(c(1L, 2L, 2L, 
    2L, 2L), .Label = c("1", "2"), class = "factor"), QLACTLM2 = c(2L, 
    2L, 2L, 2L, 2L), CTYCODE = structure(c(30L, 53L, 33L, 26L, 
    1L), .Label = c("1", "3", "5", "6", "7", "9", "10", "11", 
    "13", "14", "15", "17", "19", "20", "21", "23", "25", "27", 
    "28", "29", "30", "31", "33", "35", "37", "39", "41", "43", 
    "45", "47", "49", "51", "53", "55", "57", "59", "61", "63", 
    "65", "67", "69", "71", "73", "75", "77", "79", "81", "83", 
    "85", "86", "87", "89", "91", "93", "95", "97", "99", "101", 
    "103", "105", "107", "109", "111", "113", "115", "117", "119", 
    "121", "123", "125", "127", "129", "131", "133", "135", "137", 
    "139", "141", "143", "145", "147", "149", "151", "153", "155", 
    "157", "159", "161", "163", "165", "167", "169", "171", "173", 
    "175", "177", "179", "181", "183", "185", "187", "189", "191", 
    "193", "195", "197", "199", "201", "205", "209", "215", "227", 
    "235", "245", "297", "303", "309", "339", "355", "439", "453", 
    "491", "510", "550", "590", "650", "700", "710", "740", "760", 
    "770", "777", "800", "810", "999", "203", "207", "217", "221", 
    "223", "275", "277", "295", "313", "381", "423", "680", "12", 
    "54", "186", "211", "213", "219", "225", "229", "231", "233", 
    "237", "239", "241", "247", "249", "251", "253", "255", "257", 
    "259", "261", "265", "267", "271", "273", "279", "281", "285", 
    "287", "289", "291", "293", "299", "305", "311", "321", "323", 
    "325", "329", "331", "337", "341", "343", "347", "349", "351", 
    "353", "361", "363", "365", "367", "371", "373", "375", "387", 
    "395", "397", "401", "407", "409", "415", "419", "427", "441", 
    "449", "451", "455", "457", "459", "463", "465", "467", "469", 
    "471", "473", "477", "479", "481", "485", "487", "489", "493", 
    "497", "499", "503", "520", "540", "570", "600", "630", "660", 
    "670", "683", "690", "730", "750", "775", "820", "830", "840", 
    "790"), class = "factor"), X_RACEGR2 = structure(c(1L, 1L, 
    NA, 1L, NA), .Label = c("1", "2", "3", "4", "5"), class = "factor"), 
    PERSDOC2 = structure(c(3L, 1L, 1L, 1L, 1L), .Label = c("1", 
    "2", "3"), class = "factor"), POORHLTH = c(0, NA, NA, 0, 
    0), X_EDUCAG = structure(c(3L, 2L, 4L, 4L, 4L), .Label = c("1", 
    "2", "3", "4"), class = "factor"), X_PSU = c(2004006698L, 
    2004014294L, 2004100796L, 2004024220L, 2004005537L), X_STSTR = c(36011L, 
    5012L, 53271L, 41012L, 10011L), X_RFMAM2Y = structure(c(NA, 
    1L, NA, 1L, 1L), .Label = c("1", "2", "9"), class = "factor"), 
    X_RFSMOK3 = structure(c(2L, 1L, 1L, 2L, 1L), .Label = c("1", 
    "2"), class = "factor"), X_RFHLTH = structure(c(1L, 1L, 1L, 
    1L, 1L), .Label = c("1", "2", "3"), class = "factor"), YEAR = c(2004, 
    2004, 2004, 2004, 2004), bcccp = structure(c(2L, 2L, 2L, 
    2L, 1L), .Label = c("0", "1"), class = "factor"), pov.limit = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_), cutoff = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_), elig = c(NA, NA, 
    NA, NA, NA), bcccp_elig = c(NA, NA, NA, NA, NA)), .Names = c("household", 
"NUMADULT", "CHILDREN", "SEX", "X_STATE", "X_FINALWT", "AGE", 
"X_INCOMG", "X_MAM502Y", "HLTHPLAN", "MEDCOST", "QLACTLM2", "CTYCODE", 
"X_RACEGR2", "PERSDOC2", "POORHLTH", "X_EDUCAG", "X_PSU", "X_STSTR", 
"X_RFMAM2Y", "X_RFSMOK3", "X_RFHLTH", "YEAR", "bcccp", "pov.limit", 
"cutoff", "elig", "bcccp_elig"), row.names = c(NA, 5L), class = "data.frame")

library(survey)
library(sqldf)
library(RSQLite)

# Reproduction script from the question: write the sample data frame `test`
# into the SQLite file 'brfsagg.db', then try to build a database-backed
# survey design on top of it. The code is kept exactly as posted because it
# is the failing case being asked about; the NOTE(review) comments flag the
# lines that look inconsistent with each other.
drv=dbDriver('SQLite')
# Opens (or creates) 'brfsagg.db' relative to the current working directory.
con=dbConnect(drv,'brfsagg.db')
# The data frame is stored under the table name 'brfs0210', not 'test'.
dbWriteTable(con,'brfs0210',test)
dbListFields(con,'brfs0210') #This function works
# NOTE(review): sqldf() is not given `con` here, so it presumably does not
# query 'brfsagg.db' through this connection -- verify against the sqldf docs.
sqldf("select SEX from brfs0210") #This works with my sample data but I get the same error message when I use the full data set.

# NOTE(review): this checks for a table named 'test', but the only table this
# script writes is 'brfs0210'; the printed result should be inspected rather
# than assumed to be TRUE.
dbExistsTable(con,'test') #This proves that the table exists

# NOTE(review): two things to confirm here.
#   1. data='test' names a table that this script never created (it wrote
#      'brfs0210' above).
#   2. system.file('brfsagg.db',package='survey') looks for the file inside the
#      installed survey package, not in the working directory where
#      dbConnect() created 'brfsagg.db' -- confirm what path it returns.
brfsvy=svydesign(id=~X_PSU, strata=~X_STSTR, weights=~X_FINALWT,nest=TRUE,
        data='test',dbtype='SQLite',dbname=system.file('brfsagg.db',package='survey')) #This always generates the error message, regardless of whether I am using the test sample data or my full data set.

Answer 1

您尝试编写的代码已经有人写好了（见此处），并附有一篇博客文章（见此处）。为什么要重新发明轮子呢？在谷歌上搜索“r brfss”或“import brfss into r”就能找到这些帖子。

您是否有什么理由想从头开始重写所有内容？此处有很多将SQLite与survey包结合使用的示例语法。不过，下面是解决这个特定问题的方法。:)

library(survey)
library(RSQLite)

# Path of the SQLite file. The same variable is used both for writing the
# table and as `dbname` in svydesign(), so the two always point at one file.
db.filename <- 'brfsagg.db'

# Connection used only to load the data frame into the database.
con <- dbConnect(SQLite(),db.filename)

# Store the data frame under the table name 'test'; this must match the
# `data` argument of svydesign() below. overwrite = TRUE lets the script be
# re-run without failing because the table already exists.
dbWriteTable( con , 'test' , test , overwrite = TRUE )

# Release the connection once the table is written; svydesign() is given the
# file name and table name rather than this connection object.
dbDisconnect( con )

# Database-backed survey design: `data` is the table name and `dbname` the
# SQLite file that holds it.
brfsvy <- 
    svydesign(
        id = ~X_PSU , 
        strata = ~X_STSTR , 
        weights = ~X_FINALWT , 
        nest = TRUE ,
        data = 'test' ,
        dbtype = 'SQLite' ,
        dbname = db.filename
    )

# In the five-row sample every X_STSTR value is distinct, so each stratum has
# a single PSU. NOTE(review): with the 'survey.lonely.psu' option still unset,
# this first call is presumably expected to stop with a single-PSU error --
# confirm against the survey package's options documentation.
svymean( ~ SEX , brfsvy )

# Tell the survey package how to handle strata that contain only one PSU.
options( 'survey.lonely.psu' = 'adjust' )

# Same estimate again, now with the lonely-PSU adjustment in effect.
svymean( ~ SEX , brfsvy )

# Wrap SEX in factor() so it is summarised per level instead of as a number.
svymean( ~ factor( SEX ) , brfsvy )

RSQLite报告的错误消息：RS-DBI driver: (error in statement: no such table: test)，即“语句错误：没有这样的表：test”。

1 个答案: