我刚开始使用RSQLite分析使用R的非常大的调查数据集和Thomas Lumley的survey
包。我收到了之前在Stack Overflow和R帮助存档上被问过的错误消息,但解决方案不适用于我的数据(一个解决方案是原始海报使用POSIX数据类型,但我的数据没有那个)。我不认为这是survey
包的问题,而是我认为我在创建数据库/表时出错了。有一点可能会有所帮助,当我使用我在下面提出的数据中的样本时,我不会在SELECT查询中出错,但是当我使用完整数据集做同样的事情时,我确实得到了同样的错误。以下是我的数据示例和一些可重现的代码:
test=structure(list(household = c(0, 0, 0, 0, 0), NUMADULT = c(2L,
1L, 2L, 1L, 1L), CHILDREN = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), SEX = c(1L, 2L, 1L, 2L, 2L), X_STATE = c(36L, 5L,
53L, 41L, 10L), X_FINALWT = c(665.97647582, 53.293518032, 72.60538811,
61.223634396, 5.5921160216), AGE = c(30L, 65L, 9L, 49L, 48L),
X_INCOMG = structure(c(6L, 6L, 6L, 6L, 6L), .Label = c("1",
"2", "3", "4", "5", "9"), class = "factor"), X_MAM502Y = structure(c(NA,
1L, NA, NA, NA), .Label = c("1", "2", "9"), class = "factor"),
HLTHPLAN = structure(c(2L, 1L, 1L, 1L, 1L), .Label = c("1",
"2"), class = "factor"), MEDCOST = structure(c(1L, 2L, 2L,
2L, 2L), .Label = c("1", "2"), class = "factor"), QLACTLM2 = c(2L,
2L, 2L, 2L, 2L), CTYCODE = structure(c(30L, 53L, 33L, 26L,
1L), .Label = c("1", "3", "5", "6", "7", "9", "10", "11",
"13", "14", "15", "17", "19", "20", "21", "23", "25", "27",
"28", "29", "30", "31", "33", "35", "37", "39", "41", "43",
"45", "47", "49", "51", "53", "55", "57", "59", "61", "63",
"65", "67", "69", "71", "73", "75", "77", "79", "81", "83",
"85", "86", "87", "89", "91", "93", "95", "97", "99", "101",
"103", "105", "107", "109", "111", "113", "115", "117", "119",
"121", "123", "125", "127", "129", "131", "133", "135", "137",
"139", "141", "143", "145", "147", "149", "151", "153", "155",
"157", "159", "161", "163", "165", "167", "169", "171", "173",
"175", "177", "179", "181", "183", "185", "187", "189", "191",
"193", "195", "197", "199", "201", "205", "209", "215", "227",
"235", "245", "297", "303", "309", "339", "355", "439", "453",
"491", "510", "550", "590", "650", "700", "710", "740", "760",
"770", "777", "800", "810", "999", "203", "207", "217", "221",
"223", "275", "277", "295", "313", "381", "423", "680", "12",
"54", "186", "211", "213", "219", "225", "229", "231", "233",
"237", "239", "241", "247", "249", "251", "253", "255", "257",
"259", "261", "265", "267", "271", "273", "279", "281", "285",
"287", "289", "291", "293", "299", "305", "311", "321", "323",
"325", "329", "331", "337", "341", "343", "347", "349", "351",
"353", "361", "363", "365", "367", "371", "373", "375", "387",
"395", "397", "401", "407", "409", "415", "419", "427", "441",
"449", "451", "455", "457", "459", "463", "465", "467", "469",
"471", "473", "477", "479", "481", "485", "487", "489", "493",
"497", "499", "503", "520", "540", "570", "600", "630", "660",
"670", "683", "690", "730", "750", "775", "820", "830", "840",
"790"), class = "factor"), X_RACEGR2 = structure(c(1L, 1L,
NA, 1L, NA), .Label = c("1", "2", "3", "4", "5"), class = "factor"),
PERSDOC2 = structure(c(3L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3"), class = "factor"), POORHLTH = c(0, NA, NA, 0,
0), X_EDUCAG = structure(c(3L, 2L, 4L, 4L, 4L), .Label = c("1",
"2", "3", "4"), class = "factor"), X_PSU = c(2004006698L,
2004014294L, 2004100796L, 2004024220L, 2004005537L), X_STSTR = c(36011L,
5012L, 53271L, 41012L, 10011L), X_RFMAM2Y = structure(c(NA,
1L, NA, 1L, 1L), .Label = c("1", "2", "9"), class = "factor"),
X_RFSMOK3 = structure(c(2L, 1L, 1L, 2L, 1L), .Label = c("1",
"2"), class = "factor"), X_RFHLTH = structure(c(1L, 1L, 1L,
1L, 1L), .Label = c("1", "2", "3"), class = "factor"), YEAR = c(2004,
2004, 2004, 2004, 2004), bcccp = structure(c(2L, 2L, 2L,
2L, 1L), .Label = c("0", "1"), class = "factor"), pov.limit = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), cutoff = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), elig = c(NA, NA,
NA, NA, NA), bcccp_elig = c(NA, NA, NA, NA, NA)), .Names = c("household",
"NUMADULT", "CHILDREN", "SEX", "X_STATE", "X_FINALWT", "AGE",
"X_INCOMG", "X_MAM502Y", "HLTHPLAN", "MEDCOST", "QLACTLM2", "CTYCODE",
"X_RACEGR2", "PERSDOC2", "POORHLTH", "X_EDUCAG", "X_PSU", "X_STSTR",
"X_RFMAM2Y", "X_RFSMOK3", "X_RFHLTH", "YEAR", "bcccp", "pov.limit",
"cutoff", "elig", "bcccp_elig"), row.names = c(NA, 5L), class = "data.frame")
library(survey)
library(sqldf)
library(RSQLite)
drv=dbDriver('SQLite')
con=dbConnect(drv,'brfsagg.db')
dbWriteTable(con,'brfs0210',test)
dbListFields(con,'brfs0210') #This function works
sqldf("select SEX from brfs0210") #This works with my sample data but I get the same error message when I use the full data set.
dbExistsTable(con,'test') #This proves that the table exists
brfsvy=svydesign(id=~X_PSU, strata=~X_STSTR, weights=~X_FINALWT,nest=TRUE,
data='test',dbtype='SQLite',dbname=system.file('brfsagg.db',package='survey')) #This always generates the error message, regardless of whether I am using the test sample data or my full data set.
答案 0 :(得分:1)
您尝试编写的has already been written here附带blog post here的代码。你为什么要重新发明轮子?谷歌搜索r brfss
或import brfss into r
可以帮助您访问这些帖子。
您是否有理由想从头开始重新编写所有内容?有很多example syntax using SQLite
with the survey
package here ..如何解决这个特定问题。 :)
library(survey)
library(RSQLite)
db.filename <- 'brfsagg.db'
con <- dbConnect(SQLite(),db.filename)
dbWriteTable( con , 'test' , test )
brfsvy <-
svydesign(
id = ~X_PSU ,
strata = ~X_STSTR ,
weights = ~X_FINALWT ,
nest = TRUE ,
data = 'test' ,
dbtype = 'SQLite' ,
dbname = db.filename
)
svymean( ~ SEX , brfsvy )
options( 'survey.lonely.psu' = 'adjust' )
svymean( ~ SEX , brfsvy )
svymean( ~ factor( SEX ) , brfsvy )