从CSV和SQL导入数据时出现问题

时间:2013-06-13 16:25:37

标签: r csv dataframe

我有一个脚本,其中使用了以下语句:

read.csv("data.csv") -> y 

但我想改用以下方式为“y”赋值:

sqlQuery(ch, "select * from table") -> y

sqlQuery 返回的数据/列与 .csv 文件中的相同。我遇到的问题是:当我把数据源从 csv 改为 sql 之后,脚本的其余部分就无法正常运行了(出现了很多错误)。我可以用表中的值填充 y,但脚本的其余部分似乎无法正确使用它。所以我的问题是:使用 sqlQuery 从 SQL 导入数据后,能否把它整理成与从 .csv 读入时相同的格式,从而避免诸如 “$ operator is invalid for atomic vectors” 和 “Error in rep(1, nrow(xxxx)) : invalid 'times' argument” 之类的错误?

非常感谢任何帮助或指点。


> dput(head(from_sql))
structure(list(school_year = structure(c(1L, 1L, 1L, 1L, 1L, 
1L), .Label = "2010-2011", class = "factor"), student_number = c(689322L, 
698048L, 698048L, 720067L, 720067L, 725021L), teacher_dpsid = c(100192400L, 
100192400L, 100192400L, 100192400L, 100192400L, 100192400L), 
    school_number = c(459L, 459L, 459L, 459L, 459L, 459L), test_category = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L), .Label = "C", class = "factor"), 
    test_name = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "CNAME", class = "factor"), 
    measure_name = structure(c(3L, 2L, 3L, 2L, 3L, 2L), .Label = c("M", 
    "R", "W"), class = "factor"), course_code = c(1356L, 
    1356L, 1356L, 1356L, 1356L, 1356L), course_name = c(NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), grade_name = c(10L, 9L, 9L, 9L, 9L, 9L), test_date = c(NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), batteryx = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = ".", class = "factor"), 
    rs = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = ".", class = "factor"), 
    ss = c(452L, 638L, 529L, 599L, 484L, 594L), proficiency = c(NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), semesterx = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = ".", class = "factor"), 
    NP = c(24L, 97L, 96L, 77L, 91L, 80L), schoolyear = c(NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), assessment = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "xxxx", class = "factor")), .Names = c("school_year", 
"student_number", "teacher_dpsid", "school_number", "test_category", 
"test_name", "measure_name", "course_code", "course_name", "grade_name", 
"test_date", "batteryx", "rs", "ss", "proficiency", "semesterx", 
"NP", "schoolyear", "assessment"), row.names = c(NA, 6L), class = "data.frame")

> dput(head(from_csv))
structure(list(school_year = structure(c(1L, 1L, 1L, 1L, 1L, 
1L), .Label = "2010-2011", class = "factor"), student_number = c(3466L, 
3466L, 3466L, 3466L, 3466L, 3466L), teacher_dpsid = c(150L, 150L, 
201L, 201L, 201L, 201L), school_number = c(2L, 2L, 2L, 2L, 2L, 
2L), test_category = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "C", class = "factor"), 
    test_name = structure(c(1L, 1L, 2L, 2L, 3L, 3L), .Label = c("CMh", 
    "CR", "CW"), class = "factor"), measure_name = structure(c(1L, 
    1L, 2L, 2L, 3L, 3L), .Label = c("M", "R", "W"
    ), class = "factor"), course_code = c(4330L, 4331L, 1400L, 
    1405L, 1400L, 1405L), course_name = c(NA, NA, NA, NA, NA, 
    NA), grade_name = c(10L, 10L, 10L, 10L, 10L, 10L), test_date = c(NA, 
    NA, NA, NA, NA, NA), batteryx = structure(c(1L, 1L, 1L, 1L, 
    1L, 1L), .Label = ".", class = "factor"), rs = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L), .Label = ".", class = "factor"), ss = c(541L, 
    705L, 742L, 706L, 439L, 535L), proficiency = c(NA, NA, NA, 
    NA, NA, NA), semesterx = structure(c(1L, 1L, 1L, 1L, 1L, 
    1L), .Label = ".", class = "factor"), np = c(59L, 56L, 3L, 
    18L, 39L, 69L), schoolyear = c(NA, NA, NA, NA, NA, NA), assessment = structure(c(2L, 
    2L, 4L, 4L, 6L, 6L), .Label = c("CM2010-2011", "CM2011-2012", 
    "CR2010-2011", "CR2011-2012", "CW2010-2011", 
    "CW2011-2012"), class = "factor")), .Names = c("school_year", 
"student_number", "teacher_dpsid", "school_number", "test_category", 
"test_name", "measure_name", "course_code", "course_name", "grade_name", 
"test_date", "batteryx", "rs", "ss", "proficiency", "semesterx", 
"np", "schoolyear", "assessment"), row.names = c(NA, 6L), class = "data.frame")

0 个答案:

没有答案