我正在尝试将 R + MonetDB 用作大数据分析技术栈,但在创建新列并用分析得到的数据填充该列时遇到了问题。下面是一个简化的示例:
# Toy reproduction: write mtcars into an embedded MonetDBLite database, add a
# DOUBLE column, then try to fill that column from R with a bound INSERT.
library(MonetDBLite)
library(DBI)
data(mtcars)
# Connect to the embedded database located at ./test.db
db <- dbConnect(MonetDB.R::MonetDB(), embedded="./test.db")
# load mtcars into the database
dbWriteTable(conn=db, value = mtcars, name = "mtcars", overwrite=TRUE)
## Add a new column
dbSendQuery(db, "ALTER TABLE mtcars ADD v1 DOUBLE;")
## insert data into the new column
# Single-bracket subsetting keeps v1 as a one-column data.frame, not a vector
v1 <- mtcars["mpg"] * pi
# NOTE: this is the call that raises the error shown after this snippet: the
# whole column ends up as one 'c(...)' string substituted for the single `?`
dbSendQuery(db, "INSERT INTO mtcars (v1) VALUES (?)", bind.data=v1)
错误信息:
Error in .local(conn, statement, ...) :
Unable to execute statement 'INSERT INTO mtcars (v1) VALUES ('c(65.9734457253857, 65.9734457253857, 71.6283125018473, 67.23008278...'.
Server says 'ERROR: 49.6371639267187, 61.8893752757189, 47.1238898038469, 67.2300827868216)' to type dbl failed.
' [#conversion of string 'c(65.9734457253857, 65.9734457253857, 71.6283125018473, 67.2300827868216, 58.7477826221291, 56.8628270299753, 44.924774946334, 76.6548607475909, 71.6283125018473, 60.318578948924, 55.9203492338983, 51.5221195188726, 54.3495529071034, 47.7522083345649, 32.6725635973338, 32.6725635973338, 46.18141200777, 101.787601976309, 95.5044166691297, 106.499990956694, 67.5442420521806, 48.6946861306418, 47.7522083345649, 41.7831822927443, 60.318578948924, 85.7654794430014, 81.6814089933346, 95.5044166691297,
].
In addition: Warning message:
In if (is.na(value)) statement <- sub("?", "NULL", statement, fixed = TRUE) else if (valueClass %in% :
the condition has length > 1 and only the first element will be used
从这个错误来看,我猜测 bind.data 可能无法与 MonetDBLite 一起使用?
问题:
如何将列添加到MonetDBLite表并使用来自R会话的数据填充它?
答案 0 :(得分:0)
首先,最后一条语句中使用“INSERT”命令是不正确的:INSERT 会追加新行,而不会填充已有行中的新列。您需要的是“UPDATE”语句。
话虽如此,我提出了一个解决方案,您可以直接从R中填充MonetDBLite表:
# Fill a newly added column of a MonetDBLite table from R by issuing one
# UPDATE statement per row, addressed through an explicit row number.
library(MonetDBLite)
library(DBI)
data(mtcars)
db <- dbConnect(MonetDB(), embedded="./test.db")
# Add a row number to the dataset so each row can be targeted in a WHERE clause
# (seq_len() is safe even for a zero-row data frame, unlike 1:nrow())
mtcars$rownbr <- seq_len(nrow(mtcars))
# load mtcars into the database
dbWriteTable(conn = db, value = mtcars, name = "mtcars", overwrite = TRUE)
## Add a new column
dbSendQuery(db, "ALTER TABLE mtcars ADD v1 DOUBLE;")
## Compute the new values in R as a plain numeric vector
v1 <- mtcars$mpg * pi
## Write the values back, one UPDATE per row
for (i in seq_len(nrow(mtcars))) {
  # The space before WHERE is required: without it the numeric literal and
  # the keyword are glued together (e.g. "65.97where") in the generated SQL
  myquery <- paste0(
    "UPDATE mtcars SET v1 = ", v1[i],
    " WHERE rownbr = ", i, ";"
  )
  dbSendQuery(db, myquery)
}
答案 1 :(得分:0)
在 MonetDBLite 中,通过 dbBind 执行参数化 SQL 查询似乎存在问题(请参见 https://github.com/hannesmuehleisen/MonetDBLite-R/issues/16)。以下代码在 SQLite 上可以正常运行:
# Reference implementation on SQLite: add a column, compute its values in R,
# and write them back with a single prepared UPDATE bound to a data frame.
library(DBI)
library(RSQLite)
data(mtcars)
db <- dbConnect(SQLite(), ":memory:")
# load mtcars into the database; row.names = TRUE stores the row names in a
# "row_names" column, which serves as the key for the UPDATE below
dbWriteTable(conn = db, value = mtcars, name = "mtcars", overwrite = TRUE,
             row.names = TRUE)
## Add a new column
# dbExecute() runs the statement and releases its result in one step; a bare
# dbSendQuery() would leave an open result that is never cleared
dbExecute(db, "ALTER TABLE mtcars ADD v1 DOUBLE;")
## do computation with R
mtcars$v1 <- mtcars$mpg * pi
mtcars$row_names <- rownames(mtcars)
# dbSendStatement() is the DBI entry point for data-manipulation statements
# that are bound and executed later
update_query <- dbSendStatement(
  db,
  'update mtcars set "v1"=$v1 WHERE row_names=$row_names'
)
dbBind(update_query, mtcars[, c("v1", "row_names")]) # send the updated data
dbClearResult(update_query) # release the prepared statement
dbReadTable(db, "mtcars")
dbDisconnect(db)
但是,在 MonetDBLite 上,同样的代码会在 dbBind 这一步产生错误(以及一条警告):
> dbBind(update_query, mtcars[, c("v1", "row_names")]) # send the updated data
Error in vapply(params, function(x) { : values must be length 1,
but FUN(X[[1]]) result is length 32
In addition: Warning message:
In if (is.na(x)) "NULL" else if (is.numeric(x) || is.logical(x)) { :
the condition has length > 1 and only the first element will be used
我想出的解决方法是使用 glue 包中的 glue_data_sql 来“手动”拼接查询(无需显式遍历各行):
# MonetDBLite workaround: build one literal UPDATE statement per row with
# glue_data_sql() and send each statement separately.
library(MonetDBLite)
library(DBI)
library(glue)

data(mtcars)
db <- dbConnect(MonetDB(), embedded="./test.db")
dbWriteTable(
  conn = db,
  value = mtcars,
  name = "mtcars",
  overwrite = TRUE,
  row.names = TRUE
)
dbSendQuery(db, "ALTER TABLE mtcars ADD v1 DOUBLE;")

# Key column and new values, both computed on the R side
mtcars$row_names <- rownames(mtcars)
mtcars$v1 <- mtcars$mpg * pi

# Interpolate every data-frame row into the template; the connection is
# passed so that values are quoted for this backend
update_statements <- glue_data_sql(
  mtcars,
  "update mtcars set v1 = {v1} where row_names = {row_names};",
  .con = db
)
lapply(update_statements, dbSendQuery, conn = db)

# verify
dbReadTable(db, "mtcars")
dbDisconnect(db)
另一种变通方法是在单个查询中执行 insert(更接近 @Zelazny7 的原始尝试):
# MonetDBLite workaround: create a two-column table and load all rows with a
# single multi-row INSERT statement assembled by glue.
library(MonetDBLite)
library(DBI)
library(glue)
data(mtcars)
db <- dbConnect(MonetDB(), embedded="./test.db")
# NOTE: a plain CREATE TABLE is used, so this assumes the database does not
# already contain a table named mtcars
dbSendQuery(db, "CREATE TABLE mtcars (
row_names VARCHAR(32),
v1 DOUBLE);")
mtcars$row_names <- rownames(mtcars)
mtcars$v1 <- mtcars$mpg * pi
# Let glue_data_sql() quote the string values through the connection instead
# of hand-writing '...' around them, so a name containing a single quote
# cannot break the statement
insert_values <- glue_data_sql(mtcars, "({row_names}, {v1})", .con = db)
# Join the per-row tuples into one comma-separated VALUES list
insert_values <- glue_collapse(insert_values, sep = ", ")
insert_query <- glue(
  "INSERT INTO mtcars (row_names, v1) VALUES {insert_values}"
)
dbSendQuery(db, insert_query)
dbReadTable(db, "mtcars")
dbDisconnect(db)