最高效的转置(transpose)方式是什么?把下面这个 data.table:
> dt <- data.table( x = c(1, 1, 3, 1, 3, 1, 1), y = c(1, 2, 1, 2, 2, 1, 1) )
> dt
x y
1: 1 1
2: 1 2
3: 3 1
4: 1 2
5: 3 2
6: 1 1
7: 1 1
转换成:
> output
cn v1 v2 v3 v4 v5 v6 v7
1: x 1 1 3 1 3 1 1
2: y 1 2 1 2 2 1 1
`dcast.data.table` 应该是高效的,但我弄不清楚具体该如何用它来实现这一转换。
答案 0 :(得分:7)
可以使用 `data.table::transpose`:
# Transpose the table (each original column becomes a row) and prepend the
# original column names as the first column `cn`.
data.table(
  cn = colnames(dt),
  transpose(dt)
)
# cn V1 V2 V3 V4 V5 V6 V7
#1: x 1 1 3 1 3 1 1
#2: y 1 2 1 2 2 1 1
如果您真的很在意效率,下面的写法可能更好:
# Transpose first, then add the name column by reference with `:=`.
tdt <- transpose(dt)[, cn := names(dt)]
# Move `cn` to the front by reference. setcolorder() accepts a partial list of
# columns and leaves the remaining ones in their current order, so there is no
# need to build the full index permutation by hand with `1:(ncol(tdt) - 1)`.
setcolorder(tdt, "cn")
tdt
# cn V1 V2 V3 V4 V5 V6 V7
#1: x 1 1 3 1 3 1 1
#2: y 1 2 1 2 2 1 1
`transpose` 似乎比 `t`(后者调用 `do_transpose`)快一点,但差距不大。我猜这两种实现都已经非常接近非就地转置算法的效率上限。
# Benchmark input: the 7-row example repeated 1000 times (7000 rows, 2 columns).
Dt <- data.table(
  x = rep(c(1, 1, 3, 1, 3, 1, 1), times = 1e3),
  y = rep(c(1, 2, 1, 2, 2, 1, 1), times = 1e3)
)

# Sanity check: both approaches must yield the same table before timing them.
all.equal(
  data.table(t(Dt)),
  data.table(transpose(Dt))
)
#[1] TRUE

# Time base::t against data.table::transpose on the same input.
microbenchmark::microbenchmark(
  `base::t` = data.table(t(Dt)),
  `data.table::transpose` = data.table(transpose(Dt))
)
#Unit: milliseconds
# expr min lq mean median uq max neval
#base::t 415.4200 434.5308 481.4373 458.1619 507.9556 747.2022 100
#data.table::transpose 409.5624 425.8409 474.9709 444.5194 510.3750 685.0543 100
答案 1 :(得分:-1)
# Print the table whose column schema is summarised below.
temp_table

# Class vector of every column. lapply() always returns a list with one entry
# per column; sapply() would collapse to a matrix whenever all columns have the
# same number (> 1) of classes, which breaks the per-column indexing.
temp_table_data_types <- lapply(temp_table, class)

# One row per column, built in a single call instead of rbind()-ing inside a
# loop:
#   ROWID       - position of the column
#   COLUMN_NAME - name of the column
#   DATA_TYPE   - first class of the column
#   DETAILS     - second class of the column, or "" when there is only one
temp_table_schema <- data.table(
  ROWID = seq_along(temp_table_data_types),
  COLUMN_NAME = names(temp_table_data_types),
  DATA_TYPE = vapply(
    temp_table_data_types,
    function(cls) cls[[1]],
    character(1),
    USE.NAMES = FALSE
  ),
  DETAILS = vapply(
    temp_table_data_types,
    function(cls) if (length(cls) > 1) cls[[2]] else "",
    character(1),
    USE.NAMES = FALSE
  )
)
temp_table_schema

# Drop the intermediate helper; only `temp_table_schema` is kept.
rm(temp_table_data_types)