我有一个庞大的数据框,最后30行在下面:
# Load data.table, which provides data.table() and the := operator used below.
library(data.table)
dput(p) 的输出如下:
structure(list(DATE = structure(c(1367516015, 1367516045, 1367516075,
1367516105, 1367516135, 1367516165, 1367516195, 1367516225, 1367516255,
1367516285, 1367516315, 1367516345, 1367516375, 1367516405, 1367516435,
1367516465, 1367516495, 1367516525, 1367516555, 1367516585, 1367516615,
1367516645, 1367516675, 1367516705, 1367516735, 1367516765, 1367516795,
1367516825, 1367516855, 1367516885), class = c("POSIXct", "POSIXt"
), tzone = ""), LPAR = structure(c(6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("cigp01a4a004", "cigp01b4a002",
"cigp01b4a004", "cigp04a4a002", "cigp04a4a004", "cigp04b4a002",
"cigp04b4a004"), class = "factor"), ENT = c(0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5
), USR_SYS_CPU_PCT = c(79L, 80L, 77L, 77L, 77L, 76L, 79L, 82L,
81L, 80L, 79L, 77L, 77L, 77L, 79L, 79L, 80L, 82L, 82L, 83L, 80L,
81L, 80L, 78L, 78L, 83L, 86L, 87L, 88L, 87L), ENT_PCT = c(706.8,
693.8, 570.1, 641.5, 558.5, 601.5, 674.3, 742.3, 668.9, 722.6,
679.1, 677.2, 548.5, 644.6, 689.3, 716.1, 709.5, 767.3, 753.7,
786.4, 684.2, 735.1, 688.2, 676.6, 645.6, 788, 859.5, 832.6,
883.1, 872.2), PHYSICAL_CPU_USED = c(3.53, 3.47, 2.85, 3.21,
2.79, 3.01, 3.37, 3.71, 3.34, 3.61, 3.4, 3.39, 2.74, 3.22, 3.45,
3.58, 3.55, 3.84, 3.77, 3.93, 3.42, 3.68, 3.44, 3.38, 3.23, 3.94,
4.3, 4.16, 4.42, 4.36), PROC_QUE = c(12L, 13L, 19L, 16L, 11L,
13L, 17L, 14L, 9L, 10L, 12L, 13L, 16L, 14L, 22L, 17L, 17L, 17L,
26L, 26L, 15L, 43L, 9L, 11L, 12L, 7L, 31L, 26L, 27L, 23L), RELATIVE_CORES = c(3.53,
3.47, 2.85, 3.21, 2.79, 3.01, 3.37, 3.71, 3.34, 3.61, 3.4, 3.39,
2.74, 3.22, 3.45, 3.58, 3.55, 3.84, 3.77, 3.93, 3.42, 3.68, 3.44,
3.38, 3.23, 3.94, 4.3, 4.16, 4.42, 4.36), USED_CORES = c(2.7887,
2.776, 2.1945, 2.4717, 2.1483, 2.2876, 2.6623, 3.0422, 2.7054,
2.888, 2.686, 2.6103, 2.1098, 2.4794, 2.7255, 2.8282, 2.84, 3.1488,
3.0914, 3.2619, 2.736, 2.9808, 2.752, 2.6364, 2.5194, 3.2702,
3.698, 3.6192, 3.8896, 3.7932)), .Names = c("DATE", "LPAR", "ENT",
"USR_SYS_CPU_PCT", "ENT_PCT", "PHYSICAL_CPU_USED", "PROC_QUE",
"RELATIVE_CORES", "USED_CORES"), class = "data.frame", row.names = c(NA,
-30L))
当我想使用data.table计算一些值时,如下所示:
# Convert the data.frame to a data.table.
p<-data.table(p)
# Add RELATIVE_PERCENT per (DATE, LPAR) group. This is the call that raises the
# type-mismatch error quoted below: the "yes" branch of ifelse() is double,
# while the "no" branch (USR_SYS_CPU_PCT) is integer.
p<-p[,RELATIVE_PERCENT:=ifelse(ENT_PCT>100, (USED_CORES/ENT)*100, USR_SYS_CPU_PCT), by= c("DATE", "LPAR")]
我收到此错误:
Error in `[.data.table`(x, , `:=`(RELATIVE_PERCENT, ifelse(ENT_PCT > 100, :
Type of RHS ('integer') must match LHS ('double'). To check and coerce would
impact performance too much for the fastest cases. Either change the type of
the target column, or coerce the RHS of := yourself (e.g. by using 1L instead
of 1)
这个错误是什么意思?我怎样才能解决这个错误?
答案 0 :(得分:10)
问题是您的ifelse
语句对于某些值返回integer
类型,对于其他一些条目则返回numeric
(double)。并且data.table
报出列类型不匹配的错误,因为它要求用户自行完成类型强制转换(出于性能考虑,正如错误信息所述)。所以,只需用as.numeric
包裹它,以便所有值都转换为double。
# Coerce the complete ifelse() result to double so that every group assigns
# the same type to RELATIVE_PERCENT, whichever branch was taken.
p <- p[, RELATIVE_PERCENT := {
  pct <- ifelse(ENT_PCT > 100, (USED_CORES / ENT) * 100, USR_SYS_CPU_PCT)
  as.numeric(pct)
}, by = c("DATE", "LPAR")]
答案 1 :(得分:3)
我这样做了:
# Show the class of every column of p to spot the integer-typed ones.
sapply(p, class)
并注意到我的一个列是整数。然后我这样做了:
# Convert only the integer branch (USR_SYS_CPU_PCT) to double so that both
# ifelse() branches have the same type. The table is named `p`, as in the
# sapply(p, class) call above and in the question.
p <- p[, RELATIVE_PERCENT := ifelse(ENT_PCT > 100, (USED_CORES / ENT) * 100,
                                    as.numeric(USR_SYS_CPU_PCT)),
       by = c("DATE", "LPAR")]
这样问题就完美解决了。