我有一个数据框 `d3`(其 dput 输出如下):
# dput() output of the example data frame: 10 rows and 10 columns (x1..x10),
# a mix of double and integer columns; rows 6-10 repeat rows 1-5.
# Assign the result (d3 <- structure(...)) to recreate `d3`.
structure(list(x1 = c(12.800454545, 17.71, 5.805, 13.111875, 14.121428571, 12.800454545, 17.71, 5.805, 13.111875, 14.121428571),
x2 = c(281.61, 230.23, 11.61, 209.79, 296.55, 281.61, 230.23, 11.61, 209.79, 296.55),
x3 = c(19.41, 13.91, 0, 2.37, 23.49, 19.41, 13.91, 0, 2.37, 23.49),
x4 = c(65L, 62L, 3L, 41L, 45L, 65L, 62L, 3L, 41L, 45L),
x5 = c(0.571428571, 1.857142857, 21.14285714, 2.571428571, 1.428571429, 0.571428571, 1.857142857, 21.14285714, 2.571428571, 1.428571429),
x6 = c(52L, 40L, 3L, 22L, 33L, 52L, 40L, 3L, 22L, 33L),
x7 = c(44.53, 15.38, 5.97, 4.97, 13.94, 44.53, 15.38, 5.97, 4.97, 13.94),
x8 = c(65L, 53L, 3L, 41L, 45L, 65L, 53L, 3L, 41L, 45L),
x9 = c(6L, 4L, 1L, 1L, 1L, 6L, 4L, 1L, 1L, 1L),
x10 = c(46.43, 17.52, 0, 11.73, 0, 46.43, 17.52, 0, 11.73, 0)),
row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L), class = "data.frame")
我想把虚拟变量(dummy)列添加到 df_dummy(d3 的副本)中:每个原始列分成 5 组,每组对应一个虚拟列,因此总共应有 10 + (5 × 10) = 60 列。
我尝试如下:
library(Hmisc)

# Start from a copy of d3; the loop appends the dummy output of each column.
df_dummy <- d3
for (i in 1:ncol(d3)) {
  # Bin column i into (up to) 5 quantile groups and keep the group index.
  bin <- as.integer(cut2(d3[[i]], g = 5))
  # `bin` is an integer (numeric) vector, not a factor, so model.matrix()
  # produces ONE numeric column named "bin" instead of one indicator column
  # per group -- this is why only a single new column per variable appears.
  dummies <- model.matrix(~ bin + 0, data = d3)
  # Rename the column from the variable name to "<source column>_D<i>".
  colnames(dummies) <- gsub("bin", paste0(names(d3)[i], "_D", i), colnames(dummies))
  # Append to the running result as a data frame.
  df_dummy <- cbind(df_dummy, as.data.frame(dummies))
  rm(bin, dummies)
}
它返回了df_dummy,其中包含以下列:
x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x1_D1 x2_D2 x3_D3 x4_D4 x5_D5 x6_D6 x7_D7 x8_D8 x9_D9 x10_D10
而不是像我解释的那样:
x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x1_D1, x1_D2, x1_D3, x1_D4, x1_D5, x2_D1, x2_D2, x2_D3, x2_D4, x2_D5, ……依此类推(每个原始列对应 5 个虚拟列)
答案 0 :(得分:0)
试试下面的代码……如果尚未安装这些软件包,请先运行 install.packages(c("fastDummies", "Hmisc")) 进行安装。
library(fastDummies)
library(Hmisc)

# Build `fdf`: a copy of d3 followed by one indicator (dummy) column for each
# quantile group of each column of d3. Dummy columns are named
# "<source column>_rank_<group index>".
fdf <- d3
for (i in seq_len(ncol(d3))) {
  # Bin column i into (up to) 5 quantile groups and keep the group index.
  # cut2() can return fewer than `g` groups when a column has few distinct
  # values, so a column may contribute fewer than 5 dummy columns.
  binned <- data.frame(rank = as.integer(cut2(d3[[i]], g = 5)))
  # Expand the group index on a one-column scratch frame: d3 is never
  # modified (an existing `rank` column would otherwise be clobbered), and
  # the dummies are simply "everything after the first column" regardless of
  # how many columns d3 has (no hard-coded column offset).
  results <- dummy_cols(binned, select_columns = "rank")[-1]
  # Prefix with the actual source column name rather than a hard-coded "x<i>".
  names(results) <- paste0(names(d3)[i], "_", names(results))
  fdf <- cbind(fdf, results)
  rm(binned, results)
}
根据您的数据,我得到了以下列(共 57 列而不是 60 列:x9 只有 3 个不同取值、x10 只有 4 个不同取值,因此分出的组不足 5 个):
names(fdf)
[1] "x1" "x2" "x3" "x4" "x5" "x6" "x7" "x8" "x9"
[10] "x10" "x1_rank_2" "x1_rank_5" "x1_rank_1" "x1_rank_3" "x1_rank_4" "x2_rank_4" "x2_rank_3" "x2_rank_1"
[19] "x2_rank_2" "x2_rank_5" "x3_rank_4" "x3_rank_3" "x3_rank_1" "x3_rank_2" "x3_rank_5" "x4_rank_5" "x4_rank_4"
[28] "x4_rank_1" "x4_rank_2" "x4_rank_3" "x5_rank_1" "x5_rank_3" "x5_rank_5" "x5_rank_4" "x5_rank_2" "x6_rank_5"
[37] "x6_rank_4" "x6_rank_1" "x6_rank_2" "x6_rank_3" "x7_rank_5" "x7_rank_4" "x7_rank_2" "x7_rank_1" "x7_rank_3"
[46] "x8_rank_5" "x8_rank_4" "x8_rank_1" "x8_rank_2" "x8_rank_3" "x9_rank_3" "x9_rank_2" "x9_rank_1" "x10_rank_4"
[55] "x10_rank_3" "x10_rank_1" "x10_rank_2"