我有一个包含变量 nr.employed 的数据集,该变量是数值型。我正在使用下面的代码
对其进行规范化。markting_train_dim_deleted =
"","custAge","profession","marital","schooling","default","contact","month","campaign","previous","poutcome","cons.price.idx","cons.conf.idx","euribor3m","nr.employed","pmonths","pastEmail","responded"
"1",0.486842105263158,"1","3","7","2","1","8",0,0,"2",0.389321901792677,0.368200836820084,0.806393108138744,5195.8,999,0,"1"
"2",0.342105263157895,"2","2","1","1","1","4",0,0,"2",0.669134840218243,0.338912133891213,0.980729993198821,5228.1,999,0,"1"
"3",0.315789473684211,"10","2","4","1","2","7",0,0,"2",0.698752922837102,0.602510460251046,0.95737927907504,5191,999,0,"1"
"4",0.486842105263158,"5","1","1","2","1","4",0.0256410256410256,0,"2",0.669134840218243,0.338912133891213,0.981183405123555,5228.1,999,0,"1"
"5",0.215870043275927,"1","1","7","1","1","7",0.102564102564103,0.166666666666667,"1",0.26968043647701,0.192468619246862,0.148945817274994,5099.1,999,1,"1"
"6",0.381578947368421,"2","2","1","1","2","7",0,0,"2",0.698752922837102,0.602510460251046,0.95737927907504,5191,999,0,"1"
# Names of the numeric columns to rescale.
# NOTE(review): the entry " nr.employed" below starts with a space, so it does
# not match the data frame's column "nr.employed"; indexing with it is what
# raises "undefined columns selected".
cnames=c("custAge","campaign","previous","cons.price.idx","cons.conf.idx",
"euribor3m"," nr.employed","pmonths","pastEmail")
# Min-max scale each listed column in place: (x - min(x)) / (max(x) - min(x)).
for(i in cnames){
# Debug output: the column name, then its current values.
print(i)
print(markting_train_dim_deleted[,i])
# max(x - min(x)) equals max(x) - min(x), so the denominator is the range.
# NOTE(review): a constant column gives 0 / 0 = NaN; pmonths is constant in
# the six sample rows shown - confirm against the full data.
markting_train_dim_deleted[,i]=
(markting_train_dim_deleted[,i]-min(markting_train_dim_deleted[,i]))/
(max(markting_train_dim_deleted[,i]-min(markting_train_dim_deleted[,i])))
}
处理完 euribor3m 之后,在打印 nr.employed 这一步时抛出了如下异常:
Error in `[.data.frame`(markting_train_dim_deleted, , i) :
undefined columns selected
我检查过数据结构:该变量是数值类型,并且没有缺失值。
以下是 dput 的输出:
dput(head(markting_train_dim_deleted))
structure(list(custAge = c(0.486842105263158, 0.342105263157895,
0.315789473684211, 0.486842105263158, 0.215870043275927, 0.381578947368421
), profession = structure(c(1L, 2L, 10L, 5L, 1L, 2L), .Label = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"), class = "factor"),
marital = structure(c(3L, 2L, 2L, 1L, 1L, 2L), .Label = c("1",
"2", "3", "4"), class = "factor"), schooling = structure(c(7L,
1L, 4L, 1L, 7L, 1L), .Label = c("1", "2", "3", "4", "5",
"6", "7", "8"), class = "factor"), default = structure(c(2L,
1L, 1L, 2L, 1L, 1L), .Label = c("1", "2", "3"), class = "factor"),
contact = structure(c(1L, 1L, 2L, 1L, 1L, 2L), .Label = c("1",
"2"), class = "factor"), month = structure(c(8L, 4L, 7L,
4L, 7L, 7L), .Label = c("1", "2", "3", "4", "5", "6", "7",
"8", "9", "10"), class = "factor"), campaign = c(0, 0, 0,
0.0256410256410256, 0.102564102564103, 0), previous = c(0,
0, 0, 0, 0.166666666666667, 0), poutcome = structure(c(2L,
2L, 2L, 2L, 1L, 2L), .Label = c("1", "2", "3"), class = "factor"),
cons.price.idx = c(0.389321901792677, 0.669134840218243,
0.698752922837102, 0.669134840218243, 0.26968043647701, 0.698752922837102
), cons.conf.idx = c(0.368200836820084, 0.338912133891213,
0.602510460251046, 0.338912133891213, 0.192468619246862,
0.602510460251046), euribor3m = c(0.806393108138744, 0.980729993198821,
0.95737927907504, 0.981183405123555, 0.148945817274994, 0.95737927907504
), nr.employed = c(5195.8, 5228.1, 5191, 5228.1, 5099.1,
5191), pmonths = c(999, 999, 999, 999, 999, 999), pastEmail = c(0L,
0L, 0L, 0L, 1L, 0L), responded = structure(c(1L, 1L, 1L,
1L, 1L, 1L), .Label = c("1", "2"), class = "factor")), .Names = c("custAge",
"profession", "marital", "schooling", "default", "contact", "month",
"campaign", "previous", "poutcome", "cons.price.idx", "cons.conf.idx",
"euribor3m", "nr.employed", "pmonths", "pastEmail", "responded"
), row.names = c(NA, 6L), class = "data.frame")
答案 0 :(得分:2)
错误只是因为在 cnames 中使用了 " nr.employed"(带前导空格),而不是 "nr.employed"。
此外,像
# Min-max scale every column named in cnames to [0, 1] in one step.
# Indexing with [cnames] (no comma) and lapply() keeps the list-of-columns
# shape, so this also works when cnames holds a single name; df[, cnames]
# would drop to a plain vector there and the function would be applied per
# element instead of per column.
# A constant column yields NaN (0 / 0), same as the explicit loop.
markting_train_dim_deleted[cnames] <- lapply(
  markting_train_dim_deleted[cnames],
  function(x) (x - min(x)) / (max(x) - min(x))
)
会使标准化更容易阅读。