Question

我有一个数据集，其中包含数值型变量nr.employed。我正在使用下面的代码对其进行规范化：

markting_train_dim_deleted =

"","custAge","profession","marital","schooling","default","contact","month","campaign","previous","poutcome","cons.price.idx","cons.conf.idx","euribor3m","nr.employed","pmonths","pastEmail","responded"
    "1",0.486842105263158,"1","3","7","2","1","8",0,0,"2",0.389321901792677,0.368200836820084,0.806393108138744,5195.8,999,0,"1"
    "2",0.342105263157895,"2","2","1","1","1","4",0,0,"2",0.669134840218243,0.338912133891213,0.980729993198821,5228.1,999,0,"1"
    "3",0.315789473684211,"10","2","4","1","2","7",0,0,"2",0.698752922837102,0.602510460251046,0.95737927907504,5191,999,0,"1"
    "4",0.486842105263158,"5","1","1","2","1","4",0.0256410256410256,0,"2",0.669134840218243,0.338912133891213,0.981183405123555,5228.1,999,0,"1"
    "5",0.215870043275927,"1","1","7","1","1","7",0.102564102564103,0.166666666666667,"1",0.26968043647701,0.192468619246862,0.148945817274994,5099.1,999,1,"1"
    "6",0.381578947368421,"2","2","1","1","2","7",0,0,"2",0.698752922837102,0.602510460251046,0.95737927907504,5191,999,0,"1"

# Min-max normalize every column named in cnames, overwriting each column
# in place in markting_train_dim_deleted.
# NOTE(review): the entry " nr.employed" below starts with a space, so it is
# not the same string as the column name "nr.employed".
cnames=c("custAge","campaign","previous","cons.price.idx","cons.conf.idx",
         "euribor3m"," nr.employed","pmonths","pastEmail")
for(i in cnames){
  # Show the column name and its current values before rescaling.
  print(i)
  print(markting_train_dim_deleted[,i])

  # (x - min(x)) / max(x - min(x)); the denominator is the column's range.
  # NOTE(review): a constant column gives a zero denominator (0/0 -> NaN).
  markting_train_dim_deleted[,i]=
    (markting_train_dim_deleted[,i]-min(markting_train_dim_deleted[,i]))/
                                    (max(markting_train_dim_deleted[,i]-min(markting_train_dim_deleted[,i])))

}

处理完euribor3m之后，在打印nr.employed时，代码抛出了如下错误：

Error in `[.data.frame`(markting_train_dim_deleted, , i) : 
  undefined columns selected

我看过这个结构。它是一个没有缺失值的数值数据类型。

输出

dput(head(markting_train_dim_deleted))

structure(list(custAge = c(0.486842105263158, 0.342105263157895, 
0.315789473684211, 0.486842105263158, 0.215870043275927, 0.381578947368421
), profession = structure(c(1L, 2L, 10L, 5L, 1L, 2L), .Label = c("1", 
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"), class = "factor"), 
    marital = structure(c(3L, 2L, 2L, 1L, 1L, 2L), .Label = c("1", 
    "2", "3", "4"), class = "factor"), schooling = structure(c(7L, 
    1L, 4L, 1L, 7L, 1L), .Label = c("1", "2", "3", "4", "5", 
    "6", "7", "8"), class = "factor"), default = structure(c(2L, 
    1L, 1L, 2L, 1L, 1L), .Label = c("1", "2", "3"), class = "factor"), 
    contact = structure(c(1L, 1L, 2L, 1L, 1L, 2L), .Label = c("1", 
    "2"), class = "factor"), month = structure(c(8L, 4L, 7L, 
    4L, 7L, 7L), .Label = c("1", "2", "3", "4", "5", "6", "7", 
    "8", "9", "10"), class = "factor"), campaign = c(0, 0, 0, 
    0.0256410256410256, 0.102564102564103, 0), previous = c(0, 
    0, 0, 0, 0.166666666666667, 0), poutcome = structure(c(2L, 
    2L, 2L, 2L, 1L, 2L), .Label = c("1", "2", "3"), class = "factor"), 
    cons.price.idx = c(0.389321901792677, 0.669134840218243, 
    0.698752922837102, 0.669134840218243, 0.26968043647701, 0.698752922837102
    ), cons.conf.idx = c(0.368200836820084, 0.338912133891213, 
    0.602510460251046, 0.338912133891213, 0.192468619246862, 
    0.602510460251046), euribor3m = c(0.806393108138744, 0.980729993198821, 
    0.95737927907504, 0.981183405123555, 0.148945817274994, 0.95737927907504
    ), nr.employed = c(5195.8, 5228.1, 5191, 5228.1, 5099.1, 
    5191), pmonths = c(999, 999, 999, 999, 999, 999), pastEmail = c(0L, 
    0L, 0L, 0L, 1L, 0L), responded = structure(c(1L, 1L, 1L, 
    1L, 1L, 1L), .Label = c("1", "2"), class = "factor")), .Names = c("custAge", 
"profession", "marital", "schooling", "default", "contact", "month", 
"campaign", "previous", "poutcome", "cons.price.idx", "cons.conf.idx", 
"euribor3m", "nr.employed", "pmonths", "pastEmail", "responded"
), row.names = c(NA, 6L), class = "data.frame")

Answer 1

错误的原因只是在cnames中写成了" nr.employed"（带前导空格），而不是"nr.employed"。

此外，像

# Min-max rescale every column listed in cnames to [0, 1] in one step.
# lapply() always returns a list with one element per column, so the result
# is assigned back column by column without sapply()'s matrix simplification.
# cnames must contain exact column names (e.g. "nr.employed", no leading space).
# NOTE: a constant column has max(x) == min(x), so it becomes NaN (0 / 0).
markting_train_dim_deleted[cnames] <- lapply(
  markting_train_dim_deleted[cnames],
  function(x) (x - min(x)) / (max(x) - min(x))
)

会使标准化更容易阅读。

尝试规范化数据时，R报出"undefined columns selected"（选择了未定义的列）错误

1 个答案: