在R中创建函数时出错

时间:2016-07-09 20:58:06

标签: r

创建这个函数时,我希望把用户输入的 filedata 和 cat_name 作为参数传入,但问题是:存储在 filedata 和 cat_name 中的这些输入并没有被传递到函数内部。我无法理解为什么会这样。我该怎么办?

代码:

# Ask the user to pick a CSV file and load it, keeping text columns as
# character vectors rather than factors.
filename = file.choose()
filedata = read.csv(filename, stringsAsFactors = F)

# Ask for the name of the column that should be turned into dummy variables.
cat_name = readline(prompt="Enter the categorical name to create its dummy variables: ")

# Intended purpose: replace the character column `cat_name` of `filedata`
# with one 0/1 column per category and return the modified data frame.
# NOTE(review): this function expression is never assigned to a name, so it
# cannot be called afterwards and the values read above never reach it.
function(filedata, cat_name)
{
  data_cat=filedata[,cat_name]

  if(class(data_cat)=="character")
  {
    # Frequency table sorted in ascending order; dropping the first entry
    # leaves out one of the least frequent categories.
    freq=sort(table(data_cat))
    freq=freq[-1]
    for( i in names(freq))
    {
      colName = paste(cat_name,i,sep="_")
      # NOTE(review): freq[i] is the count stored for category i, not the
      # category label, so this compares the column values with a count.
      filedata[,colName] = ifelse(data_cat==freq[i],1,0)

    }
    # Remove the original categorical column.
    filedata[,cat_name]=NULL

    print("Successfully created dummy variables...")
  } else
  {

    print("Please enter a categorical variable with character as its datatype")
  }
  return(filedata)
}

2 个答案:

答案 0 :(得分:1)

您的代码存在两个问题。

许多人已经指出的第一个问题是:创建虚拟变量的函数没有名称。

要解决这个问题,只需将函数定义赋值给一个变量即可。这里我选用的名称是 make_dummies。

这段代码的主要问题在于:freq[i] 得到的是计数,而不是要匹配的字符串(类别名称)。

更正

创建一些要测试的数据:

# Make some data: n rows with an id, a random lowercase letter as the
# categorical variable, and a random numeric variable.
n <- 10

set.seed(112)
d <- data.frame(
  id = 1:n,
  cat_var = sample(letters, n, replace = TRUE),
  num_var = runif(n),
  stringsAsFactors = FALSE
)

# Write the data to a temporary CSV file so it can be read back in the same
# way a user-selected file would be. TRUE/FALSE are spelled out because T and
# F are ordinary variables that can be reassigned.
tempcsv <- tempfile()

write.csv(d, file = tempcsv, row.names = FALSE)

数据样本:

 id cat_var   num_var
  1       j 0.2359040
  2       x 0.3471606
  3       z 0.9049400
  4       z 0.6322996
  5       g 0.6743289
  6       c 0.9700548
  7       b 0.5604765
  8       s 0.5553125
  9       d 0.7432414
 10       k 0.3701336

虚拟变量代码:

# Read that data in

# The interactive calls are kept as trailing comments; the temporary CSV and
# a fixed column name stand in for them so the example is reproducible.
filename <- tempcsv # file.choose()
filedata <- read.csv(filename, stringsAsFactors = FALSE)

cat_name <- "cat_var" # readline(prompt="Enter the categorical name to create its dummy variables: ")

#' Replace a character column with 0/1 dummy (indicator) columns.
#'
#' For every distinct value of the column except the first entry of the
#' ascending frequency table (one of the least frequent values, used as the
#' reference level), a column named "<cat_name>_<value>" is appended. The
#' original column is then removed.
#'
#' @param filedata A data frame.
#' @param cat_name Name of the character column to encode.
#' @return `filedata` with the dummy columns appended and the `cat_name`
#'   column removed. If the column is not of type character, a message is
#'   printed and `filedata` is returned unchanged.
make_dummies <- function(filedata, cat_name)
{
  # `[[` always extracts the bare column vector.
  data_cat <- filedata[[cat_name]]
  if (is.null(data_cat))
  {
    stop("Column '", cat_name, "' not found in the data", call. = FALSE)
  }

  # is.character() returns a single TRUE/FALSE even for columns with several
  # classes (e.g. ordered factors), where `class(x) == "character"` would
  # hand `if` a condition of length > 1.
  if (is.character(data_cat))
  {
    # Ascending frequency table; drop the first entry as the reference level.
    freq <- sort(table(data_cat))
    freq <- freq[-1]
    for (i in names(freq))
    {
      colName <- paste(cat_name, i, sep = "_")
      # Compare against the category label `i`, not the count `freq[i]`.
      filedata[[colName]] <- ifelse(data_cat == i, 1, 0)
    }
    filedata[[cat_name]] <- NULL

    print("Successfully created dummy variables...")
  } else
  {
    print("Please enter a categorical variable with character as its datatype")
  }
  return(filedata)
}

示例调用:

# Overwrite filedata with the dummy-encoded result; the surrounding
# parentheses make R print the assigned value at the console as well.
(filedata = make_dummies(filedata, cat_name))

输出:

   id   num_var cat_var_c cat_var_d cat_var_g cat_var_j cat_var_k cat_var_s cat_var_x cat_var_z
1   1 0.2359040         0         0         0         1         0         0         0         0
2   2 0.3471606         0         0         0         0         0         0         1         0
3   3 0.9049400         0         0         0         0         0         0         0         1
4   4 0.6322996         0         0         0         0         0         0         0         1
5   5 0.6743289         0         0         1         0         0         0         0         0
6   6 0.9700548         1         0         0         0         0         0         0         0
7   7 0.5604765         0         0         0         0         0         0         0         0
8   8 0.5553125         0         0         0         0         0         1         0         0
9   9 0.7432414         0         1         0         0         0         0         0         0
10 10 0.3701336         0         0         0         0         1         0         0         0

未来使用

此外,我强烈建议您使用 R 内置的 model.matrix() 函数,并把 string(字符)类型的数据适当地转换为 factor。

例如:

# Build the indicator matrix for cat_var; `- 1` removes the intercept so that
# every level gets its own column.
# NOTE(review): assumes filedata still contains the original cat_var column,
# i.e. the data as read in, before make_dummies() replaced it - confirm.
model.matrix(~ cat_var - 1, filedata)

输出(注意:此处的输出来自另一组随机生成的数据,与上文的数据样本并不对应):

  cat_vara cat_varg cat_varm cat_varo cat_vart cat_varu cat_varw cat_varz
1         1        0        0        0        0        0        0        0
2         1        0        0        0        0        0        0        0
3         0        0        0        0        0        0        1        0
4         0        0        1        0        0        0        0        0
5         0        1        0        0        0        0        0        0
6         0        0        0        0        1        0        0        0
7         0        0        0        0        1        0        0        0
8         0        0        0        0        0        1        0        0
9         0        0        0        0        0        0        0        1
10        0        0        0        1        0        0        0        0

答案 1 :(得分:0)

我把几行代码注释掉了,只是因为它们会让这段代码无法成为可重现的示例。作为替代,我加载了一个内置数据集来使用(而不是我文件系统中某个未知的文件)。

# The interactive inputs are disabled so that the example is reproducible:
# filename = file.choose()
# filedata = read.csv(filename, stringsAsFactors = F)
# cat_name = readline(prompt="Enter the categorical name to create its dummy variables: ")

# Use the built-in mtcars data set instead and expose its row names as a
# character column, which serves as the categorical variable.
data("mtcars")
mtcars[["cn"]] <- rownames(mtcars)
cat_name <- "cn"
filedata <- mtcars

names(filedata)
 [1] "mpg"  "cyl"  "disp" "hp"   "drat" "wt"   "qsec" "vs"   "am"   "gear" "carb" "cn"  


# Replace the character column `cat_name` of the data frame `filedata` with
# 0/1 dummy columns named "<cat_name>_<value>".
#
# The first entry of the ascending frequency table (one of the least frequent
# values) gets no column and acts as the reference level. The original column
# is removed afterwards.
#
# Returns the modified data frame. If the column is not of type character, a
# message is printed and `filedata` is returned unchanged.
f <-
function(filedata, cat_name)
{
  data_cat <- filedata[, cat_name]

  # is.character() always yields a single TRUE/FALSE, whereas
  # `class(x) == "character"` has length > 1 for multi-class columns such as
  # ordered factors.
  if (is.character(data_cat))
  {
    freq <- sort(table(data_cat))
    freq <- freq[-1]
    for (i in names(freq))
    {
      colName <- paste(cat_name, i, sep = "_")
      # Match on the category label `i`; `freq[i]` is only its count, and
      # comparing against it left every dummy column filled with 0.
      filedata[, colName] <- ifelse(data_cat == i, 1, 0)
    }
    filedata[, cat_name] <- NULL

    print("Successfully created dummy variables...")
  } else
  {
    print("Please enter a categorical variable with character as its datatype")
  }
  return(filedata)
}

# Replace filedata with the result of f(), then list the resulting column
# names.
filedata <- f(filedata,cat_name)
colnames(filedata)


 [1] "mpg"                    "cyl"                    "disp"                   "hp"                     "drat"                   "wt"                     "qsec"                  
 [8] "vs"                     "am"                     "gear"                   "carb"                   "cn_Cadillac Fleetwood"  "cn_Camaro Z28"          "cn_Chrysler Imperial"  
[15] "cn_Datsun 710"          "cn_Dodge Challenger"    "cn_Duster 360"          "cn_Ferrari Dino"        "cn_Fiat 128"            "cn_Fiat X1-9"           "cn_Ford Pantera L"     
[22] "cn_Honda Civic"         "cn_Hornet 4 Drive"      "cn_Hornet Sportabout"   "cn_Lincoln Continental" "cn_Lotus Europa"        "cn_Maserati Bora"       "cn_Mazda RX4"          
[29] "cn_Mazda RX4 Wag"       "cn_Merc 230"            "cn_Merc 240D"           "cn_Merc 280"            "cn_Merc 280C"           "cn_Merc 450SE"          "cn_Merc 450SL"         
[36] "cn_Merc 450SLC"         "cn_Pontiac Firebird"    "cn_Porsche 914-2"       "cn_Toyota Corolla"      "cn_Toyota Corona"       "cn_Valiant"             "cn_Volvo 142E"     
顺便说一下,这个函数很棒。我可能会用它。