**你好每一个,我是R语言的新手,我正在尝试将数据从分类转换为数字,我读了Database.csv,我做了所有的操作,但我在结果中遇到错误,代码没有运行, 错误“错误在r-arguments-imply-different-number-of-rows-4 7”
注意:我的数据库只包含一行,我试过其他数据库,包括多行和所有操作。
这是我的数据库,在我的代码下面。 https://my.pcloud.com/publink/show?code=XZbbjNZoNfJRmssXnSj8tQq6qjOyJayKwyX
是的,请帮助我。 **rm(list=ls(all=TRUE))
setwd("C:/Users/maher/Desktop/R")
# Load required libraries
library(vegan)
library(dummies)
library(dummy)
library(e1071)
attr = c('id', 'age', 'exp', 'inc', 'zip', 'family',
'ccavg', 'edu', 'mortgage', 'loan',
'securities', 'cd', 'online', 'cc')
# Read the data using csv file
data = read.csv(file = "One.csv",
header = TRUE, col.names = attr)
# Removing the id, zip and experience.
drop_Attr = c("id", "zip", "exp")
attr = setdiff(attr, drop_Attr)
data = data[, attr]
rm(drop_Attr)
# Convert attribute to appropriate type
cat_Attr = c("family", "edu", "securities",
"cd", "online", "cc", "loan")
num_Attr = setdiff(attr, cat_Attr)
cat_Data <- data.frame(sapply(data[,cat_Attr], as.factor))
num_Data <- data.frame(sapply(data[,num_Attr], as.numeric))
data = cbind(num_Data, cat_Data)
# Do the summary statistics and check for missing values and outliers.
summary(data)
#------------------------------------------------------
ind_Num_Attr = num_Attr
rm(num_Attr)
ind_Cat_Attr = setdiff(cat_Attr, "loan")
rm(cat_Attr)
# Standardizing the numeric data
cla_Data = decostand(data[,ind_Num_Attr], "range")
rm(ind_Num_Attr)
# Convert all categorical attributes to numeric
# 1. Using dummy function, convert education and family categorical**strong text** attributes into numeric attributes
edu = dummy(data.frame(data$edu))
family = dummy(data.frame(data$family))
cla_Data = cbind(cla_Data, edu, family)
ind_Cat_Attr = setdiff(ind_Cat_Attr, c("edu", "family"))
rm(edu, family)
# 2. Using as.numeric function, convert remaining categorical attributes into numeric attributes
cla_Data = cbind(cla_Data, sapply(data[,ind_Cat_Attr], as.numeric))
ind_Attr = names(cla_Data)
cla_Data = cbind(cla_Data, loan=data[,"loan"])
str(cla_Data)
cla_Data <- data.frame(sapply(cla_Data,as.numeric))
summary(cla_Data)
write.csv(cla_Data,"OneAfter.csv")
答案 0 :(得分:0)
以下是一些修订后的代码。
- 在sep = ";"
read.csv
- 我在colClasses
上使用了read.csv
来正确设置初始类型。我更喜欢这个清楚正确的类型。这消除了一些转换
- 我删除了所有rm
语句,因为它使代码难以阅读并且通常不是必需的
- 您的剩余转化次数看起来是正确的。
- 注意:我添加了数据[2,]&lt; - data [1,]以添加第二行测试数据。我相信你的一些sapply没有正常工作,因为只有一行被视为向量。删除此文件以获取完整文件
# Load required libraries
library(vegan)
library(dummies)
library(dummy)
library(e1071)
attr = c('id', 'age', 'exp', 'inc', 'zip', 'family',
'ccavg', 'edu', 'mortgage', 'loan',
'securities', 'cd', 'online', 'cc')
classes = c("NULL", "numeric", "NULL", "numeric", "NULL", "factor",
"numeric", "factor", "numeric", "factor",
"factor", "factor", "factor", "factor")
# Read the data using csv file
data <- read.csv(file = "~/Downloads/one.csv", sep = ";",
col.names = attr, colClasses = classes)
data[2,] <- data[1,]
num_Attr <- names(data)[sapply(data[1,], class)== "numeric"]
cat_Attr <- names(data)[sapply(data[1,], class)== "factor"]
# Do the summary statistics and check for missing values and outliers.
summary(data)
#------------------------------------------------------
ind_Num_Attr = num_Attr
ind_Cat_Attr = setdiff(cat_Attr, "loan")
# Standardizing the numeric data
cla_Data = decostand(data[,ind_Num_Attr], "range")
# Convert all categorical attributes to numeric
# 1. Using dummy function, convert education and family categorical**strong text** attributes into numeric attributes
edu = dummy(data.frame(data$edu))
family = dummy(data.frame(data$family))
cla_Data = cbind(cla_Data, edu, family)
ind_Cat_Attr = setdiff(ind_Cat_Attr, c("edu", "family"))
# 2. Using as.numeric function, convert remaining categorical attributes into numeric attributes
cla_Data = cbind(cla_Data, sapply(data[,ind_Cat_Attr], as.numeric))
ind_Attr = names(cla_Data)
cla_Data = cbind(cla_Data, loan=data[,"loan"])
str(cla_Data)
cla_Data <- data.frame(sapply(cla_Data,as.numeric))
summary(cla_Data)
write.csv(cla_Data,"OneAfter.csv")