我有这样的数据
id , name , age, sex
1, sam, 22, m
2, rita, 34, f
3, robin, 32, f
4, josh, 21, m
我想将数据转换为
id, name, age, m, f
1, sam, 22, 1,0
2, rita, 34, 0,1
3, robin, 32, 0,1
4, josh, 21, 1,0
我可以在 R 中实现这种转换吗?除了使用下面这种写法之外,还有其他方法吗:
# Indicator column for male rows: 1 when sex is "m", otherwise 0.
# The data frame shown here has a `sex` column and no `Type` column, so the
# comparison must be made against `sex`.
dataset$m <- with(dataset, ifelse(sex == "m", 1, 0))
答案 0 :(得分:1)
你可以尝试
library(reshape2)
# Cast to wide format: every column other than `sex` stays as an identifier,
# and each distinct `sex` value becomes its own column holding the number of
# matching rows (0 or 1 for this data, i.e. an indicator).
dataset1 <- dcast(
  data = dataset,
  formula = ... ~ sex,
  fun.aggregate = length,
  value.var = "sex"
)
dataset1
# id name age f m
#1 1 sam 22 0 1
#2 2 rita 34 1 0
#3 3 robin 32 1 0
#4 4 josh 21 0 1
如果 'id'/'name'/'age' 的组合存在重复的行,我们可以先创建一个序列列('ind'),然后再执行 dcast:
# Number the rows within each (id, name, age) group so that duplicated rows
# get distinct `ind` values and are not collapsed into one row by dcast().
datasetN$ind <- ave(
  seq_along(datasetN$id),
  datasetN$id, datasetN$name, datasetN$age,
  FUN = seq_along
)
# Cast to wide indicator columns, then drop the helper column `ind`.
subset(
  dcast(datasetN, ... ~ sex, fun.aggregate = length, value.var = "sex"),
  select = -ind
)
# id name age f m
#1 1 sam 22 0 1
#2 2 rita 34 1 0
#3 3 robin 32 1 0
#4 3 robin 32 1 0
#5 4 josh 21 0 1
使用 'train.csv'(来自原文中的链接 here)的示例:
# Load the example data from the working directory.
train <- read.csv("train.csv")
# Running index within each City, so repeated cities stay on separate rows
# when the data is cast to wide format.
train$ind <- ave(seq_along(train$City), train$City, FUN = seq_along)
# One count column per City value; the helper column `ind` is dropped.
res <- subset(
  dcast(train, ... ~ City, fun.aggregate = length, value.var = "City"),
  select = -ind
)
dim(res)
#[1] 137 76
# Example data: four people with an integer id, name, integer age and sex.
# stringsAsFactors = FALSE keeps `name` and `sex` as character vectors on
# every R version.
dataset <- data.frame(
  id = 1:4,
  name = c("sam", "rita", "robin", "josh"),
  age = c(22L, 34L, 32L, 21L),
  sex = c("m", "f", "f", "m"),
  stringsAsFactors = FALSE
)
# Example data with a duplicated row (id 3, "robin") and numeric (double)
# id/age columns.
datasetN <- list(
  id = c(1, 2, 3, 4, 3),
  name = c("sam", "rita", "robin", "josh", "robin"),
  age = c(22, 34, 32, 21, 32),
  sex = c("m", "f", "f", "m", "f")
)
# Row names are set in the compact integer form c(NA, 5L) rather than by
# calling data.frame(), which would store c(NA, -5L) instead.
attr(datasetN, "row.names") <- c(NA, 5L)
class(datasetN) <- "data.frame"