此数据框
# Example data: a 32-row data frame with 11 numeric columns (mpg ... carb).
# Each row name has the form "<car name>:<dotted code>", e.g.
# "AMC Javelin:2.1.2.2.2"; the part after ":" is what the question wants to
# move into its own column.
df <- structure(list(mpg = c(15.2, 10.4, 13.3, 14.7, 22.8, 15.5, 14.3,
19.7, 32.4, 27.3, 15.8, 30.4, 21.4, 18.7, 10.4, 30.4, 15, 21,
21, 22.8, 24.4, 19.2, 17.8, 16.4, 17.3, 15.2, 19.2, 26, 33.9,
21.5, 18.1, 21.4), cyl = c(8, 8, 8, 8, 4, 8, 8, 6, 4, 4, 8, 4,
6, 8, 8, 4, 8, 6, 6, 4, 4, 6, 6, 8, 8, 8, 8, 4, 4, 4, 6, 4),
disp = c(304, 472, 350, 440, 108, 318, 360, 145, 78.7, 79,
351, 75.7, 258, 360, 460, 95.1, 301, 160, 160, 140.8, 146.7,
167.6, 167.6, 275.8, 275.8, 275.8, 400, 120.3, 71.1, 120.1,
225, 121), hp = c(150, 205, 245, 230, 93, 150, 245, 175,
66, 66, 264, 52, 110, 175, 215, 113, 335, 110, 110, 95, 62,
123, 123, 180, 180, 180, 175, 91, 65, 97, 105, 109), drat = c(3.15,
2.93, 3.73, 3.23, 3.85, 2.76, 3.21, 3.62, 4.08, 4.08, 4.22,
4.93, 3.08, 3.15, 3, 3.77, 3.54, 3.9, 3.9, 3.92, 3.69, 3.92,
3.92, 3.07, 3.07, 3.07, 3.08, 4.43, 4.22, 3.7, 2.76, 4.11
), wt = c(3.435, 5.25, 3.84, 5.345, 2.32, 3.52, 3.57, 2.77,
2.2, 1.935, 3.17, 1.615, 3.215, 3.44, 5.424, 1.513, 3.57,
2.62, 2.875, 3.15, 3.19, 3.44, 3.44, 4.07, 3.73, 3.78, 3.845,
2.14, 1.835, 2.465, 3.46, 2.78), qsec = c(17.3, 17.98, 15.41,
17.42, 18.61, 16.87, 15.84, 15.5, 19.47, 18.9, 14.5, 18.52,
19.44, 17.02, 17.82, 16.9, 14.6, 16.46, 17.02, 22.9, 20,
18.3, 18.9, 17.4, 17.6, 18, 17.05, 16.7, 19.9, 20.01, 20.22,
18.6), vs = c(0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,
1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1), am = c(0,
0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1), gear = c(3, 3, 3, 3,
4, 3, 3, 5, 4, 4, 5, 4, 3, 3, 3, 5, 5, 4, 4, 4, 4, 4, 4,
3, 3, 3, 3, 5, 4, 3, 3, 4), carb = c(2, 4, 4, 4, 1, 2, 4,
6, 1, 1, 4, 2, 1, 2, 4, 2, 8, 4, 4, 2, 2, 4, 4, 3, 3, 3,
2, 2, 1, 1, 1, 2)), .Names = c("mpg", "cyl", "disp", "hp",
"drat", "wt", "qsec", "vs", "am", "gear", "carb"), row.names = c("AMC Javelin:2.1.2.2.2",
"Cadillac Fleetwood:1.2.1.2.1", "Camaro Z28:1.2.2.1.2.2", "Chrysler Imperial:1.2.1.1",
"Datsun 710:2.2.2.2.1.2.2.2.1", "Dodge Challenger:2.1.2.2.1",
"Duster 360:1.2.2.1.2.1", "Ferrari Dino:2.2.2.1", "Fiat 128:2.2.1.2.2.1",
"Fiat X1-9:2.2.1.2.2.2", "Ford Pantera L:1.2.2.1.1", "Honda Civic:2.2.1.1",
"Hornet 4 Drive:2.1.1.1", "Hornet Sportabout:1.2.2.2.1", "Lincoln Continental:1.2.1.2.2",
"Lotus Europa:2.2.2.2.1.1", "Maserati Bora:1.1", "Mazda RX4:2.2.2.2.2.2.1.1",
"Mazda RX4 Wag:2.2.2.2.2.2.1.2", "Merc 230:2.2.2.2.1.2.1", "Merc 240D:2.2.2.2.2.1",
"Merc 280:2.2.2.2.2.2.2.1", "Merc 280C:2.2.2.2.2.2.2.2", "Merc 450SE:2.1.2.1.2.1",
"Merc 450SL:2.1.2.1.2.2", "Merc 450SLC:2.1.2.1.1", "Pontiac Firebird:1.2.2.2.2",
"Porsche 914-2:2.2.2.2.1.2.2.2.2.2", "Toyota Corolla:2.2.1.2.1",
"Toyota Corona:2.2.2.2.1.2.2.2.2.1", "Valiant:2.1.1.2", "Volvo 142E:2.2.2.2.1.2.2.1"
), class = "data.frame")
产生这个:
> head(df)
mpg cyl disp hp drat wt qsec vs am gear carb
AMC Javelin:2.1.2.2.2 15.2 8 304 150 3.15 3.435 17.30 0 0 3 2
Cadillac Fleetwood:1.2.1.2.1 10.4 8 472 205 2.93 5.250 17.98 0 0 3 4
Camaro Z28:1.2.2.1.2.2 13.3 8 350 245 3.73 3.840 15.41 0 0 3 4
Chrysler Imperial:1.2.1.1 14.7 8 440 230 3.23 5.345 17.42 0 0 3 4
Datsun 710:2.2.2.2.1.2.2.2.1 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
Dodge Challenger:2.1.2.2.1 15.5 8 318 150 2.76 3.520 16.87 0 0 3 2
请注意,行名称用 ":" 分隔。我想把行名拆开,让第二部分成为数据框的新列:
ancest mpg cyl disp hp drat wt qsec vs am gear carb
AMC Javelin 2.1.2.2.2 15.2 8 304 150 3.15 3.435 17.30 0 0 3 2
Cadillac Fleetwood 1.2.1.2.1 10.4 8 472 205 2.93 5.250 17.98 0 0 3 4
该怎么做呢?我卡在了这一步:
# Grab the combined "name:code" row names of the data frame.
rn <- rownames(df)
# strsplit() is vectorised over rn, so it can be applied to the whole vector;
# unlist() then flattens everything into one character vector in which names
# and codes alternate.
unlist(strsplit(rn, ":"))
答案 0 :(得分:2)
我们可以使用 strsplit 把行名拆分成一个列表("list"),
再用 rbind 把该列表合并成矩阵 "m1"。
然后用 "m1" 的第一列替换 "df" 的行名,并用 "m1" 的第二列创建新列 "ancest":
# Split every row name at ":" and stack the resulting pieces row-wise into a
# character matrix: column 1 holds the name, column 2 the code.
m1 <- do.call(rbind, strsplit(rn, ":"))
# The name part becomes the new row names ...
rownames(df) <- m1[, 1]
# ... and the code part is stored as a new column called "ancest".
df[["ancest"]] <- m1[, 2]
或者,如果您希望拆分得到的列
作为数据集的第一列,
# Build a new data frame with the code part as the leading "ancest" column,
# followed by all columns of df (column names are left unchecked).
df1 <- data.frame(ancest = m1[, 2], df, check.names = FALSE)
# Use the name part as row names of the new data frame.
rownames(df1) <- m1[, 1]
或使用splitstackshape
和data.table
library(data.table)
library(splitstackshape)
# Copy df into a data.table, exposing the row names as a leading "rn" column.
# as.data.table() is used instead of setDT() so that df itself is not
# converted by reference and keeps its row names.
dt <- as.data.table(df, keep.rownames = TRUE)
# cSplit() replaces "rn" with its two ":"-separated pieces, which end up as
# the last two columns (12 = name, 13 = code, given the 11 columns of df).
# Reorder so that the code comes first and the name last; selecting only
# column 12 here would make the df1[, 13] lookup below fail.
df1 <- as.data.frame(
  cSplit(dt, "rn", sep = ":")[, c(13, 1:12), with = FALSE]
)
# The name piece now sits in column 13: use it as row names, then drop it.
rownames(df1) <- as.character(df1[, 13])
df1 <- df1[-13]
答案 1 :(得分:1)
试试这个:
# create a new variable with the row names
df$names <- rownames(df)
# split the new variable into two pieces at the first ":", delete the pattern
# (the :), and keep both pieces; str_split_fixed() returns a two-column
# character matrix, so df$names ends up holding both pieces side by side
df$names <- stringr::str_split_fixed(df$names, ":", 2)
答案 2 :(得分:1)
使用 sapply
和 `[`
函数:
# Split each "name:code" row name once; every list element holds the two
# pieces of one row name.
name_pieces <- strsplit(rownames(df), ":")
# Pick the first piece of every element as the new row names ...
row.names(df) <- sapply(name_pieces, `[`, 1)
# ... and the second piece as the new "ancest" column.
df[["ancest"]] <- sapply(name_pieces, `[`, 2)
您可以使用漂亮的dplyr::select
函数重新排列列:
# library() stops immediately if dplyr is not installed, whereas require()
# only returns FALSE and lets the select() call below fail instead.
library(dplyr)
# Move "ancest" to the front, followed by the columns mpg through carb.
df <- select(df, ancest, mpg:carb)