让我们举一个例子:
> dput(data)
structure(list(mpg = c(15.2, 10.4, 13.3, 14.7, 22.8, 15.5, 14.3,
19.7, 32.4, 27.3, 15.8, 30.4, 21.4, 18.7, 10.4, 30.4, 15, 21,
21, 22.8, 24.4, 19.2, 17.8, 16.4, 17.3, 15.2, 19.2, 26, 33.9,
21.5, 18.1, 21.4), cyl = c(8, 8, 8, 8, 4, 8, 8, 6, 4, 4, 8, 4,
6, 8, 8, 4, 8, 6, 6, 4, 4, 6, 6, 8, 8, 8, 8, 4, 4, 4, 6, 4),
disp = c(304, 472, 350, 440, 108, 318, 360, 145, 78.7, 79,
351, 75.7, 258, 360, 460, 95.1, 301, 160, 160, 140.8, 146.7,
167.6, 167.6, 275.8, 275.8, 275.8, 400, 120.3, 71.1, 120.1,
225, 121), hp = c(150, 205, 245, 230, 93, 150, 245, 175,
66, 66, 264, 52, 110, 175, 215, 113, 335, 110, 110, 95, 62,
123, 123, 180, 180, 180, 175, 91, 65, 97, 105, 109), drat = c(3.15,
2.93, 3.73, 3.23, 3.85, 2.76, 3.21, 3.62, 4.08, 4.08, 4.22,
4.93, 3.08, 3.15, 3, 3.77, 3.54, 3.9, 3.9, 3.92, 3.69, 3.92,
3.92, 3.07, 3.07, 3.07, 3.08, 4.43, 4.22, 3.7, 2.76, 4.11
), wt = c(3.435, 5.25, 3.84, 5.345, 2.32, 3.52, 3.57, 2.77,
2.2, 1.935, 3.17, 1.615, 3.215, 3.44, 5.424, 1.513, 3.57,
2.62, 2.875, 3.15, 3.19, 3.44, 3.44, 4.07, 3.73, 3.78, 3.845,
2.14, 1.835, 2.465, 3.46, 2.78), qsec = c(17.3, 17.98, 15.41,
17.42, 18.61, 16.87, 15.84, 15.5, 19.47, 18.9, 14.5, 18.52,
19.44, 17.02, 17.82, 16.9, 14.6, 16.46, 17.02, 22.9, 20,
18.3, 18.9, 17.4, 17.6, 18, 17.05, 16.7, 19.9, 20.01, 20.22,
18.6), vs = c(0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,
1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1), am = c(0,
0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1), gear = c(3, 3, 3, 3,
4, 3, 3, 5, 4, 4, 5, 4, 3, 3, 3, 5, 5, 4, 4, 4, 4, 4, 4,
3, 3, 3, 3, 5, 4, 3, 3, 4), carb = c(2, 4, 4, 4, 1, 2, 4,
6, 1, 1, 4, 2, 1, 2, 4, 2, 8, 4, 4, 2, 2, 4, 4, 3, 3, 3,
2, 2, 1, 1, 1, 2), car = structure(c(18L, 19L, 5L, 13L, 14L,
31L, 7L, 21L, 20L, 22L, 23L, 24L, 25L, 26L, 2L, 15L, 4L,
9L, 12L, 29L, 30L, 6L, 1L, 3L, 27L, 10L, 28L, 16L, 11L, 8L,
17L, 32L), .Label = c("AMC Javelin, AMC Javelin, AMC Javelin, AMC Javelin, AMC Javelin",
"Cadillac Fleetwood", "Camaro Z28", "Chrysler Imperial",
"Datsun 710, Datsun 710, Datsun 710, Datsun 710", "Dodge Challenger",
"Duster 360", "Ferrari Dino, Ferrari Dino, Ferrari Dino, Ferrari Dino, Ferrari Dino",
"Fiat 128, Fiat 128, Fiat 128, Fiat 128, Fiat 128", "Fiat X1-9",
"Ford Pantera L", "Honda Civic, Honda Civic, Honda Civic, Honda Civic, Honda Civic",
"Hornet 4 Drive, Hornet 4 Drive, Hornet 4 Drive, Hornet 4 Drive, Hornet 4 Drive",
"Hornet Sportabout", "Lincoln Continental", "Lotus Europa",
"Maserati Bora", "Mazda RX4", "Mazda RX4 Wag, Mazda RX4 Wag, Mazda RX4 Wag, Mazda RX4 Wag, Mazda RX4 Wag",
"Merc 230", "Merc 240D", "Merc 280", "Merc 280C", "Merc 450SE",
"Merc 450SL", "Merc 450SLC", "Pontiac Firebird", "Porsche 914-2",
"Toyota Corolla", "Toyota Corona", "Valiant", "Volvo 142E"
), class = "factor")), .Names = c("mpg", "cyl", "disp", "hp",
"drat", "wt", "qsec", "vs", "am", "gear", "carb", "car"), row.names = c("Mazda RX4",
"Mazda RX4 Wag", "Datsun 710", "Hornet 4 Drive", "Hornet Sportabout",
"Valiant", "Duster 360", "Merc 240D", "Merc 230", "Merc 280",
"Merc 280C", "Merc 450SE", "Merc 450SL", "Merc 450SLC", "Cadillac Fleetwood",
"Lincoln Continental", "Chrysler Imperial", "Fiat 128", "Honda Civic",
"Toyota Corolla", "Toyota Corona", "Dodge Challenger", "AMC Javelin",
"Camaro Z28", "Pontiac Firebird", "Fiat X1-9", "Porsche 914-2",
"Lotus Europa", "Ford Pantera L", "Ferrari Dino", "Maserati Bora",
"Volvo 142E"), class = "data.frame")
正如您所看到的,我们在 car 列中有一些重复的字词。
我想在每个单元格中只保留一个单词/短语。在我的原始数据中,这些单词同样是用逗号分隔的。另外,在我的原始数据中,不同的列里可能存在相同的单词,这些我希望保持原样。该函数应仅作用于这一特定列。
答案 0 :(得分:7)
一种做法是使用正则表达式:检查每个值中是否含有逗号,如果有,则只保留第一个逗号之前的内容。
# Remove the first comma and everything after it, so each value keeps only
# the text before its first comma. Values without a comma are left unchanged.
# Note: this returns a new character vector; `data` itself is not modified.
sub(pattern = ",.*", replacement = "", x = data$car)
答案 1 :(得分:4)
这可以使用 splitstackshape 包中的 cSplit 来实现:
library(splitstackshape)
# Split the comma-separated `car` values into one row per piece
# (direction = "long"); cSplit's default stripWhite = TRUE trims the space
# that follows each comma. unique() then drops the rows that became exact
# duplicates, leaving a single row per original observation.
# `data` is the data frame from the question (the snippet previously used
# `df`, which is not defined there and resolves to stats::df in a fresh session).
unique(cSplit(data, "car", sep = ',', direction = "long"))
# mpg cyl disp hp drat wt qsec vs am gear carb car
# 1: 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 Mazda RX4
# 2: 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 Mazda RX4 Wag
# 3: 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 Datsun 710
# 4: 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 Hornet 4 Drive
# 5: 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 Hornet Sportabout
# 6: 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 Valiant
# 7: 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 Duster 360
# 8: 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 Merc 240D
# 9: 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 Merc 230
#10: 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 Merc 280
#11: 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 Merc 280C
#12: 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 Merc 450SE
#13: 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 Merc 450SL
#14: 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 Merc 450SLC
#15: 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 Cadillac Fleetwood
#...