我的数据看起来像这样
# Sample data: a one-column data frame. Column V is a factor in which each
# value is a comma-separated string of tokens such as "267BALT_ 361".
df <- local({
  # Factor levels, in the order the level codes refer to them.
  v_levels <- c(
    "132BALT_ 26,172BALT_ 27,107BALT_ 57,104BALT_ 59,137BALT_ 60,133BALT_ 61,103BALT_ 62,134BALT_ 63,177BALT_ 100,123BALT_ 133,184BALT_ 168,109BALT_ 197,103BALT_ 198,173BALT_ 202,157BALT_ 203,143BALT_ 266,62BALT_ 342,62BALT_ 354,92BALT_ 355,195BALT_ 368,164BALT_ 370,52BALT_ 468,74BALT_ 469,71BALT_ 484,98BALT_ 494,66BALT_ 502,63BALT_ 601,133BALT_ 622",
    "135A,510A,511A,60BALT_ 23,67BALT_ 24,70BALT_ 25,95BALT_ 26,122BALT_ 27,123BALT_ 27,109BALT_ 60",
    "25A,28BALT_ 55,31BALT_ 56,45BALT_ 57,43BALT_ 58,5BALT_ 59,47BALT_ 59,6BALT_ 60,69BALT_ 60,66BALT_ 61",
    "267BALT_ 361,786BALT_ 363,543BALT_ 392",
    "563BALT_ 202,983BALT_ 360",
    "8BALT_ 1,12BALT_ 35,10BALT_ 71,9BALT_ 154,51BALT_ 179",
    "91BALT_ 26,117BALT_ 27,117BALT_ 28,102BALT_ 29,47BALT_ 31,96BALT_ 63,78BALT_ 64,133BALT_ 65,117BALT_ 66,121BALT_ 66,112BALT_ 67,127BALT_ 100"
  )
  # Level code of each of the seven rows, top to bottom.
  row_codes <- c(4L, 5L, 3L, 7L, 6L, 2L, 1L)
  data.frame(V = factor(v_levels[row_codes], levels = v_levels))
})
下面只展示它的前两行:
#1 267BALT_ 361,786BALT_ 363,543BALT_ 392
#2 563BALT_ 202,983BALT_ 360
# .
# .
我想删除每一项中从开头到下划线(含下划线)的部分,只保留其余部分。因此输出应如下所示:
361,363,392
202,360
.
.
.
我希望把结果保存为一个列表,为此我尝试了下面的代码:
# Attempt: extract every run of digits that is immediately preceded by an
# uppercase letter (look-behind on [A-Z]) and convert each row's matches to
# numeric.
# NOTE(review): in the sample data the wanted digits follow "_ " (underscore,
# then a space), never an uppercase letter, so this pattern appears to match
# nothing and every list element comes back empty.
mylist <- lapply(str_extract_all(df$V, "(?<=[A-Z])\\d+"), as.numeric)
我应该是在某个地方出了错,如能提供帮助,不胜感激。
答案 0 :(得分:1)
以下代码使用 stringr 处理字符串,并使用 dplyr 提供的管道运算符 %>% 把各个步骤串联起来:
library(stringr)
library(dplyr)
# Build a list with one string per row of df$V, holding the comma-separated
# values that follow the underscore in each token (e.g. "361,363,392").
mylist <- str_split(df$V, pattern = ",") %>% # One character vector per row
  lapply(function(tokens) {
    # Keep only tokens that contain an underscore; tokens such as "135A" have
    # no "after the underscore" part and would otherwise pass through as-is.
    tokens[str_detect(tokens, "_")] %>%
      str_replace("^.*?_", "") %>% # Drop everything up to the first underscore
      str_trim("left") %>% # Drop the space in front of the number
      paste(collapse = ",") # Join the row's values into a single string
  })
答案 1 :(得分:1)
library(stringr)
选项 1:把所有行的结果合并成一个数值向量:
# Capture every run of digits that follows an underscore (plus optional
# whitespace) and flatten all rows into one numeric vector. Anchoring on the
# underscore keeps single-digit values such as the 1 in "8BALT_ 1", which a
# two-or-more-digit pattern like "\\b[0-9]\\d+\\b" silently drops.
opt1 <- as.numeric(unlist(lapply(
  str_match_all(df$V, "_\\s*(\\d+)"),
  function(m) m[, 2] # Column 2 holds the captured digits
)))
print(opt1)
#[1] 361 363 392 202 360 55 56 57 58 59 59 60 60 61 26 27 28 29 31 63 64
#[22] 65 66 66 67 100 1 35 71 154 179 23 24 25 26 27 27 60 26 27 57 59
#[43] 60 61 62 63 100 133 168 197 198 202 203 266 342 354 355 368 370 468 469 484 494
#[64] 502 601 622
选项 2:得到一个列表,其中每一行对应一个数值向量:
# For each row, capture every run of digits that follows an underscore (plus
# optional whitespace) and return the values as a numeric vector, giving a
# list with one element per row. Anchoring on the underscore keeps
# single-digit values such as the 1 in "8BALT_ 1", which a two-or-more-digit
# pattern like "\\b[0-9]\\d+\\b" silently drops.
opt2 <- lapply(
  str_match_all(df$V, "_\\s*(\\d+)"),
  function(m) as.numeric(m[, 2]) # Column 2 holds the captured digits
)
print(opt2)
#[[1]]
#[1] 361 363 392
#
#[[2]]
#[1] 202 360
#
#[[3]]
#[1] 55 56 57 58 59 59 60 60 61
#
#[[4]]
#[1] 26 27 28 29 31 63 64 65 66 66 67 100
#
#[[5]]
#[1] 1 35 71 154 179
#
#[[6]]
#[1] 23 24 25 26 27 27 60
#
#[[7]]
#[1] 26 27 57 59 60 61 62 63 100 133 168 197 198 202 203 266 342 354 355 368 370
#[22] 468 469 484 494 502 601 622