我正在尝试使用str_split将以下观察结果拆分为特定格式。
"00010943900008" "00010946803119" "00010946803219" "00010946803219" "00010946803219" "00010948700007"
我正在尝试将其拆分为不同的列。
第一个观察结果如下所示:
Column x = 00
Column y = 01
Column z = 09439
Column w = 00008
其中x列始终是观察结果的前2位数字,y列是接下来的2位数字,z列是再接下来的5位数字,w列是最后5位数字。
数据
# Sample input: 100 fixed-width, 14-digit codes kept as character so the
# leading zeros survive. Runs of identical codes are expressed with rep().
string <- rep(
  c(
    "00010943900008", "00010946803119", "00010946803219",
    "00010948700007", "00011820000016"
  ),
  times = c(1L, 1L, 3L, 33L, 62L)
)
答案 0 :(得分:3)
您可以用 \n 作为分隔符把数据连接成一个字符串(或者将其写入文件),然后使用 readr::read_fwf 或 read.fwf(后者只能从文件读取)按固定宽度格式导入。下面是不写入磁盘的 readr::read_fwf 版本:
library(readr)

# Parse the codes as fixed-width text without touching the disk: joining the
# vector with newlines makes readr treat the argument as literal data rather
# than as a file path.
# NOTE(review): a length-one `string` contains no newline and would be read as
# a path -- confirm the input always has more than one element.
result <- read_fwf(
  paste(string, collapse = "\n"),
  col_positions = fwf_widths(c(2, 2, 5, 5), col_names = c("x", "y", "z", "w")),
  # Force character columns so leading zeros never depend on type guessing.
  col_types = cols(.default = col_character())
)
head(result)
# # A tibble: 6 x 4
# x y z w
# <chr> <chr> <chr> <chr>
# 1 00 01 09439 00008
# 2 00 01 09468 03119
# 3 00 01 09468 03219
# 4 00 01 09468 03219
# 5 00 01 09468 03219
# 6 00 01 09487 00007
答案 1 :(得分:2)
您可以根据字符串创建一个数据帧,然后使用substr(),它会根据位置返回部分字符串:
# Build a data frame from the raw codes, then add one column per fixed-width
# field. substr() is vectorised over `string` and takes inclusive start/stop
# positions, so each (first, last) pair yields one whole column.
data <- as.data.frame(string)
data[c("x", "y", "z", "w")] <- Map(
  function(first, last) substr(string, first, last),
  c(1, 3, 5, 10),
  c(2, 4, 9, 14)
)
答案 2 :(得分:2)
我们可以使用正则表达式和 read.table(仅在所有字符串都符合同一模式时才有效):
# Insert commas between the four fixed-width fields, then parse the result as
# comma-separated text. The pattern is anchored at both ends so only values
# that are exactly 14 digits are rewritten; anything else is left without
# commas instead of being silently split at the wrong positions.
# colClasses = "character" keeps the leading zeros in every column.
read.table(
  text = sub("^(\\d{2})(\\d{2})(\\d{5})(\\d{5})$", "\\1,\\2,\\3,\\4", string),
  colClasses = "character",
  sep = ","
)
V1 V2 V3 V4
1 00 01 09439 00008
2 00 01 09468 03119
3 00 01 09468 03219
4 00 01 09468 03219
5 00 01 09468 03219
6 00 01 09487 00007
7 00 01 09487 00007
8 00 01 09487 00007
9 00 01 09487 00007
10 00 01 09487 00007
...
答案 3 :(得分:2)
使用 tidyr 中的 extract。extract 会把正则表达式中的每个捕获组转换为单独的一列。如果不想保留原始列,可以设置 remove = TRUE(默认值):
library(dplyr)
library(tidyr)

# Each capture group in the regex becomes one output column, in order;
# remove = FALSE keeps the original `string` column next to the new ones.
data.frame(string = string) %>%
  extract(
    col = string,
    into = c("x", "y", "z", "w"),
    regex = "^(\\d{2})(\\d{2})(\\d{5})(\\d{5})",
    remove = FALSE
  )
输出:
string x y z w
1 00010943900008 00 01 09439 00008
2 00010946803119 00 01 09468 03119
3 00010946803219 00 01 09468 03219
4 00010946803219 00 01 09468 03219
5 00010946803219 00 01 09468 03219
6 00010948700007 00 01 09487 00007
7 00010948700007 00 01 09487 00007
8 00010948700007 00 01 09487 00007
9 00010948700007 00 01 09487 00007
10 00010948700007 00 01 09487 00007
11 00010948700007 00 01 09487 00007
12 00010948700007 00 01 09487 00007
答案 4 :(得分:0)
使用tidyr::separate:
library(tidyr)

# Numeric `sep` values are character positions to split after, so
# c(2, 4, 9) cuts each code into pieces of width 2, 2, 5 and the remainder.
# Only the first five codes are used to keep the printed example short.
separate(
  data.frame(string = string[seq_len(5)]),
  col = string,
  into = c("x", "y", "z", "w"),
  sep = c(2, 4, 9),
  remove = FALSE
)
# string x y z w
# 1 00010943900008 00 01 09439 00008
# 2 00010946803119 00 01 09468 03119
# 3 00010946803219 00 01 09468 03219
# 4 00010946803219 00 01 09468 03219
# 5 00010946803219 00 01 09468 03219