如何展开 "CodeRange" 列中的值?我的数据有两列:CodeRange 和 Desc。我想在数据框中添加第三列,其中包含 CodeRange 列所表示范围内的各个值。如何在 R 中做到这一点?
这就是我现在的表格
CodeRange Desc
61000:61055 Test1
61000:61055 Test1
61000:61055 Test1
0356T:0358T Test2
S9090:S9090 Test3
0062T:0062T Test4
这是我希望的最终结果
CodeRange Desc Codes(new column to be added)
61000:61055 Test1 61001
61000:61055 Test1 61002
61000:61055 Test1 61003
61000:61055 Test1 61004
61000:61055 Test1 61005
.
.
61000:61055 Test1 61055
0356T:0358T Test2 0356T
0356T:0358T Test2 0357T
0356T:0358T Test2 0358T
S9090:S9090 Test3 S9090
0062T:0062T Test4 0062T
答案 0 :(得分:0)
# Example input: one row per (code range, description) pair; the first range
# is deliberately repeated three times.
dt <- read.table(text = "CodeRange Desc
61000:61055 Test1
61000:61055 Test1
61000:61055 Test1
0356T:0358T Test2
S9090:S9090 Test3
0062T:0062T Test4
", stringsAsFactors = FALSE, header = TRUE)
# Keep only the first occurrence of each (CodeRange, Desc) pair so a range is
# expanded once. The columns are compared directly instead of via a pasted
# key, which could merge distinct rows whose values contain the separator.
dt <- dt[!duplicated(dt[c("CodeRange", "Desc")]), ]
#' Expand one "start:end" code range into its individual codes.
#'
#' A code is either purely numeric ("61000") or a run of digits with a single
#' letter in front ("S9090") or at the end ("0356T"). The numeric part is
#' enumerated from start to end, then the leading zeros and the letter are
#' restored on every generated code.
#'
#' @param x Character vector whose first element is the code range and whose
#'   second element is the description (e.g. one row handed over by
#'   `apply(dt, 1, expand_code)`). A range without ":" is treated as a single
#'   code.
#' @return A data frame with the columns `CodeRange`, `Desc` and `Code`
#'   (always character), one row per code in the range.
expand_code <- function(x) {
  range_text <- as.character(x[[1]])
  bounds <- strsplit(range_text, ":", fixed = TRUE)[[1]]
  if (length(bounds) == 1L) {
    bounds <- rep(bounds, 2L)
  }
  start_code <- bounds[1]

  # Look for the letter inside the start code only; searching the whole range
  # would pick up the ":" separator for purely numeric codes.
  letter_at <- as.integer(regexpr("[^0-9]", start_code))
  letter <- if (letter_at > 0L) substr(start_code, letter_at, letter_at) else ""

  digits <- gsub("[^0-9]", "", bounds)
  numbers <- seq(from = as.numeric(digits[1]), to = as.numeric(digits[2]), by = 1)

  # Zero-pad every number to the width of the digit part of the start code.
  # Padding each element individually keeps ranges such as 0098..0102 aligned,
  # and "%.0f" never switches to scientific notation.
  codes <- sprintf(paste0("%0", nchar(digits[1]), ".0f"), numbers)

  if (letter_at == nchar(start_code)) {
    # Letter is the last character: re-attach it as a suffix.
    codes <- paste0(codes, letter)
  } else if (letter_at > 0L) {
    # Letter appears earlier in the code: re-attach it as a prefix.
    codes <- paste0(letter, codes)
  }

  data.frame(
    CodeRange = range_text,
    Desc = x[[2]],
    Code = codes,
    row.names = NULL,
    stringsAsFactors = FALSE
  )
}
# Expand every row of `dt` with expand_code() and stack the per-range data
# frames into one long data frame.
do.call(
  what = rbind,
  args = apply(X = dt, MARGIN = 1, FUN = expand_code)
)
答案 1 :(得分:0)
# Drop repeated rows so that each code range is expanded only once.
df <- df[!duplicated(df), ]

# EXPAND: strip the letters, split every "start:end" range and enumerate its
# numeric part. strsplit()/seq() avoid evaluating data as R code, and lapply()
# always returns a list (sapply() would collapse to a matrix whenever all
# ranges happen to contain the same number of codes).
bounds <- strsplit(gsub("[A-Za-z]+", "", df$CodeRange), ":", fixed = TRUE)
values <- lapply(bounds, function(b) seq(as.numeric(b[1]), as.numeric(b[2])))

# MAKE THE NEW LARGE DF: repeat every row once per code in its range.
dat <- df[rep(seq_len(nrow(df)), lengths(values)), ]
row.names(dat) <- NULL

# INCLUDE THE LETTERS TO THE LARGE DF: take the start code of each row and
# replace its digit run with the expanded number, zero-padded to the width of
# that range's own digit part rather than a fixed width.
Codes <- sub(":.*", "", dat$CodeRange)
digit_width <- rep(nchar(vapply(bounds, `[`, character(1), 1L)), lengths(values))
regmatches(Codes, regexpr("\\d+", Codes)) <-
  sprintf(paste0("%0", digit_width, ".0f"), unlist(values))
dat$Codes <- Codes
head(dat)
CodeRange Desc Codes
1 61000:61055 Test1 61000
2 61000:61055 Test1 61001
3 61000:61055 Test1 61002
4 61000:61055 Test1 61003
5 61000:61055 Test1 61004
6 61000:61055 Test1 61005
tail(dat)
CodeRange Desc Codes
56 61000:61055 Test1 61055
57 0356T:0358T Test2 0356T
58 0356T:0358T Test2 0357T
59 0356T:0358T Test2 0358T
60 S9090:S9090 Test3 S9090
61 0062T:0062T Test4 0062T
一个问题是数据的第 1:3 行是重复的。
因此需要先使用 df=df[!duplicated(df),] 去除重复行,
然后再执行上面的代码。
DATA:
# Example input used by the code above. `header` and the logical constants
# are spelled out: `h` relies on partial argument matching and `T`/`F` are
# ordinary variables that can be reassigned.
df <- read.table(text="CodeRange Desc
61000:61055 Test1
61000:61055 Test1
61000:61055 Test1
0356T:0358T Test2
S9090:S9090 Test3
0062T:0062T Test4 ", header = TRUE, stringsAsFactors = FALSE)