我在R中有一个列如下:
Path Column
ag.1.4->ao.5.5->iv.9.12->ag.4.35
ao.11.234->iv.345.455.1.2->ag.9.531
我想将其转换为:
Path Column
ag->ao->iv->ag
ao->iv->ag
我该怎么做?
谢谢
以下是我的数据的全部输入:
# Sample data in dput() form: a two-row data frame with the columns Rank,
# Count, Percent and Path. Each Path value is a chain of steps joined by "->".
structure(list(Rank = c(10394749L, 36749879L), Count = c(1L,
1L), Percent = c(0.001011122, 0.001011122), Path = c("ao.legacy payment.not_completed->ao.legacy payment.not_completed->ao.legacy payment.completed",
"ao.legacy payment.not_completed->agent.payment.completed")), .Names = c("Rank",
"Count", "Percent", "Path"), class = "data.frame", row.names = c(NA,
-2L))
答案 0 :(得分:2)
您可以使用 gsub 匹配 `.` 以及紧跟在它后面的数字(即 `\\.[0-9]+`),并将匹配到的内容替换为空字符串 `''`。
# Delete every "." that is followed by one or more digits (together with
# those digits) and write the cleaned strings back into the same column.
df1[["Path.Column"]] <- gsub(
  pattern = "\\.[0-9]+",
  replacement = "",
  x = df1[["Path.Column"]]
)
df1
# Path.Column
#1 ag -> ao -> iv -> ag
#2 ao -> iv -> ag
对于新数据集df2
# For each step of a path, drop everything from its first "." up to (but not
# including) the next "->" separator, or up to the end of the string.
# The lazy `.*?` together with the lookahead `(?=->|$)` leaves the separator
# in place. Anchoring on "->" / end-of-string rather than on a word boundary
# means step names containing "-", ">" or trailing punctuation are removed
# completely instead of being cut off half-way.
gsub("\\..*?(?=->|$)", "", df2$Path, perl = TRUE)
#[1] "ao->ao->ao" "ao->agent"
对于OP帖子中显示的字符串,同样适用:
# Path strings exactly as shown in the question.
str2 <- c(
  "ag.1.4->ao.5.5->iv.9.12->ag.4.35",
  "ao.11.234->iv.345.455.1.2->ag.9.531"
)
# Matches, inside each step, everything from the first "." up to (but not
# including) the next "->" separator or the end of the string. The match is
# lazy and anchored by a lookahead, so the separator itself is preserved and
# hyphens or punctuation inside a step name do not end the match early.
step_detail_pattern <- "\\..*?(?=->|$)"
str2_clean <- gsub(step_detail_pattern, "", str2, perl = TRUE)
str2_clean
#[1] "ag->ao->iv->ag" "ao->iv->ag"
# Example data: a single character column whose path steps each carry a
# numeric ".<digits>" suffix and are separated by " -> ".
df1 <- data.frame(
  Path.Column = c(
    "ag.1 -> ao.5 -> iv.9 -> ag.4",
    "ao.11 -> iv.345 -> ag.9"
  ),
  stringsAsFactors = FALSE
)
# The data set from the question: two rows with integer Rank and Count,
# numeric Percent, and a character Path whose steps are joined by "->".
df2 <- data.frame(
  Rank = c(10394749L, 36749879L),
  Count = c(1L, 1L),
  Percent = c(0.001011122, 0.001011122),
  Path = c(
    "ao.legacy payment.not_completed->ao.legacy payment.not_completed->ao.legacy payment.completed",
    "ao.legacy payment.not_completed->agent.payment.completed"
  ),
  stringsAsFactors = FALSE
)
答案 1 :(得分:1)
先按 '->' 拆分字符串,再分别处理各个子字符串,可能会更容易:
# Split every path of the sample data frame `df2` into its individual steps.
# `fixed = TRUE` treats "->" as a literal separator rather than a regex.
subStrings <- strsplit(df2$Path, "->", fixed = TRUE)
# Within each step, remove everything from the **first** dot onwards.
subStrings <- lapply(subStrings, function(x) sub("\\..*", "", x))
# Paste the steps of each path back together. vapply() guarantees that the
# result is a character vector with one element per path, even for empty input.
vapply(subStrings, paste0, character(1), collapse = "->")
#> "ao->ao->ao" "ao->agent"
或
# Split every path of the sample data frame `df2` into its individual steps.
# `fixed = TRUE` treats "->" as a literal separator rather than a regex.
subStrings <- strsplit(df2$Path, "->", fixed = TRUE)
# Within each step, remove each dot together with the run of non-blank
# characters that follows it (up to the next space or tab). Words that are
# not attached to a dot, such as "payment", are therefore kept.
subStrings <- lapply(subStrings, function(x) gsub("\\.[^ \t]*", "", x))
# Paste the steps of each path back together. vapply() guarantees that the
# result is a character vector with one element per path, even for empty input.
vapply(subStrings, paste0, character(1), collapse = "->")
#> "ao payment->ao payment->ao payment" "ao payment->agent"