我有下表:
# Input table: eight rows with a factor Id (levels a, b, c) and a numeric stops.
data.frame(
  Id = factor(
    c("a", "a", "b", "b", "a", "c", "c", "c"),
    levels = c("a", "b", "c")
  ),
  stops = c(1, 1, 1, 1, 1, 2, 2, 2)
)
我希望在 stops 没有变化、但 Id 发生变化时,给 $stops 追加一个新的字符后缀。
例如,我想得到:
# Desired result: same Id column, with stops turned into a factor whose labels
# carry a "-<n>" suffix on the rows where Id changed but stops did not.
data.frame(
  Id = factor(
    c("a", "a", "b", "b", "a", "c", "c", "c"),
    levels = c("a", "b", "c")
  ),
  stops = factor(
    c("1", "1", "1-1", "1-1", "1-2", "2", "2", "2"),
    levels = c("1", "1-1", "1-2", "2")
  )
)
也就是说:如果 Id 与前一行不同,并且 stops 与前一行相同,就给 stops 加上后缀……
我尝试过使用 mutate(),但似乎离可行的方案还差得很远……
答案 0 :(得分:3)
以下是使用 data.table 的一种解法:
library(data.table)
# Turn df into a data.table by reference.
setDT(df)
# stops has to be character so that a "-<n>" suffix can be pasted onto it.
df[, stops := as.character(stops)]
# rleid() gives every consecutive run of identical Id values its own index.
df[, Idindx := rleid(Id)]
# Keep one row per Id run, then number the Id runs (starting at 0) inside each
# consecutive run of identical stops.
indx <- unique(df, by = "Idindx")
indx[, counter := seq_len(.N) - 1L, by = rleid(stops)]
# Update join: only Id runs with a positive counter get the suffix appended.
df[indx[counter > 0], stops := paste(stops, i.counter, sep = "-"), on = "Idindx"]
# Id stops Idindx
# 1: a 1 1
# 2: a 1 1
# 3: b 1-1 2
# 4: b 1-1 2
# 5: a 1-2 3
# 6: c 2 4
# 7: c 2 4
# 8: c 2 4
第一步是为每一段连续相同的 Id 创建一个唯一索引(因为 Id 的取值本身会重复出现),并将 stops 转换为 character(与您期望的输出一致)。
然后,在按该索引去重后的数据上,对每一段连续相同的 stops 进行计数,再把计数结果连接回原始数据。
答案 1 :(得分:0)
你可以写一个循环来解决你的问题:
# Original data
data <- structure(list(Id = structure(c(1L, 1L, 2L, 2L, 1L, 3L, 3L, 3L
), .Label = c("a", "b", "c"), class = "factor"), stops = c(1,
1, 1, 1, 1, 2, 2, 2)), .Names = c("Id", "stops"), row.names = c(NA,
-8L), class = "data.frame")
# Add the new column; it starts as the plain character form of stops and is
# relabelled row by row in the loop below.
data$stops_new <- as.character(data$stops)
# Running suffix counter. It is shared by all stop values (it is not reset
# when stops changes).
new <- 1
# seq_len(n)[-1] visits rows 2..n and is empty for data with fewer than two
# rows, so the loop never reads past either end of the table.
for (i in seq_len(nrow(data))[-1]) {
  same_stop <- data$stops[i] == data$stops[i - 1]
  same_id <- data$Id[i] == data$Id[i - 1]
  if (same_stop && !same_id) {
    # Id changed while stops stayed the same: start a new "-<new>" label.
    data$stops_new[i] <- paste0(data$stops_new[i], "-", new)
    new <- new + 1
  } else if (same_stop && same_id) {
    # Same Id and same stops as the previous row: continue that row's label.
    data$stops_new[i] <- data$stops_new[i - 1]
  }
}
data
答案 2 :(得分:0)
这是基础R解决方案。
创建指标,分别表示 Id 是否相对上一行发生了变化(id.ind)以及 stops 是否相对上一行发生了变化(stops.ind)(约定第一行的指标设为 “0”,即视为没有变化):
# Row-to-row differences, each padded with a leading 0 so that the first row
# counts as "unchanged". A non-zero entry means the value differs from the
# previous row. Id is a factor, so its integer codes are differenced.
id.ind <- c(0, diff(as.numeric(dat$Id)))
stops.ind <- c(0, diff(dat$stops))
创建新的停止向量:
# stops keeps the original values used to build the labels. new.stops is the
# result vector; it starts out as character so that its type does not depend
# on whether any "-k" suffix ends up being appended later.
stops <- dat$stops
new.stops <- as.character(stops)
逐行检查:a) Id 发生了变化而 stops 没有变化;或 b) Id 和 stops 相对上一行都没有变化。若为 a),将 k 增加 1 并把 “-k” 附加到 stops 的值之后;若为 b),沿用上一行的新 stops 值:
# Running suffix counter; incremented every time Id changes while stops does
# not.
k <- 0
# seq_len(n)[-1] visits rows 2..n and is empty when dat has fewer than two
# rows (2:nrow(dat) would count backwards to row 1 in that case).
for (i in seq_len(nrow(dat))[-1]) {
  stop_same <- stops.ind[i] == 0
  if (stop_same && id.ind[i] != 0) {
    # a) Id changed, stops did not: start a new "<stops>-<k>" label.
    k <- k + 1
    new.stops[i] <- paste0(stops[i], "-", k)
  } else if (stop_same) {
    # b) Neither Id nor stops changed: carry the previous row's label forward.
    new.stops[i] <- new.stops[i - 1]
  }
}
new.stops
# [1] "1" "1" "1-1" "1-1" "1-2" "2" "2" "2"
new.dat <- data.frame(Id = dat$Id, stops = new.stops)