# Example data: one row per year with a pregnancy status, an infection status
# and the expected flag ('Keep'). Bound to 'df', the name the snippets further
# down read from.
df <- structure(
  list(
    Year = 1998:2007,
    Pregnant = structure(
      c(2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L),
      .Label = c("No", "Yes"), class = "factor"
    ),
    Infection = structure(
      c(2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L),
      .Label = c("Negative", "Positive"), class = "factor"
    ),
    Keep = c(0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 0L)
  ),
  .Names = c("Year", "Pregnant", "Infection", "Keep"),
  class = "data.frame",
  # compact encoding of automatic row names for the 10 rows
  row.names = c(NA, -10L)
)
# Year Pregnant Infection Keep
# 1 1998 Yes Positive 0
# 2 1999 Yes Positive 0
# 3 2000 No Negative 0
# 4 2001 No Negative 1 # Infection changes from Negative to Positive
# 5 2002 No Positive 1
# 6 2003 No Positive 0
# 7 2004 No Negative 0
# 8 2005 No Negative 1 # Pregnant changes from No to Yes
# 9 2006 Yes Negative 1
# 10 2007 Yes Negative 0
我想标记按特定顺序发生变化的行。例如,Pregnant 列的值从 "No"(第8行)变为 "Yes"(第9行),Infection 列的值从 "Negative"(第4行)变为 "Positive"(第5行)。因此,我想标记这些行("Keep" 列中用 1 表示被标记的行)。
Variable - Pregnant, From - 'No', To - 'Yes'
Variable - Infection, From - 'Negative', To - 'Positive'
答案 0 :(得分:1)
# Work on the two status columns only (positions 2 and 3 of 'df')
d <- df[2:3]

# Make both factors ordered so that "<" follows the from -> to sequence:
# No < Yes and Negative < Positive
d[["Pregnant"]] <- ordered(d[["Pregnant"]], levels = c("No", "Yes"))
d[["Infection"]] <- ordered(d[["Infection"]], levels = c("Negative", "Positive"))

# Compare every row with its successor; the leading all-FALSE row realigns the
# result with 'd', so TRUE in row i means the level rose from row i - 1 to row i
m <- rbind(rep(FALSE, ncol(d)), d[-nrow(d), ] < d[-1, ])

# (row, col) positions of the detected rises
ix <- which(m, arr.ind = TRUE)

# Mark the row just before each rise as well
ix_before <- ix
ix_before[, 1] <- ix_before[, 1] - 1
m[ix_before] <- TRUE

# Tag the columns with "_diff", drop row names and turn TRUE/FALSE into 1/0
colnames(m) <- paste0(colnames(m), "_diff")
rownames(m) <- NULL
storage.mode(m) <- "double"
根据各行是否存在变化创建一个 "keep" 列,并用 cbind 将其与原始数据合并:
# A row is flagged when at least one of its per-variable change markers in
# 'm' is non-zero; show the flag and the markers next to the original data
keep_flag <- as.integer(rowSums(m) != 0)
cbind(df, Keep2 = keep_flag, m)
# Year Pregnant Infection Keep Keep2 Pregnant_diff Infection_diff
# 1 1998 Yes Positive 0 0 0 0
# 2 1999 Yes Positive 0 0 0 0
# 3 2000 No Negative 0 0 0 0
# 4 2001 No Negative 1 1 0 1
# 5 2002 No Positive 1 1 0 1
# 6 2003 No Positive 0 0 0 0
# 7 2004 No Negative 0 0 0 0
# 8 2005 No Negative 1 1 1 0
# 9 2006 Yes Negative 1 1 1 0
# 10 2007 Yes Negative 0 0 0 0
答案 1 :(得分:0)
# Flag, per variable, the rows on either side of an upward change in factor
# level (with the example data: "No" -> "Yes" and "Negative" -> "Positive").
# Assumes dplyr is attached: mutate(), select(), lag(default = ) and %>%.
df %>%
  mutate(
    # 1 on the row immediately before a level increase; padded with a trailing
    # 0 because the last row has no successor
    grp.Preg = c(diff(as.numeric(Pregnant)) > 0, 0),
    grp.Infc = c(diff(as.numeric(Infection)) > 0, 0),
    # |current - previous| is 1 on the row before an increase and, when the
    # next row is not itself followed by an increase, on the row after it
    flagChangePreg = abs(grp.Preg - lag(grp.Preg, default = 0)),
    flagChangeInfc = abs(grp.Infc - lag(grp.Infc, default = 0))
  ) %>%
  # drop the helper columns, keeping only the flags
  select(-grp.Preg, -grp.Infc)
# Year Pregnant Infection Keep flagChangePreg flagChangeInfc
#1 1998 Yes Positive 0 0 0
#2 1999 Yes Positive 0 0 0
#3 2000 No Negative 0 0 0
#4 2001 No Negative 1 0 1
#5 2002 No Positive 1 0 1
#6 2003 No Positive 0 0 0
#7 2004 No Negative 0 0 0
#8 2005 No Negative 1 1 0
#9 2006 Yes Negative 1 1 0
#10 2007 Yes Negative 0 0 0
从{{1} }到"Negative"