我有一个汇总表，其中包含下面 df 中的列。我想在 placement_status_type 列中每个 "closing_bal" 值所在的行之后立即插入一个空白行。
# This is part of the df that I have
# Sample input. The values are laid out one balance block per group of lines;
# every block ends with a "closing_bal" row.
df <- data.frame(
  referral_phase_code = c(
    "-", "EA", "EA", "EA", "EA", "EA", "EA", "-",
    "-", "PPS", "PPS", "-",
    "OS", "-",
    "-", "EA", "EA", "EA", "EA", "EA", "EA", "-",
    "-", "PPS", "PPS", "-"
  ),
  placement_status_type = c(
    "opening_bal", "New", "Transfer", "Reinstated", "Suspended",
    "Trf to PPS", "Exit", "closing_bal",
    "opening_bal", "New", "Trf to EA", "closing_bal",
    "New", "closing_bal",
    "opening_bal", "New", "Transfer", "Reinstated", "Suspended",
    "Trf to PPS", "Exit", "closing_bal",
    "opening_bal", "New", "Trf to EA", "closing_bal"
  ),
  stringsAsFactors = FALSE
)
# This is the desired output
# Expected result: the same rows as the input, with one all-NA spacer row after
# every "closing_bal" row except the last one.
output_df <- data.frame(
  referral_phase_code = c(
    "-", "EA", "EA", "EA", "EA", "EA", "EA", "-",
    NA,
    "-", "PPS", "PPS", "-",
    NA,
    "OS", "-",
    NA,
    "-", "EA", "EA", "EA", "EA", "EA", "EA", "-",
    NA,
    "-", "PPS", "PPS", "-"
  ),
  placement_status_type = c(
    "opening_bal", "New", "Transfer", "Reinstated", "Suspended",
    "Trf to PPS", "Exit", "closing_bal",
    NA,
    "opening_bal", "New", "Trf to EA", "closing_bal",
    NA,
    "New", "closing_bal",
    NA,
    "opening_bal", "New", "Transfer", "Reinstated", "Suspended",
    "Trf to PPS", "Exit", "closing_bal",
    NA,
    "opening_bal", "New", "Trf to EA", "closing_bal"
  ),
  stringsAsFactors = FALSE
)
我知道有 add_row 函数，但不确定在这种情况下该如何使用它。
有什么想法吗？
答案 0 :(得分:4)
我认为这是基于序列的逻辑的绝佳机会:
# Insert an all-NA spacer row after every "closing_bal" row of `df`.
# Positions of the rows that need a spacer after them.
idx <- which(df$placement_status_type == "closing_bal")
# A "closing_bal" on the very last row needs no spacer; keeping it would
# leave a trailing blank row that the desired output does not have.
idx <- idx[idx < nrow(df)]
# Duplicate each selected row in place: after sorting, every index in `idx`
# appears twice in a row.
df <- df[sort(c(seq_len(nrow(df)), idx)), ]
# The k-th duplicate sits k rows further down than its original position
# because k - 1 duplicates were inserted above it; blank those rows.
df[idx + seq_along(idx), ] <- NA
# Duplicated rows carry names such as "8.1"; restore plain 1..n row names.
rownames(df) <- NULL
df
思路：先找出目标行，把这些行各复制一份，然后用 NA 覆盖复制出来的行。
答案 1 :(得分:1)
一种选择是：先根据 "placement_status_type" 列中 "closing_bal" 出现的位置对数据集进行拆分（split），然后对每个分组使用 add_row 追加一个空行：
library(tidyverse)

# Split `df` into balance blocks, append one all-NA spacer row to each block,
# then stack the blocks back together.
df %>%
  # A new block starts on the row after each "closing_bal"; `default = TRUE`
  # makes the first row open block 1. `.keep = FALSE` drops the helper `grp`
  # column from the pieces (`keep =` is deprecated since dplyr 1.0.0).
  group_split(
    grp = cumsum(lag(placement_status_type == "closing_bal", default = TRUE)),
    .keep = FALSE
  ) %>%
  # add_row() appends at the end by default; typed NAs keep both columns
  # character.
  map(~ add_row(
    .x,
    referral_phase_code = NA_character_,
    placement_status_type = NA_character_
  )) %>%
  bind_rows() %>%
  # Every block received a spacer, so drop the one after the final block.
  slice(-n())
# A tibble: 30 x 2
# referral_phase_code placement_status_type
# <chr> <chr>
# 1 - opening_bal
# 2 EA New
# 3 EA Transfer
# 4 EA Reinstated
# 5 EA Suspended
# 6 EA Trf to PPS
# 7 EA Exit
# 8 - closing_bal
# 9 <NA> <NA>
#10 - opening_bal
# … with 20 more rows
另一种选择是先用 uncount 把 "closing_bal" 所在的行各复制一份，再用 replace 把复制出来的行替换为 NA：
# Duplicate each "closing_bal" row with uncount(), then blank the duplicate.
df %>%
  mutate(
    # Two copies for a "closing_bal" row, one for every other row. `%in%`
    # never yields NA, so uncount() always gets valid weights. The final row
    # is left single so no trailing spacer is produced.
    copies = 1L + (
      placement_status_type %in% "closing_bal" & row_number() < n()
    )
  ) %>%
  # `.id` numbers the copies of each source row (1 = original, 2 = extra), so
  # nothing depends on how row names look after expansion.
  uncount(copies, .id = "copy") %>%
  mutate(across(-copy, ~ replace(.x, copy > 1L, NA))) %>%
  # Remove the helper column so only the original columns remain.
  select(-copy)
或使用data.table
library(data.table)

# Work on a copy: setDT(df) would convert `df` in place, and `:=` would then
# leave the helper column on the caller's data.
dt <- as.data.table(df)
# Block id = number of "closing_bal" rows seen strictly before the current row.
dt[, grp := shift(cumsum(placement_status_type == "closing_bal"), fill = 0L)]
# Row index .N + 1 is out of range within each block, so subsetting with it
# yields one all-NA row appended to every block.
spaced <- dt[, .SD[c(seq_len(.N), .N + 1L)], by = grp]
# Drop the helper column so only the original columns remain.
spaced[, grp := NULL]
# Every block gained a spacer; remove the one trailing the final block.
head(spaced, -1L)