Cust_ID Date QTR Sales Action Link_Cust_ID
1 1/1/18 2018 Q1 23 NA NA
1 1/2/18 2018 Q1 22.2 NA NA
1 1/3/18 2018 Q1 12.1 2 5
1 1/4/18 2018 Q1 14.1 5 NA
3 1/1/18 2018 Q1 101 NA NA
3 1/2/18 2018 Q1 55 2 18
... ... ... ... ... ...
我的目标是:如果 Action 列 == 2,我想把 Link_Cust_ID 所指向的客户,从该关联发生的日期起直到季度末的记录,加入到我的 Top_Customer 表中。
例如,对于上面的数据,我会把 Cust_ID = 5 从 1/3/18 到 3/31/18(季度末)的记录包括进来。
# Append the records of every linked customer to `data`.
#
# For each row of `data` whose `link_type` equals 2, the rows of the global
# `customer_data` table that belong to the linked customer (`link_cust_id`),
# lie in the same `Quarter` as the linking row, and are dated on or after the
# linking date are collected and row-bound below `data`.
#
# data: data frame with (at least) the columns `link_type`, `link_cust_id`,
#   `Date` and `Quarter`. It needs the same columns as `customer_data` so
#   that the two can be row-bound.
#   NOTE(review): the original snippet never read `data$Quarter`; it is
#   assumed to exist so the selection can stop at the end of the quarter.
# Returns `data` with the linked customers' rows appended; `data` is returned
#   unchanged when no row has `link_type == 2`.
Linking_ID <- function(data) {
  required <- c("link_type", "link_cust_id", "Date", "Quarter")
  stopifnot(all(required %in% names(data)))

  # which() discards NA comparisons, so rows without a link type are skipped.
  link_rows <- which(data$link_type == 2)

  linked <- lapply(link_rows, function(i) {
    keep <- customer_data$customer_id == data$link_cust_id[i] &
      customer_data$Quarter == data$Quarter[i] &
      customer_data$Date >= data$Date[i]
    customer_data[which(keep), , drop = FALSE]
  })

  # Bind once at the end instead of growing `data` row block by row block.
  do.call(rbind, c(list(data), linked))
}
# dput() snapshot of the full customer table: 16 rows, i.e. four daily
# records (2018-01-01 to 2018-01-04, stored as UTC epoch seconds) for each of
# the customers 1, 2, 3 and 5.
structure(
  list(
    Cust_ID = rep(c(1, 2, 3, 5), each = 4),
    Date = structure(
      rep(c(1514764800, 1514851200, 1514937600, 1515024000), times = 4),
      class = c("POSIXct", "POSIXt"),
      tzone = "UTC"
    ),
    Quarter = rep("2018 Q1", 16),
    Sales = c(
      23, 22.2, 12.1, 14.1,
      18, 18.8, 19.2, 19.8,
      101, 55, 56, 55,
      NA, NA, 10.5, 11.1
    ),
    # Only rows 3 and 10 carry a link action (Action == 2); their link
    # targets are customers 5 and 18.
    Action = replace(rep(NA_real_, 16), c(3, 10), 2),
    Link_Cust_ID = replace(rep(NA_real_, 16), c(3, 10), c(5, 18))
  ),
  row.names = c(NA, -16L),
  class = c("tbl_df", "tbl", "data.frame")
)
top_customer 表的 dput 输出:
# dput() snapshot of the top-customer table: 8 rows, four daily records
# (UTC epoch seconds) for each of the customers 1 and 3.
structure(
  list(
    Cust_ID = c(1, 1, 1, 1, 3, 3, 3, 3),
    # Eight timestamps: the four days for customer 1, then the same four
    # days for customer 3.
    Date = structure(
      c(1514764800, 1514851200, 1514937600, 1515024000,
        1514764800, 1514851200, 1514937600, 1515024000),
      class = c("POSIXct", "POSIXt"),
      tzone = "UTC"
    ),
    Quarter = c("2018 Q1", "2018 Q1", "2018 Q1", "2018 Q1",
                "2018 Q1", "2018 Q1", "2018 Q1", "2018 Q1"),
    Sales = c(23, 22.2, 12.1, 14.1, 101, 55, 56, 55),
    Action = c(NA, NA, 2, NA, NA, 2, NA, NA),
    Link_Cust_ID = c(NA, NA, 5, NA, NA, 18, NA, NA)
  ),
  row.names = c(NA, -8L),
  class = c("tbl_df", "tbl", "data.frame")
)
答案 0 :(得分:1)
# Full customer table used by this answer: 16 rows, four daily records
# (2018-01-01 to 2018-01-04, UTC epoch seconds) for customers 1, 2, 3 and 5.
all_cust <- structure(
  list(
    Cust_ID = rep(c(1, 2, 3, 5), each = 4),
    Date = structure(
      rep(c(1514764800, 1514851200, 1514937600, 1515024000), times = 4),
      class = c("POSIXct", "POSIXt"),
      tzone = "UTC"
    ),
    Quarter = rep("2018 Q1", 16),
    Sales = c(
      23, 22.2, 12.1, 14.1,
      18, 18.8, 19.2, 19.8,
      101, 55, 56, 55,
      NA, NA, 10.5, 11.1
    ),
    # Link actions sit on row 3 (customer 1 -> 5) and row 10 (customer 3 -> 18).
    Action = replace(rep(NA_real_, 16), c(3, 10), 2),
    Link_Cust_ID = replace(rep(NA_real_, 16), c(3, 10), c(5, 18))
  ),
  row.names = c(NA, -16L),
  class = c("tbl_df", "tbl", "data.frame")
)
# Top-customer table used by this answer: 8 rows, four daily records
# (UTC epoch seconds) for each of the customers 1 and 3.
top_cust <- structure(
  list(
    Cust_ID = c(1, 1, 1, 1, 3, 3, 3, 3),
    # Eight timestamps: the four days for customer 1, then the same four
    # days for customer 3.
    Date = structure(
      c(1514764800, 1514851200, 1514937600, 1515024000,
        1514764800, 1514851200, 1514937600, 1515024000),
      class = c("POSIXct", "POSIXt"),
      tzone = "UTC"
    ),
    Quarter = c("2018 Q1", "2018 Q1", "2018 Q1", "2018 Q1",
                "2018 Q1", "2018 Q1", "2018 Q1", "2018 Q1"),
    Sales = c(23, 22.2, 12.1, 14.1, 101, 55, 56, 55),
    Action = c(NA, NA, 2, NA, NA, 2, NA, NA),
    Link_Cust_ID = c(NA, NA, 5, NA, NA, 18, NA, NA)
  ),
  row.names = c(NA, -8L),
  class = c("tbl_df", "tbl", "data.frame")
)
# filter(), inner_join(), bind_rows() and %>% all come from dplyr.
library(dplyr)

# Rows of the top-customer table that link to another customer (Action == 2).
top_cust2 <- filter(top_cust, Action == 2)

# Attach each linking row to the linked customer's records
# (all_cust$Cust_ID == top_cust2$Link_Cust_ID). Non-key columns present in
# both tables get the default suffixes ".x" (all_cust) and ".y" (top_cust2).
# Keep only records dated on or after the linking date and in the same
# quarter as the linking row.
bth <- inner_join(all_cust, top_cust2,
                  by = c("Cust_ID" = "Link_Cust_ID")) %>%
  filter(Date.x >= Date.y, Quarter.x == Quarter.y)

# Drop the columns that came from top_cust2. The dot is escaped and the
# pattern anchored so that only a literal trailing ".y" matches.
bth <- bth[, !grepl("\\.y$", colnames(bth))]

# Strip the literal trailing ".x" so the names match top_cust again.
colnames(bth) <- sub("\\.x$", "", colnames(bth))

# Top customers plus the records of the customers they link to.
cmb <- bind_rows(top_cust, bth)
答案 1 :(得分:1)
# select(), filter(), mutate(), case_when() and %>% all come from dplyr.
library(dplyr)

# One row per link action (Action == 2): the customer to pull in
# (Link_Cust_ID), the customer that triggered the link (New_Cust_ID), the
# date the link starts and the last day of that quarter.
link_ids_action2 <- customer %>%
  select(Cust_ID, Action, Link_Cust_ID, Date, Quarter) %>%
  filter(Action == 2) %>%
  mutate(
    # Quarter labels look like "2018 Q1"; take the year from the label so the
    # quarter end is right for any year, not only 2018.
    Quarter_year = substr(Quarter, 1, 4),
    Quarter_end_dates = as.Date(
      case_when(
        grepl("Q1", Quarter) ~ paste0(Quarter_year, "-03-31"),
        grepl("Q2", Quarter) ~ paste0(Quarter_year, "-06-30"),
        grepl("Q3", Quarter) ~ paste0(Quarter_year, "-09-30"),
        grepl("Q4", Quarter) ~ paste0(Quarter_year, "-12-31")
      ),
      # An explicit format yields NA (not an error) for unexpected labels.
      format = "%Y-%m-%d"
    )
  ) %>%
  select(Link_Cust_ID, New_Cust_ID = Cust_ID, Start_date = Date, Quarter_end_dates)
# A tibble: 2 x 4
# Link_Cust_ID New_Cust_ID Start_date Quarter_end_dates
# <dbl> <dbl> <dttm> <date>
# 1 5 1 2018-01-03 00:00:00 2018-03-31
# 2 18 3 2018-01-02 00:00:00 2018-03-31
# Records of the linked customers, from the day the link starts up to and
# including the last day of the quarter, in the column layout of the
# top-customer table.
new_top_customers <- customer %>%
  right_join(link_ids_action2, by = c("Cust_ID" = "Link_Cust_ID")) %>%
  filter(
    as.Date(Date) >= as.Date(Start_date) & as.Date(Date) <= Quarter_end_dates
  ) %>%
  select(Cust_ID, Date, Quarter, Sales, Action, Link_Cust_ID)
# A tibble: 2 x 6
# Cust_ID Date Quarter Sales Action Link_Cust_ID
# <dbl> <dttm> <chr> <dbl> <dbl> <dbl>
# 1 5 2018-01-03 00:00:00 2018 Q1 10.5 NA NA
# 2 5 2018-01-04 00:00:00 2018 Q1 11.1 NA NA
# Stack the linked customers' records below the existing top customers.
top_customer %>% bind_rows(new_top_customers)
# A tibble: 10 x 6
# Cust_ID Date Quarter Sales Action Link_Cust_ID
# <dbl> <dttm> <chr> <dbl> <dbl> <dbl>
# 1 1 2018-01-01 00:00:00 2018 Q1 23 NA NA
# 2 1 2018-01-02 00:00:00 2018 Q1 22.2 NA NA
# 3 1 2018-01-03 00:00:00 2018 Q1 12.1 2 5
# 4 1 2018-01-04 00:00:00 2018 Q1 14.1 NA NA
# 5 3 2018-01-01 00:00:00 2018 Q1 101 NA NA
# 6 3 2018-01-02 00:00:00 2018 Q1 55 2 18
# 7 3 2018-01-03 00:00:00 2018 Q1 56 NA NA
# 8 3 2018-01-04 00:00:00 2018 Q1 55 NA NA
# 9 5 2018-01-03 00:00:00 2018 Q1 10.5 NA NA
# 10 5 2018-01-04 00:00:00 2018 Q1 11.1 NA NA
# Full customer table used by this answer: 16 rows, four daily records
# (2018-01-01 to 2018-01-04, UTC epoch seconds) for customers 1, 2, 3 and 5.
customer <- structure(
  list(
    Cust_ID = rep(c(1, 2, 3, 5), each = 4),
    Date = structure(
      rep(c(1514764800, 1514851200, 1514937600, 1515024000), times = 4),
      class = c("POSIXct", "POSIXt"),
      tzone = "UTC"
    ),
    Quarter = rep("2018 Q1", 16),
    Sales = c(
      23, 22.2, 12.1, 14.1,
      18, 18.8, 19.2, 19.8,
      101, 55, 56, 55,
      NA, NA, 10.5, 11.1
    ),
    # Link actions sit on rows 3 and 10; every other row is NA.
    Action = c(rep(NA, 2), 2, rep(NA, 6), 2, rep(NA, 6)),
    Link_Cust_ID = c(rep(NA, 2), 5, rep(NA, 6), 18, rep(NA, 6))
  ),
  row.names = c(NA, -16L),
  class = c("tbl_df", "tbl", "data.frame")
)
# Top-customer table used by this answer: 8 rows, four daily records
# (UTC epoch seconds) for each of the customers 1 and 3.
top_customer <- structure(
  list(
    Cust_ID = rep(c(1, 3), each = 4),
    Date = structure(
      rep(c(1514764800, 1514851200, 1514937600, 1515024000), times = 2),
      class = c("POSIXct", "POSIXt"),
      tzone = "UTC"
    ),
    Quarter = rep("2018 Q1", 8),
    Sales = c(
      23, 22.2, 12.1, 14.1,
      101, 55, 56, 55
    ),
    # Link actions sit on row 3 (customer 1 -> 5) and row 6 (customer 3 -> 18).
    Action = replace(rep(NA_real_, 8), c(3, 6), 2),
    Link_Cust_ID = replace(rep(NA_real_, 8), c(3, 6), c(5, 18))
  ),
  row.names = c(NA, -8L),
  class = c("tbl_df", "tbl", "data.frame")
)