我的数据框如下:
Date User Report Position View
1 2019-01-01 B report_03 Sales_Manager 1
2 2019-01-01 C report_04 Sales_Manager 1
3 2019-01-01 C report_04 Sales_Manager 1
4 2019-01-02 B report_03 Sales_Manager 1
5 2019-01-02 C report_05 Sales_Manager 1
6 2019-01-02 D report_06 Sales_Rep 1
7 2019-01-02 D report_06 Sales_Rep 1
8 2019-01-03 A report_03 CEO 1
9 2019-01-03 C report_04 Sales_Manager 1
10 2019-01-03 D report_06 Sales_Rep 1
11 2019-01-04 A report_01 CEO 1
12 2019-01-04 C report_04 Sales_Manager 1
13 2019-01-04 C report_04 Sales_Manager 1
14 2019-01-04 C report_05 Sales_Manager 1
15 2019-01-04 D report_05 Sales_Rep 1
16 2019-01-04 D report_06 Sales_Rep 1
17 2019-01-05 A report_01 CEO 1
18 2019-01-05 B report_04 Sales_Manager 1
19 2019-01-05 B report_04 Sales_Manager 1
20 2019-01-05 C report_04 Sales_Manager 1
每一行表示用户对仪表板上某个报告的一次登录记录。根据职位(Position),用户可以访问不同的报告。 CEO(用户A)有权访问report_01,report_02和report_03;销售经理(用户B和C)有权访问report_03,report_04和report_05;销售代表(用户D)可以访问report_05和report_06。
用户应该每天使用他们有权访问的每个报告,但有些日子他们完全不使用某些报告,而另一些日子又会多次登录。对于用户某天没有登录其有权访问的报告的情况,我想补上相应的行,并在“View”列中填入0。
因此数据框应如下所示:
Date User Report Position View
1 2019-01-01 A report_01 CEO 0
2 2019-01-01 A report_02 CEO 0
3 2019-01-01 A report_03 CEO 0
4 2019-01-01 B report_03 Sales_Manager 1
5 2019-01-01 B report_04 Sales_Manager 0
6 2019-01-01 B report_05 Sales_Manager 0
7 2019-01-01 C report_03 Sales_Manager 0
8 2019-01-01 C report_04 Sales_Manager 1
9 2019-01-01 C report_04 Sales_Manager 1
10 2019-01-01 C report_05 Sales_Manager 0
11 2019-01-01 D report_05 Sales_Rep 0
12 2019-01-01 D report_06 Sales_Rep 0
13 2019-01-02 A report_01 CEO 0
14 2019-01-02 A report_02 CEO 0
15 2019-01-02 A report_03 CEO 0
16 2019-01-02 B report_03 Sales_Manager 1
17 2019-01-02 B report_04 Sales_Manager 0
18 2019-01-02 B report_05 Sales_Manager 0
.
.
.
dput 输出:
structure(list(Date = structure(c(17897, 17897, 17897, 17898,
17898, 17898, 17898, 17899, 17899, 17899, 17900, 17900, 17900,
17900, 17900, 17900, 17901, 17901, 17901, 17901, 17901, 17901,
17902, 17902, 17902, 17902, 17902, 17903, 17903, 17903, 17904,
17904, 17904, 17904, 17904, 17904, 17904, 17904, 17905, 17905,
17905, 17905, 17905, 17906, 17906, 17906, 17906, 17906, 17907,
17907, 17907, 17907, 17907, 17908, 17908, 17908, 17908, 17908,
17909, 17909, 17909, 17909, 17910, 17910, 17910, 17911, 17911,
17911, 17911, 17911, 17912, 17912, 17912, 17912, 17913, 17914,
17914, 17914, 17914, 17914, 17915, 17915, 17915, 17915, 17916,
17916, 17916, 17916, 17917, 17917, 17917, 17918, 17918, 17918,
17918, 17919, 17919, 17919, 17919, 17919, 17920, 17920, 17920,
17921, 17921, 17921, 17921, 17922, 17922, 17923, 17923, 17923,
17923, 17923, 17924, 17924, 17924, 17924, 17924, 17925, 17925,
17925, 17925, 17926, 17926, 17926, 17927, 17927, 17927, 17927
), class = "Date"), User = structure(c(2L, 3L, 3L, 2L, 3L, 4L,
4L, 1L, 3L, 4L, 1L, 3L, 3L, 3L, 4L, 4L, 1L, 2L, 2L, 3L, 3L, 4L,
1L, 1L, 1L, 3L, 4L, 2L, 3L, 3L, 1L, 1L, 2L, 3L, 3L, 3L, 4L, 4L,
1L, 2L, 3L, 4L, 4L, 3L, 3L, 4L, 4L, 4L, 3L, 3L, 4L, 4L, 4L, 1L,
1L, 3L, 3L, 4L, 1L, 1L, 4L, 4L, 1L, 1L, 4L, 1L, 2L, 3L, 4L, 4L,
1L, 3L, 4L, 4L, 1L, 1L, 2L, 3L, 3L, 4L, 1L, 3L, 4L, 4L, 4L, 4L,
4L, 4L, 1L, 1L, 4L, 1L, 2L, 4L, 4L, 1L, 1L, 3L, 4L, 4L, 1L, 3L,
4L, 1L, 3L, 3L, 4L, 3L, 4L, 2L, 3L, 3L, 3L, 4L, 3L, 3L, 4L, 4L,
4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 3L, 4L), .Label = c("A",
"B", "C", "D"), class = "factor"), report = structure(c(3L, 4L,
4L, 3L, 5L, 6L, 6L, 3L, 4L, 6L, 1L, 4L, 4L, 5L, 5L, 6L, 1L, 4L,
4L, 4L, 5L, 6L, 3L, 3L, 3L, 4L, 5L, 3L, 4L, 4L, 1L, 2L, 4L, 4L,
5L, 5L, 6L, 6L, 2L, 3L, 5L, 5L, 6L, 4L, 5L, 6L, 6L, 6L, 4L, 5L,
5L, 6L, 6L, 1L, 2L, 4L, 5L, 6L, 1L, 3L, 6L, 6L, 1L, 1L, 6L, 2L,
4L, 5L, 6L, 6L, 3L, 4L, 6L, 6L, 1L, 2L, 3L, 4L, 5L, 5L, 2L, 4L,
5L, 6L, 6L, 6L, 6L, 6L, 1L, 2L, 6L, 2L, 3L, 6L, 6L, 1L, 3L, 5L,
5L, 5L, 2L, 5L, 5L, 2L, 4L, 5L, 5L, 5L, 6L, 3L, 4L, 4L, 5L, 6L,
5L, 5L, 5L, 6L, 6L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 3L, 4L, 4L, 6L
), .Label = c("report_01", "report_02", "report_03", "report_04",
"report_05", "report_06"), class = "factor"), Position = c("Sales_Manager",
"Sales_Manager", "Sales_Manager", "Sales_Manager", "Sales_Manager",
"Sales_Rep", "Sales_Rep", "CEO", "Sales_Manager", "Sales_Rep",
"CEO", "Sales_Manager", "Sales_Manager", "Sales_Manager", "Sales_Rep",
"Sales_Rep", "CEO", "Sales_Manager", "Sales_Manager", "Sales_Manager",
"Sales_Manager", "Sales_Rep", "CEO", "CEO", "CEO", "Sales_Manager",
"Sales_Rep", "Sales_Manager", "Sales_Manager", "Sales_Manager",
"CEO", "CEO", "Sales_Manager", "Sales_Manager", "Sales_Manager",
"Sales_Manager", "Sales_Rep", "Sales_Rep", "CEO", "Sales_Manager",
"Sales_Manager", "Sales_Rep", "Sales_Rep", "Sales_Manager", "Sales_Manager",
"Sales_Rep", "Sales_Rep", "Sales_Rep", "Sales_Manager", "Sales_Manager",
"Sales_Rep", "Sales_Rep", "Sales_Rep", "CEO", "CEO", "Sales_Manager",
"Sales_Manager", "Sales_Rep", "CEO", "CEO", "Sales_Rep", "Sales_Rep",
"CEO", "CEO", "Sales_Rep", "CEO", "Sales_Manager", "Sales_Manager",
"Sales_Rep", "Sales_Rep", "CEO", "Sales_Manager", "Sales_Rep",
"Sales_Rep", "CEO", "CEO", "Sales_Manager", "Sales_Manager",
"Sales_Manager", "Sales_Rep", "CEO", "Sales_Manager", "Sales_Rep",
"Sales_Rep", "Sales_Rep", "Sales_Rep", "Sales_Rep", "Sales_Rep",
"CEO", "CEO", "Sales_Rep", "CEO", "Sales_Manager", "Sales_Rep",
"Sales_Rep", "CEO", "CEO", "Sales_Manager", "Sales_Rep", "Sales_Rep",
"CEO", "Sales_Manager", "Sales_Rep", "CEO", "Sales_Manager",
"Sales_Manager", "Sales_Rep", "Sales_Manager", "Sales_Rep", "Sales_Manager",
"Sales_Manager", "Sales_Manager", "Sales_Manager", "Sales_Rep",
"Sales_Manager", "Sales_Manager", "Sales_Rep", "Sales_Rep", "Sales_Rep",
"Sales_Manager", "Sales_Rep", "Sales_Rep", "Sales_Rep", "Sales_Rep",
"Sales_Rep", "Sales_Rep", "Sales_Manager", "Sales_Manager", "Sales_Manager",
"Sales_Rep"), View = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)), row.names = c(NA, -130L
), class = "data.frame")
答案 0 :(得分:2)
根据您的需要,您可能需要把第一行代码换成第二行(被注释掉的那一行)。如果您想得到2019年1月整月的数据框,而其中某一天(例如节假日)没有任何人查看任何报告,那么第一行代码不会为那一天生成值为0的行,而第二行会。
还请注意,这段代码不会检查用户名与职位之间对应关系的唯一性。如果同一个用户名对应不止一个职位,结果中会把它们当作不同的用户来处理。
# NOTE: the dput() sample shown with the question names the report column
# `report`; rename it to `Report` (or adjust the keys below) before running.
# Every date that occurs in the log, kept as a one-column data frame
# (drop = FALSE) so that it can be merged below.
unique.date <- unique(df[, "Date", drop = FALSE])
# Alternative: cover a full calendar range so that days on which nobody logged
# in at all still get zero rows. It has to be a data frame with a `Date`
# column; a bare Date vector would not yield a column named `Date`.
# unique.date <- data.frame(Date = seq(from = as.Date("2019-01-01"), to = as.Date("2019-01-31"), by = "1 day"))
# Every (User, Report, Position) combination observed at least once.
unique.usr.rpt <- unique(df[, c("User", "Report", "Position")])
# The two tables share no column, so merge() returns their Cartesian product:
# one row per date for every observed combination.
unique.df <- merge(unique.date, unique.usr.rpt)
# Full outer join with the log; combinations without a login get View = NA.
result <- merge(unique.df, df, by = c("Date", "User", "Report", "Position"), all = TRUE)
# A missing View means "no login that day", so record it as 0.
result[is.na(result$View), "View"] <- 0
编辑:如果各职位的报告访问权限是固定的,我们可以在代码开头加几行定义来明确指定这些权限,其余部分与之前基本相同。完整的可运行代码如下:
# NOTE: the dput() sample shown with the question names the report column
# `report`; rename it to `Report` (or adjust the keys below) before running.
# Reports each position is entitled to see.
access <- list(
  "CEO" = c("report_01", "report_02", "report_03"),
  "Sales_Manager" = c("report_03", "report_04", "report_05"),
  "Sales_Rep" = c("report_05", "report_06")
)
# Long-format lookup table: one row per (Position, Report) pair.
access.df <- do.call(
  rbind,
  lapply(names(access), function(x) data.frame(Position = x, Report = access[[x]]))
)
# Every (User, Position) pair observed in the log.
unique.usr <- unique(df[, c("User", "Position")])
# Attach the entitled reports to each user. all = TRUE also keeps positions
# that have no user in the log (User = NA) and users whose position is not
# listed in `access` (Report = NA).
unique.usr.rpt <- merge(unique.usr, access.df, by = c("Position"), all = TRUE)
# What follows is same as before
# Every date that occurs in the log, kept as a one-column data frame.
unique.date <- unique(df[, "Date", drop = FALSE])
# Alternative: cover a full calendar range so that days without any login
# still get zero rows. It has to be a data frame with a `Date` column; a bare
# Date vector would not yield a column named `Date`.
# unique.date <- data.frame(Date = seq(from = as.Date("2019-01-01"), to = as.Date("2019-01-31"), by = "1 day"))
# No shared column, so merge() returns the Cartesian product of dates and
# (User, Report, Position) rows.
unique.df <- merge(unique.date, unique.usr.rpt)
# Full outer join with the log; combinations without a login get View = NA.
result <- merge(unique.df, df, by = c("Date", "User", "Report", "Position"), all = TRUE)
# A missing View means "no login that day", so record it as 0.
result[is.na(result$View), "View"] <- 0
现在您将获得所需的行:
> head(result[result$User=="B" & result$Report=="report_05",])
Date User Report Position View
6 2019-01-01 B report_05 Sales_Manager 0
18 2019-01-02 B report_05 Sales_Manager 0
30 2019-01-03 B report_05 Sales_Manager 0
41 2019-01-04 B report_05 Sales_Manager 0
54 2019-01-05 B report_05 Sales_Manager 0
67 2019-01-06 B report_05 Sales_Manager 0
> head(result[result$User=="C" & result$Report=="report_03",])
Date User Report Position View
7 2019-01-01 C report_03 Sales_Manager 0
19 2019-01-02 C report_03 Sales_Manager 0
31 2019-01-03 C report_03 Sales_Manager 0
42 2019-01-04 C report_03 Sales_Manager 0
55 2019-01-05 C report_03 Sales_Manager 0
68 2019-01-06 C report_03 Sales_Manager 0
答案 1 :(得分:1)
这是一种基于tidyr::complete的解决方案;我们还使用map_if补上数据集中从未出现过的组合,例如User B和report_05。
library(dplyr)
library(purrr)
library(tidyr)
# For every date, add a zero-view row for each (User, report, Position)
# combination that occurs somewhere in the data but not on that date.
ndf <- df %>%
  complete(Date, nesting(User, report, Position), fill = list(View = 0))
# Reports seen at least once for each Position, one row per (Position, report).
posrep_df <- df %>%
  group_by(Position) %>%
  summarise(report = paste(unique(report), collapse = ',')) %>%
  separate_rows(report, sep = '\\,')
# Process each Date/User group separately; groups that list fewer reports than
# their position is entitled to (3 for CEO and Sales_Manager, 2 for Sales_Rep)
# get the missing reports appended with View = 0.
ndf %>%
  mutate_if(is.factor, as.character) %>%
  split(list(.$Date, .$User)) %>%
  map_if(
    # `&&` needs a length-one condition (an error for longer vectors since
    # R 4.3), so only the first row's Position is tested. This assumes each
    # user has a single Position.
    ~ (.x[['Position']][1] %in% c('CEO', 'Sales_Manager') && n_distinct(.x[['report']]) < 3) ||
      (.x[['Position']][1] %in% c('Sales_Rep') && n_distinct(.x[['report']]) < 2),
    ~ bind_rows(
      .x,
      # Reports of this position that the group does not contain yet.
      anti_join(
        posrep_df %>% filter(Position == .x$Position[1]),
        .x,
        by = 'report'
      ) %>%
        mutate(Date = .x$Date[1], User = .x$User[1], View = 0)
    )
  ) %>%
  bind_rows() %>%
  arrange(Date, User)
# A tibble: 369 x 5
Date User report Position View
<date> <chr> <chr> <chr> <dbl>
1 2019-01-01 A report_01 CEO 0
2 2019-01-01 A report_02 CEO 0
3 2019-01-01 A report_03 CEO 0
4 2019-01-01 B report_03 Sales_Manager 1
5 2019-01-01 B report_04 Sales_Manager 0
6 2019-01-01 B report_05 Sales_Manager 0
7 2019-01-01 C report_04 Sales_Manager 1
8 2019-01-01 C report_04 Sales_Manager 1
9 2019-01-01 C report_05 Sales_Manager 0
10 2019-01-01 C report_03 Sales_Manager 0
# ... with 359 more rows
我们可以编写一个自定义函数,以减少map_if内部的括号嵌套和计算量:
# Append to one Date/User group `x` the reports its position may access but
# that are absent from `x`, each with View = 0.
# Reads the Position/report lookup table `posrep_df` from the calling
# environment. Returns `x` followed by the added rows.
combine_fun <- function(x) {
  group_position <- x$Position[1]
  # Candidate rows for this position, stamped with the group's Date and User;
  # keep only the reports that `x` does not already contain.
  missing_rows <- posrep_df %>%
    filter(Position == group_position) %>%
    mutate(Date = x$Date[1], User = x$User[1], View = 0) %>%
    anti_join(x, by = 'report')
  bind_rows(x, missing_rows)
}
# Small example for exploring `combine_fun`: user B on 2019-01-01
df_test <- filter(ndf, Date == '2019-01-01' & User == 'B')
# Rows of the group before combine_fun is applied
df_test
# A tibble: 2 x 5
Date User report Position View
<date> <fct> <fct> <chr> <dbl>
1 2019-01-01 B report_03 Sales_Manager 1
2 2019-01-01 B report_04 Sales_Manager 0
# After combine_fun: the report missing from this group is appended with View = 0
combine_fun(df_test)
# A tibble: 3 x 5
Date User report Position View
<date> <fct> <chr> <chr> <dbl>
1 2019-01-01 B report_03 Sales_Manager 1
2 2019-01-01 B report_04 Sales_Manager 0
3 2019-01-01 B report_05 Sales_Manager 0
使用combine_fun重写map_if这一步:
# Same pipeline as before, with the per-group completion delegated to
# combine_fun.
ndf %>%
  mutate_if(is.factor, as.character) %>%
  split(list(.$Date, .$User)) %>%
  map_if(
    # `&&` needs a length-one condition (an error for longer vectors since
    # R 4.3), so only the first row's Position is tested. This assumes each
    # user has a single Position.
    .p = ~ (.x[['Position']][1] %in% c('CEO', 'Sales_Manager') && n_distinct(.x[['report']]) < 3) ||
      (.x[['Position']][1] %in% c('Sales_Rep') && n_distinct(.x[['report']]) < 2),
    .f = combine_fun
  ) %>%
  bind_rows() %>%
  arrange(Date, User)