
时间:2018-09-04 16:29:26

标签: r

我有一些数据,其中包含日期、姓名以及一项任务(在下面的示例中是Food列)。我想按日期对这些记录排序,从而得出每个人执行任务的先后顺序,并统计任务之间的流转情况。下面是一些非常简单的示例数据。

    Name    Date        Food
    Fred    01/01/2018  Peanuts
    Jim     03/02/2018  Banana
    Barney  02/02/2018  Rice
    Fred    06/03/2018  Rice
    Barry   12/02/2018  Peanuts
    John    04/04/2018  Rice
    Jim     03/03/2018  Rice
    Fred    20/04/2018  Rice
    Den     12/02/2018  Banana
    Barney  04/05/2018  Banana
    Jim     05/06/2018  Rice
    John    06/07/2018  Peanuts
    Jim     30/06/2018  Banana
    Fred    05/05/2018  Rice




  Flow                 count
  Peanuts to rice      1
  Peanuts to banana    0
  Peanuts to peanuts   0
  Rice to peanuts      1
  Rice to banana       2
  Rice to rice         3
  Banana to rice       1
  Banana to peanuts    0
  Banana to banana     0




因此,下面提供的答案已经给了我我想要的流程图-谢谢。 现在,我想做的是能够编辑原始数据框,以删除我不感兴趣或不想分析的流实例。


2 个答案:

答案 0 :(得分:3)


假设您的数据框名为 dat,并且:

  1. 它已按Date列升序排序(或者像您目前的数据那样,在每个Name内按Date排序);
  2. Name和Food是因子列。

## split by person; not to be messed up by "between person" flow
## as.character() gives the food labels whether Food is a factor or already a
## character column (levels(f)[f] only works for a factor)
x <- split(as.character(dat$Food), dat$Name)

#[1] "Rice"   "Banana"
#[1] "Peanuts"
#[1] "Banana"
#[1] "Peanuts" "Rice"    "Rice"    "Rice"   
#[1] "Banana" "Rice"   "Rice"   "Banana"
#[1] "Rice"    "Peanuts"


## Build the "<from> to <to>" labels for one person's ordered food sequence.
## u: character vector of foods in chronological order.
## Returns a character vector with length(u) - 1 labels such as
## "Rice to Banana", or NULL when u has fewer than two elements
## (a single record, or none, contains no transition).
getFlow1 <- function (u) {
  n <- length(u)
  if (n < 2L) return(NULL)
  ## pair every element with its successor
  paste(u[-n], u[-1L], sep = " to ")
}

## collect every person's transition labels into one flat, unnamed vector
Flow1 <- unlist(Map(getFlow1, x), use.names = FALSE)
#[1] "Rice to Banana"  "Peanuts to Rice" "Rice to Rice"    "Rice to Rice"   
#[5] "Banana to Rice"  "Rice to Rice"    "Rice to Banana"  "Rice to Peanuts"

## maybe you can control the order of factor levels here
## as.factor() leaves an existing factor (and its level order) untouched but
## also copes with a character column, where levels() alone would be NULL
food_levels <- levels(as.factor(dat$Food))
## every possible "<from> to <to>" label, so unobserved flows are counted as 0
All_Flow <- outer(food_levels, food_levels, paste, sep = " to ")
Flow1 <- table("Flow" = factor(Flow1, levels = All_Flow))
#  Banana to Banana  Peanuts to Banana     Rice to Banana  Banana to Peanuts 
#                 0                  0                  2                  0 
#Peanuts to Peanuts    Rice to Peanuts     Banana to Rice    Peanuts to Rice 
#                 0                  1                  1                  1 
#      Rice to Rice 
#                 3 


#                Flow Freq
#1   Banana to Banana    0
#2  Peanuts to Banana    0
#3     Rice to Banana    2
#4  Banana to Peanuts    0
#5 Peanuts to Peanuts    0
#6    Rice to Peanuts    1
#7     Banana to Rice    1
#8    Peanuts to Rice    1
#9       Rice to Rice    3


## Build the (from, to) pairs for one person's ordered food sequence.
## u: character vector of foods in chronological order.
## Returns a two-column character matrix with length(u) - 1 rows (column 1 is
## the "from" food, column 2 the "to" food), or NULL when u has fewer than two
## elements (a single record, or none, contains no transition).
getFlow2 <- function (u) {
  n <- length(u)
  if (n < 2L) return(NULL)
  ## pair every element with its successor
  cbind(u[-n], u[-1L])
}

## stack the per-person (from, to) pairs into one two-column matrix
Flow2 <- do.call(rbind, Map(getFlow2, x))
#     [, 1]     [, 2]     
#[1,] "Rice"    "Banana" 
#[2,] "Peanuts" "Rice"   
#[3,] "Rice"    "Rice"   
#[4,] "Rice"    "Rice"   
#[5,] "Banana"  "Rice"   
#[6,] "Rice"    "Rice"   
#[7,] "Rice"    "Banana" 
#[8,] "Rice"    "Peanuts"

## Cross-tabulate transitions: rows are the "from" food, columns the "to" food.
## Both margins share one level set, so a food that only ever appears on one
## side still gets an all-zero row/column and the table stays square.
flow_levels <- sort(unique(as.vector(Flow2)))
Flow2 <- table("From" = factor(Flow2[, 1], levels = flow_levels),
               "To" = factor(Flow2[, 2], levels = flow_levels))
#         To
#From      Banana Peanuts Rice
#  Banana       0       0    1
#  Peanuts      0       0    1
#  Rice         2       1    3

#     From      To Freq
#1  Banana  Banana    0
#2 Peanuts  Banana    0
#3    Rice  Banana    2
#4  Banana Peanuts    0
#5 Peanuts Peanuts    0
#6    Rice Peanuts    1
#7  Banana    Rice    1
#8 Peanuts    Rice    1
#9    Rice    Rice    3

答案 1 :(得分:0)



# Example records, one element per meal; the three vectors are parallel.
# Date is kept as a "dd/mm/yyyy" string here.
data <- 
  tibble(
    Name = c("Fred", "Jim", "Barney", "Fred", "Barry", "John", "Jim",
             "Fred", "Den", "Barney", "Jim", "John", "Jim", "Fred"),
    Date = c("01/01/2018", "03/02/2018", "02/02/2018", "06/03/2018",
             "12/02/2018", "04/04/2018", "03/03/2018", "20/04/2018",
             "12/02/2018", "04/05/2018", "05/06/2018", "06/07/2018",
             "30/06/2018", "05/05/2018"),
    Food = c("Peanuts", "Banana", "Rice", "Rice", "Peanuts", "Rice", "Rice",
             "Rice", "Banana", "Banana", "Rice", "Peanuts", "Banana", "Rice")
  )


# Parse the "dd/mm/yyyy" strings into Date values so they sort chronologically.
data_clean <- mutate(data, Date = as.Date(Date, format = "%d/%m/%Y"))


然后我们使用 arrange、summarise 和 str_c(..., collapse = ", ") 获得每个人吃过的食物清单。

# One row per person: the distinct foods they ate, alphabetically,
# joined into a single comma-separated string.
list_of_food_by_person <- 
  data_clean %>%
  distinct(Name, Food) %>% 
  arrange(Food) %>% 
  group_by(Name) %>% 
  summarise(List = str_c(Food, collapse = ", "))


# A tibble: 6 x 2
  Name   List         
  <chr>  <chr>        
1 Barney Banana, Rice 
2 Barry  Peanuts      
3 Den    Banana       
4 Fred   Peanuts, Rice
5 Jim    Banana, Rice 
6 John   Peanuts, Rice


# One row per person: their foods in chronological order,
# chained together as "A to B to C".
flow_of_food_per_person <- 
  data_clean %>% 
  arrange(Name, Date) %>% 
  group_by(Name) %>% 
  summarise(Flow = str_c(Food, collapse = " to "))


# A tibble: 6 x 2
  Name   Flow                            
  <chr>  <chr>                           
1 Barney Rice to Banana                  
2 Barry  Peanuts                         
3 Den    Banana                          
4 Fred   Peanuts to Rice to Rice to Rice 
5 Jim    Banana to Rice to Rice to Banana
6 John   Rice to Peanuts    


# Count how often each "previous food to current food" transition occurs.
flow_count <- 
  data_clean %>%
  arrange(Date) %>% 
  group_by(Name) %>% 
  # lag() runs within each person, so a person's first meal gets NA here
  mutate(Previous = lag(Food)) %>% 
  ungroup() %>% 
  # drop rows that have no predecessor before building the labels
  filter(!is.na(Previous)) %>% 
  mutate(Flow = str_c(Previous, Food, sep = " to ")) %>% 
  # one row per distinct transition, with its frequency in column n
  count(Flow)


# A tibble: 5 x 2
  Flow                n
  <chr>           <int>
1 Banana to Rice      1
2 Peanuts to Rice     1
3 Rice to Banana      2
4 Rice to Peanuts     1
5 Rice to Rice        3