我有一个 data frame(这里不允许我贴图片)。我想按组件名称分组,计算每个组件在相邻两个位置之间的时间差。也就是说,我想知道每个组件在每个位置停留了多长时间,然后计算所有组件在每个位置的平均停留时间,最后再按位置和组件类型计算平均停留时间。我最初尝试用 spread 把数据展开,让位置作为键(key)、时间作为值(value),再计算各列之间的差;但每种组件类型经过的位置各不相同,所以这种方法行不通。
# Sample data (dput output) used by the snippets below: a 13-row data.frame.
# Columns:
#   component_name - character ID; two distinct components appear
#                    (6 rows for the first, 7 rows for the second)
#   component_type - factor with 4 levels; every row here is "component_3"
#   location       - factor with 14 levels, of which only 5 occur in these rows
#   times          - POSIXct timestamps with tzone "America/New_York";
#                    presumably the time the component was recorded at that
#                    location (TODO confirm with the data owner)
# Within each component the rows are listed in ascending `times` order.
comps <- structure(list(component_name = c("COMPONENT000000001",
"COMPONENT000000001",
"COMPONENT000000001", "COMPONENT000000001", "COMPONENT000000001",
"COMPONENT000000001", "COMPONENT000000002", "COMPONENT000000002",
"COMPONENT000000002", "COMPONENT000000002", "COMPONENT000000002",
"COMPONENT000000002", "COMPONENT000000002"), component_type =
structure(c(4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("component_0",
"component_1", "component_2", "component_3"), class = "factor"),
location = structure(c(6L, 2L, 14L, 1L, 1L, 4L, 6L, 2L, 14L,
14L, 1L, 1L, 4L), .Label = c("29MSJ_03_01", "5YU1V_01_02",
"7EFLP_03_02", "assembly room", "B57X3_03_00", "GH9CV_00_03",
"HUX1L_02_02", "JX3UO_01_01", "MRX5B_01_00", "TG6IA_00_02",
"VUFVH_00_00", "YBSFJ_00_01", "ZAENM_02_01", "ZZU3X_02_00"
), class = "factor"), times = structure(c(1514764800, 1514771683,
1514784872, 1514794911, 1514806504, 1514820010, 1514764800,
1514776184, 1514789862, 1514794911, 1514806046, 1514831050,
1514843151), class = c("POSIXct", "POSIXt"), tzone = "America/New_York")), .Names = c("component_name",
"component_type", "location", "times"), row.names = c(NA, 13L
), class = "data.frame")
# Asker's original attempt, with two mechanical defects fixed so that it runs
# on `comps` and yields comparable numbers:
#   * the grouping column is `component_type` (`comps` has no `type` column);
#   * `difftime()` gets explicit `units`. Without it, each group picks its own
#     unit automatically (secs/mins/hours/days), so `as.numeric()` would mix
#     units across groups and make the averages look deceptively similar.
# NOTE: the approach itself is unchanged. `diff` is the span between the
# earliest and the latest timestamp seen at a location across all components
# of a type; it is not the time an individual component stayed there.
loc_diff <- comps %>%
  group_by(component_type, location) %>%
  mutate(
    diff = as.numeric(difftime(max(times), min(times), units = "secs"))
  ) %>%
  ungroup()

# Mean of the per-row spans (in seconds) for each location.
ld <- loc_diff %>%
  group_by(location) %>%
  summarise(avg = mean(diff))
这是我最初的尝试,但它返回的数据框里所有平均值都大致相同;根据我做过的其他探索,我认为这个结果不对。非常感谢任何帮助,谢谢!
P.S. 我不确定是否应该为此做一些额外的处理;我还是新手,仍在努力让它正常工作。
答案 0 :(得分:1)
首先,按 `component_name` 分组,然后用下一行的 `times` 减去当前行的 `times`,即可得到组件在每个位置停留的时间。两个时间点之间的差(以秒为单位)可以用 `difftime` 计算。
library(dplyr)
library(tidyr)
library(lubridate)
# Time each component spends at a location: the gap, in seconds, between its
# timestamp at that location and its next timestamp.
# `order_by = times` makes `lead()` pick the chronologically next record even
# if the rows are not already sorted by time within a component; the row order
# of the result is left untouched.
# The last record of every component has no successor, so its value is NA.
# The result is ungrouped so later steps can choose their own grouping.
comps_timesSpendAtLocatoin <- comps %>%
  group_by(component_name) %>%
  mutate(
    timeSpendAtLocation = difftime(
      lead(times, order_by = times), times,
      units = "secs"
    )
  ) %>%
  ungroup()
# Average time (seconds) that each component spends at a location.
# NA durations (the final record of each component) are dropped from the mean.
comps_timesSpendAtLocatoin %>%
  group_by(component_name) %>%
  summarise(
    avgTimeComponentAtLocation = mean(timeSpendAtLocation, na.rm = TRUE)
  )
# Printed result:
# component_name avgTimeComponentAtLocation
# <chr> <time>
# 1 COMPONENT000000001 11042
# 2 COMPONENT000000002 13058.5
# Average time (seconds) spent at each location, broken down by component type.
# "assembly room" comes out as NaN: it is the final record of every component,
# so all of its durations are NA and nothing is left to average once
# `na.rm = TRUE` removes them.
comps_timesSpendAtLocatoin %>%
  group_by(component_type, location) %>%
  summarise(
    avgTimeComponentTypeAtLocation = mean(timeSpendAtLocation, na.rm = TRUE)
  )
# Printed result:
# # A tibble: 5 x 3
# # Groups: component_type [?]
# component_type location avgTimeComponentTypeAtLocation
# <fctr> <fctr> <time>
# 1 component_3 29MSJ_03_01 15551
# 2 component_3 5YU1V_01_02 13433.5
# 3 component_3 assembly room NaN
# 4 component_3 GH9CV_00_03 9133.5
# 5 component_3 ZZU3X_02_00 8741