我有两个数据框:一个包含设备运行期间的故障信息,另一个包含维护操作记录。有没有办法计算在先前发生某个故障的条件下,执行某项维护操作的概率?
# Sample problem
# Determine frequency table from two data sources to determine
# if events are sequential
# Generate N random date-times in a range.
#
# Args:
#   N:  number of date-times to draw.
#   st: start of the range, converted with as.Date() (default "2014/01/01").
#   et: end of the range, converted with as.Date() (default "2014/06/30").
#
# Returns:
#   A POSIXct vector of length N, sorted in ascending order, drawn uniformly
#   between st and et.
randDates <- function(N, st="2014/01/01", et="2014/06/30") {
  st <- as.POSIXct(as.Date(st))
  et <- as.POSIXct(as.Date(et))
  # Width of the range in seconds; argument name and unit are spelled out in
  # full instead of relying on partial matching.
  dt <- as.numeric(difftime(et, st, units = "secs"))
  # Uniform offsets from the start, sorted so the result is chronological.
  ev <- sort(runif(N, 0, dt))
  # Return the value itself rather than an assignment, so the result is
  # visible when the function is called at the top level.
  st + ev
}
# Generate two data sets.
# oper_df contains operational data: the event time, the fault (event) name,
# and the serial number.
# maint_df contains maintenance data: the maintenance time, the maintenance
# action, and the serial number.
set.seed(1234)
nSerial <- 10
nEventTypes <- 10
nOpersPerSerial <- 100
nOps <- nOpersPerSerial * nSerial
oper_df <- data.frame(
  time = randDates(nOps),
  fault = paste0("Event_", sample(1:nEventTypes, nOps, replace = TRUE)),
  serialNo = sample(1:nSerial, nOps, replace = TRUE)
)
nMaintenancePerSerial <- 30
nMaintenanceActions <- 20
nMaint <- nMaintenancePerSerial * nSerial
# The maintenance data is sized by nMaint. It was previously sized by nOps,
# which left nMaint unused and produced as many maintenance rows as
# operational rows.
maint_df <- data.frame(
  time = randDates(nMaint),
  action = paste0("Action_",
                  sample(1:nMaintenanceActions, nMaint, replace = TRUE)),
  serialNo = sample(1:nSerial, nMaint, replace = TRUE)
)
# Show the frequency of events and actions by serial number
oper_table <- with(oper_df, table(fault, serialNo))
maint_table <- with(maint_df, table(action, serialNo))
# How to determine the frequency of maintenance actions with previous
# operational faults on the same serial no?

# Variant 1: stack both sources with separate fault / action columns.
# Each row fills exactly one of the two columns and leaves the other NA.
temp1_df <- data.frame(serialNo = oper_df$serialNo, time = oper_df$time,
                       fault = oper_df$fault,
                       action = NA,
                       source = "OPS")
temp2_df <- data.frame(serialNo = maint_df$serialNo, time = maint_df$time,
                       action = maint_df$action,
                       fault = NA,
                       source = "MAINT")
opsMaint_df <- rbind(temp1_df, temp2_df)
# Order chronologically within each serial number.
opsMaint_df <- opsMaint_df[with(opsMaint_df, order(serialNo, time)), ]
head(opsMaint_df)

# Variant 2: stack both sources with a single info column; the source column
# tells whether info holds a fault ("OPS") or an action ("MAINT").
temp1_df <- data.frame(serialNo = oper_df$serialNo, time = oper_df$time,
                       info = oper_df$fault,
                       source = "OPS")
temp2_df <- data.frame(serialNo = maint_df$serialNo, time = maint_df$time,
                       info = maint_df$action,
                       source = "MAINT")
opsMaint_df2 <- rbind(temp1_df, temp2_df)
opsMaint_df2 <- opsMaint_df2[with(opsMaint_df2, order(serialNo, time)), ]
# Fixed: this previously referenced the undefined name opsMaint2_df.
head(opsMaint_df2)

# No row of opsMaint_df has both action and fault set, and table() drops NA
# by default, so this direct cross-tabulation contains no counts. Relating an
# action to earlier faults needs a look-back over previous rows instead.
with(opsMaint_df, table(action, fault))
现在,我想要一张表:列标题是运行事件(故障),行标题是维护操作,行/列交叉处是同一序列号下该事件在该操作之前发生的次数。
查看前10个项目
serialNo time info source
1001 1 2013-12-31 22:53:07 Action_4 Maint
5 1 2014-01-01 04:16:25 Event_8 Ops
8 1 2014-01-01 09:10:46 Event_2 Ops
1005 1 2014-01-01 23:06:27 Action_10 Maint
1009 1 2014-01-02 08:48:12 Action_1 Maint
11 1 2014-01-02 12:01:18 Event_5 Ops
1011 1 2014-01-02 15:10:40 Action_3 Maint
1031 1 2014-01-05 11:06:17 Action_17 Maint
24 1 2014-01-05 11:43:07 Event_10 Ops
1041 1 2014-01-06 15:57:40 Action_17 Maint
计算操作之前发生的事件
event
action Event_2 Event_5 Event_8 Event_10
Action_1 1 0 1 0
Action_3 1 1 1 0
Action_4 0 0 0 0
Action_10 1 0 1 0
Action_17 2 2 2 1
下面是我用循环解决这个问题的尝试。它似乎能得到正确的结果,但可能耗时太长。
# Count, for every maintenance action, the operational faults recorded on
# earlier rows of the same serial number.
#
# Algorithm:
#   For each group of rows that corresponds to a serial number
#     For each row whose source is "MAINT"
#       scan the earlier rows of that serial number and add one to the
#       (action, fault) cell for every "OPS" row encountered.
#
# Args:
#   df: data frame with columns serialNo, info and source. Rows with
#       source == "OPS" hold a fault name in info; rows with
#       source == "MAINT" hold an action name in info. Rows are scanned in
#       their existing order, so df is expected to be ordered by time within
#       each serial number.
#
# Returns:
#   A data frame with one row per action and one column per fault holding the
#   counts, followed by a final "colSums" row and a final "rowSums" column.
freqCount <- function(df) {
  # Pull the columns out once as plain character vectors so the inner loops
  # do not repeatedly subset the data frame.
  info <- as.character(df$info)
  src <- as.character(df$source)

  uSerial <- sort(unique(df$serialNo))
  uFault <- as.character(sort(unique(df$info[df$source == "OPS"])))
  uAction <- as.character(sort(unique(df$info[df$source == "MAINT"])))

  # Accumulate in a numeric matrix; cell updates on a matrix are far cheaper
  # than cell updates on a data frame.
  counts <- matrix(0, nrow = length(uAction), ncol = length(uFault),
                   dimnames = list(uAction, uFault))

  # seq_along() is used throughout: seq(x) on a length-one vector x expands
  # to 1:x, which walked past the end of serialRows and raised an error for a
  # serial number that owns a single row.
  for (iSerial in seq_along(uSerial)) {
    serialRows <- which(df$serialNo == uSerial[iSerial])
    for (jRow in seq_along(serialRows)) {
      thisRow <- serialRows[jRow]
      if (src[thisRow] == "MAINT") {
        actn <- info[thisRow]
        # seq_len(jRow - 1) is empty for the first row of a serial number,
        # so no explicit lower-bound guard is required.
        for (kRow in seq_len(jRow - 1)) {
          priorRow <- serialRows[kRow]
          kInfo <- info[priorRow]
          if (src[priorRow] == "OPS") {
            counts[actn, kInfo] <- counts[actn, kInfo] + 1
          } else if (actn == kInfo) {
            # NOTE(review): the scan runs forward from the first row of the
            # serial number and stops at the first earlier occurrence of the
            # same action, so faults after that occurrence are not counted
            # for the current row. Behaviour kept as-is; confirm this is the
            # intended rule.
            break
          }
        }
      }
    }
  }

  # Convert to a data frame without mangling the fault names, then append
  # the marginal totals.
  freq <- data.frame(counts, check.names = FALSE, stringsAsFactors = FALSE)
  freq <- rbind(freq, colSums(freq))
  freq <- cbind(freq, rowSums(freq))
  names(freq)[ncol(freq)] <- "rowSums"
  row.names(freq)[nrow(freq)] <- "colSums"
  freq
}
# Restrict the input to the first ten rows so the result can be compared
# with the worked example.
freqCount(opsMaint_df2[seq_len(10), ])
# Run the count over the complete data set.
freqCount(opsMaint_df2)
下面是使用 zoo 包的 merge/aggregate 的一次尝试。但合并后得到的对象在前 10 行中却全是 NA?
# zoo merge / aggregate frequency counting
# library() stops with an error when zoo is not installed; require() would
# only return FALSE and let the script fail later at the first zoo() call.
library(zoo)
# One zoo series per source, each indexed by its own event time.
# NOTE(review): a zoo object stores its data as a matrix, so the mixed
# numeric / text columns are presumably coerced to one common type - confirm.
maint_z <- zoo(maint_df[, c("serialNo", "action")], order.by = maint_df$time)
oper_z <- zoo(oper_df[, c("serialNo", "fault")], order.by = oper_df$time)
# merge() on zoo objects keeps the union of both time indexes by default
# (all = TRUE). A timestamp that exists in only one series therefore gets NA
# in the other series' columns; the two sets of times are drawn
# independently, so rows with NA on one side are expected here.
zdf <- merge(oper_z, maint_z)
head(zdf)
如何按序列号(serialNo)分组,并按维护操作对 zdf 中先前发生的故障进行聚合?