我正在尝试对输入的温度数据进行分类,以便尽早发现上升趋势(event = 1)或下降趋势(event = 2)。上升趋势的起点定义为温度从最低点(Pl)上升 1%;下降趋势的起点定义为温度从最高点(Ph)下降 1%。
数据集的第一行初始化为 event = 1,Ph 和 Pl 都等于该行的温度;我想逐行遍历其余各行,更新 Pl / Ph 并对事件类型进行分类。
使用的数据集
# Input data: 25 temperature readings. Only the first row is initialised
# (event = 1, Ph = Pl = first temperature); every later row starts as NA.
data <- data.frame(
  Temperature = c(
    93.37, 93.44, 93.22, 93.28, 93.32, 93.48, 93.32, 92.49, 92.21,
    92.16, 91.31, 91.30, 91.37, 91.30, 91.21, 91.37, 91.59, 91.45,
    92.07, 92.16, 92.35, 92.52, 92.48, 92.13, 92.46
  ),
  event = c(1, rep(NA, 24)),
  Ph = c(93.37, rep(NA, 24)),
  Pl = c(93.37, rep(NA, 24))
)
预期结果
# Expected result: the same 25 temperatures with event, Ph and Pl filled in.
data <- data.frame(
  Temperature = c(
    93.37, 93.44, 93.22, 93.28, 93.32, 93.48, 93.32, 92.49, 92.21,
    92.16, 91.31, 91.30, 91.37, 91.30, 91.21, 91.37, 91.59, 91.45,
    92.07, 92.16, 92.35, 92.52, 92.48, 92.13, 92.46
  ),
  # rows 1-7 are event 1, rows 8-19 are event 2, rows 20-25 are event 1
  event = rep(c(1, 2, 1), times = c(7, 12, 6)),
  Ph = c(
    93.37, rep(NA, 6),
    92.49, 92.21, 92.16, 91.31, 91.3, 91.3,
    91.3, 91.21, 91.21, 91.21, 91.21, 91.21,
    rep(NA, 6)
  ),
  Pl = c(
    93.37, 93.44, 93.44, 93.44, 93.44, 93.48, 93.48,
    rep(NA, 12),
    92.16, 92.35, 92.52, 92.52, 92.52, 92.52
  )
)
我无法把下面的代码改写成能够在历史数据集上逐行执行这些操作的循环。我已经尝试过编写函数以及使用 apply 系列函数,但都没有成功。
# Attempt from the question. Rule as read from the branches below:
# while event == 1, carry the running high in Ph and switch to event 2 once
# the temperature is at or below 99% of it; while event == 2, carry the
# running low in Pl and switch to event 1 once the temperature is at or
# above 101% of it.
# As written the loop stops with an error; see the NOTE(review) comments.
#
# NOTE(review): Ph_lag / Pl_lag are computed once, before the loop, so they
# never pick up the Ph / Pl values the loop assigns. Which `lag` is in scope
# is not shown here (stats::lag and dplyr::lag behave differently) - confirm.
data$Ph_lag <- lag(data$Ph, 1)
data$Pl_lag <- lag(data$Pl, 1)
for(i in 2:nrow(data)) {
if (data$event[i-1] == 1) {
# Reported failure point: "missing value where TRUE/FALSE needed" is raised
# when Ph_lag[i] is NA, because if () cannot branch on an NA condition.
if (data$Temperature[i] <= data$Ph_lag[i] * 0.99) { # errors on rows where Ph_lag is NA
data$event[i] <- 2
# NOTE(review): the data frame built above has no `Close` column;
# presumably `Temperature` was intended - confirm.
data$Pl[i] <- data$Close[i]
} else if (data$Temperature[i] > data$Ph_lag[i]) {
# New high: record the current temperature in Ph.
data$Ph[i] <- data$Temperature[i]
data$event[i] <- 1
} else {
# No new high: copy the lagged Ph value forward.
data$Ph[i] <- data$Ph_lag[i]
data$event[i] <- 1
}
} else if (data$event[i-1] == 2) {
if (data$Temperature[i] >= data$Pl_lag[i] * 1.01) {
# At or above 101% of the lagged Pl: switch to event 1 and start Ph here.
data$event[i] <- 1
data$Ph[i] <- data$Temperature[i]
} else if (data$Temperature[i] < data$Pl_lag[i]) {
# New low: record the current temperature in Pl.
data$Pl[i] <- data$Temperature[i]
data$event[i] <- 2
} else {
# No new low: copy the lagged Pl value forward.
data$Pl[i] <- data$Pl_lag[i]
data$event[i] <- 2
}}}
目前,这段代码应用于单行时可以正常工作,但无法用来填充包含数千个观测值的历史数据。
欢迎提出任何意见,非常感谢。
答案 0 :(得分:0)
以下代码运行无错误:
# Input data: 25 temperature readings. Only the first row is initialised
# (event = 1, Ph = Pl = first temperature); every later row starts as NA.
data <- data.frame(
  Temperature = c(
    93.37, 93.44, 93.22, 93.28, 93.32, 93.48, 93.32, 92.49, 92.21,
    92.16, 91.31, 91.30, 91.37, 91.30, 91.21, 91.37, 91.59, 91.45,
    92.07, 92.16, 92.35, 92.52, 92.48, 92.13, 92.46
  ),
  event = c(1, rep(NA, 24)),
  Ph = c(93.37, rep(NA, 24)),
  Pl = c(93.37, rep(NA, 24))
)

# Expected result as given in the question, kept for comparison.
result <- data.frame(
  Temperature = c(
    93.37, 93.44, 93.22, 93.28, 93.32, 93.48, 93.32, 92.49, 92.21,
    92.16, 91.31, 91.30, 91.37, 91.30, 91.21, 91.37, 91.59, 91.45,
    92.07, 92.16, 92.35, 92.52, 92.48, 92.13, 92.46
  ),
  event = c(
    1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 1, 1, 1, 1, 1
  ),
  Ph = c(
    93.37, NA, NA, NA, NA, NA, NA,
    92.49, 92.21, 92.16, 91.31, 91.3, 91.3,
    91.3, 91.21, 91.21, 91.21, 91.21, 91.21,
    NA, NA, NA, NA, NA, NA
  ),
  Pl = c(
    93.37, 93.44, 93.44, 93.44, 93.44, 93.48, 93.48,
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
    92.16, 92.35, 92.52, 92.52, 92.52, 92.52
  )
)

# Classify each row from the row before it.
#   event 1: Ph carries the running high; a reading at or below 99% of it
#            switches to event 2 and starts Pl at that reading.
#   event 2: Pl carries the running low; a reading at or above 101% of it
#            switches to event 1 and starts Ph at that reading.
# The previous row is read directly (data$Ph[i - 1], data$Pl[i - 1]), so the
# values written on earlier iterations are visible and no precomputed lag
# columns are needed.
# seq_len(nrow(data))[-1] is 2, 3, ..., nrow(data) and is empty when there are
# fewer than two rows, whereas 2:nrow(data) would count down to 1.
# Note: Ph is filled on event-1 rows and Pl on event-2 rows, which is the
# reverse of `result`; data$Ph ends up equal to result$Pl and data$Pl equal
# to result$Ph, while data$event matches result$event.
for (i in seq_len(nrow(data))[-1]) {
  temp_now <- data$Temperature[i]
  if (data$event[i - 1] == 1) {
    prev_high <- data$Ph[i - 1]
    if (temp_now <= prev_high * 0.99) {
      data$event[i] <- 2
      data$Pl[i] <- temp_now
    } else {
      data$event[i] <- 1
      # Either a new high or the previous high carried forward.
      data$Ph[i] <- max(temp_now, prev_high)
    }
  } else if (data$event[i - 1] == 2) {
    prev_low <- data$Pl[i - 1]
    if (temp_now >= prev_low * 1.01) {
      data$event[i] <- 1
      data$Ph[i] <- temp_now
    } else {
      data$event[i] <- 2
      # Either a new low or the previous low carried forward.
      data$Pl[i] <- min(temp_now, prev_low)
    }
  }
}
但是结果与您预期的并不完全一致:event 列与预期相同,但循环在 event = 1 的行中把运行中的最高值写入 Ph、在 event = 2 的行中把最低值写入 Pl,而预期结果中这两列的内容正好相反。