我有一个时间序列数据集,下面是一个非常简化的版本,只包含时间和价格两列。
Time Price
15:30:01 NA
15:30:02 NA
15:30:03 36
15:30:04 38
15:30:05 37.5
15:30:06 NA
15:30:07 NA
15:30:08 37
15:30:09 37.8
15:30:10 39
15:30:11 40
15:30:12 38.5
15:30:13 38
15:30:14 38
我希望编写一个能够返回最佳价格的函数:
Time Price Best Price
15:30:01 NA 36
15:30:02 NA 36
15:30:03 36 36
15:30:04 38 38
15:30:05 37.5 38
15:30:06 NA 38
15:30:07 NA 38
15:30:08 37 38
15:30:09 37.8 38
15:30:10 39 39
15:30:11 40 40
15:30:12 38.5 40
15:30:13 38 40
15:30:14 38 40
我试过
# Compare a price series against a single reference price, element by element.
#
# price1: numeric vector of prices (may contain NA).
# price2: reference price used as the starting "best" value.
#
# Returns, for each element of price1, whichever of that element and price2
# is larger (NA where the price itself is NA). ifelse() is vectorised, so every
# element is compared against the same price2; nothing is carried over from
# one element to the next.
bbo <- function(price1, price2) {
  currbestprice <- price2
  beats_current <- price1 >= currbestprice
  currbestprice <- ifelse(beats_current, price1, currbestprice)
  currbestprice
}
我打算用na.omit(Price)[1](即第一个非NA值)来初始化price2,然后希望currbestprice不断更新,始终保持当前的最优价格。price1就是价格序列。
但是当我测试时:
# Sample price series from the table above; NA marks a tick without a price.
p1 <- c(NA, NA, 36, 38, 37.5, NA, NA, 37, 37.8, 39, 40, 38.5, 38, 38)
# Seed price: the first non-NA entry of the series (36 for this sample).
p2 <- na.omit(p1)[1]
bbo(p1, p2)
返回
NA NA 36.0 38.0 37.5 NA NA 37.0 37.8 39.0 40.0 38.5 38.0 38.0
它似乎没有更新我的currbestprice。我卡住了,非常感谢任何帮助。
答案 0 :(得分:3)
另一个基础R方案,使用cummax函数:
# create a new column 'BestPrice', starting from the raw prices
df$BestPrice <- df$Price
# rows that actually carry a price
has_price <- !is.na(df$Price)
if (any(has_price)) {
  # treat missing prices as -Inf so they can never raise the running maximum
  # (a 0 placeholder would be wrong as soon as a price is zero or negative)
  df$BestPrice[!has_price] <- -Inf
  # use 'cummax' to get the best price up to each row; rows before the first
  # observed price are still -Inf at this point, so 'pmax' lifts them to that
  # first observed price
  df$BestPrice <- pmax(cummax(df$BestPrice), df$Price[has_price][1])
}
# if no price was observed at all, 'BestPrice' simply stays NA
给出:
> df
       Time Price BestPrice
1  15:30:01    NA        36
2  15:30:02    NA        36
3  15:30:03  36.0        36
4  15:30:04  38.0        38
5  15:30:05  37.5        38
6  15:30:06    NA        38
7  15:30:07    NA        38
8  15:30:08  37.0        38
9  15:30:09  37.8        38
10 15:30:10  39.0        39
11 15:30:11  40.0        40
12 15:30:12  38.5        40
13 15:30:13  38.0        40
14 15:30:14  38.0        40
另一种选择是将zoo包中的na.locf函数(使用fromLast = TRUE参数)与cummax结合使用:
library(zoo)
# carry the last observed price forward so that a gap never "sees" a future
# price; na.rm = FALSE keeps the vector the same length as the data frame and
# leaves any leading NA's in place
df$BestPrice <- na.locf(df$Price, na.rm = FALSE)
# back-fill only those leading NA's with the first observed price
df$BestPrice <- na.locf(df$BestPrice, fromLast = TRUE, na.rm = FALSE)
# running maximum = best price seen up to each row
df$BestPrice <- cummax(df$BestPrice)
答案 1 :(得分:1)
你可以试试这个:
# simulate some data: 100 time steps with Poisson-distributed prices
data <- data.frame(
  time = 1:100,
  price = rpois(n = 100, lambda = 10)
)
# blank out 30 randomly chosen prices to mimic the gaps in the real data
na_idxs <- sample(x = 1:100, size = 30)
data$price[na_idxs] <- NA

# running maximum, seeded with the first observed price so that rows before
# the first observation report that price rather than an arbitrary 0
c_max <- data$price[!is.na(data$price)][1]
# preallocate the result instead of growing a list inside the loop
best_price <- numeric(nrow(data))
for (i in seq_along(data$price)) {
  # an NA price carries no information, so only an observed price that
  # exceeds the current maximum may update it
  if (!is.na(data$price[i]) && data$price[i] > c_max) {
    c_max <- data$price[i]
  }
  # record the best price seen up to (and including) this row
  best_price[i] <- c_max
}
# add best_price to data as a new last column
data <- cbind(data, best_price = best_price)
# output the data
time price best_price
1 1 8 8
2 2 13 13
3 3 7 13
4 4 7 13
5 5 6 13
6 6 14 14
7 7 6 14
8 8 NA 14
9 9 7 14
10 10 NA 14
11 11 9 14
12 12 11 14
13 13 16 16
14 14 14 16
15 15 14 16
16 16 NA 16
17 17 4 16
18 18 11 16
答案 2 :(得分:1)
你可以尝试这个(假设NA为0)
# Read the sample series; the two whitespace-separated fields are the time
# stamp and the price.
df <- read.table(text = "
15:30:01 NA
15:30:02 NA
15:30:03 36
15:30:04 38
15:30:05 37.5
15:30:06 NA
15:30:07 NA
15:30:08 37
15:30:09 37.8
15:30:10 39
15:30:11 40
15:30:12 38.5
15:30:13 38
15:30:14 38", col.names = c("Time", "Price"))

# Missing prices are overwritten with 0 in the Price column itself.
df$Price[is.na(df$Price)] <- 0

# Seed the running best with the first strictly positive price.
first_positive <- which(df$Price > 0)[1]
Bestprice <- df$Price[first_positive]

# Walk the rows in order, raising the running best whenever a higher price
# appears and recording it for every row.
for (row_idx in seq_len(nrow(df))) {
  Bestprice <- max(Bestprice, df$Price[row_idx])
  df$BestPrice[row_idx] <- Bestprice
}
> df
Time Price BestPrice
1 15:30:01 0.0 36
2 15:30:02 0.0 36
3 15:30:03 36.0 36
4 15:30:04 38.0 38
5 15:30:05 37.5 38
6 15:30:06 0.0 38
7 15:30:07 0.0 38
8 15:30:08 37.0 38
9 15:30:09 37.8 38
10 15:30:10 39.0 39
11 15:30:11 40.0 40
12 15:30:12 38.5 40
13 15:30:13 38.0 40
14 15:30:14 38.0 40