我正在处理的数据:
> dput(head(data1))
structure(list(datetime_utc = c("2010-01-04 00:00:00", "2010-01-04 01:00:00",
"2010-01-04 02:00:00", "2010-01-04 03:00:00", "2010-01-04 04:00:00",
"2010-01-04 05:00:00"), Generation_BE = c(13143.7, 13143.7, 13143.7,
13143.7, 13143.7, 13143.7), Generation_FR = c(63599, 62212, 62918,
62613, 62432, 63411), Prices.BE = c(37.15, 33.47, 28, 21.29,
16.92, 28), holidaysBE = c(0L, 0L, 0L, 0L, 0L, 0L)), row.names = c(NA,
6L), class = "data.frame")
我检查了我的数据,发现了缺失值 (NA)。然后,我用中位数替换了 NA 的值。 我的最终目标是研究比利时的价格,因此我制作了比利时价格时间序列。
我的代码如下:
library(dplyr)
# Count the missing values in the Belgian price column
sum(is.na(data1$Prices.BE))
# Intended: collect the names of the columns that contain missing values
# NOTE(review): MARGIN = 1 makes apply() test every ROW for NAs, so the logical
# index has one entry per row while colnames(data1) has one entry per column;
# a per-column test would use MARGIN = 2 (or colSums(is.na(data1)) > 0).
list_na <- colnames(data1)[ apply(data1, 1, anyNA) ]
list_na
# View rows where the Prices of Belgium is NA
data1[is.na(data1$Prices.BE),]
# Replace the missing observations with the median
# NOTE(review): MARGIN = 1 again, so this yields one median per row across the
# selected columns, not one median per column.
median_missing <- apply(data1[,colnames(data1) %in% list_na],
1,
median,
na.rm = TRUE)
# The imputed values are written to a NEW column, replace_median_Prices.BE;
# Prices.BE itself is left unchanged and keeps its NAs. Only the first element
# of median_missing is used as the fill value.
newdata1 <- data1 %>%
mutate(replace_median_Prices.BE = ifelse(is.na(Prices.BE), median_missing[1], Prices.BE))
head(newdata1)
# Extract Belgium prices time series from data
# NOTE(review): this reads newdata1$Prices.BE, the un-imputed column, so the
# series still carries the NAs; the imputed values live in
# newdata1$replace_median_Prices.BE.
# NOTE(review): start is passed as a Date for 2001-01-01, whereas the
# datetime_utc values shown begin at 2010-01-04; ts() documents start as a
# single number or a vector of two numbers -- confirm the intended origin.
belgiumptimeseries <-ts(newdata1$Prices.BE, start =as.Date("2001-01-01"), frequency = 365*24)
belgiumptimeseries
# Plotting Time Series
plot(belgiumptimeseries)
library(tsfeatures)
# Compute summary features of the series
tsfeatures(belgiumptimeseries)
# Decomposing to estimate the trend, seasonal and random components of this time series
> belgiumptimeseries_componets <-decompose(belgiumptimeseries, type="additive")
Error in na.omit.ts(x) : time series contains internal NAs
> plot(belgiumptimeseries_componets)
Error in plot(belgiumptimeseries_componets) :
object 'belgiumptimeseries_componets' not found
我的代码最后一行报错,提示存在 NA 值。我做错了什么?代码的哪一部分没有正常工作?欢迎任何建议,我实在看不出代码哪里有问题!
答案 0 :(得分:0)
我们可以用 median(中位数)替换值为 NA 的元素,然后再 plot(绘图)。
library(dplyr)
library(zoo)
library(tsfeatures)
# // read the data (file.choose() opens an interactive file picker)
data1 <- read.csv(file.choose())
# // check for NAs column wise
colSums(is.na(data1))
# datetime_utc Generation_BE Generation_FR Prices.BE holidaysBE
# 0 0 0 29 0
# // replace each NA with the median of its own column; only numeric columns
# // that contain at least one NA are touched
# // na.aggregate() is wrapped in a lambda because forwarding extra arguments
# // through across()'s `...` is deprecated in dplyr >= 1.1.0
newdata1 <- data1 %>%
  mutate(across(
    where(~ is.numeric(.x) && anyNA(.x)),
    ~ na.aggregate(.x, FUN = median)
  ))
# // check for NAs again column wise
colSums(is.na(newdata1))
# datetime_utc Generation_BE Generation_FR Prices.BE holidaysBE
# 0 0 0 0 0
构建时间序列
# // Extract Belgium prices time series from data
# // The series is hourly, with a yearly cycle of 365 * 24 observations.
# // ts() takes `start` as c(cycle, position within cycle): the first timestamp
# // shown in the data is 2010-01-04 00:00:00, i.e. hour 3 * 24 + 1 = 73 of 2010.
# // A Date passed as `start` is treated as a day count, not a calendar date.
# // Assumes the rows are in chronological order from that timestamp -- confirm.
belgiumptimeseries <- ts(newdata1$Prices.BE,
  start = c(2010, 3 * 24 + 1), frequency = 365 * 24
)
检查特征(features)
# // Compute summary features of the imputed series; the commented lines below
# // are the printed result of the original run (a one-row tibble, 20 columns)
tsfeatures(belgiumptimeseries)
# A tibble: 1 x 20
# frequency nperiods seasonal_period trend spike linearity curvature e_acf1 e_acf10 seasonal_streng… peak trough
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 8760 1 8760 0.0552 5.93e-7 -35.3 -18.6 0.277 0.418 0.316 2000 3942
# … with 8 more variables: entropy <dbl>, x_acf1 <dbl>, x_acf10 <dbl>, diff1_acf1 <dbl>, diff1_acf10 <dbl>,
# diff2_acf1 <dbl>, diff2_acf10 <dbl>, seas_acf1 <dbl>
分解时间序列
# // Split the series into trend, seasonal and random parts (additive model)
# // and keep the result so the components can be inspected afterwards
belgiumptimeseries_componets <- decompose(
  x = belgiumptimeseries,
  type = "additive"
)
# // Draw the observed series together with its estimated components
plot(x = belgiumptimeseries_componets)