我有季度数据,需要做月度分析。是否有函数或ggplot功能可以对季度数据进行插值,从而补全月度数据?
# Print a reproducible R representation of the first 25 rows of HPF; the
# structure(...) expression that follows appears to be the pasted output.
dput(HPF[1:25, ])
structure(list(region = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), path = c(1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
), date = structure(c(16116, 16205, 16297, 16389, 16481, 16570,
16662, 16754, 16846, 16936, 17028, 17120, 17212, 17301, 17393,
17485, 17577, 17666, 17758, 17850, 17942, 18031, 18123, 18215,
18307), class = "Date"), index_value = c(1, 1.033852765, 1.041697122,
1.038876363, 1.041043093, 1.060900982, 1.073728928, 1.075879441,
1.080898915, 1.10368893, 1.119240863, 1.122827602, 1.128639801,
1.15275796, 1.169021733, 1.172707492, 1.178666441, 1.203634882,
1.220348482, 1.223890323, 1.229770019, 1.255791539, 1.273560554,
1.278236959, 1.285508086), index = c(0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24), counter = 1:25, BaseQoQ = c(0, 0.033852765, 0.00758749917354051,
-0.00270784947028013, 0.00208564760655761, 0.0190749923163842,
0.0120915582298895, 0.00200284535874973, 0.00466546139717505,
0.0210843166587877, 0.0140908661646175, 0.00320461762840418,
0.00517639483536669, 0.0213692260175751, 0.0141085757499344,
0.00315285755256367, 0.00508136004984272, 0.0211836361259394,
0.0138859385432799, 0.00290231933930496, 0.00480410367620832,
0.021159663675294, 0.0141496533844698, 0.00367191413499146, 0.00568840303732765
), fdate = structure(c(16116, 16205, 16297, 16389, 16481, 16570,
16662, 16754, 16846, 16936, 17028, 17120, 17212, 17301, 17393,
17485, 17577, 17666, 17758, 17850, 17942, 18031, 18123, 18215,
18307), class = "Date"), StressC = c(0.99749, 1.031342765, 1.039187122,
1.036366363, 1.038533093, 1.058390982, 1.071218928, 1.073369441,
1.078388915, 1.10117893, 1.116730863, 1.120317602, 1.126129801,
1.15024796, 1.166511733, 1.170197492, 1.176156441, 1.201124882,
1.217838482, 1.221380323, 1.229770019, 1.255791539, 1.273560554,
1.278236959, 1.285508086), StressQoQ = c(0, 0.0339379492526242,
0.00760596502560418, -0.0027143898728953, 0.00209069888540969,
0.0191210941026796, 0.0121202336548254, 0.00200753827606026,
0.00467637125510434, 0.0211333913794913, 0.0141229845362187,
0.00321182042946733, 0.00518799221722843, 0.021416855302633,
0.0141393626118667, 0.00315964160130755, 0.00509225924746737,
0.021228843485116, 0.0139149560969629, 0.00290830110260876, 0.0068690282969297,
0.021159663675294, 0.0141496533844698, 0.00367191413499146, 0.00568840303732765
)), .Names = c("region", "path", "date", "index_value", "index",
"counter", "BaseQoQ", "fdate", "StressC", "StressQoQ"), row.names = c(NA,
-25L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), vars = "region", drop = TRUE, indices = list(
0:24), group_sizes = 25L, biggest_group_size = 25L, labels = structure(list(
region = 1), row.names = c(NA, -1L), class = "data.frame", vars = "region", drop = TRUE, .Names = "region"))
非常感谢任何见解!我希望我提供的数据足以提供建议。
我只需要把月度信息绘制成图,不需要把它保存在data.frame中。
答案 0 :(得分:2)
所以这就是你能做的:
1)创建一个插值日期向量
# Expand every quarterly date into three monthly dates (the quarter date itself
# plus the two following months) and collect them in a one-column data.frame.
months <- data.frame(
  date = do.call(
    c,
    lapply(data$date, function(quarter_start) {
      seq(quarter_start, by = "month", length.out = 3)
    })
  )
)
2)将原始data.frame连接到months这个data.frame上,从而在没有季度观测值的月份生成待插值的NA
library(dplyr)
# Keep every monthly date; dates with no matching row in `data` get NA in the
# columns coming from `data`, which is what the interpolation step fills in.
monthly_data <- left_join(months, data, by = "date")
3)使用na.locf()、na.approx()、na.spline()三者之一进行插值,例如对StressC:
library(zoo)
# Replace the NA entries of StressC with spline-interpolated values, overwriting
# the column in place.
monthly_data[["StressC"]] <- na.spline(monthly_data[["StressC"]])
注意:上述几种插值方式分别是:
na.locf() - 沿用最近的一个观测值
na.approx() - 线性插值
na.spline() - 样条插值(通常用于绘图)
在执行上面的插值之前运行以下代码,可以查看它们之间的差异:
# Plot the StressC values as points against their row index, then overlay the
# three fill methods: red = na.locf, green = na.approx, blue = na.spline.
with(monthly_data, {
  plot(StressC, xlab = "", ylab = "StressC", xaxt = "n")
  lines(na.locf(StressC), col = "red")
  lines(na.approx(StressC), col = "green")
  lines(na.spline(StressC), col = "blue")
})
也可以这样用ggplot来绘图:
# ggplot2 is not loaded anywhere earlier in the answer, so load it here.
library(ggplot2)
# Plot StressC against date: black points for the StressC column as stored,
# a red line for the na.locf fill and a blue line for the na.spline fill.
# The colours match the base-graphics comparison; the original drew both lines
# in red, which made them indistinguishable.
# NOTE(review): na.locf() drops leading NAs by default, which would break the
# aesthetic length; this assumes the first StressC value is not NA - confirm.
ggplot(monthly_data, aes(x = date)) +
  geom_point(aes(y = StressC), colour = "black") +
  geom_line(aes(y = na.locf(StressC)), colour = "red") +
  geom_line(aes(y = na.spline(StressC)), colour = "blue")