我希望使用R中的运行窗口计算12个变量的相关系数。
我的数据存储在动物园对象中,%m。%d。%Y%H:%M:%S索引,12个变量中的每一个都有1343个观察值。我不知道我将要使用的窗口大小,但如果需要我可以更改它。
@Joshua Ulrich已发布here如何使用rollapplyr计算滚动相关性,但此示例只有两个变量。由于我的R经验有限,我不确定如何使用其中一个应用族函数来运行所有12个变量的相关性。
任何指针都会非常感激
我的数据如下:
> head(wideRawXTS)
DO0182U09A3 DO0182U09B3 DO0182U09C3 DO0182U21A1 DO0182U21A2 DO0182U21A3 DO0182U21B1 DO0182U21B2 DO0182U21B3
2017-01-20 16:30:00 -101.50 -103.37 -103.86 -104.78 -104.95 -105.33 -102.50 -99.43 -104.05
2017-01-20 16:45:00 -101.32 -102.75 -104.22 -104.51 -103.94 -105.29 -102.82 -101.99 -103.94
2017-01-20 17:00:00 -101.45 -103.30 -103.93 -104.70 -104.82 -105.13 -103.72 -103.95 -104.25
2017-01-20 17:15:00 -100.91 -95.92 -99.22 -103.83 -104.72 -105.19 -103.57 -101.36 -104.09
2017-01-20 17:30:00 -100.91 -103.04 -104.09 -102.15 -104.91 -105.18 -103.88 -104.09 -103.96
2017-01-20 17:45:00 -100.97 -103.67 -104.12 -105.07 -104.23 -97.48 -103.92 -103.89 -104.01
DO0182U21C1 DO0182U21C2 DO0182U21C3
2017-01-20 16:30:00 -104.51 -104.42 -105.17
2017-01-20 16:45:00 -104.74 -104.65 -105.25
2017-01-20 17:00:00 -105.02 -105.04 -105.32
2017-01-20 17:15:00 -103.90 -102.95 -105.16
2017-01-20 17:30:00 -104.75 -105.07 -105.23
2017-01-20 17:45:00 -105.08 -105.14 -104.89
#Importing Data
rawDF <- read.csv("RTWP_DO0182_14Day_Raw.csv",
header = F,
col.names = c("Period Start Time",
"PLMNname",
"RNCname",
"WBTSname",
"WBTSID",
"WCELname",
"WCELID",
"Overload",
"AverageRTWP",
"DC_RTWP_High_PRX",
"RTWP_Threshold"),
colClasses = c("character",
"NULL", #drops PLMN name
"NULL", #drops RNC name
"character",
"integer",
"character",
"integer",
"NULL", #drops Overload
"numeric",
"NULL", #drops DC_RTWP_High_PRX
"NULL"), #drops RTWP_Threshold
strip.white=TRUE,
na.strings = c("NA",
"NULL"),
as.is = T,
skip=2)
#convert period.start.time to POSIXlt
rawDF$Period.Start.Time <- as.POSIXlt(strptime(rawDF$Period.Start.Time,
format="%m.%d.%Y %H:%M:%S"))
#dcast the long data frame to a wide data frame
wideRawDF <- dcast(rawDF,
Period.Start.Time ~ WCELname,
value.var = "AverageRTWP")
#assign the date times as rownames for converting to XTS
rownames(wideRawDF) = wideRawDF[[1]]
#drops the duplicate Period Start Time column since date times are rownames
wideRawDF$Period.Start.Time <- NULL
#Convert wideRawDF to XTS time series
wideRawDF <- as.xts(wideRawDF)
#NA values replaced by interpolated values
wideRawDF <- na.approx(wideRawDF, na.rm = FALSE)
#DF is centered by subtracting the column means and scaled by dividing the
#centered columns by their standard deviations
wideRawDFscaled <- scale(wideRawDF, center = TRUE, scale = TRUE)
#window <- 20
#cors <- combn(names(wideRawDFscaled),2,
# function(p) rollapply(wideRawDFscaled, window ,
# function(x) cor(x[,p[1]],x[,p[2]]),
# by.column = FALSE))
#colnames(cors) <- combn(names(wideRawDFscaled),2,paste,collapse=".")
#Running window of correlation coefficients
Cor <- function(x) {
corr <- cor(wideRawDFscaled)
out <- as.data.frame.table(corr)[lower.tri(corr), ]
with(out, setNames(Freq, paste(Var1, Var2)))
}
slidingCor <- rollapplyr(wideRawDFscaled, 6, Cor, by.column = FALSE)
答案 0 :(得分:1)
您可以使用combn
将关联应用于每对列。调整您引用的答案:
window <- 20
cors <- combn(colnames(wideRawXTS),2,
function(p) rollapply(wideRawXTS, window ,
function(x) cor(x[,p[1]],x[,p[2]]),
by.column=FALSE))
colnames(cors) <- combn(colnames(wideRawXTS),2, paste, collapse=".")
答案 1 :(得分:1)
假设在Note中可重复显示3列输入并使用宽度为3的窗口进行说明,定义接受矩阵的相关函数.PHONY
并计算其相关矩阵,提取下三角形部分以消除冗余并添加列名。现在使用Cor
:
rollapplyr
,并提供:
library(xts)
Cor <- function(x) {
corr <- cor(x)
out <- as.data.frame.table(corr)[lower.tri(corr), ]
with(out, setNames(Freq, paste(Var1, Var2)))
}
rollapplyr(xx, 3, Cor, by.column = FALSE)
注意:以可重现的形式输入 DO0182U09B3.DO0182U09A3 DO0182U09C3.DO0182U09A3 DO0182U09C3.DO0182U09B3
2017-01-20 16:30:00 NA NA NA
2017-01-20 16:45:00 NA NA NA
2017-01-20 17:00:00 0.98573 -0.99613 -0.99671
2017-01-20 17:15:00 0.98629 0.95983 0.99297
2017-01-20 17:30:00 0.52664 0.47475 0.99820
2017-01-20 17:45:00 0.56204 0.50460 0.99769
:
xx