我有一个时间序列,如下所示:
Date Stock1 Stocks2 Stock3 Stock4 Stock5
2014-12-12 0.43049618 0.62012496 0.82292694 0.51887804 0.56065709
2014-12-15 0.69277671 1.00000000 0.98740608 0.77923007 1.00000000
2014-12-16 0.74597271 0.55805289 0.84390294 0.97395234 0.95619083
2014-12-17 0.39953887 0.71545285 0.85846613 0.85124830 0.73209062
2014-12-18 0.51999191 0.50113488 0.69509923 0.68881303 0.66698738
2014-12-19 0.38783599 0.68697817 0.76113802 0.68295281 0.74030056
2014-12-22 0.70420921 0.92787280 0.87447896 0.87722413 0.95003376
2014-12-23 0.57677722 0.71422496 0.00000000 0.81869002 0.92373912
2014-12-24 0.44820196 0.45297937 1.00000000 0.70607749 0.54608327
2014-12-26 0.33693471 0.70917672 1.00000000 0.61128286 0.69813454
2014-12-29 0.47741823 0.71516554 0.86265631 0.76560783 0.62194656
2014-12-30 0.59689325 0.94509918 0.90707156 0.57156757 0.74528902
2014-12-31 0.46160632 0.78835863 0.55488135 0.49777964 0.63122553
。
> dput(head(efficiency.scores[,c(1,2,3,4,5)], n=15))
structure(c(0.44696179, 0.395227931, 0.477439822, 0.295309508,
0.712614891, 0.689317114, 0.599395023, 0.610971864, 0.337625508,
0.529290134, 0.596002106, 0.412324483, 0.244831259, 0.443123542,
0.484748065, 0.686165972, 0.711764909, 0.604578061, 0.42144923,
0.669898641, 0.735845192, 0.592157589, 0.81714156, 0.380346873,
0.684386001, 0.672967504, 0.508142689, 0.244274776, 0.548213564,
0.417804342, 0.612475603, 0.665148957, 0.756447435, 0.582448567,
1, 1, 1, 1, 1, 1, 0.71708817, 0.528262036, 0.597354154, 0.886971243,
0.624771744, 0.498557661, 0.382554107, 0.464373083, 0.425888914,
0.747806533, 0.788271626, 0.407617084, 0.784747938, 0.466987506,
0.554976586, 0.621751352, 0.501173993, 0.323827823, 0.659625721,
0.502665703, 0.626577183, 0.458883576, 0.572507952, 0.388946538,
0.897384403, 0.784054708, 0.652210478, 0.850226608, 0.514172118,
0.780114865, 0.710307692, 0.714749488, 0.248817293, 0.576462902,
0.690210031), class = c("xts", "zoo"), .indexCLASS = "Date", tclass = "Date", .indexTZ = "UTC", tzone = "UTC", index = structure(c(1288828800,
1288915200, 1289174400, 1289260800, 1289347200, 1289433600, 1289520000,
1289779200, 1289865600, 1289952000, 1290038400, 1290124800, 1290384000,
1290470400, 1290556800), tzone = "UTC", tclass = "Date"), .Dim = c(15L,
5L), .Dimnames = list(NULL, c("Stock1", "Stock10", "Stock100",
"Stock101", "Stock102")))
>
我首先需要将此 xts 对象拆分为 n 个时间段。
我尝试了以下内容:
# Attempts at splitting `data` into pieces of n observations each.
n = 10
# Attempt 1: calendar-based split. With f = "weeks" and k = n the break
# points follow week boundaries (every n weeks), not every n rows.
list <- split.xts(data, f = "weeks", drop = TRUE, k =n )
# Attempt 2: passes the number n itself as the grouping argument f.
list <- split(data, f = n, drop=TRUE)
# Attempt 3: rep(..., each = n) repeats every one of the nrow() labels
# n times, so the grouping vector still contains nrow() distinct labels.
list <- split(data, rep(1:nrow(efficiency.scores), each = n))
第一种方法返回了多个列表,但每个列表的长度并不等于 10。最后一种方法返回了一个包含 1042 个元素的列表,这正好是原始 data 文件的行数。
它应该是 1042 / 10 个(即使最后剩下的 x 个值不足 n 个)。
假设拆分列表的问题已经解决,现在只看列表中的每个元素……我需要做的第二件事是计算每一列所有值的平均值,并找出平均值介于 a 和 b 之间的列名。
我尝试了以下内容:
# Lower and upper bounds for the column-mean filter.
a <- 0.9
b <- 1
#Calculate means of columns
# colMeans() returns a named vector; as.data.frame() turns it into a
# one-column data frame whose row names are the column names of `test`.
# NOTE(review): `test` is not defined in the visible text -- presumably one
# element of the split list; confirm.
means<- as.data.frame(colMeans(test))
#Find row names with mean values between a and b
# All three attempts hard-code 0.9 and never use `b`; they also overwrite
# `n`, which was previously the chunk size.
# Attempt 1: row-subsetting a one-column data frame drops it to a plain
# vector, so the row names (the stock names) are lost.
n <- means[which( means[,1] > 0.9),]
# Attempt 2: means[, -1] removes the only column, leaving nothing for
# apply() to iterate over.
n <- means[apply(means[, -1], MARGIN = 1, function(x) { x > 0.9}), ]
# Attempt 3: which() returns an integer index vector, which has no row
# names, so rownames() yields NULL.
n <- rownames(which(means[,1] > 0.9))
这些尝试全都报错。
答案 0 :(得分:1)
在等待您的数据时,我会使用data(sample_matrix, package= "xts")
您可以按指定的方式拆分 xts 对象。请注意,您的语法有点令人困惑:split(...)(您的第二种方法)与第一种方法 split.xts(...) 实际上是一样的,因为方法调度在这两种情况下都会调用 split.xts。但是,n 不是 split.xts 中参数 f 的有效取值。
我认为典型的偏好是使用split
并让方法调度做它的事情。
library(xts)
data(sample_matrix, package = "xts")
# sample_matrix is a plain matrix. Convert it to xts so that split()
# dispatches to split.xts() and f = "weeks" is interpreted as a period
# rather than as a literal one-level grouping factor.
x <- as.xts(sample_matrix)
# List of xts objects, one per calendar week.
x2 <- split(x, f = "weeks")
# get colmeans for a single xts
col_means <- colMeans(x)
col_means
# Bounds (exclusive) for the mean filter.
a <- 49
b <- 49.2
# Column names whose mean lies strictly between a and b.
colnames(x)[which(col_means > a & col_means < b)]
## or for a list
# Per-week column means: a list of named numeric vectors.
c_means <- lapply(x2, colMeans)
# For each week, the names of the columns whose mean is between a and b;
# which() drops NA comparisons instead of returning NA names.
stks <- lapply(c_means, function(x, a, b) {
  names(x)[which(x > a & x < b)]
}, a = a, b = b)
library(xts)
# Same workflow applied to the question's data.
# `structure(...)` is a placeholder -- presumably for the full dput()
# output shown in the question; paste it here.
x <- structure(...)
# Exclusive bounds for the column-mean filter.
a <- .2 # for a non-zero result
b <- 1
# One xts object per calendar week.
x2 <- split(x, f = "weeks")
# Column means for every weekly piece (named numeric vectors).
c_means <- lapply(x2, colMeans)
# Names of the columns whose weekly mean falls strictly between the bounds.
stks <- lapply(
  c_means,
  function(m, lo, hi) {
    in_range <- m > lo & m < hi
    names(m)[which(in_range)]
  },
  lo = a,
  hi = b
)