我一直在使用xts包中的to.daily将日内数据汇总为每日的开盘/最高/最低/收盘数据,但升级R之后,我在这个函数中遇到了一个已知的bug,所以想找另一种方法。
如果可能,我想只使用Base R中的函数。看起来可以用aggregate做到这一点,但我不知道该如何开始,所以任何提示我都会非常感激。
# Load the intraday records. The call reads the file without a header row,
# so the column names are assigned explicitly afterwards.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved word.
z <- read.csv("R H2007-EIR_5.csv", sep = ",", header = FALSE)
names(z) <- c("date", "time", "open", "high", "low", "close", "volume")
> head(z,20)
date time open high low close volume
1 13/03/2007 09:10 107.66 107.66 107.66 107.66 10
2 13/03/2007 09:40 107.63 107.63 107.63 107.63 50
3 13/03/2007 09:45 107.64 107.64 107.64 107.64 8
4 13/03/2007 10:00 107.62 107.62 107.62 107.62 7
5 13/03/2007 10:45 107.64 107.65 107.64 107.65 94
6 13/03/2007 11:20 107.77 107.77 107.77 107.77 2
7 14/03/2007 09:00 108.00 108.00 108.00 108.00 10
8 14/03/2007 09:05 108.00 108.01 108.00 108.01 45
9 14/03/2007 11:15 108.05 108.05 108.05 108.05 5
10 14/03/2007 11:25 108.05 108.05 108.05 108.05 1
11 14/03/2007 11:40 108.10 108.10 108.10 108.10 25
12 14/03/2007 12:00 108.10 108.10 108.10 108.10 5
13 14/03/2007 12:30 108.05 108.05 108.05 108.05 5
14 14/03/2007 12:55 108.05 108.05 108.05 108.05 800
15 14/03/2007 13:05 108.02 108.02 108.02 108.02 89
16 14/03/2007 13:30 108.00 108.00 108.00 108.00 5
17 14/03/2007 14:25 107.95 107.95 107.95 107.95 5
18 14/03/2007 15:05 107.95 107.95 107.95 107.95 2
19 14/03/2007 16:00 108.01 108.01 108.01 108.01 6
20 15/03/2007 08:05 107.86 107.90 107.86 107.90 2
> dput(z)
structure(list(date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L, 10L,
10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L), .Label = c("13/03/2007", "14/03/2007",
"15/03/2007", "16/03/2007", "19/03/2007", "20/03/2007", "21/03/2007",
"22/03/2007", "23/03/2007", "26/03/2007", "27/03/2007", "28/03/2007"
), class = "factor"), time = structure(c(8L, 11L, 12L, 13L, 20L,
26L, 6L, 7L, 25L, 27L, 28L, 29L, 33L, 34L, 35L, 37L, 42L, 46L,
54L, 2L, 3L, 24L, 45L, 46L, 11L, 44L, 55L, 11L, 23L, 31L, 32L,
48L, 10L, 43L, 1L, 14L, 15L, 20L, 31L, 53L, 54L, 36L, 46L, 47L,
49L, 50L, 51L, 12L, 16L, 30L, 40L, 19L, 31L, 38L, 39L, 4L, 27L,
31L, 33L, 41L, 43L, 50L, 52L, 5L, 7L, 9L, 12L, 17L, 18L, 20L,
21L, 22L, 23L), .Label = c("08:00", "08:05", "08:10", "08:30",
"08:55", "09:00", "09:05", "09:10", "09:15", "09:30", "09:40",
"09:45", "10:00", "10:05", "10:15", "10:25", "10:30", "10:35",
"10:40", "10:45", "10:50", "10:55", "11:00", "11:10", "11:15",
"11:20", "11:25", "11:40", "12:00", "12:05", "12:15", "12:20",
"12:30", "12:55", "13:05", "13:25", "13:30", "13:35", "13:55",
"14:00", "14:15", "14:25", "14:30", "14:35", "14:45", "15:05",
"15:10", "15:15", "15:20", "15:25", "15:30", "15:40", "15:55",
"16:00", "16:10"), class = "factor"), open = c(107.66, 107.63,
107.64, 107.62, 107.64, 107.77, 108, 108, 108.05, 108.05, 108.1,
108.1, 108.05, 108.05, 108.02, 108, 107.95, 107.95, 108.01, 107.86,
107.86, 107.88, 107.8, 107.79, 107.78, 107.64, 107.75, 107.61,
107.6, 107.55, 107.55, 107.48, 107.4, 107.43, 107.39, 107.48,
107.53, 107.56, 107.47, 107.39, 107.39, 107.37, 107.3, 107.3,
107.29, 107.29, 107.29, 107.16, 107.13, 107.11, 107.06, 106.8,
106.72, 106.7, 106.72, 106.62, 106.72, 106.58, 106.58, 106.62,
106.66, 106.73, 106.67, 106.61, 106.6, 106.6, 106.57, 106.61,
106.6, 106.59, 106.6, 106.62, 106.65), high = c(107.66, 107.63,
107.64, 107.62, 107.65, 107.77, 108, 108.01, 108.05, 108.05,
108.1, 108.1, 108.05, 108.05, 108.02, 108, 107.95, 107.95, 108.01,
107.9, 107.86, 107.89, 107.8, 107.79, 107.78, 107.64, 107.75,
107.61, 107.6, 107.55, 107.55, 107.48, 107.4, 107.43, 107.39,
107.48, 107.53, 107.56, 107.47, 107.39, 107.39, 107.37, 107.3,
107.3, 107.29, 107.29, 107.29, 107.16, 107.13, 107.11, 107.06,
106.8, 106.72, 106.7, 106.72, 106.62, 106.72, 106.58, 106.58,
106.62, 106.66, 106.73, 106.67, 106.62, 106.6, 106.6, 106.57,
106.61, 106.6, 106.63, 106.6, 106.65, 106.65), low = c(107.66,
107.63, 107.64, 107.62, 107.64, 107.77, 108, 108, 108.05, 108.05,
108.1, 108.1, 108.05, 108.05, 108.02, 108, 107.95, 107.95, 108.01,
107.86, 107.86, 107.88, 107.8, 107.79, 107.78, 107.64, 107.75,
107.61, 107.6, 107.55, 107.55, 107.48, 107.29, 107.43, 107.39,
107.48, 107.53, 107.56, 107.47, 107.39, 107.39, 107.37, 107.3,
107.3, 107.29, 107.29, 107.29, 107.16, 107.13, 107.11, 107.06,
106.8, 106.72, 106.7, 106.72, 106.62, 106.72, 106.58, 106.58,
106.62, 106.66, 106.73, 106.67, 106.61, 106.6, 106.6, 106.57,
106.61, 106.6, 106.59, 106.6, 106.61, 106.65), close = c(107.66,
107.63, 107.64, 107.62, 107.65, 107.77, 108, 108.01, 108.05,
108.05, 108.1, 108.1, 108.05, 108.05, 108.02, 108, 107.95, 107.95,
108.01, 107.9, 107.86, 107.89, 107.8, 107.79, 107.78, 107.64,
107.75, 107.61, 107.6, 107.55, 107.55, 107.48, 107.3, 107.43,
107.39, 107.48, 107.53, 107.56, 107.47, 107.39, 107.39, 107.37,
107.3, 107.3, 107.29, 107.29, 107.29, 107.16, 107.13, 107.11,
107.06, 106.8, 106.72, 106.7, 106.72, 106.62, 106.72, 106.58,
106.58, 106.62, 106.66, 106.73, 106.67, 106.61, 106.6, 106.6,
106.57, 106.61, 106.6, 106.61, 106.6, 106.65, 106.65), volume = c(10L,
50L, 8L, 7L, 94L, 2L, 10L, 45L, 5L, 1L, 25L, 5L, 5L, 800L, 89L,
5L, 5L, 2L, 6L, 2L, 4L, 178L, 5L, 5L, 10L, 1L, 1L, 1L, 2L, 50L,
44L, 100L, 400L, 91L, 1L, 100L, 100L, 3L, 1L, 79L, 21L, 28L,
80L, 20L, 20L, 31L, 49L, 5L, 1L, 25L, 1L, 20L, 284L, 2368L, 454L,
18L, 43L, 11L, 547L, 18L, 1L, 3L, 253L, 200L, 5L, 35L, 30L, 50L,
50L, 172L, 99L, 1728L, 82L)), .Names = c("date", "time", "open",
"high", "low", "close", "volume"), class = "data.frame", row.names = c(NA,
-73L))
期望的输出:
date open high low close
1 13/03/2007 107.66 107.77 107.62 107.77
2 14/03/2007 108.00 108.10 107.95 108.01
3 15/03/2007 107.86 107.90 107.86 107.90
答案 0 :(得分:1)
是这样的吗?
# Build the per-date summary one column at a time with aggregate():
#   open  = first `open` value of each date
#   max   = largest `high` value of each date
#   min   = smallest `low` value of each date
#   close = last `close` value of each date
# "First" and "last" follow the row order of `z` within each date.
first_value <- function(v) v[1L]
last_value <- function(v) v[length(v)]

o <- aggregate(open ~ date, data = z, FUN = first_value)
o[["max"]] <- aggregate(high ~ date, data = z, FUN = max)[["high"]]
o[["min"]] <- aggregate(low ~ date, data = z, FUN = min)[["low"]]
o[["close"]] <- aggregate(close ~ date, data = z, FUN = last_value)[["close"]]
# date open max min close
# 1 13/03/2007 107.66 107.77 107.62 107.77
# 2 14/03/2007 108.00 108.10 107.95 108.01
# 3 15/03/2007 107.86 107.90 107.86 107.90
答案 1 :(得分:1)
这样的事情可以解决问题:
# Collapse intraday records into one open/max/min/close row per calendar day.
#
# Args:
#   z: data frame with columns `date` ("%d/%m/%Y"), `time` ("%H:%M"),
#      `open`, `high`, `low` and `close`. Rows are expected to be in
#      chronological order, because open/close are taken from the first and
#      last row of each day.
#
# Returns:
#   A numeric matrix with columns "open", "max", "min", "close" and one row
#   per day that has at least one record (row names are the day labels
#   produced by cut()). A 0-row matrix is returned for empty input.
todaily <- function(z) {
  out_cols <- c("open", "max", "min", "close")
  if (nrow(z) == 0L) {
    return(matrix(numeric(0), nrow = 0L, ncol = 4L,
                  dimnames = list(NULL, out_cols)))
  }

  stamp <- as.POSIXct(strptime(paste(z$date, z$time), "%d/%m/%Y %H:%M"))
  # cut() creates a level for every day in the range, including days without
  # any record; drop = TRUE discards those empty groups.
  zperiod <- split(z, cut(stamp, "day"), drop = TRUE)

  res <- do.call(rbind, lapply(zperiod, function(x) {
    # The price columns are named `high` and `low`. The previous code read
    # x$max / x$min, which are NULL, so the extremes were silently computed
    # from open/close only.
    prices <- c(x$open, x$high, x$low, x$close)
    c(x$open[1],
      max(prices, na.rm = TRUE),
      min(prices, na.rm = TRUE),
      x$close[nrow(x)])
  }))
  colnames(res) <- out_cols
  res
}
todaily(z)
open max min close
2007-03-13 107.66 107.77 107.62 107.77
2007-03-14 108.00 108.10 107.95 108.01
2007-03-15 107.86 107.90 107.86 107.90
当然,您也可以把它推广到cut.POSIXt能够处理的任何时间段(请参阅?cut.POSIXt):
# Collapse intraday records into one open/max/min/close row per period.
#
# Args:
#   z:      data frame with columns `date` ("%d/%m/%Y"), `time` ("%H:%M"),
#           `open`, `high`, `low` and `close`. Rows are expected to be in
#           chronological order, because open/close are taken from the first
#           and last row of each period.
#   period: interval specification passed to cut() as `breaks`, e.g. "day",
#           "weeks" or "2 weeks" (see ?cut.POSIXt). Defaults to "day".
#
# Returns:
#   A numeric matrix with columns "open", "max", "min", "close" and one row
#   per period that has at least one record (row names are the period labels
#   produced by cut()). A 0-row matrix is returned for empty input.
toperiod <- function(z, period = "day") {
  out_cols <- c("open", "max", "min", "close")
  if (nrow(z) == 0L) {
    return(matrix(numeric(0), nrow = 0L, ncol = 4L,
                  dimnames = list(NULL, out_cols)))
  }

  stamp <- as.POSIXct(strptime(paste(z$date, z$time), "%d/%m/%Y %H:%M"))
  # cut() creates a level for every period in the range, including periods
  # without any record; drop = TRUE discards those empty groups.
  zperiod <- split(z, cut(stamp, period), drop = TRUE)

  res <- do.call(rbind, lapply(zperiod, function(x) {
    # The price columns are named `high` and `low`. The previous code read
    # x$max / x$min, which are NULL, so the extremes were silently computed
    # from open/close only.
    prices <- c(x$open, x$high, x$low, x$close)
    c(x$open[1],
      max(prices, na.rm = TRUE),
      min(prices, na.rm = TRUE),
      x$close[nrow(x)])
  }))
  colnames(res) <- out_cols
  res
}
使用您上传的完整数据集,以下是最后这个函数的运行结果:
toperiod(z, "weeks")
open max min close
2007-03-12 00:00:00 107.66 108.10 107.62 107.75
2007-03-19 00:00:00 107.61 107.61 107.06 107.06
2007-03-26 01:00:00 106.80 106.80 106.57 106.65
toperiod(z, "2 weeks")
open max min close
2007-03-12 00:00:00 107.66 108.1 107.06 107.06
2007-03-26 01:00:00 106.80 106.8 106.57 106.65
答案 2 :(得分:0)
以下是混合使用xts和base函数的解决方案。
首先我创建了我的xts对象。
# Parse the first two columns (date and time text) into timestamps, then wrap
# the remaining columns in an xts object indexed by those timestamps.
# NOTE(review): `dat` is not defined in this snippet; presumably it is the
# question's data frame (`z`) — confirm before running.
stamp_text <- paste(dat[, 1], dat[, 2])
INDEX <- strptime(stamp_text, "%d/%m/%Y %H:%M")
dat.xts <- xts(dat[, -(1:2)], order.by = INDEX)
最终结果将基于这个xts对象的一个子集。我先用endpoints取出每天的最后一条记录作为结果的骨架,不过此时它的coredata(数值)还不正确。
# Show the rows of dat.xts selected by INDEX, leaving out column 5.
# NOTE(review): the accompanying text says endpoints() was used, so INDEX is
# presumably meant to be endpoints(dat.xts, "days") here rather than the
# timestamp vector created when dat.xts was built — confirm.
# NOTE(review): column 5 is presumably `volume`, given the question's column
# order — confirm.
dat.xts[INDEX,-c(5)]
open high low close
2007-03-13 11:20:00 107.77 107.77 107.77 107.77
2007-03-14 16:00:00 108.01 108.01 108.01 108.01
2007-03-15 08:05:00 107.86 107.90 107.86 107.90
现在我使用lapply
# Compute one open/high/low/close row for each period delimited by INDEX and
# store the values in `res`.
# NOTE(review): assumes INDEX holds integer row positions of period ends,
# starting with 0 (the form returned by endpoints(dat.xts, "days")) — confirm.
xx <- lapply(seq_len(length(INDEX) - 1), function(y) {
  # Rows of period y: from just after the previous end position to this one.
  xi <- as.data.frame(dat.xts[(INDEX[y] + 1):INDEX[y + 1]])
  # The previous code read x$max, x$min and x$close from an object `x` that
  # is not defined here; the values must come from `xi`, whose price columns
  # are named `high` and `low`.
  prices <- c(xi$open, xi$high, xi$low, xi$close)
  c(
    xi$open[1],
    max(prices, na.rm = TRUE),
    min(prices, na.rm = TRUE),
    xi$close[nrow(xi)]
  )
})
# `res` previously existed only inside the function above, so the assignment
# below had no object to modify. Start from the period-end rows (without
# column 5) and overwrite their values with the per-period summary.
res <- dat.xts[INDEX, -c(5)]
coredata(res) <- do.call(rbind, xx)
获得所需的结果:
open high low close
2007-03-13 11:20:00 107.66 107.77 107.62 107.77
2007-03-14 16:00:00 108.00 108.10 107.95 108.05
2007-03-15 08:05:00 107.86 107.90 107.86 107.66