我有 length(Dates_List) 天的数据,以及 length(ISIN_Table$ID) 个项目的信息。
对于每一天(对 j 的循环),我创建一个全零的数据框,其行数等于项目数(length(ISIN_Table$ID)),列数为 4。
每个项目在每个数据框中占一行,但填充的内容随日期而不同。
# Build df.list: one data frame per entry of Dates_List, each with one row per
# entry of ISIN_Table$ID and four columns (date, ID, bid price, ask price).
# Reads Dates_List, ISIN_Table and data.raw from the enclosing environment.
# NOTE(review): rows are matched on the loop index i, not on ISIN_Table$ID[i],
# so this assumes the IDs are exactly 1..n -- confirm against the real data.

# Pre-allocate the list that will hold one data frame per day.
df.list <- vector("list", length(Dates_List))
n_items <- length(ISIN_Table$ID)

# Loop over the days; seq_along() is safe when Dates_List is empty.
for (j in seq_along(Dates_List)) {
  date <- Dates_List[j]

  # Start from an all-zero frame so items without a quote keep 0 prices.
  # Working on a local frame avoids re-assigning into df.list for every cell.
  day_df <- data.frame(matrix(0, nrow = n_items, ncol = 4))

  # Loop over every item.
  for (i in seq_len(n_items)) {
    # Look the (item, date) pair up once instead of once per column.
    hit <- data.raw[data.raw$ID == i & data.raw$Date == date, ]

    if (nrow(hit) < 1) {
      # Item is not known at this date: record only the date and the ID.
      day_df[i, 1] <- date
      day_df[i, 2] <- i
    } else {
      # Item is known: fill the whole row. c() coerces the values to
      # character, exactly as the original four-lookup version did.
      day_df[i, ] <- c(
        as.character(hit[, "Date"]),
        hit[, "ID"],
        hit[, "Bid.Price"],
        hit[, "Ask.Price"]
      )
    }
  }

  df.list[[j]] <- day_df
}
代码给了我想要的确切输出,然而它却非常慢。我很感激有关如何提高速度的任何意见,目前的版本是不可行的。
# Dummy data: two dates, three items in ISIN_Table, quotes for IDs 1 and 2 only.
Dates_List <- c("2007-01-02", "2007-01-03")
ISIN_Table <- data.frame(ID = c(1, 2, 3))

# ID has length 2; data.frame() recycles it against the length-4 columns below.
ID <- rep(1:2, each = 1, length.out = 2)
Date <- rep(c("2007-01-02", "2007-01-03"), each = 2)
Bid.Price <- rep(100, times = 4)
Ask.Price <- rep(100, times = 4)

data.raw <- data.frame(
  ID = ID,
  Date = Date,
  Bid.Price = Bid.Price,
  Ask.Price = Ask.Price
)
期望 df.list[[1]] 返回:
X1 X2 X3 X4
1 2007-01-02 1 100 100
2 2007-01-02 2 100 100
3 2007-01-02 3 0 0
答案 0 :(得分:1)
**更新** 根据 @Arun 的建议,您可以在拆分之前添加缺失的行,从而完全避免使用 mapply
# Dummy data: two dates, three items in ISIN_Table, quotes for IDs 1 and 2 only.
Dates_List <- c("2007-01-02", "2007-01-03")
ISIN_Table <- data.frame(ID = c(1, 2, 3))
ID <- rep(1:2, length.out = 2, each = 1)
Date <- c("2007-01-02", "2007-01-02", "2007-01-03", "2007-01-03")
Bid.Price <- rep(100, 4)
Ask.Price <- rep(100, 4)
data.raw <- data.frame(ID, Date, Bid.Price, Ask.Price)

# Every (Date, ID) combination that must appear in the result. Naming the
# arguments sets the column names directly; stringsAsFactors = FALSE keeps
# Date a character vector instead of expand.grid()'s default factor.
temp <- expand.grid(
  Date = Dates_List,
  ID = ISIN_Table$ID,
  stringsAsFactors = FALSE
)

# Left join: keep every combination, with NA prices where there is no quote.
data.raw <- merge(temp, data.raw, by = c("Date", "ID"), all.x = TRUE)

# Zero-fill only the price columns, leaving the key columns untouched.
price_cols <- c("Bid.Price", "Ask.Price")
data.raw[price_cols][is.na(data.raw[price_cols])] <- 0
data.raw
## Date ID Bid.Price Ask.Price
## 1 2007-01-02 1 100 100
## 2 2007-01-02 2 100 100
## 3 2007-01-02 3 0 0
## 4 2007-01-03 1 100 100
## 5 2007-01-03 2 100 100
## 6 2007-01-03 3 0 0

# One data frame per date, each holding a row for every ID.
splitdata <- split(data.raw, data.raw$Date)
splitdata
## $`2007-01-02`
## Date ID Bid.Price Ask.Price
## 1 2007-01-02 1 100 100
## 2 2007-01-02 2 100 100
## 3 2007-01-02 3 0 0
##
## $`2007-01-03`
## Date ID Bid.Price Ask.Price
## 4 2007-01-03 1 100 100
## 5 2007-01-03 2 100 100
## 6 2007-01-03 3 0 0
OLD ANSWER
您可以使用 split 按日期拆分数据,然后使用 mapply 和 merge,为在给定日期没有任何数据的 ID 也补上对应的行。
# Dummy data: two dates, three items in ISIN_Table, quotes for IDs 1 and 2 only.
Dates_List <- c("2007-01-02", "2007-01-03")
ISIN_Table <- data.frame(ID = c(1, 2, 3))
ID <- rep(1:2, length.out = 2, each = 1)
Date <- c("2007-01-02", "2007-01-02", "2007-01-03", "2007-01-03")
Bid.Price <- rep(100, 4)
Ask.Price <- rep(100, 4)
data.raw <- data.frame(ID, Date, Bid.Price, Ask.Price)

# One data frame of quotes per date; the list names are the dates.
splitdata <- split(data.raw, data.raw$Date)

# For each date, right-join that day's quotes onto the full ID list so that
# IDs without a quote still get a row (their prices stay NA). The result is
# stored so it can be reused, then printed as before.
merged_by_date <- Map(
  function(x, date) {
    all_ids <- data.frame(
      ID = ISIN_Table$ID,
      Date = rep(date, times = length(ISIN_Table$ID))
    )
    merge(x, all_ids, by = c("ID", "Date"), all.y = TRUE)
  },
  splitdata,
  names(splitdata)
)
merged_by_date
## $`2007-01-02`
## ID Date Bid.Price Ask.Price
## 1 1 2007-01-02 100 100
## 2 2 2007-01-02 100 100
## 3 3 2007-01-02 NA NA
##
## $`2007-01-03`
## ID Date Bid.Price Ask.Price
## 1 1 2007-01-03 100 100
## 2 2 2007-01-03 100 100
## 3 3 2007-01-03 NA NA