我对R和编程很新。我想知道是否有人可以帮助我以下内容:我在R中有一个for循环,每次迭代取决于之前的所有。我也有一个200k行的数据集,所以这个for循环需要永远执行。有没有更快的方法来做到这一点?
这是我的代码:
library(data.table)
for (i in 2:nrow(DT)) {
if (DT$price2[i]%in%DT$price2[1:i-1]) { #if price occurred before
DT$dummyid[i] = DT$id[max(which(DT$price2[i] == DT$price2[1:i-1]))] #record the latest id of this price's order
if (sum(DT$size[(which(DT$price2[i] == DT$price2[1:i-1]))]) == 0) #if cumulative sum == 0 (excl. current order)
{DT$id[i] = i} #means order just got cancelled, so assign new id
else {DT$id[i] = DT$dummyid[i]} #assign the latest id
}
else {DT$id[i] = i} #price never occurred, so new id
if (DT$id[i]%in%DT$id[1:i-1]) { #if id occurred before
if (sum(DT$size[(which(DT$id[i] == DT$id[1:i]))]) == 0) #and cumulative sum == 0 (incl. current order)
{DT$arc[i] = "C"} #cancel the order
else {
DT$arc[i] = "R" #replace the order
DT$price3[i] = sum(DT$size[(which(DT$id[i] == DT$id[1:i]))]) #with order of this size
}
}
else {
DT$arc[i] = "A" #add new order
}
}
}
这里我正在尝试从TAQ NYSE OpenBook重新格式化原始数据,以便订单包可以读取它。如果有人知道其他任何方式,我会非常感激。
基本上,原始数据(最左边的4列)是特定股票当日的买入/卖出订单的价格变化。
我在这里手动编辑了第一行。此处键入0 = BID,1 = ASK,price2 = price + type * 0.000001以区分出价/要求之间的相同价格。尺寸并不总是正面的。
我正在尝试使用代码:
我正在尝试以与一个订单相同的价格处理所有订单,直到它的累积尺寸== 0,这对我来说是可以的。 arc列应为A = add(如果价格之前从未发生过,或者此价格的订单刚被取消),C =取消(如果累计大小== 0),R =替换(如果价格发生之前)和累积大小<> 0)。
我还需要id来指示哪个订单被取消,添加或替换。
编辑:
很抱歉不包含示例数据集。这是:
> head(DT, n = 20)
time type price size price2 dummyid id arc price3
1: 0 0 20.00 200 20.00 0 1 A NA
2: 0 0 24.41 200 24.41 NA NA NA NA
3: 0 0 32.50 200 32.50 NA NA NA NA
4: 0 0 38.40 1000 38.40 NA NA NA NA
5: 0 0 40.50 1700 40.50 NA NA NA NA
6: 0 0 41.50 100 41.50 NA NA NA NA
7: 0 0 41.69 100 41.69 NA NA NA NA
8: 0 0 42.28 100 42.28 NA NA NA NA
9: 0 0 43.00 100 43.00 NA NA NA NA
10: 0 0 45.00 1700 45.00 NA NA NA NA
11: 0 0 45.12 300 45.12 NA NA NA NA
12: 0 0 45.76 200 45.76 NA NA NA NA
13: 0 0 46.00 100 46.00 NA NA NA NA
14: 0 0 46.76 200 46.76 NA NA NA NA
15: 0 0 47.00 200 47.00 NA NA NA NA
16: 0 0 48.00 500 48.00 NA NA NA NA
17: 0 0 48.10 100 48.10 NA NA NA NA
18: 0 0 48.25 400 48.25 NA NA NA NA
19: 0 0 48.71 300 48.71 NA NA NA NA
20: 0 0 49.05 500 49.05 NA NA NA NA
> dput(head(DT,20))
structure(list(time = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), type = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), price = c(20,
24.41, 32.5, 38.4, 40.5, 41.5, 41.69, 42.28, 43, 45, 45.12, 45.76,
46, 46.76, 47, 48, 48.1, 48.25, 48.71, 49.05), size = c(200L,
200L, 200L, 1000L, 1700L, 100L, 100L, 100L, 100L, 1700L, 300L,
200L, 100L, 200L, 200L, 500L, 100L, 400L, 300L, 500L), price2 = c(20,
24.41, 32.5, 38.4, 40.5, 41.5, 41.69, 42.28, 43, 45, 45.12, 45.76,
46, 46.76, 47, 48, 48.1, 48.25, 48.71, 49.05), dummyid = c("0",
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), id = c("1", NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), arc = c("A", NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA), price3 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA)), .Names = c("time", "type",
"price", "size", "price2", "dummyid", "id", "arc", "price3"), class = c("data.table",
"data.frame"), row.names = c(NA, -20L))
谢谢!