我正在尝试根据每个符号规范化和转换股市数据,我已经创建了这个方法,但我看到函数内部的循环这么慢。有没有办法让它更快或以更好的方式做到?
功能是:
normalize <- function(data, col){
norm <- function(x){
((x - min(x))/ (max(x) - min(x)))
}
symbols <- t(unique(data['Symbol']))
result <- data.frame()
for (s in symbols)
result <- rbind(result, norm(data[which(data['Symbol'] == s) , col]))
result = cbind(t(symbols), result)
colnames(result) <- c('Symbol', paste0('D', t(unique(data['Date']))))
rownames(result) <- NULL
return(result)
}
控制台结果是:
> r = normalize(allData, 'Close')
> head(allData, 20) # allData is the input data frame for the function
Date Open High Low Close Volume Adj.Close Symbol
1 1 41.18 41.31 40.37 40.56 1529200 40.35932 A
2 2 40.32 40.46 39.70 39.80 2041800 39.60308 A
3 3 39.81 40.02 39.02 39.18 2080600 38.98615 A
4 4 39.52 39.81 39.29 39.70 3359700 39.50358 A
5 5 40.24 40.98 40.18 40.89 2116300 40.68769 A
6 6 41.00 41.00 40.29 40.59 1643900 40.38917 AA
7 7 40.61 40.72 39.95 40.11 2770800 39.91155 AA
8 8 40.47 40.70 39.33 39.55 2013100 39.35432 AA
9 9 39.03 39.10 38.21 39.06 5134000 38.86674 AA
10 10 39.06 39.41 37.99 38.01 2628900 37.82194 AA
11 11 37.83 38.46 37.76 38.25 3004000 38.06075 AAL
12 12 38.43 38.66 37.76 37.93 5033600 37.74233 AAL
13 13 37.75 38.41 37.68 38.16 2721600 37.97120 AAL
14 14 38.51 39.86 38.12 39.65 4856600 39.45382 AAL
15 15 39.60 39.60 38.76 38.81 1519300 38.61798 AAL
16 16 38.79 39.17 38.47 39.15 1510900 38.95630 AAP
17 17 38.70 39.25 38.58 38.75 1703500 38.55828 AAP
18 18 39.01 39.05 37.96 38.00 2033500 37.81199 AAP
19 19 38.00 38.47 37.69 38.46 2330000 38.26971 AAP
20 20 38.01 38.32 37.71 37.77 3054300 37.58313 AAP
> head(r, 4)
A 1.898358 1.138356 0.518357 1.038358 2.228356
AA 1.928357 1.448358 0.888356 0.398358 -0.651645
AAL -0.411643 -0.731643 -0.501643 0.988359 0.148358
AAP 0.488359 0.08835696 -0.661643 -0.201644 -0.891643
部分实际数据:
> dput(allData)
structure(list(Date = 2:20, Open = c(40.32, 39.81, 39.52, 40.24,
41, 40.61, 40.47, 39.03, 39.06, 37.83, 38.43, 37.75, 38.51, 39.6,
38.79, 38.7, 39.01, 38, 38.01), High = c(40.46, 40.02, 39.81,
40.98, 41, 40.72, 40.7, 39.1, 39.41, 38.46, 38.66, 38.41, 39.86,
39.6, 39.17, 39.25, 39.05, 38.47, 38.32), Low = c(39.7, 39.02,
39.29, 40.18, 40.29, 39.95, 39.33, 38.21, 37.99, 37.76, 37.76,
37.68, 38.12, 38.76, 38.47, 38.58, 37.96, 37.69, 37.71), Close = c(39.8,
39.18, 39.7, 40.89, 40.59, 40.11, 39.55, 39.06, 38.01, 38.25,
37.93, 38.16, 39.65, 38.81, 39.15, 38.75, 38, 38.46, 37.77),
Volume = c(2041800L, 2080600L, 3359700L, 2116300L, 1643900L,
2770800L, 2013100L, 5134000L, 2628900L, 3004000L, 5033600L,
2721600L, 4856600L, 1519300L, 1510900L, 1703500L, 2033500L,
2330000L, 3054300L), Adj.Close = c(39.60308, 38.98615, 39.50358,
40.68769, 40.38917, 39.91155, 39.35432, 38.86674, 37.82194,
38.06075, 37.74233, 37.9712, 39.45382, 38.61798, 38.9563,
38.55828, 37.81199, 38.26971, 37.58313), Symbol = structure(c(1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L), .Label = c("A", "AA", "AAL", "AAP"), class = "factor")), .Names = c("Date",
"Open", "High", "Low", "Close", "Volume", "Adj.Close", "Symbol"
), class = "data.frame", row.names = c(NA, -19L))
答案 0 :(得分:0)
使用以下功能更改您的功能:
normalize <- function(data, col){
norm <- function(x){
((x - min(x))/ (max(x) - min(x)))
}
symbols <- t(unique(data['Symbol']))
listSymbols<-lapply(symbols,function(s){
norm(data[as.character(data$Symbol) == s , col])
})
symbolsValue<-as.data.frame(matrix(unlist(listSymbols),nrow=length(symbols)))
symbolsValue$Symbol<-t(symbols)
symbolsValue
}
答案 1 :(得分:0)
虽然这可能不是您正在寻找的,但它确实提供了允许每个符号具有不相等数量的条目的灵活性。输出是规范化值的数组列表
library(dplyr)
normalize<- function(data, col) {
norm <- function(x){
((x - min(x))/ (max(x) - min(x)))
}
sym=unique(data$Symbol)
r<-sapply(sym, FUN=function(x) {norm(select(filter(data, Symbol==x), one_of(col)))})
names(r)<-sym
r
}
df<-read.csv("test.csv") #Your data goes here
r<-normalize(df, "Close") #Substitute column of interest here