R:循环抓取网页数据、进行计算并绑定结果以构建表格时遇到问题

时间:2019-01-17 14:47:10

标签: r loops web-scraping

我正尝试用一组股票代码(ticker)构建一张网络抓取数据表:针对每个代码发起一次查询,从 cefconnect.com 网站提取其 1 年期的价格和折价数据。

我的查询本身可以正确返回页面,但循环部分让我很困惑:我希望循环逐个抓取每只基金的价格历史,对其做一些相当基础的计算,给结果标注对应的股票代码,然后把各只基金的结果绑定在一起,合并成一张更大的表。

如果有人可以识别问题并提出解决方案,这是我的示例脚本:

library(jsonlite)
library(rvest)
library(dplyr)
library(stringr)
library(PerformanceAnalytics)
library(lubridate)

# Closed-end fund tickers to summarise.
tickers2 <- c("PMX", "MFM", "CEF", "JLS", "CXE", "BHV")
# One 1-year pricing-history URL per ticker. The symbol is kept as the element
# name so that the list returned by lapply() is named by ticker.
tickers2 <- setNames(
  paste0("https://www.cefconnect.com/api/v3/pricinghistory/", tickers2, "/1Y"),
  tickers2
)

# For every URL: download the price history, reduce it to a four-row summary
# (latest value, mean, max, min over the downloaded window) and tag it with
# the ticker symbol. Returns a list of data frames, one per ticker.
lst_scraped_data <- lapply(tickers2, FUN = function(URLLink) {
  # Download the JSON payload and flatten it into a data frame.
  page <- html_session(URLLink)
  # "json" is not a readBin() mode; R fell back to the mode of that string,
  # i.e. "character". Spell it out so the intent is visible.
  json3 <- readBin(page$response$content, what = "character")
  df15 <- data.frame(fromJSON(json3))

  # Recover the ticker symbol from the URL. The second replacement must work
  # on the result of the first one; re-reading URLLink (as before) threw the
  # prefix removal away. fixed() keeps "." from acting as a regex wildcard.
  Ticker <- str_replace_all(
    URLLink,
    pattern = fixed("https://www.cefconnect.com/api/v3/pricinghistory/"),
    replacement = ""
  )
  Ticker <- str_replace_all(Ticker, pattern = fixed("/1Y"), replacement = "")

  # Sort observations by date (rows with unparseable dates are dropped) so the
  # last row is the most recent one.
  # Columns 2:4 are taken to be NAV, discount % and share price, exactly as in
  # the original script -- TODO confirm against the API payload.
  obs_dates <- mdy(df15$Data.PriceHistory.DataDateDisplay)
  prices <- df15[order(obs_dates, na.last = NA), 2:4]
  colnames(prices) <- c("NAV", "Premium/Discount_%", "Share_Price")
  prices <- prices[, c("Share_Price", "NAV", "Premium/Discount_%")]
  if (nrow(prices) == 0L) {
    stop("no price history returned for ", Ticker, call. = FALSE)
  }

  # A rolling window as wide as the whole series yields a single value, the
  # whole-column statistic, so compute that directly.
  stat_rows <- list(
    "Current" = vapply(prices, function(x) x[length(x)], numeric(1)),
    "Year Avg" = round(vapply(prices, mean, numeric(1)), digits = 2),
    "Year High" = round(vapply(prices, max, numeric(1)), digits = 2),
    "Year Low" = round(vapply(prices, min, numeric(1)), digits = 2)
  )

  # check.names = FALSE keeps the non-syntactic "Premium/Discount_%" name.
  complete2 <- data.frame(do.call(rbind, stat_rows), check.names = FALSE)
  complete2$Ticker <- Ticker

  # Return the summary table (previously the raw history was returned and the
  # summary was discarded).
  complete2
})

# Stack the per-ticker summaries into one table. Because the list is named,
# rbind() prefixes the row names with the ticker, e.g. "PMX.Current".
df13 <- do.call(rbind, lst_scraped_data)

理想情况下,每个股票代码的最终结果如下所示:

         Share_Price   NAV Premium/Discount_%  Ticker
Current        11.52 10.45              10.24  PMX
WeekMean       11.32 10.66               6.19  PMX
WeekMax        11.78 10.95              11.33  PMX
WeekMin        10.81 10.35               0.65  PMX

最终的表应当把循环中每个股票代码的上述输出绑定在一起。谢谢!!!

1 个答案:

答案 0 :(得分:0)

目前尚不清楚问题到底出在哪里,但下面的做法也许会有所帮助。

数据中的日期格式规范,使用 format 即可得到每个日期对应的周数(一年中的第几周)。

# Parse the JSON text into an R object.
df15 <-fromJSON(json3)
# Keep only the PriceHistory element nested under Data.
df <- df15$Data$PriceHistory                # (json3 as in your function)
# Add the week-of-year number of each observation; "%V" is the ISO 8601 week
# (01-53), converted here from text to integer.
df$week <- as.integer(format(as.Date(df$DataDate), '%V'))

有了周数之后,获取每周数据就简单多了:

# For Example NAV and Discount weekly means, maxs, mins
# Each aggregate() call groups the NAVData and DiscountData columns by
# df$week; the grouping column in each result is named "Group.1".
# NOTE(review): df$week holds only the week number, so when the data window
# spans two calendar years, observations from both years that share a week
# number are pooled into one row -- confirm this is intended.
means <- aggregate(df[, c("NAVData", "DiscountData")], list(df$week), mean)
maxs  <- aggregate(df[, c("NAVData", "DiscountData")], list(df$week), max)
mins  <- aggregate(df[, c("NAVData", "DiscountData")], list(df$week), min)
# Join the three summaries on the week key and give the seven resulting
# columns readable names. The result is printed, not assigned.
setNames(merge(merge(means, mins, by = 'Group.1'), maxs, by = 'Group.1'), 
         c('week','NAVMean','DiscountMean','NAVMins','DiscountMins','NAVMaxs','DiscountMaxs'))
#    week NAVMean DiscountMean NAVMins DiscountMins NAVMaxs DiscountMaxs
# 1     1 10.5350       8.2575   10.49         6.20   10.57         9.89
# 2     2 10.5080       9.8980   10.46         8.71   10.56        11.33
# 3     3 10.6540       7.4200   10.45         4.75   10.95        10.24
# ...
# 50   50 10.5300       8.3500   10.49         5.34   10.59         9.92
# 51   51 10.4340       5.5520   10.38         4.39   10.48         6.56
# 52   52 10.4825       6.7975   10.48         6.20   10.49         7.82