我正在尝试从 http://www.morningstar.com/funds/xnas/adafx/quote.html 抓取特定基金(本例中为 ADAFX)的总资产,但结果总是空的(character(0));我做错了什么?
我之前使用rvest的效果好坏参半,所以我想是时候向值得信赖的专家社区(也就是你们)寻求帮助了。
library(rvest)
# Ticker symbol of the fund whose quote page is requested.
Symbol.i <- "ADAFX"
# R is case-sensitive: the function is paste0()/paste(), not Paste().
url <- paste0("http://www.morningstar.com/funds/xnas/", Symbol.i, "/quote.html")
# Assign the value of tryCatch() itself. An assignment made inside the error
# handler only creates a variable local to the handler function, so the
# fallback NA would never reach NetAssets.i.
# Note: an XPath that matches no node yields character(0) rather than an
# error, so the handler does not fire in that case.
NetAssets.i <- tryCatch(
  url %>%
    read_html() %>%
    html_nodes(xpath = '//*[@id="gr_total_asset_wrap"]/span/span') %>%
    html_text(),
  error = function(e) NA
)
提前谢谢你, 干杯,
Aaron Soderstrom
答案 0 :(得分:1)
这是一个动态页面,各个部分(section)的数据是通过XHR请求加载的,因此您必须查看开发者工具(Developer Tools)的Network选项卡,以获取目标内容的URL。
library(httr)
library(rvest)
# Request the quote-header fragment that the fund page loads via XHR.
res <- GET(
  url = "http://quotes.morningstar.com/fundq/c-header",
  query = list(
    t = "XNAS:ADAFX", region = "usa", culture = "en-US",
    version = "RET", test = "QuoteiFrame"
  )
)
# Select the span tagged vkey='TotalAssets' in the parsed response and
# print its text with surrounding whitespace removed.
trimws(
  html_text(
    html_nodes(content(res), "span[vkey='TotalAssets']")
  )
)
## [1] "20.6 mil"
答案 1 :(得分:0)
这里是它调用的csv文件。
library(httr)
library(rvest)
library(tm)
library(plyr)
require("dplyr")
# Category of funds to scrape in this run.
Category.name <- "Financial"
# Category lookup table, with rows containing missing values dropped.
Category.list <- na.omit(
  read.csv("C:/Users/Aaron/Documents/Investment Committee/Screener/Category.csv")
)
# Screened fund list, restricted to the funds of the chosen category.
MF.List <- read.csv("C:/Users/Aaron/Documents/Investment Committee/Screener/Filtered Funds.csv")
MF.Category.List <- MF.List %>%
  filter(Category == Category.name)
# Container for the per-fund scrape results.
morningstar.scrape <- list()
# Output column name -> value of the `vkey` attribute of the <span> that
# holds the figure in the c-header fragment. Names and order define the
# columns of MS.scrape (after the leading Symbol column).
# FeeLevel and MinInvestment are not listed because they were never part of
# the result; add them here if those columns are wanted.
morningstar.fields <- c(
  TTM.Yield = "ttmYield",
  Load = "Load",
  Total.Assets = "TotalAssets",
  Expense.Ratio = "ExpenseRatio",
  Turnover = "Turnover",
  Status = "Status",
  Yield.30day = "Yield",
  Investment.Style = "InvestmentStyle",
  Bond.Style = "BondStyle"
)

# Return the trimmed text of the first span carrying the given vkey in
# `page`, or NA_character_ when no such span exists or extraction fails.
# A selector that matches nothing yields character(0) rather than an error,
# so the empty case has to be mapped to NA explicitly; otherwise the field
# would silently vanish from the row vector and misalign the rbind() result.
extract.vkey <- function(page, vkey) {
  tryCatch(
    {
      txt <- page %>%
        html_nodes(paste0("span[vkey='", vkey, "']")) %>%
        html_text() %>%
        trimws()
      if (length(txt) == 0L) NA_character_ else txt[[1L]]
    },
    error = function(e) NA_character_
  )
}

# Fetch the header fragment for one symbol and return a named character
# vector: Symbol followed by one entry per element of morningstar.fields.
# A failed request produces a warning and a row of NA values instead of
# aborting the whole run.
scrape.fund <- function(Symbol.i) {
  page <- tryCatch(
    {
      res <- GET(
        url = "http://quotes.morningstar.com/fundq/c-header",
        query = list(
          t = paste0("XNAS:", Symbol.i),
          region = "usa",
          culture = "en-US",
          version = "RET",
          test = "QuoteiFrame"
        )
      )
      # Parse the response once and reuse it for every field.
      content(res)
    },
    error = function(e) {
      warning(
        "Morningstar request failed for ", Symbol.i, ": ",
        conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
  values <- vapply(
    morningstar.fields,
    function(vkey) extract.vkey(page, vkey),
    character(1)
  )
  c(Symbol = Symbol.i, values)
}

# One row vector per fund; iterating over the column itself is safe when the
# filtered list has zero rows (1:nrow() would iterate over c(1, 0)).
morningstar.scrape <- lapply(
  as.character(MF.Category.List[["Symbol"]]),
  scrape.fund
)
# Character matrix with one row per fund and one column per field.
MS.scrape <- do.call(rbind, morningstar.scrape)
答案 2 :(得分:0)
工作代码,
我为网页抓取添加了一个函数,并删除了library(tm)。
library(httr)
library(rvest)
# Fetch the Morningstar quote-header fragment for a fund and extract text.
#
# Symbol.i: ticker symbol; it is prefixed with "XNAS:" to form the `t`
#           query parameter.
# htmlnode: CSS selector passed to html_nodes(), e.g.
#           "span[vkey='TotalAssets']".
# Returns the whitespace-trimmed text of every node matching `htmlnode`.
get.morningstar <- function(Symbol.i, htmlnode) {
  ticker <- paste0("XNAS:", Symbol.i)
  params <- list(
    t = ticker, region = "usa", culture = "en-US",
    version = "RET", test = "QuoteiFrame"
  )
  response <- GET(
    url = "http://quotes.morningstar.com/fundq/c-header",
    query = params
  )
  matched <- html_nodes(content(response), htmlnode)
  trimws(html_text(matched))
}
# Load the screened fund list and the category lookup table.
MF.List <- read.csv("C:/Users/Aaron/Documents/Bitrix24/Investment Committee/Screener/Filtered Funds.csv")
Category.list <- read.csv("C:/Users/Aaron/Documents/Bitrix24/Investment Committee/Screener/Category.csv")
Category.list <- na.omit(Category.list)
# Category to scrape; grepl() treats it as a regular expression matched
# against MF.List$Category.
Category.name <- "Small Growth"
MF.Category.List <- MF.List[grepl(Category.name, MF.List$Category), ]
morningstar.scrape <- list()
# seq_len() makes the loop a no-op when no fund matches the category
# (1:nrow() would iterate over c(1, 0)).
for (i in seq_len(nrow(MF.Category.List))) {
  Symbol.i <- as.character(MF.Category.List[i, "Symbol"])
  # Take the value of tryCatch() so that a failed request yields NA for this
  # fund. With try(x <- f()), a failure leaves x holding the previous fund's
  # value (or undefined on the first iteration), so the wrong figure would be
  # printed.
  Total.Assets <- tryCatch(
    get.morningstar(Symbol.i, "span[vkey='TotalAssets']"),
    error = function(e) {
      warning(
        "Could not fetch total assets for ", Symbol.i, ": ",
        conditionMessage(e),
        call. = FALSE
      )
      NA_character_
    }
  )
  print(Total.Assets)
}