我使用了以下代码:
library(XML)
library(RCurl)
# Build a Google search URL for a search term.
#
# Arguments:
#   search.term  Text to search for.
#   domain       Suffix appended to "http://www.google" (default '.co.uk').
#   quotes       If TRUE, the encoded term is wrapped in %22 (double quotes)
#                so it is searched as an exact phrase.
#
# Returns the URL as a character string.
getGoogleURL <- function(search.term, domain = '.co.uk', quotes=TRUE)
{
  # Percent-encode every reserved character, not only spaces, so that terms
  # containing '&', '#', '+', '?' etc. cannot corrupt the query string.
  # repeated = TRUE makes a literal '%' in the term get encoded as well.
  search.term <- utils::URLencode(search.term, reserved = TRUE, repeated = TRUE)
  if(quotes) search.term <- paste('%22', search.term, '%22', sep='')
  # Return the URL explicitly instead of assigning it to a local variable
  # that shadows the function's own name.
  paste('http://www.google', domain, '/search?q=', search.term, sep='')
}
# Download a Google results page and collect one attribute value per matching
# anchor element.
#
# Arguments:
#   google.url  URL of the results page to fetch.
#
# Returns the result of sapply() over the matched nodes: the first attribute
# of every <a> element that has an href and class 'l'.
getGoogleLinks <- function(google.url)
{
  page.source <- getURL(google.url, httpheader = c("User-Agent" = "R(2.10.0)"))
  # Parse leniently: the empty error handler silences HTML parser complaints.
  parsed <- htmlTreeParse(page.source, useInternalNodes = TRUE, error = function(...) {})
  anchors <- getNodeSet(parsed, "//a[@href][@class='l']")
  first.attribute <- function(node) xmlAttrs(node)[[1]]
  sapply(anchors, first.attribute)
}
# Example run: build the search URL for the unquoted term "cran" and collect
# the links found on the results page.
search.term <- "cran"
# Use a real logical rather than the string "FALSE"; the string only worked
# because `if` implicitly coerces it to a logical.
quotes <- FALSE
search.url <- getGoogleURL(search.term=search.term, quotes=quotes)
links <- getGoogleLinks(search.url)
我想获取搜索返回的所有链接,但得到的却是以下结果:
> links
list()
我该如何获得这些链接?另外,如何获取谷歌搜索结果的标题和摘要?最后,是否有办法获取 ChillingEffects.org 结果中的链接?
答案 0 :(得分:9)
如果您查看 html 变量,可以看到所有搜索结果链接都嵌套在 <h3 class="r"> 标记中。
尝试将 getGoogleLinks 函数更改为:
# Download a Google results page and return the href of every result link.
#
# Arguments:
#   google.url  URL of the results page to fetch.
#
# Returns the result of sapply() over the matched nodes: the href attribute
# of each <a> element nested inside an <h3 class="r"> element.
getGoogleLinks <- function(google.url) {
  # The User-Agent value must stay on one line: a line break inside the
  # string literal would put a newline into the HTTP header.
  doc <- getURL(google.url, httpheader = c("User-Agent" = "R(2.10.0)"))
  # Parse leniently: the empty error handler silences HTML parser complaints.
  html <- htmlTreeParse(doc, useInternalNodes = TRUE, error = function(...) {})
  # Only anchors that actually carry an href are selected, so the [["href"]]
  # lookup below cannot fail on an anchor without one.
  nodes <- getNodeSet(html, "//h3[@class='r']//a[@href]")
  return(sapply(nodes, function(node) xmlAttrs(node)[["href"]]))
}
答案 1 :(得分:6)
我创建了这个函数来读取公司名称列表,然后获取每个公司名称排名第一的网站搜索结果。它可以作为起点,之后您可以根据需要进行调整。
# Libraries.
# URLencode() is a function of the 'utils' package, not a package of its own,
# so library(URLencode) fails with "there is no package called 'URLencode'".
library(utils)
library(rvest)
# Load data: read the CSV and take its Company.Name column as character.
d <- read.csv("P:\\needWebsites.csv")
# NOTE(review): `c` shadows the name of base::c(); the name is kept because
# the code further down refers to it.
c <- as.character(d$Company.Name)
# Function for getting website.
# Look up a name on Google and return the text of the first <cite> element of
# the results page.
#
# Arguments:
#   name  Search text (here: a company name).
#
# Returns a character string; NA when the page contains no <cite> element.
getWebsite <- function(name)
{
  # Encode only the query value, including reserved characters, so that names
  # such as "Johnson & Johnson" do not split the query string at '&'.
  query <- utils::URLencode(name, reserved = TRUE, repeated = TRUE)
  search_url <- paste0("https://www.google.com/search?q=", query)
  page <- read_html(search_url)
  results <- page %>%
    html_nodes("cite") %>% # Get all nodes of type cite. You can change this to grab other node types.
    html_text()
  result <- results[1]
  return(as.character(result)) # Return `results` instead if you want to see them all.
}
# Apply the function to the vector of company names; the result is a data
# frame with a single column, Website.
# (The stray trailing "]" of the original line was a syntax error.)
websites <- data.frame(Website = sapply(c, getWebsite))
答案 2 :(得分:3)
这里的其他解决方案对我不起作用。以下是我针对 @Bryce-Chamberlain 的问题给出的做法,截至 2019 年 8 月对我有效;它还回答了另一个尚未解决的问题:company name to URL in R
/**
 * Customized app bar behavior that provides two features:
 * 1. Forces the AppBarLayout to stop its own fling when a nested view starts flinging; otherwise the
 *    scrolling view's fling collides with the AppBarLayout fling.
 * 2. Ignores the AppBarLayout fling and hands it over to the scrolling view ([recyclerView]), so the
 *    page keeps scrolling after the AppBarLayout has fully collapsed (otherwise scrolling stops at
 *    that point).
 *
 * The superclass scroller is reached through reflection on a field named `scroller`; when that field
 * cannot be found or read, stopping the AppBarLayout fling silently becomes a no-op.
 */
class SmoothAppBarBehavior : AppBarLayout.Behavior() {

    /** Scrolling view that receives the fling on ACTION_UP; [onTouchEvent] tracks nothing while this is null. */
    var recyclerView: RecyclerView? = null

    // Scroller obtained reflectively from a superclass; stays null until the lookup yields an instance.
    private var overScroller: OverScroller? = null

    // Id of the pointer recorded on ACTION_DOWN; -1 while no gesture is being tracked.
    private var pointerId = -1

    // Tracker used to compute the fling velocity handed to recyclerView.
    private var velocityTracker: VelocityTracker? = null

    /** Stops the AppBarLayout's own fling before delegating to the base implementation. */
    override fun onNestedPreFling(
        coordinatorLayout: CoordinatorLayout,
        child: AppBarLayout,
        target: View,
        velocityX: Float,
        velocityY: Float
    ): Boolean {
        stopAppBarLayoutFling()
        return super.onNestedPreFling(coordinatorLayout, child, target, velocityX, velocityY)
    }

    /**
     * Starts gesture tracking on ACTION_DOWN (and stops any running scroll of [recyclerView]) and
     * releases the tracker on ACTION_CANCEL. The returned value is the base implementation's.
     */
    override fun onInterceptTouchEvent(
        parent: CoordinatorLayout,
        child: AppBarLayout,
        ev: MotionEvent
    ): Boolean {
        val consumed = super.onInterceptTouchEvent(parent, child, ev)
        when (ev.actionMasked) {
            MotionEvent.ACTION_DOWN -> {
                ensureVelocityTracker()
                recyclerView?.stopScroll()
                pointerId = ev.getPointerId(0)
            }
            MotionEvent.ACTION_CANCEL -> releaseVelocityTracker()
            else -> {}
        }
        return consumed
    }

    /**
     * Feeds touch events into the velocity tracker and, on ACTION_UP, flings [recyclerView] with the
     * measured vertical velocity instead of letting the AppBarLayout fling. Nothing is tracked while
     * [recyclerView] is null. The returned value is the base implementation's.
     */
    override fun onTouchEvent(
        parent: CoordinatorLayout,
        child: AppBarLayout,
        ev: MotionEvent
    ): Boolean {
        val consumed = super.onTouchEvent(parent, child, ev)
        recyclerView?.let { scrollingView ->
            when (ev.actionMasked) {
                MotionEvent.ACTION_DOWN -> {
                    ensureVelocityTracker()
                    pointerId = ev.getPointerId(0)
                }
                MotionEvent.ACTION_UP -> {
                    stopAppBarLayoutFling()
                    scrollingView.fling(0, getYVelocity(ev))
                    // The gesture is over: recycle the tracker so it is neither leaked nor reused
                    // for the next gesture. getYVelocity() already recorded this event, so it must
                    // not be added a second time below.
                    releaseVelocityTracker()
                }
                MotionEvent.ACTION_CANCEL -> releaseVelocityTracker()
                else -> {}
            }
            // No-op after ACTION_UP / ACTION_CANCEL because the tracker has been released.
            velocityTracker?.addMovement(ev)
        }
        return consumed
    }

    /** Obtains a velocity tracker unless one is already held. */
    private fun ensureVelocityTracker() {
        if (velocityTracker == null) {
            velocityTracker = VelocityTracker.obtain()
        }
    }

    /** Recycles the held velocity tracker, if any, and forgets the tracked pointer. */
    private fun releaseVelocityTracker() {
        velocityTracker?.recycle()
        velocityTracker = null
        pointerId = -1
    }

    /**
     * Adds [event] to the tracker and returns the negated vertical velocity of the tracked pointer,
     * or 0 when no tracker is held.
     */
    private fun getYVelocity(event: MotionEvent): Int {
        val tracker = velocityTracker ?: return 0
        tracker.addMovement(event)
        tracker.computeCurrentVelocity(FLING_UNITS)
        return -tracker.getYVelocity(pointerId).toInt()
    }

    /** Force-finishes the superclass scroller, if one could be obtained. */
    private fun stopAppBarLayoutFling() {
        if (overScroller == null) {
            overScroller = findSuperclassScroller()
        }
        overScroller?.forceFinished(true)
    }

    /**
     * Walks up the superclass chain looking for a declared field named [SCROLLER_FIELD_NAME] and
     * returns its current value when it is an [OverScroller].
     *
     * Searching the chain, instead of assuming the field sits exactly three superclasses up, keeps
     * this working if this class is subclassed or the library's hierarchy changes; a missing or
     * unreadable field yields null rather than an exception in the middle of touch handling.
     */
    private fun findSuperclassScroller(): OverScroller? {
        var klass: Class<*>? = javaClass.superclass
        while (klass != null) {
            val field = try {
                klass.getDeclaredField(SCROLLER_FIELD_NAME)
            } catch (e: NoSuchFieldException) {
                null
            }
            if (field != null) {
                return try {
                    field.isAccessible = true
                    field.get(this) as? OverScroller
                } catch (e: IllegalAccessException) {
                    null
                } catch (e: SecurityException) {
                    null
                }
            }
            klass = klass.superclass
        }
        return null
    }

    companion object {
        private const val FLING_UNITS = 1000 // copied from base class

        // Name of the scroller field declared somewhere in the superclass chain.
        private const val SCROLLER_FIELD_NAME = "scroller"
    }
}
由reprex package(v0.2.1)于2019-08-10创建
答案 3 :(得分:0)
免费解决方案不再有效。此外,它不允许您搜索您所在位置以外的区域。这是使用 Google 自定义搜索 API 的解决方案。该 API 每天允许 100 次免费调用。下面的函数仅返回前 10 个结果(即第 1 页),因为每次 API 调用最多返回 10 个结果。
# Query the Google Custom Search JSON API for one keyword and return the
# result links of the first response page.
#
# Arguments:
#   keyword     Search text.
#   google.key  API key, sent as the `key` query parameter.
#   google.cx   Search engine id, sent as the `cx` query parameter.
#   country     Value of the `gl` query parameter (default "us").
#
# Returns a table with the columns search.engine, keyword, SERP (row number
# of the result) and source (the result link). It has zero rows when the
# response contains no items.
#
# Side effects: prints a status line and sleeps for a random 1.1-5 seconds.
Google.Search.API <- function(keyword, google.key, google.cx, country = "us")
{
  # keyword = keywords[10]; country = "us"
  url <- paste0("https://www.googleapis.com/customsearch/v1?"
                , "key=", google.key
                # Percent-encode the whole keyword, not only its spaces, so
                # that '&', '#', '+' etc. cannot corrupt the query string.
                , "&q=", utils::URLencode(keyword, reserved = TRUE, repeated = TRUE)
                , "&gl=", country         # Country
                , "&hl=en"                # Language from Browser, english
                , "&cx=", google.cx
                , "&fields=items(link)"
  )

  items <- url %>%
    httr::GET(ssl.verifypeer=TRUE) %>%
    httr::content(.) %>% .[["items"]]

  if (length(items) == 0) {
    # Without items there is no `link` column to rename, so build the empty
    # result directly; this is what makes the "Failed for" message below
    # reachable instead of the pipeline stopping with an error.
    d2 <- data.table::data.table(search.engine = character(0)
                                 , keyword = character(0)
                                 , SERP = integer(0)
                                 , source = character(0))
  } else {
    d2 <- items %>%
      data.table::rbindlist(.) %>%
      mutate(keyword, SERP = row_number(), search.engine = "Google API") %>%
      rename(source = link) %>%
      select(search.engine, keyword, SERP, source)
  }

  # Random pause between calls.
  pause <- round(runif(1, min = 1.1, max = 5), 1)
  if(nrow(d2) == 0)
  {cat("\nPausing", pause, "seconds. Failed for:", keyword)} else
  {cat("\nPausing", pause, "seconds. Successful for:", keyword)}

  Sys.sleep(pause)
  return(d2)
}