这是我在R
尝试过的,但我无法进行无限滚动,
这是使用Pyhton中的Selenium包进行无限滚动的reference link。我在Python编码方面相当不错,但仍尝试从参考文章进行一些编辑。
以下是R
library(rvest)
uuu_df2 <- data.frame(x = c('http://www.magicbricks.com/property-for-
sale/residential-real-estate?bedroom=1&proptype=Multistorey-Apartment,Builder-
Floor-Apartment,Penthouse,Studio-Apartment&cityName=Thane&BudgetMin=5-
Lacs&BudgetMax=5-Lacs',
'http://www.magicbricks.com/property-for-sale/residential-real-estate?bedroom=1&proptype=Multistorey-Apartment,Builder-Floor-Apartment,Penthouse,Studio-Apartment&cityName=Thane&BudgetMin=5-Lacs&BudgetMax=10-Lacs',
'http://www.magicbricks.com/property-for-sale/residential-real-estate?bedroom=1&proptype=Multistorey-Apartment,Builder-Floor-Apartment,Penthouse,Studio-Apartment&cityName=Thane&BudgetMin=5-Lacs&BudgetMax=10-Lacs'))
urlList <- llply(uuu_df2[,1], function(url){
this_pg <- read_html(url)
results_count <- this_pg %>%
xml_find_first(".//span[@id='resultCount']") %>%
xml_text() %>%
as.integer()
if(!is.na(results_count) & (results_count > 0)){
cards <- this_pg %>%
xml_find_all('//div[@class="SRCard"]')
df <- ldply(cards, .fun=function(x){
y <- data.frame(wine = x %>% xml_find_first('.//span[@class="agentNameh"]') %>% xml_text(),
excerpt = x %>% xml_find_first('.//div[@class="postedOn"]') %>% xml_text(),
locality = x %>% xml_find_first('.//span[@class="localityFirst"]') %>% xml_text(),
society = x %>% xml_find_first('.//div[@class="labValu"]') %>% xml_text() %>% gsub('\\n', '', .))
return(y)
})
} else {
df <- NULL
}
return(df)
}, .progress = 'text')
names(urlList) <- uuu_df2[,1]
这是我尝试从原始帖子
编辑的无限滚动的python代码from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
import sys
import unittest, time, re
class Sel(unittest.TestCase):
def setUp(self):
self.driver = webdriver.Firefox()
self.driver.implicitly_wait(30)
self.base_url = "http://www.magicbricks.com/property-for-sale/residential-real-estate?bedroom=1&proptype=Multistorey-Apartment,Builder-Floor-Apartment,Penthouse,Studio-Apartment&cityName=Thane&BudgetMin=5-Lacs&BudgetMax=10-Lacs"
self.verificationErrors = []
self.accept_next_alert = True
def test_sel(self):
driver = self.driver
delay = 3
driver.get(self.base_url + "/search?q=stckoverflow&src=typd")
driver.find_element_by_link_text("All").click()
for i in range(1,html_text(html_node(read_html(self.base_url,'a.act')))): #dummy code line to get the number of pages till it should loop till
self.driver.execute_script(".//span[@class=agentNameh;")
time.sleep(4)
html_source = driver.page_source
data = html_source.encode('utf-8')
if __name__ == "__main__":
unittest.main()
但它给了我错误:
execfile(filename, namespace)
File "C:\Users\user\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "D:/Deepesh/All files/test_forCSVData.py", line 27
self.driver.execute_script(".//span[@class="agentNameh;")
关于应该在我的Python / R代码中进行哪些编辑的任何建议,以便它可以滚动无限。任何帮助将非常感激。