我是编程新手,从 Python 开始学起。我用它来抓取网站上的数据,具体来说是一家网上商店。我想抓取搜索结果的每一页(带分页),并把得到的商品网址保存到一个 csv 文件中。
这是我目前尝试的代码:
import selenium
import bs4
from selenium import webdriver
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
# Asker's attempt: open ten result pages in Chrome, collect the product links
# and write them to csv.
# NOTE(review): indentation was lost in this paste, so the nesting of the
# loops below is not visible; comments about nesting are assumptions.
# Base URL of the result listing; the page number is appended to it.
myurl = 'https://www.tokopedia.com/p/rumah-tangga/alat-pertukangan/obeng?keyword=obeng&page='
# Filesystem path of the chromedriver executable handed to Selenium.
chrome_path = '/home/yoga/Downloads/chromedriver'
driver = webdriver.Chrome(chrome_path)
# Load result pages 0-9 one after another and parse each with BeautifulSoup.
for number in range(10):
# `buka` only stores the return value of driver.get(); it is never read again.
buka = driver.get(myurl + str(number))
page_source = driver.page_source
soup_this = soup(page_source, "html.parser")
# Every <div class="product-summary"> found on the page just loaded.
product_links = soup_this.findAll("div",{"class":"product-summary"})
# Second loop: builds the names tokopedia0.csv ... tokopedia9.csv and opens
# each in "w" mode, which truncates any existing file of that name.
# NOTE(review): presumably this is why only one page of links ends up in the
# csv -- `product_links` is rebound on every page, so whatever is written here
# can only come from the most recently parsed page; confirm against the
# original indentation.
for number2 in range(10):
filename = "tokopedia" + str(number2) + ".csv"
f = open(filename, "w")
headers = "Link" + "\n"
f.write(headers)
# Write the "ng-href" attribute of the first <a> inside each product div,
# one link per line, echoing it to stdout as well.
for product in product_links:
barang = product.a["ng-href"]
print(barang + "\n")
f.write(barang + "\n")
f.close()
driver.close()
但我在 csv 中得到的结果只有一页的内容。你能帮帮我吗?
答案 0 :(得分:0)
import selenium
import bs4
from selenium import webdriver
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
# Scrape the product links of every result page into ONE csv file.
#
# The output file is opened a single time, before the page loop, so each page
# appends its links to the same file instead of overwriting what the previous
# page wrote.
# Base URL of the result listing; the page number is appended to it.
myurl = 'https://www.tokopedia.com/p/rumah-tangga/alat-pertukangan/obeng?keyword=obeng&page='
# Filesystem path of the chromedriver executable handed to Selenium.
chrome_path = '/home/yoga/Downloads/chromedriver'
driver = webdriver.Chrome(chrome_path)
filename = "tokopedia.csv"
try:
    # `with` guarantees the file is flushed and closed even if a page fails.
    with open(filename, "w", encoding="utf-8") as f:
        # Header row is written exactly once, not once per page.
        f.write("Link" + "\n")
        # NOTE(review): range(10) requests page=0 .. page=9; if the site
        # numbers its pages from 1, this should be range(1, 11) -- confirm.
        for number in range(10):
            driver.get(myurl + str(number))
            soup_this = soup(driver.page_source, "html.parser")
            product_links = soup_this.findAll("div", {"class": "product-summary"})
            for product in product_links:
                # Skip divs without an <a> or without an "ng-href" attribute
                # instead of aborting the whole run on the first odd entry.
                anchor = product.a
                barang = anchor.get("ng-href") if anchor is not None else None
                if not barang:
                    continue
                print(barang + "\n")
                f.write(barang + "\n")
finally:
    # quit() ends the whole WebDriver session (browser and driver process);
    # close() would only close the current window.
    driver.quit()