我有一个网络爬虫,用来重新发布(刷新)我在 Depop 上的帖子。在测试账号上运行时,它工作得很好。但现在在一个大约有 250 个帖子的真实账号上运行时,帖子的顺序往往会被打乱。我没能在运行过程中当场观察到问题,所以不知道具体发生了什么,也不知道问题出在哪一步。
爬虫使用 soup.find_all() 查找所有帖子:
# Collect every product link by its data-testid attribute instead of the
# styled-component CSS class names that were tried earlier.
all_posts = soup.find_all("a", attrs={"data-testid": "product__item"})
find_all 是按照元素在页面上出现的顺序返回结果,还是返回顺序并不可靠(类似于没有 ORDER BY 的 SQL 查询)?
def GetPosts(driver, scroll_count=20, scroll_pause=0.25):
    """Open the edit page of each listed post and click its save button.

    The current page is scrolled to the bottom ``scroll_count`` times, its
    HTML is parsed, and every ``<a data-testid="product__item">`` link is
    collected.  The links are passed through ``FilterOutSoldItems`` and then
    processed in reverse order.  ``find_all`` yields matches in document
    order, so the resulting order is only as complete as the page was when
    it was parsed.

    Args:
        driver: Selenium WebDriver already showing the page that lists the
            posts.
        scroll_count: Number of scroll-to-bottom steps performed before the
            page is parsed.  NOTE(review): presumably needed so lazily
            loaded items are present; larger profiles may need more steps.
        scroll_pause: Seconds to wait before each scroll step.

    Returns:
        None.  Progress is reported with ``print``.
    """
    scroll_to_bottom = "window.scrollTo(0, document.body.scrollHeight);"

    # Scroll to the bottom of the page.
    for _ in range(scroll_count):
        time.sleep(scroll_pause)
        driver.execute_script(scroll_to_bottom)

    soup = BeautifulSoup(driver.page_source, 'html')
    all_posts = soup.find_all("a", {"data-testid": "product__item"})
    # NOTE(review): FilterOutSoldItems is defined elsewhere; it is assumed to
    # return a list, since the result is reversed in place.
    posts = FilterOutSoldItems(all_posts)
    posts.reverse()

    for post_num, post in enumerate(posts):
        time.sleep(1.25)
        print("posts: " + str(post_num))

        href = post.get('href')
        if not href:
            # Without an href no edit URL can be built for this post.
            print("post has no href - skipped")
            continue

        # The product identifier is the last path segment; stripping the
        # trailing slash first makes this work with or without one.
        product_page = href.rstrip("/").rsplit("/", 1)[-1]
        edit_page = "https://www.depop.com/products/edit/" + product_page + "/"

        time.sleep(1)
        driver.get(edit_page)
        time.sleep(1.5)

        try:
            for _ in range(3):
                driver.execute_script(scroll_to_bottom)
            # find_element("xpath", ...) replaces find_element_by_xpath, which
            # newer Selenium releases no longer provide.
            save_changes_button = driver.find_element(
                "xpath",
                "//button[@data-testid='editProductFormButtons__save']",
            )
            time.sleep(1)
            save_changes_button.click()
        except Exception as exc:
            # Any failure on the edit page lands here, not only sold posts,
            # so the actual error is reported as well.
            print("post has already been sold")
            print(href + " - could not be updated: " + repr(exc))
        else:
            # Only report success when the save button was actually clicked.
            print(href + " - has been updated at {}".format(datetime.now()))