好的,到目前为止,这是我的整个代码:
from selenium import webdriver
from bs4 import BeautifulSoup as bs
import requests
import time
import os
import Tkinter as tk
def get_page():
global driver
driver = webdriver.Chrome()
driver.get(url)
last_height = driver.execute_script('return
document.body.scrollHeight')
while True:
driver.execute_script('window.scrollTo(0,
document.body.scrollHeight);')
new_height = driver.execute_script('return
document.body.scrollHeight')
if new_height == last_height:
break
else:
last_height = new_height
#This function uses BeautifulSoup to parse through the page source and find images.
def get_img():
sp = bs(driver.page_source, 'html.parser')
for image in sp.find_all('img'):
images.append(image)
#Create folder which will contain downloaded images.
def make_dir():
if not os.path.exists('Downloaded images'):
os.mkdir('Downloaded images')
os.chdir('Downloaded images')
#Function which saves images.
def save_img():
x = 0
for image in images:
try:
url = image['src']
source = requests.get(url)
with open('img-{}.jpg'.format(x), 'wb') as f:
f.write(requests.get(url).content)
x += 1
except:
print 'Error while saving image.'
root = tk.Tk()
root.title('Image Scraper 1.0')
tk.Label(root, text = 'Enter URL:').grid(row=0)
e1 = tk.Entry(root)
e1.grid(row=0, column=1)
e1.insert(driver.get(url))
button1 = tk.Button(root, text = 'SCRAPE', command =scrape_site).grid(row=3, column=1, sticky=tk.W, pady=4)
button1.pack()
root.mainloop()
我试图把整个scrape_site函数放在tkinters按钮command =,这是愚蠢的我现在看到了,显然它不起作用。如您所见,我将整个tkinter代码复制到主要的scraper文件中。有什么想法吗?我将不胜感激任何意见:)
我最近发布了一个关于网络刮刀的问题,它下载了猫的图片。这次我决定,我将向前迈进一步。我想制作GUI web scraper,它将从用户将在tkinter Entry小部件中输入的网站下载图像。这甚至可能吗?我还创建了两个.py文件:一个用于刮刀脚本,另一个用于gui。可以这种方式存储还是应该是一个文件? 这是打开和滚动页面的刮刀代码(使用selenium),它工作正常。我唯一的问题是:如何将它放入tkinter? :)
def get_page():
global driver
driver = webdriver.Chrome()
driver.get(url)
last_height = driver.execute_script('return document.body.scrollHeight')
while True:
driver.execute_script('window.scrollTo(0,
document.body.scrollHeight);')
new_height = driver.execute_script('return
document.body.scrollHeight')
if new_height == last_height:
break
else:
last_height = new_height
get_page()
答案 0 :(得分:0)
正如我的评论中所提到的,您应该修改get_page
以将url作为参数。
下面的简单示例显示了这可能如何工作,但更换了get_page函数(我没有硒)。
try:
import tkinter as tk
except:
import Tkinter as tk
def get_page(url):
print("Getting cats from {}".format(url))
class App(tk.Frame):
def __init__(self,master=None,**kw):
tk.Frame.__init__(self,master=master,**kw)
self.txtURL = tk.StringVar()
self.entryURL = tk.Entry(self,textvariable=self.txtURL)
self.entryURL.grid(row=0,column=0)
self.btnGet = tk.Button(self,text="Get Some Cats!",command=self.getCats)
self.btnGet.grid(row=0,column=1)
def getCats(self):
get_page(self.txtURL.get())
if __name__ == '__main__':
root = tk.Tk()
App(root).grid()
root.mainloop()
您可以在Entry小部件中输入URL,按下按钮,URL将被发送到该功能。
如果您的get_page
函数位于单独的文件中,只需使用from my_other_file import get_page
导入它,其中是包含get_page
函数的python文件的名称