tk entry widget python web crawler

Date: 2015-10-08 11:13:42

Tags: python user-interface tkinter web-crawler user-input

Hello, this is my first time asking a question here. What I'm trying to do is build a GUI for my basic web crawler. The problem I've run into is this: I can't figure out how to get the user's input from the Entry widget into a function, so that when a button is pressed the text from the Entry widget is used as the URL string. Here is the web crawler without the GUI (the crawler does more than this, but this is the part I want to get working first; the rest should be easy. Also please excuse the silly button colours, I was just messing around).

import urllib
print "please type the url you would like to scan for html"
print "type your url in this format"
print "http://google.com"
textfile = file('outputhtml.txt','a')
thisurl = input("@>")  # Python 2 input() eval()s whatever is typed, which is why the URL has to be entered in quotes
handle = urllib.urlopen(thisurl)

html_gunk = handle.read()

print html_gunk[:15000000]
raw_input()
textfile.write(html_gunk[:1500000000])

textfile.close()

(That one doesn't need the spacing and works fine; below is the GUI.)

from Tkinter import *
import Tkinter as tk
import tkMessageBox
import os
import HTMLParser
import urllib
def donothing():
   filewin = Toplevel(top)
   button = Button(filewin, text="Do nothing button")
   button.pack()
def CallBack():
   tkMessageBox.showinfo( "this Button is not working", "coming soon")
def runcrawlurl():
    os.system("urlcrawl.py") #this is how i got it worked before by just opening another file 



def runcrawlhtml():
    textfile = file('outputhtml.txt','a')
    e1 = tk.StringVar(top)   # creates a brand-new, empty StringVar every time the button is pressed
    vlc = e1.get()           # so this is always ''
    handle = urllib.urlopen(vlc)
    html_gunk = handle.read()
    print html_gunk[:15000000]
    textfile.write(html_gunk[:1500000000])
    textfile.close()



top = Tk()
e1 = tk.StringVar(top)

vlc = e1   # vlc is the StringVar object itself, not its text
top.wm_title("web crawler")





B1 = Button(top, text="Rss", command=CallBack, bd=20, justify=LEFT, background="yellow")
b2 = Button(top, text="HTML", command=runcrawlhtml, bd=20, justify=CENTER, background="green")
b3 = Button(top, text="url", bd=20, justify=RIGHT, command=runcrawlurl, background="blue")
b4 = Button(top, text="text", bd=20, command=CallBack, background="red")
b5 = Button(top, text="wipe all output files", bd=20, command=CallBack)
e1 = Entry(top, bd=5, textvariable=vlc)
l1 = Label(top, text="input url here")
t1 = Text(top, height=5)
t1.insert(INSERT, "welcome to this webcrawler, how can I be of assistance; make sure that while inputting urls you have quotation marks at the start and end ")
t1.pack()

e1.pack(side = TOP)
l1.pack(side = TOP)
B1.pack(side =LEFT)
b2.pack(side =LEFT)
b3.pack(side = LEFT)
b4.pack(side = LEFT)
b5.pack(side = RIGHT)
top.mainloop()

Here is the error message:

Exception in Tkinter callback
Traceback (most recent call last):
  File "C:\Python27\lib\lib-tk\Tkinter.py", line 1536, in __call__
    return self.func(*args)
  File "C:\Users\sascha\Desktop\my1gui\gui2.py", line 31, in runcrawlhtml
    handle = urllib.urlopen(vlc)
  File "C:\Python27\lib\urllib.py", line 87, in urlopen
    return opener.open(url)
  File "C:\Python27\lib\urllib.py", line 213, in open
    return getattr(self, name)(url)
  File "C:\Python27\lib\urllib.py", line 469, in open_file
    return self.open_local_file(url)
  File "C:\Python27\lib\urllib.py", line 483, in open_local_file
    raise IOError(e.errno, e.strerror, e.filename)
IOError: [Errno 2] The system cannot find the path specified: ''

1 answer:

Answer 0 (score: 1):

Just solved it (air punch): I was reading the input the wrong way. Calling .get() on the Entry widget itself works; the StringVar I was creating inside the callback did not, because it was always empty.
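The essential change, shown here as a minimal standalone sketch (the widget names in this snippet are mine, not the ones in the full script below), is that the button callback calls .get() on the Entry widget the user actually typed into, rather than on a StringVar created fresh inside the callback:

import Tkinter as tk
import urllib

root = tk.Tk()
url_entry = tk.Entry(root, bd=5)       # the user types the URL here
url_entry.pack()

def fetch():
    url = url_entry.get()              # .get() on the Entry returns its current text as a plain string
    print urllib.urlopen(url).read()[:1000]

tk.Button(root, text="fetch", command=fetch).pack()
root.mainloop()

The complete working script: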

from Tkinter import *
import Tkinter as tk
import tkMessageBox
import os
import HTMLParser
import urllib
def donothing():
   filewin = Toplevel(top)
   button = Button(filewin, text="Do nothing button")
   button.pack()
def CallBack():
   tkMessageBox.showinfo( "this Button is not working", "coming soon")
def runcrawlurl():
    os.system("urlcrawl.py")
def wipeall():
    # opening the output files in 'w' mode truncates them before the marker text is written
    t = file('outputhtml.txt', 'w')
    b = file('outputtext.txt', 'w')
    c = file('output.txt', 'w')
    b.write("deleted")
    t.write("deleted")
    c.write("deleted")
    t.close()
    c.close()
    b.close()


def runcrawlhtml():
    textfile = file('outputhtml.txt','a')
    vlc = e1.get()    # e1 is the Entry widget; .get() returns its current text
    handle = urllib.urlopen(vlc)
    html_gunk = handle.read()
    print html_gunk[:15000000]
    textfile.write(html_gunk[:1500000000])
    textfile.close()



top = Tk()


top.wm_title("web crawler")





B1 = Button(top, text="Rss", command=CallBack, bd=20, justify=LEFT, background="yellow")
b2 = Button(top, text="HTML", command=runcrawlhtml, bd=20, justify=CENTER, background="green")
b3 = Button(top, text="url", bd=20, justify=RIGHT, command=runcrawlurl, background="blue")
b4 = Button(top, text="text", bd=20, command=CallBack, background="red")
b5 = Button(top, text="wipe all output files", bd=20, command=wipeall)
e1 = Entry(top, bd=5)
l1 = Label(top, text="input url here")
t1 = Text(top, height=5)
t1.insert(INSERT, "welcome to this webcrawler, how can I be of assistance; make sure that while inputting urls you have quotation marks at the start and end ")
t1.pack()

e1.pack(side = TOP)
l1.pack(side = TOP)
B1.pack(side =LEFT)
b2.pack(side =LEFT)
b3.pack(side = LEFT)
b4.pack(side = LEFT)
b5.pack(side = RIGHT)
top.mainloop()
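A side note (my own observation, not part of the original answer): because Entry.get() already returns a plain string, the welcome message about wrapping URLs in quotation marks only applies to the command-line version that uses input(); in the GUI the quotes would just become part of the URL. If you do want to keep a textvariable, the same value can also be read through a StringVar, as long as it is a single StringVar created once outside the callback, roughly like this sketch:

import Tkinter as tk

root = tk.Tk()
url_var = tk.StringVar(root)                        # created once, at module level
entry = tk.Entry(root, bd=5, textvariable=url_var)
entry.pack()

def show_url():
    print url_var.get()                             # same text as entry.get()

tk.Button(root, text="show", command=show_url).pack()
root.mainloop()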