try/except - 找到结果后停止

时间:2015-07-16 12:15:48

标签: python-2.7 beautifulsoup try-except

目前我的抓取(scraper)脚本可以很好地从设备中提取数据。我主要用这个脚本来检测网络上的所有打印机,然后用检测结果更新数据库。我知道代码可能不是 100% 整洁,因为我仍在学习并提高自己的 Python 技能,但我有一个小问题,相信有人能很快回答。我觉得自己应该知道答案,但一时怎么也想不起来。 :/

有没有办法在我的脚本找到结果后停止脚本运行其他函数?

例如:如果 printer_a() 找到了匹配项,并打印出打印机(或它发现的任何设备)的结果,有没有办法让脚本不再运行其余函数,而是直接继续处理下一个 IP 地址?现在看来,即使已经得到结果,它仍会继续执行其他函数。我已经尝试过下面代码的许多其他变体,但似乎怎么也解决不了这个问题。非常感谢任何帮助。谢谢!

from bs4 import BeautifulSoup
import urllib2
import sys

# Host or IP address of the device to probe, entered interactively.  The
# probe functions prepend the URL scheme and append the path themselves.
ipaddress_input = raw_input("IP Address? ")

# Request headers sent with every probe.  A User-Agent header is needed to
# prevent a 403 error.
header = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'
    ),
}

def printer_a():
    """Fetch ``http://<ipaddress_input>`` and print the page's <title> text.

    Reads the module-level ``ipaddress_input`` and ``header``.

    Returns:
        The extracted title text (which is also printed), so that a caller
        can stop probing other URLs once one of them produced a result.

    Raises:
        SystemExit: if the extracted title is shorter than two characters.

    Any exception raised by ``urllib2.urlopen`` or ``BeautifulSoup``
    propagates unchanged.
    """
    url = "http://" + ipaddress_input
    request = urllib2.Request(url, headers=header)
    page = urllib2.urlopen(request, timeout=5)
    try:
        soup = BeautifulSoup(page)
    finally:
        # Always release the connection, even if parsing fails.
        page.close()
    # str(tag) renders "<title>...</title>"; the slice drops the 7-character
    # opening tag and the 8-character closing tag.  A missing title renders
    # as "None", which slices to "" and is rejected below.
    result = str(soup.find("title"))[7:-8]
    if len(result) < 2:
        # sys.exit() raises SystemExit just like quit() did, but does not
        # depend on the site module and does not close sys.stdin first.
        sys.exit()
    print(result)
    return result

def printer_b():
    """Fetch ``http://<ipaddress_input>/default.asp?Lang=en-us`` and print its title.

    Reads the module-level ``ipaddress_input`` and ``header``.

    Returns:
        The extracted title text (which is also printed), so that a caller
        can stop probing other URLs once one of them produced a result.

    Raises:
        SystemExit: if the extracted title is shorter than two characters.

    Any exception raised by ``urllib2.urlopen`` or ``BeautifulSoup``
    propagates unchanged.
    """
    url = "http://" + ipaddress_input + "/default.asp?Lang=en-us"
    request = urllib2.Request(url, headers=header)
    page = urllib2.urlopen(request, timeout=5)
    try:
        soup = BeautifulSoup(page)
    finally:
        # Always release the connection, even if parsing fails.
        page.close()
    # str(tag) renders "<title>...</title>"; the slice drops the 7-character
    # opening tag and the 8-character closing tag.  A missing title renders
    # as "None", which slices to "" and is rejected below.
    result = str(soup.find("title"))[7:-8]
    if len(result) < 2:
        # sys.exit() raises SystemExit just like quit() did, but does not
        # depend on the site module and does not close sys.stdin first.
        sys.exit()
    print(result)
    return result

def printer_c():
    """Fetch ``http://<ipaddress_input>/default.html`` and print its title.

    Reads the module-level ``ipaddress_input`` and ``header``.

    Returns:
        The extracted title text (which is also printed), so that a caller
        can stop probing other URLs once one of them produced a result.

    Raises:
        SystemExit: if the extracted title is shorter than two characters.

    Any exception raised by ``urllib2.urlopen`` or ``BeautifulSoup``
    propagates unchanged.
    """
    url = "http://" + ipaddress_input + "/default.html"
    request = urllib2.Request(url, headers=header)
    page = urllib2.urlopen(request, timeout=5)
    try:
        soup = BeautifulSoup(page)
    finally:
        # Always release the connection, even if parsing fails.
        page.close()
    # str(tag) renders "<title>...</title>"; the slice drops the 7-character
    # opening tag and the 8-character closing tag.  A missing title renders
    # as "None", which slices to "" and is rejected below.
    result = str(soup.find("title"))[7:-8]
    if len(result) < 2:
        # sys.exit() raises SystemExit just like quit() did, but does not
        # depend on the site module and does not close sys.stdin first.
        sys.exit()
    print(result)
    return result

def printer_d():
    """Fetch ``https://<ipaddress_input>/cgi-bin/f_cgi`` and print its title.

    Reads the module-level ``ipaddress_input`` and ``header``.  Unlike most
    of the sibling probes, this one uses HTTPS.

    Returns:
        The extracted title text (which is also printed), so that a caller
        can stop probing other URLs once one of them produced a result.

    Raises:
        SystemExit: if the extracted title is shorter than two characters.

    Any exception raised by ``urllib2.urlopen`` or ``BeautifulSoup``
    propagates unchanged.
    """
    url = "https://" + ipaddress_input + "/cgi-bin/f_cgi"
    request = urllib2.Request(url, headers=header)
    page = urllib2.urlopen(request, timeout=5)
    try:
        soup = BeautifulSoup(page)
    finally:
        # Always release the connection, even if parsing fails.
        page.close()
    # str(tag) renders "<title>...</title>"; the slice drops the 7-character
    # opening tag and the 8-character closing tag.  A missing title renders
    # as "None", which slices to "" and is rejected below.
    result = str(soup.find("title"))[7:-8]
    if len(result) < 2:
        # sys.exit() raises SystemExit just like quit() did, but does not
        # depend on the site module and does not close sys.stdin first.
        sys.exit()
    print(result)
    return result

def printer_e():
    """Fetch ``http://<ipaddress_input>/login.html`` and print its title.

    Reads the module-level ``ipaddress_input`` and ``header``.

    Returns:
        The extracted title text (which is also printed), so that a caller
        can stop probing other URLs once one of them produced a result.

    Raises:
        SystemExit: if the extracted title is shorter than two characters.

    Any exception raised by ``urllib2.urlopen`` or ``BeautifulSoup``
    propagates unchanged.
    """
    url = "http://" + ipaddress_input + "/login.html"
    request = urllib2.Request(url, headers=header)
    page = urllib2.urlopen(request, timeout=5)
    try:
        soup = BeautifulSoup(page)
    finally:
        # Always release the connection, even if parsing fails.
        page.close()
    # str(tag) renders "<title>...</title>"; the slice drops the 7-character
    # opening tag and the 8-character closing tag.  A missing title renders
    # as "None", which slices to "" and is rejected below.
    result = str(soup.find("title"))[7:-8]
    if len(result) < 2:
        # sys.exit() raises SystemExit just like quit() did, but does not
        # depend on the site module and does not close sys.stdin first.
        sys.exit()
    print(result)
    return result

def printer_f():
    """Fetch ``https://<ipaddress_input>`` and print the page's <title> text.

    Reads the module-level ``ipaddress_input`` and ``header``.  Unlike most
    of the sibling probes, this one uses HTTPS.

    Returns:
        The extracted title text (which is also printed), so that a caller
        can stop probing other URLs once one of them produced a result.

    Raises:
        SystemExit: if the extracted title is shorter than two characters.

    Any exception raised by ``urllib2.urlopen`` or ``BeautifulSoup``
    propagates unchanged.
    """
    url = "https://" + ipaddress_input
    request = urllib2.Request(url, headers=header)
    page = urllib2.urlopen(request, timeout=5)
    try:
        soup = BeautifulSoup(page)
    finally:
        # Always release the connection, even if parsing fails.
        page.close()
    # str(tag) renders "<title>...</title>"; the slice drops the 7-character
    # opening tag and the 8-character closing tag.  A missing title renders
    # as "None", which slices to "" and is rejected below.
    result = str(soup.find("title"))[7:-8]
    if len(result) < 2:
        # sys.exit() raises SystemExit just like quit() did, but does not
        # depend on the site module and does not close sys.stdin first.
        sys.exit()
    print(result)
    return result

def printer_g():
    """Fetch ``http://<ipaddress_input>/main/main.html`` and print its title.

    Reads the module-level ``ipaddress_input`` and ``header``.  Unlike the
    sibling probes, all whitespace is removed from the rendered <title>
    element before the text is extracted.

    Returns:
        The extracted, whitespace-free title text (which is also printed), so
        that a caller can stop probing other URLs once one produced a result.

    Raises:
        SystemExit: if the extracted title is shorter than two characters.

    Any exception raised by ``urllib2.urlopen`` or ``BeautifulSoup``
    propagates unchanged.
    """
    url = "http://" + ipaddress_input + "/main/main.html"
    request = urllib2.Request(url, headers=header)
    page = urllib2.urlopen(request, timeout=5)
    try:
        soup = BeautifulSoup(page)
    finally:
        # Always release the connection, even if parsing fails.
        page.close()
    # split()/join removes every whitespace character, including spaces
    # between words, so the tags sit flush against the title text.
    fixed = "".join(str(soup.find("title")).split())
    # The slice drops the 7-character "<title>" and 8-character "</title>".
    # A missing title renders as "None", which slices to "" and is rejected.
    result = fixed[7:-8]
    if len(result) < 2:
        # sys.exit() raises SystemExit just like quit() did, but does not
        # depend on the site module and does not close sys.stdin first.
        sys.exit()
    print(result)
    return result

# Try each known status-page location in the original order and stop at the
# first probe that succeeds, so later probes do not run once a device has been
# identified.
for probe in (printer_g, printer_a, printer_b, printer_c, printer_d,
              printer_e, printer_f):
    try:
        probe()
    except (Exception, SystemExit):
        # A probe signals "no usable title" by raising SystemExit and lets
        # network/parsing errors propagate; in both cases move on to the next
        # location.  KeyboardInterrupt is deliberately not caught, so Ctrl-C
        # still aborts the scan.
        continue
    else:
        # The probe returned normally, which means it printed a result.
        break

编辑:这是我想出的新代码。它看起来可以正常工作,而且更简洁。

from bs4 import BeautifulSoup
import urllib2
import sys

# Host or IP address of the device to probe, entered interactively.
ipaddress_input = raw_input("IP Address? ")

# Request headers sent with every probe.  A User-Agent header is needed to
# prevent a 403 error.
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'}

# URL paths to try, in order; the empty string probes the site root.
addresses = ["/main/main.html", "/hp/device/this.LCDispatcher", "/default.asp?Lang=en-us", "/default.html", "/cgi-bin/f_cgi", "/login.html", ""]
# Titles containing either marker get all of their whitespace removed.
printer_name_oce = "Oce"
printer_name_hp = "hp"
for url_end in addresses:
    printer_address = "http://" + ipaddress_input + url_end
    try:
        printer_request = urllib2.Request(printer_address, headers=header)
        printer_page = urllib2.urlopen(printer_request, timeout=7)
        try:
            printer_soup = BeautifulSoup(printer_page)
        finally:
            # Always release the connection, even if parsing fails.
            printer_page.close()
        printer_string = str(printer_soup.find("title"))
    except Exception:
        # Unreachable or unparsable page: try the next location.  Only the
        # fetch/parse step is guarded, and KeyboardInterrupt is not caught,
        # so Ctrl-C still aborts the scan.
        continue
    if printer_name_oce in printer_string or printer_name_hp in printer_string:
        # Removes every whitespace character, including spaces between words.
        printer_string = "".join(printer_string.split())
    # str(tag) renders "<title>...</title>"; the slice drops the 7-character
    # opening tag and the 8-character closing tag.  A missing title renders
    # as "None", which slices to "".
    printer_result = printer_string[7:-8]
    if len(printer_result) < 2:
        # No usable title at this location.  The original called quit() here,
        # but the surrounding bare except swallowed the SystemExit, so the
        # loop simply continued; make that explicit.
        continue
    # Validate first, then print exactly once and stop at the first hit.
    print(printer_result)
    break

0 个答案:

没有答案