Question

使用 Python 和 urllib2，如何在通过身份验证后继续访问同一网站的其他页面并提取数据，而无需再次进行身份验证？

请参阅下面的代码：我打开第一个页面 http://xx.xx.xx.xx:8080/status，通过身份验证后得到了需要的内容；随后尝试打开第二个页面 http://xx.xx.xx.xx:8080/uistatus.html，程序却进入了 except 异常处理分支。

输出的错误信息：Unexpected error HTTP Error 401: Unauthorized（HTTP 错误 401：未经授权）

代码：

try:

        pattern = r'\s*Current\s+stream\s+number:\s*(\d+)'
        pattern2 = r'\s*Reconnects:\s*(\d+)'
        SERVER = 'http://xx.xx.xx.xx:8080/status'
        authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
        authinfo.add_password(None, SERVER, 'xxxxxx', 'xxxxxxx')
        page = 'http://xx.xx.xx.xx:8080/status'
        handler = urllib2.HTTPBasicAuthHandler(authinfo)
        myopener = urllib2.build_opener(handler)
        opened = urllib2.install_opener(myopener)
        output = urllib2.urlopen(page)
        #print output.read()
        soup = BeautifulSoup(output.read(), "lxml")
        #print(soup)

        paragraphs = soup.findAll('p')
        data = []
        for para in paragraphs:
                found = re.finditer(pattern, para.text, re.IGNORECASE);

                data.extend([x.group(1) for x in found])


        #print data
        print "exstreamer 1 status: ", int(data[0])
        if int(data[0]) == 1:
                mesg = "Centerpoint exstreamer connected to main streaming host"
                centerpoint_online = "Online"
                centerpoint_connection = "Main"    

        elif int(data[0]) == 2 or int(data[0]) == 3:
                mesg = "Centerpoint exstreamer connected to local qkradio instreamer"
                print 'alert sent', mesg
                with open("/var/www/html/status.log", "a") as myfile:
                    myfile.write(time.strftime("%Y-%m-%d %H:%M")+ "\t Centerpoint exstreamer connected to local qkradio instreamer\n")  
                centerpoint_connection = "Backup"
                system_ok = "Offline"
        data = []
        for para in paragraphs:
                found = re.finditer(pattern2, para.text, re.IGNORECASE);

                data.extend([x.group(1) for x in found])
        centerpoint_reconnect_number_old = centerpoint_reconnect_number
        centerpoint_reconnect_number = int(data[0])
        print "Centerpoint number of reconnects: ", centerpoint_reconnect_number
        if not centerpoint_reconnect_number == centerpoint_reconnect_number_old:
            centerpoint_stream_stable = "Disconnected/ Reconnected to Stream"
            mesg = "Centerpoint exstreamer disconnect/reconnect, possible buffering issues"
            print 'alert sent', mesg
            with open("/var/www/html/status.log", "a") as myfile:
                    myfile.write(time.strftime("%Y-%m-%d %H:%M")+ "\t Centerpoint exstreamer disconnect/reconnect, possible buffering issues\n") 
        else:
            centerpoint_stream_stable = "system ok" 


        page = 'http://xx.xx.xx.xx:8080/uistatus.html'
        output = urllib2.urlopen(page)
        htmlparser = etree.HTMLParser()
        tree = etree.parse(output, htmlparser)
        #print tree.xpath("/html/body/table/tr[3]/th[2]/font/text()")
        print tree.xpath("//th/font[@color]/text()")
        centerpoint_stream_status = tree.xpath("//th/font[@color]/text()")

        if centerpoint_stream_status is "['IDLE']":
            mesg = "Centerpoint exstreamer source IDLE"
            print 'alert sent', mesg
            with open("/var/www/html/status.log", "a") as myfile:
                    myfile.write(time.strftime("%Y-%m-%d %H:%M")+ "\t Centerpoint exstreamer source IDLE\n") 



except urllib2.URLError:
        print "Internet dropped, or error"
        mesg = "Centerpoint exstreamer unreachable"
        print 'alert sent', mesg
        i_centerpoint_online = centerpoint_online + 1
        if centerpoint_online == 3:
            centerpoint_online = 0
            with open("/var/www/html/status.log", "a") as myfile:
                        myfile.write(time.strftime("%Y-%m-%d %H:%M")+ "\t Centerpoint exstreamer unreachable\n")  
            centerpoint_online = "Offline"
            system_ok = "Offline"

except Exception, err:
    print "Unexpected error", err
    centerpoint_online = "Offline"
    system_ok = "Offline"

python urllib2认证后打开第二页

0 个答案: