Python,web scraping:嵌套循环不起作用

时间:2017-07-28 12:51:36

标签: python loops parsing beautifulsoup nested

以变量 j 为循环变量的嵌套循环没有执行。尽管它之前所需的变量看起来都已正确初始化,调试器仍然会直接跳过这个循环。

from urllib.request import Request, urlopen
# Get beautifulsoup4 with: pip install beautifulsoup4
import bs4
import pdb
import sys
import json

# Question code (reproduced as asked): fetch the world report page, then try
# to fetch and print the cells of a per-country page for each report cell.
site = "http://bgp.he.net/report/world"
hdr = {'User-Agent': 'Mozilla/5.0'}  # passed as the request headers below
req = Request(site,headers=hdr)
page = urlopen(req)
soup = bs4.BeautifulSoup(page, 'html.parser')

# Visit every <td class="centeralign"> cell of the parsed report.
for t in soup.find_all('td', class_='centeralign'):
    s = str(t.string)
    # Cells whose .string stringifies to "None" are skipped
    # (presumably .string is None for cells without a single text child).
    if s != "None": 
        print (s.strip())
        site2 = "http://bgp.he.net/country/" + s.strip()
        # NOTE(review): this request is built but never passed to urlopen().
        req = Request(site2,headers=hdr)
        # NOTE(review): this parses `page` again -- the first response, which
        # was already handed to BeautifulSoup above -- not the country page.
        # Presumably the response is exhausted by then, leaving soup2 empty,
        # which would explain the inner loop being skipped; confirm.
        soup2 = bs4.BeautifulSoup(page, 'html.parser')

    # NOTE(review): this loop is at the outer loop's indentation, not inside
    # the `if`, so it also runs for skipped cells (and soup2 is unbound if the
    # very first cell is skipped).
    for j in soup2.find_all('td'):
        s2 = str(j.string)
        # NOTE(review): .strip() is called on `j` (the element), not on the
        # string `s2` computed on the previous line.
        print (j.strip())

1 个答案:

答案 0 :(得分:0)

from urllib.request import Request, urlopen
# Get beautifulsoup4 with: pip install beautifulsoup4
import bs4
import pdb
import sys
import json

# Corrected script: print every country code listed on the world report, and
# for each one fetch its country page and print the text of every <td> cell.
site = "http://bgp.he.net/report/world"
hdr = {'User-Agent': 'Mozilla/5.0'}  # passed as the request headers
req = Request(site, headers=hdr)
# Parse inside the `with` so the HTTP response is closed once it has been read.
with urlopen(req) as page:
    soup = bs4.BeautifulSoup(page, 'html.parser')

for t in soup.find_all('td', class_='centeralign'):
    # Skip cells that have no single string; test for None directly instead of
    # comparing against the text "None".
    if t.string is None:
        continue

    s = t.string.strip()
    print(s)
    site2 = "http://bgp.he.net/country/" + s
    req2 = Request(site2, headers=hdr)  # you missed these two lines
    with urlopen(req2) as page2:
        soup2 = bs4.BeautifulSoup(page2, 'html.parser')

    # Nested inside the outer loop body so it runs once per country page.
    for j in soup2.find_all('td'):
        s2 = j.text
        print(s2.strip())  # wrong variable used by you to strip