变量 j 的嵌套循环不起作用。即使该循环之前所需的变量看起来都已正确初始化,调试器也会直接跳过这个循环。
from urllib.request import Request, urlopen
# Get beautifulsoup4 with: pip install beautifulsoup4
import bs4
import pdb
import sys
import json
# Script from the question: loads the bgp.he.net world report and, for each
# 'centeralign' table cell, prints its text and builds a per-country URL.
# NOTE(review): the indentation of this listing has been lost; as shown, the
# bodies of the for/if statements are not indented.
site = "http://bgp.he.net/report/world"
hdr = {'User-Agent': 'Mozilla/5.0'}
req = Request(site,headers=hdr)
page = urlopen(req)
soup = bs4.BeautifulSoup(page, 'html.parser')
for t in soup.find_all('td', class_='centeralign'):
# str(None) is "None", so the check below skips cells whose .string is None.
s = str(t.string)
if s != "None":
print (s.strip())
site2 = "http://bgp.he.net/country/" + s.strip()
# NOTE(review): req is rebound to the country request here, but it is never
# passed to urlopen anywhere in this listing.
req = Request(site2,headers=hdr)
# NOTE(review): soup2 is parsed from `page`, the response opened for `site`
# above, not from a response for site2. Presumably that response was already
# consumed by the first BeautifulSoup call, which would leave soup2 without any
# <td> elements and explain why the loop below is skipped - TODO confirm.
soup2 = bs4.BeautifulSoup(page, 'html.parser')
for j in soup2.find_all('td'):
s2 = str(j.string)
# NOTE(review): strip is called on the element j rather than on the string s2;
# s2 is otherwise unused.
print (j.strip())
答案 0 :(得分:0)
from urllib.request import Request, urlopen
# Get beautifulsoup4 with: pip install beautifulsoup4
import bs4
import pdb
import sys
import json
# Corrected script: load the bgp.he.net world report, then for every
# 'centeralign' table cell that has a string, fetch the matching
# /country/<value> page and print the stripped text of each of its <td> cells.
site = "http://bgp.he.net/report/world"
hdr = {'User-Agent': 'Mozilla/5.0'}  # sent with both kinds of request below

req = Request(site, headers=hdr)
# The response is only needed while parsing, so close it right afterwards.
with urlopen(req) as page:
    soup = bs4.BeautifulSoup(page, 'html.parser')

for t in soup.find_all('td', class_='centeralign'):
    s = str(t.string)
    # str(None) is "None", so this skips cells whose .string is None.
    if s != "None":
        country = s.strip()
        print(country)
        site2 = "http://bgp.he.net/country/" + country
        # you missed these two lines: the country page needs its own request
        # and its own urlopen call; the first response cannot be reused.
        req2 = Request(site2, headers=hdr)
        with urlopen(req2) as page2:
            soup2 = bs4.BeautifulSoup(page2, 'html.parser')
        for j in soup2.find_all('td'):
            s2 = str(j.text)
            print(s2.strip())  # wrong variable used by you to strip