为什么我只得到第一个字符,而不是标签中的整个字符串?
#This for multiple URL
import urllib2
from bs4 import BeautifulSoup
import requests
import itertools
# Multi-URL version from the question: read one URL per line from
# site_test.txt, fetch each page, and print four fields joined by "--".
# NOTE(review): the block indentation of this snippet was lost in the paste;
# the statements below are meant to be nested (with > for > for > print).
with open('site_test.txt') as site_test:
# Lazy generator: each line is stripped only when the loop asks for it.
sites = (line.strip() for line in site_test)
for site in sites:
# `site` is rebound here from the URL string to the opened response.
site = urllib2.urlopen(site)
soup = BeautifulSoup(site, "html.parser")
# izip() iterates over each of its arguments in lockstep. The last two
# arguments are the .text strings themselves, so z and e are bound to single
# characters of those strings (the first character on the first pass), not to
# the whole text. The first two arguments are the tags, not their .text;
# presumably iterating a tag yields its child nodes, which would match the
# whole-string results reported for x and y below - confirm against the
# BeautifulSoup documentation.
for x, y, z, e in itertools.izip(soup.find_all('h1')[0], soup.find_all('p')[1], soup.find_all('p')[2].text, soup.find_all('p')[3].text):
print x+"--"+y+"--"+z+"--"+e
#Result for x is 'full name'
# y is 'occupation'
结果
全名--职业--0--a
'全名'和'职业'输出正确
'0'只是第一个字符:应该是'000-345-678'
'a'只是第一个字符:应该是'alex@email.com'
但是,当我在另一个只处理单个 URL 的脚本中执行同样的操作时,它运行正常:
#This for 1 raw_input URL
import urllib2
from bs4 import BeautifulSoup
# Single-URL variant: ask for a link, download the page, and print the
# heading text plus the texts of the 2nd, 3rd and 4th <p> tags, joined by "--".
url = raw_input("Link: ")
response = urllib2.urlopen(url)
soup = BeautifulSoup(response, "html.parser")
heading = soup.h1.string
# Collect the paragraphs once and index into the result.
paragraphs = soup.find_all('p')
fields = (heading, paragraphs[1].text, paragraphs[2].text, paragraphs[3].text)
# A single parenthesised expression prints the same line as the
# statement form of print on Python 2.
print("--".join(fields))
答案 0 :(得分:0)
你的代码里多了一个不必要的第二层循环(itertools.izip)。izip 会逐个遍历每个参数;对字符串来说就是逐个字符遍历,所以 z 和 e 只得到第一个字符。去掉这个循环,直接赋值即可:
import urllib2
from bs4 import BeautifulSoup
import requests
import itertools
# Read one URL per line from site_test.txt and, for every page, print the
# heading text and the texts of the 2nd, 3rd and 4th <p> tags as
# "heading--p1--p2--p3".
with open('site_test.txt') as site_test:
    for line in site_test:
        url = line.strip()
        if not url:
            # A blank line (e.g. a trailing newline at the end of the file)
            # is not a URL, so skip it instead of handing it to urlopen().
            continue
        response = urllib2.urlopen(url)
        try:
            soup = BeautifulSoup(response, "html.parser")
        finally:
            # The response is only needed while parsing; close it so that
            # connections do not pile up across many URLs.
            response.close()
        x = soup.h1.string
        # Look the paragraphs up once rather than re-scanning the document
        # for every field.
        paragraphs = soup.find_all('p')
        y = paragraphs[1].text
        z = paragraphs[2].text
        e = paragraphs[3].text
        # Plain assignments keep each field as a whole string; no izip()
        # loop, which would walk the strings character by character.
        print(x + "--" + y + "--" + z + "--" + e)