我是python的新手,并尝试在两个点之间的HTML源中提取行到Output.txt。但没有任何东西写入输出文本文件,我不知道为什么。任何帮助将不胜感激。
import urllib
sock = urllib.urlopen('http://www.w3schools.com/xpath/xpath_examples.asp')
htmlSource = sock.read()
sock.close()
text_file = open('/home/user/Desktop/Output.txt', 'w')
parsing=False
for line in htmlSource:
if '<html lang="en-US">' in line:
parsing = True
elif '<script src="/bs/js/bootstrap.min.js"></script>' in line:
parsing = False
if parsing:
text_file.write("%s\n" % line)
text_file.close()