我正在尝试使用 Python 从网页上的表格中抓取数据。
from bs4 import BeautifulSoup
from mechanize import Browser
BASE_URL = "http://www.ggp.com/properties/mall-directory"
def main():
    """Download the mall directory page, parse it, and pass it to extract().

    The page is fetched with a mechanize ``Browser`` and parsed with
    BeautifulSoup; the resulting soup is handed to ``extract`` together
    with the fixed year 2007.
    """
    browser = Browser()
    # Reuse the module-level BASE_URL rather than repeating the literal.
    response = browser.open(BASE_URL)
    html = response.read()
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed on the machine.
    soup = BeautifulSoup(html, "html.parser")
    extract(soup, 2007)
def extract(soup, year):
    """Print every <option> element inside the first <table> of *soup*.

    Each element is printed as-is (the full tag markup), one per line.
    Nothing is printed when the document contains no <table>.

    Args:
        soup: Parsed document exposing ``find``; expected to be a
            BeautifulSoup object.
        year: Currently unused; kept so existing callers keep working.
    """
    table = soup.find("table")
    if table is None:
        # No table on the page: nothing to list. Without this guard the
        # loop below would fail with AttributeError on None.
        return
    for option in table.find_all("option"):
        # print(x) with a single argument behaves the same on Python 2 and 3.
        print(option)
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
打印出来的每一行如下:
<option value="184">Yakima, WA</option>
<option value="896">Yankton, SD</option>
<option value="851">Yazoo City, MS</option>
<option value="113">York-Hanover, PA</option>
<option value="87">Youngstown-Warren, OH-PA</option>
<option value="235">Yuba City, CA</option>
<option value="205">Yuma, AZ</option>
<option value="424">Zanesville, OH</option>
但我需要的是:
Yakima, WA
Yankton, SD
Yazoo City, MS
York-Hanover, PA
等等……
我已经尝试了 row.findAll('option value'),但这不起作用……
答案 0 :(得分:1)
我认为这样应该可以:
# getText() returns only the text inside the tag (e.g. "Yakima, WA"),
# without the surrounding <option ...> markup.
print row.getText()