Question

我正在尝试学习网页抓取，并且试图抓取NBA网站。我一直在尝试获取播放器的详细信息，最终希望将它们导出到CSV文件中。目前，当我单独打印列表的值时，我能够获得所需的输出。但是，当我尝试打印整个列表时，它显示为不可读的目标代码。

from selenium import webdriver
from bs4 import BeautifulSoup
import csv


class Player():
    def __init__(self):
        self.name = ""
        self.link = ""
        self.Weight = ""
        self.Height = ""


driver = webdriver.PhantomJS(executable_path=r'C:\Users\mrtho\Google Drive\Scraping\phantomjs.exe')

url = 'https://www.nba.com/players'

driver.get(url)

soup = BeautifulSoup(driver.page_source,'lxml')

div = soup.find('div',class_='small-12 columns')

player_list = []

for a in div.find_all('a'):
    for name in a.find_all('span',class_='name-label'):

        new_play = Player()
        new_play.name = name.text
        new_play.link = 'https://www.nba.com'+a['href']
        player_list.append(new_play)


driver.quit()

for p in player_list[0:2]:


    driver = webdriver.PhantomJS(executable_path=r'C:\Users\mrtho\Google Drive\Scraping\phantomjs.exe')

    url = p.link

    driver.get(url)

    soup = BeautifulSoup(driver.page_source,'lxml')


    height1 = soup.find('p',class_='nba-player-vitals__top-info-metric')


    weight1 = soup.find('section',class_='nba-player-vitals__top-right small-6')
    weight2 = weight1.find('p',class_='nba-player-vitals__top-info-metric')
    # print('Weight: '+weight2.text)

    p.Height = height1.text
    p.Weight = weight2.text

    driver.quit()


for p in player_list[0:2]:
    print ('\n')
    print (p.name)
    print (p.link)
    print (p.Height)
    print (p.Weight)
    print ('\n')

print(player_list, sep = "\n")

在打印for p in player_list[0,2]时，我可以获得播放器的详细信息，例如：

Adams, Steven
https://www.nba.com/players/steven/adams/203500
2.13m
120.2kg

但是，当我打印整个播放器列表时，我的输出显示为

[<__main__.Player object at 0x0000021F53AF3278>, <__main__.Player object at 0x0000021F53AF32B0>, <__main__.Player object at 0x0000021F53AF32E8>,.....

打印列表返回对象值而不是字符串

0 个答案: