Please excuse it if my code is a bit rough; I have never written Python before, but I want to (or rather have to) for my research project. I want to scrape a website to collect a set of data: the goal of the project is to gather data from their site and get it into Excel.
Here is my code so far:
import requests
from bs4 import BeautifulSoup


def ended_ico_spider(max_pages):
    page = 1
    while page <= max_pages:
        url = "https://icobench.com/icos?&filterBonus=&filterBounty=&filterMvp=&filterTeam=&filterExpert=&" \
              "filterSort=&filterCategory=all&filterRating=any&filterStatus=ended&filterPublished=&" \
              "filterCountry=any&filterRegistration=0&filterExcludeArea=none&filterPlatform=any&filterCurrency=any&" \
              "filterTrading=any&s=&filterStartAfter=&filterEndBefore=0&page=" + str(page)
        source_code = requests.get(url)
        plain_text = source_code.text
        soup = BeautifulSoup(plain_text, "lxml")
        for link in soup.find_all('a', {'class': 'name'}):
            href = "https://icobench.com/" + link.get('href')
            title = link.string
            print(title)
            get_single_ico_rating(href)
            get_single_ico_fixed_data(href)
            get_single_ico_financial_token_info(href)
            get_single_ico_financial_investment_info(href)
            # get_single_ico_whitepaper(href)
        page += 1


# Fetch the individual data blocks from the respective subpage. Fields are named according to the HTML code.
def get_single_ico_rating(single_item_url):
    source_code = requests.get(single_item_url)
    plain_text = source_code.text
    soup = BeautifulSoup(plain_text, "lxml")
    # Data from the rating box
    for data in soup.find_all('div', {'class': ['rate color1', 'rate color2', 'rate color3', 'rate color4',
                                                'rate color5', 'col_4 col_3']}):
        print(data.text)


def get_single_ico_fixed_data(single_item_url):
    source_code = requests.get(single_item_url)
    plain_text = source_code.text
    soup = BeautifulSoup(plain_text, "lxml")
    for fixed_data in soup.find_all('div', {'class': 'col_2'}):
        print(fixed_data.text)


def get_single_ico_financial_token_info(single_item_url):
    source_code = requests.get(single_item_url)
    plain_text = source_code.text
    soup = BeautifulSoup(plain_text, "lxml")
    for financial_token_info in soup.find_all('div', {'class': 'box_left'}):
        print(financial_token_info.text)


def get_single_ico_financial_investment_info(single_item_url):
    source_code = requests.get(single_item_url)
    plain_text = source_code.text
    soup = BeautifulSoup(plain_text, "lxml")
    for investment_info in soup.find_all('div', {'class': 'box_right'}):
        print(investment_info.text)


# Here I want to find out whether a whitepaper exists on the respective subpage. If one
# exists, a value X can be returned, otherwise a value Y.
# def get_single_ico_whitepaper(href):
#     source_code = requests.get(href)
#     plain_text = source_code.text
#     soup = BeautifulSoup(plain_text, "lxml")
#     for whitepaper_link in soup.find_all('div', {'class': 'onclick'}):
#         print(whitepaper_link.text)


ended_ico_spider(1)
Some parts are still missing. These are the gaps I have not been able to solve myself (I am only a Python beginner):
I need to know whether each ICO (each subpage) has a whitepaper or not. Since it is an onclick field, I don't know how to search for it to check whether a whitepaper is there.
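What I have in mind is a check roughly like the sketch below. It is only a guess: I have not verified how the whitepaper tab is actually built in the HTML, so the assumption that it can be found through an a tag whose href contains "whitepaper" (and the helper name has_whitepaper) are mine, not taken from the site.

import requests
from bs4 import BeautifulSoup


def has_whitepaper(single_item_url):
    # Sketch only: assumes the whitepaper tab is a plain link whose href
    # mentions "whitepaper" - this selector is an unverified guess.
    source_code = requests.get(single_item_url)
    soup = BeautifulSoup(source_code.text, "lxml")
    for link in soup.find_all('a', href=True):
        if 'whitepaper' in link['href'].lower():
            return 1  # value X: a whitepaper seems to exist
    return 0          # value Y: no whitepaper found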
Exporting the data to a CSV file (for Excel): right now the printed output looks rather messy; some parts end up as extra rows inside a column, and so on. As you can probably guess, I need a clean table with one row per ICO and the different elements in separate columns, so that I can do the statistics in R or another program.
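For the export, I imagine collecting one dict per ICO and writing it with Python's csv module, roughly like the sketch below. The field names are only placeholders for the values my functions print at the moment (they would first have to return values instead of printing them), so this is just the direction I am thinking in, not a finished solution.

import csv


def write_icos_to_csv(rows, filename="icos.csv"):
    # Sketch only: "rows" is expected to be a list of dicts, one per ICO;
    # the field names below are placeholders, not the real column names.
    fieldnames = ["title", "rating", "fixed_data", "token_info", "investment_info", "whitepaper"]
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        for row in rows:
            writer.writerow(row)  # one ICO -> one row, elements in separate columns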