我写了一段代码,用来抓取网站 https://www.insidefutures.com/markets/data.php?page=quote&sym=NG&x=19&y=5 上的行情数据。
该网站的数据每10分钟更新一次,我想找出价格和交易量之间的关系。为此,我需要每10分钟下载一次数据,并将其存储起来以备将来分析。
我希望代码能随着网站的更新自动运行,每10分钟把数据下载并写入数据库,以供将来分析。我该如何实现?
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
import requests
import numpy as np
# Download the quote page and parse it into a searchable tree.
# A timeout keeps the script from hanging forever if the server stalls.
res = requests.get('https://shared.websol.barchart.com/quotes/quote.php?',
                   timeout=30)
soup = BeautifulSoup(res.text, 'lxml')

# The second <tr> of the page holds the column titles (<th> cells).
# Guard against a page with fewer than two rows instead of raising IndexError.
header_rows = soup.find_all('tr', limit=2)
Header = header_rows[1].find_all('th') if len(header_rows) > 1 else []
column_headers = [th.getText() for th in Header]

# Every <tr> after the first two is treated as a data row.
data_rows = soup.find_all('tr')[2:]

# One list per column; each data row appends one value to every list.
Contracts = []
Lasts = []
Changes = []
Opens = []
Highs = []
Lows = []
Volumes = []
Previous_Settles = []
Date_Times = []

# Number of <td> cells read from each row (indexes 0 through 8).
EXPECTED_CELLS = 9

for row in data_rows:
    # Look the cells up once per row rather than once per column.
    cells = [cell.text for cell in row.find_all('td')]
    if len(cells) < EXPECTED_CELLS:
        # Skip rows that do not carry a full set of cells so that one
        # short row does not abort the whole scrape.
        continue
    Contracts.append(cells[0])
    Lasts.append(cells[1])
    Changes.append(cells[2])
    Opens.append(cells[3])
    Highs.append(cells[4])
    Lows.append(cells[5])
    Volumes.append(cells[6])
    Previous_Settles.append(cells[7])
    Date_Times.append(cells[8])

# Build the table from the per-column lists. 'Last' uses the full list of
# values (not a single scalar), and the volume and timestamp cells that were
# read from each row are included as columns too.
df = pd.DataFrame({
    'Contracts': Contracts,
    'Last': Lasts,
    'Change': Changes,
    'Open': Opens,
    'High': Highs,
    'low': Lows,
    'Volume': Volumes,
    'Previous_Settled': Previous_Settles,
    'Date_Time': Date_Times,
})
print(df)