以下代码适用于美国股票APLE和BHP,但是当我把它们替换为ASX代码时,程序会崩溃。我以为是冒号导致的问题,并尝试过使用str(ASX:BHP),但没有成功。不幸的是,雅虎不再提供历史数据。任何想法或替代解决方案都将不胜感激。
谢谢
import datetime
import pandas as pd
from pandas_datareader import data, wb
# Tickers to download. Named `tickers` rather than `list` so the builtin
# `list` type is not shadowed for the rest of the session.
tickers = ["APLE", "BHP"]
# ASX alternative from the question (reported to crash with this source):
# tickers = ["ASX:AMP", "ASX:BHP"]

# Date range passed to the data reader.
start = datetime.datetime(2016, 1, 1)
end = datetime.datetime(2017, 1, 1)

# Fetch each ticker separately and tag the rows with their own ticker BEFORE
# combining. Assigning the 'code' column on the combined frame (as the
# original did) overwrote every earlier ticker's code with the latest one.
# NOTE(review): the "google" source is reported as removed in newer
# pandas-datareader releases -- confirm which source is still available.
frames = []
for ticker in tickers:
    ticker = str(ticker)
    df_stock = data.DataReader(ticker, "google", start, end)
    df_stock["code"] = ticker
    frames.append(df_stock)

# Concatenate once at the end: DataFrame.append was removed in pandas 2.0 and
# copied the accumulated frame on every iteration. pd.concat raises on an
# empty list, so fall back to an empty frame when no tickers were requested.
df_all_stock = pd.concat(frames) if frames else pd.DataFrame([])
df_all_stock
答案 0 :(得分:0)
需要构建一个scraper(爬虫)来从HTML表格中获取数据,然后构建一个与美国股票数据输出格式类似的pandas数据框。
我确定加拿大股票在Google财经上的基本网址是:'https://www.google.ca/finance/historical?q=TSE%3A'。要获取某只股票的数据,我们只需将其代码附加到上述基本网址的末尾。例如,要查看“VCN”的历史股票数据,我们需要访问页面:https://www.google.ca/finance/historical?q=TSE%3AVCN
要在python代码中执行上述操作,我们只需要以下内容,其中的stock变量可以改为任何感兴趣的TSE(Toronto Stock Exchange,多伦多证券交易所)股票代码。
from datetime import datetime
from pandas import DataFrame
import pandas_datareader.data as web
# Scrape the historical price table for one TSE ticker from Google Finance and
# build a date-indexed DataFrame with Open/High/Low/Close/Volume columns.

# Base URL; the ticker symbol is appended to it. The literal must stay on a
# single line -- it had been wrapped in the middle of the string.
google_historical_price_site = 'https://www.google.ca/finance/historical?q=TSE%3A'
stock = 'VCN'  # substitute any stock in here
historical_price_page = google_historical_price_site + stock
print(historical_price_page)
from urllib.request import urlopen
from bs4 import BeautifulSoup
# open the historical_price_page link and parse the source with BeautifulSoup;
# the `with` block closes the HTTP response once the markup has been read
with urlopen(historical_price_page) as stock_dat:
    historical_page = BeautifulSoup(stock_dat, 'lxml')
# scrape the table (class string is one literal with a single space in it)
table_dat = historical_page.find('table', {'class': 'gf-table historical_price'})
if table_dat is None:
    # fail with a clear message instead of an AttributeError on None
    raise ValueError('no historical price table found at ' + historical_price_page)
# find the date cells; each one starts a row of the table
rows = table_dat.find_all('td', {'class': 'lm'})
# get just the dates out of the table rows, strip the trailing whitespace
dates = [x.get_text().rstrip() for x in rows]
# turn dates to python datetime format
datetime_dates = [datetime.strptime(x, '%b %d, %Y') for x in dates]
# next we build up the price dataframe rows:
# the first column is the date, the remaining columns are the text of the
# siblings to the right of the date cell
prices = []
for row_date, row in zip(datetime_dates, rows):
    row_dat = [row_date]
    row_dat.extend(cell.get_text().rstrip() for cell in row.next_siblings)
    prices.append(row_dat)
# turn the output into the dataframe
outdat = DataFrame(
    prices,
    columns=['Date', 'Open', 'High', 'Low', 'Close', 'Volume'],
)
# make the Volume column integers (the scraped text uses ',' separators)
outdat["Volume"] = outdat["Volume"].apply(lambda x: int(x.replace(',', '')))
# change the other columns to floating point values
for col in ['Open', 'High', 'Low', 'Close']:
    outdat[col] = outdat[col].astype(float)
# index by date and sort ascending; per the original author this matches the
# layout of the american stock data
outdat = outdat.set_index('Date')
outdat = outdat.sort_index()
# have a look
outdat
答案 1 :(得分:0)
from datetime import datetime
from pandas import DataFrame
import pandas_datareader.data as web
import os
# Scrape the historical price table for HKG:0700 from Google Finance, build a
# date-indexed DataFrame with Open/High/Low/Close/Volume columns, and write it
# to a CSV file.

google_historical_price_site = 'https://finance.google.com/finance/historical?q=HKG:0700'
print(google_historical_price_site)
from urllib.request import urlopen
from bs4 import BeautifulSoup
# open the page and parse the source with BeautifulSoup; the `with` block
# closes the HTTP response once the markup has been read. The parsed page gets
# its own name so the URL variable is not overwritten.
with urlopen(google_historical_price_site) as stock_dat:
    historical_page = BeautifulSoup(stock_dat, 'lxml')
# scrape the table (class string is one literal with a single space in it)
table_dat = historical_page.find('table', {'class': 'gf-table historical_price'})
if table_dat is None:
    # fail with a clear message instead of an AttributeError on None
    raise ValueError('no historical price table found at ' + google_historical_price_site)
# find the date cells; each one starts a row of the table
rows = table_dat.find_all('td', {'class': 'lm'})
# get just the dates out of the table rows, strip the trailing whitespace
dates = [x.get_text().rstrip() for x in rows]
# turn dates to python datetime format
datetime_dates = [datetime.strptime(x, '%b %d, %Y') for x in dates]
# next we build up the price dataframe rows:
# the first column is the date, the remaining columns are the text of the
# siblings to the right of the date cell
prices = []
for row_date, row in zip(datetime_dates, rows):
    row_dat = [row_date]
    row_dat.extend(cell.get_text().rstrip() for cell in row.next_siblings)
    prices.append(row_dat)
# turn the output into the dataframe
outdat = DataFrame(
    prices,
    columns=['Date', 'Open', 'High', 'Low', 'Close', 'Volume'],
)
# make the Volume column integers (the scraped text uses ',' separators)
outdat["Volume"] = outdat["Volume"].apply(lambda x: int(x.replace(',', '')))
# change the other columns to floating point values
for col in ['Open', 'High', 'Low', 'Close']:
    outdat[col] = outdat[col].astype(float)
# index by date and sort ascending; per the original author this matches the
# layout of the american stock data
outdat = outdat.set_index('Date')
outdat = outdat.sort_index()
# output CSV file
df = outdat
# raw string: '\M' is not a valid escape sequence, so the non-raw literal
# triggers an invalid-escape warning; the path value itself is unchanged
path_d = r'C:\MA data'
df.to_csv(os.path.join(path_d, 'HKGstock700.csv'))