我的代码从 xhtml 文件中提取信息并输出为 csv。但我似乎无法删除输出中最上面的那一行“NYSE ,,,Price,Chg,Chg”。例如,使用“del data[0]”删除的是整列,而不是这一行。非常感谢任何帮助。
import sys
import re
import xml.dom.minidom
SOURCE_FILE = "2017-11-27-16-20-15.xhtml"
EXCHANGE = "NYSE"

# Ticker symbol in parentheses, e.g. "(GE)".
_SYMBOL_RE = re.compile(r"\(([A-Za-z0-9_]+)\)")
# The parenthesised part plus the newline that follows it in the link text.
_SYMBOL_SUFFIX_RE = re.compile(r"\(.*\)\n")

# Link text + 3 discarded cells + volume + price + change + discarded last cell.
_MIN_ROW_FIELDS = 7


def extract_row_text(tr):
    """Return the text nodes of a table row as a flat list.

    The text of every ``<a>`` element in the row comes first, followed by
    the direct text children of every ``<td>`` element, in document order.
    """
    data = []
    for tag in ('a', 'td'):
        for element in tr.getElementsByTagName(tag):
            data.extend(
                node.nodeValue
                for node in element.childNodes
                if node.nodeType == node.TEXT_NODE
            )
    return data


def format_quote_row(data, exchange=EXCHANGE):
    """Turn the raw text of one table row into a CSV line.

    Args:
        data: list of strings as produced by ``extract_row_text``.
        exchange: value written in the first CSV column.

    Returns:
        The line ``exchange,symbol,company,volume,price,change``, or
        ``None`` when the row is not a quote row: either it has too few
        fields, or its first field carries no ticker symbol in parentheses
        (this is what filters out the table's header row).
    """
    if len(data) < _MIN_ROW_FIELDS:
        return None

    # Keep the first entry, drop the next three and the last one
    # (same effect as `del data[-1]` followed by three `del data[1]`).
    fields = [data[0]] + data[4:-1]

    name = fields[0]
    symbols = _SYMBOL_RE.findall(name)
    if not symbols:
        # Header row ("...,Price,Chg,Chg") has no ticker symbol: skip it.
        return None

    symbol = ''.join(symbols)
    company = _SYMBOL_SUFFIX_RE.sub("", name)
    volume = fields[1].replace(',', '')  # "1,234" -> "1234" so CSV stays aligned
    price = fields[2].lstrip('$')
    change = fields[-1]
    return ','.join([exchange, symbol, company, volume, price, change])


def main():
    """Print one CSV line per quote row of the third table in SOURCE_FILE."""
    document = xml.dom.minidom.parse(SOURCE_FILE)
    table = document.getElementsByTagName('table')[2]
    for tr in table.getElementsByTagName('tr'):
        line = format_quote_row(extract_row_text(tr))
        if line is not None:
            print(line)


if __name__ == "__main__":
    main()