Question

我的代码将xhtml文件中的信息提取到csv。似乎无法删除最上面的行“NYSE ,,，Price，Chg，Chg”。例如，使用“del data [0]”可以删除整个列。非常感谢任何帮助。

import sys
import re
import xml.dom.minidom

document = xml.dom.minidom.parse("2017-11-27-16-20-15.xhtml")
tableElements = document.getElementsByTagName('table')[2]

for tr in tableElements.getElementsByTagName('tr'):
data = []

for a in tr.getElementsByTagName('a'):
    for node in a.childNodes:
        if node.nodeType == node.TEXT_NODE:
            data.append(node.nodeValue)


for td in tr.getElementsByTagName('td'):
    for node in td.childNodes:
        if node.nodeType == node.TEXT_NODE:
            data.append(node.nodeValue)

del data[-1]
del data[1]
del data[1]
del data[1]

symbol = data[0]
symm = re.findall(r"\(([A-Za-z0-9_]+)\)", symbol)

sym = (''.join(symm))

company = data[0]
comp = re.sub("\(.*\)\n","", company)

volume = data[1]
lst = volume.replace(',', '')


price = data[2]
lstt = price.lstrip('$')


change = data[-1]

fullList = [sym] + [comp] + [lst] + [lstt] + [change]
fullList.insert(0, "NYSE")
print(','.join(fullList))

output pic

xhtml table(sample pic)

如何使用python删除我的csv文件的顶行？

0 个答案: