我想从 Charity Register Extract 数据中提取数据,并将其导出为可用 Excel 打开的 .csv 文件。
有人在 GitHub 上发布了用于此目的的代码(import.py 文件)。
以下是我的做法,共尝试了两种方式:
第一种方式:
我从上面的链接下载了文件 RegPlusExtract_November_2015.zip,并将其放在 C:\Python27(我的 Python 安装目录)中。
我在 IDLE 中打开下面的第一个代码文件(import.py)和其后的第二个代码文件(bcp.py),然后按 F5 运行 import.py。这两个 .py 文件也都放在 C:\Python27 中。
#!/usr/bin/env python
import bcp
import zipfile
import sys
# Column headers for each table of the charity register extract.
# Each key is the base name of a ".bcp" member expected in the zip file
# (import_zip appends ".bcp" / ".csv" to it); each value is the ordered
# list of column names written as the header row of the resulting CSV.
cc_files = {
    "extract_acct_submit": ["regno", "submit_date", "arno", "fyend"],
    "extract_aoo_ref": [
        "aootype", "aookey", "aooname", "aoosort", "welsh", "master", "code",
    ],
    "extract_ar_submit": ["regno", "arno", "submit_date"],
    "extract_charity": [
        "regno", "subno", "name", "orgtype", "gd", "aob", "aob_defined",
        "nhs", "ha_no", "corr",
        "add1", "add2", "add3", "add4", "add5",
        "postcode", "phone", "fax",
    ],
    "extract_charity_aoo": ["regno", "aootype", "aookey", "welsh", "master"],
    "extract_class": ["regno", "class"],
    "extract_class_ref": ["classno", "classtext"],
    "extract_financial": ["regno", "fystart", "fyend", "income", "expend"],
    "extract_main_charity": [
        "regno", "coyno", "trustees", "fyend", "welsh", "incomedate",
        "income", "grouptype", "email", "web",
    ],
    "extract_name": ["regno", "subno", "nameno", "name"],
    "extract_objects": ["regno", "subno", "seqno", "object"],
    "extract_partb": [
        "regno", "artype", "fystart", "fyend",
        "inc_leg", "inc_end", "inc_vol", "inc_fr", "inc_char",
        "inc_invest", "inc_other", "inc_total",
        "invest_gain", "asset_gain", "pension_gain",
        "exp_vol", "exp_trade", "exp_invest", "exp_grant", "exp_charble",
        "exp_gov", "exp_other", "exp_total", "exp_support", "exp_dep",
        "reserves", "asset_open", "asset_close",
        "fixed_assets", "open_assets", "invest_assets", "cash_assets",
        "current_assets", "credit_1", "credit_long", "pension_assets",
        "total_assets",
        "funds_end", "funds_restrict", "funds_unrestrict", "funds_total",
        "employees", "volunteers", "cons_acc", "charity_acc",
    ],
    "extract_registration": ["regno", "subno", "regdate", "remdate", "remcode"],
    "extract_remove_ref": ["code", "text"],
    "extract_trustee": ["regno", "trustee"],
}
def import_zip(zip_file):
    """Convert every known .bcp table inside *zip_file* to a CSV file.

    For each key in ``cc_files`` the member ``<key>.bcp`` is read from the
    archive and handed to ``bcp.convert``, which writes ``<key>.csv`` (in the
    current working directory, since only a bare file name is passed) with
    the key's column list as the header row.  Members missing from the
    archive are reported on stdout and skipped.

    zip_file -- path (or file-like object) accepted by ``zipfile.ZipFile``.
    """
    # The context manager guarantees the archive handle is closed even if a
    # conversion fails part-way through.
    with zipfile.ZipFile(zip_file, 'r') as zf:
        print('Opened zip file: %s' % zip_file)
        for filename, headers in cc_files.items():
            bcp_filename = filename + '.bcp'
            csv_filename = filename + '.csv'
            # Only the archive lookup is guarded: ZipFile.read raises KeyError
            # for a missing member, and a KeyError raised inside the converter
            # must not be misreported as "not found in zip".
            try:
                bcpdata = zf.read(bcp_filename)
            except KeyError:
                print('ERROR: Did not find %s in zip file' % bcp_filename)
                continue
            bcp.convert(bcpdata, csvfilename=csv_filename, col_headers=headers)
            print('Converted: %s' % bcp_filename)
def main(argv=None):
    """Command-line entry point: convert the zip file named in the arguments.

    argv -- optional argument list in ``sys.argv`` layout (program name
            first).  Defaults to ``sys.argv``.  Passing it explicitly makes
            the script usable from environments that cannot supply
            command-line arguments, e.g.
            ``main(['import.py', 'RegPlusExtract_November_2015.zip'])``.

    Exits with a usage message (SystemExit) when no zip file is given,
    instead of failing with a bare IndexError.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.exit('Usage: python import.py <extract.zip>')
    import_zip(argv[1])


if __name__ == '__main__':
    main()
#!/usr/bin/env python
import sys
import csv
def convert(bcpdata, csvfilename="", lineterminator='*@@*', delimiter='@**@', quote='"', newdelimiter=',', col_headers=None, escapechar='\\', newline='\n'):
    """Rewrite BCP-formatted data as a quoted, delimited text file.

    bcpdata        -- the raw BCP content (byte string; text is also accepted).
    csvfilename    -- output path; '' means 'converted.csv'.
    lineterminator -- row separator used in the BCP data.
    delimiter      -- field separator used in the BCP data.
    quote          -- character placed around every output field.
    newdelimiter   -- field separator written to the output.
    col_headers    -- optional list of column names written as the first row
                      (via csv.writer with its default dialect).
    escapechar     -- prefix used to escape *quote* and itself inside fields.
    newline        -- row separator written to the output.

    Every field is wrapped in *quote*.  If the data ends with a line
    terminator, the output ends with *newline* and no extra empty row is
    produced; empty data produces no data rows at all.
    """
    # On Python 3, zipfile and binary file reads return bytes while the
    # separator arguments are text.  latin-1 maps each byte to exactly one
    # code point, so decoding here and encoding with latin-1 on write
    # round-trips the original bytes unchanged.
    py2 = str is bytes
    from_bytes = (not py2) and isinstance(bcpdata, bytes)
    if from_bytes:
        bcpdata = bcpdata.decode('latin-1')

    # Order matters: escape the escape character first so the backslashes
    # introduced for quotes are not doubled again.
    bcpdata = bcpdata.replace(escapechar, escapechar + escapechar)
    bcpdata = bcpdata.replace(quote, escapechar + quote)
    bcpdata = bcpdata.replace(delimiter, quote + newdelimiter + quote)
    row_break = quote + newline + quote
    bcpdata = bcpdata.replace(lineterminator, row_break)

    # A terminator after the last row would otherwise leave a dangling
    # opening quote, i.e. a spurious final row holding one empty field.
    ends_with_row = bool(row_break) and bcpdata.endswith(row_break)
    if ends_with_row:
        bcpdata = bcpdata[:len(bcpdata) - len(row_break)]

    if csvfilename == "":
        csvfilename = 'converted.csv'

    if py2:
        csvfile = open(csvfilename, 'wb')
    else:
        # newline='' stops the text layer from translating line endings.
        # NOTE(review): utf-8 for text input is a choice made here; confirm
        # it suits the consumers of the file.
        csvfile = open(csvfilename, 'w', newline='',
                       encoding='latin-1' if from_bytes else 'utf-8')
    with csvfile:
        if col_headers:
            csv.writer(csvfile).writerow(col_headers)
        # Wrap with the configured quote character, not a hard-coded '"',
        # so the outer quotes agree with the ones inserted between fields.
        if ends_with_row:
            csvfile.write(quote + bcpdata + quote + newline)
        elif bcpdata:
            csvfile.write(quote + bcpdata + quote)
def main(argv=None):
    """Command-line entry point: convert one .bcp file to CSV.

    argv -- optional argument list in ``sys.argv`` layout (program name
            first, then the input path and an optional output path).
            Defaults to ``sys.argv``.

    When no output path is given it is derived from the input path by
    swapping the extension for ``.csv``.  Exits with a usage message
    (SystemExit) when no input file is given, instead of failing with a
    bare IndexError.
    """
    import os

    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.exit('Usage: python bcp.py <input.bcp> [output.csv]')

    bcp_filename = argv[1]
    if len(argv) > 2:
        csv_filename = argv[2]
    else:
        # Swap only the real extension.  A plain str.replace('.bcp', '.csv')
        # also rewrites '.bcp' elsewhere in the path and, when the name has
        # no '.bcp' at all, returns the input path itself.
        csv_filename = os.path.splitext(bcp_filename)[0] + '.csv'
        if csv_filename == bcp_filename:
            # Never let the derived output overwrite the input file.
            csv_filename = bcp_filename + '.csv'

    with open(bcp_filename, 'rb') as bcpfile:
        bcpdata = bcpfile.read()
    convert(bcpdata, csv_filename)


if __name__ == '__main__':
    main()
结果出现了如下错误:
>>> ================================ RESTART ================================
>>>
Traceback (most recent call last):
File "C:\Python27\bcp.py", line 31, in <module>
main()
File "C:\Python27\bcp.py", line 21, in main
bcp_filename = sys.argv[1]
IndexError: list index out of range
>>> ================================ RESTART ================================
>>>
Traceback (most recent call last):
File "C:\Python27\import.py", line 175, in <module>
main()
File "C:\Python27\import.py", line 171, in main
zip_file = sys.argv[1]
IndexError: list index out of range
>>>
有人能指出哪里出错了吗?
第二种方式:
然后我尝试在 Windows 的命令提示符中运行该文件:首先切换到保存所有文件的目录(C:\Python27),然后在命令提示符下运行:
python import RegPlusExtract_November_2015.zip
结果出现错误:
File"<stdin>", line 1
python import RegPlusExtract_November_2015.zip
有人能指出我哪里弄错了,或者告诉我如何从上面链接的数据中提取 csv 文件吗?
答案 0 :(得分:0)
从 IDLE 运行脚本时,无法通过 sys.argv 传递参数,
所以在这种用法下出现该错误是正常的。不过在按 F5 出现错误之后,您应该可以在 IDLE 的 shell 中直接调用:
zip_file = 'RegPlusExtract_November_2015.zip'
import_zip(zip_file)
它应该允许您处理数据。
对于命令提示符中的第二种方法,您必须提供脚本文件的确切名称。命令应该是:
python import.py RegPlusExtract_November_2015.zip
但无论如何,把自己的脚本和数据文件放在 Python 安装目录中是不妥的。C:\Python27
应该只包含 Python 发行版自带的文件和其他通用工具,而不是本地脚本。
通常的做法是将 C:\Python27
添加到 PATH 环境变量中,并使用一个专用目录来处理慈善数据。