将BCP文件导出为CSV格式

时间:2015-11-27 14:00:47

标签: python csv bcp

我想从中提取数据 Data Charity Register Extract并将其导出为Excel文件(.csv)。

此人使用import.py file

在Github上发布了他的代码

以下是我的做法:两种方式:

第一种方式:

  1. 我从上面的链接下载文件:RegPlusExtract_November_2015.zip并将其放在C:\ Python27(我也安装Python)中

  2. 我打开下面的代码文件(import.py)和IDLE中的(bcp.py)之后的代码文件并运行import.py(使用F5)。我将这两个.py文件放在C:\ Python27

  3. #!/usr/bin/env python
    import bcp
    import zipfile
    import sys
    
    cc_files = {
        "extract_acct_submit": [
          "regno",
          "submit_date",
          "arno",
          "fyend"
        ], 
        "extract_aoo_ref": [
          "aootype",
          "aookey",
          "aooname",
          "aoosort",
          "welsh",
          "master",
          "code"
        ], 
        "extract_ar_submit": [
          "regno",
          "arno",
          "submit_date"
        ], 
        "extract_charity": [
          "regno",
          "subno",
          "name",
          "orgtype",
          "gd",
          "aob",
          "aob_defined",
          "nhs",
          "ha_no",
          "corr",
          "add1",
          "add2",
          "add3",
          "add4",
          "add5",
          "postcode",
          "phone",
          "fax",
        ], 
        "extract_charity_aoo": [
          "regno",
          "aootype",
          "aookey",
          "welsh",
          "master"
        ], 
        "extract_class": [
          "regno",
          "class"
        ], 
        "extract_class_ref": [
          "classno",
          "classtext",
        ], 
        "extract_financial": [
          "regno",
          "fystart",
          "fyend",
          "income",
          "expend"
        ], 
        "extract_main_charity": [
          "regno",
          "coyno",
          "trustees",
          "fyend",
          "welsh",
          "incomedate",
          "income",
          "grouptype",
          "email",
          "web"
        ], 
        "extract_name": [
          "regno",
          "subno",
          "nameno",
          "name"
        ], 
        "extract_objects": [
          "regno",
          "subno",
          "seqno",
          "object"
        ], 
        "extract_partb": [
          "regno",
          "artype",
          "fystart",
          "fyend",
          "inc_leg",
          "inc_end",
          "inc_vol",
          "inc_fr",
          "inc_char",
          "inc_invest",
          "inc_other",
          "inc_total",
          "invest_gain",
          "asset_gain",
          "pension_gain",
          "exp_vol",
          "exp_trade",
          "exp_invest",
          "exp_grant",
          "exp_charble",
          "exp_gov",
          "exp_other",
          "exp_total",
          "exp_support",
          "exp_dep",
          "reserves",
          "asset_open",
          "asset_close",
          "fixed_assets",
          "open_assets",
          "invest_assets",
          "cash_assets",
          "current_assets",
          "credit_1",
          "credit_long",
          "pension_assets",
          "total_assets",
          "funds_end",
          "funds_restrict",
          "funds_unrestrict",
          "funds_total",
          "employees",
          "volunteers",
          "cons_acc",
          "charity_acc"
        ], 
        "extract_registration": [
          "regno",
          "subno",
          "regdate",
          "remdate",
          "remcode"
        ], 
        "extract_remove_ref": [
          "code",
          "text"
        ], 
        "extract_trustee": [
          "regno",
          "trustee"
        ]
    }
    
    def import_zip(zip_file):
        zf = zipfile.ZipFile(zip_file, 'r')
        print 'Opened zip file: %s' % zip_file
        for filename in cc_files:
            try:
                bcp_filename = filename + '.bcp'
                csv_filename = filename + '.csv'
                bcpdata = zf.read(bcp_filename)
                bcp.convert(bcpdata, csvfilename=csv_filename, col_headers=cc_files[filename])
                print 'Converted: %s' % bcp_filename
            except KeyError:
                print 'ERROR: Did not find %s in zip file' % bcp_filename
    
    def main():
        zip_file = sys.argv[1]
        import_zip(zip_file)
    
    if __name__ == '__main__':
        main()
    
    #!/usr/bin/env python
    import sys
    import csv
    
    def convert(bcpdata, csvfilename="", lineterminator='*@@*', delimiter='@**@', quote='"', newdelimiter=',', col_headers=None, escapechar='\\', newline='\n'):
        bcpdata = bcpdata.replace(escapechar, escapechar + escapechar)
        bcpdata = bcpdata.replace(quote, escapechar + quote)
        bcpdata = bcpdata.replace(delimiter, quote + newdelimiter + quote)
        bcpdata = bcpdata.replace(lineterminator, quote + newline + quote)
        if csvfilename=="":
            csvfilename = 'converted.csv'
        with open(csvfilename, 'wb') as csvfile:
            if(col_headers):
                writer = csv.writer(csvfile)
                writer.writerow(col_headers)
            csvfile.write('"')
            csvfile.write(bcpdata)
            csvfile.write('"')
    
    def main():
        bcp_filename = sys.argv[1]
        try:
            csv_filename = sys.argv[2]
        except IndexError:
            csv_filename = bcp_filename.replace('.bcp', '.csv')
        with open(bcp_filename, 'rb') as bcpfile:
            bcpdata = bcpfile.read()
            convert(bcpdata, csv_filename)
    
    if __name__ == '__main__':
        main()
    

    这让我感到震惊:

    >>> ================================ RESTART ================================
    >>> 
    
    Traceback (most recent call last):
      File "C:\Python27\bcp.py", line 31, in <module>
        main()
      File "C:\Python27\bcp.py", line 21, in main
        bcp_filename = sys.argv[1]
    IndexError: list index out of range
    >>> ================================ RESTART ================================
    >>> 
    
    Traceback (most recent call last):
      File "C:\Python27\import.py", line 175, in <module>
        main()
      File "C:\Python27\import.py", line 171, in main
        zip_file = sys.argv[1]
    IndexError: list index out of range
    >>> 
    

    任何人都可以指出哪里出错了吗?

    第二种方式:

    然后我尝试在Windows中使用命令提示符来运行该文件: 首先,我设置保存所有文件的路径(C:\ python27) 然后我在命令提示符下运行

    python import RegPlusExtract_November_2015.zip
    

    让我印象深刻:错误:

     File"<stdin>", line 1
    
    python import RegPlusExtract_November_2015.zip
    

    任何人都可以指出我弄错的地方或向我展示如何从上面的数据链接中提取csv文件。

1 个答案:

答案 0 :(得分:0)

从IDLE运行脚本时,无法在sys.argv中传递参数。所以在那个用例中错误是正常的。但是在F5上出现错误之后,您应该可以直接调用:

zip_file = 'RegPlusExtract_November_2015.zip'
import_zip(zip_file)

它应该允许您处理数据。

对于命令提示符中的第二种方法,您必须提供脚本文件的确切名称。命令应该是:

python import.py RegPlusExtract_November_2015.zip

但无论如何,将自己的脚本和其他数据文件放在Python目录中是错误C:\Python27应该只包含初始Python发行版和其他常规实用程序中的文件,不是本地脚本。  通常的方法是将c:\Python添加到PATH环境中,并使用专用目录进行数据慈善处理