如何使用python来控制下载文件格式?

时间:2016-06-27 08:33:30

标签: python xml excel request

我正在使用 Python 的 requests 库从网站下载文件。这是我的代码:

import requests
import re
import wget
import urllib3
import os
import datetime
from datetime import date
import xlrd
# Browser-like request headers passed explicitly to every request below.
my_headers = {
    # Identify as desktop Chrome on macOS.
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.84 Safari/537.36',
    # Content negotiation: media types, encodings and languages accepted.
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'identity, deflate, compress, gzip',
    'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4',
    # Connection-reuse hints.
    'Connection': 'Keep-Alive',
    'Keep-Alive': 'timeout=10, max=100',
}
# --- Sign in ---------------------------------------------------------------
# One Session is used for every request so that cookies set while signing in
# are sent again with the later report download.
sss = requests.Session()

# NOTE(review): cs_url, cs_user and cs_psw are not defined in the visible
# snippet. cs_url must already hold the sign-in page URL at this point; it is
# only reassigned to the authenticate endpoint further down.
r = sss.get(cs_url, headers=my_headers)

# The sign-in form carries a hidden "appIdKey" field whose value is echoed
# back in the POST below. The pattern expects the exact whitespace
# (newline + two tabs) between the name and value attributes.
reg = r'<input type="hidden" id="appIdKey" name="appIdKey"\n\t\tvalue="(.*)" />'
pattern = re.compile(reg)
result = pattern.findall(r.content)
if not result:
    # Fail with the actual cause instead of a bare IndexError on result[0].
    raise RuntimeError(
        'appIdKey hidden field not found in the sign-in page (HTTP %s)'
        % r.status_code
    )
token = result[0]

# Form fields posted to the authenticate endpoint.
my_data = {
    'commit': 'Sign in',
    'language': 'US-EN',
    'appIdKey': token,
    'appleId': cs_user,
    'accountPassword': cs_psw,
}
cs_url = 'https://idmsauth.corp.XXX.com/authenticate'
r = sss.post(cs_url, headers=my_headers, data=my_data)

# A single pre-formatted string prints the same text whether print is a
# statement or a function.
print('%s %s %s' % (r.url, r.status_code, r.history))

# Report download URL. The original literal was broken across physical lines
# in the middle of the string, which is a syntax error. Adjacent string
# literals inside parentheses are concatenated into one string with no
# separators, so the query string is unchanged; each literal below is one
# query parameter (the first literal is the endpoint plus the filters).
# NOTE(review): dim_select / dim_order / metric_select are repeated five times
# exactly as in the original URL; whether the server needs the repeats is not
# visible from here, so they are preserved verbatim.
PhoneSales_URL = (
    'https://aosreports2.corp.xxx.com/sabr/index.php?datetype=week&date_filter=201423&table_break=none&col=metric&aft=Transaction&run=Download'
    '&role_filter%5B%5D=1415&role_filter%5B%5D=1437&role_filter%5B%5D=7254&role_filter%5B%5D=7247'
    '&role_filter%5B%5D=1423&role_filter%5B%5D=7462&role_filter%5B%5D=4463&role_filter%5B%5D=7461'
    '&role_filter%5B%5D=15801&role_filter%5B%5D=17641&role_filter%5B%5D=1430&role_filter%5B%5D=1416'
    '&role_filter%5B%5D=1418&role_filter%5B%5D=7463&role_filter%5B%5D=5021&role_filter%5B%5D=1318'
    '&segment_filter%5B%5D=Consumer&segment_filter%5B%5D=ED+Ind'
    # group 1
    '&dim_select=segment%7Ccall_type%7Ccountry%7Cday%7Cweek%7Cmonth%7Cquarter%7Cfinance_region'
    '&dim_order=agent_geo_t%7Csite%7Cteam_t%7Ccustom_group%7Crole_e%7Crole_t%7Csupervisor_e%7Csupervisor_t%7Cagent_name%7Cagent_ds_id%7Csegment%7Ccall_type%7Cregion%7Csub_region%7Ccountry%7Cday%7Cweek%7Cmonth%7Cquarter%7Csite_type%7Crole_group_e%7Crole_group_t%7Cagent_plang_t%7Cfinance_region%7Cpcntry_e%7Cpcntry_t%7Cpregion_e%7Cpregion_t%7Cagent_etype_t'
    '&metric_select=fr1usd%7Csal1%7Cbk1%7Csbm1%7Cbk8s%7Csbm2%7Cdc11%7Cdc9%7Cdc12%7Cbk29%7Cbk60s%7Cbk110%7Csal1a%7Cbk144%7Cbk157s%7Cbk3%7Cipad_pp%7Csal1d%7Csbm7%7Csbm21%7Csbm13%7Csbm25%7Cbk108s%7Cbk50'
    # group 2
    '&dim_select=site%7Csegment%7Ccall_type%7Ccountry%7Cday%7Cweek%7Cmonth%7Cquarter%7Cfinance_region'
    '&dim_order=agent_geo_t%7Csite%7Cteam_t%7Ccustom_group%7Crole_e%7Crole_t%7Csupervisor_e%7Csupervisor_t%7Cagent_name%7Cagent_ds_id%7Csegment%7Ccall_type%7Cregion%7Csub_region%7Ccountry%7Cday%7Cweek%7Cmonth%7Cquarter%7Csite_type%7Crole_group_e%7Crole_group_t%7Cagent_plang_t%7Cfinance_region%7Cpcntry_e%7Cpcntry_t%7Cpregion_e%7Cpregion_t%7Cagent_etype_t'
    '&metric_select=fr1usd%7Csal1%7Cbk1%7Csbm1%7Cbk8s%7Csbm2%7Cdc11%7Cdc9%7Cdc12%7Cbk29%7Cbk60s%7Cbk110%7Csal1a%7Cbk144%7Cbk157s%7Cbk3%7Cipad_pp%7Csal1d%7Csbm7%7Csbm21%7Csbm13%7Csbm25%7Cbk108s%7Cbk50%7Cbk33%7Cbk112%7Cbk45%7Cbk146%7Csal11%7Csal13%7Csal7%7Csal18%7Csal12%7Csal19%7Cbk39'
    # group 3
    '&dim_select=site%7Csegment%7Ccall_type%7Ccountry%7Cday%7Cweek%7Cmonth%7Cquarter%7Cfinance_region'
    '&dim_order=agent_geo_t%7Csite%7Cteam_t%7Ccustom_group%7Crole_e%7Crole_t%7Csupervisor_e%7Csupervisor_t%7Cagent_name%7Cagent_ds_id%7Csegment%7Ccall_type%7Cregion%7Csub_region%7Ccountry%7Cday%7Cweek%7Cmonth%7Cquarter%7Csite_type%7Crole_group_e%7Crole_group_t%7Cagent_plang_t%7Cfinance_region%7Cpcntry_e%7Cpcntry_t%7Cpregion_e%7Cpregion_t%7Cagent_etype_t'
    '&metric_select=fr1usd%7Csal1%7Cbk1%7Csbm1%7Cbk8s%7Csbm2%7Cdc11%7Cdc9%7Cdc12%7Cbk29%7Cbk60s%7Cbk110%7Csal1a%7Cbk144%7Cbk157s%7Cbk3%7Cipad_pp%7Csal1d%7Csbm7%7Csbm21%7Csbm13%7Csbm25%7Cbk108s%7Cbk50%7Cbk33%7Cbk112%7Cbk45%7Cbk146%7Csal11%7Csal13%7Csal7%7Csal18%7Csal12%7Csal19%7Cbk39'
    # group 4
    '&dim_select=site%7Csegment%7Ccall_type%7Ccountry%7Cday%7Cweek%7Cmonth%7Cquarter%7Cfinance_region'
    '&dim_order=agent_geo_t%7Csite%7Cteam_t%7Ccustom_group%7Crole_e%7Crole_t%7Csupervisor_e%7Csupervisor_t%7Cagent_name%7Cagent_ds_id%7Csegment%7Ccall_type%7Cregion%7Csub_region%7Ccountry%7Cday%7Cweek%7Cmonth%7Cquarter%7Csite_type%7Crole_group_e%7Crole_group_t%7Cagent_plang_t%7Cfinance_region%7Cpcntry_e%7Cpcntry_t%7Cpregion_e%7Cpregion_t%7Cagent_etype_t'
    '&metric_select=fr1usd%7Csal1%7Cbk1%7Csbm1%7Cbk8s%7Csbm2%7Cdc11%7Cdc9%7Cdc12%7Cbk29%7Cbk60s%7Cbk110%7Csal1a%7Cbk144%7Cbk157s%7Cbk3%7Cipad_pp%7Csal1d%7Csbm7%7Csbm21%7Csbm13%7Csbm25%7Cbk108s%7Cbk50%7Cbk33%7Cbk112%7Cbk45%7Cbk146%7Csal11%7Csal13%7Csal7%7Csal18%7Csal12%7Csal19%7Cbk39'
    # group 5
    '&dim_select=site%7Csegment%7Ccall_type%7Ccountry%7Cday%7Cweek%7Cmonth%7Cquarter%7Cfinance_region'
    '&dim_order=agent_geo_t%7Csite%7Cteam_t%7Ccustom_group%7Crole_e%7Crole_t%7Csupervisor_e%7Csupervisor_t%7Cagent_name%7Cagent_ds_id%7Csegment%7Ccall_type%7Cregion%7Csub_region%7Ccountry%7Cday%7Cweek%7Cmonth%7Cquarter%7Csite_type%7Crole_group_e%7Crole_group_t%7Cagent_plang_t%7Cfinance_region%7Cpcntry_e%7Cpcntry_t%7Cpregion_e%7Cpregion_t%7Cagent_etype_t'
    '&metric_select=fr1usd%7Csal1%7Cbk1%7Csbm1%7Cbk8s%7Csbm2%7Cdc11%7Cdc9%7Cdc12%7Cbk29%7Cbk60s%7Cbk110%7Csal1a%7Cbk144%7Cbk157s%7Cbk3%7Cipad_pp%7Csal1d%7Csbm7%7Csbm21%7Csbm13%7Csbm25%7Cbk108s%7Cbk50%7Cbk33%7Cbk112%7Cbk45%7Cbk146%7Csal11%7Csal13%7Csal7%7Csal18%7Csal12%7Csal19%7Cbk39'
)
# --- Download the report ----------------------------------------------------
# Destination path for the downloaded report.
filename = '/Users/test.xls'

# stream=True makes requests defer reading the body, so iter_content() pulls
# it off the connection chunk by chunk instead of buffering the whole report
# in memory before the first write.
r = sss.get(PhoneSales_URL, stream=True, headers=my_headers)
try:
    # Binary mode: the body is written exactly as received. The with-block
    # flushes and closes the file even if a read or write fails part-way.
    with open(filename, 'wb') as f:
        for chunk in r.iter_content(chunk_size=512 * 1024):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
finally:
    # Always release the response, including on error.
    r.close()

运行此代码后,我得到的是一个 Excel 2004 XML 电子表格文件,这种格式在 Python 中很难处理。我正在寻找以普通 xls 或 csv 格式下载该文件的解决方案。有人可以帮忙吗?谢谢。

如果有帮助,我还附上chrome的信息:

0 个答案:

没有答案