我正在从Postman上传一个excel文件,并尝试在带有熊猫的AWS lambda中读取它。我该怎么办?
我尝试使用'cgi.parse_multipart'从API网关事件读取字节。我能够成功读取csv文件,但无法读取xlsx文件。
from cgi import parse_header, parse_multipart
import pandas as pd
from io import BytesIO, StringIO
import json
def get_data(event):
c_type, c_data = parse_header(event['headers']['content-type'])
c_data['boundary'] = bytes(c_data['boundary'], "utf-8")
assert c_type == 'multipart/form-data'
body = event['body']
body = bytes(body, 'utf-8')
form_data = parse_multipart(BytesIO(body), c_data)
data = form_data['file'][0]
s=str(data,'utf-8')
d = StringIO(s)
# df=pd.read_csv(d)
df=pd.read_excel(d)
print(df)
def run(event, context):
output = {}
output['statusCode'] = 200
output['body'] = json.dumps(get_data(event))
return output
尝试读取xlsx文件时,出现以下错误:
Traceback (most recent call last):
File "/var/task/upload_test.py", line 108, in run
output['body'] = json.dumps(get_data(event))
File "/var/task/upload_test.py", line 52, in get_data
df=pd.read_excel(d)
File "/opt/python/lib/python3.6/site-packages/pandas/util/_decorators.py", line 188, in wrapper
return func(*args, **kwargs)
File "/opt/python/lib/python3.6/site-packages/pandas/util/_decorators.py", line 188, in wrapper
return func(*args, **kwargs)
File "/opt/python/lib/python3.6/site-packages/pandas/io/excel.py", line 350, in read_excel
io = ExcelFile(io, engine=engine)
File "/opt/python/lib/python3.6/site-packages/pandas/io/excel.py", line 653, in __init__
self._reader = self._engines[engine](self._io)
File "/opt/python/lib/python3.6/site-packages/pandas/io/excel.py", line 422, in __init__
self.book = xlrd.open_workbook(file_contents=data)
File "/var/task/xlrd/__init__.py", line 157, in open_workbook
ragged_rows=ragged_rows,
File "/var/task/xlrd/book.py", line 92, in open_workbook_xls
biff_version = bk.getbof(XL_WORKBOOK_GLOBALS)
File "/var/task/xlrd/book.py", line 1274, in getbof
opcode = self.get2bytes()
File "/var/task/xlrd/book.py", line 675, in get2bytes
return (BYTES_ORD(hi) << 8) | BYTES_ORD(lo)
TypeError: unsupported operand type(s) for <<: 'str' and 'int'