我在 Python 2.7 中使用下面这组工具函数，在内存中对字符串/流进行 gzip 压缩和解压。现在需要把它们迁移到 Python 3，但不清楚该如何处理 BytesIO 与 StringIO 的区别，希望得到帮助：
<!-- language: lang-python -->
from StringIO import StringIO
import gzip
import zlib
def str_to_gz_str(orig_text_str, mode='w'):
    """
    Gzip a string entirely in memory.
    @param orig_text_str: uncompressed text str to compress
    @param mode: mode handed to gzip.GzipFile; 'w' by default, 'wb' also works
    @return str holding the gzip-compressed data
    """
    buf = StringIO()
    gz_file = gzip.GzipFile(fileobj=buf, mode=mode)
    try:
        gz_file.write(orig_text_str)
    finally:
        # The gzip file has to be closed before the buffer is read back.
        gz_file.close()
    return buf.getvalue()
def gz_str_to_str(compressed_str):
    '''
    Gunzip a gzipped string held in memory.
    @param compressed_str: gzipped string
    @return decompressed string
    '''
    # Wrap the string in a file-like object so the stream helper can iterate it.
    return gunzip_stream_to_str(StringIO(compressed_str))
def gunzip_stream_to_str(gz_stream):
    '''
    Decompress a gzipped stream and gather the output into a single string.
    @param gz_stream: gzipped stream
    @return decompressed str
    '''
    # Concatenate every decompressed chunk produced by gunzip_stream.
    return ''.join(gunzip_stream(gz_stream))
def gunzip_stream(gz_stream):
    '''
    Lazily decompress a gzipped stream.
    @param gz_stream: gzipped source stream, consumed by iterating over it
    @return generator yielding each non-empty decompressed chunk
    '''
    # 16 + MAX_WBITS makes zlib expect a gzip header and trailer.
    decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)
    for raw in gz_stream:
        piece = decompressor.decompress(raw)
        if not piece:
            # Nothing decompressed from this chunk; emit no empty piece.
            continue
        yield piece
并用下面的代码运行测试：
<!-- language: lang-python -->
# Round-trip check: compressing and then decompressing must give back the input.
txt_orig = "A Quick Brown fox"
txt_gz = str_to_gz_str(txt_orig)
txt_decompr = gz_str_to_str(txt_gz)
assert txt_orig == txt_decompr
我尝试过改用 io 库中的 StringIO。
答案 0 :(得分:0)
下面的代码似乎可以正常工作，不过我不确定处理编码的正确方式是什么：
from __future__ import unicode_literals
# Pick the in-memory byte buffer class for the running interpreter:
# Python 2's StringIO.StringIO when that module imports, io.BytesIO otherwise.
# is_py2 records which branch was taken.
is_py2 = False
try:
    from StringIO import StringIO as ByStrIO
    is_py2 = True
except ImportError:
    # Only a missing StringIO module should trigger the fallback; a bare
    # except would also hide unrelated errors such as KeyboardInterrupt.
    from io import BytesIO as ByStrIO
import gzip
import zlib
def str_to_gz_str(txt, mode='w', encoding=None):
    """Gzip-compress text in memory and return the compressed bytes.

    @param txt: text to compress; a byte string is compressed unchanged
    @param mode: mode handed to gzip.GzipFile, 'w' by default
    @param encoding: codec used to turn text into bytes before compressing;
        'utf-8' is used when it is omitted
    @return byte string holding the gzip-compressed data
    """
    # GzipFile only accepts bytes, so text is encoded first. The requested
    # codec is honoured (it used to be ignored in favour of a hard-coded
    # 'utf-8'), and data that is already bytes is passed through as is.
    if not isinstance(txt, bytes):
        txt = txt.encode(encoding or 'utf-8')
    out = ByStrIO()
    with gzip.GzipFile(fileobj=out, mode=mode) as f:
        f.write(txt)
    # Read the buffer only after the with-block has closed the gzip file.
    return out.getvalue()
def gz_str_to_str(compressed_str):
    """Decompress a gzipped byte string held in memory.

    @param compressed_str: gzipped data
    @return result of gunzip_stream_to_str for the wrapped data
    """
    # Wrap the data in a file-like buffer so the stream helper can iterate it.
    return gunzip_stream_to_str(ByStrIO(compressed_str))
def gunzip_stream_to_str(gz_stream, encoding='utf-8'):
    """Decompress a gzipped stream and return its content as text.

    @param gz_stream: gzipped stream, passed on to gunzip_stream
    @param encoding: codec used to decode the decompressed bytes,
        'utf-8' by default
    @return decompressed text
    @raise UnicodeDecodeError: if the decompressed bytes are not valid
        in the given encoding
    """
    # Chunk boundaries are arbitrary and can fall inside a multi-byte
    # character, so join all the bytes first and decode exactly once.
    raw = b''.join(gunzip_stream(gz_stream))
    return raw.decode(encoding)
def gunzip_stream(gz_stream):
    """Lazily decompress a gzipped stream.

    @param gz_stream: iterable yielding consecutive pieces of gzip data
        as byte strings (for example a binary file-like object)
    @return generator yielding each non-empty decompressed chunk
    """
    # 16 + MAX_WBITS makes zlib expect a gzip header and trailer.
    dec = zlib.decompressobj(16 + zlib.MAX_WBITS)
    for chunk in gz_stream:
        uz = dec.decompress(chunk)
        if uz:
            yield uz
    # Flush at end of input so that any output still held by the
    # decompressor is emitted rather than silently dropped.
    tail = dec.flush()
    if tail:
        yield tail