在内存中对字符串进行 gzip / gunzip:如何从 Python 2.7 迁移到 Python 3

时间:2018-01-07 02:45:29

标签: python-3.x python-2.7 refactoring gzip python-2to3

我在 Python 2.7 中使用以下工具函数,在内存中对字符串/流进行 gzip 压缩和解压。迁移到 Python 3 时,我不清楚该如何在 BytesIO 和 StringIO 之间取舍,希望有人能帮我迁移以下代码:

<!-- language: lang-python -->
from StringIO import StringIO
import gzip
import zlib

def str_to_gz_str(orig_text_str, mode='w'):
    """
    Gzip a string entirely in memory.

    @param orig_text_str: uncompressed text to write into the gzip stream
    @param mode: mode handed to gzip.GzipFile; 'w' by default, 'wb' also works
    @return the compressed data collected from the in-memory buffer
    """
    buf = StringIO()
    gz_file = gzip.GzipFile(fileobj=buf, mode=mode)
    try:
        gz_file.write(orig_text_str)
    finally:
        # The gzip file must be closed before the buffer content is read back.
        gz_file.close()
    return buf.getvalue()

def gz_str_to_str(compressed_str):
    '''
    Decompress a gzipped string held in memory.

    @param compressed_str: gzipped string
    @return decompressed string
    '''
    # Wrap the data in a file-like object so it can be consumed as a stream.
    return gunzip_stream_to_str(StringIO(compressed_str))

def gunzip_stream_to_str(gz_stream):
    '''
    Decompress a gzipped stream and return the whole result as one string.

    @param gz_stream: gzipped stream
    @return decompressed str
    '''
    # join() assembles the result in a single pass; repeated += may re-copy
    # the accumulated string for every decompressed chunk.
    return ''.join(gunzip_stream(gz_stream))

def gunzip_stream(gz_stream):
    '''
    Incrementally decompress a gzipped stream.

    @param gz_stream: src stream that is gzipped (iterated chunk by chunk)
    @return yield decompressed chunks; empty results are skipped
    '''
    # 16 + MAX_WBITS makes zlib expect a gzip header and trailer.
    dec = zlib.decompressobj(16 + zlib.MAX_WBITS)
    for chunk in gz_stream:
        uz = dec.decompress(chunk)
        if uz:
            yield uz
    # decompress() may keep data in internal buffers; flush() drains whatever
    # is still pending once the input is exhausted.
    tail = dec.flush()
    if tail:
        yield tail

并使用以下代码进行测试:

<!-- language: lang-python -->
# Round-trip check: compressing and then decompressing must return the input.
txt_orig = "A Quick Brown fox"
txt_gz = str_to_gz_str(txt_orig)
txt_decompr = gz_str_to_str(txt_gz)
assert txt_decompr == txt_orig

我尝试使用io库中的StringIO

1 个答案:

答案 0 :(得分:0)

以下代码似乎可行,但我不确定处理编码的正确方式是什么。

from __future__ import unicode_literals
# Pick the in-memory buffer class for the running interpreter and record
# which major Python version is in use.
try:
    # The StringIO module only exists on Python 2.
    from StringIO import StringIO as ByStrIO
except ImportError:
    # Python 3: fall back to the bytes buffer from io. Only ImportError is
    # caught so that unrelated failures are not silently hidden.
    from io import BytesIO as ByStrIO
    is_py2 = False
else:
    is_py2 = True
import gzip
import zlib

def str_to_gz_str(txt, mode='w', encoding=None):
    """
    Gzip text (or raw bytes) in memory and return the compressed bytes.

    @param txt: text or bytes to compress
    @param mode: mode handed to gzip.GzipFile; 'w' by default
    @param encoding: codec used to turn text into bytes; None means UTF-8
    @return gzip-compressed bytes
    """
    # io.BytesIO is available on Python 2.7 and Python 3 alike; importing it
    # here keeps the function independent of module-level version switches.
    from io import BytesIO

    if not isinstance(txt, bytes):
        # The gzip stream is written as bytes, so text is encoded first,
        # with the codec the caller asked for rather than a fixed one.
        txt = txt.encode(encoding or 'utf-8')
    out = BytesIO()
    with gzip.GzipFile(fileobj=out, mode=mode) as f:
        f.write(txt)
    return out.getvalue()

def gz_str_to_str(compressed_str):
    """
    Decompress gzipped data held in memory.

    @param compressed_str: gzip-compressed data
    @return the result of gunzip_stream_to_str for that data
    """
    # Wrap the data in a file-like buffer so it can be consumed as a stream.
    return gunzip_stream_to_str(ByStrIO(compressed_str))

def gunzip_stream_to_str(gz_stream):
    """
    Decompress a gzipped stream and return the result as text.

    @param gz_stream: gzipped stream, iterated chunk by chunk
    @return decompressed text, decoded as UTF-8
    """
    # Join first, decode once: a multi-byte UTF-8 character can straddle two
    # decompressed chunks, and decoding each chunk on its own would then raise
    # UnicodeDecodeError. A single join also avoids repeated += copying.
    raw = b''.join(gunzip_stream(gz_stream))
    return raw.decode('utf-8')

def gunzip_stream(gz_stream):
    """
    Incrementally decompress a gzipped stream.

    @param gz_stream: gzipped stream, iterated chunk by chunk
    @return yield decompressed chunks; empty results are skipped
    """
    # 16 + MAX_WBITS makes zlib expect a gzip header and trailer.
    dec = zlib.decompressobj(16 + zlib.MAX_WBITS)
    for chunk in gz_stream:
        uz = dec.decompress(chunk)
        if uz:
            yield uz
    # decompress() may keep data in internal buffers; flush() drains whatever
    # is still pending once the input is exhausted.
    remainder = dec.flush()
    if remainder:
        yield remainder