为什么压缩和解压缩后我没有得到原始字符串?

时间:2013-06-14 09:35:33

标签: java zip base64 compression

我目前正在尝试使用 java.util.zip.* 包来执行无损压缩/解压缩。

我使用 Apache 的 jar(commons-codec)对作为参数传入的 String 进行 Base64 编码和解码。

下面是我的代码,其中有两个方法:一个用于压缩,另一个用于解压缩。

import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.zip.*;

import org.apache.commons.codec.binary.Base64;

/**
 * Question listing: attempts a compress/decompress round trip of a string
 * using {@link Deflater}/{@link Inflater} with Base64 text on both sides.
 * As written, the round trip does not reproduce the original string.
 */
public class main {

    /**
     * Base64-decodes the argument, deflates the decoded bytes with a single
     * {@code deflate} call into a fixed 100-byte buffer, and returns the
     * Base64 encoding of that whole buffer.
     *
     * @param stringToCompress text that is interpreted as Base64 input
     * @return Base64 encoding of all 100 buffer bytes, including any bytes
     *         the deflater did not write
     */
    public String compress(String stringToCompress) throws UnsupportedEncodingException
    {       
        //System.out.println("String to Be Compressed :: " + stringToCompress);
        byte[] input = Base64.decodeBase64(stringToCompress);

        Deflater compressor = new Deflater();
        compressor.setInput(input);
        compressor.finish();

        // The number of bytes written (deflate's return value) is discarded here.
        byte[] output = new byte[100];
        compressor.deflate(output);
        return Base64.encodeBase64String(output);
    }

    /**
     * Base64-decodes the argument, inflates it with a single {@code inflate}
     * call into a fixed 100-byte buffer, and returns the Base64 encoding of
     * that whole buffer (not a string built from the inflated bytes).
     *
     * @param stringToDecompress Base64 text holding deflated data
     * @return Base64 encoding of all 100 buffer bytes, including any bytes
     *         the inflater did not write
     * @throws DataFormatException declared for {@code Inflater.inflate}
     */
    public String decompressToString(String stringToDecompress) throws UnsupportedEncodingException, DataFormatException
    {   
        //System.out.println("String to be Decompressed :: " + stringToDecompress);
        byte[] input = Base64.decodeBase64(stringToDecompress);

        Inflater deCompressor = new Inflater();
        deCompressor.setInput(input,0,input.length);

        // The number of bytes written (inflate's return value) is discarded here.
        byte[] output = new byte[100];
        deCompressor.inflate(output);
        deCompressor.end();

        return Base64.encodeBase64String(output);
    }


    /**
     * Compresses a sample string, decompresses the result, and prints the
     * original, compressed and decompressed values for comparison.
     */
    public static void main(String[] args) throws UnsupportedEncodingException, DataFormatException {
        main m = new main();
        String strToBeCompressed  = "jhjksdhgfkjdsfhkjhjvblkajnlkdfmvlksjfdovbjaiudhv adjv ajgvoig avigogauguivadfhijbjkla";
        String compressedString  = m.compress(strToBeCompressed) ;
        String deCompressedString = m.decompressToString(compressedString);

        System.out.println("Original :: " + strToBeCompressed);
        System.out.println("Compressed :: " + compressedString);
        System.out.println("decompressed :: " + deCompressedString);
    }
}

这是输出。

Original :: jhjksdhgfkjdsfhkjhjvblkajnlkdfmvlksjfdovbjaiudhv adjv ajgvoig avigogauguivadfhijbjkla
Compressed :: eJwBPQDC/44Y5LHYYH5I3bH4ZI4Y725ZGo55ZHX5r5ZLI33aL242ornYb2nY72o4L6IoGr4oKIGroLor2nX4Yo245JXcvx/9AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==
decompressed :: jhjksdhgfkjdsfhkjhjvblkajnlkdfmvlksjfdovbjaiudhvadjvajgvoigavigogauguivadfhijbjklQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==

如果看到输出,则原始字符串和解压缩字符串不匹配。我不知道为什么?任何人都可以告诉我原因。

2 个答案:

答案 0 :(得分:3)

缓冲区问题

compress() 和 decompressToString() 的输出缓冲区大小固定为 100 字节。但实际输出不足 100 字节,因此数组末尾未被使用的部分全为零。转换为 Base64 后,这些零字节显示为一连串字符 A(结尾的 == 是填充符)。

您只需要考虑缓冲区中包含数据的部分,而忽略其余部分。inflate() 和 deflate() 方法会返回它们写入的字节数。不幸的是,Apache 的 Base64 转换器不支持指定数组中的范围,因此您必须调整缓冲区的大小:

byte[] output = new byte[100];
int size = compressor.deflate(output);
output = Arrays.copyOf(output, size);

decompressToString() 也需要做同样的修改。

这解决了缓冲区未完全填满的问题,但还留下一个更大的问题:缓冲区可能会溢出(装不下全部数据)。如果压缩或解压缩后的数据超过 100 个字节,则需要多次调用 inflate() 或 deflate() 才能取得全部数据。

Base64 问题

目前,compress()的输入字符串被解释为Base64字符串。同样,从decompressToString()返回的字符串是编码为Base64字符串的数据。

我认为你的本意是让原始字符串可以是任意内容(不限于 Base64 字符集)。在compress()中,不要用Base64.decodeBase64(stringToCompress)从输入字符串获取字节数组,而只需使用stringToCompress.getBytes()。decompressToString()中则应做相反的处理:将Base64.encodeBase64String(output)改为new String(output)。您可以使用String(byte[] bytes, int offset, int length)重载来指定输出数组的子范围,而不必创建副本。

完整代码

import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.zip.*;
import java.util.*;

import org.apache.commons.codec.binary.Base64;

/**
 * Round-trips a string through zlib compression ({@link Deflater}) and
 * decompression ({@link Inflater}), carrying the compressed bytes as Base64
 * text. Works for input of any size and encodes text as UTF-8 so the result
 * does not depend on the platform default charset.
 */
public class main {

    /** Initial capacity of the working buffers; they are doubled whenever they fill up. */
    private static final int INITIAL_BUFFER_SIZE = 100;

    /**
     * Compresses a string and returns the compressed data as Base64 text.
     *
     * @param stringToCompress the plain text to compress; must not be {@code null}
     * @return Base64 encoding of the complete deflated UTF-8 bytes of the text
     * @throws UnsupportedEncodingException if the runtime does not provide UTF-8
     */
    public String compress(String stringToCompress) throws UnsupportedEncodingException
    {
        byte[] input = stringToCompress.getBytes("UTF-8");

        Deflater compressor = new Deflater();
        try {
            compressor.setInput(input);
            compressor.finish();

            byte[] output = new byte[INITIAL_BUFFER_SIZE];
            int size = 0;
            // One deflate() call only fills the space it is given, so keep
            // calling (growing the buffer as needed) until the stream is complete.
            while (!compressor.finished()) {
                if (size == output.length) {
                    output = Arrays.copyOf(output, output.length * 2);
                }
                size += compressor.deflate(output, size, output.length - size);
            }
            // Encode only the bytes actually written, not the unused buffer tail.
            return Base64.encodeBase64String(Arrays.copyOf(output, size));
        } finally {
            // Release the native zlib resources held by the deflater.
            compressor.end();
        }
    }

    /**
     * Decodes Base64 text produced by {@link #compress(String)} and restores
     * the original string.
     *
     * @param stringToDecompress Base64 text holding deflated UTF-8 data
     * @return the decompressed text
     * @throws UnsupportedEncodingException if the runtime does not provide UTF-8
     * @throws DataFormatException if the compressed data is corrupt, truncated,
     *         or requires a preset dictionary
     */
    public String decompressToString(String stringToDecompress) throws UnsupportedEncodingException, DataFormatException
    {
        byte[] input = Base64.decodeBase64(stringToDecompress);

        Inflater deCompressor = new Inflater();
        try {
            deCompressor.setInput(input, 0, input.length);

            byte[] output = new byte[INITIAL_BUFFER_SIZE];
            int size = 0;
            while (!deCompressor.finished()) {
                if (size == output.length) {
                    output = Arrays.copyOf(output, output.length * 2);
                }
                int produced = deCompressor.inflate(output, size, output.length - size);
                size += produced;
                // No progress with free buffer space means the inflater is waiting
                // for more input or a dictionary that will never arrive.
                if (produced == 0 && !deCompressor.finished()) {
                    throw new DataFormatException("Compressed data is truncated or requires a preset dictionary");
                }
            }
            return new String(output, 0, size, "UTF-8");
        } finally {
            deCompressor.end();
        }
    }


    /**
     * Compresses a sample string, decompresses the result, and prints the
     * original, compressed and decompressed values for comparison.
     */
    public static void main(String[] args) throws UnsupportedEncodingException, DataFormatException {
        main m = new main();
        String strToBeCompressed  = "jhjksdhgfkjdsfhkjhjvblkajnlkdfmvlksjfdovbjaiudhv adjv ajgvoig avigogauguivadfhijbjkla";
        String compressedString  = m.compress(strToBeCompressed) ;
        String deCompressedString = m.decompressToString(compressedString);

        System.out.println("Original :: " + strToBeCompressed);
        System.out.println("Compressed :: " + compressedString);
        System.out.println("decompressed :: " + deCompressedString);
    }
}

DIFF

@@ -3,2 +3,3 @@
 import java.util.zip.*;
+import java.util.*;

@@ -11,3 +12,3 @@
         //System.out.println("String to Be Compressed :: " + stringToCompress);
-        byte[] input = Base64.decodeBase64(stringToCompress);
+        byte[] input = stringToCompress.getBytes();

@@ -18,3 +19,4 @@
         byte[] output = new byte[100];
-        compressor.deflate(output);
+        int size = compressor.deflate(output);
+        output = Arrays.copyOf(output, size);
         return Base64.encodeBase64String(output);
@@ -31,6 +33,6 @@
         byte[] output = new byte[100];
-        deCompressor.inflate(output);
+        int size = deCompressor.inflate(output);
         deCompressor.end();

-        return Base64.encodeBase64String(output);
+        return new String(output, 0, size);
     }

答案 1 :(得分:3)

我认为你应该把压缩、解压缩、Base64 编码和 Base64 解码当作各自独立的关注点,分别放在单独的方法中处理。我无法推断出你使用 Base64 的原因——也许有充分的理由。也许您希望压缩后的字符串是 Base64 编码的?

无论如何,这是一个代码版本,可以压缩和解压缩字符串而不会有任何损失(但不涉及Base64):

package dk.tbsalling.stackoverflow;

import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.zip.*;

import org.apache.commons.codec.binary.Base64;

/**
 * Round-trips a string through zlib compression ({@link Deflater}) and
 * decompression ({@link Inflater}) without loss, for input of any size.
 */
public class App
{
    /** Initial capacity of the working buffers; they are doubled whenever they fill up. */
    private static final int INITIAL_BUFFER_SIZE = 1024;

    /**
     * Compresses the UTF-8 encoding of a string.
     *
     * @param stringToCompress the text to compress; must not be {@code null}
     * @return the complete deflated data, sized exactly to its content
     * @throws UnsupportedEncodingException if the runtime does not provide UTF-8
     */
    private byte[] compress(String stringToCompress) throws UnsupportedEncodingException
    {
        byte[] stringAsBytes = stringToCompress.getBytes("UTF-8");

        Deflater compressor = new Deflater();
        try {
            compressor.setInput(stringAsBytes);
            compressor.finish();

            byte[] compressedData = new byte[INITIAL_BUFFER_SIZE];
            int compressedDataLength = 0;
            // One deflate() call only fills the space it is given, so keep
            // calling (growing the buffer as needed) until the stream is complete.
            while (!compressor.finished()) {
                if (compressedDataLength == compressedData.length) {
                    compressedData = Arrays.copyOf(compressedData, compressedData.length * 2);
                }
                compressedDataLength += compressor.deflate(
                        compressedData, compressedDataLength, compressedData.length - compressedDataLength);
            }
            return Arrays.copyOf(compressedData, compressedDataLength);
        } finally {
            // Release the native zlib resources held by the deflater.
            compressor.end();
        }
    }

    /**
     * Decompresses data produced by {@link #compress(String)} back into text.
     *
     * @param compressedData complete deflated UTF-8 data
     * @return the decompressed text
     * @throws UnsupportedEncodingException if the runtime does not provide UTF-8
     * @throws DataFormatException if the data is corrupt, truncated, or
     *         requires a preset dictionary
     */
    private String decompressToString(byte[] compressedData) throws UnsupportedEncodingException, DataFormatException
    {
        Inflater deCompressor = new Inflater();
        try {
            deCompressor.setInput(compressedData, 0, compressedData.length);

            byte[] output = new byte[INITIAL_BUFFER_SIZE];
            int decompressedDataLength = 0;
            while (!deCompressor.finished()) {
                if (decompressedDataLength == output.length) {
                    output = Arrays.copyOf(output, output.length * 2);
                }
                int produced = deCompressor.inflate(
                        output, decompressedDataLength, output.length - decompressedDataLength);
                decompressedDataLength += produced;
                // No progress with free buffer space means the inflater is waiting
                // for more input or a dictionary that will never arrive.
                if (produced == 0 && !deCompressor.finished()) {
                    throw new DataFormatException("Compressed data is truncated or requires a preset dictionary");
                }
            }
            return new String(output, 0, decompressedDataLength, "UTF-8");
        } finally {
            deCompressor.end();
        }
    }

    /**
     * Compresses a sample string, decompresses it again, and prints both
     * texts with their lengths so they can be compared.
     */
    public static void main(String[] args) throws UnsupportedEncodingException, DataFormatException {
        App m = new App();
        String strToBeCompressed  = "jhjksdhgfkjdsfhkjhjvblkajnlkdfmvlksjfdovbjaiudhv adjv ajgvoig avigogauguivadfhijbjkla";
        byte[] compressedData  = m.compress(strToBeCompressed);
        String deCompressedString = m.decompressToString(compressedData);

        System.out.println("Original     :: " + strToBeCompressed.length() + " " + strToBeCompressed);
        // Note: toString() on a byte[] prints the array's identity (e.g. "[B@3ced0338"),
        // not its contents; the raw compressed bytes are not printable text anyway.
        System.out.println("Compressed   :: " + compressedData.toString());
        System.out.println("decompressed :: " + deCompressedString.length() + " " + deCompressedString);
    }
}

这会产生输出:

Original     :: 85 jhjksdhgfkjdsfhkjhjvblkajnlkdfmvlksjfdovbjaiudhv adjv ajgvoig avigogauguivadfhijbjkla
Compressed   :: [B@3ced0338
decompressed :: 85 jhjksdhgfkjdsfhkjhjvblkajnlkdfmvlksjfdovbjaiudhv adjv ajgvoig avigogauguivadfhijbjkla

Process finished with exit code 0

更新

以下是生成压缩字符串的Base64编码表示的代码:

package dk.tbsalling.stackoverflow;

import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.zip.*;

import org.apache.commons.codec.binary.Base64;

/**
 * Round-trips a string through zlib compression ({@link Deflater}) and
 * decompression ({@link Inflater}) without loss, for input of any size,
 * representing the compressed data as Base64 text.
 */
public class App
{
    /** Initial capacity of the working buffers; they are doubled whenever they fill up. */
    private static final int INITIAL_BUFFER_SIZE = 1024;

    /**
     * Compresses the UTF-8 encoding of a string and Base64-encodes the result.
     *
     * @param stringToCompress the text to compress; must not be {@code null}
     * @return Base64 encoding of the complete deflated data
     * @throws UnsupportedEncodingException if the runtime does not provide UTF-8
     */
    private String compress(String stringToCompress) throws UnsupportedEncodingException
    {
        byte[] stringAsBytes = stringToCompress.getBytes("UTF-8");

        Deflater compressor = new Deflater();
        try {
            compressor.setInput(stringAsBytes);
            compressor.finish();

            byte[] compressedData = new byte[INITIAL_BUFFER_SIZE];
            int compressedDataLength = 0;
            // One deflate() call only fills the space it is given, so keep
            // calling (growing the buffer as needed) until the stream is complete.
            while (!compressor.finished()) {
                if (compressedDataLength == compressedData.length) {
                    compressedData = Arrays.copyOf(compressedData, compressedData.length * 2);
                }
                compressedDataLength += compressor.deflate(
                        compressedData, compressedDataLength, compressedData.length - compressedDataLength);
            }
            // Encode only the bytes actually written, not the unused buffer tail.
            byte[] bytes = Arrays.copyOf(compressedData, compressedDataLength);
            return Base64.encodeBase64String(bytes);
        } finally {
            // Release the native zlib resources held by the deflater.
            compressor.end();
        }
    }

    /**
     * Decodes Base64 text produced by {@link #compress(String)} and restores
     * the original string.
     *
     * @param base64String Base64 text holding deflated UTF-8 data
     * @return the decompressed text
     * @throws UnsupportedEncodingException if the runtime does not provide UTF-8
     * @throws DataFormatException if the compressed data is corrupt, truncated,
     *         or requires a preset dictionary
     */
    private String decompressToString(String base64String) throws UnsupportedEncodingException, DataFormatException
    {
        byte[] compressedData = Base64.decodeBase64(base64String);

        Inflater deCompressor = new Inflater();
        try {
            deCompressor.setInput(compressedData, 0, compressedData.length);

            byte[] output = new byte[INITIAL_BUFFER_SIZE];
            int decompressedDataLength = 0;
            while (!deCompressor.finished()) {
                if (decompressedDataLength == output.length) {
                    output = Arrays.copyOf(output, output.length * 2);
                }
                int produced = deCompressor.inflate(
                        output, decompressedDataLength, output.length - decompressedDataLength);
                decompressedDataLength += produced;
                // No progress with free buffer space means the inflater is waiting
                // for more input or a dictionary that will never arrive.
                if (produced == 0 && !deCompressor.finished()) {
                    throw new DataFormatException("Compressed data is truncated or requires a preset dictionary");
                }
            }
            return new String(output, 0, decompressedDataLength, "UTF-8");
        } finally {
            deCompressor.end();
        }
    }

    /**
     * Compresses a sample string, decompresses it again, and prints the
     * original text, the Base64 form of the compressed data, and the
     * restored text so they can be compared.
     */
    public static void main(String[] args) throws UnsupportedEncodingException, DataFormatException {
        App m = new App();
        String strToBeCompressed  = "jhjksdhgfkjdsfhkjhjvblkajnlkdfmvlksjfdovbjaiudhv adjv ajgvoig avigogauguivadfhijbjkla";
        String compressedData  = m.compress(strToBeCompressed);
        String deCompressedString = m.decompressToString(compressedData);

        System.out.println("Original     :: " + strToBeCompressed.length() + " " + strToBeCompressed);
        System.out.println("Compressed   :: " + compressedData);
        System.out.println("decompressed :: " + deCompressedString.length() + " " + deCompressedString);
    }
}

这会产生输出:

Original     :: 85 jhjksdhgfkjdsfhkjhjvblkajnlkdfmvlksjfdovbjaiudhv adjv ajgvoig avigogauguivadfhijbjkla
Compressed   :: eJwNxMkNwDAIBMBW3BoW4lhI/LDY+pN5DAJ1NdwKei0KAe4uwdul9rDrwvRwQ3I0uETxB+dJX8L04zI+SVGLxEa1fNDSIlU=
decompressed :: 85 jhjksdhgfkjdsfhkjhjvblkajnlkdfmvlksjfdovbjaiudhv adjv ajgvoig avigogauguivadfhijbjkla

Process finished with exit code 0