LZW压缩似乎无法正常工作

时间:2014-05-01 14:51:03

标签: algorithm encoding compression decode lzw

我试图让这段代码正常工作,但编码结果并不符合预期。我有一个60字节的文本文件,编码后输出的文件是100字节;再对该文件进行解码,得到的文件大约是65字节——内容解码正确,但文件比原始文件大。我还尝试对一个jpg文件进行编码,文件大小确实减小了,但我无法打开编码后的文件。随后我尝试解码这个jpg文件,但没有成功,命令行(cmd)似乎卡死了。以下是我使用的代码。

import java.util.*;
import java.io.*;

/**
 * LZW compressor/decompressor for 8-bit data, usable as a library or as a command-line filter
 * that reads standard input and writes standard output.
 *
 * <p>Two code widths are offered: 16-bit codes ({@link #compress}/{@link #decompress}) and
 * 32-bit codes ({@link #compressInt}/{@link #decompressInt}). Codes are stored at a fixed width;
 * no bit packing is performed, so inputs with little repetition grow when encoded.
 */
public class LZW {

    // Number of single-character seed entries in the 16-bit dictionaries.
    public static short DSIZE = 256;
    // Number of single-character seed entries in the 32-bit dictionaries.
    public static int DSIZEINT = 256;

    // Number of distinct values a 16-bit code can carry; the short dictionaries stop growing here
    // so that new phrases never wrap around and overwrite the single-character seed codes.
    private static final int MAX_SHORT_CODES = 1 << 16;

    /**
     * Compresses a string of 8-bit characters into a list of 16-bit LZW codes.
     *
     * <p>Codes 32768..65535 are stored as negative {@code short} values (their unsigned bit
     * pattern). Once all 65536 codes are assigned the dictionary is frozen and the remaining
     * input is encoded with the existing entries.
     *
     * @param uncompressed text whose characters must all be below {@link #DSIZE}
     * @return the emitted codes, empty when the input is empty
     * @throws IllegalArgumentException if a character is outside the seed alphabet
     */
    public static List<Short> compress(String uncompressed) {
        int dictSize = DSIZE;
        Map<String, Short> dictionary = new HashMap<String, Short>();
        for (int i = 0; i < DSIZE; i++) {
            dictionary.put("" + (char) i, (short) i);
        }

        String w = "";
        List<Short> result = new ArrayList<Short>();
        for (char c : uncompressed.toCharArray()) {
            if (c >= DSIZE) {
                // Without this check the lookup below would silently emit a null code.
                throw new IllegalArgumentException(
                        "Character outside the 8-bit alphabet: " + (int) c);
            }
            String wc = w + c;
            if (dictionary.containsKey(wc)) {
                w = wc;
            } else {
                result.add(dictionary.get(w));
                // Add wc to the dictionary while 16-bit codes are still available.
                if (dictSize < MAX_SHORT_CODES) {
                    dictionary.put(wc, (short) dictSize++);
                }
                w = "" + c;
            }
        }

        // Output the code for the pending phrase.
        if (!w.equals("")) {
            result.add(dictionary.get(w));
        }
        return result;
    }

    /**
     * Compresses a string of 8-bit characters into a list of 32-bit LZW codes.
     *
     * @param uncompressed text whose characters must all be below {@link #DSIZEINT}
     * @return the emitted codes, empty when the input is empty
     * @throws IllegalArgumentException if a character is outside the seed alphabet
     */
    public static List<Integer> compressInt(String uncompressed) {
        int dictSize = DSIZEINT;
        Map<String, Integer> dictionary = new HashMap<String, Integer>();
        for (int i = 0; i < DSIZEINT; i++) {
            dictionary.put("" + (char) i, i);
        }

        String w = "";
        List<Integer> result = new ArrayList<Integer>();
        for (char c : uncompressed.toCharArray()) {
            if (c >= DSIZEINT) {
                // Without this check the lookup below would silently emit a null code.
                throw new IllegalArgumentException(
                        "Character outside the 8-bit alphabet: " + (int) c);
            }
            String wc = w + c;
            if (dictionary.containsKey(wc)) {
                w = wc;
            } else {
                result.add(dictionary.get(w));
                // Add wc to the dictionary.
                dictionary.put(wc, dictSize++);
                w = "" + c;
            }
        }

        // Output the code for the pending phrase.
        if (!w.equals("")) {
            result.add(dictionary.get(w));
        }
        return result;
    }

    /**
     * Decompresses a list of 16-bit LZW codes produced by {@link #compress}.
     *
     * <p>The argument is not modified. Codes are interpreted as unsigned 16-bit values, and the
     * dictionary stops growing at 65536 entries, mirroring {@link #compress}.
     *
     * @param compressed the code sequence
     * @return the reconstructed text, empty when the code list is empty
     * @throws IllegalArgumentException if a code refers to no dictionary entry
     */
    public static String decompress(List<Short> compressed) {
        if (compressed.isEmpty()) {
            return "";
        }

        int dictSize = DSIZE;
        Map<Integer, String> dictionary = new HashMap<Integer, String>();
        for (int i = 0; i < DSIZE; i++) {
            dictionary.put(i, "" + (char) i);
        }

        Iterator<Short> codes = compressed.iterator();
        short first = codes.next();
        // The first code can only be a seed entry, since nothing has been learned yet.
        String w = dictionary.get(first & 0xFFFF);
        if (w == null) {
            throw new IllegalArgumentException("Bad compressed k: " + first);
        }

        // StringBuilder keeps decoding linear; repeated String += is quadratic on large inputs.
        StringBuilder result = new StringBuilder(w);
        while (codes.hasNext()) {
            short k = codes.next();
            int code = k & 0xFFFF;
            String entry;
            if (dictionary.containsKey(code)) {
                entry = dictionary.get(code);
            } else if (code == dictSize) {
                // The encoder used the phrase it had just created; rebuild it from w.
                entry = w + w.charAt(0);
            } else {
                throw new IllegalArgumentException("Bad compressed k: " + k);
            }

            result.append(entry);

            // Add w+entry[0] to the dictionary while 16-bit codes are still available.
            if (dictSize < MAX_SHORT_CODES) {
                dictionary.put(dictSize++, w + entry.charAt(0));
            }

            w = entry;
        }
        return result.toString();
    }

    /**
     * Decompresses a list of 32-bit LZW codes produced by {@link #compressInt}.
     *
     * <p>The argument is not modified.
     *
     * @param compressed the code sequence
     * @return the reconstructed text, empty when the code list is empty
     * @throws IllegalArgumentException if a code refers to no dictionary entry
     */
    public static String decompressInt(List<Integer> compressed) {
        if (compressed.isEmpty()) {
            return "";
        }

        int dictSize = DSIZEINT;
        Map<Integer, String> dictionary = new HashMap<Integer, String>();
        for (int i = 0; i < DSIZEINT; i++) {
            dictionary.put(i, "" + (char) i);
        }

        Iterator<Integer> codes = compressed.iterator();
        int first = codes.next();
        // The first code can only be a seed entry, since nothing has been learned yet.
        String w = dictionary.get(first);
        if (w == null) {
            throw new IllegalArgumentException("Bad compressed k: " + first);
        }

        // StringBuilder keeps decoding linear; repeated String += is quadratic on large inputs.
        StringBuilder result = new StringBuilder(w);
        while (codes.hasNext()) {
            int k = codes.next();
            String entry;
            if (dictionary.containsKey(k)) {
                entry = dictionary.get(k);
            } else if (k == dictSize) {
                // The encoder used the phrase it had just created; rebuild it from w.
                entry = w + w.charAt(0);
            } else {
                throw new IllegalArgumentException("Bad compressed k: " + k);
            }

            result.append(entry);

            // Add w+entry[0] to the dictionary.
            dictionary.put(dictSize++, w + entry.charAt(0));

            w = entry;
        }
        return result.toString();
    }

    /**
     * Command-line entry point: {@code java LZW <encode|decode|encodeInt|decodeInt>} with the
     * input file redirected to standard input and the output file to standard output.
     *
     * <p>The encoded layout is a big-endian 4-byte code count followed by the codes, each stored
     * big-endian in 2 bytes ({@code encode}) or 4 bytes ({@code encodeInt}). Timing information
     * goes to standard error so it never mixes with the data.
     *
     * <p>NOTE(review): the streams are handled with {@code java.io} data streams instead of the
     * external BinaryStdIn/BinaryStdOut helpers; confirm the layout if files written by an older
     * build must remain readable.
     *
     * @param args exactly one element naming the command
     */
    public static void main(String[] args) {

        int command = 0;

        //Check for correct argument
        if (args.length != 1) {
            System.out.println("Please enter 1 argument.\nArg1: Command ('encode', 'decode', 'encodeInt', 'decodeInt')\nAnd ensure that you are feeding in an input file and output file using '<' and '>'");
            System.exit(1);
        }
        if (args[0].equals("encode")) {
            command = 1;
        } else if (args[0].equals("decode")) {
            command = 2;
        } else if (args[0].equals("encodeInt")) {
            command = 3;
        } else if (args[0].equals("decodeInt")) {
            command = 4;
        } else {
            System.out.println("Please use either 'encode', 'decode', 'encodeInt', 'decodeInt' as the argument.");
            System.exit(1);
        }

        DataInputStream in = new DataInputStream(new BufferedInputStream(System.in));
        DataOutputStream out = new DataOutputStream(new BufferedOutputStream(System.out));
        try {
            switch (command) {
                case 1:
                    encodeShorts(in, out);
                    break;
                case 2:
                    decodeShorts(in, out);
                    break;
                case 3:
                    encodeInts(in, out);
                    break;
                case 4:
                    decodeInts(in, out);
                    break;
                default:
                    break;
            }
            // Flush rather than close so the process-wide System.out stays usable.
            out.flush();
        } catch (IOException e) {
            System.err.println("LZW I/O error: " + e);
            System.exit(1);
        }
    }

    /** Reads all of {@code in}, compresses it with 16-bit codes and writes count + codes. */
    private static void encodeShorts(InputStream in, DataOutputStream out) throws IOException {
        String s = readAll(in);

        long start = System.nanoTime();
        List<Short> compressed = compress(s);
        long elapsedTime = System.nanoTime() - start;

        // First the number of codes, then the codes themselves.
        out.writeInt(compressed.size());
        for (short code : compressed) {
            out.writeShort(code);
        }

        System.err.println("LZW Encode time: " + elapsedTime + " ns");
    }

    /** Reads count + 16-bit codes from {@code in} and writes the decoded bytes to {@code out}. */
    private static void decodeShorts(DataInputStream in, OutputStream out) throws IOException {
        int size = in.readInt();
        List<Short> compressed = new ArrayList<Short>();
        try {
            while (compressed.size() < size) {
                compressed.add(in.readShort());
            }
        } catch (EOFException e) {
            // Best effort: decode the codes that did arrive, but say that input was cut short.
            System.err.println("LZW: input truncated, read " + compressed.size()
                    + " of " + size + " codes");
        }

        long start = System.nanoTime();
        String decompressed = decompress(compressed);
        long elapsedTime = System.nanoTime() - start;

        writeAll(decompressed, out);

        System.err.println("LZW Decode time: " + elapsedTime + " ns");
    }

    /** Reads all of {@code in}, compresses it with 32-bit codes and writes count + codes. */
    private static void encodeInts(InputStream in, DataOutputStream out) throws IOException {
        String s = readAll(in);

        long start = System.nanoTime();
        List<Integer> compressed = compressInt(s);
        long elapsedTime = System.nanoTime() - start;

        // First the number of codes, then the codes themselves.
        out.writeInt(compressed.size());
        for (int code : compressed) {
            out.writeInt(code);
        }

        System.err.println("LZW Encode time: " + elapsedTime + " ns");
    }

    /** Reads count + 32-bit codes from {@code in} and writes the decoded bytes to {@code out}. */
    private static void decodeInts(DataInputStream in, OutputStream out) throws IOException {
        int size = in.readInt();
        List<Integer> compressed = new ArrayList<Integer>();
        try {
            while (compressed.size() < size) {
                compressed.add(in.readInt());
            }
        } catch (EOFException e) {
            // Best effort: decode the codes that did arrive, but say that input was cut short.
            System.err.println("LZW: input truncated, read " + compressed.size()
                    + " of " + size + " codes");
        }

        long start = System.nanoTime();
        String decompressed = decompressInt(compressed);
        long elapsedTime = System.nanoTime() - start;

        writeAll(decompressed, out);

        System.err.println("LZW Decode time: " + elapsedTime + " ns");
    }

    /** Reads every remaining byte of {@code in} and maps each byte to one char (0..255). */
    private static String readAll(InputStream in) throws IOException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        byte[] chunk = new byte[8192];
        int read;
        while ((read = in.read(chunk)) != -1) {
            buffer.write(chunk, 0, read);
        }
        byte[] bytes = buffer.toByteArray();
        char[] chars = new char[bytes.length];
        for (int i = 0; i < bytes.length; i++) {
            chars[i] = (char) (bytes[i] & 0xFF);
        }
        return new String(chars);
    }

    /** Writes each char of {@code text} as exactly one byte, with no charset or newline added. */
    private static void writeAll(String text, OutputStream out) throws IOException {
        for (int i = 0; i < text.length(); i++) {
            // OutputStream.write(int) emits the low 8 bits, which is the whole value here
            // because decoded text only contains the 8-bit seed characters.
            out.write(text.charAt(i));
        }
    }
}

感谢任何帮助。感谢。

1 个答案:

答案 0 :(得分:1)

即使是最好的压缩算法,偶尔也会产生比输入更大的输出。事实上,找到这样的输入本身就是一个很好的测试用例。LZW通过查找重复序列来进行压缩,因此不含任何重复序列的输入必然会变大。

我曾经不得不构造过这样的测试输入。我记得它类似于 "ABCD...ACBDEG..."。

编辑:现在我更仔细地查看了代码,发现你是把输出写成一个 short 值的列表。这几乎肯定是错的;其中一个必要步骤是将每个输出编码打包为所需的最少位数,而你完全遗漏了这一步。

根据您的描述判断,代码中还存在其他问题,但目前先说到这里。