编码解码二进制数据

时间:2014-06-19 09:33:22

标签: binary ascii decode encode

我知道转换为十进制时的二进制值“1000001”是65,根据ASCII表是字母A.

现在,我有一个二进制文件(xxx.bin),假设其中包含上面的位序列。我怎么知道这个二进制值表示的是十进制值65还是字符A?

我想,当我收到二进制文件时,我也应该同时知道这个文件遵循什么字符编码或字符集(例如通过相应的说明文档)?或者我能以某种方式找出它是如何被编码的吗?

有人能讲清楚如何正确解码并读取二进制文件(xxx.bin)中的数据吗?是只需将每个字节转换为十进制值,还是要遵循某种字符编码逻辑?我对此感到困惑!

2 个答案:

答案 0 :(得分:0)

这是我写的一个程序,它将字符串编码为点('.')和空格(' ')。如果提供编码后的代码,它还可以将其解码回原始字符串。可以亲自尝试一下。其中有一个encode_find()函数,它将每个字符转换为二进制等价物并将结果存储在变量“en”中。反向过程由decode_find()函数实现。

import java.io.*;
/**
 * Console toy that encodes text as a stream of dots and spaces and decodes it back.
 *
 * Every character is written as exactly 7 binary digits (most significant bit first).
 * On display a one-bit is shown as '.' and a zero-bit as ' '. Decoding reads the
 * dot/space stream back in groups of 7 symbols.
 */
class ed2
{

    /** Number of binary digits used for every encoded character. */
    private static final int BITS_PER_CHAR = 7;

    /** Rows of the "ENCODER" banner (printed just for fun). */
    private static final String[] ENCODER_BANNER = {
        "********  ***         ***  *********  ************  ******      ********  *****",
        "********  ****        ***  *********  ************  ********    ********  *** **",
        "***       *****       ***  ***        ***      ***  ***   ***   ***       ***  **",
        "***       *** **      ***  ***        ***      ***  ***    ***  ***       *** **",
        "******    ***  **     ***  ***        ***      ***  ***    ***  ******    *****",
        "******    ***   **    ***  ***        ***      ***  ***    ***  ******    *****",
        "***       ***     **  ***  ***        ***      ***  ***    ***  ***       *** **",
        "***       ***      ** ***  ***        ***      ***  ***   ***   ***       ***  **",
        "*******   ***       *****  *********  ************  ********    ********  ***   **",
        "*******   ***        ****  *********  ************  ******      ********  ***    **"
    };

    /** Rows of the "DECODER" banner (printed just for fun). */
    private static final String[] DECODER_BANNER = {
        "******      ********  *********  ************  ******      ********  *****",
        "********    ********  *********  ************  ********    ********  *** **",
        "***   ***   ***       ***        ***      ***  ***   ***   ***       ***  **",
        "***    ***  ***       ***        ***      ***  ***    ***  ***       *** **",
        "***    ***  ******    ***        ***      ***  ***    ***  ******    *****",
        "***    ***  ******    ***        ***      ***  ***    ***  ******    *****",
        "***    ***  ***       ***        ***      ***  ***    ***  ***       *** **",
        "***   ***   ***       ***        ***      ***  ***   ***   ***       ***  **",
        "********    ********  *********  ************  ********    ********  ***   **",
        "******      ********  *********  ************  ******      ********  ***    **"
    };

    // The only reader over standard input. All console input, including the menu
    // choice in main(), must go through it: a second BufferedReader on System.in
    // could swallow buffered input that this one would then never see.
    BufferedReader obj = new BufferedReader(new InputStreamReader(System.in));

    /**
     * Runs the interactive encode flow: banner, prompt, conversion, display.
     *
     * @throws InterruptedException if the banner animation is interrupted
     * @throws IOException if reading from standard input fails
     */
    void encode()throws InterruptedException,IOException
    {
        encoder_symbol();
        String rs = encode_input();
        String ren;
        try
        {
            ren = encode_find(rs);
        }
        catch (IllegalArgumentException e)
        {
            // The input contained a character that does not fit in 7 bits;
            // report it instead of printing a code that cannot be decoded.
            System.out.println(e.getMessage());
            return;
        }
        encode_display(ren);
    }

    /**
     * Runs the interactive decode flow: banner, prompt, conversion, display.
     *
     * @throws InterruptedException if the banner animation is interrupted
     * @throws IOException if reading from standard input fails
     */
    void decode()throws InterruptedException,IOException
    {
        decoder_symbol();
        String rs = decode_input();
        String rde = decode_find(rs);
        decode_display(rde);
    }

    /**
     * Prints the "ENCODER" banner row by row with a short delay (just for fun).
     *
     * @throws InterruptedException if a delay is interrupted
     */
    void encoder_symbol()throws InterruptedException
    {
        print_banner(ENCODER_BANNER, 2700);
    }

    /**
     * Prints the "DECODER" banner row by row with a short delay (just for fun).
     *
     * @throws InterruptedException if a delay is interrupted
     */
    void decoder_symbol()throws InterruptedException
    {
        print_banner(DECODER_BANNER, 1000);
    }

    /**
     * Prints banner rows, pausing 100 ms after each row except the last, which is
     * followed by {@code finalPauseMillis}; then prints two blank lines.
     */
    private static void print_banner(String[] rows, long finalPauseMillis)throws InterruptedException
    {
        for (int r = 0; r < rows.length; r++)
        {
            System.out.println(rows[r]);
            Thread.sleep(r < rows.length - 1 ? 100 : finalPauseMillis);
        }
        System.out.println();
        System.out.println();
    }

    /**
     * Prompts for and reads the text to encode.
     *
     * @return the line entered, or an empty string if standard input is exhausted
     * @throws IOException if reading from standard input fails
     */
    String encode_input()throws IOException
    {
        System.out.println("ENTER THE STRING TO BE ENCODED");
        String s = obj.readLine();
        return (s == null) ? "" : s; // readLine() yields null at end of input
    }

    /**
     * Prompts for and reads the dot/space code to decode.
     *
     * @return the line entered, or an empty string if standard input is exhausted
     * @throws IOException if reading from standard input fails
     */
    String decode_input()throws IOException
    {
        System.out.println("ENTER THE CODE TO BE DECODED");
        String s = obj.readLine();
        return (s == null) ? "" : s; // readLine() yields null at end of input
    }

    /**
     * Converts a string into the concatenation of the 7-digit binary forms of its
     * characters, written with the digits '1' and '0', most significant bit first.
     *
     * @param s the text to encode; {@code null} is treated as empty
     * @return the binary digit string, 7 digits per input character
     * @throws IllegalArgumentException if a character has a code above 127 and so
     *         cannot be represented in 7 bits
     */
    String encode_find(String s)
    {
        if (s == null)
        {
            return "";
        }
        StringBuilder en = new StringBuilder(s.length() * BITS_PER_CHAR);
        for (int i = 0; i < s.length(); i++)
        {
            char ic = s.charAt(i);
            if (ic > 0x7F)
            {
                // A wider value would emit more than 7 digits and shift every
                // following group, so the decoder would produce garbage.
                throw new IllegalArgumentException(
                    "CHARACTER '" + ic + "' AT POSITION " + i + " IS NOT 7-BIT ASCII AND CANNOT BE ENCODED");
            }
            for (int bit = BITS_PER_CHAR - 1; bit >= 0; bit--)
            {
                en.append(((ic >> bit) & 1) == 1 ? '1' : '0');
            }
        }
        return en.toString();
    }

    /**
     * Converts a dot/space code back into text. The code is read in groups of 7
     * symbols; within a group '.' counts as a one-bit and any other character as a
     * zero-bit, most significant bit first. Symbols left over after the last full
     * group are ignored.
     *
     * @param s the code to decode; {@code null} is treated as empty
     * @return the decoded text, one character per full group of 7 symbols
     */
    String decode_find(String s)
    {
        if (s == null)
        {
            return "";
        }
        int groups = s.length() / BITS_PER_CHAR;
        StringBuilder de = new StringBuilder(groups);
        int f = 0; // index of the next symbol to consume
        for (int g = 0; g < groups; g++)
        {
            int ac = 0; // character code accumulated for this group
            for (int j = 0; j < BITS_PER_CHAR; j++)
            {
                ac <<= 1;
                if (s.charAt(f) == '.')
                {
                    ac |= 1;
                }
                f++;
            }
            de.append((char) ac);
        }
        return de.toString();
    }

    /**
     * Prints the encoded message: '.' for every '1' digit and ' ' for every '0'
     * digit; any other character in {@code en} is skipped.
     *
     * @param en the binary digit string produced by {@link #encode_find(String)}
     */
    void encode_display(String en)
    {
        System.out.println("YOUR ENCODED MESSAGE IS :");
        StringBuilder shown = new StringBuilder(en.length());
        for (int i = 0; i < en.length(); i++)
        {
            char ic = en.charAt(i);
            if (ic == '1')
            {
                shown.append('.');
            }
            else if (ic == '0')
            {
                shown.append(' ');
            }
        }
        // Terminate the line so whatever is printed next does not run into the code.
        System.out.println(shown);
    }

    /**
     * Prints the decoded message on its own line.
     *
     * @param de the decoded text
     */
    void decode_display(String de)
    {
        System.out.println(de);
    }

    /**
     * Entry point: asks whether to encode ('E'/'e') or decode ('D'/'d') and runs
     * the matching flow. Any other answer, or no answer, exits without action.
     *
     * @throws IOException if reading from standard input fails
     * @throws InterruptedException if a banner animation is interrupted
     */
    public static void main(String args[])throws IOException,InterruptedException
    {
        ed2 ed = new ed2();

        System.out.println("PRESS 'E' TO ENCODE A MESSAGE OR PRESS 'D' TO DECODE A MESSAGE");
        // Read the whole answer line through the instance's reader so the line
        // terminator is consumed and later prompts see the input that follows it.
        String choice = ed.obj.readLine();
        if (choice == null || choice.isEmpty())
        {
            return;
        }
        char ch = choice.charAt(0);

        if ((ch == 'e') || (ch == 'E'))
        {
            ed.encode();
        }
        else if ((ch == 'd') || (ch == 'D'))
        {
            ed.decode();
        }
    }

}

答案 1 :(得分:0)

如果您不知道文件的格式,则会丢失数据。

问题是如何防止错过重要的“元数据”。典型的方法是通过协议和规范。

在某些情况下,您可以确定文件中含有在某些编码下无效的字节值或序列,例如0x80不是有效的ASCII,0xE2 0x20不是有效的UTF-8。有专门做这类猜测的库。但是,永远无法排除CP437;因此,每个文件都可以被视为文本文件,就像每个文件都可以被视为取值为0-255或-128到127的值序列一样,并且可以考虑具有偶数字节数的每个文件....

有协议分析器根据各种嵌套协议解释字节,例如Wireshark,还有Hex编辑器显示字节和尝试转换为字符。一些十六进制编辑器非常复杂,具有按字节查找,按文本查找,允许您描述协议并将其应用于文件的某些部分等功能。 (旧的磁盘扇区编辑器就是一个例子。)

最重要的是,这是逆向工程,希望你不需要这样做。

顺便说一下,65在许多编码中都表示字母A,不只是ASCII。