确定C中简单替换密码的移位

时间:2016-09-12 20:09:58

标签: c cryptography frequency-analysis caesar-cipher

我试图实现一个替换密码,将字母表向前移动三个字母来加密文本。

如何通过将密文中的实际字母频率与平均字母频率进行比较来解密文本?

下面的代码会在加密文本中生成一个包含实际字母频率的数组。

#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/*
 * Substitution table indexed by the character code of an upper-case letter.
 * Every letter maps to the letter three places earlier in the alphabet,
 * wrapping around at the start ('A' -> "X", 'D' -> "A").  Rows that are not
 * named below are zero-filled, so they hold the empty string.
 */
static char clef[][7] =
{
  ['A'] = "X", ['B'] = "Y", ['C'] = "Z", ['D'] = "A", ['E'] = "B",
  ['F'] = "C", ['G'] = "D", ['H'] = "E", ['I'] = "F", ['J'] = "G",
  ['K'] = "H", ['L'] = "I", ['M'] = "J", ['N'] = "K", ['O'] = "L",
  ['P'] = "M", ['Q'] = "N", ['R'] = "O", ['S'] = "P", ['T'] = "Q",
  ['U'] = "R", ['V'] = "S", ['W'] = "T", ['X'] = "U", ['Y'] = "V",
  ['Z'] = "W"
};

/*
 * Average relative frequency of each letter, in alphabetical order:
 * element 0 belongs to 'a' and element 25 to 'z'.
 */
double frequencyEn[] = {
    .082,   /* a */
    .015,   /* b */
    .028,   /* c */
    .043,   /* d */
    .127,   /* e */
    .022,   /* f */
    .020,   /* g */
    .061,   /* h */
    .070,   /* i */
    .002,   /* j */
    .008,   /* k */
    .040,   /* l */
    .024,   /* m */
    .067,   /* n */
    .075,   /* o */
    .019,   /* p */
    .001,   /* q */
    .060,   /* r */
    .063,   /* s */
    .091,   /* t */
    .028,   /* u */
    .010,   /* v */
    .024,   /* w */
    .002,   /* x */
    .020,   /* y */
    .001    /* z */
};

/* Number of rows in clef[]; character codes at or above this have no entry. */
enum { MAX_CLEF = sizeof clef / sizeof *clef };

/*
 * Show a prompt on standard output and read one line of input.
 *
 * The prompt text is written to stdout, all open output streams are
 * flushed, and then up to buflen - 1 characters of a line are read from fp
 * into buffer.
 *
 * Returns the result of fgets(): buffer on success, or a null pointer on
 * end-of-file or read error.
 */
static char *prompt(FILE *fp, const char *prompt, char *buffer, size_t buflen)
{
  fputs(prompt, stdout);
  fflush(NULL);

  char *line = fgets(buffer, (int)buflen, fp);
  return line;
}

/*
 * Write buffer to fp, replacing each character through the clef[] table.
 *
 * Each character is upper-cased first.  If clef[] holds a non-empty entry
 * for it, that entry is written; otherwise the upper-cased character itself
 * is written.  pad1 is emitted before the first output line and pad2 at the
 * start of every line that follows a wrap.  A newline is written whenever
 * the running column count exceeds 72.
 *
 * fp      - output stream; nothing is written when it is NULL
 * buffer  - NUL-terminated input text; nothing is written when it is NULL
 * pad1    - prefix for the first output line
 * pad2    - prefix for every later output line
 */
static void substitute(FILE *fp, const char *buffer, const char *pad1, const char *pad2)
{
  /* Callers may hand over the result of a failed fopen()/malloc(). */
  if (fp == NULL || buffer == NULL)
    return;

  const char *pad = pad1;
  int col = 0;
  for (size_t i = 0; buffer[i] != '\0'; i++)
  {
    if (col == 0)
    {
      fputs(pad, fp);
      col += (int)strlen(pad);
      pad = pad2;
    }

    /*
     * NOTE(review): col is bumped here and again in both branches below, so
     * every input character advances the column by two.  Kept as-is to
     * preserve the existing wrap positions -- confirm whether intended.
     */
    col++;

    /*
     * Go through unsigned char: a negative plain char is undefined behavior
     * for toupper() and would index clef[] before its first element.
     */
    int c = toupper((unsigned char)buffer[i]);
    if (c < MAX_CLEF && clef[c][0] != '\0')
    {
      fputs(clef[c], fp);
      col += (int)strlen(clef[c]);
    }
    else
    {
      putc(c, fp);
      col++;
    }

    if (col > 72)
    {
      putc('\n', fp);
      col = 0;
    }
  }
}


/*
 * Read everything in fp, from its beginning, into a newly allocated buffer
 * that is always NUL-terminated so it can be used as a C string.
 *
 * Returns the buffer (to be released with free()) or NULL when seeking,
 * sizing or allocation fails.
 */
static char *read_stream_text(FILE *fp)
{
  if (fseek(fp, 0, SEEK_END) != 0)
    return NULL;

  long length = ftell(fp);
  if (length < 0 || fseek(fp, 0, SEEK_SET) != 0)
    return NULL;

  char *text = malloc((size_t)length + 1);
  if (text == NULL)
    return NULL;

  /* Terminate after what was actually read, which may be less than length. */
  size_t got = fread(text, 1, (size_t)length, fp);
  text[got] = '\0';
  return text;
}

/*
 * Load plaintext.txt, echo it, write its substituted form to ciphertext.txt,
 * read that file back, echo it, and print the relative frequency of every
 * letter that occurs in it (upper and lower case counted together).
 *
 * Prints "failure" and returns EXIT_FAILURE when a file cannot be opened or
 * read; returns EXIT_SUCCESS otherwise.
 */
int main(void)
{
  int status = EXIT_FAILURE;
  int count[26] = {0};
  int accum = 0;
  char *buffer = NULL;
  char *cryptText = NULL;
  FILE *plainTextFile = fopen("plaintext.txt", "rb");
  FILE *cipherTextFile = fopen("ciphertext.txt", "w+");

  if (plainTextFile == NULL || cipherTextFile == NULL)
  {
    printf("failure");
    goto cleanup;
  }

  buffer = read_stream_text(plainTextFile);
  if (buffer == NULL)
  {
    printf("failure");
    goto cleanup;
  }
  printf("%s", buffer);

  substitute(cipherTextFile, buffer, "", "     ");

  /* The helper seeks before reading, as required after writing to a "w+" stream. */
  cryptText = read_stream_text(cipherTextFile);
  if (cryptText == NULL)
  {
    printf("failure");
    goto cleanup;
  }
  printf("%s", cryptText);

  for (const char *p = cryptText; *p != '\0'; p++)
  {
    if (*p >= 'a' && *p <= 'z')
    {
      count[*p - 'a']++;
      accum++;
    }
    else if (*p >= 'A' && *p <= 'Z')
    {
      count[*p - 'A']++;
      accum++;
    }
  }

  /* accum cannot be zero here whenever some count[i] is non-zero. */
  for (int i = 0; i < 26; i++)
  {
    if (count[i] != 0)
      printf("%c %f\n", i + 'a', ((double)count[i]) / accum);
  }

  status = EXIT_SUCCESS;

cleanup:
  free(cryptText);
  free(buffer);
  if (cipherTextFile != NULL)
    fclose(cipherTextFile);
  if (plainTextFile != NULL)
    fclose(plainTextFile);
  return status;
}

1 个答案:

答案 0 :(得分:1)

tofro 在评论中建议的基本想法是合理的。

卡方检验(Chi-Squared Test)计算观察值 $O_i$ 与期望值 $E_i$ 之差的平方除以期望值之后的总和,即 $\chi^2 = \sum_i \frac{(O_i - E_i)^2}{E_i}$。维基百科页面甚至提到了一个应用:

  

在密码分析中,卡方检验用于比较明文和(可能)解密密文的分布。测试的最低值意味着解密成功的概率很高。

将此方法应用于手头的问题:您已经提供了普通英文文本中各个字母的预期频率表。我们首先需要一个程序,将凯撒密码(Caesar Cipher)应用于给定的明文(或密文):

ec97.c

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>

// #include "stderr.h"
// Record the program name (normally argv[0]) that prefixes later messages.
void err_setarg0(const char *argv0);
// Print "Usage: <program> <usestr>" on stderr and exit with EXIT_FAILURE.
void err_usage(const char *usestr);
// Print "<program>: <errmsg>" on stderr and exit with EXIT_FAILURE.
void err_error(const char *errmsg);

/*
 * Caesar-shift standard input by the offset given as the only argument.
 *
 * The argument must be a non-negative decimal integer; it is reduced
 * modulo 26.  Upper- and lower-case letters are rotated within their own
 * alphabet and every other character is copied through unchanged.
 */
int main(int argc, char **argv)
{
    char  *buffer = 0;
    size_t buflen = 0;

    err_setarg0(argv[0]);
    if (argc != 2)
        err_usage("offset");

    /*
     * strtol() instead of atoi(): a non-numeric argument is rejected rather
     * than silently becoming offset 0, and out-of-range input is defined.
     */
    char *end = 0;
    long value = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0')
        err_error("Offset should be a positive number 1..25\n");

    int offset = (int)(value % 26);
    if (offset < 0)
        err_error("Offset should be a positive number 1..25\n");

    while (getline(&buffer, &buflen, stdin) != -1)
    {
        char *ptr = buffer;
        unsigned char u;
        while ((u = (unsigned char)*ptr++) != '\0')
        {
            if (isupper(u))
                u = (u - 'A' + offset) % 26 + 'A';
            else if (islower(u))
                u = (u - 'a' + offset) % 26 + 'a';
            putchar(u);
        }
    }

    free(buffer);
    return 0;
}

// Minimal stderr.c code
/* Program name used as the prefix of diagnostics; never NULL. */
static const char *arg0 = "unknown";

/*
 * Remember the program name for later messages.
 * A null argv0 (argv[0] may be NULL when argc is 0) is ignored so that the
 * "%s" conversions below never receive a null pointer.
 */
void err_setarg0(const char *argv0)
{
    if (argv0 != NULL)
        arg0 = argv0;
}

/* Print "Usage: <program> <usestr>" on stderr and exit with EXIT_FAILURE. */
void err_usage(const char *usestr)
{
    fprintf(stderr, "Usage: %s %s\n", arg0, usestr);
    exit(EXIT_FAILURE);
}

/* Print "<program>: <errmsg>" on stderr and exit with EXIT_FAILURE. */
void err_error(const char *errmsg)
{
    fprintf(stderr, "%s: %s\n", arg0, errmsg);
    exit(EXIT_FAILURE);
}

使用示例(程序ec97):

$ ec97 3 <<< 'The quick brown fox jumped over the lazy dog!'
Wkh txlfn eurzq ira mxpshg ryhu wkh odcb grj!
$ ec97 23 <<< 'Wkh txlfn eurzq ira mxpshg ryhu wkh odcb grj!'
The quick brown fox jumped over the lazy dog!
$

我们需要一个程序:

  1. 计算输入中每个字母的出现次数 $F_l$(忽略大小写),以及输入中的字母总数 $N$。
  2. 对于每个可能的密钥,计算 Χ² 值:以 $F_i$ 作为观察值 $O_i$,以字母频率 $L_i$ 乘以 $N$(即 $L_i \times N$)作为期望值 $E_i$,并注意根据移位量仔细调整下标。
  3. 找出最小的Χ²值;它几乎可以肯定是正确的加密密钥。
  4. 或者,在代码中:

    dc97.c

    /* Determine shift used for text encrypted using Caesar Cipher */
    
    #include <ctype.h>
    #include <stdio.h>
    #include <stdlib.h>
    
    /* Relative frequency of each letter in English, 'a' first and 'z' last */
    static const float freq_en[] =
    {
        .082,   /* a */
        .015,   /* b */
        .028,   /* c */
        .043,   /* d */
        .127,   /* e */
        .022,   /* f */
        .020,   /* g */
        .061,   /* h */
        .070,   /* i */
        .002,   /* j */
        .008,   /* k */
        .040,   /* l */
        .024,   /* m */
        .067,   /* n */
        .075,   /* o */
        .019,   /* p */
        .001,   /* q */
        .060,   /* r */
        .063,   /* s */
        .091,   /* t */
        .028,   /* u */
        .010,   /* v */
        .024,   /* w */
        .002,   /* x */
        .020,   /* y */
        .001    /* z */
    };
    
    /*
     * Guess the Caesar shift of the text on standard input.
     *
     * Letters are counted without regard to case.  For each of the 26
     * shifts a chi-squared score is computed between the observed counts
     * (rotated by the shift) and the counts predicted by freq_en[]; every
     * score is printed, followed by the shift with the smallest score.
     * Returns 1 with a message on stderr when the input holds no letters.
     */
    int main(void)
    {
        size_t tally[26] = { 0 };   /* occurrences of each letter a..z */
        size_t total = 0;           /* number of letters seen overall */
        char *line = 0;
        size_t capacity = 0;
    
        while (getline(&line, &capacity, stdin) != -1)
        {
            for (const char *p = line; *p != '\0'; p++)
            {
                unsigned char u = (unsigned char)*p;
                if (!isalpha(u))
                    continue;
                total++;
                tally[tolower(u) - 'a']++;
            }
        }
        free(line);
    
        if (total == 0)
        {
            fprintf(stderr, "No data read!\n");
            return 1;
        }
    
        int best_shift = 0;
        double best_score = 0.0;
        for (int shift = 0; shift < 26; shift++)
        {
            double score = 0.0;
            for (int letter = 0; letter < 26; letter++)
            {
                /* Ciphertext letter that plain 'letter' becomes under this shift */
                int index = (shift + letter) % 26;
                double delta = tally[index] - total * freq_en[letter];
                score += (delta * delta) / (total * freq_en[letter]);
            }
            printf("'%c' = %13.6f\n", shift + 'A', score);
    
            /* Strictly-smaller test keeps the earliest shift on ties */
            if (shift == 0 || score < best_score)
            {
                best_score = score;
                best_shift = shift;
            }
        }
    
        printf("Best match is (%d) '%c' = %10.6f\n", best_shift, best_shift + 'A', best_score);
        return 0;
    }
    

    测试

    示例运行:

    $ ec97 3 <<< 'The quick brown fox jumped over the lazy dog!' | dc97
    'A' =    143.784398
    'B' =    564.772479
    'C' =    125.131609
    'D' =     87.069134
    'E' =    178.249272
    'F' =     90.994048
    'G' =    194.326935
    'H' =    301.117365
    'I' =    710.786241
    'J' =    147.377473
    'K' =    304.179348
    'L' =    243.699823
    'M' =    137.639230
    'N' =    183.885553
    'O' =    135.720804
    'P' =    106.261239
    'Q' =    196.046792
    'R' =    506.812184
    'S' =    517.893291
    'T' =    106.267925
    'U' =    375.525078
    'V' =    202.806561
    'W' =    116.660543
    'X' =    304.590809
    'Y' =    364.746822
    'Z' =    139.113568
    Best match is (3) 'D' =  87.069134
    $
    

    我还在程序源代码、目录中的 makefile 以及几个数据文件上运行了它。第一个数据文件是通常被称为 'The Great Panjandrum' 的一段无意义文字:

    So she went into the garden
    to cut a cabbage-leaf
    to make an apple-pie
    and at the same time
    a great she-bear coming down the street
    pops its head into the shop
    What no soap
    So he died
    and she very imprudently married the Barber
    and there were present
    the Picninnies
    and the Joblillies
    and the Garyulies
    and the great Panjandrum himself
    with the little round button at top
    and they all fell to playing the game of catch-as-catch-can
    till the gunpowder ran out at the heels of their boots
    

    第二个是为这个问题精心设计的:

    Bond was on a roll, playing jazz on his sax.
    In all of his doings, nothing was as bad as what was going on now,
    but nary a jocular hint did his writing contain that
    all was not going as his boss though it would.
    Quit? Zounds!  No way to quit now.
    

    这段文字中完全没有字母 E,原以为这会干扰判断,但似乎并没有造成问题。

    $ x=$(random 1 25)
    $ ec97 $x < great.panjandrum
    Uq ujg ygpv kpvq vjg ictfgp
    vq ewv c ecddcig-ngch
    vq ocmg cp crrng-rkg
    cpf cv vjg ucog vkog
    c itgcv ujg-dgct eqokpi fqyp vjg uvtggv
    rqru kvu jgcf kpvq vjg ujqr
    Yjcv pq uqcr
    Uq jg fkgf
    cpf ujg xgta kortwfgpvna octtkgf vjg Dctdgt
    cpf vjgtg ygtg rtgugpv
    vjg Rkepkppkgu
    cpf vjg Lqdnknnkgu
    cpf vjg Ictawnkgu
    cpf vjg itgcv Rcplcpftwo jkougnh
    ykvj vjg nkvvng tqwpf dwvvqp cv vqr
    cpf vjga cnn hgnn vq rncakpi vjg icog qh ecvej-cu-ecvej-ecp
    vknn vjg iwprqyfgt tcp qwv cv vjg jggnu qh vjgkt dqqvu
    $ ec97 $x | dc97
    'A' =   3403.710518
    'B' =   1821.123417
    'C' =     29.719952
    'D' =   5238.969620
    'E' =   2077.413735
    'F' =   8274.966485
    'G' =   2446.331525
    'H' =   8950.309208
    'I' =   1362.257963
    'J' =   4419.368172
    'K' =   3161.502276
    'L' =   3113.030682
    'M' =   7778.647756
    'N' =   1112.302912
    'O' =   1497.782346
    'P' =   1490.896824
    'Q' =  10395.032795
    'R' =   1985.696886
    'S' =   2382.319358
    'T' =   4874.708427
    'U' =   3280.570608
    'V' =   1467.488275
    'W' =   7318.221432
    'X' =   5634.124795
    'Y' =   3108.392584
    'Z' =   2849.154134
    Best match is (2) 'C' =  29.719952
    $ echo "Key = $x"
    Key = 2
    $
    
    $ x=$(random 1 25)
    $ ec97 $x < bond.jazz
    Ylka txp lk x olii, mixvfkd gxww lk efp pxu.
    Fk xii lc efp alfkdp, klqefkd txp xp yxa xp texq txp dlfkd lk klt,
    yrq kxov x glzrixo efkq afa efp tofqfkd zlkqxfk qexq
    xii txp klq dlfkd xp efp ylpp qelrde fq tlria.
    Nrfq? Wlrkap!  Kl txv ql nrfq klt.
    $ ec97 $x < bond.jazz | dc97
    'A' =   2630.974532
    'B' =   2107.704681
    'C' =   1473.828862
    'D' =    865.368278
    'E' =    715.940980
    'F' =   1010.885486
    'G' =   2881.481606
    'H' =   3297.014998
    'I' =   1302.909485
    'J' =    871.665974
    'K' =    917.232399
    'L' =   2716.342024
    'M' =   2525.973294
    'N' =   2077.065275
    'O' =   3096.667665
    'P' =   2211.782909
    'Q' =   1793.302623
    'R' =   1427.340376
    'S' =   1537.934006
    'T' =    702.667000
    'U' =   3489.590647
    'V' =   3111.999371
    'W' =   1445.825861
    'X' =    142.412135
    'Y' =   2671.998367
    'Z' =   1977.131986
    Best match is (23) 'X' = 142.412135
    $ echo "Key = $x"
    Key = 23
    $
    

    它看起来效果很好。