我试图实现一个替换密码,将字母表向前移动三个字母来加密文本。
如何通过将实际字母频率与平均字母频率进行比较来解密文本。
下面的代码会在加密文本中生成一个包含实际字母频率的数组。
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
static char clef[][7] =
{
['A'] = "X",
['B'] = "Y",
['C'] = "Z",
['D'] = "A",
['E'] = "B",
['F'] = "C",
['G'] = "D",
['H'] = "E",
['I'] = "F",
['J'] = "G",
['K'] = "H",
['L'] = "I",
['M'] = "J",
['N'] = "K",
['O'] = "L",
['P'] = "M",
['Q'] = "N",
['R'] = "O",
['S'] = "P",
['T'] = "Q",
['U'] = "R",
['V'] = "S",
['W'] = "T",
['X'] = "U",
['Y'] = "V",
['Z'] = "W"
};
double frequencyEn[] = {
.082, .015, .028, .043, .127, .022,
.020, .061, .070, .002, .008, .040,
.024, .067, .075, .019, .001, .060,
.063, .091, .028, .010, .024, .002,
.020, .001 };
enum { MAX_CLEF = sizeof(clef) / sizeof(clef[0]) };
static char *prompt(FILE *fp, const char *prompt, char *buffer, size_t buflen)
{
printf("%s", prompt);
fflush(0);
return fgets(buffer, buflen, fp);
}
static void substitute(FILE *fp, const char *buffer, const char *pad1, const char *pad2)
{
int c;
const char *pad = pad1;
int col = 0;
for (int i = 0; (c = buffer[i]) != '\0'; i++)
{
if (col == 0)
{
fputs(pad, fp);
col += strlen(pad);
pad = pad2;
}
col++;
c = toupper(c);
if (c < MAX_CLEF && clef[c][0] != '\0')
{
fputs(clef[c], fp);
col += strlen(clef[c]);
}
else
{
putc(c, fp);
col++;
}
if (col > 72)
{
putc('\n', fp);
col = 0;
}
}
}
int main(void)
{
char * buffer = 0;
char * cryptText = 0;
long length;
FILE * plainTextFile = fopen ("plaintext.txt", "rb");
FILE * cipherTextFile = fopen("ciphertext.txt", "w+");
char string[100];
int c = 0, count[26] = {0};
int accum = 0;
if (plainTextFile)
{
fseek (plainTextFile, 0, SEEK_END);
length = ftell (plainTextFile);
fseek (plainTextFile, 0, SEEK_SET);
buffer = malloc (length);
if (buffer)
{
fread (buffer, 1, length, plainTextFile);
}
fclose (plainTextFile);
}
if (buffer)
{
printf("%s", buffer);
}
else {
printf("failure");
}
substitute(cipherTextFile, buffer, "", " ");
if (cipherTextFile)
{
fseek (cipherTextFile, 0, SEEK_END);
length = ftell (cipherTextFile);
fseek (cipherTextFile, 0, SEEK_SET);
cryptText = malloc (length);
if (cryptText)
{
fread (cryptText, 1, length, cipherTextFile);
}
fclose (cipherTextFile);
}
if (cryptText)
{
printf("%s", cryptText);
}
else {
printf("failure");
}
while ( cryptText[c] != '\0' )
{
if ( cryptText[c] >= 'a' && cryptText[c] <= 'z' ){
count[cryptText[c]-'a']++;
accum++;
}
else if (cryptText[c] >= 'A' && cryptText[c] <= 'Z'){
count[cryptText[c]-'A']++;
accum++;
}
c++;
}
for ( c = 0 ; c < 26 ; c++ )
{
if( count[c] != 0 )
printf( "%c %f\n", c+'a', ((double)count[c])/accum);
}
}
答案 0 :(得分:1)
Chi-Squared Test计算期望值 E i 与观察值 O <之间差异的平方和sub> i ,除以预期值。维基百科页面甚至提到了一个应用程序:
在密码分析中,卡方检验用于比较明文和(可能)解密密文的分布。测试的最低值意味着解密成功的概率很高。
将此问题应用于手头的问题,您已经提供了普通英文文本中不同字母的预期频率表。我们需要一个程序将Caesar Cipher应用于给定的明文(或密文):
ec97.c
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
// #include "stderr.h"
void err_setarg0(const char *argv0);
void err_usage(const char *usestr);
void err_error(const char *errmsg);
int main(int argc, char **argv)
{
char *buffer = 0;
size_t buflen = 0;
err_setarg0(argv[0]);
if (argc != 2)
err_usage("offset");
int offset = atoi(argv[1]) % 26;
if (offset < 0)
err_error("Offset should be a positive number 1..25\n");
while (getline(&buffer, &buflen, stdin) != -1)
{
char *ptr = buffer;
unsigned char u;
while ((u = (unsigned char)*ptr++) != '\0')
{
if (isupper(u))
u = (u - 'A' + offset) % 26 + 'A';
else if (islower(u))
u = (u - 'a' + offset) % 26 + 'a';
putchar(u);
}
}
free(buffer);
return 0;
}
// Minimal stderr.c code
static const char *arg0 = "unknown";
void err_setarg0(const char *argv0)
{
arg0 = argv0;
}
void err_usage(const char *usestr)
{
fprintf(stderr, "Usage: %s %s\n", arg0, usestr);
exit(EXIT_FAILURE);
}
void err_error(const char *errmsg)
{
fprintf(stderr, "%s: %s\n", arg0, errmsg);
exit(EXIT_FAILURE);
}
使用示例(程序ec97
):
$ ec97 3 <<< 'The quick brown fox jumped over the lazy dog!'
Wkh txlfn eurzq ira mxpshg ryhu wkh odcb grj!
$ ec97 23 <<< 'Wkh txlfn eurzq ira mxpshg ryhu wkh odcb grj!'
The quick brown fox jumped over the lazy dog!
$
我们需要一个程序:
或者,在代码中:
dc97.c
/* Determine shift used for text encrypted using Caesar Cipher */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
/* Letter frequencies a-z in English */
static const float freq_en[] =
{
.082, .015, .028, .043, .127, .022,
.020, .061, .070, .002, .008, .040,
.024, .067, .075, .019, .001, .060,
.063, .091, .028, .010, .024, .002,
.020, .001
};
int main(void)
{
char *buffer = 0;
size_t buflen = 0;
size_t freq[26] = { 0 };
size_t count = 0;
while (getline(&buffer, &buflen, stdin) != -1)
{
char *ptr = buffer;
unsigned char u;
while ((u = (unsigned char)*ptr++) != '\0')
{
if (isalpha(u))
{
count++;
freq[tolower(u) - 'a']++;
}
}
}
free(buffer);
if (count == 0)
{
fprintf(stderr, "No data read!\n");
return 1;
}
double chisq[26];
for (int shift = 0; shift < 26; shift++)
{
chisq[shift] = 0.0;
for (int letter = 0; letter < 26; letter++)
{
int index = (shift + letter) % 26;
double delta = freq[index] - count * freq_en[letter];
chisq[shift] += (delta * delta) / (count * freq_en[letter]);
}
printf("'%c' = %13.6f\n", shift + 'A', chisq[shift]);
}
int min_i = 0;
double val_i = chisq[0];
for (int i = 1; i < 26; i++)
{
if (chisq[i] < val_i)
{
val_i = chisq[i];
min_i = i;
}
}
printf("Best match is (%d) '%c' = %10.6f\n", min_i, min_i + 'A', val_i);
return 0;
}
示例运行:
$ ec97 3 <<< 'The quick brown fox jumped over the lazy dog!' | dc97
'A' = 143.784398
'B' = 564.772479
'C' = 125.131609
'D' = 87.069134
'E' = 178.249272
'F' = 90.994048
'G' = 194.326935
'H' = 301.117365
'I' = 710.786241
'J' = 147.377473
'K' = 304.179348
'L' = 243.699823
'M' = 137.639230
'N' = 183.885553
'O' = 135.720804
'P' = 106.261239
'Q' = 196.046792
'R' = 506.812184
'S' = 517.893291
'T' = 106.267925
'U' = 375.525078
'V' = 202.806561
'W' = 116.660543
'X' = 304.590809
'Y' = 364.746822
'Z' = 139.113568
Best match is (3) 'D' = 87.069134
$
我还在程序源代码和目录中的makefile上运行它 几个数据文件。第一个是通常被称为'The Great Panjandrum'的废话:
So she went into the garden
to cut a cabbage-leaf
to make an apple-pie
and at the same time
a great she-bear coming down the street
pops its head into the shop
What no soap
So he died
and she very imprudently married the Barber
and there were present
the Picninnies
and the Joblillies
and the Garyulies
and the great Panjandrum himself
with the little round button at top
and they all fell to playing the game of catch-as-catch-can
till the gunpowder ran out at the heels of their boots
第二个是为这个问题精心设计的:
Bond was on a roll, playing jazz on his sax.
In all of his doings, nothing was as bad as what was going on now,
but nary a jocular hint did his writing contain that
all was not going as his boss though it would.
Quit? Zounds! No way to quit now.
没有那个,可能会被认为可以抛弃它,但似乎没有问题。
$ x=$(random 1 25)
$ ec97 $x < great.panjandrum
Uq ujg ygpv kpvq vjg ictfgp
vq ewv c ecddcig-ngch
vq ocmg cp crrng-rkg
cpf cv vjg ucog vkog
c itgcv ujg-dgct eqokpi fqyp vjg uvtggv
rqru kvu jgcf kpvq vjg ujqr
Yjcv pq uqcr
Uq jg fkgf
cpf ujg xgta kortwfgpvna octtkgf vjg Dctdgt
cpf vjgtg ygtg rtgugpv
vjg Rkepkppkgu
cpf vjg Lqdnknnkgu
cpf vjg Ictawnkgu
cpf vjg itgcv Rcplcpftwo jkougnh
ykvj vjg nkvvng tqwpf dwvvqp cv vqr
cpf vjga cnn hgnn vq rncakpi vjg icog qh ecvej-cu-ecvej-ecp
vknn vjg iwprqyfgt tcp qwv cv vjg jggnu qh vjgkt dqqvu
$ ec97 $x | dc97
'A' = 3403.710518
'B' = 1821.123417
'C' = 29.719952
'D' = 5238.969620
'E' = 2077.413735
'F' = 8274.966485
'G' = 2446.331525
'H' = 8950.309208
'I' = 1362.257963
'J' = 4419.368172
'K' = 3161.502276
'L' = 3113.030682
'M' = 7778.647756
'N' = 1112.302912
'O' = 1497.782346
'P' = 1490.896824
'Q' = 10395.032795
'R' = 1985.696886
'S' = 2382.319358
'T' = 4874.708427
'U' = 3280.570608
'V' = 1467.488275
'W' = 7318.221432
'X' = 5634.124795
'Y' = 3108.392584
'Z' = 2849.154134
Best match is (2) 'C' = 29.719952
$ echo "Key = $x"
Key = 2
$
$ x=$(random 1 25)
$ ec97 $x < bond.jazz
Ylka txp lk x olii, mixvfkd gxww lk efp pxu.
Fk xii lc efp alfkdp, klqefkd txp xp yxa xp texq txp dlfkd lk klt,
yrq kxov x glzrixo efkq afa efp tofqfkd zlkqxfk qexq
xii txp klq dlfkd xp efp ylpp qelrde fq tlria.
Nrfq? Wlrkap! Kl txv ql nrfq klt.
$ ec97 $x < bond.jazz | dc97
'A' = 2630.974532
'B' = 2107.704681
'C' = 1473.828862
'D' = 865.368278
'E' = 715.940980
'F' = 1010.885486
'G' = 2881.481606
'H' = 3297.014998
'I' = 1302.909485
'J' = 871.665974
'K' = 917.232399
'L' = 2716.342024
'M' = 2525.973294
'N' = 2077.065275
'O' = 3096.667665
'P' = 2211.782909
'Q' = 1793.302623
'R' = 1427.340376
'S' = 1537.934006
'T' = 702.667000
'U' = 3489.590647
'V' = 3111.999371
'W' = 1445.825861
'X' = 142.412135
'Y' = 2671.998367
'Z' = 1977.131986
Best match is (23) 'X' = 142.412135
$ echo "Key = $x"
Key = 23
$
它看起来效果很好。