我有一个二进制文件,其中保存的是代表ASCII字符的整数。例如,内容为Hello
的文件,其值为(使用xxd查看)
0000000: 4800 0000 6500 0000 6c00 0000 6c00 0000 H...e...l...l...
0000010: 6f00 0000 0000 0000 o.......
如何读取该文件并将其转换为ASCII字符串Hello?
编辑:下面是一个示例,准确展示了我所做的工作以及得到的结果。只需编译并运行它即可。在当前工作目录(pwd)中放入一个包含纯文本消息、名为“test.txt”的文件。运行后会得到2个文件:tmp.enc和tmp.dec。我知道它完全不安全,但无论如何我还是想把它写出来。
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#define ACCURACY 5
#define SINGLE_MAX 100000
#define EXPONENT_MAX 10000
#define BUF_SIZE 1024
#define DEBUG 0
/*
 * Modular exponentiation by repeated squaring: computes (a ^ b) mod c.
 *
 * a: base (any magnitude; it is reduced modulo c before every squaring)
 * b: exponent; values <= 0 are treated as 0
 * c: modulus; must be non-zero whenever b > 0
 *
 * Returns the result as an int. With c == 1 the result is 0, including
 * for b == 0.
 */
int modpow(long long a, long long b, int c) {
    /* Everything is congruent to 0 modulo 1, even a^0. */
    long long result = (c == 1) ? 0 : 1;

    while (b > 0) {
        /*
         * Reduce first so |a| < |c|: the products below then stay under
         * 2^62 and cannot overflow long long, whatever base was passed in.
         */
        a %= c;
        if (b & 1) {
            result = (result * a) % c;
        }
        b >>= 1;
        a = a * a; /* reduced at the top of the next iteration */
    }
    return (int)result;
}
/*
 * Jacobi symbol (a / n), evaluated by repeatedly reducing a modulo n,
 * stripping factors of two and swapping the arguments.
 *
 * Returns 1 or -1 when the reduction ends with a == 1, and 0 otherwise
 * (a reached 0, or a == n, i.e. the arguments share a factor).
 */
int jacobi(int a, int n) {
    int sign = 1;

    for (;;) {
        int halvings = 0;
        int swap;

        if (a <= 1 || a == n) {
            break;
        }
        a %= n;
        if (a <= 1 || a == n) {
            break;
        }

        /* Strip every factor of two from a, counting how many there were. */
        while (a % 2 == 0) {
            a /= 2;
            halvings++;
        }
        /* An odd number of twos contributes (2 / n): +1 iff n = 1 or 7 mod 8. */
        if (halvings % 2 == 1 && !(n % 8 == 1 || n % 8 == 7)) {
            sign = -sign;
        }
        if (a <= 1 || a == n) {
            break;
        }

        /* Swapping flips the sign unless one of the two is 1 mod 4. */
        if (n % 4 != 1 && a % 4 != 1) {
            sign = -sign;
        }
        swap = a;
        a = n;
        n = swap;
    }

    return (a == 1) ? sign : 0;
}
/*
 * One round of the Solovay-Strassen test for candidate n with witness a.
 *
 * Compares a^((n-1)/2) mod n against the Jacobi symbol (a / n), with -1
 * mapped to n - 1. Returns non-zero when they agree and the symbol is
 * non-zero, 0 otherwise.
 */
int solovayPrime(int a, int n) {
    int expected = jacobi(a, n);

    /* modpow yields residues in [0, n), so represent -1 as n - 1. */
    if (expected == -1) {
        expected = n - 1;
    }
    if (expected == 0) {
        return 0;
    }
    return modpow(a, (n - 1) / 2, n) == expected;
}
/*
 * Probabilistic primality check: runs up to k Solovay-Strassen rounds on n
 * with witnesses drawn from rand() in the range [2, n - 1].
 *
 * Returns 1 if n == 2 or every round passes; 0 if n is 1, even, or any
 * round fails.
 */
int probablePrime(int n, int k) {
    if (n == 2) {
        return 1;
    }
    if (n == 1 || n % 2 == 0) {
        return 0;
    }
    for (; k > 0; k--) {
        int witness = rand() % (n - 2) + 2;
        if (!solovayPrime(witness, n)) {
            return 0;
        }
    }
    return 1;
}
/*
 * Picks a random odd starting point below n and walks upward in steps of
 * two (wrapping modulo n rounded up to even) until probablePrime accepts
 * the candidate with ACCURACY rounds. Returns that candidate.
 */
int randPrime(int n) {
    int candidate = rand() % n;

    /* An even wrap-around modulus keeps candidate odd after "+ 2 mod n". */
    n = n + n % 2;
    /* Force the starting point to be odd. */
    if (candidate % 2 == 0) {
        candidate++;
    }
    while (!probablePrime(candidate, ACCURACY)) {
        candidate = (candidate + 2) % n;
    }
    return candidate;
}
/*
 * Greatest common divisor of a and b by the Euclidean algorithm.
 * gcd(a, 0) returns a unchanged.
 */
int gcd(int a, int b) {
    while (b != 0) {
        int remainder = a % b;
        a = b;
        b = remainder;
    }
    return a;
}
/*
 * Chooses a public exponent: starts at rand() % n and scans upward
 * (wrapping modulo n) for the first value e >= 3 with gcd(e, phi) == 1.
 *
 * The lower bound is applied to the initial draw as well, so the trivial
 * exponent e == 1 (which would leave the message unencrypted) can never be
 * returned. Loops until a suitable exponent is found.
 */
int randExponent(int phi, int n) {
    int e = rand() % n;

    for (;;) {
        /* Clamp before testing, so a first draw of 0, 1 or 2 is rejected too. */
        if (e <= 2) {
            e = 3;
        }
        if (gcd(e, phi) == 1) {
            return e;
        }
        e = (e + 1) % n;
    }
}
/*
 * Modular inverse of n modulo modulus via the extended Euclidean algorithm.
 *
 * Tracks only the Bezout coefficient of n; a negative coefficient is
 * shifted into range by adding modulus once. The result is only a true
 * inverse when gcd(n, modulus) == 1.
 */
int inverse(int n, int modulus) {
    int remPrev = n, remCur = modulus;
    int coefPrev = 1, coefCur = 0;

    while (remCur != 0) {
        int quotient = remPrev / remCur;
        int remNext = remPrev % remCur;
        int coefNext = coefPrev - quotient * coefCur;

        remPrev = remCur;
        remCur = remNext;
        coefPrev = coefCur;
        coefCur = coefNext;
    }

    return (coefPrev < 0) ? coefPrev + modulus : coefPrev;
}
/*
 * Reads the rest of stream fd into a freshly allocated buffer and pads it
 * with zero bytes up to the next multiple of `bytes`. At least one zero is
 * always appended, so a whole extra block is added when the data already
 * ends on a block boundary.
 *
 * fd:     stream to read from
 * buffer: receives the allocated buffer; the caller must free() it.
 *         Set to NULL on failure.
 * bytes:  block size in bytes; must be > 0
 *
 * Returns the padded length (a multiple of `bytes`), or -1 if `bytes` is
 * not positive or memory could not be allocated.
 */
int readFile(FILE* fd, char** buffer, int bytes) {
    int len = 0, cap = BUF_SIZE, padded;
    size_t got;
    char chunk[BUF_SIZE];
    char *data, *grown;

    *buffer = NULL;
    if (bytes <= 0) {
        return -1; /* len % bytes below would divide by zero */
    }
    data = malloc(cap);
    if (data == NULL) {
        return -1;
    }

    while ((got = fread(chunk, sizeof(char), BUF_SIZE, fd)) > 0) {
        /* One doubling always suffices because got <= BUF_SIZE <= cap. */
        if (len + (int)got >= cap) {
            cap *= 2;
            grown = realloc(data, cap);
            if (grown == NULL) {
                free(data); /* realloc left the old block valid; release it */
                return -1;
            }
            data = grown;
        }
        memcpy(data + len, chunk, got);
        len += (int)got;
    }

    /* Pad the last block with zeros to signal the end of the cryptogram. */
    padded = len + bytes - len % bytes;
    if (padded > cap) {
        grown = realloc(data, padded);
        if (grown == NULL) {
            free(data);
            return -1;
        }
        data = grown;
    }
    memset(data + len, 0, padded - len);

    *buffer = data;
    return padded;
}
/* Encrypts one block: returns modpow(m, e, n), i.e. m^e mod n. */
int encode(int m, int e, int n) {
    int cipher = modpow(m, e, n);
    return cipher;
}
/* Decrypts one block: returns modpow(c, d, n), i.e. c^d mod n. */
int decode(int c, int d, int n) {
    int plain = modpow(c, d, n);
    return plain;
}
/*
 * Encrypts a message block by block.
 *
 * Each group of `bytes` characters is packed into one integer, 7 bits per
 * character with the first character in the lowest bits, and passed through
 * encode() with the given exponent and modulus.
 *
 * len:     message length in characters; expected to be a multiple of bytes
 * bytes:   characters per block
 * message: input characters (at least len of them)
 *
 * Returns a malloc'd array of len / bytes encrypted integers that the
 * caller must free(), or NULL if the allocation fails.
 */
int* encodeMessage(int len, int bytes, char* message, int exponent, int modulus) {
    int *encoded = malloc((size_t)(len / bytes) * sizeof *encoded);
    int x, i, j;

    if (encoded == NULL) {
        return NULL;
    }
    for (i = 0; i < len; i += bytes) {
        x = 0;
        /* Pack the block: character j occupies bits 7*j .. 7*j + 6. */
        for (j = 0; j < bytes; j++) {
            x += message[i + j] * (1 << (7 * j));
        }
        encoded[i / bytes] = encode(x, exponent, modulus);
        if (DEBUG) printf("%d ", encoded[i / bytes]);
    }
    return encoded;
}
/*
 * Decrypts a cryptogram block by block.
 *
 * Each integer is passed through decode() and split back into `bytes`
 * 7-bit characters, lowest bits first.
 *
 * len:        number of encrypted integers in cryptogram
 * bytes:      characters per block
 * cryptogram: encrypted integers (at least len of them)
 *
 * Returns a malloc'd array of len * bytes ints, one character value per
 * int, that the caller must free(), or NULL if the allocation fails.
 * Because every character occupies a whole int, the array must be narrowed
 * to char before being written out as text.
 */
int* decodeMessage(int len, int bytes, int* cryptogram, int exponent, int modulus) {
    int *decoded = malloc((size_t)len * bytes * sizeof *decoded);
    int x, i, j;

    if (decoded == NULL) {
        return NULL;
    }
    for (i = 0; i < len; i++) {
        x = decode(cryptogram[i], exponent, modulus);
        for (j = 0; j < bytes; j++) {
            /* Character j lives in bits 7*j .. 7*j + 6 of the block. */
            decoded[i * bytes + j] = (x >> (7 * j)) % 128;
            if (DEBUG) if (decoded[i * bytes + j] != '\0') printf("%c", decoded[i * bytes + j]);
        }
    }
    return decoded;
}
/*
 * Demo driver: generates a toy RSA key pair, encrypts "test.txt" into
 * "tmp.enc", reads the cryptogram back and decrypts it into "tmp.dec".
 *
 * The decoded message is written one byte per character, with the trailing
 * zero padding removed, so "tmp.dec" is plain text rather than a dump of
 * 4-byte integers.
 *
 * Returns EXIT_SUCCESS on success and EXIT_FAILURE if any file operation
 * or allocation fails.
 */
int main(void) {
    int p, q, n, phi, e, d;
    int len, blocks, textLen, i;
    int bytes = 1;
    int status = EXIT_FAILURE;
    int *encoded = NULL, *decoded = NULL;
    char *buffer = NULL;
    FILE *fp;

    srand(time(NULL));
    while(1) {
        p = randPrime(SINGLE_MAX);
        q = randPrime(SINGLE_MAX);
        /*
         * Reject p == q (phi below would be wrong) and products that do not
         * fit in an int (signed overflow; modpow takes an int modulus).
         */
        if(p == q || (long long)p * q > INT_MAX) continue;
        n = p * q;
        if(n < 128) {
            printf("Modulus is less than 128, trying again\n");
        } else {
            break;
        }
    }
    phi = (p - 1) * (q - 1);
    e = randExponent(phi, EXPONENT_MAX);
    d = inverse(e, phi);

    /* Read the plaintext; len is a multiple of bytes, to send whole chunks. */
    fp = fopen("test.txt", "r");
    if(fp == NULL) return EXIT_FAILURE;
    len = readFile(fp, &buffer, bytes);
    fclose(fp);
    if(len <= 0 || buffer == NULL) goto cleanup;
    blocks = len / bytes;

    if(DEBUG) printf("Encoded: ");
    encoded = encodeMessage(len, bytes, buffer, e, n);
    if(encoded == NULL) goto cleanup;

    /* Save the cryptogram: one int per block, i.e. `blocks` ints in total. */
    fp = fopen("tmp.enc", "wb");
    if(fp == NULL) goto cleanup;
    if(fwrite(encoded, sizeof(int), blocks, fp) != (size_t)blocks) {
        fclose(fp);
        goto cleanup;
    }
    if(fclose(fp) != 0) goto cleanup;

    /* Read the cryptogram back from disk, overwriting the in-memory copy. */
    fp = fopen("tmp.enc", "rb");
    if(fp == NULL) goto cleanup;
    if(fread(encoded, sizeof(int), blocks, fp) != (size_t)blocks) {
        fclose(fp);
        goto cleanup;
    }
    fclose(fp);

    if(DEBUG) printf("\nDecoded: ");
    decoded = decodeMessage(blocks, bytes, encoded, d, n);
    if(decoded == NULL) goto cleanup;

    /* Drop the zero padding that readFile appended to fill the last block. */
    textLen = blocks * bytes;
    while(textLen > 0 && decoded[textLen - 1] == 0) textLen--;

    /*
     * decoded holds one character per int; emit each as a single byte so the
     * file contains "Hello" rather than "H\0\0\0e\0\0\0...".
     */
    fp = fopen("tmp.dec", "wb");
    if(fp == NULL) goto cleanup;
    for(i = 0; i < textLen; i++) {
        if(fputc(decoded[i], fp) == EOF) {
            fclose(fp);
            goto cleanup;
        }
    }
    if(fclose(fp) != 0) goto cleanup;

    status = EXIT_SUCCESS;

cleanup:
    free(encoded);
    free(decoded);
    free(buffer);
    return status;
}
答案 0 :(得分:1)
字符只是根据某些任意规则显示的整数。因为规则是任意的,所以存在无限的可能性。在这些无限可能中,存在多种标准(EBCDIC、ASCII、“扩展ASCII”的多种变体、Unicode等),并且由于它们都是整数,因此有许多方式对其进行编码(“原样”、UTF-8、UTF-16LE、UTF-16BE……)。
要从任何内容转换为任何内容,您需要解码原始编码(如果有),将得到的字符转换为新字符集(可能通过查找表),然后将字符重新编码为目标编码(如果有)。
您显示的数据看起来像“使用UTF-32LE编码的Unicode”。要(以一种可移植的方式)将其转换为“采用原样编码的ASCII”,您需要首先解码原始编码(例如 codepoint = buffer[0] | (buffer[1] << 8) | (buffer[2] << 16) | (buffer[3] << 24))。然后,您需要将得到的代码点(“字符”)转换为ASCII字符集。幸运的是,Unicode中的前128个代码点与ASCII相同。不幸的是,Unicode中几乎所有其他代码点都不能转换为ASCII,因此您需要决定如何处理它们(用ASCII中的 '?' 字符替换它们?生成一条“无法转换”的错误消息并放弃?)。无论如何,代码看起来都可能像 if(codepoint < 128) { character = codepoint; } else { /* 替换为 '?' 或报错 */ }。最后,由于ASCII使用“原样”编码,因此您只需将结果字节直接写入内存即可(“重新编码”无需任何额外工作)。
答案 1 :(得分:0)
还没有测试,但是像这样:
names.proto
/*
 * Converts `len` 32-bit code points from instr into a NUL-terminated ASCII
 * string in outstr. Code points >= 128 are replaced by '?'.
 *
 * outlen is the capacity of outstr in bytes; it must be at least len + 1
 * to hold the terminator.
 *
 * Returns 0 on success, -1 if outstr is too small (in which case the
 * characters that did fit have already been written and the string is not
 * terminated).
 */
int utf32le2ascii(uint32_t *instr, size_t len, char *outstr, size_t outlen)
{
    size_t pos;

    for (pos = 0; pos < len; pos++) {
        if (pos >= outlen) {
            return -1;
        }
        outstr[pos] = (instr[pos] < 128) ? (char)instr[pos] : '?';
    }

    /* Room is still needed for the terminating NUL. */
    if (pos >= outlen) {
        return -1;
    }
    outstr[pos] = '\0';
    return 0;
}
返回值:0
表示成功;-1 表示失败。