Question

我有一个二进制文件，其中每个ASCII字符都以一个整数的形式存储。例如，内容为Hello的文件用xxd查看时显示如下：

0000000: 4800 0000 6500 0000 6c00 0000 6c00 0000  H...e...l...l...
0000010: 6f00 0000 0000 0000                      o.......

如何读取文件并将其转换为ascii字符串Hello？

编辑：下面是一个示例，准确展示了我所做的事情和得到的结果。只需编译并运行它即可。在当前工作目录（pwd）中放入一个名为“test.txt”的纯文本消息文件。运行结果是两个文件：tmp.enc和tmp.dec。我知道它完全不安全，但无论如何我还是想把它写出来。

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/* Number of solovayPrime() rounds randPrime() asks probablePrime() to run. */
#define ACCURACY 5
/* Upper bound passed to randPrime() when main() picks the primes p and q. */
#define SINGLE_MAX 100000
/* Upper bound passed to randExponent() when main() picks the public exponent. */
#define EXPONENT_MAX 10000
/* Size of readFile()'s fread chunk and of its initial heap allocation. */
#define BUF_SIZE 1024
/* Non-zero makes main(), encodeMessage() and decodeMessage() print trace output. */
#define DEBUG 0

/*
 * Modular exponentiation by repeated squaring: computes (a ^ b) mod c.
 *
 * a: base; any value, it is reduced into [0, c) before use
 * b: exponent; values <= 0 are treated as 0
 * c: modulus; must be positive
 *
 * Returns the result in the range [0, c).
 */
int modpow(long long a, long long b, int c) {
    long long res = 1 % c; /* 0 when c == 1, so x^0 mod 1 is 0 rather than 1 */

    /* Reduce the base first. With 0 <= a < c <= INT_MAX every product below
       fits in a long long, so a * a cannot overflow even when the caller
       passes a base wider than 32 bits. It also keeps the result
       non-negative for negative bases. */
    a %= c;
    if(a < 0) a += c;

    while(b > 0) {
        if(b & 1) {
            res = (res * a) % c;
        }
        b = b >> 1;
        a = (a * a) % c;
    }
    return (int)res;
}

/*
 * Jacobi symbol (a/n), evaluated with the reciprocity-based reduction.
 * Intended for odd positive n.
 *
 * Returns 1 or -1 when the reduction ends with a == 1, and 0 otherwise
 * (a reached 0 or became equal to n, i.e. gcd(a, n) != 1).
 */
int jacobi(int a, int n) {
    int sign = 1;

    while(a > 1 && a != n) {
        int halvings = 0;
        int swap;

        a %= n;
        if(a <= 1 || a == n) break;

        /* Strip every factor of two from a, counting how many were removed. */
        while(a % 2 == 0) {
            a /= 2;
            halvings++;
        }

        /* An odd number of twos contributes (2/n): +1 if n = 1 or 7 (mod 8), else -1. */
        if(halvings % 2 == 1) {
            if(n % 8 == 1 || n % 8 == 7) sign *= 1;
            else sign *= -1;
        }
        if(a <= 1 || a == n) break;

        /* Reciprocity: the sign flips when neither value is 1 (mod 4). */
        if(n % 4 != 1 && a % 4 != 1) sign = -sign;

        swap = a;
        a = n;
        n = swap;
    }

    return (a == 1) ? sign : 0;
}

/*
 * One round of the Solovay-style check used by probablePrime(): compares
 * a^((n-1)/2) mod n with the Jacobi symbol (a/n).
 *
 * Returns non-zero when the two agree and the symbol is non-zero, 0 otherwise.
 */
int solovayPrime(int a, int n) {
    int expected = jacobi(a, n);

    /* Express a symbol of -1 as its residue n - 1 so it can be compared with modpow(). */
    if(expected == -1) expected = n - 1;
    if(expected == 0) return 0;

    return modpow(a, (n - 1)/2, n) == expected;
}

/*
 * Probabilistic primality test: runs up to k rounds of solovayPrime() with
 * random bases drawn from [2, n - 1].
 *
 * Returns 1 if n is 2 or passes every round, 0 if n is 1, even, or fails a round.
 */
int probablePrime(int n, int k) {
    if(n == 2) return 1;
    if(n == 1 || n % 2 == 0) return 0;

    for(; k > 0; k--) {
        int base = rand() % (n - 2) + 2;
        if(!solovayPrime(base, n)) return 0;
    }
    return 1;
}

/*
 * Picks a random odd starting point below n and walks upward in steps of two,
 * wrapping modulo n, until probablePrime() accepts a candidate.
 *
 * Returns the first accepted candidate.
 */
int randPrime(int n) {
    int candidate = rand() % n;

    /* Make the wrap-around modulus even so stepping by two never lands on an even value. */
    n += n % 2;

    /* Start from an odd number. */
    if(candidate % 2 == 0) candidate += 1;

    while(!probablePrime(candidate, ACCURACY)) {
        candidate = (candidate + 2) % n;
    }
    return candidate;
}

/*
 * Greatest common divisor by Euclid's algorithm.
 * Returns a when b is 0; otherwise continues with (b, a % b).
 */
int gcd(int a, int b) {
    return (b == 0) ? a : gcd(b, a % b);
}

/*
 * Chooses a public exponent: starts at a random value below n and steps
 * upward (wrapping modulo n) until it finds one that is coprime with phi.
 *
 * phi: the totient the exponent must be coprime with
 * n:   exclusive upper bound for the candidate (assumed to be larger than 3)
 *
 * Returns an exponent e >= 3 with gcd(e, phi) == 1.
 */
int randExponent(int phi, int n) {
    int e = rand() % n;

    /* Clamp the starting point too, not only the wrapped values: 1 is coprime
       with every phi and would otherwise be returned, making encryption the
       identity function. */
    if(e <= 2) e = 3;

    while(gcd(e, phi) != 1) {
        e = (e + 1) % n;
        if(e <= 2) e = 3;
    }
    return e;
}

/*
 * Modular inverse via the extended Euclidean algorithm.
 *
 * Tracks the coefficient of n through the remainder sequence of (n, modulus)
 * and returns it, shifted by modulus once if it is negative. The result is
 * the inverse of n only when gcd(n, modulus) == 1.
 */
int inverse(int n, int modulus) {
    int rem_prev = n, rem_cur = modulus;
    int coef_prev = 1, coef_cur = 0;

    while(rem_cur != 0) {
        int quotient = rem_prev / rem_cur;
        int rem_next = rem_prev % rem_cur;
        int coef_next = coef_prev - quotient * coef_cur;

        rem_prev = rem_cur;
        rem_cur = rem_next;
        coef_prev = coef_cur;
        coef_cur = coef_next;
    }

    return (coef_prev < 0) ? coef_prev + modulus : coef_prev;
}

/*
 * Reads the rest of fd into a freshly allocated buffer and zero-pads it to a
 * multiple of `bytes`.
 *
 * fd:     open stream to read from
 * buffer: receives the heap buffer; the caller frees it. Set to NULL on failure.
 * bytes:  block size the length is padded to; must be positive
 *
 * At least one '\0' is always appended, so a file whose length is already a
 * multiple of `bytes` gains a whole extra block of zeros.
 *
 * Returns the padded length, or -1 if `bytes` is not positive or an
 * allocation fails.
 */
int readFile(FILE* fd, char** buffer, int bytes) {
    int len = 0, cap = BUF_SIZE, r, padded;
    char buf[BUF_SIZE];
    char *data, *grown;

    *buffer = NULL;
    if(bytes <= 0) return -1; /* len % bytes below would divide by zero */

    data = malloc(cap);
    if(data == NULL) return -1;

    while((r = fread(buf, sizeof(char), BUF_SIZE, fd)) > 0) {
        if(len + r >= cap) {
            /* One doubling is always enough: r never exceeds BUF_SIZE <= cap. */
            cap *= 2;
            grown = realloc(data, cap);
            if(grown == NULL) {
                free(data);
                return -1;
            }
            data = grown;
        }
        memcpy(&data[len], buf, r);
        len += r;
    }

    /* Pad the last block with zeros to signal end of cryptogram. An additional block is added if there is no room */
    padded = len + bytes - len % bytes;
    if(padded > cap) {
        grown = realloc(data, padded);
        if(grown == NULL) {
            free(data);
            return -1;
        }
        data = grown;
    }
    memset(&data[len], '\0', padded - len);

    *buffer = data;
    return padded;
}

/* Encrypts one block: returns modpow(m, e, n) for message m, exponent e and modulus n. */
int encode(int m, int e, int n) {
    int ciphertext = modpow(m, e, n);
    return ciphertext;
}

/* Decrypts one block: returns modpow(c, d, n) for cryptogram c, exponent d and modulus n. */
int decode(int c, int d, int n) {
    int plaintext = modpow(c, d, n);
    return plaintext;
}

/*
 * Encrypts `message` block by block.
 *
 * len:      length of message in chars; must be a multiple of `bytes`
 * bytes:    number of chars packed into each block
 * message:  input data
 * exponent, modulus: key passed to encode()
 *
 * Each block packs its chars 7 bits apart (char j is multiplied by 2^(7*j)),
 * which matches the 7-bit groups decodeMessage() extracts. Only values below
 * 128 therefore survive a round trip.
 *
 * Returns a heap array of len/bytes encrypted ints that the caller frees,
 * or NULL if the allocation fails.
 */
int* encodeMessage(int len, int bytes, char* message, int exponent, int modulus) {
    int *encoded = malloc((len/bytes) * sizeof *encoded);
    int x, i, j;

    if(encoded == NULL) return NULL;

    for(i = 0; i < len; i += bytes) {
        x = 0;
        /* Go through unsigned char: plain char may be signed, and a negative
           byte would make the packed block (and the value fed to encode())
           negative. */
        for(j = 0; j < bytes; j++) x += (unsigned char)message[i + j] * (1 << (7 * j));
        encoded[i/bytes] = encode(x, exponent, modulus);
        if(DEBUG) printf("%d ", encoded[i/bytes]);
    }
    return encoded;
}

/*
 * Decrypts `len` blocks and unpacks each into `bytes` 7-bit symbols.
 *
 * len:        number of ints in cryptogram
 * bytes:      number of symbols packed into each block
 * cryptogram: encrypted blocks
 * exponent, modulus: key passed to decode()
 *
 * Returns a heap array of len * bytes ints, one symbol per int, that the
 * caller frees, or NULL if the allocation fails.
 */
int* decodeMessage(int len, int bytes, int* cryptogram, int exponent, int modulus) {
    /* Compute the size in size_t so len * bytes cannot overflow an int. */
    int *decoded = malloc((size_t)len * bytes * sizeof *decoded);
    int x, i, j;

    if(decoded == NULL) return NULL;

    for(i = 0; i < len; i++) {
        x = decode(cryptogram[i], exponent, modulus);
        for(j = 0; j < bytes; j++) {
            /* Symbol j occupies bits 7*j .. 7*j+6 of the decrypted block. */
            decoded[i*bytes + j] = (x >> (7 * j)) % 128;
            if(DEBUG && decoded[i*bytes + j] != '\0') printf("%c", decoded[i*bytes + j]);
        }
    }
    return decoded;
}

/*
 * Generates a throw-away key pair, encrypts test.txt into tmp.enc, reads
 * tmp.enc back, decrypts it and writes the result to tmp.dec.
 *
 * Both output files are raw arrays of int. tmp.dec therefore holds
 * sizeof(int) bytes per decoded symbol rather than plain text.
 *
 * Returns EXIT_SUCCESS when every step succeeded, EXIT_FAILURE otherwise.
 */
int main(void) {

    int p, q, n, phi, e, d;
    int len, blocks;
    int bytes = 1;
    int status = EXIT_FAILURE;
    int *encoded = NULL, *decoded = NULL;
    char *buffer = NULL;
    long long product;
    size_t count;
    FILE *f;

    srand(time(NULL));

    while(1) {

        p = randPrime(SINGLE_MAX);
        q = randPrime(SINGLE_MAX);

        /* Two primes below SINGLE_MAX can multiply to more than INT_MAX.
           Form the product in a wider type and retry instead of letting
           the int multiplication overflow. */
        product = (long long)p * q;
        if(product > INT_MAX) continue;
        n = (int)product;

        if(n < 128) {
            printf("Modulus is less than 128, trying again\n");
        } else if(p != q) {
            /* p == q is rejected: (p - 1) * (q - 1) is not the totient of p * p. */
            break;
        }

    }

    phi = (p - 1) * (q - 1);
    e = randExponent(phi, EXPONENT_MAX);
    d = inverse(e, phi);

    // read the file
    f = fopen("test.txt", "r");
    if(f == NULL) return EXIT_FAILURE;
    len = readFile(f, &buffer, bytes); // len will be a multiple of bytes, to send whole chunks
    fclose(f);
    if(len <= 0 || buffer == NULL) goto cleanup;
    blocks = len / bytes;

    if(DEBUG) printf("Encoded: ");
    encoded = encodeMessage(len, bytes, buffer, e, n);
    if(encoded == NULL) goto cleanup;

    // save the encoded blocks to file; the array holds len/bytes ints, not len
    f = fopen("tmp.enc", "wb");
    if(f == NULL) goto cleanup;
    count = fwrite(encoded, sizeof(int), blocks, f);
    if(fclose(f) != 0 || count != (size_t)blocks) goto cleanup;

    // read the encoded blocks back, overwriting encoded
    f = fopen("tmp.enc", "rb");
    if(f == NULL) goto cleanup;
    count = fread(encoded, sizeof(int), blocks, f);
    fclose(f);
    if(count != (size_t)blocks) goto cleanup;

    if(DEBUG) printf("\nDecoded: ");
    decoded = decodeMessage(blocks, bytes, encoded, d, n);
    if(decoded == NULL) goto cleanup;

    // save the decoded symbols to file; decodeMessage returns blocks * bytes ints
    f = fopen("tmp.dec", "wb");
    if(f == NULL) goto cleanup;
    count = fwrite(decoded, sizeof(int), (size_t)blocks * bytes, f);
    if(fclose(f) != 0 || count != (size_t)blocks * bytes) goto cleanup;

    status = EXIT_SUCCESS;

cleanup:
    free(encoded);
    free(decoded);
    free(buffer);

    return status;
}

Answer 1

字符只是根据某些任意规则显示的整数。因为规则是任意的，所以存在无限的可能性。在这些无限的可能性中，存在多种标准（EBCDIC，ASCII，“扩展ASCII”，Unicode等多种变体），并且由于它们都是整数，因此有许多方式对其进行编码（“原样”，UTF-8，UTF-16LE，UTF-16BE等）。

要从任何内容转换为任何内容，您需要解码原始编码（如果有），将结果字符转换为新字符集（可能通过查找表），然后将字符重新编码为目标编码（如果有）。

您显示的数据看起来像“使用UTF-32LE编码的Unicode”。要将其转换为“按原样编码的ASCII”（以一种可移植的方式），您需要首先解码原始编码（例如codepoint = buffer[0] | (buffer[1] << 8) | (buffer[2] << 16) | (buffer[3] << 24)）。然后，您需要将得到的代码点（“字符”）转换为ASCII字符集。幸运的是，Unicode中的前128个代码点与ASCII相同。不幸的是，Unicode中几乎所有其他代码点都无法转换为ASCII，因此您需要决定如何处理它们（用ASCII中的'?'字符替换它们？生成一条“无法转换”的错误消息并放弃？）。无论采用哪种方式，代码看起来都可能类似于if(codepoint < 128) { character = codepoint; } else { character = '?'; }。最后，由于ASCII使用“原样”编码，因此您只需将结果字节直接写入内存即可（“重新编码”无需任何额外工作）。

Answer 2

还没有测试，但是像这样：

names.proto

/*
 * Converts `len` UTF-32 code points to a NUL-terminated ASCII string.
 *
 * instr:  code points, read as native uint32_t values (so the "le" in the
 *         name only matches the in-memory layout on a little-endian host)
 * len:    number of code points in instr
 * outstr: destination buffer
 * outlen: size of outstr in chars, including room for the terminator
 *
 * Code points below 128 are copied as-is; everything else becomes '?'.
 *
 * Returns 0 on success and -1 on failure (an argument is NULL or outstr is
 * too small). When outstr is too small it still receives a NUL-terminated,
 * truncated result.
 */
int utf32le2ascii(const uint32_t *instr, size_t len, char *outstr, size_t outlen)
{
    size_t i;
    int rc = 0;

    if (outstr == NULL || outlen == 0 || (instr == NULL && len > 0)) {
        return -1;
    }

    for (i = 0; i < len; i++) {
        if (i + 1 >= outlen) {
            /* Keep the last slot free for the terminator. */
            rc = -1;
            break;
        }
        if (instr[i] < 128) {
            outstr[i] = (char)instr[i];
        } else {
            outstr[i] = '?';
        }
    }
    outstr[i] = '\0';
    return rc;
}

将带有整数的二进制文件转换为它的ascii表示形式

2 个答案: