如何在不丢失任何信息的情况下逐字节正确读取文件

时间:2017-12-12 07:32:45

标签: c string unicode executable binaryfiles

我认为真正的目标是成功模仿strace的输出,关于read()系统调用参数读取它神奇地输出。

尽可能明确:

这意味着它以一种不会被控制台解释的方式显示它们。例如,如果文件包含\ 0004或\ 0104,那么它将显示\ 0004和\ 0104作为LITERAL STRINGS(就好像你写了\\ 0004或\\ 0104)所以它不会被控制台解释。

#define _GNU_SOURCE
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#include <ctype.h>

int main(int argc, char * argv[]) {
    #include <stdio.h>
    int shift(int times) {
        #ifdef SHIFT_VERBOSE
            int shift_mode_verbose = 1;
        #else
            int shift_mode_verbose = 0;
        #endif
        if (shift_mode_verbose == 1) {
            printf("The following arguments were passed (before) to main(%i):\n", argc);
            for(int i=1; i<argc; i++) {
                printf("arg%i: %s\n", i, argv[i]);
            }
            printf("\n");
        }
        if (shift_mode_verbose == 1) {printf("shifting %i times\n", times);}
        for(int t=1; t<times+1; ++t) { // we set this to 1 and respectively increase by 1, to avoid shifting argv[0] as it is holds the program name
            for(int i=1; i<argc; ++i) { // we set this to 1 to avoid shifting argv[0] as it is holds the program name
                if (shift_mode_verbose == 1) {printf("shift %i: arg%i: %s >", t, i, argv[i]);}
                    argv[i]  = argv[i+1];
                if (shift_mode_verbose == 1) {printf(" %s\n", argv[i]);}
            }
            --argc;
        }
        if (shift_mode_verbose == 1) {
            printf("The following arguments were passed (after) to main(%i):\n", argc);
            for(int i=1; i<argc; i++) {
                printf("arg%i: %s\n", i, argv[i]);
            }
            printf("\n");
    }
        return 0;
    }

    int fshift(int times) { // a potentially faster version of shift()
        times = argc < times ? argc : times;
        argc = (argc) - times;
        (argv) += times;
        return 0;
    }
    if (argc < 3) {
        printf("Usage: %s lines (-1 = all lines), files\n", argv[0]);
        return 1;
    }
    int LINES_TO_READ = atoi(argv[1]);
    shift(1);
    for (ssize_t i = 1; i < argc; i++) {
        const char *filename = argv[i];
        printf("printing \"%s\"\n\n", filename);        

        int fd = open(filename, O_RDONLY);

        if (fd < 0) {
            printf("cannot open \"%s\", returned %i\n", filename, fd);
            return -1;
        }

        char unsigned ch;
        size_t lines = 1;

        // Read the file byte by byte
        while (read(fd, &ch, 1) == 1) {
            if (ch == '\n') {
                printf("\\n");
            } else if (ch == '\0') {
                printf("\\0");
            } else if (ch == '\r') {
                printf("\\r");
            } else if (ch == '\t') {
                printf("\\t");
            } else if(isprint(ch)) {
                printf("%c", ch);
            } else {
                printf("\\%03o", ch);
            }
//         FILE *file = fopen(filename, "rb");
//         char unsigned ch;
//         size_t lines = 1, bytes=1;
//         // Read the file byte by byte
//         while (fread(&ch, 1, 1, file) == 1) {
//             if (ch == '\n') {
//                 printf("\\n");
//             } else if (ch == '\0') {
//                 printf("\\0");
//             } else if (ch == '\r') {
//                 printf("\\r");
//             } else if (ch == '\t') {
//                 printf("\\t");
//             } else if(isprint(ch)) {
//                 printf("%c", ch);
//             } else {
//                 printf("\\%03o", ch);
//             }
            if (ch == '\n') {
                // Stop if we read 10 lines already
                if (lines == LINES_TO_READ) {
                    break;
                }
                lines++;
            }
        }

        if (close(fd) < 0) {
            printf("cannot close \"%s\"\n", filename);
            return -1;
        }
        printf("\n");
    }
    return 0;
}

下面是它应该是什么样子的示例(除./catraw的输出不正确之外)。 (ELF标题和说明来自nasmstrace和c at。)

elf_header='\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\2\0\3\0\1\0\0\0T\200\4\0104\0\0\0\0\0\0\0\0\0\0\0004\0 \0\1\0\0\0\0\0\0\0\1\0\0\0\0\0\0\0\0\200\4\10\0\200\4\10T\0\0\0T\0\0\0\5\0\0\0\0\20\0\0'
instructions='\263*f1\300f@\315\200'
printf "$elf_header$instructions" > return_42
chmod +x return_42
./return_42 # test run to see if it actually works
echo $? # echo return code of ./return_42
strace -s 4096 cat ./return_42
gcc catraw.c --static -o catraw
strace ./catraw -1 ./return_42
./catraw -1 ./return_42

[PROGRAM ] \177ELF\001\001\001\0\0\0\0\0\0\0\0\0\002\0\003\0\001\0\0\0T\200\004\0104\0\0\0\0\0\0\0\0\0\0\04\0 \0\001\0\0\0\0\0\0\0\001\0\0\0\0\0\0\0\0\200\004\010\0\200\004\010T\0\0\0T\0\0\0\005\0\0\0\0\020\0\0\263*f1\300f@\315\200

1 个答案:

答案 0 :(得分:1)

这有点不清楚(不确定所有组件),但是这样的事情:

#include <ctype.h>

// Format the character 'c' (which should be cast from char) into a 
// string. Will use backslash-escaped form for non-printables.
void tostring(char *buf, size_t buf_max, unsigned int c)
{
  if(isprint(c))
  {
    snprintf(buf, buf_max, "%c", c);
  }
  else
  {
    snprintf(buf, buf_max, "\\%3o", c);
  }
}

使用"\177"调用c = 0177时构建snprintf()

是的,显然第一种情况下对shuffleImages()的调用可以用于打印。