将文件分块连续读入缓冲区,删除CR与CRLF之间的字符,然后写入新文件

时间:2014-04-11 05:25:09

标签: c

我试图把一个大小为836字节的文件分块连续读入大小为200的缓冲区,查找位于CR和CRLF之间的字符,跳过它们,并把其余内容写入新文件。

像这样的文件

CR
CRLF
1bb8CR
CRLF
!DOC...........text text etx>
html text ...........text text etx...........text text etx....>CR
CRLF
1704CR
CRLF
texte classes=====.......>
.................>
CR
CRLF
0CR
CRLF
CR
CRLF
/EOF
#include <stdlib.h>
#include <stdio.h>
/*
 * Asker's attempt, as posted in the question: count the characters of the
 * input file, then read it chunk by chunk and copy into CRLF[] every
 * character that is not skipped by the "\r\n" handling below.
 *
 * NOTE(review): this listing does not compile as shown -- `bufSize` is never
 * declared (the constant is named BUF_SIZE) and one closing brace is missing,
 * so the outer read loop / main() is left unterminated.
 * NOTE(review): fptr_out is opened but nothing in this listing writes to it.
 */
int main()
{
    // NOTE(review): in C a `const int` is not a constant expression, so the
    // two arrays below are variable-length arrays, which C11 does not allow
    // to carry an initializer; presumably this was built as C++ -- confirm.
    const int       BUF_SIZE = 200;
    FILE            *fptr_in;
    FILE            *fptr_out;
    char            buffer[BUF_SIZE + 1]={0};   // one chunk of input
    char            CRLF[BUF_SIZE]={0};         // receives the characters that are kept
    char            lastChar = '\0';            // not used in this listing
    // i: read index into buffer, j: write index into CRLF, n: character count.
    // z is never modified and `sub` is only ever assigned, never read.
    int             i = 0, j = 0, z = 0, n = 0, sub;
    size_t          result = 0;                 // element count returned by the read call
    long            lSize;                      // assigned below but not used afterwards
    if((fptr_in = fopen("LogFile_ProxyBufferContents_FJ_small.html", "r")) == NULL){
        printf ("\nError opening file");
        return 0;
    }
    else{
        // First pass: consume the stream one character at a time just to
        // count it; the stream is at end-of-file when this loop finishes.
        while(fgetc(fptr_in) != EOF){
            n++;
        }
        if(feof(fptr_in)){
            printf_s("\nTotal number of bytes read: %d", n);
            printf_s(" Bytes.\n");
        }
    }
    if((fptr_out = fopen("LogFile_ProxyBufferContents_Out.html", "w")) == NULL){
        fclose(fptr_in);
        return 0;
    }
    // Obtain the File size
    fseek(fptr_in, 0, SEEK_END);
    lSize = ftell(fptr_in);
    rewind(fptr_in);    // back to the start for the chunked read below
    // Buffer Null check
    // NOTE(review): buffer is an array, not a pointer, so this comparison can
    // never be true.
    if(buffer == NULL){
        fclose(fptr_in);
        return 0;
    }
    // Read File into Buffer by result size
    while((result = fread_s(buffer, bufSize, 1, bufSize, fptr_in)) != 0){
        // NOTE(review): i and j are not reset between chunks, so after the
        // first chunk `i < result` is already false unless a later read is
        // longer, and j keeps growing against the fixed size of CRLF[].
        while(i < (long)result){
            if(buffer[i] == '\r' && buffer[i + 1] == '\n'){
                // Looks 6 characters ahead for the end of the chunk-length
                // line. NOTE(review): i + 1 and i + 6 can index past the
                // `result` characters actually read when the "\r\n" sits near
                // the end of the chunk -- this is the case the question asks
                // about.
                if(buffer[i + 6] == '\n'){
                    i += 6;
                }
                else if(buffer[i + 6] == '\r'){
                    i += 7;
                }
            }
            else{
                sub = z -i;
                CRLF[j] = buffer[i];    // keep this character
                j++;
            }
            i++;
        }

    fclose(fptr_in);
    fclose(fptr_out);
    //printf("\nBuffer after removing CRLF %s\n", CRLF);
    system("pause");
    return 0;
}

如果由CR、CRLF包围的块位于缓冲区的开头,就没有问题;但如果它位于缓冲区的末尾,用 buffer[i + 6] 就无法处理了,该怎么办?

我的想法是:如果对 buffer[i] 和 buffer[i + 1] 的判断为真(即找到了CRLF),就再检查接下来的6个字符是否满足 buffer[i] != '\0'(之所以用6,是因为CR和CRLF之间每次都保证有3或4个字符)。问题在于块长度可能被拆到两个缓冲区中:例如,CR之后缓冲区在索引5处就结束了,而下一个CRLF位于下一个缓冲区的索引2处,也就是说缓冲区1是“texte ... text> CR170\0”,缓冲区2是“4CRCRLF”,后面接着其余的HTML……我是编程新手:怎样才能跳到下一次读取的缓冲区2,跳过CR之前的字符,并把其余内容处理后保存到文件中?抱歉我的英文不好。有人能帮忙吗?

编辑:也许我没能解释清楚……我想找到第一个CRLF和第二个CRLF,然后跳过它们之间的十六进制数。在我的场景中,数据来自经过代理的套接字缓冲区,其中总有一个被CRLF包围的块(chunk)长度。你能建议我该怎么做吗?如果块长度位于缓冲区的开头、中间或末尾,都没有问题;但如果块长度被拆成两半(跨两个缓冲区),我就卡住了!

++++示例文件: https://drive.google.com/file/d/0Bw62NZwp1GSnaG1ydXVHREZibEE/edit?usp=sharing

1 个答案:

答案 0 :(得分:0)

下面是解决我这个问题的完整代码,分享出来希望能帮到其他人……

#include <stdio.h>
#include <ctype.h>
#include <tchar.h>
#include <windows.h>

// Capacity, in elements, of the input and output working buffers.
#define BUF_SIZE 4096

// Path of the file to be filtered.
const char *LOG_FILE = "ORIGIN.html";
// Path of the file that receives the filtered text.
const char *OUT_FILE = "Out.html";

// Locate the first "\r\n" pair in buffer[0 .. size-1].
// Returns the index of the '\r' of that pair, or -1 when the buffer holds
// no complete pair (this includes size <= 1).
int find_CRLF(TCHAR* buffer, int size) {
    for (int idx = 0; idx + 1 < size; ++idx) {
        if (buffer[idx] == '\r' && buffer[idx + 1] == '\n') {
            return idx;
        }
    }
    // No CRLF anywhere in the buffer
    return -1;
}

// Tell whether the first `size` characters of buffer are all hexadecimal
// digits. Returns 0 for an empty range (size == 0) and as soon as a
// non-hex character is met; returns 1 otherwise.
int is_HEX(TCHAR* buffer, int size) {
    if (size == 0) {
        return 0;
    }
    int idx = 0;
    // Advance over the leading run of hex digits
    while (idx < size && iswxdigit(buffer[idx])) {
        ++idx;
    }
    // Hex only if the run covered the whole range
    return idx >= size;
}

// Process one CRLF-terminated line that starts at buffer[0].
//   buffer, size : input data, assumed to begin at the start of a line
//   out          : destination for the characters of a kept line
//   byteCopied   : incremented by the number of characters stored in out
// Returns the number of input characters consumed (line plus its CRLF), or
// the negative result of find_CRLF when no complete line is available.
// A line consisting only of hex digits is reported on stderr and not copied.
// A kept line is copied up to, but not including, its first '\r'; the line
// terminator itself is never copied.
int get_Line(TCHAR* buffer, int size, TCHAR* out, int* byteCopied) {
    int crlf_at = find_CRLF(buffer, size);
    if (crlf_at < 0) {
        return crlf_at;
    }

    int consumed = crlf_at + 2;     // the line and its "\r\n"

    if (!is_HEX(buffer, crlf_at)) {
        int k = 0;
        // buffer[crlf_at] is '\r', so this always stops within the line
        while (k < consumed && buffer[k] != '\r') {
            out[k] = buffer[k];
            ++k;
        }
        *byteCopied += k;
    }
    else {
        fprintf(stderr, "Hex skipped\n");
    }
    return consumed;
}

// Run get_Line over every complete line in buffer[0 .. size-1].
// Kept lines are appended to out; *byteCopied is reset to 0 first and ends
// up holding the number of characters stored in out.
// Returns the number of input characters consumed; whatever follows that
// offset is an incomplete line left for the caller.
int filter_Buffer(TCHAR* buffer, int size, TCHAR* out, int* byteCopied) {
    int offset = 0;     // start of the next unprocessed line
    int step;           // characters consumed by the latest get_Line call

    *byteCopied = 0;
    while ((step = get_Line(buffer + offset, size - offset,
                            out + *byteCopied, byteCopied)) >= 0) {
        offset += step;
    }
    return offset;
}

// Read the next piece of input behind the in_pos characters carried over
// from the previous call, filter all complete lines and write them to output.
//   input, output : source and destination streams
//   in_buf        : working buffer of BUF_SIZE elements; in_buf[0 .. in_pos-1]
//                   holds the unprocessed remainder of the previous call
//   in_pos        : number of carried-over characters (expected < BUF_SIZE)
// Returns the number of unprocessed characters now at the start of in_buf,
// or -1 once the input is exhausted and everything has been written.
//
// Fix over the earlier version: a final line without CRLF used to stay in
// in_buf forever (the function never returned -1, so the caller looped
// endlessly and the tail was never written). The tail is now flushed when
// fread delivers nothing more.
int filter_BufferFile(FILE* input, FILE* output, TCHAR* in_buf, int in_pos) {
    TCHAR out_buf[BUF_SIZE];
    int bytes_read = (int)fread(in_buf + in_pos, 1, BUF_SIZE - in_pos, input);
    int size = in_pos + bytes_read;
    if(! size) {
        return -1;
    }

    int byteCopied = 0;
    int end_pos = filter_Buffer(in_buf, size, out_buf, &byteCopied);

    if(end_pos) {
        // At least one complete line was handled: emit the kept text and
        // move the incomplete remainder to the front for the next call.
        int remaining = size - end_pos;
        fwrite(out_buf, 1, byteCopied, output);
        for(int i = 0; i < remaining; ++i) {
            in_buf[i] = in_buf[i + end_pos];
        }
        return remaining;
    }

    // From here on the buffer contains no complete line at all.
    if(size == BUF_SIZE) {
        // Buffer is full without any CRLF: pass it through unchanged.
        fwrite(in_buf, 1, size, output);
        return 0;
    }

    if(bytes_read > 0) {
        // Partial line, but this call still obtained data: keep it and try
        // to complete it with the next read.
        return size;
    }

    // Nothing more could be read: the remainder is the last line of the
    // input and has no CRLF. Terminate it artificially so get_Line applies
    // the same hex-skip / copy rules as for every other line.
    if(size + 2 <= BUF_SIZE) {
        in_buf[size] = '\r';
        in_buf[size + 1] = '\n';
        get_Line(in_buf, size + 2, out_buf, &byteCopied);
        fwrite(out_buf, 1, byteCopied, output);
    }
    else {
        // No room for the terminator: pass the tail through unchanged.
        fwrite(in_buf, 1, size, output);
    }
    return -1;
}

// Drive filter_BufferFile until it reports (with a negative return value)
// that there is nothing left to process. The value returned by each call is
// handed to the next one as the count of characters carried over in in_buf.
// NOTE(review): in_buf is declared as char but passed to a TCHAR* parameter;
// the two only agree when TCHAR is char -- confirm the build configuration.
void filter_File(FILE* input, FILE* output) {
    char in_buf[BUF_SIZE];
    int carried = 0;

    for (;;) {
        carried = filter_BufferFile(input, output, in_buf, carried);
        if (carried < 0) {
            break;
        }
    }
}

// Program entry point: open LOG_FILE for reading and OUT_FILE for writing
// (both in text mode), filter the former into the latter, then wait for a
// key press via the shell's "pause" command.
// Returns 1 when either file cannot be opened, 0 otherwise.
int main(void) {

    FILE* input = fopen(LOG_FILE, "r");
    if(! input) {
        perror(LOG_FILE);
        return 1;
    }

    FILE* output = fopen(OUT_FILE, "w");
    if(! output) {
        perror(OUT_FILE);
        // Do not leak the already opened input stream on this error path
        fclose(input);
        return 1;
    }

    filter_File(input, output);

    fclose(output);
    fclose(input);

    system("pause");
    return 0;
}

演示文件:

Download the file