Tokenizer无法正常工作

时间:2015-06-05 16:06:54

标签: c tokenize

我正在尝试将字符串标记为一个字符串数组,但似乎我的代码错了。

这是我的代码:

/*
 * Splits charLine on spaces and commas and returns a newly allocated
 * asmInstruction whose args array is meant to hold the resulting tokens.
 *
 * NOTE(review): this is the version the question reports as broken. The
 * code is left as posted; the notes below mark where it goes wrong.
 */
asmInstruction *tokenizeLine(char *charLine) {
    /* NOTE(review): countTokens() runs strtok() over charLine, and strtok()
     * overwrites the delimiter that ends each token with '\0'. After this
     * call charLine ends right after its first token. */
    int words = countTokens(charLine);
    /* NOTE(review): this buffer is never used; the pointer is overwritten by
     * the strtok() result a few lines below. */
    char *tokens = (char*) malloc(MAX_LINE_LENGTH);

    /* NOTE(review): sizeof(asmInstruction*) is the size of a pointer, not the
     * size of the struct. */
    asmInstruction *instr = (asmInstruction*) malloc(sizeof(asmInstruction*));
    /* NOTE(review): sized as MAX_LINE_LENGTH bytes rather than
     * words * sizeof(char*); malloc results are not checked anywhere here. */
    instr->args = (char**) malloc(MAX_LINE_LENGTH);

    int count = 1;
    /* NOTE(review): the first token is fetched here but never stored. */
    tokens = strtok(charLine, " ,");
    while (count <= words) {
        /* NOTE(review): strtok() advances before anything is stored, so
         * args[0] receives the second strtok() result (NULL once the tokens
         * have run out). */
        tokens = strtok(NULL, " ,");
        instr->args[count - 1] = (char*)malloc(MAX_LINE_LENGTH);
        /* NOTE(review): replaces the pointer malloc() just returned, so that
         * buffer is leaked; the slot ends up pointing into charLine or NULL. */
        instr->args[count - 1] = tokens;
        ++count;
    }

    /* NOTE(review): tokens holds the last strtok() result at this point, not
     * the pointer returned by malloc() at the top of the function. */
    free(tokens);
    return instr;
}

 /*
  * Counts the newline characters in file, reading from the current position
  * to end-of-file, then rewinds the stream to its beginning.
  * Returns the number of '\n' characters seen.
  */
uint32_t countLines(FILE *file) {
    uint32_t newlineCount = 0;

    for (int ch = fgetc(file); ch != EOF; ch = fgetc(file)) {
        if (ch != '\n') {
            continue;
        }
        newlineCount++;
    }

    /* Leave the stream positioned at the start for the next reader. */
    rewind(file);
    return newlineCount;
}

结构:

/* Result type of tokenizeLine(): the tokens found on one input line. */
typedef struct {
    char **args; /* Array of C strings, one entry per token */
} asmInstruction;

我的 main 函数在这里:

/* Tokenizes a fixed sample line and prints the first entry of the result. */
int main() {
    char sampleLine[] = "ldr r2,r1";
    asmInstruction *parsed = tokenizeLine(sampleLine);
    char *firstToken = parsed->args[0];

    printf("%s", firstToken);
    return 0;
}

/*
 * Counts the number of tokens in a line, where tokens are separated by
 * spaces and/or commas.
 *
 * NOTE(review): the count is taken with strtok(), which overwrites the
 * delimiter that ends each token with '\0'. line is therefore modified by
 * this call, and a later strtok() pass over the same buffer stops after the
 * first token.
 */
uint32_t countTokens(char line[]) {
    /* Running total of tokens returned by strtok() so far. */
    uint32_t numberOfTokens = 0;
    /* Split at spaces and commas */
    char *tokens = strtok(line, " ,");
    while (tokens != NULL) {
        tokens = strtok(NULL, " ,");
        numberOfTokens++;
    }
    return numberOfTokens;
}

所以,这应该打印 ldr,但实际打印的是 null。如果我遍历这些标记,打印出来的不是标记而是 null。我期望打印出以下标记:

LDR R2 R1

但只有第一个打印出来。

似乎 instr->args[count-1] 从未被赋值,因为显然 tokens 本身还没有被赋值。

为什么? 感谢。

2 个答案:

答案 0 :(得分:1)

以下代码:

处理了错误情况
修正了许多(大部分)逻辑错误
正确定义了 struct asmInstruction
实现了问题中所述的功能。

建议去掉 struct asmInstruction,因为并不需要它:只需在 tokenizeLine() 函数中使用 char **args = NULL;,并返回 args 即可。

对于 tokens 来说,没有必要也不应该用 malloc 分配内存,因为该指针每次都会被 strtok() 的返回值覆盖。如果做了 malloc,就会造成内存泄漏。

在下面的代码中,仍然需要补充一些逻辑:在调用 exit(EXIT_FAILURE); 之前释放 malloc 分配的内存并关闭文件。

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>


/* Result type of tokenizeLine(): the tokens found on one input line. */
struct asmInstruction
{
    char **args; /* Array of C strings, one entry per token */
};

/* NOTE(review): not referenced by any function in this listing. */
#define MAX_LINE_LENGTH (100)

// prototypes (definitions follow main)
uint32_t countTokens(char line[]);
uint32_t countLines(FILE *file);
struct asmInstruction *tokenizeLine(char *charLine);

/* Tokenizes a fixed sample line and prints the first entry of the result. */
int main( void )
{
    char sampleLine[] = "ldr r2,r1";
    struct asmInstruction *parsed = tokenizeLine(sampleLine);
    char *firstToken = parsed->args[0];

    printf("%s", firstToken);
    return 0;
} // end function: main


/*
 * Counts the tokens in line, where a token is a maximal run of characters
 * that contains neither a space nor a comma.
 *
 * The scan uses strspn()/strcspn() and only reads line. A strtok()-based
 * count would overwrite delimiters with '\0', leaving the caller with a
 * buffer that ends after its first token.
 *
 * Returns 0 for a NULL, empty, or delimiter-only line.
 */
uint32_t countTokens(char line[])
{
    static const char delimiters[] = " ,";
    uint32_t numberOfTokens = 0;

    if (line == NULL)
    {
        return 0;
    }

    /* Skip leading delimiters so the loop always starts on a token. */
    const char *cursor = line + strspn(line, delimiters);

    while (*cursor != '\0')
    {
        numberOfTokens++;
        cursor += strcspn(cursor, delimiters); /* step over the token */
        cursor += strspn(cursor, delimiters);  /* step over what follows it */
    }
    return numberOfTokens;
} // end function: countTokens


/*
 * Splits charLine into tokens separated by spaces and/or commas and returns
 * them in a newly allocated struct asmInstruction.
 *
 * instr->args holds one heap-allocated copy per token, in input order,
 * followed by a NULL entry that marks the end of the array. charLine is only
 * read; a NULL charLine is treated like an empty line.
 *
 * Tokens are located with strspn()/strcspn() instead of counting with a
 * strtok() pass and then tokenizing with a second one: the first strtok()
 * pass overwrites delimiters with '\0', so the second pass would only ever
 * see the first token.
 *
 * On allocation failure the error is reported with perror() and the process
 * exits with EXIT_FAILURE. The caller owns the result and releases it by
 * freeing each args[i], then args, then the struct itself.
 */
struct asmInstruction *tokenizeLine(char *charLine)
{
    static const char delimiters[] = " ,";
    const char *line = (charLine != NULL) ? charLine : "";

    /* First pass: count the tokens so the pointer array can be sized. */
    size_t words = 0;
    const char *cursor = line + strspn(line, delimiters);
    while ( *cursor != '\0' )
    {
        ++words;
        cursor += strcspn(cursor, delimiters);
        cursor += strspn(cursor, delimiters);
    }

    struct asmInstruction *instr = malloc(sizeof *instr);
    if( NULL == instr )
    { // then malloc failed
        perror( "malloc for struct asmInstruction failed" );
        exit( EXIT_FAILURE );
    }

    // one extra slot for the terminating NULL entry
    instr->args = calloc(words + 1, sizeof *instr->args);
    if( NULL == instr->args )
    { // then allocation failed
        perror( "malloc for array of char pointers failed:" );
        exit( EXIT_FAILURE );
    }

    /* Second pass: copy each token into its own buffer. */
    size_t count = 0;
    cursor = line + strspn(line, delimiters);
    while ( *cursor != '\0' )
    {
        size_t length = strcspn(cursor, delimiters);
        char *copy = malloc(length + 1);
        if( NULL == copy )
        { // then, malloc failed
            perror( "malloc for arg failed" );
            exit( EXIT_FAILURE );
        }

        memcpy(copy, cursor, length);
        copy[length] = '\0';
        instr->args[count++] = copy;

        cursor += length;
        cursor += strspn(cursor, delimiters);
    } // end while

    instr->args[count] = NULL; // explicit end-of-array marker

    return instr;
} // end function: tokenizeLine


 /*
  * Counts the newline characters in file, reading from the current position
  * to end-of-file, then rewinds the stream to its beginning.
  * Returns the number of '\n' characters seen.
  */
uint32_t countLines(FILE *file)
{
    uint32_t newlineCount = 0;
    int ch = fgetc(file);

    while (ch != EOF)
    {
        newlineCount += (ch == '\n') ? 1u : 0u;
        ch = fgetc(file);
    }

    /* Leave the stream positioned at the start for the next reader. */
    rewind(file);
    return newlineCount;
} // end function: countLines

答案 1 :(得分:0)

/*
 * Splits charLine into tokens separated by spaces and/or commas and returns
 * them in a newly allocated asmInstruction.
 *
 * instr->args holds one heap-allocated copy per token, in input order,
 * followed by a NULL sentinel. charLine is only read; a NULL charLine is
 * treated like an empty line.
 *
 * Tokens are located with strspn()/strcspn() instead of counting with a
 * strtok() pass and then tokenizing with a second one: the first strtok()
 * pass overwrites delimiters with '\0', so the second pass would only ever
 * see the first token.
 *
 * Returns NULL if any allocation fails; nothing is leaked in that case.
 * The caller owns the result and releases it by freeing each args[i], then
 * args, then the struct itself.
 */
asmInstruction *tokenizeLine(char *charLine) {
    static const char delimiters[] = " ,";
    const char *line = (charLine != NULL) ? charLine : "";

    /* First pass: count the tokens so the pointer array can be sized. */
    size_t words = 0;
    const char *cursor = line + strspn(line, delimiters);
    while (*cursor != '\0') {
        ++words;
        cursor += strcspn(cursor, delimiters);
        cursor += strspn(cursor, delimiters);
    }

    asmInstruction *instr = malloc(sizeof *instr);
    if (instr == NULL) {
        return NULL;
    }

    /* +1 for the NULL sentinel that terminates the array. */
    instr->args = calloc(words + 1, sizeof *instr->args);
    if (instr->args == NULL) {
        free(instr);
        return NULL;
    }

    /* Second pass: copy each token into its own buffer. */
    size_t count = 0;
    cursor = line + strspn(line, delimiters);
    while (*cursor != '\0') {
        size_t length = strcspn(cursor, delimiters);
        char *copy = malloc(length + 1);
        if (copy == NULL) {
            /* Undo the partial result before reporting failure. */
            while (count > 0) {
                free(instr->args[--count]);
            }
            free(instr->args);
            free(instr);
            return NULL;
        }

        memcpy(copy, cursor, length);
        copy[length] = '\0';
        instr->args[count++] = copy;

        cursor += length;
        cursor += strspn(cursor, delimiters);
    }
    instr->args[count] = NULL; /* set sentinel */

    return instr;
}