意外字符被添加到C中的字符串末尾

时间:2014-02-04 03:58:43

标签: c

在我的代码中,当我把char数组传给一个函数后,字符串末尾会多出一个随机字符,如下所示:

/* Tokenizer state: a separator string and the text to be split. */
struct TokenizerT_ {        // Definition of the struct
char * sep;     // separator characters
char * toks;    // text that holds the tokens
};

/*
 * Abridged excerpt of TKCreate: allocates a tokenizer and copies the token
 * text into it. `hr` is not declared anywhere in this excerpt.
 *
 * NOTE(review): toks receives sizeof(char) == 1 byte, so the strcpy below
 * writes past the allocation for any non-empty string.
 */
TokenizerT *TKCreate(char *separators, char *ts) {
TokenizerT * inu = malloc(sizeof(*inu));
inu->toks = malloc(sizeof(char)); // Initialize char array that will store the tokens (1 byte only)

strcpy(inu->toks, hr);      
return inu;
}

....... 
// Excerpt from the caller; the declarations of `best` and `copy` are elided.
best = "sein";
printf("%s\n", best);
// NOTE(review): rondo points to one uninitialized byte, so printing it with
// %s reads a buffer that has no terminator.
char * rondo = malloc(sizeof(char));                       
printf("%s\n", rondo);
TokenizerT * Ray = TKCreate(copy, rondo);                          /
printf("%s\n", Ray->toks);

对于最后这部分代码,打印输出如下:

sein
sein
sein?

问号出现的原因是什么?这通常是一个随机字符,并不总是一个问号。

  Edit: Full code, really desperate



 /* Tokenizer state: a separator string and the text to be split. */
 struct TokenizerT_ {        // Definition of the struct
char * sep;     // separator characters
char * toks;    // text that holds the tokens
 };

 /* Characters that are recognized after a backslash in the input. */
 char nulines[10] = "ntvbrfa\\\"";           //for the arguments with backslashes
 /* Four characters ("0xNN") per entry of nulines, in the same order;
    TKCreate reads the two hex digits at offsets 4*i+2 and 4*i+3. */
 char resp[37] = "0x0a0x090x0b0x080x0d0x0c0x070x5c0x22";
 typedef struct TokenizerT_ TokenizerT;


  /*
   * TKCreate builds a tokenizer from a separator string and an input string.
   *
   * separators: separator characters; repeated characters are stored once.
   * ts:         text to tokenize; a backslash followed by a character from
   *             nulines is rewritten as "[0xNN]" using the digits in resp.
   *
   * Returns a malloc'd TokenizerT, or NULL when ts is NULL.
   *
   * NOTE(review): yr and hr are sized strlen(...) with no room for a
   * terminator and are never terminated, yet strlen/strcpy are applied to
   * both. hr can also overflow, because a two-character escape expands to
   * six characters.
   */
  TokenizerT *TKCreate(char *separators, char *ts) {

if (ts==NULL) {                 //If there are no tokens to be parsed (empty entry)
    return NULL;
}int lim = 1;

char yr[strlen(separators)]; // Holds the de-duplicated delimiters
yr[0] = *separators;
if(strlen(separators)>0){

int h =1;                          
char zmp = *(separators+h);
for(h=1; h<strlen(separators); h++){
    zmp = *(separators+h);
    int z=0;

    // z is set to -1 when zmp is already among the lim stored delimiters
    for (z=0; z<lim; z++) {
        if (zmp==yr[z]) {
            z=-1;
            break;
        }
    }

    if(z>-1){
        yr[lim] = zmp;
        lim++;}
    else{
        continue;
    }                                   // yr is a local variable that contains the delimiters
}}
TokenizerT * inu = malloc(sizeof(*inu));    //Creates TokenizerT
// NOTE(review): no +1 for the terminator that strcpy writes
inu->sep = malloc((int)strlen(yr)*sizeof(char)); 
strcpy(inu->sep, yr);              


char hr [strlen(ts)];                       
lim = 0; int q = 0; int wy=0;
for(q=0; q<strlen(ts); q++){
    if(ts[q]=='\\'){
        q++;
        // Look up the escaped character; an unknown one is dropped together
        // with its backslash because nothing is written before `continue`.
        for(wy = 0; wy<strlen(nulines); wy++){
            if (nulines[wy]==ts[q]) {
     hr[lim] = '['; hr[++lim] = '0'; hr[++lim] = 'x'; hr[++lim] = resp[wy*4+2];
     hr[++lim] = resp[wy*4+3];
                hr[++lim] = ']'; lim++;
                break;
            }
        }
        continue;
    }
    else{                               
        hr[lim] = ts[q];
        lim++;
    }
}



inu->toks = (char *)malloc(sizeof(char) * strlen(hr) + 1);

strcpy(inu->toks, hr);      //Makes copy
return inu;
 }



/*
 * TKDestroy frees both strings owned by the tokenizer and then the
 * tokenizer itself.
 * NOTE(review): tk is dereferenced without a NULL check.
 */
void TKDestroy(TokenizerT *tk) {
free(tk->toks); // Free memory associated with the token char array
free(tk->sep);  // Free memory associated with the delimiter char array
free(tk); // Free memory associated with the tokenizer
}


 /*
  * TKGetNextToken collects the characters of tk->toks that precede the first
  * character found in tk->sep. When tk->sep is empty it returns tk->toks
  * itself rather than a copy.
  *
  * NOTE(review): temps is a single uninitialized byte. strlen(temps) reads
  * it before anything was stored, the writes through temps+len go past the
  * allocation, and the delimiter path returns without storing a terminator.
  */
 char *TKGetNextToken(TokenizerT *tk) {
char * stream = tk->toks;
char * dels = tk->sep;

/* The following two lines initialize the char array to be printed
 as well as the integers to be used in the various loops */

char * temps = malloc(sizeof(char)); int g = 0;
int z = 0, x= 0, len = 0;
if (strlen(dels)==0) {          
    // temps is not freed on this path
    return stream;
}



for(z = 0; z<strlen(stream); z++){
    char b = *(stream+z);           

    for(x = 0; x<strlen(dels); x++){ 
        len = (int)strlen(temps); 
        char c = *(dels+x);

        if(c==b){   // Here, the current character is a delimiter
            g = -1;
            break;
        }

    }
    if (g==-1) {    // If delimiter, then return the current token
        return temps;
    }
        *(temps+len) = b;   
}
len = (int)strlen(temps);
*(temps+len) = '\0';    //Returns the string with the null character ending it
return temps;
 }



/*
 * TKN prints the non-empty tokens of tin, one per line. After each token it
 * destroys the tokenizer and builds a new one from the text that follows the
 * token, until sum (the number of characters left) is no longer positive.
 *
 * tin: tokenizer to consume; it is destroyed inside this function.
 * sum: number of characters still to process; main passes strlen(tin->toks).
 *
 * NOTE(review): copy and best are one byte each but receive whole strings
 * through strcpy. Only the last tmp is freed; earlier tokens are overwritten
 * without being released.
 */
void TKN(TokenizerT * tin, int sum){

char * tmp = TKGetNextToken(tin);      
char * copy = malloc(sizeof(char));

   strcpy(copy, tin->sep);                 

   // difference = token length plus the one separator that follows it
   int difference = (int)strlen(tmp)+1;
   sum = sum-difference;
  char * best = malloc(sizeof(char));
  strcpy(best, tin->toks + difference);   


    if((int)strlen(tmp)>0){              
   printf("%s\n", tmp);           
  }                                 
  TKDestroy(tin);
tin = TKCreate(copy, best);
while(sum>0){
    tmp = TKGetNextToken(tin);
    if((int)strlen(tmp)>0){                
        printf("%s\n", tmp);
    }
    difference = (int)strlen(tmp)+1;
    sum = sum-difference;
    free(best);
    best = malloc(sizeof(char));
    strcpy(best, tin->toks + difference);
       TKDestroy(tin);
       tin = TKCreate(copy, best);
 }

free(copy);
free(best);
free(tmp);

  TKDestroy(tin); //Freeing up memory associated with the Tokenizer
  return;
}

/*
 * Entry point: argv[1] is the separator string and argv[2] the text to
 * tokenize. Prints a message and returns 0 when the argument count is
 * rejected.
 *
 * NOTE(review): argc == 2 passes the first check, so argv[2] is NULL,
 * TKCreate returns NULL, and jer->toks dereferences a null pointer.
 */
int main(int argc, char **argv) {
if(argc<2){
    printf("%s\n", "Not enough arguments");
    return 0;
}
else if(argc>3){
    printf("%s\n", "Too many arguments");
    return 0;
}
 else{
char * arr = argv[1];   // Represents delimiters
char * y = argv[2];       //Represents string to be tokenized

TokenizerT * jer = TKCreate(arr, y);    //Create and initialize tokenizer
 //printf("%s\n", jer->toks);
  TKN(jer, (int)strlen(jer->toks)); 
 }
return 0;
 }

2 个答案:

答案 0 :(得分:0)

在您的大多数malloc调用中,您只分配了一个字符的空间:

malloc(sizeof(char))

你应该写:

malloc(sizeof(char) * n + 1)

其中n是您想要的字符串长度,+1是为终止空字符预留的空间。您之所以看到随机字符,是因为C和C++都使用空字符('\0')作为字符串的终止符;由于没有分配足够的空间,终止符没有被正确存入缓冲区,读取时就会越过缓冲区末尾继续往后读,直到碰到某个空字符为止。

/* Tokenizer state: a separator string and the text to be split. */
struct TokenizerT_ {        // Definition of the struct
    char * sep;     // separator characters
    char * toks;    // text that holds the tokens
};

/* Characters that are recognized after a backslash in the input. */
char nulines[10] = "ntvbrfa\\\"";           //for the arguments with backslashes
/* Four characters ("0xNN") per entry of nulines, in the same order;
   TKCreate reads the two hex digits at offsets 4*i+2 and 4*i+3. */
char resp[37] = "0x0a0x090x0b0x080x0d0x0c0x070x5c0x22";
typedef struct TokenizerT_ TokenizerT;


/*
 * TKCreate builds a tokenizer from a separator string and an input string.
 *
 * separators: characters that delimit tokens. Repeated characters are
 *             stored once, keeping the order of first appearance.
 * ts:         text to tokenize. A backslash followed by a character from
 *             `nulines` is rewritten as "[0xNN]", with the two hex digits
 *             taken from `resp`. A backslash followed by any other
 *             character is dropped together with that character.
 *
 * Returns a heap-allocated tokenizer that the caller releases with
 * TKDestroy, or NULL when either argument is NULL or an allocation fails.
 */
TokenizerT *TKCreate(char *separators, char *ts) {
    if (separators == NULL || ts == NULL) {
        return NULL;
    }

    size_t sep_len = strlen(separators);
    size_t ts_len = strlen(ts);

    /* A two-character escape grows to the six characters "[0xNN]", so the
       expanded text is at most three times as long as the input. Refuse
       inputs whose worst-case size would overflow size_t. */
    if (ts_len > ((size_t)-1 - 1) / 3) {
        return NULL;
    }

    TokenizerT *inu = (TokenizerT *)malloc(sizeof(*inu));
    if (inu == NULL) {
        return NULL;
    }
    /* +1 in both allocations leaves room for the terminating '\0'. */
    inu->sep = (char *)malloc(sep_len + 1);
    inu->toks = (char *)malloc(ts_len * 3 + 1);
    if (inu->sep == NULL || inu->toks == NULL) {
        free(inu->toks);
        free(inu->sep);
        free(inu);
        return NULL;
    }

    /* Copy each separator the first time it is seen. */
    size_t n_sep = 0;
    for (size_t i = 0; i < sep_len; i++) {
        if (memchr(inu->sep, separators[i], n_sep) == NULL) {
            inu->sep[n_sep++] = separators[i];
        }
    }
    inu->sep[n_sep] = '\0';

    /* Copy the text, expanding recognized backslash escapes. */
    size_t out = 0;
    for (size_t q = 0; q < ts_len; q++) {
        if (ts[q] != '\\') {
            inu->toks[out++] = ts[q];
            continue;
        }
        q++;    /* step onto the escaped character (the terminator if the
                   backslash was the last character, which matches nothing) */
        for (size_t wy = 0; nulines[wy] != '\0'; wy++) {
            if (nulines[wy] == ts[q]) {
                inu->toks[out++] = '[';
                inu->toks[out++] = '0';
                inu->toks[out++] = 'x';
                inu->toks[out++] = resp[wy * 4 + 2];
                inu->toks[out++] = resp[wy * 4 + 3];
                inu->toks[out++] = ']';
                break;
            }
        }
    }
    inu->toks[out] = '\0';

    return inu;
}



/*
 * TKDestroy frees both strings owned by the tokenizer and then the
 * tokenizer itself. Passing NULL is allowed and does nothing, which lets
 * callers hand over the result of a failed TKCreate.
 */
void TKDestroy(TokenizerT *tk) {
    if (tk == NULL) {
        return;
    }
    free(tk->toks); // Free memory associated with the token char array
    free(tk->sep);  // Free memory associated with the delimiter char array
    free(tk);       // Free memory associated with the tokenizer
}


/*
 * TKGetNextToken returns the first token of the tokenizer's text: the
 * characters of tk->toks that precede the first character found in tk->sep
 * (the whole text when no separator occurs or tk->sep is empty).
 *
 * The result is always a freshly allocated, '\0'-terminated string that the
 * caller must free. It may be the empty string when the text starts with a
 * separator. Returns NULL when tk or one of its strings is NULL, or when
 * the allocation fails.
 */
char *TKGetNextToken(TokenizerT *tk) {
    if (tk == NULL || tk->toks == NULL || tk->sep == NULL) {
        return NULL;
    }

    /* Length of the leading run that contains no separator character. */
    size_t len = strcspn(tk->toks, tk->sep);

    char *token = (char *)malloc(len + 1);   /* +1 for the terminator */
    if (token == NULL) {
        return NULL;
    }
    memcpy(token, tk->toks, len);
    token[len] = '\0';
    return token;
}



/*
 * TKN prints every non-empty token of a tokenizer, one per line, and then
 * destroys the tokenizer.
 *
 * tin: tokenizer to consume. Ownership passes to TKN, which releases it and
 *      every intermediate tokenizer it creates. NULL is accepted.
 * sum: number of characters of tin->toks still to be processed; main passes
 *      strlen(tin->toks).
 */
void TKN(TokenizerT * tin, int sum){
    while (tin != NULL) {
        char *tmp = TKGetNextToken(tin);
        if (tmp == NULL) {
            break;
        }

        size_t avail = strlen(tin->toks);
        size_t tok_len = strlen(tmp);
        size_t consumed = tok_len + 1;   /* the token plus the separator after it */

        if (tok_len > 0) {
            printf("%s\n", tmp);
        }

        /* Free the token only when it is a separate allocation; a token that
           aliases tin->toks is released by TKDestroy. */
        if (tmp != tin->toks) {
            free(tmp);
        }

        sum -= (int)consumed;
        /* Stop before stepping past the end of the text. */
        if (sum <= 0 || consumed >= avail) {
            break;
        }

        /* TKCreate copies both strings, so the old tokenizer can be
           destroyed as soon as the new one exists. */
        TokenizerT *next = TKCreate(tin->sep, tin->toks + consumed);
        TKDestroy(tin);
        tin = next;
    }

    if (tin != NULL) {
        TKDestroy(tin); // Free memory associated with the last tokenizer
    }
}

/*
 * Entry point: argv[1] is the separator string and argv[2] the text to
 * tokenize. Both are required; any other argument count prints a message
 * and returns 0.
 */
int main(int argc, char **argv) {
    /* Two arguments are needed. With fewer, argv[2] is not a usable string
       (it is NULL when argc == 2), so it must not reach TKCreate. */
    if (argc < 3) {
        printf("%s\n", "Not enough arguments");
        return 0;
    }
    if (argc > 3) {
        printf("%s\n", "Too many arguments");
        return 0;
    }

    char *arr = argv[1];    // Represents delimiters
    char *y = argv[2];      // Represents string to be tokenized

    TokenizerT *jer = TKCreate(arr, y);    // Create and initialize tokenizer
    if (jer == NULL) {
        return 1;           // tokenizer could not be created
    }
    TKN(jer, (int)strlen(jer->toks));      // TKN destroys jer
    return 0;
}

答案 1 :(得分:0)

// rondo points to one uninitialized byte, so %s reads a buffer with no terminator
char * rondo = malloc(sizeof(char));                       
printf("%s\n", rondo);

这是未定义行为(UB,Undefined Behaviour)。你实际做的事情是:

自由存储区(堆)-> 分配一块大小为一个char的内存(通常为1个字节),取得该位置的地址,并把这个地址存入rondo。
因此,当您解引用rondo(即*rondo)时,您只能合法地访问这一个char大小的位置;访问它旁边或附近的任何内存都是非法的。

所以在printf("%s\n", rondo);中,你是在告诉printf:传入的指针指向一个字符串,请一直打印到遇到\0(空字符,NUL)为止。但你实际上从未在这块内存里存入\0。这意味着printf会越界访问未分配的内存。你看到的输出纯粹取决于运气(或者说是不走运)。

你只能这样做

printf("%c\n", *rondo);但是在此之前你必须初始化例如

char * rondo  = malloc(sizeof(char));   // room for exactly one char
*rondo = 'K';                           // store a value before reading it
printf("%c\n",*rondo);                  // %c prints only that one char

但我猜你真正想写的是:

// One byte per character, plus one for the terminating '\0'
char * rondo = malloc(sizeof(char)*no_of_characters_in_string+1);  

其中+1是为终止空字符('\0')预留的空间。

您看到的字符并不是您的程序写入的数据。您访问了不属于自己的内存(它可能已分配给别的对象,或者归操作系统所有)。

编辑: 你的代码中还有一个大问题:你在用malloc分配内存,却从不释放它。对于小型演示程序,这也许还说得过去(其实也不行),但这绝对是非常糟糕的做法。请始终让每一个malloc()都有对应的free()。

我的建议是找一本好的教科书,它会告诉你更多关于这些问题的细节。