C - 为什么我的数组被覆盖了?

时间:2018-06-05 05:53:17

标签: c arrays string undefined-behavior overwrite

我可能会开始说我通常是一名C ++程序员,而且我没有经验用C做I / O.我正在编写一个简单的程序,试图从用户那里读取一个单词列表,按字母顺序排列每个单词中的字母(保留大写字母),并按收到的顺序打印出来。

我似乎已经确定了问题区域(强调在我的代码底部附近),但即使用我的代码上的显微镜,它似乎也是某种拜占庭式的bug。这是我的代码:

int comp (const void* left, const void* right) { //for qsort
    return ( *(char*)left - *(char*)right );
}

int main() {
    printf("Please enter a sentence. Press enter to end.\n");
    char str[500]; 
    if (!fgets(str, sizeof(str), stdin)) { //grab line
        printf("die\n");
        return 1;
    }
    str[strcspn(str, "\n")] = 0; //finds newline and kills it
    printf("You entered:\n%s\n", str);

    char abc[sizeof(str)];    //alphabetized array
    int abc_count = 0;        //keeps track of effective size of abc
    char c;   
    char* word = &c;  //will read each word from line
    char* strr = str;  //made to get around a compiler error
    int offset;
    char capitals[25]; //keeps track of capital letters
    int cap = 0;
    int i = 0;

    while (sscanf(strr, " %s%n", word, &offset) == 1) { //%n gives number of characters read by sscanf

        if (i == 0) offset++;  //fixes length of first word, which has no space preceding it
        printf("read a word of size %d\n", offset-1);
        //make sure all letters are lowercase
        cap = 0;
        for (int j = 0; j < (offset-1); j++) {
            if ( isupper(word[j])) {  //if letter is capitalized
                word[j] = tolower(word[j]);     //lowercase it for sorting
                capitals[cap] = word[j]; //remember letter for later
                cap++;
            }
        }
        qsort(word, (offset-1)/sizeof(*word), sizeof(*word), comp);    //Alphabetize. "-1" makes offset not count space in length
        //recapitalize letters
        for (int j = 0; j < (offset-1); j++) {
            for (int k = 0; k < cap; k++) {
                if (word[j] == capitals[k]) {
                    word[j] = toupper(word[j]);  //recapitalize letter
                    capitals[k] = 0;    //capital has been used
                    break;
                }
            }
        }
        //write word to abc                ///// PROBLEM AREA /////
        printf("%s\n", word);
        for (int j = 0; j < (offset-1); j++) {
            if (i == 0) printf("first word: %s\n", word); //debugging
            printf("word[%d] = %c | ", j, word[j]);      //debugging
            abc[abc_count] = word[j];
            printf("abc[%d] = %c\n", abc_count, word[j]);  //debugging
            abc_count++;
        }
        if (i == 0) printf("first word: %s\n", word);
        abc[abc_count] = ' ';
        printf("abc[%d] = space\n", abc_count);
        abc_count++;
        if (i == 0) printf("first word: %s\n", word);
        printf("so far: %s\n\n", abc);
        if (i == 0) offset--; //undo correction for first word
        strr += offset; //stops infinite loop by moving pointer. This line is the reason "strr" exists instead of using "array type" str
        i = 1;
    } //while loop                       ///// PROBLEM AREA /////

    printf("Alphabetized: \n");
    for (i = 0; i < abc_count; i++) {
        printf("%c", abc[i]);  //I write directly from memory because somehow a null char is being added to "abc" after every word entered
    }
    printf("\n");

    return 0;
}

我通过广泛使用&#34; printf&#34;调试是我的一个字符串(总是输入的第一个字)被覆盖,以及要输出的行的前面。这很奇怪。以下是一些示例输出:

Please enter a sentence. Press enter to end.
Chocolate word cAT
You entered:
Chocolate word cAT
read a word of size 9
aCcehloot                //word is sorted as desired. No issues
first word: aCcehloot
word[0] = a | abc[0] = a
first word: aacehloot      //here you can see the strange overwrite behavior
word[1] = a | abc[1] = a
first word: aaaehloot
word[2] = a | abc[2] = a
first word: aaaahloot
word[3] = a | abc[3] = a
first word: aaaaaloot
word[4] = a | abc[4] = a
first word: aaaaaaoot
word[5] = a | abc[5] = a
first word: aaaaaaaot
word[6] = a | abc[6] = a
first word: aaaaaaaat
word[7] = a | abc[7] = a
first word: aaaaaaaaa    //first word is completely replaced by its first alphabetical letter
word[8] = a | abc[8] = a
first word: aaaaaaaaaa?n
abc[9] = space
first word: aaaaaaaaaa n
so far: aaaaaaaaa n

read a word of size 4
dorw
word[0] = d | abc[10] = d
word[1] = o | abc[11] = o
word[2] = r | abc[12] = r
word[3] = w | abc[13] = w
abc[14] = space
so far: orw

read a word of size 3
AcT
word[0] = A | abc[15] = A
word[1] = c | abc[16] = c
word[2] = T | abc[17] = T
abc[18] = space
so far: cT

Alphabetized:
cT  aaaaa dorw AcT
//the final result is shown as: everything but the first letter of the last word, 
//a cut-off remainder of the first overwritten word, the second word, then the third word

1 个答案:

答案 0 :(得分:2)

这部分看起来很奇怪:

char c;   
char* word = &c;  //will read each word from line

这里word是一个指向单个字符的char指针(即c)。

但你这样做:

sscanf(strr, " %s%n", word, &offset)

将多个字符读入word指向的内存。换句话说,你写入超出c的内存,这是未定义的行为(UB)。

我希望word是一个字符数组(或指向一个字符数组),以避免超出边界访问(即UB)。