Question

我正在尝试把一个包含 "https://cdn.sstatic.net/stackexchange/img/logos/so/so-logo.png" 这类带引号网址的大字符串，复制到一个只保存 URL 和 NUL 终止符（即 https://cdn.sstatic.net/stackexchange/img/logos/so/so-logo.png\0）的较小数组中，但复制后数组末尾出现了垃圾字符。

问题应该在这里：

/*
 * Excerpt: reads characters from fp into link[] until a 'g' immediately
 * followed by '"' has been stored, then copies link[1..pos2-1] into the
 * smaller array link2 and prints it.
 * link[0] is skipped by the copy -- presumably it holds the opening quote
 * of the URL (confirm against the surrounding code).
 */
pos2 = 0;
while (found == 0)
{
    c = fgetc (fp); // get char from file
    link[pos2] = c;
    // NOTE: on the first iteration pos2 is 0, so link[pos2-1] reads link[-1],
    // which is outside the array.
    if (link[pos2-1] == 'g' && link[pos2] == '\"') // png and jpg files
    {
    found = 1;
    }
    ++pos2;
}
// Step back so that pos2 is the index of the closing quote in link[].
--pos2;
found = 0;
// link2 has pos2 elements: valid indices are 0 .. pos2-1.
char link2[pos2];
// Copy everything between link[0] and the closing quote, shifted down by one.
for (j = 1; j < pos2; ++j)
{
    link2[j-1] = link[j];
}
// NOTE: when the loop ends j == pos2, so this writes one element past the end
// of link2 and leaves link2[pos2-1] unset; printf below then prints whatever
// happens to be there (the "garbage" after the URL).
link2[j] = '\0';
//sprintf(cmd, "wget -O /home/arturo/Dropbox/Digital_Renders/%d \'%s\'", ++num, link2);
//system(cmd);
printf("%s\n", link2);// checking
sleep(1);

以下是代码：

#include <stdio.h>
#include <stdlib.h> // for using system calls
#include <unistd.h> // for sleep

/*
 * Fetches pages 1..499 of a forum thread with wget, scans every page for
 * post bodies and prints the URL of each image found inside a post.
 *
 * A post starts at the marker "forum-post-body-content" and ends at the
 * marker "p-comment-notes". Inside a post, each `img src=` is expected to be
 * followed by a double-quoted URL whose last character is 'g' (png / jpg).
 * The downloaded page file is removed once it has been scanned.
 *
 * Returns 0; pages that cannot be opened are reported on stderr and skipped.
 */
int main (void)
{
    char body[] = "forum-post-body-content", notes[] = "p-comment-notes", img[] = "img src=";
    char link[200], cmd[200] = {0};
    char file[32]; /* "page%d.txt" needs more than 10 bytes once i >= 10 */
    /* Index of the last character of each marker (sizeof counts the NUL). */
    const int body_last = (int) sizeof body - 2;
    const int notes_last = (int) sizeof notes - 2;
    const int img_last = (int) sizeof img - 2;
    int c, pos, pos2, fin = 0, i, j, found = 0;
    int num = 0; /* only used by the disabled download step below */
    FILE *fp;

    for (i = 1; i < 500; ++i)
    {
        snprintf(cmd, sizeof cmd, "wget -O page%d.txt 'http://www.mtgsalvation.com/forums/creativity/artwork/340782-official-digital-rendering-thread?page=%d'",i,i);
        system(cmd);
        snprintf(file, sizeof file, "page%d.txt", i);
        fp = fopen (file, "r");
        if (fp == NULL)
        {
            fprintf(stderr, "Can't open %s\n", file);
            continue;
        }

        /* Matching state must not leak from one page into the next. */
        pos = 0;
        pos2 = 0;
        while ((c = fgetc(fp)) != EOF)
        {
            if (body[pos] != c)
            {
                pos = 0;
                continue;
            }
            if (pos < body_last)
            {
                ++pos;
                continue;
            }

            /* Start-of-post marker fully matched: scan the post until the
               end-of-post marker or the end of the file. */
            pos = 0;
            while (fin == 0)
            {
                c = fgetc (fp);
                if (c == EOF) /* also stops on read errors, unlike feof() */
                    break;

                /* Track the end-of-post marker. */
                if (notes[pos] == c)
                {
                    if (pos == notes_last)
                    {
                        fin = 1;
                        pos = -1;
                    }
                    ++pos;
                }
                else
                {
                    pos = 0;
                }

                /* Track the image marker. */
                if (img[pos2] != c)
                {
                    pos2 = 0;
                    continue;
                }
                if (pos2 < img_last)
                {
                    ++pos2;
                    continue;
                }

                /* Image marker fully matched: collect the quoted URL.
                   link[0] receives the opening quote; reading stops at the
                   first '"' preceded by 'g', at EOF, or when link is full. */
                pos2 = 0;
                while (found == 0 && pos2 < (int) sizeof link)
                {
                    c = fgetc (fp); // get char from file
                    if (c == EOF)
                        break;
                    link[pos2] = (char) c;
                    if (pos2 > 0 && link[pos2 - 1] == 'g' && link[pos2] == '\"')
                    {
                        found = 1;
                    }
                    ++pos2;
                }

                if (found)
                {
                    --pos2; /* index of the closing quote, always >= 1 here */
                    char link2[pos2]; /* pos2 - 1 URL characters plus the NUL */
                    for (j = 1; j < pos2; ++j)
                    {
                        link2[j - 1] = link[j];
                    }
                    link2[j - 1] = '\0';
                    //sprintf(cmd, "wget -O /home/arturo/Dropbox/Digital_Renders/%d \'%s\'", ++num, link2);
                    //system(cmd);
                    printf("%s\n", link2);// checking
                    sleep(1);
                }
                else
                {
                    fprintf(stderr, "Skipping unterminated or too long link\n");
                }
                found = 0;
                pos2 = 0;
            }
            fin = 0;
            pos = 0; /* restart the start-of-post search from scratch */
        }
        // closing file
        fclose (fp);
        if (remove (file))
        {
            fprintf(stderr, "Can't remove file\n");
        }
    }
    return 0;
}

Answer 1

鉴于声明：

char cmd[50];

这一行有问题：

sprintf(cmd,"wget -O page%d.txt 'http://www.mtgsalvation.com/forums/creativity/artwork/340782-official-digital-rendering-thread?page=%d'",i,i);

您正尝试将超过120个字符写入一个只有50字节的缓冲区；这无法可靠地工作。

Answer 2

您的直接问题似乎是因为这一行：

link2[j] = '\0';

应该是：

link2[j-1] = '\0';

（与循环内的索引一致）。

此外，正如@mch在评论中指出的那样，在while循环的第一次迭代中，pos2为0，访问link[pos2-1]（即link[-1]）会读取数组边界之外的元素。

快速扫描剩下的代码，似乎还有其他几个问题，所以你可能需要仔细查看整个代码。

Answer 3

你还有另一处越界读取：

pos2 = 0;           /* pos2 = 0 */
while (found == 0)
{
    c = fgetc (fp);
    link[pos2] = c;
    if (link[pos2-1] == 'g' && link[pos2] == '\"')  /* reading pos2 - 1  (bad) */

请特别留意所有数组索引。很少需要对索引做偏移（例如i-1等）；如果确实这样做，必须确认偏移后的索引不会导致越界读取。

在C中复制数组并删除char数组中的垃圾

3 个答案: