使用C将文件名复制到数组中

时间:2013-10-12 20:22:44

标签: c

我想找出某个目录中特定类型的所有文件(这里硬编码为 tif),并把它们的文件名复制到一个数组中。代码可以干净地编译(gcc -Wall 没有任何错误或警告),但存在一些内存问题。我写的程序看起来运行正常(没有段错误),可是输出的字符串里有时会出现非 ASCII 的奇怪字符。于是我用 valgrind 运行了它,结果报告了错误(输出见下文),但我无法查出真正的问题所在。在某些目录下,valgrind 自身会发生段错误(而程序在同一目录下直接运行却一切正常)。

#include <sys/types.h>
#include <dirent.h>
#include <stdio.h>
#include <search.h>
#include <string.h>
#include <error.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdbool.h>



#define min(X, Y)  ((X) < (Y) ? (X) : (Y))

int exitStatus = 0;

/*------------------------------------------------------------------------------
* array_find
*
* Scans argv[0..argc) for the first element that "matches" key and returns
* its index.
*
* ARGS - key  = string to look for
*        argv = array of strings to search
*        argc = number of elements in argv
*
* RETURN - index of the first matching element, or -1 if none matches.
*
* NOTE - The comparison is strncmp() limited to the length of the SHORTER of
* the two strings, so this is a prefix match in both directions rather than
* an equality test: key "-d" matches "-delete", and an empty key or an empty
* argv[i] compares over zero characters and therefore matches anything.
*-----------------------------------------------------------------------------*/

int array_find(char *key, char *argv[], int argc){
    int i;
    for (i = 0; i < argc; i++)
    {
        /* Optional trace of every comparison.
         * NOTE(review): min(strlen, strlen) has type size_t but is printed
         * with %d, which expects an int. */
        #ifdef DEBUG_array_find
        printf("strncmp(%s, %s, %d) = %d\n", key, argv[i], min(strlen(key), strlen(argv[i])), strncmp(key, argv[i], min(strlen(key), strlen(argv[i]))));
        #endif
        /* Compare only as many characters as the shorter string has. */
        if (strncmp(key, argv[i], min(strlen(key), strlen(argv[i]))) == 0)
        {
            return i;
        }
    }
    return -1;
}


/*------------------------------------------------------------------------------
* ends_with
*
* Tests whether str ends with sub, using the last occurrence in str of sub's
* first character as the only candidate start of the suffix.
*
* ARGS - str = string to be checked
*        sub = string to look for
*
* RETURN - true if both pointers are NULL, or if the tail of str that begins
*          at the last occurrence of sub's first character is exactly sub.
*          False otherwise (including when exactly one pointer is NULL).
*
* NOTE - Because only the LAST occurrence of sub's first character is tried,
* a suffix whose first character repeats inside it is not recognised
* (e.g. str "baa", sub "aa" yields false). A suffix such as ".tif" is not
* affected, since it contains its first character only once.
*-----------------------------------------------------------------------------*/

bool ends_with(char *str, char *sub){
    if (str == NULL && sub == NULL)
    {
        return true;
    }
    if (str == NULL || sub == NULL)
    {
        return false;
    }
    char *last_instance_of_sub = rindex(str, *sub); // Last position in str of sub's first character
    int sub_len = strlen(sub);
    /* The tail starting at that position must have exactly sub's length. */
    if (last_instance_of_sub == NULL || strlen(last_instance_of_sub) != sub_len)
    {
        return false;
    }
    /* Lengths are equal here, so this is a full comparison of tail and sub. */
    return strncmp(last_instance_of_sub, sub, sub_len) == 0;
}

/*
 * Entry point: collects the names of all ".tif" entries of a directory into
 * an array and prints them.
 *
 * Options (located with array_find):
 *   -d DIR   directory to scan; defaults to the current working directory
 *   -t N     thread count; parsed and (in debug builds) printed, not used
 *            further in this function
 *
 * The directory is read twice: once to count matching entries, once to copy
 * their names.
 *
 * Exits with status 1 if the directory cannot be opened or a name cannot be
 * duplicated; otherwise returns the file-scope variable exitStatus.
 */
int main(int argc, char *argv[])
{
    /*Parse args*/
    DIR *dir;
    int index = array_find("-d", argv, argc);
    char *dirname;
    if (index >= 0)
    {
        /* NOTE(review): no check that index + 1 < argc; if "-d" is the last
         * argument this reads argv[argc]. */
        dirname = argv[index + 1];
        dir = opendir(dirname);
    }
    else
    {
        /* NOTE(review): getcwd(NULL, 0) presumably returns an allocated
         * buffer (glibc behaviour); it is never freed in this function. */
        dirname = getcwd(NULL, 0);
        if (dirname == NULL)
        {
            perror("Error getting current directory name.");
            exit(1);
        }
        dir = opendir(dirname);
    }
    if (dir == NULL)
    {
        perror(dirname);
        exit(1);
    }

    #ifdef DEBUG_MAIN
        printf("dirname = %s\n", dirname);
    #endif

    int threads = 1;
    index = array_find("-t", argv, argc);
    if (index >= 0)
    {
        /* NOTE(review): same unchecked index + 1 as for "-d" above. */
        threads = atoi(argv[index + 1]);
    }
    #ifdef DEBUG_MAIN
        printf("threads = %d\n", threads);
    #endif

    /* First pass: count the entries whose name ends with ".tif". */
    struct dirent *entry = readdir(dir);
    int num_files = 0;
    while (entry != NULL)
    {
        if (ends_with(entry->d_name, ".tif")){
            #ifdef DEBUG_MAIN
                printf("%s\n", entry->d_name);
            #endif
            num_files++;
        }
        entry = readdir(dir);
    }

    if (closedir(dir) != 0)
    {
        perror("Failed to close directory.");
    }

    #ifdef DEBUG_MAIN
        printf("Num files = %d\n", num_files);
    #endif

    /* Reopen the directory so the second pass starts from the first entry. */
    dir = opendir(dirname);
    if (dir == NULL)
    {
        perror(dirname);
        exit(1);
    }

    entry = readdir(dir);

    /* Variable-length array sized by the count from the first pass; its
     * elements are not initialised. */
    char *file_names[num_files];
    int i = 0;
    /* Second pass: duplicate each matching name into file_names.
     * NOTE(review): i is incremented for EVERY directory entry, not only for
     * matches. Matching names are therefore stored at the entry's position
     * in the directory, slots belonging to non-matching entries are left
     * uninitialised, and i can exceed num_files, writing past the end of
     * file_names. */
    for(; entry != NULL; i++)
    {
        if (ends_with(entry->d_name, ".tif")){
            file_names[i] = strdup(entry->d_name);
            if (file_names[i] == NULL)
            {
                perror("Could not create the filename array.\n");
                exit(1);
            }
        }
        entry = readdir(dir);
    }

/*    #ifdef DEBUG_MAIN*/
        /* Prints the first num_files slots, including any the loop above
         * never assigned. The strdup'd strings are not freed (the free call
         * below is commented out). */
        for (i = 0; i < num_files; i++)
        {
            printf("%s\n", file_names[i]);
/*            free(file_names[i]);*/
        }
/*    #endif*/



    /* NOTE(review): dir was obtained from opendir(); it is released here
     * with free() rather than closedir(). */
    free(dir);
    return exitStatus;
}

Valgrind输出:

    ==24488== Memcheck, a memory error detector
==24488== Copyright (C) 2002-2012, and GNU GPL'd, by Julian Seward et al.
==24488== Using Valgrind-3.8.1 and LibVEX; rerun with -h for copyright info
==24488== Command: ./myprogram -d /home/chris/Pictures/Catalinas\ with\ Christie/Processed/
==24488== 
dirname = /home/chris/Pictures/Catalinas with Christie/Processed/
threads = 1
cacti2_lzn.tif
DSC_2139_lzn.tif
DSC_1512_lzn.tif
DSC_1296_lzn.tif
DSC_1577_lzn.tif
DSC_1658_lzn.tif
DSC_1293_lzn.tif
DSC_1631_lzn.tif
DSC_1418_lzn.tif
DSC_1315_2crop_lzn.tif
DSC_1377_lzn2crop.tif
DSC_2167_lzn.tif
1981-1985-HDR3_lzn2.tif
DSC_2129_lzn.tif
DSC_1448_lzn.tif
DSC_1607_lzn.tif
DSC_1564_lzn.tif
DSC_2052-DSC_2072_lzn.tif
DSC_1487_lzn.tif
DSC_1591_2_lzn.tif
DSC_2124_lzn.tif
DSC_1622_lzn.tif
DSC_2157_lzn.tif
DSC_1685_lzn.tif
Num files = 24
cacti2_lzn.tif
DSC_2139_lzn.tif
DSC_1512_lzn.tif
DSC_1296_lzn.tif
DSC_1577_lzn.tif
DSC_1658_lzn.tif
==24488== Use of uninitialised value of size 8
==24488==    at 0x4C2D7C2: __GI_strlen (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==24488==    by 0x4EA4ECB: puts (ioputs.c:36)
==24488==    by 0x400D52: main (batch-convert.c:161)
==24488== 
==24488== Invalid read of size 1
==24488==    at 0x4C2D7C2: __GI_strlen (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==24488==    by 0x4EA4ECB: puts (ioputs.c:36)
==24488==    by 0x400D52: main (batch-convert.c:161)
==24488==  Address 0x0 is not stack'd, malloc'd or (recently) free'd
==24488== 
==24488== 
==24488== Process terminating with default action of signal 11 (SIGSEGV)
==24488==  Access not within mapped region at address 0x0
==24488==    at 0x4C2D7C2: __GI_strlen (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==24488==    by 0x4EA4ECB: puts (ioputs.c:36)
==24488==    by 0x400D52: main (batch-convert.c:161)
==24488==  If you believe this happened as a result of a stack
==24488==  overflow in your program's main thread (unlikely but
==24488==  possible), you can try to increase the size of the
==24488==  main thread stack using the --main-stacksize= flag.
==24488==  The main thread stack size used in this run was 8388608.
==24488== 
==24488== HEAP SUMMARY:
==24488==     in use at exit: 33,243 bytes in 25 blocks
==24488==   total heap usage: 26 allocs, 1 frees, 66,051 bytes allocated
==24488== 
==24488== LEAK SUMMARY:
==24488==    definitely lost: 0 bytes in 0 blocks
==24488==    indirectly lost: 0 bytes in 0 blocks
==24488==      possibly lost: 0 bytes in 0 blocks
==24488==    still reachable: 33,243 bytes in 25 blocks
==24488==         suppressed: 0 bytes in 0 blocks
==24488== Rerun with --leak-check=full to see details of leaked memory
==24488== 
==24488== For counts of detected and suppressed errors, rerun with: -v
==24488== Use --track-origins=yes to see where uninitialised values come from
==24488== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 2 from 2)
Segmentation fault (core dumped)

我已经有一段时间完全没有用过 C 语言了,但据我所知(根据手册页),strdup 应该会使用 malloc 在堆上为字符串副本分配内存。在想起 strdup 函数之前,我曾尝试手动完成这件事,也遇到了同样的错误。我当时以为是自己的代码有缺陷,觉得换成 strdup 就能解决,但显然问题出在别处。

谁能告诉我我做错了什么?

编辑1: 根据要求,我已经添加了该程序的完整源代码。另外,对于那些建议我对照 num_files 检查索引的人:如你们所见,我事先统计了 tif 文件的数量,因此确切知道会有多少个文件被复制到数组中,所以没有必要检查索引。

另外说明一下,程序是在定义了 DEBUG_MAIN 的情况下编译的,因此 #ifdef DEBUG_MAIN 块中的所有内容都会运行。没有定义其他调试标志。

3 个答案:

答案 0 :(得分:1)

在你的代码中这部分for(; entry != NULL; i++)太危险了,例如假设num_files的值是1000,如果给定目录包含1002个条目,那么你会遇到问题。 将其替换为for(; entry != NULL && i < num_files ; i++)

答案 1 :(得分:0)

应检查数组的索引:

i<num_files

答案 2 :(得分:0)

问题在于,如果存在任何与你的模式不匹配的条目(例如 . 和 .. 条目),你就会跳过数组中对应的元素。这也意味着你会写到 file_names 数组之外。只有在文件名匹配时才应该递增 i。

使用 getcwd() 而不是直接用 . 表示当前目录也能工作,但几乎没有必要。

使用free(dir)代替closedir(dir)是一项彻底的灾难。

命令行参数的处理方式很不寻常。按照最初的写法,它会把 -delete 当作与 -d 等价而接受。这不是好的风格。

#include <assert.h>
#include <dirent.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdbool.h>

bool ends_with(char *str, char *sub);
int array_find(char *key, char *argv[], int argc);

/*
 * Find the first element of argv that is exactly equal to key.
 *
 * key  - string to search for
 * argv - array of strings to search
 * argc - number of elements in argv
 *
 * Returns the index of the first element equal to key, or -1 when no element
 * matches (including when argc is zero or negative).
 */
int array_find(char *key, char *argv[], int argc)
{
    int pos = 0;

    while (pos < argc)
    {
        /* Whole-string equality: "-d" does not match "-delete". */
        if (strcmp(argv[pos], key) == 0)
        {
            return pos;
        }
        pos++;
    }
    return -1;
}

/*
 * Report whether str ends with the suffix sub.
 *
 * str - string to be checked (may be NULL)
 * sub - suffix to look for (may be NULL)
 *
 * Returns true if str ends with sub, or if both pointers are NULL.
 * Returns false if exactly one pointer is NULL or if str does not end
 * with sub. An empty sub is a suffix of every non-NULL str.
 */
bool ends_with(char *str, char *sub)
{
    if (str == NULL || sub == NULL)
    {
        /* Two NULLs are treated as equal; a single NULL never matches. */
        return str == NULL && sub == NULL;
    }

    size_t str_len = strlen(str);
    size_t sub_len = strlen(sub);
    if (sub_len > str_len)
    {
        return false;
    }

    /* Compare the last sub_len characters of str directly. Anchoring on the
     * last occurrence of sub's first character would miss suffixes in which
     * that character repeats (e.g. "baa" ends with "aa"). */
    return strcmp(str + (str_len - sub_len), sub) == 0;
}

/*
 * List the names of all entries in a directory that end with a fixed suffix.
 *
 * Usage: prog [-d DIRECTORY]
 *
 * Without -d the current working directory is scanned. The program prints
 * the directory name, the number of matching entries, and then one matching
 * name per line.
 *
 * The directory is read once and the names are collected in a heap array
 * that grows on demand. This avoids a separate counting pass, so the array
 * cannot be overrun if the directory changes while it is being read, and an
 * empty result does not require a zero-length array.
 *
 * Returns EXIT_SUCCESS on success and EXIT_FAILURE if the arguments are
 * invalid, the directory cannot be opened, or memory runs out.
 */
int main(int argc, char *argv[])
{
    int status = EXIT_FAILURE;
    char suffix[] = ".c";
    char *cwd = NULL;           /* owned buffer; set only when -d is absent */
    const char *dirname = NULL;
    DIR *dir = NULL;
    struct dirent *entry = NULL;
    char **file_names = NULL;   /* owned array of owned strings */
    size_t num_files = 0;
    size_t capacity = 0;

    int index = array_find("-d", argv, argc);
    if (index >= 0)
    {
        /* "-d" as the final argument has no operand: argv[argc] is NULL. */
        if (index + 1 >= argc)
        {
            fprintf(stderr, "%s: option -d requires a directory argument\n",
                    argv[0]);
            return EXIT_FAILURE;
        }
        dirname = argv[index + 1];
    }
    else
    {
        /* With a NULL buffer getcwd allocates the result; freed at out. */
        cwd = getcwd(NULL, 0);
        if (cwd == NULL)
        {
            perror("Error getting current directory name.");
            return EXIT_FAILURE;
        }
        dirname = cwd;
    }

    dir = opendir(dirname);
    if (dir == NULL)
    {
        perror(dirname);
        goto out;
    }

    printf("dirname = %s\n", dirname);

    while ((entry = readdir(dir)) != NULL)
    {
        if (!ends_with(entry->d_name, suffix))
        {
            continue;
        }

        if (num_files == capacity)
        {
            /* Grow through a temporary so the old array survives failure. */
            size_t new_capacity = (capacity == 0) ? 16 : capacity * 2;
            char **grown = realloc(file_names, new_capacity * sizeof *grown);
            if (grown == NULL)
            {
                perror("Could not create the filename array");
                goto out;
            }
            file_names = grown;
            capacity = new_capacity;
        }

        /* d_name may be overwritten by the next readdir call, so copy it. */
        file_names[num_files] = strdup(entry->d_name);
        if (file_names[num_files] == NULL)
        {
            perror("Could not create the filename array");
            goto out;
        }
        num_files++;
    }

    printf("Num files = %zu\n", num_files);
    for (size_t i = 0; i < num_files; i++)
    {
        printf("%s\n", file_names[i]);
    }
    status = EXIT_SUCCESS;

out:
    /* Single cleanup path: release everything acquired above. */
    for (size_t i = 0; i < num_files; i++)
    {
        free(file_names[i]);
    }
    free(file_names);
    if (dir != NULL && closedir(dir) != 0)
    {
        perror("Failed to close directory.");
    }
    free(cwd);
    return status;
}