我正在Linux Ubuntu 16.04上使用C ++编写文件爬虫。
它的基本思路是遍历一个目录,把每个文件的字节大小记录到链表中,并在需要时创建新节点。如果只有一个包含若干文件的单层文件夹,程序运行正常。
我的问题是:如果一个文件夹里还嵌套着另一个文件夹,程序就会出现段错误;用 GNU 调试器(gdb)运行时输出如下:
Program received signal SIGSEGV, Segmentation fault. __strcpy_sse2_unaligned () at ../sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S:714 714 ../sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S: No such file or directory.
根据stack overflow post 1,我需要以某种方式安装库,但安装正确的版本存在问题。
但我不认为这是问题所在:根据调试器的输出,故障发生在一个递归函数内部,该函数被调用时应该进入子目录并收集数据。主循环中用同样的逻辑遍历顶层目录,在只有一层目录时工作正常,只有存在嵌套目录时才会出问题。下面附上完整源代码供参考;代码有点长,但只要用相同的参数调用,这个 bug 应该很容易复现:
// Directory crawler
// Written by Kaz
/*
1) Start at a user provided directory
2) Descend the file tree while tracking each file
3) Group each file by its size, based on the user-supplied bin width
4) Print a histogram of file sizes grouped into bins of that width
*/
#include <dirent.h>
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>

#include <iostream>
#include <string>
using namespace std;
// Running total of histogram bins created so far; also used to number new bins.
int binCount = 0;

// One histogram bin: a doubly linked list element that counts the entries
// whose size lies in the inclusive range [min, max].
struct node{
    node *next;   // following bin, or NULL at the tail
    node *prev;   // preceding bin, or NULL at the head
    int count;    // number of entries tallied into this bin
    int name;     // bin number, taken from binCount at construction time
    int min;      // lower bound of the range (inclusive)
    int max;      // upper bound of the range (inclusive)

    // Build an unlinked, empty bin labelled with the current value of binCount.
    node()
        : next(NULL),
          prev(NULL),
          count(0),
          name(binCount),
          min(0),
          max(0)
    {
    }
};
// Allocate a new bin and append it directly after previousNode.
//
// The new bin starts one past previousNode's upper bound and ends at
// previousNode->max * 2 + 1. It is labelled with the current binCount and
// starts with a count of 1. previousNode->next is overwritten to point at
// the new bin, which is also returned.
node *nextNode(node* previousNode){
    node *appended = new node;

    // Range of the new bin, derived from the predecessor's upper bound.
    const int previousMax = previousNode->max;
    appended->min = previousMax + 1;
    appended->max = (previousMax * 2) + 1;

    appended->count = 1;
    appended->name = binCount;

    // Splice the new bin in behind previousNode.
    appended->next = NULL;
    appended->prev = previousNode;
    previousNode->next = appended;

    return appended;
}
void traverseNewDirectory(node * here, char *dirName){
DIR * nwd;
struct dirent *dip;
node * current;
current = here;
bool isadirectory,isHidden;
if((nwd = opendir(dirName))== NULL){
perror("Can't open derived directory");
return;
}
while ((dip = readdir(nwd)) != NULL){
isadirectory = false;
isHidden = false;
if((dip -> d_type) == DT_UNKNOWN ){
struct stat stbuf;
stat(dip->d_name, &stbuf);
isadirectory = S_ISDIR(stbuf.st_mode);
}
else if((dip -> d_type) == DT_DIR ){
if((strcmp(dip->d_name, ".") == 0) || (strcmp(dip->d_name, "..")) == 0){
isHidden = true;
isadirectory = true;
}
else{
isadirectory = true;
}
}
else{
if((dip-> d_reclen <= current->max)&&(dip->d_reclen >=current->min)){
current->count = current->count+1;
}
else if(dip->d_reclen < current->min){
node*temp = current->prev;
while(temp->prev != NULL){
if((dip-> d_reclen <= current->max)&&(dip->d_reclen >=current->min)){
current->count = current->count+1;
break;
}
else if(dip->d_reclen < current->min){
temp = current->prev;
}
}
}
else{
current -> next = nextNode(current);
current = current -> next;
binCount++;
}
}
if(isadirectory){
char *path;
strcpy(path,dirName);
strcat(path, "/");
strcat(path,dip->d_name);
strcat(path, "\0");
if(isHidden == true){
}
else{
traverseNewDirectory(current, path);
}
}
}
while ( ( closedir (nwd) == -1) && ( errno == EINTR) );
}
// Print the histogram to standard output, one line per bin from `head` to
// the tail: the bin number, its inclusive size range, and one 'x' per entry
// counted in that bin.
void printHistogram(node *head){
    for(node *bin = head; bin != NULL; bin = bin->next){
        std::cout << "[B " << bin->name << "] from " << bin->min << " to " << bin->max << " : ";

        int remaining = bin->count;
        while(remaining > 0){
            std::cout << "x";
            remaining--;
        }

        std::cout << std::endl;
    }
}
int main(int argc,char *argv[]){
// Ensures that a valid directory is provided by the cmd line argument
if (argc != 3){
if(argc == 1){
fprintf (stderr, " argc = %d no directory given \n", argc);
return 1;
}
else if(argc == 2){
fprintf (stderr, " argc = %d no size given \n", argc);
return 2;
}
else{
fprintf(stderr, "argc = %d invalid parameters \n", argc);
return 3;
}
}
DIR * cwd; // current working directory pointer
struct dirent *cwdP; // pointer to dirent struct
int binWidth; // variable for the width of the grouping in the histogram
binWidth = atoi(argv[2]);
node *first = new node;
binCount++;
first->max = binWidth - 1;
node * current;
current = first;
bool isadirectory,isHidden;
if((cwd = opendir(argv[1]))== NULL){
perror("Can't open main directory");
return 2;
}
while ((cwdP = readdir(cwd)) != NULL){
isadirectory = false;
isHidden = false;
if((cwdP -> d_type) == DT_UNKNOWN ){
struct stat stbuf;
stat(cwdP->d_name, &stbuf);
isadirectory = S_ISDIR(stbuf.st_mode);
}
else if((cwdP -> d_type) == DT_DIR ){
if((strcmp(cwdP->d_name, ".") == 0) || (strcmp(cwdP->d_name, "..")) == 0){
isHidden = true;
isadirectory = true;
}
else{
isadirectory = true;
}
}
else{
if((cwdP-> d_reclen <= current->max)&&(cwdP->d_reclen >=current->min)){
current->count = current->count+1;
}
else if(cwdP->d_reclen < current->min){
node*temp = current->prev;
while(temp->prev != NULL){
if((cwdP-> d_reclen <= current->max)&&(cwdP->d_reclen >=current->min)){
current->count = current->count+1;
break;
}
else if(cwdP->d_reclen < current->min){
temp = current->prev;
}
}
}
else{
current -> next = nextNode(current);
current = current -> next;
binCount++;
}
}
if(isadirectory){
char *fullPath;
strcpy(fullPath,argv[1]);
strcat(fullPath,"/");
strcat(fullPath,cwdP->d_name);
strcat(fullPath, "\0");
if(isHidden == true){
}
else{
traverseNewDirectory(current, fullPath);
}
}
}
while ( ( closedir (cwd) == -1) && ( errno == EINTR) );
printHistogram(first);
return 0;
}
答案 0 :(得分:1)
不,这是你的错;)
程序在 strcpy 中发生段错误,这明显说明你在构建路径时没有分配任何内存(也可能是为字符串分配的内存不够,或者较少见的情况——其他地方破坏了堆)。那么,看看你代码中的这两处:
char *path;
strcat(path, "/");
strcpy(path,dirName);
strcat(path, "/");
strcat(path,dip->d_name);
strcat(path, "\0");
你必须先为它分配足够的内存。目前,path 从未被初始化,你只是把字符串复制到它所指向的某个未知内存位置。
说实话,我不明白你为什么要在 C++ 中写 C 风格的代码。既然用的是 C++,直接使用 std::string 就行,它会替你处理内存管理。甚至只需这样写:
std::string path = std::string( dirName ) + "/" + dip->d_name;
然后可以通过 c_str() 取得 C 字符串来进行递归调用(c_str() 返回 const char *,因此需要把 dirName 参数的类型改为 const char *):
traverseNewDirectory(current, path.c_str());
您的代码可能还有其他问题。我没有花时间仔细阅读,但它大量使用指针、几乎没有注释,而且过于复杂。所有这些都是容易引发问题的因素。