我正在开发一个使用一些外部C库的C ++程序。据我所知,虽然这不是问题的原因,但问题在于我的C ++代码。程序运行正常,没有错误或我的测试数据集上的任何内容,但经过几乎整个完整的数据集,我得到一个段错误。运行GDB给了我这个段错误:
(gdb) run -speciesMain=allMis1 -speciesOther=anoCar2 -speciesMain=allMis1 -speciesOther=anoCar2 /hive/data/genomes/allMis1/bed/lastz.anoCar2/mafRBestNet/*.maf.gz Starting program: /cluster/home/jstjohn/bin/mafPairwiseSyntenyDecay -speciesMain=allMis1 -speciesOther=anoCar2 -speciesMain=allMis1 -speciesOther=anoCar2 /hive/data/genome s/allMis1/bed/lastz.anoCar2/mafRBestNet/*.maf.gz Detaching after fork from child process 3718. Program received signal SIGSEGV, Segmentation fault. 0x0000003009cb7672 in __gnu_cxx::__exchange_and_add(int volatile*, int) () from /usr/lib64/libstdc++.so.6 (gdb) up #1 0x0000003009c9db59 in std::basic_string, std::allocator >::~basic_string() () from /usr/lib64/libstdc++.so.6 (gdb) up #2 0x00000000004051e7 in PairAlnInfo::~PairAlnInfo (this=0x7fffffffcd70, __in_chrg=) at mafPairwiseSyntenyDecay.cpp:37 (gdb) up #3 0x0000000000404eb0 in main (argc=2, argv=0x7fffffffcf78) at mafPairwiseSyntenyDecay.cpp:260
看起来有些东西正在继续我的PairAlnInfo类的双倍。奇怪的是我没有定义析构函数,我没有用new
分配任何东西。我已经在linux机器上使用g ++ 44和g ++ 4.1.2尝试了这一点并且得到了相同的结果。
为了让事情变得更奇怪,在我的linux盒子上(有更多可用的RAM和所有东西,不是RAM是这个程序的问题,但它是一个强大的系统)在程序到达之前如上所述发生了seg故障循环打印输出。在使用g ++或clang ++的小得多的macbook air上,程序仍然是段错误的,但是直到在结果打印之后,才会在最后的return(0)
之前打印出来。主功能。以下是在使用Mac的默认g ++ 4.2编译后,在同一文件上运行的Mac上GDB跟踪的样子:
(more results)... 98000 27527 162181 0.83027 99000 27457 161467 0.829953 100000 27411 160794 0.829527 Program received signal EXC_BAD_ACCESS, Could not access memory. Reason: KERN_INVALID_ADDRESS at address: 0x00004a2c00106077 0x00007fff9365a6e5 in std::string::_Rep::_M_dispose () (gdb) up #1 0x00007fff9365a740 in std::basic_string, std::allocator >::~basic_string () (gdb) up #2 0x0000000100003938 in main (argc=1261, argv=0x851d5fbff533) at mafPairwiseSyntenyDecay.cpp:301 (gdb)
万一你没有注意到我发布的时间,现在是凌晨2:30左右...我已经在这个问题上乱砍了大约10个小时了。非常感谢您花时间看看这个并帮助我!复制我的情况的代码和一些说明如下。
如果您对下载和安装依赖项的整个内容感兴趣,请在基本目录中下载我的KentLib存储库make
,然后转到examples/mafPairwiseSyntenyDecay
并在那里运行make
。导致我正在讨论的bug的一个例子(相当大)是这里提供的gziped文件:100Mb file that the program crashes on。然后使用这些参数-speciesMain=allMis1 -speciesOther=anoCar2 anoCar2.allMis1.rbest.maf.gz
执行程序。
/**
* mafPairwiseSyntenyDecay
* Author: John St. John
* Date: 4/26/2012
*
* calculates the mean synteny decay in different range bins
*
*
*/
//Kent source C imports
extern "C" {
#include "common.h"
#include "options.h"
#include "maf.h"
}
#include <map>
#include <string>
#include <set>
#include <vector>
#include <sstream>
#include <iostream>
//#define NDEBUG
#include <assert.h>
using namespace std;
/*
Global variables
*/
class PairAlnInfo {
public:
string oname;
int sstart;
int send;
int ostart;
int oend;
char strand;
PairAlnInfo(string _oname,
int _sstart, int _send,
int _ostart, int _oend,
char _strand):
oname(_oname),
sstart(_sstart),
send(_send),
ostart(_ostart),
oend(_oend),
strand(_strand){}
PairAlnInfo():
oname("DUMMY"),
sstart(-1),
send(-1),
ostart(-1),
oend(-1),
strand(-1){}
};
vector<string> &split(const string &s, char delim, vector<string> &elems) {
stringstream ss(s);
string item;
while(getline(ss, item, delim)) {
elems.push_back(item);
}
return(elems);
}
vector<string> split(const string &s, char delim) {
vector<string> elems;
return(split(s, delim, elems));
}
#define DEF_MIN_LEN (200)
#define DEF_MIN_SCORE (200)
typedef map<int,PairAlnInfo> PairAlnInfoByPos;
typedef map<string, PairAlnInfoByPos > ChromToPairAlnInfoByPos;
ChromToPairAlnInfoByPos pairAlnInfoByPosByChrom;
void usage()
/* Explain usage and exit. */
{
errAbort(
(char*)"mafPairwiseSyntenyDecay -- Calculates pairwise syntenic decay from maf alignment containing at least the two specified species.\n"
"usage:\n"
"\tmafPairwiseSyntenyDecay [options] [*required options] file1.maf[.gz] ... \n"
"Options:\n"
"\t-help\tPrints this message.\n"
"\t-minScore=NUM\tMinimum MAF alignment score to consider (default 200)\n"
"\t-minAlnLen=NUM\tMinimum MAF alignment block length to consider (default 200)\n"
"\t-speciesMain=NAME\t*Name of the main species (exactly as it appears before the '.') in the maf file (REQUIRED)\n"
"\t-speciesOther=NAME\t*Name of the other species (exactly as it appears before the '.') in the maf file (REQUIRED)\n"
);
}//end usage()
static struct optionSpec options[] = {
/* Structure holding command line options */
{(char*)"help",OPTION_STRING},
{(char*)"minScore",OPTION_INT},
{(char*)"minAlnLen",OPTION_INT},
{(char*)"speciesMain",OPTION_STRING},
{(char*)"speciesOther",OPTION_STRING},
{NULL, 0}
}; //end options()
/**
* Main function, takes filenames for paired qseq reads
* and outputs three files.
*/
int iterateOverAlignmentBlocksAndStorePairInfo(char *fileName, const int minScore, const int minAlnLen, const string speciesMain, const string speciesOther){
struct mafFile * mFile = mafOpen(fileName);
struct mafAli * mAli;
//loop over alignment blocks
while((mAli = mafNext(mFile)) != NULL){
struct mafComp *first = mAli->components;
int seqlen = mAli->textSize;
//First find and store set of duplicates in this block
set<string> seen;
set<string> dups;
if(mAli->score < minScore || seqlen < minAlnLen){
//free here and pre-maturely end
mafAliFree(&mAli);
continue;
}
for(struct mafComp *item = first; item != NULL; item = item->next){
string tmp(item->src);
string tname = split(tmp,'.')[0];
if(seen.count(tname)){
//seen this item
dups.insert(tname);
}else{
seen.insert(tname);
}
}
for(struct mafComp *item1 = first; item1->next != NULL; item1 = item1->next){
//stop one before the end
string tmp1(item1->src);
vector<string> nameSplit1(split(tmp1,'.'));
string name1(nameSplit1[0]);
if(dups.count(name1) || (name1 != speciesMain && name1 != speciesOther)){
continue;
}
for(struct mafComp *item2 = item1->next; item2 != NULL; item2 = item2->next){
string tmp2(item2->src);
vector<string> nameSplit2(split(tmp2,'.'));
string name2 = nameSplit2[0];
if(dups.count(name2) || (name2 != speciesMain && name2 != speciesOther)){
continue;
}
string chr1(nameSplit1[1]);
string chr2(nameSplit2[1]);
char strand;
if(item1->strand == item2->strand)
strand = '+';
else
strand = '-';
int start1,end1,start2,end2;
if(item1->strand == '+'){
start1 = item1->start;
end1 = start1 + item1->size;
}else{
end1 = item1->start;
start1 = end1 - item1->size;
}
if(item2->strand == '+'){
start2 = item2->start;
end2 = start2+ item2->size;
}else{
end2 = item2->start;
start2 = end2 - item2->size;
}
if(name1 == speciesMain){
PairAlnInfo aln(chr2,start1,end1,start2,end2,strand);
pairAlnInfoByPosByChrom[chr1][start1] = aln;
}else{
PairAlnInfo aln(chr1,start2,end2,start1,end1,strand);
pairAlnInfoByPosByChrom[chr2][start2] = aln;
}
} //end loop over item2
} //end loop over item1
mafAliFree(&mAli);
}//end loop over alignment blocks
mafFileFree(&mFile);
return(0);
}
int main(int argc, char *argv[])
/* Process command line. */
{
optionInit(&argc, argv, options);
if(optionExists((char*)"help") || argc <= 1){
usage();
}
int minAlnScore = optionInt((char*)"minScore",DEF_MIN_SCORE);
int minAlnLen = optionInt((char*)"minAlnLen",DEF_MIN_LEN);
string speciesMain(optionVal((char*)"speciesMain",NULL));
string speciesOther(optionVal((char*)"speciesOther",NULL));
if(speciesMain.empty() || speciesOther.empty())
usage();
//load the relevant alignment info from the maf(s)
for(int i = 1; i<argc; i++){
iterateOverAlignmentBlocksAndStorePairInfo(argv[i], minAlnScore, minAlnLen, speciesMain, speciesOther);
}
const int blockSize = 1000;
const int blockCount = 100;
int totalWindows[blockCount] = {0};
int containBreak[blockCount] = {0};
//we want the fraction of windows of each size that contain a break
//
for(ChromToPairAlnInfoByPos::iterator mainChromItter = pairAlnInfoByPosByChrom.begin();
mainChromItter != pairAlnInfoByPosByChrom.end();
mainChromItter++){
//process the alignments shared by this chromosome
//note that map stores them sorted by begin position
vector<int> keys;
for(PairAlnInfoByPos::iterator posIter = mainChromItter->second.begin();
posIter != mainChromItter->second.end();
posIter++){
keys.push_back(posIter->first);
}
for(int i = 0; i < keys.size(); i++){
//first check for trivial window (ie our block)
PairAlnInfo pi1 = mainChromItter->second[keys[i]];
assert(pi1.send > pi1.sstart);
assert(pi1.sstart == keys[i]);
int numBucketsThisWindow = (pi1.send - pi1.sstart) / blockSize;
for(int k = 0; k < numBucketsThisWindow && k < blockCount; k++)
totalWindows[k]++;
for(int j = i+1; j < keys.size(); j++){
PairAlnInfo pi2 = mainChromItter->second[keys[j]];
assert(pi2.sstart == keys[j]);
assert(pi2.send > pi2.sstart);
assert(pi2.sstart > pi1.sstart);
if(pi2.oname == pi1.oname){
int moreToInc = (pi2.send - pi1.sstart) / blockSize;
for(int k = numBucketsThisWindow; k < moreToInc && k < blockCount; k++)
totalWindows[k]++;
numBucketsThisWindow = moreToInc; //so we don't double count
}else{
int numDiscontigBuckets = (pi2.send - pi1.sstart) / blockSize;
for(int k = numBucketsThisWindow; k < numDiscontigBuckets && k < blockSize; k++){
containBreak[k]++;
totalWindows[k]++;
}
numBucketsThisWindow = numDiscontigBuckets;
}
if((keys[j] - keys[i]) >= (blockSize * blockCount)){
//i = j;
break;
}
}
}
}
cout << "#WindowSize\tNumContainBreak\tNumTotal\t1-(NumContainBreak/NumTotal)" << endl;
for(int i = 0; i < blockCount; i++){
cout << (i+1)*blockSize << '\t';
cout << containBreak[i] << '\t';
cout << totalWindows[i] << '\t';
cout << (totalWindows[i] > 0? 1.0 - (double(containBreak[i])/double(totalWindows[i])): 0) << endl;
}
return(0);
} //end main()
答案 0 :(得分:4)
尝试在valgrind下运行程序。这将为您提供可能或实际丢失的内存,未初始化等的报告
答案 1 :(得分:2)
您的问题可能是由于在您看到的实际错误之前的某个时间点程序中发生的内存损坏。
您发布的代码中的一个潜在问题是循环:
for(int k = numBucketsThisWindow; k<numDiscontigBuckets && k < blockSize; k++){
使用blockSize
代替正确的blockCount
,这会导致totalWindows[]
和containBreak[]
数组可能溢出。这将覆盖speciesMain
和speciesOther
字符串,与堆栈中的任何其他字符串一起覆盖,这可能会导致您看到的错误。