嗨,我为同一个类写了两个版本:一个使用 std::map,另一个使用两个 std::vector:
/// A named nucleotide sequence stored as two vectors: one holding base
/// positions and one holding the base characters.
class NucleotideSequence {
public:
    /// Builds a sequence from a name, a vector of base positions and a vector
    /// of base characters. The arguments are copied into the object.
    NucleotideSequence(std::string name, std::vector<int> &bp, std::vector<char> &bases);

    /// Presumably returns the stored name (definition not shown here).
    std::string getName();

    /// Get a base by its position in the char array.
    char getBase(int pos);

    /// Get a base by its actual bp position.
    char getAbBase(int abPos);

private:
    std::string Name;            // sequence name
    std::vector<int> BasePos;    // base positions
    std::vector<char> BaseChar;  // base characters
};
/// A named nucleotide sequence stored as a single map from an integer key to
/// a base character.
class NucleotideSequence2 {
public:
    /// Builds a sequence from a name and a map of bases.
    ///
    /// NOTE: the dynamic exception specification `throw(FormatError)` is
    /// deprecated since C++11 and removed in C++17. It is kept here because it
    /// has to match the out-of-class definition of this constructor.
    NucleotideSequence2(std::string &name, std::map<int, char> &seq) throw(FormatError);

    /// Presumably returns the stored name (definition not shown here).
    std::string getName();

private:
    std::string Name;              // sequence name
    std::map<int, char> Sequence;  // integer key -> base character
};
然后我为它们定义了构造函数:
/// Copies the name, positions and bases into the object, then normalises the
/// stored bases.
///
/// 'A', 'T', 'C', 'G', '-' and 'N' are accepted unchanged; lowercase 'a', 't',
/// 'c', 'g' and 'n' are replaced by their uppercase form. Any other character
/// makes the constructor throw FormatError.
NucleotideSequence::NucleotideSequence(std::string name, std::vector<int> &bp, std::vector<char> &bases)
    : Name(name), BasePos(bp), BaseChar(bases)
{
    for (std::vector<char>::size_type idx = 0; idx != BaseChar.size(); ++idx) {
        char &base = BaseChar[idx];

        const bool alreadyCanonical = base == 'A' || base == 'T' || base == 'C' ||
                                      base == 'G' || base == '-' || base == 'N';
        if (alreadyCanonical) {
            // Nothing to normalise.
        } else if (base == 'a') {
            base = 'A';
        } else if (base == 't') {
            base = 'T';
        } else if (base == 'c') {
            base = 'C';
        } else if (base == 'g') {
            base = 'G';
        } else if (base == 'n') {
            base = 'N';
        } else {
            // Anything outside the accepted alphabet is a format error.
            throw FormatError();
        }
    }
}
/// Copies the name and the map into the object, then normalises the stored
/// bases.
///
/// 'A', 'T', 'C', 'G', '-' and 'N' are accepted unchanged; lowercase 'a', 't',
/// 'c', 'g' and 'n' are replaced by their uppercase form. Any other character
/// makes the constructor throw FormatError.
NucleotideSequence2::NucleotideSequence2(std::string &name, std::map<int, char> &seq) throw(FormatError)
    : Name(name), Sequence(seq)
{
    const std::map<int, char>::iterator last = Sequence.end();
    for (std::map<int, char>::iterator entry = Sequence.begin(); entry != last; ++entry) {
        char &base = entry->second;

        const bool alreadyCanonical = base == 'A' || base == 'T' || base == 'C' ||
                                      base == 'G' || base == '-' || base == 'N';
        if (alreadyCanonical) {
            // Nothing to normalise.
        } else if (base == 'a') {
            base = 'A';
        } else if (base == 't') {
            base = 'T';
        } else if (base == 'c') {
            base = 'C';
        } else if (base == 'g') {
            base = 'G';
        } else if (base == 'n') {
            base = 'N';
        } else {
            // Anything outside the accepted alphabet is a format error.
            throw FormatError();
        }
    }
}
这两个构造函数在两个不同的函数中调用:
/// Reads the next record from FileStream and returns it as a NucleotideSequence.
///
/// A record is a header line that starts with '>' (the rest of that line is
/// the name) followed by sequence characters up to the next '>' or the end of
/// the stream. Newlines and spaces inside the sequence part are skipped; every
/// other character is stored together with its 1-based running position.
/// A '>' that terminates the record is put back so the next call sees it.
///
/// Throws StreamClosed if the stream is not open, FileEnd if the stream is
/// already at (or immediately reaches) end-of-file, and FormatError if the
/// record does not start with '>'. The NucleotideSequence constructor may
/// throw as well.
///
/// NOTE(review): assumes FileStream is a standard input file stream
/// (std::ifstream / std::fstream) - confirm against the class declaration.
NucleotideSequence Sequence_stream::get()
{
    if (!FileStream.is_open())
        throw StreamClosed(); // Make sure the stream is indeed open else throw an exception.
    if (FileStream.eof())
        throw FileEnd();

    // get(char&) leaves the argument untouched when the read fails, so the
    // '\0' initialiser makes a failed first read fall through to FormatError
    // unless end-of-file was the cause.
    char currentchar = '\0';
    FileStream.get(currentchar);
    if (FileStream.eof())
        throw FileEnd();
    if (currentchar != '>')
        throw FormatError();

    // Header line: everything up to (not including) the newline is the name.
    // Looping on the result of get() stops on *any* read failure, not only on
    // end-of-file; testing eof() alone would spin forever after an I/O error.
    std::string name;
    while (FileStream.get(currentchar) && currentchar != '\n')
        name.append(1, currentchar);

    // Sequence body: runs until the next record's '>' or until reading fails.
    int basepos = 0;
    std::vector<char> sequence;
    std::vector<int> postn;
    while (FileStream.get(currentchar)) {
        if (currentchar == '>') {
            // Leave the next record's marker in the stream for the next call.
            FileStream.unget();
            break;
        }
        if (currentchar != '\n' && currentchar != ' ') {
            ++basepos;
            sequence.push_back(currentchar);
            postn.push_back(basepos);
        }
    }

    return NucleotideSequence(name, postn, sequence);
}
/// Reads the next record from FileStream and returns it as a NucleotideSequence2.
///
/// A record is a header line that starts with '>' (the rest of that line is
/// the name) followed by sequence characters up to the next '>' or the end of
/// the stream. Newlines and spaces inside the sequence part are skipped; every
/// other character is stored in a map keyed by its 1-based running position.
/// A '>' that terminates the record is put back so the next call sees it.
///
/// Throws StreamClosed if the stream is not open, FileEnd if the stream is
/// already at (or immediately reaches) end-of-file, and FormatError if the
/// record does not start with '>'. The NucleotideSequence2 constructor may
/// throw as well.
///
/// NOTE(review): assumes FileStream is a standard input file stream
/// (std::ifstream / std::fstream) - confirm against the class declaration.
NucleotideSequence2 Sequence_stream::get2()
{
    if (!FileStream.is_open())
        throw StreamClosed(); // Make sure the stream is indeed open else throw an exception.
    if (FileStream.eof())
        throw FileEnd();

    // get(char&) leaves the argument untouched when the read fails, so the
    // '\0' initialiser makes a failed first read fall through to FormatError
    // unless end-of-file was the cause.
    char currentchar = '\0';
    FileStream.get(currentchar);
    if (FileStream.eof())
        throw FileEnd();
    if (currentchar != '>')
        throw FormatError();

    // Header line: everything up to (not including) the newline is the name.
    // Looping on the result of get() stops on *any* read failure, not only on
    // end-of-file; testing eof() alone would spin forever after an I/O error.
    std::string name;
    while (FileStream.get(currentchar) && currentchar != '\n')
        name.append(1, currentchar);

    // Sequence body: runs until the next record's '>' or until reading fails.
    int basepos = 0;
    std::map<int, char> sequence;
    while (FileStream.get(currentchar)) {
        if (currentchar == '>') {
            // Leave the next record's marker in the stream for the next call.
            FileStream.unget();
            break;
        }
        if (currentchar != '\n' && currentchar != ' ') {
            ++basepos;
            // Keys are strictly increasing, so every new element belongs at
            // the end of the map. Passing end() as the hint lets the map skip
            // the tree search that operator[] would perform (typically
            // amortised constant time). One node is still allocated per base.
            sequence.insert(sequence.end(),
                            std::map<int, char>::value_type(basepos, currentchar));
        }
    }

    return NucleotideSequence2(name, sequence);
}
这两个函数随后可以由另一个函数调用(异常会在那里被捕获,以免你疑惑这里抛出的异常为何没有被捕获)。
两个类之间的区别在于:一个包含两个 vector,而另一个把相同的信息保存在一个 std::map 中。
我的问题是:第一个类以及构建它的 'get' 运行得非常快——几乎是瞬间完成;而构建第二个类(使用 map 的那个)的 'get2' 明显更慢——耗时略超过 5 秒。
为什么构造使用 map 的类比构造使用两个 vector 的类慢?可以看到,我让两个构造函数和两个 get 函数几乎完全相同,区别只在于是向 vector 追加元素,还是向 map 添加键值对。因此,我猜测反复对 vector 调用 push_back 比反复向 map 添加键值对(mymap['newkey'] = 'newvalue';)更快、更高效。
如何加快 map 版本?
谢谢, 本。
答案 0 :(得分:5)
vector 只执行一次内存分配(如果您事先告诉它所需的容量),或者最多只执行少量几次分配。而 map 会为每个元素单独执行一次动态分配。
您可以尝试使用由键值对(std::pair)组成的有序 vector,或者“flat map”(Boost 中的 flat_map)或 btree-map(Google Code 上有一个实现),并比较它们的性能。内存局部性可能带来显著的差异;如果您不需要 std::map 提供的强迭代器有效性保证,很可能会找到性能更好的数据结构。
答案 1 :(得分:1)
如何加快 map 版本的速度?
尝试使用 unordered_map 代替普通的 std::map。