我在C#中经常使用HashSet和Dictionary,发现它们非常快......
我尝试过使用std :: map和std :: hash_map,并且在比较中发现它们非常慢。这听起来像预期的行为吗?在使用std :: hash_map时,我可能做错了吗?
或者,那里有更好的C ++ Hash容器吗?
我正在散列int32s,通常大约有100,000个。
更新:我在C#和C ++中创建了一个repro。它进行了两次试验,它们在C#中占用19ms和13ms,在C ++中占用大约11,000ms。我的C ++代码肯定有问题:)
(两者都作为发布版本运行,两者都是控制台应用程序)
C#输出:
Found 511 values in the intersection, in 19 ms
Found 508 values in the intersection, in 13 ms
C ++输出:
Found 308 values in the intersection, in 11764.7ms
Found 316 values in the intersection, in 11742.8ms
C ++输出(使用stdext :: hash_map而不是std :: map)
Found 300 values in the intersection, in 383.552ms
Found 306 values in the intersection, in 2277.02ms
C ++输出(使用stdext :: hash_map,发布x64版本)
Found 292 values in the intersection, in 1037.67ms
Found 302 values in the intersection, in 3663.71ms
注意:
C#:
/// <summary>
/// Runs the intersection benchmark twice, printing the number of matches and
/// the elapsed wall-clock time of each run, then waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // DateTime.Now.Millisecond is only the 0-999 millisecond component of the
    // current time, so the difference of two readings is not a duration (it
    // can even be negative). Stopwatch measures real elapsed time.
    System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();
    int intersectionSize = runIntersectionTest();
    stopwatch.Stop();
    Console.WriteLine(String.Format("Found {0} values in the intersection, in {1} ms", intersectionSize, stopwatch.ElapsedMilliseconds));

    // Reset() + Start() instead of Restart() so this also builds on pre-4.0 frameworks.
    stopwatch.Reset();
    stopwatch.Start();
    intersectionSize = runIntersectionTest();
    stopwatch.Stop();
    Console.WriteLine(String.Format("Found {0} values in the intersection, in {1} ms", intersectionSize, stopwatch.ElapsedMilliseconds));

    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
/// <summary>
/// Builds a list of 100,000 consecutive integers and a list of 1,000 random
/// integers, loads the first list into a Dictionary, and counts how many
/// entries of the second list are present in it.
/// </summary>
/// <returns>
/// The number of entries of the second list that were found in the dictionary
/// (a value that occurs twice in the second list is counted twice).
/// </returns>
static int runIntersectionTest()
{
    const int Base = 1000000000;

    Random rng = new Random(DateTime.Now.Millisecond);
    Dictionary<int,int> lookup = new Dictionary<int,int>();
    List<int> first = new List<int>();
    List<int> second = new List<int>();

    // first: Base, Base + 1, ..., Base + 99,999
    int n = 0;
    while ( n < 100000 )
    {
        first.Add(Base + n);
        n++;
    }

    // second: 1,000 values of the form Base + (rng.Next() % 200000 + 1)
    for ( int k = 0; k != 1000; k++ )
    {
        second.Add(Base + (rng.Next() % 200000 + 1));
    }

    // Register every value of the first list under marker 1.
    for ( int idx = 0; idx < first.Count; idx++ )
    {
        lookup[first[idx]] = 1;
    }

    // Probe with the second list; each hit is counted and re-marked with 2.
    int hits = 0;
    for ( int idx = 0; idx < second.Count; idx++ )
    {
        int candidate = second[idx];
        int existing;
        if ( !lookup.TryGetValue(candidate, out existing) )
        {
            continue;
        }
        hits++;
        lookup[candidate] = 2;
    }
    return hits;
}
C ++:
// Builds a std::map from 100,000 consecutive integers, then probes it with
// 1,000 rand()-derived integers and returns how many probes were found.
// Every probe that hits is counted (repeated values included) and its map
// entry is re-marked with 2. The result depends on the current rand() state.
int runIntersectionTest()
{
    typedef std::vector<int> IntList;
    typedef std::map<int,int> IntMap;
    const int kBase = 1000000000;

    IntMap lookup;
    IntList first;
    IntList second;

    // first: kBase, kBase + 1, ..., kBase + 99,999
    for ( int n = 0; n != 100000; ++n )
    {
        first.push_back(kBase + n);
    }

    // second: 1,000 values of the form kBase + 10 * (rand() % 200000 + 1)
    for ( int n = 0; n != 1000; ++n )
    {
        const int offset = (rand() % 200000 + 1) * 10;
        second.push_back(kBase + offset);
    }

    // Load every value of the first list into the map under marker 1.
    for ( IntList::size_type idx = 0; idx < first.size(); ++idx )
    {
        lookup[first[idx]] = 1;
    }

    // Probe with the second list. The lookup-then-assign sequence is kept
    // as two separate map operations, exactly as the benchmark was written.
    int hits = 0;
    for ( IntList::size_type idx = 0; idx < second.size(); ++idx )
    {
        const int candidate = second[idx];
        if ( lookup.find(candidate) == lookup.end() )
        {
            continue;
        }
        lookup[candidate] = 2;
        ++hits;
    }
    return hits;
}
// Entry point: seeds rand() from the current time, runs the intersection test
// twice, and prints the match count and the timer reading after each run.
// NOTE(review): Timer is not defined in this snippet; presumably it starts
// timing on construction and starts again after Reset() -- confirm.
int _tmain(int argc, _TCHAR* argv[])
{
srand ( time(NULL) );
// First run.
Timer timer;
int intersectionSize = runIntersectionTest();
timer.Stop();
cout << "Found " << intersectionSize << " values in the intersection, in " << timer.GetMilliseconds() << "ms" << endl;
// Second run, reusing the same timer object.
timer.Reset();
intersectionSize = runIntersectionTest();
timer.Stop();
cout << "Found " << intersectionSize << " values in the intersection, in " << timer.GetMilliseconds() << "ms" << endl;
// Block on stdin so the console output stays visible before exit.
getchar();
return 0;
}
答案 0 :(得分:5)
hash_map和hash_set是非标准的,unordered_map和unordered_set很可能很快成为标准版本。在没有可复现示例(repro)的情况下,我认为这个问题很难再深入讨论。在底层,它们是相同的数据结构,因此应具有相似的性能。
我在MS Visual Studio 2008 v9.0.30729.1下编译了提供的示例,项目类型为Visual C++ -> Win32 -> 控制台应用程序(不过我自己写了一个Timer类,因为我不确定你用的是什么)。在调试版本下,我得到的时间约为1000毫秒,而发布版本编译后只需50毫秒。
#include <vector>
#include <iostream>
#include <map>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <windows.h>
// Pair of raw performance-counter readings: one taken when a measurement
// starts and one taken when it stops.
struct stopWatch {
    LARGE_INTEGER start;
    LARGE_INTEGER stop;
};
// Stopwatch built on the Windows performance counter.
// Usage: startTimer(), run the code under test, stopTimer(), then read the
// interval between the two calls (in seconds) with getElapsedTime().
class CStopWatch {
public:
    CStopWatch();
    void startTimer( );
    void stopTimer( );
    double getElapsedTime();

private:
    stopWatch timer;           // counter readings captured by startTimer()/stopTimer()
    LARGE_INTEGER frequency;   // counter ticks per second, queried in the constructor

    // Converts a tick count to seconds using `frequency`.
    double LIToSecs( LARGE_INTEGER & L);
};
// Converts a performance-counter tick count to seconds by dividing it by the
// counter frequency (ticks per second) stored in `frequency`.
double CStopWatch::LIToSecs( LARGE_INTEGER & L) {
    const double ticks = static_cast<double>(L.QuadPart);
    const double ticksPerSecond = static_cast<double>(frequency.QuadPart);
    return ticks / ticksPerSecond;
}
// Queries the performance-counter frequency and zeroes both stored readings.
CStopWatch::CStopWatch(){
    QueryPerformanceFrequency( &frequency ) ;
    timer.stop.QuadPart = 0;
    timer.start.QuadPart = 0;
}
// Records the current performance-counter value as the start reading.
void CStopWatch::startTimer( ) {
QueryPerformanceCounter(&timer.start) ;
}
// Records the current performance-counter value as the stop reading.
void CStopWatch::stopTimer( ) {
QueryPerformanceCounter(&timer.stop) ;
}
double CStopWatch::getElapsedTime() {
LARGE_INTEGER time;
time.QuadPart = timer.stop.QuadPart - timer.start.QuadPart;
return LIToSecs( time) ;
}
using namespace std;
// Builds a std::map from 100,000 consecutive integers, then probes it with
// 1,000 rand()-derived integers and returns how many probes were found.
// Every probe that hits is counted (repeated values included) and its map
// entry is re-marked with 2. The result depends on the current rand() state.
int runIntersectionTest()
{
    typedef std::vector<int> IntList;
    typedef std::map<int,int> IntMap;
    const int kBase = 1000000000;

    IntMap lookup;
    IntList first;
    IntList second;

    // first: kBase, kBase + 1, ..., kBase + 99,999
    for ( int n = 0; n != 100000; ++n )
    {
        first.push_back(kBase + n);
    }

    // second: 1,000 values of the form kBase + 10 * (rand() % 200000 + 1)
    for ( int n = 0; n != 1000; ++n )
    {
        const int offset = (rand() % 200000 + 1) * 10;
        second.push_back(kBase + offset);
    }

    // Load every value of the first list into the map under marker 1.
    for ( IntList::size_type idx = 0; idx < first.size(); ++idx )
    {
        lookup[first[idx]] = 1;
    }

    // Probe with the second list. The lookup-then-assign sequence is kept
    // as two separate map operations, exactly as the benchmark was written.
    int hits = 0;
    for ( IntList::size_type idx = 0; idx < second.size(); ++idx )
    {
        const int candidate = second[idx];
        if ( lookup.find(candidate) == lookup.end() )
        {
            continue;
        }
        lookup[candidate] = 2;
        ++hits;
    }
    return hits;
}
int main(int argc, char* argv[])
{
srand ( time(NULL) );
int tests = 2;
while(tests--){
CStopWatch timer;
timer.startTimer();
int intersectionSize = runIntersectionTest();
timer.stopTimer();
cout << "Found " << intersectionSize << " values in the intersection, in " << timer.getElapsedTime() << "s\r\n";
}
getchar();
return 0;
}
(我会尝试使用unordered_map,但我的版本没有它)。我怀疑你的C ++设置存在一些问题。
答案 1 :(得分:1)
我们设法了解到这一点,见:
Why does my STL code run so slowly when I have the debugger/IDE attached?
当您连接调试器时,会使用另一个(DEBUG)内存堆 - 如果需要,可以将其关闭。
答案 2 :(得分:0)
这听起来不太对劲,但在我们真正能提供帮助之前,您需要收集更多细节。您使用的是哪一个hash_map实现?您是否用性能分析器(profiler)分析过它?如果有,结果告诉了您什么?
一般情况下,如果哈希表实现在没有明显原因的情况下表现不佳,通常是因为表所使用的哈希函数恰好对您的特定输入执行不良。这可能是你的问题 - C ++ hash_map碰巧使用了一个哈希函数,它将你的键映射到一小部分桶,而C#HashSet却没有 - 或者它可能是完全不同的东西。
std :: map通常实现为树,因此具有不同的性能特征。同样,实现和输入数据的细节很重要。
答案 3 :(得分:0)
我从未使用过它,但Google Sparsehash可能是合适的
答案 4 :(得分:0)
您在C ++代码中使用std :: map,其插入和查找时间为O(log(n))。尝试使用hash_map进行测试以获得更好的比较。
答案 5 :(得分:0)