// Scalar types used for occurrence counts and for weights.
using Count = unsigned long;
using Weight = float;

// String-keyed maps holding a count or a weight per key.
using StringToCountMap = std::map<std::string, Count>;
using StringToWeightMap = std::map<std::string, Weight>;

// Maps from an unsigned id to one of the string-keyed maps above.
using UnsignedToStringToCountMap = std::map<unsigned long, StringToCountMap>;
using UnsignedToStringToWeightMap = std::map<unsigned long, StringToWeightMap>;

// Map from an unsigned id to a std::size_t index.
using ClustersMap = std::map<unsigned long, std::size_t>;
/// Clustering algorithm operating on weighted document vectors.
///
/// The object keeps a reference to the document vectors handed to the
/// constructor rather than a copy, so that map must outlive the algorithm.
class DefaultClusteringAlgorithm
{
public:
    /// Minimum number of documents changing clusters for the algorithm to end.
    static const unsigned DocumentChangeThreshold = 0;

    /// @param numClusters      value stored in numClusters_
    /// @param documentVectors  document vectors, stored by reference (not copied)
    DefaultClusteringAlgorithm(unsigned numClusters, const UnsignedToStringToWeightMap &documentVectors)
        : numClusters_(numClusters),
          documentVectors_(documentVectors)
    {
    }

    ~DefaultClusteringAlgorithm() {}

    /// Runs the clustering; defined outside this declaration.
    /// NOTE(review): presumably returns clusters_ -- confirm against the definition.
    const ClustersMap &DoClustering();

private:
    // Steps of the algorithm; their definitions live outside the class body.
    void ChooseInitialCentroids();
    unsigned ClusterOnCentroids();
    void RecalculateCentroids();

    // Vector helpers working on two documents / a single document.
    float DocumentDotProduct(const StringToWeightMap &left, const StringToWeightMap &right);
    float DocumentLength(const StringToWeightMap &document);

    unsigned numClusters_;

    /// cluster id => centroid (the cluster id is the index into the vector)
    std::vector<StringToWeightMap> centroids_;

    /// question id => cluster id
    ClustersMap clusters_;

    /// Document vectors, referenced from the caller-owned map.
    const UnsignedToStringToWeightMap &documentVectors_;
};
void DefaultClusteringAlgorithm::RecalculateCentroids()
{
std::vector<unsigned> newCentroidsSizes(centroids_.size());
std::vector<StringToWeightMap> newCentroids(centroids_.size());
ClustersMap::const_iterator clusterMapping = clusters_.begin();
for (; clusterMapping != clusters_.end(); ++clusterMapping)
{
std::size_t clusterId = clusterMapping->second;
++newCentroidsSizes[clusterId];
const StringToWeightMap &document = documentVectors_.at(clusterMapping->first);
StringToWeightMap::const_iterator termWeight = document.cbegin();
for (; termWeight != document.end(); ++termWeight);
{
newCentroids[clusterId][termWeight->first] += termWeight->second;
}
}
std::vector<unsigned>::iterator centroidSize = newCentroidsSizes.begin();
for (; centroidSize != newCentroidsSizes.end(); ++centroidSize)
{
std::size_t clusterId = centroidSize - newCentroidsSizes.begin();
StringToWeightMap::iterator centroidTermWeight = newCentroids[clusterId].begin();
for (; centroidTermWeight != newCentroids[clusterId].end(); ++centroidTermWeight)
{
centroidTermWeight->second /= *centroidSize;
}
}
}
创建const_iterator termWeight时出现问题:
StringToWeightMap::const_iterator termWeight = document.begin();
如上图所示，termWeight 这个 const_iterator 包含无效数据。但是，类型为 const std::map 的变量 document 本身是一个完全有效的 std::map。我想不出发生这种情况的任何原因。
我最近了解到 std::map::cbegin() 的存在。我应该改用那个方法吗？
编辑:包含更多上下文
答案 0 :(得分:2)
哈！我发现错误了！我的 for 循环语句末尾多了一个愚蠢的小分号！
答案 1 :(得分:0)
std::map 的 begin() 方法可能会返回一个指向 map 末尾的迭代器，因为 map 中可能根本没有任何元素。