我正在对n个点和k个中心进行K均值聚类。
首先,到目前为止,这是我的代码:
一个点的类:
/// <summary>
/// A point in the plane, tagged with an integer identifier.
/// </summary>
public class Point
{
    /// <summary>Identifier of the point; -1 for a point built by the parameterless constructor.</summary>
    public int Id { get; set; }

    /// <summary>X coordinate.</summary>
    public double X { get; set; }

    /// <summary>Y coordinate.</summary>
    public double Y { get; set; }

    /// <summary>
    /// Creates a placeholder point whose <see cref="Id"/>, <see cref="X"/> and
    /// <see cref="Y"/> are all -1.
    /// </summary>
    public Point()
        : this(-1, -1, -1)
    {
    }

    /// <summary>
    /// Creates a point with the given identifier and coordinates.
    /// </summary>
    /// <param name="id">Identifier stored in <see cref="Id"/>.</param>
    /// <param name="x">Value stored in <see cref="X"/>.</param>
    /// <param name="y">Value stored in <see cref="Y"/>.</param>
    public Point(int id, double x, double y)
    {
        Id = id;
        X = x;
        Y = y;
    }

    /// <summary>
    /// Computes the Euclidean distance between two points.
    /// </summary>
    /// <param name="pt1">First point.</param>
    /// <param name="pt2">Second point.</param>
    /// <returns>The square root of the summed squared coordinate differences.</returns>
    public static double FindDistance(Point pt1, Point pt2)
    {
        double deltaX = pt2.X - pt1.X;
        double deltaY = pt2.Y - pt1.Y;
        double sumOfSquares = Math.Pow(deltaX, 2.0) + Math.Pow(deltaY, 2.0);
        return Math.Sqrt(sumOfSquares);
    }
}
数据类:
/// <summary>
/// A list of points that also tracks a centroid (the mean X and mean Y of its members).
/// </summary>
/// <remarks>
/// Only <see cref="AddPoint"/>, <see cref="RemovePoint"/> and <see cref="UpdateCentroid"/>
/// touch <see cref="Centroid"/>. Members inherited from <c>List&lt;Point&gt;</c>
/// (such as <c>Add</c>, <c>AddRange</c> or <c>Remove</c>) change the contents without
/// recomputing it; call <see cref="UpdateCentroid"/> afterwards if the centroid must match.
/// </remarks>
public class PointCollection : List<Point>
{
    /// <summary>
    /// Current centroid. A new collection starts with a placeholder point
    /// created by the parameterless <c>Point</c> constructor.
    /// </summary>
    public Point Centroid { get; set; }

    /// <summary>Creates an empty collection with a placeholder centroid.</summary>
    public PointCollection()
        : base()
    {
        Centroid = new Point();
    }

    /// <summary>Appends a point and recomputes the centroid.</summary>
    /// <param name="p">Point to add.</param>
    /// <exception cref="ArgumentNullException"><paramref name="p"/> is null.</exception>
    public void AddPoint(Point p)
    {
        // Reject null up front: adding it first would leave a null entry in the
        // list and then fail inside UpdateCentroid.
        if (p == null)
        {
            throw new ArgumentNullException("p");
        }

        this.Add(p);
        UpdateCentroid();
    }

    /// <summary>
    /// Removes the first occurrence of a point (if present) and recomputes the centroid.
    /// </summary>
    /// <param name="p">Point to remove.</param>
    /// <returns>A new <c>Point</c> carrying the same Id, X and Y as <paramref name="p"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="p"/> is null.</exception>
    public Point RemovePoint(Point p)
    {
        if (p == null)
        {
            throw new ArgumentNullException("p");
        }

        Point removedPoint = new Point(p.Id, p.X, p.Y);
        this.Remove(p);
        UpdateCentroid();
        return removedPoint;
    }

    /// <summary>
    /// Sets <see cref="Centroid"/> to the mean X and mean Y of the points in the collection.
    /// When the collection is empty the previous centroid is kept, because the mean of
    /// zero points is undefined (0.0 / 0 would store NaN).
    /// </summary>
    public void UpdateCentroid()
    {
        if (this.Count == 0)
        {
            return;
        }

        // The setter is public, so the centroid object may have been cleared by a caller.
        if (Centroid == null)
        {
            Centroid = new Point();
        }

        // One pass over the list; summation order matches a sequential Sum().
        double xSum = 0.0;
        double ySum = 0.0;
        foreach (Point p in this)
        {
            xSum += p.X;
            ySum += p.Y;
        }

        Centroid.X = xSum / (double)this.Count;
        Centroid.Y = ySum / (double)this.Count;
    }
}
主类:
/// <summary>
/// K-means clustering over <c>Point</c> objects, with clusters held as <c>PointCollection</c>s.
/// </summary>
public class KMeans
{
    /// <summary>
    /// Partitions <paramref name="points"/> into clusters. The initial partition comes from
    /// <c>ListUtility.SplitList</c>; points are then moved one at a time to the cluster whose
    /// centroid is nearest until a full pass moves nothing.
    /// </summary>
    /// <param name="points">Points to cluster.</param>
    /// <param name="clusterCount">Number of groups requested from <c>ListUtility.SplitList</c>.</param>
    /// <returns>The resulting clusters.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="points"/> is null.</exception>
    public static List<PointCollection> DoKMeans(PointCollection points, int clusterCount)
    {
        if (points == null)
        {
            throw new ArgumentNullException("points");
        }

        List<PointCollection> allClusters = new List<PointCollection>();
        List<List<Point>> allGroups = ListUtility.SplitList<Point>(points, clusterCount);
        foreach (List<Point> group in allGroups)
        {
            PointCollection cluster = new PointCollection();
            cluster.AddRange(group);
            // AddRange is the inherited List<T> method and does not recompute the centroid.
            // Without this call every cluster would start at the placeholder centroid and
            // the first pass would compare distances against meaningless values.
            cluster.UpdateCentroid();
            allClusters.Add(cluster);
        }

        int movements = 1;
        while (movements > 0)
        {
            movements = 0;
            for (int clusterIndex = 0; clusterIndex < allClusters.Count; clusterIndex++)
            {
                PointCollection cluster = allClusters[clusterIndex];
                int pointIndex = 0;
                while (pointIndex < cluster.Count)
                {
                    Point point = cluster[pointIndex];
                    int nearestCluster = FindNearestCluster(allClusters, point);

                    // A cluster is never emptied: its last remaining point stays put.
                    if (nearestCluster != clusterIndex && cluster.Count > 1)
                    {
                        Point removedPoint = cluster.RemovePoint(point);
                        allClusters[nearestCluster].AddPoint(removedPoint);
                        movements += 1;
                        // The removal shifted the next point into pointIndex, so the index
                        // is deliberately not advanced; advancing would skip that point.
                    }
                    else
                    {
                        pointIndex++;
                    }
                }
            }
        }

        return allClusters;
    }

    /// <summary>
    /// Finds the cluster whose centroid is closest to <paramref name="point"/>.
    /// </summary>
    /// <param name="allClusters">Candidate clusters.</param>
    /// <param name="point">Point to classify.</param>
    /// <returns>
    /// Index into <paramref name="allClusters"/> of the nearest cluster (the lowest index wins
    /// a tie), or -1 when <paramref name="allClusters"/> is empty.
    /// </returns>
    public static int FindNearestCluster(List<PointCollection> allClusters, Point point)
    {
        double minimumDistance = 0.0;
        int nearestClusterIndex = -1;
        for (int k = 0; k < allClusters.Count; k++)
        {
            double distance = Point.FindDistance(point, allClusters[k].Centroid);

            // The first cluster is always accepted; later ones only when strictly closer.
            if (nearestClusterIndex < 0 || distance < minimumDistance)
            {
                minimumDistance = distance;
                nearestClusterIndex = k;
            }
        }

        return nearestClusterIndex;
    }
}
最后是用于拆分列表的辅助函数:
/// <summary>
/// Splits a list into contiguous, non-empty groups whose sizes differ by at most one.
/// </summary>
/// <typeparam name="T">Element type.</typeparam>
/// <param name="items">Items to split; the list itself is not modified.</param>
/// <param name="groupCount">Requested number of groups; must be positive.</param>
/// <returns>
/// Exactly <paramref name="groupCount"/> groups when <paramref name="items"/> has at least
/// that many elements, otherwise one single-element group per item (an empty result for an
/// empty list). Concatenating the groups in order yields the original sequence; when the
/// items do not divide evenly, the leading groups each hold one extra element.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="items"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="groupCount"/> is zero or negative.</exception>
public static List<List<T>> SplitList<T>(List<T> items, int groupCount)
{
    if (items == null)
    {
        throw new ArgumentNullException("items");
    }

    if (groupCount <= 0)
    {
        throw new ArgumentOutOfRangeException("groupCount", "groupCount must be positive.");
    }

    // Never produce empty groups: with fewer items than groups, each item gets its own group.
    int actualGroupCount = Math.Min(groupCount, items.Count);
    List<List<T>> allGroups = new List<List<T>>(actualGroupCount);
    if (actualGroupCount == 0)
    {
        return allGroups;
    }

    // Integer arithmetic keeps the group length at least 1 and makes the lengths sum to
    // items.Count, so the loop always terminates with the exact number of groups.
    int baseLength = items.Count / actualGroupCount;
    int remainder = items.Count % actualGroupCount;
    int startIndex = 0;
    for (int groupIndex = 0; groupIndex < actualGroupCount; groupIndex++)
    {
        int groupLength = baseLength + (groupIndex < remainder ? 1 : 0);
        allGroups.Add(items.GetRange(startIndex, groupLength));
        startIndex += groupLength;
    }

    return allGroups;
}
所以,现在我想为主类再编写一个方法,让它接受预定义的起始中心。我对此毫无头绪,因为我在网上找不到任何关于让 k-means 算法使用给定初始中心的资料。有人能给我指个方向,或者提示一下该如何修改代码吗?谢谢。
编辑:也许还有更多我尝试这样做的原因:我尝试使用k-means编写LBG算法,如https://onlinecourses.science.psu.edu/stat557/node/67
通过我的代码,我可以获取计算出的中心,以便在每一步中进行拆分,但我需要找到一种方法把它们传回 k-means 类。例如,计算出起始中心之后,我需要把这个中心以及另一个偏移了 epsilon 的中心一起传入 k-means 算法。
编辑2:代码现在已改为英文(但愿如此)
答案 0 :(得分:0)
我找到了解决方案,也许对其他人也有用:
/// <summary>
/// Variant of DoKMeans that seeds each initial cluster with a caller-supplied centre
/// instead of leaving the placeholder centroid in place.
/// </summary>
/// <param name="points">Points to cluster.</param>
/// <param name="clusterCount">Number of clusters requested.</param>
/// <param name="startingCentres">
/// Initial centres; element i seeds the i-th initial cluster, so at least one entry per
/// initial group is required.
/// </param>
/// <returns>The resulting clusters (produced by the elided remainder of the method).</returns>
/// <exception cref="ArgumentNullException"><paramref name="startingCentres"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="startingCentres"/> has fewer entries than there are initial groups.</exception>
public static List<PointCollection> DoKMeans(PointCollection points, int clusterCount, Point[] startingCentres)
{
    // code...
    if (startingCentres == null)
    {
        throw new ArgumentNullException("startingCentres");
    }

    if (startingCentres.Length < allGroups.Count)
    {
        throw new ArgumentException("One starting centre is required per initial group.", "startingCentres");
    }

    int ctr = 0;
    foreach (List<Point> group in allGroups)
    {
        PointCollection cluster = new PointCollection();
        // Copy the coordinates into the cluster's own Centroid object rather than sharing
        // the caller's Point, so later centroid updates do not modify startingCentres.
        cluster.Centroid.X = startingCentres[ctr].X;
        cluster.Centroid.Y = startingCentres[ctr].Y;
        // AddRange is the inherited List<T> method and does not recompute the centroid,
        // so the seeded centre stays in effect until a point is added or removed.
        cluster.AddRange(group);
        allClusters.Add(cluster);
        // Advance to the next starting centre; otherwise every cluster gets element 0.
        ctr++;
    }
    // rest of code the same
}