我的目标是使用减法聚类来聚类我的数据,以便我可以从中提取模糊规则。
假设我有以下二维数据: -
X[]=[ {0,.16,.24,.42,.48,.66,.83,.24,.42,.48,.66,.66,.16,.24,.42,.42,.48,.48,.48,.66,.66,.66,.66,.66,.66,.66,.83,.83,.83,.66},
{0,0,0,0,0,0,0,.15,.13,.1,.12,.18,.58,.78,.59,.78,.45,.49,.58,.45,.49,.58,.65,.71,.715,.72,.66,.725,.726,.455}
]
请提供有关如何在java中实现减法聚类的示例。
顺便说一下,我做了一些研究工作,并为此找到了以下算法
算法: -
其中m是数据的维度或类型(在我的例子中为2),n是点数。
3选择最高潜在值作为第一个集群中心并修改所有数据点的潜力直至
我是如何使用java实现的, 代码: - (请注意,由于某种原因,我无法提供完整的运行代码,所以我附加正在执行所提算法的步骤的代码)
class SClustering {
double[][]data;
double normData[][];
ArrayList Potentials=new ArrayList();
ArrayList sortedPotentials;
ArrayList clusters=new ArrayList();
double rj[];
double radii; // radius
double squashFactor=1.5;
double acceptRatio=.5;
double rejectFactor=0.3;
double rb=radii*squashFactor;;
double alpha=4.0/(radii*radii);
double beta=4.0/(Math.pow(rb,2.0));
double max[];
double min[];
int numofdimen=2; // as according to the input dataset
int numofPoints=29; // as according to the input dataset
ArrayList centersArrayList=new ArrayList();
Potential p=new Potential();
double Pi=0;
boolean noCenter=false;
boolean flag=false;
public Clustering(double data[][], double Radii)
{
radii= Radii;
data=new double[2][29];
normData=new double[2][29];
max=new double[2];
min=new double[2];
rj=new double[2];
double[] sigmas=new double[centersArrayList.size()];
rj[0]=100;
rj[1]=50;
int index;
for(int i=0;i<29;i++)
{
for(int j=0;j<2;j++)
{
data[j][i]=data[j][i];
}
}
dataNormalize();
calculatePotential();
int m=0;
while(!flag)
{
sortPotentials();
index=setCenters(Potentials.size()-1);
sigmas=calculateSigmas();
if(index!=-1)
{
new cluster()
//setting the cluster
cluster.setCentroid(getCenterPoint(index));
cluster.setSigmas(sigmas);
RecalculatePotential(index);
}
else
{
flag=true;
}
}
}
public void dataNormalize()
{
//getting the max and min data point
for(int m=0;m<numofdimen;m++)
{
min[m]=data[m][0];
for(int i=0;i<numofPoints;i++)
{
if(min[m]>data[m][i])
{
min[m]=data[m][i];
}
}
}
for(int m=0;m<numofdimen;m++)
{
max[m]=data[m][0];
for(int i=0;i<numofPoints;i++)
{
if(max[m]<data[m][i])
{
max[m]=data[m][i];
}
}
}
//normalizing
for(int m=0;m<numofdimen;m++)
{
for(int i=0;i<numofPoints;i++)
{
normData[m][i]=(data[m][i]-min[m])/(max[m]-min[m]);
}
}
}
public void calculatePotential(){
double distance=0;
double tempPotential=0;
for(int k=0;k<numofPoints;k++)
{
for(int i=0;i<numofPoints;i++)
{
if(k!=i)
{
for(int m=0;m<numofdimen;m++)
{
distance+=normData[m][k]-normData[m][i];
}
tempPotential=(Math.exp(-1* alpha *Math.pow(distance,2)));
if(i!=0)
{
//here p is an object of potential class and here we are getting the previous set potentials
tempPotential+=previousPotentials.getValue();
}
}
}
p.setPotentials(k,tempPotential);
Potentials.add(p);
p=new Potential();
}
}
void RecalculatePotential(int index_of_center)
{
double distance=0;
double tempPotential;
for(int k=0;k<numofPoints;k++)
{
if(k!=index_of_center)
{
for(int m=0;m<numofdimen;m++)
{
distance+=normData[m][k]-normData[m][index_of_center];
}
tempPotential=(Math.exp(-1*beta*Math.pow(distance,2)));
tempPotential=((Potentials.get(k))-(((Potentials.get(index_of_center)))*tempPotential);
p =new Potential();
p.setPotentials(k,tempPotential);
Potentials.set(k,p);
p=new Potential();
}
}
}
boolean ifNewCenter(int index_of_center)
{
//if not new return false
//if new return true
}
double getMinDistance(int index_of_center)
{
double vectorDistances[]=new double[numofdimen];
double distances[]=new double[centersArrayList.size()];
double minDistanceistance;
for(int j=0;j<centersArrayList.size();j++)
{
for(int m=0;m<numofdimen;m++)
{
if(index_of_center!=j)
{
vectorDistances[m]=normData[m][index_of_center]-normData[m][((Integer)(centers.get(j))).intValue()];
}
}
distances[j]=calculateVLength(vectorDistances);
}
//sort the distances
return distances[0];
}
public void sortPotentials()
{
//returns the sorted list of potentials
}
public int setCenters(int maxIndex)
{
double minDistance;
double PotentialCenter;
PotentialCenter=((Double)(sortedPotentials.get(maxIndex))).doubleValue();
if(centersArrayList.size()!=0)
{
if(ifNewCenter()) // here we are checking the the center is new or not
{
minDistance=getMinDistance(maxIndex);
if(PotentialCenter>((acceptRatio)*((Potential)Potentials.get(Potentials.size()-1)).getValue()))
centersArrayList.add(((Integer)((Potential)(sortedPotentials.get(maxIndex))).getIndex()).intValue());
else if(clusteringEnd(maxIndex))
flag=true;
else if((minDistance/radii)+(PotentialCenter/Pi)<1)
{
p=new Potential();
p.setPotentials(maxIndex,0);
Potentials.set(maxIndex,p);
if(maxIndex>0)
{
setCenters(maxIndex-1);
}
else
{
noCenter=true;
return 0;
}
}
else
{
// System.out.println("flag is true nwo------------------------------------");
centersArrayList.add(((Integer)((Potential)(sortedPotentials.get(maxIndex))).getIndex()).intValue());
}
}
else
{
if(maxIndex>0)
{
setCenters(maxIndex-1);
}
else
{
noCenter=true;
return 0;
}
}
}
else
{
centersArrayList.add(((Integer)((Potential)(sortedPotentials.get(maxIndex))).getIndex()).intValue());
Pi=PotentialCenter;
}
if(!noCenter || !flag)
{
return ((Integer)(centersArrayList.get(centersArrayList.size()-1))).intValue();
}
else
{
return -2;
}
}
public boolean clusteringEnd(int centerindex)
{
//comparing the current potential with the rejectFactor* first largest potential
if((((Potential)(Potentials.get(centerindex))))<(rejectFactor*(((Potential)(Potentials.get(Potentials.size()-1))))))
return true;
return false;
}
public double[] calculateSigmas()
{
double sigmas[]=new double[numofdimen];
for(int m=0;m<numofdimen;m++)
{
sigmas[m]=(rj[m]*(max[m]-min[m]))/(Math.sqrt(8.0));
}
return sigmas;
}
public double calculateVLength(double input[]){
double temp=0;
double length=0;
for(int i=0;i<input.length;i++)
{
temp+=Math.pow(input[i],2);
}
length=Math.sqrt(temp);
return length;
}
public static void main(String[] args) {
double Points[][]={ {0,.16,.24,.42,.48,.66,.83,.24,.42,.48,.66,.66,.16,.24,.42,.42,.48,.48,.48,.66,.66,.66,.66,.66,.66,.66,.83,.83,.83,.66},
{0,0,0,0,0,0,0,.15,.13,.1,.12,.18,.58,.78,.59,.78,.45,.49,.58,.45,.49,.58,.65,.71,.715,.72,.66,.725,.726,.455}
};
SClustering sc;
sc=new SClustering(Points,.4);
}
}
但我在代码中的问题是: -
当我运行我的程序时,我只得到两个集群 centroid1:0.83,0.725 centroid2: - 0.83,0.726
但是当我在上面提到的数据集上执行Matlab'clusterfind'程序时,我得到了3个集群
centroid1:0.66,0.65 centroid2: - 0.48,0.10 centroid3: - 0.16,0.0
下图中显示的各种参数值在我的实现中也是相同的
所以我正在实施的算法有任何问题,请提供指导