我想弄清楚:为什么算法得到的各个质心位置彼此如此接近?
我找到了一份模糊 C 均值(Fuzzy C-Means)算法的代码 http://msugvnua000.web710.discountasp.net/Posts/Details/3347 ,并且费了很大力气把它转换成了 Java 代码(见下文),但其中仍有一些地方我不理解。
我也参考过这个实现 http://www.codeproject.com/Articles/91675/Computer-Vision-Applications-with-C-Fuzzy-C-means ,它的代码看起来很相似,但有一处不同:它更新的不是聚类索引,而是隶属度值——我不明白为什么要做这样的改动。
/// Fuzzy C-Means clustering of jobs, using each job's midpoint (x, y) as its position.
public class CMeansAlgorithm3 {
/// Fuzzifier: membership values are raised to this power, and distance ratios to 2 / (fuzzyness - 1).
private static int fuzzyness = 2;
/// Lookup of clusters by a Double key; the visible code fills it with each cluster's membership sum as the key.
private final Map<Double, Species> integerClusterHashMap = new HashMap<Double, Species>();
/// All points (jobs) the algorithm clusters.
private List<Job> points;
/// Membership matrix, allocated as [points.size()][clusterList.size()]: U[i][j] is the membership of point i in cluster j.
public double[][] U;
/// Small constant (1e-5) substituted for a distance that would otherwise cause a division by zero.
private double eps = Math.pow(10, -5);
/// Objective function value recorded at the start of the most recent iteration of run().
private double J;
/// Accumulated log text; it has no initialiser, so it is null until something is appended.
public String log;
/// The clusters being fitted; created by initialiseCentroids() in the constructor.
private List<Species> clusterList;
public CMeansAlgorithm3(List<Job> points, int clusterSize){
this.points = points;
clusterList = initialiseCentroids(points, clusterSize);
U = new double[points.size()][clusterList.size()];
calculateClusterMembershipValues();
recalculateClusterIndexes();
}
/// Fills the membership matrix U from the current centroids.
///
/// For every point the distance to each centroid is measured (a zero distance
/// is replaced by eps so it can be inverted), each distance is turned into an
/// inverse-power weight using the exponent 2 / (fuzzyness - 1), and the weights
/// of the point are normalised so that its row of U sums to 1.
private void calculateClusterMembershipValues() {
    final double exponent = 2.0 / (fuzzyness - 1.0);
    final int clusterCount = clusterList.size();

    for (int i = 0; i < points.size(); i++) {
        final Job p = points.get(i);

        // Pass 1: raw distances from this point to every centroid.
        double distanceSum = 0.0;
        for (int j = 0; j < clusterCount; j++) {
            // clusterList holds Species, so use that type (as the other methods do).
            final Species c = clusterList.get(j);
            final double dx = p.getMidpointX() - c.getCentroid().getX();
            final double dy = p.getMidpointY() - c.getCentroid().getY();
            final double diff = Math.sqrt(Math.pow(dx, 2.0) + Math.pow(dy, 2.0));
            U[i][j] = (diff == 0) ? eps : diff;
            distanceSum += U[i][j];
        }

        // Pass 2: inverse-power weights of the relative distances.
        double weightSum = 0.0;
        for (int j = 0; j < clusterCount; j++) {
            U[i][j] = 1.0 / Math.pow(U[i][j] / distanceSum, exponent);
            weightSum += U[i][j];
        }

        // Pass 3: normalise so the memberships of this point sum to 1.
        for (int j = 0; j < clusterCount; j++) {
            U[i][j] = U[i][j] / weightSum;
        }
    }
}
/// Recalculates cluster indexes.
///
/// Each job receives the index (its position in clusterList) of the cluster in
/// which it has the highest membership. The previous version stored the
/// membership value itself, which is a number in [0, 1] and not an index.
/// On a tie the lowest index wins; when there are no clusters at all the index
/// is set to -1.
private void recalculateClusterIndexes() {
    for (int i = 0; i < points.size(); i++) {
        int bestCluster = -1;
        double bestMembership = -1.0;
        for (int j = 0; j < clusterList.size(); j++) {
            if (U[i][j] > bestMembership) {
                bestMembership = U[i][j];
                bestCluster = j;
            }
        }
        points.get(i).setClusterIndex(bestCluster);
    }
}
/// Runs the algorithm until the objective function changes by less than
/// the requested accuracy between the start and the end of an iteration,
/// or until 100 iterations have been performed, whichever comes first.
/// At least one iteration is always executed. Afterwards the jobs are
/// assigned to their clusters.
///
/// @param accuracy threshold on the absolute change of the objective function
/// @return the list of clusters
public List<Species> run(double accuracy) {
    final int maxIterations = 100;
    for (int iteration = 1; iteration <= maxIterations; iteration++) {
        J = calculateObjectiveFunction();
        calculateClusterCentroids();
        step();
        final double updatedObjective = calculateObjectiveFunction();
        final boolean converged = Math.abs(J - updatedObjective) < accuracy;
        if (converged) {
            break;
        }
    }
    assignJobsToClusters();
    return clusterList;
}
/// Evaluates the objective function: the sum, over every point and every
/// cluster, of the membership raised to the fuzzifier multiplied by the
/// squared point-to-centroid distance.
///
/// @return the objective function value
private double calculateObjectiveFunction() {
    double total = 0;
    final int pointCount = points.size();
    final int clusterCount = clusterList.size();
    for (int pointIndex = 0; pointIndex < pointCount; pointIndex++) {
        final Job point = points.get(pointIndex);
        for (int clusterIndex = 0; clusterIndex < clusterCount; clusterIndex++) {
            final double weight = Math.pow(U[pointIndex][clusterIndex], fuzzyness);
            final double distance = calculateEuclidDistance(point, clusterList.get(clusterIndex));
            total += weight * Math.pow(distance, 2);
        }
    }
    return total;
}
/// Creates the initial clusters, each centred on the midpoint of a data point
/// chosen through ToolBox.uniqueIndexes.
///
/// The previous version added the new clusters to an undefined variable
/// (speciesList) and returned a list that was never filled; the local list is
/// now the one that is populated and returned, and it no longer shadows the
/// clusterList field.
///
/// @param dataSet     the jobs from which the starting centroids are drawn
/// @param speciesSize the number of clusters to create
/// @return the new clusters, one per index supplied by ToolBox.uniqueIndexes
private List<Species> initialiseCentroids(final List<Job> dataSet, final int speciesSize) {
    final List<Integer> uniqueIndexes = ToolBox.uniqueIndexes(dataSet.size(), speciesSize);
    final List<Species> initialClusters = new ArrayList<Species>(uniqueIndexes.size());
    for (int i = 0; i < uniqueIndexes.size(); i++) {
        final Job seed = dataSet.get(uniqueIndexes.get(i));
        final Species species = new Species(i);
        species.setCentroid(new Centroid(seed.getMidpointX(), seed.getMidpointY(), i));
        initialClusters.add(species);
    }
    return initialClusters;
}
/// Perform one step of the algorithm: recompute every membership value from
/// the current centroids, then refresh the cluster index of every job.
///
/// The membership of point h in cluster c is
/// 1 / sum over ck of (d(h, c) / d(h, ck)) ^ (2 / (fuzzyness - 1)).
///
/// Only distances smaller than eps are raised to eps (to avoid dividing by
/// zero). The previous version replaced every distance below 1.0 by eps; on
/// data whose coordinates span a small range that makes all distances equal,
/// all memberships 1 / clusterCount, and therefore all centroids identical.
/// Distances are also computed once per point instead of once per cluster pair.
public void step() {
    final int clusterCount = clusterList.size();
    final double exponent = 2.0 / (fuzzyness - 1.0);
    final double[] distances = new double[clusterCount];

    for (int h = 0; h < points.size(); h++) {
        final Job point = points.get(h);

        // Distances from this point to every centroid, kept strictly positive.
        for (int c = 0; c < clusterCount; c++) {
            distances[c] = Math.max(calculateEuclidDistance(point, clusterList.get(c)), eps);
        }

        for (int c = 0; c < clusterCount; c++) {
            // sumTerms accumulates the distance ratios of this point against all clusters.
            double sumTerms = 0.0;
            for (int ck = 0; ck < clusterCount; ck++) {
                sumTerms += Math.pow(distances[c] / distances[ck], exponent);
            }
            U[h][c] = 1.0 / sumTerms;
        }
    }
    recalculateClusterIndexes();
}
/// Distance between a job's midpoint and a cluster's centroid, as computed
/// by ToolBox.calculateDistance.
///
/// @param p the job whose midpoint is used
/// @param c the cluster whose centroid is used
/// @return the calculated distance
private double calculateEuclidDistance(Job p, Species c) {
    final double pointX = p.getMidpointX();
    final double pointY = p.getMidpointY();
    final double centroidX = c.getCentroid().getX();
    final double centroidY = c.getCentroid().getY();
    return ToolBox.calculateDistance(pointX, pointY, centroidX, centroidY);
}
/// Calculates the centroids of the clusters.
///
/// Each centroid becomes the mean of all point midpoints weighted by the
/// point's membership in that cluster raised to the fuzzifier. The weight sum
/// is stored on the cluster via setMembershipSum, and one entry per cluster is
/// appended to the log.
///
/// Fixes over the previous version: a cluster whose weight sum is not positive
/// keeps its current centroid instead of becoming NaN; a null log is treated
/// as empty so the text no longer starts with "null"; and the log entry is
/// built by plain concatenation rather than being passed to String.format as a
/// format string.
private void calculateClusterCentroids() {
    final StringBuilder logText = new StringBuilder(log == null ? "" : log);

    for (int j = 0; j < clusterList.size(); j++) {
        final Species c = clusterList.get(j);
        double uX = 0.0;
        double uY = 0.0;
        double membershipSum = 0.0;
        for (int i = 0; i < points.size(); i++) {
            final Job p = points.get(i);
            final double uu = Math.pow(U[i][j], fuzzyness);
            uX += uu * p.getMidpointX();
            uY += uu * p.getMidpointY();
            membershipSum += uu;
        }
        c.setMembershipSum(membershipSum);

        // Without any weight there is nothing to average; leave the centroid where it is.
        if (membershipSum > 0.0) {
            c.getCentroid().setX(uX / membershipSum);
            c.getCentroid().setY(uY / membershipSum);
        }

        logText.append("Cluster Centroid: (")
                .append(c.getCentroid().getX())
                .append("; ")
                .append(c.getCentroid().getY())
                .append(")");
    }
    log = logText.toString();
}
/// Adds every job to the cluster in which it has the highest membership.
///
/// The previous version filled a map keyed by each cluster's membership sum
/// and then looked jobs up in it by their cluster index (through an undefined
/// variable), so the keys could never match. The cluster is now taken straight
/// from the membership matrix: row i of U belongs to points.get(i) and column j
/// to clusterList.get(j). On a tie the lowest cluster index wins; a job whose
/// row contains no comparable value (for example only NaN) is left unassigned.
private void assignJobsToClusters(){
    for (int i = 0; i < points.size(); i++) {
        int bestCluster = -1;
        double bestMembership = Double.NEGATIVE_INFINITY;
        for (int j = 0; j < clusterList.size(); j++) {
            if (U[i][j] > bestMembership) {
                bestMembership = U[i][j];
                bestCluster = j;
            }
        }
        if (bestCluster >= 0) {
            clusterList.get(bestCluster).add(points.get(i));
        }
    }
}
答案 0 :(得分:0)
以下是模糊 C 均值(Fuzzy C-Means)算法的 Java 实现代码:
package f;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.Scanner;
/**
 * Console fuzzy c-means clustering of a comma-separated numeric data set,
 * using a fuzzifier of m = 2.
 *
 * <p>All state lives in static tables. One iteration consists of
 * {@link #print()} (choose centroids, measure distances), {@link #s()}
 * (memberships and hard assignment), {@link #newcenter()} (centroid update)
 * and {@link #check()} (convergence test). {@link #main(String[])} repeats
 * that cycle until two consecutive iterations give the same assignment.
 *
 * @author Anooj.k.varghese
 */
public class F{
    /** Maximum number of data points the tables can hold. */
    private static final int MAX_POINTS = 20000;
    /**
     * Column capacity of the per-point tables. It bounds both the number of
     * clusters (columns of {@code diff}/{@code eud}) and {@code row + 2}
     * (attributes plus the two assignment columns of {@code data_set}).
     */
    private static final int MAX_COLUMNS = 100;
    /** Data set read when no path is given on the command line. */
    private static final String DEFAULT_DATASET_PATH = "E:/aa.txt";

    /** data_set[i][0..row-1]: attributes of point i; [row] and [row+1]: assignments from even/odd iterations. */
    static Double data_set[][]=new Double[MAX_POINTS][MAX_COLUMNS];
    /** diff[i][j]: Euclidean distance from point i to centroid j. */
    static Double diff[][]=new Double[MAX_POINTS][MAX_COLUMNS];
    /** eud[i][j]: membership of point i in cluster j (indexed exactly like diff). */
    static Double eud[][]=new Double[MAX_POINTS][MAX_COLUMNS];
    /** Centroids used by the current iteration. */
    static Double intial_centroid[][]=new Double[300][400];
    /** Centroids produced by the latest call to newcenter(). */
    static Double new_center[][]=new Double[300][400];
    /** Number of points read so far. */
    static int num = 0;
    /** Number of numeric attributes per point (4 for the iris data set). */
    static int row=4;
    /** Number of clusters (K), entered by the user. */
    static int cnum;
    /** Number of completed centroid updates. */
    static int itc=0;
    /** 1 while the assignment is still changing, 0 once it is stable. */
    static int checker=1;

    /**
     * Reads the data set, one point per non-blank line.
     *
     * <p>Lines are split with {@code nextLine()}, so the file may use any line
     * ending regardless of the platform, and blank lines are skipped instead
     * of failing the parse. The scanner is closed even when parsing throws.
     *
     * @param path location of the data set file
     * @throws FileNotFoundException if the file cannot be opened
     */
    private static void readFile(String path) throws FileNotFoundException
    {
        try (Scanner scanner = new Scanner(new File(path)))
        {
            int lineNo = 0;
            while (scanner.hasNextLine())
            {
                String line = scanner.nextLine();
                if (!line.trim().isEmpty())
                {
                    parseLine(line, lineNo);
                    System.out.println();
                }
                lineNo++;
            }
        }
    }

    /**
     * Parses the first {@code row} comma-separated numbers of a line into the
     * next free row of {@code data_set} and resets the iteration counter.
     *
     * <p>Numbers are parsed with the root locale so that "5.1" is accepted
     * whatever the default locale of the machine is.
     *
     * @param line   one line of the data set
     * @param lineNo position of the line in the file (not used for storage;
     *               points are stored consecutively)
     */
    public static void parseLine(String line,int lineNo)
    {
        itc=0;
        try (Scanner lineScanner = new Scanner(line))
        {
            lineScanner.useDelimiter(",");
            lineScanner.useLocale(java.util.Locale.ROOT);
            for(int col=0;col<row;col++)
            {
                data_set[num][col]=lineScanner.nextDouble();
            }
        }
        // Only count the point once the whole line has been parsed.
        num++;
    }

    /** Clears both assignment columns of every point. */
    public static void init()
    {
        for(int i=0;i<num;i++)
        {
            data_set[i][row]=0.0;
            data_set[i][row+1]=0.0;
        }
    }

    /**
     * Selects the centroids for this iteration and computes the distance of
     * every point to every centroid.
     *
     * <p>On the first iteration K is read from standard input and the first K
     * points become the centroids; afterwards the centroids computed by
     * {@link #newcenter()} are used.
     *
     * @throws IllegalArgumentException if the entered K is smaller than 1 or
     *         larger than the number of points or the table capacity
     */
    public static void print()
    {
        if(itc==0)
        {
            System.out.println("ENTER K");
            // Deliberately not closed: closing it would close System.in.
            Scanner sc=new Scanner(System.in);
            cnum=sc.nextInt();
            if(cnum<1 || cnum>num || cnum>MAX_COLUMNS)
            {
                throw new IllegalArgumentException(
                        "K must be between 1 and " + Math.min(num, MAX_COLUMNS) + " but was " + cnum);
            }
            System.out.println("centroid");
            loadCentroids(data_set);
        }
        else
        {
            loadCentroids(new_center);
        }

        for(int i=0;i<num;i++)
        {
            for(int j=0;j<cnum;j++)
            {
                double squaredSum=0;
                for(int k=0;k<row;k++)
                {
                    double delta=intial_centroid[j][k]-data_set[i][k];
                    squaredSum+=delta*delta;
                }
                diff[i][j]=Math.sqrt(squaredSum);
                System.out.println(diff[i][j]);
            }
        }
    }

    /** Copies the first cnum rows of source into intial_centroid and prints them. */
    private static void loadCentroids(Double[][] source)
    {
        for(int i=0;i<cnum;i++)
        {
            for(int j=0;j<row;j++)
            {
                intial_centroid[i][j]=source[i][j];
                System.out.print(intial_centroid[i][j]);
            }
            System.out.println();
        }
    }

    /**
     * Computes the membership matrix from the distances and assigns every
     * point to its strongest cluster.
     *
     * <p>Membership for m = 2 is
     * {@code u[i][j] = 1 / sum_k (d[i][j] / d[i][k])^2}; the squared ratio
     * matches the squared weights used by {@link #newcenter()}. A point that
     * coincides with one or more centroids shares its whole membership equally
     * among those centroids and gets 0 everywhere else (previously it got 0
     * for the very centroid it was sitting on).
     *
     * <p>The assignment goes to column {@code row} on even iterations and to
     * {@code row + 1} on odd ones so that {@link #check()} can compare two
     * consecutive iterations. On a tie the highest cluster index wins.
     */
    public static void s()
    {
        for(int i=0;i<num;i++)
        {
            computeMemberships(i);
        }

        for(int i=0;i<num;i++)
        {
            double total=0;
            for(int j=0;j<cnum;j++)
            {
                total=total+eud[i][j];
                System.out.print(eud[i][j]+" ");
            }
            System.out.print("total "+total);
            System.out.println();
        }

        int assignmentColumn=(itc%2==0) ? row : row+1;
        for(int i=0;i<num;i++)
        {
            int best=0;
            double bestMembership=eud[i][0];
            for(int j=0;j<cnum;j++)
            {
                if(bestMembership<=eud[i][j])
                {
                    bestMembership=eud[i][j];
                    best=j;
                }
            }
            data_set[i][assignmentColumn]=(double)best;
        }

        for(int i=0;i<num;i++)
        {
            for(int j=0;j<=row+1;j++)
            {
                System.out.print(data_set[i][j]+", ");
            }
            System.out.println();
        }
    }

    /** Fills eud[i][*] with the memberships of point i, handling zero distances explicitly. */
    private static void computeMemberships(int i)
    {
        int coincident=0;
        for(int k=0;k<cnum;k++)
        {
            if(diff[i][k]==0.0)
            {
                coincident++;
            }
        }

        for(int j=0;j<cnum;j++)
        {
            double distance=diff[i][j];
            if(coincident>0)
            {
                // The point lies exactly on at least one centroid.
                eud[i][j]=(distance==0.0) ? 1.0/coincident : 0.0;
                continue;
            }
            double sum=0.0;
            for(int k=0;k<cnum;k++)
            {
                double ratio=distance/diff[i][k];
                sum+=ratio*ratio;
            }
            eud[i][j]=1.0/sum;
        }
    }

    /**
     * Advances the iteration counter and recomputes every centroid as the mean
     * of all points weighted by their squared membership in that cluster.
     *
     * <p>A cluster whose weights sum to zero keeps the centroid it had in this
     * iteration instead of becoming NaN.
     */
    public static void newcenter()
    {
        itc++;
        for(int k=0;k<cnum;k++)
        {
            for(int j=0;j<row;j++)
            {
                double weightSum=0.0;
                double weightedSum=0.0;
                for(int i=0;i<num;i++)
                {
                    double weight=eud[i][k];
                    weight=weight*weight;
                    weightSum+=weight;
                    weightedSum+=weight*data_set[i][j];
                }
                if(weightSum==0.0)
                {
                    new_center[k][j]=intial_centroid[k][j];
                }
                else
                {
                    new_center[k][j]=weightedSum/weightSum;
                }
                System.out.println("centroid new "+new_center[k][j]);
            }
        }
    }

    /** Prints every cluster with its member points, followed by the instance counts. */
    public static void print11()
    {
        System.out.println();
        System.out.println();
        System.out.println();
        System.out.println("----OUTPUT----");
        int totalInstances=0;
        for(int i=0;i<cnum;i++)
        {
            System.out.println("---------CLUSTER-"+i+"-----");
            int clusterInstances=0;
            for(int j=0;j<num;j++)
            {
                if(data_set[j][row]==i)
                {
                    clusterInstances++;
                    totalInstances++;
                    for(int k=0;k<row;k++)
                    {
                        System.out.print(data_set[j][k]+" ");
                    }
                    System.out.println();
                }
            }
            System.out.println("CLUSTER INSTANCES="+clusterInstances);
        }
        System.out.println("TOTAL INSTANCE"+totalInstances);
    }

    /**
     * Sets {@code checker} to 0 when the assignments of the last two
     * iterations are identical for every point, and to 1 otherwise.
     */
    public static void check()
    {
        checker=0;
        for(int i=0;i<num;i++)
        {
            if(Double.compare(data_set[i][row],data_set[i][row+1]) != 0)
            {
                checker=1;
                break;
            }
            System.out.println();
        }
    }

    /**
     * Reads the data set and iterates until the assignment is stable.
     *
     * @param args optional; {@code args[0]} is the data set path, otherwise
     *             the default path is used
     * @throws FileNotFoundException if the data set cannot be opened
     */
    public static void main(String[] args) throws FileNotFoundException {
        String path=(args!=null && args.length>0) ? args[0] : DEFAULT_DATASET_PATH;
        readFile(path);
        init();
        while(checker!=0)
        {
            print();
            s();
            newcenter();
            check();
        }
        print11();
    }
}