这是KD-tree的示例实现。 我首先考虑尺寸数,点数,要形成的簇数。双分区函数计算质心,具有最大方差的维度。现在基于最大尺寸意味着我开始分裂点。当输入为(尺寸-2,点-20,簇-4)时,该程序工作正常。但不起作用(尺寸-2,点-20,聚类-8)。当我调试程序时,它给出了正确的输出。但是当我运行程序时它停止工作。
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <stdbool.h>
int *gendata(int num);
void bipartition_fn(int dimensions,int nodes,int i0, int im, int *data,int *cluster_size,int *cluster_start, int *cluster_bdry, int *cluster_centroid);
void kdtree_fn(int dimensions, int nodes, int k, int *data,int *k_cluster_size,int **k_cluster_centroid,int *k_cluster_start,int **k_cluster_bdry );
int main()
{
int dimensions,nodes,i,k,j;
printf("enter the number of dimensions");
scanf("%d",&dimensions);
printf("enter the total number of elements in multiples of dimensions");
scanf("%d", &nodes);
printf("enter number of clusters");
scanf("%d",&k);
int *data;
int *k_cluster_size;
int **k_cluster_centroid;
int *k_cluster_start;
int **k_cluster_bdry;
data = gendata(nodes); /*dynamic array generation for data points*/
k_cluster_bdry=(int **)malloc(k*sizeof(int *));
for(i=0;i<(2*k-2);i++)
*(k_cluster_bdry+i)=(int *)malloc(2*dimensions*sizeof(int));
k_cluster_centroid=(int **)malloc(k*sizeof(int *));
for(i=0;i<(2*k-2);i++)
*(k_cluster_centroid+i)=(int *)malloc(dimensions*sizeof(int));
k_cluster_size=malloc((2*k-2)*sizeof(int));
k_cluster_start = malloc((2*k-2)*sizeof(int));
/*calling the kdtree function*/
kdtree_fn(dimensions, nodes, k, data, k_cluster_size, k_cluster_centroid, k_cluster_start, k_cluster_bdry);
/*printing the cluster size */
printf("cluster size \n");
for(i=k-2; i<(2*k - 2); i++){
printf("%d ", k_cluster_size[i]);
}
free(data);
free(k_cluster_bdry);
free(k_cluster_centroid);
free(k_cluster_size);
free(k_cluster_start);
return 0;
}
void kdtree_fn(int dimensions, int nodes, int k, int *data,int *k_cluster_size,int **k_cluster_centroid,int *k_cluster_start,int **k_cluster_bdry){
int i,j,d0,dm,x,m=0,l,n=0,check=1,s,temp=0;
d0 = 0, dm =nodes ;
int *cluster_size, *cluster_start;
int *cluster_bdry;
int *cluster_centroid;
int *query;
int *res;
query = (int *)malloc(sizeof(int)*dimensions);
res = (int *)malloc(sizeof(int)*dimensions);
cluster_centroid = (int*)malloc(dimensions*sizeof(int));
cluster_bdry = (int*)malloc(4*dimensions*sizeof(int));
cluster_size=(int*)malloc(2*sizeof(int));
cluster_start = (int*)malloc(2*sizeof(int));
/* iterating k-1 times to form k clusters */
for(x=0 ; x<k-1; x++){
bipartition_fn(dimensions, nodes, d0, dm, data, cluster_size, cluster_start, cluster_bdry, cluster_centroid);
for( i=0;i<dimensions; i++){
k_cluster_centroid[x][i] = cluster_centroid[i];
}
for( i=0;i<2; i++){
k_cluster_size[m] = cluster_size[i];
k_cluster_start[m] = cluster_start[i];
m++;
}
int p=0,r=0;
while(p<2){
l=0;
i=0;
while(i<2*dimensions){
k_cluster_bdry[temp][l] = cluster_bdry[r];
l++;
i++;
r++;
}
temp++;
p++;
}
s = pow(2,check);
if(x == 0 ||(x%(s-2)) == 0){
d0 =0;
nodes = k_cluster_size[n];
check++;
n++;
}
else{
d0 = d0+k_cluster_size[n-1];
nodes = k_cluster_size[n];
n++;
}
}
free(cluster_bdry);
free(cluster_centroid);
free(cluster_size);
free(cluster_start);
}
/*Each bipartition function gives 2 clusters*/
void bipartition_fn(int dimensions,int nodes,int d0, int dm, int *data,int *cluster_size,int *cluster_start, int *cluster_bdry, int *cluster_centroid){
int i,j,x,k;
int node = nodes/dimensions;
int sum,min,max;
int *cluster_assign;
cluster_assign = malloc(nodes*sizeof(int));
int *assign;
assign= (int *)malloc(node*sizeof(int));
// printf("nodes: %d \n", nodes);
/*calculate centroid and boundaries*/
i=0;
j=d0;
while(i<dimensions){
sum=0;
while(j<(d0+nodes)){
sum = sum+data[j];
j = j+dimensions;
}
cluster_centroid[i] = sum/node;
i = i+1;
j=d0+i;
}
/* Calculate variance of each dimension and find dimension with maximum variance*/
int var[dimensions],g,h;
h=d0;
g=0;
while(g<dimensions){
sum = 0;
while(h<(d0+nodes)){
sum = sum +((cluster_centroid[g] - data[h])*(cluster_centroid[g] - data[h]));
h=h+dimensions;
}
var[g] = sum/node;
g=g+1;
h=(d0+g);
}
int large = var[0];
int max_dimension =0;
int p;
for(p=0; p<dimensions; p++){
if(var[p]>large){
large = var[p];
max_dimension = p;
}
}
/* find mean of maximum variance*/
int mean = cluster_centroid[max_dimension];
//printf("mean %d \n",mean);
i=d0+max_dimension;
x=0;
while(i<(d0+nodes)){
if(data[i] < mean){
assign[x]=0;
}
else{
assign[x]=1;
}
x++;
i= i+dimensions;
}
/* Rearranging the points based on mean points lesser than mean goes to left and greater than mean goes to right*/
x=0;
int count=0;
int y=0;
for(i=0; i<node; i++){
if(assign[y] == 0){
count++;
for(j=dimensions*i; j<dimensions*(i+1); j++){
cluster_assign[x] = data[d0+j];
x++;
}
}
y++;
}
cluster_size[0] = count*dimensions;
cluster_start[0]= d0;
count=0;
y=0;
for(i=0; i<node; i++){
if(assign[y]!=0){
count++;
for(j=dimensions*i; j<dimensions*(i+1); j++){
cluster_assign[x] = data[d0+j];
x++;
}
}
y++;
}
cluster_size[1] = count*dimensions;
cluster_start[1]= d0+cluster_size[0];
int temp1,temp2;
x=0;
p=0;
while(p<2){
j=cluster_start[p];
i=0;
while(i<dimensions){
min=data[j];
max=data[j];
temp1=cluster_start[p];
temp2=cluster_size[p];
while(j < temp1+temp2){
if(data[j]<min)
min = data[j];
if(data[j]>max)
max= data[j];
j = j+dimensions;
}
cluster_bdry[x]=min;
x=x+1;
cluster_bdry[x]=max;
x=x+1;
i = i+1;
j=temp1+i;
}
p++;
}
/*printf("bou");
for(i=0; i<4*dimensions; i++){
printf("%d ",cluster_bdry[i]);
} */
free(cluster_assign);
free(assign);
}
/*Initialize data array*/
int *gendata(int num)
{
int *ptr = (int *)malloc(sizeof(int)*num);
int j = 0;
if(ptr != NULL)
{
for(j = 0; j < num; j++)
{
ptr[j] = -50 + rand()%101;
}
}
return ptr;
}