我正在尝试根据这里(原文链接)给出的伪代码,用 C 语言实现一个简单的 DBSCAN。
为了让 DBSCAN 更通用,我使用一个 n×m 矩阵来表示 n 个元素,每个元素有 m 个特征。
我不清楚算法的某些步骤:
expandCluster() 有什么用;
regionQuery() 返回 P2 的 eps 邻域内的所有点(包括 P);如果我有一个 n×2 矩阵,其中每个元素有两个特征,我假设距离按照欧几里得空间中的方式计算,即这个公式(原文链接);在我的 n×m 矩阵中,我有 m
#define ROWS 13
/* Number of doubles allocated for each row of `data` (see main()). */
#define COLS 4
/* Presumably the eps-neighborhood radius used for neighbor queries -- TODO confirm. */
#define EPSILON 250.0
/* dbscan() reports a point as noise when regionQuery() returns fewer than this. */
/* NOTE(review): ROWS is 13, so if regionQuery() returns a neighbor count it can
 * never reach 100 and every point would be noise -- verify the intended value. */
#define MIN_POINTS 100
#include <stdio.h>
#include <stdlib.h>
/* Point matrix; main() allocates ROWS rows of COLS doubles each. */
double **data;
/* Per-point cluster id, written by expandCluster(); zero-initialized in main(). */
int *clusters;
/* Per-point flag; dbscan() sets it to 1 when it starts processing the point. */
int *visited;
int regionQuery(int elem) {
// return all points within P2 eps-neighborhood (including P)
}
void expandCluster(int elem, int cluster) {
int i;
//add P to cluster C
clusters[elem] = cluster;
//for each point P2 in NeighborPts
for (i=0; i<ROWS; i++) {
//if P2 is not visited {
//mark P2 as visited
//NeighborPts = regionQuery(P2, eps)
//if sizeof(NeighborPts2) >= MinPts
// NeighborPts = NeighborPts joined with NeighborPts2
}
if P2 is not yet member of any cluster
add P2 to cluster C
}
}
/*
 * Run DBSCAN over all ROWS points.
 *
 * Each unvisited point is marked visited and its neighborhood size is obtained
 * from regionQuery(). Points with fewer than MIN_POINTS neighbors are reported
 * as noise on stdout; every other point starts a new cluster that is grown by
 * expandCluster(). Cluster ids start at 1.
 */
void dbscan(void) {
    int cluster = 0;
    int i;

    /* for each point P in dataset D */
    for (i = 0; i < ROWS; i++) {
        int num_points;

        if (visited[i]) {
            continue;
        }
        visited[i] = 1;

        /* regionQuery() takes only the point index. */
        num_points = regionQuery(i);

        if (num_points < MIN_POINTS) {
            /* mark P as NOISE */
            printf("element %d is noise\n", i);
        } else {
            /* C = next cluster */
            cluster++;
            expandCluster(i, cluster);
        }
    }
}
/*
 * Program entry point: allocate the global buffers, load the 13x4 sample data
 * set, run dbscan(), and print the cluster id of every point (one per line).
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE when an allocation fails.
 */
int main(void)
{
    /* Sample data set: one row per point, one column per feature. */
    static const double samples[ROWS][COLS] = {
        {0.1, 0.0, 9.6, 5.6},
        {1.4, 1.3, 0.0, 3.8},
        {1.2, 2.5, 0.0, 4.8},
        {2.3, 1.5, 9.2, 4.3},
        {1.7, 0.7, 9.6, 3.4},
        {0.0, 3.9, 9.8, 5.1},
        {6.7, 3.9, 5.5, 4.8},
        {0.0, 6.3, 5.7, 4.3},
        {5.7, 6.9, 5.6, 4.3},
        {0.0, 2.2, 5.4, 0.0},
        {3.8, 3.5, 5.5, 9.6},
        {0.0, 2.3, 3.6, 8.5},
        {4.1, 4.5, 5.8, 7.6}
    };
    int status = EXIT_FAILURE;
    int rows_allocated = 0;
    int i;
    int j;

    data = calloc(ROWS, sizeof *data);
    visited = calloc(ROWS, sizeof *visited);
    clusters = calloc(ROWS, sizeof *clusters);
    if (data == NULL || visited == NULL || clusters == NULL) {
        fprintf(stderr, "out of memory\n");
        goto cleanup;
    }

    for (i = 0; i < ROWS; i++) {
        data[i] = calloc(COLS, sizeof *data[i]);
        if (data[i] == NULL) {
            fprintf(stderr, "out of memory\n");
            goto cleanup;
        }
        rows_allocated++;
        for (j = 0; j < COLS; j++) {
            data[i][j] = samples[i][j];
        }
    }

    /* The clustering routine is named dbscan (all lower case). */
    dbscan();

    for (i = 0; i < ROWS; i++) {
        printf("%d\n", clusters[i]);
    }
    status = EXIT_SUCCESS;

cleanup:
    /* Free only the rows that were actually allocated. */
    if (data != NULL) {
        for (i = 0; i < rows_allocated; i++) {
            free(data[i]);
        }
    }
    free(data);
    free(visited);
    free(clusters);
    data = NULL;
    visited = NULL;
    clusters = NULL;
    return status;
}
维空间,因此计算两个元素之间距离的公式是这个(原文链接)吗?这是我目前写的内容。
$.ajax