Friday, September 3, 2010

DBSCAN Algorithm Java Implementation

DBSCAN is a density-based clustering algorithm, and its basic principle is to a given two parameters, ξ and minp, where ξ can be interpreted as the radius, the algorithm will search within a radius of the sample, minp is a ξ To find the radius of the sample number n of constraints, as long as n> = minp, find the sample point is the core of the sample points, the proposed algorithm are described in References 1, below is a java implementation of this algorithm: ...

DBSCAN is a density-based clustering algorithm, and its basic principle is given two parameters, ξ and minp, where ξ can be interpreted as the radius, the algorithm will find the sample in this radius, minp is a search radius ξ n number of samples to restrictions, as long as n> = minp, find the sample point is the core of the sample points, the proposed algorithm are described in References 1, below is a java implementation of this algorithm:
First define a Point class, the representative sample points




package com.sunzhenxing;

public class Point (

private int x;

private int y;

private boolean isKey;

private boolean isClassed;

public boolean isKey () (

return isKey;

)

public void setKey (boolean isKey) (

this.isKey = isKey;

this.isClassed = true;

)

public boolean isClassed () (

return isClassed;

)

public void setClassed (boolean isClassed) (

this.isClassed = isClassed;

)

public int getX () (

return x;

)

public void setX (int x) (

this.x = x;

)

public int getY () (

return y;

)

public void setY (int y) (

this.y = y;

)

public Point () (

x = 0;

y = 0;

)

public Point (int x, int y) (

this.x = x;

this.y = y;

)

public Point (String str) (

String [] p = str.split (",");

this.x = Integer.parseInt (p [0]);

this.y = Integer.parseInt (p [1]);

)

public String print () (

return "<" + this.x +","+ this.y +">";

)

)

And then define a utility class, for the algorithm implementation services:

package com.sunzhenxing;

import java.io.BufferedReader;

import java.io.FileReader;

import java.io.IOException;

import java.util .*;

public class Utility (

/ **

* Test the distance between the two points

* @ Param p point

* @ Param q point

* @ Return returns the distance between two points

* /

public static double getDistance (Point p, Point q) (

int dx = p.getX ()-q.getX ();

int dy = p.getY ()-q.getY ();

double distance = Math.sqrt (dx * dx + dy * dy);

return distance;

) / **
* Check the given point is not the central point

* @ Param lst The list storage point

* @ Param p the point to be tested

* @ Param ee radius

* @ Param minp density threshold

* @ Return a temporary storage point visited

* /

public static List isKeyPoint (List lst, Point p, int e, int minp) (

int count = 0;

List tmpLst = new ArrayList ();

for (Iterator it = lst.iterator (); it.hasNext ();){

Point q = it.next ();

if (getDistance (p, q) <= e) (

+ + Count;

if (! tmpLst.contains (q)) (

tmpLst.add (q);

)

)

)

if (count> = minp) (

p.setKey (true);

return tmpLst;

)

return null;

)

public static void setListClassed (List lst) (

for (Iterator it = lst.iterator (); it.hasNext ();){

Point p = it.next ();

if (! p.isClassed ()) (

p.setClassed (true);

)

)

)

/ **

Merge
* @ Param a

* @ Param b

* @ Return a

* /

public static boolean mergeList (List a, List b) (

boolean merge = false;

for (int index = 0; index

if (a.contains (b.get (index))) (

merge = true;

break;

)

)

if (merge) (

for (int index = 0; index

if (! a.contains (b.get (index))) (

a.add (b.get (index));

)

)

)

return merge;

)

/ **

* Back to the collection point in the text

* @ Return back to the mid-point of the set text

* @ Throws IOException

* /

public static List getPointsList () throws IOException (

List lst = new ArrayList ();

String txtPath = "src \ \ com \ \ sunzhenxing \ \ points.txt";

BufferedReader br = new BufferedReader (new FileReader (txtPath));

String str = "";

while ((str = br.readLine ())!= null & & str !=""){

lst.add (new Point (str));

)

br.close ();

return lst;

)

)

Finally, in the main program to implement algorithm, as follows:

package com.sunzhenxing;

import java.io. *;

import java.util .*;

public class Dbscan (

private static List pointsList = new ArrayList ();// store the set of all points

private static List > resultList = new ArrayList >();// storage DBSCAN algorithm to return the result set

private static int e = 2; / / e radius

private static int minp = 3; / / density threshold

/ **

* Extract all the points in the text and stored in the pointsList

* @ Throws IOException

* /

private static void display () (

int index = 1;

for (Iterator > it = resultList.iterator (); it.hasNext ();){

List lst = it.next ();

if (lst.isEmpty ()) (

continue;

) System.out.println ("----- s "+ index +" a cluster -----");
for (Iterator it1 = lst.iterator (); it1.hasNext ();){

Point p = it1.next ();

System.out.println (p.print ());

)

index + +;

)

)

/ / Find all the cluster can be directly

private static void applyDbscan () (

try (

pointsList = Utility.getPointsList ();

for (Iterator it = pointsList.iterator (); it.hasNext ();){

Point p = it.next ();

if (! p.isClassed ()) (

List tmpLst = new ArrayList ();

if ((tmpLst = Utility.isKeyPoint (pointsList, p, e, minp))! = null) (

/ / End point for all clustering to mark

Utility.setListClassed (tmpLst);

resultList.add (tmpLst);

)

)

)

) Catch (IOException e) (

/ / TODO Auto-generated catch block

e.printStackTrace ();

)

)

/ / Direct access to the clustering of all the merger, that is up to the point and find the indirect merger

private static List > getResult () (

applyDbscan ();// find all the direct clustering

int length = resultList.size ();

for (int i = 0; i

for (int j = i +1; j

if (Utility.mergeList (resultList.get (i), resultList.get (j))) (

resultList.get (j). clear ();

)

)

)

return resultList;

)

/ **

* Program main function

* @ Param args

* /

public static void main (String [] args) (

getResult ();

display ();

/ / System.out.println (Utility.getDistance (new Point (0,0), new Point (0,2)));

)

)

Below is a small test, that is used src \ \ com \ \ sunzhenxing \ \ points.txt contents of the file test, points.txt the file contents are:

0,0

0,1

0,2

0,3

0,4

0,5

12,1

12.2

12.3

12,4

12,5

12.6

0,6

0,7

12,7

0,8

0,9

1,1

The final result of the algorithm is:

----- ----- 1st cluster

<0,0>

<0,1>

<0,2>

<1,1>

<0,3>

<0,4>

<0,5>

<0,6>

<0,7>

<0,8>

<0,9>

----- ----- 2nd cluster

<12,1>

<12.2>

<12.3>

<12,4>

<12,5>

<12.6>

<12,7>

Coordinates we can draw what conclusions the experiment to understand.

No comments:

Post a Comment