KD Tree Doc

Algorithms to be analysed
I. BallTree,
II. Binary Tree,
III. KDTree,
IV. LinearNNSearch
Ball Tree
BallTree is a class implementing the BallTree/Metric Tree algorithm for nearest neighbour search.
The connection to the dataset is only a reference; for the tree structure, the indexes are stored
in an array.
See the implementing classes of the different tree construction methods for details on how the
tree is constructed.
Binary Tree
In Java, the key points in the recursion are exactly the same as in C or C++. In fact, I created the
Java solutions by just copying the C solutions, and then making the syntactic changes. The
recursion is the same, however the outer structure is slightly different.
In Java, we will have a BinaryTree object that contains a single root pointer. The root pointer
points to an internal Node class that behaves just like the node struct in the C/C++ version. The
Node class is private -- it is used only for internal storage inside the BinaryTree and is not
exposed to clients. With this OOP structure, almost every operation has two methods: a one-line
method on the BinaryTree that starts the computation, and a recursive method that works on the
Node objects. For the lookup() operation, there is a BinaryTree.lookup() method that the client
uses to start a lookup operation. Internal to the BinaryTree class, there is a private recursive
lookup(Node) method that implements the recursion down the Node structure. This second,
private recursive method is basically the same as the recursive function one would write in
C/C++ -- it takes a Node argument and uses recursion to iterate over the pointer structure.
KD Tree
A k-d tree (short for k-dimensional tree) is a space-partitioning data structure for organizing points
in a k-dimensional space. k-d trees are a useful data structure for several applications, such as
searches involving a multidimensional search key (e.g. range searches and nearest neighbor
searches). k-d trees are a special case of binary space partitioning trees.
Linear search
Linear search is one of the most basic search techniques. Although it is not a very
efficient search technique, the concept is worth understanding. Suppose our aim is to search for a
key element in an array of elements. We loop through all the array elements and compare each one
with the key element. Since we go element by element, this search is called linear
search or sequential search. The element being searched for is called the key element.
Linear search algorithm
BEGIN
DECLARE key, array, i, found
ASSIGN values to array/ACCEPT array values
PRINT "Please enter key element:"
ACCEPT key
ASSIGN i with 1
ASSIGN found with false
FOR EACH i in 1 to array.length
LOOP
IF array[i] = key
THEN
ASSIGN found with true
END IF
END LOOP
IF found = true
THEN
PRINT "Key found"
ELSE
PRINT "Key not found"
END IF
END
Implementation of Kd-tree
1. Timing differences between KD tree search and exhaustive search
import java.util.Date;
import java.util.Random;
class kdtime {
public static void main(String [] args) {
if (args.length < 3) {
System.err.print("Usage: java kdtime <# points> <# dims> ");
System.err.println("<# trials> [seed]");
System.exit(1);
int n = Integer.parseInt(args[0]);
int k = Integer.parseInt(args[1]);
int t = Integer.parseInt(args[2]);
// generate N random K-dimensional points in (0,1)
Random r = args.length > 3 ? // support random seed as
new Random(Long.parseLong(args[3])) : // optional fourth arg
new Random();
double [][] x = new double[n][k];
for (int i=0; i<n; ++i) {
for (int j=0; j<k; ++j) {
x[i][j] = r.nextDouble();
// build KD-tree with indices as values
KDTree<Integer> kd = new KDTree<Integer>(k);
try {
for (int i=0; i<n; ++i) {
kd.insert(x[i], i);
catch (Exception e) {
System.err.println(e);
System.exit(0);
// compare search
linear_search(x, t, r);
kd_search(x, t, kd, r);
// compare nearest-neighbor
linear_nearest(x, t, r);
kd_nearest(x, t, kd, r);
// do linear search
static void linear_search(double [][] x, int t, Random r) {
long before = getTimeMillis();
for (int i=0; i<t; ++i) {
// pick a random point
double [] targ = x[(int)(x.length*r.nextDouble())];
search(x, targ);
long millis = getTimeMillis() - before;
System.out.println(t + " linear searches took " + millis + " msec.");
// do KD-tree search
static void kd_search(double [][] x, int t, KDTree kd, Random r) {
try {

// pick a random point
double [] targ = x[(int)(x.length*r.nextDouble())];
kd.search(targ);
System.exit(0);
System.out.println(t + " KD-tree searches took " + millis + " msec.");
// do linear nearest neighbor
static void linear_nearest(double [][] x, int t, Random r) {
int k = x[0].length;
double [] targ = random_point(r, k);

int n = neighbor(x, targ);
System.out.println(t + " linear nearest took " + millis + " msec.");
// do KD-tree nearest neighbor
static void kd_nearest(double [][] x, int t, KDTree kd, Random r) {
int k = x[0].length;
try {
double [] targ = random_point(r, k);
Integer nbr = (Integer)kd.nearest(targ);
int n = nbr.intValue();
System.exit(0);
}
System.out.println(t + " KD-tree nearest took " + millis + " msec.");
// linear search for exact match
private static int search(double [][] x, double [] targ) {
for (int i=0; i<x.length; ++i) {
if (equal(x[i], targ)) return i;
return -1;
// linear search for index of neighbor
private static int neighbor(double [][] x, double [] targ) {
double mindst = Double.POSITIVE_INFINITY;
int minidx = -1;
for (int i=0; i<x.length; ++i) {
double d = sqrdst(x[i], targ);
if (d < mindst) {
mindst = d;
minidx = i;
}
return minidx;
// square of Euclidean distance between points
private static double sqrdst(double [] p, double [] q) {
double dst = 0;
for (int i=0; i<p.length; ++i) {
double dif = p[i] - q[i];
dst += dif*dif;
return dst;
// point equality test
private static boolean equal(double [] p, double [] q) {
for (int i=0; i<p.length; ++i) {
if (p[i] != q[i]) return false;
return true;
private static double [] random_point(Random r, int k) {

double [] x = new double[k];
for (int i=0; i<k; ++i) {
x[i] = r.nextDouble();
return x;
private static long getTimeMillis() {
Date d = new Date();
return d.getTime();
2. n-nearest-neighbors method of KDTree class. Creates a KDTree of M keys, and finds the N nearest
neighbors of the D-dimensional point at the center of the space (all coords = 0.5), with M, D and N
given as command-line arguments.
import java.util.List;
import java.util.Iterator;
class kdnbrs {
java.util.Random r = new java.util.Random(0);

System.err.print("Usage: java kdnbrs <# points> <# dims>");
System.err.println(" <# nbrs>");
System.exit(1);
int m = Integer.parseInt(args[0]);
int d = Integer.parseInt(args[1]);
int n = Integer.parseInt(args[2]);
double [][] keys = new double [m][d];
double [] targ = new double [d];
for (int k=0; k<d; ++k) {
targ[k] = 0.5;
// make a D-dimensional KD-tree
KDTree<Integer> kd = new KDTree<Integer>(d);
try {
// add M randomly keyed nodes
for (int i=0; i<m; ++i) {

for (int j=0; j<d; ++j) {
keys[i][j] = r.nextDouble();
kd.insert(keys[i], i);
// get N nearest neighbors and show their keys
long start = time();
List<Integer> nbrs = kd.nearest(targ, n);
System.err.println((time() - start) + " msec");
//System.exit(0);
for (int j : nbrs) {
for (int k=0; k<d; ++k) {
System.out.print(keys[j][k] + " ");
System.err.println();
}
private static long time() {
java.util.GregorianCalendar cal = new java.util.GregorianCalendar();
return cal.getTimeInMillis();
3. Behavior of KDTree range search
import java.util.Arrays;
import java.util.List;
import java.util.Iterator;
class kdrange {
// check arguments
System.err.println("Usage: java kdrange <gridsize> <xradius> " +
"<yradius>");
System.exit(1);
int gsize = Integer.parseInt(args[0]);
int xrad = Integer.parseInt(args[1]);
int yrad = Integer.parseInt(args[2]);
// make a KD-tree
KDTree<Integer> kd = new KDTree<Integer>(2);
// plot grid and add nodes
for (int i=0; i<gsize; ++i) {
for (int j=0; j<gsize; ++j) {
int n = i * gsize + j + 1;
if (i == gsize/2 && j==gsize/2) {
System.out.print("*\t");
else {
System.out.print(n + "\t");
double [] key = {i, j};
try {
kd.insert(key, n);
System.out.println();
try {
// get objects in range of center
double [] lo = {gsize/2-xrad,gsize/2-yrad};
double [] hi = {gsize/2+xrad,gsize/2+yrad};
List<Integer> o = kd.range(lo, hi);
// dump them to stdout
for (int i : o) {
System.out.println(i);
class kddemo {

double [] A = {2, 5};
double [] B = {1, 1};
double [] C = {3, 9};
double [] T = {1, 10};
// make a KD-tree and add some nodes
KDTree<String> kd = new KDTree<String>(2);
try {
kd.insert(A, new String("nnoad A"));
kd.insert(B, new String("node B"));
kd.insert(C, new String("node n"));
// look for node B
try {
String n = kd.search(B);
System.err.println(n);
}
try {
// find T's nearest neighbor, which should be C
String n = kd.nearest(T);
// remove C from the tree
kd.delete(C);
// now T's nearest neighbor should be A
n = kd.nearest(T);
}
4. Behavior of KDTree class
class kddemo {
double [] A = {2, 5};
double [] B = {1, 1};
double [] C = {3, 9};
double [] T = {1, 10};
// make a KD-tree and add some nodes
KDTree<String> kd = new KDTree<String>(2);
try {
kd.insert(A, new String("nnoad A"));
kd.insert(B, new String("node B"));
kd.insert(C, new String("node n"));
// look for node B
try {
String n = kd.search(B);
try {
// find T's nearest neighbor, which should be C
String n = kd.nearest(T);
// remove C from the tree
kd.delete(C);
// now T's nearest neighbor should be A
n = kd.nearest(T);

KD Tree Doc

Uploaded by

Document Information

Original Description:

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

KD Tree Doc

Uploaded by

Copyright:

Available Formats

Algorithms going to analysis

implementing the BallTree/Metric Tree algorithm for nearest neighbour search.

Linear search algorithm

DECLARE key, array, i, found

ASSIGN values to array/ACCEPT array values

PRINT "Please enter key element:"

FOR EACH i in 1 to array.length

ASSIGN found with true

PRINT "Key found"

PRINT "Key not found"

1. Differences between KD tree search and exhaustive search

public static void main(String [] args) {

System.err.print("Usage: java kdtime <# points> <# dims> ");

System.err.println("<# trials> [seed]");

// generate N random K-dimensional points in (0,1)

Random r = args.length > 3 ? // support random seed as

new Random(Long.parseLong(args[3])) : // optional fourth arg

for (int i=0; i<n; ++i) {

for (int j=0; j<k; ++j) {

// build KD-tree with indices as values

KDTree<Integer> kd = new KDTree<Integer>(k);

for (int i=0; i<n; ++i) {

kd_search(x, t, kd, r);

static void linear_search(double [][] x, int t, Random r) {

long before = getTimeMillis();

for (int i=0; i<t; ++i) {

// pick a random point

double [] targ = x[(int)(x.length*r.nextDouble())];

long millis = getTimeMillis() - before;

System.out.println(t + " linear searches took " + millis + " msec.");

static void kd_search(double [][] x, int t, KDTree kd, Random r) {

long before = getTimeMillis();

for (int i=0; i<t; ++i) {

double [] targ = x[(int)(x.length*r.nextDouble())];

long millis = getTimeMillis() - before;

System.out.println(t + " KD-tree searches took " + millis + " msec.");

// do linear nearest neighbor

static void linear_nearest(double [][] x, int t, Random r) {

long before = getTimeMillis();

for (int i=0; i<t; ++i) {

double [] targ = random_point(r, k);

long millis = getTimeMillis() - before;

System.out.println(t + " linear nearest took " + millis + " msec.");

// do KD-tree nearest neighbor

static void kd_nearest(double [][] x, int t, KDTree kd, Random r) {

long before = getTimeMillis();

for (int i=0; i<t; ++i) {

double [] targ = random_point(r, k);

Integer nbr = (Integer)kd.nearest(targ);

long millis = getTimeMillis() - before;

System.out.println(t + " KD-tree nearest took " + millis + " msec.");

// linear search for exact match

private static int search(double [][] x, double [] targ) {

for (int i=0; i<x.length; ++i) {

if (equal(x[i], targ)) return i;

// linear search for index of neighbor

private static int neighbor(double [][] x, double [] targ) {

double mindst = Double.POSITIVE_INFINITY;

int minidx = -1;

for (int i=0; i<x.length; ++i) {

double d = sqrdst(x[i], targ);

// square of Euclidean distance between points