Problem Statement
Let’s given n points in a plane. We’ve to find k nearest neighbours of a given point ‘p’ from the given n points.
Naive Solution O(nlogn)
Let’s call the given point be P and set of points be S.
Iterate over S and find euclidean distance between each point in S and P. Store these distances in a vector. Let’s call this vector distances[]
.
Most obvious solution, seems to sort this vector distances[]
and get first k
nearest neighbours. This algorithm is O(nlogn).
Code Implementation
//
// main.cpp
// K nearest neighbours
//
// Created by Himanshu on 18/08/21.
//
#include <iostream>
#include <vector>
#include <algorithm>
#include <math.h>
using namespace std;
int const N = 5;
bool cmp ( const vector<float>& p, const vector<float>& q ) {
return p[1] < q[1];
}
void solve (int points[][2], int point[], int k) {
vector< vector<float> > distance;
for (int i=0; i<N; i++) {
float d = sqrt((pow((point[0] - points[i][0]), 2)) + (pow((point[1] - points[i][1]), 2)));
float pos = i;
distance.push_back({pos, d});
}
sort(distance.begin(), distance.end(), &cmp);
cout<<"K nearest neighbours:"<<endl;
for (int i=0; i<k; i++) {
cout<<distance[i][0]<<" - distance: "<<distance[i][1]<<endl;
}
}
int main () {
int points[N][2] = {{1, 1}, {3, 4}, {2, 3}, {7, 8}, {2, 5}};
int point[2] = {0, 0};
int k = 2;
solve(points, point, k);
return 0;
}
Output
K nearest neighbours: 0 - distance: 1.41421 2 - distance: 3.60555
Here’s a working example: Ideone
But we can do better than that. We can solve this in O(n).
O(n) approach using Heaps
Make a min heap out of this vector distance[]
. This step is O(n). (You can read more about binary heap here). Now, extract minimum element from this heap using extractMin operation of heap. Operation extractMin takes O(logn) time. Calling it k times, you get the running time as O(n + klogn).
Code Implementation
//
// main.cpp
// K nearest neighbours
//
// Created by Himanshu on 18/08/21.
//
#include <iostream>
#include <vector>
#include <algorithm>
#include <math.h>
using namespace std;
int const N = 5;
bool cmp( const vector<float>& p, const vector<float>& q ) {
return p[1] > q[1];
}
void solve (int points[][2], int point[], int k) {
vector< vector<float> > minHeap;
for (int i=0; i<N; i++) {
float d = sqrt((pow((point[0] - points[i][0]), 2)) + (pow((point[1] - points[i][1]), 2)));
float pos = i;
minHeap.push_back({pos, d});
push_heap(minHeap.begin(), minHeap.end(), &cmp);
}
cout<<"K nearest neighbours:"<<endl;
for (int i=0; i<k; i++) {
vector<float> nearestPoints = minHeap.front();
pop_heap(minHeap.begin(), minHeap.end(), &cmp);
minHeap.pop_back();
cout<<nearestPoints[0]<<" - distance: "<<nearestPoints[1]<<endl;
}
}
int main () {
int points[N][2] = {{1, 1}, {3, 4}, {2, 3}, {7, 8}, {2, 5}};
int point[2] = {0, 0};
int k = 2;
solve(points, point, k);
return 0;
}
Output
K nearest neighbours: 0 - distance: 1.41421 2 - distance: 3.60555
Here’s a working example: Ideone