Disjoint-set data structures, commonly known as Union-Find, are fundamental data structures in computer science for efficiently solving problems related to connectivity in graphs or set operations. In this blog post, we’ll explore and compare the execution time of three prominent Union-Find algorithms: Quick Union, Quick Find, and Weighted Quick Union. We’ll implement these algorithms in C++ and analyze their performance.
These algorithms are discussed in detail here – Data Structures for Disjoint Sets | Union Find Algorithm
The Disjoint-Set Problem
The disjoint-set problem involves maintaining a collection of disjoint sets and supporting two operations: Union (to merge two sets) and Find (to determine which set an element belongs to), which is often exposed as Connected (to check whether two elements belong to the same set). The efficiency of these operations is crucial in many applications, such as graph connectivity problems.
Code Implementation
//
// main.cpp
// Disjoint-Set Algorithms Comparison
//
// Created by Himanshu on 18/01/24.
//
#include <chrono>
#include <iostream>
#include <random>
#include <vector>
using namespace std;
/**
 * Quick Union: a disjoint-set forest without weighting or path compression.
 *
 * Every element stores the index of its parent; an element that is its own
 * parent is the root (representative) of its set.
 *
 * Storage is a std::vector, so the memory is released automatically and a
 * copy of the structure owns its own, independent data.
 *
 * Element indices passed to connected() and makeUnion() must lie in [0, N);
 * they are not range-checked.
 */
class QuickUnion {
private:
    // id[i] is the parent of element i; id[i] == i marks a root.
    std::vector<int> id;

    // Follows parent links starting at i until a root is reached.
    int getRoot (int i) const {
        while (i != id[i]) {
            i = id[i];
        }
        return i;
    }

public:
    /**
     * Creates N singleton sets {0}, {1}, ..., {N-1}.
     * A non-positive N produces an empty structure.
     */
    QuickUnion (int N) : id(static_cast<std::size_t>(N > 0 ? N : 0)) {
        for (int i = 0; i < N; i++) {
            id[i] = i;
        }
    }

    /** Returns true when p and q have the same root, i.e. share a set. */
    bool connected (int p, int q) const {
        return (getRoot(p) == getRoot(q));
    }

    /** Merges the sets of p and q by hanging p's root beneath q's root. */
    void makeUnion (int p, int q) {
        const int i = getRoot(p);
        const int j = getRoot(q);
        // When i == j this is a harmless self-assignment.
        id[i] = j;
    }
};
/**
 * Quick Find: every element stores the label of the set it belongs to, so a
 * connectivity query is a single comparison while a union has to relabel
 * elements by scanning the whole array.
 *
 * Storage is a std::vector, so the memory is released automatically and a
 * copy of the structure owns its own, independent data.
 *
 * Element indices passed to connected() and makeUnion() must lie in [0, N);
 * they are not range-checked.
 */
class QuickFind {
private:
    // id[i] is the label of the set containing element i.
    std::vector<int> id;

public:
    /**
     * Creates N singleton sets, each labelled with its own index.
     * A non-positive N produces an empty structure.
     */
    QuickFind (int N) : id(static_cast<std::size_t>(N > 0 ? N : 0)) {
        for (int i = 0; i < N; i++) {
            id[i] = i;
        }
    }

    /** Returns true when p and q carry the same set label. */
    bool connected (int p, int q) const {
        return (id[p] == id[q]);
    }

    /** Merges the sets of p and q by relabelling p's set with q's label. */
    void makeUnion (int p, int q) {
        // Capture both labels first: id[p] itself is rewritten by the loop.
        const int pid = id[p];
        const int qid = id[q];
        if (pid == qid) {
            return;  // Already in the same set; the scan would change nothing.
        }
        for (int& label : id) {
            if (label == pid) {
                label = qid;
            }
        }
    }
};
/**
 * Weighted Quick Union: a disjoint-set forest that links by set size and
 * shortens paths while searching for a root.
 *
 * Storage is a pair of std::vectors, so the memory is released automatically
 * and a copy of the structure owns its own, independent data.
 *
 * Element indices passed to connected() and makeUnion() must lie in [0, N);
 * they are not range-checked.
 */
class WeightedQuickUnion {
private:
    // id[i] is the parent of element i; id[i] == i marks a root.
    std::vector<int> id;
    // setSize[r] is the number of elements in the tree rooted at r.
    // Entries for non-root elements are stale and never read.
    std::vector<int> setSize;

    // Walks from i to its root. On the way, each visited node is re-pointed
    // at its grandparent, which shortens the path for later lookups.
    int getRoot (int i) {
        while (i != id[i]) {
            id[i] = id[id[i]];
            i = id[i];
        }
        return i;
    }

public:
    /**
     * Creates N singleton sets, each of size 1.
     * A non-positive N produces an empty structure.
     */
    WeightedQuickUnion (int N)
        : id(static_cast<std::size_t>(N > 0 ? N : 0)),
          setSize(static_cast<std::size_t>(N > 0 ? N : 0), 1) {
        for (int i = 0; i < N; i++) {
            id[i] = i;
        }
    }

    /**
     * Returns true when p and q have the same root, i.e. share a set.
     * Not const: the root lookup rewrites parent links as it goes.
     */
    bool connected (int p, int q) {
        return (getRoot(p) == getRoot(q));
    }

    /**
     * Merges the sets of p and q. The root of the smaller set is attached
     * beneath the root of the larger one; on a tie q's root goes under p's.
     */
    void makeUnion (int p, int q) {
        const int i = getRoot(p);
        const int j = getRoot(q);
        if (i == j) {
            return;  // Same set already; merging would double-count the size.
        }
        if (setSize[i] < setSize[j]) {
            id[i] = j;
            setSize[j] += setSize[i];
        } else {
            id[j] = i;
            setSize[i] += setSize[j];
        }
    }
};
/**
 * Returns a pseudo-random integer drawn uniformly from the closed range
 * [low, high].
 *
 * The engine is a function-local static that is seeded exactly once with the
 * fixed value 42. Successive calls therefore advance one shared sequence,
 * and the sequence is the same on every run of the program. Constructing and
 * re-seeding the engine inside every call would instead restart it each time
 * and hand back the same number for the same bounds on every call.
 *
 * Not safe to call concurrently from several threads: the engine state is
 * shared and unsynchronized.
 */
int getRandomId(int low, int high) {
    static std::mt19937 gen(42); // Seeded once, state persists across calls
    std::uniform_int_distribution<int> distrib(low, high); // Define the range
    return distrib(gen); // Generate a random number within the range
}
// Shared benchmark driver for every Union-Find implementation.
//
// Each of the `iterations` rounds draws two ids from [0, N - 1], unions them
// and queries that same pair, then draws a second pair and queries it as
// well. The query results are discarded; only the wall-clock time of the
// whole loop is reported on standard output.
//
// UnionFind must provide makeUnion(int, int) and connected(int, int).
template <typename UnionFind>
void runOperations(UnionFind& uf, int iterations, int N) {
    using Clock = std::chrono::high_resolution_clock;

    const int maxId = N - 1;
    const auto begin = Clock::now();

    int round = 0;
    while (round < iterations) {
        // First pair: merge, then query the pair that was just merged.
        int a = getRandomId(0, maxId);
        int b = getRandomId(0, maxId);
        uf.makeUnion(a, b);
        static_cast<void>(uf.connected(a, b));

        // Second pair: query only.
        a = getRandomId(0, maxId);
        b = getRandomId(0, maxId);
        static_cast<void>(uf.connected(a, b));

        ++round;
    }

    const auto finish = Clock::now();
    const std::chrono::duration<double> elapsed = finish - begin;
    std::cout << "Time taken: " << elapsed.count() << " seconds" << std::endl;
}
int main() {
const int N = 1000000; // Number of elements
const int iterations = 10000; // Number of operations
QuickUnion quickUnion(N);
QuickFind quickFind(N);
WeightedQuickUnion weightedQuickUnion(N);
cout << "QuickUnion Performance:" << endl;
runOperations(quickUnion, iterations, N);
cout << endl << "QuickFind Performance:" << endl;
runOperations(quickFind, iterations, N);
cout << endl << "WeightedQuickUnion Performance:" << endl;
runOperations(weightedQuickUnion, iterations, N);
return 0;
}
Output
QuickUnion Performance: Time taken: 0.131054 seconds QuickFind Performance: Time taken: 6.72019 seconds WeightedQuickUnion Performance: Time taken: 0.13015 seconds
As you can see, QuickFind performed the worst among the three algorithms, while Quick Union and Weighted Quick Union both performed comparatively well.
The selection of the best Union-Find algorithm depends on the specific type of problem you are solving. The implementation details and the underlying application can significantly impact the performance of these algorithms. Understanding the strengths and weaknesses of these algorithms is crucial when solving problems that involve connectivity and set operations.