Building from Scratch: A Complete Guide to Simple Hash Tables in C

Hash tables are among the most fundamental and useful data structures in computer science. They provide O(1) average-case time complexity for insertions, deletions, and lookups, making them invaluable for everything from caching to symbol tables. This guide walks you through building a simple yet functional hash table in C from the ground up.

What is a Hash Table?

A hash table is a data structure that maps keys to values using a hash function. The hash function computes an index into an array of buckets, where the key-value pair is stored.

Key: "apple" → Hash Function → Index: 3 → [Bucket 3] → Value: "red"
Key: "banana" → Hash Function → Index: 7 → [Bucket 7] → Value: "yellow"

Core Components

  1. Hash Function: Converts a key into an array index
  2. Array (Buckets): Stores the key-value pairs
  3. Collision Resolution: Handles when two keys hash to the same index

Simple Hash Table Structure

Let's start with the basic structure for a hash table using separate chaining (linked lists) for collision resolution.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
// Node structure for linked list (separate chaining)
typedef struct HashNode {
char *key;
int value;
struct HashNode *next;
} HashNode;
// Hash table structure
typedef struct {
HashNode **buckets;  // Array of pointers to linked lists
int size;            // Number of buckets
int count;           // Number of key-value pairs
} HashTable;

Hash Function

A good hash function distributes keys uniformly across the array. Here's a simple but effective string hash function (djb2 by Dan Bernstein):

// djb2 hash function - simple and effective
unsigned long hash(const char *str, int table_size) {
unsigned long hash = 5381;
int c;
while ((c = *str++)) {
hash = ((hash << 5) + hash) + c;  // hash * 33 + c
}
return hash % table_size;
}

Creating and Destroying Hash Table

// Create a new hash table
HashTable* ht_create(int size) {
HashTable *table = (HashTable*)malloc(sizeof(HashTable));
if (table == NULL) {
fprintf(stderr, "Memory allocation failed\n");
return NULL;
}
table->size = size;
table->count = 0;
// Allocate array of bucket pointers
table->buckets = (HashNode**)calloc(size, sizeof(HashNode*));
if (table->buckets == NULL) {
free(table);
fprintf(stderr, "Memory allocation failed\n");
return NULL;
}
return table;
}
// Create a new node
HashNode* create_node(const char *key, int value) {
HashNode *node = (HashNode*)malloc(sizeof(HashNode));
if (node == NULL) return NULL;
node->key = (char*)malloc(strlen(key) + 1);
if (node->key == NULL) {
free(node);
return NULL;
}
strcpy(node->key, key);
node->value = value;
node->next = NULL;
return node;
}
// Free a node
void free_node(HashNode *node) {
if (node) {
free(node->key);
free(node);
}
}
// Destroy hash table
void ht_destroy(HashTable *table) {
if (table == NULL) return;
// Free all nodes in all buckets
for (int i = 0; i < table->size; i++) {
HashNode *current = table->buckets[i];
while (current != NULL) {
HashNode *temp = current;
current = current->next;
free_node(temp);
}
}
free(table->buckets);
free(table);
}

Insert Operation

// Insert a key-value pair
bool ht_insert(HashTable *table, const char *key, int value) {
if (table == NULL || key == NULL) return false;
// Compute hash index
int index = hash(key, table->size);
// Check if key already exists
HashNode *current = table->buckets[index];
while (current != NULL) {
if (strcmp(current->key, key) == 0) {
// Key exists - update value
current->value = value;
return true;
}
current = current->next;
}
// Key doesn't exist - create new node
HashNode *new_node = create_node(key, value);
if (new_node == NULL) return false;
// Insert at beginning of linked list
new_node->next = table->buckets[index];
table->buckets[index] = new_node;
table->count++;
return true;
}

Search Operation

// Search for a key, return value if found
int ht_search(HashTable *table, const char *key, bool *found) {
if (table == NULL || key == NULL) {
*found = false;
return 0;
}
int index = hash(key, table->size);
HashNode *current = table->buckets[index];
while (current != NULL) {
if (strcmp(current->key, key) == 0) {
*found = true;
return current->value;
}
current = current->next;
}
*found = false;
return 0;
}
// Simplified version that returns pointer to value
int* ht_get(HashTable *table, const char *key) {
if (table == NULL || key == NULL) return NULL;
int index = hash(key, table->size);
HashNode *current = table->buckets[index];
while (current != NULL) {
if (strcmp(current->key, key) == 0) {
return &current->value;
}
current = current->next;
}
return NULL;
}

Delete Operation

// Delete a key-value pair
bool ht_delete(HashTable *table, const char *key) {
if (table == NULL || key == NULL) return false;
int index = hash(key, table->size);
HashNode *current = table->buckets[index];
HashNode *prev = NULL;
while (current != NULL) {
if (strcmp(current->key, key) == 0) {
// Found the node to delete
if (prev == NULL) {
// Node is at the head of the list
table->buckets[index] = current->next;
} else {
prev->next = current->next;
}
free_node(current);
table->count--;
return true;
}
prev = current;
current = current->next;
}
return false;  // Key not found
}

Utility Functions

// Get the number of key-value pairs
int ht_size(HashTable *table) {
return table ? table->count : 0;
}
// Check if the hash table is empty
bool ht_is_empty(HashTable *table) {
return table ? table->count == 0 : true;
}
// Clear all entries (remove all key-value pairs)
void ht_clear(HashTable *table) {
if (table == NULL) return;
for (int i = 0; i < table->size; i++) {
HashNode *current = table->buckets[i];
while (current != NULL) {
HashNode *temp = current;
current = current->next;
free_node(temp);
}
table->buckets[i] = NULL;
}
table->count = 0;
}
// Check if a key exists
bool ht_contains(HashTable *table, const char *key) {
if (table == NULL || key == NULL) return false;
int index = hash(key, table->size);
HashNode *current = table->buckets[index];
while (current != NULL) {
if (strcmp(current->key, key) == 0) {
return true;
}
current = current->next;
}
return false;
}

Iteration

// Simple iterator structure
typedef struct {
HashTable *table;
int bucket_index;
HashNode *current_node;
} HashIterator;
// Create an iterator
HashIterator* ht_iterator_create(HashTable *table) {
HashIterator *iter = (HashIterator*)malloc(sizeof(HashIterator));
if (iter == NULL) return NULL;
iter->table = table;
iter->bucket_index = -1;
iter->current_node = NULL;
return iter;
}
// Get next key-value pair
bool ht_iterator_next(HashIterator *iter, char **key, int *value) {
if (iter == NULL || iter->table == NULL) return false;
// Move to next non-empty bucket
if (iter->current_node != NULL) {
iter->current_node = iter->current_node->next;
}
while (iter->current_node == NULL) {
iter->bucket_index++;
if (iter->bucket_index >= iter->table->size) {
return false;  // No more entries
}
iter->current_node = iter->table->buckets[iter->bucket_index];
}
*key = iter->current_node->key;
*value = iter->current_node->value;
return true;
}
// Destroy iterator
void ht_iterator_destroy(HashIterator *iter) {
free(iter);
}

Display and Statistics

// Print all key-value pairs
void ht_print(HashTable *table) {
if (table == NULL) {
printf("Hash table is NULL\n");
return;
}
printf("Hash Table (size=%d, count=%d):\n", table->size, table->count);
for (int i = 0; i < table->size; i++) {
if (table->buckets[i] != NULL) {
printf("  Bucket %d: ", i);
HashNode *current = table->buckets[i];
while (current != NULL) {
printf("(%s: %d) ", current->key, current->value);
current = current->next;
}
printf("\n");
}
}
}
// Print hash table statistics
void ht_print_stats(HashTable *table) {
if (table == NULL) return;
printf("\n=== Hash Table Statistics ===\n");
printf("Table size: %d\n", table->size);
printf("Number of entries: %d\n", table->count);
printf("Load factor: %.2f\n", (float)table->count / table->size);
int max_chain = 0;
int empty_buckets = 0;
int total_chain_length = 0;
for (int i = 0; i < table->size; i++) {
int chain_length = 0;
HashNode *current = table->buckets[i];
while (current != NULL) {
chain_length++;
current = current->next;
}
if (chain_length == 0) {
empty_buckets++;
}
if (chain_length > max_chain) {
max_chain = chain_length;
}
total_chain_length += chain_length;
}
printf("Empty buckets: %d (%.2f%%)\n", 
empty_buckets, (float)empty_buckets / table->size * 100);
printf("Max chain length: %d\n", max_chain);
printf("Average chain length: %.2f\n", 
(float)total_chain_length / table->size);
}

Complete Example Program

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
// Include all the function definitions above
int main() {
printf("=== Simple Hash Table Demo ===\n\n");
// Create a hash table with 10 buckets
HashTable *table = ht_create(10);
// Insert some values
printf("1. Inserting values:\n");
ht_insert(table, "apple", 10);
ht_insert(table, "banana", 20);
ht_insert(table, "orange", 30);
ht_insert(table, "grape", 40);
ht_insert(table, "watermelon", 50);
ht_insert(table, "mango", 60);
printf("   Inserted 6 items\n");
ht_print_stats(table);
// Display all entries
printf("\n2. Displaying all entries:\n");
ht_print(table);
// Search for values
printf("\n3. Searching:\n");
const char *keys[] = {"apple", "banana", "grape", "kiwi"};
for (int i = 0; i < 4; i++) {
int *value = ht_get(table, keys[i]);
if (value != NULL) {
printf("   %s -> %d\n", keys[i], *value);
} else {
printf("   %s -> not found\n", keys[i]);
}
}
// Update a value
printf("\n4. Updating value:\n");
printf("   Before: apple -> %d\n", *ht_get(table, "apple"));
ht_insert(table, "apple", 100);
printf("   After:  apple -> %d\n", *ht_get(table, "apple"));
// Delete a value
printf("\n5. Deleting:\n");
printf("   Deleting 'banana': %s\n", 
ht_delete(table, "banana") ? "success" : "failed");
printf("   Deleting 'kiwi': %s\n", 
ht_delete(table, "kiwi") ? "success" : "failed");
ht_print_stats(table);
// Iterate through all entries
printf("\n6. Iterating through entries:\n");
HashIterator *iter = ht_iterator_create(table);
char *key;
int value;
while (ht_iterator_next(iter, &key, &value)) {
printf("   %s: %d\n", key, value);
}
ht_iterator_destroy(iter);
// Check if key exists
printf("\n7. Contains checks:\n");
printf("   Contains 'apple': %s\n", 
ht_contains(table, "apple") ? "yes" : "no");
printf("   Contains 'banana': %s\n", 
ht_contains(table, "banana") ? "yes" : "no");
// Clear the table
printf("\n8. Clearing table:\n");
ht_clear(table);
printf("   Size after clear: %d\n", ht_size(table));
printf("   Is empty: %s\n", ht_is_empty(table) ? "yes" : "no");
// Clean up
ht_destroy(table);
printf("\n=== Demo Complete ===\n");
return 0;
}

Collision Resolution: Open Addressing (Alternative)

For completeness, here's a simple open addressing implementation:

// Open addressing hash table
typedef struct {
char **keys;
int *values;
int *occupied;  // 1 if occupied, 0 if empty
int size;
int count;
} OpenHashTable;
OpenHashTable* oht_create(int size) {
OpenHashTable *table = (OpenHashTable*)malloc(sizeof(OpenHashTable));
table->keys = (char**)calloc(size, sizeof(char*));
table->values = (int*)calloc(size, sizeof(int));
table->occupied = (int*)calloc(size, sizeof(int));
table->size = size;
table->count = 0;
return table;
}
// Linear probing insertion
bool oht_insert(OpenHashTable *table, const char *key, int value) {
if (table->count >= table->size) return false;
int index = hash(key, table->size);
int original_index = index;
// Linear probing
while (table->occupied[index]) {
if (table->keys[index] && strcmp(table->keys[index], key) == 0) {
// Update existing key
table->values[index] = value;
return true;
}
index = (index + 1) % table->size;
if (index == original_index) return false;  // Table is full
}
// Insert new key
table->keys[index] = (char*)malloc(strlen(key) + 1);
strcpy(table->keys[index], key);
table->values[index] = value;
table->occupied[index] = 1;
table->count++;
return true;
}
// Linear probing search
int* oht_get(OpenHashTable *table, const char *key) {
int index = hash(key, table->size);
int original_index = index;
while (table->occupied[index]) {
if (table->keys[index] && strcmp(table->keys[index], key) == 0) {
return &table->values[index];
}
index = (index + 1) % table->size;
if (index == original_index) break;
}
return NULL;
}

Performance Testing

#include <time.h>
void performance_test() {
const int NUM_OPERATIONS = 50000;
const int TABLE_SIZE = 10000;
HashTable *table = ht_create(TABLE_SIZE);
// Test insertion
clock_t start = clock();
for (int i = 0; i < NUM_OPERATIONS; i++) {
char key[32];
sprintf(key, "key_%d", i);
ht_insert(table, key, i);
}
clock_t end = clock();
double insert_time = (double)(end - start) / CLOCKS_PER_SEC;
// Test lookup
start = clock();
for (int i = 0; i < NUM_OPERATIONS; i++) {
char key[32];
sprintf(key, "key_%d", i);
ht_get(table, key);
}
end = clock();
double lookup_time = (double)(end - start) / CLOCKS_PER_SEC;
printf("\n=== Performance Results ===\n");
printf("Insert %d items: %.3f seconds\n", NUM_OPERATIONS, insert_time);
printf("Lookup %d items: %.3f seconds\n", NUM_OPERATIONS, lookup_time);
printf("Load factor: %.2f\n", (float)NUM_OPERATIONS / TABLE_SIZE);
ht_destroy(table);
}

Best Practices

  1. Choose a good hash function: Distribute keys uniformly across buckets
  2. Pick appropriate table size: Prime numbers often work well
  3. Monitor load factor: Consider resizing when load factor > 0.75
  4. Handle collisions properly: Separate chaining is simpler, open addressing more memory-efficient
  5. Free memory correctly: Always free keys and nodes
  6. Check for NULL pointers: Always validate input
  7. Use consistent key comparisons: For strings, use strcmp()

Common Pitfalls

// DON'T: Forgetting to check for NULL
ht_insert(table, NULL, 10);  // Will crash
// DO: Check input
bool ht_insert(HashTable *table, const char *key, int value) {
if (table == NULL || key == NULL) return false;
// ...
}
// DON'T: Using a poor hash function
unsigned long bad_hash(const char *str) {
return strlen(str) % size;  // Very poor distribution
}
// DON'T: Not handling collisions
// Always implement collision resolution
// DON'T: Memory leak
// Always free allocated memory

Conclusion

This simple hash table implementation provides a solid foundation for understanding how hash tables work. The key concepts covered include:

  • Hash functions: Converting keys to array indices
  • Collision resolution: Handling multiple keys that hash to the same bucket
  • Basic operations: Insert, search, delete
  • Memory management: Proper allocation and deallocation
  • Iteration: Traversing all key-value pairs

While this implementation is simple, it's functional and can be extended with features like dynamic resizing, generic values, or different collision resolution strategies. Understanding these fundamentals will help you build more sophisticated hash tables for your C projects.

Leave a Reply

Your email address will not be published. Required fields are marked *


Macro Nepal Helper