Building from Scratch: A Complete Guide to Simple Hash Tables in C

Hash tables are among the most fundamental and useful data structures in computer science. They provide O(1) average-case time complexity for insertions, deletions, and lookups, making them invaluable for everything from caching to symbol tables. This guide walks you through building a simple yet functional hash table in C from the ground up.

What is a Hash Table?

A hash table is a data structure that maps keys to values using a hash function. The hash function computes an index into an array of buckets, where the key-value pair is stored.

Key: "apple" → Hash Function → Index: 3 → [Bucket 3] → Value: "red"
Key: "banana" → Hash Function → Index: 7 → [Bucket 7] → Value: "yellow"

Core Components

  1. Hash Function: Converts a key into an array index
  2. Array (Buckets): Stores the key-value pairs
  3. Collision Resolution: Handles when two keys hash to the same index

Simple Hash Table Structure

Let's start with the basic structure for a hash table using separate chaining (linked lists) for collision resolution.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
// Node structure for linked list (separate chaining)
typedef struct HashNode {
char *key;
int value;
struct HashNode *next;
} HashNode;
// Hash table structure
typedef struct {
HashNode **buckets;  // Array of pointers to linked lists
int size;            // Number of buckets
int count;           // Number of key-value pairs
} HashTable;

Hash Function

A good hash function distributes keys uniformly across the array. Here's a simple but effective string hash function (djb2 by Dan Bernstein):

// djb2 hash function - simple and effective
unsigned long hash(const char *str, int table_size) {
unsigned long hash = 5381;
int c;
while ((c = *str++)) {
hash = ((hash << 5) + hash) + c;  // hash * 33 + c
}
return hash % table_size;
}

Creating and Destroying Hash Table

// Create a new hash table
HashTable* ht_create(int size) {
HashTable *table = (HashTable*)malloc(sizeof(HashTable));
if (table == NULL) {
fprintf(stderr, "Memory allocation failed\n");
return NULL;
}
table->size = size;
table->count = 0;
// Allocate array of bucket pointers
table->buckets = (HashNode**)calloc(size, sizeof(HashNode*));
if (table->buckets == NULL) {
free(table);
fprintf(stderr, "Memory allocation failed\n");
return NULL;
}
return table;
}
// Create a new node
HashNode* create_node(const char *key, int value) {
HashNode *node = (HashNode*)malloc(sizeof(HashNode));
if (node == NULL) return NULL;
node->key = (char*)malloc(strlen(key) + 1);
if (node->key == NULL) {
free(node);
return NULL;
}
strcpy(node->key, key);
node->value = value;
node->next = NULL;
return node;
}
// Free a node
void free_node(HashNode *node) {
if (node) {
free(node->key);
free(node);
}
}
// Destroy hash table
void ht_destroy(HashTable *table) {
if (table == NULL) return;
// Free all nodes in all buckets
for (int i = 0; i < table->size; i++) {
HashNode *current = table->buckets[i];
while (current != NULL) {
HashNode *temp = current;
current = current->next;
free_node(temp);
}
}
free(table->buckets);
free(table);
}

Insert Operation

// Insert a key-value pair
bool ht_insert(HashTable *table, const char *key, int value) {
if (table == NULL || key == NULL) return false;
// Compute hash index
int index = hash(key, table->size);
// Check if key already exists
HashNode *current = table->buckets[index];
while (current != NULL) {
if (strcmp(current->key, key) == 0) {
// Key exists - update value
current->value = value;
return true;
}
current = current->next;
}
// Key doesn't exist - create new node
HashNode *new_node = create_node(key, value);
if (new_node == NULL) return false;
// Insert at beginning of linked list
new_node->next = table->buckets[index];
table->buckets[index] = new_node;
table->count++;
return true;
}

Search Operation

// Search for a key, return value if found
int ht_search(HashTable *table, const char *key, bool *found) {
if (table == NULL || key == NULL) {
*found = false;
return 0;
}
int index = hash(key, table->size);
HashNode *current = table->buckets[index];
while (current != NULL) {
if (strcmp(current->key, key) == 0) {
*found = true;
return current->value;
}
current = current->next;
}
*found = false;
return 0;
}
// Simplified version that returns pointer to value
int* ht_get(HashTable *table, const char *key) {
if (table == NULL || key == NULL) return NULL;
int index = hash(key, table->size);
HashNode *current = table->buckets[index];
while (current != NULL) {
if (strcmp(current->key, key) == 0) {
return &current->value;
}
current = current->next;
}
return NULL;
}

Delete Operation

// Delete a key-value pair
bool ht_delete(HashTable *table, const char *key) {
if (table == NULL || key == NULL) return false;
int index = hash(key, table->size);
HashNode *current = table->buckets[index];
HashNode *prev = NULL;
while (current != NULL) {
if (strcmp(current->key, key) == 0) {
// Found the node to delete
if (prev == NULL) {
// Node is at the head of the list
table->buckets[index] = current->next;
} else {
prev->next = current->next;
}
free_node(current);
table->count--;
return true;
}
prev = current;
current = current->next;
}
return false;  // Key not found
}

Utility Functions

// Get the number of key-value pairs
int ht_size(HashTable *table) {
return table ? table->count : 0;
}
// Check if the hash table is empty
bool ht_is_empty(HashTable *table) {
return table ? table->count == 0 : true;
}
// Clear all entries (remove all key-value pairs)
void ht_clear(HashTable *table) {
if (table == NULL) return;
for (int i = 0; i < table->size; i++) {
HashNode *current = table->buckets[i];
while (current != NULL) {
HashNode *temp = current;
current = current->next;
free_node(temp);
}
table->buckets[i] = NULL;
}
table->count = 0;
}
// Check if a key exists
bool ht_contains(HashTable *table, const char *key) {
if (table == NULL || key == NULL) return false;
int index = hash(key, table->size);
HashNode *current = table->buckets[index];
while (current != NULL) {
if (strcmp(current->key, key) == 0) {
return true;
}
current = current->next;
}
return false;
}

Iteration

// Simple iterator structure
typedef struct {
HashTable *table;
int bucket_index;
HashNode *current_node;
} HashIterator;
// Create an iterator
HashIterator* ht_iterator_create(HashTable *table) {
HashIterator *iter = (HashIterator*)malloc(sizeof(HashIterator));
if (iter == NULL) return NULL;
iter->table = table;
iter->bucket_index = -1;
iter->current_node = NULL;
return iter;
}
// Get next key-value pair
bool ht_iterator_next(HashIterator *iter, char **key, int *value) {
if (iter == NULL || iter->table == NULL) return false;
// Move to next non-empty bucket
if (iter->current_node != NULL) {
iter->current_node = iter->current_node->next;
}
while (iter->current_node == NULL) {
iter->bucket_index++;
if (iter->bucket_index >= iter->table->size) {
return false;  // No more entries
}
iter->current_node = iter->table->buckets[iter->bucket_index];
}
*key = iter->current_node->key;
*value = iter->current_node->value;
return true;
}
// Destroy iterator
void ht_iterator_destroy(HashIterator *iter) {
free(iter);
}

Display and Statistics

// Print all key-value pairs
void ht_print(HashTable *table) {
if (table == NULL) {
printf("Hash table is NULL\n");
return;
}
printf("Hash Table (size=%d, count=%d):\n", table->size, table->count);
for (int i = 0; i < table->size; i++) {
if (table->buckets[i] != NULL) {
printf("  Bucket %d: ", i);
HashNode *current = table->buckets[i];
while (current != NULL) {
printf("(%s: %d) ", current->key, current->value);
current = current->next;
}
printf("\n");
}
}
}
// Print hash table statistics
void ht_print_stats(HashTable *table) {
if (table == NULL) return;
printf("\n=== Hash Table Statistics ===\n");
printf("Table size: %d\n", table->size);
printf("Number of entries: %d\n", table->count);
printf("Load factor: %.2f\n", (float)table->count / table->size);
int max_chain = 0;
int empty_buckets = 0;
int total_chain_length = 0;
for (int i = 0; i < table->size; i++) {
int chain_length = 0;
HashNode *current = table->buckets[i];
while (current != NULL) {
chain_length++;
current = current->next;
}
if (chain_length == 0) {
empty_buckets++;
}
if (chain_length > max_chain) {
max_chain = chain_length;
}
total_chain_length += chain_length;
}
printf("Empty buckets: %d (%.2f%%)\n", 
empty_buckets, (float)empty_buckets / table->size * 100);
printf("Max chain length: %d\n", max_chain);
printf("Average chain length: %.2f\n", 
(float)total_chain_length / table->size);
}

Complete Example Program

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
// Include all the function definitions above
int main() {
printf("=== Simple Hash Table Demo ===\n\n");
// Create a hash table with 10 buckets
HashTable *table = ht_create(10);
// Insert some values
printf("1. Inserting values:\n");
ht_insert(table, "apple", 10);
ht_insert(table, "banana", 20);
ht_insert(table, "orange", 30);
ht_insert(table, "grape", 40);
ht_insert(table, "watermelon", 50);
ht_insert(table, "mango", 60);
printf("   Inserted 6 items\n");
ht_print_stats(table);
// Display all entries
printf("\n2. Displaying all entries:\n");
ht_print(table);
// Search for values
printf("\n3. Searching:\n");
const char *keys[] = {"apple", "banana", "grape", "kiwi"};
for (int i = 0; i < 4; i++) {
int *value = ht_get(table, keys[i]);
if (value != NULL) {
printf("   %s -> %d\n", keys[i], *value);
} else {
printf("   %s -> not found\n", keys[i]);
}
}
// Update a value
printf("\n4. Updating value:\n");
printf("   Before: apple -> %d\n", *ht_get(table, "apple"));
ht_insert(table, "apple", 100);
printf("   After:  apple -> %d\n", *ht_get(table, "apple"));
// Delete a value
printf("\n5. Deleting:\n");
printf("   Deleting 'banana': %s\n", 
ht_delete(table, "banana") ? "success" : "failed");
printf("   Deleting 'kiwi': %s\n", 
ht_delete(table, "kiwi") ? "success" : "failed");
ht_print_stats(table);
// Iterate through all entries
printf("\n6. Iterating through entries:\n");
HashIterator *iter = ht_iterator_create(table);
char *key;
int value;
while (ht_iterator_next(iter, &key, &value)) {
printf("   %s: %d\n", key, value);
}
ht_iterator_destroy(iter);
// Check if key exists
printf("\n7. Contains checks:\n");
printf("   Contains 'apple': %s\n", 
ht_contains(table, "apple") ? "yes" : "no");
printf("   Contains 'banana': %s\n", 
ht_contains(table, "banana") ? "yes" : "no");
// Clear the table
printf("\n8. Clearing table:\n");
ht_clear(table);
printf("   Size after clear: %d\n", ht_size(table));
printf("   Is empty: %s\n", ht_is_empty(table) ? "yes" : "no");
// Clean up
ht_destroy(table);
printf("\n=== Demo Complete ===\n");
return 0;
}

Collision Resolution: Open Addressing (Alternative)

For completeness, here's a simple open addressing implementation:

// Open addressing hash table
typedef struct {
char **keys;
int *values;
int *occupied;  // 1 if occupied, 0 if empty
int size;
int count;
} OpenHashTable;
OpenHashTable* oht_create(int size) {
OpenHashTable *table = (OpenHashTable*)malloc(sizeof(OpenHashTable));
table->keys = (char**)calloc(size, sizeof(char*));
table->values = (int*)calloc(size, sizeof(int));
table->occupied = (int*)calloc(size, sizeof(int));
table->size = size;
table->count = 0;
return table;
}
// Linear probing insertion
bool oht_insert(OpenHashTable *table, const char *key, int value) {
if (table->count >= table->size) return false;
int index = hash(key, table->size);
int original_index = index;
// Linear probing
while (table->occupied[index]) {
if (table->keys[index] && strcmp(table->keys[index], key) == 0) {
// Update existing key
table->values[index] = value;
return true;
}
index = (index + 1) % table->size;
if (index == original_index) return false;  // Table is full
}
// Insert new key
table->keys[index] = (char*)malloc(strlen(key) + 1);
strcpy(table->keys[index], key);
table->values[index] = value;
table->occupied[index] = 1;
table->count++;
return true;
}
// Linear probing search
int* oht_get(OpenHashTable *table, const char *key) {
int index = hash(key, table->size);
int original_index = index;
while (table->occupied[index]) {
if (table->keys[index] && strcmp(table->keys[index], key) == 0) {
return &table->values[index];
}
index = (index + 1) % table->size;
if (index == original_index) break;
}
return NULL;
}

Performance Testing

#include <time.h>
void performance_test() {
const int NUM_OPERATIONS = 50000;
const int TABLE_SIZE = 10000;
HashTable *table = ht_create(TABLE_SIZE);
// Test insertion
clock_t start = clock();
for (int i = 0; i < NUM_OPERATIONS; i++) {
char key[32];
sprintf(key, "key_%d", i);
ht_insert(table, key, i);
}
clock_t end = clock();
double insert_time = (double)(end - start) / CLOCKS_PER_SEC;
// Test lookup
start = clock();
for (int i = 0; i < NUM_OPERATIONS; i++) {
char key[32];
sprintf(key, "key_%d", i);
ht_get(table, key);
}
end = clock();
double lookup_time = (double)(end - start) / CLOCKS_PER_SEC;
printf("\n=== Performance Results ===\n");
printf("Insert %d items: %.3f seconds\n", NUM_OPERATIONS, insert_time);
printf("Lookup %d items: %.3f seconds\n", NUM_OPERATIONS, lookup_time);
printf("Load factor: %.2f\n", (float)NUM_OPERATIONS / TABLE_SIZE);
ht_destroy(table);
}

Best Practices

  1. Choose a good hash function: Distribute keys uniformly across buckets
  2. Pick appropriate table size: Prime numbers often work well
  3. Monitor load factor: Consider resizing when load factor > 0.75
  4. Handle collisions properly: Separate chaining is simpler, open addressing more memory-efficient
  5. Free memory correctly: Always free keys and nodes
  6. Check for NULL pointers: Always validate input
  7. Use consistent key comparisons: For strings, use strcmp()

Common Pitfalls

// DON'T: Forgetting to check for NULL
ht_insert(table, NULL, 10);  // Will crash
// DO: Check input
bool ht_insert(HashTable *table, const char *key, int value) {
if (table == NULL || key == NULL) return false;
// ...
}
// DON'T: Using a poor hash function
unsigned long bad_hash(const char *str) {
return strlen(str) % size;  // Very poor distribution
}
// DON'T: Not handling collisions
// Always implement collision resolution
// DON'T: Memory leak
// Always free allocated memory

Conclusion

This simple hash table implementation provides a solid foundation for understanding how hash tables work. The key concepts covered include:

  • Hash functions: Converting keys to array indices
  • Collision resolution: Handling multiple keys that hash to the same bucket
  • Basic operations: Insert, search, delete
  • Memory management: Proper allocation and deallocation
  • Iteration: Traversing all key-value pairs

While this implementation is simple, it's functional and can be extended with features like dynamic resizing, generic values, or different collision resolution strategies. Understanding these fundamentals will help you build more sophisticated hash tables for your C projects.

Complete C Programming Guide + Compilers Collection


1. C srand() Function – Understanding Seed Initialization

https://macronepal.com/understanding-the-c-srand-function
Explains how srand() initializes the pseudo-random number generator in C by setting a seed value. Using the same seed produces the same sequence, while time(NULL) gives different results each run.


2. C rand() Function Mechanics and Limitations

https://macronepal.com/c-rand-function-mechanics-and-limitations
Explains how rand() generates pseudo-random numbers between 0 and RAND_MAX, its deterministic nature, and limitations for security use cases.


3. C log() Function

https://macronepal.com/c-log-function-2
Covers natural logarithm calculation using <math.h> and its applications.


4. Mastering Date and Time in C

https://macronepal.com/mastering-date-and-time-in-c
Explains <time.h> functions like time(), clock(), difftime(), and struct tm.


5. Mastering time_t Type in C

https://macronepal.com/mastering-the-c-time_t-type-for-time-management
Explains time representation as seconds since Unix epoch and conversion functions.


6. C exp() Function

https://macronepal.com/c-exp-function-mechanics-and-implementation
Explains exponential function exp(x) and its scientific applications.


7. C log() Function (Alternate Guide)

https://macronepal.com/c-log-function
Comparison of log() and log10() with usage examples.


8. C log10() Function

https://macronepal.com/mastering-the-log10-function-in-c
Explains base-10 logarithm for engineering and scientific applications.


9. C tan() Function

https://macronepal.com/understanding-the-c-tan-function
Explains tangent function and radian-based calculations.


10. Random Numbers in C (Secure vs Predictable)

https://macronepal.com/mastering-c-random-numbers-for-secure-and-predictable-applications
Explains difference between rand() and secure randomness methods.


11. Free Online C Compiler

https://macronepal.com/free-online-c-code-compiler-2
Browser-based compiler for testing C programs instantly.


C Functions, Arguments, Parameters & Flow

Mastering Functions in C – Complete Guide

https://macronepal.com/c/mastering-functions-in-c-a-complete-guide/
Covers function structure, modular programming, and real-world usage.


Function Arguments in C

https://macronepal.com/c-function-arguments/
Explains how arguments are passed and used in function calls.


Function Parameters in C

https://macronepal.com/c-function-parameters/
Explains defining inputs for functions and matching them with arguments.


Function Declarations in C

https://macronepal.com/c-function-declarations-syntax-rules-and-best-practices/
Covers prototypes, syntax rules, and best practices.


Function Calls in C

https://macronepal.com/understanding-function-calls-in-c-syntax-mechanics-and-best-practices/
Explains execution flow and parameter handling during function calls.


Void Functions in C

https://macronepal.com/understanding-void-functions-in-c-syntax-patterns-and-best-practices/
Explains functions that do not return values.


Return Values in C

https://macronepal.com/c-return-values-mechanics-types-and-best-practices/
Explains different return types and how functions return results.


Pass-by-Value in C

https://macronepal.com/aws/understanding-pass-by-value-in-c-mechanics-implications-and-best-practices/
Explains how copies of variables are passed into functions.


Pass-by-Reference in C

https://macronepal.com/c/understanding-pass-by-reference-in-c-pointers-semantics-and-safe-practices/
Explains using pointers to modify original variables.


C strstr() Function

https://macronepal.com/aws/c-strstr-function/
Explains substring search inside strings in C.


C Preprocessor & Macros

https://macronepal.com/mastering-c-variadic-macros-for-flexible-debugging/
https://macronepal.com/mastering-the-stdc-macro-in-c/
https://macronepal.com/c-time-macro-mechanics-and-usage/
https://macronepal.com/understanding-the-c-date-macro/
https://macronepal.com/c-file-type/
https://macronepal.com/mastering-c-line-macro-for-debugging-and-diagnostics/
https://macronepal.com/mastering-predefined-macros-in-c/
https://macronepal.com/c-error-directive-mechanics-and-usage/
https://macronepal.com/understanding-the-c-pragma-directive/
https://macronepal.com/c-include-directive/


C Structures, Memory, Scope & Linkage

https://macronepal.com/mastering-structures-in-c/
https://macronepal.com/c-structure-declaration-mechanics-and-usage/
https://macronepal.com/c-structure-initialization-mechanics-and-best-practices/
https://macronepal.com/mastering-c-structure-member-access-for-reliable-data-handling/
https://macronepal.com/c-nested-structures/
https://macronepal.com/mastering-arrays-of-structures-in-c/
https://macronepal.com/c-structure-pointers-mechanics-and-implementation/
https://macronepal.com/understanding-c-structure-parameter-passing-mechanics/
https://macronepal.com/mastering-c-returning-structures-for-efficient-data-flow/
https://macronepal.com/c-self-referential-structures/
https://macronepal.com/mastering-structure-alignment-in-c/
https://macronepal.com/c-structure-padding-mechanics-and-optimization/
https://macronepal.com/understanding-c-flexible-array-members-mechanics-and-usage/
https://macronepal.com/mastering-c-anonymous-structures-for-flattened-data-layouts/
https://macronepal.com/c-unions/
https://macronepal.com/mastering-c-name-mangling-and-symbol-decoration/
https://macronepal.com/c-no-linkage-mechanics-and-scope-isolation/
https://macronepal.com/understanding-c-internal-linkage-mechanics-and-architecture/


C Scope, Storage Classes & Typedef

https://macronepal.com/mastering-function-prototype-scope-in-c/
https://macronepal.com/c-function-scope-mechanics-and-visibility/
https://macronepal.com/understanding-c-file-scope-mechanics-and-architecture/
https://macronepal.com/mastering-c-scope-rules-for-predictable-name-resolution/
https://macronepal.com/c-scope-rules/
https://macronepal.com/mastering-c-register-storage-class-for-historical-context-and-modern-alternatives/
https://macronepal.com/mastering-_thread_local-in-c/
https://macronepal.com/c-extern-storage-class-mechanics-and-usage/
https://macronepal.com/understanding-the-c-static-storage-class-mechanics-and-usage/
https://macronepal.com/c-auto-storage-class/
https://macronepal.com/c-typedef-with-pointers/


Extra Articles

https://macronepal.com/13757-2/
https://macronepal.com/13748-2/
https://macronepal.com/13747-2/
https://macronepal.com/13746-2/
https://macronepal.com/13745-2/
https://macronepal.com/13708-2/
https://macronepal.com/13707-2/
https://macronepal.com/13702-2/


Online Compilers

https://macronepal.com/free-html-online-code-compiler/
https://macronepal.com/free-online-python-code-compiler/
https://macronepal.com/free-online-python2-code-compiler/
https://macronepal.com/free-online-java-code-compiler/
https://macronepal.com/free-online-javascript-code-compiler/
https://macronepal.com/free-online-node-js-code-compiler/
https://macronepal.com/free-online-c-code-compiler/
https://macronepal.com/free-online-c-code-compiler-2/
https://macronepal.com/free-online-c-code-compiler-3/
https://macronepal.com/free-online-php-code-compiler/
https://macronepal.com/free-online-ruby-code-compiler/
https://macronepal.com/free-online-perl-code-compiler/
https://macronepal.com/free-online-lua-code-compiler/
https://macronepal.com/free-online-tcl-code-compiler/
https://macronepal.com/free-online-groovy-code-compiler/
https://macronepal.com/free-online-j-shell-code-compiler/
https://macronepal.com/free-online-haskell-code-compiler/
https://macronepal.com/free-online-scala-code-compiler/
https://macronepal.com/free-online-common-lisp-code-compiler/
https://macronepal.com/free-online-d-code-compiler/
https://macronepal.com/free-online-ada-code-compiler/
https://macronepal.com/free-erlang-code-compiler/
https://macronepal.com/free-online-assembly-code-compiler/

Leave a Reply

Your email address will not be published. Required fields are marked *


Macro Nepal Helper