Memory-mapped files provide one of the most powerful and efficient mechanisms for file I/O in C. By mapping a file directly into the process's virtual address space, you can treat file contents as if they were ordinary memory, allowing for simpler code, faster access, and efficient sharing between processes. This comprehensive guide explores everything from basic mmap usage to advanced techniques for performance-critical applications.
What are Memory-Mapped Files?
Memory mapping establishes a direct mapping between a file on disk and a region of virtual memory. When you access data in this mapped region, the operating system automatically handles reading from and writing to the file, using the system's virtual memory subsystem for optimal performance.
┌─────────────────┐
│     Process     │
│     Virtual     │
│  Address Space  │◄─────┐
│                 │      │
│  ┌───────────┐  │      │ Memory Mapping
│  │  Mapped   │  │      │ (mmap)
│  │  Region   │  │      │
│  └───────────┘  │      │
│                 │      │
└─────────────────┘      │
                         ▼
                  ┌─────────────┐
                  │   File on   │
                  │    Disk     │
                  └─────────────┘
Basic Memory-Mapping Operations
1. Including Required Headers
#define _GNU_SOURCE /* must come before every #include; exposes mremap() and MREMAP_MAYMOVE */
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
2. Basic mmap() Usage
void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset);
- addr: Hint for the mapping address (usually NULL)
- length: Number of bytes to map
- prot: Protection flags (PROT_READ, PROT_WRITE, PROT_EXEC, PROT_NONE)
- flags: MAP_SHARED or MAP_PRIVATE
- fd: File descriptor
- offset: Offset in the file (must be page-aligned)
3. Simple File Reader Using mmap
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
// Print the file named on the command line by mapping it read-only.
//
// Returns 0 on success, 1 on a usage error or when open/fstat/mmap fails.
int main(int argc, char *argv[]) {
    if (argc != 2) {
        fprintf(stderr, "Usage: %s <filename>\n", argv[0]);
        return 1;
    }

    // Open file
    int fd = open(argv[1], O_RDONLY);
    if (fd == -1) {
        perror("open");
        return 1;
    }

    // Get file size
    struct stat sb;
    if (fstat(fd, &sb) == -1) {
        perror("fstat");
        close(fd);
        return 1;
    }

    // mmap() rejects a zero length with EINVAL, so an empty file is printed
    // without creating a mapping at all.
    size_t length = (size_t)sb.st_size;
    char *data = NULL;
    if (length > 0) {
        data = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd, 0);
        if (data == MAP_FAILED) {
            perror("mmap");
            close(fd);
            return 1;
        }
    }

    // The mapping is raw file data, not a C string: it has no guaranteed NUL
    // terminator and may contain embedded NULs, so write exactly `length`
    // bytes instead of formatting it with %s.
    printf("File contents:\n");
    if (data != NULL) {
        fwrite(data, 1, length, stdout);
    }
    printf("\n");

    // Clean up
    if (data != NULL) {
        munmap(data, length);
    }
    close(fd);
    return 0;
}
Protection and Sharing Modes
1. Protection Flags
int prot_flags = PROT_READ; // Read-only
int prot_flags = PROT_WRITE; // Write-only
int prot_flags = PROT_READ | PROT_WRITE; // Read-write
int prot_flags = PROT_EXEC; // Execute (for code)
int prot_flags = PROT_NONE; // No access
2. Sharing Flags
// MAP_SHARED: Changes are written back to the file
int flags = MAP_SHARED;
// MAP_PRIVATE: Changes are private to this process
int flags = MAP_PRIVATE;
// Additional flags
int flags = MAP_SHARED | MAP_POPULATE; // Preload pages
int flags = MAP_SHARED | MAP_NORESERVE; // Don't reserve swap space
3. Read-Write Mapping Example
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <string.h>
// Map a file read-write with MAP_SHARED, replace every 'a' with 'X' in place
// and flush the result to disk.
//
// Returns 0 on success, 1 on a usage error, an empty file, or when any of
// open/fstat/mmap/msync/munmap/close fails.
int main(int argc, char *argv[]) {
    if (argc != 2) {
        fprintf(stderr, "Usage: %s <filename>\n", argv[0]);
        return 1;
    }

    // Open file for reading and writing
    int fd = open(argv[1], O_RDWR);
    if (fd == -1) {
        perror("open");
        return 1;
    }

    // Get file size
    struct stat sb;
    if (fstat(fd, &sb) == -1) {
        perror("fstat");
        close(fd);
        return 1;
    }

    // A zero-length mapping is rejected by mmap(); report that case clearly.
    if (sb.st_size == 0) {
        fprintf(stderr, "%s: file is empty, nothing to map\n", argv[1]);
        close(fd);
        return 1;
    }
    size_t length = (size_t)sb.st_size;

    // Map file with read-write access
    char *data = mmap(NULL, length, PROT_READ | PROT_WRITE,
                      MAP_SHARED, fd, 0);
    if (data == MAP_FAILED) {
        perror("mmap");
        close(fd);
        return 1;
    }

    // Never preview more bytes than the file actually holds.
    int preview = length < 20 ? (int)length : 20;

    printf("Original first 20 bytes: %.*s\n", preview, data);

    // Replace all 'a' with 'X'; the stores go straight to the shared mapping.
    for (size_t i = 0; i < length; i++) {
        if (data[i] == 'a') {
            data[i] = 'X';
        }
    }

    printf("Modified first 20 bytes: %.*s\n", preview, data);

    // With MAP_SHARED the kernel writes dirty pages back on its own schedule;
    // munmap() and process exit do not wait for that write-back. msync() with
    // MS_SYNC is the call that blocks until the data has been written, so its
    // result decides whether the update can be reported as durable.
    int status = 0;
    if (msync(data, length, MS_SYNC) == -1) {
        perror("msync");
        status = 1;
    }
    if (munmap(data, length) == -1) {
        perror("munmap");
        status = 1;
    }
    if (close(fd) == -1) {
        perror("close");
        status = 1;
    }
    return status;
}
Advanced Memory-Mapping Techniques
1. Partial File Mapping
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
// Map only a portion of a file and print a short preview of it.
//
// filename: file to open (read-write).
// offset:   byte offset of the wanted region; need not be page-aligned.
// length:   number of bytes wanted starting at offset.
//
// Returns 0 on success, -1 if the offset is negative or not inside the file,
// or if open/fstat/sysconf/mmap fails.
int map_file_portion(const char *filename, off_t offset, size_t length) {
    if (offset < 0) {
        fprintf(stderr, "map_file_portion: negative offset\n");
        return -1;
    }

    int fd = open(filename, O_RDWR);
    if (fd == -1) {
        perror("open");
        return -1;
    }

    // Touching mapped pages that lie wholly beyond end-of-file raises SIGBUS,
    // so make sure the region starts inside the file before mapping it.
    struct stat sb;
    if (fstat(fd, &sb) == -1) {
        perror("fstat");
        close(fd);
        return -1;
    }
    if (offset >= sb.st_size) {
        fprintf(stderr, "map_file_portion: offset is beyond the end of the file\n");
        close(fd);
        return -1;
    }

    // Get page size for alignment
    long page_size = sysconf(_SC_PAGESIZE);
    if (page_size <= 0) {
        perror("sysconf");
        close(fd);
        return -1;
    }

    // mmap() needs a page-aligned file offset: round down and remember how
    // far into the mapping the requested byte sits.
    off_t page_offset = offset - (offset % page_size);
    size_t align = (size_t)(offset - page_offset);
    size_t map_size = length + align;

    // Map the region
    char *data = mmap(NULL, map_size, PROT_READ | PROT_WRITE,
                      MAP_SHARED, fd, page_offset);
    if (data == MAP_FAILED) {
        perror("mmap");
        close(fd);
        return -1;
    }

    // Access the desired portion
    char *portion = data + align;

    // Preview at most 20 bytes, and never more than the file really contains.
    off_t available = sb.st_size - offset;
    size_t preview = length > 20 ? 20 : length;
    if ((off_t)preview > available) {
        preview = (size_t)available;
    }

    // off_t has no portable printf conversion; widen it explicitly.
    printf("Mapped portion: offset %lld, length %zu\n", (long long)offset, length);
    printf("First few bytes: %.*s\n", (int)preview, portion);

    // Clean up
    munmap(data, map_size);
    close(fd);
    return 0;
}
// Demo driver: preview 4096 bytes starting at offset 1024 of large_file.txt.
// Exits with status 1 when the mapping helper reports failure.
int main(void) {
    if (map_file_portion("large_file.txt", 1024, 4096) != 0) {
        return 1;
    }
    return 0;
}
2. Creating Anonymous Mappings
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>
// Anonymous mapping demo: obtain 1 MB of memory that is not backed by any
// file, store a greeting in it, print it and release the region again.
//
// Returns 0 on success, -1 (after perror) when mmap() fails.
int create_anonymous_mapping() {
    const size_t region_bytes = 1024 * 1024; // 1 MB

    // With MAP_ANONYMOUS there is no file behind the pages; fd is -1 and
    // offset is 0.
    char *text = mmap(NULL, region_bytes, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (text == MAP_FAILED) {
        perror("mmap");
        return -1;
    }

    // From here on the region behaves like ordinary writable memory.
    strcpy(text, "Hello from anonymous mapping!");
    printf("Buffer: %s\n", text);

    munmap(text, region_bytes);
    return 0;
}
// Large allocation that can be resized
// Bookkeeping for one anonymous mapping: the functions below store the
// address returned by mmap()/mremap() and the length that was requested.
typedef struct {
void *data; // start address of the current mapping
size_t size; // length in bytes passed to mmap()/mremap(); reused for munmap()
} DynamicMapping;
// Allocate a DynamicMapping handle and back it with a private anonymous
// mapping of initial_size bytes.
//
// Returns the new handle, or NULL if either the handle allocation or the
// mmap() call fails. Release it with destroy_dynamic_mapping().
DynamicMapping* create_dynamic_mapping(size_t initial_size) {
    DynamicMapping *dm = malloc(sizeof *dm);
    if (dm == NULL) {
        return NULL; // out of memory: nothing has been mapped yet
    }

    dm->size = initial_size;
    dm->data = mmap(NULL, initial_size, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (dm->data == MAP_FAILED) {
        free(dm);
        return NULL;
    }
    return dm;
}
// Change the length of dm's mapping to new_size bytes.
//
// MREMAP_MAYMOVE lets the kernel relocate the region, so pointers into the
// old mapping must not be used after a successful call.
// Returns 0 on success; on failure returns -1 and leaves dm unchanged.
int resize_dynamic_mapping(DynamicMapping *dm, size_t new_size) {
    void *moved = mremap(dm->data, dm->size, new_size, MREMAP_MAYMOVE);
    if (moved != MAP_FAILED) {
        dm->data = moved;
        dm->size = new_size;
        return 0;
    }
    return -1;
}
// Unmap the region described by dm and free the handle itself.
// Passing NULL is a no-op, mirroring free().
void destroy_dynamic_mapping(DynamicMapping *dm) {
    if (dm == NULL) {
        return;
    }
    munmap(dm->data, dm->size);
    free(dm);
}
3. Shared Memory Between Processes
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/wait.h>
#define SHM_NAME "/mysharedmem"
#define SHM_SIZE 4096
// Parent/child communication through a POSIX shared memory object.
//
// The parent writes a greeting into the shared region and then raises a
// "ready" flag stored in the last byte of the region; the child polls that
// flag and prints the message once it is set.
// Returns 0 on success, 1 if shm_open/ftruncate/mmap/fork fails.
int main(void) {
    // Create shared memory object
    int shm_fd = shm_open(SHM_NAME, O_CREAT | O_RDWR, 0666);
    if (shm_fd == -1) {
        perror("shm_open");
        return 1;
    }

    // Set size
    if (ftruncate(shm_fd, SHM_SIZE) == -1) {
        perror("ftruncate");
        close(shm_fd);
        shm_unlink(SHM_NAME);
        return 1;
    }

    // Map shared memory
    char *shared = mmap(NULL, SHM_SIZE, PROT_READ | PROT_WRITE,
                        MAP_SHARED, shm_fd, 0);
    if (shared == MAP_FAILED) {
        perror("mmap");
        close(shm_fd);
        shm_unlink(SHM_NAME);
        return 1;
    }
    // The mapping stays valid after the descriptor is closed.
    close(shm_fd);

    // O_CREAT without O_EXCL may reopen an object left behind by an earlier
    // run, so clear the message and the flag explicitly before forking.
    char *ready = &shared[SHM_SIZE - 1];
    shared[0] = '\0';
    __atomic_store_n(ready, 0, __ATOMIC_RELAXED);

    pid_t pid = fork();
    if (pid == -1) {
        perror("fork");
        munmap(shared, SHM_SIZE);
        shm_unlink(SHM_NAME);
        return 1;
    }

    if (pid == 0) {
        // Child process
        printf("Child waiting for parent data...\n");
        // Acquire-load pairs with the parent's release-store below, so the
        // whole message is visible once the flag reads non-zero. Polling the
        // first message byte instead could observe a half-written string.
        while (__atomic_load_n(ready, __ATOMIC_ACQUIRE) == 0) {
            usleep(100000); // Wait for parent to write
        }
        printf("Child read: %s\n", shared);
        munmap(shared, SHM_SIZE);
        exit(0);
    }

    // Parent process
    printf("Parent writing data...\n");
    strcpy(shared, "Hello from parent!");
    // Publish the message only after it is complete. (GCC/Clang builtin;
    // no msync() is needed for memory shared between live processes.)
    __atomic_store_n(ready, 1, __ATOMIC_RELEASE);

    // Wait for child
    wait(NULL);

    // Clean up
    munmap(shared, SHM_SIZE);
    shm_unlink(SHM_NAME);
    return 0;
}
Performance Optimization
1. Using MAP_POPULATE for Prefetching
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
// Map filename read-only with MAP_POPULATE so the kernel tries to read the
// file's pages in at mmap() time rather than on first access.
//
// Failures of open/fstat/mmap are reported with perror(); an empty file is
// skipped because a zero-length mapping is invalid.
void prefetch_mapping(const char *filename) {
    int fd = open(filename, O_RDONLY);
    if (fd == -1) {
        perror("open");
        return;
    }

    struct stat sb;
    if (fstat(fd, &sb) == -1) {
        perror("fstat");
        close(fd);
        return;
    }
    if (sb.st_size == 0) {
        close(fd); // nothing to preload
        return;
    }
    size_t length = (size_t)sb.st_size;

    // MAP_POPULATE: ask the kernel to prefault the pages (best effort; it
    // reduces, but does not guarantee the absence of, later page faults).
    void *data = mmap(NULL, length, PROT_READ,
                      MAP_PRIVATE | MAP_POPULATE, fd, 0);
    if (data == MAP_FAILED) {
        perror("mmap");
    } else {
        printf("File preloaded into memory\n");
        // Process data...
        munmap(data, length);
    }
    close(fd);
}
2. Huge Pages for Large Mappings
#define _GNU_SOURCE
#include <sys/mman.h>
#include <stdio.h>
#include <unistd.h>
// Try to allocate 128 MB (64 pages of 2 MB) of anonymous memory backed by
// huge pages, report the outcome and unmap the region again.
void use_huge_pages() {
    const size_t page_bytes = 2 * 1024 * 1024; // 2 MB
    const size_t total_bytes = 64 * page_bytes; // 128 MB

    // MAP_HUGETLB asks for huge pages, which means fewer TLB entries are
    // needed to cover the same amount of memory.
    const int map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
    void *region = mmap(NULL, total_bytes, PROT_READ | PROT_WRITE,
                        map_flags, -1, 0);

    if (region != MAP_FAILED) {
        printf("Allocated %zu bytes using huge pages at %p\n", total_bytes, region);
        // Use the memory...
        munmap(region, total_bytes);
        return;
    }

    perror("mmap (huge pages)");
    printf("Try enabling huge pages: echo 100 > /proc/sys/vm/nr_hugepages\n");
}
3. Advising the Kernel with madvise
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
// Map filename read-only and demonstrate the madvise() hints for access
// patterns. The function reports nothing: on any failure (open, fstat,
// empty file, mmap) it releases what it acquired and returns.
void advise_mapping(const char *filename) {
    int fd = open(filename, O_RDONLY);
    if (fd == -1) {
        return;
    }

    struct stat sb;
    if (fstat(fd, &sb) == -1 || sb.st_size == 0) {
        // Without a valid, non-zero size there is nothing that can be mapped.
        close(fd);
        return;
    }
    size_t length = (size_t)sb.st_size;

    void *data = mmap(NULL, length, PROT_READ,
                      MAP_PRIVATE, fd, 0);
    if (data != MAP_FAILED) {
        // madvise() calls are hints; their results are deliberately ignored.

        // Sequential access pattern
        madvise(data, length, MADV_SEQUENTIAL);
        // Or random access pattern
        // madvise(data, length, MADV_RANDOM);
        // Will need this soon
        madvise(data, length, MADV_WILLNEED);
        // Process data...
        // Done with this, can free
        madvise(data, length, MADV_DONTNEED);
        munmap(data, length);
    }
    close(fd);
}
Thread-Safe Mapped File Access
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <string.h>
#define NUM_THREADS 4
#define FILE_SIZE (1024 * 1024) // 1 MB
// Work description handed to one process_chunk() thread.
typedef struct {
char *data; // base address of the mapping; main() gives every thread the same pointer
size_t start; // first index this thread processes (inclusive)
size_t end; // index one past the last byte this thread processes (exclusive)
int thread_id; // number printed in the thread's progress message
} ThreadData;
// Thread entry point: upper-case the bytes in [start, end) of the shared
// mapping described by the ThreadData passed in arg. Always returns NULL.
void *process_chunk(void *arg) {
    ThreadData *td = (ThreadData *)arg;
    printf("Thread %d: processing [%zu, %zu)\n",
           td->thread_id, td->start, td->end);

    // Process the chunk
    for (size_t i = td->start; i < td->end; i++) {
        // toupper() (declared in <ctype.h>) requires a value representable
        // as unsigned char; passing a negative plain char is undefined.
        td->data[i] = (char)toupper((unsigned char)td->data[i]);
    }
    return NULL;
}
// Create a FILE_SIZE-byte file, fill its mapping with lowercase letters and
// let NUM_THREADS threads upper-case disjoint slices of it.
//
// Returns 0 on success, 1 if the file cannot be prepared or mapped, a thread
// cannot be started, or the final msync() fails.
int main(void) {
    // Create and initialize file
    int fd = open("test.dat", O_RDWR | O_CREAT | O_TRUNC, 0666);
    if (fd == -1) {
        perror("open");
        return 1;
    }

    // Set file size. If this fails the file stays empty and every store to
    // the mapping below would raise SIGBUS, so stop here.
    if (ftruncate(fd, FILE_SIZE) == -1) {
        perror("ftruncate");
        close(fd);
        return 1;
    }

    // Map file
    char *data = mmap(NULL, FILE_SIZE, PROT_READ | PROT_WRITE,
                      MAP_SHARED, fd, 0);
    if (data == MAP_FAILED) {
        perror("mmap");
        close(fd);
        return 1;
    }

    // Initialize with lowercase letters
    for (size_t i = 0; i < FILE_SIZE; i++) {
        data[i] = 'a' + (i % 26);
    }

    // Create threads; each one owns a disjoint slice, so no locking is needed.
    pthread_t threads[NUM_THREADS];
    ThreadData thread_data[NUM_THREADS];
    size_t chunk_size = FILE_SIZE / NUM_THREADS;
    int started = 0;
    int status = 0;

    for (int i = 0; i < NUM_THREADS; i++) {
        thread_data[i].data = data;
        thread_data[i].start = i * chunk_size;
        // The last thread also takes the remainder of an uneven division.
        thread_data[i].end = (i == NUM_THREADS - 1) ?
            FILE_SIZE : (i + 1) * chunk_size;
        thread_data[i].thread_id = i;

        // pthread_create() returns an error number instead of setting errno.
        int err = pthread_create(&threads[i], NULL, process_chunk, &thread_data[i]);
        if (err != 0) {
            fprintf(stderr, "pthread_create: %s\n", strerror(err));
            status = 1;
            break;
        }
        started++;
    }

    // Wait only for the threads that were actually started.
    for (int i = 0; i < started; i++) {
        pthread_join(threads[i], NULL);
    }

    // Force sync to disk
    if (msync(data, FILE_SIZE, MS_SYNC) == -1) {
        perror("msync");
        status = 1;
    }

    // Clean up
    munmap(data, FILE_SIZE);
    close(fd);

    if (status == 0) {
        printf("Processing complete\n");
    }
    return status;
}
Memory-Mapped Database Example
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <errno.h>
#define MAX_RECORDS 10000
#define RECORD_SIZE 256
// One fixed-size database entry occupying RECORD_SIZE bytes.
typedef struct {
char data[RECORD_SIZE]; // payload; the functions in this file store a C string here
} Record;
// In-memory view of the mapped database file: a record count followed
// directly by the records themselves.
typedef struct {
int num_records; // number of valid entries in records[]
Record records[]; // flexible array member; its length comes from the mapping size
} Database;
// Create (or truncate) filename, size it for initial_records records, map it
// with MAP_SHARED and fill every record with "Record <index>".
//
// Returns the mapped database, or NULL on failure with errno describing the
// failing step (EINVAL for a negative record count). The mapping remains
// valid after the descriptor is closed; release it with close_database().
Database* create_database(const char *filename, int initial_records) {
    if (initial_records < 0) {
        // A negative count would wrap to a huge size in the size computation.
        errno = EINVAL;
        return NULL;
    }
    size_t db_size = sizeof(Database) + (size_t)initial_records * sizeof(Record);

    int fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, 0666);
    if (fd == -1) {
        return NULL;
    }

    // Set file size. The file must really be db_size bytes long before the
    // mapping is written to; otherwise the stores below raise SIGBUS.
    if (ftruncate(fd, (off_t)db_size) == -1) {
        int saved_errno = errno; // close() may overwrite errno
        close(fd);
        errno = saved_errno;
        return NULL;
    }

    // Map the file
    Database *db = mmap(NULL, db_size, PROT_READ | PROT_WRITE,
                        MAP_SHARED, fd, 0);
    int map_errno = errno;
    close(fd);
    if (db == MAP_FAILED) {
        errno = map_errno;
        return NULL;
    }

    db->num_records = initial_records;

    // Initialize records
    for (int i = 0; i < initial_records; i++) {
        snprintf(db->records[i].data, RECORD_SIZE, "Record %d", i);
    }

    if (msync(db, db_size, MS_SYNC) == -1) {
        int saved_errno = errno;
        munmap(db, db_size);
        errno = saved_errno;
        return NULL;
    }
    return db;
}
// Append one record containing the string `data` to the database.
//
// db:       in/out; *db is updated because mremap() may move the mapping.
// filename: the file backing the mapping; it is grown by one Record.
// data:     NUL-terminated text, truncated to RECORD_SIZE - 1 characters.
//
// Returns the index of the new record, or -1 on failure with errno set by
// the failing call. On failure *db is left untouched and still valid.
int append_record(Database **db, const char *filename, const char *data) {
    size_t old_size = sizeof(Database) + (size_t)(*db)->num_records * sizeof(Record);
    size_t new_size = old_size + sizeof(Record);

    // Grow the file BEFORE touching the new record: stores to mapped pages
    // beyond end-of-file are either not written back to the file or raise
    // SIGBUS.
    int fd = open(filename, O_RDWR);
    if (fd == -1) {
        return -1;
    }
    if (ftruncate(fd, (off_t)new_size) == -1) {
        int saved_errno = errno; // close() may overwrite errno
        close(fd);
        errno = saved_errno;
        return -1;
    }

    // Remap to larger size
    void *new_db = mremap(*db, old_size, new_size, MREMAP_MAYMOVE);
    if (new_db == MAP_FAILED) {
        int saved_errno = errno;
        // Best effort: shrink the file back so it matches the old mapping.
        if (ftruncate(fd, (off_t)old_size) == -1) {
            /* nothing more can be done here */
        }
        close(fd);
        errno = saved_errno;
        return -1;
    }
    close(fd);

    *db = new_db;
    Database *db_ptr = new_db;

    // Add new record; snprintf() always NUL-terminates, unlike strncpy().
    int index = db_ptr->num_records;
    snprintf(db_ptr->records[index].data, RECORD_SIZE, "%s", data);
    db_ptr->num_records = index + 1;

    return index;
}
// Look up a record by position.
// Returns a pointer into the mapping, or NULL when index is negative or not
// below db->num_records.
Record* get_record(Database *db, int index) {
    const int in_range = (index >= 0) && (index < db->num_records);
    return in_range ? &db->records[index] : NULL;
}
// Flush the database mapping to its file and unmap it. The mapped length is
// recomputed from the record count stored in the database itself.
// The filename parameter is accepted but not used by this function.
void close_database(Database *db, const char *filename) {
    const size_t mapped_bytes = sizeof(Database) + db->num_records * sizeof(Record);

    msync(db, mapped_bytes, MS_SYNC); // synchronous write-back before unmapping
    munmap(db, mapped_bytes);
}
// Demo driver: create a 100-record database in test.db, read record 50,
// append one record and read it back.
// Returns 0 on success, 1 if the database cannot be created or extended.
int main(void) {
    Database *db = create_database("test.db", 100);
    if (!db) {
        perror("create_database");
        return 1;
    }
    printf("Created database with %d records\n", db->num_records);

    // Get a record
    Record *rec = get_record(db, 50);
    if (rec) {
        printf("Record 50: %s\n", rec->data);
    }

    // Append a record; a negative index means the append failed and must not
    // be reported as a valid position.
    int idx = append_record(&db, "test.db", "New custom record");
    if (idx < 0) {
        perror("append_record");
        close_database(db, "test.db");
        return 1;
    }
    printf("Appended record at index %d\n", idx);

    // Verify the new record
    rec = get_record(db, idx);
    if (rec) {
        printf("New record: %s\n", rec->data);
    }

    close_database(db, "test.db");
    return 0;
}
Error Handling and Safety
#include <errno.h>
#include <string.h>
// Safe mmap wrapper: map an entire file read-only with every step checked.
//
// filename: path of the file to map.
// size:     out; receives the file size once fstat() has succeeded.
//
// Returns the mapping on success, NULL for an empty file (valid, but it
// cannot be mapped) and MAP_FAILED when open/fstat/mmap fails; each failure
// is described on stderr. The descriptor is always closed before returning.
void* safe_mmap(const char *filename, size_t *size) {
    void *result = MAP_FAILED;
    struct stat info;

    int fd = open(filename, O_RDONLY);
    if (fd == -1) {
        fprintf(stderr, "Failed to open %s: %s\n", filename, strerror(errno));
        return result;
    }

    if (fstat(fd, &info) == -1) {
        fprintf(stderr, "Failed to stat %s: %s\n", filename, strerror(errno));
        goto done;
    }

    *size = info.st_size;
    if (info.st_size == 0) {
        fprintf(stderr, "Warning: File %s is empty\n", filename);
        result = NULL; // distinct from MAP_FAILED: not an error, just nothing to map
        goto done;
    }

    result = mmap(NULL, info.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
    if (result == MAP_FAILED) {
        fprintf(stderr, "Failed to mmap %s: %s\n", filename, strerror(errno));
    }

done:
    close(fd);
    return result;
}
// Safe munmap wrapper: tolerates the "nothing mapped" results of safe_mmap().
//
// Returns 0 when data is NULL or MAP_FAILED (nothing to do) or when munmap()
// succeeds; returns -1 after printing the reason when munmap() fails.
int safe_munmap(void *data, size_t size) {
    const int nothing_mapped = (data == NULL) || (data == MAP_FAILED);

    if (!nothing_mapped && munmap(data, size) == -1) {
        fprintf(stderr, "Failed to munmap: %s\n", strerror(errno));
        return -1;
    }
    return 0;
}
Performance Comparison
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <time.h>
#include <string.h>
#define FILE_SIZE (100 * 1024 * 1024) // 100 MB
#define BUFFER_SIZE 8192
// Time a full read of filename through stdio in BUFFER_SIZE chunks, summing
// every byte read to simulate processing, and print the CPU time used.
// Returns silently if the file cannot be opened.
void benchmark_read_standard(const char *filename) {
    FILE *f = fopen(filename, "rb");
    if (!f) return;

    char buffer[BUFFER_SIZE];
    // volatile keeps the compiler from discarding the summation loop.
    volatile size_t sum = 0;
    size_t got;

    clock_t start = clock();
    while ((got = fread(buffer, 1, BUFFER_SIZE, f)) > 0) {
        // Simulate processing. Only the first `got` bytes are fresh: a short
        // final read leaves stale data in the rest of the buffer.
        for (size_t i = 0; i < got; i++) {
            sum += buffer[i];
        }
    }
    clock_t end = clock();

    double time = (double)(end - start) / CLOCKS_PER_SEC;
    printf("Standard I/O: %.3f seconds\n", time);
    fclose(f);
}
// Time a full pass over filename through a read-only private mapping,
// summing every byte, and print the CPU time used.
// Returns silently if the file cannot be opened, sized or mapped, or if it
// is empty (a zero-length mapping is invalid).
void benchmark_read_mmap(const char *filename) {
    int fd = open(filename, O_RDONLY);
    if (fd == -1) return;

    struct stat sb;
    if (fstat(fd, &sb) == -1 || sb.st_size == 0) {
        close(fd);
        return;
    }
    size_t length = (size_t)sb.st_size;

    char *data = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd, 0);
    if (data == MAP_FAILED) {
        close(fd);
        return;
    }

    clock_t start = clock();
    // Process the mapped data; volatile keeps the loop from being optimised away.
    volatile size_t sum = 0;
    for (size_t i = 0; i < length; i++) {
        sum += data[i];
    }
    clock_t end = clock();

    double time = (double)(end - start) / CLOCKS_PER_SEC;
    printf("Memory-mapped I/O: %.3f seconds\n", time);
    munmap(data, length);
    close(fd);
}
// Create a FILE_SIZE-byte test file, run both read benchmarks on it and
// delete it again.
// Returns 0 on success, 1 if the test file cannot be created completely.
int main(void) {
    // Create a test file
    FILE *f = fopen("testfile.bin", "wb");
    if (!f) {
        perror("fopen");
        return 1;
    }

    char *buffer = malloc(FILE_SIZE);
    if (!buffer) {
        perror("malloc");
        fclose(f);
        unlink("testfile.bin");
        return 1;
    }
    memset(buffer, 'A', FILE_SIZE);

    // A short write or a failed flush would make both benchmarks measure a
    // truncated file, so verify the write and the fclose() result.
    size_t written = fwrite(buffer, 1, FILE_SIZE, f);
    free(buffer);
    int close_rc = fclose(f);
    if (written != (size_t)FILE_SIZE || close_rc != 0) {
        fprintf(stderr, "Failed to write test file\n");
        unlink("testfile.bin");
        return 1;
    }

    printf("Benchmarking 100 MB file read:\n");
    benchmark_read_standard("testfile.bin");
    benchmark_read_mmap("testfile.bin");

    unlink("testfile.bin");
    return 0;
}
Common Pitfalls and Solutions
1. Alignment Issues
// WRONG: Offset not page-aligned
void *data = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 1024);

// RIGHT: Align offset to page boundary
long page_size = sysconf(_SC_PAGESIZE);
off_t aligned_offset = (offset / page_size) * page_size;
off_t diff = offset - aligned_offset;
size_t map_size = size + diff;
void *data = mmap(NULL, map_size, PROT_READ, MAP_SHARED, fd, aligned_offset);
char *actual_data = (char *)data + diff;
2. File Size Changes
// If file can grow while mapped, handle carefully.
//
// Demonstration: map filename read-write, double the file's size and extend
// the mapping to match. Failures are reported with perror(); the mapping and
// the descriptor are released before returning.
void handle_growing_file(const char *filename) {
    int fd = open(filename, O_RDWR);
    if (fd == -1) {
        perror("open");
        return;
    }

    struct stat sb;
    if (fstat(fd, &sb) == -1) {
        perror("fstat");
        close(fd);
        return;
    }
    if (sb.st_size == 0) {
        close(fd); // a zero-length mapping is invalid; nothing to demonstrate
        return;
    }
    size_t current_size = (size_t)sb.st_size;

    void *data = mmap(NULL, current_size, PROT_READ | PROT_WRITE,
                      MAP_SHARED, fd, 0);
    if (data == MAP_FAILED) {
        perror("mmap");
        close(fd);
        return;
    }

    // Example growth policy: double the file.
    size_t new_size = current_size * 2;

    // When file grows, need to remap. Extend the file first so the new part
    // of the mapping is backed by file data.
    if (ftruncate(fd, (off_t)new_size) == -1) {
        perror("ftruncate");
    } else {
        // Keep the old pointer until mremap() succeeds; on failure the old
        // mapping is still valid and must still be unmapped. mremap() is
        // Linux-specific and needs _GNU_SOURCE defined before the includes.
        void *grown = mremap(data, current_size, new_size, MREMAP_MAYMOVE);
        if (grown == MAP_FAILED) {
            perror("mremap");
        } else {
            // Now data is valid for new size
            data = grown;
            current_size = new_size;
        }
    }

    munmap(data, current_size);
    close(fd);
}
3. Signal Safety
// SIGBUS can occur if file is truncated while mapped
#include <signal.h>
// SIGBUS handler: report the fault and terminate the process.
//
// Only async-signal-safe functions may be called from a signal handler, so
// the message is emitted with write() rather than printf(). The handler must
// not return: after a SIGBUS raised by a faulting memory access, returning
// would re-execute the same access and fault again.
void sigbus_handler(int sig) {
    static const char msg[] = "SIGBUS: File truncated while mapped!\n";
    (void)sig;

    ssize_t ignored = write(STDOUT_FILENO, msg, sizeof msg - 1);
    (void)ignored; // nothing useful can be done if the write fails

    _exit(1); // async-signal-safe, unlike exit()
}
// Register sigbus_handler as the process-wide handler for SIGBUS, with an
// empty signal mask and no special flags.
void setup_sigbus_handler() {
    struct sigaction action = {0};

    action.sa_flags = 0;
    sigemptyset(&action.sa_mask); // block no additional signals while handling
    action.sa_handler = sigbus_handler;

    sigaction(SIGBUS, &action, NULL);
}
Best Practices Summary
- Always check return values: mmap() returns MAP_FAILED on error
- Use appropriate protection flags: Don't give write permission if not needed
- Align offsets correctly: Offsets must be page-aligned
- Handle SIGBUS: Be prepared for file truncation while mapped
- Use msync() for data integrity: Ensure changes are written to disk
- Consider huge pages: For large mappings, huge pages can improve performance
- Use madvise(): Advise the kernel about access patterns
- Clean up properly: Always munmap() when done
- Check file size: Handle empty files specially
- Use proper synchronization: For shared mappings between processes
Conclusion
Memory-mapped files provide a powerful abstraction for file I/O in C, offering simplicity, performance, and flexibility. By treating files as memory, you can:
- Simplify code that would otherwise require complex read/write loops
- Achieve near-DRAM speeds for file access
- Share memory between processes efficiently
- Implement large data structures that persist to disk
The key to effective use of memory-mapped files is understanding the underlying virtual memory system, proper error handling, and choosing the right flags for your use case. Whether building high-performance databases, processing large files, or implementing inter-process communication, memory-mapped files are an essential tool in the C programmer's toolkit.