While basic unions are straightforward, advanced union techniques unlock powerful patterns for type punning, memory optimization, protocol implementation, and even object-oriented-like polymorphism in C. This guide explores sophisticated union applications that demonstrate the full power of this often-underutilized feature.
1. Tagged Unions (Discriminated Unions)
The most important advanced pattern is the tagged union, which tracks the active type:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
/*
 * Discriminator stored in Value.type. It names which member of the
 * Value.data union currently holds meaningful data. The numbers are
 * written out explicitly and equal the implicit enumeration order.
 */
typedef enum {
    TYPE_NONE   = 0,
    TYPE_INT    = 1,
    TYPE_DOUBLE = 2,
    TYPE_STRING = 3,
    TYPE_ARRAY  = 4,
    TYPE_OBJECT = 5
} ValueType;
// Forward declaration so the container types can refer to Value before
// struct Value itself is defined.
typedef struct Value Value;
/*
 * Growable array of Value pointers backing a TYPE_ARRAY value.
 *
 * `elements` is an array of pointers (Value **), not an array of Value
 * objects: array_append() stores the `Value *` it is given and sizes the
 * buffer in units of `sizeof(Value *)`, and free_value()/print_value()
 * pass each `elements[i]` on as a `Value *`.
 */
typedef struct {
    Value **elements; // NULL until the first append
    int count;        // number of slots in use
    int capacity;     // number of slots allocated
} Array;
// One key/value entry of an Object.
typedef struct {
char *key;    // heap-allocated copy of the key; free_value() releases it
Value *value; // property value; free_value() recurses into it
} Property;
// Growable list of properties backing a TYPE_OBJECT value.
typedef struct {
Property *properties; // NULL until the first object_set() call
int count;            // entries currently in use
int capacity;         // entries allocated in `properties`
} Object;
// Advanced tagged union - can represent any JSON-like value.
// `type` is the tag: it records which member of `data` was last stored,
// and every function in this program checks it before reading `data`.
struct Value {
ValueType type;
union {
int int_value;       // valid when type == TYPE_INT
double double_value; // valid when type == TYPE_DOUBLE
char *string_value;  // valid when type == TYPE_STRING (heap-allocated)
Array array_value;   // valid when type == TYPE_ARRAY
Object object_value; // valid when type == TYPE_OBJECT
} data;
};
// Create functions
/*
 * Allocate a Value tagged TYPE_INT that holds `value`.
 * Returns NULL when memory cannot be allocated; otherwise the caller owns
 * the result and releases it with free_value().
 */
Value *create_int(int value) {
    Value *v = malloc(sizeof *v);
    if (v == NULL) {
        return NULL;
    }
    v->type = TYPE_INT;
    v->data.int_value = value;
    return v;
}
/*
 * Allocate a Value tagged TYPE_DOUBLE that holds `value`.
 * Returns NULL when memory cannot be allocated; otherwise the caller owns
 * the result and releases it with free_value().
 */
Value *create_double(double value) {
    Value *v = malloc(sizeof *v);
    if (v == NULL) {
        return NULL;
    }
    v->type = TYPE_DOUBLE;
    v->data.double_value = value;
    return v;
}
/*
 * Allocate a Value tagged TYPE_STRING holding a private copy of `value`.
 * Returns NULL if `value` is NULL or if either allocation fails (nothing
 * is leaked in that case). The caller owns the result and releases it
 * with free_value().
 */
Value *create_string(const char *value) {
    if (value == NULL) {
        return NULL;
    }
    size_t bytes = strlen(value) + 1; // include the terminating NUL
    Value *v = malloc(sizeof *v);
    if (v == NULL) {
        return NULL;
    }
    v->data.string_value = malloc(bytes);
    if (v->data.string_value == NULL) {
        free(v);
        return NULL;
    }
    memcpy(v->data.string_value, value, bytes);
    v->type = TYPE_STRING;
    return v;
}
/*
 * Allocate an empty TYPE_ARRAY value (no element storage yet).
 * Returns NULL when memory cannot be allocated; otherwise the caller owns
 * the result and releases it with free_value().
 */
Value *create_array(void) {
    Value *v = malloc(sizeof *v);
    if (v == NULL) {
        return NULL;
    }
    v->type = TYPE_ARRAY;
    v->data.array_value.elements = NULL;
    v->data.array_value.count = 0;
    v->data.array_value.capacity = 0;
    return v;
}
// Array operations
void free_value(Value *v); // defined further down in this file

/*
 * Append `element` to `array`, which must be a TYPE_ARRAY value.
 * The array takes ownership of `element` in every case: if the backing
 * storage cannot be grown, the array is left unchanged, `element` is
 * released and a diagnostic is written to stderr.
 * Capacity starts at 4 slots and doubles whenever it is exhausted.
 */
void array_append(Value *array, Value *element) {
    assert(array->type == TYPE_ARRAY);
    Array *arr = &array->data.array_value;
    if (arr->count >= arr->capacity) {
        int new_capacity = arr->capacity == 0 ? 4 : arr->capacity * 2;
        // Keep the old pointer until realloc is known to have succeeded;
        // assigning its result directly would lose the buffer on failure.
        void *grown = realloc(arr->elements,
                              (size_t)new_capacity * sizeof *arr->elements);
        if (grown == NULL) {
            fprintf(stderr, "array_append: out of memory\n");
            free_value(element);
            return;
        }
        arr->elements = grown;
        arr->capacity = new_capacity;
    }
    arr->elements[arr->count++] = element;
}
// Object operations
/*
 * Allocate an empty TYPE_OBJECT value (no property storage yet).
 * Returns NULL when memory cannot be allocated; otherwise the caller owns
 * the result and releases it with free_value().
 */
Value *create_object(void) {
    Value *v = malloc(sizeof *v);
    if (v == NULL) {
        return NULL;
    }
    v->type = TYPE_OBJECT;
    v->data.object_value.properties = NULL;
    v->data.object_value.count = 0;
    v->data.object_value.capacity = 0;
    return v;
}
void free_value(Value *v); // defined further down in this file

// Return a heap copy of `key`, or NULL on allocation failure.
// Written with malloc/memcpy because strdup() is not part of ISO C.
static char *object_copy_key(const char *key) {
    size_t bytes = strlen(key) + 1;
    char *copy = malloc(bytes);
    if (copy != NULL) {
        memcpy(copy, key, bytes);
    }
    return copy;
}

/*
 * Set property `key` of `obj` (which must be a TYPE_OBJECT value) to
 * `value`. The object takes ownership of `value` in every case.
 *
 * - If the key already exists, the previous value is released and
 *   replaced; the stored key string is kept as is.
 * - Otherwise a new property with a private copy of `key` is appended;
 *   capacity starts at 4 entries and doubles when exhausted.
 * - If memory cannot be allocated, the object is left unchanged, `value`
 *   is released and a diagnostic is written to stderr.
 */
void object_set(Value *obj, const char *key, Value *value) {
    assert(obj->type == TYPE_OBJECT);
    Object *o = &obj->data.object_value;

    for (int i = 0; i < o->count; i++) {
        if (strcmp(o->properties[i].key, key) == 0) {
            // Release the value being replaced, unless the caller is
            // re-setting the very same pointer.
            if (o->properties[i].value != value) {
                free_value(o->properties[i].value);
            }
            o->properties[i].value = value;
            return;
        }
    }

    char *key_copy = object_copy_key(key);
    if (key_copy == NULL) {
        goto out_of_memory;
    }
    if (o->count >= o->capacity) {
        int new_capacity = o->capacity == 0 ? 4 : o->capacity * 2;
        Property *grown = realloc(o->properties,
                                  (size_t)new_capacity * sizeof *grown);
        if (grown == NULL) {
            free(key_copy);
            goto out_of_memory;
        }
        o->properties = grown;
        o->capacity = new_capacity;
    }
    o->properties[o->count].key = key_copy;
    o->properties[o->count].value = value;
    o->count++;
    return;

out_of_memory:
    fprintf(stderr, "object_set: out of memory\n");
    free_value(value);
}
/*
 * Release `v` and everything it owns. Strings free their buffer, arrays
 * free each element and then the element table, objects free each key
 * and value and then the property table. A NULL argument is ignored.
 */
void free_value(Value *v) {
    if (v == NULL) {
        return;
    }
    if (v->type == TYPE_STRING) {
        free(v->data.string_value);
    } else if (v->type == TYPE_ARRAY) {
        Array *arr = &v->data.array_value;
        int idx = 0;
        while (idx < arr->count) {
            free_value(arr->elements[idx]);
            idx++;
        }
        free(arr->elements);
    } else if (v->type == TYPE_OBJECT) {
        Object *obj = &v->data.object_value;
        int idx = 0;
        while (idx < obj->count) {
            Property *prop = &obj->properties[idx];
            free(prop->key);
            free_value(prop->value);
            idx++;
        }
        free(obj->properties);
    }
    // Scalars (and TYPE_NONE) own nothing besides the Value itself.
    free(v);
}
// Print value (JSON-like)
void print_value(const Value *v, int indent) {
if (!v) return;
switch (v->type) {
case TYPE_INT:
printf("%d", v->data.int_value);
break;
case TYPE_DOUBLE:
printf("%f", v->data.double_value);
break;
case TYPE_STRING:
printf("\"%s\"", v->data.string_value);
break;
case TYPE_ARRAY:
printf("[");
for (int i = 0; i < v->data.array_value.count; i++) {
if (i > 0) printf(", ");
print_value(v->data.array_value.elements[i], indent);
}
printf("]");
break;
case TYPE_OBJECT:
printf("{\n");
for (int i = 0; i < v->data.object_value.count; i++) {
for (int j = 0; j < indent + 2; j++) printf(" ");
printf("\"%s\": ",
v->data.object_value.properties[i].key);
print_value(v->data.object_value.properties[i].value,
indent + 2);
if (i < v->data.object_value.count - 1) printf(",");
printf("\n");
}
for (int j = 0; j < indent; j++) printf(" ");
printf("}");
break;
default:
printf("null");
}
}
/*
 * Demo: build a nested person record (scalars, an array and a nested
 * object), print it, and release the whole tree with one free_value().
 */
int main() {
    // Build the two nested containers first; they are attached below in
    // the same order as the printed output expects.
    Value *hobbies = create_array();
    array_append(hobbies, create_string("reading"));
    array_append(hobbies, create_string("swimming"));
    array_append(hobbies, create_string("coding"));

    Value *address = create_object();
    object_set(address, "street", create_string("123 Main St"));
    object_set(address, "city", create_string("Anytown"));
    object_set(address, "zip", create_int(12345));

    Value *person = create_object();
    object_set(person, "name", create_string("John Doe"));
    object_set(person, "age", create_int(30));
    object_set(person, "salary", create_double(75000.50));
    object_set(person, "hobbies", hobbies);
    object_set(person, "address", address);

    printf("Person object:\n");
    print_value(person, 0);
    printf("\n\n");

    // person owns hobbies and address, so this frees everything.
    free_value(person);
    return 0;
}
2. Type Punning and Serialization
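In C (C99 and later), reading a union member other than the one most recently written reinterprets the stored bytes as the new type, which makes unions a standard-compliant way to perform type punning. This guarantee does not exist in C++, and the bit-field layouts used below are implementation-defined, so the `parts` views are not portable across compilers or byte orders: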
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Safe type punning using unions
// Three views of one float object: the value itself, its raw 32-bit
// pattern, and that pattern split into bit-fields.
// NOTE(review): the order in which bit-fields are packed into a word is
// implementation-defined; declaring `mantissa` first presumably assumes a
// compiler that allocates from the least significant bit - confirm for
// the target platform.
typedef union {
float f;
uint32_t i;
struct {
uint32_t mantissa : 23;
uint32_t exponent : 8;
uint32_t sign : 1;
} parts;
} FloatBits;
// The same three views for a double and its 64-bit pattern.
// NOTE(review): bit-fields of type uint64_t are only supported where the
// implementation allows them - confirm for the target compiler.
typedef union {
double d;
uint64_t i;
struct {
uint64_t mantissa : 52;
uint64_t exponent : 11;
uint64_t sign : 1;
} parts;
} DoubleBits;
void analyze_float(float f) {
FloatBits fb = {.f = f};
printf("Float: %f\n", f);
printf(" Bits: 0x%08" PRIx32 "\n", fb.i);
printf(" Sign: %u\n", fb.parts.sign);
printf(" Exponent: %u (value: %d)\n",
fb.parts.exponent, (int)fb.parts.exponent - 127);
printf(" Mantissa: 0x%06" PRIx32 "\n", fb.parts.mantissa);
// Special values
if (fb.parts.exponent == 0 && fb.parts.mantissa == 0) {
printf(" Zero\n");
} else if (fb.parts.exponent == 0 && fb.parts.mantissa != 0) {
printf(" Subnormal number\n");
} else if (fb.parts.exponent == 255 && fb.parts.mantissa == 0) {
printf(" Infinity\n");
} else if (fb.parts.exponent == 255 && fb.parts.mantissa != 0) {
printf(" NaN\n");
}
printf("\n");
}
// Serialization framework
// Tag written as the first byte of every serialized record.
typedef enum {
SER_TYPE_INT,
SER_TYPE_FLOAT,
SER_TYPE_DOUBLE,
SER_TYPE_STRING,
SER_TYPE_BLOB
} SerialType;
// A value that serialize()/deserialize() can convert to and from bytes.
// `type` selects the active member of `data`.
typedef struct {
SerialType type;
union {
int int_value;
float float_value;
double double_value;
struct {
char *data;    // string bytes; deserialize() adds a terminating NUL
size_t length; // number of bytes, not counting any terminator
} string;
struct {
void *data;  // raw bytes
size_t size; // number of bytes in `data`
} blob;
} data;
} Serializable;
/*
 * Serialize `s` into a newly allocated byte buffer.
 *
 * Layout: one type byte, then for INT/FLOAT/DOUBLE the object's bytes in
 * host representation; for STRING/BLOB a host `size_t` length followed by
 * that many payload bytes.
 *
 * On success returns the buffer (caller frees it with free()) and stores
 * its size in *out_size. On failure (unknown type, size overflow or
 * allocation failure) returns NULL and stores 0 in *out_size.
 */
uint8_t *serialize(const Serializable *s, size_t *out_size) {
    const void *payload = NULL; // bytes copied after the header
    size_t payload_len = 0;
    int has_length_prefix = 0;  // STRING and BLOB carry an explicit length

    *out_size = 0;
    switch (s->type) {
    case SER_TYPE_INT:
        payload = &s->data.int_value;
        payload_len = sizeof s->data.int_value;
        break;
    case SER_TYPE_FLOAT:
        payload = &s->data.float_value;
        payload_len = sizeof s->data.float_value;
        break;
    case SER_TYPE_DOUBLE:
        payload = &s->data.double_value;
        payload_len = sizeof s->data.double_value;
        break;
    case SER_TYPE_STRING:
        payload = s->data.string.data;
        payload_len = s->data.string.length;
        has_length_prefix = 1;
        break;
    case SER_TYPE_BLOB:
        payload = s->data.blob.data;
        payload_len = s->data.blob.size;
        has_length_prefix = 1;
        break;
    default:
        return NULL;
    }

    const size_t header_len = 1 + (has_length_prefix ? sizeof(size_t) : 0);
    if (payload_len > SIZE_MAX - header_len) {
        return NULL; // total size would wrap around
    }

    uint8_t *buffer = malloc(header_len + payload_len);
    if (buffer == NULL) {
        return NULL;
    }
    buffer[0] = (uint8_t)s->type;
    if (has_length_prefix) {
        memcpy(buffer + 1, &payload_len, sizeof payload_len);
    }
    if (payload_len > 0) {
        // Guarded so an empty string/blob may have a NULL data pointer.
        memcpy(buffer + header_len, payload, payload_len);
    }
    *out_size = header_len + payload_len;
    return buffer;
}
// Deserialize from byte buffer
Serializable* deserialize(const uint8_t *buffer, size_t size) {
if (size < 1) return NULL;
Serializable *s = malloc(sizeof(Serializable));
s->type = buffer[0];
switch (s->type) {
case SER_TYPE_INT:
if (size < 1 + sizeof(int)) goto error;
memcpy(&s->data.int_value, buffer + 1, sizeof(int));
break;
case SER_TYPE_FLOAT:
if (size < 1 + sizeof(float)) goto error;
memcpy(&s->data.float_value, buffer + 1, sizeof(float));
break;
case SER_TYPE_DOUBLE:
if (size < 1 + sizeof(double)) goto error;
memcpy(&s->data.double_value, buffer + 1, sizeof(double));
break;
case SER_TYPE_STRING:
if (size < 1 + sizeof(size_t)) goto error;
memcpy(&s->data.string.length, buffer + 1, sizeof(size_t));
if (size < 1 + sizeof(size_t) + s->data.string.length) goto error;
s->data.string.data = malloc(s->data.string.length + 1);
memcpy(s->data.string.data,
buffer + 1 + sizeof(size_t), s->data.string.length);
s->data.string.data[s->data.string.length] = '\0';
break;
case SER_TYPE_BLOB:
if (size < 1 + sizeof(size_t)) goto error;
memcpy(&s->data.blob.size, buffer + 1, sizeof(size_t));
if (size < 1 + sizeof(size_t) + s->data.blob.size) goto error;
s->data.blob.data = malloc(s->data.blob.size);
memcpy(s->data.blob.data,
buffer + 1 + sizeof(size_t), s->data.blob.size);
break;
default:
goto error;
}
return s;
error:
free(s);
return NULL;
}
/*
 * Release a Serializable returned by deserialize(), including the
 * payload owned by string and blob values. A NULL argument is ignored.
 * Each payload is freed through its own union member instead of relying
 * on `string.data` and `blob.data` sharing the same storage.
 */
void free_serializable(Serializable *s) {
    if (s == NULL) {
        return;
    }
    switch (s->type) {
    case SER_TYPE_STRING:
        free(s->data.string.data);
        break;
    case SER_TYPE_BLOB:
        free(s->data.blob.data);
        break;
    default:
        break; // scalar types own no extra memory
    }
    free(s);
}
/*
 * Demo: dump the bit layout of a few floats, then round-trip a string
 * through serialize()/deserialize() and print the serialized bytes.
 * Returns 1 if serialization fails.
 */
int main(void) {
    // Analyze float representation
    analyze_float(3.14159f);
    analyze_float(0.0f);
    analyze_float(1.0f / 0.0f); // Infinity

    // Serialization example; the length is derived from the array so it
    // cannot drift from the text.
    char message[] = "Hello, Serialization!";
    Serializable s = {
        .type = SER_TYPE_STRING,
        .data.string.data = message,
        .data.string.length = sizeof message - 1
    };

    size_t serialized_size = 0;
    uint8_t *serialized = serialize(&s, &serialized_size);
    if (serialized == NULL) {
        fprintf(stderr, "serialize failed\n");
        return 1;
    }
    printf("Serialized %zu bytes\n", serialized_size);
    for (size_t i = 0; i < serialized_size; i++) {
        printf("%02X ", serialized[i]);
    }
    printf("\n\n");

    Serializable *deserialized = deserialize(serialized, serialized_size);
    if (deserialized && deserialized->type == SER_TYPE_STRING) {
        printf("Deserialized: %s\n", deserialized->data.string.data);
    }

    free(serialized);
    free_serializable(deserialized);
    return 0;
}
3. Polymorphism with Unions
Implementing object-oriented-like polymorphism:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
// Shape types
// Tag identifying the concrete struct that a Shape pointer refers to.
typedef enum {
SHAPE_CIRCLE,
SHAPE_RECTANGLE,
SHAPE_TRIANGLE
} ShapeType;
// Forward declaration
typedef struct Shape Shape;
// Virtual function table: one set of operations per concrete shape type.
// Every function receives the shape as a pointer to its Shape base.
typedef struct {
double (*area)(const Shape*);
double (*perimeter)(const Shape*);
void (*print)(const Shape*);
void (*destroy)(Shape*);
} ShapeVTable;
// Base shape structure. Concrete shapes embed it as their first member,
// so a pointer to the concrete struct can be converted to Shape* and back.
struct Shape {
ShapeType type;
const ShapeVTable *vtable;
};
// Circle
// M_PI is a POSIX extension and is not guaranteed by ISO C's <math.h>
// (for example under -std=c11), so supply it when it is missing.
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

// Concrete shape: `base` must stay the first member so that Circle* and
// Shape* can be converted into each other.
typedef struct {
    Shape base;
    double radius;
} Circle;

// Area of the circle behind `shape` (pi * r^2).
double circle_area(const Shape *shape) {
    const Circle *c = (const Circle *)shape;
    return M_PI * c->radius * c->radius;
}

// Circumference of the circle behind `shape` (2 * pi * r).
double circle_perimeter(const Shape *shape) {
    const Circle *c = (const Circle *)shape;
    return 2 * M_PI * c->radius;
}

// Print a one-line description (no trailing newline) to stdout.
void circle_print(const Shape *shape) {
    const Circle *c = (const Circle *)shape;
    printf("Circle (radius=%.2f)", c->radius);
}

// Release a circle obtained from create_circle().
void circle_destroy(Shape *shape) {
    free(shape);
}

// Operations installed into every Circle's base.vtable.
const ShapeVTable circle_vtable = {
    .area = circle_area,
    .perimeter = circle_perimeter,
    .print = circle_print,
    .destroy = circle_destroy
};

/*
 * Allocate a circle with the given radius and return it as its Shape
 * base. Returns NULL if memory cannot be allocated. Release the result
 * through shape->vtable->destroy().
 */
Shape *create_circle(double radius) {
    Circle *c = malloc(sizeof *c);
    if (c == NULL) {
        return NULL;
    }
    c->base.type = SHAPE_CIRCLE;
    c->base.vtable = &circle_vtable;
    c->radius = radius;
    return &c->base;
}
// Rectangle
// Concrete shape: `base` must stay the first member so that Rectangle*
// and Shape* can be converted into each other.
typedef struct {
    Shape base;
    double width;
    double height;
} Rectangle;

// Area of the rectangle behind `shape` (width * height).
double rectangle_area(const Shape *shape) {
    const Rectangle *r = (const Rectangle *)shape;
    return r->width * r->height;
}

// Perimeter of the rectangle behind `shape`.
double rectangle_perimeter(const Shape *shape) {
    const Rectangle *r = (const Rectangle *)shape;
    return 2 * (r->width + r->height);
}

// Print a one-line description (no trailing newline) to stdout.
void rectangle_print(const Shape *shape) {
    const Rectangle *r = (const Rectangle *)shape;
    printf("Rectangle (%.2f x %.2f)", r->width, r->height);
}

// Release a rectangle obtained from create_rectangle().
void rectangle_destroy(Shape *shape) {
    free(shape);
}

// Operations installed into every Rectangle's base.vtable.
const ShapeVTable rectangle_vtable = {
    .area = rectangle_area,
    .perimeter = rectangle_perimeter,
    .print = rectangle_print,
    .destroy = rectangle_destroy
};

/*
 * Allocate a rectangle with the given dimensions and return it as its
 * Shape base. Returns NULL if memory cannot be allocated. Release the
 * result through shape->vtable->destroy().
 */
Shape *create_rectangle(double width, double height) {
    Rectangle *r = malloc(sizeof *r);
    if (r == NULL) {
        return NULL;
    }
    r->base.type = SHAPE_RECTANGLE;
    r->base.vtable = &rectangle_vtable;
    r->width = width;
    r->height = height;
    return &r->base;
}
// Triangle
// Concrete shape: `base` must stay the first member so that Triangle*
// and Shape* can be converted into each other.
typedef struct {
    Shape base;
    double a, b, c; // Three sides
} Triangle;

/*
 * Area of the triangle behind `shape`, computed with Heron's formula.
 * Returns 0 when the product under the square root is not positive
 * (degenerate sides, or rounding on a nearly flat triangle), because
 * sqrt() of a negative value would yield NaN.
 */
double triangle_area(const Shape *shape) {
    const Triangle *t = (const Triangle *)shape;
    double s = (t->a + t->b + t->c) / 2.0; // semi-perimeter
    double product = s * (s - t->a) * (s - t->b) * (s - t->c);
    if (!(product > 0.0)) {
        return 0.0;
    }
    return sqrt(product);
}

// Perimeter of the triangle behind `shape` (sum of the sides).
double triangle_perimeter(const Shape *shape) {
    const Triangle *t = (const Triangle *)shape;
    return t->a + t->b + t->c;
}

// Print a one-line description (no trailing newline) to stdout.
void triangle_print(const Shape *shape) {
    const Triangle *t = (const Triangle *)shape;
    printf("Triangle (sides=%.2f,%.2f,%.2f)", t->a, t->b, t->c);
}

// Release a triangle obtained from create_triangle().
void triangle_destroy(Shape *shape) {
    free(shape);
}

// Operations installed into every Triangle's base.vtable.
const ShapeVTable triangle_vtable = {
    .area = triangle_area,
    .perimeter = triangle_perimeter,
    .print = triangle_print,
    .destroy = triangle_destroy
};

/*
 * Allocate a triangle with the given side lengths and return it as its
 * Shape base. Returns NULL if memory cannot be allocated. Release the
 * result through shape->vtable->destroy().
 */
Shape *create_triangle(double a, double b, double c) {
    Triangle *t = malloc(sizeof *t);
    if (t == NULL) {
        return NULL;
    }
    t->base.type = SHAPE_TRIANGLE;
    t->base.vtable = &triangle_vtable;
    t->a = a;
    t->b = b;
    t->c = c;
    return &t->base;
}
// Union-based shape container (can hold any shape type by value).
// Its size is that of the largest member.
typedef union {
Circle circle;
Rectangle rectangle;
Triangle triangle;
} ShapeUnion;
// NOTE(review): every member above starts with a Shape base, so the type
// tag and vtable pointer are in fact stored inside the union. ShapeUnion
// is not used by the rest of this program.
// The handle below instead keeps a tag next to a typed pointer to a shape
// that lives elsewhere; it does not own the shape.
typedef struct {
ShapeType type;
union {
Circle *circle;
Rectangle *rectangle;
Triangle *triangle;
} ptr;
} ShapeHandle;
/*
 * Wrap `shape` in a ShapeHandle: copy its type tag and store the pointer
 * in the union member that matches that tag. The handle borrows the
 * shape; it does not take ownership.
 * For an unrecognized tag the pointer member is left NULL, so the handle
 * never carries an indeterminate pointer.
 */
ShapeHandle create_shape_handle(Shape *shape) {
    ShapeHandle handle = { .type = shape->type, .ptr = { .circle = NULL } };
    switch (shape->type) {
    case SHAPE_CIRCLE:
        handle.ptr.circle = (Circle *)shape;
        break;
    case SHAPE_RECTANGLE:
        handle.ptr.rectangle = (Rectangle *)shape;
        break;
    case SHAPE_TRIANGLE:
        handle.ptr.triangle = (Triangle *)shape;
        break;
    default:
        break; // unknown tag: keep the NULL pointer set above
    }
    return handle;
}
double shape_handle_area(const ShapeHandle *handle) {
switch (handle->type) {
case SHAPE_CIRCLE:
return circle_area((Shape*)handle->ptr.circle);
case SHAPE_RECTANGLE:
return rectangle_area((Shape*)handle->ptr.rectangle);
case SHAPE_TRIANGLE:
return triangle_area((Shape*)handle->ptr.triangle);
}
return 0;
}
int main() {
// Create shapes
Shape *shapes[] = {
create_circle(5.0),
create_rectangle(4.0, 6.0),
create_triangle(3.0, 4.0, 5.0)
};
int num_shapes = sizeof(shapes) / sizeof(shapes[0]);
// Polymorphic behavior
printf("Shapes (using virtual functions):\n");
for (int i = 0; i < num_shapes; i++) {
shapes[i]->vtable->print(shapes[i]);
printf(": area=%.2f, perimeter=%.2f\n",
shapes[i]->vtable->area(shapes[i]),
shapes[i]->vtable->perimeter(shapes[i]));
}
printf("\nShapes (using union handles):\n");
for (int i = 0; i < num_shapes; i++) {
ShapeHandle handle = create_shape_handle(shapes[i]);
printf("Area: %.2f\n", shape_handle_area(&handle));
}
// Cleanup
for (int i = 0; i < num_shapes; i++) {
shapes[i]->vtable->destroy(shapes[i]);
}
return 0;
}
4. Memory-Mapped I/O and Hardware Registers
Unions are essential for hardware programming:
#include <stdio.h>
#include <stdint.h>
// Hardware register definitions
// Control register: `value` is the whole 32-bit word, `bits` names the
// individual fields.
// NOTE(review): bit-field allocation order is implementation-defined, so
// the bit positions in the comments below hold only where the compiler
// allocates from the least significant bit - confirm for the target.
typedef union {
uint32_t value;
struct {
uint32_t enable : 1; // Bit 0
uint32_t mode : 2; // Bits 1-2
uint32_t interrupt : 1; // Bit 3
uint32_t error : 1; // Bit 4
uint32_t reserved1 : 3; // Bits 5-7
uint32_t baud_rate : 4; // Bits 8-11
uint32_t data_bits : 3; // Bits 12-14
uint32_t stop_bits : 1; // Bit 15
uint32_t parity : 2; // Bits 16-17
uint32_t reserved2 : 14; // Bits 18-31
} bits;
} UARTControlReg;
// Data/status register with three overlapping views: the whole word,
// four bytes, and bit-fields. The code below writes data through `bytes`
// and flags through `fields`.
// NOTE(review): the byte comments assume a little-endian layout - confirm.
typedef union {
uint32_t value;
struct {
uint8_t rx_data; // Bits 0-7
uint8_t tx_data; // Bits 8-15
uint8_t status; // Bits 16-23
uint8_t unused; // Bits 24-31
} bytes;
struct {
uint32_t rx_data : 8;
uint32_t tx_data : 8;
uint32_t rx_full : 1;
uint32_t tx_empty : 1;
uint32_t rx_error : 1;
uint32_t tx_error : 1;
uint32_t reserved : 12;
} fields;
} UARTDataReg;
// Simulated hardware registers (ordinary zero-initialized globals that
// stand in for memory-mapped registers in this demo)
UARTControlReg uart_control = {0};
UARTDataReg uart_data = {0};
// Hardware access functions
void uart_init(int baud_rate, int data_bits, int stop_bits, int parity) {
uart_control.bits.enable = 1;
uart_control.bits.baud_rate = baud_rate;
uart_control.bits.data_bits = data_bits - 5; // 5-8 bits
uart_control.bits.stop_bits = stop_bits - 1; // 1-2 bits
uart_control.bits.parity = parity;
printf("UART initialized: control=0x%08X\n", uart_control.value);
printf(" Enable: %u\n", uart_control.bits.enable);
printf(" Baud rate: %u\n", uart_control.bits.baud_rate);
printf(" Data bits: %u\n", uart_control.bits.data_bits + 5);
printf(" Stop bits: %u\n", uart_control.bits.stop_bits + 1);
printf(" Parity: %u\n", uart_control.bits.parity);
}
/*
 * Transmit one character through the simulated UART: wait until the
 * transmit buffer is reported empty, store the byte, mark the buffer
 * busy, and log the transfer.
 */
void uart_send(char c) {
    // Wait for TX buffer empty
    while (!uart_data.fields.tx_empty) {
        // In real hardware, this would check status
        uart_data.fields.tx_empty = 1;
    }
    // Send data
    uart_data.bytes.tx_data = (uint8_t)c;
    uart_data.fields.tx_empty = 0;
    // Convert through unsigned char: a negative `char` would otherwise be
    // sign-extended and printed as 0xFFFFFFxx.
    printf("Sent: %c (data=0x%02X)\n", c, (unsigned)(unsigned char)c);
}
char uart_receive() {
// Wait for RX buffer full
while (!uart_data.fields.rx_full) {
// Simulate receiving data
static char test_data[] = "Hello";
static int index = 0;
if (index < 5) {
uart_data.bytes.rx_data = test_data[index++];
uart_data.fields.rx_full = 1;
}
}
char c = uart_data.bytes.rx_data;
uart_data.fields.rx_full = 0;
printf("Received: %c (data=0x%02X)\n", c, c);
return c;
}
/*
 * Demo: configure the simulated UART, send the text "UART" one byte at a
 * time, then read five simulated bytes.
 */
int main() {
    // Initialize UART
    uart_init(4, 8, 1, 0); // 9600 baud, 8 data bits, 1 stop, no parity
    printf("\n--- UART Communication ---\n");

    // Send data
    const char *message = "UART";
    const char *cursor = message;
    while (*cursor) {
        uart_send(*cursor);
        cursor++;
    }

    // Receive data
    printf("\nReceiving:\n");
    int pending = 5;
    while (pending > 0) {
        uart_receive();
        pending--;
    }
    return 0;
}
5. Network Protocol Headers
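Unions are well suited to describing network packets whose layers come in several variants (IPv4 or IPv6, TCP or UDP). The example below models a packet with nested unions and parses raw bytes by overlaying packed header structs on the buffer: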
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>
// All headers below are packed so that their in-memory layout has no
// padding and can be overlaid on raw packet bytes. Multi-byte fields
// hold network byte order; parse_packet() converts them with
// ntohs()/ntohl() before printing.
// Ethernet header
typedef struct {
uint8_t dest_mac[6];
uint8_t src_mac[6];
uint16_t ethertype;
} __attribute__((packed)) EthernetHeader;
// IPv4 header (fixed part, without options)
typedef struct {
uint8_t version_ihl; // Version (4 bits) + IHL (4 bits)
uint8_t dscp_ecn; // DSCP + ECN
uint16_t total_length;
uint16_t identification;
uint16_t flags_fragment;
uint8_t ttl;
uint8_t protocol;
uint16_t checksum;
uint32_t src_ip;
uint32_t dst_ip;
} __attribute__((packed)) IPv4Header;
// IPv6 header
typedef struct {
uint32_t version_tc_flow; // Version (4), Traffic Class (8), Flow Label (20)
uint16_t payload_length;
uint8_t next_header;
uint8_t hop_limit;
uint8_t src_ip[16];
uint8_t dst_ip[16];
} __attribute__((packed)) IPv6Header;
// TCP header (fixed part, without options)
typedef struct {
uint16_t src_port;
uint16_t dst_port;
uint32_t seq_num;
uint32_t ack_num;
uint8_t data_offset;
uint8_t flags;
uint16_t window;
uint16_t checksum;
uint16_t urgent_ptr;
} __attribute__((packed)) TCPHeader;
// UDP header
typedef struct {
uint16_t src_port;
uint16_t dst_port;
uint16_t length;
uint16_t checksum;
} __attribute__((packed)) UDPHeader;
// Union for IP header (can be IPv4 or IPv6); sized for the larger one.
typedef union {
IPv4Header v4;
IPv6Header v6;
} IPHeaderUnion;
// Complete packet representation: an Ethernet header followed by either
// an IPv4 or an IPv6 header, each followed by either a TCP or UDP header.
// NOTE(review): this layout places the transport header directly after a
// fixed-size IP header, i.e. it does not model IPv4 options. It is not
// used by parse_packet() or main() below.
typedef struct {
EthernetHeader eth;
union {
struct {
IPv4Header ip;
union {
TCPHeader tcp;
UDPHeader udp;
} transport;
} v4;
struct {
IPv6Header ip;
union {
TCPHeader tcp;
UDPHeader udp;
} transport;
} v6;
} ip;
} Packet;
// Protocol identification (declared for illustration; not referenced by
// the code in this example)
typedef enum {
PROTO_UNKNOWN,
PROTO_TCP,
PROTO_UDP,
PROTO_ICMP
} TransportProtocol;
void parse_packet(const uint8_t *raw_data, size_t length) {
if (length < sizeof(EthernetHeader)) {
printf("Packet too short\n");
return;
}
const EthernetHeader *eth = (const EthernetHeader*)raw_data;
uint16_t ethertype = ntohs(eth->ethertype);
printf("Ethernet Header:\n");
printf(" Dest MAC: %02X:%02X:%02X:%02X:%02X:%02X\n",
eth->dest_mac[0], eth->dest_mac[1], eth->dest_mac[2],
eth->dest_mac[3], eth->dest_mac[4], eth->dest_mac[5]);
printf(" Src MAC: %02X:%02X:%02X:%02X:%02X:%02X\n",
eth->src_mac[0], eth->src_mac[1], eth->src_mac[2],
eth->src_mac[3], eth->src_mac[4], eth->src_mac[5]);
printf(" Ethertype: 0x%04X\n", ethertype);
if (ethertype == 0x0800) { // IPv4
const IPv4Header *ip = (const IPv4Header*)(raw_data + sizeof(EthernetHeader));
uint8_t version = ip->version_ihl >> 4;
uint8_t ihl = ip->version_ihl & 0x0F;
printf("\nIPv4 Header:\n");
printf(" Version: %u\n", version);
printf(" IHL: %u (%u bytes)\n", ihl, ihl * 4);
printf(" Total Length: %u\n", ntohs(ip->total_length));
printf(" Protocol: %u\n", ip->protocol);
uint32_t src_ip = ntohl(ip->src_ip);
uint32_t dst_ip = ntohl(ip->dst_ip);
printf(" Source IP: %u.%u.%u.%u\n",
(src_ip >> 24) & 0xFF,
(src_ip >> 16) & 0xFF,
(src_ip >> 8) & 0xFF,
src_ip & 0xFF);
printf(" Dest IP: %u.%u.%u.%u\n",
(dst_ip >> 24) & 0xFF,
(dst_ip >> 16) & 0xFF,
(dst_ip >> 8) & 0xFF,
dst_ip & 0xFF);
// Parse transport layer
const uint8_t *transport_data = raw_data + sizeof(EthernetHeader) + ihl * 4;
if (ip->protocol == 6) { // TCP
const TCPHeader *tcp = (const TCPHeader*)transport_data;
printf("\nTCP Header:\n");
printf(" Source Port: %u\n", ntohs(tcp->src_port));
printf(" Dest Port: %u\n", ntohs(tcp->dst_port));
printf(" Sequence: %u\n", ntohl(tcp->seq_num));
printf(" Flags: 0x%02X\n", tcp->flags);
} else if (ip->protocol == 17) { // UDP
const UDPHeader *udp = (const UDPHeader*)transport_data;
printf("\nUDP Header:\n");
printf(" Source Port: %u\n", ntohs(udp->src_port));
printf(" Dest Port: %u\n", ntohs(udp->dst_port));
printf(" Length: %u\n", ntohs(udp->length));
}
} else if (ethertype == 0x86DD) { // IPv6
const IPv6Header *ip = (const IPv6Header*)(raw_data + sizeof(EthernetHeader));
uint32_t version_tc_flow = ntohl(ip->version_tc_flow);
uint8_t version = version_tc_flow >> 28;
printf("\nIPv6 Header:\n");
printf(" Version: %u\n", version);
printf(" Next Header: %u\n", ip->next_header);
printf(" Hop Limit: %u\n", ip->hop_limit);
// Print IPv6 addresses (simplified)
printf(" Source IP: ");
for (int i = 0; i < 16; i += 2) {
printf("%02x%02x", ip->src_ip[i], ip->src_ip[i+1]);
if (i < 14) printf(":");
}
printf("\n");
}
}
/*
 * Demo: build a minimal Ethernet + IPv4 + TCP SYN frame in a local
 * buffer and hand it to parse_packet().
 *
 * The buffer is zero-initialized so that header fields the demo does not
 * set have a defined value, and the MAC addresses are filled with a
 * plain loop because this example does not include <string.h>.
 */
int main(void) {
    // Create a sample TCP/IP packet (simplified)
    uint8_t packet[1024] = {0};
    size_t offset = 0;

    // Ethernet header
    EthernetHeader *eth = (EthernetHeader *)packet;
    for (size_t i = 0; i < sizeof eth->dest_mac; i++) {
        eth->dest_mac[i] = 0x01;
        eth->src_mac[i] = 0x02;
    }
    eth->ethertype = htons(0x0800); // IPv4
    offset += sizeof(EthernetHeader);

    // IPv4 header
    IPv4Header *ip = (IPv4Header *)(packet + offset);
    ip->version_ihl = (4 << 4) | 5; // IPv4, 5 words (20 bytes)
    ip->total_length = htons(40);   // 20 IP + 20 TCP
    ip->protocol = 6;               // TCP
    ip->src_ip = htonl(0xC0A80164); // 192.168.1.100
    ip->dst_ip = htonl(0xC0A80101); // 192.168.1.1
    offset += sizeof(IPv4Header);

    // TCP header
    TCPHeader *tcp = (TCPHeader *)(packet + offset);
    tcp->src_port = htons(12345);
    tcp->dst_port = htons(80);
    tcp->seq_num = htonl(1000);
    tcp->flags = 0x02; // SYN

    // Parse the packet
    parse_packet(packet, offset + sizeof(TCPHeader));
    return 0;
}
6. Memory Pools and Custom Allocators
Advanced memory management using unions:
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
// Total number of bytes managed by one MemoryPool, block headers included.
#define POOL_SIZE 1024
// Granularity of payload sizes; must be a power of two for ALIGN_SIZE.
#define ALIGNMENT 8
// Round `size` up to the next multiple of ALIGNMENT. Note that the sum
// can wrap around for a `size` within ALIGNMENT-1 of the type's maximum.
#define ALIGN_SIZE(size) (((size) + (ALIGNMENT - 1)) & ~(ALIGNMENT - 1))
/*
 * Header stored in front of every block in the pool, free or allocated.
 *
 * It is a union so that the header takes the alignment of its most
 * demanding member; payloads that start right after a header are then
 * aligned for double and pointer types as well.
 *
 * `magic` is an unsigned 32-bit field because the marker values used by
 * the pool (0xDEADBEEF for free, 0xCAFEBABE for allocated) do not fit in
 * a signed 32-bit int.
 */
typedef union BlockHeader {
    struct {
        union BlockHeader *next; // Next free block (free list only)
        size_t size;             // Payload size in bytes, header excluded
        uint32_t magic;          // State marker used for debugging
    } data;
    double align_dummy;    // Ensure alignment suitable for double
    void *align_ptr_dummy; // Also align for pointers
} BlockHeader;
// Memory pool: a fixed byte arena plus bookkeeping.
// NOTE(review): pool_init() overlays a BlockHeader on `memory`, which
// relies on `memory` being suitably aligned; as the first member of a
// struct that also holds pointers it presumably is - confirm, or request
// the alignment explicitly.
typedef struct {
uint8_t memory[POOL_SIZE]; // arena holding headers and payloads
BlockHeader *free_list;    // address-ordered list of free blocks
size_t total_allocated;    // payload bytes currently handed out
size_t peak_allocated;     // highest value total_allocated has reached
int allocations;           // number of live allocations
} MemoryPool;
/*
 * Reset `pool` to a single free block that spans the whole arena (minus
 * one header) and clear all statistics.
 */
void pool_init(MemoryPool *pool) {
    BlockHeader *whole = (BlockHeader *)pool->memory;

    // One free block covering everything after its own header.
    whole->data.next = NULL;
    whole->data.size = POOL_SIZE - sizeof(BlockHeader);
    whole->data.magic = 0xDEADBEEF;
    pool->free_list = whole;

    pool->allocations = 0;
    pool->peak_allocated = 0;
    pool->total_allocated = 0;
}
/*
 * Allocate `size` bytes from `pool` using first fit.
 *
 * The size is rounded up to a multiple of ALIGNMENT. A free block that
 * is large enough to hold the request plus another header and at least
 * one byte is split; otherwise the whole block is handed out.
 *
 * Returns a pointer to the payload, or NULL if `size` is 0, exceeds the
 * pool, or no free block is large enough. Oversized requests are
 * rejected before rounding, because ALIGN_SIZE would wrap around for
 * sizes close to SIZE_MAX and make the request look tiny.
 */
void *pool_alloc(MemoryPool *pool, size_t size) {
    if (size == 0 || size > POOL_SIZE) {
        return NULL;
    }
    // Align size
    size = ALIGN_SIZE(size);
    const size_t total_needed = size + sizeof(BlockHeader);

    // Find suitable free block
    BlockHeader *prev = NULL;
    for (BlockHeader *curr = pool->free_list; curr != NULL;
         prev = curr, curr = curr->data.next) {
        if (curr->data.size < size) {
            continue;
        }

        // `successor` takes curr's place in the free list.
        BlockHeader *successor;
        if (curr->data.size > total_needed) {
            // Split: the tail of the block stays free.
            successor = (BlockHeader *)((uint8_t *)curr + total_needed);
            successor->data.next = curr->data.next;
            successor->data.size = curr->data.size - total_needed;
            successor->data.magic = 0xDEADBEEF;
            curr->data.size = size;
        } else {
            // Use entire block
            successor = curr->data.next;
        }
        if (prev) {
            prev->data.next = successor;
        } else {
            pool->free_list = successor;
        }

        curr->data.next = NULL;
        curr->data.magic = 0xCAFEBABE; // Allocated magic
        pool->total_allocated += curr->data.size;
        pool->allocations++;
        if (pool->total_allocated > pool->peak_allocated) {
            pool->peak_allocated = pool->total_allocated;
        }
        return (uint8_t *)curr + sizeof(BlockHeader);
    }
    return NULL; // Out of memory
}
/*
 * Return a block obtained from pool_alloc() to `pool`.
 *
 * The pointer is first checked to lie inside the pool's arena at an
 * ALIGNMENT boundary, so that the header in front of it can be read
 * safely; then the allocated-state marker is verified. Either failure
 * prints a diagnostic and leaves the pool untouched. NULL is ignored.
 *
 * The block is inserted into the address-ordered free list and merged
 * with the following and/or preceding free block when they are adjacent.
 */
void pool_free(MemoryPool *pool, void *ptr) {
    if (!ptr) return;

    // Reject pointers that cannot have come from this pool before
    // touching the bytes in front of them.
    const uintptr_t addr = (uintptr_t)ptr;
    const uintptr_t first_payload = (uintptr_t)pool->memory + sizeof(BlockHeader);
    const uintptr_t arena_end = (uintptr_t)pool->memory + POOL_SIZE;
    if (addr < first_payload || addr >= arena_end ||
        (addr - (uintptr_t)pool->memory) % ALIGNMENT != 0) {
        printf("Error: pointer does not belong to this pool!\n");
        return;
    }

    BlockHeader *block = (BlockHeader *)((uint8_t *)ptr - sizeof(BlockHeader));
    // Validate magic number (debug)
    if (block->data.magic != 0xCAFEBABE) {
        printf("Error: Invalid free or memory corruption!\n");
        return;
    }
    block->data.magic = 0xDEADBEEF;
    pool->total_allocated -= block->data.size;
    pool->allocations--;

    // Find position to insert (address order)
    BlockHeader *curr = pool->free_list;
    BlockHeader *prev = NULL;
    while (curr && curr < block) {
        prev = curr;
        curr = curr->data.next;
    }

    // Coalesce with next block if adjacent
    if (curr && (uint8_t *)block + sizeof(BlockHeader) + block->data.size == (uint8_t *)curr) {
        block->data.size += sizeof(BlockHeader) + curr->data.size;
        block->data.next = curr->data.next;
    } else {
        block->data.next = curr;
    }

    // Coalesce with previous block if adjacent
    if (prev && (uint8_t *)prev + sizeof(BlockHeader) + prev->data.size == (uint8_t *)block) {
        prev->data.size += sizeof(BlockHeader) + block->data.size;
        prev->data.next = block->data.next;
    } else if (prev) {
        prev->data.next = block;
    } else {
        pool->free_list = block;
    }
}
/*
 * Print the pool's counters, then walk the free list and report how many
 * free blocks exist and how many payload bytes they hold.
 */
void pool_stats(const MemoryPool *pool) {
    printf("Pool Statistics:\n");
    printf(" Total allocated: %zu bytes\n", pool->total_allocated);
    printf(" Peak allocated: %zu bytes\n", pool->peak_allocated);
    printf(" Current allocations: %d\n", pool->allocations);

    // Walk free list
    size_t free_bytes = 0;
    int free_blocks = 0;
    for (const BlockHeader *node = pool->free_list; node != NULL;
         node = node->data.next) {
        free_bytes += node->data.size;
        free_blocks += 1;
    }
    printf(" Free blocks: %d (%zu bytes)\n", free_blocks, free_bytes);
}
/*
 * Demo: allocate several objects from a pool, use them, free two, and
 * allocate again to show the freed space being reused. Statistics are
 * printed after each phase. Returns 1 if the pool cannot satisfy a
 * request, rather than writing through a NULL pointer.
 */
int main(void) {
    enum { TEXT_SIZE = 100, REUSE_SIZE = 200, INT_COUNT = 10 };

    MemoryPool pool;
    pool_init(&pool);

    // Allocate various types
    int *p1 = pool_alloc(&pool, sizeof *p1);
    double *p2 = pool_alloc(&pool, sizeof *p2);
    char *p3 = pool_alloc(&pool, TEXT_SIZE);
    int *p4 = pool_alloc(&pool, INT_COUNT * sizeof *p4);
    if (!p1 || !p2 || !p3 || !p4) {
        fprintf(stderr, "pool exhausted\n");
        return 1;
    }

    // Use the memory
    *p1 = 42;
    *p2 = 3.14159;
    snprintf(p3, TEXT_SIZE, "Hello from pool!");
    for (int i = 0; i < INT_COUNT; i++) p4[i] = i * 10;
    printf("p1: %d\n", *p1);
    printf("p2: %f\n", *p2);
    printf("p3: %s\n", p3);
    printf("p4[5]: %d\n", p4[5]);
    pool_stats(&pool);

    // Free some allocations
    pool_free(&pool, p2);
    pool_free(&pool, p4);
    printf("\nAfter freeing:\n");
    pool_stats(&pool);

    // Reuse freed memory
    char *p5 = pool_alloc(&pool, REUSE_SIZE);
    if (!p5) {
        fprintf(stderr, "pool exhausted\n");
        return 1;
    }
    snprintf(p5, REUSE_SIZE, "Reusing freed memory!");
    printf("\np5: %s\n", p5);
    pool_stats(&pool);
    return 0;
}
7. Dual-Purpose Data Structures
Unions enable data structures that can be used in multiple ways:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// A data structure that can be used as either a stack or a queue
// Selects which member of DataStructure.state is in use.
typedef enum {
DS_STACK,
DS_QUEUE
} DataStructureType;
// Fixed-capacity container of ints. `type` is set by ds_create() and
// decides whether `state.stack` or `state.queue` holds the cursor(s).
typedef struct {
DataStructureType type;
size_t capacity; // number of ints `data` can hold
size_t size;     // number of ints currently stored
union {
struct {
int top; // For stack: index of the top element, -1 when empty
} stack;
struct {
int front; // For queue: index of the oldest element
int rear;  // index of the newest element, -1 before the first push
} queue;
} state;
int *data; // heap buffer of `capacity` ints
} DataStructure;
/*
 * Create an empty stack or queue able to hold `capacity` ints.
 * Returns NULL if memory cannot be allocated (nothing is leaked);
 * otherwise release the result with ds_destroy().
 */
DataStructure *ds_create(DataStructureType type, size_t capacity) {
    DataStructure *ds = malloc(sizeof *ds);
    if (ds == NULL) {
        return NULL;
    }
    // calloc checks capacity * sizeof(int) for overflow. A zero capacity
    // may legitimately yield NULL, which is fine because the buffer is
    // then never accessed.
    ds->data = calloc(capacity, sizeof *ds->data);
    if (ds->data == NULL && capacity != 0) {
        free(ds);
        return NULL;
    }
    ds->type = type;
    ds->capacity = capacity;
    ds->size = 0;
    if (type == DS_STACK) {
        ds->state.stack.top = -1;
    } else {
        ds->state.queue.front = 0;
        ds->state.queue.rear = -1;
    }
    return ds;
}
/*
 * Release a container created by ds_create(), including its buffer.
 * Passing NULL is allowed and does nothing, mirroring free().
 */
void ds_destroy(DataStructure *ds) {
    if (ds == NULL) {
        return;
    }
    free(ds->data);
    free(ds);
}
/*
 * Add `value`: on top of a stack, or at the rear of a queue (the rear
 * index wraps around the buffer). Returns 0 on success, -1 when the
 * container is already full.
 */
int ds_push(DataStructure *ds, int value) {
    if (ds->size >= ds->capacity) {
        return -1;
    }
    if (ds->type != DS_STACK) {
        // Queue behavior (enqueue): advance rear circularly, then store.
        ds->state.queue.rear = (ds->state.queue.rear + 1) % ds->capacity;
        ds->data[ds->state.queue.rear] = value;
    } else {
        // Stack behavior: bump top, then store.
        ds->state.stack.top += 1;
        ds->data[ds->state.stack.top] = value;
    }
    ds->size += 1;
    return 0;
}
/*
 * Remove one element and store it in *value: the top of a stack, or the
 * front of a queue (the front index wraps around the buffer).
 * Returns 0 on success, -1 when the container is empty.
 */
int ds_pop(DataStructure *ds, int *value) {
    if (ds->size == 0) {
        return -1;
    }
    if (ds->type != DS_STACK) {
        // Queue dequeue: read the front, then advance it circularly.
        *value = ds->data[ds->state.queue.front];
        ds->state.queue.front = (ds->state.queue.front + 1) % ds->capacity;
    } else {
        // Stack pop: read the top, then lower it.
        *value = ds->data[ds->state.stack.top];
        ds->state.stack.top -= 1;
    }
    ds->size -= 1;
    return 0;
}
/*
 * Store in *value the element ds_pop() would return next, without
 * removing it. Returns 0 on success, -1 when the container is empty.
 */
int ds_peek(const DataStructure *ds, int *value) {
    if (ds->size == 0) {
        return -1;
    }
    const int slot = (ds->type == DS_STACK) ? ds->state.stack.top
                                            : ds->state.queue.front;
    *value = ds->data[slot];
    return 0;
}
/*
 * Print the container on one line: its kind, fill level, and contents.
 * A stack is listed bottom to top, a queue front to rear. The marker
 * after the last element names the end that element sits at: "(top)"
 * for a stack and "(rear)" for a queue.
 */
void ds_print(const DataStructure *ds) {
    printf("%s (%zu/%zu): ",
           ds->type == DS_STACK ? "Stack" : "Queue",
           ds->size, ds->capacity);
    if (ds->type == DS_STACK) {
        // Print stack from bottom to top
        for (int i = 0; i <= ds->state.stack.top; i++) {
            printf("%d ", ds->data[i]);
        }
        printf("(top)");
    } else {
        // Print queue from front to rear, following the circular buffer
        size_t slot = (size_t)ds->state.queue.front;
        for (size_t printed = 0; printed < ds->size; printed++) {
            printf("%d ", ds->data[slot]);
            slot = (slot + 1) % ds->capacity;
        }
        // The last value printed is the newest element, i.e. the rear.
        printf("(rear)");
    }
    printf("\n");
}
/*
 * Demo: run the same push / peek / pop sequence on a stack and on a
 * queue to show LIFO versus FIFO order with one API.
 */
int main() {
    int val;

    // Create stack
    DataStructure *stack = ds_create(DS_STACK, 5);
    printf("Stack operations:\n");
    const int stack_input[] = { 10, 20, 30 };
    for (size_t k = 0; k < sizeof stack_input / sizeof stack_input[0]; k++) {
        ds_push(stack, stack_input[k]);
    }
    ds_print(stack);
    ds_peek(stack, &val);
    printf("Peek: %d\n", val);
    ds_pop(stack, &val);
    printf("Pop: %d\n", val);
    ds_print(stack);

    // Create queue
    DataStructure *queue = ds_create(DS_QUEUE, 5);
    printf("\nQueue operations:\n");
    const int queue_input[] = { 100, 200, 300 };
    for (size_t k = 0; k < sizeof queue_input / sizeof queue_input[0]; k++) {
        ds_push(queue, queue_input[k]);
    }
    ds_print(queue);
    ds_peek(queue, &val);
    printf("Peek: %d\n", val);
    ds_pop(queue, &val);
    printf("Dequeue: %d\n", val);
    ds_print(queue);

    ds_destroy(stack);
    ds_destroy(queue);
    return 0;
}
8. Optimized Variant Records
Space-efficient variant records:
#include <stdio.h>
#include <string.h>
#include <stdint.h>
// Compact variant record using union and type field
typedef struct {
uint8_t type; // 0=int, 1=float, 2=string, 3=color
union {
int int_val;
float float_val;
char string_val[24]; // Fixed size for union
struct {
uint8_t r, g, b;
} color;
} data;
} VariantRecord;
// Print variant
void print_variant(const VariantRecord *v) {
switch (v->type) {
case 0:
printf("Int: %d\n", v->data.int_val);
break;
case 1:
printf("Float: %f\n", v->data.float_val);
break;
case 2:
printf("String: %s\n", v->data.string_val);
break;
case 3:
printf("Color: RGB(%u,%u,%u)\n",
v->data.color.r, v->data.color.g, v->data.color.b);
break;
}
}
// Array of variants (saves memory compared to struct with void*)
typedef struct {
VariantRecord records[100];
int count;
} VariantArray;
void array_add_int(VariantArray *a, int val) {
a->records[a->count].type = 0;
a->records[a->count].data.int_val = val;
a->count++;
}
// Append a float record (tag 1) to the array.
//
// a:   destination array; must not be NULL.
// val: value to store.
// The array has a fixed capacity. If no free slot remains, the value is
// dropped and a diagnostic is written to stderr rather than writing past
// the end of a->records.
void array_add_float(VariantArray *a, float val) {
    const int capacity = (int)(sizeof a->records / sizeof a->records[0]);
    if (a->count < 0 || a->count >= capacity) {
        fprintf(stderr, "array_add_float: no free slot, value dropped\n");
        return;
    }
    VariantRecord *slot = &a->records[a->count];
    slot->type = 1;
    slot->data.float_val = val;
    a->count++;
}
// Append a string record (tag 2) to the array.
//
// a:   destination array; must not be NULL.
// val: NUL-terminated text to copy; NULL is stored as an empty string.
// The text is copied into the record's inline buffer and truncated to fit
// (buffer size minus one character plus the terminator).
// The array has a fixed capacity. If no free slot remains, the value is
// dropped and a diagnostic is written to stderr rather than writing past
// the end of a->records.
void array_add_string(VariantArray *a, const char *val) {
    const int capacity = (int)(sizeof a->records / sizeof a->records[0]);
    if (a->count < 0 || a->count >= capacity) {
        fprintf(stderr, "array_add_string: no free slot, value dropped\n");
        return;
    }
    VariantRecord *slot = &a->records[a->count];
    slot->type = 2;
    // snprintf bounds the copy by the real buffer size and always NUL-terminates
    snprintf(slot->data.string_val, sizeof slot->data.string_val, "%s",
             val != NULL ? val : "");
    a->count++;
}
// Append an RGB color record (tag 3) to the array.
//
// a:       destination array; must not be NULL.
// r, g, b: color components to store.
// The array has a fixed capacity. If no free slot remains, the value is
// dropped and a diagnostic is written to stderr rather than writing past
// the end of a->records.
void array_add_color(VariantArray *a, uint8_t r, uint8_t g, uint8_t b) {
    const int capacity = (int)(sizeof a->records / sizeof a->records[0]);
    if (a->count < 0 || a->count >= capacity) {
        fprintf(stderr, "array_add_color: no free slot, value dropped\n");
        return;
    }
    VariantRecord *slot = &a->records[a->count];
    slot->type = 3;
    slot->data.color.r = r;
    slot->data.color.g = g;
    slot->data.color.b = b;
    a->count++;
}
int main() {
VariantArray va = {.count = 0};
array_add_int(&va, 42);
array_add_float(&va, 3.14159f);
array_add_string(&va, "Hello, Variant!");
array_add_color(&va, 255, 128, 0);
printf("Size of VariantRecord: %zu bytes\n", sizeof(VariantRecord));
printf("Size of int*: %zu bytes\n", sizeof(int*));
printf("Array has %d elements:\n", va.count);
for (int i = 0; i < va.count; i++) {
printf(" [%d] ", i);
print_variant(&va.records[i]);
}
return 0;
}
9. Union Best Practices and Advanced Tips
#include <stdalign.h>
#include <stdint.h>
#include <stdio.h>
// 1. Controlling alignment
// A union is aligned at least as strictly as its most strictly aligned member,
// so adding members to a byte buffer raises the buffer's alignment.
typedef union {
char bytes[32]; // The usable storage
double d; // Raises alignment to that of double (8 bytes on most ABIs)
void *ptr; // Raises alignment to at least that of an object pointer
} AlignedBuffer;
// 2. Anonymous union (C11)
// The union has no member name, so its fields are accessed as if they were
// members of the enclosing struct (v.i, v.f, v.d). All three share storage.
typedef struct {
int type; // Discriminator set by the caller; this snippet assigns no meaning to its values
union { // Anonymous union - members accessed directly
int i;
float f;
double d;
}; // No name!
} AnonVariant;
// 3. Union of structs that all begin with the same header

// Header shared by every shape: a type code and an identifier.
typedef struct {
    int type;
    int id;
} Base;

// Point shape: the shared header followed by its coordinates.
typedef struct {
    Base base; // Shared header, placed first
    int x;
    int y;
} Point;

// Circle shape: the shared header followed by its radius.
typedef struct {
    Base base; // Shared header, placed first
    int radius;
} Circle;

// Overlay of all shapes. Each member starts with a Base, so the header
// sits at offset 0 whichever member was last written.
typedef union {
    Base base;
    Point point;
    Circle circle;
} ShapeUnion;
void print_shape_info(const ShapeUnion *shape) {
// Safe to access base.type through any member
int type = shape->base.type; // or shape->point.base.type
printf("Type: %d, ID: %d\n", shape->base.type, shape->base.id);
}
// 4. Checking union size at compile time
typedef union {
    int i;
    double d;
    char str[100];
} MyUnion;
// Compile-time assertions.
// A union is at least as large as its largest member, but its size is rounded
// up to a multiple of its alignment. With an 8-byte-aligned double that makes
// this union 104 bytes, so asserting "== 100" would fail to compile; assert
// the properties that actually hold instead.
_Static_assert(sizeof(MyUnion) >= sizeof(char[100]),
               "MyUnion must be large enough for its 100-byte string member");
_Static_assert(sizeof(MyUnion) % _Alignof(double) == 0,
               "MyUnion size must be a multiple of double's alignment");
// 5. Union with flexible array member (C99)
// NOTE(review): ISO C only allows a flexible array member as the last member
// of a struct, and a struct that has one may not itself be nested as a member
// of another struct (directly or through a union). GCC and Clang accept the
// nesting below as an extension - confirm the target compilers before relying
// on it, or move the array to the end of the outermost struct.
typedef struct {
int type; // Discriminator set by the caller
union {
int i;
double d;
struct {
int count; // Presumably the number of bytes stored in data - verify against callers
char data[]; // Flexible array member
} string;
};
} FlexVariant;
int main() {
// Alignment control
AlignedBuffer buf;
printf("AlignedBuffer address: %p\n", (void*)&buf);
printf("Is 8-byte aligned? %d\n",
(int)((uintptr_t)&buf % 8 == 0));
// Anonymous union
AnonVariant av = {.type = 1, .i = 42};
printf("AnonVariant: type=%d, value=%d\n", av.type, av.i);
// Common initial sequence
ShapeUnion su;
su.point.base.type = 100;
su.point.base.id = 200;
su.point.x = 10;
su.point.y = 20;
print_shape_info(&su);
// Size check
printf("Size of FlexVariant: %zu\n", sizeof(FlexVariant));
return 0;
}
Advanced Union Patterns Cheat Sheet
| Pattern | Use Case | Benefits |
|---|---|---|
| Tagged Union | Variant types, JSON | Type safety, memory efficiency |
| Type Punning | Float analysis, serialization | Standard-compliant reinterpretation |
| Polymorphism | Object-oriented C | Virtual function-like behavior |
| Hardware Registers | Embedded systems | Bit field access, atomic updates |
| Protocol Headers | Networking | Efficient packet parsing |
| Memory Pools | Custom allocators | Alignment guarantee, overhead reduction |
| Dual-Purpose Structures | Generic data structures | Code reuse, flexibility |
| Common Initial Sequence | Type hierarchies | Safe upcasting |
Best Practices
- Always track the active member (use tagged unions)
- Initialize before use - set at least one member
- Use `_Static_assert` to verify sizes and alignments
- Document the active member in comments
- Consider alignment requirements for performance
- Use anonymous unions (C11) for cleaner syntax
- Prefer unions over type punning through casts for standard compliance
- Use unions for hardware access with volatile qualifiers
Common Pitfalls (Advanced)
// Pitfall 1: Assuming union size is sum of members
// All members overlap, so the union is only as large as its largest member
// (rounded up to the union's alignment).
union Large {
int i;
double d;
char str[1000];
}; // Size is 1000, not 1000 + 8 + 4
// Pitfall 2: Type punning with incompatible types (strict aliasing)
float f = 3.14f;
int i = *(int*)&f; // Undefined behavior! Read through a union member (or memcpy the bytes) instead
// Pitfall 3: Forgetting about padding in structs within unions
struct S { char c; int i; }; // Likely 8 bytes: padding after c keeps i aligned
union U { struct S s; double d; }; // Size max(8,8) = 8
// Pitfall 4: Volatile and unions in hardware access
// volatile below qualifies the object reg, not the type union Reg.
volatile union Reg {
uint32_t word;
struct { uint8_t low, high; } bytes; // Overlays only two of word's four bytes; which two depends on byte order
} reg; // Accesses to reg are volatile, but another "union Reg x;" or a plain "union Reg *" is not - qualify those as well
Conclusion
Advanced union usage in C enables sophisticated programming patterns that are essential for systems programming, embedded development, and performance-critical applications. Key advanced techniques include:
- Tagged unions for type-safe variants
- Type punning for binary representation analysis
- Polymorphism through union-based vtables
- Hardware register mapping with bit fields
- Protocol parsing with nested unions
- Custom memory allocators with alignment control
Understanding these advanced patterns allows you to write more efficient, portable, and expressive C code, particularly in domains where memory is constrained, performance is critical, or hardware interaction is required.