Beyond Basics: Advanced Union Techniques in C

While basic unions are straightforward, advanced union techniques unlock powerful patterns for type punning, memory optimization, protocol implementation, and even object-oriented-like polymorphism in C. This guide explores sophisticated union applications that demonstrate the full power of this often-underutilized feature.

1. Tagged Unions (Discriminated Unions)

The most important advanced pattern is the tagged union, which tracks the active type:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
// Discriminator values: each one names the union member that is currently valid
typedef enum {
    TYPE_NONE,
    TYPE_INT,
    TYPE_DOUBLE,
    TYPE_STRING,
    TYPE_ARRAY,
    TYPE_OBJECT
} ValueType;
// Forward declaration so the container types can refer to Value before it is defined
typedef struct Value Value;
// Growable array of values.
// elements is an array of Value POINTERS: array_append() stores the pointer it
// is handed and free_value() releases every element individually, so the slot
// type must be Value*, not Value.
typedef struct {
    Value **elements;  // heap array of `capacity` slots, first `count` in use
    int count;         // number of stored elements
    int capacity;      // number of allocated slots
} Array;
// A single key/value entry stored inside an Object
typedef struct {
    char *key;     // property name
    Value *value;  // value stored under that name
} Property;
// Growable list of properties (insertion-ordered)
typedef struct {
    Property *properties;  // heap array of `capacity` entries, first `count` in use
    int count;             // number of stored properties
    int capacity;          // number of allocated entries
} Object;
// JSON-like tagged union: `type` records which member of `data` is active
struct Value {
    ValueType type;
    union {
        int int_value;        // active when type == TYPE_INT
        double double_value;  // active when type == TYPE_DOUBLE
        char *string_value;   // active when type == TYPE_STRING
        Array array_value;    // active when type == TYPE_ARRAY
        Object object_value;  // active when type == TYPE_OBJECT
    } data;
};
// Create functions
// Allocate a TYPE_INT value holding `value`.
// Returns NULL if the allocation fails; the caller owns the result.
Value* create_int(int value) {
    Value *v = malloc(sizeof *v);
    if (v == NULL) {
        return NULL;
    }
    v->type = TYPE_INT;
    v->data.int_value = value;
    return v;
}
// Allocate a TYPE_DOUBLE value holding `value`.
// Returns NULL if the allocation fails; the caller owns the result.
Value* create_double(double value) {
    Value *v = malloc(sizeof *v);
    if (v == NULL) {
        return NULL;
    }
    v->type = TYPE_DOUBLE;
    v->data.double_value = value;
    return v;
}
// Allocate a TYPE_STRING value holding a private copy of `value`.
// Returns NULL if `value` is NULL or either allocation fails.
Value* create_string(const char *value) {
    if (value == NULL) {
        return NULL;
    }
    size_t bytes = strlen(value) + 1;  // include the terminator
    char *copy = malloc(bytes);
    if (copy == NULL) {
        return NULL;
    }
    Value *v = malloc(sizeof *v);
    if (v == NULL) {
        free(copy);  // do not leak the string when the node allocation fails
        return NULL;
    }
    memcpy(copy, value, bytes);
    v->type = TYPE_STRING;
    v->data.string_value = copy;
    return v;
}
// Allocate an empty TYPE_ARRAY value (no element storage yet).
// Returns NULL if the allocation fails.
Value* create_array() {
    Value *v = malloc(sizeof *v);
    if (v == NULL) {
        return NULL;
    }
    v->type = TYPE_ARRAY;
    v->data.array_value.elements = NULL;
    v->data.array_value.count = 0;
    v->data.array_value.capacity = 0;
    return v;
}
// Array operations
// Append `element` to `array` (which must be a TYPE_ARRAY value).
// Storage grows geometrically: 4 slots first, then doubling.
// The process is aborted if the storage cannot be grown, because this
// function has no way to report failure to its caller.
void array_append(Value *array, Value *element) {
    assert(array->type == TYPE_ARRAY);
    Array *arr = &array->data.array_value;
    if (arr->count >= arr->capacity) {
        int new_capacity = arr->capacity == 0 ? 4 : arr->capacity * 2;
        // Keep the old pointer until realloc succeeds so it is never lost
        void *grown = realloc(arr->elements,
                              (size_t)new_capacity * sizeof(Value*));
        if (grown == NULL) {
            fprintf(stderr, "array_append: out of memory\n");
            abort();
        }
        arr->elements = grown;
        arr->capacity = new_capacity;
    }
    arr->elements[arr->count++] = element;
}
// Object operations
// Allocate an empty TYPE_OBJECT value (no property storage yet).
// Returns NULL if the allocation fails.
Value* create_object() {
    Value *v = malloc(sizeof *v);
    if (v == NULL) {
        return NULL;
    }
    v->type = TYPE_OBJECT;
    v->data.object_value.properties = NULL;
    v->data.object_value.count = 0;
    v->data.object_value.capacity = 0;
    return v;
}
void free_value(Value *v);  // defined further down in this program

// Store `value` under `key` in `obj` (which must be a TYPE_OBJECT value).
// If the key already exists its previous value is released and replaced;
// otherwise a new property holding a private copy of `key` is appended.
// The process is aborted on allocation failure, because this function has
// no way to report failure to its caller.
void object_set(Value *obj, const char *key, Value *value) {
    assert(obj->type == TYPE_OBJECT);
    Object *o = &obj->data.object_value;
    // Check if key exists
    for (int i = 0; i < o->count; i++) {
        Property *existing = &o->properties[i];
        if (strcmp(existing->key, key) == 0) {
            // Replace existing value. The key text is unchanged, so it is kept;
            // the old value is released unless the same pointer is set again.
            if (existing->value != value) {
                free_value(existing->value);
            }
            existing->value = value;
            return;
        }
    }
    // Add new property
    if (o->count >= o->capacity) {
        int new_capacity = o->capacity == 0 ? 4 : o->capacity * 2;
        // Keep the old pointer until realloc succeeds so it is never lost
        void *grown = realloc(o->properties,
                              (size_t)new_capacity * sizeof(Property));
        if (grown == NULL) {
            fprintf(stderr, "object_set: out of memory\n");
            abort();
        }
        o->properties = grown;
        o->capacity = new_capacity;
    }
    // Copy the key with malloc+memcpy (strdup is POSIX, not ISO C)
    size_t key_bytes = strlen(key) + 1;
    char *key_copy = malloc(key_bytes);
    if (key_copy == NULL) {
        fprintf(stderr, "object_set: out of memory\n");
        abort();
    }
    memcpy(key_copy, key, key_bytes);
    o->properties[o->count].key = key_copy;
    o->properties[o->count].value = value;
    o->count++;
}
// Release a value together with everything reachable from it.
// Strings free their text, arrays free each element, objects free each key
// and value; a NULL argument is ignored.
void free_value(Value *v) {
    if (v == NULL) {
        return;
    }
    if (v->type == TYPE_STRING) {
        free(v->data.string_value);
    } else if (v->type == TYPE_ARRAY) {
        for (int idx = 0; idx < v->data.array_value.count; ++idx) {
            free_value(v->data.array_value.elements[idx]);
        }
        free(v->data.array_value.elements);
    } else if (v->type == TYPE_OBJECT) {
        for (int idx = 0; idx < v->data.object_value.count; ++idx) {
            free(v->data.object_value.properties[idx].key);
            free_value(v->data.object_value.properties[idx].value);
        }
        free(v->data.object_value.properties);
    }
    // Scalars own nothing besides the node itself
    free(v);
}
// Emit `width` spaces on stdout (nothing when width <= 0)
static void print_value_pad(int width) {
    for (int col = 0; col < width; col++) {
        putchar(' ');
    }
}

// Print a value in a JSON-like form on stdout.
// `indent` is the column at which the enclosing object's closing brace is
// written; object members are indented two columns further. Arrays are
// printed on one line. NULL prints nothing; unknown tags print "null".
void print_value(const Value *v, int indent) {
    if (v == NULL) {
        return;
    }
    switch (v->type) {
    case TYPE_INT:
        printf("%d", v->data.int_value);
        break;
    case TYPE_DOUBLE:
        printf("%f", v->data.double_value);
        break;
    case TYPE_STRING:
        printf("\"%s\"", v->data.string_value);
        break;
    case TYPE_ARRAY: {
        int total = v->data.array_value.count;
        fputs("[", stdout);
        for (int idx = 0; idx < total; idx++) {
            if (idx != 0) {
                fputs(", ", stdout);
            }
            print_value(v->data.array_value.elements[idx], indent);
        }
        fputs("]", stdout);
        break;
    }
    case TYPE_OBJECT: {
        int total = v->data.object_value.count;
        fputs("{\n", stdout);
        for (int idx = 0; idx < total; idx++) {
            print_value_pad(indent + 2);
            printf("\"%s\": ", v->data.object_value.properties[idx].key);
            print_value(v->data.object_value.properties[idx].value, indent + 2);
            // Comma after every member except the last
            if (idx < total - 1) {
                fputs(",", stdout);
            }
            fputs("\n", stdout);
        }
        print_value_pad(indent);
        fputs("}", stdout);
        break;
    }
    default:
        fputs("null", stdout);
    }
}
// Demo: build a nested person record, print it, then release it
int main() {
    // Nested containers are assembled first, then attached to the root
    Value *hobbies = create_array();
    array_append(hobbies, create_string("reading"));
    array_append(hobbies, create_string("swimming"));
    array_append(hobbies, create_string("coding"));

    Value *address = create_object();
    object_set(address, "street", create_string("123 Main St"));
    object_set(address, "city", create_string("Anytown"));
    object_set(address, "zip", create_int(12345));

    Value *person = create_object();
    object_set(person, "name", create_string("John Doe"));
    object_set(person, "age", create_int(30));
    object_set(person, "salary", create_double(75000.50));
    object_set(person, "hobbies", hobbies);
    object_set(person, "address", address);

    printf("Person object:\n");
    print_value(person, 0);
    printf("\n\n");

    // Frees the nested array and object as well
    free_value(person);
    return 0;
}

2. Type Punning and Serialization

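In C (unlike C++), writing one union member and then reading another is a well-defined way to reinterpret the stored bytes, so unions provide a standard-compliant way to perform type punning. Note that the layout of bit-fields is implementation-defined, so the bit-field views below are not portable across compilers and byte orders: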

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Three overlapping views of a 32-bit float: the value, its raw bits, and
// its sign/exponent/mantissa fields.
// NOTE(review): the `parts` view assumes bit-fields are allocated starting at
// the least significant bit; that order is implementation-defined.
typedef union {
    float f;
    uint32_t i;
    struct {
        uint32_t mantissa : 23;
        uint32_t exponent : 8;
        uint32_t sign     : 1;
    } parts;
} FloatBits;
// Three overlapping views of a 64-bit double: the value, its raw bits, and
// its sign/exponent/mantissa fields.
// NOTE(review): the `parts` view assumes bit-fields are allocated starting at
// the least significant bit; that order is implementation-defined.
typedef union {
    double d;
    uint64_t i;
    struct {
        uint64_t mantissa : 52;
        uint64_t exponent : 11;
        uint64_t sign     : 1;
    } parts;
} DoubleBits;
// Print the raw bits of `f` and its sign, exponent and mantissa fields,
// followed by a classification line for zero, subnormal, infinity or NaN.
// The fields are extracted from the integer view with shifts and masks
// instead of the bit-field view, so the result does not depend on the
// compiler's bit-field allocation order, and every printf argument has
// exactly the type its conversion specifier expects.
void analyze_float(float f) {
    FloatBits fb = {.f = f};
    uint32_t sign = fb.i >> 31;
    uint32_t exponent = (fb.i >> 23) & UINT32_C(0xFF);
    uint32_t mantissa = fb.i & UINT32_C(0x7FFFFF);
    printf("Float: %f\n", f);
    printf("  Bits: 0x%08" PRIx32 "\n", fb.i);
    printf("  Sign: %" PRIu32 "\n", sign);
    // 127 is the exponent bias of the single-precision format
    printf("  Exponent: %" PRIu32 " (value: %d)\n",
           exponent, (int)exponent - 127);
    printf("  Mantissa: 0x%06" PRIx32 "\n", mantissa);
    // Special values
    if (exponent == 0 && mantissa == 0) {
        printf("  Zero\n");
    } else if (exponent == 0) {
        printf("  Subnormal number\n");
    } else if (exponent == 255 && mantissa == 0) {
        printf("  Infinity\n");
    } else if (exponent == 255) {
        printf("  NaN\n");
    }
    printf("\n");
}
// Serialization framework
// Tag written as the first byte of every serialized record
typedef enum {
    SER_TYPE_INT,
    SER_TYPE_FLOAT,
    SER_TYPE_DOUBLE,
    SER_TYPE_STRING,
    SER_TYPE_BLOB
} SerialType;
// A value that can be serialized; `type` selects the active member of `data`
typedef struct {
    SerialType type;
    union {
        int int_value;        // SER_TYPE_INT
        float float_value;    // SER_TYPE_FLOAT
        double double_value;  // SER_TYPE_DOUBLE
        struct {
            char *data;       // `length` bytes of text
            size_t length;
        } string;             // SER_TYPE_STRING
        struct {
            void *data;       // `size` raw bytes
            size_t size;
        } blob;               // SER_TYPE_BLOB
    } data;
} Serializable;
// Serialize to byte buffer
// Layout: one tag byte, then for strings and blobs a native size_t length,
// then the payload bytes in native representation.
// Returns a malloc'd buffer and stores its length in *out_size. On an unknown
// type, an inconsistent payload (NULL data with non-zero length) or
// allocation failure, returns NULL and stores 0 in *out_size.
uint8_t* serialize(const Serializable *s, size_t *out_size) {
    const void *payload = NULL;
    size_t payload_len = 0;
    int has_length_prefix = 0;

    *out_size = 0;
    switch (s->type) {
    case SER_TYPE_INT:
        payload = &s->data.int_value;
        payload_len = sizeof(int);
        break;
    case SER_TYPE_FLOAT:
        payload = &s->data.float_value;
        payload_len = sizeof(float);
        break;
    case SER_TYPE_DOUBLE:
        payload = &s->data.double_value;
        payload_len = sizeof(double);
        break;
    case SER_TYPE_STRING:
        payload = s->data.string.data;
        payload_len = s->data.string.length;
        has_length_prefix = 1;
        break;
    case SER_TYPE_BLOB:
        payload = s->data.blob.data;
        payload_len = s->data.blob.size;
        has_length_prefix = 1;
        break;
    default:
        return NULL;
    }

    // First byte: type; variable-length payloads also carry their length
    size_t header_len = 1 + (has_length_prefix ? sizeof(size_t) : 0);
    if (payload_len > SIZE_MAX - header_len) {
        return NULL;  // total size would overflow
    }
    if (payload_len > 0 && payload == NULL) {
        return NULL;
    }

    uint8_t *buffer = malloc(header_len + payload_len);
    if (buffer == NULL) {
        return NULL;
    }
    buffer[0] = (uint8_t)s->type;
    if (has_length_prefix) {
        memcpy(buffer + 1, &payload_len, sizeof(size_t));
    }
    // memcpy must not be handed a NULL pointer, even for zero bytes
    if (payload_len > 0) {
        memcpy(buffer + header_len, payload, payload_len);
    }
    *out_size = header_len + payload_len;
    return buffer;
}
// Deserialize from byte buffer
// Parses one record in the layout written by serialize().
// Returns a malloc'd Serializable (release with free_serializable), or NULL
// when the buffer is NULL, truncated, carries an unknown tag, or memory runs
// out. Strings get a terminating '\0' appended; an empty blob has NULL data.
Serializable* deserialize(const uint8_t *buffer, size_t size) {
    if (buffer == NULL || size < 1) return NULL;
    Serializable *s = malloc(sizeof *s);
    if (s == NULL) return NULL;

    const uint8_t *body = buffer + 1;  // bytes after the tag
    size_t remaining = size - 1;

    s->type = buffer[0];
    switch (s->type) {
    case SER_TYPE_INT:
        if (remaining < sizeof(int)) goto error;
        memcpy(&s->data.int_value, body, sizeof(int));
        break;
    case SER_TYPE_FLOAT:
        if (remaining < sizeof(float)) goto error;
        memcpy(&s->data.float_value, body, sizeof(float));
        break;
    case SER_TYPE_DOUBLE:
        if (remaining < sizeof(double)) goto error;
        memcpy(&s->data.double_value, body, sizeof(double));
        break;
    case SER_TYPE_STRING: {
        size_t length;
        if (remaining < sizeof length) goto error;
        memcpy(&length, body, sizeof length);
        // Compare against what is left rather than adding to the untrusted
        // length, so a huge value cannot wrap around and pass the check
        if (length > remaining - sizeof length) goto error;
        char *text = malloc(length + 1);
        if (text == NULL) goto error;
        memcpy(text, body + sizeof length, length);
        text[length] = '\0';
        s->data.string.data = text;
        s->data.string.length = length;
        break;
    }
    case SER_TYPE_BLOB: {
        size_t blob_size;
        void *bytes = NULL;
        if (remaining < sizeof blob_size) goto error;
        memcpy(&blob_size, body, sizeof blob_size);
        if (blob_size > remaining - sizeof blob_size) goto error;
        if (blob_size > 0) {
            bytes = malloc(blob_size);
            if (bytes == NULL) goto error;
            memcpy(bytes, body + sizeof blob_size, blob_size);
        }
        s->data.blob.data = bytes;
        s->data.blob.size = blob_size;
        break;
    }
    default:
        goto error;
    }
    return s;
error:
    free(s);
    return NULL;
}
// Release a Serializable and, for strings and blobs, the buffer it points to.
// A NULL argument is ignored.
void free_serializable(Serializable *s) {
    if (s == NULL) {
        return;
    }
    switch (s->type) {
    case SER_TYPE_STRING:
    case SER_TYPE_BLOB:
        // string.data and blob.data overlay the same pointer slot
        free(s->data.string.data);
        break;
    default:
        break;
    }
    free(s);
}
// Demo: dump a few float bit patterns, then round-trip a string through
// serialize()/deserialize()
int main() {
    // Analyze float representation
    analyze_float(3.14159f);
    analyze_float(0.0f);
    analyze_float(1.0f / 0.0f);  // Infinity

    // Serialization example
    Serializable source;
    source.type = SER_TYPE_STRING;
    source.data.string.data = "Hello, Serialization!";
    source.data.string.length = 21;

    size_t byte_count;
    uint8_t *bytes = serialize(&source, &byte_count);
    printf("Serialized %zu bytes\n", byte_count);
    size_t pos = 0;
    while (pos < byte_count) {
        printf("%02X ", bytes[pos]);
        pos++;
    }
    printf("\n\n");

    Serializable *restored = deserialize(bytes, byte_count);
    if (restored != NULL && restored->type == SER_TYPE_STRING) {
        printf("Deserialized: %s\n", restored->data.string.data);
    }

    free(bytes);
    free_serializable(restored);
    return 0;
}

3. Polymorphism with Unions

Implementing object-oriented-like polymorphism:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
// Tag identifying the concrete shape behind a Shape pointer
typedef enum {
    SHAPE_CIRCLE,
    SHAPE_RECTANGLE,
    SHAPE_TRIANGLE
} ShapeType;
// Shape is declared early because the function-pointer table refers to it
typedef struct Shape Shape;
// Table of per-type operations; every Shape points at one of these
typedef struct {
    double (*area)(const Shape*);       // computes the area
    double (*perimeter)(const Shape*);  // computes the perimeter
    void (*print)(const Shape*);        // prints a description
    void (*destroy)(Shape*);            // releases the shape
} ShapeVTable;
// Common header embedded as the first member of every concrete shape
struct Shape {
    ShapeType type;             // which concrete shape this is
    const ShapeVTable *vtable;  // operations for that shape
};
// Circle: Shape header followed by the radius
typedef struct {
    Shape base;  // must stay first so a Circle* can be used as a Shape*
    double radius;
} Circle;
// M_PI is a POSIX/XSI extension rather than part of ISO C, so <math.h> may
// not define it when compiling in strict mode (e.g. -std=c11). Provide a
// fallback so the circle functions build everywhere.
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
// Area of the circle behind `shape` (pi * r^2)
double circle_area(const Shape *shape) {
    const Circle *c = (const Circle*)shape;
    return M_PI * c->radius * c->radius;
}
// Circumference of the circle behind `shape` (2 * pi * r)
double circle_perimeter(const Shape *shape) {
    const Circle *c = (const Circle*)shape;
    return 2 * M_PI * c->radius;
}
// Print "Circle (radius=R)" with two decimals, without a trailing newline
void circle_print(const Shape *shape) {
    double radius = ((const Circle*)shape)->radius;
    printf("Circle (radius=%.2f)", radius);
}
// Release a circle; it owns no memory besides the struct itself
void circle_destroy(Shape *shape)
{
    free(shape);
}
// Operation table shared by every Circle
const ShapeVTable circle_vtable = {
    .destroy   = circle_destroy,
    .print     = circle_print,
    .perimeter = circle_perimeter,
    .area      = circle_area
};
// Allocate a circle with the given radius and return it as a Shape*.
// Returns NULL if the allocation fails; release via vtable->destroy.
Shape* create_circle(double radius) {
    Circle *c = malloc(sizeof *c);
    if (c == NULL) {
        return NULL;
    }
    c->base.type = SHAPE_CIRCLE;
    c->base.vtable = &circle_vtable;
    c->radius = radius;
    return &c->base;
}
// Rectangle: Shape header followed by width and height
typedef struct {
    Shape base;  // must stay first so a Rectangle* can be used as a Shape*
    double width;
    double height;
} Rectangle;
// Area of the rectangle behind `shape` (width * height)
double rectangle_area(const Shape *shape) {
    const Rectangle *rect = (const Rectangle*)shape;
    double w = rect->width;
    double h = rect->height;
    return w * h;
}
// Perimeter of the rectangle behind `shape` (twice the sum of the sides)
double rectangle_perimeter(const Shape *shape) {
    const Rectangle *rect = (const Rectangle*)shape;
    double side_sum = rect->width + rect->height;
    return 2 * side_sum;
}
// Print "Rectangle (W x H)" with two decimals, without a trailing newline
void rectangle_print(const Shape *shape) {
    const Rectangle *rect = (const Rectangle*)shape;
    double w = rect->width;
    double h = rect->height;
    printf("Rectangle (%.2f x %.2f)", w, h);
}
// Release a rectangle; it owns no memory besides the struct itself
void rectangle_destroy(Shape *shape)
{
    free(shape);
}
// Operation table shared by every Rectangle
const ShapeVTable rectangle_vtable = {
    .destroy   = rectangle_destroy,
    .print     = rectangle_print,
    .perimeter = rectangle_perimeter,
    .area      = rectangle_area
};
// Allocate a rectangle with the given dimensions and return it as a Shape*.
// Returns NULL if the allocation fails; release via vtable->destroy.
Shape* create_rectangle(double width, double height) {
    Rectangle *r = malloc(sizeof *r);
    if (r == NULL) {
        return NULL;
    }
    r->base.type = SHAPE_RECTANGLE;
    r->base.vtable = &rectangle_vtable;
    r->width = width;
    r->height = height;
    return &r->base;
}
// Triangle: Shape header followed by the three side lengths
typedef struct {
    Shape base;  // must stay first so a Triangle* can be used as a Shape*
    double a, b, c;
} Triangle;
// Area of the triangle behind `shape`, computed from its side lengths with
// Heron's formula: sqrt(s(s-a)(s-b)(s-c)) where s is half the perimeter
double triangle_area(const Shape *shape) {
    const Triangle *tri = (const Triangle*)shape;
    double half = (tri->a + tri->b + tri->c) / 2.0;
    double product = half * (half - tri->a) * (half - tri->b) * (half - tri->c);
    return sqrt(product);
}
// Perimeter of the triangle behind `shape` (sum of the three sides)
double triangle_perimeter(const Shape *shape) {
    const Triangle *tri = (const Triangle*)shape;
    double total = tri->a + tri->b + tri->c;
    return total;
}
// Print "Triangle (sides=A,B,C)" with two decimals, without a trailing newline
void triangle_print(const Shape *shape) {
    const Triangle *tri = (const Triangle*)shape;
    printf("Triangle (sides=%.2f,%.2f,%.2f)",
           tri->a, tri->b, tri->c);
}
// Release a triangle; it owns no memory besides the struct itself
void triangle_destroy(Shape *shape)
{
    free(shape);
}
// Operation table shared by every Triangle
const ShapeVTable triangle_vtable = {
    .destroy   = triangle_destroy,
    .print     = triangle_print,
    .perimeter = triangle_perimeter,
    .area      = triangle_area
};
// Allocate a triangle with the given side lengths and return it as a Shape*.
// The sides are stored as given (no triangle-inequality check).
// Returns NULL if the allocation fails; release via vtable->destroy.
Shape* create_triangle(double a, double b, double c) {
    Triangle *t = malloc(sizeof *t);
    if (t == NULL) {
        return NULL;
    }
    t->base.type = SHAPE_TRIANGLE;
    t->base.vtable = &triangle_vtable;
    t->a = a;
    t->b = b;
    t->c = c;
    return &t->base;
}
// By-value storage large enough for any one concrete shape.
// Every member begins with a Shape header, so the members share a common
// initial sequence.
typedef union {
    Circle circle;
    Rectangle rectangle;
    Triangle triangle;
} ShapeUnion;
// Careful: ShapeUnion has no tag of its own, and its embedded Shape header
// (type + vtable) is only valid once one of its members has been filled in.
// A tagged union of typed pointers to separately created shapes is lighter.
typedef struct {
    ShapeType type;  // selects which pointer in `ptr` is meaningful
    union {
        Circle *circle;
        Rectangle *rectangle;
        Triangle *triangle;
    } ptr;
} ShapeHandle;
// Wrap `shape` in a handle whose tag and typed pointer match shape->type.
// The handle does not take ownership. For a tag outside ShapeType the
// pointer member is left NULL instead of indeterminate.
ShapeHandle create_shape_handle(Shape *shape) {
    ShapeHandle handle = { .type = shape->type, .ptr = { .circle = NULL } };
    switch (shape->type) {
    case SHAPE_CIRCLE:
        handle.ptr.circle = (Circle*)shape;
        break;
    case SHAPE_RECTANGLE:
        handle.ptr.rectangle = (Rectangle*)shape;
        break;
    case SHAPE_TRIANGLE:
        handle.ptr.triangle = (Triangle*)shape;
        break;
    default:
        break;
    }
    return handle;
}
double shape_handle_area(const ShapeHandle *handle) {
switch (handle->type) {
case SHAPE_CIRCLE:
return circle_area((Shape*)handle->ptr.circle);
case SHAPE_RECTANGLE:
return rectangle_area((Shape*)handle->ptr.rectangle);
case SHAPE_TRIANGLE:
return triangle_area((Shape*)handle->ptr.triangle);
}
return 0;
}
// Demo: the same three shapes measured through the vtable and through handles
int main() {
    // Create shapes
    Shape *shapes[] = {
        create_circle(5.0),
        create_rectangle(4.0, 6.0),
        create_triangle(3.0, 4.0, 5.0)
    };
    int num_shapes = sizeof(shapes) / sizeof(shapes[0]);
    int idx;

    // Polymorphic behavior
    printf("Shapes (using virtual functions):\n");
    for (idx = 0; idx < num_shapes; idx++) {
        Shape *current = shapes[idx];
        current->vtable->print(current);
        // Call in the same order the printf argument list was written
        printf(": area=%.2f, perimeter=%.2f\n",
               current->vtable->area(current),
               current->vtable->perimeter(current));
    }

    printf("\nShapes (using union handles):\n");
    for (idx = 0; idx < num_shapes; idx++) {
        ShapeHandle handle = create_shape_handle(shapes[idx]);
        printf("Area: %.2f\n", shape_handle_area(&handle));
    }

    // Cleanup
    for (idx = 0; idx < num_shapes; idx++) {
        shapes[idx]->vtable->destroy(shapes[idx]);
    }
    return 0;
}

4. Memory-Mapped I/O and Hardware Registers

Unions are essential for hardware programming:

#include <stdio.h>
#include <stdint.h>
// Hardware register definitions
// Control register: one 32-bit word, also addressable field by field.
// NOTE(review): the bit positions in the comments assume bit-fields are
// allocated from the least significant bit — confirm for the target compiler.
typedef union {
    uint32_t value;                 // whole register
    struct {
        uint32_t enable      : 1;   // Bit 0
        uint32_t mode        : 2;   // Bits 1-2
        uint32_t interrupt   : 1;   // Bit 3
        uint32_t error       : 1;   // Bit 4
        uint32_t reserved1   : 3;   // Bits 5-7
        uint32_t baud_rate   : 4;   // Bits 8-11
        uint32_t data_bits   : 3;   // Bits 12-14
        uint32_t stop_bits   : 1;   // Bit 15
        uint32_t parity      : 2;   // Bits 16-17
        uint32_t reserved2   : 14;  // Bits 18-31
    } bits;
} UARTControlReg;
// Data register: one 32-bit word viewed as a whole, as four bytes, or as
// data bytes plus individual status flags.
// NOTE(review): the byte/bit positions assume a little-endian target with
// bit-fields allocated from the least significant bit — confirm.
typedef union {
    uint32_t value;           // whole register
    struct {
        uint8_t rx_data;      // Bits 0-7
        uint8_t tx_data;      // Bits 8-15
        uint8_t status;       // Bits 16-23
        uint8_t unused;       // Bits 24-31
    } bytes;
    struct {
        uint32_t rx_data  : 8;
        uint32_t tx_data  : 8;
        uint32_t rx_full  : 1;
        uint32_t tx_empty : 1;
        uint32_t rx_error : 1;
        uint32_t tx_error : 1;
        uint32_t reserved : 12;
    } fields;
} UARTDataReg;
// Simulated hardware registers
UARTControlReg uart_control = {0};
UARTDataReg uart_data = {0};
// Hardware access functions
// Program the simulated control register and print the resulting settings.
// data_bits (5-8) and stop_bits (1-2) are stored as offsets from their
// minimum; baud_rate and parity are stored as given.
void uart_init(int baud_rate, int data_bits, int stop_bits, int parity) {
    uart_control.bits.enable = 1;
    uart_control.bits.baud_rate = baud_rate;
    uart_control.bits.data_bits = data_bits - 5;  // 5-8 bits
    uart_control.bits.stop_bits = stop_bits - 1;  // 1-2 bits
    uart_control.bits.parity = parity;

    // Values are read back from the register, so they show what was stored
    printf("UART initialized: control=0x%08X\n", (unsigned)uart_control.value);
    printf("  Enable: %u\n", (unsigned)uart_control.bits.enable);
    printf("  Baud rate: %u\n", (unsigned)uart_control.bits.baud_rate);
    printf("  Data bits: %u\n", (unsigned)(uart_control.bits.data_bits + 5));
    printf("  Stop bits: %u\n", (unsigned)(uart_control.bits.stop_bits + 1));
    printf("  Parity: %u\n", (unsigned)uart_control.bits.parity);
}
// Write one character to the simulated transmit register and log it.
// The byte is printed via unsigned char so characters above 0x7F show as two
// hex digits even where plain char is signed.
void uart_send(char c) {
    // Wait for TX buffer empty
    while (!uart_data.fields.tx_empty) {
        // In real hardware, this would check status
        uart_data.fields.tx_empty = 1;
    }
    // Send data
    uart_data.bytes.tx_data = (uint8_t)c;
    uart_data.fields.tx_empty = 0;
    printf("Sent: %c (data=0x%02X)\n", c, (unsigned)(unsigned char)c);
}
char uart_receive() {
// Wait for RX buffer full
while (!uart_data.fields.rx_full) {
// Simulate receiving data
static char test_data[] = "Hello";
static int index = 0;
if (index < 5) {
uart_data.bytes.rx_data = test_data[index++];
uart_data.fields.rx_full = 1;
}
}
char c = uart_data.bytes.rx_data;
uart_data.fields.rx_full = 0;
printf("Received: %c (data=0x%02X)\n", c, c);
return c;
}
// Demo: configure the simulated UART, send "UART", then read five characters
int main() {
    // Initialize UART
    uart_init(4, 8, 1, 0);  // 9600 baud, 8 data bits, 1 stop, no parity
    printf("\n--- UART Communication ---\n");

    // Send data
    const char *message = "UART";
    for (const char *cursor = message; *cursor; cursor++) {
        uart_send(*cursor);
    }

    // Receive data
    printf("\nReceiving:\n");
    int remaining = 5;
    while (remaining-- > 0) {
        uart_receive();
    }
    return 0;
}

5. Network Protocol Headers

Unions can model packet layouts whose inner headers vary by protocol (for example IPv4 vs. IPv6, or TCP vs. UDP):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>
// Ethernet header (packed so the struct has no padding)
typedef struct {
    uint8_t dest_mac[6];  // destination MAC address
    uint8_t src_mac[6];   // source MAC address
    uint16_t ethertype;   // converted with ntohs()/htons() by the code using it
} __attribute__((packed)) EthernetHeader;
// IPv4 header without options (packed so the struct has no padding)
typedef struct {
    uint8_t  version_ihl;      // Version (high 4 bits) + IHL (low 4 bits)
    uint8_t  dscp_ecn;         // DSCP + ECN
    uint16_t total_length;
    uint16_t identification;
    uint16_t flags_fragment;
    uint8_t  ttl;
    uint8_t  protocol;         // the parser treats 6 as TCP and 17 as UDP
    uint16_t checksum;
    uint32_t src_ip;
    uint32_t dst_ip;
} __attribute__((packed)) IPv4Header;
// IPv6 fixed header (packed so the struct has no padding)
typedef struct {
    uint32_t version_tc_flow;  // Version (4), Traffic Class (8), Flow Label (20)
    uint16_t payload_length;
    uint8_t  next_header;
    uint8_t  hop_limit;
    uint8_t  src_ip[16];
    uint8_t  dst_ip[16];
} __attribute__((packed)) IPv6Header;
// TCP header without options (packed so the struct has no padding)
typedef struct {
    uint16_t src_port;
    uint16_t dst_port;
    uint32_t seq_num;
    uint32_t ack_num;
    uint8_t  data_offset;
    uint8_t  flags;
    uint16_t window;
    uint16_t checksum;
    uint16_t urgent_ptr;
} __attribute__((packed)) TCPHeader;
// UDP header (packed so the struct has no padding)
typedef struct {
    uint16_t src_port;
    uint16_t dst_port;
    uint16_t length;
    uint16_t checksum;
} __attribute__((packed)) UDPHeader;
// Storage for either kind of IP header; the caller must know which one is
// active because the union carries no tag
typedef union {
    IPv4Header v4;
    IPv6Header v6;
} IPHeaderUnion;
// Complete packet representation: Ethernet header followed by either an
// IPv4 or an IPv6 header, each followed by a TCP or UDP header.
// The IPv4 variant places the transport header directly after the fixed
// 20-byte IPv4Header.
typedef struct {
    EthernetHeader eth;
    union {
        struct {
            IPv4Header ip;
            union {
                TCPHeader tcp;
                UDPHeader udp;
            } transport;
        } v4;
        struct {
            IPv6Header ip;
            union {
                TCPHeader tcp;
                UDPHeader udp;
            } transport;
        } v6;
    } ip;
} Packet;
// Symbolic names for the transport protocol carried by a packet
typedef enum {
    PROTO_UNKNOWN,
    PROTO_TCP,
    PROTO_UDP,
    PROTO_ICMP
} TransportProtocol;
void parse_packet(const uint8_t *raw_data, size_t length) {
if (length < sizeof(EthernetHeader)) {
printf("Packet too short\n");
return;
}
const EthernetHeader *eth = (const EthernetHeader*)raw_data;
uint16_t ethertype = ntohs(eth->ethertype);
printf("Ethernet Header:\n");
printf("  Dest MAC: %02X:%02X:%02X:%02X:%02X:%02X\n",
eth->dest_mac[0], eth->dest_mac[1], eth->dest_mac[2],
eth->dest_mac[3], eth->dest_mac[4], eth->dest_mac[5]);
printf("  Src MAC:  %02X:%02X:%02X:%02X:%02X:%02X\n",
eth->src_mac[0], eth->src_mac[1], eth->src_mac[2],
eth->src_mac[3], eth->src_mac[4], eth->src_mac[5]);
printf("  Ethertype: 0x%04X\n", ethertype);
if (ethertype == 0x0800) {  // IPv4
const IPv4Header *ip = (const IPv4Header*)(raw_data + sizeof(EthernetHeader));
uint8_t version = ip->version_ihl >> 4;
uint8_t ihl = ip->version_ihl & 0x0F;
printf("\nIPv4 Header:\n");
printf("  Version: %u\n", version);
printf("  IHL: %u (%u bytes)\n", ihl, ihl * 4);
printf("  Total Length: %u\n", ntohs(ip->total_length));
printf("  Protocol: %u\n", ip->protocol);
uint32_t src_ip = ntohl(ip->src_ip);
uint32_t dst_ip = ntohl(ip->dst_ip);
printf("  Source IP: %u.%u.%u.%u\n",
(src_ip >> 24) & 0xFF,
(src_ip >> 16) & 0xFF,
(src_ip >> 8) & 0xFF,
src_ip & 0xFF);
printf("  Dest IP:   %u.%u.%u.%u\n",
(dst_ip >> 24) & 0xFF,
(dst_ip >> 16) & 0xFF,
(dst_ip >> 8) & 0xFF,
dst_ip & 0xFF);
// Parse transport layer
const uint8_t *transport_data = raw_data + sizeof(EthernetHeader) + ihl * 4;
if (ip->protocol == 6) {  // TCP
const TCPHeader *tcp = (const TCPHeader*)transport_data;
printf("\nTCP Header:\n");
printf("  Source Port: %u\n", ntohs(tcp->src_port));
printf("  Dest Port:   %u\n", ntohs(tcp->dst_port));
printf("  Sequence:    %u\n", ntohl(tcp->seq_num));
printf("  Flags:       0x%02X\n", tcp->flags);
} else if (ip->protocol == 17) {  // UDP
const UDPHeader *udp = (const UDPHeader*)transport_data;
printf("\nUDP Header:\n");
printf("  Source Port: %u\n", ntohs(udp->src_port));
printf("  Dest Port:   %u\n", ntohs(udp->dst_port));
printf("  Length:      %u\n", ntohs(udp->length));
}
} else if (ethertype == 0x86DD) {  // IPv6
const IPv6Header *ip = (const IPv6Header*)(raw_data + sizeof(EthernetHeader));
uint32_t version_tc_flow = ntohl(ip->version_tc_flow);
uint8_t version = version_tc_flow >> 28;
printf("\nIPv6 Header:\n");
printf("  Version: %u\n", version);
printf("  Next Header: %u\n", ip->next_header);
printf("  Hop Limit: %u\n", ip->hop_limit);
// Print IPv6 addresses (simplified)
printf("  Source IP: ");
for (int i = 0; i < 16; i += 2) {
printf("%02x%02x", ip->src_ip[i], ip->src_ip[i+1]);
if (i < 14) printf(":");
}
printf("\n");
}
}
// Demo: hand-build a minimal Ethernet/IPv4/TCP SYN frame and parse it.
// The buffer is zero-initialised so header fields that are not set explicitly
// have a defined value, and the MAC addresses are filled with a plain loop
// because this program does not include <string.h> for memset.
int main() {
    // Create a sample TCP/IP packet (simplified)
    uint8_t packet[1024] = {0};
    size_t offset = 0;
    // Ethernet header
    EthernetHeader *eth = (EthernetHeader*)packet;
    for (size_t i = 0; i < sizeof eth->dest_mac; i++) {
        eth->dest_mac[i] = 0x01;
        eth->src_mac[i] = 0x02;
    }
    eth->ethertype = htons(0x0800);  // IPv4
    offset += sizeof(EthernetHeader);
    // IPv4 header
    IPv4Header *ip = (IPv4Header*)(packet + offset);
    ip->version_ihl = (4 << 4) | 5;  // IPv4, 5 words (20 bytes)
    ip->total_length = htons(40);    // 20 IP + 20 TCP
    ip->protocol = 6;                  // TCP
    ip->src_ip = htonl(0xC0A80164);   // 192.168.1.100
    ip->dst_ip = htonl(0xC0A80101);   // 192.168.1.1
    offset += sizeof(IPv4Header);
    // TCP header
    TCPHeader *tcp = (TCPHeader*)(packet + offset);
    tcp->src_port = htons(12345);
    tcp->dst_port = htons(80);
    tcp->seq_num = htonl(1000);
    tcp->flags = 0x02;  // SYN
    // Parse the packet
    parse_packet(packet, offset + sizeof(TCPHeader));
    return 0;
}

6. Memory Pools and Custom Allocators

Advanced memory management using unions:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
// Number of bytes in the pool's backing array
#define POOL_SIZE 1024
// Allocation granularity in bytes; the mask in ALIGN_SIZE needs a power of two
#define ALIGNMENT 8
// Round `size` up to the next multiple of ALIGNMENT
#define ALIGN_SIZE(size) (((size) + (ALIGNMENT - 1)) & ~(ALIGNMENT - 1))
// Memory block header (uses union to ensure alignment)
// `magic` is an unsigned 32-bit field: the marker values 0xDEADBEEF and
// 0xCAFEBABE do not fit in a signed int, so storing them in one relied on an
// implementation-defined conversion.
typedef union BlockHeader {
    struct {
        union BlockHeader *next;  // Next free block
        size_t size;              // Block size (payload bytes, header excluded)
        uint32_t magic;           // Magic number for debugging
    } data;
    double align_dummy;           // Ensure alignment suitable for double
    void *align_ptr_dummy;        // Also align for pointers
} BlockHeader;
// Fixed-size pool: backing bytes plus free-list head and usage counters.
// NOTE(review): `memory` is cast to BlockHeader*; its alignment comes from
// being the first member of a struct that also holds pointers — confirm this
// is sufficient on the target.
typedef struct {
    uint8_t memory[POOL_SIZE];  // storage carved into header+payload blocks
    BlockHeader *free_list;     // first free block
    size_t total_allocated;     // payload bytes currently handed out
    size_t peak_allocated;      // highest value total_allocated has reached
    int allocations;            // number of live allocations
} MemoryPool;
// Reset the pool: the whole backing array becomes one free block and all
// statistics start at zero
void pool_init(MemoryPool *pool) {
    BlockHeader *whole = (BlockHeader*)pool->memory;
    whole->data.next = NULL;
    // The header itself lives inside the array, so it is not usable payload
    whole->data.size = POOL_SIZE - sizeof(BlockHeader);
    whole->data.magic = 0xDEADBEEF;
    pool->free_list = whole;

    pool->allocations = 0;
    pool->peak_allocated = 0;
    pool->total_allocated = 0;
}
// Allocate from pool
// First-fit search of the free list for `size` bytes, rounded up to
// ALIGNMENT. A block with room to spare is split; otherwise the whole block
// is handed out. Returns a pointer just past the block header, or NULL when
// size is 0, larger than the pool, or no free block fits.
void* pool_alloc(MemoryPool *pool, size_t size) {
    // Rejecting oversized requests up front also keeps ALIGN_SIZE from
    // wrapping around for sizes near SIZE_MAX
    if (size == 0 || size > POOL_SIZE) return NULL;
    // Align size
    size = ALIGN_SIZE(size);
    size_t total_needed = size + sizeof(BlockHeader);
    // Find suitable free block
    BlockHeader *prev = NULL;
    BlockHeader *curr = pool->free_list;
    while (curr) {
        if (curr->data.size >= size) {
            // Found block
            if (curr->data.size > size + sizeof(BlockHeader)) {
                // Split block: the tail becomes a new free block that takes
                // curr's place in the free list
                BlockHeader *new_block = (BlockHeader*)((uint8_t*)curr + total_needed);
                new_block->data.next = curr->data.next;
                new_block->data.size = curr->data.size - total_needed;
                new_block->data.magic = 0xDEADBEEF;
                if (prev) {
                    prev->data.next = new_block;
                } else {
                    pool->free_list = new_block;
                }
                curr->data.size = size;
            } else {
                // Use entire block
                if (prev) {
                    prev->data.next = curr->data.next;
                } else {
                    pool->free_list = curr->data.next;
                }
            }
            curr->data.next = NULL;
            curr->data.magic = 0xCAFEBABE;  // Allocated magic
            pool->total_allocated += curr->data.size;
            pool->allocations++;
            if (pool->total_allocated > pool->peak_allocated) {
                pool->peak_allocated = pool->total_allocated;
            }
            return (uint8_t*)curr + sizeof(BlockHeader);
        }
        prev = curr;
        curr = curr->data.next;
    }
    return NULL;  // Out of memory
}
// Return an allocation to the pool.
// NULL is ignored. A block whose header does not carry the "allocated" magic
// is reported and left untouched. The block is inserted into the free list in
// address order and merged with the free neighbour on either side when they
// are directly adjacent in memory.
void pool_free(MemoryPool *pool, void *ptr) {
    if (ptr == NULL) {
        return;
    }
    BlockHeader *block = (BlockHeader*)((uint8_t*)ptr - sizeof(BlockHeader));
    // Validate magic number (debug)
    if (block->data.magic != 0xCAFEBABE) {
        printf("Error: Invalid free or memory corruption!\n");
        return;
    }
    block->data.magic = 0xDEADBEEF;
    pool->total_allocated -= block->data.size;
    pool->allocations--;

    // Locate the free blocks just below (before) and just above (after)
    BlockHeader *before = NULL;
    BlockHeader *after = pool->free_list;
    for (; after != NULL && after < block; after = after->data.next) {
        before = after;
    }

    // Merge with the following free block when it starts where this one ends
    uint8_t *block_end = (uint8_t*)block + sizeof(BlockHeader) + block->data.size;
    if (after != NULL && block_end == (uint8_t*)after) {
        block->data.size += sizeof(BlockHeader) + after->data.size;
        block->data.next = after->data.next;
    } else {
        block->data.next = after;
    }

    // No free block below: this block becomes the list head
    if (before == NULL) {
        pool->free_list = block;
        return;
    }

    // Merge into the preceding free block when it ends where this one starts
    uint8_t *before_end = (uint8_t*)before + sizeof(BlockHeader) + before->data.size;
    if (before_end == (uint8_t*)block) {
        before->data.size += sizeof(BlockHeader) + block->data.size;
        before->data.next = block->data.next;
    } else {
        before->data.next = block;
    }
}
// Print the usage counters and a summary of the free list (block count and
// total free payload bytes, headers excluded)
void pool_stats(const MemoryPool *pool) {
    printf("Pool Statistics:\n");
    printf("  Total allocated: %zu bytes\n", pool->total_allocated);
    printf("  Peak allocated:  %zu bytes\n", pool->peak_allocated);
    printf("  Current allocations: %d\n", pool->allocations);

    // Walk free list
    size_t free_bytes = 0;
    int free_blocks = 0;
    for (const BlockHeader *node = pool->free_list; node != NULL; node = node->data.next) {
        free_bytes += node->data.size;
        free_blocks++;
    }
    printf("  Free blocks: %d (%zu bytes)\n", free_blocks, free_bytes);
}
// Demo: allocate a few objects from the pool, free two, then reuse the space.
// pool_alloc returns NULL when the pool is exhausted, so each result is
// checked before use, and the strings are written with a bounded snprintf.
int main() {
    MemoryPool pool;
    pool_init(&pool);
    // Allocate various types
    int *p1 = pool_alloc(&pool, sizeof(int));
    double *p2 = pool_alloc(&pool, sizeof(double));
    char *p3 = pool_alloc(&pool, 100);
    int *p4 = pool_alloc(&pool, 10 * sizeof(int));
    if (!p1 || !p2 || !p3 || !p4) {
        fprintf(stderr, "pool exhausted\n");
        return 1;
    }
    // Use the memory
    *p1 = 42;
    *p2 = 3.14159;
    snprintf(p3, 100, "Hello from pool!");
    for (int i = 0; i < 10; i++) p4[i] = i * 10;
    printf("p1: %d\n", *p1);
    printf("p2: %f\n", *p2);
    printf("p3: %s\n", p3);
    printf("p4[5]: %d\n", p4[5]);
    pool_stats(&pool);
    // Free some allocations
    pool_free(&pool, p2);
    pool_free(&pool, p4);
    printf("\nAfter freeing:\n");
    pool_stats(&pool);
    // Reuse freed memory
    char *p5 = pool_alloc(&pool, 200);
    if (!p5) {
        fprintf(stderr, "pool exhausted\n");
        return 1;
    }
    snprintf(p5, 200, "Reusing freed memory!");
    printf("\np5: %s\n", p5);
    pool_stats(&pool);
    return 0;
}

7. Dual-Purpose Data Structures

Unions enable data structures that can be used in multiple ways:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Selects whether a DataStructure behaves as a stack or as a queue
typedef enum {
    DS_STACK,
    DS_QUEUE
} DataStructureType;
// Bounded int container; `type` selects which member of `state` is in use
typedef struct {
    DataStructureType type;
    size_t capacity;  // number of slots in `data`
    size_t size;      // number of stored elements
    union {
        struct {
            int top;    // For stack: index of the top element, -1 when empty
        } stack;
        struct {
            int front;  // For queue: index of the next element to dequeue
            int rear;   // For queue: index of the last enqueued element
        } queue;
    } state;
    int *data;        // heap array of `capacity` ints
} DataStructure;
// Create an empty stack or queue with room for `capacity` ints.
// Returns NULL if the requested size overflows or an allocation fails;
// release the result with ds_destroy().
DataStructure* ds_create(DataStructureType type, size_t capacity) {
    // Guard the capacity * sizeof(int) multiplication below
    if (capacity > (size_t)-1 / sizeof(int)) {
        return NULL;
    }
    DataStructure *ds = malloc(sizeof *ds);
    if (ds == NULL) {
        return NULL;
    }
    ds->data = malloc(capacity * sizeof *ds->data);
    // malloc(0) may legitimately return NULL, so only a non-empty request
    // counts as a failure
    if (ds->data == NULL && capacity > 0) {
        free(ds);
        return NULL;
    }
    ds->type = type;
    ds->capacity = capacity;
    ds->size = 0;
    if (type == DS_STACK) {
        ds->state.stack.top = -1;
    } else {
        ds->state.queue.front = 0;
        ds->state.queue.rear = -1;
    }
    return ds;
}
// Release a container and its element storage. NULL is ignored, so the
// result of a failed creation can be passed in safely.
void ds_destroy(DataStructure *ds) {
    if (ds == NULL) {
        return;
    }
    free(ds->data);
    free(ds);
}
// Add `value`: pushed on top of a stack, or enqueued at the rear of a queue
// (the rear index wraps around the buffer).
// Returns 0 on success, -1 when the container is full.
int ds_push(DataStructure *ds, int value) {
    if (ds->size >= ds->capacity) {
        return -1;
    }
    switch (ds->type) {
    case DS_STACK:
        ds->state.stack.top += 1;
        ds->data[ds->state.stack.top] = value;
        break;
    default:
        ds->state.queue.rear = (ds->state.queue.rear + 1) % ds->capacity;
        ds->data[ds->state.queue.rear] = value;
        break;
    }
    ds->size += 1;
    return 0;
}
// Remove one element into *value: the top of a stack, or the front of a
// queue (the front index wraps around the buffer).
// Returns 0 on success, -1 when the container is empty.
int ds_pop(DataStructure *ds, int *value) {
    if (ds->size == 0) {
        return -1;
    }
    switch (ds->type) {
    case DS_STACK:
        *value = ds->data[ds->state.stack.top];
        ds->state.stack.top -= 1;
        break;
    default:
        *value = ds->data[ds->state.queue.front];
        ds->state.queue.front = (ds->state.queue.front + 1) % ds->capacity;
        break;
    }
    ds->size -= 1;
    return 0;
}
// Copy the element the next ds_pop would return into *value without
// removing it. Returns 0 on success, -1 when the container is empty.
int ds_peek(const DataStructure *ds, int *value) {
    if (ds->size == 0) {
        return -1;
    }
    int next_out = (ds->type == DS_STACK) ? ds->state.stack.top
                                          : ds->state.queue.front;
    *value = ds->data[next_out];
    return 0;
}
// Print the container on one line: kind, fill level, then the elements.
// A stack is listed bottom to top, a queue front to rear; the trailing label
// names the end next to the last printed element ("(top)" / "(rear)").
void ds_print(const DataStructure *ds) {
    printf("%s (%zu/%zu): ",
           ds->type == DS_STACK ? "Stack" : "Queue",
           ds->size, ds->capacity);
    if (ds->type == DS_STACK) {
        // Print stack from bottom to top
        for (int i = 0; i <= ds->state.stack.top; i++) {
            printf("%d ", ds->data[i]);
        }
        printf("(top)");
    } else {
        // Print queue from front to rear
        size_t i = (size_t)ds->state.queue.front;
        for (size_t count = 0; count < ds->size; count++) {
            printf("%d ", ds->data[i]);
            i = (i + 1) % ds->capacity;
        }
        // The last element printed is the rear of the queue, not the front
        printf("(rear)");
    }
    printf("\n");
}
// Demo: the same push/peek/pop calls on a stack and on a queue
int main() {
    int val;

    // Create stack
    DataStructure *stack = ds_create(DS_STACK, 5);
    printf("Stack operations:\n");
    for (int v = 10; v <= 30; v += 10) {
        ds_push(stack, v);
    }
    ds_print(stack);
    ds_peek(stack, &val);
    printf("Peek: %d\n", val);
    ds_pop(stack, &val);
    printf("Pop: %d\n", val);
    ds_print(stack);

    // Create queue
    DataStructure *queue = ds_create(DS_QUEUE, 5);
    printf("\nQueue operations:\n");
    for (int v = 100; v <= 300; v += 100) {
        ds_push(queue, v);
    }
    ds_print(queue);
    ds_peek(queue, &val);
    printf("Peek: %d\n", val);
    ds_pop(queue, &val);
    printf("Dequeue: %d\n", val);
    ds_print(queue);

    ds_destroy(stack);
    ds_destroy(queue);
    return 0;
}

8. Optimized Variant Records

Space-efficient variant records:

#include <stdio.h>
#include <string.h>
#include <stdint.h>
// Compact variant record: a one-byte tag plus a union holding the payload
typedef struct {
    uint8_t type;  // 0=int, 1=float, 2=string, 3=color
    union {
        int int_val;
        float float_val;
        char string_val[24];  // Fixed size: the text is stored inline
        struct {
            uint8_t r, g, b;
        } color;
    } data;
} VariantRecord;
// Print one record on its own line according to its tag.
// Records with a tag other than 0-3 print nothing.
void print_variant(const VariantRecord *v) {
    uint8_t tag = v->type;
    if (tag == 0) {
        printf("Int: %d\n", v->data.int_val);
    } else if (tag == 1) {
        printf("Float: %f\n", v->data.float_val);
    } else if (tag == 2) {
        printf("String: %s\n", v->data.string_val);
    } else if (tag == 3) {
        printf("Color: RGB(%u,%u,%u)\n",
               v->data.color.r, v->data.color.g, v->data.color.b);
    }
}
// Array of variants (saves memory compared to struct with void*)
typedef struct {
VariantRecord records[100];
int count;
} VariantArray;
void array_add_int(VariantArray *a, int val) {
a->records[a->count].type = 0;
a->records[a->count].data.int_val = val;
a->count++;
}
void array_add_float(VariantArray *a, float val) {
a->records[a->count].type = 1;
a->records[a->count].data.float_val = val;
a->count++;
}
void array_add_string(VariantArray *a, const char *val) {
a->records[a->count].type = 2;
strncpy(a->records[a->count].data.string_val, val, 23);
a->records[a->count].data.string_val[23] = '\0';
a->count++;
}
void array_add_color(VariantArray *a, uint8_t r, uint8_t g, uint8_t b) {
a->records[a->count].type = 3;
a->records[a->count].data.color.r = r;
a->records[a->count].data.color.g = g;
a->records[a->count].data.color.b = b;
a->count++;
}
/*
 * Demo driver: stores one record of each kind in a VariantArray, reports the
 * record size next to a pointer size, then prints every stored record.
 */
int main() {
    VariantArray va = {.count = 0};

    // One record per supported kind.
    array_add_int(&va, 42);
    array_add_float(&va, 3.14159f);
    array_add_string(&va, "Hello, Variant!");
    array_add_color(&va, 255, 128, 0);

    const size_t record_bytes = sizeof(VariantRecord);
    const size_t pointer_bytes = sizeof(int*);
    printf("Size of VariantRecord: %zu bytes\n", record_bytes);
    printf("Size of int*: %zu bytes\n", pointer_bytes);

    printf("Array has %d elements:\n", va.count);
    int idx = 0;
    while (idx < va.count) {
        printf("  [%d] ", idx);
        print_variant(&va.records[idx]);
        idx++;
    }
    return 0;
}

9. Union Best Practices and Advanced Tips

#include <stdalign.h>
#include <stdint.h>
#include <stdio.h>
// 1. Controlling alignment
// The 8-byte alignment is requested explicitly with alignas. Relying on the
// double member alone is not portable: the alignment of double is
// implementation-defined (e.g. 4 inside aggregates on the i386 System V ABI).
typedef union {
    alignas(8) char bytes[32];  // raw storage, guaranteed 8-byte aligned
    double d;                   // additionally imposes alignof(double)
    void *ptr;                  // additionally imposes pointer alignment
} AlignedBuffer;
// 2. Anonymous union (C11)
// The union has neither a tag nor a member name, so i, f and d are reached
// directly on the enclosing struct (av.i rather than av.data.i). They still
// share storage: only the member written last holds a meaningful value.
typedef struct {
    int type;
    union {
        int i;
        float f;
        double d;
    };
} AnonVariant;
// 3. Union of structs with common initial sequence

// Header carried by every shape.
typedef struct {
    int type;
    int id;
} Base;

// A point: the shared header followed by its coordinates.
typedef struct {
    Base base;  // kept as the first member so every shape starts with a Base
    int x;
    int y;
} Point;

// A circle: the shared header followed by its radius.
typedef struct {
    Base base;  // kept as the first member so every shape starts with a Base
    int radius;
} Circle;

// Any shape. Every alternative begins with a Base, so the header can be read
// through `base` regardless of which alternative was stored.
typedef union {
    Base base;
    Point point;
    Circle circle;
} ShapeUnion;
void print_shape_info(const ShapeUnion *shape) {
// Safe to access base.type through any member
int type = shape->base.type;  // or shape->point.base.type
printf("Type: %d, ID: %d\n", shape->base.type, shape->base.id);
}
// 4. Checking union size at compile time
typedef union {
    int i;
    double d;
    char str[100];
} MyUnion;
// Compile-time assertion
// A union is as large as its largest member, rounded UP to a multiple of the
// union's alignment. With an 8-byte-aligned double that is 104 bytes, not 100,
// so an exact "== 100" check fails to compile on common ABIs. Assert the
// portable bounds instead: room for str, and no more than alignment padding.
_Static_assert(sizeof(MyUnion) >= 100 &&
               sizeof(MyUnion) < 100 + _Alignof(MyUnion),
               "MyUnion should be 100 bytes plus alignment padding only");
// 5. Union with flexible array member (C99)
// Variant whose string alternative carries its characters in a trailing
// flexible array, so sizeof(FlexVariant) does not include any string bytes.
// NOTE(review): flexible array members are C99, but the anonymous union used
// here is a C11 feature.
// NOTE(review): ISO C (C11 6.7.2.1p3) does not allow a struct ending in a
// flexible array member to be a member of another struct, directly or through
// a union. This layout therefore appears to rely on a compiler extension -
// confirm by building with -pedantic before using it in portable code.
typedef struct {
int type;
union {
int i;
double d;
struct {
int count;
char data[];  // Flexible array member: contributes no bytes to sizeof
} string;
};
} FlexVariant;
int main() {
// Alignment control
AlignedBuffer buf;
printf("AlignedBuffer address: %p\n", (void*)&buf);
printf("Is 8-byte aligned? %d\n", 
(int)((uintptr_t)&buf % 8 == 0));
// Anonymous union
AnonVariant av = {.type = 1, .i = 42};
printf("AnonVariant: type=%d, value=%d\n", av.type, av.i);
// Common initial sequence
ShapeUnion su;
su.point.base.type = 100;
su.point.base.id = 200;
su.point.x = 10;
su.point.y = 20;
print_shape_info(&su);
// Size check
printf("Size of FlexVariant: %zu\n", sizeof(FlexVariant));
return 0;
}

Advanced Union Patterns Cheat Sheet

| Pattern | Use Case | Benefits |
|---|---|---|
| Tagged Union | Variant types, JSON | Type safety, memory efficiency |
| Type Punning | Float analysis, serialization | Standard-compliant reinterpretation |
| Polymorphism | Object-oriented C | Virtual function-like behavior |
| Hardware Registers | Embedded systems | Bit field access, atomic updates |
| Protocol Headers | Networking | Efficient packet parsing |
| Memory Pools | Custom allocators | Alignment guarantee, overhead reduction |
| Dual-Purpose Structures | Generic data structures | Code reuse, flexibility |
| Common Initial Sequence | Type hierarchies | Safe upcasting |

Best Practices

  1. Always track the active member (use tagged unions)
  2. Initialize before use - set at least one member
  3. Use _Static_assert to verify sizes and alignments
  4. Document the active member in comments
  5. Consider alignment requirements for performance
  6. Use anonymous unions (C11) for cleaner syntax
  7. Prefer unions over type punning through casts for standard compliance
  8. Use unions for hardware access with volatile qualifiers

Common Pitfalls (Advanced)

// Illustrative fragments only: each one shows a mistake, not code to copy.
// Pitfall 1: Assuming union size is sum of members
union Large {
int i;
double d;
char str[1000];
};  // Size is that of the largest member, rounded up to the union's alignment: typically 1000, not 1000 + 8 + 4
// Pitfall 2: Type punning with incompatible types (strict aliasing)
float f = 3.14f;
int i = *(int*)&f;  // Undefined behavior: reads a float object through an int lvalue. Use a union or memcpy instead
// Pitfall 3: Forgetting about padding in structs within unions
struct S { char c; int i; };  // Likely 8 bytes: 1 for c, 3 of padding, 4 for i (assuming a 4-byte, 4-byte-aligned int)
union U { struct S s; double d; };  // Size max(8,8) = 8 under the same assumptions
// Pitfall 4: Volatile and unions in hardware access
// The qualifier below applies to the object `reg`, not to the type `union Reg`:
// any other `union Reg` object, or a plain `union Reg *` pointing at a
// register, is NOT volatile and its accesses may be optimized away.
// Also note that `bytes` overlays only 2 of the 4 bytes of `word`, and which
// two depends on the target's byte order.
volatile union Reg {
uint32_t word;
struct { uint8_t low, high; } bytes;
} reg;  // Only `reg` itself is volatile; qualify every other object or pointer used for register access as well

Conclusion

Advanced union usage in C enables sophisticated programming patterns that are essential for systems programming, embedded development, and performance-critical applications. Key advanced techniques include:

  • Tagged unions for type-safe variants
  • Type punning for binary representation analysis
  • Polymorphism through union-based vtables
  • Hardware register mapping with bit fields
  • Protocol parsing with nested unions
  • Custom memory allocators with alignment control

Understanding these advanced patterns allows you to write more efficient, portable, and expressive C code, particularly in domains where memory is constrained, performance is critical, or hardware interaction is required.

Leave a Reply

Your email address will not be published. Required fields are marked *


Macro Nepal Helper