Strings are fundamental to almost every C program, from simple user prompts to complex text processing applications. Unlike higher-level languages that provide built-in string types, C handles strings as arrays of characters terminated by a special null character. This approach gives programmers fine-grained control over memory and performance but requires careful management to avoid common pitfalls. Understanding C strings thoroughly is essential for writing robust, secure, and efficient code.
What is a String in C?
In C, a string is a sequence of characters stored in contiguous memory locations, terminated by a null character (\0). The null terminator marks the end of the string, allowing functions to determine where the string stops.
String Representation:
Index:  0    1    2    3    4    5    6    7    8    9
      ┌────┬────┬────┬────┬────┬────┬────┬────┬────┬────┐
      │ H  │ e  │ l  │ l  │ o  │ \0 │    │    │    │    │
      └────┴────┴────┴────┴────┴────┴────┴────┴────┴────┘
The string "Hello" occupies 6 bytes: 5 characters plus the null terminator.
Declaring and Initializing Strings
Method 1: String Literal (Array)
#include <stdio.h>
// Demonstrates the array forms of string initialization and shows that
// sizeof on such an array counts the terminating '\0'.
int main() {
    // Initialized from a literal: the compiler appends '\0' automatically
    char str1[] = "Hello";
    // Equivalent element-by-element form: the terminator is listed by hand
    char str2[6] = {'H', 'e', 'l', 'l', 'o', '\0'};
    // No explicit size: the array is sized from the literal
    char str3[] = "Hello, World!";

    printf("str1: %s\n", str1);
    printf("str2: %s\n", str2);
    printf("str3: %s\n", str3);
    // sizeof yields a size_t, whose matching conversion is %zu (not %lu)
    printf("Size of str3: %zu bytes\n", sizeof(str3)); // Includes null terminator
    return 0;
}
Method 2: Character Array (Fixed Size)
#include <stdio.h>
// Shows fixed-size character arrays that are larger than their initializers.
int main() {
    // A partially initialized array has its remaining elements set to zero
    char city[20] = "New York";
    // Element-wise initialization with an explicit terminator
    char name[20] = {'J', 'o', 'h', 'n', '\0'};
    // The declared size must always leave room for the null terminator
    char buffer[50] = "Hello";

    printf("Buffer: %s\n"
           "Name: %s\n"
           "City: %s\n",
           buffer, name, city);
    return 0;
}
Method 3: Pointer to String Literal
#include <stdio.h>
// Shows a pointer that refers to a string literal instead of owning a copy.
int main() {
    // The literal must be treated as read-only, hence the const qualifier.
    // Writing through it (for example str[0] = 'h') is undefined behavior
    // and may crash the program.
    const char *str = "Hello, World!";

    // Reading through the pointer, including indexing, is always fine
    printf("String literal: %s\n"
           "Character at index 1: %c\n",
           str, str[1]);
    return 0;
}
Important Distinction: Array vs Pointer
#include <stdio.h>
// Contrasts a character array (a private, writable copy of the literal)
// with a pointer to the literal itself.
int main() {
    char arr[] = "Hello";      // Array - mutable copy
    const char *ptr = "Hello"; // Pointer to read-only literal

    arr[0] = 'h'; // OK - modifies array copy
    // ptr[0] = 'h'; // Rejected by the compiler thanks to const; without
    //               // const it would be undefined behavior and may crash

    printf("Array: %s\n", arr);
    printf("Pointer: %s\n", ptr);

    // sizeof gives different results; it yields size_t, so print with %zu
    printf("sizeof(arr): %zu\n", sizeof(arr)); // 6 (including '\0')
    printf("sizeof(ptr): %zu\n", sizeof(ptr)); // Pointer size (platform-dependent, e.g. 8 on 64-bit)
    return 0;
}
String Input/Output
1. printf() and puts() for Output
#include <stdio.h>
// Tours the basic output routines for strings: printf, puts, fputs and fputc.
int main() {
    int age = 25;
    char city[] = "New York";
    char name[] = "Alice";

    // %s prints a null-terminated string; other conversions can be mixed in
    printf("Name: %s\n"
           "City: %s, Age: %d\n",
           name, city, age);

    // puts writes the string and then appends a newline on its own
    puts("Hello, World!");
    puts(name); // "Alice" followed by a newline

    // fputs writes exactly what it is given, so the newline is emitted separately
    fputs("This is ", stdout);
    fputs("a test", stdout);
    fputc('\n', stdout);

    return 0;
}
2. scanf() for Input (Simple but Limited)
#include <stdio.h>
// Reads single words with scanf("%s") and demonstrates that the conversion
// stops at the first whitespace character.
// Returns 0 on success, 1 if any read fails (EOF or input error).
int main() {
    char name[50];
    char city[50];
    char fullName[50];

    printf("Enter your name: ");
    // A bare %s has no bounds check; the field width 49 leaves room for '\0'.
    // No & is needed because the array name already decays to a pointer.
    if (scanf("%49s", name) != 1) {
        fprintf(stderr, "Failed to read name\n");
        return 1;
    }

    printf("Enter your city: ");
    if (scanf("%49s", city) != 1) {
        fprintf(stderr, "Failed to read city\n");
        return 1;
    }
    printf("Hello %s from %s!\n", name, city);

    // Problem: scanf stops at whitespace
    printf("\n--- Problem with spaces ---\n");
    printf("Enter your full name: ");
    if (scanf("%49s", fullName) != 1) { // Only reads first word!
        fprintf(stderr, "Failed to read full name\n");
        return 1;
    }
    printf("You entered: %s\n", fullName);
    return 0;
}
3. fgets() for Safe Input
#include <stdio.h>
#include <string.h>
// Reads two whole lines with fgets and strips the trailing newline from each.
// Returns 0 on success, 1 if a line cannot be read (EOF or input error).
int main() {
    char name[50];
    char city[50];
    size_t len;

    printf("Enter your full name: ");
    // fgets returns NULL on EOF/error and then leaves the buffer unusable,
    // so the result must be checked before calling strlen on it
    if (fgets(name, sizeof(name), stdin) == NULL) {
        fprintf(stderr, "Failed to read name\n");
        return 1;
    }
    // Remove trailing newline if present (absent when the line was truncated)
    len = strlen(name);
    if (len > 0 && name[len - 1] == '\n') {
        name[len - 1] = '\0';
    }

    printf("Enter your city: ");
    if (fgets(city, sizeof(city), stdin) == NULL) {
        fprintf(stderr, "Failed to read city\n");
        return 1;
    }
    // Remove trailing newline
    len = strlen(city);
    if (len > 0 && city[len - 1] == '\n') {
        city[len - 1] = '\0';
    }

    printf("Hello %s from %s!\n", name, city);
    return 0;
}
4. getchar() for Character-by-Character Input
#include <stdio.h>
// Builds a line of text one character at a time using getchar.
int main() {
    char line[100];
    int used = 0;
    int ch; // int, not char, so that EOF can be told apart from real characters

    printf("Enter a line of text: ");

    // Stop when the buffer holds 99 characters, or at end of line / end of input.
    // The capacity test comes first so no character is consumed once full.
    for (;;) {
        if (used >= 99) {
            break;
        }
        ch = getchar();
        if (ch == '\n' || ch == EOF) {
            break;
        }
        line[used] = ch;
        used++;
    }
    line[used] = '\0'; // The terminator is written by hand

    printf("You entered: %s\n", line);
    return 0;
}
Essential String Functions
C provides a rich set of string functions in the <string.h> header.
1. strlen() - Get String Length
#include <stdio.h>
#include <string.h>
// Prints string lengths with strlen and contrasts strlen with sizeof.
int main() {
    char str1[] = "Hello";
    char str2[] = "Hello, World!";
    char str3[] = "";

    // strlen returns size_t, whose matching conversion is %zu (not %lu)
    printf("Length of '%s': %zu\n", str1, strlen(str1));    // 5
    printf("Length of '%s': %zu\n", str2, strlen(str2));    // 13
    printf("Length of empty string: %zu\n", strlen(str3));  // 0

    // sizeof vs strlen
    printf("\n--- sizeof vs strlen ---\n");
    printf("sizeof(str1): %zu\n", sizeof(str1)); // 6 (includes '\0')
    printf("strlen(str1): %zu\n", strlen(str1)); // 5 (characters only)
    return 0;
}
2. strcpy() and strncpy() - Copy Strings
#include <stdio.h>
#include <string.h>
// Compares strcpy with size-limited strncpy, including a deliberately
// undersized destination.
int main() {
    char source[] = "Hello, World!";
    char fullCopy[50];
    char boundedCopy[50];
    char smallCopy[10]; // Too small to hold the whole source

    // strcpy performs no bounds checking; it is only safe here because
    // the destination is known to be large enough
    strcpy(fullCopy, source);
    printf("strcpy: %s\n", fullCopy);

    // strncpy copies at most the given count and may leave the destination
    // unterminated, so the last byte is set to '\0' explicitly
    strncpy(boundedCopy, source, sizeof boundedCopy - 1);
    boundedCopy[sizeof boundedCopy - 1] = '\0';
    printf("strncpy: %s\n", boundedCopy);

    // strcpy(smallCopy, source) would overflow the 10-byte buffer.
    // The size-limited copy truncates instead.
    strncpy(smallCopy, source, sizeof smallCopy - 1);
    smallCopy[sizeof smallCopy - 1] = '\0';
    printf("Truncated: %s\n", smallCopy); // "Hello, Wo"

    return 0;
}
3. strcat() and strncat() - Concatenate Strings
#include <stdio.h>
#include <string.h>
// Demonstrates appending with strcat and size-limited strncat, then builds
// a string piece by piece.
int main() {
    char str1[50] = "Hello";
    char str2[] = ", World!";
    char str3[50] = "Hello";
    char str4[] = " and welcome!";

    // strcat has no idea how large the destination is
    strcat(str1, str2);
    printf("strcat result: %s\n", str1);

    // strncat's limit is the space still free, minus one for the terminator
    size_t room = sizeof(str3) - strlen(str3) - 1;
    strncat(str3, str4, room);
    printf("strncat result: %s\n", str3);

    // Incremental construction: each append recomputes the remaining room
    char buffer[100] = "";
    const char *pieces[] = {"Hello", " ", "World", "!"};
    for (size_t k = 0; k < sizeof pieces / sizeof pieces[0]; k++) {
        strncat(buffer, pieces[k], sizeof(buffer) - strlen(buffer) - 1);
    }
    printf("Built string: %s\n", buffer);

    return 0;
}
4. strcmp() and strncmp() - Compare Strings
#include <stdio.h>
#include <string.h>
#ifndef _WIN32
// strcasecmp is declared here on Linux/Unix. Headers must be included at
// file scope, never inside a function body.
#include <strings.h>
#endif
// Demonstrates strcmp/strncmp results and a platform-specific
// case-insensitive comparison.
int main() {
    char str1[] = "apple";
    char str2[] = "apple";
    char str3[] = "banana";
    char str4[] = "Apple";

    // strcmp - compare entire strings
    printf("strcmp(\"%s\", \"%s\") = %d\n", str1, str2, strcmp(str1, str2)); // 0 (equal)
    printf("strcmp(\"%s\", \"%s\") = %d\n", str1, str3, strcmp(str1, str3)); // negative (a < b)
    printf("strcmp(\"%s\", \"%s\") = %d\n", str3, str1, strcmp(str3, str1)); // positive (b > a)

    // Case sensitivity
    printf("strcmp(\"%s\", \"%s\") = %d\n", str1, str4, strcmp(str1, str4)); // Not zero (case matters)

    // strncmp - compare first n characters
    // Still not zero: "app" and "App" already differ at the first character
    printf("strncmp(\"%s\", \"%s\", 3) = %d\n", str1, str4, strncmp(str1, str4, 3));

    // Case-insensitive comparison (not part of standard C)
#ifdef _WIN32
    printf("stricmp: %d\n", _stricmp(str1, str4)); // Windows
#else
    printf("strcasecmp: %d\n", strcasecmp(str1, str4)); // 0 (case-insensitive)
#endif
    return 0;
}
5. strchr() and strrchr() - Find Characters
#include <stdio.h>
#include <string.h>
// Locates the first and last occurrence of a character with strchr/strrchr
// and shows the NULL result for a character that is absent.
int main() {
    const char *str = "Hello, World!";
    char ch = 'o';

    // Find first occurrence. The result points into the const string, so it
    // is kept in a pointer-to-const as well.
    const char *first = strchr(str, ch);
    if (first != NULL) {
        // Pointer subtraction yields ptrdiff_t, printed with %td
        printf("First '%c' found at position: %td\n", ch, first - str);
        printf("From first '%c': %s\n", ch, first);
    }

    // Find last occurrence
    const char *last = strrchr(str, ch);
    if (last != NULL) {
        printf("Last '%c' found at position: %td\n", ch, last - str);
        printf("From last '%c': %s\n", ch, last);
    }

    // Find a character not in string
    const char *notFound = strchr(str, 'z');
    if (notFound == NULL) {
        printf("Character 'z' not found\n");
    }
    return 0;
}
6. strstr() - Find Substring
#include <stdio.h>
#include <string.h>
// Finds a substring with strstr, then counts every occurrence of "the".
int main() {
    const char *text = "The quick brown fox jumps over the lazy dog";
    const char *substring = "fox";

    // Find first occurrence of substring
    const char *found = strstr(text, substring);
    if (found != NULL) {
        // Pointer subtraction yields ptrdiff_t, printed with %td
        printf("Found '%s' at position: %td\n", substring, found - text);
        printf("Context: ...%s...\n", found);
    } else {
        printf("'%s' not found\n", substring);
    }

    // Multiple occurrences. The cursor walks a const string, so it must be a
    // pointer-to-const. The search is case-sensitive, so the leading "The"
    // is not counted.
    const char *ptr = text;
    int count = 0;
    while ((ptr = strstr(ptr, "the")) != NULL) {
        count++;
        printf("Found 'the' at position: %td\n", ptr - text);
        ptr++; // Move past this occurrence
    }
    printf("Total occurrences of 'the': %d\n", count);
    return 0;
}
Common String Operations
1. Converting String Case
#include <stdio.h>
#include <ctype.h>
#include <string.h>
// Converts every character of str to upper case, in place.
// str must point to a writable, null-terminated string.
void toUpperCase(char *str) {
    for (size_t i = 0; str[i] != '\0'; i++) {
        // <ctype.h> functions require a value representable as unsigned char
        // (or EOF); a negative plain char would be undefined behavior
        str[i] = (char)toupper((unsigned char)str[i]);
    }
}
// Converts every character of str to lower case, in place.
// str must point to a writable, null-terminated string.
void toLowerCase(char *str) {
    for (size_t i = 0; str[i] != '\0'; i++) {
        // <ctype.h> functions require a value representable as unsigned char
        // (or EOF); a negative plain char would be undefined behavior
        str[i] = (char)tolower((unsigned char)str[i]);
    }
}
// Runs the in-place case converters on a sample string, printing each stage.
int main() {
    char text[] = "Hello, World!";

    printf("Original: %s\n", text);

    // Both helpers modify the array directly, so the same buffer is reused
    toUpperCase(text);
    printf("Uppercase: %s\n", text);

    toLowerCase(text);
    printf("Lowercase: %s\n", text);

    return 0;
}
2. Trimming Whitespace
#include <stdio.h>
#include <string.h>
#include <ctype.h>
// Skips leading whitespace.
// Returns a pointer to the first non-whitespace character of str (or to its
// terminator); the string itself is not modified.
char *ltrim(char *str) {
    char *cursor = str;
    for (; isspace((unsigned char)*cursor); ++cursor) {
        // Nothing to do: the loop header advances past each whitespace char
    }
    return cursor;
}
// Removes trailing whitespace from str in place by writing a new terminator.
// Returns str. Empty and all-whitespace strings become "".
char *rtrim(char *str) {
    // Work with a length rather than an end pointer: for an empty string an
    // end pointer of str + strlen(str) - 1 would point before the array
    size_t len = strlen(str);
    while (len > 0 && isspace((unsigned char)str[len - 1])) {
        len--;
    }
    str[len] = '\0';
    return str;
}
// Strips whitespace from both ends of str.
// The leading side is skipped first, then the trailing side is cut off in
// place; the returned pointer may therefore lie inside the original buffer.
char *trim(char *str) {
    char *start = ltrim(str);
    return rtrim(start);
}
// Trims a sample string and prints it with its length before and after.
int main() {
    char str[] = " Hello, World! ";

    printf("Original: '%s'\n", str);
    // strlen returns size_t, whose matching conversion is %zu (not %lu)
    printf("Length: %zu\n", strlen(str));

    char *trimmed = trim(str); // Modifies original
    printf("Trimmed: '%s'\n", trimmed);
    printf("Length: %zu\n", strlen(trimmed));
    return 0;
}
3. Tokenizing Strings (Splitting)
#include <stdio.h>
#include <string.h>
// Splits strings into tokens with strtok, first on one delimiter and then
// on a set of delimiters.
int main() {
    char str[] = "apple,banana,orange,grape,kiwi";
    char *token;

    printf("Original: %s\n", str);
    printf("Tokens:\n");

    // The first call receives the string; later calls pass NULL to continue.
    // strtok overwrites each delimiter it finds with '\0'.
    for (token = strtok(str, ","); token != NULL; token = strtok(NULL, ",")) {
        printf(" - %s\n", token);
    }

    // The buffer has been modified, so it now ends after the first token
    printf("After strtok: %s\n", str);

    // Any character in the delimiter set separates tokens
    char text[] = "one,two;three:four five";
    char delimiters[] = ",;: ";
    printf("\nMultiple delimiters:\n");
    for (token = strtok(text, delimiters); token != NULL; token = strtok(NULL, delimiters)) {
        printf(" - %s\n", token);
    }

    return 0;
}
4. Reversing a String
#include <stdio.h>
#include <string.h>
// Reverses the characters of str in place.
// str must point to a writable, null-terminated string.
void reverse(char *str) {
    int lo = 0;
    int hi = strlen(str);
    hi = hi - 1;

    // Swap the outermost pair, then move both indices toward the middle
    while (lo < hi) {
        char held = str[lo];
        str[lo] = str[hi];
        str[hi] = held;
        lo++;
        hi--;
    }
}
// Prints a sample string before and after reversing it in place.
int main() {
    char str[] = "Hello, World!";

    printf("Original: %s\n", str);

    // An array (not a literal) is used because reverse writes to its argument
    reverse(str);

    printf("Reversed: %s\n", str);
    return 0;
}
5. Checking for Palindromes
#include <stdio.h>
#include <string.h>
#include <ctype.h>
// Tests whether str reads the same forwards and backwards, ignoring case
// and every character that is not a letter or digit.
// Returns 1 for a palindrome (including the empty string), 0 otherwise.
int isPalindrome(const char *str) {
    size_t len = strlen(str);
    if (len == 0) {
        return 1; // Nothing to compare; also keeps len - 1 from wrapping
    }

    size_t left = 0;
    size_t right = len - 1;
    while (left < right) {
        // <ctype.h> functions require a value representable as unsigned char
        unsigned char lch = (unsigned char)str[left];
        unsigned char rch = (unsigned char)str[right];

        // Skip non-alphanumeric characters
        if (!isalnum(lch)) {
            left++;
            continue;
        }
        if (!isalnum(rch)) {
            right--; // Safe: right > left >= 0 here
            continue;
        }
        // Case-insensitive comparison
        if (tolower(lch) != tolower(rch)) {
            return 0; // Not a palindrome
        }
        left++;
        right--;
    }
    return 1; // Is a palindrome
}
// Runs isPalindrome over a fixed list of samples and reports each verdict.
int main() {
    const char *tests[] = {
        "racecar",
        "A man, a plan, a canal: Panama",
        "hello",
        "Madam",
        "12321"
    };
    const int total = (int)(sizeof tests / sizeof tests[0]);

    for (int idx = 0; idx < total; idx++) {
        // The article is chosen so the sentence reads "is a" / "is not a"
        const char *article = isPalindrome(tests[idx]) ? "a" : "not a";
        printf("'%s' is %s palindrome\n", tests[idx], article);
    }
    return 0;
}
Arrays of Strings
1. 2D Character Array
#include <stdio.h>
#include <string.h>
// Stores several strings in a 2D character array, prints them with their
// lengths, and overwrites one entry.
int main() {
    // Array of strings - 2D character array (5 rows of up to 19 chars + '\0')
    char fruits[5][20] = {
        "Apple",
        "Banana",
        "Orange",
        "Grape",
        "Kiwi"
    };

    printf("Fruits:\n");
    for (int i = 0; i < 5; i++) {
        // strlen returns size_t, whose matching conversion is %zu (not %lu)
        printf(" %d: %s (length: %zu)\n", i + 1, fruits[i], strlen(fruits[i]));
    }

    // Modify a string: each row is writable storage, and "Strawberry"
    // (10 chars + '\0') fits within the 20-byte row
    strcpy(fruits[2], "Strawberry");
    printf("\nAfter change:\n");
    printf(" fruit[2]: %s\n", fruits[2]);
    return 0;
}
2. Array of Pointers to Strings
#include <stdio.h>
// Holds string literals in an array of pointers and shows that an element
// can be re-pointed even though the literals themselves cannot be changed.
int main() {
    const char *colors[] = {
        "Red",
        "Green",
        "Blue",
        "Yellow",
        "Purple"
    };
    // Element count derived from the array itself
    int numColors = sizeof(colors) / sizeof(colors[0]);
    int pos;

    printf("Colors:\n");
    pos = 0;
    while (pos < numColors) {
        printf(" %d: %s\n", pos + 1, colors[pos]);
        pos++;
    }

    // Pointing an element at a different literal is fine. Writing through it
    // (for example colors[1][0] = 'c') would modify a literal and is not allowed.
    colors[1] = "Cyan";

    printf("\nAfter reassignment:\n");
    pos = 0;
    while (pos < numColors) {
        printf(" %d: %s\n", pos + 1, colors[pos]);
        pos++;
    }

    return 0;
}
3. Sorting an Array of Strings
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// Comparison function for qsort
// qsort comparator for an array of C-string pointers.
// a and b each point to an array element (a pointer to a string), so they
// are dereferenced once before the strings are compared with strcmp.
int compareStrings(const void *a, const void *b) {
    const char *const *lhs = a;
    const char *const *rhs = b;
    return strcmp(*lhs, *rhs);
}
// Sorts an array of string pointers with qsort and prints it before and after.
int main() {
    const char *words[] = {
        "banana",
        "apple",
        "grape",
        "cherry",
        "date"
    };
    const size_t total = sizeof words / sizeof words[0];
    size_t k;

    printf("Original:\n");
    for (k = 0; k < total; k++) {
        printf(" %s\n", words[k]);
    }

    // Only the pointers are rearranged; the literals themselves never move
    qsort(words, total, sizeof words[0], compareStrings);

    printf("\nSorted:\n");
    for (k = 0; k < total; k++) {
        printf(" %s\n", words[k]);
    }

    return 0;
}
Dynamic String Allocation
1. Basic Dynamic Strings
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Allocates a string on the heap, grows it with realloc, and frees it.
// Returns 0 on success, 1 if the initial allocation fails.
int main() {
    // Allocate memory for a string
    char *dynamic = malloc(50 * sizeof(char));
    if (dynamic == NULL) {
        fprintf(stderr, "Memory allocation failed\n");
        return 1;
    }

    // Use the allocated memory (the text plus '\0' fits in the 50 bytes)
    strcpy(dynamic, "Hello, dynamically allocated string!");
    printf("Dynamic string: %s\n", dynamic);
    // strlen returns size_t, whose matching conversion is %zu (not %lu)
    printf("Length: %zu\n", strlen(dynamic));

    // Resize if needed. The result goes into a temporary so the original
    // block is still reachable (and freed below) if realloc fails.
    char *resized = realloc(dynamic, 100 * sizeof(char));
    if (resized != NULL) {
        dynamic = resized;
        strcat(dynamic, " This is additional text.");
        printf("After resize: %s\n", dynamic);
    } else {
        fprintf(stderr, "Resize failed; keeping original string\n");
    }

    // Always free allocated memory
    free(dynamic);
    return 0;
}
2. Function Returning Dynamically Allocated String
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Returns a newly allocated copy of source.
// Yields NULL when source is NULL or when the allocation fails.
// Ownership of the copy passes to the caller, who must free() it.
char *duplicateString(const char *source) {
    char *copy = NULL;

    if (source != NULL) {
        const size_t bytes = strlen(source) + 1; // characters plus terminator
        copy = malloc(bytes);
        if (copy != NULL) {
            memcpy(copy, source, bytes); // the terminator is copied as well
        }
    }
    return copy;
}
// Returns a newly allocated string holding s1 followed by s2.
// Yields NULL when either argument is NULL or when the allocation fails.
// Ownership of the result passes to the caller, who must free() it.
char *concatenateStrings(const char *s1, const char *s2) {
    if (!s1 || !s2) {
        return NULL;
    }

    const size_t headLen = strlen(s1);
    const size_t tailLen = strlen(s2);

    char *joined = malloc(headLen + tailLen + 1); // one extra byte for '\0'
    if (!joined) {
        return NULL;
    }

    memcpy(joined, s1, headLen);
    memcpy(joined + headLen, s2, tailLen + 1); // includes s2's terminator
    return joined;
}
// Exercises duplicateString and concatenateStrings, releasing each result.
int main() {
    char *combined;
    char *copy;

    // Both helpers return NULL on failure, so each result is checked
    // before it is printed and freed
    copy = duplicateString("Hello, World!");
    if (copy != NULL) {
        printf("Copy: %s\n", copy);
        free(copy);
    }

    combined = concatenateStrings("Hello, ", "Dynamic World!");
    if (combined != NULL) {
        printf("Combined: %s\n", combined);
        free(combined);
    }

    return 0;
}
3. Building Strings with sprintf()
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Formats name, age and height into a newly allocated string.
// Returns NULL if the length cannot be determined or the allocation fails.
// Ownership of the result passes to the caller, who must free() it.
char *formatString(const char *name, int age, double height) {
    static const char layout[] = "Name: %s, Age: %d, Height: %.2f";

    // A NULL buffer with size 0 makes snprintf report the length it would need
    int needed = snprintf(NULL, 0, layout, name, age, height);
    if (needed < 0) {
        return NULL;
    }

    // The reported length excludes the terminator, hence the extra byte
    char *text = malloc(needed + 1);
    if (text == NULL) {
        return NULL;
    }

    sprintf(text, layout, name, age, height);
    return text;
}
// Formats one sample record and prints it.
int main() {
    char *info = formatString("Alice", 25, 1.75);

    // NULL signals failure; otherwise the string is ours to release
    if (info == NULL) {
        return 0;
    }
    printf("Formatted: %s\n", info);
    free(info);
    return 0;
}
Common Pitfalls and Best Practices
Pitfall 1: Buffer Overflow
#include <stdio.h>
#include <string.h>
// Shows two bounded ways of filling a small buffer without overflowing it.
int main() {
    char buffer[10];
    const size_t lastIndex = sizeof(buffer) - 1;

    // An unbounded strcpy of a long string into this 10-byte buffer would
    // write past its end. strncpy limits the copy; the final byte is then
    // set so the result is terminated in every case.
    strncpy(buffer, "Hello", lastIndex);
    buffer[lastIndex] = '\0';
    printf("Buffer: %s\n", buffer);

    // snprintf takes the full buffer size and always terminates the output,
    // truncating the text when it does not fit
    snprintf(buffer, sizeof(buffer), "%s", "This is a test");
    printf("Buffer: %s\n", buffer);

    return 0;
}
Pitfall 2: Forgetting Null Terminator
#include <stdio.h>
#include <string.h>
// Illustrates a character array that is completely filled and therefore
// has no null terminator, next to a correctly sized one.
int main() {
    char str[5];

    // All five elements hold letters, so no room is left for '\0'
    // (index 5 would be outside the array)
    memcpy(str, "Hello", sizeof str);

    // Passing str to printf("%s") would be undefined behavior: the read
    // continues past the array until a zero byte happens to be found.

    // Six elements leave room for the terminator the literal brings along
    char correct[6] = "Hello";
    printf("Correct: %s\n", correct);

    return 0;
}
Pitfall 3: Comparing Strings with ==
#include <stdio.h>
#include <string.h>
// Contrasts == (which compares addresses) with strcmp (which compares
// contents) for arrays and for pointers to literals.
int main() {
    char str1[] = "Hello";
    char str2[] = "Hello";
    const char *str3 = "Hello";
    const char *str4 = "Hello";

    // Each == below compares where the strings live, not what they contain
    int arraysSameAddr = (str1 == str2);
    int arrayLiteralSameAddr = (str1 == str3);
    // Identical literals may or may not share storage (compiler-dependent)
    int literalsSameAddr = (str3 == str4);

    if (arraysSameAddr) {
        printf("str1 == str2 (WRONG - won't print)\n");
    }
    if (arrayLiteralSameAddr) {
        printf("str1 == str3 (WRONG - won't print)\n");
    }

    // strcmp returns 0 when the contents match
    if (!strcmp(str1, str2)) {
        printf("str1 equals str2 (CORRECT)\n");
    }

    if (literalsSameAddr) {
        printf("str3 == str4 (may be true due to string pooling)\n");
    }

    // strcmp stays reliable whether or not the literals were pooled
    if (!strcmp(str3, str4)) {
        printf("str3 equals str4 (CORRECT)\n");
    }

    return 0;
}
Pitfall 4: Modifying String Literals
#include <stdio.h>
#include <string.h>
// Shows why string literals must not be written to, and the safe
// alternative of copying the literal into an array first.
int main() {
    // Points to read-only memory. Declaring the pointer const makes the
    // compiler reject writes; with a plain char * the write below would
    // compile and then be undefined behavior (may crash).
    const char *str = "Hello";
    // str[0] = 'h'; // WRONG!
    (void)str; // Declared for illustration only

    // Correct - make a mutable copy
    char mutable[] = "Hello";
    mutable[0] = 'h';
    printf("Mutable: %s\n", mutable);
    return 0;
}
Pitfall 5: Memory Leaks
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Returns a newly allocated 100-byte buffer holding "Dynamically allocated",
// or NULL if the allocation fails.
// Ownership passes to the caller, who must free() the result.
char *createString() {
    char *str = malloc(100);
    if (str == NULL) {
        return NULL; // strcpy on a NULL destination would be undefined behavior
    }
    strcpy(str, "Dynamically allocated");
    return str;
}
// Shows a correctly freed allocation followed by a deliberate memory leak.
// Returns 0 on success, 1 if the first allocation fails.
int main() {
    char *str = createString();
    if (str == NULL) {
        fprintf(stderr, "Memory allocation failed\n");
        return 1;
    }
    printf("String: %s\n", str);
    // Don't forget to free!
    free(str);

    // Memory leak example: the missing free() is intentional here
    for (int i = 0; i < 10; i++) {
        char *leak = malloc(1000);
        if (leak == NULL) {
            break; // Never write through a failed allocation
        }
        strcpy(leak, "Leaked memory");
        // No free() - memory leak! The only pointer to the block is lost
        // when this iteration ends.
    }
    return 0;
}
Best Practices Summary
| Practice | Why | Example |
|---|---|---|
| Always null-terminate | String functions rely on '\0' | buffer[sizeof(buffer)-1] = '\0' |
| Use bounds-checked functions | Prevent buffer overflows | strncpy(), strncat(), snprintf() |
| Check return values | Detect allocation failures | if (ptr != NULL) |
| Free dynamic memory | Prevent memory leaks | free(ptr) after malloc() |
| Use const for literals | Prevent accidental modification | const char *msg = "Hello" |
| Use strcmp() for comparison | Compare content, not addresses | if (strcmp(s1, s2) == 0) |
| Validate input | Prevent security issues | Check length, content before use |
| Document string ownership | Clarify who frees memory | Comments on allocation/deallocation |
Performance Considerations
1. Avoid Repeated strlen() in Loops
// Inefficient - calls strlen each iteration, rescanning the whole string
// every time the loop condition is evaluated
for (size_t i = 0; i < strlen(str); i++) {
    process(str[i]);
}
// Efficient - call once and reuse the result.
// The index is size_t so it is compared with len without a signed/unsigned mix.
size_t len = strlen(str);
for (size_t i = 0; i < len; i++) {
    process(str[i]);
}
2. Build Strings Efficiently
// Inefficient - multiple reallocations
char *result = malloc(1);
*result = '\0';
for (int i = 0; i < 100; i++) {
char temp[20];
sprintf(temp, "%d", i);
result = realloc(result, strlen(result) + strlen(temp) + 1);
strcat(result, temp);
}
// Better - pre-calculate size or use progressive buffer
char buffer[1024];
int pos = 0;
for (int i = 0; i < 100; i++) {
pos += sprintf(buffer + pos, "%d", i);
}
3. Use strtok with Caution
// strtok modifies original string - make a copy if needed
char *copy = malloc(strlen(original) + 1);
strcpy(copy, original);
char *token = strtok(copy, ",");
while (token) {
// Process token
token = strtok(NULL, ",");
}
free(copy); // Original unchanged
Conclusion
Strings in C are both powerful and perilous. The manual memory management and null-terminated design give programmers unprecedented control but demand careful attention to details that higher-level languages handle automatically.
Key takeaways:
- Strings are arrays of characters terminated by '\0'
- Two main forms: character arrays (mutable) and string literals (read-only)
- Always ensure buffers are large enough and null-terminated
- Use safe functions (strncpy, strncat, snprintf) instead of unsafe ones
- String functions are in <string.h>
- Dynamic strings require manual allocation and deallocation
- String literals are read-only and may be shared by the compiler
- Always use strcmp() for comparison, never ==
Mastering C strings is essential for systems programming, embedded development, and any application where performance and control are paramount. While the learning curve is steeper than in higher-level languages, the understanding gained provides insight into how all programming languages ultimately handle text at the memory level. With careful attention to the practices outlined in this guide, you can write robust, secure, and efficient string-handling code in C.