String Tokenization in C: Complete Guide

Introduction to String Tokenization

String tokenization is the process of splitting a string into smaller parts called tokens based on delimiters. In C, string tokenization is crucial for parsing input, processing commands, analyzing text, and many other applications.


Tokenization Architecture Overview

String Tokenization Process
├── Input String
│   └── "Hello,World;How:Are|You"
├── Delimiters
│   └── ",;:|"
├── Tokenization
│   ├── Token 1: "Hello"
│   ├── Token 2: "World"
│   ├── Token 3: "How"
│   ├── Token 4: "Are"
│   └── Token 5: "You"
└── Token Storage
    ├── Array of strings
    ├── Linked list
    └── Dynamic allocation

Standard Library Tokenization

1. strtok() Function

#include <stdio.h>
#include <string.h>
// Demo: split a string on any of the characters ",!:|" using strtok().
// The input lives in a writable array because strtok() modifies the buffer
// it is given.
int main() {
    char text[] = "Hello,World!How:Are|You";
    char seps[] = ",!:|";

    printf("Original string: %s\n", text);
    printf("Delimiters: %s\n", seps);
    printf("Tokens:\n");

    // The first call receives the buffer; each later call passes NULL so
    // strtok() continues from where the previous call stopped.
    for (char *piece = strtok(text, seps); piece != NULL; piece = strtok(NULL, seps)) {
        printf("  - %s\n", piece);
    }
    return 0;
}

Output:

Original string: Hello,World!How:Are|You
Delimiters: ,!:|
Tokens:
  - Hello
  - World
  - How
  - Are
  - You

2. Multiple Delimiters Example

#include <stdio.h>
#include <string.h>
// Demo: count the words of a sentence, treating space, hyphen and period
// as separators.
int main() {
    char text[] = "The quick-brown fox jumps over the lazy dog.";
    char seps[] = " -.";  // Space, hyphen, period
    int seen = 0;

    printf("Sentence: %s\n", text);
    printf("Delimiters: '%s'\n", seps);
    printf("Words:\n");

    for (char *word = strtok(text, seps); word != NULL; word = strtok(NULL, seps)) {
        seen += 1;
        printf("  %2d: %s\n", seen, word);
    }

    printf("\nTotal words: %d\n", seen);
    return 0;
}

Output:

Sentence: The quick-brown fox jumps over the lazy dog.
Delimiters: ' -.'
Words:
   1: The
   2: quick
   3: brown
   4: fox
   5: jumps
   6: over
   7: the
   8: lazy
   9: dog

Total words: 9

3. strtok_r() - Thread-Safe Version

#include <stdio.h>
#include <string.h>
int main() {
char str[] = "apple,banana,grape,orange";
char *saveptr;  // Save pointer for context
printf("Original: %s\n", str);
printf("Tokens:\n");
char *token = strtok_r(str, ",", &saveptr);
int i = 1;
while (token != NULL) {
printf("  %d: %s\n", i++, token);
token = strtok_r(NULL, ",", &saveptr);
}
return 0;
}

Custom Tokenization Implementation

1. Manual Tokenization with a Hand-Written Delimiter Check

#include <stdio.h>
#include <string.h>
#include <stdbool.h>
// Report whether `c` is one of the characters in the NUL-terminated set
// `delimiters`. The terminating NUL itself never counts as a member.
bool isDelimiter(char c, const char *delimiters) {
    const char *cursor = delimiters;
    while (*cursor != '\0') {
        if (*cursor == c) {
            return true;
        }
        cursor++;
    }
    return false;
}
// Print every non-empty run of characters in `str` that lies between
// delimiter characters (or the end of the string). `str` is only read,
// never modified.
void tokenizeManual(const char *str, const char *delimiters) {
    int total = strlen(str);
    int tokenBegin = 0;

    printf("Manual tokenization:\n");

    // Visit index `total` too, so the terminating NUL closes the last token.
    for (int pos = 0; pos <= total; pos++) {
        bool boundary = isDelimiter(str[pos], delimiters) || str[pos] == '\0';
        if (!boundary) {
            continue;
        }
        int span = pos - tokenBegin;
        if (span > 0) {
            printf("  Token: %.*s\n", span, str + tokenBegin);
        }
        tokenBegin = pos + 1;
    }
}
// Demo driver: run the manual tokenizer over a fixed sample string.
int main() {
    const char *sample = "Hello,World:How|Are-You";
    const char *seps = ",:| -";

    printf("Text: %s\n", sample);
    tokenizeManual(sample, seps);
    return 0;
}

2. Tokenization with Dynamic Array

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Growable array of heap-allocated token strings.
typedef struct {
// Token strings; NULL while the array is empty (see initTokenArray).
char **tokens;
// Number of tokens currently stored.
int count;
// Number of slots allocated in `tokens`.
int capacity;
} TokenArray;
// Put a TokenArray into the empty state: no storage, no tokens.
void initTokenArray(TokenArray *ta) {
    *ta = (TokenArray){ .tokens = NULL, .count = 0, .capacity = 0 };
}
// Append a NUL-terminated copy of (at most) the first `length` characters of
// `token` to the array, growing the storage when it is full (10 slots at
// first, then doubling).
//
// If memory cannot be allocated, a diagnostic is written with perror() and
// the array is left exactly as it was; the token is not added.
void addToken(TokenArray *ta, const char *token, int length) {
    if (ta == NULL || token == NULL || length < 0) {
        return;
    }

    // Like the strncpy() this replaces, stop early if `token` ends before
    // `length` characters, so we never read past its terminator.
    size_t copyLen = (size_t)length;
    const char *nul = memchr(token, '\0', copyLen);
    if (nul != NULL) {
        copyLen = (size_t)(nul - token);
    }

    // Build the copy first so a failed allocation cannot leave a hole in
    // the array.
    char *copy = malloc(copyLen + 1);
    if (copy == NULL) {
        perror("addToken");
        return;
    }
    memcpy(copy, token, copyLen);
    copy[copyLen] = '\0';

    if (ta->count >= ta->capacity) {
        int newCapacity = (ta->capacity == 0) ? 10 : ta->capacity * 2;
        // Keep the old pointer until realloc() succeeds; assigning the result
        // directly would leak the existing tokens on failure.
        char **grown = realloc(ta->tokens, (size_t)newCapacity * sizeof *grown);
        if (grown == NULL) {
            perror("addToken");
            free(copy);
            return;
        }
        ta->tokens = grown;
        ta->capacity = newCapacity;
    }

    ta->tokens[ta->count] = copy;
    ta->count++;
}
// Release every stored token and the slot array itself, then return the
// TokenArray to the empty state so it can be reused.
void freeTokenArray(TokenArray *ta) {
    int idx = 0;
    while (idx < ta->count) {
        free(ta->tokens[idx]);
        idx++;
    }
    free(ta->tokens);

    *ta = (TokenArray){ .tokens = NULL, .count = 0, .capacity = 0 };
}
// Split `str` on any character from `delimiters` and append each non-empty
// token to `ta` via addToken(). `str` is not modified.
//
// The end of the string always closes the current token, even when
// `delimiters` is empty (in which case the whole string is one token).
// A NULL argument makes the call a no-op.
void tokenizeToArray(const char *str, const char *delimiters, TokenArray *ta) {
    if (str == NULL || delimiters == NULL || ta == NULL) {
        return;
    }

    size_t start = 0;
    for (size_t end = 0; ; end++) {
        char c = str[end];
        int atEnd = (c == '\0');

        // Test for the terminator separately: strchr() would "find" NUL in
        // any string, and the check must not depend on the delimiter set.
        if (atEnd || strchr(delimiters, c) != NULL) {
            if (end > start) {
                addToken(ta, str + start, (int)(end - start));
            }
            start = end + 1;
        }
        if (atEnd) {
            break;
        }
    }
}
// Demo driver: tokenize a sample string into a TokenArray, list the tokens
// with 1-based numbering, then release the array.
int main() {
    const char *sample = "apple,banana;grape:orange|mango";
    const char *seps = ",;:|";
    TokenArray found;

    initTokenArray(&found);
    printf("Text: %s\n", sample);
    tokenizeToArray(sample, seps, &found);

    printf("Tokens (%d found):\n", found.count);
    int n = 0;
    while (n < found.count) {
        printf("  %d: %s\n", n + 1, found.tokens[n]);
        n++;
    }

    freeTokenArray(&found);
    return 0;
}

3. Tokenization with Linked List

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// One element of a singly linked list of tokens.
typedef struct TokenNode {
// Heap-allocated, NUL-terminated token text (freed by freeTokenList).
char *token;
// Next node in the list, or NULL for the last node.
struct TokenNode *next;
} TokenNode;
// Singly linked list of tokens with a tail pointer for appending.
typedef struct {
// First node, or NULL when the list is empty.
TokenNode *head;
// Last node, or NULL when the list is empty.
TokenNode *tail;
// Number of nodes in the list.
int count;
} TokenList;
// Put a TokenList into the empty state: no nodes, zero count.
void initTokenList(TokenList *list) {
    *list = (TokenList){ .head = NULL, .tail = NULL, .count = 0 };
}
// Append a NUL-terminated copy of (at most) the first `length` characters of
// `token` to the end of the list.
//
// If memory cannot be allocated, a diagnostic is written with perror() and
// the list is left exactly as it was.
void addTokenToList(TokenList *list, const char *token, int length) {
    if (list == NULL || token == NULL || length < 0) {
        return;
    }

    // Like the strncpy() this replaces, stop early if `token` ends before
    // `length` characters.
    size_t copyLen = (size_t)length;
    const char *nul = memchr(token, '\0', copyLen);
    if (nul != NULL) {
        copyLen = (size_t)(nul - token);
    }

    TokenNode *newNode = malloc(sizeof *newNode);
    if (newNode == NULL) {
        perror("addTokenToList");
        return;
    }
    newNode->token = malloc(copyLen + 1);
    if (newNode->token == NULL) {
        perror("addTokenToList");
        free(newNode);
        return;
    }
    memcpy(newNode->token, token, copyLen);
    newNode->token[copyLen] = '\0';
    newNode->next = NULL;

    if (list->head == NULL) {
        // First node: it is both head and tail.
        list->head = newNode;
    } else {
        list->tail->next = newNode;
    }
    list->tail = newNode;
    list->count++;
}
// Release every node and its token text, then return the list to the
// empty state.
void freeTokenList(TokenList *list) {
    TokenNode *following;
    for (TokenNode *node = list->head; node != NULL; node = following) {
        // Read the link before the node that holds it is freed.
        following = node->next;
        free(node->token);
        free(node);
    }

    list->head = list->tail = NULL;
    list->count = 0;
}
// Print each token on its own line, numbered from 1, in list order.
void printTokenList(TokenList *list) {
    int ordinal = 0;
    for (TokenNode *node = list->head; node != NULL; node = node->next) {
        printf("  %d: %s\n", ++ordinal, node->token);
    }
}
// Split `str` on any character from `delimiters` and append each non-empty
// token to `list` via addTokenToList(). `str` is not modified.
//
// The end of the string always closes the current token, even when
// `delimiters` is empty (in which case the whole string is one token).
// A NULL argument makes the call a no-op.
void tokenizeToList(const char *str, const char *delimiters, TokenList *list) {
    if (str == NULL || delimiters == NULL || list == NULL) {
        return;
    }

    size_t start = 0;
    for (size_t end = 0; ; end++) {
        char c = str[end];
        int atEnd = (c == '\0');

        // Test for the terminator separately: strchr() would "find" NUL in
        // any string, and the check must not depend on the delimiter set.
        if (atEnd || strchr(delimiters, c) != NULL) {
            if (end > start) {
                addTokenToList(list, str + start, (int)(end - start));
            }
            start = end + 1;
        }
        if (atEnd) {
            break;
        }
    }
}
// Demo driver: tokenize a sample string into a linked list, print it, and
// release it.
int main() {
    const char *sample = "red,green;blue:yellow|purple";
    const char *seps = ",;:|";
    TokenList found;

    initTokenList(&found);
    printf("Text: %s\n", sample);
    tokenizeToList(sample, seps, &found);

    printf("Tokens (%d found):\n", found.count);
    printTokenList(&found);

    freeTokenList(&found);
    return 0;
}

Advanced Tokenization Techniques

1. Quoted String Handling

#include <stdio.h>
#include <string.h>
#include <ctype.h>
// Print the whitespace-separated tokens of `input`, treating text between
// double quotes as part of a single token (whitespace inside quotes is kept,
// the quote characters themselves are dropped).
//
// A token closed by a quote is labelled [QUOTED]; one closed by whitespace
// or by the end of the input is labelled [NORMAL]. Tokens longer than 255
// characters are truncated to 255 rather than overrunning the buffer.
// A NULL `input` is ignored.
void tokenizeWithQuotes(const char *input) {
    char buffer[256];
    const size_t limit = sizeof buffer - 1;  // leave room for the terminator
    size_t used = 0;
    int inQuotes = 0;

    if (input == NULL) {
        return;
    }

    printf("Input: %s\n", input);
    printf("Tokens:\n");

    for (const char *ptr = input; *ptr != '\0'; ptr++) {
        // <ctype.h> functions require a value representable as unsigned char.
        unsigned char ch = (unsigned char)*ptr;

        if (ch == '"') {
            if (inQuotes) {
                // Closing quote ends the token.
                buffer[used] = '\0';
                printf("  [QUOTED]: %s\n", buffer);
                used = 0;
            }
            inQuotes = !inQuotes;
        } else if (!inQuotes && isspace(ch)) {
            if (used > 0) {
                buffer[used] = '\0';
                printf("  [NORMAL]: %s\n", buffer);
                used = 0;
            }
        } else if (used < limit) {
            buffer[used++] = (char)ch;
        }
        // else: the token no longer fits; further characters are dropped.
    }

    // Handle last token (also reached when a quote was never closed).
    if (used > 0) {
        buffer[used] = '\0';
        printf("  [NORMAL]: %s\n", buffer);
    }
}
// Demo driver: run the quote-aware tokenizer over two sample inputs,
// separated by a blank line.
int main() {
    const char *samples[] = {
        "hello world \"quoted string\" test",
        "command \"file with spaces.txt\" verbose",
    };

    tokenizeWithQuotes(samples[0]);
    printf("\n");
    tokenizeWithQuotes(samples[1]);
    return 0;
}

2. CSV Parsing with Complex Delimiters

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// One parsed CSV record.
typedef struct {
// Array of heap-allocated field strings (released by freeCSVRow).
char **fields;
// Number of entries in `fields`.
int fieldCount;
} CSVRow;
// A collection of CSV rows. NOTE(review): not referenced by the code in this
// example; presumably intended for multi-line input.
typedef struct {
// Array of rows.
CSVRow *rows;
// Number of entries in `rows`.
int rowCount;
} CSVData;
// Parse CSV line handling quoted fields
CSVRow parseCSVLine(const char *line) {
CSVRow row;
row.fields = NULL;
row.fieldCount = 0;
const char *ptr = line;
char field[1024];
int fieldIndex = 0;
int inQuotes = 0;
while (*ptr != '\0') {
if (*ptr == '"') {
inQuotes = !inQuotes;
} else if (*ptr == ',' && !inQuotes) {
// End of field
field[fieldIndex] = '\0';
row.fieldCount++;
row.fields = realloc(row.fields, row.fieldCount * sizeof(char*));
row.fields[row.fieldCount - 1] = strdup(field);
fieldIndex = 0;
} else {
field[fieldIndex++] = *ptr;
}
ptr++;
}
// Last field
if (fieldIndex > 0 || *line == '\0') {
field[fieldIndex] = '\0';
row.fieldCount++;
row.fields = realloc(row.fields, row.fieldCount * sizeof(char*));
row.fields[row.fieldCount - 1] = strdup(field);
}
return row;
}
// Release every field string of the row and then the field array itself.
// The row's members are not reset.
void freeCSVRow(CSVRow *row) {
    int remaining = row->fieldCount;
    char **field = row->fields;

    while (remaining-- > 0) {
        free(*field++);
    }
    free(row->fields);
}
// Demo driver: parse one CSV line containing a quoted field with an
// embedded comma and list the resulting fields.
int main() {
    const char *sample = "John,Doe,\"123 Main St, Apt 4B\",New York,30";

    printf("CSV Line: %s\n", sample);
    CSVRow parsed = parseCSVLine(sample);

    printf("Fields (%d):\n", parsed.fieldCount);
    int n = 0;
    while (n < parsed.fieldCount) {
        printf("  %d: '%s'\n", n + 1, parsed.fields[n]);
        n++;
    }

    freeCSVRow(&parsed);
    return 0;
}

3. Whitespace-Aware Tokenization

#include <stdio.h>
#include <string.h>
#include <ctype.h>
// Category of a token. tokenizeWithTypes() prints these as integers, so the
// implicit values are visible in its output: WORD = 0, NUMBER = 1,
// SYMBOL = 2, WHITESPACE = 3.
typedef enum {
TOKEN_WORD,
TOKEN_NUMBER,
TOKEN_SYMBOL,
TOKEN_WHITESPACE
} TokenType;
// Description of a single token. NOTE(review): not referenced by the code
// in this example; field meanings below are inferred from their names.
typedef struct {
// Token text.
char *text;
// Token category.
TokenType type;
// Presumably the offset of the token in the input — confirm.
int start;
// Presumably the token length in characters — confirm.
int length;
} Token;
// Classify one character: whitespace, digit (number), letter (word), or
// anything else (symbol).
static TokenType charTokenType(char c) {
    // <ctype.h> functions require a value representable as unsigned char.
    unsigned char ch = (unsigned char)c;
    if (isspace(ch)) {
        return TOKEN_WHITESPACE;
    }
    if (isdigit(ch)) {
        return TOKEN_NUMBER;
    }
    if (isalpha(ch)) {
        return TOKEN_WORD;
    }
    return TOKEN_SYMBOL;
}

// Print the typed tokens of `input`.
//
// A token is a maximal run of characters of the same class (letters, digits,
// or symbols); whitespace separates tokens and is never printed. Each token
// is reported with its numeric TokenType and its inclusive position range.
//
// A character that ends one token by changing class starts the next token,
// so in "xyz!" both 'xyz' and '!' are reported.
void tokenizeWithTypes(const char *input) {
    int i = 0;

    printf("Input: '%s'\n", input);
    printf("Tokens with types:\n");

    while (input[i] != '\0') {
        TokenType type = charTokenType(input[i]);
        if (type == TOKEN_WHITESPACE) {
            i++;
            continue;
        }

        // Extend the token over every following character of the same class.
        int tokenStart = i;
        while (input[i] != '\0' && charTokenType(input[i]) == type) {
            i++;
        }

        printf("  Type %d: '", (int)type);
        for (int j = tokenStart; j < i; j++) {
            putchar(input[j]);
        }
        printf("' (pos %d-%d)\n", tokenStart, i - 1);
    }
}
// Demo driver: run the typed tokenizer over a sample containing words,
// numbers and a symbol.
int main() {
    tokenizeWithTypes("abc 123 xyz! 456");
    return 0;
}

Practical Applications

1. Command Line Parser

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
// A parsed shell-style command line. Created by parseCommand() and released
// by freeCommand().
typedef struct {
// Heap-allocated argument strings.
char **args;
// Number of entries in `args`.
int argc;
// Input redirection target, or NULL when unset. Freed by freeCommand().
char *inputFile;
// Output redirection target, or NULL when unset. Freed by freeCommand().
char *outputFile;
// Non-zero when a standalone "&" token was seen.
int background;
} Command;
// Append a heap copy of `word` to cmd->args.
// Returns 1 on success, 0 if an allocation failed (cmd is left unchanged).
static int appendCommandArg(Command *cmd, const char *word) {
    char *copy = strdup(word);
    if (copy == NULL) {
        return 0;
    }
    // Keep the old pointer until realloc() succeeds so nothing leaks.
    char **grown = realloc(cmd->args, (size_t)(cmd->argc + 1) * sizeof *grown);
    if (grown == NULL) {
        free(copy);
        return 0;
    }
    grown[cmd->argc] = copy;
    cmd->args = grown;
    cmd->argc++;
    return 1;
}

// Route one complete word of the command line. `*pendingRedirect` is '<' or
// '>' when the previous word was a redirection operator, '\0' otherwise.
static void consumeCommandWord(Command *cmd, const char *word, char *pendingRedirect) {
    if (*pendingRedirect != '\0') {
        // This word is the file name that follows "<" or ">".
        char **slot = (*pendingRedirect == '<') ? &cmd->inputFile : &cmd->outputFile;
        char *copy = strdup(word);
        if (copy != NULL) {
            free(*slot);  // a later redirection replaces an earlier one
            *slot = copy;
        }
        *pendingRedirect = '\0';
    } else if (strcmp(word, "<") == 0 || strcmp(word, ">") == 0) {
        *pendingRedirect = word[0];
    } else if (strcmp(word, "&") == 0) {
        cmd->background = 1;
    } else {
        appendCommandArg(cmd, word);
    }
}

// Parse a whitespace-separated command line into a newly allocated Command.
//
// - A standalone "&" sets `background`, wherever it appears (including as
//   the last word).
// - "<" / ">" are not stored as arguments; the word that follows is stored
//   in `inputFile` / `outputFile`. An operator with no following word is
//   ignored.
// - Every other word (including "|") is appended to `args`.
// - Words longer than 255 characters are truncated to 255.
//
// Returns NULL if `cmdLine` is NULL or the Command cannot be allocated.
// If memory runs out later, the affected word is skipped. The caller
// releases the result with freeCommand().
Command* parseCommand(const char *cmdLine) {
    if (cmdLine == NULL) {
        return NULL;
    }

    Command *cmd = malloc(sizeof *cmd);
    if (cmd == NULL) {
        return NULL;
    }
    cmd->args = NULL;
    cmd->argc = 0;
    cmd->inputFile = NULL;
    cmd->outputFile = NULL;
    cmd->background = 0;

    char buffer[256];
    size_t used = 0;
    char pendingRedirect = '\0';

    // The loop also visits the terminating NUL so the final word goes
    // through the same handling as every other word.
    for (const char *ptr = cmdLine; ; ptr++) {
        // <ctype.h> functions require a value representable as unsigned char.
        unsigned char ch = (unsigned char)*ptr;

        if (ch == '\0' || isspace(ch)) {
            if (used > 0) {
                buffer[used] = '\0';
                consumeCommandWord(cmd, buffer, &pendingRedirect);
                used = 0;
            }
            if (ch == '\0') {
                break;
            }
        } else if (used < sizeof buffer - 1) {
            buffer[used++] = (char)ch;
        }
        // else: the word no longer fits; further characters are dropped.
    }

    return cmd;
}
// Release a Command created by parseCommand(): every argument string, the
// argument array, both redirection file names, and the Command itself.
// Passing NULL is a no-op, mirroring free().
void freeCommand(Command *cmd) {
    if (cmd == NULL) {
        return;
    }
    for (int i = 0; i < cmd->argc; i++) {
        free(cmd->args[i]);
    }
    free(cmd->args);
    // free(NULL) is defined to do nothing, so no guards are needed here.
    free(cmd->inputFile);
    free(cmd->outputFile);
    free(cmd);
}
// Demo driver: parse a few sample command lines and print the arguments
// (numbered from 0) and the background flag for each.
int main() {
    const char *testCmds[] = {
        "ls -la",
        "grep pattern file.txt",
        "cat file.txt | wc -l",
        "sort < input.txt > output.txt &",
        NULL  // sentinel marking the end of the table
    };

    for (const char **line = testCmds; *line != NULL; line++) {
        printf("\nCommand: %s\n", *line);
        Command *parsed = parseCommand(*line);

        printf("  Arguments (%d):\n", parsed->argc);
        int n = 0;
        while (n < parsed->argc) {
            printf("    %d: %s\n", n, parsed->args[n]);
            n++;
        }

        const char *bg = parsed->background ? "yes" : "no";
        printf("  Background: %s\n", bg);
        freeCommand(parsed);
    }
    return 0;
}

2. Configuration File Parser

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
// One "key = value" pair read from a configuration file.
typedef struct {
// Heap-allocated key text with surrounding whitespace removed.
char *key;
// Heap-allocated value text with surrounding whitespace removed.
char *value;
} ConfigEntry;
// All entries parsed from one configuration file. Created by
// parseConfigFile() and released by freeConfig().
typedef struct {
// Array of entries in file order; NULL when `count` is 0.
ConfigEntry *entries;
// Number of entries in `entries`.
int count;
} Config;
Config* parseConfigFile(const char *filename) {
Config *config = malloc(sizeof(Config));
config->entries = NULL;
config->count = 0;
FILE *file = fopen(filename, "r");
if (!file) {
printf("Cannot open file: %s\n", filename);
return config;
}
char line[256];
int lineNum = 0;
while (fgets(line, sizeof(line), file)) {
lineNum++;
// Remove trailing newline
line[strcspn(line, "\n")] = '\0';
// Skip empty lines and comments
if (line[0] == '\0' || line[0] == '#') {
continue;
}
// Find delimiter
char *delim = strchr(line, '=');
if (!delim) {
printf("Warning: Invalid line %d: %s\n", lineNum, line);
continue;
}
// Extract key
*delim = '\0';
char *key = line;
char *value = delim + 1;
// Trim whitespace
while (isspace(*key)) key++;
char *end = key + strlen(key) - 1;
while (end > key && isspace(*end)) *end-- = '\0';
while (isspace(*value)) value++;
end = value + strlen(value) - 1;
while (end > value && isspace(*end)) *end-- = '\0';
// Store entry
config->count++;
config->entries = realloc(config->entries, 
config->count * sizeof(ConfigEntry));
config->entries[config->count - 1].key = strdup(key);
config->entries[config->count - 1].value = strdup(value);
}
fclose(file);
return config;
}
// Release a Config created by parseConfigFile(): every key and value string,
// the entry array, and the Config itself. Passing NULL is a no-op, mirroring
// free().
void freeConfig(Config *config) {
    if (config == NULL) {
        return;
    }
    for (int i = 0; i < config->count; i++) {
        free(config->entries[i].key);
        free(config->entries[i].value);
    }
    free(config->entries);
    free(config);
}
// Print the number of entries followed by one "key = value" line per entry,
// in stored order.
void printConfig(Config *config) {
    printf("Configuration (%d entries):\n", config->count);

    const ConfigEntry *entry = config->entries;
    for (int left = config->count; left > 0; left--, entry++) {
        printf("  %s = %s\n", entry->key, entry->value);
    }
}
// Demo driver: write a small sample configuration to "test.conf", parse it
// back, and print the entries. Returns 1 if the sample file cannot be
// written or the configuration cannot be allocated.
int main() {
    // Create a sample config file
    FILE *file = fopen("test.conf", "w");
    if (file == NULL) {
        // Without this check the fprintf() calls below would receive NULL.
        perror("test.conf");
        return 1;
    }
    fprintf(file, "# Database configuration\n");
    fprintf(file, "host = localhost\n");
    fprintf(file, "port = 5432\n");
    fprintf(file, "database = mydb\n");
    fprintf(file, "username = admin\n");
    fprintf(file, "password = secret\n");
    // fclose() flushes buffered data, so a write error can surface here.
    if (fclose(file) != 0) {
        perror("test.conf");
        return 1;
    }

    Config *config = parseConfigFile("test.conf");
    if (config == NULL) {
        return 1;
    }
    printConfig(config);
    freeConfig(config);
    return 0;
}

3. URL Parser

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// Components of a parsed URL. Created by parseURL() and released by
// freeURL(). String members are heap-allocated; a member is NULL when the
// corresponding component was not present.
typedef struct {
// Text before "://", e.g. "https".
char *protocol;
// Host name, without port.
char *host;
// Port number: taken from the URL when given, otherwise a default chosen
// by parseURL().
int port;
// Path starting at the first '/' after the host.
char *path;
// Text after '?', without the '?'.
char *query;
// Text after '#', without the '#'.
char *fragment;
} URL;
// Store a heap copy of `text` in *slot.
// Returns 1 on success, 0 if the copy could not be allocated.
static int setURLField(char **slot, const char *text) {
    *slot = strdup(text);
    return *slot != NULL;
}

// Split a URL of the form [protocol://]host[:port][/path][?query][#fragment]
// into a newly allocated URL structure.
//
// - When no port is given, the port defaults to 443 for "https" and to 80
//   for everything else, including URLs with no protocol at all.
// - A port outside 0..65535 is stored as 0.
// - Query and fragment are only recognised after a '/' following the host.
//
// Returns NULL if `urlStr` is NULL or memory cannot be allocated. The caller
// releases the result with freeURL().
URL* parseURL(const char *urlStr) {
    URL *url = NULL;
    char *urlCopy = NULL;
    char *hostPort = NULL;
    char *ptr = NULL;
    char *schemeEnd = NULL;
    char *slash = NULL;
    char *portColon = NULL;

    if (urlStr == NULL) {
        return NULL;
    }

    url = calloc(1, sizeof *url);
    urlCopy = strdup(urlStr);  // working copy that may be cut up in place
    if (url == NULL || urlCopy == NULL) {
        goto fail;
    }
    ptr = urlCopy;

    // Parse protocol
    schemeEnd = strstr(ptr, "://");
    if (schemeEnd != NULL) {
        *schemeEnd = '\0';
        if (!setURLField(&url->protocol, ptr)) {
            goto fail;
        }
        ptr = schemeEnd + 3;
    }

    // Isolate "host[:port]", leaving ptr at the path (or NULL if none).
    slash = strchr(ptr, '/');
    if (slash != NULL) {
        hostPort = strndup(ptr, (size_t)(slash - ptr));
        ptr = slash;
    } else {
        hostPort = strdup(ptr);
        ptr = NULL;
    }
    if (hostPort == NULL) {
        goto fail;
    }

    // Parse host and port
    portColon = strchr(hostPort, ':');
    if (portColon != NULL) {
        *portColon = '\0';
        // strtol() has defined behaviour on overflow, unlike atoi().
        long port = strtol(portColon + 1, NULL, 10);
        url->port = (port >= 0 && port <= 65535) ? (int)port : 0;
    } else {
        // protocol is NULL when the URL had no "://"; guard before strcmp().
        int isHttps = (url->protocol != NULL && strcmp(url->protocol, "https") == 0);
        url->port = isHttps ? 443 : 80;
    }
    if (!setURLField(&url->host, hostPort)) {
        goto fail;
    }
    free(hostPort);
    hostPort = NULL;

    // Parse path, query, fragment
    if (ptr != NULL) {
        // Cut the fragment off first so a '?' inside it is not mistaken for
        // the start of the query.
        char *hash = strchr(ptr, '#');
        if (hash != NULL) {
            *hash = '\0';
            if (!setURLField(&url->fragment, hash + 1)) {
                goto fail;
            }
        }
        char *question = strchr(ptr, '?');
        if (question != NULL) {
            *question = '\0';
            if (!setURLField(&url->path, ptr) ||
                !setURLField(&url->query, question + 1)) {
                goto fail;
            }
        } else if (ptr[0] != '\0') {
            if (!setURLField(&url->path, ptr)) {
                goto fail;
            }
        }
    }

    free(urlCopy);
    return url;

fail:
    free(hostPort);
    free(urlCopy);
    if (url != NULL) {
        free(url->protocol);
        free(url->host);
        free(url->path);
        free(url->query);
        free(url->fragment);
        free(url);
    }
    return NULL;
}
// Release every string component of the URL and then the URL itself.
// Components that are NULL are simply skipped by free().
void freeURL(URL *url) {
    char *owned[] = {
        url->protocol, url->host, url->path, url->query, url->fragment
    };

    for (size_t i = 0; i < sizeof owned / sizeof owned[0]; i++) {
        free(owned[i]);
    }
    free(url);
}
// Print each URL component on its own line; absent string components are
// shown as "(none)".
void printURL(URL *url) {
    const char *protocol = url->protocol;
    const char *host = url->host;
    const char *path = url->path;
    const char *query = url->query;
    const char *fragment = url->fragment;

    if (protocol == NULL) protocol = "(none)";
    if (host == NULL) host = "(none)";
    if (path == NULL) path = "(none)";
    if (query == NULL) query = "(none)";
    if (fragment == NULL) fragment = "(none)";

    printf("URL Components:\n");
    printf("  Protocol: %s\n", protocol);
    printf("  Host: %s\n", host);
    printf("  Port: %d\n", url->port);
    printf("  Path: %s\n", path);
    printf("  Query: %s\n", query);
    printf("  Fragment: %s\n", fragment);
}
// Demo driver: parse and print a handful of sample URLs.
int main() {
    const char *urls[] = {
        "https://example.com",
        "http://localhost:8080/api/users",
        "https://google.com/search?q=c+programming#results",
        "ftp://ftp.example.com/files/document.txt",
        NULL  // sentinel marking the end of the table
    };

    for (const char **entry = urls; *entry != NULL; entry++) {
        printf("\nParsing: %s\n", *entry);
        URL *parsed = parseURL(*entry);
        printURL(parsed);
        freeURL(parsed);
    }
    return 0;
}

Performance Comparison

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#define ITERATIONS 100000
#define TEST_STRING "apple,banana;grape:orange|mango,kiwi;pear:plum|cherry"
// Method 1: strtok
// Tokenize a fresh copy of TEST_STRING with strtok() ITERATIONS times,
// discarding the tokens. The copy is needed because strtok() modifies its
// input.
void benchmarkStrtok() {
    char scratch[256];
    int round = 0;

    while (round < ITERATIONS) {
        strcpy(scratch, TEST_STRING);
        for (char *piece = strtok(scratch, ",;:|");
             piece != NULL;
             piece = strtok(NULL, ",;:|")) {
            // Tokens are intentionally ignored; only the scan is timed.
        }
        round++;
    }
}
// Method 2: Manual tokenization
// Scan a fresh copy of TEST_STRING ITERATIONS times, locating token
// boundaries by comparing each character against the delimiter set. Tokens
// are located but not stored.
void benchmarkManual() {
    static const char seps[] = ",;:|";
    char scratch[256];

    for (int round = 0; round < ITERATIONS; round++) {
        strcpy(scratch, TEST_STRING);
        int total = strlen(scratch);
        int tokenBegin = 0;

        // Visit index `total` too, so the terminator closes the last token.
        for (int pos = 0; pos <= total; pos++) {
            int boundary = 0;
            for (const char *sep = seps; *sep != '\0' && !boundary; sep++) {
                boundary = (scratch[pos] == *sep || scratch[pos] == '\0');
            }
            if (!boundary) {
                continue;
            }
            if (pos > tokenBegin) {
                // Token found: it spans [tokenBegin, pos).
            }
            tokenBegin = pos + 1;
        }
    }
}
// Time both tokenization benchmarks with clock() and print the CPU seconds
// each one used.
int main() {
    clock_t began;
    clock_t finished;
    double elapsed;

    // Benchmark strtok
    began = clock();
    benchmarkStrtok();
    finished = clock();
    elapsed = (double)(finished - began) / CLOCKS_PER_SEC;
    printf("strtok: %.3f seconds\n", elapsed);

    // Benchmark manual
    began = clock();
    benchmarkManual();
    finished = clock();
    elapsed = (double)(finished - began) / CLOCKS_PER_SEC;
    printf("Manual: %.3f seconds\n", elapsed);

    return 0;
}

Common Pitfalls and Best Practices

1. strtok Modifies Original String

#include <stdio.h>
#include <stdlib.h>  // free()
#include <string.h>
// Demo: strtok() overwrites delimiters in the buffer it scans, so the
// original text is only preserved if a copy is made beforehand.
int main() {
    char str[] = "Hello,World";
    char *strCopy = strdup(str);  // Make a copy
    if (strCopy == NULL) {
        perror("strdup");
        return 1;
    }

    printf("Original: %s\n", str);

    char *token = strtok(str, ",");
    while (token) {
        printf("Token: %s\n", token);
        token = strtok(NULL, ",");
    }

    printf("After strtok: %s\n", str);  // String is modified!
    printf("Copy still intact: %s\n", strCopy);

    free(strCopy);
    return 0;
}

2. Thread Safety

#include <stdio.h>
#include <string.h>
#include <pthread.h>
void* threadFunction(void *arg) {
char str[] = "apple,banana,grape";
char *saveptr;
char *token = strtok_r(str, ",", &saveptr);
while (token) {
printf("Thread %ld: %s\n", (long)arg, token);
token = strtok_r(NULL, ",", &saveptr);
}
return NULL;
}
// Start three threads that each tokenize their own string with strtok_r(),
// then wait for them. Only threads that were actually created are joined.
// Returns 1 if any thread could not be started.
int main() {
    pthread_t threads[3];
    int started = 0;

    // Create multiple threads using strtok_r (safe)
    for (long i = 0; i < 3; i++) {
        // pthread_create() returns an error number instead of setting errno.
        int rc = pthread_create(&threads[started], NULL, threadFunction, (void*)i);
        if (rc != 0) {
            fprintf(stderr, "pthread_create failed: %s\n", strerror(rc));
            continue;
        }
        started++;
    }

    // Joining a handle that was never filled in is undefined behaviour, so
    // iterate over the successfully started threads only.
    for (int i = 0; i < started; i++) {
        pthread_join(threads[i], NULL);
    }
    return (started == 3) ? 0 : 1;
}

3. Handling Empty Tokens

#include <stdio.h>
#include <string.h>
void tokenizeWithEmpty(const char *str, const char *delim) {
char *strCopy = strdup(str);
char *token;
char *saveptr;
int count = 0;
printf("Input: '%s'\n", str);
printf("Delimiter: '%s'\n", delim);
printf("Tokens:\n");
token = strtok_r(strCopy, delim, &saveptr);
while (token) {
printf("  %d: '%s'\n", ++count, token);
token = strtok_r(NULL, delim, &saveptr);
}
// Note: strtok skips empty tokens
printf("Note: Empty tokens are skipped\n\n");
free(strCopy);
}
// Demo driver: run the empty-token demonstration on two inputs that contain
// adjacent, leading and trailing delimiters.
int main() {
    static const char *const cases[][2] = {
        { "a,,b,,c", "," },
        { ":a::b:", ":" },
    };

    for (size_t i = 0; i < sizeof cases / sizeof cases[0]; i++) {
        tokenizeWithEmpty(cases[i][0], cases[i][1]);
    }
    return 0;
}

Best Practices Summary

Do's and Don'ts

// NOTE: the lines below are independent illustrative fragments, not one
// compilable unit (`token` is declared twice).

// DO: Make a copy before using strtok, because strtok writes into the buffer
// it scans
char *original = strdup(input);
char *token = strtok(original, delim);
// DON'T: Use strtok on string literals
// char *token = strtok("hello,world", ",");  // WRONG - modifying a string literal is undefined behavior (often a crash)
// DO: Use strtok_r in multi-threaded code; the scan position is kept in the
// caller's saveptr
char *saveptr;
char *token = strtok_r(str, delim, &saveptr);
// DON'T: Assume strtok handles empty tokens
// Empty fields are skipped
// DO: Handle quoted strings properly
// Implement custom parser for CSV with quotes
// DON'T: Use fixed-size buffers without bounds checking
// Always ensure buffer is large enough
// DO: Free dynamically allocated memory (here, the copy made by strdup)
free(original);

Comparison of Tokenization Methods

| Method | Pros | Cons | Best For |
|---|---|---|---|
| strtok() | Simple, fast | Modifies string, not thread-safe | Single-threaded parsing |
| strtok_r() | Thread-safe | Still modifies string | Multi-threaded parsing |
| strchr() manual | No modification, flexible | More code | Custom parsing needs |
| sscanf() | Pattern-based | Limited to simple formats | Known format parsing |
| Custom parser | Full control | Complex | Complex formats (CSV with quotes) |

Conclusion

String tokenization in C offers multiple approaches:

  1. strtok() - Simple and fast for basic needs
  2. strtok_r() - Thread-safe version
  3. Manual parsing - Full control and flexibility
  4. Custom parsers - Handle complex formats like CSV with quotes

Key Takeaways

  • Always consider whether the original string can be modified
  • Use thread-safe versions in multi-threaded code
  • Handle edge cases like empty strings and quoted content
  • Consider performance requirements for your application
  • Free dynamically allocated memory to prevent leaks

Mastering string tokenization is essential for text processing, command parsing, configuration file handling, and many other common programming tasks in C.

Leave a Reply

Your email address will not be published. Required fields are marked *


Macro Nepal Helper