Markdown to HTML Converter in Java

A comprehensive and extensible Markdown-to-HTML converter implemented in Java, with support for the CommonMark specification and GitHub Flavored Markdown (GFM) features.

Table of Contents

Complete Implementation

1. Main Converter Class

package com.markdownconverter;
import java.util.*;
import java.util.regex.*;
import java.io.*;
import java.nio.file.*;
/**
* Advanced Markdown to HTML Converter
* Supports CommonMark specification with GFM extensions
*/
public class MarkdownToHtmlConverter {
    /** Entity replacements for the characters that are significant in HTML text and attribute values. */
    private static final Map<String, String> HTML_ENTITIES = Map.of(
        "&", "&amp;",
        "<", "&lt;",
        ">", "&gt;",
        "\"", "&quot;",
        "'", "&#39;"
    );
    /** A line that opens an ATX heading: one to six '#' followed by whitespace. */
    private static final Pattern ATX_HEADING_LINE = Pattern.compile("^#{1,6}\\s+.*");
    /** The underline of a Setext heading: a run of '=' or a run of '-'. */
    private static final Pattern SETEXT_UNDERLINE = Pattern.compile("^(=+|-+)$");
    /** Start of an emitted heading element (h1..h6); deliberately does not match {@code <hr>}. */
    private static final Pattern HEADING_OPEN_TAG = Pattern.compile("^<h[1-6][ >]");

    private boolean supportGFM = true;
    private boolean sanitizeHTML = true;
    // Stored by the constructor; nothing inside this class body reads it.
    private boolean allowRawHTML = false;

    /** Creates a converter with GFM support and HTML escaping enabled. */
    public MarkdownToHtmlConverter() {}

    /**
     * Creates a converter with explicit feature switches.
     *
     * @param supportGFM   whether GFM-only constructs (tables, strikethrough) are recognised
     * @param sanitizeHTML whether text is entity-escaped before being emitted
     * @param allowRawHTML stored as given
     */
    public MarkdownToHtmlConverter(boolean supportGFM, boolean sanitizeHTML, boolean allowRawHTML) {
        this.supportGFM = supportGFM;
        this.sanitizeHTML = sanitizeHTML;
        this.allowRawHTML = allowRawHTML;
    }

    /**
     * Converts Markdown text to HTML.
     *
     * Inline formatting is applied block by block inside {@link #processBlocks}. It must not be
     * run over the joined output: the inline pass starts by escaping HTML, which would turn every
     * tag the block pass has just produced into visible text.
     *
     * @param markdown the Markdown source; {@code null} or blank input yields an empty string
     * @return the generated HTML, one block per line group, joined with newlines
     */
    public String convert(String markdown) {
        if (markdown == null || markdown.trim().isEmpty()) {
            return "";
        }
        String[] lines = markdown.split("\r?\n");
        return String.join("\n", processBlocks(lines));
    }

    /**
     * Converts a Markdown file to an HTML file.
     *
     * @param inputFile  path of the Markdown file to read
     * @param outputFile path of the HTML file to write
     * @throws IOException if reading or writing fails
     */
    public void convertFile(String inputFile, String outputFile) throws IOException {
        String markdown = Files.readString(Paths.get(inputFile));
        String html = convert(markdown);
        Files.writeString(Paths.get(outputFile), html);
    }

    /**
     * Splits the input into block-level elements and renders each one.
     *
     * Heading detection is done here with a one-line lookahead (needed for the underlined form)
     * instead of through a single-line predicate, and the underlined form is only considered once
     * the line has been ruled out as a blockquote, fence, table or list.
     *
     * @param lines the input split into lines
     * @return finished HTML blocks in document order
     */
    private List<String> processBlocks(String[] lines) {
        List<String> result = new ArrayList<>();
        int i = 0;
        while (i < lines.length) {
            String line = lines[i];
            int firstNewBlock = result.size();
            int next;
            if (isHorizontalRule(line)) {
                result.add("<hr>");
                next = i + 1;
            } else if (ATX_HEADING_LINE.matcher(line).matches()) {
                next = processHeading(lines, i, result);
            } else if (isBlockquote(line)) {
                next = processBlockquote(lines, i, result);
            } else if (isCodeBlock(line)) {
                next = processCodeBlock(lines, i, result);
            } else if (supportGFM && isTable(line)) {
                next = processTable(lines, i, result);
            } else if (isList(line)) {
                next = processList(lines, i, result);
            } else if (startsSetextHeading(lines, i)) {
                next = processHeading(lines, i, result);
            } else {
                next = processParagraph(lines, i, result);
            }
            if (next <= i) {
                // Handlers fall back to processParagraph, which stops in front of anything that
                // looks like a block start and can therefore hand back the index it was given.
                // Emit the line as plain text so the loop always advances.
                if (!line.trim().isEmpty()) {
                    result.add("<p>" + line.trim() + "</p>");
                }
                next = i + 1;
            }
            for (int k = firstNewBlock; k < result.size(); k++) {
                result.set(k, renderInlineContent(result.get(k)));
            }
            i = next;
        }
        return result;
    }

    /** Returns true when the line at {@code index} is non-blank and the next line is a Setext underline. */
    private boolean startsSetextHeading(String[] lines, int index) {
        return !lines[index].trim().isEmpty()
            && index + 1 < lines.length
            && SETEXT_UNDERLINE.matcher(lines[index + 1]).matches();
    }

    /**
     * Applies inline formatting to the text inside a paragraph or heading element.
     *
     * Those two block kinds are emitted with their raw source text between the tags; every other
     * block kind is returned untouched. The text is taken as everything between the end of the
     * opening tag and the start of the final closing tag.
     */
    private String renderInlineContent(String blockHtml) {
        boolean wrapsRawText = blockHtml.startsWith("<p>") || HEADING_OPEN_TAG.matcher(blockHtml).find();
        if (!wrapsRawText) {
            return blockHtml;
        }
        int textStart = blockHtml.indexOf('>') + 1;
        int textEnd = blockHtml.lastIndexOf("</");
        if (textEnd < textStart) {
            return blockHtml;
        }
        return blockHtml.substring(0, textStart)
            + processInlineElements(blockHtml.substring(textStart, textEnd))
            + blockHtml.substring(textEnd);
    }
}

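2. Block Element Processing (the methods in sections 2–6 are members of MarkdownToHtmlConverter and belong inside its class body)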

    /**
     * Emits a heading for the line at {@code index}.
     *
     * Two forms are recognised: the ATX form (one to six leading '#', with an optional closing run
     * of '#' that is dropped) and the underlined form, where the following line consists only of
     * '=' (level 1) or only of '-' (level 2). The heading text is emitted as written, without
     * escaping or inline formatting. If neither form applies the line is handed to
     * {@code processParagraph}.
     *
     * @param lines  all input lines
     * @param index  index of the candidate heading line
     * @param result list that receives the generated HTML
     * @return index of the first line after the heading
     */
    private int processHeading(String[] lines, int index, List<String> result) {
        String line = lines[index];
        Matcher atx = Pattern.compile("^(#{1,6})\\s+(.*)$").matcher(line);
        if (atx.find()) {
            int level = atx.group(1).length();
            // A closing sequence only counts when it is separated from the text by whitespace,
            // so a title such as "C#" keeps its '#'.
            String text = atx.group(2).trim().replaceFirst("(^|\\s+)#+$", "").trim();
            result.add(headingHtml(level, text));
            return index + 1;
        }
        // Underlined form; a blank text line must not produce an empty heading.
        if (!line.trim().isEmpty() && index + 1 < lines.length) {
            String underline = lines[index + 1];
            if (underline.matches("^=+$")) {
                result.add(headingHtml(1, line.trim()));
                return index + 2;
            } else if (underline.matches("^-+$")) {
                result.add(headingHtml(2, line.trim()));
                return index + 2;
            }
        }
        return processParagraph(lines, index, result);
    }

    /** Builds the heading element; the id attribute is left out when no id can be derived from the text. */
    private String headingHtml(int level, String text) {
        String id = generateHeadingId(text);
        String idAttribute = id.isEmpty() ? "" : " id=\"" + id + "\"";
        return "<h" + level + idAttribute + ">" + text + "</h" + level + ">";
    }
/**
 * Collects one paragraph starting at {@code index} and emits it as a {@code <p>} element.
 *
 * Leading blank lines are skipped. The paragraph ends at the first blank line after some text,
 * or in front of a line that {@code isBlockElement} reports as a block start. The line at
 * {@code index} itself is exempt from that second rule: callers also use this method as the
 * fallback when a table, heading or fence fails to parse, and refusing the line would return
 * {@code index} unchanged and stall the caller's loop. The text is emitted as written.
 *
 * @param lines  all input lines
 * @param index  index of the first line to consider
 * @param result list that receives the generated HTML, if any text was found
 * @return index of the first unconsumed line; always greater than {@code index} when
 *         {@code index} is within bounds
 */
private int processParagraph(String[] lines, int index, List<String> result) {
    StringBuilder paragraph = new StringBuilder();
    int i = index;
    while (i < lines.length) {
        String trimmed = lines[i].trim();
        if (trimmed.isEmpty()) {
            i++;
            if (paragraph.length() > 0) {
                break; // blank line closes the paragraph
            }
            continue; // still skipping leading blank lines
        }
        if (i != index && isBlockElement(lines[i])) {
            break;
        }
        if (paragraph.length() > 0) {
            paragraph.append(" ");
        }
        paragraph.append(trimmed);
        i++;
    }
    if (paragraph.length() > 0) {
        result.add("<p>" + paragraph + "</p>");
    }
    return i;
}
/**
 * Emits a fenced code block that opens with a line starting with three backticks.
 *
 * The first word after the opening fence names the language and becomes a
 * {@code language-...} class; characters outside {@code [A-Za-z0-9_+#.-]} are removed from it
 * so it cannot break out of the attribute. Code lines are escaped with {@code escapeHtml} and
 * joined with newlines exactly as written, so the indentation of the first line is preserved.
 * A block without a closing fence runs to the end of the input. If the line at {@code index}
 * does not open a fence it is handed to {@code processParagraph}.
 *
 * @param lines  all input lines
 * @param index  index of the opening fence
 * @param result list that receives the generated HTML
 * @return index of the first line after the closing fence
 */
private int processCodeBlock(String[] lines, int index, List<String> result) {
    String opener = lines[index].trim();
    if (!opener.startsWith("```")) {
        return processParagraph(lines, index, result);
    }
    String info = opener.substring(3).trim();
    String language = info.isEmpty()
        ? ""
        : info.split("\\s+")[0].replaceAll("[^A-Za-z0-9_+#.-]", "");

    List<String> codeLines = new ArrayList<>();
    int i = index + 1;
    while (i < lines.length && !lines[i].trim().equals("```")) {
        codeLines.add(escapeHtml(lines[i]));
        i++;
    }
    if (i < lines.length) {
        i++; // step over the closing fence
    }
    String codeClass = language.isEmpty() ? "" : " class=\"language-" + language + "\"";
    result.add("<pre><code" + codeClass + ">" + String.join("\n", codeLines) + "</code></pre>");
    return i;
}
/**
 * Emits a blockquote built from consecutive lines whose first non-blank character is '>'.
 *
 * Blank lines between quoted lines are consumed and kept as blank lines in the quoted text.
 * The quoted text, with the markers removed, is converted by a recursive call to
 * {@code convert}. The wrapping {@code <p>} is removed only when the result is exactly one
 * paragraph; stripping it from longer content would leave unbalanced tags.
 *
 * @param lines  all input lines
 * @param index  index of the first quoted line
 * @param result list that receives the generated HTML
 * @return index of the first line after the quote and any blank lines that follow it
 */
private int processBlockquote(String[] lines, int index, List<String> result) {
    StringBuilder quoted = new StringBuilder();
    int i = index;
    while (i < lines.length && (isBlockquote(lines[i]) || lines[i].trim().isEmpty())) {
        String line = lines[i];
        if (isBlockquote(line)) {
            // The marker may be indented; remove that indentation together with the marker,
            // otherwise the '>' survives and is read as a nested quote.
            quoted.append(line.replaceFirst("^\\s*>\\s*", "").trim());
        }
        quoted.append("\n");
        i++;
    }
    String innerHtml = convert(quoted.toString().trim());
    boolean singleParagraph = innerHtml.startsWith("<p>")
        && innerHtml.endsWith("</p>")
        && innerHtml.indexOf("<p>", 3) < 0;
    if (singleParagraph) {
        innerHtml = innerHtml.substring(3, innerHtml.length() - 4);
    }
    result.add("<blockquote>" + innerHtml + "</blockquote>");
    return i;
}

3. List Processing

    /**
     * Emits an ordered or unordered list starting at {@code index}.
     *
     * The list kind is decided by the first line only. Items come from {@code parseListItems};
     * each item's text is rendered with {@code processInlineElements}, or with {@code convert}
     * when it spans several lines.
     *
     * @param lines  all input lines
     * @param index  index of the first list line
     * @param result list that receives the generated HTML
     * @return index of the first line that is not part of the list
     */
    private int processList(String[] lines, int index, List<String> result) {
        String tag = isOrderedList(lines[index]) ? "ol" : "ul";
        List<ListItem> items = parseListItems(lines, index);

        StringBuilder listHtml = new StringBuilder();
        listHtml.append("<").append(tag).append(">\n");
        for (ListItem item : items) {
            String content = item.content.trim();
            if (content.contains("\n")) {
                // Multi-line item: render as Markdown and drop the outer paragraph wrapper.
                content = convert(content);
                content = content.replaceAll("^<p>|</p>$", "");
            } else {
                content = processInlineElements(content);
            }
            listHtml.append("  <li>").append(content).append("</li>\n");
        }
        listHtml.append("</").append(tag).append(">");
        result.add(listHtml.toString());

        // An item may span several source lines, so the number of items is not the number
        // of lines consumed.
        return endOfList(lines, index);
    }

    /**
     * Returns the index just past the list that starts at {@code startIndex}, using the same
     * rule as {@code parseListItems}: a marker line, followed by any number of lines that are
     * neither marker lines nor blank.
     */
    private int endOfList(String[] lines, int startIndex) {
        int i = startIndex;
        while (i < lines.length && isList(lines[i])) {
            i++;
            while (i < lines.length && !isList(lines[i]) && !lines[i].trim().isEmpty()) {
                i++;
            }
        }
        return i;
    }
/**
 * Reads consecutive list items beginning at {@code startIndex}.
 *
 * Every line accepted by {@code isList} opens an item. The lines after it that are neither
 * list lines nor blank are continuation lines and are appended to the item, separated by a
 * single space. Reading stops at the first line that does not open an item.
 *
 * @param lines      all input lines
 * @param startIndex index of the first candidate line
 * @return the items in source order; empty when the first line is not a list line
 */
private List<ListItem> parseListItems(String[] lines, int startIndex) {
    List<ListItem> parsed = new ArrayList<>();
    int cursor = startIndex;
    while (cursor < lines.length) {
        String markerLine = lines[cursor];
        if (!isList(markerLine)) {
            break;
        }
        int depth = getIndentationLevel(markerLine);
        StringBuilder text = new StringBuilder(
            markerLine.replaceFirst("^\\s*([*+-]|\\d+\\.)\\s+", ""));
        // Absorb continuation lines up to the next marker line or blank line.
        for (cursor++; cursor < lines.length; cursor++) {
            String follow = lines[cursor];
            if (isList(follow) || follow.trim().isEmpty()) {
                break;
            }
            text.append(" ").append(follow.trim());
        }
        parsed.add(new ListItem(depth, text.toString()));
    }
    return parsed;
}
/** One parsed list entry: the indentation of its marker line and its text. */
private static class ListItem {
    /** Leading-whitespace width of the marker line. */
    int indent;
    /** Item text with the list marker removed. */
    String content;

    ListItem(int indent, String content) {
        this.content = content;
        this.indent = indent;
    }
}

4. Table Processing (GFM)

    /**
     * Emits a GitHub Flavored Markdown table.
     *
     * A table is a run of consecutive lines that contain '|': a header row, a delimiter row
     * accepted by {@code isTableSeparator}, and any number of body rows. The run is collected
     * with a plain pipe check because the row predicate rejects the delimiter row, which would
     * end the run after the header. When the second line is missing or is not a delimiter row,
     * the line at {@code index} is handed to {@code processParagraph}.
     *
     * @param lines  all input lines
     * @param index  index of the header row
     * @param result list that receives the generated HTML
     * @return index of the first line after the table
     */
    private int processTable(String[] lines, int index, List<String> result) {
        int end = index;
        while (end < lines.length && lines[end].contains("|")) {
            end++;
        }
        boolean hasDelimiterRow = end - index >= 2 && isTableSeparator(lines[index + 1]);
        if (!hasDelimiterRow) {
            return processParagraph(lines, index, result);
        }

        StringBuilder tableHtml = new StringBuilder();
        tableHtml.append("<table>\n");
        tableHtml.append("  <thead>\n");
        appendTableRow(tableHtml, lines[index], "th");
        tableHtml.append("  </thead>\n");
        if (end - index > 2) {
            tableHtml.append("  <tbody>\n");
            for (int row = index + 2; row < end; row++) {
                appendTableRow(tableHtml, lines[row], "td");
            }
            tableHtml.append("  </tbody>\n");
        }
        tableHtml.append("</table>");
        result.add(tableHtml.toString());
        return end;
    }

    /** Appends one {@code <tr>} whose cells use {@code cellTag} ("th" or "td"). */
    private void appendTableRow(StringBuilder html, String rowLine, String cellTag) {
        html.append("    <tr>\n");
        for (String cell : parseTableRow(rowLine)) {
            html.append("      <").append(cellTag).append(">")
                .append(processInlineElements(cell.trim()))
                .append("</").append(cellTag).append(">\n");
        }
        html.append("    </tr>\n");
    }
/**
 * Splits a table row into its cells.
 *
 * One leading and one trailing unescaped pipe are treated as the row border and removed. The
 * rest is split on unescaped pipes; {@code \|} is turned back into a literal pipe and each cell
 * is trimmed. Empty cells are kept so that later cells stay in their column.
 *
 * @param line the row as written in the source
 * @return the cell texts in column order
 */
private String[] parseTableRow(String line) {
    String row = line.trim();
    if (row.startsWith("|")) {
        row = row.substring(1);
    }
    if (row.endsWith("|") && !row.endsWith("\\|")) {
        row = row.substring(0, row.length() - 1);
    }
    // Limit -1 keeps trailing empty cells, which split() would otherwise drop.
    return Arrays.stream(row.split("(?<!\\\\)\\|", -1))
        .map(cell -> cell.replace("\\|", "|").trim())
        .toArray(String[]::new);
}
/**
 * Tells whether the line is a table delimiter row: dash runs with optional alignment colons,
 * separated by pipes, with optional outer pipes.
 */
private boolean isTableSeparator(String line) {
    String delimiterRow = "^\\s*\\|?\\s*:?-+:?\\s*(\\|\\s*:?-+:?\\s*)*\\|?\\s*$";
    return Pattern.matches(delimiterRow, line);
}

5. Inline Element Processing

    /**
     * Renders inline Markdown in a piece of text.
     *
     * The passes run in a fixed order: HTML escaping, code spans, images, links, emphasis and,
     * when GFM is enabled, strikethrough. Images go before links because the image syntax
     * contains the link syntax; with the order reversed the link pass would consume
     * {@code [alt](src)} and leave a stray '!' behind.
     *
     * @param text raw text; {@code null} or empty yields an empty string
     * @return the text with inline markup replaced by HTML
     */
    private String processInlineElements(String text) {
        if (text == null || text.isEmpty()) {
            return "";
        }
        String result = escapeHtml(text);
        result = processCodeSpans(result);
        result = processImages(result);
        result = processLinks(result);
        result = processEmphasis(result);
        if (supportGFM) {
            result = processStrikethrough(result);
        }
        return result;
    }
/**
 * Replaces backtick-delimited spans with {@code <code>} elements.
 *
 * The input is expected to have been through {@code escapeHtml} already, as it is in
 * {@code processInlineElements}, so the span content is not escaped a second time. Characters
 * that later inline passes treat as markup (asterisk, underscore, tilde, square brackets) are
 * written as numeric character references, which display identically but are not picked up as
 * emphasis or links.
 *
 * @param text escaped text
 * @return the text with code spans rendered
 */
private String processCodeSpans(String text) {
    Matcher matcher = Pattern.compile("`([^`]+)`").matcher(text);
    StringBuilder out = new StringBuilder();
    while (matcher.find()) {
        String code = shieldMarkdownChars(matcher.group(1));
        // quoteReplacement: '$' and '\' in the code must not be read as group references.
        matcher.appendReplacement(out, Matcher.quoteReplacement("<code>" + code + "</code>"));
    }
    matcher.appendTail(out);
    return out.toString();
}

/** Rewrites Markdown marker characters as numeric character references. */
private static String shieldMarkdownChars(String code) {
    StringBuilder shielded = new StringBuilder(code.length());
    for (int i = 0; i < code.length(); i++) {
        char c = code.charAt(i);
        switch (c) {
            case '*': shielded.append("&#42;"); break;
            case '_': shielded.append("&#95;"); break;
            case '~': shielded.append("&#126;"); break;
            case '[': shielded.append("&#91;"); break;
            case ']': shielded.append("&#93;"); break;
            default: shielded.append(c);
        }
    }
    return shielded.toString();
}
/**
 * Replaces {@code [text](url "title")} with an anchor element.
 *
 * The input is expected to have been through {@code escapeHtml} already, as it is in
 * {@code processInlineElements}. For that reason the pieces are inserted as they are instead
 * of being escaped again, and the title may be delimited either by a literal double quote or
 * by its escaped form {@code &quot;}. A '[' directly preceded by '!' is left alone because it
 * belongs to an image. When sanitizing is on, URLs with a script-capable scheme are replaced
 * by "#". Emphasis inside the link text is handled by the emphasis pass that follows.
 *
 * @param text escaped text
 * @return the text with links rendered
 */
private String processLinks(String text) {
    Pattern pattern = Pattern.compile(
        "(?<!!)\\[([^\\]]+)\\]\\(([^\\s)]+)(?:\\s+(?:\"|&quot;)([^\"]+?)(?:\"|&quot;))?\\)");
    Matcher matcher = pattern.matcher(text);
    StringBuilder out = new StringBuilder();
    while (matcher.find()) {
        String linkText = matcher.group(1);
        String url = matcher.group(2);
        String title = matcher.group(3);
        if (sanitizeHTML && hasUnsafeLinkScheme(url)) {
            url = "#";
        }
        String titleAttr = title != null ? " title=\"" + title + "\"" : "";
        String replacement = "<a href=\"" + url + "\"" + titleAttr + ">" + linkText + "</a>";
        matcher.appendReplacement(out, Matcher.quoteReplacement(replacement));
    }
    matcher.appendTail(out);
    return out.toString();
}

/** Tells whether the URL starts with a scheme that can execute or embed script in a link target. */
private static boolean hasUnsafeLinkScheme(String url) {
    String lowered = url.trim().toLowerCase(Locale.ROOT);
    return lowered.startsWith("javascript:")
        || lowered.startsWith("vbscript:")
        || lowered.startsWith("data:");
}
/**
 * Replaces {@code ![alt](src "title")} with an image element.
 *
 * The input is expected to have been through {@code escapeHtml} already, as it is in
 * {@code processInlineElements}. The pieces are therefore inserted as they are, and the title
 * may be delimited either by a literal double quote or by its escaped form {@code &quot;}.
 * When sanitizing is on, a source with a script scheme is replaced by an empty source.
 *
 * @param text escaped text
 * @return the text with images rendered
 */
private String processImages(String text) {
    Pattern pattern = Pattern.compile(
        "!\\[([^\\]]*)\\]\\(([^\\s)]+)(?:\\s+(?:\"|&quot;)([^\"]+?)(?:\"|&quot;))?\\)");
    Matcher matcher = pattern.matcher(text);
    StringBuilder out = new StringBuilder();
    while (matcher.find()) {
        String alt = matcher.group(1);
        String src = matcher.group(2);
        String title = matcher.group(3);
        if (sanitizeHTML && hasUnsafeImageScheme(src)) {
            src = "";
        }
        String titleAttr = title != null ? " title=\"" + title + "\"" : "";
        String replacement = "<img src=\"" + src + "\" alt=\"" + alt + "\"" + titleAttr + ">";
        matcher.appendReplacement(out, Matcher.quoteReplacement(replacement));
    }
    matcher.appendTail(out);
    return out.toString();
}

/** Tells whether the image source starts with a script scheme. */
private static boolean hasUnsafeImageScheme(String src) {
    String lowered = src.trim().toLowerCase(Locale.ROOT);
    return lowered.startsWith("javascript:") || lowered.startsWith("vbscript:");
}
/**
 * Applies strong and regular emphasis.
 *
 * The two-character delimiters are handled before the one-character ones so that a bold
 * marker is not consumed as two italic markers.
 */
private String processEmphasis(String text) {
    String[][] passes = {
        {"**", "<strong>", "</strong>"},
        {"__", "<strong>", "</strong>"},
        {"*", "<em>", "</em>"},
        {"_", "<em>", "</em>"},
    };
    String rendered = text;
    for (String[] pass : passes) {
        rendered = processEmphasisType(rendered, pass[0], pass[1], pass[2]);
    }
    return rendered;
}
/**
 * Wraps every span enclosed by {@code delimiter} in the given tags.
 *
 * A span may not contain the delimiter character and may not begin or end with whitespace, so
 * "2 * 3 * 4" is left alone. Underscore delimiters are ignored when they touch a letter or
 * digit on the outside, which keeps identifiers such as snake_case_name intact. The span
 * content is inserted as it is: it has already been escaped, and running the full inline
 * pipeline on it again would escape tags produced by earlier passes.
 *
 * @param text      text to scan; returned unchanged when null or empty
 * @param delimiter the marker, for example two asterisks or one underscore
 * @param openTag   tag written in place of the opening delimiter
 * @param closeTag  tag written in place of the closing delimiter
 * @return the text with matching spans wrapped
 */
private String processEmphasisType(String text, String delimiter, String openTag, String closeTag) {
    if (text == null || text.isEmpty() || delimiter == null || delimiter.isEmpty()) {
        return text;
    }
    String quoted = Pattern.quote(delimiter);
    boolean underscore = delimiter.charAt(0) == '_';
    String regex = (underscore ? "(?<![\\p{L}\\p{N}])" : "")
        + quoted
        + "(?=\\S)([^" + quoted + "]+)(?<=\\S)"
        + quoted
        + (underscore ? "(?![\\p{L}\\p{N}])" : "");
    Matcher matcher = Pattern.compile(regex).matcher(text);
    StringBuilder out = new StringBuilder();
    while (matcher.find()) {
        String wrapped = openTag + matcher.group(1) + closeTag;
        matcher.appendReplacement(out, Matcher.quoteReplacement(wrapped));
    }
    matcher.appendTail(out);
    return out.toString();
}
/**
 * Replaces spans enclosed in double tildes with {@code <del>} elements.
 *
 * This pass runs last in {@code processInlineElements}, so the span content may already hold
 * tags from earlier passes. It is inserted as it is; feeding it through the inline pipeline
 * again would escape those tags.
 *
 * @param text text to scan
 * @return the text with strikethrough rendered
 */
private String processStrikethrough(String text) {
    Matcher matcher = Pattern.compile("~~([^~]+)~~").matcher(text);
    StringBuilder out = new StringBuilder();
    while (matcher.find()) {
        String struck = "<del>" + matcher.group(1) + "</del>";
        matcher.appendReplacement(out, Matcher.quoteReplacement(struck));
    }
    matcher.appendTail(out);
    return out.toString();
}

6. Utility Methods

    /**
     * Tells whether the line starts a block-level element: heading, blockquote, code fence,
     * horizontal rule, list, or (with GFM enabled) table.
     */
    private boolean isBlockElement(String line) {
        if (isHeading(line)) {
            return true;
        }
        if (isBlockquote(line)) {
            return true;
        }
        if (isCodeBlock(line)) {
            return true;
        }
        if (isHorizontalRule(line)) {
            return true;
        }
        if (isList(line)) {
            return true;
        }
        return isTable(line) && supportGFM;
    }
/**
 * Tells whether the line is an ATX heading: one to six '#' followed by whitespace.
 *
 * Only this form can be recognised from a single line. The underlined form depends on the
 * following line, so a caller that wants it has to look ahead itself. Treating every
 * non-empty line as a potential heading, as an earlier version of this check did, makes
 * ordinary text count as a block start.
 */
private boolean isHeading(String line) {
    return line.matches("^#{1,6}\\s+.*");
}
/** Tells whether the first non-blank character of the line is '>'. */
private boolean isBlockquote(String line) {
    String stripped = line.trim();
    return stripped.startsWith(">");
}
/** Tells whether the line, ignoring surrounding whitespace, starts with three backticks. */
private boolean isCodeBlock(String line) {
    String stripped = line.trim();
    return stripped.startsWith("```");
}
/**
 * Tells whether the line is a horizontal rule: three or more of the same marker character
 * ('*', '_' or '-'), optionally separated by whitespace, and nothing else.
 *
 * The back-reference requires all markers to be the same character, so a mixed run such as
 * "*-_" is not a rule.
 */
private boolean isHorizontalRule(String line) {
    return line.matches("^\\s*([*_-])(\\s*\\1){2,}\\s*$");
}
/**
 * Tells whether the line opens a list item: optional indentation, a bullet ('*', '+', '-') or
 * a number followed by '.', then whitespace.
 */
private boolean isList(String line) {
    String listItem = "^\\s*([*+-]|\\d+\\.)\\s+.*";
    return Pattern.matches(listItem, line);
}
/** Tells whether the line opens a numbered list item: optional indentation, digits, '.', whitespace. */
private boolean isOrderedList(String line) {
    String numberedItem = "^\\s*\\d+\\.\\s+.*";
    return Pattern.matches(numberedItem, line);
}
/** Tells whether the line contains a pipe character anywhere. */
private boolean isTable(String line) {
    return line.indexOf("|") >= 0;
}
/** Tells whether the line contains a pipe and is not a table delimiter row. */
private boolean isTableRow(String line) {
    if (!line.contains("|")) {
        return false;
    }
    return !isTableSeparator(line);
}
/**
 * Measures the leading whitespace of a line, counting a space as 1 and a tab as 4.
 *
 * @param line the line to measure
 * @return the accumulated width up to the first character that is neither space nor tab
 */
private int getIndentationLevel(String line) {
    int width = 0;
    for (int pos = 0; pos < line.length(); pos++) {
        char ch = line.charAt(pos);
        if (ch == ' ') {
            width += 1;
        } else if (ch == '\t') {
            width += 4;
        } else {
            return width;
        }
    }
    return width;
}
/**
 * Replaces the characters listed in {@code HTML_ENTITIES} with their entities.
 *
 * The text is walked once, character by character. Replacing one map entry after another over
 * the whole string depends on the map's iteration order, which {@code Map.of} does not define:
 * whenever '&amp;' is handled after another entry, the ampersands of entities written earlier
 * are escaped a second time.
 *
 * @param text the text to escape
 * @return the escaped text; the input itself when it is {@code null} or sanitizing is off
 */
private String escapeHtml(String text) {
    if (text == null || !sanitizeHTML) {
        return text;
    }
    StringBuilder escaped = new StringBuilder(text.length() + 16);
    for (int i = 0; i < text.length(); i++) {
        String ch = text.substring(i, i + 1);
        escaped.append(HTML_ENTITIES.getOrDefault(ch, ch));
    }
    return escaped.toString();
}
/**
 * Derives an anchor id from heading text.
 *
 * The text is lower-cased, everything except a-z, 0-9, whitespace and '-' is removed,
 * whitespace runs become a single '-', repeated dashes are collapsed and one leading and one
 * trailing dash are dropped. Lower-casing uses {@code Locale.ROOT} so the result does not
 * depend on the default locale (under a Turkish locale, 'I' would otherwise turn into a
 * dotless i and then be removed).
 *
 * @param text the heading text
 * @return the id, possibly empty when the text has no ASCII letters or digits
 */
private String generateHeadingId(String text) {
    return text.toLowerCase(Locale.ROOT)
        .replaceAll("[^a-z0-9\\s-]", "")
        .replaceAll("\\s+", "-")
        .replaceAll("-+", "-")
        .replaceAll("^-|-$", "");
}

7. Command Line Interface

/**
 * Command line interface for the Markdown to HTML converter.
 *
 * All arguments are parsed before any conversion runs, so option flags take effect regardless
 * of where they appear on the command line.
 */
public class MarkdownCLI {
    /**
     * Parses the arguments, converts every requested file, then prints the HTML for every
     * argument given as direct Markdown text.
     *
     * Exits with status 2 on a usage error and status 1 when a file conversion fails.
     *
     * @param args command line arguments; see {@code printHelp} for the accepted forms
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            printHelp();
            return;
        }
        boolean gfm = true;
        boolean sanitize = true;
        boolean usageError = false;
        List<String> inputFiles = new ArrayList<>();
        // Parallel to inputFiles; null means "derive the name from the input file".
        List<String> outputFiles = new ArrayList<>();
        List<String> inlineMarkdown = new ArrayList<>();

        for (int i = 0; i < args.length; i++) {
            String arg = args[i];
            switch (arg) {
                case "-i":
                case "--input":
                    if (i + 1 < args.length) {
                        inputFiles.add(args[++i]);
                        outputFiles.add(null);
                    } else {
                        System.err.println("Missing file name after " + arg);
                        usageError = true;
                    }
                    break;
                case "-o":
                case "--output":
                    if (i + 1 >= args.length) {
                        System.err.println("Missing file name after " + arg);
                        usageError = true;
                    } else {
                        // Consume the value here so it is not mistaken for Markdown text.
                        String target = args[++i];
                        int last = inputFiles.size() - 1;
                        if (last < 0 || outputFiles.get(last) != null) {
                            System.err.println(arg + " must follow an input file given with -i");
                            usageError = true;
                        } else {
                            outputFiles.set(last, target);
                        }
                    }
                    break;
                case "--no-gfm":
                    gfm = false;
                    break;
                case "--allow-raw-html":
                    sanitize = false;
                    break;
                case "-h":
                case "--help":
                    printHelp();
                    return;
                default:
                    if (arg.startsWith("-")) {
                        System.err.println("Ignoring unknown option: " + arg);
                    } else {
                        // Treat as direct markdown input
                        inlineMarkdown.add(arg);
                    }
                    break;
            }
        }
        if (usageError) {
            System.exit(2);
        }

        // Built after parsing so that the flags are applied.
        MarkdownToHtmlConverter converter = new MarkdownToHtmlConverter(gfm, sanitize, !sanitize);
        boolean failed = false;
        for (int k = 0; k < inputFiles.size(); k++) {
            String inputFile = inputFiles.get(k);
            String outputFile = outputFiles.get(k) != null
                ? outputFiles.get(k)
                : defaultOutputFile(inputFile);
            try {
                converter.convertFile(inputFile, outputFile);
                System.out.println("Converted: " + inputFile + " -> " + outputFile);
            } catch (IOException e) {
                System.err.println("Error converting file " + inputFile + ": " + e);
                failed = true;
            }
        }
        for (String markdown : inlineMarkdown) {
            System.out.println(converter.convert(markdown));
        }
        if (failed) {
            System.exit(1);
        }
    }

    /** Derives the output name: a trailing ".md" is replaced by ".html", otherwise ".html" is appended. */
    private static String defaultOutputFile(String inputFile) {
        if (inputFile.endsWith(".md")) {
            return inputFile.substring(0, inputFile.length() - 3) + ".html";
        }
        return inputFile + ".html";
    }

    /** Prints usage information to standard output. */
    private static void printHelp() {
        System.out.println("Markdown to HTML Converter");
        System.out.println("Usage:");
        System.out.println("  java MarkdownCLI -i input.md [-o output.html]");
        System.out.println("  java MarkdownCLI \"# Markdown text\"");
        System.out.println();
        System.out.println("Options:");
        System.out.println("  -i, --input FILE     Input Markdown file");
        System.out.println("  -o, --output FILE    Output HTML file (default: input file with .html extension)");
        System.out.println("  --no-gfm             Disable GitHub Flavored Markdown extensions");
        System.out.println("  --allow-raw-html     Allow raw HTML in Markdown (disables sanitization)");
        System.out.println("  -h, --help          Show this help message");
    }
}

8. Advanced Features Extension

/**
 * Extended converter that adds emoji shortcodes and task-list checkboxes.
 *
 * The extras are applied to the HTML returned by {@code convert}. The base class keeps its
 * inline pipeline and its escaping helper private, so neither can be overridden or called
 * from here.
 */
public class ExtendedMarkdownConverter extends MarkdownToHtmlConverter {
    // Declared but not consulted anywhere in this class.
    private boolean supportFootnotes = true;
    private boolean supportTaskLists = true;
    private boolean supportEmoji = true;

    private static final Map<String, String> EMOJI_MAP = Map.of(
        ":smile:", "😄",
        ":heart:", "❤️",
        ":warning:", "⚠️",
        ":rocket:", "🚀",
        ":thumbsup:", "👍"
        // Add more emojis as needed
    );

    /** A list item that starts with a task marker: "[ ]", "[x]" or "[X]". */
    private static final Pattern TASK_ITEM = Pattern.compile("<li>\\[([ xX])\\]\\s*");

    public ExtendedMarkdownConverter() {
        super(true, true, false);
    }

    /**
     * Converts Markdown to HTML and then applies the enabled extras.
     *
     * Both extras are idempotent (a replaced shortcode or marker no longer matches), so it is
     * harmless if the base class calls this method again for nested content.
     *
     * @param markdown the Markdown source
     * @return the generated HTML
     */
    @Override
    public String convert(String markdown) {
        String html = super.convert(markdown);
        if (supportEmoji) {
            html = processEmojis(html);
        }
        if (supportTaskLists) {
            html = processTaskLists(html);
        }
        return html;
    }

    /** Replaces every known shortcode with its emoji, wherever it occurs in the HTML. */
    private String processEmojis(String text) {
        String result = text;
        for (Map.Entry<String, String> emoji : EMOJI_MAP.entrySet()) {
            result = result.replace(emoji.getKey(), emoji.getValue());
        }
        return result;
    }

    /** Turns a task marker at the start of a list item into a disabled checkbox. */
    private String processTaskLists(String html) {
        Matcher matcher = TASK_ITEM.matcher(html);
        StringBuilder out = new StringBuilder();
        while (matcher.find()) {
            String checked = matcher.group(1).equalsIgnoreCase("x") ? " checked" : "";
            String replacement = "<li><input type=\"checkbox\" disabled" + checked + "> ";
            matcher.appendReplacement(out, Matcher.quoteReplacement(replacement));
        }
        matcher.appendTail(out);
        return out.toString();
    }

    /**
     * Converts Markdown and wraps the result in a complete HTML page.
     *
     * @param markdown the Markdown source
     * @param title    page title; {@code null} is treated as empty
     * @param cssFile  stylesheet URL; when {@code null} or blank no stylesheet link is written
     * @return the HTML document
     */
    public String convertWithTemplate(String markdown, String title, String cssFile) {
        String content = convert(markdown);
        String stylesheet = (cssFile == null || cssFile.trim().isEmpty())
            ? ""
            : "<link rel=\"stylesheet\" href=\"" + escapeForHtml(cssFile) + "\">";
        return String.format("""
            <!DOCTYPE html>
            <html lang="en">
            <head>
            <meta charset="UTF-8">
            <meta name="viewport" content="width=device-width, initial-scale=1.0">
            <title>%s</title>
            %s
            </head>
            <body>
            <article class="markdown-body">
            %s
            </article>
            </body>
            </html>
            """, escapeForHtml(title), stylesheet, content);
    }

    /** Escapes the five characters that are significant in HTML text and attribute values. */
    private static String escapeForHtml(String text) {
        if (text == null) {
            return "";
        }
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&': escaped.append("&amp;"); break;
                case '<': escaped.append("&lt;"); break;
                case '>': escaped.append("&gt;"); break;
                case '"': escaped.append("&quot;"); break;
                case '\'': escaped.append("&#39;"); break;
                default: escaped.append(c);
            }
        }
        return escaped.toString();
    }
}

Usage Examples

Basic Usage

MarkdownToHtmlConverter converter = new MarkdownToHtmlConverter();
String markdown = "# Hello World\n\nThis is **bold** and *italic* text.";
String html = converter.convert(markdown);
System.out.println(html);

File Conversion

MarkdownToHtmlConverter converter = new MarkdownToHtmlConverter();
converter.convertFile("README.md", "README.html");

Command Line Usage

# Convert file
java MarkdownCLI -i README.md -o output.html
# Convert direct text
java MarkdownCLI "# Heading" "This is a paragraph."

Features Supported

Headings (ATX and Setext styles)
Paragraphs and line breaks
Emphasis (bold, italic)
Lists (ordered and unordered)
Links and images
Code blocks and inline code
Blockquotes
Horizontal rules
Tables (GFM)
Strikethrough (GFM)
Task lists (Extended)
Emoji (Extended)
HTML sanitization
File I/O operations

This implementation provides a robust, extensible foundation for Markdown processing that can be easily customized for specific needs.