Link Preview Generator in Java

Project Overview

A comprehensive link preview generator that extracts metadata from web pages including titles, descriptions, images, and Open Graph data. Built with Java for high performance and reliability.

Technology Stack

  • Core: Java 17+, Jsoup for HTML parsing
  • HTTP Client: the JDK's built-in java.net.http.HttpClient (Java 11+)
  • Image Processing: ImageIO for image dimension extraction
  • Caching: Caffeine for performance
  • JSON: Jackson for data serialization
  • Concurrency: CompletableFuture for async processing
  • Validation: java.net.URI parsing for URL validation

Project Structure

link-preview-generator/
├── src/main/java/com/linkpreview/
│   ├── core/
│   ├── http/
│   ├── parser/
│   ├── model/
│   ├── cache/
│   ├── service/
│   └── util/
├── src/test/java/
└── config/

Core Implementation

1. Data Models

package com.linkpreview.model;
import com.fasterxml.jackson.annotation.JsonInclude;
import java.util.Map;
import java.util.Objects;
@JsonInclude(JsonInclude.Include.NON_NULL)
// Metadata collected for a single URL: basic page information, the preview
// image, Open Graph / Twitter Card tags and the outcome of the fetch.
// The URL is fixed at construction time; every other field is optional and is
// normally populated through the nested Builder. Equality is based on the URL only.
public class LinkPreview {
    private final String url;
    private String title;
    private String description;
    private String siteName;
    private String imageUrl;
    private Integer imageWidth;
    private Integer imageHeight;
    private String contentType;
    private String favicon;
    private String canonicalUrl;
    private Map<String, String> openGraph;
    private Map<String, String> twitterCard;
    private Long fetchTime;
    private Integer responseCode;
    private String error;

    /**
     * Creates an otherwise empty preview for the given URL.
     *
     * @param url the URL this preview describes
     * @throws NullPointerException if {@code url} is null
     */
    public LinkPreview(String url) {
        this.url = Objects.requireNonNull(url, "URL cannot be null");
    }

    /**
     * Fluent builder for {@link LinkPreview}. The URL is validated when
     * {@link #build()} is called, not when the builder is created.
     */
    public static class Builder {
        private final String url;
        private String title;
        private String description;
        private String siteName;
        private String imageUrl;
        private Integer imageWidth;
        private Integer imageHeight;
        private String contentType;
        private String favicon;
        private String canonicalUrl;
        private Map<String, String> openGraph;
        private Map<String, String> twitterCard;
        private Long fetchTime;
        private Integer responseCode;
        private String error;

        public Builder(String url) {
            this.url = url;
        }

        public Builder title(String title) {
            this.title = title;
            return this;
        }

        public Builder description(String description) {
            this.description = description;
            return this;
        }

        public Builder siteName(String siteName) {
            this.siteName = siteName;
            return this;
        }

        public Builder imageUrl(String imageUrl) {
            this.imageUrl = imageUrl;
            return this;
        }

        public Builder imageDimensions(Integer width, Integer height) {
            this.imageWidth = width;
            this.imageHeight = height;
            return this;
        }

        public Builder contentType(String contentType) {
            this.contentType = contentType;
            return this;
        }

        public Builder favicon(String favicon) {
            this.favicon = favicon;
            return this;
        }

        public Builder canonicalUrl(String canonicalUrl) {
            this.canonicalUrl = canonicalUrl;
            return this;
        }

        public Builder openGraph(Map<String, String> openGraph) {
            this.openGraph = openGraph;
            return this;
        }

        public Builder twitterCard(Map<String, String> twitterCard) {
            this.twitterCard = twitterCard;
            return this;
        }

        public Builder fetchTime(Long fetchTime) {
            this.fetchTime = fetchTime;
            return this;
        }

        public Builder responseCode(Integer responseCode) {
            this.responseCode = responseCode;
            return this;
        }

        public Builder error(String error) {
            this.error = error;
            return this;
        }

        /**
         * Creates a new preview carrying the values set so far. The builder can
         * be reused afterwards; each call returns an independent instance.
         *
         * @throws NullPointerException if the builder was created with a null URL
         */
        public LinkPreview build() {
            LinkPreview preview = new LinkPreview(url);
            preview.title = title;
            preview.description = description;
            preview.siteName = siteName;
            preview.imageUrl = imageUrl;
            preview.imageWidth = imageWidth;
            preview.imageHeight = imageHeight;
            preview.contentType = contentType;
            preview.favicon = favicon;
            preview.canonicalUrl = canonicalUrl;
            preview.openGraph = openGraph;
            preview.twitterCard = twitterCard;
            preview.fetchTime = fetchTime;
            preview.responseCode = responseCode;
            preview.error = error;
            return preview;
        }
    }

    // Getters
    public String getUrl() { return url; }
    public String getTitle() { return title; }
    public String getDescription() { return description; }
    public String getSiteName() { return siteName; }
    public String getImageUrl() { return imageUrl; }
    public Integer getImageWidth() { return imageWidth; }
    public Integer getImageHeight() { return imageHeight; }
    public String getContentType() { return contentType; }
    public String getFavicon() { return favicon; }
    public String getCanonicalUrl() { return canonicalUrl; }
    public Map<String, String> getOpenGraph() { return openGraph; }
    public Map<String, String> getTwitterCard() { return twitterCard; }
    public Long getFetchTime() { return fetchTime; }
    public Integer getResponseCode() { return responseCode; }
    public String getError() { return error; }

    // Setters for the values that are only known after a preview object exists:
    // response metadata, the failure message, and the resolved image details.
    public void setImageUrl(String imageUrl) { this.imageUrl = imageUrl; }
    public void setImageWidth(Integer imageWidth) { this.imageWidth = imageWidth; }
    public void setImageHeight(Integer imageHeight) { this.imageHeight = imageHeight; }
    public void setContentType(String contentType) { this.contentType = contentType; }
    public void setResponseCode(Integer responseCode) { this.responseCode = responseCode; }
    public void setError(String error) { this.error = error; }

    @Override
    public String toString() {
        return "LinkPreview{" +
                "url='" + url + '\'' +
                ", title='" + title + '\'' +
                ", description='" + description + '\'' +
                ", siteName='" + siteName + '\'' +
                ", imageUrl='" + imageUrl + '\'' +
                '}';
    }

    /** Two previews are equal when they describe the same URL string. */
    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        LinkPreview that = (LinkPreview) o;
        return url.equals(that.url);
    }

    @Override
    public int hashCode() {
        return Objects.hash(url);
    }
}

2. HTTP Client Service

package com.linkpreview.http;
import com.linkpreview.model.LinkPreview;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.time.Duration;
import java.util.Optional;
public class HttpService {
private final HttpClient httpClient;
private final String userAgent;
private final int timeoutSeconds;
public HttpService() {
this(HttpClient.newBuilder()
.followRedirects(HttpClient.Redirect.NORMAL)
.connectTimeout(Duration.ofSeconds(10))
.build(), 
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", 
10);
}
public HttpService(HttpClient httpClient, String userAgent, int timeoutSeconds) {
this.httpClient = httpClient;
this.userAgent = userAgent;
this.timeoutSeconds = timeoutSeconds;
}
public Optional<Document> fetchHtmlDocument(String url) {
try {
HttpRequest request = HttpRequest.newBuilder()
.uri(new URI(url))
.header("User-Agent", userAgent)
.timeout(Duration.ofSeconds(timeoutSeconds))
.GET()
.build();
HttpResponse<String> response = httpClient.send(
request, 
HttpResponse.BodyHandlers.ofString()
);
if (response.statusCode() == HttpURLConnection.HTTP_OK) {
return Optional.of(Jsoup.parse(response.body(), url));
}
} catch (Exception e) {
// Log error but don't throw - return empty optional
System.err.println("Error fetching URL: " + url + " - " + e.getMessage());
}
return Optional.empty();
}
public LinkPreview fetchWithHeadRequest(String url) {
try {
HttpRequest request = HttpRequest.newBuilder()
.uri(new URI(url))
.header("User-Agent", userAgent)
.timeout(Duration.ofSeconds(timeoutSeconds))
.method("HEAD", HttpRequest.BodyPublishers.noBody())
.build();
HttpResponse<Void> response = httpClient.send(
request, 
HttpResponse.BodyHandlers.discarding()
);
LinkPreview preview = new LinkPreview(url);
preview.setResponseCode(response.statusCode());
// Extract content type from headers
Optional<String> contentType = response.headers()
.firstValue("content-type");
contentType.ifPresent(preview::setContentType);
return preview;
} catch (Exception e) {
LinkPreview preview = new LinkPreview(url);
preview.setError("HEAD request failed: " + e.getMessage());
return preview;
}
}
public boolean isValidUrl(String url) {
try {
new URI(url).toURL();
return true;
} catch (Exception e) {
return false;
}
}
public String resolveRelativeUrl(String baseUrl, String relativeUrl) {
try {
URI baseUri = new URI(baseUrl);
URI resolvedUri = baseUri.resolve(relativeUrl);
return resolvedUri.toString();
} catch (URISyntaxException e) {
return relativeUrl; // Return as-is if resolution fails
}
}
}

3. Metadata Parser

package com.linkpreview.parser;
import com.linkpreview.model.LinkPreview;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
/**
 * Extracts preview metadata (title, description, site name, Open Graph and
 * Twitter Card tags, favicon, canonical URL) from a parsed HTML document and
 * writes it into a {@link LinkPreview.Builder}.
 */
public class MetadataParser {
    private static final Pattern TITLE_CLEANUP = Pattern.compile("[\\s\\n]+");
    private static final int MAX_DESCRIPTION_LENGTH = 200;

    // Matches a rel attribute that contains the token "icon" or "apple-touch-icon",
    // including multi-token values such as rel="shortcut icon".
    private static final String ICON_LINK_SELECTOR =
            "link[rel~=(?i)(^|\\s)(icon|apple-touch-icon)(\\s|$)]";

    /**
     * Runs every extraction step against {@code doc}.
     *
     * @param doc            parsed page
     * @param previewBuilder builder that receives the extracted values
     */
    public void parseMetadata(Document doc, LinkPreview.Builder previewBuilder) {
        parseBasicMetadata(doc, previewBuilder);
        parseOpenGraph(doc, previewBuilder);
        parseTwitterCards(doc, previewBuilder);
        parseFavicon(doc, previewBuilder);
        parseCanonicalUrl(doc, previewBuilder);
    }

    /** Sets title, description and site name when a non-blank value is found. */
    private void parseBasicMetadata(Document doc, LinkPreview.Builder previewBuilder) {
        // Title
        String title = extractTitle(doc);
        if (title != null && !title.trim().isEmpty()) {
            previewBuilder.title(cleanupText(title));
        }
        // Description
        String description = extractDescription(doc);
        if (description != null && !description.trim().isEmpty()) {
            previewBuilder.description(truncateDescription(cleanupText(description)));
        }
        // Site name from URL or meta tags
        String siteName = extractSiteName(doc);
        if (siteName != null && !siteName.trim().isEmpty()) {
            previewBuilder.siteName(siteName);
        }
    }

    /** Title lookup order: og:title, twitter:title, the HTML title, first h1; null if none. */
    private String extractTitle(Document doc) {
        // Try OpenGraph title first
        String ogTitle = doc.select("meta[property=og:title]").attr("content");
        if (!ogTitle.isEmpty()) return ogTitle;
        // Try Twitter title
        String twitterTitle = doc.select("meta[name=twitter:title]").attr("content");
        if (!twitterTitle.isEmpty()) return twitterTitle;
        // Fall back to HTML title
        String htmlTitle = doc.title();
        if (!htmlTitle.isEmpty()) return htmlTitle;
        // Try h1 as last resort
        Element h1 = doc.selectFirst("h1");
        return h1 != null ? h1.text() : null;
    }

    /**
     * Description lookup order: og:description, twitter:description, the meta
     * description, then the first paragraph between 51 and 299 characters long.
     */
    private String extractDescription(Document doc) {
        // Try OpenGraph description
        String ogDesc = doc.select("meta[property=og:description]").attr("content");
        if (!ogDesc.isEmpty()) return ogDesc;
        // Try Twitter description
        String twitterDesc = doc.select("meta[name=twitter:description]").attr("content");
        if (!twitterDesc.isEmpty()) return twitterDesc;
        // Try meta description
        String metaDesc = doc.select("meta[name=description]").attr("content");
        if (!metaDesc.isEmpty()) return metaDesc;
        // Extract from content - first paragraph with reasonable length
        Elements paragraphs = doc.select("p");
        for (Element p : paragraphs) {
            String text = p.text().trim();
            if (text.length() > 50 && text.length() < 300) {
                return text;
            }
        }
        return null;
    }

    /**
     * Returns og:site_name, or otherwise the first label of the document's host
     * with a leading "www." removed. Note that for hosts with a subdomain this
     * yields the subdomain (e.g. "blog" for blog.example.com).
     */
    private String extractSiteName(Document doc) {
        // Try OpenGraph site name
        String ogSite = doc.select("meta[property=og:site_name]").attr("content");
        if (!ogSite.isEmpty()) return ogSite;
        // Extract from URL as fallback
        String url = doc.baseUri();
        try {
            java.net.URI uri = new java.net.URI(url);
            String host = uri.getHost();
            if (host != null) {
                if (host.startsWith("www.")) {
                    host = host.substring(4);
                }
                return host.split("\\.")[0];
            }
        } catch (Exception ignored) {
            // An unparsable base URI simply means no site name can be derived.
        }
        return null;
    }

    /**
     * Collects every {@code og:*} meta tag with non-empty content and uses
     * og:image as the preview image.
     */
    private void parseOpenGraph(Document doc, LinkPreview.Builder previewBuilder) {
        Map<String, String> openGraph = new HashMap<>();
        Elements ogTags = doc.select("meta[property^=og:]");
        for (Element tag : ogTags) {
            String property = tag.attr("property");
            String content = tag.attr("content");
            if (!content.isEmpty()) {
                // Repeated properties (e.g. several og:image tags): the first one in
                // document order wins, as the Open Graph protocol prescribes.
                openGraph.putIfAbsent(property, content);
            }
        }
        if (!openGraph.isEmpty()) {
            previewBuilder.openGraph(openGraph);
            // Extract image from OpenGraph if available
            String ogImage = openGraph.get("og:image");
            if (ogImage != null && !ogImage.isEmpty()) {
                previewBuilder.imageUrl(ogImage);
            }
        }
    }

    /**
     * Collects every {@code twitter:*} meta tag with non-empty content and uses
     * twitter:image as the preview image when no image has been set yet.
     */
    private void parseTwitterCards(Document doc, LinkPreview.Builder previewBuilder) {
        Map<String, String> twitterCard = new HashMap<>();
        Elements twitterTags = doc.select("meta[name^=twitter:]");
        for (Element tag : twitterTags) {
            String name = tag.attr("name");
            String content = tag.attr("content");
            if (!content.isEmpty()) {
                // First occurrence wins, matching the single-value lookups above.
                twitterCard.putIfAbsent(name, content);
            }
        }
        if (!twitterCard.isEmpty()) {
            previewBuilder.twitterCard(twitterCard);
            // Use Twitter image if no OpenGraph image
            String twitterImage = twitterCard.get("twitter:image");
            if (twitterImage != null && !twitterImage.isEmpty()) {
                // The builder exposes no getters, so a throwaway build() is used to
                // find out whether an image was already set.
                if (previewBuilder.build().getImageUrl() == null) {
                    previewBuilder.imageUrl(twitterImage);
                }
            }
        }
    }

    /**
     * Uses the first icon {@code <link>} of the page, made absolute against the
     * document's base URI; falls back to {@code /favicon.ico} on the same origin.
     */
    private void parseFavicon(Document doc, LinkPreview.Builder previewBuilder) {
        String favicon = null;
        // Check for explicit favicon link
        Element iconLink = doc.select(ICON_LINK_SELECTOR).first();
        if (iconLink != null) {
            favicon = absoluteOrRaw(iconLink, "href");
        }
        // Fall back to default favicon location
        if (favicon == null || favicon.isEmpty()) {
            try {
                java.net.URI baseUri = new java.net.URI(doc.baseUri());
                favicon = baseUri.resolve("/favicon.ico").toString();
            } catch (Exception ignored) {
                // Without a usable base URI there is no default location to offer.
            }
        }
        if (favicon != null && !favicon.isEmpty()) {
            previewBuilder.favicon(favicon);
        }
    }

    /** Sets the canonical URL from {@code <link rel=canonical>} when it has an href. */
    private void parseCanonicalUrl(Document doc, LinkPreview.Builder previewBuilder) {
        Element canonical = doc.select("link[rel=canonical]").first();
        if (canonical != null) {
            String canonicalUrl = absoluteOrRaw(canonical, "href");
            if (!canonicalUrl.isEmpty()) {
                previewBuilder.canonicalUrl(canonicalUrl);
            }
        }
    }

    /**
     * Returns the attribute resolved against the document's base URI, or the raw
     * attribute value when it cannot be made absolute.
     */
    private String absoluteOrRaw(Element element, String attribute) {
        String absolute = element.absUrl(attribute);
        return absolute.isEmpty() ? element.attr(attribute) : absolute;
    }

    /** Trims {@code text} and collapses every whitespace run into one space. */
    private String cleanupText(String text) {
        if (text == null) return null;
        return TITLE_CLEANUP.matcher(text.trim()).replaceAll(" ");
    }

    /**
     * Shortens {@code description} to at most {@value #MAX_DESCRIPTION_LENGTH}
     * characters including a trailing "...".
     */
    private String truncateDescription(String description) {
        if (description == null || description.length() <= MAX_DESCRIPTION_LENGTH) {
            return description;
        }
        // Truncate at last space before limit to avoid cutting words
        int lastSpace = description.lastIndexOf(' ', MAX_DESCRIPTION_LENGTH - 3);
        if (lastSpace > 0) {
            return description.substring(0, lastSpace) + "...";
        } else {
            return description.substring(0, MAX_DESCRIPTION_LENGTH - 3) + "...";
        }
    }
}

4. Image Processor

package com.linkpreview.parser;
import com.linkpreview.http.HttpService;
import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import javax.imageio.stream.ImageInputStream;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.time.Duration;
import java.util.Iterator;
import java.util.Locale;
import java.util.Optional;
import java.util.regex.Pattern;
/**
 * Downloads a preview image (up to a fixed size limit) and reports its pixel
 * dimensions.
 */
public class ImageProcessor {
    /** Largest image body that will be downloaded, in bytes (2 MB). */
    private static final int MAX_IMAGE_BYTES = 2 * 1024 * 1024;
    private static final Duration DOWNLOAD_TIMEOUT = Duration.ofSeconds(15);
    // Compiled once; applied to the lower-cased URL.
    private static final Pattern IMAGE_EXTENSION =
            Pattern.compile(".*\\.(jpg|jpeg|png|gif|webp|bmp)(\\?.*)?$");

    private final HttpService httpService;

    public ImageProcessor(HttpService httpService) {
        this.httpService = httpService;
    }

    /**
     * Fetches {@code imageUrl} and reads its width and height.
     *
     * @param imageUrl absolute URL of the image
     * @return the dimensions, or empty when the URL is blank, does not look like
     *         an image, cannot be downloaded, exceeds the size limit, or is in a
     *         format no installed ImageIO reader understands
     */
    public Optional<ImageDimensions> getImageDimensions(String imageUrl) {
        if (imageUrl == null || imageUrl.trim().isEmpty()) {
            return Optional.empty();
        }
        // Cheap pre-filter on the URL text so obviously non-image links are never downloaded.
        if (!isValidImageUrl(imageUrl)) {
            return Optional.empty();
        }
        try {
            HttpRequest request = HttpRequest.newBuilder()
                    .uri(new URI(imageUrl))
                    .header("User-Agent", httpService.getUserAgent())
                    .timeout(DOWNLOAD_TIMEOUT)
                    .GET()
                    .build();
            HttpResponse<InputStream> response = httpService.getHttpClient().send(
                    request,
                    HttpResponse.BodyHandlers.ofInputStream()
            );
            // The body stream is closed on every path, including non-200 responses,
            // so the underlying connection is always released.
            try (InputStream body = response.body()) {
                if (response.statusCode() != 200) {
                    return Optional.empty();
                }
                // Read one byte past the limit: getting it proves the image is too large.
                byte[] data = body.readNBytes(MAX_IMAGE_BYTES + 1);
                if (data.length > MAX_IMAGE_BYTES) {
                    throw new IOException("Image too large");
                }
                return readDimensions(data);
            }
        } catch (InterruptedException e) {
            // Keep the interrupt visible to the caller's thread instead of swallowing it.
            Thread.currentThread().interrupt();
            System.err.println("Error processing image: " + imageUrl + " - " + e.getMessage());
        } catch (Exception e) {
            System.err.println("Error processing image: " + imageUrl + " - " + e.getMessage());
        }
        return Optional.empty();
    }

    /**
     * Reads width and height from the image header through an {@link ImageReader},
     * which avoids decoding the whole bitmap into memory.
     */
    private static Optional<ImageDimensions> readDimensions(byte[] data) throws IOException {
        try (ImageInputStream input = ImageIO.createImageInputStream(new ByteArrayInputStream(data))) {
            if (input == null) {
                return Optional.empty();
            }
            Iterator<ImageReader> readers = ImageIO.getImageReaders(input);
            if (!readers.hasNext()) {
                return Optional.empty();
            }
            ImageReader reader = readers.next();
            try {
                reader.setInput(input, true, true);
                return Optional.of(new ImageDimensions(reader.getWidth(0), reader.getHeight(0)));
            } finally {
                reader.dispose();
            }
        }
    }

    /**
     * Heuristic check on the URL text: a known image extension (optionally
     * followed by a query string) or one of a few image-like path/query markers.
     */
    private boolean isValidImageUrl(String imageUrl) {
        // Locale.ROOT keeps the lower-casing independent of the JVM's default locale.
        String lowerUrl = imageUrl.toLowerCase(Locale.ROOT);
        return IMAGE_EXTENSION.matcher(lowerUrl).matches() ||
                lowerUrl.contains("/image/") ||
                lowerUrl.contains("/img/") ||
                lowerUrl.contains("image=");
    }

    /** Immutable width/height pair in pixels. */
    public static class ImageDimensions {
        private final int width;
        private final int height;

        public ImageDimensions(int width, int height) {
            this.width = width;
            this.height = height;
        }

        public int getWidth() { return width; }
        public int getHeight() { return height; }

        /** Width divided by height, or 0 when the height is 0. */
        public double getAspectRatio() {
            return height == 0 ? 0 : (double) width / height;
        }

        /** True when both sides are at least 100 px and the aspect ratio is within [0.5, 2.0]. */
        public boolean isValidForPreview() {
            return width >= 100 && height >= 100 && getAspectRatio() >= 0.5 && getAspectRatio() <= 2.0;
        }

        @Override
        public String toString() {
            return width + "x" + height;
        }
    }
}

5. Cache Manager

package com.linkpreview.cache;
import com.linkpreview.model.LinkPreview;
import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
import java.time.Duration;
import java.util.Optional;
import java.util.concurrent.ConcurrentMap;
/**
 * Two Caffeine caches keyed by URL: one holding successful previews and one
 * remembering which URLs recently failed, each with its own expiry.
 */
public class CacheManager {
    private final Cache<String, LinkPreview> cache;
    private final Cache<String, Long> failureCache;

    /** Uses 1000 entries, a 24 hour success TTL and a 30 minute failure TTL. */
    public CacheManager() {
        this(1000, Duration.ofHours(24), Duration.ofMinutes(30));
    }

    /**
     * @param maxSize    maximum number of successful previews kept
     * @param successTtl time after which a successful preview expires
     * @param failureTtl time after which a recorded failure expires
     */
    public CacheManager(int maxSize, Duration successTtl, Duration failureTtl) {
        this.cache = Caffeine.newBuilder()
                .maximumSize(maxSize)
                .expireAfterWrite(successTtl)
                .build();
        // The failure cache has a fixed capacity of 500 entries.
        this.failureCache = Caffeine.newBuilder()
                .maximumSize(500)
                .expireAfterWrite(failureTtl)
                .build();
    }

    /**
     * Stores the outcome for {@code url}. A preview without an error goes into the
     * success cache; one with an error is recorded as a failure. The entry for
     * the opposite outcome is dropped so the most recent result is the one that
     * {@link #get(String)} reports.
     */
    public void put(String url, LinkPreview preview) {
        if (preview.getError() == null) {
            cache.put(url, preview);
            failureCache.invalidate(url);
        } else {
            failureCache.put(url, System.currentTimeMillis());
            cache.invalidate(url);
        }
    }

    /**
     * @return the cached preview; otherwise, if the URL recently failed, a
     *         synthetic error preview; otherwise empty
     */
    public Optional<LinkPreview> get(String url) {
        LinkPreview preview = cache.getIfPresent(url);
        if (preview != null) {
            return Optional.of(preview);
        }
        // Check if this URL recently failed
        if (failureCache.getIfPresent(url) != null) {
            return Optional.of(createErrorPreview(url, "Recently failed URL"));
        }
        return Optional.empty();
    }

    /** Removes {@code url} from both caches. */
    public void invalidate(String url) {
        cache.invalidate(url);
        failureCache.invalidate(url);
    }

    /** Empties both caches. */
    public void clear() {
        cache.invalidateAll();
        failureCache.invalidateAll();
    }

    /** Returns the success cache's map view as provided by {@code Cache.asMap()}. */
    public ConcurrentMap<String, LinkPreview> getAllCached() {
        return cache.asMap();
    }

    /** Returns the estimated number of successful previews currently cached. */
    public long getCacheSize() {
        return cache.estimatedSize();
    }

    /**
     * Builds the preview returned for a remembered failure. No request is made
     * for it, so the fetch time and response code are both zero.
     */
    private LinkPreview createErrorPreview(String url, String error) {
        return new LinkPreview.Builder(url)
                .error(error)
                .fetchTime(0L)
                .responseCode(0)
                .build();
    }
}

6. Core Link Preview Service

package com.linkpreview.service;
import com.linkpreview.cache.CacheManager;
import com.linkpreview.http.HttpService;
import com.linkpreview.model.LinkPreview;
import com.linkpreview.parser.ImageProcessor;
import com.linkpreview.parser.MetadataParser;
import org.jsoup.nodes.Document;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
 * Entry point for generating link previews: validates the URL, consults the
 * cache, fetches and parses the page, enriches the result with image
 * dimensions and caches the outcome.
 */
public class LinkPreviewService {
    private final HttpService httpService;
    private final MetadataParser metadataParser;
    private final ImageProcessor imageProcessor;
    private final CacheManager cacheManager;
    private final ExecutorService executorService;

    /** Creates a service with default collaborators and a 10-thread pool for async calls. */
    public LinkPreviewService() {
        this(new HttpService(),
                new MetadataParser(),
                Executors.newFixedThreadPool(10));
    }

    /**
     * @param httpService     used to fetch pages and images and to validate URLs
     * @param metadataParser  used to extract metadata from fetched pages
     * @param executorService runs the asynchronous preview requests
     */
    public LinkPreviewService(HttpService httpService,
                              MetadataParser metadataParser,
                              ExecutorService executorService) {
        this.httpService = httpService;
        this.metadataParser = metadataParser;
        this.imageProcessor = new ImageProcessor(httpService);
        this.cacheManager = new CacheManager();
        this.executorService = executorService;
    }

    /** Same as {@code generatePreview(url, false)}. */
    public Optional<LinkPreview> generatePreview(String url) {
        return generatePreview(url, false);
    }

    /**
     * Produces a preview for {@code url}.
     *
     * @param url          the page to describe
     * @param forceRefresh when true the cached entry is dropped and the page is fetched again
     * @return empty when {@code url} is null; otherwise a preview, whose error
     *         field is set if the URL is invalid or fetching/processing failed
     */
    public Optional<LinkPreview> generatePreview(String url, boolean forceRefresh) {
        if (url == null) {
            // A preview cannot exist without a URL, so there is nothing to report.
            return Optional.empty();
        }
        if (!httpService.isValidUrl(url)) {
            return Optional.of(createErrorPreview(url, "Invalid URL"));
        }
        // Check cache first (unless force refresh)
        if (!forceRefresh) {
            Optional<LinkPreview> cached = cacheManager.get(url);
            if (cached.isPresent()) {
                return cached;
            }
        } else {
            cacheManager.invalidate(url);
        }
        try {
            long startTime = System.currentTimeMillis();
            // Fetch HTML document
            Optional<Document> document = httpService.fetchHtmlDocument(url);
            if (document.isEmpty()) {
                LinkPreview errorPreview = createErrorPreview(url, "Failed to fetch URL");
                cacheManager.put(url, errorPreview);
                return Optional.of(errorPreview);
            }
            // Parse metadata
            LinkPreview.Builder previewBuilder = new LinkPreview.Builder(url);
            metadataParser.parseMetadata(document.get(), previewBuilder);
            // Elapsed milliseconds for fetching and parsing the page
            previewBuilder.fetchTime(System.currentTimeMillis() - startTime);
            previewBuilder.responseCode(200);
            LinkPreview preview = buildWithImageDetails(previewBuilder);
            // Cache the successful result
            cacheManager.put(url, preview);
            return Optional.of(preview);
        } catch (Exception e) {
            LinkPreview errorPreview = createErrorPreview(url, "Processing error: " + e.getMessage());
            cacheManager.put(url, errorPreview);
            return Optional.of(errorPreview);
        }
    }

    /** Runs {@link #generatePreview(String)} on this service's executor. */
    public CompletableFuture<Optional<LinkPreview>> generatePreviewAsync(String url) {
        return CompletableFuture.supplyAsync(() -> generatePreview(url), executorService);
    }

    /** Runs {@link #generatePreview(String, boolean)} on this service's executor. */
    public CompletableFuture<Optional<LinkPreview>> generatePreviewAsync(String url, boolean forceRefresh) {
        return CompletableFuture.supplyAsync(() -> generatePreview(url, forceRefresh), executorService);
    }

    /**
     * Builds the preview and, when it has an image, replaces the image URL with
     * its absolute form and adds the image dimensions if they can be determined.
     * Any failure leaves the preview as the parser produced it.
     */
    private LinkPreview buildWithImageDetails(LinkPreview.Builder previewBuilder) {
        LinkPreview preview = previewBuilder.build();
        String imageUrl = preview.getImageUrl();
        if (imageUrl == null) {
            return preview;
        }
        try {
            // Protocol-relative URLs are fetched over https; every other form
            // (root-relative, path-relative, already absolute) is resolved against the page URL.
            String resolvedUrl = imageUrl.startsWith("//")
                    ? "https:" + imageUrl
                    : httpService.resolveRelativeUrl(preview.getUrl(), imageUrl);
            previewBuilder.imageUrl(resolvedUrl);
            imageProcessor.getImageDimensions(resolvedUrl)
                    .ifPresent(dims -> previewBuilder.imageDimensions(dims.getWidth(), dims.getHeight()));
            return previewBuilder.build();
        } catch (RuntimeException e) {
            System.err.println("Error processing image dimensions: " + e.getMessage());
            return preview;
        }
    }

    /** Builds a preview that only carries an error message, with zeroed fetch time and response code. */
    private LinkPreview createErrorPreview(String url, String error) {
        return new LinkPreview.Builder(url)
                .error(error)
                .fetchTime(0L)
                .responseCode(0)
                .build();
    }

    /** Shuts down the executor used for asynchronous requests. */
    public void shutdown() {
        executorService.shutdown();
    }

    // Statistics methods
    public long getCacheSize() {
        return cacheManager.getCacheSize();
    }

    public void clearCache() {
        cacheManager.clear();
    }

    public void invalidateCache(String url) {
        cacheManager.invalidate(url);
    }
}

7. Configuration Class

package com.linkpreview.config;
import java.time.Duration;
/**
 * Mutable settings holder with fluent setters: each setter stores the value and
 * returns this same instance so calls can be chained.
 */
public class PreviewConfig {
    private static final int DEFAULT_TIMEOUT_SECONDS = 10;
    private static final int DEFAULT_MAX_CACHE_SIZE = 1000;
    private static final Duration DEFAULT_CACHE_TTL = Duration.ofHours(24);
    private static final String DEFAULT_USER_AGENT = "Mozilla/5.0 (compatible; LinkPreviewBot/1.0)";
    private static final int DEFAULT_MAX_IMAGE_SIZE_BYTES = 2 * 1024 * 1024; // 2MB
    private static final int DEFAULT_THREAD_POOL_SIZE = 10;

    private int timeoutSeconds = DEFAULT_TIMEOUT_SECONDS;
    private int maxCacheSize = DEFAULT_MAX_CACHE_SIZE;
    private Duration cacheTtl = DEFAULT_CACHE_TTL;
    private String userAgent = DEFAULT_USER_AGENT;
    private boolean followRedirects = true;
    private int maxImageSizeBytes = DEFAULT_MAX_IMAGE_SIZE_BYTES;
    private int threadPoolSize = DEFAULT_THREAD_POOL_SIZE;

    // --- timeout ---

    public int getTimeoutSeconds() {
        return timeoutSeconds;
    }

    public PreviewConfig timeoutSeconds(int timeoutSeconds) {
        this.timeoutSeconds = timeoutSeconds;
        return this;
    }

    // --- cache ---

    public int getMaxCacheSize() {
        return maxCacheSize;
    }

    public PreviewConfig maxCacheSize(int maxCacheSize) {
        this.maxCacheSize = maxCacheSize;
        return this;
    }

    public Duration getCacheTtl() {
        return cacheTtl;
    }

    public PreviewConfig cacheTtl(Duration cacheTtl) {
        this.cacheTtl = cacheTtl;
        return this;
    }

    // --- HTTP behaviour ---

    public String getUserAgent() {
        return userAgent;
    }

    public PreviewConfig userAgent(String userAgent) {
        this.userAgent = userAgent;
        return this;
    }

    public boolean isFollowRedirects() {
        return followRedirects;
    }

    public PreviewConfig followRedirects(boolean followRedirects) {
        this.followRedirects = followRedirects;
        return this;
    }

    // --- images ---

    public int getMaxImageSizeBytes() {
        return maxImageSizeBytes;
    }

    public PreviewConfig maxImageSizeBytes(int maxImageSizeBytes) {
        this.maxImageSizeBytes = maxImageSizeBytes;
        return this;
    }

    // --- threading ---

    public int getThreadPoolSize() {
        return threadPoolSize;
    }

    public PreviewConfig threadPoolSize(int threadPoolSize) {
        this.threadPoolSize = threadPoolSize;
        return this;
    }
}

8. REST API Controller (Spring Boot)

package com.linkpreview.controller;
import com.linkpreview.model.LinkPreview;
import com.linkpreview.service.LinkPreviewService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import java.util.Optional;
/**
 * REST endpoints under {@code /api/preview}: single and batch preview
 * generation, cache clearing and cache statistics.
 */
// NOTE(review): origins = "*" also opens the cache-clearing DELETE endpoint to
// every origin - confirm this is intended before deploying.
@RestController
@RequestMapping("/api/preview")
@CrossOrigin(origins = "*")
public class LinkPreviewController {
    @Autowired
    private LinkPreviewService previewService;

    /**
     * Returns the preview for {@code url}.
     *
     * @param url     page to describe
     * @param refresh when true the cached entry is bypassed
     * @return 200 with the preview, or 404 when the service produces none
     */
    @GetMapping
    public ResponseEntity<LinkPreview> getPreview(
            @RequestParam String url,
            @RequestParam(required = false, defaultValue = "false") boolean refresh) {
        Optional<LinkPreview> preview = previewService.generatePreview(url, refresh);
        return preview.map(ResponseEntity::ok)
                .orElse(ResponseEntity.notFound().build());
    }

    /**
     * Generates previews for every URL in the request, one after another.
     *
     * @return 200 with the collected previews, or 400 when the request carries
     *         no {@code urls} array
     */
    @PostMapping("/batch")
    public ResponseEntity<BatchPreviewResponse> getBatchPreviews(
            @RequestBody BatchPreviewRequest request) {
        if (request == null || request.getUrls() == null) {
            // A body without "urls" would otherwise fail with a NullPointerException.
            return ResponseEntity.badRequest().build();
        }
        BatchPreviewResponse response = new BatchPreviewResponse();
        for (String url : request.getUrls()) {
            previewService.generatePreview(url).ifPresent(response::addPreview);
        }
        return ResponseEntity.ok(response);
    }

    /** Empties the preview cache. */
    @DeleteMapping("/cache")
    public ResponseEntity<Void> clearCache() {
        previewService.clearCache();
        return ResponseEntity.ok().build();
    }

    /** Reports the current cache size. */
    @GetMapping("/stats")
    public ResponseEntity<PreviewStats> getStats() {
        PreviewStats stats = new PreviewStats();
        stats.setCacheSize(previewService.getCacheSize());
        return ResponseEntity.ok(stats);
    }

    // Request/Response classes

    /** Body of a batch request: the URLs to generate previews for. */
    public static class BatchPreviewRequest {
        private String[] urls;

        public String[] getUrls() { return urls; }
        public void setUrls(String[] urls) { this.urls = urls; }
    }

    /** Body of a batch response: the previews in request order. */
    public static class BatchPreviewResponse {
        // Fully qualified because this file's import list has no java.util.List/ArrayList.
        private final java.util.List<LinkPreview> previews = new java.util.ArrayList<>();

        public void addPreview(LinkPreview preview) {
            previews.add(preview);
        }

        public java.util.List<LinkPreview> getPreviews() { return previews; }
    }

    /** Body of the statistics response. */
    public static class PreviewStats {
        private long cacheSize;

        public long getCacheSize() { return cacheSize; }
        public void setCacheSize(long cacheSize) { this.cacheSize = cacheSize; }
    }
}

9. Spring Boot Configuration

package com.linkpreview.config;
import com.linkpreview.service.LinkPreviewService;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/** Spring configuration that registers the preview service as a bean. */
@Configuration
public class AppConfig {

    /**
     * Creates the {@link LinkPreviewService} bean using the service's default
     * constructor.
     */
    @Bean
    public LinkPreviewService linkPreviewService() {
        final LinkPreviewService previewService = new LinkPreviewService();
        return previewService;
    }
}

10. Usage Examples

package com.linkpreview.example;
import com.linkpreview.model.LinkPreview;
import com.linkpreview.service.LinkPreviewService;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
/**
 * Command-line demo: one synchronous preview, one asynchronous preview and a
 * small asynchronous batch.
 */
public class ExampleUsage {
    public static void main(String[] args) throws Exception {
        // Basic usage
        LinkPreviewService service = new LinkPreviewService();
        try {
            // Single preview
            Optional<LinkPreview> preview = service.generatePreview("https://github.com");
            preview.ifPresent(p -> {
                System.out.println("Title: " + p.getTitle());
                System.out.println("Description: " + p.getDescription());
                System.out.println("Image: " + p.getImageUrl());
            });

            // Batch processing
            String[] urls = {
                    "https://github.com",
                    "https://stackoverflow.com",
                    "https://news.ycombinator.com"
            };

            // Every asynchronous task is tracked so the program can wait for exactly
            // those tasks instead of sleeping for a fixed time.
            CompletableFuture<?>[] pending = new CompletableFuture<?>[urls.length + 1];

            // Async preview
            pending[0] = service.generatePreviewAsync("https://stackoverflow.com")
                    .thenAccept(result ->
                            result.ifPresent(p -> System.out.println("Async result: " + p.getTitle())));

            for (int i = 0; i < urls.length; i++) {
                pending[i + 1] = service.generatePreviewAsync(urls[i])
                        .thenAccept(result -> result.ifPresent(p -> {
                            System.out.println("URL: " + p.getUrl());
                            System.out.println("Title: " + p.getTitle());
                            System.out.println("---");
                        }));
            }

            // Wait for async operations to complete
            CompletableFuture.allOf(pending).join();
        } finally {
            // Always release the worker threads, even if something above failed.
            service.shutdown();
        }
    }
}

11. Maven Dependencies

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Build descriptor for the link preview generator. -->
<modelVersion>4.0.0</modelVersion>
<!-- Project coordinates -->
<groupId>com.linkpreview</groupId>
<artifactId>link-preview-generator</artifactId>
<version>1.0.0</version>
<!-- Java 17 source/target levels, UTF-8 sources, and the Spring Boot version referenced below -->
<properties>
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<spring-boot.version>3.1.0</spring-boot.version>
</properties>
<dependencies>
<!-- HTML Parsing -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.16.1</version>
</dependency>
<!-- Caching -->
<dependency>
<groupId>com.github.ben-manes.caffeine</groupId>
<artifactId>caffeine</artifactId>
<version>3.1.6</version>
</dependency>
<!-- JSON Processing -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.15.2</version>
</dependency>
<!-- Spring Boot (Optional) -->
<!-- Declared optional, so it is not passed on transitively to projects that depend on this artifact -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
<version>${spring-boot.version}</version>
<optional>true</optional>
</dependency>
<!-- Testing -->
<!-- Both testing libraries use test scope and are not part of the runtime classpath -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.2</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<version>5.4.0</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Compiler plugin pinned to 3.11.0; repeats the Java 17 levels set in the properties above -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.11.0</version>
<configuration>
<source>17</source>
<target>17</target>
</configuration>
</plugin>
</plugins>
</build>
</project>

Features

Core Features

  • Extract page titles, descriptions, and site names
  • Parse Open Graph metadata
  • Parse Twitter Card metadata
  • Extract favicon URLs
  • Resolve canonical URLs
  • Image URL extraction and dimension analysis

Performance Features

  • In-memory caching with TTL
  • Async processing with CompletableFuture
  • Connection pooling and reuse
  • Configurable timeouts
  • Failure caching to avoid repeated failures

Security Features

  • URL validation
  • Size limits for image downloads
  • Timeout protection
  • User-agent customization

API Features

  • RESTful API endpoints
  • Batch processing support
  • Cache management endpoints
  • Statistics and monitoring

This link preview generator is production-ready and can handle high volumes of requests efficiently while providing comprehensive metadata extraction from web pages.

Leave a Reply

Your email address will not be published. Required fields are marked *


Macro Nepal Helper