Stanford CoreNLP is a comprehensive natural language processing toolkit that provides robust linguistic analysis for various NLP tasks. This guide covers setup, core components, and practical applications.
Project Setup and Dependencies
Maven Configuration
<!-- pom.xml -->
<!-- Pin the CoreNLP version once: the code jar and every models jar must be
     the same version, so a single property keeps them in sync. -->
<properties>
<corenlp.version>4.5.4</corenlp.version>
</properties>
<dependencies>
<!-- Core Stanford NLP -->
<dependency>
<groupId>edu.stanford.nlp</groupId>
<artifactId>stanford-corenlp</artifactId>
<version>${corenlp.version}</version>
</dependency>
<!-- Models for English -->
<!-- Same artifact, different classifier: the models jars contain only data
     files (taggers, parsers, NER models), no code. They are large downloads. -->
<dependency>
<groupId>edu.stanford.nlp</groupId>
<artifactId>stanford-corenlp</artifactId>
<version>${corenlp.version}</version>
<classifier>models</classifier>
</dependency>
<!-- Additional models -->
<dependency>
<groupId>edu.stanford.nlp</groupId>
<artifactId>stanford-corenlp</artifactId>
<version>${corenlp.version}</version>
<classifier>models-english</classifier>
</dependency>
<!-- For JSON output -->
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.10.1</version>
</dependency>
</dependencies>
Gradle Configuration
// build.gradle
dependencies {
// Code jar plus its model jars; the "models*" classifiers must match the
// main artifact's version exactly.
implementation 'edu.stanford.nlp:stanford-corenlp:4.5.4'
implementation 'edu.stanford.nlp:stanford-corenlp:4.5.4:models'
implementation 'edu.stanford.nlp:stanford-corenlp:4.5.4:models-english'
// Gson is used by the examples for JSON output.
implementation 'com.google.code.gson:gson:2.10.1'
}
CoreNLP Pipeline Setup
Basic Pipeline Configuration
package com.example.nlp;
import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.util.PropertiesUtils;
import java.util.Properties;
public class CoreNLPInitializer {

    /**
     * Creates a pipeline with a common general-purpose annotator stack.
     *
     * <p>Pipelines are expensive to construct (they load large models) but are
     * safe to share across threads for annotation, so build once and reuse.
     *
     * <p>Fix: the original also set {@code coref.algorithm} here, but no coref
     * annotator is enabled in this stack, so that property was dead
     * configuration and has been removed.
     *
     * @return pipeline running tokenize, ssplit, pos, lemma, ner, parse, sentiment
     */
    public static StanfordCoreNLP createBasicPipeline() {
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, sentiment");
        return new StanfordCoreNLP(props);
    }

    /**
     * Creates a pipeline with a caller-supplied list of annotators.
     *
     * <p>Fix: the original pinned explicit model paths (e.g.
     * {@code pos-tagger/english-left3words/english-left3words-distsim.tagger},
     * {@code parser/nndep/english_SD.gz}) that predate CoreNLP 4.x and do not
     * resolve inside the 4.5.x models jar declared in the build. The bundled
     * 4.x defaults are used instead; set the {@code *.model} properties only
     * when you ship a custom model and know its resource path.
     *
     * @param annotators annotator names in dependency order
     *                   (e.g. "tokenize", "ssplit", "pos")
     */
    public static StanfordCoreNLP createCustomPipeline(String... annotators) {
        Properties props = new Properties();
        props.setProperty("annotators", String.join(", ", annotators));
        props.setProperty("tokenize.language", "en");
        return new StanfordCoreNLP(props);
    }

    /**
     * Creates a pipeline pre-configured for a named use case.
     *
     * @param useCase one of "sentiment", "ner", "relation", "coreference";
     *                any other value yields the full annotator stack
     */
    public static StanfordCoreNLP createPipelineForUseCase(String useCase) {
        Properties props = new Properties();
        switch (useCase.toLowerCase()) {
            case "sentiment":
                // Sentiment requires the constituency parser ("parse").
                props.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
                break;
            case "ner":
                props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner");
                props.setProperty("ner.applyNumericClassifiers", "true");
                props.setProperty("ner.useSUTime", "true");
                break;
            case "relation":
                props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, depparse, relation");
                break;
            case "coreference":
                // Fix: "dcoref" is the legacy deterministic system; CoreNLP 4.x
                // uses the "coref" annotator with a selectable algorithm.
                props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, coref");
                props.setProperty("coref.algorithm", "neural");
                break;
            default:
                props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, depparse, coref");
        }
        return new StanfordCoreNLP(props);
    }
}
Basic Text Processing
Tokenization and Sentence Splitting
package com.example.nlp.processors;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.util.CoreMap;
import java.util.ArrayList;
import java.util.List;
public class BasicTextProcessor {
private final StanfordCoreNLP pipeline;
public BasicTextProcessor(StanfordCoreNLP pipeline) {
this.pipeline = pipeline;
}
/**
* Tokenize text into words
*/
public List<String> tokenize(String text) {
List<String> tokens = new ArrayList<>();
Annotation document = new Annotation(text);
pipeline.annotate(document);
List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
for (CoreMap sentence : sentences) {
for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
String word = token.get(CoreAnnotations.TextAnnotation.class);
tokens.add(word);
}
}
return tokens;
}
/**
* Split text into sentences
*/
public List<String> splitSentences(String text) {
List<String> sentences = new ArrayList<>();
Annotation document = new Annotation(text);
pipeline.annotate(document);
List<CoreMap> sentenceMaps = document.get(CoreAnnotations.SentencesAnnotation.class);
for (CoreMap sentence : sentenceMaps) {
sentences.add(sentence.get(CoreAnnotations.TextAnnotation.class));
}
return sentences;
}
/**
* Get detailed token information
*/
public List<TokenInfo> getDetailedTokens(String text) {
List<TokenInfo> tokenInfos = new ArrayList<>();
Annotation document = new Annotation(text);
pipeline.annotate(document);
List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
for (CoreMap sentence : sentences) {
for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
TokenInfo info = new TokenInfo(
token.get(CoreAnnotations.TextAnnotation.class),
token.get(CoreAnnotations.PartOfSpeechAnnotation.class),
token.get(CoreAnnotations.LemmaAnnotation.class),
token.get(CoreAnnotations.NamedEntityTagAnnotation.class),
token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class)
);
tokenInfos.add(info);
}
}
return tokenInfos;
}
/**
* Token information container
*/
public static class TokenInfo {
public final String word;
public final String pos;
public final String lemma;
public final String ner;
public final int start;
public final int end;
public TokenInfo(String word, String pos, String lemma, String ner, int start, int end) {
this.word = word;
this.pos = pos;
this.lemma = lemma;
this.ner = ner;
this.start = start;
this.end = end;
}
@Override
public String toString() {
return String.format("Word: %-15s POS: %-5s Lemma: %-15s NER: %-10s [%d-%d]",
word, pos, lemma, ner, start, end);
}
}
}
Part-of-Speech Tagging
package com.example.nlp.processors;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.trees.Tree;
import java.util.ArrayList;
import java.util.List;
public class POSTagger {
private final StanfordCoreNLP pipeline;
public POSTagger(StanfordCoreNLP pipeline) {
this.pipeline = pipeline;
}
/**
* Get POS tags for text
*/
public List<TaggedWord> tagText(String text) {
List<TaggedWord> taggedWords = new ArrayList<>();
Annotation document = new Annotation(text);
pipeline.annotate(document);
List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
for (CoreMap sentence : sentences) {
for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
String word = token.get(CoreAnnotations.TextAnnotation.class);
String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
taggedWords.add(new TaggedWord(word, pos));
}
}
return taggedWords;
}
/**
* Get POS tags with Penn Treebank tags explanation
*/
public List<POSTag> getDetailedPOSTags(String text) {
List<POSTag> posTags = new ArrayList<>();
List<TaggedWord> taggedWords = tagText(text);
for (TaggedWord taggedWord : taggedWords) {
String explanation = getPOSTagExplanation(taggedWord.tag());
posTags.add(new POSTag(taggedWord.word(), taggedWord.tag(), explanation));
}
return posTags;
}
/**
* Common Penn Treebank POS tag explanations
*/
private String getPOSTagExplanation(String tag) {
switch (tag) {
case "NN": case "NNS": return "Noun, singular or mass / plural";
case "NNP": case "NNPS": return "Proper noun, singular / plural";
case "VB": return "Verb, base form";
case "VBD": return "Verb, past tense";
case "VBG": return "Verb, gerund or present participle";
case "VBN": return "Verb, past participle";
case "VBP": return "Verb, non-3rd person singular present";
case "VBZ": return "Verb, 3rd person singular present";
case "JJ": case "JJR": case "JJS": return "Adjective / comparative / superlative";
case "RB": case "RBR": case "RBS": return "Adverb / comparative / superlative";
case "PRP": case "PRP$": return "Personal pronoun / possessive";
case "DT": return "Determiner";
case "IN": return "Preposition or subordinating conjunction";
case "CC": return "Coordinating conjunction";
case "CD": return "Cardinal number";
default: return "Other";
}
}
/**
* POS tag container
*/
public static class POSTag {
public final String word;
public final String tag;
public final String explanation;
public POSTag(String word, String tag, String explanation) {
this.word = word;
this.tag = tag;
this.explanation = explanation;
}
@Override
public String toString() {
return String.format("%-15s %-5s %s", word, tag, explanation);
}
}
/**
* Extract specific POS patterns
*/
public List<String> extractPattern(String text, String... posPattern) {
List<String> matches = new ArrayList<>();
List<TaggedWord> taggedWords = tagText(text);
for (int i = 0; i <= taggedWords.size() - posPattern.length; i++) {
boolean match = true;
for (int j = 0; j < posPattern.length; j++) {
if (!taggedWords.get(i + j).tag().equals(posPattern[j])) {
match = false;
break;
}
}
if (match) {
StringBuilder phrase = new StringBuilder();
for (int j = 0; j < posPattern.length; j++) {
phrase.append(taggedWords.get(i + j).word()).append(" ");
}
matches.add(phrase.toString().trim());
}
}
return matches;
}
}
Named Entity Recognition (NER)
package com.example.nlp.processors;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.util.CoreMap;
import java.util.ArrayList;
import java.util.List;
public class NamedEntityRecognizer {
private final StanfordCoreNLP pipeline;
public NamedEntityRecognizer(StanfordCoreNLP pipeline) {
this.pipeline = pipeline;
}
/**
* Extract named entities from text
*/
public List<NamedEntity> extractEntities(String text) {
List<NamedEntity> entities = new ArrayList<>();
Annotation document = new Annotation(text);
pipeline.annotate(document);
List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
for (CoreMap sentence : sentences) {
List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
NamedEntity currentEntity = null;
for (int i = 0; i < tokens.size(); i++) {
CoreLabel token = tokens.get(i);
String ner = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
String word = token.get(CoreAnnotations.TextAnnotation.class);
if (!"O".equals(ner)) {
if (currentEntity == null || !ner.equals(currentEntity.type)) {
if (currentEntity != null) {
entities.add(currentEntity);
}
currentEntity = new NamedEntity(word, ner,
token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
} else {
currentEntity.text += " " + word;
}
} else {
if (currentEntity != null) {
currentEntity.end = token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
entities.add(currentEntity);
currentEntity = null;
}
}
}
if (currentEntity != null) {
currentEntity.end = tokens.get(tokens.size() - 1)
.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
entities.add(currentEntity);
}
}
return entities;
}
/**
* Extract entities by type
*/
public List<NamedEntity> extractEntitiesByType(String text, String... entityTypes) {
List<NamedEntity> allEntities = extractEntities(text);
List<NamedEntity> filtered = new ArrayList<>();
for (NamedEntity entity : allEntities) {
for (String type : entityTypes) {
if (entity.type.equals(type)) {
filtered.add(entity);
break;
}
}
}
return filtered;
}
/**
* Get entity statistics
*/
public EntityStatistics getEntityStatistics(String text) {
List<NamedEntity> entities = extractEntities(text);
EntityStatistics stats = new EntityStatistics();
for (NamedEntity entity : entities) {
stats.totalEntities++;
stats.entityCounts.put(entity.type,
stats.entityCounts.getOrDefault(entity.type, 0) + 1);
}
return stats;
}
/**
* Named entity container
*/
public static class NamedEntity {
public String text;
public String type;
public int start;
public int end;
public NamedEntity(String text, String type, int start) {
this.text = text;
this.type = type;
this.start = start;
}
@Override
public String toString() {
return String.format("%-20s %-10s [%d-%d]", text, type, start, end);
}
}
/**
* Entity statistics container
*/
public static class EntityStatistics {
public int totalEntities = 0;
public java.util.Map<String, Integer> entityCounts = new java.util.HashMap<>();
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("Total entities: ").append(totalEntities).append("\n");
for (java.util.Map.Entry<String, Integer> entry : entityCounts.entrySet()) {
sb.append(String.format("%-15s: %d%n", entry.getKey(), entry.getValue()));
}
return sb.toString();
}
}
}
Sentiment Analysis
package com.example.nlp.processors;
import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;
import java.util.ArrayList;
import java.util.List;
public class SentimentAnalyzer {
private final StanfordCoreNLP pipeline;
public SentimentAnalyzer(StanfordCoreNLP pipeline) {
this.pipeline = pipeline;
}
/**
* Analyze sentiment of text
*/
public SentimentResult analyzeSentiment(String text) {
Annotation document = new Annotation(text);
pipeline.annotate(document);
List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
SentimentResult result = new SentimentResult();
for (CoreMap sentence : sentences) {
String sentiment = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
SentenceSentiment sentenceResult = new SentenceSentiment(
sentence.get(CoreAnnotations.TextAnnotation.class),
sentiment,
getSentimentScore(sentiment)
);
result.sentences.add(sentenceResult);
result.overallScore += sentenceResult.score;
}
if (!sentences.isEmpty()) {
result.overallScore /= sentences.size();
result.overallSentiment = getOverallSentiment(result.overallScore);
}
return result;
}
/**
* Convert sentiment label to numerical score
*/
private double getSentimentScore(String sentiment) {
switch (sentiment.toLowerCase()) {
case "very positive": return 1.0;
case "positive": return 0.75;
case "neutral": return 0.5;
case "negative": return 0.25;
case "very negative": return 0.0;
default: return 0.5;
}
}
/**
* Convert numerical score to sentiment label
*/
private String getOverallSentiment(double score) {
if (score >= 0.8) return "Very Positive";
if (score >= 0.6) return "Positive";
if (score >= 0.4) return "Neutral";
if (score >= 0.2) return "Negative";
return "Very Negative";
}
/**
* Analyze sentiment over time (for multiple texts)
*/
public List<TimeSeriesSentiment> analyzeSentimentOverTime(List<String> texts,
List<String> timestamps) {
List<TimeSeriesSentiment> results = new ArrayList<>();
for (int i = 0; i < texts.size(); i++) {
SentimentResult result = analyzeSentiment(texts.get(i));
String timestamp = (i < timestamps.size()) ? timestamps.get(i) : "Time " + (i + 1);
results.add(new TimeSeriesSentiment(timestamp, result.overallScore,
result.overallSentiment));
}
return results;
}
/**
* Sentiment result container
*/
public static class SentimentResult {
public List<SentenceSentiment> sentences = new ArrayList<>();
public double overallScore = 0.0;
public String overallSentiment = "Neutral";
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("Overall Sentiment: ").append(overallSentiment)
.append(" (Score: ").append(String.format("%.2f", overallScore)).append(")\n");
sb.append("Sentence-level analysis:\n");
for (SentenceSentiment sentence : sentences) {
sb.append(" - ").append(sentence).append("\n");
}
return sb.toString();
}
}
/**
* Sentence-level sentiment
*/
public static class SentenceSentiment {
public final String sentence;
public final String sentiment;
public final double score;
public SentenceSentiment(String sentence, String sentiment, double score) {
this.sentence = sentence;
this.sentiment = sentiment;
this.score = score;
}
@Override
public String toString() {
return String.format("%-10s (%.2f): %s", sentiment, score,
sentence.length() > 50 ? sentence.substring(0, 47) + "..." : sentence);
}
}
/**
* Time series sentiment container
*/
public static class TimeSeriesSentiment {
public final String timestamp;
public final double score;
public final String sentiment;
public TimeSeriesSentiment(String timestamp, double score, String sentiment) {
this.timestamp = timestamp;
this.score = score;
this.sentiment = sentiment;
}
@Override
public String toString() {
return String.format("%s: %s (%.2f)", timestamp, sentiment, score);
}
}
}
Dependency Parsing
package com.example.nlp.processors;
import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;
import java.util.ArrayList;
import java.util.List;
public class DependencyParser {
private final StanfordCoreNLP pipeline;
public DependencyParser(StanfordCoreNLP pipeline) {
this.pipeline = pipeline;
}
/**
* Get dependency parse for text
*/
public List<DependencyParse> parseDependencies(String text) {
List<DependencyParse> parses = new ArrayList<>();
Annotation document = new Annotation(text);
pipeline.annotate(document);
List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
for (CoreMap sentence : sentences) {
SemanticGraph dependencies = sentence.get(
SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
DependencyParse parse = new DependencyParse(
sentence.get(CoreAnnotations.TextAnnotation.class),
dependencies
);
parses.add(parse);
}
return parses;
}
/**
* Extract specific dependency relations
*/
public List<DependencyRelation> extractRelations(String text, String relationType) {
List<DependencyRelation> relations = new ArrayList<>();
List<DependencyParse> parses = parseDependencies(text);
for (DependencyParse parse : parses) {
for (edu.stanford.nlp.semgraph.SemanticGraphEdge edge : parse.dependencies.edgeListSorted()) {
if (relationType == null || edge.getRelation().toString().equals(relationType)) {
relations.add(new DependencyRelation(
edge.getGovernor().word(),
edge.getDependent().word(),
edge.getRelation().toString(),
parse.sentence
));
}
}
}
return relations;
}
/**
* Find subject-verb-object relations
*/
public List<SVORelation> extractSVORelations(String text) {
List<SVORelation> svoRelations = new ArrayList<>();
List<DependencyParse> parses = parseDependencies(text);
for (DependencyParse parse : parses) {
SemanticGraph dependencies = parse.dependencies;
// Look for nsubj (nominal subject) and dobj (direct object) relations
for (edu.stanford.nlp.semgraph.SemanticGraphEdge edge : dependencies.edgeListSorted()) {
if ("nsubj".equals(edge.getRelation().toString())) {
String subject = edge.getDependent().word();
String verb = edge.getGovernor().word();
// Find direct object for this verb
String object = findDirectObject(dependencies, edge.getGovernor());
if (object != null) {
svoRelations.add(new SVORelation(subject, verb, object, parse.sentence));
}
}
}
}
return svoRelations;
}
private String findDirectObject(SemanticGraph dependencies,
edu.stanford.nlp.ling.IndexedWord verb) {
for (edu.stanford.nlp.semgraph.SemanticGraphEdge edge : dependencies.outgoingEdgeList(verb)) {
if ("dobj".equals(edge.getRelation().toString())) {
return edge.getDependent().word();
}
}
return null;
}
/**
* Dependency parse container
*/
public static class DependencyParse {
public final String sentence;
public final SemanticGraph dependencies;
public DependencyParse(String sentence, SemanticGraph dependencies) {
this.sentence = sentence;
this.dependencies = dependencies;
}
@Override
public String toString() {
return "Sentence: " + sentence + "\nDependencies:\n" + dependencies.toString();
}
}
/**
* Dependency relation container
*/
public static class DependencyRelation {
public final String governor;
public final String dependent;
public final String relation;
public final String sentence;
public DependencyRelation(String governor, String dependent, String relation, String sentence) {
this.governor = governor;
this.dependent = dependent;
this.relation = relation;
this.sentence = sentence;
}
@Override
public String toString() {
return String.format("%s --%s--> %s", governor, relation, dependent);
}
}
/**
* Subject-Verb-Object relation container
*/
public static class SVORelation {
public final String subject;
public final String verb;
public final String object;
public final String sentence;
public SVORelation(String subject, String verb, String object, String sentence) {
this.subject = subject;
this.verb = verb;
this.object = object;
this.sentence = sentence;
}
@Override
public String toString() {
return String.format("SVO: %s → %s → %s", subject, verb, object);
}
}
}
Coreference Resolution
package com.example.nlp.processors;
import edu.stanford.nlp.hcoref.CorefCoreAnnotations;
import edu.stanford.nlp.hcoref.data.CorefChain;
import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.util.CoreMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
public class CoreferenceResolver {
private final StanfordCoreNLP pipeline;
public CoreferenceResolver(StanfordCoreNLP pipeline) {
this.pipeline = pipeline;
}
/**
* Resolve coreferences in text
*/
public CoreferenceResult resolveCoreferences(String text) {
Annotation document = new Annotation(text);
pipeline.annotate(document);
CoreferenceResult result = new CoreferenceResult(text);
// Get coreference chains
Map<Integer, CorefChain> corefChains = document.get(
CorefCoreAnnotations.CorefChainAnnotation.class);
if (corefChains != null) {
for (CorefChain chain : corefChains.values()) {
CorefChainDescription chainDesc = extractChainDescription(chain, document);
if (chainDesc != null) {
result.corefChains.add(chainDesc);
}
}
}
// Replace pronouns with their references
result.resolvedText = replaceCoreferences(text, result.corefChains);
return result;
}
private CorefChainDescription extractChainDescription(CorefChain chain, Annotation document) {
if (chain.getRepresentativeMention() == null) return null;
CorefChain.Mention representative = chain.getRepresentativeMention();
String representativeText = getMentionText(representative, document);
CorefChainDescription chainDesc = new CorefChainDescription(representativeText);
for (CorefChain.Mention mention : chain.getMentionsInTextualOrder()) {
String mentionText = getMentionText(mention, document);
if (!mentionText.equals(representativeText)) {
chainDesc.mentions.add(new CorefMention(mentionText, mention.startIndex, mention.endIndex));
}
}
return chainDesc;
}
private String getMentionText(CorefChain.Mention mention, Annotation document) {
List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
CoreMap sentence = sentences.get(mention.sentNum - 1);
List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
StringBuilder text = new StringBuilder();
for (int i = mention.startIndex - 1; i < mention.endIndex - 1; i++) {
if (i > mention.startIndex - 1) text.append(" ");
text.append(tokens.get(i).get(CoreAnnotations.TextAnnotation.class));
}
return text.toString();
}
private String replaceCoreferences(String originalText, List<CorefChainDescription> chains) {
String resolvedText = originalText;
// Simple replacement: replace all mentions with their representative
for (CorefChainDescription chain : chains) {
for (CorefMention mention : chain.mentions) {
// Simple string replacement (this is a basic implementation)
resolvedText = resolvedText.replace(mention.text, chain.representative);
}
}
return resolvedText;
}
/**
* Coreference result container
*/
public static class CoreferenceResult {
public final String originalText;
public String resolvedText;
public List<CorefChainDescription> corefChains = new ArrayList<>();
public CoreferenceResult(String originalText) {
this.originalText = originalText;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("Original Text: ").append(originalText).append("\n");
sb.append("Resolved Text: ").append(resolvedText).append("\n");
sb.append("Coreference Chains:\n");
for (CorefChainDescription chain : corefChains) {
sb.append(" ").append(chain).append("\n");
}
return sb.toString();
}
}
/**
* Coreference chain container
*/
public static class CorefChainDescription {
public final String representative;
public List<CorefMention> mentions = new ArrayList<>();
public CorefChainDescription(String representative) {
this.representative = representative;
}
@Override
public String toString() {
return String.format("Representative: '%s' → Mentions: %s",
representative, mentions);
}
}
/**
* Coreference mention container
*/
public static class CorefMention {
public final String text;
public final int start;
public final int end;
public CorefMention(String text, int start, int end) {
this.text = text;
this.start = start;
this.end = end;
}
@Override
public String toString() {
return String.format("'%s'[%d-%d]", text, start, end);
}
}
}
Relation Extraction
package com.example.nlp.processors;
import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.util.CoreMap;
import java.util.ArrayList;
import java.util.List;
public class RelationExtractor {

    private final StanfordCoreNLP pipeline;

    /** @param pipeline must include "tokenize", "ssplit", "pos", "lemma", "ner" */
    public RelationExtractor(StanfordCoreNLP pipeline) {
        this.pipeline = pipeline;
    }

    /**
     * Rule-based relation extraction: scans each sentence for PERSON and
     * ORGANIZATION tokens and pairs them with nearby ORGANIZATION/LOCATION
     * tokens when a relation keyword appears between them.
     *
     * <p>This is a heuristic demo, not a trained relation extractor; it only
     * links single entity tokens, not multi-token spans.
     */
    public List<EntityRelation> extractRelations(String text) {
        List<EntityRelation> relations = new ArrayList<>();
        Annotation document = new Annotation(text);
        pipeline.annotate(document);
        List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
        for (CoreMap sentence : sentences) {
            List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
            for (int i = 0; i < tokens.size(); i++) {
                String ner = tokens.get(i).get(CoreAnnotations.NamedEntityTagAnnotation.class);
                if ("PERSON".equals(ner)) {
                    EntityRelation relation = findPersonOrganizationRelation(tokens, i);
                    if (relation != null) {
                        relations.add(relation);
                    }
                } else if ("ORGANIZATION".equals(ner)) {
                    EntityRelation relation = findOrganizationLocationRelation(tokens, i);
                    if (relation != null) {
                        relations.add(relation);
                    }
                }
            }
        }
        return relations;
    }

    /**
     * Looks within a ±5 token window of a PERSON token for an ORGANIZATION
     * token with a relation keyword between them
     * (e.g. "Alice works at Acme").
     */
    private EntityRelation findPersonOrganizationRelation(List<CoreLabel> tokens, int personIndex) {
        for (int i = Math.max(0, personIndex - 5); i < Math.min(tokens.size(), personIndex + 5); i++) {
            if (i == personIndex) continue;
            CoreLabel token = tokens.get(i);
            if ("ORGANIZATION".equals(token.get(CoreAnnotations.NamedEntityTagAnnotation.class))) {
                String relation = findRelationWord(tokens, Math.min(personIndex, i), Math.max(personIndex, i));
                if (relation != null) {
                    return new EntityRelation(
                            tokens.get(personIndex).get(CoreAnnotations.TextAnnotation.class),
                            "PERSON",
                            relation,
                            token.get(CoreAnnotations.TextAnnotation.class),
                            "ORGANIZATION");
                }
            }
        }
        return null;
    }

    /**
     * Looks within a ±5 token window of an ORGANIZATION token for a LOCATION
     * token with a relation keyword between them
     * (e.g. "Acme based in Berlin").
     */
    private EntityRelation findOrganizationLocationRelation(List<CoreLabel> tokens, int orgIndex) {
        for (int i = Math.max(0, orgIndex - 5); i < Math.min(tokens.size(), orgIndex + 5); i++) {
            if (i == orgIndex) continue;
            CoreLabel token = tokens.get(i);
            if ("LOCATION".equals(token.get(CoreAnnotations.NamedEntityTagAnnotation.class))) {
                String relation = findRelationWord(tokens, Math.min(orgIndex, i), Math.max(orgIndex, i));
                if (relation != null) {
                    return new EntityRelation(
                            tokens.get(orgIndex).get(CoreAnnotations.TextAnnotation.class),
                            "ORGANIZATION",
                            relation,
                            token.get(CoreAnnotations.TextAnnotation.class),
                            "LOCATION");
                }
            }
        }
        return null;
    }

    /**
     * Scans the tokens strictly between two indices for a relation keyword
     * and returns the matched surface word (lowercased), or null.
     *
     * <p>Fix: the lemma annotation is null when the "lemma" annotator is not
     * in the pipeline; the original NPE'd on {@code lemma.toLowerCase()}.
     */
    private String findRelationWord(List<CoreLabel> tokens, int start, int end) {
        List<String> relationWords = List.of("works", "employed", "based", "located", "headquartered", "founder");
        for (int i = start + 1; i < end; i++) {
            String word = tokens.get(i).get(CoreAnnotations.TextAnnotation.class).toLowerCase();
            String lemma = tokens.get(i).get(CoreAnnotations.LemmaAnnotation.class);
            if (relationWords.contains(word)
                    || (lemma != null && relationWords.contains(lemma.toLowerCase()))) {
                return word;
            }
        }
        return null;
    }

    /**
     * A typed binary relation between two entity strings.
     */
    public static class EntityRelation {
        public final String entity1;
        public final String type1;
        public final String relation;
        public final String entity2;
        public final String type2;

        public EntityRelation(String entity1, String type1, String relation,
                              String entity2, String type2) {
            this.entity1 = entity1;
            this.type1 = type1;
            this.relation = relation;
            this.entity2 = entity2;
            this.type2 = type2;
        }

        @Override
        public String toString() {
            return String.format("[%s] %s --%s--> [%s] %s",
                    type1, entity1, relation, type2, entity2);
        }
    }
}
Practical Applications
Text Analysis Pipeline
package com.example.nlp.applications;
import com.example.nlp.processors.*;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import java.util.List;
public class TextAnalysisPipeline {

    private final StanfordCoreNLP pipeline;
    private final BasicTextProcessor textProcessor;
    private final POSTagger posTagger;
    private final NamedEntityRecognizer ner;
    private final SentimentAnalyzer sentimentAnalyzer;
    private final DependencyParser dependencyParser;
    private final CoreferenceResolver coreferenceResolver;
    private final RelationExtractor relationExtractor;

    /**
     * Builds one shared pipeline (model loading is expensive) and wires every
     * processor to it.
     */
    public TextAnalysisPipeline() {
        this.pipeline = CoreNLPInitializer.createBasicPipeline();
        this.textProcessor = new BasicTextProcessor(pipeline);
        this.posTagger = new POSTagger(pipeline);
        this.ner = new NamedEntityRecognizer(pipeline);
        this.sentimentAnalyzer = new SentimentAnalyzer(pipeline);
        this.dependencyParser = new DependencyParser(pipeline);
        this.coreferenceResolver = new CoreferenceResolver(pipeline);
        this.relationExtractor = new RelationExtractor(pipeline);
    }

    /**
     * Runs every processor over the text and bundles all results.
     *
     * <p>Note: each processor re-annotates the text through the shared
     * pipeline, so the document is analyzed several times. Acceptable for
     * small inputs; share a single Annotation if that becomes a bottleneck.
     */
    public TextAnalysisResult analyzeText(String text) {
        TextAnalysisResult result = new TextAnalysisResult();
        result.sentences = textProcessor.splitSentences(text);
        result.tokens = textProcessor.getDetailedTokens(text);
        result.posTags = posTagger.getDetailedPOSTags(text);
        result.entities = ner.extractEntities(text);
        result.sentiment = sentimentAnalyzer.analyzeSentiment(text);
        result.dependencies = dependencyParser.parseDependencies(text);
        result.coreferences = coreferenceResolver.resolveCoreferences(text);
        result.relations = relationExtractor.extractRelations(text);
        return result;
    }

    /**
     * Aggregated output of one full analysis run.
     */
    public static class TextAnalysisResult {
        public List<String> sentences;
        public List<BasicTextProcessor.TokenInfo> tokens;
        public List<POSTagger.POSTag> posTags;
        public List<NamedEntityRecognizer.NamedEntity> entities;
        public SentimentAnalyzer.SentimentResult sentiment;
        public List<DependencyParser.DependencyParse> dependencies;
        public CoreferenceResolver.CoreferenceResult coreferences;
        public List<RelationExtractor.EntityRelation> relations;

        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            sb.append("=== TEXT ANALYSIS RESULTS ===\n\n");
            sb.append("Sentences: ").append(sentences.size()).append("\n");
            for (int i = 0; i < sentences.size(); i++) {
                sb.append(i + 1).append(". ").append(sentences.get(i)).append("\n");
            }
            sb.append("\n");
            sb.append("Sentiment: ").append(sentiment.overallSentiment)
                    .append(" (Score: ").append(String.format("%.2f", sentiment.overallScore)).append(")\n\n");
            sb.append("Named Entities:\n");
            for (NamedEntityRecognizer.NamedEntity entity : entities) {
                sb.append("  - ").append(entity).append("\n");
            }
            sb.append("\n");
            sb.append("Relations:\n");
            for (RelationExtractor.EntityRelation relation : relations) {
                sb.append("  - ").append(relation).append("\n");
            }
            return sb.toString();
        }
    }

    /**
     * Analyzes each document's content and pairs it with the result.
     *
     * <p>Fix: the original instantiated {@code ArrayList} although this
     * snippet only imports {@code java.util.List}; the fully qualified name
     * keeps the class compilable as written.
     */
    public List<DocumentAnalysis> analyzeDocuments(List<Document> documents) {
        List<DocumentAnalysis> analyses = new java.util.ArrayList<>(documents.size());
        for (Document doc : documents) {
            TextAnalysisResult analysis = analyzeText(doc.content);
            analyses.add(new DocumentAnalysis(doc, analysis));
        }
        return analyses;
    }

    /**
     * Immutable input document (id, title, body text, provenance).
     */
    public static class Document {
        public final String id;
        public final String title;
        public final String content;
        public final String source;

        public Document(String id, String title, String content, String source) {
            this.id = id;
            this.title = title;
            this.content = content;
            this.source = source;
        }
    }

    /**
     * A document paired with its analysis result.
     */
    public static class DocumentAnalysis {
        public final Document document;
        public final TextAnalysisResult analysis;

        public DocumentAnalysis(Document document, TextAnalysisResult analysis) {
            this.document = document;
            this.analysis = analysis;
        }
    }
}
Performance Optimization
Batch Processing and Caching
package com.example.nlp.utils;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
public class BatchProcessor {

    private final StanfordCoreNLP pipeline;
    private final ExecutorService workers;

    /**
     * @param pipeline       shared CoreNLP pipeline
     * @param threadPoolSize number of worker threads used for async batches
     */
    public BatchProcessor(StanfordCoreNLP pipeline, int threadPoolSize) {
        this.pipeline = pipeline;
        this.workers = Executors.newFixedThreadPool(threadPoolSize);
    }

    /**
     * Submits every text to the worker pool and returns one future per text,
     * in input order. The returned list is unmodifiable.
     */
    public <T> List<CompletableFuture<T>> processBatchAsync(
            List<String> texts, TextProcessor<T> processor) {
        List<CompletableFuture<T>> futures = new java.util.ArrayList<>(texts.size());
        for (String text : texts) {
            futures.add(CompletableFuture.supplyAsync(() -> processor.process(text), workers));
        }
        return List.copyOf(futures);
    }

    /**
     * Processes the texts sequentially, printing a progress line after every
     * tenth item and after the final one.
     */
    public <T> List<T> processBatchWithProgress(List<String> texts, TextProcessor<T> processor) {
        List<T> results = new java.util.ArrayList<>(texts.size());
        int total = texts.size();
        for (int index = 0; index < total; index++) {
            results.add(processor.process(texts.get(index)));
            int done = index + 1;
            boolean lastItem = done == total;
            if (done % 10 == 0 || lastItem) {
                System.out.printf("Processed %d/%d documents (%.1f%%)%n",
                        done, total, done * 100.0 / total);
            }
        }
        return results;
    }

    /**
     * Pluggable per-text processing step.
     */
    @FunctionalInterface
    public interface TextProcessor<T> {
        T process(String text);
    }

    /** Stops accepting work and lets queued tasks finish. */
    public void shutdown() {
        workers.shutdown();
    }
}
Conclusion
Stanford CoreNLP provides a comprehensive suite of NLP tools for Java applications:
Key Features Covered:
- Tokenization and sentence splitting
- Part-of-speech tagging
- Named Entity Recognition (NER)
- Sentiment analysis
- Dependency parsing
- Coreference resolution
- Relation extraction
Best Practices:
- Pipeline Configuration: Choose appropriate annotators for your use case
- Memory Management: Stanford CoreNLP is memory-intensive — a full annotator stack typically needs a multi-gigabyte heap, so run the JVM with an increased limit (e.g. -Xmx4g)
- Batch Processing: Use parallel processing for large datasets
- Error Handling: Always handle potential annotation failures
- Model Selection: Choose appropriate models for your domain
Performance Tips:
- Reuse pipeline instances (they are thread-safe)
- Use appropriate annotators (don't include unnecessary ones)
- Consider batch processing for large volumes
- Monitor memory usage with large texts
- Use async processing for responsive applications
Stanford CoreNLP is production-ready and suitable for enterprise applications, research projects, and educational purposes.