Stanford CoreNLP is a comprehensive natural language processing toolkit that provides robust linguistic analysis for various NLP tasks. This guide covers setup, core components, and practical applications.
Project Setup and Dependencies
Maven Configuration
<!-- pom.xml -->
<!-- Pin the CoreNLP version once: the code jar and every models jar must be
     the same version, so a single property keeps them in sync. -->
<properties>
<corenlp.version>4.5.4</corenlp.version>
</properties>
<dependencies>
<!-- Core Stanford NLP -->
<dependency>
<groupId>edu.stanford.nlp</groupId>
<artifactId>stanford-corenlp</artifactId>
<version>${corenlp.version}</version>
</dependency>
<!-- Models for English -->
<!-- Same artifact, different classifier: the models jars contain only data
     files (taggers, parsers, NER models), no code. They are large downloads. -->
<dependency>
<groupId>edu.stanford.nlp</groupId>
<artifactId>stanford-corenlp</artifactId>
<version>${corenlp.version}</version>
<classifier>models</classifier>
</dependency>
<!-- Additional models -->
<dependency>
<groupId>edu.stanford.nlp</groupId>
<artifactId>stanford-corenlp</artifactId>
<version>${corenlp.version}</version>
<classifier>models-english</classifier>
</dependency>
<!-- For JSON output -->
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.10.1</version>
</dependency>
</dependencies>
Gradle Configuration
// build.gradle
dependencies {
// Code jar plus its model jars; the "models*" classifiers must match the
// main artifact's version exactly.
implementation 'edu.stanford.nlp:stanford-corenlp:4.5.4'
implementation 'edu.stanford.nlp:stanford-corenlp:4.5.4:models'
implementation 'edu.stanford.nlp:stanford-corenlp:4.5.4:models-english'
// Gson is used by the examples for JSON output.
implementation 'com.google.code.gson:gson:2.10.1'
}
CoreNLP Pipeline Setup
Basic Pipeline Configuration
package com.example.nlp;
import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.util.PropertiesUtils;
import java.util.Properties;
public class CoreNLPInitializer {

    /**
     * Creates a pipeline with a common general-purpose annotator stack.
     *
     * <p>Pipelines are expensive to construct (they load large models) but are
     * safe to share across threads for annotation, so build once and reuse.
     *
     * <p>Fix: the original also set {@code coref.algorithm} here, but no coref
     * annotator is enabled in this stack, so that property was dead
     * configuration and has been removed.
     *
     * @return pipeline running tokenize, ssplit, pos, lemma, ner, parse, sentiment
     */
    public static StanfordCoreNLP createBasicPipeline() {
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, sentiment");
        return new StanfordCoreNLP(props);
    }

    /**
     * Creates a pipeline with a caller-supplied list of annotators.
     *
     * <p>Fix: the original pinned explicit model paths (e.g.
     * {@code pos-tagger/english-left3words/english-left3words-distsim.tagger},
     * {@code parser/nndep/english_SD.gz}) that predate CoreNLP 4.x and do not
     * resolve inside the 4.5.x models jar declared in the build. The bundled
     * 4.x defaults are used instead; set the {@code *.model} properties only
     * when you ship a custom model and know its resource path.
     *
     * @param annotators annotator names in dependency order
     *                   (e.g. "tokenize", "ssplit", "pos")
     */
    public static StanfordCoreNLP createCustomPipeline(String... annotators) {
        Properties props = new Properties();
        props.setProperty("annotators", String.join(", ", annotators));
        props.setProperty("tokenize.language", "en");
        return new StanfordCoreNLP(props);
    }

    /**
     * Creates a pipeline pre-configured for a named use case.
     *
     * @param useCase one of "sentiment", "ner", "relation", "coreference";
     *                any other value yields the full annotator stack
     */
    public static StanfordCoreNLP createPipelineForUseCase(String useCase) {
        Properties props = new Properties();
        switch (useCase.toLowerCase()) {
            case "sentiment":
                // Sentiment requires the constituency parser ("parse").
                props.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
                break;
            case "ner":
                props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner");
                props.setProperty("ner.applyNumericClassifiers", "true");
                props.setProperty("ner.useSUTime", "true");
                break;
            case "relation":
                props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, depparse, relation");
                break;
            case "coreference":
                // Fix: "dcoref" is the legacy deterministic system; CoreNLP 4.x
                // uses the "coref" annotator with a selectable algorithm.
                props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, coref");
                props.setProperty("coref.algorithm", "neural");
                break;
            default:
                props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, depparse, coref");
        }
        return new StanfordCoreNLP(props);
    }
}
Basic Text Processing
Tokenization and Sentence Splitting
package com.example.nlp.processors;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.util.CoreMap;
import java.util.ArrayList;
import java.util.List;
public class BasicTextProcessor {
private final StanfordCoreNLP pipeline;
public BasicTextProcessor(StanfordCoreNLP pipeline) {
this.pipeline = pipeline;
}
/**
* Tokenize text into words
*/
public List<String> tokenize(String text) {
List<String> tokens = new ArrayList<>();
Annotation document = new Annotation(text);
pipeline.annotate(document);
List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
for (CoreMap sentence : sentences) {
for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
String word = token.get(CoreAnnotations.TextAnnotation.class);
tokens.add(word);
}
}
return tokens;
}
/**
* Split text into sentences
*/
public List<String> splitSentences(String text) {
List<String> sentences = new ArrayList<>();
Annotation document = new Annotation(text);
pipeline.annotate(document);
List<CoreMap> sentenceMaps = document.get(CoreAnnotations.SentencesAnnotation.class);
for (CoreMap sentence : sentenceMaps) {
sentences.add(sentence.get(CoreAnnotations.TextAnnotation.class));
}
return sentences;
}
/**
* Get detailed token information
*/
public List<TokenInfo> getDetailedTokens(String text) {
List<TokenInfo> tokenInfos = new ArrayList<>();
Annotation document = new Annotation(text);
pipeline.annotate(document);
List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
for (CoreMap sentence : sentences) {
for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
TokenInfo info = new TokenInfo(
token.get(CoreAnnotations.TextAnnotation.class),
token.get(CoreAnnotations.PartOfSpeechAnnotation.class),
token.get(CoreAnnotations.LemmaAnnotation.class),
token.get(CoreAnnotations.NamedEntityTagAnnotation.class),
token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class)
);
tokenInfos.add(info);
}
}
return tokenInfos;
}
/**
* Token information container
*/
public static class TokenInfo {
public final String word;
public final String pos;
public final String lemma;
public final String ner;
public final int start;
public final int end;
public TokenInfo(String word, String pos, String lemma, String ner, int start, int end) {
this.word = word;
this.pos = pos;
this.lemma = lemma;
this.ner = ner;
this.start = start;
this.end = end;
}
@Override
public String toString() {
return String.format("Word: %-15s POS: %-5s Lemma: %-15s NER: %-10s [%d-%d]",
word, pos, lemma, ner, start, end);
}
}
}
Part-of-Speech Tagging
package com.example.nlp.processors;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.trees.Tree;
import java.util.ArrayList;
import java.util.List;
public class POSTagger {
private final StanfordCoreNLP pipeline;
public POSTagger(StanfordCoreNLP pipeline) {
this.pipeline = pipeline;
}
/**
* Get POS tags for text
*/
public List<TaggedWord> tagText(String text) {
List<TaggedWord> taggedWords = new ArrayList<>();
Annotation document = new Annotation(text);
pipeline.annotate(document);
List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
for (CoreMap sentence : sentences) {
for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
String word = token.get(CoreAnnotations.TextAnnotation.class);
String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
taggedWords.add(new TaggedWord(word, pos));
}
}
return taggedWords;
}
/**
* Get POS tags with Penn Treebank tags explanation
*/
public List<POSTag> getDetailedPOSTags(String text) {
List<POSTag> posTags = new ArrayList<>();
List<TaggedWord> taggedWords = tagText(text);
for (TaggedWord taggedWord : taggedWords) {
String explanation = getPOSTagExplanation(taggedWord.tag());
posTags.add(new POSTag(taggedWord.word(), taggedWord.tag(), explanation));
}
return posTags;
}
/**
* Common Penn Treebank POS tag explanations
*/
private String getPOSTagExplanation(String tag) {
switch (tag) {
case "NN": case "NNS": return "Noun, singular or mass / plural";
case "NNP": case "NNPS": return "Proper noun, singular / plural";
case "VB": return "Verb, base form";
case "VBD": return "Verb, past tense";
case "VBG": return "Verb, gerund or present participle";
case "VBN": return "Verb, past participle";
case "VBP": return "Verb, non-3rd person singular present";
case "VBZ": return "Verb, 3rd person singular present";
case "JJ": case "JJR": case "JJS": return "Adjective / comparative / superlative";
case "RB": case "RBR": case "RBS": return "Adverb / comparative / superlative";
case "PRP": case "PRP$": return "Personal pronoun / possessive";
case "DT": return "Determiner";
case "IN": return "Preposition or subordinating conjunction";
case "CC": return "Coordinating conjunction";
case "CD": return "Cardinal number";
default: return "Other";
}
}
/**
* POS tag container
*/
public static class POSTag {
public final String word;
public final String tag;
public final String explanation;
public POSTag(String word, String tag, String explanation) {
this.word = word;
this.tag = tag;
this.explanation = explanation;
}
@Override
public String toString() {
return String.format("%-15s %-5s %s", word, tag, explanation);
}
}
/**
* Extract specific POS patterns
*/
public List<String> extractPattern(String text, String... posPattern) {
List<String> matches = new ArrayList<>();
List<TaggedWord> taggedWords = tagText(text);
for (int i = 0; i <= taggedWords.size() - posPattern.length; i++) {
boolean match = true;
for (int j = 0; j < posPattern.length; j++) {
if (!taggedWords.get(i + j).tag().equals(posPattern[j])) {
match = false;
break;
}
}
if (match) {
StringBuilder phrase = new StringBuilder();
for (int j = 0; j < posPattern.length; j++) {
phrase.append(taggedWords.get(i + j).word()).append(" ");
}
matches.add(phrase.toString().trim());
}
}
return matches;
}
}
Named Entity Recognition (NER)
package com.example.nlp.processors;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.util.CoreMap;
import java.util.ArrayList;
import java.util.List;
public class NamedEntityRecognizer {
private final StanfordCoreNLP pipeline;
public NamedEntityRecognizer(StanfordCoreNLP pipeline) {
this.pipeline = pipeline;
}
/**
* Extract named entities from text
*/
public List<NamedEntity> extractEntities(String text) {
List<NamedEntity> entities = new ArrayList<>();
Annotation document = new Annotation(text);
pipeline.annotate(document);
List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
for (CoreMap sentence : sentences) {
List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
NamedEntity currentEntity = null;
for (int i = 0; i < tokens.size(); i++) {
CoreLabel token = tokens.get(i);
String ner = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
String word = token.get(CoreAnnotations.TextAnnotation.class);
if (!"O".equals(ner)) {
if (currentEntity == null || !ner.equals(currentEntity.type)) {
if (currentEntity != null) {
entities.add(currentEntity);
}
currentEntity = new NamedEntity(word, ner,
token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
} else {
currentEntity.text += " " + word;
}
} else {
if (currentEntity != null) {
currentEntity.end = token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
entities.add(currentEntity);
currentEntity = null;
}
}
}
if (currentEntity != null) {
currentEntity.end = tokens.get(tokens.size() - 1)
.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
entities.add(currentEntity);
}
}
return entities;
}
/**
* Extract entities by type
*/
public List<NamedEntity> extractEntitiesByType(String text, String... entityTypes) {
List<NamedEntity> allEntities = extractEntities(text);
List<NamedEntity> filtered = new ArrayList<>();
for (NamedEntity entity : allEntities) {
for (String type : entityTypes) {
if (entity.type.equals(type)) {
filtered.add(entity);
break;
}
}
}
return filtered;
}
/**
* Get entity statistics
*/
public EntityStatistics getEntityStatistics(String text) {
List<NamedEntity> entities = extractEntities(text);
EntityStatistics stats = new EntityStatistics();
for (NamedEntity entity : entities) {
stats.totalEntities++;
stats.entityCounts.put(entity.type,
stats.entityCounts.getOrDefault(entity.type, 0) + 1);
}
return stats;
}
/**
* Named entity container
*/
public static class NamedEntity {
public String text;
public String type;
public int start;
public int end;
public NamedEntity(String text, String type, int start) {
this.text = text;
this.type = type;
this.start = start;
}
@Override
public String toString() {
return String.format("%-20s %-10s [%d-%d]", text, type, start, end);
}
}
/**
* Entity statistics container
*/
public static class EntityStatistics {
public int totalEntities = 0;
public java.util.Map<String, Integer> entityCounts = new java.util.HashMap<>();
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("Total entities: ").append(totalEntities).append("\n");
for (java.util.Map.Entry<String, Integer> entry : entityCounts.entrySet()) {
sb.append(String.format("%-15s: %d%n", entry.getKey(), entry.getValue()));
}
return sb.toString();
}
}
}
Sentiment Analysis
package com.example.nlp.processors;
import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;
import java.util.ArrayList;
import java.util.List;
public class SentimentAnalyzer {
private final StanfordCoreNLP pipeline;
public SentimentAnalyzer(StanfordCoreNLP pipeline) {
this.pipeline = pipeline;
}
/**
* Analyze sentiment of text
*/
public SentimentResult analyzeSentiment(String text) {
Annotation document = new Annotation(text);
pipeline.annotate(document);
List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
SentimentResult result = new SentimentResult();
for (CoreMap sentence : sentences) {
String sentiment = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
SentenceSentiment sentenceResult = new SentenceSentiment(
sentence.get(CoreAnnotations.TextAnnotation.class),
sentiment,
getSentimentScore(sentiment)
);
result.sentences.add(sentenceResult);
result.overallScore += sentenceResult.score;
}
if (!sentences.isEmpty()) {
result.overallScore /= sentences.size();
result.overallSentiment = getOverallSentiment(result.overallScore);
}
return result;
}
/**
* Convert sentiment label to numerical score
*/
private double getSentimentScore(String sentiment) {
switch (sentiment.toLowerCase()) {
case "very positive": return 1.0;
case "positive": return 0.75;
case "neutral": return 0.5;
case "negative": return 0.25;
case "very negative": return 0.0;
default: return 0.5;
}
}
/**
* Convert numerical score to sentiment label
*/
private String getOverallSentiment(double score) {
if (score >= 0.8) return "Very Positive";
if (score >= 0.6) return "Positive";
if (score >= 0.4) return "Neutral";
if (score >= 0.2) return "Negative";
return "Very Negative";
}
/**
* Analyze sentiment over time (for multiple texts)
*/
public List<TimeSeriesSentiment> analyzeSentimentOverTime(List<String> texts,
List<String> timestamps) {
List<TimeSeriesSentiment> results = new ArrayList<>();
for (int i = 0; i < texts.size(); i++) {
SentimentResult result = analyzeSentiment(texts.get(i));
String timestamp = (i < timestamps.size()) ? timestamps.get(i) : "Time " + (i + 1);
results.add(new TimeSeriesSentiment(timestamp, result.overallScore,
result.overallSentiment));
}
return results;
}
/**
* Sentiment result container
*/
public static class SentimentResult {
public List<SentenceSentiment> sentences = new ArrayList<>();
public double overallScore = 0.0;
public String overallSentiment = "Neutral";
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("Overall Sentiment: ").append(overallSentiment)
.append(" (Score: ").append(String.format("%.2f", overallScore)).append(")\n");
sb.append("Sentence-level analysis:\n");
for (SentenceSentiment sentence : sentences) {
sb.append(" - ").append(sentence).append("\n");
}
return sb.toString();
}
}
/**
* Sentence-level sentiment
*/
public static class SentenceSentiment {
public final String sentence;
public final String sentiment;
public final double score;
public SentenceSentiment(String sentence, String sentiment, double score) {
this.sentence = sentence;
this.sentiment = sentiment;
this.score = score;
}
@Override
public String toString() {
return String.format("%-10s (%.2f): %s", sentiment, score,
sentence.length() > 50 ? sentence.substring(0, 47) + "..." : sentence);
}
}
/**
* Time series sentiment container
*/
public static class TimeSeriesSentiment {
public final String timestamp;
public final double score;
public final String sentiment;
public TimeSeriesSentiment(String timestamp, double score, String sentiment) {
this.timestamp = timestamp;
this.score = score;
this.sentiment = sentiment;
}
@Override
public String toString() {
return String.format("%s: %s (%.2f)", timestamp, sentiment, score);
}
}
}
Dependency Parsing
package com.example.nlp.processors;
import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;
import java.util.ArrayList;
import java.util.List;
public class DependencyParser {
private final StanfordCoreNLP pipeline;
public DependencyParser(StanfordCoreNLP pipeline) {
this.pipeline = pipeline;
}
/**
* Get dependency parse for text
*/
public List<DependencyParse> parseDependencies(String text) {
List<DependencyParse> parses = new ArrayList<>();
Annotation document = new Annotation(text);
pipeline.annotate(document);
List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
for (CoreMap sentence : sentences) {
SemanticGraph dependencies = sentence.get(
SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
DependencyParse parse = new DependencyParse(
sentence.get(CoreAnnotations.TextAnnotation.class),
dependencies
);
parses.add(parse);
}
return parses;
}
/**
* Extract specific dependency relations
*/
public List<DependencyRelation> extractRelations(String text, String relationType) {
List<DependencyRelation> relations = new ArrayList<>();
List<DependencyParse> parses = parseDependencies(text);
for (DependencyParse parse : parses) {
for (edu.stanford.nlp.semgraph.SemanticGraphEdge edge : parse.dependencies.edgeListSorted()) {
if (relationType == null || edge.getRelation().toString().equals(relationType)) {
relations.add(new DependencyRelation(
edge.getGovernor().word(),
edge.getDependent().word(),
edge.getRelation().toString(),
parse.sentence
));
}
}
}
return relations;
}
/**
* Find subject-verb-object relations
*/
public List<SVORelation> extractSVORelations(String text) {
List<SVORelation> svoRelations = new ArrayList<>();
List<DependencyParse> parses = parseDependencies(text);
for (DependencyParse parse : parses) {
SemanticGraph dependencies = parse.dependencies;
// Look for nsubj (nominal subject) and dobj (direct object) relations
for (edu.stanford.nlp.semgraph.SemanticGraphEdge edge : dependencies.edgeListSorted()) {
if ("nsubj".equals(edge.getRelation().toString())) {
String subject = edge.getDependent().word();
String verb = edge.getGovernor().word();
// Find direct object for this verb
String object = findDirectObject(dependencies, edge.getGovernor());
if (object != null) {
svoRelations.add(new SVORelation(subject, verb, object, parse.sentence));
}
}
}
}
return svoRelations;
}
private String findDirectObject(SemanticGraph dependencies,
edu.stanford.nlp.ling.IndexedWord verb) {
for (edu.stanford.nlp.semgraph.SemanticGraphEdge edge : dependencies.outgoingEdgeList(verb)) {
if ("dobj".equals(edge.getRelation().toString())) {
return edge.getDependent().word();
}
}
return null;
}
/**
* Dependency parse container
*/
public static class DependencyParse {
public final String sentence;
public final SemanticGraph dependencies;
public DependencyParse(String sentence, SemanticGraph dependencies) {
this.sentence = sentence;
this.dependencies = dependencies;
}
@Override
public String toString() {
return "Sentence: " + sentence + "\nDependencies:\n" + dependencies.toString();
}
}
/**
* Dependency relation container
*/
public static class DependencyRelation {
public final String governor;
public final String dependent;
public final String relation;
public final String sentence;
public DependencyRelation(String governor, String dependent, String relation, String sentence) {
this.governor = governor;
this.dependent = dependent;
this.relation = relation;
this.sentence = sentence;
}
@Override
public String toString() {
return String.format("%s --%s--> %s", governor, relation, dependent);
}
}
/**
* Subject-Verb-Object relation container
*/
public static class SVORelation {
public final String subject;
public final String verb;
public final String object;
public final String sentence;
public SVORelation(String subject, String verb, String object, String sentence) {
this.subject = subject;
this.verb = verb;
this.object = object;
this.sentence = sentence;
}
@Override
public String toString() {
return String.format("SVO: %s → %s → %s", subject, verb, object);
}
}
}
Coreference Resolution
package com.example.nlp.processors;
import edu.stanford.nlp.hcoref.CorefCoreAnnotations;
import edu.stanford.nlp.hcoref.data.CorefChain;
import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.util.CoreMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
public class CoreferenceResolver {
private final StanfordCoreNLP pipeline;
public CoreferenceResolver(StanfordCoreNLP pipeline) {
this.pipeline = pipeline;
}
/**
* Resolve coreferences in text
*/
public CoreferenceResult resolveCoreferences(String text) {
Annotation document = new Annotation(text);
pipeline.annotate(document);
CoreferenceResult result = new CoreferenceResult(text);
// Get coreference chains
Map<Integer, CorefChain> corefChains = document.get(
CorefCoreAnnotations.CorefChainAnnotation.class);
if (corefChains != null) {
for (CorefChain chain : corefChains.values()) {
CorefChainDescription chainDesc = extractChainDescription(chain, document);
if (chainDesc != null) {
result.corefChains.add(chainDesc);
}
}
}
// Replace pronouns with their references
result.resolvedText = replaceCoreferences(text, result.corefChains);
return result;
}
private CorefChainDescription extractChainDescription(CorefChain chain, Annotation document) {
if (chain.getRepresentativeMention() == null) return null;
CorefChain.Mention representative = chain.getRepresentativeMention();
String representativeText = getMentionText(representative, document);
CorefChainDescription chainDesc = new CorefChainDescription(representativeText);
for (CorefChain.Mention mention : chain.getMentionsInTextualOrder()) {
String mentionText = getMentionText(mention, document);
if (!mentionText.equals(representativeText)) {
chainDesc.mentions.add(new CorefMention(mentionText, mention.startIndex, mention.endIndex));
}
}
return chainDesc;
}
private String getMentionText(CorefChain.Mention mention, Annotation document) {
List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
CoreMap sentence = sentences.get(mention.sentNum - 1);
List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
StringBuilder text = new StringBuilder();
for (int i = mention.startIndex - 1; i < mention.endIndex - 1; i++) {
if (i > mention.startIndex - 1) text.append(" ");
text.append(tokens.get(i).get(CoreAnnotations.TextAnnotation.class));
}
return text.toString();
}
private String replaceCoreferences(String originalText, List<CorefChainDescription> chains) {
String resolvedText = originalText;
// Simple replacement: replace all mentions with their representative
for (CorefChainDescription chain : chains) {
for (CorefMention mention : chain.mentions) {
// Simple string replacement (this is a basic implementation)
resolvedText = resolvedText.replace(mention.text, chain.representative);
}
}
return resolvedText;
}
/**
* Coreference result container
*/
public static class CoreferenceResult {
public final String originalText;
public String resolvedText;
public List<CorefChainDescription> corefChains = new ArrayList<>();
public CoreferenceResult(String originalText) {
this.originalText = originalText;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("Original Text: ").append(originalText).append("\n");
sb.append("Resolved Text: ").append(resolvedText).append("\n");
sb.append("Coreference Chains:\n");
for (CorefChainDescription chain : corefChains) {
sb.append(" ").append(chain).append("\n");
}
return sb.toString();
}
}
/**
* Coreference chain container
*/
public static class CorefChainDescription {
public final String representative;
public List<CorefMention> mentions = new ArrayList<>();
public CorefChainDescription(String representative) {
this.representative = representative;
}
@Override
public String toString() {
return String.format("Representative: '%s' → Mentions: %s",
representative, mentions);
}
}
/**
* Coreference mention container
*/
public static class CorefMention {
public final String text;
public final int start;
public final int end;
public CorefMention(String text, int start, int end) {
this.text = text;
this.start = start;
this.end = end;
}
@Override
public String toString() {
return String.format("'%s'[%d-%d]", text, start, end);
}
}
}
Relation Extraction
package com.example.nlp.processors;
import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.util.CoreMap;
import java.util.ArrayList;
import java.util.List;
public class RelationExtractor {

    private final StanfordCoreNLP pipeline;

    /** @param pipeline must include "tokenize", "ssplit", "pos", "lemma", "ner" */
    public RelationExtractor(StanfordCoreNLP pipeline) {
        this.pipeline = pipeline;
    }

    /**
     * Rule-based relation extraction: scans each sentence for PERSON and
     * ORGANIZATION tokens and pairs them with nearby ORGANIZATION/LOCATION
     * tokens when a relation keyword appears between them.
     *
     * <p>This is a heuristic demo, not a trained relation extractor; it only
     * links single entity tokens, not multi-token spans.
     */
    public List<EntityRelation> extractRelations(String text) {
        List<EntityRelation> relations = new ArrayList<>();
        Annotation document = new Annotation(text);
        pipeline.annotate(document);
        List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
        for (CoreMap sentence : sentences) {
            List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
            for (int i = 0; i < tokens.size(); i++) {
                String ner = tokens.get(i).get(CoreAnnotations.NamedEntityTagAnnotation.class);
                if ("PERSON".equals(ner)) {
                    EntityRelation relation = findPersonOrganizationRelation(tokens, i);
                    if (relation != null) {
                        relations.add(relation);
                    }
                } else if ("ORGANIZATION".equals(ner)) {
                    EntityRelation relation = findOrganizationLocationRelation(tokens, i);
                    if (relation != null) {
                        relations.add(relation);
                    }
                }
            }
        }
        return relations;
    }

    /**
     * Looks within a ±5 token window of a PERSON token for an ORGANIZATION
     * token with a relation keyword between them
     * (e.g. "Alice works at Acme").
     */
    private EntityRelation findPersonOrganizationRelation(List<CoreLabel> tokens, int personIndex) {
        for (int i = Math.max(0, personIndex - 5); i < Math.min(tokens.size(), personIndex + 5); i++) {
            if (i == personIndex) continue;
            CoreLabel token = tokens.get(i);
            if ("ORGANIZATION".equals(token.get(CoreAnnotations.NamedEntityTagAnnotation.class))) {
                String relation = findRelationWord(tokens, Math.min(personIndex, i), Math.max(personIndex, i));
                if (relation != null) {
                    return new EntityRelation(
                            tokens.get(personIndex).get(CoreAnnotations.TextAnnotation.class),
                            "PERSON",
                            relation,
                            token.get(CoreAnnotations.TextAnnotation.class),
                            "ORGANIZATION");
                }
            }
        }
        return null;
    }

    /**
     * Looks within a ±5 token window of an ORGANIZATION token for a LOCATION
     * token with a relation keyword between them
     * (e.g. "Acme based in Berlin").
     */
    private EntityRelation findOrganizationLocationRelation(List<CoreLabel> tokens, int orgIndex) {
        for (int i = Math.max(0, orgIndex - 5); i < Math.min(tokens.size(), orgIndex + 5); i++) {
            if (i == orgIndex) continue;
            CoreLabel token = tokens.get(i);
            if ("LOCATION".equals(token.get(CoreAnnotations.NamedEntityTagAnnotation.class))) {
                String relation = findRelationWord(tokens, Math.min(orgIndex, i), Math.max(orgIndex, i));
                if (relation != null) {
                    return new EntityRelation(
                            tokens.get(orgIndex).get(CoreAnnotations.TextAnnotation.class),
                            "ORGANIZATION",
                            relation,
                            token.get(CoreAnnotations.TextAnnotation.class),
                            "LOCATION");
                }
            }
        }
        return null;
    }

    /**
     * Scans the tokens strictly between two indices for a relation keyword
     * and returns the matched surface word (lowercased), or null.
     *
     * <p>Fix: the lemma annotation is null when the "lemma" annotator is not
     * in the pipeline; the original NPE'd on {@code lemma.toLowerCase()}.
     */
    private String findRelationWord(List<CoreLabel> tokens, int start, int end) {
        List<String> relationWords = List.of("works", "employed", "based", "located", "headquartered", "founder");
        for (int i = start + 1; i < end; i++) {
            String word = tokens.get(i).get(CoreAnnotations.TextAnnotation.class).toLowerCase();
            String lemma = tokens.get(i).get(CoreAnnotations.LemmaAnnotation.class);
            if (relationWords.contains(word)
                    || (lemma != null && relationWords.contains(lemma.toLowerCase()))) {
                return word;
            }
        }
        return null;
    }

    /**
     * A typed binary relation between two entity strings.
     */
    public static class EntityRelation {
        public final String entity1;
        public final String type1;
        public final String relation;
        public final String entity2;
        public final String type2;

        public EntityRelation(String entity1, String type1, String relation,
                              String entity2, String type2) {
            this.entity1 = entity1;
            this.type1 = type1;
            this.relation = relation;
            this.entity2 = entity2;
            this.type2 = type2;
        }

        @Override
        public String toString() {
            return String.format("[%s] %s --%s--> [%s] %s",
                    type1, entity1, relation, type2, entity2);
        }
    }
}
Practical Applications
Text Analysis Pipeline
package com.example.nlp.applications;
import com.example.nlp.processors.*;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import java.util.List;
public class TextAnalysisPipeline {

    private final StanfordCoreNLP pipeline;
    private final BasicTextProcessor textProcessor;
    private final POSTagger posTagger;
    private final NamedEntityRecognizer ner;
    private final SentimentAnalyzer sentimentAnalyzer;
    private final DependencyParser dependencyParser;
    private final CoreferenceResolver coreferenceResolver;
    private final RelationExtractor relationExtractor;

    /**
     * Builds one shared pipeline (model loading is expensive) and wires every
     * processor to it.
     */
    public TextAnalysisPipeline() {
        this.pipeline = CoreNLPInitializer.createBasicPipeline();
        this.textProcessor = new BasicTextProcessor(pipeline);
        this.posTagger = new POSTagger(pipeline);
        this.ner = new NamedEntityRecognizer(pipeline);
        this.sentimentAnalyzer = new SentimentAnalyzer(pipeline);
        this.dependencyParser = new DependencyParser(pipeline);
        this.coreferenceResolver = new CoreferenceResolver(pipeline);
        this.relationExtractor = new RelationExtractor(pipeline);
    }

    /**
     * Runs every processor over the text and bundles all results.
     *
     * <p>Note: each processor re-annotates the text through the shared
     * pipeline, so the document is analyzed several times. Acceptable for
     * small inputs; share a single Annotation if that becomes a bottleneck.
     */
    public TextAnalysisResult analyzeText(String text) {
        TextAnalysisResult result = new TextAnalysisResult();
        result.sentences = textProcessor.splitSentences(text);
        result.tokens = textProcessor.getDetailedTokens(text);
        result.posTags = posTagger.getDetailedPOSTags(text);
        result.entities = ner.extractEntities(text);
        result.sentiment = sentimentAnalyzer.analyzeSentiment(text);
        result.dependencies = dependencyParser.parseDependencies(text);
        result.coreferences = coreferenceResolver.resolveCoreferences(text);
        result.relations = relationExtractor.extractRelations(text);
        return result;
    }

    /**
     * Aggregated output of one full analysis run.
     */
    public static class TextAnalysisResult {
        public List<String> sentences;
        public List<BasicTextProcessor.TokenInfo> tokens;
        public List<POSTagger.POSTag> posTags;
        public List<NamedEntityRecognizer.NamedEntity> entities;
        public SentimentAnalyzer.SentimentResult sentiment;
        public List<DependencyParser.DependencyParse> dependencies;
        public CoreferenceResolver.CoreferenceResult coreferences;
        public List<RelationExtractor.EntityRelation> relations;

        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            sb.append("=== TEXT ANALYSIS RESULTS ===\n\n");
            sb.append("Sentences: ").append(sentences.size()).append("\n");
            for (int i = 0; i < sentences.size(); i++) {
                sb.append(i + 1).append(". ").append(sentences.get(i)).append("\n");
            }
            sb.append("\n");
            sb.append("Sentiment: ").append(sentiment.overallSentiment)
                    .append(" (Score: ").append(String.format("%.2f", sentiment.overallScore)).append(")\n\n");
            sb.append("Named Entities:\n");
            for (NamedEntityRecognizer.NamedEntity entity : entities) {
                sb.append("  - ").append(entity).append("\n");
            }
            sb.append("\n");
            sb.append("Relations:\n");
            for (RelationExtractor.EntityRelation relation : relations) {
                sb.append("  - ").append(relation).append("\n");
            }
            return sb.toString();
        }
    }

    /**
     * Analyzes each document's content and pairs it with the result.
     *
     * <p>Fix: the original instantiated {@code ArrayList} although this
     * snippet only imports {@code java.util.List}; the fully qualified name
     * keeps the class compilable as written.
     */
    public List<DocumentAnalysis> analyzeDocuments(List<Document> documents) {
        List<DocumentAnalysis> analyses = new java.util.ArrayList<>(documents.size());
        for (Document doc : documents) {
            TextAnalysisResult analysis = analyzeText(doc.content);
            analyses.add(new DocumentAnalysis(doc, analysis));
        }
        return analyses;
    }

    /**
     * Immutable input document (id, title, body text, provenance).
     */
    public static class Document {
        public final String id;
        public final String title;
        public final String content;
        public final String source;

        public Document(String id, String title, String content, String source) {
            this.id = id;
            this.title = title;
            this.content = content;
            this.source = source;
        }
    }

    /**
     * A document paired with its analysis result.
     */
    public static class DocumentAnalysis {
        public final Document document;
        public final TextAnalysisResult analysis;

        public DocumentAnalysis(Document document, TextAnalysisResult analysis) {
            this.document = document;
            this.analysis = analysis;
        }
    }
}
Performance Optimization
Batch Processing and Caching
package com.example.nlp.utils;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
public class BatchProcessor {

    private final StanfordCoreNLP pipeline;
    private final ExecutorService workers;

    /**
     * @param pipeline       shared CoreNLP pipeline
     * @param threadPoolSize number of worker threads used for async batches
     */
    public BatchProcessor(StanfordCoreNLP pipeline, int threadPoolSize) {
        this.pipeline = pipeline;
        this.workers = Executors.newFixedThreadPool(threadPoolSize);
    }

    /**
     * Submits every text to the worker pool and returns one future per text,
     * in input order. The returned list is unmodifiable.
     */
    public <T> List<CompletableFuture<T>> processBatchAsync(
            List<String> texts, TextProcessor<T> processor) {
        List<CompletableFuture<T>> futures = new java.util.ArrayList<>(texts.size());
        for (String text : texts) {
            futures.add(CompletableFuture.supplyAsync(() -> processor.process(text), workers));
        }
        return List.copyOf(futures);
    }

    /**
     * Processes the texts sequentially, printing a progress line after every
     * tenth item and after the final one.
     */
    public <T> List<T> processBatchWithProgress(List<String> texts, TextProcessor<T> processor) {
        List<T> results = new java.util.ArrayList<>(texts.size());
        int total = texts.size();
        for (int index = 0; index < total; index++) {
            results.add(processor.process(texts.get(index)));
            int done = index + 1;
            boolean lastItem = done == total;
            if (done % 10 == 0 || lastItem) {
                System.out.printf("Processed %d/%d documents (%.1f%%)%n",
                        done, total, done * 100.0 / total);
            }
        }
        return results;
    }

    /**
     * Pluggable per-text processing step.
     */
    @FunctionalInterface
    public interface TextProcessor<T> {
        T process(String text);
    }

    /** Stops accepting work and lets queued tasks finish. */
    public void shutdown() {
        workers.shutdown();
    }
}
Conclusion
Stanford CoreNLP provides a comprehensive suite of NLP tools for Java applications:
Key Features Covered:
- Tokenization and sentence splitting
- Part-of-speech tagging
- Named Entity Recognition (NER)
- Sentiment analysis
- Dependency parsing
- Coreference resolution
- Relation extraction
Best Practices:
- Pipeline Configuration: Choose appropriate annotators for your use case
- Memory Management: Stanford CoreNLP is memory-intensive — a full annotator stack typically needs a multi-gigabyte heap, so run the JVM with an increased limit (e.g. -Xmx4g)
- Batch Processing: Use parallel processing for large datasets
- Error Handling: Always handle potential annotation failures
- Model Selection: Choose appropriate models for your domain
Performance Tips:
- Reuse pipeline instances (they are thread-safe)
- Use appropriate annotators (don't include unnecessary ones)
- Consider batch processing for large volumes
- Monitor memory usage with large texts
- Use async processing for responsive applications
Stanford CoreNLP is production-ready and suitable for enterprise applications, research projects, and educational purposes.