Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add corenlp provider microservice #259

Merged
merged 33 commits into from
Aug 29, 2023
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
9ff21cd
changed structure of CoreNLPProvider: now possible to add CoreNLPProv…
Laxraa Jun 12, 2023
c142648
added textprovider-json dependency
Laxraa Jun 12, 2023
775f172
implemented TextProcessorService
Laxraa Jun 12, 2023
4a724f8
added config
Laxraa Jun 13, 2023
f568d62
refactored ConfigManager
Laxraa Jun 13, 2023
6208ad9
added health check for service
Laxraa Jun 13, 2023
fb8dd82
added url encoding and changed filepath of config file
Laxraa Jun 13, 2023
5045fd5
added javadoc
Laxraa Jun 13, 2023
7c75e80
add MicroserviceChecker
Laxraa Jun 24, 2023
06bc011
removed TextProcessorFactory.java and added logger
Laxraa Jun 24, 2023
c6b82dd
add authenticated microservice requests
Laxraa Jul 1, 2023
563906f
update MicroserviceChecker
Laxraa Aug 11, 2023
c7c65bc
update version of textprovider-json
Laxraa Aug 11, 2023
0671bc7
update path of config.properties
Laxraa Aug 13, 2023
e5c0f2d
add usage of environment variables in ConfigManager
Laxraa Aug 14, 2023
a3f7411
Apply formatting changes
Laxraa Aug 14, 2023
210ae2d
remove code smells
Laxraa Aug 14, 2023
47a8ea6
Merge remote-tracking branch 'origin/feature/corenlp_provider_microse…
Laxraa Aug 14, 2023
ceb2ba2
Apply formatting changes
Laxraa Aug 14, 2023
46804a5
Fixed paths to health endpoint
dfuchss Aug 14, 2023
509da4b
Merge branch 'main' into feature/corenlp_provider_microservice
dfuchss Aug 22, 2023
fe9e55f
replace the magic strings by proper getters in the ConfigManager
Laxraa Aug 22, 2023
d0c51f7
give variable a better name in TextProcessor
Laxraa Aug 22, 2023
acce894
use Apache HttpClients and add HttpCommunicator
Laxraa Aug 23, 2023
c3f0f61
refactor HttpCommunicator
Laxraa Aug 23, 2023
719b5dd
use dependency management
Laxraa Aug 23, 2023
01c399d
use BasicHttpClientResponseHandler
Laxraa Aug 27, 2023
9a9c7bf
Format
Gram21 Aug 28, 2023
5a2e7c7
Merge branch 'main' into feature/corenlp_provider_microservice
Gram21 Aug 28, 2023
050cf82
fixed failed build
Laxraa Aug 29, 2023
ffda826
Merge branch 'main' into feature/corenlp_provider_microservice
Gram21 Aug 29, 2023
6ea791d
Update ConfigManager to Enum
Gram21 Aug 29, 2023
246f432
Adapt MicroserviceChecker
Gram21 Aug 29, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions stages/text-preprocessing/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@
<version>${stanford.corenlp.version}</version>
<classifier>models</classifier>
</dependency>
<dependency>
<groupId>io.github.ardoco</groupId>
<artifactId>text-provider-json</artifactId>
<version>0.8.0</version>
Laxraa marked this conversation as resolved.
Show resolved Hide resolved
</dependency>
<dependency>
<groupId>io.github.ardoco.core</groupId>
<artifactId>common</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,15 @@
package edu.kit.kastel.mcse.ardoco.core.text.providers.informants.corenlp;

import java.util.Map;
import java.util.Properties;

import edu.kit.kastel.mcse.ardoco.core.api.PreprocessingData;
import edu.kit.kastel.mcse.ardoco.core.api.text.NlpInformant;
import edu.kit.kastel.mcse.ardoco.core.api.text.Text;
import edu.kit.kastel.mcse.ardoco.core.common.util.DataRepositoryHelper;
import edu.kit.kastel.mcse.ardoco.core.data.DataRepository;
import edu.stanford.nlp.pipeline.CoreDocument;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.kit.kastel.mcse.ardoco.core.text.providers.informants.corenlp.textprocessor.TextProcessor;

public class CoreNLPProvider extends NlpInformant {
private static final String ANNOTATORS = "tokenize,ssplit,pos,parse,depparse,lemma"; // further: ",ner,coref"
private static final String DEPENDENCIES_ANNOTATION = "EnhancedPlusPlusDependenciesAnnotation";

private Text annotatedText;

Expand Down Expand Up @@ -56,26 +52,8 @@ public synchronized Text getAnnotatedText() {
return annotatedText;
}

private static Properties getStanfordProperties(Properties properties) {
if (properties == null) {
throw new IllegalArgumentException("Properties are null");
}
var allStanfordProperties = new Properties(properties);
allStanfordProperties.setProperty("annotators", ANNOTATORS);

allStanfordProperties.put("parse", DEPENDENCIES_ANNOTATION);
allStanfordProperties.put("depparse", DEPENDENCIES_ANNOTATION);
allStanfordProperties.put("coref.algorithm", "fastneural");

return allStanfordProperties;
}

private Text processText(String inputText) {
Properties props = getStanfordProperties(new Properties());
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
CoreDocument document = new CoreDocument(inputText);
pipeline.annotate(document);
return new TextImpl(document);
return new TextProcessor().processText(inputText);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@
import edu.kit.kastel.mcse.ardoco.core.api.text.Word;
import edu.stanford.nlp.pipeline.CoreDocument;

class TextImpl implements Text {
public class TextImpl implements Text {

final CoreDocument coreDocument;
private ImmutableList<Sentence> sentences = Lists.immutable.empty();
private ImmutableList<Word> words = Lists.immutable.empty();

TextImpl(CoreDocument coreDocument) {
public TextImpl(CoreDocument coreDocument) {
this.coreDocument = coreDocument;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/* Licensed under MIT 2023. */
package edu.kit.kastel.mcse.ardoco.core.text.providers.informants.corenlp.config;

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * This singleton manages access to the configuration of the CoreNLP provider.
 * <p>
 * Values are resolved in three layers, each overriding the previous one: built-in defaults, the
 * {@code config.properties} classpath resource, and the environment variables {@code MICROSERVICE_URL} and
 * {@code NLP_PROVIDER_SOURCE}.
 */
public class ConfigManager {

    private static final Logger logger = LoggerFactory.getLogger(ConfigManager.class);

    private static final String FILE_PATH = "config.properties";

    private static ConfigManager instance;
    private final Properties properties;

    private ConfigManager() {
        properties = new Properties();
        // Seed the defaults first so the four known keys are never null, even if the file is missing or incomplete.
        properties.setProperty("microserviceUrl", "http://localhost:8080");
        properties.setProperty("nlpProviderSource", "local");
        properties.setProperty("corenlpService", "/stanfordnlp?text=");
        properties.setProperty("healthService", "/stanfordnlp/health");

        try (InputStream fileInputStream = ConfigManager.class.getClassLoader().getResourceAsStream(FILE_PATH)) {
            if (fileInputStream == null) {
                // A missing resource is reported as null, not as an exception; loading null would throw an NPE.
                logger.warn("Could not find config file {}. Using default values.", FILE_PATH);
            } else {
                // Load into a scratch object so that a failure halfway through does not leave a partial overlay.
                Properties fromFile = new Properties();
                fromFile.load(fileInputStream);
                properties.putAll(fromFile);
            }
        } catch (IOException | IllegalArgumentException e) {
            // IllegalArgumentException is thrown by Properties.load on malformed unicode escapes.
            logger.warn("Could not load config file. ", e);
        }

        String microserviceUrl = System.getenv("MICROSERVICE_URL");
        if (microserviceUrl != null) {
            properties.setProperty("microserviceUrl", microserviceUrl);
        }
        String nlpProviderSource = System.getenv("NLP_PROVIDER_SOURCE");
        if (nlpProviderSource != null) {
            properties.setProperty("nlpProviderSource", nlpProviderSource);
        }
    }

    /**
     * Returns the single instance of this class, creating it on first use.
     *
     * @return the config manager
     */
    public static synchronized ConfigManager getInstance() {
        // synchronized so that concurrent first calls cannot create two instances
        if (instance == null) {
            instance = new ConfigManager();
        }
        return instance;
    }

    /**
     * Gets the value that is currently configured for the given key.
     *
     * @param key the key
     * @return the value, or {@code null} if the key is unknown
     */
    public String getProperty(String key) {
        return properties.getProperty(key);
    }

    /**
     * Sets the value of the given key for this running instance. The config file itself is not modified.
     *
     * @param key   the key
     * @param value the new value
     */
    public void setProperty(String key, String value) {
        properties.setProperty(key, value);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/* Licensed under MIT 2023. */
package edu.kit.kastel.mcse.ardoco.core.text.providers.informants.corenlp.textprocessor;

import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Base64;

import edu.kit.kastel.mcse.ardoco.core.text.providers.informants.corenlp.config.ConfigManager;

/**
* This utility class provides methods to check whether the microservice is available.
*/
public final class MicroserviceChecker {

private MicroserviceChecker() {
}

/**
* checks if the CoreNLP microservice is available and can provide its services.
*
* @return whether the microservice is available
*/
public static boolean isMicroserviceAvailable() throws IOException {
String requestUrl = ConfigManager.getInstance().getProperty("microserviceUrl") + ConfigManager.getInstance().getProperty("healthService");
Laxraa marked this conversation as resolved.
Show resolved Hide resolved

String username = System.getenv("USERNAME");
String password = System.getenv("PASSWORD");
if (username == null || password == null) {
throw new IOException("Environment variables USERNAME and PASSWORD must be set.");
}
URL url = new URL(requestUrl);
HttpURLConnection con = (HttpURLConnection) url.openConnection();
con.setRequestMethod("GET");
con.setConnectTimeout(5000); // timeout after 5 sec

// Encode the username and password
String authString = username + ":" + password;
String encodedAuthString = Base64.getEncoder().encodeToString(authString.getBytes());
String authHeaderValue = "Basic " + encodedAuthString;
con.setRequestProperty("Authorization", authHeaderValue);
int statusCode = con.getResponseCode();
con.disconnect();
return statusCode == HttpURLConnection.HTTP_OK;
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/* Licensed under MIT 2023. */
package edu.kit.kastel.mcse.ardoco.core.text.providers.informants.corenlp.textprocessor;

import java.io.IOException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import edu.kit.kastel.mcse.ardoco.core.api.text.Text;
import edu.kit.kastel.mcse.ardoco.core.text.providers.informants.corenlp.config.ConfigManager;
import io.github.ardoco.textproviderjson.error.InvalidJsonException;
import io.github.ardoco.textproviderjson.error.NotConvertableException;

/**
* This text processor processes texts using CoreNLP.
*/
public class TextProcessor {

private static final int MAX_FAILED_SERVICE_REQUESTS = 2;
Logger logger = LoggerFactory.getLogger(TextProcessor.class);

/**
* processes and annotates a given text
*
* @param inputText the input text
* @return the annotated text
*/
public Text processText(String inputText) {
boolean microserviceAvailable;
try {
microserviceAvailable = MicroserviceChecker.isMicroserviceAvailable();
} catch (IOException e) {
microserviceAvailable = false;
logger.warn("Could not check if CoreNLP microservice is available. ", e);
}
if (ConfigManager.getInstance().getProperty("nlpProviderSource").equals("microservice") && microserviceAvailable) {
int k = 0;
Laxraa marked this conversation as resolved.
Show resolved Hide resolved
while (k < MAX_FAILED_SERVICE_REQUESTS) {
try {
Text processedText = processService(inputText);
logger.info("Processed text with CoreNLP microservice.");
return processedText;
} catch (IOException e) {
k++;
logger.warn("Could not process text with CoreNLP microservice. Trying again. ", e);
} catch (NotConvertableException | InvalidJsonException e) {
logger.warn("Could not process text with CoreNLP microservice. Text not convertable. ", e);
return processLocally(inputText);
}
}
logger.warn("Could not process text with CoreNLP microservice. Processing locally instead.");
}
logger.info("Processed text locally.");
return processLocally(inputText);
}

private Text processLocally(String inputText) {
return new TextProcessorLocal().processText(inputText);
}

private Text processService(String inputText) throws IOException, NotConvertableException, InvalidJsonException {
return new TextProcessorService().processText(inputText);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/* Licensed under MIT 2023. */
package edu.kit.kastel.mcse.ardoco.core.text.providers.informants.corenlp.textprocessor;

import java.util.Properties;

import edu.kit.kastel.mcse.ardoco.core.api.text.Text;
import edu.kit.kastel.mcse.ardoco.core.text.providers.informants.corenlp.TextImpl;
import edu.stanford.nlp.pipeline.CoreDocument;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;

/**
 * Text processor that annotates texts in-process with a CoreNLP pipeline.
 */
public class TextProcessorLocal {
    private static final String ANNOTATORS = "tokenize,ssplit,pos,parse,depparse,lemma"; // further: ",ner,coref"
    private static final String DEPENDENCIES_ANNOTATION = "EnhancedPlusPlusDependenciesAnnotation";

    /**
     * Annotates the given text with a freshly created CoreNLP pipeline.
     *
     * @param inputText the input text
     * @return the annotated text
     */
    public Text processText(String inputText) {
        var pipeline = new StanfordCoreNLP(getStanfordProperties(new Properties()));
        var annotatedDocument = new CoreDocument(inputText);
        pipeline.annotate(annotatedDocument);
        return new TextImpl(annotatedDocument);
    }

    /**
     * Builds the pipeline configuration on top of the given properties, which serve as defaults.
     */
    private static Properties getStanfordProperties(Properties properties) {
        if (properties == null) {
            throw new IllegalArgumentException("Properties are null");
        }
        Properties pipelineProperties = new Properties(properties);

        pipelineProperties.setProperty("coref.algorithm", "fastneural");
        pipelineProperties.setProperty("depparse", DEPENDENCIES_ANNOTATION);
        pipelineProperties.setProperty("parse", DEPENDENCIES_ANNOTATION);
        pipelineProperties.setProperty("annotators", ANNOTATORS);

        return pipelineProperties;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/* Licensed under MIT 2023. */
package edu.kit.kastel.mcse.ardoco.core.text.providers.informants.corenlp.textprocessor;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Base64;

import edu.kit.kastel.mcse.ardoco.core.api.text.Text;
import edu.kit.kastel.mcse.ardoco.core.text.providers.informants.corenlp.config.ConfigManager;
import io.github.ardoco.textproviderjson.converter.DtoToObjectConverter;
import io.github.ardoco.textproviderjson.converter.JsonConverter;
import io.github.ardoco.textproviderjson.dto.TextDto;
import io.github.ardoco.textproviderjson.error.InvalidJsonException;
import io.github.ardoco.textproviderjson.error.NotConvertableException;

/**
* This text processor processes texts by sending requests to a microservice, which provides text processing using CoreNLP.
*/
public class TextProcessorService {

/**
* processes and annotates a given text by sending requests to a microservice
*
* @param inputText the input text
* @return the annotated text
*/
public Text processText(String inputText) throws IOException, InvalidJsonException, NotConvertableException {
TextDto textDto;
String jsonText = sendCorenlpRequest(inputText);
textDto = JsonConverter.fromJsonString(jsonText);
return new DtoToObjectConverter().convertText(textDto);
}

private String sendCorenlpRequest(String inputText) throws IOException {
inputText = URLEncoder.encode(inputText, StandardCharsets.UTF_8);
String requestUrl = ConfigManager.getInstance().getProperty("microserviceUrl") + ConfigManager.getInstance().getProperty("corenlpService") + inputText;
return sendAuthenticatedGetRequest(requestUrl);
}

public String sendAuthenticatedGetRequest(String requestUrl) throws IOException {
String username = System.getenv("USERNAME");
String password = System.getenv("PASSWORD");
if (username == null || password == null) {
throw new IOException("Environment variables USERNAME and PASSWORD must be set.");
}
URL url = new URL(requestUrl);
Laxraa marked this conversation as resolved.
Show resolved Hide resolved
HttpURLConnection con = (HttpURLConnection) url.openConnection();
con.setRequestMethod("GET");

// Encode the username and password
String authString = username + ":" + password;
String encodedAuthString = Base64.getEncoder().encodeToString(authString.getBytes());
String authHeaderValue = "Basic " + encodedAuthString;
con.setRequestProperty("Authorization", authHeaderValue);

String content = readGetResponse(con);
con.disconnect();
return content;
}

private String readGetResponse(HttpURLConnection con) throws IOException {
if (con.getResponseCode() != HttpURLConnection.HTTP_OK) {
throw new IOException("HTTP error code: " + con.getResponseCode());
}
BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()));
String inputLine;
StringBuilder content = new StringBuilder();
while ((inputLine = in.readLine()) != null) {
content.append(inputLine);
}
in.close();
return content.toString();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
nlpProviderSource=microservice
microserviceUrl=http://localhost:8080
corenlpService=/stanfordnlp?text=
healthService=/stanfordnlp/health