diff --git a/README.md b/README.md index 94db231..f670a24 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ # What is Indra? -Indra is an efficient library and service to deliver word-embeddings and semantic relatedness to real-world applications in the domains of machine learning and natural language processing. It offers 60+ pre-build models in 14 languages and several model algorithms and corpora. +Indra is an efficient library and service to deliver word-embeddings and semantic relatedness to real-world applications in the domains of machine learning and natural language processing. It offers 60+ pre-built models in 15 languages and several model algorithms and corpora. Indra is powered by [spotify-annoy](https://github.com/spotify/annoy) delivering an efficient [approximate nearest neighbors](http://en.wikipedia.org/wiki/Nearest_neighbor_search#Approximate_nearest_neighbor) function. diff --git a/indra-core/src/main/java/org/lambda3/indra/core/annoy/AnnoyVectorSpace.java b/indra-core/src/main/java/org/lambda3/indra/core/annoy/AnnoyVectorSpace.java index 9eace9a..c35cb9b 100644 --- a/indra-core/src/main/java/org/lambda3/indra/core/annoy/AnnoyVectorSpace.java +++ b/indra-core/src/main/java/org/lambda3/indra/core/annoy/AnnoyVectorSpace.java @@ -86,10 +86,10 @@ private void loadMappings() { String line; while ((line = reader.readLine()) != null) { - String[] parts = line.split(Pattern.quote("|")); - int id = Integer.parseInt(parts[0]); - this.idToWord[id] = parts[1]; - this.wordToId.put(parts[1], id); + int i = line.indexOf('|'); + int id = Integer.parseInt(line.substring(0, i)); + this.idToWord[id] = line.substring(i + 1); + this.wordToId.put(this.idToWord[id], id); } } catch (IOException e) { String msg = String.format("errors when loading mappings. BASEDIR=%s | MAPPING_GILE=%s", dataDir, WORD_MAPPING_FILE);