Skip to content

Commit

Permalink
add ewts option, doc
Browse files (browse the repository at this point in the history)
  • Loading branch information
eroux committed Sep 20, 2017
1 parent e2b2f26 commit 13a5eec
Showing 1 changed file with 14 additions and 7 deletions.
21 changes: 14 additions & 7 deletions src/main/java/io/bdrc/lucene/bo/TibetanAnalyzer.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,31 +58,38 @@ public final class TibetanAnalyzer extends Analyzer {
// Analyzer options. Each one is assigned from the constructor argument of the same name.
// Segment the text on words instead of syllables.
boolean segmentInWords = false;
// Remove affixed particles, and normalize words in words mode.
boolean lemmatize = false;
// Convert the text to NFD (necessary for texts containing NFC strings).
boolean filterChars = false;
// Convert the text from EWTS to Unicode.
boolean fromEwts = false;

/**
* Creates a new {@link TibetanAnalyzer} and stores the given options in the
* fields of the same name.
*
* @param segmentInWords whether the segmentation is on words instead of syllables
* @param lemmatize whether the analyzer should remove affixed particles, and normalize words in words mode
* @param filterChars whether the text should be converted to NFD (necessary for texts containing NFC strings)
* @param fromEwts whether the text should be converted from EWTS to Unicode
*/
// NOTE(review): this diff view has no +/- markers; the two signature lines below
// appear to be the removed 3-argument form followed by the added 4-argument form.
public TibetanAnalyzer(boolean segmentInWords, boolean lemmatize, boolean filterChars) {
public TibetanAnalyzer(boolean segmentInWords, boolean lemmatize, boolean filterChars, boolean fromEwts) {
this.segmentInWords = segmentInWords;
this.lemmatize = lemmatize;
this.filterChars = filterChars;
this.fromEwts = fromEwts;
}

/**
* Creates a new {@link TibetanAnalyzer} with the default values.
* The 4-argument call below passes {@code segmentInWords = true},
* {@code lemmatize = true}, {@code filterChars = true} and {@code fromEwts = false}.
*/
public TibetanAnalyzer() {
// NOTE(review): this diff view has no +/- markers; the first call appears to be the
// removed 3-argument delegation and the second the added 4-argument delegation.
this(true, true, true);
this(true, true, true, false);
}

// Optionally wraps the incoming reader in a character filter selected by the
// analyzer options, then delegates to super.initReader with the resulting reader.
// NOTE(review): this diff view shows removed and added lines together without +/-
// markers; the first if/else appears to be the pre-change body and the
// if/else-if that follows it the post-change body.
@Override
protected Reader initReader(String fieldName, Reader reader) {
// Apparently the pre-change body: wrap in TibCharFilter only when filterChars is set.
if (filterChars) {
TibCharFilter charFilter = new TibCharFilter(reader);
return super.initReader(fieldName, charFilter);
} else {
return super.initReader(fieldName, reader);
// Apparently the post-change body: TibEwtsFilter takes precedence when fromEwts is set;
// TibCharFilter is applied only when fromEwts is false and filterChars is true.
if (this.fromEwts) {
reader = new TibEwtsFilter(reader);
} else if (filterChars) { // filterChars is never needed after ewts translation
reader = new TibCharFilter(reader);
}
// Hand the (possibly wrapped) reader to the superclass implementation.
return super.initReader(fieldName, reader);
}

@Override
Expand Down

0 comments on commit 13a5eec

Please sign in to comment.