diff --git a/src/main/java/io/bdrc/lucene/bo/TibEwtsFilter.java b/src/main/java/io/bdrc/lucene/bo/TibEwtsFilter.java index b0def3a..ccc1bbe 100644 --- a/src/main/java/io/bdrc/lucene/bo/TibEwtsFilter.java +++ b/src/main/java/io/bdrc/lucene/bo/TibEwtsFilter.java @@ -64,6 +64,7 @@ public int read() throws IOException { } buffer.freeBefore(inputOff); if (replacement == null || replacement.isEmpty()) { + replacementLen = -1; return -1; } int diff = (inputOff - initialInputOff) - replacement.length(); diff --git a/src/test/java/io/bdrc/lucene/bo/TibetanAnalyzerTest.java b/src/test/java/io/bdrc/lucene/bo/TibetanAnalyzerTest.java index aa153a2..92c43b5 100644 --- a/src/test/java/io/bdrc/lucene/bo/TibetanAnalyzerTest.java +++ b/src/test/java/io/bdrc/lucene/bo/TibetanAnalyzerTest.java @@ -166,6 +166,11 @@ public void ewtsFilterTest() throws IOException System.out.print(input + " => "); TokenStream res = tokenize(new TibEwtsFilter(reader), new TibSyllableTokenizer()); assertTokenStream(res, expected); + // long string, provoked a bug + input = "de'i sprul sku yi ngogs chos rje dge 'dun mkhas grub ni mkhan chen dge 'dun rgya mtsho'i gyi sku tshar chu khyi (1742) lor 'khrungs/ rje de nyid las bcu gsum pa la rab tu byung/ dgon chen du mdo sngags la sbyangs/ rig gnas thams cad la mkhas/ nyer gcig pa chu sprel la smon lam rab 'byams pa mdzad/ kun mkhyen bar mas mgo 'dren mdzad de lcang skya rin po chen nas chos rje'i cho lo gnang/ mkhan chen gshegs par dngul srang stong dang nyis brgyas mchod rten bzhengs/ lcags byi lor rgyud khrir bzhugs/ bde mchog yi dam mdzad/ gsung rtsom yang 'ga' zhig snang/ bdun cu pa lcags byi (mdo smad chos 'byung du bdun cu pa lcags byi lor gshegs pa zer ba lo grangs dang lo snying thod mi thug pa dpyad gzhi ru sor bzhag byas pa) lor gshegs/ de'i sprul sku dge 'dun yon tan rgya mtsho chos srid kyi mkhyen rgya che zhing rgyud pa'i khri mdzad/ de'i sprul sku yi ngogs nas 'khrungs pa dkon mchog rgyal mtshan da lta bzhugs"; + reader = new StringReader(input); 
+ res = tokenize(new TibEwtsFilter(reader), new TibSyllableTokenizer()); + while (res.incrementToken()) {} // will trigger the exception in case of a bug } @Test