Skip to content

Commit

Permalink
fix long strings
Browse files Browse the repository at this point in the history
  • Loading branch information
eroux committed Sep 21, 2017
1 parent 56b8ae1 commit 109aa79
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/main/java/io/bdrc/lucene/bo/TibEwtsFilter.java
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ public int read() throws IOException {
}
buffer.freeBefore(inputOff);
if (replacement == null || replacement.isEmpty()) {
replacementLen = -1;
return -1;
}
int diff = (inputOff - initialInputOff) - replacement.length();
Expand Down
5 changes: 5 additions & 0 deletions src/test/java/io/bdrc/lucene/bo/TibetanAnalyzerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,11 @@ public void ewtsFilterTest() throws IOException
System.out.print(input + " => ");
TokenStream res = tokenize(new TibEwtsFilter(reader), new TibSyllableTokenizer());
assertTokenStream(res, expected);
// long string, provoked a bug
input = "de'i sprul sku yi ngogs chos rje dge 'dun mkhas grub ni mkhan chen dge 'dun rgya mtsho'i gyi sku tshar chu khyi (1742) lor 'khrungs/ rje de nyid las bcu gsum pa la rab tu byung/ dgon chen du mdo sngags la sbyangs/ rig gnas thams cad la mkhas/ nyer gcig pa chu sprel la smon lam rab 'byams pa mdzad/ kun mkhyen bar mas mgo 'dren mdzad de lcang skya rin po chen nas chos rje'i cho lo gnang/ mkhan chen gshegs par dngul srang stong dang nyis brgyas mchod rten bzhengs/ lcags byi lor rgyud khrir bzhugs/ bde mchog yi dam mdzad/ gsung rtsom yang 'ga' zhig snang/ bdun cu pa lcags byi (mdo smad chos 'byung du bdun cu pa lcags byi lor gshegs pa zer ba lo grangs dang lo snying thod mi thug pa dpyad gzhi ru sor bzhag byas pa) lor gshegs/ de'i sprul sku dge 'dun yon tan rgya mtsho chos srid kyi mkhyen rgya che zhing rgyud pa'i khri mdzad/ de'i sprul sku yi ngogs nas 'khrungs pa dkon mchog rgyal mtshan da lta bzhugs";
reader = new StringReader(input);
res = tokenize(new TibEwtsFilter(reader), new TibSyllableTokenizer());
while (res.incrementToken()) {} // will trigger the exception in case of a bug
}

@Test
Expand Down

0 comments on commit 109aa79

Please sign in to comment.