Patch summary

QuantityParser.java
  * The four findSentenceOffset overloads change from private to protected.
  * findSentenceOffset(sentences, offsets) used to call itself with the same
    two arguments, i.e. it recursed without a base case. It now delegates to
    the three-argument overload with firstTokenOffset = 0.

QuantitiesEngineTest.kt
  * normaliseAndCleanup_shouldReplaceToken now expects the normalised list to
    hold two tokens fewer than the input, and pins the text of tokens 1-2 and
    the offsets of tokens 0-5.

QuantityParserTest.kt (new file)
  * Three tests call findSentenceOffset(sentences, entityPosition) on the
    sentence spans (0,3), (4,6), (7,10):
      entity (1,2) -> expects sentences[0]
      entity (2,5) -> expects OffsetPosition(0, 10)
      entity (2,8) -> expects OffsetPosition(0, 10)

Review notes
  * NOTE(review): in the QuantitiesEngineTest hunk the removed/added
    "val tokens = ..." lines and the removed/added blank lines read
    identically here; presumably they differ only in whitespace - confirm
    against the repository.
  * NOTE(review): indentation of context and changed lines is assumed to be
    the conventional 4 spaces - verify, or apply with whitespace-insensitive
    matching.
  * NOTE(review): QuantityParserTest keeps `target` nullable and calls it via
    `?.`, and tearDown() has an empty body; a `lateinit var` and dropping the
    empty hook would be a possible follow-up.

diff --git a/src/main/java/org/grobid/core/engines/QuantityParser.java b/src/main/java/org/grobid/core/engines/QuantityParser.java
index 7d3c9d43..792fcdb5 100644
--- a/src/main/java/org/grobid/core/engines/QuantityParser.java
+++ b/src/main/java/org/grobid/core/engines/QuantityParser.java
@@ -795,11 +795,11 @@ private static void populateRawOffsetsAndText(Measurement m, List t
         m.setRawString(measurementRawOffsetsAndText.getRight().replace("\n", " "));
     }
 
-    private OffsetPosition findSentenceOffset(List<OffsetPosition> sentences, OffsetPosition offsets) {
-        return findSentenceOffset(sentences, offsets);
+    protected OffsetPosition findSentenceOffset(List<OffsetPosition> sentences, OffsetPosition offsets) {
+        return findSentenceOffset(sentences, offsets, 0);
     }
 
-    private OffsetPosition findSentenceOffset(List<OffsetPosition> sentences, OffsetPosition currentMeasureOffset, int firstTokenOffset) {
+    protected OffsetPosition findSentenceOffset(List<OffsetPosition> sentences, OffsetPosition currentMeasureOffset, int firstTokenOffset) {
         List<OffsetPosition> sentencesCurrentMeasure = sentences.stream()
             .filter(sop -> sop.start <= currentMeasureOffset.start - firstTokenOffset
                 && sop.end > currentMeasureOffset.end - firstTokenOffset)
@@ -818,11 +818,11 @@ private OffsetPosition findSentenceOffset(List sentences, Offset
         }
     }
 
-    private OffsetPosition findSentenceOffset(List<OffsetPosition> sentences, Measurement measurement) {
+    protected OffsetPosition findSentenceOffset(List<OffsetPosition> sentences, Measurement measurement) {
         return findSentenceOffset(sentences, measurement, 0);
     }
 
-    private OffsetPosition findSentenceOffset(List<OffsetPosition> sentences, Measurement measurement, int firstTokenOffset) {
+    protected OffsetPosition findSentenceOffset(List<OffsetPosition> sentences, Measurement measurement, int firstTokenOffset) {
         OffsetPosition currentMeasureOffset = measurementOperations.calculateExtremitiesOffsets(measurement);
 
         return findSentenceOffset(sentences, currentMeasureOffset, firstTokenOffset);
diff --git a/src/test/kotlin/org/grobid/core/engines/QuantitiesEngineTest.kt b/src/test/kotlin/org/grobid/core/engines/QuantitiesEngineTest.kt
index f2f8cd27..c4533ecc 100644
--- a/src/test/kotlin/org/grobid/core/engines/QuantitiesEngineTest.kt
+++ b/src/test/kotlin/org/grobid/core/engines/QuantitiesEngineTest.kt
@@ -10,11 +10,18 @@ class QuantitiesEngineTest {
 
     @Test
     fun normaliseAndCleanup_shouldReplaceToken() {
-        val tokens = QuantityAnalyzer.getInstance().tokenizeWithLayoutToken("This \uF0A0 is an interesting")
+        val tokens = QuantityAnalyzer.getInstance().tokenizeWithLayoutToken("This \uF0A0 is an interesting")
         val tokensNormalised = QuantitiesEngine.normaliseAndCleanup(tokens)
-
-        assertThat(tokensNormalised, hasSize(tokens.size))
-        assertThat(tokensNormalised[2].text, `is`(" "))
+
+        assertThat(tokensNormalised, hasSize(tokens.size - 2))
+        assertThat(tokensNormalised[1].text, `is`(" "))
+        assertThat(tokensNormalised[2].text, `is`("is"))
+        assertThat(tokensNormalised[0].offset, `is`(0))
+        assertThat(tokensNormalised[1].offset, `is`(4))
+        assertThat(tokensNormalised[2].offset, `is`(5))
+        assertThat(tokensNormalised[3].offset, `is`(7))
+        assertThat(tokensNormalised[4].offset, `is`(8))
+        assertThat(tokensNormalised[5].offset, `is`(10))
     }
 
     @Test
diff --git a/src/test/kotlin/org/grobid/core/engines/QuantityParserTest.kt b/src/test/kotlin/org/grobid/core/engines/QuantityParserTest.kt
new file mode 100644
index 00000000..5e0f12ae
--- /dev/null
+++ b/src/test/kotlin/org/grobid/core/engines/QuantityParserTest.kt
@@ -0,0 +1,90 @@
+package org.grobid.core.engines
+
+import org.grobid.core.GrobidModels
+import org.grobid.core.utilities.GrobidConfig.ModelParameters
+import org.grobid.core.utilities.GrobidProperties
+import org.grobid.core.utilities.OffsetPosition
+import org.hamcrest.MatcherAssert.assertThat
+import org.hamcrest.Matchers.`is`
+import org.junit.jupiter.api.AfterEach
+import org.junit.jupiter.api.BeforeAll
+import org.junit.jupiter.api.BeforeEach
+import org.junit.jupiter.api.Test
+import java.util.*
+
+class QuantityParserTest {
+    private var target: QuantityParser? = null
+
+    @BeforeEach
+    fun setUp() {
+        target = QuantityParser(GrobidModels.DUMMY, null, null, null)
+    }
+
+    @AfterEach
+    fun tearDown() {
+    }
+
+
+    @Test
+    fun testGetSentence_entityInsideSentence() {
+
+        val sentences: List<OffsetPosition> = listOf(
+            OffsetPosition(0, 3),
+            OffsetPosition(4, 6),
+            OffsetPosition(7, 10)
+        )
+
+        val entityPosition = OffsetPosition(1, 2)
+
+        val foundSentence = target?.findSentenceOffset(sentences, entityPosition)
+
+        assertThat(foundSentence, `is`(sentences[0]))
+    }
+
+
+    @Test
+    fun testGetSentence_entityBetweenSentences() {
+
+        val sentences: List<OffsetPosition> = listOf(
+            OffsetPosition(0, 3),
+            OffsetPosition(4, 6),
+            OffsetPosition(7, 10)
+        )
+
+        val entityPosition = OffsetPosition(2, 5)
+
+        val foundSentence = target?.findSentenceOffset(sentences, entityPosition)
+
+        assertThat(foundSentence, `is`(OffsetPosition(0, 10)))
+    }
+
+    @Test
+    fun testGetSentence_entityIncludingASentence() {
+
+        val sentences: List<OffsetPosition> = listOf(
+            OffsetPosition(0, 3),
+            OffsetPosition(4, 6),
+            OffsetPosition(7, 10)
+        )
+
+        val entityPosition = OffsetPosition(2, 8)
+
+        val foundSentence = target?.findSentenceOffset(sentences, entityPosition)
+
+        assertThat(foundSentence, `is`(OffsetPosition(0, 10)))
+    }
+
+    companion object {
+        @JvmStatic
+        @BeforeAll
+        @Throws(Exception::class)
+        fun before(): Unit {
+            val modelParameters = ModelParameters()
+            modelParameters.name = "bao"
+            GrobidProperties.addModel(modelParameters)
+            GrobidProperties.getInstance()
+        }
+    }
+
+
+}
\ No newline at end of file