From f23e4d396640406d81f3cc07f0d433c0986d15d9 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Fri, 29 Mar 2024 07:37:34 +0900 Subject: [PATCH] add more tests --- .../engines/utilities/LabellingUtilsTest.kt | 93 +++++++++---------- 1 file changed, 44 insertions(+), 49 deletions(-) diff --git a/src/test/kotlin/org/grobid/core/engines/utilities/LabellingUtilsTest.kt b/src/test/kotlin/org/grobid/core/engines/utilities/LabellingUtilsTest.kt index 2d363b5e..b5fab41e 100644 --- a/src/test/kotlin/org/grobid/core/engines/utilities/LabellingUtilsTest.kt +++ b/src/test/kotlin/org/grobid/core/engines/utilities/LabellingUtilsTest.kt @@ -96,66 +96,61 @@ class LabellingUtilsTest { } @Test - fun testCorrectingRangeValues() { + fun testCorrectingRangeValues1() { val input = "70\t70\t7\t70\t70\t70\t0\t70\t70\t70\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\tI-\n" + "±\t±\t±\t±\t±\t±\t±\t±\t±\t±\tALLCAPS\tNODIGIT\t1\tNOPUNCT\t±\t±\t0\t0\t\n" + "9\t9\t9\t9\t9\t9\t9\t9\t9\t9\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + - "kg\tkg\tk\tkg\tkg\tkg\tg\tkg\tkg\tkg\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t\n" + - ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + - "15\t15\t1\t15\t15\t15\t5\t15\t15\t15\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\tI-\n" + - "±\t±\t±\t±\t±\t±\t±\t±\t±\t±\tALLCAPS\tNODIGIT\t1\tNOPUNCT\t±\t±\t0\t0\t\n" + - "5\t5\t5\t5\t5\t5\t5\t5\t5\t5\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + - "%\t%\t%\t%\t%\t%\t%\t%\t%\t%\tALLCAPS\tNODIGIT\t1\tNOPUNCT\t%\t%\t0\t0\t\n" + - "of\tof\to\tof\tof\tof\tf\tof\tof\tof\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t\n" + - "fat\tfat\tf\tfa\tfat\tfat\tt\tat\tfat\tfat\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t1\t0\t\n" + - "mass\tmass\tm\tma\tmas\tmass\ts\tss\tass\tmass\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t1\t0\t\n" + - ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + - "VO\tvo\tV\tVO\tVO\tVO\tO\tVO\tVO\tVO\tALLCAPS\tNODIGIT\t0\tNOPUNCT\tXX\tX\t0\t0\t\n" + - "2\t2\t2\t2\t2\t2\t2\t2\t2\t2\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + - "max\tmax\tm\tma\tmax\tmax\tx\tax\tmax\tmax\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - ":\t:\t:\t:\t:\t:\t:\t:\t:\t:\tALLCAPS\tNODIGIT\t1\tPUNCT\t:\t:\t0\t0\t\n" + - "50\t50\t5\t50\t50\t50\t0\t50\t50\t50\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\tI-\n" + + "kg\tkg\tk\tkg\tkg\tkg\tg\tkg\tkg\tkg\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t" + + var output = LabellingUtils.correctLabelling(input) + + assertEquals(output, input) + } + + @Test + fun testCorrectingRangeValues2() { + val input = "70\t70\t7\t70\t70\t70\t0\t70\t70\t70\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\tI-\n" + "±\t±\t±\t±\t±\t±\t±\t±\t±\t±\tALLCAPS\tNODIGIT\t1\tNOPUNCT\t±\t±\t0\t0\t\n" + - "8\t8\t8\t8\t8\t8\t8\t8\t8\t8\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + - "ml\tml\tm\tml\tml\tml\tl\tml\tml\tml\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t\n" + - "•\t•\t•\t•\t•\t•\t•\t•\t•\t•\tALLCAPS\tNODIGIT\t1\tNOPUNCT\t•\t•\t0\t0\t\n" + + "9\t9\t9\t9\t9\t9\t9\t9\t9\t9\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + + "9\t9\t9\t9\t9\t9\t9\t9\t9\t9\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + + "9\t9\t9\t9\t9\t9\t9\t9\t9\t9\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + "kg\tkg\tk\tkg\tkg\tkg\tg\tkg\tkg\tkg\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t\n" + - "-\t-\t-\t-\t-\t-\t-\t-\t-\t-\tALLCAPS\tNODIGIT\t1\tHYPHEN\t-\t-\t0\t0\t\n" + - "1\t1\t1\t1\t1\t1\t1\t1\t1\t1\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + - "•\t•\t•\t•\t•\t•\t•\t•\t•\t•\tALLCAPS\tNODIGIT\t1\tNOPUNCT\t•\t•\t0\t0\t\n" + - "min\tmin\tm\tmi\tmin\tmin\tn\tin\tmin\tmin\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t1\t0\t\n" + - "-\t-\t-\t-\t-\t-\t-\t-\t-\t-\tALLCAPS\tNODIGIT\t1\tHYPHEN\t-\t-\t0\t0\t\n" + - "1\t1\t1\t1\t1\t1\t1\t1\t1\t1\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + - "and\tand\ta\tan\tand\tand\td\tnd\tand\tand\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - "21\t21\t2\t21\t21\t21\t1\t21\t21\t21\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\tI-\n" + - "of\tof\to\tof\tof\tof\tf\tof\tof\tof\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t\n" + - "race\trace\tr\tra\trac\trace\te\tce\tace\trace\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "A\ta\tA\tA\tA\tA\tA\tA\tA\tA\tALLCAPS\tNODIGIT\t1\tNOPUNCT\tX\tX\t1\t0\t\n" + - "(\t(\t(\t(\t(\t(\t(\t(\t(\t(\tALLCAPS\tNODIGIT\t1\tOPENBRACKET\t(\t(\t0\t0\t\n" + - "6\t6\t6\t6\t6\t6\t6\t6\t6\t6\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\tI-\n" + - "women\twomen\tw\two\twom\twome\tn\ten\tmen\tomen\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "and\tand\ta\tan\tand\tand\td\tnd\tand\tand\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - "15\t15\t1\t15\t15\t15\t5\t15\t15\t15\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\tI-\n" + - "men\tmen\tm\tme\tmen\tmen\tn\ten\tmen\tmen\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + - "40\t40\t4\t40\t40\t40\t0\t40\t40\t40\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\tI-\n" + - "±\t±\t±\t±\t±\t±\t±\t±\t±\t±\tALLCAPS\tNODIGIT\t1\tNOPUNCT\t±\t±\t0\t0\t\n" + - "7\t7\t7\t7\t7\t7\t7\t7\t7\t7\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + - "years\tyears\ty\tye\tyea\tyear\ts\trs\tars\tears\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t1\t0\t\n" + - ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + - "176\t176\t1\t17\t176\t176\t6\t76\t176\t176\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tddd\td\t0\t0\tI-\n" + + ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t" + + var output = LabellingUtils.correctLabelling(input) + + assertEquals(output, input) + } + + @Test + fun testCorrectingRangeValues3() { + val input = "70\t70\t7\t70\t70\t70\t0\t70\t70\t70\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\tI-\n" + "±\t±\t±\t±\t±\t±\t±\t±\t±\t±\tALLCAPS\tNODIGIT\t1\tNOPUNCT\t±\t±\t0\t0\t\n" + - "7\t7\t7\t7\t7\t7\t7\t7\t7\t7\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + - "cm\tcm\tc\tcm\tcm\tcm\tm\tcm\tcm\tcm\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t\n" + - ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + - "72\t72\t7\t72\t72\t72\t2\t72\t72\t72\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\tI-\n" + + "9\t9\t9\t9\t9\t9\t9\t9\t9\t9\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + + "9\t9\t9\t9\t9\t9\t9\t9\t9\t9\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + + "9\t9\t9\t9\t9\t9\t9\t9\t9\t9\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + + "kg\tkg\tk\tkg\tkg\tkg\tg\tkg\tkg\tkg\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t\n" + + ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t" + + var output = LabellingUtils.correctLabelling(input) + + assertEquals(output, input) + } + + @Test + fun testCorrectingRangeValues4() { + val input = "70\t70\t7\t70\t70\t70\t0\t70\t70\t70\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\tI-\n" + "±\t±\t±\t±\t±\t±\t±\t±\t±\t±\tALLCAPS\tNODIGIT\t1\tNOPUNCT\t±\t±\t0\t0\t\n" + - "10\t10\t1\t10\t10\t10\t0\t10\t10\t10\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\t\n" + - "kg\tkg\tk\tkg\tkg\tkg\tg\tkg\tkg\tkg\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t" + "9\t9\t9\t9\t9\t9\t9\t9\t9\t9\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + + "9\t9\t9\t9\t9\t9\t9\t9\t9\t9\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + + "9\t9\t9\t9\t9\t9\t9\t9\t9\t9\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + + "kg\tkg\tk\tkg\tkg\tkg\tg\tkg\tkg\tkg\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\tI-\n" + + ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t" var output = LabellingUtils.correctLabelling(input) assertEquals(output, input) } + } \ No newline at end of file