diff --git a/CHANGES.xml b/CHANGES.xml index 2a59800b..35ee92fa 100644 --- a/CHANGES.xml +++ b/CHANGES.xml @@ -7,18 +7,18 @@ - - + New DOMPreserveTransformer. Maven dependency updates: norconex-commons-maven-parent 1.0.2-SNAPSHOT. - + + Fix RegexTagger not picking up XML-configured "fieldMatcher". + - diff --git a/src/main/java/com/norconex/importer/handler/tagger/impl/RegexTagger.java b/src/main/java/com/norconex/importer/handler/tagger/impl/RegexTagger.java index 93788163..b0d3b28f 100644 --- a/src/main/java/com/norconex/importer/handler/tagger/impl/RegexTagger.java +++ b/src/main/java/com/norconex/importer/handler/tagger/impl/RegexTagger.java @@ -198,6 +198,7 @@ public void setFieldMatcher(TextMatcher fieldMatcher) { @Override protected void loadStringTaggerFromXML(XML xml) { + fieldMatcher.loadFromXML(xml.getXML("fieldMatcher")); List nodes = xml.getXMLList("pattern"); for (XML node : nodes) { node.checkDeprecated("@caseSensitive", "ignoreCase", true); @@ -209,6 +210,7 @@ protected void loadStringTaggerFromXML(XML xml) { @Override protected void saveStringTaggerToXML(XML xml) { + xml.addElement("fieldMatcher", fieldMatcher); for (RegexFieldValueExtractor rfe : patterns) { rfe.saveToXML(xml.addElement("pattern")); } diff --git a/src/test/java/com/norconex/importer/handler/tagger/impl/RegexTaggerTest.java b/src/test/java/com/norconex/importer/handler/tagger/impl/RegexTaggerTest.java index 3fd88360..092112a5 100644 --- a/src/test/java/com/norconex/importer/handler/tagger/impl/RegexTaggerTest.java +++ b/src/test/java/com/norconex/importer/handler/tagger/impl/RegexTaggerTest.java @@ -89,8 +89,9 @@ public void testExtractFirst100ContentChars() } @Test - public void testWriteRead() { + public void testWriteRead() { RegexTagger tagger = new RegexTagger(); + tagger.getFieldMatcher().setPattern("(.*)(crawler)").partial(); tagger.addPattern("field1", "123.*890"); tagger.addPattern("field2", "abc.*xyz", 3); tagger.addPattern(new RegexFieldValueExtractor("blah")