From f552f2d4de965e61c0f0ad762041124bf5c13a12 Mon Sep 17 00:00:00 2001 From: Roman Chyla Date: Thu, 17 Feb 2022 14:11:40 -0500 Subject: [PATCH] feat: use proper generics --- .../author/AuthorQueryVariations.java | 342 +++++++++--------- .../solr/analysis/author/AuthorUtils.java | 10 +- .../flexible/aqp/TestAqpExtendedLGSimple.java | 6 +- 3 files changed, 174 insertions(+), 184 deletions(-) diff --git a/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorQueryVariations.java b/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorQueryVariations.java index 9fa001d11..11cc5b33f 100644 --- a/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorQueryVariations.java +++ b/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorQueryVariations.java @@ -1,191 +1,181 @@ package org.apache.solr.analysis.author; - import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashSet; +import java.util.Map; +import java.util.Set; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** - * Creates a variations of the author names, it receives eg: - * John, K + * Creates a variations of the author names, it receives eg: John, K */ public class AuthorQueryVariations { - public static final Logger log = LoggerFactory.getLogger(AuthorQueryVariations.class); - - /** - * This method takes input string, e.g. "Hector, Gomez Q" and generates variations - * of the author name. - * - *
-   *  HECTOR, GOMEZ
-   *	HECTOR, G
-   *  HECTOR,
-   *  HECTOR, GOMEZ Q*
-   *  HECTOR, G Q*
-   * 
- * - * It is essentially the same output as you get from getSynonymVariations except - * a few special cases. These special cases are variations needed for querying the - * index of author names, but not needed or wanted for the process of transforming - * the curated synonyms - *

- * Example "h quintero gomez" will output: - *

-   * GOMEZ, H\w* QUINTERO\b.*
-   * GOMEZ,
-   * GOMEZ, H\w*
-   * GOMEZ, H\w* Q\b.*   <-- only this one is extra added
-   * 
- * - * @param authorString - * author name - * @return map with string mappings - */ - public static HashSet getQueryVariationsInclRegex(String authorString) { - - HashMap parsedAuthor = null; - parsedAuthor = AuthorUtils.parseAuthor(authorString); - - HashSet variations = new LinkedHashSet(); - if (parsedAuthor == null) { - variations.add(authorString); - return variations; - } - generateNameVariations(parsedAuthor, variations); - return variations; - } - - public static HashSet generateNameVariations(HashMap parsedAuthor) { - HashSet variations = new LinkedHashSet(); - return generateNameVariations(parsedAuthor, variations); - } - - protected static HashSet generateNameVariations( - HashMap parsedAuthor, - HashSet variations) { - - // get the base variations - generateSynonymVariations(parsedAuthor, variations); - - // add the variations that are needed only for the query phase - String last = parsedAuthor.get("last"); - String first = parsedAuthor.get("first"); - String middle = parsedAuthor.get("middle"); - - if (first != null) { - if (middle != null) { - if (first.length() > 1) { - if (middle.length() > 1) { - variations.add(last + ", " + first + " " + middle.substring(0,1) + "\\b.*"); - } else { - variations.add(last + ", " + first + " " + middle + ".*"); - variations.add(last + ", " + first.substring(0,1) + " " + middle + ".*"); - } - } else { - if (middle.length() > 1) { - variations.add(last + ", " + first + "\\w* " + middle.substring(0,1) + "\\b.*"); - } - } - } else { - if (first.length() > 1) { - variations.add(last + ", " + first.substring(0,1) + "\\b.*"); - } - } - } - - return variations; - } - - - - - /** - * This method takes input string, e.g. "Hector, Gomez Q" and generates variations - * of the author name PLUS enhances the variations with regular expression patterns. - * - * The process that transforms the curated synonyms uses *only* the variations - * generated here. 
This limited set is also included in the variations used at query - * time but DON'T ADD THINGS HERE that are only necessary for the query phase--use - * getNameVariations for that - * - * HECTOR, GOMEZ - * HECTOR, G - * HECTOR, - * HECTOR, GOMEZ Q* - * HECTOR, G Q* - * - * - * @param authorString - * name in the natural form - * @return map with string mappings - */ - public static HashSet getQueryVariations(String authorString) { - - HashMap parsedAuthor = null; - parsedAuthor = AuthorUtils.parseAuthor(authorString); - - HashSet variations = new LinkedHashSet(); - if (parsedAuthor == null) { - variations.add(authorString); - return variations; - } - return generateSynonymVariations(parsedAuthor, variations); - } - - public static HashSet generateSynonymVariations(HashMap parsedAuthor) { - HashSet variations = new LinkedHashSet(); - return generateSynonymVariations(parsedAuthor, variations); - } - - protected static HashSet generateSynonymVariations( - HashMap parsedAuthor, - HashSet variations) { - - String last = parsedAuthor.get("Last"); - String first = parsedAuthor.get("First"); - String middle = parsedAuthor.get("Middle"); - - if (parsedAuthor.size() == 1 && last != null) { - variations.add(String.format("%s,.*", last)); // all we got was last name - } else { - variations.add(String.format("%s,", last)); - } - - if (first != null) { - if (middle != null) { - if (first.length() > 1) { - variations.add(last + ", " + first); - variations.add(last + ", " + first.substring(0,1)); - if (middle.length() > 1) { - variations.add(last + ", " + first + " " + middle + "\\b.*"); - variations.add(last + ", " + first.substring(0,1) + " " + middle.substring(0,1) + "\\b.*"); - } else if (middle.length() == 1) { - // variations.add(last + ", " + first.substring(0,1) + " " + middle + ".*"); - // variations.add(last + ", " + first + " " + middle + ".*"); - } - } else { - variations.add(last + ", " + first + "\\w*"); - if (middle.length() > 1) { - variations.add(last + ", " + 
first + "\\w* " + middle + "\\b.*"); - } else if (middle.length() == 1) { - variations.add(last + ", " + first + "\\w* " + middle + ".*"); - } - } - } else { - if (first.length() > 1) { - variations.add(last + ", " + first + "\\b.*"); - variations.add(last + ", " + first.substring(0,1)); - } else if (first.length() == 1) { - variations.add(last + ", " + first + ".*"); - } - } - } - - return variations; - } + public static final Logger log = LoggerFactory.getLogger(AuthorQueryVariations.class); + + /** + * This method takes input string, e.g. "Hector, Gomez Q" and generates + * variations of the author name. + * + *
+	 *  HECTOR, GOMEZ
+	 *	HECTOR, G
+	 *  HECTOR,
+	 *  HECTOR, GOMEZ Q*
+	 *  HECTOR, G Q*
+	 * 
+ * + * It is essentially the same output as you get from getSynonymVariations, except + * for a few special cases. These special cases are variations needed for querying + * the index of author names, but not needed or wanted for the process of + * transforming the curated synonyms. + *

+ * Example "h quintero gomez" will output: + * + *

+	 * GOMEZ, H\w* QUINTERO\b.*
+	 * GOMEZ,
+	 * GOMEZ, H\w*
+	 * GOMEZ, H\w* Q\b.*   <-- only this one is added on top of the base variations
+	 * 
+ * + * @param authorString author name + * @return map with string mappings + */ + public static HashSet getQueryVariationsInclRegex(String authorString) { + + Map parsedAuthor = null; + parsedAuthor = AuthorUtils.parseAuthor(authorString); + + HashSet variations = new LinkedHashSet(); + if (parsedAuthor == null) { + variations.add(authorString); + return variations; + } + generateNameVariations(parsedAuthor, variations); + return variations; + } + + public static Set generateNameVariations(HashMap parsedAuthor) { + HashSet variations = new LinkedHashSet(); + return generateNameVariations(parsedAuthor, variations); + } + + protected static Set generateNameVariations(Map parsedAuthor, Set variations) { + + // get the base variations + generateSynonymVariations(parsedAuthor, variations); + + // add the variations that are needed only for the query phase + String last = parsedAuthor.get("last"); + String first = parsedAuthor.get("first"); + String middle = parsedAuthor.get("middle"); + + if (first != null) { + if (middle != null) { + if (first.length() > 1) { + if (middle.length() > 1) { + variations.add(last + ", " + first + " " + middle.substring(0, 1) + "\\b.*"); + } else { + variations.add(last + ", " + first + " " + middle + ".*"); + variations.add(last + ", " + first.substring(0, 1) + " " + middle + ".*"); + } + } else { + if (middle.length() > 1) { + variations.add(last + ", " + first + "\\w* " + middle.substring(0, 1) + "\\b.*"); + } + } + } else { + if (first.length() > 1) { + variations.add(last + ", " + first.substring(0, 1) + "\\b.*"); + } + } + } + + return variations; + } + + /** + * This method takes input string, e.g. "Hector, Gomez Q" and generates + * variations of the author name PLUS enhances the variations with regular + * expression patterns. + * + * The process that transforms the curated synonyms uses *only* the variations + * generated here. 
This limited set is also included in the variations used at + * query time but DON'T ADD THINGS HERE that are only necessary for the query + * phase--use getNameVariations for that + * + * HECTOR, GOMEZ HECTOR, G HECTOR, HECTOR, GOMEZ Q* HECTOR, G Q* + * + * + * @param authorString name in the natural form + * @return map with string mappings + */ + public static HashSet getQueryVariations(String authorString) { + + Map parsedAuthor = null; + parsedAuthor = AuthorUtils.parseAuthor(authorString); + + HashSet variations = new LinkedHashSet(); + if (parsedAuthor == null) { + variations.add(authorString); + return variations; + } + return generateSynonymVariations(parsedAuthor, variations); + } + + public static HashSet generateSynonymVariations(HashMap parsedAuthor) { + HashSet variations = new LinkedHashSet(); + return generateSynonymVariations(parsedAuthor, variations); + } + + protected static HashSet generateSynonymVariations(Map parsedAuthor, + Set variations) { + + String last = parsedAuthor.get("Last"); + String first = parsedAuthor.get("First"); + String middle = parsedAuthor.get("Middle"); + + if (parsedAuthor.size() == 1 && last != null) { + variations.add(String.format("%s,.*", last)); // all we got was last name + } else { + variations.add(String.format("%s,", last)); + } + + if (first != null) { + if (middle != null) { + if (first.length() > 1) { + variations.add(last + ", " + first); + variations.add(last + ", " + first.substring(0, 1)); + if (middle.length() > 1) { + variations.add(last + ", " + first + " " + middle + "\\b.*"); + variations.add(last + ", " + first.substring(0, 1) + " " + middle.substring(0, 1) + "\\b.*"); + } else if (middle.length() == 1) { + // variations.add(last + ", " + first.substring(0,1) + " " + middle + ".*"); + // variations.add(last + ", " + first + " " + middle + ".*"); + } + } else { + variations.add(last + ", " + first + "\\w*"); + if (middle.length() > 1) { + variations.add(last + ", " + first + "\\w* " + middle + 
"\\b.*"); + } else if (middle.length() == 1) { + variations.add(last + ", " + first + "\\w* " + middle + ".*"); + } + } + } else { + if (first.length() > 1) { + variations.add(last + ", " + first + "\\b.*"); + variations.add(last + ", " + first.substring(0, 1)); + } else if (first.length() == 1) { + variations.add(last + ", " + first + ".*"); + } + } + } + + return variations; + } } diff --git a/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorUtils.java b/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorUtils.java index b79a5dec0..bc6c1df28 100644 --- a/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorUtils.java +++ b/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorUtils.java @@ -52,8 +52,8 @@ public static String normalizeAuthor(String a) { /** * Method used by the tokenizer chain to normalize author names. * - * @param a - * @param keepApostrophe + * @param a - author name + * @param keepApostrophe - bool, preserve apostrophe * @return Normalized string */ public static String normalizeAuthor(String a, boolean keepApostrophe) { @@ -82,7 +82,7 @@ public static String normalizeAuthor(String a, boolean keepApostrophe) { /** * Utility method to split string (author name) into constituting parts * - * @param a + * @param a - author name * @return map with 'last', 'first', 'middle' keys */ public static Map parseAuthor(String a) { @@ -102,8 +102,8 @@ public static Map parseAuthor(String a, boolean normalize) { * Utility method employed by AuthorTransliterationTokenizer and also by other components * inside the parser chain to discover other potential reading of the author's name. 
* - * @param a - * @return + * @param a - author name as string + * @return - list of author name variants in ascii form */ public static ArrayList getAsciiTransliteratedVariants(String a) { HashSet synonyms = new HashSet(); diff --git a/contrib/antlrqueryparser/src/test/org/apache/lucene/queryparser/flexible/aqp/TestAqpExtendedLGSimple.java b/contrib/antlrqueryparser/src/test/org/apache/lucene/queryparser/flexible/aqp/TestAqpExtendedLGSimple.java index 59c7ec3a5..163c14736 100644 --- a/contrib/antlrqueryparser/src/test/org/apache/lucene/queryparser/flexible/aqp/TestAqpExtendedLGSimple.java +++ b/contrib/antlrqueryparser/src/test/org/apache/lucene/queryparser/flexible/aqp/TestAqpExtendedLGSimple.java @@ -36,13 +36,13 @@ public void testExtensions() throws Exception { ((AqpQueryTreeBuilder) qp.getQueryBuilder()).setBuilder(AqpNearQueryNode.class, new AqpNearQueryNodeBuilder()); assertQueryMatch(qp, "this NEAR that", "field", - "spanNear([field:this, field:that], 5, true)"); + "spanNear([field:this, field:that], 5, false)"); assertQueryMatch(qp, "this NEAR3 that", "field", - "spanNear([field:this, field:that], 3, true)"); + "spanNear([field:this, field:that], 3, false)"); assertQueryMatch(qp, "this NEAR3 (that OR foo*)", "field", - "spanNear([field:this, spanOr([field:that, SpanMultiTermQueryWrapper(field:foo*)])], 3, true)"); + "spanNear([field:this, spanOr([field:that, SpanMultiTermQueryWrapper(field:foo*)])], 3, false)"); }