From 33dcb9a41a98b246fefdfc04aeb44725ff7d5838 Mon Sep 17 00:00:00 2001
From: Roman Chyla <roman.chyla@gmail.com>
Date: Thu, 3 Feb 2022 16:29:24 -0500
Subject: [PATCH] feat: removed the 'all' field from unfielded search and
 also from copyField in schema.xml

---
 .../author/AuthorQueryVariations.java         |   6 +-
 .../solr/analysis/author/AuthorUtils.java     | 411 ++++++++----------
 .../solr/analysis/author/NameParser.java      | 291 +------------
 .../server/solr/collection1/conf/schema.xml   |  28 +-
 .../solr/collection1/conf/solrconfig.xml      |  34 +-
 5 files changed, 216 insertions(+), 554 deletions(-)
diff --git a/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorQueryVariations.java b/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorQueryVariations.java
index 37f89c470..9fa001d11 100644
--- a/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorQueryVariations.java
+++ b/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorQueryVariations.java
@@ -146,9 +146,9 @@ protected static HashSet<String> generateSynonymVariations(
       HashMap<String,String> parsedAuthor,
       HashSet<String> variations) {
 
-    String last = parsedAuthor.get("last");
-    String first = parsedAuthor.get("first");
-    String middle = parsedAuthor.get("middle");
+    String last = parsedAuthor.get("Last");
+    String first = parsedAuthor.get("First");
+    String middle = parsedAuthor.get("Middle");
 
     if (parsedAuthor.size() == 1 && last != null) {
       variations.add(String.format("%s,.*", last)); // all we got was last name
diff --git a/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorUtils.java b/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorUtils.java
index f9189679c..7a850c53d 100644
--- a/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorUtils.java
+++ b/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorUtils.java
@@ -5,26 +5,27 @@
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import org.jython.JythonObjectFactory;
+import org.jython.monty.interfaces.JythonNameParser;
+
 import java.text.*;
 import static net.gcardone.junidecode.Junidecode.unidecode;
 
-/**
- * OK; i'll admit it - this is extremely ugly piece of code which i inherited;
- * I tried to rewrite it once, but it was so convoluted that I resigned. 
- * 
- *
- */
 public class AuthorUtils {
-	
+
+
+	static final NameParser nameParser = new NameParser(); 
+
 	public static final String AUTHOR_QUERY_VARIANT = "AUTHOR_QUERY_VARIANT";
 	public static final String AUTHOR_INPUT = "AUTHOR_INPUT";
 	public static final String AUTHOR_TRANSLITERATED = "AUTHOR_TRANSLITERATED";
 	public static final String AUTHOR_CURATED_SYN = "AUTHOR_CURATED_SYN";
-	
+
 	// to remove commas from behind initials B. => B
 	static Pattern n0 = Pattern.compile("(?<=\\b\\p{L})\\.(?=\\s*\\b)");
 	// these are the characters we allow for author names
@@ -32,158 +33,94 @@ public class AuthorUtils {
 	// and some special characters
 	// original, which may miss diacritics: "(?<=\\b\\p{L})\\.(?=\\s*\\b)" \P{M}\p{M}*+
 	// [^,\\-\\w\\s\\{N}\\p{L}\\p{M}*+]
-	
-	
+
+
 	static Pattern n1 = Pattern.compile("[^,\\-\\s\\p{N}\\p{L}\\p{M}]");
 	static Pattern n1b = Pattern.compile("[^,\\-\\s\\'\\p{N}\\p{L}\\p{M}]");
-	
+
 	// to normalize spaces
 	static Pattern n2 = Pattern.compile("\\s+");
 	// to normalize non escaped commas
 	static Pattern n3 = Pattern.compile("(?<!\\\\),\\s*");
 	// deal with word delimiters
 	static Pattern n4 = Pattern.compile("(?<=\\p{L})\\'\\s*");
-	
+
 	public static String normalizeAuthor(String a) {
 		return normalizeAuthor(a, false);
 	}
-	
+
 	public static String normalizeAuthor(String a, boolean keepApostrophe) {
 		boolean hasWildcards = a.indexOf('*') > -1 || a.indexOf('?') > -1; // \*\? should never be encountered here 
-	  if (!keepApostrophe)
-	    a = n4.matcher(a).replaceAll("-");
-    a = n0.matcher(a).replaceAll(" ");
-    if (keepApostrophe)
-      a = n1b.matcher(a).replaceAll("");
-    else
-      a = n1.matcher(a).replaceAll("");
-    a = n3.matcher(a).replaceAll(", ");
-    a = n2.matcher(a.trim()).replaceAll(" ");
-    
-    
-    if (!hasWildcards && !(a.contains(","))) // || a.contains(" ")
-      a = a + ",";
-    // do this at the end, we want to see the space instead of '-'
-    a = a.replace('-', ' ');
-    // normalize spaces once again
-    a = n2.matcher(a.trim()).replaceAll(" ");
-    return a;
-  }
-	
-	/**
-	 * this whole thing become obsolete when we included the python
-	 * name parser library (that does MUCH better job in parsing names)
-	 * 
-	 * TODO: kill AuthorUtils.parseAuthor 
-	 */
-	public static HashMap<String,String> parseAuthor(String a) {
+		if (!keepApostrophe)
+			a = n4.matcher(a).replaceAll("-");
+		a = n0.matcher(a).replaceAll(" ");
+		if (keepApostrophe)
+			a = n1b.matcher(a).replaceAll("");
+		else
+			a = n1.matcher(a).replaceAll("");
+		a = n3.matcher(a).replaceAll(", ");
+		a = n2.matcher(a.trim()).replaceAll(" ");
+
+
+		if (!hasWildcards && !(a.contains(","))) // || a.contains(" ")
+			a = a + ",";
+		// do this at the end, we want to see the space instead of '-'
+		a = a.replace('-', ' ');
+		// normalize spaces once again
+		a = n2.matcher(a.trim()).replaceAll(" ");
+		return a;
+	}
+
+
+	public static Map<String,String> parseAuthor(String a) {
 		return parseAuthor(a, true);
 	}
-	
-	public static HashMap<String,String> parseAuthor(String a, boolean normalize) {
-		HashMap<String,String> parsed = new HashMap<String,String>();
-		if (a == null || a.length() == 0) {
-			return parsed;
-		}
+
+	public static Map<String,String> parseAuthor(String a, boolean normalize) {
 		if (normalize) {
-			a = AuthorUtils.normalizeAuthor(a);
-		}
-		NameParser np = new NameParser();
-		String[] p;
-		try {
-			p = np.parseName(a);
-		} catch (Exception e) {
-			throw new RuntimeException(e);
+			return nameParser.parseName(AuthorUtils.normalizeAuthor(a));
 		}
-		String[] keys = {"title", "first", "middle", "last", "suffix"};
-		for (int i = 0; i < keys.length; i++) {
-			if (p[i] != null) {
-				parsed.put(keys[i], p[i]);
-			}
+		else {			
+			return nameParser.parseName(a);
 		}
-		return parsed;
 	}
-	
+
 	public static ArrayList<String> getAsciiTransliteratedVariants(String a) {
 		HashSet<String> synonyms = new HashSet<String>();
 		//a = a.toUpperCase();
-		
+
 		// include original
 		synonyms.add(a);
-		
+
 		// downgrade to ascii
 		String downgraded = foldToAscii(a);
 		synonyms.add(downgraded);
-		
-	  // transliterate accents
-    String transAcc = transliterateAccents(a);
-    synonyms.add(transAcc);
-    
+
+
 		// handle russian name stuff
 		HashSet<String> transRus = transliterateRussianNames(synonyms);
 		synonyms.addAll(transRus);
-		
+
 		// apostrophes are now preserved in the index
 		// so we need to generate translits for those
 		if (a.contains("'"))
-		  synonyms.add(a.replace("'", ""));
-		
+			synonyms.add(a.replace("'", ""));
+
 		// remove the original input from the set
 		synonyms.remove(a);
-		
+
 		return new ArrayList<String>(synonyms);
 	}
-	
+
 	public static String foldToAscii(String a) {
-	  String b = unidecode(a.trim());
-	  if (b.contains(" ,"))
-	    b = b.replace(" ,", ",");
-	  return b;
+		String b = unidecode(a.trim());
+		if (b.contains(" ,"))
+			b = b.replace(" ,", ",");
+		return b;
 	}
 
-	
-  static String transliterateAccents(String a) {
-    String decomposed = Normalizer.normalize(a, Normalizer.Form.NFD);
-    char[] in = decomposed.toCharArray();
-    char[] out = new char[in.length * 4];
-    int outPos = 0;
-    for (int i = 0; i < in.length; i++) {
-      final char c = in[i];
-      // prev will be the 1st part of the decomp char
-      char prev = (i > 0) ? in[i - 1] : '\0';
-      char replacement;
-      if (c < '\u0080') {
-        out[outPos++] = c;
-        continue;
-      }
-      switch (c) {
-        case '\u0141':
-          replacement = 'L';
-          break;
-        case '\u0308':
-          replacement = 'E';
-          break;
-        case '\u030a':
-          replacement = 'A';
-          break;
-        case '\u0301': 
-          replacement = 'E';
-          break;
-        case '\u030c':
-          replacement = 'H';
-          break;
-        default:
-          prev = '\0';
-          replacement = c;
-      }
-      if (prev != '\0' && !Character.isUpperCase(prev)) {
-        replacement = Character.toLowerCase(replacement);
-      }
-      out[outPos++] = replacement;
-    }
-    return String.copyValueOf(out).trim();
-  }
-  
+
+
 	/*
 	 * Splits name into parts (separated by comma and then by space)
 	 * The comma is retained; spaces between parts of names are removed
@@ -196,7 +133,7 @@ public static String[] splitName(String name) {
 			String[] nameParts = name.substring(comma+1).trim().split(" ");
 			if (nameParts[0].equals(""))
 				return new String[]{name.substring(0, comma).trim() + ","};
-			
+
 			String[] out = new String[nameParts.length+1];
 			out[0] = name.substring(0, comma).trim() + ",";
 			int i = 1;
@@ -211,10 +148,10 @@ public static String[] splitName(String name) {
 			return name.split(" ");
 		}
 	}
-	
-	
 
-  // XXX: this doesn't look right to me, the fifth step gets (possibly)
+
+
+	// XXX: this doesn't look right to me, the fifth step gets (possibly)
 	// 5 times more items than the first step
 	public static HashSet<String> transliterateRussianNames(Set<String> in) {
 		HashSet<String> synonyms = new HashSet<String>();
@@ -232,181 +169,181 @@ public static HashSet<String> transliterateRussianNames(Set<String> in) {
 		}
 		return synonyms;
 	}
-	
-    /*
-     * take care of russian apostrophes:
-     * 'E => E == IE == YE
-     * note that we do not index 'E since the search
-     * engine simply strips all apostrophes
-     */
+
+	/*
+	 * take care of russian apostrophes:
+	 * 'E => E == IE == YE
+	 * note that we do not index 'E since the search
+	 * engine simply strips all apostrophes
+	 */
 	static Pattern p0 = Pattern.compile("(?<=\\w{2})'(?=[Ee])");
 	static HashSet<String> translitRussianApostrophes(Iterator<String> itr) {
 		HashSet<String> syn = new HashSet<String>();
-		
+
 		String x;
 		while (itr.hasNext()) {
-		  x = itr.next();
+			x = itr.next();
 			Matcher m = p0.matcher(x);
 			if (m.find()) {
-			  if (x.charAt(m.end()) == 'E') {
-          syn.add(m.replaceAll("I"));
-          syn.add(m.replaceAll("Y"));
-          syn.add(m.replaceAll(""));			    
-			  }
-			  else {
-			    syn.add(m.replaceAll("i"));
-			    syn.add(m.replaceAll("y"));
-			    syn.add(m.replaceAll(""));
-			  }
+				if (x.charAt(m.end()) == 'E') {
+					syn.add(m.replaceAll("I"));
+					syn.add(m.replaceAll("Y"));
+					syn.add(m.replaceAll(""));			    
+				}
+				else {
+					syn.add(m.replaceAll("i"));
+					syn.add(m.replaceAll("y"));
+					syn.add(m.replaceAll(""));
+				}
 			}
 		}
 		//log.debug("apostrophes: " + syn);
 		return syn;
 	}
-		
-    /* russian last names I:
-     * [^IJY]EV$ => IEV$ == YEV$ == JEV$ 
-     * [^IJY]EVA$ => IEVA$ == YEVA$ == JEVA$ 
-     */
+
+	/* russian last names I:
+	 * [^IJY]EV$ => IEV$ == YEV$ == JEV$ 
+	 * [^IJY]EVA$ => IEVA$ == YEVA$ == JEVA$ 
+	 */
 	static Pattern p1 = Pattern.compile("(?<![IJYijy])[Ee][Vv](?=[aA]?,)");
 	static HashSet<String> translitRussianLastNames1(Iterator<String> itr) {
 		HashSet<String> syn = new HashSet<String>();
 		String x;
 		while (itr.hasNext()) {
-		  x = itr.next();
+			x = itr.next();
 			Matcher m = p1.matcher(x);
 			if (m.find()) {
-			  if (x.charAt(m.start()) == 'E') {
-			    syn.add(m.replaceAll("IEV"));
-	        syn.add(m.replaceAll("YEV"));
-	        syn.add(m.replaceAll("JEV"));
-			  }
-			  else {
-			    syn.add(m.replaceAll("iev"));
-	        syn.add(m.replaceAll("yev"));
-	        syn.add(m.replaceAll("jev"));
-			  }
-				
+				if (x.charAt(m.start()) == 'E') {
+					syn.add(m.replaceAll("IEV"));
+					syn.add(m.replaceAll("YEV"));
+					syn.add(m.replaceAll("JEV"));
+				}
+				else {
+					syn.add(m.replaceAll("iev"));
+					syn.add(m.replaceAll("yev"));
+					syn.add(m.replaceAll("jev"));
+				}
+
 			}
 		}
 		//log.debug("last names I: " + syn);
 		return syn;
-   }
-		
-    /* russian last names II:
-     * ([NRBO])IA$ == $1IIA$ == $1IYA$
-     */
+	}
+
+	/* russian last names II:
+	 * ([NRBO])IA$ == $1IIA$ == $1IYA$
+	 */
 	static Pattern p2 = Pattern.compile("(?<=[NRBOnrbo])[Ii](?=[Aa],)");
 	static HashSet<String> translitRussianLastNames2(Iterator<String> itr) {
 		HashSet<String> syn = new HashSet<String>();
 		String x;
 		while (itr.hasNext()) {
-		  x = itr.next();
+			x = itr.next();
 			Matcher m = p2.matcher(x);
 			if (m.find()) {
-			  if (x.charAt(m.start()) == 'I') {
-			    syn.add(m.replaceAll("II"));
-			    syn.add(m.replaceAll("IY"));			    
-			  }
-			  else {
-			    syn.add(m.replaceAll("ii"));
-          syn.add(m.replaceAll("iy"));
-			  }
+				if (x.charAt(m.start()) == 'I') {
+					syn.add(m.replaceAll("II"));
+					syn.add(m.replaceAll("IY"));			    
+				}
+				else {
+					syn.add(m.replaceAll("ii"));
+					syn.add(m.replaceAll("iy"));
+				}
 			}
 		}
 		//log.debug("last names II: " + syn);
 		return syn;
 	}
 
-    /* russian last names III:
-     * ([DHKLMNPSZ])IAN$ == $1YAN$ == $1JAN$ 
-     */
+	/* russian last names III:
+	 * ([DHKLMNPSZ])IAN$ == $1YAN$ == $1JAN$ 
+	 */
 	static Pattern p3 = Pattern.compile("(?<=[DHKLMNPSZdhklmnpsz])[IJYijy](?=[Aa][Nn],)");
 	static HashSet<String> translitRussianLastNames3(Iterator<String> itr) {
 		HashSet<String> syn = new HashSet<String>();
 		String x;
 		while (itr.hasNext()) {
-		  x = itr.next();
+			x = itr.next();
 			Matcher m = p3.matcher(x);
 			if (m.find()) {
-			  if (x.charAt(m.start()) == 'I' || x.charAt(m.start()) == 'J' || x.charAt(m.start()) == 'Y') {
-			    syn.add(m.replaceAll("I"));
-			    syn.add(m.replaceAll("J"));
-			    syn.add(m.replaceAll("Y"));			    
-			  }
-			  else {
-			    syn.add(m.replaceAll("i"));
-          syn.add(m.replaceAll("j"));
-          syn.add(m.replaceAll("y"));
-			  }
+				if (x.charAt(m.start()) == 'I' || x.charAt(m.start()) == 'J' || x.charAt(m.start()) == 'Y') {
+					syn.add(m.replaceAll("I"));
+					syn.add(m.replaceAll("J"));
+					syn.add(m.replaceAll("Y"));			    
+				}
+				else {
+					syn.add(m.replaceAll("i"));
+					syn.add(m.replaceAll("j"));
+					syn.add(m.replaceAll("y"));
+				}
 			}
 		}
 		//log.debug("last names III: " + syn);
 		return syn;
 	}
-		
-    /* russian last names IV:
-     * AIA$ == AYA$ == AJA$ 
-     */
+
+	/* russian last names IV:
+	 * AIA$ == AYA$ == AJA$ 
+	 */
 	static Pattern p4 = Pattern.compile("(?<=[KNVknv][Aa])[IJYijy](?=[Aa],)");
 	static HashSet<String> translitRussianLastNames4(Iterator<String> itr) {
 		HashSet<String> syn = new HashSet<String>();
 		String x;
 		while (itr.hasNext()) {
-		  x = itr.next();
+			x = itr.next();
 			Matcher m = p4.matcher(x);
 			if (m.find()) {
-			  if (x.charAt(m.start()) == 'I' || x.charAt(m.start()) == 'J' || x.charAt(m.start()) == 'Y') {
-			    syn.add(m.replaceAll("I"));
-			    syn.add(m.replaceAll("J"));
-			    syn.add(m.replaceAll("Y"));			    
-			  }
-			  else {
-			    syn.add(m.replaceAll("i"));
-          syn.add(m.replaceAll("j"));
-          syn.add(m.replaceAll("y"));
-			  }
+				if (x.charAt(m.start()) == 'I' || x.charAt(m.start()) == 'J' || x.charAt(m.start()) == 'Y') {
+					syn.add(m.replaceAll("I"));
+					syn.add(m.replaceAll("J"));
+					syn.add(m.replaceAll("Y"));			    
+				}
+				else {
+					syn.add(m.replaceAll("i"));
+					syn.add(m.replaceAll("j"));
+					syn.add(m.replaceAll("y"));
+				}
 			}
 		}
 		//log.debug("last names IV: " + syn);
 		return syn;
 	}
-		
-    /* russian last names V:
-     * KI$ == KII$ == KIJ$ == KIY$ = KYI$
-     * VI$ == VII$ == VIJ$ == VIY$ = VYI$
-     * first transform [KVH]I into [KVH]II
-     */
+
+	/* russian last names V:
+	 * KI$ == KY$ == KII$ == KIJ$ == KIY$ == KYI$
+	 * VI$ == VY$ == VII$ == VIJ$ == VIY$ == VYI$
+	 * (earlier note said [KVH]I -> [KVH]II, but p5 only matches I after K or V)
+	 */
 	static Pattern p5 = Pattern.compile("(?<=[KVkv])[Ii](?=,)");
 	static HashSet<String> translitRussianLastNames5(Iterator<String> itr) {
 		HashSet<String> syn = new HashSet<String>();
 		String x;
 		while (itr.hasNext()) {
-		  x = itr.next();
+			x = itr.next();
 			Matcher m = p5.matcher(x);
 			if (m.find()) {
-			  if (x.charAt(m.start()) == 'I') {
-			    syn.add(m.replaceAll("I"));
-			    syn.add(m.replaceAll("Y"));
-			    syn.add(m.replaceAll("YI"));
-			    syn.add(m.replaceAll("IY"));
-			    syn.add(m.replaceAll("IJ"));
-			    syn.add(m.replaceAll("II"));			    
-			  }
-			  else {
-			    syn.add(m.replaceAll("i"));
-          syn.add(m.replaceAll("y"));
-          syn.add(m.replaceAll("yi"));
-          syn.add(m.replaceAll("iy"));
-          syn.add(m.replaceAll("ij"));
-          syn.add(m.replaceAll("ii"));
-			  }
+				if (x.charAt(m.start()) == 'I') {
+					syn.add(m.replaceAll("I"));
+					syn.add(m.replaceAll("Y"));
+					syn.add(m.replaceAll("YI"));
+					syn.add(m.replaceAll("IY"));
+					syn.add(m.replaceAll("IJ"));
+					syn.add(m.replaceAll("II"));			    
+				}
+				else {
+					syn.add(m.replaceAll("i"));
+					syn.add(m.replaceAll("y"));
+					syn.add(m.replaceAll("yi"));
+					syn.add(m.replaceAll("iy"));
+					syn.add(m.replaceAll("ij"));
+					syn.add(m.replaceAll("ii"));
+				}
 			}
 		}
 		//log.debug("last names V: " + syn);
 		return syn;
 	}
-		
+
 	/* russian first names
 	 * ^IU == ^YU
 	 * ^IA == ^YA
@@ -416,21 +353,21 @@ static HashSet<String> translitRussianFirstNames(Iterator<String> itr) {
 		HashSet<String> syn = new HashSet<String>();
 		String x;
 		while (itr.hasNext()) {
-		  x = itr.next();
+			x = itr.next();
 			Matcher m = p6.matcher(x);
 			if (m.find()) {
-			  if (x.charAt(m.start()) == 'I' || x.charAt(m.start()) == 'Y') {
-			    syn.add(m.replaceAll("I"));
-          syn.add(m.replaceAll("Y"));
-			  }
-			  else {
-			    syn.add(m.replaceAll("i"));
-			    syn.add(m.replaceAll("y"));			    
-			  }
+				if (x.charAt(m.start()) == 'I' || x.charAt(m.start()) == 'Y') {
+					syn.add(m.replaceAll("I"));
+					syn.add(m.replaceAll("Y"));
+				}
+				else {
+					syn.add(m.replaceAll("i"));
+					syn.add(m.replaceAll("y"));			    
+				}
 			}
 		}
 		//log.debug("first names: " + syn);
 		return syn;
 	}
-			
+
 }
diff --git a/contrib/adsabs/src/java/org/apache/solr/analysis/author/NameParser.java b/contrib/adsabs/src/java/org/apache/solr/analysis/author/NameParser.java
index 1b280ab2a..1864dbe10 100644
--- a/contrib/adsabs/src/java/org/apache/solr/analysis/author/NameParser.java
+++ b/contrib/adsabs/src/java/org/apache/solr/analysis/author/NameParser.java
@@ -1,292 +1,25 @@
 package org.apache.solr.analysis.author;
 
-//Thanks to Robert Cooper for this!
-//package com.totsp.bookworm.util;
+import java.util.Map;
 
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.Set;
+import org.jython.JythonObjectFactory;
+import org.jython.monty.interfaces.JythonNameParser;
 
-import org.apache.commons.lang.StringUtils;
 
-/**
-*
-* @author kebernet
-*/
 public class NameParser {
 	
-    
-	private static final Set<String> TITLES = new HashSet<String>();
-	private static final Set<String> SUFFIXES = new HashSet<String>();
-	private static final Set<String> COMPOUND_NAMES = new HashSet<String>();
-	public static final int TITLE = 0;
-	public static final int FIRST_NAME = 1;
-	public static final int MIDDLE_NAME = 2;
-	public static final int LAST_NAME = 3;
-	public static final int SUFFIX = 4;
-
-	static {
-		for (String title : new String[] { "dr.", "dr", "doctor", "mr.", "mr", "mister", "ms.", "ms", "miss", "mrs.",
-             "mrs", "mistress", "hn.", "hn", "honorable", "the", "honorable", "his", "her", "honor", "fr", "fr.",
-             "frau", "hr", "herr", "rv.", "rv", "rev.", "rev", "reverend", "reverend", "madam", "lord", "lady",
-             "sir", "senior", "bishop", "rabbi", "holiness", "rebbe", "deacon", "eminence", "majesty", "consul",
-             "vice", "president", "ambassador", "secretary", "undersecretary", "deputy", "inspector", "ins.",
-             "detective", "det", "det.", "constable", "private", "pvt.", "pvt", "petty", "p.o.", "po", "first",
-             "class", "p.f.c.", "pfc", "lcp.", "lcp", "corporal", "cpl.", "cpl", "colonel", "col", "col.",
-             "capitain", "cpt.", "cpt", "ensign", "ens.", "ens", "lieutenant", "lt.", "lt", "ltc.", "ltc",
-             "commander", "cmd.", "cmd", "cmdr", "rear", "radm", "r.adm.", "admiral", "adm.", "adm", "commodore",
-             "cmd.", "cmd", "general", "gen", "gen.", "ltgen", "lt.gen.", "maj.gen.", "majgen.", "major", "maj.",
-             "mjr", "maj", "seargent", "sgt.", "sgt", "chief", "cf.", "cf", "petty", "officer", "c.p.o.", "cpo",
-             "master", "cmcpo", "fltmc", "formc", "mcpo", "mcpocg", "command", "fleet", "force" }) {
-	       NameParser.TITLES.add(title);
-	    }
-
-	    for (String suffix : new String[] { "jr.", "jr", "junior", "ii", "iii", "iv", "senior", "sr.", "sr", //family
-             "phd", "ph.d", "ph.d.", "m.d.", "md", "d.d.s.", "dds", // doctors
-             "k.c.v.o", "kcvo", "o.o.c", "ooc", "o.o.a", "ooa", "g.b.e", "gbe", // knighthoods
-             "k.b.e.", "kbe", "c.b.e.", "cbe", "o.b.e.", "obe", "m.b.e", "mbe", //   cont
-             "esq.", "esq", "esquire", "j.d.", "jd", // lawyers
-             "m.f.a.", "mfa", //misc
-             "r.n.", "rn", "l.p.n.", "lpn", "l.n.p.", "lnp", //nurses
-             "c.p.a.", "cpa", //money men
-             "d.d.", "dd", "d.div.", "ddiv", //preachers
-             "ret", "ret." }) {
-	       NameParser.SUFFIXES.add(suffix);
-	    }
-
-	    for (String comp : new String[] { "de", "la", "st", "st.", "ste", "ste.", "saint", "van", "der", "al", "bin",
-             "le", "mac", "di", "del", "vel", "von", "e'", "san", "af", "el", "\'t" }) {
-	       NameParser.COMPOUND_NAMES.add(comp);
-	    }
-	}
+    private JythonNameParser jythonParser;
 
+	public NameParser() {
+    	JythonObjectFactory factory = new JythonObjectFactory(JythonNameParser.class, "jython_name_parser", "HumanParser");
+        this.jythonParser = (JythonNameParser) factory.createObject(); 
+    }
+	
 	/**
 	 * This method will parse a name into first middle and last names.
-	 * <p>
-	 *  Notes: "Al" is treated as a name. "al" as a name fragment. That is the
-	 *  only exception for capitalization.
-	 * </p>
-	 * @param name name to parse
-	 * @return String[5] containing title, first, middle and last names, suffix
+	 * @return Map of name parts as returned by the Jython parser's parse_human_name (callers read keys such as "Last", "First", "Middle")
 	 */
-	public String[] parseName(String name) {
-	    // NOTE Add lookahead for Suffixes to support 
-	    // "Winthrop Wolfcasts, the 31st Duke of Winchester"
-	    String[] result = new String[5];
-	
-	    if (name == null) {
-	       return result;
-	    }
-	
-	    StringBuffer title = new StringBuffer();
-	    StringBuffer first = new StringBuffer();
-	    StringBuffer middle = new StringBuffer();
-	    StringBuffer last = new StringBuffer();
-	    StringBuffer suffix = new StringBuffer();
-	    boolean isLastCommaFirst = false;
-	
-	    if (name.indexOf(",") != -1) {
-	       String[] lastRest = name.split(",");
-	
-	       if (lastRest.length > 2) {
-	          isLastCommaFirst = true;
-	       } else if (lastRest.length > 1) {
-	          String[] suffixes = lastRest[1].toLowerCase().trim().split(" ");
-	
-	          for (String check : suffixes) {
-	             if (!NameParser.SUFFIXES.contains(check)) {
-	                isLastCommaFirst = true;
-	
-	                break;
-	             }
-	          }
-	       } else if (lastRest.length == 1) {
-	    	   name = name.replaceFirst(",$", "");
-	       }
-	    }
-	
-	    if (isLastCommaFirst) // the user split the last name
-	    {
-	       ArrayList<String> lastRest = new ArrayList<String>(Arrays.asList(name.split(",")));
-	
-//	       if (lastRest.size() > 2) {
-//	          for (int i = 2; i < lastRest.length; i++) //append the remaining elements to the end of the second element
-//	          {
-//	             lastRest[1] += (" " + lastRest[i]);
-//	          }
-//	       }
-	
-	       result[NameParser.LAST_NAME] = lastRest.remove(0).trim();
-	
-	       if ((lastRest.size() == 1) && (lastRest.get(0).trim().indexOf(" ") == -1)) // easy case
-	       {
-	          result[NameParser.FIRST_NAME] = lastRest.remove(0).trim();
-	
-	          return result;
-	       } else {
-	    	   // join the rest together and split again on whitespace
-	          ArrayList<String> rest = new ArrayList<String>(Arrays.asList(StringUtils.join(lastRest, " ").trim().split("\\s+")));
-	
-	          //parse titles
-	          for (int i = 0; i < rest.size(); i++) {
-	        	  if (NameParser.TITLES.contains(rest.get(i).toLowerCase().trim())) {
-	        		  title.append(rest.remove(i));
-	        	  }
-	          }
-	
-	          if (title.length() > 0) {
-	             result[NameParser.TITLE] = title.toString();
-	          }
-	
-	          //parse suffixes
-	          for (int i = 0; i < rest.size(); i++) {
-	        	  if (NameParser.SUFFIXES.contains(rest.get(i).toLowerCase().trim())) {
-	        		  suffix.insert(0, rest.remove(i));
-	        	  }
-	          }
-	
-	          if (suffix.length() > 0) {
-	             result[NameParser.SUFFIX] = suffix.toString();
-	          }
-	
-	          int[] nextNameOrder = new int[] { NameParser.FIRST_NAME, NameParser.MIDDLE_NAME };
-	          int nextNameIndex = 0;
-	
-	          for (int i = 0; i < rest.size(); i++) {
-	             StringBuffer nextName = new StringBuffer();
-	
-	             while (!rest.get(i).trim().equals("Al") && NameParser.COMPOUND_NAMES.contains(rest.get(i).toLowerCase().trim())) {
-	                nextName.append(rest.get(i).trim());
-	
-	                if (i != (rest.size() - 1)) {
-	                   nextName.append(' ');
-	                }
-	
-	                i++;
-	
-	                if (i == (rest.size() - 1)) {
-	                   break;
-	                }
-	             }
-	
-	             nextName.append(rest.get(i));
-	             if (nextNameIndex < nextNameOrder.length) {
-		             result[nextNameOrder[nextNameIndex]] = nextName.toString();
-	             } else {
-	            	 result[nextNameOrder[nextNameOrder.length - 1]] += " " + nextName.toString();
-	             }
-	             nextNameIndex++;
-	             
-	
-//	             if (nextNameIndex == nextNameOrder.length) {
-//	                for (int j = i + 1; j < tail; j++) {
-//	                   if (j != (i + 1)) {
-//	                      nextName.append(' ');
-//	                   }
-//	
-//	                   nextName.append(rest[j]);
-//	                }
-//	
-//	                result[nextNameOrder[nextNameIndex - 1]] = nextName.toString();
-//	
-//	                break;
-//	             }
-	          }
-	       }
-	    } // end last, first case.
-	    else {
-	       String[] names = name.split(" ");
-	       int head = 0;
-	       int tail = names.length - 1;
-	
-	       //parse titles
-	       for (int i = head; (i < tail) && NameParser.TITLES.contains(names[i].toLowerCase().trim()); i++) {
-	          if (i != 0) {
-	             title.append(' ');
-	          }
-	
-	          title.append(names[i]);
-	          head++;
-	       }
-	
-	       if (title.length() > 0) {
-	          result[NameParser.TITLE] = title.toString();
-	       }
-	
-	       //parse suffixes
-	       for (int i = tail; (i >= head) && NameParser.SUFFIXES.contains(names[i].toLowerCase().trim()); i--) {
-	          if (i != tail) {
-	             suffix.insert(0, ' ');
-	          }
-	
-	          suffix.insert(0, names[i]);
-	          tail--;
-	       }
-	
-	       if (suffix.length() > 0) {
-	          result[NameParser.SUFFIX] = suffix.toString();
-	          names[tail] = names[tail].replaceAll(",", "");
-	       }
-	
-	       if (head == tail) { //Only one name left
-	
-	          if (names[head].trim().length() > 0) {
-	             result[NameParser.LAST_NAME] = names[head];
-	          }
-	       } else {
-	          //parse last name
-	          last.append(names[tail]);
-	          tail--;
-	
-	          for (int i = tail; (i >= head) && !names[i].trim().equals("Al")
-	                   && NameParser.COMPOUND_NAMES.contains(names[i].toLowerCase().trim()); i--) {
-	             last.insert(0, ' ');
-	
-	             last.insert(0, names[i]);
-	             tail--;
-	          }
-	
-	          boolean firstPass = true;
-	
-	          //parse first name
-	          for (int i = head; i <= tail; i++) {
-	             if (!firstPass) {
-	                first.append(' ');
-	             }
-	
-	             first.append(names[i].trim());
-	             head++;
-	             firstPass = false;
-	
-	             if (names[i].trim().equals("Al") || !NameParser.COMPOUND_NAMES.contains(names[i].trim().toLowerCase())) {
-	                break;
-	             }
-	          }
-	
-	          //build middle name
-	          for (int i = head; i <= tail; i++) {
-	             if (i != head) {
-	                middle.append(' ');
-	             }
-	
-	             middle.append(names[i].trim());
-	          }
-	       }
-	
-	       if (first.length() > 0) {
-	          result[NameParser.FIRST_NAME] = first.toString().trim();
-	       }
-	
-	       if (last.length() > 0) {
-	          result[NameParser.LAST_NAME] = last.toString().trim();
-	       }
-	
-	       if (middle.length() > 0) {
-	          result[NameParser.MIDDLE_NAME] = middle.toString().trim();
-	       }
-	    }
-	
-	    return result;
+	public Map<String, String> parseName(String name) {	
+		return jythonParser.parse_human_name(name);
 	}
 }
diff --git a/contrib/examples/adsabs/server/solr/collection1/conf/schema.xml b/contrib/examples/adsabs/server/solr/collection1/conf/schema.xml
index 3ec97fd0f..d77125007 100644
--- a/contrib/examples/adsabs/server/solr/collection1/conf/schema.xml
+++ b/contrib/examples/adsabs/server/solr/collection1/conf/schema.xml
@@ -528,7 +528,7 @@
 				<!-- final normalization -->
 				<filter class="solr.TrimFilterFactory" />
 				<filter class="solr.LowerCaseFilterFactory" />
-				<!-- <filter class="org.apache.solr.analysis.DiagnoseFilterFactory" msg="index:1"/> -->
+
 			</analyzer>
 			<analyzer type="query">
 				<charFilter class="solr.HTMLStripCharFilterFactory" />
@@ -536,14 +536,15 @@
 					expression -->
 				<charFilter class="solr.PatternReplaceCharFilterFactory"
 					pattern="\b(?i:(MESSIER)(-|\s+)([0-9]+[A-Z]*))\b"
-					replacement="$1-$3" />
+					replacement="$1$3" />
 				<charFilter class="solr.PatternReplaceCharFilterFactory"
-					pattern="\b(?i:(ABELL)(-|\s+)([0-9]+[A-Z]*))\b" replacement="$1-$3" />
+					pattern="\b(?i:(ABELL)(-|\s+)([0-9]+[A-Z]*))\b" replacement="$1$3" />
 				<charFilter class="solr.PatternReplaceCharFilterFactory"
-					pattern="\b(?i:(NGC|N)(-|\s+)([0-9]+[A-Z]*))\b" replacement="$1-$3" />
+					pattern="\b(?i:(NGC|N)(-|\s+)([0-9]+[A-Z]*))\b" replacement="$1$3" />
 				<charFilter class="solr.PatternReplaceCharFilterFactory"
 					pattern="\b(?i:([34]CR?|ADS|H[DHR]|IC|[MW]|MKN|NGC|PKS|PSR[BJ]?|SAO|UGC|UT)(-|\s+)([0-9]+[A-Z]*))\b"
-					replacement="$1-$3" />
+					replacement="$1$3" />
+
 
 				<!-- tokenize on empty space (if it is not a hyphen connecting other 
 					words) -->
@@ -561,6 +562,7 @@
 					catenateNumbers="0" catenateAll="1" splitOnCaseChange="0"
 					splitOnNumerics="0" stemEnglishPossessive="1" preserveOriginal="0" />
 
+				<!-- <filter class="org.apache.solr.analysis.DiagnoseFilterFactory" msg="query:split"/> -->
 
 				<!-- lowercase words, but keep ACRONYMS case ie. MOND => MOND Mond => 
 					mond Hubble Space Telescope => hubble space telescope -->
@@ -890,7 +892,6 @@
 					types="wdafftypes.txt" />
 				<filter class="solr.LowerCaseFilterFactory" />
 				<filter class="solr.TrimFilterFactory" />
-				<!-- <filter class="org.apache.solr.analysis.DiagnoseFilterFactory" msg="aff_tokens"/>  -->
 			</analyzer>
 			<analyzer type="query">
 				<charFilter class="solr.PatternReplaceCharFilterFactory" 
@@ -1246,18 +1247,18 @@
 			type="normalized_text_ascii_notokenization" indexed="true"
 			stored="true" multiValued="true" omitNorms="true" />
 
-		<field name="author_facet" type="normalized_string" indexed="true"
+		<field name="author_facet" type="string" indexed="true"
 			stored="${storeAll:false}" multiValued="true" omitNorms="true"
 			omitTermFreqAndPositions="true" />
 
-		<field name="author_facet_hier" type="normalized_string" indexed="true"
+		<field name="author_facet_hier" type="string" indexed="true"
 			stored="${storeAll:false}" multiValued="true" omitNorms="true"
 			omitTermFreqAndPositions="true" docValues="true" />
 
 		<field name="author_count" type="int" indexed="true"
 			stored="true" omitNorms="true" omitTermFreqAndPositions="true" />
 
-		<field name="first_author_facet_hier" type="normalized_string"
+		<field name="first_author_facet_hier" type="string"
 			indexed="true" stored="${storeAll:false}" multiValued="true"
 			omitNorms="true" omitTermFreqAndPositions="true" docValues="true" />
 
@@ -1291,6 +1292,9 @@
                docValues="true"/>
         <field name="aff_id" type="affiliation_tokens" indexed="true" stored="true"
                multiValued="true" omitNorms="true"/>
+        <!-- TODO: remove the aff_raw field once the index has been rebuilt -->
+        <field name="aff_raw" type="affiliation_text" indexed="true" stored="true"
+               omitNorms="true" multiValued="true" />
         
         <!-- for Unified Astronomy Thesaurus -->
         <field name="uat" type="uat_tokens" indexed="true" stored="true"
@@ -1973,12 +1977,6 @@
 
 
 	<copyField source="id" dest="recid" />
-	<copyField source="author_norm" dest="all" />
-	<copyField source="alternate_title" dest="all" />
-	<copyField source="bibcode" dest="all" />
-	<copyField source="doi" dest="all" />
-	<copyField source="identifier" dest="all" />
-
 	<copyField source="alternate_title" dest="title" />
 
 </schema>
diff --git a/contrib/examples/adsabs/server/solr/collection1/conf/solrconfig.xml b/contrib/examples/adsabs/server/solr/collection1/conf/solrconfig.xml
index f6973a967..96d511ee8 100644
--- a/contrib/examples/adsabs/server/solr/collection1/conf/solrconfig.xml
+++ b/contrib/examples/adsabs/server/solr/collection1/conf/solrconfig.xml
@@ -5,6 +5,7 @@
   <luceneMatchVersion>6.0</luceneMatchVersion>
 
   <lib dir="../lib" />
+  <lib dir="${solr.extra.dir:}" />
 
   <lib dir="../../../dist/" regex="apache-solr-cell-\d.*\.jar" />
   <lib dir="../../../contrib/extraction/lib" regex=".*\.jar" />
@@ -34,12 +35,13 @@
    
   <indexConfig>
     
+    <maxIndexingThreads>${solr.maxIndexingThreads:8}</maxIndexingThreads>
+    
     <useCompoundFile>${solr.useCompoundFile:false}</useCompoundFile>
 
     <ramBufferSizeMB>${solr.ramBufferSize:1000}</ramBufferSizeMB>
     <maxBufferedDocs>${solr.maxBufferedDocs:50000}</maxBufferedDocs>
 
-
     
     <!-- When we run several instances sharing the same index, we must
          make sure that only one writer is modifying it; and other 
@@ -86,14 +88,9 @@
     </updateLog>
     
      <autoCommit> 
-       <maxDocs>${montysolr.autoCommit.maxDocs:40000}</maxDocs>
-       <maxTime>${montysolr.autoCommit.maxTime:1800000}</maxTime> 
+       <maxDocs>${montysolr.maxDocs:40000}</maxDocs>
+       <maxTime>${montysolr.maxTime:1800000}</maxTime> 
      </autoCommit>
-	  
-     <!-- disabled by default -->
-     <autoSoftCommit> 
-       <maxTime>${montysolr.autoSoftCommit.maxTime:-1}</maxTime> 
-     </autoSoftCommit>
 
 
     <!-- trigger core reload of the read-only instance of the same datadir
@@ -329,7 +326,6 @@
        Modified qf:
        old: first_author^5 author^2 title^1.5 abstract^1.3 identifier^1 bibstem^1 year^2
        new: first_author^0.9 author^0.85 year^0.8 title^0.8 abstract^0.7 identifier^0.8 bibstem^0.8 keyword^0.8
-              
         -->
        <str name="qf">first_author^0.9 author^0.85 year^0.8 title^0.8 abstract^0.7 identifier^0.8 bibstem^0.8 keyword^0.8</str>
        
@@ -342,8 +338,7 @@
        <str name="aqp.classic_scoring.modifier">0.5</str>
        <str name="aqp.constant_scoring">first_author^14 author^13 year^10 bibstem^10</str>
        <str name="aqp.multiphrase.keep_one">SYNONYM</str>
-       <str name="aqp.multiphrase.keep_one.ignore.fields">aff_raw,aff_id,institution</str>
-       <str name="aqp.unfielded.max.uppercase.tokens">2</str>
+       
        <str name="q.op">AND</str>
        
      </lst>
@@ -362,7 +357,7 @@
     <lst name="defaults">
        <str name="echoParams">explicit</str>
        <int name="rows">10</int>
-       <str name="qf">first_author^0.9 author^0.85 year^0.8 title^0.8 abstract^0.7 identifier^0.8 bibstem^0.8 keyword^0.8</str>
+       <str name="qf">first_author^5 author^2 title^1.5 abstract^1.3 identifier^1 bibstem^1 year^2</str>
        <str name="defType">aqp</str>
        <str name="aqp.unfielded.tokens.strategy">disjuncts</str>
        <str name="aqp.unfielded.tokens.new.type">simple</str>
@@ -372,8 +367,7 @@
        <str name="aqp.classic_scoring.modifier">0.5</str>
        <str name="aqp.constant_scoring">first_author^14 author^13 year^10 bibstem^10</str>
        <str name="aqp.multiphrase.keep_one">SYNONYM</str>
-       <str name="aqp.multiphrase.keep_one.ignore.fields">aff_raw,aff_id,institution</str>
-       <str name="aqp.unfielded.max.uppercase.tokens">2</str>
+       
        <str name="q.op">AND</str>
        <str name="df">unfielded_search</str>
      </lst>
@@ -419,7 +413,7 @@
        Make sure these defaults are set also in other public
        query handlers (e.g. tvrh - used by the word cloud)
         -->
-       <str name="qf">first_author^0.9 author^0.85 year^0.8 title^0.8 abstract^0.7 identifier^0.8 bibstem^0.8 keyword^0.8</str>
+       <str name="qf">first_author^5 author^2 title^1.5 abstract^1.3 identifier^1 bibstem^1 year^2</str>
        <str name="defType">aqp</str>
        <str name="aqp.unfielded.tokens.strategy">disjuncts</str>
        <str name="aqp.unfielded.tokens.new.type">simple</str>
@@ -427,7 +421,7 @@
        <str name="aqp.unfielded.tokens.function.name">edismax_combined_aqp</str>
        <str name="useFastVectorHighlighter">true</str>
        <str name="aqp.allow.leading_wildcard">true</str>
-       <str name="aqp.unfielded.max.uppercase.tokens">2</str>
+       
        <str name="q.op">AND</str>
        <str name="df">unfielded_search</str>
      </lst>
@@ -700,7 +694,7 @@
       <str name="defaultOperator">AND</str>
       <str name="fieldMap">arxiv identifier;collection database</str>
       <str name="fieldMapPostAnalysis">entdate entry_date;pubdate date;author_nosyn author_notrans author_nosyn_notrans author</str>
-      <str name="unfieldedSearch">author^1.5 title^1.4 abstract^1.3 all</str>
+      <str name="unfieldedSearch">author^1.5 title^1.4 abstract^1.3</str>
     </lst>
   </requestHandler>
   
@@ -724,7 +718,7 @@
        <str name="aqp.defaultOperator">AND</str>
        <str name="aqp.fieldMap">arxiv identifier;collection database</str>
        <str name="aqp.fieldMapPostAnalysis">entdate entry_date;pubdate date;author_nosyn author_notrans author_nosyn_notrans author;title_nosyn title;alternate_title_nosyn alternate_title;abstract_nosyn abstract;all_nosyn all;full_nosyn full;body_nosyn body;ack_nosyn ack;keyword_nosyn keyword</str>
-       <str name="aqp.unfieldedSearch">author^1.5 title^1.4 abstract^1.3 all</str>
+       <str name="aqp.unfieldedSearch">author^1.5 title^1.4 abstract^1.3</str>
        <str name="aqp.dateFormat">yyyy-MM-dd'T'HH:mm:ss</str>
        <str name="aqp.timestampFormat">yyyy-MM-dd'T'HH:mm:ss.SSS</str>
        <str name="aqp.dateFields">entry_date,date</str>
@@ -757,7 +751,7 @@
        <str name="aqp.defaultOperator">AND</str>
        <str name="aqp.fieldMap">arxiv identifier;collection database</str>
        <str name="aqp.fieldMapPostAnalysis">entdate entry_date;pubdate date;author_nosyn author_notrans author_nosyn_notrans author;title_nosyn title;alternate_title_nosyn alternate_title;abstract_nosyn abstract;all_nosyn all;full_nosyn full;body_nosyn body;ack_nosyn ack;keyword_nosyn keyword</str>
-       <str name="aqp.unfieldedSearch">author^1.5 title^1.4 abstract^1.3 all</str>
+       <str name="aqp.unfieldedSearch">author^1.5 title^1.4 abstract^1.3</str>
        <str name="aqp.dateFormat">yyyy-MM-dd'T'HH:mm:ss</str>
        <str name="aqp.timestampFormat">yyyy-MM-dd'T'HH:mm:ss.SSS</str>
        <str name="aqp.dateFields">entry_date,date</str>
@@ -836,7 +830,7 @@
        <str name="aqp.defaultOperator">AND</str>
        <str name="aqp.fieldMap">arxiv identifier;collection database</str>
        <str name="aqp.fieldMapPostAnalysis">entdate entry_date;pubdate date;author_nosyn author_notrans author_nosyn_notrans author;title_nosyn title;alternate_title_nosyn alternate_title;abstract_nosyn abstract;all_nosyn all;full_nosyn full;body_nosyn body;ack_nosyn ack;keyword_nosyn keyword</str>
-       <str name="aqp.unfieldedSearch">author^1.5 title^1.4 abstract^1.3 all</str>
+       <str name="aqp.unfieldedSearch">author^1.5 title^1.4 abstract^1.3</str>
        <str name="aqp.dateFormat">yyyy-MM-dd'T'HH:mm:ss</str>
        <str name="aqp.timestampFormat">yyyy-MM-dd'T'HH:mm:ss.SSS</str>
        <str name="aqp.dateFields">entry_date,date</str>