diff --git a/.classpath b/.classpath index 37c7ea3a8..c62d64af5 100644 --- a/.classpath +++ b/.classpath @@ -91,7 +91,7 @@ - + diff --git a/contrib/adsabs/src/test/monty/solr/util/MontySolrQueryTestCase.java b/contrib/adsabs/src/test/monty/solr/util/MontySolrQueryTestCase.java index e8d927401..136899553 100644 --- a/contrib/adsabs/src/test/monty/solr/util/MontySolrQueryTestCase.java +++ b/contrib/adsabs/src/test/monty/solr/util/MontySolrQueryTestCase.java @@ -141,7 +141,13 @@ public void setDebug(boolean v) { /* * This is only for printing/debugging, DO NOT use this for testing!!! - * This method can go away + * + * It will only work if the field(s) are indexed with stored positions + * i.e. + * + * + * + * Also, the codec used must NOT be SimpleTextCodec */ public void dumpDoc(Integer docId, String...fields) throws Exception { //DirectoryReader reader = h.getCore().getSearcher().get().getIndexReader(); @@ -162,7 +168,7 @@ public void dumpDoc(Integer docId, String...fields) throws Exception { docs = new int[]{docId}; } - DocReconstructor reconstructor = new DocReconstructor(reader, fields, 10); + DocReconstructor reconstructor = new DocReconstructor(reader, fields, -1); Reconstructed d; for (Integer dd: docs) { diff --git a/contrib/adsabs/src/test/org/adsabs/TestAdsAllFields.java b/contrib/adsabs/src/test/org/adsabs/TestAdsAllFields.java index 11982c645..673d3daf2 100644 --- a/contrib/adsabs/src/test/org/adsabs/TestAdsAllFields.java +++ b/contrib/adsabs/src/test/org/adsabs/TestAdsAllFields.java @@ -135,7 +135,7 @@ public void test() throws Exception { ", \"aff_id\": [\"61814\", \"A1036\", \"-\"]" + ", \"aff_abbrev\": [\"CfA\", \"Harvard U/Dep Ast\", \"-\"]" + ", \"aff_canonical\": [\"Harvard Smithsonian Center for Astrophysics\", \"Harvard University, Department of Astronomy\", \"-\"]" + - ", \"aff_raw\": [\"-\", \"NASA Kavli space center, Cambridge, MA 02138, USA\", \"Einstein institute, Zurych, Switzerland\"]" + + ", \"aff\": [\"-\", \"NASA Kavli space center, Cambridge, MA 02138, USA\", \"Einstein institute, Zurych, Switzerland\"]" + ", \"institution\": [\"CfA\", \"Harvard U/Dep Ast\", \"-\", \"foo/bar baz\"]" + ", \"aff_facet\": [[\"A1234\", \"facet abbrev/parent abbrev\"]]" + ", \"aff_facet_hier\": [\"1/1812/61814\", \"1/8264/61814\", \"1/1812/A1036\", \"-\"]" + @@ -496,29 +496,29 @@ public void test() throws Exception { /* - * aff - is a virtual field; + * affil - is a virtual field; * all aff_ fields must be the same order as authors */ - assertQ(req("q", "aff:NASA"), + assertQ(req("q", "affil:NASA"), "//doc/int[@name='recid'][.='100']", "//*[@numFound='1']" ); - assertQ(req("q", "aff:NASA AND author:\"Anders\""), + assertQ(req("q", "affil:NASA AND author:\"Anders\""), "//doc/int[@name='recid'][.='100']", "//*[@numFound='1']" ); - assertQ(req("q", "aff:SPACE"), "//*[@numFound='0']"); // be case sensitive with uppercased query terms - assertQ(req("q", "aff:KAVLI"), "//*[@numFound='0']"); // same here - assertQ(req("q", "aff:kavli"), // otherwise case-insensitive + assertQ(req("q", "affil:SPACE"), "//*[@numFound='0']"); // be case sensitive with uppercased query terms + assertQ(req("q", "affil:KAVLI"), "//*[@numFound='0']"); // same here + assertQ(req("q", "affil:kavli"), // otherwise case-insensitive "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); - assertQ(req("q", "aff:Kavli"), + assertQ(req("q", "affil:Kavli"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); - assertQ(req("q", "aff:\"kavli space\""), + assertQ(req("q", "affil:\"kavli space\""), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); @@ -548,7 +548,7 @@ public void test() throws Exception { ); - assertQ(req("q", "aff:\"Kavli\""), + assertQ(req("q", "affil:\"Kavli\""), "//*[@numFound='1']"); assertQ(req("q", "aff_canonical:\"Harvard\""), "//*[@numFound='1']"); @@ -556,7 +556,7 @@ public void test() throws Exception { // "//*[@numFound='1']"); assertQ(req("q", "aff_id:\"61814\""), "//*[@numFound='1']"); - assertQ(req("q", "aff_raw:\"02138\""), + assertQ(req("q", "aff:\"02138\""), "//*[@numFound='1']"); assertQ(req("q", "aff_canonical:\"Smithsonian\""), "//*[@numFound='1']"); @@ -565,15 +565,15 @@ public void test() throws Exception { assert h.query(req("q", "recid:100")) - .contains("" + + .contains("" + "-" + "NASA Kavli space center, Cambridge, MA 02138, USA" + "Einstein institute, Zurych, Switzerland" ); - assertQ(req("q", "=aff:\"acr::nasa\" AND recid:100"), + assertQ(req("q", "=affil:\"acr::nasa\" AND recid:100"), "//*[@numFound='1']" ); - assertQ(req("q", "pos(aff_raw:kavli, 2) AND recid:100"), + assertQ(req("q", "pos(aff:kavli, 2) AND recid:100"), "//*[@numFound='1']" ); @@ -1622,12 +1622,11 @@ public void test() throws Exception { * these are the cases that depend on the default parameters specified in the * solrcofig.xml; here we just test what came up as bugs */ - assertQueryEquals(req("q", "aff:\"ASTRO 3D\""), - "(aff_abbrev:\"acr::astro (3d 3d)\" | aff_abbrev:\"acr::astro 3 d\") " - + "(institution:astro 3d)^2.0 " - + "aff_id:astro 3d " - + "(aff_canonical:\"acr::astro (3d 3d)\" | aff_canonical:\"acr::astro 3 d\") " - + "((aff_raw:\"acr::astro (3d 3d)\" | aff_raw:\"acr::astro 3 d\"))^0.5", + assertQueryEquals(req("q", "affil:\"ASTRO 3D\""), + "((aff:\"acr::astro (3d 3d)\" | aff:\"acr::astro 3 d\"))^0.5 " + + "(aff_abbrev:\"acr::astro (3d 3d)\" | aff_abbrev:\"acr::astro 3 d\") " + + "(institution:astro 3d)^2.0 aff_id:astro 3d " + + "(aff_canonical:\"acr::astro (3d 3d)\" | aff_canonical:\"acr::astro 3 d\")", BooleanQuery.class); } } diff --git a/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeAffiliationText.java b/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeAffiliationText.java index 02aefcdf7..9b2121870 100644 --- a/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeAffiliationText.java +++ b/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeAffiliationText.java @@ -55,83 +55,83 @@ public static void beforeClass() throws Exception { public void test() throws Exception { - assertU(addDocs("aff_raw", "W.K. Kellogg Radiation Laboratory, California Institute of Technology, Pasadena, CA 91125, USA")); - assertU(addDocs("aff_raw", "W.K. Kellogg Radiation Laboratory, California Institute of Technology, Pasadena, CA(91125), USA")); - assertU(addDocs("aff_raw", "IMCCE/Observatoire de Paris")); - assertU(addDocs("aff_raw", "INAF - Osservatorio Astronomico di Brera, via E. Bianchi 46, I-23807 Merate, Italy;", - "aff_raw", "INAF - IASF Milano, via E. Bassini 15, I-20133 Milano, Italy")); - assertU(addDocs("aff_raw", "Instituto de Astrofísica de Andalucía (IAA-CSIC) foo:doo")); - assertU(addDocs("aff_raw", "foo1", "aff_raw", "foo2", "aff_raw", "-", "aff_raw", "foo4")); + assertU(addDocs("aff", "W.K. Kellogg Radiation Laboratory, California Institute of Technology, Pasadena, CA 91125, USA")); + assertU(addDocs("aff", "W.K. Kellogg Radiation Laboratory, California Institute of Technology, Pasadena, CA(91125), USA")); + assertU(addDocs("aff", "IMCCE/Observatoire de Paris")); + assertU(addDocs("aff", "INAF - Osservatorio Astronomico di Brera, via E. Bianchi 46, I-23807 Merate, Italy;", + "aff", "INAF - IASF Milano, via E. Bassini 15, I-20133 Milano, Italy")); + assertU(addDocs("aff", "Instituto de Astrofísica de Andalucía (IAA-CSIC) foo:doo")); + assertU(addDocs("aff", "foo1", "aff", "foo2", "aff", "-", "aff", "foo4")); assertU(commit()); - //dumpDoc(null, "aff_raw"); - //System.err.println(h.query(req("q", "aff_raw:foo1"))); + //dumpDoc(null, "aff"); + //System.err.println(h.query(req("q", "aff:foo1"))); assertQ(req("q", "*:*"), "//*[@numFound>='2']"); - assertQ(req("q", "aff_raw:xfoo"), "//*[@numFound='0']"); + assertQ(req("q", "aff:xfoo"), "//*[@numFound='0']"); - assertQueryEquals(req("q", "aff_raw:\"Pasadena, CA 91125\"", "qt", "aqp"), - "aff_raw:\"pasadena acr::ca 91125\"", + assertQueryEquals(req("q", "aff:\"Pasadena, CA 91125\"", "qt", "aqp"), + "aff:\"pasadena acr::ca 91125\"", PhraseQuery.class ); - assertQueryEquals(req("q", "aff_raw:\"Pasadena, CA(91125)\"", "qt", "aqp"), - "aff_raw:\"pasadena acr::ca 91125\"", + assertQueryEquals(req("q", "aff:\"Pasadena, CA(91125)\"", "qt", "aqp"), + "aff:\"pasadena acr::ca 91125\"", PhraseQuery.class ); - assertQ(req("q", "aff_raw:\"Pasadena, CA 91125\""), + assertQ(req("q", "aff:\"Pasadena, CA 91125\""), "//*[@numFound='2']", "//doc/str[@name='id'][.='0']", "//doc/str[@name='id'][.='1']" ); - assertQ(req("q", "aff_raw:IMCCE"), + assertQ(req("q", "aff:IMCCE"), "//*[@numFound='1']", "//doc/str[@name='id'][.='2']" ); - assertQ(req("q", "aff_raw:imcce"), + assertQ(req("q", "aff:imcce"), "//*[@numFound='1']", "//doc/str[@name='id'][.='2']" ); - assertQ(req("q", "aff_raw:IASF"), + assertQ(req("q", "aff:IASF"), "//*[@numFound='1']", "//doc/str[@name='id'][.='3']" ); - assertQ(req("q", "aff_raw:iasf"), + assertQ(req("q", "aff:iasf"), "//*[@numFound='1']", "//doc/str[@name='id'][.='3']" ); - assertQ(req("q", "aff_raw:IAA-CSIC"), + assertQ(req("q", "aff:IAA-CSIC"), "//*[@numFound='1']", "//doc/str[@name='id'][.='4']" ); - assertQ(req("q", "aff_raw:IAACSIC"), + assertQ(req("q", "aff:IAACSIC"), "//*[@numFound='1']", "//doc/str[@name='id'][.='4']" ); - assertQ(req("q", "aff_raw:iaa-csic"), + assertQ(req("q", "aff:iaa-csic"), "//*[@numFound='1']", "//doc/str[@name='id'][.='4']" ); - assertQ(req("q", "aff_raw:\"INAF - IASF\""), + assertQ(req("q", "aff:\"INAF - IASF\""), "//*[@numFound='1']", "//doc/str[@name='id'][.='3']" ); - assertQ(req("q", "aff_raw:\"inaf - iasf\""), + assertQ(req("q", "aff:\"inaf - iasf\""), "//*[@numFound='1']", "//doc/str[@name='id'][.='3']" ); - assert h.query(req("q", "aff_raw:foo1")) - .contains("" + + assert h.query(req("q", "aff:foo1")) + .contains("" + "foo1" + "foo2" + "-" + diff --git a/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeFulltextParsing.java b/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeFulltextParsing.java index 8379f3697..49b9c5152 100644 --- a/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeFulltextParsing.java +++ b/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeFulltextParsing.java @@ -104,6 +104,7 @@ * TODO: the analyzer for the synonyms must use the same StopFilters as the query chain * */ + public class TestAdsabsTypeFulltextParsing extends MontySolrQueryTestCase { @@ -134,7 +135,7 @@ public static String getConfigFile() { newConfig = duplicateFile(new File(configFile)); - replaceInFile(newConfig, "solr.SchemaCodecFactory", "solr.SimpleTextCodecFactory"); + //replaceInFile(newConfig, "solr.SchemaCodecFactory", "solr.SimpleTextCodecFactory"); } catch (IOException e) { e.printStackTrace(); @@ -1224,6 +1225,35 @@ public void testOtherCases() throws Exception { //TODO: this test is intentionally left failing; it used to work until the scoring changes (i'd like to // investigate more how the multi-token affects recall) + dumpDoc(null, "title", "bibcode"); + assertQ(req("q", "title:\"γ-ray Sources\"", + "indent", "true", + "debugQuery", "true"), + "//*[@numFound='4']", + "//doc/str[@name='id'][.='400']", + "//doc/str[@name='id'][.='401']", + "//doc/str[@name='id'][.='402']", + "//doc/str[@name='id'][.='403']" + ); + assertQ(req("q", "title:\"γ ray Sources\"", + "indent", "true", + "debugQuery", "true"), + "//*[@numFound='4']", + "//doc/str[@name='id'][.='400']", + "//doc/str[@name='id'][.='401']", + "//doc/str[@name='id'][.='402']", + "//doc/str[@name='id'][.='403']" + ); + assertQ(req("q", "title:\"$\\gamma$ ray Sources\"", + "indent", "true", + "debugQuery", "true"), + "//*[@numFound='4']", + "//doc/str[@name='id'][.='400']", + "//doc/str[@name='id'][.='401']", + "//doc/str[@name='id'][.='402']", + "//doc/str[@name='id'][.='403']" + ); + assertQ(req("q", "title:\"A 350-MHz GBT Survey of 50 Faint Fermi γ-ray Sources for Radio Millisecond Pulsars\"", "indent", "true", "debugQuery", "true"), @@ -1241,6 +1271,10 @@ public void testOtherCases() throws Exception { "//doc/str[@name='id'][.='403']"); + //assertU(adoc("id", "402", "bibcode", "xxxxxxxxxx402", "title", + //"A 350-MHz GBT Survey of 50 Faint Fermi $\\gamma$ ray Sources for Radio Millisecond Pulsars")); + //assertU(adoc("id", "403", "bibcode", "xxxxxxxxxx403", "title", + //"A 350-MHz GBT Survey of 50 Faint Fermi γ ray Sources for Radio Millisecond Pulsars")); } diff --git a/contrib/examples/adsabs/server/solr/collection1/conf/schema.xml b/contrib/examples/adsabs/server/solr/collection1/conf/schema.xml index 4730b7f47..f1e0b25fa 100644 --- a/contrib/examples/adsabs/server/solr/collection1/conf/schema.xml +++ b/contrib/examples/adsabs/server/solr/collection1/conf/schema.xml @@ -1200,8 +1200,9 @@ - + multiValued="true" + /> + @@ -1242,7 +1243,7 @@ - + + diff --git a/contrib/examples/adsabs/server/solr/collection1/conf/solrconfig.xml b/contrib/examples/adsabs/server/solr/collection1/conf/solrconfig.xml index 5b155b90a..2bdb1c73a 100644 --- a/contrib/examples/adsabs/server/solr/collection1/conf/solrconfig.xml +++ b/contrib/examples/adsabs/server/solr/collection1/conf/solrconfig.xml @@ -695,7 +695,7 @@ title^2 abstract^2 body keyword ack title abstract keyword orcid_pub orcid_user^0.9 orcid_other^0.8 - aff_abbrev aff_canonical aff_id institution^2 aff_raw^0.5 + aff_abbrev aff_canonical aff_id institution^2 aff^0.5 unfielded_search unfielded_search @@ -726,7 +726,7 @@ title^2 abstract^2 body keyword ack title abstract keyword orcid_pub orcid_user^0.9 orcid_other^0.8 - aff_abbrev aff_canonical aff_id institution^2 aff_raw^0.5 + aff_abbrev aff_canonical aff_id institution^2 aff^0.5 unfielded_search unfielded_search