From 72de1899adada5f01a2875b7c03d1ce6480721e7 Mon Sep 17 00:00:00 2001 From: Roman Chyla Date: Thu, 2 Jan 2020 17:10:09 -0500 Subject: [PATCH] Made changes for affiliations --- .../src/test/org/adsabs/TestAdsAllFields.java | 39 +++-- .../TestAdsabsTypeAffiliationText.java | 52 +++--- .../TestAdsabsTypeAffiliationTokens.java | 47 +++++- .../server/solr/collection1/conf/schema.xml | 151 +++++++++--------- .../solr/collection1/conf/solrconfig.xml | 2 + 5 files changed, 176 insertions(+), 115 deletions(-) diff --git a/contrib/adsabs/src/test/org/adsabs/TestAdsAllFields.java b/contrib/adsabs/src/test/org/adsabs/TestAdsAllFields.java index 6ceb5fec4..11982c645 100644 --- a/contrib/adsabs/src/test/org/adsabs/TestAdsAllFields.java +++ b/contrib/adsabs/src/test/org/adsabs/TestAdsAllFields.java @@ -132,14 +132,14 @@ public void test() throws Exception { //", \"recid\": 100" + ", \"abstract\": \"all no-sky survey q'i quotient\"" + - ", \"aff\": [\"-\", \"NASA Kavli space center, Cambridge, MA 02138, USA\", \"Einstein institute, Zurych, Switzerland\"]" + - ", \"aff_abbrev\": [\"CfA\", \"Harvard U/Dep Ast\", \"-\"]" + - ", \"institution\": [\"CfA\", \"Harvard U/Dep Ast\", \"-\", \"foo/bar baz\"]" + + ", \"aff_id\": [\"61814\", \"A1036\", \"-\"]" + + ", \"aff_abbrev\": [\"CfA\", \"Harvard U/Dep Ast\", \"-\"]" + ", \"aff_canonical\": [\"Harvard Smithsonian Center for Astrophysics\", \"Harvard University, Department of Astronomy\", \"-\"]" + + ", \"aff_raw\": [\"-\", \"NASA Kavli space center, Cambridge, MA 02138, USA\", \"Einstein institute, Zurych, Switzerland\"]" + + ", \"institution\": [\"CfA\", \"Harvard U/Dep Ast\", \"-\", \"foo/bar baz\"]" + ", \"aff_facet\": [[\"A1234\", \"facet abbrev/parent abbrev\"]]" + ", \"aff_facet_hier\": [\"1/1812/61814\", \"1/8264/61814\", \"1/1812/A1036\", \"-\"]" + - ", \"aff_id\": [\"61814\", \"A1036\", \"-\"]" + - ", \"aff_raw\": [\"-\", \"Center for Astrophysics, 60 Garden Street, Cambridge MA 02138, USA\", \"Department of Astronomy, Harvard 
University, 60 Garden St., Cambridge, MA 02130, USA\"]" + + ", \"alternate_bibcode\": [\"2014JNuM..455...1a1\", \"2014JNuM..455...1a2\"]" + ", \"alternate_title\": \"This is of the alternate\"" + @@ -449,6 +449,10 @@ public void test() throws Exception { "Anders, John Michael" + "Einstein, A"); + assertQ(req("q", "author:\"t' Hoof*\""), + "//*[@numFound='3']" + ); + /* * book_author */ @@ -492,7 +496,8 @@ public void test() throws Exception { /* - * aff - must be the same order as authors + * aff - is a virtual field; + * all aff_ fields must be the same order as authors */ assertQ(req("q", "aff:NASA"), @@ -551,22 +556,26 @@ public void test() throws Exception { // "//*[@numFound='1']"); assertQ(req("q", "aff_id:\"61814\""), "//*[@numFound='1']"); - assertQ(req("q", "aff_raw:\"Harvard\""), + assertQ(req("q", "aff_raw:\"02138\""), + "//*[@numFound='1']"); + assertQ(req("q", "aff_canonical:\"Smithsonian\""), + "//*[@numFound='1']"); + assertQ(req("q", "institution:\"Harvard U/Dep Ast\""), "//*[@numFound='1']"); assert h.query(req("q", "recid:100")) - .contains("" + + .contains("" + "-" + "NASA Kavli space center, Cambridge, MA 02138, USA" + "Einstein institute, Zurych, Switzerland" ); - assertQ(req("q", "pos(aff:kavli, 2) AND recid:100"), - "//*[@numFound='1']" - ); assertQ(req("q", "=aff:\"acr::nasa\" AND recid:100"), "//*[@numFound='1']" ); + assertQ(req("q", "pos(aff_raw:kavli, 2) AND recid:100"), + "//*[@numFound='1']" + ); /* @@ -1614,7 +1623,11 @@ public void test() throws Exception { * solrcofig.xml; here we just test what came up as bugs */ assertQueryEquals(req("q", "aff:\"ASTRO 3D\""), - "(aff:\"acr::astro (3d 3d)\" | aff:\"acr::astro 3 d\")", - DisjunctionMaxQuery.class); + "(aff_abbrev:\"acr::astro (3d 3d)\" | aff_abbrev:\"acr::astro 3 d\") " + + "(institution:astro 3d)^2.0 " + + "aff_id:astro 3d " + + "(aff_canonical:\"acr::astro (3d 3d)\" | aff_canonical:\"acr::astro 3 d\") " + + "((aff_raw:\"acr::astro (3d 3d)\" | aff_raw:\"acr::astro 3 d\"))^0.5", + 
BooleanQuery.class); } } diff --git a/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeAffiliationText.java b/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeAffiliationText.java index 9b2121870..02aefcdf7 100644 --- a/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeAffiliationText.java +++ b/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeAffiliationText.java @@ -55,83 +55,83 @@ public static void beforeClass() throws Exception { public void test() throws Exception { - assertU(addDocs("aff", "W.K. Kellogg Radiation Laboratory, California Institute of Technology, Pasadena, CA 91125, USA")); - assertU(addDocs("aff", "W.K. Kellogg Radiation Laboratory, California Institute of Technology, Pasadena, CA(91125), USA")); - assertU(addDocs("aff", "IMCCE/Observatoire de Paris")); - assertU(addDocs("aff", "INAF - Osservatorio Astronomico di Brera, via E. Bianchi 46, I-23807 Merate, Italy;", - "aff", "INAF - IASF Milano, via E. Bassini 15, I-20133 Milano, Italy")); - assertU(addDocs("aff", "Instituto de Astrofísica de Andalucía (IAA-CSIC) foo:doo")); - assertU(addDocs("aff", "foo1", "aff", "foo2", "aff", "-", "aff", "foo4")); + assertU(addDocs("aff_raw", "W.K. Kellogg Radiation Laboratory, California Institute of Technology, Pasadena, CA 91125, USA")); + assertU(addDocs("aff_raw", "W.K. Kellogg Radiation Laboratory, California Institute of Technology, Pasadena, CA(91125), USA")); + assertU(addDocs("aff_raw", "IMCCE/Observatoire de Paris")); + assertU(addDocs("aff_raw", "INAF - Osservatorio Astronomico di Brera, via E. Bianchi 46, I-23807 Merate, Italy;", + "aff_raw", "INAF - IASF Milano, via E. 
Bassini 15, I-20133 Milano, Italy")); + assertU(addDocs("aff_raw", "Instituto de Astrofísica de Andalucía (IAA-CSIC) foo:doo")); + assertU(addDocs("aff_raw", "foo1", "aff_raw", "foo2", "aff_raw", "-", "aff_raw", "foo4")); assertU(commit()); - //dumpDoc(null, "aff"); - //System.err.println(h.query(req("q", "aff:foo1"))); + //dumpDoc(null, "aff_raw"); + //System.err.println(h.query(req("q", "aff_raw:foo1"))); assertQ(req("q", "*:*"), "//*[@numFound>='2']"); - assertQ(req("q", "aff:xfoo"), "//*[@numFound='0']"); + assertQ(req("q", "aff_raw:xfoo"), "//*[@numFound='0']"); - assertQueryEquals(req("q", "aff:\"Pasadena, CA 91125\"", "qt", "aqp"), - "aff:\"pasadena acr::ca 91125\"", + assertQueryEquals(req("q", "aff_raw:\"Pasadena, CA 91125\"", "qt", "aqp"), + "aff_raw:\"pasadena acr::ca 91125\"", PhraseQuery.class ); - assertQueryEquals(req("q", "aff:\"Pasadena, CA(91125)\"", "qt", "aqp"), - "aff:\"pasadena acr::ca 91125\"", + assertQueryEquals(req("q", "aff_raw:\"Pasadena, CA(91125)\"", "qt", "aqp"), + "aff_raw:\"pasadena acr::ca 91125\"", PhraseQuery.class ); - assertQ(req("q", "aff:\"Pasadena, CA 91125\""), + assertQ(req("q", "aff_raw:\"Pasadena, CA 91125\""), "//*[@numFound='2']", "//doc/str[@name='id'][.='0']", "//doc/str[@name='id'][.='1']" ); - assertQ(req("q", "aff:IMCCE"), + assertQ(req("q", "aff_raw:IMCCE"), "//*[@numFound='1']", "//doc/str[@name='id'][.='2']" ); - assertQ(req("q", "aff:imcce"), + assertQ(req("q", "aff_raw:imcce"), "//*[@numFound='1']", "//doc/str[@name='id'][.='2']" ); - assertQ(req("q", "aff:IASF"), + assertQ(req("q", "aff_raw:IASF"), "//*[@numFound='1']", "//doc/str[@name='id'][.='3']" ); - assertQ(req("q", "aff:iasf"), + assertQ(req("q", "aff_raw:iasf"), "//*[@numFound='1']", "//doc/str[@name='id'][.='3']" ); - assertQ(req("q", "aff:IAA-CSIC"), + assertQ(req("q", "aff_raw:IAA-CSIC"), "//*[@numFound='1']", "//doc/str[@name='id'][.='4']" ); - assertQ(req("q", "aff:IAACSIC"), + assertQ(req("q", "aff_raw:IAACSIC"), "//*[@numFound='1']", 
"//doc/str[@name='id'][.='4']" ); - assertQ(req("q", "aff:iaa-csic"), + assertQ(req("q", "aff_raw:iaa-csic"), "//*[@numFound='1']", "//doc/str[@name='id'][.='4']" ); - assertQ(req("q", "aff:\"INAF - IASF\""), + assertQ(req("q", "aff_raw:\"INAF - IASF\""), "//*[@numFound='1']", "//doc/str[@name='id'][.='3']" ); - assertQ(req("q", "aff:\"inaf - iasf\""), + assertQ(req("q", "aff_raw:\"inaf - iasf\""), "//*[@numFound='1']", "//doc/str[@name='id'][.='3']" ); - assert h.query(req("q", "aff:foo1")) - .contains("" + + assert h.query(req("q", "aff_raw:foo1")) + .contains("" + "foo1" + "foo2" + "-" + diff --git a/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeAffiliationTokens.java b/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeAffiliationTokens.java index c9ee425b6..0c5f199f0 100644 --- a/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeAffiliationTokens.java +++ b/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeAffiliationTokens.java @@ -21,8 +21,12 @@ import monty.solr.util.MontySolrQueryTestCase; import monty.solr.util.MontySolrSetup; +import java.io.File; +import java.io.IOException; + import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermQuery; import org.junit.BeforeClass; @@ -44,8 +48,7 @@ public static void beforeClass() throws Exception { System.setProperty("solr.allow.unsafe.resourceloading", "true"); - schemaString = MontySolrSetup.getMontySolrHome() - + "/contrib/examples/adsabs/server/solr/collection1/conf/schema.xml"; + schemaString = getSchemaFile(); configString = MontySolrSetup.getMontySolrHome() + "/contrib/examples/adsabs/server/solr/collection1/conf/solrconfig.xml"; @@ -53,7 +56,38 @@ public static void beforeClass() throws Exception { initCore(configString, schemaString, MontySolrSetup.getSolrHome() + "/example/solr"); } - + + public static String getSchemaFile() { + + /* + 
* For purposes of the test, we work on a copy of schema.xml (duplicateFile) and + * swap its aff_id.synonyms reference for our own temp synonym file + */ + + String configFile = MontySolrSetup.getMontySolrHome() + + "/contrib/examples/adsabs/server/solr/collection1/conf/schema.xml"; + + File newConfig; + try { + + newConfig = duplicateFile(new File(configFile)); + + + File simpleTokenSynonymsFile = createTempFile( + new String[] { "id1,id2\n" + + "ror.1,foo,bar" + }); + + replaceInFile(newConfig, "synonyms=\"aff_id.synonyms\"", + "synonyms=\"" + simpleTokenSynonymsFile.getAbsolutePath() + "\""); + + } catch (IOException e) { + e.printStackTrace(); + throw new IllegalStateException(e.getMessage(), e); // keep the IOException as the cause + } + + return newConfig.getAbsolutePath(); + } public void test() throws Exception { @@ -65,6 +99,13 @@ public void test() throws Exception { )); assertU(commit()); + + // test synonyms + assertQueryEquals(req("q", "aff_id:\"ror.1\""), + "Synonym(aff_id:bar aff_id:foo aff_id:ror.1)", + SynonymQuery.class + ); + // make sure docs are there assertQ(req("q", "*:*"), "//*[@numFound>='2']"); diff --git a/contrib/examples/adsabs/server/solr/collection1/conf/schema.xml b/contrib/examples/adsabs/server/solr/collection1/conf/schema.xml index 025f52119..22dafa65e 100644 --- a/contrib/examples/adsabs/server/solr/collection1/conf/schema.xml +++ b/contrib/examples/adsabs/server/solr/collection1/conf/schema.xml @@ -835,36 +835,41 @@ - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + @@ -873,45 +878,44 @@ - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + multiValued="true" omitNorms="true"/> @@ -1251,7 +1256,7 @@ docValues="true"/> - diff --git a/contrib/examples/adsabs/server/solr/collection1/conf/solrconfig.xml b/contrib/examples/adsabs/server/solr/collection1/conf/solrconfig.xml index 990753470..5b155b90a 100644 --- a/contrib/examples/adsabs/server/solr/collection1/conf/solrconfig.xml +++ b/contrib/examples/adsabs/server/solr/collection1/conf/solrconfig.xml @@ 
-695,6 +695,7 @@ title^2 abstract^2 body keyword ack title abstract keyword orcid_pub orcid_user^0.9 orcid_other^0.8 + aff_abbrev aff_canonical aff_id institution^2 aff_raw^0.5 unfielded_search unfielded_search @@ -725,6 +726,7 @@ title^2 abstract^2 body keyword ack title abstract keyword orcid_pub orcid_user^0.9 orcid_other^0.8 + aff_abbrev aff_canonical aff_id institution^2 aff_raw^0.5 unfielded_search unfielded_search