From e8141b9578edc552c2521aec40ecf6b0ed9d462e Mon Sep 17 00:00:00 2001 From: Roman Chyla Date: Wed, 27 Jan 2016 18:05:59 -0500 Subject: [PATCH] Updating author synonyms --- .../TestAuthorTransliterationFilter.java | 10 ++++++++++ .../collection1/conf/author_curated.synonyms | 19 +++++++++++++++++-- .../solr/collection1/conf/solrconfig.xml | 2 +- 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/contrib/adsabs/src/test/org/apache/solr/analysis/author/TestAuthorTransliterationFilter.java b/contrib/adsabs/src/test/org/apache/solr/analysis/author/TestAuthorTransliterationFilter.java index f157cc1dd..374a3916c 100644 --- a/contrib/adsabs/src/test/org/apache/solr/analysis/author/TestAuthorTransliterationFilter.java +++ b/contrib/adsabs/src/test/org/apache/solr/analysis/author/TestAuthorTransliterationFilter.java @@ -39,4 +39,14 @@ public void testAuthorSynonyms() throws Exception { String[] expected = { "Müller, Bill", "Mueller, Bill", "Muller, Bill" }; assertTokenStreamContents(stream, expected); } + + public void testAccents() throws Exception { + Reader reader = new StringReader("Jeřábková, Tereza"); + Tokenizer tokenizer = new KeywordTokenizer(reader); + AuthorTransliterationFactory factory = new AuthorTransliterationFactory(new HashMap()); + TokenStream stream = factory.create(new TestFilter(tokenizer)); + + String[] expected = { "Jeřábková, Tereza", "Jerabkova, Tereza"}; + assertTokenStreamContents(stream, expected); + } } diff --git a/contrib/examples/adsabs/solr/collection1/conf/author_curated.synonyms b/contrib/examples/adsabs/solr/collection1/conf/author_curated.synonyms index 5ece26a69..9a5c27d00 100644 --- a/contrib/examples/adsabs/solr/collection1/conf/author_curated.synonyms +++ b/contrib/examples/adsabs/solr/collection1/conf/author_curated.synonyms @@ -62,7 +62,7 @@ bergmann, thaisa; storchi bergmann, thaisa bergstrahl, j; bergstralh, j berriman, bruce; berriman, graham berrios salas, m l; berrios, m l -berta thompson, zachory; berta, zachory +berta thompson, zachory k; berta, zachory k bessel, michael s; bessell, michael s bhattacharya, b; bhattacharyya, b bhattacharya, d; bhattacharyya, d @@ -94,13 +94,14 @@ burton, butler; burton, w butler burundkov, a s; burundukov, a s bushuiv, ye i; bushuyev, ye i calle, ignacio; de la calle perez, ignacio; de la calle, ignacio +cabezon gomez, ruben m; cabezon, ruben m camero arranz, ascension; camero, ascension cardona nunez, octavio; cardona, octavio caro, d; fraipont caro, d carollo, c marcella; carollo, marcella carozzi meyssonnier, n; carozzi, n carswell, bob; carswell, robert -casetti dinescu, d; casetti, d; dinescu, d +casetti dinescu, d; casetti, d; casian, v; dinescu, d; kasiyan, v castro perelman, c; castro, c cayrel de strobel, giusa; cayrel, giusa; de strobel, giusa celis s, l; celis, l @@ -110,6 +111,7 @@ chaudhuri, s; chaudhury, s chavarria k, carlos; chavarria, carlos chedia, o v; chediia, o v chekanihina, o a; chekanikhina, o a +chetrus, p; ketrush, p chilingarian, a; chilingaryan, a chilingarian, i; chilingaryan, i chitre, aparna; maybhate, aparna @@ -129,6 +131,7 @@ cojocaru, i a; kozhokari, i a collier cameron, andrew; collier, andrew collin souffrin, suzy; collin, suzy conn henry, richard; henry, richard conn +contreras pena, carlos; pena, carlos corona galindo, m g; corona, m g costa, roberto; dellaglio dias da costa, roberto; dellaglio, d courtois, helene m; di nella, helene m @@ -173,6 +176,7 @@ de vegt, chr; devegt, chr de vries, c; devries, c de vries, j; devries, j de vries, m; devries, m +de witt, cecile; dewitt, cecile; morette, cecile; witt, cecile de zeeuw, p tim; de zeeuw, tim debish, a m; samir debish, a m dedes, carolin; hieret, carolin @@ -196,6 +200,7 @@ di serego alighieri, sperello; di serego, sperello di stefano, rosanne; distefano, rosanne diakonov, m n; dyakonov, m n diakonova, m i; dyakonova, m i +dierkes, jens; zuther, jens diethelm sutter, roger; diethelm, roger diminshtein, o s; diminstein, o s dixon, william; van dyke dixon, william @@ -255,6 +260,7 @@ gamal el din, a i; gamaleldin, a i garcia lopez, ramon; lopez, ramon garcia rissmann, aurea; garcia, aurea garmany, catherine; garmany, katy +gashin, petru a; gasin, petru a gayon markt, julie; gayon, julie gauss, carl friedrich; gauss, karl friedrich gazol patino, adriana; gazol, adriana @@ -384,12 +390,14 @@ kisliuk, v s; kislyuk, v s kiszkurno koziej, e; kiszkurno, e kjeldseth moe, olav; moe, olav kleinman, scot j; kleinmann, scot j +klocova, tereza; krejcova, tereza kniazev, a yu; knyazev, a yu kocharian, a; kocharyan, a kogan, a; kogan, i; kogan, y korpi, maarit j; mantere, maarit j kostrzewa rutkowska, zuzanna; kostrzewa, zuzanna kotliarevskii, d m; kotlyarevski, d m +kovacevic dojcinovic, jelena; kovacevic, jelena kovalchuck, g u; kovalchuk, g u kraan korteweg, r; kraan, r krasinski, g; krasinsky, g @@ -470,6 +478,7 @@ maureira fredes, cristian; maureira, cristian mayall, margaret walton; walton mayall, margaret l; walton, margaret l lorente, nuria p f; mckay, nuria p f mckenna lawlor, susan m p; mckenna, susan m p +medling, anne m; rajala, anne m medvededva, g i; medvedeva, g i meliakian, n d; melikian, n d; melikjan, n d; melikyan, n d mendoza b, cesar a; mendoza, cesar a @@ -530,12 +539,14 @@ norton, lisa; young, lisa novakova jezkova, m; novakova, m novoseltseva, m v; novoseltseva, r v ocoulitch, leonid vladislavovich; ocoulitsch, leonid vladislavovich; okulich, leonid vladislavovich +oey, m sally; oey, sally o murchadha, niall; omurchadha, niall olivo melchiorri, b; olivo, b olsson steel, duncan; steel, duncan oppenheimer, ben r; oppenheimer, rebecca orlitova, ivana; stoklasova, ivana oscoz abad, a; oscoz, a +osorio, maria rosa; zapatero osorio, maria rosa otulakowska hypka, magdalena; otulakowska, magdalena ougolnikov, o s; ugolnikov, o s ozernoi, l m; ozernoy, l m @@ -599,6 +610,7 @@ saint louis, nicole; st louis, nicole saint maurice, j p; st maurice, j p sanchawala, kaushar; vaidya, kaushar sarmiento g, antonio f; sarmiento, antonio f +savanevich, vadim; savanevych, vadym saveleva, m v; saveljeva, m v savvidi, g k; savvidy, g k sborshchikov, v g; sborshikov, v g @@ -622,6 +634,7 @@ shamarian, n i; shamarin, n i shermanzon, a m; shermanzon, e m shlafshtein, e e; shlafshtejn, e e siemieniec ozieblo, g; siemieniec, g +simaschevici, alexei; simashkevich, alexei sirodzev, n; sirodzhaev, n; sirodzhev, n siuniaev, rashid; suniaev, rashid; sunyaev, rashid; sunyayev, rashid; syunyaev, rashid; syunyayev, rashid skliarov, v v; sklyarov, v v @@ -641,8 +654,10 @@ spokoiny, b l; spokoinyi, b l stacey, gordon j; stacy, gordon j stamenov, i n; stamenov, j n; stamenov, y n straizhis, v; straizys, v +struve, otto wilhelm; von struve, otto wilhelm styashkin, v a; styazhkin, v a sundara raman, k; sundararaman, k +kurcz, a; swieton, a tagle, g t; tenorio tagle, g t tallquist, s; tallqvist, s tam, p h; tam, thomas diff --git a/contrib/examples/adsabs/solr/collection1/conf/solrconfig.xml b/contrib/examples/adsabs/solr/collection1/conf/solrconfig.xml index 6926f6b88..fddfa8330 100644 --- a/contrib/examples/adsabs/solr/collection1/conf/solrconfig.xml +++ b/contrib/examples/adsabs/solr/collection1/conf/solrconfig.xml @@ -28,7 +28,7 @@ - + 8