From 6148a5a564559b40130f9d1c18ff055b9e72f407 Mon Sep 17 00:00:00 2001 From: Seweryn Presnal Date: Fri, 22 Dec 2023 03:25:51 +0100 Subject: [PATCH] More adjective rules --- .../Linguistics/StringExtensionsTests.cs | 10 ++++++++++ commonItems/Linguistics/adjective_rules.txt | 14 +++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/commonItems.UnitTests/Linguistics/StringExtensionsTests.cs b/commonItems.UnitTests/Linguistics/StringExtensionsTests.cs index 70ce0746..83e261f8 100644 --- a/commonItems.UnitTests/Linguistics/StringExtensionsTests.cs +++ b/commonItems.UnitTests/Linguistics/StringExtensionsTests.cs @@ -204,6 +204,12 @@ public class StringExtensionsTests { [InlineData("Arecibo", "Areciboan")] [InlineData("Shahrisabz", "Shahrisabzian")] [InlineData("El Paso", "El Pasoan")] + [InlineData("Kerch", "Kerchian")] + [InlineData("Konotop", "Konotopian")] + [InlineData("Iskilip", "Iskilipian")] + [InlineData("Peruwelz", "Peruwelzian")] + [InlineData("Schmargendorf", "Schmargendorfer")] + [InlineData("Schwelm", "Schwelmer")] // adjectives relying on rewrite rules [InlineData("Armenia Maioris", "Greater Armenian")] @@ -508,8 +514,12 @@ public void AdjectiveRuleExistsForEveryCountryAndMajorCityInTheWorld() { var cities = CsvReader.ReadFromText(csv) .Select(line => line[0]) .Where(city => !string.IsNullOrEmpty(city)) + .Where(city => !city.Contains(" in der ")) + .Where(city => !city.Contains(" an der ")) + .Where(city => !city.Contains(" am ")) .Where(city => !city.StartsWith("Zürich (Kreis")) .Where(city => !city.StartsWith("Sector ")) + .Where(city => !city.EndsWith(" II")) .Distinct() .ToList(); diff --git a/commonItems/Linguistics/adjective_rules.txt b/commonItems/Linguistics/adjective_rules.txt index ba559a50..d7e84b86 100644 --- a/commonItems/Linguistics/adjective_rules.txt +++ b/commonItems/Linguistics/adjective_rules.txt @@ -397,6 +397,7 @@ {"*ego", "*egan"}, // San Diego {"*egu", "*egu"}, // Daegu {"*eia", "*ian"}, // Eleia +{"*elm", "*elmer"}, // Schwelm {"*ene", "*enian"}, // Cyrene {"*eno", "*enoite"}, // Reno {"*eru", "*eruvian"}, // Peru @@ -471,6 +472,7 @@ {"*omo", "*omo"}, {"*oof", "*oofian"}, // Duiwelskloof, rule made up {"*oon", "*oonian"}, // Cameroon +{"*orf", "*orfer"}, // Schmargendorf {"*osu", "*osuite"}, // Yeosu {"*pan", "*panese"}, // Japan {"*peg", "*pegger"}, // Winnipeg @@ -565,6 +567,7 @@ {"*bz", "*bzian"}, // Shahrisabz {"*ca", "*can"}, // Africa {"*ce", "*cian"}, // Thrace +{"*ch", "*chian"}, // Kerch {"*co", "*can"}, // Morocco {"*de", "*dean"}, // Cape Verde {"*do", "*dense"}, // https://en.wikipedia.org/wiki/Ejido,_Venezuela @@ -577,6 +580,7 @@ {"*en", "*enese"}, // Jan Mayen {"*én", "*enés"}, // Jaén {"*eo", "*ean"}, // Bengeo +{"*ep", "*epian"}, // Pochep {"*er", "*erite"}, // Casper {"*es", "*ian"}, // Maldives, {"*ew", "*ewense"}, // Trelew @@ -589,6 +593,7 @@ {"*in", "*inese"}, // Benin {"*im", "*imite"}, {"*io", "*ian"}, // San Antonio +{"*ip", "*ipian"}, // Iskilip {"*is", "*ian"}, // Locris {"*it", "*iti"}, // Kuwait {"*ix", "*ician"}, // Phoenix @@ -605,11 +610,16 @@ {"*ku", "*kuvian"}, // Baku {"*ld", "*ldian"}, // Chesterfield {"*ll", "*llese"}, // Marshall +{"*lm", "*lmite"}, // Stockholm {"*ln", "*lnite"}, // https://en.wikipedia.org/wiki/Lincoln,_Nebraska {"*lo", "*lonian"}, // Buffalo +{"*lp", "*lpian"}, // Kulp (rule made up) +{"*lz", "*lzian"}, // Peruwelz {"*lu", "*lan"}, // Honolulu {"*me", "*man"}, // Rome {"*mi", "*mian"}, // Symi +{"*mo", "*moan"}, // Alamo +{"*mp", "*mpian"}, // Oostkamp {"*my", "*mois"}, // Saint Barthélemy {"*na", "*nian"}, // Argentina {"*nj", "*nji"}, @@ -620,8 +630,9 @@ {"*oh", "*ohan"}, {"*om", "*omian"}, {"*on", "*onese"}, // Gabon -{"*oy", "*ojan"}, // Troy +{"*op", "*opian"}, // Konotop {"*os", "*ian"}, // Thasos +{"*oy", "*ojan"}, // Troy {"*ów", "*ovian"}, // Kraków {"*pa", "*pan"}, // Tampa {"*pe", "*pean"}, // Europe @@ -654,6 +665,7 @@ {"*yn", "*ynite"}, // Brooklyn {"*ys", "*ysian"}, // Arambys (rule made up) {"*ze", "*zean"}, // Belize +{"*zh", "*zhian"}, // Novovoronezh (rule made up) {"*[v]h", "*[v]hite"}, {"*[v]k", "*[v]kian"}, {"*[c]s", "*[c]ian"}, // Athens