From dc267463ff5197185c51a5b5c5cdae2378441a51 Mon Sep 17 00:00:00 2001 From: IhateTrains Date: Sat, 7 Oct 2023 23:46:57 +0100 Subject: [PATCH] More adjective rules and a test for GetAdjective against all major cities in the world (#360) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Test GetAdjective against all major cities in the world * AdjectiveRuleExistsForEveryCountryAndMajorCityInTheWorld * Rules for Envigado and Nanaimo * Adjective rule for Chigorodó * More adjective rules * Update commonItems.csproj --- .../Linguistics/StringExtensionsTests.cs | 39 +++++++++++++++++++ commonItems/Linguistics/adjective_rules.txt | 9 ++++- commonItems/commonItems.csproj | 2 +- 3 files changed, 48 insertions(+), 2 deletions(-) diff --git a/commonItems.UnitTests/Linguistics/StringExtensionsTests.cs b/commonItems.UnitTests/Linguistics/StringExtensionsTests.cs index 2964b081..3c73d91c 100644 --- a/commonItems.UnitTests/Linguistics/StringExtensionsTests.cs +++ b/commonItems.UnitTests/Linguistics/StringExtensionsTests.cs @@ -1,7 +1,10 @@ using commonItems.Linguistics; using Csv; +using FluentAssertions; +using System; using System.IO; using System.Linq; +using System.Net; using Xunit; namespace commonItems.UnitTests.Linguistics; @@ -181,6 +184,13 @@ public class StringExtensionsTests { [InlineData("Mcif", "Mcifi")] [InlineData("Maqomo", "Maqomo")] [InlineData("Frej", "Frejite")] + [InlineData("Trelew", "Trelewense")] + [InlineData("Nanaimo", "Nanaimoite")] + [InlineData("Envigado", "Envigadeño")] + [InlineData("Chigorodó", "Chigorodoseño")] + [InlineData("San Bernardo", "San Bernardino")] + [InlineData("Encantado", "Encantense")] + [InlineData("Lincoln", "Lincolnite")] // adjectives relying on rewrite rules [InlineData("Armenia Maioris", "Greater Armenian")] @@ -472,6 +482,35 @@ public void CorrectAdjectivesAreGeneratedForNamesFromCsv(string csvFilePath) { } } + [Fact] + public void 
AdjectiveRuleExistsForEveryCountryAndMajorCityInTheWorld() { + var csvUrl = "https://datahub.io/core/world-cities/r/world-cities.csv"; + var csv = new WebClient().DownloadString(csvUrl); + var cities = CsvReader.ReadFromText(csv) + .Select(line => line[0]) + .Where(city => !string.IsNullOrEmpty(city) && !city.StartsWith("Zürich (Kreis")) + .Distinct() + .ToList(); + + var countries = CsvReader.ReadFromText(csv) + .Select(line => line[1]) + .Where(country => !string.IsNullOrEmpty(country)) + .Distinct() + .ToList(); + + var output = new StringWriter(); + Console.SetOut(output); + + foreach (var city in cities) { + _ = city.GetAdjective(); + } + + foreach (var country in countries) { + _ = country.GetAdjective(); + } + output.ToString().Should().NotContain("No matching adjective rule found"); + } + [Theory] // ReSharper disable StringLiteralTypo [InlineData("Vilath#?!", "Vilathian")] diff --git a/commonItems/Linguistics/adjective_rules.txt b/commonItems/Linguistics/adjective_rules.txt index ec8b5a0d..af3ac44d 100644 --- a/commonItems/Linguistics/adjective_rules.txt +++ b/commonItems/Linguistics/adjective_rules.txt @@ -191,6 +191,7 @@ {"*anon", "*anese"}, // Lebanon {"*anya", "*anyan"}, // Netanya {"*aras", "*arentine"}, // Taras +{"*ardo", "*ardino"}, // San Bernardo {"*aros", "*arian"}, // Paros {"*atar", "*atari"}, // Qatar {"*atte", "*atian"}, // Ville Platte @@ -235,6 +236,7 @@ {"*etus", "*esian"}, // Miletus {"*ford", "*fordian"}, // Bedford {"*furt", "*furter"}, // Frankfurt +{"*gado", "*gadeño"}, // Envigado {"*gamo", "*gamasque"}, // Bergamo {"*gano", "*ganese"}, // Lugano {"*gana", "*ganite"}, // Telangana @@ -341,6 +343,7 @@ {"*[c]cis", "*[c]cidian"}, // Chalcis {"*[c]nai", "*[c]naite"}, // Chennai {"*[v]ris", "*[v]risian"}, // Marmaris +{"*[v]ado", "*[v]ense"}, // Encantado {"*[c]ver", "*[c]ver"}, // Denver {"*[v]ver", "*[v]verian"}, // Hanover @@ -407,6 +410,7 @@ {"*ice", "*icean"}, // Nice {"*ier", "*ieran"}, // Napier {"*iev", "*ievan"}, // Kiev 
+{"*imo", "*imoite"}, // Nanaimo {"*ing", "*ingite"}, // Kuching {"*ini", "*inian"}, // Leontini {"*inz", "*inzer"}, // Linz @@ -548,6 +552,7 @@ {"*ce", "*cian"}, // Thrace {"*co", "*can"}, // Morocco {"*de", "*dean"}, // Cape Verde +{"*dó", "*doseño"}, // Chigorodó {"*du", "*dunite"}, // Ordu {"*eh", "*ehi"}, {"*ej", "*ejite"}, @@ -557,7 +562,8 @@ {"*én", "*enés"}, // Jaén {"*eo", "*ean"}, // Bengeo {"*er", "*erite"}, // Casper -{"*es", "*ian"}, // Maldives +{"*es", "*ian"}, // Maldives +{"*ew", "*ewense"}, // Trelew {"*ge", "*ginian"}, // Carthage {"*gh", "*ghian"}, // Middlesbroughian {"*gk", "*gkan"}, @@ -581,6 +587,7 @@ {"*ku", "*kuvian"}, // Baku {"*ld", "*ldian"}, // Chesterfield {"*ll", "*llese"}, // Marshall +{"*ln", "*lnite"}, // https://en.wikipedia.org/wiki/Lincoln,_Nebraska {"*lo", "*lonian"}, // Buffalo {"*lu", "*lan"}, // Honolulu {"*me", "*man"}, // Rome diff --git a/commonItems/commonItems.csproj b/commonItems/commonItems.csproj index 39201c66..d89cccf7 100644 --- a/commonItems/commonItems.csproj +++ b/commonItems/commonItems.csproj @@ -6,7 +6,7 @@ False PGCG.$(AssemblyName) - 8.4.1 + 8.4.2 PGCG https://github.com/ParadoxGameConverters/commonItems.NET https://github.com/ParadoxGameConverters/commonItems.NET