From 107d544b2619941b9c8fb17f3f1d51281592e1e0 Mon Sep 17 00:00:00 2001
From: Patrick Detlefsen
Date: Tue, 8 Nov 2022 14:09:35 +0100
Subject: [PATCH] Properly support Unicode Strings (#3)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Properly support unicode strings

```
iex(47)> "✔︎" |> String.replace(~r/[\p{P}\p{S}]/, " ") |> String.valid?
false
iex(48)> "✔︎" |> String.replace(~r/[\p{P}\p{S}]/u, " ") |> String.valid?
true
```

* add a test case for unicode strings

Co-authored-by: Ryan Johnson
---
 lib/fuzzy_compare/preprocessor.ex |  2 +-
 test/preprocessor_test.exs        | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)
 create mode 100644 test/preprocessor_test.exs

diff --git a/lib/fuzzy_compare/preprocessor.ex b/lib/fuzzy_compare/preprocessor.ex
index 5b72938..834103a 100644
--- a/lib/fuzzy_compare/preprocessor.ex
+++ b/lib/fuzzy_compare/preprocessor.ex
@@ -11,7 +11,7 @@ defmodule FuzzyCompare.Preprocessor do
   alias FuzzyCompare.Preprocessed
 
   # Replaces all punctuation
-  @regex ~r/[\p{P}\p{S}]/
+  @regex ~r/[\p{P}\p{S}]/u
 
   @spec process(binary(), binary()) :: {Preprocessed.t(), Preprocessed.t()}
   def process(left, right) when is_binary(left) and is_binary(right) do
diff --git a/test/preprocessor_test.exs b/test/preprocessor_test.exs
new file mode 100644
index 0000000..cf97c2d
--- /dev/null
+++ b/test/preprocessor_test.exs
@@ -0,0 +1,10 @@
+defmodule PreprocessorTest do
+  use ExUnit.Case
+
+  @subject FuzzyCompare.Preprocessor
+
+  test "when replacing whitespace and punctuation in unicode strings the string remains a valid unicode string" do
+    result = @subject.process("✔︎")
+    assert String.valid?(result.string) == true
+  end
+end