diff --git a/lib/strip_attributes.rb b/lib/strip_attributes.rb index 7b3eb41..86e10c9 100644 --- a/lib/strip_attributes.rb +++ b/lib/strip_attributes.rb @@ -34,6 +34,7 @@ module StripAttributes # U+FEFF ZERO WIDTH NO-BREAK SPACE MULTIBYTE_WHITE = "\u180E\u200B\u200C\u200D\u2060\uFEFF" MULTIBYTE_SPACE = /[[:space:]#{MULTIBYTE_WHITE}]/ + MULTIBYTE_BLANK = /[[:blank:]#{MULTIBYTE_WHITE}]/ MULTIBYTE_SUPPORTED = "\u0020" == " " def self.strip(record_or_string, options = nil) @@ -82,8 +83,12 @@ def self.strip_string(value, options = nil) value.gsub!(/[\r\n]+/, " ") end - if collapse_spaces && value.respond_to?(:squeeze!) - value.squeeze!(' ') + if collapse_spaces + if MULTIBYTE_SUPPORTED && value.respond_to?(:gsub!) && Encoding.compatible?(value, MULTIBYTE_BLANK) + value.gsub!(/#{MULTIBYTE_BLANK}+/, " ") + elsif value.respond_to?(:squeeze!) + value.squeeze!(" ") + end end value diff --git a/test/strip_attributes_test.rb b/test/strip_attributes_test.rb index 5f2c945..6c2c22c 100644 --- a/test/strip_attributes_test.rb +++ b/test/strip_attributes_test.rb @@ -258,6 +258,10 @@ def test_should_collapse_spaces assert_equal "1 2 3", StripAttributes.strip(" 1 2 3\t ", :collapse_spaces => true) end + def test_should_collapse_multibyte_spaces + assert_equal "1 2 3", StripAttributes.strip(" 1 \u00A0 2\u00A03\t ", :collapse_spaces => true) + end + def test_should_replace_newlines assert_equal "1 2", StripAttributes.strip("1\n2", :replace_newlines => true) assert_equal "1 2", StripAttributes.strip("1\r\n2", :replace_newlines => true)