From c3f854430876ddd5d318315a0c5fb4c13a563622 Mon Sep 17 00:00:00 2001
From: tom-lord <lord.thom@gmail.com>
Date: Sat, 18 May 2024 12:17:52 +0100
Subject: [PATCH 1/6] Update gemspec

---
 .ruby-version           | 2 +-
 Gemfile                 | 1 -
 regexp-examples.gemspec | 7 ++++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.ruby-version b/.ruby-version
index 4a36342..ef538c2 100644
--- a/.ruby-version
+++ b/.ruby-version
@@ -1 +1 @@
-3.0.0
+3.1.2
diff --git a/Gemfile b/Gemfile
index 29b0bc6..553544a 100644
--- a/Gemfile
+++ b/Gemfile
@@ -3,7 +3,6 @@ source 'https://rubygems.org'
 group :test do
   gem 'rspec'
   gem 'coveralls', require: false
-  gem 'pry'
 end
 
 # Specify your gem's dependencies in regexp-examples.gemspec
diff --git a/regexp-examples.gemspec b/regexp-examples.gemspec
index f433c9f..562197f 100644
--- a/regexp-examples.gemspec
+++ b/regexp-examples.gemspec
@@ -14,9 +14,10 @@ Gem::Specification.new do |s|
   s.test_files       = s.files.grep(/^(test|spec|features)\//)
   s.require_paths    = ['lib']
   s.homepage         = 'http://rubygems.org/gems/regexp-examples'
-  s.add_development_dependency 'bundler', '> 1.7'
-  s.add_development_dependency 'rake', '~> 12.0'
-  s.add_development_dependency 'pry', '~> 0.12.0'
+  s.add_dependency 'regexp_property_values', '~> 1.5'
+  s.add_development_dependency 'bundler', '~> 2.4'
+  s.add_development_dependency 'rake', '~> 13.0'
+  s.add_development_dependency 'pry'
   s.add_development_dependency 'warning', '~> 0.10.0'
   s.license          = 'MIT'
   s.required_ruby_version = '>= 2.4.0'

From aa435863343f05b37e2b274f78f74beff75aa860 Mon Sep 17 00:00:00 2001
From: tom-lord <lord.thom@gmail.com>
Date: Sat, 18 May 2024 13:43:53 +0100
Subject: [PATCH 2/6] Replace pstore implementation with gem

---
 .../parse_after_backslash_group_helper.rb     |  4 +++-
 spec/regexp-examples_spec.rb                  | 24 ++-----------------
 2 files changed, 5 insertions(+), 23 deletions(-)

diff --git a/lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb b/lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb
index b011c8c..886f377 100644
--- a/lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb
+++ b/lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb
@@ -1,3 +1,5 @@
+require 'regexp_property_values'
+
 module RegexpExamples
   # A collection of related helper methods, utilised by the `Parser` class
   module ParseAfterBackslashGroupHelper
@@ -97,7 +99,7 @@ def parse_backslash_named_property(p_negation, caret_negation, property_name)
       # Beware of double negatives! E.g. /\P{^Space}/
       is_negative = (p_negation == 'P') ^ (caret_negation == '^')
       CharGroup.new(
-        negate_if(CharSets::NamedPropertyCharMap[property_name.downcase], is_negative),
+        negate_if(RegexpPropertyValues[property_name.downcase].matched_characters, is_negative),
         @ignorecase
       )
     end
diff --git a/spec/regexp-examples_spec.rb b/spec/regexp-examples_spec.rb
index 3c49b5c..e419fce 100644
--- a/spec/regexp-examples_spec.rb
+++ b/spec/regexp-examples_spec.rb
@@ -190,20 +190,8 @@ def self.examples_are_empty(*regexps)
         /\P{Ll}/, # Negation syntax type 2
         /\P{^Ll}/ # Double negation!! (Should cancel out)
       )
-      # An exhaustive set of tests for all named properties!!! This is useful
-      # for verifying the PStore contains correct values for all ruby versions
-      %w[
-        Alnum Alpha Blank Cntrl Digit Graph Lower Print Punct Space Upper XDigit
-        Word ASCII Any Assigned L Ll Lm Lo Lt Lu M Mn Mc Me N Nd Nl No P Pc Pd
-        Ps Pe Pi Pf Po S Sm Sc Sk So Z Zs Zl Zp C Cc Cf Cn Co Arabic Armenian
-        Balinese Bengali Bopomofo Braille Buginese Buhid Canadian_Aboriginal
-        Cham Cherokee Common Coptic Cyrillic Devanagari Ethiopic Georgian
-        Glagolitic Greek Gujarati Gurmukhi Han Hangul Hanunoo Hebrew Hiragana
-        Inherited Kannada Katakana Kayah_Li Khmer Lao Latin Lepcha Limbu Malayalam
-        Mongolian Myanmar New_Tai_Lue Nko Ogham Ol_Chiki Oriya Phags_Pa Rejang
-        Runic Saurashtra Sinhala Sundanese Syloti_Nagri Syriac Tagalog Tagbanwa
-        Tai_Le Tamil Telugu Thaana Thai Tibetan Tifinagh Vai Yi
-      ].each do |property|
+
+      RegexpPropertyValues.all_for_current_ruby.map(&:identifier).each do |property|
         it "examples for /\p{#{property}}/" do
           regexp_examples = /\p{#{property}}/.examples(max_group_results: 99_999)
           expect(regexp_examples)
@@ -214,14 +202,6 @@ def self.examples_are_empty(*regexps)
           expect(regexp_examples.join('')).to match(/\A\p{#{property}}+\z/)
         end
       end
-
-      # The following seem to genuinely have no matching examples (!!??!!?!)
-      %w[
-        Cs Carian Cuneiform Cypriot Deseret Gothic Kharoshthi Linear_B Lycian
-        Lydian Old_Italic Old_Persian Osmanya Phoenician Shavian Ugaritic
-      ].each do |property|
-        examples_are_empty(/\p{#{property}}/)
-      end
     end
 
     context 'for control characters' do

From 3a13bc0eaa7eba5e589dff9317c6cd75146feac8 Mon Sep 17 00:00:00 2001
From: tom-lord <lord.thom@gmail.com>
Date: Mon, 20 May 2024 18:56:07 +0100
Subject: [PATCH 3/6] Get specs passing using matched_codepoints

---
 .../parser_helpers/charset_negation_helper.rb |  2 +-
 .../parse_after_backslash_group_helper.rb     | 10 ++++++--
 spec/regexp-examples_spec.rb                  | 23 ++++++++++++-------
 3 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/lib/regexp-examples/parser_helpers/charset_negation_helper.rb b/lib/regexp-examples/parser_helpers/charset_negation_helper.rb
index a243b93..e142f2f 100644
--- a/lib/regexp-examples/parser_helpers/charset_negation_helper.rb
+++ b/lib/regexp-examples/parser_helpers/charset_negation_helper.rb
@@ -2,7 +2,7 @@
 module RegexpExamples
   module CharsetNegationHelper
     def negate_if(charset, is_negative)
-      is_negative ? (CharSets::Any.dup - charset) : charset
+      is_negative ? (CharSets::Any.dup - charset.to_a) : charset
     end
   end
 end
diff --git a/lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb b/lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb
index 886f377..a1cfb52 100644
--- a/lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb
+++ b/lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb
@@ -93,13 +93,19 @@ def parse_backslash_unicode_sequence(full_hex_sequence)
     end
 
     def parse_backslash_named_property(p_negation, caret_negation, property_name)
-      @current_position += (caret_negation.length + # 0 or 1, of '^' is present
+      @current_position += (caret_negation.length + # 0 or 1, if '^' is present
                             property_name.length +
                             2) # Length of opening and closing brackets (always 2)
       # Beware of double negatives! E.g. /\P{^Space}/
       is_negative = (p_negation == 'P') ^ (caret_negation == '^')
       CharGroup.new(
-        negate_if(RegexpPropertyValues[property_name.downcase].matched_characters, is_negative),
+        negate_if(
+          RegexpPropertyValues[property_name.downcase]
+            .matched_codepoints
+            .lazy
+            .filter_map { |cp| cp.chr('utf-8') unless cp.between?(0xD800, 0xDFFF) },
+          is_negative
+        ),
         @ignorecase
       )
     end
diff --git a/spec/regexp-examples_spec.rb b/spec/regexp-examples_spec.rb
index e419fce..a64c953 100644
--- a/spec/regexp-examples_spec.rb
+++ b/spec/regexp-examples_spec.rb
@@ -191,15 +191,22 @@ def self.examples_are_empty(*regexps)
         /\P{^Ll}/ # Double negation!! (Should cancel out)
       )
 
+      expected_empty_properties = %w[surrogate inlowsurrogates inhighsurrogates inhighprivateusesurrogates]
+
       RegexpPropertyValues.all_for_current_ruby.map(&:identifier).each do |property|
-        it "examples for /\p{#{property}}/" do
-          regexp_examples = /\p{#{property}}/.examples(max_group_results: 99_999)
-          expect(regexp_examples)
-            .not_to be_empty,
-                    "No examples were generated for regexp: /\p{#{property}}/"
-          # Just do one big check, for test system performance (~30% faster)
-          # (Otherwise, we're doing up to 128 checks on 123 properties!!!)
-          expect(regexp_examples.join('')).to match(/\A\p{#{property}}+\z/)
+        if(expected_empty_properties).include?(property)
+          examples_are_empty(/\p{#{property}}/)
+        else
+          it "examples for /\p{#{property}}/" do
+            regexp_examples = /\p{#{property}}/.examples(max_group_results: 99_999)
+
+            expect(regexp_examples)
+              .not_to be_empty,
+            "No examples were generated for regexp: /\p{#{property}}/"
+              # Just do one big check, for test system performance (~30% faster)
+              # (Otherwise, we're doing up to 128 checks on 123 properties!!!)
+              expect(regexp_examples.join('')).to match(/\A\p{#{property}}+\z/)
+          end
         end
       end
     end

From 62d93c69c96d29f4766234707106deebad17ed0a Mon Sep 17 00:00:00 2001
From: tom-lord <lord.thom@gmail.com>
Date: Mon, 20 May 2024 19:22:47 +0100
Subject: [PATCH 4/6] Delete unused PStore-based unicode range code

---
 lib/regexp-examples.rb                     |  2 -
 lib/regexp-examples/char_sets.rb           |  2 -
 lib/regexp-examples/unicode_char_ranges.rb | 59 -------------------
 scripts/unicode_lister.rb                  | 68 ----------------------
 4 files changed, 131 deletions(-)
 delete mode 100644 lib/regexp-examples/unicode_char_ranges.rb
 delete mode 100644 scripts/unicode_lister.rb

diff --git a/lib/regexp-examples.rb b/lib/regexp-examples.rb
index 0e91d60..26700ec 100644
--- a/lib/regexp-examples.rb
+++ b/lib/regexp-examples.rb
@@ -1,4 +1,3 @@
-require_relative 'regexp-examples/unicode_char_ranges'
 require_relative 'regexp-examples/chargroup_parser'
 require_relative 'regexp-examples/config'
 require_relative 'regexp-examples/char_sets'
@@ -8,6 +7,5 @@
 require_relative 'regexp-examples/helpers'
 require_relative 'regexp-examples/parser'
 require_relative 'regexp-examples/repeaters'
-require_relative 'regexp-examples/unicode_char_ranges'
 require_relative 'regexp-examples/version'
 require_relative 'core_extensions/regexp/examples'
diff --git a/lib/regexp-examples/char_sets.rb b/lib/regexp-examples/char_sets.rb
index 825693e..cc5402f 100644
--- a/lib/regexp-examples/char_sets.rb
+++ b/lib/regexp-examples/char_sets.rb
@@ -53,7 +53,5 @@ module CharSets
       'word'   => Word,
       'ascii'  => Any
     }.freeze
-
-    NamedPropertyCharMap = UnicodeCharRanges.instance
   end.freeze
 end
diff --git a/lib/regexp-examples/unicode_char_ranges.rb b/lib/regexp-examples/unicode_char_ranges.rb
deleted file mode 100644
index ad18ce7..0000000
--- a/lib/regexp-examples/unicode_char_ranges.rb
+++ /dev/null
@@ -1,59 +0,0 @@
-require 'pstore'
-require 'singleton'
-
-module RegexpExamples
-  # Interface to the retrieve the character sets that match a regex named property.
-  # E.g. `/\p{Alpha}/`
-  # These matching values are stored, compressed, in a PStore. They are specific to
-  # the ruby minor version.
-  class UnicodeCharRanges
-    include Singleton
-    # These values were generated by: scripts/unicode_lister.rb
-    # Note: Only the first 128 results are listed, for performance.
-    # Also, some groups seem to have no matches (weird!)
-    STORE_FILENAME = "unicode_ranges_#{RbConfig::CONFIG['UNICODE_VERSION']}.pstore".freeze
-
-    attr_reader :range_store
-
-    def initialize
-      @range_store = PStore.new(unicode_ranges_file)
-    end
-
-    def get(key)
-      range_store.transaction(true) do
-        ranges_to_unicode(range_store[key])
-      end
-    end
-
-    alias [] get
-
-    private
-
-    # The method is written like this to future-proof it a little,
-    # i.e. the gem won't completely break for a new ruby version release
-    def unicode_ranges_file
-      db_path = File.join(__dir__, '../../db')
-      Dir["#{db_path}/*.pstore"].sort.select do |file|
-        file <= "#{db_path}/#{STORE_FILENAME}"
-      end.last
-    end
-
-    # TODO: Document example input/output of this method
-    # It's pretty simple, but this code is a little confusing!!
-    def ranges_to_unicode(ranges)
-      result = []
-      ranges.each do |range|
-        if range.is_a? Integer # Small hack to increase data compression
-          result << hex_to_unicode(range.to_s(16))
-        else
-          range.each { |num| result << hex_to_unicode(num.to_s(16)) }
-        end
-      end
-      result
-    end
-
-    def hex_to_unicode(hex)
-      [hex.to_i(16)].pack('U')
-    end
-  end
-end
diff --git a/scripts/unicode_lister.rb b/scripts/unicode_lister.rb
deleted file mode 100644
index 253ace9..0000000
--- a/scripts/unicode_lister.rb
+++ /dev/null
@@ -1,68 +0,0 @@
-require 'pstore'
-require_relative '../lib/regexp-examples/unicode_char_ranges'
-# A script to generate lists of all unicode characters
-# that match all named group/character properties regexps.
-# For use in e.g. /\p{Arabic}/.examples
-
-# To (re-)generate this list, simply run this file!
-# > ruby scripts/unicode_lister.rb
-
-# Taken from ruby documentation:
-# http://ruby-doc.org//core-2.2.0/Regexp.html#class-Regexp-label-Character+Properties
-NAMED_GROUPS = %w(
-  Alnum Alpha Blank Cntrl Digit Graph Lower Print Punct Space Upper XDigit Word ASCII
-  Any Assigned
-
-  L Ll Lm Lo Lt Lu M Mn Mc Me N Nd Nl No P Pc Pd Ps Pe Pi Pf Po S Sm Sc Sk So Z Zs Zl
-  Zp C Cc Cf Cn Co Cs
-
-  Arabic Armenian Balinese Bengali Bopomofo Braille Buginese Buhid Canadian_Aboriginal
-  Carian Cham Cherokee Common Coptic Cuneiform Cypriot Cyrillic Deseret Devanagari
-  Ethiopic Georgian Glagolitic Gothic Greek Gujarati Gurmukhi Han Hangul Hanunoo Hebrew
-  Hiragana Inherited Kannada Katakana Kayah_Li Kharoshthi Khmer Lao Latin Lepcha Limbu
-  Linear_B Lycian Lydian Malayalam Mongolian Myanmar New_Tai_Lue Nko Ogham Ol_Chiki
-  Old_Italic Old_Persian Oriya Osmanya Phags_Pa Phoenician Rejang Runic Saurashtra
-  Shavian Sinhala Sundanese Syloti_Nagri Syriac Tagalog Tagbanwa Tai_Le Tamil Telugu
-  Thaana Thai Tibetan Tifinagh Ugaritic Vai Yi
-)
-
-# Note: For the range 55296..57343, these are reserved values that are not legal
-# unicode characters.
-# I.e. a character encoding-related exception gets raised when you do:
-# `/regex/ =~ eval("?\\u{#{x.to_s(16)}}")`
-# TODO: Add a link to somewhere that explains this better.
-
-# "Compresses" the values in an array by using ranges.
-# Example input: [1, 2, 3, 4, 6, 7, 12, 14]
-# Example output: [1..4, 6..7, 12, 14]
-def calculate_ranges(matching_codes)
-  return [] if matching_codes.empty?
-  first = matching_codes.shift
-  matching_codes.inject([first..first]) do |r, x|
-    if r.last.last.succ != x
-      r << (x..x) # Start new range
-    else
-      r[0..-2] << (r.last.first..x) # Update last range
-    end
-  end
-    .map { |range| range.size == 1 ? range.first : range } # Replace `int..int` with `int`
-end
-
-count = 0
-filename = "./db/#{RegexpExamples::UnicodeCharRanges::STORE_FILENAME}"
-store = PStore.new(filename)
-store.transaction do
-  NAMED_GROUPS.each do |name|
-    count += 1
-    # Only generating first 128 matches, for performance...
-    # (I have tried this with generating ALL examples, and it makes the ruby gem
-    # painfully slow and bloated... Especially the test suite.)
-    matching_codes = [(0..55_295), (57_344..65_535)].map(&:to_a).flatten.lazy
-                     .select { |x| /\p{#{name}}/ =~ eval("?\\u{#{x.to_s(16)}}") }
-                     .first(128)
-    store[name.downcase] = calculate_ranges(matching_codes)
-    puts "(#{count}/#{NAMED_GROUPS.length}) Finished property: #{name}"
-  end
-  puts '*' * 50
-  puts "Finished! Result stored in: #{filename}"
-end

From 47c8c96562ef107088bf3f2fcc7df2c4c6c39270 Mon Sep 17 00:00:00 2001
From: tom-lord <lord.thom@gmail.com>
Date: Mon, 20 May 2024 19:28:25 +0100
Subject: [PATCH 5/6] Remove now-redundant downcase

---
 .../parser_helpers/parse_after_backslash_group_helper.rb        | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb b/lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb
index a1cfb52..e5e947e 100644
--- a/lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb
+++ b/lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb
@@ -100,7 +100,7 @@ def parse_backslash_named_property(p_negation, caret_negation, property_name)
       is_negative = (p_negation == 'P') ^ (caret_negation == '^')
       CharGroup.new(
         negate_if(
-          RegexpPropertyValues[property_name.downcase]
+          RegexpPropertyValues[property_name]
             .matched_codepoints
             .lazy
             .filter_map { |cp| cp.chr('utf-8') unless cp.between?(0xD800, 0xDFFF) },

From 3d89ff247ac5a60f19b2b25f1eab8da54a22e2d6 Mon Sep 17 00:00:00 2001
From: tom-lord <lord.thom@gmail.com>
Date: Mon, 20 May 2024 19:28:31 +0100
Subject: [PATCH 6/6] Update comment

---
 spec/regexp-examples_spec.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spec/regexp-examples_spec.rb b/spec/regexp-examples_spec.rb
index a64c953..f64560c 100644
--- a/spec/regexp-examples_spec.rb
+++ b/spec/regexp-examples_spec.rb
@@ -204,7 +204,7 @@ def self.examples_are_empty(*regexps)
               .not_to be_empty,
             "No examples were generated for regexp: /\p{#{property}}/"
               # Just do one big check, for test system performance (~30% faster)
-              # (Otherwise, we're doing up to 128 checks on 123 properties!!!)
+              # (Otherwise, we're potentially doing 99999 checks on 123 properties!!!)
               expect(regexp_examples.join('')).to match(/\A\p{#{property}}+\z/)
           end
         end