Skip to content

Commit

Permalink
Merge pull request #40 from tom-lord/use_regexp_property_values
Browse files Browse the repository at this point in the history
Use `regexp_property_values` gem for accurate property mapping
  • Loading branch information
tom-lord authored May 20, 2024
2 parents 642cea6 + 3d89ff2 commit ed29069
Show file tree
Hide file tree
Showing 10 changed files with 33 additions and 169 deletions.
2 changes: 1 addition & 1 deletion .ruby-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.0.0
3.1.2
1 change: 0 additions & 1 deletion Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ source 'https://rubygems.org'
group :test do
gem 'rspec'
gem 'coveralls', require: false
gem 'pry'
end

# Specify your gem's dependencies in regexp-examples.gemspec
Expand Down
2 changes: 0 additions & 2 deletions lib/regexp-examples.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
require_relative 'regexp-examples/unicode_char_ranges'
require_relative 'regexp-examples/chargroup_parser'
require_relative 'regexp-examples/config'
require_relative 'regexp-examples/char_sets'
Expand All @@ -8,6 +7,5 @@
require_relative 'regexp-examples/helpers'
require_relative 'regexp-examples/parser'
require_relative 'regexp-examples/repeaters'
require_relative 'regexp-examples/unicode_char_ranges'
require_relative 'regexp-examples/version'
require_relative 'core_extensions/regexp/examples'
2 changes: 0 additions & 2 deletions lib/regexp-examples/char_sets.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,5 @@ module CharSets
'word' => Word,
'ascii' => Any
}.freeze

NamedPropertyCharMap = UnicodeCharRanges.instance
end.freeze
end
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
module RegexpExamples
module CharsetNegationHelper
def negate_if(charset, is_negative)
is_negative ? (CharSets::Any.dup - charset) : charset
is_negative ? (CharSets::Any.dup - charset.to_a) : charset
end
end
end
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
require 'regexp_property_values'

module RegexpExamples
# A collection of related helper methods, utilised by the `Parser` class
module ParseAfterBackslashGroupHelper
Expand Down Expand Up @@ -91,13 +93,19 @@ def parse_backslash_unicode_sequence(full_hex_sequence)
end

def parse_backslash_named_property(p_negation, caret_negation, property_name)
@current_position += (caret_negation.length + # 0 or 1, of '^' is present
@current_position += (caret_negation.length + # 0 or 1, if '^' is present
property_name.length +
2) # Length of opening and closing brackets (always 2)
# Beware of double negatives! E.g. /\P{^Space}/
is_negative = (p_negation == 'P') ^ (caret_negation == '^')
CharGroup.new(
negate_if(CharSets::NamedPropertyCharMap[property_name.downcase], is_negative),
negate_if(
RegexpPropertyValues[property_name]
.matched_codepoints
.lazy
.filter_map { |cp| cp.chr('utf-8') unless cp.between?(0xD800, 0xDFFF) },
is_negative
),
@ignorecase
)
end
Expand Down
59 changes: 0 additions & 59 deletions lib/regexp-examples/unicode_char_ranges.rb

This file was deleted.

7 changes: 4 additions & 3 deletions regexp-examples.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@ Gem::Specification.new do |s|
s.test_files = s.files.grep(/^(test|spec|features)\//)
s.require_paths = ['lib']
s.homepage = 'http://rubygems.org/gems/regexp-examples'
s.add_development_dependency 'bundler', '> 1.7'
s.add_development_dependency 'rake', '~> 12.0'
s.add_development_dependency 'pry', '~> 0.12.0'
s.add_dependency 'regexp_property_values', '~> 1.5'
s.add_development_dependency 'bundler', '~> 2.4'
s.add_development_dependency 'rake', '~> 13.0'
s.add_development_dependency 'pry'
s.add_development_dependency 'warning', '~> 0.10.0'
s.license = 'MIT'
s.required_ruby_version = '>= 2.4.0'
Expand Down
68 changes: 0 additions & 68 deletions scripts/unicode_lister.rb

This file was deleted.

47 changes: 17 additions & 30 deletions spec/regexp-examples_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -190,37 +190,24 @@ def self.examples_are_empty(*regexps)
/\P{Ll}/, # Negation syntax type 2
/\P{^Ll}/ # Double negation!! (Should cancel out)
)
# An exhaustive set of tests for all named properties!!! This is useful
# for verifying the PStore contains correct values for all ruby versions
%w[
Alnum Alpha Blank Cntrl Digit Graph Lower Print Punct Space Upper XDigit
Word ASCII Any Assigned L Ll Lm Lo Lt Lu M Mn Mc Me N Nd Nl No P Pc Pd
Ps Pe Pi Pf Po S Sm Sc Sk So Z Zs Zl Zp C Cc Cf Cn Co Arabic Armenian
Balinese Bengali Bopomofo Braille Buginese Buhid Canadian_Aboriginal
Cham Cherokee Common Coptic Cyrillic Devanagari Ethiopic Georgian
Glagolitic Greek Gujarati Gurmukhi Han Hangul Hanunoo Hebrew Hiragana
Inherited Kannada Katakana Kayah_Li Khmer Lao Latin Lepcha Limbu Malayalam
Mongolian Myanmar New_Tai_Lue Nko Ogham Ol_Chiki Oriya Phags_Pa Rejang
Runic Saurashtra Sinhala Sundanese Syloti_Nagri Syriac Tagalog Tagbanwa
Tai_Le Tamil Telugu Thaana Thai Tibetan Tifinagh Vai Yi
].each do |property|
it "examples for /\p{#{property}}/" do
regexp_examples = /\p{#{property}}/.examples(max_group_results: 99_999)
expect(regexp_examples)
.not_to be_empty,
"No examples were generated for regexp: /\p{#{property}}/"
# Just do one big check, for test system performance (~30% faster)
# (Otherwise, we're doing up to 128 checks on 123 properties!!!)
expect(regexp_examples.join('')).to match(/\A\p{#{property}}+\z/)
end
end

# The following seem to genuinely have no matching examples (!!??!!?!)
%w[
Cs Carian Cuneiform Cypriot Deseret Gothic Kharoshthi Linear_B Lycian
Lydian Old_Italic Old_Persian Osmanya Phoenician Shavian Ugaritic
].each do |property|
examples_are_empty(/\p{#{property}}/)
expected_empty_properties = %w[surrogate inlowsurrogates inhighsurrogates inhighprivateusesurrogates]

RegexpPropertyValues.all_for_current_ruby.map(&:identifier).each do |property|
if(expected_empty_properties).include?(property)
examples_are_empty(/\p{#{property}}/)
else
it "examples for /\p{#{property}}/" do
regexp_examples = /\p{#{property}}/.examples(max_group_results: 99_999)

expect(regexp_examples)
.not_to be_empty,
"No examples were generated for regexp: /\p{#{property}}/"
# Just do one big check, for test system performance (~30% faster)
# (Otherwise, we're potentially doing 99999 checks on every supported property!!!)
expect(regexp_examples.join('')).to match(/\A\p{#{property}}+\z/)
end
end
end
end

Expand Down

0 comments on commit ed29069

Please sign in to comment.