Skip to content

Commit

Permalink
Merge pull request #33 from xiejiangzhi/master
Browse files Browse the repository at this point in the history
Support unicode range
  • Loading branch information
tom-lord authored Jul 8, 2019
2 parents 412920b + 98d562f commit 54783ec
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 29 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,5 @@
mkmf.log
tags
/coverage/

/spec/examples.txt
2 changes: 1 addition & 1 deletion .rspec
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
--color
--require spec_helper
--require gem_helper
26 changes: 21 additions & 5 deletions lib/regexp-examples/chargroup_parser.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
require_relative 'parser_helpers/charset_negation_helper'
require_relative 'parser_helpers/parse_group_helper'
require_relative 'parser_helpers/parse_after_backslash_group_helper'

module RegexpExamples
# A "sub-parser", for char groups in a regular expression
Expand All @@ -13,6 +15,8 @@ module RegexpExamples
# [[^:alpha:]&&[\n]a-c] - all of the above!!!! (should match "\n")
class ChargroupParser
include CharsetNegationHelper
include ParseGroupHelper
include ParseAfterBackslashGroupHelper

attr_reader :regexp_string, :current_position
alias length current_position
Expand All @@ -37,7 +41,6 @@ def parse
parse_after_ampersand
else
@charset.concat parse_checking_backlash
@current_position += 1
end
end

Expand Down Expand Up @@ -79,15 +82,23 @@ def parse_checking_backlash
@current_position += 1
parse_after_backslash
else
[next_char]
r = [next_char]
@current_position += 1
r
end
end

# Handles the character(s) that follow a backslash inside a char group.
# Each branch advances @current_position by one and evaluates to the
# character(s) the escape stands for (an array in the first and last
# branches; the unicode branch returns whatever `result.map(&:to_s)` yields).
def parse_after_backslash
if next_char == 'b'
# An escaped "b" is returned as the one-element array ["\b"].
@current_position += 1
["\b"]
elsif rest_of_string =~ /\Au(\h{4}|\{\h{1,4}\})/
# Matches "u" followed by exactly four hex digits, or by one to four hex
# digits wrapped in braces; the captured part is handed to
# parse_backslash_unicode_sequence and every result entry is stringified.
# NOTE(review): only one position is consumed here -- presumably the helper
# advances past the remaining digits; confirm against its definition.
@current_position += 1
parse_backslash_unicode_sequence(Regexp.last_match(1)).result.map(&:to_s)
else
# NOTE(review): the bare fetch on the next line duplicates the assignment
# that follows it and its value is unused; it looks like the pre-change
# line of this diff view -- confirm before treating it as live code.
CharSets::BackslashCharMap.fetch(next_char, [next_char])
# Look the escaped char up in CharSets::BackslashCharMap, falling back to
# the char itself (wrapped in an array) when the map has no entry for it.
char = CharSets::BackslashCharMap.fetch(next_char, [next_char])
@current_position += 1
char
end
end

Expand Down Expand Up @@ -117,13 +128,18 @@ def parse_sub_group_intersect
end

def parse_after_hyphen
if regexp_string[@current_position + 1] == ']' # e.g. /[abc-]/ -- not a range!
r = if regexp_string[@current_position + 1] == ']' # e.g. /[abc-]/ -- not a range!
@current_position += 1
@charset << '-'
elsif rest_of_string =~ /\A-\\u(\h{4}|\{\h{1,4}\})/
@current_position += 3
char = parse_backslash_unicode_sequence(Regexp.last_match(1)).result.first.to_s
@charset.concat((@charset.last..char).to_a)
else
@current_position += 1
@charset.concat((@charset.last..parse_checking_backlash.first).to_a)
end
@current_position += 1
r
end

def rest_of_string
Expand Down
6 changes: 3 additions & 3 deletions regexp-examples.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ Gem::Specification.new do |s|
s.executables = s.files.grep(/^bin\//) { |f| File.basename(f) }
s.test_files = s.files.grep(/^(test|spec|features)\//)
s.require_paths = ['lib']
s.homepage =
'http://rubygems.org/gems/regexp-examples'
s.add_development_dependency 'bundler'
s.homepage = 'http://rubygems.org/gems/regexp-examples'
s.add_development_dependency 'bundler', '> 1.7'
s.add_development_dependency 'rake', '~> 12.0'
s.add_development_dependency 'pry', '~> 0.12.0'
s.license = 'MIT'
s.required_ruby_version = '>= 2.4.0'
end
19 changes: 19 additions & 0 deletions spec/gem_helper.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
require 'spec_helper'

require 'coveralls'
Coveralls.wear!

require './lib/regexp-examples.rb'
require 'helpers'
require 'pry'

# Some specs deliberately exercise odd-looking regex patterns, which makes
# Ruby print noisy warnings while the suite runs, for example:
#   warning: invalid back reference: /\k/
#   warning: character class has ']' without escape: /[]]/
# Setting $VERBOSE to nil switches those warnings off.
$VERBOSE = nil

# Register the Helpers module with the RSpec configuration.
RSpec.configure { |config| config.include(Helpers) }
1 change: 1 addition & 0 deletions spec/regexp-examples_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def self.examples_are_empty(*regexps)
/[abc]/,
/[a-c]/,
/[abc-e]/,
/[\u4e00-\u9fa5]/,
/[^a-zA-Z]/,
/[\w]/,
/[]]/,
Expand Down
92 changes: 72 additions & 20 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
@@ -1,28 +1,30 @@
require 'coveralls'
Coveralls.wear!

require './lib/regexp-examples.rb'
require 'helpers'
require 'pry'

# Several of these tests (intentionally) use "weird" regex patterns,
# that spam annoying warnings when running.
# E.g. warning: invalid back reference: /\k/
# and warning: character class has ']' without escape: /[]]/
# This config disables those warnings.
$VERBOSE = nil

# This file was generated by the `rspec --init` command. Conventionally, all
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
# The generated `.rspec` file contains `--require spec_helper` which will cause
# this file to always be loaded, without a need to explicitly require it in any
# files.
#
# Given that it is always loaded, you are encouraged to keep this file as
# light-weight as possible. Requiring heavyweight dependencies from this file
# will add to the boot time of your test suite on EVERY test run, even for an
# individual file that may not need all of that loaded. Instead, consider making
# a separate helper file that requires the additional dependencies and performs
# the additional setup, and require it from the spec files that actually need
# it.
#
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
RSpec.configure do |config|
config.include Helpers

# rspec-expectations config goes here. You can use an alternate
# assertion/expectation library such as wrong or the stdlib/minitest
# assertions if you prefer.
config.expect_with :rspec do |expectations|
# This option will default to `true` in RSpec 4. It makes the `description`
# and `failure_message` of custom matchers include text for helper methods
# defined using `chain`, e.g.:
# be_bigger_than(2).and_smaller_than(4).description
# # => "be bigger than 2 and smaller than 4"
# be_bigger_than(2).and_smaller_than(4).description
# # => "be bigger than 2 and smaller than 4"
# ...rather than:
# # => "be bigger than 2"
# # => "be bigger than 2"
expectations.include_chain_clauses_in_custom_matcher_descriptions = true
end

Expand All @@ -35,10 +37,60 @@
mocks.verify_partial_doubles = true
end

# This option will default to `:apply_to_host_groups` in RSpec 4 (and will
# have no way to turn it off -- the option exists only for backwards
# compatibility in RSpec 3). It causes shared context metadata to be
# inherited by the metadata hash of host groups and examples, rather than
# triggering implicit auto-inclusion in groups with matching metadata.
config.shared_context_metadata_behavior = :apply_to_host_groups

# This allows you to limit a spec run to individual examples or groups
# you care about by tagging them with `:focus` metadata. When nothing
# is tagged with `:focus`, all examples get run. RSpec also provides
# aliases for `it`, `describe`, and `context` that include `:focus`
# metadata: `fit`, `fdescribe` and `fcontext`, respectively.
config.filter_run_when_matching :focus

# Allows RSpec to persist some state between runs in order to support
# the `--only-failures` and `--next-failure` CLI options. We recommend
# you configure your source control system to ignore this file.
config.example_status_persistence_file_path = "spec/examples.txt"

# Limits the available syntax to the non-monkey patched syntax that is
# recommended. For more details, see:
# - http://rspec.info/blog/2012/06/rspecs-new-expectation-syntax/
# - http://www.teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
# - http://rspec.info/blog/2014/05/notable-changes-in-rspec-3/#zero-monkey-patching-mode
config.disable_monkey_patching!

# This setting enables warnings. It's recommended, but in some cases may
# be too noisy due to issues in dependencies.
config.warnings = true

# Many RSpec users commonly either run the entire suite or an individual
# file, and it's useful to allow more verbose output when running an
# individual spec file.
if config.files_to_run.one?
# Use the documentation formatter for detailed output,
# unless a formatter has already been configured
# (e.g. via a command-line flag).
config.default_formatter = "doc"
end

# Print the 10 slowest examples and example groups at the
# end of the spec run, to help surface which specs are running
# particularly slow.
# config.profile_examples = 10
config.profile_examples = 10

# Run specs in random order to surface order dependencies. If you find an
# order dependency and want to debug it, you can fix the order by providing
# the seed, which is printed after each run.
# --seed 1234
config.order = :random

# Seed global randomization in this process using the `--seed` CLI option.
# Setting this allows you to use `--seed` to deterministically reproduce
# test failures related to randomization by passing the same `--seed` value
# as the one that triggered the failure.
Kernel.srand config.seed
end

0 comments on commit 54783ec

Please sign in to comment.