From dc538cc725d7eaef5d19598f74eb954df25d9838 Mon Sep 17 00:00:00 2001 From: = Date: Sun, 11 Jan 2015 21:55:55 +0000 Subject: [PATCH] Non capture and named capture groups implemented Warning: Not yet added named capture group backreference code --- README.md | 4 ++-- lib/regexp-examples/parser.rb | 30 ++++++++++++++++++++++++------ spec/regexp-examples_spec.rb | 14 ++++++++++++++ 3 files changed, 40 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index a391dad..1f7b33b 100644 --- a/README.md +++ b/README.md @@ -30,14 +30,14 @@ or a huge number of possible matches, such as `/.\w/`, then only a subset of the * Character sets (inluding ranges and negation!), e.g. `/[abc]/`, `/[A-Z0-9]/`, `/[^a-z]/` * Escaped characters, e.g. `/\n/`, `/\w/`, `/\D/` (and so on...) * Capture groups, and backreferences(!!), e.g. `/(this|that) \1/` +* Named capture groups, e.g. `(?bar)/`(Warning: Named capture group backreferences not yet implemented!) +* Non-capture groups, e.g. `/(?:foo)/` * Arbitrarily complex combinations of all the above! ## Not-Yet-Supported syntax I plan to add the following features to the gem (in order of most -> least likely), but have not yet got round to it: -* Non-capture groups, e.g. `/(?:foo)/` -* Named capture groups, e.g. `(?bar)/` * Throw exceptions if illegal syntax (see below) is used * POSIX bracket expressions, e.g. `/[[:alnum:]]/`, `/[[:space:]]/` * Options, e.g. 
`/pattern/i`, `/foo.*bar/m` diff --git a/lib/regexp-examples/parser.rb b/lib/regexp-examples/parser.rb index 8277a5c..8a862e7 100644 --- a/lib/regexp-examples/parser.rb +++ b/lib/regexp-examples/parser.rb @@ -45,7 +45,7 @@ def parse_group(repeaters) def parse_after_backslash_group @current_position += 1 case - when regexp_string[@current_position..-1] =~ /^(\d+)/ + when rest_of_string =~ /\A(\d+)/ group = parse_backreference_group($&) when BackslashCharMap.keys.include?(regexp_string[@current_position]) group = CharGroup.new( @@ -79,11 +79,25 @@ def parse_repeater(group) def parse_multi_group @current_position += 1 @num_groups += 1 - this_group_num = @num_groups + group_id = nil # init + rest_of_string.match(/\A(\?)?(:|!|=|<(!|=|[^!=][^>]*))?/) do |match| + case + when match[1].nil? # e.g. /(normal)/ + group_id = @num_groups + when match[2] == ':' # e.g. /(?:nocapture)/ + @current_position += 2 + group_id = nil + when %w(! =).include?(match[2]) # e.g. /(?=lookahead)/, /(?!neglookahead)/ + # TODO: Raise exception + when %w(! =).include?(match[3]) # e.g. /(?<=lookbehind)/, /(?namedgroup)/ + @current_position += (match[3].length + 3) + group_id = match[3] + end + end groups = parse - # TODO: Non-capture groups, i.e. /...(?:foo).../ - # TODO: Named capture groups, i.e. /...(?foo).../ - MultiGroup.new(groups, this_group_num) + MultiGroup.new(groups, group_id) end def parse_multi_end_group @@ -146,7 +160,7 @@ def parse_question_mark_repeater(group) end def parse_range_repeater(group) - match = regexp_string[@current_position..-1].match(/^\{(\d+)(,)?(\d+)?\}/) + match = rest_of_string.match(/\A\{(\d+)(,)?(\d+)?\}/) @current_position += match[0].size min = match[1].to_i if match[1] has_comma = !match[2].nil? 
@@ -157,6 +171,10 @@ def parse_range_repeater(group) def parse_one_time_repeater(group) OneTimeRepeater.new(group) end + + def rest_of_string + regexp_string[@current_position..-1] + end end end diff --git a/spec/regexp-examples_spec.rb b/spec/regexp-examples_spec.rb index a29637f..ad43c69 100644 --- a/spec/regexp-examples_spec.rb +++ b/spec/regexp-examples_spec.rb @@ -52,6 +52,20 @@ def self.examples_exist_and_match(*regexps) ) end + context "for complex multi groups" do + examples_exist_and_match( + /(normal)/, + /(?:nocapture)/, + /(?namedgroup)/ + ) + # TODO: These are not yet implemented + # (expect to raise exception) +# /(?=lookahead)/, +# /(?!neglookahead)/, +# /(?<=lookbehind)/, +# /(?