Skip to content

Commit

Permalink
Merge pull request #7 from tom-lord/OrGroup_random_example_probabilit…
Browse files Browse the repository at this point in the history
…y_distribution

Or group random example probability distribution
  • Loading branch information
tom-lord committed Jul 17, 2015
2 parents 0ef73be + 28cbfcb commit a0d3cf8
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 54 deletions.
2 changes: 2 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Metrics/LineLength:
Max: 90
58 changes: 37 additions & 21 deletions lib/regexp-examples/chargroup_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,29 +25,11 @@ def parse
until next_char == ']'
case next_char
when '['
@current_position += 1
sub_group_parser = self.class.new(rest_of_string, is_sub_group: true)
@charset.concat sub_group_parser.result
@current_position += sub_group_parser.length
parse_sub_group_concat
when '-'
if regexp_string[@current_position + 1] == ']' # e.g. /[abc-]/ -- not a range!
@charset << '-'
@current_position += 1
else
@current_position += 1
@charset.concat (@charset.last..parse_checking_backlash.first).to_a
@current_position += 1
end
parse_after_hyphen
when '&'
if regexp_string[@current_position + 1] == '&'
@current_position += 2
sub_group_parser = self.class.new(rest_of_string, is_sub_group: true)
@charset &= sub_group_parser.result
@current_position += (sub_group_parser.length - 1)
else
@charset << '&'
@current_position += 1
end
parse_after_ampersand
else
@charset.concat parse_checking_backlash
@current_position += 1
Expand Down Expand Up @@ -116,6 +98,40 @@ def parse_after_backslash
end
end

# Handles a nested character group (dispatched from `parse` when the
# current character is '[').
#
# The remainder of the string, starting one character after the current
# position, is handed to a fresh parser of the same class built with
# `is_sub_group: true`. Its result is appended to @charset, and the
# position is advanced by the length the nested parser reports.
def parse_sub_group_concat
  @current_position += 1
  nested_parser = self.class.new(rest_of_string, is_sub_group: true)
  @charset.concat(nested_parser.result)
  @current_position += nested_parser.length
end

# Handles an '&' inside a character group (dispatched from `parse`).
#
# When the following character is also '&', the pair is treated as the
# intersection operator and parsing is delegated to
# parse_sub_group_intersect. Otherwise the '&' is a literal: it is added
# to @charset and the position moves forward by one.
def parse_after_ampersand
  return parse_sub_group_intersect if regexp_string[@current_position + 1] == '&'

  @charset << '&'
  @current_position += 1
end

# Handles the '&&' operator inside a character group.
#
# Skips the two '&' characters, parses the remainder of the string with a
# fresh parser of the same class (built with `is_sub_group: true`), and
# keeps only the members of @charset that also appear in that parser's
# result.
def parse_sub_group_intersect
  @current_position += 2
  nested_parser = self.class.new(rest_of_string, is_sub_group: true)
  @charset = @charset & nested_parser.result
  # NOTE(review): advances by one less than the nested length; presumably
  # the caller's loop consumes the remaining character -- confirm.
  @current_position += nested_parser.length - 1
end

# Handles a '-' inside a character group (dispatched from `parse`).
#
# A hyphen directly before the closing ']' is a literal, e.g. /[abc-]/.
# Otherwise it marks a range: every character from the last entry of
# @charset up to the first element returned by parse_checking_backlash
# is appended to @charset.
def parse_after_hyphen
  hyphen_is_literal = regexp_string[@current_position + 1] == ']'
  if hyphen_is_literal
    @charset << '-' # e.g. /[abc-]/ -- not a range!
  else
    @current_position += 1 # move onto the character that ends the range
    range_start = @charset.last
    range_end = parse_checking_backlash.first
    @charset.concat((range_start..range_end).to_a)
  end
  @current_position += 1
end

# Returns the part of regexp_string from the current position through to
# the end of the string.
def rest_of_string
  regexp_string.slice(@current_position..-1)
end
Expand Down
41 changes: 24 additions & 17 deletions lib/regexp-examples/groups.rb
Original file line number Diff line number Diff line change
Expand Up @@ -153,38 +153,45 @@ def result_by_method(method)
end

# A boolean "or" group.
# It really is boolean: The implementation is to pass in 2 set of
# (repeaters of) groups. The simplest example is: /a|b/
# If you have more than one boolean "or" operator, then this is
# constructed using multiple *boolean* OrGroups, e.g.
# /a|b|c|d/ is treated like /((a|b)|c)|d/
# The implementation is to pass in 2 sets of (repeaters of) groups.
# The simplest example is: /a|b/
# If you have more than one boolean "or" operator, then this is initially
# parsed as an OrGroup containing another OrGroup. However, in order to avoid
# probability distribution issues in Regexp#random_example, this then gets
# simplified down to one OrGroup containing 3+ repeaters.
class OrGroup
attr_reader :repeaters_list

def initialize(left_repeaters, right_repeaters)
@left_repeaters = left_repeaters
@right_repeaters = right_repeaters
@repeaters_list = [left_repeaters, *merge_if_orgroup(right_repeaters)]
end

def result
result_by_method(:map_results)
end

def random_result
# TODO: This logic is flawed in terms of choosing a truly "random" example! E.g.
# /a|b|c|d/.random_example will choose a letter with the following probabilities:
# a = 50%, b = 25%, c = 12.5%, d = 12.5%
# In order to fix this, I must either apply some weighted selection logic,
# or change how the OrGroup examples are generated
# - i.e. make this class work with >2 repeaters
result_by_method(:map_random_result).sample(1)
end

private

def result_by_method(method)
left_result = RegexpExamples.public_send(method, @left_repeaters)
right_result = RegexpExamples.public_send(method, @right_repeaters)
left_result.concat(right_result).flatten.uniq.map do |result|
GroupResult.new(result)
repeaters_list.map do |repeaters|
RegexpExamples.public_send(method, repeaters)
end
.inject(:concat)
.map do |result|
GroupResult.new(result)
end
.uniq
end

# Flattens a nested OrGroup into this one.
#
# If `repeaters` consists of exactly one element and that element is an
# OrGroup, its repeaters_list is returned so the alternatives can be
# spliced into the caller's list. Any other input is returned wrapped in
# a one-element array.
def merge_if_orgroup(repeaters)
  lone_or_group = repeaters.size == 1 && repeaters.first.is_a?(OrGroup)
  return repeaters.first.repeaters_list if lone_or_group

  [repeaters]
end
end
Expand Down
25 changes: 14 additions & 11 deletions lib/regexp-examples/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def parse
repeaters = []
until end_of_regexp
group = parse_group(repeaters)
return [OneTimeRepeater.new(group)] if group.is_a? OrGroup
return [group] if group.is_a? OrGroup
@current_position += 1
repeaters << parse_repeater(group)
end
Expand Down Expand Up @@ -148,7 +148,7 @@ def parse_after_backslash_group
) # Using "\r\n" as one character is little bit hacky...
when next_char == 'g' # Subexpression call
fail IllegalSyntaxError,
'Subexpression calls (\\g) cannot be supported, as they are not regular'
'Subexpression calls (\\g) cannot be supported, as they are not regular'
when next_char =~ /[bB]/ # Anchors
raise_anchors_exception!
when next_char =~ /[AG]/ # Start of string
Expand All @@ -159,6 +159,7 @@ def parse_after_backslash_group
end
when next_char =~ /[zZ]/ # End of string
if @current_position == (regexp_string.length - 1)
# TODO: /\Z/ should be treated as /\n?/
group = PlaceHolderGroup.new
else
raise_anchors_exception!
Expand Down Expand Up @@ -212,10 +213,10 @@ def parse_multi_group
end
when %w(! =).include?(match[2]) # e.g. /(?=lookahead)/, /(?!neglookahead)/
fail IllegalSyntaxError,
'Lookaheads are not regular; cannot generate examples'
'Lookaheads are not regular; cannot generate examples'
when %w(! =).include?(match[3]) # e.g. /(?<=lookbehind)/, /(?<!neglookbehind)/
fail IllegalSyntaxError,
'Lookbehinds are not regular; cannot generate examples'
'Lookbehinds are not regular; cannot generate examples'
else # e.g. /(?<name>namedgroup)/
@current_position += (match[3].length + 3)
group_id = match[3]
Expand All @@ -237,12 +238,14 @@ def remember_old_regexp_options
end

def regexp_options_toggle(on, off)
@ignorecase = true if on.include? 'i'
@ignorecase = false if off.include? 'i'
@multiline = true if on.include? 'm'
@multiline = false if off.include? 'm'
@extended = true if on.include? 'x'
@extended = false if off.include? 'x'
regexp_option_toggle(on, off, '@ignorecase', 'i')
regexp_option_toggle(on, off, '@multiline', 'm')
regexp_option_toggle(on, off, '@extended', 'x')
end

# Switches a single regexp option on or off.
#
# on   - collection of option characters being enabled
# off  - collection of option characters being disabled
# var  - name of the instance variable holding the option, e.g. '@ignorecase'
# char - the option character to look for, e.g. 'i'
#
# The variable is left untouched when `char` is in neither collection. If
# it is in both, the "off" assignment runs last.
def regexp_option_toggle(on, off, var, char)
  switched_on = on.include?(char)
  switched_off = off.include?(char)
  instance_variable_set(var, true) if switched_on
  instance_variable_set(var, false) if switched_off
end

def parse_char_group
Expand Down Expand Up @@ -327,7 +330,7 @@ def parse_reluctant_or_possessive_range_repeater(repeater, min, has_comma, max)

def raise_anchors_exception!
fail IllegalSyntaxError,
"Anchors ('#{next_char}') cannot be supported, as they are not regular"
"Anchors ('#{next_char}') cannot be supported, as they are not regular"
end

def parse_one_time_repeater(group)
Expand Down
2 changes: 1 addition & 1 deletion lib/regexp-examples/version.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module RegexpExamples
VERSION = '1.1.2'
VERSION = '1.1.3'
end
14 changes: 10 additions & 4 deletions spec/regexp-examples_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ def self.examples_exist_and_match(*regexps)
it "examples for /#{regexp.source}/" do
regexp_examples = regexp.examples(max_group_results: 99_999)

expect(regexp_examples).not_to be_empty,
"No examples were generated for regexp: /#{regexp.source}/"
expect(regexp_examples)
.not_to be_empty,
"No examples were generated for regexp: /#{regexp.source}/"
regexp_examples.each do |example|
expect(example).to match(/\A(?:#{regexp.source})\z/)
end
Expand Down Expand Up @@ -205,8 +206,9 @@ def self.examples_are_empty(*regexps)
).each do |property|
it "examples for /\p{#{property}}/" do
regexp_examples = /\p{#{property}}/.examples(max_group_results: 99_999)
expect(regexp_examples).not_to be_empty,
"No examples were generated for regexp: /\p{#{property}}/"
expect(regexp_examples)
.not_to be_empty,
"No examples were generated for regexp: /\p{#{property}}/"
# Just do one big check, for test system performance (~30% faster)
# (Otherwise, we're doing up to 128 checks on 123 properties!!!)
expect(regexp_examples.join('')).to match(/\A\p{#{property}}+\z/)
Expand Down Expand Up @@ -301,6 +303,10 @@ def self.examples_are_empty(*regexps)
it { expect(/(a|b){2}/.examples).to match_array %w(aa ab ba bb) }
it { expect(/a+|b?/.examples).to match_array ['a', 'aa', 'aaa', '', 'b'] }

# Only display unique examples:
it { expect(/a|a|b|b/.examples).to match_array ['a', 'b'] }
it { expect(/[ccdd]/.examples).to match_array ['c', 'd'] }

# a{1}? should be equivalent to (?:a{1})?, i.e. NOT a "non-greedy quantifier"
it { expect(/a{1}?/.examples).to match_array ['', 'a'] }
end
Expand Down

0 comments on commit a0d3cf8

Please sign in to comment.