From 4919825e7fa8b3b7c6c3d05d619a3884d6c12e78 Mon Sep 17 00:00:00 2001 From: Tom Lord Date: Sun, 8 Mar 2015 04:08:04 +0000 Subject: [PATCH 01/12] Comment tidy-up --- lib/regexp-examples/groups.rb | 2 +- lib/regexp-examples/helpers.rb | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/lib/regexp-examples/groups.rb b/lib/regexp-examples/groups.rb index abb719c..5826d88 100644 --- a/lib/regexp-examples/groups.rb +++ b/lib/regexp-examples/groups.rb @@ -48,7 +48,7 @@ def result end end - # Used as a workaround for when a grep is expected to be returned, + # Used as a workaround for when a group is expected to be returned, # but there are no results for the group. # i.e. PlaceHolderGroup.new.result == '' == SingleCharGroup.new('').result # (But using PlaceHolderGroup makes it clearer what the intention is!) diff --git a/lib/regexp-examples/helpers.rb b/lib/regexp-examples/helpers.rb index 14d8c06..4948d85 100644 --- a/lib/regexp-examples/helpers.rb +++ b/lib/regexp-examples/helpers.rb @@ -1,8 +1,6 @@ module RegexpExamples - # Given an array of arrays of strings, - # returns all possible perutations, - # for strings created by joining one - # element from each array + # Given an array of arrays of strings, returns all possible perutations + # for strings, created by joining one element from each array # # For example: # permutations_of_strings [ ['a'], ['b'], ['c', 'd', 'e'] ] #=> ['abc', 'abd', 'abe'] From 257cae4e9e62a8f93b1006ebc801e49f41f1acdf Mon Sep 17 00:00:00 2001 From: Tom Lord Date: Sun, 8 Mar 2015 04:09:10 +0000 Subject: [PATCH 02/12] Display "\n" in group result, with top priority e.g. 
If multiline option is enabled, this should be made clear in the examples --- lib/regexp-examples/constants.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/regexp-examples/constants.rb b/lib/regexp-examples/constants.rb index 6d910dd..fffaff8 100644 --- a/lib/regexp-examples/constants.rb +++ b/lib/regexp-examples/constants.rb @@ -44,7 +44,8 @@ module CharSets Whitespace = [' ', "\t", "\n", "\r", "\v", "\f"] Control = (0..31).map(&:chr) | ["\x7f"] # Ensure that the "common" characters appear first in the array - Any = Lower | Upper | Digit | Punct | (0..127).map(&:chr) + # Also, ensure "\n" comes first, to make it obvious when included + Any = ["\n"] | Lower | Upper | Digit | Punct | (0..127).map(&:chr) AnyNoNewLine = Any - ["\n"] end.freeze From 2692ff3f2300f453c2365d10ff4b81c11e1d93b5 Mon Sep 17 00:00:00 2001 From: Tom Lord Date: Sun, 8 Mar 2015 04:09:42 +0000 Subject: [PATCH 03/12] Initial implementation of Regexp#random_example This works, but there are still a few obvious improvements to be made, such as performance optimisation when the regex uses capture groups. 
--- .../core_extensions/regexp/examples.rb | 7 +++ lib/regexp-examples/groups.rb | 20 ++++--- lib/regexp-examples/helpers.rb | 11 +++- lib/regexp-examples/repeaters.rb | 56 +++++++++---------- 4 files changed, 55 insertions(+), 39 deletions(-) diff --git a/lib/regexp-examples/core_extensions/regexp/examples.rb b/lib/regexp-examples/core_extensions/regexp/examples.rb index c31e752..a9057ae 100644 --- a/lib/regexp-examples/core_extensions/regexp/examples.rb +++ b/lib/regexp-examples/core_extensions/regexp/examples.rb @@ -7,6 +7,13 @@ def examples(config_options={}) ) RegexpExamples::BackReferenceReplacer.new.substitute_backreferences(full_examples) end + + def random_example + full_examples = RegexpExamples.map_random_result( + RegexpExamples::Parser.new(source, options, max_group_results: 1000000).parse + ) + RegexpExamples::BackReferenceReplacer.new.substitute_backreferences(full_examples).first + end end end end diff --git a/lib/regexp-examples/groups.rb b/lib/regexp-examples/groups.rb index 5826d88..e71f719 100644 --- a/lib/regexp-examples/groups.rb +++ b/lib/regexp-examples/groups.rb @@ -37,7 +37,13 @@ def result end end - class SingleCharGroup + class BaseGroup + def random_result + result.sample(1) + end + end + + class SingleCharGroup < BaseGroup prepend GroupWithIgnoreCase def initialize(char, ignorecase) @char = char @@ -52,13 +58,13 @@ def result # but there are no results for the group. # i.e. PlaceHolderGroup.new.result == '' == SingleCharGroup.new('').result # (But using PlaceHolderGroup makes it clearer what the intention is!) 
- class PlaceHolderGroup + class PlaceHolderGroup < BaseGroup def result [GroupResult.new('')] end end - class CharGroup + class CharGroup < BaseGroup prepend GroupWithIgnoreCase def initialize(chars, ignorecase) @chars = chars @@ -73,7 +79,7 @@ def result end - class DotGroup + class DotGroup < BaseGroup attr_reader :multiline def initialize(multiline) @multiline = multiline @@ -87,7 +93,7 @@ def result end end - class MultiGroup + class MultiGroup < BaseGroup attr_reader :group_id def initialize(groups, group_id) @groups = groups @@ -108,7 +114,7 @@ def result class MultiGroupEnd end - class OrGroup + class OrGroup < BaseGroup def initialize(left_repeaters, right_repeaters) @left_repeaters = left_repeaters @right_repeaters = right_repeaters @@ -124,7 +130,7 @@ def result end end - class BackReferenceGroup + class BackReferenceGroup < BaseGroup attr_reader :id def initialize(id) @id = id diff --git a/lib/regexp-examples/helpers.rb b/lib/regexp-examples/helpers.rb index 4948d85..be238c2 100644 --- a/lib/regexp-examples/helpers.rb +++ b/lib/regexp-examples/helpers.rb @@ -27,8 +27,17 @@ def self.join_preserving_capture_groups(result) end def self.map_results(repeaters) + generic_map_result(repeaters, :result) + end + + def self.map_random_result(repeaters) + generic_map_result(repeaters, :random_result) + end + + private + def self.generic_map_result(repeaters, method) repeaters - .map {|repeater| repeater.result} + .map {|repeater| repeater.send(method)} .instance_eval do |partial_results| RegexpExamples.permutations_of_strings(partial_results) end diff --git a/lib/regexp-examples/repeaters.rb b/lib/regexp-examples/repeaters.rb index 9f1b668..740f33f 100644 --- a/lib/regexp-examples/repeaters.rb +++ b/lib/regexp-examples/repeaters.rb @@ -1,12 +1,12 @@ module RegexpExamples class BaseRepeater - attr_reader :group + attr_reader :group, :min_repeats, :max_repeats def initialize(group) @group = group end - def result(min_repeats, max_repeats) - group_results = 
@group.result[0 .. RegexpExamples.MaxGroupResults-1] + def result + group_results = group.result[0 .. RegexpExamples.MaxGroupResults-1] results = [] min_repeats.upto(max_repeats) do |repeats| if repeats.zero? @@ -19,66 +19,60 @@ def result(min_repeats, max_repeats) end results.flatten.uniq end + + def random_result + result = [] + rand(min_repeats..max_repeats).times { result << group.random_result } + result << [ GroupResult.new('') ] if result.empty? # in case of 0.times + RegexpExamples::permutations_of_strings(result) + end end class OneTimeRepeater < BaseRepeater def initialize(group) super - end - - def result - super(1, 1) + @min_repeats = 1 + @max_repeats = 1 end end class StarRepeater < BaseRepeater def initialize(group) super - end - - def result - super(0, RegexpExamples.MaxRepeaterVariance) + @min_repeats = 0 + @max_repeats = RegexpExamples.MaxRepeaterVariance end end class PlusRepeater < BaseRepeater def initialize(group) super - end - - def result - super(1, RegexpExamples.MaxRepeaterVariance + 1) + @min_repeats = 1 + @max_repeats = RegexpExamples.MaxRepeaterVariance + 1 end end class QuestionMarkRepeater < BaseRepeater def initialize(group) super - end - - def result - super(0, 1) + @min_repeats = 0 + @max_repeats = 1 end end class RangeRepeater < BaseRepeater def initialize(group, min, has_comma, max) super(group) - @min = min || 0 - if max - # Prevent huge number of results in case of e.g. /.{1,100}/.examples - @max = smallest(max, @min + RegexpExamples.MaxRepeaterVariance) - elsif has_comma - @max = @min + RegexpExamples.MaxRepeaterVariance - else - @max = @min + @min_repeats = min || 0 + if max # e.g. {1,100} --> Treat as {1,3} or similar, to prevent a huge number of results + @max_repeats = smallest(max, @min_repeats + RegexpExamples.MaxRepeaterVariance) + elsif has_comma # e.g. {2,} --> Treat as {2,4} or similar + @max_repeats = @min_repeats + RegexpExamples.MaxRepeaterVariance + else # e.g. 
{3} --> Treat as {3,3} + @max_repeats = @min_repeats end end - def result - super(@min, @max) - end - private def smallest(x, y) (x < y) ? x : y From 8aee4a64a016d7a1abff69ccbfaa52f6a3969b61 Mon Sep 17 00:00:00 2001 From: Tom Lord Date: Sun, 8 Mar 2015 09:05:22 +0000 Subject: [PATCH 04/12] BIG performance improvement, for #random_example Previously, for capture groups and "Or" groups, the #random_example method was basically calling #examples under the hood. This meant that for a "complex" pattern, the system simply froze! E.g. /\w{1000}/.random_example # worked fine /(\w{1000})/.random_example # tries to store 63**1000 examples in memory; not fine!! This is now fixed. ALL regexes can have a random example generated "quickly". --- lib/regexp-examples/groups.rb | 56 +++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 15 deletions(-) diff --git a/lib/regexp-examples/groups.rb b/lib/regexp-examples/groups.rb index e71f719..30b9069 100644 --- a/lib/regexp-examples/groups.rb +++ b/lib/regexp-examples/groups.rb @@ -37,13 +37,14 @@ def result end end - class BaseGroup + module RandomResultBySample def random_result result.sample(1) end end - class SingleCharGroup < BaseGroup + class SingleCharGroup + include RandomResultBySample prepend GroupWithIgnoreCase def initialize(char, ignorecase) @char = char @@ -58,13 +59,15 @@ def result # but there are no results for the group. # i.e. PlaceHolderGroup.new.result == '' == SingleCharGroup.new('').result # (But using PlaceHolderGroup makes it clearer what the intention is!) 
- class PlaceHolderGroup < BaseGroup + class PlaceHolderGroup + include RandomResultBySample def result [GroupResult.new('')] end end - class CharGroup < BaseGroup + class CharGroup + include RandomResultBySample prepend GroupWithIgnoreCase def initialize(chars, ignorecase) @chars = chars @@ -79,7 +82,8 @@ def result end - class DotGroup < BaseGroup + class DotGroup + include RandomResultBySample attr_reader :multiline def initialize(multiline) @multiline = multiline @@ -93,18 +97,26 @@ def result end end - class MultiGroup < BaseGroup + class MultiGroup attr_reader :group_id def initialize(groups, group_id) @groups = groups @group_id = group_id end - # Generates the result of each contained group - # and adds the filled group of each result to - # itself def result - strings = @groups.map {|repeater| repeater.result} + result_by_method(:result) + end + + def random_result + result_by_method(:random_result) + end + + private + # Generates the result of each contained group + # and adds the filled group of each result to itself + def result_by_method(method) + strings = @groups.map {|repeater| repeater.send(method)} RegexpExamples.permutations_of_strings(strings).map do |result| GroupResult.new(result, group_id) end @@ -114,23 +126,37 @@ def result class MultiGroupEnd end - class OrGroup < BaseGroup + class OrGroup def initialize(left_repeaters, right_repeaters) @left_repeaters = left_repeaters @right_repeaters = right_repeaters end - def result - left_result = RegexpExamples.map_results(@left_repeaters) - right_result = RegexpExamples.map_results(@right_repeaters) + result_by_method(:map_results) + end + + def random_result + # TODO: This logic is flawed in terms of choosing a truly "random" example! + # E.g. /a|b|c|d/.random_example will choose a letter with the following probabilities: + # a = 50%, b = 25%, c = 12.5%, d = 12.5% + # In order to fix this, I must either apply some weighted selection logic, + # or change how the OrGroup examples are generated - i.e. 
make this class work with >2 repeaters + result_by_method(:map_random_result).sample(1) + end + + private + def result_by_method(method) + left_result = RegexpExamples.send(method, @left_repeaters) + right_result = RegexpExamples.send(method, @right_repeaters) left_result.concat(right_result).flatten.uniq.map do |result| GroupResult.new(result) end end end - class BackReferenceGroup < BaseGroup + class BackReferenceGroup + include RandomResultBySample attr_reader :id def initialize(id) @id = id From 3916f91aec56f8ac3bc38a86dc9e66383c396a54 Mon Sep 17 00:00:00 2001 From: Tom Lord Date: Sun, 8 Mar 2015 09:46:59 +0000 Subject: [PATCH 05/12] Tidy up/refactor of config_options * These should not be the concern of the parser; moved the logic out. * Implemented for #random_example (max_repeater_variance only, since max_group_results is irrelevant here!) * Used modern ruby syntax --- lib/regexp-examples/constants.rb | 2 +- .../core_extensions/regexp/examples.rb | 26 +++++++++++++------ lib/regexp-examples/parser.rb | 6 +---- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/lib/regexp-examples/constants.rb b/lib/regexp-examples/constants.rb index fffaff8..029e567 100644 --- a/lib/regexp-examples/constants.rb +++ b/lib/regexp-examples/constants.rb @@ -17,7 +17,7 @@ class ResultCountLimiters class << self attr_reader :max_repeater_variance, :max_group_results - def configure!(max_repeater_variance, max_group_results) + def configure!(max_repeater_variance, max_group_results = nil) @max_repeater_variance = (max_repeater_variance || MaxRepeaterVarianceDefault) @max_group_results = (max_group_results || MaxGroupResultsDefault) end diff --git a/lib/regexp-examples/core_extensions/regexp/examples.rb b/lib/regexp-examples/core_extensions/regexp/examples.rb index a9057ae..d254998 100644 --- a/lib/regexp-examples/core_extensions/regexp/examples.rb +++ b/lib/regexp-examples/core_extensions/regexp/examples.rb @@ -1,19 +1,29 @@ module CoreExtensions module Regexp module 
Examples - def examples(config_options={}) - full_examples = RegexpExamples.map_results( - RegexpExamples::Parser.new(source, options, config_options).parse + def examples(**config_options) + RegexpExamples::ResultCountLimiters.configure!( + config_options[:max_repeater_variance], + config_options[:max_group_results] ) - RegexpExamples::BackReferenceReplacer.new.substitute_backreferences(full_examples) + examples_by_method(:map_results) end - def random_example - full_examples = RegexpExamples.map_random_result( - RegexpExamples::Parser.new(source, options, max_group_results: 1000000).parse + def random_example(**config_options) + RegexpExamples::ResultCountLimiters.configure!( + config_options[:max_repeater_variance] ) - RegexpExamples::BackReferenceReplacer.new.substitute_backreferences(full_examples).first + examples_by_method(:map_random_result).first end + + private + def examples_by_method(method) + full_examples = RegexpExamples.send( + method, + RegexpExamples::Parser.new(source, options).parse + ) + RegexpExamples::BackReferenceReplacer.new.substitute_backreferences(full_examples) + end end end end diff --git a/lib/regexp-examples/parser.rb b/lib/regexp-examples/parser.rb index 9cda954..f72463c 100644 --- a/lib/regexp-examples/parser.rb +++ b/lib/regexp-examples/parser.rb @@ -2,17 +2,13 @@ module RegexpExamples IllegalSyntaxError = Class.new(StandardError) class Parser attr_reader :regexp_string - def initialize(regexp_string, regexp_options, config_options={}) + def initialize(regexp_string, regexp_options) @regexp_string = regexp_string @ignorecase = !(regexp_options & Regexp::IGNORECASE).zero? @multiline = !(regexp_options & Regexp::MULTILINE).zero? @extended = !(regexp_options & Regexp::EXTENDED).zero? 
@num_groups = 0 @current_position = 0 - ResultCountLimiters.configure!( - config_options[:max_repeater_variance], - config_options[:max_group_results] - ) end def parse From 3b58abd7d115a616439588d8e22a79689a2892e2 Mon Sep 17 00:00:00 2001 From: Tom Lord Date: Sun, 8 Mar 2015 09:51:21 +0000 Subject: [PATCH 06/12] send --> public_send, where possible Just making it clear that there's no private methods being called! --- lib/regexp-examples/core_extensions/regexp/examples.rb | 2 +- lib/regexp-examples/groups.rb | 6 +++--- lib/regexp-examples/helpers.rb | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/regexp-examples/core_extensions/regexp/examples.rb b/lib/regexp-examples/core_extensions/regexp/examples.rb index d254998..147d0e2 100644 --- a/lib/regexp-examples/core_extensions/regexp/examples.rb +++ b/lib/regexp-examples/core_extensions/regexp/examples.rb @@ -18,7 +18,7 @@ def random_example(**config_options) private def examples_by_method(method) - full_examples = RegexpExamples.send( + full_examples = RegexpExamples.public_send( method, RegexpExamples::Parser.new(source, options).parse ) diff --git a/lib/regexp-examples/groups.rb b/lib/regexp-examples/groups.rb index 30b9069..5a857fd 100644 --- a/lib/regexp-examples/groups.rb +++ b/lib/regexp-examples/groups.rb @@ -116,7 +116,7 @@ def random_result # Generates the result of each contained group # and adds the filled group of each result to itself def result_by_method(method) - strings = @groups.map {|repeater| repeater.send(method)} + strings = @groups.map {|repeater| repeater.public_send(method)} RegexpExamples.permutations_of_strings(strings).map do |result| GroupResult.new(result, group_id) end @@ -147,8 +147,8 @@ def random_result private def result_by_method(method) - left_result = RegexpExamples.send(method, @left_repeaters) - right_result = RegexpExamples.send(method, @right_repeaters) + left_result = RegexpExamples.public_send(method, @left_repeaters) + right_result = 
RegexpExamples.public_send(method, @right_repeaters) left_result.concat(right_result).flatten.uniq.map do |result| GroupResult.new(result) end diff --git a/lib/regexp-examples/helpers.rb b/lib/regexp-examples/helpers.rb index be238c2..194b84d 100644 --- a/lib/regexp-examples/helpers.rb +++ b/lib/regexp-examples/helpers.rb @@ -37,7 +37,7 @@ def self.map_random_result(repeaters) private def self.generic_map_result(repeaters, method) repeaters - .map {|repeater| repeater.send(method)} + .map {|repeater| repeater.public_send(method)} .instance_eval do |partial_results| RegexpExamples.permutations_of_strings(partial_results) end From 4122f5a7cf5f6d73ffcdebb8c84c3d97acaee08d Mon Sep 17 00:00:00 2001 From: Tom Lord Date: Sun, 8 Mar 2015 11:06:37 +0000 Subject: [PATCH 07/12] Documented Regexp#random_example --- README.md | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 6be524c..1dcc991 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,11 @@ [![Build Status](https://travis-ci.org/tom-lord/regexp-examples.svg?branch=master)](https://travis-ci.org/tom-lord/regexp-examples/builds) [![Coverage Status](https://coveralls.io/repos/tom-lord/regexp-examples/badge.svg?branch=master)](https://coveralls.io/r/tom-lord/regexp-examples?branch=master) -Extends the Regexp class with the method: Regexp#examples +Extends the Regexp class with the methods: Regexp#examples and Regexp#random_example -This method generates a list of (some\*) strings that will match the given regular expression. +Regexp#examples generates a list of all\* strings that will match the given regular expression. + +Regexp#random_example returns one, random string (from all possible strings!!) that matches the regex. \* If the regex has an infinite number of possible srings that match it, such as `/a*b+c{2,}/`, or a huge number of possible matches, such as `/.\w/`, then only a subset of these will be listed. 
@@ -31,6 +33,14 @@ For more detail on this, see [configuration options](#configuration-options). | \u{28}\u2310\u25a0\u{5f}\u25a0\u{29} /x.examples #=> ["(•_•)", "( •_•)>⌐■-■ ", "(⌐■_■)"] + +################################################################################### + +# Obviously, you will get different results if you try these yourself! +/\w{10}@(hotmail|gmail)\.com/.random_example #=> "TTsJsiwzKS@gmail.com" +/\p{Greek}{80}/.random_example + #=> "ΖΆΧͷᵦμͷηϒϰΟᵝΔ΄θϔζΌψΨεκᴪΓΕπι϶ονϵΓϹᵦΟπᵡήϴϜΦϚϴϑ͵ϴΉϺ͵ϹϰϡᵠϝΤΏΨϹϊϻαώΞΰϰΑͼΈΘͽϙͽξΆΆΡΡΉΓς" +/written by tom lord/i.random_example #=> "WrITtEN bY tOM LORD" ``` ## Installation @@ -51,7 +61,7 @@ Or install it yourself as: ## Supported syntax -Short answer: **Everything** is supported, apart from "irregular" aspects of the regexp language -- see [impossible features](#impossible-features-illegal-syntax) +Short answer: **Everything** is supported, apart from "irregular" aspects of the regexp language -- see [impossible features](#impossible-features-illegal-syntax). Long answer: @@ -89,7 +99,7 @@ Long answer: ## Bugs and Not-Yet-Supported syntax * There are some (rare) edge cases where backreferences do not work properly, e.g. `/(a*)a* \1/.examples` - which includes "aaaa aa". This is because each repeater is not context-aware, so the "greediness" logic is flawed. (E.g. in this case, the second `a*` should always evaluate to an empty string, because the previous `a*` was greedy! However, patterns like this are highly unusual...) -* Some named properties, e.g. `/\p{Arabic}/`, list non-matching examples for ruby 2.0/2.1 (as the definitions changed in ruby 2.2). This will be fixed in version 1.1.0 (see the pending pull request)! +* Some named properties, e.g. `/\p{Arabic}/`, list non-matching examples for ruby 2.0/2.1 (as the definitions changed in ruby 2.2). This will be fixed in version 1.1.1 (see the pending pull request)! 
Since the Regexp language is so vast, it's quite likely I've missed something (please raise an issue if you find something)! The only missing feature that I'm currently aware of is: * Conditional capture groups, e.g. `/(group1)? (?(1)yes|no)/.examples` (which *should* return: `["group1 yes", " no"]`) @@ -127,6 +137,8 @@ When generating examples, the gem uses 2 configurable values to limit how many e * `[h-s]` is equivalent to `[hijkl]` * `(1|2|3|4|5|6|7|8)` is equivalent to `[12345]` +Rexexp#examples makes use of *both* these options; Rexexp#random_example only uses `max_repeater_variance`, since the other option is redundant! + To use an alternative value, simply pass the configuration option as follows: ```ruby @@ -134,26 +146,29 @@ To use an alternative value, simply pass the configuration option as follows: #=> [''. 'a', 'aa', 'aaa', 'aaaa' 'aaaaa'] /[F-X]/.examples(max_group_results: 10) #=> ['F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O'] +/.*/.random_example(max_repeater_variance: 50) + #=> "A very unlikely result!" ``` -_**WARNING**: Choosing huge numbers, along with a "complex" regex, could easily cause your system to freeze!_ +_**WARNING**: Choosing huge numbers for Regexp#examples, along with a "complex" regex, could easily cause your system to freeze!_ For example, if you try to generate a list of _all_ 5-letter words: `/\w{5}/.examples(max_group_results: 999)`, then since there are actually `63` "word" characters (upper/lower case letters, numbers and "\_"), this will try to generate `63**5 #=> 992436543` (almost 1 _trillion_) examples! In other words, think twice before playing around with this config! 
-A more sensible use case might be, for example, to generate one random 1-4 digit string: +A more sensible use case might be, for example, to generate all 1-4 digit strings: + +`/\d{1,4}/.examples(max_repeater_variance: 3, max_group_results: 10)` -`/\d{1,4}/.examples(max_repeater_variance: 3, max_group_results: 10).sample(1)` +Due to code optimisation, this is not something you need to worry about (much) for Regexp#random_example. For instance, the following takes no more than ~ 1 second on my machine: -(Note: I may develop a much more efficient way to "generate one example" in a later release of this gem.) +`/.*\w+\d{100}/.random_example(max_repeater_variance: 1000)` ## TODO * Performance improvements: * Use of lambdas/something (in [constants.rb](lib/regexp-examples/constants.rb)) to improve the library load time. See the pending pull request. * (Maybe?) add a `max_examples` configuration option and use lazy evaluation, to ensure the method never "freezes". -* Potential future feature: `Regexp#random_example` - but implementing this properly is non-trivial, due to performance issues that need addressing first! * Write a blog post about how this amazing gem works! 
:) ## Contributing From 82c84739c47753a73ec0e00d553b3bbb85269b5d Mon Sep 17 00:00:00 2001 From: Tom Lord Date: Sun, 8 Mar 2015 11:16:55 +0000 Subject: [PATCH 08/12] Display "Regexp#examples" and "Regexp#random_example" like code --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 1dcc991..9e8e55c 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,11 @@ [![Build Status](https://travis-ci.org/tom-lord/regexp-examples.svg?branch=master)](https://travis-ci.org/tom-lord/regexp-examples/builds) [![Coverage Status](https://coveralls.io/repos/tom-lord/regexp-examples/badge.svg?branch=master)](https://coveralls.io/r/tom-lord/regexp-examples?branch=master) -Extends the Regexp class with the methods: Regexp#examples and Regexp#random_example +Extends the Regexp class with the methods: `Regexp#examples` and `Regexp#random_example` -Regexp#examples generates a list of all\* strings that will match the given regular expression. +`Regexp#examples` generates a list of all\* strings that will match the given regular expression. -Regexp#random_example returns one, random string (from all possible strings!!) that matches the regex. +`Regexp#random_example` returns one, random string (from all possible strings!!) that matches the regex. \* If the regex has an infinite number of possible srings that match it, such as `/a*b+c{2,}/`, or a huge number of possible matches, such as `/.\w/`, then only a subset of these will be listed. @@ -150,7 +150,7 @@ To use an alternative value, simply pass the configuration option as follows: #=> "A very unlikely result!" 
``` -_**WARNING**: Choosing huge numbers for Regexp#examples, along with a "complex" regex, could easily cause your system to freeze!_ +_**WARNING**: Choosing huge numbers for `Regexp#examples`, along with a "complex" regex, could easily cause your system to freeze!_ For example, if you try to generate a list of _all_ 5-letter words: `/\w{5}/.examples(max_group_results: 999)`, then since there are actually `63` "word" characters (upper/lower case letters, numbers and "\_"), this will try to generate `63**5 #=> 992436543` (almost 1 _trillion_) examples! @@ -160,7 +160,7 @@ A more sensible use case might be, for example, to generate all 1-4 digit string `/\d{1,4}/.examples(max_repeater_variance: 3, max_group_results: 10)` -Due to code optimisation, this is not something you need to worry about (much) for Regexp#random_example. For instance, the following takes no more than ~ 1 second on my machine: +Due to code optimisation, this is not something you need to worry about (much) for `Regexp#random_example`. For instance, the following takes no more than ~ 1 second on my machine: `/.*\w+\d{100}/.random_example(max_repeater_variance: 1000)` From 19381572508bb29ead59142ee08dc6ea36932314 Mon Sep 17 00:00:00 2001 From: Tom Lord Date: Sun, 8 Mar 2015 11:51:22 +0000 Subject: [PATCH 09/12] Removed MultiGroupEnd hack I never really liked it, anyway... 
--- lib/regexp-examples/groups.rb | 3 --- lib/regexp-examples/parser.rb | 13 +++++-------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/lib/regexp-examples/groups.rb b/lib/regexp-examples/groups.rb index 5a857fd..bf97e23 100644 --- a/lib/regexp-examples/groups.rb +++ b/lib/regexp-examples/groups.rb @@ -123,9 +123,6 @@ def result_by_method(method) end end - class MultiGroupEnd - end - class OrGroup def initialize(left_repeaters, right_repeaters) @left_repeaters = left_repeaters diff --git a/lib/regexp-examples/parser.rb b/lib/regexp-examples/parser.rb index f72463c..ba0579b 100644 --- a/lib/regexp-examples/parser.rb +++ b/lib/regexp-examples/parser.rb @@ -13,9 +13,8 @@ def initialize(regexp_string, regexp_options) def parse repeaters = [] - while @current_position < regexp_string.length + until end_of_regexp group = parse_group(repeaters) - break if group.is_a? MultiGroupEnd if group.is_a? OrGroup return [OneTimeRepeater.new(group)] end @@ -31,8 +30,6 @@ def parse_group(repeaters) case next_char when '(' group = parse_multi_group - when ')' - group = parse_multi_end_group when '[' group = parse_char_group when '.' @@ -237,10 +234,6 @@ def regexp_options_toggle(on, off) @extended = false if (off.include? "x") end - def parse_multi_end_group - MultiGroupEnd.new - end - def parse_char_group @current_position += 1 # Skip past opening "[" chargroup_parser = ChargroupParser.new(rest_of_string) @@ -341,6 +334,10 @@ def rest_of_string def next_char regexp_string[@current_position] end + + def end_of_regexp + next_char == ")" || @current_position >= regexp_string.length + end end end From fe506bdf20b4bd2abb6a954fe4375fe370404462 Mon Sep 17 00:00:00 2001 From: Tom Lord Date: Sun, 8 Mar 2015 12:58:52 +0000 Subject: [PATCH 10/12] Some simple "smoke tests" for Regexp#random_example Possible future tests to add, if needed: * Performance test * "How random is it?" 
test --- spec/regexp-random_example_spec.rb | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 spec/regexp-random_example_spec.rb diff --git a/spec/regexp-random_example_spec.rb b/spec/regexp-random_example_spec.rb new file mode 100644 index 0000000..62001da --- /dev/null +++ b/spec/regexp-random_example_spec.rb @@ -0,0 +1,22 @@ +RSpec.describe Regexp, "#random_example" do + def self.random_example_matches(*regexps) + regexps.each do |regexp| + it "random example for /#{regexp.source}/" do + random_example = regexp.random_example + + expect(random_example).to be_a String # Not an Array! + expect(random_example).to match(Regexp.new("\\A(?:#{regexp.source})\\z", regexp.options)) + end + end + end + + context "smoke tests" do + # Just a few "smoke tests", to ensure the basic method isn't broken. + # Testing of the RegexpExamples::Parser class is all covered by Regexp#examples test already. + random_example_matches( + /\w{10}/, + /(we(need(to(go(deeper)?)?)?)?) \1/, + /case insensitive/i + ) + end +end From d58aa7b4084e0a69701fe7714d90123c770266b4 Mon Sep 17 00:00:00 2001 From: Tom Lord Date: Sun, 8 Mar 2015 13:14:28 +0000 Subject: [PATCH 11/12] Version bump (v1.1.0) Implemented Regexp#random_example !!! This is effectively just a (MUCH) more efficient way of doing: /complicated .{20} regex/.examples(max_group_results: 9999999999).sample(1) Instead, you can use much more optimised code for this method via: /complicated .{20} regex/.random_example The key difference being that Regexp#random_example does not store any more than 1 result in memory, at any time, making its performance scale linearly rather than exponentially. 
--- lib/regexp-examples/version.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/regexp-examples/version.rb b/lib/regexp-examples/version.rb index afeb156..e2ae8a2 100644 --- a/lib/regexp-examples/version.rb +++ b/lib/regexp-examples/version.rb @@ -1,3 +1,3 @@ module RegexpExamples - VERSION = '1.0.2' + VERSION = '1.1.0' end From 919676674f752892b21db1c42d24035801b02af5 Mon Sep 17 00:00:00 2001 From: Tom Lord Date: Sun, 8 Mar 2015 13:27:28 +0000 Subject: [PATCH 12/12] Test for random_example with OrGroup Missing test area for 100% coverage --- spec/regexp-random_example_spec.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spec/regexp-random_example_spec.rb b/spec/regexp-random_example_spec.rb index 62001da..5e6a229 100644 --- a/spec/regexp-random_example_spec.rb +++ b/spec/regexp-random_example_spec.rb @@ -16,7 +16,8 @@ def self.random_example_matches(*regexps) random_example_matches( /\w{10}/, /(we(need(to(go(deeper)?)?)?)?) \1/, - /case insensitive/i + /case insensitive/i, + /front seat|back seat/, # Which seat will I take?? ) end end