From a37bddeb36f495f151a2353d13f83ed3fae3f413 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Sun, 17 Nov 2024 23:36:10 -0400 Subject: [PATCH] store StringScanner in ParseContext and reuse it through parsing --- lib/liquid.rb | 1 - lib/liquid/context.rb | 6 ++++- lib/liquid/expression.rb | 40 ++++++++++++++--------------- lib/liquid/lexer.rb | 10 +++----- lib/liquid/parse_context.rb | 21 ++++++++++++--- lib/liquid/parser.rb | 4 +-- lib/liquid/range_lookup.rb | 6 ++--- lib/liquid/string_scanner_pool.rb | 24 ----------------- lib/liquid/tags/for.rb | 2 +- lib/liquid/tags/if.rb | 2 +- lib/liquid/tokenizer.rb | 32 ++++++++++++++++------- lib/liquid/variable.rb | 2 +- lib/liquid/variable_lookup.rb | 16 ++++++++---- performance/theme_runner.rb | 7 ++++- test/integration/expression_test.rb | 2 +- test/unit/lexer_unit_test.rb | 2 +- test/unit/parser_unit_test.rb | 26 +++++++++++-------- test/unit/tag_unit_test.rb | 17 +++++++++--- 18 files changed, 124 insertions(+), 96 deletions(-) delete mode 100644 lib/liquid/string_scanner_pool.rb diff --git a/lib/liquid.rb b/lib/liquid.rb index 63c087bee..367fc3c43 100644 --- a/lib/liquid.rb +++ b/lib/liquid.rb @@ -49,7 +49,6 @@ module Liquid require "liquid/version" require "liquid/deprecations" require "liquid/const" -require "liquid/string_scanner_pool" require 'liquid/standardfilters' require 'liquid/file_system' require 'liquid/parser_switching' diff --git a/lib/liquid/context.rb b/lib/liquid/context.rb index 19daaeb73..1ef272d46 100644 --- a/lib/liquid/context.rb +++ b/lib/liquid/context.rb @@ -40,6 +40,10 @@ def initialize(environments = {}, outer_scope = {}, registers = {}, rethrow_erro @global_filter = nil @disabled_tags = {} + # Instead of constructing new StringScanner objects for each Expression parse, + # we recycle the same one. + @string_scanner = StringScanner.new("") + @registers.static[:cached_partials] ||= {} @registers.static[:file_system] ||= environment.file_system @registers.static[:template_factory] ||= Liquid::TemplateFactory.new @@ -176,7 +180,7 @@ def []=(key, value) # Example: # products == empty #=> products.empty? def [](expression) - evaluate(Expression.parse(expression)) + evaluate(Expression.parse(expression, @string_scanner)) end def key?(key) diff --git a/lib/liquid/expression.rb b/lib/liquid/expression.rb index ea8c68211..501c57500 100644 --- a/lib/liquid/expression.rb +++ b/lib/liquid/expression.rb @@ -22,7 +22,7 @@ class Expression1 # malicious input as described in https://github.com/Shopify/liquid/issues/1357 RANGES_REGEX = /\A\(\s*(?>(\S+)\s*\.\.)\s*(\S+)\s*\)\z/ - def self.parse(markup) + def self.parse(markup, _ss = nil) return nil unless markup markup = markup.strip @@ -58,7 +58,7 @@ class Expression2 'false' => false, 'blank' => '', 'empty' => '', - '-' => VariableLookup.parse("-") + '-' => VariableLookup.parse("-", nil), }.freeze DOT = ".".ord @@ -72,7 +72,7 @@ class Expression2 CACHE = LruRedux::Cache.new(10_000) # most themes would have less than 2,000 unique expression class << self - def parse(markup) + def parse(markup, ss = StringScanner.new("")) return unless markup markup = markup.strip # markup can be a frozen string @@ -86,33 +86,36 @@ def parse(markup) return CACHE[markup] if CACHE.key?(markup) - CACHE[markup] = inner_parse(markup) + CACHE[markup] = inner_parse(markup, ss) end - def inner_parse(markup) + def inner_parse(markup, ss) if (markup.start_with?("(") && markup.end_with?(")")) && markup =~ RANGES_REGEX - return RangeLookup.parse(Regexp.last_match(1), Regexp.last_match(2)) + return RangeLookup.parse( + Regexp.last_match(1), + Regexp.last_match(2), + ss, + ) end - if (num = parse_number(markup)) + if (num = parse_number(markup, ss)) num else - VariableLookup.parse(markup) + VariableLookup.parse(markup, ss) end end - def parse_number(markup) - ss = StringScannerPool.pop(markup) - - is_integer = true - last_dot_pos = nil - num_end_pos = nil - + def parse_number(markup, ss) + ss.string = markup # the first byte must be a digit, a period, or a dash byte = ss.scan_byte return false if byte != DASH && byte != DOT && (byte < ZERO || byte > NINE) + is_integer = true + last_dot_pos = nil + num_end_pos = nil + while (byte = ss.scan_byte) return false if byte != DOT && (byte < ZERO || byte > NINE) @@ -136,14 +139,9 @@ def parse_number(markup) if num_end_pos # number ends with a number "123.123" markup.byteslice(0, num_end_pos).to_f - elsif last_dot_pos - markup.byteslice(0, last_dot_pos).to_f else - # we should never reach this point - false + markup.byteslice(0, last_dot_pos).to_f end - ensure - StringScannerPool.release(ss) end end end diff --git a/lib/liquid/lexer.rb b/lib/liquid/lexer.rb index d9a073aa8..0ab9f3ca3 100644 --- a/lib/liquid/lexer.rb +++ b/lib/liquid/lexer.rb @@ -26,8 +26,8 @@ class Lexer1 WHITESPACE_OR_NOTHING = /\s*/ class << self - def tokenize(input) - ss = StringScanner.new(input) + def tokenize(input, ss = StringScanner.new("")) + ss.string = input output = [] until ss.eos? @@ -158,8 +158,8 @@ class Lexer2 # rubocop:disable Metrics/BlockNesting class << self - def tokenize(input) - ss = StringScannerPool.pop(input) + def tokenize(input, ss) + ss.string = input output = [] until ss.eos? @@ -220,8 +220,6 @@ def tokenize(input) end # rubocop:enable Metrics/BlockNesting output << EOS - ensure - StringScannerPool.release(ss) end def raise_syntax_error(start_pos, ss) diff --git a/lib/liquid/parse_context.rb b/lib/liquid/parse_context.rb index 7bd5418d5..f66e86be3 100644 --- a/lib/liquid/parse_context.rb +++ b/lib/liquid/parse_context.rb @@ -3,7 +3,7 @@ module Liquid class ParseContext attr_accessor :locale, :line_number, :trim_whitespace, :depth - attr_reader :partial, :warnings, :error_mode, :environment + attr_reader :partial, :warnings, :error_mode, :environment, :string_scanner def initialize(options = Const::EMPTY_HASH) @environment = options.fetch(:environment, Environment.default) @@ -12,6 +12,10 @@ def initialize(options = Const::EMPTY_HASH) @locale = @template_options[:locale] ||= I18n.new @warnings = [] + # constructing new StringScanner in Lexer, Tokenizer, etc is expensive + # This StringScanner will be shared by all of them + @string_scanner = StringScanner.new("") + self.depth = 0 self.partial = false end @@ -24,12 +28,21 @@ def new_block_body Liquid::BlockBody.new end - def new_tokenizer(markup, start_line_number: nil, for_liquid_tag: false) - Tokenizer.new(markup, line_number: start_line_number, for_liquid_tag: for_liquid_tag) + def new_parser(input) + Parser.new(input, @string_scanner) + end + + def new_tokenizer(source, start_line_number: nil, for_liquid_tag: false) + Tokenizer.new( + source: source, + string_scanner: @string_scanner, + line_number: start_line_number, + for_liquid_tag: for_liquid_tag, + ) end def parse_expression(markup) - Expression.parse(markup) + Expression.parse(markup, string_scanner) end def partial=(value) diff --git a/lib/liquid/parser.rb b/lib/liquid/parser.rb index ed18909a2..a4f781d70 100644 --- a/lib/liquid/parser.rb +++ b/lib/liquid/parser.rb @@ -2,8 +2,8 @@ module Liquid class Parser - def initialize(input) - @tokens = Lexer.tokenize(input) + def initialize(input, string_scanner) + @tokens = Lexer.tokenize(input, string_scanner) @p = 0 # pointer to current location end diff --git a/lib/liquid/range_lookup.rb b/lib/liquid/range_lookup.rb index fd208a676..763717240 100644 --- a/lib/liquid/range_lookup.rb +++ b/lib/liquid/range_lookup.rb @@ -2,9 +2,9 @@ module Liquid class RangeLookup - def self.parse(start_markup, end_markup) - start_obj = Expression.parse(start_markup) - end_obj = Expression.parse(end_markup) + def self.parse(start_markup, end_markup, string_scanner) + start_obj = Expression.parse(start_markup, string_scanner) + end_obj = Expression.parse(end_markup, string_scanner) if start_obj.respond_to?(:evaluate) || end_obj.respond_to?(:evaluate) new(start_obj, end_obj) else diff --git a/lib/liquid/string_scanner_pool.rb b/lib/liquid/string_scanner_pool.rb deleted file mode 100644 index 6ebc49212..000000000 --- a/lib/liquid/string_scanner_pool.rb +++ /dev/null @@ -1,24 +0,0 @@ -# frozen_string_literal: true - -module Liquid - class StringScannerPool - class << self - def pop(input) - @ss_pool ||= 5.times.each_with_object([]) { |_i, arr| arr << StringScanner.new("") } - - if @ss_pool.empty? - StringScanner.new(input) - else - ss = @ss_pool.pop - ss.string = input - ss - end - end - - def release(ss) - @ss_pool ||= [] - @ss_pool << ss - end - end - end -end diff --git a/lib/liquid/tags/for.rb b/lib/liquid/tags/for.rb index 0dda3081d..27af15bf1 100644 --- a/lib/liquid/tags/for.rb +++ b/lib/liquid/tags/for.rb @@ -88,7 +88,7 @@ def lax_parse(markup) end def strict_parse(markup) - p = Parser.new(markup) + p = @parse_context.new_parser(markup) @variable_name = p.consume(:id) raise SyntaxError, options[:locale].t("errors.syntax.for_invalid_in") unless p.id?('in') diff --git a/lib/liquid/tags/if.rb b/lib/liquid/tags/if.rb index bcf250a11..040fecb84 100644 --- a/lib/liquid/tags/if.rb +++ b/lib/liquid/tags/if.rb @@ -102,7 +102,7 @@ def lax_parse(markup) end def strict_parse(markup) - p = Parser.new(markup) + p = @parse_context.new_parser(markup) condition = parse_binary_comparisons(p) p.consume(:end_of_string) condition diff --git a/lib/liquid/tokenizer.rb b/lib/liquid/tokenizer.rb index 092121f4c..ce09406c2 100644 --- a/lib/liquid/tokenizer.rb +++ b/lib/liquid/tokenizer.rb @@ -6,7 +6,13 @@ module Liquid class Tokenizer1 attr_reader :line_number, :for_liquid_tag - def initialize(source, line_numbers = false, line_number: nil, for_liquid_tag: false) + def initialize( + source:, + string_scanner:, # this is not used + line_numbers: false, + line_number: nil, + for_liquid_tag: false + ) @source = source @line_number = line_number || (line_numbers ? 1 : nil) @for_liquid_tag = for_liquid_tag @@ -56,12 +62,22 @@ class Tokenizer2 CLOSE_CURLEY = "}".ord PERCENTAGE = "%".ord - def initialize(source, line_numbers = false, line_number: nil, for_liquid_tag: false) - @line_number = line_number || (line_numbers ? 1 : nil) + def initialize( + source:, + string_scanner:, + line_numbers: false, + line_number: nil, + for_liquid_tag: false + ) + @line_number = line_number || (line_numbers ? 1 : nil) @for_liquid_tag = for_liquid_tag - @source = source - @offset = 0 - @tokens = [] + @source = source + @offset = 0 + @tokens = [] + + @ss = string_scanner + @ss.string = @source + tokenize end @@ -85,13 +101,11 @@ def tokenize if @for_liquid_tag @tokens = @source.split("\n") else - @ss = StringScannerPool.pop(@source) @tokens << shift_normal until @ss.eos? end @source = nil - ensure - StringScannerPool.release(@ss) if @ss + @ss = nil end def shift_normal diff --git a/lib/liquid/variable.rb b/lib/liquid/variable.rb index 372ee4dbf..9c4704909 100644 --- a/lib/liquid/variable.rb +++ b/lib/liquid/variable.rb @@ -61,7 +61,7 @@ def lax_parse(markup) def strict_parse(markup) @filters = [] - p = Parser.new(markup) + p = @parse_context.new_parser(markup) return if p.look(:end_of_string) diff --git a/lib/liquid/variable_lookup.rb b/lib/liquid/variable_lookup.rb index 1ade30ef9..f544b3bc4 100644 --- a/lib/liquid/variable_lookup.rb +++ b/lib/liquid/variable_lookup.rb @@ -6,16 +6,19 @@ class VariableLookup attr_reader :name, :lookups - def self.parse(markup) - new(markup) + def self.parse(markup, string_scanner) + new(markup, string_scanner) end - def initialize(markup) + def initialize(markup, string_scanner = StringScanner.new("")) lookups = markup.scan(VariableParser) name = lookups.shift if name&.start_with?('[') && name&.end_with?(']') - name = Expression.parse(name[1..-2]) + name = Expression.parse( + name[1..-2], + string_scanner, + ) end @name = name @@ -25,7 +28,10 @@ def initialize(markup) @lookups.each_index do |i| lookup = lookups[i] if lookup&.start_with?('[') && lookup&.end_with?(']') - lookups[i] = Expression.parse(lookup[1..-2]) + lookups[i] = Expression.parse( + lookup[1..-2], + string_scanner, + ) elsif COMMAND_METHODS.include?(lookup) @command_flags |= 1 << i end diff --git a/performance/theme_runner.rb b/performance/theme_runner.rb index c158f8778..469503670 100644 --- a/performance/theme_runner.rb +++ b/performance/theme_runner.rb @@ -50,8 +50,13 @@ def compile # `tokenize` will just test the tokenizen portion of liquid without any templates def tokenize + ss = StringScanner.new("") @tests.each do |test_hash| - tokenizer = Liquid::Tokenizer.new(test_hash[:liquid], true) + tokenizer = Liquid::Tokenizer.new( + source: test_hash[:liquid], + string_scanner: ss, + line_numbers: true, + ) while tokenizer.shift; end end end diff --git a/test/integration/expression_test.rb b/test/integration/expression_test.rb index dc57ad286..6d29fc792 100644 --- a/test/integration/expression_test.rb +++ b/test/integration/expression_test.rb @@ -43,7 +43,7 @@ def test_range end def test_quirky_negative_sign_expression_markup - result = Expression.parse("-") + result = Expression.parse("-", nil) assert(result.is_a?(VariableLookup)) assert_equal("-", result.name) diff --git a/test/unit/lexer_unit_test.rb b/test/unit/lexer_unit_test.rb index d4f22fb60..e6f161288 100644 --- a/test/unit/lexer_unit_test.rb +++ b/test/unit/lexer_unit_test.rb @@ -134,6 +134,6 @@ def test_tokenize_incomplete_expression private def tokenize(input) - Lexer.tokenize(input) + Lexer.tokenize(input, StringScanner.new("")) end end diff --git a/test/unit/parser_unit_test.rb b/test/unit/parser_unit_test.rb index 130b8b793..789c5b663 100644 --- a/test/unit/parser_unit_test.rb +++ b/test/unit/parser_unit_test.rb @@ -6,20 +6,20 @@ class ParserUnitTest < Minitest::Test include Liquid def test_consume - p = Parser.new("wat: 7") + p = new_parser("wat: 7") assert_equal('wat', p.consume(:id)) assert_equal(':', p.consume(:colon)) assert_equal('7', p.consume(:number)) end def test_jump - p = Parser.new("wat: 7") + p = new_parser("wat: 7") p.jump(2) assert_equal('7', p.consume(:number)) end def test_consume? - p = Parser.new("wat: 7") + p = new_parser("wat: 7") assert_equal('wat', p.consume?(:id)) assert_equal(false, p.consume?(:dot)) assert_equal(':', p.consume(:colon)) @@ -27,7 +27,7 @@ def test_consume? end def test_id? - p = Parser.new("wat 6 Peter Hegemon") + p = new_parser("wat 6 Peter Hegemon") assert_equal('wat', p.id?('wat')) assert_equal(false, p.id?('endgame')) assert_equal('6', p.consume(:number)) @@ -36,7 +36,7 @@ def test_id? end def test_look - p = Parser.new("wat 6 Peter Hegemon") + p = new_parser("wat 6 Peter Hegemon") assert_equal(true, p.look(:id)) assert_equal('wat', p.consume(:id)) assert_equal(false, p.look(:comparison)) @@ -46,12 +46,12 @@ def test_look end def test_expressions - p = Parser.new("hi.there hi?[5].there? hi.there.bob") + p = new_parser("hi.there hi?[5].there? hi.there.bob") assert_equal('hi.there', p.expression) assert_equal('hi?[5].there?', p.expression) assert_equal('hi.there.bob', p.expression) - p = Parser.new("567 6.0 'lol' \"wut\"") + p = new_parser("567 6.0 'lol' \"wut\"") assert_equal('567', p.expression) assert_equal('6.0', p.expression) assert_equal("'lol'", p.expression) @@ -59,7 +59,7 @@ def test_expressions end def test_ranges - p = Parser.new("(5..7) (1.5..9.6) (young..old) (hi[5].wat..old)") + p = new_parser("(5..7) (1.5..9.6) (young..old) (hi[5].wat..old)") assert_equal('(5..7)', p.expression) assert_equal('(1.5..9.6)', p.expression) assert_equal('(young..old)', p.expression) @@ -67,7 +67,7 @@ def test_ranges end def test_arguments - p = Parser.new("filter: hi.there[5], keyarg: 7") + p = new_parser("filter: hi.there[5], keyarg: 7") assert_equal('filter', p.consume(:id)) assert_equal(':', p.consume(:colon)) assert_equal('hi.there[5]', p.argument) @@ -77,8 +77,14 @@ def test_arguments def test_invalid_expression assert_raises(SyntaxError) do - p = Parser.new("==") + p = new_parser("==") p.expression end end + + private + + def new_parser(str) + Parser.new(str, StringScanner.new("")) + end end diff --git a/test/unit/tag_unit_test.rb b/test/unit/tag_unit_test.rb index 7f9b490c9..9b4bf88b6 100644 --- a/test/unit/tag_unit_test.rb +++ b/test/unit/tag_unit_test.rb @@ -6,18 +6,18 @@ class TagUnitTest < Minitest::Test include Liquid def test_tag - tag = Tag.parse('tag', "", Tokenizer.new(""), ParseContext.new) + tag = Tag.parse('tag', "", new_tokenizer, ParseContext.new) assert_equal('liquid::tag', tag.name) assert_equal('', tag.render(Context.new)) end def test_return_raw_text_of_tag - tag = Tag.parse("long_tag", "param1, param2, param3", Tokenizer.new(""), ParseContext.new) + tag = Tag.parse("long_tag", "param1, param2, param3", new_tokenizer, ParseContext.new) assert_equal("long_tag param1, param2, param3", tag.raw) end def test_tag_name_should_return_name_of_the_tag - tag = Tag.parse("some_tag", "", Tokenizer.new(""), ParseContext.new) + tag = Tag.parse("some_tag", "", new_tokenizer, ParseContext.new) assert_equal('some_tag', tag.tag_name) end @@ -26,7 +26,16 @@ def render(_context); end end def test_tag_render_to_output_buffer_nil_value - custom_tag = CustomTag.parse("some_tag", "", Tokenizer.new(""), ParseContext.new) + custom_tag = CustomTag.parse("some_tag", "", new_tokenizer, ParseContext.new) assert_equal('some string', custom_tag.render_to_output_buffer(Context.new, "some string")) end + + private + + def new_tokenizer + Tokenizer.new( + source: "", + string_scanner: StringScanner.new(""), + ) + end end