From 02411acabc72fb93b7ee9ce6680fa340f3708c31 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Sun, 17 Nov 2024 16:36:04 -0400 Subject: [PATCH] add StringScannerPool for thread safety --- lib/liquid.rb | 1 + lib/liquid/expression.rb | 9 ++--- lib/liquid/lexer.rb | 58 ++++++++++++++----------------- lib/liquid/string_scanner_pool.rb | 23 ++++++++++++ lib/liquid/tokenizer.rb | 12 ++----- 5 files changed, 57 insertions(+), 46 deletions(-) create mode 100644 lib/liquid/string_scanner_pool.rb diff --git a/lib/liquid.rb b/lib/liquid.rb index 367fc3c43..63c087bee 100644 --- a/lib/liquid.rb +++ b/lib/liquid.rb @@ -49,6 +49,7 @@ module Liquid require "liquid/version" require "liquid/deprecations" require "liquid/const" +require "liquid/string_scanner_pool" require 'liquid/standardfilters' require 'liquid/file_system' require 'liquid/parser_switching' diff --git a/lib/liquid/expression.rb b/lib/liquid/expression.rb index c9a50fd15..ea8c68211 100644 --- a/lib/liquid/expression.rb +++ b/lib/liquid/expression.rb @@ -72,10 +72,6 @@ class Expression2 CACHE = LruRedux::Cache.new(10_000) # most themes would have less than 2,000 unique expression class << self - def string_scanner - @ss ||= StringScanner.new("") - end - def parse(markup) return unless markup @@ -106,8 +102,7 @@ def inner_parse(markup) end def parse_number(markup) - ss = string_scanner - ss.string = markup + ss = StringScannerPool.pop(markup) is_integer = true last_dot_pos = nil @@ -147,6 +142,8 @@ def parse_number(markup) # we should never reach this point false end + ensure + StringScannerPool.release(ss) end end end diff --git a/lib/liquid/lexer.rb b/lib/liquid/lexer.rb index 4e8d3d6ef..3dae267a8 100644 --- a/lib/liquid/lexer.rb +++ b/lib/liquid/lexer.rb @@ -157,40 +157,34 @@ class Lexer2 table.freeze end - class << self - def string_scanner - @string_scanner ||= StringScanner.new("") - end - end - def initialize(input) - @ss = self.class.string_scanner - @ss.string = input + @input = input end # rubocop:disable 
Metrics/BlockNesting def tokenize + ss = StringScannerPool.pop(@input) @output = [] - until @ss.eos? - @ss.skip(WHITESPACE_OR_NOTHING) + until ss.eos? + ss.skip(WHITESPACE_OR_NOTHING) - break if @ss.eos? + break if ss.eos? - start_pos = @ss.pos - peeked = @ss.peek_byte + start_pos = ss.pos + peeked = ss.peek_byte if (special = SPECIAL_TABLE[peeked]) - @ss.scan_byte + ss.scan_byte # Special case for ".." - if special == DOT && @ss.peek_byte == DOT_ORD - @ss.scan_byte + if special == DOT && ss.peek_byte == DOT_ORD + ss.scan_byte @output << DOTDOT elsif special == DASH # Special case for negative numbers - if (peeked_byte = @ss.peek_byte) && NUMBER_TABLE[peeked_byte] - @ss.pos -= 1 - @output << [:number, @ss.scan(NUMBER_LITERAL)] + if (peeked_byte = ss.peek_byte) && NUMBER_TABLE[peeked_byte] + ss.pos -= 1 + @output << [:number, ss.scan(NUMBER_LITERAL)] else @output << special end @@ -198,25 +192,25 @@ def tokenize @output << special end elsif (sub_table = TWO_CHARS_COMPARISON_JUMP_TABLE[peeked]) - @ss.scan_byte - if (peeked_byte = @ss.peek_byte) && (found = sub_table[peeked_byte]) + ss.scan_byte + if (peeked_byte = ss.peek_byte) && (found = sub_table[peeked_byte]) @output << found - @ss.scan_byte + ss.scan_byte else - raise_syntax_error(start_pos) + raise_syntax_error(start_pos, ss) end elsif (sub_table = COMPARISON_JUMP_TABLE[peeked]) - @ss.scan_byte - if (peeked_byte = @ss.peek_byte) && (found = sub_table[peeked_byte]) + ss.scan_byte + if (peeked_byte = ss.peek_byte) && (found = sub_table[peeked_byte]) @output << found - @ss.scan_byte + ss.scan_byte else @output << SINGLE_COMPARISON_TOKENS[peeked] end else type, pattern = NEXT_MATCHER_JUMP_TABLE[peeked] - if type && (t = @ss.scan(pattern)) + if type && (t = ss.scan(pattern)) # Special case for "contains" @output << if type == :id && t == "contains" && @output.last&.first != :dot COMPARISON_CONTAINS @@ -224,19 +218,21 @@ def tokenize [type, t] end else - raise_syntax_error(start_pos) + raise_syntax_error(start_pos, 
ss) end end end # rubocop:enable Metrics/BlockNesting @output << EOS + ensure + StringScannerPool.release(ss) end - def raise_syntax_error(start_pos) - @ss.pos = start_pos + def raise_syntax_error(start_pos, ss) + ss.pos = start_pos # the character could be a UTF-8 character, use getch to get all the bytes - raise SyntaxError, "Unexpected character #{@ss.getch}" + raise SyntaxError, "Unexpected character #{ss.getch}" end end diff --git a/lib/liquid/string_scanner_pool.rb b/lib/liquid/string_scanner_pool.rb new file mode 100644 index 000000000..4114d7a2d --- /dev/null +++ b/lib/liquid/string_scanner_pool.rb @@ -0,0 +1,23 @@ +module Liquid + class StringScannerPool + class << self + def pop(input) + @ss_pool ||= Array.new(5) { StringScanner.new("") } # five distinct scanners, never one shared instance + + if @ss_pool.empty? + StringScanner.new(input) + else + ss = @ss_pool.pop + ss.string = input + ss + end + end + + def release(ss) + return if ss.nil? # nil means the caller never obtained a scanner (e.g. pop raised), so there is nothing to recycle + @ss_pool ||= [] + @ss_pool << ss + end + end + end +end diff --git a/lib/liquid/tokenizer.rb b/lib/liquid/tokenizer.rb index 99c66d507..092121f4c 100644 --- a/lib/liquid/tokenizer.rb +++ b/lib/liquid/tokenizer.rb @@ -56,12 +56,6 @@ class Tokenizer2 CLOSE_CURLEY = "}".ord PERCENTAGE = "%".ord - class << self - def string_scanner - @string_scanner ||= StringScanner.new("") - end - end - def initialize(source, line_numbers = false, line_number: nil, for_liquid_tag: false) @line_number = line_number || (line_numbers ? 1 : nil) @for_liquid_tag = for_liquid_tag @@ -91,13 +85,13 @@ def tokenize if @for_liquid_tag @tokens = @source.split("\n") else - @ss = self.class.string_scanner - @ss.string = @source + @ss = StringScannerPool.pop(@source) @tokens << shift_normal until @ss.eos? end - @ss = nil @source = nil + ensure + StringScannerPool.release(@ss) if @ss end def shift_normal