From 438b88dcde4bf6f925361f5e167e90c45fada50d Mon Sep 17 00:00:00 2001 From: Junichi Kobayashi Date: Mon, 1 Apr 2024 23:16:00 +0900 Subject: [PATCH 1/6] Set %define when parsing grammar files --- lib/lrama/grammar.rb | 3 ++- lib/lrama/parser.rb | 9 +++++++-- parser.y | 2 +- spec/lrama/parser_spec.rb | 1 + 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/lib/lrama/grammar.rb b/lib/lrama/grammar.rb index 3724f828..0432490a 100644 --- a/lib/lrama/grammar.rb +++ b/lib/lrama/grammar.rb @@ -28,7 +28,7 @@ class Grammar attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux, :parameterizing_rule_resolver attr_accessor :union, :expect, :printers, :error_tokens, :lex_param, :parse_param, :initial_action, :after_shift, :before_reduce, :after_reduce, :after_shift_error_token, :after_pop_stack, - :symbols_resolver, :types, :rules, :rule_builders, :sym_to_rules, :no_stdlib, :locations + :symbols_resolver, :types, :rules, :rule_builders, :sym_to_rules, :no_stdlib, :locations, :define def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term, :find_symbol_by_number!, :find_symbol_by_id!, :token_to_symbol, @@ -57,6 +57,7 @@ def initialize(rule_counter) @aux = Auxiliary.new @no_stdlib = false @locations = false + @define = {} append_special_symbols end diff --git a/lib/lrama/parser.rb b/lib/lrama/parser.rb index 62c9ba64..a74b42f0 100644 --- a/lib/lrama/parser.rb +++ b/lib/lrama/parser.rb @@ -914,7 +914,7 @@ def raise_parse_error(error_message, location) 2, 72, :_reduce_15, 1, 59, :_reduce_none, 2, 59, :_reduce_17, - 3, 59, :_reduce_none, + 3, 59, :_reduce_18, 2, 59, :_reduce_none, 2, 59, :_reduce_20, 2, 59, :_reduce_21, @@ -1324,7 +1324,12 @@ def _reduce_17(val, _values, result) end .,., -# reduce 18 omitted +module_eval(<<'.,.,', 'parser.y', 27) + def _reduce_18(val, _values, result) + @grammar.define[val[1].s_value] = val[2]&.s_value + result + end +.,., # reduce 19 omitted diff --git a/parser.y 
b/parser.y index c95f60b7..fed2a9f2 100644 --- a/parser.y +++ b/parser.y @@ -25,7 +25,7 @@ rule bison_declaration: grammar_declaration | "%expect" INTEGER { @grammar.expect = val[1] } - | "%define" variable value + | "%define" variable value { @grammar.define[val[1].s_value] = val[2]&.s_value } | "%param" param+ | "%lex-param" param+ { diff --git a/spec/lrama/parser_spec.rb b/spec/lrama/parser_spec.rb index 8b14ee5f..db6c2205 100644 --- a/spec/lrama/parser_spec.rb +++ b/spec/lrama/parser_spec.rb @@ -59,6 +59,7 @@ CODE expect(grammar.expect).to eq(0) + expect(grammar.define).to eq({'api.pure' => nil, 'parse.error' => 'verbose'}) expect(grammar.printers).to eq([ Printer.new( ident_or_tags: [T::Tag.new(s_value: "")], From 79c69cdea0476094ae570186690650665b2f0185 Mon Sep 17 00:00:00 2001 From: Junichi Kobayashi Date: Fri, 3 May 2024 01:09:19 +0900 Subject: [PATCH 2/6] Parse --define options --- lib/lrama/command.rb | 2 +- lib/lrama/grammar.rb | 4 ++-- lib/lrama/option_parser.rb | 1 + lib/lrama/options.rb | 9 ++++++++- lib/lrama/parser.rb | 5 +++-- parser.y | 5 +++-- spec/lrama/option_parser_spec.rb | 1 + 7 files changed, 19 insertions(+), 8 deletions(-) diff --git a/lib/lrama/command.rb b/lib/lrama/command.rb index 0095c1a1..72a59312 100644 --- a/lib/lrama/command.rb +++ b/lib/lrama/command.rb @@ -19,7 +19,7 @@ def run(argv) text = options.y.read options.y.close if options.y != STDIN begin - grammar = Lrama::Parser.new(text, options.grammar_file, options.debug).parse + grammar = Lrama::Parser.new(text, options.grammar_file, options.debug, options.define).parse unless grammar.no_stdlib stdlib_grammar = Lrama::Parser.new(File.read(STDLIB_FILE_PATH), STDLIB_FILE_PATH, options.debug).parse grammar.insert_before_parameterizing_rules(stdlib_grammar.parameterizing_rules) diff --git a/lib/lrama/grammar.rb b/lib/lrama/grammar.rb index 0432490a..7fb0f044 100644 --- a/lib/lrama/grammar.rb +++ b/lib/lrama/grammar.rb @@ -35,7 +35,7 @@ class Grammar :find_symbol_by_s_value!, 
:fill_symbol_number, :fill_nterm_type, :fill_printer, :fill_destructor, :fill_error_token, :sort_by_number! - def initialize(rule_counter) + def initialize(rule_counter, define = {}) @rule_counter = rule_counter # Code defined by "%code" @@ -57,7 +57,7 @@ def initialize(rule_counter) @aux = Auxiliary.new @no_stdlib = false @locations = false - @define = {} + @define = define.map {|d| d.split('=') }.to_h append_special_symbols end diff --git a/lib/lrama/option_parser.rb b/lib/lrama/option_parser.rb index 0727d1b3..e3f2be43 100644 --- a/lib/lrama/option_parser.rb +++ b/lib/lrama/option_parser.rb @@ -61,6 +61,7 @@ def parse_by_option_parser(argv) o.on('-S', '--skeleton=FILE', 'specify the skeleton to use') {|v| @options.skeleton = v } o.on('-t', 'reserved, do nothing') { } o.on('--debug', 'display debugging outputs of internal parser') {|v| @options.debug = true } + o.on('-D', '--define=NAME[=VALUE]', "similar to '%define NAME VALUE'") {|v| @options.define = v } o.separator '' o.separator 'Output:' o.on('-H', '--header=[FILE]', 'also produce a header file named FILE') {|v| @options.header = true; @options.header_file = v } diff --git a/lib/lrama/options.rb b/lib/lrama/options.rb index ccd76803..bc071e54 100644 --- a/lib/lrama/options.rb +++ b/lib/lrama/options.rb @@ -7,10 +7,11 @@ class Options :report_file, :outfile, :error_recovery, :grammar_file, :trace_opts, :report_opts, - :diagnostic, :y, :debug + :diagnostic, :y, :debug, :define def initialize @skeleton = "bison/yacc.c" + @define = {} @header = false @header_file = nil @report_file = nil @@ -23,5 +24,11 @@ def initialize @y = STDIN @debug = false end + + def define=(v) + v.split(',').each do |p_define| + @define.store *p_define.split('=') + end + end end end diff --git a/lib/lrama/parser.rb b/lib/lrama/parser.rb index a74b42f0..0910da4d 100644 --- a/lib/lrama/parser.rb +++ b/lib/lrama/parser.rb @@ -658,17 +658,18 @@ class Parser < Racc::Parser include Lrama::Report::Duration -def initialize(text, path, debug = 
false) +def initialize(text, path, debug = false, define = {}) @grammar_file = Lrama::Lexer::GrammarFile.new(path, text) @yydebug = debug @rule_counter = Lrama::Grammar::Counter.new(0) @midrule_action_counter = Lrama::Grammar::Counter.new(1) + @define = define end def parse report_duration(:parse) do @lexer = Lrama::Lexer.new(@grammar_file) - @grammar = Lrama::Grammar.new(@rule_counter) + @grammar = Lrama::Grammar.new(@rule_counter, @define) @precedence_number = 0 reset_precs do_parse diff --git a/parser.y b/parser.y index fed2a9f2..bde1848b 100644 --- a/parser.y +++ b/parser.y @@ -404,17 +404,18 @@ end include Lrama::Report::Duration -def initialize(text, path, debug = false) +def initialize(text, path, debug = false, define = {}) @grammar_file = Lrama::Lexer::GrammarFile.new(path, text) @yydebug = debug @rule_counter = Lrama::Grammar::Counter.new(0) @midrule_action_counter = Lrama::Grammar::Counter.new(1) + @define = define end def parse report_duration(:parse) do @lexer = Lrama::Lexer.new(@grammar_file) - @grammar = Lrama::Grammar.new(@rule_counter) + @grammar = Lrama::Grammar.new(@rule_counter, @define) @precedence_number = 0 reset_precs do_parse diff --git a/spec/lrama/option_parser_spec.rb b/spec/lrama/option_parser_spec.rb index eef10aa3..d723fe56 100644 --- a/spec/lrama/option_parser_spec.rb +++ b/spec/lrama/option_parser_spec.rb @@ -50,6 +50,7 @@ -S, --skeleton=FILE specify the skeleton to use -t reserved, do nothing --debug display debugging outputs of internal parser + -D, --define=NAME[=VALUE] similar to '%define NAME VALUE' Output: -H, --header=[FILE] also produce a header file named FILE From a786828c922f5dbaadcfa1626cc8424a7e01a0de Mon Sep 17 00:00:00 2001 From: Junichi Kobayashi Date: Tue, 2 Apr 2024 00:58:02 +0900 Subject: [PATCH 3/6] Support IELR(1) parser generation --- lib/lrama/command.rb | 1 + lib/lrama/grammar.rb | 4 + lib/lrama/option_parser.rb | 2 +- lib/lrama/options.rb | 6 - lib/lrama/state.rb | 273 ++++++++++++++++++++++++++++++- 
lib/lrama/states.rb | 75 ++++++++- lib/lrama/states/item.rb | 8 + spec/fixtures/integration/ielr.y | 62 +++++++ spec/lrama/states_spec.rb | 13 ++ 9 files changed, 435 insertions(+), 9 deletions(-) create mode 100644 spec/fixtures/integration/ielr.y diff --git a/lib/lrama/command.rb b/lib/lrama/command.rb index 72a59312..3ff39d57 100644 --- a/lib/lrama/command.rb +++ b/lib/lrama/command.rb @@ -34,6 +34,7 @@ def run(argv) end states = Lrama::States.new(grammar, trace_state: (options.trace_opts[:automaton] || options.trace_opts[:closure])) states.compute + states.compute_ielr if grammar.ielr_defined? context = Lrama::Context.new(states) if options.report_file diff --git a/lib/lrama/grammar.rb b/lib/lrama/grammar.rb index 7fb0f044..6ea396d5 100644 --- a/lib/lrama/grammar.rb +++ b/lib/lrama/grammar.rb @@ -172,6 +172,10 @@ def find_rules_by_symbol(sym) @sym_to_rules[sym.number] end + def ielr_defined? + @define.key?('lr.type') && @define['lr.type'] == 'ielr' + end + private def compute_nullable diff --git a/lib/lrama/option_parser.rb b/lib/lrama/option_parser.rb index e3f2be43..d6f4b32b 100644 --- a/lib/lrama/option_parser.rb +++ b/lib/lrama/option_parser.rb @@ -61,7 +61,7 @@ def parse_by_option_parser(argv) o.on('-S', '--skeleton=FILE', 'specify the skeleton to use') {|v| @options.skeleton = v } o.on('-t', 'reserved, do nothing') { } o.on('--debug', 'display debugging outputs of internal parser') {|v| @options.debug = true } - o.on('-D', '--define=NAME[=VALUE]', "similar to '%define NAME VALUE'") {|v| @options.define = v } + o.on('-D', '--define=NAME[=VALUE]', Array, "similar to '%define NAME VALUE'") {|v| @options.define = v } o.separator '' o.separator 'Output:' o.on('-H', '--header=[FILE]', 'also produce a header file named FILE') {|v| @options.header = true; @options.header_file = v } diff --git a/lib/lrama/options.rb b/lib/lrama/options.rb index bc071e54..08f75a77 100644 --- a/lib/lrama/options.rb +++ b/lib/lrama/options.rb @@ -24,11 +24,5 @@ def initialize @y = 
STDIN @debug = false end - - def define=(v) - v.split(',').each do |p_define| - @define.store *p_define.split('=') - end - end end end diff --git a/lib/lrama/state.rb b/lib/lrama/state.rb index c2623746..ab0ca2d0 100644 --- a/lib/lrama/state.rb +++ b/lib/lrama/state.rb @@ -10,7 +10,7 @@ module Lrama class State attr_reader :id, :accessing_symbol, :kernels, :conflicts, :resolved_conflicts, :default_reduction_rule, :closure, :items - attr_accessor :shifts, :reduces + attr_accessor :shifts, :reduces, :ielr_isocores, :lalr_isocore def initialize(id, accessing_symbol, kernels) @id = id @@ -23,6 +23,9 @@ def initialize(id, accessing_symbol, kernels) @conflicts = [] @resolved_conflicts = [] @default_reduction_rule = nil + @predecessors = [] + @lalr_isocore = self + @ielr_isocores = [self] end def closure=(closure) @@ -84,6 +87,18 @@ def transitions @transitions ||= shifts.map {|shift| [shift, @items_to_state[shift.next_items]] } end + def update_transition(shift, next_state) + set_items_to_state(shift.next_items, next_state) + next_state.append_predecessor(self) + clear_transitions_cache + end + + def clear_transitions_cache + @nterm_transitions = nil + @term_transitions = nil + @transitions = nil + end + def selected_term_transitions term_transitions.reject do |shift, next_state| shift.not_selected @@ -142,5 +157,261 @@ def rr_conflicts conflict.type == :reduce_reduce end end + + def propagate_lookaheads(next_state) + next_state.kernels.map {|item| + lookahead_sets = + if item.position == 1 + goto_follow_set(item.lhs) + else + kernel = kernels.find {|k| k.predecessor_item_of?(item) } + item_lookahead_set[kernel] + end + + [item, lookahead_sets & next_state.lookahead_set_filters[item]] + }.to_h + end + + def lookaheads_recomputed + !@item_lookahead_set.nil? + end + + def compatible_lookahead?(filtered_lookahead) + !lookaheads_recomputed || + @lalr_isocore.annotation_list.all? 
{|token, actions| + a = dominant_contribution(token, actions, item_lookahead_set) + b = dominant_contribution(token, actions, filtered_lookahead) + a.nil? || b.nil? || a == b + } + end + + def lookahead_set_filters + kernels.map {|kernel| + [kernel, + @lalr_isocore.annotation_list.select {|token, actions| + token.term? && actions.any? {|action, contributions| + !contributions.nil? && contributions.key?(kernel) && contributions[kernel] + } + }.map {|token, _| token } + ] + }.to_h + end + + def dominant_contribution(token, actions, lookaheads) + a = actions.select {|action, contributions| + contributions.nil? || contributions.any? {|item, contributed| contributed && lookaheads[item].include?(token) } + }.map {|action, _| action } + return nil if a.empty? + a.reject {|action| + if action.is_a?(State::Shift) + action.not_selected + elsif action.is_a?(State::Reduce) + action.not_selected_symbols.include?(token) + end + } + end + + def inadequacy_list + return @inadequacy_list if @inadequacy_list + + shift_contributions = shifts.map {|shift| + [shift.next_sym, [shift]] + }.to_h + reduce_contributions = reduces.map {|reduce| + (reduce.look_ahead || []).map {|sym| + [sym, [reduce]] + }.to_h + }.reduce(Hash.new([])) {|hash, cont| + hash.merge(cont) {|_, a, b| a | b } + } + + list = shift_contributions.merge(reduce_contributions) {|_, a, b| a | b } + @inadequacy_list = list.select {|token, actions| token.term? && actions.size > 1 } + end + + def annotation_list + manifestations = annotate_manifestation + predecessors = transitions.map {|_, next_state| next_state.annotate_predecessor(self) } + predecessors.reduce(manifestations) {|result, annotations| + result.merge(annotations) {|_, actions_a, actions_b| + if actions_a.nil? || actions_b.nil? 
+ actions_a || actions_b + else + actions_a.merge(actions_b) {|_, contributions_a, contributions_b| + contributions_a.merge(contributions_b) {|_, contributed_a, contributed_b| + contributed_a || contributed_b + } + } + end + } + } + end + + def annotate_manifestation + inadequacy_list.transform_values {|actions| + actions.map {|action| + if action.is_a?(Shift) + [action, nil] + elsif action.is_a?(Reduce) + if action.rule.empty_rule? + [action, lhs_contributions(action.rule.lhs, inadequacy_list.key(actions))] + else + contributions = kernels.map {|kernel| [kernel, kernel.rule == action.rule && kernel.end_of_rule?] }.to_h + [action, contributions] + end + end + }.to_h + } + end + + def annotate_predecessor(predecessor) + annotation_list.transform_values {|actions| + token = annotation_list.key(actions) + actions.transform_values {|inadequacy| + next nil if inadequacy.nil? + lhs_adequacy = kernels.all? {|kernel| + inadequacy[kernel] && kernel.position == 1 && predecessor.lhs_contributions(kernel.lhs, token).nil? + } + if lhs_adequacy + next nil + else + predecessor.kernels.map {|pred_k| + [pred_k, kernels.any? {|k| + inadequacy[k] && ( + pred_k.predecessor_item_of?(k) && predecessor.item_lookahead_set[pred_k].include?(token) || + k.position == 1 && predecessor.lhs_contributions(k.lhs, token)[pred_k] + ) + }] + }.to_h + end + } + } + end + + def lhs_contributions(sym, token) + shift, next_state = nterm_transitions.find {|sh, _| sh.next_sym == sym } + if always_follows(shift, next_state).include?(token) + nil + else + kernels.map {|kernel| [kernel, follow_kernel_items(shift, next_state, kernel) && item_lookahead_set[kernel].include?(token)] }.to_h + end + end + + def follow_kernel_items(shift, next_state, kernel) + queue = [[self, shift, next_state]] + until queue.empty? 
+ st, sh, next_st = queue.pop + return true if kernel.next_sym == sh.next_sym && kernel.symbols_after_transition.all?(&:nullable) + st.internal_dependencies(sh, next_st).each {|v| queue << v } + end + false + end + + def item_lookahead_set + return @item_lookahead_set if @item_lookahead_set + + kernels.map {|item| + value = + if item.lhs.accept_symbol? + [] + elsif item.position > 1 + prev_items = predecessors_with_item(item) + prev_items.map {|st, i| st.item_lookahead_set[i] }.reduce([]) {|acc, syms| acc |= syms } + elsif item.position == 1 + prev_state = @predecessors.find {|p| p.shifts.any? {|shift| shift.next_sym == item.lhs } } + shift, next_state = prev_state.nterm_transitions.find {|shift, _| shift.next_sym == item.lhs } + prev_state.goto_follows(shift, next_state) + end + [item, value] + }.to_h + end + + def item_lookahead_set=(k) + @item_lookahead_set = k + end + + def predecessors_with_item(item) + result = [] + @predecessors.each do |pre| + pre.items.each do |i| + result << [pre, i] if i.predecessor_item_of?(item) + end + end + result + end + + def append_predecessor(prev_state) + @predecessors << prev_state + @predecessors.uniq! + end + + def goto_follow_set(nterm_token) + return [] if nterm_token.accept_symbol? + shift, next_state = @lalr_isocore.nterm_transitions.find {|sh, _| sh.next_sym == nterm_token } + + @kernels + .select {|kernel| follow_kernel_items(shift, next_state, kernel) } + .map {|kernel| item_lookahead_set[kernel] } + .reduce(always_follows(shift, next_state)) {|result, terms| result |= terms } + end + + def goto_follows(shift, next_state) + queue = internal_dependencies(shift, next_state) + predecessor_dependencies(shift, next_state) + terms = always_follows(shift, next_state) + until queue.empty? 
+ st, sh, next_st = queue.pop + terms |= st.always_follows(sh, next_st) + st.internal_dependencies(sh, next_st).each {|v| queue << v } + st.predecessor_dependencies(sh, next_st).each {|v| queue << v } + end + terms + end + + def always_follows(shift, next_state) + queue = internal_dependencies(shift, next_state) + successor_dependencies(shift, next_state) + terms = [] + until queue.empty? + st, sh, next_st = queue.pop + terms |= next_st.term_transitions.map {|sh, _| sh.next_sym } + st.internal_dependencies(sh, next_st).each {|v| queue << v } + st.successor_dependencies(sh, next_st).each {|v| queue << v } + end + terms + end + + def internal_dependencies(shift, next_state) + syms = @items.select {|i| + i.next_sym == shift.next_sym && i.symbols_after_transition.all?(&:nullable) && i.position == 0 + }.map(&:lhs).uniq + nterm_transitions.select {|sh, _| syms.include?(sh.next_sym) }.map {|goto| [self, *goto] } + end + + def successor_dependencies(shift, next_state) + next_state.nterm_transitions + .select {|next_shift, _| next_shift.next_sym.nullable } + .map {|transition| [next_state, *transition] } + end + + def predecessor_dependencies(shift, next_state) + state_items = [] + @kernels.select {|kernel| + kernel.next_sym == shift.next_sym && kernel.symbols_after_transition.all?(&:nullable) + }.each do |item| + queue = predecessors_with_item(item) + until queue.empty? 
+ st, i = queue.pop + if i.position == 0 + state_items << [st, i] + else + st.predecessors_with_item(i).each {|v| queue << v } + end + end + end + + state_items.map {|state, item| + sh, next_st = state.nterm_transitions.find {|shi, _| shi.next_sym == item.lhs } + [state, sh, next_st] + } + end end end diff --git a/lib/lrama/states.rb b/lib/lrama/states.rb index 0ed4bff9..f75a2c45 100644 --- a/lib/lrama/states.rb +++ b/lib/lrama/states.rb @@ -92,6 +92,21 @@ def compute report_duration(:compute_default_reduction) { compute_default_reduction } end + def compute_ielr + report_duration(:compute_predecessors) { compute_predecessors } + report_duration(:split_states) { split_states } + report_duration(:compute_direct_read_sets) { compute_direct_read_sets } + report_duration(:compute_reads_relation) { compute_reads_relation } + report_duration(:compute_read_sets) { compute_read_sets } + report_duration(:compute_includes_relation) { compute_includes_relation } + report_duration(:compute_lookback_relation) { compute_lookback_relation } + report_duration(:compute_follow_sets) { compute_follow_sets } + report_duration(:compute_look_ahead_sets) { compute_look_ahead_sets } + report_duration(:compute_conflicts) { compute_conflicts } + + report_duration(:compute_default_reduction) { compute_default_reduction } + end + def reporter StatesReporter.new(self) end @@ -235,7 +250,7 @@ def enqueue_state(states, state) # Trace previous = state.kernels.first.previous_sym trace_state do |out| - out << sprintf("state_list_append (state = %d, symbol = %d (%s))", + out << sprintf("state_list_append (state = %d, symbol = %d (%s))\n", @states.count, previous.number, previous.display_name) end @@ -524,5 +539,63 @@ def compute_default_reduction end.first end end + + def compute_predecessors + queue = [@states.first] + until queue.empty? 
+ state = queue.shift + state.transitions.each do |_, next_state| + next_state.append_predecessor(state) + queue << next_state + end + end + end + + def split_states + transition_queue = [] + @states.each do |state| + state.transitions.each do |shift, next_state| + compute_state(state, shift, next_state) + end + end + end + + def merge_lookaheads(state, filtered_lookaheads) + return if state.kernels.all? {|item| (filtered_lookaheads[item] - state.item_lookahead_set[item]).empty? } + + state.item_lookahead_set = state.item_lookahead_set.merge {|_, v1, v2| v1 | v2 } + state.transitions.each do |shift, next_state| + next if next_state.lookaheads_recomputed + compute_state(state, shift, next_state) + end + end + + def compute_state(state, shift, next_state) + filtered_lookaheads = state.propagate_lookaheads(next_state) + s = next_state.ielr_isocores.find {|st| st.compatible_lookahead?(filtered_lookaheads) } + + if s.nil? + s = next_state.ielr_isocores.last + new_state = State.new(@states.count, s.accessing_symbol, s.kernels) + new_state.closure = s.closure + new_state.compute_shifts_reduces + s.transitions.each do |sh, next_state| + new_state.set_items_to_state(sh.next_items, next_state) + end + @states << new_state + new_state.lalr_isocore = s + s.ielr_isocores << new_state + s.ielr_isocores.each do |st| + st.ielr_isocores = s.ielr_isocores + end + new_state.item_lookahead_set = filtered_lookaheads + state.update_transition(shift, new_state) + elsif(!s.lookaheads_recomputed) + s.item_lookahead_set = filtered_lookaheads + else + state.update_transition(shift, s) + merge_lookaheads(s, filtered_lookaheads) + end + end end end diff --git a/lib/lrama/states/item.rb b/lib/lrama/states/item.rb index 5074e943..e89cb969 100644 --- a/lib/lrama/states/item.rb +++ b/lib/lrama/states/item.rb @@ -64,6 +64,10 @@ def symbols_after_dot # steep:ignore rhs[position..-1] end + def symbols_after_transition + rhs[position+1..-1] + end + def to_s "#{lhs.id.s_value}: #{display_name}" end @@ 
-78,6 +82,10 @@ def display_rest r = symbols_after_dot.map(&:display_name).join(" ") ". #{r} (rule #{rule_id})" end + + def predecessor_item_of?(other_item) + rule == other_item.rule && position == other_item.position - 1 + end end end end diff --git a/spec/fixtures/integration/ielr.y b/spec/fixtures/integration/ielr.y new file mode 100644 index 00000000..d8680a30 --- /dev/null +++ b/spec/fixtures/integration/ielr.y @@ -0,0 +1,62 @@ +%{ +#include +#include +#include "y.tab.h" +#define YYDEBUG 1 +static int yylex(YYSTYPE *val, YYLTYPE *loc); +static int yyerror(YYLTYPE *loc, const char *str); +%} + +%union { + int val; +} + +%token a +%token b +%token c +%define lr.type ielr + +%% +S: a A B a + | b A B b +A: a C D E +B: c + | // empty +C: D +D: a +E: a + | // empty + +%% + +static int yylex(YYSTYPE *yylval, YYLTYPE *loc) { + int c = getchar(); + printf("%c\n", c); + int val; + + switch (c) { + case ' ': case '\t': + return yylex(yylval, loc); + + case 'a': case 'b': case 'c': + return c; + + case '\n': + exit(0); + + default: + fprintf(stderr, "unknown character: %c\n", c); + exit(1); + } +} + +static int yyerror(YYLTYPE *loc, const char *str) { + fprintf(stderr, "parse error: %s\n", str); + return 0; +} + +int main() { + printf("Enter the formula:\n"); + yyparse(); + return 0; +} diff --git a/spec/lrama/states_spec.rb b/spec/lrama/states_spec.rb index 953ac54c..c4d4cf0f 100644 --- a/spec/lrama/states_spec.rb +++ b/spec/lrama/states_spec.rb @@ -1787,4 +1787,17 @@ class go to state 5 STR end end + + describe '#compute_ielr' do + it 'recompute states' do + path = "integration/ielr.y" + y = File.read(fixture_path(path)) + grammar = Lrama::Parser.new(y, path).parse + grammar.prepare + grammar.validate! 
+ states = Lrama::States.new(grammar, warning) + states.compute + states.compute_ielr + end + end end From 222b252f353327402cc86edc0de689773d3f832f Mon Sep 17 00:00:00 2001 From: Junichi Kobayashi Date: Thu, 26 Sep 2024 01:35:49 +0900 Subject: [PATCH 4/6] Add an integration test case --- spec/fixtures/integration/ielr.l | 34 ++++++ spec/lrama/states_spec.rb | 200 ++++++++++++++++++++++++++++++- 2 files changed, 233 insertions(+), 1 deletion(-) create mode 100644 spec/fixtures/integration/ielr.l diff --git a/spec/fixtures/integration/ielr.l b/spec/fixtures/integration/ielr.l new file mode 100644 index 00000000..f4d782d6 --- /dev/null +++ b/spec/fixtures/integration/ielr.l @@ -0,0 +1,34 @@ +%option noinput nounput noyywrap never-interactive bison-bridge bison-locations + +%{ + +#include +#include +#include "bison-generated.h" +#include "bison-generated-lexer.h" + +%} + +%% + + +[abc] { + return yytext[0]; +} + +[\n|\r\n] { + return(YYEOF); +} + +[[:space:]] {} + +<> { + return(YYEOF); +} + +. { + fprintf(stderr, "Illegal character '%s'\n", yytext); + return(YYEOF); +} + +%% diff --git a/spec/lrama/states_spec.rb b/spec/lrama/states_spec.rb index c4d4cf0f..a717d14c 100644 --- a/spec/lrama/states_spec.rb +++ b/spec/lrama/states_spec.rb @@ -1795,9 +1795,207 @@ class go to state 5 grammar = Lrama::Parser.new(y, path).parse grammar.prepare grammar.validate! 
- states = Lrama::States.new(grammar, warning) + states = Lrama::States.new(grammar) states.compute states.compute_ielr + + io = StringIO.new + states.reporter.report(io, states: true) + + expect(io.string).to eq(<<~STR) + State 14 conflicts: 2 shift/reduce + + + State 0 + + 0 $accept: • S "end of file" + + a shift, and go to state 1 + b shift, and go to state 2 + + S go to state 3 + + + State 1 + + 1 S: a • A B a + + a shift, and go to state 4 + + A go to state 5 + + + State 2 + + 2 S: b • A B b + + a shift, and go to state 19 + + A go to state 6 + + + State 3 + + 0 $accept: S • "end of file" + + "end of file" shift, and go to state 7 + + + State 4 + + 3 A: a • C D E + + a shift, and go to state 8 + + C go to state 9 + D go to state 10 + + + State 5 + + 1 S: a A • B a + + c shift, and go to state 11 + + $default reduce using rule 5 (B) + + B go to state 12 + + + State 6 + + 2 S: b A • B b + + c shift, and go to state 11 + + $default reduce using rule 5 (B) + + B go to state 13 + + + State 7 + + 0 $accept: S "end of file" • + + $default accept + + + State 8 + + 7 D: a • + + $default reduce using rule 7 (D) + + + State 9 + + 3 A: a C • D E + + a shift, and go to state 8 + + D go to state 14 + + + State 10 + + 6 C: D • + + $default reduce using rule 6 (C) + + + State 11 + + 4 B: c • + + $default reduce using rule 4 (B) + + + State 12 + + 1 S: a A B • a + + a shift, and go to state 15 + + + State 13 + + 2 S: b A B • b + + b shift, and go to state 16 + + + State 14 + + 3 A: a C D • E + + a shift, and go to state 17 + + a reduce using rule 9 (E) + b reduce using rule 9 (E) + c reduce using rule 9 (E) + + E go to state 18 + + + State 15 + + 1 S: a A B a • + + $default reduce using rule 1 (S) + + + State 16 + + 2 S: b A B b • + + $default reduce using rule 2 (S) + + + State 17 + + 8 E: a • + + $default reduce using rule 8 (E) + + + State 18 + + 3 A: a C D E • + + $default reduce using rule 3 (A) + + + State 19 + + 3 A: a • C D E + + a shift, and go to state 8 + + C go to 
state 20 + D go to state 10 + + + State 20 + + 3 A: a C • D E + + a shift, and go to state 8 + + D go to state 21 + + + State 21 + + 3 A: a C D • E + + a shift, and go to state 17 + + $default reduce using rule 9 (E) + + E go to state 18 + + + STR end end end From 0a03198e93ca0feb7f65755f2ee78b2bea81337b Mon Sep 17 00:00:00 2001 From: Junichi Kobayashi Date: Fri, 25 Oct 2024 17:40:41 +0900 Subject: [PATCH 5/6] Optimize calculating predecessors --- lib/lrama/states.rb | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/lib/lrama/states.rb b/lib/lrama/states.rb index f75a2c45..dc317609 100644 --- a/lib/lrama/states.rb +++ b/lib/lrama/states.rb @@ -93,7 +93,6 @@ def compute end def compute_ielr - report_duration(:compute_predecessors) { compute_predecessors } report_duration(:split_states) { split_states } report_duration(:compute_direct_read_sets) { compute_direct_read_sets } report_duration(:compute_reads_relation) { compute_reads_relation } @@ -280,7 +279,10 @@ def compute_lr0_states state.shifts.each do |shift| new_state, created = create_state(shift.next_sym, shift.next_items, states_created) state.set_items_to_state(shift.next_items, new_state) - enqueue_state(states, new_state) if created + if created + enqueue_state(states, new_state) + new_state.append_predecessor(state) + end end end end @@ -540,17 +542,6 @@ def compute_default_reduction end end - def compute_predecessors - queue = [@states.first] - until queue.empty? 
- state = queue.shift - state.transitions.each do |_, next_state| - next_state.append_predecessor(state) - queue << next_state - end - end - end - def split_states transition_queue = [] @states.each do |state| From 7da96768fb91d6e017102a0b9a06528d7cf43f39 Mon Sep 17 00:00:00 2001 From: Junichi Kobayashi Date: Thu, 14 Nov 2024 13:10:18 +0900 Subject: [PATCH 6/6] Check existence of contributions --- lib/lrama/state.rb | 50 ++++++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/lib/lrama/state.rb b/lib/lrama/state.rb index ab0ca2d0..3008786c 100644 --- a/lib/lrama/state.rb +++ b/lib/lrama/state.rb @@ -26,6 +26,9 @@ def initialize(id, accessing_symbol, kernels) @predecessors = [] @lalr_isocore = self @ielr_isocores = [self] + @internal_dependencies = {} + @successor_dependencies = {} + @always_follows = {} end def closure=(closure) @@ -230,21 +233,27 @@ def inadequacy_list end def annotation_list - manifestations = annotate_manifestation - predecessors = transitions.map {|_, next_state| next_state.annotate_predecessor(self) } - predecessors.reduce(manifestations) {|result, annotations| - result.merge(annotations) {|_, actions_a, actions_b| - if actions_a.nil? || actions_b.nil? - actions_a || actions_b - else - actions_a.merge(actions_b) {|_, contributions_a, contributions_b| - contributions_a.merge(contributions_b) {|_, contributed_a, contributed_b| - contributed_a || contributed_b - } + return @annotation_list if @annotation_list + + @annotation_list = annotate_manifestation + @annotation_list = @items_to_state.values.map {|next_state| next_state.annotate_predecessor(self) } + .reduce(@annotation_list) {|result, annotations| + result.merge(annotations) {|_, actions_a, actions_b| + if actions_a.nil? || actions_b.nil? + actions_a || actions_b + else + actions_a.merge(actions_b) {|_, contributions_a, contributions_b| + if contributions_a.nil? || contributions_b.nil? 
+ next contributions_a || contributions_b + end + + contributions_a.merge(contributions_b) {|_, contributed_a, contributed_b| + contributed_a || contributed_b + } + } + end } - end } - } end def annotate_manifestation @@ -269,7 +278,7 @@ def annotate_predecessor(predecessor) token = annotation_list.key(actions) actions.transform_values {|inadequacy| next nil if inadequacy.nil? - lhs_adequacy = kernels.all? {|kernel| + lhs_adequacy = kernels.any? {|kernel| inadequacy[kernel] && kernel.position == 1 && predecessor.lhs_contributions(kernel.lhs, token).nil? } if lhs_adequacy @@ -368,6 +377,8 @@ def goto_follows(shift, next_state) end def always_follows(shift, next_state) + return @always_follows[[shift, next_state]] if @always_follows[[shift, next_state]] + queue = internal_dependencies(shift, next_state) + successor_dependencies(shift, next_state) terms = [] until queue.empty? @@ -376,18 +387,23 @@ def always_follows(shift, next_state) st.internal_dependencies(sh, next_st).each {|v| queue << v } st.successor_dependencies(sh, next_st).each {|v| queue << v } end - terms + @always_follows[[shift, next_state]] = terms end def internal_dependencies(shift, next_state) + return @internal_dependencies[[shift, next_state]] if @internal_dependencies[[shift, next_state]] + syms = @items.select {|i| i.next_sym == shift.next_sym && i.symbols_after_transition.all?(&:nullable) && i.position == 0 }.map(&:lhs).uniq - nterm_transitions.select {|sh, _| syms.include?(sh.next_sym) }.map {|goto| [self, *goto] } + @internal_dependencies[[shift, next_state]] = nterm_transitions.select {|sh, _| syms.include?(sh.next_sym) }.map {|goto| [self, *goto] } end def successor_dependencies(shift, next_state) - next_state.nterm_transitions + return @successor_dependencies[[shift, next_state]] if @successor_dependencies[[shift, next_state]] + + @successor_dependencies[[shift, next_state]] = + next_state.nterm_transitions .select {|next_shift, _| next_shift.next_sym.nullable } .map {|transition| 
[next_state, *transition] } end