Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
yui-knk committed Nov 26, 2023
1 parent a49ab82 commit 25226a0
Show file tree
Hide file tree
Showing 23 changed files with 1,383 additions and 404 deletions.
4 changes: 4 additions & 0 deletions Steepfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ target :lib do
check "lib/lrama/grammar/code/printer_code.rb"
check "lib/lrama/grammar/code.rb"
check "lib/lrama/grammar/counter.rb"
check "lib/lrama/grammar/parser_state.rb"
check "lib/lrama/grammar/percent_code.rb"
check "lib/lrama/grammar/precedence.rb"
check "lib/lrama/grammar/printer.rb"
Expand All @@ -15,6 +16,9 @@ target :lib do
check "lib/lrama/lexer/token/char.rb"
check "lib/lrama/lexer/token/ident.rb"
check "lib/lrama/lexer/token/parameterizing.rb"
check "lib/lrama/lexer/token/parser_state_pop.rb"
check "lib/lrama/lexer/token/parser_state_push.rb"
check "lib/lrama/lexer/token/parser_state_set.rb"
check "lib/lrama/lexer/token/tag.rb"
check "lib/lrama/lexer/token/user_code.rb"
check "lib/lrama/lexer/location.rb"
Expand Down
10 changes: 8 additions & 2 deletions lib/lrama/grammar.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
require "lrama/grammar/code"
require "lrama/grammar/counter"
require "lrama/grammar/error_token"
require "lrama/grammar/parser_state"
require "lrama/grammar/percent_code"
require "lrama/grammar/precedence"
require "lrama/grammar/printer"
Expand All @@ -16,7 +17,7 @@
module Lrama
# Grammar is the result of parsing an input grammar file
class Grammar
attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
attr_reader :percent_codes, :parser_states, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
attr_accessor :union, :expect,
:printers, :error_tokens,
:lex_param, :parse_param, :initial_action,
Expand All @@ -31,6 +32,7 @@ def initialize(rule_counter)
@percent_codes = []
@printers = []
@error_tokens = []
@parser_states = []
@symbols = []
@types = []
@rule_builders = []
Expand Down Expand Up @@ -58,6 +60,10 @@ def add_error_token(ident_or_tags:, token_code:, lineno:)
@error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno)
end

# Registers one parser-state declaration on this grammar.
#
# Wraps the two arguments in a ParserState and appends it to @parser_states.
# Returns @parser_states (the result of Array#<<).
def add_parser_state(state_id, state_list)
  declaration = ParserState.new(state_id: state_id, state_list: state_list)
  @parser_states << declaration
end

def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
if token_id && (sym = @symbols.find {|s| s.token_id == token_id })
if replace
Expand Down Expand Up @@ -186,7 +192,7 @@ def find_symbol_by_id(id)
end

# Strict variant of find_symbol_by_id: returns the symbol or fails loudly.
#
# id is interpolated through #s_value, so it must respond to that method;
# the error message then shows the identifier text rather than the token's
# inspect output.
#
# Returns the (truthy) result of find_symbol_by_id(id).
# Raises RuntimeError "Symbol not found: <s_value>" when the lookup is falsy.
def find_symbol_by_id!(id)
  find_symbol_by_id(id) || (raise "Symbol not found: #{id.s_value}")
end

def find_symbol_by_number!(number)
Expand Down
138 changes: 138 additions & 0 deletions lib/lrama/grammar/parser_state.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
module Lrama
  class Grammar
    # One named parser state declared in the grammar, together with the list
    # of values it may take.
    #
    # Every public method returns a piece of C source text (or a C identifier)
    # derived from the state's name. state_id and each element of state_list
    # only need to respond to #s_value.
    class ParserState
      attr_reader :state_id, :state_list

      def initialize(state_id:, state_list:)
        @state_id = state_id
        @state_list = state_list
      end

      # --- identifiers -----------------------------------------------------

      # Raw name of the state, taken from the declaring token.
      def state_name
        state_id.s_value
      end

      # Tag of the generated C enum.
      def enum_name
        "yyparser_state_#{state_name}"
      end

      # Typedef name for the generated C enum.
      def enum_type
        "#{enum_name}_t"
      end

      # Name of the C array holding the printable name of every enumerator.
      def enum_name_table_name
        "#{enum_name}_names"
      end

      # Common prefix of the stack variables (<prefix>, <prefix>_a, <prefix>_p).
      # Currently the same string as enum_name.
      def stack_prefix
        "yyparser_state_#{state_name}"
      end

      # Name of the C variable holding the current stack capacity.
      def states_stack_size_name
        "#{stack_prefix}_stacksize"
      end

      # Macro mapping an enum value to its printable name.
      def state_name_macro
        "YY_STATE_#{state_name.upcase}_NAME"
      end

      # Macro yielding the printable name of the value on top of the stack.
      def current_state_name_macro
        "YY_CURRENT_STATE_#{state_name.upcase}_NAME"
      end

      # --- enum pieces -----------------------------------------------------

      # Enumerator list: the s_value of each state joined by ",\n ".
      def enum_body
        state_list.map(&:s_value).join(",\n ")
      end

      # Double-quoted enumerator names followed by a YY_NULLPTR terminator.
      def int_to_name
        state_list.map { |state| %("#{state.s_value}") } + ["YY_NULLPTR"]
      end

      # --- generated C fragments -------------------------------------------

      # Enum, its typedef, the name table, the <enum>_name lookup function and
      # the two *_NAME convenience macros.
      def enum_definition
        tag = enum_name
        type = enum_type
        table = enum_name_table_name
        name_macro = state_name_macro

        <<~C_ENUM
          enum #{tag}
          {
          #{enum_body}
          };
          typedef enum #{tag} #{type};
          static const char *const #{table}[] = {
          #{int_to_name.join(", ")}
          };
          YY_ATTRIBUTE_UNUSED
          static const char *
          #{tag}_name (#{type} num)
          {
          return #{table}[num];
          }
          # define #{name_macro}(value) #{tag}_name (value)
          # define #{current_state_name_macro} #{name_macro} (*#{stack_prefix}_p)
        C_ENUM
      end

      # Declarations of the capacity variable and the three stack variables
      # (backing array, bottom pointer, top pointer).
      def states_stacks
        stack = stack_prefix

        <<~C_DECLS
          /* Current size of state stack size */
          YYPTRDIFF_T #{states_stack_size_name} = YYINITDEPTH;
          /* The parser state stack (#{stack}): array, bottom, top. */
          int #{stack}_a[YYINITDEPTH];
          int *#{stack} = #{stack}_a;
          int *#{stack}_p = #{stack};
        C_DECLS
      end

      # YYPUSH_STATE_*, YYPOP_STATE_* and YYSET_STATE_* macros plus the
      # YY_STATE_* alias for the top-of-stack pointer.
      # Every "\\" below is emitted as a single backslash.
      def states_functions
        suffix = state_name.upcase
        label = state_name
        stack = stack_prefix
        capacity = states_stack_size_name
        name_of = state_name_macro

        <<~C_MACROS
          # define YYPUSH_STATE_#{suffix}(value) \\
          do \\
          { \\
          if (#{stack} + #{capacity} - 1 <= #{stack}_p) \\
          YYSTATE_STACK_INCREASE (#{stack}_a, #{stack}, #{stack}_p, #{capacity}, "#{label}"); \\
          YYDPRINTF ((stderr, "Push %s to #{label}\\n", #{name_of} (value))); \\
          *++#{stack}_p = value; \\
          } \\
          while (0)
          # define YYPOP_STATE_#{suffix}() \\
          do \\
          { \\
          YYDPRINTF ((stderr, "Pop #{label}\\n")); \\
          if (#{stack}_p != #{stack}) \\
          { \\
          #{stack}_p -= 1; \\
          } \\
          else \\
          { \\
          YYDPRINTF ((stderr, "Try to pop empty #{label} stack\\n")); \\
          } \\
          } \\
          while (0)
          # define YYSET_STATE_#{suffix}(value) \\
          do \\
          { \\
          YYDPRINTF ((stderr, "Set %s to #{label}\\n", #{name_of} (value))); \\
          *#{stack}_p = value; \\
          } \\
          while (0)
          # define YY_STATE_#{suffix} #{stack}_p
        C_MACROS
      end

      # Statement that frees the stack when it no longer points at the
      # backing array.
      def states_clean_up_stack
        stack = stack_prefix

        <<~C_CLEANUP
          if (#{stack} != #{stack}_a)
          YYSTACK_FREE (#{stack});
        C_CLEANUP
      end
    end
  end
end
20 changes: 20 additions & 0 deletions lib/lrama/grammar/rule_builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,12 @@ def process_rhs
@parameterizing_rules << r
end
@replaced_rhs << parameterizing.build_token
when Lrama::Lexer::Token::ParserStatePop
process_parser_state_token(token, "parser_state_pop_", "YYPOP_STATE_#{token.s_value.upcase}();", i)
when Lrama::Lexer::Token::ParserStatePush
process_parser_state_token(token, "parser_state_push_", "YYPUSH_STATE_#{token.s_value.upcase}(#{token.state.s_value});", i)
when Lrama::Lexer::Token::ParserStateSet
process_parser_state_token(token, "parser_state_set_", "YYSET_STATE_#{token.s_value.upcase}(#{token.state.s_value});", i)
when Lrama::Lexer::Token::UserCode
prefix = token.referred ? "@" : "$@"
new_token = Lrama::Lexer::Token::Ident.new(s_value: prefix + @midrule_action_counter.increment.to_s)
Expand All @@ -132,6 +138,20 @@ def process_rhs
end
end

# Replaces one parser-state directive token in the RHS with a generated
# identifier and queues a derived rule whose action is `code`.
#
# token  - the directive token; its s_value and location are read.
# prefix - string prepended to the generated identifier's name.
# code   - C snippet stored as the derived rule's user code.
# position_in_original_rule_rhs - passed through as the third positional
#   argument of RuleBuilder.new.
#
# The generated name is prefix + token.s_value + the next value of
# @midrule_action_counter. Returns @rule_builders_for_derived_rules.
def process_parser_state_token(token, prefix, code, position_in_original_rule_rhs)
  generated_lhs = Lrama::Lexer::Token::Ident.new(
    s_value: prefix + token.s_value + @midrule_action_counter.increment.to_s
  )
  action = Lrama::Lexer::Token::UserCode.new(s_value: code, location: token.location)

  # The generated identifier stands in for the directive in the rewritten RHS.
  @replaced_rhs << generated_lhs

  derived = RuleBuilder.new(
    @rule_counter,
    @midrule_action_counter,
    position_in_original_rule_rhs,
    skip_preprocess_references: true
  ).tap do |builder|
    builder.lhs = generated_lhs
    builder.user_code = action
    builder.complete_input
    builder.setup_rules
  end

  @rule_builders_for_derived_rules << derived
end

def numberize_references
# Bison n'th component is 1-origin
(rhs + [user_code]).compact.each.with_index(1) do |token, i|
Expand Down
4 changes: 4 additions & 0 deletions lib/lrama/lexer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ class Lexer
%error-token
%empty
%code
%parser-state-push
%parser-state-pop
%parser-state-set
%parser-state
)

def initialize(text)
Expand Down
3 changes: 3 additions & 0 deletions lib/lrama/lexer/token.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
require 'lrama/lexer/token/char'
require 'lrama/lexer/token/ident'
require 'lrama/lexer/token/parameterizing'
require 'lrama/lexer/token/parser_state_pop'
require 'lrama/lexer/token/parser_state_push'
require 'lrama/lexer/token/parser_state_set'
require 'lrama/lexer/token/tag'
require 'lrama/lexer/token/user_code'

Expand Down
8 changes: 8 additions & 0 deletions lib/lrama/lexer/token/parser_state_pop.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
module Lrama
class Lexer
class Token
# Marker token for a parser-state "pop" directive inside a rule's RHS.
# It adds nothing to Token; RuleBuilder#process_rhs dispatches on the class
# and builds a `YYPOP_STATE_<NAME>();` action from the token's upcased s_value.
class ParserStatePop < Token
end
end
end
end
9 changes: 9 additions & 0 deletions lib/lrama/lexer/token/parser_state_push.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
module Lrama
class Lexer
class Token
# Token for a parser-state "push" directive inside a rule's RHS.
# RuleBuilder#process_rhs dispatches on the class and builds a
# `YYPUSH_STATE_<NAME>(<state>);` action from the token's upcased s_value
# and from state.s_value.
class ParserStatePush < Token
# The value to push; it must respond to #s_value (read by RuleBuilder).
# NOTE(review): presumably assigned by the grammar parser after the token is
# created — confirm against the parser definition.
attr_accessor :state
end
end
end
end
9 changes: 9 additions & 0 deletions lib/lrama/lexer/token/parser_state_set.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
module Lrama
class Lexer
class Token
# Token for a parser-state "set" directive inside a rule's RHS.
# RuleBuilder#process_rhs dispatches on the class and builds a
# `YYSET_STATE_<NAME>(<state>);` action from the token's upcased s_value
# and from state.s_value.
class ParserStateSet < Token
# The value to store on top of the stack; it must respond to #s_value
# (read by RuleBuilder).
# NOTE(review): presumably assigned by the grammar parser after the token is
# created — confirm against the parser definition.
attr_accessor :state
end
end
end
end
28 changes: 28 additions & 0 deletions lib/lrama/output.rb
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,34 @@ def percent_code(name)
end.join
end

# True when the grammar holds at least one parser state, false otherwise.
def has_parser_states?
  declared = @grammar.parser_states
  !declared.empty?
end

# Collects enum_definition from every parser state of the grammar,
# in declaration order. Returns an Array (empty when there are no states).
def parser_states_enums
  @grammar.parser_states.map(&:enum_definition)
end

# Collects states_stacks from every parser state of the grammar,
# in declaration order. Returns an Array (empty when there are no states).
def parser_states_stacks
  @grammar.parser_states.map(&:states_stacks)
end

# Collects states_functions from every parser state of the grammar,
# in declaration order. Returns an Array (empty when there are no states).
def parser_states_functions
  @grammar.parser_states.map(&:states_functions)
end

# Collects states_clean_up_stack from every parser state of the grammar,
# in declaration order. Returns an Array (empty when there are no states).
def parser_states_clean_up_stack
  @grammar.parser_states.map(&:states_clean_up_stack)
end

private

def eval_template(file, path)
Expand Down
Loading

0 comments on commit 25226a0

Please sign in to comment.