Skip to content

Commit

Permalink
Merge pull request #62 from junk0612/use-racc
Browse files Browse the repository at this point in the history
Replace parser with one made by Racc
  • Loading branch information
yui-knk authored Oct 6, 2023
2 parents 884c91c + 94ec817 commit 6adc53d
Show file tree
Hide file tree
Showing 9 changed files with 1,517 additions and 2 deletions.
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ gem "rspec"
gem "pry"
# stackprof doesn't support Windows
gem "stackprof", platforms: [:ruby]
gem "racc"
gem "rake"
gem "rbs", require: false
gem "steep", require: false
Expand Down
12 changes: 12 additions & 0 deletions Rakefile
Original file line number Diff line number Diff line change
@@ -1 +1,13 @@
require "bundler/gem_tasks"

# Tasks that regenerate the Racc-based parser (lib/lrama/new_parser.rb) from parser.y.
#
# The commands are run through Rake's `sh` rather than backticks: backticks
# capture (and here discard) the command's output and ignore its exit status,
# so a failing racc run would look like a successful build. `sh` echoes the
# command, lets racc's output through, and fails the task on a non-zero exit.
namespace "build" do
  desc "build parser from parser.y by using Racc"
  task :racc_parser do
    sh "bundle exec racc parser.y -o lib/lrama/new_parser.rb"
  end

  desc "build parser for debugging"
  task :racc_verbose_parser do
    # -t and -v are passed to racc for the debugging build.
    sh "bundle exec racc parser.y -o lib/lrama/new_parser.rb -t -v"
  end
end
2 changes: 2 additions & 0 deletions lib/lrama.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,5 @@
require "lrama/type"
require "lrama/version"
require "lrama/warning"
require "lrama/new_parser"
require "lrama/new_lexer"
3 changes: 2 additions & 1 deletion lib/lrama/command.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ def run(argv)
Report::Duration.enable if options.trace_opts[:time]

warning = Lrama::Warning.new
grammar = Lrama::Parser.new(options.y.read).parse
text = options.y.read
options.y.close if options.y != STDIN
grammar = Lrama::NewParser.new(text).parse
states = Lrama::States.new(grammar, warning, trace_state: (options.trace_opts[:automaton] || options.trace_opts[:closure]))
states.compute
context = Lrama::Context.new(states)
Expand Down
181 changes: 181 additions & 0 deletions lib/lrama/grammar.rb
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,187 @@ def nterms
@nterms ||= @symbols.select(&:nterm?)
end

# Finds the `$`/`@` references used in every piece of user code held by the
# grammar and stores them on the owning code token.
#
# Four sources are processed, in this order:
#   1. the initial action (skipped when `initial_action` is nil),
#   2. the code of each entry in @printers,
#   3. the code of each entry in @error_tokens,
#   4. every User_code token found on the right-hand side of @_rules.
#
# For each of them the scanned references are assigned to the token's
# `references` and the token is then handed to `build_references`. Rule
# actions are additionally passed through `numberize_references(lhs, rhs)`
# before `build_references`, and are the only source in which `/* ... */`
# comments are skipped while scanning.
def extract_references
  unless initial_action.nil?
    initial_action.token_code.references = scan_code_references(initial_action.s_value)
    build_references(initial_action.token_code)
  end

  @printers.each do |printer|
    printer.code.token_code.references = scan_code_references(printer.code.s_value)
    build_references(printer.code.token_code)
  end

  @error_tokens.each do |error_token|
    error_token.code.token_code.references = scan_code_references(error_token.code.s_value)
    build_references(error_token.code.token_code)
  end

  @_rules.each do |lhs, rhs, _|
    rhs.each do |token|
      # Only user code (actions) can contain references; grammar symbols and
      # every other token type are left untouched.
      next if token.class == Lrama::Grammar::Symbol || token.type != Lrama::Lexer::Token::User_code

      token.references = scan_code_references(token.s_value, skip_comments: true)
      token.numberize_references(lhs, rhs)
      build_references(token)
    end
  end
end

# Scans a string of user code for `$` and `@` references.
#
# code          - the user code text to scan.
# skip_comments - when true, a `/*` makes the scanner jump to just after the
#                 next `*/`, so references inside C comments are ignored.
#                 When false, comment contents are scanned like any other text.
#
# Returns an Array with one entry per reference, in order of appearance:
#   [kind, name, tag, first, last]
# where kind is :dollar or :at, name is "$", an Integer (positional) or a
# String (named), tag is a Tag token built from an explicit `<type>` or nil,
# and first/last are the StringScanner positions of the first and last
# character of the reference.
def scan_code_references(code, skip_comments: false)
  scanner = StringScanner.new(code)
  references = []
  tag_for = lambda do |text|
    text ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: text) : nil
  end

  until scanner.eos?
    start = scanner.pos
    case
    # $ references
    # An identifier must be wrapped in brackets to contain "." or "-".
    when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
      references << [:dollar, "$", tag_for.call(scanner[1]), start, scanner.pos - 1]
    when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
      references << [:dollar, Integer(scanner[2]), tag_for.call(scanner[1]), start, scanner.pos - 1]
    when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
      references << [:dollar, scanner[2], tag_for.call(scanner[1]), start, scanner.pos - 1]
    when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $[expr.right], $[expr-right], $<long>[program] (named reference with brackets)
      references << [:dollar, scanner[2], tag_for.call(scanner[1]), start, scanner.pos - 1]

    # @ references
    # An identifier must be wrapped in brackets to contain "." or "-".
    when scanner.scan(/@\$/) # @$
      references << [:at, "$", nil, start, scanner.pos - 1]
    when scanner.scan(/@(\d+)/) # @1
      references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
    when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
      references << [:at, scanner[1], nil, start, scanner.pos - 1]
    when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets)
      references << [:at, scanner[1], nil, start, scanner.pos - 1]

    when skip_comments && scanner.scan(/\/\*/)
      # If no closing "*/" exists, scan_until does not move and scanning
      # simply continues after the "/*".
      scanner.scan_until(/\*\//)
    else
      scanner.getch
    end
  end

  references
end
private :scan_code_references

# Builds a Token of the given type and text and stamps it with its source
# position.
#
# type    - token type passed to Token.new as `type:`.
# s_value - token text passed to Token.new as `s_value:`.
# line    - value assigned to the token's `line`.
# column  - value assigned to the token's `column`.
#
# Returns the newly created token.
def create_token(type, s_value, line, column)
  Token.new(type: type, s_value: s_value).tap do |token|
    token.line = line
    token.column = column
  end
end
private

def find_nterm_by_id!(id)
Expand Down
130 changes: 130 additions & 0 deletions lib/lrama/new_lexer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
require "strscan"

module Lrama
  # Hand-written lexer over a grammar text, built on StringScanner.
  #
  # It works in one of two modes selected through `status`:
  #   :initial       - `next_token` returns grammar tokens (see `lex_token`).
  #   :c_declaration - `next_token` returns one chunk of raw C code that ends
  #                    at `end_symbol` (see `lex_c_code`).
  # Both accessors are writable, so the caller switches modes between calls.
  # Presumably the caller is the Racc-generated parser; that code is not
  # part of this file.
  class NewLexer
    # Punctuation returned as [text, text]. Alternatives are tried in the
    # listed order, so the two-character forms win over "{" and "}".
    PUNCTUATION = /%\{|%\}|%%|\{|\}|\[|\]|:|\||;/
    # %-directives, also returned as [text, text].
    DIRECTIVE = /%union|%token|%type|%left|%right|%nonassoc|%expect|%define|%require|%printer|%lex-param|%parse-param|%initial-action|%prec|%error-token/
    # Character literals written with a backslash escape.
    ESCAPED_CHARACTER = /'\\\\'|'\\t'|'\\f'|'\\r'|'\\n'|'\\13'/
    # A complete double-quoted C string; a backslash escapes the next
    # character, so \" does not end the literal.
    C_STRING = /"(?:[^"\\]|\\.)*"/m

    attr_accessor :status
    attr_accessor :end_symbol

    # text - the grammar source to tokenize.
    def initialize(text)
      @scanner = StringScanner.new(text)
      @head = @scanner.pos
      @line = 1
      @status = :initial
      @end_symbol = nil
    end

    # Returns the next token for the current `status`, or nil when `status`
    # is neither :initial nor :c_declaration.
    def next_token
      case @status
      when :initial
        lex_token
      when :c_declaration
        lex_c_code
      end
    end

    # Current line number, starting at 1.
    def line
      @line
    end

    # Scanner position relative to the head recorded at the last newline.
    def col
      @scanner.pos - @head
    end

    # Lexes one grammar token, skipping whitespace and /* ... */ comments.
    #
    # Returns a two-element array: [text, text] for punctuation and
    # %-directives, or [:TAG | :CHARACTER | :STRING | :INTEGER | :IDENTIFIER,
    # text] otherwise (for :STRING the surrounding quotes are dropped).
    # Returns nil once the input is exhausted.
    #
    # Raises RuntimeError on an unrecognized character or an unterminated
    # string.
    def lex_token
      until @scanner.eos?
        case
        when @scanner.scan(/\n/)
          newline
        when @scanner.scan(/[^\S\n]+/)
          # Whitespace other than "\n". Newlines are deliberately excluded so
          # that each one goes through the branch above and is counted, even
          # when it follows spaces or tabs.
        when @scanner.scan(/\/\*/)
          lex_comment
        when @scanner.scan(PUNCTUATION) || @scanner.scan(DIRECTIVE)
          return [@scanner.matched, @scanner.matched]
        when @scanner.scan(/<\w+>/)
          return [:TAG, @scanner.matched]
        when @scanner.scan(/'.'/) || @scanner.scan(ESCAPED_CHARACTER)
          return [:CHARACTER, @scanner.matched]
        when @scanner.scan(/"/)
          rest = @scanner.scan_until(/"/)
          raise "Unterminated string literal at line #{@line}" if rest.nil?

          return [:STRING, rest[0..-2]]
        when @scanner.scan(/\d+/)
          return [:INTEGER, @scanner.matched]
        when @scanner.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
          return [:IDENTIFIER, @scanner.matched]
        else
          raise "Unexpected input at line #{@line}: #{@scanner.peek(10).inspect}"
        end
      end

      nil
    end

    # Collects raw C code up to, but not including, `end_symbol`.
    #
    # Braces are counted so that a "}" only terminates the code when it is
    # not closing a "{" seen earlier in the same chunk (and `end_symbol` is
    # "}"); in that case the "}" is put back for the next call. Double-quoted
    # strings are copied verbatim, so braces inside them are not counted.
    #
    # Returns [:C_DECLARATION, code].
    # Raises RuntimeError when the input ends before `end_symbol` is found or
    # inside a string literal.
    def lex_c_code
      nested = 0
      code = ''.dup
      # Escaped so the end symbol is always matched literally. `to_s` keeps
      # the previous behavior for a nil end symbol (an empty pattern).
      end_pattern = /#{Regexp.escape(@end_symbol.to_s)}/

      until @scanner.eos?
        case
        when @scanner.scan(/\{/)
          code << @scanner.matched
          nested += 1
        when @scanner.scan(/\}/)
          if nested == 0 && @end_symbol == '}'
            @scanner.unscan
            return [:C_DECLARATION, code]
          else
            code << @scanner.matched
            nested -= 1
          end
        when @scanner.check(end_pattern)
          return [:C_DECLARATION, code]
        when @scanner.scan(/\n/)
          code << @scanner.matched
          newline
        when @scanner.scan(C_STRING)
          code << @scanner.matched
          @line += @scanner.matched.count("\n")
        when @scanner.scan(/"/)
          # A quote that C_STRING could not match has no closing quote.
          raise "Unterminated string literal in C code at line #{@line}"
        else
          code << @scanner.getch
        end
      end

      raise "Unexpected end of input in C code at line #{@line} (expected #{@end_symbol.inspect})"
    end

    # Skips the remainder of a /* ... */ comment whose opening "/*" has
    # already been consumed, counting the newlines it contains. Returns
    # silently if the input ends before "*/".
    def lex_comment
      until @scanner.eos?
        case
        when @scanner.scan(/\n/)
          newline
        when @scanner.scan(/\*\//)
          return
        else
          @scanner.getch
        end
      end
    end

    private

    # Records a line break that the scanner has just consumed.
    def newline
      @line += 1
      @head = @scanner.pos + 1
    end
  end
end
Loading

0 comments on commit 6adc53d

Please sign in to comment.