Skip to content

Commit

Permalink
Fix dstr unparsing
Browse files Browse the repository at this point in the history
* This is an entirely new approach.
* Instead to find the "correct" dstr segments we simply try all and unparse the first one
  that round trips.
* This so far guarantees we always get good concrete syntax, but it can be time intensive as
  the combinatoric space of possible dynamic string sequence is quadratic with the dstr children size.
* For this reason we try above (currently) dstr children to unparse as heredoc first.
* Passes the entire corpus and fixes bugs.

[fix #249]
  • Loading branch information
mbj committed Oct 30, 2024
1 parent 2163503 commit 285fef6
Show file tree
Hide file tree
Showing 59 changed files with 1,429 additions and 885 deletions.
7 changes: 7 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# v0.7.0 2024-09-16

[#366](https://github.com/mbj/unparser/pull/366)

* Fix all known dstring issues.
* Interface changes.

# v0.6.15 2024-06-10

[#373](https://github.com/mbj/unparser/pull/373)
Expand Down
2 changes: 2 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,6 @@

source 'https://rubygems.org'

gem 'mutant', path: '../mutant'

gemspec
58 changes: 29 additions & 29 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
PATH
remote: ../mutant
specs:
mutant (0.12.4)
diff-lcs (~> 1.3)
parser (~> 3.3.0)
regexp_parser (~> 2.9.0)
sorbet-runtime (~> 0.5.0)
unparser (~> 0.7.0)

PATH
remote: .
specs:
unparser (0.6.15)
unparser (0.7.0)
diff-lcs (~> 1.3)
parser (>= 3.3.0)

Expand All @@ -10,67 +20,57 @@ GEM
specs:
ast (2.4.2)
diff-lcs (1.5.1)
json (2.7.2)
json (2.7.5)
language_server-protocol (3.17.0.3)
mutant (0.12.3)
diff-lcs (~> 1.3)
parser (~> 3.3.0)
regexp_parser (~> 2.9.0)
sorbet-runtime (~> 0.5.0)
unparser (~> 0.6.14)
mutant-rspec (0.12.3)
mutant (= 0.12.3)
mutant-rspec (0.12.4)
mutant (= 0.12.4)
rspec-core (>= 3.8.0, < 4.0.0)
parallel (1.25.1)
parser (3.3.2.0)
parallel (1.26.3)
parser (3.3.5.0)
ast (~> 2.4.1)
racc
racc (1.8.0)
racc (1.8.1)
rainbow (3.1.1)
regexp_parser (2.9.2)
rexml (3.2.9)
strscan
rspec (3.13.0)
rspec-core (~> 3.13.0)
rspec-expectations (~> 3.13.0)
rspec-mocks (~> 3.13.0)
rspec-core (3.13.0)
rspec-core (3.13.2)
rspec-support (~> 3.13.0)
rspec-expectations (3.13.0)
rspec-expectations (3.13.3)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.13.0)
rspec-its (1.3.0)
rspec-its (1.3.1)
rspec-core (>= 3.0.0)
rspec-expectations (>= 3.0.0)
rspec-mocks (3.13.1)
rspec-mocks (3.13.2)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.13.0)
rspec-support (3.13.1)
rubocop (1.64.1)
rubocop (1.67.0)
json (~> 2.3)
language_server-protocol (>= 3.17.0)
parallel (~> 1.10)
parser (>= 3.3.0.2)
rainbow (>= 2.2.2, < 4.0)
regexp_parser (>= 1.8, < 3.0)
rexml (>= 3.2.5, < 4.0)
rubocop-ast (>= 1.31.1, < 2.0)
regexp_parser (>= 2.4, < 3.0)
rubocop-ast (>= 1.32.2, < 2.0)
ruby-progressbar (~> 1.7)
unicode-display_width (>= 2.4.0, < 3.0)
rubocop-ast (1.31.3)
rubocop-ast (1.33.0)
parser (>= 3.3.1.0)
rubocop-packaging (0.5.2)
rubocop (>= 1.33, < 2.0)
ruby-progressbar (1.13.0)
sorbet-runtime (0.5.11422)
strscan (3.1.0)
unicode-display_width (2.5.0)
sorbet-runtime (0.5.11625)
unicode-display_width (2.6.0)

PLATFORMS
ruby
x86_64-linux

DEPENDENCIES
mutant (~> 0.12.2)
mutant!
mutant-rspec (~> 0.12.2)
rspec (~> 3.9)
rspec-core (~> 3.9)
Expand Down
96 changes: 92 additions & 4 deletions bin/corpus
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

require 'etc'
require 'mutant'
require 'optparse'
require 'pathname'
require 'unparser'

Thread.abort_on_exception = true

module Unparser
module Corpus
ROOT = Pathname.new(__dir__).parent
Expand All @@ -17,16 +21,85 @@ module Unparser
#
# @return [Boolean]
def verify
puts("Verifiying: #{name}")
checkout
command = %W[unparser #{repo_path}]
exclude.each do |name|
command.push('--ignore', repo_path.join(name).to_s)

paths = Pathname.glob(Pathname.new(repo_path).join('**/*.rb'))

driver = Mutant::Parallel.async(
config: Mutant::Parallel::Config.new(
block: method(:verify_path),
jobs: Etc.nprocessors,
on_process_start: ->(*) {},
process_name: 'unparser-corpus-test',
sink: Sink.new,
source: Mutant::Parallel::Source::Array.new(jobs: paths),
thread_name: 'unparser-corpus-test',
timeout: nil
),
world: Mutant::WORLD
)

loop do
status = driver.wait_timeout(1)

puts("Processed: #{status.payload.total}")

status.payload.errors.each do |report|
puts report
fail
end

break if status.done?
end
Kernel.system(*command)

true
end

private

class Sink
include Mutant::Parallel::Sink

attr_reader :errors, :total

def initialize
@errors = []
@total = 0
end

def stop?
!@errors.empty?
end

def status
self
end

def response(response)
if response.error
Mutant::WORLD.stderr.puts(response.log)
fail response.error
end

@total += 1

if response.result
@errors << response.result
end
end
end

def verify_path(path)
validation = Validation.from_path(path)

if original_syntax_error?(validation) || generated_encoding_error?(validation) || validation.success?
return
end

validation.report
end

def checkout
TMP.mkdir unless TMP.directory?

Expand All @@ -50,6 +123,21 @@ module Unparser
TMP.join(name)
end

private

# This happens if the original source contained a non UTF charset meta comment.
# These are not exposed to the AST in a way unparser could know about to generate a non UTF-8
# target and emit that meta comment itself.
# For the purpose of corpus testing these cases are ignored.
def generated_encoding_error?(validation)
exception = validation.generated_node.from_left { return false }
exception.instance_of?(Parser::SyntaxError) && exception.message.eql?('literal contains escape sequences incompatible with UTF-8')
end

def original_syntax_error?(validation)
validation.original_node.from_left { return false }.instance_of?(Parser::SyntaxError)
end

def system(arguments)
return if Kernel.system(*arguments)

Expand Down
41 changes: 33 additions & 8 deletions bin/parser-round-trip-test
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,14 @@ class Test
:rubies
)

EXPECT_FAILURE = {}.freeze
EXPECT_FAILURE = {}.freeze
STATIC_LOCAL_VARIABLES = %w[foo bar baz].to_set.freeze

NO_ROUND_TRIP = %i[
test_int___LINE__
test_pattern_matching__FILE__LINE_literals
test_string___FILE__
].freeze

def legacy_attributes
default_builder_attributes.reject do |attribute_name, value|
Expand All @@ -56,6 +63,8 @@ class Test
"Non targeted rubies: #{rubies.join(',')}"
elsif validation.original_node.left?
'Test specifies a syntax error'
elsif NO_ROUND_TRIP.include?(name)
'Test not round trippable'
end
end

Expand All @@ -77,20 +86,25 @@ class Test

# rubocop:disable Metrics/AbcSize
def validation
identification = name.to_s
identification = name.to_s

ast = Unparser::AST.new(
comments: [],
explicit_encoding: nil,
node: node,
static_local_variables: STATIC_LOCAL_VARIABLES
)

generated_source = Unparser.unparse_either(node)
generated_source = Unparser.unparse_ast_either(ast)
.fmap { |string| string.dup.force_encoding(parser_source.encoding).freeze }

generated_node = generated_source.bind do |source|
parse_either(source, identification)
end
generated_node = generated_source.bind { |source| parse_either(source, identification) }

Unparser::Validation.new(
generated_node: generated_node,
generated_source: generated_source,
identification: identification,
original_node: parse_either(parser_source, identification).fmap { node },
original_ast: parse_either_ast(parser_source, identification),
original_source: right(parser_source)
)
end
Expand All @@ -99,7 +113,7 @@ class Test

def parser
Unparser.parser.tap do |parser|
%w[foo bar baz].each(&parser.static_env.method(:declare))
STATIC_LOCAL_VARIABLES.each(&parser.static_env.method(:declare))
end
end

Expand All @@ -108,6 +122,17 @@ class Test
parser.parse(Unparser.buffer(source, identification))
end
end

def parse_either_ast(source, identification)
parse_either(source, identification).fmap do |node|
Unparser::AST.new(
comments: [],
explicit_encoding: nil,
node: node,
static_local_variables: Set.new
)
end
end
end

class Execution
Expand Down
Loading

0 comments on commit 285fef6

Please sign in to comment.