From a9196672afb32012c42fdbf3468555ed9d97b3ed Mon Sep 17 00:00:00 2001 From: DavePearce Date: Tue, 12 Nov 2024 12:11:52 +0700 Subject: [PATCH] Enable name resolution This adds support for name resolution, which allows names to be referenced out-of-order in a given source file. --- pkg/cmd/debug.go | 1 - pkg/cmd/util.go | 7 +- pkg/corset/ast.go | 7 +- pkg/corset/compiler.go | 16 ++-- pkg/corset/environment.go | 134 ++++++++++++++++++++++++++++++ pkg/corset/parser.go | 47 +++++++++-- pkg/corset/resolver.go | 170 ++++++++++++++++++++++++++++++++++++++ pkg/corset/translator.go | 153 ++++++++++++++++++++++++++++++---- pkg/sexp/parser.go | 2 +- pkg/sexp/source_file.go | 82 ++++++++++++++++++ pkg/sexp/source_map.go | 118 ++++++++++---------------- pkg/sexp/translator.go | 44 ++++++++-- pkg/test/ir_test.go | 2 +- 13 files changed, 658 insertions(+), 125 deletions(-) create mode 100644 pkg/corset/environment.go diff --git a/pkg/cmd/debug.go b/pkg/cmd/debug.go index cd8c4d3..2541182 100644 --- a/pkg/cmd/debug.go +++ b/pkg/cmd/debug.go @@ -30,7 +30,6 @@ var debugCmd = &cobra.Command{ stats := GetFlag(cmd, "stats") // Parse constraints hirSchema := readSchema(args) - // Print constraints if stats { printStats(hirSchema, hir, mir, air) diff --git a/pkg/cmd/util.go b/pkg/cmd/util.go index 8e6bb18..c3e3ec0 100644 --- a/pkg/cmd/util.go +++ b/pkg/cmd/util.go @@ -185,7 +185,7 @@ func readSourceFiles(filenames []string) *hir.Schema { // Parse and compile source files schema, errs := corset.CompileSourceFiles(srcfiles) // Check for any errors - if errs == nil { + if len(errs) == 0 { return schema } // Report errors @@ -201,10 +201,7 @@ func readSourceFiles(filenames []string) *hir.Schema { // Print a syntax error with appropriate highlighting. func printSyntaxError(err *sexp.SyntaxError) { span := err.Span() - // Construct empty source map in order to determine enclosing line. 
- srcmap := sexp.NewSourceMap[sexp.SExp](err.SourceFile().Contents()) - // - line := srcmap.FindFirstEnclosingLine(span) + line := err.FirstEnclosingLine() // Print error + line number fmt.Printf("%s:%d: %s\n", err.SourceFile().Filename(), line.Number(), err.Message()) // Print separator line diff --git a/pkg/corset/ast.go b/pkg/corset/ast.go index d0776a0..6d76744 100644 --- a/pkg/corset/ast.go +++ b/pkg/corset/ast.go @@ -41,7 +41,7 @@ type Declaration interface { // DefColumns captures a set of one or more columns being declared. type DefColumns struct { - Columns []DefColumn + Columns []*DefColumn } // Resolve something. @@ -58,8 +58,9 @@ func (p *DefColumns) Lisp() sexp.SExp { // DefColumn packages together those piece relevant to declaring an individual // column, such its name and type. type DefColumn struct { - Name string - DataType sc.Type + Name string + DataType sc.Type + LengthMultiplier uint } // Lisp converts this node into its lisp representation. This is primarily used diff --git a/pkg/corset/compiler.go b/pkg/corset/compiler.go index b2ef2e5..494539b 100644 --- a/pkg/corset/compiler.go +++ b/pkg/corset/compiler.go @@ -1,6 +1,8 @@ package corset import ( + "fmt" + "github.com/consensys/go-corset/pkg/hir" "github.com/consensys/go-corset/pkg/sexp" ) @@ -61,14 +63,16 @@ func NewCompiler(circuit Circuit, srcmaps *sexp.SourceMaps[Node]) *Compiler { // expression refers to a non-existent module or column, or is not well-typed, // etc. func (p *Compiler) Compile() (*hir.Schema, []SyntaxError) { - schema := hir.EmptySchema() - // Allocate columns? - // // Resolve variables (via nested scopes) + env, errs := ResolveCircuit(p.srcmap, &p.circuit) + // Check whether any errors were encountered. If so, terminate since we + // cannot proceed with translation. + if len(errs) != 0 { + return nil, errs + } // Check constraint contexts (e.g. 
for constraints, lookups, etc) // Type check constraints + fmt.Println("Translating circuit...") // Finally, translate everything and add it to the schema. - errors := translateCircuit(&p.circuit, schema) - // Done - return schema, errors + return TranslateCircuit(env, p.srcmap, &p.circuit) } diff --git a/pkg/corset/environment.go b/pkg/corset/environment.go new file mode 100644 index 0000000..2eb0cba --- /dev/null +++ b/pkg/corset/environment.go @@ -0,0 +1,134 @@ +package corset + +import ( + "fmt" + + "github.com/consensys/go-corset/pkg/schema" + "github.com/consensys/go-corset/pkg/trace" +) + +// =================================================================== +// Environment +// =================================================================== + +// Identifies a specific column within the environment. +type colRef struct { + module uint + column string +} + +// Packages up information about a declared column (either input or assignment). +type colInfo struct { + // Column index + cid uint + // Length multiplier + multiplier uint + // Datatype + datatype schema.Type +} + +// Environment maps module and column names to their (respective) module and +// column indices. The environment separates input columns from assignment +// columns because they are disjoint in the schema being constructed (i.e. input +// columns always have a lower index than assignments). +type Environment struct { + // Maps module names to their module indices. + modules map[string]uint + // Maps input columns to their column indices. + columns map[colRef]colInfo +} + +// EmptyEnvironment constructs an empty environment. +func EmptyEnvironment() *Environment { + modules := make(map[string]uint) + columns := make(map[colRef]colInfo) + // + return &Environment{modules, columns} +} + +// RegisterModule registers a new module within this environment. Observe that +// this will panic if the module already exists. 
Furthermore, the module +// identifier is always determined as the next available identifier. +func (p *Environment) RegisterModule(module string) trace.Context { + if p.HasModule(module) { + panic(fmt.Sprintf("module %s already exists", module)) + } + // Update schema + mid := uint(len(p.modules)) + // Update cache + p.modules[module] = mid + // Done + return trace.NewContext(mid, 1) +} + +// RegisterColumn registers a new column (input or assignment) within a given +// module. Observe that this will panic if the column already exists. +// Furthermore, the column identifier is always determined as the next available +// identifier. Hence, care must be taken when declaring columns to ensure they +// are allocated in the right order. +func (p *Environment) RegisterColumn(context trace.Context, column string, datatype schema.Type) uint { + if p.HasColumn(context.Module(), column) { + panic(fmt.Sprintf("column %d:%s already exists", context.Module(), column)) + } + // Update cache + cid := uint(len(p.columns)) + cref := colRef{context.Module(), column} + p.columns[cref] = colInfo{cid, context.LengthMultiplier(), datatype} + // Done + return cid +} + +// LookupModule determines the module index for a given named module, or return +// false if no such module exists. +func (p *Environment) LookupModule(module string) (uint, bool) { + mid, ok := p.modules[module] + return mid, ok +} + +// LookupColumn determines the column index for a given named column in a given +// module, or return false if no such column exists. +func (p *Environment) LookupColumn(module uint, column string) (uint, bool) { + cref := colRef{module, column} + cinfo, ok := p.columns[cref] + + return cinfo.cid, ok +} + +// Module determines the module index for a given module. This assumes the +// module exists, and will panic otherwise. 
+func (p *Environment) Module(module string) uint { + ctx, ok := p.LookupModule(module) + // Sanity check we found something + if !ok { + panic(fmt.Sprintf("unknown module %s", module)) + } + // Return module index + return ctx +} + +// Column determines the column index for a given column declared in a given +// module. This assumes the column / module exist, and will panic otherwise. +func (p *Environment) Column(module uint, column string) uint { + // FIXME: doesn't make sense using context here. + cid, ok := p.LookupColumn(module, column) + // Sanity check we found something + if !ok { + panic(fmt.Sprintf("unknown column %s", column)) + } + // Return column index + return cid +} + +// HasModule checks whether a given module exists, or not. +func (p *Environment) HasModule(module string) bool { + _, ok := p.LookupModule(module) + // Discard module index + return ok +} + +// HasColumn checks whether a given module has a given column, or not. +func (p *Environment) HasColumn(module uint, column string) bool { + _, ok := p.LookupColumn(module, column) + // Discard column index + return ok +} diff --git a/pkg/corset/parser.go b/pkg/corset/parser.go index 908ab7c..20b3b5f 100644 --- a/pkg/corset/parser.go +++ b/pkg/corset/parser.go @@ -115,7 +115,7 @@ func ParseSourceFile(srcfile *sexp.SourceFile) (Circuit, *sexp.SourceMap[Node], } } // Done - return circuit, p.nodemap, nil + return circuit, p.NodeMap(), nil } // Parser implements a simple parser for the Corset language.
The parser itself @@ -137,7 +137,7 @@ type Parser struct { func NewParser(srcfile *sexp.SourceFile, srcmap *sexp.SourceMap[sexp.SExp]) *Parser { p := sexp.NewTranslator[Expr](srcfile, srcmap) // Construct (initially empty) node map - nodemap := sexp.NewSourceMap[Node](srcmap.Text()) + nodemap := sexp.NewSourceMap[Node](srcmap.Source()) // Construct parser parser := &Parser{p, nodemap} // Configure expression translator @@ -155,6 +155,23 @@ func NewParser(srcfile *sexp.SourceFile, srcmap *sexp.SourceMap[sexp.SExp]) *Par return parser } +// NodeMap extracts the node map constructed by this parser. A key task here is +// to copy all mappings from the expression translator, which maintains its own +// map. +func (p *Parser) NodeMap() *sexp.SourceMap[Node] { + // Copy all mappings from translator's source map into this map. A mapping + // function is required to coerce the types. + sexp.JoinMaps(p.nodemap, p.translator.SourceMap(), func(e Expr) Node { return e }) + // Done + return p.nodemap +} + +// Register a source mapping from a given S-Expression to a given target node. +func (p *Parser) mapSourceNode(from sexp.SExp, to Node) { + span := p.translator.SpanOf(from) + p.nodemap.Put(to, span) +} + // Extract all declarations associated with a given module and package them up.
func (p *Parser) parseModuleContents(terms []sexp.SExp) ([]Declaration, []sexp.SExp, *SyntaxError) { // @@ -201,10 +218,17 @@ func (p *Parser) parseModuleStart(s sexp.SExp) (string, *SyntaxError) { } func (p *Parser) parseDeclaration(s *sexp.List) (Declaration, *SyntaxError) { + var ( + decl Declaration + error *SyntaxError + ) + // if s.MatchSymbols(1, "defcolumns") { - return p.parseColumnDeclarations(s) + decl, error = p.parseColumnDeclarations(s) } else if s.Len() == 4 && s.MatchSymbols(2, "defconstraint") { - return p.parseConstraintDeclaration(s.Elements) + decl, error = p.parseConstraintDeclaration(s.Elements) + } else { + error = p.translator.SyntaxError(s, "malformed declaration") } /* else if e.Len() == 3 && e.MatchSymbols(2, "assert") { @@ -220,12 +244,17 @@ func (p *Parser) parseDeclaration(s *sexp.List) (Declaration, *SyntaxError) { } else if e.Len() == 3 && e.MatchSymbols(1, "defpurefun") { return p.parsePureFunDeclaration(env, e) } */ - return nil, p.translator.SyntaxError(s, "malformed declaration") + // Register node if appropriate + if decl != nil { + p.mapSourceNode(s, decl) + } + // done + return decl, error } // Parse a column declaration func (p *Parser) parseColumnDeclarations(l *sexp.List) (*DefColumns, *SyntaxError) { - columns := make([]DefColumn, l.Len()-1) + columns := make([]*DefColumn, l.Len()-1) // Sanity check declaration if len(l.Elements) == 1 { return nil, p.translator.SyntaxError(l, "malformed column declaration") @@ -244,8 +273,8 @@ func (p *Parser) parseColumnDeclarations(l *sexp.List) (*DefColumns, *SyntaxErro return &DefColumns{columns}, nil } -func (p *Parser) parseColumnDeclaration(e sexp.SExp) (DefColumn, *SyntaxError) { - var defcolumn DefColumn +func (p *Parser) parseColumnDeclaration(e sexp.SExp) (*DefColumn, *SyntaxError) { + defcolumn := &DefColumn{"", nil, 1} // Default to field type defcolumn.DataType = &sc.FieldType{} // Check whether extended declaration or not. 
@@ -269,6 +298,8 @@ func (p *Parser) parseColumnDeclaration(e sexp.SExp) (DefColumn, *SyntaxError) { } else { defcolumn.Name = e.String(false) } + // Update source mapping + p.mapSourceNode(e, defcolumn) // return defcolumn, nil } diff --git a/pkg/corset/resolver.go b/pkg/corset/resolver.go index 9998c5c..b9b7d9b 100644 --- a/pkg/corset/resolver.go +++ b/pkg/corset/resolver.go @@ -1 +1,171 @@ package corset + +import ( + "github.com/consensys/go-corset/pkg/sexp" + tr "github.com/consensys/go-corset/pkg/trace" +) + +// ResolveCircuit resolves all symbols declared and used within a circuit, +// producing an environment which can subsequently be used to look up the +// relevant module or column identifiers. This process can fail, of course, if +// a symbol (e.g. a column) is referred to which doesn't exist. Likewise, if +// two modules or columns with identical names are declared in the same scope, +// etc. +func ResolveCircuit(srcmap *sexp.SourceMaps[Node], circuit *Circuit) (*Environment, []SyntaxError) { + r := resolver{EmptyEnvironment(), srcmap} + // Allocate declared modules + errs := r.resolveModules(circuit) + // Allocate declared input columns + errs = append(errs, r.resolveInputColumns(circuit)...) + // TODO: Allocate declared assignments + // Check expressions + errs = append(errs, r.resolveConstraints(circuit)...) + // Done + return r.env, errs +} + +// Resolver packages up information necessary for resolving a circuit and +// checking that everything makes sense. +type resolver struct { + // Environment determines module and column indices, as needed for + // translating the various constructs found in a circuit. + env *Environment + // Source maps nodes in the circuit back to the spans in their original + // source files. This is needed when reporting syntax errors to generate + // highlights of the relevant source line(s) in question. + srcmap *sexp.SourceMaps[Node] +} + +// Process all module declarations, allocating them into the environment.
+// If any duplicates are found, one or more errors will be reported. Note: it +// is important that this traverses the modules in an identical order to the +// translator. This is to ensure that the relevant module identifiers line up. +func (r *resolver) resolveModules(circuit *Circuit) []SyntaxError { + // Register the root module (which should always exist) + r.env.RegisterModule("") + // + for _, m := range circuit.Modules { + r.env.RegisterModule(m.Name) + } + // Done + return nil +} + +// Process all input (column) declarations. These must be allocated before +// assignments, since the hir.Schema separates these out. Again, if any +// duplicates are found then one or more errors will be reported. +func (r *resolver) resolveInputColumns(circuit *Circuit) []SyntaxError { + errs := r.resolveInputColumnsInModule(r.env.Module(""), circuit.Declarations) + // + for _, m := range circuit.Modules { + // The module must exist after resolveModules. + ctx := r.env.Module(m.Name) + // Process all declarations in the module + merrs := r.resolveInputColumnsInModule(ctx, m.Declarations) + // Package up all errors + errs = append(errs, merrs...) + } + // + return errs +} + +func (r *resolver) resolveInputColumnsInModule(module uint, decls []Declaration) []SyntaxError { + var errors []SyntaxError + // + for _, d := range decls { + // Look for defcolumns declarations only + if dcols, ok := d.(*DefColumns); ok { + // Found one. + for _, col := range dcols.Columns { + if r.env.HasColumn(module, col.Name) { + errors = append(errors, *r.srcmap.SyntaxError(col, "duplicate declaration")) + } else { + context := tr.NewContext(module, col.LengthMultiplier) + r.env.RegisterColumn(context, col.Name, col.DataType) + } + } + } + } + // + return errors +} + +// Examine each constraint and attempt to resolve any variables used within +// them. For example, a vanishing constraint may refer to some variable "X".
+// Prior to this function being called, it's not clear what "X" refers to --- it +// could refer to a column, a constant, or even an alias. The purpose of this +// pass is to: firstly, check that every variable refers to something which was +// declared; secondly, to determine what each variable represents (i.e. column +// access, a constant, etc). +func (r *resolver) resolveConstraints(circuit *Circuit) []SyntaxError { + errs := r.resolveConstraintsInModule(r.env.Module(""), circuit.Declarations) + // + for _, m := range circuit.Modules { + // The module must exist after resolveModules. + ctx := r.env.Module(m.Name) + // Process all declarations in the module + merrs := r.resolveConstraintsInModule(ctx, m.Declarations) + // Package up all errors + errs = append(errs, merrs...) + } + // + return errs +} + +// Helper for resolve constraints which considers those constraints declared in +// a particular module. +func (r *resolver) resolveConstraintsInModule(module uint, decls []Declaration) []SyntaxError { + var errors []SyntaxError + + for _, d := range decls { + // Dispatch on the kind of declaration + if _, ok := d.(*DefColumns); ok { + // Safe to ignore. + } else if c, ok := d.(*DefConstraint); ok { + errors = append(errors, r.resolveDefConstraintInModule(module, c)...) + } else { + errors = append(errors, *r.srcmap.SyntaxError(d, "unknown declaration")) + } + } + // + return errors +} + +// Resolve those variables appearing in either the guard or the body of this constraint. +func (r *resolver) resolveDefConstraintInModule(module uint, decl *DefConstraint) []SyntaxError { + var errors []SyntaxError + if decl.Guard != nil { + errors = r.resolveExpressionInModule(module, decl.Guard) + } + // Resolve constraint body + errors = append(errors, r.resolveExpressionInModule(module, decl.Constraint)...) + // Done + return errors +} + +// Resolve any variable accesses with this expression (which is declared in a +// given module).
The enclosing module is required to resolve unqualified +// variable accesses. As above, the goal is ensure variable refers to something +// that was declared and, more specifically, what kind of access it is (e.g. +// column access, constant access, etc). +func (r *resolver) resolveExpressionInModule(module uint, expr Expr) []SyntaxError { + if _, ok := expr.(*Constant); ok { + return nil + } else if v, ok := expr.(*VariableAccess); ok { + return r.resolveVariableInModule(module, v) + } else { + return r.srcmap.SyntaxErrors(expr, "unknown expression") + } +} + +// Resolve a specific variable access contained within some expression which, in +// turn, is contained within some module. +func (r *resolver) resolveVariableInModule(module uint, expr *VariableAccess) []SyntaxError { + // FIXME: handle qualified variable accesses + // Attempt resolve as a column access in enclosing module + if _, ok := r.env.LookupColumn(module, expr.Name); ok { + return nil + } + // Unable to resolve variable + return r.srcmap.SyntaxErrors(expr, "unknown variable") +} diff --git a/pkg/corset/translator.go b/pkg/corset/translator.go index 13de05b..9cb45ec 100644 --- a/pkg/corset/translator.go +++ b/pkg/corset/translator.go @@ -1,16 +1,79 @@ package corset import ( + "fmt" + "github.com/consensys/go-corset/pkg/hir" + "github.com/consensys/go-corset/pkg/sexp" + tr "github.com/consensys/go-corset/pkg/trace" ) -// Translate the components of a Corset circuit and add them to the schema. By -// the time we get to this point, all malformed source files should have been -// rejected already and the translation should go through easily. Thus, whilst -// syntax errors can be returned here, this should never happen. The mechanism -// is supported, however, to simplify development of new features, etc. -func translateCircuit(circuit *Circuit, schema *hir.Schema) []SyntaxError { - panic("todo") +// TranslateCircuit translates the components of a Corset circuit and add them +// to the schema. 
By the time we get to this point, all malformed source files +// should have been rejected already and the translation should go through +// easily. Thus, whilst syntax errors can be returned here, this should never +// happen. The mechanism is supported, however, to simplify development of new +// features, etc. +func TranslateCircuit(env *Environment, srcmap *sexp.SourceMaps[Node], circuit *Circuit) (*hir.Schema, []SyntaxError) { + t := translator{env, srcmap, hir.EmptySchema()} + // Allocate all modules into schema + t.translateModules(circuit) + // Translate root declarations + errors := t.translateDeclarations("", circuit.Declarations) + // Translate nested declarations + for _, m := range circuit.Modules { + errs := t.translateDeclarations(m.Name, m.Declarations) + errors = append(errors, errs...) + } + // Done + return t.schema, errors +} + +// Translator packages up information necessary for translating a circuit into +// the schema form required for the HIR level. +type translator struct { + // Environment determines module and column indices, as needed for + // translating the various constructs found in a circuit. + env *Environment + // Source maps nodes in the circuit back to the spans in their original + // source files. This is needed when reporting syntax errors to generate + // highlights of the relevant source line(s) in question. + srcmap *sexp.SourceMaps[Node] + // Represents the schema being constructed by this translator. + schema *hir.Schema +} + +func (t *translator) translateModules(circuit *Circuit) { + // Add root module + t.schema.AddModule("") + // Add nested modules + for _, m := range circuit.Modules { + mid := t.schema.AddModule(m.Name) + aid := t.env.Module(m.Name) + // Sanity check everything lines up. + if aid != mid { + panic(fmt.Sprintf("Invalid module identifier: %d vs %d", mid, aid)) + } + } +} + +// Translate all Corset declarations in a given module, adding them to the +// schema. 
By the time we get to this point, all malformed source files should +// have been rejected already and the translation should go through easily. +// Thus, whilst syntax errors can be returned here, this should never happen. +// The mechanism is supported, however, to simplify development of new features, +// etc. +func (t *translator) translateDeclarations(module string, decls []Declaration) []SyntaxError { + var errors []SyntaxError + // Construct context for enclosing module + context := t.env.Module(module) + // + for _, d := range decls { + errs := t.translateDeclaration(d, context) + errors = append(errors, errs...) + } + // Done + return errors } // Translate a Corset declaration and add it to the schema. By the time we get @@ -18,20 +81,78 @@ func translateCircuit(circuit *Circuit, schema *hir.Schema) []SyntaxError { // and the translation should go through easily. Thus, whilst syntax errors can // be returned here, this should never happen. The mechanism is supported, // however, to simplify development of new features, etc. -func translateDeclaration(decl Declaration, schema *hir.Schema) []SyntaxError { +func (t *translator) translateDeclaration(decl Declaration, module uint) []SyntaxError { + var errors []SyntaxError + // if d, ok := decl.(*DefColumns); ok { - translateDefColumns(d, schema) + t.translateDefColumns(d, module) } else if d, ok := decl.(*DefConstraint); ok { - translateDefConstraint(d, schema) + errors = t.translateDefConstraint(d, module) + } else { + // Error handling + panic("unknown declaration") } - // Error handling - panic("unknown declaration") + // + return errors } -func translateDefColumns(decl *DefColumns, schema *hir.Schema) { - panic("TODO") +// Translate a "defcolumns" declaration. 
+func (t *translator) translateDefColumns(decl *DefColumns, module uint) { + // Add each column to schema + for _, c := range decl.Columns { + // FIXME: support user-defined length multiplier + context := tr.NewContext(module, 1) + cid := t.schema.AddDataColumn(context, c.Name, c.DataType) + // Sanity check column identifier + if id := t.env.Column(module, c.Name); id != cid { + panic(fmt.Sprintf("invalid column identifier: %d vs %d", cid, id)) + } + } +} + +// Translate a "defconstraint" declaration. +func (t *translator) translateDefConstraint(decl *DefConstraint, module uint) []SyntaxError { + // Translate constraint body + constraint, errors := t.translateExpr(decl.Constraint, module) + // Translate (optional) guard + guard, guard_errors := t.translateOptionalExpr(decl.Guard, module) + // Combine errors + errors = append(errors, guard_errors...) + // Apply guard + if guard != nil { + constraint = &hir.Mul{Args: []hir.Expr{guard, constraint}} + } + // + if len(errors) == 0 { + context := tr.NewContext(module, 1) + // Add translated constraint + t.schema.AddVanishingConstraint(decl.Handle, context, decl.Domain, constraint) + } + // Done + return errors } -func translateDefConstraint(decl *DefConstraint, schema *hir.Schema) { - panic("TODO") +// Translate an optional expression in a given context. That is an expression +// which maybe nil (i.e. doesn't exist). In such case, nil is returned (i.e. +// without any errors). +func (t *translator) translateOptionalExpr(expr Expr, module uint) (hir.Expr, []SyntaxError) { + if expr != nil { + return t.translateExpr(expr, module) + } + + return nil, nil +} + +// Translate an expression situated in a given context. The context is +// necessary to resolve unqualified names (e.g. for column access, function +// invocations, etc). 
+func (t *translator) translateExpr(expr Expr, module uint) (hir.Expr, []SyntaxError) { + if e, ok := expr.(*Constant); ok { + return &hir.Constant{Val: e.Val}, nil + } else if e, ok := expr.(*VariableAccess); ok { + cid := t.env.Column(module, e.Name) + return &hir.ColumnAccess{Column: cid, Shift: e.Shift}, nil + } else { + return nil, t.srcmap.SyntaxErrors(expr, "unknown expression") + } } diff --git a/pkg/sexp/parser.go b/pkg/sexp/parser.go index ef8a23d..617eef6 100644 --- a/pkg/sexp/parser.go +++ b/pkg/sexp/parser.go @@ -24,7 +24,7 @@ func NewParser(srcfile *SourceFile) *Parser { srcfile: srcfile, text: srcfile.Contents(), index: 0, - srcmap: NewSourceMap[SExp](srcfile.Contents()), + srcmap: NewSourceMap[SExp](*srcfile), } } diff --git a/pkg/sexp/source_file.go b/pkg/sexp/source_file.go index 901df37..1abf9d3 100644 --- a/pkg/sexp/source_file.go +++ b/pkg/sexp/source_file.go @@ -4,6 +4,42 @@ import ( "fmt" ) +// Line provides information about a given line within the original string. +// This includes the line number (counting from 1), and the span of the line +// within the original string. +type Line struct { + // Original text + text []rune + // Span within original text of this line. + span Span + // Line number of this line (counting from 1). + number int +} + +// Get the string representing this line. +func (p *Line) String() string { + // Extract runes representing line + runes := p.text[p.span.start:p.span.end] + // Convert into string + return string(runes) +} + +// Number gets the line number of this line, where the first line in a string +// has line number 1. +func (p *Line) Number() int { + return p.number +} + +// Start returns the starting index of this line in the original string. +func (p *Line) Start() int { + return p.span.start +} + +// Length returns the number of characters in this line. +func (p *Line) Length() int { + return p.span.Length() +} + // SourceFile represents a given source file (typically stored on disk). 
type SourceFile struct { // File name for this source file. @@ -72,6 +108,32 @@ func (s *SourceFile) SyntaxError(span Span, msg string) *SyntaxError { return &SyntaxError{s, span, msg} } +// FindFirstEnclosingLine determines the first line in this source file which +// encloses the start of a span. Observe that, if the position is beyond the +// bounds of the source file then the last physical line is returned. Also, +// the returned line is not guaranteed to enclose the entire span, as these can +// cross multiple lines. +func (s *SourceFile) FindFirstEnclosingLine(span Span) Line { + // Index identifies the current position within the original text. + index := span.start + // Num records the line number, counting from 1. + num := 1 + // Start records the starting offset of the current line. + start := 0 + // Find the line. + for i := 0; i < len(s.contents); i++ { + if i == index { + end := findEndOfLine(index, s.contents) + return Line{s.contents, Span{start, end}, num} + } else if s.contents[i] == '\n' { + num++ + start = i + 1 + } + } + // + return Line{s.contents, Span{start, len(s.contents)}, num} +} + // SyntaxError is a structured error which retains the index into the original // string where an error occurred, along with an error message. type SyntaxError struct { @@ -101,3 +163,23 @@ func (p *SyntaxError) Message() string { func (p *SyntaxError) Error() string { return fmt.Sprintf("%d:%d:%s", p.span.Start(), p.span.End(), p.Message()) } + +// FirstEnclosingLine determines the first line in this source file to which +// this error is associated. Observe that, if the position is beyond the bounds +// of the source file then the last physical line is returned. Also, the +// returned line is not guaranteed to enclose the entire span, as these can +// cross multiple lines. 
+func (p *SyntaxError) FirstEnclosingLine() Line { + return p.srcfile.FindFirstEnclosingLine(p.span) +} + +// Find the end of the enclosing line +func findEndOfLine(index int, text []rune) int { + for i := index; i < len(text); i++ { + if text[i] == '\n' { + return i + } + } + // No end in sight! + return len(text) +} diff --git a/pkg/sexp/source_map.go b/pkg/sexp/source_map.go index fb70a35..e172c78 100644 --- a/pkg/sexp/source_map.go +++ b/pkg/sexp/source_map.go @@ -39,42 +39,6 @@ func (p *Span) Length() int { return p.end - p.start } -// Line provides information about a given line within the original string. -// This includes the line number (counting from 1), and the span of the line -// within the original string. -type Line struct { - // Original text - text []rune - // Span within original text of this line. - span Span - // Line number of this line (counting from 1). - number int -} - -// Get the string representing this line. -func (p *Line) String() string { - // Extract runes representing line - runes := p.text[p.span.start:p.span.end] - // Convert into string - return string(runes) -} - -// Number gets the line number of this line, where the first line in a string -// has line number 1. -func (p *Line) Number() int { - return p.number -} - -// Start returns the starting index of this line in the original string. -func (p *Line) Start() int { - return p.span.start -} - -// Length returns the number of characters in this line. -func (p *Line) Length() int { - return p.span.Length() -} - // SourceMaps provides a mechanism for mapping terms from an AST to multiple // source files. type SourceMaps[T comparable] struct { @@ -88,6 +52,29 @@ func NewSourceMaps[T comparable]() *SourceMaps[T] { return &SourceMaps[T]{[]SourceMap[T]{}} } +// SyntaxError constructs a syntax error for a given node contained within one +// of the source files managed by this set of source maps. 
+func (p *SourceMaps[T]) SyntaxError(node T, msg string) *SyntaxError { + for _, m := range p.maps { + if m.Has(node) { + span := m.Get(node) + return m.srcfile.SyntaxError(span, msg) + } + } + // If we get here, then it means the node on which the error occurrs is not + // present in any of the source maps. This should not be possible, provided + // the parser is implemented correctly. + panic("missing mapping for source node") +} + +// SyntaxErrors is really just a helper that construct a syntax error and then +// places it into an array of size one. This is helpful for situations where +// sets of syntax errors are being passed around. +func (p *SourceMaps[T]) SyntaxErrors(node T, msg string) []SyntaxError { + err := p.SyntaxError(node, msg) + return []SyntaxError{*err} +} + // Join a given source map into this set of source maps. The effect of this is // that nodes recorded in the given source map can be accessed from this set. func (p *SourceMaps[T]) Join(srcmap *SourceMap[T]) { @@ -103,19 +90,19 @@ func (p *SourceMaps[T]) Join(srcmap *SourceMap[T]) { type SourceMap[T comparable] struct { // Maps a given AST object to a span in the original string. mapping map[T]Span - // Original string - text []rune + // Enclosing source file + srcfile SourceFile } // NewSourceMap constructs an initially empty source map for a given string. -func NewSourceMap[T comparable](text []rune) *SourceMap[T] { +func NewSourceMap[T comparable](srcfile SourceFile) *SourceMap[T] { mapping := make(map[T]Span) - return &SourceMap[T]{mapping, text} + return &SourceMap[T]{mapping, srcfile} } -// Text returns underlying text of this source map. -func (p *SourceMap[T]) Text() []rune { - return p.text +// Source returns the underlying source file on which this map operates. +func (p *SourceMap[T]) Source() SourceFile { + return p.srcfile } // Put registers a new AST item with a given span. 
Note, if the item exists @@ -128,6 +115,12 @@ func (p *SourceMap[T]) Put(item T, span Span) { p.mapping[item] = span } +// Has checks whether a given item is contained within this source map. +func (p *SourceMap[T]) Has(item T) bool { + _, ok := p.mapping[item] + return ok +} + // Get determines the span associated with a given AST item extract from the // original text. Note, if the item is not registered with this source map, // then it will panic. @@ -139,38 +132,11 @@ func (p *SourceMap[T]) Get(item T) Span { panic(fmt.Sprintf("invalid source map key: %s", any(item))) } -// FindFirstEnclosingLine determines the first line which encloses the start of -// a span. Observe that, if the position is beyond the bounds of the source -// string then the last physical line is returned. Also, the returned line is -// not guaranteed to enclose the entire span, as these can cross multiple lines. -func (p *SourceMap[T]) FindFirstEnclosingLine(span Span) Line { - // Index identifies the current position within the original text. - index := span.start - // Num records the line number, counting from 1. - num := 1 - // Start records the starting offset of the current line. - start := 0 - // Find the line. - for i := 0; i < len(p.text); i++ { - if i == index { - end := findEndOfLine(index, p.text) - return Line{p.text, Span{start, end}, num} - } else if p.text[i] == '\n' { - num++ - start = i + 1 - } - } - // - return Line{p.text, Span{start, len(p.text)}, num} -} - -// Find the end of the enclosing line -func findEndOfLine(index int, text []rune) int { - for i := index; i < len(text); i++ { - if text[i] == '\n' { - return i - } +// JoinMaps incorporates all mappings from one source map (the source) into +// another source map (the target), whilst applying a given mapping to the node +// types. 
+func JoinMaps[S comparable, T comparable](target *SourceMap[S], source *SourceMap[T], mapping func(T) S) { + for i, k := range source.mapping { + target.Put(mapping(i), k) } - // No end in sight! - return len(text) } diff --git a/pkg/sexp/translator.go b/pkg/sexp/translator.go index a3bb486..349a611 100644 --- a/pkg/sexp/translator.go +++ b/pkg/sexp/translator.go @@ -53,13 +53,21 @@ func NewTranslator[T comparable](srcfile *SourceFile, srcmap *SourceMap[SExp]) * list_default: nil, symbols: make([]SymbolRule[T], 0), old_srcmap: srcmap, - new_srcmap: NewSourceMap[T](srcmap.text), + new_srcmap: NewSourceMap[T](srcmap.srcfile), } } -// =================================================================== -// Public -// =================================================================== +// SourceMap returns the source map maintained for terms constructed by this +// translator. +func (p *Translator[T]) SourceMap() *SourceMap[T] { + return p.new_srcmap +} + +// SpanOf gets the span associated with a given S-Expression in the original +// source file. +func (p *Translator[T]) SpanOf(sexp SExp) Span { + return p.old_srcmap.Get(sexp) +} // Translate a given string into a given structured representation T // using an appropriately configured. @@ -171,12 +179,15 @@ func translateSExp[T comparable](p *Translator[T], s SExp) (T, *SyntaxError) { return translateSExpList[T](p, e) case *Symbol: for i := 0; i != len(p.symbols); i++ { - ir, ok, err := (p.symbols[i])(e.Value) + node, ok, err := (p.symbols[i])(e.Value) if ok && err != nil { // Transform into syntax error return empty, p.SyntaxError(s, err.Error()) } else if ok { - return ir, nil + // Update source map + map2sexp(p, node, s) + // Done + return node, nil } } } @@ -200,10 +211,27 @@ func translateSExpList[T comparable](p *Translator[T], l *List) (T, *SyntaxError t := p.lists[name] // Check whether we found one. 
if t != nil { - return (t)(l) + node, err := (t)(l) + // Update source mapping + map2sexp(p, node, l) + // Done + return node, err } else if p.list_default != nil { - return (p.list_default)(l) + node, err := (p.list_default)(l) + // Update source mapping + map2sexp(p, node, l) + // Done + return node, err } // Default fall back return empty, p.SyntaxError(l, "unknown list encountered") } + +// Add a mapping from a given item to the S-expression from which it was +// generated. This updates the underlying source map to reflect this. +func map2sexp[T comparable](p *Translator[T], item T, sexp SExp) { + // Lookup enclosing span + span := p.old_srcmap.Get(sexp) + // Map it the new source map + p.new_srcmap.Put(item, span) +} diff --git a/pkg/test/ir_test.go b/pkg/test/ir_test.go index c9b4db8..f5df77a 100644 --- a/pkg/test/ir_test.go +++ b/pkg/test/ir_test.go @@ -559,7 +559,7 @@ func Check(t *testing.T, test string) { schema, errs := corset.CompileSourceFile(srcfile) // Check terms parsed ok if len(errs) > 0 { - t.Fatalf("Error parsing %s: %s\n", filename, errs) + t.Fatalf("Error parsing %s: %v\n", filename, errs) } // Check valid traces are accepted accepts_file := fmt.Sprintf("%s.%s", test, "accepts")