From 5e90d5734aafef6a55b39c6efd33a287f0cdc3d5 Mon Sep 17 00:00:00 2001 From: John Bartholomew Date: Wed, 7 Feb 2024 22:20:31 +0000 Subject: [PATCH] fix jsonnetfmt --debug-desugaring The desugaring step handles string backslash escape sequences, but it puts the result back into the same AST elements that the input strings came from. When formatting, these strings need to be re-escaped to turn them into printable string literals. --- core/ast.h | 2 +- core/desugarer.cpp | 9 +++++---- core/formatter.cpp | 3 +++ core/libjsonnet.cpp | 34 +++++++++++++++++++++++++++++++++- 4 files changed, 42 insertions(+), 6 deletions(-) diff --git a/core/ast.h b/core/ast.h index 9bcf94890..a21221e0a 100644 --- a/core/ast.h +++ b/core/ast.h @@ -586,7 +586,7 @@ struct LiteralNumber : public AST { /** Represents JSON strings. */ struct LiteralString : public AST { UString value; - enum TokenKind { SINGLE, DOUBLE, BLOCK, VERBATIM_SINGLE, VERBATIM_DOUBLE }; + enum TokenKind { SINGLE, DOUBLE, BLOCK, VERBATIM_SINGLE, VERBATIM_DOUBLE, RAW_DESUGARED }; TokenKind tokenKind; std::string blockIndent; // Only contains ' ' and '\t'. std::string blockTermIndent; // Only contains ' ' and '\t'. diff --git a/core/desugarer.cpp b/core/desugarer.cpp index 625e3b475..5fd9acfd5 100644 --- a/core/desugarer.cpp +++ b/core/desugarer.cpp @@ -119,12 +119,12 @@ class Desugarer { LiteralString *str(const UString &s) { - return make(E, EF, s, LiteralString::DOUBLE, "", ""); + return make(E, EF, s, LiteralString::RAW_DESUGARED, "", ""); } LiteralString *str(const LocationRange &loc, const UString &s) { - return make(loc, EF, s, LiteralString::DOUBLE, "", ""); + return make(loc, EF, s, LiteralString::RAW_DESUGARED, "", ""); } LiteralNull *null(void) @@ -851,12 +851,13 @@ class Desugarer { // Nothing to do. } else if (auto *ast = dynamic_cast(ast_)) { - if ((ast->tokenKind != LiteralString::BLOCK) && + if ((ast->tokenKind != LiteralString::RAW_DESUGARED) && + (ast->tokenKind != LiteralString::BLOCK) && (ast->tokenKind != LiteralString::VERBATIM_DOUBLE) && (ast->tokenKind != LiteralString::VERBATIM_SINGLE)) { ast->value = jsonnet_string_unescape(ast->location, ast->value); } - ast->tokenKind = LiteralString::DOUBLE; + ast->tokenKind = LiteralString::RAW_DESUGARED; ast->blockIndent.clear(); } else if (dynamic_cast(ast_)) { diff --git a/core/formatter.cpp b/core/formatter.cpp index a17483556..cad3369c1 100644 --- a/core/formatter.cpp +++ b/core/formatter.cpp @@ -490,6 +490,7 @@ class Unparser { o << ast->originalString; } else if (auto *ast = dynamic_cast(ast_)) { + assert(ast->tokenKind != LiteralString::RAW_DESUGARED); if (ast->tokenKind == LiteralString::DOUBLE) { o << "\""; o << encode_utf8(ast->value); @@ -647,6 +648,7 @@ class EnforceStringStyle : public FmtPass { EnforceStringStyle(Allocator &alloc, const FmtOpts &opts) : FmtPass(alloc, opts) {} void visit(LiteralString *lit) { + assert(lit->tokenKind != LiteralString::RAW_DESUGARED); if (lit->tokenKind == LiteralString::BLOCK) return; if (lit->tokenKind == LiteralString::VERBATIM_DOUBLE) @@ -1881,6 +1883,7 @@ class FixIndentation { column += ast->originalString.length(); } else if (auto *ast = dynamic_cast(ast_)) { + assert(ast->tokenKind != LiteralString::RAW_DESUGARED); if (ast->tokenKind == LiteralString::DOUBLE) { column += 2 + ast->value.length(); // Include quotes } else if (ast->tokenKind == LiteralString::SINGLE) { diff --git a/core/libjsonnet.cpp b/core/libjsonnet.cpp index 5bd92157e..730cef8e7 100644 --- a/core/libjsonnet.cpp +++ b/core/libjsonnet.cpp @@ -35,6 +35,8 @@ extern "C" { #include "parser.h" #include "static_analysis.h" #include "vm.h" +#include "pass.h" +#include "string_utils.h" namespace { using ::jsonnet::internal::Allocator; @@ -42,12 +44,40 @@ using ::jsonnet::internal::AST; using ::jsonnet::internal::FmtOpts; using ::jsonnet::internal::Fodder; using ::jsonnet::internal::jsonnet_lex; +using ::jsonnet::internal::jsonnet_string_escape; using ::jsonnet::internal::RuntimeError; using ::jsonnet::internal::StaticError; using ::jsonnet::internal::Tokens; using ::jsonnet::internal::VmExt; using ::jsonnet::internal::VmNativeCallback; using ::jsonnet::internal::VmNativeCallbackMap; +using ::jsonnet::internal::CompilerPass; +using ::jsonnet::internal::LiteralString; + +// Used in fmtDebugDesugaring mode to ensure the AST can be pretty-printed. +class ReEscapeStrings : public CompilerPass { + using CompilerPass::visit; + public: + ReEscapeStrings(Allocator &alloc) : CompilerPass(alloc) {} + void visit(LiteralString *lit) + { + if (lit->tokenKind != LiteralString::RAW_DESUGARED) + return; + + // TODO: Share code with formatter.cpp EnforceStringStyle. + unsigned num_single = 0, num_double = 0; + for (char32_t c : lit->value) { + if (c == '\'') + num_single++; + if (c == '"') + num_double++; + } + bool use_single = (num_double > 0) && (num_single == 0); + + lit->value = jsonnet_string_escape(lit->value, use_single); + lit->tokenKind = use_single ? LiteralString::SINGLE : LiteralString::DOUBLE; + } +}; } // namespace static void memory_panic(void) @@ -450,8 +480,10 @@ static char *jsonnet_fmt_snippet_aux(JsonnetVm *vm, const char *filename, const expr = jsonnet_parse(&alloc, tokens); Fodder final_fodder = tokens.front().fodder; - if (vm->fmtDebugDesugaring) + if (vm->fmtDebugDesugaring) { jsonnet_desugar(&alloc, expr, &vm->tla); + ReEscapeStrings(alloc).expr(expr); + } json_str = jsonnet_fmt(expr, final_fodder, vm->fmtOpts);