Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use hex escape sequences instead of octal escape sequences #291

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ class TranslatorSpec extends AnyFunSpec {
GoCompiler -> "[]uint8{34, 0, 10, 64, 65, 66, 92}",
JavaCompiler -> "new byte[] { 34, 0, 10, 64, 65, 66, 92 }",
JavaScriptCompiler -> "new Uint8Array([34, 0, 10, 64, 65, 66, 92])",
LuaCompiler -> "\"\\034\\000\\010\\064\\065\\066\\092\"",
LuaCompiler -> "\"\\x22\\x00\\x0A\\x40\\x41\\x42\\x5C\"",
PerlCompiler -> "pack('C*', (34, 0, 10, 64, 65, 66, 92))",
PHPCompiler -> "\"\\x22\\x00\\x0A\\x40\\x41\\x42\\x5C\"",
PythonCompiler -> "b\"\\x22\\x00\\x0A\\x40\\x41\\x42\\x5C\"",
Expand All @@ -420,7 +420,7 @@ class TranslatorSpec extends AnyFunSpec {
GoCompiler -> "[]uint8{255, 0, 255}",
JavaCompiler -> "new byte[] { -1, 0, -1 }",
JavaScriptCompiler -> "new Uint8Array([255, 0, 255])",
LuaCompiler -> "\"\\255\\000\\255\"",
LuaCompiler -> "\"\\xFF\\x00\\xFF\"",
PerlCompiler -> "pack('C*', (255, 0, 255))",
PHPCompiler -> "\"\\xFF\\x00\\xFF\"",
PythonCompiler -> "b\"\\xFF\\x00\\xFF\"",
Expand All @@ -435,7 +435,7 @@ class TranslatorSpec extends AnyFunSpec {
GoCompiler -> "len([]uint8{0, 1, 2})",
JavaCompiler -> "new byte[] { 0, 1, 2 }.length",
JavaScriptCompiler -> "new Uint8Array([0, 1, 2]).length",
LuaCompiler -> "#\"\\000\\001\\002\"",
LuaCompiler -> "#\"\\x00\\x01\\x02\"",
PerlCompiler -> "length(pack('C*', (0, 1, 2)))",
PHPCompiler -> "strlen(\"\\x00\\x01\\x02\")",
PythonCompiler -> "len(b\"\\x00\\x01\\x02\")",
Expand Down Expand Up @@ -555,14 +555,14 @@ class TranslatorSpec extends AnyFunSpec {
full("\"str\\0next\"", CalcIntType, CalcStrType, ResultMap(
CppCompiler -> "std::string(\"str\\000next\", 8)",
CSharpCompiler -> "\"str\\0next\"",
GoCompiler -> "\"str\\000next\"",
GoCompiler -> "\"str\\x00next\"",
JavaCompiler -> "\"str\\000next\"",
JavaScriptCompiler -> "\"str\\x00next\"",
LuaCompiler -> "\"str\\000next\"",
PerlCompiler -> "\"str\\000next\"",
PHPCompiler -> "\"str\\000next\"",
PythonCompiler -> "u\"str\\000next\"",
RubyCompiler -> "\"str\\000next\""
LuaCompiler -> "\"str\\x00next\"",
PerlCompiler -> "\"str\\x00next\"",
PHPCompiler -> "\"str\\x00next\"",
PythonCompiler -> "u\"str\\x00next\"",
RubyCompiler -> "\"str\\x00next\""
))
}

Expand Down
1 change: 0 additions & 1 deletion shared/src/main/scala/io/kaitai/struct/JSON.scala
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ object JSON extends CommonLiterals {
}
}

/** octal escapes (which [[translators.CommonLiterals.strLiteralGenericCC]] uses by default) are not allowed in JSON */
override def strLiteralGenericCC(code: Char): String = strLiteralUnicode(code)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment should be updated, not deleted. The fact that we're overriding strLiteralGenericCC at all still deserves an explanation. Nothing has changed about that, because JSON allows neither octal nor hex escapes.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Returned back with "hex escapes"


def stringToJson(str: String): String =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ trait CommonLiterals {
/**
* Handle ASCII character conversion for inlining into string literals.
* Default implementation consults [[asciiCharQuoteMap]] first, then
* just dumps it as is if it's a printable ASCII charcter, or calls
* just dumps it as is if it's a printable ASCII character, or calls
* [[strLiteralGenericCC]] if it's a control character.
* @param code character code to convert into string for inclusion in
* a string literal
Expand All @@ -53,18 +53,14 @@ trait CommonLiterals {

/**
* Converts generic control character code into something that's allowed
* inside a string literal. Default implementation uses octal encoding,
* inside a string literal. Default implementation uses hex encoding,
* which is ok for most C-derived languages.
*
* Note that we use strictly 3 octal digits to work around potential
* problems with following decimal digits, i.e. "\0" + "2" that would be
* parsed as single character "\02" = "\x02", instead of two characters
* "\x00\x32".
* @param code character code to represent
* @return string literal representation of given code
*/
def strLiteralGenericCC(code: Char): String =
"\\%03o".format(code.toInt)
"\\x%02X".format(code.toInt)

/**
* Converts Unicode (typically, non-ASCII) character code into something
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,21 @@ class CppTranslator(provider: TypeProvider, importListSrc: CppImportList, import
}
}

/**
* Hex escapes in C++ are not limited in length (any hex digits that follow
* are absorbed into the escape), so we use octal escapes, which are limited
* to 3 digits.
*
* Note that we use strictly 3 octal digits to work around potential
* problems with following octal digits, i.e. "\0" + "2" that would be
* parsed as single character "\02" = "\x02", instead of two characters
* "\x00\x32".
*
* @see https://en.cppreference.com/w/cpp/language/escape
* @param code character code to represent
* @return string literal representation of given code
*/
override def strLiteralGenericCC(code: Char): String =
"\\%03o".format(code.toInt)

override def genericBinOp(left: Ast.expr, op: Ast.operator, right: Ast.expr, extPrec: Int) = {
(detectType(left), detectType(right), op) match {
case (_: IntType, _: IntType, Ast.operator.Mod) =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,6 @@ class JavaScriptTranslator(provider: TypeProvider, importList: ImportList) exten
override def doByteArrayNonLiteral(elts: Seq[Ast.expr]): String =
s"new Uint8Array([${elts.map(translate).mkString(", ")}])"

/**
* JavaScript rendition of common control character that would use hex form,
* not octal. "Octal" control character string literals might be accepted
* in non-strict JS mode, but in strict mode only hex or unicode are ok.
* Here we'll use hex, as they are shorter.
*
* @see https://github.com/kaitai-io/kaitai_struct/issues/279
* @param code character code to represent
* @return string literal representation of given code
*/
override def strLiteralGenericCC(code: Char): String =
"\\x%02x".format(code.toInt)

override def genericBinOp(left: Ast.expr, op: Ast.operator, right: Ast.expr, extPrec: Int) = {
(detectType(left), detectType(right), op) match {
case (_: IntType, _: IntType, Ast.operator.Div) =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,21 @@ class JavaTranslator(provider: TypeProvider, importList: ImportList) extends Bas
override def doByteArrayNonLiteral(elts: Seq[expr]): String =
s"new byte[] { ${elts.map(translate).mkString(", ")} }"

/**
* Java does not support two-digit hex escape sequences, so we use octal
* escapes, as they are shorter than the four-digit Unicode escapes.
*
* Note that we use strictly 3 octal digits to work around potential
* problems with following octal digits, i.e. "\0" + "2" that would be
* parsed as single character "\02" = "\x02", instead of two characters
* "\x00\x32".
*
* @see https://docs.oracle.com/javase/specs/jls/se7/html/jls-3.html#jls-3.10.6
* @param code character code to represent
* @return string literal representation of given code
*/
override def strLiteralGenericCC(code: Char): String =
"\\%03o".format(code.toInt)

override def genericBinOp(left: Ast.expr, op: Ast.operator, right: Ast.expr, extPrec: Int) = {
(detectType(left), detectType(right), op) match {
case (_: IntType, _: IntType, Ast.operator.Mod) =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ class LuaTranslator(provider: TypeProvider, importList: ImportList) extends Base
'\b' -> "\\b",
'\u000b' -> "\\v",
'\f' -> "\\f",
'\u001b' -> "\\027"
)

override def strLiteralUnicode(code: Char): String =
Expand Down Expand Up @@ -71,7 +70,7 @@ class LuaTranslator(provider: TypeProvider, importList: ImportList) extends Base
override def doArrayLiteral(t: DataType, value: Seq[Ast.expr]): String =
"{" + value.map((v) => translate(v)).mkString(", ") + "}"
override def doByteArrayLiteral(arr: Seq[Byte]): String =
"\"" + decEscapeByteArray(arr) + "\""
"\"" + Utils.hexEscapeByteArray(arr) + "\""
override def doByteArrayNonLiteral(values: Seq[Ast.expr]): String =
// It is assumed that every expression produces integer in the range [0; 255]
"string.char(" + values.map(translate).mkString(", ") + ")"
Expand Down Expand Up @@ -189,14 +188,4 @@ class LuaTranslator(provider: TypeProvider, importList: ImportList) extends Base
case Ast.unaryop.Not => "not"
case _ => super.unaryOp(op)
}

/**
* Converts byte array (Seq[Byte]) into decimal-escaped Lua-style literal
* characters (i.e. like \255).
*
* @param arr byte array to escape
* @return array contents decimal-escaped as string
*/
private def decEscapeByteArray(arr: Seq[Byte]): String =
arr.map((x) => "\\%03d".format(x & 0xff)).mkString
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,13 @@ class RustTranslator(provider: TypeProvider, config: RuntimeConfig)
override def strLiteralGenericCC(code: Char): String =
strLiteralUnicode(code)

/**
* Hex escapes of the form `\xHH` in Rust allow only codes in the range
* 0x00 - 0x7f, so the Unicode escape form is used instead.
*
* @see https://doc.rust-lang.org/reference/tokens.html#examples
* @param code character code to represent
* @return string literal representation of given code
*/
override def strLiteralUnicode(code: Char): String =
"\\u{%x}".format(code.toInt)

Expand Down
Loading