From 4253cdaa5267687e3b7612da7ff105d4414174da Mon Sep 17 00:00:00 2001 From: Sean Cunningham Date: Wed, 4 Mar 2020 17:11:57 -0500 Subject: [PATCH] JSON encoder may produce invalid utf-8 when provided invalid utf-8 message pack string. Fix encoder and add unit test which validates expectation. --- msgp/json.go | 62 +++++++++++++++++++++++++++++++++-------------- msgp/json_test.go | 28 +++++++++++++++++++++ 2 files changed, 72 insertions(+), 18 deletions(-) diff --git a/msgp/json.go b/msgp/json.go index 4325860a..77601e52 100644 --- a/msgp/json.go +++ b/msgp/json.go @@ -466,7 +466,23 @@ func rwquoted(dst jsWriter, s []byte) (n int, err error) { return } n++ + case '\t': + err = dst.WriteByte('\\') + if err != nil { + return + } + n++ + err = dst.WriteByte('t') + if err != nil { + return + } + n++ default: + // This encodes bytes < 0x20 except for \t, \n and \r. + // It also escapes <, >, and & + // because they can lead to security holes when + // user-controlled strings are rendered into JSON + // and served to some browsers. nn, err = dst.WriteString(`\u00`) n += nn if err != nil { @@ -495,16 +511,23 @@ func rwquoted(dst jsWriter, s []byte) (n int, err error) { if err != nil { return } - nn, err = dst.WriteString(`\ufffd`) - n += nn - if err != nil { - return - } - i += size - start = i - continue } + nn, err = dst.WriteString(`\ufffd`) + n += nn + if err != nil { + return + } + i += size + start = i + continue } + // U+2028 is LINE SEPARATOR. + // U+2029 is PARAGRAPH SEPARATOR. + // They are both technically valid characters in JSON strings, + // but don't work in JSONP, which has to be evaluated as JavaScript, + // and can lead to security holes there. It is valid JSON to + // escape them, so we do so unconditionally. + // See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion. if c == '\u2028' || c == '\u2029' { if start < i { nn, err = dst.Write(s[start:i]) @@ -512,17 +535,20 @@ func rwquoted(dst jsWriter, s []byte) (n int, err error) { if err != nil { return } - nn, err = dst.WriteString(`\u202`) - n += nn - if err != nil { - return - } - err = dst.WriteByte(hex[c&0xF]) - if err != nil { - return - } - n++ } + nn, err = dst.WriteString(`\u202`) + n += nn + if err != nil { + return + } + err = dst.WriteByte(hex[c&0xF]) + if err != nil { + return + } + n++ + i += size + start = i + continue } i += size } diff --git a/msgp/json_test.go b/msgp/json_test.go index 439d4790..3ca01e8c 100644 --- a/msgp/json_test.go +++ b/msgp/json_test.go @@ -5,6 +5,7 @@ import ( "encoding/json" "reflect" "testing" + "unicode/utf8" ) func TestCopyJSON(t *testing.T) { @@ -69,6 +70,33 @@ func TestCopyJSON(t *testing.T) { } } +// Encoder should generate valid utf-8 even if passed bad input +func TestCopyJSONNegativeUTF8(t *testing.T) { + // Single string with non-compliant utf-8 byte + stringWithBadUTF8 := []byte{ + 0xa1, 0xe0, + } + + src := bytes.NewBuffer(stringWithBadUTF8) + + var js bytes.Buffer + _, err := CopyToJSON(&js, src) + if err != nil { + t.Fatal(err) + } + + // Even though we provided bad input, should have escaped the naughty character + if !utf8.Valid(js.Bytes()) { + t.Errorf("Expected JSON to be valid utf-8 even when provided bad input") + } + + // Expect a bad character string + expected := `"\ufffd"` + if js.String() != expected { + t.Errorf("Expected: '%s', got: '%s'", expected, js.String()) + } +} + func BenchmarkCopyToJSON(b *testing.B) { var buf bytes.Buffer enc := NewWriter(&buf)