Skip to content

Commit

Permalink
JSON encoder may produce invalid utf-8 when provided invalid utf-8 me…
Browse files Browse the repository at this point in the history
…ssage pack string.

Fix encoder and add unit test which validates expectation.
  • Loading branch information
Sean Cunningham authored and philhofer committed Mar 5, 2020
1 parent 490d90d commit 4253cda
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 18 deletions.
62 changes: 44 additions & 18 deletions msgp/json.go
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,23 @@ func rwquoted(dst jsWriter, s []byte) (n int, err error) {
return
}
n++
case '\t':
err = dst.WriteByte('\\')
if err != nil {
return
}
n++
err = dst.WriteByte('t')
if err != nil {
return
}
n++
default:
// This encodes bytes < 0x20 except for \t, \n and \r.
// It also escapes <, >, and &
// because they can lead to security holes when
// user-controlled strings are rendered into JSON
// and served to some browsers.
nn, err = dst.WriteString(`\u00`)
n += nn
if err != nil {
Expand Down Expand Up @@ -495,34 +511,44 @@ func rwquoted(dst jsWriter, s []byte) (n int, err error) {
if err != nil {
return
}
nn, err = dst.WriteString(`\ufffd`)
n += nn
if err != nil {
return
}
i += size
start = i
continue
}
nn, err = dst.WriteString(`\ufffd`)
n += nn
if err != nil {
return
}
i += size
start = i
continue
}
// U+2028 is LINE SEPARATOR.
// U+2029 is PARAGRAPH SEPARATOR.
// They are both technically valid characters in JSON strings,
// but don't work in JSONP, which has to be evaluated as JavaScript,
// and can lead to security holes there. It is valid JSON to
// escape them, so we do so unconditionally.
// See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
if c == '\u2028' || c == '\u2029' {
if start < i {
nn, err = dst.Write(s[start:i])
n += nn
if err != nil {
return
}
nn, err = dst.WriteString(`\u202`)
n += nn
if err != nil {
return
}
err = dst.WriteByte(hex[c&0xF])
if err != nil {
return
}
n++
}
nn, err = dst.WriteString(`\u202`)
n += nn
if err != nil {
return
}
err = dst.WriteByte(hex[c&0xF])
if err != nil {
return
}
n++
i += size
start = i
continue
}
i += size
}
Expand Down
28 changes: 28 additions & 0 deletions msgp/json_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"encoding/json"
"reflect"
"testing"
"unicode/utf8"
)

func TestCopyJSON(t *testing.T) {
Expand Down Expand Up @@ -69,6 +70,33 @@ func TestCopyJSON(t *testing.T) {
}
}

// Encoder should generate valid utf-8 even if passed bad input
func TestCopyJSONNegativeUTF8(t *testing.T) {
// Single string with non-compliant utf-8 byte
stringWithBadUTF8 := []byte{
0xa1, 0xe0,
}

src := bytes.NewBuffer(stringWithBadUTF8)

var js bytes.Buffer
_, err := CopyToJSON(&js, src)
if err != nil {
t.Fatal(err)
}

// Even though we provided bad input, should have escaped the naughty character
if !utf8.Valid(js.Bytes()) {
t.Errorf("Expected JSON to be valid utf-8 even when provided bad input")
}

// Expect a bad character string
expected := `"\ufffd"`
if js.String() != expected {
t.Errorf("Expected: '%s', got: '%s'", expected, js.String())
}
}

func BenchmarkCopyToJSON(b *testing.B) {
var buf bytes.Buffer
enc := NewWriter(&buf)
Expand Down

0 comments on commit 4253cda

Please sign in to comment.