From 9b7959fc8e0fafe2de3125b414d3dbc9118e359c Mon Sep 17 00:00:00 2001 From: Jud White Date: Mon, 14 Nov 2016 00:55:42 -0600 Subject: [PATCH 1/4] golint cleanup --- encode_test.go | 8 ++++---- fuzz.go | 6 ++++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/encode_test.go b/encode_test.go index c7aa1d5..094cb80 100644 --- a/encode_test.go +++ b/encode_test.go @@ -125,8 +125,8 @@ func TestMarshalKeyvals(t *testing.T) { if err != d.err { t.Errorf("%#v: got error: %v, want error: %v", d.in, err, d.err) } - if got, want := got, d.want; !reflect.DeepEqual(got, want) { - t.Errorf("%#v: got '%s', want '%s'", d.in, got, want) + if !reflect.DeepEqual(got, d.want) { + t.Errorf("%#v: got '%s', want '%s'", d.in, got, d.want) } } } @@ -181,12 +181,12 @@ func (t marshalerStringer) String() string { return fmt.Sprint(t.a + t.b) } -var marshalError = errors.New("marshal error") +var errMarshal = errors.New("marshal error") type errorMarshaler struct{} func (errorMarshaler) MarshalText() ([]byte, error) { - return nil, marshalError + return nil, errMarshal } func BenchmarkEncodeKeyval(b *testing.B) { diff --git a/fuzz.go b/fuzz.go index ab916a9..46a590c 100644 --- a/fuzz.go +++ b/fuzz.go @@ -12,13 +12,14 @@ import ( kr "github.com/kr/logfmt" ) +// Fuzz checks reserialized data matches func Fuzz(data []byte) int { parsed, err := parse(data) if err != nil { return 0 } var w1 bytes.Buffer - if err := write(parsed, &w1); err != nil { + if err = write(parsed, &w1); err != nil { panic(err) } parsed, err = parse(data) @@ -26,7 +27,7 @@ func Fuzz(data []byte) int { panic(err) } var w2 bytes.Buffer - if err := write(parsed, &w2); err != nil { + if err = write(parsed, &w2); err != nil { panic(err) } if !bytes.Equal(w1.Bytes(), w2.Bytes()) { @@ -35,6 +36,7 @@ func Fuzz(data []byte) int { return 1 } +// FuzzVsKR checks go-logfmt/logfmt against kr/logfmt func FuzzVsKR(data []byte) int { parsed, err := parse(data) parsedKR, errKR := parseKR(data) From 282c134fb5ac3ba3ec471570b8f7e97f0e0d108c Mon Sep 17 00:00:00 2001 From: Jud White Date: Mon, 14 Nov 2016 00:04:24 -0600 Subject: [PATCH 2/4] add failing tests --- decode_test.go | 2 ++ encode_test.go | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/decode_test.go b/decode_test.go index d261880..8cdc384 100644 --- a/decode_test.go +++ b/decode_test.go @@ -118,6 +118,8 @@ func TestDecoder_errors(t *testing.T) { {"a=\"1\\", &SyntaxError{Msg: "unterminated quoted value", Line: 1, Pos: 6}}, {"a=\"\\t1", &SyntaxError{Msg: "unterminated quoted value", Line: 1, Pos: 7}}, {"a=\"\\u1\"", &SyntaxError{Msg: "invalid quoted value", Line: 1, Pos: 8}}, + {"a\ufffd=bar", &SyntaxError{Msg: "invalid key", Line: 1, Pos: 5}}, + {"\x80=bar", &SyntaxError{Msg: "invalid key", Line: 1, Pos: 2}}, } for _, test := range tests { diff --git a/encode_test.go b/encode_test.go index 094cb80..877e6e4 100644 --- a/encode_test.go +++ b/encode_test.go @@ -53,6 +53,11 @@ func TestEncodeKeyval(t *testing.T) { {key: decimalStringer{5, 9}, value: "v", want: "5.9=v"}, {key: (*decimalStringer)(nil), value: "v", err: logfmt.ErrNilKey}, {key: marshalerStringer{5, 9}, value: "v", want: "5.9=v"}, + {key: "k", value: "\xbd", want: "k=\"\\ufffd\""}, + {key: "k", value: "\ufffd\x00", want: "k=\"\\ufffd\\u0000\""}, + {key: "k", value: "\ufffd", want: "k=\"\\ufffd\""}, + {key: "k", value: []byte("\ufffd\x00"), want: "k=\"\\ufffd\\u0000\""}, + {key: "k", value: []byte("\ufffd"), want: "k=\"\\ufffd\""}, } for _, d := range data { @@ -82,6 +87,8 @@ func TestMarshalKeyvals(t *testing.T) { {in: kv(), want: nil}, {in: kv(nil, "v"), err: logfmt.ErrNilKey}, {in: kv(nilPtr, "v"), err: logfmt.ErrNilKey}, + {in: kv("\ufffd"), err: logfmt.ErrInvalidKey}, + {in: kv("\xbd"), err: logfmt.ErrInvalidKey}, {in: kv("k"), want: []byte("k=null")}, {in: kv("k", nil), want: []byte("k=null")}, {in: kv("k", ""), want: []byte("k=")}, From 52e29b14dbd438cd277b869ebeccccf8c8912951 Mon Sep 17 00:00:00 2001 From: Jud White Date: Mon, 14 Nov 2016 00:34:34 -0600 Subject: [PATCH 3/4] fuzz: - for parse/write/parse check, change second parse to parse the write of the parsed value instead of the original input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit encode: - invalidKeyRune: treat utf8.RuneError as invalid rune - add invalidKey, invalidKeyString funcs - checks len(key) == 0, invalidKeyRune, and utf8.Valid - needsQuotedValueRune: if value contains utf8.RuneError quote the value. a literal k=\ufffd encodes to k=\"\\ufffd\" - writeStringValue, writeBytesValue: quote any invalid utf8 string - the above two changes fix the "reserialized data does not match" error found during fuzz testing decode: - reject invalidKey as parse error jsonstring: - remove "&& size == 1" when checking for rune decode error fuzz testing output: - .quoted: "0=\"\xbd\x00\"" - .output: panic: reserialized data does not match: "0=\"\\ufffd\\u0000\"\n" "0=\"�\\u0000\"\n" --- decode.go | 14 ++++++++++++++ encode.go | 21 +++++++++++++++------ fuzz.go | 2 +- jsonstring.go | 6 +++--- 4 files changed, 33 insertions(+), 10 deletions(-) diff --git a/decode.go b/decode.go index a04981f..d8d3991 100644 --- a/decode.go +++ b/decode.go @@ -68,6 +68,8 @@ func (dec *Decoder) ScanKeyval() bool { return false key: + const invalidKeyError = "invalid key" + start := dec.pos for p, c := range line[dec.pos:] { switch { @@ -75,6 +77,10 @@ key: dec.pos += p if dec.pos > start { dec.key = line[start:dec.pos] + if invalidKey(dec.key) { + dec.syntaxError(invalidKeyError) + return false + } } if dec.key == nil { dec.unexpectedByte(c) @@ -89,6 +95,10 @@ key: dec.pos += p if dec.pos > start { dec.key = line[start:dec.pos] + if invalidKey(dec.key) { + dec.syntaxError(invalidKeyError) + return false + } } return true } @@ -96,6 +106,10 @@ key: dec.pos = len(line) if dec.pos > start { dec.key = line[start:dec.pos] + if invalidKey(dec.key) { + dec.syntaxError(invalidKeyError) + return false + } } return true diff --git a/encode.go b/encode.go index 4d0fa23..aa45ee0 100644 --- a/encode.go +++ b/encode.go @@ -8,6 +8,7 @@ import ( "io" "reflect" "strings" + "unicode/utf8" ) // MarshalKeyvals returns the logfmt encoding of keyvals, a variadic sequence @@ -165,11 +166,19 @@ func writeKey(w io.Writer, key interface{}) error { } func invalidKeyRune(r rune) bool { - return r <= ' ' || r == '=' || r == '"' + return r <= ' ' || r == '=' || r == '"' || r == utf8.RuneError +} + +func invalidKeyString(key string) bool { + return len(key) == 0 || strings.IndexFunc(key, invalidKeyRune) != -1 || !utf8.ValidString(key) +} + +func invalidKey(key []byte) bool { + return len(key) == 0 || bytes.IndexFunc(key, invalidKeyRune) != -1 || !utf8.Valid(key) } func writeStringKey(w io.Writer, key string) error { - if len(key) == 0 || strings.IndexFunc(key, invalidKeyRune) != -1 { + if invalidKeyString(key) { return ErrInvalidKey } _, err := io.WriteString(w, key) @@ -177,7 +186,7 @@ func writeStringKey(w io.Writer, key string) error { } func writeBytesKey(w io.Writer, key []byte) error { - if len(key) == 0 || bytes.IndexFunc(key, invalidKeyRune) != -1 { + if invalidKey(key) { return ErrInvalidKey } _, err := w.Write(key) @@ -223,14 +232,14 @@ func writeValue(w io.Writer, value interface{}) error { } func needsQuotedValueRune(r rune) bool { - return r <= ' ' || r == '=' || r == '"' + return r <= ' ' || r == '=' || r == '"' || r == utf8.RuneError } func writeStringValue(w io.Writer, value string, ok bool) error { var err error if ok && value == "null" { _, err = io.WriteString(w, `"null"`) - } else if strings.IndexFunc(value, needsQuotedValueRune) != -1 { + } else if strings.IndexFunc(value, needsQuotedValueRune) != -1 || !utf8.ValidString(value) { _, err = writeQuotedString(w, value) } else { _, err = io.WriteString(w, value) @@ -240,7 +249,7 @@ func writeStringValue(w io.Writer, value string, ok bool) error { func writeBytesValue(w io.Writer, value []byte) error { var err error - if bytes.IndexFunc(value, needsQuotedValueRune) >= 0 { + if bytes.IndexFunc(value, needsQuotedValueRune) >= 0 || !utf8.Valid(value) { _, err = writeQuotedBytes(w, value) } else { _, err = w.Write(value) diff --git a/fuzz.go b/fuzz.go index 46a590c..5e93bf1 100644 --- a/fuzz.go +++ b/fuzz.go @@ -22,7 +22,7 @@ func Fuzz(data []byte) int { if err = write(parsed, &w1); err != nil { panic(err) } - parsed, err = parse(data) + parsed, err = parse(w1.Bytes()) if err != nil { panic(err) } diff --git a/jsonstring.go b/jsonstring.go index 53b6532..030ac85 100644 --- a/jsonstring.go +++ b/jsonstring.go @@ -71,7 +71,7 @@ func writeQuotedString(w io.Writer, s string) (int, error) { continue } c, size := utf8.DecodeRuneInString(s[i:]) - if c == utf8.RuneError && size == 1 { + if c == utf8.RuneError { if start < i { buf.WriteString(s[start:i]) } @@ -129,7 +129,7 @@ func writeQuotedBytes(w io.Writer, s []byte) (int, error) { continue } c, size := utf8.DecodeRune(s[i:]) - if c == utf8.RuneError && size == 1 { + if c == utf8.RuneError { if start < i { buf.Write(s[start:i]) } @@ -182,7 +182,7 @@ func unquoteBytes(s []byte) (t []byte, ok bool) { continue } rr, size := utf8.DecodeRune(s[r:]) - if rr == utf8.RuneError && size == 1 { + if rr == utf8.RuneError { break } r += size From a9a71ce80e1282dc9f85b690e1ede891b96716da Mon Sep 17 00:00:00 2001 From: Jud White Date: Tue, 15 Nov 2016 04:34:42 -0600 Subject: [PATCH 4/4] apply review feedback --- decode.go | 15 ++++++++------- decode_test.go | 1 + encode.go | 8 ++++---- encode_test.go | 10 +++++----- fuzz.go | 1 - 5 files changed, 18 insertions(+), 17 deletions(-) diff --git a/decode.go b/decode.go index d8d3991..04e0eff 100644 --- a/decode.go +++ b/decode.go @@ -2,8 +2,10 @@ package logfmt import ( "bufio" + "bytes" "fmt" "io" + "unicode/utf8" ) // A Decoder reads and decodes logfmt records from an input stream. @@ -70,14 +72,14 @@ func (dec *Decoder) ScanKeyval() bool { key: const invalidKeyError = "invalid key" - start := dec.pos + start, multibyte := dec.pos, false for p, c := range line[dec.pos:] { switch { case c == '=': dec.pos += p if dec.pos > start { dec.key = line[start:dec.pos] - if invalidKey(dec.key) { + if multibyte && bytes.IndexRune(dec.key, utf8.RuneError) != -1 { dec.syntaxError(invalidKeyError) return false } @@ -95,18 +97,20 @@ key: dec.pos += p if dec.pos > start { dec.key = line[start:dec.pos] - if invalidKey(dec.key) { + if multibyte && bytes.IndexRune(dec.key, utf8.RuneError) != -1 { dec.syntaxError(invalidKeyError) return false } } return true + case c >= utf8.RuneSelf: + multibyte = true } } dec.pos = len(line) if dec.pos > start { dec.key = line[start:dec.pos] - if invalidKey(dec.key) { + if multibyte && bytes.IndexRune(dec.key, utf8.RuneError) != -1 { dec.syntaxError(invalidKeyError) return false } @@ -200,9 +204,6 @@ func (dec *Decoder) Value() []byte { return dec.value } -// func (dec *Decoder) DecodeValue() ([]byte, error) { -// } - // Err returns the first non-EOF error that was encountered by the Scanner. func (dec *Decoder) Err() error { return dec.err diff --git a/decode_test.go b/decode_test.go index 8cdc384..363152d 100644 --- a/decode_test.go +++ b/decode_test.go @@ -120,6 +120,7 @@ func TestDecoder_errors(t *testing.T) { {"a=\"\\u1\"", &SyntaxError{Msg: "invalid quoted value", Line: 1, Pos: 8}}, {"a\ufffd=bar", &SyntaxError{Msg: "invalid key", Line: 1, Pos: 5}}, {"\x80=bar", &SyntaxError{Msg: "invalid key", Line: 1, Pos: 2}}, + {"\x80", &SyntaxError{Msg: "invalid key", Line: 1, Pos: 2}}, } for _, test := range tests { diff --git a/encode.go b/encode.go index aa45ee0..55f1603 100644 --- a/encode.go +++ b/encode.go @@ -170,11 +170,11 @@ func invalidKeyRune(r rune) bool { } func invalidKeyString(key string) bool { - return len(key) == 0 || strings.IndexFunc(key, invalidKeyRune) != -1 || !utf8.ValidString(key) + return len(key) == 0 || strings.IndexFunc(key, invalidKeyRune) != -1 } func invalidKey(key []byte) bool { - return len(key) == 0 || bytes.IndexFunc(key, invalidKeyRune) != -1 || !utf8.Valid(key) + return len(key) == 0 || bytes.IndexFunc(key, invalidKeyRune) != -1 } func writeStringKey(w io.Writer, key string) error { @@ -239,7 +239,7 @@ func writeStringValue(w io.Writer, value string, ok bool) error { var err error if ok && value == "null" { _, err = io.WriteString(w, `"null"`) - } else if strings.IndexFunc(value, needsQuotedValueRune) != -1 || !utf8.ValidString(value) { + } else if strings.IndexFunc(value, needsQuotedValueRune) != -1 { _, err = writeQuotedString(w, value) } else { _, err = io.WriteString(w, value) @@ -249,7 +249,7 @@ func writeStringValue(w io.Writer, value string, ok bool) error { func writeBytesValue(w io.Writer, value []byte) error { var err error - if bytes.IndexFunc(value, needsQuotedValueRune) >= 0 || !utf8.Valid(value) { + if bytes.IndexFunc(value, needsQuotedValueRune) != -1 { _, err = writeQuotedBytes(w, value) } else { _, err = w.Write(value) diff --git a/encode_test.go b/encode_test.go index 877e6e4..ebebaae 100644 --- a/encode_test.go +++ b/encode_test.go @@ -53,11 +53,11 @@ func TestEncodeKeyval(t *testing.T) { {key: decimalStringer{5, 9}, value: "v", want: "5.9=v"}, {key: (*decimalStringer)(nil), value: "v", err: logfmt.ErrNilKey}, {key: marshalerStringer{5, 9}, value: "v", want: "5.9=v"}, - {key: "k", value: "\xbd", want: "k=\"\\ufffd\""}, - {key: "k", value: "\ufffd\x00", want: "k=\"\\ufffd\\u0000\""}, - {key: "k", value: "\ufffd", want: "k=\"\\ufffd\""}, - {key: "k", value: []byte("\ufffd\x00"), want: "k=\"\\ufffd\\u0000\""}, - {key: "k", value: []byte("\ufffd"), want: "k=\"\\ufffd\""}, + {key: "k", value: "\xbd", want: `k="\ufffd"`}, + {key: "k", value: "\ufffd\x00", want: `k="\ufffd\u0000"`}, + {key: "k", value: "\ufffd", want: `k="\ufffd"`}, + {key: "k", value: []byte("\ufffd\x00"), want: `k="\ufffd\u0000"`}, + {key: "k", value: []byte("\ufffd"), want: `k="\ufffd"`}, } for _, d := range data { diff --git a/fuzz.go b/fuzz.go index 5e93bf1..6553b35 100644 --- a/fuzz.go +++ b/fuzz.go @@ -73,7 +73,6 @@ func parse(data []byte) ([][]kv, error) { kvs = append(kvs, kv{dec.Key(), dec.Value()}) } got = append(got, kvs) - kvs = nil } return got, dec.Err() }