Skip to content

Commit

Permalink
Handle the invalid "utf8" encoding
Browse files: browse the repository at this point in the history
While this encoding name is non-standard, it does appear in the wild. As
its meaning is unambiguous, we should recognize and handle it (but not
emit it ourselves).
  • Loading branch information
benburwell authored and emersion committed Jul 9, 2019
1 parent 5c83a65 commit 8f23f91
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 2 deletions.
4 changes: 2 additions & 2 deletions charset/charset.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -49,8 +49,8 @@ func init() {
// Reader returns an io.Reader that converts the provided charset to UTF-8.
func Reader(charset string, input io.Reader) (io.Reader, error) {
charset = strings.ToLower(charset)
// "ascii" is not in the spec but is common
if charset == "utf-8" || charset == "us-ascii" || charset == "ascii" {
// "ascii" and "utf8" are not in the spec but are common
if charset == "utf-8" || charset == "utf8" || charset == "us-ascii" || charset == "ascii" {
return input, nil
}
if enc, ok := charsets[charset]; ok {
Expand Down
5 changes: 5 additions & 0 deletions charset/charset_test.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -21,6 +21,11 @@ var testCharsets = []struct {
encoded: []byte("café"),
decoded: "café",
},
{
charset: "utf8",
encoded: []byte("café"),
decoded: "café",
},
{
charset: "windows-1250",
encoded: []byte{0x8c, 0x8d, 0x8f, 0x9c, 0x9d, 0x9f, 0xbc, 0xbe},
Expand Down

0 comments on commit 8f23f91

Please sign in to comment.