Handle the non-standard "utf8" encoding name

While this encoding name is non-standard, it does appear in the wild. As its meaning is unambiguous, we should recognize and handle it (but not emit it ourselves).
emersion · Jul 9, 2019 · 8f23f91
1 parent 5c83a65
commit 8f23f91
Show file tree

Hide file tree

Showing 2 changed files with 7 additions and 2 deletions.
diff --git a/charset/charset.go b/charset/charset.go
@@ -49,8 +49,8 @@ func init() {
 // Reader returns an io.Reader that converts the provided charset to UTF-8.
 func Reader(charset string, input io.Reader) (io.Reader, error) {
 	charset = strings.ToLower(charset)
-	// "ascii" is not in the spec but is common
-	if charset == "utf-8" || charset == "us-ascii" || charset == "ascii" {
+	// "ascii" and "utf8" are not in the spec but are common
+	if charset == "utf-8" || charset == "utf8" || charset == "us-ascii" || charset == "ascii" {
 		return input, nil
 	}
 	if enc, ok := charsets[charset]; ok {

diff --git a/charset/charset_test.go b/charset/charset_test.go
@@ -21,6 +21,11 @@ var testCharsets = []struct {
 		encoded: []byte("café"),
 		decoded: "café",
 	},
+	{
+		charset: "utf8",
+		encoded: []byte("café"),
+		decoded: "café",
+	},
 	{
 		charset: "windows-1250",
 		encoded: []byte{0x8c, 0x8d, 0x8f, 0x9c, 0x9d, 0x9f, 0xbc, 0xbe},