|
|
// Copyright (c) 2022 Proton AG |
|
|
// |
|
|
// This file is part of Proton Mail Bridge. |
|
|
// |
|
|
// Proton Mail Bridge is free software: you can redistribute it and/or modify |
|
|
// it under the terms of the GNU General Public License as published by |
|
|
// the Free Software Foundation, either version 3 of the License, or |
|
|
// (at your option) any later version. |
|
|
// |
|
|
// Proton Mail Bridge is distributed in the hope that it will be useful, |
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
|
// GNU General Public License for more details. |
|
|
// |
|
|
// You should have received a copy of the GNU General Public License |
|
|
// along with Proton Mail Bridge. If not, see <https://www.gnu.org/licenses/>. |
|
|
|
|
|
package pmmime |
|
|
|
|
|
import ( |
|
|
"bytes" |
|
|
"strings" |
|
|
"testing" |
|
|
|
|
|
"golang.org/x/text/encoding/htmlindex" |
|
|
|
|
|
a "github.com/stretchr/testify/assert" |
|
|
) |
|
|
|
|
|
func TestDecodeHeader(t *testing.T) { |
|
|
testData := []struct{ raw, expected string }{ |
|
|
{ |
|
|
"", |
|
|
"", |
|
|
}, |
|
|
{ |
|
|
"=?iso-2022-jp?Q?=1B$B!Z=1B(BTimes_Car_PLUS=1B$B![JV5Q>Z=1B(B?=", |
|
|
"【Times Car PLUS】返却証", |
|
|
}, |
|
|
{ |
|
|
`=?iso-2022-jp?Q?iTunes_Movie_=1B$B%K%e!<%j%j!<%9$HCmL\:nIJ=1B(B?=`, |
|
|
"iTunes Movie ニューリリースと注目作品", |
|
|
}, |
|
|
{ |
|
|
"=?UTF-8?B?w4TDi8OPw5bDnA==?= =?UTF-8?B?IMOkw6vDr8O2w7w=?=", |
|
|
"ÄËÏÖÜ äëïöü", |
|
|
}, |
|
|
{ |
|
|
"=?ISO-8859-2?B?xMtJ1tw=?= =?ISO-8859-2?B?IOTrafb8?=", |
|
|
"ÄËIÖÜ äëiöü", |
|
|
}, |
|
|
{ |
|
|
"=?uknown?B?xMtJ1tw=?= =?ISO-8859-2?B?IOTrafb8?=", |
|
|
"=?uknown?B?xMtJ1tw=?= =?ISO-8859-2?B?IOTrafb8?=", |
|
|
}, |
|
|
} |
|
|
|
|
|
for _, val := range testData { |
|
|
if decoded, err := DecodeHeader(val.raw); strings.Compare(val.expected, decoded) != 0 { |
|
|
t.Errorf("Incorrect decoding of header %q expected %q but have %q; Error %v", val.raw, val.expected, decoded, err) |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
// testParseMediaTypeData is a single ParseMediaType test case: the raw value
// to parse together with the media type and parameters expected back.
type testParseMediaTypeData struct {
	arg           string            // input handed to ParseMediaType
	wantMediaType string            // media type the parser must return
	wantParams    map[string]string // parameters the parser must return
}
|
|
|
|
|
func (d *testParseMediaTypeData) run(t *testing.T) { |
|
|
gotMediaType, params, err := ParseMediaType(d.arg) |
|
|
a.Nil(t, err) |
|
|
a.Equal(t, d.wantMediaType, gotMediaType) |
|
|
a.Equal(t, d.wantParams, params) |
|
|
} |
|
|
|
|
|
func TestParseMediaType(t *testing.T) { |
|
|
testTable := map[string]testParseMediaTypeData{ |
|
|
"TwiceTheSameParameter": { |
|
|
arg: "attachment; filename=joy.txt; filename=JOY.TXT; title=hi;", |
|
|
wantMediaType: "attachment", |
|
|
wantParams: map[string]string{"filename": "JOY.TXT", "title": "hi"}, |
|
|
}, |
|
|
"SingleLineUTF8": { |
|
|
arg: "attachment;\nfilename*=utf-8''%F0%9F%98%81%F0%9F%98%82.txt;\n title=smile", |
|
|
wantMediaType: "attachment", |
|
|
wantParams: map[string]string{"filename": "😁😂.txt", "title": "smile"}, |
|
|
}, |
|
|
"MultiLineUTF8": { |
|
|
arg: "attachment;\nfilename*0*=utf-8''%F0%9F%98%81; title=smile;\nfilename*1*=%F0%9F%98%82;\nfilename*2=.txt", |
|
|
wantMediaType: "attachment", |
|
|
wantParams: map[string]string{"filename": "😁😂.txt", "title": "smile"}, |
|
|
}, |
|
|
"MultiLineFirstNoEncNextUTF8": { |
|
|
arg: "attachment;\nfilename*0*=utf-8''joy ;\n title*=utf-8''smile; \nfilename*1*=%F0%9F%98%82;\nfilename*2=.txt", |
|
|
wantMediaType: "attachment", |
|
|
wantParams: map[string]string{"filename": "joy😂.txt", "title": "smile"}, |
|
|
}, |
|
|
"SingleLineBig5": { |
|
|
arg: "attachment;\nfilename*=big5''%B3%C6%A7%D1%BF%FD.m4a; title*=utf8''memorandum", |
|
|
wantMediaType: "attachment", |
|
|
wantParams: map[string]string{"filename": "備忘錄.m4a", "title": "memorandum"}, |
|
|
}, |
|
|
"MultiLineBig5": { |
|
|
arg: "attachment;\nfilename*0*=big5''%B3%C6a; title*0=utf8''memorandum; filename*2=%BF%FD.m4a; \nfilename*1*=%A7%D1b;", |
|
|
wantMediaType: "attachment", |
|
|
wantParams: map[string]string{"filename": "備a忘b錄.m4a", "title": "memorandum"}, |
|
|
}, |
|
|
"SingleLineBadEncoding": { |
|
|
arg: "attachment;\nfilename*=utf-8'%F0%9F%98%81%F0%9F%98%82.txt;\n title=smile", |
|
|
wantMediaType: "attachment", |
|
|
wantParams: map[string]string{"title": "smile"}, |
|
|
}, |
|
|
"MultiLineBadEncoding": { |
|
|
arg: "attachment;\nfilename*0*=utf-8'%F0%9F%98%81; title=smile;\nfilename*1*=%F0%9F%98%82;\nfilename*2=.txt", |
|
|
wantMediaType: "attachment", |
|
|
wantParams: map[string]string{"filename": "😂.txt", "title": "smile"}, |
|
|
}, |
|
|
} |
|
|
for name, testData := range testTable { |
|
|
t.Run(name, testData.run) |
|
|
} |
|
|
} |
|
|
|
|
|
func TestGetEncoding(t *testing.T) { |
|
|
// All MIME charsets with aliases can be found here: |
|
|
// https://www.iana.org/assignments/character-sets/character-sets.xhtml |
|
|
mimesets := map[string][]string{ |
|
|
"utf-8": { // MIB 16 |
|
|
"utf8", |
|
|
"csutf8", |
|
|
"unicode-1-1-utf-8", |
|
|
"iso-utf-8", |
|
|
"utf8mb4", |
|
|
}, |
|
|
"gbk": { |
|
|
"gb2312", // MIB 2025 |
|
|
//"euc-cn": []string{ |
|
|
"euccn", |
|
|
"ibm-euccn", |
|
|
}, |
|
|
//"utf7": []string{"utf-7", "unicode-1-1-utf-7"}, |
|
|
"iso-8859-2": { // MIB 5 |
|
|
"iso-ir-101", |
|
|
"iso_8859-2", |
|
|
"iso8859-2", |
|
|
"latin2", |
|
|
"l2", |
|
|
"csisolatin2", |
|
|
"ibm852", |
|
|
//"FAILEDibm852", |
|
|
}, |
|
|
"iso-8859-3": { // MIB 6 |
|
|
"iso-ir-109", |
|
|
"iso_8859-3", |
|
|
"latin3", |
|
|
"l3", |
|
|
"csisolatin3", |
|
|
}, |
|
|
"iso-8859-4": { // MIB 7 |
|
|
"iso-ir-110", |
|
|
"iso_8859-4", |
|
|
"latin4", |
|
|
"l4", |
|
|
"csisolatin4", |
|
|
}, |
|
|
"iso-8859-5": { // MIB 8 |
|
|
"iso-ir-144", |
|
|
"iso_8859-5", |
|
|
"cyrillic", |
|
|
"csisolatincyrillic", |
|
|
}, |
|
|
"iso-8859-6": { // MIB 9 |
|
|
"iso-ir-127", |
|
|
"iso_8859-6", |
|
|
"ecma-114", |
|
|
"asmo-708", |
|
|
"arabic", |
|
|
"csisolatinarabic", |
|
|
//"iso-8859-6e": []string{ // MIB 81 just direction |
|
|
"csiso88596e", |
|
|
"iso-8859-6-e", |
|
|
//"iso-8859-6i": []string{ // MIB 82 |
|
|
"csiso88596i", |
|
|
"iso-8859-6-i", |
|
|
}, |
|
|
"iso-8859-7": { // MIB 10 |
|
|
"iso-ir-126", |
|
|
"iso_8859-7", |
|
|
"elot_928", |
|
|
"ecma-118", |
|
|
"greek", |
|
|
"greek8", |
|
|
"csisolatingreek", |
|
|
}, |
|
|
"iso-8859-8": { // MIB 11 |
|
|
"iso-ir-138", |
|
|
"iso_8859-8", |
|
|
"hebrew", |
|
|
"csisolatinhebrew", |
|
|
//"iso-8859-8e": []string{ // MIB 84 (directionality |
|
|
"csiso88598e", |
|
|
"iso-8859-8-e", |
|
|
}, |
|
|
"iso-8859-8-i": { // MIB 85 |
|
|
"logical", |
|
|
"csiso88598i", |
|
|
"iso-8859-8-i", // Hebrew, the "i" means right-to-left, probably unnecessary with ISO cleaning above. |
|
|
}, |
|
|
"iso-8859-10": { // MIB 13 |
|
|
"iso-ir-157", |
|
|
"l6", |
|
|
"iso_8859-10:1992", |
|
|
"csisolatin6", |
|
|
"latin6", |
|
|
}, |
|
|
"iso-8859-13": { // MIB 109 |
|
|
"csiso885913"}, |
|
|
"iso-8859-14": { // MIB 110 |
|
|
"iso-ir-199", |
|
|
"iso_8859-14:1998", |
|
|
"iso_8859-14", |
|
|
"latin8", |
|
|
"iso-celtic", |
|
|
"l8", |
|
|
"csiso885914", |
|
|
}, |
|
|
"iso-8859-15": { // MIB 111 |
|
|
"iso_8859-15", |
|
|
"latin-9", |
|
|
"csiso885915", |
|
|
"ISO8859-15", |
|
|
}, |
|
|
"iso-8859-16": { // MIB 112 |
|
|
"iso-ir-226", |
|
|
"iso_8859-16:2001", |
|
|
"iso_8859-16", |
|
|
"latin10", |
|
|
"l10", |
|
|
"csiso885916", |
|
|
}, |
|
|
"windows-874": { // MIB 2109 |
|
|
"cswindows874", |
|
|
"cp874", |
|
|
"iso-8859-11", |
|
|
"tis-620", |
|
|
}, |
|
|
"windows-1250": { // MIB 2250 |
|
|
"cswindows1250", |
|
|
"cp1250", |
|
|
}, |
|
|
"windows-1251": { // MIB 2251 |
|
|
"cswindows1251", |
|
|
"cp1251", |
|
|
}, |
|
|
"windows-1252": { // MIB 2252 |
|
|
"cswindows1252", |
|
|
"cp1252", |
|
|
"3dwindows-1252", |
|
|
"we8mswin1252", |
|
|
"us-ascii", // MIB 3 |
|
|
"ansi_x3.110-1983", // MIB 74 // usascii |
|
|
//"iso-8859-1": []string{ // MIB 4 succeed by win1252 |
|
|
"iso8859-1", |
|
|
"iso-ir-100", |
|
|
"iso_8859-1", |
|
|
"latin1", |
|
|
"l1", |
|
|
"ibm819", |
|
|
"cp819", |
|
|
"csisolatin1", |
|
|
"ansi_x3.4-1968", |
|
|
"ansi_x3.4-1986", |
|
|
"cp850", |
|
|
"cp858", // "cp850" Mostly correct except for the Euro sign. |
|
|
"iso_646.irv:1991", |
|
|
"iso646-us", |
|
|
"us", |
|
|
"ibm367", |
|
|
"cp367", |
|
|
"csascii", |
|
|
"ascii", |
|
|
"iso-ir-6", |
|
|
"we8iso8859p1", |
|
|
}, |
|
|
"windows-1253": {"cswindows1253", "cp1253"}, // MIB 2253 |
|
|
"windows-1254": {"cswindows1254", "cp1254"}, // MIB 2254 |
|
|
"windows-1255": {"cSwindows1255", "cp1255"}, // MIB 2255 |
|
|
"windows-1256": {"cswIndows1256", "cp1256"}, // MIB 2256 |
|
|
"windows-1257": {"cswinDows1257", "cp1257"}, // MIB 2257 |
|
|
"windows-1258": {"cswindoWs1258", "cp1258"}, // MIB 2257 |
|
|
"koi8-r": {"cskoi8r", "koi8r"}, // MIB 2084 |
|
|
"koi8-u": {"cskoi8u", "koi8u"}, // MIB 2088 |
|
|
"macintosh": {"mac", "macroman", "csmacintosh"}, // MIB 2027 |
|
|
"big5": { |
|
|
"zht16mswin950", // cp950 |
|
|
"cp950", |
|
|
}, |
|
|
"euc-kr": { |
|
|
"euckr", // MIB 38 |
|
|
"ibm-euckr", |
|
|
//"uhc": []string{ // Korea |
|
|
"ks_c_5601-1987", |
|
|
"ksc5601", |
|
|
"cp949", |
|
|
}, |
|
|
"euc-jp": { |
|
|
"eucjp", |
|
|
"ibm-eucjp", |
|
|
}, |
|
|
"shift_jis": { |
|
|
"CP932", |
|
|
"MS932", |
|
|
"Windows-932", |
|
|
"Windows-31J", |
|
|
"MS_Kanji", |
|
|
"IBM-943", |
|
|
"CP943", |
|
|
}, |
|
|
"iso-2022-jp": { // MIB 39 |
|
|
"iso2022jp", |
|
|
"csiso2022jp", |
|
|
}, |
|
|
} |
|
|
|
|
|
for expected, names := range mimesets { |
|
|
expenc, _ := htmlindex.Get(expected) |
|
|
if canonical, err := htmlindex.Name(expenc); canonical != expected || err != nil { |
|
|
t.Fatalf("Error while get canonical name. Expected '%v' but have %v `%#v`: %v", expected, canonical, expenc, err) |
|
|
} |
|
|
for _, name := range names { |
|
|
enc, err := getEncoding(name) |
|
|
if err != nil || enc == nil { |
|
|
t.Errorf("Error while getting encoding for %v returned: '%#v' and error: '%v'", name, enc, err) |
|
|
} |
|
|
if expenc != enc { |
|
|
t.Errorf("For %v expected %v '%v' but have '%v'", name, expected, expenc, enc) |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
// sample text for UTF8 http://www.columbia.edu/~fdc/utf8/index.html |
|
|
func TestEncodeReader(t *testing.T) { |
|
|
// define test data |
|
|
testData := []struct { |
|
|
charset string |
|
|
original []byte |
|
|
message string |
|
|
}{ |
|
|
// russian |
|
|
{ |
|
|
"koi8-r", |
|
|
// а, з, б, у, к, а, а, б, в, г, д, е, ё |
|
|
[]byte{0xC1, 0xDA, 0xC2, 0xD5, 0xCB, 0xC1, 0xC1, 0xC2, 0xD7, 0xC7, 0xC4, 0xC5, 0xA3}, |
|
|
"азбукаабвгдеё", |
|
|
}, |
|
|
{ |
|
|
"KOI8-R", |
|
|
[]byte{0xC1, 0xDA, 0xC2, 0xD5, 0xCB, 0xC1, 0xC1, 0xC2, 0xD7, 0xC7, 0xC4, 0xC5, 0xA3}, |
|
|
"азбукаабвгдеё", |
|
|
}, |
|
|
{ |
|
|
"csKOI8R", |
|
|
[]byte{0xC1, 0xDA, 0xC2, 0xD5, 0xCB, 0xC1, 0xC1, 0xC2, 0xD7, 0xC7, 0xC4, 0xC5, 0xA3}, |
|
|
"азбукаабвгдеё", |
|
|
}, |
|
|
{ |
|
|
"koi8-u", |
|
|
[]byte{0xC1, 0xDA, 0xC2, 0xD5, 0xCB, 0xC1, 0xC1, 0xC2, 0xD7, 0xC7, 0xC4, 0xC5, 0xA3}, |
|
|
"азбукаабвгдеё", |
|
|
}, |
|
|
{ |
|
|
"iso-8859-5", |
|
|
// а , з , б , у , к , а , а , б , в , г , д , е , ё |
|
|
[]byte{0xD0, 0xD7, 0xD1, 0xE3, 0xDA, 0xD0, 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xF1}, |
|
|
"азбукаабвгдеё", |
|
|
}, |
|
|
{ |
|
|
"csWrong", |
|
|
[]byte{0xD0, 0xD7, 0xD1, 0xE3, 0xDA, 0xD0, 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6}, |
|
|
"", |
|
|
}, |
|
|
{ |
|
|
"utf8", |
|
|
[]byte{0xD0, 0xB0, 0xD0, 0xB7, 0xD0, 0xB1, 0xD1, 0x83, 0xD0, 0xBA, 0xD0, 0xB0, 0xD0, 0xB0, 0xD0, 0xB1, 0xD0, 0xB2, 0xD0, 0xB3, 0xD0, 0xB4, 0xD0, 0xB5, 0xD1, 0x91}, |
|
|
"азбукаабвгдеё", |
|
|
}, |
|
|
// czechoslovakia |
|
|
{ |
|
|
"windows-1250", |
|
|
[]byte{225, 228, 232, 233, 236, 244}, |
|
|
"áäčéěô", |
|
|
}, |
|
|
// umlauts |
|
|
{ |
|
|
"iso-8859-1", |
|
|
[]byte{196, 203, 214, 220, 228, 235, 246, 252}, |
|
|
"ÄËÖÜäëöü", |
|
|
}, |
|
|
// latvia |
|
|
{ |
|
|
"iso-8859-4", |
|
|
[]byte{224, 239, 243, 182, 254}, |
|
|
"āīķļū", |
|
|
}, |
|
|
{ // encoded by https://www.motobit.com/util/charset-codepage-conversion.asp |
|
|
"utf7", |
|
|
[]byte("He wes Leovena+APA-es sone -- li+APA-e him be Drihten.+A6QDtw- +A7MDuwPOA8MDwwOx- +A7wDvwPF- +A60DtAPJA8MDsQO9- +A7UDuwO7A7cDvQO5A7oDrg-. +BCcENQRABD0ENQQ7BDg- +BDgENwQxBEs- +BDcENAQ1BEEETA- +BDg- +BEIEMAQ8-,+BCcENQRABD0ENQQ7BDg- +BDgENwQxBEs- +BDcENAQ1BEEETA- +BDg- +BEIEMAQ8-,+C68LvguuC7ELvwuoC80LpA- +C64Lygu0C78LlQuzC78LsgvH- +C6QLrgu/C7QLzQuuC8oLtAu/- +C6oLywuyC80- +C4cLqQu/C6QLvgu1C6QLwQ- +C44LmQvNC5ULwQuuC80- +C5ULvgujC8sLrgvN-."), |
|
|
"He wes Leovenaðes sone -- liðe him be Drihten.Τη γλώσσα μου έδωσαν ελληνική. Чернели избы здесь и там,Чернели избы здесь и там,யாமறிந்த மொழிகளிலே தமிழ்மொழி போல் இனிதாவது எங்கும் காணோம்.", |
|
|
}, |
|
|
|
|
|
// iconv -f UTF8 -t GB2312 utf8.txt | hexdump -v -e '"0x" 1/1 "%x, "' |
|
|
{ // encoded by iconv; dump by `cat gb2312.txt | hexdump -v -e '"0x" 1/1 "%x "'` and reformat; text from https://zh.wikipedia.org/wiki/GB_2312 |
|
|
"GB2312", |
|
|
[]byte{0x47, 0x42, 0x20, 0x32, 0x33, 0x31, 0x32, 0xb5, 0xc4, 0xb3, 0xf6, 0xcf, 0xd6, 0xa3, 0xac, 0xbb, 0xf9, 0xb1, 0xbe, 0xc2, 0xfa, 0xd7, 0xe3, 0xc1, 0xcb, 0xba, 0xba, 0xd7, 0xd6, 0xb5, 0xc4, 0xbc, 0xc6, 0xcb, 0xe3, 0xbb, 0xfa, 0xb4, 0xa6, 0xc0, 0xed, 0xd0, 0xe8, 0xd2, 0xaa, 0xa3, 0xac, 0xcb, 0xfc, 0xcb, 0xf9, 0xca, 0xd5, 0xc2, 0xbc, 0xb5, 0xc4, 0xba, 0xba, 0xd7, 0xd6, 0xd2, 0xd1, 0xbe, 0xad, 0xb8, 0xb2, 0xb8, 0xc7, 0xd6, 0xd0, 0xb9, 0xfa, 0xb4, 0xf3, 0xc2, 0xbd, 0x39, 0x39, 0x2e, 0x37, 0x35, 0x25, 0xb5, 0xc4, 0xca, 0xb9, 0xd3, 0xc3, 0xc6, 0xb5, 0xc2, 0xca, 0xa1, 0xa3, 0xb5, 0xab, 0xb6, 0xd4, 0xd3, 0xda, 0xc8, 0xcb, 0xc3, 0xfb}, |
|
|
"GB 2312的出现,基本满足了汉字的计算机处理需要,它所收录的汉字已经覆盖中国大陆99.75%的使用频率。但对于人名", |
|
|
}, |
|
|
|
|
|
{ // encoded by iconv; text from https://jp.wikipedia.org/wiki/Shift_JIS |
|
|
"shift-jis", |
|
|
[]byte{0x95, 0xb6, 0x8e, 0x9a, 0x95, 0x84, 0x8d, 0x86, 0x89, 0xbb, 0x95, 0xfb, 0x8e, 0xae, 0x53, 0x68, 0x69, 0x66, 0x74, 0x5f, 0x4a, 0x49, 0x53, 0x82, 0xcc, 0x90, 0xdd, 0x8c, 0x76, 0x8e, 0xd2, 0x82, 0xe7, 0x82, 0xcd, 0x81, 0x41, 0x90, 0xe6, 0x8d, 0x73, 0x82, 0xb5, 0x82, 0xc4, 0x82, 0xe6, 0x82, 0xad, 0x97, 0x98, 0x97, 0x70, 0x82, 0xb3, 0x82, 0xea, 0x82, 0xc4, 0x82, 0xa2, 0x82, 0xbd, 0x4a, 0x49, 0x53, 0x20, 0x43, 0x20, 0x36, 0x32, 0x32, 0x30, 0x81, 0x69, 0x8c, 0xbb, 0x8d, 0xdd, 0x82, 0xcc, 0x4a, 0x49, 0x53, 0x20, 0x58, 0x20, 0x30, 0x32, 0x30, 0x31, 0x81, 0x6a, 0x82, 0xcc, 0x38, 0x83, 0x72, 0x83, 0x62, 0x83, 0x67, 0x95, 0x84, 0x8d, 0x86, 0x81, 0x69, 0x88, 0xc8, 0x89, 0xba, 0x81, 0x75, 0x89, 0x70, 0x90, 0x94, 0x8e, 0x9a, 0x81, 0x45, 0x94, 0xbc, 0x8a, 0x70, 0x83, 0x4a, 0x83, 0x69, 0x81, 0x76, 0x81, 0x6a, 0x82, 0xc6, 0x81, 0x41, 0x4a, 0x49, 0x53, 0x20, 0x43, 0x20, 0x36, 0x32, 0x32, 0x36, 0x81, 0x69, 0x8c, 0xbb, 0x8d, 0xdd, 0x82, 0xcc, 0x4a, 0x49, 0x53, 0x20, 0x58, 0x20, 0x30, 0x32, 0x30, 0x38, 0x81, 0x41, 0x88, 0xc8, 0x89, 0xba, 0x81, 0x75, 0x8a, 0xbf, 0x8e, 0x9a, 0x81, 0x76, 0x81, 0x6a, 0x82, 0xcc, 0x97, 0xbc, 0x95, 0xb6, 0x8e, 0x9a, 0x8f, 0x57, 0x8d, 0x87, 0x82, 0xf0, 0x95, 0x5c, 0x8c, 0xbb, 0x82, 0xb5, 0x82, 0xe6, 0x82, 0xa4, 0x82, 0xc6, 0x82, 0xb5, 0x82, 0xbd, 0x81, 0x42, 0x82, 0xdc, 0x82, 0xbd, 0x81, 0x41, 0x83, 0x74, 0x83, 0x40, 0x83, 0x43, 0x83, 0x8b, 0x82, 0xcc, 0x91, 0xe5, 0x82, 0xab, 0x82, 0xb3, 0x82, 0xe2, 0x8f, 0x88, 0x97, 0x9d, 0x8e, 0x9e, 0x8a, 0xd4, 0x82, 0xcc, 0x92, 0x5a, 0x8f, 0x6b, 0x82, 0xf0, 0x90, 0x7d, 0x82, 0xe9, 0x82, 0xbd, 0x82, 0xdf, 0x81, 0x41, 0x83, 0x47, 0x83, 0x58, 0x83, 0x50, 0x81, 0x5b, 0x83, 0x76, 0x83, 0x56, 0x81, 0x5b, 0x83, 0x50, 0x83, 0x93, 0x83, 0x58, 0x82, 0xc8, 0x82, 0xb5, 0x82, 0xc5, 0x8d, 0xac, 0x8d, 0xdd, 0x89, 0xc2, 0x94, 0x5c, 0x82, 0xc9, 0x82, 0xb7, 0x82, 0xe9, 0x82, 0xb1, 0x82, 0xc6, 0x82, 0xf0, 0x8a, 0xe9, 0x90, 0x7d, 0x82, 0xb5, 0x82, 0xbd, 0x81, 0x42}, |
|
|
"文字符号化方式Shift_JISの設計者らは、先行してよく利用されていたJIS C 6220(現在のJIS X 0201)の8ビット符号(以下「英数字・半角カナ」)と、JIS C 6226(現在のJIS X 0208、以下「漢字」)の両文字集合を表現しようとした。また、ファイルの大きさや処理時間の短縮を図るため、エスケープシーケンスなしで混在可能にすることを企図した。", |
|
|
}, |
|
|
|
|
|
// add more from mutations of https://en.wikipedia.org/wiki/World_Wide_Web |
|
|
|
|
|
} |
|
|
|
|
|
// run tests |
|
|
for _, val := range testData { |
|
|
// fmt.Println("Testing ", val) |
|
|
expected := []byte(val.message) |
|
|
decoded, err := DecodeCharset(val.original, "text/plain; charset="+val.charset) |
|
|
if len(expected) == 0 { |
|
|
if err == nil { |
|
|
t.Error("Expected err but have ", err) |
|
|
} else { |
|
|
// fmt.Println("Expected err: ", err) |
|
|
continue |
|
|
} |
|
|
} else { |
|
|
if err != nil { |
|
|
t.Error("Expected ok but have ", err) |
|
|
} |
|
|
} |
|
|
|
|
|
if bytes.Equal(decoded, expected) { |
|
|
// fmt.Println("Succesfull decoding of ", val.params, ":", string(decoded)) |
|
|
} else { |
|
|
t.Error("Wrong encoding of ", val.charset, ".Expected\n", expected, "\nbut have\n", decoded) |
|
|
} |
|
|
if strings.Compare(val.message, string(decoded)) != 0 { |
|
|
t.Error("Wrong message for ", val.charset, ".Expected\n", val.message, "\nbut have\n", string(decoded)) |
|
|
} |
|
|
} |
|
|
}
|
|
|
|