You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
188 lines
5.2 KiB
188 lines
5.2 KiB
// Copyright (c) 2022 Proton AG |
|
// |
|
// This file is part of Proton Mail Bridge. |
|
// |
|
// Proton Mail Bridge is free software: you can redistribute it and/or modify |
|
// it under the terms of the GNU General Public License as published by |
|
// the Free Software Foundation, either version 3 of the License, or |
|
// (at your option) any later version. |
|
// |
|
// Proton Mail Bridge is distributed in the hope that it will be useful, |
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
// GNU General Public License for more details. |
|
// |
|
// You should have received a copy of the GNU General Public License |
|
// along with Proton Mail Bridge. If not, see <https://www.gnu.org/licenses/>. |
|
|
|
package pmmime |
|
|
|
import ( |
|
"encoding/base64" |
|
"errors" |
|
"unicode/utf16" |
|
"unicode/utf8" |
|
|
|
"golang.org/x/text/encoding" |
|
"golang.org/x/text/transform" |
|
) |
|
|
|
// utf7Decoder copied from: https://github.com/cention-sany/utf7/blob/master/utf7.go |
|
// We need `encoding.Decoder` instead of function `UTF7DecodeBytes`. |
|
type utf7Decoder struct { |
|
transform.NopResetter |
|
} |
|
|
|
// NewUtf7Decoder returns a new decoder for utf7. |
|
func NewUtf7Decoder() *encoding.Decoder { |
|
return &encoding.Decoder{Transformer: utf7Decoder{}} |
|
} |
|
|
|
const (
	// uRepl is the Unicode replacement code point.
	uRepl = '\uFFFD'

	// u7min and u7max bound the byte range that represents itself in UTF-7.
	u7min = 0x20
	u7max = 0x7E

	// modifiedbase64 is the 64-character alphabet used inside '+' ... '-'
	// shift sequences.
	modifiedbase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
)

var (
	// ErrBadUTF7 is returned when the input is not valid UTF-7.
	ErrBadUTF7 = errors.New("utf7: bad utf-7 encoding")

	// u7enc is the Base64 codec built on the modifiedbase64 alphabet.
	u7enc = base64.NewEncoding(modifiedbase64)
)
|
|
|
// isModifiedBase64 reports whether r belongs to the Base64 alphabet used in
// UTF-7 shift sequences: ASCII letters, digits, '+' and '/'.
func isModifiedBase64(r byte) bool {
	switch {
	case 'A' <= r && r <= 'Z',
		'a' <= r && r <= 'z',
		'0' <= r && r <= '9':
		return true
	case r == '+', r == '/':
		return true
	default:
		return false
	}
}
|
|
|
func (d utf7Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { |
|
var implicit bool |
|
var tmp int |
|
|
|
nd, n := len(dst), len(src) |
|
if n == 0 && !atEOF { |
|
return 0, 0, transform.ErrShortSrc |
|
} |
|
for ; nSrc < n; nSrc++ { |
|
if nDst >= nd { |
|
return nDst, nSrc, transform.ErrShortDst |
|
} |
|
if c := src[nSrc]; ((c < u7min || c > u7max) && |
|
c != '\t' && c != '\r' && c != '\n') || |
|
c == '~' || c == '\\' { |
|
return nDst, nSrc, ErrBadUTF7 // Illegal code point in ASCII mode. |
|
} else if c != '+' { |
|
dst[nDst] = c // Character is self-representing. |
|
nDst++ |
|
continue |
|
} |
|
// Found '+'. |
|
start := nSrc + 1 |
|
tmp = nSrc // nSrc still points to '+', tmp points to the end of BASE64. |
|
|
|
// Find the end of the Base64 or "+-" segment. |
|
implicit = false |
|
for tmp++; tmp < n && src[tmp] != '-'; tmp++ { |
|
if !isModifiedBase64(src[tmp]) { |
|
if tmp == start { |
|
return nDst, tmp, ErrBadUTF7 // '+' next char must modified base64. |
|
} |
|
// Implicit shift back to ASCII, no need for '-' character. |
|
implicit = true |
|
break |
|
} |
|
} |
|
if tmp == start { |
|
if tmp == n { |
|
// Did not find '-' sign and '+' is the last character. |
|
// Total nSrc does not include '+'. |
|
if atEOF { |
|
return nDst, nSrc, ErrBadUTF7 // '+' can not be at the end. |
|
} |
|
// '+' can not be at the end, the source is too short. |
|
return nDst, nSrc, transform.ErrShortSrc |
|
} |
|
dst[nDst] = '+' // Escape sequence "+-". |
|
nDst++ |
|
} else if tmp == n && !atEOF { |
|
// No EOF found, the source is too short. |
|
return nDst, nSrc, transform.ErrShortSrc |
|
} else if b := utf7dec(src[start:tmp]); len(b) > 0 { |
|
if len(b)+nDst > nd { |
|
// Need more space in dst for the decoded modified BASE64 unicode. |
|
// Total nSrc does not include '+'. |
|
return nDst, nSrc, transform.ErrShortDst |
|
} |
|
copy(dst[nDst:], b) // Control or non-ASCII code points in Base64. |
|
nDst += len(b) |
|
if implicit { |
|
if nDst >= nd { |
|
return nDst, tmp, transform.ErrShortDst |
|
} |
|
dst[nDst] = src[tmp] // Implicit shift. |
|
nDst++ |
|
} |
|
if tmp == n { |
|
return nDst, tmp, nil |
|
} |
|
} else { |
|
return nDst, nSrc, ErrBadUTF7 // Bad encoding. |
|
} |
|
nSrc = tmp |
|
} |
|
return |
|
} |
|
|
|
// utf7dec decodes the Base64 payload of a UTF-7 shift sequence (the bytes
// between '+' and its terminator) into UTF-8.
//
// The payload is unpadded Base64 carrying UTF-16-BE code units. A nil slice
// is returned if the encoding is invalid: empty input, malformed Base64
// (including any '=' padding), an odd number of decoded bytes, or an
// unpaired/misordered surrogate.
func utf7dec(b64 []byte) []byte {
	if len(b64) == 0 {
		return nil // Nothing to decode; also avoids indexing an empty slice.
	}

	// UTF-7 never pads its Base64, so the raw (unpadded) standard encoding
	// decodes it directly without first appending '=' characters.
	enc := base64.RawStdEncoding

	// Allocate a single buffer: the first third receives the UTF-16-BE bytes,
	// the remaining two thirds receive the UTF-8 output. A 2-byte UTF-16 unit
	// expands to at most 3 bytes of UTF-8, so twice the input size suffices.
	buf := make([]byte, enc.DecodedLen(len(b64))*3)

	n, err := enc.Decode(buf, b64)
	if err != nil || n&1 == 1 {
		return nil
	}

	raw, out := buf[:n], buf[n:]
	j := 0
	for i := 0; i < n; i += 2 {
		r := rune(raw[i])<<8 | rune(raw[i+1])
		if utf16.IsSurrogate(r) {
			// A surrogate must be followed by its partner code unit.
			if i += 2; i == n {
				return nil
			}
			r2 := rune(raw[i])<<8 | rune(raw[i+1])
			// DecodeRune yields U+FFFD (== utf8.RuneError) for an invalid pair.
			if r = utf16.DecodeRune(r, r2); r == utf8.RuneError {
				return nil
			}
		}
		j += utf8.EncodeRune(out[j:], r)
	}
	return out[:j]
}
|
|
|