okular/generators/chm/lib/ebook_chm_encoding.cpp

/*
 *  Kchmviewer - a CHM and EPUB file viewer with broad language support
 *  Copyright (C) 2004-2014 George Yunaev, gyunaev@ulduzsoft.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "ebook_chm_encoding.h"

typedef struct {
    const char *qtcodec;
    const short *lcids;
} EbookChmTextEncodingEntry;

// Do not try to embed those in the text_encoding_table, it does not work - at least with gcc.
static short lcid_arabic[] = {
    0x1401, 0x3C01, 0x0C01, 0x0801, 0x2C01, 0x3401, 0x3001, 0x1001, 0x1801, 0x2001, 0x4001, 0x0401, 0x2801, 0x1C01, 0x3801, 0x2401, 0x0429, 0x0420, 0,
};

static short lcid_baltic[] = {0x0425, 0x0426, 0x0427, 0};

static short lcid_centralEuropean[] = {0x041C, 0x041A, 0x0405, 0x040E, 0x0415, 0x0418, 0x081A, 0x041B, 0x0424, 0};

static short lcid_ChineseSimplifiedGB18030[] = {0x0804, 0};

static short lcid_ChineseSimplifiedGBK[] = {0x0804, 0};

static short lcid_ChineseSimplifiedGB2313[] = {0x1004, 0};

static short lcid_ChineseTraditionalBig5[] = {0x0404, 0x1404, 0};

static short lcid_ChineseTraditionalBigHKSCS[] = {0x0C04, 0};

static short lcid_CyrillicCP1251[] = {0x082C, 0x0423, 0x0402, 0x042F, 0x0419, 0x0C1A, 0x0444, 0x0422, 0x0843, 0};

static short lcid_CyrillicKOI8R[] = {0x7001, // artifical LCID
                                     0};

static short lcid_Greek[] = {0x0408, 0};

static short lcid_Hebrew[] = {0x040D, 0};

static short lcid_Japanese_eucJP[] = {0x0411, 0};

static short lcid_Japanese_JIS7[] = {0x0411, 0};

static short lcid_Japanese_ShiftJIS[] = {0x0411, 0};

static short lcid_Korean_eucKR[] = {0x0412, 0};

static short lcid_TamilTSCII[] = {0x0449, 0};

static short lcid_ThaiTIS[] = {0x041E, 0};

static short lcid_UkrainianKOI[] = {0x7006, 0};

static short lcid_Turkish[] = {0x042C, 0x041F, 0x0443, 0};

static short lcid_Vietnamese[] = {0x042A, 0};

static short lcid_UnicodeUTF8[] = {0x7004, // artifical LCID
                                   0};

static short lcid_UnicodeUTF16[] = {0x7005, // artifical LCID
                                    0};

static short lcid_Western[] = {0x0436, 0x042D, 0x0403, 0x0406, 0x0813, 0x0413, 0x0C09, 0x2809, 0x1009, 0x2409, 0x1809, 0x2009, 0x1409, 0x3409, 0x1C09, 0x2C09, 0x0809, 0x0409, 0x0438, 0x040B, 0x080C, 0x0C0C, 0x040C, 0x140C, 0x100C,
                               0x0C07, 0x0407, 0x1407, 0x1007, 0x0807, 0x040F, 0x0421, 0x0410, 0x0810, 0x083E, 0x043E, 0x0414, 0x0814, 0x0416, 0x0816, 0x0432, 0x2C0A, 0x400A, 0x340A, 0x240A, 0x140A, 0x1C0A, 0x300A, 0x440A, 0x100A,
                               0x480A, 0x080A, 0x4C0A, 0x180A, 0x3C0A, 0x280A, 0x500A, 0x0C0A, 0x380A, 0x200A, 0x0441, 0x081D, 0x041D, 0x0434, 0x0435, 0x042B, 0x042C, 0x0439, 0x043A, 0x044E, 0x044F, 0x081A, 0x0443, 0};

static const EbookChmTextEncodingEntry text_encoding_table[] = {{"CP1256", lcid_arabic},

                                                                {"CP1257", lcid_baltic},

                                                                {"CP1250", lcid_centralEuropean},

                                                                {"GB18030", lcid_ChineseSimplifiedGB18030},

                                                                {"GBK", lcid_ChineseSimplifiedGBK},

                                                                {"GB2313", lcid_ChineseSimplifiedGB2313},

                                                                {"Big5", lcid_ChineseTraditionalBig5},

                                                                {"Big5-HKSCS", lcid_ChineseTraditionalBigHKSCS},

                                                                {"CP1251", lcid_CyrillicCP1251},

                                                                {"KOI8-R", lcid_CyrillicKOI8R},

                                                                {"CP1253", lcid_Greek},

                                                                {"CP1255", lcid_Hebrew},

                                                                {"Shift-JIS", lcid_Japanese_ShiftJIS},

                                                                {"eucJP", lcid_Japanese_eucJP},

                                                                {"JIS7", lcid_Japanese_JIS7},

                                                                {"eucKR", lcid_Korean_eucKR},

                                                                {"TSCII", lcid_TamilTSCII},

                                                                {"TIS-620", lcid_ThaiTIS},

                                                                {"KOI8-U", lcid_UkrainianKOI},

                                                                {"CP1254", lcid_Turkish},

                                                                {"CP1258", lcid_Vietnamese},

                                                                {"UTF-8", lcid_UnicodeUTF8},

                                                                {"UTF-16", lcid_UnicodeUTF16},

                                                                {"CP1252", lcid_Western},

                                                                {nullptr, nullptr}};

QString Ebook_CHM_Encoding::guessByLCID(unsigned short lcid)
{
    for (const EbookChmTextEncodingEntry *t = text_encoding_table; t->qtcodec; ++t) {
        for (const short *lcids = t->lcids; *lcids; lcids++)
            if (*lcids == lcid)
                return t->qtcodec;
    }

    return QStringLiteral("UTF-8");
}