okular/generators/chm/lib/ebook_chm.cpp

/*
    Kchmviewer - a CHM and EPUB file viewer with broad language support
    SPDX-FileCopyrightText: 2004-2014 George Yunaev gyunaev@ulduzsoft.com

    SPDX-License-Identifier: GPL-3.0-or-later
*/

#include <QDebug>
#include <QFile>
#include <QVector>

#include "ebook_chm.h"
#include "ebook_chm_encoding.h"

#include "bitfiddle.h"

// Big-enough buffer size for use with various routines.
// This is the size, in bytes, of the scratch buffers that the /#WINDOWS and
// /#SYSTEM parsers below read archive data into.
#define BUF_SIZE 4096
#define COMMON_BUF_LEN 1025

// Byte length of one /#TOPICS record (used as the record stride when the
// topics table is walked) and, presumably, of one /#URLTBL record.
#define TOPICS_ENTRY_LEN 16
#define URLTBL_ENTRY_LEN 12

// Tracing hook for the HHC/HHK parser: the active definition expands to
// nothing; enable the commented-out one to route the messages to qDebug.
//#define DEBUGPARSER(A)	qDebug A
#define DEBUGPARSER(A)

// Scheme (and host) of the URLs that address content stored inside the archive.
static const char *URL_SCHEME_CHM = "ms-its";

// Creates a book object with no archive attached: the handle and both text
// codecs are null, no LCID has been detected and the reported encoding is
// UTF-8. Extra options are picked up from the KCHMVIEWEROPTS environment variable.
EBook_CHM::EBook_CHM()
    : EBook()
{
    // No archive is open yet.
    m_chmFile = nullptr;
    m_font = QString();
    m_filename = QString();

    // Encoding state starts out at its defaults.
    m_detectedLCID = 0;
    m_textCodec = nullptr;
    m_textCodecForSpecialFiles = nullptr;
    m_currentEncoding = QStringLiteral("UTF-8");

    m_envOptions = qgetenv("KCHMVIEWEROPTS");
}

// Releases the archive handle (if any) when the book object goes away.
EBook_CHM::~EBook_CHM()
{
    // close() is a no-op when nothing is open.
    this->close();
}

// Closes the currently open archive and resets the per-file state (file name,
// font, home/TOC/index paths, codecs, LCID, encoding) to its defaults.
// Does nothing when no archive is open.
void EBook_CHM::close()
{
    if (!m_chmFile) {
        return;
    }

    chm_close(m_chmFile);
    m_chmFile = nullptr;

    // Forget everything that was derived from the closed file.
    m_font = QString();
    m_filename = QString();
    m_home.clear();
    m_topicsFile.clear();
    m_indexFile.clear();

    // Back to the default encoding setup.
    m_detectedLCID = 0;
    m_textCodec = nullptr;
    m_textCodecForSpecialFiles = nullptr;
    m_currentEncoding = QStringLiteral("UTF-8");
}

// Returns the stored book title converted with the currently selected text codec.
QString EBook_CHM::title() const
{
    const QString decodedTitle = encodeWithCurrentCodec(m_title);
    return decodedTitle;
}

// Returns the URL of the book's home page, built from the stored home path.
QUrl EBook_CHM::homeUrl() const
{
    const QUrl home = pathToUrl(m_home);
    return home;
}

// Reports whether the loaded book supports the given feature.
// TOC and index availability are the flags computed by load(); changing the
// encoding is always supported; any other feature code is unsupported.
bool EBook_CHM::hasFeature(EBook::Feature code) const
{
    if (code == FEATURE_TOC) {
        return m_tocAvailable;
    }

    if (code == FEATURE_INDEX) {
        return m_indexAvailable;
    }

    if (code == FEATURE_ENCODING) {
        return true;
    }

    return false;
}

// Fills `toc` with the table of contents.
// The binary TOC is tried first; if it cannot be used, the plain-text topics
// file is parsed instead. Returns false when neither source yields a TOC.
bool EBook_CHM::getTableOfContents(QList<EBookTocEntry> &toc) const
{
    // Preferred source: the binary tables.
    if (parseBinaryTOC(toc)) {
        return true;
    }

    // Fallback: the plain text TOC.
    QList<ParsedEntry> parsedEntries;

    if (!parseFileAndFillArray(m_topicsFile, parsedEntries, false)) {
        return false;
    }

    // Indent of the first entry; every entry is shifted by it so that the
    // resulting TOC starts at indent zero. -1 means "not determined yet".
    int baseIndent = -1;

    toc.reserve(parsedEntries.size());

    for (int i = 0; i < parsedEntries.size(); ++i) {
        const ParsedEntry &source = parsedEntries.at(i);

        if (baseIndent == -1) {
            baseIndent = source.indent;
        }

        EBookTocEntry item;
        item.name = source.name;
        item.indent = source.indent - baseIndent;
        item.iconid = (EBookTocEntry::Icon)source.iconid;

        // Only the first URL of an entry is used for the TOC.
        if (!source.urls.empty()) {
            item.url = source.urls[0];
        }

        toc.append(item);
    }

    return true;
}

// Fills `index` with the entries of the plain-text index file.
// Parsed entries without any URL are skipped. Returns false when the index
// file cannot be read or parsed.
bool EBook_CHM::getIndex(QList<EBookIndexEntry> &index) const
{
    // Parse the plain text index
    QList<ParsedEntry> parsed;

    if (!parseFileAndFillArray(m_indexFile, parsed, true)) {
        return false;
    }

    // Find out the root offset, and reduce the indent level to it
    // so the index starts from zero offset.
    // NOTE(review): the search starts at 0 and takes the minimum, so the
    // offset can never end up above 0 — confirm whether that is intended.
    int root_offset = 0;

    // Fill up the real index
    index.reserve(parsed.size());

    // Find the index root offset
    const QList<ParsedEntry> &parsedList = parsed;
    for (const ParsedEntry &e : parsedList) {
        if (e.urls.empty()) {
            continue;
        }

        root_offset = qMin(root_offset, e.indent);
    }

    // And apply the index
    for (const ParsedEntry &e : parsedList) {
        if (e.urls.empty()) {
            continue;
        }

        EBookIndexEntry entry;
        entry.name = e.name;
        entry.urls = e.urls;
        entry.seealso = e.seealso;

        // If the index array is empty, make sure the first entry is on root offset
        if (index.isEmpty()) {
            entry.indent = root_offset;
        } else {
            entry.indent = e.indent - root_offset;
        }

        // The debugging printf() that used to dump every entry to stdout was removed here.
        index.append(entry);
    }

    return true;
}

// Reads the object addressed by `url` as text into `str`.
// Returns false when the content cannot be retrieved or is empty.
bool EBook_CHM::getFileContentAsString(QString &str, const QUrl &url) const
{
    const QString path = urlToPath(url);
    return getTextContent(str, path);
}

// Reads the object addressed by `url` as raw bytes into `data`.
// Returns false when the object cannot be resolved or retrieved.
bool EBook_CHM::getFileContentAsBinary(QByteArray &data, const QUrl &url) const
{
    const QString path = urlToPath(url);
    return getBinaryContent(data, path);
}

// Reads the whole archive object named `url` (an internal path) into `data`.
// `data` is resized to the object's length before reading. Returns true when
// the object resolves and a non-zero number of bytes is retrieved.
bool EBook_CHM::getBinaryContent(QByteArray &data, const QString &url) const
{
    chmUnitInfo unit;

    if (!ResolveObject(url, &unit)) {
        return false;
    }

    data.resize(unit.length);

    const size_t bytesRead = RetrieveObject(&unit, (unsigned char *)data.data(), 0, unit.length);
    return bytesRead != 0;
}

// Reads the archive object named `url` and converts it to a string in `str`.
// With `internal_encoding` set the bytes go through the plain QString
// constructor; otherwise they are converted with the current text codec.
// Returns false when the object cannot be read or is empty.
bool EBook_CHM::getTextContent(QString &str, const QString &url, bool internal_encoding) const
{
    QByteArray raw;

    if (!getBinaryContent(raw, url)) {
        return false;
    }

    const unsigned int length = raw.size();

    if (length == 0) {
        return false;
    }

    // Append an explicit terminator before handing the bytes over as a C string.
    raw.resize(length + 1);
    raw[length] = '\0';

    if (internal_encoding) {
        str = QString(raw.constData());
    } else {
        str = encodeWithCurrentCodec(raw.constData());
    }

    return true;
}

int EBook_CHM::getContentSize(const QString &url)
{
    chmUnitInfo ui;

    if (!ResolveObject(url, &ui)) {
        return -1;
    }

    return ui.length;
}

bool EBook_CHM::load(const QString &archiveName)
{
    QString filename;

    // If the file has a file:// prefix, remove it
    if (archiveName.startsWith(QLatin1String("file://"))) {
        filename = archiveName.mid(7); // strip it
    } else {
        filename = archiveName;
    }

    if (m_chmFile) {
        close();
    }

#if defined(WIN32)
    m_chmFile = chm_open((BSTR)QFile::encodeName(filename).constData());
#else
    m_chmFile = chm_open(QFile::encodeName(filename));
#endif

    if (m_chmFile == nullptr) {
        return false;
    }

    m_filename = filename;

    // Reset encoding
    m_textCodec = nullptr;
    m_textCodecForSpecialFiles = nullptr;
    m_currentEncoding = QStringLiteral("UTF-8");

    // Get information from /#WINDOWS and /#SYSTEM files (encoding, title, context file and so)
    // and guess the encoding
    getInfoFromWindows();
    getInfoFromSystem();
    guessTextEncoding();

    // Check whether the search tables are present
    if (ResolveObject(QStringLiteral("/#TOPICS"), &m_chmTOPICS) && ResolveObject(QStringLiteral("/#STRINGS"), &m_chmSTRINGS) && ResolveObject(QStringLiteral("/#URLTBL"), &m_chmURLTBL) &&
        ResolveObject(QStringLiteral("/#URLSTR"), &m_chmURLSTR)) {
        m_lookupTablesValid = true;
        fillTopicsUrlMap();
    } else {
        m_lookupTablesValid = false;
    }

    // Some CHM files have toc and index files, but do not set the name properly.
    // Some heuristics here.
    if (m_topicsFile.isEmpty() && hasFile(QStringLiteral("/toc.hhc"))) {
        m_topicsFile = "/toc.hhc";
    }

    if (m_indexFile.isEmpty() && hasFile(QStringLiteral("/index.hhk"))) {
        m_indexFile = "/index.hhk";
    }

    if (!m_topicsFile.isEmpty() || (m_lookupTablesValid && hasFile(QStringLiteral("/#TOCIDX")))) {
        m_tocAvailable = true;
    } else {
        m_tocAvailable = false;
    }

    if (!m_indexFile.isEmpty() || (m_lookupTablesValid && hasFile(QStringLiteral("/$WWKeywordLinks/BTree")))) {
        m_indexAvailable = true;
    } else {
        m_indexAvailable = false;
    }

    return true;
}

int EBook_CHM::findStringInQuotes(const QString &tag, int offset, QString &value, bool firstquote, bool decodeentities) const
{
    int qbegin = tag.indexOf('"', offset);

    if (qbegin == -1) {
        qFatal("EBook_CHMImpl::findStringInQuotes: cannot find first quote in <param> tag: '%s'", qPrintable(tag));
    }

    int qend = firstquote ? tag.indexOf('"', qbegin + 1) : tag.lastIndexOf('"');

    if (qend == -1 || qend <= qbegin) {
        qFatal("EBook_CHMImpl::findStringInQuotes: cannot find last quote in <param> tag: '%s'", qPrintable(tag));
    }

    // If we do not need to decode HTML entities, just return.
    if (decodeentities) {
        QString htmlentity = QString();
        bool fill_entity = false;

        value.reserve(qend - qbegin); // to avoid multiple memory allocations

        for (int i = qbegin + 1; i < qend; i++) {
            if (!fill_entity) {
                if (tag[i] == '&') { // HTML entity starts
                    fill_entity = true;
                } else {
                    value.append(tag[i]);
                }
            } else {
                if (tag[i] == ';') // HTML entity ends
                {
                    // If entity is an ASCII code, just decode it
                    QString decode = m_htmlEntityDecoder.decode(htmlentity);

                    if (decode.isNull()) {
                        break;
                    }

                    value.append(decode);
                    htmlentity = QString();
                    fill_entity = false;
                } else {
                    htmlentity.append(tag[i]);
                }
            }
        }
    } else {
        value = tag.mid(qbegin + 1, qend - qbegin - 1);
    }

    return qend + 1;
}

// Parses the HHC (topics) or HHK (index) file `file` and appends one
// ParsedEntry to `data` per <object type="text/sitemap"> element.
//   asIndex - true when an index is parsed; enables "See Also" handling.
// The indent of an entry is its <ul> nesting depth relative to the depth of
// the first entry. Returns false when the file cannot be read, is empty, or
// contains an unterminated quote that cannot be recovered from. Malformed
// <param> tags and excessive <ul> nesting abort through qFatal().
bool EBook_CHM::parseFileAndFillArray(const QString &file, QList<ParsedEntry> &data, bool asIndex) const
{
    QString src;
    const int MAX_NEST_DEPTH = 256;

    if (!getTextContent(src, file) || src.isEmpty()) {
        return false;
    }

    /*
        // Save the index for debugging purposes
        QFile outfile( "parsed.htm" );

        if ( outfile.open( QIODevice::WriteOnly ) )
        {
            QTextStream textstream( &outfile );
            textstream << src;
            outfile.close();
        }
    */

    EBookTocEntry::Icon defaultimagenum = EBookTocEntry::IMAGE_AUTO;
    int pos = 0, indent = 0, root_indent_offset = 0;
    bool in_object = false, root_indent_offset_set = false;

    ParsedEntry entry;
    entry.iconid = defaultimagenum;

    // Split the HHC file by HTML tags
    int stringlen = src.length();

    while (pos < stringlen && (pos = src.indexOf('<', pos)) != -1) {
        int i, word_end = 0;

        // Find the end of the tag (i) and the end of its first word (word_end).
        for (i = ++pos; i < stringlen; i++) {
            // If a " or ' is found, skip to the next one.
            if ((src[i] == '"' || src[i] == '\'')) {
                // find where quote ends, either by another quote, or by '>' symbol (some people don't know HTML)
                int nextpos = src.indexOf(src[i], i + 1);
                if (nextpos == -1 && (nextpos = src.indexOf('>', i + 1)) == -1) {
                    qWarning("EBook_CHMImpl::ParseHhcAndFillTree: corrupted TOC: %s", qPrintable(src.mid(i)));
                    return false;
                }

                i = nextpos;
            } else if (src[i] == '>') {
                break;
            } else if (!src[i].isLetterOrNumber() && src[i] != '/' && !word_end) {
                word_end = i;
            }
        }

        // tag is everything between '<' and '>'; tagword is its lower-cased name.
        QString tagword, tag = src.mid(pos, i - pos);

        if (word_end) {
            tagword = src.mid(pos, word_end - pos).toLower();
        } else {
            tagword = tag.toLower();
        }

        // DEBUGPARSER(("tag: '%s', tagword: '%s'\n", qPrintable( tag ), qPrintable( tagword ) ));

        // <OBJECT type="text/sitemap"> - a topic entry
        if (tagword == QLatin1String("object") && tag.indexOf(QLatin1String("text/sitemap"), 0, Qt::CaseInsensitive) != -1) {
            in_object = true;
        } else if (tagword == QLatin1String("/object") && in_object) {
            // a topic entry closed. Add a tree item
            if (entry.name.isEmpty() && entry.urls.isEmpty()) {
                qWarning("EBook_CHMImpl::ParseAndFillTopicsTree: <object> tag is parsed, but both name and url are empty.");
            } else {
                // If the name is empty, use the URL as name
                if (entry.name.isEmpty()) {
                    entry.name = entry.urls[0].toString();
                }

                // The nesting depth of the first entry becomes the root level.
                if (!root_indent_offset_set) {
                    root_indent_offset_set = true;
                    root_indent_offset = indent;

                    if (root_indent_offset > 1) {
                        qWarning("CHM has improper index; root indent offset is %d", root_indent_offset);
                    }
                }

                // Trim the entry name
                entry.name = entry.name.trimmed();

                int real_indent = indent - root_indent_offset;

                entry.indent = real_indent;
                data.push_back(entry);
            }

            // Reset the accumulator for the next <object>.
            entry.name = QString();
            entry.urls.clear();
            entry.iconid = defaultimagenum;
            entry.seealso.clear();
            in_object = false;
        } else if (tagword == QLatin1String("param") && in_object) {
            // <param name="Name" value="First Page">
            int offset;
            const QString name_pattern = QStringLiteral("name="), value_pattern = QStringLiteral("value=");
            QString pname, pvalue;

            if ((offset = tag.indexOf(name_pattern, 0, Qt::CaseInsensitive)) == -1) {
                qFatal("EBook_CHMImpl::ParseAndFillTopicsTree: bad <param> tag '%s': no name=\n", qPrintable(tag));
            }

            // Skip over 'name=' and read the parameter name up to the next quote.
            offset = findStringInQuotes(tag, offset + name_pattern.length(), pname, true, false);
            pname = pname.toLower();

            if ((offset = tag.indexOf(value_pattern, offset, Qt::CaseInsensitive)) == -1) {
                qFatal("EBook_CHMImpl::ParseAndFillTopicsTree: bad <param> tag '%s': no value=\n", qPrintable(tag));
            }

            // Skip over 'value=' and read the value up to the last quote in the
            // tag, decoding HTML entities on the way.
            findStringInQuotes(tag, offset + value_pattern.length(), pvalue, false, true);

            // DEBUGPARSER(("<param>: name '%s', value '%s'", qPrintable( pname ), qPrintable( pvalue )));

            if (pname == QLatin1String("name") || pname == QLatin1String("keyword")) {
                // Some help files contain duplicate names, where the second name is empty. Work it around by keeping the first one
                if (!pvalue.isEmpty()) {
                    entry.name = pvalue;
                }
            } else if (pname == QLatin1String("merge")) {
                // MERGE implementation is experimental
                QUrl mergeurl = pathToUrl(pvalue);
                QString mergecontent;

                if (getFileContentAsString(mergecontent, mergeurl) && !mergecontent.isEmpty()) {
                    qWarning("MERGE is used in index; the implementation is experimental. Please let me know if it works");

                    // Merge the read value into the current parsed file.
                    // To save memory it is done in a kinda hacky way:
                    src = mergecontent + src.mid(i);
                    stringlen = src.length();

                    // Restart at the beginning of the merged text. The loop
                    // tail assigns pos = i, so i has to be reset as well;
                    // otherwise the stale index of the old text would skip the
                    // start of the merged content.
                    pos = 0;
                    i = 0;
                } else {
                    qWarning("MERGE is used in index but file %s was not found in CHM archive", qPrintable(pvalue));
                }
            } else if (pname == QLatin1String("local")) {
                // Check for URL duplication
                QUrl url = pathToUrl(pvalue);

                if (!entry.urls.contains(url)) {
                    entry.urls.push_back(url);
                }
            } else if (pname == QLatin1String("see also") && asIndex && entry.name != pvalue) {
                // A placeholder URL keeps the entry from being treated as URL-less.
                entry.urls.push_back(QUrl(QStringLiteral("seealso")));
                entry.seealso = pvalue;
            } else if (pname == QLatin1String("imagenumber")) {
                bool bok;
                int imgnum = pvalue.toInt(&bok);

                // Only icon numbers within the built-in range are accepted.
                if (bok && imgnum >= 0 && imgnum < EBookTocEntry::MAX_BUILTIN_ICONS) {
                    entry.iconid = (EBookTocEntry::Icon)imgnum;
                }
            }
        } else if (tagword == QLatin1String("ul")) // increase indent level
        {
            // Fix for buggy help files
            if (++indent >= MAX_NEST_DEPTH) {
                qFatal("EBook_CHMImpl::ParseAndFillTopicsTree: max nest depth (%d) is reached, error in help file", MAX_NEST_DEPTH);
            }

            DEBUGPARSER(("<ul>: new intent is %d\n", indent - root_indent_offset));
        } else if (tagword == QLatin1String("/ul")) // decrease indent level
        {
            // Never drop below the root level, even if </ul> tags are unbalanced.
            if (--indent < root_indent_offset) {
                indent = root_indent_offset;
            }

            DEBUGPARSER(("</ul>: new intent is %d\n", indent - root_indent_offset));
        }

        // Continue the search behind the tag that was just handled.
        pos = i;
    }

    // Dump our array
    //    for ( int i = 0; i < data.size(); i++ )
    //        qDebug() << data[i].indent << data[i].name << data[i].urls;

    return true;
}

// Looks up the archive object `fileName` and fills `ui` with its unit info.
// Returns false when no archive is open or the object does not resolve.
bool EBook_CHM::ResolveObject(const QString &fileName, chmUnitInfo *ui) const
{
    if (m_chmFile == nullptr) {
        return false;
    }

    return ::chm_resolve_object(m_chmFile, qPrintable(fileName), ui) == CHM_RESOLVE_SUCCESS;
}

bool EBook_CHM::hasFile(const QString &fileName) const
{
    chmUnitInfo ui;

    return m_chmFile != nullptr && ::chm_resolve_object(m_chmFile, qPrintable(fileName), &ui) == CHM_RESOLVE_SUCCESS;
}

// Reads up to `bufferSize` bytes of the object described by `ui`, starting at
// `fileOffset` within the object, into `buffer`. Returns whatever
// chm_retrieve_object() reports.
size_t EBook_CHM::RetrieveObject(const chmUnitInfo *ui, unsigned char *buffer, LONGUINT64 fileOffset, LONGINT64 bufferSize) const
{
    // The library takes a non-const unit info pointer.
    chmUnitInfo *unit = const_cast<chmUnitInfo *>(ui);
    return ::chm_retrieve_object(m_chmFile, unit, buffer, fileOffset, bufferSize);
}

bool EBook_CHM::getInfoFromWindows()
{
#define WIN_HEADER_LEN 0x08
    unsigned char buffer[BUF_SIZE];
    unsigned int factor;
    chmUnitInfo ui;
    long size = 0;

    if (ResolveObject(QStringLiteral("/#WINDOWS"), &ui)) {
        if (!RetrieveObject(&ui, buffer, 0, WIN_HEADER_LEN)) {
            return false;
        }

        unsigned int entries = get_int32_le(reinterpret_cast<unsigned int *>(buffer));
        unsigned int entry_size = get_int32_le(reinterpret_cast<unsigned int *>(buffer + 0x04));

        QVector<unsigned char> uptr(entries * entry_size);
        unsigned char *raw = (unsigned char *)uptr.data();

        if (!RetrieveObject(&ui, raw, 8, entries * entry_size)) {
            return false;
        }

        if (!ResolveObject(QStringLiteral("/#STRINGS"), &ui)) {
            return false;
        }

        for (unsigned int i = 0; i < entries; ++i) {
            unsigned int offset = i * entry_size;

            unsigned int off_title = get_int32_le(reinterpret_cast<unsigned int *>(raw + offset + 0x14));
            unsigned int off_home = get_int32_le(reinterpret_cast<unsigned int *>(raw + offset + 0x68));
            unsigned int off_hhc = get_int32_le(reinterpret_cast<unsigned int *>(raw + offset + 0x60));
            unsigned int off_hhk = get_int32_le(reinterpret_cast<unsigned int *>(raw + offset + 0x64));

            factor = off_title / 4096;

            if (size == 0) {
                size = RetrieveObject(&ui, buffer, factor * 4096, BUF_SIZE);
            }

            if (size && off_title) {
                m_title = QByteArray((const char *)(buffer + off_title % 4096));
            }

            if (factor != off_home / 4096) {
                factor = off_home / 4096;
                size = RetrieveObject(&ui, buffer, factor * 4096, BUF_SIZE);
            }

            if (size && off_home) {
                m_home = QByteArray("/") + QByteArray((const char *)buffer + off_home % 4096);
            }

            if (factor != off_hhc / 4096) {
                factor = off_hhc / 4096;
                size = RetrieveObject(&ui, buffer, factor * 4096, BUF_SIZE);
            }

            if (size && off_hhc) {
                m_topicsFile = QByteArray("/") + QByteArray((const char *)buffer + off_hhc % 4096);
            }

            if (factor != off_hhk / 4096) {
                factor = off_hhk / 4096;
                size = RetrieveObject(&ui, buffer, factor * 4096, BUF_SIZE);
            }

            if (size && off_hhk) {
                m_indexFile = QByteArray("/") + QByteArray((const char *)buffer + off_hhk % 4096);
            }
        }
    }
    return true;
}

bool EBook_CHM::getInfoFromSystem()
{
    unsigned char buffer[BUF_SIZE];
    chmUnitInfo ui;

    int index = 0;
    unsigned char *cursor = nullptr, *p;
    unsigned short value = 0;
    long size = 0;

    // Run the first loop to detect the encoding. We need this, because title could be
    // already encoded in user encoding. Same for file names
    if (!ResolveObject(QStringLiteral("/#SYSTEM"), &ui)) {
        return false;
    }

    // Can we pull BUFF_SIZE bytes of the #SYSTEM file?
    if ((size = RetrieveObject(&ui, buffer, 4, BUF_SIZE)) == 0) {
        return false;
    }

    buffer[size - 1] = 0;

    // First loop to detect the encoding
    for (index = 0; index < (size - 1 - (long)sizeof(unsigned short));) {
        cursor = buffer + index;
        value = UINT16ARRAY(cursor);

        switch (value) {
        case 0:
            index += 2;
            cursor = buffer + index;

            if (m_topicsFile.isEmpty()) {
                m_topicsFile = QByteArray("/") + QByteArray((const char *)buffer + index + 2);
            }

            break;

        case 1:
            index += 2;
            cursor = buffer + index;

            if (m_indexFile.isEmpty()) {
                m_indexFile = QByteArray("/") + QByteArray((const char *)buffer + index + 2);
            }
            break;

        case 2:
            index += 2;
            cursor = buffer + index;

            if (m_home.isEmpty() || m_home == "/") {
                m_home = QByteArray("/") + QByteArray((const char *)buffer + index + 2);
            }
            break;

        case 3:
            index += 2;
            cursor = buffer + index;
            m_title = QByteArray((const char *)(buffer + index + 2));
            break;

        case 4:
            index += 2;
            cursor = buffer + index;

            p = buffer + index + 2;
            m_detectedLCID = (short)(p[0] | (p[1] << 8));

            break;

        case 6:
            index += 2;
            cursor = buffer + index;

            if (m_topicsFile.isEmpty()) {
                QString topicAttempt = QStringLiteral("/");
                topicAttempt += QString((const char *)buffer + index + 2);

                QString tmp = topicAttempt + ".hhc";

                if (ResolveObject(tmp, &ui)) {
                    m_topicsFile = qPrintable(tmp);
                }

                tmp = topicAttempt + ".hhk";

                if (ResolveObject(tmp, &ui)) {
                    m_indexFile = qPrintable(tmp);
                }
            }
            break;

        case 16:
            index += 2;
            cursor = buffer + index;

            m_font = QString((const char *)buffer + index + 2);
            break;

        default:
            index += 2;
            cursor = buffer + index;
        }

        value = UINT16ARRAY(cursor);
        index += value + 2;
    }

    return true;
}

// Returns the topic title recorded for `url`, or a null string when the URL
// is not present in the URL-to-topic map.
QString EBook_CHM::getTopicByUrl(const QUrl &url)
{
    const QMap<QUrl, QString>::const_iterator found = m_url2topics.constFind(url);
    const bool known = (found != m_url2topics.constEnd());

    return known ? found.value() : QString();
}

static int chm_enumerator_callback(struct chmFile *, struct chmUnitInfo *ui, void *context)
{
    EBook_CHM tmp;
    ((QList<QUrl> *)context)->push_back(tmp.pathToUrl(ui->path));
    return CHM_ENUMERATOR_CONTINUE;
}

bool EBook_CHM::enumerateFiles(QList<QUrl> &files)
{
    files.clear();
    return chm_enumerate(m_chmFile, CHM_ENUMERATE_ALL, chm_enumerator_callback, &files);
}

// Returns the name of the encoding currently recorded for this book.
QString EBook_CHM::currentEncoding() const
{
    const QString encodingName = m_currentEncoding;
    return encodingName;
}

// Records `encoding` as the current encoding and switches the text codecs to
// it. The name is recorded even when the codecs cannot be set up; the return
// value tells whether the switch succeeded.
bool EBook_CHM::setCurrentEncoding(const char *encoding)
{
    m_currentEncoding = encoding;

    const bool codecsReady = changeFileEncoding(encoding);
    return codecsReady;
}

bool EBook_CHM::isSupportedUrl(const QUrl &url)
{
    return url.scheme() == URL_SCHEME_CHM;
}

bool EBook_CHM::guessTextEncoding()
{
    if (!m_detectedLCID) {
        qWarning("Could not detect LCID");
        return false;
    }

    QString enc = Ebook_CHM_Encoding::guessByLCID(m_detectedLCID);

    if (changeFileEncoding(enc)) {
        m_currentEncoding = enc;
        return true;
    }

    return false;
}

bool EBook_CHM::changeFileEncoding(const QString &qtencoding)
{
    // Encoding could be either simple Qt codepage, or set like CP1251/KOI8, which allows to
    // set up encodings separately for text (first) and internal files (second)
    int p = qtencoding.indexOf('/');

    if (p != -1) {
        QString global = qtencoding.left(p);
        QString special = qtencoding.mid(p + 1);

        m_textCodec = QTextCodec::codecForName(global.toUtf8());

        if (!m_textCodec) {
            qWarning("Could not set up Text Codec for encoding '%s'", qPrintable(global));
            return false;
        }

        m_textCodecForSpecialFiles = QTextCodec::codecForName(special.toUtf8());

        if (!m_textCodecForSpecialFiles) {
            qWarning("Could not set up Text Codec for encoding '%s'", qPrintable(special));
            return false;
        }
    } else {
        m_textCodecForSpecialFiles = m_textCodec = QTextCodec::codecForName(qtencoding.toUtf8());

        if (!m_textCodec) {
            qWarning("Could not set up Text Codec for encoding '%s'", qPrintable(qtencoding));
            return false;
        }
    }

    m_htmlEntityDecoder.changeEncoding(m_textCodec);
    return true;
}

void EBook_CHM::fillTopicsUrlMap()
{
    if (!m_lookupTablesValid) {
        return;
    }

    // Read those tables
    QVector<unsigned char> topics(m_chmTOPICS.length), urltbl(m_chmURLTBL.length), urlstr(m_chmURLSTR.length), strings(m_chmSTRINGS.length);

    if (!RetrieveObject(&m_chmTOPICS, (unsigned char *)topics.data(), 0, m_chmTOPICS.length) || !RetrieveObject(&m_chmURLTBL, (unsigned char *)urltbl.data(), 0, m_chmURLTBL.length) ||
        !RetrieveObject(&m_chmURLSTR, (unsigned char *)urlstr.data(), 0, m_chmURLSTR.length) || !RetrieveObject(&m_chmSTRINGS, (unsigned char *)strings.data(), 0, m_chmSTRINGS.length)) {
        return;
    }

    for (LONGUINT64 i = 0; i < m_chmTOPICS.length; i += TOPICS_ENTRY_LEN) {
        unsigned int off_title = get_int32_le(reinterpret_cast<unsigned int *>(topics.data() + i + 4));
        unsigned int off_url = get_int32_le(reinterpret_cast<unsigned int *>(topics.data() + i + 8));
        off_url = get_int32_le(reinterpret_cast<unsigned int *>(urltbl.data() + off_url + 8)) + 8;

        QUrl url = pathToUrl((const char *)urlstr.data() + off_url);

        if (off_title < (unsigned int)strings.size()) {
            m_url2topics[url] = encodeWithCurrentCodec((const char *)strings.data() + off_title);
        } else {
            m_url2topics[url] = QStringLiteral("Untitled");
        }
    }
}

// Builds the table of contents from the binary tables (/#TOCIDX together
// with /#TOPICS, /#URLTBL, /#URLSTR and /#STRINGS).
// Returns false when the lookup tables are not valid, a table cannot be read
// or /#TOCIDX is too short; callers then fall back to the text TOC. When the
// entries themselves fail to parse, `toc` is additionally cleared.
bool EBook_CHM::parseBinaryTOC(QList<EBookTocEntry> &toc) const
{
    if (!m_lookupTablesValid) {
        return false;
    }

    QByteArray tocidx, topics, urltbl, urlstr, strings;

    // Read the index tables
    if (!getBinaryContent(tocidx, QStringLiteral("/#TOCIDX")) || !getBinaryContent(topics, QStringLiteral("/#TOPICS")) || !getBinaryContent(urltbl, QStringLiteral("/#URLTBL")) || !getBinaryContent(urlstr, QStringLiteral("/#URLSTR")) ||
        !getBinaryContent(strings, QStringLiteral("/#STRINGS"))) {
        return false;
    }

    // The offset of the first entry is the 32-bit value at the start of
    // /#TOCIDX; a shorter table cannot provide it.
    if (tocidx.size() < 4) {
        qWarning("Binary TOC index is too short, fallback to text-based TOC");
        return false;
    }

    // Shamelessly stolen from xchm
    if (!RecurseLoadBTOC(tocidx, topics, urltbl, urlstr, strings, UINT32ARRAY(tocidx.data()), toc, 0)) {
        qWarning("Failed to parse binary TOC, fallback to text-based TOC");
        toc.clear();
        return false;
    }

    return true;
}

//
// This piece of code was based on the one in xchm written by  Razvan Cojocaru <razvanco@gmx.net>
//
bool EBook_CHM::RecurseLoadBTOC(const QByteArray &tocidx, const QByteArray &topics, const QByteArray &urltbl, const QByteArray &urlstr, const QByteArray &strings, int offset, QList<EBookTocEntry> &entries, int level) const
{
    while (offset) {
        // If this is end of TOCIDX, return.
        if (tocidx.size() < offset + 20) {
            return true;
        }

        unsigned int flags = UINT32ARRAY(tocidx.data() + offset + 4);
        int index = UINT32ARRAY(tocidx.data() + offset + 8);

        if ((flags & 0x04) || (flags & 0x08)) {
            QString name, value;

            if ((flags & 0x08) == 0) {
                if (strings.size() < index + 1) {
                    qWarning("EBook_CHM::RecurseLoadBTOC: invalid name index (%d) for book TOC entry!", index);
                    return false;
                }

                name = encodeWithCurrentCodec(strings.data() + index);
            } else {
                if (topics.size() < (index * 16) + 12) {
                    qWarning("EBook_CHM::RecurseLoadBTOC: invalid name index (%d) for local TOC entry!", index);
                    return false;
                }

                int tocoffset = (int)UINT32ARRAY(topics.data() + (index * 16) + 4);

                if (strings.size() < tocoffset + 1) {
                    qWarning("EBook_CHM::RecurseLoadBTOC: invalid name tocoffset (%d) for TOC entry!", tocoffset);
                    return false;
                }

                if (tocoffset < 0) {
                    name.clear();
                } else {
                    name = encodeWithCurrentCodec(strings.data() + tocoffset);
                }

                // #URLTBL index
                tocoffset = (int)UINT32ARRAY(topics.data() + (index * 16) + 8);

                if (tocoffset < 0 || urltbl.size() < tocoffset + 12) {
                    qWarning("EBook_CHM::RecurseLoadBTOC: invalid url index (%d) for TOC entry!", tocoffset);
                    return false;
                }

                tocoffset = (int)UINT32ARRAY(urltbl.data() + tocoffset + 8);

                if (tocoffset < 0 || urlstr.size() < tocoffset) {
                    qWarning("EBook_CHM::RecurseLoadBTOC: invalid url offset (%d) for TOC entry!", tocoffset);
                    return false;
                }

                value = encodeWithCurrentCodec(urlstr.data() + tocoffset + 8);
            }

            EBookTocEntry entry;
            entry.name = name.trimmed();

            if (!entry.name.isEmpty()) {
                if (!value.isEmpty()) {
                    entry.url = pathToUrl(value);
                }

                entry.iconid = EBookTocEntry::IMAGE_AUTO;
                entry.indent = level;
                entries.push_back(entry);
            }
        }

        if (flags & 0x04) {
            // book
            if (tocidx.size() < offset + 24) {
                qWarning("EBook_CHM::RecurseLoadBTOC: invalid child entry offset (%d)", offset);
                return false;
            }

            unsigned int childoffset = UINT32ARRAY(tocidx.data() + offset + 20);

            if (childoffset) {
                if (!RecurseLoadBTOC(tocidx, topics, urltbl, urlstr, strings, childoffset, entries, level + 1)) {
                    return false;
                }
            }
        }

        offset = UINT32ARRAY(tocidx.data() + offset + 0x10);
    }

    return true;
}

// Returns true when `name` occurs in the options taken from the
// KCHMVIEWEROPTS environment variable.
bool EBook_CHM::hasOption(const QString &name) const
{
    return !m_envOptions.isEmpty() && m_envOptions.contains(name);
}

// Converts a link found in the archive into a URL.
// http:// and https:// links are returned as they are. Everything else
// becomes an archive-internal URL: scheme and host are URL_SCHEME_CHM, the
// text behind the first '#' becomes the fragment and the rest, with a
// leading '/' added when missing and percent-encoding decoded, becomes the path.
QUrl EBook_CHM::pathToUrl(const QString &link) const
{
    const bool isWebLink = link.startsWith(QLatin1String("http://")) || link.startsWith(QLatin1String("https://"));

    if (isWebLink) {
        return QUrl(link);
    }

    QUrl result;
    result.setScheme(URL_SCHEME_CHM);
    result.setHost(URL_SCHEME_CHM);

    // Split off the fragment, if the link has one.
    const int hashPos = link.indexOf('#');
    QString path = (hashPos == -1) ? link : link.left(hashPos);

    if (hashPos != -1) {
        result.setFragment(link.mid(hashPos + 1));
    }

    // Archive paths are absolute.
    if (!path.startsWith('/')) {
        path.prepend('/');
    }

    result.setPath(QUrl::fromPercentEncoding(path.toUtf8()));
    return result;
}

// Converts an archive-internal URL back into an archive path.
// An empty or root path maps to the book's home page; URLs with any other
// scheme yield an empty string.
QString EBook_CHM::urlToPath(const QUrl &link) const
{
    if (link.scheme() != URL_SCHEME_CHM) {
        return QLatin1String("");
    }

    const QString path = link.path();

    if (path == QLatin1String("/") || path.isEmpty()) {
        return m_home;
    }

    return path;
}

// A freshly created entry has icon id 0 and sits at indent level 0.
EBook_CHM::ParsedEntry::ParsedEntry()
{
    indent = 0;
    iconid = 0;
}