You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1113 lines
28 KiB
1113 lines
28 KiB
/*
 *  Kchmviewer - a CHM and EPUB file viewer with broad language support
 *  Copyright (C) 2004-2014 George Yunaev, gyunaev@ulduzsoft.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
|
|
|
#include <QFile> |
|
#include <QVector> |
|
#include <QDebug> |
|
|
|
#include "ebook_chm.h" |
|
#include "ebook_chm_encoding.h" |
|
#include "helper_entitydecoder.h" |
|
|
|
#include "bitfiddle.h" |
|
|
|
// Big-enough buffer size for use with various routines. |
|
#define BUF_SIZE 4096 |
|
#define COMMON_BUF_LEN 1025 |
|
|
|
#define TOPICS_ENTRY_LEN 16 |
|
#define URLTBL_ENTRY_LEN 12 |
|
|
|
//#define DEBUGPARSER(A) qDebug A |
|
#define DEBUGPARSER(A) |
|
|
|
static const char * URL_SCHEME_CHM = "ms-its"; |
|
|
|
|
|
// Constructs a book object with no CHM file attached.
//
// Reads the KCHMVIEWEROPTS environment variable into m_envOptions and puts
// every piece of per-file state into its "nothing loaded" value.
EBook_CHM::EBook_CHM()
	: EBook()
{
	m_envOptions = qgetenv("KCHMVIEWEROPTS");
	m_chmFile = nullptr;
	m_filename = m_font = QString();

	m_textCodec = nullptr;
	m_textCodecForSpecialFiles = nullptr;
	m_detectedLCID = 0;
	m_currentEncoding = "UTF-8";
	m_htmlEntityDecoder = nullptr;

	// These flags are otherwise only written by load(); give them a defined
	// value so hasFeature() and fillTopicsUrlMap() never read an
	// indeterminate bool on an object that has not loaded a file yet.
	m_lookupTablesValid = false;
	m_tocAvailable = false;
	m_indexAvailable = false;
}
|
|
|
// Releases the CHM handle, if one is still open.
EBook_CHM::~EBook_CHM()
{
	// close() is a no-op when no file is open
	this->close();
}
|
|
|
void EBook_CHM::close() |
|
{ |
|
if ( m_chmFile == nullptr ) |
|
return; |
|
|
|
chm_close( m_chmFile ); |
|
|
|
m_chmFile = nullptr; |
|
m_filename = m_font = QString(); |
|
|
|
m_home.clear(); |
|
m_topicsFile.clear(); |
|
m_indexFile.clear(); |
|
|
|
m_textCodec = nullptr; |
|
m_textCodecForSpecialFiles = nullptr; |
|
m_detectedLCID = 0; |
|
m_currentEncoding = "UTF-8"; |
|
} |
|
|
|
// Returns the stored book title passed through encodeWithCurrentCodec().
QString EBook_CHM::title() const
{
	const QString decodedTitle = encodeWithCurrentCodec( m_title );
	return decodedTitle;
}
|
|
|
// Returns the URL built by pathToUrl() from the stored home path.
QUrl EBook_CHM::homeUrl() const
{
	const QUrl home = pathToUrl( m_home );
	return home;
}
|
|
|
// Reports whether the given feature is available.
//
// TOC and index availability come from the flags computed by load();
// encoding is always reported as supported. Any other code yields false.
bool EBook_CHM::hasFeature(EBook::Feature code) const
{
	if ( code == FEATURE_TOC )
		return m_tocAvailable;

	if ( code == FEATURE_INDEX )
		return m_indexAvailable;

	if ( code == FEATURE_ENCODING )
		return true;

	return false;
}
|
|
|
// Fills 'toc' with the table of contents.
//
// The binary TOC is tried first; when parseBinaryTOC() fails, the text file
// named by m_topicsFile is parsed instead. Indents are shifted so that the
// first parsed entry sits at indent zero. Returns false if neither source
// could be parsed.
bool EBook_CHM::getTableOfContents( QList<EBookTocEntry> &toc ) const
{
	if ( parseBinaryTOC( toc ) )
		return true;

	// Fall back to the plain text TOC
	QList< ParsedEntry > textEntries;

	if ( !parseFileAndFillArray( m_topicsFile, textEntries, false ) )
		return false;

	toc.reserve( textEntries.size() );

	// -1 means "not known yet"; the first entry defines the root indent
	int rootIndent = -1;

	for ( int n = 0; n < textEntries.size(); n++ )
	{
		const ParsedEntry &source = textEntries.at( n );

		if ( rootIndent == -1 )
			rootIndent = source.indent;

		EBookTocEntry item;
		item.name = source.name;
		item.indent = source.indent - rootIndent;
		item.iconid = static_cast<EBookTocEntry::Icon>( source.iconid );

		// Only the first URL of an entry is used for the TOC
		if ( !source.urls.empty() )
			item.url = source.urls.first();

		toc.append( item );
	}

	return true;
}
|
|
|
// Fills 'index' with the entries of the text index file (m_indexFile).
//
// Parsed entries without any URL are skipped. Indents are shifted so that
// the shallowest URL-bearing entry ends up at indent zero, and the first
// entry appended to an empty 'index' is always placed at indent zero.
// Returns false when the index file cannot be parsed.
bool EBook_CHM::getIndex(QList<EBookIndexEntry> &index) const
{
	// Parse the plain text index
	QList< ParsedEntry > parsed;

	if ( !parseFileAndFillArray( m_indexFile, parsed, true ) )
		return false;

	const QList< ParsedEntry > &parsedList = parsed;

	// Find the smallest indent among the entries which will be kept.
	// -1 means "nothing seen yet". The search must not start from 0:
	// indents are never negative, so a minimum seeded with 0 would
	// always stay 0 and the offset would never be detected.
	int root_offset = -1;

	for ( const ParsedEntry &e : parsedList )
	{
		if ( e.urls.empty() )
			continue;

		if ( root_offset == -1 || e.indent < root_offset )
			root_offset = e.indent;
	}

	// Fill up the real index
	index.reserve( parsed.size() );

	for ( const ParsedEntry &e : parsedList )
	{
		if ( e.urls.empty() )
			continue;

		EBookIndexEntry entry;
		entry.name = e.name;
		entry.urls = e.urls;
		entry.seealso = e.seealso;

		// If the index array is empty, make sure the first entry is a root entry
		if ( index.isEmpty() )
			entry.indent = 0;
		else
			entry.indent = e.indent - root_offset;

		index.append( entry );
	}

	return true;
}
|
|
|
// Reads the object addressed by 'url' as text into 'str'.
// The URL is first mapped to an in-archive path with urlToPath().
bool EBook_CHM::getFileContentAsString( QString &str, const QUrl &url ) const
{
	const QString internalPath = urlToPath( url );
	return getTextContent( str, internalPath );
}
|
|
|
// Reads the object addressed by 'url' as raw bytes into 'data'.
// The URL is first mapped to an in-archive path with urlToPath().
bool EBook_CHM::getFileContentAsBinary( QByteArray &data, const QUrl &url ) const
{
	const QString internalPath = urlToPath( url );
	return getBinaryContent( data, internalPath );
}
|
|
|
// Loads the whole object stored under the in-archive path 'url' into 'data'.
//
// 'data' is resized to the object's length before reading. Returns false
// when the object cannot be resolved or when nothing could be read.
bool EBook_CHM::getBinaryContent( QByteArray &data, const QString &url ) const
{
	chmUnitInfo unit;

	if ( !ResolveObject( url, &unit ) )
		return false;

	data.resize( unit.length );

	unsigned char *destination = reinterpret_cast<unsigned char*>( data.data() );
	const size_t bytesRead = RetrieveObject( &unit, destination, 0, unit.length );

	return bytesRead != 0;
}
|
|
|
// Loads the object stored under the in-archive path 'url' as text.
//
// With 'internal_encoding' set the bytes are converted by the QString
// constructor; otherwise they go through encodeWithCurrentCodec().
// Returns false when the object cannot be read or is empty.
bool EBook_CHM::getTextContent( QString& str, const QString& url, bool internal_encoding ) const
{
	QByteArray raw;

	if ( !getBinaryContent( raw, url ) )
		return false;

	const unsigned int rawLength = raw.size();

	if ( rawLength == 0 )
		return false;

	// Append an explicit terminator before handing out the C string
	raw.resize( rawLength + 1 );
	raw [rawLength] = '\0';

	if ( internal_encoding )
		str = QString( raw.constData() );
	else
		str = encodeWithCurrentCodec( raw.constData() );

	return true;
}
|
|
|
int EBook_CHM::getContentSize(const QString &url) |
|
{ |
|
chmUnitInfo ui; |
|
|
|
if( !ResolveObject( url, &ui ) ) |
|
return -1; |
|
|
|
return ui.length; |
|
} |
|
|
|
bool EBook_CHM::load(const QString &archiveName) |
|
{ |
|
QString filename; |
|
|
|
// If the file has a file:// prefix, remove it |
|
if ( archiveName.startsWith( "file://" ) ) |
|
filename = archiveName.mid( 7 ); // strip it |
|
else |
|
filename = archiveName; |
|
|
|
if( m_chmFile ) |
|
close(); |
|
|
|
#if defined (WIN32) |
|
// chm_open on Windows OS uses the following prototype: |
|
// struct chmFile* chm_open(BSTR filename); |
|
// |
|
// however internally it simply passes the filename |
|
// directly to CreateFileW function without any conversion. |
|
// Thus we need to pass it as WCHAR * and not BSTR. |
|
m_chmFile = chm_open( (BSTR) filename.toStdWString().c_str() ); |
|
#else |
|
m_chmFile = chm_open( QFile::encodeName(filename) ); |
|
#endif |
|
|
|
if ( m_chmFile == nullptr ) |
|
return false; |
|
|
|
m_filename = filename; |
|
|
|
// Reset encoding |
|
m_textCodec = nullptr; |
|
m_textCodecForSpecialFiles = nullptr; |
|
m_currentEncoding = "UTF-8"; |
|
|
|
// Get information from /#WINDOWS and /#SYSTEM files (encoding, title, context file and so) |
|
// and guess the encoding |
|
getInfoFromWindows(); |
|
getInfoFromSystem(); |
|
guessTextEncoding(); |
|
|
|
// Check whether the search tables are present |
|
if ( ResolveObject("/#TOPICS", &m_chmTOPICS) |
|
&& ResolveObject("/#STRINGS", &m_chmSTRINGS) |
|
&& ResolveObject("/#URLTBL", &m_chmURLTBL) |
|
&& ResolveObject("/#URLSTR", &m_chmURLSTR) ) |
|
{ |
|
m_lookupTablesValid = true; |
|
fillTopicsUrlMap(); |
|
} |
|
else |
|
m_lookupTablesValid = false; |
|
|
|
// Some CHM files have toc and index files, but do not set the name properly. |
|
// Some heuristics here. |
|
if ( m_topicsFile.isEmpty() && hasFile( "/toc.hhc" ) ) |
|
m_topicsFile = "/toc.hhc"; |
|
|
|
if ( m_indexFile.isEmpty() && hasFile( "/index.hhk" ) ) |
|
m_indexFile = "/index.hhk"; |
|
|
|
if ( !m_topicsFile.isEmpty() || ( m_lookupTablesValid && hasFile( "/#TOCIDX" ) ) ) |
|
m_tocAvailable = true; |
|
else |
|
m_tocAvailable = false; |
|
|
|
if ( !m_indexFile.isEmpty() || ( m_lookupTablesValid && hasFile( "/$WWKeywordLinks/BTree" ) ) ) |
|
m_indexAvailable = true; |
|
else |
|
m_indexAvailable = false; |
|
|
|
return true; |
|
} |
|
|
|
int EBook_CHM::findStringInQuotes (const QString& tag, int offset, QString& value, bool firstquote, bool decodeentities) const |
|
{ |
|
int qbegin = tag.indexOf ('"', offset); |
|
|
|
if ( qbegin == -1 ) |
|
qFatal ("EBook_CHMImpl::findStringInQuotes: cannot find first quote in <param> tag: '%s'", qPrintable( tag )); |
|
|
|
int qend = firstquote ? tag.indexOf ('"', qbegin + 1) : tag.lastIndexOf ('"'); |
|
|
|
if ( qend == -1 || qend <= qbegin ) |
|
qFatal ("EBook_CHMImpl::findStringInQuotes: cannot find last quote in <param> tag: '%s'", qPrintable( tag )); |
|
|
|
// If we do not need to decode HTML entities, just return. |
|
if ( decodeentities ) |
|
{ |
|
QString htmlentity = QString(); |
|
bool fill_entity = false; |
|
|
|
value.reserve (qend - qbegin); // to avoid multiple memory allocations |
|
|
|
for ( int i = qbegin + 1; i < qend; i++ ) |
|
{ |
|
if ( !fill_entity ) |
|
{ |
|
if ( tag[i] == '&' ) // HTML entity starts |
|
fill_entity = true; |
|
else |
|
value.append (tag[i]); |
|
} |
|
else |
|
{ |
|
if ( tag[i] == ';' ) // HTML entity ends |
|
{ |
|
// If entity is an ASCII code, just decode it |
|
QString decode = m_htmlEntityDecoder.decode( htmlentity ); |
|
|
|
if ( decode.isNull() ) |
|
break; |
|
|
|
value.append ( decode ); |
|
htmlentity = QString(); |
|
fill_entity = false; |
|
} |
|
else |
|
htmlentity.append (tag[i]); |
|
} |
|
} |
|
} |
|
else |
|
value = tag.mid (qbegin + 1, qend - qbegin - 1); |
|
|
|
return qend + 1; |
|
} |
|
|
|
|
|
bool EBook_CHM::parseFileAndFillArray( const QString& file, QList< ParsedEntry >& data, bool asIndex ) const |
|
{ |
|
QString src; |
|
const int MAX_NEST_DEPTH = 256; |
|
|
|
if ( !getTextContent( src, file ) || src.isEmpty() ) |
|
return false; |
|
|
|
/* |
|
// Save the index for debugging purposes |
|
QFile outfile( "parsed.htm" ); |
|
|
|
if ( outfile.open( QIODevice::WriteOnly ) ) |
|
{ |
|
QTextStream textstream( &outfile ); |
|
textstream << src; |
|
outfile.close(); |
|
} |
|
*/ |
|
|
|
EBookTocEntry::Icon defaultimagenum = EBookTocEntry::IMAGE_AUTO; |
|
int pos = 0, indent = 0, root_indent_offset = 0; |
|
bool in_object = false, root_indent_offset_set = false; |
|
|
|
ParsedEntry entry; |
|
entry.iconid = defaultimagenum; |
|
|
|
// Split the HHC file by HTML tags |
|
int stringlen = src.length(); |
|
|
|
while ( pos < stringlen && (pos = src.indexOf ('<', pos)) != -1 ) |
|
{ |
|
int i, word_end = 0; |
|
|
|
for ( i = ++pos; i < stringlen; i++ ) |
|
{ |
|
// If a " or ' is found, skip to the next one. |
|
if ( (src[i] == '"' || src[i] == '\'') ) |
|
{ |
|
// find where quote ends, either by another quote, or by '>' symbol (some people don't know HTML) |
|
int nextpos = src.indexOf (src[i], i+1); |
|
if ( nextpos == -1 && (nextpos = src.indexOf ('>', i+1)) == -1 ) |
|
{ |
|
qWarning ("EBook_CHMImpl::ParseHhcAndFillTree: corrupted TOC: %s", qPrintable( src.mid(i) )); |
|
return false; |
|
} |
|
|
|
i = nextpos; |
|
} |
|
else if ( src[i] == '>' ) |
|
break; |
|
else if ( !src[i].isLetterOrNumber() && src[i] != '/' && !word_end ) |
|
word_end = i; |
|
} |
|
|
|
QString tagword, tag = src.mid (pos, i - pos); |
|
|
|
if ( word_end ) |
|
tagword = src.mid (pos, word_end - pos).toLower(); |
|
else |
|
tagword = tag.toLower(); |
|
|
|
//DEBUGPARSER(("tag: '%s', tagword: '%s'\n", qPrintable( tag ), qPrintable( tagword ) )); |
|
|
|
// <OBJECT type="text/sitemap"> - a topic entry |
|
if ( tagword == "object" && tag.indexOf ("text/sitemap", 0, Qt::CaseInsensitive ) != -1 ) |
|
in_object = true; |
|
else if ( tagword == "/object" && in_object ) |
|
{ |
|
// a topic entry closed. Add a tree item |
|
if ( entry.name.isEmpty() && entry.urls.isEmpty() ) |
|
{ |
|
qWarning ("EBook_CHMImpl::ParseAndFillTopicsTree: <object> tag is parsed, but both name and url are empty."); |
|
} |
|
else |
|
{ |
|
// If the name is empty, use the URL as name |
|
if ( entry.name.isEmpty() ) |
|
entry.name = entry.urls[0].toString(); |
|
|
|
if ( !root_indent_offset_set ) |
|
{ |
|
root_indent_offset_set = true; |
|
root_indent_offset = indent; |
|
|
|
if ( root_indent_offset > 1 ) |
|
qWarning("CHM has improper index; root indent offset is %d", root_indent_offset); |
|
} |
|
|
|
// Trim the entry name |
|
entry.name = entry.name.trimmed(); |
|
|
|
int real_indent = indent - root_indent_offset; |
|
|
|
entry.indent = real_indent; |
|
data.push_back( entry ); |
|
} |
|
|
|
entry.name = QString(); |
|
entry.urls.clear(); |
|
entry.iconid = defaultimagenum; |
|
entry.seealso.clear(); |
|
in_object = false; |
|
} |
|
else if ( tagword == "param" && in_object ) |
|
{ |
|
// <param name="Name" value="First Page"> |
|
int offset; // strlen("param ") |
|
QString name_pattern = "name=", value_pattern = "value="; |
|
QString pname, pvalue; |
|
|
|
if ( (offset = tag.indexOf (name_pattern, 0, Qt::CaseInsensitive )) == -1 ) |
|
qFatal ("EBook_CHMImpl::ParseAndFillTopicsTree: bad <param> tag '%s': no name=\n", qPrintable( tag )); |
|
|
|
// offset+5 skips 'name=' |
|
offset = findStringInQuotes (tag, offset + name_pattern.length(), pname, true, false); |
|
pname = pname.toLower(); |
|
|
|
if ( (offset = tag.indexOf(value_pattern, offset, Qt::CaseInsensitive )) == -1 ) |
|
qFatal ("EBook_CHMImpl::ParseAndFillTopicsTree: bad <param> tag '%s': no value=\n", qPrintable( tag )); |
|
|
|
// offset+6 skips 'value=' |
|
findStringInQuotes (tag, offset + value_pattern.length(), pvalue, false, true); |
|
|
|
//DEBUGPARSER(("<param>: name '%s', value '%s'", qPrintable( pname ), qPrintable( pvalue ))); |
|
|
|
if ( pname == "name" || pname == "keyword" ) |
|
{ |
|
// Some help files contain duplicate names, where the second name is empty. Work it around by keeping the first one |
|
if ( !pvalue.isEmpty() ) |
|
entry.name = pvalue; |
|
} |
|
else if ( pname == "merge" ) |
|
{ |
|
// MERGE implementation is experimental |
|
QUrl mergeurl = pathToUrl( pvalue ); |
|
QString mergecontent; |
|
|
|
if ( getFileContentAsString( mergecontent, mergeurl ) && !mergecontent.isEmpty() ) |
|
{ |
|
qWarning( "MERGE is used in index; the implementation is experimental. Please let me know if it works" ); |
|
|
|
// Merge the read value into the current parsed file. |
|
// To save memory it is done in a kinda hacky way: |
|
src = mergecontent + src.mid( i ); |
|
pos = 0; |
|
stringlen = src.length(); |
|
} |
|
else |
|
qWarning( "MERGE is used in index but file %s was not found in CHM archive", qPrintable(pvalue) ); |
|
} |
|
else if ( pname == "local" ) |
|
{ |
|
// Check for URL duplication |
|
QUrl url = pathToUrl( pvalue ); |
|
|
|
if ( !entry.urls.contains( url ) ) |
|
entry.urls.push_back( url ); |
|
} |
|
else if ( pname == "see also" && asIndex && entry.name != pvalue ) |
|
{ |
|
entry.urls.push_back( QUrl("seealso") ); |
|
entry.seealso = pvalue; |
|
} |
|
else if ( pname == "imagenumber" ) |
|
{ |
|
bool bok; |
|
int imgnum = pvalue.toInt (&bok); |
|
|
|
if ( bok && imgnum >= 0 && imgnum < EBookTocEntry::MAX_BUILTIN_ICONS ) |
|
entry.iconid = (EBookTocEntry::Icon) imgnum; |
|
} |
|
} |
|
else if ( tagword == "ul" ) // increase indent level |
|
{ |
|
// Fix for buggy help files |
|
if ( ++indent >= MAX_NEST_DEPTH ) |
|
qFatal("EBook_CHMImpl::ParseAndFillTopicsTree: max nest depth (%d) is reached, error in help file", MAX_NEST_DEPTH); |
|
|
|
DEBUGPARSER(("<ul>: new intent is %d\n", indent - root_indent_offset)); |
|
} |
|
else if ( tagword == "/ul" ) // decrease indent level |
|
{ |
|
if ( --indent < root_indent_offset ) |
|
indent = root_indent_offset; |
|
|
|
DEBUGPARSER(("</ul>: new intent is %d\n", indent - root_indent_offset)); |
|
} |
|
|
|
pos = i; |
|
} |
|
|
|
// Dump our array |
|
// for ( int i = 0; i < data.size(); i++ ) |
|
// qDebug() << data[i].indent << data[i].name << data[i].urls; |
|
|
|
return true; |
|
} |
|
|
|
bool EBook_CHM::ResolveObject(const QString& fileName, chmUnitInfo *ui) const |
|
{ |
|
return m_chmFile != nullptr |
|
&& ::chm_resolve_object(m_chmFile, qPrintable( fileName ), ui) == |
|
CHM_RESOLVE_SUCCESS; |
|
} |
|
|
|
|
|
bool EBook_CHM::hasFile(const QString & fileName) const |
|
{ |
|
chmUnitInfo ui; |
|
|
|
return m_chmFile != nullptr |
|
&& ::chm_resolve_object(m_chmFile, qPrintable( fileName ), &ui) == |
|
CHM_RESOLVE_SUCCESS; |
|
} |
|
|
|
|
|
// Reads up to 'bufferSize' bytes of the object described by 'ui', starting
// at 'fileOffset' within the object, into 'buffer'.
// Returns whatever chm_retrieve_object() returns.
size_t EBook_CHM::RetrieveObject(const chmUnitInfo *ui, unsigned char *buffer,
								LONGUINT64 fileOffset, LONGINT64 bufferSize) const
{
	// The library takes a non-const pointer to the unit info
	chmUnitInfo *unit = const_cast<chmUnitInfo*>( ui );

	return ::chm_retrieve_object( m_chmFile, unit, buffer, fileOffset, bufferSize );
}
|
|
|
bool EBook_CHM::getInfoFromWindows() |
|
{ |
|
#define WIN_HEADER_LEN 0x08 |
|
unsigned char buffer[BUF_SIZE]; |
|
unsigned int factor; |
|
chmUnitInfo ui; |
|
long size = 0; |
|
|
|
if ( ResolveObject("/#WINDOWS", &ui) ) |
|
{ |
|
if ( !RetrieveObject(&ui, buffer, 0, WIN_HEADER_LEN) ) |
|
return false; |
|
|
|
unsigned int entries = get_int32_le( reinterpret_cast<unsigned int *>(buffer) ); |
|
unsigned int entry_size = get_int32_le( reinterpret_cast<unsigned int *>(buffer + 0x04) ); |
|
|
|
QVector<unsigned char> uptr(entries * entry_size); |
|
unsigned char* raw = (unsigned char*) uptr.data(); |
|
|
|
if ( !RetrieveObject (&ui, raw, 8, entries * entry_size) ) |
|
return false; |
|
|
|
if( !ResolveObject ("/#STRINGS", &ui) ) |
|
return false; |
|
|
|
for ( unsigned int i = 0; i < entries; ++i ) |
|
{ |
|
unsigned int offset = i * entry_size; |
|
|
|
unsigned int off_title = get_int32_le( reinterpret_cast<unsigned int *>(raw + offset + 0x14) ); |
|
unsigned int off_home = get_int32_le( reinterpret_cast<unsigned int *>(raw + offset + 0x68) ); |
|
unsigned int off_hhc = get_int32_le( reinterpret_cast<unsigned int *>(raw + offset + 0x60) ); |
|
unsigned int off_hhk = get_int32_le( reinterpret_cast<unsigned int *>(raw + offset + 0x64) ); |
|
|
|
factor = off_title / 4096; |
|
|
|
if ( size == 0 ) |
|
size = RetrieveObject(&ui, buffer, factor * 4096, BUF_SIZE); |
|
|
|
if ( size && off_title ) |
|
m_title = QByteArray( (const char*) (buffer + off_title % 4096) ); |
|
|
|
if ( factor != off_home / 4096) |
|
{ |
|
factor = off_home / 4096; |
|
size = RetrieveObject (&ui, buffer, factor * 4096, BUF_SIZE); |
|
} |
|
|
|
if ( size && off_home ) |
|
m_home = QByteArray("/") + QByteArray( (const char*) buffer + off_home % 4096); |
|
|
|
if ( factor != off_hhc / 4096) |
|
{ |
|
factor = off_hhc / 4096; |
|
size = RetrieveObject(&ui, buffer, factor * 4096, BUF_SIZE); |
|
} |
|
|
|
if ( size && off_hhc ) |
|
m_topicsFile = QByteArray("/") + QByteArray((const char*) buffer + off_hhc % 4096); |
|
|
|
if ( factor != off_hhk / 4096) |
|
{ |
|
factor = off_hhk / 4096; |
|
size = RetrieveObject (&ui, buffer, factor * 4096, BUF_SIZE); |
|
} |
|
|
|
if ( size && off_hhk ) |
|
m_indexFile = QByteArray("/") + QByteArray((const char*) buffer + off_hhk % 4096); |
|
} |
|
} |
|
return true; |
|
} |
|
|
|
|
|
|
|
bool EBook_CHM::getInfoFromSystem() |
|
{ |
|
unsigned char buffer[BUF_SIZE]; |
|
chmUnitInfo ui; |
|
|
|
int index = 0; |
|
unsigned char* cursor = nullptr, *p; |
|
unsigned short value = 0; |
|
long size = 0; |
|
|
|
// Run the first loop to detect the encoding. We need this, because title could be |
|
// already encoded in user encoding. Same for file names |
|
if ( !ResolveObject ("/#SYSTEM", &ui) ) |
|
return false; |
|
|
|
// Can we pull BUFF_SIZE bytes of the #SYSTEM file? |
|
if ( (size = RetrieveObject (&ui, buffer, 4, BUF_SIZE)) == 0 ) |
|
return false; |
|
|
|
buffer[size - 1] = 0; |
|
|
|
// First loop to detect the encoding |
|
for ( index = 0; index < (size - 1 - (long)sizeof(unsigned short)) ;) |
|
{ |
|
cursor = buffer + index; |
|
value = UINT16ARRAY(cursor); |
|
|
|
switch(value) |
|
{ |
|
case 0: |
|
index += 2; |
|
cursor = buffer + index; |
|
|
|
if(m_topicsFile.isEmpty()) |
|
m_topicsFile = QByteArray("/") + QByteArray((const char*) buffer + index + 2); |
|
|
|
break; |
|
|
|
case 1: |
|
index += 2; |
|
cursor = buffer + index; |
|
|
|
if(m_indexFile.isEmpty()) |
|
m_indexFile = QByteArray("/") + QByteArray((const char*)buffer + index + 2); |
|
break; |
|
|
|
case 2: |
|
index += 2; |
|
cursor = buffer + index; |
|
|
|
if(m_home.isEmpty() || m_home == "/") |
|
m_home = QByteArray("/") + QByteArray((const char*) buffer + index + 2); |
|
break; |
|
|
|
case 3: |
|
index += 2; |
|
cursor = buffer + index; |
|
m_title = QByteArray( (const char*) (buffer + index + 2) ); |
|
break; |
|
|
|
case 4: |
|
index += 2; |
|
cursor = buffer + index; |
|
|
|
p = buffer + index + 2; |
|
m_detectedLCID = (short) (p[0] | (p[1]<<8)); |
|
|
|
break; |
|
|
|
case 6: |
|
index += 2; |
|
cursor = buffer + index; |
|
|
|
if ( m_topicsFile.isEmpty() ) |
|
{ |
|
QString topicAttempt = "/", tmp; |
|
topicAttempt += QString ((const char*) buffer +index +2); |
|
|
|
tmp = topicAttempt + ".hhc"; |
|
|
|
if ( ResolveObject( tmp, &ui) ) |
|
m_topicsFile = qPrintable( tmp ); |
|
|
|
tmp = topicAttempt + ".hhk"; |
|
|
|
if ( ResolveObject( tmp, &ui) ) |
|
m_indexFile = qPrintable( tmp ); |
|
} |
|
break; |
|
|
|
case 16: |
|
index += 2; |
|
cursor = buffer + index; |
|
|
|
m_font = QString ((const char*) buffer + index + 2); |
|
break; |
|
|
|
default: |
|
index += 2; |
|
cursor = buffer + index; |
|
} |
|
|
|
value = UINT16ARRAY(cursor); |
|
index += value + 2; |
|
} |
|
|
|
return true; |
|
} |
|
|
|
// Returns the topic title recorded for 'url' in m_url2topics,
// or a null QString when the URL is not in the map.
QString EBook_CHM::getTopicByUrl( const QUrl& url )
{
	// QMap::value() yields a default-constructed QString for a missing key
	return m_url2topics.value( url );
}
|
|
|
|
|
static int chm_enumerator_callback( struct chmFile*, struct chmUnitInfo *ui, void *context ) |
|
{ |
|
EBook_CHM tmp; |
|
((QList<QUrl> *) context)->push_back( tmp.pathToUrl( ui->path ) ); |
|
return CHM_ENUMERATOR_CONTINUE; |
|
} |
|
|
|
// Replaces the content of 'files' with the URLs of all objects in the archive.
// Returns false when no file is open or the enumeration fails.
bool EBook_CHM::enumerateFiles(QList<QUrl> &files )
{
	files.clear();

	// Like ResolveObject(), never hand a null handle to the CHM library
	if ( m_chmFile == nullptr )
		return false;

	return chm_enumerate( m_chmFile, CHM_ENUMERATE_ALL, chm_enumerator_callback, &files ) != 0;
}
|
|
|
// Returns the name of the encoding currently recorded for this book.
QString EBook_CHM::currentEncoding() const
{
	const QString encodingName = m_currentEncoding;
	return encodingName;
}
|
|
|
bool EBook_CHM::setCurrentEncoding( const char * encoding ) |
|
{ |
|
m_currentEncoding = encoding; |
|
return changeFileEncoding( encoding ); |
|
} |
|
|
|
bool EBook_CHM::isSupportedUrl(const QUrl &url) |
|
{ |
|
return url.scheme() == URL_SCHEME_CHM; |
|
} |
|
|
|
bool EBook_CHM::guessTextEncoding() |
|
{ |
|
if ( !m_detectedLCID ) |
|
{ |
|
qWarning ("Could not detect LCID"); |
|
return false; |
|
} |
|
|
|
QString enc = Ebook_CHM_Encoding::guessByLCID( m_detectedLCID ); |
|
|
|
if ( changeFileEncoding ( enc ) ) |
|
{ |
|
m_currentEncoding = enc; |
|
return true; |
|
} |
|
|
|
return false; |
|
} |
|
|
|
bool EBook_CHM::changeFileEncoding( const QString& qtencoding ) |
|
{ |
|
// Encoding could be either simple Qt codepage, or set like CP1251/KOI8, which allows to |
|
// set up encodings separately for text (first) and internal files (second) |
|
int p = qtencoding.indexOf( '/' ); |
|
|
|
if ( p != -1 ) |
|
{ |
|
QString global = qtencoding.left( p ); |
|
QString special = qtencoding.mid( p + 1 ); |
|
|
|
m_textCodec = QTextCodec::codecForName( global.toUtf8() ); |
|
|
|
if ( !m_textCodec ) |
|
{ |
|
qWarning( "Could not set up Text Codec for encoding '%s'", qPrintable( global ) ); |
|
return false; |
|
} |
|
|
|
m_textCodecForSpecialFiles = QTextCodec::codecForName( special.toUtf8() ); |
|
|
|
if ( !m_textCodecForSpecialFiles ) |
|
{ |
|
qWarning( "Could not set up Text Codec for encoding '%s'", qPrintable( special ) ); |
|
return false; |
|
} |
|
} |
|
else |
|
{ |
|
m_textCodecForSpecialFiles = m_textCodec = QTextCodec::codecForName( qtencoding.toUtf8() ); |
|
|
|
if ( !m_textCodec ) |
|
{ |
|
qWarning( "Could not set up Text Codec for encoding '%s'", qPrintable( qtencoding ) ); |
|
return false; |
|
} |
|
} |
|
|
|
m_htmlEntityDecoder.changeEncoding( m_textCodec ); |
|
return true; |
|
} |
|
|
|
|
|
void EBook_CHM::fillTopicsUrlMap() |
|
{ |
|
if ( !m_lookupTablesValid ) |
|
return; |
|
|
|
// Read those tables |
|
QVector<unsigned char> topics( m_chmTOPICS.length ), urltbl( m_chmURLTBL.length ), urlstr( m_chmURLSTR.length ), strings( m_chmSTRINGS.length ); |
|
|
|
if ( !RetrieveObject( &m_chmTOPICS, (unsigned char*) topics.data(), 0, m_chmTOPICS.length ) |
|
|| !RetrieveObject( &m_chmURLTBL, (unsigned char*) urltbl.data(), 0, m_chmURLTBL.length ) |
|
|| !RetrieveObject( &m_chmURLSTR, (unsigned char*) urlstr.data(), 0, m_chmURLSTR.length ) |
|
|| !RetrieveObject( &m_chmSTRINGS, (unsigned char*) strings.data(), 0, m_chmSTRINGS.length ) ) |
|
return; |
|
|
|
for ( LONGUINT64 i = 0; i < m_chmTOPICS.length; i += TOPICS_ENTRY_LEN ) |
|
{ |
|
unsigned int off_title = get_int32_le( reinterpret_cast<unsigned int *>(topics.data() + i + 4) ); |
|
unsigned int off_url = get_int32_le( reinterpret_cast<unsigned int *>(topics.data() + i + 8) ); |
|
off_url = get_int32_le( reinterpret_cast<unsigned int *>( urltbl.data() + off_url + 8) ) + 8; |
|
|
|
QUrl url = pathToUrl( (const char*) urlstr.data() + off_url ); |
|
|
|
if ( off_title < (unsigned int)strings.size() ) |
|
m_url2topics[url] = encodeWithCurrentCodec ( (const char*) strings.data() + off_title ); |
|
else |
|
m_url2topics[url] = "Untitled"; |
|
} |
|
} |
|
|
|
|
|
// Builds the table of contents from the binary /#TOCIDX file.
//
// Returns false, with 'toc' cleared if parsing had started, when the lookup
// tables are not valid, any of the needed files cannot be read, or the
// content is malformed. The caller then falls back to the text-based TOC.
bool EBook_CHM::parseBinaryTOC( QList< EBookTocEntry >& toc ) const
{
	if ( !m_lookupTablesValid )
		return false;

	QByteArray tocidx, topics, urltbl, urlstr, strings;

	// Read the index tables
	if ( !getBinaryContent( tocidx, "/#TOCIDX" )
	|| !getBinaryContent( topics, "/#TOPICS" )
	|| !getBinaryContent( urltbl, "/#URLTBL" )
	|| !getBinaryContent( urlstr, "/#URLSTR" )
	|| !getBinaryContent( strings, "/#STRINGS" ) )
		return false;

	// The offset of the first entry is the 32-bit value at the very start
	// of /#TOCIDX; make sure it is really there before reading it
	if ( tocidx.size() < 4 )
	{
		qWarning("Failed to parse binary TOC, fallback to text-based TOC");
		return false;
	}

	// Shamelessly stolen from xchm
	if ( !RecurseLoadBTOC( tocidx, topics, urltbl, urlstr, strings, UINT32ARRAY( tocidx.data() ), toc, 0 ) )
	{
		qWarning("Failed to parse binary TOC, fallback to text-based TOC");
		toc.clear();
		return false;
	}

	return true;
}
|
|
|
|
|
// |
|
// This piece of code was based on the one in xchm written by Razvan Cojocaru <razvanco@gmx.net> |
|
// |
|
bool EBook_CHM::RecurseLoadBTOC( const QByteArray& tocidx, |
|
const QByteArray& topics, |
|
const QByteArray& urltbl, |
|
const QByteArray& urlstr, |
|
const QByteArray& strings, |
|
int offset, |
|
QList< EBookTocEntry >& entries, |
|
int level ) const |
|
{ |
|
while ( offset ) |
|
{ |
|
// If this is end of TOCIDX, return. |
|
if ( tocidx.size() < offset + 20 ) |
|
return true; |
|
|
|
unsigned int flags = UINT32ARRAY( tocidx.data() + offset + 4 ); |
|
int index = UINT32ARRAY( tocidx.data() + offset + 8 ); |
|
|
|
if ( (flags & 0x04) || (flags & 0x08)) |
|
{ |
|
QString name, value; |
|
|
|
if ( (flags & 0x08) == 0 ) |
|
{ |
|
if ( strings.size() < index + 1 ) |
|
{ |
|
qWarning("EBook_CHM::RecurseLoadBTOC: invalid name index (%d) for book TOC entry!", index ); |
|
return false; |
|
} |
|
|
|
name = encodeWithCurrentCodec( strings.data() + index); |
|
} |
|
else |
|
{ |
|
if ( topics.size() < (index * 16) + 12 ) |
|
{ |
|
qWarning("EBook_CHM::RecurseLoadBTOC: invalid name index (%d) for local TOC entry!", index ); |
|
return false; |
|
} |
|
|
|
int tocoffset = (int) UINT32ARRAY(topics.data()+ (index * 16) + 4); |
|
|
|
if ( strings.size() < tocoffset + 1 ) |
|
{ |
|
qWarning("EBook_CHM::RecurseLoadBTOC: invalid name tocoffset (%d) for TOC entry!", tocoffset ); |
|
return false; |
|
} |
|
|
|
if ( tocoffset < 0 ) |
|
name.clear(); |
|
else |
|
name = encodeWithCurrentCodec( strings.data() + tocoffset ); |
|
|
|
// #URLTBL index |
|
tocoffset = (int) UINT32ARRAY( topics.data() + (index * 16) + 8 ); |
|
|
|
if ( tocoffset < 0 || urltbl.size() < tocoffset + 12 ) |
|
{ |
|
qWarning("EBook_CHM::RecurseLoadBTOC: invalid url index (%d) for TOC entry!", tocoffset ); |
|
return false; |
|
} |
|
|
|
tocoffset = (int) UINT32ARRAY(urltbl.data() + tocoffset + 8); |
|
|
|
if ( tocoffset < 0 || urlstr.size() < tocoffset ) |
|
{ |
|
qWarning("EBook_CHM::RecurseLoadBTOC: invalid url offset (%d) for TOC entry!", tocoffset ); |
|
return false; |
|
} |
|
|
|
value = encodeWithCurrentCodec( urlstr.data() + tocoffset + 8 ); |
|
} |
|
|
|
EBookTocEntry entry; |
|
entry.name = name.trimmed(); |
|
|
|
if ( !entry.name.isEmpty() ) |
|
{ |
|
if ( !value.isEmpty() ) |
|
entry.url = pathToUrl( value ); |
|
|
|
entry.iconid = EBookTocEntry::IMAGE_AUTO; |
|
entry.indent = level; |
|
entries.push_back( entry ); |
|
} |
|
} |
|
|
|
if ( flags & 0x04 ) |
|
{ |
|
// book |
|
if ( tocidx.size() < offset + 24 ) |
|
{ |
|
qWarning("EBook_CHM::RecurseLoadBTOC: invalid child entry offset (%d)", offset ); |
|
return false; |
|
} |
|
|
|
unsigned int childoffset = UINT32ARRAY( tocidx.data() + offset + 20 ); |
|
|
|
if ( childoffset ) |
|
{ |
|
if ( !RecurseLoadBTOC( tocidx, topics, urltbl, urlstr, strings, childoffset, entries, level + 1 ) ) |
|
return false; |
|
} |
|
} |
|
|
|
offset = UINT32ARRAY( tocidx.data() + offset + 0x10 ); |
|
} |
|
|
|
return true; |
|
} |
|
|
|
bool EBook_CHM::hasOption(const QString & name) const |
|
{ |
|
if ( !m_envOptions.isEmpty() && m_envOptions.contains( name ) ) |
|
return true; |
|
|
|
return false; |
|
} |
|
|
|
// Converts an in-archive link into a URL.
//
// Links starting with http:// or https:// are returned as they are. Anything
// else becomes a URL with URL_SCHEME_CHM as both scheme and host; the part
// after the first '#' becomes the fragment, and the rest, prefixed with '/'
// if needed and percent-decoded, becomes the path.
QUrl EBook_CHM::pathToUrl(const QString &link) const
{
	const bool external = link.startsWith( "http://" ) || link.startsWith( "https://" );

	if ( external )
		return QUrl( link );

	QUrl result;
	result.setScheme( URL_SCHEME_CHM );
	result.setHost( URL_SCHEME_CHM );

	// Split off the fragment, if the link has one
	const int hashPos = link.indexOf( '#' );
	QString path = ( hashPos == -1 ) ? link : link.left( hashPos );

	if ( hashPos != -1 )
		result.setFragment( link.mid( hashPos + 1 ) );

	if ( !path.startsWith( '/' ) )
		path.prepend( '/' );

	result.setPath( QUrl::fromPercentEncoding( path.toUtf8() ) );
	return result;
}
|
|
|
// Converts a URL back into an in-archive path.
//
// URLs with a scheme other than URL_SCHEME_CHM give an empty string. An empty
// or "/" path maps to the home page; any other path is returned unchanged.
QString EBook_CHM::urlToPath(const QUrl &link) const
{
	if ( link.scheme() != URL_SCHEME_CHM )
		return "";

	const QString path = link.path();

	if ( path == "/" || path.isEmpty() )
		return m_home;

	return path;
}
|
|
|
|
|
// Creates an entry with icon id and indent both set to zero.
EBook_CHM::ParsedEntry::ParsedEntry()
	: iconid( 0 ), indent( 0 )
{
}
|
|
|