You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

1113 lines
28 KiB

/*
* Kchmviewer - a CHM and EPUB file viewer with broad language support
* Copyright (C) 2004-2014 George Yunaev, gyunaev@ulduzsoft.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <QFile>
#include <QVector>
#include <QDebug>
#include "ebook_chm.h"
#include "ebook_chm_encoding.h"
#include "helper_entitydecoder.h"
#include "bitfiddle.h"
// Big-enough buffer size for use with various routines.
#define BUF_SIZE 4096
#define COMMON_BUF_LEN 1025
#define TOPICS_ENTRY_LEN 16
#define URLTBL_ENTRY_LEN 12
//#define DEBUGPARSER(A) qDebug A
#define DEBUGPARSER(A)
static const char * URL_SCHEME_CHM = "ms-its";
EBook_CHM::EBook_CHM()
: EBook()
{
m_envOptions = qgetenv("KCHMVIEWEROPTS");
m_chmFile = nullptr;
m_filename = m_font = QString();
m_textCodec = nullptr;
m_textCodecForSpecialFiles = nullptr;
m_detectedLCID = 0;
m_currentEncoding = "UTF-8";
m_htmlEntityDecoder = nullptr;
}
EBook_CHM::~EBook_CHM()
{
close();
}
void EBook_CHM::close()
{
if ( m_chmFile == nullptr )
return;
chm_close( m_chmFile );
m_chmFile = nullptr;
m_filename = m_font = QString();
m_home.clear();
m_topicsFile.clear();
m_indexFile.clear();
m_textCodec = nullptr;
m_textCodecForSpecialFiles = nullptr;
m_detectedLCID = 0;
m_currentEncoding = "UTF-8";
}
QString EBook_CHM::title() const
{
return encodeWithCurrentCodec( m_title );
}
QUrl EBook_CHM::homeUrl() const
{
return pathToUrl( m_home );
}
bool EBook_CHM::hasFeature(EBook::Feature code) const
{
switch ( code )
{
case FEATURE_TOC:
return m_tocAvailable;
case FEATURE_INDEX:
return m_indexAvailable;
case FEATURE_ENCODING:
return true;
}
return false;
}
bool EBook_CHM::getTableOfContents( QList<EBookTocEntry> &toc ) const
{
if ( parseBinaryTOC( toc ) )
return true;
// Parse the plain text TOC
QList< ParsedEntry > parsed;
if ( !parseFileAndFillArray( m_topicsFile, parsed, false ) )
return false;
// Find out the root offset, and reduce the indent level to it
// so the toc starts from zero offset.
int root_offset = -1;
// Fill up the real toc
toc.reserve( parsed.size() );
for ( const ParsedEntry &e : qAsConst(parsed) )
{
if ( root_offset == -1 )
root_offset = e.indent;
EBookTocEntry entry;
entry.iconid = (EBookTocEntry::Icon) e.iconid;
entry.indent = e.indent - root_offset;
entry.name = e.name;
if ( !e.urls.empty() )
entry.url = e.urls[0];
toc.append( entry );
}
return true;
}
bool EBook_CHM::getIndex(QList<EBookIndexEntry> &index) const
{
// Parse the plain text index
QList< ParsedEntry > parsed;
if ( !parseFileAndFillArray( m_indexFile, parsed, true ) )
return false;
// Find out the root offset, and reduce the indent level to it
// so the index starts from zero offset.
int root_offset = 0;
// Fill up the real index
index.reserve( parsed.size() );
// Find the index root offset
const QList< ParsedEntry > &parsedList = parsed;
for ( const ParsedEntry &e : parsedList )
{
if ( e.urls.empty() )
continue;
root_offset = qMin( root_offset, e.indent );
}
// And apply the index
for( const ParsedEntry &e : parsedList )
{
if ( e.urls.empty() )
continue;
EBookIndexEntry entry;
entry.name = e.name;
entry.urls = e.urls;
entry.seealso = e.seealso;
// If the index array is empty, make sure the first entry is on root offset
if ( index.isEmpty() )
entry.indent = root_offset;
else
entry.indent = e.indent - root_offset;
index.append( entry );
printf("%d: %s\n", entry.indent, qPrintable(entry.name));
}
return true;
}
bool EBook_CHM::getFileContentAsString( QString &str, const QUrl &url ) const
{
return getTextContent( str, urlToPath( url ) );
}
bool EBook_CHM::getFileContentAsBinary( QByteArray &data, const QUrl &url ) const
{
return getBinaryContent( data, urlToPath(url) );
}
bool EBook_CHM::getBinaryContent( QByteArray &data, const QString &url ) const
{
chmUnitInfo ui;
if( !ResolveObject( url, &ui ) )
return false;
data.resize( ui.length );
if ( RetrieveObject( &ui, (unsigned char*) data.data(), 0, ui.length ) )
return true;
return false;
}
bool EBook_CHM::getTextContent( QString& str, const QString& url, bool internal_encoding ) const
{
QByteArray buf;
if ( getBinaryContent( buf, url ) )
{
unsigned int length = buf.size();
if ( length > 0 )
{
buf.resize( length + 1 );
buf [length] = '\0';
str = internal_encoding ? (QString)( buf.constData() ) : encodeWithCurrentCodec( buf.constData() );
return true;
}
}
return false;
}
int EBook_CHM::getContentSize(const QString &url)
{
chmUnitInfo ui;
if( !ResolveObject( url, &ui ) )
return -1;
return ui.length;
}
bool EBook_CHM::load(const QString &archiveName)
{
QString filename;
// If the file has a file:// prefix, remove it
if ( archiveName.startsWith( "file://" ) )
filename = archiveName.mid( 7 ); // strip it
else
filename = archiveName;
if( m_chmFile )
close();
#if defined (WIN32)
// chm_open on Windows OS uses the following prototype:
// struct chmFile* chm_open(BSTR filename);
//
// however internally it simply passes the filename
// directly to CreateFileW function without any conversion.
// Thus we need to pass it as WCHAR * and not BSTR.
m_chmFile = chm_open( (BSTR) filename.toStdWString().c_str() );
#else
m_chmFile = chm_open( QFile::encodeName(filename) );
#endif
if ( m_chmFile == nullptr )
return false;
m_filename = filename;
// Reset encoding
m_textCodec = nullptr;
m_textCodecForSpecialFiles = nullptr;
m_currentEncoding = "UTF-8";
// Get information from /#WINDOWS and /#SYSTEM files (encoding, title, context file and so)
// and guess the encoding
getInfoFromWindows();
getInfoFromSystem();
guessTextEncoding();
// Check whether the search tables are present
if ( ResolveObject("/#TOPICS", &m_chmTOPICS)
&& ResolveObject("/#STRINGS", &m_chmSTRINGS)
&& ResolveObject("/#URLTBL", &m_chmURLTBL)
&& ResolveObject("/#URLSTR", &m_chmURLSTR) )
{
m_lookupTablesValid = true;
fillTopicsUrlMap();
}
else
m_lookupTablesValid = false;
// Some CHM files have toc and index files, but do not set the name properly.
// Some heuristics here.
if ( m_topicsFile.isEmpty() && hasFile( "/toc.hhc" ) )
m_topicsFile = "/toc.hhc";
if ( m_indexFile.isEmpty() && hasFile( "/index.hhk" ) )
m_indexFile = "/index.hhk";
if ( !m_topicsFile.isEmpty() || ( m_lookupTablesValid && hasFile( "/#TOCIDX" ) ) )
m_tocAvailable = true;
else
m_tocAvailable = false;
if ( !m_indexFile.isEmpty() || ( m_lookupTablesValid && hasFile( "/$WWKeywordLinks/BTree" ) ) )
m_indexAvailable = true;
else
m_indexAvailable = false;
return true;
}
int EBook_CHM::findStringInQuotes (const QString& tag, int offset, QString& value, bool firstquote, bool decodeentities) const
{
int qbegin = tag.indexOf ('"', offset);
if ( qbegin == -1 )
qFatal ("EBook_CHMImpl::findStringInQuotes: cannot find first quote in <param> tag: '%s'", qPrintable( tag ));
int qend = firstquote ? tag.indexOf ('"', qbegin + 1) : tag.lastIndexOf ('"');
if ( qend == -1 || qend <= qbegin )
qFatal ("EBook_CHMImpl::findStringInQuotes: cannot find last quote in <param> tag: '%s'", qPrintable( tag ));
// If we do not need to decode HTML entities, just return.
if ( decodeentities )
{
QString htmlentity = QString();
bool fill_entity = false;
value.reserve (qend - qbegin); // to avoid multiple memory allocations
for ( int i = qbegin + 1; i < qend; i++ )
{
if ( !fill_entity )
{
if ( tag[i] == '&' ) // HTML entity starts
fill_entity = true;
else
value.append (tag[i]);
}
else
{
if ( tag[i] == ';' ) // HTML entity ends
{
// If entity is an ASCII code, just decode it
QString decode = m_htmlEntityDecoder.decode( htmlentity );
if ( decode.isNull() )
break;
value.append ( decode );
htmlentity = QString();
fill_entity = false;
}
else
htmlentity.append (tag[i]);
}
}
}
else
value = tag.mid (qbegin + 1, qend - qbegin - 1);
return qend + 1;
}
bool EBook_CHM::parseFileAndFillArray( const QString& file, QList< ParsedEntry >& data, bool asIndex ) const
{
QString src;
const int MAX_NEST_DEPTH = 256;
if ( !getTextContent( src, file ) || src.isEmpty() )
return false;
/*
// Save the index for debugging purposes
QFile outfile( "parsed.htm" );
if ( outfile.open( QIODevice::WriteOnly ) )
{
QTextStream textstream( &outfile );
textstream << src;
outfile.close();
}
*/
EBookTocEntry::Icon defaultimagenum = EBookTocEntry::IMAGE_AUTO;
int pos = 0, indent = 0, root_indent_offset = 0;
bool in_object = false, root_indent_offset_set = false;
ParsedEntry entry;
entry.iconid = defaultimagenum;
// Split the HHC file by HTML tags
int stringlen = src.length();
while ( pos < stringlen && (pos = src.indexOf ('<', pos)) != -1 )
{
int i, word_end = 0;
for ( i = ++pos; i < stringlen; i++ )
{
// If a " or ' is found, skip to the next one.
if ( (src[i] == '"' || src[i] == '\'') )
{
// find where quote ends, either by another quote, or by '>' symbol (some people don't know HTML)
int nextpos = src.indexOf (src[i], i+1);
if ( nextpos == -1 && (nextpos = src.indexOf ('>', i+1)) == -1 )
{
qWarning ("EBook_CHMImpl::ParseHhcAndFillTree: corrupted TOC: %s", qPrintable( src.mid(i) ));
return false;
}
i = nextpos;
}
else if ( src[i] == '>' )
break;
else if ( !src[i].isLetterOrNumber() && src[i] != '/' && !word_end )
word_end = i;
}
QString tagword, tag = src.mid (pos, i - pos);
if ( word_end )
tagword = src.mid (pos, word_end - pos).toLower();
else
tagword = tag.toLower();
//DEBUGPARSER(("tag: '%s', tagword: '%s'\n", qPrintable( tag ), qPrintable( tagword ) ));
// <OBJECT type="text/sitemap"> - a topic entry
if ( tagword == "object" && tag.indexOf ("text/sitemap", 0, Qt::CaseInsensitive ) != -1 )
in_object = true;
else if ( tagword == "/object" && in_object )
{
// a topic entry closed. Add a tree item
if ( entry.name.isEmpty() && entry.urls.isEmpty() )
{
qWarning ("EBook_CHMImpl::ParseAndFillTopicsTree: <object> tag is parsed, but both name and url are empty.");
}
else
{
// If the name is empty, use the URL as name
if ( entry.name.isEmpty() )
entry.name = entry.urls[0].toString();
if ( !root_indent_offset_set )
{
root_indent_offset_set = true;
root_indent_offset = indent;
if ( root_indent_offset > 1 )
qWarning("CHM has improper index; root indent offset is %d", root_indent_offset);
}
// Trim the entry name
entry.name = entry.name.trimmed();
int real_indent = indent - root_indent_offset;
entry.indent = real_indent;
data.push_back( entry );
}
entry.name = QString();
entry.urls.clear();
entry.iconid = defaultimagenum;
entry.seealso.clear();
in_object = false;
}
else if ( tagword == "param" && in_object )
{
// <param name="Name" value="First Page">
int offset; // strlen("param ")
QString name_pattern = "name=", value_pattern = "value=";
QString pname, pvalue;
if ( (offset = tag.indexOf (name_pattern, 0, Qt::CaseInsensitive )) == -1 )
qFatal ("EBook_CHMImpl::ParseAndFillTopicsTree: bad <param> tag '%s': no name=\n", qPrintable( tag ));
// offset+5 skips 'name='
offset = findStringInQuotes (tag, offset + name_pattern.length(), pname, true, false);
pname = pname.toLower();
if ( (offset = tag.indexOf(value_pattern, offset, Qt::CaseInsensitive )) == -1 )
qFatal ("EBook_CHMImpl::ParseAndFillTopicsTree: bad <param> tag '%s': no value=\n", qPrintable( tag ));
// offset+6 skips 'value='
findStringInQuotes (tag, offset + value_pattern.length(), pvalue, false, true);
//DEBUGPARSER(("<param>: name '%s', value '%s'", qPrintable( pname ), qPrintable( pvalue )));
if ( pname == "name" || pname == "keyword" )
{
// Some help files contain duplicate names, where the second name is empty. Work it around by keeping the first one
if ( !pvalue.isEmpty() )
entry.name = pvalue;
}
else if ( pname == "merge" )
{
// MERGE implementation is experimental
QUrl mergeurl = pathToUrl( pvalue );
QString mergecontent;
if ( getFileContentAsString( mergecontent, mergeurl ) && !mergecontent.isEmpty() )
{
qWarning( "MERGE is used in index; the implementation is experimental. Please let me know if it works" );
// Merge the read value into the current parsed file.
// To save memory it is done in a kinda hacky way:
src = mergecontent + src.mid( i );
pos = 0;
stringlen = src.length();
}
else
qWarning( "MERGE is used in index but file %s was not found in CHM archive", qPrintable(pvalue) );
}
else if ( pname == "local" )
{
// Check for URL duplication
QUrl url = pathToUrl( pvalue );
if ( !entry.urls.contains( url ) )
entry.urls.push_back( url );
}
else if ( pname == "see also" && asIndex && entry.name != pvalue )
{
entry.urls.push_back( QUrl("seealso") );
entry.seealso = pvalue;
}
else if ( pname == "imagenumber" )
{
bool bok;
int imgnum = pvalue.toInt (&bok);
if ( bok && imgnum >= 0 && imgnum < EBookTocEntry::MAX_BUILTIN_ICONS )
entry.iconid = (EBookTocEntry::Icon) imgnum;
}
}
else if ( tagword == "ul" ) // increase indent level
{
// Fix for buggy help files
if ( ++indent >= MAX_NEST_DEPTH )
qFatal("EBook_CHMImpl::ParseAndFillTopicsTree: max nest depth (%d) is reached, error in help file", MAX_NEST_DEPTH);
DEBUGPARSER(("<ul>: new intent is %d\n", indent - root_indent_offset));
}
else if ( tagword == "/ul" ) // decrease indent level
{
if ( --indent < root_indent_offset )
indent = root_indent_offset;
DEBUGPARSER(("</ul>: new intent is %d\n", indent - root_indent_offset));
}
pos = i;
}
// Dump our array
// for ( int i = 0; i < data.size(); i++ )
// qDebug() << data[i].indent << data[i].name << data[i].urls;
return true;
}
bool EBook_CHM::ResolveObject(const QString& fileName, chmUnitInfo *ui) const
{
return m_chmFile != nullptr
&& ::chm_resolve_object(m_chmFile, qPrintable( fileName ), ui) ==
CHM_RESOLVE_SUCCESS;
}
bool EBook_CHM::hasFile(const QString & fileName) const
{
chmUnitInfo ui;
return m_chmFile != nullptr
&& ::chm_resolve_object(m_chmFile, qPrintable( fileName ), &ui) ==
CHM_RESOLVE_SUCCESS;
}
size_t EBook_CHM::RetrieveObject(const chmUnitInfo *ui, unsigned char *buffer,
LONGUINT64 fileOffset, LONGINT64 bufferSize) const
{
return ::chm_retrieve_object(m_chmFile, const_cast<chmUnitInfo*>(ui),
buffer, fileOffset, bufferSize);
}
bool EBook_CHM::getInfoFromWindows()
{
#define WIN_HEADER_LEN 0x08
unsigned char buffer[BUF_SIZE];
unsigned int factor;
chmUnitInfo ui;
long size = 0;
if ( ResolveObject("/#WINDOWS", &ui) )
{
if ( !RetrieveObject(&ui, buffer, 0, WIN_HEADER_LEN) )
return false;
unsigned int entries = get_int32_le( reinterpret_cast<unsigned int *>(buffer) );
unsigned int entry_size = get_int32_le( reinterpret_cast<unsigned int *>(buffer + 0x04) );
QVector<unsigned char> uptr(entries * entry_size);
unsigned char* raw = (unsigned char*) uptr.data();
if ( !RetrieveObject (&ui, raw, 8, entries * entry_size) )
return false;
if( !ResolveObject ("/#STRINGS", &ui) )
return false;
for ( unsigned int i = 0; i < entries; ++i )
{
unsigned int offset = i * entry_size;
unsigned int off_title = get_int32_le( reinterpret_cast<unsigned int *>(raw + offset + 0x14) );
unsigned int off_home = get_int32_le( reinterpret_cast<unsigned int *>(raw + offset + 0x68) );
unsigned int off_hhc = get_int32_le( reinterpret_cast<unsigned int *>(raw + offset + 0x60) );
unsigned int off_hhk = get_int32_le( reinterpret_cast<unsigned int *>(raw + offset + 0x64) );
factor = off_title / 4096;
if ( size == 0 )
size = RetrieveObject(&ui, buffer, factor * 4096, BUF_SIZE);
if ( size && off_title )
m_title = QByteArray( (const char*) (buffer + off_title % 4096) );
if ( factor != off_home / 4096)
{
factor = off_home / 4096;
size = RetrieveObject (&ui, buffer, factor * 4096, BUF_SIZE);
}
if ( size && off_home )
m_home = QByteArray("/") + QByteArray( (const char*) buffer + off_home % 4096);
if ( factor != off_hhc / 4096)
{
factor = off_hhc / 4096;
size = RetrieveObject(&ui, buffer, factor * 4096, BUF_SIZE);
}
if ( size && off_hhc )
m_topicsFile = QByteArray("/") + QByteArray((const char*) buffer + off_hhc % 4096);
if ( factor != off_hhk / 4096)
{
factor = off_hhk / 4096;
size = RetrieveObject (&ui, buffer, factor * 4096, BUF_SIZE);
}
if ( size && off_hhk )
m_indexFile = QByteArray("/") + QByteArray((const char*) buffer + off_hhk % 4096);
}
}
return true;
}
bool EBook_CHM::getInfoFromSystem()
{
unsigned char buffer[BUF_SIZE];
chmUnitInfo ui;
int index = 0;
unsigned char* cursor = nullptr, *p;
unsigned short value = 0;
long size = 0;
// Run the first loop to detect the encoding. We need this, because title could be
// already encoded in user encoding. Same for file names
if ( !ResolveObject ("/#SYSTEM", &ui) )
return false;
// Can we pull BUFF_SIZE bytes of the #SYSTEM file?
if ( (size = RetrieveObject (&ui, buffer, 4, BUF_SIZE)) == 0 )
return false;
buffer[size - 1] = 0;
// First loop to detect the encoding
for ( index = 0; index < (size - 1 - (long)sizeof(unsigned short)) ;)
{
cursor = buffer + index;
value = UINT16ARRAY(cursor);
switch(value)
{
case 0:
index += 2;
cursor = buffer + index;
if(m_topicsFile.isEmpty())
m_topicsFile = QByteArray("/") + QByteArray((const char*) buffer + index + 2);
break;
case 1:
index += 2;
cursor = buffer + index;
if(m_indexFile.isEmpty())
m_indexFile = QByteArray("/") + QByteArray((const char*)buffer + index + 2);
break;
case 2:
index += 2;
cursor = buffer + index;
if(m_home.isEmpty() || m_home == "/")
m_home = QByteArray("/") + QByteArray((const char*) buffer + index + 2);
break;
case 3:
index += 2;
cursor = buffer + index;
m_title = QByteArray( (const char*) (buffer + index + 2) );
break;
case 4:
index += 2;
cursor = buffer + index;
p = buffer + index + 2;
m_detectedLCID = (short) (p[0] | (p[1]<<8));
break;
case 6:
index += 2;
cursor = buffer + index;
if ( m_topicsFile.isEmpty() )
{
QString topicAttempt = "/", tmp;
topicAttempt += QString ((const char*) buffer +index +2);
tmp = topicAttempt + ".hhc";
if ( ResolveObject( tmp, &ui) )
m_topicsFile = qPrintable( tmp );
tmp = topicAttempt + ".hhk";
if ( ResolveObject( tmp, &ui) )
m_indexFile = qPrintable( tmp );
}
break;
case 16:
index += 2;
cursor = buffer + index;
m_font = QString ((const char*) buffer + index + 2);
break;
default:
index += 2;
cursor = buffer + index;
}
value = UINT16ARRAY(cursor);
index += value + 2;
}
return true;
}
QString EBook_CHM::getTopicByUrl( const QUrl& url )
{
QMap< QUrl, QString >::const_iterator it = m_url2topics.constFind( url );
if ( it == m_url2topics.constEnd() )
return QString();
return it.value();
}
static int chm_enumerator_callback( struct chmFile*, struct chmUnitInfo *ui, void *context )
{
EBook_CHM tmp;
((QList<QUrl> *) context)->push_back( tmp.pathToUrl( ui->path ) );
return CHM_ENUMERATOR_CONTINUE;
}
bool EBook_CHM::enumerateFiles(QList<QUrl> &files )
{
files.clear();
return chm_enumerate( m_chmFile, CHM_ENUMERATE_ALL, chm_enumerator_callback, &files );
}
QString EBook_CHM::currentEncoding() const
{
return m_currentEncoding;
}
bool EBook_CHM::setCurrentEncoding( const char * encoding )
{
m_currentEncoding = encoding;
return changeFileEncoding( encoding );
}
bool EBook_CHM::isSupportedUrl(const QUrl &url)
{
return url.scheme() == URL_SCHEME_CHM;
}
bool EBook_CHM::guessTextEncoding()
{
if ( !m_detectedLCID )
{
qWarning ("Could not detect LCID");
return false;
}
QString enc = Ebook_CHM_Encoding::guessByLCID( m_detectedLCID );
if ( changeFileEncoding ( enc ) )
{
m_currentEncoding = enc;
return true;
}
return false;
}
bool EBook_CHM::changeFileEncoding( const QString& qtencoding )
{
// Encoding could be either simple Qt codepage, or set like CP1251/KOI8, which allows to
// set up encodings separately for text (first) and internal files (second)
int p = qtencoding.indexOf( '/' );
if ( p != -1 )
{
QString global = qtencoding.left( p );
QString special = qtencoding.mid( p + 1 );
m_textCodec = QTextCodec::codecForName( global.toUtf8() );
if ( !m_textCodec )
{
qWarning( "Could not set up Text Codec for encoding '%s'", qPrintable( global ) );
return false;
}
m_textCodecForSpecialFiles = QTextCodec::codecForName( special.toUtf8() );
if ( !m_textCodecForSpecialFiles )
{
qWarning( "Could not set up Text Codec for encoding '%s'", qPrintable( special ) );
return false;
}
}
else
{
m_textCodecForSpecialFiles = m_textCodec = QTextCodec::codecForName( qtencoding.toUtf8() );
if ( !m_textCodec )
{
qWarning( "Could not set up Text Codec for encoding '%s'", qPrintable( qtencoding ) );
return false;
}
}
m_htmlEntityDecoder.changeEncoding( m_textCodec );
return true;
}
void EBook_CHM::fillTopicsUrlMap()
{
if ( !m_lookupTablesValid )
return;
// Read those tables
QVector<unsigned char> topics( m_chmTOPICS.length ), urltbl( m_chmURLTBL.length ), urlstr( m_chmURLSTR.length ), strings( m_chmSTRINGS.length );
if ( !RetrieveObject( &m_chmTOPICS, (unsigned char*) topics.data(), 0, m_chmTOPICS.length )
|| !RetrieveObject( &m_chmURLTBL, (unsigned char*) urltbl.data(), 0, m_chmURLTBL.length )
|| !RetrieveObject( &m_chmURLSTR, (unsigned char*) urlstr.data(), 0, m_chmURLSTR.length )
|| !RetrieveObject( &m_chmSTRINGS, (unsigned char*) strings.data(), 0, m_chmSTRINGS.length ) )
return;
for ( LONGUINT64 i = 0; i < m_chmTOPICS.length; i += TOPICS_ENTRY_LEN )
{
unsigned int off_title = get_int32_le( reinterpret_cast<unsigned int *>(topics.data() + i + 4) );
unsigned int off_url = get_int32_le( reinterpret_cast<unsigned int *>(topics.data() + i + 8) );
off_url = get_int32_le( reinterpret_cast<unsigned int *>( urltbl.data() + off_url + 8) ) + 8;
QUrl url = pathToUrl( (const char*) urlstr.data() + off_url );
if ( off_title < (unsigned int)strings.size() )
m_url2topics[url] = encodeWithCurrentCodec ( (const char*) strings.data() + off_title );
else
m_url2topics[url] = "Untitled";
}
}
bool EBook_CHM::parseBinaryTOC( QList< EBookTocEntry >& toc ) const
{
if ( !m_lookupTablesValid )
return false;
QByteArray tocidx, topics, urltbl, urlstr, strings;
// Read the index tables
if ( !getBinaryContent( tocidx, "/#TOCIDX" )
|| !getBinaryContent( topics, "/#TOPICS" )
|| !getBinaryContent( urltbl, "/#URLTBL" )
|| !getBinaryContent( urlstr, "/#URLSTR" )
|| !getBinaryContent( strings, "/#STRINGS" ) )
return false;
// Shamelessly stolen from xchm
if ( !RecurseLoadBTOC( tocidx, topics, urltbl, urlstr, strings, UINT32ARRAY( tocidx.data() ), toc, 0 ) )
{
qWarning("Failed to parse binary TOC, fallback to text-based TOC");
toc.clear();
return false;
}
return true;
}
//
// This piece of code was based on the one in xchm written by Razvan Cojocaru <razvanco@gmx.net>
//
bool EBook_CHM::RecurseLoadBTOC( const QByteArray& tocidx,
const QByteArray& topics,
const QByteArray& urltbl,
const QByteArray& urlstr,
const QByteArray& strings,
int offset,
QList< EBookTocEntry >& entries,
int level ) const
{
while ( offset )
{
// If this is end of TOCIDX, return.
if ( tocidx.size() < offset + 20 )
return true;
unsigned int flags = UINT32ARRAY( tocidx.data() + offset + 4 );
int index = UINT32ARRAY( tocidx.data() + offset + 8 );
if ( (flags & 0x04) || (flags & 0x08))
{
QString name, value;
if ( (flags & 0x08) == 0 )
{
if ( strings.size() < index + 1 )
{
qWarning("EBook_CHM::RecurseLoadBTOC: invalid name index (%d) for book TOC entry!", index );
return false;
}
name = encodeWithCurrentCodec( strings.data() + index);
}
else
{
if ( topics.size() < (index * 16) + 12 )
{
qWarning("EBook_CHM::RecurseLoadBTOC: invalid name index (%d) for local TOC entry!", index );
return false;
}
int tocoffset = (int) UINT32ARRAY(topics.data()+ (index * 16) + 4);
if ( strings.size() < tocoffset + 1 )
{
qWarning("EBook_CHM::RecurseLoadBTOC: invalid name tocoffset (%d) for TOC entry!", tocoffset );
return false;
}
if ( tocoffset < 0 )
name.clear();
else
name = encodeWithCurrentCodec( strings.data() + tocoffset );
// #URLTBL index
tocoffset = (int) UINT32ARRAY( topics.data() + (index * 16) + 8 );
if ( tocoffset < 0 || urltbl.size() < tocoffset + 12 )
{
qWarning("EBook_CHM::RecurseLoadBTOC: invalid url index (%d) for TOC entry!", tocoffset );
return false;
}
tocoffset = (int) UINT32ARRAY(urltbl.data() + tocoffset + 8);
if ( tocoffset < 0 || urlstr.size() < tocoffset )
{
qWarning("EBook_CHM::RecurseLoadBTOC: invalid url offset (%d) for TOC entry!", tocoffset );
return false;
}
value = encodeWithCurrentCodec( urlstr.data() + tocoffset + 8 );
}
EBookTocEntry entry;
entry.name = name.trimmed();
if ( !entry.name.isEmpty() )
{
if ( !value.isEmpty() )
entry.url = pathToUrl( value );
entry.iconid = EBookTocEntry::IMAGE_AUTO;
entry.indent = level;
entries.push_back( entry );
}
}
if ( flags & 0x04 )
{
// book
if ( tocidx.size() < offset + 24 )
{
qWarning("EBook_CHM::RecurseLoadBTOC: invalid child entry offset (%d)", offset );
return false;
}
unsigned int childoffset = UINT32ARRAY( tocidx.data() + offset + 20 );
if ( childoffset )
{
if ( !RecurseLoadBTOC( tocidx, topics, urltbl, urlstr, strings, childoffset, entries, level + 1 ) )
return false;
}
}
offset = UINT32ARRAY( tocidx.data() + offset + 0x10 );
}
return true;
}
bool EBook_CHM::hasOption(const QString & name) const
{
if ( !m_envOptions.isEmpty() && m_envOptions.contains( name ) )
return true;
return false;
}
QUrl EBook_CHM::pathToUrl(const QString &link) const
{
if ( link.startsWith( "http://" ) || link.startsWith( "https://" ) )
return QUrl( link );
QUrl url;
url.setScheme( URL_SCHEME_CHM );
url.setHost( URL_SCHEME_CHM );
// Does the link contain the fragment as well?
int off = link.indexOf( '#' );
QString path;
if ( off != -1 )
{
path = link.left( off );
url.setFragment( link.mid( off + 1 ) );
}
else
path = link;
if ( !path.startsWith( '/' ) )
path.prepend( '/' );
url.setPath( QUrl::fromPercentEncoding( path.toUtf8() ) );
return url;
}
QString EBook_CHM::urlToPath(const QUrl &link) const
{
if ( link.scheme() == URL_SCHEME_CHM )
{
if ( link.path() == "/" || link.path().isEmpty() )
return m_home;
return link.path();
}
return "";
}
EBook_CHM::ParsedEntry::ParsedEntry()
{
iconid = 0;
indent = 0;
}