From 81f027ec30ba87d46c0d51c674b17a94abf20e0c Mon Sep 17 00:00:00 2001 From: Jakub Stachowski Date: Mon, 10 Nov 2008 12:20:53 +0000 Subject: [PATCH] Support for files compressed with Huffman encoding. This means you can finally read some files that FBReader cannot handle svn path=/trunk/playground/graphics/okular/mobipocket/; revision=882322 --- CMakeLists.txt | 2 +- TODO | 1 - converter.cpp | 4 +- decompressor.cpp | 236 +++++++++++++++++++++++++++++++++++++++++++++++ decompressor.h | 29 ++++++ mobipocket.cpp | 117 ++++------------------- mobipocket.h | 6 -- test.cpp | 13 +++ 8 files changed, 300 insertions(+), 108 deletions(-) create mode 100644 decompressor.cpp create mode 100644 decompressor.h create mode 100644 test.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 878961d02..7e7a290f7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,7 @@ find_package(SharedMimeInfo REQUIRED) set(okularGenerator_mobi_PART_SRCS converter.cpp mobipocket.cpp + decompressor.cpp mobidocument.cpp generator_mobi.cpp ) @@ -28,7 +29,6 @@ target_link_libraries(okularGenerator_mobi okularcore ${mobi_LIBRARIES} ${KDE4_K install(TARGETS okularGenerator_mobi DESTINATION ${PLUGIN_INSTALL_DIR}) - ########### install files ############### install( FILES libokularGenerator_mobi.desktop okularMobi.desktop DESTINATION ${SERVICES_INSTALL_DIR} ) diff --git a/TODO b/TODO index e9f9a05f7..f8dadf39f 100644 --- a/TODO +++ b/TODO @@ -1,6 +1,5 @@ - better error handling - tests for Mobipocket classes - anchors (a filepos=) -- handle files compression with Huffman encoding - decryption for DRMed files - metadata diff --git a/converter.cpp b/converter.cpp index b019aafef..e4df55227 100644 --- a/converter.cpp +++ b/converter.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include @@ -36,7 +36,7 @@ QTextDocument* Converter::convert( const QString &fileName ) { MobiDocument* newDocument=new MobiDocument(fileName); if (!newDocument->isValid()) { - emit error(i18n("Error while opening the EPub document."), -1); + emit error(i18n("Error while opening the Mobipocket document."), -1); delete newDocument; return NULL; } diff --git a/decompressor.cpp b/decompressor.cpp new file mode 100644 index 000000000..b63c796d3 --- /dev/null +++ b/decompressor.cpp @@ -0,0 +1,236 @@ +/*************************************************************************** + * Copyright (C) 2008 by Jakub Stachowski * + * * + * RLE decompressor based on FBReader * + * Copyright (C) 2004-2008 Geometer Plus * + * * + * Huffdic decompressor based on Python code by Igor Skochinsky * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + ***************************************************************************/ + +#include "mobipocket.h" +#include "decompressor.h" + +#include + +static unsigned char TOKEN_CODE[256] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +}; + +namespace Mobipocket { + +class NOOPDecompressor : public Decompressor +{ +public: + NOOPDecompressor(const PDB& p) : Decompressor(p) {} + QByteArray decompress(const QByteArray& data) { return data; } +}; + + +class RLEDecompressor : public Decompressor +{ +public: + RLEDecompressor(const PDB& p) : Decompressor(p) {} + QByteArray decompress(const QByteArray& data); +}; + +class BitReader +{ +public: + BitReader(const QByteArray& d) : pos(0), data(d) + { + data.append("\000\000\000\000"); + len=data.size()*8; + } + + quint32 read() { + quint32 g=0; + quint64 r=0; + while (g<32) { + r=(r << 8) | (quint8)data[(pos+g)>>3]; + g=g+8 - ((pos+g) & 7); + } + return (r >> (g-32)); + } + bool eat(int n) { + pos+=n; + return pos <= len; + } + + int left() { + return len - pos; + } + +private: + int pos; + int len; + QByteArray data; +}; + +class HuffdicDecompressor : public Decompressor +{ +public: + HuffdicDecompressor(const PDB& p); + QByteArray decompress(const QByteArray& data); +private: + void unpack(BitReader reader, int depth = 0); + QList dicts; + quint32 entry_bits; + quint32 dict1[256]; + quint32 dict2[64]; + + QByteArray buf; +}; + + + +QByteArray RLEDecompressor::decompress(const QByteArray& data) +{ + QByteArray ret; + ret.reserve(8192); + + unsigned char token; + unsigned short copyLength, N, shift; + unsigned short shifted; + int i=0; + int maxIndex=data.size()-1; + + while (i maxIndex) ) { + goto endOfLoop; + } + ret.append(data.mid(i,token)); + i+=token; + break; + case 2: + ret.append(' '); + ret.append(token ^ 0x80); + break; + case 3: + if (i + 1 > maxIndex) { + goto endOfLoop; + } + N = token; + N<<=8; + N+=(unsigned char)data.at(i++); + copyLength = (N & 7) + 3; + shift = (N & 0x3fff) / 8; + shifted = ret.size()-shift; + if (shifted>(ret.size()-1)) goto endOfLoop; + for (int i=0;i32) goto fail; + while (reader.left()) { + quint32 dw=reader.read(); + quint32 v=dict1[dw>>24]; + quint8 codelen = v & 0x1F; + if (!codelen) goto fail; + quint32 code = dw >> (32 - codelen); + quint32 r=(v >> 8); + if (!( v & 0x80)) { + while (code < dict2[(codelen-1)*2]) { + codelen++; + code = dw >> (32 - codelen); + } + r = dict2[(codelen-1)*2+1]; + } + r-=code; + if (!codelen) goto fail; + if (!reader.eat(codelen)) return; + quint32 dict_no = r >> entry_bits; + quint32 off1 = 16 + (r - (dict_no << entry_bits))*2; + QByteArray dict=dicts[dict_no]; + quint32 off2 = 16 + (unsigned char)dict[off1]*256 + (unsigned char)dict[off1+1]; + quint32 blen = (unsigned char)dict[off2]*256 + (unsigned char)dict[off2+1]; + QByteArray slice=dict.mid(off2+2,(blen & 0x7fff)); + if (blen & 0x8000) buf+=slice; + else unpack(BitReader(slice),depth+1); + } + return; +fail: + valid=false; +} + +Decompressor* Decompressor::create(quint8 type, const PDB& pdb) +{ + switch (type) { + case 1 : return new NOOPDecompressor(pdb); + case 2 : return new RLEDecompressor(pdb); + case 'H' : return new HuffdicDecompressor(pdb); + default : return 0; + } + +} +} diff --git a/decompressor.h b/decompressor.h new file mode 100644 index 000000000..2509c72df --- /dev/null +++ b/decompressor.h @@ -0,0 +1,29 @@ +/*************************************************************************** + * Copyright (C) 2008 by Jakub Stachowski * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + ***************************************************************************/ + +#include +namespace Mobipocket { + +class PDB; + +class Decompressor { +public: + Decompressor(const PDB& p) : pdb(p), valid(true) {} + virtual QByteArray decompress(const QByteArray& data) = 0; + virtual ~Decompressor() {} + bool isValid() const { return valid; } + + static Decompressor* create(quint8 type, const PDB& pdb); +protected: + const PDB& pdb; + bool valid; +}; + + +} \ No newline at end of file diff --git a/mobipocket.cpp b/mobipocket.cpp index 32fba2866..8e9fd08f8 100644 --- a/mobipocket.cpp +++ b/mobipocket.cpp @@ -1,110 +1,23 @@ /*************************************************************************** * Copyright (C) 2008 by Jakub Stachowski * * * - * RLE decompressor based on FBReader * - * Copyright (C) 2004-2008 Geometer Plus * - * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * ***************************************************************************/ -#include +#include "mobipocket.h" +#include "decompressor.h" + #include #include #include #include #include -static unsigned char TOKEN_CODE[256] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -}; - namespace Mobipocket { -class NOOPDecompressor : public Decompressor -{ -public: - QByteArray decompress(const QByteArray& data) { return data; } -}; - - -class RLEDecompressor : public Decompressor -{ -public: - QByteArray decompress(const QByteArray& data); -}; - -QByteArray RLEDecompressor::decompress(const QByteArray& data) -{ - QByteArray ret; - ret.reserve(8192); - - unsigned char token; - unsigned short copyLength, N, shift; - unsigned short shifted; - int i=0; - int maxIndex=data.size()-1; - - while (i maxIndex) ) { - goto endOfLoop; - } - ret.append(data.mid(i,token)); - i+=token; - break; - case 2: - ret.append(' '); - ret.append(token ^ 0x80); - break; - case 3: - if (i + 1 > maxIndex) { - goto endOfLoop; - } -// N = (token << 8) + data.at(i++); - N = token; - N<<=8; - N+=(unsigned char)data.at(i++); - copyLength = (N & 7) + 3; - shift = (N & 0x3fff) / 8; - shifted = ret.size()-shift; - if (shifted>(ret.size()-1)) goto endOfLoop; - for (int i=0;i recordOffsets; QIODevice* device; @@ -184,16 +97,19 @@ struct DocumentPrivate valid=pdb.isValid(); if (!valid) return; QByteArray mhead=pdb.getRecord(0); - if (mhead[0]!=(char)0) {} - - switch (mhead[1]) { - case 1 : dec = new NOOPDecompressor(); break; - case 2 : dec = new RLEDecompressor(); break; - default : dec=0; {} - } + kDebug() << "MHEAD" << (int)mhead[0]; +// if (mhead[0]!=(char)0) goto fail; + + kDebug() << "MHEAD" << (int)mhead[1]; + dec = Decompressor::create(mhead[1], pdb); + if (!dec) goto fail; ntextrecords=(unsigned char)mhead[8]; ntextrecords<<=8; ntextrecords+=(unsigned char)mhead[9]; + return; + fail: + valid=false; + } void findFirstImage() { firstImageRecord=ntextrecords+1; @@ -217,8 +133,13 @@ Document::Document(QIODevice* dev) : d(new DocumentPrivate(dev)) QString Document::text() const { QByteArray whole; - for (int i=1;intextrecords;i++) + for (int i=1;intextrecords+1;i++) { whole+=d->dec->decompress(d->pdb.getRecord(i)); + if (!d->dec->isValid()) { + d->valid=false; + return QString::null; + } + } return QString::fromUtf8(whole); } diff --git a/mobipocket.h b/mobipocket.h index 94f77a66b..5891c2db8 100644 --- a/mobipocket.h +++ b/mobipocket.h @@ -30,12 +30,6 @@ private: PDBPrivate* const d; }; -class Decompressor { -public: - virtual QByteArray decompress(const QByteArray& data) = 0; - virtual ~Decompressor() {} -}; - struct DocumentPrivate; class Document { public: diff --git a/test.cpp b/test.cpp new file mode 100644 index 000000000..0d2b7d540 --- /dev/null +++ b/test.cpp @@ -0,0 +1,13 @@ +#include "mobipocket.h" +#include +#include + +int main(int argc, char ** argv) +{ + QFile f(argv[1]); + f.open(QIODevice::ReadOnly); + Mobipocket::Document* d=new Mobipocket::Document(&f); + kDebug() << d->isValid(); + d->text(); + return 0; +}