You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
284 lines
8.2 KiB
284 lines
8.2 KiB
/***************************************************************************
 *   Copyright (C) 2008 by Jakub Stachowski <qbast@go2.pl>                 *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 ***************************************************************************/
|
|
|
#include "mobipocket.h" |
|
#include "decompressor.h" |
|
|
|
#include <QtCore/QIODevice> |
|
#include <QtCore/QtEndian> |
|
#include <QtCore/QBuffer> |
|
#include <QtGui/QImageReader> |
|
#include <kdebug.h> |
|
|
|
namespace Mobipocket { |
|
|
|
struct PDBPrivate { |
|
QList<quint32> recordOffsets; |
|
QIODevice* device; |
|
QString fileType; |
|
QString name; |
|
quint16 nrecords; |
|
bool valid; |
|
|
|
void init(); |
|
}; |
|
|
|
void PDBPrivate::init() |
|
{ |
|
valid=true; |
|
quint16 word; |
|
quint32 dword; |
|
device->seek(0); |
|
name=QString::fromLatin1(device->read(32)); |
|
device->seek(0x3c); |
|
fileType=QString::fromLatin1(device->read(8)); |
|
|
|
device->seek(0x4c); |
|
device->read((char*)&word,2); |
|
nrecords=qFromBigEndian(word); |
|
|
|
for (int i=0;i<nrecords;i++) { |
|
device->read((char*)&dword,4); |
|
recordOffsets.append(qFromBigEndian(dword)); |
|
device->read((char*)&dword,4); |
|
} |
|
} |
|
|
|
// Creates the private state, hands it the device and parses the header.
// NOTE(review): no matching destructor is visible in this file - confirm the
// private object is released elsewhere.
PDB::PDB(QIODevice* dev)
    : d(new PDBPrivate)
{
    // init() reads from the device, so it has to be assigned first.
    d->device = dev;
    d->init();
}
|
|
|
// Returns the raw bytes of record `i`.
//
// A record spans from its own offset up to the offset of the next record; the
// last record extends to the end of the device. An empty QByteArray is
// returned when `i` is out of range (including negative), when the offsets
// are inconsistent (the record would end before it starts), or when the
// device cannot seek to the record.
QByteArray PDB::getRecord(int i) const
{
    // Never index past the offsets that were actually read from the header.
    const int known=d->recordOffsets.size();
    const int count=(known<d->nrecords) ? known : static_cast<int>(d->nrecords);
    if (i<0 || i>=count) return QByteArray();

    // 64-bit arithmetic: the difference of two quint32 offsets must not wrap.
    const qint64 begin=d->recordOffsets.at(i);
    const bool last=(i==count-1);
    const qint64 end=last ? d->device->size()
                          : static_cast<qint64>(d->recordOffsets.at(i+1));
    if (end<begin) return QByteArray();

    if (!d->device->seek(begin)) return QByteArray();
    return d->device->read(end-begin);
}
|
|
|
// Database name as decoded from the header by PDBPrivate::init().
QString PDB::name() const
{
    const QString& stored = d->name;
    return stored;
}
|
|
|
bool PDB::isValid() const |
|
{ |
|
return d->valid; |
|
} |
|
|
|
int PDB::recordCount() const |
|
{ |
|
return d->nrecords; |
|
} |
|
|
|
//////////////////////////////////////////// |
|
struct DocumentPrivate |
|
{ |
|
DocumentPrivate(QIODevice* d) : pdb(d), valid(true), firstImageRecord(0), isUtf(false), |
|
drm(false), thumbnailIndex(0) {} |
|
PDB pdb; |
|
Decompressor* dec; |
|
quint16 ntextrecords; |
|
bool valid; |
|
|
|
// number of first record holding image. Usually it is directly after end of text, but not always |
|
quint16 firstImageRecord; |
|
QMap<Document::MetaKey, QString> metadata; |
|
bool isUtf; |
|
bool drm; |
|
|
|
// index of thumbnail in image list. May be specified in EXTH. |
|
// If not then just use first image and hope for the best |
|
int thumbnailIndex; |
|
|
|
void init(); |
|
void findFirstImage(); |
|
void parseEXTH(const QByteArray& data); |
|
void parseHtmlHead(const QString& data); |
|
QString readEXTHRecord(const QByteArray& data, quint32& offset); |
|
QString decodeString(const QByteArray& data) const; |
|
QImage getImageFromRecord(int recnum); |
|
}; |
|
|
|
// Converts raw bytes to a QString using the document encoding: UTF-8 when
// isUtf is set, Latin-1 otherwise.
QString DocumentPrivate::decodeString(const QByteArray& data) const
{
    if (isUtf) {
        return QString::fromUtf8(data);
    }
    return QString::fromLatin1(data);
}
|
|
|
void DocumentPrivate::parseHtmlHead(const QString& data) |
|
{ |
|
static QRegExp title("<dc:title.*>(.*)</dc:title>", Qt::CaseInsensitive); |
|
static QRegExp author("<dc:creator.*>(.*)</dc:creator>", Qt::CaseInsensitive); |
|
static QRegExp copyright("<dc:rights.*>(.*)</dc:rights>", Qt::CaseInsensitive); |
|
static QRegExp subject("<dc:subject.*>(.*)</dc:subject>", Qt::CaseInsensitive); |
|
static QRegExp description("<dc:description.*>(.*)</dc:description>", Qt::CaseInsensitive); |
|
title.setMinimal(true); |
|
author.setMinimal(true); |
|
copyright.setMinimal(true); |
|
subject.setMinimal(true); |
|
description.setMinimal(true); |
|
|
|
// title could have been already taken from MOBI record |
|
if (!metadata.contains(Document::Title) && title.indexIn(data)!=-1) metadata[Document::Title]=title.capturedTexts()[1]; |
|
if (author.indexIn(data)!=-1) metadata[Document::Author]=author.capturedTexts()[1]; |
|
if (copyright.indexIn(data)!=-1) metadata[Document::Copyright]=copyright.capturedTexts()[1]; |
|
if (subject.indexIn(data)!=-1) metadata[Document::Subject]=subject.capturedTexts()[1]; |
|
if (description.indexIn(data)!=-1) metadata[Document::Description]=description.capturedTexts()[1]; |
|
|
|
} |
|
|
|
void DocumentPrivate::init() |
|
{ |
|
valid=pdb.isValid(); |
|
if (!valid) return; |
|
QByteArray mhead=pdb.getRecord(0); |
|
dec = Decompressor::create(mhead[1], pdb); |
|
if ((int)mhead[12]!=0 || (int)mhead[13]!=0) drm=true; |
|
if (!dec) { |
|
valid=false; |
|
return; |
|
} |
|
ntextrecords=(unsigned char)mhead[8]; |
|
ntextrecords<<=8; |
|
ntextrecords+=(unsigned char)mhead[9]; |
|
quint32 encoding=readBELong(mhead, 28); |
|
if (encoding==65001) isUtf=true; |
|
if (mhead.size()>176) parseEXTH(mhead); |
|
|
|
// try getting metadata from HTML if nothing or only title was recovered from MOBI and EXTH records |
|
if (metadata.size()<2 && !drm) parseHtmlHead(decodeString(dec->decompress(pdb.getRecord(1)))); |
|
} |
|
|
|
void DocumentPrivate::findFirstImage() { |
|
firstImageRecord=ntextrecords+1; |
|
while (firstImageRecord<pdb.recordCount()) { |
|
QByteArray rec=pdb.getRecord(firstImageRecord); |
|
if (rec.isNull()) return; |
|
QBuffer buf(&rec); |
|
buf.open(QIODevice::ReadOnly); |
|
QImageReader r(&buf); |
|
if (r.canRead()) return; |
|
firstImageRecord++; |
|
} |
|
} |
|
|
|
// Reads one EXTH record payload as a string.
//
// On entry `offset` points at the record's 4-byte big-endian length field
// (the type field has already been consumed by the caller). The length counts
// the 8 header bytes (type + length), so the payload is `length - 8` bytes.
// On return `offset` points just past the payload.
//
// A malformed record - length field outside `data`, length below 8, or a
// payload running past the end of `data` - yields an empty string and moves
// `offset` to the end of `data` instead of wrapping around.
QString DocumentPrivate::readEXTHRecord(const QByteArray& data, quint32& offset)
{
    const quint32 total=static_cast<quint32>(data.size());

    // The length field itself must be inside the buffer.
    if (offset>total || total-offset<4) {
        offset=total;
        return QString();
    }
    quint32 len=readBELong(data,offset);
    offset+=4;

    // Reject lengths that would underflow or overrun the buffer.
    if (len<8 || len-8>total-offset) {
        offset=total;
        return QString();
    }
    len-=8;

    const QString ret=decodeString(data.mid(static_cast<int>(offset),static_cast<int>(len)));
    offset+=len;
    return ret;
}
|
|
|
// Decodes record `i` as an image. Returns a null QImage when the record is
// missing or does not contain image data QImage can load.
QImage DocumentPrivate::getImageFromRecord(int i)
{
    // Only record `i` is needed; the former extra fetch of record i-2 was
    // never used and asked the PDB for a negative record when i < 2.
    return QImage::fromData(pdb.getRecord(i));
}
|
|
|
|
|
void DocumentPrivate::parseEXTH(const QByteArray& data) |
|
{ |
|
// try to get name |
|
if (data.size()>=92) { |
|
quint32 nameoffset=readBELong(data,84); |
|
quint32 namelen=readBELong(data,88); |
|
if ( (nameoffset + namelen) < data.size() ) { |
|
metadata[Document::Title]=decodeString(data.mid(nameoffset, namelen)); |
|
} |
|
} |
|
|
|
quint32 exthoffs=readBELong(data,20)+16; |
|
|
|
if (data.mid(exthoffs,4)!="EXTH") return; |
|
quint32 records=readBELong(data,exthoffs+8); |
|
quint32 offset=exthoffs+12; |
|
for (unsigned int i=0;i<records;i++) { |
|
quint32 type=readBELong(data,offset); |
|
offset+=4; |
|
switch (type) { |
|
case 100: metadata[Document::Author]=readEXTHRecord(data,offset); break; |
|
case 103: metadata[Document::Description]=readEXTHRecord(data,offset); break; |
|
case 105: metadata[Document::Subject]=readEXTHRecord(data,offset); break; |
|
case 109: metadata[Document::Copyright]=readEXTHRecord(data,offset); break; |
|
case 202: thumbnailIndex = readBELong(data,offset); offset+=4; break; |
|
default: readEXTHRecord(data,offset); |
|
} |
|
} |
|
|
|
|
|
} |
|
|
|
// Builds the private state on top of `dev` and parses the document header.
Document::Document(QIODevice* dev)
    : d(new DocumentPrivate(dev))
{
    d->init();
}
|
|
|
// Decompresses text records 1..ntextrecords, concatenates them and decodes
// the result with the document encoding.
//
// Returns a null QString when the document is invalid or when decompression
// of any record fails; in the latter case the document is also marked
// invalid.
QString Document::text() const
{
    // An invalid document has no usable decompressor: init() either never
    // created one or creation failed. Do not dereference it.
    if (!d->valid) return QString();

    QByteArray whole;
    for (int i=1;i<d->ntextrecords+1;i++) {
        whole+=d->dec->decompress(d->pdb.getRecord(i));
        if (!d->dec->isValid()) {
            d->valid=false;
            return QString();
        }
    }
    return d->decodeString(whole);
}
|
|
|
int Document::imageCount() const |
|
{ |
|
//FIXME: don't count FLIS and FCIS records |
|
return d->pdb.recordCount()-d->ntextrecords; |
|
} |
|
|
|
bool Document::isValid() const |
|
{ |
|
return d->valid; |
|
} |
|
|
|
// Returns image number `i`, counted from the first image record. The first
// image record is located lazily on first use.
QImage Document::getImage(int i) const
{
    if (d->firstImageRecord == 0) {
        d->findFirstImage();
    }
    const int record = d->firstImageRecord + i;
    return d->getImageFromRecord(record);
}
|
|
|
// Returns a copy of the metadata collected while the document was parsed.
QMap<Document::MetaKey,QString> Document::metadata() const
{
    const QMap<Document::MetaKey,QString>& collected = d->metadata;
    return collected;
}
|
|
|
bool Document::hasDRM() const |
|
{ |
|
return d->drm; |
|
} |
|
|
|
// Returns the thumbnail image: the image at thumbnailIndex if it can be
// loaded, otherwise the first image. After a failed lookup thumbnailIndex is
// reset to 0 so that later calls go straight to the first image.
QImage Document::thumbnail() const
{
    if (d->firstImageRecord == 0) {
        d->findFirstImage();
    }

    const QImage preferred = d->getImageFromRecord(d->thumbnailIndex + d->firstImageRecord);
    if (!preferred.isNull() || d->thumbnailIndex == 0) {
        return preferred;
    }

    // does not work, try first image
    d->thumbnailIndex = 0;
    return d->getImageFromRecord(d->firstImageRecord);
}
|
|
|
}
|
|
|