You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
127 lines
3.5 KiB
127 lines
3.5 KiB
/* |
|
* Kchmviewer - a CHM and EPUB file viewer with broad language support |
|
* Copyright (C) 2004-2014 George Yunaev, gyunaev@ulduzsoft.com |
|
* |
|
* This program is free software: you can redistribute it and/or modify |
|
* it under the terms of the GNU General Public License as published by |
|
* the Free Software Foundation, either version 3 of the License, or |
|
* (at your option) any later version. |
|
* |
|
* This program is distributed in the hope that it will be useful, |
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
* GNU General Public License for more details. |
|
* |
|
* You should have received a copy of the GNU General Public License |
|
* along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
*/ |
|
|
|
#ifndef EBOOK_SEARCH_INDEX_H |
|
#define EBOOK_SEARCH_INDEX_H |
|
|
|
#include <QUrl> |
|
#include <QHash> |
|
#include <QVector> |
|
#include <QDataStream> |
|
#include <QStringList> |
|
|
|
#include "helper_entitydecoder.h" |
|
|
|
|
|
class EBook; |
|
|
|
// This code is based on some pretty old version of Qt Assistant |
|
namespace QtAs |
|
{ |
|
|
|
struct Document |
|
{ |
|
Document( int d, int f ) : docNumber( d ), frequency( f ) {} |
|
Document() : docNumber( -1 ), frequency( 0 ) {} |
|
bool operator==( const Document &doc ) const |
|
{ |
|
return docNumber == doc.docNumber; |
|
} |
|
|
|
bool operator<( const Document &doc ) const |
|
{ |
|
return frequency > doc.frequency; |
|
} |
|
|
|
bool operator<=( const Document &doc ) const |
|
{ |
|
return frequency >= doc.frequency; |
|
} |
|
|
|
bool operator>( const Document &doc ) const |
|
{ |
|
return frequency < doc.frequency; |
|
} |
|
|
|
qint16 docNumber; |
|
qint16 frequency; |
|
}; |
|
|
|
QDataStream &operator>>( QDataStream &s, Document &l ); |
|
QDataStream &operator<<( QDataStream &s, const Document &l ); |
|
|
|
class Index : public QObject |
|
{ |
|
Q_OBJECT |
|
public: |
|
|
|
Index(); |
|
|
|
void writeDict( QDataStream& stream ); |
|
bool readDict( QDataStream& stream ); |
|
bool makeIndex(const QList<QUrl> &docs, EBook * chmFile ); |
|
QList<QUrl> query( const QStringList&, const QStringList&, const QStringList&, EBook * chmFile ); |
|
QString getCharsSplit() const { return m_charssplit; } |
|
QString getCharsPartOfWord() const { return m_charsword; } |
|
|
|
signals: |
|
void indexingProgress( int, const QString& ); |
|
|
|
public slots: |
|
void setLastWinClosed(); |
|
|
|
private: |
|
struct Entry |
|
{ |
|
Entry( int d ) { documents.append( Document( d, 1 ) ); } |
|
Entry( QVector<Document> l ) : documents( l ) {} |
|
QVector<Document> documents; |
|
}; |
|
|
|
struct PosEntry |
|
{ |
|
PosEntry( int p ) { positions.append( p ); } |
|
QList<uint> positions; |
|
}; |
|
|
|
bool parseDocumentToStringlist( EBook * chmFile, const QUrl& filename, QStringList& tokenlist ); |
|
void insertInDict( const QString&, int ); |
|
|
|
QStringList getWildcardTerms( const QString& ); |
|
QStringList split( const QString& ); |
|
QList<Document> setupDummyTerm( const QStringList& ); |
|
bool searchForPhrases(const QStringList &phrases, const QStringList &words, const QUrl &filename, EBook * chmFile ); |
|
|
|
QList< QUrl > docList; |
|
QHash<QString, Entry*> dict; |
|
QHash<QString,PosEntry*>miniDict; |
|
bool lastWindowClosed; |
|
HelperEntityDecoder entityDecoder; |
|
|
|
// Those characters are splitters (i.e. split the word), but added themselves into dictionary too. |
|
// This makes the dictionary MUCH larger, but ensure that for the piece of "window->print" both |
|
// search for "print" and "->print" will find it. |
|
QString m_charssplit; |
|
|
|
// Those characters are parts of word - for example, '_' is here, and search for _debug will find only _debug. |
|
QString m_charsword; |
|
}; |
|
|
|
}; |
|
|
|
#endif // EBOOK_SEARCH_INDEX_H
|
|
|