// Author: Sam Magnuson // License GPL -- indexing logic for KMail #include "kmmsgindex.h" #include "kmsearchpattern.h" #include "kmfoldersearch.h" #include "kmfoldermgr.h" #include "kmmsgdict.h" #include "kmkernel.h" #include "mimelib/message.h" #include "mimelib/headers.h" #include "mimelib/utility.h" #include "mimelib/enum.h" #include "mimelib/body.h" #include "mimelib/bodypart.h" #include "mimelib/field.h" #include #include #include #include #include #include #include #include #include #include #include //#define USE_MMAP class KMMsgIndexRef { #ifdef USE_MMAP Q_UINT32 *mRef; #endif int mFD, mSize; public: KMMsgIndexRef(int f, int size); ~KMMsgIndexRef() { } bool error(); void resize(int size); void sync(); bool write(int off, Q_UINT32 val); Q_UINT32 read(int off, bool *ok=NULL); }; KMMsgIndexRef::KMMsgIndexRef(int f, int size) : mFD(f), mSize(size) { #ifdef USE_MMAP if(mSize != 0) mRef = (Q_UINT32*)mmap(0, mSize * sizeof(Q_INT32), PROT_READ|PROT_WRITE, MAP_SHARED, mFD, 0); else mRef = (Q_UINT32*)MAP_FAILED; #endif } void KMMsgIndexRef::sync() { #ifdef USE_MMAP if(mRef != MAP_FAILED) msync(mRef, mSize * sizeof(Q_INT32), MS_SYNC); #endif } bool KMMsgIndexRef::error() { #ifdef USE_MMAP if(mRef == MAP_FAILED) return TRUE; #endif return FALSE; } void KMMsgIndexRef::resize(int newSize) { #ifdef USE_MMAP if(mRef != MAP_FAILED) munmap(mRef, mSize * sizeof(Q_INT32)); if(ftruncate(mFD, newSize * sizeof(Q_INT32)) == -1) { for(Q_INT32 i = mSize; i < newSize; i++) ::write(mFD, &i, sizeof(i)); } #endif mSize = newSize; #ifdef USE_MMAP mRef = (Q_UINT32*)mmap(0, mSize * sizeof(Q_INT32), PROT_READ|PROT_WRITE, MAP_SHARED, mFD, 0); #endif } bool KMMsgIndexRef::write(int off, Q_UINT32 val) { if(off > mSize) return FALSE; #ifdef USE_MMAP mRef[off] = val; #else lseek(mFD, off * sizeof(Q_INT32), SEEK_SET); ::write(mFD, &val, sizeof(val)); #endif return TRUE; } Q_UINT32 KMMsgIndexRef::read(int off, bool *ok) { if(off > mSize) { if(ok) *ok = FALSE; return 0; } #ifdef USE_MMAP return mRef[off]; #else Q_UINT32 ret; lseek(mFD, off * sizeof(Q_INT32), SEEK_SET); ::read(mFD, &ret, sizeof(ret)); return ret; #endif return 0; } inline bool km_isSeparator(const char *content, uint i, uint content_len) { return !(isalnum(content[i]) || (i < content_len - 1 && content[i+1] != '\n' && content[i+1] != '\t' && content[i+1] != ' ' && (content[i] == '.' || content[i] == '-' || content[i] == '\\' || content[i] == '/' || content[i] == '\'' || content[i] == ':'))); } inline bool km_isSeparator(const QChar *content, uint i, uint content_len) { return !(content[i].isLetterOrNumber() || (i < content_len - 1 && content[i+1] != '\n' && content[i+1] != '\t' && content[i+1] != ' ' && (content[i] == '.' || content[i] == '-' || content[i] == '\\' || content[i] == '/' || content[i] == '\'' || content[i] == ':'))); } inline bool km_isSeparator(const QString &s, int i, int content_len=-1) { return km_isSeparator(s.unicode(), i, content_len < 0 ? s.length() : content_len); } inline bool km_isSeparated(const QString &f) { for(uint i=0, l=f.length(); i < f.length(); i++) { if(km_isSeparator(f.unicode(), i, l)) return TRUE; } return FALSE; } inline QStringList km_separate(const QString &f) { if(!km_isSeparated(f)) return QStringList(f); QStringList ret; uint i_o = 0; for(uint i=0, l=f.length(); i < f.length(); i++) { if(km_isSeparator(f.unicode(), i, l)) { QString chnk = f.mid(i_o, i - i_o).latin1(); if(!chnk.isEmpty()) ret << chnk; i_o = i+1; } } if(i_o != f.length()) { QString chnk = f.mid(i_o, f.length() - i_o); if(!chnk.isEmpty()) ret << chnk; } return ret; } enum { HEADER_BYTEORDER = 0, HEADER_VERSION = 1, HEADER_COMPLETE = 2, HEADER_COUNT = 3, HEADER_USED = 4, HEADER_INDEXED = 5, HEADER_REMOVED = 6, HEADER_end = 7, CHUNK_HEADER_COUNT = 0, CHUNK_HEADER_USED = 1, CHUNK_HEADER_NEXT = 2, CHUNK_HEADER_end = 3, TOC_BODY = 0, TOC_HEADER_NAME = 1, TOC_HEADER_DATA = 2 }; #define KMMSGINDEX_VERSION 6067 static int kmindex_grow_increment = 40960; //grow this many buckets at a time KMMsgIndex::KMMsgIndex(QObject *o, const char *n) : QObject(o, n), mIndexState(INDEX_IDLE), delay_cnt(0), mLastSearch() { mTermIndex.loc = kmkernel->folderMgr()->basePath() + "/.kmmsgindex_search"; mTermTOC.loc = kmkernel->folderMgr()->basePath() + "/.kmmsgindex_toc"; mTermProcessed.loc = kmkernel->folderMgr()->basePath() + "/.kmmsgindex_processed"; } void KMMsgIndex::init() { mActiveSearches.setAutoDelete(TRUE); reset(FALSE); readIndex(); connect(kmkernel->folderMgr(), SIGNAL(msgRemoved(KMFolder*, Q_UINT32)), this, SLOT(slotRemoveMsg(KMFolder*, Q_UINT32))); connect(kmkernel->folderMgr(), SIGNAL(msgAdded(KMFolder*, Q_UINT32)), this, SLOT(slotAddMsg(KMFolder*, Q_UINT32))); } void KMMsgIndex::remove() { unlink(mTermIndex.loc.latin1()); unlink(mTermTOC.loc.latin1()); unlink(mTermProcessed.loc.latin1()); } // resets the state of the indexer to nothing (if clean it is assumed // anything not initialized is cleaned up.. void KMMsgIndex::reset(bool clean) { //active searches if(clean) mActiveSearches.clear(); //create if(create.timer_id != -1) { if(clean) killTimer(create.timer_id); create.timer_id = -1; } //restore if(restore.timer_id != -1) { if(clean) killTimer(restore.timer_id); restore.timer_id = -1; } //TOC if(clean) mTermTOC.body.clear(); if(mTermTOC.fd != -1) { if(clean) close(mTermTOC.fd); mTermTOC.fd = -1; } mTermTOC.h.next_hnum = 0; if(clean) { mTermTOC.h.header_lookup.clear(); mTermTOC.h.headers.clear(); } //processed if(mTermProcessed.fd != -1) { if(clean) close(mTermProcessed.fd); mTermProcessed.fd = -1; } if(clean) mTermProcessed.known.clear(); restore.reading_processed = FALSE; //index { if(clean) delete mTermIndex.ref; mTermIndex.ref = NULL; } mTermIndex.removed = mTermIndex.indexed = 0; mTermIndex.used = mTermIndex.count = 0; if(mTermIndex.fd != -1) { if(clean) close(mTermIndex.fd); mTermIndex.fd = -1; } } // finds cnt buckets and allocates to make room int KMMsgIndex::allocTermChunk(int cnt) { int ret = mTermIndex.used; mTermIndex.used += cnt; //update the used if(mTermIndex.count < mTermIndex.used) { //time for a remap mTermIndex.count = QMAX(mTermIndex.count + kmindex_grow_increment, mTermIndex.used); mTermIndex.ref->resize(mTermIndex.count); mTermIndex.ref->write(HEADER_COUNT, mTermIndex.count); } mTermIndex.ref->write(HEADER_USED, mTermIndex.used); return ret; } // returns whether header is a header we care about.. bool KMMsgIndex::isKillHeader(const char *header, uchar header_len) { const char *watched_headers[] = { "Subject", "From", "To", "CC", "BCC", "Reply-To", "Organization", "List-ID", "X-Mailing-List", "X-Loop", "X-Mailer", NULL }; for(int i = 0; watched_headers[i]; i++) { if(!strncmp(header, watched_headers[i], header_len)) return FALSE; } return TRUE; } //returns whether a term is in the stop list.. bool KMMsgIndex::isKillTerm(const char *term, uchar term_len) { if(!term || term_len < 1) return TRUE; if(term_len <= 2) //too few letters.. return TRUE; { //no numbers! int numlen = 0; if(term[numlen] == '+' || term[numlen] == '-') numlen++; for( ; numlen < term_len; numlen++) { if(!isdigit(term[numlen]) || term[numlen] == '.') break; } if(numlen == term_len - 1 && term[numlen] == '%') numlen++; if(numlen == term_len) return TRUE; } { //static kill words list static QDict *killDict = NULL; if(!killDict) { killDict = new QDict(); const char *kills[] = { "from", "kmail", "is", "in", "and", "it", "this", "of", "that", "on", "you", "if", "be", "not", "with", "for", "to", "the", "but", NULL }; for(int i = 0; kills[i]; i++) killDict->insert(kills[i], (void*)1); } if(killDict->find(term)) return TRUE; } return FALSE; } // finds a free bucket starting at where to put serNum in the dbase int KMMsgIndex::addBucket(int where, Q_UINT32 serNum) { int ret = where; if(where == -1) { //enough for two (and the tail).. int first_chunk_size = CHUNK_HEADER_end + 2 + 1; int off = ret = allocTermChunk(first_chunk_size); //special case to mark the tail for the first mTermIndex.ref->write(off, off+1); off++; first_chunk_size--; //now mark in index mTermIndex.ref->write(off+CHUNK_HEADER_COUNT, first_chunk_size); mTermIndex.ref->write(off+CHUNK_HEADER_USED, CHUNK_HEADER_end + 1); mTermIndex.ref->write(off+CHUNK_HEADER_end, serNum); } else { uint len = mTermIndex.ref->read(where+CHUNK_HEADER_COUNT); if(len == mTermIndex.ref->read(where+CHUNK_HEADER_USED)) { len = 34; //let's make a bit more room this time.. int blk = ret = allocTermChunk(len); mTermIndex.ref->write(where+CHUNK_HEADER_NEXT, blk); mTermIndex.ref->write(blk+CHUNK_HEADER_COUNT, len); mTermIndex.ref->write(blk+CHUNK_HEADER_USED, CHUNK_HEADER_end + 1); mTermIndex.ref->write(blk+CHUNK_HEADER_end, serNum); } else { mTermIndex.ref->write(where+ mTermIndex.ref->read(where+CHUNK_HEADER_USED), serNum); mTermIndex.ref->write(where+CHUNK_HEADER_USED, mTermIndex.ref->read(where+CHUNK_HEADER_USED)+1); } } return ret; } // adds the body term to the index bool KMMsgIndex::addBodyTerm(const char *term, uchar term_len, Q_UINT32 serNum) { if(mTermIndex.ref->error()) return FALSE; if(isKillTerm(term, term_len)) return TRUE; //sorta.. if(mIndexState == INDEX_RESTORE) //just have to finish reading.. restoreState(); if(!mTermTOC.body.contains(term)) { int w = addBucket(-1, serNum); mTermTOC.body.insert(term, w); //mark in TOC const uchar marker = TOC_BODY; write(mTermTOC.fd, &marker, sizeof(marker)); write(mTermTOC.fd, &term_len, sizeof(term_len)); write(mTermTOC.fd, term, term_len); write(mTermTOC.fd, &w, sizeof(w)); } else { int map_off = mTermTOC.body[term], w = addBucket(mTermIndex.ref->read(map_off), serNum); if(w != -1) mTermIndex.ref->write(map_off, w); } return TRUE; } // adds the body term to the index bool KMMsgIndex::addHeaderTerm(Q_UINT16 hnum, const char *term, uchar term_len, Q_UINT32 serNum) { if(mTermIndex.ref->error()) return FALSE; if(isKillTerm(term, term_len)) return TRUE; //sorta.. if(mIndexState == INDEX_RESTORE) //just have to finish reading.. restoreState(); if(!mTermTOC.h.headers.contains(hnum)) mTermTOC.h.headers.insert(hnum, QMap()); if(!mTermTOC.h.headers[hnum].contains(term)) { int w = addBucket(-1, serNum); mTermTOC.h.headers[hnum].insert(term, w); const uchar marker = TOC_HEADER_DATA; write(mTermTOC.fd, &marker, sizeof(marker)); write(mTermTOC.fd, &hnum, sizeof(hnum)); write(mTermTOC.fd, &term_len, sizeof(term_len)); write(mTermTOC.fd, term, term_len); write(mTermTOC.fd, &w, sizeof(w)); } else { int map_off = mTermTOC.h.headers[hnum][term], w = addBucket(mTermIndex.ref->read(map_off), serNum); if(w != -1) mTermIndex.ref->write(map_off, w); } return TRUE; } // processes the message at serNum and returns the number of terms processed int KMMsgIndex::processMsg(Q_UINT32 serNum) { if(mIndexState == INDEX_RESTORE) { create.serNums.push(serNum); return -1; } if(mTermProcessed.known[serNum]) return -1; int idx = -1; KMFolder *folder = 0; kmkernel->msgDict()->getLocation(serNum, &folder, &idx); if(!folder || (idx == -1) || (idx >= folder->count())) return -1; if(mOpenedFolders.findIndex(folder) == -1) { folder->open(); mOpenedFolders.append(folder); } int ret = 0; bool unget = !folder->getMsgBase(idx)->isMessage(); KMMessage *msg = folder->getMsg(idx); const DwMessage *dw_msg = msg->asDwMessage(); DwHeaders& headers = dw_msg->Headers(); uchar build_i = 0; char build_str[255]; //process header for(DwField *field = headers.FirstField(); field; field = field->Next()) { if(isKillHeader(field->FieldNameStr().data(), field->FieldNameStr().length())) continue; const char *name = field->FieldNameStr().c_str(), *content = field->FieldBodyStr().data(); uint content_len = field->FieldBodyStr().length(); Q_UINT16 hnum = 0; if(mTermTOC.h.header_lookup.contains(name)) { hnum = mTermTOC.h.header_lookup[name]; } else { hnum = mTermTOC.h.next_hnum++; mTermTOC.h.header_lookup.insert(name, hnum); const uchar marker = TOC_HEADER_NAME; write(mTermTOC.fd, &marker, sizeof(marker)); uchar len = field->FieldNameStr().length(); write(mTermTOC.fd, &len, sizeof(len)); write(mTermTOC.fd, name, len); write(mTermTOC.fd, &hnum, sizeof(hnum)); } for(uint i = 0; i < content_len; i++) { if(build_i < 254 && !km_isSeparator(content, i, content_len)) { build_str[build_i++] = tolower(content[i]); } else if(build_i) { build_str[build_i] = 0; if(addHeaderTerm(hnum, build_str, build_i, serNum)) ret++; build_i = 0; } } if(build_i) { build_str[build_i] = 0; if(addHeaderTerm(hnum, build_str, build_i, serNum)) ret++; build_i = 0; } } //process body const DwEntity *dw_ent = msg->asDwMessage(); DwString dw_body; DwString body; if(dw_ent && dw_ent->hasHeaders() && dw_ent->Headers().HasContentType() && (dw_ent->Headers().ContentType().Type() == DwMime::kTypeText)) { dw_body = dw_ent->Body().AsString(); } else { dw_ent = msg->getFirstDwBodyPart(); if (dw_ent) dw_body = msg->getFirstDwBodyPart()->AsString(); } if(dw_ent && dw_ent->hasHeaders() && dw_ent->Headers().HasContentType() && (dw_ent->Headers().ContentType().Type() == DwMime::kTypeText)) { DwHeaders& headers = dw_ent->Headers(); if(headers.HasContentTransferEncoding()) { switch(headers.ContentTransferEncoding().AsEnum()) { case DwMime::kCteBase64: { DwString raw_body = dw_body; DwDecodeBase64(raw_body, body); break; } case DwMime::kCteQuotedPrintable: { DwString raw_body = dw_body; DwDecodeQuotedPrintable(raw_body, body); break; } default: body = dw_body; break; } } else { body = dw_body; } } QDict found_terms; const char *body_s = body.data(); for(uint i = 0; i < body.length(); i++) { if(build_i < 254 && !km_isSeparator(body_s, i, body.length())) { build_str[build_i++] = tolower(body_s[i]); } else if(build_i) { build_str[build_i] = 0; if(!found_terms[build_str] && addBodyTerm(build_str, build_i, serNum)) { found_terms.insert(build_str, (void*)1); ret++; } build_i = 0; } } if(build_i) { build_str[build_i] = 0; if(!found_terms[build_str] && addBodyTerm(build_str, build_i, serNum)) { found_terms.insert(build_str, (void*)1); ret++; } } if (unget) folder->unGetMsg(idx); //I don't need it anymore.. mTermIndex.ref->write(HEADER_INDEXED, ++mTermIndex.indexed); mTermProcessed.known.insert(serNum, (void*)1); write(mTermProcessed.fd, &serNum, sizeof(serNum)); return ret; } //Determines if it is time for another cleanup bool KMMsgIndex::isTimeForClean() { return (mTermIndex.removed > 2500 && //minimum mTermIndex.removed * 4 >= mTermIndex.indexed && //fraction removed (mLastSearch.isNull() || //never mLastSearch.secsTo(QTime::currentTime()) > 60 * 60 * 2)); //hours } //removes bogus entries from the index, and optimizes the index file void KMMsgIndex::cleanUp() { if(mIndexState != INDEX_IDLE) return; reset(TRUE); remove(); recreateIndex(); } // flushes all open file descriptors.. void KMMsgIndex::flush() { #if 0 mTermIndex.ref->sync(); sync(); #endif } // slot fired when a serial number is no longer used void KMMsgIndex::slotRemoveMsg(KMFolder *, Q_UINT32) { mTermIndex.ref->write(HEADER_REMOVED, ++mTermIndex.removed); } // slot fired when new serial numbers are allocated.. void KMMsgIndex::slotAddMsg(KMFolder *, Q_UINT32 serNum) { if(mIndexState == INDEX_CREATE) { create.serNums.push(serNum); } else if(isTimeForClean()) { cleanUp(); } else { processMsg(serNum); flush(); } } // handles the lazy processing of messages void KMMsgIndex::timerEvent(QTimerEvent *e) { if(qApp->hasPendingEvents()) //nah, some other time.. delay_cnt = 10; else if(delay_cnt) --delay_cnt; else if(mIndexState == INDEX_CREATE && e->timerId() == create.timer_id) createState(FALSE); else if(mIndexState == INDEX_RESTORE && e->timerId() == restore.timer_id) restoreState(FALSE); } bool KMMsgIndex::createState(bool finish) { int terms = 0, processed = 0, skipped = 0; const int max_terms = 300, max_process = 30; while(!create.serNums.isEmpty()) { if(!finish && (terms >= max_terms || processed >= max_process || skipped >= (max_process*4))) { flush(); return TRUE; } int cnt = processMsg(create.serNums.pop()); if(cnt == -1) { skipped++; } else { terms += cnt; processed++; } } if(KMFolder *f = create.folders.pop()) { if(mOpenedFolders.findIndex(f) == -1) { f->open(); mOpenedFolders.append(f); } for(int i = 0, s; i < f->count(); ++i) { s = kmkernel->msgDict()->getMsgSerNum(f, i); if(finish || (terms < max_terms && processed < max_process && skipped < (max_process*4))) { int cnt = processMsg(s); if(cnt == -1) { skipped++; } else { terms += cnt; processed++; } } else if(!mTermProcessed.known[s]){ create.serNums.push(s); } } if(finish) { while(!createState(TRUE)); return TRUE; } } else { mIndexState = INDEX_IDLE; killTimer(create.timer_id); create.timer_id = -1; QValueListConstIterator > it; for (it = mOpenedFolders.begin(); it != mOpenedFolders.end(); ++it) { KMFolder *folder = *it; if(folder) folder->close(); } mOpenedFolders.clear(); create.folders.clear(); mTermIndex.ref->write(HEADER_COMPLETE, 1); return TRUE; } flush(); return FALSE; } // reads in some terms from the index (non-blocking) if finish is true it // will read in everything left to do. It is possible for this to turn from the // RESTORE state into the CREATE state - so you must handle this case if you // need to use the index immediately (ie finish is true) bool KMMsgIndex::restoreState(bool finish) { if(mIndexState != INDEX_RESTORE) return FALSE; uchar marker, len; char in[255]; Q_UINT32 off; for(int cnt = 0; finish || cnt < 400; cnt++) { if(restore.reading_processed) { Q_UINT32 ser; if(!read(mTermProcessed.fd, &ser, sizeof(ser))) { mIndexState = INDEX_IDLE; killTimer(restore.timer_id); restore.timer_id = -1; if(restore.restart_index) { mIndexState = INDEX_CREATE; syncIndex(); } break; } mTermProcessed.known.insert(ser, (void*)1); } else { if(!read(mTermTOC.fd, &marker, sizeof(marker))) restore.reading_processed = TRUE; if(marker == TOC_BODY) { read(mTermTOC.fd, &len, sizeof(len)); read(mTermTOC.fd, in, len); in[len] = 0; read(mTermTOC.fd, &off, sizeof(off)); mTermTOC.body.insert(in, off); } else if(marker == TOC_HEADER_DATA) { Q_UINT16 hnum; read(mTermTOC.fd, &hnum, sizeof(hnum)); read(mTermTOC.fd, &len, sizeof(len)); read(mTermTOC.fd, in, len); in[len] = 0; read(mTermTOC.fd, &off, sizeof(off)); if(!mTermTOC.h.headers.contains(hnum)) { QMap map; map.insert(in, off); mTermTOC.h.headers.insert(hnum, map); } else { mTermTOC.h.headers[hnum].insert(in, off); } } else if(marker == TOC_HEADER_NAME) { read(mTermTOC.fd, &len, sizeof(len)); read(mTermTOC.fd, in, len); in[len] = 0; Q_UINT16 hnum; read(mTermTOC.fd, &hnum, sizeof(hnum)); if(!mTermTOC.h.header_lookup.contains(in)) { mTermTOC.h.header_lookup.insert(in, hnum); mTermTOC.h.next_hnum = hnum + 1; } } } } return TRUE; } // nulls the current index and begins a refresh of the indexed data.. bool KMMsgIndex::recreateIndex() { if(mIndexState != INDEX_IDLE) return FALSE; mIndexState = INDEX_CREATE; mTermProcessed.fd = open(mTermProcessed.loc.latin1(), O_WRONLY|O_CREAT|O_TRUNC, S_IREAD|S_IWRITE); mTermTOC.fd = open(mTermTOC.loc.latin1(), O_RDWR|O_CREAT|O_TRUNC, S_IREAD|S_IWRITE); mTermIndex.fd = open(mTermIndex.loc.latin1(), O_RDWR|O_CREAT|O_TRUNC, S_IREAD|S_IWRITE); mTermIndex.count = kmindex_grow_increment; mTermIndex.used = HEADER_end; mTermIndex.ref = new KMMsgIndexRef(mTermIndex.fd, 0); mTermIndex.ref->resize(mTermIndex.count); mTermIndex.ref->write(HEADER_BYTEORDER, 0x12345678); mTermIndex.ref->write(HEADER_VERSION, KMMSGINDEX_VERSION); mTermIndex.ref->write(HEADER_COMPLETE, 0); //marker for incomplete index mTermIndex.ref->write(HEADER_COUNT, mTermIndex.count); mTermIndex.ref->write(HEADER_USED, mTermIndex.used);//including this header mTermIndex.ref->write(HEADER_INDEXED, mTermIndex.indexed); mTermIndex.ref->write(HEADER_REMOVED, mTermIndex.removed); syncIndex(); return TRUE; } // processes all current messages as if they were newly added void KMMsgIndex::syncIndex() { if(mIndexState != INDEX_CREATE) return; QValueStack > folders; folders.push(&(kmkernel->folderMgr()->dir())); while(KMFolderDir *dir = folders.pop()) { for(KMFolderNode *child = dir->first(); child; child = dir->next()) { if(child->isDir()) folders.push((KMFolderDir*)child); else create.folders.push((KMFolder*)child); } } if(create.timer_id == -1) create.timer_id = startTimer(0); } // read the existing index and load into memory void KMMsgIndex::readIndex() { if(mIndexState != INDEX_IDLE) return; mIndexState = INDEX_RESTORE; bool read_success = FALSE; if((mTermTOC.fd = open(mTermTOC.loc.latin1(), O_RDWR)) != -1) { if((mTermIndex.fd = open(mTermIndex.loc.latin1(), O_RDWR)) != -1) { mTermProcessed.fd = open(mTermProcessed.loc.latin1(), O_RDWR); Q_INT32 byteOrder = 0, version; read(mTermIndex.fd, &byteOrder, sizeof(byteOrder)); if(byteOrder != 0x12345678) goto error_with_read; read(mTermIndex.fd, &version, sizeof(version)); if(version != KMMSGINDEX_VERSION) goto error_with_read; Q_UINT32 complete_index = 0; read(mTermIndex.fd, &complete_index, sizeof(complete_index)); restore.restart_index = !complete_index; read(mTermIndex.fd, &mTermIndex.count, sizeof(mTermIndex.count)); read(mTermIndex.fd, &mTermIndex.used, sizeof(mTermIndex.used)); read(mTermIndex.fd, &mTermIndex.indexed, sizeof(mTermIndex.indexed)); read(mTermIndex.fd, &mTermIndex.removed, sizeof(mTermIndex.removed)); mTermIndex.ref = new KMMsgIndexRef(mTermIndex.fd, mTermIndex.count); if(mTermIndex.ref->error()) goto error_with_read; restore.timer_id = startTimer(0); read_success = TRUE; } } error_with_read: if(!read_success) { mIndexState = INDEX_IDLE; reset(); remove(); recreateIndex(); } } // returns whether rule is a valid rule to be processed by the indexer bool KMMsgIndex::canHandleQuery(KMSearchRule *rule) { if(mIndexState == INDEX_RESTORE) //just have to finish reading.. restoreState(); //this might flip us into INDEX_CREATE state.. if(mIndexState != INDEX_IDLE) //not while we are doing other stuff.. return FALSE; if(rule->field().isEmpty() || rule->contents().isEmpty()) //not a real search return TRUE; if(rule->function() != KMSearchRule::FuncEquals && rule->function() != KMSearchRule::FuncContains) { return FALSE; } else if(rule->field().left(1) == "<") { if(rule->field() == "" || rule->field() == "") { if(rule->function() != KMSearchRule::FuncContains) return FALSE; } else if(rule->field() != "") { //unknown.. return FALSE; } } else if(isKillHeader(rule->field().data(), rule->field().length())) { return FALSE; } QString match = rule->contents().lower(); //general case if(km_isSeparated(match)) { uint killed = 0; QStringList sl = km_separate(match); for(QStringList::Iterator it = sl.begin(); it != sl.end(); ++it) { QString str = (*it).lower(); if(isKillTerm(str.latin1(), str.length())) killed++; } if(killed == sl.count()) return FALSE; } else if(isKillTerm(match.latin1(), match.length())) { return FALSE; } return TRUE; } // returns whether pat is a valid pattern to be processed by the indexer bool KMMsgIndex::canHandleQuery(KMSearchPattern *pat) { if(mIndexState == INDEX_RESTORE) //just have to finish reading.. restoreState(); //this might flip us into INDEX_CREATE state.. if(mIndexState != INDEX_IDLE) //not while we are creating the index.. return FALSE; if(pat->op() != KMSearchPattern::OpAnd && pat->op() != KMSearchPattern::OpOr) return FALSE; for(QPtrListIterator it(*pat); it.current(); ++it) { if(!canHandleQuery((*it))) return FALSE; } return TRUE; } // returns the data set at begin_chunk through to end_chunk void KMMsgIndex::values(int begin_chunk, int end_chunk, QValueList &lst) { lst.clear(); for(int off = begin_chunk; TRUE; off = mTermIndex.ref->read(off+CHUNK_HEADER_NEXT)) { uint used = mTermIndex.ref->read(off+CHUNK_HEADER_USED); for(uint i = CHUNK_HEADER_end; i < used; i++) lst << mTermIndex.ref->read(off+i); if(mTermIndex.ref->read(off) != used || off == end_chunk) break; } } // returns the data set at begin_chunk through to end_chunk void KMMsgIndex::values(int begin_chunk, int end_chunk, QIntDict &dct) { dct.clear(); for(int off = begin_chunk; TRUE; off = mTermIndex.ref->read(off+CHUNK_HEADER_NEXT)) { uint used = mTermIndex.ref->read(off+CHUNK_HEADER_USED); for(uint i = CHUNK_HEADER_end; i < used; i++) dct.insert(mTermIndex.ref->read(off+i), (void*)1); if(mTermIndex.ref->read(off) != used || off == end_chunk) break; } } // performs an actual search in the index QValueList KMMsgIndex::find(QString data, bool contains, KMSearchRule *rule, bool body, Q_UINT16 hnum) { QValueList ret; if(!body && !mTermTOC.h.headers.contains(hnum)) return ret; if(contains) { QIntDict foundDict; QMap *map = &(mTermTOC.body); if(!body) map = &(mTermTOC.h.headers[hnum]); QStringList sl = km_separate(data); for(QStringList::Iterator slit = sl.begin(); slit != sl.end(); ++slit) { for(QMapIterator it = map->begin(); it != map->end(); ++it) { QString qstr = it.key(); bool matches = FALSE; if(sl.count() == 1) matches = qstr.contains((*slit)); else if(slit == sl.begin()) matches = qstr.endsWith((*slit)); else if(slit == sl.end()) matches = qstr.startsWith((*slit)); else matches = (qstr == (*slit)); if(matches) { QValueList tmp = find(it.key(), FALSE, rule, body, hnum); for(QValueListIterator tmp_it = tmp.begin(); tmp_it != tmp.end(); ++tmp_it) { if(!foundDict[(*tmp_it)]) foundDict.insert((*tmp_it), (void*)1); } } } } for(QIntDictIterator it(foundDict); it.current(); ++it) ret << it.currentKey(); return ret; } mLastSearch = QTime::currentTime(); bool exhaustive_search = FALSE; if(km_isSeparated(data)) { //phrase search.. bool first = TRUE; QIntDict foundDict; QStringList sl = km_separate(data); for(QStringList::Iterator it = sl.begin(); it != sl.end(); ++it) { if(!isKillTerm((*it).latin1(), (*it).length())) { QCString cstr((*it).latin1()); int map_off = 0; if(body) { if(!mTermTOC.body.contains(cstr)) return ret; map_off = mTermTOC.body[cstr]; } else { if(!mTermTOC.h.headers[hnum].contains(cstr)) return ret; map_off = mTermTOC.h.headers[hnum][cstr]; } if(first) { first = FALSE; values(map_off+1, mTermIndex.ref->read(map_off), foundDict); } else { QIntDict andDict; QValueList tmp; values(map_off+1, mTermIndex.ref->read(map_off), tmp); for(QValueListIterator it = tmp.begin(); it != tmp.end(); ++it) { if(foundDict[(*it)]) andDict.insert((*it), (void*)1); } foundDict = andDict; } } } for(QIntDictIterator it(foundDict); it.current(); ++it) ret << it.currentKey(); exhaustive_search = TRUE; } else if(!isKillTerm(data.latin1(), data.length())) { QCString cstr(data.latin1()); int map_off = -1; if(body) { if(mTermTOC.body.contains(cstr)) map_off = mTermTOC.body[cstr]; } else { if(mTermTOC.h.headers[hnum].contains(cstr)) map_off = mTermTOC.h.headers[hnum][cstr]; } if(map_off != -1) values(map_off+1, mTermIndex.ref->read(map_off), ret); } if(!ret.isEmpty() && rule && (exhaustive_search || rule->function() == KMSearchRule::FuncEquals)) { QValueList tmp; for(QValueListIterator it = ret.begin(); it != ret.end(); ++it) { int idx = -1, ser = (*it); KMFolder *folder = 0; kmkernel->msgDict()->getLocation(ser, &folder, &idx); if(!folder || (idx == -1)) continue; KMMessage *msg = folder->getMsg(idx); if(rule->matches(msg)) tmp << ser; } return tmp; } return ret; } // processes rule and performs the indexed look up, if exhaustive_search // is true it will interpret body() as a full phrase rather than AND'd set QValueList KMMsgIndex::query(KMSearchRule *rule, bool exhaustive_search) { if(!canHandleQuery(rule) || rule->field().isEmpty() || rule->contents().isEmpty()) return QValueList(); if(rule->field().left(1) == "<") { if((rule->field() == "" || rule->field() == "") && rule->function() == KMSearchRule::FuncContains) { return find(rule->contents().lower(), TRUE, exhaustive_search ? rule : NULL, TRUE); } else if(rule->field() == "") { bool first = TRUE; QIntDict foundDict; QString contents = rule->contents().lower(); const char *hdrs[] = { "To", "CC", "BCC", NULL }; for(int i = 0; hdrs[i]; i++) { int l = strlen(hdrs[i]); if(isKillHeader(hdrs[i], l)) //can't really happen continue; QValueList tmp = find(contents, rule->function() == KMSearchRule::FuncContains, exhaustive_search ? rule : NULL, FALSE, mTermTOC.h.header_lookup[hdrs[i]]); if(first) { first = FALSE; for(QValueListIterator it = tmp.begin(); it != tmp.end(); ++it) foundDict.insert((*it), (void*)1); } else { for(QValueListIterator it = tmp.begin(); it != tmp.end(); ++it) { if(!foundDict[(*it)]) foundDict.insert((*it), (void*)1); } } } QValueList ret; for(QIntDictIterator it(foundDict); it.current(); ++it) ret << it.currentKey(); return ret; } return QValueList(); //can't really happen.. } //general header case.. if(!mTermTOC.h.header_lookup.contains(rule->field())) return QValueList(); return find(rule->contents().lower(), rule->function() == KMSearchRule::FuncContains, exhaustive_search ? rule : NULL, FALSE, mTermTOC.h.header_lookup[rule->field()]); } // processes rule and performs the indexed look up, if exhaustive_search // is true it will interpret body()[s] as full phrases rather than AND'd sets QValueList KMMsgIndex::query(KMSearchPattern *pat, bool exhaustive_search) { QValueList ret; if(pat->isEmpty() || !canHandleQuery(pat)) return ret; if(pat->count() == 1) { ret = query(pat->first(), exhaustive_search); } else { bool first = TRUE; QIntDict foundDict; for(QPtrListIterator it(*pat); it.current(); ++it) { if((*it)->field().isEmpty() || (*it)->contents().isEmpty()) continue; QValueList tmp = query((*it), exhaustive_search); if(first) { first = FALSE; for(QValueListIterator it = tmp.begin(); it != tmp.end(); ++it) foundDict.insert((long int)(*it), (void*)1); } else { if(pat->op() == KMSearchPattern::OpAnd) { QIntDict andDict; for(QValueListIterator it = tmp.begin(); it != tmp.end(); ++it) { if(foundDict[(*it)]) andDict.insert((*it), (void*)1); } foundDict = andDict; } else if(pat->op() == KMSearchPattern::OpOr) { for(QValueListIterator it = tmp.begin(); it != tmp.end(); ++it) { if(!foundDict[(*it)]) foundDict.insert((long int)(*it), (void*)1); } } } } for(QIntDictIterator it(foundDict); it.current(); ++it) ret << it.currentKey(); } return ret; } // Code to bind to a KMSearch KMIndexSearchTarget::KMIndexSearchTarget(KMSearch *s) : QObject(NULL, NULL), mVerifyResult(FALSE) { mSearch = s; mId = startTimer(0); { QValueList lst = kmkernel->msgIndex()->query( s->searchPattern(), FALSE); for(QValueListConstIterator it = lst.begin(); it != lst.end(); ++it) mSearchResult.push((*it)); } for(QPtrListIterator it(*s->searchPattern()); it.current(); ++it) { if((*it)->function() != KMSearchRule::FuncContains || km_isSeparated((*it)->contents())) { mVerifyResult = TRUE; break; } } QObject::connect(this, SIGNAL(proxyFound(Q_UINT32)), s, SIGNAL(found(Q_UINT32))); QObject::connect(this, SIGNAL(proxyFinished(bool)), s, SIGNAL(finished(bool))); } KMIndexSearchTarget::~KMIndexSearchTarget() { stop(); QValueListConstIterator > it; for (it = mOpenedFolders.begin(); it != mOpenedFolders.end(); ++it) { KMFolder *folder = *it; if(folder) folder->close(); } mOpenedFolders.clear(); } void KMIndexSearchTarget::timerEvent(QTimerEvent *) { if(qApp->hasPendingEvents()) return; //no time now bool finished = FALSE; if(mSearch) { KMFolder *folder; const uint max_src = mVerifyResult ? 100 : 500; int stop_at = QMIN(mSearchResult.count(), max_src); for(int i = 0, idx; i < stop_at; i++) { Q_UINT32 serNum = mSearchResult.pop(); kmkernel->msgDict()->getLocation(serNum, &folder, &idx); if (!folder || (idx == -1)) continue; if(mSearch->inScope(folder)) { mSearch->setSearchedCount(mSearch->searchedCount()+1); mSearch->setCurrentFolder(folder->label()); if(mVerifyResult) { //full phrase.. if(mOpenedFolders.findIndex(folder) == -1) { folder->open(); mOpenedFolders.append(folder); } if(!mSearch->searchPattern()->matches( folder->getDwString(idx))) continue; } mSearch->setFoundCount(mSearch->foundCount()+1); emit proxyFound(serNum); } } if(mSearchResult.isEmpty()) finished = TRUE; } else { finished = TRUE; } if(finished) { if(mSearch && mSearch->running()) mSearch->setRunning(FALSE); stop(TRUE); killTimer(mId); kmkernel->msgIndex()->stopQuery(id()); } //!!!!! do nothing else because we might be deleted.. } bool KMMsgIndex::startQuery(KMSearch *s) { if(!canHandleQuery(s->searchPattern())) return FALSE; KMIndexSearchTarget *targ = new KMIndexSearchTarget(s); mActiveSearches.insert(targ->id(), targ); return TRUE; } bool KMMsgIndex::stopQuery(KMSearch *s) { int id = -1; for(QIntDictIterator it(mActiveSearches); it.current(); ++it) { if(it.current()->search() == s) { it.current()->stop(FALSE); id = it.currentKey(); break; } } if(id == -1) return FALSE; return stopQuery(id); } #include "kmmsgindex.moc"