From eb8ae20cc159a3db45b464e6eb0ebcd9a19b3e93 Mon Sep 17 00:00:00 2001 From: David Rosca Date: Sun, 13 Apr 2014 11:49:39 +0200 Subject: [PATCH] [AdBlockRule] Improve performance of rule matching with QStringMatcher Use QStringMatcher instead of QString::contains in RegExpFilters. QStringMatcher internally uses fast Boyer-Moore algorithm. --- src/lib/adblock/adblockrule.cpp | 21 ++++++++++++++++----- src/lib/adblock/adblockrule.h | 5 +++-- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/lib/adblock/adblockrule.cpp b/src/lib/adblock/adblockrule.cpp index d777ca4f3..eee3a2d0d 100644 --- a/src/lib/adblock/adblockrule.cpp +++ b/src/lib/adblock/adblockrule.cpp @@ -468,7 +468,7 @@ void AdBlockRule::parseFilter() m_type = RegExpMatchRule; m_regExp = new RegExp; m_regExp->regExp = QzRegExp(parsedLine, m_caseSensitivity); - m_regExp->regExpStrings = parseRegExpFilter(parsedLine); + m_regExp->matchers = createStringMatchers(parseRegExpFilter(parsedLine)); return; } @@ -509,7 +509,7 @@ void AdBlockRule::parseFilter() m_type = RegExpMatchRule; m_regExp = new RegExp; m_regExp->regExp = QzRegExp(createRegExpFromFilter(parsedLine), m_caseSensitivity); - m_regExp->regExpStrings = parseRegExpFilter(parsedLine); + m_regExp->matchers = createStringMatchers(parseRegExpFilter(parsedLine)); return; } @@ -632,6 +632,18 @@ QString AdBlockRule::createRegExpFromFilter(const QString &filter) const return parsed; } +QList AdBlockRule::createStringMatchers(const QStringList &filters) const +{ + QList matchers; + matchers.reserve(filters.size()); + + foreach (const QString &filter, filters) { + matchers.append(QStringMatcher(filter, m_caseSensitivity)); + } + + return matchers; +} + bool AdBlockRule::isMatchingDomain(const QString &domain, const QString &filter) const { return QzTools::matchDomain(filter, domain); @@ -641,10 +653,9 @@ bool AdBlockRule::isMatchingRegExpStrings(const QString &url) const { Q_ASSERT(m_regExp); - foreach (const QString &string, m_regExp->regExpStrings) { - if (!url.contains(string)) { + foreach (const QStringMatcher &matcher, m_regExp->matchers) { + if (matcher.indexIn(url) == -1) return false; - } } return true; diff --git a/src/lib/adblock/adblockrule.h b/src/lib/adblock/adblockrule.h index cb7e46557..a624a72c2 100644 --- a/src/lib/adblock/adblockrule.h +++ b/src/lib/adblock/adblockrule.h @@ -47,7 +47,7 @@ #define ADBLOCKRULE_H #include -#include +#include #include "qzcommon.h" #include "qzregexp.h" @@ -136,6 +136,7 @@ private: bool filterIsOnlyDomain(const QString &filter) const; bool filterIsOnlyEndsMatch(const QString &filter) const; QString createRegExpFromFilter(const QString &filter) const; + QList createStringMatchers(const QStringList &filters) const; AdBlockSubscription* m_subscription; @@ -159,7 +160,7 @@ private: struct RegExp { QzRegExp regExp; - QStringList regExpStrings; + QList matchers; }; // Use dynamic allocation to save memory