diff --git a/core/textpage.cpp b/core/textpage.cpp index a95f7c6b2..348dea084 100644 --- a/core/textpage.cpp +++ b/core/textpage.cpp @@ -62,37 +62,49 @@ static bool CaseSensitiveCmpFn( const QStringRef & from, const QStringRef & to ) return from.compare( to, Qt::CaseSensitive ) == 0; } + /** - * If the vertical arm of one rectangle fully contains the other (example below) - * -------- ---- ----- first - * ---- -------- ----- second - * or we can make it overlap of spaces by threshold% -*/ -static bool doesConsumeY(const QRect& first, const QRect& second, int threshold) + * Returns true iff segments [@p left1, @p right1] and [@p left2, @p right2] on the real line + * overlap within @p threshold percent, i. e. iff the ratio of the length of the + * intersection of the segments to the length of the shortest of the two input segments + * is not smaller than the threshold. + */ +static bool segmentsOverlap(double left1, double right1, double left2, double right2, int threshold) { - // if one consumes another fully - if(first.top() <= second.top() && first.bottom() >= second.bottom()) + // check if one consumes another fully (speed optimization) + + if (left1 <= left2 && right1 >= right2) return true; - if(first.top() >= second.top() && first.bottom() <= second.bottom()) + if (left1 >= left2 && right1 <= right2) return true; - // or if there is overlap of space by more than 80% - // there is overlap - if(second.bottom() >= first.top() && first.bottom() >= second.top()) + // check if there is overlap above threshold + if (right2 >= left1 && right1 >= left2) { - const int overlap = (second.bottom() >= first.bottom()) ? first.bottom() - second.top() - : second.bottom() - first.top(); - //we will divide by the smaller rectangle to calculate the overlap - const int percentage = (first.height() < second.height()) ? overlap * 100 / (first.bottom() - first.top()) - : overlap * 100 / (second.bottom() - second.top()); + double overlap = (right2 >= right1) ? right1 - left2 + : right2 - left1; + + double length1 = right1 - left1, + length2 = right2 - left2; - if(percentage >= threshold) return true; + return overlap * 100 >= threshold * qMin(length1, length2); } return false; } +static bool doesConsumeY(const QRect& first, const QRect& second, int threshold) +{ + return segmentsOverlap(first.top(), first.bottom(), second.top(), second.bottom(), threshold); +} + +static bool doesConsumeY(const NormalizedRect& first, const NormalizedRect& second, int threshold) +{ + return segmentsOverlap(first.top, first.bottom, second.top, second.bottom, threshold); +} + + /* Rationale behind TinyTextEntity: @@ -764,7 +776,7 @@ RegularAreaRect* TextPage::findText( int searchID, const QString &query, SearchD // we have a '-' just followed by a '\n' character // check if the string contains a '-' character // if the '-' is the last entry -static int stringLengthAdaptedWithHyphen(const QString &str, const TextList::ConstIterator &it, const TextList::ConstIterator &textListEnd, PagePrivate *page) +static int stringLengthAdaptedWithHyphen(const QString &str, const TextList::ConstIterator &it, const TextList::ConstIterator &textListEnd) { int len = str.length(); @@ -786,11 +798,8 @@ static int stringLengthAdaptedWithHyphen(const QString &str, const TextList::Con else { // 2. if the next word is in a different line or not - const int pageWidth = page->m_page->width(); - const int pageHeight = page->m_page->height(); - - const QRect hyphenArea = (*it)->area.roundedGeometry(pageWidth, pageHeight); - const QRect lookaheadArea = (*(it + 1))->area.roundedGeometry(pageWidth, pageHeight); + const NormalizedRect& hyphenArea = (*it)->area; + const NormalizedRect& lookaheadArea = (*(it + 1))->area; // lookahead to check whether both the '-' rect and next character rect overlap if( !doesConsumeY( hyphenArea, lookaheadArea, 70 ) ) @@ -852,7 +861,7 @@ RegularAreaRect* TextPagePrivate::findTextInternalForward( int searchID, const Q { const TinyTextEntity* curEntity = *it; const QString& str = curEntity->text(); - int len = stringLengthAdaptedWithHyphen(str, it, m_words.constEnd(), m_page); + int len = stringLengthAdaptedWithHyphen(str, it, m_words.constEnd()); if (offset >= len) { @@ -970,7 +979,7 @@ RegularAreaRect* TextPagePrivate::findTextInternalBackward( int searchID, const const TinyTextEntity* curEntity = *it; const QString& str = curEntity->text(); - int len = stringLengthAdaptedWithHyphen(str, it, m_words.constEnd(), m_page); + int len = stringLengthAdaptedWithHyphen(str, it, m_words.constEnd()); if (offset <= 0) { @@ -1447,7 +1456,7 @@ static void calculateStatisticalInformation(const QList &wor if(space != 0 && space != pageWidth) { // increase the count of the space amount - if(hor_space_stat.contains(space)) hor_space_stat[space] = hor_space_stat[space]++; + if(hor_space_stat.contains(space)) hor_space_stat[space]++; else hor_space_stat[space] = 1; int left,right,top,bottom; @@ -1468,14 +1477,14 @@ static void calculateStatisticalInformation(const QList &wor if(hor_space_stat.contains(maxSpace)) { if(hor_space_stat[maxSpace] != 1) - hor_space_stat[maxSpace] = hor_space_stat[maxSpace]--; + hor_space_stat[maxSpace]--; else hor_space_stat.remove(maxSpace); } if(maxSpace != 0) { if (col_space_stat.contains(maxSpace)) - col_space_stat[maxSpace] = col_space_stat[maxSpace]++; + col_space_stat[maxSpace]++; else col_space_stat[maxSpace] = 1; //store the max rect of each line @@ -1866,8 +1875,13 @@ WordsWithCharacters addNecessarySpace(RegionTextList tree, int pageWidth, int pa */ void TextPagePrivate::correctTextOrder() { - const int pageWidth = m_page->m_page->width(); - const int pageHeight = m_page->m_page->height(); + //m_page->m_page->width() and m_page->m_page->height() are in pixels at + //100% zoom level, and thus depend on display DPI. We scale pageWidth and + //pageHeight to remove the dependence. Otherwise bugs would be more difficult + //to reproduce and Okular could fail in extreme cases like a large TV with low DPI. + const double scalingFactor = 2000.0 / (m_page->m_page->width() + m_page->m_page->height()); + const int pageWidth = (int) (scalingFactor * m_page->m_page->width() ); + const int pageHeight = (int) (scalingFactor * m_page->m_page->height()); TextList characters = m_words; diff --git a/tests/searchtest.cpp b/tests/searchtest.cpp index caceb6fd3..e24ab24e7 100644 --- a/tests/searchtest.cpp +++ b/tests/searchtest.cpp @@ -46,6 +46,8 @@ class SearchTest : public QObject void testHyphenAtEndOfLineWithoutYOverlap(); void testHyphenWithYOverlap(); void testHyphenAtEndOfPage(); + void testOneColumn(); + void testTwoColumns(); }; void SearchTest::initTestCase() @@ -54,23 +56,28 @@ void SearchTest::initTestCase() Okular::SettingsCore::instance( "searchtest" ); } -static Okular::TextPage* createTextPage(const QString text[], const Okular::NormalizedRect rect[], - int n, Okular::Page*& page) { +static void createTextPage(const QVector& text, const QVector& rect, + Okular::TextPage*& tp, Okular::Page*& page) { - Okular::TextPage* tp = new Okular::TextPage(); - for (int i = 0; i < n; i++) { + tp = new Okular::TextPage(); + for (int i = 0; i < text.size(); i++) { tp->append(text[i], new Okular::NormalizedRect(rect[i])); } - //We must create a Page object because TextPage::stringLengthAdaptedWithHyphen uses - //m_page->width() and m_page->height() (for some dubious reason). + //The Page::setTextPage method invokes the layout analysis algorithms tested by some tests here + //and also sets the tp->d->m_page field (the latter was used in older versions of Okular by + //TextPage::stringLengthAdaptedWithHyphen). //Note that calling "delete page;" will delete the TextPage as well. page = new Okular::Page(1, 100, 100, Okular::Rotation0); page -> setTextPage(tp); - - return tp; } +#define CREATE_PAGE \ + QCOMPARE(text.size(), rect.size()); \ + Okular::Page* page; \ + Okular::TextPage* tp; \ + createTextPage(text, rect, tp, page); + #define TEST_NEXT_PREV(searchType, expectedStatus) \ { \ Okular::RegularAreaRect* result = tp->findText(0, searchString, searchType, Qt::CaseSensitive, NULL); \ @@ -130,16 +137,14 @@ void SearchTest::testNextAndPrevious() for (int i = 0; i < TEST_NEXT_PREV_SITUATION_COUNT; i++) { const QVector& text = texts[i]; const QString& searchString = searchStrings[i]; - const int n = text.size(); - Okular::NormalizedRect* rect = new Okular::NormalizedRect[n]; \ + QVector rect; \ - for (int i = 0; i < n; i++) { - rect[i] = Okular::NormalizedRect(0.1*i, 0.0, 0.1*(i+1), 0.1); \ + for (int i = 0; i < text.size(); i++) { + rect << Okular::NormalizedRect(0.1*i, 0.0, 0.1*(i+1), 0.1); \ } - Okular::Page* page; - Okular::TextPage* tp = createTextPage(text.data(), rect, n, page); + CREATE_PAGE; //Test 3 of the 8 cases listed above: //FromTop, Next-Next (match) and Next-Next (no match) @@ -156,7 +161,6 @@ void SearchTest::testNextAndPrevious() TEST_NEXT_PREV(Okular::PreviousResult, false); delete page; - delete[] rect; } } @@ -194,15 +198,13 @@ void SearchTest::test311232() void SearchTest::test323262() { - QString text[] = { - "a\n" - }; - Okular::NormalizedRect rect[] = { - Okular::NormalizedRect(1, 2, 3, 4) - }; + QVector text; + text << "a\n"; + + QVector rect; + rect << Okular::NormalizedRect(1, 2, 3, 4); - Okular::Page* page; - Okular::TextPage* tp = createTextPage(text, rect, 1, page); + CREATE_PAGE; Okular::RegularAreaRect* result = tp->findText(0, "a", Okular::FromBottom, Qt::CaseSensitive, NULL); QVERIFY(result); @@ -213,17 +215,15 @@ void SearchTest::test323262() void SearchTest::test323263() { - QString text[] = { - "a", "a", "b" - }; - Okular::NormalizedRect rect[] = { - Okular::NormalizedRect(0, 0, 1, 1), - Okular::NormalizedRect(1, 0, 2, 1), - Okular::NormalizedRect(2, 0, 3, 1) - }; + QVector text; + text << "a" << "a" << "b"; + + QVector rect; + rect << Okular::NormalizedRect(0, 0, 1, 1) + << Okular::NormalizedRect(1, 0, 2, 1) + << Okular::NormalizedRect(2, 0, 3, 1); - Okular::Page* page; - Okular::TextPage* tp = createTextPage(text, rect, 3, page); + CREATE_PAGE; Okular::RegularAreaRect* result = tp->findText(0, "ab", Okular::FromTop, Qt::CaseSensitive, NULL); QVERIFY(result); @@ -248,15 +248,13 @@ void SearchTest::testDottedI() //mode as well (QString::compare does not match them, at least in non-Turkish locales, since it follows //the Unicode case-folding rules http://www.unicode.org/Public/6.2.0/ucd/CaseFolding.txt). - QString text[] = { - QString::fromUtf8("İ") - }; - Okular::NormalizedRect rect[] = { - Okular::NormalizedRect(1, 2, 3, 4) - }; + QVector text; + text << QString::fromUtf8("İ"); - Okular::Page* page; - Okular::TextPage* tp = createTextPage(text, rect, 1, page); + QVector rect; + rect << Okular::NormalizedRect(1, 2, 3, 4); + + CREATE_PAGE; Okular::RegularAreaRect* result = tp->findText(0, QString::fromUtf8("İ"), Okular::FromTop, Qt::CaseInsensitive, NULL); QVERIFY(result); @@ -265,34 +263,29 @@ void SearchTest::testDottedI() delete page; } -void SearchTest::testHyphenAtEndOfLineWithoutYOverlap() { - QString text[] = { - "super-", - "cali-\n", - "fragilistic", "-", - "expiali", "-\n", - "docious" - }; - - Okular::NormalizedRect rect[] = { - Okular::NormalizedRect(0.4, 0.0, 0.9, 0.1), - Okular::NormalizedRect(0.0, 0.1, 0.6, 0.2), - Okular::NormalizedRect(0.0, 0.2, 0.8, 0.3), Okular::NormalizedRect(0.8, 0.2, 0.9, 0.3), - Okular::NormalizedRect(0.0, 0.3, 0.8, 0.4), Okular::NormalizedRect(0.8, 0.3, 0.9, 0.4), - Okular::NormalizedRect(0.0, 0.4, 0.7, 0.5) - }; - - size_t n = sizeof(text)/sizeof(QString); - QCOMPARE(n, sizeof(rect)/sizeof(Okular::NormalizedRect)); - - Okular::Page* page; - Okular::TextPage* tp = createTextPage(text, rect, n, page); +void SearchTest::testHyphenAtEndOfLineWithoutYOverlap() +{ + QVector text; + text << "super-" + << "cali-\n" + << "fragilistic" << "-" + << "expiali" << "-\n" + << "docious"; + + QVector rect; + rect << Okular::NormalizedRect(0.4, 0.0, 0.9, 0.1) + << Okular::NormalizedRect(0.0, 0.1, 0.6, 0.2) + << Okular::NormalizedRect(0.0, 0.2, 0.8, 0.3) << Okular::NormalizedRect(0.8, 0.2, 0.9, 0.3) + << Okular::NormalizedRect(0.0, 0.3, 0.8, 0.4) << Okular::NormalizedRect(0.8, 0.3, 0.9, 0.4) + << Okular::NormalizedRect(0.0, 0.4, 0.7, 0.5); + + CREATE_PAGE; Okular::RegularAreaRect* result = tp->findText(0, "supercalifragilisticexpialidocious", Okular::FromTop, Qt::CaseSensitive, NULL); QVERIFY(result); Okular::RegularAreaRect expected; - for (unsigned i = 0; i < n; i++) { + for (int i = 0; i < text.size(); i++) { expected.append(rect[i]); } expected.simplify(); @@ -302,69 +295,63 @@ void SearchTest::testHyphenAtEndOfLineWithoutYOverlap() { delete page; } -static bool testHyphen(Okular::NormalizedRect rect[], QString searchString) { - QString text[] = { - "a-", - "b" - }; - - int n = 2; - - Okular::Page* page; - Okular::TextPage* tp = createTextPage(text, rect, n, page); - - Okular::RegularAreaRect* result = tp->findText(0, searchString, - Okular::FromTop, Qt::CaseSensitive, NULL); - - bool found = result; - - delete result; - delete page; - - return found; +#define CREATE_PAGE_AND_TEST_SEARCH(searchString, matchExpected) \ +{ \ + CREATE_PAGE; \ + \ + Okular::RegularAreaRect* result = tp->findText(0, searchString, \ + Okular::FromTop, Qt::CaseSensitive, NULL); \ + \ + QCOMPARE(!!result, matchExpected); \ + \ + delete result; \ + delete page; \ } -void SearchTest::testHyphenWithYOverlap() { - Okular::NormalizedRect rect[2]; +void SearchTest::testHyphenWithYOverlap() +{ + QVector text; + text << "a-" + << "b"; + + QVector rect(2); //different lines (50% y-coordinate overlap), first rectangle has larger height rect[0] = Okular::NormalizedRect(0.0, 0.0, 0.9, 0.35); rect[1] = Okular::NormalizedRect(0.0, 0.3, 0.2, 0.4); - QVERIFY(testHyphen(rect, "ab")); + CREATE_PAGE_AND_TEST_SEARCH("ab", true); //different lines (50% y-coordinate overlap), second rectangle has larger height rect[0] = Okular::NormalizedRect(0.0, 0.0, 0.9, 0.1); rect[1] = Okular::NormalizedRect(0.0, 0.05, 0.2, 0.4); - QVERIFY(testHyphen(rect, "ab")); + CREATE_PAGE_AND_TEST_SEARCH("ab", true); //same line (90% y-coordinate overlap), first rectangle has larger height rect[0] = Okular::NormalizedRect(0.0, 0.0, 0.4, 0.2); rect[1] = Okular::NormalizedRect(0.4, 0.11, 0.6, 0.21); - QVERIFY(!testHyphen(rect, "ab")); - QVERIFY(testHyphen(rect, "a-b")); + CREATE_PAGE_AND_TEST_SEARCH("ab", false); + CREATE_PAGE_AND_TEST_SEARCH("a-b", true); //same line (90% y-coordinate overlap), second rectangle has larger height rect[0] = Okular::NormalizedRect(0.0, 0.0, 0.4, 0.1); rect[1] = Okular::NormalizedRect(0.4, 0.01, 0.6, 0.2); - QVERIFY(!testHyphen(rect, "ab")); - QVERIFY(testHyphen(rect, "a-b")); + CREATE_PAGE_AND_TEST_SEARCH("ab", false); + CREATE_PAGE_AND_TEST_SEARCH("a-b", true); } -void SearchTest::testHyphenAtEndOfPage() { +void SearchTest::testHyphenAtEndOfPage() +{ //Tests for segmentation fault that would occur if //we tried look ahead (for determining whether the //next character is at the same line) at the end of the page. - QString text[] = { - "a-" - }; + QVector text; + text << "a-"; - Okular::NormalizedRect rect[] = { - Okular::NormalizedRect(0, 0, 1, 1) - }; + QVector rect; + rect << Okular::NormalizedRect(0, 0, 1, 1); - Okular::Page* page; - Okular::TextPage* tp = createTextPage(text, rect, 1, page); + CREATE_PAGE; { Okular::RegularAreaRect* result = tp->findText(0, "a", @@ -383,6 +370,65 @@ void SearchTest::testHyphenAtEndOfPage() { delete page; } +void SearchTest::testOneColumn() +{ + //Tests that the layout analysis algorithm does not create too many columns. + //Bug 326207 was caused by the fact that if all the horizontal breaks in a line + //had the same length and were smaller than vertical breaks between lines then + //the horizontal breaks were treated as column separators. + //(Note that "same length" means "same length after rounding rectangles to integer pixels". + //The resolution used by the XY Cut algorithm with a square page is 1000 x 1000, + //and the horizontal spaces in the example are 0.1, so they are indeed both exactly 100 pixels.) + + QVector text; + text << "Only" << "one" << "column" + << "here"; + + //characters and line breaks have length 0.05, word breaks 0.1 + QVector rect; + rect << Okular::NormalizedRect(0.0, 0.0, 0.2, 0.1) + << Okular::NormalizedRect(0.3, 0.0, 0.5, 0.1) + << Okular::NormalizedRect(0.6, 0.0, 0.9, 0.1) + << Okular::NormalizedRect(0.0, 0.15, 0.2, 0.25); + + CREATE_PAGE; + + Okular::RegularAreaRect* result = tp->findText(0, "Only one column", + Okular::FromTop, Qt::CaseSensitive, NULL); + QVERIFY(result); + delete result; + + delete page; +} + +void SearchTest::testTwoColumns() +{ + //Tests that the layout analysis algorithm can detect two columns. + + QVector text; + text << "This" << "text" << "in" << "two" + << "is" << "set" << "columns."; + + //characters, word breaks and line breaks have length 0.05 + QVector rect; + rect << Okular::NormalizedRect(0.0, 0.0, 0.20, 0.1) + << Okular::NormalizedRect(0.25, 0.0, 0.45, 0.1) + << Okular::NormalizedRect(0.6, 0.0, 0.7, 0.1) + << Okular::NormalizedRect(0.75, 0.0, 0.9, 0.1) + << Okular::NormalizedRect(0.0, 0.15, 0.1, 0.25) + << Okular::NormalizedRect(0.15, 0.15, 0.3, 0.25) + << Okular::NormalizedRect(0.6, 0.15, 1.0, 0.25); + + CREATE_PAGE; + + Okular::RegularAreaRect* result = tp->findText(0, "This text in", + Okular::FromTop, Qt::CaseSensitive, NULL); + QVERIFY(!result); + delete result; + + delete page; +} + QTEST_KDEMAIN( SearchTest, GUI ) #include "searchtest.moc"