diff --git a/core/textpage.cpp b/core/textpage.cpp index 0c362a92a..ab5167c59 100644 --- a/core/textpage.cpp +++ b/core/textpage.cpp @@ -974,21 +974,34 @@ void TextPagePrivate::printTextList(int i, TextList list){ } -//copies a TextList to m_words -void TextPagePrivate::copy(TextList &list){ +//copies a TextList to m_words; every entity is duplicated, so the pointers are distinct +void TextPagePrivate::copyTo(TextList &list){ - int i; - - while(m_words.length()) - m_words.pop_back(); + // Empty m_words before refilling it. clear() is used instead of calling + // erase() inside an iterator loop, because erase() invalidates the iterator + // it is given. The entities that were in m_words are not deleted here. + m_words.clear(); - for(i = 0 ; i < list.length() ; i++){ + for(int i = 0 ; i < list.length() ; i++){ TinyTextEntity *ent = list.at(i); - m_words.append(ent); + m_words.append( new TinyTextEntity(ent->text(),ent->area) ); } } +// copies from m_words to list with distinct pointers +void TextPagePrivate::copyFrom(TextList &list){ + + // Empty list before refilling it. clear() is used instead of calling + // erase() inside an iterator loop, because erase() invalidates the iterator + // it is given. The entities that were in list are not deleted here. + list.clear(); + + for(int i = 0 ; i < m_words.length() ; i++){ + TinyTextEntity* ent = m_words.at(i); + list.append( new TinyTextEntity( ent->text(),ent->area ) ); + } +} // if the horizontal arm of one rectangle fully contains the other (example below) // -------- ---- ----- first @@ -1076,9 +1089,12 @@ bool doesConsumeY(QRect first, QRect second, int threshold){ //we are taking now the characters are horizontally next to next in current m_words, it actually is like that void TextPagePrivate::makeWordFromCharacters(){ - TextList tmpList = m_words; + TextList tmpList; TextList newList; + // we are making a new copy from m_words to tmpList before using it + copyFrom(tmpList); + TextList::Iterator it = tmpList.begin(), itEnd = tmpList.end(), tmpIt; int newLeft,newRight,newTop,newBottom; int pageWidth = m_page->m_page->width(), pageHeight = m_page->m_page->height(); @@ -1093,15 +1109,11 @@ void TextPagePrivate::makeWordFromCharacters(){ QRect lineArea = 
(*it)->area.roundedGeometry(pageWidth,pageHeight),elementArea; TextList word; //It will contain all the TextEntities in a simple word - tmpIt = it; - int space = 0; while(!space ){ - // we must have to put this line before the if condition of it==itEnd - // otherwise the last character can be missed if(textString.length()){ newString.append(textString); @@ -1120,6 +1132,8 @@ void TextPagePrivate::makeWordFromCharacters(){ it++; + // we must have to put this line before the if condition of it==itEnd + // otherwise the last character can be missed if(it == itEnd) break; //the first textEntity area @@ -1144,9 +1158,6 @@ void TextPagePrivate::makeWordFromCharacters(){ space = elementArea.left() - lineArea.right(); // cout << "space " << space << " "; - // if space more than one or if space is less than zero, that means - // we are erroneously merging a character with another character - // which is really before to it if(space > 0 || space < 0){ it--; break; @@ -1154,7 +1165,6 @@ void TextPagePrivate::makeWordFromCharacters(){ newLeft = text_x1 < line_x1 ? text_x1 : line_x1; newRight = line_x2 > text_x2 ? line_x2 : text_x2; - newTop = text_y1 > line_y1 ? line_y1 : text_y1; newBottom = text_y2 > line_y2 ? 
text_y2 : line_y2; @@ -1170,9 +1180,8 @@ void TextPagePrivate::makeWordFromCharacters(){ if(newString.length()){ NormalizedRect newRect(lineArea,pageWidth,pageHeight); - TinyTextEntity *ent = new TinyTextEntity(newString.normalized - (QString::NormalizationForm_KC), newRect ); - newList.append(ent); + newList.append(new TinyTextEntity(newString.normalized + (QString::NormalizationForm_KC), newRect )); QRect rect = newRect.geometry(pageWidth,pageHeight); @@ -1192,7 +1201,7 @@ void TextPagePrivate::makeWordFromCharacters(){ cout << "words: " << index << endl; - copy(newList); + copyTo(newList); // for(int i = 0 ; i < m_words.length() ; i++){ @@ -1210,11 +1219,13 @@ void TextPagePrivate::makeWordFromCharacters(){ // } // cout << endl; // } - + // Pointers to element in tmpList and newList are different + qDeleteAll(tmpList); + qDeleteAll(newList); } -void TextPagePrivate::makeAndSortLines(TextList &words, SortedTextList &lines, LineRect &line_rects){ +void TextPagePrivate::makeAndSortLines(TextList &wordsTmp, SortedTextList &lines, LineRect &line_rects){ /** we cannot assume that the generator will give us texts in the right order. We can only assume @@ -1226,8 +1237,15 @@ void TextPagePrivate::makeAndSortLines(TextList &words, SortedTextList &lines, L 3. Within each line sort the TinyTextEntity 's by x0(left) **/ - // Step:1 ....................................... + // Make a new copy of the TextList in the words, so that the wordsTmp and lines do not contain + // same pointers for all the TinyTextEntity + TextList words; + for(int i = 0 ; i < wordsTmp.length() ; i++){ + TinyTextEntity* ent = wordsTmp.at(i); + words.append( new TinyTextEntity( ent->text(),ent->area ) ); + } + // Step:1 ....................................... 
qSort(words.begin(),words.end(),compareTinyTextEntityY); @@ -1318,8 +1336,6 @@ void TextPagePrivate::makeAndSortLines(TextList &words, SortedTextList &lines, L } } -// cout << "lines length: " << lines.length() << endl; - // Step 3: ....................................... for(i = 0 ; i < lines.length() ; i++){ @@ -1328,9 +1344,11 @@ void TextPagePrivate::makeAndSortLines(TextList &words, SortedTextList &lines, L qSort(list.begin(),list.end(),compareTinyTextEntityX); lines.replace(i,list); - printTextList(i,list); +// printTextList(i,list); } + //we cannot delete words here, as lines contains the same pointers as words does + } @@ -1345,7 +1363,12 @@ void TextPagePrivate::XYCutForBoundingBoxes(int tcx, int tcy){ // The XY Tree, where the node is a RegionText RegionTextList tree; QRect contentRect(m_page->m_page->boundingBox().geometry(pageWidth,pageHeight)); - RegionText root(m_words,contentRect); + + //creating a copy of m_words in words so that we do not have same pointers + TextList words; + + copyFrom(words); + RegionText root(words,contentRect); // start the tree with the root, it is our only region at the start tree.push_back(root); @@ -1465,12 +1488,12 @@ void TextPagePrivate::XYCutForBoundingBoxes(int tcx, int tcy){ int tnx = (int)((double)avgX * 10.0 / 100.0 + 0.5), tny = 0; - cout << "noise on x_axis: " << avgX << " " << tnx << endl; +// cout << "noise on x_axis: " << avgX << " " << tnx << endl; - cout << endl << "projection on x axis ............." << endl << endl; +// cout << endl << "projection on x axis ............." << endl << endl; for( j = 0 ; j < size_proj_x ; j++ ){ proj_on_xaxis[j] -= tnx; - cout << "index: " << j << " value: " << proj_on_xaxis[j] << endl; +// cout << "index: " << j << " value: " << proj_on_xaxis[j] << endl; } // cout << endl << "projection on y axis ............ 
" << endl << endl; @@ -1536,10 +1559,10 @@ void TextPagePrivate::XYCutForBoundingBoxes(int tcx, int tcy){ int cut_pos_x = pos_ver, cut_pos_y = pos_hor; int gap_x = gap_ver, gap_y = gap_hor; - cout << "gap X: " << gap_x << endl; - cout << "gap Y: " << gap_y << endl; - cout << "cut X: " << cut_pos_x << endl; - cout << "cut Y: " << cut_pos_y << endl; +// cout << "gap X: " << gap_x << endl; +// cout << "gap Y: " << gap_y << endl; +// cout << "cut X: " << cut_pos_x << endl; +// cout << "cut Y: " << cut_pos_y << endl; /** 4. Cut the region and make nodes (left,right) or (up,down) ................ **/ @@ -1679,17 +1702,17 @@ void TextPagePrivate::XYCutForBoundingBoxes(int tcx, int tcy){ cout << "list1: " << list1.length() << endl; cout << "list2: " << list2.length() << endl; - cout << "Node1 text: ........................ " << endl << endl; +// cout << "Node1 text: ........................ " << endl << endl; for(j = 0 ; j < list1.length() ; j++){ TinyTextEntity *ent = list1.at(j); - cout << ent->text().toAscii().data(); +// cout << ent->text().toAscii().data(); } cout << endl; - cout << "Node2 text: ........................ " << endl << endl; +// cout << "Node2 text: ........................ 
" << endl << endl; for(j = 0 ; j < list2.length() ; j++){ TinyTextEntity *ent = list2.at(j); - cout << ent->text().toAscii().data(); +// cout << ent->text().toAscii().data(); } cout << endl; @@ -1702,21 +1725,23 @@ void TextPagePrivate::XYCutForBoundingBoxes(int tcx, int tcy){ for(i = 0 ; i < tree.length() ; i++){ TextList list = tree.at(i).text(); - cout << "Node: " << i << endl; +// cout << "Node: " << i << endl; for(j = 0 ; j < list.length() ; j++){ TinyTextEntity *ent = list.at(j); tmp.append(ent); - cout << ent->text().toAscii().data(); +// cout << ent->text().toAscii().data(); } - cout << endl << endl; +// cout << endl << endl; } - copy(tmp); + //copying elements of tmp to m_words + copyTo(tmp); + // we are not removing tmp because, the elements of tmp are in m_XY_cut_tree, we will finally free from m_XY_cut_tree m_XY_cut_tree = tree; } @@ -1813,15 +1838,16 @@ void TextPagePrivate::addNecessarySpace(){ for(j = 0 ; j < list.length() ; j++){ TinyTextEntity *ent = list.at(j); - tmp.append(ent); + //creating new Entities + tmp.append(new TinyTextEntity(ent->text(),ent->area)); } } - copy(tmp); + copyTo(tmp); } -// Break Words into Characters +// Break Words into Characters, takes Entities from m_words and for each of them insert in tmp the character entities void TextPagePrivate::breakWordIntoCharacters(){ QString spaceStr(" "); @@ -1870,7 +1896,7 @@ void TextPagePrivate::breakWordIntoCharacters(){ } } - copy(tmp); + copyTo(tmp); // print the final text diff --git a/core/textpage_p.h b/core/textpage_p.h index a1844d6f3..4da8a74a3 100644 --- a/core/textpage_p.h +++ b/core/textpage_p.h @@ -59,7 +59,10 @@ class TextPagePrivate void printTextList(int i, TextList list); /** copy a TextList to m_words **/ - void copy(TextList &list); + void copyTo(TextList &list); + + /** copy m_words to a TextList **/ + void copyFrom(TextList &list); /** print the textpage contents with area (text and bounding rect) @@ -113,7 +116,6 @@ class TextPagePrivate RegionTextList 
m_XY_cut_tree; TextList m_spaces; TextList m_words; -// TextList m_tmp_words; QMap< int, SearchPoint* > m_searchPoints; PagePrivate *m_page; SortedTextList m_lines;