You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
3529 lines
86 KiB
3529 lines
86 KiB
//======================================================================== |
|
// |
|
// TextOutputDev.cc |
|
// |
|
// Copyright 1997-2003 Glyph & Cog, LLC |
|
// |
|
//======================================================================== |
|
|
|
#include <aconf.h> |
|
|
|
#ifdef USE_GCC_PRAGMAS |
|
#pragma implementation |
|
#endif |
|
|
|
#include <stdio.h> |
|
#include <stdlib.h> |
|
#include <stddef.h> |
|
#include <math.h> |
|
#include <ctype.h> |
|
#ifdef WIN32 |
|
#include <fcntl.h> // for O_BINARY |
|
#include <io.h> // for setmode |
|
#endif |
|
#include "gmem.h" |
|
#include "GString.h" |
|
#include "GList.h" |
|
#include "xpdf_config.h" |
|
#include "Error.h" |
|
#include "GlobalParams.h" |
|
#include "UnicodeMap.h" |
|
#include "UnicodeTypeTable.h" |
|
#include "GfxState.h" |
|
#include "TextOutputDev.h" |
|
|
|
#ifdef MACOS |
|
// needed for setting type/creator of MacOS files |
|
#include "ICSupport.h" |
|
#endif |
|
|
|
//------------------------------------------------------------------------
// parameters
//
// Tuning constants for the text layout analysis.  Unless stated
// otherwise, distances are expressed as a fraction of the font size.
// Every value that is not a plain positive literal is parenthesised so
// that the macro expands safely inside any expression.
//------------------------------------------------------------------------

// Each bucket in a text pool includes baselines within a range of
// this many points.
#define textPoolStep 4

// Inter-character space width which will cause addChar to start a new
// word.
#define minWordBreakSpace 0.1

// Negative inter-character space width, i.e., overlap, which will
// cause addChar to start a new word.
#define minDupBreakOverlap 0.2

// Max distance between baselines of two lines within a block, as a
// fraction of the font size.
#define maxLineSpacingDelta 1.5

// Max difference in primary font sizes on two lines in the same
// block.  Delta1 is used when examining new lines above and below the
// current block; delta2 is used when examining text that overlaps the
// current block; delta3 is used when examining text to the left and
// right of the current block.
#define maxBlockFontSizeDelta1 0.05
#define maxBlockFontSizeDelta2 0.6
#define maxBlockFontSizeDelta3 0.2

// Max difference in font sizes inside a word.
#define maxWordFontSizeDelta 0.05

// Maximum distance between baselines of two words on the same line,
// e.g., distance between subscript or superscript and the primary
// baseline, as a fraction of the font size.
#define maxIntraLineDelta 0.5

// Minimum inter-word spacing, as a fraction of the font size.  (Only
// used for raw ordering.)
#define minWordSpacing 0.15

// Maximum inter-word spacing, as a fraction of the font size.
#define maxWordSpacing 1.5

// Maximum horizontal spacing which will allow a word to be pulled
// into a block.
#define minColSpacing1 0.3

// Minimum spacing between columns, as a fraction of the font size.
#define minColSpacing2 1.0

// Maximum vertical spacing between blocks within a flow, as a
// multiple of the font size.
#define maxBlockSpacing 2.5

// Minimum spacing between characters within a word, as a fraction of
// the font size.  Negative: a limited amount of overlap is tolerated.
#define minCharSpacing (-0.2)

// Maximum spacing between characters within a word, as a fraction of
// the font size, when there is no obvious extra-wide character
// spacing.
#define maxCharSpacing 0.03

// When extra-wide character spacing is detected, the inter-character
// space threshold is set to the minimum inter-character space
// multiplied by this constant.
#define maxWideCharSpacingMul 1.3

// Max difference in primary,secondary coordinates (as a fraction of
// the font size) allowed for duplicated text (fake boldface, drop
// shadows) which is to be discarded.
#define dupMaxPriDelta 0.1
#define dupMaxSecDelta 0.2
|
|
|
//------------------------------------------------------------------------ |
|
// TextFontInfo |
|
//------------------------------------------------------------------------ |
|
|
|
// Record the font that is current in [state].  When word lists are
// compiled in, a private copy of the font's original name is kept as
// well (NULL if there is no font or the font has no original name).
TextFontInfo::TextFontInfo(GfxState *state) {
  gfxFont = state->getFont();
#if TEXTOUT_WORD_LIST
  fontName = (GString *)NULL;
  if (gfxFont && gfxFont->getOrigName()) {
    // copy() gives us our own GString, released in the destructor
    fontName = gfxFont->getOrigName()->copy();
  }
#endif
}
|
|
|
// Release the copied font name, if one was made.
TextFontInfo::~TextFontInfo() {
#if TEXTOUT_WORD_LIST
  // deleting a null pointer is a no-op, so no explicit test is needed
  delete fontName;
#endif
}
|
|
|
// Return true if the font currently selected in [state] is the very
// same font object (pointer identity) that this info was built from.
GBool TextFontInfo::matches(GfxState *state) {
  GfxFont *current = state->getFont();

  return current == gfxFont;
}
|
|
|
//------------------------------------------------------------------------ |
|
// TextWord |
|
//------------------------------------------------------------------------ |
|
|
|
// Start a new, empty word.  (x0, y0) is passed through
// state->transform(); the font's ascent and descent, scaled by
// [fontSizeA], then fix the word's extent across the reading direction
// selected by [rotA] (0..3).  The extent along the reading direction is
// filled in by addChar().
TextWord::TextWord(GfxState *state, int rotA, double x0, double y0,
                   int charPosA, TextFontInfo *fontA, double fontSizeA) {
  GfxFont *curFont;
  double devX, devY, rise, drop;

  rot = rotA;
  charPos = charPosA;
  charLen = 0;
  font = fontA;
  fontSize = fontSizeA;
  state->transform(x0, y0, &devX, &devY);

  curFont = font->gfxFont;
  if (curFont) {
    rise = curFont->getAscent() * fontSize;
    drop = curFont->getDescent() * fontSize;
  } else {
    // the PDF file draws text without a current font, which should
    // never happen -- fall back on fixed metrics
    rise = 0.95 * fontSize;
    drop = -0.35 * fontSize;
  }

  switch (rot) {
  case 0:
  case 2:
    // horizontal text: the font metrics determine the y extent
    if (rot == 0) {
      yMin = devY - rise;
      yMax = devY - drop;
    } else {
      yMin = devY + drop;
      yMax = devY + rise;
    }
    if (yMin == yMax) {
      // sanity check for a case that shouldn't happen -- a zero-height
      // word would lead to a division by zero later
      yMin = devY;
      yMax = devY + 1;
    }
    base = devY;
    break;
  case 1:
  case 3:
    // vertical text: the font metrics determine the x extent
    if (rot == 1) {
      xMin = devX + drop;
      xMax = devX + rise;
    } else {
      xMin = devX - rise;
      xMax = devX - drop;
    }
    if (xMin == xMax) {
      // same sanity check as above, for a zero-width word
      xMin = devX;
      xMax = devX + 1;
    }
    base = devX;
    break;
  }

  // no characters yet
  text = NULL;
  edge = NULL;
  len = 0;
  size = 0;
  spaceAfter = gFalse;
  next = NULL;

#if TEXTOUT_WORD_LIST
  GfxRGB rgb;

  // (render & 3) == 1 selects the stroke color; anything else uses
  // the fill color
  if ((state->getRender() & 3) == 1) {
    state->getStrokeRGB(&rgb);
  } else {
    state->getFillRGB(&rgb);
  }
  colorR = rgb.r;
  colorG = rgb.g;
  colorB = rgb.b;
#endif
}
|
|
|
// Free the character and edge arrays grown by addChar() / merge().
TextWord::~TextWord() {
  gfree(edge);
  gfree(text);
}
|
|
|
// Append one character [u] to the word.  (x, y) is the character
// origin and (dx, dy) its advance; which of them is used depends on the
// word's rotation.  edge[i] holds the leading edge of character i and
// edge[len] the trailing edge of the last character.
void TextWord::addChar(GfxState * /*state*/, double x, double y,
                       double dx, double dy, Unicode u) {
  // grow both arrays in steps of 16; edge needs one extra slot for the
  // trailing edge
  if (len == size) {
    size += 16;
    text = (Unicode *)grealloc(text, size * sizeof(Unicode));
    edge = (double *)grealloc(edge, (size + 1) * sizeof(double));
  }
  text[len] = u;

  const GBool firstChar = (len == 0);
  switch (rot) {
  case 0:
    if (firstChar) {
      xMin = x;
    }
    edge[len] = x;
    edge[len + 1] = x + dx;
    xMax = edge[len + 1];
    break;
  case 1:
    if (firstChar) {
      yMin = y;
    }
    edge[len] = y;
    edge[len + 1] = y + dy;
    yMax = edge[len + 1];
    break;
  case 2:
    if (firstChar) {
      xMax = x;
    }
    edge[len] = x;
    edge[len + 1] = x + dx;
    xMin = edge[len + 1];
    break;
  case 3:
    if (firstChar) {
      yMax = y;
    }
    edge[len] = y;
    edge[len + 1] = y + dy;
    yMin = edge[len + 1];
    break;
  }
  ++len;
}
|
|
|
// Append the characters of [word] to this word and enlarge the
// bounding box to cover both.  [word] itself is left untouched.
void TextWord::merge(TextWord *word) {
  const int extra = word->len;
  const int total = len + extra;
  int k;

  // union of the two bounding boxes
  if (word->xMin < xMin) {
    xMin = word->xMin;
  }
  if (word->xMax > xMax) {
    xMax = word->xMax;
  }
  if (word->yMin < yMin) {
    yMin = word->yMin;
  }
  if (word->yMax > yMax) {
    yMax = word->yMax;
  }

  // make room for the combined text
  if (total > size) {
    size = total;
    text = (Unicode *)grealloc(text, size * sizeof(Unicode));
    edge = (double *)grealloc(edge, (size + 1) * sizeof(double));
  }

  for (k = 0; k < extra; ++k) {
    text[len + k] = word->text[k];
  }
  // edge has one more entry than text: the trailing edge
  for (k = 0; k <= extra; ++k) {
    edge[len + k] = word->edge[k];
  }

  len = total;
  charLen += word->charLen;
}
|
|
|
// Three-way comparison of this word against [word] along the reading
// direction: returns -1, 0 or 1.  The rotation of *this* word selects
// which coordinate is compared.
inline int TextWord::primaryCmp(TextWord *word) {
  double diff;

  if (rot == 0) {
    diff = xMin - word->xMin;
  } else if (rot == 1) {
    diff = yMin - word->yMin;
  } else if (rot == 2) {
    diff = word->xMax - xMax;
  } else if (rot == 3) {
    diff = word->yMax - yMax;
  } else {
    diff = 0;
  }

  if (diff < 0) {
    return -1;
  }
  if (diff > 0) {
    return 1;
  }
  return 0;
}
|
|
|
// Gap, along the reading direction, between the end of this word and
// the start of [word].  Negative when the two overlap.  Returns 0 for a
// rotation outside 0..3.
double TextWord::primaryDelta(TextWord *word) {
  switch (rot) {
  case 0:
    return word->xMin - xMax;
  case 1:
    return word->yMin - yMax;
  case 2:
    return xMin - word->xMax;
  case 3:
    return yMin - word->yMax;
  }
  return 0;
}
|
|
|
int TextWord::cmpYX(const void *p1, const void *p2) { |
|
TextWord *word1 = *(TextWord **)p1; |
|
TextWord *word2 = *(TextWord **)p2; |
|
double cmp; |
|
|
|
cmp = word1->yMin - word2->yMin; |
|
if (cmp == 0) { |
|
cmp = word1->xMin - word2->xMin; |
|
} |
|
return cmp < 0 ? -1 : cmp > 0 ? 1 : 0; |
|
} |
|
|
|
#if TEXTOUT_WORD_LIST |
|
|
|
// Return the word's text converted with the global text encoding, as
// a newly allocated GString.  If no text encoding is available the
// returned string is empty.
GString *TextWord::getText() {
  GString *result = new GString();
  UnicodeMap *encoding = globalParams->getTextEncoding();

  if (encoding) {
    char bytes[8];
    int k;

    for (k = 0; k < len; ++k) {
      const int nBytes = encoding->mapUnicode(text[k], bytes, sizeof(bytes));
      result->append(bytes, nBytes);
    }
    // drop the reference obtained from getTextEncoding()
    encoding->decRefCnt();
  }
  return result;
}
|
|
|
#endif // TEXTOUT_WORD_LIST |
|
|
|
//------------------------------------------------------------------------ |
|
// TextPool |
|
//------------------------------------------------------------------------ |
|
|
|
// Create an empty pool.  "Empty" is encoded as minBaseIdx > maxBaseIdx;
// the bucket array is allocated by the first addWord().
TextPool::TextPool() {
  pool = NULL;
  minBaseIdx = 0;
  maxBaseIdx = -1;

  // no insertion cursor yet
  cursor = NULL;
  cursorBaseIdx = -1;
}
|
|
|
// Delete every word still held in the pool, then the bucket array.
TextPool::~TextPool() {
  const int nBuckets = maxBaseIdx - minBaseIdx + 1;
  int b;

  for (b = 0; b < nBuckets; ++b) {
    TextWord *w = pool[b];
    while (w) {
      // fetch the successor before the word is destroyed
      TextWord *following = w->next;
      delete w;
      w = following;
    }
  }
  gfree(pool);
}
|
|
|
int TextPool::getBaseIdx(double base) { |
|
int baseIdx; |
|
|
|
baseIdx = (int)(base / textPoolStep); |
|
if (baseIdx < minBaseIdx) { |
|
return minBaseIdx; |
|
} |
|
if (baseIdx > maxBaseIdx) { |
|
return maxBaseIdx; |
|
} |
|
return baseIdx; |
|
} |
|
|
|
// Insert [word] into the bucket selected by its baseline.  The bucket
// array is allocated on first use and grown (with 128 buckets of slack)
// whenever the baseline falls outside the current range.  Within a
// bucket, words are kept ordered by primaryCmp(); the most recently
// inserted word is remembered as a cursor so that a later word sorting
// after it in the same bucket can skip the start of the list.
void TextPool::addWord(TextWord *word) {
  TextWord **grown;
  TextWord *prev, *cur;
  int slot, lo, hi, k, nOld, nPad;

  slot = (int)(word->base / textPoolStep);

  if (minBaseIdx > maxBaseIdx) {
    // empty pool: allocate buckets centered on this baseline
    minBaseIdx = slot - 128;
    maxBaseIdx = slot + 128;
    nOld = maxBaseIdx - minBaseIdx + 1;
    pool = (TextWord **)gmalloc(nOld * sizeof(TextWord *));
    for (k = 0; k < nOld; ++k) {
      pool[k] = NULL;
    }
  } else if (slot < minBaseIdx) {
    // grow downward: new array with empty buckets in front of the
    // existing ones
    lo = slot - 128;
    nPad = minBaseIdx - lo;
    nOld = maxBaseIdx - minBaseIdx + 1;
    grown = (TextWord **)gmalloc((maxBaseIdx - lo + 1) * sizeof(TextWord *));
    for (k = 0; k < nPad; ++k) {
      grown[k] = NULL;
    }
    memcpy(&grown[nPad], pool, nOld * sizeof(TextWord *));
    gfree(pool);
    pool = grown;
    minBaseIdx = lo;
  } else if (slot > maxBaseIdx) {
    // grow upward: extend in place and clear the new tail
    hi = slot + 128;
    pool = (TextWord **)grealloc(pool,
                                 (hi - minBaseIdx + 1) * sizeof(TextWord *));
    for (k = maxBaseIdx + 1 - minBaseIdx; k <= hi - minBaseIdx; ++k) {
      pool[k] = NULL;
    }
    maxBaseIdx = hi;
  }

  // pick the starting point of the search: the bucket head, or the
  // cursor when the new word is known to sort after it
  prev = NULL;
  cur = pool[slot - minBaseIdx];
  if (cursor && slot == cursorBaseIdx && word->primaryCmp(cursor) > 0) {
    prev = cursor;
    cur = cursor->next;
  }

  // advance past every word the new word sorts after
  while (cur && word->primaryCmp(cur) > 0) {
    prev = cur;
    cur = cur->next;
  }

  // splice the word in between prev and cur
  word->next = cur;
  if (prev) {
    prev->next = word;
  } else {
    pool[slot - minBaseIdx] = word;
  }

  cursor = word;
  cursorBaseIdx = slot;
}
|
|
|
//------------------------------------------------------------------------ |
|
// TextLine |
|
//------------------------------------------------------------------------ |
|
|
|
// Create an empty line belonging to block [blkA], with rotation [rotA]
// and baseline [baseA].  The bounding box starts out "empty", which is
// encoded as xMin > xMax.
TextLine::TextLine(TextBlock *blkA, int rotA, double baseA) {
  blk = blkA;
  rot = rotA;
  base = baseA;

  // empty bounding box
  xMin = 0;
  yMin = 0;
  xMax = -1;
  yMax = -1;

  // no words yet
  words = NULL;
  lastWord = NULL;
  next = NULL;

  // these are filled in by coalesce()
  text = NULL;
  edge = NULL;
  col = NULL;
  len = 0;
  convertedLen = 0;
  hyphenated = gFalse;
}
|
|
|
// Delete all words on the line and free the arrays built by
// coalesce().
TextLine::~TextLine() {
  TextWord *doomed;

  for (doomed = words; doomed; doomed = words) {
    // unlink before deleting so that [words] never dangles
    words = doomed->next;
    delete doomed;
  }
  gfree(text);
  gfree(edge);
  gfree(col);
}
|
|
|
// Append [word] to the end of the line's word list and extend the
// line's bounding box to include it.
void TextLine::addWord(TextWord *word) {
  // link at the tail of the list
  if (!lastWord) {
    words = word;
  } else {
    lastWord->next = word;
  }
  lastWord = word;

  // an empty box (xMin > xMax) simply adopts the word's box; this must
  // be evaluated before any of the limits is modified
  const GBool emptyBox = xMin > xMax;

  if (emptyBox || word->xMin < xMin) {
    xMin = word->xMin;
  }
  if (emptyBox || word->xMax > xMax) {
    xMax = word->xMax;
  }
  if (emptyBox || word->yMin < yMin) {
    yMin = word->yMin;
  }
  if (emptyBox || word->yMax > yMax) {
    yMax = word->yMax;
  }
}
|
|
|
// Gap, along the reading direction, between the end of this line and
// the start of [line]; negative when they overlap.  Returns 0 for a
// rotation outside 0..3.
double TextLine::primaryDelta(TextLine *line) {
  double gap;

  if (rot == 0) {
    gap = line->xMin - xMax;
  } else if (rot == 1) {
    gap = line->yMin - yMax;
  } else if (rot == 2) {
    gap = xMin - line->xMax;
  } else if (rot == 3) {
    gap = yMin - line->yMax;
  } else {
    gap = 0;
  }
  return gap;
}
|
|
|
// Three-way comparison (-1, 0, 1) of the starting positions of this
// line and [line] along the reading direction.
int TextLine::primaryCmp(TextLine *line) {
  double diff = 0;

  switch (rot) {
  case 0: diff = xMin - line->xMin; break;
  case 1: diff = yMin - line->yMin; break;
  case 2: diff = line->xMax - xMax; break;
  case 3: diff = line->yMax - yMax; break;
  }

  if (diff < 0) {
    return -1;
  }
  if (diff > 0) {
    return 1;
  }
  return 0;
}
|
|
|
// Three-way comparison (-1, 0, 1) of the baselines of this line and
// [line].  For rotations 0 and 3 a smaller baseline sorts first; for
// the other rotations the order is reversed.
int TextLine::secondaryCmp(TextLine *line) {
  double diff;

  if (rot == 0 || rot == 3) {
    diff = base - line->base;
  } else {
    diff = line->base - base;
  }

  if (diff < 0) {
    return -1;
  }
  return diff > 0 ? 1 : 0;
}
|
|
|
// Compare by baseline first (secondaryCmp) and, only when the
// baselines compare equal, by starting position (primaryCmp).
int TextLine::cmpYX(TextLine *line) {
  const int bySecondary = secondaryCmp(line);

  return bySecondary != 0 ? bySecondary : primaryCmp(line);
}
|
|
|
int TextLine::cmpXY(const void *p1, const void *p2) { |
|
TextLine *line1 = *(TextLine **)p1; |
|
TextLine *line2 = *(TextLine **)p2; |
|
int cmp; |
|
|
|
if ((cmp = line1->primaryCmp(line2))) { |
|
return cmp; |
|
} |
|
return line1->secondaryCmp(line2); |
|
} |
|
|
|
// Finish the line once all of its words have been added:
//   1. decide how wide a gap must be to count as a word break,
//   2. merge adjacent words that are closer than that and share font,
//      font size and consecutive character positions,
//   3. build the line's text / edge arrays (inserting a space after
//      each word flagged with spaceAfter),
//   4. build the col array (output column of each character when
//      converted with [uMap]) and convertedLen,
//   5. record whether the line ends in a hyphen.
//
// [uMap] may be NULL, in which case every column is 0.
//
// Differences from a naive implementation, all defensive:
//   - an empty word list is tolerated instead of dereferencing NULL;
//   - lastWord is re-pointed when the word it referred to is merged
//     away and deleted, so it never dangles;
//   - the hyphen test no longer reads text[-1] when the line is empty.
void TextLine::coalesce(UnicodeMap *uMap) {
  TextWord *word0, *word1;
  double space, delta, minSpace;
  GBool isUnicode;
  char buf[8];
  int i, j;

  if (words && words->next) {

    // compute the inter-word space threshold: if every word so far is
    // a single character, the text may be letter-spaced, so use the
    // smallest gap seen as the reference
    if (words->len > 1 || words->next->len > 1) {
      minSpace = 0;
    } else {
      minSpace = words->primaryDelta(words->next);
      for (word0 = words->next, word1 = word0->next;
           word1 && minSpace > 0;
           word0 = word1, word1 = word0->next) {
        if (word1->len > 1) {
          minSpace = 0;
        }
        delta = word0->primaryDelta(word1);
        if (delta < minSpace) {
          minSpace = delta;
        }
      }
    }
    if (minSpace <= 0) {
      space = maxCharSpacing * words->fontSize;
    } else {
      space = maxWideCharSpacingMul * minSpace;
    }

    // merge words
    word0 = words;
    word1 = words->next;
    while (word1) {
      if (word0->primaryDelta(word1) >= space) {
        // a real word break
        word0->spaceAfter = gTrue;
        word0 = word1;
        word1 = word1->next;
      } else if (word0->font == word1->font &&
                 fabs(word0->fontSize - word1->fontSize) <
                   maxWordFontSizeDelta * words->fontSize &&
                 word1->charPos == word0->charPos + word0->charLen) {
        // same font, (nearly) same size, consecutive in the content
        // stream: fold word1 into word0
        word0->merge(word1);
        word0->next = word1->next;
        if (word1 == lastWord) {
          // keep the tail pointer valid after word1 is destroyed
          lastWord = word0;
        }
        delete word1;
        word1 = word0->next;
      } else {
        word0 = word1;
        word1 = word1->next;
      }
    }
  }

  // build the line text
  isUnicode = uMap ? uMap->isUnicode() : gFalse;
  len = 0;
  for (word1 = words; word1; word1 = word1->next) {
    len += word1->len;
    if (word1->spaceAfter) {
      ++len;
    }
  }
  text = (Unicode *)gmalloc(len * sizeof(Unicode));
  edge = (double *)gmalloc((len + 1) * sizeof(double));
  i = 0;
  for (word1 = words; word1; word1 = word1->next) {
    for (j = 0; j < word1->len; ++j) {
      text[i] = word1->text[j];
      edge[i] = word1->edge[j];
      ++i;
    }
    // trailing edge of the word; doubles as the leading edge of the
    // inserted space, if any
    edge[i] = word1->edge[word1->len];
    if (word1->spaceAfter) {
      text[i] = (Unicode)0x0020;
      ++i;
    }
  }

  // compute convertedLen and set up the col array
  col = (int *)gmalloc((len + 1) * sizeof(int));
  convertedLen = 0;
  for (i = 0; i < len; ++i) {
    col[i] = convertedLen;
    if (isUnicode) {
      ++convertedLen;
    } else if (uMap) {
      convertedLen += uMap->mapUnicode(text[i], buf, sizeof(buf));
    }
  }
  col[len] = convertedLen;

  // check for hyphen at end of line
  //~ need to check for other chars used as hyphens
  hyphenated = len > 0 && text[len - 1] == (Unicode)'-';
}
|
|
|
//------------------------------------------------------------------------ |
|
// TextLineFrag |
|
//------------------------------------------------------------------------ |
|
|
|
class TextLineFrag { |
|
public: |
|
|
|
TextLine *line; // the line object |
|
int start, len; // offset and length of this fragment |
|
// (in Unicode chars) |
|
double xMin, xMax; // bounding box coordinates |
|
double yMin, yMax; |
|
double base; // baseline virtual coordinate |
|
int col; // first column |
|
|
|
void init(TextLine *lineA, int startA, int lenA); |
|
void computeCoords(GBool oneRot); |
|
|
|
static int cmpYXPrimaryRot(const void *p1, const void *p2); |
|
static int cmpYXLineRot(const void *p1, const void *p2); |
|
static int cmpXYLineRot(const void *p1, const void *p2); |
|
}; |
|
|
|
// Bind this fragment to [lenA] characters of [lineA], beginning at
// character offset [startA].  The fragment's first column is read from
// the line's col array, so the line must already have been coalesced.
void TextLineFrag::init(TextLine *lineA, int startA, int lenA) {
  col = lineA->col[startA];
  line = lineA;
  start = startA;
  len = lenA;
}
|
|
|
// Compute the fragment's bounding box and baseline.
//
// oneRot true:  the coordinates are taken directly from the line, in
//               the line's own rotation.
// oneRot false: unless both the line and the page's primary rotation
//               are 0, the coordinates are first normalized relative
//               to the line's block -- d0/d1 along the reading
//               direction, d2/d3/d4 (min, max, baseline) across it --
//               and then mapped back onto the block according to the
//               page's primary rotation.
//
// Fix: in the primaryRot == 0 mapping the baseline was computed from
// the member [base] itself, which has not been assigned at that point,
// instead of from the normalized baseline d4 that every other branch
// uses.
void TextLineFrag::computeCoords(GBool oneRot) {
  TextBlock *blk;
  double d0, d1, d2, d3, d4;
  double blkW, blkH;

  if (oneRot) {

    // edge[start] / edge[start + len] bracket the fragment along the
    // reading direction; for rotations 2 and 3 they run backwards
    switch (line->rot) {
    case 0:
      xMin = line->edge[start];
      xMax = line->edge[start + len];
      yMin = line->yMin;
      yMax = line->yMax;
      break;
    case 1:
      xMin = line->xMin;
      xMax = line->xMax;
      yMin = line->edge[start];
      yMax = line->edge[start + len];
      break;
    case 2:
      xMin = line->edge[start + len];
      xMax = line->edge[start];
      yMin = line->yMin;
      yMax = line->yMax;
      break;
    case 3:
      xMin = line->xMin;
      xMax = line->xMax;
      yMin = line->edge[start + len];
      yMax = line->edge[start];
      break;
    }
    base = line->base;

  } else {

    if (line->rot == 0 && line->blk->page->primaryRot == 0) {

      // nothing to rotate
      xMin = line->edge[start];
      xMax = line->edge[start + len];
      yMin = line->yMin;
      yMax = line->yMax;
      base = line->base;

    } else {

      blk = line->blk;
      blkW = blk->xMax - blk->xMin;
      blkH = blk->yMax - blk->yMin;
      d0 = line->edge[start];
      d1 = line->edge[start + len];
      d2 = d3 = d4 = 0; // make gcc happy

      // step 1: normalize relative to the block, in the line's rotation
      switch (line->rot) {
      case 0:
        d2 = line->yMin;
        d3 = line->yMax;
        d4 = line->base;
        d0 = (d0 - blk->xMin) / blkW;
        d1 = (d1 - blk->xMin) / blkW;
        d2 = (d2 - blk->yMin) / blkH;
        d3 = (d3 - blk->yMin) / blkH;
        d4 = (d4 - blk->yMin) / blkH;
        break;
      case 1:
        d2 = line->xMax;
        d3 = line->xMin;
        d4 = line->base;
        d0 = (d0 - blk->yMin) / blkH;
        d1 = (d1 - blk->yMin) / blkH;
        d2 = (blk->xMax - d2) / blkW;
        d3 = (blk->xMax - d3) / blkW;
        d4 = (blk->xMax - d4) / blkW;
        break;
      case 2:
        d2 = line->yMax;
        d3 = line->yMin;
        d4 = line->base;
        d0 = (blk->xMax - d0) / blkW;
        d1 = (blk->xMax - d1) / blkW;
        d2 = (blk->yMax - d2) / blkH;
        d3 = (blk->yMax - d3) / blkH;
        d4 = (blk->yMax - d4) / blkH;
        break;
      case 3:
        d2 = line->xMin;
        d3 = line->xMax;
        d4 = line->base;
        d0 = (blk->yMax - d0) / blkH;
        d1 = (blk->yMax - d1) / blkH;
        d2 = (d2 - blk->xMin) / blkW;
        d3 = (d3 - blk->xMin) / blkW;
        d4 = (d4 - blk->xMin) / blkW;
        break;
      }

      // step 2: map the normalized values back onto the block, in the
      // page's primary rotation
      switch (line->blk->page->primaryRot) {
      case 0:
        xMin = blk->xMin + d0 * blkW;
        xMax = blk->xMin + d1 * blkW;
        yMin = blk->yMin + d2 * blkH;
        yMax = blk->yMin + d3 * blkH;
        base = blk->yMin + d4 * blkH;
        break;
      case 1:
        xMin = blk->xMax - d3 * blkW;
        xMax = blk->xMax - d2 * blkW;
        yMin = blk->yMin + d0 * blkH;
        yMax = blk->yMin + d1 * blkH;
        base = blk->xMax - d4 * blkW;
        break;
      case 2:
        xMin = blk->xMax - d1 * blkW;
        xMax = blk->xMax - d0 * blkW;
        yMin = blk->yMax - d3 * blkH;
        yMax = blk->yMax - d2 * blkH;
        base = blk->yMax - d4 * blkH;
        break;
      case 3:
        xMin = blk->xMin + d2 * blkW;
        xMax = blk->xMin + d3 * blkW;
        yMin = blk->yMax - d1 * blkH;
        yMax = blk->yMax - d0 * blkH;
        base = blk->xMin + d4 * blkW;
        break;
      }

    }
  }
}
|
|
|
int TextLineFrag::cmpYXPrimaryRot(const void *p1, const void *p2) { |
|
TextLineFrag *frag1 = (TextLineFrag *)p1; |
|
TextLineFrag *frag2 = (TextLineFrag *)p2; |
|
double cmp; |
|
|
|
cmp = 0; // make gcc happy |
|
switch (frag1->line->blk->page->primaryRot) { |
|
case 0: |
|
if ((cmp = frag1->yMin - frag2->yMin) == 0) { |
|
cmp = frag1->xMin - frag2->xMin; |
|
} |
|
break; |
|
case 1: |
|
if ((cmp = frag2->xMax - frag1->xMax) == 0) { |
|
cmp = frag1->yMin - frag2->yMin; |
|
} |
|
break; |
|
case 2: |
|
if ((cmp = frag2->yMin - frag1->yMin) == 0) { |
|
cmp = frag2->xMax - frag1->xMax; |
|
} |
|
break; |
|
case 3: |
|
if ((cmp = frag1->xMax - frag2->xMax) == 0) { |
|
cmp = frag2->yMax - frag1->yMax; |
|
} |
|
break; |
|
} |
|
return cmp < 0 ? -1 : cmp > 0 ? 1 : 0; |
|
} |
|
|
|
int TextLineFrag::cmpYXLineRot(const void *p1, const void *p2) { |
|
TextLineFrag *frag1 = (TextLineFrag *)p1; |
|
TextLineFrag *frag2 = (TextLineFrag *)p2; |
|
double cmp; |
|
|
|
cmp = 0; // make gcc happy |
|
switch (frag1->line->rot) { |
|
case 0: |
|
if ((cmp = frag1->yMin - frag2->yMin) == 0) { |
|
cmp = frag1->xMin - frag2->xMin; |
|
} |
|
break; |
|
case 1: |
|
if ((cmp = frag2->xMax - frag1->xMax) == 0) { |
|
cmp = frag1->yMin - frag2->yMin; |
|
} |
|
break; |
|
case 2: |
|
if ((cmp = frag2->yMin - frag1->yMin) == 0) { |
|
cmp = frag2->xMax - frag1->xMax; |
|
} |
|
break; |
|
case 3: |
|
if ((cmp = frag1->xMax - frag2->xMax) == 0) { |
|
cmp = frag2->yMax - frag1->yMax; |
|
} |
|
break; |
|
} |
|
return cmp < 0 ? -1 : cmp > 0 ? 1 : 0; |
|
} |
|
|
|
int TextLineFrag::cmpXYLineRot(const void *p1, const void *p2) { |
|
TextLineFrag *frag1 = (TextLineFrag *)p1; |
|
TextLineFrag *frag2 = (TextLineFrag *)p2; |
|
double cmp; |
|
|
|
cmp = 0; // make gcc happy |
|
switch (frag1->line->rot) { |
|
case 0: |
|
if ((cmp = frag1->xMin - frag2->xMin) == 0) { |
|
cmp = frag1->yMin - frag2->yMin; |
|
} |
|
break; |
|
case 1: |
|
if ((cmp = frag1->yMin - frag2->yMin) == 0) { |
|
cmp = frag2->xMax - frag1->xMax; |
|
} |
|
break; |
|
case 2: |
|
if ((cmp = frag2->xMax - frag1->xMax) == 0) { |
|
cmp = frag2->yMin - frag1->yMin; |
|
} |
|
break; |
|
case 3: |
|
if ((cmp = frag2->yMax - frag1->yMax) == 0) { |
|
cmp = frag1->xMax - frag2->xMax; |
|
} |
|
break; |
|
} |
|
return cmp < 0 ? -1 : cmp > 0 ? 1 : 0; |
|
} |
|
|
|
//------------------------------------------------------------------------ |
|
// TextBlock |
|
//------------------------------------------------------------------------ |
|
|
|
// Create an empty block on [pageA] with rotation [rotA].  The bounding
// box starts out "empty" (xMin > xMax), and the primary-direction
// limits initially run from 0 to the page width.
TextBlock::TextBlock(TextPage *pageA, int rotA) {
  page = pageA;
  rot = rotA;

  // empty bounding box
  xMin = 0;
  yMin = 0;
  xMax = -1;
  yMax = -1;

  priMin = 0;
  priMax = pageA->pageWidth;

  // words are collected here until coalesce() turns them into lines
  pool = new TextPool();
  lines = NULL;
  curLine = NULL;

  next = NULL;
  stackNext = NULL;
}
|
|
|
// Delete the word pool and every line of the block.
TextBlock::~TextBlock() {
  TextLine *doomed;

  delete pool;
  for (doomed = lines; doomed; doomed = lines) {
    // unlink before deleting so that [lines] never dangles
    lines = doomed->next;
    delete doomed;
  }
}
|
|
|
// Hand [word] over to the block's pool and extend the block's bounding
// box to include it.
void TextBlock::addWord(TextWord *word) {
  pool->addWord(word);

  // first word: the empty box (xMin > xMax) adopts the word's box
  if (xMin > xMax) {
    xMin = word->xMin;
    xMax = word->xMax;
    yMin = word->yMin;
    yMax = word->yMax;
    return;
  }

  // otherwise grow each side as needed
  if (word->xMin < xMin) {
    xMin = word->xMin;
  }
  if (word->xMax > xMax) {
    xMax = word->xMax;
  }
  if (word->yMin < yMin) {
    yMin = word->yMin;
  }
  if (word->yMax > yMax) {
    yMax = word->yMax;
  }
}
|
|
|
// Turn the words collected in the block's pool into lines:
//   1. discard duplicated words (fake boldface, drop shadows),
//   2. repeatedly pull a starting word out of the pool and extend it
//      into a line with the words that follow it on (nearly) the same
//      baseline; each finished line is inserted into [lines] in
//      cmpYX order and coalesced with [uMap],
//   3. assign output columns to the lines so that lines which overlap
//      along the reading direction line up with each other.
// Sets charCount, nLines and nColumns.
//
// Fixes relative to the straightforward version:
//   - The duplicate search always extends *upward* from the current
//     bucket, to the bucket of (base + secDelta).  The scan only ever
//     walks buckets in increasing order, and the duplicate test is
//     symmetric, so this covers every pair.  Using (base - secDelta)
//     for rotations 1 and 2 produced an upper limit at or below the
//     current bucket, so duplicates lying in a higher bucket were never
//     examined for those rotations.
//   - If no line was built, the column assignment (zero-sized
//     allocation, qsort on a possibly null array) is skipped.
void TextBlock::coalesce(UnicodeMap *uMap) {
  TextWord *word0, *word1, *word2, *bestWord0, *bestWord1, *lastWord;
  TextLine *line, *line0, *line1;
  int poolMinBaseIdx, startBaseIdx, minBaseIdx, maxBaseIdx;
  int baseIdx, bestWordBaseIdx, idx0, idx1;
  double minBase, maxBase;
  double fontSize, delta, priDelta, secDelta;
  TextLine **lineArray;
  GBool found;
  int col1, col2;
  int i, j, k;

  // discard duplicated text (fake boldface, drop shadows)
  for (idx0 = pool->minBaseIdx; idx0 <= pool->maxBaseIdx; ++idx0) {
    word0 = pool->getPool(idx0);
    while (word0) {
      priDelta = dupMaxPriDelta * word0->fontSize;
      secDelta = dupMaxSecDelta * word0->fontSize;
      // highest bucket that can still hold a baseline within secDelta
      maxBaseIdx = pool->getBaseIdx(word0->base + secDelta);
      found = gFalse;
      word1 = word2 = NULL; // make gcc happy
      for (idx1 = idx0; idx1 <= maxBaseIdx; ++idx1) {
        // word1 trails word2 by one node so that word2 can be unlinked
        if (idx1 == idx0) {
          word1 = word0;
          word2 = word0->next;
        } else {
          word1 = NULL;
          word2 = pool->getPool(idx1);
        }
        for (; word2; word1 = word2, word2 = word2->next) {
          if (word2->len == word0->len &&
              !memcmp(word2->text, word0->text,
                      word0->len * sizeof(Unicode))) {
            switch (rot) {
            case 0:
            case 2:
              found = fabs(word0->xMin - word2->xMin) < priDelta &&
                      fabs(word0->xMax - word2->xMax) < priDelta &&
                      fabs(word0->yMin - word2->yMin) < secDelta &&
                      fabs(word0->yMax - word2->yMax) < secDelta;
              break;
            case 1:
            case 3:
              found = fabs(word0->xMin - word2->xMin) < secDelta &&
                      fabs(word0->xMax - word2->xMax) < secDelta &&
                      fabs(word0->yMin - word2->yMin) < priDelta &&
                      fabs(word0->yMax - word2->yMax) < priDelta;
              break;
            }
          }
          if (found) {
            break;
          }
        }
        if (found) {
          break;
        }
      }
      if (found) {
        // drop the duplicate and re-examine word0: there may be more
        if (word1) {
          word1->next = word2->next;
        } else {
          pool->setPool(idx1, word2->next);
        }
        delete word2;
      } else {
        word0 = word0->next;
      }
    }
  }

  // build the lines
  curLine = NULL;
  poolMinBaseIdx = pool->minBaseIdx;
  charCount = 0;
  nLines = 0;
  while (1) {

    // find the first non-empty line in the pool
    for (;
         poolMinBaseIdx <= pool->maxBaseIdx && !pool->getPool(poolMinBaseIdx);
         ++poolMinBaseIdx) ;
    if (poolMinBaseIdx > pool->maxBaseIdx) {
      break;
    }

    // look for the left-most word in the first four lines of the
    // pool -- this avoids starting with a superscript word
    startBaseIdx = poolMinBaseIdx;
    for (baseIdx = poolMinBaseIdx + 1;
         baseIdx < poolMinBaseIdx + 4 && baseIdx <= pool->maxBaseIdx;
         ++baseIdx) {
      if (!pool->getPool(baseIdx)) {
        continue;
      }
      if (pool->getPool(baseIdx)->primaryCmp(pool->getPool(startBaseIdx))
          < 0) {
        startBaseIdx = baseIdx;
      }
    }

    // create a new line
    word0 = pool->getPool(startBaseIdx);
    pool->setPool(startBaseIdx, word0->next);
    word0->next = NULL;
    line = new TextLine(this, word0->rot, word0->base);
    line->addWord(word0);
    lastWord = word0;

    // compute the search range
    fontSize = word0->fontSize;
    minBase = word0->base - maxIntraLineDelta * fontSize;
    maxBase = word0->base + maxIntraLineDelta * fontSize;
    minBaseIdx = pool->getBaseIdx(minBase);
    maxBaseIdx = pool->getBaseIdx(maxBase);

    // find the rest of the words in this line
    while (1) {

      // find the left-most word whose baseline is in the range for
      // this line
      bestWordBaseIdx = 0;
      bestWord0 = bestWord1 = NULL;
      for (baseIdx = minBaseIdx; baseIdx <= maxBaseIdx; ++baseIdx) {
        for (word0 = NULL, word1 = pool->getPool(baseIdx);
             word1;
             word0 = word1, word1 = word1->next) {
          if (word1->base >= minBase &&
              word1->base <= maxBase &&
              (delta = lastWord->primaryDelta(word1)) >=
                minCharSpacing * fontSize) {
            if (delta < maxWordSpacing * fontSize &&
                (!bestWord1 || word1->primaryCmp(bestWord1) < 0)) {
              bestWordBaseIdx = baseIdx;
              bestWord0 = word0;
              bestWord1 = word1;
            }
            // only the first qualifying word of each bucket is
            // considered
            break;
          }
        }
      }
      if (!bestWord1) {
        break;
      }

      // remove it from the pool, and add it to the line
      if (bestWord0) {
        bestWord0->next = bestWord1->next;
      } else {
        pool->setPool(bestWordBaseIdx, bestWord1->next);
      }
      bestWord1->next = NULL;
      line->addWord(bestWord1);
      lastWord = bestWord1;
    }

    // add the line; start the search at the previously inserted line
    // when the new line sorts after it
    if (curLine && line->cmpYX(curLine) > 0) {
      line0 = curLine;
      line1 = curLine->next;
    } else {
      line0 = NULL;
      line1 = lines;
    }
    for (;
         line1 && line->cmpYX(line1) > 0;
         line0 = line1, line1 = line1->next) ;
    if (line0) {
      line0->next = line;
    } else {
      lines = line;
    }
    line->next = line1;
    curLine = line;
    line->coalesce(uMap);
    charCount += line->len;
    ++nLines;
  }

  nColumns = 0;
  if (nLines == 0) {
    // nothing to sort, nothing to assign
    return;
  }

  // sort lines into xy order for column assignment
  lineArray = (TextLine **)gmalloc(nLines * sizeof(TextLine *));
  for (line = lines, i = 0; line; line = line->next, ++i) {
    lineArray[i] = line;
  }
  qsort(lineArray, nLines, sizeof(TextLine *), &TextLine::cmpXY);

  // column assignment: each line is shifted right by the largest
  // column demanded by any line sorted before it
  for (i = 0; i < nLines; ++i) {
    line0 = lineArray[i];
    col1 = 0;
    for (j = 0; j < i; ++j) {
      line1 = lineArray[j];
      if (line1->primaryDelta(line0) >= 0) {
        // line0 starts after line1 ends: leave one column between them
        col2 = line1->col[line1->len] + 1;
      } else {
        // line0 starts inside line1: find the character of line1 whose
        // midpoint line0's start does not pass
        k = 0; // make gcc happy
        switch (rot) {
        case 0:
          for (k = 0;
               k < line1->len &&
                 line0->xMin >= 0.5 * (line1->edge[k] + line1->edge[k+1]);
               ++k) ;
          break;
        case 1:
          for (k = 0;
               k < line1->len &&
                 line0->yMin >= 0.5 * (line1->edge[k] + line1->edge[k+1]);
               ++k) ;
          break;
        case 2:
          for (k = 0;
               k < line1->len &&
                 line0->xMax <= 0.5 * (line1->edge[k] + line1->edge[k+1]);
               ++k) ;
          break;
        case 3:
          for (k = 0;
               k < line1->len &&
                 line0->yMax <= 0.5 * (line1->edge[k] + line1->edge[k+1]);
               ++k) ;
          break;
        }
        col2 = line1->col[k];
      }
      if (col2 > col1) {
        col1 = col2;
      }
    }
    for (k = 0; k <= line0->len; ++k) {
      line0->col[k] += col1;
    }
    if (line0->col[line0->len] > nColumns) {
      nColumns = line0->col[line0->len];
    }
  }
  gfree(lineArray);
}
|
|
|
// Narrow this block's [priMin, priMax] interval, taking the neighboring
// block <blk> into account.  The interval is measured along x when the
// page's primary rotation is 0 or 2, and along y when it is 1 or 3.
// Only neighbors that overlap this block along the other axis are
// considered.  A neighbor that starts before this block's low edge can
// raise priMin (to the neighbor's far edge); a neighbor that ends after
// this block's high edge can lower priMax (to the neighbor's near edge).
// Candidates are clamped so that the interval always contains this
// block's own extent along the measured axis.
void TextBlock::updatePriMinMax(TextBlock *blk) {
  double newPriMin, newPriMax;
  double ownMin, ownMax;
  GBool gotPriMin, gotPriMax;

  gotPriMin = gotPriMax = gFalse;
  newPriMin = newPriMax = 0; // make gcc happy
  ownMin = ownMax = 0;       // make gcc happy
  switch (page->primaryRot) {
  case 0:
  case 2:
    // horizontal primary axis: limits are x coordinates
    ownMin = xMin;
    ownMax = xMax;
    if (blk->yMin < yMax && blk->yMax > yMin) {
      if (blk->xMin < xMin) {
        newPriMin = blk->xMax;
        gotPriMin = gTrue;
      }
      if (blk->xMax > xMax) {
        newPriMax = blk->xMin;
        gotPriMax = gTrue;
      }
    }
    break;
  case 1:
  case 3:
    // vertical primary axis: limits are y coordinates, so they must be
    // clamped against yMin/yMax (not xMin/xMax)
    ownMin = yMin;
    ownMax = yMax;
    if (blk->xMin < xMax && blk->xMax > xMin) {
      if (blk->yMin < yMin) {
        newPriMin = blk->yMax;
        gotPriMin = gTrue;
      }
      if (blk->yMax > yMax) {
        newPriMax = blk->yMin;
        gotPriMax = gTrue;
      }
    }
    break;
  }
  if (gotPriMin) {
    // never let the lower limit move past this block's own low edge
    if (newPriMin > ownMin) {
      newPriMin = ownMin;
    }
    if (newPriMin > priMin) {
      priMin = newPriMin;
    }
  }
  if (gotPriMax) {
    // never let the upper limit move past this block's own high edge
    if (newPriMax < ownMax) {
      newPriMax = ownMax;
    }
    if (newPriMax < priMax) {
      priMax = newPriMax;
    }
  }
}
|
|
|
int TextBlock::cmpXYPrimaryRot(const void *p1, const void *p2) { |
|
TextBlock *blk1 = *(TextBlock **)p1; |
|
TextBlock *blk2 = *(TextBlock **)p2; |
|
double cmp; |
|
|
|
cmp = 0; // make gcc happy |
|
switch (blk1->page->primaryRot) { |
|
case 0: |
|
if ((cmp = blk1->xMin - blk2->xMin) == 0) { |
|
cmp = blk1->yMin - blk2->yMin; |
|
} |
|
break; |
|
case 1: |
|
if ((cmp = blk1->yMin - blk2->yMin) == 0) { |
|
cmp = blk2->xMax - blk1->xMax; |
|
} |
|
break; |
|
case 2: |
|
if ((cmp = blk2->xMax - blk1->xMax) == 0) { |
|
cmp = blk2->yMin - blk1->yMin; |
|
} |
|
break; |
|
case 3: |
|
if ((cmp = blk2->yMax - blk1->yMax) == 0) { |
|
cmp = blk1->xMax - blk2->xMax; |
|
} |
|
break; |
|
} |
|
return cmp < 0 ? -1 : cmp > 0 ? 1 : 0; |
|
} |
|
|
|
int TextBlock::cmpYXPrimaryRot(const void *p1, const void *p2) { |
|
TextBlock *blk1 = *(TextBlock **)p1; |
|
TextBlock *blk2 = *(TextBlock **)p2; |
|
double cmp; |
|
|
|
cmp = 0; // make gcc happy |
|
switch (blk1->page->primaryRot) { |
|
case 0: |
|
if ((cmp = blk1->yMin - blk2->yMin) == 0) { |
|
cmp = blk1->xMin - blk2->xMin; |
|
} |
|
break; |
|
case 1: |
|
if ((cmp = blk2->xMax - blk1->xMax) == 0) { |
|
cmp = blk1->yMin - blk2->yMin; |
|
} |
|
break; |
|
case 2: |
|
if ((cmp = blk2->yMin - blk1->yMin) == 0) { |
|
cmp = blk2->xMax - blk1->xMax; |
|
} |
|
break; |
|
case 3: |
|
if ((cmp = blk1->xMax - blk2->xMax) == 0) { |
|
cmp = blk2->yMax - blk1->yMax; |
|
} |
|
break; |
|
} |
|
return cmp < 0 ? -1 : cmp > 0 ? 1 : 0; |
|
} |
|
|
|
// Compare this block with <blk> along the axis selected by this block's
// rotation: xMin (rot 0), yMin (rot 1), reversed xMax (rot 2) or
// reversed yMax (rot 3).  Returns -1, 0 or 1.
int TextBlock::primaryCmp(TextBlock *blk) {
  double diff;

  if (rot == 0) {
    diff = xMin - blk->xMin;
  } else if (rot == 1) {
    diff = yMin - blk->yMin;
  } else if (rot == 2) {
    diff = blk->xMax - xMax;
  } else if (rot == 3) {
    diff = blk->yMax - yMax;
  } else {
    diff = 0;
  }
  // sign of diff
  return (diff > 0) - (diff < 0);
}
|
|
|
// Signed gap between this block and <blk>, measured across the axis
// selected by this block's rotation:
//   rot 0: blk->yMin - yMax      rot 1: xMin - blk->xMax
//   rot 2: yMin - blk->yMax      rot 3: blk->xMin - xMax
// Any other rotation value yields 0.
double TextBlock::secondaryDelta(TextBlock *blk) {
  switch (rot) {
  case 0:
    return blk->yMin - yMax;
  case 1:
    return xMin - blk->xMax;
  case 2:
    return yMin - blk->yMax;
  case 3:
    return blk->xMin - xMax;
  }
  return 0;
}
|
|
|
// Returns true if this block fits inside <blk>'s [priMin, priMax]
// interval (x extent for page rotations 0 and 2, y extent for 1 and 3)
// and also lies past <blk> in the direction associated with the page's
// primary rotation (yMin, xMax, yMax or xMin comparison respectively).
GBool TextBlock::isBelow(TextBlock *blk) {
  switch (page->primaryRot) {
  case 0:
    return xMin >= blk->priMin && xMax <= blk->priMax &&
           yMin > blk->yMin;
  case 1:
    return yMin >= blk->priMin && yMax <= blk->priMax &&
           xMax < blk->xMax;
  case 2:
    return xMin >= blk->priMin && xMax <= blk->priMax &&
           yMax < blk->yMax;
  case 3:
    return yMin >= blk->priMin && yMax <= blk->priMax &&
           xMin > blk->xMin;
  }
  // unknown rotation
  return gFalse;
}
|
|
|
//------------------------------------------------------------------------ |
|
// TextFlow |
|
//------------------------------------------------------------------------ |
|
|
|
// Create a flow on page <pageA> whose block list initially holds only
// <blk>; the flow's bounding box and priMin/priMax are copied from it.
TextFlow::TextFlow(TextPage *pageA, TextBlock *blk) {
  page = pageA;
  next = NULL;

  // single-element block list
  blocks = blk;
  lastBlk = blk;

  // geometry starts out identical to the first block's
  xMin = blk->xMin;
  yMin = blk->yMin;
  xMax = blk->xMax;
  yMax = blk->yMax;
  priMin = blk->priMin;
  priMax = blk->priMax;
}
|
|
|
// Delete every block on this flow's block list.
TextFlow::~TextFlow() {
  TextBlock *doomed;

  for (doomed = blocks; doomed; doomed = blocks) {
    // unlink the head before deleting it
    blocks = doomed->next;
    delete doomed;
  }
}
|
|
|
// Append <blk> to the end of this flow's block list and grow the flow's
// bounding box so that it also covers <blk>.
void TextFlow::addBlock(TextBlock *blk) {
  // link the block in at the tail
  if (!lastBlk) {
    blocks = blk;
  } else {
    lastBlk->next = blk;
  }
  lastBlk = blk;

  // extend the bounding box
  xMin = (blk->xMin < xMin) ? blk->xMin : xMin;
  xMax = (blk->xMax > xMax) ? blk->xMax : xMax;
  yMin = (blk->yMin < yMin) ? blk->yMin : yMin;
  yMax = (blk->yMax > yMax) ? blk->yMax : yMax;
}
|
|
|
// Returns true if <blk> may be appended to this flow: the font size of
// its first word must not exceed that of the first word of the flow's
// last block, and its extent (x for page rotations 0 and 2, y for 1 and
// 3) must lie within the flow's [priMin, priMax] interval.  The second
// argument is unused.
GBool TextFlow::blockFits(TextBlock *blk, TextBlock * /*prevBlk*/) {
  // lower blocks must not use a larger font
  if (blk->lines->words->fontSize > lastBlk->lines->words->fontSize) {
    return gFalse;
  }

  switch (page->primaryRot) {
  case 0:
  case 2:
    return blk->xMin >= priMin && blk->xMax <= priMax;
  case 1:
  case 3:
    return blk->yMin >= priMin && blk->yMax <= priMax;
  }
  return gFalse;
}
|
|
|
#if TEXTOUT_WORD_LIST |
|
|
|
//------------------------------------------------------------------------ |
|
// TextWordList |
|
//------------------------------------------------------------------------ |
|
|
|
// Build a list of the words on <text>, in one of three orders:
//   - raw order (text->rawOrder set): the order of text->rawWords;
//   - physical layout (<physLayout> set): all words from all flows,
//     sorted with TextWord::cmpYX;
//   - otherwise: flow / block / line / word traversal order.
// Only the word pointers are stored in the list.
TextWordList::TextWordList(TextPage *text, GBool physLayout) {
  TextFlow *flow;
  TextBlock *blk;
  TextLine *line;
  TextWord *word;
  TextWord **sorted;
  int total, idx;

  words = new GList();

  // raw order: copy the raw word chain as-is
  if (text->rawOrder) {
    word = text->rawWords;
    while (word) {
      words->append(word);
      word = word->next;
    }
    return;
  }

  // reading order: walk the flow hierarchy
  if (!physLayout) {
    for (flow = text->flows; flow; flow = flow->next) {
      for (blk = flow->blocks; blk; blk = blk->next) {
        for (line = blk->lines; line; line = line->next) {
          for (word = line->words; word; word = word->next) {
            words->append(word);
          }
        }
      }
    }
    return;
  }

  // physical layout: this is inefficient, but it's also the least
  // useful of these three cases -- count the words, gather them into a
  // temporary array, sort it, then copy it into the list
  total = 0;
  for (flow = text->flows; flow; flow = flow->next) {
    for (blk = flow->blocks; blk; blk = blk->next) {
      for (line = blk->lines; line; line = line->next) {
        for (word = line->words; word; word = word->next) {
          ++total;
        }
      }
    }
  }

  sorted = (TextWord **)gmalloc(total * sizeof(TextWord *));
  idx = 0;
  for (flow = text->flows; flow; flow = flow->next) {
    for (blk = flow->blocks; blk; blk = blk->next) {
      for (line = blk->lines; line; line = line->next) {
        for (word = line->words; word; word = word->next) {
          sorted[idx] = word;
          ++idx;
        }
      }
    }
  }

  qsort(sorted, total, sizeof(TextWord *), &TextWord::cmpYX);
  for (idx = 0; idx < total; ++idx) {
    words->append(sorted[idx]);
  }
  gfree(sorted);
}
|
|
|
// Release the list container itself; the TextWord objects it points to
// are not deleted here.
TextWordList::~TextWordList() {
  delete words;
}
|
|
|
int TextWordList::getLength() { |
|
return words->getLength(); |
|
} |
|
|
|
// Return the word at index <idx>, or NULL if <idx> is out of range.
TextWord *TextWordList::get(int idx) {
  const GBool inRange = idx >= 0 && idx < words->getLength();

  if (inRange) {
    return (TextWord *)words->get(idx);
  }
  return NULL;
}
|
|
|
#endif // TEXTOUT_WORD_LIST |
|
|
|
//------------------------------------------------------------------------ |
|
// TextPage |
|
//------------------------------------------------------------------------ |
|
|
|
// Create an empty text page.  If <rawOrderA> is true, words are kept in
// a single list in the order they are added; otherwise one TextPool is
// allocated per rotation value (0..3) to receive the words.
TextPage::TextPage(GBool rawOrderA) {
  int rot;

  rawOrder = rawOrderA;
  curWord = NULL;
  charPos = 0;
  curFont = NULL;
  curFontSize = 0;
  nest = 0;
  nTinyChars = 0;
  lastCharOverlap = gFalse;

  // These are normally set by startPage() and coalesce(); give them
  // defined values so that nothing reads an indeterminate member if
  // those calls have not happened yet (addChar, for example, compares
  // against pageWidth/pageHeight).
  pageWidth = pageHeight = 0;
  primaryRot = 0;
  primaryLR = gTrue;
  nBlocks = 0;

  if (!rawOrder) {
    for (rot = 0; rot < 4; ++rot) {
      pools[rot] = new TextPool();
    }
  }
  flows = NULL;
  blocks = NULL;
  rawWords = NULL;
  rawLastWord = NULL;
  fonts = new GList();
  lastFindXMin = lastFindYMin = 0;
  haveLastFind = gFalse;
}
|
|
|
// Free all page content.  clear() leaves behind a fresh set of pools
// (when not in raw order) and a fresh font list, so those are deleted
// here afterwards.
TextPage::~TextPage() {
  clear();

  if (!rawOrder) {
    int rot = 0;
    while (rot < 4) {
      delete pools[rot];
      ++rot;
    }
  }
  delete fonts;
}
|
|
|
// Begin a new page: discard the current content and record the page
// size from <state>, or use a 0 x 0 page if <state> is NULL.
void TextPage::startPage(GfxState *state) {
  clear();

  if (!state) {
    pageWidth = pageHeight = 0;
    return;
  }
  pageWidth = state->getPageWidth();
  pageHeight = state->getPageHeight();
}
|
|
|
void TextPage::endPage() { |
|
if (curWord) { |
|
endWord(); |
|
} |
|
} |
|
|
|
void TextPage::clear() { |
|
int rot; |
|
TextFlow *flow; |
|
TextWord *word; |
|
|
|
if (curWord) { |
|
delete curWord; |
|
curWord = NULL; |
|
} |
|
if (rawOrder) { |
|
while (rawWords) { |
|
word = rawWords; |
|
rawWords = rawWords->next; |
|
delete word; |
|
} |
|
} else { |
|
for (rot = 0; rot < 4; ++rot) { |
|
delete pools[rot]; |
|
} |
|
while (flows) { |
|
flow = flows; |
|
flows = flows->next; |
|
delete flow; |
|
} |
|
gfree(blocks); |
|
} |
|
deleteGList(fonts, TextFontInfo); |
|
|
|
curWord = NULL; |
|
charPos = 0; |
|
curFont = NULL; |
|
curFontSize = 0; |
|
nest = 0; |
|
nTinyChars = 0; |
|
if (!rawOrder) { |
|
for (rot = 0; rot < 4; ++rot) { |
|
pools[rot] = new TextPool(); |
|
} |
|
} |
|
flows = NULL; |
|
blocks = NULL; |
|
rawWords = NULL; |
|
rawLastWord = NULL; |
|
fonts = new GList(); |
|
} |
|
|
|
// Make curFont point at the TextFontInfo entry matching the font in
// <state> (creating and registering a new entry if none matches), and
// recompute curFontSize from the state's transformed font size.  For
// Type 3 fonts the size is additionally rescaled using a guessed glyph
// width and the font matrix.
void TextPage::updateFont(GfxState *state) {
  GfxFont *gfxFont;
  Gfx8BitFont *font8;
  double *fm;
  const char *name;
  int code, mCode, letterCode, anyCode;
  double w;
  int i;

  // get the font info object
  curFont = NULL;
  for (i = 0; i < fonts->getLength(); ++i) {
    curFont = (TextFontInfo *)fonts->get(i);
    if (curFont->matches(state)) {
      break;
    }
    curFont = NULL;
  }
  if (!curFont) {
    curFont = new TextFontInfo(state);
    fonts->append(curFont);
  }

  // adjust the font size
  gfxFont = state->getFont();
  curFontSize = state->getTransformedFontSize();
  if (gfxFont && gfxFont->getType() == fontType3) {
    // This is a hack which makes it possible to deal with some Type 3
    // fonts.  The problem is that it's impossible to know what the
    // base coordinate system used in the font is without actually
    // rendering the font.  This code tries to guess by looking at the
    // width of the character 'm'; if the font has no usable 'm' it
    // falls back to the first single-letter glyph name, and then to
    // the first named glyph with a positive width.
    font8 = (Gfx8BitFont *)gfxFont;
    mCode = letterCode = anyCode = -1;
    for (code = 0; code < 256; ++code) {
      name = font8->getCharName(code);
      if (!name) {
        continue;
      }
      if (name[0] == 'm' && name[1] == '\0') {
        mCode = code;
      }
      // name[0] must be tested before name[1]: for an empty name,
      // name[1] lies past the terminating NUL
      if (letterCode < 0 &&
          ((name[0] >= 'A' && name[0] <= 'Z') ||
           (name[0] >= 'a' && name[0] <= 'z')) &&
          name[1] == '\0') {
        letterCode = code;
      }
      if (anyCode < 0 && font8->getWidth(code) > 0) {
        anyCode = code;
      }
    }
    if (mCode >= 0 &&
        (w = font8->getWidth(mCode)) > 0) {
      // 0.6 is a generic average 'm' width -- yes, this is a hack
      curFontSize *= w / 0.6;
    } else if (letterCode >= 0 &&
               (w = font8->getWidth(letterCode)) > 0) {
      // even more of a hack: 0.5 is a generic letter width
      curFontSize *= w / 0.5;
    } else if (anyCode >= 0 &&
               (w = font8->getWidth(anyCode)) > 0) {
      // better than nothing: 0.5 is a generic character width
      curFontSize *= w / 0.5;
    }
    // account for non-uniform scaling in the font matrix
    fm = gfxFont->getFontMatrix();
    if (fm[0] != 0) {
      curFontSize *= fabs(fm[3] / fm[0]);
    }
  }
}
|
|
|
// Start a new word at (<x0>, <y0>).  If a word is already in progress,
// only the nesting counter is incremented (endWord() undoes this).
// Otherwise the word's rotation (0..3) is derived from the text matrix
// combined with the CTM (and with the font matrix for Type 3 fonts),
// and a new TextWord is created with the current font and font size.
void TextPage::beginWord(GfxState *state, double x0, double y0) {
  GfxFont *gfxFont;
  double *txtm, *ctm, *fontm;
  double m[4], m2[4];
  int rot;

  // This check is needed because Type 3 characters can contain
  // text-drawing operations (when TextPage is being used via
  // {X,Win}SplashOutputDev rather than TextOutputDev).
  if (curWord) {
    ++nest;
    return;
  }

  // compute the rotation
  txtm = state->getTextMat();
  ctm = state->getCTM();
  m[0] = txtm[0] * ctm[0] + txtm[1] * ctm[2];
  m[1] = txtm[0] * ctm[1] + txtm[1] * ctm[3];
  m[2] = txtm[2] * ctm[0] + txtm[3] * ctm[2];
  m[3] = txtm[2] * ctm[1] + txtm[3] * ctm[3];
  // the state may have no font set (updateFont() makes the same check),
  // so don't dereference it blindly
  gfxFont = state->getFont();
  if (gfxFont && gfxFont->getType() == fontType3) {
    fontm = gfxFont->getFontMatrix();
    m2[0] = fontm[0] * m[0] + fontm[1] * m[2];
    m2[1] = fontm[0] * m[1] + fontm[1] * m[3];
    m2[2] = fontm[2] * m[0] + fontm[3] * m[2];
    m2[3] = fontm[2] * m[1] + fontm[3] * m[3];
    m[0] = m2[0];
    m[1] = m2[1];
    m[2] = m2[2];
    m[3] = m2[3];
  }
  // diagonal-dominant matrix -> rotation 0 or 2; otherwise 1 or 3
  if (fabs(m[0] * m[3]) > fabs(m[1] * m[2])) {
    rot = (m[3] < 0) ? 0 : 2;
  } else {
    rot = (m[2] > 0) ? 1 : 3;
  }

  curWord = new TextWord(state, rot, x0, y0, charPos, curFont, curFontSize);
}
|
|
|
// Add one character to the page.
//   state    graphics state used for the coordinate transforms and the
//            character/word spacing
//   x, y     character origin (passed to state->transform())
//   dx, dy   character advance, including char/word spacing
//   c        character code (0x20 gets word spacing subtracted too)
//   u, uLen  Unicode values for this character and their count
// Characters whose transformed origin lies outside the page are
// dropped, as are "tiny" characters once more than 50000 of them have
// been seen (unless the textKeepTinyChars setting is on).  A single
// U+0020 ends the current word; otherwise the character is appended to
// the current word, starting a new word first where necessary.
void TextPage::addChar(GfxState *state, double x, double y,
                       double dx, double dy,
                       CharCode c, Unicode *u, int uLen) {
  double x1, y1, w1, h1, dx2, dy2, base, sp;
  GBool overlap;
  int i;

  // if the previous char was a space, addChar will have called
  // endWord, so we need to start a new word
  if (!curWord) {
    beginWord(state, x, y);
  }

  // throw away chars that aren't inside the page bounds
  state->transform(x, y, &x1, &y1);
  if (x1 < 0 || x1 > pageWidth ||
      y1 < 0 || y1 > pageHeight) {
    return;
  }

  // subtract char and word spacing from the dx,dy values
  sp = state->getCharSpace();
  if (c == (CharCode)0x20) {
    sp += state->getWordSpace();
  }
  state->textTransformDelta(sp * state->getHorizScaling(), 0, &dx2, &dy2);
  dx -= dx2;
  dy -= dy2;
  state->transformDelta(dx, dy, &w1, &h1);

  // check the tiny chars limit
  if (!globalParams->getTextKeepTinyChars() &&
      fabs(w1) < 3 && fabs(h1) < 3) {
    if (++nTinyChars > 50000) {
      return;
    }
  }

  // break words at space character
  if (uLen == 1 && u[0] == (Unicode)0x20) {
    ++curWord->charLen;
    ++charPos;
    endWord();
    return;
  }

  // start a new word if:
  // (1) this character's baseline doesn't match the current word's
  //     baseline, or
  // (2) there is space between the end of the current word and this
  //     character, or
  // (3) this character overlaps the previous one (duplicated text), or
  // (4) the previous character was an overlap (we want each duplicated
  //     character to be in a word by itself)
  if (curWord->len > 0) {
    base = sp = 0; // make gcc happy
    switch (curWord->rot) {
    case 0:
      base = y1;
      sp = x1 - curWord->xMax;
      break;
    case 1:
      base = x1;
      sp = y1 - curWord->yMax;
      break;
    case 2:
      base = y1;
      sp = curWord->xMin - x1;
      break;
    case 3:
      base = x1;
      sp = curWord->yMin - y1;
      break;
    }
    // must be evaluated before endWord() replaces curWord
    overlap = sp < -minDupBreakOverlap * curWord->fontSize;
    if (overlap || lastCharOverlap ||
        fabs(base - curWord->base) > 0.5 ||
        sp > minWordBreakSpace * curWord->fontSize) {
      endWord();
      beginWord(state, x, y);
    }
    // remember only genuine overlaps: latching this on every break
    // (baseline change or gap) would force each following character
    // into its own word until the next space
    lastCharOverlap = overlap;
  } else {
    lastCharOverlap = gFalse;
  }

  // page rotation and/or transform matrices can cause text to be
  // drawn in reverse order -- in this case, swap the begin/end
  // coordinates and break text into individual chars
  if ((curWord->rot == 0 && w1 < 0) ||
      (curWord->rot == 1 && h1 < 0) ||
      (curWord->rot == 2 && w1 > 0) ||
      (curWord->rot == 3 && h1 > 0)) {
    endWord();
    beginWord(state, x + dx, y + dy);
    x1 += w1;
    y1 += h1;
    w1 = -w1;
    h1 = -h1;
  }

  // add the characters to the current word, splitting the advance
  // evenly between the Unicode values
  if (uLen != 0) {
    w1 /= uLen;
    h1 /= uLen;
  }
  for (i = 0; i < uLen; ++i) {
    curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, u[i]);
  }
  ++curWord->charLen;
  ++charPos;
}
|
|
|
void TextPage::endWord() { |
|
// This check is needed because Type 3 characters can contain |
|
// text-drawing operations (when TextPage is being used via |
|
// {X,Win}SplashOutputDev rather than TextOutputDev). |
|
if (nest > 0) { |
|
--nest; |
|
return; |
|
} |
|
|
|
if (curWord) { |
|
addWord(curWord); |
|
curWord = NULL; |
|
} |
|
} |
|
|
|
// Store a finished word on the page.  Empty words are deleted; in raw
// order the word is appended to the raw word list, otherwise it is
// added to the pool for its rotation.
void TextPage::addWord(TextWord *word) {
  // throw away zero-length words -- they don't have valid xMin/xMax
  // values, and they're useless anyway
  if (word->len == 0) {
    delete word;
    return;
  }

  if (!rawOrder) {
    pools[word->rot]->addWord(word);
    return;
  }

  // raw order: append at the tail of the list
  if (rawLastWord) {
    rawLastWord->next = word;
  } else {
    rawWords = word;
  }
  rawLastWord = word;
}
|
|
|
void TextPage::coalesce(GBool /*physLayout*/) { |
|
UnicodeMap *uMap; |
|
TextPool *pool; |
|
TextWord *word0, *word1, *word2; |
|
TextLine *line; |
|
TextBlock *blkList, *blkStack, *blk, *lastBlk, *blk0, *blk1; |
|
TextBlock **blkArray; |
|
TextFlow *flow, *lastFlow; |
|
int rot, poolMinBaseIdx, baseIdx, startBaseIdx; |
|
double minBase, maxBase, newMinBase, newMaxBase; |
|
double fontSize, colSpace1, colSpace2, lineSpace, intraLineSpace, blkSpace; |
|
GBool found; |
|
int count[4]; |
|
int lrCount; |
|
int firstBlkIdx, nBlocksLeft; |
|
int col1, col2; |
|
int i, j, n; |
|
|
|
if (rawOrder) { |
|
primaryRot = 0; |
|
primaryLR = gTrue; |
|
return; |
|
} |
|
|
|
uMap = globalParams->getTextEncoding(); |
|
blkList = NULL; |
|
lastBlk = NULL; |
|
nBlocks = 0; |
|
primaryRot = -1; |
|
|
|
#if 0 // for debugging |
|
printf("*** initial words ***\n"); |
|
for (rot = 0; rot < 4; ++rot) { |
|
pool = pools[rot]; |
|
for (baseIdx = pool->minBaseIdx; baseIdx <= pool->maxBaseIdx; ++baseIdx) { |
|
for (word0 = pool->getPool(baseIdx); word0; word0 = word0->next) { |
|
printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSize=%.2f '", |
|
word0->xMin, word0->xMax, word0->yMin, word0->yMax, |
|
word0->base, word0->fontSize); |
|
for (i = 0; i < word0->len; ++i) { |
|
fputc(word0->text[i] & 0xff, stdout); |
|
} |
|
printf("'\n"); |
|
} |
|
} |
|
} |
|
printf("\n"); |
|
#endif |
|
|
|
//----- assemble the blocks |
|
|
|
//~ add an outer loop for writing mode (vertical text) |
|
|
|
// build blocks for each rotation value |
|
for (rot = 0; rot < 4; ++rot) { |
|
pool = pools[rot]; |
|
poolMinBaseIdx = pool->minBaseIdx; |
|
count[rot] = 0; |
|
|
|
// add blocks until no more words are left |
|
while (1) { |
|
|
|
// find the first non-empty line in the pool |
|
for (; |
|
poolMinBaseIdx <= pool->maxBaseIdx && |
|
!pool->getPool(poolMinBaseIdx); |
|
++poolMinBaseIdx) ; |
|
if (poolMinBaseIdx > pool->maxBaseIdx) { |
|
break; |
|
} |
|
|
|
// look for the left-most word in the first four lines of the |
|
// pool -- this avoids starting with a superscript word |
|
startBaseIdx = poolMinBaseIdx; |
|
for (baseIdx = poolMinBaseIdx + 1; |
|
baseIdx < poolMinBaseIdx + 4 && baseIdx <= pool->maxBaseIdx; |
|
++baseIdx) { |
|
if (!pool->getPool(baseIdx)) { |
|
continue; |
|
} |
|
if (pool->getPool(baseIdx)->primaryCmp(pool->getPool(startBaseIdx)) |
|
< 0) { |
|
startBaseIdx = baseIdx; |
|
} |
|
} |
|
|
|
// create a new block |
|
word0 = pool->getPool(startBaseIdx); |
|
pool->setPool(startBaseIdx, word0->next); |
|
word0->next = NULL; |
|
blk = new TextBlock(this, rot); |
|
blk->addWord(word0); |
|
|
|
fontSize = word0->fontSize; |
|
minBase = maxBase = word0->base; |
|
colSpace1 = minColSpacing1 * fontSize; |
|
colSpace2 = minColSpacing2 * fontSize; |
|
lineSpace = maxLineSpacingDelta * fontSize; |
|
intraLineSpace = maxIntraLineDelta * fontSize; |
|
|
|
// add words to the block |
|
do { |
|
found = gFalse; |
|
|
|
// look for words on the line above the current top edge of |
|
// the block |
|
newMinBase = minBase; |
|
for (baseIdx = pool->getBaseIdx(minBase); |
|
baseIdx >= pool->getBaseIdx(minBase - lineSpace); |
|
--baseIdx) { |
|
word0 = NULL; |
|
word1 = pool->getPool(baseIdx); |
|
while (word1) { |
|
if (word1->base < minBase && |
|
word1->base >= minBase - lineSpace && |
|
((rot == 0 || rot == 2) |
|
? (word1->xMin < blk->xMax && word1->xMax > blk->xMin) |
|
: (word1->yMin < blk->yMax && word1->yMax > blk->yMin)) && |
|
fabs(word1->fontSize - fontSize) < |
|
maxBlockFontSizeDelta1 * fontSize) { |
|
word2 = word1; |
|
if (word0) { |
|
word0->next = word1->next; |
|
} else { |
|
pool->setPool(baseIdx, word1->next); |
|
} |
|
word1 = word1->next; |
|
word2->next = NULL; |
|
blk->addWord(word2); |
|
found = gTrue; |
|
newMinBase = word2->base; |
|
} else { |
|
word0 = word1; |
|
word1 = word1->next; |
|
} |
|
} |
|
} |
|
minBase = newMinBase; |
|
|
|
// look for words on the line below the current bottom edge of |
|
// the block |
|
newMaxBase = maxBase; |
|
for (baseIdx = pool->getBaseIdx(maxBase); |
|
baseIdx <= pool->getBaseIdx(maxBase + lineSpace); |
|
++baseIdx) { |
|
word0 = NULL; |
|
word1 = pool->getPool(baseIdx); |
|
while (word1) { |
|
if (word1->base > maxBase && |
|
word1->base <= maxBase + lineSpace && |
|
((rot == 0 || rot == 2) |
|
? (word1->xMin < blk->xMax && word1->xMax > blk->xMin) |
|
: (word1->yMin < blk->yMax && word1->yMax > blk->yMin)) && |
|
fabs(word1->fontSize - fontSize) < |
|
maxBlockFontSizeDelta1 * fontSize) { |
|
word2 = word1; |
|
if (word0) { |
|
word0->next = word1->next; |
|
} else { |
|
pool->setPool(baseIdx, word1->next); |
|
} |
|
word1 = word1->next; |
|
word2->next = NULL; |
|
blk->addWord(word2); |
|
found = gTrue; |
|
newMaxBase = word2->base; |
|
} else { |
|
word0 = word1; |
|
word1 = word1->next; |
|
} |
|
} |
|
} |
|
maxBase = newMaxBase; |
|
|
|
// look for words that are on lines already in the block, and |
|
// that overlap the block horizontally |
|
for (baseIdx = pool->getBaseIdx(minBase - intraLineSpace); |
|
baseIdx <= pool->getBaseIdx(maxBase + intraLineSpace); |
|
++baseIdx) { |
|
word0 = NULL; |
|
word1 = pool->getPool(baseIdx); |
|
while (word1) { |
|
if (word1->base >= minBase - intraLineSpace && |
|
word1->base <= maxBase + intraLineSpace && |
|
((rot == 0 || rot == 2) |
|
? (word1->xMin < blk->xMax + colSpace1 && |
|
word1->xMax > blk->xMin - colSpace1) |
|
: (word1->yMin < blk->yMax + colSpace1 && |
|
word1->yMax > blk->yMin - colSpace1)) && |
|
fabs(word1->fontSize - fontSize) < |
|
maxBlockFontSizeDelta2 * fontSize) { |
|
word2 = word1; |
|
if (word0) { |
|
word0->next = word1->next; |
|
} else { |
|
pool->setPool(baseIdx, word1->next); |
|
} |
|
word1 = word1->next; |
|
word2->next = NULL; |
|
blk->addWord(word2); |
|
found = gTrue; |
|
} else { |
|
word0 = word1; |
|
word1 = word1->next; |
|
} |
|
} |
|
} |
|
|
|
// only check for outlying words (the next two chunks of code) |
|
// if we didn't find anything else |
|
if (found) { |
|
continue; |
|
} |
|
|
|
// scan down the left side of the block, looking for words |
|
// that are near (but not overlapping) the block; if there are |
|
// three or fewer, add them to the block |
|
n = 0; |
|
for (baseIdx = pool->getBaseIdx(minBase - intraLineSpace); |
|
baseIdx <= pool->getBaseIdx(maxBase + intraLineSpace); |
|
++baseIdx) { |
|
word1 = pool->getPool(baseIdx); |
|
while (word1) { |
|
if (word1->base >= minBase - intraLineSpace && |
|
word1->base <= maxBase + intraLineSpace && |
|
((rot == 0 || rot == 2) |
|
? (word1->xMax <= blk->xMin && |
|
word1->xMax > blk->xMin - colSpace2) |
|
: (word1->yMax <= blk->yMin && |
|
word1->yMax > blk->yMin - colSpace2)) && |
|
fabs(word1->fontSize - fontSize) < |
|
maxBlockFontSizeDelta3 * fontSize) { |
|
++n; |
|
break; |
|
} |
|
word1 = word1->next; |
|
} |
|
} |
|
if (n > 0 && n <= 3) { |
|
for (baseIdx = pool->getBaseIdx(minBase - intraLineSpace); |
|
baseIdx <= pool->getBaseIdx(maxBase + intraLineSpace); |
|
++baseIdx) { |
|
word0 = NULL; |
|
word1 = pool->getPool(baseIdx); |
|
while (word1) { |
|
if (word1->base >= minBase - intraLineSpace && |
|
word1->base <= maxBase + intraLineSpace && |
|
((rot == 0 || rot == 2) |
|
? (word1->xMax <= blk->xMin && |
|
word1->xMax > blk->xMin - colSpace2) |
|
: (word1->yMax <= blk->yMin && |
|
word1->yMax > blk->yMin - colSpace2)) && |
|
fabs(word1->fontSize - fontSize) < |
|
maxBlockFontSizeDelta3 * fontSize) { |
|
word2 = word1; |
|
if (word0) { |
|
word0->next = word1->next; |
|
} else { |
|
pool->setPool(baseIdx, word1->next); |
|
} |
|
word1 = word1->next; |
|
word2->next = NULL; |
|
blk->addWord(word2); |
|
if (word2->base < minBase) { |
|
minBase = word2->base; |
|
} else if (word2->base > maxBase) { |
|
maxBase = word2->base; |
|
} |
|
found = gTrue; |
|
break; |
|
} else { |
|
word0 = word1; |
|
word1 = word1->next; |
|
} |
|
} |
|
} |
|
} |
|
|
|
// scan down the right side of the block, looking for words |
|
// that are near (but not overlapping) the block; if there are |
|
// three or fewer, add them to the block |
|
n = 0; |
|
for (baseIdx = pool->getBaseIdx(minBase - intraLineSpace); |
|
baseIdx <= pool->getBaseIdx(maxBase + intraLineSpace); |
|
++baseIdx) { |
|
word1 = pool->getPool(baseIdx); |
|
while (word1) { |
|
if (word1->base >= minBase - intraLineSpace && |
|
word1->base <= maxBase + intraLineSpace && |
|
((rot == 0 || rot == 2) |
|
? (word1->xMin >= blk->xMax && |
|
word1->xMin < blk->xMax + colSpace2) |
|
: (word1->yMin >= blk->yMax && |
|
word1->yMin < blk->yMax + colSpace2)) && |
|
fabs(word1->fontSize - fontSize) < |
|
maxBlockFontSizeDelta3 * fontSize) { |
|
++n; |
|
break; |
|
} |
|
word1 = word1->next; |
|
} |
|
} |
|
if (n > 0 && n <= 3) { |
|
for (baseIdx = pool->getBaseIdx(minBase - intraLineSpace); |
|
baseIdx <= pool->getBaseIdx(maxBase + intraLineSpace); |
|
++baseIdx) { |
|
word0 = NULL; |
|
word1 = pool->getPool(baseIdx); |
|
while (word1) { |
|
if (word1->base >= minBase - intraLineSpace && |
|
word1->base <= maxBase + intraLineSpace && |
|
((rot == 0 || rot == 2) |
|
? (word1->xMin >= blk->xMax && |
|
word1->xMin < blk->xMax + colSpace2) |
|
: (word1->yMin >= blk->yMax && |
|
word1->yMin < blk->yMax + colSpace2)) && |
|
fabs(word1->fontSize - fontSize) < |
|
maxBlockFontSizeDelta3 * fontSize) { |
|
word2 = word1; |
|
if (word0) { |
|
word0->next = word1->next; |
|
} else { |
|
pool->setPool(baseIdx, word1->next); |
|
} |
|
word1 = word1->next; |
|
word2->next = NULL; |
|
blk->addWord(word2); |
|
if (word2->base < minBase) { |
|
minBase = word2->base; |
|
} else if (word2->base > maxBase) { |
|
maxBase = word2->base; |
|
} |
|
found = gTrue; |
|
break; |
|
} else { |
|
word0 = word1; |
|
word1 = word1->next; |
|
} |
|
} |
|
} |
|
} |
|
|
|
} while (found); |
|
|
|
//~ need to compute the primary writing mode (horiz/vert) in |
|
//~ addition to primary rotation |
|
|
|
// coalesce the block, and add it to the list |
|
blk->coalesce(uMap); |
|
if (lastBlk) { |
|
lastBlk->next = blk; |
|
} else { |
|
blkList = blk; |
|
} |
|
lastBlk = blk; |
|
count[rot] += blk->charCount; |
|
if (primaryRot < 0 || count[rot] > count[primaryRot]) { |
|
primaryRot = rot; |
|
} |
|
++nBlocks; |
|
} |
|
} |
|
|
|
#if 0 // for debugging |
|
printf("*** rotation ***\n"); |
|
for (rot = 0; rot < 4; ++rot) { |
|
printf(" %d: %6d\n", rot, count[rot]); |
|
} |
|
printf(" primary rot = %d\n", primaryRot); |
|
printf("\n"); |
|
#endif |
|
|
|
#if 0 // for debugging |
|
printf("*** blocks ***\n"); |
|
for (blk = blkList; blk; blk = blk->next) { |
|
printf("block: rot=%d x=%.2f..%.2f y=%.2f..%.2f\n", |
|
blk->rot, blk->xMin, blk->xMax, blk->yMin, blk->yMax); |
|
for (line = blk->lines; line; line = line->next) { |
|
printf(" line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f\n", |
|
line->xMin, line->xMax, line->yMin, line->yMax, line->base); |
|
for (word0 = line->words; word0; word0 = word0->next) { |
|
printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSize=%.2f space=%d: '", |
|
word0->xMin, word0->xMax, word0->yMin, word0->yMax, |
|
word0->base, word0->fontSize, word0->spaceAfter); |
|
for (i = 0; i < word0->len; ++i) { |
|
fputc(word0->text[i] & 0xff, stdout); |
|
} |
|
printf("'\n"); |
|
} |
|
} |
|
} |
|
printf("\n"); |
|
#endif |
|
|
|
// determine the primary direction |
|
lrCount = 0; |
|
for (blk = blkList; blk; blk = blk->next) { |
|
for (line = blk->lines; line; line = line->next) { |
|
for (word0 = line->words; word0; word0 = word0->next) { |
|
for (i = 0; i < word0->len; ++i) { |
|
if (unicodeTypeL(word0->text[i])) { |
|
++lrCount; |
|
} else if (unicodeTypeR(word0->text[i])) { |
|
--lrCount; |
|
} |
|
} |
|
} |
|
} |
|
} |
|
primaryLR = lrCount >= 0; |
|
|
|
#if 0 // for debugging |
|
printf("*** direction ***\n"); |
|
printf("lrCount = %d\n", lrCount); |
|
printf("primaryLR = %d\n", primaryLR); |
|
#endif |
|
|
|
//----- column assignment |
|
|
|
// sort blocks into xy order for column assignment |
|
blocks = (TextBlock **)gmalloc(nBlocks * sizeof(TextBlock *)); |
|
for (blk = blkList, i = 0; blk; blk = blk->next, ++i) { |
|
blocks[i] = blk; |
|
} |
|
qsort(blocks, nBlocks, sizeof(TextBlock *), &TextBlock::cmpXYPrimaryRot); |
|
|
|
// column assignment |
|
for (i = 0; i < nBlocks; ++i) { |
|
blk0 = blocks[i]; |
|
col1 = 0; |
|
for (j = 0; j < i; ++j) { |
|
blk1 = blocks[j]; |
|
col2 = 0; // make gcc happy |
|
switch (primaryRot) { |
|
case 0: |
|
if (blk0->xMin > blk1->xMax) { |
|
col2 = blk1->col + blk1->nColumns + 3; |
|
} else { |
|
col2 = blk1->col + (int)(((blk0->xMin - blk1->xMin) / |
|
(blk1->xMax - blk1->xMin)) * |
|
blk1->nColumns); |
|
} |
|
break; |
|
case 1: |
|
if (blk0->yMin > blk1->yMax) { |
|
col2 = blk1->col + blk1->nColumns + 3; |
|
} else { |
|
col2 = blk1->col + (int)(((blk0->yMin - blk1->yMin) / |
|
(blk1->yMax - blk1->yMin)) * |
|
blk1->nColumns); |
|
} |
|
break; |
|
case 2: |
|
if (blk0->xMax < blk1->xMin) { |
|
col2 = blk1->col + blk1->nColumns + 3; |
|
} else { |
|
col2 = blk1->col + (int)(((blk0->xMax - blk1->xMax) / |
|
(blk1->xMin - blk1->xMax)) * |
|
blk1->nColumns); |
|
} |
|
break; |
|
case 3: |
|
if (blk0->yMax < blk1->yMin) { |
|
col2 = blk1->col + blk1->nColumns + 3; |
|
} else { |
|
col2 = blk1->col + (int)(((blk0->yMax - blk1->yMax) / |
|
(blk1->yMin - blk1->yMax)) * |
|
blk1->nColumns); |
|
} |
|
break; |
|
} |
|
if (col2 > col1) { |
|
col1 = col2; |
|
} |
|
} |
|
blk0->col = col1; |
|
for (line = blk0->lines; line; line = line->next) { |
|
for (j = 0; j <= line->len; ++j) { |
|
line->col[j] += col1; |
|
} |
|
} |
|
} |
|
|
|
#if 0 // for debugging |
|
printf("*** blocks, after column assignment ***\n"); |
|
for (blk = blkList; blk; blk = blk->next) { |
|
printf("block: rot=%d x=%.2f..%.2f y=%.2f..%.2f col=%d nCols=%d\n", |
|
blk->rot, blk->xMin, blk->xMax, blk->yMin, blk->yMax, blk->col, |
|
blk->nColumns); |
|
for (line = blk->lines; line; line = line->next) { |
|
printf(" line:\n"); |
|
for (word0 = line->words; word0; word0 = word0->next) { |
|
printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSize=%.2f space=%d: '", |
|
word0->xMin, word0->xMax, word0->yMin, word0->yMax, |
|
word0->base, word0->fontSize, word0->spaceAfter); |
|
for (i = 0; i < word0->len; ++i) { |
|
fputc(word0->text[i] & 0xff, stdout); |
|
} |
|
printf("'\n"); |
|
} |
|
} |
|
} |
|
printf("\n"); |
|
#endif |
|
|
|
//----- reading order sort |
|
|
|
// sort blocks into yx order (in preparation for reading order sort) |
|
qsort(blocks, nBlocks, sizeof(TextBlock *), &TextBlock::cmpYXPrimaryRot); |
|
|
|
// compute space on left and right sides of each block |
|
for (i = 0; i < nBlocks; ++i) { |
|
blk0 = blocks[i]; |
|
for (j = 0; j < nBlocks; ++j) { |
|
blk1 = blocks[j]; |
|
if (blk1 != blk0) { |
|
blk0->updatePriMinMax(blk1); |
|
} |
|
} |
|
} |
|
|
|
#if 0 // for debugging |
|
printf("*** blocks, after yx sort ***\n"); |
|
for (i = 0; i < nBlocks; ++i) { |
|
blk = blocks[i]; |
|
printf("block: rot=%d x=%.2f..%.2f y=%.2f..%.2f space=%.2f..%.2f\n", |
|
blk->rot, blk->xMin, blk->xMax, blk->yMin, blk->yMax, |
|
blk->priMin, blk->priMax); |
|
for (line = blk->lines; line; line = line->next) { |
|
printf(" line:\n"); |
|
for (word0 = line->words; word0; word0 = word0->next) { |
|
printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSize=%.2f space=%d: '", |
|
word0->xMin, word0->xMax, word0->yMin, word0->yMax, |
|
word0->base, word0->fontSize, word0->spaceAfter); |
|
for (j = 0; j < word0->len; ++j) { |
|
fputc(word0->text[j] & 0xff, stdout); |
|
} |
|
printf("'\n"); |
|
} |
|
} |
|
} |
|
printf("\n"); |
|
#endif |
|
|
|
// build the flows |
|
//~ this needs to be adjusted for writing mode (vertical text) |
|
//~ this also needs to account for right-to-left column ordering |
|
blkArray = (TextBlock **)gmalloc(nBlocks * sizeof(TextBlock *)); |
|
memcpy(blkArray, blocks, nBlocks * sizeof(TextBlock *)); |
|
flows = lastFlow = NULL; |
|
firstBlkIdx = 0; |
|
nBlocksLeft = nBlocks; |
|
while (nBlocksLeft > 0) { |
|
|
|
// find the upper-left-most block |
|
for (; !blkArray[firstBlkIdx]; ++firstBlkIdx) ; |
|
i = firstBlkIdx; |
|
blk = blkArray[i]; |
|
for (j = firstBlkIdx + 1; j < nBlocks; ++j) { |
|
blk1 = blkArray[j]; |
|
if (blk1) { |
|
if (blk && blk->secondaryDelta(blk1) > 0) { |
|
break; |
|
} |
|
if (blk1->primaryCmp(blk) < 0) { |
|
i = j; |
|
blk = blk1; |
|
} |
|
} |
|
} |
|
blkArray[i] = NULL; |
|
--nBlocksLeft; |
|
blk->next = NULL; |
|
|
|
// create a new flow, starting with the upper-left-most block |
|
flow = new TextFlow(this, blk); |
|
if (lastFlow) { |
|
lastFlow->next = flow; |
|
} else { |
|
flows = flow; |
|
} |
|
lastFlow = flow; |
|
fontSize = blk->lines->words->fontSize; |
|
|
|
// push the upper-left-most block on the stack |
|
blk->stackNext = NULL; |
|
blkStack = blk; |
|
|
|
// find the other blocks in this flow |
|
while (blkStack) { |
|
|
|
// find the upper-left-most block under (but within |
|
// maxBlockSpacing of) the top block on the stack |
|
blkSpace = maxBlockSpacing * blkStack->lines->words->fontSize; |
|
blk = NULL; |
|
i = -1; |
|
for (j = firstBlkIdx; j < nBlocks; ++j) { |
|
blk1 = blkArray[j]; |
|
if (blk1) { |
|
if (blkStack->secondaryDelta(blk1) > blkSpace) { |
|
break; |
|
} |
|
if (blk && blk->secondaryDelta(blk1) > 0) { |
|
break; |
|
} |
|
if (blk1->isBelow(blkStack) && |
|
(!blk || blk1->primaryCmp(blk) < 0)) { |
|
i = j; |
|
blk = blk1; |
|
} |
|
} |
|
} |
|
|
|
// if a suitable block was found, add it to the flow and push it |
|
// onto the stack |
|
if (blk && flow->blockFits(blk, blkStack)) { |
|
blkArray[i] = NULL; |
|
--nBlocksLeft; |
|
blk->next = NULL; |
|
flow->addBlock(blk); |
|
fontSize = blk->lines->words->fontSize; |
|
blk->stackNext = blkStack; |
|
blkStack = blk; |
|
|
|
// otherwise (if there is no block under the top block or the |
|
// block is not suitable), pop the stack |
|
} else { |
|
blkStack = blkStack->stackNext; |
|
} |
|
} |
|
} |
|
gfree(blkArray); |
|
|
|
#if 0 // for debugging |
|
printf("*** flows ***\n"); |
|
for (flow = flows; flow; flow = flow->next) { |
|
printf("flow: x=%.2f..%.2f y=%.2f..%.2f pri:%.2f..%.2f\n", |
|
flow->xMin, flow->xMax, flow->yMin, flow->yMax, |
|
flow->priMin, flow->priMax); |
|
for (blk = flow->blocks; blk; blk = blk->next) { |
|
printf(" block: rot=%d x=%.2f..%.2f y=%.2f..%.2f pri=%.2f..%.2f\n", |
|
blk->rot, blk->xMin, blk->xMax, blk->yMin, blk->yMax, |
|
blk->priMin, blk->priMax); |
|
for (line = blk->lines; line; line = line->next) { |
|
printf(" line:\n"); |
|
for (word0 = line->words; word0; word0 = word0->next) { |
|
printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSize=%.2f space=%d: '", |
|
word0->xMin, word0->xMax, word0->yMin, word0->yMax, |
|
word0->base, word0->fontSize, word0->spaceAfter); |
|
for (i = 0; i < word0->len; ++i) { |
|
fputc(word0->text[i] & 0xff, stdout); |
|
} |
|
printf("'\n"); |
|
} |
|
} |
|
} |
|
} |
|
printf("\n"); |
|
#endif |
|
|
|
if (uMap) { |
|
uMap->decRefCnt(); |
|
} |
|
} |
|
|
|
// Search the page for the string <s> (<len> Unicode characters).
// Latin A-Z is folded to lower case on both sides before comparing;
// no other case folding is performed.
//
// Search window:
//   - start: unbounded if <startAtTop>; otherwise the position of the
//     previous successful find if <startAtLast> is set and there was
//     one; otherwise (*xMin, *yMin).
//   - stop: unbounded if <stopAtBottom>; otherwise the position of the
//     previous successful find if <stopAtLast> is set and there was
//     one; otherwise (*xMax, *yMax).
//
// Of all matches inside the window, the one with the smallest yMin
// (ties broken by smallest xMin) is returned.  On success the match's
// bounding box is stored in *xMin, *yMin, *xMax, *yMax, its (xMin,
// yMin) corner is remembered as the "last find", and gTrue is
// returned.  Returns gFalse if nothing matched or if the page is in
// raw-order mode.
GBool TextPage::findText(Unicode *s, int len,
                         GBool startAtTop, GBool stopAtBottom,
                         GBool startAtLast, GBool stopAtLast,
                         double *xMin, double *yMin,
                         double *xMax, double *yMax) {
  TextBlock *blk;
  TextLine *line;
  Unicode *p;
  Unicode u1, u2;
  int m, i, j, k;
  double xStart, yStart, xStop, yStop;
  double xMin0, yMin0, xMax0, yMax0;
  double xMin1, yMin1, xMax1, yMax1;
  GBool found;

  //~ needs to handle right-to-left text

  if (rawOrder) {
    return gFalse;
  }

  // work out the start/stop corners of the search window
  xStart = yStart = xStop = yStop = 0;
  if (startAtLast && haveLastFind) {
    xStart = lastFindXMin;
    yStart = lastFindYMin;
  } else if (!startAtTop) {
    xStart = *xMin;
    yStart = *yMin;
  }
  if (stopAtLast && haveLastFind) {
    xStop = lastFindXMin;
    yStop = lastFindYMin;
  } else if (!stopAtBottom) {
    xStop = *xMax;
    yStop = *yMax;
  }

  found = gFalse;
  xMin0 = xMax0 = yMin0 = yMax0 = 0; // make gcc happy
  xMin1 = xMax1 = yMin1 = yMax1 = 0; // make gcc happy

  for (i = 0; i < nBlocks; ++i) {
    blk = blocks[i];

    // check: is the block above the top limit?
    if (!startAtTop && blk->yMax < yStart) {
      continue;
    }

    // check: is the block below the bottom limit?
    // NOTE(review): breaking out here presumably relies on blocks[]
    // being sorted top-to-bottom -- confirm against coalesce()
    if (!stopAtBottom && blk->yMin > yStop) {
      break;
    }

    for (line = blk->lines; line; line = line->next) {

      // check: is the line above the top limit?
      if (!startAtTop && line->yMin < yStart) {
        continue;
      }

      // check: is the line below the bottom limit?
      if (!stopAtBottom && line->yMin > yStop) {
        continue;
      }

      // search each position in this line
      m = line->len;
      for (j = 0, p = line->text; j <= m - len; ++j, ++p) {

        // compare the strings
        //~ this lowercases Latin A-Z only -- this will eventually be
        //~ extended to handle other character sets
        for (k = 0; k < len; ++k) {
          if (p[k] >= 0x41 && p[k] <= 0x5a) {
            u1 = p[k] + 0x20;
          } else {
            u1 = p[k];
          }
          if (s[k] >= 0x41 && s[k] <= 0x5a) {
            u2 = s[k] + 0x20;
          } else {
            u2 = s[k];
          }
          if (u1 != u2) {
            break;
          }
        }

        // found it
        if (k == len) {
          // the character edges run along x for rotations 0/2 and
          // along y for rotations 1/3; for rotations 2/3 the edge
          // values decrease with the character index, so min/max swap
          switch (line->rot) {
          case 0:
            xMin1 = line->edge[j];
            xMax1 = line->edge[j + len];
            yMin1 = line->yMin;
            yMax1 = line->yMax;
            break;
          case 1:
            xMin1 = line->xMin;
            xMax1 = line->xMax;
            yMin1 = line->edge[j];
            yMax1 = line->edge[j + len];
            break;
          case 2:
            xMin1 = line->edge[j + len];
            xMax1 = line->edge[j];
            yMin1 = line->yMin;
            yMax1 = line->yMax;
            break;
          case 3:
            xMin1 = line->xMin;
            xMax1 = line->xMax;
            yMin1 = line->edge[j + len];
            yMax1 = line->edge[j];
            break;
          }

          // the match must lie strictly after the start corner and
          // strictly before the stop corner (in y-then-x order); the
          // x tie-break on the stop side compares against xStop (it
          // used to compare against yStop by mistake)
          if ((startAtTop ||
               yMin1 > yStart || (yMin1 == yStart && xMin1 > xStart)) &&
              (stopAtBottom ||
               yMin1 < yStop || (yMin1 == yStop && xMin1 < xStop))) {
            // keep the earliest match in y-then-x order
            if (!found || yMin1 < yMin0 || (yMin1 == yMin0 && xMin1 < xMin0)) {
              xMin0 = xMin1;
              xMax0 = xMax1;
              yMin0 = yMin1;
              yMax0 = yMax1;
              found = gTrue;
            }
          }
        }
      }
    }
  }

  if (found) {
    *xMin = xMin0;
    *xMax = xMax0;
    *yMin = yMin0;
    *yMax = yMax0;
    lastFindXMin = xMin0;
    lastFindYMin = yMin0;
    haveLastFind = gTrue;
    return gTrue;
  }

  return gFalse;
}
|
|
|
GString *TextPage::getText(double xMin, double yMin, |
|
double xMax, double yMax) { |
|
GString *s; |
|
UnicodeMap *uMap; |
|
GBool isUnicode; |
|
TextBlock *blk; |
|
TextLine *line; |
|
TextLineFrag *frags; |
|
int nFrags, fragsSize; |
|
TextLineFrag *frag; |
|
char space[8], eol[16]; |
|
int spaceLen, eolLen; |
|
int lastRot; |
|
double x, y; |
|
int col, idx0, idx1, i, j; |
|
GBool multiLine, oneRot; |
|
|
|
s = new GString(); |
|
|
|
if (rawOrder) { |
|
return s; |
|
} |
|
|
|
// get the output encoding |
|
if (!(uMap = globalParams->getTextEncoding())) { |
|
return s; |
|
} |
|
isUnicode = uMap->isUnicode(); |
|
spaceLen = uMap->mapUnicode(0x20, space, sizeof(space)); |
|
eolLen = 0; // make gcc happy |
|
switch (globalParams->getTextEOL()) { |
|
case eolUnix: |
|
eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol)); |
|
break; |
|
case eolDOS: |
|
eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol)); |
|
eolLen += uMap->mapUnicode(0x0a, eol + eolLen, sizeof(eol) - eolLen); |
|
break; |
|
case eolMac: |
|
eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol)); |
|
break; |
|
} |
|
|
|
//~ writing mode (horiz/vert) |
|
|
|
// collect the line fragments that are in the rectangle |
|
fragsSize = 256; |
|
frags = (TextLineFrag *)gmalloc(fragsSize * sizeof(TextLineFrag)); |
|
nFrags = 0; |
|
lastRot = -1; |
|
oneRot = gTrue; |
|
for (i = 0; i < nBlocks; ++i) { |
|
blk = blocks[i]; |
|
if (xMin < blk->xMax && blk->xMin < xMax && |
|
yMin < blk->yMax && blk->yMin < yMax) { |
|
for (line = blk->lines; line; line = line->next) { |
|
if (xMin < line->xMax && line->xMin < xMax && |
|
yMin < line->yMax && line->yMin < yMax) { |
|
idx0 = idx1 = -1; |
|
switch (line->rot) { |
|
case 0: |
|
y = 0.5 * (line->yMin + line->yMax); |
|
if (yMin < y && y < yMax) { |
|
j = 0; |
|
while (j < line->len) { |
|
if (0.5 * (line->edge[j] + line->edge[j+1]) > xMin) { |
|
idx0 = j; |
|
break; |
|
} |
|
++j; |
|
} |
|
j = line->len - 1; |
|
while (j >= 0) { |
|
if (0.5 * (line->edge[j] + line->edge[j+1]) < xMax) { |
|
idx1 = j; |
|
break; |
|
} |
|
--j; |
|
} |
|
} |
|
break; |
|
case 1: |
|
x = 0.5 * (line->xMin + line->xMax); |
|
if (xMin < x && x < xMax) { |
|
j = 0; |
|
while (j < line->len) { |
|
if (0.5 * (line->edge[j] + line->edge[j+1]) > yMin) { |
|
idx0 = j; |
|
break; |
|
} |
|
++j; |
|
} |
|
j = line->len - 1; |
|
while (j >= 0) { |
|
if (0.5 * (line->edge[j] + line->edge[j+1]) < yMax) { |
|
idx1 = j; |
|
break; |
|
} |
|
--j; |
|
} |
|
} |
|
break; |
|
case 2: |
|
y = 0.5 * (line->yMin + line->yMax); |
|
if (yMin < y && y < yMax) { |
|
j = 0; |
|
while (j < line->len) { |
|
if (0.5 * (line->edge[j] + line->edge[j+1]) < xMax) { |
|
idx0 = j; |
|
break; |
|
} |
|
++j; |
|
} |
|
j = line->len - 1; |
|
while (j >= 0) { |
|
if (0.5 * (line->edge[j] + line->edge[j+1]) > xMin) { |
|
idx1 = j; |
|
break; |
|
} |
|
--j; |
|
} |
|
} |
|
break; |
|
case 3: |
|
x = 0.5 * (line->xMin + line->xMax); |
|
if (xMin < x && x < xMax) { |
|
j = 0; |
|
while (j < line->len) { |
|
if (0.5 * (line->edge[j] + line->edge[j+1]) < yMax) { |
|
idx0 = j; |
|
break; |
|
} |
|
++j; |
|
} |
|
j = line->len - 1; |
|
while (j >= 0) { |
|
if (0.5 * (line->edge[j] + line->edge[j+1]) > yMin) { |
|
idx1 = j; |
|
break; |
|
} |
|
--j; |
|
} |
|
} |
|
break; |
|
} |
|
if (idx0 >= 0 && idx1 >= 0) { |
|
if (nFrags == fragsSize) { |
|
fragsSize *= 2; |
|
frags = (TextLineFrag *) |
|
grealloc(frags, fragsSize * sizeof(TextLineFrag)); |
|
} |
|
frags[nFrags].init(line, idx0, idx1 - idx0 + 1); |
|
++nFrags; |
|
if (lastRot >= 0 && line->rot != lastRot) { |
|
oneRot = gFalse; |
|
} |
|
lastRot = line->rot; |
|
} |
|
} |
|
} |
|
} |
|
} |
|
|
|
// sort the fragments and generate the string |
|
if (nFrags > 0) { |
|
|
|
for (i = 0; i < nFrags; ++i) { |
|
frags[i].computeCoords(oneRot); |
|
} |
|
assignColumns(frags, nFrags, oneRot); |
|
|
|
// if all lines in the region have the same rotation, use it; |
|
// otherwise, use the page's primary rotation |
|
if (oneRot) { |
|
qsort(frags, nFrags, sizeof(TextLineFrag), |
|
&TextLineFrag::cmpYXLineRot); |
|
} else { |
|
qsort(frags, nFrags, sizeof(TextLineFrag), |
|
&TextLineFrag::cmpYXPrimaryRot); |
|
} |
|
|
|
col = 0; |
|
multiLine = gFalse; |
|
for (i = 0; i < nFrags; ++i) { |
|
frag = &frags[i]; |
|
|
|
// insert a return |
|
if (frag->col < col || |
|
(i > 0 && fabs(frag->base - frags[i-1].base) > |
|
maxIntraLineDelta * frags[i-1].line->words->fontSize)) { |
|
s->append(eol, eolLen); |
|
col = 0; |
|
multiLine = gTrue; |
|
} |
|
|
|
// column alignment |
|
for (; col < frag->col; ++col) { |
|
s->append(space, spaceLen); |
|
} |
|
|
|
// get the fragment text |
|
col += dumpFragment(frag->line->text + frag->start, frag->len, uMap, s); |
|
} |
|
|
|
if (multiLine) { |
|
s->append(eol, eolLen); |
|
} |
|
} |
|
|
|
gfree(frags); |
|
uMap->decRefCnt(); |
|
|
|
return s; |
|
} |
|
|
|
// Compute the bounding box of the characters whose content-stream
// positions fall in [<pos>, <pos> + <length>).  Every word that
// overlaps the range contributes the box of its overlapping
// characters, and the union of those boxes is stored in *xMin, *yMin,
// *xMax, *yMax.  Returns gTrue if at least one word overlapped the
// range; returns gFalse (leaving the outputs untouched) otherwise, or
// if the page is in raw-order mode.
GBool TextPage::findCharRange(int pos, int length,
                              double *xMin, double *yMin,
                              double *xMax, double *yMax) {
  TextBlock *blk;
  TextLine *line;
  TextWord *word;
  double xMin0, xMax0, yMin0, yMax0;
  double xMin1, xMax1, yMin1, yMax1;
  GBool first;
  int i, j0, j1;

  if (rawOrder) {
    return gFalse;
  }

  //~ this doesn't correctly handle:
  //~ - ranges split across multiple lines (the highlighted region
  //~   is the bounding box of all the parts of the range)
  //~ - cases where characters don't convert one-to-one into Unicode
  first = gTrue;
  xMin0 = xMax0 = yMin0 = yMax0 = 0; // make gcc happy
  xMin1 = xMax1 = yMin1 = yMax1 = 0; // make gcc happy
  for (i = 0; i < nBlocks; ++i) {
    blk = blocks[i];
    for (line = blk->lines; line; line = line->next) {
      for (word = line->words; word; word = word->next) {
        if (pos < word->charPos + word->charLen &&
            word->charPos < pos + length) {
          // first and last character indexes (within this word)
          // covered by the range
          j0 = pos - word->charPos;
          if (j0 < 0) {
            j0 = 0;
          } else if (j0 > word->len) {
            // charLen can exceed len (see the note above about
            // non-one-to-one conversions); edge[] is only indexed up
            // to len below, so keep j0 inside that range instead of
            // reading past it
            j0 = word->len;
          }
          j1 = pos + length - 1 - word->charPos;
          if (j1 >= word->len) {
            j1 = word->len - 1;
          }
          // edges run along x for rotations 0/2 and along y for
          // rotations 1/3; for rotations 2/3 the roles of the two
          // edges as min/max are swapped
          switch (line->rot) {
          case 0:
            xMin1 = word->edge[j0];
            xMax1 = word->edge[j1 + 1];
            yMin1 = word->yMin;
            yMax1 = word->yMax;
            break;
          case 1:
            xMin1 = word->xMin;
            xMax1 = word->xMax;
            yMin1 = word->edge[j0];
            yMax1 = word->edge[j1 + 1];
            break;
          case 2:
            xMin1 = word->edge[j1 + 1];
            xMax1 = word->edge[j0];
            yMin1 = word->yMin;
            yMax1 = word->yMax;
            break;
          case 3:
            xMin1 = word->xMin;
            xMax1 = word->xMax;
            yMin1 = word->edge[j1 + 1];
            yMax1 = word->edge[j0];
            break;
          }
          // grow the accumulated bounding box
          if (first || xMin1 < xMin0) {
            xMin0 = xMin1;
          }
          if (first || xMax1 > xMax0) {
            xMax0 = xMax1;
          }
          if (first || yMin1 < yMin0) {
            yMin0 = yMin1;
          }
          if (first || yMax1 > yMax0) {
            yMax0 = yMax1;
          }
          first = gFalse;
        }
      }
    }
  }
  if (!first) {
    *xMin = xMin0;
    *xMax = xMax0;
    *yMin = yMin0;
    *yMax = yMax0;
    return gTrue;
  }
  return gFalse;
}
|
|
|
void TextPage::dump(void *outputStream, TextOutputFunc outputFunc, |
|
GBool physLayout) { |
|
UnicodeMap *uMap; |
|
TextFlow *flow; |
|
TextBlock *blk; |
|
TextLine *line; |
|
TextLineFrag *frags; |
|
TextWord *word; |
|
int nFrags, fragsSize; |
|
TextLineFrag *frag; |
|
char space[8], eol[16], eop[8]; |
|
int spaceLen, eolLen, eopLen; |
|
GBool pageBreaks; |
|
GString *s; |
|
int col, i, d, n; |
|
|
|
// get the output encoding |
|
if (!(uMap = globalParams->getTextEncoding())) { |
|
return; |
|
} |
|
spaceLen = uMap->mapUnicode(0x20, space, sizeof(space)); |
|
eolLen = 0; // make gcc happy |
|
switch (globalParams->getTextEOL()) { |
|
case eolUnix: |
|
eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol)); |
|
break; |
|
case eolDOS: |
|
eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol)); |
|
eolLen += uMap->mapUnicode(0x0a, eol + eolLen, sizeof(eol) - eolLen); |
|
break; |
|
case eolMac: |
|
eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol)); |
|
break; |
|
} |
|
eopLen = uMap->mapUnicode(0x0c, eop, sizeof(eop)); |
|
pageBreaks = globalParams->getTextPageBreaks(); |
|
|
|
//~ writing mode (horiz/vert) |
|
|
|
// output the page in raw (content stream) order |
|
if (rawOrder) { |
|
|
|
for (word = rawWords; word; word = word->next) { |
|
s = new GString(); |
|
dumpFragment(word->text, word->len, uMap, s); |
|
(*outputFunc)(outputStream, s->getCString(), s->getLength()); |
|
delete s; |
|
if (word->next && |
|
fabs(word->next->base - word->base) < |
|
maxIntraLineDelta * word->fontSize) { |
|
if (word->next->xMin > word->xMax + minWordSpacing * word->fontSize) { |
|
(*outputFunc)(outputStream, space, spaceLen); |
|
} |
|
} else { |
|
(*outputFunc)(outputStream, eol, eolLen); |
|
} |
|
} |
|
|
|
// output the page, maintaining the original physical layout |
|
} else if (physLayout) { |
|
|
|
// collect the line fragments for the page and sort them |
|
fragsSize = 256; |
|
frags = (TextLineFrag *)gmalloc(fragsSize * sizeof(TextLineFrag)); |
|
nFrags = 0; |
|
for (i = 0; i < nBlocks; ++i) { |
|
blk = blocks[i]; |
|
for (line = blk->lines; line; line = line->next) { |
|
if (nFrags == fragsSize) { |
|
fragsSize *= 2; |
|
frags = (TextLineFrag *)grealloc(frags, |
|
fragsSize * sizeof(TextLineFrag)); |
|
} |
|
frags[nFrags].init(line, 0, line->len); |
|
frags[nFrags].computeCoords(gTrue); |
|
++nFrags; |
|
} |
|
} |
|
qsort(frags, nFrags, sizeof(TextLineFrag), &TextLineFrag::cmpYXPrimaryRot); |
|
|
|
// generate output |
|
col = 0; |
|
for (i = 0; i < nFrags; ++i) { |
|
frag = &frags[i]; |
|
|
|
// column alignment |
|
for (; col < frag->col; ++col) { |
|
(*outputFunc)(outputStream, space, spaceLen); |
|
} |
|
|
|
// print the line |
|
s = new GString(); |
|
col += dumpFragment(frag->line->text + frag->start, frag->len, uMap, s); |
|
(*outputFunc)(outputStream, s->getCString(), s->getLength()); |
|
delete s; |
|
|
|
// print one or more returns if necessary |
|
if (i == nFrags - 1 || |
|
frags[i+1].col < col || |
|
fabs(frags[i+1].base - frag->base) > |
|
maxIntraLineDelta * frag->line->words->fontSize) { |
|
if (i < nFrags - 1) { |
|
d = (int)((frags[i+1].base - frag->base) / |
|
frag->line->words->fontSize); |
|
if (d < 1) { |
|
d = 1; |
|
} else if (d > 5) { |
|
d = 5; |
|
} |
|
} else { |
|
d = 1; |
|
} |
|
for (; d > 0; --d) { |
|
(*outputFunc)(outputStream, eol, eolLen); |
|
} |
|
col = 0; |
|
} |
|
} |
|
|
|
gfree(frags); |
|
|
|
// output the page, "undoing" the layout |
|
} else { |
|
for (flow = flows; flow; flow = flow->next) { |
|
for (blk = flow->blocks; blk; blk = blk->next) { |
|
for (line = blk->lines; line; line = line->next) { |
|
n = line->len; |
|
if (line->hyphenated && (line->next || blk->next)) { |
|
--n; |
|
} |
|
s = new GString(); |
|
dumpFragment(line->text, n, uMap, s); |
|
(*outputFunc)(outputStream, s->getCString(), s->getLength()); |
|
delete s; |
|
if (!line->hyphenated) { |
|
if (line->next) { |
|
(*outputFunc)(outputStream, space, spaceLen); |
|
} else if (blk->next) { |
|
//~ this is a bit of a kludge - we should really do a more |
|
//~ intelligent determination of paragraphs |
|
if (blk->next->lines->words->fontSize == |
|
blk->lines->words->fontSize) { |
|
(*outputFunc)(outputStream, space, spaceLen); |
|
} else { |
|
(*outputFunc)(outputStream, eol, eolLen); |
|
} |
|
} |
|
} |
|
} |
|
} |
|
(*outputFunc)(outputStream, eol, eolLen); |
|
(*outputFunc)(outputStream, eol, eolLen); |
|
} |
|
} |
|
|
|
// end of page |
|
if (pageBreaks) { |
|
(*outputFunc)(outputStream, eop, eopLen); |
|
(*outputFunc)(outputStream, eol, eolLen); |
|
} |
|
|
|
uMap->decRefCnt(); |
|
} |
|
|
|
// Assign a starting column to each of the <nFrags> fragments in
// <frags>.
//
// If <oneRot> is true (all fragments share one rotation), the
// fragments are sorted with cmpXYLineRot and the column numbers are
// recomputed from scratch, using only the fragments themselves: each
// fragment is pushed right far enough to clear every fragment that
// sorts before it.  Otherwise the already-assigned column numbers are
// kept and simply shifted so that the smallest one becomes 0.
//
// Note that the array is reordered in the <oneRot> case.  An empty
// array is accepted and left untouched.
void TextPage::assignColumns(TextLineFrag *frags, int nFrags, GBool oneRot) {
  TextLineFrag *frag0, *frag1;
  double pos0, end1;
  GBool ascending;
  int rot, col1, col2, i, j, k;

  // nothing to assign -- and frags[0] must not be touched
  if (nFrags <= 0) {
    return;
  }

  // all text in the region has the same rotation -- recompute the
  // column numbers based only on the text in the region
  if (oneRot) {
    qsort(frags, nFrags, sizeof(TextLineFrag), &TextLineFrag::cmpXYLineRot);
    rot = frags[0].line->rot;
    // for rotations 0 and 1 the coordinate grows along the line; for
    // rotations 2 and 3 it shrinks, so every comparison is mirrored
    ascending = rot == 0 || rot == 1;
    for (i = 0; i < nFrags; ++i) {
      frag0 = &frags[i];
      col1 = 0;
      for (j = 0; j < i; ++j) {
        frag1 = &frags[j];

        // pos0 = where frag0 starts, end1 = where frag1 ends, both
        // measured along the line direction
        switch (rot) {
        case 0:
          pos0 = frag0->xMin;
          end1 = frag1->xMax;
          break;
        case 1:
          pos0 = frag0->yMin;
          end1 = frag1->yMax;
          break;
        case 2:
          pos0 = frag0->xMax;
          end1 = frag1->xMin;
          break;
        case 3:
          pos0 = frag0->yMax;
          end1 = frag1->yMin;
          break;
        default:
          // unknown rotation: contributes column 0, which can never
          // raise col1
          continue;
        }

        if (ascending ? (pos0 >= end1) : (pos0 <= end1)) {
          // frag0 starts entirely after frag1: leave one column of
          // space after frag1's last character
          col2 = frag1->col + (frag1->line->col[frag1->start + frag1->len] -
                               frag1->line->col[frag1->start]) + 1;
        } else {
          // frag0 starts inside frag1's extent: line it up with the
          // first character of frag1 whose midpoint lies beyond pos0
          for (k = frag1->start;
               k < frag1->start + frag1->len &&
                 (ascending
                    ? pos0 >= 0.5 * (frag1->line->edge[k] +
                                     frag1->line->edge[k+1])
                    : pos0 <= 0.5 * (frag1->line->edge[k] +
                                     frag1->line->edge[k+1]));
               ++k) ;
          col2 = frag1->col +
                 frag1->line->col[k] - frag1->line->col[frag1->start];
        }
        if (col2 > col1) {
          col1 = col2;
        }
      }
      frag0->col = col1;
    }

  // the region includes text at different rotations -- use the
  // globally assigned column numbers, offset by the minimum column
  // number (i.e., shift everything over to column 0)
  } else {
    col1 = frags[0].col;
    for (i = 1; i < nFrags; ++i) {
      if (frags[i].col < col1) {
        col1 = frags[i].col;
      }
    }
    for (i = 0; i < nFrags; ++i) {
      frags[i].col -= col1;
    }
  }
}
|
|
|
// Append the <len> Unicode characters in <text> to <s>, encoded via
// <uMap>, and return the number of output columns used.
//
// For a Unicode output encoding, runs of text are emitted in reading
// order for the page's primary direction: runs going the other way
// are reversed and wrapped in explicit embedding marks (U+202A LRE /
// U+202B RLE ... U+202C PDF), and each character counts as one
// column.  For any other encoding the characters are emitted as-is
// and the column count is the number of bytes produced.
int TextPage::dumpFragment(Unicode *text, int len, UnicodeMap *uMap,
                           GString *s) {
  char lre[8], rle[8], popdf[8], buf[8];
  int lreLen, rleLen, popdfLen, n;
  int nCols, i, j, k;

  nCols = 0;

  // non-Unicode output: straight conversion, no bidi handling
  if (!uMap->isUnicode()) {
    for (i = 0; i < len; ++i) {
      n = uMap->mapUnicode(text[i], buf, sizeof(buf));
      s->append(buf, n);
      nCols += n;
    }
    return nCols;
  }

  lreLen = uMap->mapUnicode(0x202a, lre, sizeof(lre));
  rleLen = uMap->mapUnicode(0x202b, rle, sizeof(rle));
  popdfLen = uMap->mapUnicode(0x202c, popdf, sizeof(popdf));

  if (primaryLR) {

    // primary direction is left-to-right: walk forward
    i = 0;
    while (i < len) {

      // output a left-to-right section (everything up to the next
      // right-to-left character)
      j = i;
      while (j < len && !unicodeTypeR(text[j])) {
        ++j;
      }
      for (; i < j; ++i) {
        n = uMap->mapUnicode(text[i], buf, sizeof(buf));
        s->append(buf, n);
        ++nCols;
      }

      // output a right-to-left section (everything up to the next
      // left-to-right character), reversed and embedded
      while (j < len && !unicodeTypeL(text[j])) {
        ++j;
      }
      if (j > i) {
        s->append(rle, rleLen);
        for (k = j - 1; k >= i; --k) {
          n = uMap->mapUnicode(text[k], buf, sizeof(buf));
          s->append(buf, n);
          ++nCols;
        }
        s->append(popdf, popdfLen);
        i = j;
      }
    }

  } else {

    // primary direction is right-to-left: the whole fragment is
    // embedded as RTL and walked backward
    s->append(rle, rleLen);
    i = len - 1;
    while (i >= 0) {

      // output a right-to-left section
      j = i;
      while (j >= 0 && !unicodeTypeL(text[j])) {
        --j;
      }
      for (; i > j; --i) {
        n = uMap->mapUnicode(text[i], buf, sizeof(buf));
        s->append(buf, n);
        ++nCols;
      }

      // output a left-to-right section, in forward order and embedded
      while (j >= 0 && !unicodeTypeR(text[j])) {
        --j;
      }
      if (j < i) {
        s->append(lre, lreLen);
        for (k = j + 1; k <= i; ++k) {
          n = uMap->mapUnicode(text[k], buf, sizeof(buf));
          s->append(buf, n);
          ++nCols;
        }
        s->append(popdf, popdfLen);
        i = j;
      }
    }
    s->append(popdf, popdfLen);
  }

  return nCols;
}
|
|
|
#if TEXTOUT_WORD_LIST |
|
// Build and return a new TextWordList for this page; <physLayout> is
// passed through to the TextWordList constructor.
TextWordList *TextPage::makeWordList(GBool physLayout) {
  TextWordList *wordList;

  wordList = new TextWordList(this, physLayout);
  return wordList;
}
|
#endif |
|
|
|
//------------------------------------------------------------------------ |
|
// TextOutputDev |
|
//------------------------------------------------------------------------ |
|
|
|
// TextOutputFunc implementation used when writing to a file:
// <stream> is the FILE * to write to; <len> bytes of <text> are
// written.  The fwrite result is not checked.
static void outputToFile(void *stream, char *text, int len) {
  FILE *f;

  f = (FILE *)stream;
  fwrite(text, 1, len, f);
}
|
|
|
// Create a text output device that writes to a file.
//   <fileName>    - output file; "-" selects stdout; NULL means no
//                   output stream (text is only collected)
//   <physLayoutA> - stored and later passed to coalesce/dump
//   <rawOrderA>   - stored and passed to the TextPage constructor
//   <append>      - open the file in append ("ab") rather than
//                   truncate ("wb") mode
// If the file cannot be opened, an error is reported, <ok> is set to
// gFalse and no TextPage is created (<text> stays NULL).
TextOutputDev::TextOutputDev(char *fileName, GBool physLayoutA,
                             GBool rawOrderA, GBool append) {
  text = NULL;
  physLayout = physLayoutA;
  rawOrder = rawOrderA;
  ok = gTrue;

  // give the output members a defined value on every path, including
  // the no-file and open-failure paths (outputFunc used to be left
  // uninitialized there)
  outputStream = NULL;
  outputFunc = NULL;
  needClose = gFalse;

  // open file
  if (fileName) {
    if (!strcmp(fileName, "-")) {
      outputStream = stdout;
#ifdef WIN32
      // keep DOS from munging the end-of-line characters
      setmode(fileno(stdout), O_BINARY);
#endif
    } else if ((outputStream = fopen(fileName, append ? "ab" : "wb"))) {
      // we opened it, so the destructor has to close it
      needClose = gTrue;
    } else {
      error(-1, "Couldn't open text file '%s'", fileName);
      ok = gFalse;
      return;
    }
    outputFunc = &outputToFile;
  }

  // set up text object
  text = new TextPage(rawOrderA);
}
|
|
|
// Create a text output device that hands its output to a callback:
// <func> is called with <stream> as its first argument.  The stream
// is never closed by this object.  <physLayoutA> and <rawOrderA> are
// stored, and <rawOrderA> is also passed to the TextPage constructor.
TextOutputDev::TextOutputDev(TextOutputFunc func, void *stream,
                             GBool physLayoutA, GBool rawOrderA) {
  // layout options
  physLayout = physLayoutA;
  rawOrder = rawOrderA;

  // output target (owned by the caller)
  outputFunc = func;
  outputStream = stream;
  needClose = gFalse;

  // set up text object
  text = new TextPage(rawOrderA);
  ok = gTrue;
}
|
|
|
// Close the output file if this object opened it, then destroy the
// TextPage.
TextOutputDev::~TextOutputDev() {
  if (needClose) {
#ifdef MACOS
    ICS_MapRefNumAndAssign((short)((FILE *)outputStream)->handle);
#endif
    fclose((FILE *)outputStream);
  }
  // text may be NULL (failed file open); deleting a null pointer is a
  // no-op
  delete text;
}
|
|
|
// Begin a new page: forwards <state> to the TextPage.  The page
// number is not used.
void TextOutputDev::startPage(int /*pageNum*/, GfxState *state) {
  text->startPage(state);
}
|
|
|
void TextOutputDev::endPage() { |
|
text->endPage(); |
|
text->coalesce(physLayout); |
|
if (outputStream) { |
|
text->dump(outputStream, outputFunc, physLayout); |
|
} |
|
} |
|
|
|
// Font change notification: forwarded to the TextPage.
void TextOutputDev::updateFont(GfxState *state) {
  text->updateFont(state);
}
|
|
|
// Start-of-string notification: intentionally does nothing.
void TextOutputDev::beginString(GfxState * /*state*/, GString * /*s*/) {
}
|
|
|
// End-of-string notification: intentionally does nothing.
void TextOutputDev::endString(GfxState * /*state*/) {
}
|
|
|
// Character notification: the character is added to the TextPage.
// The origin coordinates are not used.
void TextOutputDev::drawChar(GfxState *state,
                             double x, double y,
                             double dx, double dy,
                             double /*originX*/, double /*originY*/,
                             CharCode c, Unicode *u, int uLen) {
  text->addChar(state, x, y, dx, dy, c, u, uLen);
}
|
|
|
// Search the current page for <s>; all arguments are passed straight
// to TextPage::findText and its result is returned.
GBool TextOutputDev::findText(Unicode *s, int len,
                              GBool startAtTop, GBool stopAtBottom,
                              GBool startAtLast, GBool stopAtLast,
                              double *xMin, double *yMin,
                              double *xMax, double *yMax) {
  GBool found;

  found = text->findText(s, len,
                         startAtTop, stopAtBottom,
                         startAtLast, stopAtLast,
                         xMin, yMin, xMax, yMax);
  return found;
}
|
|
|
// Return the text inside the given rectangle; forwards to
// TextPage::getText and returns its result.
GString *TextOutputDev::getText(double xMin, double yMin,
                                double xMax, double yMax) {
  GString *result;

  result = text->getText(xMin, yMin, xMax, yMax);
  return result;
}
|
|
|
// Find the bounding box of a range of characters; forwards to
// TextPage::findCharRange and returns its result.
GBool TextOutputDev::findCharRange(int pos, int length,
                                   double *xMin, double *yMin,
                                   double *xMax, double *yMax) {
  GBool found;

  found = text->findCharRange(pos, length, xMin, yMin, xMax, yMax);
  return found;
}
|
|
|
#if TEXTOUT_WORD_LIST |
|
// Build a word list for the current page, using this device's
// physical-layout setting; forwards to TextPage::makeWordList.
TextWordList *TextOutputDev::makeWordList() {
  TextWordList *wordList;

  wordList = text->makeWordList(physLayout);
  return wordList;
}
|
#endif
|
|
|