You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
474 lines
9.0 KiB
474 lines
9.0 KiB
//======================================================================== |
|
// |
|
// Lexer.cc |
|
// |
|
// Copyright 1996-2003 Glyph & Cog, LLC |
|
// |
|
//======================================================================== |
|
|
|
#include <aconf.h> |
|
|
|
#ifdef USE_GCC_PRAGMAS |
|
#pragma implementation |
|
#endif |
|
|
|
#include <stdlib.h>
#include <stddef.h>
#include <limits.h>
#include <string.h>
#include <ctype.h>
#include "Lexer.h"
#include "Error.h"
|
|
|
//------------------------------------------------------------------------ |
|
|
|
// Character classification table, indexed by byte value (0..255).
//   1 -> white space: NUL, TAB, LF, FF, CR, SPACE
//   2 -> delimiter:   % ( ) / < > [ ] { }
//   0 -> regular character
// Any non-zero entry ends a name or command token.  The table is
// read-only, so it is declared const.
static const char specialChars[256] = {
  1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,   // 0x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 1x
  1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2,   // 2x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0,   // 3x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 4x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,   // 5x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 6x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,   // 7x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 8x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 9x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // ax
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // bx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // cx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // dx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // ex
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0    // fx
};
|
|
|
//------------------------------------------------------------------------ |
|
// Lexer |
|
//------------------------------------------------------------------------ |
|
|
|
// Build a lexer over a single stream.  The stream is wrapped in a
// freshly allocated one-element array; freeArray is set so that the
// destructor deletes that array.
Lexer::Lexer(XRef *xref, Stream *str) {
  Object streamCopy;

  strPtr = 0;
  freeArray = gTrue;

  curStr.initStream(str);
  streams = new Array(xref);
  streams->add(curStr.copy(&streamCopy));

  // start reading from the beginning of the stream
  curStr.streamReset();
}
|
|
|
// Build a lexer over <obj>, which is either a single stream or an
// array of streams.  A single stream is wrapped in a newly allocated
// array (deleted by the destructor); an existing array is borrowed
// and left alone on destruction.
Lexer::Lexer(XRef *xref, Object *obj) {
  Object streamCopy;

  strPtr = 0;
  freeArray = obj->isStream() ? gTrue : gFalse;

  if (freeArray) {
    streams = new Array(xref);
    streams->add(obj->copy(&streamCopy));
  } else {
    streams = obj->getArray();
  }

  // open the first stream, if there is one; otherwise curStr is left
  // untouched
  if (streams->getLength() > 0) {
    streams->get(strPtr, &curStr);
    curStr.streamReset();
  }
}
|
|
|
// Close and release the current stream (if any), then delete the
// stream array if this lexer allocated it.
Lexer::~Lexer() {
  const GBool streamOpen = curStr.isNone() ? gFalse : gTrue;

  if (streamOpen) {
    curStr.streamClose();
    curStr.free();
  }

  if (!freeArray) {
    return;
  }
  delete streams;
}
|
|
|
int Lexer::getChar() { |
|
int c; |
|
|
|
c = EOF; |
|
while (!curStr.isNone() && (c = curStr.streamGetChar()) == EOF) { |
|
curStr.streamClose(); |
|
curStr.free(); |
|
++strPtr; |
|
if (strPtr < streams->getLength()) { |
|
streams->get(strPtr, &curStr); |
|
curStr.streamReset(); |
|
} |
|
} |
|
return c; |
|
} |
|
|
|
int Lexer::lookChar() { |
|
if (curStr.isNone()) { |
|
return EOF; |
|
} |
|
return curStr.streamLookChar(); |
|
} |
|
|
|
// Read the next token from the input and store it in <obj>, which is
// also the return value.
//
// Leading white space and '%' comments (up to the next CR or LF) are
// skipped first.  The token is then classified by its first character:
//   - digit, '-' or '.'  -> integer or real
//   - '('                -> literal string (nested parens, backslash
//                           escapes and octal codes are handled)
//   - '/'                -> name ("#xx" hex escapes are decoded)
//   - '[' or ']'         -> command
//   - '<'                -> "<<" command, or hex string
//   - '>'                -> ">>" command, otherwise an error object
//   - ')', '{', '}'      -> error object
//   - anything else      -> "true"/"false" (bool), "null", or command
// At end of input an EOF object is produced.
//
// Strings of any length are supported: tokBuf is flushed into a
// GString every tokBufSize characters.  Names and commands are limited
// to tokBufSize-1 characters; longer ones are cut off with an error.
//
// Integers whose magnitude exceeds INT_MAX are returned as reals
// instead of overflowing a signed int (which is undefined behavior).
Object *Lexer::getObj(Object *obj) {
  char *p;
  int c, c2;
  GBool comment, neg, done, overflow;
  int numParen;
  int xi, digit;
  double xf, scale;
  GString *s;
  int n, m;

  // skip whitespace and comments
  comment = gFalse;
  while (1) {
    if ((c = getChar()) == EOF) {
      return obj->initEOF();
    }
    if (comment) {
      if (c == '\r' || c == '\n')
        comment = gFalse;
    } else if (c == '%') {
      comment = gTrue;
    } else if (specialChars[c] != 1) {
      break;
    }
  }

  // start reading token
  switch (c) {

  // number
  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9':
  case '-': case '.':
    neg = gFalse;
    overflow = gFalse;
    xi = 0;
    xf = 0;
    if (c == '-') {
      neg = gTrue;
    } else if (c == '.') {
      goto doReal;
    } else {
      xi = c - '0';
    }
    while (1) {
      c = lookChar();
      if (isdigit(c)) {
        getChar();
        digit = c - '0';
        if (overflow) {
          // already past the int range: keep accumulating as a double
          xf = xf * 10.0 + digit;
        } else if (xi > (INT_MAX - digit) / 10) {
          // xi * 10 + digit would not fit in an int: switch to double
          overflow = gTrue;
          xf = xi * 10.0 + digit;
        } else {
          xi = xi * 10 + digit;
        }
      } else if (c == '.') {
        getChar();
        goto doReal;
      } else {
        break;
      }
    }
    if (overflow) {
      obj->initReal(neg ? -xf : xf);
    } else {
      if (neg)
        xi = -xi;
      obj->initInt(xi);
    }
    break;
  doReal:
    // xf already holds the integer part if it overflowed an int
    if (!overflow)
      xf = xi;
    scale = 0.1;
    while (1) {
      c = lookChar();
      if (!isdigit(c)) {
        break;
      }
      getChar();
      xf = xf + scale * (c - '0');
      scale *= 0.1;
    }
    if (neg)
      xf = -xf;
    obj->initReal(xf);
    break;

  // string
  case '(':
    p = tokBuf;
    n = 0;
    numParen = 1;
    done = gFalse;
    s = NULL;
    do {
      // c2 is the character to append; EOF means "append nothing"
      c2 = EOF;
      switch (c = getChar()) {

      case EOF:
#if 0
      // This breaks some PDF files, e.g., ones from Photoshop.
      case '\r':
      case '\n':
#endif
        error(getPos(), "Unterminated string");
        done = gTrue;
        break;

      case '(':
        ++numParen;
        c2 = c;
        break;

      case ')':
        // only the paren matching the opening one ends the string
        if (--numParen == 0) {
          done = gTrue;
        } else {
          c2 = c;
        }
        break;

      case '\\':
        switch (c = getChar()) {
        case 'n':
          c2 = '\n';
          break;
        case 'r':
          c2 = '\r';
          break;
        case 't':
          c2 = '\t';
          break;
        case 'b':
          c2 = '\b';
          break;
        case 'f':
          c2 = '\f';
          break;
        case '\\':
        case '(':
        case ')':
          c2 = c;
          break;
        case '0': case '1': case '2': case '3':
        case '4': case '5': case '6': case '7':
          // octal escape: one to three octal digits
          c2 = c - '0';
          c = lookChar();
          if (c >= '0' && c <= '7') {
            getChar();
            c2 = (c2 << 3) + (c - '0');
            c = lookChar();
            if (c >= '0' && c <= '7') {
              getChar();
              c2 = (c2 << 3) + (c - '0');
            }
          }
          break;
        case '\r':
          // backslash + end-of-line: line continuation, append nothing
          c = lookChar();
          if (c == '\n') {
            getChar();
          }
          break;
        case '\n':
          break;
        case EOF:
          error(getPos(), "Unterminated string");
          done = gTrue;
          break;
        default:
          // unknown escape: the backslash is dropped
          c2 = c;
          break;
        }
        break;

      default:
        c2 = c;
        break;
      }

      if (c2 != EOF) {
        // buffer full: flush it into the GString and start over
        if (n == tokBufSize) {
          if (!s)
            s = new GString(tokBuf, tokBufSize);
          else
            s->append(tokBuf, tokBufSize);
          p = tokBuf;
          n = 0;
        }
        *p++ = (char)c2;
        ++n;
      }
    } while (!done);
    if (!s)
      s = new GString(tokBuf, n);
    else
      s->append(tokBuf, n);
    obj->initString(s);
    break;

  // name
  case '/':
    p = tokBuf;
    n = 0;
    while ((c = lookChar()) != EOF && !specialChars[c]) {
      getChar();
      if (c == '#') {
        // "#xx" hex escape; a '#' not followed by a hex digit is
        // kept as a literal '#'
        c2 = lookChar();
        if (c2 >= '0' && c2 <= '9') {
          c = c2 - '0';
        } else if (c2 >= 'A' && c2 <= 'F') {
          c = c2 - 'A' + 10;
        } else if (c2 >= 'a' && c2 <= 'f') {
          c = c2 - 'a' + 10;
        } else {
          goto notEscChar;
        }
        getChar();
        c <<= 4;
        c2 = getChar();
        if (c2 >= '0' && c2 <= '9') {
          c += c2 - '0';
        } else if (c2 >= 'A' && c2 <= 'F') {
          c += c2 - 'A' + 10;
        } else if (c2 >= 'a' && c2 <= 'f') {
          c += c2 - 'a' + 10;
        } else {
          error(getPos(), "Illegal digit in hex char in name");
        }
      }
     notEscChar:
      // leave room for the terminating NUL
      if (++n == tokBufSize) {
        error(getPos(), "Name token too long");
        break;
      }
      *p++ = c;
    }
    *p = '\0';
    obj->initName(tokBuf);
    break;

  // array punctuation
  case '[':
  case ']':
    tokBuf[0] = c;
    tokBuf[1] = '\0';
    obj->initCmd(tokBuf);
    break;

  // hex string or dict punctuation
  case '<':
    c = lookChar();

    // dict punctuation
    if (c == '<') {
      getChar();
      tokBuf[0] = tokBuf[1] = '<';
      tokBuf[2] = '\0';
      obj->initCmd(tokBuf);

    // hex string
    } else {
      p = tokBuf;
      // m counts the hex digits collected for the current byte (c2)
      m = n = 0;
      c2 = 0;
      s = NULL;
      while (1) {
        c = getChar();
        if (c == '>') {
          break;
        } else if (c == EOF) {
          error(getPos(), "Unterminated hex string");
          break;
        } else if (specialChars[c] != 1) {
          // white space inside a hex string is ignored
          c2 = c2 << 4;
          if (c >= '0' && c <= '9')
            c2 += c - '0';
          else if (c >= 'A' && c <= 'F')
            c2 += c - 'A' + 10;
          else if (c >= 'a' && c <= 'f')
            c2 += c - 'a' + 10;
          else
            error(getPos(), "Illegal character <%02x> in hex string", c);
          if (++m == 2) {
            // buffer full: flush it into the GString and start over
            if (n == tokBufSize) {
              if (!s)
                s = new GString(tokBuf, tokBufSize);
              else
                s->append(tokBuf, tokBufSize);
              p = tokBuf;
              n = 0;
            }
            *p++ = (char)c2;
            ++n;
            c2 = 0;
            m = 0;
          }
        }
      }
      if (!s)
        s = new GString(tokBuf, n);
      else
        s->append(tokBuf, n);
      // odd number of digits: the last digit is the high nibble
      if (m == 1)
        s->append((char)(c2 << 4));
      obj->initString(s);
    }
    break;

  // dict punctuation
  case '>':
    c = lookChar();
    if (c == '>') {
      getChar();
      tokBuf[0] = tokBuf[1] = '>';
      tokBuf[2] = '\0';
      obj->initCmd(tokBuf);
    } else {
      error(getPos(), "Illegal character '>'");
      obj->initError();
    }
    break;

  // error
  case ')':
  case '{':
  case '}':
    error(getPos(), "Illegal character '%c'", c);
    obj->initError();
    break;

  // command
  default:
    p = tokBuf;
    *p++ = c;
    n = 1;
    while ((c = lookChar()) != EOF && !specialChars[c]) {
      getChar();
      // leave room for the terminating NUL
      if (++n == tokBufSize) {
        error(getPos(), "Command token too long");
        break;
      }
      *p++ = c;
    }
    *p = '\0';
    if (tokBuf[0] == 't' && !strcmp(tokBuf, "true")) {
      obj->initBool(gTrue);
    } else if (tokBuf[0] == 'f' && !strcmp(tokBuf, "false")) {
      obj->initBool(gFalse);
    } else if (tokBuf[0] == 'n' && !strcmp(tokBuf, "null")) {
      obj->initNull();
    } else {
      obj->initCmd(tokBuf);
    }
    break;
  }

  return obj;
}
|
|
|
void Lexer::skipToNextLine() { |
|
int c; |
|
|
|
while (1) { |
|
c = getChar(); |
|
if (c == EOF || c == '\n') { |
|
return; |
|
} |
|
if (c == '\r') { |
|
if ((c = lookChar()) == '\n') { |
|
getChar(); |
|
} |
|
return; |
|
} |
|
} |
|
}
|
|
|