You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
533 lines
12 KiB
533 lines
12 KiB
//======================================================================== |
|
// |
|
// CharCodeToUnicode.cc |
|
// |
|
// Copyright 2001-2003 Glyph & Cog, LLC |
|
// |
|
//======================================================================== |
|
|
|
#include <aconf.h> |
|
|
|
#ifdef USE_GCC_PRAGMAS |
|
#pragma implementation |
|
#endif |
|
|
|
#include <stdio.h> |
|
#include <string.h> |
|
#include "gmem.h" |
|
#include "gfile.h" |
|
#include "GString.h" |
|
#include "Error.h" |
|
#include "GlobalParams.h" |
|
#include "PSTokenizer.h" |
|
#include "CharCodeToUnicode.h" |
|
|
|
//------------------------------------------------------------------------ |
|
|
|
#define maxUnicodeString 8 |
|
|
|
struct CharCodeToUnicodeString { |
|
CharCode c; |
|
Unicode u[maxUnicodeString]; |
|
int len; |
|
}; |
|
|
|
//------------------------------------------------------------------------ |
|
|
|
// Character-source callback that reads from a NUL-terminated string.
// <data> points to a char* cursor; each call returns the byte under
// the cursor and advances it.  Once the terminating NUL is reached,
// EOF is returned and the cursor is left in place.
static int getCharFromString(void *data) {
  char **cursor = (char **)data;
  char *p = *cursor;

  if (!*p) {
    return EOF;
  }
  *cursor = p + 1;
  // Go through unsigned char (as fgetc() does) so that bytes >= 0x80
  // are returned as 128..255; with a signed plain char, 0xff would
  // otherwise sign-extend to -1 and be mistaken for EOF.
  return (unsigned char)*p;
}
|
|
|
// Character-source callback that reads from a stdio stream.  <data> is
// the FILE* to read; the result is whatever fgetc() returns for it.
static int getCharFromFile(void *data) {
  FILE *stream = (FILE *)data;
  int ch = fgetc(stream);

  return ch;
}
|
|
|
//------------------------------------------------------------------------ |
|
|
|
// Build a mapping from a cidToUnicode file.  Line k of the file (counting
// from zero) supplies the hex Unicode value for code k; a line that
// does not parse is reported and mapped to 0.  The new object is tagged
// with a copy of <collection>.  Returns NULL if the file can't be opened.
CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *fileName,
                                                        GString *collection) {
  FILE *in;
  Unicode *table;
  CharCode capacity, count;
  char lineBuf[64];
  Unicode value;
  CharCodeToUnicode *result;

  in = fopen(fileName->getCString(), "r");
  if (!in) {
    error(-1, "Couldn't open cidToUnicode file '%s'",
          fileName->getCString());
    return NULL;
  }

  capacity = 32768;
  table = (Unicode *)gmalloc(capacity * sizeof(Unicode));

  for (count = 0; getLine(lineBuf, sizeof(lineBuf), in); ++count) {
    // grow the table geometrically whenever it fills up
    if (count == capacity) {
      capacity *= 2;
      table = (Unicode *)grealloc(table, capacity * sizeof(Unicode));
    }
    if (sscanf(lineBuf, "%x", &value) != 1) {
      error(-1, "Bad line (%d) in cidToUnicode file '%s'",
            (int)(count + 1), fileName->getCString());
      value = 0;
    }
    table[count] = value;
  }
  fclose(in);

  // the constructor is asked to copy the table, so the scratch copy
  // is released here
  result = new CharCodeToUnicode(collection->copy(), table, count, gTrue,
                                 NULL, 0, 0);
  gfree(table);
  return result;
}
|
|
|
// Build a mapping from a unicodeToUnicode file.  Each line holds
// whitespace-separated hex numbers: a source code followed by one to
// maxUnicodeString replacement values (further values on the line are
// ignored).  A single replacement goes into the direct map; longer
// replacements go into the string map.  Malformed lines are reported
// and skipped.  The new object is tagged with a copy of <fileName>.
// Returns NULL if the file can't be opened.
CharCodeToUnicode *CharCodeToUnicode::parseUnicodeToUnicode(
                                                    GString *fileName) {
  // Largest source code accepted (top of the Unicode codespace).
  // Without a bound, a huge value would make the table-doubling loop
  // below wrap <size> around to 0 and never terminate (assuming
  // CharCode is a 32-bit unsigned type), and <len> = u0 + 1 could wrap
  // as well.
  const CharCode maxSrcCode = 0x10ffff;
  FILE *f;
  Unicode *mapA;
  CharCodeToUnicodeString *sMapA;
  CharCode size, oldSize, len, sMapSizeA, sMapLenA;
  char buf[256];
  char *tok;
  Unicode u0;
  Unicode uBuf[maxUnicodeString];
  CharCodeToUnicode *ctu;
  int line, n, i;

  if (!(f = fopen(fileName->getCString(), "r"))) {
    error(-1, "Couldn't open unicodeToUnicode file '%s'",
          fileName->getCString());
    return NULL;
  }

  size = 4096;
  mapA = (Unicode *)gmalloc(size * sizeof(Unicode));
  memset(mapA, 0, size * sizeof(Unicode));
  len = 0;
  sMapA = NULL;
  sMapSizeA = sMapLenA = 0;

  line = 0;
  while (getLine(buf, sizeof(buf), f)) {
    ++line;

    // first token: the source code
    if (!(tok = strtok(buf, " \t\r\n")) ||
        sscanf(tok, "%x", &u0) != 1 ||
        u0 > maxSrcCode) {
      error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
            line, fileName->getCString());
      continue;
    }

    // remaining tokens: the replacement values
    n = 0;
    while (n < maxUnicodeString) {
      if (!(tok = strtok(NULL, " \t\r\n"))) {
        break;
      }
      if (sscanf(tok, "%x", &uBuf[n]) != 1) {
        error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
              line, fileName->getCString());
        break;
      }
      ++n;
    }
    if (n < 1) {
      error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
            line, fileName->getCString());
      continue;
    }

    // grow the direct map (zero-filling the new tail) so u0 is a
    // valid index
    if (u0 >= size) {
      oldSize = size;
      while (u0 >= size) {
        size *= 2;
      }
      mapA = (Unicode *)grealloc(mapA, size * sizeof(Unicode));
      memset(mapA + oldSize, 0, (size - oldSize) * sizeof(Unicode));
    }

    if (n == 1) {
      mapA[u0] = uBuf[0];
    } else {
      // a zero in the direct map sends lookups to the string map
      mapA[u0] = 0;
      if (sMapLenA == sMapSizeA) {
        sMapSizeA += 16;
        sMapA = (CharCodeToUnicodeString *)
                  grealloc(sMapA, sMapSizeA * sizeof(CharCodeToUnicodeString));
      }
      sMapA[sMapLenA].c = u0;
      for (i = 0; i < n; ++i) {
        sMapA[sMapLenA].u[i] = uBuf[i];
      }
      sMapA[sMapLenA].len = n;
      ++sMapLenA;
    }

    // <len> is one past the highest source code seen so far
    if (u0 >= len) {
      len = u0 + 1;
    }
  }
  fclose(f);

  // the constructor copies mapA (so it is freed here) but keeps the
  // sMapA pointer as-is
  ctu = new CharCodeToUnicode(fileName->copy(), mapA, len, gTrue,
                              sMapA, sMapLenA, sMapSizeA);
  gfree(mapA);
  return ctu;
}
|
|
|
// Build an untagged mapping for an 8-bit font from <toUnicode>, whose
// first 256 entries are copied into the new object.
CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) {
  CharCodeToUnicode *ctu;

  ctu = new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0, 0);
  return ctu;
}
|
|
|
// Create a fresh, untagged mapping and fill it by parsing the ToUnicode
// CMap text held in <buf>.  <nBits> is forwarded to parseCMap1().
CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) {
  CharCodeToUnicode *result = new CharCodeToUnicode(NULL);
  // read cursor advanced by getCharFromString() as the text is consumed
  char *cursor = buf->getCString();

  result->parseCMap1(&getCharFromString, &cursor, nBits);
  return result;
}
|
|
|
// Parse the ToUnicode CMap text held in <buf> and add its mappings to
// this object.  <nBits> is forwarded to parseCMap1().
void CharCodeToUnicode::mergeCMap(GString *buf, int nBits) {
  // read cursor advanced by getCharFromString() as the text is consumed
  char *cursor = buf->getCString();

  this->parseCMap1(&getCharFromString, &cursor, nBits);
}
|
|
|
void CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data, |
|
int nBits) { |
|
PSTokenizer *pst; |
|
char tok1[256], tok2[256], tok3[256]; |
|
int nDigits, n1, n2, n3; |
|
CharCode i; |
|
CharCode code1, code2; |
|
GString *name; |
|
FILE *f; |
|
|
|
nDigits = nBits / 4; |
|
pst = new PSTokenizer(getCharFunc, data); |
|
pst->getToken(tok1, sizeof(tok1), &n1); |
|
while (pst->getToken(tok2, sizeof(tok2), &n2)) { |
|
if (!strcmp(tok2, "usecmap")) { |
|
if (tok1[0] == '/') { |
|
name = new GString(tok1 + 1); |
|
if ((f = globalParams->findToUnicodeFile(name))) { |
|
parseCMap1(&getCharFromFile, f, nBits); |
|
fclose(f); |
|
} else { |
|
error(-1, "Couldn't find ToUnicode CMap file for '%s'", |
|
name->getCString()); |
|
} |
|
delete name; |
|
} |
|
pst->getToken(tok1, sizeof(tok1), &n1); |
|
} else if (!strcmp(tok2, "beginbfchar")) { |
|
while (pst->getToken(tok1, sizeof(tok1), &n1)) { |
|
if (!strcmp(tok1, "endbfchar")) { |
|
break; |
|
} |
|
if (!pst->getToken(tok2, sizeof(tok2), &n2) || |
|
!strcmp(tok2, "endbfchar")) { |
|
error(-1, "Illegal entry in bfchar block in ToUnicode CMap"); |
|
break; |
|
} |
|
if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' && |
|
tok2[0] == '<' && tok2[n2 - 1] == '>')) { |
|
error(-1, "Illegal entry in bfchar block in ToUnicode CMap"); |
|
continue; |
|
} |
|
tok1[n1 - 1] = tok2[n2 - 1] = '\0'; |
|
if (sscanf(tok1 + 1, "%x", &code1) != 1) { |
|
error(-1, "Illegal entry in bfchar block in ToUnicode CMap"); |
|
continue; |
|
} |
|
addMapping(code1, tok2 + 1, n2 - 1, 0); |
|
} |
|
pst->getToken(tok1, sizeof(tok1), &n1); |
|
} else if (!strcmp(tok2, "beginbfrange")) { |
|
while (pst->getToken(tok1, sizeof(tok1), &n1)) { |
|
if (!strcmp(tok1, "endbfrange")) { |
|
break; |
|
} |
|
if (!pst->getToken(tok2, sizeof(tok2), &n2) || |
|
!strcmp(tok2, "endbfrange") || |
|
!pst->getToken(tok3, sizeof(tok3), &n3) || |
|
!strcmp(tok3, "endbfrange")) { |
|
error(-1, "Illegal entry in bfrange block in ToUnicode CMap"); |
|
break; |
|
} |
|
if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' && |
|
n2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>')) { |
|
error(-1, "Illegal entry in bfrange block in ToUnicode CMap"); |
|
continue; |
|
} |
|
tok1[n1 - 1] = tok2[n2 - 1] = '\0'; |
|
if (sscanf(tok1 + 1, "%x", &code1) != 1 || |
|
sscanf(tok2 + 1, "%x", &code2) != 1) { |
|
error(-1, "Illegal entry in bfrange block in ToUnicode CMap"); |
|
continue; |
|
} |
|
if (!strcmp(tok3, "[")) { |
|
i = 0; |
|
while (pst->getToken(tok1, sizeof(tok1), &n1) && |
|
code1 + i <= code2) { |
|
if (!strcmp(tok1, "]")) { |
|
break; |
|
} |
|
if (tok1[0] == '<' && tok1[n1 - 1] == '>') { |
|
tok1[n1 - 1] = '\0'; |
|
addMapping(code1 + i, tok1 + 1, n1 - 2, 0); |
|
} else { |
|
error(-1, "Illegal entry in bfrange block in ToUnicode CMap"); |
|
} |
|
++i; |
|
} |
|
} else if (tok3[0] == '<' && tok3[n3 - 1] == '>') { |
|
tok3[n3 - 1] = '\0'; |
|
for (i = 0; code1 <= code2; ++code1, ++i) { |
|
addMapping(code1, tok3 + 1, n3 - 2, i); |
|
} |
|
|
|
} else { |
|
error(-1, "Illegal entry in bfrange block in ToUnicode CMap"); |
|
} |
|
} |
|
pst->getToken(tok1, sizeof(tok1), &n1); |
|
} else { |
|
strcpy(tok1, tok2); |
|
} |
|
} |
|
delete pst; |
|
} |
|
|
|
// Record a mapping for <code>.  <uStr> holds <n> hex digits.  Up to
// four digits are parsed as one value and stored in the direct map,
// with <offset> added.  Longer strings are split into 4-digit groups
// and stored in the string map, with <offset> added to the last stored
// value; at most maxUnicodeString groups are kept.
void CharCodeToUnicode::addMapping(CharCode code, char *uStr, int n,
                                   int offset) {
  CharCode oldLen, i;
  Unicode u;
  char uHex[5];
  int j;

  // Arbitrary upper limit on the code: keeps (code + 256) below from
  // wrapping around and keeps the direct map to a sane size.
  if (code > 0xffffff) {
    error(-1, "Illegal entry in ToUnicode CMap");
    return;
  }

  // grow the direct map to the next multiple of 256 above <code>,
  // zero-filling the new entries
  if (code >= mapLen) {
    oldLen = mapLen;
    mapLen = (code + 256) & ~255;
    map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode));
    for (i = oldLen; i < mapLen; ++i) {
      map[i] = 0;
    }
  }

  if (n <= 4) {
    if (sscanf(uStr, "%x", &u) != 1) {
      error(-1, "Illegal entry in ToUnicode CMap");
      return;
    }
    map[code] = u + offset;
  } else {
    if (sMapLen >= sMapSize) {
      sMapSize = sMapSize + 16;
      sMap = (CharCodeToUnicodeString *)
               grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString));
    }
    // a zero in the direct map sends lookups to the string map
    map[code] = 0;
    sMap[sMapLen].c = code;
    // never record more values than u[] can hold; otherwise the
    // "+= offset" below (and later readers) would index past its end
    sMap[sMapLen].len = n / 4;
    if (sMap[sMapLen].len > maxUnicodeString) {
      sMap[sMapLen].len = maxUnicodeString;
    }
    for (j = 0; j < sMap[sMapLen].len; ++j) {
      strncpy(uHex, uStr + j*4, 4);
      uHex[4] = '\0';
      if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) {
        error(-1, "Illegal entry in ToUnicode CMap");
        // don't leave the slot uninitialized
        sMap[sMapLen].u[j] = 0;
      }
    }
    sMap[sMapLen].u[sMap[sMapLen].len - 1] += offset;
    ++sMapLen;
  }
}
|
|
|
// Create a mapping with a zero-filled 256-entry direct map and an
// empty string map.  <tagA> is stored as the tag (the destructor
// deletes it); the reference count starts at 1.
CharCodeToUnicode::CharCodeToUnicode(GString *tagA) {
  CharCode idx;

  refCnt = 1;
  tag = tagA;

  sMap = NULL;
  sMapSize = 0;
  sMapLen = 0;

  mapLen = 256;
  map = (Unicode *)gmalloc(mapLen * sizeof(Unicode));
  idx = 0;
  while (idx < mapLen) {
    map[idx] = 0;
    ++idx;
  }

#if MULTITHREADED
  gInitMutex(&mutex);
#endif
}
|
|
|
// Create a mapping from prebuilt tables.  <tagA>, <sMapA> and (when
// <copyMap> is false) <mapA> are stored as given, and the destructor
// releases them.  When <copyMap> is true, the first <mapLenA> entries
// of <mapA> are duplicated and the caller keeps its array.  The
// reference count starts at 1.
CharCodeToUnicode::CharCodeToUnicode(GString *tagA, Unicode *mapA,
                                     CharCode mapLenA, GBool copyMap,
                                     CharCodeToUnicodeString *sMapA,
                                     int sMapLenA, int sMapSizeA) {
  refCnt = 1;
  tag = tagA;

  mapLen = mapLenA;
  map = mapA;
  if (copyMap) {
    map = (Unicode *)gmalloc(mapLen * sizeof(Unicode));
    memcpy(map, mapA, mapLen * sizeof(Unicode));
  }

  sMap = sMapA;
  sMapSize = sMapSizeA;
  sMapLen = sMapLenA;

#if MULTITHREADED
  gInitMutex(&mutex);
#endif
}
|
|
|
// Release the tag, the direct map and the string map.
CharCodeToUnicode::~CharCodeToUnicode() {
  // deleting a null pointer is a no-op, so no test is needed
  delete tag;

  gfree(map);
  if (sMap) {
    gfree(sMap);
  }

#if MULTITHREADED
  gDestroyMutex(&mutex);
#endif
}
|
|
|
// Add one reference to this object.  In MULTITHREADED builds the
// counter is updated while holding the object's mutex.
void CharCodeToUnicode::incRefCnt() {
#if MULTITHREADED
  gLockMutex(&mutex);
#endif

  refCnt += 1;

#if MULTITHREADED
  gUnlockMutex(&mutex);
#endif
}
|
|
|
void CharCodeToUnicode::decRefCnt() { |
|
GBool done; |
|
|
|
#if MULTITHREADED |
|
gLockMutex(&mutex); |
|
#endif |
|
done = --refCnt == 0; |
|
#if MULTITHREADED |
|
gUnlockMutex(&mutex); |
|
#endif |
|
if (done) { |
|
delete this; |
|
} |
|
} |
|
|
|
// True if this object has a tag and the tag compares equal to <tagA>.
GBool CharCodeToUnicode::match(GString *tagA) {
  return (tag != NULL) && (tag->cmp(tagA) == 0);
}
|
|
|
// Set the mapping for char code <c> to the <len> values in <u>.  A
// single value goes into the direct map; anything else goes into the
// string map, keeping at most maxUnicodeString values.  Codes outside
// the direct map (c >= mapLen) are ignored, matching mapToUnicode(),
// which never reports a mapping for them.
void CharCodeToUnicode::setMapping(CharCode c, Unicode *u, int len) {
  int i, j;

  // map[] has mapLen entries; writing beyond it would corrupt memory
  if (c >= mapLen) {
    return;
  }

  if (len == 1) {
    map[c] = u[0];
    return;
  }

  // Reuse an existing string-map entry for <c> if there is one:
  // lookups stop at the first match, so appending a second entry
  // would leave the old mapping in effect.
  for (i = 0; i < sMapLen; ++i) {
    if (sMap[i].c == c) {
      break;
    }
  }
  if (i == sMapLen) {
    if (sMapLen == sMapSize) {
      sMapSize += 8;
      sMap = (CharCodeToUnicodeString *)
               grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString));
    }
    ++sMapLen;
  }

  // a zero in the direct map sends lookups to the string map
  map[c] = 0;
  sMap[i].c = c;
  // record only as many values as are actually stored in u[]
  sMap[i].len = (len > maxUnicodeString) ? maxUnicodeString : len;
  for (j = 0; j < sMap[i].len; ++j) {
    sMap[i].u[j] = u[j];
  }
}
|
|
|
// Look up char code <c> and store its Unicode value(s) in <u>, which
// has room for <size> values.  Returns the number of values stored, or
// 0 if <c> has no mapping.  A non-zero direct-map entry yields exactly
// one value; otherwise the string map is searched and the first entry
// for <c> is used.
int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode *u, int size) {
  int i, j, n;

  if (c >= mapLen) {
    return 0;
  }
  if (map[c]) {
    u[0] = map[c];
    return 1;
  }
  for (i = 0; i < sMapLen; ++i) {
    if (sMap[i].c == c) {
      // copy no more than the entry holds, than u[] inside the entry
      // can hold (a recorded length may exceed maxUnicodeString), and
      // than the caller's buffer can take
      n = sMap[i].len;
      if (n > maxUnicodeString) {
        n = maxUnicodeString;
      }
      if (n > size) {
        n = size;
      }
      for (j = 0; j < n; ++j) {
        u[j] = sMap[i].u[j];
      }
      return j;
    }
  }
  return 0;
}
|
|
|
//------------------------------------------------------------------------ |
|
|
|
// Create a cache with <sizeA> slots, all initially empty.
CharCodeToUnicodeCache::CharCodeToUnicodeCache(int sizeA) {
  int slot;

  size = sizeA;
  cache = (CharCodeToUnicode **)gmalloc(size * sizeof(CharCodeToUnicode *));
  slot = 0;
  while (slot < size) {
    cache[slot] = NULL;
    ++slot;
  }
}
|
|
|
// Drop the cache's reference to every cached object, then free the
// slot array.
CharCodeToUnicodeCache::~CharCodeToUnicodeCache() {
  int slot;

  for (slot = 0; slot < size; ++slot) {
    if (!cache[slot]) {
      continue;
    }
    cache[slot]->decRefCnt();
  }
  gfree(cache);
}
|
|
|
// Find the cached object whose tag matches <tag>.  On a hit the object
// is moved to slot 0 (the entries in front of it shift back by one),
// its reference count is incremented, and it is returned.  Returns
// NULL on a miss.
CharCodeToUnicode *CharCodeToUnicodeCache::getCharCodeToUnicode(GString *tag) {
  CharCodeToUnicode *found;
  int hit, k;

  for (hit = 0; hit < size; ++hit) {
    if (!cache[hit] || !cache[hit]->match(tag)) {
      continue;
    }
    found = cache[hit];
    // slide slots 0..hit-1 back by one; nothing moves when hit is 0
    for (k = hit; k > 0; --k) {
      cache[k] = cache[k - 1];
    }
    cache[0] = found;
    found->incRefCnt();
    return found;
  }
  return NULL;
}
|
|
|
// Insert <ctu> at the front of the cache, taking a reference to it.
// The object in the last slot, if any, loses the cache's reference,
// and every other entry shifts back by one slot.
void CharCodeToUnicodeCache::add(CharCodeToUnicode *ctu) {
  CharCodeToUnicode *evicted;
  int slot;

  evicted = cache[size - 1];
  if (evicted) {
    evicted->decRefCnt();
  }
  for (slot = size - 1; slot > 0; slot--) {
    cache[slot] = cache[slot - 1];
  }
  cache[0] = ctu;
  ctu->incRefCnt();
}
|
|
|