You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
392 lines
9.6 KiB
392 lines
9.6 KiB
//======================================================================== |
|
// |
|
// CharCodeToUnicode.cc |
|
// |
|
// Copyright 2001-2003 Glyph & Cog, LLC |
|
// |
|
//======================================================================== |
|
|
|
#include <aconf.h> |
|
|
|
#ifdef USE_GCC_PRAGMAS |
|
#pragma implementation |
|
#endif |
|
|
|
#include <stdio.h> |
|
#include <string.h> |
|
#include "gmem.h" |
|
#include "gfile.h" |
|
#include "GString.h" |
|
#include "Error.h" |
|
#include "GlobalParams.h" |
|
#include "PSTokenizer.h" |
|
#include "CharCodeToUnicode.h" |
|
|
|
//------------------------------------------------------------------------ |
|
|
|
#define maxUnicodeString 8 |
|
|
|
struct CharCodeToUnicodeString { |
|
CharCode c; |
|
Unicode u[maxUnicodeString]; |
|
int len; |
|
}; |
|
|
|
//------------------------------------------------------------------------ |
|
|
|
// Character source that reads from an in-memory, NUL-terminated string.
//
// [data] points to a 'char *' cursor.  Each call returns the byte under
// the cursor and advances the cursor by one; once the terminating NUL is
// reached the cursor is left in place and EOF is returned on every call.
//
// The byte is returned as an unsigned char converted to int -- the same
// contract as fgetc(), which getCharFromFile() relies on -- so that bytes
// >= 0x80 are never negative and 0xFF can never be mistaken for EOF on
// platforms where plain char is signed.
static int getCharFromString(void *data) {
  char **cursor = (char **)data;

  if (**cursor == '\0') {
    return EOF;
  }
  int c = (unsigned char)**cursor;
  ++*cursor;
  return c;
}
|
|
|
// Character source that reads from a stdio stream.  [data] is the
// FILE * to read; the result is whatever fgetc() returns for it.
static int getCharFromFile(void *data) {
  FILE *stream = (FILE *)data;
  return fgetc(stream);
}
|
|
|
//------------------------------------------------------------------------ |
|
|
|
// Builds a mapping from the cidToUnicode file that globalParams returns
// for [collectionA].
//
// The file is read line by line; each line is parsed as one hexadecimal
// value and stored at the index equal to its zero-based line number.  A
// line that does not parse is reported and stored as 0.
//
// Returns a new CharCodeToUnicode that owns a copy of [collectionA], or
// NULL (after reporting an error) if no file is available.
CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *collectionA) {
  FILE *in = globalParams->getCIDToUnicodeFile(collectionA);
  if (!in) {
    error(-1, "Couldn't find cidToUnicode file for the '%s' collection",
          collectionA->getCString());
    return NULL;
  }

  // Table that doubles in capacity whenever it fills up.
  CharCode capacity = 32768;
  CharCode count = 0;
  Unicode *table = (Unicode *)gmalloc(capacity * sizeof(Unicode));

  char line[64];
  while (getLine(line, sizeof(line), in)) {
    if (count == capacity) {
      capacity *= 2;
      table = (Unicode *)grealloc(table, capacity * sizeof(Unicode));
    }
    Unicode value;
    if (sscanf(line, "%x", &value) != 1) {
      error(-1, "Bad line (%d) in cidToUnicode file for the '%s' collection",
            (int)(count + 1), collectionA->getCString());
      value = 0;
    }
    table[count] = value;
    ++count;
  }
  fclose(in);

  // The constructor is asked to copy the table, so the scratch buffer is
  // released here.
  CharCodeToUnicode *result =
      new CharCodeToUnicode(collectionA->copy(), table, count, gTrue,
                            NULL, 0);
  gfree(table);
  return result;
}
|
|
|
// Creates a mapping with no collection name from the first 256 entries
// of [toUnicode].  The entries are copied, so the caller keeps ownership
// of [toUnicode].
CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) {
  CharCodeToUnicode *result =
      new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0);
  return result;
}
|
|
|
// Creates an empty mapping (no collection name) and fills it by parsing
// the CMap text held in [buf].  [nBits] is forwarded unchanged to
// parseCMap1().
CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) {
  CharCodeToUnicode *result = new CharCodeToUnicode(NULL);

  // getCharFromString() advances this cursor through the buffer.
  char *cursor = buf->getCString();
  result->parseCMap1(&getCharFromString, &cursor, nBits);
  return result;
}
|
|
|
void CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data, |
|
int nBits) { |
|
PSTokenizer *pst; |
|
char tok1[256], tok2[256], tok3[256]; |
|
int nDigits, n1, n2, n3; |
|
CharCode oldLen, i; |
|
CharCode code1, code2; |
|
Unicode u; |
|
char uHex[5]; |
|
int j; |
|
GString *name; |
|
FILE *f; |
|
|
|
nDigits = nBits / 4; |
|
pst = new PSTokenizer(getCharFunc, data); |
|
pst->getToken(tok1, sizeof(tok1), &n1); |
|
while (pst->getToken(tok2, sizeof(tok2), &n2)) { |
|
if (!strcmp(tok2, "usecmap")) { |
|
if (tok1[0] == '/') { |
|
name = new GString(tok1 + 1); |
|
if ((f = globalParams->findToUnicodeFile(name))) { |
|
parseCMap1(&getCharFromFile, f, nBits); |
|
fclose(f); |
|
} else { |
|
error(-1, "Couldn't find ToUnicode CMap file for '%s'", |
|
name->getCString()); |
|
} |
|
delete name; |
|
} |
|
pst->getToken(tok1, sizeof(tok1), &n1); |
|
} else if (!strcmp(tok2, "beginbfchar")) { |
|
while (pst->getToken(tok1, sizeof(tok1), &n1)) { |
|
if (!strcmp(tok1, "endbfchar")) { |
|
break; |
|
} |
|
if (!pst->getToken(tok2, sizeof(tok2), &n2) || |
|
!strcmp(tok2, "endbfchar")) { |
|
error(-1, "Illegal entry in bfchar block in ToUnicode CMap"); |
|
break; |
|
} |
|
if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' && |
|
tok2[0] == '<' && tok2[n2 - 1] == '>')) { |
|
error(-1, "Illegal entry in bfchar block in ToUnicode CMap"); |
|
continue; |
|
} |
|
tok1[n1 - 1] = tok2[n2 - 1] = '\0'; |
|
if (sscanf(tok1 + 1, "%x", &code1) != 1) { |
|
error(-1, "Illegal entry in bfchar block in ToUnicode CMap"); |
|
continue; |
|
} |
|
if (code1 >= mapLen) { |
|
oldLen = mapLen; |
|
mapLen = (code1 + 256) & ~255; |
|
map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode)); |
|
for (i = oldLen; i < mapLen; ++i) { |
|
map[i] = 0; |
|
} |
|
} |
|
if (n2 == 6) { |
|
if (sscanf(tok2 + 1, "%x", &u) != 1) { |
|
error(-1, "Illegal entry in bfchar block in ToUnicode CMap"); |
|
continue; |
|
} |
|
map[code1] = u; |
|
} else { |
|
map[code1] = 0; |
|
if (sMapLen == sMapSize) { |
|
sMapSize += 8; |
|
sMap = (CharCodeToUnicodeString *) |
|
grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString)); |
|
} |
|
sMap[sMapLen].c = code1; |
|
sMap[sMapLen].len = (n2 - 2) / 4; |
|
for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) { |
|
strncpy(uHex, tok2 + 1 + j*4, 4); |
|
uHex[4] = '\0'; |
|
if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) { |
|
error(-1, "Illegal entry in bfchar block in ToUnicode CMap"); |
|
} |
|
} |
|
++sMapLen; |
|
} |
|
} |
|
pst->getToken(tok1, sizeof(tok1), &n1); |
|
} else if (!strcmp(tok2, "beginbfrange")) { |
|
while (pst->getToken(tok1, sizeof(tok1), &n1)) { |
|
if (!strcmp(tok1, "endbfrange")) { |
|
break; |
|
} |
|
if (!pst->getToken(tok2, sizeof(tok2), &n2) || |
|
!strcmp(tok2, "endbfrange") || |
|
!pst->getToken(tok3, sizeof(tok3), &n3) || |
|
!strcmp(tok3, "endbfrange")) { |
|
error(-1, "Illegal entry in bfrange block in ToUnicode CMap"); |
|
break; |
|
} |
|
if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' && |
|
n2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>' && |
|
tok3[0] == '<' && tok3[n3 - 1] == '>')) { |
|
error(-1, "Illegal entry in bfrange block in ToUnicode CMap"); |
|
continue; |
|
} |
|
tok1[n1 - 1] = tok2[n2 - 1] = tok3[n3 - 1] = '\0'; |
|
if (sscanf(tok1 + 1, "%x", &code1) != 1 || |
|
sscanf(tok2 + 1, "%x", &code2) != 1) { |
|
error(-1, "Illegal entry in bfrange block in ToUnicode CMap"); |
|
continue; |
|
} |
|
if (code2 >= mapLen) { |
|
oldLen = mapLen; |
|
mapLen = (code2 + 256) & ~255; |
|
map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode)); |
|
for (i = oldLen; i < mapLen; ++i) { |
|
map[i] = 0; |
|
} |
|
} |
|
if (n3 <= 6) { |
|
if (sscanf(tok3 + 1, "%x", &u) != 1) { |
|
error(-1, "Illegal entry in bfrange block in ToUnicode CMap"); |
|
continue; |
|
} |
|
for (; code1 <= code2; ++code1) { |
|
map[code1] = u++; |
|
} |
|
} else { |
|
if (sMapLen + (int)(code2 - code1 + 1) > sMapSize) { |
|
sMapSize = (sMapSize + (code2 - code1 + 1) + 7) & ~7; |
|
sMap = (CharCodeToUnicodeString *) |
|
grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString)); |
|
} |
|
for (i = 0; code1 <= code2; ++code1, ++i) { |
|
map[code1] = 0; |
|
sMap[sMapLen].c = code1; |
|
sMap[sMapLen].len = (n3 - 2) / 4; |
|
for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) { |
|
strncpy(uHex, tok3 + 1 + j*4, 4); |
|
uHex[4] = '\0'; |
|
if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) { |
|
error(-1, "Illegal entry in bfrange block in ToUnicode CMap"); |
|
} |
|
} |
|
sMap[sMapLen].u[sMap[sMapLen].len - 1] += i; |
|
++sMapLen; |
|
} |
|
} |
|
} |
|
pst->getToken(tok1, sizeof(tok1), &n1); |
|
} else { |
|
strcpy(tok1, tok2); |
|
} |
|
} |
|
delete pst; |
|
} |
|
|
|
// Creates a mapping with 256 zeroed map entries, an empty string map and
// a reference count of 1.  The object stores [collectionA] (which may be
// NULL) and deletes it in its destructor.
CharCodeToUnicode::CharCodeToUnicode(GString *collectionA) {
  collection = collectionA;
  refCnt = 1;

  sMap = NULL;
  sMapSize = 0;
  sMapLen = 0;

  mapLen = 256;
  map = (Unicode *)gmalloc(mapLen * sizeof(Unicode));
  memset(map, 0, mapLen * sizeof(Unicode));
}
|
|
|
// Creates a mapping from existing tables, with a reference count of 1.
//
//   collectionA -- stored as-is; deleted by the destructor
//   mapA        -- table of mapLenA entries; duplicated when copyMap is
//                  true, otherwise adopted (and later freed) as-is
//   sMapA       -- string map of sMapLenA entries, adopted as-is
CharCodeToUnicode::CharCodeToUnicode(GString *collectionA, Unicode *mapA,
                                     CharCode mapLenA, GBool copyMap,
                                     CharCodeToUnicodeString *sMapA,
                                     int sMapLenA) {
  collection = collectionA;
  refCnt = 1;

  mapLen = mapLenA;
  map = mapA;
  if (copyMap) {
    map = (Unicode *)gmalloc(mapLen * sizeof(Unicode));
    memcpy(map, mapA, mapLen * sizeof(Unicode));
  }

  sMap = sMapA;
  sMapLen = sMapLenA;
  sMapSize = sMapLenA;
}
|
|
|
// Releases the collection name, the map and (if present) the string map.
CharCodeToUnicode::~CharCodeToUnicode() {
  // Deleting a null pointer is a no-op, so no guard is needed here.
  delete collection;
  gfree(map);
  if (sMap != NULL) {
    gfree(sMap);
  }
}
|
|
|
// Registers one more holder of this object.
void CharCodeToUnicode::incRefCnt() {
  refCnt += 1;
}
|
|
|
void CharCodeToUnicode::decRefCnt() { |
|
if (--refCnt == 0) { |
|
delete this; |
|
} |
|
} |
|
|
|
// Returns true if this mapping has a collection name and it compares
// equal to [collectionA]; a mapping without a name never matches.
GBool CharCodeToUnicode::match(GString *collectionA) {
  if (!collection) {
    return gFalse;
  }
  return collection->cmp(collectionA) == 0;
}
|
|
|
// Looks up the Unicode value(s) for char code [c].
//
//   u    -- output buffer; must have room for at least one value, since
//           a direct map hit writes u[0] without consulting [size]
//   size -- capacity of [u], used to bound string-map results
//
// Returns the number of values written to [u]: 1 for a direct map hit,
// up to min(size, maxUnicodeString) for a string-map hit, and 0 when
// [c] is out of range or has no mapping.
int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode *u, int size) {
  int i, j, n;

  if (c >= mapLen) {
    return 0;
  }
  if (map[c]) {
    u[0] = map[c];
    return 1;
  }
  // A zero map entry means "no direct mapping"; fall back to a linear
  // search of the string map.
  for (i = 0; i < sMapLen; ++i) {
    if (sMap[i].c == c) {
      // Never read past the entry's fixed-size array, even if the
      // recorded length is larger than its capacity.
      n = sMap[i].len;
      if (n > maxUnicodeString) {
        n = maxUnicodeString;
      }
      if (n > size) {
        n = size;
      }
      for (j = 0; j < n; ++j) {
        u[j] = sMap[i].u[j];
      }
      return j;
    }
  }
  return 0;
}
|
|
|
//------------------------------------------------------------------------ |
|
|
|
// Starts with every cache slot empty.
CIDToUnicodeCache::CIDToUnicodeCache() {
  int slot = 0;

  while (slot < cidToUnicodeCacheSize) {
    cache[slot] = NULL;
    ++slot;
  }
}
|
|
|
// Drops the cache's reference to every mapping it still holds.
CIDToUnicodeCache::~CIDToUnicodeCache() {
  int slot;

  for (slot = 0; slot < cidToUnicodeCacheSize; ++slot) {
    CharCodeToUnicode *entry = cache[slot];
    if (!entry) {
      continue;
    }
    entry->decRefCnt();
  }
}
|
|
|
// Returns the mapping for [collection], or NULL if it cannot be loaded.
//
// The cache is kept in most-recently-used order.  On a hit the entry is
// moved to slot 0; on a miss the mapping is parsed, the entry in the last
// slot (if any) is released, and the new mapping is inserted at slot 0.
// In both cases the returned object's reference count is incremented on
// behalf of the caller.
CharCodeToUnicode *CIDToUnicodeCache::getCIDToUnicode(GString *collection) {
  CharCodeToUnicode *ctu;
  int hit, k;

  // Cache hit: shift the entries in front of it down one slot and move
  // it to the front.  (For a hit at slot 0 nothing is shifted.)
  for (hit = 0; hit < cidToUnicodeCacheSize; ++hit) {
    ctu = cache[hit];
    if (!ctu || !ctu->match(collection)) {
      continue;
    }
    for (k = hit; k > 0; --k) {
      cache[k] = cache[k - 1];
    }
    cache[0] = ctu;
    ctu->incRefCnt();
    return ctu;
  }

  // Cache miss: load the mapping.
  ctu = CharCodeToUnicode::parseCIDToUnicode(collection);
  if (!ctu) {
    return NULL;
  }

  // Evict the least recently used entry and insert the new one in front.
  const int last = cidToUnicodeCacheSize - 1;
  if (cache[last]) {
    cache[last]->decRefCnt();
  }
  for (k = last; k > 0; --k) {
    cache[k] = cache[k - 1];
  }
  cache[0] = ctu;
  ctu->incRefCnt();
  return ctu;
}
|
|
|