You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1184 lines
35 KiB
1184 lines
35 KiB
/* -*- mode: c; indent-tabs-mode: nil; -*- |
|
* $Id: unpluck.c,v 1.12 2003/12/28 20:59:21 chrish Exp $ |
|
* |
|
* unpluck -- a library to read Plucker data files |
|
* Copyright (c) 2002, Bill Janssen |
|
* |
|
* This program is free software; you can redistribute it and/or |
|
* modify it under the terms of the GNU General Public License |
|
* as published by the Free Software Foundation; either version 2 |
|
* of the License, or (at your option) any later version. |
|
* |
|
* This program is distributed in the hope that it will be useful, |
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
* GNU General Public License for more details. |
|
* |
|
* You should have received a copy of the GNU General Public License |
|
* along with this program; if not, write to the Free Software |
|
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
|
* |
|
*/ |
|
|
|
#if !defined(WIN32) |
|
#include <unistd.h> /* for lseek, etc. */ |
|
#else |
|
#include <io.h> |
|
#endif |
|
#include <stdlib.h> |
|
#include <sys/types.h> |
|
#include <sys/stat.h> /* for fstat() */ |
|
#include <string.h> /* for strndup() */ |
|
#include <errno.h> /* for errno */ |
|
#include <fcntl.h> /* for O_RDONLY */ |
|
#include <assert.h> /* for assert() */ |
|
|
|
#include <zlib.h> |
|
|
|
#include "unpluck.h" |
|
#include "unpluckint.h" |
|
|
|
|
|
/***********************************************************************/ |
|
/***********************************************************************/ |
|
/***** *****/ |
|
/***** Decompression code (taken from the Plucker PalmOS viewer *****/ |
|
/***** sources, Copyright (c) 1998-2002, by Mark Ian Lillywhite *****/ |
|
/***** and Michael Nordström, also under the GPL) *****/ |
|
/***** *****/ |
|
/***********************************************************************/ |
|
/***********************************************************************/ |
|
|
|
/*
 * Uncompress a DOC-compressed document/image.
 *
 *   src       in:  compressed data
 *   src_len   in:  number of bytes in src
 *   dest      out: buffer that receives the uncompressed data
 *   dest_len  in:  size of dest; the stream is expected to expand to
 *                  exactly this many bytes
 *
 * Returns 1 on success.  Returns 0 when an argument is NULL/zero, when the
 * stream would read past src or write past dest, when a back-reference
 * points before the start of dest, or when the output does not fill dest
 * exactly.  Nothing outside src[0..src_len) and dest[0..dest_len) is ever
 * touched, so the function is safe on malformed input.
 */
static unsigned int UncompressDOC(unsigned char *src, unsigned int src_len,
                                  unsigned char *dest, unsigned int dest_len)
{
    unsigned int src_index = 0;
    unsigned int dest_index = 0;

    if (src == NULL || src_len == 0 || dest == NULL || dest_len == 0)
        return 0;

    memset(dest, 0, dest_len);

    while (src_index < src_len) {
        unsigned int token = (unsigned int) src[src_index++];

        if (0 < token && token < 9) {
            /* literal run: the next `token` bytes are copied verbatim */
            if (token > src_len - src_index || token > dest_len - dest_index)
                return 0;
            memcpy(dest + dest_index, src + src_index, token);
            src_index += token;
            dest_index += token;
        }
        else if (token < 0x80) {
            /* single byte that stands for itself (includes 0 and 9..0x7f) */
            if (dest_index >= dest_len)
                return 0;
            dest[dest_index++] = (unsigned char) token;
        }
        else if (0xc0 <= token) {
            /* a space followed by the token with its top bit flipped */
            if (dest_len - dest_index < 2)
                return 0;
            dest[dest_index++] = ' ';
            dest[dest_index++] = (unsigned char) (token ^ 0x80);
        }
        else {
            /* two-byte back-reference: the low 14 bits hold distance and length */
            unsigned int distance;
            unsigned int count;

            if (src_index >= src_len)
                return 0;
            token = (token << 8) + src[src_index++];

            distance = (token & 0x3fff) / 8;
            count = (token & 7) + 3;
            if (distance > dest_index || count > dest_len - dest_index)
                return 0;

            /* byte-wise copy on purpose: source and target ranges may overlap */
            while (count != 0) {
                dest[dest_index] = dest[dest_index - distance];
                dest_index++;
                count--;
            }
        }
    }

    return (dest_index == dest_len) ? 1 : 0;
}
|
|
|
/* uncompress ZLib compressed document/image */ |
|
static unsigned int UncompressZLib |
|
( |
|
unsigned char* src, /* in: compressed document */ |
|
unsigned int src_len, /* in: size of compressed document */ |
|
unsigned char* dest, /* out: buffer to put uncompressed |
|
document in */ |
|
unsigned int dest_len, /* out: size of buffer to put uncompressed |
|
document in */ |
|
unsigned char* owner_id /* in: owner-id key */ |
|
) |
|
{ |
|
z_stream z; |
|
unsigned int err; |
|
unsigned int keylen; |
|
unsigned int i; |
|
unsigned char keybuf[OWNER_ID_HASH_LEN]; |
|
|
|
assert (src != NULL && src_len != 0 && dest != NULL && dest_len != 0); |
|
|
|
keylen = (owner_id == NULL) ? 0 : MIN (src_len, OWNER_ID_HASH_LEN); |
|
|
|
memset (&z, 0, sizeof z); |
|
|
|
if (owner_id != NULL) { |
|
|
|
for (i = 0; i < keylen; i++) |
|
keybuf[i] = src[i] ^ owner_id[i]; |
|
z.next_in = keybuf; |
|
z.avail_in = keylen; |
|
|
|
} |
|
else { |
|
|
|
z.next_in = src; |
|
z.avail_in = src_len; |
|
|
|
} |
|
|
|
z.next_out = dest; |
|
z.avail_out = dest_len; |
|
|
|
err = inflateInit (&z); |
|
if (err != Z_OK) { |
|
return err; |
|
} |
|
|
|
do { |
|
if (z.avail_in == 0 && keylen > 0) { |
|
z.next_in = src + keylen; |
|
z.avail_in = src_len - keylen; |
|
} |
|
|
|
err = inflate (&z, Z_SYNC_FLUSH); |
|
|
|
} while (err == Z_OK); |
|
|
|
if (err != Z_STREAM_END) |
|
return err; |
|
|
|
assert (z.total_out == dest_len); |
|
|
|
return inflateEnd (&z); |
|
} |
|
|
|
/***********************************************************************/ |
|
/***********************************************************************/ |
|
/***** *****/ |
|
/***** "Open" the DB (read the headers and parse the various *****/ |
|
/***** metadata, like URLs, default categories, charsets, etc.) *****/ |
|
/***** *****/ |
|
/***********************************************************************/ |
|
/***********************************************************************/ |
|
|
|
static void FreePluckerDoc |
|
( |
|
plkr_Document* doc |
|
) |
|
{ |
|
if (doc->name != NULL) |
|
free (doc->name); |
|
if (doc->title != NULL) |
|
free (doc->title); |
|
if (doc->author != NULL) |
|
free (doc->author); |
|
if (doc->records != NULL) { |
|
int i; |
|
for (i = 0; i < doc->nrecords; i++) { |
|
if (doc->records[i].cache != NULL) |
|
free (doc->records[i].cache); |
|
} |
|
free (doc->records); |
|
} |
|
if (doc->urls != NULL) |
|
free (doc->urls); |
|
if (doc->handle != NULL) |
|
doc->handle->free (doc->handle); |
|
free (doc); |
|
} |
|
|
|
static plkr_DataRecord* FindRecordByIndex |
|
( |
|
plkr_Document* doc, |
|
int record_index |
|
) |
|
{ |
|
int imin; |
|
int imax; |
|
int itest; |
|
|
|
for (imin = 0, imax = doc->nrecords; imin < imax;) { |
|
itest = imin + (imax - imin) / 2; |
|
/* _plkr_message("imin = %2d, imax = %2d, itest = %2d (%2d), record_index = %2d", |
|
imin, imax, itest, doc->records[itest].uid, record_index); */ |
|
if (doc->records[itest].uid == record_index) |
|
return &doc->records[itest]; |
|
else if (record_index > doc->records[itest].uid) |
|
imin = itest + 1; |
|
else if (record_index < doc->records[itest].uid) |
|
imax = itest; |
|
} |
|
return NULL; |
|
} |
|
|
|
static int GetUncompressedRecord |
|
( |
|
plkr_Document* doc, |
|
plkr_DBHandle handle, |
|
int record_index, |
|
unsigned char* buffer, |
|
int buffer_size, |
|
plkr_DataRecordType expected_type, |
|
unsigned char** buffer_out, |
|
int* buffer_size_out, |
|
plkr_DataRecord** record_out |
|
) |
|
{ |
|
/* read whole data record, including header, into buffer. If some part of the |
|
record is compressed, uncompress it. If "buffer" is NULL, allocate enough |
|
bytes to fit. Returns TRUE if read is successful, and sets "buffer_out" and |
|
"buffer_size_out" and "record_out" on successful return. */ |
|
|
|
plkr_DataRecord* record; |
|
unsigned char* tbuffer = buffer; |
|
int size_needed; |
|
int blen = buffer_size; |
|
|
|
record = FindRecordByIndex (doc, record_index); |
|
if (record == NULL) { |
|
_plkr_message ("No record with index %d", record_index); |
|
return FALSE; |
|
}; |
|
if (expected_type != PLKR_DRTYPE_NONE && record->type != expected_type) { |
|
_plkr_message ("Record %d has unexpected type %d; expected %d", |
|
record_index, record->type, expected_type); |
|
return FALSE; |
|
} |
|
|
|
/* figure size needed */ |
|
size_needed = record->uncompressed_size + 8; |
|
if ((record->type == PLKR_DRTYPE_TEXT_COMPRESSED) |
|
|| (record->type == PLKR_DRTYPE_TEXT)) |
|
size_needed += 4 * record->nparagraphs; |
|
|
|
if (!buffer) { |
|
if (buffer_out == NULL) { |
|
_plkr_message ("No output buffer"); |
|
return FALSE; |
|
} |
|
else if (record->cache) { |
|
tbuffer = record->cache; |
|
size_needed = record->cached_size; |
|
} |
|
else { |
|
tbuffer = (unsigned char *) malloc (size_needed); |
|
blen = size_needed; |
|
} |
|
} |
|
else { |
|
tbuffer = buffer; |
|
if (buffer_size < size_needed) { |
|
_plkr_message ("Buffer too small; needs %d", size_needed); |
|
return FALSE; |
|
} |
|
else if (record->cache) { |
|
memcpy (buffer, record->cache, record->cached_size); |
|
size_needed = record->cached_size; |
|
} |
|
} |
|
|
|
if (!record->cache) { |
|
|
|
if ((record->type == PLKR_DRTYPE_TEXT_COMPRESSED) || |
|
(record->type == PLKR_DRTYPE_IMAGE_COMPRESSED) || |
|
(record->type == PLKR_DRTYPE_TABLE_COMPRESSED) || |
|
(record->type == PLKR_DRTYPE_GLYPHPAGE) || |
|
(record->type == PLKR_DRTYPE_LINKS_COMPRESSED)) { |
|
|
|
unsigned char *start_of_data, *output_ptr; |
|
int len_of_data, buffer_remaining, buf_to_use; |
|
unsigned char *buf = (unsigned char*)malloc (record->size); |
|
|
|
if (!handle->seek (handle, record->offset) || |
|
(handle->read (handle, buf, record->size, record->size) != |
|
record->size)) { |
|
_plkr_message |
|
("Bad read from DBHandle while reading record %d", |
|
record->uid); |
|
free (buf); |
|
if (tbuffer != buffer) |
|
free (tbuffer); |
|
return FALSE; |
|
} |
|
|
|
#if 0 |
|
_plkr_message |
|
("data record %d (%d): uid is %d, # paras = %d, size = %d, type = %d", |
|
record_index, record->size, (buf[0] << 8) + buf[1], |
|
(buf[2] << 8) + buf[3], (buf[4] << 8) + buf[5], buf[6]); |
|
#endif |
|
|
|
memcpy (tbuffer, buf, 8); |
|
output_ptr = tbuffer + 8; |
|
buffer_remaining = blen - 8; |
|
start_of_data = buf + 8; |
|
len_of_data = record->size - 8; |
|
if (record->type == PLKR_DRTYPE_TEXT_COMPRESSED) { |
|
/* skip over the paragraph headers */ |
|
memcpy (output_ptr, start_of_data, |
|
4 * record->nparagraphs); |
|
start_of_data += (4 * record->nparagraphs); |
|
len_of_data -= (4 * record->nparagraphs); |
|
output_ptr += (4 * record->nparagraphs); |
|
buffer_remaining -= (4 * record->nparagraphs); |
|
} |
|
|
|
buf_to_use = size_needed - (start_of_data - buf); |
|
if (doc->compression == PLKR_COMPRESSION_ZLIB) { |
|
if (UncompressZLib (start_of_data, len_of_data, output_ptr, |
|
buf_to_use, |
|
(doc->owner_id_required ? doc-> |
|
owner_id_key : NULL)) != Z_OK) { |
|
_plkr_message ("Bad Zlib uncompress of record %d", |
|
record_index); |
|
free (buf); |
|
if (tbuffer != buffer) |
|
free (tbuffer); |
|
return FALSE; |
|
}; |
|
} |
|
else if (doc->compression == PLKR_COMPRESSION_DOC) { |
|
if (UncompressDOC (start_of_data, len_of_data, output_ptr, |
|
buf_to_use) != 1) { |
|
_plkr_message ("Bad DOC uncompress of record %d", |
|
record_index); |
|
free (buf); |
|
if (tbuffer != buffer) |
|
free (tbuffer); |
|
return FALSE; |
|
}; |
|
} |
|
free (buf); |
|
} |
|
else { |
|
/* all the record types which don't use compression */ |
|
if (!handle->seek (handle, record->offset) || |
|
(handle->read (handle, tbuffer, blen, size_needed) != |
|
size_needed)) { |
|
_plkr_message |
|
("Bad read from DBHandle while reading record %d", |
|
record->uid); |
|
if (tbuffer != buffer) |
|
free (tbuffer); |
|
return FALSE; |
|
} |
|
} |
|
} |
|
|
|
if (record_out) |
|
*record_out = record; |
|
if (buffer_out) |
|
*buffer_out = tbuffer; |
|
if (buffer_size_out) |
|
*buffer_size_out = size_needed; |
|
return TRUE; |
|
} |
|
|
|
static int ParseCategories |
|
( |
|
plkr_Document* newdoc, |
|
plkr_DBHandle handle |
|
) |
|
{ |
|
struct _plkr_CategoryName* categories; |
|
struct _plkr_CategoryName* newc; |
|
plkr_DataRecord *record; |
|
unsigned char* buf; |
|
unsigned char* ptr; |
|
int bufsize; |
|
|
|
if (GetUncompressedRecord |
|
(newdoc, handle, newdoc->default_category_record_uid, NULL, 0, |
|
PLKR_DRTYPE_CATEGORY, &buf, &bufsize, &record)) { |
|
/* keep the record data, since the list of char * ptrs will point into it */ |
|
record->cache = buf; |
|
record->cached_size = bufsize; |
|
categories = NULL; |
|
for (ptr = buf + 8; (ptr - buf) < bufsize;) { |
|
newc = (struct _plkr_CategoryName *) |
|
malloc (sizeof (struct _plkr_CategoryName)); |
|
newc->next = categories; |
|
categories = newc; |
|
newc->name = (char*)ptr; |
|
ptr += (strlen ((char*)ptr) + 1); |
|
} |
|
newdoc->default_categories = categories; |
|
return TRUE; |
|
} |
|
else { |
|
return FALSE; |
|
} |
|
} |
|
|
|
|
|
static int ParseMetadata |
|
( |
|
plkr_Document* newdoc, |
|
plkr_DBHandle handle |
|
) |
|
{ |
|
unsigned char* buf; |
|
unsigned char* ptr; |
|
int bufsize; |
|
int nsubrecords; |
|
int typecode; |
|
int subrecord_length; |
|
int i; |
|
|
|
if (!GetUncompressedRecord |
|
(newdoc, handle, newdoc->metadata_record_uid, NULL, 0, |
|
PLKR_DRTYPE_METADATA, &buf, &bufsize, NULL)) { |
|
return FALSE; |
|
} |
|
else { |
|
nsubrecords = (buf[8] << 8) + buf[9]; |
|
for (i = 0, ptr = buf + 10; i < nsubrecords; i++) { |
|
typecode = (ptr[0] << 8) + ptr[1]; |
|
subrecord_length = ((ptr[2] << 8) + ptr[3]) * 2; |
|
|
|
if (typecode == PLKR_MDTYPE_DEFAULTCHARSET) { |
|
|
|
newdoc->default_charset_mibenum = (ptr[4] << 8) + ptr[5]; |
|
ptr += 6; |
|
|
|
} |
|
else if (typecode == PLKR_MDTYPE_EXCEPTCHARSETS) { |
|
|
|
int i, n, record_id, mibenum; |
|
plkr_DataRecord *record; |
|
|
|
ptr += 4; |
|
for (i = 0, n = subrecord_length / 4; i < n; i++, ptr += 4) { |
|
record_id = (ptr[0] << 8) + ptr[1]; |
|
mibenum = (ptr[2] << 8) + ptr[3]; |
|
record = FindRecordByIndex (newdoc, record_id); |
|
if (record == NULL) { |
|
_plkr_message ("Can't find record with id %d", |
|
record_id); |
|
free (buf); |
|
return FALSE; |
|
} |
|
record->charset_mibenum = mibenum; |
|
} |
|
|
|
} |
|
else if (typecode == PLKR_MDTYPE_OWNERIDCRC) { |
|
|
|
newdoc->owner_id_required = TRUE; |
|
ptr += 8; |
|
|
|
} |
|
else if (typecode == PLKR_MDTYPE_AUTHOR) { |
|
|
|
newdoc->author = _plkr_strndup ((char*)( ptr + 4 ), subrecord_length); |
|
ptr += (4 + subrecord_length); |
|
|
|
} |
|
else if (typecode == PLKR_MDTYPE_TITLE) { |
|
|
|
newdoc->title = _plkr_strndup ((char*)( ptr + 4 ), subrecord_length); |
|
ptr += (4 + subrecord_length); |
|
|
|
} |
|
else if (typecode == PLKR_MDTYPE_PUBLICATIONTIME) { |
|
|
|
newdoc->publication_time = |
|
READ_BIGENDIAN_LONG (ptr + 4) - PLKR_TIMEADJUST; |
|
ptr += 8; |
|
|
|
} |
|
else { |
|
_plkr_message |
|
("Bad metadata typecode %d encountered in metadata record", |
|
typecode); |
|
free (buf); |
|
return FALSE; |
|
} |
|
} |
|
free (buf); |
|
return TRUE; |
|
} |
|
} |
|
|
|
|
|
static int ParseURLs |
|
( |
|
plkr_Document* newdoc, |
|
plkr_DBHandle handle |
|
) |
|
{ |
|
plkr_DataRecord* record; |
|
unsigned char* buf; |
|
unsigned char* ptr; |
|
char** urls; |
|
int id; |
|
int i; |
|
int n; |
|
int count; |
|
int nurls; |
|
int bufsize; |
|
|
|
struct url_index_record { |
|
int last_url_index; |
|
int record_id; |
|
} *records; |
|
|
|
buf = NULL; |
|
urls = NULL; |
|
records = NULL; |
|
|
|
if (!GetUncompressedRecord |
|
(newdoc, handle, newdoc->urls_index_record_uid, NULL, 0, |
|
PLKR_DRTYPE_LINKS_INDEX, &buf, &bufsize, NULL)) { |
|
return FALSE; |
|
} |
|
else { |
|
n = ((buf[4] << 8) + buf[5]) / 4; |
|
records = |
|
(struct url_index_record *) malloc (n * sizeof (*records)); |
|
for (i = 0, nurls = 0; i < n; i++) { |
|
ptr = buf + 8 + (i * 4); |
|
records[i].last_url_index = (ptr[0] << 8) + ptr[1]; |
|
records[i].record_id = (ptr[2] << 8) + ptr[3]; |
|
#ifdef DEBUGURLS |
|
_plkr_message ("index %3d: last = %d, record_id = %d", i, |
|
records[i].last_url_index, |
|
records[i].record_id); |
|
#endif /* def DEBUGURLS */ |
|
nurls = MAX (nurls, records[i].last_url_index); |
|
} |
|
free (buf); |
|
buf = NULL; |
|
} |
|
|
|
urls = (char **) malloc (nurls * sizeof (char *)); |
|
memset (urls, 0, nurls * sizeof (char *)); |
|
|
|
for (count = 0, i = 0; i < n; i++) { |
|
|
|
id = records[i].record_id; |
|
if (!GetUncompressedRecord (newdoc, handle, id, |
|
NULL, 0, PLKR_DRTYPE_NONE, &buf, |
|
&bufsize, &record)) { |
|
goto errout4; |
|
} |
|
if (record->type != PLKR_DRTYPE_LINKS |
|
&& record->type != PLKR_DRTYPE_LINKS_COMPRESSED) { |
|
_plkr_message ("Supposed URLs record has bad type %d", |
|
record->type); |
|
goto errout4; |
|
} |
|
record->cache = buf; |
|
record->cached_size = bufsize; |
|
buf = NULL; |
|
for (ptr = record->cache + 8; |
|
(ptr - record->cache) < record->cached_size; |
|
ptr += (strlen ((char*)ptr) + 1)) { |
|
#ifdef DEBUGURLS |
|
_plkr_message ("%3d: %s", count, ptr); |
|
#endif /* def DEBUGURLS */ |
|
assert (count < nurls); |
|
urls[count++] = (char*)ptr; |
|
} |
|
} |
|
free (records); |
|
newdoc->urls = urls; |
|
newdoc->nurls = nurls; |
|
|
|
return TRUE; |
|
|
|
errout4: |
|
if (buf != NULL) |
|
free (buf); |
|
free (urls); |
|
free (records); |
|
return FALSE; |
|
} |
|
|
|
|
|
plkr_Document* plkr_OpenDoc |
|
( |
|
plkr_DBHandle handle |
|
) |
|
{ |
|
ReservedRecordEntry reserved[MAX_RESERVED]; |
|
plkr_DataRecord* record; |
|
plkr_Document* newdoc; |
|
unsigned char utilbuf[128]; |
|
static char id_stamp[9] = "DataPlkr"; |
|
int i; |
|
int nreserved; |
|
int records_size; |
|
int compression; |
|
|
|
|
|
if (!handle->seek (handle, 0) || |
|
(handle->read (handle, utilbuf, sizeof (utilbuf), 78) != 78)) { |
|
_plkr_message ("Bad read of DB header"); |
|
return NULL; |
|
} |
|
|
|
/* check for type stamp */ |
|
if (strncmp ((char *) (utilbuf + 60), id_stamp, 8) != 0) { |
|
_plkr_message ("Bad magic number"); |
|
return NULL; |
|
} |
|
|
|
/* check for version 1 */ |
|
i = (utilbuf[34] << 8) + utilbuf[35]; |
|
if (i != 1) { |
|
_plkr_message ("Not version 1 of Plucker format; version %d", i); |
|
return NULL; |
|
} |
|
|
|
/* get the title, creation time, and last modification time from header */ |
|
newdoc = (plkr_Document *) malloc (sizeof (plkr_Document)); |
|
memset (newdoc, 0, sizeof (plkr_Document)); |
|
newdoc->name = (char*)_plkr_strndup ((char*)utilbuf, MIN (strlen ((char*)utilbuf), 32)); |
|
newdoc->creation_time = (time_t) ((utilbuf[36] << 24) + |
|
(utilbuf[37] << 16) + |
|
(utilbuf[38] << 8) + |
|
utilbuf[39] - PLKR_TIMEADJUST); |
|
newdoc->modification_time = (time_t) ((utilbuf[40] << 24) + |
|
(utilbuf[41] << 16) + |
|
(utilbuf[42] << 8) + |
|
utilbuf[43] - PLKR_TIMEADJUST); |
|
newdoc->nrecords = (utilbuf[76] << 8) + utilbuf[77]; |
|
|
|
/* Now read the record-list to find out where the records are */ |
|
records_size = sizeof (plkr_DataRecord) * newdoc->nrecords; |
|
newdoc->records = (plkr_DataRecord *) malloc (records_size); |
|
memset (newdoc->records, 0, records_size); |
|
for (i = 0; i < newdoc->nrecords; i++) { |
|
if (handle->read (handle, utilbuf, sizeof (utilbuf), 8) != 8) { |
|
_plkr_message ("Bad read of record list"); |
|
FreePluckerDoc (newdoc); |
|
return NULL; |
|
} |
|
newdoc->records[i].offset = |
|
(utilbuf[0] << 24) + (utilbuf[1] << 16) + (utilbuf[2] << 8) + |
|
utilbuf[3]; |
|
} |
|
|
|
/* process the index record */ |
|
if (!handle->seek (handle, newdoc->records[0].offset) || |
|
(handle->read (handle, utilbuf, sizeof (utilbuf), 6) != 6)) { |
|
_plkr_message ("Bad read of index record"); |
|
FreePluckerDoc (newdoc); |
|
return NULL; |
|
} |
|
if ((utilbuf[0] << 8) + utilbuf[1] != 1) { |
|
_plkr_message ("index record has bad UID %d", |
|
(utilbuf[0] << 8) + utilbuf[1]); |
|
FreePluckerDoc (newdoc); |
|
return NULL; |
|
} |
|
newdoc->records[0].uid = 1; |
|
compression = (utilbuf[2] << 8) + utilbuf[3]; |
|
if (compression == PLKR_COMPRESSION_DOC) |
|
newdoc->compression = PLKR_COMPRESSION_DOC; |
|
else if (compression == PLKR_COMPRESSION_ZLIB) |
|
newdoc->compression = PLKR_COMPRESSION_ZLIB; |
|
else { |
|
_plkr_message ("Unknown compression type %d", compression); |
|
FreePluckerDoc (newdoc); |
|
return NULL; |
|
} |
|
nreserved = (utilbuf[4] << 8) + utilbuf[5]; |
|
if (nreserved > MAX_RESERVED) { |
|
_plkr_message ("Too many reserved records (%d) for software", |
|
nreserved); |
|
FreePluckerDoc (newdoc); |
|
return NULL; |
|
} |
|
for (i = 0; i < nreserved; i++) { |
|
if (handle->read (handle, utilbuf, sizeof (utilbuf), 4) != 4) { |
|
_plkr_message ("Bad read of reserved record list"); |
|
FreePluckerDoc (newdoc); |
|
return NULL; |
|
} |
|
reserved[i].name = (ReservedRecordName)( (utilbuf[0] << 8) + utilbuf[1] ); |
|
reserved[i].uid = (utilbuf[2] << 8) + utilbuf[3]; |
|
} |
|
|
|
/* OK, now process the data records */ |
|
newdoc->max_record_size = 0; |
|
for (i = 1; i < newdoc->nrecords; i++) { |
|
record = newdoc->records + i; |
|
if (!handle->seek (handle, record->offset) || |
|
(handle->read (handle, utilbuf, sizeof (utilbuf), 8) != 8)) { |
|
_plkr_message ("Can't read header of record %d", i); |
|
FreePluckerDoc (newdoc); |
|
return NULL; |
|
} |
|
newdoc->records[i - 1].size = |
|
record->offset - newdoc->records[i - 1].offset; |
|
record->uid = (utilbuf[0] << 8) + utilbuf[1]; |
|
record->nparagraphs = (utilbuf[2] << 8) + utilbuf[3]; |
|
record->uncompressed_size = (utilbuf[4] << 8) + utilbuf[5]; |
|
record->type = (plkr_DataRecordType)utilbuf[6]; |
|
newdoc->max_record_size = |
|
MAX (newdoc->max_record_size, record->uncompressed_size); |
|
} |
|
/* To get the size of the last record we subtract its offset from the total size of the DB. */ |
|
if ((i = handle->size (handle)) == 0) { |
|
_plkr_message ("Can't obtain size of DB"); |
|
FreePluckerDoc (newdoc); |
|
return NULL; |
|
}; |
|
record = newdoc->records + (newdoc->nrecords - 1); |
|
record->size = i - record->offset; |
|
/* make sure the uncompressed size is set, now that we know the record sizes */ |
|
for (i = 0; i < newdoc->nrecords; i++) { |
|
record = newdoc->records + i; |
|
if (record->uncompressed_size == 0) { |
|
if (record->type == PLKR_DRTYPE_LINKS_COMPRESSED || |
|
record->type == PLKR_DRTYPE_TEXT_COMPRESSED || |
|
record->type == PLKR_DRTYPE_TABLE_COMPRESSED || |
|
record->type == PLKR_DRTYPE_IMAGE_COMPRESSED) { |
|
_plkr_message ("Bad uncompressed size 0 in record uid %d", |
|
record->uid); |
|
FreePluckerDoc (newdoc); |
|
return NULL; |
|
} |
|
else { |
|
record->uncompressed_size = record->size - 8; |
|
} |
|
} |
|
#ifdef DEBUGOPEN |
|
{ |
|
static char *types[] = |
|
{ "TEXT", "TEXTC", "IMAGE", "IMAGEC", "MAILTO", |
|
"URLINDEX", "URLS", "URLSC", "BOOKMARKS", "CATEGORIES", |
|
"METADATA" |
|
}; |
|
_plkr_message |
|
("%3d: type=%10s, offset=%07x, size=%5d, uncompressed_size=%5d", |
|
record->uid, |
|
types[MIN |
|
(record->type, sizeof (types) / sizeof (char *))], |
|
record->offset, record->size, record->uncompressed_size); |
|
} |
|
#endif |
|
} |
|
|
|
/* find the reserved records */ |
|
|
|
/* do metadata first, to find out whether we need an owner_id key */ |
|
for (i = 0; i < nreserved; i++) { |
|
if (reserved[i].name == PLKR_METADATA_NAME) { |
|
newdoc->metadata_record_uid = reserved[i].uid; |
|
if (!ParseMetadata (newdoc, handle)) { |
|
_plkr_message ("Error parsing metadata record"); |
|
FreePluckerDoc (newdoc); |
|
return NULL; |
|
} |
|
} |
|
} |
|
|
|
if (newdoc->owner_id_required) { |
|
|
|
/* we need to set up the owner-id key before uncompressing |
|
any records... */ |
|
|
|
char *owner_id = plkr_GetConfigString (NULL, "owner_id", NULL); |
|
|
|
if (owner_id != NULL) { |
|
unsigned long crc; |
|
int owner_id_len = strlen (owner_id); |
|
crc = crc32 (0L, NULL, 0); |
|
crc = crc32 (crc, (const Bytef*)owner_id, owner_id_len); |
|
for (i = 0; i < 10; i++) { |
|
crc = crc32 (crc, (const Bytef*)owner_id, owner_id_len); |
|
newdoc->owner_id_key[(i * 4) + 0] = (unsigned char)((crc >> 24) & 0xFF); |
|
newdoc->owner_id_key[(i * 4) + 1] = (unsigned char)((crc >> 16) & 0xFF); |
|
newdoc->owner_id_key[(i * 4) + 2] = (unsigned char)((crc >> 8) & 0xFF); |
|
newdoc->owner_id_key[(i * 4) + 3] = (unsigned char)(crc & 0xFF); |
|
} |
|
} |
|
else { |
|
_plkr_message ("Document requires owner-id to open"); |
|
FreePluckerDoc (newdoc); |
|
return NULL; |
|
} |
|
} |
|
|
|
/* now do the rest of the reserved records */ |
|
|
|
for (i = 0; i < nreserved; i++) { |
|
if (reserved[i].name == PLKR_HOME_NAME) |
|
newdoc->home_record_uid = reserved[i].uid; |
|
else if (reserved[i].name == PLKR_DEFAULT_CATEGORY_NAME) { |
|
newdoc->default_category_record_uid = reserved[i].uid; |
|
if (!ParseCategories (newdoc, handle)) { |
|
_plkr_message ("Error parsing default-categories record"); |
|
FreePluckerDoc (newdoc); |
|
return NULL; |
|
} |
|
} |
|
else if (reserved[i].name == PLKR_URLS_INDEX_NAME) { |
|
newdoc->urls_index_record_uid = reserved[i].uid; |
|
if (!ParseURLs (newdoc, handle)) { |
|
_plkr_message ("Error parsing URLs records"); |
|
FreePluckerDoc (newdoc); |
|
return NULL; |
|
} |
|
} |
|
} |
|
|
|
newdoc->handle = handle; |
|
|
|
#ifdef DEBUGOPEN |
|
/* test the record fetch by fetching them! */ |
|
for (i = 1; i < newdoc->nrecords; i++) { |
|
plkr_DataRecordType type; |
|
int n; |
|
printf ("==============================================\n" |
|
"record %3d (%d bytes)\n", newdoc->records[i].uid, |
|
newdoc->records[i].size); |
|
(void) plkr_GetRecordBytes (newdoc, newdoc->records[i].uid, &n, |
|
&type); |
|
} |
|
#endif |
|
|
|
return newdoc; |
|
} |
|
|
|
int plkr_GetUidForIndex ( |
|
plkr_Document *doc, |
|
int record_index |
|
) |
|
{ |
|
return doc->records[ record_index ].uid; |
|
} |
|
|
|
void plkr_CloseDoc |
|
( |
|
plkr_Document * doc |
|
) |
|
{ |
|
if (doc == NULL) { |
|
_plkr_message ("Attempt to free NULL doc"); |
|
} |
|
else { |
|
FreePluckerDoc (doc); |
|
} |
|
} |
|
|
|
/***********************************************************************/ |
|
/***********************************************************************/ |
|
/***** *****/ |
|
/***** An implementation of a file-based DBHandle *****/ |
|
/***** *****/ |
|
/***********************************************************************/ |
|
/***********************************************************************/ |
|
|
|
static int FpSeek |
|
( |
|
plkr_DBHandle handle, |
|
long offset |
|
) |
|
{ |
|
long result; |
|
|
|
result = lseek (handle->dbprivate, offset, SEEK_SET); |
|
if (result != offset) { |
|
_plkr_message ("Unable to seek fp %d to offset %d -- %d instead\n", |
|
handle->dbprivate, offset, result); |
|
} |
|
return (result == offset); |
|
} |
|
|
|
static int FpRead |
|
( |
|
plkr_DBHandle handle, |
|
unsigned char* buffer, |
|
int buffersize, |
|
int readsize |
|
) |
|
{ |
|
int result; |
|
|
|
result = |
|
read (handle->dbprivate, buffer, |
|
MIN (buffersize, readsize)); |
|
if (result != readsize) { |
|
_plkr_message |
|
("Unable to read %d bytes from fp %d -- read %d instead\n", |
|
MIN (buffersize, readsize), handle->dbprivate, |
|
result); |
|
} |
|
return (result); |
|
} |
|
|
|
static void FpFree |
|
( |
|
plkr_DBHandle handle |
|
) |
|
{ |
|
int fp = handle->dbprivate; |
|
|
|
if (fp > 0) |
|
close (fp); |
|
} |
|
|
|
static long FpSize |
|
( |
|
plkr_DBHandle handle |
|
) |
|
{ |
|
int fp = handle->dbprivate; |
|
|
|
struct stat buf; |
|
|
|
if (fstat (fp, &buf) != 0) { |
|
_plkr_message ("Can't stat file; errno %d", errno); |
|
return 0; |
|
}; |
|
return buf.st_size; |
|
} |
|
|
|
plkr_Document* plkr_OpenDBFile |
|
( |
|
char* filename |
|
) |
|
{ |
|
plkr_DBHandle handle; |
|
plkr_Document* doc; |
|
int fp; |
|
|
|
#if !defined(WIN32) |
|
fp = open (filename, O_RDONLY); |
|
#else |
|
fp = open (filename, O_RDONLY | O_BINARY); |
|
#endif |
|
if (fp < 0) { |
|
_plkr_message ("Can't open file %s", filename); |
|
return NULL; |
|
} |
|
handle = (plkr_DBHandle) malloc (sizeof (*handle)); |
|
handle->dbprivate = fp; |
|
handle->seek = FpSeek; |
|
handle->read = FpRead; |
|
handle->free = FpFree; |
|
handle->size = FpSize; |
|
doc = plkr_OpenDoc (handle); |
|
if (doc == NULL) |
|
close (fp); |
|
return doc; |
|
} |
|
|
|
/***********************************************************************/ |
|
/***********************************************************************/ |
|
/***** *****/ |
|
/***** Routines to access individual uncompressed records *****/ |
|
/***** *****/ |
|
/***********************************************************************/ |
|
/***********************************************************************/ |
|
|
|
int plkr_CopyRecordBytes |
|
( |
|
plkr_Document* doc, |
|
int record_index, |
|
unsigned char* output_buffer, |
|
int output_buffer_size, |
|
plkr_DataRecordType* type |
|
) { |
|
plkr_DataRecord* record; |
|
int output_size; |
|
|
|
if (!FindRecordByIndex (doc, record_index)) |
|
return 0; |
|
|
|
if (!GetUncompressedRecord (doc, doc->handle, record_index, |
|
output_buffer, output_buffer_size, |
|
PLKR_DRTYPE_NONE, NULL, &output_size, |
|
&record)) |
|
return 0; |
|
else { |
|
*type = record->type; |
|
return output_size; |
|
} |
|
} |
|
|
|
|
|
unsigned char *plkr_GetRecordBytes |
|
( |
|
plkr_Document* doc, |
|
int record_index, |
|
int* size, |
|
plkr_DataRecordType* type |
|
) { |
|
plkr_DataRecord* record; |
|
unsigned char* buf; |
|
|
|
if (!FindRecordByIndex (doc, record_index)) |
|
return NULL; |
|
|
|
if (!GetUncompressedRecord (doc, doc->handle, record_index, |
|
NULL, 0, PLKR_DRTYPE_NONE, |
|
&buf, size, &record)) |
|
return NULL; |
|
else { |
|
if (!record->cache) { |
|
record->cache = buf; |
|
record->cached_size = *size; |
|
} |
|
*type = record->type; |
|
return buf; |
|
} |
|
} |
|
|
|
int plkr_GetHomeRecordID |
|
( |
|
plkr_Document* doc |
|
) |
|
{ |
|
return doc->home_record_uid; |
|
} |
|
|
|
char* plkr_GetName |
|
( |
|
plkr_Document* doc |
|
) |
|
{ |
|
return doc->name; |
|
} |
|
|
|
char* plkr_GetTitle |
|
( |
|
plkr_Document* doc |
|
) |
|
{ |
|
return doc->title; |
|
} |
|
|
|
char* plkr_GetAuthor |
|
( |
|
plkr_Document* doc |
|
) |
|
{ |
|
return doc->author; |
|
} |
|
|
|
int plkr_GetDefaultCharset |
|
( |
|
plkr_Document* doc |
|
) |
|
{ |
|
return doc->default_charset_mibenum; |
|
} |
|
|
|
unsigned long plkr_GetPublicationTime |
|
( |
|
plkr_Document* doc |
|
) |
|
{ |
|
if (doc->publication_time) |
|
return (unsigned long) doc->publication_time; |
|
else |
|
return (unsigned long) doc->creation_time; |
|
} |
|
|
|
plkr_CategoryList plkr_GetDefaultCategories |
|
( |
|
plkr_Document* doc |
|
) |
|
{ |
|
return doc->default_categories; |
|
} |
|
|
|
int plkr_GetRecordCount |
|
( |
|
plkr_Document* doc |
|
) |
|
{ |
|
return doc->nrecords; |
|
} |
|
|
|
int plkr_GetMaxRecordSize |
|
( |
|
plkr_Document* doc |
|
) |
|
{ |
|
return doc->max_record_size; |
|
} |
|
|
|
char* plkr_GetRecordURL |
|
( |
|
plkr_Document * doc, |
|
int record_index |
|
) |
|
{ |
|
if (record_index < 1 || record_index > doc->nurls) |
|
return NULL; |
|
else |
|
return (doc->urls[record_index - 1]); |
|
} |
|
|
|
int plkr_HasRecordWithID |
|
( |
|
plkr_Document* doc, |
|
int record_index |
|
) |
|
{ |
|
return (FindRecordByIndex (doc, record_index) != NULL); |
|
} |
|
|
|
int plkr_GetRecordType |
|
( |
|
plkr_Document* doc, |
|
int record_index |
|
) |
|
{ |
|
plkr_DataRecord* r; |
|
|
|
r = FindRecordByIndex (doc, record_index); |
|
if (r) |
|
return r->type; |
|
else |
|
return PLKR_DRTYPE_NONE; |
|
} |
|
|
|
int plkr_GetRecordCharset |
|
( |
|
plkr_Document* doc, |
|
int record_index |
|
) |
|
{ |
|
plkr_DataRecord* r; |
|
|
|
r = FindRecordByIndex (doc, record_index); |
|
if (r && ((r->type == PLKR_DRTYPE_TEXT_COMPRESSED) |
|
|| (r->type == PLKR_DRTYPE_TEXT))) { |
|
if (r->charset_mibenum == 0) |
|
return doc->default_charset_mibenum; |
|
else |
|
return r->charset_mibenum; |
|
} |
|
else |
|
return 0; |
|
}
|
|
|