okular/generators/plucker/unpluck/unpluck.cpp

/* -*- mode: c; indent-tabs-mode: nil; -*-
    $Id: unpluck.c,v 1.12 2003/12/28 20:59:21 chrish Exp $

    unpluck -- a library to read Plucker data files
    SPDX-FileCopyrightText: 2002 Bill Janssen

    SPDX-License-Identifier: GPL-2.0-or-later

*/

#if !defined(WIN32)
#include <unistd.h> /* for lseek, etc. */
#else
#include <io.h>
#endif
#include <assert.h> /* for assert() */
#include <errno.h>  /* for errno */
#include <fcntl.h>  /* for O_RDONLY */
#include <stdlib.h>
#include <string.h>   /* for strndup() */
#include <sys/stat.h> /* for fstat() */
#include <sys/types.h>

#include <zlib.h>

#include "unpluck.h"
#include "unpluckint.h"

/***********************************************************************/
/***********************************************************************/
/*****                                                             *****/
/*****   Decompression code (taken from the Plucker PalmOS viewer  *****/
/*****   sources, SPDX-FileCopyrightText: 1998-2002 Mark Ian Lillywhite *****/
/*****   and Michael Nordstr<EFBFBD>m, also under the GPL)                *****/
/*****                                                             *****/
/***********************************************************************/
/***********************************************************************/

/* uncompress DOC compressed document/image */
static unsigned int UncompressDOC(unsigned char *src,   /* in:  compressed document */
                                  unsigned int src_len, /* in:  size of compressed document */
                                  unsigned char *dest,  /* out: buffer to put uncompressed
                                                           document in */
                                  unsigned int dest_len /* out: size of buffer to put uncompressed
                                                             document in */
)
{
    //     unsigned int  offset;
    unsigned int src_index;
    unsigned int dest_index;

    assert(src != nullptr && src_len != 0 && dest != nullptr && dest_len != 0);

    //     offset = 0;
    src_index = 0;
    dest_index = 0;
    memset(dest, 0, dest_len);

    while (src_index < src_len) {
        unsigned int token;

        token = (unsigned int)src[src_index++];
        if (0 < token && token < 9) {
            while (token != 0) {
                dest[dest_index++] = src[src_index++];
                token--;
            }
        } else if (token < 0x80) {
            dest[dest_index++] = token;
        } else if (0xc0 <= token) {
            dest[dest_index++] = ' ';
            dest[dest_index++] = token ^ 0x80;
        } else {
            int m;
            int n;

            token *= 256;
            token += src[src_index++];

            m = (token & 0x3fff) / 8;
            n = token & 7;
            n += 3;
            while (n != 0) {
                dest[dest_index] = dest[dest_index - m];
                dest_index++;
                n--;
            }
        }
    }
    assert(src_index == src_len && dest_index == dest_len);

    return 1;
}

/* uncompress ZLib compressed document/image */
static unsigned int UncompressZLib(unsigned char *src,     /* in:  compressed document */
                                   unsigned int src_len,   /* in:  size of compressed document */
                                   unsigned char *dest,    /* out: buffer to put uncompressed
                                                              document in */
                                   unsigned int dest_len,  /* out: size of buffer to put uncompressed
                                                              document in */
                                   unsigned char *owner_id /* in:  owner-id key */
)
{
    z_stream z;
    unsigned int err;
    unsigned int keylen;
    unsigned int i;
    unsigned char keybuf[OWNER_ID_HASH_LEN];

    assert(src != nullptr && src_len != 0 && dest != nullptr && dest_len != 0);

    keylen = (owner_id == nullptr) ? 0 : MIN(src_len, OWNER_ID_HASH_LEN);

    memset(&z, 0, sizeof z);

    if (owner_id != nullptr) {
        for (i = 0; i < keylen; i++)
            keybuf[i] = src[i] ^ owner_id[i];
        z.next_in = keybuf;
        z.avail_in = keylen;

    } else {
        z.next_in = src;
        z.avail_in = src_len;
    }

    z.next_out = dest;
    z.avail_out = dest_len;

    err = inflateInit(&z);
    if (err != Z_OK) {
        return err;
    }

    do {
        if (z.avail_in == 0 && keylen > 0) {
            z.next_in = src + keylen;
            z.avail_in = src_len - keylen;
        }

        err = inflate(&z, Z_SYNC_FLUSH);

    } while (err == Z_OK);

    if (err != Z_STREAM_END)
        return err;

    assert(z.total_out == dest_len);

    return inflateEnd(&z);
}

/***********************************************************************/
/***********************************************************************/
/*****                                                             *****/
/*****   "Open" the DB (read the headers and parse the various     *****/
/*****   metadata, like URLs, default categories, charsets, etc.)  *****/
/*****                                                             *****/
/***********************************************************************/
/***********************************************************************/

static void FreePluckerDoc(plkr_Document *doc)
{
    if (doc->name != nullptr)
        free(doc->name);
    if (doc->title != nullptr)
        free(doc->title);
    if (doc->author != nullptr)
        free(doc->author);
    if (doc->records != nullptr) {
        int i;
        for (i = 0; i < doc->nrecords; i++) {
            if (doc->records[i].cache != nullptr)
                free(doc->records[i].cache);
        }
        free(doc->records);
    }
    if (doc->urls != nullptr)
        free(doc->urls);
    if (doc->handle != nullptr)
        doc->handle->free(doc->handle);
    free(doc);
}

static plkr_DataRecord *FindRecordByIndex(plkr_Document *doc, int record_index)
{
    int imin;
    int imax;
    int itest;

    for (imin = 0, imax = doc->nrecords; imin < imax;) {
        itest = imin + (imax - imin) / 2;
        /* _plkr_message("imin = %2d, imax = %2d, itest = %2d (%2d), record_index = %2d",
           imin, imax, itest, doc->records[itest].uid, record_index); */
        if (doc->records[itest].uid == record_index)
            return &doc->records[itest];
        else if (record_index > doc->records[itest].uid)
            imin = itest + 1;
        else if (record_index < doc->records[itest].uid)
            imax = itest;
    }
    return nullptr;
}

static int
GetUncompressedRecord(plkr_Document *doc, plkr_DBHandle handle, int record_index, unsigned char *buffer, int buffer_size, plkr_DataRecordType expected_type, unsigned char **buffer_out, int *buffer_size_out, plkr_DataRecord **record_out)
{
    /* read whole data record, including header, into buffer.  If some part of the
       record is compressed, uncompress it.  If "buffer" is NULL, allocate enough
       bytes to fit.  Returns TRUE if read is successful, and sets "buffer_out" and
       "buffer_size_out" and "record_out" on successful return. */

    plkr_DataRecord *record;
    unsigned char *tbuffer = buffer;
    int size_needed;
    int blen = buffer_size;

    record = FindRecordByIndex(doc, record_index);
    if (record == nullptr) {
        _plkr_message("No record with index %d", record_index);
        return FALSE;
    };
    if (expected_type != PLKR_DRTYPE_NONE && record->type != expected_type) {
        _plkr_message("Record %d has unexpected type %d; expected %d", record_index, record->type, expected_type);
        return FALSE;
    }

    /* figure size needed */
    size_needed = record->uncompressed_size + 8;
    if ((record->type == PLKR_DRTYPE_TEXT_COMPRESSED) || (record->type == PLKR_DRTYPE_TEXT))
        size_needed += 4 * record->nparagraphs;

    if (!buffer) {
        if (buffer_out == nullptr) {
            _plkr_message("No output buffer");
            return FALSE;
        } else if (record->cache) {
            tbuffer = record->cache;
            size_needed = record->cached_size;
        } else {
            tbuffer = (unsigned char *)malloc(size_needed);
            blen = size_needed;
        }
    } else {
        tbuffer = buffer;
        if (buffer_size < size_needed) {
            _plkr_message("Buffer too small; needs %d", size_needed);
            return FALSE;
        } else if (record->cache) {
            memcpy(buffer, record->cache, record->cached_size);
            size_needed = record->cached_size;
        }
    }

    if (!record->cache) {
        if ((record->type == PLKR_DRTYPE_TEXT_COMPRESSED) || (record->type == PLKR_DRTYPE_IMAGE_COMPRESSED) || (record->type == PLKR_DRTYPE_TABLE_COMPRESSED) || (record->type == PLKR_DRTYPE_GLYPHPAGE) ||
            (record->type == PLKR_DRTYPE_LINKS_COMPRESSED)) {
            unsigned char *start_of_data, *output_ptr;
            int len_of_data, buffer_remaining, buf_to_use;
            unsigned char *buf = (unsigned char *)malloc(record->size);

            if (!handle->seek(handle, record->offset) || (handle->read(handle, buf, record->size, record->size) != record->size)) {
                _plkr_message("Bad read from DBHandle while reading record %d", record->uid);
                free(buf);
                if (tbuffer != buffer)
                    free(tbuffer);
                return FALSE;
            }

            memcpy(tbuffer, buf, 8);
            output_ptr = tbuffer + 8;
            buffer_remaining = blen - 8;
            start_of_data = buf + 8;
            len_of_data = record->size - 8;
            if (record->type == PLKR_DRTYPE_TEXT_COMPRESSED) {
                /* skip over the paragraph headers */
                memcpy(output_ptr, start_of_data, 4 * record->nparagraphs);
                start_of_data += (4 * record->nparagraphs);
                len_of_data -= (4 * record->nparagraphs);
                output_ptr += (4 * record->nparagraphs);
                buffer_remaining -= (4 * record->nparagraphs);
            }

            buf_to_use = size_needed - (start_of_data - buf);
            if (doc->compression == PLKR_COMPRESSION_ZLIB) {
                if (UncompressZLib(start_of_data, len_of_data, output_ptr, buf_to_use, (doc->owner_id_required ? doc->owner_id_key : nullptr)) != Z_OK) {
                    _plkr_message("Bad Zlib uncompress of record %d", record_index);
                    free(buf);
                    if (tbuffer != buffer)
                        free(tbuffer);
                    return FALSE;
                };
            } else if (doc->compression == PLKR_COMPRESSION_DOC) {
                if (UncompressDOC(start_of_data, len_of_data, output_ptr, buf_to_use) != 1) {
                    _plkr_message("Bad DOC uncompress of record %d", record_index);
                    free(buf);
                    if (tbuffer != buffer)
                        free(tbuffer);
                    return FALSE;
                };
            }
            free(buf);
        } else {
            /* all the record types which don't use compression */
            if (!handle->seek(handle, record->offset) || (handle->read(handle, tbuffer, blen, size_needed) != size_needed)) {
                _plkr_message("Bad read from DBHandle while reading record %d", record->uid);
                if (tbuffer != buffer)
                    free(tbuffer);
                return FALSE;
            }
        }
    }

    if (record_out)
        *record_out = record;
    if (buffer_out)
        *buffer_out = tbuffer;
    if (buffer_size_out)
        *buffer_size_out = size_needed;
    return TRUE;
}

static int ParseCategories(plkr_Document *newdoc, plkr_DBHandle handle)
{
    struct _plkr_CategoryName *categories;
    struct _plkr_CategoryName *newc;
    plkr_DataRecord *record;
    unsigned char *buf;
    unsigned char *ptr;
    int bufsize;

    if (GetUncompressedRecord(newdoc, handle, newdoc->default_category_record_uid, nullptr, 0, PLKR_DRTYPE_CATEGORY, &buf, &bufsize, &record)) {
        /* keep the record data, since the list of char * ptrs will point into it */
        record->cache = buf;
        record->cached_size = bufsize;
        categories = nullptr;
        for (ptr = buf + 8; (ptr - buf) < bufsize;) {
            newc = (struct _plkr_CategoryName *)malloc(sizeof(struct _plkr_CategoryName));
            newc->next = categories;
            categories = newc;
            newc->name = (char *)ptr;
            ptr += (strlen((char *)ptr) + 1);
        }
        newdoc->default_categories = categories;
        return TRUE;
    } else {
        return FALSE;
    }
}

static int ParseMetadata(plkr_Document *newdoc, plkr_DBHandle handle)
{
    unsigned char *buf;
    unsigned char *ptr;
    int bufsize;
    int nsubrecords;
    int typecode;
    int subrecord_length;
    int i;

    if (!GetUncompressedRecord(newdoc, handle, newdoc->metadata_record_uid, nullptr, 0, PLKR_DRTYPE_METADATA, &buf, &bufsize, nullptr)) {
        return FALSE;
    } else {
        nsubrecords = (buf[8] << 8) + buf[9];
        for (i = 0, ptr = buf + 10; i < nsubrecords; i++) {
            typecode = (ptr[0] << 8) + ptr[1];
            subrecord_length = ((ptr[2] << 8) + ptr[3]) * 2;

            if (typecode == PLKR_MDTYPE_DEFAULTCHARSET) {
                newdoc->default_charset_mibenum = (ptr[4] << 8) + ptr[5];
                ptr += 6;

            } else if (typecode == PLKR_MDTYPE_EXCEPTCHARSETS) {
                int i, n, record_id, mibenum;
                plkr_DataRecord *record;

                ptr += 4;
                for (i = 0, n = subrecord_length / 4; i < n; i++, ptr += 4) {
                    record_id = (ptr[0] << 8) + ptr[1];
                    mibenum = (ptr[2] << 8) + ptr[3];
                    record = FindRecordByIndex(newdoc, record_id);
                    if (record == nullptr) {
                        _plkr_message("Can't find record with id %d", record_id);
                        free(buf);
                        return FALSE;
                    }
                    record->charset_mibenum = mibenum;
                }

            } else if (typecode == PLKR_MDTYPE_OWNERIDCRC) {
                newdoc->owner_id_required = TRUE;
                ptr += 8;

            } else if (typecode == PLKR_MDTYPE_AUTHOR) {
                newdoc->author = _plkr_strndup((char *)(ptr + 4), subrecord_length);
                ptr += (4 + subrecord_length);

            } else if (typecode == PLKR_MDTYPE_TITLE) {
                newdoc->title = _plkr_strndup((char *)(ptr + 4), subrecord_length);
                ptr += (4 + subrecord_length);

            } else if (typecode == PLKR_MDTYPE_PUBLICATIONTIME) {
                newdoc->publication_time = READ_BIGENDIAN_LONG(ptr + 4) - PLKR_TIMEADJUST;
                ptr += 8;

            } else {
                _plkr_message("Bad metadata typecode %d encountered in metadata record", typecode);
                free(buf);
                return FALSE;
            }
        }
        free(buf);
        return TRUE;
    }
}

static int ParseURLs(plkr_Document *newdoc, plkr_DBHandle handle)
{
    plkr_DataRecord *record;
    unsigned char *buf;
    unsigned char *ptr;
    char **urls;
    int id;
    int i;
    int n;
    int count;
    int nurls;
    int bufsize;

    struct url_index_record {
        int last_url_index;
        int record_id;
    } * records;

    buf = nullptr;
    urls = nullptr;
    records = nullptr;

    if (!GetUncompressedRecord(newdoc, handle, newdoc->urls_index_record_uid, nullptr, 0, PLKR_DRTYPE_LINKS_INDEX, &buf, &bufsize, nullptr)) {
        return FALSE;
    } else {
        n = ((buf[4] << 8) + buf[5]) / 4;
        records = (struct url_index_record *)malloc(n * sizeof(*records));
        for (i = 0, nurls = 0; i < n; i++) {
            ptr = buf + 8 + (i * 4);
            records[i].last_url_index = (ptr[0] << 8) + ptr[1];
            records[i].record_id = (ptr[2] << 8) + ptr[3];
#ifdef DEBUGURLS
            _plkr_message("index %3d:  last = %d, record_id = %d", i, records[i].last_url_index, records[i].record_id);
#endif /* def DEBUGURLS */
            nurls = MAX(nurls, records[i].last_url_index);
        }
        free(buf);
        buf = nullptr;
    }

    urls = (char **)malloc(nurls * sizeof(char *));
    memset(urls, 0, nurls * sizeof(char *));

    for (count = 0, i = 0; i < n; i++) {
        id = records[i].record_id;
        if (!GetUncompressedRecord(newdoc, handle, id, nullptr, 0, PLKR_DRTYPE_NONE, &buf, &bufsize, &record)) {
            goto errout4;
        }
        if (record->type != PLKR_DRTYPE_LINKS && record->type != PLKR_DRTYPE_LINKS_COMPRESSED) {
            _plkr_message("Supposed URLs record has bad type %d", record->type);
            goto errout4;
        }
        record->cache = buf;
        record->cached_size = bufsize;
        buf = nullptr;
        for (ptr = record->cache + 8; (ptr - record->cache) < record->cached_size; ptr += (strlen((char *)ptr) + 1)) {
#ifdef DEBUGURLS
            _plkr_message("%3d:  %s", count, ptr);
#endif /* def DEBUGURLS */
            assert(count < nurls);
            urls[count++] = (char *)ptr;
        }
    }
    free(records);
    newdoc->urls = urls;
    newdoc->nurls = nurls;

    return TRUE;

errout4:
    if (buf != nullptr)
        free(buf);
    free(urls);
    free(records);
    return FALSE;
}

plkr_Document *plkr_OpenDoc(plkr_DBHandle handle)
{
    ReservedRecordEntry reserved[MAX_RESERVED];
    plkr_DataRecord *record;
    plkr_Document *newdoc;
    unsigned char utilbuf[128];
    static char id_stamp[9] = "DataPlkr";
    int i;
    int nreserved;
    int records_size;
    int compression;

    if (!handle->seek(handle, 0) || (handle->read(handle, utilbuf, sizeof(utilbuf), 78) != 78)) {
        _plkr_message("Bad read of DB header");
        return nullptr;
    }

    /* check for type stamp */
    if (strncmp((char *)(utilbuf + 60), id_stamp, 8) != 0) {
        _plkr_message("Bad magic number");
        return nullptr;
    }

    /* check for version 1 */
    i = (utilbuf[34] << 8) + utilbuf[35];
    if (i != 1) {
        _plkr_message("Not version 1 of Plucker format; version %d", i);
        return nullptr;
    }

    /* get the title, creation time, and last modification time from header */
    newdoc = (plkr_Document *)malloc(sizeof(plkr_Document));
    memset(newdoc, 0, sizeof(plkr_Document));
    newdoc->name = (char *)_plkr_strndup((char *)utilbuf, MIN(strlen((char *)utilbuf), 32));
    newdoc->creation_time = (time_t)((utilbuf[36] << 24) + (utilbuf[37] << 16) + (utilbuf[38] << 8) + utilbuf[39] - PLKR_TIMEADJUST);
    newdoc->modification_time = (time_t)((utilbuf[40] << 24) + (utilbuf[41] << 16) + (utilbuf[42] << 8) + utilbuf[43] - PLKR_TIMEADJUST);
    newdoc->nrecords = (utilbuf[76] << 8) + utilbuf[77];

    /* Now read the record-list to find out where the records are */
    records_size = sizeof(plkr_DataRecord) * newdoc->nrecords;
    newdoc->records = (plkr_DataRecord *)malloc(records_size);
    memset(newdoc->records, 0, records_size);
    for (i = 0; i < newdoc->nrecords; i++) {
        if (handle->read(handle, utilbuf, sizeof(utilbuf), 8) != 8) {
            _plkr_message("Bad read of record list");
            FreePluckerDoc(newdoc);
            return nullptr;
        }
        newdoc->records[i].offset = (utilbuf[0] << 24) + (utilbuf[1] << 16) + (utilbuf[2] << 8) + utilbuf[3];
    }

    /* process the index record */
    if (!handle->seek(handle, newdoc->records[0].offset) || (handle->read(handle, utilbuf, sizeof(utilbuf), 6) != 6)) {
        _plkr_message("Bad read of index record");
        FreePluckerDoc(newdoc);
        return nullptr;
    }
    if ((utilbuf[0] << 8) + utilbuf[1] != 1) {
        _plkr_message("index record has bad UID %d", (utilbuf[0] << 8) + utilbuf[1]);
        FreePluckerDoc(newdoc);
        return nullptr;
    }
    newdoc->records[0].uid = 1;
    compression = (utilbuf[2] << 8) + utilbuf[3];
    if (compression == PLKR_COMPRESSION_DOC)
        newdoc->compression = PLKR_COMPRESSION_DOC;
    else if (compression == PLKR_COMPRESSION_ZLIB)
        newdoc->compression = PLKR_COMPRESSION_ZLIB;
    else {
        _plkr_message("Unknown compression type %d", compression);
        FreePluckerDoc(newdoc);
        return nullptr;
    }
    nreserved = (utilbuf[4] << 8) + utilbuf[5];
    if (nreserved > MAX_RESERVED) {
        _plkr_message("Too many reserved records (%d) for software", nreserved);
        FreePluckerDoc(newdoc);
        return nullptr;
    }
    for (i = 0; i < nreserved; i++) {
        if (handle->read(handle, utilbuf, sizeof(utilbuf), 4) != 4) {
            _plkr_message("Bad read of reserved record list");
            FreePluckerDoc(newdoc);
            return nullptr;
        }
        reserved[i].name = (ReservedRecordName)((utilbuf[0] << 8) + utilbuf[1]);
        reserved[i].uid = (utilbuf[2] << 8) + utilbuf[3];
    }

    /* OK, now process the data records */
    newdoc->max_record_size = 0;
    for (i = 1; i < newdoc->nrecords; i++) {
        record = newdoc->records + i;
        if (!handle->seek(handle, record->offset) || (handle->read(handle, utilbuf, sizeof(utilbuf), 8) != 8)) {
            _plkr_message("Can't read header of record %d", i);
            FreePluckerDoc(newdoc);
            return nullptr;
        }
        newdoc->records[i - 1].size = record->offset - newdoc->records[i - 1].offset;
        record->uid = (utilbuf[0] << 8) + utilbuf[1];
        record->nparagraphs = (utilbuf[2] << 8) + utilbuf[3];
        record->uncompressed_size = (utilbuf[4] << 8) + utilbuf[5];
        record->type = (plkr_DataRecordType)utilbuf[6];
        newdoc->max_record_size = MAX(newdoc->max_record_size, record->uncompressed_size);
    }
    /* To get the size of the last record we subtract its offset from the total size of the DB. */
    if ((i = handle->size(handle)) == 0) {
        _plkr_message("Can't obtain size of DB");
        FreePluckerDoc(newdoc);
        return nullptr;
    };
    record = newdoc->records + (newdoc->nrecords - 1);
    record->size = i - record->offset;
    /* make sure the uncompressed size is set, now that we know the record sizes */
    for (i = 0; i < newdoc->nrecords; i++) {
        record = newdoc->records + i;
        if (record->uncompressed_size == 0) {
            if (record->type == PLKR_DRTYPE_LINKS_COMPRESSED || record->type == PLKR_DRTYPE_TEXT_COMPRESSED || record->type == PLKR_DRTYPE_TABLE_COMPRESSED || record->type == PLKR_DRTYPE_IMAGE_COMPRESSED) {
                _plkr_message("Bad uncompressed size 0 in record uid %d", record->uid);
                FreePluckerDoc(newdoc);
                return nullptr;
            } else {
                record->uncompressed_size = record->size - 8;
            }
        }
#ifdef DEBUGOPEN
        {
            static char *types[] = {"TEXT", "TEXTC", "IMAGE", "IMAGEC", "MAILTO", "URLINDEX", "URLS", "URLSC", "BOOKMARKS", "CATEGORIES", "METADATA"};
            _plkr_message("%3d:  type=%10s, offset=%07x, size=%5d, uncompressed_size=%5d", record->uid, types[MIN(record->type, sizeof(types) / sizeof(char *))], record->offset, record->size, record->uncompressed_size);
        }
#endif
    }

    /* find the reserved records */

    /* do metadata first, to find out whether we need an owner_id key */
    for (i = 0; i < nreserved; i++) {
        if (reserved[i].name == PLKR_METADATA_NAME) {
            newdoc->metadata_record_uid = reserved[i].uid;
            if (!ParseMetadata(newdoc, handle)) {
                _plkr_message("Error parsing metadata record");
                FreePluckerDoc(newdoc);
                return nullptr;
            }
        }
    }

    if (newdoc->owner_id_required) {
        /* we need to set up the owner-id key before uncompressing
           any records... */

        char *owner_id = plkr_GetConfigString(nullptr, "owner_id", nullptr);

        if (owner_id != nullptr) {
            unsigned long crc;
            int owner_id_len = strlen(owner_id);
            crc = crc32(0L, nullptr, 0);
            crc = crc32(crc, (const Bytef *)owner_id, owner_id_len);
            for (i = 0; i < 10; i++) {
                crc = crc32(crc, (const Bytef *)owner_id, owner_id_len);
                newdoc->owner_id_key[(i * 4) + 0] = (unsigned char)((crc >> 24) & 0xFF);
                newdoc->owner_id_key[(i * 4) + 1] = (unsigned char)((crc >> 16) & 0xFF);
                newdoc->owner_id_key[(i * 4) + 2] = (unsigned char)((crc >> 8) & 0xFF);
                newdoc->owner_id_key[(i * 4) + 3] = (unsigned char)(crc & 0xFF);
            }
        } else {
            _plkr_message("Document requires owner-id to open");
            FreePluckerDoc(newdoc);
            return nullptr;
        }
    }

    /* now do the rest of the reserved records */

    for (i = 0; i < nreserved; i++) {
        if (reserved[i].name == PLKR_HOME_NAME)
            newdoc->home_record_uid = reserved[i].uid;
        else if (reserved[i].name == PLKR_DEFAULT_CATEGORY_NAME) {
            newdoc->default_category_record_uid = reserved[i].uid;
            if (!ParseCategories(newdoc, handle)) {
                _plkr_message("Error parsing default-categories record");
                FreePluckerDoc(newdoc);
                return nullptr;
            }
        } else if (reserved[i].name == PLKR_URLS_INDEX_NAME) {
            newdoc->urls_index_record_uid = reserved[i].uid;
            if (!ParseURLs(newdoc, handle)) {
                _plkr_message("Error parsing URLs records");
                FreePluckerDoc(newdoc);
                return nullptr;
            }
        }
    }

    newdoc->handle = handle;

#ifdef DEBUGOPEN
    /* test the record fetch by fetching them! */
    for (i = 1; i < newdoc->nrecords; i++) {
        plkr_DataRecordType type;
        int n;
        printf(
            "==============================================\n"
            "record %3d (%d bytes)\n",
            newdoc->records[i].uid,
            newdoc->records[i].size);
        (void)plkr_GetRecordBytes(newdoc, newdoc->records[i].uid, &n, &type);
    }
#endif

    return newdoc;
}

int plkr_GetUidForIndex(plkr_Document *doc, int record_index)
{
    return doc->records[record_index].uid;
}

void plkr_CloseDoc(plkr_Document *doc)
{
    if (doc == nullptr) {
        _plkr_message("Attempt to free NULL doc");
    } else {
        FreePluckerDoc(doc);
    }
}

/***********************************************************************/
/***********************************************************************/
/*****                                                             *****/
/*****         An implementation of a file-based DBHandle          *****/
/*****                                                             *****/
/***********************************************************************/
/***********************************************************************/

static int FpSeek(plkr_DBHandle handle, long offset)
{
    long result;

    result = lseek(handle->dbprivate, offset, SEEK_SET);
    if (result != offset) {
        _plkr_message("Unable to seek fp %d to offset %lu -- %lu instead\n", handle->dbprivate, offset, result);
    }
    return (result == offset);
}

static int FpRead(plkr_DBHandle handle, unsigned char *buffer, int buffersize, int readsize)
{
    int result;

    result = read(handle->dbprivate, buffer, MIN(buffersize, readsize));
    if (result != readsize) {
        _plkr_message("Unable to read %d bytes from fp %d -- read %d instead\n", MIN(buffersize, readsize), handle->dbprivate, result);
    }
    return (result);
}

static void FpFree(plkr_DBHandle handle)
{
    int fp = handle->dbprivate;

    if (fp > 0)
        close(fp);
}

static long FpSize(plkr_DBHandle handle)
{
    int fp = handle->dbprivate;

    struct stat buf;

    if (fstat(fp, &buf) != 0) {
        _plkr_message("Can't stat file; errno %d", errno);
        return 0;
    };
    return buf.st_size;
}

plkr_Document *plkr_OpenDBFile(const char *filename)
{
    plkr_DBHandle handle;
    plkr_Document *doc;
    int fp;

#if !defined(WIN32)
    fp = open(filename, O_RDONLY);
#else
    fp = open(filename, O_RDONLY | O_BINARY);
#endif
    if (fp < 0) {
        _plkr_message("Can't open file %s", filename);
        return nullptr;
    }
    handle = (plkr_DBHandle)malloc(sizeof(*handle));
    handle->dbprivate = fp;
    handle->seek = FpSeek;
    handle->read = FpRead;
    handle->free = FpFree;
    handle->size = FpSize;
    doc = plkr_OpenDoc(handle);
    if (doc == nullptr)
        close(fp);
    return doc;
}

/***********************************************************************/
/***********************************************************************/
/*****                                                             *****/
/*****   Routines to access individual uncompressed records        *****/
/*****                                                             *****/
/***********************************************************************/
/***********************************************************************/

int plkr_CopyRecordBytes(plkr_Document *doc, int record_index, unsigned char *output_buffer, int output_buffer_size, plkr_DataRecordType *type)
{
    plkr_DataRecord *record;
    int output_size;

    if (!FindRecordByIndex(doc, record_index))
        return 0;

    if (!GetUncompressedRecord(doc, doc->handle, record_index, output_buffer, output_buffer_size, PLKR_DRTYPE_NONE, nullptr, &output_size, &record))
        return 0;
    else {
        *type = record->type;
        return output_size;
    }
}

unsigned char *plkr_GetRecordBytes(plkr_Document *doc, int record_index, int *size, plkr_DataRecordType *type)
{
    plkr_DataRecord *record;
    unsigned char *buf;

    if (!FindRecordByIndex(doc, record_index))
        return nullptr;

    if (!GetUncompressedRecord(doc, doc->handle, record_index, nullptr, 0, PLKR_DRTYPE_NONE, &buf, size, &record))
        return nullptr;
    else {
        if (!record->cache) {
            record->cache = buf;
            record->cached_size = *size;
        }
        *type = record->type;
        return buf;
    }
}

int plkr_GetHomeRecordID(plkr_Document *doc)
{
    return doc->home_record_uid;
}

char *plkr_GetName(plkr_Document *doc)
{
    return doc->name;
}

char *plkr_GetTitle(plkr_Document *doc)
{
    return doc->title;
}

char *plkr_GetAuthor(plkr_Document *doc)
{
    return doc->author;
}

int plkr_GetDefaultCharset(plkr_Document *doc)
{
    return doc->default_charset_mibenum;
}

unsigned long plkr_GetPublicationTime(plkr_Document *doc)
{
    if (doc->publication_time)
        return (unsigned long)doc->publication_time;
    else
        return (unsigned long)doc->creation_time;
}

plkr_CategoryList plkr_GetDefaultCategories(plkr_Document *doc)
{
    return doc->default_categories;
}

int plkr_GetRecordCount(plkr_Document *doc)
{
    return doc->nrecords;
}

int plkr_GetMaxRecordSize(plkr_Document *doc)
{
    return doc->max_record_size;
}

char *plkr_GetRecordURL(plkr_Document *doc, int record_index)
{
    if (record_index < 1 || record_index > doc->nurls)
        return nullptr;
    else
        return (doc->urls[record_index - 1]);
}

int plkr_HasRecordWithID(plkr_Document *doc, int record_index)
{
    return (FindRecordByIndex(doc, record_index) != nullptr);
}

int plkr_GetRecordType(plkr_Document *doc, int record_index)
{
    plkr_DataRecord *r;

    r = FindRecordByIndex(doc, record_index);
    if (r)
        return r->type;
    else
        return PLKR_DRTYPE_NONE;
}

int plkr_GetRecordCharset(plkr_Document *doc, int record_index)
{
    plkr_DataRecord *r;

    r = FindRecordByIndex(doc, record_index);
    if (r && ((r->type == PLKR_DRTYPE_TEXT_COMPRESSED) || (r->type == PLKR_DRTYPE_TEXT))) {
        if (r->charset_mibenum == 0)
            return doc->default_charset_mibenum;
        else
            return r->charset_mibenum;
    } else
        return 0;
}