|
|
/* db.c: an external database to avoid filesystem lookups. |
|
|
|
|
|
Copyright (C) 1994, 95, 96, 97 Karl Berry. |
|
|
|
|
|
This library is free software; you can redistribute it and/or |
|
|
modify it under the terms of the GNU Library General Public |
|
|
License as published by the Free Software Foundation; either |
|
|
version 2 of the License, or (at your option) any later version. |
|
|
|
|
|
This library is distributed in the hope that it will be useful, |
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
|
Library General Public License for more details. |
|
|
|
|
|
You should have received a copy of the GNU Library General Public |
|
|
License along with this library; if not, write to the Free Software |
|
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ |
|
|
|
|
|
#include <kpathsea/config.h> |
|
|
#include <kpathsea/absolute.h> |
|
|
#include <kpathsea/c-stat.h> |
|
|
#include <kpathsea/c-fopen.h> |
|
|
#include <kpathsea/c-pathch.h> |
|
|
#include <kpathsea/db.h> |
|
|
#include <kpathsea/hash.h> |
|
|
#include <kpathsea/line.h> |
|
|
#include <kpathsea/pathsearch.h> |
|
|
#include <kpathsea/readable.h> |
|
|
#include <kpathsea/str-list.h> |
|
|
#include <kpathsea/tex-file.h> |
|
|
#include <kpathsea/variable.h> |
|
|
|
|
|
static hash_table_type db; /* The hash table for all the ls-R's. */ |
|
|
/* SMALL: The old size of the hash table was 7603, with the assumption |
|
|
that a minimal ls-R bas about 3500 entries. But a typical ls-R will |
|
|
be more like double that size. */ |
|
|
#ifndef DB_HASH_SIZE |
|
|
#define DB_HASH_SIZE 15991 |
|
|
#endif |
|
|
#ifndef DB_NAME |
|
|
#define DB_NAME "ls-R" |
|
|
#endif |
|
|
|
|
|
static hash_table_type alias_db; |
|
|
#ifndef ALIAS_NAME |
|
|
#define ALIAS_NAME "aliases" |
|
|
#endif |
|
|
#ifndef ALIAS_HASH_SIZE |
|
|
#define ALIAS_HASH_SIZE 1009 |
|
|
#endif |
|
|
|
|
|
static str_list_type db_dir_list; |
|
|
|
|
|
/* If DIRNAME contains any element beginning with a `.' (that is more |
|
|
than just `./'), return true. This is to allow ``hidden'' |
|
|
directories -- ones that don't get searched. */ |
|
|
|
|
|
static boolean |
|
|
ignore_dir_p P1C(const_string, dirname) |
|
|
{ |
|
|
const_string dot_pos = dirname; |
|
|
|
|
|
while ((dot_pos = strchr (dot_pos + 1, '.'))) { |
|
|
/* If / before and no / after, skip it. */ |
|
|
if (IS_DIR_SEP (dot_pos[-1]) && dot_pos[1] && !IS_DIR_SEP (dot_pos[1])) |
|
|
return true; |
|
|
} |
|
|
|
|
|
return false; |
|
|
} |
|
|
|
|
|
/* If no DB_FILENAME, return false (maybe they aren't using this feature). |
|
|
Otherwise, add entries from DB_FILENAME to TABLE, and return true. */ |
|
|
|
|
|
static boolean |
|
|
db_build P2C(hash_table_type *, table, const_string, db_filename) |
|
|
{ |
|
|
string line; |
|
|
unsigned dir_count = 0, file_count = 0, ignore_dir_count = 0; |
|
|
unsigned len = strlen (db_filename) - sizeof (DB_NAME) + 1; /* Keep the /. */ |
|
|
string top_dir = xmalloc (len + 1); |
|
|
string cur_dir = NULL; /* First thing in ls-R might be a filename. */ |
|
|
FILE *db_file = fopen (db_filename, FOPEN_R_MODE); |
|
|
|
|
|
strncpy (top_dir, db_filename, len); |
|
|
top_dir[len] = 0; |
|
|
|
|
|
if (db_file) { |
|
|
while ((line = read_line (db_file)) != NULL) { |
|
|
len = strlen (line); |
|
|
|
|
|
/* A line like `/foo:' = new dir foo. Allow both absolute (/...) |
|
|
and explicitly relative (./...) names here. It's a kludge to |
|
|
pass in the directory name with the trailing : still attached, |
|
|
but it doesn't actually hurt. */ |
|
|
if (len > 0 && line[len - 1] == ':' && kpse_absolute_p (line, true)) { |
|
|
/* New directory line. */ |
|
|
if (!ignore_dir_p (line)) { |
|
|
/* If they gave a relative name, prepend full directory name now. */ |
|
|
line[len - 1] = DIR_SEP; |
|
|
/* Skip over leading `./', it confuses `match' and is just a |
|
|
waste of space, anyway. This will lose on `../', but `match' |
|
|
won't work there, either, so it doesn't matter. */ |
|
|
cur_dir = *line == '.' ? concat (top_dir, line + 2) : xstrdup (line); |
|
|
dir_count++; |
|
|
} else { |
|
|
cur_dir = NULL; |
|
|
ignore_dir_count++; |
|
|
} |
|
|
|
|
|
/* Ignore blank, `.' and `..' lines. */ |
|
|
} else if (*line != 0 && cur_dir /* a file line? */ |
|
|
&& !(*line == '.' |
|
|
&& (line[1] == 0 || (line[1] == '.' && line[2] == 0)))) |
|
|
{/* Make a new hash table entry with a key of `line' and a data |
|
|
of `cur_dir'. An already-existing identical key is ok, since |
|
|
a file named `foo' can be in more than one directory. Share |
|
|
`cur_dir' among all its files (and hence never free it). */ |
|
|
hash_insert (table, xstrdup (line), cur_dir); |
|
|
file_count++; |
|
|
|
|
|
} /* else ignore blank lines or top-level files |
|
|
or files in ignored directories*/ |
|
|
|
|
|
free (line); |
|
|
} |
|
|
|
|
|
xfclose (db_file, db_filename); |
|
|
|
|
|
if (file_count == 0) { |
|
|
WARNING1 ("kpathsea: No usable entries in %s", db_filename); |
|
|
WARNING ("kpathsea: See the manual for how to generate ls-R"); |
|
|
db_file = NULL; |
|
|
} else { |
|
|
str_list_add (&db_dir_list, xstrdup (top_dir)); |
|
|
} |
|
|
|
|
|
#ifdef KPSE_DEBUG |
|
|
if (KPSE_DEBUG_P (KPSE_DEBUG_HASH)) { |
|
|
/* Don't make this a debugging bit, since the output is so |
|
|
voluminous, and being able to specify -1 is too useful. |
|
|
Instead, let people who want it run the program under |
|
|
a debugger and change the variable that way. */ |
|
|
boolean hash_summary_only = true; |
|
|
|
|
|
DEBUGF4 ("%s: %u entries in %d directories (%d hidden).\n", |
|
|
db_filename, file_count, dir_count, ignore_dir_count); |
|
|
DEBUGF ("ls-R hash table:"); |
|
|
hash_print (*table, hash_summary_only); |
|
|
fflush (stderr); |
|
|
} |
|
|
#endif /* KPSE_DEBUG */ |
|
|
} |
|
|
|
|
|
free (top_dir); |
|
|
|
|
|
return db_file != NULL; |
|
|
} |
|
|
|
|
|
|
|
|
/* Insert FNAME into the hash table. This is for files that get built |
|
|
during a run. We wouldn't want to reread all of ls-R, even if it got |
|
|
rebuilt. */ |
|
|
|
|
|
void |
|
|
kpse_db_insert P1C(const_string, passed_fname) |
|
|
{ |
|
|
/* We might not have found ls-R, or even had occasion to look for it |
|
|
yet, so do nothing if we have no hash table. */ |
|
|
if (db.buckets) { |
|
|
const_string dir_part; |
|
|
string fname = xstrdup (passed_fname); |
|
|
string baseptr = (string)xbasename (fname); |
|
|
const_string file_part = xstrdup (baseptr); |
|
|
|
|
|
*baseptr = '\0'; /* Chop off the filename. */ |
|
|
dir_part = fname; /* That leaves the dir, with the trailing /. */ |
|
|
|
|
|
hash_insert (&db, file_part, dir_part); |
|
|
} |
|
|
} |
|
|
|
|
|
/* Return true if FILENAME could be in PATH_ELT, i.e., if the directory |
|
|
part of FILENAME matches PATH_ELT. Have to consider // wildcards, but |
|
|
$ and ~ expansion have already been done. */ |
|
|
|
|
|
static boolean |
|
|
match P2C(const_string, filename, const_string, path_elt) |
|
|
{ |
|
|
const_string original_filename = filename; |
|
|
boolean matched = false; |
|
|
|
|
|
for (; *filename && *path_elt; filename++, path_elt++) { |
|
|
if (FILECHARCASEEQ (*filename, *path_elt)) /* normal character match */ |
|
|
; |
|
|
|
|
|
else if (IS_DIR_SEP (*path_elt) /* at // */ |
|
|
&& original_filename < filename && IS_DIR_SEP (path_elt[-1])) { |
|
|
while (IS_DIR_SEP (*path_elt)) |
|
|
path_elt++; /* get past second and any subsequent /'s */ |
|
|
if (*path_elt == 0) { |
|
|
/* Trailing //, matches anything. We could make this part of the |
|
|
other case, but it seems pointless to do the extra work. */ |
|
|
matched = true; |
|
|
break; |
|
|
} else { |
|
|
/* Intermediate //, have to match rest of PATH_ELT. */ |
|
|
for (; !matched && *filename; filename++) { |
|
|
/* Try matching at each possible character. */ |
|
|
if (IS_DIR_SEP (filename[-1]) |
|
|
&& FILECHARCASEEQ (*filename, *path_elt)) |
|
|
matched = match (filename, path_elt); |
|
|
} |
|
|
/* Prevent filename++ when *filename='\0'. */ |
|
|
break; |
|
|
} |
|
|
} |
|
|
|
|
|
else /* normal character nonmatch, quit */ |
|
|
break; |
|
|
} |
|
|
|
|
|
/* If we've reached the end of PATH_ELT, check that we're at the last |
|
|
component of FILENAME, we've matched. */ |
|
|
if (!matched && *path_elt == 0) { |
|
|
/* Probably PATH_ELT ended with `vf' or some such, and FILENAME ends |
|
|
with `vf/ptmr.vf'. In that case, we'll be at a directory |
|
|
separator. On the other hand, if PATH_ELT ended with a / (as in |
|
|
`vf/'), FILENAME being the same `vf/ptmr.vf', we'll be at the |
|
|
`p'. Upshot: if we're at a dir separator in FILENAME, skip it. |
|
|
But if not, that's ok, as long as there are no more dir separators. */ |
|
|
if (IS_DIR_SEP (*filename)) |
|
|
filename++; |
|
|
|
|
|
while (*filename && !IS_DIR_SEP (*filename)) |
|
|
filename++; |
|
|
matched = *filename == 0; |
|
|
} |
|
|
|
|
|
return matched; |
|
|
} |
|
|
|
|
|
|
|
|
/* If DB_DIR is a prefix of PATH_ELT, return true; otherwise false. |
|
|
That is, the question is whether to try the db for a file looked up |
|
|
in PATH_ELT. If PATH_ELT == ".", for example, the answer is no. If |
|
|
PATH_ELT == "/usr/local/lib/texmf/fonts//tfm", the answer is yes. |
|
|
|
|
|
In practice, ls-R is only needed for lengthy subdirectory |
|
|
comparisons, but there's no gain to checking PATH_ELT to see if it is |
|
|
a subdir match, since the only way to do that is to do a string |
|
|
search in it, which is all we do anyway. */ |
|
|
|
|
|
static boolean |
|
|
elt_in_db P2C(const_string, db_dir, const_string, path_elt) |
|
|
{ |
|
|
boolean found = false; |
|
|
|
|
|
while (!found && FILECHARCASEEQ (*db_dir++, *path_elt++)) { |
|
|
/* If we've matched the entire db directory, it's good. */ |
|
|
if (*db_dir == 0) |
|
|
found = true; |
|
|
|
|
|
/* If we've reached the end of PATH_ELT, but not the end of the db |
|
|
directory, it's no good. */ |
|
|
else if (*path_elt == 0) |
|
|
break; |
|
|
} |
|
|
|
|
|
return found; |
|
|
} |
|
|
|
|
|
/* If ALIAS_FILENAME exists, read it into TABLE. */ |
|
|
|
|
|
static boolean |
|
|
alias_build P2C(hash_table_type *, table, const_string, alias_filename) |
|
|
{ |
|
|
string line, real, alias; |
|
|
unsigned count = 0; |
|
|
FILE *alias_file = fopen (alias_filename, FOPEN_R_MODE); |
|
|
|
|
|
if (alias_file) { |
|
|
while ((line = read_line (alias_file)) != NULL) { |
|
|
/* comments or empty */ |
|
|
if (*line == 0 || *line == '%' || *line == '#') { |
|
|
; |
|
|
} else { |
|
|
/* Each line should have two fields: realname aliasname. */ |
|
|
real = line; |
|
|
while (*real && ISSPACE (*real)) |
|
|
real++; |
|
|
alias = real; |
|
|
while (*alias && !ISSPACE (*alias)) |
|
|
alias++; |
|
|
*alias++ = 0; |
|
|
while (*alias && ISSPACE (*alias)) |
|
|
alias++; |
|
|
/* Is the check for errors strong enough? Should we warn the user |
|
|
for potential errors? */ |
|
|
if (strlen (real) != 0 && strlen (alias) != 0) { |
|
|
hash_insert (table, xstrdup (alias), xstrdup (real)); |
|
|
count++; |
|
|
} |
|
|
} |
|
|
free (line); |
|
|
} |
|
|
|
|
|
#ifdef KPSE_DEBUG |
|
|
if (KPSE_DEBUG_P (KPSE_DEBUG_HASH)) { |
|
|
/* As with ls-R above ... */ |
|
|
boolean hash_summary_only = true; |
|
|
DEBUGF2 ("%s: %u aliases.\n", alias_filename, count); |
|
|
DEBUGF ("alias hash table:"); |
|
|
hash_print (*table, hash_summary_only); |
|
|
fflush (stderr); |
|
|
} |
|
|
#endif /* KPSE_DEBUG */ |
|
|
|
|
|
xfclose (alias_file, alias_filename); |
|
|
} |
|
|
|
|
|
return alias_file != NULL; |
|
|
} |
|
|
|
|
|
/* Initialize the path for ls-R files, and read them all into the hash |
|
|
table `db'. If no usable ls-R's are found, set db.buckets to NULL. */ |
|
|
|
|
|
void |
|
|
kpse_init_db P1H(void) |
|
|
{ |
|
|
boolean ok = false; |
|
|
const_string db_path = kpse_init_format (kpse_db_format); |
|
|
string *db_files = kpse_all_path_search (db_path, DB_NAME); |
|
|
string *orig_db_files = db_files; |
|
|
|
|
|
/* Must do this after the path searching (which ends up calling |
|
|
kpse_db_search recursively), so db.buckets stays NULL. */ |
|
|
db = hash_create (DB_HASH_SIZE); |
|
|
|
|
|
while (db_files && *db_files) { |
|
|
if (db_build (&db, *db_files)) |
|
|
ok = true; |
|
|
free (*db_files); |
|
|
db_files++; |
|
|
} |
|
|
|
|
|
if (!ok) { |
|
|
/* If db can't be built, leave `size' nonzero (so we don't |
|
|
rebuild it), but clear `buckets' (so we don't look in it). */ |
|
|
free (db.buckets); |
|
|
db.buckets = NULL; |
|
|
} |
|
|
|
|
|
free (orig_db_files); |
|
|
|
|
|
/* Add the content of any alias databases. There may exist more than |
|
|
one alias file along DB_NAME files. This duplicates the above code |
|
|
-- should be a function. */ |
|
|
ok = false; |
|
|
db_files = kpse_all_path_search (db_path, ALIAS_NAME); |
|
|
orig_db_files = db_files; |
|
|
|
|
|
alias_db = hash_create (ALIAS_HASH_SIZE); |
|
|
|
|
|
while (db_files && *db_files) { |
|
|
if (alias_build (&alias_db, *db_files)) |
|
|
ok = true; |
|
|
free (*db_files); |
|
|
db_files++; |
|
|
} |
|
|
|
|
|
if (!ok) { |
|
|
free (alias_db.buckets); |
|
|
alias_db.buckets = NULL; |
|
|
} |
|
|
|
|
|
free (orig_db_files); |
|
|
} |
|
|
|
|
|
/* Avoid doing anything if this PATH_ELT is irrelevant to the databases. */ |
|
|
|
|
|
str_list_type * |
|
|
kpse_db_search P3C(const_string, name, const_string, orig_path_elt, |
|
|
boolean, all) |
|
|
{ |
|
|
string *db_dirs, *orig_dirs, *r; |
|
|
const_string last_slash; |
|
|
string path_elt; |
|
|
boolean done; |
|
|
str_list_type *ret; |
|
|
unsigned e; |
|
|
string *aliases = NULL; |
|
|
boolean relevant = false; |
|
|
|
|
|
/* If we failed to build the database (or if this is the recursive |
|
|
call to build the db path), quit. */ |
|
|
if (db.buckets == NULL) |
|
|
return NULL; |
|
|
|
|
|
/* When tex-glyph.c calls us looking for, e.g., dpi600/cmr10.pk, we |
|
|
won't find it unless we change NAME to just `cmr10.pk' and append |
|
|
`/dpi600' to PATH_ELT. We are justified in using a literal `/' |
|
|
here, since that's what tex-glyph.c unconditionally uses in |
|
|
DPI_BITMAP_SPEC. But don't do anything if the / begins NAME; that |
|
|
should never happen. */ |
|
|
last_slash = strrchr (name, '/'); |
|
|
if (last_slash && last_slash != name) { |
|
|
unsigned len = last_slash - name + 1; |
|
|
string dir_part = xmalloc (len); |
|
|
strncpy (dir_part, name, len - 1); |
|
|
dir_part[len - 1] = 0; |
|
|
path_elt = concat3 (orig_path_elt, "/", dir_part); |
|
|
name = last_slash + 1; |
|
|
} else |
|
|
path_elt = (string) orig_path_elt; |
|
|
|
|
|
/* Don't bother doing any lookups if this `path_elt' isn't covered by |
|
|
any of database directories. We do this not so much because the |
|
|
extra couple of hash lookups matter -- they don't -- but rather |
|
|
because we want to return NULL in this case, so path_search can |
|
|
know to do a disk search. */ |
|
|
for (e = 0; !relevant && e < STR_LIST_LENGTH (db_dir_list); e++) { |
|
|
relevant = elt_in_db (STR_LIST_ELT (db_dir_list, e), path_elt); |
|
|
} |
|
|
if (!relevant) |
|
|
return NULL; |
|
|
|
|
|
/* If we have aliases for this name, use them. */ |
|
|
if (alias_db.buckets) |
|
|
aliases = hash_lookup (alias_db, name); |
|
|
|
|
|
if (!aliases) { |
|
|
aliases = XTALLOC1 (string); |
|
|
aliases[0] = NULL; |
|
|
} |
|
|
{ /* Push aliases up by one and insert the original name at the front. */ |
|
|
unsigned i; |
|
|
unsigned len = 1; /* Have NULL element already allocated. */ |
|
|
for (r = aliases; *r; r++) |
|
|
len++; |
|
|
XRETALLOC (aliases, len + 1, string); |
|
|
for (i = len; i > 0; i--) { |
|
|
aliases[i] = aliases[i - 1]; |
|
|
} |
|
|
aliases[0] = (string) name; |
|
|
} |
|
|
|
|
|
done = false; |
|
|
for (r = aliases; !done && *r; r++) { |
|
|
string try = *r; |
|
|
|
|
|
/* We have an ls-R db. Look up `try'. */ |
|
|
orig_dirs = db_dirs = hash_lookup (db, try); |
|
|
|
|
|
ret = XTALLOC1 (str_list_type); |
|
|
*ret = str_list_init (); |
|
|
|
|
|
/* For each filename found, see if it matches the path element. For |
|
|
example, if we have .../cx/cmr10.300pk and .../ricoh/cmr10.300pk, |
|
|
and the path looks like .../cx, we don't want the ricoh file. */ |
|
|
while (!done && db_dirs && *db_dirs) { |
|
|
string db_file = concat (*db_dirs, try); |
|
|
boolean matched = match (db_file, path_elt); |
|
|
|
|
|
#ifdef KPSE_DEBUG |
|
|
if (KPSE_DEBUG_P (KPSE_DEBUG_SEARCH)) |
|
|
DEBUGF3 ("db:match(%s,%s) = %d\n", db_file, path_elt, matched); |
|
|
#endif |
|
|
|
|
|
/* We got a hit in the database. Now see if the file actually |
|
|
exists, possibly under an alias. */ |
|
|
if (matched) { |
|
|
string found = NULL; |
|
|
if (kpse_readable_file (db_file)) { |
|
|
found = db_file; |
|
|
|
|
|
} else { |
|
|
string *a; |
|
|
|
|
|
free (db_file); /* `db_file' wasn't on disk. */ |
|
|
|
|
|
/* The hit in the DB doesn't exist in disk. Now try all its |
|
|
aliases. For example, suppose we have a hierarchy on CD, |
|
|
thus `mf.bas', but ls-R contains `mf.base'. Find it anyway. |
|
|
Could probably work around this with aliases, but |
|
|
this is pretty easy and shouldn't hurt. The upshot is that |
|
|
if one of the aliases actually exists, we use that. */ |
|
|
for (a = aliases + 1; *a && !found; a++) { |
|
|
string atry = concat (*db_dirs, *a); |
|
|
if (kpse_readable_file (atry)) |
|
|
found = atry; |
|
|
else |
|
|
free (atry); |
|
|
} |
|
|
} |
|
|
|
|
|
/* If we have a real file, add it to the list, maybe done. */ |
|
|
if (found) { |
|
|
str_list_add (ret, found); |
|
|
if (!all && found) |
|
|
done = true; |
|
|
} |
|
|
} else { /* no match in the db */ |
|
|
free (db_file); |
|
|
} |
|
|
|
|
|
|
|
|
/* On to the next directory, if any. */ |
|
|
db_dirs++; |
|
|
} |
|
|
|
|
|
/* This is just the space for the pointers, not the strings. */ |
|
|
if (orig_dirs && *orig_dirs) |
|
|
free (orig_dirs); |
|
|
} |
|
|
|
|
|
free (aliases); |
|
|
|
|
|
/* If we had to break up NAME, free the temporary PATH_ELT. */ |
|
|
if (path_elt != orig_path_elt) |
|
|
free (path_elt); |
|
|
|
|
|
return ret; |
|
|
}
|
|
|
|