/* db.c: an external database to avoid filesystem lookups. Copyright 1994, 1995, 1996, 1997, 2008, 2009 Karl Berry. Copyright 1997-2005 Olaf Weber. This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, see . */ #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef DB_HASH_SIZE /* Based on the size of 2008 texmf-dist/ls-R, about 62000 entries. But we don't want to make it too big, since texmf/ls-R only has about 1300 entries. We should dynamically adapt the size. */ #define DB_HASH_SIZE 32003 #endif #ifndef DB_NAME #define DB_NAME "ls-R" #endif #ifndef DB_NAME_LC #define DB_NAME_LC "ls-r" #endif /* read ls-R only on WIN32 */ #ifdef WIN32 static const_string db_names[] = { DB_NAME, NULL }; #else static const_string db_names[] = { DB_NAME, DB_NAME_LC, NULL }; #endif #ifndef ALIAS_NAME #define ALIAS_NAME "aliases" #endif #ifndef ALIAS_HASH_SIZE #define ALIAS_HASH_SIZE 1009 #endif /* If DIRNAME contains any element beginning with a `.' (that is more than just `./'), return true. This is to allow ``hidden'' directories -- ones that don't get searched. */ static boolean ignore_dir_p (const_string dirname) { const_string dot_pos = dirname; while ((dot_pos = strchr (dot_pos + 1, '.'))) { /* If / before and no / after, skip it. */ if (IS_DIR_SEP (dot_pos[-1]) && dot_pos[1] && !IS_DIR_SEP (dot_pos[1])) return true; } return false; } /* If no DB_FILENAME, return false (maybe they aren't using this feature). Otherwise, add entries from DB_FILENAME to TABLE, and return true. */ static boolean db_build (kpathsea kpse, hash_table_type *table, const_string db_filename) { string line; unsigned dir_count = 0, file_count = 0, ignore_dir_count = 0; unsigned len = strlen (db_filename) - sizeof (DB_NAME) + 1; /* Keep the /. */ string top_dir = (string)xmalloc (len + 1); string cur_dir = NULL; /* First thing in ls-R might be a filename. */ FILE *db_file = fopen (db_filename, FOPEN_R_MODE); strncpy (top_dir, db_filename, len); top_dir[len] = 0; if (db_file) { while ((line = read_line (db_file)) != NULL) { len = strlen (line); /* A line like `/foo:' = new dir foo. Allow both absolute (/...) and explicitly relative (./...) names here. It's a kludge to pass in the directory name with the trailing : still attached, but it doesn't actually hurt. */ if (len > 0 && line[len - 1] == ':' && kpathsea_absolute_p (kpse, line, true)) { /* New directory line. */ if (!ignore_dir_p (line)) { /* If they gave a relative name, prepend full directory name now. */ line[len - 1] = DIR_SEP; /* Skip over leading `./', it confuses `match' and is just a waste of space, anyway. This will lose on `../', but `match' won't work there, either, so it doesn't matter. */ cur_dir = *line == '.' ? concat (top_dir, line + 2) : xstrdup (line); dir_count++; } else { cur_dir = NULL; ignore_dir_count++; } /* Ignore blank, `.' and `..' lines. */ } else if (*line != 0 && cur_dir /* a file line? */ && !(*line == '.' && (line[1] == 0 || (line[1] == '.' && line[2] == 0)))) { /* Make a new hash table entry with a key of `line' and a data of `cur_dir'. An already-existing identical key is ok, since a file named `foo' can be in more than one directory. Share `cur_dir' among all its files (and hence never free it). Note that we assume that all names in the ls-R file have already been case-smashed to lowercase where appropriate. */ hash_insert_normalized (table, xstrdup (line), cur_dir); file_count++; } /* else ignore blank lines or top-level files or files in ignored directories*/ free (line); } xfclose (db_file, db_filename); if (file_count == 0) { WARNING1 ("kpathsea: No usable entries in %s", db_filename); WARNING ("kpathsea: See the manual for how to generate ls-R"); db_file = NULL; } else { str_list_add (&(kpse->db_dir_list), xstrdup (top_dir)); } #ifdef KPSE_DEBUG if (KPATHSEA_DEBUG_P (KPSE_DEBUG_HASH)) { /* Don't make this a debugging bit, since the output is so voluminous, and being able to specify -1 is too useful. Instead, let people who want it run the program under a debugger and change the variable that way. */ boolean hash_summary_only = true; DEBUGF4 ("%s: %u entries in %d directories (%d hidden).\n", db_filename, file_count, dir_count, ignore_dir_count); DEBUGF ("ls-R hash table:"); hash_print (*table, hash_summary_only); fflush (stderr); } #endif /* KPSE_DEBUG */ } free (top_dir); return db_file != NULL; } /* Insert FNAME into the hash table. This is for files that get built during a run. We wouldn't want to reread all of ls-R, even if it got rebuilt. */ void kpathsea_db_insert (kpathsea kpse, const_string passed_fname) { /* We might not have found ls-R, or even had occasion to look for it yet, so do nothing if we have no hash table. */ if (kpse->db.buckets) { const_string dir_part; string fname = xstrdup (passed_fname); string baseptr = (string)xbasename (fname); const_string file_part = xstrdup (baseptr); *baseptr = '\0'; /* Chop off the filename. */ dir_part = fname; /* That leaves the dir, with the trailing /. */ /* Note that we do not assuse that these names have been normalized. */ hash_insert (&(kpse->db), file_part, dir_part); } } #if defined(KPSE_COMPAT_API) void kpse_db_insert (const_string passed_fname) { kpathsea_db_insert(kpse_def, passed_fname); } #endif /* Return true if FILENAME could be in PATH_ELT, i.e., if the directory part of FILENAME matches PATH_ELT. Have to consider // wildcards, but $ and ~ expansion have already been done. */ static boolean match (const_string filename, const_string path_elt) { const_string original_filename = filename; boolean matched = false; for (; *filename && *path_elt; filename++, path_elt++) { if (FILECHARCASEEQ (*filename, *path_elt)) /* normal character match */ ; else if (IS_DIR_SEP (*path_elt) /* at // */ && original_filename < filename && IS_DIR_SEP (path_elt[-1])) { while (IS_DIR_SEP (*path_elt)) path_elt++; /* get past second and any subsequent /'s */ if (*path_elt == 0) { /* Trailing //, matches anything. We could make this part of the other case, but it seems pointless to do the extra work. */ matched = true; break; } else { /* Intermediate //, have to match rest of PATH_ELT. */ for (; !matched && *filename; filename++) { /* Try matching at each possible character. */ if (IS_DIR_SEP (filename[-1]) && FILECHARCASEEQ (*filename, *path_elt)) matched = match (filename, path_elt); } /* Prevent filename++ when *filename='\0'. */ break; } } else /* normal character nonmatch, quit */ break; } /* If we've reached the end of PATH_ELT, check that we're at the last component of FILENAME, we've matched. */ if (!matched && *path_elt == 0) { /* Probably PATH_ELT ended with `vf' or some such, and FILENAME ends with `vf/ptmr.vf'. In that case, we'll be at a directory separator. On the other hand, if PATH_ELT ended with a / (as in `vf/'), FILENAME being the same `vf/ptmr.vf', we'll be at the `p'. Upshot: if we're at a dir separator in FILENAME, skip it. But if not, that's ok, as long as there are no more dir separators. */ if (IS_DIR_SEP (*filename)) filename++; while (*filename && !IS_DIR_SEP (*filename)) filename++; matched = *filename == 0; } return matched; } /* If DB_DIR is a prefix of PATH_ELT, return true; otherwise false. That is, the question is whether to try the db for a file looked up in PATH_ELT. If PATH_ELT == ".", for example, the answer is no. If PATH_ELT == "/usr/local/lib/texmf/fonts//tfm", the answer is yes. In practice, ls-R is only needed for lengthy subdirectory comparisons, but there's no gain to checking PATH_ELT to see if it is a subdir match, since the only way to do that is to do a string search in it, which is all we do anyway. */ static boolean elt_in_db (const_string db_dir, const_string path_elt) { boolean found = false; while (!found && FILECHARCASEEQ (*db_dir++, *path_elt++)) { /* If we've matched the entire db directory, it's good. */ if (*db_dir == 0) found = true; /* If we've reached the end of PATH_ELT, but not the end of the db directory, it's no good. */ else if (*path_elt == 0) break; } return found; } /* If ALIAS_FILENAME exists, read it into TABLE. */ static boolean alias_build (kpathsea kpse, hash_table_type *table, const_string alias_filename) { string line, real, alias; unsigned count = 0; FILE *alias_file = fopen (alias_filename, FOPEN_R_MODE); if (alias_file) { while ((line = read_line (alias_file)) != NULL) { /* comments or empty */ if (*line == 0 || *line == '%' || *line == '#') { ; } else { /* Each line should have two fields: realname aliasname. */ real = line; while (*real && ISSPACE (*real)) real++; alias = real; while (*alias && !ISSPACE (*alias)) alias++; *alias++ = 0; while (*alias && ISSPACE (*alias)) alias++; /* Is the check for errors strong enough? Should we warn the user for potential errors? */ if (strlen (real) != 0 && strlen (alias) != 0) { /* Stuff in the alias file should be normalized. */ hash_insert_normalized (table, xstrdup (alias), xstrdup (real)); count++; } } free (line); } #ifdef KPSE_DEBUG if (KPATHSEA_DEBUG_P (KPSE_DEBUG_HASH)) { /* As with ls-R above ... */ boolean hash_summary_only = true; DEBUGF2 ("%s: %u aliases.\n", alias_filename, count); DEBUGF ("alias hash table:"); hash_print (*table, hash_summary_only); fflush (stderr); } #endif /* KPSE_DEBUG */ xfclose (alias_file, alias_filename); } return alias_file != NULL; } /* Initialize the path for ls-R files, and read them all into the hash table `db'. If no usable ls-R's found, set kpse->db.buckets to NULL. */ void kpathsea_init_db (kpathsea kpse) { boolean ok = false; const_string db_path; string *db_files; string *orig_db_files; assert (sizeof(DB_NAME) == sizeof(DB_NAME_LC)); db_path = kpathsea_init_format (kpse, kpse_db_format); db_files = kpathsea_all_path_search_list (kpse, db_path, db_names); orig_db_files = db_files; /* Must do this after the path searching (which ends up calling kpse_db_search recursively), so kpse->db.buckets stays NULL. */ kpse->db = hash_create (DB_HASH_SIZE); while (db_files && *db_files) { if (db_build (kpse, &(kpse->db), *db_files)) ok = true; free (*db_files); db_files++; } if (!ok) { /* If db can't be built, leave `size' nonzero (so we don't rebuild it), but clear `buckets' (so we don't look in it). */ free (kpse->db.buckets); kpse->db.buckets = NULL; } free (orig_db_files); /* Add the content of any alias databases. There may exist more than one alias file along DB_NAME files. This duplicates the above code -- should be a function. */ ok = false; db_files = kpathsea_all_path_search (kpse, db_path, ALIAS_NAME); orig_db_files = db_files; kpse->alias_db = hash_create (ALIAS_HASH_SIZE); while (db_files && *db_files) { if (alias_build (kpse, &(kpse->alias_db), *db_files)) ok = true; free (*db_files); db_files++; } if (!ok) { free (kpse->alias_db.buckets); kpse->alias_db.buckets = NULL; } free (orig_db_files); } #if defined(KPSE_COMPAT_API) void kpse_init_db (void) { kpathsea_init_db(kpse_def); } #endif /* Avoid doing anything if this PATH_ELT is irrelevant to the databases. */ str_list_type * kpathsea_db_search (kpathsea kpse, const_string name, const_string orig_path_elt, boolean all) { string *db_dirs, *orig_dirs, *r; const_string last_slash; string path_elt; boolean done; unsigned e; str_list_type *ret = NULL; string *aliases = NULL; boolean relevant = false; /* If we failed to build the database (or if this is the recursive call to build the db path), quit. */ if (kpse->db.buckets == NULL) return NULL; /* When tex-glyph.c calls us looking for, e.g., dpi600/cmr10.pk, we won't find it unless we change NAME to just `cmr10.pk' and append `/dpi600' to PATH_ELT. We are justified in using a literal `/' here, since that's what tex-glyph.c unconditionally uses in DPI_BITMAP_SPEC. But don't do anything if the / begins NAME; that should never happen. */ last_slash = strrchr (name, '/'); if (last_slash && last_slash != name) { unsigned len = last_slash - name + 1; string dir_part = (string)xmalloc (len); strncpy (dir_part, name, len - 1); dir_part[len - 1] = 0; path_elt = concat3 (orig_path_elt, "/", dir_part); name = last_slash + 1; } else path_elt = (string) orig_path_elt; /* Don't bother doing any lookups if this `path_elt' isn't covered by any of database directories. We do this not so much because the extra couple of hash lookups matter -- they don't -- but rather because we want to return NULL in this case, so path_search can know to do a disk search. */ for (e = 0; !relevant && e < STR_LIST_LENGTH (kpse->db_dir_list); e++) { relevant = elt_in_db (STR_LIST_ELT (kpse->db_dir_list, e), path_elt); } if (!relevant) return NULL; /* If we have aliases for this name, use them. */ if (kpse->alias_db.buckets) aliases = hash_lookup (kpse->alias_db, name); if (!aliases) { aliases = XTALLOC1 (string); aliases[0] = NULL; } { /* Push aliases up by one and insert the original name at the front. */ unsigned i; unsigned len = 1; /* Have NULL element already allocated. */ for (r = aliases; *r; r++) len++; XRETALLOC (aliases, len + 1, string); for (i = len; i > 0; i--) { aliases[i] = aliases[i - 1]; } aliases[0] = (string) name; } done = false; for (r = aliases; !done && *r; r++) { string ctry = *r; /* We have an ls-R db. Look up `try'. */ orig_dirs = db_dirs = hash_lookup (kpse->db, ctry); ret = XTALLOC1 (str_list_type); *ret = str_list_init (); /* For each filename found, see if it matches the path element. For example, if we have .../cx/cmr10.300pk and .../ricoh/cmr10.300pk, and the path looks like .../cx, we don't want the ricoh file. */ while (!done && db_dirs && *db_dirs) { string db_file = concat (*db_dirs, ctry); boolean matched = match (db_file, path_elt); #ifdef KPSE_DEBUG if (KPATHSEA_DEBUG_P (KPSE_DEBUG_SEARCH)) DEBUGF3 ("db:match(%s,%s) = %d\n", db_file, path_elt, matched); #endif /* We got a hit in the database. Now see if the file actually exists, possibly under an alias. */ if (matched) { string found = NULL; if (kpathsea_readable_file (kpse, db_file)) { found = db_file; } else { string *a; free (db_file); /* `db_file' wasn't on disk. */ /* The hit in the DB doesn't exist in disk. Now try all its aliases. For example, suppose we have a hierarchy on CD, thus `mf.bas', but ls-R contains `mf.base'. Find it anyway. Could probably work around this with aliases, but this is pretty easy and shouldn't hurt. The upshot is that if one of the aliases actually exists, we use that. */ for (a = aliases + 1; *a && !found; a++) { string atry = concat (*db_dirs, *a); if (kpathsea_readable_file (kpse, atry)) found = atry; else free (atry); } } /* If we have a real file, add it to the list, maybe done. */ if (found) { str_list_add (ret, found); if (!all && found) done = true; } } else { /* no match in the db */ free (db_file); } /* On to the next directory, if any. */ db_dirs++; } /* This is just the space for the pointers, not the strings. */ if (orig_dirs && *orig_dirs) free (orig_dirs); } free (aliases); /* If we had to break up NAME, free the temporary PATH_ELT. */ if (path_elt != orig_path_elt) free (path_elt); return ret; } #if defined(KPSE_COMPAT_API) str_list_type * kpse_db_search (const_string name, const_string orig_path_elt, boolean all) { return kpathsea_db_search (kpse_def, name, orig_path_elt, all); } #endif str_list_type * kpathsea_db_search_list (kpathsea kpse, const_string* names, const_string path_elt, boolean all) { string *db_dirs, *orig_dirs, *r; const_string last_slash, name, path; boolean done; unsigned e; string *aliases; int n; str_list_type *ret = NULL; boolean relevant = false; /* If we failed to build the database (or if this is the recursive call to build the db path), quit. */ if (kpse->db.buckets == NULL) return NULL; /* Don't bother doing any lookups if this `path_elt' isn't covered by any of database directories. We do this not so much because the extra couple of hash lookups matter -- they don't -- but rather because we want to return NULL in this case, so path_search can know to do a disk search. */ for (e = 0; !relevant && e < STR_LIST_LENGTH (kpse->db_dir_list); e++) { relevant = elt_in_db (STR_LIST_ELT (kpse->db_dir_list, e), path_elt); } if (!relevant) return NULL; done = false; ret = XTALLOC1 (str_list_type); *ret = str_list_init (); /* Handle each name. */ for (n = 0; !done && names[n]; n++) { name = names[n]; /* Absolute names should have been caught in our caller. */ if (kpathsea_absolute_p(kpse, name, true)) continue; /* When tex-glyph.c calls us looking for, e.g., dpi600/cmr10.pk, we won't find it unless we change NAME to just `cmr10.pk' and append `/dpi600' to PATH_ELT. We are justified in using a literal `/' here, since that's what tex-glyph.c unconditionally uses in DPI_BITMAP_SPEC. But don't do anything if the / begins NAME; that should never happen. */ last_slash = strrchr (name, '/'); if (last_slash && last_slash != name) { unsigned len = last_slash - name + 1; string dir_part = (string)xmalloc (len); strncpy (dir_part, name, len - 1); dir_part[len - 1] = 0; path = concat3 (path_elt, "/", dir_part); name = last_slash + 1; free(dir_part); } else { path = path_elt; } /* If we have aliases for this name, use them. */ if (kpse->alias_db.buckets) aliases = hash_lookup (kpse->alias_db, name); else aliases = NULL; if (!aliases) { aliases = XTALLOC1 (string); aliases[0] = NULL; } { /* Push aliases up by one and insert the original name at front. */ unsigned i; unsigned len = 1; /* Have NULL element already allocated. */ for (r = aliases; *r; r++) len++; XRETALLOC (aliases, len + 1, string); for (i = len; i > 0; i--) { aliases[i] = aliases[i - 1]; } aliases[0] = (string) name; } for (r = aliases; !done && *r; r++) { string ctry = *r; /* We have an ls-R db. Look up `try'. */ orig_dirs = db_dirs = hash_lookup (kpse->db, ctry); /* For each filename found, see if it matches the path element. For example, if we have .../cx/cmr10.300pk and .../ricoh/cmr10.300pk, and the path looks like .../cx, we don't want the ricoh file. */ while (!done && db_dirs && *db_dirs) { string db_file = concat (*db_dirs, ctry); boolean matched = match (db_file, path); #ifdef KPSE_DEBUG if (KPATHSEA_DEBUG_P (KPSE_DEBUG_SEARCH)) DEBUGF3 ("db:match(%s,%s) = %d\n", db_file, path, matched); #endif /* We got a hit in the database. Now see if the file actually exists, possibly under an alias. */ if (matched) { string found = NULL; if (kpathsea_readable_file (kpse, db_file)) { found = db_file; } else { string *a; free (db_file); /* `db_file' wasn't on disk. */ /* The hit in the DB doesn't exist in disk. Now try all its aliases. For example, suppose we have a hierarchy on CD, thus `mf.bas', but ls-R contains `mf.base'. Find it anyway. Could probably work around this with aliases, but this is pretty easy and shouldn't hurt. The upshot is that if one of the aliases actually exists, we use that. */ for (a = aliases + 1; *a && !found; a++) { string atry = concat (*db_dirs, *a); if (kpathsea_readable_file (kpse, atry)) found = atry; else free (atry); } } /* If we have a real file, add it to the list, maybe done. */ if (found) { str_list_add (ret, found); if (!all && found) done = true; } } else { /* no match in the db */ free (db_file); } /* On to the next directory, if any. */ db_dirs++; } /* This is just the space for the pointers, not the strings. */ if (orig_dirs && *orig_dirs) free (orig_dirs); } free (aliases); if (path != path_elt) free((string)path); } return ret; } #if defined(KPSE_COMPAT_API) str_list_type * kpse_db_search_list (const_string* names, const_string path_elt, boolean all) { return kpathsea_db_search_list (kpse_def, names, path_elt, all); } #endif