/* * NOTE: this is part of libzzipfseeko (i.e. it is not libzzip). * ================== * * These routines are fully independent from the traditional zzip * implementation. They assume a readonly seekable stdio handle * representing a complete zip file. The functions show how to * parse the structure, find files and return a decoded bytestream. * * These routines are a bit simple and really here for documenting * the way to access a zip file. The complexity of zip access comes * from staggered reading of bytes and reposition of a filepointer in * a big archive with lots of files and long compressed datastreams. * Plus varaints of drop-in stdio replacements, obfuscation routines, * auto fileextensions, drop-in dirent replacements, and so on... * * btw, we can _not_ use fgetpos/fsetpos since an fpos_t has no asserted * relation to a linear seek value as specified in zip info headers. In * general it is not a problem if your system has no fseeko/ftello pair * since we can fallback to fseek/ftell which limits the zip disk size * to 2MiBs but the zip-storable seek values are 32bit limited anyway. * * Author: * Guido Draheim * * Copyright (c) 2003,2004 Guido Draheim * All rights reserved, * use under the restrictions of the * Lesser GNU General Public License * or alternatively the restrictions * of the Mozilla Public License 1.1 */ #define _LARGEFILE_SOURCE 1 #define _ZZIP_ENTRY_STRUCT 1 #include #include #include #include #include #include #include #if defined ZZIP_HAVE_STRING_H #include #elif defined ZZIP_HAVE_STRINGS_H #include #endif #if defined ZZIP_HAVE_STDINT_H #include #endif #if __STDC_VERSION__+0 > 199900L #define ___ #define ____ #else #define ___ { #define ____ } #endif #ifndef ZZIP_HAVE_FSEEKO #define fseeko fseek #define ftello ftell #endif /* note that the struct zzip_entry inherits the zzip_disk_entry values * and usually carries a copy of its values (in disk format!). To make the * following code more readable, we use a shorthand notation for the * upcast needed in C (not needed in C++) as "disk_(entry)". */ #ifdef __zzip_entry_extends_zzip_disk_entry #define disk_(_entry_) _entry_ #else #define disk_(_entry_) (& (_entry_)->head) #endif /* we try to round all seeks to the pagesize - since we do not use * the sys/mmap interface we have to guess a good value here: */ #define PAGESIZE 8192 /* ====================================================================== */ /* helper functions */ /** => zzip_entry_data_offset * This functions read the correspoding struct zzip_file_header from * the zip disk of the given "entry". The returned off_t points to the * end of the file_header where the current fseek pointer has stopped. * This is used to immediatly parse out any filename/extras block following * the file_header. The return value is null on error. */ static zzip_off_t zzip_entry_fread_file_header(ZZIP_ENTRY * entry, struct zzip_file_header *file_header) { if (! entry || ! file_header) return 0; ___ zzip_off_t offset = zzip_disk_entry_fileoffset(disk_(entry)); if (0 > offset || offset >= entry->disksize) return 0; if (fseeko(entry->diskfile, offset, SEEK_SET) == -1) return 0; return (fread(file_header, sizeof(*file_header), 1, entry->diskfile) ? offset + sizeof(*file_header) : 0); ____; } /** helper functions for (fseeko) zip access api * * This functions returns the seekval offset of the data portion of the * file referenced by the given zzip_entry. It requires an intermediate * check of the file_header structure (i.e. it reads it from disk). After * this call, the contained diskfile readposition is already set to the * data_offset returned here. On error -1 is returned. */ zzip_off_t zzip_entry_data_offset(ZZIP_ENTRY * entry) { struct zzip_file_header file_header; if (! entry) return -1; ___ zzip_off_t offset = zzip_entry_fread_file_header(entry, &file_header); if (! offset) return -1; offset += zzip_file_header_sizeof_tails(&file_header); if (fseeko(entry->diskfile, offset, SEEK_SET) == -1) return -1; return offset; ____; } /** => zzip_entry_data_offset * This function is a big helper despite its little name: in a zip file the * encoded filenames are usually NOT zero-terminated but for common usage * with libc we need it that way. Secondly, the filename SHOULD be present * in the zip central directory but if not then we fallback to the filename * given in the file_header of each compressed data portion. */ zzip__new__ char * zzip_entry_strdup_name(ZZIP_ENTRY * entry) { if (! entry) return 0; ___ zzip_size_t len; if ((len = zzip_disk_entry_namlen(disk_(entry)))) { char *name = malloc(len + 1); if (! name) return 0; memcpy(name, entry->tail, len); name[len] = '\0'; return name; } ___ auto struct zzip_file_header header; if (zzip_entry_fread_file_header(entry, &header) && (len = zzip_file_header_namlen(&header))) { char *name = malloc(len + 1); if (! name) { return 0; } else { zzip_size_t n = fread(name, 1, len, entry->diskfile); if (n != len) { free (name); return 0; } name[n] = '\0'; return name; } } return 0; ____; ____; } static int prescan_entry(ZZIP_ENTRY * entry) { assert(entry); ___ zzip_off_t tailsize = zzip_disk_entry_sizeof_tails(disk_(entry)); if (tailsize + 1 > entry->tailalloc) { char *newtail = realloc(entry->tail, tailsize + 1); if (! newtail) return ENOMEM; entry->tail = newtail; entry->tailalloc = tailsize + 1; } # ifdef SIZE_MAX /* from stdint.h */ if (tailsize > (zzip_off_t)(SIZE_MAX)) { return EFBIG; } # endif ___ zzip_size_t readsize = fread(entry->tail, 1, tailsize, entry->diskfile); /* name + comment + extras */ if ((zzip_off_t)readsize != tailsize) { return errno; } else { return 0; } ____; ____; } static void prescan_clear(ZZIP_ENTRY * entry) { assert(entry); if (entry->tail) free(entry->tail); entry->tail = 0; entry->tailalloc = 0; } /* ====================================================================== */ /** => zzip_entry_findfile * * This function is the first call of all the zip access functions here. * It contains the code to find the first entry of the zip central directory. * Here we require the stdio handle to represent a real zip file where the * disk_trailer is _last_ in the file area, so that its position would be at * a fixed offset from the end of the file area if not for the comment field * allowed to be of variable length (which needs us to do a little search * for the disk_tailer). However, in this simple implementation we disregard * any disk_trailer info telling about multidisk archives, so we just return * a pointer to the first entry in the zip central directory of that file. * * For an actual means, we are going to search backwards from the end * of the mmaped block looking for the PK-magic signature of a * disk_trailer. If we see one then we check the rootseek value to * find the first disk_entry of the root central directory. If we find * the correct PK-magic signature of a disk_entry over there then we * assume we are done and we are going to return a pointer to that label. * * The return value is a pointer to the first zzip_disk_entry being checked * to be within the bounds of the file area specified by the arguments. If * no disk_trailer was found then null is returned, and likewise we only * accept a disk_trailer with a seekvalue that points to a disk_entry and * both parts have valid PK-magic parts. Beyond some sanity check we try to * catch a common brokeness with zip archives that still allows us to find * the start of the zip central directory. */ zzip__new__ ZZIP_ENTRY * zzip_entry_findfirst(FILE * disk) { if (! disk) return 0; if (fseeko(disk, 0, SEEK_END) == -1) return 0; ___ zzip_off_t disksize = ftello(disk); if (disksize < (zzip_off_t) sizeof(struct zzip_disk_trailer)) return 0; /* we read out chunks of 8 KiB in the hope to match disk granularity */ ___ zzip_off_t pagesize = PAGESIZE; /* getpagesize() */ ___ ZZIP_ENTRY *entry = malloc(sizeof(*entry)); if (! entry) return 0; ___ unsigned char *buffer = malloc(pagesize); if (! buffer) goto nomem; assert(pagesize / 2 > (zzip_off_t) sizeof(struct zzip_disk_trailer)); /* at each step, we will fread a pagesize block which overlaps with the * previous read by means of pagesize/2 step at the end of the while(1) */ ___ zzip_off_t mapoffs = disksize & ~(pagesize - 1); ___ zzip_off_t mapsize = disksize - mapoffs; if (mapoffs && mapsize < pagesize / 2) { mapoffs -= pagesize / 2; mapsize += pagesize / 2; } assert(mapsize < 3*8192); while (1) { if (fseeko(disk, mapoffs, SEEK_SET) == -1) goto error; if (fread(buffer, 1, mapsize, disk) != (zzip_size_t)mapsize) goto error; ___ unsigned char *p = buffer + mapsize - sizeof(struct zzip_disk_trailer); for (; p >= buffer; p--) { zzip_off_t root; /* (struct zzip_disk_entry*) */ if (zzip_disk_trailer_check_magic(p)) { root = zzip_disk_trailer_rootseek((struct zzip_disk_trailer *) p); if (root > disksize - (long) sizeof(struct zzip_disk_trailer)) { /* first disk_entry is after the disk_trailer? can't be! */ struct zzip_disk_trailer *trailer = (struct zzip_disk_trailer *) p; zzip_off_t rootsize = zzip_disk_trailer_rootsize(trailer); if (rootsize > mapoffs) continue; /* a common brokeness that can be fixed: we just assume the * central directory was written directly before : */ root = mapoffs - rootsize; } } else if (zzip_disk64_trailer_check_magic(p)) { struct zzip_disk64_trailer *trailer = (struct zzip_disk64_trailer *) p; if (sizeof(zzip_off_t) < 8) return 0; root = zzip_disk64_trailer_rootseek(trailer); } else continue; assert(0 <= root && root < mapsize); if (fseeko(disk, root, SEEK_SET) == -1) goto error; if (fread(disk_(entry), 1, sizeof(*disk_(entry)), disk) != sizeof(*disk_(entry))) goto error; if (zzip_disk_entry_check_magic(entry)) { free(buffer); entry->headseek = root; entry->diskfile = disk; entry->disksize = disksize; if (prescan_entry(entry)) goto nomem; return entry; } } ____; if (! mapoffs) break; assert(mapsize >= pagesize / 2); mapoffs -= pagesize / 2; /* mapsize += pagesize/2; */ mapsize = pagesize; /* if (mapsize > pagesize) ... */ if (disksize - mapoffs > 64 * 1024) break; } error: free(buffer); nomem: free(entry); ____; ____; ____; ____; ____; ____; return 0; } /** => zzip_entry_findfile * * This function takes an existing "entry" in the central root directory * (e.g. from zzip_entry_findfirst) and moves it to point to the next entry. * On error it returns 0, otherwise the old entry. If no further match is * found then null is returned and the entry already free()d. If you want * to stop searching for matches before that case then please call * => zzip_entry_free on the cursor struct ZZIP_ENTRY. */ zzip__new__ ZZIP_ENTRY * zzip_entry_findnext(ZZIP_ENTRY * _zzip_restrict entry) { if (! entry) return entry; if (! zzip_disk_entry_check_magic(entry)) goto err; ___ zzip_off_t seek = entry->headseek + zzip_disk_entry_sizeto_end(disk_(entry)); if (seek + (zzip_off_t) sizeof(*disk_(entry)) > entry->disksize) goto err; if (fseeko(entry->diskfile, seek, SEEK_SET) == -1) goto err; if (fread(disk_(entry), 1, sizeof(*disk_(entry)), entry->diskfile) != sizeof(*disk_(entry))) goto err; entry->headseek = seek; if (! zzip_disk_entry_check_magic(entry)) goto err; if (prescan_entry(entry)) goto err; return entry; err: zzip_entry_free(entry); return 0; ____; } /** => zzip_entry_findfile * this function releases the malloc()ed areas needed for zzip_entry, the * pointer is invalid afterwards. This function has #define synonyms of * zzip_entry_findlast(), zzip_entry_findlastfile(), zzip_entry_findlastmatch() */ int zzip_entry_free(ZZIP_ENTRY * entry) { if (! entry) return 0; prescan_clear(entry); free(entry); return 1; } /** search for files in the (fseeko) zip central directory * * This function is given a filename as an additional argument, to find the * disk_entry matching a given filename. The compare-function is usually * strcmp or strcasecmp or perhaps strcoll, if null then strcmp is used. * - use null as argument for "old"-entry when searching the first * matching entry, otherwise the last returned value if you look for other * entries with a special "compare" function (if null then a doubled search * is rather useless with this variant of _findfile). If no further entry is * found then null is returned and any "old"-entry gets already free()d. */ zzip__new__ ZZIP_ENTRY * zzip_entry_findfile(FILE * disk, char *filename, ZZIP_ENTRY * _zzip_restrict entry, zzip_strcmp_fn_t compare) { if (! filename || ! disk) return 0; if (! entry) entry = zzip_entry_findfirst(disk); else entry = zzip_entry_findnext(entry); if (! compare) compare = (zzip_strcmp_fn_t) (strcmp); for (; entry; entry = zzip_entry_findnext(entry)) { /* filenames within zip files are often not null-terminated! */ char *realname = zzip_entry_strdup_name(entry); if (! realname) continue; if (! compare(filename, realname)) { free(realname); return entry; } else { free(realname); continue; } } return 0; } /** => zzip_entry_findfile * * This function uses a compare-function with an additional argument * and it is called just like fnmatch(3) from POSIX.2 AD:1993), i.e. * the argument filespec first and the ziplocal filename second with * the integer-flags put in as third to the indirect call. If the * platform has fnmatch available then null-compare will use that one * and otherwise we fall back to mere strcmp, so if you need fnmatch * searching then please provide an implementation somewhere else. * - use null as argument for "after"-entry when searching the first * matching entry, or the last disk_entry return-value to find the * next entry matching the given filespec. If no further entry is * found then null is returned and any "old"-entry gets already free()d. */ zzip__new__ ZZIP_ENTRY * zzip_entry_findmatch(FILE * disk, char *filespec, ZZIP_ENTRY * _zzip_restrict entry, zzip_fnmatch_fn_t compare, int flags) { if (! filespec || ! disk) return 0; if (! entry) entry = zzip_entry_findfirst(disk); else entry = zzip_entry_findnext(entry); if (! compare) compare = (zzip_fnmatch_fn_t) _zzip_fnmatch; for (; entry; entry = zzip_entry_findnext(entry)) { /* filenames within zip files are often not null-terminated! */ char *realname = zzip_entry_strdup_name(entry); if (! realname) continue; if (! compare(filespec, realname, flags)) { free(realname); return entry; } else { free(realname); continue; } } return 0; } /* ====================================================================== */ /** * typedef struct zzip_disk_file ZZIP_ENTRY_FILE; */ struct zzip_entry_file /* : zzip_file_header */ { struct zzip_file_header header; /* fopen detected header */ ZZIP_ENTRY *entry; /* fopen entry */ zzip_off_t data; /* for stored blocks */ zzip_size_t avail; /* memorized for checks on EOF */ zzip_size_t compressed; /* compressed flag and datasize */ zzip_size_t dataoff; /* offset from data start */ z_stream zlib; /* for inflated blocks */ unsigned char buffer[PAGESIZE]; /* work buffer for inflate algorithm */ }; /** open a file within a zip disk for reading * * This function does take an "entry" argument and copies it (or just takes * it over as owner) to a new ZZIP_ENTRY_FILE handle structure. That * structure contains also a zlib buffer for decoding. This function does * seek to the file_header of the given "entry" and validates it for the * data buffer following it. We do also prefetch some data from the data * buffer thereby trying to match the disk pagesize for faster access later. * The => zzip_entry_fread will then read in chunks of pagesizes which is * the size of the internal readahead buffer. If an error occurs then null * is returned. */ zzip__new__ ZZIP_ENTRY_FILE * zzip_entry_fopen(ZZIP_ENTRY * entry, int takeover) { if (! entry) return 0; if (! takeover) { ZZIP_ENTRY *found = malloc(sizeof(*entry)); if (! found) return 0; memcpy(found, entry, sizeof(*entry)); /* prescan_copy */ found->tail = malloc(found->tailalloc); if (! found->tail) { free (found); return 0; } memcpy(found->tail, entry->tail, entry->tailalloc); entry = found; } ___ ZZIP_ENTRY_FILE *file = malloc(sizeof(*file)); if (! file) goto fail1; file->entry = entry; if (! zzip_entry_fread_file_header(entry, &file->header)) goto fail2; file->avail = zzip_file_header_usize(&file->header); file->data = zzip_entry_data_offset(entry); file->dataoff = 0; if (! file->avail || zzip_file_header_data_stored(&file->header)) { file->compressed = 0; return file; } file->compressed = zzip_file_header_csize(&file->header); file->zlib.opaque = 0; file->zlib.zalloc = Z_NULL; file->zlib.zfree = Z_NULL; ___ zzip_off_t seek = file->data; seek += sizeof(file->buffer); seek -= seek & (sizeof(file->buffer) - 1); assert(file->data < seek); /* pre-read to next PAGESIZE boundary... */ if (fseeko(file->entry->diskfile, file->data + file->dataoff, SEEK_SET) == -1) goto fail2; file->zlib.next_in = file->buffer; file->zlib.avail_in = fread(file->buffer, 1, seek - file->data, file->entry->diskfile); file->dataoff += file->zlib.avail_in; ____; if (! zzip_file_header_data_deflated(&file->header) || inflateInit2(&file->zlib, -MAX_WBITS) != Z_OK) goto fail2; return file; fail2: free(file); fail1: zzip_entry_free(entry); return 0; ____; } /** => zzip_entry_fopen * * This function opens a file found by name, so it does a search into * the zip central directory with => zzip_entry_findfile and whatever * is found first is given to => zzip_entry_fopen */ zzip__new__ ZZIP_ENTRY_FILE * zzip_entry_ffile(FILE * disk, char *filename) { ZZIP_ENTRY *entry = zzip_entry_findfile(disk, filename, 0, 0); if (! entry) return 0; return zzip_entry_fopen(entry, 1); } /** => zzip_entry_fopen * * This function reads more bytes into the output buffer specified as * arguments. The return value is null on eof or error, the stdio-like * interface can not distinguish between these so you need to check * with => zzip_entry_feof for the difference. */ zzip_size_t zzip_entry_fread(void *ptr, zzip_size_t sized, zzip_size_t nmemb, ZZIP_ENTRY_FILE * file) { if (! file) return 0; ___ zzip_size_t size = sized * nmemb; if (! file->compressed) { if (size > file->avail) size = file->avail; if (fread(ptr, 1, size, file->entry->diskfile) != size) return 0; file->dataoff += size; file->avail -= size; return size; } file->zlib.avail_out = size; file->zlib.next_out = ptr; ___ zzip_size_t total_old = file->zlib.total_out; while (1) { if (! file->zlib.avail_in) { size = file->compressed - file->dataoff; if (size > sizeof(file->buffer)) size = sizeof(file->buffer); /* fseek (file->data + file->dataoff, file->entry->diskfile); */ file->zlib.avail_in = fread(file->buffer, 1, size, file->entry->diskfile); file->zlib.next_in = file->buffer; file->dataoff += file->zlib.avail_in; } if (! file->zlib.avail_in) return 0; ___ int err = inflate(&file->zlib, Z_NO_FLUSH); if (err == Z_STREAM_END) file->avail = 0; else if (err == Z_OK) file->avail -= file->zlib.total_out - total_old; else return 0; ____; if (file->zlib.avail_out && ! file->zlib.avail_in) continue; return file->zlib.total_out - total_old; } ____; ____; } /** => zzip_entry_fopen * This function releases any zlib decoder info needed for decompression * and dumps the ZZIP_ENTRY_FILE struct then. */ int zzip_entry_fclose(ZZIP_ENTRY_FILE * file) { if (! file) return 0; if (file->compressed) inflateEnd(&file->zlib); zzip_entry_free(file->entry); free(file); return 0; } /** => zzip_entry_fopen * * This function allows to distinguish an error from an eof condition. * Actually, if we found an error but we did already reach eof then we * just keep on saying that it was an eof, so the app can just continue. */ int zzip_entry_feof(ZZIP_ENTRY_FILE * file) { return ! file || ! file->avail; }