/* gzio.c -- IO on .gz files
 * Copyright (C) 1995 Jean-loup Gailly.
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

/* $Id: gzio.c,v 1.8 1995/05/03 17:27:09 jloup Exp $ */

#include <stdio.h>

#include "zutil.h"

struct internal_state {int dummy;}; /* for buggy compilers */

/* Size in bytes of the input buffer and of the output buffer */
#define Z_BUFSIZE 4096

/* Allocation wrappers. TRYFREE frees p unless it is null; it expands to a
 * single statement so that it is safe in any statement context.
 */
#define ALLOC(size) malloc(size)
#define TRYFREE(p) do { if (p) free(p); } while (0)

/* The two bytes that start a gzip file */
#define GZ_MAGIC_1 0x1f
#define GZ_MAGIC_2 0x8b

/* gzip flag byte */
#define ASCII_FLAG   0x01 /* bit 0 set: file probably ascii text */
#define HEAD_CRC     0x02 /* bit 1 set: header CRC present */
#define EXTRA_FIELD  0x04 /* bit 2 set: extra field present */
#define ORIG_NAME    0x08 /* bit 3 set: original file name present */
#define COMMENT      0x10 /* bit 4 set: file comment present */
#define RESERVED     0xE0 /* bits 5..7: reserved */

/* Fallback for stdio headers that do not define SEEK_CUR */
#ifndef SEEK_CUR
#  define SEEK_CUR 1
#endif

/* State behind one gzFile handle: the zlib stream plus the stdio file and
 * the buffers that sit between them. gz_open() allocates and fills it in;
 * destroy() releases it together with everything it owns.
 */
typedef struct gz_stream {
    z_stream stream;
    int      z_err;   /* result code of the last stream or file operation */
    int      z_eof;   /* set once fread() on the .gz file returned no data */
    FILE     *file;   /* .gz file */
    Byte     *inbuf;  /* input buffer; allocated in read mode only */
    Byte     *outbuf; /* output buffer; allocated in write mode only */
    uLong    crc;     /* running crc32 of the uncompressed data */
    char     *msg;    /* last message built by gzerror(), or NULL */
    char     *path;   /* copy of the path name, used in gzerror() messages */
    int      transparent; /* 1 if input file is not a .gz file */
    char     mode;    /* 'w' or 'r' */
} gz_stream;


M
Mark Adler 已提交
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
local int    destroy OF((gz_stream *s));
local gzFile gz_open OF((char *path, char *mode, int  fd));
local void   putLong OF((FILE *file, uLong x));
local uLong  getLong OF((Bytef *buf));

 /* ===========================================================================
 * Release everything owned by the given gz_stream, then the stream itself.
 *   The buffers, the path copy and the message string are freed, the zlib
 * state (if any) is ended according to the stream mode, and the file (if
 * any) is closed.
 *   Returns Z_STREAM_ERROR for a null stream. Otherwise the result is, in
 * increasing priority: the code from deflateEnd/inflateEnd (Z_OK if neither
 * was called), Z_ERRNO if fclose failed, or the stream's own z_err when that
 * is negative.
 */
local int destroy (s)
    gz_stream *s;
{
    int result = Z_OK;

    if (s == NULL) return Z_STREAM_ERROR;

    TRYFREE(s->inbuf);
    TRYFREE(s->outbuf);
    TRYFREE(s->path);
    TRYFREE(s->msg);

    /* only an initialized zlib stream has a state to tear down */
    if (s->stream.state != NULL) {
        switch (s->mode) {
        case 'w':
            result = deflateEnd(&(s->stream));
            break;
        case 'r':
            result = inflateEnd(&(s->stream));
            break;
        default:
            break;
        }
    }
    if (s->file != NULL) {
        if (fclose(s->file)) result = Z_ERRNO;
    }
    /* an error recorded earlier on the stream wins over the cleanup result */
    if (s->z_err < 0) result = s->z_err;

    TRYFREE(s);
    return result;
}
M
Mark Adler 已提交
83 84 85

/* ===========================================================================
     Opens a gzip (.gz) file for reading or writing. The mode parameter
M
Mark Adler 已提交
86
   is as in fopen ("rb" or "wb"). The file is given either by file descriptor
M
Mark Adler 已提交
87 88 89 90 91 92 93
   or path name (if fd == -1).
     gz_open return NULL if the file could not be opened or if there was
   insufficient memory to allocate the (de)compression state; errno
   can be checked to distinguish the two cases (if errno is zero, the
   zlib error is Z_MEM_ERROR).
*/
local gzFile gz_open (path, mode, fd)
M
Mark Adler 已提交
94 95
    char *path;
    char *mode;
M
Mark Adler 已提交
96 97 98
    int  fd;
{
    int err;
M
Mark Adler 已提交
99
    int level = Z_DEFAULT_COMPRESSION; /* compression level */
M
Mark Adler 已提交
100 101
    char *p = mode;
    gz_stream *s = (gz_stream *)ALLOC(sizeof(gz_stream));
M
Mark Adler 已提交
102 103 104 105 106

    if (!s) return Z_NULL;

    s->stream.zalloc = (alloc_func)0;
    s->stream.zfree = (free_func)0;
M
Mark Adler 已提交
107
    s->stream.opaque = (voidpf)0;
M
Mark Adler 已提交
108 109 110 111 112 113 114 115 116 117 118 119
    s->stream.next_in = s->inbuf = Z_NULL;
    s->stream.next_out = s->outbuf = Z_NULL;
    s->stream.avail_in = s->stream.avail_out = 0;
    s->file = NULL;
    s->z_err = Z_OK;
    s->z_eof = 0;
    s->crc = crc32(0L, Z_NULL, 0);
    s->msg = NULL;
    s->transparent = 0;

    s->path = (char*)ALLOC(strlen(path)+1);
    if (s->path == NULL) {
M
Mark Adler 已提交
120
        return destroy(s), (gzFile)Z_NULL;
M
Mark Adler 已提交
121 122 123 124 125
    }
    strcpy(s->path, path); /* do this early for debugging */

    s->mode = '\0';
    do {
M
Mark Adler 已提交
126
        if (*p == 'r') s->mode = 'r';
M
Mark Adler 已提交
127 128 129
        if (*p == 'w') s->mode = 'w';
        if (*p >= '1' && *p <= '9') level = *p - '0';
    } while (*p++);
M
Mark Adler 已提交
130 131 132
    if (s->mode == '\0') return destroy(s), (gzFile)Z_NULL;
    
    if (s->mode == 'w') {
M
Mark Adler 已提交
133
        err = deflateInit2(&(s->stream), level,
M
Mark Adler 已提交
134
                           Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, 0);
M
Mark Adler 已提交
135
        /* windowBits is passed < 0 to suppress zlib header */
M
Mark Adler 已提交
136

M
Mark Adler 已提交
137
        s->stream.next_out = s->outbuf = (Byte*)ALLOC(Z_BUFSIZE);
M
Mark Adler 已提交
138

M
Mark Adler 已提交
139 140 141
        if (err != Z_OK || s->outbuf == Z_NULL) {
            return destroy(s), (gzFile)Z_NULL;
        }
M
Mark Adler 已提交
142
    } else {
M
Mark Adler 已提交
143
        err = inflateInit2(&(s->stream), -MAX_WBITS);
M
Mark Adler 已提交
144
        s->stream.next_in  = s->inbuf = (Byte*)ALLOC(Z_BUFSIZE);
M
Mark Adler 已提交
145

M
Mark Adler 已提交
146 147 148
        if (err != Z_OK || s->inbuf == Z_NULL) {
            return destroy(s), (gzFile)Z_NULL;
        }
M
Mark Adler 已提交
149 150 151 152
    }
    s->stream.avail_out = Z_BUFSIZE;

    errno = 0;
M
Mark Adler 已提交
153
    s->file = fd < 0 ? FOPEN(path, mode) : (FILE*)fdopen(fd, mode);
M
Mark Adler 已提交
154 155

    if (s->file == NULL) {
M
Mark Adler 已提交
156
        return destroy(s), (gzFile)Z_NULL;
M
Mark Adler 已提交
157 158
    }
    if (s->mode == 'w') {
M
Mark Adler 已提交
159 160
        /* Write a very simple .gz header:
         */
M
Mark Adler 已提交
161
        fprintf(s->file, "%c%c%c%c%c%c%c%c%c%c", GZ_MAGIC_1, GZ_MAGIC_2,
M
Mark Adler 已提交
162
             Z_DEFLATED, 0 /*flags*/, 0,0,0,0 /*time*/, 0 /*xflags*/, OS_CODE);
M
Mark Adler 已提交
163
    } else {
M
Mark Adler 已提交
164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
        /* Check and skip the header:
         */
        Byte c1 = 0, c2 = 0;
        Byte method = 0;
        Byte flags = 0;
        Byte xflags = 0;
        Byte time[4];
        Byte osCode;
        int c;

        s->stream.avail_in = fread(s->inbuf, 1, 2, s->file);
        if (s->stream.avail_in != 2 || s->inbuf[0] != GZ_MAGIC_1
            || s->inbuf[1] != GZ_MAGIC_2) {
            s->transparent = 1;
            return (gzFile)s;
        }
        s->stream.avail_in = 0;
        fscanf(s->file,"%c%c%4c%c%c", &method, &flags, time, &xflags, &osCode);

        if (method != Z_DEFLATED || feof(s->file) || (flags & RESERVED) != 0) {
            s->z_err = Z_DATA_ERROR;
            return (gzFile)s;
        }
        if ((flags & EXTRA_FIELD) != 0) { /* skip the extra field */
            long len;
            fscanf(s->file, "%c%c", &c1, &c2);
            len = c1 + ((long)c2<<8);
            fseek(s->file, len, SEEK_CUR);
        }
        if ((flags & ORIG_NAME) != 0) { /* skip the original file name */
            while ((c = getc(s->file)) != 0 && c != EOF) ;
        }
        if ((flags & COMMENT) != 0) {   /* skip the .gz file comment */
            while ((c = getc(s->file)) != 0 && c != EOF) ;
        }
        if ((flags & HEAD_CRC) != 0) {  /* skip the header crc */
            fscanf(s->file, "%c%c", &c1, &c2);
        }
        if (feof(s->file)) {
            s->z_err = Z_DATA_ERROR;
        }
M
Mark Adler 已提交
205 206 207 208 209 210 211 212
    }
    return (gzFile)s;
}

/* ===========================================================================
     Opens the gzip (.gz) file named by path for reading or writing, with
   mode interpreted as by gz_open. Returns whatever gz_open returns for that
   path and mode.
*/
gzFile gzopen (path, mode)
    char *path;
    char *mode;
{
    /* a negative descriptor makes gz_open open the file by path name */
    gzFile opened = gz_open (path, mode, -1);

    return opened;
}

/* ===========================================================================
M
Mark Adler 已提交
220
     Associate a gzFile with the file descriptor fd.
M
Mark Adler 已提交
221 222 223
*/
gzFile gzdopen (fd, mode)
    int fd;
M
Mark Adler 已提交
224
    char *mode;
M
Mark Adler 已提交
225 226
{
    char name[20];
M
Mark Adler 已提交
227
    sprintf(name, "<fd:%d>", fd); /* for debugging */
M
Mark Adler 已提交
228 229 230 231 232 233 234 235 236 237

    return gz_open (name, mode, fd);
}

/* ===========================================================================
     Reads the given number of uncompressed bytes from the compressed file.
   gzread returns the number of bytes actually read (0 for end of file).
*/
int gzread (file, buf, len)
    gzFile file;
M
Mark Adler 已提交
238
    voidp buf;
M
Mark Adler 已提交
239 240 241 242 243 244
    unsigned len;
{
    gz_stream *s = (gz_stream*)file;

    if (s == NULL || s->mode != 'r') return Z_STREAM_ERROR;

M
Mark Adler 已提交
245 246 247 248 249 250 251 252 253 254 255 256 257 258
    if (s->transparent) {
        int n = 0;
        Byte *b = (Byte*)buf;
        /* Copy the first two (non-magic) bytes if not done already */
        while (s->stream.avail_in > 0 && len > 0) {
            *b++ = *s->stream.next_in++;
            s->stream.avail_in--;
            len--; n++;
        }
        if (len == 0) return n;
        return n + fread(b, 1, len, s->file);
    }
    if (s->z_err == Z_DATA_ERROR) return -1; /* bad .gz file */
    if (s->z_err == Z_STREAM_END) return 0;  /* don't read crc as data */
M
Mark Adler 已提交
259 260 261 262 263 264

    s->stream.next_out = buf;
    s->stream.avail_out = len;

    while (s->stream.avail_out != 0) {

M
Mark Adler 已提交
265 266 267
        if (s->stream.avail_in == 0 && !s->z_eof) {

            errno = 0;
M
Mark Adler 已提交
268 269
            s->stream.avail_in =
                fread(s->inbuf, 1, Z_BUFSIZE, s->file);
M
Mark Adler 已提交
270 271
            if (s->stream.avail_in == 0) {
                s->z_eof = 1;
M
Mark Adler 已提交
272 273 274 275 276
            } else if (s->stream.avail_in == (uInt)EOF) {
                s->stream.avail_in = 0;
                s->z_eof = 1;
                s->z_err = Z_ERRNO;
                break;
M
Mark Adler 已提交
277 278 279 280 281
            }
            s->stream.next_in = s->inbuf;
        }
        s->z_err = inflate(&(s->stream), Z_NO_FLUSH);

M
Mark Adler 已提交
282 283
        if (s->z_err == Z_STREAM_END ||
            s->z_err != Z_OK  || s->z_eof) break;
M
Mark Adler 已提交
284
    }
M
Mark Adler 已提交
285 286 287
    len -= s->stream.avail_out;
    s->crc = crc32(s->crc, buf, len);
    return (int)len;
M
Mark Adler 已提交
288 289 290 291 292 293 294 295
}

/* ===========================================================================
     Writes the given number of uncompressed bytes into the compressed file.
   gzwrite returns the number of bytes actually written (0 in case of error).
*/
int gzwrite (file, buf, len)
    gzFile file;
M
Mark Adler 已提交
296
    voidp buf;
M
Mark Adler 已提交
297 298 299 300 301 302 303 304 305 306 307
    unsigned len;
{
    gz_stream *s = (gz_stream*)file;

    if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR;

    s->stream.next_in = buf;
    s->stream.avail_in = len;

    while (s->stream.avail_in != 0) {

M
Mark Adler 已提交
308
        if (s->stream.avail_out == 0) {
M
Mark Adler 已提交
309

M
Mark Adler 已提交
310 311 312 313 314 315 316 317 318
            s->stream.next_out = s->outbuf;
            if (fwrite(s->outbuf, 1, Z_BUFSIZE, s->file) != Z_BUFSIZE) {
                s->z_err = Z_ERRNO;
                break;
            }
            s->stream.avail_out = Z_BUFSIZE;
        }
        s->z_err = deflate(&(s->stream), Z_NO_FLUSH);
        if (s->z_err != Z_OK) break;
M
Mark Adler 已提交
319 320 321
    }
    s->crc = crc32(s->crc, buf, len);

M
Mark Adler 已提交
322
    return (int)(len - s->stream.avail_in);
M
Mark Adler 已提交
323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343
}

/* ===========================================================================
     Flushes all pending output into the compressed file. The parameter
   flush is as in the deflate() function.
     gzflush should be called only when strictly necessary because it can
   degrade compression.
*/
int gzflush (file, flush)
    gzFile file;
    int flush;
{
    uInt len;
    int done = 0;
    gz_stream *s = (gz_stream*)file;

    if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR;

    s->stream.avail_in = 0; /* should be zero already anyway */

    for (;;) {
M
Mark Adler 已提交
344 345 346
        len = Z_BUFSIZE - s->stream.avail_out;

        if (len != 0) {
M
Mark Adler 已提交
347
            if (fwrite(s->outbuf, 1, len, s->file) != len) {
M
Mark Adler 已提交
348 349 350 351 352 353 354 355
                s->z_err = Z_ERRNO;
                return Z_ERRNO;
            }
            s->stream.next_out = s->outbuf;
            s->stream.avail_out = Z_BUFSIZE;
        }
        if (done) break;
        s->z_err = deflate(&(s->stream), flush);
M
Mark Adler 已提交
356

M
Mark Adler 已提交
357
        /* deflate has finished flushing only when it hasn't used up
M
Mark Adler 已提交
358 359
         * all the available space in the output buffer: 
         */
M
Mark Adler 已提交
360 361
        done = (s->stream.avail_out != 0 || s->z_err == Z_STREAM_END);
 
M
Mark Adler 已提交
362
        if (s->z_err != Z_OK && s->z_err != Z_STREAM_END) break;
M
Mark Adler 已提交
363
    }
M
Mark Adler 已提交
364
    fflush(s->file);
M
Mark Adler 已提交
365
    return  s->z_err == Z_STREAM_END ? Z_OK : s->z_err;
M
Mark Adler 已提交
366 367 368 369 370 371 372 373 374 375 376
}

/* ===========================================================================
   Writes the low 32 bits of x to the given file as four bytes, least
   significant byte first. Write errors are not reported.
*/
local void putLong (file, x)
    FILE *file;
    uLong x;
{
    int shift;

    for (shift = 0; shift < 32; shift += 8) {
        fputc((int)((x >> shift) & 0xff), file);
    }
}

/* ===========================================================================
M
Mark Adler 已提交
383
   Reads a long in LSB order from the given buffer
M
Mark Adler 已提交
384
*/
M
Mark Adler 已提交
385 386
local uLong getLong (buf)
    Bytef *buf;
M
Mark Adler 已提交
387
{
M
Mark Adler 已提交
388 389 390 391 392 393 394
    uLong x = 0;
    Bytef *p = buf+4;

    do {
        x <<= 8;
        x |= *--p; 
    } while (p != buf);
M
Mark Adler 已提交
395 396 397 398 399 400 401 402 403 404
    return x;
}

/* ===========================================================================
     Flushes all pending output if necessary, closes the compressed file
   and deallocates all the (de)compression state.
*/
int gzclose (file)
    gzFile file;
{
M
Mark Adler 已提交
405
    uInt n;
M
Mark Adler 已提交
406
    int err;
M
Mark Adler 已提交
407 408 409 410 411
    gz_stream *s = (gz_stream*)file;

    if (s == NULL) return Z_STREAM_ERROR;

    if (s->mode == 'w') {
M
Mark Adler 已提交
412 413
        err = gzflush (file, Z_FINISH);
        if (err != Z_OK) return destroy(file);
M
Mark Adler 已提交
414

M
Mark Adler 已提交
415 416
        putLong (s->file, s->crc);
        putLong (s->file, s->stream.total_in);
M
Mark Adler 已提交
417

M
Mark Adler 已提交
418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436
    } else if (s->mode == 'r' && s->z_err == Z_STREAM_END) {

        /* slide CRC and original size if they are at the end of inbuf */
        if ((n = s->stream.avail_in) < 8  && !s->z_eof) {
            Byte *p = s->inbuf;
	    Bytef *q = s->stream.next_in;
            while (n--) { *p++ = *q++; };

            n = s->stream.avail_in;
            n += fread(p, 1, 8, s->file);
            s->stream.next_in = s->inbuf;
        }
        /* check CRC and original size */
        if (n < 8 ||
            getLong(s->stream.next_in) != s->crc ||
            getLong(s->stream.next_in + 4) != s->stream.total_out) {

            s->z_err = Z_DATA_ERROR;
        }
M
Mark Adler 已提交
437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455
    }
    return destroy(file);
}

/* ===========================================================================
     Returns the error message for the last error which occured on the
   given compressed file. errnum is set to zlib error number. If an
   error occured in the file system and not in the compression library,
   errnum is set to Z_ERRNO and the application may consult errno
   to get the exact error code.
*/
char*  gzerror (file, errnum)
    gzFile file;
    int *errnum;
{
    char *m;
    gz_stream *s = (gz_stream*)file;

    if (s == NULL) {
M
Mark Adler 已提交
456
        *errnum = Z_STREAM_ERROR;
M
Mark Adler 已提交
457
        return z_errmsg[1-Z_STREAM_ERROR];
M
Mark Adler 已提交
458 459
    }
    *errnum = s->z_err;
M
Mark Adler 已提交
460
    if (*errnum == Z_OK) return "";
M
Mark Adler 已提交
461

M
Mark Adler 已提交
462
    m =  *errnum == Z_ERRNO ? zstrerror(errno) : s->stream.msg;
M
Mark Adler 已提交
463

M
Mark Adler 已提交
464
    if (m == NULL || *m == '\0') m = z_errmsg[1-s->z_err];
M
Mark Adler 已提交
465 466 467 468 469 470 471 472

    TRYFREE(s->msg);
    s->msg = (char*)ALLOC(strlen(s->path) + strlen(m) + 3);
    strcpy(s->msg, s->path);
    strcat(s->msg, ": ");
    strcat(s->msg, m);
    return s->msg;
}