/* deflate.h -- internal compression state
 * Copyright (C) 1995-2016 Jean-loup Gailly
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

/* WARNING: this file should *not* be used by applications. It is
   part of the implementation of the compression library and is
   subject to change. Applications should only use zlib.h.
 */

/* @(#) $Id$ */

#ifndef DEFLATE_H
#define DEFLATE_H
#include "zutil.h"

/* Define NO_GZIP when compiling to disable gzip header and trailer creation
   by deflate().  NO_GZIP can be used to avoid linking in the crc code when it
   is not needed.  For shared libraries, gzip encoding should be left
   enabled. */
#ifndef NO_GZIP
#  define GZIP
#endif

/* ===========================================================================
 * Internal compression state.
 */

#define LENGTH_CODES 29
/* number of length codes, not counting the special END_BLOCK code */

#define LITERALS  256
/* number of literal bytes 0..255 */

#define L_CODES (LITERALS+1+LENGTH_CODES)
/* number of Literal or Length codes, including the END_BLOCK code */

#define D_CODES   30
/* number of distance codes */

#define BL_CODES  19
/* number of codes used to transfer the bit lengths */

#define HEAP_SIZE (2*L_CODES+1)
/* maximum heap size */

#define MAX_BITS 15
/* No code may exceed MAX_BITS bits */

#define Buf_size 16
/* size of bit buffer in bi_buf */

#define INIT_STATE    42    /* zlib header -> BUSY_STATE */
#ifdef GZIP
#  define GZIP_STATE  57    /* gzip header -> BUSY_STATE | EXTRA_STATE */
#endif
#define EXTRA_STATE   69    /* gzip extra block -> NAME_STATE */
#define NAME_STATE    73    /* gzip file name -> COMMENT_STATE */
#define COMMENT_STATE 91    /* gzip comment -> HCRC_STATE */
#define HCRC_STATE   103    /* gzip header CRC -> BUSY_STATE */
#define BUSY_STATE   113    /* deflate -> FINISH_STATE */
#define FINISH_STATE 666    /* stream complete */
/* Stream status (the state values defined above) */


/* Data structure describing a single value and its code string. */
typedef struct ct_data_s {
    /* Each union overlays two fields in the same storage; the Freq, Code,
     * Dad and Len macros name the individual members.
     */
    union {
        ush  freq;       /* frequency count */
        ush  code;       /* bit string */
    } fc;
    union {
        ush  dad;        /* father node in Huffman tree */
        ush  len;        /* length of bit string */
    } dl;
} FAR ct_data;
#define Freq fc.freq
#define Code fc.code
#define Dad  dl.dad
#define Len  dl.len

typedef struct static_tree_desc_s  static_tree_desc;

typedef struct tree_desc_s {
    ct_data *dyn_tree;           /* the dynamic tree */
    int     max_code;            /* largest code with non zero frequency */
89
    const static_tree_desc *stat_desc;  /* the corresponding static tree */
M
Mark Adler 已提交
90
} FAR tree_desc;
typedef ush Pos;
M
Mark Adler 已提交
93
typedef Pos FAR Posf;
M
Mark Adler 已提交
94
typedef unsigned IPos;

/* A Pos is an index in the character window. We use short instead of int to
 * save space in the various tables. IPos is used only for parameter passing.
 */

typedef struct internal_state {
M
Mark Adler 已提交
101
    z_streamp strm;      /* pointer back to this zlib stream */
M
Mark Adler 已提交
102
    int   status;        /* as the name implies */
M
Mark Adler 已提交
103
    Bytef *pending_buf;  /* output still pending */
M
Mark Adler 已提交
104
    ulg   pending_buf_size; /* size of pending_buf */
M
Mark Adler 已提交
105
    Bytef *pending_out;  /* next pending byte to output to the stream */
106
    ulg   pending;       /* nb of bytes in the pending buffer */
M
Mark Adler 已提交
107
    int   wrap;          /* bit 0 true for zlib, bit 1 true for gzip */
M
Mark Adler 已提交
108
    gz_headerp  gzhead;  /* gzip header information to write */
109
    ulg   gzindex;       /* where in extra, name, or comment */
M
Mark Adler 已提交
110
    Byte  method;        /* can only be DEFLATED */
M
Mark Adler 已提交
111
    int   last_flush;    /* value of flush param for previous deflate call */
M
Mark Adler 已提交
112

M
Mark Adler 已提交
113
                /* used by deflate.c: */
M
Mark Adler 已提交
114 115 116 117 118

    uInt  w_size;        /* LZ77 window size (32K by default) */
    uInt  w_bits;        /* log2(w_size)  (8..16) */
    uInt  w_mask;        /* w_size - 1 */

M
Mark Adler 已提交
119
    Bytef *window;
M
Mark Adler 已提交
120 121 122 123 124 125 126 127 128 129 130 131 132 133
    /* Sliding window. Input bytes are read into the second half of the window,
     * and move to the first half later to keep a dictionary of at least wSize
     * bytes. With this organization, matches are limited to a distance of
     * wSize-MAX_MATCH bytes, but this ensures that IO is always
     * performed with a length multiple of the block size. Also, it limits
     * the window size to 64K, which is quite useful on MSDOS.
     * To do: use the user input buffer as sliding window.
     */

    ulg window_size;
    /* Actual size of window: 2*wSize, except when the user input buffer
     * is directly used as sliding window.
     */

M
Mark Adler 已提交
134
    Posf *prev;
M
Mark Adler 已提交
135 136 137 138 139
    /* Link to older string with same hash index. To limit the size of this
     * array to 64K, this link is maintained only for the last 32K strings.
     * An index in this array is thus a window index modulo 32K.
     */

M
Mark Adler 已提交
140
    Posf *head; /* Heads of the hash chains or NIL. */
M
Mark Adler 已提交
141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193

    uInt  ins_h;          /* hash index of string to be inserted */
    uInt  hash_size;      /* number of elements in hash table */
    uInt  hash_bits;      /* log2(hash_size) */
    uInt  hash_mask;      /* hash_size-1 */

    uInt  hash_shift;
    /* Number of bits by which ins_h must be shifted at each input
     * step. It must be such that after MIN_MATCH steps, the oldest
     * byte no longer takes part in the hash key, that is:
     *   hash_shift * MIN_MATCH >= hash_bits
     */

    long block_start;
    /* Window position at the beginning of the current output block. Gets
     * negative when the window is moved backwards.
     */

    uInt match_length;           /* length of best match */
    IPos prev_match;             /* previous match */
    int match_available;         /* set if previous match exists */
    uInt strstart;               /* start of string to insert */
    uInt match_start;            /* start of matching string */
    uInt lookahead;              /* number of valid bytes ahead in window */

    uInt prev_length;
    /* Length of the best match at previous step. Matches not greater than this
     * are discarded. This is used in the lazy match evaluation.
     */

    uInt max_chain_length;
    /* To speed up deflation, hash chains are never searched beyond this
     * length.  A higher limit improves compression ratio but degrades the
     * speed.
     */

    uInt max_lazy_match;
    /* Attempt to find a better match only when the current match is strictly
     * smaller than this value. This mechanism is used only for compression
     * levels >= 4.
     */
#   define max_insert_length  max_lazy_match
    /* Insert new strings in the hash table only if the match length is not
     * greater than this length. This saves time but degrades compression.
     * max_insert_length is used only for compression levels <= 3.
     */

    int level;    /* compression level (1..9) */
    int strategy; /* favor or force Huffman coding*/

    uInt good_match;
    /* Use a faster search when the previous match is longer than this */

M
Mark Adler 已提交
194
    int nice_match; /* Stop searching when current match exceeds this */
M
Mark Adler 已提交
195

M
Mark Adler 已提交
196
                /* used by trees.c: */
M
Mark Adler 已提交
197
    /* Didn't use ct_data typedef below to suppress compiler warning */
M
Mark Adler 已提交
198 199 200
    struct ct_data_s dyn_ltree[HEAP_SIZE];   /* literal and length tree */
    struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */
    struct ct_data_s bl_tree[2*BL_CODES+1];  /* Huffman tree for bit lengths */
M
Mark Adler 已提交
201

M
Mark Adler 已提交
202 203 204
    struct tree_desc_s l_desc;               /* desc. for literal tree */
    struct tree_desc_s d_desc;               /* desc. for distance tree */
    struct tree_desc_s bl_desc;              /* desc. for bit length tree */
M
Mark Adler 已提交
205 206 207 208 209 210 211 212 213 214 215 216 217 218 219

    ush bl_count[MAX_BITS+1];
    /* number of codes at each bit length for an optimal tree */

    int heap[2*L_CODES+1];      /* heap used to build the Huffman trees */
    int heap_len;               /* number of elements in the heap */
    int heap_max;               /* element of largest frequency */
    /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
     * The same heap array is used to build all trees.
     */

    uch depth[2*L_CODES+1];
    /* Depth of each subtree used as tie breaker for trees of equal frequency
     */

M
Mark Adler 已提交
220
    uchf *l_buf;          /* buffer for literals or lengths */
M
Mark Adler 已提交
221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243

    uInt  lit_bufsize;
    /* Size of match buffer for literals/lengths.  There are 4 reasons for
     * limiting lit_bufsize to 64K:
     *   - frequencies can be kept in 16 bit counters
     *   - if compression is not successful for the first block, all input
     *     data is still in the window so we can still emit a stored block even
     *     when input comes from standard input.  (This can also be done for
     *     all blocks if lit_bufsize is not greater than 32K.)
     *   - if compression is not successful for a file smaller than 64K, we can
     *     even emit a stored file instead of a stored block (saving 5 bytes).
     *     This is applicable only for zip (not gzip or zlib).
     *   - creating new Huffman trees less frequently may not provide fast
     *     adaptation to changes in the input data statistics. (Take for
     *     example a binary file with poorly compressible code followed by
     *     a highly compressible string table.) Smaller buffer sizes give
     *     fast adaptation but have of course the overhead of transmitting
     *     trees more frequently.
     *   - I can't count above 4
     */

    uInt last_lit;      /* running index in l_buf */

M
Mark Adler 已提交
244
    ushf *d_buf;
M
Mark Adler 已提交
245 246 247 248 249 250 251 252
    /* Buffer for distances. To simplify the code, d_buf and l_buf have
     * the same number of elements. To use different lengths, an extra flag
     * array would be necessary.
     */

    ulg opt_len;        /* bit length of current block with optimal trees */
    ulg static_len;     /* bit length of current block with static trees */
    uInt matches;       /* number of string matches in current block */
253
    uInt insert;        /* bytes at end of window left to insert */
M
Mark Adler 已提交
254

255
#ifdef ZLIB_DEBUG
M
Mark Adler 已提交
256 257
    ulg compressed_len; /* total bit length of compressed file mod 2^32 */
    ulg bits_sent;      /* bit length of compressed data sent mod 2^32 */
M
Mark Adler 已提交
258 259 260 261 262 263 264 265 266 267 268
#endif

    ush bi_buf;
    /* Output buffer. bits are inserted starting at the bottom (least
     * significant bits).
     */
    int bi_valid;
    /* Number of valid bits in bi_buf.  All bits above the last valid bit
     * are always zero.
     */

M
Mark Adler 已提交
269 270 271 272 273 274 275
    ulg high_water;
    /* High water mark offset in window for initialized bytes -- bytes above
     * this are set to zero in order to avoid memory check warnings when
     * longest match routines access bytes past the input.  This is then
     * updated to the new high water mark.
     */

M
Mark Adler 已提交
276
} FAR deflate_state;

/* Output a byte on the stream.
 * IN assertion: there is enough room in pending_buf.
 */
/* s is parenthesized so that any pointer expression (e.g. &state) can be
 * passed.  s is evaluated twice, so it must not have side effects.
 */
#define put_byte(s, c) {(s)->pending_buf[(s)->pending++] = (Bytef)(c);}
#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
/* Minimum amount of lookahead, except at the end of the input file.
 * See deflate.c for comments about the MIN_MATCH+1.
 */

#define MAX_DIST(s)  ((s)->w_size-MIN_LOOKAHEAD)
/* In order to simplify the code, particularly on 16 bit machines, match
 * distances are limited to MAX_DIST instead of WSIZE.
 */

#define WIN_INIT MAX_MATCH
/* Number of bytes after end of data in window to initialize in order to avoid
   memory checker errors from longest match routines */

        /* in trees.c */
void ZLIB_INTERNAL _tr_init OF((deflate_state *s));
int ZLIB_INTERNAL _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc));
void ZLIB_INTERNAL _tr_flush_block OF((deflate_state *s, charf *buf,
                        ulg stored_len, int last));
303
void ZLIB_INTERNAL _tr_flush_bits OF((deflate_state *s));
M
Mark Adler 已提交
304 305 306
void ZLIB_INTERNAL _tr_align OF((deflate_state *s));
void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf,
                        ulg stored_len, int last));
/* Distance-code lookup: values of 256 and above are looked up in the upper
 * part of _dist_code, indexed by the value shifted right by 7 bits.
 */
#define d_code(dist) \
   ((dist) >= 256 ? _dist_code[256+((dist)>>7)] : _dist_code[dist])
/* Mapping from a distance to a distance code. dist is the distance - 1 and
 * must not have side effects. _dist_code[256] and _dist_code[257] are never
 * used.
 */

#ifndef ZLIB_DEBUG
/* Inline versions of _tr_tally for speed: */

#if defined(GEN_TREES_H) || !defined(STDC)
M
Mark Adler 已提交
319 320
  extern uch ZLIB_INTERNAL _length_code[];
  extern uch ZLIB_INTERNAL _dist_code[];
M
Mark Adler 已提交
321
#else
M
Mark Adler 已提交
322 323
  extern const uch ZLIB_INTERNAL _length_code[];
  extern const uch ZLIB_INTERNAL _dist_code[];
M
Mark Adler 已提交
324 325 326 327 328 329 330 331 332 333
#endif

/* Record literal byte c in the stream state s: store a zero distance and the
 * byte at index last_lit, advance last_lit, and count the byte in dyn_ltree.
 * flush is assigned nonzero when last_lit has reached lit_bufsize-1.
 * s and flush are parenthesized so arbitrary expressions can be passed;
 * s is evaluated several times and must not have side effects.
 */
# define _tr_tally_lit(s, c, flush) \
  { uch cc = (c); \
    (s)->d_buf[(s)->last_lit] = 0; \
    (s)->l_buf[(s)->last_lit++] = cc; \
    (s)->dyn_ltree[cc].Freq++; \
    (flush) = ((s)->last_lit == (s)->lit_bufsize-1); \
   }
/* Record a match in the stream state s: store distance and length at index
 * last_lit, advance last_lit, then count the length code in dyn_ltree and
 * the code of (distance - 1) in dyn_dtree.  length is truncated to uch and
 * distance to ush.  flush is assigned nonzero when last_lit has reached
 * lit_bufsize-1.
 * s and flush are parenthesized so arbitrary expressions can be passed;
 * s is evaluated several times and must not have side effects.  The
 * arguments must not be expressions named len or dist, which would be
 * captured by the locals declared here.
 */
# define _tr_tally_dist(s, distance, length, flush) \
  { uch len = (uch)(length); \
    ush dist = (ush)(distance); \
    (s)->d_buf[(s)->last_lit] = dist; \
    (s)->l_buf[(s)->last_lit++] = len; \
    dist--; \
    (s)->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
    (s)->dyn_dtree[d_code(dist)].Freq++; \
    (flush) = ((s)->last_lit == (s)->lit_bufsize-1); \
  }
#else
/* ZLIB_DEBUG build: call _tr_tally instead of expanding the tally inline.
 * Each macro assigns the value returned by _tr_tally to flush; a literal is
 * passed to _tr_tally with a distance of 0.  The expansion is fully
 * parenthesized so it behaves as a single expression.
 */
# define _tr_tally_lit(s, c, flush) ((flush) = _tr_tally((s), 0, (c)))
# define _tr_tally_dist(s, distance, length, flush) \
              ((flush) = _tr_tally((s), (distance), (length)))
#endif
#endif /* DEFLATE_H */