/* inffast.c -- fast decoding * Copyright (C) 1995-2004 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h * * Modified March 2005 by John Reiser for little-endian, ~0, tail merge, * out-of-line error cases, nuanced comments about copying from window. * Copyright 2005 BitWagon Software LLC. Original was from zlib-1.2.2. * For conditions of distribution and use, see copyright notice in zlib.h */ #include "zutil.h" #include "inftrees.h" #include "inflate.h" #include "inffast.h" #ifndef ASMINF /* Little-endian memory has a speed advantage of about 1%. */ static int const INFFAST_LITTLE = 1; /* Rely on constant propagation. */ /* If your compiler doesn't propagate the "static int const", * then replace with "#define INFFAST_LITTLE 1". * Little-endian also may benefit from "#define POSTINC 1". */ /* Allow machine dependent optimization for post-increment or pre-increment. Based on testing to date, Pre-increment preferred for: - PowerPC G3 (Adler) - MIPS R5000 (Randers-Pehrson) Post-increment preferred for: - none No measurable difference: - Pentium III (Anderson) - M68060 (Nikl) */ #ifdef POSTINC # define OFF 0 # define PUP(a) *(a)++ #else # define OFF 1 # define PUP(a) *++(a) #endif /* Decode literal, length, and distance codes and write out the resulting literal and match bytes until either not enough input or output is available, an end-of-block is encountered, or a data error is encountered. When large enough input and output buffers are supplied to inflate(), for example, a 16K input buffer and a 64K output buffer, more than 95% of the inflate execution time is spent in this routine. Entry assumptions: state->mode == LEN strm->avail_in >= 6 strm->avail_out >= 258 start >= strm->avail_out state->bits < 8 On return, state->mode is one of: LEN -- ran out of enough output space or enough available input TYPE -- reached end of block code, inflate() to interpret next block BAD -- error in block data Notes: - The maximum input bits used by a length/distance pair is 15 bits for the length code, 5 bits for the length extra, 15 bits for the distance code, and 13 bits for the distance extra. This totals 48 bits, or six bytes. Therefore if strm->avail_in >= 6, then there is enough input to avoid checking for available input while decoding. - The maximum bytes that a single length/distance pair can output is 258 bytes, which is the maximum length that can be coded. inflate_fast() requires strm->avail_out >= 258 for each loop to avoid checking for output space. */ #define COPY( dst,src,len) copy(dst, src, len) #define COPY_TAIL(dst,back,len) copy_tail(dst, back, len) #define COPY_INIT(dst) copy_init(dst) #define COPY_FINI(dst) copy_fini(dst) #define COPY_PUTC(dst,c) copy_putc(dst, c) static unsigned char FAR * copy(unsigned char FAR *dst, unsigned char FAR *src, unsigned len) { --len; goto middle; do { PUP(dst) = PUP(src); len-=3; PUP(dst) = PUP(src); middle: PUP(dst) = PUP(src); } while (2avail_out */ { /* Copy state to local variables. Use 'const' as much as possible * for clarity and to help the compiler. */ struct inflate_state FAR *const state = (struct inflate_state FAR *)strm->state; unsigned char FAR *in /* local strm->next_in */ = strm->next_in - OFF; unsigned char FAR *const last /* while in < last, enough input available */ = in + (strm->avail_in - 5); unsigned char FAR *out /* local strm->next_out */ = strm->next_out - OFF; unsigned char FAR *const beg /* inflate()'s initial strm->next_out */ = out - (start - strm->avail_out); unsigned char FAR *const end /* while out < end, enough space available */ = out + (strm->avail_out - 257); unsigned const wsize /* window size or zero if not using window */ = state->wsize; unsigned const whave /* valid bytes in the window */ = state->whave; unsigned const write /* window write index */ = state->write; unsigned char FAR *const window /* allocated sliding window, if wsize != 0 */ = state->window; unsigned long hold /* local strm->hold */ = state->hold; unsigned bits /* local strm->bits */ = state->bits; code const FAR *const lcode /* local strm->lencode */ = state->lencode; code const FAR *const dcode /* local strm->distcode */ = state->distcode; unsigned const lmask /* mask for first level of length codes */ = ~(~0U << state->lenbits); unsigned const dmask /* mask for first level of distance codes */ = ~(~0U << state->distbits); code this; /* retrieved table entry */ unsigned op; /* code bits, operation, extra bits, or */ /* window position, window bytes to copy */ char const *errmsg; out = COPY_INIT(out); /* From now on, use "strm->state", to free up a register. We also want strm not in a register during the loop, but gcc 3.4.1 uses one anyway. */ if (INFFAST_LITTLE && (1& (int)(OFF + in))) { hold += (unsigned long)*(OFF + in) << bits; in += 1; bits += 8; } /* decode literals and length/distances until end-of-block or not enough input data or output space */ do { if (bits < 15) { if (INFFAST_LITTLE) { hold += (unsigned long)*(unsigned short *)(OFF + in) << bits; in += 2; bits += 16; } else { hold += (unsigned long)(PUP(in)) << bits; bits += 8; hold += (unsigned long)(PUP(in)) << bits; bits += 8; } } this = lcode[hold & lmask]; dolen: op = (unsigned)(this.bits); hold >>= op; bits -= op; op = (unsigned)(this.op); if (op == 0) { /* literal */ Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ? "inflate: literal '%c'\n" : "inflate: literal 0x%02x\n", this.val)); out= COPY_PUTC(out, (unsigned char)this.val); } else if (op & 16) { /* length base */ unsigned len = (unsigned)(this.val); op &= 15; /* number of extra bits */ if (op) { if (bits < op) { if (INFFAST_LITTLE) { hold += (unsigned long)*(unsigned short *)(OFF + in) << bits; in += 2; bits += 16; } else { hold += (unsigned long)(PUP(in)) << bits; bits += 8; } } len += (unsigned)hold &~ (~0U << op); hold >>= op; bits -= op; } Tracevv((stderr, "inflate: length %u\n", len)); if (bits < 15) { if (INFFAST_LITTLE) { hold += (unsigned long)*(unsigned short *)(OFF + in) << bits; in += 2; bits += 16; } else { hold += (unsigned long)(PUP(in)) << bits; bits += 8; hold += (unsigned long)(PUP(in)) << bits; bits += 8; } } this = dcode[hold & dmask]; dodist: op = (unsigned)(this.bits); hold >>= op; bits -= op; op = (unsigned)(this.op); if (op & 16) { /* distance base */ unsigned dist = (unsigned)(this.val); op &= 15; /* number of extra bits */ if (bits < op) { if (INFFAST_LITTLE) { hold += (unsigned long)*(unsigned short *)(OFF + in) << bits; in += 2; bits += 16; } else { hold += (unsigned long)(PUP(in)) << bits; bits += 8; if (bits < op) { hold += (unsigned long)(PUP(in)) << bits; bits += 8; } } } dist += (unsigned)hold &~ (~0U << op); hold >>= op; bits -= op; Tracevv((stderr, "inflate: distance %u\n", dist)); op = (unsigned)(out - beg); /* max distance in output */ if (dist > op) { /* see if copy from window */ unsigned char FAR *from; op = dist - op; /* distance back in window */ if (op > whave) { errmsg= "invalid distance too far back"; goto error; } from = window - OFF; if (write == 0) { /* common: full or dictionary */ from += wsize - op; goto merge; /* not all from window */ } else if (write < op) { /* wrap around window */ from += wsize + write - op; op -= write; if (op < len) { /* not all from end of window */ len -= op; out= COPY(out, from, op); op = write; /* enlarge tail merge for compiler */ from = window - OFF; goto merge; /* some from start of window */ } } else { /* contiguous in window */ from += write - op; merge: if (op < len) { /* not all from window */ len -= op; out= COPY(out, from, op); goto tail; /* rest direct from output */ } } out= COPY(out, from, len); /* all from window */ } else { /* copy direct from output */ tail: out= COPY_TAIL(out, dist, len); } } else if ((op & 64) == 0) { /* 2nd level distance code */ this = dcode[this.val + (hold &~ (~0U << op))]; goto dodist; } else { errmsg= "invalid distance code"; goto error; } } else if ((op & 64) == 0) { /* 2nd level length code */ this = lcode[this.val + (hold &~ (~0U << op))]; goto dolen; } else if (op & 32) { /* end-of-block */ goto end_of_block; } else { errmsg= "invalid literal/length code"; goto error; } } while (in < last && out < end); no_err: /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ in -= bits >> 3; ((struct inflate_state FAR *)(strm->state))->bits = bits &= ~(~0U << 3); ((struct inflate_state FAR *)(strm->state))->hold = hold & ~(~0U << bits); /* update state and return */ strm->next_in = in + OFF; strm->next_out = COPY_FINI(out + OFF); /* There is no circular buffer. Commutative, associative, distributive rules. * strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); * strm->avail_out = (unsigned)(out < end ? * 257 + (end - out) : 257 - (out - end)); */ strm->avail_in = 5 + (last - in); strm->avail_out = 257 + (end - out); return; /* Put error and unusual cases completely outside the main loop. */ error: strm->msg = (char *)errmsg; ((struct inflate_state FAR *)(strm->state))->mode = BAD; goto no_err; end_of_block: Tracevv((stderr, "inflate: end of block\n")); ((struct inflate_state FAR *)(strm->state))->mode = TYPE; goto no_err; } /* inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): - Using bit fields for code structure - Different op definition to avoid & for extra bits (do & for table bits) - Three separate decoding do-loops for direct, window, and write == 0 - Special case for distance > 1 copies to do overlapped load and store copy - Explicit branch predictions (based on measured branch probabilities) - Deferring match copy and interspersed it with decoding subsequent codes - Swapping literal/length else - Swapping window/direct else - Larger unrolled copy loops (three is about right) - Moving len -= 3 statement into middle of loop */ #endif /* !ASMINF */