1 |
/* inffast.c -- fast decoding |
2 |
* Copyright (C) 1995-2008 Mark Adler |
3 |
* For conditions of distribution and use, see copyright notice in zlib.h |
4 |
*/ |
5 |
|
6 |
#include "zutil.h" |
7 |
#include "inftrees.h" |
8 |
#include "inflate.h" |
9 |
#include "inffast.h" |
10 |
|
11 |
#ifndef ASMINF |
12 |
|
13 |
/* Allow machine dependent optimization for post-increment or pre-increment. |
14 |
Based on testing to date, |
15 |
Pre-increment preferred for: |
16 |
- PowerPC G3 (Adler) |
17 |
- MIPS R5000 (Randers-Pehrson) |
18 |
Post-increment preferred for: |
19 |
- none |
20 |
No measurable difference: |
21 |
- Pentium III (Anderson) |
22 |
- M68060 (Nikl) |
23 |
*/ |
24 |
#ifdef POSTINC |
25 |
# define OFF 0 |
26 |
# define PUP(a) *(a)++ |
27 |
#else |
28 |
# define OFF 1 |
29 |
# define PUP(a) *++(a) |
30 |
#endif |
31 |
|
32 |
/* |
33 |
Decode literal, length, and distance codes and write out the resulting |
34 |
literal and match bytes until either not enough input or output is |
35 |
available, an end-of-block is encountered, or a data error is encountered. |
36 |
When large enough input and output buffers are supplied to inflate(), for |
37 |
example, a 16K input buffer and a 64K output buffer, more than 95% of the |
38 |
inflate execution time is spent in this routine. |
39 |
|
40 |
Entry assumptions: |
41 |
|
42 |
state->mode == LEN |
43 |
strm->avail_in >= 6 |
44 |
strm->avail_out >= 258 |
45 |
start >= strm->avail_out |
46 |
state->bits < 8 |
47 |
|
48 |
On return, state->mode is one of: |
49 |
|
50 |
LEN -- ran out of enough output space or enough available input |
51 |
TYPE -- reached end of block code, inflate() to interpret next block |
52 |
BAD -- error in block data |
53 |
|
54 |
Notes: |
55 |
|
56 |
- The maximum input bits used by a length/distance pair is 15 bits for the |
57 |
length code, 5 bits for the length extra, 15 bits for the distance code, |
58 |
and 13 bits for the distance extra. This totals 48 bits, or six bytes. |
59 |
Therefore if strm->avail_in >= 6, then there is enough input to avoid |
60 |
checking for available input while decoding. |
61 |
|
62 |
- The maximum bytes that a single length/distance pair can output is 258 |
63 |
bytes, which is the maximum length that can be coded. inflate_fast() |
64 |
requires strm->avail_out >= 258 for each loop to avoid checking for |
65 |
output space. |
66 |
*/ |
67 |
void inflate_fast(strm, start) |
68 |
z_streamp strm; |
69 |
unsigned start; /* inflate()'s starting value for strm->avail_out */ |
70 |
{ |
71 |
struct inflate_state FAR *state; |
72 |
unsigned char FAR *in; /* local strm->next_in */ |
73 |
unsigned char FAR *last; /* while in < last, enough input available */ |
74 |
unsigned char FAR *out; /* local strm->next_out */ |
75 |
unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ |
76 |
unsigned char FAR *end; /* while out < end, enough space available */ |
77 |
#ifdef INFLATE_STRICT |
78 |
unsigned dmax; /* maximum distance from zlib header */ |
79 |
#endif |
80 |
unsigned wsize; /* window size or zero if not using window */ |
81 |
unsigned whave; /* valid bytes in the window */ |
82 |
unsigned wnext; /* window write index */ |
83 |
unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ |
84 |
unsigned long hold; /* local strm->hold */ |
85 |
unsigned bits; /* local strm->bits */ |
86 |
code const FAR *lcode; /* local strm->lencode */ |
87 |
code const FAR *dcode; /* local strm->distcode */ |
88 |
unsigned lmask; /* mask for first level of length codes */ |
89 |
unsigned dmask; /* mask for first level of distance codes */ |
90 |
code here; /* retrieved table entry */ |
91 |
unsigned op; /* code bits, operation, extra bits, or */ |
92 |
/* window position, window bytes to copy */ |
93 |
unsigned len; /* match length, unused bytes */ |
94 |
unsigned dist; /* match distance */ |
95 |
unsigned char FAR *from; /* where to copy match from */ |
96 |
|
97 |
/* copy state to local variables */ |
98 |
state = (struct inflate_state FAR *)strm->state; |
99 |
in = strm->next_in - OFF; |
100 |
last = in + (strm->avail_in - 5); |
101 |
out = strm->next_out - OFF; |
102 |
beg = out - (start - strm->avail_out); |
103 |
end = out + (strm->avail_out - 257); |
104 |
#ifdef INFLATE_STRICT |
105 |
dmax = state->dmax; |
106 |
#endif |
107 |
wsize = state->wsize; |
108 |
whave = state->whave; |
109 |
wnext = state->wnext; |
110 |
window = state->window; |
111 |
hold = state->hold; |
112 |
bits = state->bits; |
113 |
lcode = state->lencode; |
114 |
dcode = state->distcode; |
115 |
lmask = (1U << state->lenbits) - 1; |
116 |
dmask = (1U << state->distbits) - 1; |
117 |
|
118 |
/* decode literals and length/distances until end-of-block or not enough |
119 |
input data or output space */ |
120 |
do { |
121 |
if (bits < 15) { |
122 |
hold += (unsigned long)(PUP(in)) << bits; |
123 |
bits += 8; |
124 |
hold += (unsigned long)(PUP(in)) << bits; |
125 |
bits += 8; |
126 |
} |
127 |
here = lcode[hold & lmask]; |
128 |
dolen: |
129 |
op = (unsigned)(here.bits); |
130 |
hold >>= op; |
131 |
bits -= op; |
132 |
op = (unsigned)(here.op); |
133 |
if (op == 0) { /* literal */ |
134 |
Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? |
135 |
"inflate: literal '%c'\n" : |
136 |
"inflate: literal 0x%02x\n", here.val)); |
137 |
PUP(out) = (unsigned char)(here.val); |
138 |
} |
139 |
else if (op & 16) { /* length base */ |
140 |
len = (unsigned)(here.val); |
141 |
op &= 15; /* number of extra bits */ |
142 |
if (op) { |
143 |
if (bits < op) { |
144 |
hold += (unsigned long)(PUP(in)) << bits; |
145 |
bits += 8; |
146 |
} |
147 |
len += (unsigned)hold & ((1U << op) - 1); |
148 |
hold >>= op; |
149 |
bits -= op; |
150 |
} |
151 |
Tracevv((stderr, "inflate: length %u\n", len)); |
152 |
if (bits < 15) { |
153 |
hold += (unsigned long)(PUP(in)) << bits; |
154 |
bits += 8; |
155 |
hold += (unsigned long)(PUP(in)) << bits; |
156 |
bits += 8; |
157 |
} |
158 |
here = dcode[hold & dmask]; |
159 |
dodist: |
160 |
op = (unsigned)(here.bits); |
161 |
hold >>= op; |
162 |
bits -= op; |
163 |
op = (unsigned)(here.op); |
164 |
if (op & 16) { /* distance base */ |
165 |
dist = (unsigned)(here.val); |
166 |
op &= 15; /* number of extra bits */ |
167 |
if (bits < op) { |
168 |
hold += (unsigned long)(PUP(in)) << bits; |
169 |
bits += 8; |
170 |
if (bits < op) { |
171 |
hold += (unsigned long)(PUP(in)) << bits; |
172 |
bits += 8; |
173 |
} |
174 |
} |
175 |
dist += (unsigned)hold & ((1U << op) - 1); |
176 |
#ifdef INFLATE_STRICT |
177 |
if (dist > dmax) { |
178 |
strm->msg = (char *)"invalid distance too far back"; |
179 |
state->mode = BAD; |
180 |
break; |
181 |
} |
182 |
#endif |
183 |
hold >>= op; |
184 |
bits -= op; |
185 |
Tracevv((stderr, "inflate: distance %u\n", dist)); |
186 |
op = (unsigned)(out - beg); /* max distance in output */ |
187 |
if (dist > op) { /* see if copy from window */ |
188 |
op = dist - op; /* distance back in window */ |
189 |
if (op > whave) { |
190 |
if (state->sane) { |
191 |
strm->msg = |
192 |
(char *)"invalid distance too far back"; |
193 |
state->mode = BAD; |
194 |
break; |
195 |
} |
196 |
#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR |
197 |
if (len <= op - whave) { |
198 |
do { |
199 |
PUP(out) = 0; |
200 |
} while (--len); |
201 |
continue; |
202 |
} |
203 |
len -= op - whave; |
204 |
do { |
205 |
PUP(out) = 0; |
206 |
} while (--op > whave); |
207 |
if (op == 0) { |
208 |
from = out - dist; |
209 |
do { |
210 |
PUP(out) = PUP(from); |
211 |
} while (--len); |
212 |
continue; |
213 |
} |
214 |
#endif |
215 |
} |
216 |
from = window - OFF; |
217 |
if (wnext == 0) { /* very common case */ |
218 |
from += wsize - op; |
219 |
if (op < len) { /* some from window */ |
220 |
len -= op; |
221 |
do { |
222 |
PUP(out) = PUP(from); |
223 |
} while (--op); |
224 |
from = out - dist; /* rest from output */ |
225 |
} |
226 |
} |
227 |
else if (wnext < op) { /* wrap around window */ |
228 |
from += wsize + wnext - op; |
229 |
op -= wnext; |
230 |
if (op < len) { /* some from end of window */ |
231 |
len -= op; |
232 |
do { |
233 |
PUP(out) = PUP(from); |
234 |
} while (--op); |
235 |
from = window - OFF; |
236 |
if (wnext < len) { /* some from start of window */ |
237 |
op = wnext; |
238 |
len -= op; |
239 |
do { |
240 |
PUP(out) = PUP(from); |
241 |
} while (--op); |
242 |
from = out - dist; /* rest from output */ |
243 |
} |
244 |
} |
245 |
} |
246 |
else { /* contiguous in window */ |
247 |
from += wnext - op; |
248 |
if (op < len) { /* some from window */ |
249 |
len -= op; |
250 |
do { |
251 |
PUP(out) = PUP(from); |
252 |
} while (--op); |
253 |
from = out - dist; /* rest from output */ |
254 |
} |
255 |
} |
256 |
while (len > 2) { |
257 |
PUP(out) = PUP(from); |
258 |
PUP(out) = PUP(from); |
259 |
PUP(out) = PUP(from); |
260 |
len -= 3; |
261 |
} |
262 |
if (len) { |
263 |
PUP(out) = PUP(from); |
264 |
if (len > 1) |
265 |
PUP(out) = PUP(from); |
266 |
} |
267 |
} |
268 |
else { |
269 |
from = out - dist; /* copy direct from output */ |
270 |
do { /* minimum length is three */ |
271 |
PUP(out) = PUP(from); |
272 |
PUP(out) = PUP(from); |
273 |
PUP(out) = PUP(from); |
274 |
len -= 3; |
275 |
} while (len > 2); |
276 |
if (len) { |
277 |
PUP(out) = PUP(from); |
278 |
if (len > 1) |
279 |
PUP(out) = PUP(from); |
280 |
} |
281 |
} |
282 |
} |
283 |
else if ((op & 64) == 0) { /* 2nd level distance code */ |
284 |
here = dcode[here.val + (hold & ((1U << op) - 1))]; |
285 |
goto dodist; |
286 |
} |
287 |
else { |
288 |
strm->msg = (char *)"invalid distance code"; |
289 |
state->mode = BAD; |
290 |
break; |
291 |
} |
292 |
} |
293 |
else if ((op & 64) == 0) { /* 2nd level length code */ |
294 |
here = lcode[here.val + (hold & ((1U << op) - 1))]; |
295 |
goto dolen; |
296 |
} |
297 |
else if (op & 32) { /* end-of-block */ |
298 |
Tracevv((stderr, "inflate: end of block\n")); |
299 |
state->mode = TYPE; |
300 |
break; |
301 |
} |
302 |
else { |
303 |
strm->msg = (char *)"invalid literal/length code"; |
304 |
state->mode = BAD; |
305 |
break; |
306 |
} |
307 |
} while (in < last && out < end); |
308 |
|
309 |
/* return unused bytes (on entry, bits < 8, so in won't go too far back) */ |
310 |
len = bits >> 3; |
311 |
in -= len; |
312 |
bits -= len << 3; |
313 |
hold &= (1U << bits) - 1; |
314 |
|
315 |
/* update state and return */ |
316 |
strm->next_in = in + OFF; |
317 |
strm->next_out = out + OFF; |
318 |
strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); |
319 |
strm->avail_out = (unsigned)(out < end ? |
320 |
257 + (end - out) : 257 - (out - end)); |
321 |
state->hold = hold; |
322 |
state->bits = bits; |
323 |
return; |
324 |
} |
325 |
|
326 |
/* |
327 |
inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): |
328 |
- Using bit fields for code structure |
329 |
- Different op definition to avoid & for extra bits (do & for table bits) |
330 |
- Three separate decoding do-loops for direct, window, and wnext == 0 |
331 |
- Special case for distance > 1 copies to do overlapped load and store copy |
332 |
- Explicit branch predictions (based on measured branch probabilities) |
333 |
- Deferring match copy and interspersed it with decoding subsequent codes |
334 |
- Swapping literal/length else |
335 |
- Swapping window/direct else |
336 |
- Larger unrolled copy loops (three is about right) |
337 |
- Moving len -= 3 statement into middle of loop |
338 |
*/ |
339 |
|
340 |
#endif /* !ASMINF */ |