/[pcsx2_0.9.7]/trunk/3rdparty/bzip2/blocksort.c
ViewVC logotype

Annotation of /trunk/3rdparty/bzip2/blocksort.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 10 - (hide annotations) (download)
Mon Sep 6 11:40:06 2010 UTC (9 years, 5 months ago) by william
File MIME type: text/plain
File size: 31718 byte(s)
exported r3113 from ./upstream/trunk
1 william 10
2     /*-------------------------------------------------------------*/
3     /*--- Block sorting machinery ---*/
4     /*--- blocksort.c ---*/
5     /*-------------------------------------------------------------*/
6    
7     /* ------------------------------------------------------------------
8     This file is part of bzip2/libbzip2, a program and library for
9     lossless, block-sorting data compression.
10    
11     bzip2/libbzip2 version 1.0.4 of 20 December 2006
12     Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
13    
14     Please read the WARNING, DISCLAIMER and PATENTS sections in the
15     README file.
16    
17     This program is released under the terms of the license contained
18     in the file LICENSE.
19     ------------------------------------------------------------------ */
20    
21    
22     #include "bzlib_private.h"
23    
24     /*---------------------------------------------*/
25     /*--- Fallback O(N log(N)^2) sorting ---*/
26     /*--- algorithm, for repetitive blocks ---*/
27     /*---------------------------------------------*/
28    
29     /*---------------------------------------------*/
30     static
31     __inline__
32     void fallbackSimpleSort ( UInt32* fmap,
33     UInt32* eclass,
34     Int32 lo,
35     Int32 hi )
36     {
37     Int32 i, j, tmp;
38     UInt32 ec_tmp;
39    
40     if (lo == hi) return;
41    
42     if (hi - lo > 3) {
43     for ( i = hi-4; i >= lo; i-- ) {
44     tmp = fmap[i];
45     ec_tmp = eclass[tmp];
46     for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
47     fmap[j-4] = fmap[j];
48     fmap[j-4] = tmp;
49     }
50     }
51    
52     for ( i = hi-1; i >= lo; i-- ) {
53     tmp = fmap[i];
54     ec_tmp = eclass[tmp];
55     for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
56     fmap[j-1] = fmap[j];
57     fmap[j-1] = tmp;
58     }
59     }
60    
61    
62     /*---------------------------------------------*/
63     #define fswap(zz1, zz2) \
64     { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
65    
66     #define fvswap(zzp1, zzp2, zzn) \
67     { \
68     Int32 yyp1 = (zzp1); \
69     Int32 yyp2 = (zzp2); \
70     Int32 yyn = (zzn); \
71     while (yyn > 0) { \
72     fswap(fmap[yyp1], fmap[yyp2]); \
73     yyp1++; yyp2++; yyn--; \
74     } \
75     }
76    
77    
78     #define fmin(a,b) ((a) < (b)) ? (a) : (b)
79    
80     #define fpush(lz,hz) { stackLo[sp] = lz; \
81     stackHi[sp] = hz; \
82     sp++; }
83    
84     #define fpop(lz,hz) { sp--; \
85     lz = stackLo[sp]; \
86     hz = stackHi[sp]; }
87    
88     #define FALLBACK_QSORT_SMALL_THRESH 10
89     #define FALLBACK_QSORT_STACK_SIZE 100
90    
91    
92     static
93     void fallbackQSort3 ( UInt32* fmap,
94     UInt32* eclass,
95     Int32 loSt,
96     Int32 hiSt )
97     {
98     Int32 unLo, unHi, ltLo, gtHi, n, m;
99     Int32 sp, lo, hi;
100     UInt32 med, r, r3;
101     Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
102     Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
103    
104     r = 0;
105    
106     sp = 0;
107     fpush ( loSt, hiSt );
108    
109     while (sp > 0) {
110    
111     AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
112    
113     fpop ( lo, hi );
114     if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
115     fallbackSimpleSort ( fmap, eclass, lo, hi );
116     continue;
117     }
118    
119     /* Random partitioning. Median of 3 sometimes fails to
120     avoid bad cases. Median of 9 seems to help but
121     looks rather expensive. This too seems to work but
122     is cheaper. Guidance for the magic constants
123     7621 and 32768 is taken from Sedgewick's algorithms
124     book, chapter 35.
125     */
126     r = ((r * 7621) + 1) % 32768;
127     r3 = r % 3;
128     if (r3 == 0) med = eclass[fmap[lo]]; else
129     if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
130     med = eclass[fmap[hi]];
131    
132     unLo = ltLo = lo;
133     unHi = gtHi = hi;
134    
135     while (1) {
136     while (1) {
137     if (unLo > unHi) break;
138     n = (Int32)eclass[fmap[unLo]] - (Int32)med;
139     if (n == 0) {
140     fswap(fmap[unLo], fmap[ltLo]);
141     ltLo++; unLo++;
142     continue;
143     };
144     if (n > 0) break;
145     unLo++;
146     }
147     while (1) {
148     if (unLo > unHi) break;
149     n = (Int32)eclass[fmap[unHi]] - (Int32)med;
150     if (n == 0) {
151     fswap(fmap[unHi], fmap[gtHi]);
152     gtHi--; unHi--;
153     continue;
154     };
155     if (n < 0) break;
156     unHi--;
157     }
158     if (unLo > unHi) break;
159     fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
160     }
161    
162     AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
163    
164     if (gtHi < ltLo) continue;
165    
166     n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
167     m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
168    
169     n = lo + unLo - ltLo - 1;
170     m = hi - (gtHi - unHi) + 1;
171    
172     if (n - lo > hi - m) {
173     fpush ( lo, n );
174     fpush ( m, hi );
175     } else {
176     fpush ( m, hi );
177     fpush ( lo, n );
178     }
179     }
180     }
181    
182     #undef fmin
183     #undef fpush
184     #undef fpop
185     #undef fswap
186     #undef fvswap
187     #undef FALLBACK_QSORT_SMALL_THRESH
188     #undef FALLBACK_QSORT_STACK_SIZE
189    
190    
191     /*---------------------------------------------*/
192     /* Pre:
193     nblock > 0
194     eclass exists for [0 .. nblock-1]
195     ((UChar*)eclass) [0 .. nblock-1] holds block
196     ptr exists for [0 .. nblock-1]
197    
198     Post:
199     ((UChar*)eclass) [0 .. nblock-1] holds block
200     All other areas of eclass destroyed
201     fmap [0 .. nblock-1] holds sorted order
202     bhtab [ 0 .. 2+(nblock/32) ] destroyed
203     */
204    
205     #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
206     #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
207     #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
208     #define WORD_BH(zz) bhtab[(zz) >> 5]
209     #define UNALIGNED_BH(zz) ((zz) & 0x01f)
210    
211     static
212     void fallbackSort ( UInt32* fmap,
213     UInt32* eclass,
214     UInt32* bhtab,
215     Int32 nblock,
216     Int32 verb )
217     {
218     Int32 ftab[257];
219     Int32 ftabCopy[256];
220     Int32 H, i, j, k, l, r, cc, cc1;
221     Int32 nNotDone;
222     Int32 nBhtab;
223     UChar* eclass8 = (UChar*)eclass;
224    
225     /*--
226     Initial 1-char radix sort to generate
227     initial fmap and initial BH bits.
228     --*/
229     if (verb >= 4)
230     VPrintf0 ( " bucket sorting ...\n" );
231     for (i = 0; i < 257; i++) ftab[i] = 0;
232     for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
233     for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
234     for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
235    
236     for (i = 0; i < nblock; i++) {
237     j = eclass8[i];
238     k = ftab[j] - 1;
239     ftab[j] = k;
240     fmap[k] = i;
241     }
242    
243     nBhtab = 2 + (nblock / 32);
244     for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
245     for (i = 0; i < 256; i++) SET_BH(ftab[i]);
246    
247     /*--
248     Inductively refine the buckets. Kind-of an
249     "exponential radix sort" (!), inspired by the
250     Manber-Myers suffix array construction algorithm.
251     --*/
252    
253     /*-- set sentinel bits for block-end detection --*/
254     for (i = 0; i < 32; i++) {
255     SET_BH(nblock + 2*i);
256     CLEAR_BH(nblock + 2*i + 1);
257     }
258    
259     /*-- the log(N) loop --*/
260     H = 1;
261     while (1) {
262    
263     if (verb >= 4)
264     VPrintf1 ( " depth %6d has ", H );
265    
266     j = 0;
267     for (i = 0; i < nblock; i++) {
268     if (ISSET_BH(i)) j = i;
269     k = fmap[i] - H; if (k < 0) k += nblock;
270     eclass[k] = j;
271     }
272    
273     nNotDone = 0;
274     r = -1;
275     while (1) {
276    
277     /*-- find the next non-singleton bucket --*/
278     k = r + 1;
279     while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
280     if (ISSET_BH(k)) {
281     while (WORD_BH(k) == 0xffffffff) k += 32;
282     while (ISSET_BH(k)) k++;
283     }
284     l = k - 1;
285     if (l >= nblock) break;
286     while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
287     if (!ISSET_BH(k)) {
288     while (WORD_BH(k) == 0x00000000) k += 32;
289     while (!ISSET_BH(k)) k++;
290     }
291     r = k - 1;
292     if (r >= nblock) break;
293    
294     /*-- now [l, r] bracket current bucket --*/
295     if (r > l) {
296     nNotDone += (r - l + 1);
297     fallbackQSort3 ( fmap, eclass, l, r );
298    
299     /*-- scan bucket and generate header bits-- */
300     cc = -1;
301     for (i = l; i <= r; i++) {
302     cc1 = eclass[fmap[i]];
303     if (cc != cc1) { SET_BH(i); cc = cc1; };
304     }
305     }
306     }
307    
308     if (verb >= 4)
309     VPrintf1 ( "%6d unresolved strings\n", nNotDone );
310    
311     H *= 2;
312     if (H > nblock || nNotDone == 0) break;
313     }
314    
315     /*--
316     Reconstruct the original block in
317     eclass8 [0 .. nblock-1], since the
318     previous phase destroyed it.
319     --*/
320     if (verb >= 4)
321     VPrintf0 ( " reconstructing block ...\n" );
322     j = 0;
323     for (i = 0; i < nblock; i++) {
324     while (ftabCopy[j] == 0) j++;
325     ftabCopy[j]--;
326     eclass8[fmap[i]] = (UChar)j;
327     }
328     AssertH ( j < 256, 1005 );
329     }
330    
331     #undef SET_BH
332     #undef CLEAR_BH
333     #undef ISSET_BH
334     #undef WORD_BH
335     #undef UNALIGNED_BH
336    
337    
338     /*---------------------------------------------*/
339     /*--- The main, O(N^2 log(N)) sorting ---*/
340     /*--- algorithm. Faster for "normal" ---*/
341     /*--- non-repetitive blocks. ---*/
342     /*---------------------------------------------*/
343    
344     /*---------------------------------------------*/
345     static
346     __inline__
347     Bool mainGtU ( UInt32 i1,
348     UInt32 i2,
349     UChar* block,
350     UInt16* quadrant,
351     UInt32 nblock,
352     Int32* budget )
353     {
354     Int32 k;
355     UChar c1, c2;
356     UInt16 s1, s2;
357    
358     AssertD ( i1 != i2, "mainGtU" );
359     /* 1 */
360     c1 = block[i1]; c2 = block[i2];
361     if (c1 != c2) return (c1 > c2);
362     i1++; i2++;
363     /* 2 */
364     c1 = block[i1]; c2 = block[i2];
365     if (c1 != c2) return (c1 > c2);
366     i1++; i2++;
367     /* 3 */
368     c1 = block[i1]; c2 = block[i2];
369     if (c1 != c2) return (c1 > c2);
370     i1++; i2++;
371     /* 4 */
372     c1 = block[i1]; c2 = block[i2];
373     if (c1 != c2) return (c1 > c2);
374     i1++; i2++;
375     /* 5 */
376     c1 = block[i1]; c2 = block[i2];
377     if (c1 != c2) return (c1 > c2);
378     i1++; i2++;
379     /* 6 */
380     c1 = block[i1]; c2 = block[i2];
381     if (c1 != c2) return (c1 > c2);
382     i1++; i2++;
383     /* 7 */
384     c1 = block[i1]; c2 = block[i2];
385     if (c1 != c2) return (c1 > c2);
386     i1++; i2++;
387     /* 8 */
388     c1 = block[i1]; c2 = block[i2];
389     if (c1 != c2) return (c1 > c2);
390     i1++; i2++;
391     /* 9 */
392     c1 = block[i1]; c2 = block[i2];
393     if (c1 != c2) return (c1 > c2);
394     i1++; i2++;
395     /* 10 */
396     c1 = block[i1]; c2 = block[i2];
397     if (c1 != c2) return (c1 > c2);
398     i1++; i2++;
399     /* 11 */
400     c1 = block[i1]; c2 = block[i2];
401     if (c1 != c2) return (c1 > c2);
402     i1++; i2++;
403     /* 12 */
404     c1 = block[i1]; c2 = block[i2];
405     if (c1 != c2) return (c1 > c2);
406     i1++; i2++;
407    
408     k = nblock + 8;
409    
410     do {
411     /* 1 */
412     c1 = block[i1]; c2 = block[i2];
413     if (c1 != c2) return (c1 > c2);
414     s1 = quadrant[i1]; s2 = quadrant[i2];
415     if (s1 != s2) return (s1 > s2);
416     i1++; i2++;
417     /* 2 */
418     c1 = block[i1]; c2 = block[i2];
419     if (c1 != c2) return (c1 > c2);
420     s1 = quadrant[i1]; s2 = quadrant[i2];
421     if (s1 != s2) return (s1 > s2);
422     i1++; i2++;
423     /* 3 */
424     c1 = block[i1]; c2 = block[i2];
425     if (c1 != c2) return (c1 > c2);
426     s1 = quadrant[i1]; s2 = quadrant[i2];
427     if (s1 != s2) return (s1 > s2);
428     i1++; i2++;
429     /* 4 */
430     c1 = block[i1]; c2 = block[i2];
431     if (c1 != c2) return (c1 > c2);
432     s1 = quadrant[i1]; s2 = quadrant[i2];
433     if (s1 != s2) return (s1 > s2);
434     i1++; i2++;
435     /* 5 */
436     c1 = block[i1]; c2 = block[i2];
437     if (c1 != c2) return (c1 > c2);
438     s1 = quadrant[i1]; s2 = quadrant[i2];
439     if (s1 != s2) return (s1 > s2);
440     i1++; i2++;
441     /* 6 */
442     c1 = block[i1]; c2 = block[i2];
443     if (c1 != c2) return (c1 > c2);
444     s1 = quadrant[i1]; s2 = quadrant[i2];
445     if (s1 != s2) return (s1 > s2);
446     i1++; i2++;
447     /* 7 */
448     c1 = block[i1]; c2 = block[i2];
449     if (c1 != c2) return (c1 > c2);
450     s1 = quadrant[i1]; s2 = quadrant[i2];
451     if (s1 != s2) return (s1 > s2);
452     i1++; i2++;
453     /* 8 */
454     c1 = block[i1]; c2 = block[i2];
455     if (c1 != c2) return (c1 > c2);
456     s1 = quadrant[i1]; s2 = quadrant[i2];
457     if (s1 != s2) return (s1 > s2);
458     i1++; i2++;
459    
460     if (i1 >= nblock) i1 -= nblock;
461     if (i2 >= nblock) i2 -= nblock;
462    
463     k -= 8;
464     (*budget)--;
465     }
466     while (k >= 0);
467    
468     return False;
469     }
470    
471    
472     /*---------------------------------------------*/
473     /*--
474     Knuth's increments seem to work better
475     than Incerpi-Sedgewick here. Possibly
476     because the number of elems to sort is
477     usually small, typically <= 20.
478     --*/
479     static
480     Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
481     9841, 29524, 88573, 265720,
482     797161, 2391484 };
483    
484     static
485     void mainSimpleSort ( UInt32* ptr,
486     UChar* block,
487     UInt16* quadrant,
488     Int32 nblock,
489     Int32 lo,
490     Int32 hi,
491     Int32 d,
492     Int32* budget )
493     {
494     Int32 i, j, h, bigN, hp;
495     UInt32 v;
496    
497     bigN = hi - lo + 1;
498     if (bigN < 2) return;
499    
500     hp = 0;
501     while (incs[hp] < bigN) hp++;
502     hp--;
503    
504     for (; hp >= 0; hp--) {
505     h = incs[hp];
506    
507     i = lo + h;
508     while (True) {
509    
510     /*-- copy 1 --*/
511     if (i > hi) break;
512     v = ptr[i];
513     j = i;
514     while ( mainGtU (
515     ptr[j-h]+d, v+d, block, quadrant, nblock, budget
516     ) ) {
517     ptr[j] = ptr[j-h];
518     j = j - h;
519     if (j <= (lo + h - 1)) break;
520     }
521     ptr[j] = v;
522     i++;
523    
524     /*-- copy 2 --*/
525     if (i > hi) break;
526     v = ptr[i];
527     j = i;
528     while ( mainGtU (
529     ptr[j-h]+d, v+d, block, quadrant, nblock, budget
530     ) ) {
531     ptr[j] = ptr[j-h];
532     j = j - h;
533     if (j <= (lo + h - 1)) break;
534     }
535     ptr[j] = v;
536     i++;
537    
538     /*-- copy 3 --*/
539     if (i > hi) break;
540     v = ptr[i];
541     j = i;
542     while ( mainGtU (
543     ptr[j-h]+d, v+d, block, quadrant, nblock, budget
544     ) ) {
545     ptr[j] = ptr[j-h];
546     j = j - h;
547     if (j <= (lo + h - 1)) break;
548     }
549     ptr[j] = v;
550     i++;
551    
552     if (*budget < 0) return;
553     }
554     }
555     }
556    
557    
558     /*---------------------------------------------*/
559     /*--
560     The following is an implementation of
561     an elegant 3-way quicksort for strings,
562     described in a paper "Fast Algorithms for
563     Sorting and Searching Strings", by Robert
564     Sedgewick and Jon L. Bentley.
565     --*/
566    
567     #define mswap(zz1, zz2) \
568     { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
569    
570     #define mvswap(zzp1, zzp2, zzn) \
571     { \
572     Int32 yyp1 = (zzp1); \
573     Int32 yyp2 = (zzp2); \
574     Int32 yyn = (zzn); \
575     while (yyn > 0) { \
576     mswap(ptr[yyp1], ptr[yyp2]); \
577     yyp1++; yyp2++; yyn--; \
578     } \
579     }
580    
581     static
582     __inline__
583     UChar mmed3 ( UChar a, UChar b, UChar c )
584     {
585     UChar t;
586     if (a > b) { t = a; a = b; b = t; };
587     if (b > c) {
588     b = c;
589     if (a > b) b = a;
590     }
591     return b;
592     }
593    
594     #define mmin(a,b) ((a) < (b)) ? (a) : (b)
595    
596     #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
597     stackHi[sp] = hz; \
598     stackD [sp] = dz; \
599     sp++; }
600    
601     #define mpop(lz,hz,dz) { sp--; \
602     lz = stackLo[sp]; \
603     hz = stackHi[sp]; \
604     dz = stackD [sp]; }
605    
606    
607     #define mnextsize(az) (nextHi[az]-nextLo[az])
608    
609     #define mnextswap(az,bz) \
610     { Int32 tz; \
611     tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
612     tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
613     tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
614    
615    
616     #define MAIN_QSORT_SMALL_THRESH 20
617     #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
618     #define MAIN_QSORT_STACK_SIZE 100
619    
620     static
621     void mainQSort3 ( UInt32* ptr,
622     UChar* block,
623     UInt16* quadrant,
624     Int32 nblock,
625     Int32 loSt,
626     Int32 hiSt,
627     Int32 dSt,
628     Int32* budget )
629     {
630     Int32 unLo, unHi, ltLo, gtHi, n, m, med;
631     Int32 sp, lo, hi, d;
632    
633     Int32 stackLo[MAIN_QSORT_STACK_SIZE];
634     Int32 stackHi[MAIN_QSORT_STACK_SIZE];
635     Int32 stackD [MAIN_QSORT_STACK_SIZE];
636    
637     Int32 nextLo[3];
638     Int32 nextHi[3];
639     Int32 nextD [3];
640    
641     sp = 0;
642     mpush ( loSt, hiSt, dSt );
643    
644     while (sp > 0) {
645    
646     AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
647    
648     mpop ( lo, hi, d );
649     if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
650     d > MAIN_QSORT_DEPTH_THRESH) {
651     mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
652     if (*budget < 0) return;
653     continue;
654     }
655    
656     med = (Int32)
657     mmed3 ( block[ptr[ lo ]+d],
658     block[ptr[ hi ]+d],
659     block[ptr[ (lo+hi)>>1 ]+d] );
660    
661     unLo = ltLo = lo;
662     unHi = gtHi = hi;
663    
664     while (True) {
665     while (True) {
666     if (unLo > unHi) break;
667     n = ((Int32)block[ptr[unLo]+d]) - med;
668     if (n == 0) {
669     mswap(ptr[unLo], ptr[ltLo]);
670     ltLo++; unLo++; continue;
671     };
672     if (n > 0) break;
673     unLo++;
674     }
675     while (True) {
676     if (unLo > unHi) break;
677     n = ((Int32)block[ptr[unHi]+d]) - med;
678     if (n == 0) {
679     mswap(ptr[unHi], ptr[gtHi]);
680     gtHi--; unHi--; continue;
681     };
682     if (n < 0) break;
683     unHi--;
684     }
685     if (unLo > unHi) break;
686     mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
687     }
688    
689     AssertD ( unHi == unLo-1, "mainQSort3(2)" );
690    
691     if (gtHi < ltLo) {
692     mpush(lo, hi, d+1 );
693     continue;
694     }
695    
696     n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
697     m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
698    
699     n = lo + unLo - ltLo - 1;
700     m = hi - (gtHi - unHi) + 1;
701    
702     nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
703     nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
704     nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
705    
706     if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
707     if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
708     if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
709    
710     AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
711     AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
712    
713     mpush (nextLo[0], nextHi[0], nextD[0]);
714     mpush (nextLo[1], nextHi[1], nextD[1]);
715     mpush (nextLo[2], nextHi[2], nextD[2]);
716     }
717     }
718    
719     #undef mswap
720     #undef mvswap
721     #undef mpush
722     #undef mpop
723     #undef mmin
724     #undef mnextsize
725     #undef mnextswap
726     #undef MAIN_QSORT_SMALL_THRESH
727     #undef MAIN_QSORT_DEPTH_THRESH
728     #undef MAIN_QSORT_STACK_SIZE
729    
730    
731     /*---------------------------------------------*/
732     /* Pre:
733     nblock > N_OVERSHOOT
734     block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
735     ((UChar*)block32) [0 .. nblock-1] holds block
736     ptr exists for [0 .. nblock-1]
737    
738     Post:
739     ((UChar*)block32) [0 .. nblock-1] holds block
740     All other areas of block32 destroyed
741     ftab [0 .. 65536 ] destroyed
742     ptr [0 .. nblock-1] holds sorted order
743     if (*budget < 0), sorting was abandoned
744     */
745    
746     #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
747     #define SETMASK (1 << 21)
748     #define CLEARMASK (~(SETMASK))
749    
750     static
751     void mainSort ( UInt32* ptr,
752     UChar* block,
753     UInt16* quadrant,
754     UInt32* ftab,
755     Int32 nblock,
756     Int32 verb,
757     Int32* budget )
758     {
759     Int32 i, j, k, ss, sb;
760     Int32 runningOrder[256];
761     Bool bigDone[256];
762     Int32 copyStart[256];
763     Int32 copyEnd [256];
764     UChar c1;
765     Int32 numQSorted;
766     UInt16 s;
767     if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
768    
769     /*-- set up the 2-byte frequency table --*/
770     for (i = 65536; i >= 0; i--) ftab[i] = 0;
771    
772     j = block[0] << 8;
773     i = nblock-1;
774     for (; i >= 3; i -= 4) {
775     quadrant[i] = 0;
776     j = (j >> 8) | ( ((UInt16)block[i]) << 8);
777     ftab[j]++;
778     quadrant[i-1] = 0;
779     j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
780     ftab[j]++;
781     quadrant[i-2] = 0;
782     j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
783     ftab[j]++;
784     quadrant[i-3] = 0;
785     j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
786     ftab[j]++;
787     }
788     for (; i >= 0; i--) {
789     quadrant[i] = 0;
790     j = (j >> 8) | ( ((UInt16)block[i]) << 8);
791     ftab[j]++;
792     }
793    
794     /*-- (emphasises close relationship of block & quadrant) --*/
795     for (i = 0; i < BZ_N_OVERSHOOT; i++) {
796     block [nblock+i] = block[i];
797     quadrant[nblock+i] = 0;
798     }
799    
800     if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
801    
802     /*-- Complete the initial radix sort --*/
803     for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
804    
805     s = block[0] << 8;
806     i = nblock-1;
807     for (; i >= 3; i -= 4) {
808     s = (s >> 8) | (block[i] << 8);
809     j = ftab[s] -1;
810     ftab[s] = j;
811     ptr[j] = i;
812     s = (s >> 8) | (block[i-1] << 8);
813     j = ftab[s] -1;
814     ftab[s] = j;
815     ptr[j] = i-1;
816     s = (s >> 8) | (block[i-2] << 8);
817     j = ftab[s] -1;
818     ftab[s] = j;
819     ptr[j] = i-2;
820     s = (s >> 8) | (block[i-3] << 8);
821     j = ftab[s] -1;
822     ftab[s] = j;
823     ptr[j] = i-3;
824     }
825     for (; i >= 0; i--) {
826     s = (s >> 8) | (block[i] << 8);
827     j = ftab[s] -1;
828     ftab[s] = j;
829     ptr[j] = i;
830     }
831    
832     /*--
833     Now ftab contains the first loc of every small bucket.
834     Calculate the running order, from smallest to largest
835     big bucket.
836     --*/
837     for (i = 0; i <= 255; i++) {
838     bigDone [i] = False;
839     runningOrder[i] = i;
840     }
841    
842     {
843     Int32 vv;
844     Int32 h = 1;
845     do h = 3 * h + 1; while (h <= 256);
846     do {
847     h = h / 3;
848     for (i = h; i <= 255; i++) {
849     vv = runningOrder[i];
850     j = i;
851     while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
852     runningOrder[j] = runningOrder[j-h];
853     j = j - h;
854     if (j <= (h - 1)) goto zero;
855     }
856     zero:
857     runningOrder[j] = vv;
858     }
859     } while (h != 1);
860     }
861    
862     /*--
863     The main sorting loop.
864     --*/
865    
866     numQSorted = 0;
867    
868     for (i = 0; i <= 255; i++) {
869    
870     /*--
871     Process big buckets, starting with the least full.
872     Basically this is a 3-step process in which we call
873     mainQSort3 to sort the small buckets [ss, j], but
874     also make a big effort to avoid the calls if we can.
875     --*/
876     ss = runningOrder[i];
877    
878     /*--
879     Step 1:
880     Complete the big bucket [ss] by quicksorting
881     any unsorted small buckets [ss, j], for j != ss.
882     Hopefully previous pointer-scanning phases have already
883     completed many of the small buckets [ss, j], so
884     we don't have to sort them at all.
885     --*/
886     for (j = 0; j <= 255; j++) {
887     if (j != ss) {
888     sb = (ss << 8) + j;
889     if ( ! (ftab[sb] & SETMASK) ) {
890     Int32 lo = ftab[sb] & CLEARMASK;
891     Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
892     if (hi > lo) {
893     if (verb >= 4)
894     VPrintf4 ( " qsort [0x%x, 0x%x] "
895     "done %d this %d\n",
896     ss, j, numQSorted, hi - lo + 1 );
897     mainQSort3 (
898     ptr, block, quadrant, nblock,
899     lo, hi, BZ_N_RADIX, budget
900     );
901     numQSorted += (hi - lo + 1);
902     if (*budget < 0) return;
903     }
904     }
905     ftab[sb] |= SETMASK;
906     }
907     }
908    
909     AssertH ( !bigDone[ss], 1006 );
910    
911     /*--
912     Step 2:
913     Now scan this big bucket [ss] so as to synthesise the
914     sorted order for small buckets [t, ss] for all t,
915     including, magically, the bucket [ss,ss] too.
916     This will avoid doing Real Work in subsequent Step 1's.
917     --*/
918     {
919     for (j = 0; j <= 255; j++) {
920     copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
921     copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
922     }
923     for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
924     k = ptr[j]-1; if (k < 0) k += nblock;
925     c1 = block[k];
926     if (!bigDone[c1])
927     ptr[ copyStart[c1]++ ] = k;
928     }
929     for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
930     k = ptr[j]-1; if (k < 0) k += nblock;
931     c1 = block[k];
932     if (!bigDone[c1])
933     ptr[ copyEnd[c1]-- ] = k;
934     }
935     }
936    
937     AssertH ( (copyStart[ss]-1 == copyEnd[ss])
938     ||
939     /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
940     Necessity for this case is demonstrated by compressing
941     a sequence of approximately 48.5 million of character
942     251; 1.0.0/1.0.1 will then die here. */
943     (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
944     1007 )
945    
946     for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
947    
948     /*--
949     Step 3:
950     The [ss] big bucket is now done. Record this fact,
951     and update the quadrant descriptors. Remember to
952     update quadrants in the overshoot area too, if
953     necessary. The "if (i < 255)" test merely skips
954     this updating for the last bucket processed, since
955     updating for the last bucket is pointless.
956    
957     The quadrant array provides a way to incrementally
958     cache sort orderings, as they appear, so as to
959     make subsequent comparisons in fullGtU() complete
960     faster. For repetitive blocks this makes a big
961     difference (but not big enough to be able to avoid
962     the fallback sorting mechanism, exponential radix sort).
963    
964     The precise meaning is: at all times:
965    
966     for 0 <= i < nblock and 0 <= j <= nblock
967    
968     if block[i] != block[j],
969    
970     then the relative values of quadrant[i] and
971     quadrant[j] are meaningless.
972    
973     else {
974     if quadrant[i] < quadrant[j]
975     then the string starting at i lexicographically
976     precedes the string starting at j
977    
978     else if quadrant[i] > quadrant[j]
979     then the string starting at j lexicographically
980     precedes the string starting at i
981    
982     else
983     the relative ordering of the strings starting
984     at i and j has not yet been determined.
985     }
986     --*/
987     bigDone[ss] = True;
988    
989     if (i < 255) {
990     Int32 bbStart = ftab[ss << 8] & CLEARMASK;
991     Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
992     Int32 shifts = 0;
993    
994     while ((bbSize >> shifts) > 65534) shifts++;
995    
996     for (j = bbSize-1; j >= 0; j--) {
997     Int32 a2update = ptr[bbStart + j];
998     UInt16 qVal = (UInt16)(j >> shifts);
999     quadrant[a2update] = qVal;
1000     if (a2update < BZ_N_OVERSHOOT)
1001     quadrant[a2update + nblock] = qVal;
1002     }
1003     AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
1004     }
1005    
1006     }
1007    
1008     if (verb >= 4)
1009     VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
1010     nblock, numQSorted, nblock - numQSorted );
1011     }
1012    
1013     #undef BIGFREQ
1014     #undef SETMASK
1015     #undef CLEARMASK
1016    
1017    
1018     /*---------------------------------------------*/
1019     /* Pre:
1020     nblock > 0
1021     arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1022     ((UChar*)arr2) [0 .. nblock-1] holds block
1023     arr1 exists for [0 .. nblock-1]
1024    
1025     Post:
1026     ((UChar*)arr2) [0 .. nblock-1] holds block
1027     All other areas of block destroyed
1028     ftab [ 0 .. 65536 ] destroyed
1029     arr1 [0 .. nblock-1] holds sorted order
1030     */
1031     void BZ2_blockSort ( EState* s )
1032     {
1033     UInt32* ptr = s->ptr;
1034     UChar* block = s->block;
1035     UInt32* ftab = s->ftab;
1036     Int32 nblock = s->nblock;
1037     Int32 verb = s->verbosity;
1038     Int32 wfact = s->workFactor;
1039     UInt16* quadrant;
1040     Int32 budget;
1041     Int32 budgetInit;
1042     Int32 i;
1043    
1044     if (nblock < 10000) {
1045     fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1046     } else {
1047     /* Calculate the location for quadrant, remembering to get
1048     the alignment right. Assumes that &(block[0]) is at least
1049     2-byte aligned -- this should be ok since block is really
1050     the first section of arr2.
1051     */
1052     i = nblock+BZ_N_OVERSHOOT;
1053     if (i & 1) i++;
1054     quadrant = (UInt16*)(&(block[i]));
1055    
1056     /* (wfact-1) / 3 puts the default-factor-30
1057     transition point at very roughly the same place as
1058     with v0.1 and v0.9.0.
1059     Not that it particularly matters any more, since the
1060     resulting compressed stream is now the same regardless
1061     of whether or not we use the main sort or fallback sort.
1062     */
1063     if (wfact < 1 ) wfact = 1;
1064     if (wfact > 100) wfact = 100;
1065     budgetInit = nblock * ((wfact-1) / 3);
1066     budget = budgetInit;
1067    
1068     mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
1069     if (verb >= 3)
1070     VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
1071     budgetInit - budget,
1072     nblock,
1073     (float)(budgetInit - budget) /
1074     (float)(nblock==0 ? 1 : nblock) );
1075     if (budget < 0) {
1076     if (verb >= 2)
1077     VPrintf0 ( " too repetitive; using fallback"
1078     " sorting algorithm\n" );
1079     fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1080     }
1081     }
1082    
1083     s->origPtr = -1;
1084     for (i = 0; i < s->nblock; i++)
1085     if (ptr[i] == 0)
1086     { s->origPtr = i; break; };
1087    
1088     AssertH( s->origPtr != -1, 1003 );
1089     }
1090    
1091    
1092     /*-------------------------------------------------------------*/
1093     /*--- end blocksort.c ---*/
1094     /*-------------------------------------------------------------*/

  ViewVC Help
Powered by ViewVC 1.1.22