2 ff3adf60 2004-04-14 devnull /*-------------------------------------------------------------*/
3 ff3adf60 2004-04-14 devnull /*--- Block sorting machinery ---*/
4 ff3adf60 2004-04-14 devnull /*--- blocksort.c ---*/
5 ff3adf60 2004-04-14 devnull /*-------------------------------------------------------------*/
8 ff3adf60 2004-04-14 devnull This file is a part of bzip2 and/or libbzip2, a program and
9 ff3adf60 2004-04-14 devnull library for lossless, block-sorting data compression.
11 ff3adf60 2004-04-14 devnull Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
13 ff3adf60 2004-04-14 devnull Redistribution and use in source and binary forms, with or without
14 ff3adf60 2004-04-14 devnull modification, are permitted provided that the following conditions
17 ff3adf60 2004-04-14 devnull 1. Redistributions of source code must retain the above copyright
18 ff3adf60 2004-04-14 devnull notice, this list of conditions and the following disclaimer.
20 ff3adf60 2004-04-14 devnull 2. The origin of this software must not be misrepresented; you must
21 ff3adf60 2004-04-14 devnull not claim that you wrote the original software. If you use this
22 ff3adf60 2004-04-14 devnull software in a product, an acknowledgment in the product
23 ff3adf60 2004-04-14 devnull documentation would be appreciated but is not required.
25 ff3adf60 2004-04-14 devnull 3. Altered source versions must be plainly marked as such, and must
26 ff3adf60 2004-04-14 devnull not be misrepresented as being the original software.
28 ff3adf60 2004-04-14 devnull 4. The name of the author may not be used to endorse or promote
29 ff3adf60 2004-04-14 devnull products derived from this software without specific prior written
30 ff3adf60 2004-04-14 devnull permission.
32 ff3adf60 2004-04-14 devnull THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
33 ff3adf60 2004-04-14 devnull OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
34 ff3adf60 2004-04-14 devnull WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35 ff3adf60 2004-04-14 devnull ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
36 ff3adf60 2004-04-14 devnull DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
37 ff3adf60 2004-04-14 devnull DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
38 ff3adf60 2004-04-14 devnull GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39 ff3adf60 2004-04-14 devnull INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
40 ff3adf60 2004-04-14 devnull WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41 ff3adf60 2004-04-14 devnull NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
42 ff3adf60 2004-04-14 devnull SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44 ff3adf60 2004-04-14 devnull Julian Seward, Cambridge, UK.
45 ff3adf60 2004-04-14 devnull jseward@acm.org
46 ff3adf60 2004-04-14 devnull bzip2/libbzip2 version 1.0 of 21 March 2000
48 ff3adf60 2004-04-14 devnull This program is based on (at least) the work of:
49 ff3adf60 2004-04-14 devnull Mike Burrows
50 ff3adf60 2004-04-14 devnull David Wheeler
51 ff3adf60 2004-04-14 devnull Peter Fenwick
52 ff3adf60 2004-04-14 devnull Alistair Moffat
53 ff3adf60 2004-04-14 devnull Radford Neal
54 ff3adf60 2004-04-14 devnull Ian H. Witten
55 ff3adf60 2004-04-14 devnull Robert Sedgewick
56 ff3adf60 2004-04-14 devnull Jon L. Bentley
58 ff3adf60 2004-04-14 devnull For more information on these sources, see the manual.
60 ff3adf60 2004-04-14 devnull To get some idea how the block sorting algorithms in this file
61 ff3adf60 2004-04-14 devnull work, read my paper
62 ff3adf60 2004-04-14 devnull On the Performance of BWT Sorting Algorithms
63 ff3adf60 2004-04-14 devnull in Proceedings of the IEEE Data Compression Conference 2000,
64 ff3adf60 2004-04-14 devnull Snowbird, Utah, USA, 27-30 March 2000. The main sort in this
65 ff3adf60 2004-04-14 devnull file implements the algorithm called cache in the paper.
68 ff3adf60 2004-04-14 devnull #include "os.h"
69 ff3adf60 2004-04-14 devnull #include "bzlib.h"
70 ff3adf60 2004-04-14 devnull #include "bzlib_private.h"
72 ff3adf60 2004-04-14 devnull /*---------------------------------------------*/
73 ff3adf60 2004-04-14 devnull /*--- Fallback O(N log(N)^2) sorting ---*/
74 ff3adf60 2004-04-14 devnull /*--- algorithm, for repetitive blocks ---*/
75 ff3adf60 2004-04-14 devnull /*---------------------------------------------*/
77 ff3adf60 2004-04-14 devnull /*---------------------------------------------*/
79 ff3adf60 2004-04-14 devnull __inline__
80 ff3adf60 2004-04-14 devnull void fallbackSimpleSort ( UInt32* fmap,
81 ff3adf60 2004-04-14 devnull UInt32* eclass,
82 ff3adf60 2004-04-14 devnull Int32 lo,
83 ff3adf60 2004-04-14 devnull Int32 hi )
85 ff3adf60 2004-04-14 devnull Int32 i, j, tmp;
86 ff3adf60 2004-04-14 devnull UInt32 ec_tmp;
88 ff3adf60 2004-04-14 devnull if (lo == hi) return;
90 ff3adf60 2004-04-14 devnull if (hi - lo > 3) {
91 ff3adf60 2004-04-14 devnull for ( i = hi-4; i >= lo; i-- ) {
92 ff3adf60 2004-04-14 devnull tmp = fmap[i];
93 ff3adf60 2004-04-14 devnull ec_tmp = eclass[tmp];
94 ff3adf60 2004-04-14 devnull for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
95 ff3adf60 2004-04-14 devnull fmap[j-4] = fmap[j];
96 ff3adf60 2004-04-14 devnull fmap[j-4] = tmp;
100 ff3adf60 2004-04-14 devnull for ( i = hi-1; i >= lo; i-- ) {
101 ff3adf60 2004-04-14 devnull tmp = fmap[i];
102 ff3adf60 2004-04-14 devnull ec_tmp = eclass[tmp];
103 ff3adf60 2004-04-14 devnull for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
104 ff3adf60 2004-04-14 devnull fmap[j-1] = fmap[j];
105 ff3adf60 2004-04-14 devnull fmap[j-1] = tmp;
110 ff3adf60 2004-04-14 devnull /*---------------------------------------------*/
111 ff3adf60 2004-04-14 devnull #define fswap(zz1, zz2) \
112 ff3adf60 2004-04-14 devnull { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
114 ff3adf60 2004-04-14 devnull #define fvswap(zzp1, zzp2, zzn) \
116 ff3adf60 2004-04-14 devnull Int32 yyp1 = (zzp1); \
117 ff3adf60 2004-04-14 devnull Int32 yyp2 = (zzp2); \
118 ff3adf60 2004-04-14 devnull Int32 yyn = (zzn); \
119 ff3adf60 2004-04-14 devnull while (yyn > 0) { \
120 ff3adf60 2004-04-14 devnull fswap(fmap[yyp1], fmap[yyp2]); \
121 ff3adf60 2004-04-14 devnull yyp1++; yyp2++; yyn--; \
126 ff3adf60 2004-04-14 devnull #define fmin(a,b) ((a) < (b)) ? (a) : (b)
128 ff3adf60 2004-04-14 devnull #define fpush(lz,hz) { stackLo[sp] = lz; \
129 ff3adf60 2004-04-14 devnull stackHi[sp] = hz; \
132 ff3adf60 2004-04-14 devnull #define fpop(lz,hz) { sp--; \
133 ff3adf60 2004-04-14 devnull lz = stackLo[sp]; \
134 ff3adf60 2004-04-14 devnull hz = stackHi[sp]; }
136 ff3adf60 2004-04-14 devnull #define FALLBACK_QSORT_SMALL_THRESH 10
137 ff3adf60 2004-04-14 devnull #define FALLBACK_QSORT_STACK_SIZE 100
141 ff3adf60 2004-04-14 devnull void fallbackQSort3 ( UInt32* fmap,
142 ff3adf60 2004-04-14 devnull UInt32* eclass,
143 ff3adf60 2004-04-14 devnull Int32 loSt,
144 ff3adf60 2004-04-14 devnull Int32 hiSt )
146 ff3adf60 2004-04-14 devnull Int32 unLo, unHi, ltLo, gtHi, n, m;
147 ff3adf60 2004-04-14 devnull Int32 sp, lo, hi;
148 ff3adf60 2004-04-14 devnull UInt32 med, r, r3;
149 ff3adf60 2004-04-14 devnull Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
150 ff3adf60 2004-04-14 devnull Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
155 ff3adf60 2004-04-14 devnull fpush ( loSt, hiSt );
157 ff3adf60 2004-04-14 devnull while (sp > 0) {
159 ff3adf60 2004-04-14 devnull AssertH ( sp < FALLBACK_QSORT_STACK_SIZE, 1004 );
161 ff3adf60 2004-04-14 devnull fpop ( lo, hi );
162 ff3adf60 2004-04-14 devnull if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
163 ff3adf60 2004-04-14 devnull fallbackSimpleSort ( fmap, eclass, lo, hi );
164 ff3adf60 2004-04-14 devnull continue;
167 ff3adf60 2004-04-14 devnull /* Random partitioning. Median of 3 sometimes fails to
168 ff3adf60 2004-04-14 devnull avoid bad cases. Median of 9 seems to help but
169 ff3adf60 2004-04-14 devnull looks rather expensive. This too seems to work but
170 ff3adf60 2004-04-14 devnull is cheaper. Guidance for the magic constants
171 ff3adf60 2004-04-14 devnull 7621 and 32768 is taken from Sedgewick's algorithms
172 ff3adf60 2004-04-14 devnull book, chapter 35.
174 ff3adf60 2004-04-14 devnull r = ((r * 7621) + 1) % 32768;
175 ff3adf60 2004-04-14 devnull r3 = r % 3;
176 ff3adf60 2004-04-14 devnull if (r3 == 0) med = eclass[fmap[lo]]; else
177 ff3adf60 2004-04-14 devnull if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
178 ff3adf60 2004-04-14 devnull med = eclass[fmap[hi]];
180 ff3adf60 2004-04-14 devnull unLo = ltLo = lo;
181 ff3adf60 2004-04-14 devnull unHi = gtHi = hi;
183 ff3adf60 2004-04-14 devnull while (1) {
184 ff3adf60 2004-04-14 devnull while (1) {
185 ff3adf60 2004-04-14 devnull if (unLo > unHi) break;
186 ff3adf60 2004-04-14 devnull n = (Int32)eclass[fmap[unLo]] - (Int32)med;
187 ff3adf60 2004-04-14 devnull if (n == 0) {
188 ff3adf60 2004-04-14 devnull fswap(fmap[unLo], fmap[ltLo]);
189 ff3adf60 2004-04-14 devnull ltLo++; unLo++;
190 ff3adf60 2004-04-14 devnull continue;
192 ff3adf60 2004-04-14 devnull if (n > 0) break;
195 ff3adf60 2004-04-14 devnull while (1) {
196 ff3adf60 2004-04-14 devnull if (unLo > unHi) break;
197 ff3adf60 2004-04-14 devnull n = (Int32)eclass[fmap[unHi]] - (Int32)med;
198 ff3adf60 2004-04-14 devnull if (n == 0) {
199 ff3adf60 2004-04-14 devnull fswap(fmap[unHi], fmap[gtHi]);
200 ff3adf60 2004-04-14 devnull gtHi--; unHi--;
201 ff3adf60 2004-04-14 devnull continue;
203 ff3adf60 2004-04-14 devnull if (n < 0) break;
206 ff3adf60 2004-04-14 devnull if (unLo > unHi) break;
207 ff3adf60 2004-04-14 devnull fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
210 ff3adf60 2004-04-14 devnull AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
212 ff3adf60 2004-04-14 devnull if (gtHi < ltLo) continue;
214 ff3adf60 2004-04-14 devnull n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
215 ff3adf60 2004-04-14 devnull m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
217 ff3adf60 2004-04-14 devnull n = lo + unLo - ltLo - 1;
218 ff3adf60 2004-04-14 devnull m = hi - (gtHi - unHi) + 1;
220 ff3adf60 2004-04-14 devnull if (n - lo > hi - m) {
221 ff3adf60 2004-04-14 devnull fpush ( lo, n );
222 ff3adf60 2004-04-14 devnull fpush ( m, hi );
223 ff3adf60 2004-04-14 devnull } else {
224 ff3adf60 2004-04-14 devnull fpush ( m, hi );
225 ff3adf60 2004-04-14 devnull fpush ( lo, n );
230 ff3adf60 2004-04-14 devnull #undef fmin
231 ff3adf60 2004-04-14 devnull #undef fpush
232 ff3adf60 2004-04-14 devnull #undef fpop
233 ff3adf60 2004-04-14 devnull #undef fswap
234 ff3adf60 2004-04-14 devnull #undef fvswap
235 ff3adf60 2004-04-14 devnull #undef FALLBACK_QSORT_SMALL_THRESH
236 ff3adf60 2004-04-14 devnull #undef FALLBACK_QSORT_STACK_SIZE
239 ff3adf60 2004-04-14 devnull /*---------------------------------------------*/
241 ff3adf60 2004-04-14 devnull nblock > 0
242 ff3adf60 2004-04-14 devnull eclass exists for [0 .. nblock-1]
243 ff3adf60 2004-04-14 devnull ((UChar*)eclass) [0 .. nblock-1] holds block
244 ff3adf60 2004-04-14 devnull ptr exists for [0 .. nblock-1]
247 ff3adf60 2004-04-14 devnull ((UChar*)eclass) [0 .. nblock-1] holds block
248 ff3adf60 2004-04-14 devnull All other areas of eclass destroyed
249 ff3adf60 2004-04-14 devnull fmap [0 .. nblock-1] holds sorted order
250 ff3adf60 2004-04-14 devnull bhtab [ 0 .. 2+(nblock/32) ] destroyed
253 ff3adf60 2004-04-14 devnull #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
254 ff3adf60 2004-04-14 devnull #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
255 ff3adf60 2004-04-14 devnull #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
256 ff3adf60 2004-04-14 devnull #define WORD_BH(zz) bhtab[(zz) >> 5]
257 ff3adf60 2004-04-14 devnull #define UNALIGNED_BH(zz) ((zz) & 0x01f)
260 ff3adf60 2004-04-14 devnull void fallbackSort ( UInt32* fmap,
261 ff3adf60 2004-04-14 devnull UInt32* eclass,
262 ff3adf60 2004-04-14 devnull UInt32* bhtab,
263 ff3adf60 2004-04-14 devnull Int32 nblock,
264 ff3adf60 2004-04-14 devnull Int32 verb )
266 ff3adf60 2004-04-14 devnull Int32 ftab[257];
267 ff3adf60 2004-04-14 devnull Int32 ftabCopy[256];
268 ff3adf60 2004-04-14 devnull Int32 H, i, j, k, l, r, cc, cc1;
269 ff3adf60 2004-04-14 devnull Int32 nNotDone;
270 ff3adf60 2004-04-14 devnull Int32 nBhtab;
271 ff3adf60 2004-04-14 devnull UChar* eclass8 = (UChar*)eclass;
274 ff3adf60 2004-04-14 devnull Initial 1-char radix sort to generate
275 ff3adf60 2004-04-14 devnull initial fmap and initial BH bits.
277 ff3adf60 2004-04-14 devnull if (verb >= 4)
278 ff3adf60 2004-04-14 devnull VPrintf0 ( " bucket sorting ...\n" );
279 ff3adf60 2004-04-14 devnull for (i = 0; i < 257; i++) ftab[i] = 0;
280 ff3adf60 2004-04-14 devnull for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
281 ff3adf60 2004-04-14 devnull for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
282 ff3adf60 2004-04-14 devnull for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
284 ff3adf60 2004-04-14 devnull for (i = 0; i < nblock; i++) {
285 ff3adf60 2004-04-14 devnull j = eclass8[i];
286 ff3adf60 2004-04-14 devnull k = ftab[j] - 1;
287 ff3adf60 2004-04-14 devnull ftab[j] = k;
288 ff3adf60 2004-04-14 devnull fmap[k] = i;
291 ff3adf60 2004-04-14 devnull nBhtab = 2 + (nblock / 32);
292 ff3adf60 2004-04-14 devnull for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
293 ff3adf60 2004-04-14 devnull for (i = 0; i < 256; i++) SET_BH(ftab[i]);
296 ff3adf60 2004-04-14 devnull Inductively refine the buckets. Kind-of an
297 ff3adf60 2004-04-14 devnull "exponential radix sort" (!), inspired by the
298 ff3adf60 2004-04-14 devnull Manber-Myers suffix array construction algorithm.
301 ff3adf60 2004-04-14 devnull /*-- set sentinel bits for block-end detection --*/
302 ff3adf60 2004-04-14 devnull for (i = 0; i < 32; i++) {
303 ff3adf60 2004-04-14 devnull SET_BH(nblock + 2*i);
304 ff3adf60 2004-04-14 devnull CLEAR_BH(nblock + 2*i + 1);
307 ff3adf60 2004-04-14 devnull /*-- the log(N) loop --*/
309 ff3adf60 2004-04-14 devnull while (1) {
311 ff3adf60 2004-04-14 devnull if (verb >= 4)
312 ff3adf60 2004-04-14 devnull VPrintf1 ( " depth %6d has ", H );
315 ff3adf60 2004-04-14 devnull for (i = 0; i < nblock; i++) {
316 ff3adf60 2004-04-14 devnull if (ISSET_BH(i)) j = i;
317 ff3adf60 2004-04-14 devnull k = fmap[i] - H; if (k < 0) k += nblock;
318 ff3adf60 2004-04-14 devnull eclass[k] = j;
321 ff3adf60 2004-04-14 devnull nNotDone = 0;
323 ff3adf60 2004-04-14 devnull while (1) {
325 ff3adf60 2004-04-14 devnull /*-- find the next non-singleton bucket --*/
326 ff3adf60 2004-04-14 devnull k = r + 1;
327 ff3adf60 2004-04-14 devnull while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
328 ff3adf60 2004-04-14 devnull if (ISSET_BH(k)) {
329 ff3adf60 2004-04-14 devnull while (WORD_BH(k) == 0xffffffff) k += 32;
330 ff3adf60 2004-04-14 devnull while (ISSET_BH(k)) k++;
332 ff3adf60 2004-04-14 devnull l = k - 1;
333 ff3adf60 2004-04-14 devnull if (l >= nblock) break;
334 ff3adf60 2004-04-14 devnull while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
335 ff3adf60 2004-04-14 devnull if (!ISSET_BH(k)) {
336 ff3adf60 2004-04-14 devnull while (WORD_BH(k) == 0x00000000) k += 32;
337 ff3adf60 2004-04-14 devnull while (!ISSET_BH(k)) k++;
339 ff3adf60 2004-04-14 devnull r = k - 1;
340 ff3adf60 2004-04-14 devnull if (r >= nblock) break;
342 ff3adf60 2004-04-14 devnull /*-- now [l, r] bracket current bucket --*/
343 ff3adf60 2004-04-14 devnull if (r > l) {
344 ff3adf60 2004-04-14 devnull nNotDone += (r - l + 1);
345 ff3adf60 2004-04-14 devnull fallbackQSort3 ( fmap, eclass, l, r );
347 ff3adf60 2004-04-14 devnull /*-- scan bucket and generate header bits-- */
348 ff3adf60 2004-04-14 devnull cc = -1;
349 ff3adf60 2004-04-14 devnull for (i = l; i <= r; i++) {
350 ff3adf60 2004-04-14 devnull cc1 = eclass[fmap[i]];
351 ff3adf60 2004-04-14 devnull if (cc != cc1) { SET_BH(i); cc = cc1; };
356 ff3adf60 2004-04-14 devnull if (verb >= 4)
357 ff3adf60 2004-04-14 devnull VPrintf1 ( "%6d unresolved strings\n", nNotDone );
360 ff3adf60 2004-04-14 devnull if (H > nblock || nNotDone == 0) break;
364 ff3adf60 2004-04-14 devnull Reconstruct the original block in
365 ff3adf60 2004-04-14 devnull eclass8 [0 .. nblock-1], since the
366 ff3adf60 2004-04-14 devnull previous phase destroyed it.
368 ff3adf60 2004-04-14 devnull if (verb >= 4)
369 ff3adf60 2004-04-14 devnull VPrintf0 ( " reconstructing block ...\n" );
371 ff3adf60 2004-04-14 devnull for (i = 0; i < nblock; i++) {
372 ff3adf60 2004-04-14 devnull while (ftabCopy[j] == 0) j++;
373 ff3adf60 2004-04-14 devnull ftabCopy[j]--;
374 ff3adf60 2004-04-14 devnull eclass8[fmap[i]] = (UChar)j;
376 ff3adf60 2004-04-14 devnull AssertH ( j < 256, 1005 );
379 ff3adf60 2004-04-14 devnull #undef SET_BH
380 ff3adf60 2004-04-14 devnull #undef CLEAR_BH
381 ff3adf60 2004-04-14 devnull #undef ISSET_BH
382 ff3adf60 2004-04-14 devnull #undef WORD_BH
383 ff3adf60 2004-04-14 devnull #undef UNALIGNED_BH
386 ff3adf60 2004-04-14 devnull /*---------------------------------------------*/
387 ff3adf60 2004-04-14 devnull /*--- The main, O(N^2 log(N)) sorting ---*/
388 ff3adf60 2004-04-14 devnull /*--- algorithm. Faster for "normal" ---*/
389 ff3adf60 2004-04-14 devnull /*--- non-repetitive blocks. ---*/
390 ff3adf60 2004-04-14 devnull /*---------------------------------------------*/
392 ff3adf60 2004-04-14 devnull /*---------------------------------------------*/
394 ff3adf60 2004-04-14 devnull __inline__
395 ff3adf60 2004-04-14 devnull Bool mainGtU ( UInt32 i1,
396 ff3adf60 2004-04-14 devnull UInt32 i2,
397 ff3adf60 2004-04-14 devnull UChar* block,
398 ff3adf60 2004-04-14 devnull UInt16* quadrant,
399 ff3adf60 2004-04-14 devnull UInt32 nblock,
400 ff3adf60 2004-04-14 devnull Int32* budget )
402 ff3adf60 2004-04-14 devnull Int32 k;
403 ff3adf60 2004-04-14 devnull UChar c1, c2;
404 ff3adf60 2004-04-14 devnull UInt16 s1, s2;
406 ff3adf60 2004-04-14 devnull AssertD ( i1 != i2, "mainGtU" );
408 ff3adf60 2004-04-14 devnull c1 = block[i1]; c2 = block[i2];
409 ff3adf60 2004-04-14 devnull if (c1 != c2) return (c1 > c2);
410 ff3adf60 2004-04-14 devnull i1++; i2++;
412 ff3adf60 2004-04-14 devnull c1 = block[i1]; c2 = block[i2];
413 ff3adf60 2004-04-14 devnull if (c1 != c2) return (c1 > c2);
414 ff3adf60 2004-04-14 devnull i1++; i2++;
416 ff3adf60 2004-04-14 devnull c1 = block[i1]; c2 = block[i2];
417 ff3adf60 2004-04-14 devnull if (c1 != c2) return (c1 > c2);
418 ff3adf60 2004-04-14 devnull i1++; i2++;
420 ff3adf60 2004-04-14 devnull c1 = block[i1]; c2 = block[i2];
421 ff3adf60 2004-04-14 devnull if (c1 != c2) return (c1 > c2);
422 ff3adf60 2004-04-14 devnull i1++; i2++;
424 ff3adf60 2004-04-14 devnull c1 = block[i1]; c2 = block[i2];
425 ff3adf60 2004-04-14 devnull if (c1 != c2) return (c1 > c2);
426 ff3adf60 2004-04-14 devnull i1++; i2++;
428 ff3adf60 2004-04-14 devnull c1 = block[i1]; c2 = block[i2];
429 ff3adf60 2004-04-14 devnull if (c1 != c2) return (c1 > c2);
430 ff3adf60 2004-04-14 devnull i1++; i2++;
432 ff3adf60 2004-04-14 devnull c1 = block[i1]; c2 = block[i2];
433 ff3adf60 2004-04-14 devnull if (c1 != c2) return (c1 > c2);
434 ff3adf60 2004-04-14 devnull i1++; i2++;
436 ff3adf60 2004-04-14 devnull c1 = block[i1]; c2 = block[i2];
437 ff3adf60 2004-04-14 devnull if (c1 != c2) return (c1 > c2);
438 ff3adf60 2004-04-14 devnull i1++; i2++;
440 ff3adf60 2004-04-14 devnull c1 = block[i1]; c2 = block[i2];
441 ff3adf60 2004-04-14 devnull if (c1 != c2) return (c1 > c2);
442 ff3adf60 2004-04-14 devnull i1++; i2++;
443 ff3adf60 2004-04-14 devnull /* 10 */
444 ff3adf60 2004-04-14 devnull c1 = block[i1]; c2 = block[i2];
445 ff3adf60 2004-04-14 devnull if (c1 != c2) return (c1 > c2);
446 ff3adf60 2004-04-14 devnull i1++; i2++;
447 ff3adf60 2004-04-14 devnull /* 11 */
448 ff3adf60 2004-04-14 devnull c1 = block[i1]; c2 = block[i2];
449 ff3adf60 2004-04-14 devnull if (c1 != c2) return (c1 > c2);
450 ff3adf60 2004-04-14 devnull i1++; i2++;
451 ff3adf60 2004-04-14 devnull /* 12 */
452 ff3adf60 2004-04-14 devnull c1 = block[i1]; c2 = block[i2];
453 ff3adf60 2004-04-14 devnull if (c1 != c2) return (c1 > c2);
454 ff3adf60 2004-04-14 devnull i1++; i2++;
456 ff3adf60 2004-04-14 devnull k = nblock + 8;
460 ff3adf60 2004-04-14 devnull c1 = block[i1]; c2 = block[i2];
461 ff3adf60 2004-04-14 devnull if (c1 != c2) return (c1 > c2);
462 ff3adf60 2004-04-14 devnull s1 = quadrant[i1]; s2 = quadrant[i2];
463 ff3adf60 2004-04-14 devnull if (s1 != s2) return (s1 > s2);
464 ff3adf60 2004-04-14 devnull i1++; i2++;
466 ff3adf60 2004-04-14 devnull c1 = block[i1]; c2 = block[i2];
467 ff3adf60 2004-04-14 devnull if (c1 != c2) return (c1 > c2);
468 ff3adf60 2004-04-14 devnull s1 = quadrant[i1]; s2 = quadrant[i2];
469 ff3adf60 2004-04-14 devnull if (s1 != s2) return (s1 > s2);
470 ff3adf60 2004-04-14 devnull i1++; i2++;
472 ff3adf60 2004-04-14 devnull c1 = block[i1]; c2 = block[i2];
473 ff3adf60 2004-04-14 devnull if (c1 != c2) return (c1 > c2);
474 ff3adf60 2004-04-14 devnull s1 = quadrant[i1]; s2 = quadrant[i2];
475 ff3adf60 2004-04-14 devnull if (s1 != s2) return (s1 > s2);
476 ff3adf60 2004-04-14 devnull i1++; i2++;
478 ff3adf60 2004-04-14 devnull c1 = block[i1]; c2 = block[i2];
479 ff3adf60 2004-04-14 devnull if (c1 != c2) return (c1 > c2);
480 ff3adf60 2004-04-14 devnull s1 = quadrant[i1]; s2 = quadrant[i2];
481 ff3adf60 2004-04-14 devnull if (s1 != s2) return (s1 > s2);
482 ff3adf60 2004-04-14 devnull i1++; i2++;
484 ff3adf60 2004-04-14 devnull c1 = block[i1]; c2 = block[i2];
485 ff3adf60 2004-04-14 devnull if (c1 != c2) return (c1 > c2);
486 ff3adf60 2004-04-14 devnull s1 = quadrant[i1]; s2 = quadrant[i2];
487 ff3adf60 2004-04-14 devnull if (s1 != s2) return (s1 > s2);
488 ff3adf60 2004-04-14 devnull i1++; i2++;
490 ff3adf60 2004-04-14 devnull c1 = block[i1]; c2 = block[i2];
491 ff3adf60 2004-04-14 devnull if (c1 != c2) return (c1 > c2);
492 ff3adf60 2004-04-14 devnull s1 = quadrant[i1]; s2 = quadrant[i2];
493 ff3adf60 2004-04-14 devnull if (s1 != s2) return (s1 > s2);
494 ff3adf60 2004-04-14 devnull i1++; i2++;
496 ff3adf60 2004-04-14 devnull c1 = block[i1]; c2 = block[i2];
497 ff3adf60 2004-04-14 devnull if (c1 != c2) return (c1 > c2);
498 ff3adf60 2004-04-14 devnull s1 = quadrant[i1]; s2 = quadrant[i2];
499 ff3adf60 2004-04-14 devnull if (s1 != s2) return (s1 > s2);
500 ff3adf60 2004-04-14 devnull i1++; i2++;
502 ff3adf60 2004-04-14 devnull c1 = block[i1]; c2 = block[i2];
503 ff3adf60 2004-04-14 devnull if (c1 != c2) return (c1 > c2);
504 ff3adf60 2004-04-14 devnull s1 = quadrant[i1]; s2 = quadrant[i2];
505 ff3adf60 2004-04-14 devnull if (s1 != s2) return (s1 > s2);
506 ff3adf60 2004-04-14 devnull i1++; i2++;
508 ff3adf60 2004-04-14 devnull if (i1 >= nblock) i1 -= nblock;
509 ff3adf60 2004-04-14 devnull if (i2 >= nblock) i2 -= nblock;
512 ff3adf60 2004-04-14 devnull (*budget)--;
514 ff3adf60 2004-04-14 devnull while (k >= 0);
516 ff3adf60 2004-04-14 devnull return False;
520 ff3adf60 2004-04-14 devnull /*---------------------------------------------*/
522 ff3adf60 2004-04-14 devnull Knuth's increments seem to work better
523 ff3adf60 2004-04-14 devnull than Incerpi-Sedgewick here. Possibly
524 ff3adf60 2004-04-14 devnull because the number of elems to sort is
525 ff3adf60 2004-04-14 devnull usually small, typically <= 20.
528 ff3adf60 2004-04-14 devnull Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
529 ff3adf60 2004-04-14 devnull 9841, 29524, 88573, 265720,
530 ff3adf60 2004-04-14 devnull 797161, 2391484 };
533 ff3adf60 2004-04-14 devnull void mainSimpleSort ( UInt32* ptr,
534 ff3adf60 2004-04-14 devnull UChar* block,
535 ff3adf60 2004-04-14 devnull UInt16* quadrant,
536 ff3adf60 2004-04-14 devnull Int32 nblock,
537 ff3adf60 2004-04-14 devnull Int32 lo,
538 ff3adf60 2004-04-14 devnull Int32 hi,
539 ff3adf60 2004-04-14 devnull Int32 d,
540 ff3adf60 2004-04-14 devnull Int32* budget )
542 ff3adf60 2004-04-14 devnull Int32 i, j, h, bigN, hp;
543 ff3adf60 2004-04-14 devnull UInt32 v;
545 ff3adf60 2004-04-14 devnull bigN = hi - lo + 1;
546 ff3adf60 2004-04-14 devnull if (bigN < 2) return;
549 ff3adf60 2004-04-14 devnull while (incs[hp] < bigN) hp++;
552 ff3adf60 2004-04-14 devnull for (; hp >= 0; hp--) {
553 ff3adf60 2004-04-14 devnull h = incs[hp];
555 ff3adf60 2004-04-14 devnull i = lo + h;
556 ff3adf60 2004-04-14 devnull while (True) {
558 ff3adf60 2004-04-14 devnull /*-- copy 1 --*/
559 ff3adf60 2004-04-14 devnull if (i > hi) break;
560 ff3adf60 2004-04-14 devnull v = ptr[i];
562 ff3adf60 2004-04-14 devnull while ( mainGtU (
563 ff3adf60 2004-04-14 devnull ptr[j-h]+d, v+d, block, quadrant, nblock, budget
565 ff3adf60 2004-04-14 devnull ptr[j] = ptr[j-h];
566 ff3adf60 2004-04-14 devnull j = j - h;
567 ff3adf60 2004-04-14 devnull if (j <= (lo + h - 1)) break;
569 ff3adf60 2004-04-14 devnull ptr[j] = v;
572 ff3adf60 2004-04-14 devnull /*-- copy 2 --*/
573 ff3adf60 2004-04-14 devnull if (i > hi) break;
574 ff3adf60 2004-04-14 devnull v = ptr[i];
576 ff3adf60 2004-04-14 devnull while ( mainGtU (
577 ff3adf60 2004-04-14 devnull ptr[j-h]+d, v+d, block, quadrant, nblock, budget
579 ff3adf60 2004-04-14 devnull ptr[j] = ptr[j-h];
580 ff3adf60 2004-04-14 devnull j = j - h;
581 ff3adf60 2004-04-14 devnull if (j <= (lo + h - 1)) break;
583 ff3adf60 2004-04-14 devnull ptr[j] = v;
586 ff3adf60 2004-04-14 devnull /*-- copy 3 --*/
587 ff3adf60 2004-04-14 devnull if (i > hi) break;
588 ff3adf60 2004-04-14 devnull v = ptr[i];
590 ff3adf60 2004-04-14 devnull while ( mainGtU (
591 ff3adf60 2004-04-14 devnull ptr[j-h]+d, v+d, block, quadrant, nblock, budget
593 ff3adf60 2004-04-14 devnull ptr[j] = ptr[j-h];
594 ff3adf60 2004-04-14 devnull j = j - h;
595 ff3adf60 2004-04-14 devnull if (j <= (lo + h - 1)) break;
597 ff3adf60 2004-04-14 devnull ptr[j] = v;
600 ff3adf60 2004-04-14 devnull if (*budget < 0) return;
606 ff3adf60 2004-04-14 devnull /*---------------------------------------------*/
608 ff3adf60 2004-04-14 devnull The following is an implementation of
609 ff3adf60 2004-04-14 devnull an elegant 3-way quicksort for strings,
610 ff3adf60 2004-04-14 devnull described in a paper "Fast Algorithms for
611 ff3adf60 2004-04-14 devnull Sorting and Searching Strings", by Robert
612 ff3adf60 2004-04-14 devnull Sedgewick and Jon L. Bentley.
615 ff3adf60 2004-04-14 devnull #define mswap(zz1, zz2) \
616 ff3adf60 2004-04-14 devnull { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
618 ff3adf60 2004-04-14 devnull #define mvswap(zzp1, zzp2, zzn) \
620 ff3adf60 2004-04-14 devnull Int32 yyp1 = (zzp1); \
621 ff3adf60 2004-04-14 devnull Int32 yyp2 = (zzp2); \
622 ff3adf60 2004-04-14 devnull Int32 yyn = (zzn); \
623 ff3adf60 2004-04-14 devnull while (yyn > 0) { \
624 ff3adf60 2004-04-14 devnull mswap(ptr[yyp1], ptr[yyp2]); \
625 ff3adf60 2004-04-14 devnull yyp1++; yyp2++; yyn--; \
630 ff3adf60 2004-04-14 devnull __inline__
631 ff3adf60 2004-04-14 devnull UChar mmed3 ( UChar a, UChar b, UChar c )
633 ff3adf60 2004-04-14 devnull UChar t;
634 ff3adf60 2004-04-14 devnull if (a > b) { t = a; a = b; b = t; };
635 ff3adf60 2004-04-14 devnull if (b > c) {
637 ff3adf60 2004-04-14 devnull if (a > b) b = a;
639 ff3adf60 2004-04-14 devnull return b;
642 ff3adf60 2004-04-14 devnull #define mmin(a,b) ((a) < (b)) ? (a) : (b)
644 ff3adf60 2004-04-14 devnull #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
645 ff3adf60 2004-04-14 devnull stackHi[sp] = hz; \
646 ff3adf60 2004-04-14 devnull stackD [sp] = dz; \
649 ff3adf60 2004-04-14 devnull #define mpop(lz,hz,dz) { sp--; \
650 ff3adf60 2004-04-14 devnull lz = stackLo[sp]; \
651 ff3adf60 2004-04-14 devnull hz = stackHi[sp]; \
652 ff3adf60 2004-04-14 devnull dz = stackD [sp]; }
655 ff3adf60 2004-04-14 devnull #define mnextsize(az) (nextHi[az]-nextLo[az])
657 ff3adf60 2004-04-14 devnull #define mnextswap(az,bz) \
658 ff3adf60 2004-04-14 devnull { Int32 tz; \
659 ff3adf60 2004-04-14 devnull tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
660 ff3adf60 2004-04-14 devnull tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
661 ff3adf60 2004-04-14 devnull tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
664 ff3adf60 2004-04-14 devnull #define MAIN_QSORT_SMALL_THRESH 20
665 ff3adf60 2004-04-14 devnull #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
666 ff3adf60 2004-04-14 devnull #define MAIN_QSORT_STACK_SIZE 100
669 ff3adf60 2004-04-14 devnull void mainQSort3 ( UInt32* ptr,
670 ff3adf60 2004-04-14 devnull UChar* block,
671 ff3adf60 2004-04-14 devnull UInt16* quadrant,
672 ff3adf60 2004-04-14 devnull Int32 nblock,
673 ff3adf60 2004-04-14 devnull Int32 loSt,
674 ff3adf60 2004-04-14 devnull Int32 hiSt,
675 ff3adf60 2004-04-14 devnull Int32 dSt,
676 ff3adf60 2004-04-14 devnull Int32* budget )
678 ff3adf60 2004-04-14 devnull Int32 unLo, unHi, ltLo, gtHi, n, m, med;
679 ff3adf60 2004-04-14 devnull Int32 sp, lo, hi, d;
681 ff3adf60 2004-04-14 devnull Int32 stackLo[MAIN_QSORT_STACK_SIZE];
682 ff3adf60 2004-04-14 devnull Int32 stackHi[MAIN_QSORT_STACK_SIZE];
683 ff3adf60 2004-04-14 devnull Int32 stackD [MAIN_QSORT_STACK_SIZE];
685 ff3adf60 2004-04-14 devnull Int32 nextLo[3];
686 ff3adf60 2004-04-14 devnull Int32 nextHi[3];
687 ff3adf60 2004-04-14 devnull Int32 nextD [3];
690 ff3adf60 2004-04-14 devnull mpush ( loSt, hiSt, dSt );
692 ff3adf60 2004-04-14 devnull while (sp > 0) {
694 ff3adf60 2004-04-14 devnull AssertH ( sp < MAIN_QSORT_STACK_SIZE, 1001 );
696 ff3adf60 2004-04-14 devnull mpop ( lo, hi, d );
697 ff3adf60 2004-04-14 devnull if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
698 ff3adf60 2004-04-14 devnull d > MAIN_QSORT_DEPTH_THRESH) {
699 ff3adf60 2004-04-14 devnull mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
700 ff3adf60 2004-04-14 devnull if (*budget < 0) return;
701 ff3adf60 2004-04-14 devnull continue;
704 ff3adf60 2004-04-14 devnull med = (Int32)
705 ff3adf60 2004-04-14 devnull mmed3 ( block[ptr[ lo ]+d],
706 ff3adf60 2004-04-14 devnull block[ptr[ hi ]+d],
707 ff3adf60 2004-04-14 devnull block[ptr[ (lo+hi)>>1 ]+d] );
709 ff3adf60 2004-04-14 devnull unLo = ltLo = lo;
710 ff3adf60 2004-04-14 devnull unHi = gtHi = hi;
712 ff3adf60 2004-04-14 devnull while (True) {
713 ff3adf60 2004-04-14 devnull while (True) {
714 ff3adf60 2004-04-14 devnull if (unLo > unHi) break;
715 ff3adf60 2004-04-14 devnull n = ((Int32)block[ptr[unLo]+d]) - med;
716 ff3adf60 2004-04-14 devnull if (n == 0) {
717 ff3adf60 2004-04-14 devnull mswap(ptr[unLo], ptr[ltLo]);
718 ff3adf60 2004-04-14 devnull ltLo++; unLo++; continue;
720 ff3adf60 2004-04-14 devnull if (n > 0) break;
723 ff3adf60 2004-04-14 devnull while (True) {
724 ff3adf60 2004-04-14 devnull if (unLo > unHi) break;
725 ff3adf60 2004-04-14 devnull n = ((Int32)block[ptr[unHi]+d]) - med;
726 ff3adf60 2004-04-14 devnull if (n == 0) {
727 ff3adf60 2004-04-14 devnull mswap(ptr[unHi], ptr[gtHi]);
728 ff3adf60 2004-04-14 devnull gtHi--; unHi--; continue;
730 ff3adf60 2004-04-14 devnull if (n < 0) break;
733 ff3adf60 2004-04-14 devnull if (unLo > unHi) break;
734 ff3adf60 2004-04-14 devnull mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
737 ff3adf60 2004-04-14 devnull AssertD ( unHi == unLo-1, "mainQSort3(2)" );
739 ff3adf60 2004-04-14 devnull if (gtHi < ltLo) {
740 ff3adf60 2004-04-14 devnull mpush(lo, hi, d+1 );
741 ff3adf60 2004-04-14 devnull continue;
744 ff3adf60 2004-04-14 devnull n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
745 ff3adf60 2004-04-14 devnull m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
747 ff3adf60 2004-04-14 devnull n = lo + unLo - ltLo - 1;
748 ff3adf60 2004-04-14 devnull m = hi - (gtHi - unHi) + 1;
750 ff3adf60 2004-04-14 devnull nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
751 ff3adf60 2004-04-14 devnull nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
752 ff3adf60 2004-04-14 devnull nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
754 ff3adf60 2004-04-14 devnull if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
755 ff3adf60 2004-04-14 devnull if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
756 ff3adf60 2004-04-14 devnull if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
758 ff3adf60 2004-04-14 devnull AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
759 ff3adf60 2004-04-14 devnull AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
761 ff3adf60 2004-04-14 devnull mpush (nextLo[0], nextHi[0], nextD[0]);
762 ff3adf60 2004-04-14 devnull mpush (nextLo[1], nextHi[1], nextD[1]);
763 ff3adf60 2004-04-14 devnull mpush (nextLo[2], nextHi[2], nextD[2]);
767 ff3adf60 2004-04-14 devnull #undef mswap
768 ff3adf60 2004-04-14 devnull #undef mvswap
769 ff3adf60 2004-04-14 devnull #undef mpush
770 ff3adf60 2004-04-14 devnull #undef mpop
771 ff3adf60 2004-04-14 devnull #undef mmin
772 ff3adf60 2004-04-14 devnull #undef mnextsize
773 ff3adf60 2004-04-14 devnull #undef mnextswap
774 ff3adf60 2004-04-14 devnull #undef MAIN_QSORT_SMALL_THRESH
775 ff3adf60 2004-04-14 devnull #undef MAIN_QSORT_DEPTH_THRESH
776 ff3adf60 2004-04-14 devnull #undef MAIN_QSORT_STACK_SIZE
779 ff3adf60 2004-04-14 devnull /*---------------------------------------------*/
781 ff3adf60 2004-04-14 devnull nblock > N_OVERSHOOT
782 ff3adf60 2004-04-14 devnull block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
783 ff3adf60 2004-04-14 devnull ((UChar*)block32) [0 .. nblock-1] holds block
784 ff3adf60 2004-04-14 devnull ptr exists for [0 .. nblock-1]
787 ff3adf60 2004-04-14 devnull ((UChar*)block32) [0 .. nblock-1] holds block
788 ff3adf60 2004-04-14 devnull All other areas of block32 destroyed
789 ff3adf60 2004-04-14 devnull ftab [0 .. 65536 ] destroyed
790 ff3adf60 2004-04-14 devnull ptr [0 .. nblock-1] holds sorted order
791 ff3adf60 2004-04-14 devnull if (*budget < 0), sorting was abandoned
794 ff3adf60 2004-04-14 devnull #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
795 ff3adf60 2004-04-14 devnull #define SETMASK (1 << 21)
796 ff3adf60 2004-04-14 devnull #define CLEARMASK (~(SETMASK))
799 ff3adf60 2004-04-14 devnull void mainSort ( UInt32* ptr,
800 ff3adf60 2004-04-14 devnull UChar* block,
801 ff3adf60 2004-04-14 devnull UInt16* quadrant,
802 ff3adf60 2004-04-14 devnull UInt32* ftab,
803 ff3adf60 2004-04-14 devnull Int32 nblock,
804 ff3adf60 2004-04-14 devnull Int32 verb,
805 ff3adf60 2004-04-14 devnull Int32* budget )
807 ff3adf60 2004-04-14 devnull Int32 i, j, k, ss, sb;
808 ff3adf60 2004-04-14 devnull Int32 runningOrder[256];
809 ff3adf60 2004-04-14 devnull Bool bigDone[256];
810 ff3adf60 2004-04-14 devnull Int32 copyStart[256];
811 ff3adf60 2004-04-14 devnull Int32 copyEnd [256];
812 ff3adf60 2004-04-14 devnull UChar c1;
813 ff3adf60 2004-04-14 devnull Int32 numQSorted;
814 ff3adf60 2004-04-14 devnull UInt16 s;
815 ff3adf60 2004-04-14 devnull if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
817 ff3adf60 2004-04-14 devnull /*-- set up the 2-byte frequency table --*/
818 ff3adf60 2004-04-14 devnull for (i = 65536; i >= 0; i--) ftab[i] = 0;
820 ff3adf60 2004-04-14 devnull j = block[0] << 8;
821 ff3adf60 2004-04-14 devnull i = nblock-1;
822 ff3adf60 2004-04-14 devnull for (; i >= 3; i -= 4) {
823 ff3adf60 2004-04-14 devnull quadrant[i] = 0;
824 ff3adf60 2004-04-14 devnull j = (j >> 8) | ( ((UInt16)block[i]) << 8);
825 ff3adf60 2004-04-14 devnull ftab[j]++;
826 ff3adf60 2004-04-14 devnull quadrant[i-1] = 0;
827 ff3adf60 2004-04-14 devnull j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
828 ff3adf60 2004-04-14 devnull ftab[j]++;
829 ff3adf60 2004-04-14 devnull quadrant[i-2] = 0;
830 ff3adf60 2004-04-14 devnull j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
831 ff3adf60 2004-04-14 devnull ftab[j]++;
832 ff3adf60 2004-04-14 devnull quadrant[i-3] = 0;
833 ff3adf60 2004-04-14 devnull j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
834 ff3adf60 2004-04-14 devnull ftab[j]++;
836 ff3adf60 2004-04-14 devnull for (; i >= 0; i--) {
837 ff3adf60 2004-04-14 devnull quadrant[i] = 0;
838 ff3adf60 2004-04-14 devnull j = (j >> 8) | ( ((UInt16)block[i]) << 8);
839 ff3adf60 2004-04-14 devnull ftab[j]++;
842 ff3adf60 2004-04-14 devnull /*-- (emphasises close relationship of block & quadrant) --*/
843 ff3adf60 2004-04-14 devnull for (i = 0; i < BZ_N_OVERSHOOT; i++) {
844 ff3adf60 2004-04-14 devnull block [nblock+i] = block[i];
845 ff3adf60 2004-04-14 devnull quadrant[nblock+i] = 0;
848 ff3adf60 2004-04-14 devnull if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
850 ff3adf60 2004-04-14 devnull /*-- Complete the initial radix sort --*/
851 ff3adf60 2004-04-14 devnull for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
853 ff3adf60 2004-04-14 devnull s = block[0] << 8;
854 ff3adf60 2004-04-14 devnull i = nblock-1;
855 ff3adf60 2004-04-14 devnull for (; i >= 3; i -= 4) {
856 ff3adf60 2004-04-14 devnull s = (s >> 8) | (block[i] << 8);
857 ff3adf60 2004-04-14 devnull j = ftab[s] -1;
858 ff3adf60 2004-04-14 devnull ftab[s] = j;
859 ff3adf60 2004-04-14 devnull ptr[j] = i;
860 ff3adf60 2004-04-14 devnull s = (s >> 8) | (block[i-1] << 8);
861 ff3adf60 2004-04-14 devnull j = ftab[s] -1;
862 ff3adf60 2004-04-14 devnull ftab[s] = j;
863 ff3adf60 2004-04-14 devnull ptr[j] = i-1;
864 ff3adf60 2004-04-14 devnull s = (s >> 8) | (block[i-2] << 8);
865 ff3adf60 2004-04-14 devnull j = ftab[s] -1;
866 ff3adf60 2004-04-14 devnull ftab[s] = j;
867 ff3adf60 2004-04-14 devnull ptr[j] = i-2;
868 ff3adf60 2004-04-14 devnull s = (s >> 8) | (block[i-3] << 8);
869 ff3adf60 2004-04-14 devnull j = ftab[s] -1;
870 ff3adf60 2004-04-14 devnull ftab[s] = j;
871 ff3adf60 2004-04-14 devnull ptr[j] = i-3;
873 ff3adf60 2004-04-14 devnull for (; i >= 0; i--) {
874 ff3adf60 2004-04-14 devnull s = (s >> 8) | (block[i] << 8);
875 ff3adf60 2004-04-14 devnull j = ftab[s] -1;
876 ff3adf60 2004-04-14 devnull ftab[s] = j;
877 ff3adf60 2004-04-14 devnull ptr[j] = i;
881 ff3adf60 2004-04-14 devnull Now ftab contains the first loc of every small bucket.
882 ff3adf60 2004-04-14 devnull Calculate the running order, from smallest to largest
883 ff3adf60 2004-04-14 devnull big bucket.
885 ff3adf60 2004-04-14 devnull for (i = 0; i <= 255; i++) {
886 ff3adf60 2004-04-14 devnull bigDone [i] = False;
887 ff3adf60 2004-04-14 devnull runningOrder[i] = i;
891 ff3adf60 2004-04-14 devnull Int32 vv;
892 ff3adf60 2004-04-14 devnull Int32 h = 1;
893 ff3adf60 2004-04-14 devnull do h = 3 * h + 1; while (h <= 256);
895 ff3adf60 2004-04-14 devnull h = h / 3;
896 ff3adf60 2004-04-14 devnull for (i = h; i <= 255; i++) {
897 ff3adf60 2004-04-14 devnull vv = runningOrder[i];
899 ff3adf60 2004-04-14 devnull while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
900 ff3adf60 2004-04-14 devnull runningOrder[j] = runningOrder[j-h];
901 ff3adf60 2004-04-14 devnull j = j - h;
902 ff3adf60 2004-04-14 devnull if (j <= (h - 1)) goto zero;
905 ff3adf60 2004-04-14 devnull runningOrder[j] = vv;
907 ff3adf60 2004-04-14 devnull } while (h != 1);
911 ff3adf60 2004-04-14 devnull The main sorting loop.
914 ff3adf60 2004-04-14 devnull numQSorted = 0;
916 ff3adf60 2004-04-14 devnull for (i = 0; i <= 255; i++) {
919 ff3adf60 2004-04-14 devnull Process big buckets, starting with the least full.
920 ff3adf60 2004-04-14 devnull Basically this is a 3-step process in which we call
921 ff3adf60 2004-04-14 devnull mainQSort3 to sort the small buckets [ss, j], but
922 ff3adf60 2004-04-14 devnull also make a big effort to avoid the calls if we can.
924 ff3adf60 2004-04-14 devnull ss = runningOrder[i];
928 ff3adf60 2004-04-14 devnull Complete the big bucket [ss] by quicksorting
929 ff3adf60 2004-04-14 devnull any unsorted small buckets [ss, j], for j != ss.
930 ff3adf60 2004-04-14 devnull Hopefully previous pointer-scanning phases have already
931 ff3adf60 2004-04-14 devnull completed many of the small buckets [ss, j], so
932 ff3adf60 2004-04-14 devnull we don't have to sort them at all.
934 ff3adf60 2004-04-14 devnull for (j = 0; j <= 255; j++) {
935 ff3adf60 2004-04-14 devnull if (j != ss) {
936 ff3adf60 2004-04-14 devnull sb = (ss << 8) + j;
937 ff3adf60 2004-04-14 devnull if ( ! (ftab[sb] & SETMASK) ) {
938 ff3adf60 2004-04-14 devnull Int32 lo = ftab[sb] & CLEARMASK;
939 ff3adf60 2004-04-14 devnull Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
940 ff3adf60 2004-04-14 devnull if (hi > lo) {
941 ff3adf60 2004-04-14 devnull if (verb >= 4)
942 ff3adf60 2004-04-14 devnull VPrintf4 ( " qsort [0x%x, 0x%x] "
943 ff3adf60 2004-04-14 devnull "done %d this %d\n",
944 ff3adf60 2004-04-14 devnull ss, j, numQSorted, hi - lo + 1 );
945 ff3adf60 2004-04-14 devnull mainQSort3 (
946 ff3adf60 2004-04-14 devnull ptr, block, quadrant, nblock,
947 ff3adf60 2004-04-14 devnull lo, hi, BZ_N_RADIX, budget
949 ff3adf60 2004-04-14 devnull numQSorted += (hi - lo + 1);
950 ff3adf60 2004-04-14 devnull if (*budget < 0) return;
953 ff3adf60 2004-04-14 devnull ftab[sb] |= SETMASK;
957 ff3adf60 2004-04-14 devnull AssertH ( !bigDone[ss], 1006 );
961 ff3adf60 2004-04-14 devnull Now scan this big bucket [ss] so as to synthesise the
962 ff3adf60 2004-04-14 devnull sorted order for small buckets [t, ss] for all t,
963 ff3adf60 2004-04-14 devnull including, magically, the bucket [ss,ss] too.
964 ff3adf60 2004-04-14 devnull This will avoid doing Real Work in subsequent Step 1's.
967 ff3adf60 2004-04-14 devnull for (j = 0; j <= 255; j++) {
968 ff3adf60 2004-04-14 devnull copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
969 ff3adf60 2004-04-14 devnull copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
971 ff3adf60 2004-04-14 devnull for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
972 ff3adf60 2004-04-14 devnull k = ptr[j]-1; if (k < 0) k += nblock;
973 ff3adf60 2004-04-14 devnull c1 = block[k];
974 ff3adf60 2004-04-14 devnull if (!bigDone[c1])
975 ff3adf60 2004-04-14 devnull ptr[ copyStart[c1]++ ] = k;
977 ff3adf60 2004-04-14 devnull for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
978 ff3adf60 2004-04-14 devnull k = ptr[j]-1; if (k < 0) k += nblock;
979 ff3adf60 2004-04-14 devnull c1 = block[k];
980 ff3adf60 2004-04-14 devnull if (!bigDone[c1])
981 ff3adf60 2004-04-14 devnull ptr[ copyEnd[c1]-- ] = k;
985 ff3adf60 2004-04-14 devnull AssertH ( copyStart[ss]-1 == copyEnd[ss], 1007 );
987 ff3adf60 2004-04-14 devnull for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
991 ff3adf60 2004-04-14 devnull The [ss] big bucket is now done. Record this fact,
992 ff3adf60 2004-04-14 devnull and update the quadrant descriptors. Remember to
993 ff3adf60 2004-04-14 devnull update quadrants in the overshoot area too, if
994 ff3adf60 2004-04-14 devnull necessary. The "if (i < 255)" test merely skips
995 ff3adf60 2004-04-14 devnull this updating for the last bucket processed, since
996 ff3adf60 2004-04-14 devnull updating for the last bucket is pointless.
998 ff3adf60 2004-04-14 devnull The quadrant array provides a way to incrementally
999 ff3adf60 2004-04-14 devnull cache sort orderings, as they appear, so as to
1000 ff3adf60 2004-04-14 devnull make subsequent comparisons in fullGtU() complete
1001 ff3adf60 2004-04-14 devnull faster. For repetitive blocks this makes a big
1002 ff3adf60 2004-04-14 devnull difference (but not big enough to be able to avoid
1003 ff3adf60 2004-04-14 devnull the fallback sorting mechanism, exponential radix sort).
1005 ff3adf60 2004-04-14 devnull The precise meaning is: at all times:
1007 ff3adf60 2004-04-14 devnull for 0 <= i < nblock and 0 <= j <= nblock
1009 ff3adf60 2004-04-14 devnull if block[i] != block[j],
1011 ff3adf60 2004-04-14 devnull then the relative values of quadrant[i] and
1012 ff3adf60 2004-04-14 devnull quadrant[j] are meaningless.
1015 ff3adf60 2004-04-14 devnull if quadrant[i] < quadrant[j]
1016 ff3adf60 2004-04-14 devnull then the string starting at i lexicographically
1017 ff3adf60 2004-04-14 devnull precedes the string starting at j
1019 ff3adf60 2004-04-14 devnull else if quadrant[i] > quadrant[j]
1020 ff3adf60 2004-04-14 devnull then the string starting at j lexicographically
1021 ff3adf60 2004-04-14 devnull precedes the string starting at i
1024 ff3adf60 2004-04-14 devnull the relative ordering of the strings starting
1025 ff3adf60 2004-04-14 devnull at i and j has not yet been determined.
1028 ff3adf60 2004-04-14 devnull bigDone[ss] = True;
1030 ff3adf60 2004-04-14 devnull if (i < 255) {
1031 ff3adf60 2004-04-14 devnull Int32 bbStart = ftab[ss << 8] & CLEARMASK;
1032 ff3adf60 2004-04-14 devnull Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
1033 ff3adf60 2004-04-14 devnull Int32 shifts = 0;
1035 ff3adf60 2004-04-14 devnull while ((bbSize >> shifts) > 65534) shifts++;
1037 ff3adf60 2004-04-14 devnull for (j = bbSize-1; j >= 0; j--) {
1038 ff3adf60 2004-04-14 devnull Int32 a2update = ptr[bbStart + j];
1039 ff3adf60 2004-04-14 devnull UInt16 qVal = (UInt16)(j >> shifts);
1040 ff3adf60 2004-04-14 devnull quadrant[a2update] = qVal;
1041 ff3adf60 2004-04-14 devnull if (a2update < BZ_N_OVERSHOOT)
1042 ff3adf60 2004-04-14 devnull quadrant[a2update + nblock] = qVal;
1044 ff3adf60 2004-04-14 devnull AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
1049 ff3adf60 2004-04-14 devnull if (verb >= 4)
1050 ff3adf60 2004-04-14 devnull VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
1051 ff3adf60 2004-04-14 devnull nblock, numQSorted, nblock - numQSorted );
1054 ff3adf60 2004-04-14 devnull #undef BIGFREQ
1055 ff3adf60 2004-04-14 devnull #undef SETMASK
1056 ff3adf60 2004-04-14 devnull #undef CLEARMASK
1059 ff3adf60 2004-04-14 devnull /*---------------------------------------------*/
1060 ff3adf60 2004-04-14 devnull /* Pre:
1061 ff3adf60 2004-04-14 devnull nblock > 0
1062 ff3adf60 2004-04-14 devnull arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1063 ff3adf60 2004-04-14 devnull ((UChar*)arr2) [0 .. nblock-1] holds block
1064 ff3adf60 2004-04-14 devnull arr1 exists for [0 .. nblock-1]
1067 ff3adf60 2004-04-14 devnull ((UChar*)arr2) [0 .. nblock-1] holds block
1068 ff3adf60 2004-04-14 devnull All other areas of block destroyed
1069 ff3adf60 2004-04-14 devnull ftab [ 0 .. 65536 ] destroyed
1070 ff3adf60 2004-04-14 devnull arr1 [0 .. nblock-1] holds sorted order
1072 ff3adf60 2004-04-14 devnull void BZ2_blockSort ( EState* s )
1074 ff3adf60 2004-04-14 devnull UInt32* ptr = s->ptr;
1075 ff3adf60 2004-04-14 devnull UChar* block = s->block;
1076 ff3adf60 2004-04-14 devnull UInt32* ftab = s->ftab;
1077 ff3adf60 2004-04-14 devnull Int32 nblock = s->nblock;
1078 ff3adf60 2004-04-14 devnull Int32 verb = s->verbosity;
1079 ff3adf60 2004-04-14 devnull Int32 wfact = s->workFactor;
1080 ff3adf60 2004-04-14 devnull UInt16* quadrant;
1081 ff3adf60 2004-04-14 devnull Int32 budget;
1082 ff3adf60 2004-04-14 devnull Int32 budgetInit;
1083 ff3adf60 2004-04-14 devnull Int32 i;
1085 ff3adf60 2004-04-14 devnull if (nblock < 10000) {
1086 ff3adf60 2004-04-14 devnull fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1087 ff3adf60 2004-04-14 devnull } else {
1088 ff3adf60 2004-04-14 devnull /* Calculate the location for quadrant, remembering to get
1089 ff3adf60 2004-04-14 devnull the alignment right. Assumes that &(block[0]) is at least
1090 ff3adf60 2004-04-14 devnull 2-byte aligned -- this should be ok since block is really
1091 ff3adf60 2004-04-14 devnull the first section of arr2.
1093 ff3adf60 2004-04-14 devnull i = nblock+BZ_N_OVERSHOOT;
1094 ff3adf60 2004-04-14 devnull if (i & 1) i++;
1095 ff3adf60 2004-04-14 devnull quadrant = (UInt16*)(&(block[i]));
1097 ff3adf60 2004-04-14 devnull /* (wfact-1) / 3 puts the default-factor-30
1098 ff3adf60 2004-04-14 devnull transition point at very roughly the same place as
1099 ff3adf60 2004-04-14 devnull with v0.1 and v0.9.0.
1100 ff3adf60 2004-04-14 devnull Not that it particularly matters any more, since the
1101 ff3adf60 2004-04-14 devnull resulting compressed stream is now the same regardless
1102 ff3adf60 2004-04-14 devnull of whether or not we use the main sort or fallback sort.
1104 ff3adf60 2004-04-14 devnull if (wfact < 1 ) wfact = 1;
1105 ff3adf60 2004-04-14 devnull if (wfact > 100) wfact = 100;
1106 ff3adf60 2004-04-14 devnull budgetInit = nblock * ((wfact-1) / 3);
1107 ff3adf60 2004-04-14 devnull budget = budgetInit;
1109 ff3adf60 2004-04-14 devnull mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
1110 ff3adf60 2004-04-14 devnull if (verb >= 3)
1111 ff3adf60 2004-04-14 devnull VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
1112 ff3adf60 2004-04-14 devnull budgetInit - budget,
1113 ff3adf60 2004-04-14 devnull nblock,
1114 ff3adf60 2004-04-14 devnull (float)(budgetInit - budget) /
1115 ff3adf60 2004-04-14 devnull (float)(nblock==0 ? 1 : nblock) );
1116 ff3adf60 2004-04-14 devnull if (budget < 0) {
1117 ff3adf60 2004-04-14 devnull if (verb >= 2)
1118 ff3adf60 2004-04-14 devnull VPrintf0 ( " too repetitive; using fallback"
1119 ff3adf60 2004-04-14 devnull " sorting algorithm\n" );
1120 ff3adf60 2004-04-14 devnull fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1124 ff3adf60 2004-04-14 devnull s->origPtr = -1;
1125 ff3adf60 2004-04-14 devnull for (i = 0; i < s->nblock; i++)
1126 ff3adf60 2004-04-14 devnull if (ptr[i] == 0)
1127 ff3adf60 2004-04-14 devnull { s->origPtr = i; break; };
1129 ff3adf60 2004-04-14 devnull AssertH( s->origPtr != -1, 1003 );
1133 ff3adf60 2004-04-14 devnull /*-------------------------------------------------------------*/
1134 ff3adf60 2004-04-14 devnull /*--- end blocksort.c ---*/
1135 ff3adf60 2004-04-14 devnull /*-------------------------------------------------------------*/