001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019
020/*
021 * This package is based on the work done by Keiron Liddle, Aftex Software
022 * <keiron@aftexsw.com> to whom the Ant project is very grateful for his
023 * great code.
024 */
025package org.apache.commons.compress.compressors.bzip2;
026
027import java.io.IOException;
028import java.io.InputStream;
029
030import org.apache.commons.compress.compressors.CompressorInputStream;
031
032/**
033 * An input stream that decompresses from the BZip2 format to be read as any other stream.
034 * 
035 * @NotThreadSafe
036 */
037public class BZip2CompressorInputStream extends CompressorInputStream implements
038                                                                          BZip2Constants {
039
040    /**
041     * Index of the last char in the block, so the block size == last + 1.
042     */
043    private int last;
044
045    /**
046     * Index in zptr[] of original string after sorting.
047     */
048    private int origPtr;
049
050    /**
051     * always: in the range 0 .. 9. The current block size is 100000 * this
052     * number.
053     */
054    private int blockSize100k;
055
056    private boolean blockRandomised;
057
058    private int bsBuff;
059    private int bsLive;
060    private final CRC crc = new CRC();
061
062    private int nInUse;
063
064    private InputStream in;
065    private final boolean decompressConcatenated;
066
067    private static final int EOF = 0;
068    private static final int START_BLOCK_STATE = 1;
069    private static final int RAND_PART_A_STATE = 2;
070    private static final int RAND_PART_B_STATE = 3;
071    private static final int RAND_PART_C_STATE = 4;
072    private static final int NO_RAND_PART_A_STATE = 5;
073    private static final int NO_RAND_PART_B_STATE = 6;
074    private static final int NO_RAND_PART_C_STATE = 7;
075
076    private int currentState = START_BLOCK_STATE;
077
078    private int storedBlockCRC, storedCombinedCRC;
079    private int computedBlockCRC, computedCombinedCRC;
080
081    // Variables used by setup* methods exclusively
082
083    private int su_count;
084    private int su_ch2;
085    private int su_chPrev;
086    private int su_i2;
087    private int su_j2;
088    private int su_rNToGo;
089    private int su_rTPos;
090    private int su_tPos;
091    private char su_z;
092
093    /**
094     * All memory intensive stuff. This field is initialized by initBlock().
095     */
096    private BZip2CompressorInputStream.Data data;
097
098    /**
099     * Constructs a new BZip2CompressorInputStream which decompresses bytes
100     * read from the specified stream. This doesn't suppprt decompressing
101     * concatenated .bz2 files.
102     * 
103     * @param in the InputStream from which this object should be created
104     * @throws IOException
105     *             if the stream content is malformed or an I/O error occurs.
106     * @throws NullPointerException
107     *             if {@code in == null}
108     */
109    public BZip2CompressorInputStream(final InputStream in) throws IOException {
110        this(in, false);
111    }
112
113    /**
114     * Constructs a new BZip2CompressorInputStream which decompresses bytes
115     * read from the specified stream.
116     *
117     * @param in the InputStream from which this object should be created
118     * @param decompressConcatenated
119     *                     if true, decompress until the end of the input;
120     *                     if false, stop after the first .bz2 stream and
121     *                     leave the input position to point to the next
122     *                     byte after the .bz2 stream
123     *
124     * @throws IOException
125     *             if {@code in == null}, the stream content is malformed, or an I/O error occurs.
126     */
127    public BZip2CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException {
128        this.in = in;
129        this.decompressConcatenated = decompressConcatenated;
130
131        init(true);
132        initBlock();
133    }
134
135    @Override
136    public int read() throws IOException {
137        if (this.in != null) {
138            final int r = read0();
139            count(r < 0 ? -1 : 1);
140            return r;
141        }
142        throw new IOException("stream closed");
143    }
144
145    /*
146     * (non-Javadoc)
147     * 
148     * @see java.io.InputStream#read(byte[], int, int)
149     */
150    @Override
151    public int read(final byte[] dest, final int offs, final int len)
152        throws IOException {
153        if (offs < 0) {
154            throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
155        }
156        if (len < 0) {
157            throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
158        }
159        if (offs + len > dest.length) {
160            throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
161                                                + len + ") > dest.length(" + dest.length + ").");
162        }
163        if (this.in == null) {
164            throw new IOException("stream closed");
165        }
166        if (len == 0) {
167            return 0;
168        }
169
170        final int hi = offs + len;
171        int destOffs = offs;
172        int b;
173        while (destOffs < hi && ((b = read0()) >= 0)) {
174            dest[destOffs++] = (byte) b;
175            count(1);
176        }
177
178        final int c = (destOffs == offs) ? -1 : (destOffs - offs);
179        return c;
180    }
181
182    private void makeMaps() {
183        final boolean[] inUse = this.data.inUse;
184        final byte[] seqToUnseq = this.data.seqToUnseq;
185
186        int nInUseShadow = 0;
187
188        for (int i = 0; i < 256; i++) {
189            if (inUse[i]) {
190                seqToUnseq[nInUseShadow++] = (byte) i;
191            }
192        }
193
194        this.nInUse = nInUseShadow;
195    }
196
197    private int read0() throws IOException {
198        switch (currentState) {
199        case EOF:
200            return -1;
201
202        case START_BLOCK_STATE:
203            return setupBlock();
204
205        case RAND_PART_A_STATE:
206            throw new IllegalStateException();
207
208        case RAND_PART_B_STATE:
209            return setupRandPartB();
210
211        case RAND_PART_C_STATE:
212            return setupRandPartC();
213
214        case NO_RAND_PART_A_STATE:
215            throw new IllegalStateException();
216
217        case NO_RAND_PART_B_STATE:
218            return setupNoRandPartB();
219
220        case NO_RAND_PART_C_STATE:
221            return setupNoRandPartC();
222
223        default:
224            throw new IllegalStateException();
225        }
226    }
227
228    private boolean init(final boolean isFirstStream) throws IOException {
229        if (null == in) {
230            throw new IOException("No InputStream");
231        }
232
233        final int magic0 = this.in.read();
234        if (magic0 == -1 && !isFirstStream) {
235            return false;
236        }
237        final int magic1 = this.in.read();
238        final int magic2 = this.in.read();
239
240        if (magic0 != 'B' || magic1 != 'Z' || magic2 != 'h') {
241            throw new IOException(isFirstStream
242                    ? "Stream is not in the BZip2 format"
243                    : "Garbage after a valid BZip2 stream");
244        }
245
246        final int blockSize = this.in.read();
247        if ((blockSize < '1') || (blockSize > '9')) {
248            throw new IOException("BZip2 block size is invalid");
249        }
250
251        this.blockSize100k = blockSize - '0';
252
253        this.bsLive = 0;
254        this.computedCombinedCRC = 0;
255
256        return true;
257    }
258
259    private void initBlock() throws IOException {
260        char magic0;
261        char magic1;
262        char magic2;
263        char magic3;
264        char magic4;
265        char magic5;
266
267        while (true) {
268            // Get the block magic bytes.
269            magic0 = bsGetUByte();
270            magic1 = bsGetUByte();
271            magic2 = bsGetUByte();
272            magic3 = bsGetUByte();
273            magic4 = bsGetUByte();
274            magic5 = bsGetUByte();
275
276            // If isn't end of stream magic, break out of the loop.
277            if (magic0 != 0x17 || magic1 != 0x72 || magic2 != 0x45
278                    || magic3 != 0x38 || magic4 != 0x50 || magic5 != 0x90) {
279                break;
280            }
281
282            // End of stream was reached. Check the combined CRC and
283            // advance to the next .bz2 stream if decoding concatenated
284            // streams.
285            if (complete()) {
286                return;
287            }
288        }
289
290        if (magic0 != 0x31 || // '1'
291            magic1 != 0x41 || // ')'
292            magic2 != 0x59 || // 'Y'
293            magic3 != 0x26 || // '&'
294            magic4 != 0x53 || // 'S'
295            magic5 != 0x59 // 'Y'
296            ) {
297            this.currentState = EOF;
298            throw new IOException("bad block header");
299        }
300        this.storedBlockCRC = bsGetInt();
301        this.blockRandomised = bsR(1) == 1;
302
303        /**
304         * Allocate data here instead in constructor, so we do not allocate
305         * it if the input file is empty.
306         */
307        if (this.data == null) {
308            this.data = new Data(this.blockSize100k);
309        }
310
311        // currBlockNo++;
312        getAndMoveToFrontDecode();
313
314        this.crc.initialiseCRC();
315        this.currentState = START_BLOCK_STATE;
316    }
317
318    private void endBlock() throws IOException {
319        this.computedBlockCRC = this.crc.getFinalCRC();
320
321        // A bad CRC is considered a fatal error.
322        if (this.storedBlockCRC != this.computedBlockCRC) {
323            // make next blocks readable without error
324            // (repair feature, not yet documented, not tested)
325            this.computedCombinedCRC = (this.storedCombinedCRC << 1)
326                | (this.storedCombinedCRC >>> 31);
327            this.computedCombinedCRC ^= this.storedBlockCRC;
328
329            throw new IOException("BZip2 CRC error");
330        }
331
332        this.computedCombinedCRC = (this.computedCombinedCRC << 1)
333            | (this.computedCombinedCRC >>> 31);
334        this.computedCombinedCRC ^= this.computedBlockCRC;
335    }
336
337    private boolean complete() throws IOException {
338        this.storedCombinedCRC = bsGetInt();
339        this.currentState = EOF;
340        this.data = null;
341
342        if (this.storedCombinedCRC != this.computedCombinedCRC) {
343            throw new IOException("BZip2 CRC error");
344        }
345
346        // Look for the next .bz2 stream if decompressing
347        // concatenated files.
348        return !decompressConcatenated || !init(false);
349    }
350
351    @Override
352    public void close() throws IOException {
353        final InputStream inShadow = this.in;
354        if (inShadow != null) {
355            try {
356                if (inShadow != System.in) {
357                    inShadow.close();
358                }
359            } finally {
360                this.data = null;
361                this.in = null;
362            }
363        }
364    }
365
366    private int bsR(final int n) throws IOException {
367        int bsLiveShadow = this.bsLive;
368        int bsBuffShadow = this.bsBuff;
369
370        if (bsLiveShadow < n) {
371            final InputStream inShadow = this.in;
372            do {
373                final int thech = inShadow.read();
374
375                if (thech < 0) {
376                    throw new IOException("unexpected end of stream");
377                }
378
379                bsBuffShadow = (bsBuffShadow << 8) | thech;
380                bsLiveShadow += 8;
381            } while (bsLiveShadow < n);
382
383            this.bsBuff = bsBuffShadow;
384        }
385
386        this.bsLive = bsLiveShadow - n;
387        return (bsBuffShadow >> (bsLiveShadow - n)) & ((1 << n) - 1);
388    }
389
390    private boolean bsGetBit() throws IOException {
391        return bsR(1) != 0;
392    }
393
394    private char bsGetUByte() throws IOException {
395        return (char) bsR(8);
396    }
397
398    private int bsGetInt() throws IOException {
399        return (((((bsR(8) << 8) | bsR(8)) << 8) | bsR(8)) << 8) | bsR(8);
400    }
401
402    /**
403     * Called by createHuffmanDecodingTables() exclusively.
404     */
405    private static void hbCreateDecodeTables(final int[] limit,
406                                             final int[] base, final int[] perm, final char[] length,
407                                             final int minLen, final int maxLen, final int alphaSize) {
408        for (int i = minLen, pp = 0; i <= maxLen; i++) {
409            for (int j = 0; j < alphaSize; j++) {
410                if (length[j] == i) {
411                    perm[pp++] = j;
412                }
413            }
414        }
415
416        for (int i = MAX_CODE_LEN; --i > 0;) {
417            base[i] = 0;
418            limit[i] = 0;
419        }
420
421        for (int i = 0; i < alphaSize; i++) {
422            base[length[i] + 1]++;
423        }
424
425        for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) {
426            b += base[i];
427            base[i] = b;
428        }
429
430        for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) {
431            final int nb = base[i + 1];
432            vec += nb - b;
433            b = nb;
434            limit[i] = vec - 1;
435            vec <<= 1;
436        }
437
438        for (int i = minLen + 1; i <= maxLen; i++) {
439            base[i] = ((limit[i - 1] + 1) << 1) - base[i];
440        }
441    }
442
443    private void recvDecodingTables() throws IOException {
444        final Data dataShadow = this.data;
445        final boolean[] inUse = dataShadow.inUse;
446        final byte[] pos = dataShadow.recvDecodingTables_pos;
447        final byte[] selector = dataShadow.selector;
448        final byte[] selectorMtf = dataShadow.selectorMtf;
449
450        int inUse16 = 0;
451
452        /* Receive the mapping table */
453        for (int i = 0; i < 16; i++) {
454            if (bsGetBit()) {
455                inUse16 |= 1 << i;
456            }
457        }
458
459        for (int i = 256; --i >= 0;) {
460            inUse[i] = false;
461        }
462
463        for (int i = 0; i < 16; i++) {
464            if ((inUse16 & (1 << i)) != 0) {
465                final int i16 = i << 4;
466                for (int j = 0; j < 16; j++) {
467                    if (bsGetBit()) {
468                        inUse[i16 + j] = true;
469                    }
470                }
471            }
472        }
473
474        makeMaps();
475        final int alphaSize = this.nInUse + 2;
476
477        /* Now the selectors */
478        final int nGroups = bsR(3);
479        final int nSelectors = bsR(15);
480
481        for (int i = 0; i < nSelectors; i++) {
482            int j = 0;
483            while (bsGetBit()) {
484                j++;
485            }
486            selectorMtf[i] = (byte) j;
487        }
488
489        /* Undo the MTF values for the selectors. */
490        for (int v = nGroups; --v >= 0;) {
491            pos[v] = (byte) v;
492        }
493
494        for (int i = 0; i < nSelectors; i++) {
495            int v = selectorMtf[i] & 0xff;
496            final byte tmp = pos[v];
497            while (v > 0) {
498                // nearly all times v is zero, 4 in most other cases
499                pos[v] = pos[v - 1];
500                v--;
501            }
502            pos[0] = tmp;
503            selector[i] = tmp;
504        }
505
506        final char[][] len = dataShadow.temp_charArray2d;
507
508        /* Now the coding tables */
509        for (int t = 0; t < nGroups; t++) {
510            int curr = bsR(5);
511            final char[] len_t = len[t];
512            for (int i = 0; i < alphaSize; i++) {
513                while (bsGetBit()) {
514                    curr += bsGetBit() ? -1 : 1;
515                }
516                len_t[i] = (char) curr;
517            }
518        }
519
520        // finally create the Huffman tables
521        createHuffmanDecodingTables(alphaSize, nGroups);
522    }
523
524    /**
525     * Called by recvDecodingTables() exclusively.
526     */
527    private void createHuffmanDecodingTables(final int alphaSize,
528                                             final int nGroups) {
529        final Data dataShadow = this.data;
530        final char[][] len = dataShadow.temp_charArray2d;
531        final int[] minLens = dataShadow.minLens;
532        final int[][] limit = dataShadow.limit;
533        final int[][] base = dataShadow.base;
534        final int[][] perm = dataShadow.perm;
535
536        for (int t = 0; t < nGroups; t++) {
537            int minLen = 32;
538            int maxLen = 0;
539            final char[] len_t = len[t];
540            for (int i = alphaSize; --i >= 0;) {
541                final char lent = len_t[i];
542                if (lent > maxLen) {
543                    maxLen = lent;
544                }
545                if (lent < minLen) {
546                    minLen = lent;
547                }
548            }
549            hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen,
550                                 maxLen, alphaSize);
551            minLens[t] = minLen;
552        }
553    }
554
555    private void getAndMoveToFrontDecode() throws IOException {
556        this.origPtr = bsR(24);
557        recvDecodingTables();
558
559        final InputStream inShadow = this.in;
560        final Data dataShadow = this.data;
561        final byte[] ll8 = dataShadow.ll8;
562        final int[] unzftab = dataShadow.unzftab;
563        final byte[] selector = dataShadow.selector;
564        final byte[] seqToUnseq = dataShadow.seqToUnseq;
565        final char[] yy = dataShadow.getAndMoveToFrontDecode_yy;
566        final int[] minLens = dataShadow.minLens;
567        final int[][] limit = dataShadow.limit;
568        final int[][] base = dataShadow.base;
569        final int[][] perm = dataShadow.perm;
570        final int limitLast = this.blockSize100k * 100000;
571
572        /*
573         * Setting up the unzftab entries here is not strictly necessary, but it
574         * does save having to do it later in a separate pass, and so saves a
575         * block's worth of cache misses.
576         */
577        for (int i = 256; --i >= 0;) {
578            yy[i] = (char) i;
579            unzftab[i] = 0;
580        }
581
582        int groupNo = 0;
583        int groupPos = G_SIZE - 1;
584        final int eob = this.nInUse + 1;
585        int nextSym = getAndMoveToFrontDecode0(0);
586        int bsBuffShadow = this.bsBuff;
587        int bsLiveShadow = this.bsLive;
588        int lastShadow = -1;
589        int zt = selector[groupNo] & 0xff;
590        int[] base_zt = base[zt];
591        int[] limit_zt = limit[zt];
592        int[] perm_zt = perm[zt];
593        int minLens_zt = minLens[zt];
594
595        while (nextSym != eob) {
596            if ((nextSym == RUNA) || (nextSym == RUNB)) {
597                int s = -1;
598
599                for (int n = 1; true; n <<= 1) {
600                    if (nextSym == RUNA) {
601                        s += n;
602                    } else if (nextSym == RUNB) {
603                        s += n << 1;
604                    } else {
605                        break;
606                    }
607
608                    if (groupPos == 0) {
609                        groupPos = G_SIZE - 1;
610                        zt = selector[++groupNo] & 0xff;
611                        base_zt = base[zt];
612                        limit_zt = limit[zt];
613                        perm_zt = perm[zt];
614                        minLens_zt = minLens[zt];
615                    } else {
616                        groupPos--;
617                    }
618
619                    int zn = minLens_zt;
620
621                    // Inlined:
622                    // int zvec = bsR(zn);
623                    while (bsLiveShadow < zn) {
624                        final int thech = inShadow.read();
625                        if (thech >= 0) {
626                            bsBuffShadow = (bsBuffShadow << 8) | thech;
627                            bsLiveShadow += 8;
628                            continue;
629                        }
630                        throw new IOException("unexpected end of stream");
631                    }
632                    int zvec = (bsBuffShadow >> (bsLiveShadow - zn))
633                        & ((1 << zn) - 1);
634                    bsLiveShadow -= zn;
635
636                    while (zvec > limit_zt[zn]) {
637                        zn++;
638                        while (bsLiveShadow < 1) {
639                            final int thech = inShadow.read();
640                            if (thech >= 0) {
641                                bsBuffShadow = (bsBuffShadow << 8) | thech;
642                                bsLiveShadow += 8;
643                                continue;
644                            }
645                            throw new IOException(
646                                                  "unexpected end of stream");
647                        }
648                        bsLiveShadow--;
649                        zvec = (zvec << 1)
650                            | ((bsBuffShadow >> bsLiveShadow) & 1);
651                    }
652                    nextSym = perm_zt[zvec - base_zt[zn]];
653                }
654
655                final byte ch = seqToUnseq[yy[0]];
656                unzftab[ch & 0xff] += s + 1;
657
658                while (s-- >= 0) {
659                    ll8[++lastShadow] = ch;
660                }
661
662                if (lastShadow >= limitLast) {
663                    throw new IOException("block overrun");
664                }
665            } else {
666                if (++lastShadow >= limitLast) {
667                    throw new IOException("block overrun");
668                }
669
670                final char tmp = yy[nextSym - 1];
671                unzftab[seqToUnseq[tmp] & 0xff]++;
672                ll8[lastShadow] = seqToUnseq[tmp];
673
674                /*
675                 * This loop is hammered during decompression, hence avoid
676                 * native method call overhead of System.arraycopy for very
677                 * small ranges to copy.
678                 */
679                if (nextSym <= 16) {
680                    for (int j = nextSym - 1; j > 0;) {
681                        yy[j] = yy[--j];
682                    }
683                } else {
684                    System.arraycopy(yy, 0, yy, 1, nextSym - 1);
685                }
686
687                yy[0] = tmp;
688
689                if (groupPos == 0) {
690                    groupPos = G_SIZE - 1;
691                    zt = selector[++groupNo] & 0xff;
692                    base_zt = base[zt];
693                    limit_zt = limit[zt];
694                    perm_zt = perm[zt];
695                    minLens_zt = minLens[zt];
696                } else {
697                    groupPos--;
698                }
699
700                int zn = minLens_zt;
701
702                // Inlined:
703                // int zvec = bsR(zn);
704                while (bsLiveShadow < zn) {
705                    final int thech = inShadow.read();
706                    if (thech >= 0) {
707                        bsBuffShadow = (bsBuffShadow << 8) | thech;
708                        bsLiveShadow += 8;
709                        continue;
710                    }
711                    throw new IOException("unexpected end of stream");
712                }
713                int zvec = (bsBuffShadow >> (bsLiveShadow - zn))
714                    & ((1 << zn) - 1);
715                bsLiveShadow -= zn;
716
717                while (zvec > limit_zt[zn]) {
718                    zn++;
719                    while (bsLiveShadow < 1) {
720                        final int thech = inShadow.read();
721                        if (thech >= 0) {
722                            bsBuffShadow = (bsBuffShadow << 8) | thech;
723                            bsLiveShadow += 8;
724                            continue;
725                        }
726                        throw new IOException("unexpected end of stream");
727                    }
728                    bsLiveShadow--;
729                    zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1);
730                }
731                nextSym = perm_zt[zvec - base_zt[zn]];
732            }
733        }
734
735        this.last = lastShadow;
736        this.bsLive = bsLiveShadow;
737        this.bsBuff = bsBuffShadow;
738    }
739
740    private int getAndMoveToFrontDecode0(final int groupNo) throws IOException {
741        final InputStream inShadow = this.in;
742        final Data dataShadow = this.data;
743        final int zt = dataShadow.selector[groupNo] & 0xff;
744        final int[] limit_zt = dataShadow.limit[zt];
745        int zn = dataShadow.minLens[zt];
746        int zvec = bsR(zn);
747        int bsLiveShadow = this.bsLive;
748        int bsBuffShadow = this.bsBuff;
749
750        while (zvec > limit_zt[zn]) {
751            zn++;
752            while (bsLiveShadow < 1) {
753                final int thech = inShadow.read();
754
755                if (thech >= 0) {
756                    bsBuffShadow = (bsBuffShadow << 8) | thech;
757                    bsLiveShadow += 8;
758                    continue;
759                }
760                throw new IOException("unexpected end of stream");
761            }
762            bsLiveShadow--;
763            zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1);
764        }
765
766        this.bsLive = bsLiveShadow;
767        this.bsBuff = bsBuffShadow;
768
769        return dataShadow.perm[zt][zvec - dataShadow.base[zt][zn]];
770    }
771
772    private int setupBlock() throws IOException {
773        if (currentState == EOF || this.data == null) {
774            return -1;
775        }
776
777        final int[] cftab = this.data.cftab;
778        final int[] tt = this.data.initTT(this.last + 1);
779        final byte[] ll8 = this.data.ll8;
780        cftab[0] = 0;
781        System.arraycopy(this.data.unzftab, 0, cftab, 1, 256);
782
783        for (int i = 1, c = cftab[0]; i <= 256; i++) {
784            c += cftab[i];
785            cftab[i] = c;
786        }
787
788        for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) {
789            tt[cftab[ll8[i] & 0xff]++] = i;
790        }
791
792        if ((this.origPtr < 0) || (this.origPtr >= tt.length)) {
793            throw new IOException("stream corrupted");
794        }
795
796        this.su_tPos = tt[this.origPtr];
797        this.su_count = 0;
798        this.su_i2 = 0;
799        this.su_ch2 = 256; /* not a char and not EOF */
800
801        if (this.blockRandomised) {
802            this.su_rNToGo = 0;
803            this.su_rTPos = 0;
804            return setupRandPartA();
805        }
806        return setupNoRandPartA();
807    }
808
809    private int setupRandPartA() throws IOException {
810        if (this.su_i2 <= this.last) {
811            this.su_chPrev = this.su_ch2;
812            int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
813            this.su_tPos = this.data.tt[this.su_tPos];
814            if (this.su_rNToGo == 0) {
815                this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
816                if (++this.su_rTPos == 512) {
817                    this.su_rTPos = 0;
818                }
819            } else {
820                this.su_rNToGo--;
821            }
822            this.su_ch2 = su_ch2Shadow ^= (this.su_rNToGo == 1) ? 1 : 0;
823            this.su_i2++;
824            this.currentState = RAND_PART_B_STATE;
825            this.crc.updateCRC(su_ch2Shadow);
826            return su_ch2Shadow;
827        }
828        endBlock();
829        initBlock();
830        return setupBlock();
831    }
832
833    private int setupNoRandPartA() throws IOException {
834        if (this.su_i2 <= this.last) {
835            this.su_chPrev = this.su_ch2;
836            final int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
837            this.su_ch2 = su_ch2Shadow;
838            this.su_tPos = this.data.tt[this.su_tPos];
839            this.su_i2++;
840            this.currentState = NO_RAND_PART_B_STATE;
841            this.crc.updateCRC(su_ch2Shadow);
842            return su_ch2Shadow;
843        }
844        this.currentState = NO_RAND_PART_A_STATE;
845        endBlock();
846        initBlock();
847        return setupBlock();
848    }
849
850    private int setupRandPartB() throws IOException {
851        if (this.su_ch2 != this.su_chPrev) {
852            this.currentState = RAND_PART_A_STATE;
853            this.su_count = 1;
854            return setupRandPartA();
855        } else if (++this.su_count >= 4) {
856            this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
857            this.su_tPos = this.data.tt[this.su_tPos];
858            if (this.su_rNToGo == 0) {
859                this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
860                if (++this.su_rTPos == 512) {
861                    this.su_rTPos = 0;
862                }
863            } else {
864                this.su_rNToGo--;
865            }
866            this.su_j2 = 0;
867            this.currentState = RAND_PART_C_STATE;
868            if (this.su_rNToGo == 1) {
869                this.su_z ^= 1;
870            }
871            return setupRandPartC();
872        } else {
873            this.currentState = RAND_PART_A_STATE;
874            return setupRandPartA();
875        }
876    }
877
878    private int setupRandPartC() throws IOException {
879        if (this.su_j2 < this.su_z) {
880            this.crc.updateCRC(this.su_ch2);
881            this.su_j2++;
882            return this.su_ch2;
883        }
884        this.currentState = RAND_PART_A_STATE;
885        this.su_i2++;
886        this.su_count = 0;
887        return setupRandPartA();
888    }
889
890    private int setupNoRandPartB() throws IOException {
891        if (this.su_ch2 != this.su_chPrev) {
892            this.su_count = 1;
893            return setupNoRandPartA();
894        } else if (++this.su_count >= 4) {
895            this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
896            this.su_tPos = this.data.tt[this.su_tPos];
897            this.su_j2 = 0;
898            return setupNoRandPartC();
899        } else {
900            return setupNoRandPartA();
901        }
902    }
903
904    private int setupNoRandPartC() throws IOException {
905        if (this.su_j2 < this.su_z) {
906            final int su_ch2Shadow = this.su_ch2;
907            this.crc.updateCRC(su_ch2Shadow);
908            this.su_j2++;
909            this.currentState = NO_RAND_PART_C_STATE;
910            return su_ch2Shadow;
911        }
912        this.su_i2++;
913        this.su_count = 0;
914        return setupNoRandPartA();
915    }
916
917    private static final class Data {
918
919        // (with blockSize 900k)
920        final boolean[] inUse = new boolean[256]; // 256 byte
921
922        final byte[] seqToUnseq = new byte[256]; // 256 byte
923        final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
924        final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
925
926        /**
927         * Freq table collected to save a pass over the data during
928         * decompression.
929         */
930        final int[] unzftab = new int[256]; // 1024 byte
931
932        final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
933        final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
934        final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
935        final int[] minLens = new int[N_GROUPS]; // 24 byte
936
937        final int[] cftab = new int[257]; // 1028 byte
938        final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte
939        final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096
940        // byte
941        final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte
942        // ---------------
943        // 60798 byte
944
945        int[] tt; // 3600000 byte
946        byte[] ll8; // 900000 byte
947
948        // ---------------
949        // 4560782 byte
950        // ===============
951
952        Data(final int blockSize100k) {
953            this.ll8 = new byte[blockSize100k * BZip2Constants.BASEBLOCKSIZE];
954        }
955
956        /**
957         * Initializes the {@link #tt} array.
958         * 
959         * This method is called when the required length of the array is known.
960         * I don't initialize it at construction time to avoid unneccessary
961         * memory allocation when compressing small files.
962         */
963        int[] initTT(final int length) {
964            int[] ttShadow = this.tt;
965
966            // tt.length should always be >= length, but theoretically
967            // it can happen, if the compressor mixed small and large
968            // blocks. Normally only the last block will be smaller
969            // than others.
970            if ((ttShadow == null) || (ttShadow.length < length)) {
971                this.tt = ttShadow = new int[length];
972            }
973
974            return ttShadow;
975        }
976
977    }
978
979    /**
980     * Checks if the signature matches what is expected for a bzip2 file.
981     * 
982     * @param signature
983     *            the bytes to check
984     * @param length
985     *            the number of bytes to check
986     * @return true, if this stream is a bzip2 compressed stream, false otherwise
987     * 
988     * @since 1.1
989     */
990    public static boolean matches(final byte[] signature, final int length) {
991
992        if (length < 3) {
993            return false;
994        }
995
996        if (signature[0] != 'B') {
997            return false;
998        }
999
1000        if (signature[1] != 'Z') {
1001            return false;
1002        }
1003
1004        if (signature[2] != 'h') {
1005            return false;
1006        }
1007
1008        return true;
1009    }
1010}