Source code

001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.zip;
020
021import java.io.ByteArrayInputStream;
022import java.io.ByteArrayOutputStream;
023import java.io.EOFException;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.PushbackInputStream;
027import java.nio.ByteBuffer;
028import java.util.zip.CRC32;
029import java.util.zip.DataFormatException;
030import java.util.zip.Inflater;
031import java.util.zip.ZipEntry;
032import java.util.zip.ZipException;
033
034import org.apache.commons.compress.archivers.ArchiveEntry;
035import org.apache.commons.compress.archivers.ArchiveInputStream;
036import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
037import org.apache.commons.compress.utils.ArchiveUtils;
038import org.apache.commons.compress.utils.IOUtils;
039
040import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
041import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
042import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
043import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
044
045/**
046 * Implements an input stream that can read Zip archives.
047 *
048 * <p>As of Apache Commons Compress it transparently supports Zip64
049 * extensions and thus individual entries and archives larger than 4
050 * GB or with more than 65536 entries.</p>
051 *
052 * <p>The {@link ZipFile} class is preferred when reading from files
053 * as {@link ZipArchiveInputStream} is limited by not being able to
054 * read the central directory header before returning entries.  In
055 * particular {@link ZipArchiveInputStream}</p>
056 *
057 * <ul>
058 *
059 *  <li>may return entries that are not part of the central directory
060 *  at all and shouldn't be considered part of the archive.</li>
061 *
062 *  <li>may return several entries with the same name.</li>
063 *
064 *  <li>will not return internal or external attributes.</li>
065 *
066 *  <li>may return incomplete extra field data.</li>
067 *
068 *  <li>may return unknown sizes and CRC values for entries until the
069 *  next entry has been reached if the archive uses the data
070 *  descriptor feature.</li>
071 *
072 * </ul>
073 *
074 * @see ZipFile
075 * @NotThreadSafe
076 */
077public class ZipArchiveInputStream extends ArchiveInputStream {
078
079    /** The zip encoding to use for filenames and the file comment. */
080    private final ZipEncoding zipEncoding;
081
082    // the provided encoding (for unit tests)
083    final String encoding;
084
085    /** Whether to look for and use Unicode extra fields. */
086    private final boolean useUnicodeExtraFields;
087
088    /** Wrapped stream, will always be a PushbackInputStream. */
089    private final InputStream in;
090
091    /** Inflater used for all deflated entries. */
092    private final Inflater inf = new Inflater(true);
093
094    /** Buffer used to read from the wrapped stream. */
095    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);
096
097    /** The entry that is currently being read. */
098    private CurrentEntry current = null;
099
100    /** Whether the stream has been closed. */
101    private boolean closed = false;
102
103    /** Whether the stream has reached the central directory - and thus found all entries. */
104    private boolean hitCentralDirectory = false;
105
106    /**
107     * When reading a stored entry that uses the data descriptor this
108     * stream has to read the full entry and caches it.  This is the
109     * cache.
110     */
111    private ByteArrayInputStream lastStoredEntry = null;
112
113    /** Whether the stream will try to read STORED entries that use a data descriptor. */
114    private boolean allowStoredEntriesWithDataDescriptor = false;
115
116    private static final int LFH_LEN = 30;
117    /*
118      local file header signature     WORD
119      version needed to extract       SHORT
120      general purpose bit flag        SHORT
121      compression method              SHORT
122      last mod file time              SHORT
123      last mod file date              SHORT
124      crc-32                          WORD
125      compressed size                 WORD
126      uncompressed size               WORD
127      file name length                SHORT
128      extra field length              SHORT
129    */
130
131    private static final int CFH_LEN = 46;
132    /*
133        central file header signature   WORD
134        version made by                 SHORT
135        version needed to extract       SHORT
136        general purpose bit flag        SHORT
137        compression method              SHORT
138        last mod file time              SHORT
139        last mod file date              SHORT
140        crc-32                          WORD
141        compressed size                 WORD
142        uncompressed size               WORD
143        file name length                SHORT
144        extra field length              SHORT
145        file comment length             SHORT
146        disk number start               SHORT
147        internal file attributes        SHORT
148        external file attributes        WORD
149        relative offset of local header WORD
150    */
151
152    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;
153
154    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
155    private final byte[] lfhBuf = new byte[LFH_LEN];
156    private final byte[] skipBuf = new byte[1024];
157    private final byte[] shortBuf = new byte[SHORT];
158    private final byte[] wordBuf = new byte[WORD];
159    private final byte[] twoDwordBuf = new byte[2 * DWORD];
160
161    private int entriesRead = 0;
162
163    /**
164     * Create an instance using UTF-8 encoding
165     * @param inputStream the stream to wrap
166     */
167    public ZipArchiveInputStream(final InputStream inputStream) {
168        this(inputStream, ZipEncodingHelper.UTF8);
169    }
170
171    /**
172     * Create an instance using the specified encoding
173     * @param inputStream the stream to wrap
174     * @param encoding the encoding to use for file names, use null
175     * for the platform's default encoding
176     * @since 1.5
177     */
178    public ZipArchiveInputStream(final InputStream inputStream, final String encoding) {
179        this(inputStream, encoding, true);
180    }
181
182    /**
183     * Create an instance using the specified encoding
184     * @param inputStream the stream to wrap
185     * @param encoding the encoding to use for file names, use null
186     * for the platform's default encoding
187     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
188     * Extra Fields (if present) to set the file names.
189     */
190    public ZipArchiveInputStream(final InputStream inputStream, final String encoding, final boolean useUnicodeExtraFields) {
191        this(inputStream, encoding, useUnicodeExtraFields, false);
192    }
193
194    /**
195     * Create an instance using the specified encoding
196     * @param inputStream the stream to wrap
197     * @param encoding the encoding to use for file names, use null
198     * for the platform's default encoding
199     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
200     * Extra Fields (if present) to set the file names.
201     * @param allowStoredEntriesWithDataDescriptor whether the stream
202     * will try to read STORED entries that use a data descriptor
203     * @since 1.1
204     */
205    public ZipArchiveInputStream(final InputStream inputStream,
206                                 final String encoding,
207                                 final boolean useUnicodeExtraFields,
208                                 final boolean allowStoredEntriesWithDataDescriptor) {
209        this.encoding = encoding;
210        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
211        this.useUnicodeExtraFields = useUnicodeExtraFields;
212        in = new PushbackInputStream(inputStream, buf.capacity());
213        this.allowStoredEntriesWithDataDescriptor =
214            allowStoredEntriesWithDataDescriptor;
215        // haven't read anything so far
216        buf.limit(0);
217    }
218
219    public ZipArchiveEntry getNextZipEntry() throws IOException {
220        boolean firstEntry = true;
221        if (closed || hitCentralDirectory) {
222            return null;
223        }
224        if (current != null) {
225            closeEntry();
226            firstEntry = false;
227        }
228
229        try {
230            if (firstEntry) {
231                // split archives have a special signature before the
232                // first local file header - look for it and fail with
233                // the appropriate error message if this is a split
234                // archive.
235                readFirstLocalFileHeader(lfhBuf);
236            } else {
237                readFully(lfhBuf);
238            }
239        } catch (final EOFException e) {
240            return null;
241        }
242
243        final ZipLong sig = new ZipLong(lfhBuf);
244        if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) {
245            hitCentralDirectory = true;
246            skipRemainderOfArchive();
247            return null;
248        }
249        if (!sig.equals(ZipLong.LFH_SIG)) {
250            throw new ZipException(String.format("Unexpected record signature: 0X%X", sig.getValue()));
251        }
252
253        int off = WORD;
254        current = new CurrentEntry();
255
256        final int versionMadeBy = ZipShort.getValue(lfhBuf, off);
257        off += SHORT;
258        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);
259
260        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfhBuf, off);
261        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
262        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
263        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
264        current.entry.setGeneralPurposeBit(gpFlag);
265
266        off += SHORT;
267
268        current.entry.setMethod(ZipShort.getValue(lfhBuf, off));
269        off += SHORT;
270
271        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfhBuf, off));
272        current.entry.setTime(time);
273        off += WORD;
274
275        ZipLong size = null, cSize = null;
276        if (!current.hasDataDescriptor) {
277            current.entry.setCrc(ZipLong.getValue(lfhBuf, off));
278            off += WORD;
279
280            cSize = new ZipLong(lfhBuf, off);
281            off += WORD;
282
283            size = new ZipLong(lfhBuf, off);
284            off += WORD;
285        } else {
286            off += 3 * WORD;
287        }
288
289        final int fileNameLen = ZipShort.getValue(lfhBuf, off);
290
291        off += SHORT;
292
293        final int extraLen = ZipShort.getValue(lfhBuf, off);
294        off += SHORT;
295
296        final byte[] fileName = new byte[fileNameLen];
297        readFully(fileName);
298        current.entry.setName(entryEncoding.decode(fileName), fileName);
299
300        final byte[] extraData = new byte[extraLen];
301        readFully(extraData);
302        current.entry.setExtra(extraData);
303
304        if (!hasUTF8Flag && useUnicodeExtraFields) {
305            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
306        }
307
308        processZip64Extra(size, cSize);
309
310        if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) {
311            if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) {
312                current.in = new UnshrinkingInputStream(new BoundedInputStream(in, current.entry.getCompressedSize()));
313            } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
314                current.in = new ExplodingInputStream(
315                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
316                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
317                        new BoundedInputStream(in, current.entry.getCompressedSize()));
318            } else if (current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
319                current.in = new BZip2CompressorInputStream(new BoundedInputStream(in, current.entry.getCompressedSize()));
320            }
321        }
322        
323        entriesRead++;
324        return current.entry;
325    }
326
327    /**
328     * Fills the given array with the first local file header and
329     * deals with splitting/spanning markers that may prefix the first
330     * LFH.
331     */
332    private void readFirstLocalFileHeader(final byte[] lfh) throws IOException {
333        readFully(lfh);
334        final ZipLong sig = new ZipLong(lfh);
335        if (sig.equals(ZipLong.DD_SIG)) {
336            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
337        }
338
339        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
340            // The archive is not really split as only one segment was
341            // needed in the end.  Just skip over the marker.
342            final byte[] missedLfhBytes = new byte[4];
343            readFully(missedLfhBytes);
344            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
345            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
346        }
347    }
348
349    /**
350     * Records whether a Zip64 extra is present and sets the size
351     * information from it if sizes are 0xFFFFFFFF and the entry
352     * doesn't use a data descriptor.
353     */
354    private void processZip64Extra(final ZipLong size, final ZipLong cSize) {
355        final Zip64ExtendedInformationExtraField z64 =
356            (Zip64ExtendedInformationExtraField) 
357            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
358        current.usesZip64 = z64 != null;
359        if (!current.hasDataDescriptor) {
360            if (z64 != null // same as current.usesZip64 but avoids NPE warning
361                    && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) {
362                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
363                current.entry.setSize(z64.getSize().getLongValue());
364            } else {
365                current.entry.setCompressedSize(cSize.getValue());
366                current.entry.setSize(size.getValue());
367            }
368        }
369    }
370
371    @Override
372    public ArchiveEntry getNextEntry() throws IOException {
373        return getNextZipEntry();
374    }
375
376    /**
377     * Whether this class is able to read the given entry.
378     *
379     * <p>May return false if it is set up to use encryption or a
380     * compression method that hasn't been implemented yet.</p>
381     * @since 1.1
382     */
383    @Override
384    public boolean canReadEntryData(final ArchiveEntry ae) {
385        if (ae instanceof ZipArchiveEntry) {
386            final ZipArchiveEntry ze = (ZipArchiveEntry) ae;
387            return ZipUtil.canHandleEntryData(ze)
388                && supportsDataDescriptorFor(ze);
389
390        }
391        return false;
392    }
393
394    @Override
395    public int read(final byte[] buffer, final int offset, final int length) throws IOException {
396        if (closed) {
397            throw new IOException("The stream is closed");
398        }
399
400        if (current == null) {
401            return -1;
402        }
403
404        // avoid int overflow, check null buffer
405        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
406            throw new ArrayIndexOutOfBoundsException();
407        }
408        
409        ZipUtil.checkRequestedFeatures(current.entry);
410        if (!supportsDataDescriptorFor(current.entry)) {
411            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
412                    current.entry);
413        }
414
415        int read;
416        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
417            read = readStored(buffer, offset, length);
418        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
419            read = readDeflated(buffer, offset, length);
420        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
421                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()
422                || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
423            read = current.in.read(buffer, offset, length);
424        } else {
425            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
426                    current.entry);
427        }
428        
429        if (read >= 0) {
430            current.crc.update(buffer, offset, read);
431        }
432        
433        return read;
434    }
435
436    /**
437     * Implementation of read for STORED entries.
438     */
439    private int readStored(final byte[] buffer, final int offset, final int length) throws IOException {
440
441        if (current.hasDataDescriptor) {
442            if (lastStoredEntry == null) {
443                readStoredEntry();
444            }
445            return lastStoredEntry.read(buffer, offset, length);
446        }
447
448        final long csize = current.entry.getSize();
449        if (current.bytesRead >= csize) {
450            return -1;
451        }
452
453        if (buf.position() >= buf.limit()) {
454            buf.position(0);
455            final int l = in.read(buf.array());
456            if (l == -1) {
457                return -1;
458            }
459            buf.limit(l);
460
461            count(l);
462            current.bytesReadFromStream += l;
463        }
464
465        int toRead = Math.min(buf.remaining(), length);
466        if ((csize - current.bytesRead) < toRead) {
467            // if it is smaller than toRead then it fits into an int
468            toRead = (int) (csize - current.bytesRead);
469        }
470        buf.get(buffer, offset, toRead);
471        current.bytesRead += toRead;
472        return toRead;
473    }
474
475    /**
476     * Implementation of read for DEFLATED entries.
477     */
478    private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException {
479        final int read = readFromInflater(buffer, offset, length);
480        if (read <= 0) {
481            if (inf.finished()) {
482                return -1;
483            } else if (inf.needsDictionary()) {
484                throw new ZipException("This archive needs a preset dictionary"
485                                       + " which is not supported by Commons"
486                                       + " Compress.");
487            } else if (read == -1) {
488                throw new IOException("Truncated ZIP file");
489            }
490        }
491        return read;
492    }
493
494    /**
495     * Potentially reads more bytes to fill the inflater's buffer and
496     * reads from it.
497     */
498    private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException {
499        int read = 0;
500        do {
501            if (inf.needsInput()) {
502                final int l = fill();
503                if (l > 0) {
504                    current.bytesReadFromStream += buf.limit();
505                } else if (l == -1) {
506                    return -1;
507                } else {
508                    break;
509                }
510            }
511            try {
512                read = inf.inflate(buffer, offset, length);
513            } catch (final DataFormatException e) {
514                throw (IOException) new ZipException(e.getMessage()).initCause(e);
515            }
516        } while (read == 0 && inf.needsInput());
517        return read;
518    }
519
520    @Override
521    public void close() throws IOException {
522        if (!closed) {
523            closed = true;
524            try {
525                in.close();
526            } finally {
527                inf.end();
528            }
529        }
530    }
531
532    /**
533     * Skips over and discards value bytes of data from this input
534     * stream.
535     *
536     * <p>This implementation may end up skipping over some smaller
537     * number of bytes, possibly 0, if and only if it reaches the end
538     * of the underlying stream.</p>
539     *
540     * <p>The actual number of bytes skipped is returned.</p>
541     *
542     * @param value the number of bytes to be skipped.
543     * @return the actual number of bytes skipped.
544     * @throws IOException - if an I/O error occurs.
545     * @throws IllegalArgumentException - if value is negative.
546     */
547    @Override
548    public long skip(final long value) throws IOException {
549        if (value >= 0) {
550            long skipped = 0;
551            while (skipped < value) {
552                final long rem = value - skipped;
553                final int x = read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
554                if (x == -1) {
555                    return skipped;
556                }
557                skipped += x;
558            }
559            return skipped;
560        }
561        throw new IllegalArgumentException();
562    }
563
564    /**
565     * Checks if the signature matches what is expected for a zip file.
566     * Does not currently handle self-extracting zips which may have arbitrary
567     * leading content.
568     *
569     * @param signature the bytes to check
570     * @param length    the number of bytes to check
571     * @return true, if this stream is a zip archive stream, false otherwise
572     */
573    public static boolean matches(final byte[] signature, final int length) {
574        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
575            return false;
576        }
577
578        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
579            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
580            || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
581            || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
582    }
583
584    private static boolean checksig(final byte[] signature, final byte[] expected) {
585        for (int i = 0; i < expected.length; i++) {
586            if (signature[i] != expected[i]) {
587                return false;
588            }
589        }
590        return true;
591    }
592
593    /**
594     * Closes the current ZIP archive entry and positions the underlying
595     * stream to the beginning of the next entry. All per-entry variables
596     * and data structures are cleared.
597     * <p>
598     * If the compressed size of this entry is included in the entry header,
599     * then any outstanding bytes are simply skipped from the underlying
600     * stream without uncompressing them. This allows an entry to be safely
601     * closed even if the compression method is unsupported.
602     * <p>
603     * In case we don't know the compressed size of this entry or have
604     * already buffered too much data from the underlying stream to support
605     * uncompression, then the uncompression process is completed and the
606     * end position of the stream is adjusted based on the result of that
607     * process.
608     *
609     * @throws IOException if an error occurs
610     */
611    private void closeEntry() throws IOException {
612        if (closed) {
613            throw new IOException("The stream is closed");
614        }
615        if (current == null) {
616            return;
617        }
618
619        // Ensure all entry bytes are read
620        if (currentEntryHasOutstandingBytes()) {
621            drainCurrentEntryData();
622        } else {
623            // this is guaranteed to exhaust the stream
624            skip(Long.MAX_VALUE); //NOSONAR
625
626            final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
627                       ? getBytesInflated() : current.bytesRead;
628
629            // this is at most a single read() operation and can't
630            // exceed the range of int
631            final int diff = (int) (current.bytesReadFromStream - inB);
632
633            // Pushback any required bytes
634            if (diff > 0) {
635                pushback(buf.array(), buf.limit() - diff, diff);
636                current.bytesReadFromStream -= diff;
637            }
638
639            // Drain remainder of entry if not all data bytes were required
640            if (currentEntryHasOutstandingBytes()) {
641                drainCurrentEntryData();
642            }
643        }
644
645        if (lastStoredEntry == null && current.hasDataDescriptor) {
646            readDataDescriptor();
647        }
648
649        inf.reset();
650        buf.clear().flip();
651        current = null;
652        lastStoredEntry = null;
653    }
654
655    /**
656     * If the compressed size of the current entry is included in the entry header
657     * and there are any outstanding bytes in the underlying stream, then
658     * this returns true.
659     *
660     * @return true, if current entry is determined to have outstanding bytes, false otherwise
661     */
662    private boolean currentEntryHasOutstandingBytes() {
663        return current.bytesReadFromStream <= current.entry.getCompressedSize()
664                && !current.hasDataDescriptor;
665    }
666
667    /**
668     * Read all data of the current entry from the underlying stream
669     * that hasn't been read, yet.
670     */
671    private void drainCurrentEntryData() throws IOException {
672        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
673        while (remaining > 0) {
674            final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
675            if (n < 0) {
676                throw new EOFException("Truncated ZIP entry: "
677                                       + ArchiveUtils.sanitize(current.entry.getName()));
678            }
679            count(n);
680            remaining -= n;
681        }
682    }
683
684    /**
685     * Get the number of bytes Inflater has actually processed.
686     *
687     * <p>for Java &lt; Java7 the getBytes* methods in
688     * Inflater/Deflater seem to return unsigned ints rather than
689     * longs that start over with 0 at 2^32.</p>
690     *
691     * <p>The stream knows how many bytes it has read, but not how
692     * many the Inflater actually consumed - it should be between the
693     * total number of bytes read for the entry and the total number
694     * minus the last read operation.  Here we just try to make the
695     * value close enough to the bytes we've read by assuming the
696     * number of bytes consumed must be smaller than (or equal to) the
697     * number of bytes read but not smaller by more than 2^32.</p>
698     */
699    private long getBytesInflated() {
700        long inB = inf.getBytesRead();
701        if (current.bytesReadFromStream >= TWO_EXP_32) {
702            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
703                inB += TWO_EXP_32;
704            }
705        }
706        return inB;
707    }
708
709    private int fill() throws IOException {
710        if (closed) {
711            throw new IOException("The stream is closed");
712        }
713        final int length = in.read(buf.array());
714        if (length > 0) {
715            buf.limit(length);
716            count(buf.limit());
717            inf.setInput(buf.array(), 0, buf.limit());
718        }
719        return length;
720    }
721
722    private void readFully(final byte[] b) throws IOException {
723        final int count = IOUtils.readFully(in, b);
724        count(count);
725        if (count < b.length) {
726            throw new EOFException();
727        }
728    }
729
730    private void readDataDescriptor() throws IOException {
731        readFully(wordBuf);
732        ZipLong val = new ZipLong(wordBuf);
733        if (ZipLong.DD_SIG.equals(val)) {
734            // data descriptor with signature, skip sig
735            readFully(wordBuf);
736            val = new ZipLong(wordBuf);
737        }
738        current.entry.setCrc(val.getValue());
739
740        // if there is a ZIP64 extra field, sizes are eight bytes
741        // each, otherwise four bytes each.  Unfortunately some
742        // implementations - namely Java7 - use eight bytes without
743        // using a ZIP64 extra field -
744        // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588
745
746        // just read 16 bytes and check whether bytes nine to twelve
747        // look like one of the signatures of what could follow a data
748        // descriptor (ignoring archive decryption headers for now).
749        // If so, push back eight bytes and assume sizes are four
750        // bytes, otherwise sizes are eight bytes each.
751        readFully(twoDwordBuf);
752        final ZipLong potentialSig = new ZipLong(twoDwordBuf, DWORD);
753        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
754            pushback(twoDwordBuf, DWORD, DWORD);
755            current.entry.setCompressedSize(ZipLong.getValue(twoDwordBuf));
756            current.entry.setSize(ZipLong.getValue(twoDwordBuf, WORD));
757        } else {
758            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(twoDwordBuf));
759            current.entry.setSize(ZipEightByteInteger.getLongValue(twoDwordBuf, DWORD));
760        }
761    }
762
763    /**
764     * Whether this entry requires a data descriptor this library can work with.
765     *
766     * @return true if allowStoredEntriesWithDataDescriptor is true,
767     * the entry doesn't require any data descriptor or the method is
768     * DEFLATED.
769     */
770    private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) {
771        return !entry.getGeneralPurposeBit().usesDataDescriptor()
772
773                || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED)
774                || entry.getMethod() == ZipEntry.DEFLATED;
775    }
776
777    /**
778     * Caches a stored entry that uses the data descriptor.
779     *
780     * <ul>
781     *   <li>Reads a stored entry until the signature of a local file
782     *     header, central directory header or data descriptor has been
783     *     found.</li>
784     *   <li>Stores all entry data in lastStoredEntry.</p>
785     *   <li>Rewinds the stream to position at the data
786     *     descriptor.</li>
787     *   <li>reads the data descriptor</li>
788     * </ul>
789     *
790     * <p>After calling this method the entry should know its size,
791     * the entry's data is cached and the stream is positioned at the
792     * next local file or central directory header.</p>
793     */
794    private void readStoredEntry() throws IOException {
795        final ByteArrayOutputStream bos = new ByteArrayOutputStream();
796        int off = 0;
797        boolean done = false;
798
799        // length of DD without signature
800        final int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD;
801
802        while (!done) {
803            final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off);
804            if (r <= 0) {
805                // read the whole archive without ever finding a
806                // central directory
807                throw new IOException("Truncated ZIP file");
808            }
809            if (r + off < 4) {
810                // buffer too small to check for a signature, loop
811                off += r;
812                continue;
813            }
814
815            done = bufferContainsSignature(bos, off, r, ddLen);
816            if (!done) {
817                off = cacheBytesRead(bos, off, r, ddLen);
818            }
819        }
820
821        final byte[] b = bos.toByteArray();
822        lastStoredEntry = new ByteArrayInputStream(b);
823    }
824
825    private static final byte[] LFH = ZipLong.LFH_SIG.getBytes();
826    private static final byte[] CFH = ZipLong.CFH_SIG.getBytes();
827    private static final byte[] DD = ZipLong.DD_SIG.getBytes();
828
829    /**
830     * Checks whether the current buffer contains the signature of a
831     * &quot;data descriptor&quot;, &quot;local file header&quot; or
832     * &quot;central directory entry&quot;.
833     *
834     * <p>If it contains such a signature, reads the data descriptor
835     * and positions the stream right after the data descriptor.</p>
836     */
837    private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen)
838            throws IOException {
839
840        boolean done = false;
841        int readTooMuch = 0;
842        for (int i = 0; !done && i < lastRead - 4; i++) {
843            if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) {
844                if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3])
845                    || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) {
846                    // found a LFH or CFH:
847                    readTooMuch = offset + lastRead - i - expectedDDLen;
848                    done = true;
849                }
850                else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) {
851                    // found DD:
852                    readTooMuch = offset + lastRead - i;
853                    done = true;
854                }
855                if (done) {
856                    // * push back bytes read in excess as well as the data
857                    //   descriptor
858                    // * copy the remaining bytes to cache
859                    // * read data descriptor
860                    pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch);
861                    bos.write(buf.array(), 0, i);
862                    readDataDescriptor();
863                }
864            }
865        }
866        return done;
867    }
868
869    /**
870     * If the last read bytes could hold a data descriptor and an
871     * incomplete signature then save the last bytes to the front of
872     * the buffer and cache everything in front of the potential data
873     * descriptor into the given ByteArrayOutputStream.
874     *
875     * <p>Data descriptor plus incomplete signature (3 bytes in the
876     * worst case) can be 20 bytes max.</p>
877     */
878    private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expecteDDLen) {
879        final int cacheable = offset + lastRead - expecteDDLen - 3;
880        if (cacheable > 0) {
881            bos.write(buf.array(), 0, cacheable);
882            System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3);
883            offset = expecteDDLen + 3;
884        } else {
885            offset += lastRead;
886        }
887        return offset;
888    }
889
890    private void pushback(final byte[] buf, final int offset, final int length) throws IOException {
891        ((PushbackInputStream) in).unread(buf, offset, length);
892        pushedBackBytes(length);
893    }
894
895    // End of Central Directory Record
896    //   end of central dir signature    WORD
897    //   number of this disk             SHORT
898    //   number of the disk with the
899    //   start of the central directory  SHORT
900    //   total number of entries in the
901    //   central directory on this disk  SHORT
902    //   total number of entries in
903    //   the central directory           SHORT
904    //   size of the central directory   WORD
905    //   offset of start of central
906    //   directory with respect to
907    //   the starting disk number        WORD
908    //   .ZIP file comment length        SHORT
909    //   .ZIP file comment               up to 64KB
910    //
911
912    /**
913     * Reads the stream until it find the "End of central directory
914     * record" and consumes it as well.
915     */
916    private void skipRemainderOfArchive() throws IOException {
917        // skip over central directory. One LFH has been read too much
918        // already.  The calculation discounts file names and extra
919        // data so it will be too short.
920        realSkip((long) entriesRead * CFH_LEN - LFH_LEN);
921        findEocdRecord();
922        realSkip((long) ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
923        readFully(shortBuf);
924        // file comment
925        realSkip(ZipShort.getValue(shortBuf));
926    }
927
928    /**
929     * Reads forward until the signature of the &quot;End of central
930     * directory&quot; record is found.
931     */
932    private void findEocdRecord() throws IOException {
933        int currentByte = -1;
934        boolean skipReadCall = false;
935        while (skipReadCall || (currentByte = readOneByte()) > -1) {
936            skipReadCall = false;
937            if (!isFirstByteOfEocdSig(currentByte)) {
938                continue;
939            }
940            currentByte = readOneByte();
941            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
942                if (currentByte == -1) {
943                    break;
944                }
945                skipReadCall = isFirstByteOfEocdSig(currentByte);
946                continue;
947            }
948            currentByte = readOneByte();
949            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
950                if (currentByte == -1) {
951                    break;
952                }
953                skipReadCall = isFirstByteOfEocdSig(currentByte);
954                continue;
955            }
956            currentByte = readOneByte();
957            if (currentByte == -1
958                || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
959                break;
960            }
961            skipReadCall = isFirstByteOfEocdSig(currentByte);
962        }
963    }
964
965    /**
966     * Skips bytes by reading from the underlying stream rather than
967     * the (potentially inflating) archive stream - which {@link
968     * #skip} would do.
969     *
970     * Also updates bytes-read counter.
971     */
972    private void realSkip(final long value) throws IOException {
973        if (value >= 0) {
974            long skipped = 0;
975            while (skipped < value) {
976                final long rem = value - skipped;
977                final int x = in.read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
978                if (x == -1) {
979                    return;
980                }
981                count(x);
982                skipped += x;
983            }
984            return;
985        }
986        throw new IllegalArgumentException();
987    }
988
989    /**
990     * Reads bytes by reading from the underlying stream rather than
991     * the (potentially inflating) archive stream - which {@link #read} would do.
992     *
993     * Also updates bytes-read counter.
994     */
995    private int readOneByte() throws IOException {
996        final int b = in.read();
997        if (b != -1) {
998            count(1);
999        }
1000        return b;
1001    }
1002
1003    private boolean isFirstByteOfEocdSig(final int b) {
1004        return b == ZipArchiveOutputStream.EOCD_SIG[0];
1005    }
1006
1007    /**
1008     * Structure collecting information for the entry that is
1009     * currently being read.
1010     */
1011    private static final class CurrentEntry {
1012
1013        /**
1014         * Current ZIP entry.
1015         */
1016        private final ZipArchiveEntry entry = new ZipArchiveEntry();
1017
1018        /**
1019         * Does the entry use a data descriptor?
1020         */
1021        private boolean hasDataDescriptor;
1022
1023        /**
1024         * Does the entry have a ZIP64 extended information extra field.
1025         */
1026        private boolean usesZip64;
1027
1028        /**
1029         * Number of bytes of entry content read by the client if the
1030         * entry is STORED.
1031         */
1032        private long bytesRead;
1033
1034        /**
1035         * Number of bytes of entry content read so from the stream.
1036         *
1037         * <p>This may be more than the actual entry's length as some
1038         * stuff gets buffered up and needs to be pushed back when the
1039         * end of the entry has been reached.</p>
1040         */
1041        private long bytesReadFromStream;
1042
1043        /**
1044         * The checksum calculated as the current entry is read.
1045         */
1046        private final CRC32 crc = new CRC32();
1047
1048        /**
1049         * The input stream decompressing the data for shrunk and imploded entries.
1050         */
1051        private InputStream in;
1052    }
1053
1054    /**
1055     * Bounded input stream adapted from commons-io
1056     */
1057    private class BoundedInputStream extends InputStream {
1058
1059        /** the wrapped input stream */
1060        private final InputStream in;
1061
1062        /** the max length to provide */
1063        private final long max;
1064
1065        /** the number of bytes already returned */
1066        private long pos = 0;
1067    
1068        /**
1069         * Creates a new <code>BoundedInputStream</code> that wraps the given input
1070         * stream and limits it to a certain size.
1071         *
1072         * @param in The wrapped input stream
1073         * @param size The maximum number of bytes to return
1074         */
1075        public BoundedInputStream(final InputStream in, final long size) {
1076            this.max = size;
1077            this.in = in;
1078        }
1079
1080        @Override
1081        public int read() throws IOException {
1082            if (max >= 0 && pos >= max) {
1083                return -1;
1084            }
1085            final int result = in.read();
1086            pos++;
1087            count(1);
1088            current.bytesReadFromStream++;
1089            return result;
1090        }
1091
1092        @Override
1093        public int read(final byte[] b) throws IOException {
1094            return this.read(b, 0, b.length);
1095        }
1096
1097        @Override
1098        public int read(final byte[] b, final int off, final int len) throws IOException {
1099            if (max >= 0 && pos >= max) {
1100                return -1;
1101            }
1102            final long maxRead = max >= 0 ? Math.min(len, max - pos) : len;
1103            final int bytesRead = in.read(b, off, (int) maxRead);
1104
1105            if (bytesRead == -1) {
1106                return -1;
1107            }
1108
1109            pos += bytesRead;
1110            count(bytesRead);
1111            current.bytesReadFromStream += bytesRead;
1112            return bytesRead;
1113        }
1114
1115        @Override
1116        public long skip(final long n) throws IOException {
1117            final long toSkip = max >= 0 ? Math.min(n, max - pos) : n;
1118            final long skippedBytes = in.skip(toSkip);
1119            pos += skippedBytes;
1120            return skippedBytes;
1121        }
1122    
1123        @Override
1124        public int available() throws IOException {
1125            if (max >= 0 && pos >= max) {
1126                return 0;
1127            }
1128            return in.available();
1129        }
1130    }
1131}