001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 *
017 */
018package org.apache.commons.compress.archivers.zip;
019
020import java.io.BufferedInputStream;
021import java.io.Closeable;
022import java.io.EOFException;
023import java.io.File;
024import java.io.IOException;
025import java.io.InputStream;
026import java.nio.ByteBuffer;
027import java.nio.channels.SeekableByteChannel;
028import java.nio.file.Files;
029import java.nio.file.StandardOpenOption;
030import java.util.Arrays;
031import java.util.Collections;
032import java.util.Comparator;
033import java.util.Enumeration;
034import java.util.EnumSet;
035import java.util.HashMap;
036import java.util.LinkedList;
037import java.util.List;
038import java.util.Map;
039import java.util.zip.Inflater;
040import java.util.zip.InflaterInputStream;
041import java.util.zip.ZipException;
042
043import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
044import org.apache.commons.compress.utils.IOUtils;
045
046import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
047import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
048import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
049import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
050import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT;
051
/**
 * Replacement for <code>java.util.zip.ZipFile</code>.
 *
 * <p>This class adds support for file name encodings other than UTF-8
 * (which is required to work on ZIP files created by native zip tools)
 * and is able to skip a preamble like the one found in self
 * extracting archives.  Furthermore it returns instances of
 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
 * instead of <code>java.util.zip.ZipEntry</code>.</p>
 *
 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
 * have to reimplement all methods anyway.  Like
 * <code>java.util.zip.ZipFile</code>, it uses SeekableByteChannel under the
 * covers and supports compressed and uncompressed entries.  As of
 * Apache Commons Compress 1.3 it also transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
 *
 * <p>The method signatures mimic the ones of
 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:</p>
 *
 * <ul>
 *   <li>There is no getName method.</li>
 *   <li>entries has been renamed to getEntries.</li>
 *   <li>getEntries and getEntry return
 *   <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
 *   instances.</li>
 *   <li>close is allowed to throw IOException.</li>
 * </ul>
 *
 */
083public class ZipFile implements Closeable {
    /** Initial capacity passed to the {@code HashMap} that backs {@link #nameMap}. */
    private static final int HASH_SIZE = 509;
    /**
     * Mask keeping the low four bits; applied to the shifted "version
     * made by" value when the platform of an entry is determined.
     */
    static final int NIBLET_MASK = 0x0f;
    /**
     * Number of bits the "version made by" value is shifted to the
     * right before {@link #NIBLET_MASK} is applied.
     */
    static final int BYTE_SHIFT = 8;
    // Small index constants that are not referenced in this part of the
    // file (presumably byte positions used when comparing signatures -
    // TODO confirm against their users).
    private static final int POS_0 = 0;
    private static final int POS_1 = 1;
    private static final int POS_2 = 2;
    private static final int POS_3 = 3;

    /**
     * List of entries in the order they appear inside the central
     * directory.
     */
    private final List<ZipArchiveEntry> entries =
        new LinkedList<>();

    /**
     * Maps an entry name to the list of all ZipArchiveEntrys carrying
     * that name, name -> actual entries.
     */
    private final Map<String, LinkedList<ZipArchiveEntry>> nameMap =
        new HashMap<>(HASH_SIZE);
104
    /**
     * Holder for the two archive positions tracked per entry; both
     * start out as -1 until a real value has been stored.
     */
    private static final class OffsetEntry {
        // offset of the entry's local file header, taken from the central directory
        private long headerOffset = -1;
        // offset at which the entry's data starts; used as the start of the entry's streams
        private long dataOffset = -1;
    }
109
    /**
     * The encoding to use for filenames and the file comment.
     *
     * <p>For a list of possible values see <a
     * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
     * Defaults to UTF-8, {@code null} stands for the platform's
     * default encoding.</p>
     */
    private final String encoding;

    /**
     * The zip encoding to use for filenames and the file comment,
     * obtained from {@link #encoding} via ZipEncodingHelper.
     */
    private final ZipEncoding zipEncoding;

    /**
     * File name of actual source; only used for diagnostic output in
     * the part of the class visible here.
     */
    private final String archiveName;

    /**
     * The actual data source.
     */
    private final SeekableByteChannel archive;

    /**
     * Whether to look for and use Unicode extra fields.
     */
    private final boolean useUnicodeExtraFields;

    /**
     * Whether the file is closed.
     *
     * <p>Starts out as {@code true} and is only switched to
     * {@code false} once the constructor has read the archive
     * successfully.</p>
     */
    private volatile boolean closed = true;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    // byte arrays sized for one DWORD, WORD, central file header and SHORT respectively
    private final byte[] dwordBuf = new byte[DWORD];
    private final byte[] wordBuf = new byte[WORD];
    private final byte[] cfhBuf = new byte[CFH_LEN];
    private final byte[] shortBuf = new byte[SHORT];
    // ByteBuffer views sharing their storage with the arrays above, used for channel reads
    private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf);
    private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf);
    private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf);
152
153    /**
154     * Opens the given file for reading, assuming "UTF8" for file names.
155     *
156     * @param f the archive.
157     *
158     * @throws IOException if an error occurs while reading the file.
159     */
160    public ZipFile(final File f) throws IOException {
161        this(f, ZipEncodingHelper.UTF8);
162    }
163
164    /**
165     * Opens the given file for reading, assuming "UTF8".
166     *
167     * @param name name of the archive.
168     *
169     * @throws IOException if an error occurs while reading the file.
170     */
171    public ZipFile(final String name) throws IOException {
172        this(new File(name), ZipEncodingHelper.UTF8);
173    }
174
    /**
     * Opens the file of the given name for reading, assuming the
     * specified encoding for file names and scanning for unicode
     * extra fields.
     *
     * @param name name of the archive.
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(final String name, final String encoding) throws IOException {
        this(new File(name), encoding, true);
    }
188
    /**
     * Opens the given file for reading, assuming the specified
     * encoding for file names and scanning for unicode extra fields.
     *
     * <p>Equivalent to calling the three-argument constructor with
     * {@code useUnicodeExtraFields} set to {@code true}.</p>
     *
     * @param f the archive.
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(final File f, final String encoding) throws IOException {
        this(f, encoding, true);
    }
202
203    /**
204     * Opens the given file for reading, assuming the specified
205     * encoding for file names.
206     *
207     * @param f the archive.
208     * @param encoding the encoding to use for file names, use null
209     * for the platform's default encoding
210     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
211     * Extra Fields (if present) to set the file names.
212     *
213     * @throws IOException if an error occurs while reading the file.
214     */
215    public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields)
216        throws IOException {
217        this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)),
218             f.getAbsolutePath(), encoding, useUnicodeExtraFields, true);
219    }
220
221    /**
222     * Opens the given channel for reading, assuming "UTF8" for file names.
223     *
224     * <p>{@link
225     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
226     * allows you to read from an in-memory archive.</p>
227     *
228     * @param channel the archive.
229     *
230     * @throws IOException if an error occurs while reading the file.
231     * @since 1.13
232     */
233    public ZipFile(final SeekableByteChannel channel)
234            throws IOException {
235        this(channel, "unknown archive", ZipEncodingHelper.UTF8, true);
236    }
237
238    /**
239     * Opens the given channel for reading, assuming the specified
240     * encoding for file names.
241     *
242     * <p>{@link
243     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
244     * allows you to read from an in-memory archive.</p>
245     *
246     * @param channel the archive.
247     * @param encoding the encoding to use for file names, use null
248     * for the platform's default encoding
249     *
250     * @throws IOException if an error occurs while reading the file.
251     * @since 1.13
252     */
253    public ZipFile(final SeekableByteChannel channel, final String encoding)
254        throws IOException {
255        this(channel, "unknown archive", encoding, true);
256    }
257
    /**
     * Opens the given channel for reading, assuming the specified
     * encoding for file names.
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * <p>If reading the archive fails the channel is left open, it
     * remains the caller's responsibility to close it.</p>
     *
     * @param channel the archive.
     * @param archiveName name of the archive, used for error messages only.
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     *
     * @throws IOException if an error occurs while reading the file.
     * @since 1.13
     */
    public ZipFile(final SeekableByteChannel channel, final String archiveName,
                   final String encoding, final boolean useUnicodeExtraFields)
        throws IOException {
        // closeOnError is false: the channel was supplied by the caller
        this(channel, archiveName, encoding, useUnicodeExtraFields, false);
    }
281
282    private ZipFile(final SeekableByteChannel channel, final String archiveName,
283                    final String encoding, final boolean useUnicodeExtraFields,
284                    final boolean closeOnError)
285        throws IOException {
286        this.archiveName = archiveName;
287        this.encoding = encoding;
288        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
289        this.useUnicodeExtraFields = useUnicodeExtraFields;
290        archive = channel;
291        boolean success = false;
292        try {
293            final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag =
294                populateFromCentralDirectory();
295            resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
296            success = true;
297        } finally {
298            closed = !success;
299            if (!success && closeOnError) {
300                IOUtils.closeQuietly(archive);
301            }
302        }
303    }
304
    /**
     * The encoding to use for filenames and the file comment, exactly
     * as it has been passed to the constructor.
     *
     * @return null if using the platform's default character encoding.
     */
    public String getEncoding() {
        return encoding;
    }
313
    /**
     * Closes the archive by marking this instance as closed and
     * closing the underlying channel.
     * @throws IOException if an error occurs closing the archive.
     */
    @Override
    public void close() throws IOException {
        // apart from the constructor this flag is only written here
        // and read in finalize() which can never be run in parallel.
        // no synchronization needed.
        // The flag is set before the channel is closed, so it stays
        // set even if closing the channel throws.
        closed = true;

        archive.close();
    }
327
    /**
     * Closes a zipfile quietly; throws no I/O fault and does nothing
     * on a null parameter.  Delegates to IOUtils.closeQuietly.
     * @param zipfile file to close, can be null
     */
    public static void closeQuietly(final ZipFile zipfile) {
        IOUtils.closeQuietly(zipfile);
    }
336
    /**
     * Returns all entries.
     *
     * <p>Entries will be returned in the same order they appear
     * within the archive's central directory.</p>
     *
     * <p>The enumeration iterates over the internal list of entries
     * directly, no copy is made.</p>
     *
     * @return all entries as {@link ZipArchiveEntry} instances
     */
    public Enumeration<ZipArchiveEntry> getEntries() {
        return Collections.enumeration(entries);
    }
348
349    /**
350     * Returns all entries in physical order.
351     *
352     * <p>Entries will be returned in the same order their contents
353     * appear within the archive.</p>
354     *
355     * @return all entries as {@link ZipArchiveEntry} instances
356     *
357     * @since 1.1
358     */
359    public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() {
360        final ZipArchiveEntry[] allEntries = entries.toArray(new ZipArchiveEntry[entries.size()]);
361        Arrays.sort(allEntries, offsetComparator);
362        return Collections.enumeration(Arrays.asList(allEntries));
363    }
364
365    /**
366     * Returns a named entry - or {@code null} if no entry by
367     * that name exists.
368     *
369     * <p>If multiple entries with the same name exist the first entry
370     * in the archive's central directory by that name is
371     * returned.</p>
372     *
373     * @param name name of the entry.
374     * @return the ZipArchiveEntry corresponding to the given name - or
375     * {@code null} if not present.
376     */
377    public ZipArchiveEntry getEntry(final String name) {
378        final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
379        return entriesOfThatName != null ? entriesOfThatName.getFirst() : null;
380    }
381
382    /**
383     * Returns all named entries in the same order they appear within
384     * the archive's central directory.
385     *
386     * @param name name of the entry.
387     * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
388     * given name
389     * @since 1.6
390     */
391    public Iterable<ZipArchiveEntry> getEntries(final String name) {
392        final List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
393        return entriesOfThatName != null ? entriesOfThatName
394            : Collections.<ZipArchiveEntry>emptyList();
395    }
396
397    /**
398     * Returns all named entries in the same order their contents
399     * appear within the archive.
400     *
401     * @param name name of the entry.
402     * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
403     * given name
404     * @since 1.6
405     */
406    public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) {
407        ZipArchiveEntry[] entriesOfThatName = new ZipArchiveEntry[0];
408        if (nameMap.containsKey(name)) {
409            entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName);
410            Arrays.sort(entriesOfThatName, offsetComparator);
411        }
412        return Arrays.asList(entriesOfThatName);
413    }
414
    /**
     * Whether this class is able to read the given entry.
     *
     * <p>May return false if it is set up to use encryption or a
     * compression method that hasn't been implemented yet.  The
     * decision is delegated to ZipUtil.canHandleEntryData.</p>
     * @since 1.1
     * @param ze the entry
     * @return whether this class is able to read the given entry.
     */
    public boolean canReadEntryData(final ZipArchiveEntry ze) {
        return ZipUtil.canHandleEntryData(ze);
    }
427
428    /**
429     * Expose the raw stream of the archive entry (compressed form).
430     *
431     * <p>This method does not relate to how/if we understand the payload in the
432     * stream, since we really only intend to move it on to somewhere else.</p>
433     *
434     * @param ze The entry to get the stream for
435     * @return The raw input stream containing (possibly) compressed data.
436     * @since 1.11
437     */
438    public InputStream getRawInputStream(final ZipArchiveEntry ze) {
439        if (!(ze instanceof Entry)) {
440            return null;
441        }
442        final OffsetEntry offsetEntry = ((Entry) ze).getOffsetEntry();
443        final long start = offsetEntry.dataOffset;
444        return new BoundedInputStream(start, ze.getCompressedSize());
445    }
446
447
448    /**
449     * Transfer selected entries from this zipfile to a given #ZipArchiveOutputStream.
450     * Compression and all other attributes will be as in this file.
451     * <p>This method transfers entries based on the central directory of the zip file.</p>
452     *
453     * @param target The zipArchiveOutputStream to write the entries to
454     * @param predicate A predicate that selects which entries to write
455     * @throws IOException on error
456     */
457    public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate)
458            throws IOException {
459        final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder();
460        while (src.hasMoreElements()) {
461            final ZipArchiveEntry entry = src.nextElement();
462            if (predicate.test( entry)) {
463                target.addRawArchiveEntry(entry, getRawInputStream(entry));
464            }
465        }
466    }
467
468    /**
469     * Returns an InputStream for reading the contents of the given entry.
470     *
471     * @param ze the entry to get the stream for.
472     * @return a stream to read the entry from.
473     * @throws IOException if unable to create an input stream from the zipentry
474     * @throws ZipException if the zipentry uses an unsupported feature
475     */
476    public InputStream getInputStream(final ZipArchiveEntry ze)
477        throws IOException, ZipException {
478        if (!(ze instanceof Entry)) {
479            return null;
480        }
481        // cast valididty is checked just above
482        final OffsetEntry offsetEntry = ((Entry) ze).getOffsetEntry();
483        ZipUtil.checkRequestedFeatures(ze);
484        final long start = offsetEntry.dataOffset;
485        // doesn't get closed if the method is not supported, but doesn't hold any resources either
486        final BoundedInputStream bis =
487            new BoundedInputStream(start, ze.getCompressedSize()); //NOSONAR
488        switch (ZipMethod.getMethodByCode(ze.getMethod())) {
489            case STORED:
490                return bis;
491            case UNSHRINKING:
492                return new UnshrinkingInputStream(bis);
493            case IMPLODING:
494                return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(),
495                        ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), new BufferedInputStream(bis));
496            case DEFLATED:
497                bis.addDummy();
498                final Inflater inflater = new Inflater(true);
499                return new InflaterInputStream(bis, inflater) {
500                    @Override
501                    public void close() throws IOException {
502                        try {
503                            super.close();
504                        } finally {
505                            inflater.end();
506                        }
507                    }
508                };
509            case BZIP2:
510                return new BZip2CompressorInputStream(bis);
511            case AES_ENCRYPTED:
512            case ENHANCED_DEFLATED:
513            case EXPANDING_LEVEL_1:
514            case EXPANDING_LEVEL_2:
515            case EXPANDING_LEVEL_3:
516            case EXPANDING_LEVEL_4:
517            case JPEG:
518            case LZMA:
519            case PKWARE_IMPLODING:
520            case PPMD:
521            case TOKENIZATION:
522            case UNKNOWN:
523            case WAVPACK:
524            default:
525                throw new ZipException("Found unsupported compression method "
526                                       + ze.getMethod());
527        }
528    }
529
530    /**
531     * <p>
532     * Convenience method to return the entry's content as a String if isUnixSymlink()
533     * returns true for it, otherwise returns null.
534     * </p>
535     *
536     * <p>This method assumes the symbolic link's file name uses the
537     * same encoding that as been specified for this ZipFile.</p>
538     *
539     * @param entry ZipArchiveEntry object that represents the symbolic link
540     * @return entry's content as a String
541     * @throws IOException problem with content's input stream
542     * @since 1.5
543     */
544    public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException {
545        if (entry != null && entry.isUnixSymlink()) {
546            try (InputStream in = getInputStream(entry)) {
547                return zipEncoding.decode(IOUtils.toByteArray(in));
548            }
549        }
550        return null;
551    }
552
553    /**
554     * Ensures that the close method of this zipfile is called when
555     * there are no more references to it.
556     * @see #close()
557     */
558    @Override
559    protected void finalize() throws Throwable {
560        try {
561            if (!closed) {
562                System.err.println("Cleaning up unclosed ZipFile for archive "
563                                   + archiveName);
564                close();
565            }
566        } finally {
567            super.finalize();
568        }
569    }
570
    /**
     * Length of a "central directory" entry structure without file
     * name, extra fields or comment.
     *
     * <p>The leading signature is not part of the sum, it is read
     * separately before each entry.</p>
     */
    private static final int CFH_LEN =
        /* version made by                 */ SHORT
        /* version needed to extract       */ + SHORT
        /* general purpose bit flag        */ + SHORT
        /* compression method              */ + SHORT
        /* last mod file time              */ + SHORT
        /* last mod file date              */ + SHORT
        /* crc-32                          */ + WORD
        /* compressed size                 */ + WORD
        /* uncompressed size               */ + WORD
        /* filename length                 */ + SHORT
        /* extra field length              */ + SHORT
        /* file comment length             */ + SHORT
        /* disk number start               */ + SHORT
        /* internal file attributes        */ + SHORT
        /* external file attributes        */ + WORD
        /* relative offset of local header */ + WORD;

    /**
     * Numeric value of the signature that introduces each entry of
     * the central directory.
     */
    private static final long CFH_SIG =
        ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
595
596    /**
597     * Reads the central directory of the given archive and populates
598     * the internal tables with ZipArchiveEntry instances.
599     *
600     * <p>The ZipArchiveEntrys will know all data that can be obtained from
601     * the central directory alone, but not the data that requires the
602     * local file header or additional data to be read.</p>
603     *
604     * @return a map of zipentries that didn't have the language
605     * encoding flag set when read.
606     */
607    private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory()
608        throws IOException {
609        final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag =
610            new HashMap<>();
611
612        positionAtCentralDirectory();
613
614        wordBbuf.rewind();
615        IOUtils.readFully(archive, wordBbuf);
616        long sig = ZipLong.getValue(wordBuf);
617
618        if (sig != CFH_SIG && startsWithLocalFileHeader()) {
619            throw new IOException("central directory is empty, can't expand"
620                                  + " corrupt archive.");
621        }
622
623        while (sig == CFH_SIG) {
624            readCentralDirectoryEntry(noUTF8Flag);
625            wordBbuf.rewind();
626            IOUtils.readFully(archive, wordBbuf);
627            sig = ZipLong.getValue(wordBuf);
628        }
629        return noUTF8Flag;
630    }
631
    /**
     * Reads an individual entry of the central directory, creates a
     * ZipArchiveEntry from it and adds it to the list of entries.
     *
     * <p>Expects the channel to be positioned right behind the
     * entry's signature.  The fixed size part of the entry is read
     * into the shared buffer first, the variable length parts (file
     * name, extra data and comment) follow in exactly this order.</p>
     *
     * @param noUTF8Flag map used to collect entries that don't have
     * their UTF-8 flag set and whose name will be set by data read
     * from the local file header later.  The current entry may be
     * added to this map.
     * @throws IOException if reading from the archive fails.
     */
    private void
        readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
        throws IOException {
        cfhBbuf.rewind();
        IOUtils.readFully(archive, cfhBbuf);
        // off tracks the position of the next field inside cfhBuf
        int off = 0;
        final OffsetEntry offset = new OffsetEntry();
        final Entry ze = new Entry(offset);

        final int versionMadeBy = ZipShort.getValue(cfhBuf, off);
        off += SHORT;
        ze.setVersionMadeBy(versionMadeBy);
        // the platform is taken from the low four bits of the upper byte
        ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);

        ze.setVersionRequired(ZipShort.getValue(cfhBuf, off));
        off += SHORT; // version required

        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off);
        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
        // the flag of the entry overrides the encoding configured for the archive
        final ZipEncoding entryEncoding =
            hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
        ze.setGeneralPurposeBit(gpFlag);
        ze.setRawFlag(ZipShort.getValue(cfhBuf, off));

        off += SHORT;

        //noinspection MagicConstant
        ze.setMethod(ZipShort.getValue(cfhBuf, off));
        off += SHORT;

        // last mod file time and date are read as a single WORD
        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off));
        ze.setTime(time);
        off += WORD;

        ze.setCrc(ZipLong.getValue(cfhBuf, off));
        off += WORD;

        ze.setCompressedSize(ZipLong.getValue(cfhBuf, off));
        off += WORD;

        ze.setSize(ZipLong.getValue(cfhBuf, off));
        off += WORD;

        final int fileNameLen = ZipShort.getValue(cfhBuf, off);
        off += SHORT;

        final int extraLen = ZipShort.getValue(cfhBuf, off);
        off += SHORT;

        final int commentLen = ZipShort.getValue(cfhBuf, off);
        off += SHORT;

        final int diskStart = ZipShort.getValue(cfhBuf, off);
        off += SHORT;

        ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off));
        off += SHORT;

        ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off));
        off += WORD;

        // the file name directly follows the fixed size part inside the archive
        final byte[] fileName = new byte[fileNameLen];
        IOUtils.readFully(archive, ByteBuffer.wrap(fileName));
        ze.setName(entryEncoding.decode(fileName), fileName);

        // LFH offset,
        offset.headerOffset = ZipLong.getValue(cfhBuf, off);
        // data offset will be filled later
        entries.add(ze);

        final byte[] cdExtraData = new byte[extraLen];
        IOUtils.readFully(archive, ByteBuffer.wrap(cdExtraData));
        ze.setCentralDirectoryExtra(cdExtraData);

        // must happen after the extra data has been set, the Zip64 data is looked up there
        setSizesAndOffsetFromZip64Extra(ze, offset, diskStart);

        final byte[] comment = new byte[commentLen];
        IOUtils.readFully(archive, ByteBuffer.wrap(comment));
        ze.setComment(entryEncoding.decode(comment));

        // keep the raw bytes of name and comment for entries without the UTF-8 flag
        if (!hasUTF8Flag && useUnicodeExtraFields) {
            noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
        }
    }
725
726    /**
727     * If the entry holds a Zip64 extended information extra field,
728     * read sizes from there if the entry's sizes are set to
729     * 0xFFFFFFFFF, do the same for the offset of the local file
730     * header.
731     *
732     * <p>Ensures the Zip64 extra either knows both compressed and
733     * uncompressed size or neither of both as the internal logic in
734     * ExtraFieldUtils forces the field to create local header data
735     * even if they are never used - and here a field with only one
736     * size would be invalid.</p>
737     */
738    private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze,
739                                                 final OffsetEntry offset,
740                                                 final int diskStart)
741        throws IOException {
742        final Zip64ExtendedInformationExtraField z64 =
743            (Zip64ExtendedInformationExtraField)
744            ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
745        if (z64 != null) {
746            final boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC;
747            final boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC;
748            final boolean hasRelativeHeaderOffset =
749                offset.headerOffset == ZIP64_MAGIC;
750            z64.reparseCentralDirectoryData(hasUncompressedSize,
751                                            hasCompressedSize,
752                                            hasRelativeHeaderOffset,
753                                            diskStart == ZIP64_MAGIC_SHORT);
754
755            if (hasUncompressedSize) {
756                ze.setSize(z64.getSize().getLongValue());
757            } else if (hasCompressedSize) {
758                z64.setSize(new ZipEightByteInteger(ze.getSize()));
759            }
760
761            if (hasCompressedSize) {
762                ze.setCompressedSize(z64.getCompressedSize().getLongValue());
763            } else if (hasUncompressedSize) {
764                z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize()));
765            }
766
767            if (hasRelativeHeaderOffset) {
768                offset.headerOffset =
769                    z64.getRelativeHeaderOffset().getLongValue();
770            }
771        }
772    }
773
    /**
     * Length of the "End of central directory record" - which is
     * supposed to be the last structure of the archive - without file
     * comment.
     */
    static final int MIN_EOCD_SIZE =
        /* end of central dir signature    */ WORD
        /* number of this disk             */ + SHORT
        /* number of the disk with the     */
        /* start of the central directory  */ + SHORT
        /* total number of entries in      */
        /* the central dir on this disk    */ + SHORT
        /* total number of entries in      */
        /* the central dir                 */ + SHORT
        /* size of the central directory   */ + WORD
        /* offset of start of central      */
        /* directory with respect to       */
        /* the starting disk number        */ + WORD
        /* zipfile comment length          */ + SHORT;

    /**
     * Maximum length of the "End of central directory record" with a
     * file comment, i.e. the minimum length plus the longest comment
     * allowed for.
     */
    private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
        /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT;

    /**
     * Offset of the field that holds the location of the first
     * central directory entry inside the "End of central directory
     * record" relative to the start of the "End of central directory
     * record".
     *
     * <p>Sums up the same fields as {@link #MIN_EOCD_SIZE} up to and
     * including the size of the central directory.</p>
     */
    private static final int CFD_LOCATOR_OFFSET =
        /* end of central dir signature    */ WORD
        /* number of this disk             */ + SHORT
        /* number of the disk with the     */
        /* start of the central directory  */ + SHORT
        /* total number of entries in      */
        /* the central dir on this disk    */ + SHORT
        /* total number of entries in      */
        /* the central dir                 */ + SHORT
        /* size of the central directory   */ + WORD;

    /**
     * Length of the "Zip64 end of central directory locator" - which
     * should be right in front of the "end of central directory
     * record" if one is present at all.
     */
    private static final int ZIP64_EOCDL_LENGTH =
        /* zip64 end of central dir locator sig */ WORD
        /* number of the disk with the start    */
        /* start of the zip64 end of            */
        /* central directory                    */ + WORD
        /* relative offset of the zip64         */
        /* end of central directory record      */ + DWORD
        /* total number of disks                */ + WORD;
831
832    /**
833     * Offset of the field that holds the location of the "Zip64 end
834     * of central directory record" inside the "Zip64 end of central
835     * directory locator" relative to the start of the "Zip64 end of
836     * central directory locator".
837     */
838    private static final int ZIP64_EOCDL_LOCATOR_OFFSET =
839        /* zip64 end of central dir locator sig */ WORD
840        /* number of the disk with the start    */
841        /* start of the zip64 end of            */
842        /* central directory                    */ + WORD;
843
844    /**
845     * Offset of the field that holds the location of the first
846     * central directory entry inside the "Zip64 end of central
847     * directory record" relative to the start of the "Zip64 end of
848     * central directory record".
849     */
850    private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET =
851        /* zip64 end of central dir        */
852        /* signature                       */ WORD
853        /* size of zip64 end of central    */
854        /* directory record                */ + DWORD
855        /* version made by                 */ + SHORT
856        /* version needed to extract       */ + SHORT
857        /* number of this disk             */ + WORD
858        /* number of the disk with the     */
859        /* start of the central directory  */ + WORD
860        /* total number of entries in the  */
861        /* central directory on this disk  */ + DWORD
862        /* total number of entries in the  */
863        /* central directory               */ + DWORD
864        /* size of the central directory   */ + DWORD;
865
866    /**
867     * Searches for either the &quot;Zip64 end of central directory
868     * locator&quot; or the &quot;End of central dir record&quot;, parses
869     * it and positions the stream at the first central directory
870     * record.
871     */
872    private void positionAtCentralDirectory()
873        throws IOException {
874        positionAtEndOfCentralDirectoryRecord();
875        boolean found = false;
876        final boolean searchedForZip64EOCD =
877            archive.position() > ZIP64_EOCDL_LENGTH;
878        if (searchedForZip64EOCD) {
879            archive.position(archive.position() - ZIP64_EOCDL_LENGTH);
880            wordBbuf.rewind();
881            IOUtils.readFully(archive, wordBbuf);
882            found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG,
883                                  wordBuf);
884        }
885        if (!found) {
886            // not a ZIP64 archive
887            if (searchedForZip64EOCD) {
888                skipBytes(ZIP64_EOCDL_LENGTH - WORD);
889            }
890            positionAtCentralDirectory32();
891        } else {
892            positionAtCentralDirectory64();
893        }
894    }
895
896    /**
897     * Parses the &quot;Zip64 end of central directory locator&quot;,
898     * finds the &quot;Zip64 end of central directory record&quot; using the
899     * parsed information, parses that and positions the stream at the
900     * first central directory record.
901     *
902     * Expects stream to be positioned right behind the &quot;Zip64
903     * end of central directory locator&quot;'s signature.
904     */
905    private void positionAtCentralDirectory64()
906        throws IOException {
907        skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET
908                  - WORD /* signature has already been read */);
909        dwordBbuf.rewind();
910        IOUtils.readFully(archive, dwordBbuf);
911        archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
912        wordBbuf.rewind();
913        IOUtils.readFully(archive, wordBbuf);
914        if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) {
915            throw new ZipException("archive's ZIP64 end of central "
916                                   + "directory locator is corrupt.");
917        }
918        skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET
919                  - WORD /* signature has already been read */);
920        dwordBbuf.rewind();
921        IOUtils.readFully(archive, dwordBbuf);
922        archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
923    }
924
925    /**
926     * Parses the &quot;End of central dir record&quot; and positions
927     * the stream at the first central directory record.
928     *
929     * Expects stream to be positioned at the beginning of the
930     * &quot;End of central dir record&quot;.
931     */
932    private void positionAtCentralDirectory32()
933        throws IOException {
934        skipBytes(CFD_LOCATOR_OFFSET);
935        wordBbuf.rewind();
936        IOUtils.readFully(archive, wordBbuf);
937        archive.position(ZipLong.getValue(wordBuf));
938    }
939
940    /**
941     * Searches for the and positions the stream at the start of the
942     * &quot;End of central dir record&quot;.
943     */
944    private void positionAtEndOfCentralDirectoryRecord()
945        throws IOException {
946        final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE,
947                                             ZipArchiveOutputStream.EOCD_SIG);
948        if (!found) {
949            throw new ZipException("archive is not a ZIP archive");
950        }
951    }
952
953    /**
954     * Searches the archive backwards from minDistance to maxDistance
955     * for the given signature, positions the RandomaccessFile right
956     * at the signature if it has been found.
957     */
958    private boolean tryToLocateSignature(final long minDistanceFromEnd,
959                                         final long maxDistanceFromEnd,
960                                         final byte[] sig) throws IOException {
961        boolean found = false;
962        long off = archive.size() - minDistanceFromEnd;
963        final long stopSearching =
964            Math.max(0L, archive.size() - maxDistanceFromEnd);
965        if (off >= 0) {
966            for (; off >= stopSearching; off--) {
967                archive.position(off);
968                try {
969                    wordBbuf.rewind();
970                    IOUtils.readFully(archive, wordBbuf);
971                    wordBbuf.flip();
972                } catch (EOFException ex) {
973                    break;
974                }
975                int curr = wordBbuf.get();
976                if (curr == sig[POS_0]) {
977                    curr = wordBbuf.get();
978                    if (curr == sig[POS_1]) {
979                        curr = wordBbuf.get();
980                        if (curr == sig[POS_2]) {
981                            curr = wordBbuf.get();
982                            if (curr == sig[POS_3]) {
983                                found = true;
984                                break;
985                            }
986                        }
987                    }
988                }
989            }
990        }
991        if (found) {
992            archive.position(off);
993        }
994        return found;
995    }
996
997    /**
998     * Skips the given number of bytes or throws an EOFException if
999     * skipping failed.
1000     */ 
1001    private void skipBytes(final int count) throws IOException {
1002        long currentPosition = archive.position();
1003        long newPosition = currentPosition + count;
1004        if (newPosition > archive.size()) {
1005            throw new EOFException();
1006        }
1007        archive.position(newPosition);
1008    }
1009
1010    /**
1011     * Number of bytes in local file header up to the &quot;length of
1012     * filename&quot; entry.
1013     */
1014    private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
1015        /* local file header signature     */ WORD
1016        /* version needed to extract       */ + SHORT
1017        /* general purpose bit flag        */ + SHORT
1018        /* compression method              */ + SHORT
1019        /* last mod file time              */ + SHORT
1020        /* last mod file date              */ + SHORT
1021        /* crc-32                          */ + WORD
1022        /* compressed size                 */ + WORD
1023        /* uncompressed size               */ + (long) WORD;
1024
1025    /**
1026     * Walks through all recorded entries and adds the data available
1027     * from the local file header.
1028     *
1029     * <p>Also records the offsets for the data to read from the
1030     * entries.</p>
1031     */
1032    private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment>
1033                                            entriesWithoutUTF8Flag)
1034        throws IOException {
1035        for (final ZipArchiveEntry zipArchiveEntry : entries) {
1036            // entries is filled in populateFromCentralDirectory and
1037            // never modified
1038            final Entry ze = (Entry) zipArchiveEntry;
1039            final OffsetEntry offsetEntry = ze.getOffsetEntry();
1040            final long offset = offsetEntry.headerOffset;
1041            archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
1042            wordBbuf.rewind();
1043            IOUtils.readFully(archive, wordBbuf);
1044            wordBbuf.flip();
1045            wordBbuf.get(shortBuf);
1046            final int fileNameLen = ZipShort.getValue(shortBuf);
1047            wordBbuf.get(shortBuf);
1048            final int extraFieldLen = ZipShort.getValue(shortBuf);
1049            skipBytes(fileNameLen);
1050            final byte[] localExtraData = new byte[extraFieldLen];
1051            IOUtils.readFully(archive, ByteBuffer.wrap(localExtraData));
1052            ze.setExtra(localExtraData);
1053            offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH
1054                + SHORT + SHORT + fileNameLen + extraFieldLen;
1055
1056            if (entriesWithoutUTF8Flag.containsKey(ze)) {
1057                final NameAndComment nc = entriesWithoutUTF8Flag.get(ze);
1058                ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name,
1059                                                         nc.comment);
1060            }
1061
1062            final String name = ze.getName();
1063            LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
1064            if (entriesOfThatName == null) {
1065                entriesOfThatName = new LinkedList<>();
1066                nameMap.put(name, entriesOfThatName);
1067            }
1068            entriesOfThatName.addLast(ze);
1069        }
1070    }
1071
1072    /**
1073     * Checks whether the archive starts with a LFH.  If it doesn't,
1074     * it may be an empty archive.
1075     */
1076    private boolean startsWithLocalFileHeader() throws IOException {
1077        archive.position(0);
1078        wordBbuf.rewind();
1079        IOUtils.readFully(archive, wordBbuf);
1080        return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG);
1081    }
1082
1083    /**
1084     * InputStream that delegates requests to the underlying
1085     * SeekableByteChannel, making sure that only bytes from a certain
1086     * range can be read.
1087     */
1088    private class BoundedInputStream extends InputStream {
1089        private static final int MAX_BUF_LEN = 8192;
1090        private final ByteBuffer buffer;
1091        private long remaining;
1092        private long loc;
1093        private boolean addDummyByte = false;
1094
1095        BoundedInputStream(final long start, final long remaining) {
1096            this.remaining = remaining;
1097            loc = start;
1098            if (remaining < MAX_BUF_LEN && remaining > 0) {
1099                buffer = ByteBuffer.allocate((int) remaining);
1100            } else {
1101                buffer = ByteBuffer.allocate(MAX_BUF_LEN);
1102            }
1103        }
1104
1105        @Override
1106        public int read() throws IOException {
1107            if (remaining-- <= 0) {
1108                if (addDummyByte) {
1109                    addDummyByte = false;
1110                    return 0;
1111                }
1112                return -1;
1113            }
1114            synchronized (archive) {
1115                archive.position(loc++);
1116                int read = read(1);
1117                if (read < 0) {
1118                    return read;
1119                }
1120                return buffer.get() & 0xff;
1121            }
1122        }
1123
1124        @Override
1125        public int read(final byte[] b, final int off, int len) throws IOException {
1126            if (remaining <= 0) {
1127                if (addDummyByte) {
1128                    addDummyByte = false;
1129                    b[off] = 0;
1130                    return 1;
1131                }
1132                return -1;
1133            }
1134
1135            if (len <= 0) {
1136                return 0;
1137            }
1138
1139            if (len > remaining) {
1140                len = (int) remaining;
1141            }
1142            ByteBuffer buf;
1143            int ret = -1;
1144            synchronized (archive) {
1145                archive.position(loc);
1146                if (len <= buffer.capacity()) {
1147                    buf = buffer;
1148                    ret = read(len);
1149                } else {
1150                    buf = ByteBuffer.allocate(len);
1151                    ret = archive.read(buf);
1152                    buf.flip();
1153                }
1154            }
1155            if (ret > 0) {
1156                buf.get(b, off, ret);
1157                loc += ret;
1158                remaining -= ret;
1159            }
1160            return ret;
1161        }
1162
1163        private int read(int len) throws IOException {
1164            buffer.rewind().limit(len);
1165            int read = archive.read(buffer);
1166            buffer.flip();
1167            return read;
1168        }
1169
1170        /**
1171         * Inflater needs an extra dummy byte for nowrap - see
1172         * Inflater's javadocs.
1173         */
1174        void addDummy() {
1175            addDummyByte = true;
1176        }
1177    }
1178
1179    private static final class NameAndComment {
1180        private final byte[] name;
1181        private final byte[] comment;
1182        private NameAndComment(final byte[] name, final byte[] comment) {
1183            this.name = name;
1184            this.comment = comment;
1185        }
1186    }
1187
1188    /**
1189     * Compares two ZipArchiveEntries based on their offset within the archive.
1190     *
1191     * <p>Won't return any meaningful results if one of the entries
1192     * isn't part of the archive at all.</p>
1193     *
1194     * @since 1.1
1195     */
1196    private final Comparator<ZipArchiveEntry> offsetComparator =
1197        new Comparator<ZipArchiveEntry>() {
1198        @Override
1199        public int compare(final ZipArchiveEntry e1, final ZipArchiveEntry e2) {
1200            if (e1 == e2) {
1201                return 0;
1202            }
1203
1204            final Entry ent1 = e1 instanceof Entry ? (Entry) e1 : null;
1205            final Entry ent2 = e2 instanceof Entry ? (Entry) e2 : null;
1206            if (ent1 == null) {
1207                return 1;
1208            }
1209            if (ent2 == null) {
1210                return -1;
1211            }
1212            final long val = (ent1.getOffsetEntry().headerOffset
1213                        - ent2.getOffsetEntry().headerOffset);
1214            return val == 0 ? 0 : val < 0 ? -1 : +1;
1215        }
1216    };
1217
1218    /**
1219     * Extends ZipArchiveEntry to store the offset within the archive.
1220     */
1221    private static class Entry extends ZipArchiveEntry {
1222
1223        private final OffsetEntry offsetEntry;
1224
1225        Entry(final OffsetEntry offset) {
1226            this.offsetEntry = offset;
1227        }
1228
1229        OffsetEntry getOffsetEntry() {
1230            return offsetEntry;
1231        }
1232
1233        @Override
1234        public int hashCode() {
1235            return 3 * super.hashCode()
1236                + (int) (offsetEntry.headerOffset % Integer.MAX_VALUE);
1237        }
1238
1239        @Override
1240        public boolean equals(final Object other) {
1241            if (super.equals(other)) {
1242                // super.equals would return false if other were not an Entry
1243                final Entry otherEntry = (Entry) other;
1244                return offsetEntry.headerOffset
1245                        == otherEntry.offsetEntry.headerOffset
1246                    && offsetEntry.dataOffset
1247                        == otherEntry.offsetEntry.dataOffset;
1248            }
1249            return false;
1250        }
1251    }
1252}