001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018package org.apache.commons.compress.archivers.zip; 019 020import java.io.BufferedInputStream; 021import java.io.Closeable; 022import java.io.EOFException; 023import java.io.File; 024import java.io.IOException; 025import java.io.InputStream; 026import java.nio.ByteBuffer; 027import java.nio.channels.SeekableByteChannel; 028import java.nio.file.Files; 029import java.nio.file.StandardOpenOption; 030import java.util.Arrays; 031import java.util.Collections; 032import java.util.Comparator; 033import java.util.Enumeration; 034import java.util.EnumSet; 035import java.util.HashMap; 036import java.util.LinkedList; 037import java.util.List; 038import java.util.Map; 039import java.util.zip.Inflater; 040import java.util.zip.InflaterInputStream; 041import java.util.zip.ZipException; 042 043import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; 044import org.apache.commons.compress.utils.IOUtils; 045 046import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 047import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; 048import static 
org.apache.commons.compress.archivers.zip.ZipConstants.WORD; 049import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; 050import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT; 051 052/** 053 * Replacement for <code>java.util.ZipFile</code>. 054 * 055 * <p>This class adds support for file name encodings other than UTF-8 056 * (which is required to work on ZIP files created by native zip tools 057 * and is able to skip a preamble like the one found in self 058 * extracting archives. Furthermore it returns instances of 059 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 060 * instead of <code>java.util.zip.ZipEntry</code>.</p> 061 * 062 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would 063 * have to reimplement all methods anyway. Like 064 * <code>java.util.ZipFile</code>, it uses SeekableByteChannel under the 065 * covers and supports compressed and uncompressed entries. As of 066 * Apache Commons Compress 1.3 it also transparently supports Zip64 067 * extensions and thus individual entries and archives larger than 4 068 * GB or with more than 65536 entries.</p> 069 * 070 * <p>The method signatures mimic the ones of 071 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions: 072 * 073 * <ul> 074 * <li>There is no getName method.</li> 075 * <li>entries has been renamed to getEntries.</li> 076 * <li>getEntries and getEntry return 077 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 078 * instances.</li> 079 * <li>close is allowed to throw IOException.</li> 080 * </ul> 081 * 082 */ 083public class ZipFile implements Closeable { 084 private static final int HASH_SIZE = 509; 085 static final int NIBLET_MASK = 0x0f; 086 static final int BYTE_SHIFT = 8; 087 private static final int POS_0 = 0; 088 private static final int POS_1 = 1; 089 private static final int POS_2 = 2; 090 private static final int POS_3 = 3; 091 092 /** 093 * List of 
    /**
     * All entries of the archive, in the order in which they were
     * read from the central directory.
     */
    private final List<ZipArchiveEntry> entries =
        new LinkedList<>();

    /**
     * Maps an entry name to the list of entries carrying that name
     * (a name may occur more than once inside an archive).
     */
    private final Map<String, LinkedList<ZipArchiveEntry>> nameMap =
        new HashMap<>(HASH_SIZE);

    /**
     * Holder for the two archive offsets tracked per entry. Both
     * start out as -1, i.e. "not known yet".
     */
    private static final class OffsetEntry {
        // offset of the entry's local file header inside the archive
        private long headerOffset = -1;
        // offset at which the entry's data starts; filled in after the
        // central directory has been read
        private long dataOffset = -1;
    }

    /**
     * The encoding to use for filenames and the file comment.
     *
     * <p>For a list of possible values see <a
     * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
     * The constructors that take no encoding argument pass UTF-8;
     * {@code null} selects the platform's default encoding.</p>
     */
    private final String encoding;

    /**
     * The zip encoding derived from {@link #encoding}; used to decode
     * filenames and comments.
     */
    private final ZipEncoding zipEncoding;

    /**
     * Name of the archive; only used when reporting problems.
     */
    private final String archiveName;

    /**
     * The channel the archive is read from.
     */
    private final SeekableByteChannel archive;

    /**
     * Whether to look for and use Unicode extra fields.
     */
    private final boolean useUnicodeExtraFields;

    /**
     * Whether the file is closed. Starts out as {@code true}; the
     * constructor clears it once the archive has been read
     * successfully and {@link #close()} sets it again.
     */
    private volatile boolean closed = true;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    // each ByteBuffer below is a view on the byte array of the same
    // name, so data read into the buffer is visible in the array
    private final byte[] dwordBuf = new byte[DWORD];
    private final byte[] wordBuf = new byte[WORD];
    private final byte[] cfhBuf = new byte[CFH_LEN];
    private final byte[] shortBuf = new byte[SHORT];
    private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf);
    private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf);
    private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf);
    /**
     * Opens the given file for reading, assuming "UTF8" for file names.
     *
     * <p>Delegates to {@link #ZipFile(File, String)}.</p>
     *
     * @param f the archive.
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(final File f) throws IOException {
        this(f, ZipEncodingHelper.UTF8);
    }

    /**
     * Opens the file with the given name for reading, assuming "UTF8"
     * for file names.
     *
     * @param name name of the archive.
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(final String name) throws IOException {
        this(new File(name), ZipEncodingHelper.UTF8);
    }

    /**
     * Opens the file with the given name for reading, assuming the
     * specified encoding for file names and scanning for unicode
     * extra fields.
     *
     * @param name name of the archive.
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(final String name, final String encoding) throws IOException {
        this(new File(name), encoding, true);
    }

    /**
     * Opens the given file for reading, assuming the specified
     * encoding for file names and scanning for unicode extra fields.
     *
     * @param f the archive.
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(final File f, final String encoding) throws IOException {
        this(f, encoding, true);
    }
    /**
     * Opens the given file for reading, assuming the specified
     * encoding for file names.
     *
     * <p>A read-only channel is opened on the file; it is closed again
     * if reading the archive's structure fails.</p>
     *
     * @param f the archive.
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields)
        throws IOException {
        this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)),
             f.getAbsolutePath(), encoding, useUnicodeExtraFields, true);
    }

    /**
     * Opens the given channel for reading, assuming "UTF8" for file names.
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * <p>The archive is reported as "unknown archive" in messages.</p>
     *
     * @param channel the archive.
     *
     * @throws IOException if an error occurs while reading the file.
     * @since 1.13
     */
    public ZipFile(final SeekableByteChannel channel)
            throws IOException {
        this(channel, "unknown archive", ZipEncodingHelper.UTF8, true);
    }

    /**
     * Opens the given channel for reading, assuming the specified
     * encoding for file names.
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * <p>The archive is reported as "unknown archive" in messages.</p>
     *
     * @param channel the archive.
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     *
     * @throws IOException if an error occurs while reading the file.
     * @since 1.13
     */
    public ZipFile(final SeekableByteChannel channel, final String encoding)
        throws IOException {
        this(channel, "unknown archive", encoding, true);
    }
268 * @param encoding the encoding to use for file names, use null 269 * for the platform's default encoding 270 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 271 * Extra Fields (if present) to set the file names. 272 * 273 * @throws IOException if an error occurs while reading the file. 274 * @since 1.13 275 */ 276 public ZipFile(final SeekableByteChannel channel, final String archiveName, 277 final String encoding, final boolean useUnicodeExtraFields) 278 throws IOException { 279 this(channel, archiveName, encoding, useUnicodeExtraFields, false); 280 } 281 282 private ZipFile(final SeekableByteChannel channel, final String archiveName, 283 final String encoding, final boolean useUnicodeExtraFields, 284 final boolean closeOnError) 285 throws IOException { 286 this.archiveName = archiveName; 287 this.encoding = encoding; 288 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 289 this.useUnicodeExtraFields = useUnicodeExtraFields; 290 archive = channel; 291 boolean success = false; 292 try { 293 final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = 294 populateFromCentralDirectory(); 295 resolveLocalFileHeaderData(entriesWithoutUTF8Flag); 296 success = true; 297 } finally { 298 closed = !success; 299 if (!success && closeOnError) { 300 IOUtils.closeQuietly(archive); 301 } 302 } 303 } 304 305 /** 306 * The encoding to use for filenames and the file comment. 307 * 308 * @return null if using the platform's default character encoding. 309 */ 310 public String getEncoding() { 311 return encoding; 312 } 313 314 /** 315 * Closes the archive. 316 * @throws IOException if an error occurs closing the archive. 317 */ 318 @Override 319 public void close() throws IOException { 320 // this flag is only written here and read in finalize() which 321 // can never be run in parallel. 322 // no synchronization needed. 
323 closed = true; 324 325 archive.close(); 326 } 327 328 /** 329 * close a zipfile quietly; throw no io fault, do nothing 330 * on a null parameter 331 * @param zipfile file to close, can be null 332 */ 333 public static void closeQuietly(final ZipFile zipfile) { 334 IOUtils.closeQuietly(zipfile); 335 } 336 337 /** 338 * Returns all entries. 339 * 340 * <p>Entries will be returned in the same order they appear 341 * within the archive's central directory.</p> 342 * 343 * @return all entries as {@link ZipArchiveEntry} instances 344 */ 345 public Enumeration<ZipArchiveEntry> getEntries() { 346 return Collections.enumeration(entries); 347 } 348 349 /** 350 * Returns all entries in physical order. 351 * 352 * <p>Entries will be returned in the same order their contents 353 * appear within the archive.</p> 354 * 355 * @return all entries as {@link ZipArchiveEntry} instances 356 * 357 * @since 1.1 358 */ 359 public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() { 360 final ZipArchiveEntry[] allEntries = entries.toArray(new ZipArchiveEntry[entries.size()]); 361 Arrays.sort(allEntries, offsetComparator); 362 return Collections.enumeration(Arrays.asList(allEntries)); 363 } 364 365 /** 366 * Returns a named entry - or {@code null} if no entry by 367 * that name exists. 368 * 369 * <p>If multiple entries with the same name exist the first entry 370 * in the archive's central directory by that name is 371 * returned.</p> 372 * 373 * @param name name of the entry. 374 * @return the ZipArchiveEntry corresponding to the given name - or 375 * {@code null} if not present. 376 */ 377 public ZipArchiveEntry getEntry(final String name) { 378 final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 379 return entriesOfThatName != null ? entriesOfThatName.getFirst() : null; 380 } 381 382 /** 383 * Returns all named entries in the same order they appear within 384 * the archive's central directory. 385 * 386 * @param name name of the entry. 
387 * @return the Iterable<ZipArchiveEntry> corresponding to the 388 * given name 389 * @since 1.6 390 */ 391 public Iterable<ZipArchiveEntry> getEntries(final String name) { 392 final List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 393 return entriesOfThatName != null ? entriesOfThatName 394 : Collections.<ZipArchiveEntry>emptyList(); 395 } 396 397 /** 398 * Returns all named entries in the same order their contents 399 * appear within the archive. 400 * 401 * @param name name of the entry. 402 * @return the Iterable<ZipArchiveEntry> corresponding to the 403 * given name 404 * @since 1.6 405 */ 406 public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) { 407 ZipArchiveEntry[] entriesOfThatName = new ZipArchiveEntry[0]; 408 if (nameMap.containsKey(name)) { 409 entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName); 410 Arrays.sort(entriesOfThatName, offsetComparator); 411 } 412 return Arrays.asList(entriesOfThatName); 413 } 414 415 /** 416 * Whether this class is able to read the given entry. 417 * 418 * <p>May return false if it is set up to use encryption or a 419 * compression method that hasn't been implemented yet.</p> 420 * @since 1.1 421 * @param ze the entry 422 * @return whether this class is able to read the given entry. 423 */ 424 public boolean canReadEntryData(final ZipArchiveEntry ze) { 425 return ZipUtil.canHandleEntryData(ze); 426 } 427 428 /** 429 * Expose the raw stream of the archive entry (compressed form). 430 * 431 * <p>This method does not relate to how/if we understand the payload in the 432 * stream, since we really only intend to move it on to somewhere else.</p> 433 * 434 * @param ze The entry to get the stream for 435 * @return The raw input stream containing (possibly) compressed data. 
436 * @since 1.11 437 */ 438 public InputStream getRawInputStream(final ZipArchiveEntry ze) { 439 if (!(ze instanceof Entry)) { 440 return null; 441 } 442 final OffsetEntry offsetEntry = ((Entry) ze).getOffsetEntry(); 443 final long start = offsetEntry.dataOffset; 444 return new BoundedInputStream(start, ze.getCompressedSize()); 445 } 446 447 448 /** 449 * Transfer selected entries from this zipfile to a given #ZipArchiveOutputStream. 450 * Compression and all other attributes will be as in this file. 451 * <p>This method transfers entries based on the central directory of the zip file.</p> 452 * 453 * @param target The zipArchiveOutputStream to write the entries to 454 * @param predicate A predicate that selects which entries to write 455 * @throws IOException on error 456 */ 457 public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate) 458 throws IOException { 459 final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder(); 460 while (src.hasMoreElements()) { 461 final ZipArchiveEntry entry = src.nextElement(); 462 if (predicate.test( entry)) { 463 target.addRawArchiveEntry(entry, getRawInputStream(entry)); 464 } 465 } 466 } 467 468 /** 469 * Returns an InputStream for reading the contents of the given entry. 470 * 471 * @param ze the entry to get the stream for. 472 * @return a stream to read the entry from. 
473 * @throws IOException if unable to create an input stream from the zipentry 474 * @throws ZipException if the zipentry uses an unsupported feature 475 */ 476 public InputStream getInputStream(final ZipArchiveEntry ze) 477 throws IOException, ZipException { 478 if (!(ze instanceof Entry)) { 479 return null; 480 } 481 // cast valididty is checked just above 482 final OffsetEntry offsetEntry = ((Entry) ze).getOffsetEntry(); 483 ZipUtil.checkRequestedFeatures(ze); 484 final long start = offsetEntry.dataOffset; 485 // doesn't get closed if the method is not supported, but doesn't hold any resources either 486 final BoundedInputStream bis = 487 new BoundedInputStream(start, ze.getCompressedSize()); //NOSONAR 488 switch (ZipMethod.getMethodByCode(ze.getMethod())) { 489 case STORED: 490 return bis; 491 case UNSHRINKING: 492 return new UnshrinkingInputStream(bis); 493 case IMPLODING: 494 return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(), 495 ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), new BufferedInputStream(bis)); 496 case DEFLATED: 497 bis.addDummy(); 498 final Inflater inflater = new Inflater(true); 499 return new InflaterInputStream(bis, inflater) { 500 @Override 501 public void close() throws IOException { 502 try { 503 super.close(); 504 } finally { 505 inflater.end(); 506 } 507 } 508 }; 509 case BZIP2: 510 return new BZip2CompressorInputStream(bis); 511 case AES_ENCRYPTED: 512 case ENHANCED_DEFLATED: 513 case EXPANDING_LEVEL_1: 514 case EXPANDING_LEVEL_2: 515 case EXPANDING_LEVEL_3: 516 case EXPANDING_LEVEL_4: 517 case JPEG: 518 case LZMA: 519 case PKWARE_IMPLODING: 520 case PPMD: 521 case TOKENIZATION: 522 case UNKNOWN: 523 case WAVPACK: 524 default: 525 throw new ZipException("Found unsupported compression method " 526 + ze.getMethod()); 527 } 528 } 529 530 /** 531 * <p> 532 * Convenience method to return the entry's content as a String if isUnixSymlink() 533 * returns true for it, otherwise returns null. 
534 * </p> 535 * 536 * <p>This method assumes the symbolic link's file name uses the 537 * same encoding that as been specified for this ZipFile.</p> 538 * 539 * @param entry ZipArchiveEntry object that represents the symbolic link 540 * @return entry's content as a String 541 * @throws IOException problem with content's input stream 542 * @since 1.5 543 */ 544 public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException { 545 if (entry != null && entry.isUnixSymlink()) { 546 try (InputStream in = getInputStream(entry)) { 547 return zipEncoding.decode(IOUtils.toByteArray(in)); 548 } 549 } 550 return null; 551 } 552 553 /** 554 * Ensures that the close method of this zipfile is called when 555 * there are no more references to it. 556 * @see #close() 557 */ 558 @Override 559 protected void finalize() throws Throwable { 560 try { 561 if (!closed) { 562 System.err.println("Cleaning up unclosed ZipFile for archive " 563 + archiveName); 564 close(); 565 } 566 } finally { 567 super.finalize(); 568 } 569 } 570 571 /** 572 * Length of a "central directory" entry structure without file 573 * name, extra fields or comment. 
    /**
     * Length of a "central directory" entry structure without file
     * name, extra fields or comment.
     *
     * <p>The four byte signature in front of the structure is not
     * included either; it is read separately before the remainder of
     * the structure is read into a buffer of this size.</p>
     */
    private static final int CFH_LEN =
        /* version made by                 */ SHORT
        /* version needed to extract       */ + SHORT
        /* general purpose bit flag        */ + SHORT
        /* compression method              */ + SHORT
        /* last mod file time              */ + SHORT
        /* last mod file date              */ + SHORT
        /* crc-32                          */ + WORD
        /* compressed size                 */ + WORD
        /* uncompressed size               */ + WORD
        /* filename length                 */ + SHORT
        /* extra field length              */ + SHORT
        /* file comment length             */ + SHORT
        /* disk number start               */ + SHORT
        /* internal file attributes        */ + SHORT
        /* external file attributes        */ + WORD
        /* relative offset of local header */ + WORD;

    /**
     * Signature that starts every central directory entry, as a
     * number so it can be compared with values read from the archive.
     */
    private static final long CFH_SIG =
        ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
    /**
     * Reads the central directory of the given archive and populates
     * the internal tables with ZipArchiveEntry instances.
     *
     * <p>The ZipArchiveEntrys will know all data that can be obtained from
     * the central directory alone, but not the data that requires the
     * local file header or additional data to be read.</p>
     *
     * @return a map of zipentries that didn't have the language
     * encoding flag set when read.
     * @throws IOException if reading fails or if no central directory
     * entry is found although the archive starts with a local file
     * header
     */
    private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory()
        throws IOException {
        final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag =
            new HashMap<>();

        positionAtCentralDirectory();

        // signature of the first structure of the central directory
        wordBbuf.rewind();
        IOUtils.readFully(archive, wordBbuf);
        long sig = ZipLong.getValue(wordBuf);

        if (sig != CFH_SIG && startsWithLocalFileHeader()) {
            throw new IOException("central directory is empty, can't expand"
                                  + " corrupt archive.");
        }

        // consume entries until a signature other than the central
        // file header's one is encountered
        while (sig == CFH_SIG) {
            readCentralDirectoryEntry(noUTF8Flag);
            wordBbuf.rewind();
            IOUtils.readFully(archive, wordBbuf);
            sig = ZipLong.getValue(wordBuf);
        }
        return noUTF8Flag;
    }

    /**
     * Reads an individual entry of the central directory, creates a
     * ZipArchiveEntry from it and adds it to the list of entries.
     *
     * <p>Expects the channel to be positioned right behind the
     * entry's signature. The fixed-size part of the entry is read
     * into a buffer and decoded field by field, followed by the
     * variable-length file name, extra data and comment.</p>
     *
     * @param noUTF8Flag map used to collect entries that don't have
     * their UTF-8 flag set and whose name will be set by data read
     * from the local file header later. The current entry may be
     * added to this map.
     * @throws IOException if reading from the archive fails
     */
    private void
        readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
        throws IOException {
        cfhBbuf.rewind();
        IOUtils.readFully(archive, cfhBbuf);
        // cursor into cfhBuf, advanced past each field once it has been decoded
        int off = 0;
        final OffsetEntry offset = new OffsetEntry();
        final Entry ze = new Entry(offset);

        final int versionMadeBy = ZipShort.getValue(cfhBuf, off);
        off += SHORT;
        ze.setVersionMadeBy(versionMadeBy);
        // the platform is taken from the high byte of "version made by"
        ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);

        ze.setVersionRequired(ZipShort.getValue(cfhBuf, off));
        off += SHORT; // version required

        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off);
        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
        // entries flagged as UTF-8 override the archive-wide encoding
        final ZipEncoding entryEncoding =
            hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
        ze.setGeneralPurposeBit(gpFlag);
        // the same two bytes are additionally kept in their raw form
        ze.setRawFlag(ZipShort.getValue(cfhBuf, off));

        off += SHORT;

        //noinspection MagicConstant
        ze.setMethod(ZipShort.getValue(cfhBuf, off));
        off += SHORT;

        // last mod time and date are decoded together as one four byte value
        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off));
        ze.setTime(time);
        off += WORD;

        ze.setCrc(ZipLong.getValue(cfhBuf, off));
        off += WORD;

        ze.setCompressedSize(ZipLong.getValue(cfhBuf, off));
        off += WORD;

        ze.setSize(ZipLong.getValue(cfhBuf, off));
        off += WORD;

        final int fileNameLen = ZipShort.getValue(cfhBuf, off);
        off += SHORT;

        final int extraLen = ZipShort.getValue(cfhBuf, off);
        off += SHORT;

        final int commentLen = ZipShort.getValue(cfhBuf, off);
        off += SHORT;

        final int diskStart = ZipShort.getValue(cfhBuf, off);
        off += SHORT;

        ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off));
        off += SHORT;

        ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off));
        off += WORD;

        // the file name directly follows the fixed-size part
        final byte[] fileName = new byte[fileNameLen];
        IOUtils.readFully(archive, ByteBuffer.wrap(fileName));
        ze.setName(entryEncoding.decode(fileName), fileName);

        // LFH offset, the last field of the fixed-size part
        offset.headerOffset = ZipLong.getValue(cfhBuf, off);
        // data offset will be filled later
        entries.add(ze);

        final byte[] cdExtraData = new byte[extraLen];
        IOUtils.readFully(archive, ByteBuffer.wrap(cdExtraData));
        ze.setCentralDirectoryExtra(cdExtraData);

        // needs the extra data set above as well as the header offset
        setSizesAndOffsetFromZip64Extra(ze, offset, diskStart);

        final byte[] comment = new byte[commentLen];
        IOUtils.readFully(archive, ByteBuffer.wrap(comment));
        ze.setComment(entryEncoding.decode(comment));

        // keep the raw bytes of name and comment for entries that are
        // not flagged as UTF-8
        if (!hasUTF8Flag && useUnicodeExtraFields) {
            noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
        }
    }
read sizes from there if the entry's sizes are set to 729 * 0xFFFFFFFFF, do the same for the offset of the local file 730 * header. 731 * 732 * <p>Ensures the Zip64 extra either knows both compressed and 733 * uncompressed size or neither of both as the internal logic in 734 * ExtraFieldUtils forces the field to create local header data 735 * even if they are never used - and here a field with only one 736 * size would be invalid.</p> 737 */ 738 private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze, 739 final OffsetEntry offset, 740 final int diskStart) 741 throws IOException { 742 final Zip64ExtendedInformationExtraField z64 = 743 (Zip64ExtendedInformationExtraField) 744 ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); 745 if (z64 != null) { 746 final boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC; 747 final boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC; 748 final boolean hasRelativeHeaderOffset = 749 offset.headerOffset == ZIP64_MAGIC; 750 z64.reparseCentralDirectoryData(hasUncompressedSize, 751 hasCompressedSize, 752 hasRelativeHeaderOffset, 753 diskStart == ZIP64_MAGIC_SHORT); 754 755 if (hasUncompressedSize) { 756 ze.setSize(z64.getSize().getLongValue()); 757 } else if (hasCompressedSize) { 758 z64.setSize(new ZipEightByteInteger(ze.getSize())); 759 } 760 761 if (hasCompressedSize) { 762 ze.setCompressedSize(z64.getCompressedSize().getLongValue()); 763 } else if (hasUncompressedSize) { 764 z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize())); 765 } 766 767 if (hasRelativeHeaderOffset) { 768 offset.headerOffset = 769 z64.getRelativeHeaderOffset().getLongValue(); 770 } 771 } 772 } 773 774 /** 775 * Length of the "End of central directory record" - which is 776 * supposed to be the last structure of the archive - without file 777 * comment. 
    /**
     * Length of the "End of central directory record" - which is
     * supposed to be the last structure of the archive - without file
     * comment.
     *
     * <p>Used as the smallest distance from the end of the archive at
     * which the record's signature is searched for.</p>
     */
    static final int MIN_EOCD_SIZE =
        /* end of central dir signature    */ WORD
        /* number of this disk             */ + SHORT
        /* number of the disk with the     */
        /* start of the central directory  */ + SHORT
        /* total number of entries in      */
        /* the central dir on this disk    */ + SHORT
        /* total number of entries in      */
        /* the central dir                 */ + SHORT
        /* size of the central directory   */ + WORD
        /* offset of start of central      */
        /* directory with respect to       */
        /* the starting disk number        */ + WORD
        /* zipfile comment length          */ + SHORT;

    /**
     * Maximum length of the "End of central directory record" with a
     * file comment.
     *
     * <p>Used as the largest distance from the end of the archive at
     * which the record's signature is searched for.</p>
     */
    private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
        /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT;

    /**
     * Offset of the field that holds the location of the first
     * central directory entry inside the "End of central directory
     * record" relative to the start of the "End of central directory
     * record".
     *
     * <p>Consists of all fields of the record that precede the
     * offset field.</p>
     */
    private static final int CFD_LOCATOR_OFFSET =
        /* end of central dir signature    */ WORD
        /* number of this disk             */ + SHORT
        /* number of the disk with the     */
        /* start of the central directory  */ + SHORT
        /* total number of entries in      */
        /* the central dir on this disk    */ + SHORT
        /* total number of entries in      */
        /* the central dir                 */ + SHORT
        /* size of the central directory   */ + WORD;
    /**
     * Length of the "Zip64 end of central directory locator" - which
     * should be right in front of the "end of central directory
     * record" if one is present at all.
     */
    private static final int ZIP64_EOCDL_LENGTH =
        /* zip64 end of central dir locator sig */ WORD
        /* number of the disk with the          */
        /* start of the zip64 end of            */
        /* central directory                    */ + WORD
        /* relative offset of the zip64         */
        /* end of central directory record      */ + DWORD
        /* total number of disks                */ + WORD;

    /**
     * Offset of the field that holds the location of the "Zip64 end
     * of central directory record" inside the "Zip64 end of central
     * directory locator" relative to the start of the "Zip64 end of
     * central directory locator".
     */
    private static final int ZIP64_EOCDL_LOCATOR_OFFSET =
        /* zip64 end of central dir locator sig */ WORD
        /* number of the disk with the          */
        /* start of the zip64 end of            */
        /* central directory                    */ + WORD;

    /**
     * Offset of the field that holds the location of the first
     * central directory entry inside the "Zip64 end of central
     * directory record" relative to the start of the "Zip64 end of
     * central directory record".
     *
     * <p>Consists of all fields of the record that precede the
     * offset field.</p>
     */
    private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET =
        /* zip64 end of central dir        */
        /* signature                       */ WORD
        /* size of zip64 end of central    */
        /* directory record                */ + DWORD
        /* version made by                 */ + SHORT
        /* version needed to extract       */ + SHORT
        /* number of this disk             */ + WORD
        /* number of the disk with the     */
        /* start of the central directory  */ + WORD
        /* total number of entries in the  */
        /* central directory on this disk  */ + DWORD
        /* total number of entries in the  */
        /* central directory               */ + DWORD
        /* size of the central directory   */ + DWORD;
871 */ 872 private void positionAtCentralDirectory() 873 throws IOException { 874 positionAtEndOfCentralDirectoryRecord(); 875 boolean found = false; 876 final boolean searchedForZip64EOCD = 877 archive.position() > ZIP64_EOCDL_LENGTH; 878 if (searchedForZip64EOCD) { 879 archive.position(archive.position() - ZIP64_EOCDL_LENGTH); 880 wordBbuf.rewind(); 881 IOUtils.readFully(archive, wordBbuf); 882 found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG, 883 wordBuf); 884 } 885 if (!found) { 886 // not a ZIP64 archive 887 if (searchedForZip64EOCD) { 888 skipBytes(ZIP64_EOCDL_LENGTH - WORD); 889 } 890 positionAtCentralDirectory32(); 891 } else { 892 positionAtCentralDirectory64(); 893 } 894 } 895 896 /** 897 * Parses the "Zip64 end of central directory locator", 898 * finds the "Zip64 end of central directory record" using the 899 * parsed information, parses that and positions the stream at the 900 * first central directory record. 901 * 902 * Expects stream to be positioned right behind the "Zip64 903 * end of central directory locator"'s signature. 904 */ 905 private void positionAtCentralDirectory64() 906 throws IOException { 907 skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET 908 - WORD /* signature has already been read */); 909 dwordBbuf.rewind(); 910 IOUtils.readFully(archive, dwordBbuf); 911 archive.position(ZipEightByteInteger.getLongValue(dwordBuf)); 912 wordBbuf.rewind(); 913 IOUtils.readFully(archive, wordBbuf); 914 if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) { 915 throw new ZipException("archive's ZIP64 end of central " 916 + "directory locator is corrupt."); 917 } 918 skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET 919 - WORD /* signature has already been read */); 920 dwordBbuf.rewind(); 921 IOUtils.readFully(archive, dwordBbuf); 922 archive.position(ZipEightByteInteger.getLongValue(dwordBuf)); 923 } 924 925 /** 926 * Parses the "End of central dir record" and positions 927 * the stream at the first central directory record. 
928 * 929 * Expects stream to be positioned at the beginning of the 930 * "End of central dir record". 931 */ 932 private void positionAtCentralDirectory32() 933 throws IOException { 934 skipBytes(CFD_LOCATOR_OFFSET); 935 wordBbuf.rewind(); 936 IOUtils.readFully(archive, wordBbuf); 937 archive.position(ZipLong.getValue(wordBuf)); 938 } 939 940 /** 941 * Searches for the and positions the stream at the start of the 942 * "End of central dir record". 943 */ 944 private void positionAtEndOfCentralDirectoryRecord() 945 throws IOException { 946 final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE, 947 ZipArchiveOutputStream.EOCD_SIG); 948 if (!found) { 949 throw new ZipException("archive is not a ZIP archive"); 950 } 951 } 952 953 /** 954 * Searches the archive backwards from minDistance to maxDistance 955 * for the given signature, positions the RandomaccessFile right 956 * at the signature if it has been found. 957 */ 958 private boolean tryToLocateSignature(final long minDistanceFromEnd, 959 final long maxDistanceFromEnd, 960 final byte[] sig) throws IOException { 961 boolean found = false; 962 long off = archive.size() - minDistanceFromEnd; 963 final long stopSearching = 964 Math.max(0L, archive.size() - maxDistanceFromEnd); 965 if (off >= 0) { 966 for (; off >= stopSearching; off--) { 967 archive.position(off); 968 try { 969 wordBbuf.rewind(); 970 IOUtils.readFully(archive, wordBbuf); 971 wordBbuf.flip(); 972 } catch (EOFException ex) { 973 break; 974 } 975 int curr = wordBbuf.get(); 976 if (curr == sig[POS_0]) { 977 curr = wordBbuf.get(); 978 if (curr == sig[POS_1]) { 979 curr = wordBbuf.get(); 980 if (curr == sig[POS_2]) { 981 curr = wordBbuf.get(); 982 if (curr == sig[POS_3]) { 983 found = true; 984 break; 985 } 986 } 987 } 988 } 989 } 990 } 991 if (found) { 992 archive.position(off); 993 } 994 return found; 995 } 996 997 /** 998 * Skips the given number of bytes or throws an EOFException if 999 * skipping failed. 
1000 */ 1001 private void skipBytes(final int count) throws IOException { 1002 long currentPosition = archive.position(); 1003 long newPosition = currentPosition + count; 1004 if (newPosition > archive.size()) { 1005 throw new EOFException(); 1006 } 1007 archive.position(newPosition); 1008 } 1009 1010 /** 1011 * Number of bytes in local file header up to the "length of 1012 * filename" entry. 1013 */ 1014 private static final long LFH_OFFSET_FOR_FILENAME_LENGTH = 1015 /* local file header signature */ WORD 1016 /* version needed to extract */ + SHORT 1017 /* general purpose bit flag */ + SHORT 1018 /* compression method */ + SHORT 1019 /* last mod file time */ + SHORT 1020 /* last mod file date */ + SHORT 1021 /* crc-32 */ + WORD 1022 /* compressed size */ + WORD 1023 /* uncompressed size */ + (long) WORD; 1024 1025 /** 1026 * Walks through all recorded entries and adds the data available 1027 * from the local file header. 1028 * 1029 * <p>Also records the offsets for the data to read from the 1030 * entries.</p> 1031 */ 1032 private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment> 1033 entriesWithoutUTF8Flag) 1034 throws IOException { 1035 for (final ZipArchiveEntry zipArchiveEntry : entries) { 1036 // entries is filled in populateFromCentralDirectory and 1037 // never modified 1038 final Entry ze = (Entry) zipArchiveEntry; 1039 final OffsetEntry offsetEntry = ze.getOffsetEntry(); 1040 final long offset = offsetEntry.headerOffset; 1041 archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 1042 wordBbuf.rewind(); 1043 IOUtils.readFully(archive, wordBbuf); 1044 wordBbuf.flip(); 1045 wordBbuf.get(shortBuf); 1046 final int fileNameLen = ZipShort.getValue(shortBuf); 1047 wordBbuf.get(shortBuf); 1048 final int extraFieldLen = ZipShort.getValue(shortBuf); 1049 skipBytes(fileNameLen); 1050 final byte[] localExtraData = new byte[extraFieldLen]; 1051 IOUtils.readFully(archive, ByteBuffer.wrap(localExtraData)); 1052 ze.setExtra(localExtraData); 
1053 offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH 1054 + SHORT + SHORT + fileNameLen + extraFieldLen; 1055 1056 if (entriesWithoutUTF8Flag.containsKey(ze)) { 1057 final NameAndComment nc = entriesWithoutUTF8Flag.get(ze); 1058 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, 1059 nc.comment); 1060 } 1061 1062 final String name = ze.getName(); 1063 LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 1064 if (entriesOfThatName == null) { 1065 entriesOfThatName = new LinkedList<>(); 1066 nameMap.put(name, entriesOfThatName); 1067 } 1068 entriesOfThatName.addLast(ze); 1069 } 1070 } 1071 1072 /** 1073 * Checks whether the archive starts with a LFH. If it doesn't, 1074 * it may be an empty archive. 1075 */ 1076 private boolean startsWithLocalFileHeader() throws IOException { 1077 archive.position(0); 1078 wordBbuf.rewind(); 1079 IOUtils.readFully(archive, wordBbuf); 1080 return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG); 1081 } 1082 1083 /** 1084 * InputStream that delegates requests to the underlying 1085 * SeekableByteChannel, making sure that only bytes from a certain 1086 * range can be read. 
1087 */ 1088 private class BoundedInputStream extends InputStream { 1089 private static final int MAX_BUF_LEN = 8192; 1090 private final ByteBuffer buffer; 1091 private long remaining; 1092 private long loc; 1093 private boolean addDummyByte = false; 1094 1095 BoundedInputStream(final long start, final long remaining) { 1096 this.remaining = remaining; 1097 loc = start; 1098 if (remaining < MAX_BUF_LEN && remaining > 0) { 1099 buffer = ByteBuffer.allocate((int) remaining); 1100 } else { 1101 buffer = ByteBuffer.allocate(MAX_BUF_LEN); 1102 } 1103 } 1104 1105 @Override 1106 public int read() throws IOException { 1107 if (remaining-- <= 0) { 1108 if (addDummyByte) { 1109 addDummyByte = false; 1110 return 0; 1111 } 1112 return -1; 1113 } 1114 synchronized (archive) { 1115 archive.position(loc++); 1116 int read = read(1); 1117 if (read < 0) { 1118 return read; 1119 } 1120 return buffer.get() & 0xff; 1121 } 1122 } 1123 1124 @Override 1125 public int read(final byte[] b, final int off, int len) throws IOException { 1126 if (remaining <= 0) { 1127 if (addDummyByte) { 1128 addDummyByte = false; 1129 b[off] = 0; 1130 return 1; 1131 } 1132 return -1; 1133 } 1134 1135 if (len <= 0) { 1136 return 0; 1137 } 1138 1139 if (len > remaining) { 1140 len = (int) remaining; 1141 } 1142 ByteBuffer buf; 1143 int ret = -1; 1144 synchronized (archive) { 1145 archive.position(loc); 1146 if (len <= buffer.capacity()) { 1147 buf = buffer; 1148 ret = read(len); 1149 } else { 1150 buf = ByteBuffer.allocate(len); 1151 ret = archive.read(buf); 1152 buf.flip(); 1153 } 1154 } 1155 if (ret > 0) { 1156 buf.get(b, off, ret); 1157 loc += ret; 1158 remaining -= ret; 1159 } 1160 return ret; 1161 } 1162 1163 private int read(int len) throws IOException { 1164 buffer.rewind().limit(len); 1165 int read = archive.read(buffer); 1166 buffer.flip(); 1167 return read; 1168 } 1169 1170 /** 1171 * Inflater needs an extra dummy byte for nowrap - see 1172 * Inflater's javadocs. 
1173 */ 1174 void addDummy() { 1175 addDummyByte = true; 1176 } 1177 } 1178 1179 private static final class NameAndComment { 1180 private final byte[] name; 1181 private final byte[] comment; 1182 private NameAndComment(final byte[] name, final byte[] comment) { 1183 this.name = name; 1184 this.comment = comment; 1185 } 1186 } 1187 1188 /** 1189 * Compares two ZipArchiveEntries based on their offset within the archive. 1190 * 1191 * <p>Won't return any meaningful results if one of the entries 1192 * isn't part of the archive at all.</p> 1193 * 1194 * @since 1.1 1195 */ 1196 private final Comparator<ZipArchiveEntry> offsetComparator = 1197 new Comparator<ZipArchiveEntry>() { 1198 @Override 1199 public int compare(final ZipArchiveEntry e1, final ZipArchiveEntry e2) { 1200 if (e1 == e2) { 1201 return 0; 1202 } 1203 1204 final Entry ent1 = e1 instanceof Entry ? (Entry) e1 : null; 1205 final Entry ent2 = e2 instanceof Entry ? (Entry) e2 : null; 1206 if (ent1 == null) { 1207 return 1; 1208 } 1209 if (ent2 == null) { 1210 return -1; 1211 } 1212 final long val = (ent1.getOffsetEntry().headerOffset 1213 - ent2.getOffsetEntry().headerOffset); 1214 return val == 0 ? 0 : val < 0 ? -1 : +1; 1215 } 1216 }; 1217 1218 /** 1219 * Extends ZipArchiveEntry to store the offset within the archive. 
*/
    private static class Entry extends ZipArchiveEntry {

        private final OffsetEntry offsetEntry;

        Entry(final OffsetEntry offset) {
            this.offsetEntry = offset;
        }

        OffsetEntry getOffsetEntry() {
            return offsetEntry;
        }

        /**
         * Mixes the header offset into the hash code of the superclass.
         */
        @Override
        public int hashCode() {
            final int offsetPart =
                (int) (offsetEntry.headerOffset % Integer.MAX_VALUE);
            return 3 * super.hashCode() + offsetPart;
        }

        /**
         * Two entries are equal if the superclass considers them
         * equal and both their header offset and data offset match.
         */
        @Override
        public boolean equals(final Object other) {
            if (!super.equals(other)) {
                return false;
            }
            // super.equals would return false if other were not an Entry
            final Entry that = (Entry) other;
            final boolean sameHeaderOffset =
                offsetEntry.headerOffset == that.offsetEntry.headerOffset;
            return sameHeaderOffset
                && offsetEntry.dataOffset == that.offsetEntry.dataOffset;
        }
    }
}