001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.zip; 020 021import java.io.ByteArrayInputStream; 022import java.io.ByteArrayOutputStream; 023import java.io.EOFException; 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.PushbackInputStream; 027import java.nio.ByteBuffer; 028import java.util.zip.CRC32; 029import java.util.zip.DataFormatException; 030import java.util.zip.Inflater; 031import java.util.zip.ZipEntry; 032import java.util.zip.ZipException; 033 034import org.apache.commons.compress.archivers.ArchiveEntry; 035import org.apache.commons.compress.archivers.ArchiveInputStream; 036import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; 037import org.apache.commons.compress.utils.ArchiveUtils; 038import org.apache.commons.compress.utils.IOUtils; 039 040import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 041import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; 042import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; 043import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; 044 045/** 046 * Implements an input stream that can read Zip archives. 047 * 048 * <p>As of Apache Commons Compress it transparently supports Zip64 049 * extensions and thus individual entries and archives larger than 4 050 * GB or with more than 65536 entries.</p> 051 * 052 * <p>The {@link ZipFile} class is preferred when reading from files 053 * as {@link ZipArchiveInputStream} is limited by not being able to 054 * read the central directory header before returning entries. In 055 * particular {@link ZipArchiveInputStream}</p> 056 * 057 * <ul> 058 * 059 * <li>may return entries that are not part of the central directory 060 * at all and shouldn't be considered part of the archive.</li> 061 * 062 * <li>may return several entries with the same name.</li> 063 * 064 * <li>will not return internal or external attributes.</li> 065 * 066 * <li>may return incomplete extra field data.</li> 067 * 068 * <li>may return unknown sizes and CRC values for entries until the 069 * next entry has been reached if the archive uses the data 070 * descriptor feature.</li> 071 * 072 * </ul> 073 * 074 * @see ZipFile 075 * @NotThreadSafe 076 */ 077public class ZipArchiveInputStream extends ArchiveInputStream { 078 079 /** The zip encoding to use for filenames and the file comment. */ 080 private final ZipEncoding zipEncoding; 081 082 // the provided encoding (for unit tests) 083 final String encoding; 084 085 /** Whether to look for and use Unicode extra fields. */ 086 private final boolean useUnicodeExtraFields; 087 088 /** Wrapped stream, will always be a PushbackInputStream. */ 089 private final InputStream in; 090 091 /** Inflater used for all deflated entries. */ 092 private final Inflater inf = new Inflater(true); 093 094 /** Buffer used to read from the wrapped stream. */ 095 private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE); 096 097 /** The entry that is currently being read. */ 098 private CurrentEntry current = null; 099 100 /** Whether the stream has been closed. */ 101 private boolean closed = false; 102 103 /** Whether the stream has reached the central directory - and thus found all entries. */ 104 private boolean hitCentralDirectory = false; 105 106 /** 107 * When reading a stored entry that uses the data descriptor this 108 * stream has to read the full entry and caches it. This is the 109 * cache. 110 */ 111 private ByteArrayInputStream lastStoredEntry = null; 112 113 /** Whether the stream will try to read STORED entries that use a data descriptor. */ 114 private boolean allowStoredEntriesWithDataDescriptor = false; 115 116 private static final int LFH_LEN = 30; 117 /* 118 local file header signature WORD 119 version needed to extract SHORT 120 general purpose bit flag SHORT 121 compression method SHORT 122 last mod file time SHORT 123 last mod file date SHORT 124 crc-32 WORD 125 compressed size WORD 126 uncompressed size WORD 127 file name length SHORT 128 extra field length SHORT 129 */ 130 131 private static final int CFH_LEN = 46; 132 /* 133 central file header signature WORD 134 version made by SHORT 135 version needed to extract SHORT 136 general purpose bit flag SHORT 137 compression method SHORT 138 last mod file time SHORT 139 last mod file date SHORT 140 crc-32 WORD 141 compressed size WORD 142 uncompressed size WORD 143 file name length SHORT 144 extra field length SHORT 145 file comment length SHORT 146 disk number start SHORT 147 internal file attributes SHORT 148 external file attributes WORD 149 relative offset of local header WORD 150 */ 151 152 private static final long TWO_EXP_32 = ZIP64_MAGIC + 1; 153 154 // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) 155 private final byte[] lfhBuf = new byte[LFH_LEN]; 156 private final byte[] skipBuf = new byte[1024]; 157 private final byte[] shortBuf = new byte[SHORT]; 158 private final byte[] wordBuf = new byte[WORD]; 159 private final byte[] twoDwordBuf = new byte[2 * DWORD]; 160 161 private int entriesRead = 0; 162 163 /** 164 * Create an instance using UTF-8 encoding 165 * @param inputStream the stream to wrap 166 */ 167 public ZipArchiveInputStream(final InputStream inputStream) { 168 this(inputStream, ZipEncodingHelper.UTF8); 169 } 170 171 /** 172 * Create an instance using the specified encoding 173 * @param inputStream the stream to wrap 174 * @param encoding the encoding to use for file names, use null 175 * for the platform's default encoding 176 * @since 1.5 177 */ 178 public ZipArchiveInputStream(final InputStream inputStream, final String encoding) { 179 this(inputStream, encoding, true); 180 } 181 182 /** 183 * Create an instance using the specified encoding 184 * @param inputStream the stream to wrap 185 * @param encoding the encoding to use for file names, use null 186 * for the platform's default encoding 187 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 188 * Extra Fields (if present) to set the file names. 189 */ 190 public ZipArchiveInputStream(final InputStream inputStream, final String encoding, final boolean useUnicodeExtraFields) { 191 this(inputStream, encoding, useUnicodeExtraFields, false); 192 } 193 194 /** 195 * Create an instance using the specified encoding 196 * @param inputStream the stream to wrap 197 * @param encoding the encoding to use for file names, use null 198 * for the platform's default encoding 199 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 200 * Extra Fields (if present) to set the file names. 201 * @param allowStoredEntriesWithDataDescriptor whether the stream 202 * will try to read STORED entries that use a data descriptor 203 * @since 1.1 204 */ 205 public ZipArchiveInputStream(final InputStream inputStream, 206 final String encoding, 207 final boolean useUnicodeExtraFields, 208 final boolean allowStoredEntriesWithDataDescriptor) { 209 this.encoding = encoding; 210 zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 211 this.useUnicodeExtraFields = useUnicodeExtraFields; 212 in = new PushbackInputStream(inputStream, buf.capacity()); 213 this.allowStoredEntriesWithDataDescriptor = 214 allowStoredEntriesWithDataDescriptor; 215 // haven't read anything so far 216 buf.limit(0); 217 } 218 219 public ZipArchiveEntry getNextZipEntry() throws IOException { 220 boolean firstEntry = true; 221 if (closed || hitCentralDirectory) { 222 return null; 223 } 224 if (current != null) { 225 closeEntry(); 226 firstEntry = false; 227 } 228 229 try { 230 if (firstEntry) { 231 // split archives have a special signature before the 232 // first local file header - look for it and fail with 233 // the appropriate error message if this is a split 234 // archive. 235 readFirstLocalFileHeader(lfhBuf); 236 } else { 237 readFully(lfhBuf); 238 } 239 } catch (final EOFException e) { 240 return null; 241 } 242 243 final ZipLong sig = new ZipLong(lfhBuf); 244 if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) { 245 hitCentralDirectory = true; 246 skipRemainderOfArchive(); 247 return null; 248 } 249 if (!sig.equals(ZipLong.LFH_SIG)) { 250 throw new ZipException(String.format("Unexpected record signature: 0X%X", sig.getValue())); 251 } 252 253 int off = WORD; 254 current = new CurrentEntry(); 255 256 final int versionMadeBy = ZipShort.getValue(lfhBuf, off); 257 off += SHORT; 258 current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK); 259 260 final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfhBuf, off); 261 final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames(); 262 final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding; 263 current.hasDataDescriptor = gpFlag.usesDataDescriptor(); 264 current.entry.setGeneralPurposeBit(gpFlag); 265 266 off += SHORT; 267 268 current.entry.setMethod(ZipShort.getValue(lfhBuf, off)); 269 off += SHORT; 270 271 final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfhBuf, off)); 272 current.entry.setTime(time); 273 off += WORD; 274 275 ZipLong size = null, cSize = null; 276 if (!current.hasDataDescriptor) { 277 current.entry.setCrc(ZipLong.getValue(lfhBuf, off)); 278 off += WORD; 279 280 cSize = new ZipLong(lfhBuf, off); 281 off += WORD; 282 283 size = new ZipLong(lfhBuf, off); 284 off += WORD; 285 } else { 286 off += 3 * WORD; 287 } 288 289 final int fileNameLen = ZipShort.getValue(lfhBuf, off); 290 291 off += SHORT; 292 293 final int extraLen = ZipShort.getValue(lfhBuf, off); 294 off += SHORT; 295 296 final byte[] fileName = new byte[fileNameLen]; 297 readFully(fileName); 298 current.entry.setName(entryEncoding.decode(fileName), fileName); 299 300 final byte[] extraData = new byte[extraLen]; 301 readFully(extraData); 302 current.entry.setExtra(extraData); 303 304 if (!hasUTF8Flag && useUnicodeExtraFields) { 305 ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null); 306 } 307 308 processZip64Extra(size, cSize); 309 310 if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) { 311 if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) { 312 current.in = new UnshrinkingInputStream(new BoundedInputStream(in, current.entry.getCompressedSize())); 313 } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) { 314 current.in = new ExplodingInputStream( 315 current.entry.getGeneralPurposeBit().getSlidingDictionarySize(), 316 current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), 317 new BoundedInputStream(in, current.entry.getCompressedSize())); 318 } else if (current.entry.getMethod() == ZipMethod.BZIP2.getCode()) { 319 current.in = new BZip2CompressorInputStream(new BoundedInputStream(in, current.entry.getCompressedSize())); 320 } 321 } 322 323 entriesRead++; 324 return current.entry; 325 } 326 327 /** 328 * Fills the given array with the first local file header and 329 * deals with splitting/spanning markers that may prefix the first 330 * LFH. 331 */ 332 private void readFirstLocalFileHeader(final byte[] lfh) throws IOException { 333 readFully(lfh); 334 final ZipLong sig = new ZipLong(lfh); 335 if (sig.equals(ZipLong.DD_SIG)) { 336 throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING); 337 } 338 339 if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) { 340 // The archive is not really split as only one segment was 341 // needed in the end. Just skip over the marker. 342 final byte[] missedLfhBytes = new byte[4]; 343 readFully(missedLfhBytes); 344 System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4); 345 System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4); 346 } 347 } 348 349 /** 350 * Records whether a Zip64 extra is present and sets the size 351 * information from it if sizes are 0xFFFFFFFF and the entry 352 * doesn't use a data descriptor. 353 */ 354 private void processZip64Extra(final ZipLong size, final ZipLong cSize) { 355 final Zip64ExtendedInformationExtraField z64 = 356 (Zip64ExtendedInformationExtraField) 357 current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); 358 current.usesZip64 = z64 != null; 359 if (!current.hasDataDescriptor) { 360 if (z64 != null // same as current.usesZip64 but avoids NPE warning 361 && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) { 362 current.entry.setCompressedSize(z64.getCompressedSize().getLongValue()); 363 current.entry.setSize(z64.getSize().getLongValue()); 364 } else { 365 current.entry.setCompressedSize(cSize.getValue()); 366 current.entry.setSize(size.getValue()); 367 } 368 } 369 } 370 371 @Override 372 public ArchiveEntry getNextEntry() throws IOException { 373 return getNextZipEntry(); 374 } 375 376 /** 377 * Whether this class is able to read the given entry. 378 * 379 * <p>May return false if it is set up to use encryption or a 380 * compression method that hasn't been implemented yet.</p> 381 * @since 1.1 382 */ 383 @Override 384 public boolean canReadEntryData(final ArchiveEntry ae) { 385 if (ae instanceof ZipArchiveEntry) { 386 final ZipArchiveEntry ze = (ZipArchiveEntry) ae; 387 return ZipUtil.canHandleEntryData(ze) 388 && supportsDataDescriptorFor(ze); 389 390 } 391 return false; 392 } 393 394 @Override 395 public int read(final byte[] buffer, final int offset, final int length) throws IOException { 396 if (closed) { 397 throw new IOException("The stream is closed"); 398 } 399 400 if (current == null) { 401 return -1; 402 } 403 404 // avoid int overflow, check null buffer 405 if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) { 406 throw new ArrayIndexOutOfBoundsException(); 407 } 408 409 ZipUtil.checkRequestedFeatures(current.entry); 410 if (!supportsDataDescriptorFor(current.entry)) { 411 throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR, 412 current.entry); 413 } 414 415 int read; 416 if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) { 417 read = readStored(buffer, offset, length); 418 } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) { 419 read = readDeflated(buffer, offset, length); 420 } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode() 421 || current.entry.getMethod() == ZipMethod.IMPLODING.getCode() 422 || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) { 423 read = current.in.read(buffer, offset, length); 424 } else { 425 throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()), 426 current.entry); 427 } 428 429 if (read >= 0) { 430 current.crc.update(buffer, offset, read); 431 } 432 433 return read; 434 } 435 436 /** 437 * Implementation of read for STORED entries. 438 */ 439 private int readStored(final byte[] buffer, final int offset, final int length) throws IOException { 440 441 if (current.hasDataDescriptor) { 442 if (lastStoredEntry == null) { 443 readStoredEntry(); 444 } 445 return lastStoredEntry.read(buffer, offset, length); 446 } 447 448 final long csize = current.entry.getSize(); 449 if (current.bytesRead >= csize) { 450 return -1; 451 } 452 453 if (buf.position() >= buf.limit()) { 454 buf.position(0); 455 final int l = in.read(buf.array()); 456 if (l == -1) { 457 return -1; 458 } 459 buf.limit(l); 460 461 count(l); 462 current.bytesReadFromStream += l; 463 } 464 465 int toRead = Math.min(buf.remaining(), length); 466 if ((csize - current.bytesRead) < toRead) { 467 // if it is smaller than toRead then it fits into an int 468 toRead = (int) (csize - current.bytesRead); 469 } 470 buf.get(buffer, offset, toRead); 471 current.bytesRead += toRead; 472 return toRead; 473 } 474 475 /** 476 * Implementation of read for DEFLATED entries. 477 */ 478 private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException { 479 final int read = readFromInflater(buffer, offset, length); 480 if (read <= 0) { 481 if (inf.finished()) { 482 return -1; 483 } else if (inf.needsDictionary()) { 484 throw new ZipException("This archive needs a preset dictionary" 485 + " which is not supported by Commons" 486 + " Compress."); 487 } else if (read == -1) { 488 throw new IOException("Truncated ZIP file"); 489 } 490 } 491 return read; 492 } 493 494 /** 495 * Potentially reads more bytes to fill the inflater's buffer and 496 * reads from it. 497 */ 498 private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException { 499 int read = 0; 500 do { 501 if (inf.needsInput()) { 502 final int l = fill(); 503 if (l > 0) { 504 current.bytesReadFromStream += buf.limit(); 505 } else if (l == -1) { 506 return -1; 507 } else { 508 break; 509 } 510 } 511 try { 512 read = inf.inflate(buffer, offset, length); 513 } catch (final DataFormatException e) { 514 throw (IOException) new ZipException(e.getMessage()).initCause(e); 515 } 516 } while (read == 0 && inf.needsInput()); 517 return read; 518 } 519 520 @Override 521 public void close() throws IOException { 522 if (!closed) { 523 closed = true; 524 try { 525 in.close(); 526 } finally { 527 inf.end(); 528 } 529 } 530 } 531 532 /** 533 * Skips over and discards value bytes of data from this input 534 * stream. 535 * 536 * <p>This implementation may end up skipping over some smaller 537 * number of bytes, possibly 0, if and only if it reaches the end 538 * of the underlying stream.</p> 539 * 540 * <p>The actual number of bytes skipped is returned.</p> 541 * 542 * @param value the number of bytes to be skipped. 543 * @return the actual number of bytes skipped. 544 * @throws IOException - if an I/O error occurs. 545 * @throws IllegalArgumentException - if value is negative. 546 */ 547 @Override 548 public long skip(final long value) throws IOException { 549 if (value >= 0) { 550 long skipped = 0; 551 while (skipped < value) { 552 final long rem = value - skipped; 553 final int x = read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length)); 554 if (x == -1) { 555 return skipped; 556 } 557 skipped += x; 558 } 559 return skipped; 560 } 561 throw new IllegalArgumentException(); 562 } 563 564 /** 565 * Checks if the signature matches what is expected for a zip file. 566 * Does not currently handle self-extracting zips which may have arbitrary 567 * leading content. 568 * 569 * @param signature the bytes to check 570 * @param length the number of bytes to check 571 * @return true, if this stream is a zip archive stream, false otherwise 572 */ 573 public static boolean matches(final byte[] signature, final int length) { 574 if (length < ZipArchiveOutputStream.LFH_SIG.length) { 575 return false; 576 } 577 578 return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file 579 || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip 580 || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip 581 || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes()); 582 } 583 584 private static boolean checksig(final byte[] signature, final byte[] expected) { 585 for (int i = 0; i < expected.length; i++) { 586 if (signature[i] != expected[i]) { 587 return false; 588 } 589 } 590 return true; 591 } 592 593 /** 594 * Closes the current ZIP archive entry and positions the underlying 595 * stream to the beginning of the next entry. All per-entry variables 596 * and data structures are cleared. 597 * <p> 598 * If the compressed size of this entry is included in the entry header, 599 * then any outstanding bytes are simply skipped from the underlying 600 * stream without uncompressing them. This allows an entry to be safely 601 * closed even if the compression method is unsupported. 602 * <p> 603 * In case we don't know the compressed size of this entry or have 604 * already buffered too much data from the underlying stream to support 605 * uncompression, then the uncompression process is completed and the 606 * end position of the stream is adjusted based on the result of that 607 * process. 608 * 609 * @throws IOException if an error occurs 610 */ 611 private void closeEntry() throws IOException { 612 if (closed) { 613 throw new IOException("The stream is closed"); 614 } 615 if (current == null) { 616 return; 617 } 618 619 // Ensure all entry bytes are read 620 if (currentEntryHasOutstandingBytes()) { 621 drainCurrentEntryData(); 622 } else { 623 // this is guaranteed to exhaust the stream 624 skip(Long.MAX_VALUE); //NOSONAR 625 626 final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED 627 ? getBytesInflated() : current.bytesRead; 628 629 // this is at most a single read() operation and can't 630 // exceed the range of int 631 final int diff = (int) (current.bytesReadFromStream - inB); 632 633 // Pushback any required bytes 634 if (diff > 0) { 635 pushback(buf.array(), buf.limit() - diff, diff); 636 current.bytesReadFromStream -= diff; 637 } 638 639 // Drain remainder of entry if not all data bytes were required 640 if (currentEntryHasOutstandingBytes()) { 641 drainCurrentEntryData(); 642 } 643 } 644 645 if (lastStoredEntry == null && current.hasDataDescriptor) { 646 readDataDescriptor(); 647 } 648 649 inf.reset(); 650 buf.clear().flip(); 651 current = null; 652 lastStoredEntry = null; 653 } 654 655 /** 656 * If the compressed size of the current entry is included in the entry header 657 * and there are any outstanding bytes in the underlying stream, then 658 * this returns true. 659 * 660 * @return true, if current entry is determined to have outstanding bytes, false otherwise 661 */ 662 private boolean currentEntryHasOutstandingBytes() { 663 return current.bytesReadFromStream <= current.entry.getCompressedSize() 664 && !current.hasDataDescriptor; 665 } 666 667 /** 668 * Read all data of the current entry from the underlying stream 669 * that hasn't been read, yet. 670 */ 671 private void drainCurrentEntryData() throws IOException { 672 long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream; 673 while (remaining > 0) { 674 final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining)); 675 if (n < 0) { 676 throw new EOFException("Truncated ZIP entry: " 677 + ArchiveUtils.sanitize(current.entry.getName())); 678 } 679 count(n); 680 remaining -= n; 681 } 682 } 683 684 /** 685 * Get the number of bytes Inflater has actually processed. 686 * 687 * <p>for Java < Java7 the getBytes* methods in 688 * Inflater/Deflater seem to return unsigned ints rather than 689 * longs that start over with 0 at 2^32.</p> 690 * 691 * <p>The stream knows how many bytes it has read, but not how 692 * many the Inflater actually consumed - it should be between the 693 * total number of bytes read for the entry and the total number 694 * minus the last read operation. Here we just try to make the 695 * value close enough to the bytes we've read by assuming the 696 * number of bytes consumed must be smaller than (or equal to) the 697 * number of bytes read but not smaller by more than 2^32.</p> 698 */ 699 private long getBytesInflated() { 700 long inB = inf.getBytesRead(); 701 if (current.bytesReadFromStream >= TWO_EXP_32) { 702 while (inB + TWO_EXP_32 <= current.bytesReadFromStream) { 703 inB += TWO_EXP_32; 704 } 705 } 706 return inB; 707 } 708 709 private int fill() throws IOException { 710 if (closed) { 711 throw new IOException("The stream is closed"); 712 } 713 final int length = in.read(buf.array()); 714 if (length > 0) { 715 buf.limit(length); 716 count(buf.limit()); 717 inf.setInput(buf.array(), 0, buf.limit()); 718 } 719 return length; 720 } 721 722 private void readFully(final byte[] b) throws IOException { 723 final int count = IOUtils.readFully(in, b); 724 count(count); 725 if (count < b.length) { 726 throw new EOFException(); 727 } 728 } 729 730 private void readDataDescriptor() throws IOException { 731 readFully(wordBuf); 732 ZipLong val = new ZipLong(wordBuf); 733 if (ZipLong.DD_SIG.equals(val)) { 734 // data descriptor with signature, skip sig 735 readFully(wordBuf); 736 val = new ZipLong(wordBuf); 737 } 738 current.entry.setCrc(val.getValue()); 739 740 // if there is a ZIP64 extra field, sizes are eight bytes 741 // each, otherwise four bytes each. Unfortunately some 742 // implementations - namely Java7 - use eight bytes without 743 // using a ZIP64 extra field - 744 // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588 745 746 // just read 16 bytes and check whether bytes nine to twelve 747 // look like one of the signatures of what could follow a data 748 // descriptor (ignoring archive decryption headers for now). 749 // If so, push back eight bytes and assume sizes are four 750 // bytes, otherwise sizes are eight bytes each. 751 readFully(twoDwordBuf); 752 final ZipLong potentialSig = new ZipLong(twoDwordBuf, DWORD); 753 if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) { 754 pushback(twoDwordBuf, DWORD, DWORD); 755 current.entry.setCompressedSize(ZipLong.getValue(twoDwordBuf)); 756 current.entry.setSize(ZipLong.getValue(twoDwordBuf, WORD)); 757 } else { 758 current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(twoDwordBuf)); 759 current.entry.setSize(ZipEightByteInteger.getLongValue(twoDwordBuf, DWORD)); 760 } 761 } 762 763 /** 764 * Whether this entry requires a data descriptor this library can work with. 765 * 766 * @return true if allowStoredEntriesWithDataDescriptor is true, 767 * the entry doesn't require any data descriptor or the method is 768 * DEFLATED. 769 */ 770 private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) { 771 return !entry.getGeneralPurposeBit().usesDataDescriptor() 772 773 || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED) 774 || entry.getMethod() == ZipEntry.DEFLATED; 775 } 776 777 /** 778 * Caches a stored entry that uses the data descriptor. 779 * 780 * <ul> 781 * <li>Reads a stored entry until the signature of a local file 782 * header, central directory header or data descriptor has been 783 * found.</li> 784 * <li>Stores all entry data in lastStoredEntry.</p> 785 * <li>Rewinds the stream to position at the data 786 * descriptor.</li> 787 * <li>reads the data descriptor</li> 788 * </ul> 789 * 790 * <p>After calling this method the entry should know its size, 791 * the entry's data is cached and the stream is positioned at the 792 * next local file or central directory header.</p> 793 */ 794 private void readStoredEntry() throws IOException { 795 final ByteArrayOutputStream bos = new ByteArrayOutputStream(); 796 int off = 0; 797 boolean done = false; 798 799 // length of DD without signature 800 final int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD; 801 802 while (!done) { 803 final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off); 804 if (r <= 0) { 805 // read the whole archive without ever finding a 806 // central directory 807 throw new IOException("Truncated ZIP file"); 808 } 809 if (r + off < 4) { 810 // buffer too small to check for a signature, loop 811 off += r; 812 continue; 813 } 814 815 done = bufferContainsSignature(bos, off, r, ddLen); 816 if (!done) { 817 off = cacheBytesRead(bos, off, r, ddLen); 818 } 819 } 820 821 final byte[] b = bos.toByteArray(); 822 lastStoredEntry = new ByteArrayInputStream(b); 823 } 824 825 private static final byte[] LFH = ZipLong.LFH_SIG.getBytes(); 826 private static final byte[] CFH = ZipLong.CFH_SIG.getBytes(); 827 private static final byte[] DD = ZipLong.DD_SIG.getBytes(); 828 829 /** 830 * Checks whether the current buffer contains the signature of a 831 * "data descriptor", "local file header" or 832 * "central directory entry". 833 * 834 * <p>If it contains such a signature, reads the data descriptor 835 * and positions the stream right after the data descriptor.</p> 836 */ 837 private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen) 838 throws IOException { 839 840 boolean done = false; 841 int readTooMuch = 0; 842 for (int i = 0; !done && i < lastRead - 4; i++) { 843 if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) { 844 if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3]) 845 || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) { 846 // found a LFH or CFH: 847 readTooMuch = offset + lastRead - i - expectedDDLen; 848 done = true; 849 } 850 else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) { 851 // found DD: 852 readTooMuch = offset + lastRead - i; 853 done = true; 854 } 855 if (done) { 856 // * push back bytes read in excess as well as the data 857 // descriptor 858 // * copy the remaining bytes to cache 859 // * read data descriptor 860 pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch); 861 bos.write(buf.array(), 0, i); 862 readDataDescriptor(); 863 } 864 } 865 } 866 return done; 867 } 868 869 /** 870 * If the last read bytes could hold a data descriptor and an 871 * incomplete signature then save the last bytes to the front of 872 * the buffer and cache everything in front of the potential data 873 * descriptor into the given ByteArrayOutputStream. 874 * 875 * <p>Data descriptor plus incomplete signature (3 bytes in the 876 * worst case) can be 20 bytes max.</p> 877 */ 878 private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expecteDDLen) { 879 final int cacheable = offset + lastRead - expecteDDLen - 3; 880 if (cacheable > 0) { 881 bos.write(buf.array(), 0, cacheable); 882 System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3); 883 offset = expecteDDLen + 3; 884 } else { 885 offset += lastRead; 886 } 887 return offset; 888 } 889 890 private void pushback(final byte[] buf, final int offset, final int length) throws IOException { 891 ((PushbackInputStream) in).unread(buf, offset, length); 892 pushedBackBytes(length); 893 } 894 895 // End of Central Directory Record 896 // end of central dir signature WORD 897 // number of this disk SHORT 898 // number of the disk with the 899 // start of the central directory SHORT 900 // total number of entries in the 901 // central directory on this disk SHORT 902 // total number of entries in 903 // the central directory SHORT 904 // size of the central directory WORD 905 // offset of start of central 906 // directory with respect to 907 // the starting disk number WORD 908 // .ZIP file comment length SHORT 909 // .ZIP file comment up to 64KB 910 // 911 912 /** 913 * Reads the stream until it find the "End of central directory 914 * record" and consumes it as well. 915 */ 916 private void skipRemainderOfArchive() throws IOException { 917 // skip over central directory. One LFH has been read too much 918 // already. The calculation discounts file names and extra 919 // data so it will be too short. 920 realSkip((long) entriesRead * CFH_LEN - LFH_LEN); 921 findEocdRecord(); 922 realSkip((long) ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */); 923 readFully(shortBuf); 924 // file comment 925 realSkip(ZipShort.getValue(shortBuf)); 926 } 927 928 /** 929 * Reads forward until the signature of the "End of central 930 * directory" record is found. 931 */ 932 private void findEocdRecord() throws IOException { 933 int currentByte = -1; 934 boolean skipReadCall = false; 935 while (skipReadCall || (currentByte = readOneByte()) > -1) { 936 skipReadCall = false; 937 if (!isFirstByteOfEocdSig(currentByte)) { 938 continue; 939 } 940 currentByte = readOneByte(); 941 if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) { 942 if (currentByte == -1) { 943 break; 944 } 945 skipReadCall = isFirstByteOfEocdSig(currentByte); 946 continue; 947 } 948 currentByte = readOneByte(); 949 if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) { 950 if (currentByte == -1) { 951 break; 952 } 953 skipReadCall = isFirstByteOfEocdSig(currentByte); 954 continue; 955 } 956 currentByte = readOneByte(); 957 if (currentByte == -1 958 || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) { 959 break; 960 } 961 skipReadCall = isFirstByteOfEocdSig(currentByte); 962 } 963 } 964 965 /** 966 * Skips bytes by reading from the underlying stream rather than 967 * the (potentially inflating) archive stream - which {@link 968 * #skip} would do. 969 * 970 * Also updates bytes-read counter. 971 */ 972 private void realSkip(final long value) throws IOException { 973 if (value >= 0) { 974 long skipped = 0; 975 while (skipped < value) { 976 final long rem = value - skipped; 977 final int x = in.read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length)); 978 if (x == -1) { 979 return; 980 } 981 count(x); 982 skipped += x; 983 } 984 return; 985 } 986 throw new IllegalArgumentException(); 987 } 988 989 /** 990 * Reads bytes by reading from the underlying stream rather than 991 * the (potentially inflating) archive stream - which {@link #read} would do. 992 * 993 * Also updates bytes-read counter. 994 */ 995 private int readOneByte() throws IOException { 996 final int b = in.read(); 997 if (b != -1) { 998 count(1); 999 } 1000 return b; 1001 } 1002 1003 private boolean isFirstByteOfEocdSig(final int b) { 1004 return b == ZipArchiveOutputStream.EOCD_SIG[0]; 1005 } 1006 1007 /** 1008 * Structure collecting information for the entry that is 1009 * currently being read. 1010 */ 1011 private static final class CurrentEntry { 1012 1013 /** 1014 * Current ZIP entry. 1015 */ 1016 private final ZipArchiveEntry entry = new ZipArchiveEntry(); 1017 1018 /** 1019 * Does the entry use a data descriptor? 1020 */ 1021 private boolean hasDataDescriptor; 1022 1023 /** 1024 * Does the entry have a ZIP64 extended information extra field. 1025 */ 1026 private boolean usesZip64; 1027 1028 /** 1029 * Number of bytes of entry content read by the client if the 1030 * entry is STORED. 1031 */ 1032 private long bytesRead; 1033 1034 /** 1035 * Number of bytes of entry content read so from the stream. 1036 * 1037 * <p>This may be more than the actual entry's length as some 1038 * stuff gets buffered up and needs to be pushed back when the 1039 * end of the entry has been reached.</p> 1040 */ 1041 private long bytesReadFromStream; 1042 1043 /** 1044 * The checksum calculated as the current entry is read. 1045 */ 1046 private final CRC32 crc = new CRC32(); 1047 1048 /** 1049 * The input stream decompressing the data for shrunk and imploded entries. 1050 */ 1051 private InputStream in; 1052 } 1053 1054 /** 1055 * Bounded input stream adapted from commons-io 1056 */ 1057 private class BoundedInputStream extends InputStream { 1058 1059 /** the wrapped input stream */ 1060 private final InputStream in; 1061 1062 /** the max length to provide */ 1063 private final long max; 1064 1065 /** the number of bytes already returned */ 1066 private long pos = 0; 1067 1068 /** 1069 * Creates a new <code>BoundedInputStream</code> that wraps the given input 1070 * stream and limits it to a certain size. 1071 * 1072 * @param in The wrapped input stream 1073 * @param size The maximum number of bytes to return 1074 */ 1075 public BoundedInputStream(final InputStream in, final long size) { 1076 this.max = size; 1077 this.in = in; 1078 } 1079 1080 @Override 1081 public int read() throws IOException { 1082 if (max >= 0 && pos >= max) { 1083 return -1; 1084 } 1085 final int result = in.read(); 1086 pos++; 1087 count(1); 1088 current.bytesReadFromStream++; 1089 return result; 1090 } 1091 1092 @Override 1093 public int read(final byte[] b) throws IOException { 1094 return this.read(b, 0, b.length); 1095 } 1096 1097 @Override 1098 public int read(final byte[] b, final int off, final int len) throws IOException { 1099 if (max >= 0 && pos >= max) { 1100 return -1; 1101 } 1102 final long maxRead = max >= 0 ? Math.min(len, max - pos) : len; 1103 final int bytesRead = in.read(b, off, (int) maxRead); 1104 1105 if (bytesRead == -1) { 1106 return -1; 1107 } 1108 1109 pos += bytesRead; 1110 count(bytesRead); 1111 current.bytesReadFromStream += bytesRead; 1112 return bytesRead; 1113 } 1114 1115 @Override 1116 public long skip(final long n) throws IOException { 1117 final long toSkip = max >= 0 ? Math.min(n, max - pos) : n; 1118 final long skippedBytes = in.skip(toSkip); 1119 pos += skippedBytes; 1120 return skippedBytes; 1121 } 1122 1123 @Override 1124 public int available() throws IOException { 1125 if (max >= 0 && pos >= max) { 1126 return 0; 1127 } 1128 return in.available(); 1129 } 1130 } 1131}