001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018package org.apache.commons.compress.archivers.sevenz; 019 020import java.io.BufferedInputStream; 021import java.io.ByteArrayInputStream; 022import java.io.Closeable; 023import java.io.DataInputStream; 024import java.io.File; 025import java.io.IOException; 026import java.io.InputStream; 027import java.nio.ByteBuffer; 028import java.nio.ByteOrder; 029import java.nio.channels.SeekableByteChannel; 030import java.nio.file.Files; 031import java.nio.file.StandardOpenOption; 032import java.util.ArrayList; 033import java.util.Arrays; 034import java.util.BitSet; 035import java.util.EnumSet; 036import java.util.LinkedList; 037import java.util.zip.CRC32; 038 039import org.apache.commons.compress.utils.BoundedInputStream; 040import org.apache.commons.compress.utils.CRC32VerifyingInputStream; 041import org.apache.commons.compress.utils.CharsetNames; 042import org.apache.commons.compress.utils.IOUtils; 043 044/** 045 * Reads a 7z file, using SeekableByteChannel under 046 * the covers. 
* <p>
 * The 7z file format is a flexible container
 * that can contain many compression and
 * encryption types, but at the moment
 * only Copy, LZMA, LZMA2, BZIP2, Deflate and AES-256 + SHA-256
 * are supported.
 * <p>
 * The format is very Windows/Intel specific,
 * so it uses little-endian byte order,
 * doesn't store user/group or permission bits,
 * and represents times using NTFS timestamps
 * (100 nanosecond units since 1 January 1601).
 * Hence the official tools recommend against
 * using it for backup purposes on *nix, and
 * recommend .tar.7z or .tar.lzma or .tar.xz
 * instead.
 * <p>
 * Both the header and file contents may be
 * compressed and/or encrypted. With both
 * encrypted, neither file names nor file
 * contents can be read, but the use of
 * encryption isn't plausibly deniable.
 *
 * @NotThreadSafe
 * @since 1.6
 */
public class SevenZFile implements Closeable {
    // Number of bytes preceding the packed data: readHeaders() consumes 12 bytes
    // (signature, version, start header CRC) and readStartHeader() the next 20.
    // All stored offsets are relative to the end of this fixed-size area.
    static final int SIGNATURE_HEADER_SIZE = 32;

    // Name of the archive, handed to the decoders when building decoder stacks.
    private final String fileName;
    // Source of all archive data; set to null by close().
    private SeekableByteChannel channel;
    // Parsed archive headers (folders, pack sizes, file entries, stream map).
    private final Archive archive;
    // Index into archive.files of the entry last returned by getNextEntry(), -1 before the first call.
    private int currentEntryIndex = -1;
    // Index of the folder whose decoder stack is currently open, -1 if none has been opened yet.
    private int currentFolderIndex = -1;
    // Decoder stack of the current folder; shared by all entries stored in that folder.
    private InputStream currentFolderInputStream = null;
    // Private copy of the caller's password; zeroed and dropped by close().
    private byte[] password;

    // Per-entry streams of the current folder that have been queued but not read yet;
    // all but the last one are skipped lazily by getCurrentStream().
    private final ArrayList<InputStream> deferredBlockStreams = new ArrayList<>();

    // shared with SevenZOutputFile and tests, neither mutates it
    static final byte[] sevenZSignature = { //NOSONAR
        (byte)'7', (byte)'z', (byte)0xBC, (byte)0xAF, (byte)0x27, (byte)0x1C
    };

    /**
     * Reads a file as 7z archive
     *
     * @param filename the file to read
     * @param password optional password if the archive is encrypted -
     * the byte array is supposed to be the UTF16-LE encoded
     * representation of the password.
* @throws IOException if reading the archive fails
     */
    public SevenZFile(final File filename, final byte[] password) throws IOException {
        // The channel is opened here, so the trailing "true" (closeOnError) asks the
        // private constructor to close it again if reading the headers fails.
        this(Files.newByteChannel(filename.toPath(), EnumSet.of(StandardOpenOption.READ)),
             filename.getAbsolutePath(), password, true);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * <p>The channel is owned by the caller: it is not closed if
     * reading the headers fails.</p>
     *
     * @param channel the channel to read
     * @throws IOException if reading the archive fails
     * @since 1.13
     */
    public SevenZFile(final SeekableByteChannel channel) throws IOException {
        // no real name is known for a bare channel, use a placeholder for error messages
        this(channel, "unknown archive", null);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * <p>The channel is owned by the caller: it is not closed if
     * reading the headers fails.</p>
     *
     * @param channel the channel to read
     * @param password optional password if the archive is encrypted -
     * the byte array is supposed to be the UTF16-LE encoded
     * representation of the password.
     * @throws IOException if reading the archive fails
     * @since 1.13
     */
    public SevenZFile(final SeekableByteChannel channel,
                      final byte[] password) throws IOException {
        // no real name is known for a bare channel, use a placeholder for error messages
        this(channel, "unknown archive", password);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param filename name of the archive - only used for error reporting
     * @param password optional password if the archive is encrypted -
     * the byte array is supposed to be the UTF16-LE encoded
     * representation of the password.
151 * @throws IOException if reading the archive fails 152 * @since 1.13 153 */ 154 public SevenZFile(final SeekableByteChannel channel, String filename, 155 final byte[] password) throws IOException { 156 this(channel, filename, password, false); 157 } 158 159 private SevenZFile(final SeekableByteChannel channel, String filename, 160 final byte[] password, boolean closeOnError) throws IOException { 161 boolean succeeded = false; 162 this.channel = channel; 163 this.fileName = filename; 164 try { 165 archive = readHeaders(password); 166 if (password != null) { 167 this.password = new byte[password.length]; 168 System.arraycopy(password, 0, this.password, 0, password.length); 169 } else { 170 this.password = null; 171 } 172 succeeded = true; 173 } finally { 174 if (!succeeded && closeOnError) { 175 this.channel.close(); 176 } 177 } 178 } 179 180 /** 181 * Reads a file as unencrypted 7z archive 182 * 183 * @param filename the file to read 184 * @throws IOException if reading the archive fails 185 */ 186 public SevenZFile(final File filename) throws IOException { 187 this(filename, null); 188 } 189 190 /** 191 * Closes the archive. 192 * @throws IOException if closing the file fails 193 */ 194 @Override 195 public void close() throws IOException { 196 if (channel != null) { 197 try { 198 channel.close(); 199 } finally { 200 channel = null; 201 if (password != null) { 202 Arrays.fill(password, (byte) 0); 203 } 204 password = null; 205 } 206 } 207 } 208 209 /** 210 * Returns the next Archive Entry in this archive. 
211 * 212 * @return the next entry, 213 * or {@code null} if there are no more entries 214 * @throws IOException if the next entry could not be read 215 */ 216 public SevenZArchiveEntry getNextEntry() throws IOException { 217 if (currentEntryIndex >= archive.files.length - 1) { 218 return null; 219 } 220 ++currentEntryIndex; 221 final SevenZArchiveEntry entry = archive.files[currentEntryIndex]; 222 buildDecodingStream(); 223 return entry; 224 } 225 226 /** 227 * Returns meta-data of all archive entries. 228 * 229 * <p>This method only provides meta-data, the entries can not be 230 * used to read the contents, you still need to process all 231 * entries in order using {@link #getNextEntry} for that.</p> 232 * 233 * <p>The content methods are only available for entries that have 234 * already been reached via {@link #getNextEntry}.</p> 235 * 236 * @return meta-data of all archive entries. 237 * @since 1.11 238 */ 239 public Iterable<SevenZArchiveEntry> getEntries() { 240 return Arrays.asList(archive.files); 241 } 242 243 private Archive readHeaders(final byte[] password) throws IOException { 244 ByteBuffer buf = ByteBuffer.allocate(12 /* signature + 2 bytes version + 4 bytes CRC */) 245 .order(ByteOrder.LITTLE_ENDIAN); 246 readFully(buf); 247 final byte[] signature = new byte[6]; 248 buf.get(signature); 249 if (!Arrays.equals(signature, sevenZSignature)) { 250 throw new IOException("Bad 7z signature"); 251 } 252 // 7zFormat.txt has it wrong - it's first major then minor 253 final byte archiveVersionMajor = buf.get(); 254 final byte archiveVersionMinor = buf.get(); 255 if (archiveVersionMajor != 0) { 256 throw new IOException(String.format("Unsupported 7z version (%d,%d)", 257 archiveVersionMajor, archiveVersionMinor)); 258 } 259 260 final long startHeaderCrc = 0xffffFFFFL & buf.getInt(); 261 final StartHeader startHeader = readStartHeader(startHeaderCrc); 262 263 final int nextHeaderSizeInt = (int) startHeader.nextHeaderSize; 264 if (nextHeaderSizeInt != 
startHeader.nextHeaderSize) { 265 throw new IOException("cannot handle nextHeaderSize " + startHeader.nextHeaderSize); 266 } 267 channel.position(SIGNATURE_HEADER_SIZE + startHeader.nextHeaderOffset); 268 buf = ByteBuffer.allocate(nextHeaderSizeInt).order(ByteOrder.LITTLE_ENDIAN); 269 readFully(buf); 270 final CRC32 crc = new CRC32(); 271 crc.update(buf.array()); 272 if (startHeader.nextHeaderCrc != crc.getValue()) { 273 throw new IOException("NextHeader CRC mismatch"); 274 } 275 276 Archive archive = new Archive(); 277 int nid = getUnsignedByte(buf); 278 if (nid == NID.kEncodedHeader) { 279 buf = readEncodedHeader(buf, archive, password); 280 // Archive gets rebuilt with the new header 281 archive = new Archive(); 282 nid = getUnsignedByte(buf); 283 } 284 if (nid == NID.kHeader) { 285 readHeader(buf, archive); 286 } else { 287 throw new IOException("Broken or unsupported archive: no Header"); 288 } 289 return archive; 290 } 291 292 private StartHeader readStartHeader(final long startHeaderCrc) throws IOException { 293 final StartHeader startHeader = new StartHeader(); 294 // using Stream rather than ByteBuffer for the benefit of the 295 // built-in CRC check 296 try (DataInputStream dataInputStream = new DataInputStream(new CRC32VerifyingInputStream( 297 new BoundedSeekableByteChannelInputStream(channel, 20), 20, startHeaderCrc))) { 298 startHeader.nextHeaderOffset = Long.reverseBytes(dataInputStream.readLong()); 299 startHeader.nextHeaderSize = Long.reverseBytes(dataInputStream.readLong()); 300 startHeader.nextHeaderCrc = 0xffffFFFFL & Integer.reverseBytes(dataInputStream.readInt()); 301 return startHeader; 302 } 303 } 304 305 private void readHeader(final ByteBuffer header, final Archive archive) throws IOException { 306 int nid = getUnsignedByte(header); 307 308 if (nid == NID.kArchiveProperties) { 309 readArchiveProperties(header); 310 nid = getUnsignedByte(header); 311 } 312 313 if (nid == NID.kAdditionalStreamsInfo) { 314 throw new IOException("Additional 
streams unsupported"); 315 //nid = header.readUnsignedByte(); 316 } 317 318 if (nid == NID.kMainStreamsInfo) { 319 readStreamsInfo(header, archive); 320 nid = getUnsignedByte(header); 321 } 322 323 if (nid == NID.kFilesInfo) { 324 readFilesInfo(header, archive); 325 nid = getUnsignedByte(header); 326 } 327 328 if (nid != NID.kEnd) { 329 throw new IOException("Badly terminated header, found " + nid); 330 } 331 } 332 333 private void readArchiveProperties(final ByteBuffer input) throws IOException { 334 // FIXME: the reference implementation just throws them away? 335 int nid = getUnsignedByte(input); 336 while (nid != NID.kEnd) { 337 final long propertySize = readUint64(input); 338 final byte[] property = new byte[(int)propertySize]; 339 input.get(property); 340 nid = getUnsignedByte(input); 341 } 342 } 343 344 private ByteBuffer readEncodedHeader(final ByteBuffer header, final Archive archive, 345 final byte[] password) throws IOException { 346 readStreamsInfo(header, archive); 347 348 // FIXME: merge with buildDecodingStream()/buildDecoderStack() at some stage? 
349 final Folder folder = archive.folders[0]; 350 final int firstPackStreamIndex = 0; 351 final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + 352 0; 353 354 channel.position(folderOffset); 355 InputStream inputStreamStack = new BoundedSeekableByteChannelInputStream(channel, 356 archive.packSizes[firstPackStreamIndex]); 357 for (final Coder coder : folder.getOrderedCoders()) { 358 if (coder.numInStreams != 1 || coder.numOutStreams != 1) { 359 throw new IOException("Multi input/output stream coders are not yet supported"); 360 } 361 inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, //NOSONAR 362 folder.getUnpackSizeForCoder(coder), coder, password); 363 } 364 if (folder.hasCrc) { 365 inputStreamStack = new CRC32VerifyingInputStream(inputStreamStack, 366 folder.getUnpackSize(), folder.crc); 367 } 368 final byte[] nextHeader = new byte[(int)folder.getUnpackSize()]; 369 try (DataInputStream nextHeaderInputStream = new DataInputStream(inputStreamStack)) { 370 nextHeaderInputStream.readFully(nextHeader); 371 } 372 return ByteBuffer.wrap(nextHeader).order(ByteOrder.LITTLE_ENDIAN); 373 } 374 375 private void readStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { 376 int nid = getUnsignedByte(header); 377 378 if (nid == NID.kPackInfo) { 379 readPackInfo(header, archive); 380 nid = getUnsignedByte(header); 381 } 382 383 if (nid == NID.kUnpackInfo) { 384 readUnpackInfo(header, archive); 385 nid = getUnsignedByte(header); 386 } else { 387 // archive without unpack/coders info 388 archive.folders = new Folder[0]; 389 } 390 391 if (nid == NID.kSubStreamsInfo) { 392 readSubStreamsInfo(header, archive); 393 nid = getUnsignedByte(header); 394 } 395 396 if (nid != NID.kEnd) { 397 throw new IOException("Badly terminated StreamsInfo"); 398 } 399 } 400 401 private void readPackInfo(final ByteBuffer header, final Archive archive) throws IOException { 402 archive.packPos = readUint64(header); 403 final long numPackStreams = 
readUint64(header); 404 int nid = getUnsignedByte(header); 405 if (nid == NID.kSize) { 406 archive.packSizes = new long[(int)numPackStreams]; 407 for (int i = 0; i < archive.packSizes.length; i++) { 408 archive.packSizes[i] = readUint64(header); 409 } 410 nid = getUnsignedByte(header); 411 } 412 413 if (nid == NID.kCRC) { 414 archive.packCrcsDefined = readAllOrBits(header, (int)numPackStreams); 415 archive.packCrcs = new long[(int)numPackStreams]; 416 for (int i = 0; i < (int)numPackStreams; i++) { 417 if (archive.packCrcsDefined.get(i)) { 418 archive.packCrcs[i] = 0xffffFFFFL & header.getInt(); 419 } 420 } 421 422 nid = getUnsignedByte(header); 423 } 424 425 if (nid != NID.kEnd) { 426 throw new IOException("Badly terminated PackInfo (" + nid + ")"); 427 } 428 } 429 430 private void readUnpackInfo(final ByteBuffer header, final Archive archive) throws IOException { 431 int nid = getUnsignedByte(header); 432 if (nid != NID.kFolder) { 433 throw new IOException("Expected kFolder, got " + nid); 434 } 435 final long numFolders = readUint64(header); 436 final Folder[] folders = new Folder[(int)numFolders]; 437 archive.folders = folders; 438 final int external = getUnsignedByte(header); 439 if (external != 0) { 440 throw new IOException("External unsupported"); 441 } 442 for (int i = 0; i < (int)numFolders; i++) { 443 folders[i] = readFolder(header); 444 } 445 446 nid = getUnsignedByte(header); 447 if (nid != NID.kCodersUnpackSize) { 448 throw new IOException("Expected kCodersUnpackSize, got " + nid); 449 } 450 for (final Folder folder : folders) { 451 folder.unpackSizes = new long[(int)folder.totalOutputStreams]; 452 for (int i = 0; i < folder.totalOutputStreams; i++) { 453 folder.unpackSizes[i] = readUint64(header); 454 } 455 } 456 457 nid = getUnsignedByte(header); 458 if (nid == NID.kCRC) { 459 final BitSet crcsDefined = readAllOrBits(header, (int)numFolders); 460 for (int i = 0; i < (int)numFolders; i++) { 461 if (crcsDefined.get(i)) { 462 folders[i].hasCrc = true; 
463 folders[i].crc = 0xffffFFFFL & header.getInt(); 464 } else { 465 folders[i].hasCrc = false; 466 } 467 } 468 469 nid = getUnsignedByte(header); 470 } 471 472 if (nid != NID.kEnd) { 473 throw new IOException("Badly terminated UnpackInfo"); 474 } 475 } 476 477 private void readSubStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { 478 for (final Folder folder : archive.folders) { 479 folder.numUnpackSubStreams = 1; 480 } 481 int totalUnpackStreams = archive.folders.length; 482 483 int nid = getUnsignedByte(header); 484 if (nid == NID.kNumUnpackStream) { 485 totalUnpackStreams = 0; 486 for (final Folder folder : archive.folders) { 487 final long numStreams = readUint64(header); 488 folder.numUnpackSubStreams = (int)numStreams; 489 totalUnpackStreams += numStreams; 490 } 491 nid = getUnsignedByte(header); 492 } 493 494 final SubStreamsInfo subStreamsInfo = new SubStreamsInfo(); 495 subStreamsInfo.unpackSizes = new long[totalUnpackStreams]; 496 subStreamsInfo.hasCrc = new BitSet(totalUnpackStreams); 497 subStreamsInfo.crcs = new long[totalUnpackStreams]; 498 499 int nextUnpackStream = 0; 500 for (final Folder folder : archive.folders) { 501 if (folder.numUnpackSubStreams == 0) { 502 continue; 503 } 504 long sum = 0; 505 if (nid == NID.kSize) { 506 for (int i = 0; i < folder.numUnpackSubStreams - 1; i++) { 507 final long size = readUint64(header); 508 subStreamsInfo.unpackSizes[nextUnpackStream++] = size; 509 sum += size; 510 } 511 } 512 subStreamsInfo.unpackSizes[nextUnpackStream++] = folder.getUnpackSize() - sum; 513 } 514 if (nid == NID.kSize) { 515 nid = getUnsignedByte(header); 516 } 517 518 int numDigests = 0; 519 for (final Folder folder : archive.folders) { 520 if (folder.numUnpackSubStreams != 1 || !folder.hasCrc) { 521 numDigests += folder.numUnpackSubStreams; 522 } 523 } 524 525 if (nid == NID.kCRC) { 526 final BitSet hasMissingCrc = readAllOrBits(header, numDigests); 527 final long[] missingCrcs = new long[numDigests]; 528 for 
(int i = 0; i < numDigests; i++) { 529 if (hasMissingCrc.get(i)) { 530 missingCrcs[i] = 0xffffFFFFL & header.getInt(); 531 } 532 } 533 int nextCrc = 0; 534 int nextMissingCrc = 0; 535 for (final Folder folder: archive.folders) { 536 if (folder.numUnpackSubStreams == 1 && folder.hasCrc) { 537 subStreamsInfo.hasCrc.set(nextCrc, true); 538 subStreamsInfo.crcs[nextCrc] = folder.crc; 539 ++nextCrc; 540 } else { 541 for (int i = 0; i < folder.numUnpackSubStreams; i++) { 542 subStreamsInfo.hasCrc.set(nextCrc, hasMissingCrc.get(nextMissingCrc)); 543 subStreamsInfo.crcs[nextCrc] = missingCrcs[nextMissingCrc]; 544 ++nextCrc; 545 ++nextMissingCrc; 546 } 547 } 548 } 549 550 nid = getUnsignedByte(header); 551 } 552 553 if (nid != NID.kEnd) { 554 throw new IOException("Badly terminated SubStreamsInfo"); 555 } 556 557 archive.subStreamsInfo = subStreamsInfo; 558 } 559 560 private Folder readFolder(final ByteBuffer header) throws IOException { 561 final Folder folder = new Folder(); 562 563 final long numCoders = readUint64(header); 564 final Coder[] coders = new Coder[(int)numCoders]; 565 long totalInStreams = 0; 566 long totalOutStreams = 0; 567 for (int i = 0; i < coders.length; i++) { 568 coders[i] = new Coder(); 569 final int bits = getUnsignedByte(header); 570 final int idSize = bits & 0xf; 571 final boolean isSimple = (bits & 0x10) == 0; 572 final boolean hasAttributes = (bits & 0x20) != 0; 573 final boolean moreAlternativeMethods = (bits & 0x80) != 0; 574 575 coders[i].decompressionMethodId = new byte[idSize]; 576 header.get(coders[i].decompressionMethodId); 577 if (isSimple) { 578 coders[i].numInStreams = 1; 579 coders[i].numOutStreams = 1; 580 } else { 581 coders[i].numInStreams = readUint64(header); 582 coders[i].numOutStreams = readUint64(header); 583 } 584 totalInStreams += coders[i].numInStreams; 585 totalOutStreams += coders[i].numOutStreams; 586 if (hasAttributes) { 587 final long propertiesSize = readUint64(header); 588 coders[i].properties = new 
byte[(int)propertiesSize]; 589 header.get(coders[i].properties); 590 } 591 // would need to keep looping as above: 592 while (moreAlternativeMethods) { 593 throw new IOException("Alternative methods are unsupported, please report. " + 594 "The reference implementation doesn't support them either."); 595 } 596 } 597 folder.coders = coders; 598 folder.totalInputStreams = totalInStreams; 599 folder.totalOutputStreams = totalOutStreams; 600 601 if (totalOutStreams == 0) { 602 throw new IOException("Total output streams can't be 0"); 603 } 604 final long numBindPairs = totalOutStreams - 1; 605 final BindPair[] bindPairs = new BindPair[(int)numBindPairs]; 606 for (int i = 0; i < bindPairs.length; i++) { 607 bindPairs[i] = new BindPair(); 608 bindPairs[i].inIndex = readUint64(header); 609 bindPairs[i].outIndex = readUint64(header); 610 } 611 folder.bindPairs = bindPairs; 612 613 if (totalInStreams < numBindPairs) { 614 throw new IOException("Total input streams can't be less than the number of bind pairs"); 615 } 616 final long numPackedStreams = totalInStreams - numBindPairs; 617 final long packedStreams[] = new long[(int)numPackedStreams]; 618 if (numPackedStreams == 1) { 619 int i; 620 for (i = 0; i < (int)totalInStreams; i++) { 621 if (folder.findBindPairForInStream(i) < 0) { 622 break; 623 } 624 } 625 if (i == (int)totalInStreams) { 626 throw new IOException("Couldn't find stream's bind pair index"); 627 } 628 packedStreams[0] = i; 629 } else { 630 for (int i = 0; i < (int)numPackedStreams; i++) { 631 packedStreams[i] = readUint64(header); 632 } 633 } 634 folder.packedStreams = packedStreams; 635 636 return folder; 637 } 638 639 private BitSet readAllOrBits(final ByteBuffer header, final int size) throws IOException { 640 final int areAllDefined = getUnsignedByte(header); 641 final BitSet bits; 642 if (areAllDefined != 0) { 643 bits = new BitSet(size); 644 for (int i = 0; i < size; i++) { 645 bits.set(i, true); 646 } 647 } else { 648 bits = readBits(header, size); 
649 } 650 return bits; 651 } 652 653 private BitSet readBits(final ByteBuffer header, final int size) throws IOException { 654 final BitSet bits = new BitSet(size); 655 int mask = 0; 656 int cache = 0; 657 for (int i = 0; i < size; i++) { 658 if (mask == 0) { 659 mask = 0x80; 660 cache = getUnsignedByte(header); 661 } 662 bits.set(i, (cache & mask) != 0); 663 mask >>>= 1; 664 } 665 return bits; 666 } 667 668 private void readFilesInfo(final ByteBuffer header, final Archive archive) throws IOException { 669 final long numFiles = readUint64(header); 670 final SevenZArchiveEntry[] files = new SevenZArchiveEntry[(int)numFiles]; 671 for (int i = 0; i < files.length; i++) { 672 files[i] = new SevenZArchiveEntry(); 673 } 674 BitSet isEmptyStream = null; 675 BitSet isEmptyFile = null; 676 BitSet isAnti = null; 677 while (true) { 678 final int propertyType = getUnsignedByte(header); 679 if (propertyType == 0) { 680 break; 681 } 682 final long size = readUint64(header); 683 switch (propertyType) { 684 case NID.kEmptyStream: { 685 isEmptyStream = readBits(header, files.length); 686 break; 687 } 688 case NID.kEmptyFile: { 689 if (isEmptyStream == null) { // protect against NPE 690 throw new IOException("Header format error: kEmptyStream must appear before kEmptyFile"); 691 } 692 isEmptyFile = readBits(header, isEmptyStream.cardinality()); 693 break; 694 } 695 case NID.kAnti: { 696 if (isEmptyStream == null) { // protect against NPE 697 throw new IOException("Header format error: kEmptyStream must appear before kAnti"); 698 } 699 isAnti = readBits(header, isEmptyStream.cardinality()); 700 break; 701 } 702 case NID.kName: { 703 final int external = getUnsignedByte(header); 704 if (external != 0) { 705 throw new IOException("Not implemented"); 706 } 707 if (((size - 1) & 1) != 0) { 708 throw new IOException("File names length invalid"); 709 } 710 final byte[] names = new byte[(int)(size - 1)]; 711 header.get(names); 712 int nextFile = 0; 713 int nextName = 0; 714 for (int i = 0; 
i < names.length; i += 2) { 715 if (names[i] == 0 && names[i+1] == 0) { 716 files[nextFile++].setName(new String(names, nextName, i-nextName, CharsetNames.UTF_16LE)); 717 nextName = i + 2; 718 } 719 } 720 if (nextName != names.length || nextFile != files.length) { 721 throw new IOException("Error parsing file names"); 722 } 723 break; 724 } 725 case NID.kCTime: { 726 final BitSet timesDefined = readAllOrBits(header, files.length); 727 final int external = getUnsignedByte(header); 728 if (external != 0) { 729 throw new IOException("Unimplemented"); 730 } 731 for (int i = 0; i < files.length; i++) { 732 files[i].setHasCreationDate(timesDefined.get(i)); 733 if (files[i].getHasCreationDate()) { 734 files[i].setCreationDate(header.getLong()); 735 } 736 } 737 break; 738 } 739 case NID.kATime: { 740 final BitSet timesDefined = readAllOrBits(header, files.length); 741 final int external = getUnsignedByte(header); 742 if (external != 0) { 743 throw new IOException("Unimplemented"); 744 } 745 for (int i = 0; i < files.length; i++) { 746 files[i].setHasAccessDate(timesDefined.get(i)); 747 if (files[i].getHasAccessDate()) { 748 files[i].setAccessDate(header.getLong()); 749 } 750 } 751 break; 752 } 753 case NID.kMTime: { 754 final BitSet timesDefined = readAllOrBits(header, files.length); 755 final int external = getUnsignedByte(header); 756 if (external != 0) { 757 throw new IOException("Unimplemented"); 758 } 759 for (int i = 0; i < files.length; i++) { 760 files[i].setHasLastModifiedDate(timesDefined.get(i)); 761 if (files[i].getHasLastModifiedDate()) { 762 files[i].setLastModifiedDate(header.getLong()); 763 } 764 } 765 break; 766 } 767 case NID.kWinAttributes: { 768 final BitSet attributesDefined = readAllOrBits(header, files.length); 769 final int external = getUnsignedByte(header); 770 if (external != 0) { 771 throw new IOException("Unimplemented"); 772 } 773 for (int i = 0; i < files.length; i++) { 774 files[i].setHasWindowsAttributes(attributesDefined.get(i)); 775 if 
(files[i].getHasWindowsAttributes()) { 776 files[i].setWindowsAttributes(header.getInt()); 777 } 778 } 779 break; 780 } 781 case NID.kStartPos: { 782 throw new IOException("kStartPos is unsupported, please report"); 783 } 784 case NID.kDummy: { 785 // 7z 9.20 asserts the content is all zeros and ignores the property 786 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 787 788 if (skipBytesFully(header, size) < size) { 789 throw new IOException("Incomplete kDummy property"); 790 } 791 break; 792 } 793 794 default: { 795 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 796 if (skipBytesFully(header, size) < size) { 797 throw new IOException("Incomplete property of type " + propertyType); 798 } 799 break; 800 } 801 } 802 } 803 int nonEmptyFileCounter = 0; 804 int emptyFileCounter = 0; 805 for (int i = 0; i < files.length; i++) { 806 files[i].setHasStream(isEmptyStream == null ? true : !isEmptyStream.get(i)); 807 if (files[i].hasStream()) { 808 files[i].setDirectory(false); 809 files[i].setAntiItem(false); 810 files[i].setHasCrc(archive.subStreamsInfo.hasCrc.get(nonEmptyFileCounter)); 811 files[i].setCrcValue(archive.subStreamsInfo.crcs[nonEmptyFileCounter]); 812 files[i].setSize(archive.subStreamsInfo.unpackSizes[nonEmptyFileCounter]); 813 ++nonEmptyFileCounter; 814 } else { 815 files[i].setDirectory(isEmptyFile == null ? true : !isEmptyFile.get(emptyFileCounter)); 816 files[i].setAntiItem(isAnti == null ? false : isAnti.get(emptyFileCounter)); 817 files[i].setHasCrc(false); 818 files[i].setSize(0); 819 ++emptyFileCounter; 820 } 821 } 822 archive.files = files; 823 calculateStreamMap(archive); 824 } 825 826 private void calculateStreamMap(final Archive archive) throws IOException { 827 final StreamMap streamMap = new StreamMap(); 828 829 int nextFolderPackStreamIndex = 0; 830 final int numFolders = archive.folders != null ? 
archive.folders.length : 0; 831 streamMap.folderFirstPackStreamIndex = new int[numFolders]; 832 for (int i = 0; i < numFolders; i++) { 833 streamMap.folderFirstPackStreamIndex[i] = nextFolderPackStreamIndex; 834 nextFolderPackStreamIndex += archive.folders[i].packedStreams.length; 835 } 836 837 long nextPackStreamOffset = 0; 838 final int numPackSizes = archive.packSizes != null ? archive.packSizes.length : 0; 839 streamMap.packStreamOffsets = new long[numPackSizes]; 840 for (int i = 0; i < numPackSizes; i++) { 841 streamMap.packStreamOffsets[i] = nextPackStreamOffset; 842 nextPackStreamOffset += archive.packSizes[i]; 843 } 844 845 streamMap.folderFirstFileIndex = new int[numFolders]; 846 streamMap.fileFolderIndex = new int[archive.files.length]; 847 int nextFolderIndex = 0; 848 int nextFolderUnpackStreamIndex = 0; 849 for (int i = 0; i < archive.files.length; i++) { 850 if (!archive.files[i].hasStream() && nextFolderUnpackStreamIndex == 0) { 851 streamMap.fileFolderIndex[i] = -1; 852 continue; 853 } 854 if (nextFolderUnpackStreamIndex == 0) { 855 for (; nextFolderIndex < archive.folders.length; ++nextFolderIndex) { 856 streamMap.folderFirstFileIndex[nextFolderIndex] = i; 857 if (archive.folders[nextFolderIndex].numUnpackSubStreams > 0) { 858 break; 859 } 860 } 861 if (nextFolderIndex >= archive.folders.length) { 862 throw new IOException("Too few folders in archive"); 863 } 864 } 865 streamMap.fileFolderIndex[i] = nextFolderIndex; 866 if (!archive.files[i].hasStream()) { 867 continue; 868 } 869 ++nextFolderUnpackStreamIndex; 870 if (nextFolderUnpackStreamIndex >= archive.folders[nextFolderIndex].numUnpackSubStreams) { 871 ++nextFolderIndex; 872 nextFolderUnpackStreamIndex = 0; 873 } 874 } 875 876 archive.streamMap = streamMap; 877 } 878 879 private void buildDecodingStream() throws IOException { 880 final int folderIndex = archive.streamMap.fileFolderIndex[currentEntryIndex]; 881 if (folderIndex < 0) { 882 deferredBlockStreams.clear(); 883 // TODO: previously it'd 
return an empty stream? 884 // new BoundedInputStream(new ByteArrayInputStream(new byte[0]), 0); 885 return; 886 } 887 final SevenZArchiveEntry file = archive.files[currentEntryIndex]; 888 if (currentFolderIndex == folderIndex) { 889 // (COMPRESS-320). 890 // The current entry is within the same (potentially opened) folder. The 891 // previous stream has to be fully decoded before we can start reading 892 // but don't do it eagerly -- if the user skips over the entire folder nothing 893 // is effectively decompressed. 894 895 file.setContentMethods(archive.files[currentEntryIndex - 1].getContentMethods()); 896 } else { 897 // We're opening a new folder. Discard any queued streams/ folder stream. 898 currentFolderIndex = folderIndex; 899 deferredBlockStreams.clear(); 900 if (currentFolderInputStream != null) { 901 currentFolderInputStream.close(); 902 currentFolderInputStream = null; 903 } 904 905 final Folder folder = archive.folders[folderIndex]; 906 final int firstPackStreamIndex = archive.streamMap.folderFirstPackStreamIndex[folderIndex]; 907 final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + 908 archive.streamMap.packStreamOffsets[firstPackStreamIndex]; 909 currentFolderInputStream = buildDecoderStack(folder, folderOffset, firstPackStreamIndex, file); 910 } 911 912 InputStream fileStream = new BoundedInputStream(currentFolderInputStream, file.getSize()); 913 if (file.getHasCrc()) { 914 fileStream = new CRC32VerifyingInputStream(fileStream, file.getSize(), file.getCrcValue()); 915 } 916 917 deferredBlockStreams.add(fileStream); 918 } 919 920 private InputStream buildDecoderStack(final Folder folder, final long folderOffset, 921 final int firstPackStreamIndex, final SevenZArchiveEntry entry) throws IOException { 922 channel.position(folderOffset); 923 InputStream inputStreamStack = 924 new BufferedInputStream( 925 new BoundedSeekableByteChannelInputStream(channel, 926 archive.packSizes[firstPackStreamIndex])); 927 final 
LinkedList<SevenZMethodConfiguration> methods = new LinkedList<>(); 928 for (final Coder coder : folder.getOrderedCoders()) { 929 if (coder.numInStreams != 1 || coder.numOutStreams != 1) { 930 throw new IOException("Multi input/output stream coders are not yet supported"); 931 } 932 final SevenZMethod method = SevenZMethod.byId(coder.decompressionMethodId); 933 inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, 934 folder.getUnpackSizeForCoder(coder), coder, password); 935 methods.addFirst(new SevenZMethodConfiguration(method, 936 Coders.findByMethod(method).getOptionsFromCoder(coder, inputStreamStack))); 937 } 938 entry.setContentMethods(methods); 939 if (folder.hasCrc) { 940 return new CRC32VerifyingInputStream(inputStreamStack, 941 folder.getUnpackSize(), folder.crc); 942 } 943 return inputStreamStack; 944 } 945 946 /** 947 * Reads a byte of data. 948 * 949 * @return the byte read, or -1 if end of input is reached 950 * @throws IOException 951 * if an I/O error has occurred 952 */ 953 public int read() throws IOException { 954 return getCurrentStream().read(); 955 } 956 957 private InputStream getCurrentStream() throws IOException { 958 if (archive.files[currentEntryIndex].getSize() == 0) { 959 return new ByteArrayInputStream(new byte[0]); 960 } 961 if (deferredBlockStreams.isEmpty()) { 962 throw new IllegalStateException("No current 7z entry (call getNextEntry() first)."); 963 } 964 965 while (deferredBlockStreams.size() > 1) { 966 // In solid compression mode we need to decompress all leading folder' 967 // streams to get access to an entry. We defer this until really needed 968 // so that entire blocks can be skipped without wasting time for decompression. 969 try (final InputStream stream = deferredBlockStreams.remove(0)) { 970 IOUtils.skip(stream, Long.MAX_VALUE); 971 } 972 } 973 974 return deferredBlockStreams.get(0); 975 } 976 977 /** 978 * Reads data into an array of bytes. 
979 * 980 * @param b the array to write data to 981 * @return the number of bytes read, or -1 if end of input is reached 982 * @throws IOException 983 * if an I/O error has occurred 984 */ 985 public int read(final byte[] b) throws IOException { 986 return read(b, 0, b.length); 987 } 988 989 /** 990 * Reads data into an array of bytes. 991 * 992 * @param b the array to write data to 993 * @param off offset into the buffer to start filling at 994 * @param len of bytes to read 995 * @return the number of bytes read, or -1 if end of input is reached 996 * @throws IOException 997 * if an I/O error has occurred 998 */ 999 public int read(final byte[] b, final int off, final int len) throws IOException { 1000 return getCurrentStream().read(b, off, len); 1001 } 1002 1003 private static long readUint64(final ByteBuffer in) throws IOException { 1004 // long rather than int as it might get shifted beyond the range of an int 1005 final long firstByte = getUnsignedByte(in); 1006 int mask = 0x80; 1007 long value = 0; 1008 for (int i = 0; i < 8; i++) { 1009 if ((firstByte & mask) == 0) { 1010 return value | ((firstByte & (mask - 1)) << (8 * i)); 1011 } 1012 final long nextByte = getUnsignedByte(in); 1013 value |= nextByte << (8 * i); 1014 mask >>>= 1; 1015 } 1016 return value; 1017 } 1018 1019 private static int getUnsignedByte(ByteBuffer buf) { 1020 return buf.get() & 0xff; 1021 } 1022 1023 /** 1024 * Checks if the signature matches what is expected for a 7z file. 1025 * 1026 * @param signature 1027 * the bytes to check 1028 * @param length 1029 * the number of bytes to check 1030 * @return true, if this is the signature of a 7z archive. 
1031 * @since 1.8 1032 */ 1033 public static boolean matches(final byte[] signature, final int length) { 1034 if (length < sevenZSignature.length) { 1035 return false; 1036 } 1037 1038 for (int i = 0; i < sevenZSignature.length; i++) { 1039 if (signature[i] != sevenZSignature[i]) { 1040 return false; 1041 } 1042 } 1043 return true; 1044 } 1045 1046 private static long skipBytesFully(final ByteBuffer input, long bytesToSkip) throws IOException { 1047 if (bytesToSkip < 1) { 1048 return 0; 1049 } 1050 int current = input.position(); 1051 int maxSkip = input.remaining(); 1052 if (maxSkip < bytesToSkip) { 1053 bytesToSkip = maxSkip; 1054 } 1055 input.position(current + (int) bytesToSkip); 1056 return bytesToSkip; 1057 } 1058 1059 private void readFully(ByteBuffer buf) throws IOException { 1060 buf.rewind(); 1061 IOUtils.readFully(channel, buf); 1062 buf.flip(); 1063 } 1064 1065 @Override 1066 public String toString() { 1067 return archive.toString(); 1068 } 1069}