/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers.tar;

import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUMLEN;
import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUM_OFFSET;

import java.io.IOException;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;

/**
 * This class provides static utility methods to work with byte streams.
 *
 * @Immutable
 */
// CheckStyle:HideUtilityClassConstructorCheck OFF (bc)
public class TarUtils {

    private static final int BYTE_MASK = 255;

    static final ZipEncoding DEFAULT_ENCODING =
        ZipEncodingHelper.getZipEncoding(null);

    /**
     * Encapsulates the algorithms used up to Commons Compress 1.3 as
     * ZipEncoding.
     */
    static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() {
            @Override
            public boolean canEncode(final String name) { return true; }

            @Override
            public ByteBuffer encode(final String name) {
                final int length = name.length();
                final byte[] buf = new byte[length];

                // Each char is narrowed to its low eight bits.
                for (int i = 0; i < length; ++i) {
                    buf[i] = (byte) name.charAt(i);
                }
                return ByteBuffer.wrap(buf);
            }

            @Override
            public String decode(final byte[] buffer) {
                final int length = buffer.length;
                final StringBuilder result = new StringBuilder(length);

                for (final byte b : buffer) {
                    if (b == 0) { // Trailing null
                        break;
                    }
                    result.append((char) (b & 0xFF)); // Allow for sign-extension
                }

                return result.toString();
            }
        };

    /** Private constructor to prevent instantiation of this utility class. */
    private TarUtils() {
    }

    /**
     * Parse an octal string from a buffer.
     *
     * <p>Leading spaces are ignored and all trailing spaces and NULs
     * are trimmed before the digits are read.</p>
     *
     * <p>To work-around some tar implementations that insert a
     * leading NUL this method returns 0 if the first byte of the
     * field is NUL (this also covers fields that are all NULs,
     * i.e. missing fields).</p>
     *
     * @param buffer The buffer from which to parse.
     * @param offset The offset into the buffer from which to parse.
     * @param length The maximum number of bytes to parse - must be at least 2 bytes.
     * @return The long value of the octal string.
     * @throws IllegalArgumentException if {@code length} is less than 2
     * or if a byte that is not an octal digit is detected.
     */
    public static long parseOctal(final byte[] buffer, final int offset, final int length) {
        long result = 0;
        int end = offset + length;
        int start = offset;

        if (length < 2) {
            throw new IllegalArgumentException("Length " + length + " must be at least 2");
        }

        if (buffer[start] == 0) {
            return 0L;
        }

        // Skip leading spaces
        while (start < end) {
            if (buffer[start] == ' ') {
                start++;
            } else {
                break;
            }
        }

        // Trim all trailing NULs and spaces.
        // The ustar and POSIX tar specs require a trailing NUL or
        // space but some implementations use the extra digit for big
        // sizes/uids/gids ...
        byte trailer = buffer[end - 1];
        while (start < end && (trailer == 0 || trailer == ' ')) {
            end--;
            trailer = buffer[end - 1];
        }

        for ( ; start < end; start++) {
            final byte currentByte = buffer[start];
            // CheckStyle:MagicNumber OFF
            if (currentByte < '0' || currentByte > '7') {
                throw new IllegalArgumentException(
                        exceptionMessage(buffer, offset, length, start, currentByte));
            }
            result = (result << 3) + (currentByte - '0'); // convert from ASCII
            // CheckStyle:MagicNumber ON
        }

        return result;
    }

    /**
     * Compute the value contained in a byte buffer.  If the most
     * significant bit of the first byte in the buffer is set, the
     * first byte is only used as a marker (a value of {@code 0xff}
     * marks a negative number) and the remaining bytes are
     * interpreted as a binary number.  Otherwise, the buffer is
     * interpreted as an octal number as per {@link #parseOctal}.
     *
     * @param buffer The buffer from which to parse.
     * @param offset The offset into the buffer from which to parse.
     * @param length The maximum number of bytes to parse.
     * @return The long value of the octal or binary string.
     * @throws IllegalArgumentException if an invalid byte is
     * detected in an octal number, or if a binary number would
     * exceed the size of a signed long 64-bit integer.
     * @since 1.4
     */
    public static long parseOctalOrBinary(final byte[] buffer, final int offset,
                                          final int length) {

        if ((buffer[offset] & 0x80) == 0) {
            return parseOctal(buffer, offset, length);
        }
        final boolean negative = buffer[offset] == (byte) 0xff;
        if (length < 9) {
            return parseBinaryLong(buffer, offset, length, negative);
        }
        return parseBinaryBigInteger(buffer, offset, length, negative);
    }

    /**
     * Reads the bytes following the marker byte as a big-endian
     * binary number; only usable for fields shorter than nine bytes.
     */
    private static long parseBinaryLong(final byte[] buffer, final int offset,
                                        final int length,
                                        final boolean negative) {
        if (length >= 9) {
            throw new IllegalArgumentException("At offset " + offset + ", "
                                               + length + " byte binary number"
                                               + " exceeds maximum signed long"
                                               + " value");
        }
        long val = 0;
        for (int i = 1; i < length; i++) {
            val = (val << 8) + (buffer[offset + i] & 0xff);
        }
        if (negative) {
            // 2's complement
            val--;
            // exact integer mask covering the payload bytes (no floating point)
            val ^= (1L << ((length - 1) * 8)) - 1;
        }
        return negative ? -val : val;
    }

    /**
     * Reads the bytes following the marker byte as a big-endian
     * binary number using BigInteger arithmetic and verifies that the
     * result fits into a signed long.
     */
    private static long parseBinaryBigInteger(final byte[] buffer,
                                              final int offset,
                                              final int length,
                                              final boolean negative) {
        final byte[] remainder = new byte[length - 1];
        System.arraycopy(buffer, offset + 1, remainder, 0, length - 1);
        BigInteger val = new BigInteger(remainder);
        if (negative) {
            // 2's complement
            val = val.add(BigInteger.valueOf(-1)).not();
        }
        if (val.bitLength() > 63) {
            throw new IllegalArgumentException("At offset " + offset + ", "
                                               + length + " byte binary number"
                                               + " exceeds maximum signed long"
                                               + " value");
        }
        return negative ? -val.longValue() : val.longValue();
    }

    /**
     * Parse a boolean byte from a buffer.
     * Only the single byte at {@code offset} is inspected.
     *
     * @param buffer The buffer from which to parse.
     * @param offset The offset into the buffer from which to parse.
     * @return {@code true} if the byte at {@code offset} equals 1,
     * {@code false} otherwise.
     */
    public static boolean parseBoolean(final byte[] buffer, final int offset) {
        return buffer[offset] == 1;
    }

    // Helper method to generate the exception message
    private static String exceptionMessage(final byte[] buffer, final int offset,
            final int length, final int current, final byte currentByte) {
        // default charset is good enough for an exception message,
        //
        // the alternative was to modify parseOctal and
        // parseOctalOrBinary to receive the ZipEncoding of the
        // archive (deprecating the existing public methods, of
        // course) and dealing with the fact that ZipEncoding#decode
        // can throw an IOException which parseOctal* doesn't declare
        String string = new String(buffer, offset, length);

        // Replace NULs to allow string to be printed (literal replacement, no regex needed)
        string = string.replace("\0", "{NUL}");
        return "Invalid byte " + currentByte + " at offset " + (current - offset)
            + " in '" + string + "' len=" + length;
    }

    /**
     * Parse an entry name from a buffer.
     * Parsing stops when a NUL is found
     * or the buffer length is reached.
     *
     * <p>The name is decoded with the default encoding first; if that
     * fails with an IOException the fallback encoding is used.</p>
     *
     * @param buffer The buffer from which to parse.
     * @param offset The offset into the buffer from which to parse.
     * @param length The maximum number of bytes to parse.
     * @return The entry name.
     */
    public static String parseName(final byte[] buffer, final int offset, final int length) {
        try {
            return parseName(buffer, offset, length, DEFAULT_ENCODING);
        } catch (final IOException ex) {
            try {
                return parseName(buffer, offset, length, FALLBACK_ENCODING);
            } catch (final IOException ex2) {
                // impossible
                throw new RuntimeException(ex2); //NOSONAR
            }
        }
    }

    /**
     * Parse an entry name from a buffer.
     * Parsing stops when a NUL is found
     * or the buffer length is reached.
     *
     * @param buffer The buffer from which to parse.
     * @param offset The offset into the buffer from which to parse.
     * @param length The maximum number of bytes to parse.
     * @param encoding name of the encoding to use for file names
     * @since 1.4
     * @return The entry name, or the empty string if the field starts
     * with a NUL or {@code length} is not positive.
     * @throws IOException on error
     */
    public static String parseName(final byte[] buffer, final int offset,
                                   final int length,
                                   final ZipEncoding encoding)
        throws IOException {

        // Count the bytes up to (not including) the first NUL so that
        // nothing after an embedded NUL is handed to the decoder.
        int len = 0;
        for (int i = offset; len < length && buffer[i] != 0; i++) {
            len++;
        }
        if (len > 0) {
            final byte[] b = new byte[len];
            System.arraycopy(buffer, offset, b, 0, len);
            return encoding.decode(b);
        }
        return "";
    }

    /**
     * Copy a name into a buffer.
     * Copies characters from the name into the buffer
     * starting at the specified offset.
     * If the buffer is longer than the name, the buffer
     * is filled with trailing NULs.
     * If the name is longer than the buffer,
     * the output is truncated.
     *
     * <p>The name is encoded with the default encoding first; if that
     * fails with an IOException the fallback encoding is used.</p>
     *
     * @param name The header name from which to copy the characters.
     * @param buf The buffer where the name is to be stored.
     * @param offset The starting offset into the buffer
     * @param length The maximum number of header bytes to copy.
     * @return The updated offset, i.e. offset + length
     */
    public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) {
        try {
            return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING);
        } catch (final IOException ex) {
            try {
                return formatNameBytes(name, buf, offset, length,
                                       FALLBACK_ENCODING);
            } catch (final IOException ex2) {
                // impossible
                throw new RuntimeException(ex2); //NOSONAR
            }
        }
    }

    /**
     * Copy a name into a buffer.
     * Copies characters from the name into the buffer
     * starting at the specified offset.
     * If the buffer is longer than the name, the buffer
     * is filled with trailing NULs.
     * If the name is longer than the buffer,
     * the output is truncated.
     *
     * @param name The header name from which to copy the characters.
     * @param buf The buffer where the name is to be stored.
     * @param offset The starting offset into the buffer
     * @param length The maximum number of header bytes to copy.
     * @param encoding name of the encoding to use for file names
     * @since 1.4
     * @return The updated offset, i.e. offset + length
     * @throws IOException on error
     */
    public static int formatNameBytes(final String name, final byte[] buf, final int offset,
                                      final int length,
                                      final ZipEncoding encoding)
        throws IOException {
        int len = name.length();
        ByteBuffer b = encoding.encode(name);
        // Drop trailing characters until the encoded form fits the field.
        while (b.limit() > length && len > 0) {
            b = encoding.encode(name.substring(0, --len));
        }
        final int limit = b.limit() - b.position();
        System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit);

        // Pad any remaining output bytes with NUL
        for (int i = limit; i < length; ++i) {
            buf[offset + i] = 0;
        }

        return offset + length;
    }

    /**
     * Fill buffer with unsigned octal number, padded with leading zeroes.
     *
     * @param value number to convert to octal - treated as unsigned
     * @param buffer destination buffer
     * @param offset starting offset in buffer
     * @param length length of buffer to fill
     * @throws IllegalArgumentException if the value will not fit in the buffer
     */
    public static void formatUnsignedOctalString(final long value, final byte[] buffer,
            final int offset, final int length) {
        int remaining = length;
        remaining--;
        if (value == 0) {
            buffer[offset + remaining--] = (byte) '0';
        } else {
            long val = value;
            // Digits are written right to left, three bits at a time.
            for (; remaining >= 0 && val != 0; --remaining) {
                // CheckStyle:MagicNumber OFF
                buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7));
                val = val >>> 3;
                // CheckStyle:MagicNumber ON
            }
            if (val != 0) {
                throw new IllegalArgumentException
                (value + "=" + Long.toOctalString(value) + " will not fit in octal number buffer of length " + length);
            }
        }

        for (; remaining >= 0; --remaining) { // leading zeros
            buffer[offset + remaining] = (byte) '0';
        }
    }

    /**
     * Write an octal integer into a buffer.
     *
     * Uses {@link #formatUnsignedOctalString} to format
     * the value as an octal string with leading zeros.
     * The converted number is followed by space and NUL
     *
     * @param value The value to write
     * @param buf The buffer to receive the output
     * @param offset The starting offset into the buffer
     * @param length The size of the output buffer
     * @return The updated offset, i.e offset+length
     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
     */
    public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) {

        int idx = length - 2; // For space and trailing null
        formatUnsignedOctalString(value, buf, offset, idx);

        buf[offset + idx++] = (byte) ' '; // Trailing space
        buf[offset + idx]   = 0; // Trailing null

        return offset + length;
    }

    /**
     * Write an octal long integer into a buffer.
     *
     * Uses {@link #formatUnsignedOctalString} to format
     * the value as an octal string with leading zeros.
     * The converted number is followed by a space.
     *
     * @param value The value to write as octal
     * @param buf The destination buffer.
     * @param offset The starting offset into the buffer.
     * @param length The length of the buffer
     * @return The updated offset
     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
     */
    public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) {

        final int idx = length - 1; // For space

        formatUnsignedOctalString(value, buf, offset, idx);
        buf[offset + idx] = (byte) ' '; // Trailing space

        return offset + length;
    }

    /**
     * Write a long integer into a buffer as an octal string if this
     * will fit, or as a binary number otherwise.
     *
     * <p>In the octal case {@link #formatLongOctalBytes} is used.  In
     * the binary case the first byte of the field is set to
     * {@code 0xff} for negative values and to {@code 0x80} otherwise,
     * and the remaining bytes hold the big-endian binary value.</p>
     *
     * @param value The value to write into the buffer.
     * @param buf The destination buffer.
     * @param offset The starting offset into the buffer.
     * @param length The length of the buffer.
     * @return The updated offset.
     * @throws IllegalArgumentException if the value (and trailer)
     * will not fit in the buffer.
     * @since 1.4
     */
    public static int formatLongOctalOrBinaryBytes(
        final long value, final byte[] buf, final int offset, final int length) {

        // Check whether we are dealing with UID/GID or SIZE field
        final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE;

        final boolean negative = value < 0;
        if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars
            return formatLongOctalBytes(value, buf, offset, length);
        }

        // Exactly one of the two binary writers must run; previously the
        // BigInteger variant always ran and overwrote the long variant's output.
        if (length < 9) {
            formatLongBinary(value, buf, offset, length, negative);
        } else {
            formatBigIntegerBinary(value, buf, offset, length, negative);
        }

        buf[offset] = (byte) (negative ? 0xff : 0x80);
        return offset + length;
    }

    /**
     * Writes the value as a big-endian two's complement number using
     * plain long arithmetic; used for fields shorter than nine bytes.
     *
     * @throws IllegalArgumentException if the magnitude of the value
     * does not fit into {@code length - 1} bytes.
     */
    private static void formatLongBinary(final long value, final byte[] buf,
                                         final int offset, final int length,
                                         final boolean negative) {
        final int bits = (length - 1) * 8;
        final long max = 1L << bits;
        long val = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE
        if (val < 0 || val >= max) {
            throw new IllegalArgumentException("Value " + value +
                " is too large for " + length + " byte field.");
        }
        if (negative) {
            val ^= max - 1;
            val++;
            // long literal: an int shift would wrap the shift distance at 32
            val |= 0xffL << bits;
        }
        for (int i = offset + length - 1; i >= offset; i--) {
            buf[i] = (byte) val;
            val >>= 8;
        }
    }

    /**
     * Writes the value right-aligned into the field using the byte
     * representation produced by BigInteger and fills the bytes
     * between the marker byte and the value with the sign byte.
     *
     * @throws IllegalArgumentException if the encoded value needs
     * more than {@code length - 1} bytes.
     */
    private static void formatBigIntegerBinary(final long value, final byte[] buf,
                                               final int offset,
                                               final int length,
                                               final boolean negative) {
        final BigInteger val = BigInteger.valueOf(value);
        final byte[] b = val.toByteArray();
        final int len = b.length;
        if (len > length - 1) {
            throw new IllegalArgumentException("Value " + value +
                " is too large for " + length + " byte field.");
        }
        final int off = offset + length - len;
        System.arraycopy(b, 0, buf, off, len);
        final byte fill = (byte) (negative ? 0xff : 0);
        for (int i = offset + 1; i < off; i++) {
            buf[i] = fill;
        }
    }

    /**
     * Writes an octal value into a buffer.
     *
     * Uses {@link #formatUnsignedOctalString} to format
     * the value as an octal string with leading zeros.
     * The converted number is followed by NUL and then space.
     *
     * @param value The value to convert
     * @param buf The destination buffer
     * @param offset The starting offset into the buffer.
     * @param length The size of the buffer.
     * @return The updated value of offset, i.e. offset+length
     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
     */
    public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) {

        int idx = length - 2; // for NUL and space
        formatUnsignedOctalString(value, buf, offset, idx);

        buf[offset + idx++]   = 0; // Trailing null
        buf[offset + idx]     = (byte) ' '; // Trailing space

        return offset + length;
    }

    /**
     * Compute the checksum of a tar entry header.
     * Every byte of the buffer is added as an unsigned value.
     *
     * @param buf The tar entry's header buffer.
     * @return The computed checksum.
     */
    public static long computeCheckSum(final byte[] buf) {
        long sum = 0;

        for (final byte element : buf) {
            sum += BYTE_MASK & element;
        }

        return sum;
    }

    /**
     * Wikipedia <a href="http://en.wikipedia.org/wiki/Tar_(file_format)#File_header">says</a>:
     * <blockquote>
     * The checksum is calculated by taking the sum of the unsigned byte values
     * of the header block with the eight checksum bytes taken to be ascii
     * spaces (decimal value 32). It is stored as a six digit octal number with
     * leading zeroes followed by a NUL and then a space. Various
     * implementations do not adhere to this format. For better compatibility,
     * ignore leading and trailing whitespace, and get the first six digits. In
     * addition, some historic tar implementations treated bytes as signed.
     * Implementations typically calculate the checksum both ways, and treat it
     * as good if either the signed or unsigned sum matches the included
     * checksum.
     * </blockquote>
     * <p>
     * The return value of this method should be treated as a best-effort
     * heuristic rather than an absolute and final truth. The checksum
     * verification logic may well evolve over time as more special cases
     * are encountered.
     *
     * @param header tar header
     * @return whether the checksum is reasonably good
     * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a>
     * @since 1.5
     */
    public static boolean verifyCheckSum(final byte[] header) {
        final long storedSum = parseOctal(header, CHKSUM_OFFSET, CHKSUMLEN);
        long unsignedSum = 0;
        long signedSum = 0;

        for (int i = 0; i < header.length; i++) {
            byte b = header[i];
            // The checksum field itself is counted as if it held spaces.
            if (CHKSUM_OFFSET <= i && i < CHKSUM_OFFSET + CHKSUMLEN) {
                b = ' ';
            }
            unsignedSum += 0xff & b;
            signedSum += b;
        }
        return storedSum == unsignedSum || storedSum == signedSum;
    }

}