001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.tar;
020
021import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUMLEN;
022import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUM_OFFSET;
023
024import java.io.IOException;
025import java.math.BigInteger;
026import java.nio.ByteBuffer;
027import org.apache.commons.compress.archivers.zip.ZipEncoding;
028import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
029
030/**
 * This class provides static utility methods for parsing and formatting the fields of tar header byte buffers.
032 *
033 * @Immutable
034 */
035// CheckStyle:HideUtilityClassConstructorCheck OFF (bc)
036public class TarUtils {
037
038    private static final int BYTE_MASK = 255;
039
040    static final ZipEncoding DEFAULT_ENCODING =
041        ZipEncodingHelper.getZipEncoding(null);
042
043    /**
044     * Encapsulates the algorithms used up to Commons Compress 1.3 as
045     * ZipEncoding.
046     */
047    static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() {
048            @Override
049            public boolean canEncode(final String name) { return true; }
050
051            @Override
052            public ByteBuffer encode(final String name) {
053                final int length = name.length();
054                final byte[] buf = new byte[length];
055
056                // copy until end of input or output is reached.
057                for (int i = 0; i < length; ++i) {
058                    buf[i] = (byte) name.charAt(i);
059                }
060                return ByteBuffer.wrap(buf);
061            }
062
063            @Override
064            public String decode(final byte[] buffer) {
065                final int length = buffer.length;
066                final StringBuilder result = new StringBuilder(length);
067
068                for (final byte b : buffer) {
069                    if (b == 0) { // Trailing null
070                        break;
071                    }
072                    result.append((char) (b & 0xFF)); // Allow for sign-extension
073                }
074
075                return result.toString();
076            }
077        };
078
    /**
     * Private constructor to prevent instantiation of this utility class;
     * all members are static.
     */
    private TarUtils(){
    }
082
083    /**
084     * Parse an octal string from a buffer.
085     *
086     * <p>Leading spaces are ignored.
087     * The buffer must contain a trailing space or NUL,
088     * and may contain an additional trailing space or NUL.</p>
089     *
090     * <p>The input buffer is allowed to contain all NULs,
091     * in which case the method returns 0L
092     * (this allows for missing fields).</p>
093     *
094     * <p>To work-around some tar implementations that insert a
095     * leading NUL this method returns 0 if it detects a leading NUL
096     * since Commons Compress 1.4.</p>
097     *
098     * @param buffer The buffer from which to parse.
099     * @param offset The offset into the buffer from which to parse.
100     * @param length The maximum number of bytes to parse - must be at least 2 bytes.
101     * @return The long value of the octal string.
102     * @throws IllegalArgumentException if the trailing space/NUL is missing or if a invalid byte is detected.
103     */
104    public static long parseOctal(final byte[] buffer, final int offset, final int length) {
105        long    result = 0;
106        int     end = offset + length;
107        int     start = offset;
108
109        if (length < 2){
110            throw new IllegalArgumentException("Length "+length+" must be at least 2");
111        }
112
113        if (buffer[start] == 0) {
114            return 0L;
115        }
116
117        // Skip leading spaces
118        while (start < end){
119            if (buffer[start] == ' '){
120                start++;
121            } else {
122                break;
123            }
124        }
125
126        // Trim all trailing NULs and spaces.
127        // The ustar and POSIX tar specs require a trailing NUL or
128        // space but some implementations use the extra digit for big
129        // sizes/uids/gids ...
130        byte trailer = buffer[end - 1];
131        while (start < end && (trailer == 0 || trailer == ' ')) {
132            end--;
133            trailer = buffer[end - 1];
134        }
135
136        for ( ;start < end; start++) {
137            final byte currentByte = buffer[start];
138            // CheckStyle:MagicNumber OFF
139            if (currentByte < '0' || currentByte > '7'){
140                throw new IllegalArgumentException(
141                        exceptionMessage(buffer, offset, length, start, currentByte));
142            }
143            result = (result << 3) + (currentByte - '0'); // convert from ASCII
144            // CheckStyle:MagicNumber ON
145        }
146
147        return result;
148    }
149
150    /** 
151     * Compute the value contained in a byte buffer.  If the most
152     * significant bit of the first byte in the buffer is set, this
153     * bit is ignored and the rest of the buffer is interpreted as a
154     * binary number.  Otherwise, the buffer is interpreted as an
155     * octal number as per the parseOctal function above.
156     *
157     * @param buffer The buffer from which to parse.
158     * @param offset The offset into the buffer from which to parse.
159     * @param length The maximum number of bytes to parse.
160     * @return The long value of the octal or binary string.
161     * @throws IllegalArgumentException if the trailing space/NUL is
162     * missing or an invalid byte is detected in an octal number, or
163     * if a binary number would exceed the size of a signed long
164     * 64-bit integer.
165     * @since 1.4
166     */
167    public static long parseOctalOrBinary(final byte[] buffer, final int offset,
168                                          final int length) {
169
170        if ((buffer[offset] & 0x80) == 0) {
171            return parseOctal(buffer, offset, length);
172        }
173        final boolean negative = buffer[offset] == (byte) 0xff;
174        if (length < 9) {
175            return parseBinaryLong(buffer, offset, length, negative);
176        }
177        return parseBinaryBigInteger(buffer, offset, length, negative);
178    }
179
180    private static long parseBinaryLong(final byte[] buffer, final int offset,
181                                        final int length,
182                                        final boolean negative) {
183        if (length >= 9) {
184            throw new IllegalArgumentException("At offset " + offset + ", "
185                                               + length + " byte binary number"
186                                               + " exceeds maximum signed long"
187                                               + " value");
188        }
189        long val = 0;
190        for (int i = 1; i < length; i++) {
191            val = (val << 8) + (buffer[offset + i] & 0xff);
192        }
193        if (negative) {
194            // 2's complement
195            val--;
196            val ^= (long) Math.pow(2.0, (length - 1) * 8.0) - 1;
197        }
198        return negative ? -val : val;
199    }
200
201    private static long parseBinaryBigInteger(final byte[] buffer,
202                                              final int offset,
203                                              final int length,
204                                              final boolean negative) {
205        final byte[] remainder = new byte[length - 1];
206        System.arraycopy(buffer, offset + 1, remainder, 0, length - 1);
207        BigInteger val = new BigInteger(remainder);
208        if (negative) {
209            // 2's complement
210            val = val.add(BigInteger.valueOf(-1)).not();
211        }
212        if (val.bitLength() > 63) {
213            throw new IllegalArgumentException("At offset " + offset + ", "
214                                               + length + " byte binary number"
215                                               + " exceeds maximum signed long"
216                                               + " value");
217        }
218        return negative ? -val.longValue() : val.longValue();
219    }
220
221    /**
222     * Parse a boolean byte from a buffer.
223     * Leading spaces and NUL are ignored.
224     * The buffer may contain trailing spaces or NULs.
225     *
226     * @param buffer The buffer from which to parse.
227     * @param offset The offset into the buffer from which to parse.
228     * @return The boolean value of the bytes.
229     * @throws IllegalArgumentException if an invalid byte is detected.
230     */
231    public static boolean parseBoolean(final byte[] buffer, final int offset) {
232        return buffer[offset] == 1;
233    }
234
235    // Helper method to generate the exception message
236    private static String exceptionMessage(final byte[] buffer, final int offset,
237            final int length, final int current, final byte currentByte) {
238        // default charset is good enough for an exception message,
239        //
240        // the alternative was to modify parseOctal and
241        // parseOctalOrBinary to receive the ZipEncoding of the
242        // archive (deprecating the existing public methods, of
243        // course) and dealing with the fact that ZipEncoding#decode
244        // can throw an IOException which parseOctal* doesn't declare
245        String string = new String(buffer, offset, length);
246
247        string=string.replaceAll("\0", "{NUL}"); // Replace NULs to allow string to be printed
248        final String s = "Invalid byte "+currentByte+" at offset "+(current-offset)+" in '"+string+"' len="+length;
249        return s;
250    }
251
252    /**
253     * Parse an entry name from a buffer.
254     * Parsing stops when a NUL is found
255     * or the buffer length is reached.
256     *
257     * @param buffer The buffer from which to parse.
258     * @param offset The offset into the buffer from which to parse.
259     * @param length The maximum number of bytes to parse.
260     * @return The entry name.
261     */
262    public static String parseName(final byte[] buffer, final int offset, final int length) {
263        try {
264            return parseName(buffer, offset, length, DEFAULT_ENCODING);
265        } catch (final IOException ex) {
266            try {
267                return parseName(buffer, offset, length, FALLBACK_ENCODING);
268            } catch (final IOException ex2) {
269                // impossible
270                throw new RuntimeException(ex2); //NOSONAR
271            }
272        }
273    }
274
275    /**
276     * Parse an entry name from a buffer.
277     * Parsing stops when a NUL is found
278     * or the buffer length is reached.
279     *
280     * @param buffer The buffer from which to parse.
281     * @param offset The offset into the buffer from which to parse.
282     * @param length The maximum number of bytes to parse.
283     * @param encoding name of the encoding to use for file names
284     * @since 1.4
285     * @return The entry name.
286     * @throws IOException on error
287     */
288    public static String parseName(final byte[] buffer, final int offset,
289                                   final int length,
290                                   final ZipEncoding encoding)
291        throws IOException {
292
293        int len = length;
294        for (; len > 0; len--) {
295            if (buffer[offset + len - 1] != 0) {
296                break;
297            }
298        }
299        if (len > 0) {
300            final byte[] b = new byte[len];
301            System.arraycopy(buffer, offset, b, 0, len);
302            return encoding.decode(b);
303        }
304        return "";
305    }
306
307    /**
308     * Copy a name into a buffer.
309     * Copies characters from the name into the buffer
310     * starting at the specified offset. 
311     * If the buffer is longer than the name, the buffer
312     * is filled with trailing NULs.
313     * If the name is longer than the buffer,
314     * the output is truncated.
315     *
316     * @param name The header name from which to copy the characters.
317     * @param buf The buffer where the name is to be stored.
318     * @param offset The starting offset into the buffer
319     * @param length The maximum number of header bytes to copy.
320     * @return The updated offset, i.e. offset + length
321     */
322    public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) {
323        try {
324            return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING);
325        } catch (final IOException ex) {
326            try {
327                return formatNameBytes(name, buf, offset, length,
328                                       FALLBACK_ENCODING);
329            } catch (final IOException ex2) {
330                // impossible
331                throw new RuntimeException(ex2); //NOSONAR
332            }
333        }
334    }
335
336    /**
337     * Copy a name into a buffer.
338     * Copies characters from the name into the buffer
339     * starting at the specified offset. 
340     * If the buffer is longer than the name, the buffer
341     * is filled with trailing NULs.
342     * If the name is longer than the buffer,
343     * the output is truncated.
344     *
345     * @param name The header name from which to copy the characters.
346     * @param buf The buffer where the name is to be stored.
347     * @param offset The starting offset into the buffer
348     * @param length The maximum number of header bytes to copy.
349     * @param encoding name of the encoding to use for file names
350     * @since 1.4
351     * @return The updated offset, i.e. offset + length
352     * @throws IOException on error
353     */
354    public static int formatNameBytes(final String name, final byte[] buf, final int offset,
355                                      final int length,
356                                      final ZipEncoding encoding)
357        throws IOException {
358        int len = name.length();
359        ByteBuffer b = encoding.encode(name);
360        while (b.limit() > length && len > 0) {
361            b = encoding.encode(name.substring(0, --len));
362        }
363        final int limit = b.limit() - b.position();
364        System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit);
365
366        // Pad any remaining output bytes with NUL
367        for (int i = limit; i < length; ++i) {
368            buf[offset + i] = 0;
369        }
370
371        return offset + length;
372    }
373
374    /**
375     * Fill buffer with unsigned octal number, padded with leading zeroes.
376     * 
377     * @param value number to convert to octal - treated as unsigned
378     * @param buffer destination buffer
379     * @param offset starting offset in buffer
380     * @param length length of buffer to fill
381     * @throws IllegalArgumentException if the value will not fit in the buffer
382     */
383    public static void formatUnsignedOctalString(final long value, final byte[] buffer,
384            final int offset, final int length) {
385        int remaining = length;
386        remaining--;
387        if (value == 0) {
388            buffer[offset + remaining--] = (byte) '0';
389        } else {
390            long val = value;
391            for (; remaining >= 0 && val != 0; --remaining) {
392                // CheckStyle:MagicNumber OFF
393                buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7));
394                val = val >>> 3;
395                // CheckStyle:MagicNumber ON
396            }
397            if (val != 0){
398                throw new IllegalArgumentException
399                (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length);
400            }
401        }
402
403        for (; remaining >= 0; --remaining) { // leading zeros
404            buffer[offset + remaining] = (byte) '0';
405        }
406    }
407
408    /**
409     * Write an octal integer into a buffer.
410     *
411     * Uses {@link #formatUnsignedOctalString} to format
412     * the value as an octal string with leading zeros.
413     * The converted number is followed by space and NUL
414     * 
415     * @param value The value to write
416     * @param buf The buffer to receive the output
417     * @param offset The starting offset into the buffer
418     * @param length The size of the output buffer
419     * @return The updated offset, i.e offset+length
420     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
421     */
422    public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
423
424        int idx=length-2; // For space and trailing null
425        formatUnsignedOctalString(value, buf, offset, idx);
426
427        buf[offset + idx++] = (byte) ' '; // Trailing space
428        buf[offset + idx]   = 0; // Trailing null
429
430        return offset + length;
431    }
432
433    /**
434     * Write an octal long integer into a buffer.
435     * 
436     * Uses {@link #formatUnsignedOctalString} to format
437     * the value as an octal string with leading zeros.
438     * The converted number is followed by a space.
439     * 
440     * @param value The value to write as octal
441     * @param buf The destinationbuffer.
442     * @param offset The starting offset into the buffer.
443     * @param length The length of the buffer
444     * @return The updated offset
445     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
446     */
447    public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
448
449        final int idx=length-1; // For space
450
451        formatUnsignedOctalString(value, buf, offset, idx);
452        buf[offset + idx] = (byte) ' '; // Trailing space
453
454        return offset + length;
455    }
456
457    /**
458     * Write an long integer into a buffer as an octal string if this
459     * will fit, or as a binary number otherwise.
460     * 
461     * Uses {@link #formatUnsignedOctalString} to format
462     * the value as an octal string with leading zeros.
463     * The converted number is followed by a space.
464     * 
465     * @param value The value to write into the buffer.
466     * @param buf The destination buffer.
467     * @param offset The starting offset into the buffer.
468     * @param length The length of the buffer.
469     * @return The updated offset.
470     * @throws IllegalArgumentException if the value (and trailer)
471     * will not fit in the buffer.
472     * @since 1.4
473     */
474    public static int formatLongOctalOrBinaryBytes(
475        final long value, final byte[] buf, final int offset, final int length) {
476
477        // Check whether we are dealing with UID/GID or SIZE field
478        final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE;
479
480        final boolean negative = value < 0;
481        if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars
482            return formatLongOctalBytes(value, buf, offset, length);
483        }
484
485        if (length < 9) {
486            formatLongBinary(value, buf, offset, length, negative);
487        }
488        formatBigIntegerBinary(value, buf, offset, length, negative);
489
490        buf[offset] = (byte) (negative ? 0xff : 0x80);
491        return offset + length;
492    }
493
494    private static void formatLongBinary(final long value, final byte[] buf,
495                                         final int offset, final int length,
496                                         final boolean negative) {
497        final int bits = (length - 1) * 8;
498        final long max = 1l << bits;
499        long val = Math.abs(value);
500        if (val >= max) {
501            throw new IllegalArgumentException("Value " + value +
502                " is too large for " + length + " byte field.");
503        }
504        if (negative) {
505            val ^= max - 1;
506            val |= 0xff << bits;
507            val++;
508        }
509        for (int i = offset + length - 1; i >= offset; i--) {
510            buf[i] = (byte) val;
511            val >>= 8;
512        }
513    }
514
515    private static void formatBigIntegerBinary(final long value, final byte[] buf,
516                                               final int offset,
517                                               final int length,
518                                               final boolean negative) {
519        final BigInteger val = BigInteger.valueOf(value);
520        final byte[] b = val.toByteArray();
521        final int len = b.length;
522        final int off = offset + length - len;
523        System.arraycopy(b, 0, buf, off, len);
524        final byte fill = (byte) (negative ? 0xff : 0);
525        for (int i = offset + 1; i < off; i++) {
526            buf[i] = fill;
527        }
528    }
529
530    /**
531     * Writes an octal value into a buffer.
532     * 
533     * Uses {@link #formatUnsignedOctalString} to format
534     * the value as an octal string with leading zeros.
535     * The converted number is followed by NUL and then space.
536     *
537     * @param value The value to convert
538     * @param buf The destination buffer
539     * @param offset The starting offset into the buffer.
540     * @param length The size of the buffer.
541     * @return The updated value of offset, i.e. offset+length
542     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
543     */
544    public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
545
546        int idx=length-2; // for NUL and space
547        formatUnsignedOctalString(value, buf, offset, idx);
548
549        buf[offset + idx++]   = 0; // Trailing null
550        buf[offset + idx]     = (byte) ' '; // Trailing space
551
552        return offset + length;
553    }
554
555    /**
556     * Compute the checksum of a tar entry header.
557     *
558     * @param buf The tar entry's header buffer.
559     * @return The computed checksum.
560     */
561    public static long computeCheckSum(final byte[] buf) {
562        long sum = 0;
563
564        for (final byte element : buf) {
565            sum += BYTE_MASK & element;
566        }
567
568        return sum;
569    }
570
571    /**
572     * Wikipedia <a href="http://en.wikipedia.org/wiki/Tar_(file_format)#File_header">says</a>:
573     * <blockquote>
574     * The checksum is calculated by taking the sum of the unsigned byte values
575     * of the header block with the eight checksum bytes taken to be ascii
576     * spaces (decimal value 32). It is stored as a six digit octal number with
577     * leading zeroes followed by a NUL and then a space. Various
578     * implementations do not adhere to this format. For better compatibility,
579     * ignore leading and trailing whitespace, and get the first six digits. In
580     * addition, some historic tar implementations treated bytes as signed.
581     * Implementations typically calculate the checksum both ways, and treat it
582     * as good if either the signed or unsigned sum matches the included
583     * checksum.
584     * </blockquote>
585     * <p>
586     * The return value of this method should be treated as a best-effort
587     * heuristic rather than an absolute and final truth. The checksum
588     * verification logic may well evolve over time as more special cases
589     * are encountered.
590     *
591     * @param header tar header
592     * @return whether the checksum is reasonably good
593     * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a>
594     * @since 1.5
595     */
596    public static boolean verifyCheckSum(final byte[] header) {
597        final long storedSum = parseOctal(header, CHKSUM_OFFSET, CHKSUMLEN);
598        long unsignedSum = 0;
599        long signedSum = 0;
600
601        for (int i = 0; i < header.length; i++) {
602            byte b = header[i];
603            if (CHKSUM_OFFSET  <= i && i < CHKSUM_OFFSET + CHKSUMLEN) {
604                b = ' ';
605            }
606            unsignedSum += 0xff & b;
607            signedSum += b;
608        }
609        return storedSum == unsignedSum || storedSum == signedSum;
610    }
611
612}