001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.lzw;
020
021import java.io.IOException;
022import java.io.InputStream;
023import java.nio.ByteOrder;
024
025import org.apache.commons.compress.compressors.CompressorInputStream;
026import org.apache.commons.compress.utils.BitInputStream;
027
028/**
029 * <p>Generic LZW implementation. It is used internally for
030 * the Z decompressor and the Unshrinking Zip file compression method,
031 * but may be useful for third-party projects in implementing their own LZW variations.</p>
032 *
033 * @NotThreadSafe
034 * @since 1.10
035 */
036public abstract class LZWInputStream extends CompressorInputStream {
037    protected static final int DEFAULT_CODE_SIZE = 9;
038    protected static final int UNUSED_PREFIX = -1;
039
040    private final byte[] oneByte = new byte[1];
041
042    protected final BitInputStream in;
043    private int clearCode = -1;
044    private int codeSize = DEFAULT_CODE_SIZE;
045    private byte previousCodeFirstChar;
046    private int previousCode = UNUSED_PREFIX;
047    private int tableSize;
048    private int[] prefixes;
049    private byte[] characters;
050    private byte[] outputStack;
051    private int outputStackLocation;
052
053    protected LZWInputStream(final InputStream inputStream, final ByteOrder byteOrder) {
054        this.in = new BitInputStream(inputStream, byteOrder);
055    }
056
057    @Override
058    public void close() throws IOException {
059        in.close();
060    }
061    
062    @Override
063    public int read() throws IOException {
064        final int ret = read(oneByte);
065        if (ret < 0) {
066            return ret;
067        }
068        return 0xff & oneByte[0];
069    }
070    
071    @Override
072    public int read(final byte[] b, final int off, final int len) throws IOException {
073        int bytesRead = readFromStack(b, off, len);
074        while (len - bytesRead > 0) {
075            final int result = decompressNextSymbol();
076            if (result < 0) {
077                if (bytesRead > 0) {
078                    count(bytesRead);
079                    return bytesRead;
080                }
081                return result;
082            }
083            bytesRead += readFromStack(b, off + bytesRead, len - bytesRead);
084        }
085        count(bytesRead);
086        return bytesRead;
087    }
088
089    /**
090     * Read the next code and expand it.
091     * @return the expanded next code
092     * @throws IOException on error
093     */
094    protected abstract int decompressNextSymbol() throws IOException;
095
096    /**
097     * Add a new entry to the dictionary.
098     * @param previousCode the previous code
099     * @param character the next character to append
100     * @return the new code
101     * @throws IOException on error
102     */
103    protected abstract int addEntry(int previousCode, byte character)
104        throws IOException;
105
106    /**
107     * Sets the clear code based on the code size.
108     * @param codeSize code size
109     */
110    protected void setClearCode(final int codeSize) {
111        clearCode = (1 << (codeSize - 1));
112    }
113
114    /**
115     * Initializes the arrays based on the maximum code size.
116     * @param maxCodeSize maximum code size
117     */
118    protected void initializeTables(final int maxCodeSize) {
119        final int maxTableSize = 1 << maxCodeSize;
120        prefixes = new int[maxTableSize];
121        characters = new byte[maxTableSize];
122        outputStack = new byte[maxTableSize];
123        outputStackLocation = maxTableSize;
124        final int max = 1 << 8;
125        for (int i = 0; i < max; i++) {
126            prefixes[i] = -1;
127            characters[i] = (byte) i;
128        }
129    }
130
131    /**
132     * Reads the next code from the stream.
133     * @return the next code
134     * @throws IOException on error
135     */
136    protected int readNextCode() throws IOException {
137        if (codeSize > 31) {
138            throw new IllegalArgumentException("code size must not be bigger than 31");
139        }
140        return (int) in.readBits(codeSize);
141    }
142    
143    /**
144     * Adds a new entry if the maximum table size hasn't been exceeded
145     * and returns the new index.
146     * @param previousCode the previous code
147     * @param character the character to append
148     * @param maxTableSize the maximum table size
149     * @return the new code
150     */
151    protected int addEntry(final int previousCode, final byte character, final int maxTableSize) {
152        if (tableSize < maxTableSize) {
153            prefixes[tableSize] = previousCode;
154            characters[tableSize] = character;
155            return tableSize++;
156        }
157        return -1;
158    }
159
160    /**
161     * Add entry for repeat of previousCode we haven't added, yet.
162     * @return new code for a repeat of the previous code
163     * @throws IOException on error
164     */
165    protected int addRepeatOfPreviousCode() throws IOException {
166        if (previousCode == -1) {
167            // can't have a repeat for the very first code
168            throw new IOException("The first code can't be a reference to its preceding code");
169        }
170        return addEntry(previousCode, previousCodeFirstChar);
171    }
172
173    /**
174     * Expands the entry with index code to the output stack and may
175     * create a new entry
176     * @param code the code
177     * @param addedUnfinishedEntry whether unfinished entries have been added
178     * @return the new location of the output stack
179     * @throws IOException on error
180     */
181    protected int expandCodeToOutputStack(final int code, final boolean addedUnfinishedEntry)
182        throws IOException {
183        for (int entry = code; entry >= 0; entry = prefixes[entry]) {
184            outputStack[--outputStackLocation] = characters[entry];
185        }
186        if (previousCode != -1 && !addedUnfinishedEntry) {
187            addEntry(previousCode, outputStack[outputStackLocation]);
188        }
189        previousCode = code;
190        previousCodeFirstChar = outputStack[outputStackLocation];
191        return outputStackLocation;
192    }
193
194    private int readFromStack(final byte[] b, final int off, final int len) {
195        final int remainingInStack = outputStack.length - outputStackLocation;
196        if (remainingInStack > 0) {
197            final int maxLength = Math.min(remainingInStack, len);
198            System.arraycopy(outputStack, outputStackLocation, b, off, maxLength);
199            outputStackLocation += maxLength;
200            return maxLength;
201        }
202        return 0;
203    }
204
205    protected int getCodeSize() {
206        return codeSize;
207    }
208
209    protected void resetCodeSize() {
210        setCodeSize(DEFAULT_CODE_SIZE);
211    }
212
213    protected void setCodeSize(final int cs) {
214        this.codeSize = cs;
215    }
216
217    protected void incrementCodeSize() {
218        codeSize++;
219    }
220
221    protected void resetPreviousCode() {
222        this.previousCode = -1;
223    }
224
225    protected int getPrefix(final int offset) {
226        return prefixes[offset];
227    }
228
229    protected void setPrefix(final int offset, final int value) {
230        prefixes[offset] = value;
231    }
232
233    protected int getPrefixesLength() {
234        return prefixes.length;
235    }
236
237    protected int getClearCode() {
238        return clearCode;
239    }
240
241    protected int getTableSize() {
242        return tableSize;
243    }
244
245    protected void setTableSize(final int newSize) {
246        tableSize = newSize;
247    }
248
249}