001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.compressors.lzw; 020 021import java.io.IOException; 022import java.io.InputStream; 023import java.nio.ByteOrder; 024 025import org.apache.commons.compress.compressors.CompressorInputStream; 026import org.apache.commons.compress.utils.BitInputStream; 027 028/** 029 * <p>Generic LZW implementation. It is used internally for 030 * the Z decompressor and the Unshrinking Zip file compression method, 031 * but may be useful for third-party projects in implementing their own LZW variations.</p> 032 * 033 * @NotThreadSafe 034 * @since 1.10 035 */ 036public abstract class LZWInputStream extends CompressorInputStream { 037 protected static final int DEFAULT_CODE_SIZE = 9; 038 protected static final int UNUSED_PREFIX = -1; 039 040 private final byte[] oneByte = new byte[1]; 041 042 protected final BitInputStream in; 043 private int clearCode = -1; 044 private int codeSize = DEFAULT_CODE_SIZE; 045 private byte previousCodeFirstChar; 046 private int previousCode = UNUSED_PREFIX; 047 private int tableSize; 048 private int[] prefixes; 049 private byte[] characters; 050 private byte[] outputStack; 051 private int outputStackLocation; 052 053 protected LZWInputStream(final InputStream inputStream, final ByteOrder byteOrder) { 054 this.in = new BitInputStream(inputStream, byteOrder); 055 } 056 057 @Override 058 public void close() throws IOException { 059 in.close(); 060 } 061 062 @Override 063 public int read() throws IOException { 064 final int ret = read(oneByte); 065 if (ret < 0) { 066 return ret; 067 } 068 return 0xff & oneByte[0]; 069 } 070 071 @Override 072 public int read(final byte[] b, final int off, final int len) throws IOException { 073 int bytesRead = readFromStack(b, off, len); 074 while (len - bytesRead > 0) { 075 final int result = decompressNextSymbol(); 076 if (result < 0) { 077 if (bytesRead > 0) { 078 count(bytesRead); 079 return bytesRead; 080 } 081 return result; 082 } 083 bytesRead += readFromStack(b, off + bytesRead, len - bytesRead); 084 } 085 count(bytesRead); 086 return bytesRead; 087 } 088 089 /** 090 * Read the next code and expand it. 091 * @return the expanded next code 092 * @throws IOException on error 093 */ 094 protected abstract int decompressNextSymbol() throws IOException; 095 096 /** 097 * Add a new entry to the dictionary. 098 * @param previousCode the previous code 099 * @param character the next character to append 100 * @return the new code 101 * @throws IOException on error 102 */ 103 protected abstract int addEntry(int previousCode, byte character) 104 throws IOException; 105 106 /** 107 * Sets the clear code based on the code size. 108 * @param codeSize code size 109 */ 110 protected void setClearCode(final int codeSize) { 111 clearCode = (1 << (codeSize - 1)); 112 } 113 114 /** 115 * Initializes the arrays based on the maximum code size. 116 * @param maxCodeSize maximum code size 117 */ 118 protected void initializeTables(final int maxCodeSize) { 119 final int maxTableSize = 1 << maxCodeSize; 120 prefixes = new int[maxTableSize]; 121 characters = new byte[maxTableSize]; 122 outputStack = new byte[maxTableSize]; 123 outputStackLocation = maxTableSize; 124 final int max = 1 << 8; 125 for (int i = 0; i < max; i++) { 126 prefixes[i] = -1; 127 characters[i] = (byte) i; 128 } 129 } 130 131 /** 132 * Reads the next code from the stream. 133 * @return the next code 134 * @throws IOException on error 135 */ 136 protected int readNextCode() throws IOException { 137 if (codeSize > 31) { 138 throw new IllegalArgumentException("code size must not be bigger than 31"); 139 } 140 return (int) in.readBits(codeSize); 141 } 142 143 /** 144 * Adds a new entry if the maximum table size hasn't been exceeded 145 * and returns the new index. 146 * @param previousCode the previous code 147 * @param character the character to append 148 * @param maxTableSize the maximum table size 149 * @return the new code 150 */ 151 protected int addEntry(final int previousCode, final byte character, final int maxTableSize) { 152 if (tableSize < maxTableSize) { 153 prefixes[tableSize] = previousCode; 154 characters[tableSize] = character; 155 return tableSize++; 156 } 157 return -1; 158 } 159 160 /** 161 * Add entry for repeat of previousCode we haven't added, yet. 162 * @return new code for a repeat of the previous code 163 * @throws IOException on error 164 */ 165 protected int addRepeatOfPreviousCode() throws IOException { 166 if (previousCode == -1) { 167 // can't have a repeat for the very first code 168 throw new IOException("The first code can't be a reference to its preceding code"); 169 } 170 return addEntry(previousCode, previousCodeFirstChar); 171 } 172 173 /** 174 * Expands the entry with index code to the output stack and may 175 * create a new entry 176 * @param code the code 177 * @param addedUnfinishedEntry whether unfinished entries have been added 178 * @return the new location of the output stack 179 * @throws IOException on error 180 */ 181 protected int expandCodeToOutputStack(final int code, final boolean addedUnfinishedEntry) 182 throws IOException { 183 for (int entry = code; entry >= 0; entry = prefixes[entry]) { 184 outputStack[--outputStackLocation] = characters[entry]; 185 } 186 if (previousCode != -1 && !addedUnfinishedEntry) { 187 addEntry(previousCode, outputStack[outputStackLocation]); 188 } 189 previousCode = code; 190 previousCodeFirstChar = outputStack[outputStackLocation]; 191 return outputStackLocation; 192 } 193 194 private int readFromStack(final byte[] b, final int off, final int len) { 195 final int remainingInStack = outputStack.length - outputStackLocation; 196 if (remainingInStack > 0) { 197 final int maxLength = Math.min(remainingInStack, len); 198 System.arraycopy(outputStack, outputStackLocation, b, off, maxLength); 199 outputStackLocation += maxLength; 200 return maxLength; 201 } 202 return 0; 203 } 204 205 protected int getCodeSize() { 206 return codeSize; 207 } 208 209 protected void resetCodeSize() { 210 setCodeSize(DEFAULT_CODE_SIZE); 211 } 212 213 protected void setCodeSize(final int cs) { 214 this.codeSize = cs; 215 } 216 217 protected void incrementCodeSize() { 218 codeSize++; 219 } 220 221 protected void resetPreviousCode() { 222 this.previousCode = -1; 223 } 224 225 protected int getPrefix(final int offset) { 226 return prefixes[offset]; 227 } 228 229 protected void setPrefix(final int offset, final int value) { 230 prefixes[offset] = value; 231 } 232 233 protected int getPrefixesLength() { 234 return prefixes.length; 235 } 236 237 protected int getClearCode() { 238 return clearCode; 239 } 240 241 protected int getTableSize() { 242 return tableSize; 243 } 244 245 protected void setTableSize(final int newSize) { 246 tableSize = newSize; 247 } 248 249}