001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018 019package org.apache.commons.compress.archivers.zip; 020 021import java.nio.ByteBuffer; 022import java.nio.charset.Charset; 023import java.nio.charset.UnsupportedCharsetException; 024import java.util.Collections; 025import java.util.HashMap; 026import java.util.Map; 027 028import org.apache.commons.compress.utils.Charsets; 029 030/** 031 * Static helper functions for robustly encoding filenames in zip files. 032 */ 033public abstract class ZipEncodingHelper { 034 035 /** 036 * A class, which holds the high characters of a simple encoding 037 * and lazily instantiates a Simple8BitZipEncoding instance in a 038 * thread-safe manner. 039 */ 040 private static class SimpleEncodingHolder { 041 042 private final char [] highChars; 043 private Simple8BitZipEncoding encoding; 044 045 /** 046 * Instantiate a simple encoding holder. 047 * 048 * @param highChars The characters for byte codes 128 to 255. 049 * 050 * @see Simple8BitZipEncoding#Simple8BitZipEncoding(char[]) 051 */ 052 SimpleEncodingHolder(final char [] highChars) { 053 this.highChars = highChars; 054 } 055 056 /** 057 * @return The associated {@link Simple8BitZipEncoding}, which 058 * is instantiated if not done so far. 059 */ 060 public synchronized Simple8BitZipEncoding getEncoding() { 061 if (this.encoding == null) { 062 this.encoding = new Simple8BitZipEncoding(this.highChars); 063 } 064 return this.encoding; 065 } 066 } 067 068 private static final Map<String, SimpleEncodingHolder> simpleEncodings; 069 070 static { 071 final Map<String, SimpleEncodingHolder> se = 072 new HashMap<>(); 073 074 final char[] cp437_high_chars = 075 new char[] { 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 076 0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef, 077 0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6, 078 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9, 079 0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5, 080 0x20a7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa, 081 0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x2310, 082 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb, 083 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 084 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 085 0x255d, 0x255c, 0x255b, 0x2510, 0x2514, 0x2534, 086 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, 087 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 088 0x256c, 0x2567, 0x2568, 0x2564, 0x2565, 0x2559, 089 0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518, 090 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, 091 0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, 092 0x00b5, 0x03c4, 0x03a6, 0x0398, 0x03a9, 0x03b4, 093 0x221e, 0x03c6, 0x03b5, 0x2229, 0x2261, 0x00b1, 094 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248, 095 0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2, 096 0x25a0, 0x00a0 }; 097 098 final SimpleEncodingHolder cp437 = new SimpleEncodingHolder(cp437_high_chars); 099 100 se.put("CP437", cp437); 101 se.put("Cp437", cp437); 102 se.put("cp437", cp437); 103 se.put("IBM437", cp437); 104 se.put("ibm437", cp437); 105 106 final char[] cp850_high_chars = 107 new char[] { 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 108 0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef, 109 0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6, 110 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9, 111 0x00ff, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8, 112 0x00d7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa, 113 0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x00ae, 114 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb, 115 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1, 116 0x00c2, 0x00c0, 0x00a9, 0x2563, 0x2551, 0x2557, 117 0x255d, 0x00a2, 0x00a5, 0x2510, 0x2514, 0x2534, 118 0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3, 119 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 120 0x256c, 0x00a4, 0x00f0, 0x00d0, 0x00ca, 0x00cb, 121 0x00c8, 0x0131, 0x00cd, 0x00ce, 0x00cf, 0x2518, 122 0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580, 123 0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5, 124 0x00b5, 0x00fe, 0x00de, 0x00da, 0x00db, 0x00d9, 125 0x00fd, 0x00dd, 0x00af, 0x00b4, 0x00ad, 0x00b1, 126 0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8, 127 0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2, 128 0x25a0, 0x00a0 }; 129 130 final SimpleEncodingHolder cp850 = new SimpleEncodingHolder(cp850_high_chars); 131 132 se.put("CP850", cp850); 133 se.put("Cp850", cp850); 134 se.put("cp850", cp850); 135 se.put("IBM850", cp850); 136 se.put("ibm850", cp850); 137 simpleEncodings = Collections.unmodifiableMap(se); 138 } 139 140 /** 141 * Grow a byte buffer, so it has a minimal capacity or at least 142 * the double capacity of the original buffer 143 * 144 * @param b The original buffer. 145 * @param newCapacity The minimal requested new capacity. 146 * @return A byte buffer <code>r</code> with 147 * <code>r.capacity() = max(b.capacity()*2,newCapacity)</code> and 148 * all the data contained in <code>b</code> copied to the beginning 149 * of <code>r</code>. 150 * 151 */ 152 static ByteBuffer growBuffer(final ByteBuffer b, final int newCapacity) { 153 b.limit(b.position()); 154 b.rewind(); 155 156 final int c2 = b.capacity() * 2; 157 final ByteBuffer on = ByteBuffer.allocate(c2 < newCapacity ? newCapacity : c2); 158 159 on.put(b); 160 return on; 161 } 162 163 164 /** 165 * The hexadecimal digits <code>0,...,9,A,...,F</code> encoded as 166 * ASCII bytes. 167 */ 168 private static final byte[] HEX_DIGITS = 169 new byte [] { 170 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x41, 171 0x42, 0x43, 0x44, 0x45, 0x46 172 }; 173 174 /** 175 * Append <code>%Uxxxx</code> to the given byte buffer. 176 * The caller must assure, that <code>bb.remaining()>=6</code>. 177 * 178 * @param bb The byte buffer to write to. 179 * @param c The character to write. 180 */ 181 static void appendSurrogate(final ByteBuffer bb, final char c) { 182 183 bb.put((byte) '%'); 184 bb.put((byte) 'U'); 185 186 bb.put(HEX_DIGITS[(c >> 12)&0x0f]); 187 bb.put(HEX_DIGITS[(c >> 8)&0x0f]); 188 bb.put(HEX_DIGITS[(c >> 4)&0x0f]); 189 bb.put(HEX_DIGITS[c & 0x0f]); 190 } 191 192 193 /** 194 * name of the encoding UTF-8 195 */ 196 static final String UTF8 = "UTF8"; 197 198 /** 199 * name of the encoding UTF-8 200 */ 201 static final ZipEncoding UTF8_ZIP_ENCODING = new FallbackZipEncoding(UTF8); 202 203 /** 204 * Instantiates a zip encoding. 205 * 206 * @param name The name of the zip encoding. Specify {@code null} for 207 * the platform's default encoding. 208 * @return A zip encoding for the given encoding name. 209 */ 210 public static ZipEncoding getZipEncoding(final String name) { 211 212 // fallback encoding is good enough for UTF-8. 213 if (isUTF8(name)) { 214 return UTF8_ZIP_ENCODING; 215 } 216 217 if (name == null) { 218 return new FallbackZipEncoding(); 219 } 220 221 final SimpleEncodingHolder h = simpleEncodings.get(name); 222 223 if (h!=null) { 224 return h.getEncoding(); 225 } 226 227 try { 228 229 final Charset cs = Charset.forName(name); 230 return new NioZipEncoding(cs); 231 232 } catch (final UnsupportedCharsetException e) { 233 return new FallbackZipEncoding(name); 234 } 235 } 236 237 /** 238 * Returns whether a given encoding is UTF-8. If the given name is null, then check the platform's default encoding. 239 * 240 * @param charsetName 241 * If the given name is null, then check the platform's default encoding. 242 */ 243 static boolean isUTF8(String charsetName) { 244 if (charsetName == null) { 245 // check platform's default encoding 246 charsetName = Charset.defaultCharset().name(); 247 } 248 if (Charsets.UTF_8.name().equalsIgnoreCase(charsetName)) { 249 return true; 250 } 251 for (final String alias : Charsets.UTF_8.aliases()) { 252 if (alias.equalsIgnoreCase(charsetName)) { 253 return true; 254 } 255 } 256 return false; 257 } 258}