001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 *
017 */
018
019package org.apache.commons.compress.archivers.zip;
020
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.charset.UnsupportedCharsetException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.compress.utils.Charsets;
029
030/**
031 * Static helper functions for robustly encoding filenames in zip files. 
032 */
033public abstract class ZipEncodingHelper {
034
035    /**
036     * A class, which holds the high characters of a simple encoding
037     * and lazily instantiates a Simple8BitZipEncoding instance in a
038     * thread-safe manner.
039     */
040    private static class SimpleEncodingHolder {
041
042        private final char [] highChars;
043        private Simple8BitZipEncoding encoding;
044
045        /**
046         * Instantiate a simple encoding holder.
047         * 
048         * @param highChars The characters for byte codes 128 to 255.
049         * 
050         * @see Simple8BitZipEncoding#Simple8BitZipEncoding(char[])
051         */
052        SimpleEncodingHolder(final char [] highChars) {
053            this.highChars = highChars;
054        }
055
056        /**
057         * @return The associated {@link Simple8BitZipEncoding}, which
058         *         is instantiated if not done so far.
059         */
060        public synchronized Simple8BitZipEncoding getEncoding() {
061            if (this.encoding == null) {
062                this.encoding = new Simple8BitZipEncoding(this.highChars);
063            }
064            return this.encoding;
065        }
066    }
067
068    private static final Map<String, SimpleEncodingHolder> simpleEncodings;
069
070    static {
071        final Map<String, SimpleEncodingHolder> se =
072            new HashMap<>();
073
074        final char[] cp437_high_chars =
075            new char[] { 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0,
076                         0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef,
077                         0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6,
078                         0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
079                         0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5,
080                         0x20a7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa,
081                         0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x2310,
082                         0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
083                         0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561,
084                         0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557,
085                         0x255d, 0x255c, 0x255b, 0x2510, 0x2514, 0x2534,
086                         0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f,
087                         0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550,
088                         0x256c, 0x2567, 0x2568, 0x2564, 0x2565, 0x2559,
089                         0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518,
090                         0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580,
091                         0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3,
092                         0x00b5, 0x03c4, 0x03a6, 0x0398, 0x03a9, 0x03b4,
093                         0x221e, 0x03c6, 0x03b5, 0x2229, 0x2261, 0x00b1,
094                         0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248,
095                         0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2,
096                         0x25a0, 0x00a0 };
097
098        final SimpleEncodingHolder cp437 = new SimpleEncodingHolder(cp437_high_chars);
099
100        se.put("CP437", cp437);
101        se.put("Cp437", cp437);
102        se.put("cp437", cp437);
103        se.put("IBM437", cp437);
104        se.put("ibm437", cp437);
105
106        final char[] cp850_high_chars =
107            new char[] { 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0,
108                         0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef,
109                         0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6,
110                         0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
111                         0x00ff, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8,
112                         0x00d7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa,
113                         0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x00ae,
114                         0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
115                         0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1,
116                         0x00c2, 0x00c0, 0x00a9, 0x2563, 0x2551, 0x2557,
117                         0x255d, 0x00a2, 0x00a5, 0x2510, 0x2514, 0x2534,
118                         0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3,
119                         0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550,
120                         0x256c, 0x00a4, 0x00f0, 0x00d0, 0x00ca, 0x00cb,
121                         0x00c8, 0x0131, 0x00cd, 0x00ce, 0x00cf, 0x2518,
122                         0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580,
123                         0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5,
124                         0x00b5, 0x00fe, 0x00de, 0x00da, 0x00db, 0x00d9,
125                         0x00fd, 0x00dd, 0x00af, 0x00b4, 0x00ad, 0x00b1,
126                         0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8,
127                         0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2,
128                         0x25a0, 0x00a0 };
129
130        final SimpleEncodingHolder cp850 = new SimpleEncodingHolder(cp850_high_chars);
131
132        se.put("CP850", cp850);
133        se.put("Cp850", cp850);
134        se.put("cp850", cp850);
135        se.put("IBM850", cp850);
136        se.put("ibm850", cp850);
137        simpleEncodings = Collections.unmodifiableMap(se);
138    }
139
140    /**
141     * Grow a byte buffer, so it has a minimal capacity or at least
142     * the double capacity of the original buffer 
143     * 
144     * @param b The original buffer.
145     * @param newCapacity The minimal requested new capacity.
146     * @return A byte buffer <code>r</code> with
147     *         <code>r.capacity() = max(b.capacity()*2,newCapacity)</code> and
148     *         all the data contained in <code>b</code> copied to the beginning
149     *         of <code>r</code>.
150     *
151     */
152    static ByteBuffer growBuffer(final ByteBuffer b, final int newCapacity) {
153        b.limit(b.position());
154        b.rewind();
155
156        final int c2 = b.capacity() * 2;
157        final ByteBuffer on = ByteBuffer.allocate(c2 < newCapacity ? newCapacity : c2);
158
159        on.put(b);
160        return on;
161    }
162
163 
164    /**
165     * The hexadecimal digits <code>0,...,9,A,...,F</code> encoded as
166     * ASCII bytes.
167     */
168    private static final byte[] HEX_DIGITS =
169        new byte [] {
170        0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x41,
171        0x42, 0x43, 0x44, 0x45, 0x46
172    };
173
174    /**
175     * Append <code>%Uxxxx</code> to the given byte buffer.
176     * The caller must assure, that <code>bb.remaining()&gt;=6</code>.
177     * 
178     * @param bb The byte buffer to write to.
179     * @param c The character to write.
180     */
181    static void appendSurrogate(final ByteBuffer bb, final char c) {
182
183        bb.put((byte) '%');
184        bb.put((byte) 'U');
185
186        bb.put(HEX_DIGITS[(c >> 12)&0x0f]);
187        bb.put(HEX_DIGITS[(c >> 8)&0x0f]);
188        bb.put(HEX_DIGITS[(c >> 4)&0x0f]);
189        bb.put(HEX_DIGITS[c & 0x0f]);
190    }
191
192
    /**
     * Name of the encoding UTF-8.
     */
    static final String UTF8 = "UTF8";

    /**
     * Shared zip encoding for UTF-8: a {@link FallbackZipEncoding}
     * created with the {@link #UTF8} name. {@link #getZipEncoding(String)}
     * returns this instance for every name accepted by
     * {@link #isUTF8(String)}.
     */
    static final ZipEncoding UTF8_ZIP_ENCODING = new FallbackZipEncoding(UTF8);
202
203    /**
204     * Instantiates a zip encoding.
205     * 
206     * @param name The name of the zip encoding. Specify {@code null} for
207     *             the platform's default encoding.
208     * @return A zip encoding for the given encoding name.
209     */
210    public static ZipEncoding getZipEncoding(final String name) {
211 
212        // fallback encoding is good enough for UTF-8.
213        if (isUTF8(name)) {
214            return UTF8_ZIP_ENCODING;
215        }
216
217        if (name == null) {
218            return new FallbackZipEncoding();
219        }
220
221        final SimpleEncodingHolder h = simpleEncodings.get(name);
222
223        if (h!=null) {
224            return h.getEncoding();
225        }
226
227        try {
228
229            final Charset cs = Charset.forName(name);
230            return new NioZipEncoding(cs);
231
232        } catch (final UnsupportedCharsetException e) {
233            return new FallbackZipEncoding(name);
234        }
235    }
236
237    /**
238     * Returns whether a given encoding is UTF-8. If the given name is null, then check the platform's default encoding.
239     * 
240     * @param charsetName
241     *            If the given name is null, then check the platform's default encoding.
242     */
243    static boolean isUTF8(String charsetName) {
244        if (charsetName == null) {
245            // check platform's default encoding
246            charsetName = Charset.defaultCharset().name();
247        }
248        if (Charsets.UTF_8.name().equalsIgnoreCase(charsetName)) {
249            return true;
250        }
251        for (final String alias : Charsets.UTF_8.aliases()) {
252            if (alias.equalsIgnoreCase(charsetName)) {
253                return true;
254            }
255        }
256        return false;
257    }
258}