001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018 019package org.apache.commons.compress.utils; 020 021import java.nio.charset.Charset; 022 023/** 024 * Charsets required of every implementation of the Java platform. 025 * 026 * From the Java documentation <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard 027 * charsets</a>: 028 * <p> 029 * <cite>Every implementation of the Java platform is required to support the following character encodings. Consult the 030 * release documentation for your implementation to see if any other encodings are supported. Consult the release 031 * documentation for your implementation to see if any other encodings are supported. </cite> 032 * </p> 033 * 034 * <dl> 035 * <dt><code>US-ASCII</code></dt> 036 * <dd>Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the Unicode character set.</dd> 037 * <dt><code>ISO-8859-1</code></dt> 038 * <dd>ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</dd> 039 * <dt><code>UTF-8</code></dt> 040 * <dd>Eight-bit Unicode Transformation Format.</dd> 041 * <dt><code>UTF-16BE</code></dt> 042 * <dd>Sixteen-bit Unicode Transformation Format, big-endian byte order.</dd> 043 * <dt><code>UTF-16LE</code></dt> 044 * <dd>Sixteen-bit Unicode Transformation Format, little-endian byte order.</dd> 045 * <dt><code>UTF-16</code></dt> 046 * <dd>Sixteen-bit Unicode Transformation Format, byte order specified by a mandatory initial byte-order mark (either order 047 * accepted on input, big-endian used on output.)</dd> 048 * </dl> 049 * 050 * <p>This class best belongs in the Commons Lang or IO project. Even if a similar class is defined in another Commons 051 * component, it is not foreseen that Commons Compress would be made to depend on another Commons component.</p> 052 * 053 * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 054 * @since 1.4 055 * @version $Id$ 056 */ 057public class Charsets { 058 059 // 060 // This class should only contain Charset instances for required encodings. This guarantees that it will load correctly and 061 // without delay on all Java platforms. 062 // 063 064 /** 065 * Returns the given Charset or the default Charset if the given Charset is null. 066 * 067 * @param charset 068 * A charset or null. 069 * @return the given Charset or the default Charset if the given Charset is null 070 */ 071 public static Charset toCharset(final Charset charset) { 072 return charset == null ? Charset.defaultCharset() : charset; 073 } 074 075 /** 076 * Returns a Charset for the named charset. If the name is null, return the default Charset. 077 * 078 * @param charset 079 * The name of the requested charset, may be null. 080 * @return a Charset for the named charset 081 * @throws java.nio.charset.UnsupportedCharsetException 082 * If the named charset is unavailable 083 * @throws java.nio.charset.IllegalCharsetNameException 084 * If the given charset name is illegal 085 */ 086 public static Charset toCharset(final String charset) { 087 return charset == null ? Charset.defaultCharset() : Charset.forName(charset); 088 } 089 090 /** 091 * CharsetNamesISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1. 092 * <p> 093 * Every implementation of the Java platform is required to support this character encoding. 094 * </p> 095 * 096 * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 097 */ 098 public static final Charset ISO_8859_1 = Charset.forName(CharsetNames.ISO_8859_1); 099 100 /** 101 * <p> 102 * Seven-bit ASCII, also known as ISO646-US, also known as the Basic Latin block of the Unicode character set. 103 * </p> 104 * <p> 105 * Every implementation of the Java platform is required to support this character encoding. 106 * </p> 107 * 108 * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 109 */ 110 public static final Charset US_ASCII = Charset.forName(CharsetNames.US_ASCII); 111 112 /** 113 * <p> 114 * Sixteen-bit Unicode Transformation Format, The byte order specified by a mandatory initial byte-order mark 115 * (either order accepted on input, big-endian used on output) 116 * </p> 117 * <p> 118 * Every implementation of the Java platform is required to support this character encoding. 119 * </p> 120 * 121 * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 122 */ 123 public static final Charset UTF_16 = Charset.forName(CharsetNames.UTF_16); 124 125 /** 126 * <p> 127 * Sixteen-bit Unicode Transformation Format, big-endian byte order. 128 * </p> 129 * <p> 130 * Every implementation of the Java platform is required to support this character encoding. 131 * </p> 132 * 133 * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 134 */ 135 public static final Charset UTF_16BE = Charset.forName(CharsetNames.UTF_16BE); 136 137 /** 138 * <p> 139 * Sixteen-bit Unicode Transformation Format, little-endian byte order. 140 * </p> 141 * <p> 142 * Every implementation of the Java platform is required to support this character encoding. 143 * </p> 144 * 145 * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 146 */ 147 public static final Charset UTF_16LE = Charset.forName(CharsetNames.UTF_16LE); 148 149 /** 150 * <p> 151 * Eight-bit Unicode Transformation Format. 152 * </p> 153 * <p> 154 * Every implementation of the Java platform is required to support this character encoding. 155 * </p> 156 * 157 * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 158 */ 159 public static final Charset UTF_8 = Charset.forName(CharsetNames.UTF_8); 160}