Skip to content

Package: MimeUtility

MimeUtility

nameinstructionbranchcomplexitylinemethod
bEncodedLength(byte[])
M: 9 C: 0
0%
M: 0 C: 0
100%
M: 1 C: 0
0%
M: 1 C: 0
0%
M: 1 C: 0
0%
checkAscii(InputStream, int, boolean)
M: 142 C: 0
0%
M: 48 C: 0
0%
M: 25 C: 0
0%
M: 44 C: 0
0%
M: 1 C: 0
0%
checkAscii(String)
M: 33 C: 0
0%
M: 8 C: 0
0%
M: 5 C: 0
0%
M: 11 C: 0
0%
M: 1 C: 0
0%
checkAscii(byte[])
M: 33 C: 0
0%
M: 8 C: 0
0%
M: 5 C: 0
0%
M: 10 C: 0
0%
M: 1 C: 0
0%
decode(InputStream, String)
M: 72 C: 0
0%
M: 18 C: 0
0%
M: 10 C: 0
0%
M: 15 C: 0
0%
M: 1 C: 0
0%
decodeInnerWords(String)
M: 88 C: 0
0%
M: 12 C: 0
0%
M: 7 C: 0
0%
M: 25 C: 0
0%
M: 1 C: 0
0%
decodeText(String)
M: 134 C: 0
0%
M: 30 C: 0
0%
M: 16 C: 0
0%
M: 38 C: 0
0%
M: 1 C: 0
0%
decodeWord(String)
M: 225 C: 0
0%
M: 22 C: 0
0%
M: 12 C: 0
0%
M: 45 C: 0
0%
M: 1 C: 0
0%
doEncode(String, boolean, String, int, String, boolean, boolean, StringBuilder)
M: 125 C: 0
0%
M: 18 C: 0
0%
M: 10 C: 0
0%
M: 29 C: 0
0%
M: 1 C: 0
0%
encode(OutputStream, String)
M: 73 C: 0
0%
M: 18 C: 0
0%
M: 10 C: 0
0%
M: 15 C: 0
0%
M: 1 C: 0
0%
encode(OutputStream, String, String)
M: 73 C: 0
0%
M: 18 C: 0
0%
M: 10 C: 0
0%
M: 15 C: 0
0%
M: 1 C: 0
0%
encodeText(String)
M: 5 C: 0
0%
M: 0 C: 0
100%
M: 1 C: 0
0%
M: 1 C: 0
0%
M: 1 C: 0
0%
encodeText(String, String, String)
M: 6 C: 0
0%
M: 0 C: 0
100%
M: 1 C: 0
0%
M: 1 C: 0
0%
M: 1 C: 0
0%
encodeWord(String)
M: 5 C: 0
0%
M: 0 C: 0
100%
M: 1 C: 0
0%
M: 1 C: 0
0%
M: 1 C: 0
0%
encodeWord(String, String, String)
M: 6 C: 0
0%
M: 0 C: 0
100%
M: 1 C: 0
0%
M: 1 C: 0
0%
M: 1 C: 0
0%
encodeWord(String, String, String, boolean)
M: 86 C: 0
0%
M: 12 C: 0
0%
M: 7 C: 0
0%
M: 20 C: 0
0%
M: 1 C: 0
0%
fold(int, String)
M: 154 C: 0
0%
M: 34 C: 0
0%
M: 18 C: 0
0%
M: 36 C: 0
0%
M: 1 C: 0
0%
getBoolean(Object, boolean)
M: 32 C: 0
0%
M: 10 C: 0
0%
M: 6 C: 0
0%
M: 9 C: 0
0%
M: 1 C: 0
0%
getBooleanProperty(Properties, String, boolean)
M: 6 C: 0
0%
M: 0 C: 0
100%
M: 1 C: 0
0%
M: 1 C: 0
0%
M: 1 C: 0
0%
getBooleanSystemProperty(String, boolean)
M: 31 C: 0
0%
M: 6 C: 0
0%
M: 4 C: 0
0%
M: 10 C: 0
0%
M: 1 C: 0
0%
getBytes(InputStream)
M: 45 C: 0
0%
M: 4 C: 0
0%
M: 3 C: 0
0%
M: 11 C: 0
0%
M: 1 C: 0
0%
getBytes(String)
M: 25 C: 0
0%
M: 2 C: 0
0%
M: 2 C: 0
0%
M: 6 C: 0
0%
M: 1 C: 0
0%
getDefaultJavaCharset()
M: 31 C: 0
0%
M: 8 C: 0
0%
M: 5 C: 0
0%
M: 13 C: 0
0%
M: 1 C: 0
0%
getDefaultMIMECharset()
M: 14 C: 0
0%
M: 4 C: 0
0%
M: 3 C: 0
0%
M: 6 C: 0
0%
M: 1 C: 0
0%
getEncoding(DataHandler)
M: 76 C: 0
0%
M: 9 C: 0
0%
M: 6 C: 0
0%
M: 27 C: 0
0%
M: 1 C: 0
0%
getEncoding(DataSource)
M: 75 C: 0
0%
M: 15 C: 0
0%
M: 9 C: 0
0%
M: 25 C: 0
0%
M: 1 C: 0
0%
getProp(Properties, String)
M: 12 C: 0
0%
M: 2 C: 0
0%
M: 2 C: 0
0%
M: 4 C: 0
0%
M: 1 C: 0
0%
indexOfAny(String, String)
M: 5 C: 0
0%
M: 0 C: 0
100%
M: 1 C: 0
0%
M: 1 C: 0
0%
M: 1 C: 0
0%
indexOfAny(String, String, int)
M: 23 C: 0
0%
M: 4 C: 0
0%
M: 3 C: 0
0%
M: 7 C: 0
0%
M: 1 C: 0
0%
javaCharset(String)
M: 28 C: 0
0%
M: 8 C: 0
0%
M: 5 C: 0
0%
M: 9 C: 0
0%
M: 1 C: 0
0%
loadMappings(LineInputStream, Map)
M: 50 C: 0
0%
M: 10 C: 0
0%
M: 6 C: 0
0%
M: 17 C: 0
0%
M: 1 C: 0
0%
makesafe(CharSequence)
M: 95 C: 0
0%
M: 20 C: 0
0%
M: 11 C: 0
0%
M: 22 C: 0
0%
M: 1 C: 0
0%
mimeCharset(String)
M: 19 C: 0
0%
M: 6 C: 0
0%
M: 4 C: 0
0%
M: 4 C: 0
0%
M: 1 C: 0
0%
nonAsciiCharset(ContentType)
M: 72 C: 0
0%
M: 10 C: 0
0%
M: 6 C: 0
0%
M: 19 C: 0
0%
M: 1 C: 0
0%
nonascii(int)
M: 19 C: 0
0%
M: 10 C: 0
0%
M: 6 C: 0
0%
M: 1 C: 0
0%
M: 1 C: 0
0%
qEncodedLength(byte[], boolean)
M: 37 C: 0
0%
M: 10 C: 0
0%
M: 6 C: 0
0%
M: 8 C: 0
0%
M: 1 C: 0
0%
quote(String, String)
M: 139 C: 0
0%
M: 38 C: 0
0%
M: 20 C: 0
0%
M: 29 C: 0
0%
M: 1 C: 0
0%
static {...}
M: 313 C: 0
0%
M: 6 C: 0
0%
M: 4 C: 0
0%
M: 71 C: 0
0%
M: 1 C: 0
0%
unfold(String)
M: 136 C: 0
0%
M: 28 C: 0
0%
M: 15 C: 0
0%
M: 29 C: 0
0%
M: 1 C: 0
0%

Coverage

1: /*
2: * Copyright (c) 1997, 2021 Oracle and/or its affiliates. All rights reserved.
3: *
4: * This program and the accompanying materials are made available under the
5: * terms of the Eclipse Public License v. 2.0, which is available at
6: * http://www.eclipse.org/legal/epl-2.0.
7: *
8: * This Source Code may also be made available under the following Secondary
9: * Licenses when the conditions for such availability set forth in the
10: * Eclipse Public License v. 2.0 are satisfied: GNU General Public License,
11: * version 2 with the GNU Classpath Exception, which is available at
12: * https://www.gnu.org/software/classpath/license.html.
13: *
14: * SPDX-License-Identifier: EPL-2.0 OR GPL-2.0 WITH Classpath-exception-2.0
15: */
16:
17: package jakarta.mail.internet;
18:
19: import java.io.BufferedReader;
20: import java.io.ByteArrayInputStream;
21: import java.io.ByteArrayOutputStream;
22: import java.io.EOFException;
23: import java.io.IOException;
24: import java.io.InputStream;
25: import java.io.OutputStream;
26: import java.io.StringReader;
27: import java.io.UnsupportedEncodingException;
28: import java.nio.charset.Charset;
29: import java.util.HashMap;
30: import java.util.Locale;
31: import java.util.Map;
32: import java.util.NoSuchElementException;
33: import java.util.Properties;
34: import java.util.StringTokenizer;
35:
36: import jakarta.activation.DataHandler;
37: import jakarta.activation.DataSource;
38: import jakarta.mail.EncodingAware;
39: import jakarta.mail.MessagingException;
40: import jakarta.mail.util.LineInputStream;
41: import jakarta.mail.util.StreamProvider;
42: import jakarta.mail.util.StreamProvider.EncoderTypes;
43:
44: /**
45: * This is a utility class that provides various MIME related
46: * functionality. <p>
47: *
48: * There are a set of methods to encode and decode MIME headers as
49: * per RFC 2047. Note that, in general, these methods are
50: * <strong>not</strong> needed when using methods such as
51: * <code>setSubject</code> and <code>setRecipients</code>; Jakarta Mail
52: * will automatically encode and decode data when using these "higher
53: * level" methods. The methods below are only needed when manipulating
54: * raw MIME headers using <code>setHeader</code> and <code>getHeader</code>
55: * methods. A brief description on handling such headers is given below: <p>
56: *
57: * RFC 822 mail headers <strong>must</strong> contain only US-ASCII
58: * characters. Headers that contain non US-ASCII characters must be
59: * encoded so that they contain only US-ASCII characters. Basically,
60: * this process involves using either BASE64 or QP to encode certain
61: * characters. RFC 2047 describes this in detail. <p>
62: *
63: * In Java, Strings contain (16 bit) Unicode characters. ASCII is a
64: * subset of Unicode (and occupies the range 0 - 127). A String
65: * that contains only ASCII characters is already mail-safe. If the
66: * String contains non US-ASCII characters, it must be encoded. An
67: * additional complexity in this step is that since Unicode is not
68: * yet a widely used charset, one might want to first charset-encode
69: * the String into another charset and then do the transfer-encoding.
70: * <p>
71: * Note that to get the actual bytes of a mail-safe String (say,
72: * for sending over SMTP), one must do
73: * <blockquote><pre>
74: *
75: *        byte[] bytes = string.getBytes("iso-8859-1");        
76: *
77: * </pre></blockquote><p>
78: *
79: * The <code>setHeader</code> and <code>addHeader</code> methods
80: * on MimeMessage and MimeBodyPart assume that the given header values
81: * are Unicode strings that contain only US-ASCII characters. Hence
82: * the callers of those methods must ensure that the values they pass
83: * do not contain non US-ASCII characters. The methods in this class
84: * help do this. <p>
85: *
86: * The <code>getHeader</code> family of methods on MimeMessage and
87: * MimeBodyPart return the raw header value. These might be encoded
88: * as per RFC 2047, and if so, must be decoded into Unicode Strings.
89: * The methods in this class help to do this. <p>
90: *
91: * Several System properties control strict conformance to the MIME
92: * spec. Note that these are not session properties but must be set
93: * globally as System properties. <p>
94: *
95: * The <code>mail.mime.decodetext.strict</code> property controls
96: * decoding of MIME encoded words. The MIME spec requires that encoded
97: * words start at the beginning of a whitespace separated word. Some
98: * mailers incorrectly include encoded words in the middle of a word.
99: * If the <code>mail.mime.decodetext.strict</code> System property is
100: * set to <code>"false"</code>, an attempt will be made to decode these
101: * illegal encoded words. The default is true. <p>
102: *
103: * The <code>mail.mime.encodeeol.strict</code> property controls the
104: * choice of Content-Transfer-Encoding for MIME parts that are not of
105: * type "text". Often such parts will contain textual data for which
106: * an encoding that allows normal end of line conventions is appropriate.
107: * In rare cases, such a part will appear to contain entirely textual
108: * data, but will require an encoding that preserves CR and LF characters
109: * without change. If the <code>mail.mime.encodeeol.strict</code>
110: * System property is set to <code>"true"</code>, such an encoding will
111: * be used when necessary. The default is false. <p>
112: *
113: * In addition, the <code>mail.mime.charset</code> System property can
114: * be used to specify the default MIME charset to use for encoded words
115: * and text parts that don't otherwise specify a charset. Normally, the
116: * default MIME charset is derived from the default Java charset, as
117: * specified in the <code>file.encoding</code> System property. Most
118: * applications will have no need to explicitly set the default MIME
119: * charset. In cases where the default MIME charset to be used for
120: * mail messages is different than the charset used for files stored on
121: * the system, this property should be set. <p>
122: *
123: * The current implementation also supports the following System property.
124: * <p>
125: * The <code>mail.mime.ignoreunknownencoding</code> property controls
126: * whether unknown values in the <code>Content-Transfer-Encoding</code>
127: * header, as passed to the <code>decode</code> method, cause an exception.
128: * If set to <code>"true"</code>, unknown values are ignored and 8bit
129: * encoding is assumed. Otherwise, unknown values cause a MessagingException
130: * to be thrown.
131: *
132: * @author John Mani
133: * @author Bill Shannon
134: */
135:
136: public class MimeUtility {
137:
/** Not instantiable: every member of this class is static. */
private MimeUtility() {
}

/**
 * Value handed to {@code checkAscii(InputStream, int, boolean)} as its
 * {@code int} argument by {@code getEncoding(DataSource)}.
 * NOTE(review): presumably means "examine the whole stream" - confirm
 * against {@code checkAscii}.
 */
public static final int ALL = -1;

/**
 * Cache, keyed by lower-cased charset name, of whether that charset was
 * found to be incompatible with ASCII. All access is done while
 * synchronized on the map itself.
 */
private static final Map<String, Boolean> nonAsciiCharsetMap =
        new HashMap<>();

// Character sets treated as "special" for words and for text.
private static final String WORD_SPECIALS = "=_?\"#$%&'(),.:;<>@[\\]^`{|}~";
private static final String TEXT_SPECIALS = "=_?";

// System property "mail.mime.decodetext.strict", default true.
private static final boolean decodeStrict =
        getBooleanSystemProperty("mail.mime.decodetext.strict", true);

// System property "mail.mime.encodeeol.strict", default false.
private static final boolean encodeEolStrict =
        getBooleanSystemProperty("mail.mime.encodeeol.strict", false);

// System property "mail.mime.ignoreunknownencoding", default false.
private static final boolean ignoreUnknownEncoding =
        getBooleanSystemProperty("mail.mime.ignoreunknownencoding", false);

// System property "mail.mime.allowutf8", default false.
private static final boolean allowUtf8 =
        getBooleanSystemProperty("mail.mime.allowutf8", false);

/*
 * The next two switches exist purely as a compatibility escape hatch:
 * they allow the fold() and unfold() methods to be disabled so that the
 * previous behavior is restored. They should never need to be changed.
 */
private static final boolean foldEncodedWords =
        getBooleanSystemProperty("mail.mime.foldencodedwords", false);
private static final boolean foldText =
        getBooleanSystemProperty("mail.mime.foldtext", true);
163:
164:
165: /**
166: * Get the Content-Transfer-Encoding that should be applied
167: * to the input stream of this DataSource, to make it mail-safe. <p>
168: *
169: * The algorithm used here is: <br>
170: * <ul>
171: * <li>
172: * If the DataSource implements {@link EncodingAware}, ask it
173: * what encoding to use. If it returns non-null, return that value.
174: * <li>
175: * If the primary type of this datasource is "text" and if all
176: * the bytes in its input stream are US-ASCII, then the encoding
177: * is StreamProvider.BIT7_ENCODER. If more than half of the bytes are non-US-ASCII, then
178: * the encoding is StreamProvider.BASE_64_ENCODER. If less than half of the bytes are
179: * non-US-ASCII, then the encoding is StreamProvider.QUOTED_PRINTABLE_ENCODER.
180: * <li>
181: * If the primary type of this datasource is not "text", then if
182: * all the bytes of its input stream are US-ASCII, the encoding
183: * is StreamProvider.BIT7_ENCODER. If there is even one non-US-ASCII character, the
184: * encoding is StreamProvider.BASE_64_ENCODER.
185: * </ul>
186: *
187: * @param        ds        the DataSource
188: * @return                the encoding. This is either StreamProvider.BIT7_ENCODER,
189: *                        StreamProvider.QUOTED_PRINTABLE_ENCODER or StreamProvider.BASE_64_ENCODER
190: */
191: public static String getEncoding(DataSource ds) {
192:         ContentType cType = null;
193:         InputStream is = null;
194:         String encoding = null;
195:
196:•        if (ds instanceof EncodingAware) {
197:          encoding = ((EncodingAware)ds).getEncoding();
198:•         if (encoding != null)
199:                 return encoding;
200:         }
201:         try {
202:          cType = new ContentType(ds.getContentType());
203:          is = ds.getInputStream();
204:
205:          boolean isText = cType.match("text/*");
206:          // if not text, stop processing when we see non-ASCII
207:•         int i = checkAscii(is, ALL, !isText);
208:•         switch (i) {
209:          case ALL_ASCII:
210:                 encoding = EncoderTypes.BIT7_ENCODER.getEncoder(); // all ASCII
211:                 break;
212:          case MOSTLY_ASCII:
213:•                if (isText && nonAsciiCharset(cType))
214:                  encoding = EncoderTypes.BASE_64.getEncoder(); // charset isn't compatible with ASCII
215:                 else
216:                  encoding = EncoderTypes.QUOTED_PRINTABLE_ENCODER.getEncoder();        // mostly ASCII
217:                 break;
218:          default:
219:                 encoding = EncoderTypes.BASE_64.getEncoder(); // mostly binary
220:                 break;
221:          }
222:
223:         } catch (Exception ex) {
224:          return EncoderTypes.BASE_64.getEncoder(); // what else ?!
225:         } finally {
226:          // Close the input stream
227:          try {
228:•                if (is != null)
229:                  is.close();
230:          } catch (IOException ioex) { }
231:         }
232:
233:         return encoding;
234: }
235:
236: /**
237: * Determine whether the charset in the Content-Type is compatible
238: * with ASCII or not. A charset is compatible with ASCII if the
239: * encoded byte stream representing the Unicode string "\r\n" is
240: * the ASCII characters CR and LF. For example, the utf-16be
241: * charset is not compatible with ASCII.
242: *
243: * For performance, we keep a static map that caches the results.
244: */
245: private static boolean nonAsciiCharset(ContentType ct) {
246:         String charset = ct.getParameter("charset");
247:•        if (charset == null)
248:          return false;
249:         charset = charset.toLowerCase(Locale.ENGLISH);
250:         Boolean bool;
251:         synchronized (nonAsciiCharsetMap) {
252:          bool = nonAsciiCharsetMap.get(charset);
253:         }
254:•        if (bool == null) {
255:          try {
256:                 byte[] b = "\r\n".getBytes(charset);
257:•                bool = Boolean.valueOf(
258:                  b.length != 2 || b[0] != 015 || b[1] != 012);
259:          } catch (UnsupportedEncodingException uex) {
260:                 bool = Boolean.FALSE;        // a guess
261:          } catch (RuntimeException ex) {
262:                 bool = Boolean.TRUE;        // one of the weird ones?
263:          }
264:          synchronized (nonAsciiCharsetMap) {
265:                 nonAsciiCharsetMap.put(charset, bool);
266:          }
267:         }
268:         return bool.booleanValue();
269: }
270:
271: /**
272: * Same as <code>getEncoding(DataSource)</code> except that instead
273: * of reading the data from an <code>InputStream</code> it uses the
274: * <code>writeTo</code> method to examine the data. This is more
275: * efficient in the common case of a <code>DataHandler</code>
276: * created with an object and a MIME type (for example, a
277: * "text/plain" String) because all the I/O is done in this
278: * thread. In the case requiring an <code>InputStream</code> the
279: * <code>DataHandler</code> uses a thread, a pair of pipe streams,
280: * and the <code>writeTo</code> method to produce the data. <p>
281: *
282: * @param        dh        the DataHandler
283: * @return        the Content-Transfer-Encoding
284: * @since        JavaMail 1.2
285: */
286: public static String getEncoding(DataHandler dh) {
287:         ContentType cType = null;
288:         String encoding = null;
289:
290:         /*
291:          * Try to pick the most efficient means of determining the
292:          * encoding. If this DataHandler was created using a DataSource,
293:          * the getEncoding(DataSource) method is typically faster. If
294:          * the DataHandler was created with an object, this method is
295:          * much faster. To distinguish the two cases, we use a heuristic.
296:          * A DataHandler created with an object will always have a null name.
297:          * A DataHandler created with a DataSource will usually have a
298:          * non-null name.
299:          *
300:          * XXX - This is actually quite a disgusting hack, but it makes
301:          *         a common case run over twice as fast.
302:          */
303:•        if (dh.getName() != null)
304:          return getEncoding(dh.getDataSource());
305:
306:         try {
307:          cType = new ContentType(dh.getContentType());
308:         } catch (Exception ex) {
309:          return EncoderTypes.BASE_64.getEncoder(); // what else ?!
310:         }
311:
312:•        if (cType.match("text/*")) {
313:          // Check all of the available bytes
314:          AsciiOutputStream aos = new AsciiOutputStream(false, false);
315:          try {
316:                 dh.writeTo(aos);
317:          } catch (IOException ex) {
318:                  // ignore it, can't happen
319:          }
320:•         switch (aos.getAscii()) {
321:          case ALL_ASCII:
322:                 encoding = EncoderTypes.BIT7_ENCODER.getEncoder(); // all ascii
323:                 break;
324:          case MOSTLY_ASCII:
325:                 encoding = EncoderTypes.QUOTED_PRINTABLE_ENCODER.getEncoder(); // mostly ascii
326:                 break;
327:          default:
328:                 encoding = EncoderTypes.BASE_64.getEncoder(); // mostly binary
329:                 break;
330:          }
331:         } else { // not "text"
332:          // Check all of available bytes, break out if we find
333:          // at least one non-US-ASCII character
334:          AsciiOutputStream aos =
335:                         new AsciiOutputStream(true, encodeEolStrict);
336:          try {
337:                 dh.writeTo(aos);
338:          } catch (IOException ex) { }        // ignore it
339:•         if (aos.getAscii() == ALL_ASCII) // all ascii
340:                 encoding = EncoderTypes.BIT7_ENCODER.getEncoder();
341:          else // found atleast one non-ascii character, use b64
342:                 encoding = EncoderTypes.BASE_64.getEncoder();
343:         }
344:
345:         return encoding;
346: }
347:
348: /**
349: * Decode the given input stream. The Input stream returned is
350: * the decoded input stream. All the encodings defined in RFC 2045
351: * are supported here. They include StreamProvider.BASE_64_ENCODER, StreamProvider.QUOTED_PRINTABLE_ENCODER,
352: * StreamProvider.BIT7_ENCODER, StreamProvider.BIT8_ENCODER, and StreamProvider.BINARY_ENCODER. In addition, StreamProvider.UU_ENCODER is also
353: * supported. <p>
354: *
355: * In the current implementation, if the
356: * <code>mail.mime.ignoreunknownencoding</code> system property is set to
357: * <code>"true"</code>, unknown encoding values are ignored and the
358: * original InputStream is returned.
359: *
360: * @param        is                input stream
361: * @param        encoding        the encoding of the stream.
362: * @return                        decoded input stream.
363: * @exception MessagingException        if the encoding is unknown
364: */
365: public static InputStream decode(InputStream is, String encoding)
366:                 throws MessagingException {
367:•        if (encoding.equalsIgnoreCase(EncoderTypes.BASE_64.getEncoder()))
368:                 return StreamProvider.provider().inputBase64(is);
369:•        else if (encoding.equalsIgnoreCase(EncoderTypes.QUOTED_PRINTABLE_ENCODER.getEncoder()))
370:                 return StreamProvider.provider().inputQP(is);
371:•        else if (encoding.equalsIgnoreCase(EncoderTypes.UU_ENCODER.getEncoder()) ||
372:•                 encoding.equalsIgnoreCase(EncoderTypes.X_UU_ENCODER.getEncoder()) ||
373:•                 encoding.equalsIgnoreCase(EncoderTypes.X_UUE.getEncoder()))
374:                 return StreamProvider.provider().inputUU(is);
375:•        else if (encoding.equalsIgnoreCase(EncoderTypes.BINARY_ENCODER.getEncoder()) ||
376:•                 encoding.equalsIgnoreCase(EncoderTypes.BIT7_ENCODER.getEncoder()) ||
377:•                 encoding.equalsIgnoreCase(EncoderTypes.BIT8_ENCODER.getEncoder()))
378:                 return StreamProvider.provider().inputBinary(is);
379:         else {
380:•         if (!ignoreUnknownEncoding)
381:                 throw new MessagingException("Unknown encoding: " + encoding);
382:          return is;
383:         }
384: }
385:
386: /**
387: * Wrap an encoder around the given output stream.
388: * All the encodings defined in RFC 2045 are supported here.
389: * They include StreamProvider.BASE_64_ENCODER, StreamProvider.QUOTED_PRINTABLE_ENCODER, StreamProvider.BIT7_ENCODER, StreamProvider.BIT8_ENCODER and
390: * StreamProvider.BINARY_ENCODER. In addition, StreamProvider.UU_ENCODER is also supported.
391: *
392: * @param        os                output stream
393: * @param        encoding        the encoding of the stream.
394: * @return                        output stream that applies the
395: *                                specified encoding.
396: * @exception MessagingException        if the encoding is unknown
397: */
398: public static OutputStream encode(OutputStream os, String encoding)
399:                 throws MessagingException {
400:• if (encoding == null)
401:          return os;
402:•        else if (encoding.equalsIgnoreCase(EncoderTypes.BASE_64.getEncoder()))
403:                 return StreamProvider.provider().outputBase64(os);
404:•        else if (encoding.equalsIgnoreCase(EncoderTypes.QUOTED_PRINTABLE_ENCODER.getEncoder()))
405: return StreamProvider.provider().outputQP(os);
406:•        else if (encoding.equalsIgnoreCase(EncoderTypes.UU_ENCODER.getEncoder()) ||
407:•                 encoding.equalsIgnoreCase(EncoderTypes.X_UU_ENCODER.getEncoder()) ||
408:•                 encoding.equalsIgnoreCase(EncoderTypes.X_UUE.getEncoder()))
409:                 return StreamProvider.provider().outputUU(os, null);
410:•        else if (encoding.equalsIgnoreCase(EncoderTypes.BINARY_ENCODER.getEncoder()) ||
411:•                 encoding.equalsIgnoreCase(EncoderTypes.BIT7_ENCODER.getEncoder()) ||
412:•                 encoding.equalsIgnoreCase(EncoderTypes.BIT8_ENCODER.getEncoder()))
413:          return StreamProvider.provider().outputBinary(os);
414:         else
415:          throw new MessagingException("Unknown encoding: " +encoding);
416: }
417:
418: /**
419: * Wrap an encoder around the given output stream.
420: * All the encodings defined in RFC 2045 are supported here.
421: * They include StreamProvider.BASE_64_ENCODER, StreamProvider.QUOTED_PRINTABLE_ENCODER, StreamProvider.BIT7_ENCODER, StreamProvider.BIT8_ENCODER and
422: * StreamProvider.BINARY_ENCODER. In addition, StreamProvider.UU_ENCODER is also supported.
423: * The <code>filename</code> parameter is used with the StreamProvider.UU_ENCODER
424: * encoding and is included in the encoded output.
425: *
426: * @param os output stream
427: * @param encoding the encoding of the stream.
428: * @param filename name for the file being encoded (only used
429: * with uuencode)
430: * @return output stream that applies the
431: * specified encoding.
432: * @exception                MessagingException for unknown encodings
433: * @since JavaMail 1.2
434: */
435: public static OutputStream encode(OutputStream os, String encoding,
436: String filename)
437: throws MessagingException {
438:• if (encoding == null)
439: return os;
440:• else if (encoding.equalsIgnoreCase(EncoderTypes.BASE_64.getEncoder()))
441:         return StreamProvider.provider().outputBase64(os);
442:• else if (encoding.equalsIgnoreCase(EncoderTypes.QUOTED_PRINTABLE_ENCODER.getEncoder()))
443:         return StreamProvider.provider().outputQP(os);
444:• else if (encoding.equalsIgnoreCase(EncoderTypes.UU_ENCODER.getEncoder()) ||
445:• encoding.equalsIgnoreCase(EncoderTypes.X_UU_ENCODER.getEncoder()) ||
446:• encoding.equalsIgnoreCase(EncoderTypes.X_UUE.getEncoder()))
447:         return StreamProvider.provider().outputUU(os, filename);
448:• else if (encoding.equalsIgnoreCase(EncoderTypes.BINARY_ENCODER.getEncoder()) ||
449:• encoding.equalsIgnoreCase(EncoderTypes.BIT7_ENCODER.getEncoder()) ||
450:• encoding.equalsIgnoreCase(EncoderTypes.BIT8_ENCODER.getEncoder()))
451:         return StreamProvider.provider().outputBinary(os);
452: else
453: throw new MessagingException("Unknown encoding: " +encoding);
454: }
455:
456: /**
457: * Encode a RFC 822 "text" token into mail-safe form as per
458: * RFC 2047. <p>
459: *
460: * The given Unicode string is examined for non US-ASCII
461: * characters. If the string contains only US-ASCII characters,
462: * it is returned as-is. If the string contains non US-ASCII
463: * characters, it is first character-encoded using the platform's
464: * default charset, then transfer-encoded using either the B or
465: * Q encoding. The resulting bytes are then returned as a Unicode
466: * string containing only ASCII characters. <p>
467: *
468: * Note that this method should be used to encode only
469: * "unstructured" RFC 822 headers. <p>
470: *
471: * Example of usage:
472: * <blockquote><pre>
473: *
474: * MimePart part = ...
475: * String rawvalue = "FooBar Mailer, Japanese version 1.1"
476: * try {
477: * // If we know for sure that rawvalue contains only US-ASCII
478: * // characters, we can skip the encoding part
479: * part.setHeader("X-mailer", MimeUtility.encodeText(rawvalue));
480: * } catch (UnsupportedEncodingException e) {
481: * // encoding failure
482: * } catch (MessagingException me) {
483: * // setHeader() failure
484: * }
485: *
486: * </pre></blockquote><p>
487: *
488: * @param        text        Unicode string
489: * @return        Unicode string containing only US-ASCII characters
490: * @exception UnsupportedEncodingException if the encoding fails
491: */
492: public static String encodeText(String text)
493:                         throws UnsupportedEncodingException {
494:         return encodeText(text, null, null);
495: }
496:
497: /**
498: * Encode a RFC 822 "text" token into mail-safe form as per
499: * RFC 2047. <p>
500: *
501: * The given Unicode string is examined for non US-ASCII
502: * characters. If the string contains only US-ASCII characters,
503: * it is returned as-is. If the string contains non US-ASCII
504: * characters, it is first character-encoded using the specified
505: * charset, then transfer-encoded using either the B or Q encoding.
506: * The resulting bytes are then returned as a Unicode string
507: * containing only ASCII characters. <p>
508: *
509: * Note that this method should be used to encode only
510: * "unstructured" RFC 822 headers.
511: *
512: * @param        text        the header value
513: * @param        charset        the charset. If this parameter is null, the
514: *                platform's default chatset is used.
515: * @param        encoding the encoding to be used. Currently supported
516: *                values are "B" and "Q". If this parameter is null, then
517: *                the "Q" encoding is used if most of characters to be
518: *                encoded are in the ASCII charset, otherwise "B" encoding
519: *                is used.
520: * @return        Unicode string containing only US-ASCII characters
521: * @exception UnsupportedEncodingException if the charset
522: *                        conversion failed.
523: */
524: public static String encodeText(String text, String charset,
525:                                  String encoding)
526:                         throws UnsupportedEncodingException {
527:         return encodeWord(text, charset, encoding, false);
528: }
529:
530: /**
531: * Decode "unstructured" headers, that is, headers that are defined
532: * as '*text' as per RFC 822. <p>
533: *
534: * The string is decoded using the algorithm specified in
535: * RFC 2047, Section 6.1. If the charset-conversion fails
536: * for any sequence, an UnsupportedEncodingException is thrown.
537: * If the String is not an RFC 2047 style encoded header, it is
538: * returned as-is <p>
539: *
540: * Example of usage:
541: * <blockquote><pre>
542: *
543: * MimePart part = ...
544: * String rawvalue = null;
545: * String value = null;
546: * try {
547: * if ((rawvalue = part.getHeader("X-mailer")[0]) != null)
548: * value = MimeUtility.decodeText(rawvalue);
549: * } catch (UnsupportedEncodingException e) {
550: * // Don't care
551: * value = rawvalue;
552: * } catch (MessagingException me) { }
553: *
554: * return value;
555: *
556: * </pre></blockquote><p>
557: *
558: * @param        etext        the possibly encoded value
559: * @return        the decoded text
560: * @exception UnsupportedEncodingException if the charset
561: *                        conversion failed.
562: */
563: public static String decodeText(String etext)
564:                 throws UnsupportedEncodingException {
565:         /*
566:          * We look for sequences separated by "linear-white-space".
567:          * (as per RFC 2047, Section 6.1)
568:          * RFC 822 defines "linear-white-space" as SPACE | HT | CR | NL.
569:          */
570:         String lwsp = " \t\n\r";
571:         StringTokenizer st;
572:
573:         /*
574:          * First, lets do a quick run thru the string and check
575:          * whether the sequence "=?" exists at all. If none exists,
576:          * we know there are no encoded-words in here and we can just
577:          * return the string as-is, without suffering thru the later
578:          * decoding logic.
579:          * This handles the most common case of unencoded headers
580:          * efficiently.
581:          */
582:•        if (etext.indexOf("=?") == -1)
583:          return etext;
584:
585:         // Encoded words found. Start decoding ...
586:
587:         st = new StringTokenizer(etext, lwsp, true);
588:         StringBuilder sb = new StringBuilder(); // decode buffer
589:         StringBuilder wsb = new StringBuilder(); // white space buffer
590:         boolean prevWasEncoded = false;
591:
592:•        while (st.hasMoreTokens()) {
593:          char c;
594:          String s = st.nextToken();
595:          // If whitespace, append it to the whitespace buffer
596:•         if (((c = s.charAt(0)) == ' ') || (c == '\t') ||
597:                 (c == '\r') || (c == '\n'))
598:                 wsb.append(c);
599:          else {
600:                 // Check if token is an 'encoded-word' ..
601:                 String word;
602:                 try {
603:                  word = decodeWord(s);
604:                  // Yes, this IS an 'encoded-word'.
605:•                 if (!prevWasEncoded && wsb.length() > 0) {
606:                         // if the previous word was also encoded, we
607:                         // should ignore the collected whitespace. Else
608:                         // we include the whitespace as well.
609:                         sb.append(wsb);
610:                  }
611:                  prevWasEncoded = true;
612:                 } catch (ParseException pex) {
613:                  // This is NOT an 'encoded-word'.
614:                  word = s;
615:                  // possibly decode inner encoded words
616:•                 if (!decodeStrict) {
617:                         String dword = decodeInnerWords(word);
618:•                        if (dword != word) {
619:                          // if a different String object was returned,
620:                          // decoding was done.
621:•                         if (prevWasEncoded && word.startsWith("=?")) {
622:                                 // encoded followed by encoded,
623:                                 // throw away whitespace between
624:                          } else {
625:                                 // include collected whitespace ..
626:•                                if (wsb.length() > 0)
627:                                  sb.append(wsb);
628:                          }
629:                          // did original end with encoded?
630:                          prevWasEncoded = word.endsWith("?=");
631:                          word = dword;
632:                         } else {
633:                          // include collected whitespace ..
634:•                         if (wsb.length() > 0)
635:                                 sb.append(wsb);
636:                          prevWasEncoded = false;
637:                         }
638:                  } else {
639:                         // include collected whitespace ..
640:•                        if (wsb.length() > 0)
641:                          sb.append(wsb);
642:                         prevWasEncoded = false;
643:                  }
644:                 }
645:                 sb.append(word); // append the actual word
646:                 wsb.setLength(0); // reset wsb for reuse
647:          }
648:         }
649:         sb.append(wsb);                // append trailing whitespace
650:         return sb.toString();
651: }
652:
653: /**
654: * Encode a RFC 822 "word" token into mail-safe form as per
655: * RFC 2047. <p>
656: *
657: * The given Unicode string is examined for non US-ASCII
658: * characters. If the string contains only US-ASCII characters,
659: * it is returned as-is. If the string contains non US-ASCII
660: * characters, it is first character-encoded using the platform's
661: * default charset, then transfer-encoded using either the B or
662: * Q encoding. The resulting bytes are then returned as a Unicode
663: * string containing only ASCII characters. <p>
664: *
665: * This method is meant to be used when creating RFC 822 "phrases".
666: * The InternetAddress class, for example, uses this to encode
667: * it's 'phrase' component.
668: *
669: * @param        word        Unicode string
670: * @return        Array of Unicode strings containing only US-ASCII
671: *                characters.
672: * @exception UnsupportedEncodingException if the encoding fails
673: */
674: public static String encodeWord(String word)
675:                         throws UnsupportedEncodingException {
676:         return encodeWord(word, null, null);
677: }
678:
679: /**
680: * Encode a RFC 822 "word" token into mail-safe form as per
681: * RFC 2047. <p>
682: *
683: * The given Unicode string is examined for non US-ASCII
684: * characters. If the string contains only US-ASCII characters,
685: * it is returned as-is. If the string contains non US-ASCII
686: * characters, it is first character-encoded using the specified
687: * charset, then transfer-encoded using either the B or Q encoding.
688: * The resulting bytes are then returned as a Unicode string
689: * containing only ASCII characters. <p>
690: *
691: * @param        word        Unicode string
692: * @param        charset        the MIME charset
693: * @param        encoding the encoding to be used. Currently supported
694: *                values are "B" and "Q". If this parameter is null, then
695: *                the "Q" encoding is used if most of characters to be
696: *                encoded are in the ASCII charset, otherwise "B" encoding
697: *                is used.
698: * @return        Unicode string containing only US-ASCII characters
699: * @exception UnsupportedEncodingException if the encoding fails
700: */
701: public static String encodeWord(String word, String charset,
702:                                  String encoding)
703:                         throws UnsupportedEncodingException {
704:         return encodeWord(word, charset, encoding, true);
705: }
706:
707: /*
708: * Encode the given string. The parameter 'encodingWord' should
709: * be true if a RFC 822 "word" token is being encoded and false if a
710: * RFC 822 "text" token is being encoded. This is because the
711: * "Q" encoding defined in RFC 2047 has more restrictions when
712: * encoding "word" tokens. (Sigh)
713: */
714: private static String encodeWord(String string, String charset,
715:                                  String encoding, boolean encodingWord)
716:                         throws UnsupportedEncodingException {
717:
718:         // If 'string' contains only US-ASCII characters, just
719:         // return it.
720:         int ascii = checkAscii(string);
721:•        if (ascii == ALL_ASCII)
722:          return string;
723:
724:         // Else, apply the specified charset conversion.
725:         String jcharset;
726:•        if (charset == null) { // use default charset
727:          jcharset = getDefaultJavaCharset(); // the java charset
728:          charset = getDefaultMIMECharset(); // the MIME equivalent
729:         } else // MIME charset -> java charset
730:          jcharset = javaCharset(charset);
731:
732:         // If no transfer-encoding is specified, figure one out.
733:•        if (encoding == null) {
734:•         if (ascii != MOSTLY_NONASCII)
735:                 encoding = "Q";
736:          else
737:                 encoding = "B";
738:         }
739:
740:         boolean b64;
741:•        if (encoding.equalsIgnoreCase("B"))
742:          b64 = true;
743:•        else if (encoding.equalsIgnoreCase("Q"))
744:          b64 = false;
745:         else
746:          throw new UnsupportedEncodingException(
747:                         "Unknown transfer encoding: " + encoding);
748:
749:         StringBuilder outb = new StringBuilder(); // the output buffer
750:         doEncode(string, b64, jcharset,
751:                  // As per RFC 2047, size of an encoded string should not
752:                  // exceed 75 bytes.
753:                  // 7 = size of "=?", '?', 'B'/'Q', '?', "?="
754:                  75 - 7 - charset.length(), // the available space
755:                  "=?" + charset + "?" + encoding + "?", // prefix
756:                  true, encodingWord, outb);
757:
758:         return outb.toString();
759: }
760:
761: /**
762: * Returns the length of the encoded version of this byte array.
763: *
764: * @param b the byte array
765: * @return the length
766: */
767: private static int bEncodedLength(byte[] b) {
768: return ((b.length + 2)/3) * 4;
769: }
770:
771: /**
772: * Returns the length of the encoded version of this byte array.
773: *
774: * @param b the byte array
775: * @param encodingWord true if encoding words, false if encoding text
776: * @return the length
777: */
778: private static int qEncodedLength(byte[] b, boolean encodingWord) {
779: int len = 0;
780:• String specials = encodingWord ? WORD_SPECIALS: TEXT_SPECIALS;
781:• for (int i = 0; i < b.length; i++) {
782: int c = b[i] & 0xff; // Mask off MSB
783:• if (c < 040 || c >= 0177 || specials.indexOf(c) >= 0)
784: // needs encoding
785: len += 3; // Q-encoding is 1 -> 3 conversion
786: else
787: len++;
788: }
789: return len;
790: }
791:
792: private static void doEncode(String string, boolean b64,
793:                 String jcharset, int avail, String prefix,
794:                 boolean first, boolean encodingWord, StringBuilder buf)
795:                         throws UnsupportedEncodingException {
796:
797:         // First find out what the length of the encoded version of
798:         // 'string' would be.
799:         byte[] bytes = string.getBytes(jcharset);
800:         int len;
801:•        if (b64) // "B" encoding
802:          len = bEncodedLength(bytes);
803:         else // "Q"
804:          len = qEncodedLength(bytes, encodingWord);
805:         
806:         int size;
807:•        if ((len > avail) && ((size = string.length()) > 1)) {
808:          // If the length is greater than 'avail', split 'string'
809:          // into two and recurse.
810:          // Have to make sure not to split a Unicode surrogate pair.
811:          int split = size / 2;
812:•         if (Character.isHighSurrogate(string.charAt(split-1)))
813:                 split--;
814:•         if (split > 0)
815:                 doEncode(string.substring(0, split), b64, jcharset,
816:                          avail, prefix, first, encodingWord, buf);
817:          doEncode(string.substring(split, size), b64, jcharset,
818:                  avail, prefix, false, encodingWord, buf);
819:         } else {
820:          // length <= than 'avail'. Encode the given string
821:          ByteArrayOutputStream os = new ByteArrayOutputStream();
822:          OutputStream eos; // the encoder
823:•         if (b64) { // "B" encoding
824:          eos = StreamProvider.provider().outputB(os);
825:          } else { // "Q" encoding
826:          eos = StreamProvider.provider().outputQ(os, encodingWord);
827:          }
828:         
829:          try { // do the encoding
830:                 eos.write(bytes);
831:                 eos.close();
832:          } catch (IOException ioex) { }
833:
834:          byte[] encodedBytes = os.toByteArray(); // the encoded stuff
835:          // Now write out the encoded (all ASCII) bytes into our
836:          // StringBuilder
837:•         if (!first) // not the first line of this sequence
838:•                if (foldEncodedWords)
839:                  buf.append("\r\n "); // start a continuation line
840:                 else
841:                  buf.append(" "); // line will be folded later
842:
843:          buf.append(prefix);
844:•         for (int i = 0; i < encodedBytes.length; i++)
845:                 buf.append((char)encodedBytes[i]);
846:          buf.append("?="); // terminate the current sequence
847:         }
848: }
849:
850: /**
851: * The string is parsed using the rules in RFC 2047 and RFC 2231 for
852: * parsing an "encoded-word". If the parse fails, a ParseException is
853: * thrown. Otherwise, it is transfer-decoded, and then
854: * charset-converted into Unicode. If the charset-conversion
855: * fails, an UnsupportedEncodingException is thrown.<p>
856: *
857: * @param        eword        the encoded value
858: * @return        the decoded word
859: * @exception ParseException if the string is not an
860: *                        encoded-word as per RFC 2047 and RFC 2231.
861: * @exception UnsupportedEncodingException if the charset
862: *                        conversion failed.
863: */
864: public static String decodeWord(String eword)
865:                 throws ParseException, UnsupportedEncodingException {
866:
867:•        if (!eword.startsWith("=?")) // not an encoded word
868:          throw new ParseException(
869:                 "encoded word does not start with \"=?\": " + eword);
870:         
871:         // get charset
872:         int start = 2; int pos;
873:•        if ((pos = eword.indexOf('?', start)) == -1)
874:          throw new ParseException(
875:                 "encoded word does not include charset: " + eword);
876:         String charset = eword.substring(start, pos);
877:         int lpos = charset.indexOf('*');        // RFC 2231 language specified?
878:•        if (lpos >= 0)                                // yes, throw it away
879:          charset = charset.substring(0, lpos);
880:         charset = javaCharset(charset);
881:
882:         // get encoding
883:         start = pos+1;
884:•        if ((pos = eword.indexOf('?', start)) == -1)
885:          throw new ParseException(
886:                 "encoded word does not include encoding: " + eword);
887:         String encoding = eword.substring(start, pos);
888:
889:         // get encoded-sequence
890:         start = pos+1;
891:•        if ((pos = eword.indexOf("?=", start)) == -1)
892:          throw new ParseException(
893:                 "encoded word does not end with \"?=\": " + eword);
894:         /*
895:          * XXX - should include this, but leaving it out for compatibility...
896:          *
897:         if (decodeStrict && pos != eword.length() - 2)
898:          throw new ParseException(
899:                 "encoded word does not end with \"?=\": " + eword););
900:          */
901:         String word = eword.substring(start, pos);
902:
903:         try {
904:          String decodedWord;
905:•         if (word.length() > 0) {
906:                 // Extract the bytes from word
907:                 ByteArrayInputStream bis =
908:                  new ByteArrayInputStream(getBytes(word));
909:
910:                 // Get the appropriate decoder
911:                 InputStream is;
912:•                if (encoding.equalsIgnoreCase("B"))
913:                  is = StreamProvider.provider().inputBase64(bis);
914:•                else if (encoding.equalsIgnoreCase("Q"))
915:                  is = StreamProvider.provider().inputQ(bis);
916:                 else
917:                  throw new UnsupportedEncodingException(
918:                                  "unknown encoding: " + encoding);
919:
920:                 // For b64 & q, size of decoded word <= size of word. So
921:                 // the decoded bytes must fit into the 'bytes' array. This
922:                 // is certainly more efficient than writing bytes into a
923:                 // ByteArrayOutputStream and then pulling out the byte[]
924:                 // from it.
925:                 int count = bis.available();
926:                 byte[] bytes = new byte[count];
927:                 // count is set to the actual number of decoded bytes
928:                 count = is.read(bytes, 0, count);
929:
930:                 // Finally, convert the decoded bytes into a String using
931:                 // the specified charset
932:•                decodedWord = count <= 0 ? "" :
933:                                 new String(bytes, 0, count, charset);
934:          } else {
935:                 // no characters to decode, return empty string
936:                 decodedWord = "";
937:          }
938:•         if (pos + 2 < eword.length()) {
939:                 // there's still more text in the string
940:                 String rest = eword.substring(pos + 2);
941:•                if (!decodeStrict)
942:                  rest = decodeInnerWords(rest);
943:                 decodedWord += rest;
944:          }
945:          return decodedWord;
946:         } catch (UnsupportedEncodingException uex) {
947:          // explicitly catch and rethrow this exception, otherwise
948:          // the below IOException catch will swallow this up!
949:          throw uex;
950:         } catch (IOException ioex) {
951:          // Shouldn't happen.
952:          throw new ParseException(ioex.toString());
953:         } catch (IllegalArgumentException iex) {
954:          /* An unknown charset of the form ISO-XXX-XXX, will cause
955:          * the JDK to throw an IllegalArgumentException ... Since the
956:          * JDK will attempt to create a classname using this string,
957:          * but valid classnames must not contain the character '-',
958:          * and this results in an IllegalArgumentException, rather than
959:          * the expected UnsupportedEncodingException. Yikes
960:          */
961:          throw new UnsupportedEncodingException(charset);
962:         }
963: }
964:
965: /**
966: * Look for encoded words within a word. The MIME spec doesn't
967: * allow this, but many broken mailers, especially Japanese mailers,
968: * produce such incorrect encodings.
969: */
970: private static String decodeInnerWords(String word)
971:                                 throws UnsupportedEncodingException {
972:         int start = 0, i;
973:         StringBuilder buf = new StringBuilder();
974:•        while ((i = word.indexOf("=?", start)) >= 0) {
975:          buf.append(word.substring(start, i));
976:          // find first '?' after opening '=?' - end of charset
977:          int end = word.indexOf('?', i + 2);
978:•         if (end < 0)
979:                 break;
980:          // find next '?' after that - end of encoding
981:          end = word.indexOf('?', end + 1);
982:•         if (end < 0)
983:                 break;
984:          // find terminating '?='
985:          end = word.indexOf("?=", end + 1);
986:•         if (end < 0)
987:                 break;
988:          String s = word.substring(i, end + 2);
989:          try {
990:                 s = decodeWord(s);
991:          } catch (ParseException pex) {
992:                 // ignore it, just use the original string
993:          }
994:          buf.append(s);
995:          start = end + 2;
996:         }
997:•        if (start == 0)
998:          return word;
999:•        if (start < word.length())
1000:          buf.append(word.substring(start));
1001:         return buf.toString();
1002: }
1003:
1004: /**
1005: * A utility method to quote a word, if the word contains any
1006: * characters from the specified 'specials' list.<p>
1007: *
1008: * The <code>HeaderTokenizer</code> class defines two special
1009: * sets of delimiters - MIME and RFC 822. <p>
1010: *
1011: * This method is typically used during the generation of
1012: * RFC 822 and MIME header fields.
1013: *
1014: * @param        word        word to be quoted
1015: * @param        specials the set of special characters
1016: * @return                the possibly quoted word
1017: * @see        jakarta.mail.internet.HeaderTokenizer#MIME
1018: * @see        jakarta.mail.internet.HeaderTokenizer#RFC822
1019: */
1020: public static String quote(String word, String specials) {
1021:•        int len = word == null ? 0 : word.length();
1022:•        if (len == 0)
1023:          return "\"\"";        // an empty string is handled specially
1024:
1025:         /*
1026:          * Look for any "bad" characters, Escape and
1027:          * quote the entire string if necessary.
1028:          */
1029:         boolean needQuoting = false;
1030:•        for (int i = 0; i < len; i++) {
1031:          char c = word.charAt(i);
1032:•         if (c == '"' || c == '\\' || c == '\r' || c == '\n') {
1033:                 // need to escape them and then quote the whole string
1034:                 StringBuilder sb = new StringBuilder(len + 3);
1035:                 sb.append('"');
1036:                 sb.append(word.substring(0, i));
1037:                 int lastc = 0;
1038:•                for (int j = i; j < len; j++) {
1039:                  char cc = word.charAt(j);
1040:•                 if ((cc == '"') || (cc == '\\') ||
1041:                         (cc == '\r') || (cc == '\n'))
1042:•                        if (cc == '\n' && lastc == '\r')
1043:                          ;        // do nothing, CR was already escaped
1044:                         else
1045:                          sb.append('\\');        // Escape the character
1046:                  sb.append(cc);
1047:                  lastc = cc;
1048:                 }
1049:                 sb.append('"');
1050:                 return sb.toString();
1051:•         } else if (c < 040 || (c >= 0177 && !allowUtf8) ||
1052:•                 specials.indexOf(c) >= 0)
1053:                 // These characters cause the string to be quoted
1054:                 needQuoting = true;
1055:         }
1056:
1057:•        if (needQuoting) {
1058:          StringBuilder sb = new StringBuilder(len + 2);
1059:          sb.append('"').append(word).append('"');
1060:          return sb.toString();
1061:         } else
1062:          return word;
1063: }
1064:
1065: /**
1066: * Fold a string at linear whitespace so that each line is no longer
1067: * than 76 characters, if possible. If there are more than 76
1068: * non-whitespace characters consecutively, the string is folded at
1069: * the first whitespace after that sequence. The parameter
1070: * <code>used</code> indicates how many characters have been used in
1071: * the current line; it is usually the length of the header name. <p>
1072: *
1073: * Note that line breaks in the string aren't escaped; they probably
1074: * should be.
1075: *
1076: * @param        used        characters used in line so far
1077: * @param        s        the string to fold
1078: * @return                the folded string
1079: * @since                JavaMail 1.4
1080: */
1081: public static String fold(int used, String s) {
1082:•        if (!foldText)
1083:          return s;
1084:
1085:         int end;
1086:         char c;
1087:         // Strip trailing spaces and newlines
1088:•        for (end = s.length() - 1; end >= 0; end--) {
1089:          c = s.charAt(end);
1090:•         if (c != ' ' && c != '\t' && c != '\r' && c != '\n')
1091:                 break;
1092:         }
1093:•        if (end != s.length() - 1)
1094:          s = s.substring(0, end + 1);
1095:
1096:         // if the string fits now, just return it
1097:•        if (used + s.length() <= 76)
1098:          return makesafe(s);
1099:
1100:         // have to actually fold the string
1101:         StringBuilder sb = new StringBuilder(s.length() + 4);
1102:         char lastc = 0;
1103:•        while (used + s.length() > 76) {
1104:          int lastspace = -1;
1105:•         for (int i = 0; i < s.length(); i++) {
1106:•                if (lastspace != -1 && used + i > 76)
1107:                  break;
1108:                 c = s.charAt(i);
1109:•                if (c == ' ' || c == '\t')
1110:•                 if (!(lastc == ' ' || lastc == '\t'))
1111:                         lastspace = i;
1112:                 lastc = c;
1113:          }
1114:•         if (lastspace == -1) {
1115:                 // no space, use the whole thing
1116:                 sb.append(s);
1117:                 s = "";
1118:                 used = 0;
1119:                 break;
1120:          }
1121:          sb.append(s.substring(0, lastspace));
1122:          sb.append("\r\n");
1123:          lastc = s.charAt(lastspace);
1124:          sb.append(lastc);
1125:          s = s.substring(lastspace + 1);
1126:          used = 1;
1127:         }
1128:         sb.append(s);
1129:         return makesafe(sb);
1130: }
1131:
1132: /**
1133: * If the String or StringBuilder has any embedded newlines,
1134: * make sure they're followed by whitespace, to prevent header
1135: * injection errors.
1136: */
1137: private static String makesafe(CharSequence s) {
1138:         int i;
1139:•        for (i = 0; i < s.length(); i++) {
1140:          char c = s.charAt(i);
1141:•         if (c == '\r' || c == '\n')
1142:                 break;
1143:         }
1144:•        if (i == s.length())        // went through whole string with no CR or LF
1145:          return s.toString();
1146:
1147:         // read the lines in the string and reassemble them,
1148:         // eliminating blank lines and inserting whitespace as necessary
1149:         StringBuilder sb = new StringBuilder(s.length() + 1);
1150:         BufferedReader r = new BufferedReader(new StringReader(s.toString()));
1151:         String line;
1152:         try {
1153:•         while ((line = r.readLine()) != null) {
1154:•                if (line.trim().length() == 0)
1155:                  continue;        // ignore empty lines
1156:•                if (sb.length() > 0) {
1157:                  sb.append("\r\n");
1158:•                 assert line.length() > 0; // proven above
1159:                  char c = line.charAt(0);
1160:•                 if (c != ' ' && c != '\t')
1161:                         sb.append(' ');
1162:                 }
1163:                 sb.append(line);
1164:          }
1165:         } catch (IOException ex) {
1166:          // XXX - should never happen when reading from a string
1167:          return s.toString();
1168:         }
1169:         return sb.toString();
1170: }
1171:
1172: /**
1173: * Unfold a folded header. Any line breaks that aren't escaped and
1174: * are followed by whitespace are removed.
1175: *
1176: * @param        s        the string to unfold
1177: * @return                the unfolded string
1178: * @since                JavaMail 1.4
1179: */
1180: public static String unfold(String s) {
1181:•        if (!foldText)
1182:          return s;
1183:
1184:         StringBuilder sb = null;
1185:         int i;
1186:•        while ((i = indexOfAny(s, "\r\n")) >= 0) {
1187:          int start = i;
1188:          int slen = s.length();
1189:          i++;                // skip CR or NL
1190:•         if (i < slen && s.charAt(i - 1) == '\r' && s.charAt(i) == '\n')
1191:                 i++;        // skip LF
1192:•         if (start > 0 && s.charAt(start - 1) == '\\') {
1193:                 // there's a backslash before the line break
1194:                 // strip it out, but leave in the line break
1195:•                if (sb == null)
1196:                  sb = new StringBuilder(s.length());
1197:                 sb.append(s.substring(0, start - 1));
1198:                 sb.append(s.substring(start, i));
1199:                 s = s.substring(i);
1200:          } else {
1201:                 char c;
1202:                 // if next line starts with whitespace,
1203:                 // or at the end of the string, remove the line break
1204:                 // XXX - next line should always start with whitespace
1205:•                if (i >= slen || (c = s.charAt(i)) == ' ' || c == '\t') {
1206:•                 if (sb == null)
1207:                         sb = new StringBuilder(s.length());
1208:                  sb.append(s.substring(0, start));
1209:                  s = s.substring(i);
1210:                 } else {
1211:                  // it's not a continuation line, just leave in the newline
1212:•                 if (sb == null)
1213:                         sb = new StringBuilder(s.length());
1214:                  sb.append(s.substring(0, i));
1215:                  s = s.substring(i);
1216:                 }
1217:          }
1218:         }
1219:•        if (sb != null) {
1220:          sb.append(s);
1221:          return sb.toString();
1222:         } else
1223:          return s;
1224: }
1225:
1226: /**
1227: * Return the first index of any of the characters in "any" in "s",
1228: * or -1 if none are found.
1229: *
1230: * This should be a method on String.
1231: */
1232: private static int indexOfAny(String s, String any) {
1233:         return indexOfAny(s, any, 0);
1234: }
1235:
1236: private static int indexOfAny(String s, String any, int start) {
1237:         try {
1238:          int len = s.length();
1239:•         for (int i = start; i < len; i++) {
1240:•                if (any.indexOf(s.charAt(i)) >= 0)
1241:                  return i;
1242:          }
1243:          return -1;
1244:         } catch (StringIndexOutOfBoundsException e) {
1245:          return -1;
1246:         }
1247: }
1248:
1249: /**
1250: * Convert a MIME charset name into a valid Java charset name. <p>
1251: *
1252: * @param charset        the MIME charset name
1253: * @return the Java charset equivalent. If a suitable mapping is
1254: *                not available, the passed in charset is itself returned.
1255: */
1256: public static String javaCharset(String charset) {
1257:•        if (mime2java == null || charset == null)
1258:          // no mapping table, or charset parameter is null
1259:          return charset;
1260:
1261:         String alias = mime2java.get(charset.toLowerCase(Locale.ENGLISH));
1262:•        if (alias != null) {
1263:          // verify that the mapped name is valid before trying to use it
1264:          try {
1265:                 Charset.forName(alias);
1266:          } catch (Exception ex) {
1267:                 alias = null;        // charset alias not valid, use original name
1268:          }
1269:         }
1270:•        return alias == null ? charset : alias;
1271: }
1272:
1273: /**
1274: * Convert a java charset into its MIME charset name. <p>
1275: *
1276: * Note that a future version of JDK (post 1.2) might provide
1277: * this functionality, in which case, we may deprecate this
1278: * method then.
1279: *
1280: * @param charset the JDK charset
1281: * @return         the MIME/IANA equivalent. If a mapping
1282: *                        is not possible, the passed in charset itself
1283: *                        is returned.
1284: * @since                JavaMail 1.1
1285: */
1286: public static String mimeCharset(String charset) {
1287:•        if (java2mime == null || charset == null)
1288:          // no mapping table or charset param is null
1289:          return charset;
1290:
1291:         String alias = java2mime.get(charset.toLowerCase(Locale.ENGLISH));
1292:•        return alias == null ? charset : alias;
1293: }
1294:
1295: private static String defaultJavaCharset;
1296: private static String defaultMIMECharset;
1297:
1298: /**
1299: * Get the default charset corresponding to the system's current
1300: * default locale. If the System property <code>mail.mime.charset</code>
1301: * is set, a system charset corresponding to this MIME charset will be
1302: * returned. <p>
1303: *
1304: * @return        the default charset of the system's default locale,
1305: *                 as a Java charset. (NOT a MIME charset)
1306: * @since        JavaMail 1.1
1307: */
1308: public static String getDefaultJavaCharset() {
1309:•        if (defaultJavaCharset == null) {
1310:          /*
1311:          * If mail.mime.charset is set, it controls the default
1312:          * Java charset as well.
1313:          */
1314:          String mimecs = null;
1315:          try {
1316:                 mimecs = System.getProperty("mail.mime.charset");
1317:          } catch (SecurityException ex) { }        // ignore it
1318:•         if (mimecs != null && mimecs.length() > 0) {
1319:                 defaultJavaCharset = javaCharset(mimecs);
1320:                 return defaultJavaCharset;
1321:          }
1322:
1323:          try {
1324:                 defaultJavaCharset = System.getProperty("file.encoding",
1325:                                                         "8859_1");
1326:                 } catch (final SecurityException sex) {
1327:                         // fall back to ISO-Latin-1
1328:                         // don't use actual system encoding, because this might be
1329:                         // something completely different, like EBCDIC (IBM-037)
1330:•                        if (defaultJavaCharset == null) {
1331:                                 defaultJavaCharset = "8859_1";
1332:                         }
1333:                 }
1334:         }
1335:
1336:         return defaultJavaCharset;
1337: }
1338:
1339: /*
1340: * Get the default MIME charset for this locale.
1341: */
1342: static String getDefaultMIMECharset() {
1343:•        if (defaultMIMECharset == null) {
1344:          try {
1345:                 defaultMIMECharset = System.getProperty("mail.mime.charset");
1346:          } catch (SecurityException ex) { }        // ignore it
1347:         }
1348:•        if (defaultMIMECharset == null)
1349:          defaultMIMECharset = mimeCharset(getDefaultJavaCharset());
1350:         return defaultMIMECharset;
1351: }
1352:
1353: // Tables to map MIME charset names to Java names and vice versa.
1354: // XXX - Should eventually use J2SE 1.4 java.nio.charset.Charset
1355: private static Map<String, String> mime2java;
1356: private static Map<String, String> java2mime;
1357:
1358: static {
1359:         java2mime = new HashMap<>(40);
1360:         mime2java = new HashMap<>(14);
1361:
1362:         try {
1363:          // Use this class's classloader to load the mapping file
1364:          // XXX - we should use SecuritySupport, but it's in another package
1365:          InputStream is =
1366:                  jakarta.mail.internet.MimeUtility.class.getResourceAsStream(
1367:                  "/META-INF/javamail.charset.map");
1368:
1369:•         if (is != null) {
1370:                 try {
1371:                         LineInputStream lineInput = StreamProvider.provider().inputLineStream(is, false);
1372:
1373:                  // Load the JDK-to-MIME charset mapping table
1374:                  loadMappings(lineInput, java2mime);
1375:
1376:                  // Load the MIME-to-JDK charset mapping table
1377:                  loadMappings(lineInput, mime2java);
1378:                 } finally {
1379:                  try {
1380:                         is.close();
1381:                  } catch (Exception cex) {
1382:                         // ignore
1383:                  }
1384:                 }
1385:          }
1386:         } catch (Exception ex) { }
1387:
1388:         // If we didn't load the tables, e.g., because we didn't have
1389:         // permission, load them manually. The entries here should be
1390:         // the same as the default javamail.charset.map.
1391:•        if (java2mime.isEmpty()) {
1392:          java2mime.put("8859_1", "ISO-8859-1");
1393:          java2mime.put("iso8859_1", "ISO-8859-1");
1394:          java2mime.put("iso8859-1", "ISO-8859-1");
1395:
1396:          java2mime.put("8859_2", "ISO-8859-2");
1397:          java2mime.put("iso8859_2", "ISO-8859-2");
1398:          java2mime.put("iso8859-2", "ISO-8859-2");
1399:
1400:          java2mime.put("8859_3", "ISO-8859-3");
1401:          java2mime.put("iso8859_3", "ISO-8859-3");
1402:          java2mime.put("iso8859-3", "ISO-8859-3");
1403:
1404:          java2mime.put("8859_4", "ISO-8859-4");
1405:          java2mime.put("iso8859_4", "ISO-8859-4");
1406:          java2mime.put("iso8859-4", "ISO-8859-4");
1407:
1408:          java2mime.put("8859_5", "ISO-8859-5");
1409:          java2mime.put("iso8859_5", "ISO-8859-5");
1410:          java2mime.put("iso8859-5", "ISO-8859-5");
1411:
1412:          java2mime.put("8859_6", "ISO-8859-6");
1413:          java2mime.put("iso8859_6", "ISO-8859-6");
1414:          java2mime.put("iso8859-6", "ISO-8859-6");
1415:
1416:          java2mime.put("8859_7", "ISO-8859-7");
1417:          java2mime.put("iso8859_7", "ISO-8859-7");
1418:          java2mime.put("iso8859-7", "ISO-8859-7");
1419:
1420:          java2mime.put("8859_8", "ISO-8859-8");
1421:          java2mime.put("iso8859_8", "ISO-8859-8");
1422:          java2mime.put("iso8859-8", "ISO-8859-8");
1423:
1424:          java2mime.put("8859_9", "ISO-8859-9");
1425:          java2mime.put("iso8859_9", "ISO-8859-9");
1426:          java2mime.put("iso8859-9", "ISO-8859-9");
1427:
1428:          java2mime.put("sjis", "Shift_JIS");
1429:          java2mime.put("jis", "ISO-2022-JP");
1430:          java2mime.put("iso2022jp", "ISO-2022-JP");
1431:          java2mime.put("euc_jp", "euc-jp");
1432:          java2mime.put("koi8_r", "koi8-r");
1433:          java2mime.put("euc_cn", "euc-cn");
1434:          java2mime.put("euc_tw", "euc-tw");
1435:          java2mime.put("euc_kr", "euc-kr");
1436:         }
1437:•        if (mime2java.isEmpty()) {
1438:          mime2java.put("iso-2022-cn", "ISO2022CN");
1439:          mime2java.put("iso-2022-kr", "ISO2022KR");
1440:          mime2java.put("utf-8", "UTF8");
1441:          mime2java.put("utf8", "UTF8");
1442:          mime2java.put("ja_jp.iso2022-7", "ISO2022JP");
1443:          mime2java.put("ja_jp.eucjp", "EUCJIS");
1444:          mime2java.put("euc-kr", "KSC5601");
1445:          mime2java.put("euckr", "KSC5601");
1446:          mime2java.put("us-ascii", "ISO-8859-1");
1447:          mime2java.put("x-us-ascii", "ISO-8859-1");
1448:          mime2java.put("gb2312", "GB18030");
1449:          mime2java.put("cp936", "GB18030");
1450:          mime2java.put("ms936", "GB18030");
1451:          mime2java.put("gbk", "GB18030");
1452:         }
1453: }
1454:
1455: private static void loadMappings(LineInputStream is,
1456:          Map<String, String> table) {
1457:         String currLine;
1458:
1459:         while (true) {
1460:          try {
1461:                 currLine = is.readLine();
1462:          } catch (IOException ioex) {
1463:                 break; // error in reading, stop
1464:          }
1465:
1466:•         if (currLine == null) // end of file, stop
1467:                 break;
1468:•         if (currLine.startsWith("--") && currLine.endsWith("--"))
1469:                 // end of this table
1470:                 break;        
1471:
1472:          // ignore empty lines and comments
1473:•         if (currLine.trim().length() == 0 || currLine.startsWith("#"))
1474:                 continue;
1475:         
1476:          // A valid entry is of the form <key><separator><value>
1477:          // where, <separator> := SPACE | HT. Parse this
1478:          StringTokenizer tk = new StringTokenizer(currLine, " \t");
1479:          try {
1480:                 String key = tk.nextToken();
1481:                 String value = tk.nextToken();
1482:                 table.put(key.toLowerCase(Locale.ENGLISH), value);
1483:          } catch (NoSuchElementException nex) { }
1484:         }
1485: }
1486:
1487: static final int ALL_ASCII                 = 1;
1488: static final int MOSTLY_ASCII         = 2;
1489: static final int MOSTLY_NONASCII         = 3;
1490:
1491: /**
1492: * Check if the given string contains non US-ASCII characters.
1493: * @param        s        string
1494: * @return                ALL_ASCII if all characters in the string
1495: *                        belong to the US-ASCII charset. MOSTLY_ASCII
1496: *                        if more than half of the available characters
1497: *                        are US-ASCII characters. Else MOSTLY_NONASCII.
1498: */
1499: static int checkAscii(String s) {
1500:         int ascii = 0, non_ascii = 0;
1501:         int l = s.length();
1502:
1503:•        for (int i = 0; i < l; i++) {
1504:•         if (nonascii((int)s.charAt(i))) // non-ascii
1505:                 non_ascii++;
1506:          else
1507:                 ascii++;
1508:         }
1509:
1510:•        if (non_ascii == 0)
1511:          return ALL_ASCII;
1512:•        if (ascii > non_ascii)
1513:          return MOSTLY_ASCII;
1514:
1515:         return MOSTLY_NONASCII;
1516: }
1517:
1518: /**
1519: * Check if the given byte array contains non US-ASCII characters.
1520: * @param        b        byte array
1521: * @return                ALL_ASCII if all characters in the string
1522: *                        belong to the US-ASCII charset. MOSTLY_ASCII
1523: *                        if more than half of the available characters
1524: *                        are US-ASCII characters. Else MOSTLY_NONASCII.
1525: *
1526: * XXX - this method is no longer used
1527: */
1528: static int checkAscii(byte[] b) {
1529:         int ascii = 0, non_ascii = 0;
1530:
1531:•        for (int i=0; i < b.length; i++) {
1532:          // The '&' operator automatically causes b[i] to be promoted
1533:          // to an int, and we mask out the higher bytes in the int
1534:          // so that the resulting value is not a negative integer.
1535:•         if (nonascii(b[i] & 0xff)) // non-ascii
1536:                 non_ascii++;
1537:          else
1538:                 ascii++;
1539:         }
1540:         
1541:•        if (non_ascii == 0)
1542:          return ALL_ASCII;
1543:•        if (ascii > non_ascii)
1544:          return MOSTLY_ASCII;
1545:         
1546:         return MOSTLY_NONASCII;
1547: }
1548:
1549: /**
1550: * Check if the given input stream contains non US-ASCII characters.
1551: * Upto <code>max</code> bytes are checked. If <code>max</code> is
1552: * set to <code>ALL</code>, then all the bytes available in this
1553: * input stream are checked. If <code>breakOnNonAscii</code> is true
1554: * the check terminates when the first non-US-ASCII character is
1555: * found and MOSTLY_NONASCII is returned. Else, the check continues
1556: * till <code>max</code> bytes or till the end of stream.
1557: *
1558: * @param        is        the input stream
1559: * @param        max        maximum bytes to check for. The special value
1560: *                        ALL indicates that all the bytes in this input
1561: *                        stream must be checked.
1562: * @param        breakOnNonAscii if <code>true</code>, then terminate the
1563: *                        the check when the first non-US-ASCII character
1564: *                        is found.
1565: * @return                ALL_ASCII if all characters in the string
1566: *                        belong to the US-ASCII charset. MOSTLY_ASCII
1567: *                        if more than half of the available characters
1568: *                        are US-ASCII characters. Else MOSTLY_NONASCII.
1569: */
1570: static int checkAscii(InputStream is, int max, boolean breakOnNonAscii) {
1571:         int ascii = 0, non_ascii = 0;
1572:         int len;
1573:         int block = 4096;
1574:         int linelen = 0;
1575:         boolean longLine = false, badEOL = false;
1576:•        boolean checkEOL = encodeEolStrict && breakOnNonAscii;
1577:         byte buf[] = null;
1578:•        if (max != 0) {
1579:•         block = (max == ALL) ? 4096 : Math.min(max, 4096);
1580:          buf = new byte[block];
1581:         }
1582:•        while (max != 0) {
1583:          try {
1584:•                if ((len = is.read(buf, 0, block)) == -1)
1585:                  break;
1586:                 int lastb = 0;
1587:•                for (int i = 0; i < len; i++) {
1588:                   // The '&' operator automatically causes b[i] to
1589:                  // be promoted to an int, and we mask out the higher
1590:                  // bytes in the int so that the resulting value is
1591:                  // not a negative integer.
1592:                  int b = buf[i] & 0xff;
1593:•                 if (checkEOL &&
1594:                          ((lastb == '\r' && b != '\n') ||
1595:                          (lastb != '\r' && b == '\n')))
1596:                         badEOL = true;
1597:•                 if (b == '\r' || b == '\n')
1598:                         linelen = 0;
1599:                  else {
1600:                         linelen++;
1601:•                        if (linelen > 998)        // 1000 - CRLF
1602:                          longLine = true;
1603:                  }
1604:•                 if (nonascii(b)) {        // non-ascii
1605:•                 if (breakOnNonAscii) // we are done
1606:                          return MOSTLY_NONASCII;
1607:                  else
1608:                          non_ascii++;
1609:                  } else
1610:                  ascii++;
1611:                  lastb = b;
1612:                 }
1613:          } catch (IOException ioex) {
1614:                 break;
1615:          }
1616:•         if (max != ALL)
1617:                 max -= len;
1618:         }
1619:
1620:•        if (max == 0 && breakOnNonAscii)
1621:          // We have been told to break on the first non-ascii character.
1622:          // We haven't got any non-ascii character yet, but then we
1623:          // have not checked all of the available bytes either. So we
1624:          // cannot say for sure that this input stream is ALL_ASCII,
1625:          // and hence we must play safe and return MOSTLY_NONASCII
1626:
1627:          return MOSTLY_NONASCII;
1628:
1629:•        if (non_ascii == 0) { // no non-us-ascii characters so far
1630:          // If we're looking at non-text data, and we saw CR without LF
1631:          // or vice versa, consider this mostly non-ASCII so that it
1632:          // will be base64 encoded (since the quoted-printable encoder
1633:          // doesn't encode this case properly).
1634:•         if (badEOL)
1635:                 return MOSTLY_NONASCII;
1636:          // if we've seen a long line, we degrade to mostly ascii
1637:•         else if (longLine)
1638:                 return MOSTLY_ASCII;
1639:          else
1640:                 return ALL_ASCII;
1641:         }
1642:•        if (ascii > non_ascii) // mostly ascii
1643:          return MOSTLY_ASCII;
1644:         return MOSTLY_NONASCII;
1645: }
1646:
1647: static final boolean nonascii(int b) {
1648:•        return b >= 0177 || (b < 040 && b != '\r' && b != '\n' && b != '\t');
1649: }
1650:
1651: // This is a copy of ASCIIUtility#getBytes that was moved to implementation module
1652: public static byte[] getBytes(String s) {
1653: char [] chars= s.toCharArray();
1654: int size = chars.length;
1655: byte[] bytes = new byte[size];
1656:
1657:• for (int i = 0; i < size;)
1658: bytes[i] = (byte) chars[i++];
1659: return bytes;
1660: }
1661:
1662: // This is a copy of ASCIIUtility#getBytes that was moved to implementation module
1663: public static byte[] getBytes(InputStream is) throws IOException {
1664: int len;
1665: int size = 1024;
1666: byte [] buf;
1667:• if (is instanceof ByteArrayInputStream) {
1668: size = is.available();
1669: buf = new byte[size];
1670: len = is.read(buf, 0, size);
1671: } else {
1672: ByteArrayOutputStream bos = new ByteArrayOutputStream();
1673: buf = new byte[size];
1674:• while ((len = is.read(buf, 0, size)) != -1)
1675: bos.write(buf, 0, len);
1676: buf = bos.toByteArray();
1677: }
1678: return buf;
1679: }
1680:
1681: /**
1682: * Get a boolean valued property.
1683: *
1684: * @param        props        the properties
1685: * @param        name        the property name
1686: * @param        def        default value if property not found
1687: * @return                the property value
1688: */
1689: static boolean getBooleanProperty(Properties props, String name, boolean def) {
1690:         return getBoolean(getProp(props, name), def);
1691: }
1692:
1693: /**
1694: * Get a boolean valued System property.
1695: *
1696: * @param        name        the property name
1697: * @param        def        default value if property not found
1698: * @return                the property value
1699: */
1700: static boolean getBooleanSystemProperty(String name, boolean def) {
1701:                 try {
1702:                  return getBoolean(getProp(System.getProperties(), name), def);
1703:                 } catch (SecurityException sex) {
1704:                  // fall through...
1705:                 }
1706:         
1707:                 /*
1708:                  * If we can't get the entire System Properties object because
1709:                  * of a SecurityException, just ask for the specific property.
1710:                  */
1711:                 try {
1712:                  String value = System.getProperty(name);
1713:•                 if (value == null)
1714:                         return def;
1715:•                 if (def)
1716:•                        return !value.equalsIgnoreCase("false");
1717:                  else
1718:                         return value.equalsIgnoreCase("true");
1719:                 } catch (SecurityException sex) {
1720:                  return def;
1721:                 }
1722: }
1723:
1724: /**
1725: * Get the value of the specified property.
1726: * If the "get" method returns null, use the getProperty method,
1727: * which might cascade to a default Properties object.
1728: */
1729: private static Object getProp(Properties props, String name) {
1730:                 Object val = props.get(name);
1731:•                if (val != null)
1732:                  return val;
1733:                 else
1734:                  return props.getProperty(name);
1735: }
1736:
1737: /**
1738: * Interpret the value object as a boolean,
1739: * returning def if unable.
1740: */
1741: private static boolean getBoolean(Object value, boolean def) {
1742:•                if (value == null)
1743:                  return def;
1744:•                if (value instanceof String) {
1745:                  /*
1746:                  * If the default is true, only "false" turns it off.
1747:                  * If the default is false, only "true" turns it on.
1748:                  */
1749:•                 if (def)
1750:•                        return !((String)value).equalsIgnoreCase("false");
1751:                  else
1752:                         return ((String)value).equalsIgnoreCase("true");
1753:                 }
1754:•                if (value instanceof Boolean)
1755:                  return ((Boolean)value).booleanValue();
1756:                 return def;
1757: }
1758: }
1759:
1760: /**
1761: * An OutputStream that determines whether the data written to
1762: * it is all ASCII, mostly ASCII, or mostly non-ASCII.
1763: */
1764: class AsciiOutputStream extends OutputStream {
1765: private boolean breakOnNonAscii;
1766: private int ascii = 0, non_ascii = 0;
1767: private int linelen = 0;
1768: private boolean longLine = false;
1769: private boolean badEOL = false;
1770: private boolean checkEOL = false;
1771: private int lastb = 0;
1772: private int ret = 0;
1773:
1774: public AsciiOutputStream(boolean breakOnNonAscii, boolean encodeEolStrict) {
1775:         this.breakOnNonAscii = breakOnNonAscii;
1776:         checkEOL = encodeEolStrict && breakOnNonAscii;
1777: }
1778:
1779: @Override
1780: public void write(int b) throws IOException {
1781:         check(b);
1782: }
1783:
1784: @Override
1785: public void write(byte b[]) throws IOException {
1786:         write(b, 0, b.length);
1787: }
1788:
1789: @Override
1790: public void write(byte b[], int off, int len) throws IOException {
1791:         len += off;
1792:         for (int i = off; i < len ; i++)
1793:          check(b[i]);
1794: }
1795:
1796: private final void check(int b) throws IOException {
1797:         b &= 0xff;
1798:         if (checkEOL &&
1799:                 ((lastb == '\r' && b != '\n') || (lastb != '\r' && b == '\n')))
1800:          badEOL = true;
1801:         if (b == '\r' || b == '\n')
1802:          linelen = 0;
1803:         else {
1804:          linelen++;
1805:          if (linelen > 998)        // 1000 - CRLF
1806:                 longLine = true;
1807:         }
1808:         if (MimeUtility.nonascii(b)) { // non-ascii
1809:          non_ascii++;
1810:          if (breakOnNonAscii) {        // we are done
1811:                 ret = MimeUtility.MOSTLY_NONASCII;
1812:                 throw new EOFException();
1813:          }
1814:         } else
1815:          ascii++;
1816:         lastb = b;
1817: }
1818:
1819: /**
1820: * Return ASCII-ness of data stream.
1821: */
1822: public int getAscii() {
1823:         if (ret != 0)
1824:          return ret;
1825:         // If we're looking at non-text data, and we saw CR without LF
1826:         // or vice versa, consider this mostly non-ASCII so that it
1827:         // will be base64 encoded (since the quoted-printable encoder
1828:         // doesn't encode this case properly).
1829:         if (badEOL)
1830:          return MimeUtility.MOSTLY_NONASCII;
1831:         else if (non_ascii == 0) { // no non-us-ascii characters so far
1832:          // if we've seen a long line, we degrade to mostly ascii
1833:          if (longLine)
1834:                 return MimeUtility.MOSTLY_ASCII;
1835:          else
1836:                 return MimeUtility.ALL_ASCII;
1837:         }
1838:         if (ascii > non_ascii) // mostly ascii
1839:          return MimeUtility.MOSTLY_ASCII;
1840:         return MimeUtility.MOSTLY_NONASCII;
1841: }
1842: }