Skip to content

Package: AsciiOutputStream

AsciiOutputStream

name instruction branch complexity line method
AsciiOutputStream(boolean, boolean)
M: 4 C: 35
90%
M: 3 C: 1
25%
M: 2 C: 1
33%
M: 0 C: 11
100%
M: 0 C: 1
100%
check(int)
M: 36 C: 40
53%
M: 14 C: 6
30%
M: 9 C: 2
18%
M: 6 C: 10
63%
M: 0 C: 1
100%
getAscii()
M: 16 C: 14
47%
M: 6 C: 4
40%
M: 5 C: 1
17%
M: 6 C: 5
45%
M: 0 C: 1
100%
write(byte[])
M: 7 C: 0
0%
M: 0 C: 0
100%
M: 1 C: 0
0%
M: 2 C: 0
0%
M: 1 C: 0
0%
write(byte[], int, int)
M: 0 C: 17
100%
M: 0 C: 2
100%
M: 0 C: 2
100%
M: 0 C: 4
100%
M: 0 C: 1
100%
write(int)
M: 4 C: 0
0%
M: 0 C: 0
100%
M: 1 C: 0
0%
M: 2 C: 0
0%
M: 1 C: 0
0%

Coverage

1: /*
2: * Copyright (c) 1997, 2023 Oracle and/or its affiliates. All rights reserved.
3: *
4: * This program and the accompanying materials are made available under the
5: * terms of the Eclipse Public License v. 2.0, which is available at
6: * http://www.eclipse.org/legal/epl-2.0.
7: *
8: * This Source Code may also be made available under the following Secondary
9: * Licenses when the conditions for such availability set forth in the
10: * Eclipse Public License v. 2.0 are satisfied: GNU General Public License,
11: * version 2 with the GNU Classpath Exception, which is available at
12: * https://www.gnu.org/software/classpath/license.html.
13: *
14: * SPDX-License-Identifier: EPL-2.0 OR GPL-2.0 WITH Classpath-exception-2.0
15: */
16:
17: package jakarta.mail.internet;
18:
19: import jakarta.activation.DataHandler;
20: import jakarta.activation.DataSource;
21: import jakarta.mail.EncodingAware;
22: import jakarta.mail.MessagingException;
23: import jakarta.mail.util.LineInputStream;
24: import jakarta.mail.util.StreamProvider;
25: import jakarta.mail.util.StreamProvider.EncoderTypes;
26:
27: import java.io.BufferedReader;
28: import java.io.ByteArrayInputStream;
29: import java.io.ByteArrayOutputStream;
30: import java.io.EOFException;
31: import java.io.IOException;
32: import java.io.InputStream;
33: import java.io.OutputStream;
34: import java.io.StringReader;
35: import java.io.UnsupportedEncodingException;
36: import java.nio.charset.Charset;
37: import java.util.HashMap;
38: import java.util.Locale;
39: import java.util.Map;
40: import java.util.NoSuchElementException;
41: import java.util.Properties;
42: import java.util.StringTokenizer;
43:
44: /**
45: * This is a utility class that provides various MIME related
46: * functionality. <p>
47: *
48: * There are a set of methods to encode and decode MIME headers as
49: * per RFC 2047. Note that, in general, these methods are
50: * <strong>not</strong> needed when using methods such as
51: * <code>setSubject</code> and <code>setRecipients</code>; Jakarta Mail
52: * will automatically encode and decode data when using these "higher
53: * level" methods. The methods below are only needed when manipulating
54: * raw MIME headers using <code>setHeader</code> and <code>getHeader</code>
55: * methods. A brief description on handling such headers is given below: <p>
56: *
57: * RFC 822 mail headers <strong>must</strong> contain only US-ASCII
58: * characters. Headers that contain non US-ASCII characters must be
59: * encoded so that they contain only US-ASCII characters. Basically,
60: * this process involves using either BASE64 or QP to encode certain
61: * characters. RFC 2047 describes this in detail. <p>
62: *
63: * In Java, Strings contain (16 bit) Unicode characters. ASCII is a
64: * subset of Unicode (and occupies the range 0 - 127). A String
65: * that contains only ASCII characters is already mail-safe. If the
66: * String contains non US-ASCII characters, it must be encoded. An
67: * additional complexity in this step is that since Unicode is not
68: * yet a widely used charset, one might want to first charset-encode
69: * the String into another charset and then do the transfer-encoding.
70: * <p>
71: * Note that to get the actual bytes of a mail-safe String (say,
72: * for sending over SMTP), one must do
73: * <blockquote><pre>
74: *
75: *         byte[] bytes = string.getBytes("iso-8859-1");
76: *
77: * </pre></blockquote><p>
78: *
79: * The <code>setHeader</code> and <code>addHeader</code> methods
80: * on MimeMessage and MimeBodyPart assume that the given header values
81: * are Unicode strings that contain only US-ASCII characters. Hence
82: * the callers of those methods must ensure that the values they pass
83: * do not contain non US-ASCII characters. The methods in this class
84: * help do this. <p>
85: *
86: * The <code>getHeader</code> family of methods on MimeMessage and
87: * MimeBodyPart return the raw header value. These might be encoded
88: * as per RFC 2047, and if so, must be decoded into Unicode Strings.
89: * The methods in this class help to do this. <p>
90: *
91: * Several System properties control strict conformance to the MIME
92: * spec. Note that these are not session properties but must be set
93: * globally as System properties. <p>
94: *
95: * The <code>mail.mime.decodetext.strict</code> property controls
96: * decoding of MIME encoded words. The MIME spec requires that encoded
97: * words start at the beginning of a whitespace separated word. Some
98: * mailers incorrectly include encoded words in the middle of a word.
99: * If the <code>mail.mime.decodetext.strict</code> System property is
100: * set to <code>"false"</code>, an attempt will be made to decode these
101: * illegal encoded words. The default is true. <p>
102: *
103: * The <code>mail.mime.encodeeol.strict</code> property controls the
104: * choice of Content-Transfer-Encoding for MIME parts that are not of
105: * type "text". Often such parts will contain textual data for which
106: * an encoding that allows normal end of line conventions is appropriate.
107: * In rare cases, such a part will appear to contain entirely textual
108: * data, but will require an encoding that preserves CR and LF characters
109: * without change. If the <code>mail.mime.encodeeol.strict</code>
110: * System property is set to <code>"true"</code>, such an encoding will
111: * be used when necessary. The default is false. <p>
112: *
113: * In addition, the <code>mail.mime.charset</code> System property can
114: * be used to specify the default MIME charset to use for encoded words
115: * and text parts that don't otherwise specify a charset. Normally, the
116: * default MIME charset is derived from the default Java charset, as
117: * specified in the <code>file.encoding</code> System property. Most
118: * applications will have no need to explicitly set the default MIME
119: * charset. In cases where the default MIME charset to be used for
120: * mail messages is different than the charset used for files stored on
121: * the system, this property should be set. <p>
122: *
123: * The current implementation also supports the following System property.
124: * <p>
125: * The <code>mail.mime.ignoreunknownencoding</code> property controls
126: * whether unknown values in the <code>Content-Transfer-Encoding</code>
127: * header, as passed to the <code>decode</code> method, cause an exception.
128: * If set to <code>"true"</code>, unknown values are ignored and 8bit
129: * encoding is assumed. Otherwise, unknown values cause a MessagingException
130: * to be thrown.
131: *
132: * @author John Mani
133: * @author Bill Shannon
134: */
135:
136: public class MimeUtility {
137:
// Static-only utility holder: the private constructor forbids instantiation.
private MimeUtility() {
}

// Passed to checkAscii as its limit argument (see getEncoding);
// presumably means "no limit" -- confirm against checkAscii.
public static final int ALL = -1;

// Cache, keyed by lower-cased charset name, of whether that charset
// is incompatible with ASCII. Accessed under synchronized(nonAsciiCharsetMap).
private static final Map<String, Boolean> nonAsciiCharsetMap = new HashMap<>();

// Characters that must be Q-encoded in "word" and in "text" tokens respectively.
private static final String WORD_SPECIALS = "=_?\"#$%&'(),.:;<>@[\\]^`{|}~";
private static final String TEXT_SPECIALS = "=_?";

// Behaviour switches, each read once from a System property at class load.
private static final boolean decodeStrict =
        getBooleanSystemProperty("mail.mime.decodetext.strict", true);
private static final boolean encodeEolStrict =
        getBooleanSystemProperty("mail.mime.encodeeol.strict", false);
private static final boolean ignoreUnknownEncoding =
        getBooleanSystemProperty("mail.mime.ignoreunknownencoding", false);
private static final boolean allowUtf8 =
        getBooleanSystemProperty("mail.mime.allowutf8", false);

/*
 * Compatibility switches: they allow the fold() and unfold() methods
 * to be disabled so that the previous behavior is restored. They
 * should never need to be changed.
 */
private static final boolean foldEncodedWords =
        getBooleanSystemProperty("mail.mime.foldencodedwords", false);
private static final boolean foldText =
        getBooleanSystemProperty("mail.mime.foldtext", true);
164:
165:
166: /**
167: * Get the Content-Transfer-Encoding that should be applied
168: * to the input stream of this DataSource, to make it mail-safe. <p>
169: *
170: * The algorithm used here is: <br>
171: * <ul>
172: * <li>
173: * If the DataSource implements {@link EncodingAware}, ask it
174: * what encoding to use. If it returns non-null, return that value.
175: * <li>
176: * If the primary type of this datasource is "text" and if all
177: * the bytes in its input stream are US-ASCII, then the encoding
178: * is StreamProvider.BIT7_ENCODER. If more than half of the bytes are non-US-ASCII, then
179: * the encoding is StreamProvider.BASE_64_ENCODER. If less than half of the bytes are
180: * non-US-ASCII, then the encoding is StreamProvider.QUOTED_PRINTABLE_ENCODER.
181: * <li>
182: * If the primary type of this datasource is not "text", then if
183: * all the bytes of its input stream are US-ASCII, the encoding
184: * is StreamProvider.BIT7_ENCODER. If there is even one non-US-ASCII character, the
185: * encoding is StreamProvider.BASE_64_ENCODER.
186: * </ul>
187: *
188: * @param ds the DataSource
189: * @return the encoding. This is either StreamProvider.BIT7_ENCODER,
190: * StreamProvider.QUOTED_PRINTABLE_ENCODER or StreamProvider.BASE_64_ENCODER
191: */
192: public static String getEncoding(DataSource ds) {
193: ContentType cType = null;
194: InputStream is = null;
195: String encoding = null;
196:
197: if (ds instanceof EncodingAware) {
198: encoding = ((EncodingAware) ds).getEncoding();
199: if (encoding != null)
200: return encoding;
201: }
202: try {
203: cType = new ContentType(ds.getContentType());
204: is = ds.getInputStream();
205:
206: boolean isText = cType.match("text/*");
207: // if not text, stop processing when we see non-ASCII
208: int i = checkAscii(is, ALL, !isText);
209: switch (i) {
210: case ALL_ASCII:
211: encoding = EncoderTypes.BIT7_ENCODER.getEncoder(); // all ASCII
212: break;
213: case MOSTLY_ASCII:
214: if (isText && nonAsciiCharset(cType))
215: encoding = EncoderTypes.BASE_64.getEncoder(); // charset isn't compatible with ASCII
216: else
217: encoding = EncoderTypes.QUOTED_PRINTABLE_ENCODER.getEncoder(); // mostly ASCII
218: break;
219: default:
220: encoding = EncoderTypes.BASE_64.getEncoder(); // mostly binary
221: break;
222: }
223:
224: } catch (Exception ex) {
225: return EncoderTypes.BASE_64.getEncoder(); // what else ?!
226: } finally {
227: // Close the input stream
228: try {
229: if (is != null)
230: is.close();
231: } catch (IOException ioex) {
232: }
233: }
234:
235: return encoding;
236: }
237:
238: /**
239: * Determine whether the charset in the Content-Type is compatible
240: * with ASCII or not. A charset is compatible with ASCII if the
241: * encoded byte stream representing the Unicode string "\r\n" is
242: * the ASCII characters CR and LF. For example, the utf-16be
243: * charset is not compatible with ASCII.
244: *
245: * For performance, we keep a static map that caches the results.
246: */
247: private static boolean nonAsciiCharset(ContentType ct) {
248: String charset = ct.getParameter("charset");
249: if (charset == null)
250: return false;
251: charset = charset.toLowerCase(Locale.ENGLISH);
252: Boolean bool;
253: synchronized (nonAsciiCharsetMap) {
254: bool = nonAsciiCharsetMap.get(charset);
255: }
256: if (bool == null) {
257: try {
258: byte[] b = "\r\n".getBytes(charset);
259: bool = Boolean.valueOf(
260: b.length != 2 || b[0] != 015 || b[1] != 012);
261: } catch (UnsupportedEncodingException uex) {
262: bool = Boolean.FALSE; // a guess
263: } catch (RuntimeException ex) {
264: bool = Boolean.TRUE; // one of the weird ones?
265: }
266: synchronized (nonAsciiCharsetMap) {
267: nonAsciiCharsetMap.put(charset, bool);
268: }
269: }
270: return bool.booleanValue();
271: }
272:
273: /**
274: * Same as <code>getEncoding(DataSource)</code> except that instead
275: * of reading the data from an <code>InputStream</code> it uses the
276: * <code>writeTo</code> method to examine the data. This is more
277: * efficient in the common case of a <code>DataHandler</code>
278: * created with an object and a MIME type (for example, a
279: * "text/plain" String) because all the I/O is done in this
280: * thread. In the case requiring an <code>InputStream</code> the
281: * <code>DataHandler</code> uses a thread, a pair of pipe streams,
282: * and the <code>writeTo</code> method to produce the data.
283: *
284: * @param dh the DataHandler
285: * @return the Content-Transfer-Encoding
286: * @since JavaMail 1.2
287: */
288: public static String getEncoding(DataHandler dh) {
289: ContentType cType = null;
290: String encoding = null;
291:
292: /*
293: * Try to pick the most efficient means of determining the
294: * encoding. If this DataHandler was created using a DataSource,
295: * the getEncoding(DataSource) method is typically faster. If
296: * the DataHandler was created with an object, this method is
297: * much faster. To distinguish the two cases, we use a heuristic.
298: * A DataHandler created with an object will always have a null name.
299: * A DataHandler created with a DataSource will usually have a
300: * non-null name.
301: *
302: * XXX - This is actually quite a disgusting hack, but it makes
303: *         a common case run over twice as fast.
304: */
305: if (dh.getName() != null)
306: return getEncoding(dh.getDataSource());
307:
308: try {
309: cType = new ContentType(dh.getContentType());
310: } catch (Exception ex) {
311: return EncoderTypes.BASE_64.getEncoder(); // what else ?!
312: }
313:
314: if (cType.match("text/*")) {
315: // Check all of the available bytes
316: AsciiOutputStream aos = new AsciiOutputStream(false, false);
317: try {
318: dh.writeTo(aos);
319: } catch (IOException ex) {
320: // ignore it, can't happen
321: }
322: switch (aos.getAscii()) {
323: case ALL_ASCII:
324: encoding = EncoderTypes.BIT7_ENCODER.getEncoder(); // all ascii
325: break;
326: case MOSTLY_ASCII:
327: encoding = EncoderTypes.QUOTED_PRINTABLE_ENCODER.getEncoder(); // mostly ascii
328: break;
329: default:
330: encoding = EncoderTypes.BASE_64.getEncoder(); // mostly binary
331: break;
332: }
333: } else { // not "text"
334: // Check all of available bytes, break out if we find
335: // at least one non-US-ASCII character
336: AsciiOutputStream aos =
337: new AsciiOutputStream(true, encodeEolStrict);
338: try {
339: dh.writeTo(aos);
340: } catch (IOException ex) {
341: } // ignore it
342: if (aos.getAscii() == ALL_ASCII) // all ascii
343: encoding = EncoderTypes.BIT7_ENCODER.getEncoder();
344: else // found atleast one non-ascii character, use b64
345: encoding = EncoderTypes.BASE_64.getEncoder();
346: }
347:
348: return encoding;
349: }
350:
351: /**
352: * Decode the given input stream. The Input stream returned is
353: * the decoded input stream. All the encodings defined in RFC 2045
354: * are supported here. They include StreamProvider.BASE_64_ENCODER, StreamProvider.QUOTED_PRINTABLE_ENCODER,
355: * StreamProvider.BIT7_ENCODER, StreamProvider.BIT8_ENCODER, and StreamProvider.BINARY_ENCODER. In addition, StreamProvider.UU_ENCODER is also
356: * supported. <p>
357: *
358: * In the current implementation, if the
359: * <code>mail.mime.ignoreunknownencoding</code> system property is set to
360: * <code>"true"</code>, unknown encoding values are ignored and the
361: * original InputStream is returned.
362: *
363: * @param is input stream
364: * @param encoding the encoding of the stream.
365: * @return decoded input stream.
366: * @throws MessagingException if the encoding is unknown
367: */
368: public static InputStream decode(InputStream is, String encoding)
369: throws MessagingException {
370: if (encoding.equalsIgnoreCase(EncoderTypes.BASE_64.getEncoder()))
371: return StreamProvider.provider().inputBase64(is);
372: else if (encoding.equalsIgnoreCase(EncoderTypes.QUOTED_PRINTABLE_ENCODER.getEncoder()))
373: return StreamProvider.provider().inputQP(is);
374: else if (encoding.equalsIgnoreCase(EncoderTypes.UU_ENCODER.getEncoder()) ||
375: encoding.equalsIgnoreCase(EncoderTypes.X_UU_ENCODER.getEncoder()) ||
376: encoding.equalsIgnoreCase(EncoderTypes.X_UUE.getEncoder()))
377: return StreamProvider.provider().inputUU(is);
378: else if (encoding.equalsIgnoreCase(EncoderTypes.BINARY_ENCODER.getEncoder()) ||
379: encoding.equalsIgnoreCase(EncoderTypes.BIT7_ENCODER.getEncoder()) ||
380: encoding.equalsIgnoreCase(EncoderTypes.BIT8_ENCODER.getEncoder()))
381: return StreamProvider.provider().inputBinary(is);
382: else {
383: if (!ignoreUnknownEncoding)
384: throw new MessagingException("Unknown encoding: " + encoding);
385: return is;
386: }
387: }
388:
389: /**
390: * Wrap an encoder around the given output stream.
391: * All the encodings defined in RFC 2045 are supported here.
392: * They include StreamProvider.BASE_64_ENCODER, StreamProvider.QUOTED_PRINTABLE_ENCODER, StreamProvider.BIT7_ENCODER, StreamProvider.BIT8_ENCODER and
393: * StreamProvider.BINARY_ENCODER. In addition, StreamProvider.UU_ENCODER is also supported.
394: *
395: * @param os output stream
396: * @param encoding the encoding of the stream.
397: * @return output stream that applies the
398: * specified encoding.
399: * @throws MessagingException if the encoding is unknown
400: */
401: public static OutputStream encode(OutputStream os, String encoding)
402: throws MessagingException {
403: if (encoding == null)
404: return os;
405: else if (encoding.equalsIgnoreCase(EncoderTypes.BASE_64.getEncoder()))
406: return StreamProvider.provider().outputBase64(os);
407: else if (encoding.equalsIgnoreCase(EncoderTypes.QUOTED_PRINTABLE_ENCODER.getEncoder()))
408: return StreamProvider.provider().outputQP(os);
409: else if (encoding.equalsIgnoreCase(EncoderTypes.UU_ENCODER.getEncoder()) ||
410: encoding.equalsIgnoreCase(EncoderTypes.X_UU_ENCODER.getEncoder()) ||
411: encoding.equalsIgnoreCase(EncoderTypes.X_UUE.getEncoder()))
412: return StreamProvider.provider().outputUU(os, null);
413: else if (encoding.equalsIgnoreCase(EncoderTypes.BINARY_ENCODER.getEncoder()) ||
414: encoding.equalsIgnoreCase(EncoderTypes.BIT7_ENCODER.getEncoder()) ||
415: encoding.equalsIgnoreCase(EncoderTypes.BIT8_ENCODER.getEncoder()))
416: return StreamProvider.provider().outputBinary(os);
417: else
418: throw new MessagingException("Unknown encoding: " + encoding);
419: }
420:
421: /**
422: * Wrap an encoder around the given output stream.
423: * All the encodings defined in RFC 2045 are supported here.
424: * They include StreamProvider.BASE_64_ENCODER, StreamProvider.QUOTED_PRINTABLE_ENCODER, StreamProvider.BIT7_ENCODER, StreamProvider.BIT8_ENCODER and
425: * StreamProvider.BINARY_ENCODER. In addition, StreamProvider.UU_ENCODER is also supported.
426: * The <code>filename</code> parameter is used with the StreamProvider.UU_ENCODER
427: * encoding and is included in the encoded output.
428: *
429: * @param os output stream
430: * @param encoding the encoding of the stream.
431: * @param filename name for the file being encoded (only used
432: * with uuencode)
433: * @return output stream that applies the
434: * specified encoding.
435: * @throws MessagingException for unknown encodings
436: * @since JavaMail 1.2
437: */
438: public static OutputStream encode(OutputStream os, String encoding,
439: String filename)
440: throws MessagingException {
441: if (encoding == null)
442: return os;
443: else if (encoding.equalsIgnoreCase(EncoderTypes.BASE_64.getEncoder()))
444: return StreamProvider.provider().outputBase64(os);
445: else if (encoding.equalsIgnoreCase(EncoderTypes.QUOTED_PRINTABLE_ENCODER.getEncoder()))
446: return StreamProvider.provider().outputQP(os);
447: else if (encoding.equalsIgnoreCase(EncoderTypes.UU_ENCODER.getEncoder()) ||
448: encoding.equalsIgnoreCase(EncoderTypes.X_UU_ENCODER.getEncoder()) ||
449: encoding.equalsIgnoreCase(EncoderTypes.X_UUE.getEncoder()))
450: return StreamProvider.provider().outputUU(os, filename);
451: else if (encoding.equalsIgnoreCase(EncoderTypes.BINARY_ENCODER.getEncoder()) ||
452: encoding.equalsIgnoreCase(EncoderTypes.BIT7_ENCODER.getEncoder()) ||
453: encoding.equalsIgnoreCase(EncoderTypes.BIT8_ENCODER.getEncoder()))
454: return StreamProvider.provider().outputBinary(os);
455: else
456: throw new MessagingException("Unknown encoding: " + encoding);
457: }
458:
459: /**
460: * Encode a RFC 822 "text" token into mail-safe form as per
461: * RFC 2047. <p>
462: *
463: * The given Unicode string is examined for non US-ASCII
464: * characters. If the string contains only US-ASCII characters,
465: * it is returned as-is. If the string contains non US-ASCII
466: * characters, it is first character-encoded using the platform's
467: * default charset, then transfer-encoded using either the B or
468: * Q encoding. The resulting bytes are then returned as a Unicode
469: * string containing only ASCII characters. <p>
470: *
471: * Note that this method should be used to encode only
472: * "unstructured" RFC 822 headers. <p>
473: *
474: * Example of usage:
475: * <blockquote><pre>
476: *
477: * MimePart part = ...
478: * String rawvalue = "FooBar Mailer, Japanese version 1.1"
479: * try {
480: * // If we know for sure that rawvalue contains only US-ASCII
481: * // characters, we can skip the encoding part
482: * part.setHeader("X-mailer", MimeUtility.encodeText(rawvalue));
483: * } catch (UnsupportedEncodingException e) {
484: * // encoding failure
485: * } catch (MessagingException me) {
486: * // setHeader() failure
487: * }
488: *
489: * </pre></blockquote>
490: *
491: * @param text Unicode string
492: * @return Unicode string containing only US-ASCII characters
493: * @throws UnsupportedEncodingException if the encoding fails
494: */
495: public static String encodeText(String text)
496: throws UnsupportedEncodingException {
497: return encodeText(text, null, null);
498: }
499:
500: /**
501: * Encode a RFC 822 "text" token into mail-safe form as per
502: * RFC 2047. <p>
503: *
504: * The given Unicode string is examined for non US-ASCII
505: * characters. If the string contains only US-ASCII characters,
506: * it is returned as-is. If the string contains non US-ASCII
507: * characters, it is first character-encoded using the specified
508: * charset, then transfer-encoded using either the B or Q encoding.
509: * The resulting bytes are then returned as a Unicode string
510: * containing only ASCII characters. <p>
511: *
512: * Note that this method should be used to encode only
513: * "unstructured" RFC 822 headers.
514: *
515: * @param text the header value
516: * @param charset the charset. If this parameter is null, the
517: * platform's default charset is used.
518: * @param encoding the encoding to be used. Currently supported
519: * values are "B" and "Q". If this parameter is null, then
520: * the "Q" encoding is used if most of characters to be
521: * encoded are in the ASCII charset, otherwise "B" encoding
522: * is used.
523: * @return Unicode string containing only US-ASCII characters
524: * @throws UnsupportedEncodingException if the charset
525: * conversion failed.
526: */
527: public static String encodeText(String text, String charset,
528: String encoding)
529: throws UnsupportedEncodingException {
530: return encodeWord(text, charset, encoding, false);
531: }
532:
533: /**
534: * Decode "unstructured" headers, that is, headers that are defined
535: * as '*text' as per RFC 822. <p>
536: *
537: * The string is decoded using the algorithm specified in
538: * RFC 2047, Section 6.1. If the charset-conversion fails
539: * for any sequence, an UnsupportedEncodingException is thrown.
540: * If the String is not an RFC 2047 style encoded header, it is
541: * returned as-is <p>
542: *
543: * Example of usage:
544: * <blockquote><pre>
545: *
546: * MimePart part = ...
547: * String rawvalue = null;
548: * String value = null;
549: * try {
550: * if ((rawvalue = part.getHeader("X-mailer")[0]) != null)
551: * value = MimeUtility.decodeText(rawvalue);
552: * } catch (UnsupportedEncodingException e) {
553: * // Don't care
554: * value = rawvalue;
555: * } catch (MessagingException me) { }
556: *
557: * return value;
558: *
559: * </pre></blockquote>
560: *
561: * @param etext the possibly encoded value
562: * @return the decoded text
563: * @throws UnsupportedEncodingException if the charset
564: * conversion failed.
565: */
566: public static String decodeText(String etext)
567: throws UnsupportedEncodingException {
568: /*
569: * We look for sequences separated by "linear-white-space".
570: * (as per RFC 2047, Section 6.1)
571: * RFC 822 defines "linear-white-space" as SPACE | HT | CR | NL.
572: */
573: String lwsp = " \t\n\r";
574: StringTokenizer st;
575:
576: /*
577: * First, lets do a quick run thru the string and check
578: * whether the sequence "=?" exists at all. If none exists,
579: * we know there are no encoded-words in here and we can just
580: * return the string as-is, without suffering thru the later
581: * decoding logic.
582: * This handles the most common case of unencoded headers
583: * efficiently.
584: */
585: if (!etext.contains("=?"))
586: return etext;
587:
588: // Encoded words found. Start decoding ...
589:
590: st = new StringTokenizer(etext, lwsp, true);
591: StringBuilder sb = new StringBuilder(); // decode buffer
592: StringBuilder wsb = new StringBuilder(); // white space buffer
593: boolean prevWasEncoded = false;
594:
595: while (st.hasMoreTokens()) {
596: char c;
597: String s = st.nextToken();
598: // If whitespace, append it to the whitespace buffer
599: if (((c = s.charAt(0)) == ' ') || (c == '\t') ||
600: (c == '\r') || (c == '\n'))
601: wsb.append(c);
602: else {
603: // Check if token is an 'encoded-word' ..
604: String word;
605: try {
606: word = decodeWord(s);
607: // Yes, this IS an 'encoded-word'.
608: if (!prevWasEncoded && wsb.length() > 0) {
609: // if the previous word was also encoded, we
610: // should ignore the collected whitespace. Else
611: // we include the whitespace as well.
612: sb.append(wsb);
613: }
614: prevWasEncoded = true;
615: } catch (ParseException pex) {
616: // This is NOT an 'encoded-word'.
617: word = s;
618: // possibly decode inner encoded words
619: if (!decodeStrict) {
620: String dword = decodeInnerWords(word);
621: if (dword != word) {
622: // if a different String object was returned,
623: // decoding was done.
624: if (prevWasEncoded && word.startsWith("=?")) {
625: // encoded followed by encoded,
626: // throw away whitespace between
627: } else {
628: // include collected whitespace ..
629: if (wsb.length() > 0)
630: sb.append(wsb);
631: }
632: // did original end with encoded?
633: prevWasEncoded = word.endsWith("?=");
634: word = dword;
635: } else {
636: // include collected whitespace ..
637: if (wsb.length() > 0)
638: sb.append(wsb);
639: prevWasEncoded = false;
640: }
641: } else {
642: // include collected whitespace ..
643: if (wsb.length() > 0)
644: sb.append(wsb);
645: prevWasEncoded = false;
646: }
647: }
648: sb.append(word); // append the actual word
649: wsb.setLength(0); // reset wsb for reuse
650: }
651: }
652: sb.append(wsb); // append trailing whitespace
653: return sb.toString();
654: }
655:
656: /**
657: * Encode a RFC 822 "word" token into mail-safe form as per
658: * RFC 2047. <p>
659: *
660: * The given Unicode string is examined for non US-ASCII
661: * characters. If the string contains only US-ASCII characters,
662: * it is returned as-is. If the string contains non US-ASCII
663: * characters, it is first character-encoded using the platform's
664: * default charset, then transfer-encoded using either the B or
665: * Q encoding. The resulting bytes are then returned as a Unicode
666: * string containing only ASCII characters. <p>
667: *
668: * This method is meant to be used when creating RFC 822 "phrases".
669: * The InternetAddress class, for example, uses this to encode
670: * its 'phrase' component.
671: *
672: * @param word Unicode string
673: * @return Unicode string containing only US-ASCII
674: * characters.
675: * @throws UnsupportedEncodingException if the encoding fails
676: */
677: public static String encodeWord(String word)
678: throws UnsupportedEncodingException {
679: return encodeWord(word, null, null);
680: }
681:
682: /**
683: * Encode a RFC 822 "word" token into mail-safe form as per
684: * RFC 2047. <p>
685: *
686: * The given Unicode string is examined for non US-ASCII
687: * characters. If the string contains only US-ASCII characters,
688: * it is returned as-is. If the string contains non US-ASCII
689: * characters, it is first character-encoded using the specified
690: * charset, then transfer-encoded using either the B or Q encoding.
691: * The resulting bytes are then returned as a Unicode string
692: * containing only ASCII characters.
693: *
694: * @param word Unicode string
695: * @param charset the MIME charset
696: * @param encoding the encoding to be used. Currently supported
697: * values are "B" and "Q". If this parameter is null, then
698: * the "Q" encoding is used if most of characters to be
699: * encoded are in the ASCII charset, otherwise "B" encoding
700: * is used.
701: * @return Unicode string containing only US-ASCII characters
702: * @throws UnsupportedEncodingException if the encoding fails
703: */
704: public static String encodeWord(String word, String charset,
705: String encoding)
706: throws UnsupportedEncodingException {
707: return encodeWord(word, charset, encoding, true);
708: }
709:
710: /*
711: * Encode the given string. The parameter 'encodingWord' should
712: * be true if a RFC 822 "word" token is being encoded and false if a
713: * RFC 822 "text" token is being encoded. This is because the
714: * "Q" encoding defined in RFC 2047 has more restrictions when
715: * encoding "word" tokens. (Sigh)
716: */
717: private static String encodeWord(String string, String charset,
718: String encoding, boolean encodingWord)
719: throws UnsupportedEncodingException {
720:
721: // If 'string' contains only US-ASCII characters, just
722: // return it.
723: int ascii = checkAscii(string);
724: if (ascii == ALL_ASCII)
725: return string;
726:
727: // Else, apply the specified charset conversion.
728: String jcharset;
729: if (charset == null) { // use default charset
730: jcharset = getDefaultJavaCharset(); // the java charset
731: charset = getDefaultMIMECharset(); // the MIME equivalent
732: } else // MIME charset -> java charset
733: jcharset = javaCharset(charset);
734:
735: // If no transfer-encoding is specified, figure one out.
736: if (encoding == null) {
737: if (ascii != MOSTLY_NONASCII)
738: encoding = "Q";
739: else
740: encoding = "B";
741: }
742:
743: boolean b64;
744: if (encoding.equalsIgnoreCase("B"))
745: b64 = true;
746: else if (encoding.equalsIgnoreCase("Q"))
747: b64 = false;
748: else
749: throw new UnsupportedEncodingException(
750: "Unknown transfer encoding: " + encoding);
751:
752: StringBuilder outb = new StringBuilder(); // the output buffer
753: doEncode(string, b64, jcharset,
754: // As per RFC 2047, size of an encoded string should not
755: // exceed 75 bytes.
756: // 7 = size of "=?", '?', 'B'/'Q', '?', "?="
757: 75 - 7 - charset.length(), // the available space
758: "=?" + charset + "?" + encoding + "?", // prefix
759: true, encodingWord, outb);
760:
761: return outb.toString();
762: }
763:
764: /**
765: * Returns the length of the encoded version of this byte array.
766: *
767: * @param b the byte array
768: * @return the length
769: */
770: private static int bEncodedLength(byte[] b) {
771: return ((b.length + 2) / 3) * 4;
772: }
773:
774: /**
775: * Returns the length of the encoded version of this byte array.
776: *
777: * @param b the byte array
778: * @param encodingWord true if encoding words, false if encoding text
779: * @return the length
780: */
781: private static int qEncodedLength(byte[] b, boolean encodingWord) {
782: int len = 0;
783: String specials = encodingWord ? WORD_SPECIALS : TEXT_SPECIALS;
784: for (int i = 0; i < b.length; i++) {
785: int c = b[i] & 0xff; // Mask off MSB
786: if (c < 040 || c >= 0177 || specials.indexOf(c) >= 0)
787: // needs encoding
788: len += 3; // Q-encoding is 1 -> 3 conversion
789: else
790: len++;
791: }
792: return len;
793: }
794:
795: private static void doEncode(String string, boolean b64,
796: String jcharset, int avail, String prefix,
797: boolean first, boolean encodingWord, StringBuilder buf)
798: throws UnsupportedEncodingException {
799:
800: // First find out what the length of the encoded version of
801: // 'string' would be.
802: byte[] bytes = string.getBytes(jcharset);
803: int len;
804: if (b64) // "B" encoding
805: len = bEncodedLength(bytes);
806: else // "Q"
807: len = qEncodedLength(bytes, encodingWord);
808:
809: int size;
810: if ((len > avail) && ((size = string.length()) > 1)) {
811: // If the length is greater than 'avail', split 'string'
812: // into two and recurse.
813: // Have to make sure not to split a Unicode surrogate pair.
814: int split = size / 2;
815: if (Character.isHighSurrogate(string.charAt(split - 1)))
816: split--;
817: if (split > 0)
818: doEncode(string.substring(0, split), b64, jcharset,
819: avail, prefix, first, encodingWord, buf);
820: doEncode(string.substring(split, size), b64, jcharset,
821: avail, prefix, false, encodingWord, buf);
822: } else {
823: // length <= than 'avail'. Encode the given string
824: ByteArrayOutputStream os = new ByteArrayOutputStream();
825: OutputStream eos; // the encoder
826: if (b64) { // "B" encoding
827: eos = StreamProvider.provider().outputB(os);
828: } else { // "Q" encoding
829: eos = StreamProvider.provider().outputQ(os, encodingWord);
830: }
831:
832: try { // do the encoding
833: eos.write(bytes);
834: eos.close();
835: } catch (IOException ioex) {
836: }
837:
838: byte[] encodedBytes = os.toByteArray(); // the encoded stuff
839: // Now write out the encoded (all ASCII) bytes into our
840: // StringBuilder
841: if (!first) // not the first line of this sequence
842: if (foldEncodedWords)
843: buf.append("\r\n "); // start a continuation line
844: else
845: buf.append(" "); // line will be folded later
846:
847: buf.append(prefix);
848: for (int i = 0; i < encodedBytes.length; i++)
849: buf.append((char) encodedBytes[i]);
850: buf.append("?="); // terminate the current sequence
851: }
852: }
853:
854: /**
855: * The string is parsed using the rules in RFC 2047 and RFC 2231 for
856: * parsing an "encoded-word". If the parse fails, a ParseException is
857: * thrown. Otherwise, it is transfer-decoded, and then
858: * charset-converted into Unicode. If the charset-conversion
859: * fails, an UnsupportedEncodingException is thrown.
860: *
861: * @param eword the encoded value
862: * @return the decoded word
863: * @throws ParseException if the string is not an
864: * encoded-word as per RFC 2047 and RFC 2231.
865: * @throws UnsupportedEncodingException if the charset
866: * conversion failed.
867: */
868: public static String decodeWord(String eword)
869: throws ParseException, UnsupportedEncodingException {
870:
871: if (!eword.startsWith("=?")) // not an encoded word
872: throw new ParseException(
873: "encoded word does not start with \"=?\": " + eword);
874:
875: // get charset
876: int start = 2;
877: int pos;
878: if ((pos = eword.indexOf('?', start)) == -1)
879: throw new ParseException(
880: "encoded word does not include charset: " + eword);
881: String charset = eword.substring(start, pos);
882: int lpos = charset.indexOf('*'); // RFC 2231 language specified?
883: if (lpos >= 0) // yes, throw it away
884: charset = charset.substring(0, lpos);
885: charset = javaCharset(charset);
886:
887: // get encoding
888: start = pos + 1;
889: if ((pos = eword.indexOf('?', start)) == -1)
890: throw new ParseException(
891: "encoded word does not include encoding: " + eword);
892: String encoding = eword.substring(start, pos);
893:
894: // get encoded-sequence
895: start = pos + 1;
896: if ((pos = eword.indexOf("?=", start)) == -1)
897: throw new ParseException(
898: "encoded word does not end with \"?=\": " + eword);
899:         /*
900:          * XXX - should include this, but leaving it out for compatibility...
901:          *
902:         if (decodeStrict && pos != eword.length() - 2)
903:          throw new ParseException(
904:                 "encoded word does not end with \"?=\": " + eword););
905:          */
906: String word = eword.substring(start, pos);
907:
908: try {
909: String decodedWord;
910: if (word.length() > 0) {
911: // Extract the bytes from word
912: ByteArrayInputStream bis =
913: new ByteArrayInputStream(getBytes(word));
914:
915: // Get the appropriate decoder
916: InputStream is;
917: if (encoding.equalsIgnoreCase("B"))
918: is = StreamProvider.provider().inputBase64(bis);
919: else if (encoding.equalsIgnoreCase("Q"))
920: is = StreamProvider.provider().inputQ(bis);
921: else
922: throw new UnsupportedEncodingException(
923: "unknown encoding: " + encoding);
924:
925: // For b64 & q, size of decoded word <= size of word. So
926: // the decoded bytes must fit into the 'bytes' array. This
927: // is certainly more efficient than writing bytes into a
928: // ByteArrayOutputStream and then pulling out the byte[]
929: // from it.
930: int count = bis.available();
931: byte[] bytes = new byte[count];
932: // count is set to the actual number of decoded bytes
933: count = is.read(bytes, 0, count);
934:
935: // Finally, convert the decoded bytes into a String using
936: // the specified charset
937: decodedWord = count <= 0 ? "" :
938: new String(bytes, 0, count, charset);
939: } else {
940: // no characters to decode, return empty string
941: decodedWord = "";
942: }
943: if (pos + 2 < eword.length()) {
944: // there's still more text in the string
945: String rest = eword.substring(pos + 2);
946: if (!decodeStrict)
947: rest = decodeInnerWords(rest);
948: decodedWord += rest;
949: }
950: return decodedWord;
951: } catch (UnsupportedEncodingException uex) {
952: // explicitly catch and rethrow this exception, otherwise
953: // the below IOException catch will swallow this up!
954: throw uex;
955: } catch (IOException ioex) {
956: // Shouldn't happen.
957: throw new ParseException(ioex.toString());
958: } catch (IllegalArgumentException iex) {
959: /* An unknown charset of the form ISO-XXX-XXX, will cause
960: * the JDK to throw an IllegalArgumentException ... Since the
961: * JDK will attempt to create a classname using this string,
962: * but valid classnames must not contain the character '-',
963: * and this results in an IllegalArgumentException, rather than
964: * the expected UnsupportedEncodingException. Yikes
965: */
966: throw new UnsupportedEncodingException(charset);
967: }
968: }
969:
970: /**
971: * Look for encoded words within a word. The MIME spec doesn't
972: * allow this, but many broken mailers, especially Japanese mailers,
973: * produce such incorrect encodings.
974: */
975: private static String decodeInnerWords(String word)
976: throws UnsupportedEncodingException {
977: int start = 0, i;
978: StringBuilder buf = new StringBuilder();
979: while ((i = word.indexOf("=?", start)) >= 0) {
980: buf.append(word, start, i);
981: // find first '?' after opening '=?' - end of charset
982: int end = word.indexOf('?', i + 2);
983: if (end < 0)
984: break;
985: // find next '?' after that - end of encoding
986: end = word.indexOf('?', end + 1);
987: if (end < 0)
988: break;
989: // find terminating '?='
990: end = word.indexOf("?=", end + 1);
991: if (end < 0)
992: break;
993: String s = word.substring(i, end + 2);
994: try {
995: s = decodeWord(s);
996: } catch (ParseException pex) {
997: // ignore it, just use the original string
998: }
999: buf.append(s);
1000: start = end + 2;
1001: }
1002: if (start == 0)
1003: return word;
1004: if (start < word.length())
1005: buf.append(word.substring(start));
1006: return buf.toString();
1007: }
1008:
1009: /**
1010: * A utility method to quote a word, if the word contains any
1011: * characters from the specified 'specials' list.<p>
1012: *
1013: * The <code>HeaderTokenizer</code> class defines two special
1014: * sets of delimiters - MIME and RFC 822. <p>
1015: *
1016: * This method is typically used during the generation of
1017: * RFC 822 and MIME header fields.
1018: *
1019: * @param word word to be quoted
1020: * @param specials the set of special characters
1021: * @return the possibly quoted word
1022: * @see jakarta.mail.internet.HeaderTokenizer#MIME
1023: * @see jakarta.mail.internet.HeaderTokenizer#RFC822
1024: */
1025: public static String quote(String word, String specials) {
1026: int len = word == null ? 0 : word.length();
1027: if (len == 0)
1028: return "\"\""; // an empty string is handled specially
1029:
1030: /*
1031: * Look for any "bad" characters, Escape and
1032: * quote the entire string if necessary.
1033: */
1034: boolean needQuoting = false;
1035: for (int i = 0; i < len; i++) {
1036: char c = word.charAt(i);
1037: if (c == '"' || c == '\\' || c == '\r' || c == '\n') {
1038: // need to escape them and then quote the whole string
1039: StringBuilder sb = new StringBuilder(len + 3);
1040: sb.append('"');
1041: sb.append(word, 0, i);
1042: int lastc = 0;
1043: for (int j = i; j < len; j++) {
1044: char cc = word.charAt(j);
1045: if ((cc == '"') || (cc == '\\') ||
1046: (cc == '\r') || (cc == '\n'))
1047: if (cc == '\n' && lastc == '\r')
1048: ; // do nothing, CR was already escaped
1049: else
1050: sb.append('\\'); // Escape the character
1051: sb.append(cc);
1052: lastc = cc;
1053: }
1054: sb.append('"');
1055: return sb.toString();
1056: } else if (c < 040 || (c >= 0177 && !allowUtf8) ||
1057: specials.indexOf(c) >= 0)
1058: // These characters cause the string to be quoted
1059: needQuoting = true;
1060: }
1061:
1062: if (needQuoting) {
1063: StringBuilder sb = new StringBuilder(len + 2);
1064: sb.append('"').append(word).append('"');
1065: return sb.toString();
1066: } else
1067: return word;
1068: }
1069:
1070: /**
1071: * Fold a string at linear whitespace so that each line is no longer
1072: * than 76 characters, if possible. If there are more than 76
1073: * non-whitespace characters consecutively, the string is folded at
1074: * the first whitespace after that sequence. The parameter
1075: * <code>used</code> indicates how many characters have been used in
1076: * the current line; it is usually the length of the header name. <p>
1077: *
1078: * Note that line breaks in the string aren't escaped; they probably
1079: * should be.
1080: *
1081: * @param used characters used in line so far
1082: * @param s the string to fold
1083: * @return the folded string
1084: * @since JavaMail 1.4
1085: */
1086: public static String fold(int used, String s) {
1087: if (!foldText)
1088: return s;
1089:
1090: int end;
1091: char c;
1092: // Strip trailing spaces and newlines
1093: for (end = s.length() - 1; end >= 0; end--) {
1094: c = s.charAt(end);
1095: if (c != ' ' && c != '\t' && c != '\r' && c != '\n')
1096: break;
1097: }
1098: if (end != s.length() - 1)
1099: s = s.substring(0, end + 1);
1100:
1101: // if the string fits now, just return it
1102: if (used + s.length() <= 76)
1103: return makesafe(s);
1104:
1105: // have to actually fold the string
1106: StringBuilder sb = new StringBuilder(s.length() + 4);
1107: char lastc = 0;
1108: while (used + s.length() > 76) {
1109: int lastspace = -1;
1110: for (int i = 0; i < s.length(); i++) {
1111: if (lastspace != -1 && used + i > 76)
1112: break;
1113: c = s.charAt(i);
1114: if (c == ' ' || c == '\t')
1115: if (!(lastc == ' ' || lastc == '\t'))
1116: lastspace = i;
1117: lastc = c;
1118: }
1119: if (lastspace == -1) {
1120: // no space, use the whole thing
1121: sb.append(s);
1122: s = "";
1123: used = 0;
1124: break;
1125: }
1126: sb.append(s, 0, lastspace);
1127: sb.append("\r\n");
1128: lastc = s.charAt(lastspace);
1129: sb.append(lastc);
1130: s = s.substring(lastspace + 1);
1131: used = 1;
1132: }
1133: sb.append(s);
1134: return makesafe(sb);
1135: }
1136:
1137: /**
1138: * If the String or StringBuilder has any embedded newlines,
1139: * make sure they're followed by whitespace, to prevent header
1140: * injection errors.
1141: */
1142: private static String makesafe(CharSequence s) {
1143: int i;
1144: for (i = 0; i < s.length(); i++) {
1145: char c = s.charAt(i);
1146: if (c == '\r' || c == '\n')
1147: break;
1148: }
1149: if (i == s.length()) // went through whole string with no CR or LF
1150: return s.toString();
1151:
1152: // read the lines in the string and reassemble them,
1153: // eliminating blank lines and inserting whitespace as necessary
1154: StringBuilder sb = new StringBuilder(s.length() + 1);
1155: BufferedReader r = new BufferedReader(new StringReader(s.toString()));
1156: String line;
1157: try {
1158: while ((line = r.readLine()) != null) {
1159: if (line.trim().length() == 0)
1160: continue; // ignore empty lines
1161: if (sb.length() > 0) {
1162: sb.append("\r\n");
1163: assert line.length() > 0; // proven above
1164: char c = line.charAt(0);
1165: if (c != ' ' && c != '\t')
1166: sb.append(' ');
1167: }
1168: sb.append(line);
1169: }
1170: } catch (IOException ex) {
1171: // XXX - should never happen when reading from a string
1172: return s.toString();
1173: }
1174: return sb.toString();
1175: }
1176:
1177: /**
1178: * Unfold a folded header. Any line breaks that aren't escaped and
1179: * are followed by whitespace are removed.
1180: *
1181: * @param s the string to unfold
1182: * @return the unfolded string
1183: * @since JavaMail 1.4
1184: */
1185: public static String unfold(String s) {
1186: if (!foldText)
1187: return s;
1188:
1189: StringBuilder sb = null;
1190: int i;
1191: while ((i = indexOfAny(s, "\r\n")) >= 0) {
1192: int start = i;
1193: int slen = s.length();
1194: i++; // skip CR or NL
1195: if (i < slen && s.charAt(i - 1) == '\r' && s.charAt(i) == '\n')
1196: i++; // skip LF
1197: if (start > 0 && s.charAt(start - 1) == '\\') {
1198: // there's a backslash before the line break
1199: // strip it out, but leave in the line break
1200: if (sb == null)
1201: sb = new StringBuilder(s.length());
1202: sb.append(s, 0, start - 1);
1203: sb.append(s, start, i);
1204: s = s.substring(i);
1205: } else {
1206: char c;
1207: // if next line starts with whitespace,
1208: // or at the end of the string, remove the line break
1209: // XXX - next line should always start with whitespace
1210: if (i >= slen || (c = s.charAt(i)) == ' ' || c == '\t') {
1211: if (sb == null)
1212: sb = new StringBuilder(s.length());
1213: sb.append(s, 0, start);
1214: s = s.substring(i);
1215: } else {
1216: // it's not a continuation line, just leave in the newline
1217: if (sb == null)
1218: sb = new StringBuilder(s.length());
1219: sb.append(s, 0, i);
1220: s = s.substring(i);
1221: }
1222: }
1223: }
1224: if (sb != null) {
1225: sb.append(s);
1226: return sb.toString();
1227: } else
1228: return s;
1229: }
1230:
1231: /**
1232: * Return the first index of any of the characters in "any" in "s",
1233: * or -1 if none are found.
1234: *
1235: * This should be a method on String.
1236: */
1237: private static int indexOfAny(String s, String any) {
1238: return indexOfAny(s, any, 0);
1239: }
1240:
1241: private static int indexOfAny(String s, String any, int start) {
1242: try {
1243: int len = s.length();
1244: for (int i = start; i < len; i++) {
1245: if (any.indexOf(s.charAt(i)) >= 0)
1246: return i;
1247: }
1248: return -1;
1249: } catch (StringIndexOutOfBoundsException e) {
1250: return -1;
1251: }
1252: }
1253:
1254: /**
1255: * Convert a MIME charset name into a valid Java charset name.
1256: *
1257: * @param charset the MIME charset name
1258: * @return the Java charset equivalent. If a suitable mapping is
1259: * not available, the passed in charset is itself returned.
1260: */
1261: public static String javaCharset(String charset) {
1262: if (mime2java == null || charset == null)
1263: // no mapping table, or charset parameter is null
1264: return charset;
1265:
1266: String alias = mime2java.get(charset.toLowerCase(Locale.ENGLISH));
1267: if (alias != null) {
1268: // verify that the mapped name is valid before trying to use it
1269: try {
1270: Charset.forName(alias);
1271: } catch (Exception ex) {
1272: alias = null; // charset alias not valid, use original name
1273: }
1274: }
1275: return alias == null ? charset : alias;
1276: }
1277:
1278: /**
1279: * Convert a java charset into its MIME charset name. <p>
1280: *
1281: * Note that a future version of JDK (post 1.2) might provide
1282: * this functionality, in which case, we may deprecate this
1283: * method then.
1284: *
1285: * @param charset the JDK charset
1286: * @return the MIME/IANA equivalent. If a mapping
1287: * is not possible, the passed in charset itself
1288: * is returned.
1289: * @since JavaMail 1.1
1290: */
1291: public static String mimeCharset(String charset) {
1292: if (java2mime == null || charset == null)
1293: // no mapping table or charset param is null
1294: return charset;
1295:
1296: String alias = java2mime.get(charset.toLowerCase(Locale.ENGLISH));
1297: return alias == null ? charset : alias;
1298: }
1299:
1300: private static String defaultJavaCharset;
1301: private static String defaultMIMECharset;
1302:
1303: /**
1304: * Get the default charset corresponding to the system's current
1305: * default locale. If the System property <code>mail.mime.charset</code>
1306: * is set, a system charset corresponding to this MIME charset will be
1307: * returned.
1308: *
1309: * @return the default charset of the system's default locale,
1310: * as a Java charset. (NOT a MIME charset)
1311: * @since JavaMail 1.1
1312: */
1313: public static String getDefaultJavaCharset() {
1314: if (defaultJavaCharset == null) {
1315: /*
1316: * If mail.mime.charset is set, it controls the default
1317: * Java charset as well.
1318: */
1319: String mimecs = null;
1320: try {
1321: mimecs = System.getProperty("mail.mime.charset");
1322: } catch (SecurityException ex) {
1323: } // ignore it
1324: if (mimecs != null && mimecs.length() > 0) {
1325: defaultJavaCharset = javaCharset(mimecs);
1326: return defaultJavaCharset;
1327: }
1328:
1329: try {
1330: defaultJavaCharset = System.getProperty("file.encoding",
1331: "8859_1");
1332: } catch (final SecurityException sex) {
1333: // fall back to ISO-Latin-1
1334: // don't use actual system encoding, because this might be
1335: // something completely different, like EBCDIC (IBM-037)
1336: if (defaultJavaCharset == null) {
1337: defaultJavaCharset = "8859_1";
1338: }
1339: }
1340: }
1341:
1342: return defaultJavaCharset;
1343: }
1344:
1345: /*
1346: * Get the default MIME charset for this locale.
1347: */
1348: static String getDefaultMIMECharset() {
1349: if (defaultMIMECharset == null) {
1350: try {
1351: defaultMIMECharset = System.getProperty("mail.mime.charset");
1352: } catch (SecurityException ex) {
1353: } // ignore it
1354: }
1355: if (defaultMIMECharset == null)
1356: defaultMIMECharset = mimeCharset(getDefaultJavaCharset());
1357: return defaultMIMECharset;
1358: }
1359:
1360: // Tables to map MIME charset names to Java names and vice versa.
1361: // XXX - Should eventually use J2SE 1.4 java.nio.charset.Charset
1362: private static Map<String, String> mime2java;
1363: private static Map<String, String> java2mime;
1364:
1365: static {
1366: java2mime = new HashMap<>(40);
1367: mime2java = new HashMap<>(14);
1368:
1369: try {
1370: // Use this class's classloader to load the mapping file
1371: // XXX - we should use SecuritySupport, but it's in another package
1372: InputStream is =
1373: jakarta.mail.internet.MimeUtility.class.getResourceAsStream(
1374: "/META-INF/javamail.charset.map");
1375:
1376: if (is != null) {
1377: try {
1378: LineInputStream lineInput = StreamProvider.provider().inputLineStream(is, false);
1379:
1380: // Load the JDK-to-MIME charset mapping table
1381: loadMappings(lineInput, java2mime);
1382:
1383: // Load the MIME-to-JDK charset mapping table
1384: loadMappings(lineInput, mime2java);
1385: } finally {
1386: try {
1387: is.close();
1388: } catch (Exception cex) {
1389: // ignore
1390: }
1391: }
1392: }
1393: } catch (Exception ex) {
1394: }
1395:
1396: // If we didn't load the tables, e.g., because we didn't have
1397: // permission, load them manually. The entries here should be
1398: // the same as the default javamail.charset.map.
1399: if (java2mime.isEmpty()) {
1400: java2mime.put("8859_1", "ISO-8859-1");
1401: java2mime.put("iso8859_1", "ISO-8859-1");
1402: java2mime.put("iso8859-1", "ISO-8859-1");
1403:
1404: java2mime.put("8859_2", "ISO-8859-2");
1405: java2mime.put("iso8859_2", "ISO-8859-2");
1406: java2mime.put("iso8859-2", "ISO-8859-2");
1407:
1408: java2mime.put("8859_3", "ISO-8859-3");
1409: java2mime.put("iso8859_3", "ISO-8859-3");
1410: java2mime.put("iso8859-3", "ISO-8859-3");
1411:
1412: java2mime.put("8859_4", "ISO-8859-4");
1413: java2mime.put("iso8859_4", "ISO-8859-4");
1414: java2mime.put("iso8859-4", "ISO-8859-4");
1415:
1416: java2mime.put("8859_5", "ISO-8859-5");
1417: java2mime.put("iso8859_5", "ISO-8859-5");
1418: java2mime.put("iso8859-5", "ISO-8859-5");
1419:
1420: java2mime.put("8859_6", "ISO-8859-6");
1421: java2mime.put("iso8859_6", "ISO-8859-6");
1422: java2mime.put("iso8859-6", "ISO-8859-6");
1423:
1424: java2mime.put("8859_7", "ISO-8859-7");
1425: java2mime.put("iso8859_7", "ISO-8859-7");
1426: java2mime.put("iso8859-7", "ISO-8859-7");
1427:
1428: java2mime.put("8859_8", "ISO-8859-8");
1429: java2mime.put("iso8859_8", "ISO-8859-8");
1430: java2mime.put("iso8859-8", "ISO-8859-8");
1431:
1432: java2mime.put("8859_9", "ISO-8859-9");
1433: java2mime.put("iso8859_9", "ISO-8859-9");
1434: java2mime.put("iso8859-9", "ISO-8859-9");
1435:
1436: java2mime.put("sjis", "Shift_JIS");
1437: java2mime.put("jis", "ISO-2022-JP");
1438: java2mime.put("iso2022jp", "ISO-2022-JP");
1439: java2mime.put("euc_jp", "euc-jp");
1440: java2mime.put("koi8_r", "koi8-r");
1441: java2mime.put("euc_cn", "euc-cn");
1442: java2mime.put("euc_tw", "euc-tw");
1443: java2mime.put("euc_kr", "euc-kr");
1444: }
1445: if (mime2java.isEmpty()) {
1446: mime2java.put("iso-2022-cn", "ISO2022CN");
1447: mime2java.put("iso-2022-kr", "ISO2022KR");
1448: mime2java.put("utf-8", "UTF8");
1449: mime2java.put("utf8", "UTF8");
1450: mime2java.put("ja_jp.iso2022-7", "ISO2022JP");
1451: mime2java.put("ja_jp.eucjp", "EUCJIS");
1452: mime2java.put("euc-kr", "KSC5601");
1453: mime2java.put("euckr", "KSC5601");
1454: mime2java.put("us-ascii", "ISO-8859-1");
1455: mime2java.put("x-us-ascii", "ISO-8859-1");
1456: mime2java.put("gb2312", "GB18030");
1457: mime2java.put("cp936", "GB18030");
1458: mime2java.put("ms936", "GB18030");
1459: mime2java.put("gbk", "GB18030");
1460: }
1461: }
1462:
1463: private static void loadMappings(LineInputStream is,
1464: Map<String, String> table) {
1465: String currLine;
1466:
1467: while (true) {
1468: try {
1469: currLine = is.readLine();
1470: } catch (IOException ioex) {
1471: break; // error in reading, stop
1472: }
1473:
1474: if (currLine == null) // end of file, stop
1475: break;
1476: if (currLine.startsWith("--") && currLine.endsWith("--"))
1477: // end of this table
1478: break;
1479:
1480: // ignore empty lines and comments
1481: if (currLine.trim().length() == 0 || currLine.startsWith("#"))
1482: continue;
1483:
1484: // A valid entry is of the form <key><separator><value>
1485: // where, <separator> := SPACE | HT. Parse this
1486: StringTokenizer tk = new StringTokenizer(currLine, " \t");
1487: try {
1488: String key = tk.nextToken();
1489: String value = tk.nextToken();
1490: table.put(key.toLowerCase(Locale.ENGLISH), value);
1491: } catch (NoSuchElementException nex) {
1492: }
1493: }
1494: }
1495:
1496: static final int ALL_ASCII = 1;
1497: static final int MOSTLY_ASCII = 2;
1498: static final int MOSTLY_NONASCII = 3;
1499:
1500: /**
1501: * Check if the given string contains non US-ASCII characters.
1502: *
1503: * @param s string
1504: * @return ALL_ASCII if all characters in the string
1505: * belong to the US-ASCII charset. MOSTLY_ASCII
1506: * if more than half of the available characters
1507: * are US-ASCII characters. Else MOSTLY_NONASCII.
1508: */
1509: static int checkAscii(String s) {
1510: int ascii = 0, non_ascii = 0;
1511: int l = s.length();
1512:
1513: for (int i = 0; i < l; i++) {
1514: if (nonascii(s.charAt(i))) // non-ascii
1515: non_ascii++;
1516: else
1517: ascii++;
1518: }
1519:
1520: if (non_ascii == 0)
1521: return ALL_ASCII;
1522: if (ascii > non_ascii)
1523: return MOSTLY_ASCII;
1524:
1525: return MOSTLY_NONASCII;
1526: }
1527:
1528: /**
1529: * Check if the given byte array contains non US-ASCII characters.
1530: *
1531: * @param b byte array
1532: * @return ALL_ASCII if all characters in the string
1533: * belong to the US-ASCII charset. MOSTLY_ASCII
1534: * if more than half of the available characters
1535: * are US-ASCII characters. Else MOSTLY_NONASCII.
1536: *
1537: * XXX - this method is no longer used
1538: */
1539: static int checkAscii(byte[] b) {
1540: int ascii = 0, non_ascii = 0;
1541:
1542: for (int i = 0; i < b.length; i++) {
1543: // The '&' operator automatically causes b[i] to be promoted
1544: // to an int, and we mask out the higher bytes in the int
1545: // so that the resulting value is not a negative integer.
1546: if (nonascii(b[i] & 0xff)) // non-ascii
1547: non_ascii++;
1548: else
1549: ascii++;
1550: }
1551:
1552: if (non_ascii == 0)
1553: return ALL_ASCII;
1554: if (ascii > non_ascii)
1555: return MOSTLY_ASCII;
1556:
1557: return MOSTLY_NONASCII;
1558: }
1559:
1560: /**
1561: * Check if the given input stream contains non US-ASCII characters.
1562: * Upto <code>max</code> bytes are checked. If <code>max</code> is
1563: * set to <code>ALL</code>, then all the bytes available in this
1564: * input stream are checked. If <code>breakOnNonAscii</code> is true
1565: * the check terminates when the first non-US-ASCII character is
1566: * found and MOSTLY_NONASCII is returned. Else, the check continues
1567: * till <code>max</code> bytes or till the end of stream.
1568: *
1569: * @param is the input stream
1570: * @param max maximum bytes to check for. The special value
1571: * ALL indicates that all the bytes in this input
1572: * stream must be checked.
1573: * @param breakOnNonAscii if <code>true</code>, then terminate the
1574: * the check when the first non-US-ASCII character
1575: * is found.
1576: * @return ALL_ASCII if all characters in the string
1577: * belong to the US-ASCII charset. MOSTLY_ASCII
1578: * if more than half of the available characters
1579: * are US-ASCII characters. Else MOSTLY_NONASCII.
1580: */
1581: static int checkAscii(InputStream is, int max, boolean breakOnNonAscii) {
1582: int ascii = 0, non_ascii = 0;
1583: int len;
1584: int block = 4096;
1585: int linelen = 0;
1586: boolean longLine = false, badEOL = false;
1587: boolean checkEOL = encodeEolStrict && breakOnNonAscii;
1588: byte[] buf = null;
1589: if (max != 0) {
1590: block = (max == ALL) ? 4096 : Math.min(max, 4096);
1591: buf = new byte[block];
1592: }
1593: while (max != 0) {
1594: try {
1595: if ((len = is.read(buf, 0, block)) == -1)
1596: break;
1597: int lastb = 0;
1598: for (int i = 0; i < len; i++) {
1599: // The '&' operator automatically causes b[i] to
1600: // be promoted to an int, and we mask out the higher
1601: // bytes in the int so that the resulting value is
1602: // not a negative integer.
1603: int b = buf[i] & 0xff;
1604: if (checkEOL &&
1605: ((lastb == '\r' && b != '\n') ||
1606: (lastb != '\r' && b == '\n')))
1607: badEOL = true;
1608: if (b == '\r' || b == '\n')
1609: linelen = 0;
1610: else {
1611: linelen++;
1612: if (linelen > 998) // 1000 - CRLF
1613: longLine = true;
1614: }
1615: if (nonascii(b)) { // non-ascii
1616: if (breakOnNonAscii) // we are done
1617: return MOSTLY_NONASCII;
1618: else
1619: non_ascii++;
1620: } else
1621: ascii++;
1622: lastb = b;
1623: }
1624: } catch (IOException ioex) {
1625: break;
1626: }
1627: if (max != ALL)
1628: max -= len;
1629: }
1630:
1631: if (max == 0 && breakOnNonAscii)
1632: // We have been told to break on the first non-ascii character.
1633: // We haven't got any non-ascii character yet, but then we
1634: // have not checked all of the available bytes either. So we
1635: // cannot say for sure that this input stream is ALL_ASCII,
1636: // and hence we must play safe and return MOSTLY_NONASCII
1637:
1638: return MOSTLY_NONASCII;
1639:
1640: if (non_ascii == 0) { // no non-us-ascii characters so far
1641: // If we're looking at non-text data, and we saw CR without LF
1642: // or vice versa, consider this mostly non-ASCII so that it
1643: // will be base64 encoded (since the quoted-printable encoder
1644: // doesn't encode this case properly).
1645: if (badEOL)
1646: return MOSTLY_NONASCII;
1647: // if we've seen a long line, we degrade to mostly ascii
1648: else if (longLine)
1649: return MOSTLY_ASCII;
1650: else
1651: return ALL_ASCII;
1652: }
1653: if (ascii > non_ascii) // mostly ascii
1654: return MOSTLY_ASCII;
1655: return MOSTLY_NONASCII;
1656: }
1657:
1658: static final boolean nonascii(int b) {
1659: return b >= 0177 || (b < 040 && b != '\r' && b != '\n' && b != '\t');
1660: }
1661:
1662: // This is a copy of ASCIIUtility#getBytes that was moved to implementation module
1663: public static byte[] getBytes(String s) {
1664: char[] chars = s.toCharArray();
1665: int size = chars.length;
1666: byte[] bytes = new byte[size];
1667:
1668: for (int i = 0; i < size; )
1669: bytes[i] = (byte) chars[i++];
1670: return bytes;
1671: }
1672:
1673: // This is a copy of ASCIIUtility#getBytes that was moved to implementation module
1674: public static byte[] getBytes(InputStream is) throws IOException {
1675: int len;
1676: int size = 1024;
1677: byte[] buf;
1678: if (is instanceof ByteArrayInputStream) {
1679: size = is.available();
1680: buf = new byte[size];
1681: len = is.read(buf, 0, size);
1682: } else {
1683: ByteArrayOutputStream bos = new ByteArrayOutputStream();
1684: buf = new byte[size];
1685: while ((len = is.read(buf, 0, size)) != -1)
1686: bos.write(buf, 0, len);
1687: buf = bos.toByteArray();
1688: }
1689: return buf;
1690: }
1691:
1692: /**
1693: * Get a boolean valued property.
1694: *
1695: * @param props the properties
1696: * @param name the property name
1697: * @param def default value if property not found
1698: * @return the property value
1699: */
1700: static boolean getBooleanProperty(Properties props, String name, boolean def) {
1701: return getBoolean(getProp(props, name), def);
1702: }
1703:
1704: /**
1705: * Get a boolean valued System property.
1706: *
1707: * @param name the property name
1708: * @param def default value if property not found
1709: * @return the property value
1710: */
1711: static boolean getBooleanSystemProperty(String name, boolean def) {
1712: try {
1713: return getBoolean(getProp(System.getProperties(), name), def);
1714: } catch (SecurityException sex) {
1715: // fall through...
1716: }
1717:
1718: /*
1719: * If we can't get the entire System Properties object because
1720: * of a SecurityException, just ask for the specific property.
1721: */
1722: try {
1723: String value = System.getProperty(name);
1724: if (value == null)
1725: return def;
1726: if (def)
1727: return !value.equalsIgnoreCase("false");
1728: else
1729: return value.equalsIgnoreCase("true");
1730: } catch (SecurityException sex) {
1731: return def;
1732: }
1733: }
1734:
1735: /**
1736: * Get the value of the specified property.
1737: * If the "get" method returns null, use the getProperty method,
1738: * which might cascade to a default Properties object.
1739: */
1740: private static Object getProp(Properties props, String name) {
1741: Object val = props.get(name);
1742: if (val != null)
1743: return val;
1744: else
1745: return props.getProperty(name);
1746: }
1747:
1748: /**
1749: * Interpret the value object as a boolean,
1750: * returning def if unable.
1751: */
1752: private static boolean getBoolean(Object value, boolean def) {
1753: if (value == null)
1754: return def;
1755: if (value instanceof String) {
1756: /*
1757: * If the default is true, only "false" turns it off.
1758: * If the default is false, only "true" turns it on.
1759: */
1760: if (def)
1761: return !((String) value).equalsIgnoreCase("false");
1762: else
1763: return ((String) value).equalsIgnoreCase("true");
1764: }
1765: if (value instanceof Boolean)
1766: return ((Boolean) value).booleanValue();
1767: return def;
1768: }
1769: }
1770:
1771: /**
1772: * An OutputStream that determines whether the data written to
1773: * it is all ASCII, mostly ASCII, or mostly non-ASCII.
1774: */
1775: class AsciiOutputStream extends OutputStream {
1776: private boolean breakOnNonAscii;
1777: private int ascii = 0, non_ascii = 0;
1778: private int linelen = 0;
1779: private boolean longLine = false;
1780: private boolean badEOL = false;
1781: private boolean checkEOL = false;
1782: private int lastb = 0;
1783: private int ret = 0;
1784:
1785: public AsciiOutputStream(boolean breakOnNonAscii, boolean encodeEolStrict) {
1786: this.breakOnNonAscii = breakOnNonAscii;
1787:• checkEOL = encodeEolStrict && breakOnNonAscii;
1788: }
1789:
1790: @Override
1791: public void write(int b) throws IOException {
1792: check(b);
1793: }
1794:
1795: @Override
1796: public void write(byte[] b) throws IOException {
1797: write(b, 0, b.length);
1798: }
1799:
1800: @Override
1801: public void write(byte[] b, int off, int len) throws IOException {
1802: len += off;
1803:• for (int i = off; i < len; i++)
1804: check(b[i]);
1805: }
1806:
1807: private final void check(int b) throws IOException {
1808: b &= 0xff;
1809:• if (checkEOL &&
1810: ((lastb == '\r' && b != '\n') || (lastb != '\r' && b == '\n')))
1811: badEOL = true;
1812:• if (b == '\r' || b == '\n')
1813: linelen = 0;
1814: else {
1815: linelen++;
1816:• if (linelen > 998) // 1000 - CRLF
1817: longLine = true;
1818: }
1819:• if (MimeUtility.nonascii(b)) { // non-ascii
1820: non_ascii++;
1821:• if (breakOnNonAscii) { // we are done
1822: ret = MimeUtility.MOSTLY_NONASCII;
1823: throw new EOFException();
1824: }
1825: } else
1826: ascii++;
1827: lastb = b;
1828: }
1829:
1830: /**
1831: * Return ASCII-ness of data stream.
1832: */
1833: public int getAscii() {
1834:• if (ret != 0)
1835: return ret;
1836: // If we're looking at non-text data, and we saw CR without LF
1837: // or vice versa, consider this mostly non-ASCII so that it
1838: // will be base64 encoded (since the quoted-printable encoder
1839: // doesn't encode this case properly).
1840:• if (badEOL)
1841: return MimeUtility.MOSTLY_NONASCII;
1842:• else if (non_ascii == 0) { // no non-us-ascii characters so far
1843: // if we've seen a long line, we degrade to mostly ascii
1844:• if (longLine)
1845: return MimeUtility.MOSTLY_ASCII;
1846: else
1847: return MimeUtility.ALL_ASCII;
1848: }
1849:• if (ascii > non_ascii) // mostly ascii
1850: return MimeUtility.MOSTLY_ASCII;
1851: return MimeUtility.MOSTLY_NONASCII;
1852: }
1853: }