Skip to content

Method: toUnicodeString()

1: /*
2: * Copyright (c) 1997, 2023 Oracle and/or its affiliates. All rights reserved.
3: *
4: * This program and the accompanying materials are made available under the
5: * terms of the Eclipse Public License v. 2.0, which is available at
6: * http://www.eclipse.org/legal/epl-2.0.
7: *
8: * This Source Code may also be made available under the following Secondary
9: * Licenses when the conditions for such availability set forth in the
10: * Eclipse Public License v. 2.0 are satisfied: GNU General Public License,
11: * version 2 with the GNU Classpath Exception, which is available at
12: * https://www.gnu.org/software/classpath/license.html.
13: *
14: * SPDX-License-Identifier: EPL-2.0 OR GPL-2.0 WITH Classpath-exception-2.0
15: */
16:
17: package jakarta.mail.internet;
18:
19: import jakarta.mail.Address;
20: import jakarta.mail.Session;
21:
22: import java.io.UnsupportedEncodingException;
23: import java.net.InetAddress;
24: import java.net.UnknownHostException;
25: import java.nio.charset.StandardCharsets;
26: import java.util.ArrayList;
27: import java.util.List;
28: import java.util.Locale;
29: import java.util.StringTokenizer;
30:
31: /**
32: * This class represents an Internet email address using the syntax
33: * of <a href="http://www.ietf.org/rfc/rfc822.txt" target="_top">RFC822</a>.
34: * Typical address syntax is of the form "user@host.domain" or
35: * "Personal Name <user@host.domain>".
36: *
37: * @author Bill Shannon
38: * @author John Mani
39: */
40:
41: public class InternetAddress extends Address implements Cloneable {
42:
43: protected String address; // email address
44:
45: /**
46: * The personal name.
47: */
48: protected String personal;
49:
50: /**
51: * The RFC 2047 encoded version of the personal name. <p>
52: *
53: * This field and the <code>personal</code> field track each
54: * other, so if a subclass sets one of these fields directly, it
55: * should set the other to <code>null</code>, so that it is
56: * suitably recomputed.
57: */
58: protected String encodedPersonal;
59:
60: private static final long serialVersionUID = -7507595530758302903L;
61:
62: private static final boolean ignoreBogusGroupName =
63: MimeUtility.getBooleanSystemProperty(
64: "mail.mime.address.ignorebogusgroupname", true);
65:
66: private static final boolean useCanonicalHostName =
67: MimeUtility.getBooleanSystemProperty(
68: "mail.mime.address.usecanonicalhostname", true);
69:
70: private static final boolean allowUtf8 =
71: MimeUtility.getBooleanSystemProperty("mail.mime.allowutf8", false);
72:
/**
 * Creates an empty InternetAddress. No fields are assigned here, so the
 * address and both personal-name fields keep their default values.
 */
public InternetAddress() {
    super();
}
78:
/**
 * Creates an InternetAddress by parsing the given string. <p>
 *
 * The string is handed to <code>parse(address, true)</code> and must
 * yield exactly one address; the address and personal-name fields of
 * that single result are copied into this object.
 * This constructor does <b>not</b> perform the additional syntax checks
 * that the <code>InternetAddress(String address, boolean strict)</code>
 * constructor performs when <code>strict</code> is <code>true</code>;
 * it is equivalent to <code>InternetAddress(address, false)</code>.
 *
 * @param address the address in RFC822 format
 * @throws AddressException if the parse failed, or if the string
 *                          did not contain exactly one address
 */
public InternetAddress(String address) throws AddressException {
    // Reuse the list parser; exactly one entry is acceptable here.
    InternetAddress[] parsed = parse(address, true);
    if (parsed.length != 1) {
        throw new AddressException("Illegal address", address);
    }

    // Copy the state of the single parsed address into this instance.
    InternetAddress only = parsed[0];
    this.address = only.address;
    this.personal = only.personal;
    this.encodedPersonal = only.encodedPersonal;
}
112:
/**
 * Creates an InternetAddress by parsing the given string, optionally
 * followed by a stricter syntax check of the result.
 * When <code>strict</code> is false the detailed syntax of the
 * address is not checked.
 *
 * @param address the address in RFC822 format
 * @param strict  enforce RFC822 syntax
 * @throws AddressException if the parse failed
 * @since JavaMail 1.3
 */
public InternetAddress(String address, boolean strict)
        throws AddressException {
    this(address);
    if (!strict) {
        return;
    }
    if (isGroup()) {
        // Called only for its validation side effect; the members are discarded.
        getGroup(true);
    } else {
        checkAddress(this.address, true, true);
    }
}
133:
/**
 * Creates an InternetAddress from an address and a personal name,
 * passing a <code>null</code> charset to the three-argument constructor.
 * The address is assumed to be a syntactically valid RFC822 address;
 * it is not parsed or checked here.
 *
 * @param address  the address in RFC822 format
 * @param personal the personal name
 * @throws UnsupportedEncodingException if the personal name
 *                                      can't be encoded
 */
public InternetAddress(String address, String personal)
        throws UnsupportedEncodingException {
    this(address, personal, (String) null);
}
147:
/**
 * Creates an InternetAddress from an address, a personal name and the
 * charset to use for the name.
 * The address is stored as given (it is assumed to be a syntactically
 * valid RFC822 address); the personal name is set through
 * <code>setPersonal(personal, charset)</code>.
 *
 * @param address  the address in RFC822 format
 * @param personal the personal name
 * @param charset  the MIME charset for the name
 * @throws UnsupportedEncodingException if the personal name
 *                                      can't be encoded in the given charset
 */
public InternetAddress(String address, String personal, String charset)
        throws UnsupportedEncodingException {
    // The address is assigned before the personal name is set, as in the original ordering.
    this.address = address;
    this.setPersonal(personal, charset);
}
163:
164: /**
165: * Return a copy of this InternetAddress object.
166: *
167: * @since JavaMail 1.2
168: */
169: @Override
170: public Object clone() {
171: InternetAddress a = null;
172: try {
173: a = (InternetAddress) super.clone();
174: } catch (CloneNotSupportedException e) {
175: } // Won't happen
176: return a;
177: }
178:
179: /**
180: * Return the type of this address. The type of an InternetAddress
181: * is "rfc822".
182: */
183: @Override
184: public String getType() {
185: return "rfc822";
186: }
187:
188: /**
189: * Set the email address.
190: *
191: * @param address email address
192: */
193: public void setAddress(String address) {
194: this.address = address;
195: }
196:
197: /**
198: * Set the personal name. If the name contains non US-ASCII
199: * characters, then the name will be encoded using the specified
200: * charset as per RFC 2047. If the name contains only US-ASCII
201: * characters, no encoding is done and the name is used as is.
202: *
203: * @param name personal name
204: * @param charset MIME charset to be used to encode the name as
205: * per RFC 2047
206: * @throws UnsupportedEncodingException if the charset encoding
207: * fails.
208: * @see #setPersonal(String)
209: */
210: public void setPersonal(String name, String charset)
211: throws UnsupportedEncodingException {
212: personal = name;
213: if (name != null)
214: encodedPersonal = MimeUtility.encodeWord(name, charset, null);
215: else
216: encodedPersonal = null;
217: }
218:
219: /**
220: * Set the personal name. If the name contains non US-ASCII
221: * characters, then the name will be encoded using the platform's
222: * default charset. If the name contains only US-ASCII characters,
223: * no encoding is done and the name is used as is.
224: *
225: * @param name personal name
226: * @throws UnsupportedEncodingException if the charset encoding
227: * fails.
228: * @see #setPersonal(String name, String charset)
229: */
230: public void setPersonal(String name)
231: throws UnsupportedEncodingException {
232: personal = name;
233: if (name != null)
234: encodedPersonal = MimeUtility.encodeWord(name);
235: else
236: encodedPersonal = null;
237: }
238:
239: /**
240: * Get the email address.
241: *
242: * @return email address
243: */
244: public String getAddress() {
245: return address;
246: }
247:
248: /**
249: * Get the personal name. If the name is encoded as per RFC 2047,
250: * it is decoded and converted into Unicode. If the decoding or
251: * conversion fails, the raw data is returned as is.
252: *
253: * @return personal name
254: */
255: public String getPersonal() {
256: if (personal != null)
257: return personal;
258:
259: if (encodedPersonal != null) {
260: try {
261: personal = MimeUtility.decodeText(encodedPersonal);
262: return personal;
263: } catch (Exception ex) {
264: // 1. ParseException: either its an unencoded string or
265: //        it can't be parsed
266: // 2. UnsupportedEncodingException: can't decode it.
267: return encodedPersonal;
268: }
269: }
270: // No personal or encodedPersonal, return null
271: return null;
272: }
273:
274: /**
275: * Convert this address into a RFC 822 / RFC 2047 encoded address.
276: * The resulting string contains only US-ASCII characters, and
277: * hence is mail-safe.
278: *
279: * @return possibly encoded address string
280: */
281: @Override
282: public String toString() {
283: String a = address == null ? "" : address;
284: if (encodedPersonal == null && personal != null)
285: try {
286: encodedPersonal = MimeUtility.encodeWord(personal);
287: } catch (UnsupportedEncodingException ex) {
288: }
289:
290: if (encodedPersonal != null)
291: return quotePhrase(encodedPersonal) + " <" + a + ">";
292: else if (isGroup() || isSimple())
293: return a;
294: else
295: return "<" + a + ">";
296: }
297:
298: /**
299: * Returns a properly formatted address (RFC 822 syntax) of
300: * Unicode characters.
301: *
302: * @return Unicode address string
303: * @since JavaMail 1.2
304: */
305: public String toUnicodeString() {
306: String p = getPersonal();
307:• if (p != null)
308: return quotePhrase(p) + " <" + address + ">";
309:• else if (isGroup() || isSimple())
310: return address;
311: else
312: return "<" + address + ">";
313: }
314:
315: /*
316: * quotePhrase() quotes the words within a RFC822 phrase.
317: *
318: * This is tricky, since a phrase is defined as 1 or more
319: * RFC822 words, separated by LWSP. Now, a word that contains
320: * LWSP is supposed to be quoted, and this is exactly what the
321: * MimeUtility.quote() method does. However, when dealing with
322: * a phrase, any LWSP encountered can be construed to be the
323: * separator between words, and not part of the words themselves.
324: * To deal with this funkiness, we have the below variant of
325: * MimeUtility.quote(), which essentially ignores LWSP when
326: * deciding whether to quote a word.
327: *
328: * It aint pretty, but it gets the job done :)
329: */
330:
331: private static final String rfc822phrase =
332: HeaderTokenizer.RFC822.replace(' ', '\0').replace('\t', '\0');
333:
334: private static String quotePhrase(String phrase) {
335: int len = phrase.length();
336: boolean needQuoting = false;
337:
338: for (int i = 0; i < len; i++) {
339: char c = phrase.charAt(i);
340: if (c == '"' || c == '\\') {
341: // need to escape them and then quote the whole string
342: StringBuilder sb = new StringBuilder(len + 3);
343: sb.append('"');
344: for (int j = 0; j < len; j++) {
345: char cc = phrase.charAt(j);
346: if (cc == '"' || cc == '\\')
347: // Escape the character
348: sb.append('\\');
349: sb.append(cc);
350: }
351: sb.append('"');
352: return sb.toString();
353: } else if ((c < 040 && c != '\r' && c != '\n' && c != '\t') ||
354: (c >= 0177 && !allowUtf8) || rfc822phrase.indexOf(c) >= 0)
355: // These characters cause the string to be quoted
356: needQuoting = true;
357: }
358:
359: if (needQuoting) {
360: StringBuilder sb = new StringBuilder(len + 2);
361: sb.append('"').append(phrase).append('"');
362: return sb.toString();
363: } else
364: return phrase;
365: }
366:
367: private static String unquote(String s) {
368: if (s.startsWith("\"") && s.endsWith("\"") && s.length() > 1) {
369: s = s.substring(1, s.length() - 1);
370: // check for any escaped characters
371: if (s.indexOf('\\') >= 0) {
372: StringBuilder sb = new StringBuilder(s.length()); // approx
373: for (int i = 0; i < s.length(); i++) {
374: char c = s.charAt(i);
375: if (c == '\\' && i < s.length() - 1)
376: c = s.charAt(++i);
377: sb.append(c);
378: }
379: s = sb.toString();
380: }
381: }
382: return s;
383: }
384:
385: /**
386: * The equality operator.
387: */
388: @Override
389: public boolean equals(Object a) {
390: if (!(a instanceof InternetAddress))
391: return false;
392:
393: String s = ((InternetAddress) a).getAddress();
394: if (s == address)
395: return true;
396: if (address != null && address.equalsIgnoreCase(s))
397: return true;
398:
399: return false;
400: }
401:
402: /**
403: * Compute a hash code for the address.
404: */
405: @Override
406: public int hashCode() {
407: if (address == null)
408: return 0;
409: else
410: return address.toLowerCase(Locale.ENGLISH).hashCode();
411: }
412:
413: /**
414: * Convert the given array of InternetAddress objects into
415: * a comma separated sequence of address strings. The
416: * resulting string contains only US-ASCII characters, and
417: * hence is mail-safe.
418: *
419: * @param addresses array of InternetAddress objects
420: * @return comma separated string of addresses
421: * @throws ClassCastException if any address object in the
422: * given array is not an InternetAddress object. Note
423: * that this is a RuntimeException.
424: */
425: public static String toString(Address[] addresses) {
426: return toString(addresses, 0);
427: }
428:
429: /**
430: * Convert the given array of InternetAddress objects into
431: * a comma separated sequence of address strings. The
432: * resulting string contains Unicode characters.
433: *
434: * @param addresses array of InternetAddress objects
435: * @return comma separated string of addresses
436: * @throws ClassCastException if any address object in the
437: * given array is not an InternetAddress object. Note
438: * that this is a RuntimeException.
439: * @since JavaMail 1.6
440: */
441: public static String toUnicodeString(Address[] addresses) {
442: return toUnicodeString(addresses, 0);
443: }
444:
445: /**
446: * Convert the given array of InternetAddress objects into
447: * a comma separated sequence of address strings. The
448: * resulting string contains only US-ASCII characters, and
449: * hence is mail-safe. <p>
450: *
451: * The 'used' parameter specifies the number of character positions
452: * already taken up in the field into which the resulting address
453: * sequence string is to be inserted. It is used to determine the
454: * line-break positions in the resulting address sequence string.
455: *
456: * @param addresses array of InternetAddress objects
457: * @param used number of character positions already used, in
458: * the field into which the address string is to
459: * be inserted.
460: * @return comma separated string of addresses
461: * @throws ClassCastException if any address object in the
462: * given array is not an InternetAddress object. Note
463: * that this is a RuntimeException.
464: */
465: public static String toString(Address[] addresses, int used) {
466: if (addresses == null || addresses.length == 0)
467: return null;
468:
469: StringBuilder sb = new StringBuilder();
470:
471: for (int i = 0; i < addresses.length; i++) {
472: if (i != 0) { // need to append comma
473: sb.append(", ");
474: used += 2;
475: }
476:
477: // prefer not to split a single address across lines so used=0 below
478: String s = MimeUtility.fold(0, addresses[i].toString());
479: int len = lengthOfFirstSegment(s); // length till CRLF
480: if (used + len > 76) { // overflows ...
481: // smash trailing space from ", " above
482: int curlen = sb.length();
483: if (curlen > 0 && sb.charAt(curlen - 1) == ' ')
484: sb.setLength(curlen - 1);
485: sb.append("\r\n\t"); // .. start new continuation line
486: used = 8; // account for the starting <tab> char
487: }
488: sb.append(s);
489: used = lengthOfLastSegment(s, used);
490: }
491:
492: return sb.toString();
493: }
494:
495: /**
496: * Convert the given array of InternetAddress objects into
497: * a comma separated sequence of address strings. The
498: * resulting string contains Unicode characters. <p>
499: *
500: * The 'used' parameter specifies the number of character positions
501: * already taken up in the field into which the resulting address
502: * sequence string is to be inserted. It is used to determine the
503: * line-break positions in the resulting address sequence string.
504: *
505: * @param addresses array of InternetAddress objects
506: * @param used number of character positions already used, in
507: * the field into which the address string is to
508: * be inserted.
509: * @return comma separated string of addresses
510: * @throws ClassCastException if any address object in the
511: * given array is not an InternetAddress object. Note
512: * that this is a RuntimeException.
513: * @since JavaMail 1.6
514: */
515: /*
516: * XXX - This is exactly the same as the above, except it uses
517: *         toUnicodeString instead of toString.
518: * XXX - Since the line length restrictions are in bytes, not characters,
519: *         we convert all non-ASCII addresses to UTF-8 byte strings,
520: *         which we then convert to ISO-8859-1 Strings where every
521: *         character respresents one UTF-8 byte. At the end we reverse
522: *         the conversion to get back to a correct Unicode string.
523: *         This is a hack to allow all the other character-based methods
524: *         to work properly with UTF-8 bytes.
525: */
526: public static String toUnicodeString(Address[] addresses, int used) {
527: if (addresses == null || addresses.length == 0)
528: return null;
529:
530: StringBuilder sb = new StringBuilder();
531:
532: boolean sawNonAscii = false;
533: for (int i = 0; i < addresses.length; i++) {
534: if (i != 0) { // need to append comma
535: sb.append(", ");
536: used += 2;
537: }
538:
539: // prefer not to split a single address across lines so used=0 below
540: String as = ((InternetAddress) addresses[i]).toUnicodeString();
541: if (MimeUtility.checkAscii(as) != MimeUtility.ALL_ASCII) {
542: sawNonAscii = true;
543: as = new String(as.getBytes(StandardCharsets.UTF_8),
544: StandardCharsets.ISO_8859_1);
545: }
546: String s = MimeUtility.fold(0, as);
547: int len = lengthOfFirstSegment(s); // length till CRLF
548: if (used + len > 76) { // overflows ...
549: // smash trailing space from ", " above
550: int curlen = sb.length();
551: if (curlen > 0 && sb.charAt(curlen - 1) == ' ')
552: sb.setLength(curlen - 1);
553: sb.append("\r\n\t"); // .. start new continuation line
554: used = 8; // account for the starting <tab> char
555: }
556: sb.append(s);
557: used = lengthOfLastSegment(s, used);
558: }
559:
560: String ret = sb.toString();
561: if (sawNonAscii)
562: ret = new String(ret.getBytes(StandardCharsets.ISO_8859_1),
563: StandardCharsets.UTF_8);
564: return ret;
565: }
566:
567: /*
568: * Return the length of the first segment within this string.
569: * If no segments exist, the length of the whole line is returned.
570: */
571: private static int lengthOfFirstSegment(String s) {
572: int pos;
573: if ((pos = s.indexOf("\r\n")) != -1)
574: return pos;
575: else
576: return s.length();
577: }
578:
579: /*
580: * Return the length of the last segment within this string.
581: * If no segments exist, the length of the whole line plus
582: * <code>used</code> is returned.
583: */
584: private static int lengthOfLastSegment(String s, int used) {
585: int pos;
586: if ((pos = s.lastIndexOf("\r\n")) != -1)
587: return s.length() - pos - 2;
588: else
589: return s.length() + used;
590: }
591:
592: /**
593: * Return an InternetAddress object representing the current user.
594: * The entire email address may be specified in the "mail.from"
595: * property. If not set, the "mail.user" and "mail.host" properties
596: * are tried. If those are not set, the "user.name" property and
597: * <code>InetAddress.getLocalHost</code> method are tried.
598: * Security exceptions that may occur while accessing this information
599: * are ignored. If it is not possible to determine an email address,
600: * null is returned.
601: *
602: * @param session Session object used for property lookup
603: * @return current user's email address
604: */
605: public static InternetAddress getLocalAddress(Session session) {
606: try {
607: return _getLocalAddress(session);
608: } catch (SecurityException | AddressException sex) { // ignore it
609: } catch (UnknownHostException ex) {
610: } // ignore it
611: return null;
612: }
613:
614: /**
615: * A package-private version of getLocalAddress that doesn't swallow
616: * the exception. Used by MimeMessage.setFrom() to report the reason
617: * for the failure.
618: */
619: // package-private
620: static InternetAddress _getLocalAddress(Session session)
621: throws SecurityException, AddressException, UnknownHostException {
622: String user = null, host = null, address = null;
623: if (session == null) {
624: user = System.getProperty("user.name");
625: host = getLocalHostName();
626: } else {
627: address = session.getProperty("mail.from");
628: if (address == null) {
629: user = session.getProperty("mail.user");
630: if (user == null || user.length() == 0)
631: user = session.getProperty("user.name");
632: if (user == null || user.length() == 0)
633: user = System.getProperty("user.name");
634: host = session.getProperty("mail.host");
635: if (host == null || host.length() == 0)
636: host = getLocalHostName();
637: }
638: }
639:
640: if (address == null && user != null && user.length() != 0 &&
641: host != null && host.length() != 0)
642: address = MimeUtility.quote(user.trim(), specialsNoDot + "\t ") +
643: "@" + host;
644:
645: if (address == null)
646: return null;
647:
648: return new InternetAddress(address);
649: }
650:
651: /**
652: * Get the local host name from InetAddress and return it in a form
653: * suitable for use in an email address.
654: */
655: private static String getLocalHostName() throws UnknownHostException {
656: String host = null;
657: InetAddress me = InetAddress.getLocalHost();
658: if (me != null) {
659: // try canonical host name first
660: if (useCanonicalHostName)
661: host = me.getCanonicalHostName();
662: if (host == null)
663: host = me.getHostName();
664: // if we can't get our name, use local address literal
665: if (host == null)
666: host = me.getHostAddress();
667: if (host != null && host.length() > 0 && isInetAddressLiteral(host))
668: host = '[' + host + ']';
669: }
670: return host;
671: }
672:
673: /**
674: * Is the address an IPv4 or IPv6 address literal, which needs to
675: * be enclosed in "[]" in an email address? IPv4 literals contain
676: * decimal digits and dots, IPv6 literals contain hex digits, dots,
677: * and colons. We're lazy and don't check the exact syntax, just
678: * the allowed characters; strings that have only the allowed
679: * characters in a literal but don't meet the syntax requirements
680: * for a literal definitely can't be a host name and thus will fail
681: * later when used as an address literal.
682: */
683: private static boolean isInetAddressLiteral(String addr) {
684: boolean sawHex = false, sawColon = false;
685: for (int i = 0; i < addr.length(); i++) {
686: char c = addr.charAt(i);
687: if (c >= '0' && c <= '9')
688: ; // digits always ok
689: else if (c == '.')
690: ; // dot always ok
691: else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
692: sawHex = true; // need to see a colon too
693: else if (c == ':')
694: sawColon = true;
695: else
696: return false; // anything else, definitely not a literal
697: }
698: return !sawHex || sawColon;
699: }
700:
701: /**
702: * Parse the given comma separated sequence of addresses into
703: * InternetAddress objects. Addresses must follow RFC822 syntax.
704: *
705: * @param addresslist comma separated address strings
706: * @return array of InternetAddress objects
707: * @throws AddressException if the parse failed
708: */
709: public static InternetAddress[] parse(String addresslist)
710: throws AddressException {
711: return parse(addresslist, true);
712: }
713:
714: /**
715: * Parse the given sequence of addresses into InternetAddress
716: * objects. If <code>strict</code> is false, simple email addresses
717: * separated by spaces are also allowed. If <code>strict</code> is
718: * true, many (but not all) of the RFC822 syntax rules are enforced.
719: * In particular, even if <code>strict</code> is true, addresses
720: * composed of simple names (with no "@domain" part) are allowed.
721: * Such "illegal" addresses are not uncommon in real messages. <p>
722: *
723: * Non-strict parsing is typically used when parsing a list of
724: * mail addresses entered by a human. Strict parsing is typically
725: * used when parsing address headers in mail messages.
726: *
727: * @param addresslist comma separated address strings
728: * @param strict enforce RFC822 syntax
729: * @return array of InternetAddress objects
730: * @throws AddressException if the parse failed
731: */
732: public static InternetAddress[] parse(String addresslist, boolean strict)
733: throws AddressException {
734: return parse(addresslist, strict, false);
735: }
736:
737: /**
738: * Parse the given sequence of addresses into InternetAddress
739: * objects. If <code>strict</code> is false, the full syntax rules for
740: * individual addresses are not enforced. If <code>strict</code> is
741: * true, many (but not all) of the RFC822 syntax rules are enforced. <p>
742: *
743: * To better support the range of "invalid" addresses seen in real
744: * messages, this method enforces fewer syntax rules than the
745: * <code>parse</code> method when the strict flag is false
746: * and enforces more rules when the strict flag is true. If the
747: * strict flag is false and the parse is successful in separating out an
748: * email address or addresses, the syntax of the addresses themselves
749: * is not checked.
750: *
751: * @param addresslist comma separated address strings
752: * @param strict enforce RFC822 syntax
753: * @return array of InternetAddress objects
754: * @throws AddressException if the parse failed
755: * @since JavaMail 1.3
756: */
757: public static InternetAddress[] parseHeader(String addresslist,
758: boolean strict) throws AddressException {
759: return parse(MimeUtility.unfold(addresslist), strict, true);
760: }
761:
762: /*
763: * RFC822 Address parser.
764: *
765: * XXX - This is complex enough that it ought to be a real parser,
766: * not this ad-hoc mess, and because of that, this is not perfect.
767: *
768: * XXX - Deal with encoded Headers too.
769: */
770: @SuppressWarnings("fallthrough")
771: private static InternetAddress[] parse(String s, boolean strict,
772: boolean parseHdr) throws AddressException {
773: int start, end, index, nesting;
774: int start_personal = -1, end_personal = -1;
775: int length = s.length();
776: boolean ignoreErrors = parseHdr && !strict;
777: boolean in_group = false; // we're processing a group term
778: boolean route_addr = false; // address came from route-addr term
779: boolean rfc822 = false; // looks like an RFC822 address
780: char c;
781: List<InternetAddress> v = new ArrayList<>();
782: InternetAddress ma;
783:
784: for (start = end = -1, index = 0; index < length; index++) {
785: c = s.charAt(index);
786:
787: switch (c) {
788: case '(': // We are parsing a Comment. Ignore everything inside.
789: // XXX - comment fields should be parsed as whitespace,
790: //         more than one allowed per address
791: rfc822 = true;
792: if (start >= 0 && end == -1)
793: end = index;
794: int pindex = index;
795: for (index++, nesting = 1; index < length && nesting > 0;
796: index++) {
797: c = s.charAt(index);
798: switch (c) {
799: case '\\':
800: index++; // skip both '\' and the escaped char
801: break;
802: case '(':
803: nesting++;
804: break;
805: case ')':
806: nesting--;
807: break;
808: default:
809: break;
810: }
811: }
812: if (nesting > 0) {
813: if (!ignoreErrors)
814: throw new AddressException("Missing ')'", s, index);
815: // pretend the first paren was a regular character and
816: // continue parsing after it
817: index = pindex + 1;
818: break;
819: }
820: index--; // point to closing paren
821: if (start_personal == -1)
822: start_personal = pindex + 1;
823: if (end_personal == -1)
824: end_personal = index;
825: break;
826:
827: case ')':
828: if (!ignoreErrors)
829: throw new AddressException("Missing '('", s, index);
830: // pretend the left paren was a regular character and
831: // continue parsing
832: if (start == -1)
833: start = index;
834: break;
835:
836: case '<':
837: rfc822 = true;
838: if (route_addr) {
839: if (!ignoreErrors)
840: throw new AddressException(
841: "Extra route-addr", s, index);
842:
843: // assume missing comma between addresses
844: if (start == -1) {
845: route_addr = false;
846: rfc822 = false;
847: start = end = -1;
848: break; // nope, nothing there
849: }
850: if (!in_group) {
851: // got a token, add this to our InternetAddress list
852: if (end == -1) // should never happen
853: end = index;
854: String addr = s.substring(start, end).trim();
855:
856: ma = new InternetAddress();
857: ma.setAddress(addr);
858: if (start_personal >= 0) {
859: ma.encodedPersonal = unquote(
860: s.substring(start_personal, end_personal).
861: trim());
862: }
863: v.add(ma);
864:
865: route_addr = false;
866: rfc822 = false;
867: start = end = -1;
868: start_personal = end_personal = -1;
869: // continue processing this new address...
870: }
871: }
872:
873: int rindex = index;
874: boolean inquote = false;
875: outf:
876: for (index++; index < length; index++) {
877: c = s.charAt(index);
878: switch (c) {
879: case '\\': // XXX - is this needed?
880: index++; // skip both '\' and the escaped char
881: break;
882: case '"':
883: inquote = !inquote;
884: break;
885: case '>':
886: if (inquote)
887: continue;
888: break outf; // out of for loop
889: default:
890: break;
891: }
892: }
893:
894: // did we find a matching quote?
895: if (inquote) {
896: if (!ignoreErrors)
897: throw new AddressException("Missing '\"'", s, index);
898: // didn't find matching quote, try again ignoring quotes
899: // (e.g., ``<"@foo.com>'')
900: outq:
901: for (index = rindex + 1; index < length; index++) {
902: c = s.charAt(index);
903: if (c == '\\') // XXX - is this needed?
904: index++; // skip both '\' and the escaped char
905: else if (c == '>')
906: break;
907: }
908: }
909:
910: // did we find a terminating '>'?
911: if (index >= length) {
912: if (!ignoreErrors)
913: throw new AddressException("Missing '>'", s, index);
914: // pretend the "<" was a regular character and
915: // continue parsing after it (e.g., ``<@foo.com'')
916: index = rindex + 1;
917: if (start == -1)
918: start = rindex; // back up to include "<"
919: break;
920: }
921:
922: if (!in_group) {
923: if (start >= 0) {
924: // seen some characters? use them as the personal name
925: start_personal = start;
926: end_personal = rindex;
927: }
928: start = rindex + 1;
929: }
930: route_addr = true;
931: end = index;
932: break;
933:
934: case '>':
935: if (!ignoreErrors)
936: throw new AddressException("Missing '<'", s, index);
937: // pretend the ">" was a regular character and
938: // continue parsing (e.g., ``>@foo.com'')
939: if (start == -1)
940: start = index;
941: break;
942:
943: case '"': // parse quoted string
944: int qindex = index;
945: rfc822 = true;
946: if (start == -1)
947: start = index;
948: outq:
949: for (index++; index < length; index++) {
950: c = s.charAt(index);
951: switch (c) {
952: case '\\':
953: index++; // skip both '\' and the escaped char
954: break;
955: case '"':
956: break outq; // out of for loop
957: default:
958: break;
959: }
960: }
961: if (index >= length) {
962: if (!ignoreErrors)
963: throw new AddressException("Missing '\"'", s, index);
964: // pretend the quote was a regular character and
965: // continue parsing after it (e.g., ``"@foo.com'')
966: index = qindex + 1;
967: }
968: break;
969:
970: case '[': // a domain-literal, probably
971: int lindex = index;
972: rfc822 = true;
973: if (start == -1)
974: start = index;
975: outb:
976: for (index++; index < length; index++) {
977: c = s.charAt(index);
978: switch (c) {
979: case '\\':
980: index++; // skip both '\' and the escaped char
981: break;
982: case ']':
983: break outb; // out of for loop
984: default:
985: break;
986: }
987: }
988: if (index >= length) {
989: if (!ignoreErrors)
990: throw new AddressException("Missing ']'", s, index);
991: // pretend the "[" was a regular character and
992: // continue parsing after it (e.g., ``[@foo.com'')
993: index = lindex + 1;
994: }
995: break;
996:
997: case ';':
998: if (start == -1) {
999: route_addr = false;
1000: rfc822 = false;
1001: start = end = -1;
1002: break; // nope, nothing there
1003: }
1004: if (in_group) {
1005: in_group = false;
1006: /*
1007: * If parsing headers, but not strictly, peek ahead.
1008: * If next char is "@", treat the group name
1009: * like the local part of the address, e.g.,
1010: * "Undisclosed-Recipient:;@java.sun.com".
1011: */
1012: if (parseHdr && !strict &&
1013: index + 1 < length && s.charAt(index + 1) == '@')
1014: break;
1015: ma = new InternetAddress();
1016: end = index + 1;
1017: ma.setAddress(s.substring(start, end).trim());
1018: v.add(ma);
1019:
1020: route_addr = false;
1021: rfc822 = false;
1022: start = end = -1;
1023: start_personal = end_personal = -1;
1024: break;
1025: }
1026: if (!ignoreErrors)
1027: throw new AddressException(
1028: "Illegal semicolon, not in group", s, index);
1029:
1030: // otherwise, parsing a header; treat semicolon like comma
1031: // fall through to comma case...
1032:
1033: case ',': // end of an address, probably
1034: if (start == -1) {
1035: route_addr = false;
1036: rfc822 = false;
1037: start = end = -1;
1038: break; // nope, nothing there
1039: }
1040: if (in_group) {
1041: route_addr = false;
1042: break;
1043: }
1044: // got a token, add this to our InternetAddress list
1045: if (end == -1)
1046: end = index;
1047:
1048: String addr = s.substring(start, end).trim();
1049: String pers = null;
1050: if (rfc822 && start_personal >= 0) {
1051: pers = unquote(
1052: s.substring(start_personal, end_personal).trim());
1053: if (pers.trim().length() == 0)
1054: pers = null;
1055: }
1056:
1057: /*
1058: * If the personal name field has an "@" and the address
1059: * field does not, assume they were reversed, e.g.,
1060: * ``"joe doe" (john.doe@example.com)''.
1061: */
1062: if (parseHdr && !strict && pers != null &&
1063: pers.indexOf('@') >= 0 &&
1064: addr.indexOf('@') < 0 && addr.indexOf('!') < 0) {
1065: String tmp = addr;
1066: addr = pers;
1067: pers = tmp;
1068: }
1069: if (rfc822 || strict || parseHdr) {
1070: if (!ignoreErrors)
1071: checkAddress(addr, route_addr, false);
1072: ma = new InternetAddress();
1073: ma.setAddress(addr);
1074: if (pers != null)
1075: ma.encodedPersonal = pers;
1076: v.add(ma);
1077: } else {
1078: // maybe we passed over more than one space-separated addr
1079: StringTokenizer st = new StringTokenizer(addr);
1080: while (st.hasMoreTokens()) {
1081: String a = st.nextToken();
1082: checkAddress(a, false, false);
1083: ma = new InternetAddress();
1084: ma.setAddress(a);
1085: v.add(ma);
1086: }
1087: }
1088:
1089: route_addr = false;
1090: rfc822 = false;
1091: start = end = -1;
1092: start_personal = end_personal = -1;
1093: break;
1094:
1095: case ':':
1096: rfc822 = true;
1097: if (in_group)
1098: if (!ignoreErrors)
1099: throw new AddressException("Nested group", s, index);
1100: if (start == -1)
1101: start = index;
1102: if (parseHdr && !strict) {
1103: /*
1104: * If next char is a special character that can't occur at
1105: * the start of a valid address, treat the group name
1106: * as the entire address, e.g., "Date:, Tue", "Re:@foo".
1107: */
1108: if (index + 1 < length) {
1109: String addressSpecials = ")>[]:@\\,.";
1110: char nc = s.charAt(index + 1);
1111: if (addressSpecials.indexOf(nc) >= 0) {
1112: if (nc != '@')
1113: break; // don't change in_group
1114: /*
1115: * Handle a common error:
1116: * ``Undisclosed-Recipient:@example.com;''
1117: *
1118: * Scan ahead. If we find a semicolon before
1119: * one of these other special characters,
1120: * consider it to be a group after all.
1121: */
1122: for (int i = index + 2; i < length; i++) {
1123: nc = s.charAt(i);
1124: if (nc == ';')
1125: break;
1126: if (addressSpecials.indexOf(nc) >= 0)
1127: break;
1128: }
1129: if (nc == ';')
1130: break; // don't change in_group
1131: }
1132: }
1133:
1134: // ignore bogus "mailto:" prefix in front of an address,
1135: // or bogus mail header name included in the address field
1136: String gname = s.substring(start, index);
1137: if (ignoreBogusGroupName &&
1138: (gname.equalsIgnoreCase("mailto") ||
1139: gname.equalsIgnoreCase("From") ||
1140: gname.equalsIgnoreCase("To") ||
1141: gname.equalsIgnoreCase("Cc") ||
1142: gname.equalsIgnoreCase("Subject") ||
1143: gname.equalsIgnoreCase("Re")))
1144: start = -1; // we're not really in a group
1145: else
1146: in_group = true;
1147: } else
1148: in_group = true;
1149: break;
1150:
1151: // Ignore whitespace
1152: case ' ':
1153: case '\t':
1154: case '\r':
1155: case '\n':
1156: break;
1157:
1158: default:
1159: if (start == -1)
1160: start = index;
1161: break;
1162: }
1163: }
1164:
1165: if (start >= 0) {
1166: /*
1167: * The last token, add this to our InternetAddress list.
1168: * Note that this block of code should be identical to the
1169: * block above for "case ','".
1170: */
1171: if (end == -1)
1172: end = length;
1173:
1174: String addr = s.substring(start, end).trim();
1175: String pers = null;
1176: if (rfc822 && start_personal >= 0) {
1177: pers = unquote(
1178: s.substring(start_personal, end_personal).trim());
1179: if (pers.trim().length() == 0)
1180: pers = null;
1181: }
1182:
1183: /*
1184: * If the personal name field has an "@" and the address
1185: * field does not, assume they were reversed, e.g.,
1186: * ``"joe doe" (john.doe@example.com)''.
1187: */
1188: if (parseHdr && !strict &&
1189: pers != null && pers.indexOf('@') >= 0 &&
1190: addr.indexOf('@') < 0 && addr.indexOf('!') < 0) {
1191: String tmp = addr;
1192: addr = pers;
1193: pers = tmp;
1194: }
1195: if (rfc822 || strict || parseHdr) {
1196: if (!ignoreErrors)
1197: checkAddress(addr, route_addr, false);
1198: ma = new InternetAddress();
1199: ma.setAddress(addr);
1200: if (pers != null)
1201: ma.encodedPersonal = pers;
1202: v.add(ma);
1203: } else {
1204: // maybe we passed over more than one space-separated addr
1205: StringTokenizer st = new StringTokenizer(addr);
1206: while (st.hasMoreTokens()) {
1207: String a = st.nextToken();
1208: checkAddress(a, false, false);
1209: ma = new InternetAddress();
1210: ma.setAddress(a);
1211: v.add(ma);
1212: }
1213: }
1214: }
1215:
1216: InternetAddress[] a = new InternetAddress[v.size()];
1217: v.toArray(a);
1218: return a;
1219: }
1220:
1221: /**
1222: * Validate that this address conforms to the syntax rules of
1223: * RFC 822. The current implementation checks many, but not
1224: * all, syntax rules. Note that even though the syntax of
1225: * the address may be correct, there's no guarantee that a
1226: * mailbox of that name exists.
1227: *
1228: * @throws AddressException if the address isn't valid.
1229: * @since JavaMail 1.3
1230: */
1231: public void validate() throws AddressException {
1232: if (isGroup())
1233: getGroup(true); // throw away the result
1234: else
1235: checkAddress(getAddress(), true, true);
1236: }
1237:
1238: private static final String specialsNoDotNoAt = "()<>,;:\\\"[]";
1239: private static final String specialsNoDot = specialsNoDotNoAt + "@";
1240:
1241: /**
1242: * Check that the address is a valid "mailbox" per RFC822.
1243: * (We also allow simple names.)
1244: *
1245: * XXX - much more to check
1246: * XXX - doesn't handle domain-literals properly (but no one uses them)
1247: */
1248: private static void checkAddress(String addr,
1249: boolean routeAddr, boolean validate)
1250: throws AddressException {
1251: int i, start = 0;
1252:
1253: if (addr == null)
1254: throw new AddressException("Address is null");
1255: int len = addr.length();
1256: if (len == 0)
1257: throw new AddressException("Empty address", addr);
1258:
1259: /*
1260: * routeAddr indicates that the address is allowed
1261: * to have an RFC 822 "route".
1262: */
1263: if (routeAddr && addr.charAt(0) == '@') {
1264: /*
1265: * Check for a legal "route-addr":
1266: *                [@domain[,@domain ...]:]local@domain
1267: */
1268: for (start = 0; (i = indexOfAny(addr, ",:", start)) >= 0;
1269: start = i + 1) {
1270: if (addr.charAt(start) != '@')
1271: throw new AddressException("Illegal route-addr", addr);
1272: if (addr.charAt(i) == ':') {
1273: // end of route-addr
1274: start = i + 1;
1275: break;
1276: }
1277: }
1278: }
1279:
1280: /*
1281: * The rest should be "local@domain", but we allow simply "local"
1282: * unless called from validate.
1283: *
1284: * local-part must follow RFC 822 - no specials except '.'
1285: * unless quoted.
1286: */
1287:
1288: char c = (char) -1;
1289: char lastc = (char) -1;
1290: boolean inquote = false;
1291: for (i = start; i < len; i++) {
1292: lastc = c;
1293: c = addr.charAt(i);
1294: // a quoted-pair is only supposed to occur inside a quoted string,
1295: // but some people use it outside so we're more lenient
1296: if (c == '\\' || lastc == '\\')
1297: continue;
1298: if (c == '"') {
1299: if (inquote) {
1300: // peek ahead, next char must be "@"
1301: if (validate && i + 1 < len && addr.charAt(i + 1) != '@')
1302: throw new AddressException(
1303: "Quote not at end of local address", addr);
1304: inquote = false;
1305: } else {
1306: if (validate && i != 0)
1307: throw new AddressException(
1308: "Quote not at start of local address", addr);
1309: inquote = true;
1310: }
1311: continue;
1312: } else if (c == '\r') {
1313: // peek ahead, next char must be LF
1314: if (i + 1 < len && addr.charAt(i + 1) != '\n')
1315: throw new AddressException(
1316: "Quoted local address contains CR without LF", addr);
1317: } else if (c == '\n') {
1318: /*
1319: * CRLF followed by whitespace is allowed in a quoted string.
1320: * We allowed naked LF, but ensure LF is always followed by
1321: * whitespace to prevent spoofing the end of the header.
1322: */
1323: if (i + 1 < len && addr.charAt(i + 1) != ' ' &&
1324: addr.charAt(i + 1) != '\t')
1325: throw new AddressException(
1326: "Quoted local address contains newline without whitespace",
1327: addr);
1328: }
1329: if (inquote)
1330: continue;
1331: // dot rules should not be applied to quoted-string
1332: if (c == '.') {
1333: if (i == start)
1334: throw new AddressException(
1335: "Local address starts with dot", addr);
1336: if (lastc == '.')
1337: throw new AddressException(
1338: "Local address contains dot-dot", addr);
1339: }
1340: if (c == '@') {
1341: if (i == 0)
1342: throw new AddressException("Missing local name", addr);
1343: if (lastc == '.')
1344: throw new AddressException(
1345: "Local address ends with dot", addr);
1346: break; // done with local part
1347: }
1348: if (c <= 040 || c == 0177)
1349: throw new AddressException(
1350: "Local address contains control or whitespace", addr);
1351: if (specialsNoDot.indexOf(c) >= 0)
1352: throw new AddressException(
1353: "Local address contains illegal character", addr);
1354: }
1355: if (inquote)
1356: throw new AddressException("Unterminated quote", addr);
1357:
1358: /*
1359: * Done with local part, now check domain.
1360: *
1361: * Note that the MimeMessage class doesn't remember addresses
1362: * as separate objects; it writes them out as headers and then
1363: * parses the headers when the addresses are requested.
1364: * In order to support the case where a "simple" address is used,
1365: * but the address also has a personal name and thus looks like
1366: * it should be a valid RFC822 address when parsed, we only check
1367: * this if we're explicitly called from the validate method.
1368: */
1369:
1370: if (c != '@') {
1371: if (validate)
1372: throw new AddressException("Missing final '@domain'", addr);
1373: return;
1374: }
1375:
1376: // check for illegal chars in the domain, but ignore domain literals
1377:
1378: start = i + 1;
1379: if (start >= len)
1380: throw new AddressException("Missing domain", addr);
1381:
1382: if (addr.charAt(start) == '.')
1383: throw new AddressException("Domain starts with dot", addr);
1384: boolean inliteral = false;
1385: for (i = start; i < len; i++) {
1386: c = addr.charAt(i);
1387: if (c == '[') {
1388: if (i != start)
1389: throw new AddressException(
1390: "Domain literal not at start of domain", addr);
1391: inliteral = true; // domain literal, don't validate
1392: } else if (c == ']') {
1393: if (i != len - 1)
1394: throw new AddressException(
1395: "Domain literal end not at end of domain", addr);
1396: inliteral = false;
1397: } else if (c <= 040 || c == 0177) {
1398: throw new AddressException(
1399: "Domain contains control or whitespace", addr);
1400: } else {
1401: // RFC 2822 rule
1402: //if (specialsNoDot.indexOf(c) >= 0)
1403: /*
1404: * RFC 1034 rule is more strict
1405: * the full rule is:
1406: *
1407: * <domain> ::= <subdomain> | " "
1408: * <subdomain> ::= <label> | <subdomain> "." <label>
1409: * <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
1410: * <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
1411: * <let-dig-hyp> ::= <let-dig> | "-"
1412: * <let-dig> ::= <letter> | <digit>
1413: */
1414: if (!inliteral) {
1415: if (!(Character.isLetterOrDigit(c) || c == '-' || c == '.'))
1416: throw new AddressException(
1417: "Domain contains illegal character", addr);
1418: if (c == '.' && lastc == '.')
1419: throw new AddressException(
1420: "Domain contains dot-dot", addr);
1421: }
1422: }
1423: lastc = c;
1424: }
1425: if (lastc == '.')
1426: throw new AddressException("Domain ends with dot", addr);
1427: }
1428:
1429: /**
1430: * Is this a "simple" address? Simple addresses don't contain quotes
1431: * or any RFC822 special characters other than '@' and '.'.
1432: */
1433: private boolean isSimple() {
1434: return address == null || indexOfAny(address, specialsNoDotNoAt) < 0;
1435: }
1436:
1437: /**
1438: * Indicates whether this address is an RFC 822 group address.
1439: * Note that a group address is different than the mailing
1440: * list addresses supported by most mail servers. Group addresses
1441: * are rarely used; see RFC 822 for details.
1442: *
1443: * @return true if this address represents a group
1444: * @since JavaMail 1.3
1445: */
1446: public boolean isGroup() {
1447: // quick and dirty check
1448: return address != null &&
1449: address.endsWith(";") && address.indexOf(':') > 0;
1450: }
1451:
1452: /**
1453: * Return the members of a group address. A group may have zero,
1454: * one, or more members. If this address is not a group, null
1455: * is returned. The <code>strict</code> parameter controls whether
1456: * the group list is parsed using strict RFC 822 rules or not.
1457: * The parsing is done using the <code>parseHeader</code> method.
1458: *
1459: * @param strict use strict RFC 822 rules?
1460: * @return array of InternetAddress objects, or null
1461: * @throws AddressException if the group list can't be parsed
1462: * @since JavaMail 1.3
1463: */
1464: public InternetAddress[] getGroup(boolean strict) throws AddressException {
1465: String addr = getAddress();
1466: if (addr == null)
1467: return null;
1468: // groups are of the form "name:addr,addr,...;"
1469: if (!addr.endsWith(";"))
1470: return null;
1471: int ix = addr.indexOf(':');
1472: if (ix < 0)
1473: return null;
1474: // extract the list
1475: String list = addr.substring(ix + 1, addr.length() - 1);
1476: // parse it and return the individual addresses
1477: return InternetAddress.parseHeader(list, strict);
1478: }
1479:
1480: /**
1481: * Return the first index of any of the characters in "any" in "s",
1482: * or -1 if none are found.
1483: *
1484: * This should be a method on String.
1485: */
1486: private static int indexOfAny(String s, String any) {
1487: return indexOfAny(s, any, 0);
1488: }
1489:
1490: private static int indexOfAny(String s, String any, int start) {
1491: try {
1492: int len = s.length();
1493: for (int i = start; i < len; i++) {
1494: if (any.indexOf(s.charAt(i)) >= 0)
1495: return i;
1496: }
1497: return -1;
1498: } catch (StringIndexOutOfBoundsException e) {
1499: return -1;
1500: }
1501: }
1502:
1503: /*
1504: public static void main(String argv[]) throws Exception {
1505:         for (int i = 0; i < argv.length; i++) {
1506:          InternetAddress[] a = InternetAddress.parse(argv[i]);
1507:          for (int j = 0; j < a.length; j++) {
1508:                 System.out.println("arg " + i + " address " + j + ": " + a[j]);
1509:                 System.out.println("\tAddress: " + a[j].getAddress() +
1510:                                  "\tPersonal: " + a[j].getPersonal());
1511:          }
1512:          if (a.length > 1) {
1513:                 System.out.println("address 0 hash code: " + a[0].hashCode());
1514:                 System.out.println("address 1 hash code: " + a[1].hashCode());
1515:                 if (a[0].hashCode() == a[1].hashCode())
1516:                  System.out.println("success, hashcodes equal");
1517:                 else
1518:                  System.out.println("fail, hashcodes not equal");
1519:                 if (a[0].equals(a[1]))
1520:                  System.out.println("success, addresses equal");
1521:                 else
1522:                  System.out.println("fail, addresses not equal");
1523:                 if (a[1].equals(a[0]))
1524:                  System.out.println("success, addresses equal");
1525:                 else
1526:                  System.out.println("fail, addresses not equal");
1527:          }
1528:         }
1529: }
1530: */
1531: }