Skip to content

Package: HeaderTokenizer

HeaderTokenizer

name | instruction | branch | complexity | line | method
HeaderTokenizer(String)
M: 5 C: 0
0%
M: 0 C: 0
100%
M: 1 C: 0
0%
M: 2 C: 0
0%
M: 1 C: 0
0%
HeaderTokenizer(String, String)
M: 0 C: 6
100%
M: 0 C: 0
100%
M: 0 C: 1
100%
M: 0 C: 2
100%
M: 0 C: 1
100%
HeaderTokenizer(String, String, boolean)
M: 2 C: 28
93%
M: 1 C: 1
50%
M: 1 C: 1
50%
M: 0 C: 7
100%
M: 0 C: 1
100%
collectString(char, boolean)
M: 39 C: 86
69%
M: 5 C: 11
69%
M: 4 C: 5
56%
M: 8 C: 16
67%
M: 0 C: 1
100%
filterToken(String, int, int, boolean)
M: 8 C: 55
87%
M: 4 C: 10
71%
M: 3 C: 5
63%
M: 3 C: 17
85%
M: 0 C: 1
100%
getNext(char, boolean)
M: 61 C: 193
76%
M: 17 C: 39
70%
M: 14 C: 15
52%
M: 11 C: 38
78%
M: 0 C: 1
100%
getRemainder()
M: 0 C: 14
100%
M: 0 C: 2
100%
M: 0 C: 2
100%
M: 0 C: 3
100%
M: 0 C: 1
100%
next()
M: 0 C: 5
100%
M: 0 C: 0
100%
M: 0 C: 1
100%
M: 0 C: 1
100%
M: 0 C: 1
100%
next(char)
M: 5 C: 0
0%
M: 0 C: 0
100%
M: 1 C: 0
0%
M: 1 C: 0
0%
M: 1 C: 0
0%
next(char, boolean)
M: 0 C: 18
100%
M: 0 C: 0
100%
M: 0 C: 1
100%
M: 0 C: 4
100%
M: 0 C: 1
100%
peek()
M: 15 C: 0
0%
M: 0 C: 0
100%
M: 1 C: 0
0%
M: 4 C: 0
0%
M: 1 C: 0
0%
skipWhiteSpace()
M: 0 C: 35
100%
M: 2 C: 8
80%
M: 2 C: 4
67%
M: 0 C: 4
100%
M: 0 C: 1
100%
static {...}
M: 0 C: 7
100%
M: 0 C: 0
100%
M: 0 C: 1
100%
M: 0 C: 1
100%
M: 0 C: 1
100%
trimWhiteSpace(String)
M: 37 C: 0
0%
M: 12 C: 0
0%
M: 7 C: 0
0%
M: 6 C: 0
0%
M: 1 C: 0
0%

Coverage

1: /*
2: * Copyright (c) 1997, 2023 Oracle and/or its affiliates. All rights reserved.
3: *
4: * This program and the accompanying materials are made available under the
5: * terms of the Eclipse Public License v. 2.0, which is available at
6: * http://www.eclipse.org/legal/epl-2.0.
7: *
8: * This Source Code may also be made available under the following Secondary
9: * Licenses when the conditions for such availability set forth in the
10: * Eclipse Public License v. 2.0 are satisfied: GNU General Public License,
11: * version 2 with the GNU Classpath Exception, which is available at
12: * https://www.gnu.org/software/classpath/license.html.
13: *
14: * SPDX-License-Identifier: EPL-2.0 OR GPL-2.0 WITH Classpath-exception-2.0
15: */
16:
17: package jakarta.mail.internet;
18:
19: /**
20: * This class tokenizes RFC822 and MIME headers into the basic
21: * symbols specified by RFC822 and MIME. <p>
22: *
23: * This class handles folded headers (ie headers with embedded
24: * CRLF SPACE sequences). The folds are removed in the returned
25: * tokens.
26: *
27: * @author John Mani
28: * @author Bill Shannon
29: */
30:
31: public class HeaderTokenizer {
32:
33: /**
34: * The Token class represents tokens returned by the
35: * HeaderTokenizer.
36: */
37: public static class Token {
38:
39: private int type;
40: private String value;
41:
42: /**
43: * Token type indicating an ATOM.
44: */
45: public static final int ATOM = -1;
46:
47: /**
48: * Token type indicating a quoted string. The value
49: * field contains the string without the quotes.
50: */
51: public static final int QUOTEDSTRING = -2;
52:
53: /**
54: * Token type indicating a comment. The value field
55: * contains the comment string without the comment
56: * start and end symbols.
57: */
58: public static final int COMMENT = -3;
59:
60: /**
61: * Token type indicating end of input.
62: */
63: public static final int EOF = -4;
64:
65: /**
66: * Constructor.
67: *
68: * @param type Token type
69: * @param value Token value
70: */
71: public Token(int type, String value) {
72: this.type = type;
73: this.value = value;
74: }
75:
76: /**
77: * Return the type of the token. If the token represents a
78: * delimiter or a control character, the type is that character
79: * itself, converted to an integer. Otherwise, it's value is
80: * one of the following:
81: * <ul>
82: * <li><code>ATOM</code> A sequence of ASCII characters
83: *         delimited by either SPACE, CTL, "(", <"> or the
84: *         specified SPECIALS
85: * <li><code>QUOTEDSTRING</code> A sequence of ASCII characters
86: *         within quotes
87: * <li><code>COMMENT</code> A sequence of ASCII characters
88: *         within "(" and ")".
89: * <li><code>EOF</code> End of header
90: * </ul>
91: *
92: * @return the token type
93: */
94: public int getType() {
95: return type;
96: }
97:
98: /**
99: * Returns the value of the token just read. When the current
100: * token is a quoted string, this field contains the body of the
101: * string, without the quotes. When the current token is a comment,
102: * this field contains the body of the comment.
103: *
104: * @return token value
105: */
106: public String getValue() {
107: return value;
108: }
109: }
110:
111: private String string; // the string to be tokenized
112: private boolean skipComments; // should comments be skipped ?
113: private String delimiters; // delimiter string
114: private int currentPos; // current parse position
115: private int maxPos; // string length
116: private int nextPos; // track start of next Token for next()
117: private int peekPos; // track start of next Token for peek()
118:
119: /**
120: * RFC822 specials
121: */
122: public final static String RFC822 = "()<>@,;:\\\"\t .[]";
123:
124: /**
125: * MIME specials
126: */
127: public final static String MIME = "()<>@,;:\\\"\t []/?=";
128:
129: // The EOF Token
130: private final static Token EOFToken = new Token(Token.EOF, null);
131:
132: /**
133: * Constructor that takes a rfc822 style header.
134: *
135: * @param skipComments If true, comments are skipped and
136: * not returned as tokens
137: * @param header The rfc822 header to be tokenized
138: * @param delimiters Set of delimiter characters
139: * to be used to delimit ATOMS. These
140: * are usually <code>RFC822</code> or
141: * <code>MIME</code>
142: */
143: public HeaderTokenizer(String header, String delimiters,
144: boolean skipComments) {
145:• string = (header == null) ? "" : header; // paranoia ?!
146: this.skipComments = skipComments;
147: this.delimiters = delimiters;
148: currentPos = nextPos = peekPos = 0;
149: maxPos = string.length();
150: }
151:
152: /**
153: * Constructor. Comments are ignored and not returned as tokens
154: *
155: * @param header The header that is tokenized
156: * @param delimiters The delimiters to be used
157: */
158: public HeaderTokenizer(String header, String delimiters) {
159: this(header, delimiters, true);
160: }
161:
162: /**
163: * Constructor. The RFC822 defined delimiters - RFC822 - are
164: * used to delimit ATOMS. Also comments are skipped and not
165: * returned as tokens
166: *
167: * @param header the header string
168: */
169: public HeaderTokenizer(String header) {
170: this(header, RFC822);
171: }
172:
173: /**
174: * Parses the next token from this String. <p>
175: *
176: * Clients sit in a loop calling next() to parse successive
177: * tokens until an EOF Token is returned.
178: *
179: * @return the next Token
180: * @throws ParseException if the parse fails
181: */
182: public Token next() throws ParseException {
183: return next('\0', false);
184: }
185:
186: /**
187: * Parses the next token from this String.
188: * If endOfAtom is not NUL, the token extends until the
189: * endOfAtom character is seen, or to the end of the header.
190: * This method is useful when parsing headers that don't
191: * obey the MIME specification, e.g., by failing to quote
192: * parameter values that contain spaces.
193: *
194: * @param endOfAtom if not NUL, character marking end of token
195: * @return the next Token
196: * @throws ParseException if the parse fails
197: * @since JavaMail 1.5
198: */
199: public Token next(char endOfAtom) throws ParseException {
200: return next(endOfAtom, false);
201: }
202:
203: /**
204: * Parses the next token from this String.
205: * endOfAtom is handled as above. If keepEscapes is true,
206: * any backslash escapes are preserved in the returned string.
207: * This method is useful when parsing headers that don't
208: * obey the MIME specification, e.g., by failing to escape
209: * backslashes in the filename parameter.
210: *
211: * @param endOfAtom if not NUL, character marking end of token
212: * @param keepEscapes keep all backslashes in returned string?
213: * @return the next Token
214: * @throws ParseException if the parse fails
215: * @since JavaMail 1.5
216: */
217: public Token next(char endOfAtom, boolean keepEscapes)
218: throws ParseException {
219: Token tk;
220:
221: currentPos = nextPos; // setup currentPos
222: tk = getNext(endOfAtom, keepEscapes);
223: nextPos = peekPos = currentPos; // update currentPos and peekPos
224: return tk;
225: }
226:
227: /**
228: * Peek at the next token, without actually removing the token
229: * from the parse stream. Invoking this method multiple times
230: * will return successive tokens, until <code>next()</code> is
231: * called.
232: *
233: * @return the next Token
234: * @throws ParseException if the parse fails
235: */
236: public Token peek() throws ParseException {
237: Token tk;
238:
239: currentPos = peekPos; // setup currentPos
240: tk = getNext('\0', false);
241: peekPos = currentPos; // update peekPos
242: return tk;
243: }
244:
245: /**
246: * Return the rest of the Header.
247: *
248: * @return String rest of header. null is returned if we are
249: * already at end of header
250: */
251: public String getRemainder() {
252:• if (nextPos >= string.length())
253: return null;
254: return string.substring(nextPos);
255: }
256:
257: /*
258: * Return the next token starting from 'currentPos'. After the
259: * parse, 'currentPos' is updated to point to the start of the
260: * next token.
261: */
262: private Token getNext(char endOfAtom, boolean keepEscapes)
263: throws ParseException {
264: // If we're already at end of string, return EOF
265:• if (currentPos >= maxPos)
266: return EOFToken;
267:
268: // Skip white-space, position currentPos beyond the space
269:• if (skipWhiteSpace() == Token.EOF)
270: return EOFToken;
271:
272: char c;
273: int start;
274: boolean filter = false;
275:
276: c = string.charAt(currentPos);
277:
278: // Check or Skip comments and position currentPos
279: // beyond the comment
280:• while (c == '(') {
281: // Parsing comment ..
282: int nesting;
283: for (start = ++currentPos, nesting = 1;
284:• nesting > 0 && currentPos < maxPos;
285: currentPos++) {
286: c = string.charAt(currentPos);
287:• if (c == '\\') { // Escape sequence
288: currentPos++; // skip the escaped character
289: filter = true;
290:• } else if (c == '\r')
291: filter = true;
292:• else if (c == '(')
293: nesting++;
294:• else if (c == ')')
295: nesting--;
296: }
297:• if (nesting != 0)
298: throw new ParseException("Unbalanced comments");
299:
300:• if (!skipComments) {
301: // Return the comment, if we are asked to.
302: // Note that the comment start & end markers are ignored.
303: String s;
304:• if (filter) // need to go thru the token again.
305: s = filterToken(string, start, currentPos - 1, keepEscapes);
306: else
307: s = string.substring(start, currentPos - 1);
308:
309: return new Token(Token.COMMENT, s);
310: }
311:
312: // Skip any whitespace after the comment.
313:• if (skipWhiteSpace() == Token.EOF)
314: return EOFToken;
315: c = string.charAt(currentPos);
316: }
317:
318: // Check for quoted-string and position currentPos
319: // beyond the terminating quote
320:• if (c == '"') {
321: currentPos++; // skip initial quote
322: return collectString('"', keepEscapes);
323: }
324:
325: // Check for SPECIAL or CTL
326:• if (c < 040 || c >= 0177 || delimiters.indexOf(c) >= 0) {
327:• if (endOfAtom > 0 && c != endOfAtom) {
328: // not expecting a special character here,
329: // pretend it's a quoted string
330: return collectString(endOfAtom, keepEscapes);
331: }
332: currentPos++; // re-position currentPos
333: char[] ch = new char[1];
334: ch[0] = c;
335: return new Token(c, new String(ch));
336: }
337:
338: // Check for ATOM
339:• for (start = currentPos; currentPos < maxPos; currentPos++) {
340: c = string.charAt(currentPos);
341: // ATOM is delimited by either SPACE, CTL, "(", <">
342: // or the specified SPECIALS
343:• if (c < 040 || c >= 0177 || c == '(' || c == ' ' ||
344:• c == '"' || delimiters.indexOf(c) >= 0) {
345:• if (endOfAtom > 0 && c != endOfAtom) {
346: // not the expected atom after all;
347: // back up and pretend it's a quoted string
348: currentPos = start;
349: return collectString(endOfAtom, keepEscapes);
350: }
351: break;
352: }
353: }
354: return new Token(Token.ATOM, string.substring(start, currentPos));
355: }
356:
357: private Token collectString(char eos, boolean keepEscapes)
358: throws ParseException {
359: int start;
360: boolean filter = false;
361:• for (start = currentPos; currentPos < maxPos; currentPos++) {
362: char c = string.charAt(currentPos);
363:• if (c == '\\') { // Escape sequence
364: currentPos++;
365: filter = true;
366:• } else if (c == '\r')
367: filter = true;
368:• else if (c == eos) {
369: currentPos++;
370: String s;
371:
372:• if (filter)
373: s = filterToken(string, start, currentPos - 1, keepEscapes);
374: else
375: s = string.substring(start, currentPos - 1);
376:
377:• if (c != '"') { // not a real quoted string
378: s = trimWhiteSpace(s);
379: currentPos--; // back up before the eos char
380: }
381:
382: return new Token(Token.QUOTEDSTRING, s);
383: }
384: }
385:
386: // ran off the end of the string
387:
388: // if we're looking for a matching quote, that's an error
389:• if (eos == '"')
390: throw new ParseException("Unbalanced quoted string");
391:
392: // otherwise, just return whatever's left
393: String s;
394:• if (filter)
395: s = filterToken(string, start, currentPos, keepEscapes);
396: else
397: s = string.substring(start, currentPos);
398: s = trimWhiteSpace(s);
399: return new Token(Token.QUOTEDSTRING, s);
400: }
401:
402: // Skip SPACE, HT, CR and NL
403: private int skipWhiteSpace() {
404: char c;
405:• for (; currentPos < maxPos; currentPos++)
406:• if (((c = string.charAt(currentPos)) != ' ') &&
407: (c != '\t') && (c != '\r') && (c != '\n'))
408: return currentPos;
409: return Token.EOF;
410: }
411:
412: // Trim SPACE, HT, CR and NL from end of string
413: private static String trimWhiteSpace(String s) {
414: char c;
415: int i;
416:• for (i = s.length() - 1; i >= 0; i--) {
417:• if (((c = s.charAt(i)) != ' ') &&
418: (c != '\t') && (c != '\r') && (c != '\n'))
419: break;
420: }
421:• if (i <= 0)
422: return "";
423: else
424: return s.substring(0, i + 1);
425: }
426:
427: /* Process escape sequences and embedded LWSPs from a comment or
428: * quoted string.
429: */
430: private static String filterToken(String s, int start, int end,
431: boolean keepEscapes) {
432: StringBuilder sb = new StringBuilder();
433: char c;
434: boolean gotEscape = false;
435: boolean gotCR = false;
436:
437:• for (int i = start; i < end; i++) {
438: c = s.charAt(i);
439:• if (c == '\n' && gotCR) {
440: // This LF is part of an unescaped
441: // CRLF sequence (i.e, LWSP). Skip it.
442: gotCR = false;
443: continue;
444: }
445:
446: gotCR = false;
447:• if (!gotEscape) {
448: // Previous character was NOT '\'
449:• if (c == '\\') // skip this character
450: gotEscape = true;
451:• else if (c == '\r') // skip this character
452: gotCR = true;
453: else // append this character
454: sb.append(c);
455: } else {
456: // Previous character was '\'. So no need to
457: // bother with any special processing, just
458: // append this character. If keepEscapes is
459: // set, keep the backslash. IE6 fails to escape
460: // backslashes in quoted strings in HTTP headers,
461: // e.g., in the filename parameter.
462:• if (keepEscapes)
463: sb.append('\\');
464: sb.append(c);
465: gotEscape = false;
466: }
467: }
468: return sb.toString();
469: }
470: }