Jenkins

Status
Changes
Console Output
View as plain text
View Build Information
Polling Log
Parameters
Timings
Git Build Data
Test Result
Coverage Report
Java Compiler Warnings
JavaDoc Warnings
SpotBugs Warnings
See Fingerprints
Previous Build
Next Build
Package: HeaderTokenizer

HeaderTokenizer

name
instruction
branch
complexity
line
method
HeaderTokenizer(String)
M: 5 C: 0
0%
M: 0 C: 0
100%
M: 1 C: 0
0%
M: 2 C: 0
0%
M: 1 C: 0
0%
HeaderTokenizer(String, String)
M: 0 C: 6
100%
M: 0 C: 0
100%
M: 0 C: 1
100%
M: 0 C: 2
100%
M: 0 C: 1
100%
HeaderTokenizer(String, String, boolean)
M: 2 C: 28
93%
M: 1 C: 1
50%
M: 1 C: 1
50%
M: 0 C: 7
100%
M: 0 C: 1
100%
collectString(char, boolean)
M: 39 C: 86
69%
M: 5 C: 11
69%
M: 4 C: 5
56%
M: 8 C: 16
67%
M: 0 C: 1
100%
filterToken(String, int, int, boolean)
M: 8 C: 55
87%
M: 4 C: 10
71%
M: 3 C: 5
63%
M: 3 C: 17
85%
M: 0 C: 1
100%
getNext(char, boolean)
M: 61 C: 193
76%
M: 17 C: 39
70%
M: 14 C: 15
52%
M: 11 C: 38
78%
M: 0 C: 1
100%
getRemainder()
M: 0 C: 14
100%
M: 0 C: 2
100%
M: 0 C: 2
100%
M: 0 C: 3
100%
M: 0 C: 1
100%
next()
M: 0 C: 5
100%
M: 0 C: 0
100%
M: 0 C: 1
100%
M: 0 C: 1
100%
M: 0 C: 1
100%
next(char)
M: 5 C: 0
0%
M: 0 C: 0
100%
M: 1 C: 0
0%
M: 1 C: 0
0%
M: 1 C: 0
0%
next(char, boolean)
M: 0 C: 18
100%
M: 0 C: 0
100%
M: 0 C: 1
100%
M: 0 C: 4
100%
M: 0 C: 1
100%
peek()
M: 15 C: 0
0%
M: 0 C: 0
100%
M: 1 C: 0
0%
M: 4 C: 0
0%
M: 1 C: 0
0%
skipWhiteSpace()
M: 0 C: 35
100%
M: 2 C: 8
80%
M: 2 C: 4
67%
M: 0 C: 4
100%
M: 0 C: 1
100%
static {...}
M: 0 C: 7
100%
M: 0 C: 0
100%
M: 0 C: 1
100%
M: 0 C: 1
100%
M: 0 C: 1
100%
trimWhiteSpace(String)
M: 37 C: 0
0%
M: 12 C: 0
0%
M: 7 C: 0
0%
M: 6 C: 0
0%
M: 1 C: 0
0%
Coverage

1: /*
2:  * Copyright (c) 1997, 2023 Oracle and/or its affiliates. All rights reserved.
3:  *
4:  * This program and the accompanying materials are made available under the
5:  * terms of the Eclipse Public License v. 2.0, which is available at
6:  * http://www.eclipse.org/legal/epl-2.0.
7:  *
8:  * This Source Code may also be made available under the following Secondary
9:  * Licenses when the conditions for such availability set forth in the
10:  * Eclipse Public License v. 2.0 are satisfied: GNU General Public License,
11:  * version 2 with the GNU Classpath Exception, which is available at
12:  * https://www.gnu.org/software/classpath/license.html.
13:  *
14:  * SPDX-License-Identifier: EPL-2.0 OR GPL-2.0 WITH Classpath-exception-2.0
15:  */
16: 
17: package jakarta.mail.internet;
18: 
19: /**
20:  * This class tokenizes RFC822 and MIME headers into the basic
21:  * symbols specified by RFC822 and MIME. <p>
22:  *
23:  * This class handles folded headers (i.e., headers with embedded
24:  * CRLF SPACE sequences). The folds are removed in the returned
25:  * tokens.
26:  *
27:  * @author John Mani
28:  * @author Bill Shannon
29:  */
30: 
31: public class HeaderTokenizer {
32: 
33:     /**
34:      * The Token class represents tokens returned by the
35:      * HeaderTokenizer.
36:      */
37:     public static class Token {
38: 
39:         private int type;
40:         private String value;
41: 
42:         /**
43:          * Token type indicating an ATOM.
44:          */
45:         public static final int ATOM = -1;
46: 
47:         /**
48:          * Token type indicating a quoted string. The value
49:          * field contains the string without the quotes.
50:          */
51:         public static final int QUOTEDSTRING = -2;
52: 
53:         /**
54:          * Token type indicating a comment. The value field
55:          * contains the comment string without the comment
56:          * start and end symbols.
57:          */
58:         public static final int COMMENT = -3;
59: 
60:         /**
61:          * Token type indicating end of input.
62:          */
63:         public static final int EOF = -4;
64: 
65:         /**
66:          * Constructor.
67:          *
68:          * @param type  Token type
69:          * @param value Token value
70:          */
71:         public Token(int type, String value) {
72:             this.type = type;
73:             this.value = value;
74:         }
75: 
76:         /**
77:          * Return the type of the token. If the token represents a
78:          * delimiter or a control character, the type is that character
79:          * itself, converted to an integer. Otherwise, it's value is
80:          * one of the following:
81:          * <ul>
82:          * <li><code>ATOM</code> A sequence of ASCII characters
83:          *         delimited by either SPACE, CTL, "(", <"> or the
84:          *         specified SPECIALS
85:          * <li><code>QUOTEDSTRING</code> A sequence of ASCII characters
86:          *         within quotes
87:          * <li><code>COMMENT</code> A sequence of ASCII characters
88:          *         within "(" and ")".
89:          * <li><code>EOF</code> End of header
90:          * </ul>
91:          *
92:          * @return the token type
93:          */
94:         public int getType() {
95:             return type;
96:         }
97: 
98:         /**
99:          * Returns the value of the token just read. When the current
100:          * token is a quoted string, this field contains the body of the
101:          * string, without the quotes. When the current token is a comment,
102:          * this field contains the body of the comment.
103:          *
104:          * @return token value
105:          */
106:         public String getValue() {
107:             return value;
108:         }
109:     }
110: 
111:     private String string; // the string to be tokenized
112:     private boolean skipComments; // should comments be skipped ?
113:     private String delimiters; // delimiter string
114:     private int currentPos; // current parse position
115:     private int maxPos; // string length
116:     private int nextPos; // track start of next Token for next()
117:     private int peekPos; // track start of next Token for peek()
118: 
119:     /**
120:      * RFC822 specials
121:      */
122:     public final static String RFC822 = "()<>@,;:\\\"\t .[]";
123: 
124:     /**
125:      * MIME specials
126:      */
127:     public final static String MIME = "()<>@,;:\\\"\t []/?=";
128: 
129:     // The EOF Token
130:     private final static Token EOFToken = new Token(Token.EOF, null);
131: 
132:     /**
133:      * Constructor that takes a rfc822 style header.
134:      *
135:      * @param skipComments If true, comments are skipped and
136:      *                     not returned as tokens
137:      * @param header       The rfc822 header to be tokenized
138:      * @param delimiters   Set of delimiter characters
139:      *                     to be used to delimit ATOMS. These
140:      *                     are usually <code>RFC822</code> or
141:      *                     <code>MIME</code>
142:      */
143:     public HeaderTokenizer(String header, String delimiters,
144:                            boolean skipComments) {
145:•        string = (header == null) ? "" : header; // paranoia ?!
146:         this.skipComments = skipComments;
147:         this.delimiters = delimiters;
148:         currentPos = nextPos = peekPos = 0;
149:         maxPos = string.length();
150:     }
151: 
152:     /**
153:      * Constructor. Comments are ignored and not returned as tokens
154:      *
155:      * @param header     The header that is tokenized
156:      * @param delimiters The delimiters to be used
157:      */
158:     public HeaderTokenizer(String header, String delimiters) {
159:         this(header, delimiters, true);
160:     }
161: 
162:     /**
163:      * Constructor. The RFC822 defined delimiters - RFC822 - are
164:      * used to delimit ATOMS. Also comments are skipped and not
165:      * returned as tokens
166:      *
167:      * @param header the header string
168:      */
169:     public HeaderTokenizer(String header) {
170:         this(header, RFC822);
171:     }
172: 
173:     /**
174:      * Parses the next token from this String. <p>
175:      *
176:      * Clients sit in a loop calling next() to parse successive
177:      * tokens until an EOF Token is returned.
178:      *
179:      * @return the next Token
180:      * @throws ParseException if the parse fails
181:      */
182:     public Token next() throws ParseException {
183:         return next('\0', false);
184:     }
185: 
186:     /**
187:      * Parses the next token from this String.
188:      * If endOfAtom is not NUL, the token extends until the
189:      * endOfAtom character is seen, or to the end of the header.
190:      * This method is useful when parsing headers that don't
191:      * obey the MIME specification, e.g., by failing to quote
192:      * parameter values that contain spaces.
193:      *
194:      * @param endOfAtom if not NUL, character marking end of token
195:      * @return the next Token
196:      * @throws ParseException if the parse fails
197:      * @since JavaMail 1.5
198:      */
199:     public Token next(char endOfAtom) throws ParseException {
200:         return next(endOfAtom, false);
201:     }
202: 
203:     /**
204:      * Parses the next token from this String.
205:      * endOfAtom is handled as above.  If keepEscapes is true,
206:      * any backslash escapes are preserved in the returned string.
207:      * This method is useful when parsing headers that don't
208:      * obey the MIME specification, e.g., by failing to escape
209:      * backslashes in the filename parameter.
210:      *
211:      * @param endOfAtom   if not NUL, character marking end of token
212:      * @param keepEscapes keep all backslashes in returned string?
213:      * @return the next Token
214:      * @throws ParseException if the parse fails
215:      * @since JavaMail 1.5
216:      */
217:     public Token next(char endOfAtom, boolean keepEscapes)
218:             throws ParseException {
219:         Token tk;
220: 
221:         currentPos = nextPos; // setup currentPos
222:         tk = getNext(endOfAtom, keepEscapes);
223:         nextPos = peekPos = currentPos; // update currentPos and peekPos
224:         return tk;
225:     }
226: 
227:     /**
228:      * Peek at the next token, without actually removing the token
229:      * from the parse stream. Invoking this method multiple times
230:      * will return successive tokens, until <code>next()</code> is
231:      * called.
232:      *
233:      * @return the next Token
234:      * @throws ParseException if the parse fails
235:      */
236:     public Token peek() throws ParseException {
237:         Token tk;
238: 
239:         currentPos = peekPos; // setup currentPos
240:         tk = getNext('\0', false);
241:         peekPos = currentPos; // update peekPos
242:         return tk;
243:     }
244: 
245:     /**
246:      * Return the rest of the Header.
247:      *
248:      * @return String    rest of header. null is returned if we are
249:      * already at end of header
250:      */
251:     public String getRemainder() {
252:•        if (nextPos >= string.length())
253:             return null;
254:         return string.substring(nextPos);
255:     }
256: 
257:     /*
258:      * Return the next token starting from 'currentPos'. After the
259:      * parse, 'currentPos' is updated to point to the start of the
260:      * next token.
261:      */
262:     private Token getNext(char endOfAtom, boolean keepEscapes)
263:             throws ParseException {
264:         // If we're already at end of string, return EOF
265:•        if (currentPos >= maxPos)
266:             return EOFToken;
267: 
268:         // Skip white-space, position currentPos beyond the space
269:•        if (skipWhiteSpace() == Token.EOF)
270:             return EOFToken;
271: 
272:         char c;
273:         int start;
274:         boolean filter = false;
275: 
276:         c = string.charAt(currentPos);
277: 
278:         // Check or Skip comments and position currentPos
279:         // beyond the comment
280:•        while (c == '(') {
281:             // Parsing comment ..
282:             int nesting;
283:             for (start = ++currentPos, nesting = 1;
284:•                 nesting > 0 && currentPos < maxPos;
285:                  currentPos++) {
286:                 c = string.charAt(currentPos);
287:•                if (c == '\\') {  // Escape sequence
288:                     currentPos++; // skip the escaped character
289:                     filter = true;
290:•                } else if (c == '\r')
291:                     filter = true;
292:•                else if (c == '(')
293:                     nesting++;
294:•                else if (c == ')')
295:                     nesting--;
296:             }
297:•            if (nesting != 0)
298:                 throw new ParseException("Unbalanced comments");
299: 
300:•            if (!skipComments) {
301:                 // Return the comment, if we are asked to.
302:                 // Note that the comment start & end markers are ignored.
303:                 String s;
304:•                if (filter) // need to go thru the token again.
305:                     s = filterToken(string, start, currentPos - 1, keepEscapes);
306:                 else
307:                     s = string.substring(start, currentPos - 1);
308: 
309:                 return new Token(Token.COMMENT, s);
310:             }
311: 
312:             // Skip any whitespace after the comment.
313:•            if (skipWhiteSpace() == Token.EOF)
314:                 return EOFToken;
315:             c = string.charAt(currentPos);
316:         }
317: 
318:         // Check for quoted-string and position currentPos
319:         //  beyond the terminating quote
320:•        if (c == '"') {
321:             currentPos++;    // skip initial quote
322:             return collectString('"', keepEscapes);
323:         }
324: 
325:         // Check for SPECIAL or CTL
326:•        if (c < 040 || c >= 0177 || delimiters.indexOf(c) >= 0) {
327:•            if (endOfAtom > 0 && c != endOfAtom) {
328:                 // not expecting a special character here,
329:                 // pretend it's a quoted string
330:                 return collectString(endOfAtom, keepEscapes);
331:             }
332:             currentPos++; // re-position currentPos
333:             char[] ch = new char[1];
334:             ch[0] = c;
335:             return new Token(c, new String(ch));
336:         }
337: 
338:         // Check for ATOM
339:•        for (start = currentPos; currentPos < maxPos; currentPos++) {
340:             c = string.charAt(currentPos);
341:             // ATOM is delimited by either SPACE, CTL, "(", <">
342:             // or the specified SPECIALS
343:•            if (c < 040 || c >= 0177 || c == '(' || c == ' ' ||
344:•                    c == '"' || delimiters.indexOf(c) >= 0) {
345:•                if (endOfAtom > 0 && c != endOfAtom) {
346:                     // not the expected atom after all;
347:                     // back up and pretend it's a quoted string
348:                     currentPos = start;
349:                     return collectString(endOfAtom, keepEscapes);
350:                 }
351:                 break;
352:             }
353:         }
354:         return new Token(Token.ATOM, string.substring(start, currentPos));
355:     }
356: 
357:     private Token collectString(char eos, boolean keepEscapes)
358:             throws ParseException {
359:         int start;
360:         boolean filter = false;
361:•        for (start = currentPos; currentPos < maxPos; currentPos++) {
362:             char c = string.charAt(currentPos);
363:•            if (c == '\\') { // Escape sequence
364:                 currentPos++;
365:                 filter = true;
366:•            } else if (c == '\r')
367:                 filter = true;
368:•            else if (c == eos) {
369:                 currentPos++;
370:                 String s;
371: 
372:•                if (filter)
373:                     s = filterToken(string, start, currentPos - 1, keepEscapes);
374:                 else
375:                     s = string.substring(start, currentPos - 1);
376: 
377:•                if (c != '"') {        // not a real quoted string
378:                     s = trimWhiteSpace(s);
379:                     currentPos--;    // back up before the eos char
380:                 }
381: 
382:                 return new Token(Token.QUOTEDSTRING, s);
383:             }
384:         }
385: 
386:         // ran off the end of the string
387: 
388:         // if we're looking for a matching quote, that's an error
389:•        if (eos == '"')
390:             throw new ParseException("Unbalanced quoted string");
391: 
392:         // otherwise, just return whatever's left
393:         String s;
394:•        if (filter)
395:             s = filterToken(string, start, currentPos, keepEscapes);
396:         else
397:             s = string.substring(start, currentPos);
398:         s = trimWhiteSpace(s);
399:         return new Token(Token.QUOTEDSTRING, s);
400:     }
401: 
402:     // Skip SPACE, HT, CR and NL
403:     private int skipWhiteSpace() {
404:         char c;
405:•        for (; currentPos < maxPos; currentPos++)
406:•            if (((c = string.charAt(currentPos)) != ' ') &&
407:                     (c != '\t') && (c != '\r') && (c != '\n'))
408:                 return currentPos;
409:         return Token.EOF;
410:     }
411: 
412:     // Trim SPACE, HT, CR and NL from end of string
413:     private static String trimWhiteSpace(String s) {
414:         char c;
415:         int i;
416:•        for (i = s.length() - 1; i >= 0; i--) {
417:•            if (((c = s.charAt(i)) != ' ') &&
418:                     (c != '\t') && (c != '\r') && (c != '\n'))
419:                 break;
420:         }
421:•        if (i <= 0)
422:             return "";
423:         else
424:             return s.substring(0, i + 1);
425:     }
426: 
427:     /* Process escape sequences and embedded LWSPs from a comment or
428:      * quoted string.
429:      */
430:     private static String filterToken(String s, int start, int end,
431:                                       boolean keepEscapes) {
432:         StringBuilder sb = new StringBuilder();
433:         char c;
434:         boolean gotEscape = false;
435:         boolean gotCR = false;
436: 
437:•        for (int i = start; i < end; i++) {
438:             c = s.charAt(i);
439:•            if (c == '\n' && gotCR) {
440:                 // This LF is part of an unescaped
441:                 // CRLF sequence (i.e, LWSP). Skip it.
442:                 gotCR = false;
443:                 continue;
444:             }
445: 
446:             gotCR = false;
447:•            if (!gotEscape) {
448:                 // Previous character was NOT '\'
449:•                if (c == '\\') // skip this character
450:                     gotEscape = true;
451:•                else if (c == '\r') // skip this character
452:                     gotCR = true;
453:                 else // append this character
454:                     sb.append(c);
455:             } else {
456:                 // Previous character was '\'. So no need to
457:                 // bother with any special processing, just
458:                 // append this character.  If keepEscapes is
459:                 // set, keep the backslash.  IE6 fails to escape
460:                 // backslashes in quoted strings in HTTP headers,
461:                 // e.g., in the filename parameter.
462:•                if (keepEscapes)
463:                     sb.append('\\');
464:                 sb.append(c);
465:                 gotEscape = false;
466:             }
467:         }
468:         return sb.toString();
469:     }
470: }