Jenkins

Status
Changes
Console Output
View as plain text
View Build Information
Polling Log
Parameters
Timings
Git Build Data
Test Result
Coverage Report
Java Compiler Warnings
JavaDoc Warnings
SpotBugs Warnings
See Fingerprints
Previous Build
Next Build
Package: HeaderTokenizer$Token

HeaderTokenizer$Token

name
instruction
branch
complexity
line
method
HeaderTokenizer.Token(int, String)
M: 0 C: 9
100%
M: 0 C: 0
100%
M: 0 C: 1
100%
M: 0 C: 4
100%
M: 0 C: 1
100%
getType()
M: 0 C: 3
100%
M: 0 C: 0
100%
M: 0 C: 1
100%
M: 0 C: 1
100%
M: 0 C: 1
100%
getValue()
M: 0 C: 3
100%
M: 0 C: 0
100%
M: 0 C: 1
100%
M: 0 C: 1
100%
M: 0 C: 1
100%
Coverage

1: /*
2:  * Copyright (c) 1997, 2023 Oracle and/or its affiliates. All rights reserved.
3:  *
4:  * This program and the accompanying materials are made available under the
5:  * terms of the Eclipse Public License v. 2.0, which is available at
6:  * http://www.eclipse.org/legal/epl-2.0.
7:  *
8:  * This Source Code may also be made available under the following Secondary
9:  * Licenses when the conditions for such availability set forth in the
10:  * Eclipse Public License v. 2.0 are satisfied: GNU General Public License,
11:  * version 2 with the GNU Classpath Exception, which is available at
12:  * https://www.gnu.org/software/classpath/license.html.
13:  *
14:  * SPDX-License-Identifier: EPL-2.0 OR GPL-2.0 WITH Classpath-exception-2.0
15:  */
16: 
17: package jakarta.mail.internet;
18: 
19: import java.util.*;
20: 
21: /**
22:  * This class tokenizes RFC822 and MIME headers into the basic
23:  * symbols specified by RFC822 and MIME. <p>
24:  *
25:  * This class handles folded headers (ie headers with embedded
26:  * CRLF SPACE sequences). The folds are removed in the returned
27:  * tokens. 
28:  *
29:  * @author  John Mani
30:  * @author  Bill Shannon
31:  */
32: 
33: public class HeaderTokenizer {
34: 
35:     /**
36:      * The Token class represents tokens returned by the 
37:      * HeaderTokenizer.
38:      */
39:     public static class Token {
40: 
41:         private int type;
42:         private String value;
43: 
44:         /**
45:          * Token type indicating an ATOM.
46:          */
47:         public static final int ATOM                 = -1;
48: 
49:         /**
50:          * Token type indicating a quoted string. The value 
51:          * field contains the string without the quotes.
52:           */
53:         public static final int QUOTEDSTRING         = -2;
54: 
55:         /**
56:          * Token type indicating a comment. The value field 
57:          * contains the comment string without the comment 
58:          * start and end symbols.
59:          */
60:         public static final int COMMENT                = -3;
61: 
62:         /**
63:          * Token type indicating end of input.
64:          */
65:         public static final int  EOF                 = -4;
66: 
67:         /**
68:          * Constructor.
69:          * @param        type        Token type
70:          * @param        value        Token value
71:          */
72:         public Token(int type, String value) {
73:              this.type = type;
74:              this.value = value;
75:         }
76: 
77:         /**
78:          * Return the type of the token. If the token represents a
79:          * delimiter or a control character, the type is that character
80:          * itself, converted to an integer. Otherwise, it's value is 
81:          * one of the following:
82:          * <ul>
83:          * <li><code>ATOM</code> A sequence of ASCII characters 
84:          *        delimited by either SPACE, CTL, "(", <"> or the 
85:          *        specified SPECIALS
86:          * <li><code>QUOTEDSTRING</code> A sequence of ASCII characters
87:          *        within quotes
88:          * <li><code>COMMENT</code> A sequence of ASCII characters 
89:          *        within "(" and ")".
90:          * <li><code>EOF</code> End of header
91:          * </ul>
92:          *
93:          * @return        the token type
94:          */
95:         public int getType() {
96:             return type;
97:         }
98: 
99:         /**
100:          * Returns the value of the token just read. When the current
101:          * token is a quoted string, this field contains the body of the
102:          * string, without the quotes. When the current token is a comment,
103:          * this field contains the body of the comment.
104:          *
105:          * @return        token value
106:          */
107:         public String getValue() {
108:             return value;
109:         }
110:     }
111: 
112:     private String string; // the string to be tokenized
113:     private boolean skipComments; // should comments be skipped ?
114:     private String delimiters; // delimiter string
115:     private int currentPos; // current parse position
116:     private int maxPos; // string length
117:     private int nextPos; // track start of next Token for next()
118:     private int peekPos; // track start of next Token for peek()
119: 
120:     /**
121:      * RFC822 specials
122:      */
123:     public final static String RFC822 = "()<>@,;:\\\"\t .[]";
124: 
125:     /**
126:      * MIME specials
127:      */
128:     public final static String MIME = "()<>@,;:\\\"\t []/?=";
129: 
130:     // The EOF Token
131:     private final static Token EOFToken = new Token(Token.EOF, null);
132: 
133:     /**
134:      * Constructor that takes a rfc822 style header.
135:      *
136:      * @param        header        The rfc822 header to be tokenized
137:      * @param        delimiters      Set of delimiter characters 
138:      *                                to be used to delimit ATOMS. These
139:      *                                are usually <code>RFC822</code> or 
140:      *                                <code>MIME</code>
141:      * @param   skipComments  If true, comments are skipped and
142:      *                                not returned as tokens
143:      */
144:     public HeaderTokenizer(String header, String delimiters,
145:                                boolean skipComments) {
146:         string = (header == null) ? "" : header; // paranoia ?!
147:         this.skipComments = skipComments;
148:         this.delimiters = delimiters;
149:         currentPos = nextPos = peekPos = 0;
150:         maxPos = string.length();
151:     }
152: 
153:     /**
154:      * Constructor. Comments are ignored and not returned as tokens
155:      *
156:      * @param        header  The header that is tokenized
157:      * @param        delimiters  The delimiters to be used
158:      */
159:     public HeaderTokenizer(String header, String delimiters) {
160:         this(header, delimiters, true);
161:     }
162: 
163:     /**
164:      * Constructor. The RFC822 defined delimiters - RFC822 - are
165:      * used to delimit ATOMS. Also comments are skipped and not
166:      * returned as tokens
167:      *
168:      * @param        header        the header string
169:      */
170:     public HeaderTokenizer(String header)  {
171:         this(header, RFC822);
172:     }
173: 
174:     /**
175:      * Parses the next token from this String. <p>
176:      *
177:      * Clients sit in a loop calling next() to parse successive
178:      * tokens until an EOF Token is returned.
179:      *
180:      * @return                the next Token
181:      * @exception        ParseException if the parse fails
182:      */
183:     public Token next() throws ParseException { 
184:         return next('\0', false);
185:     }
186: 
187:     /**
188:      * Parses the next token from this String.
189:      * If endOfAtom is not NUL, the token extends until the
190:      * endOfAtom character is seen, or to the end of the header.
191:      * This method is useful when parsing headers that don't
192:      * obey the MIME specification, e.g., by failing to quote
193:      * parameter values that contain spaces.
194:      *
195:      * @param        endOfAtom        if not NUL, character marking end of token
196:      * @return                the next Token
197:      * @exception        ParseException if the parse fails
198:      * @since                JavaMail 1.5
199:      */
200:     public Token next(char endOfAtom) throws ParseException { 
201:         return next(endOfAtom, false);
202:     }
203: 
204:     /**
205:      * Parses the next token from this String.
206:      * endOfAtom is handled as above.  If keepEscapes is true,
207:      * any backslash escapes are preserved in the returned string.
208:      * This method is useful when parsing headers that don't
209:      * obey the MIME specification, e.g., by failing to escape
210:      * backslashes in the filename parameter.
211:      *
212:      * @param        endOfAtom        if not NUL, character marking end of token
213:      * @param        keepEscapes        keep all backslashes in returned string?
214:      * @return                the next Token
215:      * @exception        ParseException if the parse fails
216:      * @since                JavaMail 1.5
217:      */
218:     public Token next(char endOfAtom, boolean keepEscapes)
219:                                 throws ParseException { 
220:         Token tk;
221: 
222:         currentPos = nextPos; // setup currentPos
223:         tk = getNext(endOfAtom, keepEscapes);
224:         nextPos = peekPos = currentPos; // update currentPos and peekPos
225:         return tk;
226:     }
227: 
228:     /**
229:      * Peek at the next token, without actually removing the token
230:      * from the parse stream. Invoking this method multiple times
231:      * will return successive tokens, until <code>next()</code> is
232:      * called.
233:      *
234:      * @return                the next Token
235:      * @exception        ParseException if the parse fails
236:      */
237:     public Token peek() throws ParseException {
238:         Token tk;
239: 
240:         currentPos = peekPos; // setup currentPos
241:         tk = getNext('\0', false);
242:         peekPos = currentPos; // update peekPos
243:         return tk;
244:     }
245: 
246:     /**
247:      * Return the rest of the Header.
248:      *
249:      * @return String        rest of header. null is returned if we are
250:      *                        already at end of header
251:      */
252:     public String getRemainder() {
253:         if (nextPos >= string.length())
254:             return null;
255:         return string.substring(nextPos);
256:     }
257: 
258:     /*
259:      * Return the next token starting from 'currentPos'. After the
260:      * parse, 'currentPos' is updated to point to the start of the 
261:      * next token.
262:      */
263:     private Token getNext(char endOfAtom, boolean keepEscapes)
264:                                 throws ParseException {
265:         // If we're already at end of string, return EOF
266:         if (currentPos >= maxPos)
267:             return EOFToken;
268: 
269:         // Skip white-space, position currentPos beyond the space
270:         if (skipWhiteSpace() == Token.EOF)
271:             return EOFToken;
272: 
273:         char c; 
274:         int start; 
275:         boolean filter = false;
276:         
277:         c = string.charAt(currentPos);
278: 
279:         // Check or Skip comments and position currentPos
280:         // beyond the comment
281:         while (c == '(') {
282:             // Parsing comment ..
283:             int nesting;
284:             for (start = ++currentPos, nesting = 1; 
285:                  nesting > 0 && currentPos < maxPos;
286:                  currentPos++) {
287:                 c = string.charAt(currentPos);
288:                 if (c == '\\') {  // Escape sequence
289:                     currentPos++; // skip the escaped character
290:                     filter = true;
291:                 } else if (c == '\r')
292:                     filter = true;
293:                 else if (c == '(')
294:                     nesting++;
295:                 else if (c == ')')
296:                     nesting--;
297:             }
298:             if (nesting != 0)
299:                 throw new ParseException("Unbalanced comments");
300: 
301:             if (!skipComments) {
302:                 // Return the comment, if we are asked to.
303:                 // Note that the comment start & end markers are ignored.
304:                 String s;
305:                 if (filter) // need to go thru the token again.
306:                     s = filterToken(string, start, currentPos-1, keepEscapes);
307:                 else
308:                     s = string.substring(start,currentPos-1);
309: 
310:                 return new Token(Token.COMMENT, s);
311:             }
312: 
313:             // Skip any whitespace after the comment.
314:             if (skipWhiteSpace() == Token.EOF)
315:                 return EOFToken;
316:             c = string.charAt(currentPos);
317:         }
318: 
319:         // Check for quoted-string and position currentPos 
320:         //  beyond the terminating quote
321:         if (c == '"') {
322:             currentPos++;        // skip initial quote
323:             return collectString('"', keepEscapes);
324:         }
325:         
326:         // Check for SPECIAL or CTL
327:         if (c < 040 || c >= 0177 || delimiters.indexOf(c) >= 0) {
328:             if (endOfAtom > 0 && c != endOfAtom) {
329:                 // not expecting a special character here,
330:                 // pretend it's a quoted string
331:                 return collectString(endOfAtom, keepEscapes);
332:             }
333:             currentPos++; // re-position currentPos
334:             char ch[] = new char[1];
335:             ch[0] = c;
336:             return new Token((int)c, new String(ch));
337:         }
338: 
339:         // Check for ATOM
340:         for (start = currentPos; currentPos < maxPos; currentPos++) {
341:             c = string.charAt(currentPos);
342:             // ATOM is delimited by either SPACE, CTL, "(", <"> 
343:             // or the specified SPECIALS
344:             if (c < 040 || c >= 0177 || c == '(' || c == ' ' ||
345:                         c == '"' || delimiters.indexOf(c) >= 0) {
346:                 if (endOfAtom > 0 && c != endOfAtom) {
347:                     // not the expected atom after all;
348:                     // back up and pretend it's a quoted string
349:                     currentPos = start;
350:                     return collectString(endOfAtom, keepEscapes);
351:                 }
352:                 break;
353:             }
354:         }
355:         return new Token(Token.ATOM, string.substring(start, currentPos));
356:     }
357: 
358:     private Token collectString(char eos, boolean keepEscapes)
359:                                 throws ParseException {
360:         int start;
361:         boolean filter = false;
362:         for (start = currentPos; currentPos < maxPos; currentPos++) {
363:             char c = string.charAt(currentPos);
364:             if (c == '\\') { // Escape sequence
365:                 currentPos++;
366:                 filter = true;
367:             } else if (c == '\r')
368:                 filter = true;
369:             else if (c == eos) {
370:                 currentPos++;
371:                 String s;
372: 
373:                 if (filter)
374:                     s = filterToken(string, start, currentPos-1, keepEscapes);
375:                 else
376:                     s = string.substring(start, currentPos-1);
377: 
378:                 if (c != '"') {                // not a real quoted string
379:                     s = trimWhiteSpace(s);
380:                     currentPos--;        // back up before the eos char
381:                 }
382: 
383:                 return new Token(Token.QUOTEDSTRING, s);
384:             }
385:         }
386: 
387:         // ran off the end of the string
388: 
389:         // if we're looking for a matching quote, that's an error
390:         if (eos == '"')
391:             throw new ParseException("Unbalanced quoted string");
392: 
393:         // otherwise, just return whatever's left
394:         String s;
395:         if (filter)
396:             s = filterToken(string, start, currentPos, keepEscapes);
397:         else
398:             s = string.substring(start, currentPos);
399:         s = trimWhiteSpace(s);
400:         return new Token(Token.QUOTEDSTRING, s);
401:     }
402: 
403:     // Skip SPACE, HT, CR and NL
404:     private int skipWhiteSpace() {
405:         char c;
406:         for (; currentPos < maxPos; currentPos++)
407:             if (((c = string.charAt(currentPos)) != ' ') && 
408:                 (c != '\t') && (c != '\r') && (c != '\n'))
409:                 return currentPos;
410:         return Token.EOF;
411:     }
412: 
413:     // Trim SPACE, HT, CR and NL from end of string
414:     private static String trimWhiteSpace(String s) {
415:         char c;
416:         int i;
417:         for (i = s.length() - 1; i >= 0; i--) {
418:             if (((c = s.charAt(i)) != ' ') && 
419:                 (c != '\t') && (c != '\r') && (c != '\n'))
420:                 break;
421:         }
422:         if (i <= 0)
423:             return "";
424:         else
425:             return s.substring(0, i + 1);
426:     }
427: 
428:     /* Process escape sequences and embedded LWSPs from a comment or
429:      * quoted string.
430:      */
431:     private static String filterToken(String s, int start, int end,
432:                                 boolean keepEscapes) {
433:         StringBuilder sb = new StringBuilder();
434:         char c;
435:         boolean gotEscape = false;
436:         boolean gotCR = false;
437: 
438:         for (int i = start; i < end; i++) {
439:             c = s.charAt(i);
440:             if (c == '\n' && gotCR) {
441:                 // This LF is part of an unescaped 
442:                 // CRLF sequence (i.e, LWSP). Skip it.
443:                 gotCR = false;
444:                 continue;
445:             }
446: 
447:             gotCR = false;
448:             if (!gotEscape) {
449:                 // Previous character was NOT '\'
450:                 if (c == '\\') // skip this character
451:                     gotEscape = true;
452:                 else if (c == '\r') // skip this character
453:                     gotCR = true;
454:                 else // append this character
455:                     sb.append(c);
456:             } else {
457:                 // Previous character was '\'. So no need to 
458:                 // bother with any special processing, just 
459:                 // append this character.  If keepEscapes is
460:                 // set, keep the backslash.  IE6 fails to escape
461:                 // backslashes in quoted strings in HTTP headers,
462:                 // e.g., in the filename parameter.
463:                 if (keepEscapes)
464:                     sb.append('\\');
465:                 sb.append(c);
466:                 gotEscape = false;
467:             }
468:         }
469:         return sb.toString();
470:     }
471: }