Skip to content

Package: MIMEParser

MIMEParser

name | instruction | branch | complexity | line | method
MIMEParser(InputStream, String, MIMEConfig)
M: 0 C: 53
100%
M: 0 C: 0
100%
M: 0 C: 1
100%
M: 0 C: 13
100%
M: 0 C: 1
100%
adjustBuf(int, int)
M: 12 C: 34
74%
M: 3 C: 3
50%
M: 3 C: 1
25%
M: 0 C: 8
100%
M: 0 C: 1
100%
compileBoundaryPattern()
M: 0 C: 77
100%
M: 0 C: 10
100%
M: 0 C: 6
100%
M: 0 C: 10
100%
M: 0 C: 1
100%
createBuf(int)
M: 0 C: 8
100%
M: 0 C: 0
100%
M: 0 C: 1
100%
M: 0 C: 2
100%
M: 0 C: 1
100%
doubleBuf()
M: 23 C: 0
0%
M: 2 C: 0
0%
M: 2 C: 0
0%
M: 6 C: 0
0%
M: 1 C: 0
0%
fillBuf()
M: 33 C: 55
63%
M: 4 C: 8
67%
M: 4 C: 3
43%
M: 4 C: 14
78%
M: 0 C: 1
100%
getBytes(String)
M: 0 C: 25
100%
M: 0 C: 2
100%
M: 0 C: 2
100%
M: 0 C: 6
100%
M: 0 C: 1
100%
iterator()
M: 0 C: 5
100%
M: 0 C: 0
100%
M: 0 C: 1
100%
M: 0 C: 1
100%
M: 0 C: 1
100%
match(byte[], int, int)
M: 0 C: 54
100%
M: 0 C: 6
100%
M: 0 C: 4
100%
M: 0 C: 9
100%
M: 0 C: 1
100%
readBody()
M: 12 C: 306
96%
M: 8 C: 46
85%
M: 8 C: 20
71%
M: 2 C: 34
94%
M: 0 C: 1
100%
readHeaders()
M: 0 C: 13
100%
M: 0 C: 2
100%
M: 0 C: 2
100%
M: 0 C: 3
100%
M: 0 C: 1
100%
skipPreamble()
M: 55 C: 161
75%
M: 13 C: 15
54%
M: 12 C: 3
20%
M: 9 C: 16
64%
M: 0 C: 1
100%
static {...}
M: 0 C: 5
100%
M: 0 C: 0
100%
M: 0 C: 1
100%
M: 0 C: 1
100%
M: 0 C: 1
100%

Coverage

1: /*
2: * Copyright (c) 1997, 2022 Oracle and/or its affiliates. All rights reserved.
3: *
4: * This program and the accompanying materials are made available under the
5: * terms of the Eclipse Distribution License v. 1.0, which is available at
6: * http://www.eclipse.org/org/documents/edl-v10.php.
7: *
8: * SPDX-License-Identifier: BSD-3-Clause
9: */
10:
11: package org.jvnet.mimepull;
12:
13: import java.io.InputStream;
14: import java.io.IOException;
15: import java.util.*;
16: import java.util.logging.Logger;
17: import java.nio.ByteBuffer;
18: import java.util.logging.Level;
19:
20: /**
21: * Pull parser for the MIME messages. Applications can use pull API to continue
22: * the parsing MIME messages lazily.
23: *
24: * <pre>
25: * for e.g.:
26: * {@code
27: *
28: * MIMEParser parser = ...
29: * Iterator<MIMEEvent> it = parser.iterator();
30: * while(it.hasNext()) {
31: * MIMEEvent event = it.next();
32: * ...
33: * }
34: * }</pre>
35: *
36: * @author Jitendra Kotamraju
37: */
38: class MIMEParser implements Iterable<MIMEEvent> {
39:
40: private static final Logger LOGGER = Logger.getLogger(MIMEParser.class.getName());
41:
42: private static final String HEADER_ENCODING = "ISO8859-1";
43:
44: // Actually, the grammar doesn't support whitespace characters
45: // after boundary. But the mail implementation checks for it.
46: // We will only check for these many whitespace characters after boundary
47: private static final int NO_LWSP = 1000;
48: private enum STATE {START_MESSAGE, SKIP_PREAMBLE, START_PART, HEADERS, BODY, END_PART, END_MESSAGE}
49: private STATE state = STATE.START_MESSAGE;
50:
51: private final InputStream in;
52: private final byte[] bndbytes;
53: private final int bl;
54: private final MIMEConfig config;
55: private final int[] bcs = new int[128]; // BnM algo: Bad Character Shift table
56: private final int[] gss; // BnM algo : Good Suffix Shift table
57:
58: /**
59: * Have we parsed the data from our InputStream yet?
60: */
61: private boolean parsed;
62:
63: /*
64: * Read and process body partsList until we see the
65: * terminating boundary line (or EOF).
66: */
67: private boolean done = false;
68:
69: private boolean eof;
70: private final int capacity;
71: private byte[] buf;
72: private int len;
73: private boolean bol; // beginning of the line
74:
75: /*
76: * Parses the MIME content. At the EOF, it also closes input stream
77: */
78: MIMEParser(InputStream in, String boundary, MIMEConfig config) {
79: this.in = in;
80: this.bndbytes = getBytes("--"+boundary);
81: bl = bndbytes.length;
82: this.config = config;
83: gss = new int[bl];
84: compileBoundaryPattern();
85:
86: // \r\n + boundary + "--\r\n" + lots of LWSP
87: capacity = config.chunkSize+2+bl+4+NO_LWSP;
88: createBuf(capacity);
89: }
90:
91: /**
92: * Returns iterator for the parsing events. Use the iterator to advance
93: * the parsing.
94: *
95: * @return iterator for parsing events
96: */
97: @Override
98: public Iterator<MIMEEvent> iterator() {
99: return new MIMEEventIterator();
100: }
101:
102: class MIMEEventIterator implements Iterator<MIMEEvent> {
103:
104: @Override
105: public boolean hasNext() {
106: return !parsed;
107: }
108:
109: @Override
110: @SuppressWarnings({"fallthrough"})
111: public MIMEEvent next() {
112:
113: if (parsed) {
114: throw new NoSuchElementException();
115: }
116:
117: switch(state) {
118: case START_MESSAGE :
119: if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.START_MESSAGE);}
120: state = STATE.SKIP_PREAMBLE;
121: return MIMEEvent.START_MESSAGE;
122:
123: case SKIP_PREAMBLE :
124: if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.SKIP_PREAMBLE);}
125: skipPreamble();
126: // fall through
127: case START_PART :
128: if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.START_PART);}
129: state = STATE.HEADERS;
130: return MIMEEvent.START_PART;
131:
132: case HEADERS :
133: if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.HEADERS);}
134: InternetHeaders ih = readHeaders();
135: state = STATE.BODY;
136: bol = true;
137: return new MIMEEvent.Headers(ih);
138:
139: case BODY :
140: if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.BODY);}
141: ByteBuffer buf = readBody();
142: bol = false;
143: return new MIMEEvent.Content(buf);
144:
145: case END_PART :
146: if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.END_PART);}
147: if (done) {
148: state = STATE.END_MESSAGE;
149: } else {
150: state = STATE.START_PART;
151: }
152: return MIMEEvent.END_PART;
153:
154: case END_MESSAGE :
155: if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.END_MESSAGE);}
156: parsed = true;
157: return MIMEEvent.END_MESSAGE;
158:
159: default :
160: throw new MIMEParsingException("Unknown Parser state = "+state);
161: }
162: }
163:
164: @Override
165: public void remove() {
166: throw new UnsupportedOperationException();
167: }
168: }
169:
170: /**
171: * Collects the headers for the current part by parsing mesage stream.
172: *
173: * @return headers for the current part
174: */
175: private InternetHeaders readHeaders() {
176:• if (!eof) {
177: fillBuf();
178: }
179: return new InternetHeaders(new LineInputStream());
180: }
181:
182: /**
183: * Reads and saves the part of the current attachment part's content.
184: * At the end of this method, buf should have the remaining data
185: * at index 0.
186: *
187: * @return a chunk of the part's content
188: *
189: */
190: private ByteBuffer readBody() {
191:• if (!eof) {
192: fillBuf();
193: }
194: int start = match(buf, 0, len); // matches boundary
195:• if (start == -1) {
196: // No boundary is found
197:• assert eof || len >= config.chunkSize;
198:• int chunkSize = eof ? len : config.chunkSize;
199:• if (eof) {
200: done = true;
201: throw new MIMEParsingException("Reached EOF, but there is no closing MIME boundary.");
202: }
203: return adjustBuf(chunkSize, len-chunkSize);
204: }
205: // Found boundary.
206: // Is it at the start of a line ?
207: int chunkLen = start;
208:• if (bol && start == 0) {
209: // nothing to do
210:• } else if (start > 0 && (buf[start-1] == '\n' || buf[start-1] =='\r')) {
211: --chunkLen;
212:• if (buf[start-1] == '\n' && start >1 && buf[start-2] == '\r') {
213: --chunkLen;
214: }
215: } else {
216: return adjustBuf(start+1, len-start-1); // boundary is not at beginning of a line
217: }
218:
219:• if (start+bl+1 < len && buf[start+bl] == '-' && buf[start+bl+1] == '-') {
220: state = STATE.END_PART;
221: done = true;
222: return adjustBuf(chunkLen, 0);
223: }
224:
225: // Consider all the whitespace in boundary+whitespace+"\r\n"
226: int lwsp = 0;
227:• for(int i=start+bl; i < len && (buf[i] == ' ' || buf[i] == '\t'); i++) {
228: ++lwsp;
229: }
230:
231: // Check for \n or \r\n in boundary+whitespace+"\n" or boundary+whitespace+"\r\n"
232:• if (start+bl+lwsp < len && buf[start+bl+lwsp] == '\n') {
233: state = STATE.END_PART;
234: return adjustBuf(chunkLen, len-start-bl-lwsp-1);
235:• } else if (start+bl+lwsp+1 < len && buf[start+bl+lwsp] == '\r' && buf[start+bl+lwsp+1] == '\n') {
236: state = STATE.END_PART;
237: return adjustBuf(chunkLen, len-start-bl-lwsp-2);
238:• } else if (start+bl+lwsp+1 < len) {
239: return adjustBuf(chunkLen+1, len-chunkLen-1); // boundary string in a part data
240:• } else if (eof) {
241: done = true;
242: throw new MIMEParsingException("Reached EOF, but there is no closing MIME boundary.");
243: }
244:
245: // Some more data needed to determine if it is indeed a proper boundary
246: return adjustBuf(chunkLen, len-chunkLen);
247: }
248:
249: /**
250: * Returns a chunk from the original buffer. A new buffer is
251: * created with the remaining bytes.
252: *
253: * @param chunkSize create a chunk with these many bytes
254: * @param remaining bytes from the end of the buffer that need to be copied to
255: * the beginning of the new buffer
256: * @return chunk
257: */
258: private ByteBuffer adjustBuf(int chunkSize, int remaining) {
259:• assert buf != null;
260:• assert chunkSize >= 0;
261:• assert remaining >= 0;
262:
263: byte[] temp = buf;
264: // create a new buf and adjust it without this chunk
265: createBuf(remaining);
266: System.arraycopy(temp, len-remaining, buf, 0, remaining);
267: len = remaining;
268:
269: return ByteBuffer.wrap(temp, 0, chunkSize);
270: }
271:
272: private void createBuf(int min) {
273: buf = new byte[Math.max(min, capacity)];
274: }
275:
276: /**
277: * Skips the preamble to find the first attachment part
278: */
279: private void skipPreamble() {
280:
281: while(true) {
282:• if (!eof) {
283: fillBuf();
284: }
285: int start = match(buf, 0, len); // matches boundary
286:• if (start == -1) {
287: // No boundary is found
288:• if (eof) {
289: throw new MIMEParsingException("Missing start boundary");
290: } else {
291: adjustBuf(len-bl+1, bl-1);
292: continue;
293: }
294: }
295:
296:• if (start > config.chunkSize) {
297: adjustBuf(start, len-start);
298: continue;
299: }
300: // Consider all the whitespace boundary+whitespace+"\r\n"
301: int lwsp = 0;
302:• for(int i=start+bl; i < len && (buf[i] == ' ' || buf[i] == '\t'); i++) {
303: ++lwsp;
304: }
305: // Check for \n or \r\n
306:• if (start+bl+lwsp < len && (buf[start+bl+lwsp] == '\n' || buf[start+bl+lwsp] == '\r') ) {
307:• if (buf[start+bl+lwsp] == '\n') {
308: adjustBuf(start+bl+lwsp+1, len-start-bl-lwsp-1);
309: break;
310:• } else if (start+bl+lwsp+1 < len && buf[start+bl+lwsp+1] == '\n') {
311: adjustBuf(start+bl+lwsp+2, len-start-bl-lwsp-2);
312: break;
313: }
314: }
315: adjustBuf(start+1, len-start-1);
316: }
317:• if (LOGGER.isLoggable(Level.FINE)) {LOGGER.log(Level.FINE, "Skipped the preamble. buffer len={0}", len);}
318: }
319:
320: private static byte[] getBytes(String s) {
321: char [] chars= s.toCharArray();
322: int size = chars.length;
323: byte[] bytes = new byte[size];
324:
325:• for (int i = 0; i < size;) {
326: bytes[i] = (byte) chars[i++];
327: }
328: return bytes;
329: }
330:
331: /**
332: * Boyer-Moore search method. Copied from java.util.regex.Pattern.java
333: *
334: * Pre calculates arrays needed to generate the bad character
335: * shift and the good suffix shift. Only the last seven bits
336: * are used to see if chars match; This keeps the tables small
337: * and covers the heavily used ASCII range, but occasionally
338: * results in an aliased match for the bad character shift.
339: */
340: private void compileBoundaryPattern() {
341: int i, j;
342:
343: // Precalculate part of the bad character shift
344: // It is a table for where in the pattern each
345: // lower 7-bit value occurs
346:• for (i = 0; i < bndbytes.length; i++) {
347: bcs[bndbytes[i]&0x7F] = i + 1;
348: }
349:
350: // Precalculate the good suffix shift
351: // i is the shift amount being considered
352:•NEXT: for (i = bndbytes.length; i > 0; i--) {
353: // j is the beginning index of suffix being considered
354:• for (j = bndbytes.length - 1; j >= i; j--) {
355: // Testing for good suffix
356:• if (bndbytes[j] == bndbytes[j-i]) {
357: // src[j..len] is a good suffix
358: gss[j-1] = i;
359: } else {
360: // No match. The array has already been
361: // filled up with correct values before.
362: continue NEXT;
363: }
364: }
365: // This fills up the remaining of optoSft
366: // any suffix can not have larger shift amount
367: // then its sub-suffix. Why???
368:• while (j > 0) {
369: gss[--j] = i;
370: }
371: }
372: // Set the guard value because of unicode compression
373: gss[bndbytes.length -1] = 1;
374: }
375:
376: /**
377: * Finds the boundary in the given buffer using Boyer-Moore algo.
378: * Copied from java.util.regex.Pattern.java
379: *
380: * @param mybuf boundary to be searched in this mybuf
381: * @param off start index in mybuf
382: * @param len number of bytes in mybuf
383: *
384: * @return -1 if there is no match or index where the match starts
385: */
386: private int match(byte[] mybuf, int off, int len) {
387: int last = len - bndbytes.length;
388:
389: // Loop over all possible match positions in text
390:•NEXT: while (off <= last) {
391: // Loop over pattern from right to left
392:• for (int j = bndbytes.length - 1; j >= 0; j--) {
393: byte ch = mybuf[off+j];
394:• if (ch != bndbytes[j]) {
395: // Shift search to the right by the maximum of the
396: // bad character shift and the good suffix shift
397: off += Math.max(j + 1 - bcs[ch&0x7F], gss[j]);
398: continue NEXT;
399: }
400: }
401: // Entire pattern matched starting at off
402: return off;
403: }
404: return -1;
405: }
406:
407: /**
408: * Fills the remaining buf to the full capacity
409: */
410: private void fillBuf() {
411:• if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "Before fillBuf() buffer len={0}", len);}
412:• assert !eof;
413:• while(len < buf.length) {
414: int read;
415: try {
416: read = in.read(buf, len, buf.length-len);
417: } catch(IOException ioe) {
418: throw new MIMEParsingException(ioe);
419: }
420:• if (read == -1) {
421: eof = true;
422: try {
423:• if (LOGGER.isLoggable(Level.FINE)) {LOGGER.fine("Closing the input stream.");}
424: in.close();
425: } catch(IOException ioe) {
426: throw new MIMEParsingException(ioe);
427: }
428: break;
429: } else {
430: len += read;
431: }
432: }
433:• if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "After fillBuf() buffer len={0}", len);}
434: }
435:
436: private void doubleBuf() {
437: byte[] temp = new byte[2*len];
438: System.arraycopy(buf, 0, temp, 0, len);
439: buf = temp;
440:• if (!eof) {
441: fillBuf();
442: }
443: }
444:
445: class LineInputStream {
446: private int offset;
447:
448: /*
449: * Read a line containing only ASCII characters from the input
450: * stream. A line is terminated by a CR or NL or CR-NL sequence.
451: * A common error is a CR-CR-NL sequence, which will also terminate
452: * a line.
453: * The line terminator is not returned as part of the returned
454: * String. Returns null if no data is available. <p>
455: *
456: * This class is similar to the deprecated
457: * <code>DataInputStream.readLine()</code>
458: */
459: public String readLine() throws IOException {
460:
461: int hdrLen = 0;
462: int lwsp = 0;
463: while(offset+hdrLen < len) {
464: if (buf[offset+hdrLen] == '\n') {
465: lwsp = 1;
466: break;
467: }
468: if (offset+hdrLen+1 == len) {
469: doubleBuf();
470: }
471: if (offset+hdrLen+1 >= len) { // No more data in the stream
472: assert eof;
473: return null;
474: }
475: if (buf[offset+hdrLen] == '\r' && buf[offset+hdrLen+1] == '\n') {
476: lwsp = 2;
477: break;
478: }
479: ++hdrLen;
480: }
481: if (hdrLen == 0) {
482: adjustBuf(offset+lwsp, len-offset-lwsp);
483: return null;
484: }
485:
486: String hdr = new String(buf, offset, hdrLen, HEADER_ENCODING);
487: offset += hdrLen+lwsp;
488: return hdr;
489: }
490:
491: }
492:
493: }