1 //Written in the D programming language
2 /*
 * Routines for reading and parsing MIME documents.
4  *
5  * Copyright: Copyright (C) 2013-2014 Jaypha
6  *
7  * Distributed under the Boost Software License, Version 1.0.
8  * (See http://www.boost.org/LICENSE_1_0.txt)
9  *
10  * Authors: Jason den Dulk
11  */
12 
13 module jaypha.inet.mime.reading;
14 
15 public import jaypha.inet.mime.header;
16 
// Linear whitespace: the characters that may separate lexical elements.
enum string mimeLwsp = " \t";

// Special characters as listed in RFC822.
enum string mimeSpecials = "()<>@,;:\\\".[]";
// Special characters as listed in RFC2045 (used when scanning tokens).
enum string mimetSpecials = "()<>@,;:\\\"/[]?=";

// Full delimiter sets: the specials plus linear whitespace.
enum string mimeDelimeters = mimeSpecials ~ mimeLwsp;
enum string mimeTokenDelimeters = mimetSpecials ~ mimeLwsp;
22 
23 import std.array;
24 import std.string;
25 import std.range;
26 import std.algorithm;
27 
28 //-----------------------------------------------------------------------------
// Extracts all MIME parameters found at the front of the source string.
// Parameters are of the format *(';' name '=' value)
// value = token / quoted-string.
32 
// Reads every ';'-introduced "name = value" pair from the front of `source`
// and stores it in `parameters` (existing keys are overwritten). Spaces and
// comments between the lexical elements are skipped. Scanning stops at the
// first element that is not a ';'.
// Throws an Exception ("malformed MIME header") when a name is not followed
// by '=' or when the name/value cannot be extracted.
void extractMimeParams(string source, ref string[string] parameters)
{
  source.skipSpaceComment();

  // The increment clause swallows any space/comment trailing each value.
  for (; !source.empty && source.front == ';'; source.skipSpaceComment())
  {
    source.popFront();            // drop the ';'
    source.skipSpaceComment();
    string name = source.extractToken();
    source.skipSpaceComment();

    if (source.cfront != '=')
      throw new Exception("malformed MIME header");
    source.popFront();            // drop the '='
    source.skipSpaceComment();

    // A value is either a quoted string or a bare token.
    bool quoted = source.cfront == '\"';
    if (quoted)
      parameters[name] = source.extractQuotedString();
    else
      parameters[name] = source.extractToken();
  }
}
52 
53 //-----------------------------------------------------------------------------
54 
// Exercises extractMimeParams with embedded comments, quoted values, several
// parameters in one string, and a dangling ';'.
unittest
{
  string t1 = "; bean  = (not this)rock ";
  string t2 = ";dog(canine)  = \"A (canine) animal\"  ";
  string t3 = " ;(c)rabbit=jack;jack=\"\"";
  string t4 = ";";

  string[string] parms;

  extractMimeParams(t1,parms);
  assert("bean" in parms);
  // Compare with '==': the previous '=' assigned the value and checked nothing.
  assert(parms["bean"] == "rock");

  extractMimeParams(t2,parms);
  assert("dog" in parms);
  assert(parms["dog"] == "A (canine) animal");

  extractMimeParams(t3,parms);
  assert("rabbit" in parms);
  assert("jack" in parms);
  assert(parms["rabbit"] == "jack");
  assert(parms["jack"].empty);

  // A ';' with no parameter after it must be rejected.
  try {
    extractMimeParams(t4,parms);
    assert(false);
  } catch (Exception e) {
  }
}
84 
85 //-----------------------------------------------------------------------------
86 // Extracts a MIME token from the input string.
87 
// Splits off the leading run of characters that are not in
// mimeTokenDelimeters. Returns that run and leaves `source` positioned at the
// first delimiter (or empty when there is none).
// Throws an Exception ("malformed MIME header") when the run is empty; in
// that case `source` is left unchanged.
string extractToken(ref string source)
{
  string rest = findAmong(source, mimeTokenDelimeters);
  size_t tokenLength = source.length - rest.length;

  if (tokenLength == 0)
    throw new Exception("malformed MIME header");

  string token = source[0 .. tokenLength];
  source = rest;
  return token;
}
96 
97 //-----------------------------------------------------------------------------
98 // Extracts a quoted string token from the input string.
99 
// Reads a quoted string from the front of `source` and returns its contents
// with the surrounding quotes removed and backslash escapes resolved (the
// character after a backslash is taken literally). `source` is left just
// past the closing quote.
// The first character is dropped without being inspected; the caller is
// expected to have verified that it is the opening quote.
// Throws an Exception ("malformed MIME header") if the input ends before the
// closing quote.
string extractQuotedString(ref string source)
{
  auto unquoted = appender!string();

  source.popFront(); // opening quote
  for (;;)
  {
    auto ch = source.cfront;
    if (ch == '\"')
      break;

    if (ch == '\\')
    {
      // Escaped character: drop the backslash, keep what follows.
      source.popFront();
      ch = source.cfront;
    }
    unquoted.put(ch);
    source.popFront();
  }
  source.popFront(); // closing quote

  return unquoted.data;
}
115 
116 //-----------------------------------------------------------------------------
117 
// Exercises extractToken and extractQuotedString, including their failure
// modes.
unittest
{
  string t1 = "john@";
  string t2 = "\" a quoted \\\" string\"g";
  string t3 = "\"unfinished";

  assert(extractToken(t1) == "john");
  assert(t1 == "@");

  assert(extractQuotedString(t2) == " a quoted \" string");
  assert(t2 == "g");

  // An unterminated quoted string consumes everything, then throws.
  try
  {
    extractQuotedString(t3);
    assert(false);
  } catch (Exception e)
  {
    assert(t3.empty);
  }

  // Empty input holds no token.
  try
  {
    t3 = "";
    extractToken(t3);
    assert(false);
  } catch (Exception e)
  {
    assert(t3.empty);
  }

  // Input that starts with a delimiter holds no token either, and must be
  // left untouched. (Replaces a verbatim duplicate of the empty-input case.)
  string t4 = "@john";
  try
  {
    extractToken(t4);
    assert(false);
  } catch (Exception e)
  {
    assert(t4 == "@john");
  }
}
159 
160 //-----------------------------------------------------------------------------
161 // Skips all contiguous spaces and comments.
162 
// Advances `source` past any mix of linear whitespace and parenthesised
// comments. Comments may nest, and a backslash inside a comment escapes the
// character that follows it.
// Throws an Exception ("malformed MIME header") if the input ends inside a
// comment.
void skipSpaceComment(ref string source)
{
  for (skipSpace(source); !source.empty && source.front == '('; skipSpace(source))
  {
    source.popFront();      // the opening '('
    ulong depth = 1;        // number of comments currently open

    while (depth != 0)
    {
      switch (source.cfront)
      {
        case '\\':
          source.popFront();  // drop the backslash; the escaped char goes below
          break;
        case '(':
          ++depth;
          break;
        case ')':
          --depth;
          break;
        default:
          break;
      }
      source.cpopFront();
    }
  }
}
186 
187 //-----------------------------------------------------------------------------
188 
// Advances `source` past any leading characters contained in mimeLwsp.
// Uses std.algorithm.canFind rather than std.string.inPattern, which has been
// deprecated and removed from Phobos; for this pattern (no '-' or '^') the
// two are equivalent.
void skipSpace(ref string source)
{
  while (!source.empty && std.algorithm.canFind(mimeLwsp, source.front))
    source.popFront();
}
194 
195 //-----------------------------------------------------------------------------
196 
// Exercises skipSpace and skipSpaceComment.
unittest
{
  // Plain leading whitespace.
  string padded = "  xyz";
  skipSpace(padded);
  assert(padded == "xyz");

  // Consecutive, nested and escaped comments separated by whitespace.
  string commented = "   (comment1) (comment (2)(2))(tricky \\) comment)  non-comment";
  skipSpaceComment(commented);
  assert(commented == "non-comment");

  // A comment that never closes consumes the input and then throws.
  string unfinished = "(unfinished comment";
  bool threw = false;
  try
  {
    skipSpaceComment(unfinished);
  }
  catch (Exception e)
  {
    threw = true;
    assert(unfinished.empty);
  }
  assert(threw);
}
217 
218 //-----------------------------------------------------------------------------
219 // "compulsory" front and popFront. Spits the dummy if empty.
220 
// Returns the front element of `range`; throws an Exception
// ("malformed MIME header") instead when the range is empty.
auto cfront(R)(ref R range) if (isInputRange!R)
{
  if (!range.empty)
    return range.front;

  throw new Exception("malformed MIME header");
}
226 
// Drops the front element of `range`; throws an Exception
// ("malformed MIME header") instead when the range is empty.
void cpopFront(R)(ref R range) if (isInputRange!R)
{
  if (!range.empty)
  {
    range.popFront();
    return;
  }

  throw new Exception("malformed MIME header");
}
232 
233 
234 //-----------------------------------------------------------------------------
235 // Reads in headers from a MIME document. Unfolds multiline headers, but
236 // does not perform any other lexing of header field bodies.
237 // Does consume the empty line following headers.
238 
// Reads header lines from `r` up to and including the first empty line and
// returns them as MimeHeader values built from (name, body): the text before
// the first ':' and the text after it. A line that starts with a space or
// tab is a continuation and is appended, unmodified, to the body of the
// preceding header.
// Throws an Exception ("malformed Mime Header") when a line is not terminated
// by MimeEoln, when a header line has no ':', or when a continuation line
// appears before any header.
MimeHeader[] parseMimeHeaders(BR)(ref BR r)
  if ((isInputRange!BR && is(ElementType!BR : ubyte)))
{
  MimeHeader[] headers;
  /* Read headers until we get to a blank line */

  while (true)
  {
    auto buf = jaypha.algorithm.findSplit(r, cast(ubyte[])MimeEoln);
    if (buf[1] != cast(ubyte[]) MimeEoln) throw new Exception("malformed Mime Header");

    if (buf[0].length == 0) break;

    auto header = cast(string) buf[0];
    if (std.algorithm.canFind(mimeLwsp, header[0]))
    {
      // Leading whitespace means this line is part of the previous header.
      // Without a previous header there is nothing to append to; report it
      // as malformed input rather than indexing an empty array.
      if (headers.length == 0) throw new Exception("malformed Mime Header");
      headers[$-1].fieldBody ~= header;
    }
    else
    {
      auto buf2 = std.algorithm.findSplit(header,":");
      if (buf2[1] != ":") throw new Exception("malformed Mime Header");
      headers ~= MimeHeader(buf2[0], buf2[2]);
    }
  }
  return headers;
}
267 
268 //-----------------------------------------------------------------------------
269 
// Exercises parseMimeHeaders on a slice: header unfolding and the position
// of the range after the blank line.
unittest
{
  // Literals are joined with '~'; implicit concatenation of adjacent string
  // literals is deprecated.
  string entity_text =
    "Content-Type: text/plain; charset=us-ascii\r\n" ~
    "Content-Disposition: blah blah \r\n" ~
    "\tblah\r\n" ~
    "\r\n" ~
    "This is explicitly typed plain US-ASCII text.\r\n" ~
    "It DOES end with a linebreak.\r\n";

  //auto r1 = inputRangeObject(cast(ubyte[]) entity_text.dup);
  auto r1 = cast(ubyte[]) entity_text;

  auto headers = parseMimeHeaders(r1);
  assert(headers.length == 2);
  assert(headers[0].name == "Content-Type");
  assert(headers[0].fieldBody == " text/plain; charset=us-ascii");
  assert(headers[1].name == "Content-Disposition");
  assert(headers[1].fieldBody == " blah blah \tblah");
  // The blank line has been consumed; the body is next.
  assert(r1.front == 'T');
}
291 
292 //-----------------------------------------------------------------------------
293 // Entity Reader. Takes an input range representing a MIME document, extracts
294 // the headers and presents the rest for further reading.
295 
// Parses the headers at the front of `reader` and returns a MimeEntityReader
// holding those headers together with the reader, which parseMimeHeaders has
// advanced past the header section.
auto mimeEntityReader(BR)(BR reader)
  if ((isInputRange!BR && is(ElementType!BR : ubyte)))
{
  auto parsedHeaders = parseMimeHeaders(reader);
  return MimeEntityReader!BR(parsedHeaders, reader);
}
301 
// A MIME entity split into its headers and the range holding the rest of
// the entity. `BR` is the type of that range. Instances are built by
// mimeEntityReader.
struct MimeEntityReader(BR)
{
  // Headers of the entity, as returned by parseMimeHeaders.
  MimeHeader[] headers;
  // The range passed to mimeEntityReader, after header parsing has run on it.
  BR content;
}
307 
308 //-----------------------------------------------------------------------------
309 
// Exercises mimeEntityReader on an input range object: headers are split
// off and the remaining content can be read from `entity.content`.
unittest
{
  import std.stdio;
  import std.exception;
  import std.array;
  import std.algorithm;
  import std.range;

  // Literals are joined with '~'; implicit concatenation of adjacent string
  // literals is deprecated.
  string entity_text =
    "Content-Type: text/plain; charset=us-ascii\r\n" ~
    "Content-Disposition: blah blah \r\n" ~
    "\tblah\r\n" ~
    "\r\n" ~
    "This is explicitly typed plain US-ASCII text.\r\n" ~
    "It DOES end with a linebreak.\r\n";

  auto r1 = inputRangeObject(cast(ubyte[]) entity_text.dup);

  auto entity = mimeEntityReader(r1);

  static assert(is(typeof(entity.content) == typeof(r1)));
  assert(entity.headers.length == 2);
  assert(entity.headers[0].name == "Content-Type");
  assert(entity.headers[0].fieldBody == " text/plain; charset=us-ascii");
  assert(entity.headers[1].name == "Content-Disposition");
  assert(entity.headers[1].fieldBody == " blah blah \tblah");

  auto buff = appender!(ubyte[]);

  entity.content.copy(buff);
  // Compare as text, the same way the other tests in this file do.
  assert(cast(char[])(buff.data) ==
    "This is explicitly typed plain US-ASCII text.\r\n" ~
    "It DOES end with a linebreak.\r\n");
}
344 
345 //-----------------------------------------------------------------------------
346 // Multipart Entity Reader. Takes an input range and converts it into an
347 // input range of Mime Entity Readers. Each element represents a Mime Entity.
348 // Presumes that headers of the primary entity have already been extracted.
349 
350 import jaypha.algorithm;
351 import jaypha.range;
352 
// Wraps `r` in a range whose elements are the entities of a multipart body
// delimited by `boundary`. Everything up to the end of the first boundary
// line is discarded before the first entity is opened.
// The returned range keeps reading from `r`: it is empty when `r` is empty,
// and each popFront drains whatever is left of the current entity before
// moving on.
// popFront throws an Exception ("malformed MIME Entity") when a boundary
// that is not the closing one is not followed by MimeEoln.
auto mimeMultipartReader(Reader)(ref Reader r, string boundary)
  if (isInputRange!Reader && is(ElementType!Reader : ubyte))
{
  // Delimiter that ends each entity: line break, two dashes, boundary text.
  string delimiter = "\r\n--" ~ boundary;

  // The first boundary is matched without the leading line break.
  jaypha.algorithm.findSplit(r, delimiter[2..$]);
  jaypha.algorithm.findSplit(r, "\r\n"); // rest of the boundary line; result not checked

  auto current = mimeEntityReader(readUntil(r, delimiter));

  alias typeof(current) Entity;

  struct MR
  {
    @property bool empty() { return r.empty; }

    @property Entity front() { return current; }

    void popFront()
    {
      // The caller may pop before the entity has been read to its end.
      if (!current.content.empty) current.content.drain();

      // Whatever follows the boundary text on its line.
      auto line = jaypha.algorithm.findSplit(r, MimeEoln);

      if (startsWith(line[0], "--"))
      {
        // Closing boundary: everything after it is epilogue.
        r.drain();
        return;
      }

      if (line[1] != MimeEoln) throw new Exception("malformed MIME Entity");
      current = mimeEntityReader(readUntil(r, delimiter));
    }
  }
  return MR();
}
390 /+
391 auto get_multipart_reader(Reader)(ref Reader r, string boundary)
392   if (isByteRange!Reader)
393 {
394   string full_boundary = "\r\n--"~boundary;
395 
396   if (!skipOverAnyway(r, full_boundary[2..$]))
397     skipOverUntil(r,full_boundary);
398   jaypha.range.munch(r, " \t");
399   skipOverAnyway(r,"\r\n");
400 
401   auto entity = mime_entity_reader(readUntil(r, full_boundary));
402 
403   alias typeof(entity) T;
404 
405   struct MR
406   {
407     @property bool empty() { return r.empty; }
408 
409     @property T front() { return entity; }
410 
411     void popFront()
412     {
413       bool last_time = false;
414 
415       if (!entity.content.empty) entity.content.drain();
416       if (skipOverAnyway(r, "--"))  // terminating boundary
417         last_time = true;
418       jaypha.range.munch(r, " \t");
419       if (!last_time)
420       {
421         skipOverAnyway(r,"\r\n");
422         entity = mime_entity_reader(readUntil(r, full_boundary));
423       }
424       else
425       {
426         r.drain(); // Skip epilogue;
427       }
428     }
429   }
430   return MR();
431 }
432 +/
433 
434 //----------------------------------------------------------------------------
// Advances the input range until the sentinel is found.
436 
// Consumes `r` up to and including the first occurrence of `sentinel`.
// Returns true when the sentinel was found (`r` is then positioned just
// after it) and false when `r` ran out first (`r` is then empty). An empty
// sentinel is found immediately without consuming anything.
//
// The search never needs to rewind `r`: on a mismatch it falls back inside
// the sentinel using a prefix table (Knuth-Morris-Pratt), so occurrences that
// overlap a failed partial match are not missed, and a sentinel that ends
// exactly at the end of the input is still reported as found.
private bool skipOverUntil(Reader)(ref Reader r, string sentinel)
{
  if (sentinel.length == 0)
    return true;

  // fallback[i]: length of the longest proper prefix of sentinel[0..i+1]
  // that is also a suffix of it.
  auto fallback = new size_t[sentinel.length];
  for (size_t i = 1, k = 0; i < sentinel.length; ++i)
  {
    while (k > 0 && sentinel[i] != sentinel[k])
      k = fallback[k-1];
    if (sentinel[i] == sentinel[k])
      ++k;
    fallback[i] = k;
  }

  size_t matched = 0;   // number of sentinel characters currently matched
  while (!r.empty)
  {
    immutable c = cast(char) r.front;
    r.popFront();

    while (matched > 0 && c != sentinel[matched])
      matched = fallback[matched-1];
    if (c == sentinel[matched])
      ++matched;

    if (matched == sentinel.length)
      return true;
  }
  return false;
}
464 
465 
// Exercises skipOverUntil and mimeMultipartReader.
unittest
{

  // Literals are joined with '~'; implicit concatenation of adjacent string
  // literals is deprecated.
  string preamble =
    "This is the preamble.  It is to be ignored, though it\r\n" ~
    "is a handy place for composition agents to include an\r\n" ~
    "explanatory note to non-MIME conformant readers.\r\n" ~
    "\r\n" ~
    "--simple boundary  \t  \t\t \r\n" ~
    "\r\n" ~
    "This is implicitly typed plain US-ASCII text.\r\n" ~
    "It does NOT end with a linebreak.\r\n" ~
    "--simple boundary\r\n" ~
    "Content-type: text/plain; charset=us-ascii\r\n" ~
    "\r\n" ~
    "This is explicitly typed plain US-ASCII text.\r\n" ~
    "It DOES end with a linebreak.\r\n" ~
    "\r\n" ~
    "--simple boundary--\r\n" ~
    "\r\n" ~
    "This is the epilogue.  It is also to be ignored.\r\n";

  auto buff = appender!(ubyte[]);

  ubyte[] txt = cast(ubyte[]) "acabacbxyz".dup;

  auto r1 = inputRangeObject(txt);

  // Sentinel present: the range is left just after it.
  auto x = r1.skipOverUntil("cbx");
  assert(x);
  r1.copy(buff);
  assert(cast(char[])(buff.data) == "yz");

  buff.clear();

  txt = cast(ubyte[]) "acabacbxyz".dup;
  r1 = inputRangeObject(txt);

  // Sentinel absent: the whole range is consumed.
  assert(!r1.skipOverUntil("c1bx"));
  assert(r1.empty);

  txt = cast(ubyte[]) preamble.dup;
  r1 = inputRangeObject(txt);

  auto r2 = mimeMultipartReader(r1, "simple boundary");


  // First entity: no headers, body without a trailing line break.
  assert(r1.front == cast(ubyte)'T');
  auto r3 = r2.front;

  assert(r3.headers.length == 0);
  put(buff,r3.content);
  assert(cast(char[])(buff.data) ==
    "This is implicitly typed plain US-ASCII text.\r\n" ~
    "It does NOT end with a linebreak.");
  assert(r3.content.empty);
  assert(r1.front == cast(ubyte)'\r');

  // Second entity: one header, body with a trailing line break.
  buff.clear();
  r2.popFront();
  r3 = r2.front;
  assert(r3.headers.length == 1);
  
  r3.content.copy(buff);

  assert(cast(char[])(buff.data) ==
    "This is explicitly typed plain US-ASCII text.\r\n" ~
    "It DOES end with a linebreak.\r\n");
  assert(r3.content.empty);

  // Popping past the closing boundary drains the epilogue as well.
  r2.popFront();
  assert(r2.empty);
  assert(r1.empty);

}
543 
544 //----------------------------------------------------------------------------
// Consumes the front of the range as long as it matches the given prefix.
// Returns whether or not the entire prefix was matched. If all_or_nothing is
// true, then an exception occurs if the prefix is not matched in its entirety.
// Designed to work with ranges that cannot be rewound.
549 
// Consumes elements from the front of `r` for as long as they match `prefix`
// and returns true only if the whole prefix was matched. Elements consumed
// during a partial match are NOT restored.
// Nothing is consumed (and false is returned) when `r` is empty or its first
// element differs from the first prefix character. An empty prefix matches
// trivially: true is returned and nothing is consumed.
// With `all_or_nothing` set, a partial match throws an Exception
// ("malformed MIME Entity") instead of returning false.
bool skipOverAnyway(R)(ref R r, string prefix, bool all_or_nothing = false)
 if (isInputRange!R)
{
  // Guard first: prefix[0] below would be out of bounds.
  if (prefix.length == 0)
    return true;

  if (r.empty || r.front != prefix[0])
    return false;

  size_t matched = 0;
  do
  {
    r.popFront();
    ++matched;
  } while (matched < prefix.length && !r.empty && r.front == prefix[matched]);

  if (matched == prefix.length) return true;
  if (all_or_nothing) throw new Exception("malformed MIME Entity");
  return false;
}
567 
// Exercises skipOverAnyway: full match, partial match, immediate mismatch
// and the all_or_nothing mode.
unittest
{
  auto remaining = appender!(ubyte[])();
  auto input = inputRangeObject(cast(ubyte[]) "acabacbxyz".dup);

  assert(skipOverAnyway(input, "aca"));     // full match
  assert(!skipOverAnyway(input, "baa"));    // partial match, "ba" is consumed
  assert(!skipOverAnyway(input, "xyz"));    // first element differs, nothing consumed

  // A partial match with all_or_nothing set must throw.
  bool threw = false;
  try
  {
    skipOverAnyway(input, "cbz", true);
  }
  catch (Exception e)
  {
    threw = true;
  }
  assert(threw);

  input.copy(remaining);
  assert(cast(char[])(remaining.data) == "xyz");
}
585 
586 //----------------------------------------------------------------------------
587 // An alternative to std.algorithm.until that works with non-rewindable input
588 // ranges.
589 
// Returns an input range that yields the elements of `r` up to, but not
// including, the first occurrence of `sentinel`. The sentinel itself is
// consumed from `r`, so `r` is left just after it once the returned range
// reports empty. Works on ranges that cannot be rewound: elements consumed
// while testing for the sentinel are replayed from the sentinel's own text
// when the match turns out to be only partial.
//
// A sentinel at the very start of `r` is detected immediately (the returned
// range starts out empty), and a sentinel that ends exactly at the end of
// `r` counts as found.
// Throws an Exception ("malformed MIME Entity") when `r` runs out before a
// complete sentinel has been seen.
auto readUntil(R,E)(ref R r, E sentinel)
  if (isInputRange!R && isInputRange!E &&
      isScalarType!(ElementType!E) && isScalarType!(ElementType!R))
{
  alias ElementType!R T;

  //----------------------------------------------------

  final class ReadUntil
  {
    //------------------------------------

    // Set once the complete sentinel has been consumed from r.
    bool empty = false;

    //------------------------------------

    @property T front()
    {
      // While a failed partial match is pending, replay it from the sentinel.
      if (idx < length) return sentinel[idx];
      return r.front;
    }

    //------------------------------------

    void popFront()
    {
      if (!empty)
      {
        if (idx < length)
        {
          ++idx;
          if (idx == length)
          {
            // Replay finished; the element now at r.front has not been
            // tested as a sentinel start yet.
            idx = length = 0;
            sentinel_check();
          }
        }
        else
        {
          r.popFront();
          if (r.empty) throw new Exception("malformed MIME Entity");
          sentinel_check();
        }
      }
    }

    //------------------------------------

    // Tests whether the sentinel starts at r.front. Consumes the matching
    // elements: on a full match `empty` is set, on a partial match `length`
    // records how many elements have to be replayed.
    // Requires r to be non-empty and no replay to be pending.
    void sentinel_check()
    {
      if (r.front != sentinel[0]) return;

      do
      {
        r.popFront();
        ++length;
        if (length == sentinel.length)
        {
          // Complete match. Checked before r.empty so that a sentinel
          // ending at the end of the input is accepted.
          empty = true;
          return;
        }
        if (r.empty) throw new Exception("malformed MIME Entity");
      } while (r.front == sentinel[length]);
    }

    //------------------------------------

    private:
      uint length = 0;  // number of sentinel elements consumed from r
      uint idx = 0;     // replay position within those elements
  }

  auto reader = new ReadUntil();
  // The first element must be tested too, otherwise a sentinel at the very
  // start of r would be handed out as content.
  if (!r.empty) reader.sentinel_check();
  return reader;
}
662 
663 //----------------------------------------------------------------------------
664 
// Exercises readUntil: the content before the sentinel is yielded, the
// sentinel is consumed, and the rest stays in the underlying range.
unittest
{
  auto source = inputRangeObject(cast(ubyte[]) "acabacbxyz".dup);
  auto collected = appender!(ubyte[]);

  // "acab" precedes the first "acb"; the leading "ac" + 'a' is only a
  // partial match and must be replayed.
  auto upToSentinel = readUntil(source, "acb");
  upToSentinel.copy(collected);
  assert(cast(char[])(collected.data) == "acab");

  collected.clear();
  source.copy(collected);
  assert(cast(char[])(collected.data) == "xyz");
}