//Written in the D programming language
/*
 * Routines for reading and parsing MIME documents.
 *
 * Copyright: Copyright (C) 2013-2014 Jaypha
 *
 * Distributed under the Boost Software License, Version 1.0.
 * (See http://www.boost.org/LICENSE_1_0.txt)
 *
 * Authors: Jason den Dulk
 */

module jaypha.inet.mime.reading;

public import jaypha.inet.mime.header;

enum mimeSpecials = "()<>@,;:\\\".[]"; // from RFC822
enum mimetSpecials = "()<>@,;:\\\"/[]?="; // from RFC2045
enum mimeLwsp = " \t";
enum mimeDelimeters = mimeSpecials ~ mimeLwsp;
enum mimeTokenDelimeters = mimetSpecials ~ mimeLwsp;

import std.array;
import std.string;
import std.range;
import std.algorithm;

//-----------------------------------------------------------------------------
// Extracts all MIME parameters found at the front of source and stores them
// in parameters (an existing entry with the same name is overwritten).
// Parameters are of the format *(';' name '=' value)
// value = token / quoted-string.
// Spaces and comments are permitted around each element.
//
// source is taken by value, so the caller's string is not advanced. Parsing
// stops at the first element that is not a ';'; whatever follows is ignored.
//
// Throws: Exception("malformed MIME header") if a name or value is missing,
// if '=' is missing, or if a comment or quoted string is not terminated.

void extractMimeParams(string source, ref string[string] parameters)
{
  skipSpaceComment(source);
  while (!source.empty && source.front == ';')
  {
    source.popFront();
    source.skipSpaceComment();
    auto attribute = source.extractToken();
    source.skipSpaceComment();
    if (source.cfront != '=') throw new Exception("malformed MIME header");
    source.popFront();
    source.skipSpaceComment();
    // cfront throws if the value is missing altogether.
    if (source.cfront == '\"')
      parameters[attribute] = source.extractQuotedString();
    else
      parameters[attribute] = source.extractToken();
    source.skipSpaceComment();
  }
}

//-----------------------------------------------------------------------------

unittest
{
  string t1 = "; bean = (not this)rock ";
  string t2 = ";dog(canine) = \"A (canine) animal\" ";
  string t3 = " ;(c)rabbit=jack;jack=\"\"";
  string t4 = ";";

  string[string] parms;

  extractMimeParams(t1,parms);
  assert("bean" in parms);
  // Must be a comparison; an assignment here would verify nothing.
  assert(parms["bean"] == "rock");

  extractMimeParams(t2,parms);
  assert("dog" in parms);
  assert(parms["dog"] == "A (canine) animal");

  extractMimeParams(t3,parms);
  assert("rabbit" in parms);
  assert("jack" in parms);
  assert(parms["rabbit"] == "jack");
  assert(parms["jack"].empty);

  try {
    extractMimeParams(t4,parms);
    assert(false);
  } catch (Exception e) {
  }
}

//-----------------------------------------------------------------------------
// Extracts a MIME token from the input string.
// The token is everything up to (not including) the first character found in
// mimeTokenDelimeters, or the whole string if there is none. source is
// advanced past the token.
//
// Throws: Exception("malformed MIME header") if the token would be empty;
// source is left untouched in that case.

string extractToken(ref string source)
{
  auto remainder = findAmong(source, mimeTokenDelimeters);
  auto token = source[0..$-remainder.length];
  if (token.empty) throw new Exception("malformed MIME header");
  source = remainder;
  return token;
}

//-----------------------------------------------------------------------------
// Extracts a quoted string token from the input string.
// Precondition: source is not empty and its front is the opening quote (the
// first character is dropped without being inspected).
// A backslash makes the following character literal. The returned string has
// the surrounding quotes and the escaping backslashes removed; source is
// advanced past the closing quote.
//
// Throws: Exception("malformed MIME header") if the input ends before the
// closing quote is found.

string extractQuotedString(ref string source)
{
  auto s = appender!string();

  source.popFront(); // front should be a \".
  while (source.cfront != '\"')
  {
    if (source.front == '\\')
      source.popFront(); // drop the backslash, keep the escaped character
    s.put(source.cfront());
    source.popFront();
  }
  source.popFront(); // front should be a \".
  return s.data;
}

//-----------------------------------------------------------------------------

unittest
{
  string t1 = "john@";
  string t2 = "\" a quoted \\\" string\"g";
  string t3 = "\"unfinished";

  assert(extractToken(t1) == "john");
  assert(t1 == "@");

  assert(extractQuotedString(t2) == " a quoted \" string");
  assert(t2 == "g");

  try
  {
    extractQuotedString(t3);
    assert(false);
  } catch (Exception e)
  {
    assert(t3.empty);
  }

  try
  {
    t3 = "";
    extractToken(t3);
    assert(false);
  } catch (Exception e)
  {
    assert(t3.empty);
  }
}

//-----------------------------------------------------------------------------
// Skips all contiguous spaces and comments.
// Comments are enclosed in parentheses, may be nested, and may contain
// backslash-escaped characters (an escaped parenthesis does not count).
//
// Throws: Exception("malformed MIME header") if a comment is not closed
// before the input ends.

void skipSpaceComment(ref string source)
{
  skipSpace(source);
  while (!source.empty && source.front == '(')
  {
    ulong count = 1; // current nesting depth
    source.popFront();
    do
    {
      if (source.cfront == '\\')
        source.popFront(); // the escaped character is popped below
      else
      {
        if (source.front == '(')
          ++count;
        else if (source.front == ')')
          --count;
      }
      source.cpopFront();
    } while (count != 0);
    skipSpace(source);
  }
}

//-----------------------------------------------------------------------------
// Skips leading spaces and tabs (the characters in mimeLwsp).

void skipSpace(ref string source)
{
  while (!source.empty && inPattern(source.front, mimeLwsp))
    source.popFront();
}

//-----------------------------------------------------------------------------

unittest
{
  string t1 = " xyz";
  string t2 = " (comment1) (comment (2)(2))(tricky \\) comment) non-comment";
  string t3 = "(unfinished comment";

  skipSpace(t1);
  assert(t1 == "xyz");

  skipSpaceComment(t2);
  assert(t2 == "non-comment");

  try {
    skipSpaceComment(t3);
    assert(false);
  } catch(Exception e)
  {
    assert(t3.empty);
  }
}

//-----------------------------------------------------------------------------
// "compulsory" front and popFront. Spits the dummy if empty, i.e. throws
// Exception("malformed MIME header") instead of touching an empty range.

auto cfront(R)(ref R range) if (isInputRange!R)
{
  if (range.empty) throw new Exception("malformed MIME header");
  return range.front;
}

void cpopFront(R)(ref R range) if (isInputRange!R)
{
  if (range.empty) throw new Exception("malformed MIME header");
  range.popFront();
}


//-----------------------------------------------------------------------------
// Reads in headers from a MIME document. Unfolds multiline headers, but
// does not perform any other lexing of header field bodies.
// Does consume the empty line following headers.
//
// A line that starts with a space or tab is appended verbatim (leading
// whitespace included) to the body of the preceding header.
//
// Throws: Exception("malformed Mime Header") if a line is not terminated by
// MimeEoln, if a header line contains no ':', or if the very first line is a
// continuation line (there is no header for it to continue).
//
// NOTE(review): relies on jaypha.algorithm.findSplit consuming r up to and
// including the separator; that function is not visible here - confirm.

MimeHeader[] parseMimeHeaders(BR)(ref BR r)
  if ((isInputRange!BR && is(ElementType!BR : ubyte)))
{
  MimeHeader[] headers;
  /* Read headers until we get to a blank line */

  while (true)
  {
    auto buf = jaypha.algorithm.findSplit(r, cast(ubyte[])MimeEoln);
    if (buf[1] != cast(ubyte[]) MimeEoln) throw new Exception("malformed Mime Header");

    if (buf[0].length == 0) break;

    auto header = cast(string) buf[0];
    if (inPattern(header[0], mimeLwsp))
    {
      // leading whitespace means it is part of the previous header.
      // Without a previous header, headers[$-1] would be out of bounds.
      if (headers.length == 0) throw new Exception("malformed Mime Header");
      headers[$-1].fieldBody ~= header;
    }
    else
    {
      auto buf2 = std.algorithm.findSplit(header,":");
      if (buf2[1] != ":") throw new Exception("malformed Mime Header");
      headers ~= MimeHeader(buf2[0], buf2[2]);
    }
  }
  return headers;
}

//-----------------------------------------------------------------------------

unittest
{
  string entity_text =
    "Content-Type: text/plain; charset=us-ascii\r\n"
    "Content-Disposition: blah blah \r\n"
    "\tblah\r\n"
    "\r\n"
    "This is explicitly typed plain US-ASCII text.\r\n"
    "It DOES end with a linebreak.\r\n";

  //auto r1 = inputRangeObject(cast(ubyte[]) entity_text.dup);
  auto r1 = cast(ubyte[]) entity_text;

  auto headers = parseMimeHeaders(r1);
  assert(headers.length == 2);
  assert(headers[0].name == "Content-Type");
  assert(headers[0].fieldBody == " text/plain; charset=us-ascii");
  assert(headers[1].name == "Content-Disposition");
  assert(headers[1].fieldBody == " blah blah \tblah");
  assert(r1.front == 'T');

  // A continuation line with no header before it is malformed.
  string dangling_text = "\tdangling\r\n\r\n";
  auto r2 = cast(ubyte[]) dangling_text;
  try {
    parseMimeHeaders(r2);
    assert(false);
  } catch (Exception e) {
  }
}

//-----------------------------------------------------------------------------
// Entity Reader. Takes an input range representing a MIME document, extracts
// the headers and presents the rest for further reading.
// Parses the headers at the front of reader (including the blank line that
// ends them) and returns them together with the range, which is then
// positioned at the start of the entity's content.

auto mimeEntityReader(BR)(BR reader)
  if ((isInputRange!BR && is(ElementType!BR : ubyte)))
{
  // Parse first: parseMimeHeaders advances reader, and it is the advanced
  // range that must be stored as the content.
  auto parsedHeaders = parseMimeHeaders(reader);
  return MimeEntityReader!BR(parsedHeaders, reader);
}

// A parsed MIME entity: its headers plus the range holding its content.

struct MimeEntityReader(BR)
{
  MimeHeader[] headers; // headers in the order they appeared
  BR content;           // what remains of the input after the headers
}

//-----------------------------------------------------------------------------

unittest
{
  import std.stdio;
  import std.exception;
  import std.array;
  import std.algorithm;
  import std.range;

  string entity_text =
    "Content-Type: text/plain; charset=us-ascii\r\n"
    "Content-Disposition: blah blah \r\n"
    "\tblah\r\n"
    "\r\n"
    "This is explicitly typed plain US-ASCII text.\r\n"
    "It DOES end with a linebreak.\r\n";

  auto input = inputRangeObject(cast(ubyte[]) entity_text.dup);

  auto entity = mimeEntityReader(input);

  // The content keeps the type of the range that was handed in.
  static assert(is(typeof(entity.content) == typeof(input)));

  assert(entity.headers.length == 2);

  auto first = entity.headers[0];
  assert(first.name == "Content-Type");
  assert(first.fieldBody == " text/plain; charset=us-ascii");

  auto second = entity.headers[1];
  assert(second.name == "Content-Disposition");
  assert(second.fieldBody == " blah blah \tblah");

  // Everything after the blank line is the content.
  auto collected = appender!(ubyte[]);
  entity.content.copy(collected);
  assert(collected.data ==
    "This is explicitly typed plain US-ASCII text.\r\n"
    "It DOES end with a linebreak.\r\n");
}

//-----------------------------------------------------------------------------
// Multipart Entity Reader. Takes an input range and converts it into an
// input range of Mime Entity Readers. Each element represents a Mime Entity.
// Presumes that headers of the primary entity have already been extracted.
import jaypha.algorithm;
import jaypha.range;

// r is the multipart body; boundary is the boundary parameter without the
// leading "--". The preamble and the first boundary line are skipped
// immediately, and the first entity's headers are parsed immediately.
//
// The returned range reads from r as it is iterated, and is empty when r is
// empty. Popping past the closing boundary ("--boundary--") drains the
// epilogue from r.
//
// Throws: Exception("malformed MIME Entity") from popFront if a boundary is
// followed by neither "--" nor a line break.
//
// NOTE(review): relies on jaypha.algorithm.findSplit consuming r up to and
// including the separator, and on drain() consuming the rest of a range;
// neither is visible here - confirm.

auto mimeMultipartReader(Reader)(ref Reader r, string boundary)
  if (isInputRange!Reader && is(ElementType!Reader : ubyte))
{
  // Every boundary except possibly the first is preceded by a line break.
  string full_boundary = "\r\n--"~boundary;

  // The first boundary is searched for without the leading line break.
  jaypha.algorithm.findSplit(r, full_boundary[2..$]);
  jaypha.algorithm.findSplit(r, "\r\n"); // skip over whitespace, but don't bother checking.

  auto entity = mimeEntityReader(readUntil(r, full_boundary));

  alias typeof(entity) T;

  struct MR
  {
    @property bool empty() { return r.empty; }

    @property T front() { return entity; }

    void popFront()
    {
      if (!entity.content.empty) entity.content.drain(); // In case the user pops before fully reading the entity

      auto rem = jaypha.algorithm.findSplit(r, MimeEoln); // skip over whitespace, but don't bother checking.
      // "--" straight after the boundary marks the closing boundary.
      bool last_time = startsWith(rem[0], "--");
      if (!last_time)
      {
        if (rem[1] != MimeEoln) throw new Exception("malformed MIME Entity");
        entity = mimeEntityReader(readUntil(r, full_boundary));
      }
      else
      {
        r.drain(); // Skip epilogue;
      }
    }
  }
  return MR();
}
/+
auto get_multipart_reader(Reader)(ref Reader r, string boundary)
  if (isByteRange!Reader)
{
  string full_boundary = "\r\n--"~boundary;

  if (!skipOverAnyway(r, full_boundary[2..$]))
    skipOverUntil(r,full_boundary);
  jaypha.range.munch(r, " \t");
  skipOverAnyway(r,"\r\n");

  auto entity = mime_entity_reader(readUntil(r, full_boundary));

  alias typeof(entity) T;

  struct MR
  {
    @property bool empty() { return r.empty; }

    @property T front() { return entity; }

    void popFront()
    {
      bool last_time = false;

      if (!entity.content.empty) entity.content.drain();
      if (skipOverAnyway(r, "--")) // terminating boundary
        last_time = true;
      jaypha.range.munch(r, " \t");
      if (!last_time)
      {
        skipOverAnyway(r,"\r\n");
        entity = mime_entity_reader(readUntil(r, full_boundary));
      }
      else
      {
        r.drain(); // Skip epilogue;
      }
    }
  }
  return MR();
}
+/

//----------------------------------------------------------------------------
// Advances the input range until sentinel is found.
// Returns true if the sentinel was found, with r positioned just after it
// (also when the sentinel is the last thing in the input). Returns false if
// the input ran out first; r is then empty. An empty sentinel is found
// immediately, without consuming anything.
//
// Elements consumed by a partial match are not re-examined, so a sentinel
// that starts inside a failed partial match (e.g. "aab" in "aaab") is missed.

private bool skipOverUntil(Reader)(ref Reader r, string sentinel)
{
  if (sentinel.length == 0)
    return true;

  while (!r.empty)
  {
    if (cast(char)r.front != sentinel[0])
    {
      r.popFront();
      continue;
    }

    // Consume for as long as the input keeps agreeing with the sentinel.
    size_t matched = 0;
    while (matched < sentinel.length && !r.empty && cast(char)r.front == sentinel[matched])
    {
      r.popFront();
      ++matched;
    }

    if (matched == sentinel.length)
      return true;
    // Partial match: carry on from the element that broke it.
  }
  return false;
}


unittest
{

  string preamble =
    "This is the preamble. It is to be ignored, though it\r\n"
    "is a handy place for composition agents to include an\r\n"
    "explanatory note to non-MIME conformant readers.\r\n"
    "\r\n"
    "--simple boundary \t \t\t \r\n"
    "\r\n"
    "This is implicitly typed plain US-ASCII text.\r\n"
    "It does NOT end with a linebreak.\r\n"
    "--simple boundary\r\n"
    "Content-type: text/plain; charset=us-ascii\r\n"
    "\r\n"
    "This is explicitly typed plain US-ASCII text.\r\n"
    "It DOES end with a linebreak.\r\n"
    "\r\n"
    "--simple boundary--\r\n"
    "\r\n"
    "This is the epilogue. It is also to be ignored.\r\n";

  auto buff = appender!(ubyte[]);

  ubyte[] txt = cast(ubyte[]) "acabacbxyz".dup;

  auto r1 = inputRangeObject(txt);

  auto x = r1.skipOverUntil("cbx");
  assert(x);
  r1.copy(buff);
  assert(cast(char[])(buff.data) == "yz");

  buff.clear();

  txt = cast(ubyte[]) "acabacbxyz".dup;
  r1 = inputRangeObject(txt);

  assert(!r1.skipOverUntil("c1bx"));
  assert(r1.empty);

  // A sentinel that ends exactly at the end of the input is still found.
  txt = cast(ubyte[]) "acabacbxyz".dup;
  r1 = inputRangeObject(txt);

  assert(r1.skipOverUntil("xyz"));
  assert(r1.empty);

  // An exhausted range is reported as "not found" rather than being read.
  assert(!r1.skipOverUntil("a"));

  txt = cast(ubyte[]) preamble.dup;
  r1 = inputRangeObject(txt);

  auto r2 = mimeMultipartReader(r1, "simple boundary");


  assert(r1.front == cast(ubyte)'T');
  auto r3 = r2.front;

  assert(r3.headers.length == 0);
  put(buff,r3.content);
  assert(buff.data == "This is implicitly typed plain US-ASCII text.\r\n"
    "It does NOT end with a linebreak.");
  assert(r3.content.empty);
  assert(r1.front == cast(ubyte)'\r');

  buff.clear();
  r2.popFront();
  r3 = r2.front;
  assert(r3.headers.length == 1);

  r3.content.copy(buff);

  assert(buff.data ==
    "This is explicitly typed plain US-ASCII text.\r\n"
    "It DOES end with a linebreak.\r\n");
  assert(r3.content.empty);
  r2.popFront();
  assert(r2.empty);
  assert(r1.empty);

}

//----------------------------------------------------------------------------
// Consumes the front of the range as long as it matches the given prefix.
// Returns whether or not the entire prefix got matched. If all_or_nothing is
// true, then an exception occurs if the prefix is matched only partially
// (when not even the first element matches, nothing is consumed and false is
// returned).
// Designed to work with ranges that cannot be rewound.
// r is advanced past every element that matched, even when the match turns
// out to be incomplete. An empty prefix counts as matched and consumes
// nothing.
//
// Throws: Exception("malformed MIME Entity") if all_or_nothing is set and
// the prefix was matched only in part.

bool skipOverAnyway(R)(ref R r, string prefix, bool all_or_nothing = false)
  if (isInputRange!R)
{
  // Nothing to match; this also protects the prefix[0] access below.
  if (prefix.length == 0)
    return true;

  if (r.empty || r.front != prefix[0])
    return false;

  size_t matched = 0;
  do
  {
    r.popFront();
    ++matched;
  } while (matched < prefix.length && !r.empty && r.front == prefix[matched]);

  if (matched == prefix.length) return true;
  if (all_or_nothing) throw new Exception("malformed MIME Entity");
  return false;
}

unittest
{
  ubyte[] txt = cast(ubyte[]) "acabacbxyz".dup;
  auto r1 = inputRangeObject(txt);
  auto buff = appender!(ubyte[])();

  // An empty prefix matches without consuming anything.
  assert(skipOverAnyway(r1, ""));
  assert(r1.front == 'a');

  assert(skipOverAnyway(r1, "aca"));
  assert(!skipOverAnyway(r1, "baa"));
  assert(!skipOverAnyway(r1, "xyz"));
  try {
    skipOverAnyway(r1,"cbz",true);
    assert(false);
  } catch (Exception e) {
  }
  r1.copy(buff);
  assert(cast(char[])(buff.data) == "xyz");
}

//----------------------------------------------------------------------------
// An alternative to std.algorithm.until that works with non-rewindable input
// ranges.
// Returns an input range that yields the elements of r up to, but not
// including, the first occurrence of sentinel. The sentinel itself is
// consumed from r, so afterwards r is positioned just behind it.
//
// r is captured by reference and is advanced as the returned range is read.
// It may already be advanced inside this call: the front of r is checked for
// the sentinel straight away, so that a sentinel at the very start of the
// input gives an empty result.
//
// sentinel must not be empty, and must support indexing and .length.
//
// Throws: Exception("malformed MIME Entity") if r runs out before the
// sentinel has been found.
//
// Elements consumed by a failed partial match are replayed but not checked
// again, so a sentinel that starts inside such a partial match (e.g. "aab"
// in "aaab") is not detected.

auto readUntil(R,E)(ref R r, E sentinel)
  if (isInputRange!R && isInputRange!E &&
      isScalarType!(ElementType!E) && isScalarType!(ElementType!R))
{
  alias ElementType!R T;

  //----------------------------------------------------

  final class ReadUntil
  {
    //------------------------------------

    // Set once the complete sentinel has been consumed from r.
    bool empty = false;

    //------------------------------------

    @property T front()
    {
      // Elements taken from r by a failed partial match are replayed from
      // the sentinel, which is what they were equal to.
      if (idx < length) return sentinel[idx];
      return r.front;
    }

    //------------------------------------

    void popFront()
    {
      if (!empty)
      {
        if (idx < length)
        {
          ++idx;
          if (idx == length)
          {
            // Replay finished; r's front has not been checked yet.
            idx = length = 0;
            sentinel_check();
          }
        }
        else
        {
          r.popFront();
          if (r.empty) throw new Exception("malformed MIME Entity");
          sentinel_check();
        }
      }
    }

    //------------------------------------

    // Consumes from r for as long as it agrees with the sentinel. On a full
    // match the range becomes empty; on a partial match the number of
    // consumed elements is left in length so that they can be replayed.
    // Requires r to be non-empty.

    void sentinel_check()
    {
      if (r.front != sentinel[0]) return;

      do
      {
        r.popFront();
        ++length;
        // Test for completion before looking at r again, so a sentinel that
        // ends exactly at the end of the input is accepted.
        if (length == sentinel.length)
        {
          empty = true;
          return;
        }
        if (r.empty) throw new Exception("malformed MIME Entity");
      } while (r.front == sentinel[length]);
    }

    //------------------------------------

    private:
      uint length = 0; // elements of r held back by a partial match
      uint idx = 0;    // how many of those have been replayed
  }

  auto reader = new ReadUntil();
  // Without this a sentinel at the very start of r would go unnoticed,
  // because the other checks only run after a popFront.
  if (!r.empty) reader.sentinel_check();
  return reader;
}

//----------------------------------------------------------------------------

unittest
{
  ubyte[] txt = cast(ubyte[]) "acabacbxyz".dup;

  auto buff = appender!(ubyte[]);

  auto r1 = inputRangeObject(txt);

  auto u = readUntil(r1,"acb");
  u.copy(buff);
  assert(cast(char[])(buff.data) == "acab");
  buff.clear();
  r1.copy(buff);
  assert(cast(char[])(buff.data) == "xyz");

  // Sentinel at the very start: nothing to read.
  buff.clear();
  txt = cast(ubyte[]) "acbxyz".dup;
  r1 = inputRangeObject(txt);
  u = readUntil(r1,"acb");
  assert(u.empty);
  r1.copy(buff);
  assert(cast(char[])(buff.data) == "xyz");

  // Sentinel at the very end: ends cleanly instead of throwing.
  buff.clear();
  txt = cast(ubyte[]) "xyacb".dup;
  r1 = inputRangeObject(txt);
  u = readUntil(r1,"acb");
  u.copy(buff);
  assert(cast(char[])(buff.data) == "xy");
  assert(r1.empty);

  // No sentinel at all is still an error.
  txt = cast(ubyte[]) "xyz".dup;
  r1 = inputRangeObject(txt);
  u = readUntil(r1,"acb");
  try {
    while (!u.empty) u.popFront();
    assert(false);
  } catch (Exception e) {
  }
}