//Written in the D programming language
/*
 * Routines for reading and parsing MIME documents.
 *
 * Copyright: Copyright (C) 2013-2014 Jaypha
 *
 * Distributed under the Boost Software License, Version 1.0.
 * (See http://www.boost.org/LICENSE_1_0.txt)
 *
 * Authors: Jason den Dulk
 */

module jaypha.inet.mime.reading;

public import jaypha.inet.mime.header;

enum mimeSpecials = "()<>@,;:\\\".[]"; // from RFC822
enum mimetSpecials = "()<>@,;:\\\"/[]?="; // from RFC2045
enum mimeLwsp = " \t";
enum mimeDelimeters = mimeSpecials ~ mimeLwsp;
enum mimeTokenDelimeters = mimetSpecials ~ mimeLwsp;

import std.array;
import std.string;
import std.range;
import std.algorithm;
import std.traits;

//-----------------------------------------------------------------------------
// Extracts MIME parameters from source and stores them in parameters,
// keyed by attribute name (an existing key is overwritten).
// Parameters are of the format *(';' attribute '=' value)
// value = token / quoted-string.
// Linear whitespace and parenthesised comments are skipped around every
// element. Parsing stops silently at the first position that does not start
// with ';'.
// Throws Exception("malformed MIME header") if an attribute or token value is
// missing, if '=' is missing, or if a comment or quoted string is unfinished.
// TODO updated def in RFC2231

void extractMimeParams(string source, ref string[string] parameters)
{
  skipSpaceComment(source);
  while (!source.empty && source.front == ';')
  {
    source.popFront();
    source.skipSpaceComment();
    auto attribute = source.extractToken();
    source.skipSpaceComment();
    if (source.cfront != '=') throw new Exception("malformed MIME header");
    source.popFront();
    source.skipSpaceComment();
    if (source.cfront == '\"')
      parameters[attribute] = source.extractQuotedString();
    else
      parameters[attribute] = source.extractToken();
    source.skipSpaceComment();
  }
}

//-----------------------------------------------------------------------------

unittest
{
  string t1 = "; bean = (not this)rock ";
  string t2 = ";dog(canine) = \"A (canine) animal\" ";
  string t3 = " ;(c)rabbit=jack;jack=\"\"";
  string t4 = ";";

  string[string] parms;

  extractMimeParams(t1,parms);
  assert("bean" in parms);
  // Compare the stored value; '=' here would overwrite it instead.
  assert(parms["bean"] == "rock");

  extractMimeParams(t2,parms);
  assert("dog" in parms);
  assert(parms["dog"] == "A (canine) animal");

  extractMimeParams(t3,parms);
  assert("rabbit" in parms);
  assert("jack" in parms);
  assert(parms["rabbit"] == "jack");
  assert(parms["jack"].empty);

  try {
    extractMimeParams(t4,parms);
    assert(false);
  } catch (Exception e) {
  }
}

//-----------------------------------------------------------------------------
// Extracts a MIME token from the front of the input string.
// The token is everything up to the first character found in
// mimeTokenDelimeters (or the whole string if there is none). On success
// source is advanced past the token.
// Throws Exception("malformed MIME header") if the token would be empty; in
// that case source is left untouched.

string extractToken(ref string source)
{
  auto remainder = findAmong(source, mimeTokenDelimeters);
  auto token = source[0..$-remainder.length];
  if (token.empty) throw new Exception("malformed MIME header");
  source = remainder;
  return token;
}

//-----------------------------------------------------------------------------
// Extracts a quoted string token from the input string and returns its
// content with the surrounding quotes removed and backslash escapes resolved
// (a backslash makes the following character literal).
// The first character is assumed to be the opening quote and is not verified.
// source is advanced past the closing quote.
// Throws Exception("malformed MIME header") if the input ends before the
// closing quote is reached.

string extractQuotedString(ref string source)
{
  auto s = appender!string();

  // Front should be a \". Checked pop so an empty input raises the usual
  // exception instead of popping an empty range.
  source.cpopFront();
  while (source.cfront != '\"')
  {
    if (source.front == '\\')
      source.popFront();
    s.put(source.cfront());
    source.popFront();
  }
  source.popFront(); // front is the closing \".
  return s.data;
}

//-----------------------------------------------------------------------------

unittest
{
  string t1 = "john@";
  string t2 = "\" a quoted \\\" string\"g";
  string t3 = "\"unfinished";

  assert(extractToken(t1) == "john");
  assert(t1 == "@");

  assert(extractQuotedString(t2) == " a quoted \" string");
  assert(t2 == "g");

  try
  {
    extractQuotedString(t3);
    assert(false);
  } catch (Exception e)
  {
    assert(t3.empty);
  }

  try
  {
    t3 = "";
    extractToken(t3);
    assert(false);
  } catch (Exception e)
  {
    assert(t3.empty);
  }

  try
  {
    t3 = "";
    extractQuotedString(t3);
    assert(false);
  } catch (Exception e)
  {
    assert(t3.empty);
  }
}

//-----------------------------------------------------------------------------
// Skips all contiguous spaces and comments.
// A comment is a parenthesised run of characters; comments may nest, and a
// backslash makes the following character literal (so "\)" does not close
// the comment).
// Throws Exception("malformed MIME header") if the input ends inside a
// comment.

void skipSpaceComment(ref string source)
{
  skipSpace(source);
  while (!source.empty && source.front == '(')
  {
    ulong count = 1; // current nesting depth
    source.popFront();
    do
    {
      if (source.cfront == '\\')
        source.popFront(); // drop the backslash; the escaped char is popped below
      else
      {
        if (source.front == '(')
          ++count;
        else if (source.front == ')')
          --count;
      }
      source.cpopFront();
    } while (count != 0);
    skipSpace(source);
  }
}

//-----------------------------------------------------------------------------
// Skips leading linear whitespace (the characters in mimeLwsp).

void skipSpace(ref string source)
{
  while (!source.empty && inPattern(source.front, mimeLwsp))
    source.popFront();
}

//-----------------------------------------------------------------------------

unittest
{
  string t1 = " xyz";
  string t2 = " (comment1) (comment (2)(2))(tricky \\) comment) non-comment";
  string t3 = "(unfinished comment";

  skipSpace(t1);
  assert(t1 == "xyz");

  skipSpaceComment(t2);
  assert(t2 == "non-comment");

  try {
    skipSpaceComment(t3);
    assert(false);
  } catch(Exception e)
  {
    assert(t3.empty);
  }
}

//-----------------------------------------------------------------------------
// "compulsory" front and popFront. Both throw
// Exception("malformed MIME header") if the range is empty instead of
// touching an empty range.

auto cfront(R)(ref R range) if (isInputRange!R)
{
  if (range.empty) throw new Exception("malformed MIME header");
  return range.front;
}

void cpopFront(R)(ref R range) if (isInputRange!R)
{
  if (range.empty) throw new Exception("malformed MIME header");
  range.popFront();
}

//-----------------------------------------------------------------------------
// Reads in headers from a MIME document. Unfolds multiline headers, but
// does not perform any other lexing of header field bodies.
// Does consume the empty line following headers.
// Throws Exception("malformed Mime Header") if a line is not terminated by
// MimeEoln, if a header line has no ':', or if the very first line is a
// continuation line (there is no previous header to attach it to).
// TODO: Allow to read from strings as well as octet streams.

MimeHeader[] parseMimeHeaders(BR)(ref BR reader)
  if ((isInputRange!BR && is(ElementType!BR : ubyte)))
{
  MimeHeader[] headers;
  /* Read headers until we get to a blank line */

  while (true)
  {
    auto buf = jaypha.algorithm.findSplit(reader, cast(ubyte[])MimeEoln);
    if (buf[1] != cast(ubyte[]) MimeEoln) throw new Exception("malformed Mime Header");

    if (buf[0].length == 0) break;

    auto header = cast(string) buf[0];

    if (inPattern(header[0], mimeLwsp))
    {
      // leading whitespace means it is part of the previous header.
      // Without a previous header, headers[$-1] would be out of bounds.
      if (headers.length == 0) throw new Exception("malformed Mime Header");
      headers[$-1].fieldBody ~= header;
    }
    else
    {
      auto buf2 = std.algorithm.findSplit(header,":");
      if (buf2[1] != ":") throw new Exception("malformed Mime Header");
      headers ~= MimeHeader(buf2[0], buf2[2]);
    }
  }
  return headers;
}

//-----------------------------------------------------------------------------

unittest
{
  string entity_text =
    "Content-Type: text/plain; charset=us-ascii\r\n" ~
    "Content-Disposition: blah blah \r\n" ~
    "\tblah\r\n" ~
    "\r\n" ~
    "This is explicitly typed plain US-ASCII text.\r\n" ~
    "It DOES end with a linebreak.\r\n";

  //auto r1 = inputRangeObject(cast(ubyte[]) entity_text.dup);
  auto r1 = cast(ubyte[]) entity_text;

  auto headers = parseMimeHeaders(r1);
  assert(headers.length == 2);
  assert(headers[0].name == "Content-Type");
  assert(headers[0].fieldBody == " text/plain; charset=us-ascii");
  assert(headers[1].name == "Content-Disposition");
  assert(headers[1].fieldBody == " blah blah \tblah");
  assert(r1.front == 'T');

  // A continuation line with no header before it is malformed.
  auto r2 = cast(ubyte[]) "\tblah\r\n\r\n".dup;
  try {
    parseMimeHeaders(r2);
    assert(false);
  } catch (Exception e) {
  }
}

//-----------------------------------------------------------------------------
// Entity Reader. Takes an input range representing a MIME document, extracts
// the headers and presents the rest for further reading.

// reader is taken by value; the headers (and the blank line after them) are
// consumed from it by parseMimeHeaders, and what remains is stored as
// content of the returned MimeEntityReader.
// Any exception thrown by parseMimeHeaders propagates to the caller.

auto mimeEntityReader(BR)(BR reader)
  if ((isInputRange!BR && is(ElementType!BR : ubyte)))
{
  // Parse first, so that the reader stored below is already positioned
  // at the start of the content.
  auto headers = parseMimeHeaders(reader);
  return MimeEntityReader!(BR)(headers, reader);
}

// A parsed MIME entity: its headers plus the range holding the
// not-yet-read content.
struct MimeEntityReader(BR)
{
  MimeHeader[] headers;
  BR content;
}

//-----------------------------------------------------------------------------

unittest
{
  import std.stdio;
  import std.exception;
  import std.array;
  import std.algorithm;
  import std.range;

  string entity_text =
    "Content-Type: text/plain; charset=us-ascii\r\n" ~
    "Content-Disposition: blah blah \r\n" ~
    "\tblah\r\n" ~
    "\r\n" ~
    "This is explicitly typed plain US-ASCII text.\r\n" ~
    "It DOES end with a linebreak.\r\n";

  auto r1 = inputRangeObject(cast(ubyte[]) entity_text.dup);

  auto entity = mimeEntityReader(r1);

  static assert(is(typeof(entity.content) == typeof(r1)));
  assert(entity.headers.length == 2);
  assert(entity.headers[0].name == "Content-Type");
  assert(entity.headers[0].fieldBody == " text/plain; charset=us-ascii");
  assert(entity.headers[1].name == "Content-Disposition");
  assert(entity.headers[1].fieldBody == " blah blah \tblah");

  auto buff = appender!(ubyte[]);

  entity.content.copy(buff);
  assert(buff.data ==
    "This is explicitly typed plain US-ASCII text.\r\n" ~
    "It DOES end with a linebreak.\r\n");
}

//-----------------------------------------------------------------------------
// Multipart Entity Reader. Takes an input range and converts it into an
// input range of Mime Entity Readers. Each element represents a Mime Entity.
// Presumes that headers of the primary entity have already been extracted.
// TODO: See if a way can be found to preserve the preamble and epilogue.

import jaypha.algorithm;
import jaypha.range;

// reader is consumed up to and including the first boundary line (the
// preamble is discarded) and the first entity is parsed immediately.
// The returned range keeps using reader: each popFront drains whatever is
// left of the current entity, then either parses the next entity or, after
// the closing "--boundary--" delimiter, drains the epilogue so that the
// range becomes empty.
// Throws Exception if an entity's headers are malformed or if a boundary is
// followed by neither "--" nor a line ending.

auto mimeMultipartReader(BR)(ref BR reader, string boundary)
  if (isInputRange!BR && is(ElementType!BR : ubyte))
{
  string full_boundary = "\r\n--"~boundary;

  // The first boundary need not be preceded by a line ending, hence [2..$].
  jaypha.algorithm.findSplit(reader, full_boundary[2..$]);
  jaypha.algorithm.findSplit(reader, "\r\n"); // skip over whitespace, but don't bother checking.

  auto entity = mimeEntityReader(readUntil(reader, full_boundary));

  alias typeof(entity) T;

  struct MR
  {
    @property bool empty() { return reader.empty; }

    @property T front() { return entity; }

    void popFront()
    {
      if (!entity.content.empty) entity.content.drain(); // In case the user pops before fully reading the entity

      auto rem = jaypha.algorithm.findSplit(reader, MimeEoln); // skip over whitespace, but don't bother checking.
      bool last_time = startsWith(rem[0], "--"); // "--" right after the boundary marks the final delimiter
      if (!last_time)
      {
        if (rem[1] != MimeEoln) throw new Exception("malformed MIME Entity");
        entity = mimeEntityReader(readUntil(reader, full_boundary));
      }
      else
      {
        reader.drain(); // Skip epilogue;
      }
    }
  }
  return MR();
}

//----------------------------------------------------------------------------
// Advances the input range until sentinel is found.
// Returns true if the sentinel was found; the range is then positioned just
// after it. Returns false if the range was exhausted first.
// An empty sentinel is considered found immediately, without consuming
// anything.
// Because the range cannot be rewound, elements consumed by a failed partial
// match are not re-examined, so a sentinel that overlaps a failed partial
// match of itself (e.g. "aab" in "aaab") is not detected.

private bool skipOverUntil(Reader)(ref Reader r, string sentinel)
{
  if (sentinel.length == 0)
    return true;

  while (!r.empty)
  {
    if (cast(char)r.front != sentinel[0])
    {
      r.popFront();
      continue;
    }

    // The front matches the start of the sentinel: consume while it keeps matching.
    size_t matched = 0;
    do
    {
      r.popFront();
      ++matched;
      // Test for completion before testing for exhaustion, so that a
      // sentinel ending exactly at the end of the input is still found.
      if (matched == sentinel.length)
        return true;
    } while (!r.empty && cast(char)r.front == sentinel[matched]);
    // Partial match failed; the mismatching element (if any) is examined
    // again by the outer loop.
  }
  return false;
}


unittest
{

  string preamble =
    "This is the preamble. It is to be ignored, though it\r\n" ~
    "is a handy place for composition agents to include an\r\n" ~
    "explanatory note to non-MIME conformant readers.\r\n" ~
    "\r\n" ~
    "--simple boundary \t \t\t \r\n" ~
    "\r\n" ~
    "This is implicitly typed plain US-ASCII text.\r\n" ~
    "It does NOT end with a linebreak.\r\n" ~
    "--simple boundary\r\n" ~
    "Content-type: text/plain; charset=us-ascii\r\n" ~
    "\r\n" ~
    "This is explicitly typed plain US-ASCII text.\r\n" ~
    "It DOES end with a linebreak.\r\n" ~
    "\r\n" ~
    "--simple boundary--\r\n" ~
    "\r\n" ~
    "This is the epilogue. It is also to be ignored.\r\n";

  auto buff = appender!(ubyte[]);

  ubyte[] txt = cast(ubyte[]) "acabacbxyz".dup;

  auto r1 = inputRangeObject(txt);

  auto x = r1.skipOverUntil("cbx");
  assert(x);
  r1.copy(buff);
  assert(cast(char[])(buff.data) == "yz");

  buff.clear();

  txt = cast(ubyte[]) "acabacbxyz".dup;
  r1 = inputRangeObject(txt);

  assert(!r1.skipOverUntil("c1bx"));
  assert(r1.empty);

  // A sentinel that ends exactly at the end of the input must be found.
  txt = cast(ubyte[]) "xyzcbx".dup;
  r1 = inputRangeObject(txt);
  assert(r1.skipOverUntil("cbx"));
  assert(r1.empty);

  // An exhausted range contains no sentinel.
  assert(!r1.skipOverUntil("cbx"));

  txt = cast(ubyte[]) preamble.dup;
  r1 = inputRangeObject(txt);

  auto r2 = mimeMultipartReader(r1, "simple boundary");


  assert(r1.front == cast(ubyte)'T');
  auto r3 = r2.front;

  assert(r3.headers.length == 0);
  put(buff,r3.content);
  assert(buff.data == "This is implicitly typed plain US-ASCII text.\r\n" ~
                      "It does NOT end with a linebreak.");
  assert(r3.content.empty);
  assert(r1.front == cast(ubyte)'\r');

  buff.clear();
  r2.popFront();
  r3 = r2.front;
  assert(r3.headers.length == 1);

  r3.content.copy(buff);

  assert(buff.data ==
    "This is explicitly typed plain US-ASCII text.\r\n" ~
    "It DOES end with a linebreak.\r\n");
  assert(r3.content.empty);
  r2.popFront();
  assert(r2.empty);
  assert(r1.empty);

}

//----------------------------------------------------------------------------
// Consumes the front of the range as long as it matches the given prefix.
// Returns whether or not the entire prefix got matched. An empty prefix is
// always matched and consumes nothing.
// If the first element does not match (or the range is empty), nothing is
// consumed and false is returned, whatever all_or_nothing is.
// If the prefix is only partially matched, the matching elements stay
// consumed; the result is then false, or, if all_or_nothing is true, an
// Exception("malformed MIME Entity") is thrown.
// Designed to work with ranges that cannot be rewound.

bool skipOverAnyway(R)(ref R r, string prefix, bool all_or_nothing = false)
  if (isInputRange!R)
{
  // Guard the prefix[0] access below.
  if (prefix.length == 0)
    return true;

  if (r.empty || r.front != prefix[0])
    return false;

  size_t i = 0;
  do
  {
    r.popFront();
    ++i;
  } while (i < prefix.length && !r.empty && r.front == prefix[i]);

  if (i == prefix.length) return true;
  if (all_or_nothing) throw new Exception("malformed MIME Entity");
  return false;
}

unittest
{
  ubyte[] txt = cast(ubyte[]) "acabacbxyz".dup;
  auto r1 = inputRangeObject(txt);
  auto buff = appender!(ubyte[])();

  assert(skipOverAnyway(r1, "aca"));
  assert(!skipOverAnyway(r1, "baa"));
  assert(!skipOverAnyway(r1, "xyz"));
  try {
    skipOverAnyway(r1,"cbz",true);
    assert(false);
  } catch (Exception e) {
  }
  // An empty prefix matches without consuming anything.
  assert(skipOverAnyway(r1, ""));
  r1.copy(buff);
  assert(cast(char[])(buff.data) == "xyz");
}

//----------------------------------------------------------------------------
// An alternative to std.algorithm.until that works with non-rewindable input
// ranges.

// Returns an input range over the elements of r that come before the first
// occurrence of sentinel. The sentinel itself is consumed from r but is not
// part of the result, so afterwards r is positioned just after it. If r ends
// without containing the sentinel, everything in r is produced.
// r is read lazily, with a look-ahead of at most sentinel.length elements:
// elements that matched the start of the sentinel but turned out not to be
// it are replayed from the sentinel's own text.
// sentinel must be non-empty and support indexing and .length.
// NOTE: after a failed partial match, scanning resumes after the replayed
// elements, so a sentinel that overlaps a failed partial match of itself
// (e.g. "aab" in "aaab") is not detected.

auto readUntil(R,E)(ref R r, E sentinel)
  if (isInputRange!R && isInputRange!E &&
      isScalarType!(ElementType!E) && isScalarType!(ElementType!R))
{
  alias ElementType!R T;

  //----------------------------------------------------

  final class ReadUntil
  {
    //------------------------------------
    // Prime the look-ahead, so that an input that is empty or that starts
    // with the sentinel is reported as empty straight away.

    this()
    {
      sentinelCheck();
    }

    //------------------------------------

    bool empty = false;

    //------------------------------------

    @property T front()
    {
      // While a failed partial match is pending, replay it.
      if (idx < length) return sentinel[idx];
      return r.front;
    }

    //------------------------------------

    void popFront()
    {
      if (!empty)
      {
        if (idx < length)
        {
          ++idx;
          if (idx == length)
          {
            // Replay finished; go back to reading from r.
            idx = length = 0;
            sentinelCheck();
          }
        }
        else
        {
          r.popFront();
          sentinelCheck();
        }
      }
    }

    //------------------------------------
    // Looks for the sentinel at the current position of r. Consumes the
    // elements that match; sets empty if the whole sentinel matched or if r
    // is exhausted before any match started. Otherwise length holds the
    // number of consumed elements that need to be replayed.

    void sentinelCheck()
    {
      if (r.empty) { empty = true; return; }
      if (r.front != sentinel[0]) return;

      do
      {
        r.popFront();
        ++length;
        if (r.empty) break;
      } while (length < sentinel.length && r.front == sentinel[length]);

      if (length == sentinel.length)
        empty = true;
    }

    //------------------------------------

    private:
      size_t length = 0; // number of elements of a partial match held back
      size_t idx = 0;    // replay position within those elements
  }

  return new ReadUntil();
}

//----------------------------------------------------------------------------

unittest
{
  ubyte[] txt = cast(ubyte[]) "acabacbxyz".dup;

  auto buff = appender!(ubyte[]);

  auto r1 = inputRangeObject(txt);

  auto u = readUntil(r1,"acb");
  u.copy(buff);
  assert(cast(char[])(buff.data) == "acab");
  buff.clear();
  r1.copy(buff);
  assert(cast(char[])(buff.data) == "xyz");

  // Input that starts with the sentinel yields nothing.
  buff.clear();
  txt = cast(ubyte[]) "acbxyz".dup;
  r1 = inputRangeObject(txt);
  auto u2 = readUntil(r1,"acb");
  assert(u2.empty);
  r1.copy(buff);
  assert(cast(char[])(buff.data) == "xyz");
}