/**
 * Concepts shared between HTTP clients and servers.
 *
 * License:
 *   This Source Code Form is subject to the terms of
 *   the Mozilla Public License, v. 2.0. If a copy of
 *   the MPL was not distributed with this file, You
 *   can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Authors:
 *   Stéphan Kochen <stephan@kochen.nl>
 *   Vladimir Panteleev <vladimir@thecybershadow.net>
 *   Simon Arlott
 */

module ae.net.http.common;

import core.time;

import std.algorithm;
import std.array;
import std.string;
import std.conv;
import std.ascii;
import std.exception;
import std.datetime;
import std.typecons : tuple;

import ae.net.ietf.headers;
import ae.sys.data;
import ae.utils.array : amap, afilter, auniq, asort;
import ae.utils.text;
import ae.utils.time;
import zlib = ae.utils.zlib;
import gzip = ae.utils.gzip;

/// Base HTTP message class.
/// Holds the state common to requests and responses: protocol, headers,
/// body data, and the time the message object was created.
private abstract class HttpMessage
{
public:
	string protocol = "http";        /// URL scheme ("http" by default)
	string protocolVersion = "1.0";  /// HTTP version string
	Headers headers;                 /// Message headers
	Data[] data;                     /// Message body
	SysTime creationTime;            /// Set to the current time by the constructor

	/// Records the creation time of the message.
	this()
	{
		creationTime = Clock.currTime();
	}

	/// Time elapsed since this message object was constructed.
	@property Duration age()
	{
		return Clock.currTime() - creationTime;
	}
}

/// HTTP request class
class HttpRequest : HttpMessage
{
public:
	string method = "GET"; /// Request method
	string proxy;          /// Proxy as "host" or "host:port" (see proxyHost / proxyPort)

	/// Creates an empty request.
	this()
	{
	}

	/// Creates a request for the given resource (or full URL, see the
	/// `resource` setter).
	this(string resource)
	{
		this.resource = resource;
	}

	/// Resource part of URL (everything after the hostname)
	@property string resource()
	{
		return _resource;
	}

	/// Setting the resource to a full URL will fill in the Host header, as well.
	@property void resource(string value)
	{
		_resource = value;

		// applies to both Client/Server as some clients put a full URL in the GET line instead of using a "Host" header
		string protocol;
		if (_resource.asciiStartsWith("http://"))
			protocol = "http";
		else
		if (_resource.asciiStartsWith("https://"))
			protocol = "https";

		if (protocol)
		{
			this.protocol = protocol;

			// Skip the scheme and the "://" separator.
			value = value[protocol.length+3..$];
			auto pathstart = value.indexOf('/');
			if (pathstart == -1)
			{
				// No path given - the whole remainder is the authority.
				host = value;
				_resource = "/";
			}
			else
			{
				host = value[0..pathstart];
				_resource = value[pathstart..$];
			}
			// NOTE: the statements below go through the host/port property
			// accessors, which read and rewrite the Host header; their
			// order matters.
			auto portstart = host().indexOf(':');
			if (portstart != -1)
			{
				port = to!ushort(host[portstart+1..$]);
				host = host[0..portstart];
			}
		}
	}

	/// The hostname, without the port number
	@property string host()
	{
		string _host = headers.get("Host", null);
		auto colon = _host.lastIndexOf(":");
		return colon<0 ? _host : _host[0..colon];
	}

	/// Sets the Host header to the given hostname, appending the current
	/// port unless it equals the protocol's default port.
	@property void host(string _host)
	{
		auto _port = this.port;
		headers["Host"] = _port==protocolDefaultPort ? _host : _host ~ ":" ~ text(_port);
	}

	/// Default port for the current protocol (80 for "http", 443 for "https").
	/// Throws: Exception for any other protocol.
	@property ushort protocolDefaultPort()
	{
		switch (protocol)
		{
			case "http":
				return 80;
			case "https":
				return 443;
			default:
				throw new Exception("Unknown protocol: " ~ protocol);
		}
	}

	/// Port number. Taken from the Host header when one is present
	/// (the protocol's default port if the header carries no port);
	/// otherwise the explicitly stored port, or the protocol's default
	/// port if none was stored.
	@property ushort port()
	{
		if ("Host" in headers)
		{
			string _host = headers["Host"];
			auto colon = _host.lastIndexOf(":");
			return colon<0 ? protocolDefaultPort : to!ushort(_host[colon+1..$]);
		}
		else
			return _port ? _port : protocolDefaultPort;
	}

	/// Sets the port. When a Host header is present it is rewritten
	/// (the port is omitted if it is the protocol default); otherwise
	/// the port is stored until a Host header is set.
	@property void port(ushort _port)
	{
		if ("Host" in headers)
		{
			if (_port == protocolDefaultPort)
				headers["Host"] = this.host;
			else
				headers["Host"] = this.host ~ ":" ~ text(_port);
		}
		else
			this._port = _port;
	}

	/// Path part of request (until the ?)
	@property string path()
	{
		auto p = resource.indexOf('?');
		if (p >= 0)
			return resource[0..p];
		else
			return resource;
	}

	/// Query string part of request (after the ?).
	/// The getter returns null when the resource contains no '?'.
	@property string queryString()
	{
		auto p = resource.indexOf('?');
		if (p >= 0)
			return resource[p+1..$];
		else
			return null;
	}

	/// ditto
	@property void queryString(string value)
	{
		// Drop any existing query string, then append the new one (if any).
		auto p = resource.indexOf('?');
		if (p >= 0)
			resource = resource[0..p];
		if (value)
			resource = resource ~ '?' ~ value;
	}

	/// AA of query string parameters
	@property UrlParameters urlParameters()
	{
		return decodeUrlParameters(queryString);
	}

	/// ditto
	@property void urlParameters(UrlParameters parameters)
	{
		queryString = encodeUrlParameters(parameters);
	}

	/// URL without resource (protocol, host and port).
	/// The port is omitted when it is the protocol's default port.
	@property string root()
	{
		return protocol ~ "://" ~ host ~ (port==protocolDefaultPort ? null : ":" ~ to!string(port));
	}

	/// Reconstruct full URL from host, port and resource
	@property string url()
	{
		return root ~ resource;
	}

	/// Full URL with everything from the first '?' onwards removed.
	@property string baseURL()
	{
		return root ~ resource.findSplit("?")[0];
	}

	/// Host part of `proxy` (everything before the first ':').
	@property string proxyHost()
	{
		auto portstart = proxy.indexOf(':');
		if (portstart != -1)
			return proxy[0..portstart];
		return proxy;
	}

	/// Port part of `proxy` (everything after the first ':'),
	/// or 80 if `proxy` contains no ':'.
	@property ushort proxyPort()
	{
		auto portstart = proxy.indexOf(':');
		if (portstart != -1)
			return to!ushort(proxy[portstart+1..$]);
		return 80;
	}

	/// Parse the first line in a HTTP request ("METHOD /resource HTTP/1.x").
	/// Sets `method`, `resource` and `protocolVersion` (the part after "HTTP/").
	/// Throws: Exception (via enforce) if the line is malformed.
	void parseRequestLine(string reqLine)
	{
		enforce(reqLine.length > 10, "Request line too short");
		auto methodEnd = reqLine.indexOf(' ');
		enforce(methodEnd > 0, "Malformed request line");
		method = reqLine[0 .. methodEnd];
		reqLine = reqLine[methodEnd + 1 .. reqLine.length];

		// The protocol is whatever follows the last space.
		auto resourceEnd = reqLine.lastIndexOf(' ');
		enforce(resourceEnd > 0, "Malformed request line");
		resource = reqLine[0 .. resourceEnd];

		string protocol = reqLine[resourceEnd+1..$];
		enforce(protocol.startsWith("HTTP/"));
		protocolVersion = protocol[5..$];
	}

	/// Decodes submitted form data, and returns an AA of values.
	/// Supports "application/x-www-form-urlencoded" and "multipart/form-data".
	/// Throws: Exception if the Content-Type is absent or not one of the above.
	UrlParameters decodePostData()
	{
		auto contentType = headers.get("Content-Type", "").decodeTokenHeader;

		switch (contentType.value)
		{
			case "application/x-www-form-urlencoded":
				return decodeUrlParameters(cast(string)data.joinToHeap());
			case "multipart/form-data":
				// Each part is keyed by the "name" property of its Content-Disposition header.
				return decodeMultipart(data.joinData, contentType.properties.get("boundary", null))
					.map!(part => tuple(part.headers.get("Content-Disposition", null).decodeTokenHeader.properties.get("name", null), cast(string)part.data.toHeap()))
					.UrlParameters;
			case "":
				throw new Exception("No Content-Type");
			default:
				throw new Exception("Unknown Content-Type: " ~ contentType.value);
		}
	}

	/// Get list of hosts as specified in headers (e.g. X-Forwarded-For).
	/// First item in returned array is the node furthest away.
	/// Duplicates are removed.
	/// Specify socket remote address in remoteHost to add it to the list.
	deprecated("Insecure, use HttpServer.remoteIPHeader")
	string[] remoteHosts(string remoteHost = null)
	{
		return
			(headers.get("X-Forwarded-For", null).split(",").amap!(std.string.strip)() ~
			 headers.get("X-Forwarded-Host", null) ~
			 remoteHost)
			.afilter!`a && a != "unknown"`()
			.auniq();
	}

	deprecated unittest
	{
		auto req = new HttpRequest();
		assert(req.remoteHosts() == []);
		assert(req.remoteHosts("3.3.3.3") == ["3.3.3.3"]);

		req.headers["X-Forwarded-For"] = "1.1.1.1, 2.2.2.2";
		req.headers["X-Forwarded-Host"] = "2.2.2.2";
		assert(req.remoteHosts("3.3.3.3") == ["1.1.1.1", "2.2.2.2", "3.3.3.3"]);
	}

	/// Basic cookie parsing.
	/// Splits the Cookie header on ';' and each segment on its first '='.
	/// Segments without '=' or with an empty name are ignored.
	string[string] getCookies()
	{
		string[string] cookies;
		foreach (segment; headers.get("Cookie", null).split(";"))
		{
			segment = segment.strip();
			auto p = segment.indexOf('=');
			if (p > 0)
				cookies[segment[0..p]] = segment[p+1..$];
		}
		return cookies;
	}

private:
	string _resource;
	ushort _port = 0; // used only when no "Host" in headers; otherwise, taken from there
}

/// HTTP response status codes
enum HttpStatusCode : ushort
{
	Continue=100,
	SwitchingProtocols=101,

	OK=200,
	Created=201,
	Accepted=202,
	NonAuthoritativeInformation=203,
	NoContent=204,
	ResetContent=205,
	PartialContent=206,

	MultipleChoices=300,
	MovedPermanently=301,
	Found=302,
	SeeOther=303,
	NotModified=304,
	UseProxy=305,
	//(Unused)=306,
	TemporaryRedirect=307,

	BadRequest=400,
	Unauthorized=401,
	PaymentRequired=402,
	Forbidden=403,
	NotFound=404,
	MethodNotAllowed=405,
	NotAcceptable=406,
	ProxyAuthenticationRequired=407,
	RequestTimeout=408,
	Conflict=409,
	Gone=410,
	LengthRequired=411,
	PreconditionFailed=412,
	RequestEntityTooLarge=413,
	RequestUriTooLong=414,
	UnsupportedMediaType=415,
	RequestedRangeNotSatisfiable=416,
	ExpectationFailed=417,

	InternalServerError=500,
	NotImplemented=501,
	BadGateway=502,
	ServiceUnavailable=503,
	GatewayTimeout=504,
	HttpVersionNotSupported=505
}

/// HTTP reply class
class HttpResponse : HttpMessage
{
public:
	HttpStatusCode status; /// Response status code
	string statusMessage;  /// Reason phrase accompanying the status code

	/// Compression level passed to zlib.ZlibOptions by
	/// compressWithDeflate / compressWithGzip.
	int compressionLevel = 1;

	/// Returns the standard reason phrase for a status code,
	/// or null if the code is not in the table below.
	static string getStatusMessage(HttpStatusCode code)
	{
		switch(code)
		{
			case 100: return "Continue";
			case 101: return "Switching Protocols";

			case 200: return "OK";
			case 201: return "Created";
			case 202: return "Accepted";
			case 203: return "Non-Authoritative Information";
			case 204: return "No Content";
			case 205: return "Reset Content";
			case 206: return "Partial Content";
			case 300: return "Multiple Choices";
			case 301: return "Moved Permanently";
			case 302: return "Found";
			case 303: return "See Other";
			case 304: return "Not Modified";
			case 305: return "Use Proxy";
			case 306: return "(Unused)";
			case 307: return "Temporary Redirect";

			case 400: return "Bad Request";
			case 401: return "Unauthorized";
			case 402: return "Payment Required";
			case 403: return "Forbidden";
			case 404: return "Not Found";
			case 405: return "Method Not Allowed";
			case 406: return "Not Acceptable";
			case 407: return "Proxy Authentication Required";
			case 408: return "Request Timeout";
			case 409: return "Conflict";
			case 410: return "Gone";
			case 411: return "Length Required";
			case 412: return "Precondition Failed";
			case 413: return "Request Entity Too Large";
			case 414: return "Request-URI Too Long";
			case 415: return "Unsupported Media Type";
			case 416: return "Requested Range Not Satisfiable";
			case 417: return "Expectation Failed";

			case 500: return "Internal Server Error";
			case 501: return "Not Implemented";
			case 502: return "Bad Gateway";
			case 503: return "Service Unavailable";
			case 504: return "Gateway Timeout";
			case 505: return "HTTP Version Not Supported";
			default: return null;
		}
	}

	/// Set the response status code and message
	void setStatus(HttpStatusCode code)
	{
		status = code;
		statusMessage = getStatusMessage(code);
	}

	/// Parse the first line of a HTTP response ("VERSION CODE [message]").
	/// Sets `protocolVersion`, `status` and `statusMessage` (null if the
	/// line carries no message).
	/// Note: `protocolVersion` receives the entire first token; unlike
	/// HttpRequest.parseRequestLine, no "HTTP/" prefix is removed.
	/// Throws: Exception if the line contains no space; a conversion
	/// error if the status code is not a valid ushort.
	final void parseStatusLine(string statusLine)
	{
		auto versionEnd = statusLine.indexOf(' ');
		if (versionEnd == -1)
			throw new Exception("Malformed status line");
		protocolVersion = statusLine[0..versionEnd];
		statusLine = statusLine[versionEnd+1..statusLine.length];

		auto statusEnd = statusLine.indexOf(' ');
		string statusCode;
		if (statusEnd >= 0)
		{
			statusCode = statusLine[0 .. statusEnd];
			statusMessage = statusLine[statusEnd+1..statusLine.length];
		}
		else
		{
			statusCode = statusLine;
			statusMessage = null;
		}
		status = cast(HttpStatusCode)to!ushort(statusCode);
	}

	/// If the data is compressed, return the decompressed data
	// this is not a property on purpose - to avoid using it multiple times as it will unpack the data on every access
	// TODO: there is no reason for above limitation
	Data getContent()
	{
		if ("Content-Encoding" in headers && headers["Content-Encoding"]=="deflate")
			return zlib.uncompress(data).joinData();
		else
		if ("Content-Encoding" in headers && headers["Content-Encoding"]=="gzip")
			return gzip.uncompress(data).joinData();
		else
			return data.joinData();
	}

	/// Replaces `data` with its deflate-compressed form.
	protected void compressWithDeflate()
	{
		data = zlib.compress(data, zlib.ZlibOptions(compressionLevel));
	}

	/// Replaces `data` with its gzip-compressed form.
	protected void compressWithGzip()
	{
		data = gzip.compress(data, zlib.ZlibOptions(compressionLevel));
	}

	/// Called by the server to compress content, if possible/appropriate.
	/// Compression is attempted only when the request has an
	/// Accept-Encoding header, the response has no Content-Encoding yet,
	/// and the response Content-Type is one of the types checked below.
	/// The first supported method in the client's preference order wins.
	final package void optimizeData(ref const Headers requestHeaders)
	{
		auto acceptEncoding = requestHeaders.get("Accept-Encoding", null);
		if (acceptEncoding && "Content-Encoding" !in headers)
		{
			auto contentType = headers.get("Content-Type", null);
			if (contentType.startsWith("text/")
			 || contentType == "application/json"
			 || contentType == "image/vnd.microsoft.icon"
			 || contentType == "image/svg+xml")
			{
				// "*" is appended as a sentinel so that the loop always
				// terminates via one of the `return`s below.
				auto supported = parseItemList(acceptEncoding) ~ ["*"];
				foreach (method; supported)
					switch (method)
					{
						case "deflate":
							headers["Content-Encoding"] = method;
							headers.add("Vary", "Accept-Encoding");
							compressWithDeflate();
							return;
						case "gzip":
							headers["Content-Encoding"] = method;
							headers.add("Vary", "Accept-Encoding");
							compressWithGzip();
							return;
						case "*":
							if("Content-Encoding" in headers)
								headers.remove("Content-Encoding");
							return;
						default:
							break;
					}
				assert(0);
			}
		}
	}

	/// Called by the server to apply range request.
	/// Only acts on responses whose status is OK. Handles If-Modified-Since
	/// (answering NotModified with no body) and the first range of a
	/// "bytes=" Range header (answering PartialContent, or
	/// RequestedRangeNotSatisfiable when the range does not fit).
	final package void sliceData(ref const Headers requestHeaders)
	{
		if (status == HttpStatusCode.OK)
		{
			if ("If-Modified-Since" in requestHeaders &&
				"Last-Modified" in headers &&
				headers["Last-Modified"].parseTime!(TimeFormats.RFC2822) <= requestHeaders["If-Modified-Since"].parseTime!(TimeFormats.RFC2822))
			{
				setStatus(HttpStatusCode.NotModified);
				data = null;
				return;
			}

			headers["Accept-Ranges"] = "bytes";
			auto prange = "Range" in requestHeaders;
			if (prange && (*prange).startsWith("bytes="))
			{
				// Only the first range of a multi-range request is honored.
				// An empty start or end is represented as size_t.max.
				auto ranges = (*prange)[6..$].split(",")[0].split("-").map!(s => s.length ? s.to!size_t : size_t.max)().array();
				enforce(ranges.length == 2, "Bad range request");
				// Convert the inclusive last-byte position into an exclusive end.
				ranges[1]++;
				auto datum = DataSetBytes(this.data);
				// An unspecified end (size_t.max overflowed into 0 above) or an
				// end past the content both mean "through the last byte"
				// (RFC 7233, section 2.1).
				if (ranges[1] == size_t.min || ranges[1] > datum.length)
					ranges[1] = datum.length;
				// NOTE: a suffix range ("bytes=-N") has no start, so its start is
				// size_t.max and it is answered as not satisfiable here.
				if (ranges[0] >= datum.length || ranges[0] >= ranges[1])
				{
					//writeError(HttpStatusCode.RequestedRangeNotSatisfiable);
					setStatus(HttpStatusCode.RequestedRangeNotSatisfiable);
					data = [Data(statusMessage)];
					return;
				}
				else
				{
					setStatus(HttpStatusCode.PartialContent);
					this.data = datum[ranges[0]..ranges[1]];
					headers["Content-Range"] = "bytes %d-%d/%d".format(ranges[0], ranges[0] + this.data.bytes.length - 1, datum.length);
				}
			}
		}
	}
}

/// Sets the Expires, Cache-Control and Pragma headers to values
/// that instruct clients not to cache the response.
void disableCache(ref Headers headers)
{
	headers["Expires"] = "Mon, 26 Jul 1997 05:00:00 GMT";  // disable IE caching
	//headers["Last-Modified"] = "" . gmdate( "D, d M Y H:i:s" ) . " GMT";
	headers["Cache-Control"] = "no-cache, must-revalidate";
	headers["Pragma"] = "no-cache";
}

/// Sets the Expires header to one year from now and Cache-Control to
/// "public" with a max-age of 31536000 seconds (365 days).
void cacheForever(ref Headers headers)
{
	headers["Expires"] = httpTime(Clock.currTime().add!"years"(1));
	headers["Cache-Control"] = "public, max-age=31536000";
}

/// Formats a time in RFC 2822 format, after converting it to UTC.
string httpTime(SysTime time)
{
	// Apache is bad at timezones
	time.timezone = UTC();
	return time.formatTime!(TimeFormats.RFC2822)();
}

import std.algorithm : sort;

/// Parses a list in the format of "a, b, c;q=0.5, d" and returns
/// an array of items sorted by "q" (["a", "b", "d", "c"]).
/// Items without a "q" parameter have a weight of 1.0.
/// Whitespace around items and around their ";"-separated
/// parameters is ignored.
string[] parseItemList(string s)
{
	static struct Item
	{
		float q = 1.0;
		string str;

		this(string s)
		{
			auto params = s.split(";");
			if (!params.length) return;
			str = params[0].strip();
			foreach (param; params[1..$])
			{
				// Optional whitespace is permitted around ';' ("c; q=0.5").
				param = param.strip();
				if (param.startsWith("q="))
					q = to!float(param[2..$]);
			}
		}
	}

	return s
		.split(",")
		.amap!(a => Item(strip(a)))()
		.asort!`a.q > b.q`()
		.amap!`a.str`();
}

unittest
{
	assert(parseItemList("a, b, c;q=0.5, d") == ["a", "b", "d", "c"]);
	assert(parseItemList("a, b;q=0.5, c; q=0.8") == ["a", "c", "b"]);
}

// TODO: optimize / move to HtmlWriter
/// Escapes '<', '>' and '&' (and the code unit '\xDF') as HTML entities.
/// The string is processed one `char` code unit at a time; quote
/// characters are not escaped.
string httpEscape(string str)
{
	string result;
	foreach(c;str)
		switch(c)
		{
			case '<':
				result ~= "&lt;";
				break;
			case '>':
				result ~= "&gt;";
				break;
			case '&':
				result ~= "&amp;";
				break;
			case '\xDF':  // the beta-like symbol
				// NOTE(review): this matches a single code unit, which looks
				// like it assumes a single-byte encoding - confirm for UTF-8 input.
				result ~= "&szlig;";
				break;
			default:
				result ~= c;
		}
	return result;
}

unittest
{
	assert(httpEscape("<a & b>") == "&lt;a &amp; b&gt;");
	assert(httpEscape("plain") == "plain");
}

public import ae.net.ietf.url : UrlParameters, encodeUrlParameter, encodeUrlParameters, decodeUrlParameter, decodeUrlParameters;

/// One part of a multipart message: its headers and its body.
struct MultipartPart
{
	Headers headers;
	Data data;
}

/// Serializes the given parts into a multipart body using the given
/// boundary. Each part is written as a "--boundary" line, its headers,
/// an empty line and its data; the body ends with "--boundary--".
/// Asserts that no part's data contains the boundary.
Data encodeMultipart(MultipartPart[] parts, string boundary)
{
	Data data;
	foreach (ref part; parts)
	{
		data ~= "--" ~ boundary ~ "\r\n";
		foreach (name, value; part.headers)
			data ~= name ~ ": " ~ value ~ "\r\n";
		data ~= "\r\n";
		assert((cast(string)part.data.contents).indexOf(boundary) < 0);
		data ~= part.data;
		data ~= "\r\n";
	}
	data ~= "--" ~ boundary ~ "--\r\n";
	return data;
}

/// Parses a multipart body delimited by the given boundary into its parts.
/// Throws: Exception (via enforce) if the body does not start with
/// "--boundary", does not end with "--boundary--", or a part has no
/// empty line separating its headers from its data.
MultipartPart[] decodeMultipart(Data data, string boundary)
{
	auto s = cast(char[])data.contents;
	// Remove the closing delimiter...
	auto term = "\r\n--" ~ boundary ~ "--\r\n";
	enforce(s.endsWith(term), "Bad multipart terminator");
	s = s[0..$-term.length];
	// ...and the opening one; what remains is split on the inner delimiters.
	auto delim = "--" ~ boundary ~ "\r\n";
	enforce(s.skipOver(delim), "Bad multipart start");
	delim = "\r\n" ~ delim;
	auto parts = s.split(delim);
	MultipartPart[] result;
	foreach (part; parts)
	{
		// Headers and data are separated by an empty line.
		auto segs = part.findSplit("\r\n\r\n");
		enforce(segs[1], "Can't find headers in multipart part");
		MultipartPart p;
		foreach (line; segs[0].split("\r\n"))
		{
			auto hparts = line.findSplit(":");
			p.headers[hparts[0].strip.idup] = hparts[2].strip.idup;
		}
		p.data = Data(segs[2]);
		result ~= p;
	}
	return result;
}

unittest
{
	auto parts = [
		MultipartPart(Headers(["Foo" : "bar"]), Data.init),
		MultipartPart(Headers(["Baz" : "quux", "Frob" : "xyzzy"]), Data("Content goes here\xFF")),
	];
	auto boundary = "abcde";
	auto parts2 = parts.encodeMultipart(boundary).decodeMultipart(boundary);
	assert(parts2.length == parts.length);
	foreach (p; 0..parts.length)
	{
		assert(parts[p].headers == parts2[p].headers);
		assert(parts[p].data.contents == parts2[p].data.contents);
	}
}

/// Returns true if `s` starts with `prefix`, comparing
/// ASCII letters case-insensitively.
private bool asciiStartsWith(string s, string prefix)
{
	if (s.length < prefix.length)
		return false;
	import std.ascii;
	foreach (i, c; prefix)
		if (toLower(c) != toLower(s[i]))
			return false;
	return true;
}