1 /**
2  * Concepts shared between HTTP clients and servers.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Stéphan Kochen <stephan@kochen.nl>
12  *   Vladimir Panteleev <vladimir@thecybershadow.net>
13  *   Simon Arlott
14  */
15 
16 module ae.net.http.common;
17 
18 import core.time;
19 
20 import std.algorithm;
21 import std.array;
22 import std.string;
23 import std.conv;
24 import std.ascii;
25 import std.exception;
26 import std.datetime;
27 import std.typecons : tuple;
28 
29 import ae.net.ietf.headers;
30 import ae.sys.data;
31 import ae.utils.array : amap, afilter, auniq, asort;
32 import ae.utils.text;
33 import ae.utils.time;
34 import zlib = ae.utils.zlib;
35 import gzip = ae.utils.gzip;
36 
/// Base HTTP message class.
/// Holds what requests and responses have in common: protocol details,
/// headers, body data, and the time at which the object was constructed.
private abstract class HttpMessage
{
public:
	/// Protocol name; starts out as "http".
	string protocol = "http";
	/// Protocol version string; starts out as "1.0".
	string protocolVersion = "1.0";
	/// Message headers.
	Headers headers;
	/// Message body.
	Data[] data;
	/// Clock reading taken by the constructor.
	SysTime creationTime;

	/// Stamps the message with the current time.
	this()
	{
		this.creationTime = Clock.currTime();
	}

	/// Time elapsed between construction and now.
	@property Duration age()
	{
		auto now = Clock.currTime();
		return now - creationTime;
	}
}
57 
/// HTTP request class
class HttpRequest : HttpMessage
{
public:
	/// Request method; defaults to "GET".
	string method = "GET";
	/// Proxy address, as "host" or "host:port" (see proxyHost / proxyPort).
	string proxy;

	/// Creates a request without a resource.
	this()
	{
	}

	/// Creates a request for the given resource. The value goes through
	/// the resource setter, so a full URL is accepted as well.
	this(string resource)
	{
		this.resource = resource;
	}

	/// Resource part of URL (everything after the hostname)
	@property string resource()
	{
		return _resource;
	}

	/// Setting the resource to a full URL will fill in the Host header, as well.
	/// Only the "http://" and "https://" schemes are recognized (compared
	/// case-insensitively); any other value is stored verbatim.
	/// Throws: ConvException if the URL carries a port that is not a valid ushort.
	@property void resource(string value)
	{
		_resource = value;

		// applies to both Client/Server as some clients put a full URL in the GET line instead of using a "Host" header
		string protocol;
		if (_resource.asciiStartsWith("http://"))
			protocol = "http";
		else
		if (_resource.asciiStartsWith("https://"))
			protocol = "https";

		if (protocol)
		{
			this.protocol = protocol;

			// Drop the scheme name and the "://" that follows it.
			value = value[protocol.length+3..$];
			auto pathstart = value.indexOf('/');
			if (pathstart == -1)
			{
				// URL without a path: the resource becomes the root.
				host = value;
				_resource = "/";
			}
			else
			{
				host = value[0..pathstart];
				_resource = value[pathstart..$];
			}
			// If the host getter still returns a colon, split the port off
			// and store host and port through their own setters.
			auto portstart = host().indexOf(':');
			if (portstart != -1)
			{
				port = to!ushort(host[portstart+1..$]);
				host = host[0..portstart];
			}
		}
	}

	/// The hostname, without the port number.
	/// Read from the "Host" header; null when that header is absent.
	@property string host()
	{
		string _host = headers.get("Host", null);
		auto colon = _host.lastIndexOf(":");
		return colon<0 ? _host : _host[0..colon];
	}

	/// Sets the "Host" header to the given hostname, appending the
	/// current port when it differs from the protocol's default.
	@property void host(string _host)
	{
		auto _port = this.port;
		headers["Host"] = _port==protocolDefaultPort ? _host : _host ~ ":" ~ text(_port);
	}

	/// Default port of the current protocol (80 for "http", 443 for "https").
	/// Throws: Exception for any other protocol.
	@property ushort protocolDefaultPort()
	{
		switch (protocol)
		{
			case "http":
				return 80;
			case "https":
				return 443;
			default:
				throw new Exception("Unknown protocol: " ~ protocol);
		}
	}

	/// Port number. Taken from the "Host" header when that header is present,
	/// otherwise from the value stored by the setter; in both cases the
	/// protocol's default port is used when no port was given.
	@property ushort port()
	{
		if ("Host" in headers)
		{
			string _host = headers["Host"];
			auto colon = _host.lastIndexOf(":");
			return colon<0 ? protocolDefaultPort : to!ushort(_host[colon+1..$]);
		}
		else
			return _port ? _port : protocolDefaultPort;
	}

	/// Sets the port. With a "Host" header present, the header is rewritten
	/// (the port is left out when it is the protocol default); without one,
	/// the value is only remembered until a host is set.
	@property void port(ushort _port)
	{
		if ("Host" in headers)
		{
			if (_port == protocolDefaultPort)
				headers["Host"] = this.host;
			else
				headers["Host"] = this.host ~ ":" ~ text(_port);
		}
		else
			this._port = _port;
	}

	/// Path part of request (until the ?)
	@property string path()
	{
		auto p = resource.indexOf('?');
		if (p >= 0)
			return resource[0..p];
		else
			return resource;
	}

	/// Query string part of request (after the ?).
	/// Null when the resource contains no '?'.
	@property string queryString()
	{
		auto p = resource.indexOf('?');
		if (p >= 0)
			return resource[p+1..$];
		else
			return null;
	}

	/// ditto
	@property void queryString(string value)
	{
		// Cut off any existing query string before appending the new one.
		auto p = resource.indexOf('?');
		if (p >= 0)
			resource = resource[0..p];
		resource = resource ~ '?' ~ value;
	}

	/// AA of query string parameters
	@property UrlParameters urlParameters()
	{
		return decodeUrlParameters(queryString);
	}

	/// ditto
	@property void urlParameters(UrlParameters parameters)
	{
		queryString = encodeUrlParameters(parameters);
	}

	/// URL without resource (protocol, host and port).
	/// The port is included only when it differs from the protocol default.
	@property string root()
	{
		return protocol ~ "://" ~ host ~ (port==protocolDefaultPort ? null : ":" ~ to!string(port));
	}

	/// Reconstruct full URL from host, port and resource
	@property string url()
	{
		return root ~ resource;
	}

	/// Full URL up to (not including) the first '?'.
	@property string baseURL()
	{
		return root ~ resource.findSplit("?")[0];
	}

	/// Host part of the proxy address (everything before the first ':').
	@property string proxyHost()
	{
		auto portstart = proxy.indexOf(':');
		if (portstart != -1)
			return proxy[0..portstart];
		return proxy;
	}

	/// Port part of the proxy address; 80 when the address has no ':'.
	/// Throws: ConvException if the text after ':' is not a valid ushort.
	@property ushort proxyPort()
	{
		auto portstart = proxy.indexOf(':');
		if (portstart != -1)
			return to!ushort(proxy[portstart+1..$]);
		return 80;
	}

	/// Parse the first line in a HTTP request ("METHOD /resource HTTP/1.x").
	/// Fills in method, resource (through its setter) and protocolVersion.
	/// Throws: Exception if the line does not have that shape.
	void parseRequestLine(string reqLine)
	{
		enforce(reqLine.length > 10, "Request line too short");
		auto methodEnd = reqLine.indexOf(' ');
		enforce(methodEnd > 0, "Malformed request line");
		method = reqLine[0 .. methodEnd];
		reqLine = reqLine[methodEnd + 1 .. reqLine.length];

		// The resource ends at the LAST space, so spaces inside it are tolerated.
		auto resourceEnd = reqLine.lastIndexOf(' ');
		enforce(resourceEnd > 0, "Malformed request line");
		resource = reqLine[0 .. resourceEnd];

		string protocol = reqLine[resourceEnd+1..$];
		enforce(protocol.startsWith("HTTP/"), "Malformed request line");
		protocolVersion = protocol[5..$];
	}

	/// Decodes submitted form data, and returns an AA of values.
	/// Handles "application/x-www-form-urlencoded" and "multipart/form-data".
	/// Throws: Exception when the Content-Type header is missing or names another type.
	UrlParameters decodePostData()
	{
		auto contentType = headers.get("Content-Type", "").decodeTokenHeader;

		switch (contentType.value)
		{
			case "application/x-www-form-urlencoded":
				return decodeUrlParameters(cast(string)data.joinToHeap());
			case "multipart/form-data":
				// Each part contributes one (name, content) pair; the name comes
				// from the part's Content-Disposition "name" property.
				return decodeMultipart(data.joinData, contentType.properties.get("boundary", null))
					.map!(part => tuple(part.headers.get("Content-Disposition", null).decodeTokenHeader.properties.get("name", null), cast(string)part.data.toHeap()))
					.UrlParameters;
			case "":
				throw new Exception("No Content-Type");
			default:
				throw new Exception("Unknown Content-Type: " ~ contentType.value);
		}
	}

	/// Get list of hosts as specified in headers (e.g. X-Forwarded-For).
	/// First item in returned array is the node furthest away.
	/// Duplicates are removed.
	/// Specify socket remote address in remoteHost to add it to the list.
	string[] remoteHosts(string remoteHost = null)
	{
		return
			(headers.get("X-Forwarded-For", null).split(",").amap!(std.string.strip)() ~
			 headers.get("X-Forwarded-Host", null) ~
			 remoteHost)
			.afilter!`a && a != "unknown"`()
			.auniq();
	}

	unittest
	{
		auto req = new HttpRequest();
		assert(req.remoteHosts() == []);
		assert(req.remoteHosts("3.3.3.3") == ["3.3.3.3"]);

		req.headers["X-Forwarded-For"] = "1.1.1.1, 2.2.2.2";
		req.headers["X-Forwarded-Host"] = "2.2.2.2";
		assert(req.remoteHosts("3.3.3.3") == ["1.1.1.1", "2.2.2.2", "3.3.3.3"]);
	}

	/// Basic cookie parsing.
	/// Splits the "Cookie" header on ';' and each segment on its first '=';
	/// segments without '=' or with an empty name are skipped.
	string[string] getCookies()
	{
		string[string] cookies;
		foreach (segment; headers.get("Cookie", null).split(";"))
		{
			segment = segment.strip();
			auto p = segment.indexOf('=');
			if (p > 0)
				cookies[segment[0..p]] = segment[p+1..$];
		}
		return cookies;
	}

private:
	string _resource;
	ushort _port = 0; // used only when no "Host" in headers; otherwise, taken from there
}
327 
/// HTTP response status codes
enum HttpStatusCode : ushort
{
	// 1xx
	Continue                     = 100,
	SwitchingProtocols           = 101,

	// 2xx
	OK                           = 200,
	Created                      = 201,
	Accepted                     = 202,
	NonAuthoritativeInformation  = 203,
	NoContent                    = 204,
	ResetContent                 = 205,
	PartialContent               = 206,

	// 3xx (306 is unused and has no member)
	MultipleChoices              = 300,
	MovedPermanently             = 301,
	Found                        = 302,
	SeeOther                     = 303,
	NotModified                  = 304,
	UseProxy                     = 305,
	TemporaryRedirect            = 307,

	// 4xx
	BadRequest                   = 400,
	Unauthorized                 = 401,
	PaymentRequired              = 402,
	Forbidden                    = 403,
	NotFound                     = 404,
	MethodNotAllowed             = 405,
	NotAcceptable                = 406,
	ProxyAuthenticationRequired  = 407,
	RequestTimeout               = 408,
	Conflict                     = 409,
	Gone                         = 410,
	LengthRequired               = 411,
	PreconditionFailed           = 412,
	RequestEntityTooLarge        = 413,
	RequestUriTooLong            = 414,
	UnsupportedMediaType         = 415,
	RequestedRangeNotSatisfiable = 416,
	ExpectationFailed            = 417,

	// 5xx
	InternalServerError          = 500,
	NotImplemented               = 501,
	BadGateway                   = 502,
	ServiceUnavailable           = 503,
	GatewayTimeout               = 504,
	HttpVersionNotSupported      = 505
}
377 
/// HTTP reply class
class HttpResponse : HttpMessage
{
public:
	/// Status code of the response.
	HttpStatusCode status;
	/// Reason phrase that goes with the status code.
	string statusMessage;

	/// Level passed to the compressor by compressWithDeflate / compressWithGzip.
	int compressionLevel = 1;

	/// Returns the reason phrase for a status code, or null for a
	/// code that is not in the table.
	static string getStatusMessage(HttpStatusCode code)
	{
		switch (code) with (HttpStatusCode)
		{
			case Continue:                     return "Continue";
			case SwitchingProtocols:           return "Switching Protocols";

			case OK:                           return "OK";
			case Created:                      return "Created";
			case Accepted:                     return "Accepted";
			case NonAuthoritativeInformation:  return "Non-Authoritative Information";
			case NoContent:                    return "No Content";
			case ResetContent:                 return "Reset Content";
			case PartialContent:               return "Partial Content";

			case MultipleChoices:              return "Multiple Choices";
			case MovedPermanently:             return "Moved Permanently";
			case Found:                        return "Found";
			case SeeOther:                     return "See Other";
			case NotModified:                  return "Not Modified";
			case UseProxy:                     return "Use Proxy";
			// 306 has no enum member, so it is spelled numerically.
			case cast(HttpStatusCode)306:      return "(Unused)";
			case TemporaryRedirect:            return "Temporary Redirect";

			case BadRequest:                   return "Bad Request";
			case Unauthorized:                 return "Unauthorized";
			case PaymentRequired:              return "Payment Required";
			case Forbidden:                    return "Forbidden";
			case NotFound:                     return "Not Found";
			case MethodNotAllowed:             return "Method Not Allowed";
			case NotAcceptable:                return "Not Acceptable";
			case ProxyAuthenticationRequired:  return "Proxy Authentication Required";
			case RequestTimeout:               return "Request Timeout";
			case Conflict:                     return "Conflict";
			case Gone:                         return "Gone";
			case LengthRequired:               return "Length Required";
			case PreconditionFailed:           return "Precondition Failed";
			case RequestEntityTooLarge:        return "Request Entity Too Large";
			case RequestUriTooLong:            return "Request-URI Too Long";
			case UnsupportedMediaType:         return "Unsupported Media Type";
			case RequestedRangeNotSatisfiable: return "Requested Range Not Satisfiable";
			case ExpectationFailed:            return "Expectation Failed";

			case InternalServerError:          return "Internal Server Error";
			case NotImplemented:               return "Not Implemented";
			case BadGateway:                   return "Bad Gateway";
			case ServiceUnavailable:           return "Service Unavailable";
			case GatewayTimeout:               return "Gateway Timeout";
			case HttpVersionNotSupported:      return "HTTP Version Not Supported";

			default:
				return null;
		}
	}

	/// Set the response status code and message
	void setStatus(HttpStatusCode code)
	{
		status = code;
		statusMessage = getStatusMessage(code);
	}

	/// Parses a status line of the form "VERSION CODE [MESSAGE]".
	/// Everything before the first space is stored in protocolVersion as-is;
	/// the message is null when the line ends after the code.
	/// Throws: Exception if the line has no space; ConvException if the
	/// code is not a valid ushort.
	final void parseStatusLine(string statusLine)
	{
		auto firstSpace = statusLine.indexOf(' ');
		if (firstSpace < 0)
			throw new Exception("Malformed status line");
		protocolVersion = statusLine[0 .. firstSpace];

		auto remainder = statusLine[firstSpace + 1 .. $];
		auto secondSpace = remainder.indexOf(' ');
		string codeText = secondSpace >= 0 ? remainder[0 .. secondSpace] : remainder;
		statusMessage = secondSpace >= 0 ? remainder[secondSpace + 1 .. $] : null;
		status = cast(HttpStatusCode)to!ushort(codeText);
	}

	/// If the data is compressed, return the decompressed data
	// this is not a property on purpose - to avoid using it multiple times as it will unpack the data on every access
	// TODO: there is no reason for above limitation
	Data getContent()
	{
		if ("Content-Encoding" in headers)
		{
			switch (headers["Content-Encoding"])
			{
				case "deflate":
					return zlib.uncompress(data).joinData();
				case "gzip":
					return gzip.uncompress(data).joinData();
				default:
					break;
			}
		}
		// No (or an unrecognized) Content-Encoding: hand the body back as stored.
		return data.joinData();
	}

	/// Replaces the body with its zlib-compressed form.
	protected void compressWithDeflate()
	{
		auto options = zlib.ZlibOptions(compressionLevel);
		data = zlib.compress(data, options);
	}

	/// Replaces the body with its gzip-compressed form.
	protected void compressWithGzip()
	{
		auto options = zlib.ZlibOptions(compressionLevel);
		data = gzip.compress(data, options);
	}

	/// Called by the server to compress content, if possible/appropriate
	final package void optimizeData(in ref Headers requestHeaders)
	{
		auto acceptEncoding = requestHeaders.get("Accept-Encoding", null);
		// Nothing to do without an Accept-Encoding, or when the body is already encoded.
		if (!acceptEncoding || "Content-Encoding" in headers)
			return;

		// Only a fixed set of content types is compressed.
		auto contentType = headers.get("Content-Type", null);
		bool compressible =
			contentType.startsWith("text/")
			|| contentType == "application/json"
			|| contentType == "image/vnd.microsoft.icon"
			|| contentType == "image/svg+xml";
		if (!compressible)
			return;

		// The trailing "*" guarantees that the loop below ends in a return.
		foreach (encoding; parseItemList(acceptEncoding) ~ ["*"])
		{
			switch (encoding)
			{
				case "deflate":
				case "gzip":
					headers["Content-Encoding"] = encoding;
					headers.add("Vary", "Accept-Encoding");
					if (encoding == "deflate")
						compressWithDeflate();
					else
						compressWithGzip();
					return;
				case "*":
					if ("Content-Encoding" in headers)
						headers.remove("Content-Encoding");
					return;
				default:
					break;
			}
		}
		assert(0);
	}

	/// Called by the server to apply range request.
	/// Only acts on responses whose status is OK: answers a satisfied
	/// If-Modified-Since with "Not Modified", and serves the first range of a
	/// "bytes=" Range header as "Partial Content".
	final package void sliceData(in ref Headers requestHeaders)
	{
		if (status != HttpStatusCode.OK)
			return;

		if ("If-Modified-Since" in requestHeaders && "Last-Modified" in headers)
		{
			auto lastModified = headers["Last-Modified"].parseTime!(TimeFormats.RFC2822);
			auto ifModifiedSince = requestHeaders["If-Modified-Since"].parseTime!(TimeFormats.RFC2822);
			if (lastModified <= ifModifiedSince)
			{
				setStatus(HttpStatusCode.NotModified);
				data = null;
				return;
			}
		}

		headers["Accept-Ranges"] = "bytes";
		auto prange = "Range" in requestHeaders;
		if (!prange || !(*prange).startsWith("bytes="))
			return;

		// Only the first range of a comma-separated list is considered.
		// An omitted bound is represented as size_t.max.
		auto firstRange = (*prange)[6..$].split(",")[0];
		auto bounds = firstRange.split("-").map!(s => s.length ? s.to!size_t : size_t.max)().array();
		enforce(bounds.length == 2, "Bad range request");

		auto datum = DataSetBytes(this.data);
		size_t start = bounds[0];
		// Turn the inclusive last position into an exclusive end.
		size_t end = bounds[1] + 1;
		if (end == size_t.min) // was not specified (size_t.max overflowed into 0)
			end = datum.length;

		if (start >= datum.length || start >= end || end > datum.length)
		{
			setStatus(HttpStatusCode.RequestedRangeNotSatisfiable);
			data = [Data(statusMessage)];
			return;
		}

		setStatus(HttpStatusCode.PartialContent);
		this.data = datum[start..end];
		headers["Content-Range"] = "bytes %d-%d/%d".format(start, start + this.data.bytes.length - 1, datum.length);
	}
}
573 
/// Sets the Expires, Cache-Control and Pragma headers to values
/// that tell clients not to cache the response.
void disableCache(ref Headers headers)
{
	// A date in the past - disables IE caching.
	enum longExpired = "Mon, 26 Jul 1997 05:00:00 GMT";
	enum cacheControl = "no-cache, must-revalidate";
	enum pragma_ = "no-cache";

	headers["Expires"] = longExpired;
	headers["Cache-Control"] = cacheControl;
	headers["Pragma"] = pragma_;
}
581 
/// Sets the Expires header to one year from now and Cache-Control to
/// "public" with a max-age of 31536000 seconds.
void cacheForever(ref Headers headers)
{
	auto expiry = Clock.currTime();
	expiry.add!"years"(1);
	headers["Expires"] = httpTime(expiry);
	headers["Cache-Control"] = "public, max-age=31536000";
}
587 
/// Formats a point in time using TimeFormats.RFC2822, expressed in UTC.
string httpTime(SysTime time)
{
	// Apache is bad at timezones, so always emit UTC.
	auto utcTime = time.toUTC();
	return utcTime.formatTime!(TimeFormats.RFC2822)();
}
594 
595 import std.algorithm : sort;
596 
/// Parses a list in the format of "a, b, c;q=0.5, d" and returns
/// an array of items sorted by "q" (["a", "b", "d", "c"]).
///
/// Items without a "q" parameter count as q=1. Items with equal "q" keep
/// their original relative order. Whitespace around items and around
/// their ";"-separated parameters is ignored.
///
/// Params:
///   s = the list, e.g. the value of an Accept-Encoding header
/// Returns: the item names, highest "q" first
/// Throws: ConvException if a "q" value is not a valid number
string[] parseItemList(string s)
{
	static struct Item
	{
		float q = 1.0;
		string str;

		this(string s)
		{
			auto params = s.split(";");
			if (!params.length) return;
			str = params[0].strip();
			foreach (param; params[1..$])
			{
				// "gzip; q=0.5" is as valid as "gzip;q=0.5"
				param = param.strip();
				if (param.startsWith("q="))
					q = to!float(param[2..$]);
			}
		}
	}

	return s
		.split(",")
		.map!(a => Item(strip(a)))()
		.array()
		// A stable sort is needed for the documented order of equal-q items.
		.sort!((a, b) => a.q > b.q, SwapStrategy.stable)()
		.map!(a => a.str)()
		.array();
}

unittest
{
	assert(parseItemList("a, b, c;q=0.5, d") == ["a", "b", "d", "c"]);
	assert(parseItemList("gzip; q=0.5, deflate") == ["deflate", "gzip"]);
	assert(parseItemList("a ;q=0.1, b") == ["b", "a"]);
}
628 
// TODO: optimize / move to HtmlWriter
/// Escapes text for inclusion in HTML: '<', '>' and '&' become
/// "&lt;", "&gt;" and "&amp;", and the sharp s (U+00DF) becomes "&szlig;".
/// All other bytes are copied through unchanged.
///
/// The string is processed as UTF-8 code units. U+00DF is recognized in its
/// UTF-8 encoding (C3 9F). A lone DF byte that is not the lead byte of a
/// two-byte sequence is also translated, so text in a single-byte encoding
/// keeps being escaped as before.
string httpEscape(string str)
{
	string result;
	for (size_t i = 0; i < str.length; i++)
	{
		immutable char c = str[i];
		switch (c)
		{
			case '<':
				result ~= "&lt;";
				break;
			case '>':
				result ~= "&gt;";
				break;
			case '&':
				result ~= "&amp;";
				break;
			case '\xC3':
				if (i + 1 < str.length && str[i + 1] == '\x9F')
				{
					// C3 9F is U+00DF in UTF-8; consume both bytes.
					result ~= "&szlig;";
					i++;
				}
				else
					result ~= c;
				break;
			case '\xDF':
				// DF followed by a continuation byte (10xxxxxx) starts a
				// two-byte character (U+07C0..U+07FF); it must stay intact.
				if (i + 1 < str.length && (str[i + 1] & 0xC0) == 0x80)
					result ~= c;
				else
					result ~= "&szlig;";
				break;
			default:
				result ~= c;
		}
	}
	return result;
}
653 
654 public import ae.net.ietf.url : UrlParameters, encodeUrlParameter, encodeUrlParameters, decodeUrlParameter, decodeUrlParameters;
655 
/// One part of a multipart message, as consumed by encodeMultipart
/// and produced by decodeMultipart.
struct MultipartPart
{
	/// Headers of this part.
	Headers headers;
	/// Content of this part.
	Data data;
}
661 
/// Serializes parts into a multipart body. Every part is written as a
/// "--boundary" line, its headers, an empty line and its content; the body
/// ends with a "--boundary--" line.
/// The content of a part must not contain the boundary (checked with an assert).
Data encodeMultipart(MultipartPart[] parts, string boundary)
{
	Data encoded;
	foreach (ref part; parts)
	{
		encoded ~= "--" ~ boundary ~ "\r\n";
		foreach (headerName, headerValue; part.headers)
		{
			encoded ~= headerName ~ ": " ~ headerValue ~ "\r\n";
		}
		// The empty line separates the headers from the content.
		encoded ~= "\r\n";
		assert((cast(string)part.data.contents).indexOf(boundary) == -1);
		encoded ~= part.data;
		encoded ~= "\r\n";
	}
	encoded ~= "--" ~ boundary ~ "--\r\n";
	return encoded;
}
678 
/// Splits a multipart body into its parts.
///
/// The body must start with a "--boundary" line and end with a
/// "--boundary--" line. Within a part, the headers end at the first empty
/// line. A part that starts with an empty line has no headers.
///
/// Params:
///   data = the complete multipart body
///   boundary = the boundary string, without the leading "--"
/// Returns: the decoded parts, in order of appearance
/// Throws: Exception if the start or terminator is missing, or if a part
/// with headers lacks the empty line that ends them.
MultipartPart[] decodeMultipart(Data data, string boundary)
{
	auto s = cast(char[])data.contents;
	auto term = "\r\n--" ~ boundary ~ "--\r\n";
	enforce(s.endsWith(term), "Bad multipart terminator");
	s = s[0..$-term.length];
	auto delim = "--" ~ boundary ~ "\r\n";
	enforce(s.skipOver(delim), "Bad multipart start");
	// Every delimiter after the first one is preceded by a line break.
	delim = "\r\n" ~ delim;
	auto parts = s.split(delim);
	MultipartPart[] result;
	foreach (part; parts)
	{
		MultipartPart p;
		if (part.startsWith("\r\n"))
		{
			// No headers: the separating empty line comes first
			// and the rest is the content.
			p.data = Data(part[2..$]);
		}
		else
		{
			auto segs = part.findSplit("\r\n\r\n");
			// Test the length: when the separator is missing, segs[1] is an
			// empty but non-null slice, which still counts as true.
			enforce(segs[1].length, "Can't find headers in multipart part");
			foreach (line; segs[0].split("\r\n"))
			{
				auto hparts = line.findSplit(":");
				p.headers[hparts[0].strip.idup] = hparts[2].strip.idup;
			}
			p.data = Data(segs[2]);
		}
		result ~= p;
	}
	return result;
}

unittest
{
	auto parts = [
		MultipartPart(Headers(["Foo" : "bar"]), Data.init),
		MultipartPart(Headers(["Baz" : "quux", "Frob" : "xyzzy"]), Data("Content goes here\xFF")),
		MultipartPart(Headers.init, Data("No headers here")),
	];
	auto boundary = "abcde";
	auto parts2 = parts.encodeMultipart(boundary).decodeMultipart(boundary);
	assert(parts2.length == parts.length);
	foreach (p; 0..parts.length)
	{
		assert(parts[p].headers == parts2[p].headers);
		assert(parts[p].data.contents == parts2[p].data.contents);
	}
}
721 
/// Tests whether s begins with prefix, comparing code unit by code unit
/// and ignoring the case of ASCII letters.
private bool asciiStartsWith(string s, string prefix)
{
	import std.ascii : toLower;

	if (prefix.length > s.length)
		return false;

	for (size_t pos = 0; pos < prefix.length; pos++)
	{
		if (toLower(s[pos]) != toLower(prefix[pos]))
			return false;
	}
	return true;
}