1 /**
2  * Concepts shared between HTTP clients and servers.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Stéphan Kochen <stephan@kochen.nl>
12  *   Vladimir Panteleev <vladimir@thecybershadow.net>
13  *   Simon Arlott
14  */
15 
16 module ae.net.http.common;
17 
import core.time;

import std.algorithm;
import std.array;
import std.string;
import std.conv;
import std.ascii;
import std.exception;
import std.datetime;
import std.typecons : tuple;
28 
29 import ae.net.ietf.headers;
30 import ae.sys.data;
31 import ae.utils.array : amap, afilter, auniq, asort;
32 import ae.utils.text;
33 import ae.utils.time;
34 import zlib = ae.utils.zlib;
35 import gzip = ae.utils.gzip;
36 
/// Base class shared by HTTP requests and responses. Holds the protocol
/// information, the headers, the body data and the time at which the
/// message object was constructed.
private abstract class HttpMessage
{
public:
	/// URL scheme of the message; "http" unless changed.
	string protocol = "http";
	/// HTTP version as a string; "1.0" unless changed.
	string protocolVersion = "1.0";
	/// Message headers.
	Headers headers;
	/// Message body (array of Data).
	Data[] data;
	/// Clock time captured when this object was constructed.
	SysTime creationTime;

	/// Stamps the message with the current clock time.
	this()
	{
		this.creationTime = Clock.currTime();
	}

	/// Time elapsed between construction of this object and now.
	@property Duration age()
	{
		auto now = Clock.currTime();
		return now - creationTime;
	}
}
57 
/// HTTP request class
class HttpRequest : HttpMessage
{
public:
	/// Request method; defaults to "GET".
	string method = "GET";
	/// Proxy address as "host" or "host:port"; parsed by proxyHost and proxyPort.
	string proxy;

	/// Creates a request with no resource set.
	this()
	{
	}

	/// Creates a request for the given resource or full URL
	/// (goes through the `resource` setter).
	this(string resource)
	{
		this.resource = resource;
	}

	/// Resource part of URL (everything after the hostname)
	@property string resource()
	{
		return _resource;
	}

	/// Setting the resource to a full URL ("http://..." or "https://...",
	/// matched case-insensitively) will fill in the protocol and the Host
	/// header, as well. Any other value is stored unchanged.
	/// Throws: ConvException if the URL contains a port that is not a valid ushort.
	@property void resource(string value)
	{
		_resource = value;

		// applies to both Client/Server as some clients put a full URL in the GET line instead of using a "Host" header
		string protocol;
		if (_resource.asciiStartsWith("http://"))
			protocol = "http";
		else
		if (_resource.asciiStartsWith("https://"))
			protocol = "https";

		if (protocol)
		{
			this.protocol = protocol;

			// Skip "<protocol>://"
			value = value[protocol.length+3..$];
			auto pathstart = value.indexOf('/');
			if (pathstart == -1)
			{
				// No path given: everything is the authority, resource becomes the root.
				host = value;
				_resource = "/";
			}
			else
			{
				host = value[0..pathstart];
				_resource = value[pathstart..$];
			}
			// NOTE(review): the statements below go through the host/port
			// accessors, which read and rewrite the Host header; their exact
			// order matters, so this sequence is intentionally left untouched.
			auto portstart = host().indexOf(':');
			if (portstart != -1)
			{
				port = to!ushort(host[portstart+1..$]);
				host = host[0..portstart];
			}
		}
	}

	/// The hostname, without the port number.
	/// Taken from the Host header (everything before its last ':');
	/// null if there is no Host header.
	@property string host()
	{
		string _host = headers.get("Host", null);
		auto colon = _host.lastIndexOf(":");
		return colon<0 ? _host : _host[0..colon];
	}

	/// Sets the Host header to the given hostname, appending the current
	/// port when it differs from the protocol's default port.
	@property void host(string _host)
	{
		auto _port = this.port;
		headers["Host"] = _port==protocolDefaultPort ? _host : _host ~ ":" ~ text(_port);
	}

	/// Default port for the current protocol (80 for "http", 443 for "https").
	/// Throws: Exception for any other protocol.
	@property ushort protocolDefaultPort()
	{
		switch (protocol)
		{
			case "http":
				return 80;
			case "https":
				return 443;
			default:
				throw new Exception("Unknown protocol: " ~ protocol);
		}
	}

	/// Port number. When a Host header is present, it is parsed from the
	/// header (the protocol's default port is used if the header has no
	/// ':'). Otherwise, the port previously stored through the setter is
	/// returned, or the protocol's default port if none was stored.
	@property ushort port()
	{
		if ("Host" in headers)
		{
			string _host = headers["Host"];
			auto colon = _host.lastIndexOf(":");
			return colon<0 ? protocolDefaultPort : to!ushort(_host[colon+1..$]);
		}
		else
			return _port ? _port : protocolDefaultPort;
	}

	/// Sets the port. When a Host header is present, the header is rewritten
	/// (the port is omitted if it is the protocol's default); otherwise the
	/// value is only remembered in a private field.
	@property void port(ushort _port)
	{
		if ("Host" in headers)
		{
			if (_port == protocolDefaultPort)
				headers["Host"] = this.host;
			else
				headers["Host"] = this.host ~ ":" ~ text(_port);
		}
		else
			this._port = _port;
	}

	/// Path part of request (until the ?)
	@property string path()
	{
		auto p = resource.indexOf('?');
		if (p >= 0)
			return resource[0..p];
		else
			return resource;
	}

	/// Query string part of request (after the ?).
	/// Returns null when the resource contains no '?'.
	@property string queryString()
	{
		auto p = resource.indexOf('?');
		if (p >= 0)
			return resource[p+1..$];
		else
			return null;
	}

	/// Replaces the query string. Any existing query string is removed;
	/// a null value leaves the resource without one.
	@property void queryString(string value)
	{
		auto p = resource.indexOf('?');
		if (p >= 0)
			resource = resource[0..p];
		if (value)
			resource = resource ~ '?' ~ value;
	}

	/// AA of query string parameters
	@property UrlParameters urlParameters()
	{
		return decodeUrlParameters(queryString);
	}

	/// ditto
	@property void urlParameters(UrlParameters parameters)
	{
		queryString = encodeUrlParameters(parameters);
	}

	/// URL without resource (protocol, host and port).
	/// The port is omitted when it is the protocol's default.
	@property string root()
	{
		return protocol ~ "://" ~ host ~ (port==protocolDefaultPort ? null : ":" ~ to!string(port));
	}

	/// Reconstruct full URL from host, port and resource
	@property string url()
	{
		return root ~ resource;
	}

	/// Full URL up to, but not including, the first '?'.
	/// (Only the query string is removed; a '#' is not treated specially.)
	@property string baseURL()
	{
		return root ~ resource.findSplit("?")[0];
	}

	/// Host part of `proxy` (everything before the first ':').
	@property string proxyHost()
	{
		auto portstart = proxy.indexOf(':');
		if (portstart != -1)
			return proxy[0..portstart];
		return proxy;
	}

	/// Port part of `proxy` (after the first ':'); 80 if none is given.
	@property ushort proxyPort()
	{
		auto portstart = proxy.indexOf(':');
		if (portstart != -1)
			return to!ushort(proxy[portstart+1..$]);
		return 80;
	}

	/// Parse the first line in a HTTP request ("METHOD /resource HTTP/1.x").
	/// Fills in `method`, `resource` (through its setter) and
	/// `protocolVersion` (the text after "HTTP/").
	/// Throws: Exception if the line is malformed.
	void parseRequestLine(string reqLine)
	{
		enforce(reqLine.length > 10, "Request line too short");
		auto methodEnd = reqLine.indexOf(' ');
		enforce(methodEnd > 0, "Malformed request line");
		method = reqLine[0 .. methodEnd];
		reqLine = reqLine[methodEnd + 1 .. $];

		// The protocol token is whatever follows the last space.
		auto resourceEnd = reqLine.lastIndexOf(' ');
		enforce(resourceEnd > 0, "Malformed request line");
		resource = reqLine[0 .. resourceEnd];

		string protocolToken = reqLine[resourceEnd+1..$];
		enforce(protocolToken.startsWith("HTTP/"), "Malformed request line");
		protocolVersion = protocolToken[5..$];
	}

	/// Decodes submitted form data, and returns an AA of values.
	/// Supports "application/x-www-form-urlencoded" and "multipart/form-data".
	/// Throws: Exception if the Content-Type is missing or not supported.
	UrlParameters decodePostData()
	{
		auto contentType = headers.get("Content-Type", "").decodeTokenHeader;

		switch (contentType.value)
		{
			case "application/x-www-form-urlencoded":
				return decodeUrlParameters(cast(string)data.joinToHeap());
			case "multipart/form-data":
				// Each part becomes a (name, value) pair, with the name taken
				// from the part's Content-Disposition header.
				return decodeMultipart(data.joinData, contentType.properties.get("boundary", null))
					.map!(part => tuple(part.headers.get("Content-Disposition", null).decodeTokenHeader.properties.get("name", null), cast(string)part.data.toHeap()))
					.UrlParameters;
			case "":
				throw new Exception("No Content-Type");
			default:
				throw new Exception("Unknown Content-Type: " ~ contentType.value);
		}
	}

	/// Get list of hosts as specified in headers (e.g. X-Forwarded-For).
	/// First item in returned array is the node furthest away.
	/// Duplicates are removed, as are empty entries and "unknown".
	/// Specify socket remote address in remoteHost to add it to the list.
	string[] remoteHosts(string remoteHost = null)
	{
		return
			(headers.get("X-Forwarded-For", null).split(",").amap!(h => h.strip())() ~
			 headers.get("X-Forwarded-Host", null) ~
			 remoteHost)
			// Test the length rather than the pointer, so that empty list
			// entries (e.g. from "a, , b") are dropped along with absent values.
			.afilter!`a.length && a != "unknown"`()
			.auniq();
	}

	unittest
	{
		auto req = new HttpRequest();
		assert(req.remoteHosts() == []);
		assert(req.remoteHosts("3.3.3.3") == ["3.3.3.3"]);

		req.headers["X-Forwarded-For"] = "1.1.1.1, 2.2.2.2";
		req.headers["X-Forwarded-Host"] = "2.2.2.2";
		assert(req.remoteHosts("3.3.3.3") == ["1.1.1.1", "2.2.2.2", "3.3.3.3"]);

		req.headers["X-Forwarded-For"] = "1.1.1.1, , unknown";
		assert(req.remoteHosts("3.3.3.3") == ["1.1.1.1", "2.2.2.2", "3.3.3.3"]);
	}

	/// Basic cookie parsing: splits the Cookie header on ';' and returns
	/// the "name=value" pairs. Segments without a '=' (or with an empty
	/// name) are ignored.
	string[string] getCookies()
	{
		string[string] cookies;
		foreach (segment; headers.get("Cookie", null).split(";"))
		{
			segment = segment.strip();
			auto p = segment.indexOf('=');
			if (p > 0)
				cookies[segment[0..p]] = segment[p+1..$];
		}
		return cookies;
	}

private:
	string _resource;
	ushort _port = 0; // used only when no "Host" in headers; otherwise, taken from there
}
328 
/// HTTP response status codes
enum HttpStatusCode : ushort
{
	// 1xx
	Continue=100,
	SwitchingProtocols=101,

	// 2xx
	OK=200,
	Created=201,
	Accepted=202,
	NonAuthoritativeInformation=203,
	NoContent=204,
	ResetContent=205,
	PartialContent=206,

	// 3xx
	MultipleChoices=300,
	MovedPermanently=301,
	Found=302,
	SeeOther=303,
	NotModified=304,
	UseProxy=305,
	//(Unused)=306,
	TemporaryRedirect=307,

	// 4xx
	BadRequest=400,
	Unauthorized=401,
	PaymentRequired=402,
	Forbidden=403,
	NotFound=404,
	MethodNotAllowed=405,
	NotAcceptable=406,
	ProxyAuthenticationRequired=407,
	RequestTimeout=408,
	Conflict=409,
	Gone=410,
	LengthRequired=411,
	PreconditionFailed=412,
	RequestEntityTooLarge=413,
	RequestUriTooLong=414,
	UnsupportedMediaType=415,
	RequestedRangeNotSatisfiable=416,
	ExpectationFailed=417,

	// 5xx
	InternalServerError=500,
	NotImplemented=501,
	BadGateway=502,
	ServiceUnavailable=503,
	GatewayTimeout=504,
	HttpVersionNotSupported=505
}
378 
/// HTTP reply class
class HttpResponse : HttpMessage
{
public:
	/// Numeric status code of the response.
	HttpStatusCode status;
	/// Reason phrase accompanying the status code.
	string statusMessage;

	/// Compression level passed to zlib/gzip when the body is compressed.
	int compressionLevel = 1;

	/// Returns the standard reason phrase for a status code,
	/// or null if the code is not known.
	static string getStatusMessage(HttpStatusCode code)
	{
		switch(code)
		{
			case 100: return "Continue";
			case 101: return "Switching Protocols";

			case 200: return "OK";
			case 201: return "Created";
			case 202: return "Accepted";
			case 203: return "Non-Authoritative Information";
			case 204: return "No Content";
			case 205: return "Reset Content";
			case 206: return "Partial Content";
			case 300: return "Multiple Choices";
			case 301: return "Moved Permanently";
			case 302: return "Found";
			case 303: return "See Other";
			case 304: return "Not Modified";
			case 305: return "Use Proxy";
			case 306: return "(Unused)";
			case 307: return "Temporary Redirect";

			case 400: return "Bad Request";
			case 401: return "Unauthorized";
			case 402: return "Payment Required";
			case 403: return "Forbidden";
			case 404: return "Not Found";
			case 405: return "Method Not Allowed";
			case 406: return "Not Acceptable";
			case 407: return "Proxy Authentication Required";
			case 408: return "Request Timeout";
			case 409: return "Conflict";
			case 410: return "Gone";
			case 411: return "Length Required";
			case 412: return "Precondition Failed";
			case 413: return "Request Entity Too Large";
			case 414: return "Request-URI Too Long";
			case 415: return "Unsupported Media Type";
			case 416: return "Requested Range Not Satisfiable";
			case 417: return "Expectation Failed";

			case 500: return "Internal Server Error";
			case 501: return "Not Implemented";
			case 502: return "Bad Gateway";
			case 503: return "Service Unavailable";
			case 504: return "Gateway Timeout";
			case 505: return "HTTP Version Not Supported";
			default: return null;
		}
	}

	/// Set the response status code and message
	void setStatus(HttpStatusCode code)
	{
		status = code;
		statusMessage = getStatusMessage(code);
	}

	/// Parse the first line of a HTTP response ("HTTP/1.x CODE [message]").
	/// Stores the whole first token (e.g. "HTTP/1.1") in `protocolVersion`,
	/// the numeric code in `status`, and the remainder of the line (or null
	/// if there is none) in `statusMessage`.
	/// Throws: Exception if the line contains no space;
	///         ConvException if the status code is not a valid ushort.
	final void parseStatusLine(string statusLine)
	{
		auto versionEnd = statusLine.indexOf(' ');
		if (versionEnd == -1)
			throw new Exception("Malformed status line");
		protocolVersion = statusLine[0..versionEnd];
		statusLine = statusLine[versionEnd+1..$];

		auto statusEnd = statusLine.indexOf(' ');
		string statusCode;
		if (statusEnd >= 0)
		{
			statusCode = statusLine[0 .. statusEnd];
			statusMessage = statusLine[statusEnd+1..$];
		}
		else
		{
			// Status line without a reason phrase
			statusCode = statusLine;
			statusMessage = null;
		}
		status = cast(HttpStatusCode)to!ushort(statusCode);
	}

	/// If the data is compressed, return the decompressed data
	/// ("deflate" and "gzip" Content-Encoding values are recognized);
	/// otherwise, return the data as-is, joined into a single Data.
	// this is not a property on purpose - to avoid using it multiple times as it will unpack the data on every access
	// TODO: there is no reason for above limitation
	Data getContent()
	{
		auto encoding = headers.get("Content-Encoding", null);
		if (encoding == "deflate")
			return zlib.uncompress(data).joinData();
		else
		if (encoding == "gzip")
			return gzip.uncompress(data).joinData();
		else
			return data.joinData();
	}

	/// Replaces the body with its zlib-compressed form.
	protected void compressWithDeflate()
	{
		data = zlib.compress(data, zlib.ZlibOptions(compressionLevel));
	}

	/// Replaces the body with its gzip-compressed form.
	protected void compressWithGzip()
	{
		data = gzip.compress(data, zlib.ZlibOptions(compressionLevel));
	}

	/// Called by the server to compress content, if possible/appropriate.
	/// Compression is only attempted when the request has an
	/// Accept-Encoding header, the response has no Content-Encoding yet,
	/// and the response Content-Type is one of a few textual types.
	final package void optimizeData(ref const Headers requestHeaders)
	{
		auto acceptEncoding = requestHeaders.get("Accept-Encoding", null);
		if (acceptEncoding && "Content-Encoding" !in headers)
		{
			auto contentType = headers.get("Content-Type", null);
			if (contentType.startsWith("text/")
			 || contentType == "application/json"
			 || contentType == "image/vnd.microsoft.icon"
			 || contentType == "image/svg+xml")
			{
				// The trailing "*" acts as a sentinel: it guarantees that the
				// loop below returns even if no listed encoding is supported.
				auto supported = parseItemList(acceptEncoding) ~ ["*"];
				foreach (method; supported)
					switch (method)
					{
						case "deflate":
							headers["Content-Encoding"] = method;
							headers.add("Vary", "Accept-Encoding");
							compressWithDeflate();
							return;
						case "gzip":
							headers["Content-Encoding"] = method;
							headers.add("Vary", "Accept-Encoding");
							compressWithGzip();
							return;
						case "*":
							// Leave the body uncompressed.
							if("Content-Encoding" in headers)
								headers.remove("Content-Encoding");
							return;
						default:
							break;
					}
				assert(0);
			}
		}
	}

	/// Called by the server to apply range request.
	/// Only acts on responses whose status is OK. Handles
	/// If-Modified-Since (replying NotModified with no body) and a
	/// single "bytes=first-last" / "bytes=first-" range; when several
	/// ranges are requested, only the first one is honoured.
	/// Throws: Exception on a syntactically invalid range.
	final package void sliceData(ref const Headers requestHeaders)
	{
		if (status == HttpStatusCode.OK)
		{
			if ("If-Modified-Since" in requestHeaders &&
				"Last-Modified" in headers &&
				headers["Last-Modified"].parseTime!(TimeFormats.RFC2822) <= requestHeaders["If-Modified-Since"].parseTime!(TimeFormats.RFC2822))
			{
				setStatus(HttpStatusCode.NotModified);
				data = null;
				return;
			}

			headers["Accept-Ranges"] = "bytes";
			auto prange = "Range" in requestHeaders;
			if (prange && (*prange).startsWith("bytes="))
			{
				// findSplit (unlike split(",")[0]) is safe on an empty
				// specification such as "Range: bytes=", which then fails
				// the enforce below instead of indexing an empty array.
				auto firstRange = (*prange)[6..$].findSplit(",")[0];
				// An omitted bound is represented by size_t.max.
				auto ranges = firstRange.split("-").map!(s => s.length ? s.to!size_t : size_t.max)().array();
				enforce(ranges.length == 2, "Bad range request");
				// Convert the inclusive last-byte position to an exclusive end.
				ranges[1]++;
				auto datum = DataSetBytes(this.data);
				// An end which was not specified (size_t.max overflowed into 0)
				// or which lies beyond the data means "until the end of the data".
				if (ranges[1] == size_t.min || ranges[1] > datum.length)
					ranges[1] = datum.length;
				if (ranges[0] >= datum.length || ranges[0] >= ranges[1])
				{
					//writeError(HttpStatusCode.RequestedRangeNotSatisfiable);
					setStatus(HttpStatusCode.RequestedRangeNotSatisfiable);
					data = [Data(statusMessage)];
					return;
				}
				else
				{
					setStatus(HttpStatusCode.PartialContent);
					this.data = datum[ranges[0]..ranges[1]];
					headers["Content-Range"] = "bytes %d-%d/%d".format(ranges[0], ranges[0] + this.data.bytes.length - 1, datum.length);
				}
			}
		}
	}
}
574 
/// Sets the Expires, Cache-Control and Pragma headers so that the
/// response is not cached.
void disableCache(ref Headers headers)
{
	// A fixed date in the past (disables IE caching). The day name must
	// match the date for this to be a valid HTTP-date: 26 Jul 1997 was a Saturday.
	headers["Expires"] = "Sat, 26 Jul 1997 05:00:00 GMT";
	//headers["Last-Modified"] = "" . gmdate( "D, d M Y H:i:s" ) . " GMT";
	headers["Cache-Control"] = "no-cache, must-revalidate";
	headers["Pragma"] = "no-cache";
}
582 
/// Sets the Expires and Cache-Control headers so that the response may be
/// cached publicly for one year (max-age of 31536000 seconds).
void cacheForever(ref Headers headers)
{
	auto expiry = Clock.currTime();
	expiry.add!"years"(1);

	headers["Expires"] = httpTime(expiry);
	headers["Cache-Control"] = "public, max-age=31536000";
}
588 
/// Formats a point in time for use in HTTP headers: the time is
/// converted to UTC and rendered using the RFC2822 time format.
string httpTime(SysTime time)
{
	// Always emit UTC (Apache is bad at timezones)
	auto utcTime = time.toUTC();
	return utcTime.formatTime!(TimeFormats.RFC2822)();
}
595 
596 import std.algorithm : sort;
597 
/// Parses a list in the format of "a, b, c;q=0.5, d" and returns
/// an array of items sorted by descending "q" (["a", "b", "d", "c"]).
/// Items without a "q" parameter have a q of 1. Items with equal q keep
/// their original relative order. Whitespace around items and their
/// parameters is ignored.
/// Throws: ConvException if a "q" value is not a valid number.
string[] parseItemList(string s)
{
	static struct Item
	{
		float q = 1.0;
		string str;

		this(string s)
		{
			auto params = s.split(";");
			if (!params.length) return;
			str = params[0].strip();
			foreach (param; params[1..$])
			{
				// Parameters are commonly written with a space after
				// the semicolon ("gzip; q=0.5").
				param = param.strip();
				if (param.startsWith("q="))
					q = to!float(param[2..$].strip());
			}
		}
	}

	auto items = s
		.split(",")
		.map!(a => Item(strip(a)))()
		.array();
	// A stable sort is required to preserve the order of items with the same q.
	sort!((a, b) => a.q > b.q, SwapStrategy.stable)(items);
	return items
		.map!(item => item.str)()
		.array();
}

unittest
{
	assert(parseItemList("a, b, c;q=0.5, d") == ["a", "b", "d", "c"]);
	assert(parseItemList("gzip; q=0.5, deflate") == ["deflate", "gzip"]);
	assert(parseItemList("a;q=0.1,b;q=0.9,c;q=0.9,d;q=0.1") == ["b", "c", "a", "d"]);
	assert(parseItemList("") == []);
}
629 
// TODO: optimize / move to HtmlWriter
/// Escapes '<', '>' and '&' as HTML entities, and the sharp s (U+00DF)
/// as "&szlig;". All other text is copied unchanged.
///
/// The input is processed one UTF-8 code unit at a time. The sharp s is
/// recognized in its UTF-8 form (bytes C3 9F). For compatibility with
/// single-byte (Latin-1) input, a lone byte DF is also escaped, unless it
/// is followed by a UTF-8 continuation byte - in which case it is the
/// first byte of another character (U+07C0..U+07FF) and must be kept.
string httpEscape(string str)
{
	string result;
	for (size_t i = 0; i < str.length; i++)
	{
		immutable char c = str[i];
		// Following code unit, or NUL when c is the last one
		immutable char next = i + 1 < str.length ? str[i + 1] : '\0';
		switch(c)
		{
			case '<':
				result ~= "&lt;";
				break;
			case '>':
				result ~= "&gt;";
				break;
			case '&':
				result ~= "&amp;";
				break;
			case '\xC3':
				if (next == '\x9F')  // UTF-8 encoding of the sharp s
				{
					result ~= "&szlig;";
					i++;  // consume the second byte as well
				}
				else
					result ~= c;
				break;
			case '\xDF':
				if ((next & 0xC0) == 0x80)
					result ~= c;  // start of a two-byte UTF-8 sequence; the next byte is copied as usual
				else
					result ~= "&szlig;";  // single-byte sharp s (the beta-like symbol)
				break;
			default:
				result ~= c;
				break;
		}
	}
	return result;
}
654 
655 public import ae.net.ietf.url : UrlParameters, encodeUrlParameter, encodeUrlParameters, decodeUrlParameter, decodeUrlParameters;
656 
/// One part of a multipart message body, as consumed by encodeMultipart
/// and produced by decodeMultipart.
struct MultipartPart
{
	/// Headers of this part.
	Headers headers;
	/// Contents of this part.
	Data data;
}
662 
/// Serializes the given parts into a single multipart body.
/// Every part is written as a "--boundary" line, its headers, an empty
/// line and its contents; the body ends with a "--boundary--" line.
/// The boundary must not occur in the contents of any part (this is
/// only checked by an assertion).
Data encodeMultipart(MultipartPart[] parts, string boundary)
{
	string delimiter = "--" ~ boundary;

	Data result;
	foreach (ref part; parts)
	{
		result ~= delimiter ~ "\r\n";
		foreach (name, value; part.headers)
			result ~= name ~ ": " ~ value ~ "\r\n";
		// Empty line separating the headers from the contents
		result ~= "\r\n";

		assert((cast(string)part.data.contents).indexOf(boundary) < 0);
		result ~= part.data;
		result ~= "\r\n";
	}
	result ~= delimiter ~ "--\r\n";
	return result;
}
679 
/// Splits a multipart body into its parts.
/// The body must start with a "--boundary" line and end with a
/// "--boundary--" line, and every part must contain an empty line
/// separating its headers from its contents.
/// Throws: Exception if any of the above does not hold.
MultipartPart[] decodeMultipart(Data data, string boundary)
{
	auto payload = cast(char[])data.contents;

	// Cut off the closing delimiter
	auto terminator = "\r\n--" ~ boundary ~ "--\r\n";
	enforce(payload.endsWith(terminator), "Bad multipart terminator");
	payload = payload[0 .. $ - terminator.length];

	// Cut off the opening delimiter
	auto opening = "--" ~ boundary ~ "\r\n";
	enforce(payload.skipOver(opening), "Bad multipart start");

	// What remains are the parts, separated by delimiter lines
	MultipartPart[] result;
	foreach (chunk; payload.split("\r\n" ~ opening))
	{
		auto pieces = chunk.findSplit("\r\n\r\n");
		enforce(pieces[1], "Can't find headers in multipart part");

		MultipartPart part;
		foreach (headerLine; pieces[0].split("\r\n"))
		{
			auto nameAndValue = headerLine.findSplit(":");
			part.headers[nameAndValue[0].strip.idup] = nameAndValue[2].strip.idup;
		}
		part.data = Data(pieces[2]);
		result ~= part;
	}
	return result;
}
706 
// Encoding a list of parts and decoding the result must give back
// the same headers and contents.
unittest
{
	auto boundary = "abcde";
	auto original = [
		MultipartPart(Headers(["Foo" : "bar"]), Data.init),
		MultipartPart(Headers(["Baz" : "quux", "Frob" : "xyzzy"]), Data("Content goes here\xFF")),
	];

	auto decoded = original.encodeMultipart(boundary).decodeMultipart(boundary);
	assert(decoded.length == original.length);
	foreach (i, ref expected; original)
	{
		assert(expected.headers == decoded[i].headers);
		assert(expected.data.contents == decoded[i].data.contents);
	}
}
722 
/// Returns true if `s` begins with `prefix`, comparing code units
/// with ASCII letters treated case-insensitively.
private bool asciiStartsWith(string s, string prefix)
{
	import std.ascii : toLower;

	if (prefix.length > s.length)
		return false;

	foreach (i; 0 .. prefix.length)
	{
		if (toLower(s[i]) != toLower(prefix[i]))
			return false;
	}
	return true;
}