1 /**
2  * Concepts shared between HTTP clients and servers.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Stéphan Kochen <stephan@kochen.nl>
12  *   Vladimir Panteleev <vladimir@thecybershadow.net>
13  *   Simon Arlott
14  */
15 
16 module ae.net.http.common;
17 
18 import core.time;
19 
20 import std.algorithm;
21 import std.array;
22 import std.string;
23 import std.conv;
24 import std.ascii;
25 import std.exception;
26 import std.datetime;
27 import std.typecons : tuple;
28 
29 import ae.net.ietf.headers;
30 import ae.sys.data;
31 import ae.utils.array : amap, afilter, auniq, asort;
32 import ae.utils.text;
33 import ae.utils.time;
34 import zlib = ae.utils.zlib;
35 import gzip = ae.utils.gzip;
36 
/// Common base of HTTP requests and responses: protocol information,
/// headers, body data, and the time at which the object was created.
private abstract class HttpMessage
{
public:
	/// Protocol name; initialized to "http".
	string protocol = "http";
	/// Protocol version; initialized to "1.0".
	string protocolVersion = "1.0";
	/// Message headers.
	Headers headers;
	/// Message body, as a list of data chunks.
	Data[] data;
	/// Wall-clock time at which this object was constructed.
	SysTime creationTime;

	/// Stamps the new message with the current time.
	this()
	{
		this.creationTime = Clock.currTime();
	}

	/// Time elapsed between construction (`creationTime`) and now.
	@property Duration age()
	{
		auto now = Clock.currTime();
		return now - creationTime;
	}
}
57 
/// HTTP request class
class HttpRequest : HttpMessage
{
public:
	/// Request method. Initialized to "GET".
	string method = "GET";
	/// Proxy address, as "host" or "host:port" (see `proxyHost` / `proxyPort`).
	string proxy;

	/// Create an empty request.
	this()
	{
	}

	/// Create a request for the given resource.
	/// The value goes through the `resource` setter, so a full
	/// "http://" / "https://" URL is accepted as well.
	this(string resource)
	{
		this.resource = resource;
	}

	/// Resource part of URL (everything after the hostname)
	@property string resource()
	{
		return _resource;
	}

	/// Setting the resource to a full URL will fill in the Host header, as well.
	/// The protocol prefix is matched case-insensitively; when the URL has no
	/// path, the resource becomes "/".
	/// Throws: a conversion exception if the port in the URL is not a valid `ushort`.
	@property void resource(string value)
	{
		_resource = value;

		// applies to both Client/Server as some clients put a full URL in the GET line instead of using a "Host" header
		string protocol;
		if (_resource.asciiStartsWith("http://"))
			protocol = "http";
		else
		if (_resource.asciiStartsWith("https://"))
			protocol = "https";

		if (protocol)
		{
			this.protocol = protocol;

			// Skip "<protocol>://"
			value = value[protocol.length+3..$];
			auto pathstart = value.indexOf('/');
			if (pathstart == -1)
			{
				host = value;
				_resource = "/";
			}
			else
			{
				host = value[0..pathstart];
				_resource = value[pathstart..$];
			}
			// The host getter already strips the last ":port" suffix, so a
			// colon is only still visible here if the Host header ended up
			// with more than one (the host setter appends the port that was
			// previously in effect).
			auto portstart = host().indexOf(':');
			if (portstart != -1)
			{
				port = to!ushort(host[portstart+1..$]);
				host = host[0..portstart];
			}
		}
	}

	/// The hostname, without the port number
	/// (the Host header up to its last colon; null if there is no Host header).
	@property string host()
	{
		string _host = headers.get("Host", null);
		auto colon = _host.lastIndexOf(":");
		return colon<0 ? _host : _host[0..colon];
	}

	/// Sets the Host header to the given host name, appending the
	/// currently effective port unless it is the protocol's default.
	@property void host(string _host)
	{
		auto _port = this.port;
		headers["Host"] = _port==protocolDefaultPort ? _host : _host ~ ":" ~ text(_port);
	}

	/// Default port for the current protocol (80 for "http", 443 for "https").
	/// Throws: `Exception` for any other protocol.
	@property ushort protocolDefaultPort()
	{
		switch (protocol)
		{
			case "http":
				return 80;
			case "https":
				return 443;
			default:
				throw new Exception("Unknown protocol: " ~ protocol);
		}
	}

	/// Port number. Taken from the Host header if there is one (falling back
	/// to the protocol's default port when the header has no port);
	/// otherwise the explicitly set port, or the protocol's default port.
	@property ushort port()
	{
		if ("Host" in headers)
		{
			string _host = headers["Host"];
			auto colon = _host.lastIndexOf(":");
			return colon<0 ? protocolDefaultPort : to!ushort(_host[colon+1..$]);
		}
		else
			return _port ? _port : protocolDefaultPort;
	}

	/// Sets the port. If a Host header is present, it is rewritten
	/// (the port is omitted when it is the protocol's default);
	/// otherwise the value is remembered until a Host header exists.
	@property void port(ushort _port)
	{
		if ("Host" in headers)
		{
			if (_port == protocolDefaultPort)
				headers["Host"] = this.host;
			else
				headers["Host"] = this.host ~ ":" ~ text(_port);
		}
		else
			this._port = _port;
	}

	/// Path part of request (until the ?)
	@property string path()
	{
		auto p = resource.indexOf('?');
		if (p >= 0)
			return resource[0..p];
		else
			return resource;
	}

	/// Query string part of request (after the ?).
	/// Returns null if the resource contains no '?'.
	@property string queryString()
	{
		auto p = resource.indexOf('?');
		if (p >= 0)
			return resource[p+1..$];
		else
			return null;
	}

	/// ditto
	@property void queryString(string value)
	{
		// Drop the existing query string, if any, then append the new one.
		auto p = resource.indexOf('?');
		if (p >= 0)
			resource = resource[0..p];
		if (value)
			resource = resource ~ '?' ~ value;
	}

	/// AA of query string parameters
	@property UrlParameters urlParameters()
	{
		return decodeUrlParameters(queryString);
	}

	/// ditto
	@property void urlParameters(UrlParameters parameters)
	{
		queryString = encodeUrlParameters(parameters);
	}

	/// URL without resource (protocol, host and port).
	/// The port is omitted when it is the protocol's default.
	@property string root()
	{
		return protocol ~ "://" ~ host ~ (port==protocolDefaultPort ? null : ":" ~ to!string(port));
	}

	/// Reconstruct full URL from host, port and resource
	@property string url()
	{
		return root ~ resource;
	}

	/// Full URL with everything from the first '?' onwards removed.
	@property string baseURL()
	{
		return root ~ resource.findSplit("?")[0];
	}

	/// Host part of `proxy` (everything before the first colon).
	@property string proxyHost()
	{
		auto portstart = proxy.indexOf(':');
		if (portstart != -1)
			return proxy[0..portstart];
		return proxy;
	}

	/// Port part of `proxy` (after the first colon); 80 if none is given.
	@property ushort proxyPort()
	{
		auto portstart = proxy.indexOf(':');
		if (portstart != -1)
			return to!ushort(proxy[portstart+1..$]);
		return 80;
	}

	/// Parse the first line in a HTTP request ("METHOD /resource HTTP/1.x").
	/// Sets `method`, `resource` (through its setter) and `protocolVersion`
	/// (the text after "HTTP/").
	/// Throws: an exception (via `enforce`) if the line is malformed.
	void parseRequestLine(string reqLine)
	{
		enforce(reqLine.length > 10, "Request line too short");
		auto methodEnd = reqLine.indexOf(' ');
		enforce(methodEnd > 0, "Malformed request line");
		method = reqLine[0 .. methodEnd];
		reqLine = reqLine[methodEnd + 1 .. reqLine.length];

		// The resource runs until the last space, so it may contain spaces itself.
		auto resourceEnd = reqLine.lastIndexOf(' ');
		enforce(resourceEnd > 0, "Malformed request line");
		resource = reqLine[0 .. resourceEnd];

		string protocol = reqLine[resourceEnd+1..$];
		enforce(protocol.startsWith("HTTP/"));
		protocolVersion = protocol[5..$];
	}

	/// Decodes submitted form data, and returns an AA of values.
	/// Handles "application/x-www-form-urlencoded" and "multipart/form-data".
	/// Throws: `Exception` if the Content-Type header is missing or has any other value.
	UrlParameters decodePostData()
	{
		auto contentType = headers.get("Content-Type", "").decodeTokenHeader;

		switch (contentType.value)
		{
			case "application/x-www-form-urlencoded":
				return decodeUrlParameters(cast(string)data.joinToHeap());
			case "multipart/form-data":
				// Each part becomes a (name, content) pair, where the name
				// comes from the part's Content-Disposition header.
				return decodeMultipart(data.joinData, contentType.properties.get("boundary", null))
					.map!(part => tuple(part.headers.get("Content-Disposition", null).decodeTokenHeader.properties.get("name", null), cast(string)part.data.toHeap()))
					.UrlParameters;
			case "":
				throw new Exception("No Content-Type");
			default:
				throw new Exception("Unknown Content-Type: " ~ contentType.value);
		}
	}

	/// Get list of hosts as specified in headers (e.g. X-Forwarded-For).
	/// First item in returned array is the node furthest away.
	/// Duplicates are removed.
	/// Specify socket remote address in remoteHost to add it to the list.
	deprecated("Insecure, use HttpServer.remoteIPHeader")
	string[] remoteHosts(string remoteHost = null)
	{
		// Fully qualified to pick std.string's strip, as the name is passed as an alias.
		return
			(headers.get("X-Forwarded-For", null).split(",").amap!(std.string.strip)() ~
			 headers.get("X-Forwarded-Host", null) ~
			 remoteHost)
			.afilter!`a && a != "unknown"`()
			.auniq();
	}

	deprecated unittest
	{
		auto req = new HttpRequest();
		assert(req.remoteHosts() == []);
		assert(req.remoteHosts("3.3.3.3") == ["3.3.3.3"]);

		req.headers["X-Forwarded-For"] = "1.1.1.1, 2.2.2.2";
		req.headers["X-Forwarded-Host"] = "2.2.2.2";
		assert(req.remoteHosts("3.3.3.3") == ["1.1.1.1", "2.2.2.2", "3.3.3.3"]);
	}

	/// Basic cookie parsing: splits the Cookie header on ';' and each
	/// segment on its first '='. Segments without a name are skipped.
	string[string] getCookies()
	{
		string[string] cookies;
		foreach (segment; headers.get("Cookie", null).split(";"))
		{
			segment = segment.strip();
			auto p = segment.indexOf('=');
			if (p > 0)
				cookies[segment[0..p]] = segment[p+1..$];
		}
		return cookies;
	}

private:
	string _resource;
	ushort _port = 0; // used only when no "Host" in headers; otherwise, taken from there
}
329 
/// HTTP response status codes
enum HttpStatusCode : ushort
{
	// 1xx
	Continue                     = 100,
	SwitchingProtocols           = 101,

	// 2xx
	OK                           = 200,
	Created                      = 201,
	Accepted                     = 202,
	NonAuthoritativeInformation  = 203,
	NoContent                    = 204,
	ResetContent                 = 205,
	PartialContent               = 206,

	// 3xx
	MultipleChoices              = 300,
	MovedPermanently             = 301,
	Found                        = 302,
	SeeOther                     = 303,
	NotModified                  = 304,
	UseProxy                     = 305,
	//(Unused)                   = 306,
	TemporaryRedirect            = 307,

	// 4xx
	BadRequest                   = 400,
	Unauthorized                 = 401,
	PaymentRequired              = 402,
	Forbidden                    = 403,
	NotFound                     = 404,
	MethodNotAllowed             = 405,
	NotAcceptable                = 406,
	ProxyAuthenticationRequired  = 407,
	RequestTimeout               = 408,
	Conflict                     = 409,
	Gone                         = 410,
	LengthRequired               = 411,
	PreconditionFailed           = 412,
	RequestEntityTooLarge        = 413,
	RequestUriTooLong            = 414,
	UnsupportedMediaType         = 415,
	RequestedRangeNotSatisfiable = 416,
	ExpectationFailed            = 417,

	// 5xx
	InternalServerError          = 500,
	NotImplemented               = 501,
	BadGateway                   = 502,
	ServiceUnavailable           = 503,
	GatewayTimeout               = 504,
	HttpVersionNotSupported      = 505
}
379 
/// HTTP reply class
class HttpResponse : HttpMessage
{
public:
	/// Response status code.
	HttpStatusCode status;
	/// Reason phrase accompanying the status code.
	string statusMessage;

	/// Compression level passed to zlib/gzip when compressing the body.
	int compressionLevel = 1;

	/// Returns the reason phrase for the given status code,
	/// or null if the code is not known.
	static string getStatusMessage(HttpStatusCode code)
	{
		switch(code)
		{
			case 100: return "Continue";
			case 101: return "Switching Protocols";

			case 200: return "OK";
			case 201: return "Created";
			case 202: return "Accepted";
			case 203: return "Non-Authoritative Information";
			case 204: return "No Content";
			case 205: return "Reset Content";
			case 206: return "Partial Content";
			case 300: return "Multiple Choices";
			case 301: return "Moved Permanently";
			case 302: return "Found";
			case 303: return "See Other";
			case 304: return "Not Modified";
			case 305: return "Use Proxy";
			case 306: return "(Unused)";
			case 307: return "Temporary Redirect";

			case 400: return "Bad Request";
			case 401: return "Unauthorized";
			case 402: return "Payment Required";
			case 403: return "Forbidden";
			case 404: return "Not Found";
			case 405: return "Method Not Allowed";
			case 406: return "Not Acceptable";
			case 407: return "Proxy Authentication Required";
			case 408: return "Request Timeout";
			case 409: return "Conflict";
			case 410: return "Gone";
			case 411: return "Length Required";
			case 412: return "Precondition Failed";
			case 413: return "Request Entity Too Large";
			case 414: return "Request-URI Too Long";
			case 415: return "Unsupported Media Type";
			case 416: return "Requested Range Not Satisfiable";
			case 417: return "Expectation Failed";

			case 500: return "Internal Server Error";
			case 501: return "Not Implemented";
			case 502: return "Bad Gateway";
			case 503: return "Service Unavailable";
			case 504: return "Gateway Timeout";
			case 505: return "HTTP Version Not Supported";
			default: return null;
		}
	}

	/// Set the response status code and message
	void setStatus(HttpStatusCode code)
	{
		status = code;
		statusMessage = getStatusMessage(code);
	}

	/// Parse the first line of a HTTP response ("HTTP/1.x CODE [message]").
	/// Sets `protocolVersion` (the whole first token, stored verbatim),
	/// `status`, and `statusMessage` (null if the line has no message).
	/// Throws: `Exception` if the line contains no space; a conversion
	/// exception if the status code is not a valid `ushort`.
	final void parseStatusLine(string statusLine)
	{
		auto versionEnd = statusLine.indexOf(' ');
		if (versionEnd == -1)
			throw new Exception("Malformed status line");
		protocolVersion = statusLine[0..versionEnd];
		statusLine = statusLine[versionEnd+1..statusLine.length];

		auto statusEnd = statusLine.indexOf(' ');
		string statusCode;
		if (statusEnd >= 0)
		{
			statusCode = statusLine[0 .. statusEnd];
			statusMessage = statusLine[statusEnd+1..statusLine.length];
		}
		else
		{
			statusCode = statusLine;
			statusMessage = null;
		}
		status = cast(HttpStatusCode)to!ushort(statusCode);
	}

	/// If the data is compressed, return the decompressed data
	// this is not a property on purpose - to avoid using it multiple times as it will unpack the data on every access
	// TODO: there is no reason for above limitation
	Data getContent()
	{
		// A missing header yields null, which falls through to the default case.
		switch (headers.get("Content-Encoding", null))
		{
			case "deflate":
				return zlib.uncompress(data).joinData();
			case "gzip":
				return gzip.uncompress(data).joinData();
			default:
				return data.joinData();
		}
	}

	/// Replaces `data` with its deflate-compressed form.
	protected void compressWithDeflate()
	{
		data = zlib.compress(data, zlib.ZlibOptions(compressionLevel));
	}

	/// Replaces `data` with its gzip-compressed form.
	protected void compressWithGzip()
	{
		data = gzip.compress(data, zlib.ZlibOptions(compressionLevel));
	}

	/// Called by the server to compress content, if possible/appropriate.
	/// Compression is only attempted when the request has an Accept-Encoding
	/// header, the response has no Content-Encoding yet, and the response's
	/// Content-Type is one of the listed compressible types.
	final package void optimizeData(ref const Headers requestHeaders)
	{
		auto acceptEncoding = requestHeaders.get("Accept-Encoding", null);
		if (acceptEncoding && "Content-Encoding" !in headers)
		{
			auto contentType = headers.get("Content-Type", null);
			if (contentType.startsWith("text/")
			 || contentType == "application/json"
			 || contentType == "image/vnd.microsoft.icon"
			 || contentType == "image/svg+xml")
			{
				// The trailing "*" acts as a sentinel: if no supported
				// encoding is listed, the data is left uncompressed.
				auto supported = parseItemList(acceptEncoding) ~ ["*"];
				foreach (method; supported)
					switch (method)
					{
						case "deflate":
							headers["Content-Encoding"] = method;
							headers.add("Vary", "Accept-Encoding");
							compressWithDeflate();
							return;
						case "gzip":
							headers["Content-Encoding"] = method;
							headers.add("Vary", "Accept-Encoding");
							compressWithGzip();
							return;
						case "*":
							if("Content-Encoding" in headers)
								headers.remove("Content-Encoding");
							return;
						default:
							break;
					}
				assert(0); // the "*" sentinel always ends the loop
			}
		}
	}

	/// Called by the server to apply range request.
	/// Only acts on responses whose status is OK:
	/// $(UL
	///   $(LI If the request's If-Modified-Since is not older than the
	///        response's Last-Modified, the response becomes an empty
	///        "304 Not Modified".)
	///   $(LI Otherwise, a "Range: bytes=..." request header is applied.
	///        Supported forms are "first-last", "first-" and the suffix
	///        form "-length"; only the first range of a list is honored.
	///        A last position beyond the end of the data is clamped to
	///        the end. Unsatisfiable ranges produce a 416 response.)
	/// )
	/// Throws: an exception if the range specification is malformed.
	final package void sliceData(ref const Headers requestHeaders)
	{
		if (status != HttpStatusCode.OK)
			return;

		if ("If-Modified-Since" in requestHeaders &&
			"Last-Modified" in headers &&
			headers["Last-Modified"].parseTime!(TimeFormats.RFC2822) <= requestHeaders["If-Modified-Since"].parseTime!(TimeFormats.RFC2822))
		{
			setStatus(HttpStatusCode.NotModified);
			data = null;
			return;
		}

		headers["Accept-Ranges"] = "bytes";
		auto prange = "Range" in requestHeaders;
		if (!prange || !(*prange).startsWith("bytes="))
			return;

		// Only the first range of a multi-range request is honored.
		auto specs = (*prange)[6..$].split(",");
		enforce(specs.length, "Bad range request"); // "bytes=" with nothing after it
		auto bounds = specs[0].split("-");
		enforce(bounds.length == 2, "Bad range request");
		auto firstPos = bounds[0].strip();
		auto lastPos = bounds[1].strip();

		auto datum = DataSetBytes(this.data);
		auto total = datum.length;

		// Half-open byte interval [start, end) to serve.
		// Left empty (start == end) when the range cannot be satisfied.
		size_t start = 0, end = 0;
		if (firstPos.length)
		{
			// "first-" or "first-last"
			start = firstPos.to!size_t;
			end = total;
			if (lastPos.length)
			{
				auto last = lastPos.to!size_t;
				// A last position at or beyond the end means "until the end".
				if (last < total)
					end = last + 1;
			}
		}
		else
		if (lastPos.length)
		{
			// "-length": the final `length` bytes of the data.
			auto suffixLength = lastPos.to!size_t;
			start = suffixLength < total ? total - suffixLength : 0;
			end = total;
		}

		if (start >= end)
		{
			//writeError(HttpStatusCode.RequestedRangeNotSatisfiable);
			setStatus(HttpStatusCode.RequestedRangeNotSatisfiable);
			data = [Data(statusMessage)];
			return;
		}

		setStatus(HttpStatusCode.PartialContent);
		this.data = datum[start..end];
		headers["Content-Range"] = "bytes %d-%d/%d".format(start, start + this.data.bytes.length - 1, datum.length);
	}
}
575 
/// Sets the Expires, Cache-Control and Pragma headers
/// to values which instruct clients not to cache the response.
void disableCache(ref Headers headers)
{
	// Header name / value pairs, applied in order.
	static immutable string[2][] directives = [
		["Expires", "Mon, 26 Jul 1997 05:00:00 GMT"],  // date in the past - disable IE caching
		["Cache-Control", "no-cache, must-revalidate"],
		["Pragma", "no-cache"],
	];

	foreach (directive; directives)
		headers[directive[0]] = directive[1];
}
583 
/// Sets the Expires header to one year from now, and
/// Cache-Control to a public max-age of 31536000 seconds.
void cacheForever(ref Headers headers)
{
	auto expiry = Clock.currTime();
	expiry.add!"years"(1);

	headers["Expires"] = httpTime(expiry);
	headers["Cache-Control"] = "public, max-age=31536000";
}
589 
/// Formats the given time for use in HTTP headers:
/// converted to UTC, in the RFC2822 time format.
string httpTime(SysTime time)
{
	// Apache is bad at timezones, so always emit UTC
	return time.toUTC().formatTime!(TimeFormats.RFC2822)();
}
596 
597 import std.algorithm : sort;
598 
/// Parses a list in the format of "a, b, c;q=0.5, d" and returns
/// an array of items sorted by "q" (["a", "b", "d", "c"]).
/// Items without a "q" parameter have a weight of 1.
/// Items of equal weight keep their original relative order.
/// Whitespace around items and around their parameters is ignored,
/// so "c; q=0.5" is treated the same as "c;q=0.5".
/// Throws: a conversion exception if a "q" value is not a number.
string[] parseItemList(string s)
{
	static struct Item
	{
		float q = 1.0;
		string str;

		this(string s)
		{
			auto params = s.split(";");
			if (!params.length) return; // empty item
			str = params[0].strip();
			foreach (param; params[1..$])
			{
				// Whitespace is allowed on either side of the ';'
				param = param.strip();
				if (param.startsWith("q="))
					q = to!float(param[2..$].strip());
			}
		}
	}

	auto items = s
		.split(",")
		.map!(a => Item(a.strip()))
		.array();
	// A stable sort is required to preserve the order of equally-weighted items.
	items.sort!((a, b) => a.q > b.q, SwapStrategy.stable)();
	return items
		.map!(a => a.str)
		.array();
}

unittest
{
	assert(parseItemList("a, b, c;q=0.5, d") == ["a", "b", "d", "c"]);
	assert(parseItemList("a; q=0.5, b") == ["b", "a"]);
	assert(parseItemList("a ;q=0.2, b;q=0.8, c") == ["c", "b", "a"]);
	assert(parseItemList("") == []);
}
630 
// TODO: optimize / move to HtmlWriter
/// Escapes '<', '>' and '&' as HTML entities.
/// Additionally, a byte with the value 0xDF which is not the start of a
/// UTF-8 sequence is escaped as "&szlig;". All other bytes are copied
/// unchanged.
string httpEscape(string str)
{
	string result;
	foreach (i, char c; str)
		switch (c)
		{
			case '<':
				result ~= "&lt;";
				break;
			case '>':
				result ~= "&gt;";
				break;
			case '&':
				result ~= "&amp;";
				break;
			case '\xDF':  // the beta-like symbol, when not encoded as UTF-8
				// 0xDF is also the lead byte of two-byte UTF-8 sequences
				// (U+07C0..U+07FF). If a continuation byte follows, this is
				// such a sequence, and must be copied through intact.
				if (i + 1 < str.length && (str[i + 1] & 0xC0) == 0x80)
					result ~= c;
				else
					result ~= "&szlig;";
				break;
			default:
				result ~= c;
		}
	return result;
}
655 
656 public import ae.net.ietf.url : UrlParameters, encodeUrlParameter, encodeUrlParameters, decodeUrlParameter, decodeUrlParameters;
657 
/// One part of a multipart message body
/// (see `encodeMultipart` / `decodeMultipart`).
struct MultipartPart
{
	/// The part's own headers.
	Headers headers;
	/// The part's content.
	Data data;
}
663 
/// Serializes the given parts into a multipart body.
/// Each part is written as a "--boundary" line, its headers, an empty
/// line, and its content; the body ends with a "--boundary--" line.
/// The content of a part must not contain the boundary string
/// (checked with an assertion).
Data encodeMultipart(MultipartPart[] parts, string boundary)
{
	string opening = "--" ~ boundary ~ "\r\n";

	Data result;
	foreach (ref part; parts)
	{
		assert((cast(string)part.data.contents).indexOf(boundary) < 0);

		result ~= opening;
		foreach (name, value; part.headers)
			result ~= name ~ ": " ~ value ~ "\r\n";
		result ~= "\r\n"; // empty line separates headers from content
		result ~= part.data;
		result ~= "\r\n";
	}
	result ~= "--" ~ boundary ~ "--\r\n";
	return result;
}
680 
/// Parses a multipart body (in the layout produced by `encodeMultipart`)
/// into its parts.
/// The data must begin with a "--boundary" line and end with a
/// "--boundary--" line, and each part must contain a header section
/// followed by an empty line.
/// Throws: an exception (via `enforce`) if the data does not have this layout.
MultipartPart[] decodeMultipart(Data data, string boundary)
{
	auto payload = cast(char[])data.contents;

	// Cut off the closing delimiter
	auto closing = "\r\n--" ~ boundary ~ "--\r\n";
	enforce(payload.endsWith(closing), "Bad multipart terminator");
	payload = payload[0 .. $ - closing.length];

	// Cut off the first opening delimiter
	auto opening = "--" ~ boundary ~ "\r\n";
	enforce(payload.skipOver(opening), "Bad multipart start");

	// The remaining delimiters are preceded by the previous part's line break
	auto separator = "\r\n" ~ opening;

	MultipartPart[] result;
	foreach (chunk; payload.split(separator))
	{
		// Headers and content are separated by an empty line
		auto pieces = chunk.findSplit("\r\n\r\n");
		enforce(pieces[1], "Can't find headers in multipart part");

		MultipartPart current;
		foreach (headerLine; pieces[0].split("\r\n"))
		{
			auto nameValue = headerLine.findSplit(":");
			auto name = nameValue[0].strip.idup;
			auto value = nameValue[2].strip.idup;
			current.headers[name] = value;
		}
		current.data = Data(pieces[2]);
		result ~= current;
	}
	return result;
}
707 
// Multipart encoding followed by decoding must reproduce the original parts.
unittest
{
	string boundary = "abcde";
	auto original = [
		MultipartPart(Headers(["Foo" : "bar"]), Data.init),
		MultipartPart(Headers(["Baz" : "quux", "Frob" : "xyzzy"]), Data("Content goes here\xFF")),
	];

	auto encoded = original.encodeMultipart(boundary);
	auto decoded = encoded.decodeMultipart(boundary);

	assert(decoded.length == original.length);
	foreach (index; 0 .. original.length)
	{
		assert(original[index].headers == decoded[index].headers);
		assert(original[index].data.contents == decoded[index].data.contents);
	}
}
723 
/// Returns true if `s` begins with `prefix`, comparing
/// code units with ASCII case folding (`std.ascii.toLower`).
private bool asciiStartsWith(string s, string prefix)
{
	import std.ascii : toLower;

	if (prefix.length > s.length)
		return false;

	for (size_t pos = 0; pos < prefix.length; pos++)
	{
		if (toLower(s[pos]) != toLower(prefix[pos]))
			return false;
	}
	return true;
}