1 /**
2  * Concepts shared between HTTP clients and servers.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Stéphan Kochen <stephan@kochen.nl>
12  *   Vladimir Panteleev <vladimir@thecybershadow.net>
13  *   Simon Arlott
14  */
15 
16 module ae.net.http.common;
17 
18 import core.time;
19 
20 import std.algorithm;
21 import std.array;
22 import std.string;
23 import std.conv;
24 import std.ascii;
25 import std.exception;
26 import std.datetime;
27 
28 import ae.net.ietf.headers;
29 import ae.sys.data;
30 import ae.utils.array : amap, afilter, auniq, asort;
31 import ae.utils.text;
32 import ae.utils.time;
33 import zlib = ae.utils.zlib;
34 import gzip = ae.utils.gzip;
35 
/// Common base of HTTP requests and responses: protocol information,
/// headers, body data, and the time the message object was created.
private abstract class HttpMessage
{
public:
	/// URL scheme of the message; initialized to "http".
	string protocol = "http";
	/// HTTP version string; initialized to "1.0".
	string protocolVersion = "1.0";
	/// Message headers.
	Headers headers;
	/// Message body, as a list of data chunks.
	Data[] data;
	/// Wall-clock time at which this object was constructed.
	SysTime creationTime;

	/// Records the construction time.
	this()
	{
		this.creationTime = Clock.currTime();
	}

	/// Time elapsed since this object was constructed.
	@property Duration age()
	{
		auto now = Clock.currTime();
		return now - creationTime;
	}
}
56 
57 /// HTTP request class
58 class HttpRequest : HttpMessage
59 {
60 public:
61 	string method = "GET";
62 	string proxy;
63 
	/// Creates an empty request (method "GET", no resource set).
	this()
	{
	}

	/// Creates a request for the given resource.
	/// The value is assigned through the `resource` property setter, so a
	/// full "http://" / "https://" URL also updates the protocol and the
	/// "Host" header.
	this(string resource)
	{
		this.resource = resource;
	}
72 
	/// Resource part of URL (everything after the hostname)
	@property string resource()
	{
		return _resource;
	}

	/// Sets the resource.
	/// Setting the resource to a full URL will fill in the Host header, as well:
	/// when the value starts with "http://" or "https://" (compared ASCII
	/// case-insensitively), the protocol member is updated, the host (and port,
	/// if any) are written through the `host` / `port` setters, and only the part
	/// starting at the first '/' after the host (or "/" if there is none) is
	/// kept as the resource.
	/// Throws a conversion exception if a port found by the code below is not a valid ushort.
	@property void resource(string value)
	{
		_resource = value;

		// applies to both Client/Server as some clients put a full URL in the GET line instead of using a "Host" header
		// Local variable (shadows the `protocol` member); stays null unless a known scheme prefix is found.
		string protocol;
		if (_resource.asciiStartsWith("http://"))
			protocol = "http";
		else
		if (_resource.asciiStartsWith("https://"))
			protocol = "https";

		if (protocol)
		{
			this.protocol = protocol;

			// Skip the scheme and the "://" separator (3 characters).
			value = value[protocol.length+3..$];
			auto pathstart = value.indexOf('/');
			if (pathstart == -1)
			{
				// No path after the host: the whole remainder is the host, the resource is the root.
				host = value;
				_resource = "/";
			}
			else
			{
				host = value[0..pathstart];
				_resource = value[pathstart..$];
			}
			// The `host` getter returns the "Host" header cut at its last ':', so a ':'
			// is only found here when the stored header value contains more than one ':'.
			// NOTE(review): presumably this cleans up after the `host` setter appended a
			// previously configured non-default port to a "name:port" value - confirm.
			auto portstart = host().indexOf(':');
			if (portstart != -1)
			{
				port = to!ushort(host[portstart+1..$]);
				host = host[0..portstart];
			}
		}
	}
116 
117 	/// The hostname, without the port number
118 	@property string host()
119 	{
120 		string _host = headers.get("Host", null);
121 		auto colon = _host.lastIndexOf(":");
122 		return colon<0 ? _host : _host[0..colon];
123 	}
124 
125 	@property void host(string _host)
126 	{
127 		auto _port = this.port;
128 		headers["Host"] = _port==protocolDefaultPort ? _host : _host ~ ":" ~ text(_port);
129 	}
130 
131 	@property ushort protocolDefaultPort()
132 	{
133 		switch (protocol)
134 		{
135 			case "http":
136 				return 80;
137 			case "https":
138 				return 443;
139 			default:
140 				throw new Exception("Unknown protocol: " ~ protocol);
141 		}
142 	}
143 
144 	/// Port number, from Host header (defaults to 80)
145 	@property ushort port()
146 	{
147 		if ("Host" in headers)
148 		{
149 			string _host = headers["Host"];
150 			auto colon = _host.lastIndexOf(":");
151 			return colon<0 ? protocolDefaultPort : to!ushort(_host[colon+1..$]);
152 		}
153 		else
154 			return _port ? _port : protocolDefaultPort;
155 	}
156 
157 	@property void port(ushort _port)
158 	{
159 		if ("Host" in headers)
160 		{
161 			if (_port == protocolDefaultPort)
162 				headers["Host"] = this.host;
163 			else
164 				headers["Host"] = this.host ~ ":" ~ text(_port);
165 		}
166 		else
167 			this._port = _port;
168 	}
169 
170 	/// Path part of request (until the ?)
171 	@property string path()
172 	{
173 		auto p = resource.indexOf('?');
174 		if (p >= 0)
175 			return resource[0..p];
176 		else
177 			return resource;
178 	}
179 
180 	/// Query string part of request (atfer the ?)
181 	@property string queryString()
182 	{
183 		auto p = resource.indexOf('?');
184 		if (p >= 0)
185 			return resource[p+1..$];
186 		else
187 			return null;
188 	}
189 
190 	/// ditto
191 	@property void queryString(string value)
192 	{
193 		auto p = resource.indexOf('?');
194 		if (p >= 0)
195 			resource = resource[0..p];
196 		resource = resource ~ '?' ~ value;
197 	}
198 
199 	/// AA of query string parameters
200 	@property UrlParameters urlParameters()
201 	{
202 		return decodeUrlParameters(queryString);
203 	}
204 
205 	/// ditto
206 	@property void urlParameters(UrlParameters parameters)
207 	{
208 		queryString = encodeUrlParameters(parameters);
209 	}
210 
211 	/// URL without resource (protocol, host and port).
212 	@property string root()
213 	{
214 		return protocol ~ "://" ~ host ~ (port==protocolDefaultPort ? null : ":" ~ to!string(port));
215 	}
216 
217 	/// Reconstruct full URL from host, port and resource
218 	@property string url()
219 	{
220 		return root ~ resource;
221 	}
222 
223 	/// Full URL without query parameters or fragment.
224 	@property string baseURL()
225 	{
226 		return root ~ resource.findSplit("?")[0];
227 	}
228 
229 	@property string proxyHost()
230 	{
231 		auto portstart = proxy.indexOf(':');
232 		if (portstart != -1)
233 			return proxy[0..portstart];
234 		return proxy;
235 	}
236 
237 	@property ushort proxyPort()
238 	{
239 		auto portstart = proxy.indexOf(':');
240 		if (portstart != -1)
241 			return to!ushort(proxy[portstart+1..$]);
242 		return 80;
243 	}
244 
245 	/// Parse the first line in a HTTP request ("METHOD /resource HTTP/1.x").
246 	void parseRequestLine(string reqLine)
247 	{
248 		enforce(reqLine.length > 10, "Request line too short");
249 		auto methodEnd = reqLine.indexOf(' ');
250 		enforce(methodEnd > 0, "Malformed request line");
251 		method = reqLine[0 .. methodEnd];
252 		reqLine = reqLine[methodEnd + 1 .. reqLine.length];
253 
254 		auto resourceEnd = reqLine.lastIndexOf(' ');
255 		enforce(resourceEnd > 0, "Malformed request line");
256 		resource = reqLine[0 .. resourceEnd];
257 
258 		string protocol = reqLine[resourceEnd+1..$];
259 		enforce(protocol.startsWith("HTTP/"));
260 		protocolVersion = protocol[5..$];
261 	}
262 
263 	/// Decodes submitted form data, and returns an AA of values.
264 	UrlParameters decodePostData()
265 	{
266 		auto data = cast(string)data.joinToHeap();
267 		if (data.length is 0)
268 			return UrlParameters(null);
269 
270 		string contentType = headers.get("Content-Type", "");
271 
272 		switch (contentType.findSplit(";")[0])
273 		{
274 			case "application/x-www-form-urlencoded":
275 				return decodeUrlParameters(data);
276 			case "":
277 				throw new Exception("No Content-Type");
278 			default:
279 				throw new Exception("Unknown Content-Type: " ~ contentType);
280 		}
281 	}
282 
283 	/// Get list of hosts as specified in headers (e.g. X-Forwarded-For).
284 	/// First item in returned array is the node furthest away.
285 	/// Duplicates are removed.
286 	/// Specify socket remote address in remoteHost to add it to the list.
287 	string[] remoteHosts(string remoteHost = null)
288 	{
289 		return
290 			(headers.get("X-Forwarded-For", null).split(",").amap!(std..string.strip)() ~
291 			 headers.get("X-Forwarded-Host", null) ~
292 			 remoteHost)
293 			.afilter!`a && a != "unknown"`()
294 			.auniq();
295 	}
296 
297 	unittest
298 	{
299 		auto req = new HttpRequest();
300 		assert(req.remoteHosts() == []);
301 		assert(req.remoteHosts("3.3.3.3") == ["3.3.3.3"]);
302 
303 		req.headers["X-Forwarded-For"] = "1.1.1.1, 2.2.2.2";
304 		req.headers["X-Forwarded-Host"] = "2.2.2.2";
305 		assert(req.remoteHosts("3.3.3.3") == ["1.1.1.1", "2.2.2.2", "3.3.3.3"]);
306 	}
307 
308 	/// Basic cookie parsing
309 	string[string] getCookies()
310 	{
311 		string[string] cookies;
312 		foreach (segment; headers.get("Cookie", null).split(";"))
313 		{
314 			segment = segment.strip();
315 			auto p = segment.indexOf('=');
316 			if (p > 0)
317 				cookies[segment[0..p]] = segment[p+1..$];
318 		}
319 		return cookies;
320 	}
321 
322 private:
323 	string _resource;
324 	ushort _port = 0; // used only when no "Host" in headers; otherwise, taken from there
325 }
326 
/// HTTP response status codes.
/// Member values are the numeric codes sent on the status line.
enum HttpStatusCode : ushort
{
	// 1xx
	Continue=100,
	SwitchingProtocols=101,

	// 2xx
	OK=200,
	Created=201,
	Accepted=202,
	NonAuthoritativeInformation=203,
	NoContent=204,
	ResetContent=205,
	PartialContent=206,

	// 3xx
	MultipleChoices=300,
	MovedPermanently=301,
	Found=302,
	SeeOther=303,
	NotModified=304,
	UseProxy=305,
	//(Unused)=306,
	TemporaryRedirect=307,

	// 4xx
	BadRequest=400,
	Unauthorized=401,
	PaymentRequired=402,
	Forbidden=403,
	NotFound=404,
	MethodNotAllowed=405,
	NotAcceptable=406,
	ProxyAuthenticationRequired=407,
	RequestTimeout=408,
	Conflict=409,
	Gone=410,
	LengthRequired=411,
	PreconditionFailed=412,
	RequestEntityTooLarge=413,
	RequestUriTooLong=414,
	UnsupportedMediaType=415,
	RequestedRangeNotSatisfiable=416,
	ExpectationFailed=417,

	// 5xx
	InternalServerError=500,
	NotImplemented=501,
	BadGateway=502,
	ServiceUnavailable=503,
	GatewayTimeout=504,
	HttpVersionNotSupported=505
}
376 
377 /// HTTP reply class
378 class HttpResponse : HttpMessage
379 {
380 public:
381 	HttpStatusCode status;
382 	string statusMessage;
383 
384 	int compressionLevel = 1;
385 
386 	static string getStatusMessage(HttpStatusCode code)
387 	{
388 		switch(code)
389 		{
390 			case 100: return "Continue";
391 			case 101: return "Switching Protocols";
392 
393 			case 200: return "OK";
394 			case 201: return "Created";
395 			case 202: return "Accepted";
396 			case 203: return "Non-Authoritative Information";
397 			case 204: return "No Content";
398 			case 205: return "Reset Content";
399 			case 206: return "Partial Content";
400 			case 300: return "Multiple Choices";
401 			case 301: return "Moved Permanently";
402 			case 302: return "Found";
403 			case 303: return "See Other";
404 			case 304: return "Not Modified";
405 			case 305: return "Use Proxy";
406 			case 306: return "(Unused)";
407 			case 307: return "Temporary Redirect";
408 
409 			case 400: return "Bad Request";
410 			case 401: return "Unauthorized";
411 			case 402: return "Payment Required";
412 			case 403: return "Forbidden";
413 			case 404: return "Not Found";
414 			case 405: return "Method Not Allowed";
415 			case 406: return "Not Acceptable";
416 			case 407: return "Proxy Authentication Required";
417 			case 408: return "Request Timeout";
418 			case 409: return "Conflict";
419 			case 410: return "Gone";
420 			case 411: return "Length Required";
421 			case 412: return "Precondition Failed";
422 			case 413: return "Request Entity Too Large";
423 			case 414: return "Request-URI Too Long";
424 			case 415: return "Unsupported Media Type";
425 			case 416: return "Requested Range Not Satisfiable";
426 			case 417: return "Expectation Failed";
427 
428 			case 500: return "Internal Server Error";
429 			case 501: return "Not Implemented";
430 			case 502: return "Bad Gateway";
431 			case 503: return "Service Unavailable";
432 			case 504: return "Gateway Timeout";
433 			case 505: return "HTTP Version Not Supported";
434 			default: return null;
435 		}
436 	}
437 
438 	/// Set the response status code and message
439 	void setStatus(HttpStatusCode code)
440 	{
441 		status = code;
442 		statusMessage = getStatusMessage(code);
443 	}
444 
445 	final void parseStatusLine(string statusLine)
446 	{
447 		auto versionEnd = statusLine.indexOf(' ');
448 		if (versionEnd == -1)
449 			throw new Exception("Malformed status line");
450 		protocolVersion = statusLine[0..versionEnd];
451 		statusLine = statusLine[versionEnd+1..statusLine.length];
452 
453 		auto statusEnd = statusLine.indexOf(' ');
454 		string statusCode;
455 		if (statusEnd >= 0)
456 		{
457 			statusCode = statusLine[0 .. statusEnd];
458 			statusMessage = statusLine[statusEnd+1..statusLine.length];
459 		}
460 		else
461 		{
462 			statusCode = statusLine;
463 			statusMessage = null;
464 		}
465 		status = cast(HttpStatusCode)to!ushort(statusCode);
466 	}
467 
468 	/// If the data is compressed, return the decompressed data
469 	// this is not a property on purpose - to avoid using it multiple times as it will unpack the data on every access
470 	// TODO: there is no reason for above limitation
471 	Data getContent()
472 	{
473 		if ("Content-Encoding" in headers && headers["Content-Encoding"]=="deflate")
474 			return zlib.uncompress(data).joinData();
475 		else
476 		if ("Content-Encoding" in headers && headers["Content-Encoding"]=="gzip")
477 			return gzip.uncompress(data).joinData();
478 		else
479 			return data.joinData();
480 		assert(0);
481 	}
482 
483 	protected void compressWithDeflate()
484 	{
485 		data = zlib.compress(data, zlib.ZlibOptions(compressionLevel));
486 	}
487 
488 	protected void compressWithGzip()
489 	{
490 		data = gzip.compress(data, zlib.ZlibOptions(compressionLevel));
491 	}
492 
493 	/// Called by the server to compress content, if possible/appropriate
494 	final package void optimizeData(in ref Headers requestHeaders)
495 	{
496 		auto acceptEncoding = requestHeaders.get("Accept-Encoding", null);
497 		if (acceptEncoding && "Content-Encoding" !in headers)
498 		{
499 			auto contentType = headers.get("Content-Type", null);
500 			if (contentType.startsWith("text/")
501 			 || contentType == "application/json"
502 			 || contentType == "image/vnd.microsoft.icon"
503 			 || contentType == "image/svg+xml")
504 			{
505 				auto supported = parseItemList(acceptEncoding) ~ ["*"];
506 				foreach (method; supported)
507 					switch (method)
508 					{
509 						case "deflate":
510 							headers["Content-Encoding"] = method;
511 							headers.add("Vary", "Accept-Encoding");
512 							compressWithDeflate();
513 							return;
514 						case "gzip":
515 							headers["Content-Encoding"] = method;
516 							headers.add("Vary", "Accept-Encoding");
517 							compressWithGzip();
518 							return;
519 						case "*":
520 							if("Content-Encoding" in headers)
521 								headers.remove("Content-Encoding");
522 							return;
523 						default:
524 							break;
525 					}
526 				assert(0);
527 			}
528 		}
529 	}
530 
531 	/// Called by the server to apply range request.
532 	final package void sliceData(in ref Headers requestHeaders)
533 	{
534 		if (status == HttpStatusCode.OK)
535 		{
536 			if ("If-Modified-Since" in requestHeaders &&
537 				"Last-Modified" in headers &&
538 				headers["Last-Modified"].parseTime!(TimeFormats.RFC2822) <= requestHeaders["If-Modified-Since"].parseTime!(TimeFormats.RFC2822))
539 			{
540 				setStatus(HttpStatusCode.NotModified);
541 				data = null;
542 				return;
543 			}
544 
545 			headers["Accept-Ranges"] = "bytes";
546 			auto prange = "Range" in requestHeaders;
547 			if (prange && (*prange).startsWith("bytes="))
548 			{
549 				auto ranges = (*prange)[6..$].split(",")[0].split("-").map!(s => s.length ? s.to!size_t : size_t.max)().array();
550 				enforce(ranges.length == 2, "Bad range request");
551 				ranges[1]++;
552 				auto datum = DataSetBytes(this.data);
553 				if (ranges[1] == size_t.min) // was not specified (size_t.max overflowed into 0)
554 					ranges[1] = datum.length;
555 				if (ranges[0] >= datum.length || ranges[0] >= ranges[1] || ranges[1] > datum.length)
556 				{
557 					//writeError(HttpStatusCode.RequestedRangeNotSatisfiable);
558 					setStatus(HttpStatusCode.RequestedRangeNotSatisfiable);
559 					data = [Data(statusMessage)];
560 					return;
561 				}
562 				else
563 				{
564 					setStatus(HttpStatusCode.PartialContent);
565 					this.data = datum[ranges[0]..ranges[1]];
566 					headers["Content-Range"] = "bytes %d-%d/%d".format(ranges[0], ranges[0] + this.data.bytes.length - 1, datum.length);
567 				}
568 			}
569 		}
570 	}
571 }
572 
/// Sets the "Expires", "Cache-Control" and "Pragma" headers to values that
/// tell clients not to cache the response.
void disableCache(ref Headers headers)
{
	static immutable string[2][] noCacheHeaders = [
		["Expires", "Mon, 26 Jul 1997 05:00:00 GMT"],  // a date in the past; disables IE caching
		["Cache-Control", "no-cache, must-revalidate"],
		["Pragma", "no-cache"],
	];

	foreach (entry; noCacheHeaders)
		headers[entry[0]] = entry[1];
}
580 
/// Marks the response as publicly cacheable for one year: "Expires" is set to
/// the current time plus one year, "Cache-Control" to a max-age of 31536000 seconds.
void cacheForever(ref Headers headers)
{
	auto expiry = Clock.currTime();
	expiry.add!"years"(1);
	headers["Expires"] = httpTime(expiry);
	headers["Cache-Control"] = "public, max-age=31536000";
}
586 
/// Formats a time stamp for use in HTTP headers, in RFC 2822 format.
/// The time is converted to UTC first (Apache is bad at timezones).
string httpTime(SysTime time)
{
	auto utcTime = time.toUTC();
	return utcTime.formatTime!(TimeFormats.RFC2822)();
}
593 
594 import std.algorithm : sort;
595 
/// Parses a list in the format of "a, b, c;q=0.5, d" and returns
/// an array of items sorted by descending "q" (["a", "b", "d", "c"]).
///
/// Items without a "q" parameter have a weight of 1. Whitespace around items,
/// around the ';' separators and around the weight is ignored. Items with equal
/// weight keep the order in which they appear in the input.
///
/// Throws: a conversion exception when a "q" value is not a valid number.
string[] parseItemList(string s)
{
	import std.algorithm : map, sort, startsWith, SwapStrategy;
	import std.array : array, split;
	import std.conv : to;
	import std.string : strip;

	static struct Item
	{
		float q = 1.0;
		string str;

		this(string s)
		{
			auto params = s.split(";");
			if (!params.length) return;
			str = params[0].strip();
			foreach (param; params[1..$])
			{
				// Optional whitespace is allowed around ';', e.g. "gzip; q=0.5".
				param = param.strip();
				if (param.startsWith("q="))
					q = to!float(param[2..$].strip());
			}
		}
	}

	auto items = s
		.split(",")
		.map!(a => Item(a.strip()))()
		.array();
	// A stable sort keeps the sender's order among equally weighted items.
	items.sort!((a, b) => a.q > b.q, SwapStrategy.stable)();
	return items.map!(a => a.str)().array();
}

unittest
{
	assert(parseItemList("a, b, c;q=0.5, d") == ["a", "b", "d", "c"]);
	// Whitespace around the parameter separator is tolerated.
	assert(parseItemList("gzip; q=0.5, deflate ; q=0.8, br") == ["br", "deflate", "gzip"]);
	// Equal weights keep their input order.
	assert(parseItemList("a;q=0.5, b, c, d, e;q=0.5") == ["b", "c", "d", "a", "e"]);
	assert(parseItemList("").length == 0);
}
627 
// TODO: move to HtmlWriter
/// Escapes '<', '>' and '&' as HTML entities, and 'ß' as "&szlig;".
///
/// The input is processed byte by byte and all other bytes are copied
/// unchanged, so multi-byte UTF-8 sequences pass through intact. 'ß' is
/// recognized both as its UTF-8 encoding (bytes C3 9F) and as a lone legacy
/// 0xDF byte; a 0xDF byte followed by a UTF-8 continuation byte is the start
/// of another character (U+07C0..U+07FF) and is left alone.
string httpEscape(string str)
{
	import std.array : appender;

	auto result = appender!string();
	for (size_t i = 0; i < str.length; i++)
	{
		immutable char c = str[i];
		immutable bool hasNext = i + 1 < str.length;
		switch (c)
		{
			case '<':
				result.put("&lt;");
				break;
			case '>':
				result.put("&gt;");
				break;
			case '&':
				result.put("&amp;");
				break;
			case '\xC3':  // possible first byte of UTF-8 'ß' (C3 9F)
				if (hasNext && str[i + 1] == '\x9F')
				{
					result.put("&szlig;");
					i++;  // consume the second byte as well
				}
				else
					result.put(c);
				break;
			case '\xDF':  // legacy single-byte 'ß', unless it starts a UTF-8 sequence
				if (hasNext && (str[i + 1] & 0xC0) == 0x80)
					result.put(c);
				else
					result.put("&szlig;");
				break;
			default:
				result.put(c);
		}
	}
	return result.data;
}
652 
653 public import ae.net.ietf.url : UrlParameters, encodeUrlParameter, encodeUrlParameters, decodeUrlParameter, decodeUrlParameters;
654 
/// One part of a multipart message, as consumed by `encodeMultipart`
/// and produced by `decodeMultipart`.
struct MultipartPart
{
	/// Headers of this part, keyed by header name.
	string[string] headers;
	/// Body of this part.
	Data data;
}
660 
/// Serializes the given parts into a multipart body.
/// Each part is written as "--boundary", its "Name: value" header lines, an
/// empty line and its data, each terminated by CRLF; the body ends with
/// "--boundary--". The part data must not contain the boundary (checked with
/// an assert only).
Data encodeMultipart(MultipartPart[] parts, string boundary)
{
	string delimiter = "--" ~ boundary;

	Data result;
	foreach (ref part; parts)
	{
		result ~= delimiter ~ "\r\n";
		foreach (name, value; part.headers)
			result ~= name ~ ": " ~ value ~ "\r\n";
		result ~= "\r\n";

		assert((cast(string)part.data.contents).indexOf(boundary) < 0);
		result ~= part.data;
		result ~= "\r\n";
	}
	result ~= delimiter ~ "--\r\n";
	return result;
}
677 
/// Parses a multipart body (as produced by `encodeMultipart`) into its parts.
///
/// The data must start with "--boundary" CRLF and end with CRLF "--boundary--"
/// CRLF. Each part must contain an empty line (CRLF CRLF) separating its header
/// lines from its body. Header names and values are whitespace-stripped; a
/// header line without ':' yields an empty value.
///
/// Throws: Exception (via enforce) when the terminator, the initial delimiter
/// or a part's header/body separator is missing.
MultipartPart[] decodeMultipart(Data data, string boundary)
{
	auto s = cast(char[])data.contents;

	auto term = "\r\n--" ~ boundary ~ "--\r\n";
	enforce(s.endsWith(term), "Bad multipart terminator");
	s = s[0..$-term.length];

	auto delim = "--" ~ boundary ~ "\r\n";
	enforce(s.skipOver(delim), "Bad multipart start");

	// Subsequent delimiters are preceded by the CRLF that ends the previous part.
	delim = "\r\n" ~ delim;
	auto parts = s.split(delim);

	MultipartPart[] result;
	foreach (part; parts)
	{
		auto segs = part.findSplit("\r\n\r\n");
		// Test the length: when the separator is not found, findSplit returns an
		// empty but non-null slice, which a plain truth test would accept.
		enforce(segs[1].length, "Can't find headers in multipart part");

		MultipartPart p;
		foreach (line; segs[0].split("\r\n"))
		{
			auto hparts = line.findSplit(":");
			p.headers[hparts[0].strip.idup] = hparts[2].strip.idup;
		}
		p.data = Data(segs[2]);
		result ~= p;
	}
	return result;
}
704 
// Round trip: encoding parts and decoding the result must give back the same
// headers and data, including an empty part and non-UTF-8 body bytes.
unittest
{
	auto boundary = "abcde";
	auto original = [
		MultipartPart(["Foo" : "bar"], Data.init),
		MultipartPart(["Baz" : "quux", "Frob" : "xyzzy"], Data("Content goes here\xFF")),
	];

	auto decoded = decodeMultipart(encodeMultipart(original, boundary), boundary);
	assert(decoded.length == original.length);
	foreach (i, ref expected; original)
	{
		assert(expected.headers == decoded[i].headers);
		assert(expected.data.contents == decoded[i].data.contents);
	}
}
720 
/// Returns true when `s` begins with `prefix`, comparing byte by byte and
/// ignoring the case of ASCII letters.
private bool asciiStartsWith(string s, string prefix)
{
	import std.ascii : toLower;

	if (prefix.length > s.length)
		return false;

	for (size_t i = 0; i < prefix.length; i++)
	{
		if (toLower(s[i]) != toLower(prefix[i]))
			return false;
	}
	return true;
}