1 /**
2  * Concepts shared between HTTP clients and servers.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Stéphan Kochen <stephan@kochen.nl>
12  *   Vladimir Panteleev <vladimir@thecybershadow.net>
13  *   Simon Arlott
14  */
15 
16 module ae.net.http.common;
17 
18 import core.time;
19 
20 import std.algorithm;
21 import std.array;
22 import std.string;
23 import std.conv;
24 import std.ascii;
25 import std.exception;
26 import std.datetime;
27 
28 import ae.net.ietf.headers;
29 import ae.sys.data;
30 import ae.utils.array : amap, afilter, auniq, asort;
31 import ae.utils.text;
32 import ae.utils.time;
33 import zlib = ae.utils.zlib;
34 import gzip = ae.utils.gzip;
35 
/// Base HTTP message class: state shared by requests and responses.
private abstract class HttpMessage
{
public:
	/// URL scheme of the message; initialised to "http".
	string protocol = "http";

	/// Protocol version string; initialised to "1.0".
	string protocolVersion = "1.0";

	/// Message headers.
	Headers headers;

	/// Message payload, held as an array of Data chunks.
	Data[] data;

	/// Wall-clock time at which this object was constructed.
	SysTime creationTime;

	/// Stamps the message with the current time.
	this()
	{
		this.creationTime = Clock.currTime();
	}

	/// Time elapsed between construction of this object and now.
	@property Duration age()
	{
		auto now = Clock.currTime();
		return now - this.creationTime;
	}
}
56 
/// HTTP request class.
///
/// Stores the request method and resource, and derives host, port, path
/// and query information from the resource and the "Host" header.
class HttpRequest : HttpMessage
{
public:
	/// Request method (verb); initialised to "GET".
	string method = "GET";

	/// Proxy address in the form "host" or "host:port"
	/// (see proxyHost / proxyPort).
	string proxy;

	/// Creates an empty request.
	this()
	{
	}

	/// Creates a request for the given resource.
	/// The resource may be a full URL; see the `resource` setter.
	this(string resource)
	{
		this.resource = resource;
	}

	/// Resource part of URL (everything after the hostname)
	@property string resource()
	{
		return _resource;
	}

	/// Sets the resource.
	/// Setting the resource to a full "http://" or "https://" URL
	/// (scheme matched case-insensitively) also sets `protocol`, fills in
	/// the Host header, and leaves only the path part (or "/" when the URL
	/// has none) in `resource`.
	/// Throws: ConvException if the URL contains a non-numeric port.
	@property void resource(string value)
	{
		_resource = value;

		// applies to both Client/Server as some clients put a full URL in the GET line instead of using a "Host" header
		string scheme;
		if (_resource.asciiStartsWith("http://"))
			scheme = "http";
		else
		if (_resource.asciiStartsWith("https://"))
			scheme = "https";

		if (scheme)
		{
			this.protocol = scheme;

			// Skip "<scheme>://"
			value = value[scheme.length+3..$];
			auto pathstart = value.indexOf('/');
			if (pathstart == -1)
			{
				host = value;
				_resource = "/";
			}
			else
			{
				host = value[0..pathstart];
				_resource = value[pathstart..$];
			}
			// The host getter already strips everything after the last ':',
			// so this only triggers when more than one ':' was present.
			auto portstart = host().indexOf(':');
			if (portstart != -1)
			{
				port = to!ushort(host[portstart+1..$]);
				host = host[0..portstart];
			}
		}
	}

	/// The hostname, without the port number.
	/// Taken from the "Host" header, cut at the last ':' if there is one.
	@property string host()
	{
		string _host = headers.get("Host", null);
		auto colon = _host.lastIndexOf(":");
		return colon<0 ? _host : _host[0..colon];
	}

	/// Sets the "Host" header, appending ":port" when the current
	/// port differs from the protocol's default port.
	@property void host(string _host)
	{
		auto _port = this.port;
		headers["Host"] = _port==protocolDefaultPort ? _host : _host ~ ":" ~ text(_port);
	}

	/// Default port for the current `protocol` (80 for "http", 443 for "https").
	/// Throws: Exception for any other protocol.
	@property ushort protocolDefaultPort()
	{
		switch (protocol)
		{
			case "http":
				return 80;
			case "https":
				return 443;
			default:
				throw new Exception("Unknown protocol: " ~ protocol);
		}
	}

	/// Port number.
	/// Taken from the "Host" header when that header is present (falling back
	/// to the protocol's default port if the header has no port); otherwise
	/// the explicitly set port, or the protocol's default port if none was set.
	@property ushort port()
	{
		if ("Host" in headers)
		{
			string _host = headers["Host"];
			auto colon = _host.lastIndexOf(":");
			return colon<0 ? protocolDefaultPort : to!ushort(_host[colon+1..$]);
		}
		else
			return _port ? _port : protocolDefaultPort;
	}

	/// Sets the port. When a "Host" header exists it is rewritten to carry
	/// the new port (omitted if it is the protocol default); otherwise the
	/// port is remembered until a host is set.
	@property void port(ushort _port)
	{
		if ("Host" in headers)
		{
			if (_port == protocolDefaultPort)
				headers["Host"] = this.host;
			else
				headers["Host"] = this.host ~ ":" ~ text(_port);
		}
		else
			this._port = _port;
	}

	/// Path part of request (until the ?)
	@property string path()
	{
		auto p = resource.indexOf('?');
		if (p >= 0)
			return resource[0..p];
		else
			return resource;
	}

	/// Query string part of request (after the ?), or null if there is none.
	@property string queryString()
	{
		auto p = resource.indexOf('?');
		if (p >= 0)
			return resource[p+1..$];
		else
			return null;
	}

	/// Query string parameters, decoded with decodeUrlParameters.
	@property UrlParameters urlParameters()
	{
		return decodeUrlParameters(queryString);
	}

	/// Reconstruct full URL from protocol, host, port and resource.
	/// The port is included (as ":port") only when it is not the
	/// protocol's default port.
	@property string url()
	{
		return protocol ~ "://" ~ host ~ (port==protocolDefaultPort ? null : ":" ~ to!string(port)) ~ resource;
	}

	/// Host part of `proxy` (everything before the first ':').
	@property string proxyHost()
	{
		auto portstart = proxy.indexOf(':');
		if (portstart != -1)
			return proxy[0..portstart];
		return proxy;
	}

	/// Port part of `proxy` (after the first ':'), or 80 if none is given.
	@property ushort proxyPort()
	{
		auto portstart = proxy.indexOf(':');
		if (portstart != -1)
			return to!ushort(proxy[portstart+1..$]);
		return 80;
	}

	/// Parse the first line in a HTTP request ("METHOD /resource HTTP/1.x").
	/// Sets `method`, `resource` and `protocolVersion` (without the "HTTP/" prefix).
	/// Throws: Exception if the line is malformed.
	void parseRequestLine(string reqLine)
	{
		enforce(reqLine.length > 10, "Request line too short");
		auto methodEnd = reqLine.indexOf(' ');
		enforce(methodEnd > 0, "Malformed request line");
		method = reqLine[0 .. methodEnd];
		reqLine = reqLine[methodEnd + 1 .. reqLine.length];

		// The protocol token is the last space-separated field; everything
		// between the method and it is the resource.
		auto resourceEnd = reqLine.lastIndexOf(' ');
		enforce(resourceEnd > 0, "Malformed request line");
		resource = reqLine[0 .. resourceEnd];

		string protocol = reqLine[resourceEnd+1..$];
		enforce(protocol.startsWith("HTTP/"));
		protocolVersion = protocol[5..$];
	}

	/// Decodes submitted form data, and returns an AA of values.
	/// Only "application/x-www-form-urlencoded" bodies are supported.
	/// Throws: Exception if the body is non-empty and the Content-Type is
	/// missing or not supported.
	UrlParameters decodePostData()
	{
		auto postData = cast(string)this.data.joinToHeap();
		if (postData.length is 0)
			return UrlParameters(null);

		string contentType = headers.get("Content-Type", "");

		// Ignore any parameters after ';' (e.g. charset)
		switch (contentType.findSplit(";")[0])
		{
			case "application/x-www-form-urlencoded":
				return decodeUrlParameters(postData);
			case "":
				throw new Exception("No Content-Type");
			default:
				throw new Exception("Unknown Content-Type: " ~ contentType);
		}
	}

	/// Get list of hosts as specified in headers (e.g. X-Forwarded-For).
	/// First item in returned array is the node furthest away.
	/// Duplicates are removed.
	/// Specify socket remote address in remoteHost to add it to the list.
	string[] remoteHosts(string remoteHost = null)
	{
		return
			(headers.get("X-Forwarded-For", null).split(",").amap!(std.string.strip)() ~
			 headers.get("X-Forwarded-Host", null) ~
			 remoteHost)
			.afilter!`a && a != "unknown"`()
			.auniq();
	}

	unittest
	{
		auto req = new HttpRequest();
		assert(req.remoteHosts() == []);
		assert(req.remoteHosts("3.3.3.3") == ["3.3.3.3"]);

		req.headers["X-Forwarded-For"] = "1.1.1.1, 2.2.2.2";
		req.headers["X-Forwarded-Host"] = "2.2.2.2";
		assert(req.remoteHosts("3.3.3.3") == ["1.1.1.1", "2.2.2.2", "3.3.3.3"]);
	}

	unittest
	{
		// A full URL is split into host, port and resource, and can be rebuilt.
		auto req = new HttpRequest("http://example.com:8080/path?a=b");
		assert(req.host == "example.com");
		assert(req.port == 8080);
		assert(req.path == "/path");
		assert(req.queryString == "a=b");
		assert(req.url == "http://example.com:8080/path?a=b");

		// The default port is omitted from the rebuilt URL.
		auto secure = new HttpRequest("https://example.com/");
		assert(secure.port == 443);
		assert(secure.url == "https://example.com/");
	}

	/// Basic cookie parsing.
	/// Splits the "Cookie" header on ';' and returns name => value pairs;
	/// segments without a '=' after a non-empty name are ignored.
	string[string] getCookies()
	{
		string[string] cookies;
		foreach (segment; headers.get("Cookie", null).split(";"))
		{
			segment = segment.strip();
			auto p = segment.indexOf('=');
			if (p > 0)
				cookies[segment[0..p]] = segment[p+1..$];
		}
		return cookies;
	}

private:
	string _resource;
	ushort _port = 0; // used only when no "Host" in headers; otherwise, taken from there
}
299 
/// HTTP response status codes, grouped by class (1xx - 5xx).
enum HttpStatusCode : ushort
{
	// 1xx
	Continue                     = 100,
	SwitchingProtocols           = 101,

	// 2xx
	OK                           = 200,
	Created                      = 201,
	Accepted                     = 202,
	NonAuthoritativeInformation  = 203,
	NoContent                    = 204,
	ResetContent                 = 205,
	PartialContent               = 206,

	// 3xx
	MultipleChoices              = 300,
	MovedPermanently             = 301,
	Found                        = 302,
	SeeOther                     = 303,
	NotModified                  = 304,
	UseProxy                     = 305,
	// 306 is unused and intentionally has no member
	TemporaryRedirect            = 307,

	// 4xx
	BadRequest                   = 400,
	Unauthorized                 = 401,
	PaymentRequired              = 402,
	Forbidden                    = 403,
	NotFound                     = 404,
	MethodNotAllowed             = 405,
	NotAcceptable                = 406,
	ProxyAuthenticationRequired  = 407,
	RequestTimeout               = 408,
	Conflict                     = 409,
	Gone                         = 410,
	LengthRequired               = 411,
	PreconditionFailed           = 412,
	RequestEntityTooLarge        = 413,
	RequestUriTooLong            = 414,
	UnsupportedMediaType         = 415,
	RequestedRangeNotSatisfiable = 416,
	ExpectationFailed            = 417,

	// 5xx
	InternalServerError          = 500,
	NotImplemented               = 501,
	BadGateway                   = 502,
	ServiceUnavailable           = 503,
	GatewayTimeout               = 504,
	HttpVersionNotSupported      = 505
}
349 
/// HTTP reply class.
///
/// Holds the status line components and provides helpers for decoding
/// received content and for compressing / slicing content to be sent.
class HttpResponse : HttpMessage
{
public:
	/// Numeric status code.
	HttpStatusCode status;

	/// Status message (reason phrase) accompanying `status`.
	string statusMessage;

	/// Compression level passed to zlib.ZlibOptions by the compressWith* methods.
	int compressionLevel = 1;

	/// Returns the standard message for a status code,
	/// or null if the code is not known.
	static string getStatusMessage(HttpStatusCode code)
	{
		switch(code)
		{
			case 100: return "Continue";
			case 101: return "Switching Protocols";

			case 200: return "OK";
			case 201: return "Created";
			case 202: return "Accepted";
			case 203: return "Non-Authoritative Information";
			case 204: return "No Content";
			case 205: return "Reset Content";
			case 206: return "Partial Content";
			case 300: return "Multiple Choices";
			case 301: return "Moved Permanently";
			case 302: return "Found";
			case 303: return "See Other";
			case 304: return "Not Modified";
			case 305: return "Use Proxy";
			case 306: return "(Unused)";
			case 307: return "Temporary Redirect";

			case 400: return "Bad Request";
			case 401: return "Unauthorized";
			case 402: return "Payment Required";
			case 403: return "Forbidden";
			case 404: return "Not Found";
			case 405: return "Method Not Allowed";
			case 406: return "Not Acceptable";
			case 407: return "Proxy Authentication Required";
			case 408: return "Request Timeout";
			case 409: return "Conflict";
			case 410: return "Gone";
			case 411: return "Length Required";
			case 412: return "Precondition Failed";
			case 413: return "Request Entity Too Large";
			case 414: return "Request-URI Too Long";
			case 415: return "Unsupported Media Type";
			case 416: return "Requested Range Not Satisfiable";
			case 417: return "Expectation Failed";

			case 500: return "Internal Server Error";
			case 501: return "Not Implemented";
			case 502: return "Bad Gateway";
			case 503: return "Service Unavailable";
			case 504: return "Gateway Timeout";
			case 505: return "HTTP Version Not Supported";
			default: return null;
		}
	}

	/// Set the response status code and the matching standard message.
	void setStatus(HttpStatusCode code)
	{
		status = code;
		statusMessage = getStatusMessage(code);
	}

	/// Parse the first line of a HTTP response ("HTTP/1.x CODE [message]").
	/// Sets `protocolVersion`, `status` and `statusMessage` (null if absent).
	/// Throws: Exception if the line contains no space;
	///         ConvException if the status code is not a number.
	final void parseStatusLine(string statusLine)
	{
		auto versionEnd = statusLine.indexOf(' ');
		if (versionEnd == -1)
			throw new Exception("Malformed status line");
		// NOTE(review): this stores the whole first token (e.g. "HTTP/1.1"),
		// whereas HttpRequest.parseRequestLine stores the version without the
		// "HTTP/" prefix. Kept as-is since callers may rely on it - confirm.
		protocolVersion = statusLine[0..versionEnd];
		statusLine = statusLine[versionEnd+1..statusLine.length];

		auto statusEnd = statusLine.indexOf(' ');
		string statusCode;
		if (statusEnd >= 0)
		{
			statusCode = statusLine[0 .. statusEnd];
			statusMessage = statusLine[statusEnd+1..statusLine.length];
		}
		else
		{
			statusCode = statusLine;
			statusMessage = null;
		}
		status = cast(HttpStatusCode)to!ushort(statusCode);
	}

	/// If the data is compressed ("deflate" or "gzip" Content-Encoding),
	/// return the decompressed data; otherwise return the data joined
	/// into a single Data.
	// this is not a property on purpose - to avoid using it multiple times as it will unpack the data on every access
	// TODO: there is no reason for above limitation
	Data getContent()
	{
		switch (headers.get("Content-Encoding", null))
		{
			case "deflate":
				return zlib.uncompress(data).joinData();
			case "gzip":
				return gzip.uncompress(data).joinData();
			default:
				return data.joinData();
		}
	}

	/// Replace `data` with its zlib-compressed form.
	protected void compressWithDeflate()
	{
		data = zlib.compress(data, zlib.ZlibOptions(compressionLevel));
	}

	/// Replace `data` with its gzip-compressed form.
	protected void compressWithGzip()
	{
		data = gzip.compress(data, zlib.ZlibOptions(compressionLevel));
	}

	/// Called by the server to compress content, if possible/appropriate.
	/// Compression is attempted only when the request carries an
	/// Accept-Encoding header, the response has no Content-Encoding yet,
	/// and the Content-Type is one of the compressible types listed below.
	/// The first supported encoding in the client's preference order is used.
	final package void optimizeData(in ref Headers requestHeaders)
	{
		auto acceptEncoding = requestHeaders.get("Accept-Encoding", null);
		if (acceptEncoding && "Content-Encoding" !in headers)
		{
			auto contentType = headers.get("Content-Type", null);
			if (contentType.startsWith("text/")
			 || contentType == "application/json"
			 || contentType == "image/vnd.microsoft.icon"
			 || contentType == "image/svg+xml")
			{
				// "*" is appended as a sentinel so that the loop always
				// ends in one of the returning cases.
				auto supported = parseItemList(acceptEncoding) ~ ["*"];
				foreach (method; supported)
					switch (method)
					{
						case "deflate":
							headers["Content-Encoding"] = method;
							headers.add("Vary", "Accept-Encoding");
							compressWithDeflate();
							return;
						case "gzip":
							headers["Content-Encoding"] = method;
							headers.add("Vary", "Accept-Encoding");
							compressWithGzip();
							return;
						case "*":
							// No supported encoding found: send uncompressed.
							if("Content-Encoding" in headers)
								headers.remove("Content-Encoding");
							return;
						default:
							break;
					}
				assert(0);
			}
		}
	}

	/// Called by the server to apply conditional and range requests.
	/// Only acts on responses whose status is OK:
	/// $(UL
	///  $(LI If-Modified-Since not older than Last-Modified turns the
	///       response into an empty "Not Modified".)
	///  $(LI A "bytes=" Range header slices the data to the first requested
	///       range ("Partial Content"), or replaces the response with
	///       "Requested Range Not Satisfiable" if the range is out of bounds.)
	/// )
	/// Throws: Exception ("Bad range request") if the range is not of the
	///         form "start-end", "start-" or "-"; ConvException if a bound
	///         is not a number.
	final package void sliceData(in ref Headers requestHeaders)
	{
		if (status == HttpStatusCode.OK)
		{
			if ("If-Modified-Since" in requestHeaders &&
				"Last-Modified" in headers &&
				headers["Last-Modified"].parseTime!(TimeFormats.RFC2822) <= requestHeaders["If-Modified-Since"].parseTime!(TimeFormats.RFC2822))
			{
				setStatus(HttpStatusCode.NotModified);
				data = null;
				return;
			}

			headers["Accept-Ranges"] = "bytes";
			auto prange = "Range" in requestHeaders;
			if (prange && (*prange).startsWith("bytes="))
			{
				// Only the first range of a multi-range request is used.
				// An empty specification ("bytes=") yields no items at all,
				// so check before indexing.
				auto rangeSpecs = (*prange)[6..$].split(",");
				enforce(rangeSpecs.length > 0, "Bad range request");

				// Missing bounds are represented as size_t.max
				auto ranges = rangeSpecs[0].split("-").map!(s => s.length ? s.to!size_t : size_t.max)().array();
				enforce(ranges.length == 2, "Bad range request");
				// Convert the inclusive end to an exclusive one
				ranges[1]++;
				auto datum = DataSetBytes(this.data);
				if (ranges[1] == size_t.min) // was not specified (size_t.max overflowed into 0)
					ranges[1] = datum.length;
				if (ranges[0] >= datum.length || ranges[0] >= ranges[1] || ranges[1] > datum.length)
				{
					//writeError(HttpStatusCode.RequestedRangeNotSatisfiable);
					setStatus(HttpStatusCode.RequestedRangeNotSatisfiable);
					data = [Data(statusMessage)];
					return;
				}
				else
				{
					setStatus(HttpStatusCode.PartialContent);
					this.data = datum[ranges[0]..ranges[1]];
					headers["Content-Range"] = "bytes %d-%d/%d".format(ranges[0], ranges[0] + this.data.bytes.length - 1, datum.length);
				}
			}
		}
	}
}
545 
/// Sets the "Expires", "Cache-Control" and "Pragma" headers (in that order)
/// to values instructing clients not to cache the response.
void disableCache(ref Headers headers)
{
	static immutable string[2][] noCacheHeaders =
	[
		["Expires"      , "Mon, 26 Jul 1997 05:00:00 GMT"], // date in the past - disable IE caching
		["Cache-Control", "no-cache, must-revalidate"],
		["Pragma"       , "no-cache"],
	];

	foreach (entry; noCacheHeaders)
		headers[entry[0]] = entry[1];
}
553 
/// Sets the "Expires" header to one year from now and "Cache-Control" to
/// a public max-age of 31536000 seconds.
void cacheForever(ref Headers headers)
{
	auto expiry = Clock.currTime();
	expiry.add!"years"(1);

	headers["Expires"] = httpTime(expiry);
	headers["Cache-Control"] = "public, max-age=31536000";
}
559 
/// Formats a time for use in HTTP headers: the time is expressed in UTC
/// and rendered with the RFC2822 time format.
string httpTime(SysTime time)
{
	// Always emit UTC (Apache is bad at timezones)
	auto utcTime = time;
	utcTime.timezone = UTC();
	return formatTime!(TimeFormats.RFC2822)(utcTime);
}
566 
567 import std.algorithm : sort;
568 
/// Parses a list in the format of "a, b, c;q=0.5, d" and returns
/// an array of items sorted by descending "q" (["a", "b", "d", "c"]).
/// Items without a "q" parameter have q=1. Items with equal "q" keep
/// their original relative order. Whitespace around items and around
/// their parameters is ignored.
/// Throws: ConvException if a "q" value is not a valid number.
string[] parseItemList(string s)
{
	static struct Item
	{
		float q = 1.0;
		string str;

		this(string s)
		{
			auto params = s.split(";");
			if (!params.length) return;
			str = params[0].strip();
			foreach (param; params[1..$])
			{
				// Parameters are commonly written with a space after
				// the semicolon ("gzip; q=0.5").
				param = param.strip();
				if (param.startsWith("q="))
					q = to!float(param[2..$].strip());
			}
		}
	}

	auto items = s
		.split(",")
		.map!(a => Item(strip(a)))()
		.array();

	// The sort must be stable so that equally-weighted items
	// stay in the order in which they were listed.
	sort!((a, b) => a.q > b.q, SwapStrategy.stable)(items);

	return items.map!(a => a.str)().array();
}

unittest
{
	assert(parseItemList("a, b, c;q=0.5, d") == ["a", "b", "d", "c"]);
	assert(parseItemList("gzip; q=0.5, deflate") == ["deflate", "gzip"]);
	assert(parseItemList("a;q=0.1, b;q=0.9, c;q=0.9") == ["b", "c", "a"]);
	assert(parseItemList("").length == 0);
}
600 
// TODO: optimize / move to HtmlWriter
/// Escapes a string for inclusion in HTML text.
/// Replaces '<', '>' and '&' with "&lt;", "&gt;" and "&amp;", and the
/// sharp s (U+00DF) with "&szlig;". All other bytes are copied unchanged.
///
/// The input is treated as UTF-8: U+00DF is recognised by its two-byte
/// encoding (C3 9F). Matching the single byte 0xDF would instead hit the
/// lead byte of unrelated two-byte sequences and corrupt them.
string httpEscape(string str)
{
	string result;
	for (size_t i = 0; i < str.length; i++)
	{
		char c = str[i];
		switch (c)
		{
			case '<':
				result ~= "&lt;";
				break;
			case '>':
				result ~= "&gt;";
				break;
			case '&':
				result ~= "&amp;";
				break;
			case '\xC3':
				// Possible first byte of the sharp s (the beta-like symbol)
				if (i + 1 < str.length && str[i + 1] == '\x9F')
				{
					result ~= "&szlig;";
					i++; // also consume the continuation byte
				}
				else
					result ~= c;
				break;
			default:
				result ~= c;
				break;
		}
	}
	return result;
}
625 
626 public import ae.net.ietf.url : UrlParameters, encodeUrlParameter, encodeUrlParameters, decodeUrlParameter, decodeUrlParameters;
627 
/// One part of a multipart message, as consumed by encodeMultipart.
struct MultipartPart
{
	/// Headers of this part, as name => value pairs.
	string[string] headers;

	/// Contents of this part.
	Data data;
}
633 
/// Encodes the given parts as a multipart message body.
///
/// Each part is written as a "--boundary" delimiter line, the part's
/// headers, an empty line, the part's data, and a terminating CRLF; the
/// body ends with the closing "--boundary--" delimiter line.
///
/// Params:
///   parts    = the parts to encode
///   boundary = the boundary string (without the leading "--"); must not
///              occur in any part's data (checked with an assert)
/// Returns: the encoded body.
Data encodeMultipart(MultipartPart[] parts, string boundary)
{
	Data data;
	foreach (ref part; parts)
	{
		data ~= "--" ~ boundary ~ "\r\n";
		foreach (name, value; part.headers)
			data ~= name ~ ": " ~ value ~ "\r\n";
		data ~= "\r\n";
		assert((cast(string)part.data.contents).indexOf(boundary) < 0);
		data ~= part.data;
		// Every delimiter, not just the closing one, must start on a new
		// line, so terminate each part's data with a CRLF.
		data ~= "\r\n";
	}
	data ~= "--" ~ boundary ~ "--\r\n";
	return data;
}
649 
/// Returns true if `s` begins with `prefix`, comparing code units
/// case-insensitively for ASCII letters only.
private bool asciiStartsWith(string s, string prefix)
{
	import std.ascii : toLower;

	if (prefix.length > s.length)
		return false;

	for (size_t pos = 0; pos < prefix.length; pos++)
	{
		immutable char expected = toLower(prefix[pos]);
		immutable char actual = toLower(s[pos]);
		if (expected != actual)
			return false;
	}
	return true;
}