1 /**
2  * Concepts shared between HTTP clients and servers.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Stéphan Kochen <stephan@kochen.nl>
12  *   Vladimir Panteleev <vladimir@thecybershadow.net>
13  *   Simon Arlott
14  */
15 
16 module ae.net.http.common;
17 
18 import core.time;
19 
20 import std.algorithm;
21 import std.array;
22 import std.string;
23 import std.conv;
24 import std.ascii;
25 import std.exception;
26 import std.datetime;
27 
28 import ae.net.ietf.headers;
29 import ae.sys.data;
30 import ae.utils.array : amap, afilter, auniq, asort;
31 import ae.utils.text;
32 import ae.utils.time;
33 import zlib = ae.utils.zlib;
34 import gzip = ae.utils.gzip;
35 
/// Common base of HTTP requests and responses: protocol information,
/// headers, body data and the time the object was created.
private abstract class HttpMessage
{
public:
	/// URL scheme; "http" unless changed.
	string protocol = "http";
	/// HTTP version string; "1.0" unless changed.
	string protocolVersion = "1.0";
	/// Message headers.
	Headers headers;
	/// Message body, as a list of data chunks.
	Data[] data;
	/// Wall-clock time at which this object was constructed.
	SysTime creationTime;

	/// Stamps the message with the current time.
	this()
	{
		this.creationTime = Clock.currTime();
	}

	/// Time elapsed since this object was constructed.
	@property Duration age()
	{
		auto now = Clock.currTime();
		return now - creationTime;
	}
}
56 
/// HTTP request class
class HttpRequest : HttpMessage
{
public:
	/// Request method (verb).
	string method = "GET";
	/// Proxy address, as "host" or "host:port" (split by proxyHost / proxyPort).
	string proxy;

	/// Creates an empty request.
	this()
	{
	}

	/// Creates a request for the given resource (or full URL, see the
	/// `resource` setter).
	this(string resource)
	{
		this.resource = resource;
	}

	/// Resource part of URL (everything after the hostname)
	@property string resource()
	{
		return _resource;
	}

	/// Setting the resource to a full URL will fill in the protocol and the
	/// Host header, as well.
	@property void resource(string value)
	{
		_resource = value;

		// applies to both Client/Server as some clients put a full URL in the GET line instead of using a "Host" header
		string scheme;
		if (_resource.asciiStartsWith("http://"))
			scheme = "http";
		else
		if (_resource.asciiStartsWith("https://"))
			scheme = "https";

		if (scheme.length)
		{
			// Must be set before touching host/port: both consult protocolDefaultPort.
			this.protocol = scheme;

			// Skip "<scheme>://".
			value = value[scheme.length+3..$];
			auto pathstart = value.indexOf('/');
			if (pathstart == -1)
			{
				host = value;
				_resource = "/";
			}
			else
			{
				host = value[0..pathstart];
				_resource = value[pathstart..$];
			}
			// The host getter already strips the text after the last ':',
			// so this only triggers when a further ':' remains.
			auto portstart = host().indexOf(':');
			if (portstart != -1)
			{
				port = to!ushort(host[portstart+1..$]);
				host = host[0..portstart];
			}
		}
	}

	/// The hostname, without the port number
	@property string host()
	{
		string _host = headers.get("Host", null);
		auto colon = _host.lastIndexOf(":");
		return colon<0 ? _host : _host[0..colon];
	}

	/// Sets the Host header, appending the current port when it is not the
	/// protocol's default.
	@property void host(string _host)
	{
		auto _port = this.port;
		headers["Host"] = _port==protocolDefaultPort ? _host : _host ~ ":" ~ text(_port);
	}

	/// Default port for the current protocol (80 for http, 443 for https).
	/// Throws: Exception if the protocol is neither "http" nor "https".
	@property ushort protocolDefaultPort()
	{
		switch (protocol)
		{
			case "http":
				return 80;
			case "https":
				return 443;
			default:
				throw new Exception("Unknown protocol: " ~ protocol);
		}
	}

	/// Port number, from the Host header if present, otherwise the value
	/// set explicitly. Defaults to the protocol's default port.
	@property ushort port()
	{
		if ("Host" in headers)
		{
			string _host = headers["Host"];
			auto colon = _host.lastIndexOf(":");
			return colon<0 ? protocolDefaultPort : to!ushort(_host[colon+1..$]);
		}
		else
			return _port ? _port : protocolDefaultPort;
	}

	/// Sets the port. When a Host header exists it is rewritten to carry
	/// the port; otherwise the value is remembered until a host is set.
	@property void port(ushort _port)
	{
		if ("Host" in headers)
		{
			if (_port == protocolDefaultPort)
				headers["Host"] = this.host;
			else
				headers["Host"] = this.host ~ ":" ~ text(_port);
		}
		else
			this._port = _port;
	}

	/// Path part of request (until the ?)
	@property string path()
	{
		auto p = resource.indexOf('?');
		if (p >= 0)
			return resource[0..p];
		else
			return resource;
	}

	/// Query string part of request (after the ?), or null if there is none
	@property string queryString()
	{
		auto p = resource.indexOf('?');
		if (p >= 0)
			return resource[p+1..$];
		else
			return null;
	}

	/// AA of query string parameters
	@property string[string] urlParameters()
	{
		return decodeUrlParameters(queryString);
	}

	/// Reconstruct full URL from protocol, host, port and resource.
	/// The port is included (as ":port") only when it differs from the
	/// protocol's default.
	@property string url()
	{
		auto portSuffix = port==protocolDefaultPort ? null : ":" ~ to!string(port);
		return protocol ~ "://" ~ host ~ portSuffix ~ resource;
	}

	/// Host part of `proxy` (everything before the first ':').
	@property string proxyHost()
	{
		auto portstart = proxy.indexOf(':');
		if (portstart != -1)
			return proxy[0..portstart];
		return proxy;
	}

	/// Port part of `proxy` (after the first ':'); 80 when none is given.
	@property ushort proxyPort()
	{
		auto portstart = proxy.indexOf(':');
		if (portstart != -1)
			return to!ushort(proxy[portstart+1..$]);
		return 80;
	}

	/// Parse the first line in a HTTP request ("METHOD /resource HTTP/1.x").
	/// Fills in method, resource (and thereby possibly host/port) and
	/// protocolVersion. Throws (via enforce) on a malformed line.
	void parseRequestLine(string reqLine)
	{
		enforce(reqLine.length > 10, "Request line too short");
		auto methodEnd = reqLine.indexOf(' ');
		enforce(methodEnd > 0, "Malformed request line");
		method = reqLine[0 .. methodEnd];
		reqLine = reqLine[methodEnd + 1 .. reqLine.length];

		// The resource may itself contain spaces, so split at the last one.
		auto resourceEnd = reqLine.lastIndexOf(' ');
		enforce(resourceEnd > 0, "Malformed request line");
		resource = reqLine[0 .. resourceEnd];

		string protocol = reqLine[resourceEnd+1..$];
		enforce(protocol.startsWith("HTTP/"));
		protocolVersion = protocol[5..$];
	}

	/// Decodes submitted form data, and returns an AA of values.
	/// Returns null when the body is empty.
	/// Throws: Exception when the Content-Type header is missing or is not
	/// "application/x-www-form-urlencoded" (media type parameters such as
	/// "; charset=UTF-8" are ignored for this comparison).
	string[string] decodePostData()
	{
		auto payload = cast(string)this.data.joinToHeap();
		if (payload.length is 0)
			return null;

		string contentType = headers.get("Content-Type", "");

		// Compare only the media type; parameters follow the first ';'.
		auto semicolon = contentType.indexOf(';');
		string mediaType = (semicolon < 0 ? contentType : contentType[0 .. semicolon]).strip();

		switch (mediaType)
		{
			case "application/x-www-form-urlencoded":
				return decodeUrlParameters(payload);
			case "":
				throw new Exception("No Content-Type");
			default:
				throw new Exception("Unknown Content-Type: " ~ contentType);
		}
	}

	/// Get list of hosts as specified in headers (e.g. X-Forwarded-For).
	/// First item in returned array is the node furthest away.
	/// Duplicates are removed.
	/// Specify socket remote address in remoteHost to add it to the list.
	string[] remoteHosts(string remoteHost = null)
	{
		return
			(headers.get("X-Forwarded-For", null).split(",").amap!strip() ~
			 headers.get("X-Forwarded-Host", null) ~
			 remoteHost)
			.afilter!`a && a != "unknown"`()
			.auniq();
	}

	unittest
	{
		auto req = new HttpRequest();
		assert(req.remoteHosts() == []);
		assert(req.remoteHosts("3.3.3.3") == ["3.3.3.3"]);

		req.headers["X-Forwarded-For"] = "1.1.1.1, 2.2.2.2";
		req.headers["X-Forwarded-Host"] = "2.2.2.2";
		assert(req.remoteHosts("3.3.3.3") == ["1.1.1.1", "2.2.2.2", "3.3.3.3"]);
	}

	unittest
	{
		// A non-default port must be separated from the host by ':'.
		auto custom = new HttpRequest("http://example.com:8080/path?x=1");
		assert(custom.host == "example.com");
		assert(custom.port == 8080);
		assert(custom.url == "http://example.com:8080/path?x=1");

		// The default port is omitted.
		auto plain = new HttpRequest("https://example.com/");
		assert(plain.url == "https://example.com/");
	}

private:
	string _resource;
	ushort _port = 0; // used only when no "Host" in headers; otherwise, taken from there
}
285 
/// HTTP response status codes, grouped by class (1xx .. 5xx).
enum HttpStatusCode : ushort
{
	// 1xx: informational
	Continue                     = 100,
	SwitchingProtocols           = 101,

	// 2xx: success
	OK                           = 200,
	Created                      = 201,
	Accepted                     = 202,
	NonAuthoritativeInformation  = 203,
	NoContent                    = 204,
	ResetContent                 = 205,
	PartialContent               = 206,

	// 3xx: redirection (306 is unused and intentionally has no member)
	MultipleChoices              = 300,
	MovedPermanently             = 301,
	Found                        = 302,
	SeeOther                     = 303,
	NotModified                  = 304,
	UseProxy                     = 305,
	TemporaryRedirect            = 307,

	// 4xx: client error
	BadRequest                   = 400,
	Unauthorized                 = 401,
	PaymentRequired              = 402,
	Forbidden                    = 403,
	NotFound                     = 404,
	MethodNotAllowed             = 405,
	NotAcceptable                = 406,
	ProxyAuthenticationRequired  = 407,
	RequestTimeout               = 408,
	Conflict                     = 409,
	Gone                         = 410,
	LengthRequired               = 411,
	PreconditionFailed           = 412,
	RequestEntityTooLarge        = 413,
	RequestUriTooLong            = 414,
	UnsupportedMediaType         = 415,
	RequestedRangeNotSatisfiable = 416,
	ExpectationFailed            = 417,

	// 5xx: server error
	InternalServerError          = 500,
	NotImplemented               = 501,
	BadGateway                   = 502,
	ServiceUnavailable           = 503,
	GatewayTimeout               = 504,
	HttpVersionNotSupported      = 505,
}
335 
/// HTTP reply class
class HttpResponse : HttpMessage
{
public:
	/// Numeric status code.
	ushort status;
	/// Reason phrase accompanying the status code.
	string statusMessage;

	/// Compression level passed to zlib/gzip when compressing the body.
	int compressionLevel = 1;

	/// Returns the standard reason phrase for a status code,
	/// or null when the code is not one of the known values.
	static string getStatusMessage(HttpStatusCode code)
	{
		switch(code)
		{
			case 100: return "Continue";
			case 101: return "Switching Protocols";

			case 200: return "OK";
			case 201: return "Created";
			case 202: return "Accepted";
			case 203: return "Non-Authoritative Information";
			case 204: return "No Content";
			case 205: return "Reset Content";
			case 206: return "Partial Content";
			case 300: return "Multiple Choices";
			case 301: return "Moved Permanently";
			case 302: return "Found";
			case 303: return "See Other";
			case 304: return "Not Modified";
			case 305: return "Use Proxy";
			case 306: return "(Unused)";
			case 307: return "Temporary Redirect";

			case 400: return "Bad Request";
			case 401: return "Unauthorized";
			case 402: return "Payment Required";
			case 403: return "Forbidden";
			case 404: return "Not Found";
			case 405: return "Method Not Allowed";
			case 406: return "Not Acceptable";
			case 407: return "Proxy Authentication Required";
			case 408: return "Request Timeout";
			case 409: return "Conflict";
			case 410: return "Gone";
			case 411: return "Length Required";
			case 412: return "Precondition Failed";
			case 413: return "Request Entity Too Large";
			case 414: return "Request-URI Too Long";
			case 415: return "Unsupported Media Type";
			case 416: return "Requested Range Not Satisfiable";
			case 417: return "Expectation Failed";

			case 500: return "Internal Server Error";
			case 501: return "Not Implemented";
			case 502: return "Bad Gateway";
			case 503: return "Service Unavailable";
			case 504: return "Gateway Timeout";
			case 505: return "HTTP Version Not Supported";
			default: return null;
		}
	}

	/// Set the response status code and message
	void setStatus(HttpStatusCode code)
	{
		status = code;
		statusMessage = getStatusMessage(code);
	}

	/// Parse a status line ("HTTP/1.x CODE [message]").
	/// Stores the text before the first space in protocolVersion as-is,
	/// then the numeric code and the (optional) message.
	/// Throws: Exception when the line contains no space; a conversion
	/// exception when the code is not a valid ushort.
	final void parseStatusLine(string statusLine)
	{
		auto versionEnd = statusLine.indexOf(' ');
		if (versionEnd == -1)
			throw new Exception("Malformed status line");
		protocolVersion = statusLine[0..versionEnd];
		statusLine = statusLine[versionEnd+1..statusLine.length];

		auto statusEnd = statusLine.indexOf(' ');
		string statusCode;
		if (statusEnd >= 0)
		{
			statusCode = statusLine[0 .. statusEnd];
			statusMessage = statusLine[statusEnd+1..statusLine.length];
		}
		else
		{
			// No reason phrase present.
			statusCode = statusLine;
			statusMessage = null;
		}
		status = cast(HttpStatusCode)to!ushort(statusCode);
	}

	/// If the data is compressed, return the decompressed data
	// this is not a property on purpose - to avoid using it multiple times as it will unpack the data on every access
	// TODO: there is no reason for above limitation
	Data getContent()
	{
		if ("Content-Encoding" in headers && headers["Content-Encoding"]=="deflate")
			return zlib.uncompress(data).joinData();
		else
		if ("Content-Encoding" in headers && headers["Content-Encoding"]=="gzip")
			return gzip.uncompress(data).joinData();
		else
			return data.joinData();
	}

	/// Replaces the body with its deflate-compressed form.
	protected void compressWithDeflate()
	{
		data = zlib.compress(data, zlib.ZlibOptions(compressionLevel));
	}

	/// Replaces the body with its gzip-compressed form.
	protected void compressWithGzip()
	{
		data = gzip.compress(data, zlib.ZlibOptions(compressionLevel));
	}

	/// Called by the server to compress content, if possible/appropriate.
	/// Compression is applied only when the request carries an
	/// Accept-Encoding header, the response has no Content-Encoding yet,
	/// and the response Content-Type is "text/*" or "application/json".
	final package void optimizeData(in ref Headers requestHeaders)
	{
		// The client's preferences live in the *request* headers.
		auto pAccept = "Accept-Encoding" in requestHeaders;
		auto acceptEncoding = pAccept ? *pAccept : null;
		if (acceptEncoding.length && "Content-Encoding" !in headers)
		{
			auto contentType = headers.get("Content-Type", null);
			if (contentType.startsWith("text/") || contentType=="application/json")
			{
				// The trailing "*" acts as a sentinel that terminates the loop
				// when none of the listed encodings is supported.
				auto supported = parseItemList(acceptEncoding) ~ ["*"];

				foreach (method; supported)
					switch (method)
					{
						case "deflate":
							headers["Content-Encoding"] = method;
							headers.add("Vary", "Accept-Encoding");
							compressWithDeflate();
							return;
						case "gzip":
							headers["Content-Encoding"] = method;
							headers.add("Vary", "Accept-Encoding");
							compressWithGzip();
							return;
						case "*":
							if("Content-Encoding" in headers)
								headers.remove("Content-Encoding");
							return;
						default:
							break;
					}
				assert(0);
			}
		}
	}

	/// Called by the server to apply range request.
	/// Only acts on responses whose status is OK. Handles the first range
	/// of a "bytes=" Range header; on success the status becomes
	/// PartialContent, otherwise RequestedRangeNotSatisfiable.
	/// Note: a suffix range ("bytes=-N") has no start offset and is
	/// reported as not satisfiable.
	final package void sliceData(in ref Headers requestHeaders)
	{
		if (status == HttpStatusCode.OK)
		{
			headers["Accept-Ranges"] = "bytes";
			auto prange = "Range" in requestHeaders;
			if (prange && (*prange).startsWith("bytes="))
			{
				auto specs = (*prange)[6..$].split(",");
				// An empty specifier list ("bytes=") would otherwise be indexed out of bounds.
				enforce(specs.length, "Bad range request");
				// A missing bound is represented as size_t.max.
				auto ranges = specs[0].split("-").map!(s => s.length ? s.to!size_t : size_t.max)().array();
				enforce(ranges.length == 2, "Bad range request");
				// Convert the inclusive end offset into an exclusive one.
				ranges[1]++;
				auto datum = DataSetBytes(this.data);
				if (ranges[1] == size_t.min) // was not specified (size_t.max overflowed into 0)
					ranges[1] = datum.length;
				if (ranges[0] >= datum.length || ranges[0] >= ranges[1] || ranges[1] > datum.length)
				{
					//writeError(HttpStatusCode.RequestedRangeNotSatisfiable);
					setStatus(HttpStatusCode.RequestedRangeNotSatisfiable);
					data = [Data(statusMessage)];
					return;
				}
				else
				{
					setStatus(HttpStatusCode.PartialContent);
					this.data = datum[ranges[0]..ranges[1]];
					headers["Content-Range"] = "bytes %d-%d/%d".format(ranges[0], ranges[0] + this.data.bytes.length - 1, datum.length);
				}
			}
		}
	}
}
520 
/// Sets the Expires, Cache-Control and Pragma headers to values that
/// tell clients not to cache the response.
void disableCache(ref Headers headers)
{
	// Name/value pairs, applied in this order.
	static immutable string[2][] noCacheHeaders =
	[
		["Expires", "Mon, 26 Jul 1997 05:00:00 GMT"],  // date in the past; disable IE caching
		["Cache-Control", "no-cache, must-revalidate"],
		["Pragma", "no-cache"],
	];

	foreach (pair; noCacheHeaders)
		headers[pair[0]] = pair[1];
}
528 
/// Sets the Expires header to one year from now and marks the response
/// as publicly cacheable for 31536000 seconds.
void cacheForever(ref Headers headers)
{
	auto expiry = Clock.currTime();
	expiry.add!"years"(1);

	headers["Expires"] = httpTime(expiry);
	headers["Cache-Control"] = "public, max-age=31536000";
}
534 
/// Formats a time in RFC 2822 format, after converting it to UTC.
string httpTime(SysTime time)
{
	// Apache is bad at timezones, so always emit UTC
	auto utcTime = time.toUTC();
	return utcTime.formatTime!(TimeFormats.RFC2822)();
}
541 
542 import std.algorithm : sort;
543 
/// Parses a list in the format of "a, b, c;q=0.5, d" and returns
/// an array of items sorted by descending "q" (["a", "b", "d", "c"]).
/// Items without a "q" parameter have q = 1. Items with equal q keep
/// their original relative order. Whitespace around items and around
/// their parameters ("c; q=0.5") is ignored.
/// Throws: a conversion exception when a "q" value is not a number.
string[] parseItemList(string s)
{
	static struct Item
	{
		float q = 1.0;
		string str;

		this(string s)
		{
			auto params = s.split(";");
			if (!params.length) return;
			str = params[0].strip();
			foreach (param; params[1..$])
			{
				// Parameters are commonly written with a space after ';'.
				param = param.strip();
				if (param.startsWith("q="))
					q = to!float(param[2..$].strip());
			}
		}
	}

	auto items = s
		.split(",")
		.map!(a => Item(strip(a)))()
		.array();

	// Stable sort: items with the same q must stay in listed order.
	sort!(`a.q > b.q`, SwapStrategy.stable)(items);

	return items
		.map!(a => a.str)()
		.array();
}

unittest
{
	assert(parseItemList("a, b, c;q=0.5, d") == ["a", "b", "d", "c"]);
	assert(parseItemList("gzip; q=0.5, deflate") == ["deflate", "gzip"]);
	assert(parseItemList("") == []);
}
575 
// TODO: optimize / move to HtmlWriter
/// Escapes '<', '>' and '&' as HTML entities, and the character U+00DF
/// (sharp s) as "&szlig;". All other bytes are copied unchanged.
/// The input is treated as UTF-8 and scanned by code unit; it is never
/// decoded, so malformed input is passed through rather than rejected.
string httpEscape(string str)
{
	string result;
	for (size_t i = 0; i < str.length; i++)
	{
		immutable c = str[i];
		switch (c)
		{
			case '<':
				result ~= "&lt;";
				break;
			case '>':
				result ~= "&gt;";
				break;
			case '&':
				result ~= "&amp;";
				break;
			case '\xC3':
				// U+00DF (the beta-like symbol) is the UTF-8 byte pair C3 9F.
				// A lone 0xDF byte is the lead byte of a different character
				// and must not be replaced.
				if (i + 1 < str.length && str[i + 1] == '\x9F')
				{
					result ~= "&szlig;";
					i++; // consume the continuation byte as well
				}
				else
					result ~= c;
				break;
			default:
				result ~= c;
		}
	}
	return result;
}
600 
/// Percent-encodes a string for use in a URL query component.
/// ASCII letters, digits, '-' and '_' are kept; every other byte is
/// written as '%' followed by two uppercase hexadecimal digits.
string encodeUrlParameter(string param)
{
	string encoded;
	foreach (char ch; param)
	{
		immutable keep = isAlphaNum(ch) || ch == '-' || ch == '_';
		if (keep)
			encoded ~= ch;
		else
		{
			immutable octet = cast(ubyte)ch;
			encoded ~= '%';
			encoded ~= hexDigits[octet >> 4];
			encoded ~= hexDigits[octet & 0x0F];
		}
	}
	return encoded;
}
611 
/// Builds a query string ("name=value&name=value") from an associative
/// array, percent-encoding every name and value. Pairs appear in the
/// associative array's iteration order.
string encodeUrlParameters(string[string] dic)
{
	string[] pairs;
	foreach (key, val; dic)
	{
		auto encodedKey = encodeUrlParameter(key);
		auto encodedVal = encodeUrlParameter(val);
		pairs ~= encodedKey ~ '=' ~ encodedVal;
	}
	return pairs.join("&");
}
619 
/// Decodes a URL-encoded query component: "%XX" sequences become the
/// byte with that hexadecimal value and '+' becomes a space.
/// A '%' with fewer than two characters after it is copied literally.
string decodeUrlParameter(string encoded)
{
	string decoded;
	int pos = 0;
	while (pos < encoded.length)
	{
		immutable ch = encoded[pos];
		if (ch == '%' && pos+3 <= encoded.length)
		{
			// Two hex digits follow the '%'.
			decoded ~= cast(char)fromHex!ubyte(encoded[pos+1..pos+3]);
			pos += 3;
		}
		else
		{
			if (ch == '+')
				decoded ~= ' ';
			else
				decoded ~= ch;
			pos++;
		}
	}
	return decoded;
}
636 
/// Splits a query string on '&' and decodes each "name=value" pair into
/// an associative array. A segment without '=' is stored with a null
/// value. When a name occurs more than once, the last occurrence wins.
string[string] decodeUrlParameters(string qs)
{
	string[string] result;
	foreach (segment; split(qs, "&"))
	{
		auto eq = segment.indexOf('=');
		if (eq >= 0)
			result[decodeUrlParameter(segment[0..eq])] = decodeUrlParameter(segment[eq+1..$]);
		else
			result[decodeUrlParameter(segment)] = null;
	}
	return result;
}
651 
/// One part of a multipart message body.
struct MultipartPart
{
	/// Headers of this part, written as "Name: value" lines.
	string[string] headers;
	/// Content of this part.
	Data data;
}

/// Serializes parts into a multipart body delimited by `boundary`.
///
/// Every part is written as a "--boundary" line, its headers, an empty
/// line and its data. Consecutive parts are separated by CRLF, and the
/// body ends with CRLF followed by the closing "--boundary--" line.
/// No part's data may contain the boundary string (checked by assert).
Data encodeMultipart(MultipartPart[] parts, string boundary)
{
	Data data;
	foreach (i, ref part; parts)
	{
		// The CRLF preceding a delimiter belongs to the delimiter; without
		// it the previous part's data would run into the "--boundary" line.
		if (i > 0)
			data ~= "\r\n";
		data ~= "--" ~ boundary ~ "\r\n";
		foreach (name, value; part.headers)
			data ~= name ~ ": " ~ value ~ "\r\n";
		data ~= "\r\n";
		assert((cast(string)part.data.contents).indexOf(boundary) < 0);
		data ~= part.data;
	}
	data ~= "\r\n--" ~ boundary ~ "--\r\n";
	return data;
}
673 
/// Returns true when `s` begins with `prefix`, comparing code units
/// case-insensitively using ASCII case folding.
private bool asciiStartsWith(string s, string prefix)
{
	import std.ascii;

	if (prefix.length > s.length)
		return false;

	foreach (idx; 0 .. prefix.length)
	{
		if (toLower(s[idx]) != toLower(prefix[idx]))
			return false;
	}
	return true;
}