1 /**
2  * ae.sys.net implementation for HTTP using Curl,
3  * with caching and cookie support
4  *
5  * License:
6  *   This Source Code Form is subject to the terms of
7  *   the Mozilla Public License, v. 2.0. If a copy of
8  *   the MPL was not distributed with this file, You
9  *   can obtain one at http://mozilla.org/MPL/2.0/.
10  *
11  * Authors:
12  *   Vladimir Panteleev <vladimir@thecybershadow.net>
13  */
14 
15 module ae.sys.net.cachedcurl;
16 
17 // TODO: refactor into an abstract Cached!Network wrapper?
18 
19 import std.algorithm.comparison;
20 import std.conv;
21 import std.exception;
22 import std.file;
23 import std.net.curl;
24 import std.path;
25 import std.string;
26 import std.typecons;
27 
28 import ae.net.http.common;
29 import ae.net.ietf.url;
30 import ae.sys.dataio;
31 import ae.sys.dataset;
32 import ae.sys.file;
33 import ae.sys.net;
34 import ae.utils.digest;
35 import ae.utils.json;
36 import ae.utils.time;
37 
/// Network implementation which performs HTTP requests through
/// std.net.curl, writing each response body to a file under cacheDir
/// together with a JSON ".metadata" file (status line and headers).
class CachedCurlNetwork : Network
{
	/// Curl HTTP object
	/// Can be customized after construction.
	HTTP http;

	/// Directory for caching responses
	string cacheDir = "cache";

	/// Ignore cache entries older than the given time
	StdTime epoch = 0;

	/// Directory for reading cookies.
	/// May be moved to a lambda in the future.
	/// Format is one file per host, with hostname ~ cookieExt being the file name.
	/// Contents is one line for the entire HTTP "Cookie" header.
	string cookieDir, cookieExt;

	/// Creates the network object with a fresh curl HTTP handle.
	this()
	{
		http = HTTP();
	}

	/// Response information stored next to the cached body
	/// (serialized to / parsed from JSON).
	static struct Metadata
	{
		/// Status line as reported by curl.
		HTTP.StatusLine statusLine;
		/// Received headers; every value received for a name is
		/// appended to that name's list, in order of arrival.
		string[][string] headers;
	}

	/// Description of one HTTP request.
	static struct Request
	{
		/// URL to request.
		string url;
		/// HTTP method.
		HTTP.Method method = HTTP.Method.get;
		/// Request body; no body is sent when this is null.
		const(void)[] data;
		/// Extra request headers, as [name, value] pairs.
		const(string[2])[] headers;

		/// Value passed to http.maxRedirects.
		/// int.min (the default) lets req pick: uint.max for HEAD, 10 otherwise.
		int maxRedirects = int.min; // choose depending on method
	}

	/// Performs the request over the network using instance.http,
	/// saving the response body to `target` and the status line and
	/// headers (as JSON) to `metadataPath`.
	///
	/// NOTE(review): this function is invoked through `cached!req`, which
	/// may locate the output file by the parameter name `target` -
	/// do not rename that parameter without checking `cached`.
	///
	/// Throws: Exception if a cookie directory is configured and the
	/// host cannot be extracted from the URL; anything thrown by the
	/// download or by writing the metadata file propagates.
	/*private*/ static void req(CachedCurlNetwork instance, ref const Request request, string target, string metadataPath)
	{
		with (instance)
		{
			http.clearRequestHeaders();
			http.method = request.method;
			if (request.maxRedirects != int.min)
				http.maxRedirects = request.maxRedirects;
			else
			if (request.method == HTTP.Method.head)
				// NOTE(review): std.net.curl appears to treat uint.max as
				// "do not follow redirects" - confirm against its documentation.
				http.maxRedirects = uint.max;
			else
				http.maxRedirects = 10;
			if (cookieDir)
			{
				// The host is needed only to find the cookie file, so it is
				// extracted here; the third "/"-separated component of
				// "scheme://host/path" is taken as the host.
				auto urlParts = request.url.split("/");
				enforce(urlParts.length > 2, "Cannot determine host of URL: " ~ request.url);
				auto host = urlParts[2];
				auto cookiePath = buildPath(cookieDir, host ~ cookieExt);
				if (cookiePath.exists)
					http.addRequestHeader("Cookie", cookiePath.readText.chomp());
			}
			foreach (header; request.headers)
				http.addRequestHeader(header[0], header[1]);
			Metadata metadata;
			http.onReceiveHeader =
				(in char[] key, in char[] value)
				{
					// Copy the slices before storing them.
					metadata.headers[key.idup] ~= value.idup;
				};
			http.onReceiveStatusLine =
				(HTTP.StatusLine statusLine)
				{
					metadata.statusLine = statusLine;
				};
			if (request.data)
			{
				// Remaining unsent part of the body; consumed by the callback.
				const(void)[] data = request.data;
				http.addRequestHeader("Content-Length", data.length.text);
				http.onSend = (void[] buf)
					{
						size_t len = min(buf.length, data.length);
						buf[0..len] = data[0..len];
						data = data[len..$];
						return len;
					};
			}
			else
				http.onSend = null;
			download!HTTP(request.url, target, http);
			write(metadataPath, metadata.toJson);
		}
	}

	/// Handle to a cached response on disk.
	static struct Response
	{
		/// Path of the file holding the response body.
		string responsePath;
		/// Path of the file holding the JSON-encoded Metadata.
		string metadataPath;

		/// Contents of the response body file.
		/// Throws: CachedCurlException if the status is not 2xx.
		@property ubyte[] responseData()
		{
			checkOK();
			return cast(ubyte[])std.file.read(responsePath);
		}

		/// Metadata parsed from metadataPath on every access,
		/// or Metadata.init if that file does not exist.
		@property Metadata metadata()
		{
			return metadataPath.exists ? metadataPath.readText.jsonParse!Metadata : Metadata.init;
		}

		/// Whether the stored status code is in the 2xx range.
		@property bool ok()
		{
			return metadata.statusLine.code / 100 == 2;
		}

		/// Returns this response if its status is 2xx.
		/// Throws: CachedCurlException (carrying the metadata) otherwise.
		ref Response checkOK() return
		{
			if (!ok)
				throw new CachedCurlException(metadata);
			return this;
		}
	}

	/// Thrown when a response with a non-2xx status is used as if it succeeded.
	static class CachedCurlException : Exception
	{
		/// Metadata of the failed response.
		Metadata metadata;

		/// The message is "Request failed: " followed by the status line's reason.
		this(Metadata metadata, string fn = __FILE__, size_t ln = __LINE__)
		{
			this.metadata = metadata;
			super("Request failed: " ~ metadata.statusLine.reason, fn, ln);
		}
	}

	/// Returns the response for the request, going through the on-disk cache.
	///
	/// The cache entry is keyed by the MD5 digest of URL, method and body
	/// and lives at cacheDir/<first two digest characters>/<digest>, with
	/// the metadata in a sibling ".metadata" file. Request headers are not
	/// part of the key.
	Response cachedReq(ref const Request request)
	{
		auto hash = getDigestString!MD5(request.url ~ cast(char)request.method ~ request.data);
		auto path = buildPath(cacheDir, hash[0..2], hash);
		ensurePathExists(path);
		auto metadataPath = path ~ ".metadata";
		// Drop a cached body last modified before the configured epoch.
		if (path.exists && path.timeLastModified.stdTime < epoch)
			path.remove();
		// `cached` presumably performs the request only when `path` does
		// not exist yet - TODO confirm against its definition.
		cached!req(this, request, path, metadataPath);
		return Response(path, metadataPath);
	}

	/// Convenience overload building the Request from its parts.
	Response cachedReq(string url, HTTP.Method method, in void[] data = null)
	{
		auto req = Request(url, method, data);
		return cachedReq(req);
	}

	/// GETs the URL and returns the path of the cached response body.
	/// Throws: CachedCurlException if the status is not 2xx.
	string downloadFile(string url)
	{
		return cachedReq(url, HTTP.Method.get).checkOK.responsePath;
	}

	/// GETs the URL and copies the cached response body to `target`.
	override void downloadFile(string url, string target)
	{
		std.file.copy(downloadFile(url), target);
	}

	/// GETs the URL and returns the response body.
	/// Throws: CachedCurlException if the status is not 2xx.
	override void[] getFile(string url)
	{
		return cachedReq(url, HTTP.Method.get).responseData;
	}

	/// GETs the URL and reports whether the status is 2xx.
	override bool urlOK(string url)
	{
		return cachedReq(url, HTTP.Method.get).ok;
	}

	/// Sends a HEAD request and returns the last received "location"
	/// header value, applied to `url` as a relative URL.
	/// Throws: Exception ("Not a redirect: ...") if no such header was received.
	override string resolveRedirect(string url)
	{
		return
			url.applyRelativeURL(
				cachedReq(url, HTTP.Method.head, null)
				.metadata
				.headers
				.get("location", null)
				.enforce("Not a redirect: " ~ url)
				[$-1]);
	}

	/// POSTs `data` to the URL and returns the response body.
	/// Throws: CachedCurlException if the status is not 2xx.
	override void[] post(string url, in void[] data)
	{
		return cachedReq(url, HTTP.Method.post, data).responseData;
	}

	/// Performs an arbitrary HttpRequest through the cache and converts
	/// the stored metadata and body into an HttpResponse. The status is
	/// not checked; maxRedirects is set to 0 for the request.
	/// Throws: Exception if the request method is not recognized.
	override HttpResponse httpRequest(HttpRequest request)
	{
		Request req;
		req.url = request.url;
		switch (request.method.toUpper)
		{
			case "HEAD"   : req.method = HTTP.Method.head; break;
			case "GET"    : req.method = HTTP.Method.get; break;
			case "POST"   : req.method = HTTP.Method.post; break;
			case "PUT"    : req.method = HTTP.Method.put; break;
			// "DELETE" is the actual HTTP method token; "DEL" is still
			// accepted because earlier versions recognized only that spelling.
			case "DELETE", "DEL":
				req.method = HTTP.Method.del; break;
			case "OPTIONS": req.method = HTTP.Method.options; break;
			case "TRACE"  : req.method = HTTP.Method.trace; break;
			case "CONNECT": req.method = HTTP.Method.connect; break;
			case "PATCH"  : req.method = HTTP.Method.patch; break;
			default: throw new Exception("Unknown HTTP method: " ~ request.method);
		}
		req.data = request.data.joinToHeap;
		foreach (name, value; request.headers)
			req.headers ~= [name, value];
		req.maxRedirects = 0;

		auto resp = cachedReq(req);
		auto metadata = resp.metadata;

		auto response = new HttpResponse;
		response.status = cast(HttpStatusCode)metadata.statusLine.code;
		response.statusMessage = metadata.statusLine.reason;
		foreach (name, values; metadata.headers)
			foreach (value; values)
				response.headers.add(name, value);
		response.data = [readData(resp.responsePath)];
		return response;
	}
}
259 
/// Module-level name for CachedCurlNetwork.CachedCurlException, so the
/// exception type can be referred to without the class qualifier.
alias CachedCurlException = CachedCurlNetwork.CachedCurlException;
261 
/// Module constructor: creates a CachedCurlNetwork and assigns it to `net`
/// (presumably the network instance variable declared in ae.sys.net - confirm).
static this()
{
	auto cachedNetwork = new CachedCurlNetwork;
	net = cachedNetwork;
}