1 /**
2  * ae.sys.net implementation for HTTP using Curl,
3  * with caching and cookie support
4  *
5  * License:
6  *   This Source Code Form is subject to the terms of
7  *   the Mozilla Public License, v. 2.0. If a copy of
8  *   the MPL was not distributed with this file, You
9  *   can obtain one at http://mozilla.org/MPL/2.0/.
10  *
11  * Authors:
12  *   Vladimir Panteleev <ae@cy.md>
13  */
14 
15 module ae.sys.net.cachedcurl;
16 
17 // TODO: refactor into an abstract Cached!Network wrapper?
18 
19 import std.algorithm.comparison;
20 import std.algorithm.iteration;
21 import std.conv;
22 import std.exception;
23 import std.file;
24 import std.net.curl;
25 import std.path;
26 import std.string;
27 import std.typecons;
28 
29 import ae.net.http.common;
30 import ae.net.ietf.url;
31 import ae.sys.dataio;
32 import ae.sys.dataset;
33 import ae.sys.file;
34 import ae.sys.net;
35 import ae.utils.array;
36 import ae.utils.digest;
37 import ae.utils.json;
38 import ae.utils.time;
39 
40 /// libcurl-based implementation of `Network` which caches responses.
41 /// Allows quickly re-running some deterministic process without redownloading all URLs.
42 class CachedCurlNetwork : Network
43 {
	/// Curl HTTP object through which every request is performed.
	/// Can be customized after construction.
	/// Note that each request resets its request headers, method,
	/// redirect limit and send/receive callbacks.
	HTTP http;

	/// Directory for caching responses.
	/// Entries are stored in a subdirectory named after the first two
	/// characters of the request hash, in a file named after the full hash;
	/// the response metadata is stored next to it with a ".metadata" suffix.
	string cacheDir = "cache";

	/// Ignore cache entries older than the given time.
	/// A cached response whose file modification time is earlier than
	/// this value is deleted and requested again.
	StdTime epoch = 0;

	/// Directory for reading cookies.
	/// May be moved to a lambda in the future.
	/// Format is one file per host, with hostname ~ cookieExt being the file name.
	/// The host name is the text between the second and third "/" of the URL.
	/// Contents is one line for the entire HTTP "Cookie" header
	/// (a trailing line terminator is stripped).
	/// When cookieDir is not set, no cookie lookup is performed.
	string cookieDir, cookieExt;

	/// Creates the network object, initializing `http` with a new `HTTP()` instance.
	this()
	{
		http = HTTP();
	} ///
64 
	/// Response metadata.
	/// Collected while a request is performed and stored as JSON
	/// next to the cached response body.
	static struct Metadata
	{
		HTTP.StatusLine statusLine; /// HTTP status line.
		/// HTTP response headers.
		/// Every value received for a header name is appended to that
		/// name's list, so repeated headers are all retained.
		string[][string] headers;
	}
71 
	/// Description of a single HTTP request to perform (or to look up in the cache).
	static struct Request
	{
		string url; /// URL to request.
		HTTP.Method method = HTTP.Method.get; /// HTTP method to use.
		const(ubyte)[] data; /// Request body. When set, it is sent along with a "Content-Length" header.
		const(string[2])[] headers; /// Additional request headers, as [name, value] pairs.

		/// Maximum number of redirects to follow.
		/// By default, choose a number appropriate to the method:
		/// the `int.min` sentinel selects `uint.max` for HEAD requests
		/// and 10 for all other methods.
		int maxRedirects = int.min;
	} ///
83 
84 	/*private*/ static void _req(CachedCurlNetwork instance, ref const Request request, string target, string metadataPath)
85 	{
86 		with (instance)
87 		{
88 			http.clearRequestHeaders();
89 			http.method = request.method;
90 			if (request.maxRedirects != int.min)
91 				http.maxRedirects = request.maxRedirects;
92 			else
93 			if (request.method == HTTP.Method.head)
94 				http.maxRedirects = uint.max;
95 			else
96 				http.maxRedirects = 10;
97 			auto host = request.url.split("/")[2];
98 			if (cookieDir)
99 			{
100 				auto cookiePath = buildPath(cookieDir, host ~ cookieExt);
101 				if (cookiePath.exists)
102 					http.addRequestHeader("Cookie", cookiePath.readText.chomp());
103 			}
104 			foreach (header; request.headers)
105 				http.addRequestHeader(header[0], header[1]);
106 			Metadata metadata;
107 			http.onReceiveHeader =
108 				(in char[] key, in char[] value)
109 				{
110 					metadata.headers[key.idup] ~= value.idup;
111 				};
112 			http.onReceiveStatusLine =
113 				(HTTP.StatusLine statusLine)
114 				{
115 					metadata.statusLine = statusLine;
116 				};
117 			if (request.data)
118 			{
119 				const(ubyte)[] data = request.data;
120 				http.addRequestHeader("Content-Length", data.length.text);
121 				http.onSend = (void[] voidBuf)
122 					{
123 						auto buf = cast(ubyte[])voidBuf;
124 						size_t len = min(buf.length, data.length);
125 						buf[0..len] = data[0..len];
126 						data = data[len..$];
127 						return len;
128 					};
129 			}
130 			else
131 				http.onSend = null;
132 			download!HTTP(request.url, target, http);
133 			write(metadataPath, metadata.toJson);
134 		}
135 	}
136 
137 	static struct Response
138 	{
139 		string responsePath; /// Path to response data.
140 		string metadataPath; /// Path to response metadata.
141 
142 		/// Returns the response data, if it was successful.
143 		@property ubyte[] responseData()
144 		{
145 			checkOK();
146 			return cast(ubyte[])std.file.read(responsePath);
147 		}
148 
149 		/// Returns the response metadata.
150 		@property Metadata metadata()
151 		{
152 			return metadataPath.exists ? metadataPath.readText.jsonParse!Metadata : Metadata.init;
153 		}
154 
155 		/// Check if the response succeeded.
156 		@property bool ok()
157 		{
158 			return metadata.statusLine.code / 100 == 2;
159 		}
160 
161 		/// Check if the response succeeded, and throws an error if not.
162 		ref Response checkOK() return
163 		{
164 			if (!ok)
165 				throw new CachedCurlException(metadata);
166 			return this;
167 		}
168 	} ///
169 
170 	/// Exception thrown for failed requests (server errors).
171 	static class CachedCurlException : Exception
172 	{
173 		Metadata metadata; ///
174 
175 		private this(Metadata metadata, string fn = __FILE__, size_t ln = __LINE__)
176 		{
177 			this.metadata = metadata;
178 			super("Request failed: " ~ metadata.statusLine.reason, fn, ln);
179 		}
180 	}
181 
182 	/// Perform a raw request and return information about the resulting cached response.
183 	Response cachedReq(ref const Request request)
184 	{
185 		auto hash = getDigestString!MD5(request.url.asBytes ~ cast(char)request.method ~ request.data ~ request.headers.map!(pair => pair[0].asBytes ~ pair[1].asBytes).join);
186 		auto path = buildPath(cacheDir, hash[0..2], hash);
187 		ensurePathExists(path);
188 		auto metadataPath = path ~ ".metadata";
189 		if (path.exists && path.timeLastModified.stdTime < epoch)
190 			path.remove();
191 		cached!_req(this, request, path, metadataPath);
192 		return Response(path, metadataPath);
193 	}
194 
195 	/// ditto
196 	Response cachedReq(string url, HTTP.Method method, const(ubyte)[] data = null)
197 	{
198 		auto req = Request(url, method, data);
199 		return cachedReq(req);
200 	}
201 
202 	string downloadFile(string url)
203 	{
204 		return cachedReq(url, HTTP.Method.get).checkOK.responsePath;
205 	} /// Download a file and return the response path.
206 
207 	override void downloadFile(string url, string target)
208 	{
209 		std.file.copy(downloadFile(url), target);
210 	} ///
211 
212 	override ubyte[] getFile(string url)
213 	{
214 		return cachedReq(url, HTTP.Method.get).responseData;
215 	} ///
216 
217 	override bool urlOK(string url)
218 	{
219 		return cachedReq(url, HTTP.Method.get).ok;
220 	} ///
221 
222 	override string resolveRedirect(string url)
223 	{
224 		return
225 			url.applyRelativeURL(
226 				cachedReq(url, HTTP.Method.head, null)
227 				.metadata
228 				.headers
229 				.get("location", null)
230 				.enforce("Not a redirect: " ~ url)
231 				[$-1]);
232 	} ///
233 
234 	override ubyte[] post(string url, const(ubyte)[] data)
235 	{
236 		return cachedReq(url, HTTP.Method.post, data).responseData;
237 	} ///
238 
239 	override HttpResponse httpRequest(HttpRequest request)
240 	{
241 		Request req;
242 		req.url = request.url;
243 		switch (request.method.toUpper)
244 		{
245 			case "HEAD"   : req.method = HTTP.Method.head; break;
246 			case "GET"    : req.method = HTTP.Method.get; break;
247 			case "POST"   : req.method = HTTP.Method.post; break;
248 			case "PUT"    : req.method = HTTP.Method.put; break;
249 			case "DEL"    : req.method = HTTP.Method.del; break;
250 			case "OPTIONS": req.method = HTTP.Method.options; break;
251 			case "TRACE"  : req.method = HTTP.Method.trace; break;
252 			case "CONNECT": req.method = HTTP.Method.connect; break;
253 			case "PATCH"  : req.method = HTTP.Method.patch; break;
254 			default: throw new Exception("Unknown HTTP method: " ~ request.method);
255 		}
256 		req.data = request.data.joinToGC();
257 		foreach (name, value; request.headers)
258 			req.headers ~= [name, value];
259 		req.maxRedirects = uint.max; // Do not follow redirects, return them as-is
260 
261 		auto resp = cachedReq(req);
262 		auto metadata = resp.metadata;
263 
264 		auto response = new HttpResponse;
265 		response.status = cast(HttpStatusCode)metadata.statusLine.code;
266 		response.statusMessage = metadata.statusLine.reason;
267 		foreach (name, values; metadata.headers)
268 			foreach (value; values)
269 				response.headers.add(name, value);
270 		response.data = DataVec(readData(resp.responsePath));
271 		return response;
272 	} ///
273 }
274 
alias CachedCurlException = CachedCurlNetwork.CachedCurlException; /// Module-level name for the exception class nested in `CachedCurlNetwork`.
276 
// Module constructor: installs a `CachedCurlNetwork` instance as `net`.
// (`net` is declared outside this file - presumably in ae.sys.net.)
static this()
{
	net = new CachedCurlNetwork;
}