1 /**
2  * ae.sys.net implementation for HTTP using Curl,
3  * with caching and cookie support
4  *
5  * License:
6  *   This Source Code Form is subject to the terms of
7  *   the Mozilla Public License, v. 2.0. If a copy of
8  *   the MPL was not distributed with this file, You
9  *   can obtain one at http://mozilla.org/MPL/2.0/.
10  *
11  * Authors:
12  *   Vladimir Panteleev <ae@cy.md>
13  */
14 
15 module ae.sys.net.cachedcurl;
16 
17 // TODO: refactor into an abstract Cached!Network wrapper?
18 
19 import std.algorithm.comparison;
20 import std.conv;
21 import std.exception;
22 import std.file;
23 import std.net.curl;
24 import std.path;
25 import std.string;
26 import std.typecons;
27 
28 import ae.net.http.common;
29 import ae.net.ietf.url;
30 import ae.sys.dataio;
31 import ae.sys.dataset;
32 import ae.sys.file;
33 import ae.sys.net;
34 import ae.utils.array;
35 import ae.utils.digest;
36 import ae.utils.json;
37 import ae.utils.time;
38 
/// libcurl-based implementation of `Network` which caches responses.
/// Allows quickly re-running some deterministic process without redownloading all URLs.
///
/// Responses are stored under `cacheDir`. The cache key is an MD5 digest of
/// the request URL, method and body; request headers, cookies and the
/// redirect limit are not part of the key.
class CachedCurlNetwork : Network
{
	/// Curl HTTP object
	/// Can be customized after construction.
	HTTP http;

	/// Directory for caching responses
	string cacheDir = "cache";

	/// Ignore cache entries older than the given time
	StdTime epoch = 0;

	/// Directory for reading cookies.
	/// May be moved to a lambda in the future.
	/// Format is one file per host, with hostname ~ cookieExt being the file name.
	/// Contents is one line for the entire HTTP "Cookie" header.
	string cookieDir, cookieExt;

	this()
	{
		http = HTTP();
	} ///

	/// Response metadata.
	static struct Metadata
	{
		HTTP.StatusLine statusLine; /// HTTP status line.
		string[][string] headers; /// HTTP response headers.
	}

	/// Description of a single HTTP request.
	static struct Request
	{
		string url; /// URL to request.
		HTTP.Method method = HTTP.Method.get; /// HTTP method to use.
		const(ubyte)[] data; /// Request body. No body is sent when this is `null`.
		const(string[2])[] headers; /// Extra request headers, as `[name, value]` pairs.

		/// Maximum number of redirects to follow.
		/// By default, choose a number appropriate to the method.
		int maxRedirects = int.min;
	} ///

	// Performs `request` using `instance.http`, writing the response body to
	// `target` and the status line / headers (serialized as JSON) to
	// `metadataPath`.
	//
	// NOTE(review): this function is only invoked through `cached!_req` (see
	// `cachedReq`). Presumably `cached` needs it to be accessible from
	// ae.sys.file and locates the output file via the parameter name
	// `target` - confirm before renaming parameters or making this private.
	/*private*/ static void _req(CachedCurlNetwork instance, ref const Request request, string target, string metadataPath)
	{
		with (instance)
		{
			// The HTTP object is reused across requests, so reset per-request state.
			http.clearRequestHeaders();
			http.method = request.method;

			// An explicit redirect limit wins; int.min means "not specified".
			if (request.maxRedirects != int.min)
				http.maxRedirects = request.maxRedirects;
			else
			if (request.method == HTTP.Method.head)
				http.maxRedirects = uint.max; // same value httpRequest uses for "do not follow redirects"
			else
				http.maxRedirects = 10;

			if (cookieDir)
			{
				// The host is only needed to locate the cookie file, so it is
				// extracted only when cookies are enabled. For "scheme://host/..."
				// the host is the third "/"-separated component.
				auto urlParts = request.url.split("/");
				enforce(urlParts.length > 2,
					"Cannot determine host for cookie lookup: " ~ request.url);
				auto cookiePath = buildPath(cookieDir, urlParts[2] ~ cookieExt);
				if (cookiePath.exists)
					http.addRequestHeader("Cookie", cookiePath.readText.chomp());
			}

			foreach (header; request.headers)
				http.addRequestHeader(header[0], header[1]);

			// Collect the status line and headers as they are received.
			Metadata metadata;
			http.onReceiveHeader =
				(in char[] key, in char[] value)
				{
					metadata.headers[key.idup] ~= value.idup;
				};
			http.onReceiveStatusLine =
				(HTTP.StatusLine statusLine)
				{
					metadata.statusLine = statusLine;
				};

			if (request.data)
			{
				// Feed the request body to curl in whatever chunk sizes it asks for.
				const(ubyte)[] data = request.data;
				http.addRequestHeader("Content-Length", data.length.text);
				http.onSend = (void[] voidBuf)
					{
						auto buf = cast(ubyte[])voidBuf;
						size_t len = min(buf.length, data.length);
						buf[0..len] = data[0..len];
						data = data[len..$];
						return len;
					};
			}
			else
				http.onSend = null; // clear any body callback left over from a previous request

			download!HTTP(request.url, target, http);
			// Metadata is written only after the download has completed.
			write(metadataPath, metadata.toJson);
		}
	}

	/// Handle to a cached response on disk.
	static struct Response
	{
		string responsePath; /// Path to response data.
		string metadataPath; /// Path to response metadata.

		/// Returns the response data, if it was successful.
		/// Throws `CachedCurlException` otherwise.
		@property ubyte[] responseData()
		{
			checkOK();
			return cast(ubyte[])std.file.read(responsePath);
		}

		/// Returns the response metadata.
		/// If the metadata file does not exist, returns `Metadata.init`.
		@property Metadata metadata()
		{
			return metadataPath.exists ? metadataPath.readText.jsonParse!Metadata : Metadata.init;
		}

		/// Check if the response succeeded (status code is 2xx).
		@property bool ok()
		{
			return metadata.statusLine.code / 100 == 2;
		}

		/// Check if the response succeeded, and throws an error if not.
		ref Response checkOK() return
		{
			if (!ok)
				throw new CachedCurlException(metadata);
			return this;
		}
	} ///

	/// Exception thrown for failed requests (server errors).
	static class CachedCurlException : Exception
	{
		Metadata metadata; /// Metadata of the failed response.

		private this(Metadata metadata, string fn = __FILE__, size_t ln = __LINE__)
		{
			this.metadata = metadata;
			super("Request failed: " ~ metadata.statusLine.reason, fn, ln);
		}
	}

	/// Perform a raw request and return information about the resulting cached response.
	Response cachedReq(ref const Request request)
	{
		// Cache key: digest of URL, method and body.
		auto hash = getDigestString!MD5(request.url.asBytes ~ cast(char)request.method ~ request.data);
		// Entries are spread over subdirectories named after the first two digest characters.
		auto path = buildPath(cacheDir, hash[0..2], hash);
		ensurePathExists(path);
		auto metadataPath = path ~ ".metadata";
		// Drop entries older than `epoch`, so that they are fetched again below.
		if (path.exists && path.timeLastModified.stdTime < epoch)
			path.remove();
		cached!_req(this, request, path, metadataPath);
		return Response(path, metadataPath);
	}

	/// ditto
	Response cachedReq(string url, HTTP.Method method, const(ubyte)[] data = null)
	{
		auto req = Request(url, method, data);
		return cachedReq(req);
	}

	string downloadFile(string url)
	{
		return cachedReq(url, HTTP.Method.get).checkOK.responsePath;
	} /// Download a file and return the response path.

	/// Download `url` (through the cache) and copy the response to `target`.
	override void downloadFile(string url, string target)
	{
		std.file.copy(downloadFile(url), target);
	}

	/// Return the body of a cached GET of `url`.
	/// Throws `CachedCurlException` if the response was not successful.
	override ubyte[] getFile(string url)
	{
		return cachedReq(url, HTTP.Method.get).responseData;
	}

	/// Check whether a cached GET of `url` yields a 2xx status.
	override bool urlOK(string url)
	{
		return cachedReq(url, HTTP.Method.get).ok;
	}

	/// Perform a cached HEAD request for `url` and return the last
	/// "location" response header, resolved relative to `url`.
	/// Throws if the response has no such header.
	override string resolveRedirect(string url)
	{
		return
			url.applyRelativeURL(
				cachedReq(url, HTTP.Method.head, null)
				.metadata
				.headers
				.get("location", null)
				.enforce("Not a redirect: " ~ url)
				[$-1]);
	}

	/// POST `data` to `url` (through the cache) and return the response body.
	/// Throws `CachedCurlException` if the response was not successful.
	override ubyte[] post(string url, const(ubyte)[] data)
	{
		return cachedReq(url, HTTP.Method.post, data).responseData;
	}

	/// Perform (or replay from cache) an arbitrary `HttpRequest`.
	/// Redirects are not followed; the response is returned as-is,
	/// whatever its status code.
	/// Throws if the request method is not recognized.
	override HttpResponse httpRequest(HttpRequest request)
	{
		Request req;
		req.url = request.url;
		switch (request.method.toUpper)
		{
			case "HEAD"   : req.method = HTTP.Method.head; break;
			case "GET"    : req.method = HTTP.Method.get; break;
			case "POST"   : req.method = HTTP.Method.post; break;
			case "PUT"    : req.method = HTTP.Method.put; break;
			case "DELETE" : // the standard HTTP method name
			case "DEL"    : req.method = HTTP.Method.del; break; // accepted for backward compatibility
			case "OPTIONS": req.method = HTTP.Method.options; break;
			case "TRACE"  : req.method = HTTP.Method.trace; break;
			case "CONNECT": req.method = HTTP.Method.connect; break;
			case "PATCH"  : req.method = HTTP.Method.patch; break;
			default: throw new Exception("Unknown HTTP method: " ~ request.method);
		}
		req.data = request.data.joinToGC();
		foreach (name, value; request.headers)
			req.headers ~= [name, value];
		req.maxRedirects = uint.max; // Do not follow redirects, return them as-is

		auto resp = cachedReq(req);
		auto metadata = resp.metadata;

		// Translate the cached files back into an HttpResponse.
		auto response = new HttpResponse;
		response.status = cast(HttpStatusCode)metadata.statusLine.code;
		response.statusMessage = metadata.statusLine.reason;
		foreach (name, values; metadata.headers)
			foreach (value; values)
				response.headers.add(name, value);
		response.data = DataVec(readData(resp.responsePath));
		return response;
	}
}
273 
/// Module-level shorthand for `CachedCurlNetwork.CachedCurlException`.
alias CachedCurlException = CachedCurlNetwork.CachedCurlException;

// Module constructor: create a `CachedCurlNetwork` and assign it to `net`
// (presumably the active `Network` instance declared in ae.sys.net - confirm).
static this()
{
	auto cachedNetwork = new CachedCurlNetwork();
	net = cachedNetwork;
}