1 /**
2  * ae.sys.net implementation for HTTP using Curl,
3  * with caching and cookie support
4  *
5  * License:
6  *   This Source Code Form is subject to the terms of
7  *   the Mozilla Public License, v. 2.0. If a copy of
8  *   the MPL was not distributed with this file, You
9  *   can obtain one at http://mozilla.org/MPL/2.0/.
10  *
11  * Authors:
12  *   Vladimir Panteleev <ae@cy.md>
13  */
14 
15 module ae.sys.net.cachedcurl;
16 
17 // TODO: refactor into an abstract Cached!Network wrapper?
18 
19 import std.algorithm.comparison;
20 import std.conv;
21 import std.exception;
22 import std.file;
23 import std.net.curl;
24 import std.path;
25 import std.string;
26 import std.typecons;
27 
28 import ae.net.http.common;
29 import ae.net.ietf.url;
30 import ae.sys.dataio;
31 import ae.sys.dataset;
32 import ae.sys.file;
33 import ae.sys.net;
34 import ae.utils.digest;
35 import ae.utils.json;
36 import ae.utils.time;
37 
/// libcurl-based implementation of `Network` which caches responses on disk.
/// Allows quickly re-running some deterministic process without redownloading all URLs.
///
/// Each request is stored under `cacheDir` in a file named after the MD5 of
/// the request's URL, method and body, next to a `.metadata` file holding the
/// JSON-serialized status line and response headers.
class CachedCurlNetwork : Network
{
	/// Curl HTTP object used to perform all requests.
	/// Can be customized after construction.
	HTTP http;

	/// Directory for caching responses.
	string cacheDir = "cache";

	/// Ignore (and delete) cache entries older than the given time.
	StdTime epoch = 0;

	/// Directory for reading cookies.
	/// May be moved to a lambda in the future.
	/// Format is one file per host, with hostname ~ cookieExt being the file name.
	/// Contents is one line for the entire HTTP "Cookie" header.
	string cookieDir, cookieExt;

	/// Creates the instance with a fresh curl `HTTP` object.
	this()
	{
		http = HTTP();
	}

	/// Response metadata.
	/// Serialized to / parsed from JSON, so the field names are part of the
	/// on-disk cache format.
	static struct Metadata
	{
		HTTP.StatusLine statusLine; /// HTTP status line.
		string[][string] headers; /// HTTP response headers (all received values per header name).
	}

	/// Description of a single request.
	/// The field order matters: `Request(url, method, data)` is constructed positionally.
	static struct Request
	{
		string url; /// Request URL.
		HTTP.Method method = HTTP.Method.get; /// Request method.
		const(void)[] data; /// Request body, if any.
		const(string[2])[] headers; /// Extra request headers, as `[name, value]` pairs.

		/// Maximum number of redirects to follow.
		/// By default (`int.min`), choose a number appropriate to the method.
		int maxRedirects = int.min;
	}

	/// Performs `request` using `instance.http`, saving the response body to
	/// `target` and the JSON-serialized `Metadata` to `metadataPath`.
	///
	/// This is the uncached worker; it is invoked through `cached!_req` in
	/// `cachedReq`. The parameter names are left untouched on purpose.
	/// NOTE(review): `cached` may identify the output file by parameter name -
	/// confirm in ae.sys.file before renaming anything.
	/*private*/ static void _req(CachedCurlNetwork instance, ref const Request request, string target, string metadataPath)
	{
		with (instance)
		{
			// The HTTP object is reused across requests, so reset per-request state.
			http.clearRequestHeaders();
			http.method = request.method;

			if (request.maxRedirects != int.min)
				http.maxRedirects = request.maxRedirects;
			else
			if (request.method == HTTP.Method.head)
				// Same value that httpRequest uses to get redirects returned
				// as-is; resolveRedirect needs the "location" header itself.
				http.maxRedirects = uint.max;
			else
				http.maxRedirects = 10;

			if (cookieDir)
			{
				// The host is the third "/"-separated component of "scheme://host/...".
				// It is only needed for the cookie lookup, so a URL without a
				// scheme is rejected only when cookies are actually in use.
				auto urlParts = request.url.split("/");
				enforce(urlParts.length > 2, "Cannot determine host of URL: " ~ request.url);
				auto host = urlParts[2];
				auto cookiePath = buildPath(cookieDir, host ~ cookieExt);
				if (cookiePath.exists)
					http.addRequestHeader("Cookie", cookiePath.readText.chomp());
			}

			foreach (header; request.headers)
				http.addRequestHeader(header[0], header[1]);

			// Collect the status line and every received header value.
			Metadata metadata;
			http.onReceiveHeader =
				(in char[] key, in char[] value)
				{
					metadata.headers[key.idup] ~= value.idup;
				};
			http.onReceiveStatusLine =
				(HTTP.StatusLine statusLine)
				{
					metadata.statusLine = statusLine;
				};

			if (request.data)
			{
				// Feed the request body to curl in chunks of whatever size it asks for.
				const(void)[] data = request.data;
				http.addRequestHeader("Content-Length", data.length.text);
				http.onSend = (void[] buf)
					{
						size_t len = min(buf.length, data.length);
						buf[0..len] = data[0..len];
						data = data[len..$];
						return len;
					};
			}
			else
				http.onSend = null; // Drop any sender left over from a previous request.

			download!HTTP(request.url, target, http);
			write(metadataPath, metadata.toJson);
		}
	}

	/// Handle to a cached response (paths to its body and metadata files).
	static struct Response
	{
		string responsePath; /// Path to response data.
		string metadataPath; /// Path to response metadata.

		/// Returns the response data, if it was successful.
		/// Throws: `CachedCurlException` if the status is not 2xx.
		@property ubyte[] responseData()
		{
			checkOK();
			return cast(ubyte[])std.file.read(responsePath);
		}

		/// Returns the response metadata, parsed from `metadataPath`,
		/// or `Metadata.init` if that file does not exist.
		@property Metadata metadata()
		{
			return metadataPath.exists ? metadataPath.readText.jsonParse!Metadata : Metadata.init;
		}

		/// Check if the response succeeded (status code is 2xx).
		@property bool ok()
		{
			return metadata.statusLine.code / 100 == 2;
		}

		/// Check if the response succeeded, and throws an error if not.
		/// Returns: this `Response`, to allow chaining.
		/// Throws: `CachedCurlException` carrying the response metadata.
		ref Response checkOK() return
		{
			// Parse the metadata file once and reuse it for both the check
			// and the exception.
			auto meta = metadata;
			if (meta.statusLine.code / 100 != 2)
				throw new CachedCurlException(meta);
			return this;
		}
	}

	/// Exception thrown for failed requests (server errors).
	static class CachedCurlException : Exception
	{
		Metadata metadata; /// Metadata of the failed response.

		private this(Metadata metadata, string fn = __FILE__, size_t ln = __LINE__)
		{
			this.metadata = metadata;
			super("Request failed: " ~ metadata.statusLine.reason, fn, ln);
		}
	}

	/// Perform a raw request and return information about the resulting cached response.
	///
	/// The cache key is the MD5 of the URL, the method and the request body;
	/// request headers and `maxRedirects` are not part of the key.
	Response cachedReq(ref const Request request)
	{
		auto hash = getDigestString!MD5(request.url ~ cast(char)request.method ~ request.data);
		auto path = buildPath(cacheDir, hash[0..2], hash);
		ensurePathExists(path);
		auto metadataPath = path ~ ".metadata";
		if (path.exists && path.timeLastModified.stdTime < epoch)
		{
			// Expired entry: drop the body and its metadata together, so that
			// a failed re-download cannot leave stale metadata behind.
			path.remove();
			if (metadataPath.exists)
				metadataPath.remove();
		}
		// NOTE(review): relies on ae.sys.file's `cached` wrapper to skip `_req`
		// when the response file is already present - confirm there.
		cached!_req(this, request, path, metadataPath);
		return Response(path, metadataPath);
	}

	/// ditto
	Response cachedReq(string url, HTTP.Method method, in void[] data = null)
	{
		auto req = Request(url, method, data);
		return cachedReq(req);
	}

	/// Download a file and return the response path.
	/// Throws: `CachedCurlException` if the status is not 2xx.
	string downloadFile(string url)
	{
		return cachedReq(url, HTTP.Method.get).checkOK.responsePath;
	}

	/// Download `url` through the cache and copy the response body to `target`.
	override void downloadFile(string url, string target)
	{
		std.file.copy(downloadFile(url), target);
	}

	/// Return the response body of a (cached) GET request for `url`.
	/// Throws: `CachedCurlException` if the status is not 2xx.
	override void[] getFile(string url)
	{
		return cachedReq(url, HTTP.Method.get).responseData;
	}

	/// Return whether a (cached) GET request for `url` yields a 2xx status.
	override bool urlOK(string url)
	{
		return cachedReq(url, HTTP.Method.get).ok;
	}

	/// Perform a (cached) HEAD request for `url` and return the last received
	/// `location` header, applied to `url` with `applyRelativeURL`.
	/// Throws: `Exception` if the response has no `location` header.
	override string resolveRedirect(string url)
	{
		return
			url.applyRelativeURL(
				cachedReq(url, HTTP.Method.head, null)
				.metadata
				.headers
				.get("location", null)
				.enforce("Not a redirect: " ~ url)
				[$-1]);
	}

	/// Return the response body of a (cached) POST of `data` to `url`.
	/// Throws: `CachedCurlException` if the status is not 2xx.
	override void[] post(string url, in void[] data)
	{
		return cachedReq(url, HTTP.Method.post, data).responseData;
	}

	/// Perform (or replay from the cache) an arbitrary `HttpRequest`.
	/// The status, headers and body are returned as-is, whatever the status code.
	/// Throws: `Exception` if the request method is not recognized.
	override HttpResponse httpRequest(HttpRequest request)
	{
		Request req;
		req.url = request.url;
		switch (request.method.toUpper)
		{
			case "HEAD"   : req.method = HTTP.Method.head; break;
			case "GET"    : req.method = HTTP.Method.get; break;
			case "POST"   : req.method = HTTP.Method.post; break;
			case "PUT"    : req.method = HTTP.Method.put; break;
			// "DELETE" is the actual HTTP method token; "DEL" is still
			// accepted for backward compatibility.
			case "DELETE", "DEL": req.method = HTTP.Method.del; break;
			case "OPTIONS": req.method = HTTP.Method.options; break;
			case "TRACE"  : req.method = HTTP.Method.trace; break;
			case "CONNECT": req.method = HTTP.Method.connect; break;
			case "PATCH"  : req.method = HTTP.Method.patch; break;
			default: throw new Exception("Unknown HTTP method: " ~ request.method);
		}
		req.data = request.data.joinToHeap;
		foreach (name, value; request.headers)
			req.headers ~= [name, value];
		req.maxRedirects = uint.max; // Do not follow redirects, return them as-is

		auto resp = cachedReq(req);
		auto metadata = resp.metadata;

		auto response = new HttpResponse;
		response.status = cast(HttpStatusCode)metadata.statusLine.code;
		response.statusMessage = metadata.statusLine.reason;
		foreach (name, values; metadata.headers)
			foreach (value; values)
				response.headers.add(name, value);
		response.data = DataVec(readData(resp.responsePath));
		return response;
	}
}
271 
/// Module-level shorthand for `CachedCurlNetwork.CachedCurlException`.
alias CachedCurlException = CachedCurlNetwork.CachedCurlException;
273 
/// Module constructor: creates a `CachedCurlNetwork` and assigns it to `net`.
static this()
{
	auto cachedNetwork = new CachedCurlNetwork;
	net = cachedNetwork;
}