/**
 * ae.sys.net implementation for HTTP using Curl,
 * with caching and cookie support
 *
 * License:
 *   This Source Code Form is subject to the terms of
 *   the Mozilla Public License, v. 2.0. If a copy of
 *   the MPL was not distributed with this file, You
 *   can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Authors:
 *   Vladimir Panteleev <ae@cy.md>
 */

module ae.sys.net.cachedcurl;

// TODO: refactor into an abstract Cached!Network wrapper?

import std.algorithm.comparison;
import std.conv;
import std.exception;
import std.file;
import std.net.curl;
import std.path;
import std.string;
import std.typecons;

import ae.net.http.common;
import ae.net.ietf.url;
import ae.sys.dataio;
import ae.sys.dataset;
import ae.sys.file;
import ae.sys.net;
import ae.utils.digest;
import ae.utils.json;
import ae.utils.time;

/// libcurl-based implementation of `Network` which caches responses.
/// Allows quickly re-running some deterministic process without redownloading all URLs.
///
/// Each response is stored as two files under `cacheDir`: the response body,
/// and a sibling `.metadata` file holding the JSON-serialized `Metadata`.
class CachedCurlNetwork : Network
{
	/// Curl HTTP object
	/// Can be customized after construction.
	/// Note that request headers, method, redirect limit and the
	/// send / receive callbacks are overwritten on every request.
	HTTP http;

	/// Directory for caching responses
	string cacheDir = "cache";

	/// Ignore cache entries older than the given time
	StdTime epoch = 0;

	/// Directory for reading cookies.
	/// May be moved to a lambda in the future.
	/// Format is one file per host, with hostname ~ cookieExt being the file name.
	/// Contents is one line for the entire HTTP "Cookie" header.
	string cookieDir, cookieExt;

	/// Creates the network object with a default-constructed Curl HTTP handle.
	this()
	{
		http = HTTP();
	}

	/// Response metadata.
	static struct Metadata
	{
		HTTP.StatusLine statusLine; /// HTTP status line.
		string[][string] headers; /// HTTP response headers.
	}

	/// Description of a single request.
	static struct Request
	{
		string url; /// URL to request.
		HTTP.Method method = HTTP.Method.get; /// HTTP method.
		const(void)[] data; /// Request body (sent only when non-null).
		const(string[2])[] headers; /// Additional request headers, as [name, value] pairs.

		/// Maximum number of redirects to follow.
		/// By default, choose a number appropriate to the method.
		int maxRedirects = int.min;
	}

	/// Performs `request` through `instance.http`, saving the response body
	/// to `target` and the JSON-serialized `Metadata` (status line and
	/// response headers) to `metadataPath`.
	///
	/// The metadata file is written only after the download has returned.
	///
	/// Not part of the public interface; it is non-private presumably only so
	/// that it can be passed as an alias to `cached` - TODO confirm.
	///
	/// Throws: `Exception` if cookies are enabled (`cookieDir` is set) and
	/// no host component can be extracted from `request.url`.
	/*private*/ static void _req(CachedCurlNetwork instance, ref const Request request, string target, string metadataPath)
	{
		with (instance)
		{
			http.clearRequestHeaders();
			http.method = request.method;

			if (request.maxRedirects != int.min)
				http.maxRedirects = request.maxRedirects;
			else
			if (request.method == HTTP.Method.head)
				// Same sentinel value which httpRequest uses to get
				// redirects returned as-is instead of followed.
				http.maxRedirects = uint.max;
			else
				http.maxRedirects = 10;

			if (cookieDir)
			{
				// The host is only needed to locate the cookie file, so it
				// is only extracted (and validated) when cookies are in use.
				// "scheme://host/path" splits into ["scheme:", "", "host", ...].
				auto urlParts = request.url.split("/");
				enforce(urlParts.length > 2, "Cannot determine host of URL: " ~ request.url);
				auto cookiePath = buildPath(cookieDir, urlParts[2] ~ cookieExt);
				if (cookiePath.exists)
					http.addRequestHeader("Cookie", cookiePath.readText.chomp());
			}

			foreach (header; request.headers)
				http.addRequestHeader(header[0], header[1]);

			// Collected by the callbacks below while the transfer runs.
			Metadata metadata;
			http.onReceiveHeader =
				(in char[] key, in char[] value)
				{
					// A header may occur several times; keep every value.
					metadata.headers[key.idup] ~= value.idup;
				};
			http.onReceiveStatusLine =
				(HTTP.StatusLine statusLine)
				{
					metadata.statusLine = statusLine;
				};

			if (request.data)
			{
				// Remaining unsent part of the body; shrinks as Curl pulls chunks.
				const(void)[] data = request.data;
				http.addRequestHeader("Content-Length", data.length.text);
				http.onSend = (void[] buf)
					{
						size_t len = min(buf.length, data.length);
						buf[0..len] = data[0..len];
						data = data[len..$];
						return len;
					};
			}
			else
				http.onSend = null; // Do not reuse a previous request's body callback.

			download!HTTP(request.url, target, http);
			write(metadataPath, metadata.toJson);
		}
	}

	/// Handle to a cached response (body file plus metadata file).
	static struct Response
	{
		string responsePath; /// Path to response data.
		string metadataPath; /// Path to response metadata.

		/// Returns the response data, if it was successful.
		/// Throws: `CachedCurlException` if the response status is not 2xx.
		@property ubyte[] responseData()
		{
			checkOK();
			return cast(ubyte[])std.file.read(responsePath);
		}

		/// Returns the response metadata.
		/// The metadata file is read and parsed on every call;
		/// if it does not exist, `Metadata.init` is returned.
		@property Metadata metadata()
		{
			return metadataPath.exists ? metadataPath.readText.jsonParse!Metadata : Metadata.init;
		}

		/// Check if the response succeeded (status code is 2xx).
		@property bool ok()
		{
			return metadata.statusLine.code / 100 == 2;
		}

		/// Check if the response succeeded, and throws an error if not.
		/// Returns: this response, to allow chaining.
		/// Throws: `CachedCurlException` carrying the response metadata.
		ref Response checkOK() return
		{
			// Parse the metadata file once, and reuse it for both
			// the check and the exception.
			auto meta = metadata;
			if (meta.statusLine.code / 100 != 2)
				throw new CachedCurlException(meta);
			return this;
		}
	}

	/// Exception thrown for failed requests (server errors).
	static class CachedCurlException : Exception
	{
		Metadata metadata; /// Metadata of the failed response.

		private this(Metadata metadata, string fn = __FILE__, size_t ln = __LINE__)
		{
			this.metadata = metadata;
			super("Request failed: " ~ metadata.statusLine.reason, fn, ln);
		}
	}

	/// Perform a raw request and return information about the resulting cached response.
	///
	/// The cache entry is keyed by the MD5 of the URL, method and request body
	/// (request headers are not part of the key).
	/// An existing entry whose body file is older than `epoch` is discarded first.
	Response cachedReq(ref const Request request)
	{
		auto hash = getDigestString!MD5(request.url ~ cast(char)request.method ~ request.data);
		auto path = buildPath(cacheDir, hash[0..2], hash);
		ensurePathExists(path);
		auto metadataPath = path ~ ".metadata";
		if (path.exists && path.timeLastModified.stdTime < epoch)
		{
			// Discard the expired body together with its metadata, so that
			// old metadata can never be paired with a missing or new body.
			path.remove();
			if (metadataPath.exists)
				metadataPath.remove();
		}
		cached!_req(this, request, path, metadataPath);
		return Response(path, metadataPath);
	}

	/// ditto
	Response cachedReq(string url, HTTP.Method method, in void[] data = null)
	{
		auto req = Request(url, method, data);
		return cachedReq(req);
	}

	/// Download a file and return the response path.
	/// Throws: `CachedCurlException` if the response status is not 2xx.
	string downloadFile(string url)
	{
		return cachedReq(url, HTTP.Method.get).checkOK.responsePath;
	}

	/// Download a file (through the cache) and copy the response to `target`.
	override void downloadFile(string url, string target)
	{
		std.file.copy(downloadFile(url), target);
	}

	/// Returns the body of a GET request to `url`.
	/// Throws: `CachedCurlException` if the response status is not 2xx.
	override void[] getFile(string url)
	{
		return cachedReq(url, HTTP.Method.get).responseData;
	}

	/// Returns whether a GET request to `url` results in a 2xx status.
	override bool urlOK(string url)
	{
		return cachedReq(url, HTTP.Method.get).ok;
	}

	/// Performs a HEAD request to `url`, and returns the (last) `location`
	/// response header, resolved relative to `url`.
	/// Throws: `Exception` if the response has no `location` header.
	override string resolveRedirect(string url)
	{
		return
			url.applyRelativeURL(
				cachedReq(url, HTTP.Method.head, null)
				.metadata
				.headers
				.get("location", null)
				.enforce("Not a redirect: " ~ url)
				[$-1]);
	}

	/// Returns the response body of a POST request to `url` with body `data`.
	/// Throws: `CachedCurlException` if the response status is not 2xx.
	override void[] post(string url, in void[] data)
	{
		return cachedReq(url, HTTP.Method.post, data).responseData;
	}

	/// Performs an arbitrary HTTP request through the cache.
	/// Redirects are not followed, and non-2xx responses are returned
	/// to the caller rather than thrown.
	/// Throws: `Exception` if `request.method` is not a recognized HTTP method.
	override HttpResponse httpRequest(HttpRequest request)
	{
		Request req;
		req.url = request.url;
		switch (request.method.toUpper)
		{
			case "HEAD"   : req.method = HTTP.Method.head; break;
			case "GET"    : req.method = HTTP.Method.get; break;
			case "POST"   : req.method = HTTP.Method.post; break;
			case "PUT"    : req.method = HTTP.Method.put; break;
			// "DELETE" is the actual HTTP method name;
			// "DEL" is still accepted for backwards compatibility.
			case "DELETE", "DEL":
			                req.method = HTTP.Method.del; break;
			case "OPTIONS": req.method = HTTP.Method.options; break;
			case "TRACE"  : req.method = HTTP.Method.trace; break;
			case "CONNECT": req.method = HTTP.Method.connect; break;
			case "PATCH"  : req.method = HTTP.Method.patch; break;
			default: throw new Exception("Unknown HTTP method: " ~ request.method);
		}
		req.data = request.data.joinToHeap;
		foreach (name, value; request.headers)
			req.headers ~= [name, value];
		req.maxRedirects = uint.max; // Do not follow redirects, return them as-is

		auto resp = cachedReq(req);
		auto metadata = resp.metadata;

		auto response = new HttpResponse;
		response.status = cast(HttpStatusCode)metadata.statusLine.code;
		response.statusMessage = metadata.statusLine.reason;
		foreach (name, values; metadata.headers)
			foreach (value; values)
				response.headers.add(name, value);
		response.data = DataVec(readData(resp.responsePath));
		return response;
	}
}

/// Module-level alias for the exception type thrown on failed requests.
alias CachedCurlException = CachedCurlNetwork.CachedCurlException;

/// Installs a `CachedCurlNetwork` as the `net` implementation.
static this()
{
	net = new CachedCurlNetwork();
}