/**
 * ae.sys.net implementation for HTTP using Curl,
 * with caching and cookie support
 *
 * License:
 *   This Source Code Form is subject to the terms of
 *   the Mozilla Public License, v. 2.0. If a copy of
 *   the MPL was not distributed with this file, You
 *   can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Authors:
 *   Vladimir Panteleev <ae@cy.md>
 */

module ae.sys.net.cachedcurl;

// TODO: refactor into an abstract Cached!Network wrapper?

import std.algorithm.comparison;
import std.algorithm.iteration;
import std.conv;
import std.exception;
import std.file;
import std.net.curl;
import std.path;
import std.string;
import std.typecons;

import ae.net.http.common;
import ae.net.ietf.url;
import ae.sys.dataio;
import ae.sys.dataset;
import ae.sys.file;
import ae.sys.net;
import ae.utils.array;
import ae.utils.digest;
import ae.utils.json;
import ae.utils.time;

/// libcurl-based implementation of `Network` which caches responses.
/// Allows quickly re-running some deterministic process without redownloading all URLs.
class CachedCurlNetwork : Network
{
	/// Curl HTTP object
	/// Can be customized after construction.
	HTTP http;

	/// Directory for caching responses
	string cacheDir = "cache";

	/// Ignore cache entries older than the given time
	StdTime epoch = 0;

	/// Directory for reading cookies.
	/// May be moved to a lambda in the future.
	/// Format is one file per host, with hostname ~ cookieExt being the file name.
	/// Contents is one line for the entire HTTP "Cookie" header.
	string cookieDir, cookieExt;

	this()
	{
		http = HTTP();
	} ///

	/// Response metadata.
	static struct Metadata
	{
		HTTP.StatusLine statusLine; /// HTTP status line.
		string[][string] headers; /// HTTP response headers.
	}

	/// Description of a single request to perform (or to look up in the cache).
	static struct Request
	{
		string url; ///
		HTTP.Method method = HTTP.Method.get; ///
		const(ubyte)[] data; ///
		const(string[2])[] headers; ///

		/// Maximum number of redirects to follow.
		/// By default, choose a number appropriate to the method.
		int maxRedirects = int.min;
	} ///

	/// Perform `request` using `instance.http`, saving the response body to
	/// `target` and the JSON-serialized `Metadata` to `metadataPath`.
	/// Invoked through `cached!_req` by `cachedReq`; not intended to be
	/// called directly.
	/*private*/ static void _req(CachedCurlNetwork instance, ref const Request request, string target, string metadataPath)
	{
		with (instance)
		{
			http.clearRequestHeaders();
			http.method = request.method;

			// int.min is the "not specified" sentinel (see Request.maxRedirects).
			if (request.maxRedirects != int.min)
				http.maxRedirects = request.maxRedirects;
			else
			if (request.method == HTTP.Method.head)
				// Same value which httpRequest uses to not follow redirects,
				// so that resolveRedirect can see the Location header.
				http.maxRedirects = uint.max;
			else
				http.maxRedirects = 10;

			if (cookieDir)
			{
				// The host is only needed to locate the cookie file, so it is
				// extracted (and validated) only when cookies are enabled.
				// For "scheme://host/path", splitting on "/" puts the host at index 2.
				auto urlParts = request.url.split("/");
				enforce(urlParts.length > 2, "Cannot determine host of URL: " ~ request.url);
				auto host = urlParts[2];

				auto cookiePath = buildPath(cookieDir, host ~ cookieExt);
				if (cookiePath.exists)
					http.addRequestHeader("Cookie", cookiePath.readText.chomp());
			}

			foreach (header; request.headers)
				http.addRequestHeader(header[0], header[1]);

			// Collect the status line and headers as they arrive.
			Metadata metadata;
			http.onReceiveHeader =
				(in char[] key, in char[] value)
				{
					metadata.headers[key.idup] ~= value.idup;
				};
			http.onReceiveStatusLine =
				(HTTP.StatusLine statusLine)
				{
					metadata.statusLine = statusLine;
				};

			if (request.data)
			{
				// Feed the request body to curl in chunks of whatever size it asks for.
				const(ubyte)[] data = request.data;
				http.addRequestHeader("Content-Length", data.length.text);
				http.onSend = (void[] voidBuf)
					{
						auto buf = cast(ubyte[])voidBuf;
						size_t len = min(buf.length, data.length);
						buf[0..len] = data[0..len];
						data = data[len..$];
						return len;
					};
			}
			else
				http.onSend = null;

			download!HTTP(request.url, target, http);
			// Metadata is written after the body has been downloaded.
			write(metadataPath, metadata.toJson);
		}
	}

	/// Handle to a cached response on disk.
	static struct Response
	{
		string responsePath; /// Path to response data.
		string metadataPath; /// Path to response metadata.

		/// Returns the response data, if it was successful.
		/// Throws: `CachedCurlException` if the response was not successful.
		@property ubyte[] responseData()
		{
			checkOK();
			return cast(ubyte[])std.file.read(responsePath);
		}

		/// Returns the response metadata.
		/// If the metadata file does not exist, returns `Metadata.init`.
		@property Metadata metadata()
		{
			return metadataPath.exists ? metadataPath.readText.jsonParse!Metadata : Metadata.init;
		}

		/// Check if the response succeeded (status code is 2xx).
		@property bool ok()
		{
			return metadata.statusLine.code / 100 == 2;
		}

		/// Check if the response succeeded, and throws an error if not.
		/// Returns: `this`, to allow chaining.
		ref Response checkOK() return
		{
			if (!ok)
				throw new CachedCurlException(metadata);
			return this;
		}
	} ///

	/// Exception thrown for failed requests (server errors).
	static class CachedCurlException : Exception
	{
		Metadata metadata; ///

		private this(Metadata metadata, string fn = __FILE__, size_t ln = __LINE__)
		{
			this.metadata = metadata;
			super("Request failed: " ~ metadata.statusLine.reason, fn, ln);
		}
	}

	/// Perform a raw request and return information about the resulting cached response.
	Response cachedReq(ref const Request request)
	{
		// The cache key is derived from the URL, method, body and headers.
		// NOTE(review): maxRedirects is not part of the key, so requests which
		// differ only in redirect policy share a cache entry. Left as is,
		// because changing the key would invalidate existing caches.
		auto hash = getDigestString!MD5(request.url.asBytes ~ cast(char)request.method ~ request.data ~ request.headers.map!(pair => pair[0].asBytes ~ pair[1].asBytes).join);
		auto path = buildPath(cacheDir, hash[0..2], hash);
		ensurePathExists(path);
		auto metadataPath = path ~ ".metadata";
		// Discard entries older than the configured epoch.
		if (path.exists && path.timeLastModified.stdTime < epoch)
			path.remove();
		cached!_req(this, request, path, metadataPath);
		return Response(path, metadataPath);
	}

	/// ditto
	Response cachedReq(string url, HTTP.Method method, const(ubyte)[] data = null)
	{
		auto req = Request(url, method, data);
		return cachedReq(req);
	}

	string downloadFile(string url)
	{
		return cachedReq(url, HTTP.Method.get).checkOK.responsePath;
	} /// Download a file and return the response path.

	override void downloadFile(string url, string target)
	{
		std.file.copy(downloadFile(url), target);
	} ///

	override ubyte[] getFile(string url)
	{
		return cachedReq(url, HTTP.Method.get).responseData;
	} ///

	override bool urlOK(string url)
	{
		return cachedReq(url, HTTP.Method.get).ok;
	} ///

	override string resolveRedirect(string url)
	{
		// Issue a HEAD request and resolve the last "location" header
		// against the original URL.
		return
			url.applyRelativeURL(
				cachedReq(url, HTTP.Method.head, null)
				.metadata
				.headers
				.get("location", null)
				.enforce("Not a redirect: " ~ url)
				[$-1]);
	} ///

	override ubyte[] post(string url, const(ubyte)[] data)
	{
		return cachedReq(url, HTTP.Method.post, data).responseData;
	} ///

	override HttpResponse httpRequest(HttpRequest request)
	{
		Request req;
		req.url = request.url;
		switch (request.method.toUpper)
		{
			case "HEAD"   : req.method = HTTP.Method.head; break;
			case "GET"    : req.method = HTTP.Method.get; break;
			case "POST"   : req.method = HTTP.Method.post; break;
			case "PUT"    : req.method = HTTP.Method.put; break;
			// "DELETE" is the standard method token; "DEL" is kept for
			// compatibility with callers which relied on the old spelling.
			case "DELETE", "DEL":
			                req.method = HTTP.Method.del; break;
			case "OPTIONS": req.method = HTTP.Method.options; break;
			case "TRACE"  : req.method = HTTP.Method.trace; break;
			case "CONNECT": req.method = HTTP.Method.connect; break;
			case "PATCH"  : req.method = HTTP.Method.patch; break;
			default: throw new Exception("Unknown HTTP method: " ~ request.method);
		}
		req.data = request.data.joinToGC();
		foreach (name, value; request.headers)
			req.headers ~= [name, value];
		req.maxRedirects = uint.max; // Do not follow redirects, return them as-is

		auto resp = cachedReq(req);
		auto metadata = resp.metadata;

		// Translate the cached response into an HttpResponse.
		// Unlike responseData, no status check is done here: non-2xx
		// responses are returned to the caller as they are.
		auto response = new HttpResponse;
		response.status = cast(HttpStatusCode)metadata.statusLine.code;
		response.statusMessage = metadata.statusLine.reason;
		foreach (name, values; metadata.headers)
			foreach (value; values)
				response.headers.add(name, value);
		response.data = DataVec(readData(resp.responsePath));
		return response;
	} ///
}

alias CachedCurlException = CachedCurlNetwork.CachedCurlException; ///

static this()
{
	net = new CachedCurlNetwork();
}