/**
 * ae.sys.net implementation for HTTP using Curl,
 * with caching and cookie support
 *
 * License:
 *   This Source Code Form is subject to the terms of
 *   the Mozilla Public License, v. 2.0. If a copy of
 *   the MPL was not distributed with this file, You
 *   can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Authors:
 *   Vladimir Panteleev <ae@cy.md>
 */

module ae.sys.net.cachedcurl;

// TODO: refactor into an abstract Cached!Network wrapper?

import std.algorithm.comparison;
import std.conv;
import std.exception;
import std.file;
import std.net.curl;
import std.path;
import std.string;
import std.typecons;

import ae.net.http.common;
import ae.net.ietf.url;
import ae.sys.dataio;
import ae.sys.dataset;
import ae.sys.file;
import ae.sys.net;
import ae.utils.array;
import ae.utils.digest;
import ae.utils.json;
import ae.utils.time;

/// libcurl-based implementation of `Network` which caches responses.
/// Allows quickly re-running some deterministic process without redownloading all URLs.
///
/// Each response body is stored in a file under `cacheDir`, next to a
/// `.metadata` file holding the JSON-serialized status line and headers.
class CachedCurlNetwork : Network
{
	/// Curl HTTP object
	/// Can be customized after construction.
	HTTP http;

	/// Directory for caching responses
	string cacheDir = "cache";

	/// Ignore cache entries older than the given time
	StdTime epoch = 0;

	/// Directory for reading cookies.
	/// May be moved to a lambda in the future.
	/// Format is one file per host, with hostname ~ cookieExt being the file name.
	/// Contents is one line for the entire HTTP "Cookie" header.
	string cookieDir, cookieExt;

	/// Creates the network object with a fresh curl `HTTP` handle.
	this()
	{
		http = HTTP();
	}

	/// Response metadata.
	static struct Metadata
	{
		HTTP.StatusLine statusLine; /// HTTP status line.
		string[][string] headers; /// HTTP response headers.
	}

	/// Description of a single request.
	/// The field order is relied upon by positional construction
	/// (`Request(url, method, data)`) in `cachedReq`.
	static struct Request
	{
		string url; /// Request URL.
		HTTP.Method method = HTTP.Method.get; /// Request method.
		const(ubyte)[] data; /// Request body, if any.
		const(string[2])[] headers; /// Extra request headers, as [name, value] pairs.

		/// Maximum number of redirects to follow.
		/// By default, choose a number appropriate to the method.
		int maxRedirects = int.min;
	}

	// Performs `request` on `instance.http`, saving the response body to
	// `target` and the JSON-serialized response metadata to `metadataPath`.
	// Invoked through `cached!_req` from `cachedReq`; not meant to be
	// called directly.
	/*private*/ static void _req(CachedCurlNetwork instance, ref const Request request, string target, string metadataPath)
	{
		with (instance)
		{
			http.clearRequestHeaders();
			http.method = request.method;

			// int.min is the "not specified" marker of Request.maxRedirects.
			if (request.maxRedirects != int.min)
				http.maxRedirects = request.maxRedirects;
			else
			if (request.method == HTTP.Method.head)
				// uint.max is the value this file uses to ask for redirects
				// to be returned as-is (see httpRequest); resolveRedirect
				// needs the "location" header of the HEAD response itself.
				http.maxRedirects = uint.max;
			else
				http.maxRedirects = 10;

			if (cookieDir)
			{
				// The host is only needed to find the cookie file, so it is
				// extracted only here. Indexing split("/")[2] unconditionally
				// would raise a RangeError for URLs with fewer than two
				// slashes (e.g. "example.com/page").
				auto urlParts = request.url.split("/");
				string host;
				if (urlParts.length > 2 && urlParts[1].length == 0)
					host = urlParts[2]; // "scheme://host/..." or "//host/..."
				else
				if (urlParts.length)
					host = urlParts[0]; // scheme-less URL: host is the first segment

				if (host.length)
				{
					auto cookiePath = buildPath(cookieDir, host ~ cookieExt);
					if (cookiePath.exists)
						http.addRequestHeader("Cookie", cookiePath.readText.chomp());
				}
			}

			foreach (header; request.headers)
				http.addRequestHeader(header[0], header[1]);

			// Collect the status line and headers as curl reports them.
			Metadata metadata;
			http.onReceiveHeader =
				(in char[] key, in char[] value)
				{
					metadata.headers[key.idup] ~= value.idup;
				};
			http.onReceiveStatusLine =
				(HTTP.StatusLine statusLine)
				{
					metadata.statusLine = statusLine;
				};

			if (request.data)
			{
				// Feed the request body to curl in chunks of whatever
				// buffer size it offers; `data` shrinks as it is consumed.
				const(ubyte)[] data = request.data;
				http.addRequestHeader("Content-Length", data.length.text);
				http.onSend = (void[] voidBuf)
					{
						auto buf = cast(ubyte[])voidBuf;
						size_t len = min(buf.length, data.length);
						buf[0..len] = data[0..len];
						data = data[len..$];
						return len;
					};
			}
			else
				http.onSend = null;

			download!HTTP(request.url, target, http);
			write(metadataPath, metadata.toJson);
		}
	}

	/// Handle to a cached response on disk.
	static struct Response
	{
		string responsePath; /// Path to response data.
		string metadataPath; /// Path to response metadata.

		/// Returns the response data, if it was successful.
		/// Throws: `CachedCurlException` if the status code is not 2xx.
		@property ubyte[] responseData()
		{
			checkOK();
			return cast(ubyte[])std.file.read(responsePath);
		}

		/// Returns the response metadata.
		/// The metadata file is read and parsed on every call;
		/// if it does not exist, `Metadata.init` is returned.
		@property Metadata metadata()
		{
			return metadataPath.exists ? metadataPath.readText.jsonParse!Metadata : Metadata.init;
		}

		/// Check if the response succeeded (status code 2xx).
		@property bool ok()
		{
			return metadata.statusLine.code / 100 == 2;
		}

		/// Check if the response succeeded, and throws an error if not.
		/// Returns: this response, to allow chaining.
		/// Throws: `CachedCurlException` carrying the response metadata.
		ref Response checkOK() return
		{
			// Parse the metadata once and reuse it for the exception,
			// rather than reading the file a second time on failure.
			auto parsed = metadata;
			if (parsed.statusLine.code / 100 != 2)
				throw new CachedCurlException(parsed);
			return this;
		}
	}

	/// Exception thrown for failed requests (server errors).
	static class CachedCurlException : Exception
	{
		Metadata metadata; /// Metadata of the failed response.

		private this(Metadata metadata, string fn = __FILE__, size_t ln = __LINE__)
		{
			this.metadata = metadata;
			super("Request failed: " ~ metadata.statusLine.reason, fn, ln);
		}
	}

	/// Perform a raw request and return information about the resulting cached response.
	/// The cache key is an MD5 digest of the URL, method and request body;
	/// request headers are not part of the key.
	Response cachedReq(ref const Request request)
	{
		auto hash = getDigestString!MD5(request.url.asBytes ~ cast(char)request.method ~ request.data);
		// Entries are spread over subdirectories named by the first two hash characters.
		auto path = buildPath(cacheDir, hash[0..2], hash);
		ensurePathExists(path);
		auto metadataPath = path ~ ".metadata";
		// Drop a cached body older than `epoch` before invoking the cached request.
		if (path.exists && path.timeLastModified.stdTime < epoch)
			path.remove();
		cached!_req(this, request, path, metadataPath);
		return Response(path, metadataPath);
	}

	/// ditto
	Response cachedReq(string url, HTTP.Method method, const(ubyte)[] data = null)
	{
		auto req = Request(url, method, data);
		return cachedReq(req);
	}

	/// Download a file and return the response path.
	/// Throws: `CachedCurlException` if the status code is not 2xx.
	string downloadFile(string url)
	{
		return cachedReq(url, HTTP.Method.get).checkOK.responsePath;
	}

	/// Download a file and copy the cached response body to `target`.
	override void downloadFile(string url, string target)
	{
		std.file.copy(downloadFile(url), target);
	}

	/// GET `url` and return the response body.
	/// Throws: `CachedCurlException` if the status code is not 2xx.
	override ubyte[] getFile(string url)
	{
		return cachedReq(url, HTTP.Method.get).responseData;
	}

	/// GET `url` and report whether the status code is 2xx.
	override bool urlOK(string url)
	{
		return cachedReq(url, HTTP.Method.get).ok;
	}

	/// Issue a HEAD request and return the target of its last "location"
	/// header, resolved against `url`.
	/// Throws: `Exception` ("Not a redirect: ...") if there is no such header.
	override string resolveRedirect(string url)
	{
		return
			url.applyRelativeURL(
				cachedReq(url, HTTP.Method.head, null)
				.metadata
				.headers
				.get("location", null)
				.enforce("Not a redirect: " ~ url)
				[$-1]);
	}

	/// POST `data` to `url` and return the response body.
	/// Throws: `CachedCurlException` if the status code is not 2xx.
	override ubyte[] post(string url, const(ubyte)[] data)
	{
		return cachedReq(url, HTTP.Method.post, data).responseData;
	}

	/// Perform an arbitrary `HttpRequest` through the cache.
	/// Redirects are not followed, and the response is returned whatever
	/// its status code.
	/// Throws: `Exception` if the request method is not recognized.
	override HttpResponse httpRequest(HttpRequest request)
	{
		Request req;
		req.url = request.url;
		switch (request.method.toUpper)
		{
			case "HEAD"   : req.method = HTTP.Method.head; break;
			case "GET"    : req.method = HTTP.Method.get; break;
			case "POST"   : req.method = HTTP.Method.post; break;
			case "PUT"    : req.method = HTTP.Method.put; break;
			// "DELETE" is the actual HTTP method name; the "DEL" spelling
			// is still accepted for callers which relied on it.
			case "DELETE" :
			case "DEL"    : req.method = HTTP.Method.del; break;
			case "OPTIONS": req.method = HTTP.Method.options; break;
			case "TRACE"  : req.method = HTTP.Method.trace; break;
			case "CONNECT": req.method = HTTP.Method.connect; break;
			case "PATCH"  : req.method = HTTP.Method.patch; break;
			default: throw new Exception("Unknown HTTP method: " ~ request.method);
		}
		req.data = request.data.joinToGC();
		foreach (name, value; request.headers)
			req.headers ~= [name, value];
		req.maxRedirects = uint.max; // Do not follow redirects, return them as-is

		auto resp = cachedReq(req);
		auto metadata = resp.metadata;

		// Rebuild an HttpResponse from the cached metadata and body.
		auto response = new HttpResponse;
		response.status = cast(HttpStatusCode)metadata.statusLine.code;
		response.statusMessage = metadata.statusLine.reason;
		foreach (name, values; metadata.headers)
			foreach (value; values)
				response.headers.add(name, value);
		response.data = DataVec(readData(resp.responsePath));
		return response;
	}
}

alias CachedCurlException = CachedCurlNetwork.CachedCurlException; ///

// Install a CachedCurlNetwork as `net` when the module is initialized.
// NOTE(review): `net` is not declared in this file; presumably it is the
// global network implementation from ae.sys.net - confirm.
static this()
{
	net = new CachedCurlNetwork();
}