1 /** 2 * Code to manage cached D component builds. 3 * 4 * License: 5 * This Source Code Form is subject to the terms of 6 * the Mozilla Public License, v. 2.0. If a copy of 7 * the MPL was not distributed with this file, You 8 * can obtain one at http://mozilla.org/MPL/2.0/. 9 * 10 * Authors: 11 * Vladimir Panteleev <ae@cy.md> 12 */ 13 14 module ae.sys.d.cache; 15 16 import std.algorithm; 17 import std.array; 18 import std.exception; 19 import std.file; 20 import std.format; 21 import std.path; 22 import std.range; 23 import std..string; 24 25 import ae.sys.file; 26 import ae.utils.meta; 27 28 alias copy = std.file.copy; // https://issues.dlang.org/show_bug.cgi?id=14817 29 30 /// An interface that needs to be implemented by `DCache` users. 31 interface ICacheHost 32 { 33 /// An optimization helper which provides a linear order in which keys should be optimized 34 /// (cache entries most likely to have common data should be adjacent in the list). 35 /// Returns: An array of groups, each group is an array of key prefixes. 36 /// Params: 37 /// key = If non-null, this function will only return keys relevant to the given key. 38 string[][] getKeyOrder(string key); 39 40 /// Log a string. 41 void log(string s); 42 } 43 44 /// Abstract base class. 45 abstract class DCache 46 { 47 string cacheDir; /// Directory which will hold the cached builds. 48 ICacheHost cacheHost; /// Callback interface. 49 50 alias cacheHost this; 51 52 this(string cacheDir, ICacheHost cacheHost) 53 { 54 this.cacheDir = cacheDir; 55 this.cacheHost = cacheHost; 56 } /// 57 58 /// Get name of this cache engine. 59 abstract @property string name() const; 60 61 /// Get a list of keys for all cached entries. 62 abstract string[] getEntries() const; 63 64 /// Check if an entry with the given key exists. 65 abstract bool haveEntry(string key) const; 66 67 /// Get the full file listing for the given cache entry. 68 /// Forward slashes only. 69 abstract string[] listFiles(string key) const; 70 71 /// Add files from a directory. 72 /// This operation is destructive: the cache implementation 73 /// is allowed to modify the source directory. 74 abstract void add(string key, string sourcePath); 75 76 /// Extract a cache entry to a target directory, with a filter for root files/directories. 77 abstract void extract(string key, string targetPath, bool delegate(string) pathFilter); 78 79 /// Delete a cache entry. 80 abstract void remove(string key); 81 82 /// Close the cache. 83 /// Called after all operations are completed 84 /// and the cache is no longer immediately needed. 85 abstract void finalize(); 86 87 /// Optimize the cache (minimize disk space). 88 /// This operation can be very slow (and should display progress). 89 abstract void optimize(); 90 91 /// Verify cache integrity. 92 abstract void verify(); 93 94 /// Utility function (copy file or directory) 95 final void cp(string src, string dst, bool silent = false) 96 { 97 if (!silent) 98 log("Copy: " ~ src ~ " -> " ~ dst); 99 100 ensurePathExists(dst); 101 if (src.isDir) 102 { 103 if (!dst.exists) 104 dst.mkdir(); 105 foreach (de; src.dirEntries(SpanMode.shallow)) 106 cp(de.name, dst.buildPath(de.name.baseName), true); 107 } 108 else 109 { 110 copy(src, dst); 111 version (Posix) 112 dst.setAttributes(src.getAttributes()); 113 } 114 } 115 } 116 117 /// Base class for a simple directory store. 118 abstract class DirCacheBase : DCache 119 { 120 this(string cacheDir, ICacheHost cacheHost) 121 { 122 super(cacheDir, cacheHost); 123 if (!cacheDir.exists) 124 cacheDir.mkdirRecurse(); 125 } /// 126 127 override string[] getEntries() const 128 { 129 return !cacheDir.exists ? null : 130 cacheDir.dirEntries(SpanMode.shallow).filter!(de => de.isDir).map!(de => de.baseName).array; 131 } 132 133 override bool haveEntry(string key) const 134 { 135 return cacheDir.buildPath(key).I!(path => path.exists && path.isDir); 136 } 137 138 override string[] listFiles(string key) const 139 { 140 return cacheDir.buildPath(key).I!(path => 141 path 142 .dirEntries(SpanMode.breadth) 143 .filter!(de => de.isFile) 144 .map!(de => de.name 145 [path.length+1..$] // paths should be relative to cache entry dir root 146 .replace(`\`, `/`) 147 ) 148 .array 149 ); 150 } 151 152 abstract void optimizeKey(string key); 153 154 override void add(string key, string sourcePath) 155 { 156 auto entryDir = cacheDir.buildPath(key); 157 ensurePathExists(entryDir); 158 sourcePath.rename(entryDir); 159 optimizeKey(key); 160 } 161 162 override void extract(string key, string targetPath, bool delegate(string) pathFilter) 163 { 164 cacheDir.buildPath(key).dirEntries(SpanMode.shallow) 165 .filter!(de => pathFilter(de.baseName)) 166 .each!(de => cp(de.name, buildPath(targetPath, de.baseName))); 167 } 168 169 override void remove(string key) 170 { 171 cacheDir.buildPath(key).rmdirRecurse(); 172 } 173 } 174 175 /// The bare minimum required for ae.sys.d to work. 176 /// Implement a temporary cache which is deleted 177 /// as soon as it's no longer immediately needed. 178 class TempCache : DirCacheBase 179 { 180 this(string cacheDir, ICacheHost cacheHost) 181 { 182 super(cacheDir, cacheHost); 183 finalize(); 184 } /// 185 186 alias cacheHost this; // https://issues.dlang.org/show_bug.cgi?id=5973 187 188 override @property string name() const { return "none"; } 189 190 override void finalize() 191 { 192 if (cacheDir.exists) 193 { 194 log("Clearing temporary cache"); 195 rmdirRecurse(cacheDir); 196 } 197 } 198 199 override void optimize() { finalize(); } 200 override void optimizeKey(string key) {} 201 202 override void verify() {} 203 } 204 205 /// Store cached builds in subdirectories. 206 /// Optimize things by hard-linking identical files. 207 class DirCache : DirCacheBase 208 { 209 mixin GenerateConstructorProxies; 210 alias cacheHost this; // https://issues.dlang.org/show_bug.cgi?id=5973 211 212 override @property string name() const { return "directory"; } 213 214 size_t dedupedFiles; 215 216 /// Replace all files that have duplicate subpaths and content 217 /// which exist under both dirA and dirB with hard links. 218 void dedupDirectories(string dirA, string dirB) 219 { 220 foreach (de; dirEntries(dirA, SpanMode.depth)) 221 if (de.isFile) 222 { 223 auto pathA = de.name; 224 auto subPath = pathA[dirA.length..$]; 225 auto pathB = dirB ~ subPath; 226 227 if (pathB.exists 228 && pathA.getSize() == pathB.getSize() 229 && pathA.getFileID() != pathB.getFileID() 230 && pathA.mdFileCached() == pathB.mdFileCached()) 231 { 232 //debug log(pathB ~ " = " ~ pathA); 233 pathB.remove(); 234 try 235 { 236 pathA.hardLink(pathB); 237 dedupedFiles++; 238 } 239 catch (FileException e) 240 { 241 log(" -- Hard link failed: " ~ e.msg); 242 pathA.copy(pathB); 243 } 244 } 245 } 246 } 247 248 private final void optimizeCacheImpl(const(string)[] order, bool reverse = false, string onlyKey = null) 249 { 250 if (reverse) 251 order = order.retro.array(); 252 253 string[] lastKeys; 254 255 auto cacheDirEntries = cacheDir 256 .dirEntries(SpanMode.shallow) 257 .map!(de => de.baseName) 258 .array 259 .sort() 260 ; 261 262 foreach (prefix; order) 263 { 264 auto cacheEntries = cacheDirEntries 265 .filter!(name => name.startsWith(prefix)) 266 .array 267 ; 268 269 bool optimizeThis = onlyKey is null || onlyKey.startsWith(prefix); 270 271 if (optimizeThis) 272 { 273 auto targetEntries = lastKeys ~ cacheEntries; 274 275 if (targetEntries.length) 276 foreach (i, entry1; targetEntries[0..$-1]) 277 foreach (entry2; targetEntries[i+1..$]) 278 dedupDirectories(buildPath(cacheDir, entry1), buildPath(cacheDir, entry2)); 279 } 280 281 if (cacheEntries.length) 282 lastKeys = cacheEntries; 283 } 284 } 285 286 override void finalize() {} 287 288 /// Optimize entire cache. 289 override void optimize() 290 { 291 bool[string] componentNames; 292 foreach (de; dirEntries(cacheDir, SpanMode.shallow)) 293 { 294 auto parts = de.baseName().split("-"); 295 if (parts.length >= 3) 296 componentNames[parts[0..$-2].join("-")] = true; 297 } 298 299 log("Optimizing cache..."); dedupedFiles = 0; 300 foreach (order; getKeyOrder(null)) 301 optimizeCacheImpl(order); 302 log("Deduplicated %d files.".format(dedupedFiles)); 303 } 304 305 /// Optimize specific revision. 306 override void optimizeKey(string key) 307 { 308 log("Optimizing cache entry..."); dedupedFiles = 0; 309 auto orders = getKeyOrder(key); 310 foreach (order; orders) // Should be only one 311 { 312 optimizeCacheImpl(order, false, key); 313 optimizeCacheImpl(order, true , key); 314 } 315 log("Deduplicated %d files.".format(dedupedFiles)); 316 } 317 318 override void verify() {} 319 } 320 321 /// Cache backed by a git repository. 322 /// Git's packfiles provide an efficient way to store binary 323 /// files with small differences, however adding and extracting 324 /// items is a little slower. 325 class GitCache : DCache 326 { 327 import std.process; 328 import ae.sys.git; 329 330 Git git; 331 332 /// Builds are pinned by refs namespaced by this prefix. 333 static const refPrefix = "refs/ae-sys-d-cache/"; 334 335 override @property string name() const { return "git"; } 336 337 this(string cacheDir, ICacheHost cacheHost) 338 { 339 super(cacheDir, cacheHost); 340 if (!cacheDir.exists) 341 { 342 cacheDir.mkdirRecurse(); 343 spawnProcess(["git", "init", cacheDir]) 344 .wait() 345 .I!(code => (code==0).enforce("git init failed")) 346 ; 347 } 348 git = Git(cacheDir); 349 } /// 350 351 override string[] getEntries() const 352 { 353 auto chompPrefix(R)(R r, string prefix) { return r.filter!(s => s.startsWith(prefix)).map!(s => s[prefix.length..$]); } 354 try 355 return git 356 .query("show-ref") 357 .splitLines 358 .map!(s => s[41..$]) 359 .I!chompPrefix(refPrefix) 360 .array 361 ; 362 catch (Exception e) 363 return null; // show-ref will exit with status 1 for an empty repository 364 } 365 366 override bool haveEntry(string key) const 367 { 368 auto p = git.pipe(["show-ref", "--verify", refPrefix ~ key], Redirect.stdout | Redirect.stderr); 369 readFile(p.stdout); 370 readFile(p.stderr); 371 return p.pid.wait() == 0; 372 } 373 374 override string[] listFiles(string key) const 375 { 376 return git 377 .query("ls-tree", "--name-only", "-r", refPrefix ~ key) 378 .splitLines 379 ; 380 } 381 382 override void add(string key, string sourcePath) 383 { 384 auto writer = git.createObjectWriter(); 385 auto tree = git.importTree(sourcePath, writer); 386 auto author = "ae.sys.d.cache <ae.sys.d.cache@thecybershadow.net> 0 +0000"; 387 auto commit = writer.write(Git.Object.createCommit(Git.Object.ParsedCommit(tree, null, author, author, [key]))); 388 git.run("update-ref", refPrefix ~ key, commit.toString()); 389 } 390 391 override void extract(string key, string targetPath, bool delegate(string) pathFilter) 392 { 393 if (!targetPath.exists) 394 targetPath.mkdirRecurse(); 395 396 auto reader = git.createObjectReader(); 397 git.exportCommit(refPrefix ~ key, targetPath, reader, fn => pathFilter(fn.pathSplitter.front)); 398 } 399 400 override void remove(string key) 401 { 402 git.run("update-ref", "-d", refPrefix ~ key); 403 } 404 405 override void finalize() {} 406 407 override void optimize() 408 { 409 git.run("prune"); 410 git.run("pack-refs", "--all"); 411 git.run("repack", "-a", "-d"); 412 } 413 414 override void verify() 415 { 416 git.run("fsck", "--full", "--strict"); 417 } 418 } 419 420 /// Create a DCache instance according to the given name. 421 DCache createCache(string name, string cacheDir, ICacheHost cacheHost) 422 { 423 switch (name) 424 { 425 case "": 426 case "false": // compat 427 case "none": return new TempCache(cacheDir, cacheHost); 428 case "true": // compat 429 case "directory": return new DirCache (cacheDir, cacheHost); 430 case "git": return new GitCache (cacheDir, cacheHost); 431 default: throw new Exception("Unknown cache engine: " ~ name); 432 } 433 } 434 435 unittest 436 { 437 void testEngine(string name) 438 { 439 class Host : ICacheHost 440 { 441 string[][] getKeyOrder(string key) { return null; } 442 void log(string s) {} 443 } 444 auto host = new Host; 445 446 auto testDir = "test-cache"; 447 if (testDir.exists) testDir.forceDelete!(No.atomic)(Yes.recursive); 448 testDir.mkdir(); 449 scope(exit) if (testDir.exists) testDir.forceDelete!(No.atomic)(Yes.recursive); 450 451 auto cacheEngine = createCache(name, testDir.buildPath("cache"), host); 452 assert(cacheEngine.getEntries().length == 0); 453 assert(!cacheEngine.haveEntry("test-key")); 454 455 auto sourceDir = testDir.buildPath("source"); 456 mkdir(sourceDir); 457 write(sourceDir.buildPath("test.txt"), "Test"); 458 mkdir(sourceDir.buildPath("dir")); 459 write(sourceDir.buildPath("dir", "test2.txt"), "Test 2"); 460 461 cacheEngine.add("test-key", sourceDir); 462 assert(cacheEngine.getEntries() == ["test-key"]); 463 assert(cacheEngine.haveEntry("test-key")); 464 assert(cacheEngine.listFiles("test-key").sort().release == ["dir/test2.txt", "test.txt"]); 465 466 if (sourceDir.exists) rmdirRecurse(sourceDir); 467 mkdir(sourceDir); 468 write(sourceDir.buildPath("test3.txt"), "Test 3"); 469 mkdir(sourceDir.buildPath("test3")); 470 write(sourceDir.buildPath("test3", "test.txt"), "Test 3 subdir"); 471 cacheEngine.add("test-key-2", sourceDir); 472 473 auto targetDir = testDir.buildPath("target"); 474 mkdir(targetDir); 475 cacheEngine.extract("test-key", targetDir, fn => true); 476 assert(targetDir.buildPath("test.txt").readText == "Test"); 477 assert(targetDir.buildPath("dir", "test2.txt").readText == "Test 2"); 478 479 rmdirRecurse(targetDir); 480 mkdir(targetDir); 481 cacheEngine.extract("test-key-2", targetDir, fn => true); 482 assert(targetDir.buildPath("test3.txt").readText == "Test 3"); 483 484 rmdirRecurse(targetDir); 485 mkdir(targetDir); 486 cacheEngine.extract("test-key", targetDir, fn => fn=="dir"); 487 assert(!targetDir.buildPath("test.txt").exists); 488 assert(targetDir.buildPath("dir", "test2.txt").readText == "Test 2"); 489 490 cacheEngine.remove("test-key"); 491 assert(cacheEngine.getEntries().length == 1); 492 assert(!cacheEngine.haveEntry("test-key")); 493 494 cacheEngine.finalize(); 495 cacheEngine.verify(); 496 497 cacheEngine.optimize(); 498 } 499 500 testEngine("none"); 501 testEngine("directory"); 502 testEngine("git"); 503 }