1 /** 2 * Code to manage cached D component builds. 3 * 4 * License: 5 * This Source Code Form is subject to the terms of 6 * the Mozilla Public License, v. 2.0. If a copy of 7 * the MPL was not distributed with this file, You 8 * can obtain one at http://mozilla.org/MPL/2.0/. 9 * 10 * Authors: 11 * Vladimir Panteleev <ae@cy.md> 12 */ 13 14 module ae.sys.d.cache; 15 16 import std.algorithm; 17 import std.array; 18 import std.exception; 19 import std.file; 20 import std.format; 21 import std.path; 22 import std.range; 23 import std.string; 24 25 import ae.sys.file; 26 import ae.utils.meta; 27 28 private alias copy = std.file.copy; // https://issues.dlang.org/show_bug.cgi?id=14817 29 30 /// An interface that needs to be implemented by `DCache` users. 31 interface ICacheHost 32 { 33 /// An optimization helper which provides a linear order in which keys should be optimized 34 /// (cache entries most likely to have common data should be adjacent in the list). 35 /// Returns: An array of groups, each group is an array of key prefixes. 36 /// Params: 37 /// key = If non-null, this function will only return keys relevant to the given key. 38 string[][] getKeyOrder(string key); 39 40 /// Log a string. 41 void log(string s); 42 } 43 44 /// Abstract base class. 45 abstract class DCache 46 { 47 string cacheDir; /// Directory which will hold the cached builds. 48 ICacheHost cacheHost; /// Callback interface. 49 50 alias cacheHost this; 51 52 this(string cacheDir, ICacheHost cacheHost) 53 { 54 this.cacheDir = cacheDir; 55 this.cacheHost = cacheHost; 56 } /// 57 58 /// Get name of this cache engine. 59 abstract @property string name() const; 60 61 /// Get a list of keys for all cached entries. 62 abstract string[] getEntries() const; 63 64 /// Check if an entry with the given key exists. 65 abstract bool haveEntry(string key) const; 66 67 /// Get the full file listing for the given cache entry. 68 /// Forward slashes only. 69 abstract string[] listFiles(string key) const; 70 71 /// Add files from a directory. 72 /// This operation is destructive: the cache implementation 73 /// is allowed to modify the source directory. 74 abstract void add(string key, string sourcePath); 75 76 /// Extract a cache entry to a target directory, with a filter for root files/directories. 77 abstract void extract(string key, string targetPath, bool delegate(string) pathFilter); 78 79 /// Delete a cache entry. 80 abstract void remove(string key); 81 82 /// Close the cache. 83 /// Called after all operations are completed 84 /// and the cache is no longer immediately needed. 85 abstract void finalize(); 86 87 /// Optimize the cache (minimize disk space). 88 /// This operation can be very slow (and should display progress). 89 abstract void optimize(); 90 91 /// Verify cache integrity. 92 abstract void verify(); 93 94 /// Utility function (copy file or directory) 95 final void cp(string src, string dst, bool silent = false) 96 { 97 if (!silent) 98 log("Copy: " ~ src ~ " -> " ~ dst); 99 100 ensurePathExists(dst); 101 if (src.isDir) 102 { 103 if (!dst.exists) 104 dst.mkdir(); 105 foreach (de; src.dirEntries(SpanMode.shallow)) 106 cp(de.name, dst.buildPath(de.name.baseName), true); 107 } 108 else 109 { 110 copy(src, dst); 111 version (Posix) 112 dst.setAttributes(src.getAttributes()); 113 } 114 } 115 } 116 117 /// Base class for a simple directory store. 118 abstract class DirCacheBase : DCache 119 { 120 this(string cacheDir, ICacheHost cacheHost) 121 { 122 super(cacheDir, cacheHost); 123 if (!cacheDir.exists) 124 cacheDir.mkdirRecurse(); 125 } /// 126 127 protected: 128 override string[] getEntries() const 129 { 130 return !cacheDir.exists ? null : 131 cacheDir.dirEntries(SpanMode.shallow).filter!(de => de.isDir).map!(de => de.baseName).array; 132 } 133 134 override bool haveEntry(string key) const 135 { 136 return cacheDir.buildPath(key).I!(path => path.exists && path.isDir); 137 } 138 139 override string[] listFiles(string key) const 140 { 141 return cacheDir.buildPath(key).I!(path => 142 path 143 .dirEntries(SpanMode.breadth) 144 .filter!(de => de.isFile) 145 .map!(de => de.name 146 [path.length+1..$] // paths should be relative to cache entry dir root 147 .replace(`\`, `/`) 148 ) 149 .array 150 ); 151 } 152 153 abstract void optimizeKey(string key); 154 155 override void add(string key, string sourcePath) 156 { 157 auto entryDir = cacheDir.buildPath(key); 158 ensurePathExists(entryDir); 159 sourcePath.rename(entryDir); 160 optimizeKey(key); 161 } 162 163 override void extract(string key, string targetPath, bool delegate(string) pathFilter) 164 { 165 cacheDir.buildPath(key).dirEntries(SpanMode.shallow) 166 .filter!(de => pathFilter(de.baseName)) 167 .each!(de => cp(de.name, buildPath(targetPath, de.baseName))); 168 } 169 170 override void remove(string key) 171 { 172 cacheDir.buildPath(key).rmdirRecurse(); 173 } 174 } 175 176 /// The bare minimum required for ae.sys.d to work. 177 /// Implement a temporary cache which is deleted 178 /// as soon as it's no longer immediately needed. 179 class TempCache : DirCacheBase 180 { 181 this(string cacheDir, ICacheHost cacheHost) 182 { 183 super(cacheDir, cacheHost); 184 finalize(); 185 } /// 186 187 protected: 188 alias cacheHost this; // https://issues.dlang.org/show_bug.cgi?id=5973 189 190 override @property string name() const { return "none"; } 191 192 override void finalize() 193 { 194 if (cacheDir.exists) 195 { 196 log("Clearing temporary cache"); 197 rmdirRecurse(cacheDir); 198 } 199 } 200 201 override void optimize() { finalize(); } 202 override void optimizeKey(string key) {} 203 204 override void verify() {} 205 } 206 207 /// Store cached builds in subdirectories. 208 /// Optimize things by hard-linking identical files. 209 class DirCache : DirCacheBase 210 { 211 mixin GenerateConstructorProxies; 212 alias cacheHost this; // https://issues.dlang.org/show_bug.cgi?id=5973 213 214 protected: 215 override @property string name() const { return "directory"; } 216 217 size_t dedupedFiles; 218 219 /// Replace all files that have duplicate subpaths and content 220 /// which exist under both dirA and dirB with hard links. 221 void dedupDirectories(string dirA, string dirB) 222 { 223 foreach (de; dirEntries(dirA, SpanMode.depth)) 224 if (de.isFile) 225 { 226 auto pathA = de.name; 227 auto subPath = pathA[dirA.length..$]; 228 auto pathB = dirB ~ subPath; 229 230 if (pathB.exists 231 && pathA.getSize() == pathB.getSize() 232 && pathA.getFileID() != pathB.getFileID() 233 && pathA.mdFileCached() == pathB.mdFileCached()) 234 { 235 //debug log(pathB ~ " = " ~ pathA); 236 pathB.remove(); 237 try 238 { 239 pathA.hardLink(pathB); 240 dedupedFiles++; 241 } 242 catch (FileException e) 243 { 244 log(" -- Hard link failed: " ~ e.msg); 245 pathA.copy(pathB); 246 } 247 } 248 } 249 } 250 251 private final void optimizeCacheImpl(const(string)[] order, bool reverse = false, string onlyKey = null) 252 { 253 if (reverse) 254 order = order.retro.array(); 255 256 string[] lastKeys; 257 258 auto cacheDirEntries = cacheDir 259 .dirEntries(SpanMode.shallow) 260 .map!(de => de.baseName) 261 .array 262 .sort() 263 ; 264 265 foreach (prefix; order) 266 { 267 auto cacheEntries = cacheDirEntries 268 .filter!(name => name.startsWith(prefix)) 269 .array 270 ; 271 272 bool optimizeThis = onlyKey is null || onlyKey.startsWith(prefix); 273 274 if (optimizeThis) 275 { 276 auto targetEntries = lastKeys ~ cacheEntries; 277 278 if (targetEntries.length) 279 foreach (i, entry1; targetEntries[0..$-1]) 280 foreach (entry2; targetEntries[i+1..$]) 281 dedupDirectories(buildPath(cacheDir, entry1), buildPath(cacheDir, entry2)); 282 } 283 284 if (cacheEntries.length) 285 lastKeys = cacheEntries; 286 } 287 } 288 289 override void finalize() {} 290 291 /// Optimize entire cache. 292 override void optimize() 293 { 294 bool[string] componentNames; 295 foreach (de; dirEntries(cacheDir, SpanMode.shallow)) 296 { 297 auto parts = de.baseName().split("-"); 298 if (parts.length >= 3) 299 componentNames[parts[0..$-2].join("-")] = true; 300 } 301 302 log("Optimizing cache..."); dedupedFiles = 0; 303 foreach (order; getKeyOrder(null)) 304 optimizeCacheImpl(order); 305 log("Deduplicated %d files.".format(dedupedFiles)); 306 } 307 308 /// Optimize specific revision. 309 override void optimizeKey(string key) 310 { 311 log("Optimizing cache entry..."); dedupedFiles = 0; 312 auto orders = getKeyOrder(key); 313 foreach (order; orders) // Should be only one 314 { 315 optimizeCacheImpl(order, false, key); 316 optimizeCacheImpl(order, true , key); 317 } 318 log("Deduplicated %d files.".format(dedupedFiles)); 319 } 320 321 override void verify() {} 322 } 323 324 /// Cache backed by a git repository. 325 /// Git's packfiles provide an efficient way to store binary 326 /// files with small differences, however adding and extracting 327 /// items is a little slower. 328 class GitCache : DCache 329 { 330 this(string cacheDir, ICacheHost cacheHost) 331 { 332 super(cacheDir, cacheHost); 333 if (!cacheDir.exists) 334 { 335 cacheDir.mkdirRecurse(); 336 spawnProcess(["git", "init", cacheDir]) 337 .wait() 338 .I!(code => (code==0).enforce("git init failed")) 339 ; 340 } 341 git = Git(cacheDir); 342 } /// 343 344 /// Builds are pinned by refs namespaced by this prefix. 345 static const refPrefix = "refs/ae-sys-d-cache/"; 346 347 protected: 348 import std.process; 349 import ae.sys.git; 350 351 Git git; 352 353 override @property string name() const { return "git"; } 354 355 override string[] getEntries() const 356 { 357 auto chompPrefix(R)(R r, string prefix) { return r.filter!(s => s.startsWith(prefix)).map!(s => s[prefix.length..$]); } 358 try 359 return git 360 .query("show-ref") 361 .splitLines 362 .map!(s => s[41..$]) 363 .I!chompPrefix(refPrefix) 364 .array 365 ; 366 catch (Exception e) 367 return null; // show-ref will exit with status 1 for an empty repository 368 } 369 370 override bool haveEntry(string key) const 371 { 372 auto p = git.pipe(["show-ref", "--verify", refPrefix ~ key], Redirect.stdout | Redirect.stderr); 373 readFile(p.stdout); 374 readFile(p.stderr); 375 return p.pid.wait() == 0; 376 } 377 378 override string[] listFiles(string key) const 379 { 380 return git 381 .query("ls-tree", "--name-only", "-r", refPrefix ~ key) 382 .splitLines 383 ; 384 } 385 386 override void add(string key, string sourcePath) 387 { 388 auto writer = git.createObjectWriter(); 389 auto tree = git.importTree(sourcePath, writer); 390 auto author = "ae.sys.d.cache <ae.sys.d.cache@thecybershadow.net> 0 +0000"; 391 auto commit = writer.write(Git.Object.createCommit(Git.Object.ParsedCommit(tree, null, author, author, [key]))); 392 git.run("update-ref", refPrefix ~ key, commit.toString()); 393 } 394 395 override void extract(string key, string targetPath, bool delegate(string) pathFilter) 396 { 397 if (!targetPath.exists) 398 targetPath.mkdirRecurse(); 399 400 auto reader = git.createObjectReader(); 401 git.exportCommit(refPrefix ~ key, targetPath, reader, fn => pathFilter(fn.pathSplitter.front)); 402 } 403 404 override void remove(string key) 405 { 406 git.run("update-ref", "-d", refPrefix ~ key); 407 } 408 409 override void finalize() {} 410 411 override void optimize() 412 { 413 git.run("prune"); 414 git.run("pack-refs", "--all"); 415 git.run("repack", "-a", "-d"); 416 } 417 418 override void verify() 419 { 420 git.run("fsck", "--full", "--strict"); 421 } 422 } 423 424 /// Create a DCache instance according to the given name. 425 DCache createCache(string name, string cacheDir, ICacheHost cacheHost) 426 { 427 switch (name) 428 { 429 case "": 430 case "false": // compat 431 case "none": return new TempCache(cacheDir, cacheHost); 432 case "true": // compat 433 case "directory": return new DirCache (cacheDir, cacheHost); 434 case "git": return new GitCache (cacheDir, cacheHost); 435 default: throw new Exception("Unknown cache engine: " ~ name); 436 } 437 } 438 439 unittest 440 { 441 void testEngine(string name) 442 { 443 class Host : ICacheHost 444 { 445 string[][] getKeyOrder(string key) { return null; } 446 void log(string s) {} 447 } 448 auto host = new Host; 449 450 auto testDir = "test-cache"; 451 if (testDir.exists) testDir.forceDelete!(No.atomic)(Yes.recursive); 452 testDir.mkdir(); 453 scope(exit) if (testDir.exists) testDir.forceDelete!(No.atomic)(Yes.recursive); 454 455 auto cacheEngine = createCache(name, testDir.buildPath("cache"), host); 456 assert(cacheEngine.getEntries().length == 0); 457 assert(!cacheEngine.haveEntry("test-key")); 458 459 auto sourceDir = testDir.buildPath("source"); 460 mkdir(sourceDir); 461 write(sourceDir.buildPath("test.txt"), "Test"); 462 mkdir(sourceDir.buildPath("dir")); 463 write(sourceDir.buildPath("dir", "test2.txt"), "Test 2"); 464 465 cacheEngine.add("test-key", sourceDir); 466 assert(cacheEngine.getEntries() == ["test-key"]); 467 assert(cacheEngine.haveEntry("test-key")); 468 assert(cacheEngine.listFiles("test-key").sort().release == ["dir/test2.txt", "test.txt"]); 469 470 if (sourceDir.exists) rmdirRecurse(sourceDir); 471 mkdir(sourceDir); 472 write(sourceDir.buildPath("test3.txt"), "Test 3"); 473 mkdir(sourceDir.buildPath("test3")); 474 write(sourceDir.buildPath("test3", "test.txt"), "Test 3 subdir"); 475 cacheEngine.add("test-key-2", sourceDir); 476 477 auto targetDir = testDir.buildPath("target"); 478 mkdir(targetDir); 479 cacheEngine.extract("test-key", targetDir, fn => true); 480 assert(targetDir.buildPath("test.txt").readText == "Test"); 481 assert(targetDir.buildPath("dir", "test2.txt").readText == "Test 2"); 482 483 rmdirRecurse(targetDir); 484 mkdir(targetDir); 485 cacheEngine.extract("test-key-2", targetDir, fn => true); 486 assert(targetDir.buildPath("test3.txt").readText == "Test 3"); 487 488 rmdirRecurse(targetDir); 489 mkdir(targetDir); 490 cacheEngine.extract("test-key", targetDir, fn => fn=="dir"); 491 assert(!targetDir.buildPath("test.txt").exists); 492 assert(targetDir.buildPath("dir", "test2.txt").readText == "Test 2"); 493 494 cacheEngine.remove("test-key"); 495 assert(cacheEngine.getEntries().length == 1); 496 assert(!cacheEngine.haveEntry("test-key")); 497 498 cacheEngine.finalize(); 499 cacheEngine.verify(); 500 501 cacheEngine.optimize(); 502 } 503 504 testEngine("none"); 505 testEngine("directory"); 506 testEngine("git"); 507 }