1 /** 2 * Code to manage cached D component builds. 3 * 4 * License: 5 * This Source Code Form is subject to the terms of 6 * the Mozilla Public License, v. 2.0. If a copy of 7 * the MPL was not distributed with this file, You 8 * can obtain one at http://mozilla.org/MPL/2.0/. 9 * 10 * Authors: 11 * Vladimir Panteleev <ae@cy.md> 12 */ 13 14 module ae.sys.d.cache; 15 16 import std.algorithm; 17 import std.array; 18 import std.exception; 19 import std.file; 20 import std.format; 21 import std.path; 22 import std.range; 23 import std.string; 24 25 import ae.sys.file; 26 import ae.utils.meta; 27 28 private alias copy = std.file.copy; // https://issues.dlang.org/show_bug.cgi?id=14817 29 30 /// An interface that needs to be implemented by `DCache` users. 31 interface ICacheHost 32 { 33 /// An optimization helper which provides a linear order in which keys should be optimized 34 /// (cache entries most likely to have common data should be adjacent in the list). 35 /// Returns: An array of groups, each group is an array of key prefixes. 36 /// Params: 37 /// key = If non-null, this function will only return keys relevant to the given key. 38 string[][] getKeyOrder(string key); 39 40 /// Log a string. 41 void log(string s); 42 } 43 44 /// Abstract base class. 45 abstract class DCache 46 { 47 string cacheDir; /// Directory which will hold the cached builds. 48 ICacheHost host; /// Callback interface. 49 50 this(string cacheDir, ICacheHost cacheHost) 51 { 52 this.cacheDir = cacheDir; 53 this.host = cacheHost; 54 } /// 55 56 /// Get name of this cache engine. 57 abstract @property string name() const; 58 59 /// Get a list of keys for all cached entries. 60 abstract string[] getEntries() const; 61 62 /// Check if an entry with the given key exists. 63 abstract bool haveEntry(string key) const; 64 65 /// Get the full file listing for the given cache entry. 66 /// Forward slashes only. 67 abstract string[] listFiles(string key) const; 68 69 /// Add files from a directory. 70 /// This operation is destructive: the cache implementation 71 /// is allowed to modify the source directory. 72 abstract void add(string key, string sourcePath); 73 74 /// Extract a cache entry to a target directory, with a filter for root files/directories. 75 abstract void extract(string key, string targetPath, bool delegate(string) pathFilter); 76 77 /// Delete a cache entry. 78 abstract void remove(string key); 79 80 /// Close the cache. 81 /// Called after all operations are completed 82 /// and the cache is no longer immediately needed. 83 abstract void finalize(); 84 85 /// Optimize the cache (minimize disk space). 86 /// This operation can be very slow (and should display progress). 87 abstract void optimize(); 88 89 /// Verify cache integrity. 90 abstract void verify(); 91 92 /// Utility function (copy file or directory) 93 final void cp(string src, string dst, bool silent = false) 94 { 95 if (!silent) 96 host.log("Copy: " ~ src ~ " -> " ~ dst); 97 98 ensurePathExists(dst); 99 if (src.isDir) 100 { 101 if (!dst.exists) 102 dst.mkdir(); 103 foreach (de; src.dirEntries(SpanMode.shallow)) 104 cp(de.name, dst.buildPath(de.name.baseName), true); 105 } 106 else 107 { 108 copy(src, dst); 109 version (Posix) 110 dst.setAttributes(src.getAttributes()); 111 } 112 } 113 } 114 115 /// Base class for a simple directory store. 116 abstract class DirCacheBase : DCache 117 { 118 this(string cacheDir, ICacheHost cacheHost) 119 { 120 super(cacheDir, cacheHost); 121 if (!cacheDir.exists) 122 cacheDir.mkdirRecurse(); 123 } /// 124 125 protected: 126 override string[] getEntries() const 127 { 128 return !cacheDir.exists ? null : 129 cacheDir.dirEntries(SpanMode.shallow).filter!(de => de.isDir).map!(de => de.baseName).array; 130 } 131 132 override bool haveEntry(string key) const 133 { 134 return cacheDir.buildPath(key).I!(path => path.exists && path.isDir); 135 } 136 137 override string[] listFiles(string key) const 138 { 139 return cacheDir.buildPath(key).I!(path => 140 path 141 .dirEntries(SpanMode.breadth) 142 .filter!(de => de.isFile) 143 .map!(de => de.name 144 [path.length+1..$] // paths should be relative to cache entry dir root 145 .replace(`\`, `/`) 146 ) 147 .array 148 ); 149 } 150 151 abstract void optimizeKey(string key); 152 153 override void add(string key, string sourcePath) 154 { 155 auto entryDir = cacheDir.buildPath(key); 156 ensurePathExists(entryDir); 157 sourcePath.rename(entryDir); 158 optimizeKey(key); 159 } 160 161 override void extract(string key, string targetPath, bool delegate(string) pathFilter) 162 { 163 cacheDir.buildPath(key).dirEntries(SpanMode.shallow) 164 .filter!(de => pathFilter(de.baseName)) 165 .each!(de => cp(de.name, buildPath(targetPath, de.baseName))); 166 } 167 168 override void remove(string key) 169 { 170 cacheDir.buildPath(key).rmdirRecurse(); 171 } 172 } 173 174 /// The bare minimum required for ae.sys.d to work. 175 /// Implement a temporary cache which is deleted 176 /// as soon as it's no longer immediately needed. 177 class TempCache : DirCacheBase 178 { 179 this(string cacheDir, ICacheHost cacheHost) 180 { 181 super(cacheDir, cacheHost); 182 finalize(); 183 } /// 184 185 protected: 186 override @property string name() const { return "none"; } 187 188 override void finalize() 189 { 190 if (cacheDir.exists) 191 { 192 host.log("Clearing temporary cache"); 193 rmdirRecurse(cacheDir); 194 } 195 } 196 197 override void optimize() { finalize(); } 198 override void optimizeKey(string key) {} 199 200 override void verify() {} 201 } 202 203 /// Store cached builds in subdirectories. 204 /// Optimize things by hard-linking identical files. 205 class DirCache : DirCacheBase 206 { 207 mixin GenerateConstructorProxies; 208 209 protected: 210 override @property string name() const { return "directory"; } 211 212 size_t dedupedFiles; 213 214 /// Replace all files that have duplicate subpaths and content 215 /// which exist under both dirA and dirB with hard links. 216 void dedupDirectories(string dirA, string dirB) 217 { 218 foreach (de; dirEntries(dirA, SpanMode.depth)) 219 if (de.isFile) 220 { 221 auto pathA = de.name; 222 auto subPath = pathA[dirA.length..$]; 223 auto pathB = dirB ~ subPath; 224 225 if (pathB.exists 226 && pathA.getSize() == pathB.getSize() 227 && pathA.getFileID() != pathB.getFileID() 228 && pathA.mdFileCached() == pathB.mdFileCached()) 229 { 230 //debug log(pathB ~ " = " ~ pathA); 231 pathB.remove(); 232 try 233 { 234 pathA.hardLink(pathB); 235 dedupedFiles++; 236 } 237 catch (FileException e) 238 { 239 host.log(" -- Hard link failed: " ~ e.msg); 240 pathA.copy(pathB); 241 } 242 } 243 } 244 } 245 246 private final void optimizeCacheImpl(const(string)[] order, bool reverse = false, string onlyKey = null) 247 { 248 if (reverse) 249 order = order.retro.array(); 250 251 string[] lastKeys; 252 253 auto cacheDirEntries = cacheDir 254 .dirEntries(SpanMode.shallow) 255 .map!(de => de.baseName) 256 .array 257 .sort() 258 ; 259 260 foreach (prefix; order) 261 { 262 auto cacheEntries = cacheDirEntries 263 .filter!(name => name.startsWith(prefix)) 264 .array 265 ; 266 267 bool optimizeThis = onlyKey is null || onlyKey.startsWith(prefix); 268 269 if (optimizeThis) 270 { 271 auto targetEntries = lastKeys ~ cacheEntries; 272 273 if (targetEntries.length) 274 foreach (i, entry1; targetEntries[0..$-1]) 275 foreach (entry2; targetEntries[i+1..$]) 276 dedupDirectories(buildPath(cacheDir, entry1), buildPath(cacheDir, entry2)); 277 } 278 279 if (cacheEntries.length) 280 lastKeys = cacheEntries; 281 } 282 } 283 284 override void finalize() {} 285 286 /// Optimize entire cache. 287 override void optimize() 288 { 289 bool[string] componentNames; 290 foreach (de; dirEntries(cacheDir, SpanMode.shallow)) 291 { 292 auto parts = de.baseName().split("-"); 293 if (parts.length >= 3) 294 componentNames[parts[0..$-2].join("-")] = true; 295 } 296 297 host.log("Optimizing cache..."); dedupedFiles = 0; 298 foreach (order; host.getKeyOrder(null)) 299 optimizeCacheImpl(order); 300 host.log("Deduplicated %d files.".format(dedupedFiles)); 301 } 302 303 /// Optimize specific revision. 304 override void optimizeKey(string key) 305 { 306 host.log("Optimizing cache entry..."); dedupedFiles = 0; 307 auto orders = host.getKeyOrder(key); 308 foreach (order; orders) // Should be only one 309 { 310 optimizeCacheImpl(order, false, key); 311 optimizeCacheImpl(order, true , key); 312 } 313 host.log("Deduplicated %d files.".format(dedupedFiles)); 314 } 315 316 override void verify() {} 317 } 318 319 /// Cache backed by a git repository. 320 /// Git's packfiles provide an efficient way to store binary 321 /// files with small differences, however adding and extracting 322 /// items is a little slower. 323 class GitCache : DCache 324 { 325 this(string cacheDir, ICacheHost cacheHost) 326 { 327 super(cacheDir, cacheHost); 328 if (!cacheDir.exists) 329 { 330 cacheDir.mkdirRecurse(); 331 spawnProcess(["git", "init", cacheDir]) 332 .wait() 333 .I!(code => (code==0).enforce("git init failed")) 334 ; 335 } 336 git = Git(cacheDir); 337 } /// 338 339 /// Builds are pinned by refs namespaced by this prefix. 340 static const refPrefix = "refs/ae-sys-d-cache/"; 341 342 protected: 343 import std.process; 344 import ae.sys.git; 345 346 Git git; 347 348 override @property string name() const { return "git"; } 349 350 override string[] getEntries() const 351 { 352 auto chompPrefix(R)(R r, string prefix) { return r.filter!(s => s.startsWith(prefix)).map!(s => s[prefix.length..$]); } 353 try 354 return git 355 .query("show-ref") 356 .splitLines 357 .map!(s => s[41..$]) 358 .I!chompPrefix(refPrefix) 359 .array 360 ; 361 catch (Exception e) 362 return null; // show-ref will exit with status 1 for an empty repository 363 } 364 365 override bool haveEntry(string key) const 366 { 367 auto p = git.pipe(["show-ref", "--verify", refPrefix ~ key], Redirect.stdout | Redirect.stderr); 368 readFile(p.stdout); 369 readFile(p.stderr); 370 return p.pid.wait() == 0; 371 } 372 373 override string[] listFiles(string key) const 374 { 375 return git 376 .query("ls-tree", "--name-only", "-r", refPrefix ~ key) 377 .splitLines 378 ; 379 } 380 381 override void add(string key, string sourcePath) 382 { 383 auto writer = git.createObjectWriter(); 384 auto tree = git.importTree(sourcePath, writer); 385 auto author = "ae.sys.d.cache <ae.sys.d.cache@thecybershadow.net> 0 +0000"; 386 auto commit = writer.write(Git.Object.createCommit(Git.Object.ParsedCommit(tree, null, author, author, [key]))); 387 git.run("update-ref", refPrefix ~ key, commit.toString()); 388 } 389 390 override void extract(string key, string targetPath, bool delegate(string) pathFilter) 391 { 392 if (!targetPath.exists) 393 targetPath.mkdirRecurse(); 394 395 auto reader = git.createObjectReader(); 396 git.exportCommit(refPrefix ~ key, targetPath, reader, fn => pathFilter(fn.pathSplitter.front)); 397 } 398 399 override void remove(string key) 400 { 401 git.run("update-ref", "-d", refPrefix ~ key); 402 } 403 404 override void finalize() {} 405 406 override void optimize() 407 { 408 git.run("prune"); 409 git.run("pack-refs", "--all"); 410 git.run("repack", "-a", "-d"); 411 } 412 413 override void verify() 414 { 415 git.run("fsck", "--full", "--strict"); 416 } 417 } 418 419 /// Create a DCache instance according to the given name. 420 DCache createCache(string name, string cacheDir, ICacheHost cacheHost) 421 { 422 switch (name) 423 { 424 case "": 425 case "false": // compat 426 case "none": return new TempCache(cacheDir, cacheHost); 427 case "true": // compat 428 case "directory": return new DirCache (cacheDir, cacheHost); 429 case "git": return new GitCache (cacheDir, cacheHost); 430 default: throw new Exception("Unknown cache engine: " ~ name); 431 } 432 } 433 434 unittest 435 { 436 void testEngine(string name) 437 { 438 class Host : ICacheHost 439 { 440 string[][] getKeyOrder(string key) { return null; } 441 void log(string s) {} 442 } 443 auto host = new Host; 444 445 auto testDir = "test-cache"; 446 if (testDir.exists) testDir.forceDelete!(No.atomic)(Yes.recursive); 447 testDir.mkdir(); 448 scope(exit) if (testDir.exists) testDir.forceDelete!(No.atomic)(Yes.recursive); 449 450 auto cacheEngine = createCache(name, testDir.buildPath("cache"), host); 451 assert(cacheEngine.getEntries().length == 0); 452 assert(!cacheEngine.haveEntry("test-key")); 453 454 auto sourceDir = testDir.buildPath("source"); 455 mkdir(sourceDir); 456 write(sourceDir.buildPath("test.txt"), "Test"); 457 mkdir(sourceDir.buildPath("dir")); 458 write(sourceDir.buildPath("dir", "test2.txt"), "Test 2"); 459 460 cacheEngine.add("test-key", sourceDir); 461 assert(cacheEngine.getEntries() == ["test-key"]); 462 assert(cacheEngine.haveEntry("test-key")); 463 assert(cacheEngine.listFiles("test-key").sort().release == ["dir/test2.txt", "test.txt"]); 464 465 if (sourceDir.exists) rmdirRecurse(sourceDir); 466 mkdir(sourceDir); 467 write(sourceDir.buildPath("test3.txt"), "Test 3"); 468 mkdir(sourceDir.buildPath("test3")); 469 write(sourceDir.buildPath("test3", "test.txt"), "Test 3 subdir"); 470 cacheEngine.add("test-key-2", sourceDir); 471 472 auto targetDir = testDir.buildPath("target"); 473 mkdir(targetDir); 474 cacheEngine.extract("test-key", targetDir, fn => true); 475 assert(targetDir.buildPath("test.txt").readText == "Test"); 476 assert(targetDir.buildPath("dir", "test2.txt").readText == "Test 2"); 477 478 rmdirRecurse(targetDir); 479 mkdir(targetDir); 480 cacheEngine.extract("test-key-2", targetDir, fn => true); 481 assert(targetDir.buildPath("test3.txt").readText == "Test 3"); 482 483 rmdirRecurse(targetDir); 484 mkdir(targetDir); 485 cacheEngine.extract("test-key", targetDir, fn => fn=="dir"); 486 assert(!targetDir.buildPath("test.txt").exists); 487 assert(targetDir.buildPath("dir", "test2.txt").readText == "Test 2"); 488 489 cacheEngine.remove("test-key"); 490 assert(cacheEngine.getEntries().length == 1); 491 assert(!cacheEngine.haveEntry("test-key")); 492 493 cacheEngine.finalize(); 494 cacheEngine.verify(); 495 496 cacheEngine.optimize(); 497 } 498 499 testEngine("none"); 500 testEngine("directory"); 501 testEngine("git"); 502 }