1 /**
2  * Code to manage cached D component builds.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <ae@cy.md>
12  */
13 
14 module ae.sys.d.cache;
15 
16 import std.algorithm;
17 import std.array;
18 import std.exception;
19 import std.file;
20 import std.format;
21 import std.path;
22 import std.range;
23 import std..string;
24 
25 import ae.sys.file;
26 import ae.utils.meta;
27 
28 alias copy = std.file.copy; // https://issues.dlang.org/show_bug.cgi?id=14817
29 
30 /// An interface that needs to be implemented by `DCache` users.
31 interface ICacheHost
32 {
33 	/// An optimization helper which provides a linear order in which keys should be optimized
34 	/// (cache entries most likely to have common data should be adjacent in the list).
35 	/// Returns: An array of groups, each group is an array of key prefixes.
36 	/// Params:
37 	///   key = If non-null, this function will only return keys relevant to the given key.
38 	string[][] getKeyOrder(string key);
39 
40 	/// Log a string.
41 	void log(string s);
42 }
43 
44 /// Abstract base class.
45 abstract class DCache
46 {
47 	string cacheDir; /// Directory which will hold the cached builds.
48 	ICacheHost cacheHost; /// Callback interface.
49 
50 	alias cacheHost this;
51 
52 	this(string cacheDir, ICacheHost cacheHost)
53 	{
54 		this.cacheDir = cacheDir;
55 		this.cacheHost = cacheHost;
56 	} ///
57 
58 	/// Get name of this cache engine.
59 	abstract @property string name() const;
60 
61 	/// Get a list of keys for all cached entries.
62 	abstract string[] getEntries() const;
63 
64 	/// Check if an entry with the given key exists.
65 	abstract bool haveEntry(string key) const;
66 
67 	/// Get the full file listing for the given cache entry.
68 	/// Forward slashes only.
69 	abstract string[] listFiles(string key) const;
70 
71 	/// Add files from a directory.
72 	/// This operation is destructive: the cache implementation
73 	/// is allowed to modify the source directory.
74 	abstract void add(string key, string sourcePath);
75 
76 	/// Extract a cache entry to a target directory, with a filter for root files/directories.
77 	abstract void extract(string key, string targetPath, bool delegate(string) pathFilter);
78 
79 	/// Delete a cache entry.
80 	abstract void remove(string key);
81 
82 	/// Close the cache.
83 	/// Called after all operations are completed
84 	/// and the cache is no longer immediately needed.
85 	abstract void finalize();
86 
87 	/// Optimize the cache (minimize disk space).
88 	/// This operation can be very slow (and should display progress).
89 	abstract void optimize();
90 
91 	/// Verify cache integrity.
92 	abstract void verify();
93 
94 	/// Utility function (copy file or directory)
95 	final void cp(string src, string dst, bool silent = false)
96 	{
97 		if (!silent)
98 			log("Copy: " ~ src ~ " -> " ~ dst);
99 
100 		ensurePathExists(dst);
101 		if (src.isDir)
102 		{
103 			if (!dst.exists)
104 				dst.mkdir();
105 			foreach (de; src.dirEntries(SpanMode.shallow))
106 				cp(de.name, dst.buildPath(de.name.baseName), true);
107 		}
108 		else
109 		{
110 			copy(src, dst);
111 			version (Posix)
112 				dst.setAttributes(src.getAttributes());
113 		}
114 	}
115 }
116 
117 /// Base class for a simple directory store.
118 abstract class DirCacheBase : DCache
119 {
120 	this(string cacheDir, ICacheHost cacheHost)
121 	{
122 		super(cacheDir, cacheHost);
123 		if (!cacheDir.exists)
124 			cacheDir.mkdirRecurse();
125 	} ///
126 
127 	override string[] getEntries() const
128 	{
129 		return !cacheDir.exists ? null :
130 			cacheDir.dirEntries(SpanMode.shallow).filter!(de => de.isDir).map!(de => de.baseName).array;
131 	}
132 
133 	override bool haveEntry(string key) const
134 	{
135 		return cacheDir.buildPath(key).I!(path => path.exists && path.isDir);
136 	}
137 
138 	override string[] listFiles(string key) const
139 	{
140 		return cacheDir.buildPath(key).I!(path =>
141 			path
142 			.dirEntries(SpanMode.breadth)
143 			.filter!(de => de.isFile)
144 			.map!(de => de.name
145 				[path.length+1..$] // paths should be relative to cache entry dir root
146 				.replace(`\`, `/`)
147 			)
148 			.array
149 		);
150 	}
151 
152 	abstract void optimizeKey(string key);
153 
154 	override void add(string key, string sourcePath)
155 	{
156 		auto entryDir = cacheDir.buildPath(key);
157 		ensurePathExists(entryDir);
158 		sourcePath.rename(entryDir);
159 		optimizeKey(key);
160 	}
161 
162 	override void extract(string key, string targetPath, bool delegate(string) pathFilter)
163 	{
164 		cacheDir.buildPath(key).dirEntries(SpanMode.shallow)
165 			.filter!(de => pathFilter(de.baseName))
166 			.each!(de => cp(de.name, buildPath(targetPath, de.baseName)));
167 	}
168 
169 	override void remove(string key)
170 	{
171 		cacheDir.buildPath(key).rmdirRecurse();
172 	}
173 }
174 
175 /// The bare minimum required for ae.sys.d to work.
176 /// Implement a temporary cache which is deleted
177 /// as soon as it's no longer immediately needed.
178 class TempCache : DirCacheBase
179 {
180 	this(string cacheDir, ICacheHost cacheHost)
181 	{
182 		super(cacheDir, cacheHost);
183 		finalize();
184 	} ///
185 
186 	alias cacheHost this; // https://issues.dlang.org/show_bug.cgi?id=5973
187 
188 	override @property string name() const { return "none"; }
189 
190 	override void finalize()
191 	{
192 		if (cacheDir.exists)
193 		{
194 			log("Clearing temporary cache");
195 			rmdirRecurse(cacheDir);
196 		}
197 	}
198 
199 	override void optimize() { finalize(); }
200 	override void optimizeKey(string key) {}
201 
202 	override void verify() {}
203 }
204 
205 /// Store cached builds in subdirectories.
206 /// Optimize things by hard-linking identical files.
207 class DirCache : DirCacheBase
208 {
209 	mixin GenerateConstructorProxies;
210 	alias cacheHost this; // https://issues.dlang.org/show_bug.cgi?id=5973
211 
212 	override @property string name() const { return "directory"; }
213 
214 	size_t dedupedFiles;
215 
216 	/// Replace all files that have duplicate subpaths and content
217 	/// which exist under both dirA and dirB with hard links.
218 	void dedupDirectories(string dirA, string dirB)
219 	{
220 		foreach (de; dirEntries(dirA, SpanMode.depth))
221 			if (de.isFile)
222 			{
223 				auto pathA = de.name;
224 				auto subPath = pathA[dirA.length..$];
225 				auto pathB = dirB ~ subPath;
226 
227 				if (pathB.exists
228 				 && pathA.getSize() == pathB.getSize()
229 				 && pathA.getFileID() != pathB.getFileID()
230 				 && pathA.mdFileCached() == pathB.mdFileCached())
231 				{
232 					//debug log(pathB ~ " = " ~ pathA);
233 					pathB.remove();
234 					try
235 					{
236 						pathA.hardLink(pathB);
237 						dedupedFiles++;
238 					}
239 					catch (FileException e)
240 					{
241 						log(" -- Hard link failed: " ~ e.msg);
242 						pathA.copy(pathB);
243 					}
244 				}
245 			}
246 	}
247 
248 	private final void optimizeCacheImpl(const(string)[] order, bool reverse = false, string onlyKey = null)
249 	{
250 		if (reverse)
251 			order = order.retro.array();
252 
253 		string[] lastKeys;
254 
255 		auto cacheDirEntries = cacheDir
256 			.dirEntries(SpanMode.shallow)
257 			.map!(de => de.baseName)
258 			.array
259 			.sort()
260 		;
261 
262 		foreach (prefix; order)
263 		{
264 			auto cacheEntries = cacheDirEntries
265 				.filter!(name => name.startsWith(prefix))
266 				.array
267 			;
268 
269 			bool optimizeThis = onlyKey is null || onlyKey.startsWith(prefix);
270 
271 			if (optimizeThis)
272 			{
273 				auto targetEntries = lastKeys ~ cacheEntries;
274 
275 				if (targetEntries.length)
276 					foreach (i, entry1; targetEntries[0..$-1])
277 						foreach (entry2; targetEntries[i+1..$])
278 							dedupDirectories(buildPath(cacheDir, entry1), buildPath(cacheDir, entry2));
279 			}
280 
281 			if (cacheEntries.length)
282 				lastKeys = cacheEntries;
283 		}
284 	}
285 
286 	override void finalize() {}
287 
288 	/// Optimize entire cache.
289 	override void optimize()
290 	{
291 		bool[string] componentNames;
292 		foreach (de; dirEntries(cacheDir, SpanMode.shallow))
293 		{
294 			auto parts = de.baseName().split("-");
295 			if (parts.length >= 3)
296 				componentNames[parts[0..$-2].join("-")] = true;
297 		}
298 
299 		log("Optimizing cache..."); dedupedFiles = 0;
300 		foreach (order; getKeyOrder(null))
301 			optimizeCacheImpl(order);
302 		log("Deduplicated %d files.".format(dedupedFiles));
303 	}
304 
305 	/// Optimize specific revision.
306 	override void optimizeKey(string key)
307 	{
308 		log("Optimizing cache entry..."); dedupedFiles = 0;
309 		auto orders = getKeyOrder(key);
310 		foreach (order; orders) // Should be only one
311 		{
312 			optimizeCacheImpl(order, false, key);
313 			optimizeCacheImpl(order, true , key);
314 		}
315 		log("Deduplicated %d files.".format(dedupedFiles));
316 	}
317 
318 	override void verify() {}
319 }
320 
321 /// Cache backed by a git repository.
322 /// Git's packfiles provide an efficient way to store binary
323 /// files with small differences, however adding and extracting
324 /// items is a little slower.
325 class GitCache : DCache
326 {
327 	import std.process;
328 	import ae.sys.git;
329 
330 	Git git;
331 
332 	/// Builds are pinned by refs namespaced by this prefix.
333 	static const refPrefix = "refs/ae-sys-d-cache/";
334 
335 	override @property string name() const { return "git"; }
336 
337 	this(string cacheDir, ICacheHost cacheHost)
338 	{
339 		super(cacheDir, cacheHost);
340 		if (!cacheDir.exists)
341 		{
342 			cacheDir.mkdirRecurse();
343 			spawnProcess(["git", "init", cacheDir])
344 				.wait()
345 				.I!(code => (code==0).enforce("git init failed"))
346 			;
347 		}
348 		git = Git(cacheDir);
349 	} ///
350 
351 	override string[] getEntries() const
352 	{
353 		auto chompPrefix(R)(R r, string prefix) { return r.filter!(s => s.startsWith(prefix)).map!(s => s[prefix.length..$]); }
354 		try
355 			return git
356 				.query("show-ref")
357 				.splitLines
358 				.map!(s => s[41..$])
359 				.I!chompPrefix(refPrefix)
360 				.array
361 			;
362 		catch (Exception e)
363 			return null; // show-ref will exit with status 1 for an empty repository
364 	}
365 
366 	override bool haveEntry(string key) const
367 	{
368 		auto p = git.pipe(["show-ref", "--verify", refPrefix ~ key], Redirect.stdout | Redirect.stderr);
369 		readFile(p.stdout);
370 		readFile(p.stderr);
371 		return p.pid.wait() == 0;
372 	}
373 
374 	override string[] listFiles(string key) const
375 	{
376 		return git
377 			.query("ls-tree", "--name-only", "-r", refPrefix ~ key)
378 			.splitLines
379 		;
380 	}
381 
382 	override void add(string key, string sourcePath)
383 	{
384 		auto writer = git.createObjectWriter();
385 		auto tree = git.importTree(sourcePath, writer);
386 		auto author = "ae.sys.d.cache <ae.sys.d.cache@thecybershadow.net> 0 +0000";
387 		auto commit = writer.write(Git.Object.createCommit(Git.Object.ParsedCommit(tree, null, author, author, [key])));
388 		git.run("update-ref", refPrefix ~ key, commit.toString());
389 	}
390 
391 	override void extract(string key, string targetPath, bool delegate(string) pathFilter)
392 	{
393 		if (!targetPath.exists)
394 			targetPath.mkdirRecurse();
395 
396 		auto reader = git.createObjectReader();
397 		git.exportCommit(refPrefix ~ key, targetPath, reader, fn => pathFilter(fn.pathSplitter.front));
398 	}
399 
400 	override void remove(string key)
401 	{
402 		git.run("update-ref", "-d", refPrefix ~ key);
403 	}
404 
405 	override void finalize() {}
406 
407 	override void optimize()
408 	{
409 		git.run("prune");
410 		git.run("pack-refs", "--all");
411 		git.run("repack", "-a", "-d");
412 	}
413 
414 	override void verify()
415 	{
416 		git.run("fsck", "--full", "--strict");
417 	}
418 }
419 
420 /// Create a DCache instance according to the given name.
421 DCache createCache(string name, string cacheDir, ICacheHost cacheHost)
422 {
423 	switch (name)
424 	{
425 		case "":
426 		case "false": // compat
427 		case "none":      return new TempCache(cacheDir, cacheHost);
428 		case "true":  // compat
429 		case "directory": return new DirCache (cacheDir, cacheHost);
430 		case "git":       return new GitCache (cacheDir, cacheHost);
431 		default: throw new Exception("Unknown cache engine: " ~ name);
432 	}
433 }
434 
435 unittest
436 {
437 	void testEngine(string name)
438 	{
439 		class Host : ICacheHost
440 		{
441 			string[][] getKeyOrder(string key) { return null; }
442 			void log(string s) {}
443 		}
444 		auto host = new Host;
445 
446 		auto testDir = "test-cache";
447 		if (testDir.exists) testDir.forceDelete!(No.atomic)(Yes.recursive);
448 		testDir.mkdir();
449 		scope(exit) if (testDir.exists) testDir.forceDelete!(No.atomic)(Yes.recursive);
450 
451 		auto cacheEngine = createCache(name, testDir.buildPath("cache"), host);
452 		assert(cacheEngine.getEntries().length == 0);
453 		assert(!cacheEngine.haveEntry("test-key"));
454 
455 		auto sourceDir = testDir.buildPath("source");
456 		mkdir(sourceDir);
457 		write(sourceDir.buildPath("test.txt"), "Test");
458 		mkdir(sourceDir.buildPath("dir"));
459 		write(sourceDir.buildPath("dir", "test2.txt"), "Test 2");
460 
461 		cacheEngine.add("test-key", sourceDir);
462 		assert(cacheEngine.getEntries() == ["test-key"]);
463 		assert(cacheEngine.haveEntry("test-key"));
464 		assert(cacheEngine.listFiles("test-key").sort().release == ["dir/test2.txt", "test.txt"]);
465 
466 		if (sourceDir.exists) rmdirRecurse(sourceDir);
467 		mkdir(sourceDir);
468 		write(sourceDir.buildPath("test3.txt"), "Test 3");
469 		mkdir(sourceDir.buildPath("test3"));
470 		write(sourceDir.buildPath("test3", "test.txt"), "Test 3 subdir");
471 		cacheEngine.add("test-key-2", sourceDir);
472 
473 		auto targetDir = testDir.buildPath("target");
474 		mkdir(targetDir);
475 		cacheEngine.extract("test-key", targetDir, fn => true);
476 		assert(targetDir.buildPath("test.txt").readText == "Test");
477 		assert(targetDir.buildPath("dir", "test2.txt").readText == "Test 2");
478 
479 		rmdirRecurse(targetDir);
480 		mkdir(targetDir);
481 		cacheEngine.extract("test-key-2", targetDir, fn => true);
482 		assert(targetDir.buildPath("test3.txt").readText == "Test 3");
483 
484 		rmdirRecurse(targetDir);
485 		mkdir(targetDir);
486 		cacheEngine.extract("test-key", targetDir, fn => fn=="dir");
487 		assert(!targetDir.buildPath("test.txt").exists);
488 		assert(targetDir.buildPath("dir", "test2.txt").readText == "Test 2");
489 
490 		cacheEngine.remove("test-key");
491 		assert(cacheEngine.getEntries().length == 1);
492 		assert(!cacheEngine.haveEntry("test-key"));
493 
494 		cacheEngine.finalize();
495 		cacheEngine.verify();
496 
497 		cacheEngine.optimize();
498 	}
499 
500 	testEngine("none");
501 	testEngine("directory");
502 	testEngine("git");
503 }