1 /**
2  * Code to manage cached D component builds.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <ae@cy.md>
12  */
13 
14 module ae.sys.d.cache;
15 
16 import std.algorithm;
17 import std.array;
18 import std.exception;
19 import std.file;
20 import std.format;
21 import std.path;
22 import std.range;
23 import std.string;
24 
25 import ae.sys.file;
26 import ae.utils.meta;
27 
28 private alias copy = std.file.copy; // https://issues.dlang.org/show_bug.cgi?id=14817
29 
30 /// An interface that needs to be implemented by `DCache` users.
31 interface ICacheHost
32 {
33 	/// An optimization helper which provides a linear order in which keys should be optimized
34 	/// (cache entries most likely to have common data should be adjacent in the list).
35 	/// Returns: An array of groups, each group is an array of key prefixes.
36 	/// Params:
37 	///   key = If non-null, this function will only return keys relevant to the given key.
38 	string[][] getKeyOrder(string key);
39 
40 	/// Log a string.
41 	void log(string s);
42 }
43 
44 /// Abstract base class.
45 abstract class DCache
46 {
47 	string cacheDir; /// Directory which will hold the cached builds.
48 	ICacheHost cacheHost; /// Callback interface.
49 
50 	alias cacheHost this;
51 
52 	this(string cacheDir, ICacheHost cacheHost)
53 	{
54 		this.cacheDir = cacheDir;
55 		this.cacheHost = cacheHost;
56 	} ///
57 
58 	/// Get name of this cache engine.
59 	abstract @property string name() const;
60 
61 	/// Get a list of keys for all cached entries.
62 	abstract string[] getEntries() const;
63 
64 	/// Check if an entry with the given key exists.
65 	abstract bool haveEntry(string key) const;
66 
67 	/// Get the full file listing for the given cache entry.
68 	/// Forward slashes only.
69 	abstract string[] listFiles(string key) const;
70 
71 	/// Add files from a directory.
72 	/// This operation is destructive: the cache implementation
73 	/// is allowed to modify the source directory.
74 	abstract void add(string key, string sourcePath);
75 
76 	/// Extract a cache entry to a target directory, with a filter for root files/directories.
77 	abstract void extract(string key, string targetPath, bool delegate(string) pathFilter);
78 
79 	/// Delete a cache entry.
80 	abstract void remove(string key);
81 
82 	/// Close the cache.
83 	/// Called after all operations are completed
84 	/// and the cache is no longer immediately needed.
85 	abstract void finalize();
86 
87 	/// Optimize the cache (minimize disk space).
88 	/// This operation can be very slow (and should display progress).
89 	abstract void optimize();
90 
91 	/// Verify cache integrity.
92 	abstract void verify();
93 
94 	/// Utility function (copy file or directory)
95 	final void cp(string src, string dst, bool silent = false)
96 	{
97 		if (!silent)
98 			log("Copy: " ~ src ~ " -> " ~ dst);
99 
100 		ensurePathExists(dst);
101 		if (src.isDir)
102 		{
103 			if (!dst.exists)
104 				dst.mkdir();
105 			foreach (de; src.dirEntries(SpanMode.shallow))
106 				cp(de.name, dst.buildPath(de.name.baseName), true);
107 		}
108 		else
109 		{
110 			copy(src, dst);
111 			version (Posix)
112 				dst.setAttributes(src.getAttributes());
113 		}
114 	}
115 }
116 
117 /// Base class for a simple directory store.
118 abstract class DirCacheBase : DCache
119 {
120 	this(string cacheDir, ICacheHost cacheHost)
121 	{
122 		super(cacheDir, cacheHost);
123 		if (!cacheDir.exists)
124 			cacheDir.mkdirRecurse();
125 	} ///
126 
127 protected:
128 	override string[] getEntries() const
129 	{
130 		return !cacheDir.exists ? null :
131 			cacheDir.dirEntries(SpanMode.shallow).filter!(de => de.isDir).map!(de => de.baseName).array;
132 	}
133 
134 	override bool haveEntry(string key) const
135 	{
136 		return cacheDir.buildPath(key).I!(path => path.exists && path.isDir);
137 	}
138 
139 	override string[] listFiles(string key) const
140 	{
141 		return cacheDir.buildPath(key).I!(path =>
142 			path
143 			.dirEntries(SpanMode.breadth)
144 			.filter!(de => de.isFile)
145 			.map!(de => de.name
146 				[path.length+1..$] // paths should be relative to cache entry dir root
147 				.replace(`\`, `/`)
148 			)
149 			.array
150 		);
151 	}
152 
153 	abstract void optimizeKey(string key);
154 
155 	override void add(string key, string sourcePath)
156 	{
157 		auto entryDir = cacheDir.buildPath(key);
158 		ensurePathExists(entryDir);
159 		sourcePath.rename(entryDir);
160 		optimizeKey(key);
161 	}
162 
163 	override void extract(string key, string targetPath, bool delegate(string) pathFilter)
164 	{
165 		cacheDir.buildPath(key).dirEntries(SpanMode.shallow)
166 			.filter!(de => pathFilter(de.baseName))
167 			.each!(de => cp(de.name, buildPath(targetPath, de.baseName)));
168 	}
169 
170 	override void remove(string key)
171 	{
172 		cacheDir.buildPath(key).rmdirRecurse();
173 	}
174 }
175 
176 /// The bare minimum required for ae.sys.d to work.
177 /// Implement a temporary cache which is deleted
178 /// as soon as it's no longer immediately needed.
179 class TempCache : DirCacheBase
180 {
181 	this(string cacheDir, ICacheHost cacheHost)
182 	{
183 		super(cacheDir, cacheHost);
184 		finalize();
185 	} ///
186 
187 protected:
188 	alias cacheHost this; // https://issues.dlang.org/show_bug.cgi?id=5973
189 
190 	override @property string name() const { return "none"; }
191 
192 	override void finalize()
193 	{
194 		if (cacheDir.exists)
195 		{
196 			log("Clearing temporary cache");
197 			rmdirRecurse(cacheDir);
198 		}
199 	}
200 
201 	override void optimize() { finalize(); }
202 	override void optimizeKey(string key) {}
203 
204 	override void verify() {}
205 }
206 
207 /// Store cached builds in subdirectories.
208 /// Optimize things by hard-linking identical files.
209 class DirCache : DirCacheBase
210 {
211 	mixin GenerateConstructorProxies;
212 	alias cacheHost this; // https://issues.dlang.org/show_bug.cgi?id=5973
213 
214 protected:
215 	override @property string name() const { return "directory"; }
216 
217 	size_t dedupedFiles;
218 
219 	/// Replace all files that have duplicate subpaths and content
220 	/// which exist under both dirA and dirB with hard links.
221 	void dedupDirectories(string dirA, string dirB)
222 	{
223 		foreach (de; dirEntries(dirA, SpanMode.depth))
224 			if (de.isFile)
225 			{
226 				auto pathA = de.name;
227 				auto subPath = pathA[dirA.length..$];
228 				auto pathB = dirB ~ subPath;
229 
230 				if (pathB.exists
231 				 && pathA.getSize() == pathB.getSize()
232 				 && pathA.getFileID() != pathB.getFileID()
233 				 && pathA.mdFileCached() == pathB.mdFileCached())
234 				{
235 					//debug log(pathB ~ " = " ~ pathA);
236 					pathB.remove();
237 					try
238 					{
239 						pathA.hardLink(pathB);
240 						dedupedFiles++;
241 					}
242 					catch (FileException e)
243 					{
244 						log(" -- Hard link failed: " ~ e.msg);
245 						pathA.copy(pathB);
246 					}
247 				}
248 			}
249 	}
250 
251 	private final void optimizeCacheImpl(const(string)[] order, bool reverse = false, string onlyKey = null)
252 	{
253 		if (reverse)
254 			order = order.retro.array();
255 
256 		string[] lastKeys;
257 
258 		auto cacheDirEntries = cacheDir
259 			.dirEntries(SpanMode.shallow)
260 			.map!(de => de.baseName)
261 			.array
262 			.sort()
263 		;
264 
265 		foreach (prefix; order)
266 		{
267 			auto cacheEntries = cacheDirEntries
268 				.filter!(name => name.startsWith(prefix))
269 				.array
270 			;
271 
272 			bool optimizeThis = onlyKey is null || onlyKey.startsWith(prefix);
273 
274 			if (optimizeThis)
275 			{
276 				auto targetEntries = lastKeys ~ cacheEntries;
277 
278 				if (targetEntries.length)
279 					foreach (i, entry1; targetEntries[0..$-1])
280 						foreach (entry2; targetEntries[i+1..$])
281 							dedupDirectories(buildPath(cacheDir, entry1), buildPath(cacheDir, entry2));
282 			}
283 
284 			if (cacheEntries.length)
285 				lastKeys = cacheEntries;
286 		}
287 	}
288 
289 	override void finalize() {}
290 
291 	/// Optimize entire cache.
292 	override void optimize()
293 	{
294 		bool[string] componentNames;
295 		foreach (de; dirEntries(cacheDir, SpanMode.shallow))
296 		{
297 			auto parts = de.baseName().split("-");
298 			if (parts.length >= 3)
299 				componentNames[parts[0..$-2].join("-")] = true;
300 		}
301 
302 		log("Optimizing cache..."); dedupedFiles = 0;
303 		foreach (order; getKeyOrder(null))
304 			optimizeCacheImpl(order);
305 		log("Deduplicated %d files.".format(dedupedFiles));
306 	}
307 
308 	/// Optimize specific revision.
309 	override void optimizeKey(string key)
310 	{
311 		log("Optimizing cache entry..."); dedupedFiles = 0;
312 		auto orders = getKeyOrder(key);
313 		foreach (order; orders) // Should be only one
314 		{
315 			optimizeCacheImpl(order, false, key);
316 			optimizeCacheImpl(order, true , key);
317 		}
318 		log("Deduplicated %d files.".format(dedupedFiles));
319 	}
320 
321 	override void verify() {}
322 }
323 
324 /// Cache backed by a git repository.
325 /// Git's packfiles provide an efficient way to store binary
326 /// files with small differences, however adding and extracting
327 /// items is a little slower.
328 class GitCache : DCache
329 {
330 	this(string cacheDir, ICacheHost cacheHost)
331 	{
332 		super(cacheDir, cacheHost);
333 		if (!cacheDir.exists)
334 		{
335 			cacheDir.mkdirRecurse();
336 			spawnProcess(["git", "init", cacheDir])
337 				.wait()
338 				.I!(code => (code==0).enforce("git init failed"))
339 			;
340 		}
341 		git = Git(cacheDir);
342 	} ///
343 
344 	/// Builds are pinned by refs namespaced by this prefix.
345 	static const refPrefix = "refs/ae-sys-d-cache/";
346 
347 protected:
348 	import std.process;
349 	import ae.sys.git;
350 
351 	Git git;
352 
353 	override @property string name() const { return "git"; }
354 
355 	override string[] getEntries() const
356 	{
357 		auto chompPrefix(R)(R r, string prefix) { return r.filter!(s => s.startsWith(prefix)).map!(s => s[prefix.length..$]); }
358 		try
359 			return git
360 				.query("show-ref")
361 				.splitLines
362 				.map!(s => s[41..$])
363 				.I!chompPrefix(refPrefix)
364 				.array
365 			;
366 		catch (Exception e)
367 			return null; // show-ref will exit with status 1 for an empty repository
368 	}
369 
370 	override bool haveEntry(string key) const
371 	{
372 		auto p = git.pipe(["show-ref", "--verify", refPrefix ~ key], Redirect.stdout | Redirect.stderr);
373 		readFile(p.stdout);
374 		readFile(p.stderr);
375 		return p.pid.wait() == 0;
376 	}
377 
378 	override string[] listFiles(string key) const
379 	{
380 		return git
381 			.query("ls-tree", "--name-only", "-r", refPrefix ~ key)
382 			.splitLines
383 		;
384 	}
385 
386 	override void add(string key, string sourcePath)
387 	{
388 		auto writer = git.createObjectWriter();
389 		auto tree = git.importTree(sourcePath, writer);
390 		auto author = "ae.sys.d.cache <ae.sys.d.cache@thecybershadow.net> 0 +0000";
391 		auto commit = writer.write(Git.Object.createCommit(Git.Object.ParsedCommit(tree, null, author, author, [key])));
392 		git.run("update-ref", refPrefix ~ key, commit.toString());
393 	}
394 
395 	override void extract(string key, string targetPath, bool delegate(string) pathFilter)
396 	{
397 		if (!targetPath.exists)
398 			targetPath.mkdirRecurse();
399 
400 		auto reader = git.createObjectReader();
401 		git.exportCommit(refPrefix ~ key, targetPath, reader, fn => pathFilter(fn.pathSplitter.front));
402 	}
403 
404 	override void remove(string key)
405 	{
406 		git.run("update-ref", "-d", refPrefix ~ key);
407 	}
408 
409 	override void finalize() {}
410 
411 	override void optimize()
412 	{
413 		git.run("prune");
414 		git.run("pack-refs", "--all");
415 		git.run("repack", "-a", "-d");
416 	}
417 
418 	override void verify()
419 	{
420 		git.run("fsck", "--full", "--strict");
421 	}
422 }
423 
424 /// Create a DCache instance according to the given name.
425 DCache createCache(string name, string cacheDir, ICacheHost cacheHost)
426 {
427 	switch (name)
428 	{
429 		case "":
430 		case "false": // compat
431 		case "none":      return new TempCache(cacheDir, cacheHost);
432 		case "true":  // compat
433 		case "directory": return new DirCache (cacheDir, cacheHost);
434 		case "git":       return new GitCache (cacheDir, cacheHost);
435 		default: throw new Exception("Unknown cache engine: " ~ name);
436 	}
437 }
438 
439 unittest
440 {
441 	void testEngine(string name)
442 	{
443 		class Host : ICacheHost
444 		{
445 			string[][] getKeyOrder(string key) { return null; }
446 			void log(string s) {}
447 		}
448 		auto host = new Host;
449 
450 		auto testDir = "test-cache";
451 		if (testDir.exists) testDir.forceDelete!(No.atomic)(Yes.recursive);
452 		testDir.mkdir();
453 		scope(exit) if (testDir.exists) testDir.forceDelete!(No.atomic)(Yes.recursive);
454 
455 		auto cacheEngine = createCache(name, testDir.buildPath("cache"), host);
456 		assert(cacheEngine.getEntries().length == 0);
457 		assert(!cacheEngine.haveEntry("test-key"));
458 
459 		auto sourceDir = testDir.buildPath("source");
460 		mkdir(sourceDir);
461 		write(sourceDir.buildPath("test.txt"), "Test");
462 		mkdir(sourceDir.buildPath("dir"));
463 		write(sourceDir.buildPath("dir", "test2.txt"), "Test 2");
464 
465 		cacheEngine.add("test-key", sourceDir);
466 		assert(cacheEngine.getEntries() == ["test-key"]);
467 		assert(cacheEngine.haveEntry("test-key"));
468 		assert(cacheEngine.listFiles("test-key").sort().release == ["dir/test2.txt", "test.txt"]);
469 
470 		if (sourceDir.exists) rmdirRecurse(sourceDir);
471 		mkdir(sourceDir);
472 		write(sourceDir.buildPath("test3.txt"), "Test 3");
473 		mkdir(sourceDir.buildPath("test3"));
474 		write(sourceDir.buildPath("test3", "test.txt"), "Test 3 subdir");
475 		cacheEngine.add("test-key-2", sourceDir);
476 
477 		auto targetDir = testDir.buildPath("target");
478 		mkdir(targetDir);
479 		cacheEngine.extract("test-key", targetDir, fn => true);
480 		assert(targetDir.buildPath("test.txt").readText == "Test");
481 		assert(targetDir.buildPath("dir", "test2.txt").readText == "Test 2");
482 
483 		rmdirRecurse(targetDir);
484 		mkdir(targetDir);
485 		cacheEngine.extract("test-key-2", targetDir, fn => true);
486 		assert(targetDir.buildPath("test3.txt").readText == "Test 3");
487 
488 		rmdirRecurse(targetDir);
489 		mkdir(targetDir);
490 		cacheEngine.extract("test-key", targetDir, fn => fn=="dir");
491 		assert(!targetDir.buildPath("test.txt").exists);
492 		assert(targetDir.buildPath("dir", "test2.txt").readText == "Test 2");
493 
494 		cacheEngine.remove("test-key");
495 		assert(cacheEngine.getEntries().length == 1);
496 		assert(!cacheEngine.haveEntry("test-key"));
497 
498 		cacheEngine.finalize();
499 		cacheEngine.verify();
500 
501 		cacheEngine.optimize();
502 	}
503 
504 	testEngine("none");
505 	testEngine("directory");
506 	testEngine("git");
507 }