1 /**
2  * Code to manage cached D component builds.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <ae@cy.md>
12  */
13 
14 module ae.sys.d.cache;
15 
16 import std.algorithm;
17 import std.array;
18 import std.exception;
19 import std.file;
20 import std.format;
21 import std.path;
22 import std.range;
23 import std.string;
24 
25 import ae.sys.file;
26 import ae.utils.meta;
27 
28 private alias copy = std.file.copy; // https://issues.dlang.org/show_bug.cgi?id=14817
29 
30 /// An interface that needs to be implemented by `DCache` users.
31 interface ICacheHost
32 {
33 	/// An optimization helper which provides a linear order in which keys should be optimized
34 	/// (cache entries most likely to have common data should be adjacent in the list).
35 	/// Returns: An array of groups, each group is an array of key prefixes.
36 	/// Params:
37 	///   key = If non-null, this function will only return keys relevant to the given key.
38 	string[][] getKeyOrder(string key);
39 
40 	/// Log a string.
41 	void log(string s);
42 }
43 
44 /// Abstract base class.
45 abstract class DCache
46 {
47 	string cacheDir; /// Directory which will hold the cached builds.
48 	ICacheHost host; /// Callback interface.
49 
50 	this(string cacheDir, ICacheHost cacheHost)
51 	{
52 		this.cacheDir = cacheDir;
53 		this.host = cacheHost;
54 	} ///
55 
56 	/// Get name of this cache engine.
57 	abstract @property string name() const;
58 
59 	/// Get a list of keys for all cached entries.
60 	abstract string[] getEntries() const;
61 
62 	/// Check if an entry with the given key exists.
63 	abstract bool haveEntry(string key) const;
64 
65 	/// Get the full file listing for the given cache entry.
66 	/// Forward slashes only.
67 	abstract string[] listFiles(string key) const;
68 
69 	/// Add files from a directory.
70 	/// This operation is destructive: the cache implementation
71 	/// is allowed to modify the source directory.
72 	abstract void add(string key, string sourcePath);
73 
74 	/// Extract a cache entry to a target directory, with a filter for root files/directories.
75 	abstract void extract(string key, string targetPath, bool delegate(string) pathFilter);
76 
77 	/// Delete a cache entry.
78 	abstract void remove(string key);
79 
80 	/// Close the cache.
81 	/// Called after all operations are completed
82 	/// and the cache is no longer immediately needed.
83 	abstract void finalize();
84 
85 	/// Optimize the cache (minimize disk space).
86 	/// This operation can be very slow (and should display progress).
87 	abstract void optimize();
88 
89 	/// Verify cache integrity.
90 	abstract void verify();
91 
92 	/// Utility function (copy file or directory)
93 	final void cp(string src, string dst, bool silent = false)
94 	{
95 		if (!silent)
96 			host.log("Copy: " ~ src ~ " -> " ~ dst);
97 
98 		ensurePathExists(dst);
99 		if (src.isDir)
100 		{
101 			if (!dst.exists)
102 				dst.mkdir();
103 			foreach (de; src.dirEntries(SpanMode.shallow))
104 				cp(de.name, dst.buildPath(de.name.baseName), true);
105 		}
106 		else
107 		{
108 			copy(src, dst);
109 			version (Posix)
110 				dst.setAttributes(src.getAttributes());
111 		}
112 	}
113 }
114 
115 /// Base class for a simple directory store.
116 abstract class DirCacheBase : DCache
117 {
118 	this(string cacheDir, ICacheHost cacheHost)
119 	{
120 		super(cacheDir, cacheHost);
121 		if (!cacheDir.exists)
122 			cacheDir.mkdirRecurse();
123 	} ///
124 
125 protected:
126 	override string[] getEntries() const
127 	{
128 		return !cacheDir.exists ? null :
129 			cacheDir.dirEntries(SpanMode.shallow).filter!(de => de.isDir).map!(de => de.baseName).array;
130 	}
131 
132 	override bool haveEntry(string key) const
133 	{
134 		return cacheDir.buildPath(key).I!(path => path.exists && path.isDir);
135 	}
136 
137 	override string[] listFiles(string key) const
138 	{
139 		return cacheDir.buildPath(key).I!(path =>
140 			path
141 			.dirEntries(SpanMode.breadth)
142 			.filter!(de => de.isFile)
143 			.map!(de => de.name
144 				[path.length+1..$] // paths should be relative to cache entry dir root
145 				.replace(`\`, `/`)
146 			)
147 			.array
148 		);
149 	}
150 
151 	abstract void optimizeKey(string key);
152 
153 	override void add(string key, string sourcePath)
154 	{
155 		auto entryDir = cacheDir.buildPath(key);
156 		ensurePathExists(entryDir);
157 		sourcePath.rename(entryDir);
158 		optimizeKey(key);
159 	}
160 
161 	override void extract(string key, string targetPath, bool delegate(string) pathFilter)
162 	{
163 		cacheDir.buildPath(key).dirEntries(SpanMode.shallow)
164 			.filter!(de => pathFilter(de.baseName))
165 			.each!(de => cp(de.name, buildPath(targetPath, de.baseName)));
166 	}
167 
168 	override void remove(string key)
169 	{
170 		cacheDir.buildPath(key).rmdirRecurse();
171 	}
172 }
173 
174 /// The bare minimum required for ae.sys.d to work.
175 /// Implement a temporary cache which is deleted
176 /// as soon as it's no longer immediately needed.
177 class TempCache : DirCacheBase
178 {
179 	this(string cacheDir, ICacheHost cacheHost)
180 	{
181 		super(cacheDir, cacheHost);
182 		finalize();
183 	} ///
184 
185 protected:
186 	override @property string name() const { return "none"; }
187 
188 	override void finalize()
189 	{
190 		if (cacheDir.exists)
191 		{
192 			host.log("Clearing temporary cache");
193 			rmdirRecurse(cacheDir);
194 		}
195 	}
196 
197 	override void optimize() { finalize(); }
198 	override void optimizeKey(string key) {}
199 
200 	override void verify() {}
201 }
202 
203 /// Store cached builds in subdirectories.
204 /// Optimize things by hard-linking identical files.
205 class DirCache : DirCacheBase
206 {
207 	mixin GenerateConstructorProxies;
208 
209 protected:
210 	override @property string name() const { return "directory"; }
211 
212 	size_t dedupedFiles;
213 
214 	/// Replace all files that have duplicate subpaths and content
215 	/// which exist under both dirA and dirB with hard links.
216 	void dedupDirectories(string dirA, string dirB)
217 	{
218 		foreach (de; dirEntries(dirA, SpanMode.depth))
219 			if (de.isFile)
220 			{
221 				auto pathA = de.name;
222 				auto subPath = pathA[dirA.length..$];
223 				auto pathB = dirB ~ subPath;
224 
225 				if (pathB.exists
226 				 && pathA.getSize() == pathB.getSize()
227 				 && pathA.getFileID() != pathB.getFileID()
228 				 && pathA.mdFileCached() == pathB.mdFileCached())
229 				{
230 					//debug log(pathB ~ " = " ~ pathA);
231 					pathB.remove();
232 					try
233 					{
234 						pathA.hardLink(pathB);
235 						dedupedFiles++;
236 					}
237 					catch (FileException e)
238 					{
239 						host.log(" -- Hard link failed: " ~ e.msg);
240 						pathA.copy(pathB);
241 					}
242 				}
243 			}
244 	}
245 
246 	private final void optimizeCacheImpl(const(string)[] order, bool reverse = false, string onlyKey = null)
247 	{
248 		if (reverse)
249 			order = order.retro.array();
250 
251 		string[] lastKeys;
252 
253 		auto cacheDirEntries = cacheDir
254 			.dirEntries(SpanMode.shallow)
255 			.map!(de => de.baseName)
256 			.array
257 			.sort()
258 		;
259 
260 		foreach (prefix; order)
261 		{
262 			auto cacheEntries = cacheDirEntries
263 				.filter!(name => name.startsWith(prefix))
264 				.array
265 			;
266 
267 			bool optimizeThis = onlyKey is null || onlyKey.startsWith(prefix);
268 
269 			if (optimizeThis)
270 			{
271 				auto targetEntries = lastKeys ~ cacheEntries;
272 
273 				if (targetEntries.length)
274 					foreach (i, entry1; targetEntries[0..$-1])
275 						foreach (entry2; targetEntries[i+1..$])
276 							dedupDirectories(buildPath(cacheDir, entry1), buildPath(cacheDir, entry2));
277 			}
278 
279 			if (cacheEntries.length)
280 				lastKeys = cacheEntries;
281 		}
282 	}
283 
284 	override void finalize() {}
285 
286 	/// Optimize entire cache.
287 	override void optimize()
288 	{
289 		bool[string] componentNames;
290 		foreach (de; dirEntries(cacheDir, SpanMode.shallow))
291 		{
292 			auto parts = de.baseName().split("-");
293 			if (parts.length >= 3)
294 				componentNames[parts[0..$-2].join("-")] = true;
295 		}
296 
297 		host.log("Optimizing cache..."); dedupedFiles = 0;
298 		foreach (order; host.getKeyOrder(null))
299 			optimizeCacheImpl(order);
300 		host.log("Deduplicated %d files.".format(dedupedFiles));
301 	}
302 
303 	/// Optimize specific revision.
304 	override void optimizeKey(string key)
305 	{
306 		host.log("Optimizing cache entry..."); dedupedFiles = 0;
307 		auto orders = host.getKeyOrder(key);
308 		foreach (order; orders) // Should be only one
309 		{
310 			optimizeCacheImpl(order, false, key);
311 			optimizeCacheImpl(order, true , key);
312 		}
313 		host.log("Deduplicated %d files.".format(dedupedFiles));
314 	}
315 
316 	override void verify() {}
317 }
318 
319 /// Cache backed by a git repository.
320 /// Git's packfiles provide an efficient way to store binary
321 /// files with small differences, however adding and extracting
322 /// items is a little slower.
323 class GitCache : DCache
324 {
325 	this(string cacheDir, ICacheHost cacheHost)
326 	{
327 		super(cacheDir, cacheHost);
328 		if (!cacheDir.exists)
329 		{
330 			cacheDir.mkdirRecurse();
331 			spawnProcess(["git", "init", cacheDir])
332 				.wait()
333 				.I!(code => (code==0).enforce("git init failed"))
334 			;
335 		}
336 		git = Git(cacheDir);
337 	} ///
338 
339 	/// Builds are pinned by refs namespaced by this prefix.
340 	static const refPrefix = "refs/ae-sys-d-cache/";
341 
342 protected:
343 	import std.process;
344 	import ae.sys.git;
345 
346 	Git git;
347 
348 	override @property string name() const { return "git"; }
349 
350 	override string[] getEntries() const
351 	{
352 		auto chompPrefix(R)(R r, string prefix) { return r.filter!(s => s.startsWith(prefix)).map!(s => s[prefix.length..$]); }
353 		try
354 			return git
355 				.query("show-ref")
356 				.splitLines
357 				.map!(s => s[41..$])
358 				.I!chompPrefix(refPrefix)
359 				.array
360 			;
361 		catch (Exception e)
362 			return null; // show-ref will exit with status 1 for an empty repository
363 	}
364 
365 	override bool haveEntry(string key) const
366 	{
367 		auto p = git.pipe(["show-ref", "--verify", refPrefix ~ key], Redirect.stdout | Redirect.stderr);
368 		readFile(p.stdout);
369 		readFile(p.stderr);
370 		return p.pid.wait() == 0;
371 	}
372 
373 	override string[] listFiles(string key) const
374 	{
375 		return git
376 			.query("ls-tree", "--name-only", "-r", refPrefix ~ key)
377 			.splitLines
378 		;
379 	}
380 
381 	override void add(string key, string sourcePath)
382 	{
383 		auto writer = git.createObjectWriter();
384 		auto tree = git.importTree(sourcePath, writer);
385 		auto author = "ae.sys.d.cache <ae.sys.d.cache@thecybershadow.net> 0 +0000";
386 		auto commit = writer.write(Git.Object.createCommit(Git.Object.ParsedCommit(tree, null, author, author, [key])));
387 		git.run("update-ref", refPrefix ~ key, commit.toString());
388 	}
389 
390 	override void extract(string key, string targetPath, bool delegate(string) pathFilter)
391 	{
392 		if (!targetPath.exists)
393 			targetPath.mkdirRecurse();
394 
395 		auto reader = git.createObjectReader();
396 		git.exportCommit(refPrefix ~ key, targetPath, reader, fn => pathFilter(fn.pathSplitter.front));
397 	}
398 
399 	override void remove(string key)
400 	{
401 		git.run("update-ref", "-d", refPrefix ~ key);
402 	}
403 
404 	override void finalize() {}
405 
406 	override void optimize()
407 	{
408 		git.run("prune");
409 		git.run("pack-refs", "--all");
410 		git.run("repack", "-a", "-d");
411 	}
412 
413 	override void verify()
414 	{
415 		git.run("fsck", "--full", "--strict");
416 	}
417 }
418 
419 /// Create a DCache instance according to the given name.
420 DCache createCache(string name, string cacheDir, ICacheHost cacheHost)
421 {
422 	switch (name)
423 	{
424 		case "":
425 		case "false": // compat
426 		case "none":      return new TempCache(cacheDir, cacheHost);
427 		case "true":  // compat
428 		case "directory": return new DirCache (cacheDir, cacheHost);
429 		case "git":       return new GitCache (cacheDir, cacheHost);
430 		default: throw new Exception("Unknown cache engine: " ~ name);
431 	}
432 }
433 
434 unittest
435 {
436 	void testEngine(string name)
437 	{
438 		class Host : ICacheHost
439 		{
440 			string[][] getKeyOrder(string key) { return null; }
441 			void log(string s) {}
442 		}
443 		auto host = new Host;
444 
445 		auto testDir = "test-cache";
446 		if (testDir.exists) testDir.forceDelete!(No.atomic)(Yes.recursive);
447 		testDir.mkdir();
448 		scope(exit) if (testDir.exists) testDir.forceDelete!(No.atomic)(Yes.recursive);
449 
450 		auto cacheEngine = createCache(name, testDir.buildPath("cache"), host);
451 		assert(cacheEngine.getEntries().length == 0);
452 		assert(!cacheEngine.haveEntry("test-key"));
453 
454 		auto sourceDir = testDir.buildPath("source");
455 		mkdir(sourceDir);
456 		write(sourceDir.buildPath("test.txt"), "Test");
457 		mkdir(sourceDir.buildPath("dir"));
458 		write(sourceDir.buildPath("dir", "test2.txt"), "Test 2");
459 
460 		cacheEngine.add("test-key", sourceDir);
461 		assert(cacheEngine.getEntries() == ["test-key"]);
462 		assert(cacheEngine.haveEntry("test-key"));
463 		assert(cacheEngine.listFiles("test-key").sort().release == ["dir/test2.txt", "test.txt"]);
464 
465 		if (sourceDir.exists) rmdirRecurse(sourceDir);
466 		mkdir(sourceDir);
467 		write(sourceDir.buildPath("test3.txt"), "Test 3");
468 		mkdir(sourceDir.buildPath("test3"));
469 		write(sourceDir.buildPath("test3", "test.txt"), "Test 3 subdir");
470 		cacheEngine.add("test-key-2", sourceDir);
471 
472 		auto targetDir = testDir.buildPath("target");
473 		mkdir(targetDir);
474 		cacheEngine.extract("test-key", targetDir, fn => true);
475 		assert(targetDir.buildPath("test.txt").readText == "Test");
476 		assert(targetDir.buildPath("dir", "test2.txt").readText == "Test 2");
477 
478 		rmdirRecurse(targetDir);
479 		mkdir(targetDir);
480 		cacheEngine.extract("test-key-2", targetDir, fn => true);
481 		assert(targetDir.buildPath("test3.txt").readText == "Test 3");
482 
483 		rmdirRecurse(targetDir);
484 		mkdir(targetDir);
485 		cacheEngine.extract("test-key", targetDir, fn => fn=="dir");
486 		assert(!targetDir.buildPath("test.txt").exists);
487 		assert(targetDir.buildPath("dir", "test2.txt").readText == "Test 2");
488 
489 		cacheEngine.remove("test-key");
490 		assert(cacheEngine.getEntries().length == 1);
491 		assert(!cacheEngine.haveEntry("test-key"));
492 
493 		cacheEngine.finalize();
494 		cacheEngine.verify();
495 
496 		cacheEngine.optimize();
497 	}
498 
499 	testEngine("none");
500 	testEngine("directory");
501 	testEngine("git");
502 }