1 /**
2  * Code to manage cached D component builds.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.sys.d.cache;
15 
16 import std.algorithm;
17 import std.array;
18 import std.exception;
19 import std.file;
20 import std.format;
21 import std.path;
22 import std.range;
23 import std..string;
24 
25 import ae.sys.file;
26 import ae.utils.meta;
27 
28 alias copy = std.file.copy; // https://issues.dlang.org/show_bug.cgi?id=14817
29 
30 interface ICacheHost
31 {
32 	/// An optimization helper which provides a linear order in which keys should be optimized
33 	/// (cache entries most likely to have common data should be adjacent in the list).
34 	/// Returns: An array of groups, each group is an array of key prefixes.
35 	/// Params:
36 	///   key = If non-null, this function will only return keys relevant to the given key.
37 	string[][] getKeyOrder(string key);
38 
39 	/// Log a string.
40 	void log(string s);
41 }
42 
43 /// Abstract base class.
44 abstract class DCache
45 {
46 	string cacheDir;
47 	ICacheHost cacheHost;
48 
49 	alias cacheHost this;
50 
51 	this(string cacheDir, ICacheHost cacheHost)
52 	{
53 		this.cacheDir = cacheDir;
54 		this.cacheHost = cacheHost;
55 	}
56 
57 	/// Get name of this cache engine.
58 	abstract @property string name() const;
59 
60 	/// Get a list of keys for all cached entries.
61 	abstract string[] getEntries() const;
62 
63 	/// Check if an entry with the given key exists.
64 	abstract bool haveEntry(string key) const;
65 
66 	/// Get the full file listing for the given cache entry.
67 	/// Forward slashes only.
68 	abstract string[] listFiles(string key) const;
69 
70 	/// Add files from a directory.
71 	/// This operation is destructive: the cache implementation
72 	/// is allowed to modify the source directory.
73 	abstract void add(string key, string sourcePath);
74 
75 	/// Extract a cache entry to a target directory, with a filter for root files/directories.
76 	abstract void extract(string key, string targetPath, bool delegate(string) pathFilter);
77 
78 	/// Delete a cache entry.
79 	abstract void remove(string key);
80 
81 	/// Close the cache.
82 	/// Called after all operations are completed
83 	/// and the cache is no longer immediately needed.
84 	abstract void finalize();
85 
86 	/// Optimize the cache (minimize disk space).
87 	/// This operation can be very slow (and should display progress).
88 	abstract void optimize();
89 
90 	/// Verify cache integrity.
91 	abstract void verify();
92 
93 	/// Utility function (copy file or directory)
94 	final void cp(string src, string dst, bool silent = false)
95 	{
96 		if (!silent)
97 			log("Copy: " ~ src ~ " -> " ~ dst);
98 
99 		ensurePathExists(dst);
100 		if (src.isDir)
101 		{
102 			if (!dst.exists)
103 				dst.mkdir();
104 			foreach (de; src.dirEntries(SpanMode.shallow))
105 				cp(de.name, dst.buildPath(de.name.baseName), true);
106 		}
107 		else
108 		{
109 			copy(src, dst);
110 			version (Posix)
111 				dst.setAttributes(src.getAttributes());
112 		}
113 	}
114 }
115 
116 /// Base class for a simple directory store.
117 abstract class DirCacheBase : DCache
118 {
119 	this(string cacheDir, ICacheHost cacheHost)
120 	{
121 		super(cacheDir, cacheHost);
122 		if (!cacheDir.exists)
123 			cacheDir.mkdirRecurse();
124 	}
125 
126 	override string[] getEntries() const
127 	{
128 		return !cacheDir.exists ? null :
129 			cacheDir.dirEntries(SpanMode.shallow).filter!(de => de.isDir).map!(de => de.baseName).array;
130 	}
131 
132 	override bool haveEntry(string key) const
133 	{
134 		return cacheDir.buildPath(key).I!(path => path.exists && path.isDir);
135 	}
136 
137 	override string[] listFiles(string key) const
138 	{
139 		return cacheDir.buildPath(key).I!(path =>
140 			path
141 			.dirEntries(SpanMode.breadth)
142 			.filter!(de => de.isFile)
143 			.map!(de => de.name
144 				[path.length+1..$] // paths should be relative to cache entry dir root
145 				.replace(`\`, `/`)
146 			)
147 			.array
148 		);
149 	}
150 
151 	abstract void optimizeKey(string key);
152 
153 	override void add(string key, string sourcePath)
154 	{
155 		auto entryDir = cacheDir.buildPath(key);
156 		ensurePathExists(entryDir);
157 		sourcePath.rename(entryDir);
158 		optimizeKey(key);
159 	}
160 
161 	override void extract(string key, string targetPath, bool delegate(string) pathFilter)
162 	{
163 		cacheDir.buildPath(key).dirEntries(SpanMode.shallow)
164 			.filter!(de => pathFilter(de.baseName))
165 			.each!(de => cp(de.name, buildPath(targetPath, de.baseName)));
166 	}
167 
168 	override void remove(string key)
169 	{
170 		cacheDir.buildPath(key).rmdirRecurse();
171 	}
172 }
173 
174 /// The bare minimum required for ae.sys.d to work.
175 /// Implement a temporary cache which is deleted
176 /// as soon as it's no longer immediately needed.
177 class TempCache : DirCacheBase
178 {
179 	this(string cacheDir, ICacheHost cacheHost)
180 	{
181 		super(cacheDir, cacheHost);
182 		finalize();
183 	}
184 
185 	alias cacheHost this; // https://issues.dlang.org/show_bug.cgi?id=5973
186 
187 	override @property string name() const { return "none"; }
188 
189 	override void finalize()
190 	{
191 		if (cacheDir.exists)
192 		{
193 			log("Clearing temporary cache");
194 			rmdirRecurse(cacheDir);
195 		}
196 	}
197 
198 	override void optimize() { finalize(); }
199 	override void optimizeKey(string key) {}
200 
201 	override void verify() {}
202 }
203 
204 /// Store cached builds in subdirectories.
205 /// Optimize things by hard-linking identical files.
206 class DirCache : DirCacheBase
207 {
208 	mixin GenerateConstructorProxies;
209 	alias cacheHost this; // https://issues.dlang.org/show_bug.cgi?id=5973
210 
211 	override @property string name() const { return "directory"; }
212 
213 	size_t dedupedFiles;
214 
215 	/// Replace all files that have duplicate subpaths and content
216 	/// which exist under both dirA and dirB with hard links.
217 	void dedupDirectories(string dirA, string dirB)
218 	{
219 		foreach (de; dirEntries(dirA, SpanMode.depth))
220 			if (de.isFile)
221 			{
222 				auto pathA = de.name;
223 				auto subPath = pathA[dirA.length..$];
224 				auto pathB = dirB ~ subPath;
225 
226 				if (pathB.exists
227 				 && pathA.getSize() == pathB.getSize()
228 				 && pathA.getFileID() != pathB.getFileID()
229 				 && pathA.mdFileCached() == pathB.mdFileCached())
230 				{
231 					//debug log(pathB ~ " = " ~ pathA);
232 					pathB.remove();
233 					try
234 					{
235 						pathA.hardLink(pathB);
236 						dedupedFiles++;
237 					}
238 					catch (FileException e)
239 					{
240 						log(" -- Hard link failed: " ~ e.msg);
241 						pathA.copy(pathB);
242 					}
243 				}
244 			}
245 	}
246 
247 	private final void optimizeCacheImpl(const(string)[] order, bool reverse = false, string onlyKey = null)
248 	{
249 		if (reverse)
250 			order = order.retro.array();
251 
252 		string[] lastKeys;
253 
254 		auto cacheDirEntries = cacheDir
255 			.dirEntries(SpanMode.shallow)
256 			.map!(de => de.baseName)
257 			.array
258 			.sort()
259 		;
260 
261 		foreach (prefix; order)
262 		{
263 			auto cacheEntries = cacheDirEntries
264 				.filter!(name => name.startsWith(prefix))
265 				.array
266 			;
267 
268 			bool optimizeThis = onlyKey is null || onlyKey.startsWith(prefix);
269 
270 			if (optimizeThis)
271 			{
272 				auto targetEntries = lastKeys ~ cacheEntries;
273 
274 				if (targetEntries.length)
275 					foreach (i, entry1; targetEntries[0..$-1])
276 						foreach (entry2; targetEntries[i+1..$])
277 							dedupDirectories(buildPath(cacheDir, entry1), buildPath(cacheDir, entry2));
278 			}
279 
280 			if (cacheEntries.length)
281 				lastKeys = cacheEntries;
282 		}
283 	}
284 
285 	override void finalize() {}
286 
287 	/// Optimize entire cache.
288 	override void optimize()
289 	{
290 		bool[string] componentNames;
291 		foreach (de; dirEntries(cacheDir, SpanMode.shallow))
292 		{
293 			auto parts = de.baseName().split("-");
294 			if (parts.length >= 3)
295 				componentNames[parts[0..$-2].join("-")] = true;
296 		}
297 
298 		log("Optimizing cache..."); dedupedFiles = 0;
299 		foreach (order; getKeyOrder(null))
300 			optimizeCacheImpl(order);
301 		log("Deduplicated %d files.".format(dedupedFiles));
302 	}
303 
304 	/// Optimize specific revision.
305 	override void optimizeKey(string key)
306 	{
307 		log("Optimizing cache entry..."); dedupedFiles = 0;
308 		auto orders = getKeyOrder(key);
309 		foreach (order; orders) // Should be only one
310 		{
311 			optimizeCacheImpl(order, false, key);
312 			optimizeCacheImpl(order, true , key);
313 		}
314 		log("Deduplicated %d files.".format(dedupedFiles));
315 	}
316 
317 	override void verify() {}
318 }
319 
320 /// Cache backed by a git repository.
321 /// Git's packfiles provide an efficient way to store binary
322 /// files with small differences, however adding and extracting
323 /// items is a little slower.
324 class GitCache : DCache
325 {
326 	import std.process;
327 	import ae.sys.git;
328 
329 	Repository git;
330 	static const refPrefix = "refs/ae-sys-d-cache/";
331 
332 	override @property string name() const { return "git"; }
333 
334 	this(string cacheDir, ICacheHost cacheHost)
335 	{
336 		super(cacheDir, cacheHost);
337 		if (!cacheDir.exists)
338 		{
339 			cacheDir.mkdirRecurse();
340 			spawnProcess(["git", "init", cacheDir])
341 				.wait()
342 				.I!(code => (code==0).enforce("git init failed"))
343 			;
344 		}
345 		git = Repository(cacheDir);
346 	}
347 
348 	override string[] getEntries() const
349 	{
350 		auto chompPrefix(R)(R r, string prefix) { return r.filter!(s => s.startsWith(prefix)).map!(s => s[prefix.length..$]); }
351 		try
352 			return git
353 				.query("show-ref")
354 				.splitLines
355 				.map!(s => s[41..$])
356 				.I!chompPrefix(refPrefix)
357 				.array
358 			;
359 		catch (Exception e)
360 			return null; // show-ref will exit with status 1 for an empty repository
361 	}
362 
363 	override bool haveEntry(string key) const
364 	{
365 		auto p = git.pipe(["show-ref", "--verify", refPrefix ~ key], Redirect.stdout | Redirect.stderr);
366 		readFile(p.stdout);
367 		readFile(p.stderr);
368 		return p.pid.wait() == 0;
369 	}
370 
371 	override string[] listFiles(string key) const
372 	{
373 		return git
374 			.query("ls-tree", "--name-only", "-r", refPrefix ~ key)
375 			.splitLines
376 		;
377 	}
378 
379 	override void add(string key, string sourcePath)
380 	{
381 		auto writer = git.createObjectWriter();
382 		auto tree = git.importTree(sourcePath, writer);
383 		auto author = "ae.sys.d.cache <ae.sys.d.cache@thecybershadow.net> 0 +0000";
384 		auto commit = writer.write(GitObject.createCommit(GitObject.ParsedCommit(tree, null, author, author, [key])));
385 		git.run("update-ref", refPrefix ~ key, commit.toString());
386 	}
387 
388 	override void extract(string key, string targetPath, bool delegate(string) pathFilter)
389 	{
390 		if (!targetPath.exists)
391 			targetPath.mkdirRecurse();
392 
393 		auto reader = git.createObjectReader();
394 		git.exportCommit(refPrefix ~ key, targetPath, reader, fn => pathFilter(fn.pathSplitter.front));
395 	}
396 
397 	override void remove(string key)
398 	{
399 		git.run("update-ref", "-d", refPrefix ~ key);
400 	}
401 
402 	override void finalize() {}
403 
404 	override void optimize()
405 	{
406 		git.run("prune");
407 		git.run("pack-refs", "--all");
408 		git.run("repack", "-a", "-d");
409 	}
410 
411 	override void verify()
412 	{
413 		git.run("fsck", "--full", "--strict");
414 	}
415 }
416 
417 /// Create a DCache instance according to the given name.
418 DCache createCache(string name, string cacheDir, ICacheHost cacheHost)
419 {
420 	switch (name)
421 	{
422 		case "":
423 		case "false": // compat
424 		case "none":      return new TempCache(cacheDir, cacheHost);
425 		case "true":  // compat
426 		case "directory": return new DirCache (cacheDir, cacheHost);
427 		case "git":       return new GitCache (cacheDir, cacheHost);
428 		default: throw new Exception("Unknown cache engine: " ~ name);
429 	}
430 }
431 
432 unittest
433 {
434 	void testEngine(string name)
435 	{
436 		class Host : ICacheHost
437 		{
438 			string[][] getKeyOrder(string key) { return null; }
439 			void log(string s) {}
440 		}
441 		auto host = new Host;
442 
443 		auto testDir = "test-cache";
444 		if (testDir.exists) testDir.forceDelete!(No.atomic)(Yes.recursive);
445 		testDir.mkdir();
446 		scope(exit) if (testDir.exists) testDir.forceDelete!(No.atomic)(Yes.recursive);
447 
448 		auto cacheEngine = createCache(name, testDir.buildPath("cache"), host);
449 		assert(cacheEngine.getEntries().length == 0);
450 		assert(!cacheEngine.haveEntry("test-key"));
451 
452 		auto sourceDir = testDir.buildPath("source");
453 		mkdir(sourceDir);
454 		write(sourceDir.buildPath("test.txt"), "Test");
455 		mkdir(sourceDir.buildPath("dir"));
456 		write(sourceDir.buildPath("dir", "test2.txt"), "Test 2");
457 
458 		cacheEngine.add("test-key", sourceDir);
459 		assert(cacheEngine.getEntries() == ["test-key"]);
460 		assert(cacheEngine.haveEntry("test-key"));
461 		assert(cacheEngine.listFiles("test-key").sort().release == ["dir/test2.txt", "test.txt"]);
462 
463 		if (sourceDir.exists) rmdirRecurse(sourceDir);
464 		mkdir(sourceDir);
465 		write(sourceDir.buildPath("test3.txt"), "Test 3");
466 		mkdir(sourceDir.buildPath("test3"));
467 		write(sourceDir.buildPath("test3", "test.txt"), "Test 3 subdir");
468 		cacheEngine.add("test-key-2", sourceDir);
469 
470 		auto targetDir = testDir.buildPath("target");
471 		mkdir(targetDir);
472 		cacheEngine.extract("test-key", targetDir, fn => true);
473 		assert(targetDir.buildPath("test.txt").readText == "Test");
474 		assert(targetDir.buildPath("dir", "test2.txt").readText == "Test 2");
475 
476 		rmdirRecurse(targetDir);
477 		mkdir(targetDir);
478 		cacheEngine.extract("test-key-2", targetDir, fn => true);
479 		assert(targetDir.buildPath("test3.txt").readText == "Test 3");
480 
481 		rmdirRecurse(targetDir);
482 		mkdir(targetDir);
483 		cacheEngine.extract("test-key", targetDir, fn => fn=="dir");
484 		assert(!targetDir.buildPath("test.txt").exists);
485 		assert(targetDir.buildPath("dir", "test2.txt").readText == "Test 2");
486 
487 		cacheEngine.remove("test-key");
488 		assert(cacheEngine.getEntries().length == 1);
489 		assert(!cacheEngine.haveEntry("test-key"));
490 
491 		cacheEngine.finalize();
492 		cacheEngine.verify();
493 
494 		cacheEngine.optimize();
495 	}
496 
497 	testEngine("none");
498 	testEngine("directory");
499 	testEngine("git");
500 }