1 /**
2  * Code to manage a D component repository.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.sys.d.repo;
15 
16 import std.algorithm;
17 import std.conv : text;
18 import std.datetime : SysTime;
19 import std.exception;
20 import std.file;
21 import std.process : environment;
22 import std.range;
23 import std.regex;
24 import std.string;
25 import std.path;
26 
27 import ae.sys.git;
28 import ae.utils.exception;
29 import ae.utils.json;
30 import ae.utils.regex;
31 import ae.utils.time : StdTime;
32 
/// Base class for a managed repository.
///
/// Wraps a git `Repository` and adds: cached HEAD tracking, working tree
/// cleanup, a cache of merge / revert results stored in the git directory,
/// helpers to fetch pull requests and forks, and (when `verify` is set)
/// detection of working tree changes made behind this class's back.
/// Subclasses must implement `log`.
class ManagedRepository
{
	/// Git repository we manage.
	public Repository git;

	/// Should we fetch the latest stuff?
	/// When set, `update`, `needCommit` and `getRemoteRef` never fetch.
	public bool offline;

	/// Verify working tree state to make sure we don't clobber user changes?
	/// When unset, saveState / saveFileState / checkState / clearState are no-ops.
	public bool verify;

	/// Ensure we have a repository.
	/// This is an assertion (programming error), not a recoverable exception.
	public void needRepo()
	{
		assert(git.path, "No repository");
	}

	/// Name of the repository: the base name of the repository's path.
	public @property string name() { needRepo(); return git.path.baseName; }

	// Head

	/// Ensure the repository's HEAD is as indicated.
	///
	/// Cleans the working tree first. If a plain checkout of `hash` fails,
	/// the merge cache is consulted: when `hash` is the recorded result of an
	/// earlier merge / revert, that operation is performed again and its
	/// result is required to be `hash`.
	///
	/// Throws: Exception if the checkout fails and `hash` is not in the merge
	/// cache, or if re-creating the merge yields a different commit.
	public void needHead(string hash)
	{
		needClean();
		if (getHead() == hash)
			return;

		try
			performCheckout(hash);
		catch (Exception e)
		{
			log("Error checking out %s: %s".format(hash, e));

			// Might be a GC-ed merge. Try to recreate the merge
			auto hit = mergeCache.find!(entry => entry.result == hash)();
			enforce(!hit.empty, "Unknown hash %s".format(hash));
			performMerge(hit.front.base, hit.front.branch, hit.front.revert, hit.front.mainline);
			enforce(getHead() == hash, "Unexpected merge result: expected %s, got %s".format(hash, getHead()));
		}
	}

	/// Cached result of getHead(); null means "not known, ask git".
	private string currentHead = null;

	/// Returns the SHA1 of the given named ref.
	public string getRef(string name)
	{
		return git.query("rev-parse", name);
	}

	/// Return the commit the repository HEAD is pointing at.
	/// Cache the result.
	public string getHead()
	{
		if (!currentHead)
			currentHead = getRef("HEAD");

		return currentHead;
	}

	/// Check out the given commit (fetching it first if needed and allowed),
	/// then record the new working tree state and update the cached HEAD.
	protected void performCheckout(string hash)
	{
		needClean();
		needCommit(hash);

		log("Checking out %s commit %s...".format(name, hash));

		git.run("checkout", hash);

		saveState();
		currentHead = hash;
	}

	/// Ensure that the specified commit is fetched.
	///
	/// In offline mode the object must already be present. Otherwise, a
	/// failed lookup triggers one `update()` followed by a second lookup.
	///
	/// Throws: Exception if `hash` does not name a commit object.
	protected void needCommit(string hash)
	{
		void check()
		{
			enforce(git.query(["cat-file", "-t", hash]) == "commit",
				"Unexpected object type");
		}

		if (offline)
			check();
		else
		{
			try
				check();
			catch (Exception e)
			{
				log("Don't have commit " ~ hash ~ ", updating and retrying...");
				update();
				check();
			}
		}
	}

	/// Update the remote.
	/// Does nothing in offline mode. Submodule recursion is disabled for
	/// both the remote update and the tag fetch.
	public void update()
	{
		if (!offline)
		{
			needRepo();
			log("Updating " ~ name ~ "...");
			git.run("-c", "fetch.recurseSubmodules=false", "remote", "update", "--prune");
			git.run("-c", "fetch.recurseSubmodules=false", "fetch", "--tags");
		}
	}

	// Clean

	/// Whether the working tree is known to be clean, i.e. performCleanup
	/// has run and nothing has invalidated it since.
	bool clean = false;

	/// Ensure the repository's working copy is clean.
	/// The cleanup runs at most once until `clean` is reset to false.
	public void needClean()
	{
		if (clean)
			return;
		performCleanup();
		clean = true;
	}

	/// Discard all working tree changes and untracked / ignored files.
	///
	/// The saved working tree state is verified and cleared first, so that
	/// unexpected user changes abort the operation instead of being lost.
	///
	/// Throws: RepositoryCleanException if `git reset` or `git clean` fails.
	private void performCleanup()
	{
		checkState();
		clearState();

		log("Cleaning repository %s...".format(name));
		needRepo();
		try
		{
			git.run("reset", "--hard");
			git.run("clean", "--force", "--force" /*Sic*/, "-x", "-d", "--quiet");
		}
		catch (Exception e)
			throw new RepositoryCleanException(e.msg, e);
		saveState();
	}

	// Merge cache

	/// One merge cache entry: the operation's inputs and the resulting commit.
	private static struct MergeInfo
	{
		/// Commit the operation was applied on top of, and the commit
		/// which was merged into / reverted from it.
		string base, branch;
		/// true for a revert, false for a merge.
		bool revert = false;
		/// Mainline index passed to `git revert --mainline`, or 0 for none.
		int mainline = 0;
		/// Hash of the commit the operation produced.
		string result;
	}
	private alias MergeCache = MergeInfo[];
	private MergeCache mergeCacheData;
	private bool haveMergeCache;

	/// The merge cache, loaded from its JSON file (if that exists) on first access.
	private @property ref MergeCache mergeCache()
	{
		if (!haveMergeCache)
		{
			if (mergeCachePath.exists)
				mergeCacheData = mergeCachePath.readText().jsonParse!MergeCache;
			haveMergeCache = true;
		}

		return mergeCacheData;
	}

	/// Write the merge cache to its JSON file.
	private void saveMergeCache()
	{
		std.file.write(mergeCachePath(), toJson(mergeCache));
	}

	/// Path of the merge cache file, inside the git directory.
	private @property string mergeCachePath()
	{
		needRepo();
		return buildPath(git.gitDir, "ae-sys-d-mergecache.json");
	}

	// Merge

	/// Pin the git author / committer identity and date in this process's
	/// environment, so that the same merge performed twice produces the
	/// same commit hash.
	private void setupGitEnv()
	{
		foreach (person; ["AUTHOR", "COMMITTER"])
		{
			environment["GIT_%s_DATE".format(person)] = "Thu, 01 Jan 1970 00:00:00 +0000";
			environment["GIT_%s_NAME".format(person)] = "ae.sys.d";
			environment["GIT_%s_EMAIL".format(person)] = "ae.sys.d\x40thecybershadow.net";
		}
		// TODO: restore environment
	}

	/// Returns the hash of the merge between the base and branch commits.
	/// Performs the merge if necessary. Caches the result.
	public string getMerge(string base, string branch)
	{
		return getMergeImpl(base, branch, false, 0);
	}

	/// Returns the resulting hash when reverting the branch from the base commit.
	/// Performs the revert if necessary. Caches the result.
	/// mainline is the 1-based mainline index (as per `man git-revert`),
	/// or 0 if commit is not a merge commit.
	public string getRevert(string base, string branch, int mainline)
	{
		return getMergeImpl(base, branch, true, mainline);
	}

	/// Common implementation of getMerge / getRevert: return the cached
	/// result for these exact inputs, or perform the operation, then record
	/// the new HEAD in the cache and save the cache to disk.
	private string getMergeImpl(string base, string branch, bool revert, int mainline)
	{
		auto hit = mergeCache.find!(entry =>
			entry.base == base &&
			entry.branch == branch &&
			entry.revert == revert &&
			entry.mainline == mainline)();
		if (!hit.empty)
			return hit.front.result;

		performMerge(base, branch, revert, mainline);

		auto head = getHead();
		mergeCache ~= MergeInfo(base, branch, revert, mainline, head);
		saveMergeCache();
		return head;
	}

	/// Commit message used for merge commits created by performMerge.
	private static const string mergeCommitMessage = "ae.sys.d merge";
	/// Not referenced within this class: reverts are run with `--no-edit`.
	private static const string revertCommitMessage = "ae.sys.d revert";

	// Performs a merge or revert.
	//
	// Checks out `base`, then merges `branch` into it (or reverts `branch`
	// from it when `revert` is set; a non-zero `mainline` is passed on as
	// `--mainline`). The cached HEAD is invalidated, and the working tree
	// state is saved on exit whether or not the operation succeeded.
	//
	// For the repository whose directory is named "dmd", a failed operation
	// is retried by resolving the "test" path in favor of "theirs" and then
	// committing / continuing.
	private void performMerge(string base, string branch, bool revert, int mainline)
	{
		needHead(base);
		currentHead = null;

		log("%s %s into %s.".format(revert ? "Reverting" : "Merging", branch, base));

		scope(exit) saveState();

		scope (failure)
		{
			// Mark the tree as dirty before anything else: should the abort
			// command below throw as well, the next needClean() must still
			// perform a cleanup instead of trusting a stale flag.
			clean = false;

			if (!revert)
			{
				log("Aborting merge...");
				git.run("merge", "--abort");
			}
			else
			{
				log("Aborting revert...");
				git.run("revert", "--abort");
			}
		}

		void doMerge()
		{
			setupGitEnv();
			if (!revert)
				git.run("merge", "--no-ff", "-m", mergeCommitMessage, branch);
			else
			{
				string[] args = ["revert", "--no-edit"];
				if (mainline)
					args ~= ["--mainline", text(mainline)];
				args ~= [branch];
				git.run(args);
			}
		}

		if (git.path.baseName() == "dmd")
		{
			try
				doMerge();
			catch (Exception)
			{
				log("Merge failed. Attempting conflict resolution...");
				git.run("checkout", "--theirs", "test");
				git.run("add", "test");
				if (!revert)
					git.run("-c", "rerere.enabled=false", "commit", "-m", mergeCommitMessage);
				else
					git.run("revert", "--continue");
			}
		}
		else
			doMerge();

		log("Merge successful.");
	}

	/// Finds and returns the merge parents of the given merge commit.
	/// Queries the git repository if necessary. Caches the result
	/// (in memory only; this method does not write the cache file).
	///
	/// Throws: Exception if the commit does not have exactly two parents.
	public MergeInfo getMergeInfo(string merge)
	{
		auto hit = mergeCache.find!(entry => entry.result == merge && !entry.revert)();
		if (!hit.empty)
			return hit.front;

		auto parents = git.query(["log", "--pretty=%P", "-n", "1", merge]).split();
		enforce(parents.length > 1, "Not a merge: " ~ merge);
		enforce(parents.length == 2, "Too many parents: " ~ merge);

		auto info = MergeInfo(parents[0], parents[1], false, 0, merge);
		mergeCache ~= info;
		return info;
	}

	/// Follows the string of merges starting from the given
	/// head commit, up till the merge with the given branch.
	/// Then, reapplies all merges in order,
	/// except for that with the given branch.
	public string getUnMerge(string head, string branch)
	{
		// This could be optimized using an interactive rebase

		auto info = getMergeInfo(head);
		if (info.branch == branch)
			return info.base;

		return getMerge(getUnMerge(info.base, branch), info.branch);
	}

	// Branches, forks and customization

	/// Return SHA1 of the given remote ref.
	/// Fetches the remote first, unless offline mode is on.
	/// The fetch force-updates `localRef` from `remoteRef` (`+src:dst` refspec);
	/// the returned hash is whatever `localRef` resolves to afterwards.
	string getRemoteRef(string remote, string remoteRef, string localRef)
	{
		needRepo();
		if (!offline)
		{
			log("Fetching from %s (%s -> %s) ...".format(remote, remoteRef, localRef));
			git.run("fetch", remote, "+%s:%s".format(remoteRef, localRef));
		}
		return getRef(localRef);
	}

	/// Return SHA1 of the given pull request #.
	/// Fetches the pull request first, unless offline mode is on.
	/// (This is a thin wrapper around getRemoteRef, using the "origin" remote.)
	string getPull(int pull)
	{
		return getRemoteRef(
			"origin",
			"refs/pull/%d/head".format(pull),
			"refs/digger/pull/%d".format(pull),
		);
	}

	/// Return SHA1 of the given GitHub fork.
	/// Fetches the fork first, unless offline mode is on.
	/// (This is a thin wrapper around getRemoteRef.)
	///
	/// Throws: Exception if `user` or `branch` contain characters other than
	/// those allowed by the patterns below; both values are embedded into a
	/// URL and ref names.
	string getFork(string user, string branch)
	{
		enforce(user  .match(re!`^\w[\w\-]*$`), "Bad remote name");
		enforce(branch.match(re!`^\w[\w\-\.]*$`), "Bad branch name");

		return getRemoteRef(
			"https://github.com/%s/%s".format(user, name),
			"refs/heads/%s".format(branch),
			"refs/digger/fork/%s/%s".format(user, branch),
		);
	}

	/// Find the child of a commit, and, if the commit was a merge,
	/// the mainline index of said commit for the child.
	///
	/// Only children reachable from `branch` are considered, and exactly one
	/// such child must exist. `mainline` is set to 0 when the child has a
	/// single parent; for a two-parent child it is 2 when `commit` is the
	/// child's first parent and 1 otherwise. In the two-parent case a revert
	/// entry is also added to the merge cache (and saved) if not yet present.
	///
	/// Throws: Exception if either commit is missing from the history, if the
	/// number of matching children is not exactly one, or if the child has
	/// more than two parents.
	void getChild(string branch, string commit, out string child, out int mainline)
	{
		needCommit(branch);

		log("Querying history for commit children...");
		auto history = git.getHistory([branch]);

		auto branchHash = branch.toCommitHash();
		auto pBranchCommit = branchHash in history.commits;
		enforce(pBranchCommit, "Can't find commit " ~ branch ~" in history");

		// Collect every commit reachable from the branch tip. This uses an
		// explicit worklist rather than recursion, so that the depth of the
		// history cannot exhaust the call stack.
		bool[Hash] seen;
		Commit*[] pending = [*pBranchCommit];
		seen[pending[0].hash] = true;
		for (size_t i = 0; i < pending.length; i++)
			foreach (parent; pending[i].parents)
				if (parent.hash !in seen)
				{
					seen[parent.hash] = true;
					pending ~= parent;
				}

		auto commitHash = commit.toCommitHash();
		auto pCommit = commitHash in history.commits;
		enforce(pCommit, "Can't find commit " ~ commit ~ " in history");
		auto children = (*pCommit).children;
		enforce(children.length, "Commit has no children");
		children = children.filter!(c => c.hash in seen).array();
		enforce(children.length, "Commit has no children under specified branch");
		enforce(children.length == 1, "Commit has more than one child");
		auto childCommit = children[0];
		child = childCommit.hash.toString();

		if (childCommit.parents.length == 1)
			mainline = 0;
		else
		{
			enforce(childCommit.parents.length == 2, "Can't get mainline of multiple-branch merges");
			if (childCommit.parents[0] is *pCommit)
				mainline = 2;
			else
				mainline = 1;

			auto mergeInfo = MergeInfo(
				childCommit.parents[0].hash.toString(),
				childCommit.parents[1].hash.toString(),
				true, mainline, commit);
			if (!mergeCache.canFind(mergeInfo))
			{
				mergeCache ~= mergeInfo;
				saveMergeCache();
			}
		}
	}

	// State saving and checking

	/// Snapshot of the file attributes used to detect outside modification.
	struct FileState
	{
		bool isLink;
		ulong size;
		StdTime modificationTime;
	}

	/// Return the current state of one file.
	/// `file` is a path relative to the repository root.
	/// Must only be called when `verify` is set.
	FileState getFileState(string file)
	{
		assert(verify);
		auto path = git.path.buildPath(file);
		auto de = DirEntry(path);
		return FileState(de.isSymlink, de.size, de.timeLastModified.stdTime);
	}

	/// Working tree state: FileState by file path, as listed by `git ls-files`.
	alias RepositoryState = FileState[string];

	/// Return the working tree "state".
	/// This returns a file list, along with size and modification time.
	/// Must only be called when `verify` is set.
	RepositoryState getState()
	{
		assert(verify);
		needRepo();
		auto files = git.query(["ls-files"]).splitLines();
		RepositoryState state;
		foreach (file; files)
			state[file] = getFileState(file);
		return state;
	}

	/// Path of the saved working tree state file, inside the git directory.
	private @property string workTreeStatePath()
	{
		assert(verify);
		needRepo();
		return buildPath(git.gitDir, "ae-sys-d-worktree.json");
	}

	/// Save the state of the working tree for versioned files
	/// to a .json file, which can later be verified with checkState.
	/// This should be called after any git command which mutates the git state.
	void saveState()
	{
		if (!verify)
			return;
		std.file.write(workTreeStatePath, getState().toJson());
	}

	/// Save the state of just one file.
	/// This should be called after automatic edits to repository files during a build.
	/// The file parameter should be relative to the directory root, and use forward slashes.
	/// Does nothing if no state file has been saved yet.
	void saveFileState(string file)
	{
		if (!verify)
			return;
		if (!workTreeStatePath.exists)
			return;
		auto state = workTreeStatePath.readText.jsonParse!RepositoryState();
		state[file] = getFileState(file);
		std.file.write(workTreeStatePath, state.toJson());
	}

	/// Verify that the state of the working tree matches the one
	/// when saveState was last called. Throw an exception otherwise.
	/// This and clearState should be called before any git command
	/// which destroys working directory changes.
	///
	/// Does nothing when `verify` is unset or no state was saved.
	/// Symlinks are only compared by their "is link" flag.
	void checkState()
	{
		if (!verify)
			return;
		if (!workTreeStatePath.exists)
			return;
		auto savedState = workTreeStatePath.readText.jsonParse!RepositoryState();
		try
		{
			// Scanning the tree is part of the check: a tracked file which
			// has been deleted makes getState throw, and that must be
			// reported as a worktree change like any other difference.
			auto currentState = getState();
			foreach (file, fileState; currentState)
			{
				auto pSaved = file in savedState;
				enforce(pSaved, "New file: " ~ file);
				enforce(pSaved.isLink == fileState.isLink,
					"File modified: %s (is link changed, before: %s, after: %s)".format(file, pSaved.isLink, fileState.isLink));
				if (fileState.isLink)
					continue; // Correct lstat is too hard, just skip symlinks
				enforce(pSaved.size == fileState.size,
					"File modified: %s (size changed, before: %s, after: %s)".format(file, pSaved.size, fileState.size));
				enforce(pSaved.modificationTime == fileState.modificationTime,
					"File modified: %s (modification time changed, before: %s, after: %s)".format(file, SysTime(pSaved.modificationTime), SysTime(fileState.modificationTime)));
				assert(*pSaved == fileState);
			}
		}
		catch (Exception e)
			throw new Exception(
				"The worktree has changed since the last time this software updated it.\n" ~
				"Specifically:\n" ~
				"    " ~ e.msg ~ "\n\n" ~
				"Aborting to avoid overwriting your changes.\n" ~
				"To continue:\n" ~
				" 1. Commit / stash / back up your changes, if you wish to keep them\n" ~
				" 2. Delete " ~ workTreeStatePath ~ "\n" ~
				" 3. Try this operation again."
			);
	}

	/// Delete the saved working tree state, if any.
	void clearState()
	{
		if (!verify)
			return;
		if (workTreeStatePath.exists)
			workTreeStatePath.remove();
	}

	// Misc

	/// Reset internal state.
	/// Forgets the cached HEAD, the "clean" flag and the in-memory merge
	/// cache, so that all three are re-established on next use.
	protected void reset()
	{
		currentHead = null;
		clean = false;
		haveMergeCache = false;
		mergeCacheData = null;
	}

	/// Override to add logging.
	protected abstract void log(string line);
}
580 
/// Exception type signalling that cleaning the working tree
/// (e.g. "reset --hard") failed.
/// This typically points at a corrupted git repository.
mixin DeclareException!"RepositoryCleanException";