1 /**
2  * Code to manage a D component repository.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <ae@cy.md>
12  */
13 
14 module ae.sys.d.repo;
15 
16 import std.algorithm;
17 import std.conv : text;
18 import std.datetime : SysTime;
19 import std.exception;
20 import std.file;
21 import std.process : environment;
22 import std.range;
23 import std.regex;
24 import std.string;
25 import std.path;
26 
27 import ae.sys.git;
28 import ae.utils.exception;
29 import ae.utils.json;
30 import ae.utils.regex;
31 import ae.utils.time : StdTime;
32 
33 /// Base class for a managed repository.
34 class ManagedRepository
35 {
36 	/// Git repository we manage.
37 	public @property ref const(Git) git()
38 	{
39 		if (!gitRepo.path)
40 		{
41 			gitRepo = getRepo();
42 			assert(gitRepo.path, "No repository");
43 			foreach (person; ["AUTHOR", "COMMITTER"])
44 			{
45 				gitRepo.environment["GIT_%s_DATE".format(person)] = "Thu, 01 Jan 1970 00:00:00 +0000";
46 				gitRepo.environment["GIT_%s_NAME".format(person)] = "ae.sys.d";
47 				gitRepo.environment["GIT_%s_EMAIL".format(person)] = "ae.sys.d\x40thecybershadow.net";
48 			}
49 		}
50 		return gitRepo;
51 	}
52 
	/// Offline mode. When true, nothing is fetched from remotes:
	/// `update` does nothing, `getRemoteRef` and `getBranch` only
	/// resolve what is already present locally, and `needCommit`
	/// fails if the commit is missing.
	public bool offline;

	/// When true, the working tree state is recorded after our own
	/// changes and verified before cleaning, to make sure we don't
	/// clobber user changes. When false, the state functions do nothing.
	public bool verify;

	/// Backing storage for the `git` property.
	/// Populated from `getRepo` on first access.
	private Git gitRepo;

	/// Repository provider. Implementations must return a repository
	/// with a non-null `path` (asserted by the `git` property).
	abstract protected Git getRepo();
63 
64 	/// Base name of the repository directory
65 	public @property string name() { return git.path.baseName; }
66 
67 	// Head
68 
69 	/// Ensure the repository's HEAD is as indicated.
70 	public void needHead(string hash)
71 	{
72 		needClean();
73 		if (getHead() == hash)
74 			return;
75 
76 		try
77 			performCheckout(hash);
78 		catch (Exception e)
79 		{
80 			log("Error checking out %s: %s".format(hash, e));
81 
82 			// Might be a GC-ed merge. Try to recreate the merge
83 			auto hit = mergeCache.find!(entry => entry.result == hash)();
84 			enforce(!hit.empty, "Unknown hash %s".format(hash));
85 			performMerge(hit.front.spec);
86 			enforce(getHead() == hash, "Unexpected merge result: expected %s, got %s".format(hash, getHead()));
87 		}
88 	}
89 
	/// Cached result of `getHead`. Null when the HEAD commit is not
	/// known and has to be queried from git again.
	private string currentHead = null;
91 
92 	/// Returns the SHA1 of the given named ref.
93 	public string getRef(string name)
94 	{
95 		return git.query("rev-parse", "--verify", "--quiet", name);
96 	}
97 
98 	/// Return the commit the repository HEAD is pointing at.
99 	/// Cache the result.
100 	public string getHead()
101 	{
102 		if (!currentHead)
103 			currentHead = getRef("HEAD");
104 
105 		return currentHead;
106 	}
107 
108 	protected void performCheckout(string hash)
109 	{
110 		needClean();
111 		needCommit(hash);
112 
113 		log("Checking out %s commit %s...".format(name, hash));
114 
115 		git.run("checkout", hash);
116 
117 		saveState();
118 		currentHead = hash;
119 	}
120 
121 	/// Ensure that the specified commit is fetched.
122 	protected void needCommit(string hash)
123 	{
124 		void check()
125 		{
126 			enforce(git.query(["cat-file", "-t", hash]) == "commit",
127 				"Unexpected object type");
128 		}
129 
130 		try
131 			check();
132 		catch (Exception e)
133 		{
134 			if (offline)
135 			{
136 				log("Don't have commit " ~ hash ~ " and in offline mode, can't proceed.");
137 				throw new Exception("Giving up");
138 			}
139 			else
140 			{
141 				log("Don't have commit " ~ hash ~ ", updating and retrying...");
142 				update();
143 				check();
144 			}
145 		}
146 	}
147 
148 	/// Update the remote.
149 	/// Return true if any updates were fetched.
150 	public bool update()
151 	{
152 		if (!offline)
153 		{
154 			log("Updating " ~ name ~ "...");
155 			auto oldRefs = git.query(["show-ref"]);
156 			git.run("-c", "fetch.recurseSubmodules=false", "remote", "update", "--prune");
157 			git.run("-c", "fetch.recurseSubmodules=false", "fetch", "--force", "--tags");
158 			auto newRefs = git.query(["show-ref"]);
159 			return oldRefs != newRefs;
160 		}
161 		else
162 			return false;
163 	}
164 
165 	// Clean
166 
	/// True when we know that the repository is currently clean, which
	/// lets `needClean` skip the cleanup. Set back to false by `reset`
	/// and when a merge fails in `performMerge`.
	bool clean = false;
168 
169 	/// Ensure the repository's working copy is clean.
170 	public void needClean()
171 	{
172 		if (clean)
173 			return;
174 		performCleanup();
175 		clean = true;
176 	}
177 
178 	private void performCleanup()
179 	{
180 		checkState();
181 		clearState();
182 
183 		log("Cleaning repository %s...".format(name));
184 		try
185 		{
186 			git.run("reset", "--hard");
187 			git.run("clean", "--force", "--force" /*Sic*/, "-x", "-d", "--quiet");
188 		}
189 		catch (Exception e)
190 			throw new RepositoryCleanException(e.msg, e);
191 		saveState();
192 	}
193 
194 	// Merge cache
195 
	/// How to merge a branch into another
	enum MergeMode
	{
		merge,      /// git merge (commit with multiple parents) of the target and branch tips
		cherryPick, /// apply the commits as a patch
	}
	/// Describes one merge or revert operation; used as the merge cache key.
	private static struct MergeSpec
	{
		/// Commit on top of which the operation is performed.
		string target;
		/// Branch to merge or revert. Only the tip is used in `merge`
		/// mode; `cherryPick` mode requires the base as well.
		string[2] branch; // [base, tip]
		/// How the branch is applied.
		MergeMode mode;
		/// If true, the branch is reverted instead of merged.
		bool revert = false;
	}
	/// A merge cache entry: an operation and the commit it produced.
	private static struct MergeInfo
	{
		/// The operation which was performed.
		MergeSpec spec;
		/// Hash recorded as the outcome of the operation.
		string result;
		int mainline = 0; // git parent index of the "target", if any
	}
	/// In-memory form of the merge cache file.
	private alias MergeCache = MergeInfo[];
	/// Merge cache contents; access through the `mergeCache` property.
	private MergeCache mergeCacheData;
	/// Whether `mergeCacheData` has been loaded (or determined to be absent) yet.
	private bool haveMergeCache;
218 
219 	private @property ref MergeCache mergeCache()
220 	{
221 		if (!haveMergeCache)
222 		{
223 			if (mergeCachePath.exists)
224 				mergeCacheData = mergeCachePath.readText().jsonParse!MergeCache;
225 			haveMergeCache = true;
226 		}
227 
228 		return mergeCacheData;
229 	}
230 
231 	private void saveMergeCache()
232 	{
233 		std.file.write(mergeCachePath(), toJson(mergeCache));
234 	}
235 
236 	private @property string mergeCachePath()
237 	{
238 		return buildPath(git.gitDir, "ae-sys-d-mergecache-v2.json");
239 	}
240 
241 	// Merge
242 
243 	/// Returns the hash of the merge between the target and branch commits.
244 	/// Performs the merge if necessary. Caches the result.
245 	public string getMerge(string target, string[2] branch, MergeMode mode)
246 	{
247 		return getMergeImpl(MergeSpec(target, branch, mode, false));
248 	}
249 
250 	/// Returns the resulting hash when reverting the branch from the base commit.
251 	/// Performs the revert if necessary. Caches the result.
252 	/// mainline is the 1-based mainline index (as per `man git-revert`),
253 	/// or 0 if commit is not a merge commit.
254 	public string getRevert(string target, string[2] branch, MergeMode mode)
255 	{
256 		return getMergeImpl(MergeSpec(target, branch, mode, true));
257 	}
258 
259 	private string getMergeImpl(MergeSpec spec)
260 	{
261 		auto hit = mergeCache.find!(entry => entry.spec == spec)();
262 		if (!hit.empty)
263 			return hit.front.result;
264 
265 		performMerge(spec);
266 
267 		auto head = getHead();
268 		mergeCache ~= MergeInfo(spec, head);
269 		saveMergeCache();
270 		return head;
271 	}
272 
	/// Commit message used by `performMerge` for the commits it creates
	/// when merging.
	private static const string mergeCommitMessage = "ae.sys.d merge";
	/// Commit message for reverts.
	/// NOTE(review): not referenced anywhere in this class — reverts are
	/// run with `--no-edit`; confirm whether it is used elsewhere.
	private static const string revertCommitMessage = "ae.sys.d revert";
275 
276 	// Performs a merge or revert.
277 	private void performMerge(MergeSpec spec)
278 	{
279 		needHead(spec.target);
280 		currentHead = null;
281 
282 		log("%s %s into %s.".format(spec.revert ? "Reverting" : "Merging", spec.branch, spec.target));
283 
284 		scope(exit) saveState();
285 
286 		scope (failure)
287 		{
288 			string op;
289 			final switch (spec.mode)
290 			{
291 				case MergeMode.merge:
292 					op = spec.revert ? "revert" : "merge";
293 					break;
294 				case MergeMode.cherryPick:
295 					op = spec.revert ? "revert" : "cherry-pick";
296 					break;
297 			}
298 
299 			log("Aborting " ~ op ~ "...");
300 			git.run(op, "--abort");
301 			clean = false;
302 		}
303 
304 		void doMerge()
305 		{
306 			final switch (spec.mode)
307 			{
308 				case MergeMode.merge:
309 					if (!spec.revert)
310 						git.run("merge", "--no-ff", "-m", mergeCommitMessage, spec.branch[1]);
311 					else
312 					{
313 						// When reverting in merge mode, we try to
314 						// find the merge commit following the branch
315 						// tip, and revert only that merge commit.
316 						string mergeCommit; int mainline;
317 						getChild(spec.target, spec.branch[1], /*out*/mergeCommit, /*out*/mainline);
318 
319 						string[] args = ["revert", "--no-edit"];
320 						if (mainline)
321 							args ~= ["--mainline", text(mainline)];
322 						args ~= [mergeCommit];
323 						git.run(args);
324 					}
325 					break;
326 				case MergeMode.cherryPick:
327 					enforce(spec.branch[0], "Must specify a branch base for a cherry-pick merge");
328 					auto range = spec.branch[0] ~ ".." ~ spec.branch[1];
329 					if (!spec.revert)
330 						git.run("cherry-pick", range);
331 					else
332 						git.run("revert", "--no-edit", range);
333 					break;
334 			}
335 		}
336 
337 		if (git.path.baseName() == "dmd")
338 		{
339 			try
340 				doMerge();
341 			catch (Exception)
342 			{
343 				log("Merge failed. Attempting conflict resolution...");
344 				git.run("checkout", "--theirs", "test");
345 				git.run("add", "test");
346 				if (!spec.revert)
347 					git.run("-c", "rerere.enabled=false", "commit", "-m", mergeCommitMessage);
348 				else
349 					git.run("revert", "--continue");
350 			}
351 		}
352 		else
353 			doMerge();
354 
355 		log("Merge successful.");
356 	}
357 
358 	/// Finds and returns the merge parents of the given merge commit.
359 	/// Queries the git repository if necessary. Caches the result.
360 	public MergeInfo getMergeInfo(string merge)
361 	{
362 		auto hit = mergeCache.find!(entry => entry.result == merge && !entry.spec.revert)();
363 		if (!hit.empty)
364 			return hit.front;
365 
366 		auto parents = git.query(["log", "--pretty=%P", "-n", "1", merge]).split();
367 		enforce(parents.length > 1, "Not a merge: " ~ merge);
368 		enforce(parents.length == 2, "Too many parents: " ~ merge);
369 
370 		auto info = MergeInfo(MergeSpec(parents[0], [null, parents[1]], MergeMode.merge, false), merge, 1);
371 		mergeCache ~= info;
372 		return info;
373 	}
374 
375 	/// Follows the string of merges starting from the given
376 	/// head commit, up till the merge with the given branch.
377 	/// Then, reapplies all merges in order,
378 	/// except for that with the given branch.
379 	public string getUnMerge(string head, string[2] branch, MergeMode mode)
380 	{
381 		// This could be optimized using an interactive rebase
382 
383 		auto info = getMergeInfo(head);
384 		if (info.spec.branch[1] == branch[1])
385 			return info.spec.target;
386 
387 		// Recurse to keep looking
388 		auto unmerge = getUnMerge(info.spec.target, branch, mode);
389 		// Re-apply this non-matching merge
390 		return getMerge(unmerge, info.spec.branch, info.spec.mode);
391 	}
392 
393 	// Branches, forks and customization
394 
395 	/// Return SHA1 of the given remote ref.
396 	/// Fetches the remote first, unless offline mode is on.
397 	string getRemoteRef(string remote, string remoteRef, string localRef)
398 	{
399 		if (!offline)
400 		{
401 			log("Fetching from %s (%s -> %s) ...".format(remote, remoteRef, localRef));
402 			git.run("fetch", remote, "+%s:%s".format(remoteRef, localRef));
403 		}
404 		return getRef(localRef);
405 	}
406 
407 	/// Return SHA1 of the given pull request #.
408 	/// Fetches the pull request first, unless offline mode is on.
409 	string getPullTip(int pull)
410 	{
411 		return getRemoteRef(
412 			"origin",
413 			"refs/pull/%d/head".format(pull),
414 			"refs/digger/pull/%d".format(pull),
415 		);
416 	}
417 
418 	private static bool isCommitHash(string s)
419 	{
420 		return s.length == 40 && s.representation.all!(c => (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'));
421 	}
422 
423 	/// Return SHA1 (base, tip) of the given branch (possibly of GitHub fork).
424 	/// Fetches the fork first, unless offline mode is on.
425 	/// (This is a thin wrapper around getRemoteRef.)
426 	string[2] getBranch(string user, string base, string tip)
427 	{
428 		if (user) enforce(user.match(re!`^\w[\w\-]*$`), "Bad remote name");
429 		if (base) enforce(base.match(re!`^\w[\w\-\.]*$`), "Bad branch base name");
430 		if (true) enforce(tip .match(re!`^\w[\w\-\.]*$`), "Bad branch tip name");
431 
432 		if (!user)
433 			user = "dlang";
434 
435 		if (isCommitHash(tip))
436 		{
437 			if (!offline)
438 			{
439 				// We don't know which branch the commit will be in, so just grab everything.
440 				auto remote = "https://github.com/%s/%s".format(user, name);
441 				log("Fetching everything from %s ...".format(remote));
442 				git.run("fetch", remote, "+refs/heads/*:refs/forks/%s/*".format(user));
443 			}
444 			if (!base)
445 				base = git.query("rev-parse", tip ~ "^");
446 			return [
447 				base,
448 				tip,
449 			];
450 		}
451 		else
452 		{
453 			return [
454 				null,
455 				getRemoteRef(
456 					"https://github.com/%s/%s".format(user, name),
457 					"refs/heads/%s".format(tip),
458 					"refs/digger/fork/%s/%s".format(user, tip),
459 				),
460 			];
461 		}
462 	}
463 
	/// Find the child of a commit, and, if the commit was a merge,
	/// the mainline index of said commit for the child.
	///
	/// Only children which are reachable from `branch` are considered,
	/// and there must be exactly one such child.
	///
	/// Params:
	///   branch   = commit from which the history is searched
	///   commit   = commit whose child is looked for
	///   child    = receives the hash of the child commit
	///   mainline = receives 0 if the child has a single parent;
	///              otherwise 2 if `commit` is the child's first parent,
	///              and 1 if it is not
	///
	/// Throws: Exception if either commit is not found in the history,
	/// if there is not exactly one matching child, or if the child has
	/// more than two parents.
	void getChild(string branch, string commit, out string child, out int mainline)
	{
		needCommit(branch);

		log("Querying history for commit children...");
		auto history = git.getHistory([branch]);

		// Collect all commits reachable from the branch commit.
		bool[Git.CommitID] seen;
		// Note: the parameter shadows the outer `commit` argument.
		void visit(Git.History.Commit* commit)
		{
			if (commit.oid !in seen)
			{
				seen[commit.oid] = true;
				foreach (parent; commit.parents)
					visit(parent);
			}
		}
		auto branchHash = Git.CommitID(branch);
		auto pBranchCommit = branchHash in history.commits;
		enforce(pBranchCommit, "Can't find commit " ~ branch ~" in history");
		visit(*pBranchCommit);

		auto commitHash = Git.CommitID(commit);
		auto pCommit = commitHash in history.commits;
		enforce(pCommit, "Can't find commit in history");
		auto children = (*pCommit).children;
		enforce(children.length, "Commit has no children");
		// Keep only the children which are part of the branch's history.
		children = children.filter!(child => child.oid in seen).array();
		enforce(children.length, "Commit has no children under specified branch");
		enforce(children.length == 1, "Commit has more than one child");
		auto childCommit = children[0];
		child = childCommit.oid.toString();

		if (childCommit.parents.length == 1)
			mainline = 0;
		else
		{
			enforce(childCommit.parents.length == 2, "Can't get mainline of multiple-branch merges");
			// The mainline is the parent which is NOT the given commit.
			if (childCommit.parents[0] is *pCommit)
				mainline = 2;
			else
				mainline = 1;

			// Remember this merge in the merge cache (as a revert entry).
			// NOTE(review): the second MergeSpec argument is a single
			// string while the field is `string[2]` ([base, tip]), and
			// the entry's result is `commit` rather than the child
			// found above — confirm that both are intended.
			auto mergeInfo = MergeInfo(
				MergeSpec(
					childCommit.parents[0].oid.toString(),
					childCommit.parents[1].oid.toString(),
					MergeMode.merge,
					true),
				commit, mainline);
			if (!mergeCache.canFind(mergeInfo))
			{
				mergeCache ~= mergeInfo;
				saveMergeCache();
			}
		}
	}
523 
524 	// State saving and checking
525 
	/// Properties of one working tree file, as recorded by `saveState`
	/// and compared by `checkState`.
	private struct FileState
	{
		/// Whether the file is a symbolic link.
		bool isLink;
		/// File size as reported by `DirEntry.size`.
		ulong size;
		/// Last modification time (the `stdTime` of the file's `SysTime`).
		StdTime modificationTime;
	}
532 
533 	private FileState getFileState(string file)
534 	{
535 		assert(verify);
536 		auto path = git.path.buildPath(file);
537 		auto de = DirEntry(path);
538 		return FileState(de.isSymlink, de.size, de.timeLastModified.stdTime);
539 	}
540 
	/// Working tree state: maps file paths, as listed by `git ls-files`,
	/// to the recorded state of the respective file.
	private alias RepositoryState = FileState[string];
542 
543 	/// Return the working tree "state".
544 	/// This returns a file list, along with size and modification time.
545 	RepositoryState getState()
546 	{
547 		assert(verify);
548 		auto files = git.query(["ls-files"]).splitLines();
549 		RepositoryState state;
550 		foreach (file; files)
551 			try
552 				state[file] = getFileState(file);
553 			catch (Exception e) {}
554 		return state;
555 	}
556 
557 	private @property string workTreeStatePath()
558 	{
559 		assert(verify);
560 		return buildPath(git.gitDir, "ae-sys-d-worktree.json");
561 	}
562 
563 	/// Save the state of the working tree for versioned files
564 	/// to a .json file, which can later be verified with checkState.
565 	/// This should be called after any git command which mutates the git state.
566 	void saveState()
567 	{
568 		if (!verify)
569 			return;
570 		std.file.write(workTreeStatePath, getState().toJson());
571 	}
572 
573 	/// Save the state of just one file.
574 	/// This should be called after automatic edits to repository files during a build.
575 	/// The file parameter should be relative to the directory root, and use forward slashes.
576 	void saveFileState(string file)
577 	{
578 		if (!verify)
579 			return;
580 		if (!workTreeStatePath.exists)
581 			return;
582 		auto state = workTreeStatePath.readText.jsonParse!RepositoryState();
583 		state[file] = getFileState(file);
584 		std.file.write(workTreeStatePath, state.toJson());
585 	}
586 
587 	/// Verify that the state of the working tree matches the one
588 	/// when saveState was last called. Throw an exception otherwise.
589 	/// This and clearState should be called before any git command
590 	/// which destroys working directory changes.
591 	void checkState()
592 	{
593 		if (!verify)
594 			return;
595 		if (!workTreeStatePath.exists)
596 			return;
597 		auto savedState = workTreeStatePath.readText.jsonParse!RepositoryState();
598 		auto currentState = getState();
599 		try
600 		{
601 			foreach (file, fileState; currentState)
602 			{
603 				enforce(file in savedState, "New file: " ~ file);
604 				enforce(savedState[file].isLink == fileState.isLink,
605 					"File modified: %s (is link changed, before: %s, after: %s)".format(file, savedState[file].isLink, fileState.isLink));
606 				if (fileState.isLink)
607 					continue; // Correct lstat is too hard, just skip symlinks
608 				enforce(savedState[file].size == fileState.size,
609 					"File modified: %s (size changed, before: %s, after: %s)".format(file, savedState[file].size, fileState.size));
610 				enforce(savedState[file].modificationTime == fileState.modificationTime,
611 					"File modified: %s (modification time changed, before: %s, after: %s)".format(file, SysTime(savedState[file].modificationTime), SysTime(fileState.modificationTime)));
612 				assert(savedState[file] == fileState);
613 			}
614 		}
615 		catch (Exception e)
616 			throw new Exception(
617 				"The worktree has changed since the last time this software updated it.\n" ~
618 				"Specifically:\n" ~
619 				"    " ~ e.msg ~ "\n\n" ~
620 				"Aborting to avoid overwriting your changes.\n" ~
621 				"To continue:\n" ~
622 				" 1. Commit / stash / back up your changes, if you wish to keep them\n" ~
623 				" 2. Delete " ~ workTreeStatePath ~ "\n" ~
624 				" 3. Try this operation again."
625 			);
626 	}
627 
628 	/// Delete the saved working tree state, if any.
629 	void clearState()
630 	{
631 		if (!verify)
632 			return;
633 		if (workTreeStatePath.exists)
634 			workTreeStatePath.remove();
635 	}
636 
637 	// Misc
638 
639 	/// Reset internal state.
640 	protected void reset()
641 	{
642 		currentHead = null;
643 		clean = false;
644 		haveMergeCache = false;
645 		mergeCacheData = null;
646 	}
647 
	/// Receives one line of progress / diagnostic output.
	/// Must be implemented by subclasses, which decide where the
	/// messages produced by this class end up.
	protected abstract void log(string line);
650 }
651 
/// Used to communicate that a "reset --hard" failed.
/// Generally this indicates git repository corruption.
/// Thrown by `ManagedRepository.performCleanup` when `git reset --hard`
/// or the subsequent `git clean` fails; the original exception is
/// attached as the cause.
mixin DeclareException!q{RepositoryCleanException};