1 /**
2  * Code to manage a D component repository.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.sys.d.repo;
15 
16 import std.algorithm;
17 import std.conv : text;
18 import std.datetime : SysTime;
19 import std.exception;
20 import std.file;
21 import std.process : environment;
22 import std.range;
23 import std.regex;
24 import std.string;
25 import std.path;
26 
27 import ae.sys.git;
28 import ae.utils.exception;
29 import ae.utils.json;
30 import ae.utils.regex;
31 import ae.utils.time : StdTime;
32 
33 /// Base class for a managed repository.
34 class ManagedRepository
35 {
36 	/// Git repository we manage.
37 	public @property ref const(Repository) git()
38 	{
39 		if (!gitRepo.path)
40 		{
41 			gitRepo = getRepo();
42 			assert(gitRepo.path, "No repository");
43 			foreach (person; ["AUTHOR", "COMMITTER"])
44 			{
45 				gitRepo.environment["GIT_%s_DATE".format(person)] = "Thu, 01 Jan 1970 00:00:00 +0000";
46 				gitRepo.environment["GIT_%s_NAME".format(person)] = "ae.sys.d";
47 				gitRepo.environment["GIT_%s_EMAIL".format(person)] = "ae.sys.d\x40thecybershadow.net";
48 			}
49 		}
50 		return gitRepo;
51 	}
52 
	/// Should we fetch the latest stuff?
	/// When set, update(), getRemoteRef() and getBranch() skip their
	/// fetches, and needCommit() gives up instead of updating.
	public bool offline;

	/// Verify working tree state to make sure we don't clobber user changes?
	/// When unset, saveState(), saveFileState(), checkState() and
	/// clearState() do nothing.
	public bool verify;

	/// Backing storage for the git property; filled in on first access.
	private Repository gitRepo;
60 
	/// Repository provider.
	/// Called by the git property the first time the repository is needed.
	/// The returned repository must have a path.
	abstract protected Repository getRepo();
63 
64 	public @property string name() { return git.path.baseName; }
65 
66 	// Head
67 
68 	/// Ensure the repository's HEAD is as indicated.
69 	public void needHead(string hash)
70 	{
71 		needClean();
72 		if (getHead() == hash)
73 			return;
74 
75 		try
76 			performCheckout(hash);
77 		catch (Exception e)
78 		{
79 			log("Error checking out %s: %s".format(hash, e));
80 
81 			// Might be a GC-ed merge. Try to recreate the merge
82 			auto hit = mergeCache.find!(entry => entry.result == hash)();
83 			enforce(!hit.empty, "Unknown hash %s".format(hash));
84 			performMerge(hit.front.spec);
85 			enforce(getHead() == hash, "Unexpected merge result: expected %s, got %s".format(hash, getHead()));
86 		}
87 	}
88 
	/// Cached value for getHead(); null means "not known, query git".
	private string currentHead = null;
90 
91 	/// Returns the SHA1 of the given named ref.
92 	public string getRef(string name)
93 	{
94 		return git.query("rev-parse", "--verify", "--quiet", name);
95 	}
96 
97 	/// Return the commit the repository HEAD is pointing at.
98 	/// Cache the result.
99 	public string getHead()
100 	{
101 		if (!currentHead)
102 			currentHead = getRef("HEAD");
103 
104 		return currentHead;
105 	}
106 
107 	protected void performCheckout(string hash)
108 	{
109 		needClean();
110 		needCommit(hash);
111 
112 		log("Checking out %s commit %s...".format(name, hash));
113 
114 		git.run("checkout", hash);
115 
116 		saveState();
117 		currentHead = hash;
118 	}
119 
120 	/// Ensure that the specified commit is fetched.
121 	protected void needCommit(string hash)
122 	{
123 		void check()
124 		{
125 			enforce(git.query(["cat-file", "-t", hash]) == "commit",
126 				"Unexpected object type");
127 		}
128 
129 		try
130 			check();
131 		catch (Exception e)
132 		{
133 			if (offline)
134 			{
135 				log("Don't have commit " ~ hash ~ " and in offline mode, can't proceed.");
136 				throw new Exception("Giving up");
137 			}
138 			else
139 			{
140 				log("Don't have commit " ~ hash ~ ", updating and retrying...");
141 				update();
142 				check();
143 			}
144 		}
145 	}
146 
147 	/// Update the remote.
148 	/// Return true if any updates were fetched.
149 	public bool update()
150 	{
151 		if (!offline)
152 		{
153 			log("Updating " ~ name ~ "...");
154 			auto oldRefs = git.query(["show-ref"]);
155 			git.run("-c", "fetch.recurseSubmodules=false", "remote", "update", "--prune");
156 			git.run("-c", "fetch.recurseSubmodules=false", "fetch", "--force", "--tags");
157 			auto newRefs = git.query(["show-ref"]);
158 			return oldRefs != newRefs;
159 		}
160 		else
161 			return false;
162 	}
163 
164 	// Clean
165 
	/// Whether the working copy is already known to be clean.
	/// Set by needClean(); cleared when a merge fails and by reset().
	bool clean = false;
167 
168 	/// Ensure the repository's working copy is clean.
169 	public void needClean()
170 	{
171 		if (clean)
172 			return;
173 		performCleanup();
174 		clean = true;
175 	}
176 
177 	private void performCleanup()
178 	{
179 		checkState();
180 		clearState();
181 
182 		log("Cleaning repository %s...".format(name));
183 		try
184 		{
185 			git.run("reset", "--hard");
186 			git.run("clean", "--force", "--force" /*Sic*/, "-x", "-d", "--quiet");
187 		}
188 		catch (Exception e)
189 			throw new RepositoryCleanException(e.msg, e);
190 		saveState();
191 	}
192 
193 	// Merge cache
194 
	/// How a branch is applied onto (or reverted from) a target commit.
	enum MergeMode
	{
		merge,      /// git merge (commit with multiple parents) of the target and branch tips
		cherryPick, /// apply the commits as a patch
	}
	/// Describes one merge or revert operation; used as the lookup key
	/// of the merge cache.
	private static struct MergeSpec
	{
		string target;    /// commit the operation is performed on top of
		string[2] branch; // [base, tip]
		MergeMode mode;   /// how the branch is applied
		bool revert = false; /// true for a revert, false for a merge
	}
	/// One merge cache entry: an operation and the commit it produced.
	private static struct MergeInfo
	{
		MergeSpec spec; /// the operation that was performed
		string result;  /// hash recorded for the operation
		int mainline = 0; // git parent index of the "target", if any
	}
	/// The merge cache is a flat list of entries, searched linearly.
	private alias MergeCache = MergeInfo[];
	/// In-memory copy of the merge cache; access it through mergeCache.
	private MergeCache mergeCacheData;
	/// Whether mergeCacheData has been loaded (or found absent) already.
	private bool haveMergeCache;
216 
217 	private @property ref MergeCache mergeCache()
218 	{
219 		if (!haveMergeCache)
220 		{
221 			if (mergeCachePath.exists)
222 				mergeCacheData = mergeCachePath.readText().jsonParse!MergeCache;
223 			haveMergeCache = true;
224 		}
225 
226 		return mergeCacheData;
227 	}
228 
229 	private void saveMergeCache()
230 	{
231 		std.file.write(mergeCachePath(), toJson(mergeCache));
232 	}
233 
234 	private @property string mergeCachePath()
235 	{
236 		return buildPath(git.gitDir, "ae-sys-d-mergecache-v2.json");
237 	}
238 
239 	// Merge
240 
241 	/// Returns the hash of the merge between the target and branch commits.
242 	/// Performs the merge if necessary. Caches the result.
243 	public string getMerge(string target, string[2] branch, MergeMode mode)
244 	{
245 		return getMergeImpl(MergeSpec(target, branch, mode, false));
246 	}
247 
248 	/// Returns the resulting hash when reverting the branch from the base commit.
249 	/// Performs the revert if necessary. Caches the result.
250 	/// mainline is the 1-based mainline index (as per `man git-revert`),
251 	/// or 0 if commit is not a merge commit.
252 	public string getRevert(string target, string[2] branch, MergeMode mode)
253 	{
254 		return getMergeImpl(MergeSpec(target, branch, mode, true));
255 	}
256 
257 	private string getMergeImpl(MergeSpec spec)
258 	{
259 		auto hit = mergeCache.find!(entry => entry.spec == spec)();
260 		if (!hit.empty)
261 			return hit.front.result;
262 
263 		performMerge(spec);
264 
265 		auto head = getHead();
266 		mergeCache ~= MergeInfo(spec, head);
267 		saveMergeCache();
268 		return head;
269 	}
270 
	/// Commit message used by performMerge for merge commits.
	private static const string mergeCommitMessage = "ae.sys.d merge";
	/// Commit message for reverts (not referenced by the code visible in this class).
	private static const string revertCommitMessage = "ae.sys.d revert";
273 
274 	// Performs a merge or revert.
275 	private void performMerge(MergeSpec spec)
276 	{
277 		needHead(spec.target);
278 		currentHead = null;
279 
280 		log("%s %s into %s.".format(spec.revert ? "Reverting" : "Merging", spec.branch, spec.target));
281 
282 		scope(exit) saveState();
283 
284 		scope (failure)
285 		{
286 			string op;
287 			final switch (spec.mode)
288 			{
289 				case MergeMode.merge:
290 					op = spec.revert ? "revert" : "merge";
291 					break;
292 				case MergeMode.cherryPick:
293 					op = spec.revert ? "revert" : "cherry-pick";
294 					break;
295 			}
296 
297 			log("Aborting " ~ op ~ "...");
298 			git.run(op, "--abort");
299 			clean = false;
300 		}
301 
302 		void doMerge()
303 		{
304 			final switch (spec.mode)
305 			{
306 				case MergeMode.merge:
307 					if (!spec.revert)
308 						git.run("merge", "--no-ff", "-m", mergeCommitMessage, spec.branch[1]);
309 					else
310 					{
311 						// When reverting in merge mode, we try to
312 						// find the merge commit following the branch
313 						// tip, and revert only that merge commit.
314 						string mergeCommit; int mainline;
315 						getChild(spec.target, spec.branch[1], /*out*/mergeCommit, /*out*/mainline);
316 
317 						string[] args = ["revert", "--no-edit"];
318 						if (mainline)
319 							args ~= ["--mainline", text(mainline)];
320 						args ~= [mergeCommit];
321 						git.run(args);
322 					}
323 					break;
324 				case MergeMode.cherryPick:
325 					enforce(spec.branch[0], "Must specify a branch base for a cherry-pick merge");
326 					auto range = spec.branch[0] ~ ".." ~ spec.branch[1];
327 					if (!spec.revert)
328 						git.run("cherry-pick", range);
329 					else
330 						git.run("revert", "--no-edit", range);
331 					break;
332 			}
333 		}
334 
335 		if (git.path.baseName() == "dmd")
336 		{
337 			try
338 				doMerge();
339 			catch (Exception)
340 			{
341 				log("Merge failed. Attempting conflict resolution...");
342 				git.run("checkout", "--theirs", "test");
343 				git.run("add", "test");
344 				if (!spec.revert)
345 					git.run("-c", "rerere.enabled=false", "commit", "-m", mergeCommitMessage);
346 				else
347 					git.run("revert", "--continue");
348 			}
349 		}
350 		else
351 			doMerge();
352 
353 		log("Merge successful.");
354 	}
355 
356 	/// Finds and returns the merge parents of the given merge commit.
357 	/// Queries the git repository if necessary. Caches the result.
358 	public MergeInfo getMergeInfo(string merge)
359 	{
360 		auto hit = mergeCache.find!(entry => entry.result == merge && !entry.spec.revert)();
361 		if (!hit.empty)
362 			return hit.front;
363 
364 		auto parents = git.query(["log", "--pretty=%P", "-n", "1", merge]).split();
365 		enforce(parents.length > 1, "Not a merge: " ~ merge);
366 		enforce(parents.length == 2, "Too many parents: " ~ merge);
367 
368 		auto info = MergeInfo(MergeSpec(parents[0], [null, parents[1]], MergeMode.merge, false), merge, 1);
369 		mergeCache ~= info;
370 		return info;
371 	}
372 
373 	/// Follows the string of merges starting from the given
374 	/// head commit, up till the merge with the given branch.
375 	/// Then, reapplies all merges in order,
376 	/// except for that with the given branch.
377 	public string getUnMerge(string head, string[2] branch, MergeMode mode)
378 	{
379 		// This could be optimized using an interactive rebase
380 
381 		auto info = getMergeInfo(head);
382 		if (info.spec.branch[1] == branch[1])
383 			return info.spec.target;
384 
385 		// Recurse to keep looking
386 		auto unmerge = getUnMerge(info.spec.target, branch, mode);
387 		// Re-apply this non-matching merge
388 		return getMerge(unmerge, info.spec.branch, info.spec.mode);
389 	}
390 
391 	// Branches, forks and customization
392 
393 	/// Return SHA1 of the given remote ref.
394 	/// Fetches the remote first, unless offline mode is on.
395 	string getRemoteRef(string remote, string remoteRef, string localRef)
396 	{
397 		if (!offline)
398 		{
399 			log("Fetching from %s (%s -> %s) ...".format(remote, remoteRef, localRef));
400 			git.run("fetch", remote, "+%s:%s".format(remoteRef, localRef));
401 		}
402 		return getRef(localRef);
403 	}
404 
405 	/// Return SHA1 of the given pull request #.
406 	/// Fetches the pull request first, unless offline mode is on.
407 	string getPullTip(int pull)
408 	{
409 		return getRemoteRef(
410 			"origin",
411 			"refs/pull/%d/head".format(pull),
412 			"refs/digger/pull/%d".format(pull),
413 		);
414 	}
415 
416 	private static bool isCommitHash(string s)
417 	{
418 		return s.length == 40 && s.representation.all!(c => (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'));
419 	}
420 
421 	/// Return SHA1 (base, tip) of the given branch (possibly of GitHub fork).
422 	/// Fetches the fork first, unless offline mode is on.
423 	/// (This is a thin wrapper around getRemoteRef.)
424 	string[2] getBranch(string user, string base, string tip)
425 	{
426 		if (user) enforce(user.match(re!`^\w[\w\-]*$`), "Bad remote name");
427 		if (base) enforce(base.match(re!`^\w[\w\-\.]*$`), "Bad branch base name");
428 		if (true) enforce(tip .match(re!`^\w[\w\-\.]*$`), "Bad branch tip name");
429 
430 		if (!user)
431 			user = "dlang";
432 
433 		if (isCommitHash(tip))
434 		{
435 			if (!offline)
436 			{
437 				// We don't know which branch the commit will be in, so just grab everything.
438 				auto remote = "https://github.com/%s/%s".format(user, name);
439 				log("Fetching everything from %s ...".format(remote));
440 				git.run("fetch", remote, "+refs/heads/*:refs/forks/%s/*".format(user));
441 			}
442 			if (!base)
443 				base = git.query("rev-parse", tip ~ "^");
444 			return [
445 				base,
446 				tip,
447 			];
448 		}
449 		else
450 		{
451 			return [
452 				null,
453 				getRemoteRef(
454 					"https://github.com/%s/%s".format(user, name),
455 					"refs/heads/%s".format(tip),
456 					"refs/digger/fork/%s/%s".format(user, tip),
457 				),
458 			];
459 		}
460 	}
461 
	/// Find the child of a commit, and, if the commit was a merge,
	/// the mainline index of said commit for the child.
	/// Params:
	///   branch   = commit whose history is queried; only children
	///              reachable from it are considered
	///   commit   = commit whose child is wanted
	///   child    = receives the hash of the single matching child
	///   mainline = receives 0 if the child has one parent; otherwise 2
	///              if commit is the child's first parent, and 1 if not
	/// Throws: Exception if either commit is missing from the history,
	/// if commit does not have exactly one child reachable from branch,
	/// or if the child has more than two parents.
	/// When the child has two parents, an entry for it is added to the
	/// merge cache (and the cache saved) unless an equal entry exists.
	void getChild(string branch, string commit, out string child, out int mainline)
	{
		needCommit(branch);

		log("Querying history for commit children...");
		auto history = git.getHistory([branch]);

		// Collect the hashes of all commits reachable from branch.
		bool[Hash] seen;
		void visit(Commit* commit)
		{
			if (commit.hash !in seen)
			{
				seen[commit.hash] = true;
				foreach (parent; commit.parents)
					visit(parent);
			}
		}
		auto branchHash = branch.toCommitHash();
		auto pBranchCommit = branchHash in history.commits;
		enforce(pBranchCommit, "Can't find commit " ~ branch ~" in history");
		visit(*pBranchCommit);

		auto commitHash = commit.toCommitHash();
		auto pCommit = commitHash in history.commits;
		enforce(pCommit, "Can't find commit in history");
		auto children = (*pCommit).children;
		enforce(children.length, "Commit has no children");
		// Keep only the children which are part of the branch's own history.
		children = children.filter!(child => child.hash in seen).array();
		enforce(children.length, "Commit has no children under specified branch");
		enforce(children.length == 1, "Commit has more than one child");
		auto childCommit = children[0];
		child = childCommit.hash.toString();

		if (childCommit.parents.length == 1)
			mainline = 0;
		else
		{
			enforce(childCommit.parents.length == 2, "Can't get mainline of multiple-branch merges");
			// The mainline is the parent which is NOT the given commit (1-based index).
			if (childCommit.parents[0] is *pCommit)
				mainline = 2;
			else
				mainline = 1;

			// NOTE(review): the entry's result is set to `commit` (the
			// parent passed in), not `child` (the merge commit found
			// above), and the second MergeSpec argument is a single
			// string for a string[2] field - confirm both are intended.
			auto mergeInfo = MergeInfo(
				MergeSpec(
					childCommit.parents[0].hash.toString(),
					childCommit.parents[1].hash.toString(),
					MergeMode.merge,
					true),
				commit, mainline);
			if (!mergeCache.canFind(mergeInfo))
			{
				mergeCache ~= mergeInfo;
				saveMergeCache();
			}
		}
	}
521 
522 	// State saving and checking
523 
	/// Snapshot of one working tree file, as recorded by getFileState
	/// and compared by checkState.
	struct FileState
	{
		bool isLink;              /// whether the path is a symbolic link
		ulong size;               /// size as reported by the directory entry
		StdTime modificationTime; /// last modification time, as a StdTime value
	}
530 
531 	FileState getFileState(string file)
532 	{
533 		assert(verify);
534 		auto path = git.path.buildPath(file);
535 		auto de = DirEntry(path);
536 		return FileState(de.isSymlink, de.size, de.timeLastModified.stdTime);
537 	}
538 
	/// Working tree state: a FileState per file, keyed by the path as listed by `git ls-files`.
	alias RepositoryState = FileState[string];
540 
541 	/// Return the working tree "state".
542 	/// This returns a file list, along with size and modification time.
543 	RepositoryState getState()
544 	{
545 		assert(verify);
546 		auto files = git.query(["ls-files"]).splitLines();
547 		RepositoryState state;
548 		foreach (file; files)
549 			try
550 				state[file] = getFileState(file);
551 			catch (Exception e) {}
552 		return state;
553 	}
554 
555 	private @property string workTreeStatePath()
556 	{
557 		assert(verify);
558 		return buildPath(git.gitDir, "ae-sys-d-worktree.json");
559 	}
560 
561 	/// Save the state of the working tree for versioned files
562 	/// to a .json file, which can later be verified with checkState.
563 	/// This should be called after any git command which mutates the git state.
564 	void saveState()
565 	{
566 		if (!verify)
567 			return;
568 		std.file.write(workTreeStatePath, getState().toJson());
569 	}
570 
571 	/// Save the state of just one file.
572 	/// This should be called after automatic edits to repository files during a build.
573 	/// The file parameter should be relative to the directory root, and use forward slashes.
574 	void saveFileState(string file)
575 	{
576 		if (!verify)
577 			return;
578 		if (!workTreeStatePath.exists)
579 			return;
580 		auto state = workTreeStatePath.readText.jsonParse!RepositoryState();
581 		state[file] = getFileState(file);
582 		std.file.write(workTreeStatePath, state.toJson());
583 	}
584 
585 	/// Verify that the state of the working tree matches the one
586 	/// when saveState was last called. Throw an exception otherwise.
587 	/// This and clearState should be called before any git command
588 	/// which destroys working directory changes.
589 	void checkState()
590 	{
591 		if (!verify)
592 			return;
593 		if (!workTreeStatePath.exists)
594 			return;
595 		auto savedState = workTreeStatePath.readText.jsonParse!RepositoryState();
596 		auto currentState = getState();
597 		try
598 		{
599 			foreach (file, fileState; currentState)
600 			{
601 				enforce(file in savedState, "New file: " ~ file);
602 				enforce(savedState[file].isLink == fileState.isLink,
603 					"File modified: %s (is link changed, before: %s, after: %s)".format(file, savedState[file].isLink, fileState.isLink));
604 				if (fileState.isLink)
605 					continue; // Correct lstat is too hard, just skip symlinks
606 				enforce(savedState[file].size == fileState.size,
607 					"File modified: %s (size changed, before: %s, after: %s)".format(file, savedState[file].size, fileState.size));
608 				enforce(savedState[file].modificationTime == fileState.modificationTime,
609 					"File modified: %s (modification time changed, before: %s, after: %s)".format(file, SysTime(savedState[file].modificationTime), SysTime(fileState.modificationTime)));
610 				assert(savedState[file] == fileState);
611 			}
612 		}
613 		catch (Exception e)
614 			throw new Exception(
615 				"The worktree has changed since the last time this software updated it.\n" ~
616 				"Specifically:\n" ~
617 				"    " ~ e.msg ~ "\n\n" ~
618 				"Aborting to avoid overwriting your changes.\n" ~
619 				"To continue:\n" ~
620 				" 1. Commit / stash / back up your changes, if you wish to keep them\n" ~
621 				" 2. Delete " ~ workTreeStatePath ~ "\n" ~
622 				" 3. Try this operation again."
623 			);
624 	}
625 
626 	/// Delete the saved working tree state, if any.
627 	void clearState()
628 	{
629 		if (!verify)
630 			return;
631 		if (workTreeStatePath.exists)
632 			workTreeStatePath.remove();
633 	}
634 
635 	// Misc
636 
637 	/// Reset internal state.
638 	protected void reset()
639 	{
640 		currentHead = null;
641 		clean = false;
642 		haveMergeCache = false;
643 		mergeCacheData = null;
644 	}
645 
	/// Override to add logging.
	/// Called with one progress or diagnostic message per call.
	protected abstract void log(string line);
648 }
649 
/// Used to communicate that a "reset --hard" failed.
/// Generally this indicates git repository corruption.
/// Thrown by ManagedRepository.performCleanup, carrying the message of
/// (and chaining) the exception from the failed git reset or clean.
mixin DeclareException!q{RepositoryCleanException};