1 /**
2  * Code to manage a D component repository.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.sys.d.repo;
15 
16 import std.algorithm;
17 import std.conv : text;
18 import std.datetime : SysTime;
19 import std.exception;
20 import std.file;
21 import std.process : environment;
22 import std.range;
23 import std.regex;
24 import std.string;
25 import std.path;
26 
27 import ae.sys.git;
28 import ae.utils.exception;
29 import ae.utils.json;
30 import ae.utils.regex;
31 import ae.utils.time : StdTime;
32 
/// Base class for a managed repository.
///
/// Wraps a git `Repository` (supplied by the subclass through `getRepo`)
/// and adds: a cached HEAD, a "working tree is clean" flag, an on-disk
/// cache of the merges / reverts created through this class, and
/// (when `verify` is set) detection of work tree changes made since
/// this class last touched the repository.
class ManagedRepository
{
	/// Git repository we manage.
	///
	/// The repository is requested from `getRepo` on first access.
	/// At that point the author / committer name, e-mail and date are
	/// pinned to fixed values in the repository's environment
	/// (presumably so that commits created by this class get the same
	/// hash every time; `needHead` relies on re-created merges matching
	/// a previously recorded hash).
	public @property ref const(Repository) git()
	{
		if (!gitRepo.path)
		{
			gitRepo = getRepo();
			assert(gitRepo.path, "No repository");
			foreach (person; ["AUTHOR", "COMMITTER"])
			{
				gitRepo.environment["GIT_%s_DATE".format(person)] = "Thu, 01 Jan 1970 00:00:00 +0000";
				gitRepo.environment["GIT_%s_NAME".format(person)] = "ae.sys.d";
				gitRepo.environment["GIT_%s_EMAIL".format(person)] = "ae.sys.d\x40thecybershadow.net";
			}
		}
		return gitRepo;
	}

	/// Should we fetch the latest stuff?
	/// When set, no fetching is done: `update` returns false,
	/// `needCommit` does not retry, and `getRemoteRef` only resolves
	/// the local ref.
	public bool offline;

	/// Verify working tree state to make sure we don't clobber user changes?
	/// When not set, `saveState`, `saveFileState`, `checkState` and
	/// `clearState` do nothing.
	public bool verify;

	/// Backing storage for the `git` property (empty path = not yet obtained).
	private Repository gitRepo;

	/// Repository provider
	abstract protected Repository getRepo();

	/// Name of this repository: the base name of the repository path.
	public @property string name() { return git.path.baseName; }

	// Head

	/// Ensure the repository's HEAD is as indicated.
	///
	/// Cleans the working tree first (see `needClean`). If HEAD is not
	/// already `hash`, the commit is checked out. Should the checkout
	/// fail, the merge cache is searched for an entry whose result is
	/// `hash`, and that merge / revert is performed again; an exception
	/// is thrown if there is no such entry or if the re-created commit
	/// has a different hash.
	public void needHead(string hash)
	{
		needClean();
		if (getHead() == hash)
			return;

		try
			performCheckout(hash);
		catch (Exception e)
		{
			log("Error checking out %s: %s".format(hash, e));

			// Might be a GC-ed merge. Try to recreate the merge
			auto hit = mergeCache.find!(entry => entry.result == hash)();
			enforce(!hit.empty, "Unknown hash %s".format(hash));
			performMerge(hit.front.base, hit.front.branch, hit.front.revert, hit.front.mainline);
			enforce(getHead() == hash, "Unexpected merge result: expected %s, got %s".format(hash, getHead()));
		}
	}

	/// Cached result of `getHead` (null = unknown, query git again).
	private string currentHead = null;

	/// Returns the SHA1 of the given named ref.
	public string getRef(string name)
	{
		return git.query("rev-parse", "--verify", "--quiet", name);
	}

	/// Return the commit the repository HEAD is pointing at.
	/// Cache the result.
	public string getHead()
	{
		if (!currentHead)
			currentHead = getRef("HEAD");

		return currentHead;
	}

	/// Check out the given commit, making sure beforehand that the
	/// working tree is clean and that the commit is available.
	/// Afterwards, records the new working tree state and HEAD.
	protected void performCheckout(string hash)
	{
		needClean();
		needCommit(hash);

		log("Checking out %s commit %s...".format(name, hash));

		git.run("checkout", hash);

		saveState();
		currentHead = hash;
	}

	/// Ensure that the specified commit is fetched.
	///
	/// Throws if `hash` does not name a commit object. Unless offline,
	/// a failed first check triggers `update` and one more check.
	protected void needCommit(string hash)
	{
		void check()
		{
			enforce(git.query(["cat-file", "-t", hash]) == "commit",
				"Unexpected object type");
		}

		if (offline)
			check();
		else
		{
			try
				check();
			catch (Exception e)
			{
				log("Don't have commit " ~ hash ~ ", updating and retrying...");
				update();
				check();
			}
		}
	}

	/// Update the remote.
	/// Return true if any updates were fetched
	/// (i.e. the output of `git show-ref` changed).
	/// Does nothing and returns false in offline mode.
	public bool update()
	{
		if (offline)
			return false;

		log("Updating " ~ name ~ "...");
		auto oldRefs = git.query(["show-ref"]);
		git.run("-c", "fetch.recurseSubmodules=false", "remote", "update", "--prune");
		git.run("-c", "fetch.recurseSubmodules=false", "fetch", "--force", "--tags");
		auto newRefs = git.query(["show-ref"]);
		return oldRefs != newRefs;
	}

	// Clean

	/// Whether the working tree is known to be clean.
	/// Set by `needClean`; cleared by `reset` and after a failed
	/// merge / revert.
	bool clean = false;

	/// Ensure the repository's working copy is clean.
	/// The cleanup is skipped if the tree is already known to be clean.
	public void needClean()
	{
		if (clean)
			return;
		performCleanup();
		clean = true;
	}

	/// Discard all working tree changes and untracked / ignored files.
	///
	/// With `verify` on, first checks that the tree still matches the
	/// saved state (throwing instead of destroying user changes), and
	/// saves the fresh state afterwards.
	/// Any git failure is rethrown as `RepositoryCleanException`.
	private void performCleanup()
	{
		checkState();
		clearState();

		log("Cleaning repository %s...".format(name));
		try
		{
			git.run("reset", "--hard");
			git.run("clean", "--force", "--force" /*Sic*/, "-x", "-d", "--quiet");
		}
		catch (Exception e)
			throw new RepositoryCleanException(e.msg, e);
		saveState();
	}

	// Merge cache

	/// One merge-cache record: applying (or reverting, if `revert`)
	/// `branch` on top of `base` yields the commit `result`.
	/// `mainline` is the value passed to `git revert --mainline`
	/// (0 = option not passed).
	private static struct MergeInfo
	{
		string base, branch;
		bool revert = false;
		int mainline = 0;
		string result;
	}
	private alias MergeCache = MergeInfo[];
	private MergeCache mergeCacheData;
	private bool haveMergeCache;

	/// The merge cache, loaded from `mergeCachePath` on first access
	/// (left empty if that file does not exist).
	private @property ref MergeCache mergeCache()
	{
		if (!haveMergeCache)
		{
			if (mergeCachePath.exists)
				mergeCacheData = mergeCachePath.readText().jsonParse!MergeCache;
			haveMergeCache = true;
		}

		return mergeCacheData;
	}

	/// Write the merge cache to `mergeCachePath` as JSON.
	private void saveMergeCache()
	{
		std.file.write(mergeCachePath(), toJson(mergeCache));
	}

	/// Location of the merge cache file, inside the git directory.
	private @property string mergeCachePath()
	{
		return buildPath(git.gitDir, "ae-sys-d-mergecache.json");
	}

	// Merge

	/// Returns the hash of the merge between the base and branch commits.
	/// Performs the merge if necessary. Caches the result.
	public string getMerge(string base, string branch)
	{
		return getMergeImpl(base, branch, false, 0);
	}

	/// Returns the resulting hash when reverting the branch from the base commit.
	/// Performs the revert if necessary. Caches the result.
	/// mainline is the 1-based mainline index (as per `man git-revert`),
	/// or 0 if commit is not a merge commit.
	public string getRevert(string base, string branch, int mainline)
	{
		return getMergeImpl(base, branch, true, mainline);
	}

	/// Shared implementation of `getMerge` and `getRevert`:
	/// answer from the merge cache when an identical operation is
	/// recorded, otherwise perform it, record the resulting HEAD
	/// and save the cache.
	private string getMergeImpl(string base, string branch, bool revert, int mainline)
	{
		auto hit = mergeCache.find!(entry =>
			entry.base == base &&
			entry.branch == branch &&
			entry.revert == revert &&
			entry.mainline == mainline)();
		if (!hit.empty)
			return hit.front.result;

		performMerge(base, branch, revert, mainline);

		auto head = getHead();
		mergeCache ~= MergeInfo(base, branch, revert, mainline, head);
		saveMergeCache();
		return head;
	}

	/// Commit message used for merges created by this class.
	private static const string mergeCommitMessage = "ae.sys.d merge";
	/// Commit message for reverts (not referenced by the code in this class).
	private static const string revertCommitMessage = "ae.sys.d revert";

	/// Performs a merge or revert of `branch` on top of `base`.
	///
	/// On failure, the merge / revert is aborted and the working tree
	/// is flagged as no longer clean. The working tree state is saved
	/// on the way out in either case.
	private void performMerge(string base, string branch, bool revert, int mainline)
	{
		needHead(base);
		// HEAD is about to move; force the next getHead to ask git.
		currentHead = null;

		log("%s %s into %s.".format(revert ? "Reverting" : "Merging", branch, base));

		scope(exit) saveState();

		scope (failure)
		{
			// Record the dirty tree before anything else: the abort
			// below can itself fail (e.g. when no merge is actually in
			// progress), and that must not leave `clean` set.
			clean = false;

			try
			{
				if (!revert)
				{
					log("Aborting merge...");
					git.run("merge", "--abort");
				}
				else
				{
					log("Aborting revert...");
					git.run("revert", "--abort");
				}
			}
			catch (Exception abortError)
			{
				// Best effort only; the original failure keeps propagating.
				log("Error aborting: " ~ abortError.msg);
			}
		}

		void doMerge()
		{
			if (!revert)
				git.run("merge", "--no-ff", "-m", mergeCommitMessage, branch);
			else
			{
				string[] args = ["revert", "--no-edit"];
				if (mainline)
					args ~= ["--mainline", text(mainline)];
				args ~= [branch];
				git.run(args);
			}
		}

		if (git.path.baseName() == "dmd")
		{
			// Special case for the "dmd" repository: on failure, resolve
			// conflicts by taking "their" version of the "test" path,
			// then finish the merge / revert.
			try
				doMerge();
			catch (Exception)
			{
				log("Merge failed. Attempting conflict resolution...");
				git.run("checkout", "--theirs", "test");
				git.run("add", "test");
				if (!revert)
					git.run("-c", "rerere.enabled=false", "commit", "-m", mergeCommitMessage);
				else
					git.run("revert", "--continue");
			}
		}
		else
			doMerge();

		log("Merge successful.");
	}

	/// Finds and returns the merge parents of the given merge commit.
	/// Queries the git repository if necessary. Caches the result.
	///
	/// Throws if the commit does not have exactly two parents.
	public MergeInfo getMergeInfo(string merge)
	{
		auto hit = mergeCache.find!(entry => entry.result == merge && !entry.revert)();
		if (!hit.empty)
			return hit.front;

		auto parents = git.query(["log", "--pretty=%P", "-n", "1", merge]).split();
		enforce(parents.length > 1, "Not a merge: " ~ merge);
		enforce(parents.length == 2, "Too many parents: " ~ merge);

		auto info = MergeInfo(parents[0], parents[1], false, 0, merge);
		mergeCache ~= info;
		// Persist right away, like every other place that extends the cache.
		saveMergeCache();
		return info;
	}

	/// Follows the string of merges starting from the given
	/// head commit, up till the merge with the given branch.
	/// Then, reapplies all merges in order,
	/// except for that with the given branch.
	/// Returns the hash of the resulting commit.
	public string getUnMerge(string head, string branch)
	{
		// This could be optimized using an interactive rebase

		auto info = getMergeInfo(head);
		if (info.branch == branch)
			return info.base;

		return getMerge(getUnMerge(info.base, branch), info.branch);
	}

	// Branches, forks and customization

	/// Return SHA1 of the given remote ref.
	/// Fetches the remote first, unless offline mode is on.
	/// The remote ref is force-fetched into `localRef`,
	/// which is then resolved.
	string getRemoteRef(string remote, string remoteRef, string localRef)
	{
		if (!offline)
		{
			log("Fetching from %s (%s -> %s) ...".format(remote, remoteRef, localRef));
			git.run("fetch", remote, "+%s:%s".format(remoteRef, localRef));
		}
		return getRef(localRef);
	}

	/// Return SHA1 of the given pull request #.
	/// Fetches the pull request first, unless offline mode is on.
	string getPull(int pull)
	{
		return getRemoteRef(
			"origin",
			"refs/pull/%d/head".format(pull),
			"refs/digger/pull/%d".format(pull),
		);
	}

	/// Return SHA1 of the given GitHub fork.
	/// Fetches the fork first, unless offline mode is on.
	/// (This is a thin wrapper around getRemoteRef.)
	/// Throws if the user or branch name contains unexpected characters.
	string getFork(string user, string branch)
	{
		enforce(user  .match(re!`^\w[\w\-]*$`), "Bad remote name");
		enforce(branch.match(re!`^\w[\w\-\.]*$`), "Bad branch name");

		return getRemoteRef(
			"https://github.com/%s/%s".format(user, name),
			"refs/heads/%s".format(branch),
			"refs/digger/fork/%s/%s".format(user, branch),
		);
	}

	/// Find the child of a commit, and, if the commit was a merge,
	/// the mainline index of said commit for the child.
	///
	/// Only children reachable from `branch` are considered, and there
	/// must be exactly one. `mainline` is set to 0 if the child has a
	/// single parent, to 2 if `commit` is the child's first parent, and
	/// to 1 otherwise. For a two-parent child, a matching revert entry
	/// is also added to the merge cache.
	void getChild(string branch, string commit, out string child, out int mainline)
	{
		needCommit(branch);

		log("Querying history for commit children...");
		auto history = git.getHistory([branch]);

		// Collect every commit reachable from the branch tip.
		bool[Hash] seen;
		void visit(Commit* commit)
		{
			if (commit.hash !in seen)
			{
				seen[commit.hash] = true;
				foreach (parent; commit.parents)
					visit(parent);
			}
		}
		auto branchHash = branch.toCommitHash();
		auto pBranchCommit = branchHash in history.commits;
		enforce(pBranchCommit, "Can't find commit " ~ branch ~" in history");
		visit(*pBranchCommit);

		auto commitHash = commit.toCommitHash();
		auto pCommit = commitHash in history.commits;
		enforce(pCommit, "Can't find commit in history");
		auto children = (*pCommit).children;
		enforce(children.length, "Commit has no children");
		children = children.filter!(child => child.hash in seen).array();
		enforce(children.length, "Commit has no children under specified branch");
		enforce(children.length == 1, "Commit has more than one child");
		auto childCommit = children[0];
		child = childCommit.hash.toString();

		if (childCommit.parents.length == 1)
			mainline = 0;
		else
		{
			enforce(childCommit.parents.length == 2, "Can't get mainline of multiple-branch merges");
			if (childCommit.parents[0] is *pCommit)
				mainline = 2;
			else
				mainline = 1;

			auto mergeInfo = MergeInfo(
				childCommit.parents[0].hash.toString(),
				childCommit.parents[1].hash.toString(),
				true, mainline, commit);
			if (!mergeCache.canFind(mergeInfo))
			{
				mergeCache ~= mergeInfo;
				saveMergeCache();
			}
		}
	}

	// State saving and checking

	/// What is recorded about each versioned file.
	struct FileState
	{
		bool isLink;
		ulong size;
		StdTime modificationTime;
	}

	/// Return the current state of one file.
	/// The file parameter is relative to the repository path.
	/// Must only be called with `verify` on.
	FileState getFileState(string file)
	{
		assert(verify);
		auto path = git.path.buildPath(file);
		auto de = DirEntry(path);
		return FileState(de.isSymlink, de.size, de.timeLastModified.stdTime);
	}

	/// Recorded state of all versioned files, keyed by file name.
	alias RepositoryState = FileState[string];

	/// Return the working tree "state".
	/// This returns a file list, along with size and modification time.
	/// Files reported by `git ls-files` whose state cannot be read
	/// are left out of the result.
	RepositoryState getState()
	{
		assert(verify);
		auto files = git.query(["ls-files"]).splitLines();
		RepositoryState state;
		foreach (file; files)
			try
				state[file] = getFileState(file);
			catch (Exception e) {} // deliberately skipped (presumably the file is missing from the work tree)
		return state;
	}

	/// Location of the saved working tree state, inside the git directory.
	private @property string workTreeStatePath()
	{
		assert(verify);
		return buildPath(git.gitDir, "ae-sys-d-worktree.json");
	}

	/// Save the state of the working tree for versioned files
	/// to a .json file, which can later be verified with checkState.
	/// This should be called after any git command which mutates the git state.
	void saveState()
	{
		if (!verify)
			return;
		std.file.write(workTreeStatePath, getState().toJson());
	}

	/// Save the state of just one file.
	/// This should be called after automatic edits to repository files during a build.
	/// The file parameter should be relative to the directory root, and use forward slashes.
	/// Does nothing if there is no saved state to amend.
	void saveFileState(string file)
	{
		if (!verify)
			return;
		if (!workTreeStatePath.exists)
			return;
		auto state = workTreeStatePath.readText.jsonParse!RepositoryState();
		state[file] = getFileState(file);
		std.file.write(workTreeStatePath, state.toJson());
	}

	/// Verify that the state of the working tree matches the one
	/// when saveState was last called. Throw an exception otherwise.
	/// This and clearState should be called before any git command
	/// which destroys working directory changes.
	///
	/// Passes silently if no state was saved. Only files present in
	/// the current state are compared, so files that have disappeared
	/// since the state was saved are not reported.
	void checkState()
	{
		if (!verify)
			return;
		if (!workTreeStatePath.exists)
			return;
		auto savedState = workTreeStatePath.readText.jsonParse!RepositoryState();
		auto currentState = getState();
		try
		{
			foreach (file, fileState; currentState)
			{
				enforce(file in savedState, "New file: " ~ file);
				enforce(savedState[file].isLink == fileState.isLink,
					"File modified: %s (is link changed, before: %s, after: %s)".format(file, savedState[file].isLink, fileState.isLink));
				if (fileState.isLink)
					continue; // Correct lstat is too hard, just skip symlinks
				enforce(savedState[file].size == fileState.size,
					"File modified: %s (size changed, before: %s, after: %s)".format(file, savedState[file].size, fileState.size));
				enforce(savedState[file].modificationTime == fileState.modificationTime,
					"File modified: %s (modification time changed, before: %s, after: %s)".format(file, SysTime(savedState[file].modificationTime), SysTime(fileState.modificationTime)));
				assert(savedState[file] == fileState);
			}
		}
		catch (Exception e)
			// Wrap the first detected difference in instructions for the user.
			throw new Exception(
				"The worktree has changed since the last time this software updated it.\n" ~
				"Specifically:\n" ~
				"    " ~ e.msg ~ "\n\n" ~
				"Aborting to avoid overwriting your changes.\n" ~
				"To continue:\n" ~
				" 1. Commit / stash / back up your changes, if you wish to keep them\n" ~
				" 2. Delete " ~ workTreeStatePath ~ "\n" ~
				" 3. Try this operation again."
			);
	}

	/// Delete the saved working tree state, if any.
	void clearState()
	{
		if (!verify)
			return;
		if (workTreeStatePath.exists)
			workTreeStatePath.remove();
	}

	// Misc

	/// Reset internal state.
	/// Forgets the cached HEAD, the "clean" flag and the in-memory
	/// merge cache (which will be re-read from disk on next use).
	protected void reset()
	{
		currentHead = null;
		clean = false;
		haveMergeCache = false;
		mergeCacheData = null;
	}

	/// Override to add logging.
	protected abstract void log(string line);
}
580 
/// Exception thrown when cleaning the working tree
/// ("reset --hard" followed by "clean") fails.
/// Generally this indicates git repository corruption.
mixin DeclareException!"RepositoryCleanException";