1 /**
2  * Code to manage a D component repository.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.sys.d.repo;
15 
16 import std.algorithm;
17 import std.conv : text;
18 import std.datetime : SysTime;
19 import std.exception;
20 import std.file;
21 import std.process : environment;
22 import std.range;
23 import std.regex;
24 import std.string;
25 import std.path;
26 
27 import ae.sys.git;
28 import ae.utils.exception;
29 import ae.utils.json;
30 import ae.utils.regex;
31 import ae.utils.time : StdTime;
32 
33 /// Base class for a managed repository.
34 class ManagedRepository
35 {
36 	/// Git repository we manage.
37 	public @property ref const(Repository) git()
38 	{
39 		if (!gitRepo.path)
40 		{
41 			gitRepo = getRepo();
42 			assert(gitRepo.path, "No repository");
43 			foreach (person; ["AUTHOR", "COMMITTER"])
44 			{
45 				gitRepo.environment["GIT_%s_DATE".format(person)] = "Thu, 01 Jan 1970 00:00:00 +0000";
46 				gitRepo.environment["GIT_%s_NAME".format(person)] = "ae.sys.d";
47 				gitRepo.environment["GIT_%s_EMAIL".format(person)] = "ae.sys.d\x40thecybershadow.net";
48 			}
49 		}
50 		return gitRepo;
51 	}
52 
	/// Should we fetch the latest stuff?
	/// When set, update and getRemoteRef do not fetch, and needCommit
	/// gives up instead of updating when a commit is missing.
	public bool offline;

	/// Verify working tree state to make sure we don't clobber user changes?
	/// When unset, saveState, saveFileState, checkState and clearState do nothing.
	public bool verify;

	/// Backing storage for the git property; filled in on first access.
	private Repository gitRepo;

	/// Repository provider.
	/// Called by the git property the first time it is accessed.
	abstract protected Repository getRepo();
63 
64 	public @property string name() { return git.path.baseName; }
65 
66 	// Head
67 
68 	/// Ensure the repository's HEAD is as indicated.
69 	public void needHead(string hash)
70 	{
71 		needClean();
72 		if (getHead() == hash)
73 			return;
74 
75 		try
76 			performCheckout(hash);
77 		catch (Exception e)
78 		{
79 			log("Error checking out %s: %s".format(hash, e));
80 
81 			// Might be a GC-ed merge. Try to recreate the merge
82 			auto hit = mergeCache.find!(entry => entry.result == hash)();
83 			enforce(!hit.empty, "Unknown hash %s".format(hash));
84 			performMerge(hit.front.base, hit.front.branch, hit.front.revert, hit.front.mainline);
85 			enforce(getHead() == hash, "Unexpected merge result: expected %s, got %s".format(hash, getHead()));
86 		}
87 	}
88 
	/// Cached hash of HEAD, as used by getHead; null when not known.
	private string currentHead = null;
90 
91 	/// Returns the SHA1 of the given named ref.
92 	public string getRef(string name)
93 	{
94 		return git.query("rev-parse", "--verify", "--quiet", name);
95 	}
96 
97 	/// Return the commit the repository HEAD is pointing at.
98 	/// Cache the result.
99 	public string getHead()
100 	{
101 		if (!currentHead)
102 			currentHead = getRef("HEAD");
103 
104 		return currentHead;
105 	}
106 
107 	protected void performCheckout(string hash)
108 	{
109 		needClean();
110 		needCommit(hash);
111 
112 		log("Checking out %s commit %s...".format(name, hash));
113 
114 		git.run("checkout", hash);
115 
116 		saveState();
117 		currentHead = hash;
118 	}
119 
120 	/// Ensure that the specified commit is fetched.
121 	protected void needCommit(string hash)
122 	{
123 		void check()
124 		{
125 			enforce(git.query(["cat-file", "-t", hash]) == "commit",
126 				"Unexpected object type");
127 		}
128 
129 		try
130 			check();
131 		catch (Exception e)
132 		{
133 			if (offline)
134 			{
135 				log("Don't have commit " ~ hash ~ " and in offline mode, can't proceed.");
136 				throw new Exception("Giving up");
137 			}
138 			else
139 			{
140 				log("Don't have commit " ~ hash ~ ", updating and retrying...");
141 				update();
142 				check();
143 			}
144 		}
145 	}
146 
147 	/// Update the remote.
148 	/// Return true if any updates were fetched.
149 	public bool update()
150 	{
151 		if (!offline)
152 		{
153 			log("Updating " ~ name ~ "...");
154 			auto oldRefs = git.query(["show-ref"]);
155 			git.run("-c", "fetch.recurseSubmodules=false", "remote", "update", "--prune");
156 			git.run("-c", "fetch.recurseSubmodules=false", "fetch", "--force", "--tags");
157 			auto newRefs = git.query(["show-ref"]);
158 			return oldRefs != newRefs;
159 		}
160 		else
161 			return false;
162 	}
163 
164 	// Clean
165 
	/// Whether the working copy has already been cleaned (see needClean).
	/// Cleared again by reset and after a failed merge or revert.
	bool clean = false;
167 
168 	/// Ensure the repository's working copy is clean.
169 	public void needClean()
170 	{
171 		if (clean)
172 			return;
173 		performCleanup();
174 		clean = true;
175 	}
176 
177 	private void performCleanup()
178 	{
179 		checkState();
180 		clearState();
181 
182 		log("Cleaning repository %s...".format(name));
183 		try
184 		{
185 			git.run("reset", "--hard");
186 			git.run("clean", "--force", "--force" /*Sic*/, "-x", "-d", "--quiet");
187 		}
188 		catch (Exception e)
189 			throw new RepositoryCleanException(e.msg, e);
190 		saveState();
191 	}
192 
193 	// Merge cache
194 
	/// One merge cache record: the inputs of a merge or revert
	/// operation, and the hash recorded as its outcome.
	private static struct MergeInfo
	{
		/// Commit the operation starts from, and the commit merged / reverted.
		string base, branch;
		/// true for a revert, false for a merge.
		bool revert = false;
		/// Mainline index used for reverts (see getRevert); 0 otherwise.
		int mainline = 0;
		/// Hash recorded as the outcome of the operation.
		string result;
	}
	/// The merge cache is a flat list of records.
	private alias MergeCache = MergeInfo[];
	/// In-memory copy of the merge cache; access it through mergeCache.
	private MergeCache mergeCacheData;
	/// Whether mergeCacheData has been loaded (see mergeCache).
	private bool haveMergeCache;
205 
206 	private @property ref MergeCache mergeCache()
207 	{
208 		if (!haveMergeCache)
209 		{
210 			if (mergeCachePath.exists)
211 				mergeCacheData = mergeCachePath.readText().jsonParse!MergeCache;
212 			haveMergeCache = true;
213 		}
214 
215 		return mergeCacheData;
216 	}
217 
218 	private void saveMergeCache()
219 	{
220 		std.file.write(mergeCachePath(), toJson(mergeCache));
221 	}
222 
223 	private @property string mergeCachePath()
224 	{
225 		return buildPath(git.gitDir, "ae-sys-d-mergecache.json");
226 	}
227 
228 	// Merge
229 
230 	/// Returns the hash of the merge between the base and branch commits.
231 	/// Performs the merge if necessary. Caches the result.
232 	public string getMerge(string base, string branch)
233 	{
234 		return getMergeImpl(base, branch, false, 0);
235 	}
236 
237 	/// Returns the resulting hash when reverting the branch from the base commit.
238 	/// Performs the revert if necessary. Caches the result.
239 	/// mainline is the 1-based mainline index (as per `man git-revert`),
240 	/// or 0 if commit is not a merge commit.
241 	public string getRevert(string base, string branch, int mainline)
242 	{
243 		return getMergeImpl(base, branch, true, mainline);
244 	}
245 
246 	private string getMergeImpl(string base, string branch, bool revert, int mainline)
247 	{
248 		auto hit = mergeCache.find!(entry =>
249 			entry.base == base &&
250 			entry.branch == branch &&
251 			entry.revert == revert &&
252 			entry.mainline == mainline)();
253 		if (!hit.empty)
254 			return hit.front.result;
255 
256 		performMerge(base, branch, revert, mainline);
257 
258 		auto head = getHead();
259 		mergeCache ~= MergeInfo(base, branch, revert, mainline, head);
260 		saveMergeCache();
261 		return head;
262 	}
263 
	/// Commit message passed to git by performMerge when creating merge commits.
	private static const string mergeCommitMessage = "ae.sys.d merge";
	/// Commit message for reverts (not referenced elsewhere in this class).
	private static const string revertCommitMessage = "ae.sys.d revert";
266 
267 	// Performs a merge or revert.
268 	private void performMerge(string base, string branch, bool revert, int mainline)
269 	{
270 		needHead(base);
271 		currentHead = null;
272 
273 		log("%s %s into %s.".format(revert ? "Reverting" : "Merging", branch, base));
274 
275 		scope(exit) saveState();
276 
277 		scope (failure)
278 		{
279 			if (!revert)
280 			{
281 				log("Aborting merge...");
282 				git.run("merge", "--abort");
283 			}
284 			else
285 			{
286 				log("Aborting revert...");
287 				git.run("revert", "--abort");
288 			}
289 			clean = false;
290 		}
291 
292 		void doMerge()
293 		{
294 			if (!revert)
295 				git.run("merge", "--no-ff", "-m", mergeCommitMessage, branch);
296 			else
297 			{
298 				string[] args = ["revert", "--no-edit"];
299 				if (mainline)
300 					args ~= ["--mainline", text(mainline)];
301 				args ~= [branch];
302 				git.run(args);
303 			}
304 		}
305 
306 		if (git.path.baseName() == "dmd")
307 		{
308 			try
309 				doMerge();
310 			catch (Exception)
311 			{
312 				log("Merge failed. Attempting conflict resolution...");
313 				git.run("checkout", "--theirs", "test");
314 				git.run("add", "test");
315 				if (!revert)
316 					git.run("-c", "rerere.enabled=false", "commit", "-m", mergeCommitMessage);
317 				else
318 					git.run("revert", "--continue");
319 			}
320 		}
321 		else
322 			doMerge();
323 
324 		log("Merge successful.");
325 	}
326 
327 	/// Finds and returns the merge parents of the given merge commit.
328 	/// Queries the git repository if necessary. Caches the result.
329 	public MergeInfo getMergeInfo(string merge)
330 	{
331 		auto hit = mergeCache.find!(entry => entry.result == merge && !entry.revert)();
332 		if (!hit.empty)
333 			return hit.front;
334 
335 		auto parents = git.query(["log", "--pretty=%P", "-n", "1", merge]).split();
336 		enforce(parents.length > 1, "Not a merge: " ~ merge);
337 		enforce(parents.length == 2, "Too many parents: " ~ merge);
338 
339 		auto info = MergeInfo(parents[0], parents[1], false, 0, merge);
340 		mergeCache ~= info;
341 		return info;
342 	}
343 
344 	/// Follows the string of merges starting from the given
345 	/// head commit, up till the merge with the given branch.
346 	/// Then, reapplies all merges in order,
347 	/// except for that with the given branch.
348 	public string getUnMerge(string head, string branch)
349 	{
350 		// This could be optimized using an interactive rebase
351 
352 		auto info = getMergeInfo(head);
353 		if (info.branch == branch)
354 			return info.base;
355 
356 		return getMerge(getUnMerge(info.base, branch), info.branch);
357 	}
358 
359 	// Branches, forks and customization
360 
361 	/// Return SHA1 of the given remote ref.
362 	/// Fetches the remote first, unless offline mode is on.
363 	string getRemoteRef(string remote, string remoteRef, string localRef)
364 	{
365 		if (!offline)
366 		{
367 			log("Fetching from %s (%s -> %s) ...".format(remote, remoteRef, localRef));
368 			git.run("fetch", remote, "+%s:%s".format(remoteRef, localRef));
369 		}
370 		return getRef(localRef);
371 	}
372 
373 	/// Return SHA1 of the given pull request #.
374 	/// Fetches the pull request first, unless offline mode is on.
375 	string getPull(int pull)
376 	{
377 		return getRemoteRef(
378 			"origin",
379 			"refs/pull/%d/head".format(pull),
380 			"refs/digger/pull/%d".format(pull),
381 		);
382 	}
383 
384 	/// Return SHA1 of the given GitHub fork.
385 	/// Fetches the fork first, unless offline mode is on.
386 	/// (This is a thin wrapper around getRemoteBranch.)
387 	string getFork(string user, string branch)
388 	{
389 		enforce(user  .match(re!`^\w[\w\-]*$`), "Bad remote name");
390 		enforce(branch.match(re!`^\w[\w\-\.]*$`), "Bad branch name");
391 
392 		return getRemoteRef(
393 			"https://github.com/%s/%s".format(user, name),
394 			"refs/heads/%s".format(branch),
395 			"refs/digger/fork/%s/%s".format(user, branch),
396 		);
397 	}
398 
399 	/// Find the child of a commit, and, if the commit was a merge,
400 	/// the mainline index of said commit for the child.
401 	void getChild(string branch, string commit, out string child, out int mainline)
402 	{
403 		needCommit(branch);
404 
405 		log("Querying history for commit children...");
406 		auto history = git.getHistory([branch]);
407 
408 		bool[Hash] seen;
409 		void visit(Commit* commit)
410 		{
411 			if (commit.hash !in seen)
412 			{
413 				seen[commit.hash] = true;
414 				foreach (parent; commit.parents)
415 					visit(parent);
416 			}
417 		}
418 		auto branchHash = branch.toCommitHash();
419 		auto pBranchCommit = branchHash in history.commits;
420 		enforce(pBranchCommit, "Can't find commit " ~ branch ~" in history");
421 		visit(*pBranchCommit);
422 
423 		auto commitHash = commit.toCommitHash();
424 		auto pCommit = commitHash in history.commits;
425 		enforce(pCommit, "Can't find commit in history");
426 		auto children = (*pCommit).children;
427 		enforce(children.length, "Commit has no children");
428 		children = children.filter!(child => child.hash in seen).array();
429 		enforce(children.length, "Commit has no children under specified branch");
430 		enforce(children.length == 1, "Commit has more than one child");
431 		auto childCommit = children[0];
432 		child = childCommit.hash.toString();
433 
434 		if (childCommit.parents.length == 1)
435 			mainline = 0;
436 		else
437 		{
438 			enforce(childCommit.parents.length == 2, "Can't get mainline of multiple-branch merges");
439 			if (childCommit.parents[0] is *pCommit)
440 				mainline = 2;
441 			else
442 				mainline = 1;
443 
444 			auto mergeInfo = MergeInfo(
445 				childCommit.parents[0].hash.toString(),
446 				childCommit.parents[1].hash.toString(),
447 				true, mainline, commit);
448 			if (!mergeCache.canFind(mergeInfo))
449 			{
450 				mergeCache ~= mergeInfo;
451 				saveMergeCache();
452 			}
453 		}
454 	}
455 
456 	// State saving and checking
457 
	/// Snapshot of one working tree file, as recorded by getFileState.
	struct FileState
	{
		/// Whether the directory entry is a symbolic link.
		bool isLink;
		/// File size, as reported by the directory entry.
		ulong size;
		/// Last modification time, as a StdTime value.
		StdTime modificationTime;
	}
464 
465 	FileState getFileState(string file)
466 	{
467 		assert(verify);
468 		auto path = git.path.buildPath(file);
469 		auto de = DirEntry(path);
470 		return FileState(de.isSymlink, de.size, de.timeLastModified.stdTime);
471 	}
472 
	/// Working tree state: maps file names (as listed by git) to their FileState.
	alias RepositoryState = FileState[string];
474 
475 	/// Return the working tree "state".
476 	/// This returns a file list, along with size and modification time.
477 	RepositoryState getState()
478 	{
479 		assert(verify);
480 		auto files = git.query(["ls-files"]).splitLines();
481 		RepositoryState state;
482 		foreach (file; files)
483 			try
484 				state[file] = getFileState(file);
485 			catch (Exception e) {}
486 		return state;
487 	}
488 
489 	private @property string workTreeStatePath()
490 	{
491 		assert(verify);
492 		return buildPath(git.gitDir, "ae-sys-d-worktree.json");
493 	}
494 
495 	/// Save the state of the working tree for versioned files
496 	/// to a .json file, which can later be verified with checkState.
497 	/// This should be called after any git command which mutates the git state.
498 	void saveState()
499 	{
500 		if (!verify)
501 			return;
502 		std.file.write(workTreeStatePath, getState().toJson());
503 	}
504 
505 	/// Save the state of just one file.
506 	/// This should be called after automatic edits to repository files during a build.
507 	/// The file parameter should be relative to the directory root, and use forward slashes.
508 	void saveFileState(string file)
509 	{
510 		if (!verify)
511 			return;
512 		if (!workTreeStatePath.exists)
513 			return;
514 		auto state = workTreeStatePath.readText.jsonParse!RepositoryState();
515 		state[file] = getFileState(file);
516 		std.file.write(workTreeStatePath, state.toJson());
517 	}
518 
519 	/// Verify that the state of the working tree matches the one
520 	/// when saveState was last called. Throw an exception otherwise.
521 	/// This and clearState should be called before any git command
522 	/// which destroys working directory changes.
523 	void checkState()
524 	{
525 		if (!verify)
526 			return;
527 		if (!workTreeStatePath.exists)
528 			return;
529 		auto savedState = workTreeStatePath.readText.jsonParse!RepositoryState();
530 		auto currentState = getState();
531 		try
532 		{
533 			foreach (file, fileState; currentState)
534 			{
535 				enforce(file in savedState, "New file: " ~ file);
536 				enforce(savedState[file].isLink == fileState.isLink,
537 					"File modified: %s (is link changed, before: %s, after: %s)".format(file, savedState[file].isLink, fileState.isLink));
538 				if (fileState.isLink)
539 					continue; // Correct lstat is too hard, just skip symlinks
540 				enforce(savedState[file].size == fileState.size,
541 					"File modified: %s (size changed, before: %s, after: %s)".format(file, savedState[file].size, fileState.size));
542 				enforce(savedState[file].modificationTime == fileState.modificationTime,
543 					"File modified: %s (modification time changed, before: %s, after: %s)".format(file, SysTime(savedState[file].modificationTime), SysTime(fileState.modificationTime)));
544 				assert(savedState[file] == fileState);
545 			}
546 		}
547 		catch (Exception e)
548 			throw new Exception(
549 				"The worktree has changed since the last time this software updated it.\n" ~
550 				"Specifically:\n" ~
551 				"    " ~ e.msg ~ "\n\n" ~
552 				"Aborting to avoid overwriting your changes.\n" ~
553 				"To continue:\n" ~
554 				" 1. Commit / stash / back up your changes, if you wish to keep them\n" ~
555 				" 2. Delete " ~ workTreeStatePath ~ "\n" ~
556 				" 3. Try this operation again."
557 			);
558 	}
559 
560 	/// Delete the saved working tree state, if any.
561 	void clearState()
562 	{
563 		if (!verify)
564 			return;
565 		if (workTreeStatePath.exists)
566 			workTreeStatePath.remove();
567 	}
568 
569 	// Misc
570 
571 	/// Reset internal state.
572 	protected void reset()
573 	{
574 		currentHead = null;
575 		clean = false;
576 		haveMergeCache = false;
577 		mergeCacheData = null;
578 	}
579 
	/// Override to add logging.
	/// Called with one progress or diagnostic message at a time.
	protected abstract void log(string line);
582 }
583 
/// Used to communicate that cleaning the working copy failed, i.e. that
/// "reset --hard" or the subsequent "clean" command threw an exception.
/// Generally this indicates git repository corruption.
mixin DeclareException!q{RepositoryCleanException};