1 /**
2  * Code to manage a D component repository.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.sys.d.repo;
15 
16 import std.algorithm;
17 import std.conv : text;
18 import std.datetime : SysTime;
19 import std.exception;
20 import std.file;
21 import std.process : environment;
22 import std.range;
23 import std.regex;
24 import std.string;
25 import std.path;
26 
27 import ae.sys.git;
28 import ae.utils.exception;
29 import ae.utils.json;
30 import ae.utils.regex;
31 import ae.utils.time : StdTime;
32 
33 /// Base class for a managed repository.
34 class ManagedRepository
35 {
36 	/// Git repository we manage.
37 	public @property ref const(Repository) git()
38 	{
39 		if (!gitRepo.path)
40 		{
41 			gitRepo = getRepo();
42 			assert(gitRepo.path, "No repository");
43 			foreach (person; ["AUTHOR", "COMMITTER"])
44 			{
45 				gitRepo.environment["GIT_%s_DATE".format(person)] = "Thu, 01 Jan 1970 00:00:00 +0000";
46 				gitRepo.environment["GIT_%s_NAME".format(person)] = "ae.sys.d";
47 				gitRepo.environment["GIT_%s_EMAIL".format(person)] = "ae.sys.d\x40thecybershadow.net";
48 			}
49 		}
50 		return gitRepo;
51 	}
52 
53 	/// Should we fetch the latest stuff?
54 	public bool offline;
55 
56 	/// Verify working tree state to make sure we don't clobber user changes?
57 	public bool verify;
58 
59 	private Repository gitRepo;
60 
61 	/// Repository provider
62 	abstract protected Repository getRepo();
63 
64 	public @property string name() { return git.path.baseName; }
65 
66 	// Head
67 
68 	/// Ensure the repository's HEAD is as indicated.
69 	public void needHead(string hash)
70 	{
71 		needClean();
72 		if (getHead() == hash)
73 			return;
74 
75 		try
76 			performCheckout(hash);
77 		catch (Exception e)
78 		{
79 			log("Error checking out %s: %s".format(hash, e));
80 
81 			// Might be a GC-ed merge. Try to recreate the merge
82 			auto hit = mergeCache.find!(entry => entry.result == hash)();
83 			enforce(!hit.empty, "Unknown hash %s".format(hash));
84 			performMerge(hit.front.base, hit.front.branch, hit.front.revert, hit.front.mainline);
85 			enforce(getHead() == hash, "Unexpected merge result: expected %s, got %s".format(hash, getHead()));
86 		}
87 	}
88 
89 	private string currentHead = null;
90 
91 	/// Returns the SHA1 of the given named ref.
92 	public string getRef(string name)
93 	{
94 		return git.query("rev-parse", name);
95 	}
96 
97 	/// Return the commit the repository HEAD is pointing at.
98 	/// Cache the result.
99 	public string getHead()
100 	{
101 		if (!currentHead)
102 			currentHead = getRef("HEAD");
103 
104 		return currentHead;
105 	}
106 
107 	protected void performCheckout(string hash)
108 	{
109 		needClean();
110 		needCommit(hash);
111 
112 		log("Checking out %s commit %s...".format(name, hash));
113 
114 		git.run("checkout", hash);
115 
116 		saveState();
117 		currentHead = hash;
118 	}
119 
120 	/// Ensure that the specified commit is fetched.
121 	protected void needCommit(string hash)
122 	{
123 		void check()
124 		{
125 			enforce(git.query(["cat-file", "-t", hash]) == "commit",
126 				"Unexpected object type");
127 		}
128 
129 		if (offline)
130 			check();
131 		else
132 		{
133 			try
134 				check();
135 			catch (Exception e)
136 			{
137 				log("Don't have commit " ~ hash ~ ", updating and retrying...");
138 				update();
139 				check();
140 			}
141 		}
142 	}
143 
144 	/// Update the remote.
145 	public void update()
146 	{
147 		if (!offline)
148 		{
149 			log("Updating " ~ name ~ "...");
150 			git.run("-c", "fetch.recurseSubmodules=false", "remote", "update", "--prune");
151 			git.run("-c", "fetch.recurseSubmodules=false", "fetch", "--force", "--tags");
152 		}
153 	}
154 
155 	// Clean
156 
157 	bool clean = false;
158 
159 	/// Ensure the repository's working copy is clean.
160 	public void needClean()
161 	{
162 		if (clean)
163 			return;
164 		performCleanup();
165 		clean = true;
166 	}
167 
168 	private void performCleanup()
169 	{
170 		checkState();
171 		clearState();
172 
173 		log("Cleaning repository %s...".format(name));
174 		try
175 		{
176 			git.run("reset", "--hard");
177 			git.run("clean", "--force", "--force" /*Sic*/, "-x", "-d", "--quiet");
178 		}
179 		catch (Exception e)
180 			throw new RepositoryCleanException(e.msg, e);
181 		saveState();
182 	}
183 
184 	// Merge cache
185 
186 	private static struct MergeInfo
187 	{
188 		string base, branch;
189 		bool revert = false;
190 		int mainline = 0;
191 		string result;
192 	}
193 	private alias MergeCache = MergeInfo[];
194 	private MergeCache mergeCacheData;
195 	private bool haveMergeCache;
196 
197 	private @property ref MergeCache mergeCache()
198 	{
199 		if (!haveMergeCache)
200 		{
201 			if (mergeCachePath.exists)
202 				mergeCacheData = mergeCachePath.readText().jsonParse!MergeCache;
203 			haveMergeCache = true;
204 		}
205 
206 		return mergeCacheData;
207 	}
208 
209 	private void saveMergeCache()
210 	{
211 		std.file.write(mergeCachePath(), toJson(mergeCache));
212 	}
213 
214 	private @property string mergeCachePath()
215 	{
216 		return buildPath(git.gitDir, "ae-sys-d-mergecache.json");
217 	}
218 
219 	// Merge
220 
221 	/// Returns the hash of the merge between the base and branch commits.
222 	/// Performs the merge if necessary. Caches the result.
223 	public string getMerge(string base, string branch)
224 	{
225 		return getMergeImpl(base, branch, false, 0);
226 	}
227 
228 	/// Returns the resulting hash when reverting the branch from the base commit.
229 	/// Performs the revert if necessary. Caches the result.
230 	/// mainline is the 1-based mainline index (as per `man git-revert`),
231 	/// or 0 if commit is not a merge commit.
232 	public string getRevert(string base, string branch, int mainline)
233 	{
234 		return getMergeImpl(base, branch, true, mainline);
235 	}
236 
237 	private string getMergeImpl(string base, string branch, bool revert, int mainline)
238 	{
239 		auto hit = mergeCache.find!(entry =>
240 			entry.base == base &&
241 			entry.branch == branch &&
242 			entry.revert == revert &&
243 			entry.mainline == mainline)();
244 		if (!hit.empty)
245 			return hit.front.result;
246 
247 		performMerge(base, branch, revert, mainline);
248 
249 		auto head = getHead();
250 		mergeCache ~= MergeInfo(base, branch, revert, mainline, head);
251 		saveMergeCache();
252 		return head;
253 	}
254 
255 	private static const string mergeCommitMessage = "ae.sys.d merge";
256 	private static const string revertCommitMessage = "ae.sys.d revert";
257 
258 	// Performs a merge or revert.
259 	private void performMerge(string base, string branch, bool revert, int mainline)
260 	{
261 		needHead(base);
262 		currentHead = null;
263 
264 		log("%s %s into %s.".format(revert ? "Reverting" : "Merging", branch, base));
265 
266 		scope(exit) saveState();
267 
268 		scope (failure)
269 		{
270 			if (!revert)
271 			{
272 				log("Aborting merge...");
273 				git.run("merge", "--abort");
274 			}
275 			else
276 			{
277 				log("Aborting revert...");
278 				git.run("revert", "--abort");
279 			}
280 			clean = false;
281 		}
282 
283 		void doMerge()
284 		{
285 			if (!revert)
286 				git.run("merge", "--no-ff", "-m", mergeCommitMessage, branch);
287 			else
288 			{
289 				string[] args = ["revert", "--no-edit"];
290 				if (mainline)
291 					args ~= ["--mainline", text(mainline)];
292 				args ~= [branch];
293 				git.run(args);
294 			}
295 		}
296 
297 		if (git.path.baseName() == "dmd")
298 		{
299 			try
300 				doMerge();
301 			catch (Exception)
302 			{
303 				log("Merge failed. Attempting conflict resolution...");
304 				git.run("checkout", "--theirs", "test");
305 				git.run("add", "test");
306 				if (!revert)
307 					git.run("-c", "rerere.enabled=false", "commit", "-m", mergeCommitMessage);
308 				else
309 					git.run("revert", "--continue");
310 			}
311 		}
312 		else
313 			doMerge();
314 
315 		log("Merge successful.");
316 	}
317 
318 	/// Finds and returns the merge parents of the given merge commit.
319 	/// Queries the git repository if necessary. Caches the result.
320 	public MergeInfo getMergeInfo(string merge)
321 	{
322 		auto hit = mergeCache.find!(entry => entry.result == merge && !entry.revert)();
323 		if (!hit.empty)
324 			return hit.front;
325 
326 		auto parents = git.query(["log", "--pretty=%P", "-n", "1", merge]).split();
327 		enforce(parents.length > 1, "Not a merge: " ~ merge);
328 		enforce(parents.length == 2, "Too many parents: " ~ merge);
329 
330 		auto info = MergeInfo(parents[0], parents[1], false, 0, merge);
331 		mergeCache ~= info;
332 		return info;
333 	}
334 
335 	/// Follows the string of merges starting from the given
336 	/// head commit, up till the merge with the given branch.
337 	/// Then, reapplies all merges in order,
338 	/// except for that with the given branch.
339 	public string getUnMerge(string head, string branch)
340 	{
341 		// This could be optimized using an interactive rebase
342 
343 		auto info = getMergeInfo(head);
344 		if (info.branch == branch)
345 			return info.base;
346 
347 		return getMerge(getUnMerge(info.base, branch), info.branch);
348 	}
349 
350 	// Branches, forks and customization
351 
352 	/// Return SHA1 of the given remote ref.
353 	/// Fetches the remote first, unless offline mode is on.
354 	string getRemoteRef(string remote, string remoteRef, string localRef)
355 	{
356 		if (!offline)
357 		{
358 			log("Fetching from %s (%s -> %s) ...".format(remote, remoteRef, localRef));
359 			git.run("fetch", remote, "+%s:%s".format(remoteRef, localRef));
360 		}
361 		return getRef(localRef);
362 	}
363 
364 	/// Return SHA1 of the given pull request #.
365 	/// Fetches the pull request first, unless offline mode is on.
366 	string getPull(int pull)
367 	{
368 		return getRemoteRef(
369 			"origin",
370 			"refs/pull/%d/head".format(pull),
371 			"refs/digger/pull/%d".format(pull),
372 		);
373 	}
374 
375 	/// Return SHA1 of the given GitHub fork.
376 	/// Fetches the fork first, unless offline mode is on.
377 	/// (This is a thin wrapper around getRemoteBranch.)
378 	string getFork(string user, string branch)
379 	{
380 		enforce(user  .match(re!`^\w[\w\-]*$`), "Bad remote name");
381 		enforce(branch.match(re!`^\w[\w\-\.]*$`), "Bad branch name");
382 
383 		return getRemoteRef(
384 			"https://github.com/%s/%s".format(user, name),
385 			"refs/heads/%s".format(branch),
386 			"refs/digger/fork/%s/%s".format(user, branch),
387 		);
388 	}
389 
390 	/// Find the child of a commit, and, if the commit was a merge,
391 	/// the mainline index of said commit for the child.
392 	void getChild(string branch, string commit, out string child, out int mainline)
393 	{
394 		needCommit(branch);
395 
396 		log("Querying history for commit children...");
397 		auto history = git.getHistory([branch]);
398 
399 		bool[Hash] seen;
400 		void visit(Commit* commit)
401 		{
402 			if (commit.hash !in seen)
403 			{
404 				seen[commit.hash] = true;
405 				foreach (parent; commit.parents)
406 					visit(parent);
407 			}
408 		}
409 		auto branchHash = branch.toCommitHash();
410 		auto pBranchCommit = branchHash in history.commits;
411 		enforce(pBranchCommit, "Can't find commit " ~ branch ~" in history");
412 		visit(*pBranchCommit);
413 
414 		auto commitHash = commit.toCommitHash();
415 		auto pCommit = commitHash in history.commits;
416 		enforce(pCommit, "Can't find commit in history");
417 		auto children = (*pCommit).children;
418 		enforce(children.length, "Commit has no children");
419 		children = children.filter!(child => child.hash in seen).array();
420 		enforce(children.length, "Commit has no children under specified branch");
421 		enforce(children.length == 1, "Commit has more than one child");
422 		auto childCommit = children[0];
423 		child = childCommit.hash.toString();
424 
425 		if (childCommit.parents.length == 1)
426 			mainline = 0;
427 		else
428 		{
429 			enforce(childCommit.parents.length == 2, "Can't get mainline of multiple-branch merges");
430 			if (childCommit.parents[0] is *pCommit)
431 				mainline = 2;
432 			else
433 				mainline = 1;
434 
435 			auto mergeInfo = MergeInfo(
436 				childCommit.parents[0].hash.toString(),
437 				childCommit.parents[1].hash.toString(),
438 				true, mainline, commit);
439 			if (!mergeCache.canFind(mergeInfo))
440 			{
441 				mergeCache ~= mergeInfo;
442 				saveMergeCache();
443 			}
444 		}
445 	}
446 
447 	// State saving and checking
448 
449 	struct FileState
450 	{
451 		bool isLink;
452 		ulong size;
453 		StdTime modificationTime;
454 	}
455 
456 	FileState getFileState(string file)
457 	{
458 		assert(verify);
459 		auto path = git.path.buildPath(file);
460 		auto de = DirEntry(path);
461 		return FileState(de.isSymlink, de.size, de.timeLastModified.stdTime);
462 	}
463 
464 	alias RepositoryState = FileState[string];
465 
466 	/// Return the working tree "state".
467 	/// This returns a file list, along with size and modification time.
468 	RepositoryState getState()
469 	{
470 		assert(verify);
471 		auto files = git.query(["ls-files"]).splitLines();
472 		RepositoryState state;
473 		foreach (file; files)
474 			state[file] = getFileState(file);
475 		return state;
476 	}
477 
478 	private @property string workTreeStatePath()
479 	{
480 		assert(verify);
481 		return buildPath(git.gitDir, "ae-sys-d-worktree.json");
482 	}
483 
484 	/// Save the state of the working tree for versioned files
485 	/// to a .json file, which can later be verified with checkState.
486 	/// This should be called after any git command which mutates the git state.
487 	void saveState()
488 	{
489 		if (!verify)
490 			return;
491 		std.file.write(workTreeStatePath, getState().toJson());
492 	}
493 
494 	/// Save the state of just one file.
495 	/// This should be called after automatic edits to repository files during a build.
496 	/// The file parameter should be relative to the directory root, and use forward slashes.
497 	void saveFileState(string file)
498 	{
499 		if (!verify)
500 			return;
501 		if (!workTreeStatePath.exists)
502 			return;
503 		auto state = workTreeStatePath.readText.jsonParse!RepositoryState();
504 		state[file] = getFileState(file);
505 		std.file.write(workTreeStatePath, state.toJson());
506 	}
507 
508 	/// Verify that the state of the working tree matches the one
509 	/// when saveState was last called. Throw an exception otherwise.
510 	/// This and clearState should be called before any git command
511 	/// which destroys working directory changes.
512 	void checkState()
513 	{
514 		if (!verify)
515 			return;
516 		if (!workTreeStatePath.exists)
517 			return;
518 		auto savedState = workTreeStatePath.readText.jsonParse!RepositoryState();
519 		auto currentState = getState();
520 		try
521 		{
522 			foreach (file, fileState; currentState)
523 			{
524 				enforce(file in savedState, "New file: " ~ file);
525 				enforce(savedState[file].isLink == fileState.isLink,
526 					"File modified: %s (is link changed, before: %s, after: %s)".format(file, savedState[file].isLink, fileState.isLink));
527 				if (fileState.isLink)
528 					continue; // Correct lstat is too hard, just skip symlinks
529 				enforce(savedState[file].size == fileState.size,
530 					"File modified: %s (size changed, before: %s, after: %s)".format(file, savedState[file].size, fileState.size));
531 				enforce(savedState[file].modificationTime == fileState.modificationTime,
532 					"File modified: %s (modification time changed, before: %s, after: %s)".format(file, SysTime(savedState[file].modificationTime), SysTime(fileState.modificationTime)));
533 				assert(savedState[file] == fileState);
534 			}
535 		}
536 		catch (Exception e)
537 			throw new Exception(
538 				"The worktree has changed since the last time this software updated it.\n" ~
539 				"Specifically:\n" ~
540 				"    " ~ e.msg ~ "\n\n" ~
541 				"Aborting to avoid overwriting your changes.\n" ~
542 				"To continue:\n" ~
543 				" 1. Commit / stash / back up your changes, if you wish to keep them\n" ~
544 				" 2. Delete " ~ workTreeStatePath ~ "\n" ~
545 				" 3. Try this operation again."
546 			);
547 	}
548 
549 	/// Delete the saved working tree state, if any.
550 	void clearState()
551 	{
552 		if (!verify)
553 			return;
554 		if (workTreeStatePath.exists)
555 			workTreeStatePath.remove();
556 	}
557 
558 	// Misc
559 
560 	/// Reset internal state.
561 	protected void reset()
562 	{
563 		currentHead = null;
564 		clean = false;
565 		haveMergeCache = false;
566 		mergeCacheData = null;
567 	}
568 
569 	/// Override to add logging.
570 	protected abstract void log(string line);
571 }
572 
573 /// Used to communicate that a "reset --hard" failed.
574 /// Generally this indicates git repository corruption.
575 mixin DeclareException!q{RepositoryCleanException};