1 /**
2  * Code to manage a D component repository.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.sys.d.repo;
15 
16 import std.algorithm;
17 import std.conv : text;
18 import std.datetime : SysTime;
19 import std.exception;
20 import std.file;
21 import std.process : environment;
22 import std.range;
23 import std.regex;
24 import std.string;
25 import std.path;
26 
27 import ae.sys.git;
28 import ae.utils.exception;
29 import ae.utils.json;
30 import ae.utils.regex;
31 import ae.utils.time : StdTime;
32 
33 /// Base class for a managed repository.
34 class ManagedRepository
35 {
	/// Git repository we manage.
	public Repository git;

	/// Offline mode. When set, `update` and `getRemoteRef` skip fetching,
	/// and `performCheckout` does not retry a failed checkout after an update.
	public bool offline;

	/// Verify working tree state to make sure we don't clobber user changes?
	/// When unset, `saveState`, `saveFileState`, `checkState` and `clearState`
	/// return immediately without doing anything.
	public bool verify;
44 
	/// Ensure we have a repository.
	/// This is an assertion on `git.path` being set; it does not
	/// create or clone anything.
	public void needRepo()
	{
		assert(git.path, "No repository");
	}
50 
51 	public @property string name() { needRepo(); return git.path.baseName; }
52 
53 	// Head
54 
55 	/// Ensure the repository's HEAD is as indicated.
56 	public void needHead(string hash)
57 	{
58 		needClean();
59 		if (getHead() == hash)
60 			return;
61 
62 		try
63 			performCheckout(hash);
64 		catch (Exception e)
65 		{
66 			log("Error checking out %s: %s".format(hash, e));
67 
68 			// Might be a GC-ed merge. Try to recreate the merge
69 			auto hit = mergeCache.find!(entry => entry.result == hash)();
70 			enforce(!hit.empty, "Unknown hash %s".format(hash));
71 			performMerge(hit.front.base, hit.front.branch, hit.front.revert, hit.front.mainline);
72 			enforce(getHead() == hash, "Unexpected merge result: expected %s, got %s".format(hash, getHead()));
73 		}
74 	}
75 
	/// Cached result of `getHead`. `null` means "not known":
	/// `getHead` re-queries git, `performCheckout` sets it,
	/// and `performMerge` / `reset` clear it.
	private string currentHead = null;
77 
78 	/// Returns the SHA1 of the given named ref.
79 	public string getRef(string name)
80 	{
81 		return git.query("rev-parse", name);
82 	}
83 
84 	/// Return the commit the repository HEAD is pointing at.
85 	/// Cache the result.
86 	public string getHead()
87 	{
88 		if (!currentHead)
89 			currentHead = getRef("HEAD");
90 
91 		return currentHead;
92 	}
93 
94 	protected void performCheckout(string hash)
95 	{
96 		needClean();
97 
98 		log("Checking out %s commit %s...".format(name, hash));
99 
100 		if (offline)
101 			git.run("checkout", hash);
102 		else
103 		{
104 			try
105 				git.run("checkout", hash);
106 			catch (Exception e)
107 			{
108 				log("Checkout failed, updating and retrying...");
109 				update();
110 				git.run("checkout", hash);
111 			}
112 		}
113 
114 		saveState();
115 		currentHead = hash;
116 	}
117 
118 	/// Update the remote.
119 	public void update()
120 	{
121 		if (!offline)
122 		{
123 			needRepo();
124 			log("Updating " ~ name ~ "...");
125 			git.run("-c", "fetch.recurseSubmodules=false", "remote", "update", "--prune");
126 			git.run("-c", "fetch.recurseSubmodules=false", "fetch", "--tags");
127 		}
128 	}
129 
130 	// Clean
131 
	/// Whether the working copy is known to be clean.
	/// Set by `needClean` after a successful cleanup; cleared by
	/// `reset` and when a merge / revert fails in `performMerge`.
	bool clean = false;
133 
134 	/// Ensure the repository's working copy is clean.
135 	public void needClean()
136 	{
137 		if (clean)
138 			return;
139 		performCleanup();
140 		clean = true;
141 	}
142 
143 	private void performCleanup()
144 	{
145 		checkState();
146 		clearState();
147 
148 		log("Cleaning repository %s...".format(name));
149 		needRepo();
150 		try
151 		{
152 			git.run("reset", "--hard");
153 			git.run("clean", "--force", "-x", "-d", "--quiet");
154 		}
155 		catch (Exception e)
156 			throw new RepositoryCleanException(e.msg, e);
157 		saveState();
158 	}
159 
160 	// Merge cache
161 
	/// One merge cache entry: the inputs of a merge or revert,
	/// and the commit it resulted in.
	private static struct MergeInfo
	{
		/// Commit the operation was performed on, and the commit
		/// which was merged into it (or reverted from it).
		string base, branch;
		/// true if this entry describes a revert rather than a merge.
		bool revert = false;
		/// For reverts: 1-based mainline parent index, or 0 if none was given.
		int mainline = 0;
		/// Hash of the commit associated with this entry
		/// (for entries created by `getMergeImpl`, the HEAD after the operation).
		string result;
	}
	/// The merge cache is a flat list of entries, searched linearly.
	private alias MergeCache = MergeInfo[];
	/// In-memory copy of the merge cache. Access it via the `mergeCache` property.
	private MergeCache mergeCacheData;
	/// Whether `mergeCacheData` has been initialized (loaded from disk, if the file existed).
	private bool haveMergeCache;
172 
173 	private @property ref MergeCache mergeCache()
174 	{
175 		if (!haveMergeCache)
176 		{
177 			if (mergeCachePath.exists)
178 				mergeCacheData = mergeCachePath.readText().jsonParse!MergeCache;
179 			haveMergeCache = true;
180 		}
181 
182 		return mergeCacheData;
183 	}
184 
185 	private void saveMergeCache()
186 	{
187 		std.file.write(mergeCachePath(), toJson(mergeCache));
188 	}
189 
190 	private @property string mergeCachePath()
191 	{
192 		needRepo();
193 		return buildPath(git.gitDir, "ae-sys-d-mergecache.json");
194 	}
195 
196 	// Merge
197 
198 	private void setupGitEnv()
199 	{
200 		string[string] mergeEnv;
201 		foreach (person; ["AUTHOR", "COMMITTER"])
202 		{
203 			mergeEnv["GIT_%s_DATE".format(person)] = "Thu, 01 Jan 1970 00:00:00 +0000";
204 			mergeEnv["GIT_%s_NAME".format(person)] = "ae.sys.d";
205 			mergeEnv["GIT_%s_EMAIL".format(person)] = "ae.sys.d\x40thecybershadow.net";
206 		}
207 		foreach (k, v; mergeEnv)
208 			environment[k] = v;
209 		// TODO: restore environment
210 	}
211 
212 	/// Returns the hash of the merge between the base and branch commits.
213 	/// Performs the merge if necessary. Caches the result.
214 	public string getMerge(string base, string branch)
215 	{
216 		return getMergeImpl(base, branch, false, 0);
217 	}
218 
219 	/// Returns the resulting hash when reverting the branch from the base commit.
220 	/// Performs the revert if necessary. Caches the result.
221 	/// mainline is the 1-based mainline index (as per `man git-revert`),
222 	/// or 0 if commit is not a merge commit.
223 	public string getRevert(string base, string branch, int mainline)
224 	{
225 		return getMergeImpl(base, branch, true, mainline);
226 	}
227 
228 	private string getMergeImpl(string base, string branch, bool revert, int mainline)
229 	{
230 		auto hit = mergeCache.find!(entry =>
231 			entry.base == base &&
232 			entry.branch == branch &&
233 			entry.revert == revert &&
234 			entry.mainline == mainline)();
235 		if (!hit.empty)
236 			return hit.front.result;
237 
238 		performMerge(base, branch, revert, mainline);
239 
240 		auto head = getHead();
241 		mergeCache ~= MergeInfo(base, branch, revert, mainline, head);
242 		saveMergeCache();
243 		return head;
244 	}
245 
	/// Commit message used for merge commits created by `performMerge`.
	private static const string mergeCommitMessage = "ae.sys.d merge";
	/// Commit message intended for reverts.
	/// NOTE(review): not referenced by the code visible in this class
	/// (reverts are run with `--no-edit`) - confirm whether it is still needed.
	private static const string revertCommitMessage = "ae.sys.d revert";
248 
249 	// Performs a merge or revert.
250 	private void performMerge(string base, string branch, bool revert, int mainline)
251 	{
252 		needHead(base);
253 		currentHead = null;
254 
255 		log("%s %s into %s.".format(revert ? "Reverting" : "Merging", branch, base));
256 
257 		scope (failure)
258 		{
259 			if (!revert)
260 			{
261 				log("Aborting merge...");
262 				git.run("merge", "--abort");
263 			}
264 			else
265 			{
266 				log("Aborting revert...");
267 				git.run("revert", "--abort");
268 			}
269 			clean = false;
270 		}
271 
272 		void doMerge()
273 		{
274 			setupGitEnv();
275 			if (!revert)
276 				git.run("merge", "--no-ff", "-m", mergeCommitMessage, branch);
277 			else
278 			{
279 				string[] args = ["revert", "--no-edit"];
280 				if (mainline)
281 					args ~= ["--mainline", text(mainline)];
282 				args ~= [branch];
283 				git.run(args);
284 			}
285 		}
286 
287 		if (git.path.baseName() == "dmd")
288 		{
289 			try
290 				doMerge();
291 			catch (Exception)
292 			{
293 				log("Merge failed. Attempting conflict resolution...");
294 				git.run("checkout", "--theirs", "test");
295 				git.run("add", "test");
296 				if (!revert)
297 					git.run("-c", "rerere.enabled=false", "commit", "-m", mergeCommitMessage);
298 				else
299 					git.run("revert", "--continue");
300 			}
301 		}
302 		else
303 			doMerge();
304 
305 		saveState();
306 		log("Merge successful.");
307 	}
308 
309 	/// Finds and returns the merge parents of the given merge commit.
310 	/// Queries the git repository if necessary. Caches the result.
311 	public MergeInfo getMergeInfo(string merge)
312 	{
313 		auto hit = mergeCache.find!(entry => entry.result == merge && !entry.revert)();
314 		if (!hit.empty)
315 			return hit.front;
316 
317 		auto parents = git.query(["log", "--pretty=%P", "-n", "1", merge]).split();
318 		enforce(parents.length > 1, "Not a merge: " ~ merge);
319 		enforce(parents.length == 2, "Too many parents: " ~ merge);
320 
321 		auto info = MergeInfo(parents[0], parents[1], false, 0, merge);
322 		mergeCache ~= info;
323 		return info;
324 	}
325 
326 	/// Follows the string of merges starting from the given
327 	/// head commit, up till the merge with the given branch.
328 	/// Then, reapplies all merges in order,
329 	/// except for that with the given branch.
330 	public string getUnMerge(string head, string branch)
331 	{
332 		// This could be optimized using an interactive rebase
333 
334 		auto info = getMergeInfo(head);
335 		if (info.branch == branch)
336 			return info.base;
337 
338 		return getMerge(getUnMerge(info.base, branch), info.branch);
339 	}
340 
341 	// Branches, forks and customization
342 
343 	/// Return SHA1 of the given remote ref.
344 	/// Fetches the remote first, unless offline mode is on.
345 	string getRemoteRef(string remote, string remoteRef, string localRef)
346 	{
347 		needRepo();
348 		if (!offline)
349 		{
350 			log("Fetching from %s (%s -> %s) ...".format(remote, remoteRef, localRef));
351 			git.run("fetch", remote, "+%s:%s".format(remoteRef, localRef));
352 		}
353 		return getRef(localRef);
354 	}
355 
356 	/// Return SHA1 of the given pull request #.
357 	/// Fetches the pull request first, unless offline mode is on.
358 	string getPull(int pull)
359 	{
360 		return getRemoteRef(
361 			"origin",
362 			"refs/pull/%d/head".format(pull),
363 			"refs/digger/pull/%d".format(pull),
364 		);
365 	}
366 
367 	/// Return SHA1 of the given GitHub fork.
368 	/// Fetches the fork first, unless offline mode is on.
369 	/// (This is a thin wrapper around getRemoteBranch.)
370 	string getFork(string user, string branch)
371 	{
372 		enforce(user  .match(re!`^\w[\w\-]*$`), "Bad remote name");
373 		enforce(branch.match(re!`^\w[\w\-\.]*$`), "Bad branch name");
374 
375 		return getRemoteRef(
376 			"https://github.com/%s/%s".format(user, name),
377 			"refs/heads/%s".format(branch),
378 			"refs/digger/fork/%s/%s".format(user, branch),
379 		);
380 	}
381 
382 	/// Find the child of a commit, and, if the commit was a merge,
383 	/// the mainline index of said commit for the child.
384 	void getChild(string branch, string commit, out string child, out int mainline)
385 	{
386 		log("Querying history for commit children...");
387 		auto history = git.getHistory();
388 
389 		bool[Hash] seen;
390 		void visit(Commit* commit)
391 		{
392 			if (commit.hash !in seen)
393 			{
394 				seen[commit.hash] = true;
395 				foreach (parent; commit.parents)
396 					visit(parent);
397 			}
398 		}
399 		auto branchHash = branch.toCommitHash();
400 		auto pBranchCommit = branchHash in history.commits;
401 		enforce(pBranchCommit, "Can't find commit in history");
402 		visit(*pBranchCommit);
403 
404 		auto commitHash = commit.toCommitHash();
405 		auto pCommit = commitHash in history.commits;
406 		enforce(pCommit, "Can't find commit in history");
407 		auto children = (*pCommit).children;
408 		enforce(children.length, "Commit has no children");
409 		children = children.filter!(child => child.hash in seen).array();
410 		enforce(children.length, "Commit has no children under specified branch");
411 		enforce(children.length == 1, "Commit has more than one child");
412 		auto childCommit = children[0];
413 		child = childCommit.hash.toString();
414 
415 		if (childCommit.parents.length == 1)
416 			mainline = 0;
417 		else
418 		{
419 			enforce(childCommit.parents.length == 2, "Can't get mainline of multiple-branch merges");
420 			if (childCommit.parents[0] is *pCommit)
421 				mainline = 2;
422 			else
423 				mainline = 1;
424 
425 			auto mergeInfo = MergeInfo(
426 				childCommit.parents[0].hash.toString(),
427 				childCommit.parents[1].hash.toString(),
428 				true, mainline, commit);
429 			if (!mergeCache.canFind(mergeInfo))
430 			{
431 				mergeCache ~= mergeInfo;
432 				saveMergeCache();
433 			}
434 		}
435 	}
436 
437 	// State saving and checking
438 
	/// Snapshot of the file system metadata of one file,
	/// as recorded by `getFileState` and compared by `checkState`.
	struct FileState
	{
		/// Whether the path is a symbolic link.
		bool isLink;
		/// File size, as reported by the directory entry.
		ulong size;
		/// Last modification time, as a `SysTime.stdTime` value.
		StdTime modificationTime;
	}
445 
446 	FileState getFileState(string file)
447 	{
448 		assert(verify);
449 		auto path = git.path.buildPath(file);
450 		auto de = DirEntry(path);
451 		return FileState(de.isSymlink, de.size, de.timeLastModified.stdTime);
452 	}
453 
	/// State of the working tree: the state of each file,
	/// indexed by the file's path as printed by `git ls-files`.
	alias RepositoryState = FileState[string];
455 
456 	/// Return the working tree "state".
457 	/// This returns a file list, along with size and modification time.
458 	RepositoryState getState()
459 	{
460 		assert(verify);
461 		needRepo();
462 		auto files = git.query(["ls-files"]).splitLines();
463 		RepositoryState state;
464 		foreach (file; files)
465 			state[file] = getFileState(file);
466 		return state;
467 	}
468 
469 	private @property string workTreeStatePath()
470 	{
471 		assert(verify);
472 		needRepo();
473 		return buildPath(git.gitDir, "ae-sys-d-worktree.json");
474 	}
475 
476 	/// Save the state of the working tree for versioned files
477 	/// to a .json file, which can later be verified with checkState.
478 	/// This should be called after any git command which mutates the git state.
479 	void saveState()
480 	{
481 		if (!verify)
482 			return;
483 		std.file.write(workTreeStatePath, getState().toJson());
484 	}
485 
486 	/// Save the state of just one file.
487 	/// This should be called after automatic edits to repository files during a build.
488 	/// The file parameter should be relative to the directory root, and use forward slashes.
489 	void saveFileState(string file)
490 	{
491 		if (!verify)
492 			return;
493 		if (!workTreeStatePath.exists)
494 			return;
495 		auto state = workTreeStatePath.readText.jsonParse!RepositoryState();
496 		state[file] = getFileState(file);
497 		std.file.write(workTreeStatePath, state.toJson());
498 	}
499 
500 	/// Verify that the state of the working tree matches the one
501 	/// when saveState was last called. Throw an exception otherwise.
502 	/// This and clearState should be called before any git command
503 	/// which destroys working directory changes.
504 	void checkState()
505 	{
506 		if (!verify)
507 			return;
508 		if (!workTreeStatePath.exists)
509 			return;
510 		auto savedState = workTreeStatePath.readText.jsonParse!RepositoryState();
511 		auto currentState = getState();
512 		try
513 		{
514 			foreach (file, fileState; currentState)
515 			{
516 				enforce(file in savedState, "New file: " ~ file);
517 				enforce(savedState[file].isLink == fileState.isLink,
518 					"File modified: %s (is link changed, before: %s, after: %s)".format(file, savedState[file].isLink, fileState.isLink));
519 				if (fileState.isLink)
520 					continue; // Correct lstat is too hard, just skip symlinks
521 				enforce(savedState[file].size == fileState.size,
522 					"File modified: %s (size changed, before: %s, after: %s)".format(file, savedState[file].size, fileState.size));
523 				enforce(savedState[file].modificationTime == fileState.modificationTime,
524 					"File modified: %s (modification time changed, before: %s, after: %s)".format(file, SysTime(savedState[file].modificationTime), SysTime(fileState.modificationTime)));
525 				assert(savedState[file] == fileState);
526 			}
527 		}
528 		catch (Exception e)
529 			throw new Exception(e.msg ~ "\n" ~ "Save / commit your changes, then delete " ~ workTreeStatePath);
530 	}
531 
532 	/// Delete the saved working tree state, if any.
533 	void clearState()
534 	{
535 		if (!verify)
536 			return;
537 		if (workTreeStatePath.exists)
538 			workTreeStatePath.remove();
539 	}
540 
541 	// Misc
542 
543 	/// Reset internal state.
544 	protected void reset()
545 	{
546 		currentHead = null;
547 		clean = false;
548 		haveMergeCache = false;
549 		mergeCacheData = null;
550 	}
551 
	/// Override to add logging.
	/// Called with one progress / diagnostic message per call
	/// by the operations of this class.
	protected abstract void log(string line);
554 }
555 
/// Used to communicate that a "reset --hard" failed.
/// Generally this indicates git repository corruption.
/// Thrown by ManagedRepository's cleanup when `git reset` or `git clean`
/// fails; the original exception is passed along as the second
/// constructor argument.
mixin DeclareException!q{RepositoryCleanException};