1 /**
2  * Code to manage a D component repository.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.sys.d.repo;
15 
16 import std.algorithm;
17 import std.conv : text;
18 import std.exception;
19 import std.file;
20 import std.process : environment;
21 import std.range;
22 import std.regex;
23 import std.string;
24 import std.path;
25 
26 import ae.sys.git;
27 import ae.utils.exception;
28 import ae.utils.json;
29 import ae.utils.regex;
30 import ae.utils.time : StdTime;
31 
32 /// Base class for a managed repository.
33 class ManagedRepository
34 {
35 	/// Git repository we manage.
36 	public Repository git;
37 
38 	/// Should we fetch the latest stuff?
39 	public bool offline;
40 
41 	/// Verify working tree state to make sure we don't clobber user changes?
42 	public bool verify;
43 
44 	/// Ensure we have a repository.
45 	public void needRepo()
46 	{
47 		assert(git.path, "No repository");
48 	}
49 
50 	public @property string name() { needRepo(); return git.path.baseName; }
51 
52 	// Head
53 
54 	/// Ensure the repository's HEAD is as indicated.
55 	public void needHead(string hash)
56 	{
57 		needClean();
58 		if (getHead() == hash)
59 			return;
60 
61 		try
62 			performCheckout(hash);
63 		catch (Exception e)
64 		{
65 			log("Error checking out %s: %s".format(hash, e));
66 
67 			// Might be a GC-ed merge. Try to recreate the merge
68 			auto hit = mergeCache.find!(entry => entry.result == hash)();
69 			enforce(!hit.empty, "Unknown hash %s".format(hash));
70 			performMerge(hit.front.base, hit.front.branch, hit.front.revert, hit.front.mainline);
71 			enforce(getHead() == hash, "Unexpected merge result: expected %s, got %s".format(hash, getHead()));
72 		}
73 	}
74 
	/// Cached hash of the commit HEAD points at, or null when not known.
	/// Filled in by getHead and performCheckout; cleared by performMerge and reset.
	private string currentHead = null;
76 
77 	/// Returns the SHA1 of the given named ref.
78 	public string getRef(string name)
79 	{
80 		return git.query("rev-parse", name);
81 	}
82 
83 	/// Return the commit the repository HEAD is pointing at.
84 	/// Cache the result.
85 	public string getHead()
86 	{
87 		if (!currentHead)
88 			currentHead = getRef("HEAD");
89 
90 		return currentHead;
91 	}
92 
93 	protected void performCheckout(string hash)
94 	{
95 		needClean();
96 
97 		log("Checking out %s commit %s...".format(name, hash));
98 
99 		if (offline)
100 			git.run("checkout", hash);
101 		else
102 		{
103 			try
104 				git.run("checkout", hash);
105 			catch (Exception e)
106 			{
107 				log("Checkout failed, updating and retrying...");
108 				update();
109 				git.run("checkout", hash);
110 			}
111 		}
112 
113 		saveState();
114 		currentHead = hash;
115 	}
116 
117 	/// Update the remote.
118 	public void update()
119 	{
120 		if (!offline)
121 		{
122 			needRepo();
123 			log("Updating " ~ name ~ "...");
124 			git.run("-c", "fetch.recurseSubmodules=false", "remote", "update", "--prune");
125 			git.run("-c", "fetch.recurseSubmodules=false", "fetch", "--tags");
126 		}
127 	}
128 
129 	// Clean
130 
	/// Whether the working copy is believed to be clean.
	/// Set by needClean after a cleanup; cleared by reset
	/// and when a merge / revert fails.
	bool clean = false;
132 
133 	/// Ensure the repository's working copy is clean.
134 	public void needClean()
135 	{
136 		if (clean)
137 			return;
138 		performCleanup();
139 		clean = true;
140 	}
141 
142 	private void performCleanup()
143 	{
144 		checkState();
145 		clearState();
146 
147 		log("Cleaning repository %s...".format(name));
148 		needRepo();
149 		try
150 		{
151 			git.run("reset", "--hard");
152 			git.run("clean", "--force", "-x", "-d", "--quiet");
153 		}
154 		catch (Exception e)
155 			throw new RepositoryCleanException(e.msg, e);
156 		saveState();
157 	}
158 
159 	// Merge cache
160 
	/// One merge cache entry: the inputs of a merge or revert
	/// operation, and the commit associated with it.
	/// Entries are constructed positionally and (de)serialized as JSON,
	/// so the order and names of the fields matter.
	private static struct MergeInfo
	{
		/// The commit the operation is applied on top of,
		/// and the commit being merged (or reverted).
		string base, branch;
		/// True if this entry describes a revert rather than a merge.
		bool revert = false;
		/// Mainline index used for the revert, or 0.
		int mainline = 0;
		/// Hash of the resulting commit.
		string result;
	}
	/// The merge cache is a plain array of entries.
	private alias MergeCache = MergeInfo[];
	/// In-memory merge cache contents. Use the mergeCache property
	/// to access it, which loads it from disk when needed.
	private MergeCache mergeCacheData;
	/// Whether mergeCacheData has been initialized by the mergeCache property.
	private bool haveMergeCache;
171 
172 	private @property ref MergeCache mergeCache()
173 	{
174 		if (!haveMergeCache)
175 		{
176 			if (mergeCachePath.exists)
177 				mergeCacheData = mergeCachePath.readText().jsonParse!MergeCache;
178 			haveMergeCache = true;
179 		}
180 
181 		return mergeCacheData;
182 	}
183 
184 	private void saveMergeCache()
185 	{
186 		std.file.write(mergeCachePath(), toJson(mergeCache));
187 	}
188 
189 	private @property string mergeCachePath()
190 	{
191 		needRepo();
192 		return buildPath(git.gitDir, "ae-sys-d-mergecache.json");
193 	}
194 
195 	// Merge
196 
197 	private void setupGitEnv()
198 	{
199 		string[string] mergeEnv;
200 		foreach (person; ["AUTHOR", "COMMITTER"])
201 		{
202 			mergeEnv["GIT_%s_DATE".format(person)] = "Thu, 01 Jan 1970 00:00:00 +0000";
203 			mergeEnv["GIT_%s_NAME".format(person)] = "ae.sys.d";
204 			mergeEnv["GIT_%s_EMAIL".format(person)] = "ae.sys.d\x40thecybershadow.net";
205 		}
206 		foreach (k, v; mergeEnv)
207 			environment[k] = v;
208 		// TODO: restore environment
209 	}
210 
211 	/// Returns the hash of the merge between the base and branch commits.
212 	/// Performs the merge if necessary. Caches the result.
213 	public string getMerge(string base, string branch)
214 	{
215 		return getMergeImpl(base, branch, false, 0);
216 	}
217 
218 	/// Returns the resulting hash when reverting the branch from the base commit.
219 	/// Performs the revert if necessary. Caches the result.
220 	/// mainline is the 1-based mainline index (as per `man git-revert`),
221 	/// or 0 if commit is not a merge commit.
222 	public string getRevert(string base, string branch, int mainline)
223 	{
224 		return getMergeImpl(base, branch, true, mainline);
225 	}
226 
227 	private string getMergeImpl(string base, string branch, bool revert, int mainline)
228 	{
229 		auto hit = mergeCache.find!(entry =>
230 			entry.base == base &&
231 			entry.branch == branch &&
232 			entry.revert == revert &&
233 			entry.mainline == mainline)();
234 		if (!hit.empty)
235 			return hit.front.result;
236 
237 		performMerge(base, branch, revert, mainline);
238 
239 		auto head = getHead();
240 		mergeCache ~= MergeInfo(base, branch, revert, mainline, head);
241 		saveMergeCache();
242 		return head;
243 	}
244 
	/// Commit message used for the merge commits created by performMerge.
	private static const string mergeCommitMessage = "ae.sys.d merge";
	/// Commit message for reverts.
	/// NOTE(review): not referenced anywhere in this class as visible here - confirm whether it is still needed.
	private static const string revertCommitMessage = "ae.sys.d revert";
247 
248 	// Performs a merge or revert.
249 	private void performMerge(string base, string branch, bool revert, int mainline)
250 	{
251 		needHead(base);
252 		currentHead = null;
253 
254 		log("%s %s into %s.".format(revert ? "Reverting" : "Merging", branch, base));
255 
256 		scope (failure)
257 		{
258 			if (!revert)
259 			{
260 				log("Aborting merge...");
261 				git.run("merge", "--abort");
262 			}
263 			else
264 			{
265 				log("Aborting revert...");
266 				git.run("revert", "--abort");
267 			}
268 			clean = false;
269 		}
270 
271 		void doMerge()
272 		{
273 			setupGitEnv();
274 			if (!revert)
275 				git.run("merge", "--no-ff", "-m", mergeCommitMessage, branch);
276 			else
277 			{
278 				string[] args = ["revert", "--no-edit"];
279 				if (mainline)
280 					args ~= ["--mainline", text(mainline)];
281 				args ~= [branch];
282 				git.run(args);
283 			}
284 		}
285 
286 		if (git.path.baseName() == "dmd")
287 		{
288 			try
289 				doMerge();
290 			catch (Exception)
291 			{
292 				log("Merge failed. Attempting conflict resolution...");
293 				git.run("checkout", "--theirs", "test");
294 				git.run("add", "test");
295 				if (!revert)
296 					git.run("-c", "rerere.enabled=false", "commit", "-m", mergeCommitMessage);
297 				else
298 					git.run("revert", "--continue");
299 			}
300 		}
301 		else
302 			doMerge();
303 
304 		saveState();
305 		log("Merge successful.");
306 	}
307 
308 	/// Finds and returns the merge parents of the given merge commit.
309 	/// Queries the git repository if necessary. Caches the result.
310 	public MergeInfo getMergeInfo(string merge)
311 	{
312 		auto hit = mergeCache.find!(entry => entry.result == merge && !entry.revert)();
313 		if (!hit.empty)
314 			return hit.front;
315 
316 		auto parents = git.query(["log", "--pretty=%P", "-n", "1", merge]).split();
317 		enforce(parents.length > 1, "Not a merge: " ~ merge);
318 		enforce(parents.length == 2, "Too many parents: " ~ merge);
319 
320 		auto info = MergeInfo(parents[0], parents[1], false, 0, merge);
321 		mergeCache ~= info;
322 		return info;
323 	}
324 
325 	/// Follows the string of merges starting from the given
326 	/// head commit, up till the merge with the given branch.
327 	/// Then, reapplies all merges in order,
328 	/// except for that with the given branch.
329 	public string getUnMerge(string head, string branch)
330 	{
331 		// This could be optimized using an interactive rebase
332 
333 		auto info = getMergeInfo(head);
334 		if (info.branch == branch)
335 			return info.base;
336 
337 		return getMerge(getUnMerge(info.base, branch), info.branch);
338 	}
339 
340 	// Branches, forks and customization
341 
342 	/// Return SHA1 of the given remote ref.
343 	/// Fetches the remote first, unless offline mode is on.
344 	string getRemoteRef(string remote, string remoteRef, string localRef)
345 	{
346 		needRepo();
347 		if (!offline)
348 		{
349 			log("Fetching from %s (%s -> %s) ...".format(remote, remoteRef, localRef));
350 			git.run("fetch", remote, "+%s:%s".format(remoteRef, localRef));
351 		}
352 		return getRef(localRef);
353 	}
354 
355 	/// Return SHA1 of the given pull request #.
356 	/// Fetches the pull request first, unless offline mode is on.
357 	string getPull(int pull)
358 	{
359 		return getRemoteRef(
360 			"origin",
361 			"refs/pull/%d/head".format(pull),
362 			"refs/digger/pull/%d".format(pull),
363 		);
364 	}
365 
366 	/// Return SHA1 of the given GitHub fork.
367 	/// Fetches the fork first, unless offline mode is on.
368 	/// (This is a thin wrapper around getRemoteBranch.)
369 	string getFork(string user, string branch)
370 	{
371 		enforce(user  .match(re!`^\w[\w\-]*$`), "Bad remote name");
372 		enforce(branch.match(re!`^\w[\w\-\.]*$`), "Bad branch name");
373 
374 		return getRemoteRef(
375 			"https://github.com/%s/%s".format(user, name),
376 			"refs/heads/%s".format(branch),
377 			"refs/digger/fork/%s/%s".format(user, branch),
378 		);
379 	}
380 
381 	/// Find the child of a commit, and, if the commit was a merge,
382 	/// the mainline index of said commit for the child.
383 	void getChild(string branch, string commit, out string child, out int mainline)
384 	{
385 		log("Querying history for commit children...");
386 		auto history = git.getHistory();
387 
388 		bool[Hash] seen;
389 		void visit(Commit* commit)
390 		{
391 			if (commit.hash !in seen)
392 			{
393 				seen[commit.hash] = true;
394 				foreach (parent; commit.parents)
395 					visit(parent);
396 			}
397 		}
398 		auto branchHash = branch.toCommitHash();
399 		auto pBranchCommit = branchHash in history.commits;
400 		enforce(pBranchCommit, "Can't find commit in history");
401 		visit(*pBranchCommit);
402 
403 		auto commitHash = commit.toCommitHash();
404 		auto pCommit = commitHash in history.commits;
405 		enforce(pCommit, "Can't find commit in history");
406 		auto children = (*pCommit).children;
407 		enforce(children.length, "Commit has no children");
408 		children = children.filter!(child => child.hash in seen).array();
409 		enforce(children.length, "Commit has no children under specified branch");
410 		enforce(children.length == 1, "Commit has more than one child");
411 		auto childCommit = children[0];
412 		child = childCommit.hash.toString();
413 
414 		if (childCommit.parents.length == 1)
415 			mainline = 0;
416 		else
417 		{
418 			enforce(childCommit.parents.length == 2, "Can't get mainline of multiple-branch merges");
419 			if (childCommit.parents[0] is *pCommit)
420 				mainline = 2;
421 			else
422 				mainline = 1;
423 
424 			auto mergeInfo = MergeInfo(
425 				childCommit.parents[0].hash.toString(),
426 				childCommit.parents[1].hash.toString(),
427 				true, mainline, commit);
428 			if (!mergeCache.canFind(mergeInfo))
429 			{
430 				mergeCache ~= mergeInfo;
431 				saveMergeCache();
432 			}
433 		}
434 	}
435 
436 	// State saving and checking
437 
	/// The properties of a file which are recorded in order
	/// to detect that it was changed (see getFileState).
	struct FileState
	{
		/// File size, as reported by DirEntry.size.
		ulong size;
		/// Last modification time of the file.
		StdTime modificationTime;
	}
443 
444 	FileState getFileState(string file)
445 	{
446 		assert(verify);
447 		auto path = git.path.buildPath(file);
448 		auto de = DirEntry(path);
449 		return FileState(de.size, de.timeLastModified.stdTime);
450 	}
451 
452 	alias RepositoryState = FileState[string];
453 
454 	/// Return the working tree "state".
455 	/// This returns a file list, along with size and modification time.
456 	RepositoryState getState()
457 	{
458 		assert(verify);
459 		needRepo();
460 		auto files = git.query(["ls-files"]).splitLines();
461 		RepositoryState state;
462 		foreach (file; files)
463 			state[file] = getFileState(file);
464 		return state;
465 	}
466 
467 	private @property string workTreeStatePath()
468 	{
469 		assert(verify);
470 		needRepo();
471 		return buildPath(git.gitDir, "ae-sys-d-worktree.json");
472 	}
473 
474 	/// Save the state of the working tree for versioned files
475 	/// to a .json file, which can later be verified with checkState.
476 	/// This should be called after any git command which mutates the git state.
477 	void saveState()
478 	{
479 		if (!verify)
480 			return;
481 		std.file.write(workTreeStatePath, getState().toJson());
482 	}
483 
484 	/// Save the state of just one file.
485 	/// This should be called after automatic edits to repository files during a build.
486 	/// The file parameter should be relative to the directory root, and use forward slashes.
487 	void saveFileState(string file)
488 	{
489 		if (!verify)
490 			return;
491 		if (!workTreeStatePath.exists)
492 			return;
493 		auto state = workTreeStatePath.readText.jsonParse!RepositoryState();
494 		state[file] = getFileState(file);
495 		std.file.write(workTreeStatePath, state.toJson());
496 	}
497 
498 	/// Verify that the state of the working tree matches the one
499 	/// when saveState was last called. Throw an exception otherwise.
500 	/// This and clearState should be called before any git command
501 	/// which destroys working directory changes.
502 	void checkState()
503 	{
504 		if (!verify)
505 			return;
506 		if (!workTreeStatePath.exists)
507 			return;
508 		auto savedState = workTreeStatePath.readText.jsonParse!RepositoryState();
509 		auto currentState = getState();
510 		try
511 		{
512 			foreach (file, fileState; currentState)
513 			{
514 				enforce(file in savedState, "New file: " ~ file);
515 				enforce(savedState[file] == fileState, "File modified: " ~ file);
516 			}
517 		}
518 		catch (Exception e)
519 			throw new Exception(e.msg ~ "\n" ~ "Save / commit your changes, then delete " ~ workTreeStatePath);
520 	}
521 
522 	/// Delete the saved working tree state, if any.
523 	void clearState()
524 	{
525 		if (!verify)
526 			return;
527 		if (workTreeStatePath.exists)
528 			workTreeStatePath.remove();
529 	}
530 
531 	// Misc
532 
533 	/// Reset internal state.
534 	protected void reset()
535 	{
536 		currentHead = null;
537 		clean = false;
538 		haveMergeCache = false;
539 		mergeCacheData = null;
540 	}
541 
	/// Override to add logging.
	/// Called with one progress or diagnostic message at a time.
	protected abstract void log(string line);
544 }
545 
546 /// Used to communicate that a "reset --hard" failed.
547 /// Generally this indicates git repository corruption.
548 mixin DeclareException!q{RepositoryCleanException};