1 /**
2  * Wrappers for the git command-line tools.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.sys.git;
15 
16 import std.algorithm;
17 import std.array;
18 import std.conv;
19 import std.exception;
20 import std.file;
21 import std.format;
22 import std.path;
23 import std.process;
24 import std.string;
25 import std.typecons : RefCounted;
26 import std.utf;
27 
28 import ae.sys.cmd;
29 import ae.sys.file;
30 import ae.utils.aa;
31 import ae.utils.array;
32 import ae.utils.meta;
33 import ae.utils.text;
34 
/// A handle to a git repository on disk, through which the
/// git command-line tools are invoked.
struct Repository
{
	/// Absolute path of the repository, as computed by the constructor.
	string path;
	/// The repository's git directory: `<path>/.git`, or the
	/// location named by a `.git` file at that path.
	string gitDir;

	/// Environment passed to every git process spawned through this object.
	string[string] environment;
	/// Leading part of every git command line (executable and common options).
	string[] commandPrefix;

	/// Extra options appended to `commandPrefix` when a `Repository` is constructed.
	static string[] globalOptions; // per-thread

	/// Open the repository at `path`.
	/// Throws: `Exception` if `path` does not exist, or if `path/.git`
	/// is a file which does not start with `gitdir: `.
	this(string path)
	{
		path = path.absolutePath();
		enforce(path.exists, "Repository path does not exist: " ~ path);
		gitDir = path.buildPath(".git");
		if (gitDir.exists && gitDir.isFile)
		{
			// ".git" is a file of the form "gitdir: <location>";
			// verify the prefix instead of blindly slicing it off.
			enum linkPrefix = "gitdir: ";
			auto link = gitDir.readText().strip();
			enforce(link.startsWith(linkPrefix), "Unrecognized .git file contents: " ~ gitDir);
			gitDir = path.buildNormalizedPath(link[linkPrefix.length .. $]);
		}
		//path = path.replace(`\`, `/`);
		this.path = path;
		this.commandPrefix = ["git",
			"-c", "core.autocrlf=false",
			"-c", "gc.autoDetach=false",
			"-C", path
		] ~ globalOptions;
		version (Windows) {} else
			this.environment["GIT_CONFIG_NOSYSTEM"] = "1";
		// Point the per-user configuration locations at the git directory.
		this.environment["HOME"] = gitDir;
		this.environment["XDG_CONFIG_HOME"] = gitDir;
	}

	invariant()
	{
		// The constructor always populates the environment,
		// so a null AA means the constructor was never run.
		assert(environment !is null, "Not initialized");
	}

	// Have just some primitives here.
	// Higher-level functionality can be added using UFCS.

	/// Run git with the given arguments, via the module-level `run`.
	void   run  (string[] args...) const { return .run  (commandPrefix ~ args, environment, path); }
	/// Run git with the given arguments via the module-level `query`,
	/// and return its result with the trailing line terminator removed.
	string query(string[] args...) const { return .query(commandPrefix ~ args, environment, path).chomp(); }
	/// Run git with the given arguments, and return true if it exited with status 0.
	bool   check(string[] args...) const { return spawnProcess(commandPrefix ~ args, environment, Config.none, path).wait() == 0; }
	/// Start git with the given arguments and the requested stream redirections.
	auto   pipe (string[] args, Redirect redirect)
	                               const { return pipeProcess(commandPrefix ~ args, redirect, environment, Config.none, path); }
	/// ditto (redirecting stdin and stdout)
	auto   pipe (string[] args...) const { return pipe(args, Redirect.stdin | Redirect.stdout); }

	/// Certain git commands (notably, bisect) must
	/// be run in the repository's root directory.
	private string workPath(string cmd) const
	{
		switch (cmd)
		{
			case "bisect":
			case "submodule":
				return path;
			default:
				return null;
		}
	}

	/// Load the commit graph by parsing `git log --all --pretty=raw`
	/// (plus `extraRefs`), then record every ref listed by
	/// `git show-ref --dereference` which points at a loaded commit.
	/// Throws: `Exception` on unrecognized `git log` output.
	History getHistory(string[] extraRefs = null) const
	{
		History history;

		// Look up a commit by hash, allocating it (with the next sequential ID) on first sight.
		Commit* getCommit(Hash hash)
		{
			auto pcommit = hash in history.commits;
			return pcommit ? *pcommit : (history.commits[hash] = new Commit(history.numCommits++, hash));
		}

		Commit* commit;      // commit whose header/message lines are currently being read
		string currentBlock; // non-null while skipping a multi-line header (its description)

		foreach (line; query([`log`, `--all`, `--pretty=raw`] ~ extraRefs).split('\n'))
		{
			if (!line.length)
			{
				// An empty line ends any multi-line header being skipped.
				if (currentBlock)
					currentBlock = null;
				continue;
			}

			if (currentBlock)
			{
				// Continuation lines of a multi-line header start with a space.
				enforce(line.startsWith(" "), "Expected " ~ currentBlock ~ " line in git log");
				continue;
			}

			if (line.startsWith("commit "))
			{
				auto hash = line[7..$].toCommitHash();
				commit = getCommit(hash);
			}
			else
			if (line.startsWith("tree "))
				continue;
			else
			if (line.startsWith("parent "))
			{
				auto hash = line[7..$].toCommitHash();
				auto parent = getCommit(hash);
				// Link both directions of the graph.
				commit.parents ~= parent;
				parent.children ~= commit;
			}
			else
			if (line.startsWith("author "))
				commit.author = line[7..$];
			else
			if (line.startsWith("committer "))
			{
				commit.committer = line[10..$];
				// The second-to-last space-separated field is taken as the time.
				commit.time = line.split(" ")[$-2].to!int();
			}
			else
			if (line.startsWith("    "))
				commit.message ~= line[4..$]; // message lines are indented by four spaces
			else
			if (line.startsWith("gpgsig "))
				currentBlock = "GPG signature";
			else
			if (line.startsWith("mergetag "))
				currentBlock = "Tag merge";
			else
				enforce(false, "Unknown line in git log: " ~ line);
		}

		foreach (line; query([`show-ref`, `--dereference`]).splitLines())
		{
			// Each line: 40 hex digits, one separator character, ref name.
			auto h = line[0..40].toCommitHash();
			if (h in history.commits)
				history.refs[line[41..$]] = h;
		}

		return history;
	}

	/// Reads objects through a running `git cat-file --batch` process.
	struct ObjectReaderImpl
	{
		/// Whether `pipes` refers to a started process
		/// (false for a default-initialized instance).
		bool initialized;
		ProcessPipes pipes;

		this(ProcessPipes pipes)
		{
			this.pipes = pipes;
			initialized = true;
		}

		/// Read the object with the given name.
		/// Throws: `Exception` on a malformed or truncated response.
		GitObject read(string name)
		{
			pipes.stdin.writeln(name);
			pipes.stdin.flush();

			// Expected header: "<hash> <type> <size>"
			auto headerLine = pipes.stdout.safeReadln().strip();
			auto header = headerLine.split(" ");
			enforce(header.length == 3, "Malformed header during cat-file: " ~ headerLine);
			auto hash = header[0].toCommitHash();

			GitObject obj;
			obj.hash = hash;
			obj.type = header[1];
			auto size = to!size_t(header[2]);
			if (size)
			{
				auto data = new ubyte[size];
				auto read = pipes.stdout.rawRead(data);
				enforce(read.length == size, "Unexpected EOF during cat-file");
				obj.data = data.assumeUnique();
			}

			// The object contents are followed by a single line feed.
			char[1] lf;
			pipes.stdout.rawRead(lf[]);
			enforce(lf[0] == '\n', "Terminating newline expected");

			return obj;
		}

		/// Read the object with the given hash, and
		/// verify that the returned object has that hash.
		GitObject read(Hash hash)
		{
			auto obj = read(hash.toString());
			enforce(obj.hash == hash, "Unexpected object during cat-file");
			return obj;
		}

		~this()
		{
			// Guarded like the writer types: a default-initialized
			// instance has no process to close or wait for.
			if (initialized)
			{
				pipes.stdin.close();
				enforce(pipes.pid.wait() == 0, "git cat-file exited with failure");
				initialized = false;
			}
		}
	}
	alias ObjectReader = RefCounted!ObjectReaderImpl;

	/// Spawn a cat-file process which can read git objects by demand.
	ObjectReader createObjectReader()
	{
		auto pipes = this.pipe(`cat-file`, `--batch`);
		return ObjectReader(pipes);
	}

	/// Run a batch cat-file query.
	/// Returns: the objects, in the same order as `hashes`.
	GitObject[] getObjects(Hash[] hashes)
	{
		GitObject[] result;
		result.reserve(hashes.length);
		auto reader = createObjectReader();

		foreach (hash; hashes)
			result ~= reader.read(hash);

		return result;
	}

	/// Writes objects of a single type through a running
	/// `git hash-object -w --stdin-paths` process.
	struct ObjectWriterImpl
	{
		/// Whether `pipes` refers to a started process.
		bool initialized;
		ProcessPipes pipes;

		this(ProcessPipes pipes)
		{
			this.pipes = pipes;
			initialized = true;
		}

		/// Write `data` as an object, and return the hash printed by git.
		Hash write(in void[] data)
		{
			import std.random;
			// hash-object reads file names from its stdin, so the data
			// is handed over through a uniquely-named pipe.
			auto p = NamedPipe("ae-sys-git-writeObjects-%d".format(uniform!ulong));
			pipes.stdin.writeln(p.fileName);
			pipes.stdin.flush();

			auto f = p.connect();
			f.rawWrite(data);
			f.flush();
			f.close();

			return pipes.stdout.safeReadln().strip().toCommitHash();
		}

		~this()
		{
			if (initialized)
			{
				pipes.stdin.close();
				enforce(pipes.pid.wait() == 0, "git hash-object exited with failure");
				initialized = false;
			}
		}
	}
	alias ObjectWriter = RefCounted!ObjectWriterImpl;

	/// Writes tree, blob and commit objects, starting one
	/// hash-object process per object type on first use.
	struct ObjectMultiWriterImpl
	{
		Repository* repo;
		ObjectWriter treeWriter, blobWriter, commitWriter;

		/// Write `obj` using the writer for its type.
		/// Throws: `Exception` if the object type is not tree, blob or commit.
		Hash write(in GitObject obj)
		{
			ObjectWriter* pwriter;
			switch (obj.type) // https://issues.dlang.org/show_bug.cgi?id=14595
			{
				case "tree"  : pwriter = &treeWriter  ; break;
				case "blob"  : pwriter = &blobWriter  ; break;
				case "commit": pwriter = &commitWriter; break;
				default: throw new Exception("Unknown object type: " ~ obj.type);
			}
			// Lazily start the process for this object type.
			if (!pwriter.initialized)
				*pwriter = ObjectWriter(repo.pipe(`hash-object`, `-t`, obj.type, `-w`, `--stdin-paths`));
			return pwriter.write(obj.data);
		}
	}
	alias ObjectMultiWriter = RefCounted!ObjectMultiWriterImpl;

	/// Spawn a hash-object process which can hash and write git objects on the fly.
	ObjectWriter createObjectWriter(string type)
	{
		auto pipes = this.pipe(`hash-object`, `-t`, type, `-w`, `--stdin-paths`);
		return ObjectWriter(pipes);
	}

	/// ditto
	ObjectMultiWriter createObjectWriter()
	{
		return ObjectMultiWriter(&this);
	}

	/// Batch-write the given objects to the database.
	/// The hashes are saved to the "hash" fields of the passed objects.
	void writeObjects(GitObject[] objects)
	{
		// One writer process per distinct object type.
		string[] allTypes = objects.map!(obj => obj.type).toSet().keys;
		foreach (type; allTypes)
		{
			auto writer = createObjectWriter(type);
			foreach (ref obj; objects)
				if (obj.type == type)
					obj.hash = writer.write(obj.data);
		}
	}

	/// Extract a commit's tree to a given directory
	void exportCommit(string commit, string path, ObjectReader reader, bool delegate(string) pathFilter = null)
	{
		exportTree(reader.read(commit).parseCommit().tree, path, reader, pathFilter);
	}

	/// Extract a tree to a given directory.
	/// If `pathFilter` is given, entries for which it returns false are skipped;
	/// it receives the entry's path within the tree, joined with "/".
	/// Throws: `Exception` on a tree entry with an unrecognized mode.
	void exportTree(Hash treeHash, string path, ObjectReader reader, bool delegate(string) pathFilter = null)
	{
		void exportSubTree(Hash treeHash, string[] subPath)
		{
			auto tree = reader.read(treeHash).parseTree();
			foreach (entry; tree)
			{
				auto entrySubPath = subPath ~ entry.name;
				if (pathFilter && !pathFilter(entrySubPath.join("/")))
					continue;
				auto entryPath = buildPath([path] ~ entrySubPath);
				switch (entry.mode)
				{
					case octal!100644: // file
					case octal!100755: // executable file
						std.file.write(entryPath, reader.read(entry.hash).data);
						version (Posix)
						{
							// Make executable: copy each read bit to the corresponding execute bit
							if (entry.mode == octal!100755)
								entryPath.setAttributes(entryPath.getAttributes | ((entryPath.getAttributes & octal!444) >> 2));
						}
						break;
					case octal! 40000: // tree
						mkdirRecurse(entryPath);
						exportSubTree(entry.hash, entrySubPath);
						break;
					case octal!160000: // submodule
						// Only the directory is created; its contents are not exported.
						mkdirRecurse(entryPath);
						break;
					default:
						throw new Exception("Unknown git file mode: %o".format(entry.mode));
				}
			}
		}
		exportSubTree(treeHash, null);
	}

	/// Import a directory tree into the object store, and return the new tree object's hash.
	/// If `pathFilter` is given, directory entries for which it returns false are skipped;
	/// it receives the entry's path relative to `path`.
	Hash importTree(string path, ObjectMultiWriter writer, bool delegate(string) pathFilter = null)
	{
		static // Error: variable ae.sys.git.Repository.importTree.writer has scoped destruction, cannot build closure
		Hash importSubTree(string path, string subPath, ref ObjectMultiWriter writer, bool delegate(string) pathFilter)
		{
			auto entries = subPath
				.dirEntries(SpanMode.shallow)
				.filter!(de => !pathFilter || pathFilter(de.relativePath(path)))
				.map!(de =>
					de.isDir
					? GitObject.TreeEntry(
						octal!40000,
						de.baseName,
						importSubTree(path, buildPath(subPath, de.baseName), writer, pathFilter)
					)
					: GitObject.TreeEntry(
						isVersion!`Posix` && (de.attributes & octal!111) ? octal!100755 : octal!100644,
						de.baseName,
						writer.write(GitObject(Hash.init, "blob", cast(immutable(ubyte)[])read(de.name)))
					)
				)
				.array
				.sort!((a, b) => a.sortName < b.sortName).release
			;
			return writer.write(GitObject.createTree(entries));
		}
		return importSubTree(path, path, writer, pathFilter);
	}

	/// Updates refs through a running `git update-ref -z --stdin` process.
	struct RefWriterImpl
	{
		/// Whether `pipes` refers to a started process.
		bool initialized;
		ProcessPipes pipes;

		this(ProcessPipes pipes)
		{
			this.pipes = pipes;
			initialized = true;
		}

		// Send one command: "<op> <refName>", then one NUL-terminated field per
		// element of `hashes` (an empty field for a null element), optionally
		// preceded by "option no-deref".
		private void op(string op, bool noDeref, string refName, Hash*[] hashes...)
		{
			if (noDeref)
				pipes.stdin.write("option no-deref\0");
			pipes.stdin.write(op, " ", refName, '\0');
			foreach (hash; hashes)
			{
				if (hash)
					pipes.stdin.write((*hash).toString());
				pipes.stdin.write('\0');
			}
			pipes.stdin.flush();
		}

		// A typed null pointer standing for "no value". A bare `null` passed as
		// the ONLY variadic argument is matched as a null `Hash*[]` (no fields
		// at all), which would drop the empty <oldvalue> field from the command.
		private enum Hash* none = cast(Hash*)null;

		/// Queue an "update" command, optionally stating the expected old value.
		void update   (string refName, Hash newValue               , bool noDeref = false) { op("update", noDeref, refName, &newValue, none     ); }
		/// ditto
		void update   (string refName, Hash newValue, Hash oldValue, bool noDeref = false) { op("update", noDeref, refName, &newValue, &oldValue); }
		/// Queue a "create" command.
		void create   (string refName, Hash newValue               , bool noDeref = false) { op("create", noDeref, refName, &newValue           ); }
		/// Queue a "delete" command, optionally stating the expected old value.
		void deleteRef(string refName                              , bool noDeref = false) { op("delete", noDeref, refName,            none     ); }
		/// ditto
		void deleteRef(string refName,                Hash oldValue, bool noDeref = false) { op("delete", noDeref, refName,            &oldValue); }
		/// Queue a "verify" command, optionally stating the expected old value.
		void verify   (string refName                              , bool noDeref = false) { op("verify", noDeref, refName,            none     ); }
		/// ditto
		void verify   (string refName,                Hash oldValue, bool noDeref = false) { op("verify", noDeref, refName,            &oldValue); }

		~this()
		{
			if (initialized)
			{
				pipes.stdin.close();
				enforce(pipes.pid.wait() == 0, "git update-ref exited with failure");
				initialized = false;
			}
		}
	}
	alias RefWriter = RefCounted!RefWriterImpl;

	/// Spawn a update-ref process which can update git refs on the fly.
	RefWriter createRefWriter()
	{
		auto pipes = this.pipe(`update-ref`, `-z`, `--stdin`);
		return RefWriter(pipes);
	}
}
451 
/// A git object: its hash, type name, and raw contents.
struct GitObject
{
	/// The object's hash (`Hash.init` for objects not yet written).
	Hash hash;
	/// The object type, e.g. "commit", "tree" or "blob".
	string type;
	/// The object's raw contents.
	immutable(ubyte)[] data;

	/// The fields of a commit object, as split up by `parseCommit`.
	struct ParsedCommit
	{
		Hash tree;
		Hash[] parents;
		string author, committer; /// entire lines - name, email and date
		string[] message, gpgsig;

		@property Authorship parsedAuthor() { return parseAuthorship(author); }
		@property Authorship parsedCommitter() { return parseAuthorship(committer); }
		@property void parsedAuthor(Authorship authorship) { author = authorship.toString(); }
		@property void parsedCommitter(Authorship authorship) { committer = authorship.toString(); }
	}

	/// Parse this object's contents as a commit.
	/// Throws: `Exception` if this is not a commit object,
	/// or if it contains an unrecognized header field.
	ParsedCommit parseCommit()
	{
		enforce(type == "commit", "Wrong object type");
		ParsedCommit result;
		auto lines = (cast(string)data).split('\n');
		while (lines.length)
		{
			auto line = lines.shift();
			if (line == "")
			{
				result.message = lines;
				break; // commit message begins
			}
			// Header lines have the form "<field> <value>".
			auto parts = line.findSplit(" ");
			auto field = parts[0];
			line = parts[2];
			switch (field)
			{
				case "tree":
					result.tree = line.toCommitHash();
					break;
				case "parent":
					result.parents ~= line.toCommitHash();
					break;
				case "author":
					result.author = line;
					break;
				case "committer":
					result.committer = line;
					break;
				case "gpgsig":
				{
					// The signature continues on following lines, each starting with a space.
					auto p = lines.countUntil!(line => !line.startsWith(" "));
					if (p < 0)
						p = lines.length;
					result.gpgsig = [line] ~ lines[0 .. p].apply!(each!((ref line) => line.skipOver(" ").enforce("gpgsig line without leading space")));
					lines = lines[p .. $];
					break;
				}
				default:
					throw new Exception("Unknown commit field: " ~ field ~ "\n" ~ cast(string)data);
			}
		}
		return result;
	}

	/// Build a commit object from its fields.
	/// Only the tree, parents, author, committer and message are written;
	/// `gpgsig` is not included in the result.
	static GitObject createCommit(ParsedCommit commit)
	{
		auto s = "tree %s\n%-(parent %s\n%|%)author %s\ncommitter %s\n\n%-(%s\n%)".format(
				commit.tree.toString(),
				commit.parents.map!(ae.sys.git.toString),
				commit.author,
				commit.committer,
				commit.message,
			);
		return GitObject(Hash.init, "commit", cast(immutable(ubyte)[])s);
	}

	/// One entry of a tree object.
	struct TreeEntry
	{
		uint mode;
		string name;
		Hash hash;

		/// The key by which entries are ordered within a tree: directories
		/// compare as if their name ended with "/". Only the file type bits
		/// are tested, so that other modes which happen to have the 040000
		/// bit set (such as 160000, submodule) are not treated as directories.
		@property string sortName() { return (mode & octal!170000) == octal!40000 ? name ~ "/" : name; }
	}

	/// Parse this object's contents as a tree.
	/// Throws: `Exception` if this is not a tree object, or if an entry is malformed.
	TreeEntry[] parseTree()
	{
		enforce(type == "tree", "Wrong object type");
		TreeEntry[] result;
		auto rem = data;
		while (rem.length)
		{
			// Entry layout: "<octal mode> <name>\0" followed by the raw hash bytes.
			auto si = rem.countUntil(' ');
			auto zi = rem.countUntil(0);
			auto ei = zi + 1 + Hash.sizeof;
			// Validate the indices BEFORE slicing with them, so that malformed
			// data is reported by this check and not as a range violation.
			enforce(0 < si && si < zi && ei <= rem.length, "Malformed tree entry:\n" ~ hexDump(rem));
			auto str = cast(string)rem[0..zi];
			result ~= TreeEntry(str[0..si].to!uint(8), str[si+1..zi], cast(Hash)rem[zi+1..ei][0..20]); // https://issues.dlang.org/show_bug.cgi?id=13112
			rem = rem[ei..$];
		}
		return result;
	}

	/// Build a tree object from the given entries, in the order given.
	static GitObject createTree(TreeEntry[] entries)
	{
		auto buf = appender!(immutable(ubyte)[]);
		foreach (entry; entries)
		{
			buf.formattedWrite("%o %s\0", entry.mode, entry.name);
			buf.put(entry.hash[]);
		}
		return GitObject(Hash.init, "tree", buf.data);
	}
}
567 
/// The commit graph and refs of a repository,
/// as collected by `Repository.getHistory`.
struct History
{
	/// All known commits, indexed by hash.
	Commit*[Hash] commits;

	/// Number of commits allocated so far
	/// (the value given to the next commit's `id`).
	uint numCommits;

	/// Commit hashes, indexed by ref name.
	Hash[string] refs;
}
574 
/// A raw 20-byte object hash.
alias Hash = ubyte[20];
576 
/// A node of the commit graph built by `Repository.getHistory`.
struct Commit
{
	/// Sequential number, assigned in order of allocation.
	uint id;
	/// The commit's hash.
	Hash hash;
	/// Time field taken from the committer line.
	uint time;
	/// Entire author / committer lines - name, email and date.
	string author, committer;
	/// Commit message, one element per line.
	string[] message;
	/// Adjacent nodes in the commit graph.
	Commit*[] parents, children;

	/// The author line, split into its components.
	@property Authorship parsedAuthor()
	{
		return parseAuthorship(author);
	}

	/// The committer line, split into its components.
	@property Authorship parsedCommitter()
	{
		return parseAuthorship(committer);
	}

	/// Replace the author line with the formatted `authorship`.
	@property void parsedAuthor(Authorship authorship)
	{
		author = authorship.toString();
	}

	/// Replace the committer line with the formatted `authorship`.
	@property void parsedCommitter(Authorship authorship)
	{
		committer = authorship.toString();
	}
}
591 
/// The components of an author or committer line.
struct Authorship
{
	string name;
	string email;
	string date; // use `"U O"` ae.utils.time format to parse/format

	/// Format as "name <email> date".
	string toString() const
	{
		return [name, " <", email, "> ", date].join();
	}
}
/// Split a "name <email> date" line into its components.
/// The name is everything before the first " <"; the email is
/// everything from there up to the following "> "; the date is the rest.
static Authorship parseAuthorship(string authorship)
{
	auto nameAndRest = authorship.findSplit(" <");
	auto emailAndDate = nameAndRest[2].findSplit("> ");

	Authorship parsed;
	parsed.name  = nameAndRest[0];
	parsed.email = emailAndDate[0];
	parsed.date  = emailAndDate[2];
	return parsed;
}
609 
/// Convert a 40-digit hexadecimal string to a binary hash.
/// Throws: `Exception` if the string is not exactly 40 characters long.
Hash toCommitHash(in char[] hash)
{
	enforce(hash.length == 40, "Bad hash length: " ~ hash);
	ubyte[20] bytes;
	for (size_t pos = 0; pos < bytes.length; pos++)
	{
		// Each byte is encoded as two consecutive hex digits.
		auto digits = hash[pos*2..pos*2+2];
		bytes[pos] = to!ubyte(digits, 16);
	}
	return bytes;
}
618 
unittest
{
	// Digits of either case are accepted.
	immutable ubyte[20] expected = [0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0x01, 0x23, 0x45, 0x67];
	auto parsed = toCommitHash("0123456789abcdef0123456789ABCDEF01234567");
	assert(parsed == expected);
}
623 
/// Format a binary hash as 40 lowercase hexadecimal digits.
string toString(ref Hash hash) pure
{
	char[40] hex = hash.toLowerHex();
	const(char)[] digits = hex[];
	return digits.idup;
}
629 
unittest
{
	// The output is always lowercase.
	Hash hash = [0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0x01, 0x23, 0x45, 0x67];
	auto hex = hash.toString();
	assert(hex == "0123456789abcdef0123456789abcdef01234567");
}
635 
/// Tries to match the default destination of `git clone`.
/// Trailing slashes and a trailing "/.git" path component are ignored;
/// of what remains, the part after the last ":" and then after the
/// last "/" is taken, and a ".git" suffix is removed from it.
string repositoryNameFromURL(string url)
{
	// Without this, "host/repo/" would yield an empty name.
	while (url.endsWith("/"))
		url = url[0 .. $-1];
	// ".../repo/.git" names the git directory of the repository "repo".
	url = url.chomp("/.git");
	while (url.endsWith("/"))
		url = url[0 .. $-1];

	return url
		.split(":")[$-1]
		.split("/")[$-1]
		.chomp(".git");
}
644 
unittest
{
	// [URL, expected repository name]
	static immutable string[2][] cases = [
		["https://github.com/CyberShadow/ae.git", "ae"],
		["git@example.com:ae.git", "ae"],
	];
	foreach (c; cases)
		assert(repositoryNameFromURL(c[0]) == c[1]);
}