1 /**
2  * Wrappers for the git command-line tools.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.sys.git;
15 
16 import std.algorithm;
17 import std.array;
18 import std.conv;
19 import std.exception;
20 import std.file;
21 import std.format;
22 import std.path;
23 import std.process;
24 import std.string;
25 import std.typecons;
26 import std.utf;
27 
28 import ae.sys.cmd;
29 import ae.sys.file;
30 import ae.utils.aa;
31 import ae.utils.meta;
32 import ae.utils.text;
33 
/// A git repository on disk.
///
/// Provides primitives for running git commands against the repository,
/// plus helpers built on long-running git processes (`cat-file --batch`,
/// `hash-object --stdin-paths`, `update-ref --stdin`) for reading and
/// writing objects and refs.
struct Repository
{
	/// Absolute form of the path given to the constructor.
	string path;
	/// Location of the git directory (`<path>/.git`, or the directory
	/// a `.git` file points to).
	string gitDir;

	/// Environment passed to the git processes spawned by this struct.
	string[string] environment;
	/// Command-line prefix prepended to the arguments of every git invocation.
	static immutable string[] commandPrefix = ["git", "-c", "core.autocrlf=false"];

	/// Open the repository located at `path`.
	///
	/// If `<path>/.git` is a regular file, it must have the form
	/// `gitdir: <dir>`, and `gitDir` is resolved relative to `path`.
	///
	/// Throws: Exception if `path` does not exist, or if the `.git` file
	/// does not start with `gitdir: `.
	this(string path)
	{
		path = path.absolutePath();
		enforce(path.exists, "Repository path does not exist: " ~ path);
		gitDir = path.buildPath(".git");
		if (gitDir.exists && gitDir.isFile)
		{
			// Verify the prefix instead of blindly chopping off 8 characters,
			// which would produce a garbage path (or a range violation for
			// short files) when the file has some other content.
			enum gitDirPrefix = "gitdir: ";
			auto link = gitDir.readText().strip();
			enforce(link.startsWith(gitDirPrefix), "Malformed .git file (expected 'gitdir: <path>'): " ~ gitDir);
			gitDir = path.buildNormalizedPath(link[gitDirPrefix.length .. $]);
		}
		//path = path.replace(`\`, `/`);
		this.path = path;
		// NOTE(review): these settings presumably isolate git from system-wide
		// and per-user configuration by pointing the config locations at gitDir.
		version (Windows) {} else
			this.environment["GIT_CONFIG_NOSYSTEM"] = "1";
		this.environment["HOME"] = gitDir;
		this.environment["XDG_CONFIG_HOME"] = gitDir;
	}

	/// The constructor always populates `environment`, so a null
	/// environment means the struct was never constructed.
	invariant()
	{
		assert(environment !is null, "Not initialized");
	}

	// Have just some primitives here.
	// Higher-level functionality can be added using UFCS.

	/// Run a git command (`args[0]` is the git subcommand) using `ae.sys.cmd.run`.
	/// Throws: Exception if `args` is empty.
	void run(string[] args...) const
	{
		enforce(args.length, "No git command specified");
		auto owd = pushd(workPath(args[0]));
		return .run(commandPrefix ~ args, environment, path);
	}

	/// Run a git command using `ae.sys.cmd.query` and return its result,
	/// with leading and trailing whitespace stripped.
	/// Throws: Exception if `args` is empty.
	string query(string[] args...) const
	{
		enforce(args.length, "No git command specified");
		auto owd = pushd(workPath(args[0]));
		return .query(commandPrefix ~ args, environment, path).strip();
	}

	/// Run a git command and return `true` if it exited with status 0.
	/// Throws: Exception if `args` is empty.
	bool check(string[] args...) const
	{
		enforce(args.length, "No git command specified");
		auto owd = pushd(workPath(args[0]));
		return spawnProcess(commandPrefix ~ args, environment, Config.none, path).wait() == 0;
	}

	/// Start a git command with the given stream redirections,
	/// and return the resulting `ProcessPipes`.
	/// Throws: Exception if `args` is empty.
	auto pipe(string[] args, Redirect redirect) const
	{
		enforce(args.length, "No git command specified");
		auto owd = pushd(workPath(args[0]));
		return pipeProcess(commandPrefix ~ args, redirect, environment, Config.none, path);
	}

	/// ditto (redirecting the child's standard input and output)
	auto pipe(string[] args...) const
	{
		return pipe(args, Redirect.stdin | Redirect.stdout);
	}

	/// Certain git commands (notably, bisect) must
	/// be run in the repository's root directory.
	/// Returns `path` for such commands, and `null` for all others.
	private string workPath(string cmd) const
	{
		switch (cmd)
		{
			case "bisect":
			case "submodule":
				return path;
			default:
				return null;
		}
	}

	/// Load the commit graph and the refs of this repository.
	///
	/// Parses the output of `git log --all --pretty=raw` (with `extraRefs`
	/// appended to the command line) into `Commit` nodes linked through
	/// `parents` / `children`, then records every ref reported by
	/// `git show-ref --dereference` which points at a known commit.
	///
	/// Throws: Exception on output which cannot be parsed.
	History getHistory(string[] extraRefs = null)
	{
		History history;

		// Return the Commit for `hash`, allocating it (with the next
		// sequential id) the first time the hash is encountered.
		Commit* getCommit(Hash hash)
		{
			auto pcommit = hash in history.commits;
			return pcommit ? *pcommit : (history.commits[hash] = new Commit(history.numCommits++, hash));
		}

		Commit* commit;
		bool inSig; // inside a multi-line header (PGP signature or merge tag)

		foreach (line; query([`log`, `--all`, `--pretty=raw`] ~ extraRefs).split('\n'))
		{
			if (!line.length)
			{
				// A blank line ends the header block (and any multi-line header).
				inSig = false;
				continue;
			}

			if (inSig)
			{
				// Continuation lines of a multi-line header are indented by one space.
				enforce(line.startsWith(" "), "Expected GPG signature line in git log");
				continue;
			}

			if (line.startsWith("commit "))
			{
				auto hash = line[7..$].toCommitHash();
				commit = getCommit(hash);
				continue;
			}

			// All remaining line kinds describe the current commit; fail with
			// a proper exception rather than dereferencing a null pointer.
			enforce(commit !is null, "Unexpected line before first commit in git log: " ~ line);

			if (line.startsWith("tree "))
				continue;
			else
			if (line.startsWith("parent "))
			{
				auto hash = line[7..$].toCommitHash();
				auto parent = getCommit(hash);
				commit.parents ~= parent;
				parent.children ~= commit;
			}
			else
			if (line.startsWith("author "))
				commit.author = line[7..$];
			else
			if (line.startsWith("committer "))
			{
				commit.committer = line[10..$];
				// The line ends in "<timestamp> <timezone>". Commit.time is
				// unsigned, so parse it as such (to!int would reject
				// timestamps of 2^31 and above).
				commit.time = line.split(" ")[$-2].to!uint();
			}
			else
			if (line.startsWith("    "))
				commit.message ~= line[4..$];
			else
			if (line.startsWith("gpgsig ") || line.startsWith("mergetag "))
				inSig = true; // both headers continue on space-indented lines
			else
			if (line.startsWith("encoding "))
				continue; // message encoding header; not recorded
			else
				enforce(false, "Unknown line in git log: " ~ line);
		}

		foreach (line; query([`show-ref`, `--dereference`]).splitLines())
		{
			// Each line is "<40 hex digits> <ref name>".
			enforce(line.length > 41, "Malformed show-ref line: " ~ line);
			auto h = line[0..40].toCommitHash();
			if (h in history.commits)
				history.refs[line[41..$]] = h;
		}

		return history;
	}

	/// Payload of `ObjectReader`: owns the pipes of a process started
	/// by `createObjectReader` (`git cat-file --batch`).
	struct ObjectReaderImpl
	{
		/// Set by the constructor; keeps the destructor from touching
		/// the pipes of a default-initialized instance.
		bool initialized;
		ProcessPipes pipes;

		this(ProcessPipes pipes)
		{
			this.pipes = pipes;
			initialized = true;
		}

		/// Request the object called `name` and read the reply.
		/// Throws: Exception if the reply header does not consist of three
		/// space-separated fields (hash, type, size), or if the reply is truncated.
		GitObject read(string name)
		{
			pipes.stdin.writeln(name);
			pipes.stdin.flush();

			auto headerLine = pipes.stdout.safeReadln().strip();
			auto header = headerLine.split(" ");
			enforce(header.length == 3, "Malformed header during cat-file: " ~ headerLine);
			auto hash = header[0].toCommitHash();

			GitObject obj;
			obj.hash = hash;
			obj.type = header[1];
			auto size = to!size_t(header[2]);
			if (size)
			{
				auto data = new ubyte[size];
				auto bytesRead = pipes.stdout.rawRead(data);
				enforce(bytesRead.length == size, "Unexpected EOF during cat-file");
				obj.data = data.assumeUnique();
			}

			// The object's contents are followed by a single line feed.
			char[1] lf;
			pipes.stdout.rawRead(lf[]);
			enforce(lf[0] == '\n', "Terminating newline expected");

			return obj;
		}

		/// Read the object with the given hash, verifying
		/// that the reply is for that same hash.
		GitObject read(Hash hash)
		{
			auto obj = read(hash.toString());
			enforce(obj.hash == hash, "Unexpected object during cat-file");
			return obj;
		}

		/// Close the process's input and wait for it to exit.
		~this()
		{
			if (initialized)
			{
				pipes.stdin.close();
				enforce(pipes.pid.wait() == 0, "git cat-file exited with failure");
				initialized = false;
			}
		}
	}
	alias ObjectReader = RefCounted!ObjectReaderImpl;

	/// Spawn a cat-file process which can read git objects by demand.
	ObjectReader createObjectReader()
	{
		auto pipes = this.pipe(`cat-file`, `--batch`);
		return ObjectReader(pipes);
	}

	/// Run a batch cat-file query.
	/// Returns: the objects named by `hashes`, in the same order.
	GitObject[] getObjects(Hash[] hashes)
	{
		GitObject[] result;
		result.reserve(hashes.length);
		auto reader = createObjectReader();

		foreach (hash; hashes)
			result ~= reader.read(hash);

		return result;
	}

	/// Payload of `ObjectWriter`: owns the pipes of a process started
	/// by `createObjectWriter` (`git hash-object -w --stdin-paths`).
	struct ObjectWriterImpl
	{
		/// Set by the constructor; keeps the destructor from touching
		/// the pipes of a default-initialized instance.
		bool initialized;
		ProcessPipes pipes;

		this(ProcessPipes pipes)
		{
			this.pipes = pipes;
			initialized = true;
		}

		/// Write `data` as a new object and return the hash printed by git.
		///
		/// The process reads file names from its standard input, so the
		/// data is handed over through a randomly-named `NamedPipe`
		/// whose file name is sent to the process.
		Hash write(in void[] data)
		{
			import std.random;
			auto p = NamedPipe("ae-sys-git-writeObjects-%d".format(uniform!ulong));
			pipes.stdin.writeln(p.fileName);
			pipes.stdin.flush();

			auto f = p.connect();
			f.rawWrite(data);
			f.flush();
			f.close();

			return pipes.stdout.safeReadln().strip().toCommitHash();
		}

		/// Close the process's input and wait for it to exit.
		~this()
		{
			if (initialized)
			{
				pipes.stdin.close();
				enforce(pipes.pid.wait() == 0, "git hash-object exited with failure");
				initialized = false;
			}
		}
	}
	alias ObjectWriter = RefCounted!ObjectWriterImpl;

	/// Payload of `ObjectMultiWriter`: lazily creates one `ObjectWriter`
	/// per object type (tree, blob, commit) on first use.
	struct ObjectMultiWriterImpl
	{
		Repository* repo;
		ObjectWriter treeWriter, blobWriter, commitWriter;

		/// Write `obj` using the writer for its type, and return its hash.
		/// Throws: Exception if `obj.type` is not "tree", "blob" or "commit".
		Hash write(in GitObject obj)
		{
			ObjectWriter* pwriter;
			switch (obj.type) // https://issues.dlang.org/show_bug.cgi?id=14595
			{
				case "tree"  : pwriter = &treeWriter  ; break;
				case "blob"  : pwriter = &blobWriter  ; break;
				case "commit": pwriter = &commitWriter; break;
				default: throw new Exception("Unknown object type: " ~ obj.type);
			}
			if (!pwriter.initialized)
				*pwriter = ObjectWriter(repo.pipe(`hash-object`, `-t`, obj.type, `-w`, `--stdin-paths`));
			return pwriter.write(obj.data);
		}
	}
	alias ObjectMultiWriter = RefCounted!ObjectMultiWriterImpl;

	/// Spawn a hash-object process which can hash and write git objects on the fly.
	ObjectWriter createObjectWriter(string type)
	{
		auto pipes = this.pipe(`hash-object`, `-t`, type, `-w`, `--stdin-paths`);
		return ObjectWriter(pipes);
	}

	/// ditto
	ObjectMultiWriter createObjectWriter()
	{
		return ObjectMultiWriter(&this);
	}

	/// Batch-write the given objects to the database.
	/// The hashes are saved to the "hash" fields of the passed objects.
	void writeObjects(GitObject[] objects)
	{
		// One hash-object process per distinct object type.
		string[] allTypes = objects.map!(obj => obj.type).toSet().keys;
		foreach (type; allTypes)
		{
			auto writer = createObjectWriter(type);
			foreach (ref obj; objects)
				if (obj.type == type)
					obj.hash = writer.write(obj.data);
		}
	}

	/// Extract a commit's tree to a given directory
	void exportCommit(string commit, string path, ObjectReader reader, bool delegate(string) pathFilter = null)
	{
		exportTree(reader.read(commit).parseCommit().tree, path, reader, pathFilter);
	}

	/// Extract a tree to a given directory
	///
	/// If `pathFilter` is given, it is called with the slash-separated path
	/// of each entry (relative to the tree root); entries for which it
	/// returns `false` are skipped, including everything below skipped trees.
	/// Submodule entries are exported as empty directories.
	///
	/// Throws: Exception on tree entries with any other mode than
	/// 100644, 100755, 40000 or 160000.
	void exportTree(Hash treeHash, string path, ObjectReader reader, bool delegate(string) pathFilter = null)
	{
		void exportSubTree(Hash treeHash, string[] subPath)
		{
			auto tree = reader.read(treeHash).parseTree();
			foreach (entry; tree)
			{
				auto entrySubPath = subPath ~ entry.name;
				if (pathFilter && !pathFilter(entrySubPath.join("/")))
					continue;
				auto entryPath = buildPath([path] ~ entrySubPath);
				switch (entry.mode)
				{
					case octal!100644: // file
					case octal!100755: // executable file
						std.file.write(entryPath, reader.read(entry.hash).data);
						version (Posix)
						{
							// Make executable
							// (copy each "read" permission bit to the corresponding "execute" bit)
							if (entry.mode == octal!100755)
								entryPath.setAttributes(entryPath.getAttributes | ((entryPath.getAttributes & octal!444) >> 2));
						}
						break;
					case octal! 40000: // tree
						mkdirRecurse(entryPath);
						exportSubTree(entry.hash, entrySubPath);
						break;
					case octal!160000: // submodule
						mkdirRecurse(entryPath);
						break;
					default:
						throw new Exception("Unknown git file mode: %o".format(entry.mode));
				}
			}
		}
		exportSubTree(treeHash, null);
	}

	/// Import a directory tree into the object store, and return the new tree object's hash.
	///
	/// If `pathFilter` is given, it is called with the path of each directory
	/// entry relative to `path`; entries for which it returns `false` are skipped.
	Hash importTree(string path, ObjectMultiWriter writer, bool delegate(string) pathFilter = null)
	{
		static // Error: variable ae.sys.git.Repository.importTree.writer has scoped destruction, cannot build closure
		Hash importSubTree(string path, string subPath, ref ObjectMultiWriter writer, bool delegate(string) pathFilter)
		{
			auto entries = subPath
				.dirEntries(SpanMode.shallow)
				.filter!(de => !pathFilter || pathFilter(de.relativePath(path)))
				.map!(de =>
					de.isDir
					? GitObject.TreeEntry(
						octal!40000,
						de.baseName,
						importSubTree(path, buildPath(subPath, de.baseName), writer, pathFilter)
					)
					: GitObject.TreeEntry(
						isVersion!`Posix` && (de.attributes & octal!111) ? octal!100755 : octal!100644,
						de.baseName,
						writer.write(GitObject(Hash.init, "blob", cast(immutable(ubyte)[])read(de.name)))
					)
				)
				.array
				.sort!((a, b) => a.sortName < b.sortName).release
			;
			return writer.write(GitObject.createTree(entries));
		}
		return importSubTree(path, path, writer, pathFilter);
	}

	/// Payload of `RefWriter`: owns the pipes of a process started
	/// by `createRefWriter` (`git update-ref -z --stdin`).
	struct RefWriterImpl
	{
		/// Set by the constructor; keeps the destructor from touching
		/// the pipes of a default-initialized instance.
		bool initialized;
		ProcessPipes pipes;

		this(ProcessPipes pipes)
		{
			this.pipes = pipes;
			initialized = true;
		}

		/// Send one NUL-terminated command to the process.
		///
		/// Every element of `hashes` produces one NUL-terminated field;
		/// a null element produces an empty field.
		///
		/// Beware when calling this with a single literal `null`: it binds
		/// to the `hashes` array itself (yielding no fields at all), not to
		/// one null element. Pass `cast(Hash*)null` for one empty field.
		private void op(string op, bool noDeref, string refName, Hash*[] hashes...)
		{
			if (noDeref)
				pipes.stdin.write("option no-deref\0");
			pipes.stdin.write(op, " ", refName, '\0');
			foreach (hash; hashes)
			{
				if (hash)
					pipes.stdin.write((*hash).toString());
				pipes.stdin.write('\0');
			}
			pipes.stdin.flush();
		}

		// The single-hash forms of "delete" and "verify" must still send an
		// (empty) old-value field, hence the typed null pointers below.
		void update   (string refName, Hash newValue               , bool noDeref = false) { op("update", noDeref, refName, &newValue, null     ); }
		void update   (string refName, Hash newValue, Hash oldValue, bool noDeref = false) { op("update", noDeref, refName, &newValue, &oldValue); }
		void create   (string refName, Hash newValue               , bool noDeref = false) { op("create", noDeref, refName, &newValue           ); }
		void deleteRef(string refName                              , bool noDeref = false) { op("delete", noDeref, refName, cast(Hash*)null     ); }
		void deleteRef(string refName,                Hash oldValue, bool noDeref = false) { op("delete", noDeref, refName,            &oldValue); }
		void verify   (string refName                              , bool noDeref = false) { op("verify", noDeref, refName, cast(Hash*)null     ); }
		void verify   (string refName,                Hash oldValue, bool noDeref = false) { op("verify", noDeref, refName,            &oldValue); }

		/// Close the process's input and wait for it to exit.
		~this()
		{
			if (initialized)
			{
				pipes.stdin.close();
				enforce(pipes.pid.wait() == 0, "git update-ref exited with failure");
				initialized = false;
			}
		}
	}
	alias RefWriter = RefCounted!RefWriterImpl;

	/// Spawn a update-ref process which can update git refs on the fly.
	RefWriter createRefWriter()
	{
		auto pipes = this.pipe(`update-ref`, `-z`, `--stdin`);
		return RefWriter(pipes);
	}
}
439 
/// A git object: its hash, its type name (the code in this module uses
/// "commit", "tree" and "blob"), and its raw contents.
struct GitObject
{
	Hash hash;
	string type;
	immutable(ubyte)[] data;

	/// The fields of a commit object, as understood by
	/// `parseCommit` and `createCommit`.
	struct ParsedCommit
	{
		Hash tree;
		Hash[] parents;
		string author, committer; /// entire lines - name, email and date
		string[] message;
	}

	/// Parse the contents of a commit object.
	///
	/// Header lines are read up to the first empty line; all lines
	/// after it become `message`.
	///
	/// Throws: Exception if this is not a commit object, or if it has a
	/// header other than `tree`, `parent`, `author` or `committer`.
	ParsedCommit parseCommit()
	{
		enforce(type == "commit", "Wrong object type");
		ParsedCommit result;
		auto lines = (cast(string)data).split('\n');
		foreach (n, line; lines)
		{
			if (line == "")
			{
				result.message = lines[n+1..$];
				break; // commit message begins
			}
			auto parts = line.findSplit(" ");
			auto field = parts[0];
			line = parts[2];
			switch (field)
			{
				case "tree":
					result.tree = line.toCommitHash();
					break;
				case "parent":
					result.parents ~= line.toCommitHash();
					break;
				case "author":
					result.author = line;
					break;
				case "committer":
					result.committer = line;
					break;
				default:
					throw new Exception("Unknown commit field: " ~ field);
			}
		}
		return result;
	}

	/// Build a commit object from its parsed form (the inverse of `parseCommit`).
	/// The returned object's `hash` is left as `Hash.init`.
	static GitObject createCommit(ParsedCommit commit)
	{
		auto s = "tree %s\n%-(parent %s\n%|%)author %s\ncommitter %s\n\n%-(%s\n%)".format(
				commit.tree.toString(),
				commit.parents.map!(ae.sys.git.toString),
				commit.author,
				commit.committer,
				commit.message,
			);
		return GitObject(Hash.init, "commit", cast(immutable(ubyte)[])s);
	}

	/// One entry of a tree object.
	struct TreeEntry
	{
		uint mode;
		string name;
		Hash hash;

		/// Key to sort tree entries by: directory names are compared
		/// as if they ended with a slash.
		///
		/// Only proper directories (type bits equal to 040000) qualify.
		/// Submodule entries (160000) also have the 040000 bit set, but
		/// are ordered like files, so the entire type field is compared.
		@property string sortName() { return (mode & octal!170000) == octal!40000 ? name ~ "/" : name; }
	}

	/// Parse the contents of a tree object.
	///
	/// Each entry has the form `<octal mode> <name>\0<raw hash bytes>`.
	///
	/// Throws: Exception if this is not a tree object,
	/// or if an entry is malformed or truncated.
	TreeEntry[] parseTree()
	{
		enforce(type == "tree", "Wrong object type");
		TreeEntry[] result;
		auto rem = data;
		while (rem.length)
		{
			auto si = rem.countUntil(' ');
			auto zi = rem.countUntil(0);
			auto ei = zi + 1 + Hash.sizeof;
			// Validate the delimiter positions before slicing with them:
			// countUntil returns -1 when the delimiter is missing, and
			// slicing with that would be a range violation instead of
			// the exception below.
			enforce(0 < si && si < zi && ei <= rem.length, "Malformed tree entry:\n" ~ hexDump(rem));
			auto str = cast(string)rem[0..zi];
			result ~= TreeEntry(str[0..si].to!uint(8), str[si+1..zi], cast(Hash)rem[zi+1..ei][0..20]); // https://issues.dlang.org/show_bug.cgi?id=13112
			rem = rem[ei..$];
		}
		return result;
	}

	/// Build a tree object from the given entries (the inverse of `parseTree`).
	///
	/// Entries are written in the order given; `Repository.importTree`
	/// sorts them by `TreeEntry.sortName` before calling this.
	/// The returned object's `hash` is left as `Hash.init`.
	static GitObject createTree(TreeEntry[] entries)
	{
		auto buf = appender!(immutable(ubyte)[]);
		foreach (entry; entries)
		{
			buf.formattedWrite("%o %s\0", entry.mode, entry.name);
			buf.put(entry.hash[]);
		}
		return GitObject(Hash.init, "tree", buf.data);
	}
}
540 
/// The commit graph and refs of a repository,
/// as collected by `Repository.getHistory`.
struct History
{
	/// All commits seen so far, indexed by hash.
	Commit*[Hash] commits;

	/// Number of commits allocated so far
	/// (the source of the sequential `Commit.id` values).
	uint numCommits;

	/// Hashes of the refs which point at a known commit, indexed by ref name.
	Hash[string] refs;
}
547 
/// A git object hash in binary form (20 bytes).
alias Hash = ubyte[20];
549 
/// A node of the commit graph built by `Repository.getHistory`.
struct Commit
{
	/// Sequential number, assigned in order of first appearance.
	uint id;
	/// The commit's hash.
	Hash hash;
	/// Timestamp taken from the committer line.
	uint time;
	/// The author line, without the "author " prefix.
	string author;
	/// The committer line, without the "committer " prefix.
	string committer;
	/// Lines of the commit message.
	string[] message;
	/// Commits named by this commit's "parent" lines.
	Commit*[] parents;
	/// Commits which name this commit as a parent.
	Commit*[] children;
}
559 
/// Convert a 40-digit hexadecimal string to a binary hash.
/// Throws: Exception if `hash` is not exactly 40 characters long;
/// non-hexadecimal digits are rejected by `std.conv.to`.
Hash toCommitHash(in char[] hash)
{
	enforce(hash.length == 40, "Bad hash length: " ~ hash);
	Hash bytes;
	foreach (index; 0 .. bytes.length)
	{
		// Every byte is encoded as two consecutive hex digits.
		auto start = index * 2;
		bytes[index] = to!ubyte(hash[start .. start + 2], 16);
	}
	return bytes;
}
568 
/// Format a binary hash as a string of lowercase
/// hexadecimal digits (two per byte).
string toString(ref Hash hash)
{
	auto bytes = hash[];
	return "%(%02x%)".format(bytes);
}
573 
// toCommitHash accepts both lowercase and uppercase hex digits.
unittest
{
	immutable ubyte[20] expected = [
		0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
		0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
		0x01, 0x23, 0x45, 0x67,
	];
	auto parsed = toCommitHash("0123456789abcdef0123456789ABCDEF01234567");
	assert(parsed == expected);
}
578 
/// Tries to match the default destination of `git clone`.
///
/// The result is the last component of `url` (whatever follows the
/// last `:` or `/`), without a `.git` suffix. Trailing slashes are
/// ignored, so `https://example.com/repo/` yields `repo` rather
/// than an empty name.
string repositoryNameFromURL(string url)
{
	// A trailing slash does not start a new (empty) path component.
	while (url.endsWith("/"))
		url = url[0 .. $ - 1];

	// Drop everything up to the last ':' (URL scheme or scp-like host part)...
	auto colon = url.lastIndexOf(':');
	if (colon >= 0)
		url = url[colon + 1 .. $];

	// ... and then everything up to the last path separator.
	auto slash = url.lastIndexOf('/');
	if (slash >= 0)
		url = url[slash + 1 .. $];

	return url.chomp(".git");
}
587 
// One URL-style and one scp-style remote address.
unittest
{
	static immutable string[2][] cases = [
		["https://github.com/CyberShadow/ae.git", "ae"],
		["git@example.com:ae.git", "ae"],
	];
	foreach (c; cases)
		assert(repositoryNameFromURL(c[0]) == c[1]);
}