1 /**
2  * Wrappers for the git command-line tools.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.sys.git;
15 
16 import std.algorithm;
17 import std.array;
18 import std.conv;
19 import std.exception;
20 import std.file;
21 import std.format;
22 import std.path;
23 import std.process;
24 import std.string;
25 import std.typecons : RefCounted;
26 import std.utf;
27 
28 import ae.sys.cmd;
29 import ae.sys.file;
30 import ae.utils.aa;
31 import ae.utils.array;
32 import ae.utils.meta;
33 import ae.utils.text;
34 
35 struct Repository
36 {
37 	string path;
38 	string gitDir;
39 
40 	string[string] environment;
41 	string[] commandPrefix;
42 
43 	static string[] globalOptions; // per-thread
44 
45 	this(string path)
46 	{
47 		path = path.absolutePath();
48 		enforce(path.exists, "Repository path does not exist: " ~ path);
49 		gitDir = path.buildPath(".git");
50 		if (gitDir.exists && gitDir.isFile)
51 			gitDir = path.buildNormalizedPath(gitDir.readText().strip()[8..$]);
52 		//path = path.replace(`\`, `/`);
53 		this.path = path;
54 		this.commandPrefix = ["git", "-c", "core.autocrlf=false", "-C", path] ~ globalOptions;
55 		version (Windows) {} else
56 			this.environment["GIT_CONFIG_NOSYSTEM"] = "1";
57 		this.environment["HOME"] = gitDir;
58 		this.environment["XDG_CONFIG_HOME"] = gitDir;
59 	}
60 
	// The constructor always stores at least one entry in `environment`,
	// so a null `environment` means this instance was never constructed
	// with a path.
	invariant()
	{
		assert(environment !is null, "Not initialized");
	}
65 
66 	// Have just some primitives here.
67 	// Higher-level functionality can be added using UFCS.
68 	void   run  (string[] args...) const { return .run  (commandPrefix ~ args, environment, path); }
69 	string query(string[] args...) const { return .query(commandPrefix ~ args, environment, path).strip(); }
70 	bool   check(string[] args...) const { return spawnProcess(commandPrefix ~ args, environment, Config.none, path).wait() == 0; }
71 	auto   pipe (string[] args, Redirect redirect)
72 	                               const { return pipeProcess(commandPrefix ~ args, redirect, environment, Config.none, path); }
73 	auto   pipe (string[] args...) const { return pipe(args, Redirect.stdin | Redirect.stdout); }
74 
75 	/// Certain git commands (notably, bisect) must
76 	/// be run in the repository's root directory.
77 	private string workPath(string cmd) const
78 	{
79 		switch (cmd)
80 		{
81 			case "bisect":
82 			case "submodule":
83 				return path;
84 			default:
85 				return null;
86 		}
87 	}
88 
89 	History getHistory(string[] extraRefs = null) const
90 	{
91 		History history;
92 
93 		Commit* getCommit(Hash hash)
94 		{
95 			auto pcommit = hash in history.commits;
96 			return pcommit ? *pcommit : (history.commits[hash] = new Commit(history.numCommits++, hash));
97 		}
98 
99 		Commit* commit;
100 		string currentBlock;
101 
102 		foreach (line; query([`log`, `--all`, `--pretty=raw`] ~ extraRefs).split('\n'))
103 		{
104 			if (!line.length)
105 			{
106 				if (currentBlock)
107 					currentBlock = null;
108 				continue;
109 			}
110 
111 			if (currentBlock)
112 			{
113 				enforce(line.startsWith(" "), "Expected " ~ currentBlock ~ " line in git log");
114 				continue;
115 			}
116 
117 			if (line.startsWith("commit "))
118 			{
119 				auto hash = line[7..$].toCommitHash();
120 				commit = getCommit(hash);
121 			}
122 			else
123 			if (line.startsWith("tree "))
124 				continue;
125 			else
126 			if (line.startsWith("parent "))
127 			{
128 				auto hash = line[7..$].toCommitHash();
129 				auto parent = getCommit(hash);
130 				commit.parents ~= parent;
131 				parent.children ~= commit;
132 			}
133 			else
134 			if (line.startsWith("author "))
135 				commit.author = line[7..$];
136 			else
137 			if (line.startsWith("committer "))
138 			{
139 				commit.committer = line[10..$];
140 				commit.time = line.split(" ")[$-2].to!int();
141 			}
142 			else
143 			if (line.startsWith("    "))
144 				commit.message ~= line[4..$];
145 			else
146 			if (line.startsWith("gpgsig "))
147 				currentBlock = "GPG signature";
148 			else
149 			if (line.startsWith("mergetag "))
150 				currentBlock = "Tag merge";
151 			else
152 				enforce(false, "Unknown line in git log: " ~ line);
153 		}
154 
155 		foreach (line; query([`show-ref`, `--dereference`]).splitLines())
156 		{
157 			auto h = line[0..40].toCommitHash();
158 			if (h in history.commits)
159 				history.refs[line[41..$]] = h;
160 		}
161 
162 		return history;
163 	}
164 
165 	struct ObjectReaderImpl
166 	{
167 		ProcessPipes pipes;
168 
169 		GitObject read(string name)
170 		{
171 			pipes.stdin.writeln(name);
172 			pipes.stdin.flush();
173 
174 			auto headerLine = pipes.stdout.safeReadln().strip();
175 			auto header = headerLine.split(" ");
176 			enforce(header.length == 3, "Malformed header during cat-file: " ~ headerLine);
177 			auto hash = header[0].toCommitHash();
178 
179 			GitObject obj;
180 			obj.hash = hash;
181 			obj.type = header[1];
182 			auto size = to!size_t(header[2]);
183 			if (size)
184 			{
185 				auto data = new ubyte[size];
186 				auto read = pipes.stdout.rawRead(data);
187 				enforce(read.length == size, "Unexpected EOF during cat-file");
188 				obj.data = data.assumeUnique();
189 			}
190 
191 			char[1] lf;
192 			pipes.stdout.rawRead(lf[]);
193 			enforce(lf[0] == '\n', "Terminating newline expected");
194 
195 			return obj;
196 		}
197 
198 		GitObject read(Hash hash)
199 		{
200 			auto obj = read(hash.toString());
201 			enforce(obj.hash == hash, "Unexpected object during cat-file");
202 			return obj;
203 		}
204 
205 		~this()
206 		{
207 			pipes.stdin.close();
208 			enforce(pipes.pid.wait() == 0, "git cat-file exited with failure");
209 		}
210 	}
211 	alias ObjectReader = RefCounted!ObjectReaderImpl;
212 
213 	/// Spawn a cat-file process which can read git objects by demand.
214 	ObjectReader createObjectReader()
215 	{
216 		auto pipes = this.pipe(`cat-file`, `--batch`);
217 		return ObjectReader(pipes);
218 	}
219 
220 	/// Run a batch cat-file query.
221 	GitObject[] getObjects(Hash[] hashes)
222 	{
223 		GitObject[] result;
224 		result.reserve(hashes.length);
225 		auto reader = createObjectReader();
226 
227 		foreach (hash; hashes)
228 			result ~= reader.read(hash);
229 
230 		return result;
231 	}
232 
233 	struct ObjectWriterImpl
234 	{
235 		bool initialized;
236 		ProcessPipes pipes;
237 
238 		this(ProcessPipes pipes)
239 		{
240 			this.pipes = pipes;
241 			initialized = true;
242 		}
243 
244 		Hash write(in void[] data)
245 		{
246 			import std.random;
247 			auto p = NamedPipe("ae-sys-git-writeObjects-%d".format(uniform!ulong));
248 			pipes.stdin.writeln(p.fileName);
249 			pipes.stdin.flush();
250 
251 			auto f = p.connect();
252 			f.rawWrite(data);
253 			f.flush();
254 			f.close();
255 
256 			return pipes.stdout.safeReadln().strip().toCommitHash();
257 		}
258 
259 		~this()
260 		{
261 			if (initialized)
262 			{
263 				pipes.stdin.close();
264 				enforce(pipes.pid.wait() == 0, "git hash-object exited with failure");
265 				initialized = false;
266 			}
267 		}
268 	}
269 	alias ObjectWriter = RefCounted!ObjectWriterImpl;
270 
	/// Implementation of `ObjectMultiWriter`: writes objects of any of
	/// the types "tree", "blob" and "commit", keeping one `ObjectWriter`
	/// per type. Each writer's process is started the first time an
	/// object of that type is written.
	struct ObjectMultiWriterImpl
	{
		/// Repository used to start the hash-object processes.
		/// NOTE(review): this is a raw pointer, so the Repository
		/// presumably has to stay alive (and at the same address)
		/// for as long as this writer is used - confirm with callers.
		Repository* repo;
		ObjectWriter treeWriter, blobWriter, commitWriter;

		/// Write the object's data using the writer for its type,
		/// and return the resulting hash.
		/// Throws: Exception if `obj.type` is not one of the known types.
		Hash write(in GitObject obj)
		{
			// Select the writer for this object type
			ObjectWriter* pwriter;
			switch (obj.type) // https://issues.dlang.org/show_bug.cgi?id=14595
			{
				case "tree"  : pwriter = &treeWriter  ; break;
				case "blob"  : pwriter = &blobWriter  ; break;
				case "commit": pwriter = &commitWriter; break;
				default: throw new Exception("Unknown object type: " ~ obj.type);
			}
			// Start the process for this type on first use
			if (!pwriter.initialized)
				*pwriter = ObjectWriter(repo.pipe(`hash-object`, `-t`, obj.type, `-w`, `--stdin-paths`));
			return pwriter.write(obj.data);
		}
	}
	alias ObjectMultiWriter = RefCounted!ObjectMultiWriterImpl;
292 
293 	/// Spawn a hash-object process which can hash and write git objects on the fly.
294 	ObjectWriter createObjectWriter(string type)
295 	{
296 		auto pipes = this.pipe(`hash-object`, `-t`, type, `-w`, `--stdin-paths`);
297 		return ObjectWriter(pipes);
298 	}
299 
300 	/// ditto
301 	ObjectMultiWriter createObjectWriter()
302 	{
303 		return ObjectMultiWriter(&this);
304 	}
305 
306 	/// Batch-write the given objects to the database.
307 	/// The hashes are saved to the "hash" fields of the passed objects.
308 	void writeObjects(GitObject[] objects)
309 	{
310 		string[] allTypes = objects.map!(obj => obj.type).toSet().keys;
311 		foreach (type; allTypes)
312 		{
313 			auto writer = createObjectWriter(type);
314 			foreach (ref obj; objects)
315 				if (obj.type == type)
316 					obj.hash = writer.write(obj.data);
317 		}
318 	}
319 
320 	/// Extract a commit's tree to a given directory
321 	void exportCommit(string commit, string path, ObjectReader reader, bool delegate(string) pathFilter = null)
322 	{
323 		exportTree(reader.read(commit).parseCommit().tree, path, reader, pathFilter);
324 	}
325 
326 	/// Extract a tree to a given directory
327 	void exportTree(Hash treeHash, string path, ObjectReader reader, bool delegate(string) pathFilter = null)
328 	{
329 		void exportSubTree(Hash treeHash, string[] subPath)
330 		{
331 			auto tree = reader.read(treeHash).parseTree();
332 			foreach (entry; tree)
333 			{
334 				auto entrySubPath = subPath ~ entry.name;
335 				if (pathFilter && !pathFilter(entrySubPath.join("/")))
336 					continue;
337 				auto entryPath = buildPath([path] ~ entrySubPath);
338 				switch (entry.mode)
339 				{
340 					case octal!100644: // file
341 					case octal!100755: // executable file
342 						std.file.write(entryPath, reader.read(entry.hash).data);
343 						version (Posix)
344 						{
345 							// Make executable
346 							if (entry.mode == octal!100755)
347 								entryPath.setAttributes(entryPath.getAttributes | ((entryPath.getAttributes & octal!444) >> 2));
348 						}
349 						break;
350 					case octal! 40000: // tree
351 						mkdirRecurse(entryPath);
352 						exportSubTree(entry.hash, entrySubPath);
353 						break;
354 					case octal!160000: // submodule
355 						mkdirRecurse(entryPath);
356 						break;
357 					default:
358 						throw new Exception("Unknown git file mode: %o".format(entry.mode));
359 				}
360 			}
361 		}
362 		exportSubTree(treeHash, null);
363 	}
364 
365 	/// Import a directory tree into the object store, and return the new tree object's hash.
366 	Hash importTree(string path, ObjectMultiWriter writer, bool delegate(string) pathFilter = null)
367 	{
368 		static // Error: variable ae.sys.git.Repository.importTree.writer has scoped destruction, cannot build closure
369 		Hash importSubTree(string path, string subPath, ref ObjectMultiWriter writer, bool delegate(string) pathFilter)
370 		{
371 			auto entries = subPath
372 				.dirEntries(SpanMode.shallow)
373 				.filter!(de => !pathFilter || pathFilter(de.relativePath(path)))
374 				.map!(de =>
375 					de.isDir
376 					? GitObject.TreeEntry(
377 						octal!40000,
378 						de.baseName,
379 						importSubTree(path, buildPath(subPath, de.baseName), writer, pathFilter)
380 					)
381 					: GitObject.TreeEntry(
382 						isVersion!`Posix` && (de.attributes & octal!111) ? octal!100755 : octal!100644,
383 						de.baseName,
384 						writer.write(GitObject(Hash.init, "blob", cast(immutable(ubyte)[])read(de.name)))
385 					)
386 				)
387 				.array
388 				.sort!((a, b) => a.sortName < b.sortName).release
389 			;
390 			return writer.write(GitObject.createTree(entries));
391 		}
392 		return importSubTree(path, path, writer, pathFilter);
393 	}
394 
	/// Implementation of `RefWriter`: sends ref update commands
	/// to a running process through its standard input.
	/// Every command is written as NUL-terminated fields.
	struct RefWriterImpl
	{
		bool initialized;
		ProcessPipes pipes;

		/// Take ownership of the pipes of an already-started process.
		this(ProcessPipes pipes)
		{
			this.pipes = pipes;
			initialized = true;
		}

		// Write a single command:
		// an optional "option no-deref" field (when noDeref is set),
		// then "<op> <refName>", then one field per element of `hashes`.
		// A null element is written as an empty field.
		private void op(string op, bool noDeref, string refName, Hash*[] hashes...)
		{
			if (noDeref)
				pipes.stdin.write("option no-deref\0");
			pipes.stdin.write(op, " ", refName, '\0');
			foreach (hash; hashes)
			{
				if (hash)
					pipes.stdin.write((*hash).toString());
				pipes.stdin.write('\0');
			}
			pipes.stdin.flush();
		}

		// Public commands. Overloads without an `oldValue`
		// parameter send an empty field in its place.
		void update   (string refName, Hash newValue               , bool noDeref = false) { op("update", noDeref, refName, &newValue, null     ); }
		void update   (string refName, Hash newValue, Hash oldValue, bool noDeref = false) { op("update", noDeref, refName, &newValue, &oldValue); }
		void create   (string refName, Hash newValue               , bool noDeref = false) { op("create", noDeref, refName, &newValue           ); }
		void deleteRef(string refName                              , bool noDeref = false) { op("delete", noDeref, refName,            null     ); }
		void deleteRef(string refName,                Hash oldValue, bool noDeref = false) { op("delete", noDeref, refName,            &oldValue); }
		void verify   (string refName                              , bool noDeref = false) { op("verify", noDeref, refName,            null     ); }
		void verify   (string refName,                Hash oldValue, bool noDeref = false) { op("verify", noDeref, refName,            &oldValue); }

		/// Close the process's input and wait for it to exit
		/// (only if this instance was constructed with pipes).
		~this()
		{
			if (initialized)
			{
				pipes.stdin.close();
				enforce(pipes.pid.wait() == 0, "git update-ref exited with failure");
				initialized = false;
			}
		}
	}
	alias RefWriter = RefCounted!RefWriterImpl;
439 
440 	/// Spawn a update-ref process which can update git refs on the fly.
441 	RefWriter createRefWriter()
442 	{
443 		auto pipes = this.pipe(`update-ref`, `-z`, `--stdin`);
444 		return RefWriter(pipes);
445 	}
446 }
447 
448 struct GitObject
449 {
450 	Hash hash;
451 	string type;
452 	immutable(ubyte)[] data;
453 
	/// The fields of a commit object, as extracted by `parseCommit`.
	struct ParsedCommit
	{
		Hash tree; /// hash from the "tree" header
		Hash[] parents; /// hashes from the "parent" headers, in order of appearance
		string author, committer; /// entire lines - name, email and date
		string[] message, gpgsig; /// lines of the commit message / of the "gpgsig" header
	}
461 
462 	ParsedCommit parseCommit()
463 	{
464 		enforce(type == "commit", "Wrong object type");
465 		ParsedCommit result;
466 		auto lines = (cast(string)data).split('\n');
467 		while (lines.length)
468 		{
469 			auto line = lines.shift();
470 			if (line == "")
471 			{
472 				result.message = lines;
473 				break; // commit message begins
474 			}
475 			auto parts = line.findSplit(" ");
476 			auto field = parts[0];
477 			line = parts[2];
478 			switch (field)
479 			{
480 				case "tree":
481 					result.tree = line.toCommitHash();
482 					break;
483 				case "parent":
484 					result.parents ~= line.toCommitHash();
485 					break;
486 				case "author":
487 					result.author = line;
488 					break;
489 				case "committer":
490 					result.committer = line;
491 					break;
492 				case "gpgsig":
493 				{
494 					auto p = lines.countUntil!(line => !line.startsWith(" "));
495 					if (p < 0)
496 						p = lines.length;
497 					result.gpgsig = [line] ~ lines[0 .. p].apply!(each!((ref line) => line.skipOver(" ").enforce("gpgsig line without leading space")));
498 					lines = lines[p .. $];
499 					break;
500 				}
501 				default:
502 					throw new Exception("Unknown commit field: " ~ field ~ "\n" ~ cast(string)data);
503 			}
504 		}
505 		return result;
506 	}
507 
508 	static GitObject createCommit(ParsedCommit commit)
509 	{
510 		auto s = "tree %s\n%-(parent %s\n%|%)author %s\ncommitter %s\n\n%-(%s\n%)".format(
511 				commit.tree.toString(),
512 				commit.parents.map!(ae.sys.git.toString),
513 				commit.author,
514 				commit.committer,
515 				commit.message,
516 			);
517 		return GitObject(Hash.init, "commit", cast(immutable(ubyte)[])s);
518 	}
519 
520 	struct TreeEntry
521 	{
522 		uint mode;
523 		string name;
524 		Hash hash;
525 
526 		@property string sortName() { return (mode & octal!40000) ? name ~ "/" : name; }
527 	}
528 
529 	TreeEntry[] parseTree()
530 	{
531 		enforce(type == "tree", "Wrong object type");
532 		TreeEntry[] result;
533 		auto rem = data;
534 		while (rem.length)
535 		{
536 			auto si = rem.countUntil(' ');
537 			auto zi = rem.countUntil(0);
538 			auto ei = zi + 1 + Hash.sizeof;
539 			auto str = cast(string)rem[0..zi];
540 			enforce(0 < si && si < zi && ei <= rem.length, "Malformed tree entry:\n" ~ hexDump(rem));
541 			result ~= TreeEntry(str[0..si].to!uint(8), str[si+1..zi], cast(Hash)rem[zi+1..ei][0..20]); // https://issues.dlang.org/show_bug.cgi?id=13112
542 			rem = rem[ei..$];
543 		}
544 		return result;
545 	}
546 
547 	static GitObject createTree(TreeEntry[] entries)
548 	{
549 		auto buf = appender!(immutable(ubyte)[]);
550 		foreach (entry; entries)
551 		{
552 			buf.formattedWrite("%o %s\0", entry.mode, entry.name);
553 			buf.put(entry.hash[]);
554 		}
555 		return GitObject(Hash.init, "tree", buf.data);
556 	}
557 }
558 
/// The commit graph and refs of a repository,
/// as returned by `Repository.getHistory`.
struct History
{
	/// All known commits, by hash.
	Commit*[Hash] commits;
	/// Number of commits created so far; also used to assign `Commit.id`.
	uint numCommits = 0;
	/// Hash each ref points to, by ref name.
	Hash[string] refs;
}
565 
/// The raw bytes of an object hash.
alias Hash = ubyte[20];
567 
/// A node of the commit graph built by `Repository.getHistory`.
struct Commit
{
	/// Sequential number, assigned in order of creation.
	uint id;
	/// Hash of the commit object.
	Hash hash;
	/// Timestamp taken from the "committer" line.
	uint time;
	/// Contents of the "author" / "committer" lines.
	string author, committer;
	/// Lines of the commit message.
	string[] message;
	/// Links to the neighbouring nodes of the graph.
	Commit*[] parents, children;
}
577 
/// Convert a string of 40 hexadecimal digits to a `Hash`.
/// Throws: Exception if the string is not exactly 40 characters long.
Hash toCommitHash(in char[] hash)
{
	enforce(hash.length == 40, "Bad hash length: " ~ hash);
	Hash bytes;
	for (size_t pos = 0; pos < bytes.length; pos++)
	{
		// Every byte is encoded as two hex digits
		auto hexPair = hash[2 * pos .. 2 * pos + 2];
		bytes[pos] = hexPair.to!ubyte(16);
	}
	return bytes;
}
586 
/// Convert a `Hash` to its representation
/// as a string of lowercase hexadecimal digits.
string toString(ref Hash hash)
{
	auto bytes = hash[];
	return "%(%02x%)".format(bytes);
}
591 
// toCommitHash accepts both lowercase and uppercase hex digits.
unittest
{
	assert(toCommitHash("0123456789abcdef0123456789ABCDEF01234567") == [0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0x01, 0x23, 0x45, 0x67]);
}
596 
/// Tries to match the default destination of `git clone`.
///
/// The result is the last component of the URL (the part after the
/// last `/` or `:`), without a `.git` extension. Trailing slashes and
/// a trailing `/.git` component are ignored, so that e.g.
/// `https://example.com/ae.git/` and `/path/to/ae/.git` both yield `ae`.
string repositoryNameFromURL(string url)
{
	static string stripSlashes(string s)
	{
		while (s.endsWith("/"))
			s = s[0 .. $ - 1];
		return s;
	}

	url = stripSlashes(url);
	if (url.endsWith("/.git"))
		url = stripSlashes(url[0 .. $ - "/.git".length]);

	// Position of the last separator (-1 if there is none)
	auto sep = max(url.lastIndexOf(':'), url.lastIndexOf('/'));
	return url[sep + 1 .. $].chomp(".git");
}

unittest
{
	assert(repositoryNameFromURL("https://github.com/CyberShadow/ae.git") == "ae");
	assert(repositoryNameFromURL("git@example.com:ae.git") == "ae");
	assert(repositoryNameFromURL("https://github.com/CyberShadow/ae.git/") == "ae");
	assert(repositoryNameFromURL("https://github.com/CyberShadow/ae/") == "ae");
	assert(repositoryNameFromURL("/home/user/ae/.git") == "ae");
	assert(repositoryNameFromURL("ae") == "ae");
	assert(repositoryNameFromURL("") == "");
}