1 /**
2  * Wrappers for the git command-line tools.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.sys.git;
15 
16 import std.algorithm;
17 import std.array;
18 import std.conv;
19 import std.exception;
20 import std.file;
21 import std.format;
22 import std.path;
23 import std.process;
24 import std.string;
25 import std.typecons;
26 import std.utf;
27 
28 import ae.sys.cmd;
29 import ae.sys.file;
30 import ae.utils.aa;
31 import ae.utils.meta;
32 import ae.utils.text;
33 
34 struct Repository
35 {
	/// Absolute path of the repository's work tree (set by the constructor).
	string path;
	/// Path of the git directory: `path/.git`, or the
	/// directory named by a `.git` file found there.
	string gitDir;

	/// Environment passed to every git process started through this object.
	string[string] environment;
	/// Leading part of every git command line built by this object.
	string[] commandPrefix;

	/// Options appended to `commandPrefix` when a `Repository` is constructed.
	static string[] globalOptions; // per-thread
43 
44 	this(string path)
45 	{
46 		path = path.absolutePath();
47 		enforce(path.exists, "Repository path does not exist: " ~ path);
48 		gitDir = path.buildPath(".git");
49 		if (gitDir.exists && gitDir.isFile)
50 			gitDir = path.buildNormalizedPath(gitDir.readText().strip()[8..$]);
51 		//path = path.replace(`\`, `/`);
52 		this.path = path;
53 		this.commandPrefix = ["git", "-c", "core.autocrlf=false", "-C", path] ~ globalOptions;
54 		version (Windows) {} else
55 			this.environment["GIT_CONFIG_NOSYSTEM"] = "1";
56 		this.environment["HOME"] = gitDir;
57 		this.environment["XDG_CONFIG_HOME"] = gitDir;
58 	}
59 
	/// Guards against use of an instance which did not go through the
	/// constructor (the constructor always populates `environment`).
	invariant()
	{
		assert(environment !is null, "Not initialized");
	}
64 
65 	// Have just some primitives here.
66 	// Higher-level functionality can be added using UFCS.
67 	void   run  (string[] args...) const { return .run  (commandPrefix ~ args, environment, path); }
68 	string query(string[] args...) const { return .query(commandPrefix ~ args, environment, path).strip(); }
69 	bool   check(string[] args...) const { return spawnProcess(commandPrefix ~ args, environment, Config.none, path).wait() == 0; }
70 	auto   pipe (string[] args, Redirect redirect)
71 	                               const { return pipeProcess(commandPrefix ~ args, redirect, environment, Config.none, path); }
72 	auto   pipe (string[] args...) const { return pipe(args, Redirect.stdin | Redirect.stdout); }
73 
74 	/// Certain git commands (notably, bisect) must
75 	/// be run in the repository's root directory.
76 	private string workPath(string cmd) const
77 	{
78 		switch (cmd)
79 		{
80 			case "bisect":
81 			case "submodule":
82 				return path;
83 			default:
84 				return null;
85 		}
86 	}
87 
88 	History getHistory(string[] extraRefs = null)
89 	{
90 		History history;
91 
92 		Commit* getCommit(Hash hash)
93 		{
94 			auto pcommit = hash in history.commits;
95 			return pcommit ? *pcommit : (history.commits[hash] = new Commit(history.numCommits++, hash));
96 		}
97 
98 		Commit* commit;
99 		string currentBlock;
100 
101 		foreach (line; query([`log`, `--all`, `--pretty=raw`] ~ extraRefs).split('\n'))
102 		{
103 			if (!line.length)
104 			{
105 				if (currentBlock)
106 					currentBlock = null;
107 				continue;
108 			}
109 
110 			if (currentBlock)
111 			{
112 				enforce(line.startsWith(" "), "Expected " ~ currentBlock ~ " line in git log");
113 				continue;
114 			}
115 
116 			if (line.startsWith("commit "))
117 			{
118 				auto hash = line[7..$].toCommitHash();
119 				commit = getCommit(hash);
120 			}
121 			else
122 			if (line.startsWith("tree "))
123 				continue;
124 			else
125 			if (line.startsWith("parent "))
126 			{
127 				auto hash = line[7..$].toCommitHash();
128 				auto parent = getCommit(hash);
129 				commit.parents ~= parent;
130 				parent.children ~= commit;
131 			}
132 			else
133 			if (line.startsWith("author "))
134 				commit.author = line[7..$];
135 			else
136 			if (line.startsWith("committer "))
137 			{
138 				commit.committer = line[10..$];
139 				commit.time = line.split(" ")[$-2].to!int();
140 			}
141 			else
142 			if (line.startsWith("    "))
143 				commit.message ~= line[4..$];
144 			else
145 			if (line.startsWith("gpgsig "))
146 				currentBlock = "GPG signature";
147 			else
148 			if (line.startsWith("mergetag "))
149 				currentBlock = "Tag merge";
150 			else
151 				enforce(false, "Unknown line in git log: " ~ line);
152 		}
153 
154 		foreach (line; query([`show-ref`, `--dereference`]).splitLines())
155 		{
156 			auto h = line[0..40].toCommitHash();
157 			if (h in history.commits)
158 				history.refs[line[41..$]] = h;
159 		}
160 
161 		return history;
162 	}
163 
164 	struct ObjectReaderImpl
165 	{
166 		ProcessPipes pipes;
167 
168 		GitObject read(string name)
169 		{
170 			pipes.stdin.writeln(name);
171 			pipes.stdin.flush();
172 
173 			auto headerLine = pipes.stdout.safeReadln().strip();
174 			auto header = headerLine.split(" ");
175 			enforce(header.length == 3, "Malformed header during cat-file: " ~ headerLine);
176 			auto hash = header[0].toCommitHash();
177 
178 			GitObject obj;
179 			obj.hash = hash;
180 			obj.type = header[1];
181 			auto size = to!size_t(header[2]);
182 			if (size)
183 			{
184 				auto data = new ubyte[size];
185 				auto read = pipes.stdout.rawRead(data);
186 				enforce(read.length == size, "Unexpected EOF during cat-file");
187 				obj.data = data.assumeUnique();
188 			}
189 
190 			char[1] lf;
191 			pipes.stdout.rawRead(lf[]);
192 			enforce(lf[0] == '\n', "Terminating newline expected");
193 
194 			return obj;
195 		}
196 
197 		GitObject read(Hash hash)
198 		{
199 			auto obj = read(hash.toString());
200 			enforce(obj.hash == hash, "Unexpected object during cat-file");
201 			return obj;
202 		}
203 
204 		~this()
205 		{
206 			pipes.stdin.close();
207 			enforce(pipes.pid.wait() == 0, "git cat-file exited with failure");
208 		}
209 	}
210 	alias ObjectReader = RefCounted!ObjectReaderImpl;
211 
212 	/// Spawn a cat-file process which can read git objects by demand.
213 	ObjectReader createObjectReader()
214 	{
215 		auto pipes = this.pipe(`cat-file`, `--batch`);
216 		return ObjectReader(pipes);
217 	}
218 
219 	/// Run a batch cat-file query.
220 	GitObject[] getObjects(Hash[] hashes)
221 	{
222 		GitObject[] result;
223 		result.reserve(hashes.length);
224 		auto reader = createObjectReader();
225 
226 		foreach (hash; hashes)
227 			result ~= reader.read(hash);
228 
229 		return result;
230 	}
231 
232 	struct ObjectWriterImpl
233 	{
234 		bool initialized;
235 		ProcessPipes pipes;
236 
237 		this(ProcessPipes pipes)
238 		{
239 			this.pipes = pipes;
240 			initialized = true;
241 		}
242 
243 		Hash write(in void[] data)
244 		{
245 			import std.random;
246 			auto p = NamedPipe("ae-sys-git-writeObjects-%d".format(uniform!ulong));
247 			pipes.stdin.writeln(p.fileName);
248 			pipes.stdin.flush();
249 
250 			auto f = p.connect();
251 			f.rawWrite(data);
252 			f.flush();
253 			f.close();
254 
255 			return pipes.stdout.safeReadln().strip().toCommitHash();
256 		}
257 
258 		~this()
259 		{
260 			if (initialized)
261 			{
262 				pipes.stdin.close();
263 				enforce(pipes.pid.wait() == 0, "git hash-object exited with failure");
264 				initialized = false;
265 			}
266 		}
267 	}
268 	alias ObjectWriter = RefCounted!ObjectWriterImpl;
269 
	/// Writes objects of type "tree", "blob" or "commit",
	/// keeping a separate `ObjectWriter` for each type.
	struct ObjectMultiWriterImpl
	{
		// Repository used to start the writers.
		// NOTE(review): this is a raw pointer (see `createObjectWriter`), so the
		// Repository presumably must not move or go away while this is in use - confirm.
		Repository* repo;
		ObjectWriter treeWriter, blobWriter, commitWriter;

		/// Write `obj` using the writer for `obj.type`,
		/// starting that writer's process on first use.
		/// Returns: the hash of the written object.
		/// Throws: `Exception` if `obj.type` is not tree, blob or commit.
		Hash write(in GitObject obj)
		{
			ObjectWriter* pwriter;
			switch (obj.type) // https://issues.dlang.org/show_bug.cgi?id=14595
			{
				case "tree"  : pwriter = &treeWriter  ; break;
				case "blob"  : pwriter = &blobWriter  ; break;
				case "commit": pwriter = &commitWriter; break;
				default: throw new Exception("Unknown object type: " ~ obj.type);
			}
			// A writer which was never assigned has `initialized == false`.
			if (!pwriter.initialized)
				*pwriter = ObjectWriter(repo.pipe(`hash-object`, `-t`, obj.type, `-w`, `--stdin-paths`));
			return pwriter.write(obj.data);
		}
	}
	alias ObjectMultiWriter = RefCounted!ObjectMultiWriterImpl;
291 
292 	/// Spawn a hash-object process which can hash and write git objects on the fly.
293 	ObjectWriter createObjectWriter(string type)
294 	{
295 		auto pipes = this.pipe(`hash-object`, `-t`, type, `-w`, `--stdin-paths`);
296 		return ObjectWriter(pipes);
297 	}
298 
299 	/// ditto
300 	ObjectMultiWriter createObjectWriter()
301 	{
302 		return ObjectMultiWriter(&this);
303 	}
304 
305 	/// Batch-write the given objects to the database.
306 	/// The hashes are saved to the "hash" fields of the passed objects.
307 	void writeObjects(GitObject[] objects)
308 	{
309 		string[] allTypes = objects.map!(obj => obj.type).toSet().keys;
310 		foreach (type; allTypes)
311 		{
312 			auto writer = createObjectWriter(type);
313 			foreach (ref obj; objects)
314 				if (obj.type == type)
315 					obj.hash = writer.write(obj.data);
316 		}
317 	}
318 
319 	/// Extract a commit's tree to a given directory
320 	void exportCommit(string commit, string path, ObjectReader reader, bool delegate(string) pathFilter = null)
321 	{
322 		exportTree(reader.read(commit).parseCommit().tree, path, reader, pathFilter);
323 	}
324 
325 	/// Extract a tree to a given directory
326 	void exportTree(Hash treeHash, string path, ObjectReader reader, bool delegate(string) pathFilter = null)
327 	{
328 		void exportSubTree(Hash treeHash, string[] subPath)
329 		{
330 			auto tree = reader.read(treeHash).parseTree();
331 			foreach (entry; tree)
332 			{
333 				auto entrySubPath = subPath ~ entry.name;
334 				if (pathFilter && !pathFilter(entrySubPath.join("/")))
335 					continue;
336 				auto entryPath = buildPath([path] ~ entrySubPath);
337 				switch (entry.mode)
338 				{
339 					case octal!100644: // file
340 					case octal!100755: // executable file
341 						std.file.write(entryPath, reader.read(entry.hash).data);
342 						version (Posix)
343 						{
344 							// Make executable
345 							if (entry.mode == octal!100755)
346 								entryPath.setAttributes(entryPath.getAttributes | ((entryPath.getAttributes & octal!444) >> 2));
347 						}
348 						break;
349 					case octal! 40000: // tree
350 						mkdirRecurse(entryPath);
351 						exportSubTree(entry.hash, entrySubPath);
352 						break;
353 					case octal!160000: // submodule
354 						mkdirRecurse(entryPath);
355 						break;
356 					default:
357 						throw new Exception("Unknown git file mode: %o".format(entry.mode));
358 				}
359 			}
360 		}
361 		exportSubTree(treeHash, null);
362 	}
363 
	/// Import a directory tree into the object store, and return the new tree object's hash.
	///
	/// Params:
	///   path       = directory to import
	///   writer     = writer used to store the created blobs and trees
	///   pathFilter = if set, called with each directory entry's path relative
	///                to `path`; entries for which it returns `false` are left out
	Hash importTree(string path, ObjectMultiWriter writer, bool delegate(string) pathFilter = null)
	{
		// `path` is the root of the import (used to compute the paths given to
		// the filter), `subPath` is the directory being imported by this call.
		static // Error: variable ae.sys.git.Repository.importTree.writer has scoped destruction, cannot build closure
		Hash importSubTree(string path, string subPath, ref ObjectMultiWriter writer, bool delegate(string) pathFilter)
		{
			// Directories recurse and become tree entries; anything else is
			// written as a blob, marked executable (on Posix only) if any of
			// its execute permission bits is set.
			auto entries = subPath
				.dirEntries(SpanMode.shallow)
				.filter!(de => !pathFilter || pathFilter(de.relativePath(path)))
				.map!(de =>
					de.isDir
					? GitObject.TreeEntry(
						octal!40000,
						de.baseName,
						importSubTree(path, buildPath(subPath, de.baseName), writer, pathFilter)
					)
					: GitObject.TreeEntry(
						isVersion!`Posix` && (de.attributes & octal!111) ? octal!100755 : octal!100644,
						de.baseName,
						writer.write(GitObject(Hash.init, "blob", cast(immutable(ubyte)[])read(de.name)))
					)
				)
				.array
				.sort!((a, b) => a.sortName < b.sortName).release
			;
			// The entries were sorted by `sortName` before building the tree object.
			return writer.write(GitObject.createTree(entries));
		}
		return importSubTree(path, path, writer, pathFilter);
	}
393 
394 	struct RefWriterImpl
395 	{
396 		bool initialized;
397 		ProcessPipes pipes;
398 
399 		this(ProcessPipes pipes)
400 		{
401 			this.pipes = pipes;
402 			initialized = true;
403 		}
404 
405 		private void op(string op, bool noDeref, string refName, Hash*[] hashes...)
406 		{
407 			if (noDeref)
408 				pipes.stdin.write("option no-deref\0");
409 			pipes.stdin.write(op, " ", refName, '\0');
410 			foreach (hash; hashes)
411 			{
412 				if (hash)
413 					pipes.stdin.write((*hash).toString());
414 				pipes.stdin.write('\0');
415 			}
416 			pipes.stdin.flush();
417 		}
418 
419 		void update   (string refName, Hash newValue               , bool noDeref = false) { op("update", noDeref, refName, &newValue, null     ); }
420 		void update   (string refName, Hash newValue, Hash oldValue, bool noDeref = false) { op("update", noDeref, refName, &newValue, &oldValue); }
421 		void create   (string refName, Hash newValue               , bool noDeref = false) { op("create", noDeref, refName, &newValue           ); }
422 		void deleteRef(string refName                              , bool noDeref = false) { op("delete", noDeref, refName,            null     ); }
423 		void deleteRef(string refName,                Hash oldValue, bool noDeref = false) { op("delete", noDeref, refName,            &oldValue); }
424 		void verify   (string refName                              , bool noDeref = false) { op("verify", noDeref, refName,            null     ); }
425 		void verify   (string refName,                Hash oldValue, bool noDeref = false) { op("verify", noDeref, refName,            &oldValue); }
426 
427 		~this()
428 		{
429 			if (initialized)
430 			{
431 				pipes.stdin.close();
432 				enforce(pipes.pid.wait() == 0, "git update-ref exited with failure");
433 				initialized = false;
434 			}
435 		}
436 	}
437 	alias RefWriter = RefCounted!RefWriterImpl;
438 
439 	/// Spawn a update-ref process which can update git refs on the fly.
440 	RefWriter createRefWriter()
441 	{
442 		auto pipes = this.pipe(`update-ref`, `-z`, `--stdin`);
443 		return RefWriter(pipes);
444 	}
445 }
446 
447 struct GitObject
448 {
	/// Hash of the object (left as `Hash.init` by `createCommit` / `createTree`).
	Hash hash;
	/// Object type name; "commit", "tree" and "blob" are used in this module.
	string type;
	/// Contents of the object.
	immutable(ubyte)[] data;

	/// The fields of a commit object, as extracted by `parseCommit`.
	struct ParsedCommit
	{
		/// Hash from the "tree" line.
		Hash tree;
		/// Hashes from the "parent" lines, in order of appearance.
		Hash[] parents;
		string author, committer; /// entire lines - name, email and date
		/// All lines after the first empty line.
		string[] message;
	}
460 
461 	ParsedCommit parseCommit()
462 	{
463 		enforce(type == "commit", "Wrong object type");
464 		ParsedCommit result;
465 		auto lines = (cast(string)data).split('\n');
466 		foreach (n, line; lines)
467 		{
468 			if (line == "")
469 			{
470 				result.message = lines[n+1..$];
471 				break; // commit message begins
472 			}
473 			auto parts = line.findSplit(" ");
474 			auto field = parts[0];
475 			line = parts[2];
476 			switch (field)
477 			{
478 				case "tree":
479 					result.tree = line.toCommitHash();
480 					break;
481 				case "parent":
482 					result.parents ~= line.toCommitHash();
483 					break;
484 				case "author":
485 					result.author = line;
486 					break;
487 				case "committer":
488 					result.committer = line;
489 					break;
490 				default:
491 					throw new Exception("Unknown commit field: " ~ field);
492 			}
493 		}
494 		return result;
495 	}
496 
	/// Build a commit object from its fields (the counterpart of `parseCommit`).
	/// The `hash` of the returned object is left as `Hash.init`.
	static GitObject createCommit(ParsedCommit commit)
	{
		// Layout: the tree line, one "parent" line per parent, the author and
		// committer lines, an empty line, then the message lines.
		// "%-(parent %s\n%|%)" repeats "parent <hash>\n" for every parent (nothing
		// follows "%|", so there is no separator), while "%-(%s\n%)" uses "\n" as
		// a separator only, adding no newline after the last message line.
		auto s = "tree %s\n%-(parent %s\n%|%)author %s\ncommitter %s\n\n%-(%s\n%)".format(
				commit.tree.toString(),
				commit.parents.map!(ae.sys.git.toString),
				commit.author,
				commit.committer,
				commit.message,
			);
		return GitObject(Hash.init, "commit", cast(immutable(ubyte)[])s);
	}
508 
509 	struct TreeEntry
510 	{
511 		uint mode;
512 		string name;
513 		Hash hash;
514 
515 		@property string sortName() { return (mode & octal!40000) ? name ~ "/" : name; }
516 	}
517 
518 	TreeEntry[] parseTree()
519 	{
520 		enforce(type == "tree", "Wrong object type");
521 		TreeEntry[] result;
522 		auto rem = data;
523 		while (rem.length)
524 		{
525 			auto si = rem.countUntil(' ');
526 			auto zi = rem.countUntil(0);
527 			auto ei = zi + 1 + Hash.sizeof;
528 			auto str = cast(string)rem[0..zi];
529 			enforce(0 < si && si < zi && ei <= rem.length, "Malformed tree entry:\n" ~ hexDump(rem));
530 			result ~= TreeEntry(str[0..si].to!uint(8), str[si+1..zi], cast(Hash)rem[zi+1..ei][0..20]); // https://issues.dlang.org/show_bug.cgi?id=13112
531 			rem = rem[ei..$];
532 		}
533 		return result;
534 	}
535 
536 	static GitObject createTree(TreeEntry[] entries)
537 	{
538 		auto buf = appender!(immutable(ubyte)[]);
539 		foreach (entry; entries)
540 		{
541 			buf.formattedWrite("%o %s\0", entry.mode, entry.name);
542 			buf.put(entry.hash[]);
543 		}
544 		return GitObject(Hash.init, "tree", buf.data);
545 	}
546 }
547 
/// The commit graph and refs of a repository, as built by `Repository.getHistory`.
struct History
{
	/// All known commits, by hash.
	Commit*[Hash] commits;
	/// Number of commits created so far; also the `id` given to the next new commit.
	uint numCommits = 0;
	/// Hashes of the refs which point to a known commit, by ref name.
	Hash[string] refs;
}
554 
/// The hash identifying a git object, as 20 raw bytes.
alias Hash = ubyte[20];
556 
/// A node of the commit graph built by `Repository.getHistory`.
struct Commit
{
	/// Sequence number, assigned in the order in which commits are first encountered.
	uint id;
	/// Hash of the commit.
	Hash hash;
	/// Timestamp taken from the committer line.
	uint time;
	/// Contents of the author and committer lines (without the field name).
	string author, committer;
	/// Lines of the commit message.
	string[] message;
	/// Links to the neighbouring commits in the graph.
	Commit*[] parents, children;
}
566 
/// Convert a 40-digit hexadecimal string to a `Hash`.
/// Throws: `Exception` if `hash` is not 40 characters long;
/// digits which are not hexadecimal are rejected by `to!ubyte`.
Hash toCommitHash(in char[] hash)
{
	enforce(hash.length == 40, "Bad hash length: " ~ hash);

	ubyte[20] bytes;
	for (size_t pos = 0; pos < bytes.length; pos++)
	{
		// Each byte is written as two hexadecimal digits.
		auto hexPair = hash[pos * 2 .. pos * 2 + 2];
		bytes[pos] = to!ubyte(hexPair, 16);
	}
	return bytes;
}
575 
/// Convert a `Hash` to its lowercase hexadecimal representation (two digits per byte).
string toString(ref Hash hash)
{
	auto hex = appender!string();
	foreach (octet; hash[])
		hex.formattedWrite("%02x", octet);
	return hex.data;
}
580 
// toCommitHash accepts both lowercase and uppercase hexadecimal digits.
unittest
{
	immutable ubyte[20] expected = [
		0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01, 0x23,
		0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0x01, 0x23, 0x45, 0x67,
	];
	auto parsed = toCommitHash("0123456789abcdef0123456789ABCDEF01234567");
	assert(parsed == expected);
}
585 
/// Tries to match the default destination of `git clone`.
///
/// The name is the part of the URL after its last '/' or ':' separator,
/// with a ".git" suffix removed. Trailing slashes and a trailing "/.git"
/// component are ignored, so that "host:foo/.git" yields "foo".
///
/// Returns: the directory name, or an empty string if none can be derived
/// (e.g. for an empty URL).
string repositoryNameFromURL(string url)
{
	// Trailing slashes carry no name ("host/repo/" still names "repo").
	static string dropTrailingSlashes(string s)
	{
		while (s.endsWith("/"))
			s = s[0 .. $ - 1];
		return s;
	}

	url = dropTrailingSlashes(url);
	// A URL may point directly at the ".git" directory of a work tree.
	if (url.endsWith("/.git"))
		url = dropTrailingSlashes(url[0 .. $ - "/.git".length]);

	// lastIndexOf returns -1 when there is no separator,
	// in which case the whole string is the name.
	auto separator = max(url.lastIndexOf('/'), url.lastIndexOf(':'));
	return url[separator + 1 .. $].chomp(".git");
}
594 
// URL forms with a path separator and with an scp-style ':' separator.
unittest
{
	static immutable string[2][] cases = [
		["https://github.com/CyberShadow/ae.git", "ae"],
		["git@example.com:ae.git", "ae"],
	];
	foreach (c; cases)
		assert(repositoryNameFromURL(c[0]) == c[1]);
}