1 /**
2  * Wrappers for the git command-line tools.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.sys.git;
15 
16 import std.algorithm;
17 import std.array;
18 import std.conv;
19 import std.exception;
20 import std.file;
21 import std.format;
22 import std.path;
23 import std.process;
24 import std.string;
25 import std.typecons;
26 import std.utf;
27 
28 import ae.sys.cmd;
29 import ae.sys.file;
30 import ae.utils.aa;
31 import ae.utils.meta;
32 import ae.utils.text;
33 
34 struct Repository
35 {
36 	string path;
37 	string gitDir;
38 
39 	string[string] environment;
40 	static immutable string[] commandPrefix = ["git", "-c", "core.autocrlf=false"];
41 
42 	this(string path)
43 	{
44 		path = path.absolutePath();
45 		enforce(path.exists, "Repository path does not exist: " ~ path);
46 		gitDir = path.buildPath(".git");
47 		if (gitDir.exists && gitDir.isFile)
48 			gitDir = path.buildNormalizedPath(gitDir.readText().strip()[8..$]);
49 		//path = path.replace(`\`, `/`);
50 		this.path = path;
51 		version (Windows) {} else
52 			this.environment["GIT_CONFIG_NOSYSTEM"] = "1";
53 		this.environment["HOME"] = gitDir;
54 		this.environment["XDG_CONFIG_HOME"] = gitDir;
55 	}
56 
57 	invariant()
58 	{
59 		assert(environment !is null, "Not initialized");
60 	}
61 
62 	// Have just some primitives here.
63 	// Higher-level functionality can be added using UFCS.
64 	void   run  (string[] args...) const { auto owd = pushd(workPath(args[0])); return .run  (commandPrefix ~ args, environment, path); }
65 	string query(string[] args...) const { auto owd = pushd(workPath(args[0])); return .query(commandPrefix ~ args, environment, path); }
66 	bool   check(string[] args...) const { auto owd = pushd(workPath(args[0])); return spawnProcess(commandPrefix ~ args, environment, Config.none, path).wait() == 0; }
67 	auto   pipe (string[] args, Redirect redirect)
68 	                               const { auto owd = pushd(workPath(args[0])); return pipeProcess(commandPrefix ~ args, redirect, environment, Config.none, path); }
69 	auto   pipe (string[] args...) const { return pipe(args, Redirect.stdin | Redirect.stdout); }
70 
71 	/// Certain git commands (notably, bisect) must
72 	/// be run in the repository's root directory.
73 	private string workPath(string cmd) const
74 	{
75 		switch (cmd)
76 		{
77 			case "bisect":
78 			case "submodule":
79 				return path;
80 			default:
81 				return null;
82 		}
83 	}
84 
85 	History getHistory(string[] extraRefs = null)
86 	{
87 		History history;
88 
89 		Commit* getCommit(Hash hash)
90 		{
91 			auto pcommit = hash in history.commits;
92 			return pcommit ? *pcommit : (history.commits[hash] = new Commit(history.numCommits++, hash));
93 		}
94 
95 		Commit* commit;
96 		bool inSig; // PGP signature
97 
98 		foreach (line; query([`log`, `--all`, `--pretty=raw`] ~ extraRefs).split('\n'))
99 		{
100 			if (!line.length)
101 			{
102 				inSig = false;
103 				continue;
104 			}
105 
106 			if (inSig)
107 			{
108 				enforce(line.startsWith(" "), "Expected GPG signature line in git log");
109 				continue;
110 			}
111 
112 			if (line.startsWith("commit "))
113 			{
114 				auto hash = line[7..$].toCommitHash();
115 				commit = getCommit(hash);
116 			}
117 			else
118 			if (line.startsWith("tree "))
119 				continue;
120 			else
121 			if (line.startsWith("parent "))
122 			{
123 				auto hash = line[7..$].toCommitHash();
124 				auto parent = getCommit(hash);
125 				commit.parents ~= parent;
126 				parent.children ~= commit;
127 			}
128 			else
129 			if (line.startsWith("author "))
130 				commit.author = line[7..$];
131 			else
132 			if (line.startsWith("committer "))
133 			{
134 				commit.committer = line[10..$];
135 				commit.time = line.split(" ")[$-2].to!int();
136 			}
137 			else
138 			if (line.startsWith("    "))
139 				commit.message ~= line[4..$];
140 			else
141 			if (line.startsWith("gpgsig "))
142 				inSig = true;
143 			else
144 				enforce(false, "Unknown line in git log: " ~ line);
145 		}
146 
147 		foreach (line; query([`show-ref`, `--dereference`]).splitLines())
148 		{
149 			auto h = line[0..40].toCommitHash();
150 			if (h in history.commits)
151 				history.refs[line[41..$]] = h;
152 		}
153 
154 		return history;
155 	}
156 
157 	struct ObjectReaderImpl
158 	{
159 		ProcessPipes pipes;
160 
161 		GitObject read(string name)
162 		{
163 			pipes.stdin.writeln(name);
164 			pipes.stdin.flush();
165 
166 			auto headerLine = pipes.stdout.safeReadln().strip();
167 			auto header = headerLine.split(" ");
168 			enforce(header.length == 3, "Malformed header during cat-file: " ~ headerLine);
169 			auto hash = header[0].toCommitHash();
170 
171 			GitObject obj;
172 			obj.hash = hash;
173 			obj.type = header[1];
174 			auto size = to!size_t(header[2]);
175 			if (size)
176 			{
177 				auto data = new ubyte[size];
178 				auto read = pipes.stdout.rawRead(data);
179 				enforce(read.length == size, "Unexpected EOF during cat-file");
180 				obj.data = data.assumeUnique();
181 			}
182 
183 			char[1] lf;
184 			pipes.stdout.rawRead(lf[]);
185 			enforce(lf[0] == '\n', "Terminating newline expected");
186 
187 			return obj;
188 		}
189 
190 		GitObject read(Hash hash)
191 		{
192 			auto obj = read(hash.toString());
193 			enforce(obj.hash == hash, "Unexpected object during cat-file");
194 			return obj;
195 		}
196 
197 		~this()
198 		{
199 			pipes.stdin.close();
200 			enforce(pipes.pid.wait() == 0, "git cat-file exited with failure");
201 		}
202 	}
203 	alias ObjectReader = RefCounted!ObjectReaderImpl;
204 
205 	/// Spawn a cat-file process which can read git objects by demand.
206 	ObjectReader createObjectReader()
207 	{
208 		auto pipes = this.pipe(`cat-file`, `--batch`);
209 		return ObjectReader(pipes);
210 	}
211 
212 	/// Run a batch cat-file query.
213 	GitObject[] getObjects(Hash[] hashes)
214 	{
215 		GitObject[] result;
216 		result.reserve(hashes.length);
217 		auto reader = createObjectReader();
218 
219 		foreach (hash; hashes)
220 			result ~= reader.read(hash);
221 
222 		return result;
223 	}
224 
225 	struct ObjectWriterImpl
226 	{
227 		bool initialized;
228 		ProcessPipes pipes;
229 
230 		this(ProcessPipes pipes)
231 		{
232 			this.pipes = pipes;
233 			initialized = true;
234 		}
235 
236 		Hash write(in void[] data)
237 		{
238 			import std.random;
239 			auto p = NamedPipe("ae-sys-git-writeObjects-%d".format(uniform!ulong));
240 			pipes.stdin.writeln(p.fileName);
241 			pipes.stdin.flush();
242 
243 			auto f = p.connect();
244 			f.rawWrite(data);
245 			f.flush();
246 			f.close();
247 
248 			return pipes.stdout.safeReadln().strip().toCommitHash();
249 		}
250 
251 		~this()
252 		{
253 			if (initialized)
254 			{
255 				pipes.stdin.close();
256 				enforce(pipes.pid.wait() == 0, "git hash-object exited with failure");
257 				initialized = false;
258 			}
259 		}
260 	}
261 	alias ObjectWriter = RefCounted!ObjectWriterImpl;
262 
263 	struct ObjectMultiWriterImpl
264 	{
265 		Repository* repo;
266 		ObjectWriter treeWriter, blobWriter, commitWriter;
267 
268 		Hash write(in GitObject obj)
269 		{
270 			ObjectWriter* pwriter;
271 			switch (obj.type) // https://issues.dlang.org/show_bug.cgi?id=14595
272 			{
273 				case "tree"  : pwriter = &treeWriter  ; break;
274 				case "blob"  : pwriter = &blobWriter  ; break;
275 				case "commit": pwriter = &commitWriter; break;
276 				default: throw new Exception("Unknown object type: " ~ obj.type);
277 			}
278 			if (!pwriter.initialized)
279 				*pwriter = ObjectWriter(repo.pipe(`hash-object`, `-t`, obj.type, `-w`, `--stdin-paths`));
280 			return pwriter.write(obj.data);
281 		}
282 	}
283 	alias ObjectMultiWriter = RefCounted!ObjectMultiWriterImpl;
284 
285 	/// Spawn a hash-object process which can hash and write git objects on the fly.
286 	ObjectWriter createObjectWriter(string type)
287 	{
288 		auto pipes = this.pipe(`hash-object`, `-t`, type, `-w`, `--stdin-paths`);
289 		return ObjectWriter(pipes);
290 	}
291 
292 	/// ditto
293 	ObjectMultiWriter createObjectWriter()
294 	{
295 		return ObjectMultiWriter(&this);
296 	}
297 
298 	/// Batch-write the given objects to the database.
299 	/// The hashes are saved to the "hash" fields of the passed objects.
300 	void writeObjects(GitObject[] objects)
301 	{
302 		string[] allTypes = objects.map!(obj => obj.type).toSet().keys;
303 		foreach (type; allTypes)
304 		{
305 			auto writer = createObjectWriter(type);
306 			foreach (ref obj; objects)
307 				if (obj.type == type)
308 					obj.hash = writer.write(obj.data);
309 		}
310 	}
311 
312 	/// Extract a commit's tree to a given directory
313 	void exportCommit(string commit, string path, ObjectReader reader, bool delegate(string) pathFilter = null)
314 	{
315 		exportTree(reader.read(commit).parseCommit().tree, path, reader, pathFilter);
316 	}
317 
318 	/// Extract a tree to a given directory
319 	void exportTree(Hash treeHash, string path, ObjectReader reader, bool delegate(string) pathFilter = null)
320 	{
321 		void exportSubTree(Hash treeHash, string[] subPath)
322 		{
323 			auto tree = reader.read(treeHash).parseTree();
324 			foreach (entry; tree)
325 			{
326 				auto entrySubPath = subPath ~ entry.name;
327 				if (pathFilter && !pathFilter(entrySubPath.join("/")))
328 					continue;
329 				auto entryPath = buildPath([path] ~ entrySubPath);
330 				switch (entry.mode)
331 				{
332 					case octal!100644: // file
333 					case octal!100755: // executable file
334 						std.file.write(entryPath, reader.read(entry.hash).data);
335 						version (Posix)
336 						{
337 							// Make executable
338 							if (entry.mode == octal!100755)
339 								entryPath.setAttributes(entryPath.getAttributes | ((entryPath.getAttributes & octal!444) >> 2));
340 						}
341 						break;
342 					case octal! 40000: // tree
343 						mkdirRecurse(entryPath);
344 						exportSubTree(entry.hash, entrySubPath);
345 						break;
346 					case octal!160000: // submodule
347 						mkdirRecurse(entryPath);
348 						break;
349 					default:
350 						throw new Exception("Unknown git file mode: %o".format(entry.mode));
351 				}
352 			}
353 		}
354 		exportSubTree(treeHash, null);
355 	}
356 
357 	/// Import a directory tree into the object store, and return the new tree object's hash.
358 	Hash importTree(string path, ObjectMultiWriter writer, bool delegate(string) pathFilter = null)
359 	{
360 		static // Error: variable ae.sys.git.Repository.importTree.writer has scoped destruction, cannot build closure
361 		Hash importSubTree(string path, string subPath, ref ObjectMultiWriter writer, bool delegate(string) pathFilter)
362 		{
363 			auto entries = subPath
364 				.dirEntries(SpanMode.shallow)
365 				.filter!(de => !pathFilter || pathFilter(de.relativePath(path)))
366 				.map!(de =>
367 					de.isDir
368 					? GitObject.TreeEntry(
369 						octal!40000,
370 						de.baseName,
371 						importSubTree(path, buildPath(subPath, de.baseName), writer, pathFilter)
372 					)
373 					: GitObject.TreeEntry(
374 						isVersion!`Posix` && (de.attributes & octal!111) ? octal!100755 : octal!100644,
375 						de.baseName,
376 						writer.write(GitObject(Hash.init, "blob", cast(immutable(ubyte)[])read(de.name)))
377 					)
378 				)
379 				.array
380 				.sort!((a, b) => a.sortName < b.sortName).release
381 			;
382 			return writer.write(GitObject.createTree(entries));
383 		}
384 		return importSubTree(path, path, writer, pathFilter);
385 	}
386 }
387 
388 struct GitObject
389 {
390 	Hash hash;
391 	string type;
392 	immutable(ubyte)[] data;
393 
394 	struct ParsedCommit
395 	{
396 		Hash tree;
397 		Hash[] parents;
398 		string author, committer; /// entire lines - name, email and date
399 		string[] message;
400 	}
401 
402 	ParsedCommit parseCommit()
403 	{
404 		enforce(type == "commit", "Wrong object type");
405 		ParsedCommit result;
406 		auto lines = (cast(string)data).split('\n');
407 		foreach (n, line; lines)
408 		{
409 			if (line == "")
410 			{
411 				result.message = lines[n+1..$];
412 				break; // commit message begins
413 			}
414 			auto parts = line.findSplit(" ");
415 			auto field = parts[0];
416 			line = parts[2];
417 			switch (field)
418 			{
419 				case "tree":
420 					result.tree = line.toCommitHash();
421 					break;
422 				case "parent":
423 					result.parents ~= line.toCommitHash();
424 					break;
425 				case "author":
426 					result.author = line;
427 					break;
428 				case "committer":
429 					result.committer = line;
430 					break;
431 				default:
432 					throw new Exception("Unknown commit field: " ~ field);
433 			}
434 		}
435 		return result;
436 	}
437 
438 	static GitObject createCommit(ParsedCommit commit)
439 	{
440 		auto s = "tree %s\n%-(parent %s\n%|%)author %s\ncommitter %s\n\n%-(%s\n%)".format(
441 				commit.tree.toString(),
442 				commit.parents.map!(ae.sys.git.toString),
443 				commit.author,
444 				commit.committer,
445 				commit.message,
446 			);
447 		return GitObject(Hash.init, "commit", cast(immutable(ubyte)[])s);
448 	}
449 
450 	struct TreeEntry
451 	{
452 		uint mode;
453 		string name;
454 		Hash hash;
455 
456 		@property string sortName() { return (mode & octal!40000) ? name ~ "/" : name; }
457 	}
458 
459 	TreeEntry[] parseTree()
460 	{
461 		enforce(type == "tree", "Wrong object type");
462 		TreeEntry[] result;
463 		auto rem = data;
464 		while (rem.length)
465 		{
466 			auto si = rem.countUntil(' ');
467 			auto zi = rem.countUntil(0);
468 			auto ei = zi + 1 + Hash.sizeof;
469 			auto str = cast(string)rem[0..zi];
470 			enforce(0 < si && si < zi && ei <= rem.length, "Malformed tree entry:\n" ~ hexDump(rem));
471 			result ~= TreeEntry(str[0..si].to!uint(8), str[si+1..zi], cast(Hash)rem[zi+1..ei][0..20]); // https://issues.dlang.org/show_bug.cgi?id=13112
472 			rem = rem[ei..$];
473 		}
474 		return result;
475 	}
476 
477 	static GitObject createTree(TreeEntry[] entries)
478 	{
479 		auto buf = appender!(immutable(ubyte)[]);
480 		foreach (entry; entries)
481 		{
482 			buf.formattedWrite("%o %s\0", entry.mode, entry.name);
483 			buf.put(entry.hash[]);
484 		}
485 		return GitObject(Hash.init, "tree", buf.data);
486 	}
487 }
488 
/// A repository's commit graph and refs.
struct History
{
	/// Commits, indexed by hash.
	Commit*[Hash] commits;
	/// Number of commit records handed out so far (starts at zero).
	uint numCommits;
	/// Commit hashes, indexed by ref name.
	Hash[string] refs;
}
495 
/// An object hash, as 20 raw bytes.
alias Hash = ubyte[20];
497 
/// A node of the commit graph.
struct Commit
{
	/// Sequence number of the commit record.
	uint id;
	/// The commit's hash.
	Hash hash;
	/// Timestamp, as found on the committer line.
	uint time;
	/// Contents of the author line.
	string author;
	/// Contents of the committer line.
	string committer;
	/// Lines of the commit message.
	string[] message;
	/// Parent commits.
	Commit*[] parents;
	/// Commits which have this commit as a parent.
	Commit*[] children;
}
507 
/// Converts a 40-digit hexadecimal string to a hash.
/// Throws: Exception if the string is not exactly 40 characters long;
/// conversion errors from `to` for non-hexadecimal input.
Hash toCommitHash(in char[] hash)
{
	enforce(hash.length == 40, "Bad hash length: " ~ hash);
	Hash bytes;
	for (size_t pos = 0; pos < bytes.length; pos++)
	{
		// Every byte is encoded by two consecutive hex digits.
		auto digits = hash[pos * 2 .. pos * 2 + 2];
		bytes[pos] = to!ubyte(digits, 16);
	}
	return bytes;
}
516 
/// Formats a hash as 40 lowercase hexadecimal digits.
/// The hash is taken as `in`, so constant hashes and temporaries
/// are accepted in addition to mutable variables.
string toString(in Hash hash)
{
	return format("%(%02x%)", hash[]);
}
521 
unittest
{
	// Lowercase and uppercase hex digits decode to the same byte values.
	immutable ubyte[20] expected = [
		0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
		0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
		0x01, 0x23, 0x45, 0x67,
	];
	auto parsed = toCommitHash("0123456789abcdef0123456789ABCDEF01234567");
	assert(parsed == expected);
}
526 
/// Tries to match the default destination of `git clone`:
/// the last component of the URL (after the last '/' or ':'),
/// without a ".git" suffix. Trailing slashes and a trailing "/.git"
/// are ignored, so "host:dir/repo.git/" and "/path/repo/.git" both
/// give "repo". Returns an empty string if no name can be found.
string repositoryNameFromURL(string url)
{
	static string dropTrailingSlashes(string s)
	{
		while (s.endsWith("/"))
			s = s[0 .. $ - 1];
		return s;
	}

	auto name = dropTrailingSlashes(url);
	name = dropTrailingSlashes(name.chomp("/.git"));

	// The name starts after the last path or host separator (if any).
	auto start = max(name.lastIndexOf('/'), name.lastIndexOf(':')) + 1;
	return name[start .. $].chomp(".git");
}
535 
unittest
{
	// [URL, expected repository name]
	static immutable string[2][] cases = [
		["https://github.com/CyberShadow/ae.git", "ae"],
		["git@example.com:ae.git", "ae"],
	];
	foreach (testCase; cases)
		assert(repositoryNameFromURL(testCase[0]) == testCase[1]);
}