1 /**
2  * Wrappers for the git command-line tools.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.sys.git;
15 
16 import std.algorithm;
17 import std.array;
18 import std.conv;
19 import std.exception;
20 import std.file;
21 import std.format;
22 import std.path;
23 import std.process;
24 import std.string;
25 import std.typecons;
26 import std.utf;
27 
28 import ae.sys.cmd;
29 import ae.sys.file;
30 import ae.utils.aa;
31 import ae.utils.meta;
32 import ae.utils.text;
33 
/// A git repository on disk, with primitives for running
/// git commands against it and for reading/writing its objects.
struct Repository
{
	/// Absolute path of the directory given to the constructor.
	string path;
	/// The ".git" directory, or the location a ".git" file points to.
	string gitDir;

	/// Environment passed to every git process spawned through this object.
	string[string] environment;

	/// Opens the repository located at `path`.
	/// Throws: Exception if `path` does not exist, or if ".git" is a
	/// file whose contents do not start with "gitdir: ".
	this(string path)
	{
		path = path.absolutePath();
		enforce(path.exists, "Repository path does not exist: " ~ path);
		gitDir = path.buildPath(".git");
		if (gitDir.exists && gitDir.isFile)
		{
			// A ".git" file holds a "gitdir: <path>" line naming the real
			// git directory. Check the prefix before cutting it off, so that
			// an unexpected file is reported instead of being mis-sliced.
			enum gitDirPrefix = "gitdir: ";
			auto link = gitDir.readText().strip();
			enforce(link.startsWith(gitDirPrefix), "Unrecognized .git file format: " ~ gitDir);
			gitDir = path.buildNormalizedPath(link[gitDirPrefix.length..$]);
		}
		this.path = path;
		// HOME and XDG_CONFIG_HOME are pointed at the git directory
		// (presumably so that the user's own git configuration is not picked up).
		version (Windows) {} else
			this.environment["GIT_CONFIG_NOSYSTEM"] = "1";
		this.environment["HOME"] = gitDir;
		this.environment["XDG_CONFIG_HOME"] = gitDir;
	}

	invariant()
	{
		// The constructor always populates the environment,
		// so a null AA means it was never run.
		assert(environment !is null, "Not initialized");
	}

	// Have just some primitives here.
	// Higher-level functionality can be added using UFCS.

	/// Runs git with the given arguments.
	void run(string[] args...) const
	{
		auto owd = pushd(workPathFor(args));
		return .run(["git"] ~ args, environment, path);
	}

	/// Runs git with the given arguments, and returns the result of the module-level `query`.
	string query(string[] args...) const
	{
		auto owd = pushd(workPathFor(args));
		return .query(["git"] ~ args, environment, path);
	}

	/// Runs git with the given arguments, and returns true if it exited with status 0.
	bool check(string[] args...) const
	{
		auto owd = pushd(workPathFor(args));
		return spawnProcess(["git"] ~ args, environment, Config.none, path).wait() == 0;
	}

	/// Starts git with the given arguments and the requested stream redirections.
	auto pipe(string[] args, Redirect redirect) const
	{
		auto owd = pushd(workPathFor(args));
		return pipeProcess(["git"] ~ args, redirect, environment, Config.none, path);
	}

	/// Starts git with the given arguments, redirecting its stdin and stdout.
	auto pipe(string[] args...) const { return pipe(args, Redirect.stdin | Redirect.stdout); }

	/// Checks that a git command was actually given,
	/// and returns the directory it has to be run from (see workPath).
	private string workPathFor(string[] args) const
	{
		enforce(args.length > 0, "No git command specified");
		return workPath(args[0]);
	}

	/// Certain git commands (notably, bisect) must
	/// be run in the repository's root directory.
	private string workPath(string cmd) const
	{
		switch (cmd)
		{
			case "bisect":
			case "submodule":
				return path;
			default:
				return null;
		}
	}

	/// Builds the commit graph from the output of `git log --all --pretty=raw`,
	/// and records those refs from `git show-ref --dereference`
	/// which point at a commit seen in the log.
	History getHistory()
	{
		History history;

		// Looks up a commit by hash, registering a new one (with the next free ID) if needed.
		Commit* getCommit(Hash hash)
		{
			auto pcommit = hash in history.commits;
			return pcommit ? *pcommit : (history.commits[hash] = new Commit(history.numCommits++, hash));
		}

		Commit* commit;
		bool inSig; // PGP signature

		foreach (line; query([`log`, `--all`, `--pretty=raw`]).splitLines())
		{
			if (!line.length)
			{
				// An empty line ends the signature block, if we were in one
				inSig = false;
				continue;
			}

			if (inSig)
			{
				enforce(line.startsWith(" "), "Expected GPG signature line in git log");
				continue;
			}

			if (line.startsWith("commit "))
			{
				auto hash = line[7..$].toCommitHash();
				commit = getCommit(hash);
			}
			else if (line.startsWith("tree "))
				continue;
			else if (line.startsWith("parent "))
			{
				auto hash = line[7..$].toCommitHash();
				auto parent = getCommit(hash);
				commit.parents ~= parent;
				parent.children ~= commit;
			}
			else if (line.startsWith("author "))
				commit.author = line[7..$];
			else if (line.startsWith("committer "))
			{
				commit.committer = line[10..$];
				// The timestamp is the second-to-last space-separated field.
				// Parse it as a long and truncate: this gives the same result
				// as before for anything that fits in an int, but no longer
				// throws for timestamps of 2^31 and above.
				commit.time = cast(uint)line.split(" ")[$-2].to!long();
			}
			else if (line.startsWith("    "))
				commit.message ~= line[4..$];
			else if (line.startsWith("gpgsig "))
				inSig = true;
			else
				enforce(false, "Unknown line in git log: " ~ line);
		}

		foreach (line; query([`show-ref`, `--dereference`]).splitLines())
		{
			// Each line is a 40-character hash, one separator character, and the ref name
			auto h = line[0..40].toCommitHash();
			if (h in history.commits)
				history.refs[line[41..$]] = h;
		}

		return history;
	}

	/// Talks to a running `git cat-file --batch` process.
	struct ObjectReaderImpl
	{
		ProcessPipes pipes;

		/// Requests the object with the given name, and reads the reply.
		/// Throws: Exception if the reply header does not consist of three
		/// fields, or if the data or its terminating newline is cut short.
		GitObject read(string name)
		{
			pipes.stdin.writeln(name);
			pipes.stdin.flush();

			// The reply header is parsed as "<hash> <type> <size>"
			auto headerLine = pipes.stdout.safeReadln().strip();
			auto header = headerLine.split(" ");
			enforce(header.length == 3, "Malformed header during cat-file: " ~ headerLine);
			auto hash = header[0].toCommitHash();

			GitObject obj;
			obj.hash = hash;
			obj.type = header[1];
			auto size = to!size_t(header[2]);
			if (size)
			{
				auto data = new ubyte[size];
				auto read = pipes.stdout.rawRead(data);
				enforce(read.length == size, "Unexpected EOF during cat-file");
				obj.data = data.assumeUnique();
			}

			// The object data is followed by a single newline
			char[1] lf;
			pipes.stdout.rawRead(lf[]);
			enforce(lf[0] == '\n', "Terminating newline expected");

			return obj;
		}

		/// Reads the object with the given hash, and
		/// verifies that the reply is for that same hash.
		GitObject read(Hash hash)
		{
			auto obj = read(hash.toString());
			enforce(obj.hash == hash, "Unexpected object during cat-file");
			return obj;
		}

		/// Closes the process's stdin, and waits for it to exit.
		~this()
		{
			pipes.stdin.close();
			enforce(pipes.pid.wait() == 0, "git cat-file exited with failure");
		}
	}
	alias ObjectReader = RefCounted!ObjectReaderImpl;

	/// Spawn a cat-file process which can read git objects by demand.
	ObjectReader createObjectReader()
	{
		auto pipes = this.pipe(`cat-file`, `--batch`);
		return ObjectReader(pipes);
	}

	/// Run a batch cat-file query.
	GitObject[] getObjects(Hash[] hashes)
	{
		GitObject[] result;
		result.reserve(hashes.length);
		auto reader = createObjectReader();

		foreach (hash; hashes)
			result ~= reader.read(hash);

		return result;
	}

	/// Talks to a running `git hash-object -w --stdin-paths` process.
	struct ObjectWriterImpl
	{
		/// Set once a process has been attached; checked by the destructor.
		bool initialized;
		ProcessPipes pipes;

		this(ProcessPipes pipes)
		{
			this.pipes = pipes;
			initialized = true;
		}

		/// Writes one object with the given contents, and returns its hash.
		Hash write(in void[] data)
		{
			import std.random;
			// The process is given the name of a fresh, randomly-named
			// pipe, and the data is then sent through that pipe.
			auto p = NamedPipe("ae-sys-git-writeObjects-%d".format(uniform!ulong));
			pipes.stdin.writeln(p.fileName);
			pipes.stdin.flush();

			auto f = p.connect();
			f.rawWrite(data);
			f.flush();
			f.close();

			// The reply is the hash of the object just written
			return pipes.stdout.safeReadln().strip().toCommitHash();
		}

		/// Closes the process's stdin and waits for it to exit, if one was attached.
		~this()
		{
			if (initialized)
			{
				pipes.stdin.close();
				enforce(pipes.pid.wait() == 0, "git hash-object exited with failure");
				initialized = false;
			}
		}
	}
	alias ObjectWriter = RefCounted!ObjectWriterImpl;

	/// Writes objects of several types, starting one
	/// hash-object process per type the first time it is needed.
	struct ObjectMultiWriterImpl
	{
		Repository* repo;
		ObjectWriter treeWriter, blobWriter, commitWriter;

		/// Writes the object using the writer for its type, and returns its hash.
		/// Throws: Exception if the object's type is not tree, blob or commit.
		Hash write(in GitObject obj)
		{
			ObjectWriter* pwriter;
			switch (obj.type) // https://issues.dlang.org/show_bug.cgi?id=14595
			{
				case "tree"  : pwriter = &treeWriter  ; break;
				case "blob"  : pwriter = &blobWriter  ; break;
				case "commit": pwriter = &commitWriter; break;
				default: throw new Exception("Unknown object type: " ~ obj.type);
			}
			if (!pwriter.initialized)
				*pwriter = ObjectWriter(repo.pipe(`hash-object`, `-t`, obj.type, `-w`, `--stdin-paths`));
			return pwriter.write(obj.data);
		}
	}
	alias ObjectMultiWriter = RefCounted!ObjectMultiWriterImpl;

	/// Spawn a hash-object process which can hash and write git objects on the fly.
	ObjectWriter createObjectWriter(string type)
	{
		auto pipes = this.pipe(`hash-object`, `-t`, type, `-w`, `--stdin-paths`);
		return ObjectWriter(pipes);
	}

	/// ditto
	ObjectMultiWriter createObjectWriter()
	{
		return ObjectMultiWriter(&this);
	}

	/// Batch-write the given objects to the database.
	/// The hashes are saved to the "hash" fields of the passed objects.
	void writeObjects(GitObject[] objects)
	{
		// One writer process per distinct object type
		string[] allTypes = objects.map!(obj => obj.type).toSet().keys;
		foreach (type; allTypes)
		{
			auto writer = createObjectWriter(type);
			foreach (ref obj; objects)
				if (obj.type == type)
					obj.hash = writer.write(obj.data);
		}
	}

	/// Extract a commit's tree to a given directory
	void exportCommit(string commit, string path, ObjectReader reader, bool delegate(string) pathFilter = null)
	{
		exportTree(reader.read(commit).parseCommit().tree, path, reader, pathFilter);
	}

	/// Extract a tree to a given directory
	/// Params:
	///   treeHash   = hash of the tree object to extract
	///   path       = directory under which the entries are created
	///   reader     = object reader used to fetch trees and blobs
	///   pathFilter = if set, entries for which it returns false are skipped;
	///                it receives the entry's path within the tree, joined with "/"
	/// Throws: Exception on a tree entry with an unsupported mode.
	void exportTree(Hash treeHash, string path, ObjectReader reader, bool delegate(string) pathFilter = null)
	{
		void exportSubTree(Hash treeHash, string[] subPath)
		{
			auto tree = reader.read(treeHash).parseTree();
			foreach (entry; tree)
			{
				auto entrySubPath = subPath ~ entry.name;
				if (pathFilter && !pathFilter(entrySubPath.join("/")))
					continue;
				auto entryPath = buildPath([path] ~ entrySubPath);
				switch (entry.mode)
				{
					case octal!100644: // file
					case octal!100755: // executable file
						std.file.write(entryPath, reader.read(entry.hash).data);
						version (Posix)
						{
							// Make executable:
							// set the execute bit wherever the matching read bit is set
							if (entry.mode == octal!100755)
								entryPath.setAttributes(entryPath.getAttributes | ((entryPath.getAttributes & octal!444) >> 2));
						}
						break;
					case octal! 40000: // tree
						mkdirRecurse(entryPath);
						exportSubTree(entry.hash, entrySubPath);
						break;
					case octal!160000: // submodule
						// Only the directory is created; nothing is extracted into it
						mkdirRecurse(entryPath);
						break;
					default:
						throw new Exception("Unknown git file mode: %o".format(entry.mode));
				}
			}
		}
		exportSubTree(treeHash, null);
	}

	/// Import a directory tree into the object store, and return the new tree object's hash.
	/// Params:
	///   path       = directory to import
	///   writer     = object writer used to store the blobs and trees
	///   pathFilter = if set, entries for which it returns false are skipped;
	///                it receives the entry's path relative to `path`
	Hash importTree(string path, ObjectMultiWriter writer, bool delegate(string) pathFilter = null)
	{
		static // Error: variable ae.sys.git.Repository.importTree.writer has scoped destruction, cannot build closure
		Hash importSubTree(string path, string subPath, ref ObjectMultiWriter writer, bool delegate(string) pathFilter)
		{
			auto entries = subPath
				.dirEntries(SpanMode.shallow)
				.filter!(de => !pathFilter || pathFilter(de.relativePath(path)))
				.map!(de =>
					de.isDir
					// Directories are imported recursively, as sub-trees
					? GitObject.TreeEntry(
						octal!40000,
						de.baseName,
						importSubTree(path, buildPath(subPath, de.baseName), writer, pathFilter)
					)
					// Everything else is stored as a blob; the executable
					// mode is only ever used on Posix
					: GitObject.TreeEntry(
						isVersion!`Posix` && (de.attributes & octal!111) ? octal!100755 : octal!100644,
						de.baseName,
						writer.write(GitObject(Hash.init, "blob", cast(immutable(ubyte)[])read(de.name)))
					)
				)
				.array
				.sort!((a, b) => a.sortName < b.sortName).release
			;
			return writer.write(GitObject.createTree(entries));
		}
		return importSubTree(path, path, writer, pathFilter);
	}
}
386 
/// A raw git object: its hash, its type name, and its contents.
struct GitObject
{
	Hash hash;
	string type;
	immutable(ubyte)[] data;

	/// The fields of a commit object, as understood by parseCommit / createCommit.
	struct ParsedCommit
	{
		Hash tree;
		Hash[] parents;
		string author, committer; /// entire lines - name, email and date
		string[] message;
	}

	/// Parses this object's data as a commit.
	/// Header lines are read up to the first empty line;
	/// all lines after it become the message.
	/// Throws: Exception if this is not a commit object, or on a header
	/// line whose field is not tree, parent, author or committer.
	ParsedCommit parseCommit()
	{
		enforce(type == "commit", "Wrong object type");
		ParsedCommit result;
		auto lines = (cast(string)data).split('\n');
		foreach (n, line; lines)
		{
			if (line == "")
			{
				result.message = lines[n+1..$];
				break; // commit message begins
			}
			// Field name, then a space, then the value
			auto parts = line.findSplit(" ");
			auto field = parts[0];
			line = parts[2];
			switch (field)
			{
				case "tree":
					result.tree = line.toCommitHash();
					break;
				case "parent":
					result.parents ~= line.toCommitHash();
					break;
				case "author":
					result.author = line;
					break;
				case "committer":
					result.committer = line;
					break;
				default:
					throw new Exception("Unknown commit field: " ~ field);
			}
		}
		return result;
	}

	/// Builds a commit object (with an unset hash) from its parsed form.
	/// One "parent" line is emitted per parent, and
	/// the message lines are joined with newlines.
	static GitObject createCommit(ParsedCommit commit)
	{
		auto s = "tree %s\n%-(parent %s\n%|%)author %s\ncommitter %s\n\n%-(%s\n%)".format(
				commit.tree.toString(),
				commit.parents.map!(ae.sys.git.toString),
				commit.author,
				commit.committer,
				commit.message,
			);
		return GitObject(Hash.init, "commit", cast(immutable(ubyte)[])s);
	}

	/// One entry of a tree object.
	struct TreeEntry
	{
		uint mode;
		string name;
		Hash hash;

		/// The name as used for ordering entries within a tree:
		/// directories compare as if their name ended with a slash.
		/// Only the file type bits are examined, so that submodule
		/// entries (160000), which also have the 40000 bit set,
		/// are not mistaken for directories.
		@property string sortName() { return (mode & octal!170000) == octal!40000 ? name ~ "/" : name; }
	}

	/// Parses this object's data as a tree.
	/// Each entry is read as "<octal mode> <name>", a zero byte,
	/// and then Hash.sizeof bytes of hash.
	/// Throws: Exception if this is not a tree object, or on a malformed entry.
	TreeEntry[] parseTree()
	{
		enforce(type == "tree", "Wrong object type");
		TreeEntry[] result;
		auto rem = data;
		while (rem.length)
		{
			auto si = rem.countUntil(' ');
			auto zi = rem.countUntil(0);
			auto ei = zi + 1 + Hash.sizeof;
			// Validate the offsets before slicing with them, so that a missing
			// separator is reported as a malformed entry, not as a range violation.
			enforce(0 < si && si < zi && ei <= rem.length, "Malformed tree entry:\n" ~ hexDump(rem));
			auto str = cast(string)rem[0..zi];
			result ~= TreeEntry(str[0..si].to!uint(8), str[si+1..zi], cast(Hash)rem[zi+1..ei][0..20]); // https://issues.dlang.org/show_bug.cgi?id=13112
			rem = rem[ei..$];
		}
		return result;
	}

	/// Builds a tree object (with an unset hash) from the given entries,
	/// which are written out in the order given.
	static GitObject createTree(TreeEntry[] entries)
	{
		auto buf = appender!(ubyte[]);
		foreach (entry; entries)
		{
			buf.formattedWrite("%o %s\0", entry.mode, entry.name);
			buf.put(entry.hash[]);
		}
		return GitObject(Hash.init, "tree", buf.data.assumeUnique);
	}
}
487 
/// The commit graph of a repository.
struct History
{
	/// All known commits, by hash.
	Commit*[Hash] commits;
	/// Counter used to hand out commit IDs; starts at zero.
	uint numCommits;
	/// Ref name => hash of the commit it points to.
	Hash[string] refs;
}
494 
/// A git object hash, as 20 raw bytes.
alias Hash = ubyte[20];
496 
/// A node of the commit graph.
struct Commit
{
	/// Sequential number of this commit within its History.
	uint id;
	/// The commit's hash.
	Hash hash;
	/// Timestamp taken from the committer line.
	uint time;
	/// Author line, without the field name.
	string author;
	/// Committer line, without the field name.
	string committer;
	/// Commit message, one element per line.
	string[] message;
	/// Commits listed as this commit's parents.
	Commit*[] parents;
	/// Commits which list this commit as a parent.
	Commit*[] children;
}
506 
/// Converts a 40-digit hexadecimal string to a binary hash.
/// Throws: Exception if the string is not exactly 40 characters long;
/// the conversion itself fails on anything that is not a hex digit.
Hash toCommitHash(in char[] hash)
{
	enforce(hash.length == 40, "Bad hash length: " ~ hash);
	ubyte[20] bytes;
	for (size_t pos = 0; pos < bytes.length; pos++)
	{
		// Two hex digits per byte
		auto digits = hash[pos*2..pos*2+2];
		bytes[pos] = to!ubyte(digits, 16);
	}
	return bytes;
}
515 
/// Formats a hash as lowercase hexadecimal, two digits per byte.
string toString(ref Hash hash)
{
	auto bytes = hash[];
	return "%(%02x%)".format(bytes);
}
520 
unittest
{
	// Both lowercase and uppercase hex digits are accepted
	immutable ubyte[20] expected = [
		0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01, 0x23,
		0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0x01, 0x23, 0x45, 0x67,
	];
	auto parsed = toCommitHash("0123456789abcdef0123456789ABCDEF01234567");
	assert(parsed == expected);
}
525 
/// Tries to match the default destination of `git clone`.
/// Trailing slashes are ignored; the result is whatever follows the
/// last ':' or '/' of the remainder, with a ".git" suffix removed.
/// Returns: the guessed directory name, which is empty
/// if nothing follows the last separator.
string repositoryNameFromURL(string url)
{
	auto name = url;
	// "https://host/user/repo/" should still give "repo"
	while (name.endsWith("/"))
		name = name[0 .. $-1];
	// Taking what follows the last ':' or '/' gives the same result as
	// splitting by ':' and then by '/'. A result of -1 (no separator)
	// makes the slice below keep the whole string.
	auto sep = name.lastIndexOfAny(":/");
	return name[sep + 1 .. $].chomp(".git");
}

unittest
{
	assert(repositoryNameFromURL("https://github.com/CyberShadow/ae.git") == "ae");
	assert(repositoryNameFromURL("git@example.com:ae.git") == "ae");
	// Trailing slashes do not affect the result
	assert(repositoryNameFromURL("https://github.com/CyberShadow/ae.git/") == "ae");
	assert(repositoryNameFromURL("https://github.com/CyberShadow/ae/") == "ae");
	// No separators at all
	assert(repositoryNameFromURL("ae") == "ae");
}