1 /**
2  * Wrappers for the git command-line tools.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.sys.git;
15 
16 import std.algorithm;
17 import std.array;
18 import std.conv;
19 import std.exception;
20 import std.file;
21 import std.format;
22 import std.path;
23 import std.process;
24 import std.string;
25 import std.typecons;
26 import std.utf;
27 
28 import ae.sys.cmd;
29 import ae.sys.file;
30 import ae.utils.aa;
31 import ae.utils.meta;
32 import ae.utils.text;
33 
34 struct Repository
35 {
	/// Location of the repository; the constructor stores the absolute form of its argument here.
	string path;
	/// Location of the git directory: `<path>/.git`, or the directory named
	/// inside `.git` when that entry is a regular file.
	string gitDir;

	/// Environment handed to every git process spawned through this object
	/// (populated by the constructor).
	string[string] environment;
	/// Arguments placed in front of every git invocation; forces `core.autocrlf=false`.
	static immutable string[] commandPrefix = ["git", "-c", "core.autocrlf=false"];
41 
42 	this(string path)
43 	{
44 		path = path.absolutePath();
45 		enforce(path.exists, "Repository path does not exist: " ~ path);
46 		gitDir = path.buildPath(".git");
47 		if (gitDir.exists && gitDir.isFile)
48 			gitDir = path.buildNormalizedPath(gitDir.readText().strip()[8..$]);
49 		//path = path.replace(`\`, `/`);
50 		this.path = path;
51 		version (Windows) {} else
52 			this.environment["GIT_CONFIG_NOSYSTEM"] = "1";
53 		this.environment["HOME"] = gitDir;
54 		this.environment["XDG_CONFIG_HOME"] = gitDir;
55 	}
56 
	/// Guards against use of an instance that did not go through the
	/// constructor: the constructor always adds entries to `environment`,
	/// so a null `environment` means the object was never initialized.
	invariant()
	{
		assert(environment !is null, "Not initialized");
	}
61 
62 	// Have just some primitives here.
63 	// Higher-level functionality can be added using UFCS.
64 	void   run  (string[] args...) const { auto owd = pushd(workPath(args[0])); return .run  (commandPrefix ~ args, environment, path); }
65 	string query(string[] args...) const { auto owd = pushd(workPath(args[0])); return .query(commandPrefix ~ args, environment, path).strip(); }
66 	bool   check(string[] args...) const { auto owd = pushd(workPath(args[0])); return spawnProcess(commandPrefix ~ args, environment, Config.none, path).wait() == 0; }
67 	auto   pipe (string[] args, Redirect redirect)
68 	                               const { auto owd = pushd(workPath(args[0])); return pipeProcess(commandPrefix ~ args, redirect, environment, Config.none, path); }
69 	auto   pipe (string[] args...) const { return pipe(args, Redirect.stdin | Redirect.stdout); }
70 
71 	/// Certain git commands (notably, bisect) must
72 	/// be run in the repository's root directory.
73 	private string workPath(string cmd) const
74 	{
75 		switch (cmd)
76 		{
77 			case "bisect":
78 			case "submodule":
79 				return path;
80 			default:
81 				return null;
82 		}
83 	}
84 
85 	History getHistory(string[] extraRefs = null)
86 	{
87 		History history;
88 
89 		Commit* getCommit(Hash hash)
90 		{
91 			auto pcommit = hash in history.commits;
92 			return pcommit ? *pcommit : (history.commits[hash] = new Commit(history.numCommits++, hash));
93 		}
94 
95 		Commit* commit;
96 		string currentBlock;
97 
98 		foreach (line; query([`log`, `--all`, `--pretty=raw`] ~ extraRefs).split('\n'))
99 		{
100 			if (!line.length)
101 			{
102 				if (currentBlock)
103 					currentBlock = null;
104 				continue;
105 			}
106 
107 			if (currentBlock)
108 			{
109 				enforce(line.startsWith(" "), "Expected " ~ currentBlock ~ " line in git log");
110 				continue;
111 			}
112 
113 			if (line.startsWith("commit "))
114 			{
115 				auto hash = line[7..$].toCommitHash();
116 				commit = getCommit(hash);
117 			}
118 			else
119 			if (line.startsWith("tree "))
120 				continue;
121 			else
122 			if (line.startsWith("parent "))
123 			{
124 				auto hash = line[7..$].toCommitHash();
125 				auto parent = getCommit(hash);
126 				commit.parents ~= parent;
127 				parent.children ~= commit;
128 			}
129 			else
130 			if (line.startsWith("author "))
131 				commit.author = line[7..$];
132 			else
133 			if (line.startsWith("committer "))
134 			{
135 				commit.committer = line[10..$];
136 				commit.time = line.split(" ")[$-2].to!int();
137 			}
138 			else
139 			if (line.startsWith("    "))
140 				commit.message ~= line[4..$];
141 			else
142 			if (line.startsWith("gpgsig "))
143 				currentBlock = "GPG signature";
144 			else
145 			if (line.startsWith("mergetag "))
146 				currentBlock = "Tag merge";
147 			else
148 				enforce(false, "Unknown line in git log: " ~ line);
149 		}
150 
151 		foreach (line; query([`show-ref`, `--dereference`]).splitLines())
152 		{
153 			auto h = line[0..40].toCommitHash();
154 			if (h in history.commits)
155 				history.refs[line[41..$]] = h;
156 		}
157 
158 		return history;
159 	}
160 
	/// Payload of `ObjectReader`: holds the pipes of a git process
	/// (see `createObjectReader`, which spawns `git cat-file --batch`)
	/// and reads objects from it one request at a time.
	struct ObjectReaderImpl
	{
		/// Pipes of the spawned process (stdin for requests, stdout for replies).
		ProcessPipes pipes;

		/// Request the object called `name` and read the reply.
		/// The reply is parsed as a three-field header line
		/// (`<hash> <type> <size>`), `size` bytes of data, and a newline.
		/// Throws: `Exception` on a malformed header, short read, or missing newline.
		GitObject read(string name)
		{
			pipes.stdin.writeln(name);
			pipes.stdin.flush();

			auto headerLine = pipes.stdout.safeReadln().strip();
			auto header = headerLine.split(" ");
			enforce(header.length == 3, "Malformed header during cat-file: " ~ headerLine);
			auto hash = header[0].toCommitHash();

			GitObject obj;
			obj.hash = hash;
			obj.type = header[1];
			auto size = to!size_t(header[2]);
			if (size)
			{
				auto data = new ubyte[size];
				auto read = pipes.stdout.rawRead(data);
				enforce(read.length == size, "Unexpected EOF during cat-file");
				obj.data = data.assumeUnique();
			}

			// The object data is followed by a single line feed, which must be consumed
			// before the next header can be read.
			char[1] lf;
			pipes.stdout.rawRead(lf[]);
			enforce(lf[0] == '\n', "Terminating newline expected");

			return obj;
		}

		/// Read the object with the given hash, verifying that the
		/// reply is for that same hash.
		GitObject read(Hash hash)
		{
			auto obj = read(hash.toString());
			enforce(obj.hash == hash, "Unexpected object during cat-file");
			return obj;
		}

		/// Closes the process's stdin and waits for it to exit.
		/// Throws (from the destructor) if the exit status is non-zero.
		~this()
		{
			pipes.stdin.close();
			enforce(pipes.pid.wait() == 0, "git cat-file exited with failure");
		}
	}
	/// Reference-counted handle, so the process is shut down once the last copy goes away.
	alias ObjectReader = RefCounted!ObjectReaderImpl;
208 
209 	/// Spawn a cat-file process which can read git objects by demand.
210 	ObjectReader createObjectReader()
211 	{
212 		auto pipes = this.pipe(`cat-file`, `--batch`);
213 		return ObjectReader(pipes);
214 	}
215 
216 	/// Run a batch cat-file query.
217 	GitObject[] getObjects(Hash[] hashes)
218 	{
219 		GitObject[] result;
220 		result.reserve(hashes.length);
221 		auto reader = createObjectReader();
222 
223 		foreach (hash; hashes)
224 			result ~= reader.read(hash);
225 
226 		return result;
227 	}
228 
	/// Payload of `ObjectWriter`: holds the pipes of a git process
	/// (see `createObjectWriter`, which spawns `git hash-object -w --stdin-paths`)
	/// and feeds it objects one at a time.
	struct ObjectWriterImpl
	{
		/// Set by the constructor; a default-initialized instance owns no process.
		bool initialized;
		/// Pipes of the spawned process.
		ProcessPipes pipes;

		/// Take ownership of an already-spawned process.
		this(ProcessPipes pipes)
		{
			this.pipes = pipes;
			initialized = true;
		}

		/// Write `data` as one object and return the hash printed by the process.
		/// The process is sent a file name rather than the data itself, so the data
		/// is delivered through a freshly created, randomly named `NamedPipe`.
		Hash write(in void[] data)
		{
			import std.random;
			auto p = NamedPipe("ae-sys-git-writeObjects-%d".format(uniform!ulong));
			pipes.stdin.writeln(p.fileName);
			pipes.stdin.flush();

			// The name has been sent; now serve the data on the pipe.
			auto f = p.connect();
			f.rawWrite(data);
			f.flush();
			f.close();

			return pipes.stdout.safeReadln().strip().toCommitHash();
		}

		/// If a process is owned, closes its stdin and waits for it to exit.
		/// Throws (from the destructor) if the exit status is non-zero.
		~this()
		{
			if (initialized)
			{
				pipes.stdin.close();
				enforce(pipes.pid.wait() == 0, "git hash-object exited with failure");
				initialized = false;
			}
		}
	}
	/// Reference-counted handle to an `ObjectWriterImpl`.
	alias ObjectWriter = RefCounted!ObjectWriterImpl;
266 
	/// Payload of `ObjectMultiWriter`: writes objects of any supported type
	/// ("tree", "blob", "commit"), keeping one `ObjectWriter` per type and
	/// spawning each one the first time an object of that type is written.
	struct ObjectMultiWriterImpl
	{
		/// Repository used to spawn the per-type writers.
		/// This is a raw pointer - the repository must outlive this object.
		Repository* repo;
		/// Per-type writers, created on first use.
		ObjectWriter treeWriter, blobWriter, commitWriter;

		/// Write `obj` through the writer for its type and return the resulting hash.
		/// Throws: `Exception` if `obj.type` is not "tree", "blob" or "commit".
		Hash write(in GitObject obj)
		{
			// A pointer is selected first (instead of acting inside each case)
			// because of the compiler issue linked below.
			ObjectWriter* pwriter;
			switch (obj.type) // https://issues.dlang.org/show_bug.cgi?id=14595
			{
				case "tree"  : pwriter = &treeWriter  ; break;
				case "blob"  : pwriter = &blobWriter  ; break;
				case "commit": pwriter = &commitWriter; break;
				default: throw new Exception("Unknown object type: " ~ obj.type);
			}
			// NOTE(review): `initialized` is the payload's flag, presumably read
			// through RefCounted's auto-initialization - false until a process is attached.
			if (!pwriter.initialized)
				*pwriter = ObjectWriter(repo.pipe(`hash-object`, `-t`, obj.type, `-w`, `--stdin-paths`));
			return pwriter.write(obj.data);
		}
	}
	/// Reference-counted handle to an `ObjectMultiWriterImpl`.
	alias ObjectMultiWriter = RefCounted!ObjectMultiWriterImpl;
288 
289 	/// Spawn a hash-object process which can hash and write git objects on the fly.
290 	ObjectWriter createObjectWriter(string type)
291 	{
292 		auto pipes = this.pipe(`hash-object`, `-t`, type, `-w`, `--stdin-paths`);
293 		return ObjectWriter(pipes);
294 	}
295 
	/// Create a writer which accepts objects of any supported type,
	/// spawning hash-object processes as needed.
	/// The returned writer stores a pointer to this `Repository`,
	/// so it must not be used after this instance has gone away or moved.
	ObjectMultiWriter createObjectWriter()
	{
		return ObjectMultiWriter(&this);
	}
301 
302 	/// Batch-write the given objects to the database.
303 	/// The hashes are saved to the "hash" fields of the passed objects.
304 	void writeObjects(GitObject[] objects)
305 	{
306 		string[] allTypes = objects.map!(obj => obj.type).toSet().keys;
307 		foreach (type; allTypes)
308 		{
309 			auto writer = createObjectWriter(type);
310 			foreach (ref obj; objects)
311 				if (obj.type == type)
312 					obj.hash = writer.write(obj.data);
313 		}
314 	}
315 
316 	/// Extract a commit's tree to a given directory
317 	void exportCommit(string commit, string path, ObjectReader reader, bool delegate(string) pathFilter = null)
318 	{
319 		exportTree(reader.read(commit).parseCommit().tree, path, reader, pathFilter);
320 	}
321 
	/// Extract a tree to a given directory.
	/// Params:
	///   treeHash   = hash of the tree object to extract
	///   path       = target directory; entries are written beneath it
	///   reader     = object reader used to fetch trees and blobs
	///   pathFilter = if non-null, called with each entry's path relative to the
	///                tree root (components joined with "/"); entries for which it
	///                returns false are skipped, including everything below a skipped tree
	/// Throws: `Exception` for an entry whose mode is not one of the four handled below.
	void exportTree(Hash treeHash, string path, ObjectReader reader, bool delegate(string) pathFilter = null)
	{
		// Recursively export one tree; subPath holds the path components
		// from the root tree down to this one.
		void exportSubTree(Hash treeHash, string[] subPath)
		{
			auto tree = reader.read(treeHash).parseTree();
			foreach (entry; tree)
			{
				auto entrySubPath = subPath ~ entry.name;
				if (pathFilter && !pathFilter(entrySubPath.join("/")))
					continue;
				auto entryPath = buildPath([path] ~ entrySubPath);
				switch (entry.mode)
				{
					case octal!100644: // file
					case octal!100755: // executable file
						std.file.write(entryPath, reader.read(entry.hash).data);
						version (Posix)
						{
							// Make executable
							// (each "read" permission bit is copied onto the matching "execute" bit)
							if (entry.mode == octal!100755)
								entryPath.setAttributes(entryPath.getAttributes | ((entryPath.getAttributes & octal!444) >> 2));
						}
						break;
					case octal! 40000: // tree
						mkdirRecurse(entryPath);
						exportSubTree(entry.hash, entrySubPath);
						break;
					case octal!160000: // submodule
						// Only the directory is created; the submodule's contents are not exported.
						mkdirRecurse(entryPath);
						break;
					default:
						throw new Exception("Unknown git file mode: %o".format(entry.mode));
				}
			}
		}
		exportSubTree(treeHash, null);
	}
360 
	/// Import a directory tree into the object store, and return the new tree object's hash.
	/// Params:
	///   path       = directory to import
	///   writer     = writer through which blobs and trees are stored
	///   pathFilter = if non-null, called with each entry's path relative to `path`;
	///                entries for which it returns false are left out
	///                (a left-out directory is not descended into)
	Hash importTree(string path, ObjectMultiWriter writer, bool delegate(string) pathFilter = null)
	{
		// The helper is static and receives everything through parameters,
		// because a closure over `writer` is rejected by the compiler:
		static // Error: variable ae.sys.git.Repository.importTree.writer has scoped destruction, cannot build closure
		Hash importSubTree(string path, string subPath, ref ObjectMultiWriter writer, bool delegate(string) pathFilter)
		{
			// Directories are imported recursively (mode 40000); anything else is read
			// and stored as a blob, with mode 100755 when isVersion!`Posix` holds and
			// any execute bit is set, and 100644 otherwise.
			// The entries are then ordered by their `sortName` before the tree is written.
			auto entries = subPath
				.dirEntries(SpanMode.shallow)
				.filter!(de => !pathFilter || pathFilter(de.relativePath(path)))
				.map!(de =>
					de.isDir
					? GitObject.TreeEntry(
						octal!40000,
						de.baseName,
						importSubTree(path, buildPath(subPath, de.baseName), writer, pathFilter)
					)
					: GitObject.TreeEntry(
						isVersion!`Posix` && (de.attributes & octal!111) ? octal!100755 : octal!100644,
						de.baseName,
						writer.write(GitObject(Hash.init, "blob", cast(immutable(ubyte)[])read(de.name)))
					)
				)
				.array
				.sort!((a, b) => a.sortName < b.sortName).release
			;
			return writer.write(GitObject.createTree(entries));
		}
		return importSubTree(path, path, writer, pathFilter);
	}
390 
391 	struct RefWriterImpl
392 	{
393 		bool initialized;
394 		ProcessPipes pipes;
395 
396 		this(ProcessPipes pipes)
397 		{
398 			this.pipes = pipes;
399 			initialized = true;
400 		}
401 
402 		private void op(string op, bool noDeref, string refName, Hash*[] hashes...)
403 		{
404 			if (noDeref)
405 				pipes.stdin.write("option no-deref\0");
406 			pipes.stdin.write(op, " ", refName, '\0');
407 			foreach (hash; hashes)
408 			{
409 				if (hash)
410 					pipes.stdin.write((*hash).toString());
411 				pipes.stdin.write('\0');
412 			}
413 			pipes.stdin.flush();
414 		}
415 
416 		void update   (string refName, Hash newValue               , bool noDeref = false) { op("update", noDeref, refName, &newValue, null     ); }
417 		void update   (string refName, Hash newValue, Hash oldValue, bool noDeref = false) { op("update", noDeref, refName, &newValue, &oldValue); }
418 		void create   (string refName, Hash newValue               , bool noDeref = false) { op("create", noDeref, refName, &newValue           ); }
419 		void deleteRef(string refName                              , bool noDeref = false) { op("delete", noDeref, refName,            null     ); }
420 		void deleteRef(string refName,                Hash oldValue, bool noDeref = false) { op("delete", noDeref, refName,            &oldValue); }
421 		void verify   (string refName                              , bool noDeref = false) { op("verify", noDeref, refName,            null     ); }
422 		void verify   (string refName,                Hash oldValue, bool noDeref = false) { op("verify", noDeref, refName,            &oldValue); }
423 
424 		~this()
425 		{
426 			if (initialized)
427 			{
428 				pipes.stdin.close();
429 				enforce(pipes.pid.wait() == 0, "git update-ref exited with failure");
430 				initialized = false;
431 			}
432 		}
433 	}
434 	alias RefWriter = RefCounted!RefWriterImpl;
435 
436 	/// Spawn a update-ref process which can update git refs on the fly.
437 	RefWriter createRefWriter()
438 	{
439 		auto pipes = this.pipe(`update-ref`, `-z`, `--stdin`);
440 		return RefWriter(pipes);
441 	}
442 }
443 
444 struct GitObject
445 {
	/// Hash of the object. Left as `Hash.init` by the `create*` functions in this
	/// struct; filled in when the object is read from or written to a repository.
	Hash hash;
	/// Object type name, e.g. "commit", "tree" or "blob".
	string type;
	/// Raw object contents.
	immutable(ubyte)[] data;
449 
	/// Decoded form of a commit object, as produced by `parseCommit`
	/// and consumed by `createCommit`.
	struct ParsedCommit
	{
		/// Hash of the commit's root tree.
		Hash tree;
		/// Hashes of the parent commits, in the order they appear in the commit.
		Hash[] parents;
		string author, committer; /// entire lines - name, email and date
		/// Commit message, one element per line.
		string[] message;
	}
457 
458 	ParsedCommit parseCommit()
459 	{
460 		enforce(type == "commit", "Wrong object type");
461 		ParsedCommit result;
462 		auto lines = (cast(string)data).split('\n');
463 		foreach (n, line; lines)
464 		{
465 			if (line == "")
466 			{
467 				result.message = lines[n+1..$];
468 				break; // commit message begins
469 			}
470 			auto parts = line.findSplit(" ");
471 			auto field = parts[0];
472 			line = parts[2];
473 			switch (field)
474 			{
475 				case "tree":
476 					result.tree = line.toCommitHash();
477 					break;
478 				case "parent":
479 					result.parents ~= line.toCommitHash();
480 					break;
481 				case "author":
482 					result.author = line;
483 					break;
484 				case "committer":
485 					result.committer = line;
486 					break;
487 				default:
488 					throw new Exception("Unknown commit field: " ~ field);
489 			}
490 		}
491 		return result;
492 	}
493 
494 	static GitObject createCommit(ParsedCommit commit)
495 	{
496 		auto s = "tree %s\n%-(parent %s\n%|%)author %s\ncommitter %s\n\n%-(%s\n%)".format(
497 				commit.tree.toString(),
498 				commit.parents.map!(ae.sys.git.toString),
499 				commit.author,
500 				commit.committer,
501 				commit.message,
502 			);
503 		return GitObject(Hash.init, "commit", cast(immutable(ubyte)[])s);
504 	}
505 
506 	struct TreeEntry
507 	{
508 		uint mode;
509 		string name;
510 		Hash hash;
511 
512 		@property string sortName() { return (mode & octal!40000) ? name ~ "/" : name; }
513 	}
514 
515 	TreeEntry[] parseTree()
516 	{
517 		enforce(type == "tree", "Wrong object type");
518 		TreeEntry[] result;
519 		auto rem = data;
520 		while (rem.length)
521 		{
522 			auto si = rem.countUntil(' ');
523 			auto zi = rem.countUntil(0);
524 			auto ei = zi + 1 + Hash.sizeof;
525 			auto str = cast(string)rem[0..zi];
526 			enforce(0 < si && si < zi && ei <= rem.length, "Malformed tree entry:\n" ~ hexDump(rem));
527 			result ~= TreeEntry(str[0..si].to!uint(8), str[si+1..zi], cast(Hash)rem[zi+1..ei][0..20]); // https://issues.dlang.org/show_bug.cgi?id=13112
528 			rem = rem[ei..$];
529 		}
530 		return result;
531 	}
532 
533 	static GitObject createTree(TreeEntry[] entries)
534 	{
535 		auto buf = appender!(immutable(ubyte)[]);
536 		foreach (entry; entries)
537 		{
538 			buf.formattedWrite("%o %s\0", entry.mode, entry.name);
539 			buf.put(entry.hash[]);
540 		}
541 		return GitObject(Hash.init, "tree", buf.data);
542 	}
543 }
544 
/// Commit graph and refs of a repository, as built by `Repository.getHistory`.
struct History
{
	/// All known commits, by hash.
	Commit*[Hash] commits;
	/// Number of commits created so far; also the source of `Commit.id` values.
	uint numCommits = 0;
	/// Ref name => hash of the commit it points to.
	Hash[string] refs;
}
551 
/// Binary object hash (20 bytes, i.e. 40 hexadecimal digits).
alias Hash = ubyte[20];
553 
/// A node of the commit graph built by `Repository.getHistory`.
struct Commit
{
	/// Sequential number, assigned in the order commits are first encountered.
	uint id;
	/// Hash of the commit.
	Hash hash;
	/// Timestamp taken from the "committer" line.
	uint time;
	/// The "author" and "committer" lines, without the leading keyword.
	string author, committer;
	/// Commit message, one element per line.
	string[] message;
	/// Links to the neighbouring commits in the graph.
	Commit*[] parents, children;
}
563 
/// Convert a 40-digit hexadecimal string (either letter case) to a binary hash.
/// Throws: `Exception` if the length is not 40; a conversion exception
/// if the string contains anything other than hexadecimal digits.
Hash toCommitHash(in char[] hash)
{
	enforce(hash.length == 40, "Bad hash length: " ~ hash);
	Hash bytes;
	size_t offset = 0;
	foreach (ref octet; bytes)
	{
		// two hex digits per byte
		octet = hash[offset .. offset + 2].to!ubyte(16);
		offset += 2;
	}
	return bytes;
}
572 
/// Convert a binary hash to its 40-digit lowercase hexadecimal form.
string toString(ref Hash hash)
{
	return "%(%02x%)".format(hash[]);
}
577 
// toCommitHash accepts both lowercase and uppercase hexadecimal digits.
unittest
{
	assert(toCommitHash("0123456789abcdef0123456789ABCDEF01234567") == [0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0x01, 0x23, 0x45, 0x67]);
}
582 
/// Tries to match the default destination of `git clone`.
///
/// Trailing slashes and a trailing `/.git` component are ignored, then
/// the part after the last `:` and `/` is taken and a `.git` suffix removed.
/// Examples: `https://example.com/user/ae.git/` => `ae`,
/// `git@example.com:ae.git` => `ae`, `host:foo/.git` => `foo`.
/// Returns: the derived name, or an empty string if none can be derived.
string repositoryNameFromURL(string url)
{
	// Trailing slashes carry no meaning ("…/ae/" is the same repository as "…/ae").
	while (url.endsWith("/"))
		url = url[0 .. $-1];

	// "foo/.git" names the git directory of the repository "foo".
	if (url.endsWith("/.git"))
	{
		url = url[0 .. $ - "/.git".length];
		while (url.endsWith("/"))
			url = url[0 .. $-1];
	}

	// Take the last component, delimited by ':' (scp-like syntax) or '/'.
	// Searching (instead of splitting and indexing) cannot fail on input
	// which is empty or ends in a delimiter.
	auto colon = url.lastIndexOf(':');
	if (colon >= 0)
		url = url[colon + 1 .. $];
	auto slash = url.lastIndexOf('/');
	if (slash >= 0)
		url = url[slash + 1 .. $];

	return url.chomp(".git");
}
591 
// repositoryNameFromURL: one URL with a scheme and slashes, one in scp-like "host:path" form.
unittest
{
	assert(repositoryNameFromURL("https://github.com/CyberShadow/ae.git") == "ae");
	assert(repositoryNameFromURL("git@example.com:ae.git") == "ae");
}