1 /**
2  * Wrappers for the git command-line tools.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.sys.git;
15 
16 import std.algorithm;
17 import std.array;
18 import std.conv;
19 import std.exception;
20 import std.file;
21 import std.format;
22 import std.path;
23 import std.process;
24 import std.string;
25 import std.typecons;
26 import std.utf;
27 
28 import ae.sys.cmd;
29 import ae.sys.file;
30 import ae.utils.aa;
31 import ae.utils.meta;
32 import ae.utils.text;
33 
34 struct Repository
35 {
	/// Directory passed to the constructor, converted to an absolute path.
	string path;
	/// The repository's git directory: `path/.git`, or, when `.git` is a
	/// regular file, the location read from that file.
	string gitDir;

	/// Environment variables handed to every git process started by the
	/// primitives below.
	string[string] environment;
	/// Leading command-line elements (`git -c core.autocrlf=false -C <path>`)
	/// prepended to the arguments of every command.
	string[] commandPrefix;
41 
42 	this(string path)
43 	{
44 		path = path.absolutePath();
45 		enforce(path.exists, "Repository path does not exist: " ~ path);
46 		gitDir = path.buildPath(".git");
47 		if (gitDir.exists && gitDir.isFile)
48 			gitDir = path.buildNormalizedPath(gitDir.readText().strip()[8..$]);
49 		//path = path.replace(`\`, `/`);
50 		this.path = path;
51 		this.commandPrefix = ["git", "-c", "core.autocrlf=false", "-C", path];
52 		version (Windows) {} else
53 			this.environment["GIT_CONFIG_NOSYSTEM"] = "1";
54 		this.environment["HOME"] = gitDir;
55 		this.environment["XDG_CONFIG_HOME"] = gitDir;
56 	}
57 
	/// Guards against use of a default-initialized `Repository`: the
	/// constructor always stores at least one entry in `environment`,
	/// so a null associative array means the constructor was never run.
	invariant()
	{
		assert(environment !is null, "Not initialized");
	}
62 
63 	// Have just some primitives here.
64 	// Higher-level functionality can be added using UFCS.
65 	void   run  (string[] args...) const { return .run  (commandPrefix ~ args, environment, path); }
66 	string query(string[] args...) const { return .query(commandPrefix ~ args, environment, path).strip(); }
67 	bool   check(string[] args...) const { return spawnProcess(commandPrefix ~ args, environment, Config.none, path).wait() == 0; }
68 	auto   pipe (string[] args, Redirect redirect)
69 	                               const { return pipeProcess(commandPrefix ~ args, redirect, environment, Config.none, path); }
70 	auto   pipe (string[] args...) const { return pipe(args, Redirect.stdin | Redirect.stdout); }
71 
72 	/// Certain git commands (notably, bisect) must
73 	/// be run in the repository's root directory.
74 	private string workPath(string cmd) const
75 	{
76 		switch (cmd)
77 		{
78 			case "bisect":
79 			case "submodule":
80 				return path;
81 			default:
82 				return null;
83 		}
84 	}
85 
86 	History getHistory(string[] extraRefs = null)
87 	{
88 		History history;
89 
90 		Commit* getCommit(Hash hash)
91 		{
92 			auto pcommit = hash in history.commits;
93 			return pcommit ? *pcommit : (history.commits[hash] = new Commit(history.numCommits++, hash));
94 		}
95 
96 		Commit* commit;
97 		string currentBlock;
98 
99 		foreach (line; query([`log`, `--all`, `--pretty=raw`] ~ extraRefs).split('\n'))
100 		{
101 			if (!line.length)
102 			{
103 				if (currentBlock)
104 					currentBlock = null;
105 				continue;
106 			}
107 
108 			if (currentBlock)
109 			{
110 				enforce(line.startsWith(" "), "Expected " ~ currentBlock ~ " line in git log");
111 				continue;
112 			}
113 
114 			if (line.startsWith("commit "))
115 			{
116 				auto hash = line[7..$].toCommitHash();
117 				commit = getCommit(hash);
118 			}
119 			else
120 			if (line.startsWith("tree "))
121 				continue;
122 			else
123 			if (line.startsWith("parent "))
124 			{
125 				auto hash = line[7..$].toCommitHash();
126 				auto parent = getCommit(hash);
127 				commit.parents ~= parent;
128 				parent.children ~= commit;
129 			}
130 			else
131 			if (line.startsWith("author "))
132 				commit.author = line[7..$];
133 			else
134 			if (line.startsWith("committer "))
135 			{
136 				commit.committer = line[10..$];
137 				commit.time = line.split(" ")[$-2].to!int();
138 			}
139 			else
140 			if (line.startsWith("    "))
141 				commit.message ~= line[4..$];
142 			else
143 			if (line.startsWith("gpgsig "))
144 				currentBlock = "GPG signature";
145 			else
146 			if (line.startsWith("mergetag "))
147 				currentBlock = "Tag merge";
148 			else
149 				enforce(false, "Unknown line in git log: " ~ line);
150 		}
151 
152 		foreach (line; query([`show-ref`, `--dereference`]).splitLines())
153 		{
154 			auto h = line[0..40].toCommitHash();
155 			if (h in history.commits)
156 				history.refs[line[41..$]] = h;
157 		}
158 
159 		return history;
160 	}
161 
162 	struct ObjectReaderImpl
163 	{
164 		ProcessPipes pipes;
165 
166 		GitObject read(string name)
167 		{
168 			pipes.stdin.writeln(name);
169 			pipes.stdin.flush();
170 
171 			auto headerLine = pipes.stdout.safeReadln().strip();
172 			auto header = headerLine.split(" ");
173 			enforce(header.length == 3, "Malformed header during cat-file: " ~ headerLine);
174 			auto hash = header[0].toCommitHash();
175 
176 			GitObject obj;
177 			obj.hash = hash;
178 			obj.type = header[1];
179 			auto size = to!size_t(header[2]);
180 			if (size)
181 			{
182 				auto data = new ubyte[size];
183 				auto read = pipes.stdout.rawRead(data);
184 				enforce(read.length == size, "Unexpected EOF during cat-file");
185 				obj.data = data.assumeUnique();
186 			}
187 
188 			char[1] lf;
189 			pipes.stdout.rawRead(lf[]);
190 			enforce(lf[0] == '\n', "Terminating newline expected");
191 
192 			return obj;
193 		}
194 
195 		GitObject read(Hash hash)
196 		{
197 			auto obj = read(hash.toString());
198 			enforce(obj.hash == hash, "Unexpected object during cat-file");
199 			return obj;
200 		}
201 
202 		~this()
203 		{
204 			pipes.stdin.close();
205 			enforce(pipes.pid.wait() == 0, "git cat-file exited with failure");
206 		}
207 	}
208 	alias ObjectReader = RefCounted!ObjectReaderImpl;
209 
210 	/// Spawn a cat-file process which can read git objects by demand.
211 	ObjectReader createObjectReader()
212 	{
213 		auto pipes = this.pipe(`cat-file`, `--batch`);
214 		return ObjectReader(pipes);
215 	}
216 
217 	/// Run a batch cat-file query.
218 	GitObject[] getObjects(Hash[] hashes)
219 	{
220 		GitObject[] result;
221 		result.reserve(hashes.length);
222 		auto reader = createObjectReader();
223 
224 		foreach (hash; hashes)
225 			result ~= reader.read(hash);
226 
227 		return result;
228 	}
229 
230 	struct ObjectWriterImpl
231 	{
232 		bool initialized;
233 		ProcessPipes pipes;
234 
235 		this(ProcessPipes pipes)
236 		{
237 			this.pipes = pipes;
238 			initialized = true;
239 		}
240 
241 		Hash write(in void[] data)
242 		{
243 			import std.random;
244 			auto p = NamedPipe("ae-sys-git-writeObjects-%d".format(uniform!ulong));
245 			pipes.stdin.writeln(p.fileName);
246 			pipes.stdin.flush();
247 
248 			auto f = p.connect();
249 			f.rawWrite(data);
250 			f.flush();
251 			f.close();
252 
253 			return pipes.stdout.safeReadln().strip().toCommitHash();
254 		}
255 
256 		~this()
257 		{
258 			if (initialized)
259 			{
260 				pipes.stdin.close();
261 				enforce(pipes.pid.wait() == 0, "git hash-object exited with failure");
262 				initialized = false;
263 			}
264 		}
265 	}
266 	alias ObjectWriter = RefCounted!ObjectWriterImpl;
267 
268 	struct ObjectMultiWriterImpl
269 	{
270 		Repository* repo;
271 		ObjectWriter treeWriter, blobWriter, commitWriter;
272 
273 		Hash write(in GitObject obj)
274 		{
275 			ObjectWriter* pwriter;
276 			switch (obj.type) // https://issues.dlang.org/show_bug.cgi?id=14595
277 			{
278 				case "tree"  : pwriter = &treeWriter  ; break;
279 				case "blob"  : pwriter = &blobWriter  ; break;
280 				case "commit": pwriter = &commitWriter; break;
281 				default: throw new Exception("Unknown object type: " ~ obj.type);
282 			}
283 			if (!pwriter.initialized)
284 				*pwriter = ObjectWriter(repo.pipe(`hash-object`, `-t`, obj.type, `-w`, `--stdin-paths`));
285 			return pwriter.write(obj.data);
286 		}
287 	}
288 	alias ObjectMultiWriter = RefCounted!ObjectMultiWriterImpl;
289 
290 	/// Spawn a hash-object process which can hash and write git objects on the fly.
291 	ObjectWriter createObjectWriter(string type)
292 	{
293 		auto pipes = this.pipe(`hash-object`, `-t`, type, `-w`, `--stdin-paths`);
294 		return ObjectWriter(pipes);
295 	}
296 
297 	/// ditto
298 	ObjectMultiWriter createObjectWriter()
299 	{
300 		return ObjectMultiWriter(&this);
301 	}
302 
303 	/// Batch-write the given objects to the database.
304 	/// The hashes are saved to the "hash" fields of the passed objects.
305 	void writeObjects(GitObject[] objects)
306 	{
307 		string[] allTypes = objects.map!(obj => obj.type).toSet().keys;
308 		foreach (type; allTypes)
309 		{
310 			auto writer = createObjectWriter(type);
311 			foreach (ref obj; objects)
312 				if (obj.type == type)
313 					obj.hash = writer.write(obj.data);
314 		}
315 	}
316 
317 	/// Extract a commit's tree to a given directory
318 	void exportCommit(string commit, string path, ObjectReader reader, bool delegate(string) pathFilter = null)
319 	{
320 		exportTree(reader.read(commit).parseCommit().tree, path, reader, pathFilter);
321 	}
322 
323 	/// Extract a tree to a given directory
324 	void exportTree(Hash treeHash, string path, ObjectReader reader, bool delegate(string) pathFilter = null)
325 	{
326 		void exportSubTree(Hash treeHash, string[] subPath)
327 		{
328 			auto tree = reader.read(treeHash).parseTree();
329 			foreach (entry; tree)
330 			{
331 				auto entrySubPath = subPath ~ entry.name;
332 				if (pathFilter && !pathFilter(entrySubPath.join("/")))
333 					continue;
334 				auto entryPath = buildPath([path] ~ entrySubPath);
335 				switch (entry.mode)
336 				{
337 					case octal!100644: // file
338 					case octal!100755: // executable file
339 						std.file.write(entryPath, reader.read(entry.hash).data);
340 						version (Posix)
341 						{
342 							// Make executable
343 							if (entry.mode == octal!100755)
344 								entryPath.setAttributes(entryPath.getAttributes | ((entryPath.getAttributes & octal!444) >> 2));
345 						}
346 						break;
347 					case octal! 40000: // tree
348 						mkdirRecurse(entryPath);
349 						exportSubTree(entry.hash, entrySubPath);
350 						break;
351 					case octal!160000: // submodule
352 						mkdirRecurse(entryPath);
353 						break;
354 					default:
355 						throw new Exception("Unknown git file mode: %o".format(entry.mode));
356 				}
357 			}
358 		}
359 		exportSubTree(treeHash, null);
360 	}
361 
362 	/// Import a directory tree into the object store, and return the new tree object's hash.
363 	Hash importTree(string path, ObjectMultiWriter writer, bool delegate(string) pathFilter = null)
364 	{
365 		static // Error: variable ae.sys.git.Repository.importTree.writer has scoped destruction, cannot build closure
366 		Hash importSubTree(string path, string subPath, ref ObjectMultiWriter writer, bool delegate(string) pathFilter)
367 		{
368 			auto entries = subPath
369 				.dirEntries(SpanMode.shallow)
370 				.filter!(de => !pathFilter || pathFilter(de.relativePath(path)))
371 				.map!(de =>
372 					de.isDir
373 					? GitObject.TreeEntry(
374 						octal!40000,
375 						de.baseName,
376 						importSubTree(path, buildPath(subPath, de.baseName), writer, pathFilter)
377 					)
378 					: GitObject.TreeEntry(
379 						isVersion!`Posix` && (de.attributes & octal!111) ? octal!100755 : octal!100644,
380 						de.baseName,
381 						writer.write(GitObject(Hash.init, "blob", cast(immutable(ubyte)[])read(de.name)))
382 					)
383 				)
384 				.array
385 				.sort!((a, b) => a.sortName < b.sortName).release
386 			;
387 			return writer.write(GitObject.createTree(entries));
388 		}
389 		return importSubTree(path, path, writer, pathFilter);
390 	}
391 
392 	struct RefWriterImpl
393 	{
394 		bool initialized;
395 		ProcessPipes pipes;
396 
397 		this(ProcessPipes pipes)
398 		{
399 			this.pipes = pipes;
400 			initialized = true;
401 		}
402 
403 		private void op(string op, bool noDeref, string refName, Hash*[] hashes...)
404 		{
405 			if (noDeref)
406 				pipes.stdin.write("option no-deref\0");
407 			pipes.stdin.write(op, " ", refName, '\0');
408 			foreach (hash; hashes)
409 			{
410 				if (hash)
411 					pipes.stdin.write((*hash).toString());
412 				pipes.stdin.write('\0');
413 			}
414 			pipes.stdin.flush();
415 		}
416 
417 		void update   (string refName, Hash newValue               , bool noDeref = false) { op("update", noDeref, refName, &newValue, null     ); }
418 		void update   (string refName, Hash newValue, Hash oldValue, bool noDeref = false) { op("update", noDeref, refName, &newValue, &oldValue); }
419 		void create   (string refName, Hash newValue               , bool noDeref = false) { op("create", noDeref, refName, &newValue           ); }
420 		void deleteRef(string refName                              , bool noDeref = false) { op("delete", noDeref, refName,            null     ); }
421 		void deleteRef(string refName,                Hash oldValue, bool noDeref = false) { op("delete", noDeref, refName,            &oldValue); }
422 		void verify   (string refName                              , bool noDeref = false) { op("verify", noDeref, refName,            null     ); }
423 		void verify   (string refName,                Hash oldValue, bool noDeref = false) { op("verify", noDeref, refName,            &oldValue); }
424 
425 		~this()
426 		{
427 			if (initialized)
428 			{
429 				pipes.stdin.close();
430 				enforce(pipes.pid.wait() == 0, "git update-ref exited with failure");
431 				initialized = false;
432 			}
433 		}
434 	}
435 	alias RefWriter = RefCounted!RefWriterImpl;
436 
437 	/// Spawn a update-ref process which can update git refs on the fly.
438 	RefWriter createRefWriter()
439 	{
440 		auto pipes = this.pipe(`update-ref`, `-z`, `--stdin`);
441 		return RefWriter(pipes);
442 	}
443 }
444 
/// A git object: its hash, its type name ("commit", "tree", "blob", ...)
/// and its raw content.
struct GitObject
{
	Hash hash;
	string type;
	immutable(ubyte)[] data;

	/// The fields of a commit object, as understood by `parseCommit` / `createCommit`.
	struct ParsedCommit
	{
		Hash tree;
		Hash[] parents;
		string author, committer; /// entire lines - name, email and date
		string[] message;
	}

	/// Parse the content of a commit object.
	///
	/// Header lines are read up to the first empty line; all lines
	/// following it make up the message.
	///
	/// Throws: `Exception` if this is not a commit object, or if it has
	/// a header field other than tree, parent, author or committer.
	ParsedCommit parseCommit()
	{
		enforce(type == "commit", "Wrong object type");
		ParsedCommit result;
		auto lines = (cast(string)data).split('\n');
		foreach (n, line; lines)
		{
			if (line == "")
			{
				result.message = lines[n+1..$];
				break; // commit message begins
			}
			auto parts = line.findSplit(" ");
			auto field = parts[0];
			line = parts[2];
			switch (field)
			{
				case "tree":
					result.tree = line.toCommitHash();
					break;
				case "parent":
					result.parents ~= line.toCommitHash();
					break;
				case "author":
					result.author = line;
					break;
				case "committer":
					result.committer = line;
					break;
				default:
					throw new Exception("Unknown commit field: " ~ field);
			}
		}
		return result;
	}

	/// Build a commit object (with an unset hash) from its parsed fields.
	/// Message lines are joined with newlines, without a trailing one.
	static GitObject createCommit(ParsedCommit commit)
	{
		auto s = "tree %s\n%-(parent %s\n%|%)author %s\ncommitter %s\n\n%-(%s\n%)".format(
				commit.tree.toString(),
				commit.parents.map!(ae.sys.git.toString),
				commit.author,
				commit.committer,
				commit.message,
			);
		return GitObject(Hash.init, "commit", cast(immutable(ubyte)[])s);
	}

	/// One entry of a tree object.
	struct TreeEntry
	{
		uint mode;
		string name;
		Hash hash;

		/// Name under which git orders this entry within its tree:
		/// directories compare as if their name ended in a slash.
		/// The complete file type is compared, since a plain bit test
		/// for 040000 would also match submodule entries (160000),
		/// which git orders like regular files.
		@property string sortName() { return (mode & octal!170000) == octal!40000 ? name ~ "/" : name; }
	}

	/// Parse the content of a tree object into its entries.
	///
	/// Every entry consists of the octal mode, a space, the name,
	/// a NUL byte, and the raw bytes of the hash.
	///
	/// Throws: `Exception` if this is not a tree object, or on a malformed entry.
	TreeEntry[] parseTree()
	{
		enforce(type == "tree", "Wrong object type");
		TreeEntry[] result;
		auto rem = data;
		while (rem.length)
		{
			auto si = rem.countUntil(' ');
			auto zi = rem.countUntil(0);
			// Validate the separator positions before slicing with them:
			// countUntil returns -1 when the separator is absent.
			enforce(0 < si && si < zi && zi + 1 + Hash.sizeof <= rem.length, "Malformed tree entry:\n" ~ hexDump(rem));
			auto ei = zi + 1 + Hash.sizeof;
			auto str = cast(string)rem[0..zi];
			result ~= TreeEntry(str[0..si].to!uint(8), str[si+1..zi], cast(Hash)rem[zi+1..ei][0..20]); // https://issues.dlang.org/show_bug.cgi?id=13112
			rem = rem[ei..$];
		}
		return result;
	}

	/// Build a tree object (with an unset hash) from the given entries.
	/// The entries are written in the given order; sorting them
	/// (see `TreeEntry.sortName`) is up to the caller.
	static GitObject createTree(TreeEntry[] entries)
	{
		auto buf = appender!(immutable(ubyte)[]);
		foreach (entry; entries)
		{
			buf.formattedWrite("%o %s\0", entry.mode, entry.name);
			buf.put(entry.hash[]);
		}
		return GitObject(Hash.init, "tree", buf.data);
	}
}
545 
/// Commit graph and ref table, as built by `Repository.getHistory`.
struct History
{
	/// All known commits, indexed by commit hash.
	Commit*[Hash] commits;
	/// Number of `Commit` records created so far;
	/// each new commit receives the current value as its `id`.
	uint numCommits = 0;
	/// Maps ref names to commit hashes, for refs pointing at a commit in `commits`.
	Hash[string] refs;
}
552 
/// Binary form of an object hash: 20 raw bytes (40 hexadecimal digits in text form).
alias Hash = ubyte[20];
554 
/// One node of the commit graph built by `Repository.getHistory`.
struct Commit
{
	/// Sequence number, assigned in the order in which commits are first encountered.
	uint id;
	/// The commit's hash.
	Hash hash;
	/// Second-to-last space-separated field of the `committer` line
	/// (the timestamp, in git's raw log format).
	uint time;
	/// Contents of the `author` and `committer` lines, without the field name.
	string author, committer;
	/// Lines of the commit message, with the log's 4-space indentation removed.
	string[] message;
	/// Links to the neighbouring commits in the graph.
	Commit*[] parents, children;
}
564 
/// Convert a hash from its 40-digit hexadecimal text form to binary.
/// Throws: `Exception` if the text is not exactly 40 characters long;
/// an exception from `std.conv.to` if a digit pair is not valid hexadecimal.
Hash toCommitHash(in char[] hash)
{
	enforce(hash.length == 40, "Bad hash length: " ~ hash);
	Hash bytes;
	for (size_t pos = 0; pos < bytes.length; pos++)
	{
		// Every byte is encoded as two consecutive hexadecimal digits.
		auto hexPair = hash[pos * 2 .. pos * 2 + 2];
		bytes[pos] = to!ubyte(hexPair, 16);
	}
	return bytes;
}
573 
/// Convert a binary hash to its text form: 40 lowercase hexadecimal digits.
/// The hash is taken by (const) value, so that temporaries and
/// const hashes are accepted in addition to mutable lvalues.
string toString(const Hash hash)
{
	return format("%(%02x%)", hash[]);
}

unittest
{
	assert(toCommitHash("0123456789abcdef0123456789ABCDEF01234567") == [0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0x01, 0x23, 0x45, 0x67]);

	// Round trip through a temporary; the output is always lowercase.
	assert(toCommitHash("0123456789abcdef0123456789ABCDEF01234567").toString() == "0123456789abcdef0123456789abcdef01234567");
}
583 
/// Tries to match the default destination of `git clone`.
///
/// Returns the last component of `url` (whatever follows the last `/` or
/// `:`), with a `.git` suffix removed. Trailing slashes and a trailing
/// `/.git` component are ignored. An empty string is returned
/// when no name can be determined (e.g. for an empty URL).
string repositoryNameFromURL(string url)
{
	// Ignore trailing slashes, as well as a trailing "/.git"
	// component (a URL pointing straight at a git directory).
	while (url.endsWith("/"))
		url = url[0 .. $-1];
	url = url.chomp("/.git");
	while (url.endsWith("/"))
		url = url[0 .. $-1];

	// The name starts after the last path ('/') or host (':') separator;
	// when there is none (-1), the entire string is the name.
	auto separator = url.lastIndexOfAny("/:");
	return url[separator + 1 .. $].chomp(".git");
}

unittest
{
	assert(repositoryNameFromURL("https://github.com/CyberShadow/ae.git") == "ae");
	assert(repositoryNameFromURL("git@example.com:ae.git") == "ae");

	assert(repositoryNameFromURL("https://github.com/CyberShadow/ae.git/") == "ae");
	assert(repositoryNameFromURL("https://github.com/CyberShadow/ae/") == "ae");
	assert(repositoryNameFromURL("/home/user/ae/.git") == "ae");
	assert(repositoryNameFromURL("ae") == "ae");
	assert(repositoryNameFromURL("") == "");
}