1 /**
2  * Wrappers for the git command-line tools.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.sys.git;
15 
16 import std.algorithm;
17 import std.array;
18 import std.conv;
19 import std.exception;
20 import std.file;
21 import std.format;
22 import std.path;
23 import std.process;
24 import std.string;
25 import std.typecons;
26 import std.utf;
27 
28 import ae.sys.cmd;
29 import ae.sys.file;
30 import ae.utils.aa;
31 import ae.utils.meta;
32 import ae.utils.text;
33 
34 struct Repository
35 {
36 	string path;
37 	string gitDir;
38 
39 	string[string] environment;
40 
41 	this(string path)
42 	{
43 		path = path.absolutePath();
44 		enforce(path.exists, "Repository path does not exist: " ~ path);
45 		gitDir = path.buildPath(".git");
46 		if (gitDir.exists && gitDir.isFile)
47 			gitDir = path.buildNormalizedPath(gitDir.readText().strip()[8..$]);
48 		//path = path.replace(`\`, `/`);
49 		this.path = path;
50 		version (Windows) {} else
51 			this.environment["GIT_CONFIG_NOSYSTEM"] = "1";
52 		this.environment["HOME"] = gitDir;
53 		this.environment["XDG_CONFIG_HOME"] = gitDir;
54 	}
55 
56 	invariant()
57 	{
58 		assert(environment !is null, "Not initialized");
59 	}
60 
61 	// Have just some primitives here.
62 	// Higher-level functionality can be added using UFCS.
63 	void   run  (string[] args...) const { auto owd = pushd(workPath(args[0])); return .run  (["git"] ~ args, environment, path); }
64 	string query(string[] args...) const { auto owd = pushd(workPath(args[0])); return .query(["git"]  ~ args, environment, path); }
65 	bool   check(string[] args...) const { auto owd = pushd(workPath(args[0])); return spawnProcess(["git"]  ~ args, environment, Config.none, path).wait() == 0; }
66 	auto   pipe (string[] args, Redirect redirect)
67 	                               const { auto owd = pushd(workPath(args[0])); return pipeProcess(["git"]  ~ args, redirect, environment, Config.none, path); }
68 	auto   pipe (string[] args...) const { return pipe(args, Redirect.stdin | Redirect.stdout); }
69 
70 	/// Certain git commands (notably, bisect) must
71 	/// be run in the repository's root directory.
72 	private string workPath(string cmd) const
73 	{
74 		switch (cmd)
75 		{
76 			case "bisect":
77 			case "submodule":
78 				return path;
79 			default:
80 				return null;
81 		}
82 	}
83 
84 	History getHistory()
85 	{
86 		History history;
87 
88 		Commit* getCommit(Hash hash)
89 		{
90 			auto pcommit = hash in history.commits;
91 			return pcommit ? *pcommit : (history.commits[hash] = new Commit(history.numCommits++, hash));
92 		}
93 
94 		Commit* commit;
95 
96 		foreach (line; query([`log`, `--all`, `--pretty=raw`]).splitLines())
97 		{
98 			if (!line.length)
99 				continue;
100 
101 			if (line.startsWith("commit "))
102 			{
103 				auto hash = line[7..$].toCommitHash();
104 				commit = getCommit(hash);
105 			}
106 			else
107 			if (line.startsWith("tree "))
108 				continue;
109 			else
110 			if (line.startsWith("parent "))
111 			{
112 				auto hash = line[7..$].toCommitHash();
113 				auto parent = getCommit(hash);
114 				commit.parents ~= parent;
115 				parent.children ~= commit;
116 			}
117 			else
118 			if (line.startsWith("author "))
119 				commit.author = line[7..$];
120 			else
121 			if (line.startsWith("committer "))
122 			{
123 				commit.committer = line[10..$];
124 				commit.time = line.split(" ")[$-2].to!int();
125 			}
126 			else
127 			if (line.startsWith("    "))
128 				commit.message ~= line[4..$];
129 			else
130 				//enforce(false, "Unknown line in git log: " ~ line);
131 				commit.message[$-1] ~= line;
132 		}
133 
134 		foreach (line; query([`show-ref`, `--dereference`]).splitLines())
135 		{
136 			auto h = line[0..40].toCommitHash();
137 			if (h in history.commits)
138 				history.refs[line[41..$]] = h;
139 		}
140 
141 		return history;
142 	}
143 
144 	struct ObjectReaderImpl
145 	{
146 		ProcessPipes pipes;
147 
148 		GitObject read(string name)
149 		{
150 			pipes.stdin.writeln(name);
151 			pipes.stdin.flush();
152 
153 			auto headerLine = pipes.stdout.safeReadln().strip();
154 			auto header = headerLine.split(" ");
155 			enforce(header.length == 3, "Malformed header during cat-file: " ~ headerLine);
156 			auto hash = header[0].toCommitHash();
157 
158 			GitObject obj;
159 			obj.hash = hash;
160 			obj.type = header[1];
161 			auto size = to!size_t(header[2]);
162 			if (size)
163 			{
164 				auto data = new ubyte[size];
165 				auto read = pipes.stdout.rawRead(data);
166 				enforce(read.length == size, "Unexpected EOF during cat-file");
167 				obj.data = data.assumeUnique();
168 			}
169 
170 			char[1] lf;
171 			pipes.stdout.rawRead(lf[]);
172 			enforce(lf[0] == '\n', "Terminating newline expected");
173 
174 			return obj;
175 		}
176 
177 		GitObject read(Hash hash)
178 		{
179 			auto obj = read(hash.toString());
180 			enforce(obj.hash == hash, "Unexpected object during cat-file");
181 			return obj;
182 		}
183 
184 		~this()
185 		{
186 			pipes.stdin.close();
187 			enforce(pipes.pid.wait() == 0, "git cat-file exited with failure");
188 		}
189 	}
190 	alias ObjectReader = RefCounted!ObjectReaderImpl;
191 
192 	/// Spawn a cat-file process which can read git objects by demand.
193 	ObjectReader createObjectReader()
194 	{
195 		auto pipes = this.pipe(`cat-file`, `--batch`);
196 		return ObjectReader(pipes);
197 	}
198 
199 	/// Run a batch cat-file query.
200 	GitObject[] getObjects(Hash[] hashes)
201 	{
202 		GitObject[] result;
203 		result.reserve(hashes.length);
204 		auto reader = createObjectReader();
205 
206 		foreach (hash; hashes)
207 			result ~= reader.read(hash);
208 
209 		return result;
210 	}
211 
	/// Payload of ObjectWriter: the pipes of a running
	/// `git hash-object ... --stdin-paths` process (see createObjectWriter).
	struct ObjectWriterImpl
	{
		/// Whether `pipes` refers to a live process. False for a
		/// default-initialized instance, in which case the destructor
		/// does nothing.
		bool initialized;
		ProcessPipes pipes;

		/// Take ownership of the pipes of an already-started process.
		this(ProcessPipes pipes)
		{
			this.pipes = pipes;
			initialized = true;
		}

		/// Feed `data` to the hash-object process and return the hash
		/// which the process printed in response.
		Hash write(in void[] data)
		{
			import std.random;
			// The process is given file paths on its standard input, so
			// the data is handed over through a uniquely-named pipe.
			// NOTE(review): NamedPipe is declared outside this file; its exact semantics should be confirmed there.
			auto p = NamedPipe("ae-sys-git-writeObjects-%d".format(uniform!ulong));
			// Tell the process which path to read ...
			pipes.stdin.writeln(p.fileName);
			pipes.stdin.flush();

			// ... then supply the data on that path.
			auto f = p.connect();
			f.rawWrite(data);
			f.flush();
			f.close();

			// The process answers with one line containing the object's hash.
			return pipes.stdout.safeReadln().strip().toCommitHash();
		}

		/// Close the process's standard input, wait for it to exit,
		/// and check that it did so successfully.
		/// Does nothing if this instance never owned a process.
		~this()
		{
			if (initialized)
			{
				pipes.stdin.close();
				enforce(pipes.pid.wait() == 0, "git hash-object exited with failure");
				initialized = false;
			}
		}
	}
	/// Reference-counted handle to a hash-object process.
	alias ObjectWriter = RefCounted!ObjectWriterImpl;
249 
250 	struct ObjectMultiWriterImpl
251 	{
252 		Repository* repo;
253 		ObjectWriter treeWriter, blobWriter, commitWriter;
254 
255 		Hash write(in GitObject obj)
256 		{
257 			ObjectWriter* pwriter;
258 			switch (obj.type) // https://issues.dlang.org/show_bug.cgi?id=14595
259 			{
260 				case "tree"  : pwriter = &treeWriter  ; break;
261 				case "blob"  : pwriter = &blobWriter  ; break;
262 				case "commit": pwriter = &commitWriter; break;
263 				default: throw new Exception("Unknown object type: " ~ obj.type);
264 			}
265 			if (!pwriter.initialized)
266 				*pwriter = ObjectWriter(repo.pipe(`hash-object`, `-t`, obj.type, `-w`, `--stdin-paths`));
267 			return pwriter.write(obj.data);
268 		}
269 	}
270 	alias ObjectMultiWriter = RefCounted!ObjectMultiWriterImpl;
271 
272 	/// Spawn a hash-object process which can hash and write git objects on the fly.
273 	ObjectWriter createObjectWriter(string type)
274 	{
275 		auto pipes = this.pipe(`hash-object`, `-t`, type, `-w`, `--stdin-paths`);
276 		return ObjectWriter(pipes);
277 	}
278 
279 	/// ditto
280 	ObjectMultiWriter createObjectWriter()
281 	{
282 		return ObjectMultiWriter(&this);
283 	}
284 
285 	/// Batch-write the given objects to the database.
286 	/// The hashes are saved to the "hash" fields of the passed objects.
287 	void writeObjects(GitObject[] objects)
288 	{
289 		string[] allTypes = objects.map!(obj => obj.type).toSet().keys;
290 		foreach (type; allTypes)
291 		{
292 			auto writer = createObjectWriter(type);
293 			foreach (ref obj; objects)
294 				if (obj.type == type)
295 					obj.hash = writer.write(obj.data);
296 		}
297 	}
298 
299 	/// Extract a commit's tree to a given directory
300 	void exportCommit(string commit, string path, ObjectReader reader, bool delegate(string) pathFilter = null)
301 	{
302 		exportTree(reader.read(commit).parseCommit().tree, path, reader, pathFilter);
303 	}
304 
305 	/// Extract a tree to a given directory
306 	void exportTree(Hash treeHash, string path, ObjectReader reader, bool delegate(string) pathFilter = null)
307 	{
308 		void exportSubTree(Hash treeHash, string[] subPath)
309 		{
310 			auto tree = reader.read(treeHash).parseTree();
311 			foreach (entry; tree)
312 			{
313 				auto entrySubPath = subPath ~ entry.name;
314 				if (pathFilter && !pathFilter(entrySubPath.join("/")))
315 					continue;
316 				auto entryPath = buildPath([path] ~ entrySubPath);
317 				switch (entry.mode)
318 				{
319 					case octal!100644: // file
320 					case octal!100755: // executable file
321 						std.file.write(entryPath, reader.read(entry.hash).data);
322 						version (Posix)
323 						{
324 							// Make executable
325 							if (entry.mode == octal!100755)
326 								entryPath.setAttributes(entryPath.getAttributes | ((entryPath.getAttributes & octal!444) >> 2));
327 						}
328 						break;
329 					case octal! 40000: // tree
330 						mkdirRecurse(entryPath);
331 						exportSubTree(entry.hash, entrySubPath);
332 						break;
333 					case octal!160000: // submodule
334 						mkdirRecurse(entryPath);
335 						break;
336 					default:
337 						throw new Exception("Unknown git file mode: %o".format(entry.mode));
338 				}
339 			}
340 		}
341 		exportSubTree(treeHash, null);
342 	}
343 
	/// Import a directory tree into the object store, and return the new tree object's hash.
	/// Params:
	///   path       = root directory to import
	///   writer     = writer through which all blob and tree objects are written
	///   pathFilter = optional predicate, called with each directory entry's
	///                path relative to `path`; entries for which it returns
	///                false are left out (for directories, along with
	///                everything below them)
	Hash importTree(string path, ObjectMultiWriter writer, bool delegate(string) pathFilter = null)
	{
		// The helper is static and takes everything it needs as parameters,
		// to work around the compiler error quoted below.
		static // Error: variable ae.sys.git.Repository.importTree.writer has scoped destruction, cannot build closure
		Hash importSubTree(string path, string subPath, ref ObjectMultiWriter writer, bool delegate(string) pathFilter)
		{
			// `path` is the import root (used only to compute relative paths
			// for the filter); `subPath` is the directory being imported.
			auto entries = subPath
				.dirEntries(SpanMode.shallow)
				.filter!(de => !pathFilter || pathFilter(de.relativePath(path)))
				.map!(de =>
					de.isDir
					// Directories are imported recursively, and referenced as trees.
					? GitObject.TreeEntry(
						octal!40000,
						de.baseName,
						importSubTree(path, buildPath(subPath, de.baseName), writer, pathFilter)
					)
					// Everything else is written as a blob. The executable mode
					// is used when any execute bit is set.
					// NOTE(review): isVersion!`Posix` presumably restricts this to Posix builds - confirm in ae.utils.meta.
					: GitObject.TreeEntry(
						isVersion!`Posix` && (de.attributes & octal!111) ? octal!100755 : octal!100644,
						de.baseName,
						writer.write(GitObject(Hash.init, "blob", cast(immutable(ubyte)[])read(de.name)))
					)
				)
				// The pipeline above is lazy; materializing it here is what
				// actually reads the files and writes the objects.
				.array
				// Put the entries in tree order before building the tree object.
				.sort!((a, b) => a.sortName < b.sortName).release
			;
			return writer.write(GitObject.createTree(entries));
		}
		return importSubTree(path, path, writer, pathFilter);
	}
373 }
374 
375 struct GitObject
376 {
377 	Hash hash;
378 	string type;
379 	immutable(ubyte)[] data;
380 
	/// The fields of a commit object, as produced by parseCommit
	/// and consumed by createCommit.
	struct ParsedCommit
	{
		/// Hash from the "tree" header.
		Hash tree;
		/// Hashes from the "parent" headers, in order of appearance.
		Hash[] parents;
		string author, committer; /// entire lines - name, email and date
		/// The lines following the first empty line of the commit object.
		string[] message;
	}
388 
389 	ParsedCommit parseCommit()
390 	{
391 		enforce(type == "commit", "Wrong object type");
392 		ParsedCommit result;
393 		auto lines = (cast(string)data).split('\n');
394 		foreach (n, line; lines)
395 		{
396 			if (line == "")
397 			{
398 				result.message = lines[n+1..$];
399 				break; // commit message begins
400 			}
401 			auto parts = line.findSplit(" ");
402 			auto field = parts[0];
403 			line = parts[2];
404 			switch (field)
405 			{
406 				case "tree":
407 					result.tree = line.toCommitHash();
408 					break;
409 				case "parent":
410 					result.parents ~= line.toCommitHash();
411 					break;
412 				case "author":
413 					result.author = line;
414 					break;
415 				case "committer":
416 					result.committer = line;
417 					break;
418 				default:
419 					throw new Exception("Unknown commit field: " ~ field);
420 			}
421 		}
422 		return result;
423 	}
424 
425 	static GitObject createCommit(ParsedCommit commit)
426 	{
427 		auto s = "tree %s\n%-(parent %s\n%|%)author %s\ncommitter %s\n\n%-(%s\n%)".format(
428 				commit.tree.toString(),
429 				commit.parents.map!(ae.sys.git.toString),
430 				commit.author,
431 				commit.committer,
432 				commit.message,
433 			);
434 		return GitObject(Hash.init, "commit", cast(immutable(ubyte)[])s);
435 	}
436 
437 	struct TreeEntry
438 	{
439 		uint mode;
440 		string name;
441 		Hash hash;
442 
443 		@property string sortName() { return (mode & octal!40000) ? name ~ "/" : name; }
444 	}
445 
446 	TreeEntry[] parseTree()
447 	{
448 		enforce(type == "tree", "Wrong object type");
449 		TreeEntry[] result;
450 		auto rem = data;
451 		while (rem.length)
452 		{
453 			auto si = rem.countUntil(' ');
454 			auto zi = rem.countUntil(0);
455 			auto ei = zi + 1 + Hash.sizeof;
456 			auto str = cast(string)rem[0..zi];
457 			enforce(0 < si && si < zi && ei <= rem.length, "Malformed tree entry:\n" ~ hexDump(rem));
458 			result ~= TreeEntry(str[0..si].to!uint(8), str[si+1..zi], cast(Hash)rem[zi+1..ei][0..20]); // https://issues.dlang.org/show_bug.cgi?id=13112
459 			rem = rem[ei..$];
460 		}
461 		return result;
462 	}
463 
464 	static GitObject createTree(TreeEntry[] entries)
465 	{
466 		auto buf = appender!(ubyte[]);
467 		foreach (entry; entries)
468 		{
469 			buf.formattedWrite("%o %s\0", entry.mode, entry.name);
470 			buf.put(entry.hash[]);
471 		}
472 		return GitObject(Hash.init, "tree", buf.data.assumeUnique);
473 	}
474 }
475 
/// The commit graph of a repository, as built by Repository.getHistory.
struct History
{
	/// All known commits, indexed by their hash.
	Commit*[Hash] commits;
	/// Number of commits allocated so far; also the id given to the next one.
	uint numCommits = 0;
	/// Hashes of the commits pointed at by refs, indexed by ref name.
	Hash[string] refs;
}
482 
/// The raw (binary) form of a 40-hex-digit object hash.
alias Hash = ubyte[20];
484 
/// A node of the commit graph built by Repository.getHistory.
struct Commit
{
	/// Sequential number, assigned in the order commits are first encountered.
	uint id;
	/// The commit's hash.
	Hash hash;
	/// Timestamp taken from the second-to-last field of the committer line.
	uint time;
	/// The author and committer lines, without the leading field name.
	string author, committer;
	/// The lines of the commit message.
	string[] message;
	/// Links to neighbouring commits in the graph.
	Commit*[] parents, children;
}
494 
/// Convert a hash from its 40-digit hexadecimal form to raw bytes.
/// Throws: Exception if the string is not exactly 40 characters long,
///   or if the hexadecimal digits cannot be converted.
Hash toCommitHash(in char[] hash)
{
	enforce(hash.length == 40, "Bad hash length: " ~ hash);
	Hash bytes;
	foreach (index; 0 .. bytes.length)
	{
		// Two hexadecimal digits per byte.
		auto hexPair = hash[index * 2 .. index * 2 + 2];
		bytes[index] = to!ubyte(hexPair, 16);
	}
	return bytes;
}
503 
/// Convert a raw hash to its lowercase hexadecimal form
/// (two digits per byte).
string toString(ref Hash hash)
{
	return "%(%02x%)".format(hash[]);
}
508 
// toCommitHash accepts both lowercase and uppercase hexadecimal digits.
unittest
{
	Hash expected = [
		0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
		0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
		0x01, 0x23, 0x45, 0x67,
	];
	assert(toCommitHash("0123456789abcdef0123456789ABCDEF01234567") == expected);
}
513 
/// Tries to match the default destination of `git clone`.
/// The result is the text after the last ':' or '/' of the URL
/// (trailing slashes excluded), with any ".git" suffix removed.
/// Returns: the guessed directory name; an empty string if the
///   URL has no usable last component.
string repositoryNameFromURL(string url)
{
	// A trailing slash does not start a new (empty) path component.
	while (url.endsWith("/"))
		url = url[0 .. $-1];

	// The text after the last occurrence of `separator`
	// (all of `s` if there is none, or if `s` is empty).
	static string lastPart(string s, string separator)
	{
		auto parts = s.split(separator);
		return parts.length ? parts[$-1] : s;
	}

	return lastPart(lastPart(url, ":"), "/").chomp(".git");
}
522 
// Both URL-style and scp-style remotes yield the bare repository name.
unittest
{
	foreach (urlAndName; [
		["https://github.com/CyberShadow/ae.git", "ae"],
		["git@example.com:ae.git", "ae"],
	])
		assert(repositoryNameFromURL(urlAndName[0]) == urlAndName[1]);
}