1 /**
2  * Wrappers for the git command-line tools.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.sys.git;
15 
16 import std.algorithm;
17 import std.array;
18 import std.conv;
19 import std.exception;
20 import std.file;
21 import std.format;
22 import std.path;
23 import std.process;
24 import std.string;
25 import std.typecons;
26 import std.utf;
27 
28 import ae.sys.cmd;
29 import ae.sys.file;
30 import ae.utils.aa;
31 import ae.utils.meta;
32 import ae.utils.text;
33 
/// A git repository on disk, driven through the `git` command-line tool.
///
/// Every invocation is prefixed with explicit `--work-tree` and `--git-dir`
/// arguments (see `argsPrefix`).
struct Repository
{
	/// Absolute path of the working tree.
	string path;
	/// Path of the git directory that belongs to `path`.
	string gitDir;

	// TODO: replace this with using the std.process workDir parameter in 2.066
	/// Arguments prepended to every git command line.
	string[] argsPrefix;

	/// Open the repository whose working tree is located at `path`.
	/// Throws: `Exception` if `path` does not exist, or if `path/.git` is a
	/// file whose contents do not start with `gitdir: `.
	this(string path)
	{
		path = path.absolutePath();
		enforce(path.exists, "Repository path does not exist");
		gitDir = path.buildPath(".git");
		if (gitDir.exists && gitDir.isFile)
		{
			// A ".git" file holds a "gitdir: <location>" pointer to the real
			// git directory; a relative location is resolved against the
			// working tree. Verify the prefix instead of slicing blindly.
			enum gitFilePrefix = "gitdir: ";
			auto pointer = gitDir.readText().strip();
			enforce(pointer.startsWith(gitFilePrefix), "Malformed .git file: " ~ gitDir);
			gitDir = path.buildNormalizedPath(pointer[gitFilePrefix.length..$]);
		}
		//path = path.replace(`\`, `/`);
		this.path = path;
		this.argsPrefix = [`git`, `--work-tree=` ~ path, `--git-dir=` ~ gitDir];
	}

	invariant()
	{
		assert(argsPrefix.length, "Not initialized");
	}

	// Have just some primitives here.
	// Higher-level functionality can be added using UFCS.
	// Each primitive runs `git <args>` against this repository;
	// the first argument is the git sub-command.
	void   run  (string[] args...) const { auto owd = pushd(commandWorkPath(args)); return .run  (argsPrefix ~ args); }
	string query(string[] args...) const { auto owd = pushd(commandWorkPath(args)); return .query(argsPrefix ~ args); }
	bool   check(string[] args...) const { auto owd = pushd(commandWorkPath(args)); return spawnProcess(argsPrefix ~ args).wait() == 0; }
	auto   pipe (string[] args, Redirect redirect)
	                               const { auto owd = pushd(commandWorkPath(args)); return pipeProcess(argsPrefix ~ args, redirect); }
	auto   pipe (string[] args...) const { return pipe(args, Redirect.stdin | Redirect.stdout); }

	/// Certain git commands (notably, bisect) must
	/// be run in the repository's root directory.
	private string workPath(string cmd) const
	{
		switch (cmd)
		{
			case "bisect":
			case "submodule":
				return path;
			default:
				return null;
		}
	}

	/// `workPath` for a whole argument list. An empty list (no sub-command)
	/// is treated like any command without a directory requirement,
	/// instead of indexing past the end of the array.
	private string commandWorkPath(string[] args) const
	{
		return workPath(args.length ? args[0] : null);
	}

	/// Load the commit graph of all refs (`git log --all`), plus the
	/// refs (`git show-ref --dereference`) which point at loaded commits.
	/// Throws: `Exception` on output that cannot be interpreted.
	History getHistory()
	{
		History history;

		// Look up a commit by hash, creating (and numbering) it on first use.
		Commit* getCommit(Hash hash)
		{
			auto pcommit = hash in history.commits;
			return pcommit ? *pcommit : (history.commits[hash] = new Commit(history.numCommits++, hash));
		}

		Commit* commit; // the commit whose log entry is currently being read

		foreach (line; query([`log`, `--all`, `--pretty=raw`]).splitLines())
		{
			if (!line.length)
				continue;

			if (line.startsWith("commit "))
			{
				auto hash = line[7..$].toCommitHash();
				commit = getCommit(hash);
				continue;
			}

			// Everything else describes the current commit, so there must be one.
			enforce(commit !is null, "Unexpected line before the first commit in git log: " ~ line);

			if (line.startsWith("tree "))
				continue;
			else
			if (line.startsWith("parent "))
			{
				auto hash = line[7..$].toCommitHash();
				auto parent = getCommit(hash);
				commit.parents ~= parent;
				parent.children ~= commit;
			}
			else
			if (line.startsWith("author "))
				commit.author = line[7..$];
			else
			if (line.startsWith("committer "))
			{
				commit.committer = line[10..$];
				// The line ends in "<timestamp> <timezone>".
				commit.time = line.split(" ")[$-2].to!int();
			}
			else
			if (line.startsWith("    "))
				commit.message ~= line[4..$];
			else
			if (commit.message.length)
			{
				// splitLines also breaks on line terminators other than "\n",
				// so this is the remainder of the previous message line.
				//enforce(false, "Unknown line in git log: " ~ line);
				commit.message[$-1] ~= line;
			}
			// Otherwise the message has not started yet: this is an extra
			// header (e.g. "gpgsig" of a signed commit, or one of its
			// continuation lines), which is not recorded.
		}

		foreach (line; query([`show-ref`, `--dereference`]).splitLines())
		{
			// Expected form: 40 hex digits, one space, ref name.
			enforce(line.length > 41 && line[40] == ' ', "Malformed line in git show-ref output: " ~ line);
			auto h = line[0..40].toCommitHash();
			if (h in history.commits)
				history.refs[line[41..$]] = h;
		}

		return history;
	}

	/// Reads objects through the pipes of a `git cat-file --batch` process.
	struct ObjectReaderImpl
	{
		ProcessPipes pipes;

		/// Read the object called `name` (anything cat-file can resolve).
		/// Throws: `Exception` if the reply is not a well-formed
		/// "<hash> <type> <size>" header followed by the object data.
		GitObject read(string name)
		{
			pipes.stdin.writeln(name);
			pipes.stdin.flush();

			auto headerLine = pipes.stdout.safeReadln().strip();
			auto header = headerLine.split(" ");
			enforce(header.length == 3, "Malformed header during cat-file: " ~ headerLine);
			auto hash = header[0].toCommitHash();

			GitObject obj;
			obj.hash = hash;
			obj.type = header[1];
			auto size = to!size_t(header[2]);
			if (size)
			{
				auto data = new ubyte[size];
				auto read = pipes.stdout.rawRead(data);
				enforce(read.length == size, "Unexpected EOF during cat-file");
				obj.data = data.assumeUnique();
			}

			// The data is followed by a single line feed.
			char[1] lf;
			pipes.stdout.rawRead(lf[]);
			enforce(lf[0] == '\n', "Terminating newline expected");

			return obj;
		}

		/// Read the object with the given hash, and verify
		/// that the reply is for that very object.
		GitObject read(Hash hash)
		{
			auto obj = read(hash.toString());
			enforce(obj.hash == hash, "Unexpected object during cat-file");
			return obj;
		}

		/// Close the process's input and wait for it to exit.
		~this()
		{
			pipes.stdin.close();
			enforce(pipes.pid.wait() == 0, "git cat-file exited with failure");
		}
	}
	alias ObjectReader = RefCounted!ObjectReaderImpl;

	/// Spawn a cat-file process which can read git objects by demand.
	ObjectReader createObjectReader()
	{
		auto pipes = this.pipe(`cat-file`, `--batch`);
		return ObjectReader(pipes);
	}

	/// Run a batch cat-file query.
	GitObject[] getObjects(Hash[] hashes)
	{
		GitObject[] result;
		result.reserve(hashes.length);
		auto reader = createObjectReader();

		foreach (hash; hashes)
			result ~= reader.read(hash);

		return result;
	}

	/// Writes objects of one type through the pipes of a
	/// `git hash-object -w --stdin-paths` process.
	struct ObjectWriterImpl
	{
		/// False for a default-constructed instance, which owns no process.
		bool initialized;
		ProcessPipes pipes;

		this(ProcessPipes pipes)
		{
			this.pipes = pipes;
			initialized = true;
		}

		/// Write `data` as a new object, and return the hash printed by git.
		Hash write(in void[] data)
		{
			import std.random;
			// The process is given file names, not data, so the data is
			// handed over through a randomly-named NamedPipe.
			auto p = NamedPipe("ae-sys-git-writeObjects-%d".format(uniform!ulong));
			pipes.stdin.writeln(p.fileName);
			pipes.stdin.flush();

			auto f = p.connect();
			f.rawWrite(data);
			f.flush();
			f.close();

			return pipes.stdout.safeReadln().strip().toCommitHash();
		}

		/// Close the process's input and wait for it to exit (if there is a process).
		~this()
		{
			if (initialized)
			{
				pipes.stdin.close();
				enforce(pipes.pid.wait() == 0, "git hash-object exited with failure");
				initialized = false;
			}
		}
	}
	alias ObjectWriter = RefCounted!ObjectWriterImpl;

	/// Writes objects of any supported type, starting one
	/// hash-object process per type the first time it is needed.
	struct ObjectMultiWriterImpl
	{
		Repository* repo;
		ObjectWriter treeWriter, blobWriter, commitWriter;

		/// Write `obj` using the writer for its type, and return the new hash.
		/// Throws: `Exception` if the type is not "tree", "blob" or "commit".
		Hash write(in GitObject obj)
		{
			ObjectWriter* pwriter;
			switch (obj.type) // https://issues.dlang.org/show_bug.cgi?id=14595
			{
				case "tree"  : pwriter = &treeWriter  ; break;
				case "blob"  : pwriter = &blobWriter  ; break;
				case "commit": pwriter = &commitWriter; break;
				default: throw new Exception("Unknown object type: " ~ obj.type);
			}
			if (!pwriter.initialized)
				*pwriter = ObjectWriter(repo.pipe(`hash-object`, `-t`, obj.type, `-w`, `--stdin-paths`));
			return pwriter.write(obj.data);
		}
	}
	alias ObjectMultiWriter = RefCounted!ObjectMultiWriterImpl;

	/// Spawn a hash-object process which can hash and write git objects on the fly.
	ObjectWriter createObjectWriter(string type)
	{
		auto pipes = this.pipe(`hash-object`, `-t`, type, `-w`, `--stdin-paths`);
		return ObjectWriter(pipes);
	}

	/// ditto
	ObjectMultiWriter createObjectWriter()
	{
		// Note: the writer keeps a pointer to this Repository.
		return ObjectMultiWriter(&this);
	}

	/// Batch-write the given objects to the database.
	/// The hashes are saved to the "hash" fields of the passed objects.
	void writeObjects(GitObject[] objects)
	{
		// One hash-object process per distinct object type.
		string[] allTypes = objects.map!(obj => obj.type).toSet().keys;
		foreach (type; allTypes)
		{
			auto writer = createObjectWriter(type);
			foreach (ref obj; objects)
				if (obj.type == type)
					obj.hash = writer.write(obj.data);
		}
	}

	/// Extract a commit's tree to a given directory
	void exportCommit(string commit, string path, ObjectReader reader, bool delegate(string) pathFilter = null)
	{
		exportTree(reader.read(commit).parseCommit().tree, path, reader, pathFilter);
	}

	/// Extract a tree to a given directory
	/// Params:
	///   treeHash   = hash of the tree object to extract
	///   path       = destination directory
	///   reader     = object reader used to fetch trees and blobs
	///   pathFilter = if not null, called with each entry's "/"-separated path
	///                inside the tree; entries for which it returns false are skipped
	/// Throws: `Exception` on an entry with an unsupported file mode.
	void exportTree(Hash treeHash, string path, ObjectReader reader, bool delegate(string) pathFilter = null)
	{
		void exportSubTree(Hash treeHash, string[] subPath)
		{
			auto tree = reader.read(treeHash).parseTree();
			foreach (entry; tree)
			{
				auto entrySubPath = subPath ~ entry.name;
				if (pathFilter && !pathFilter(entrySubPath.join("/")))
					continue;
				auto entryPath = buildPath([path] ~ entrySubPath);
				switch (entry.mode)
				{
					case octal!100644: // file
					case octal!100755: // executable file
						std.file.write(entryPath, reader.read(entry.hash).data);
						version (Posix)
						{
							// Make executable
							// (set the execute bit wherever the read bit is set)
							if (entry.mode == octal!100755)
								entryPath.setAttributes(entryPath.getAttributes | ((entryPath.getAttributes & octal!444) >> 2));
						}
						break;
					case octal! 40000: // tree
						mkdirRecurse(entryPath);
						exportSubTree(entry.hash, entrySubPath);
						break;
					case octal!160000: // submodule
						// Only the (empty) directory is created.
						mkdirRecurse(entryPath);
						break;
					default:
						throw new Exception("Unknown git file mode: %o".format(entry.mode));
				}
			}
		}
		exportSubTree(treeHash, null);
	}

	/// Import a directory tree into the object store, and return the new tree object's hash.
	/// Params:
	///   path       = directory to import
	///   writer     = object writer used to store the blobs and trees
	///   pathFilter = if not null, called with each entry's path relative to
	///                `path`; entries for which it returns false are skipped
	Hash importTree(string path, ObjectMultiWriter writer, bool delegate(string) pathFilter = null)
	{
		static // Error: variable ae.sys.git.Repository.importTree.writer has scoped destruction, cannot build closure
		Hash importSubTree(string path, string subPath, ref ObjectMultiWriter writer, bool delegate(string) pathFilter)
		{
			auto entries = subPath
				.dirEntries(SpanMode.shallow)
				.filter!(de => !pathFilter || pathFilter(de.relativePath(path)))
				.map!(de =>
					de.isDir
					? GitObject.TreeEntry(
						octal!40000,
						de.baseName,
						importSubTree(path, buildPath(subPath, de.baseName), writer, pathFilter)
					)
					: GitObject.TreeEntry(
						isVersion!`Posix` && (de.attributes & octal!111) ? octal!100755 : octal!100644,
						de.baseName,
						writer.write(GitObject(Hash.init, "blob", cast(immutable(ubyte)[])read(de.name)))
					)
				)
				.array
				.sort!((a, b) => a.sortName < b.sortName).release
			;
			return writer.write(GitObject.createTree(entries));
		}
		return importSubTree(path, path, writer, pathFilter);
	}
}
372 
/// An object of git's object database: its hash, type and raw contents.
struct GitObject
{
	/// The object's hash (`Hash.init` for objects which were not written yet).
	Hash hash;
	/// The object type, e.g. "commit", "tree" or "blob".
	string type;
	/// The raw object contents.
	immutable(ubyte)[] data;

	/// The fields of a commit object.
	struct ParsedCommit
	{
		Hash tree;
		Hash[] parents;
		string author, committer; /// entire lines - name, email and date
		string[] message;
	}

	/// Parse the contents of a commit object.
	/// Throws: `Exception` if this is not a commit, or if the
	/// commit has a header field other than the ones in `ParsedCommit`.
	ParsedCommit parseCommit()
	{
		enforce(type == "commit", "Wrong object type");
		ParsedCommit result;
		auto lines = (cast(string)data).split('\n');
		foreach (n, line; lines)
		{
			if (line == "")
			{
				result.message = lines[n+1..$];
				break; // commit message begins
			}
			// Header lines have the form "<field> <value>".
			auto parts = line.findSplit(" ");
			auto field = parts[0];
			line = parts[2];
			switch (field)
			{
				case "tree":
					result.tree = line.toCommitHash();
					break;
				case "parent":
					result.parents ~= line.toCommitHash();
					break;
				case "author":
					result.author = line;
					break;
				case "committer":
					result.committer = line;
					break;
				default:
					throw new Exception("Unknown commit field: " ~ field);
			}
		}
		return result;
	}

	/// Build a commit object (with a blank hash) from its fields.
	static GitObject createCommit(ParsedCommit commit)
	{
		auto s = "tree %s\n%-(parent %s\n%|%)author %s\ncommitter %s\n\n%-(%s\n%)".format(
				commit.tree.toString(),
				commit.parents.map!(ae.sys.git.toString),
				commit.author,
				commit.committer,
				commit.message,
			);
		return GitObject(Hash.init, "commit", cast(immutable(ubyte)[])s);
	}

	/// One entry of a tree object.
	struct TreeEntry
	{
		uint mode;   /// file mode, as stored in the tree (octal 40000 for sub-trees)
		string name; /// entry name
		Hash hash;   /// hash of the object the entry refers to

		/// The name used to order entries: entries with the
		/// directory bit set are ordered as if their name ended in "/".
		@property string sortName() { return (mode & octal!40000) ? name ~ "/" : name; }
	}

	/// Parse the contents of a tree object, a sequence of
	/// "<octal mode> <name>\0<20 raw hash bytes>" entries.
	/// Throws: `Exception` if this is not a tree, or on a malformed entry.
	TreeEntry[] parseTree()
	{
		enforce(type == "tree", "Wrong object type");
		TreeEntry[] result;
		auto rem = data;
		while (rem.length)
		{
			auto si = rem.countUntil(' ');
			auto zi = rem.countUntil(0);
			// countUntil returns -1 when nothing is found, so the positions
			// must be validated before they are used for slicing.
			enforce(0 < si && si < zi, "Malformed tree entry:\n" ~ hexDump(rem));
			auto ei = zi + 1 + Hash.sizeof;
			enforce(ei <= rem.length, "Malformed tree entry:\n" ~ hexDump(rem));
			auto str = cast(string)rem[0..zi];
			result ~= TreeEntry(str[0..si].to!uint(8), str[si+1..zi], cast(Hash)rem[zi+1..ei][0..20]); // https://issues.dlang.org/show_bug.cgi?id=13112
			rem = rem[ei..$];
		}
		return result;
	}

	/// Build a tree object (with a blank hash) from the given entries.
	/// The entries are written in the given order.
	static GitObject createTree(TreeEntry[] entries)
	{
		auto buf = appender!(ubyte[]);
		foreach (entry; entries)
		{
			buf.formattedWrite("%o %s\0", entry.mode, entry.name);
			buf.put(entry.hash[]);
		}
		return GitObject(Hash.init, "tree", buf.data.assumeUnique);
	}
}
473 
/// The commit graph and refs of a repository.
struct History
{
	/// All known commits, indexed by hash.
	Commit*[Hash] commits;

	/// Number of commits created so far; also the ID given to the next one.
	uint numCommits;

	/// Hashes of the commits pointed at by refs, indexed by ref name.
	Hash[string] refs;
}
480 
/// A 20-byte object hash, in binary form.
alias Hash = ubyte[20];
482 
/// A node of the commit graph in a `History`.
struct Commit
{
	/// Sequential number of the commit within its `History`.
	uint id;

	/// The commit's hash.
	Hash hash;

	/// Timestamp taken from the committer line.
	uint time;

	/// The author line, without the "author " prefix.
	string author;

	/// The committer line, without the "committer " prefix.
	string committer;

	/// Lines of the commit message.
	string[] message;

	/// Parent commits.
	Commit*[] parents;

	/// Commits which have this commit as a parent.
	Commit*[] children;
}
492 
/// Convert a hash from its 40-digit hexadecimal form to binary.
/// Throws: `Exception` if `hash` is not exactly 40 characters long;
/// the conversion of each digit pair may throw as well.
Hash toCommitHash(in char[] hash)
{
	enforce(hash.length == 40, "Bad hash length: " ~ hash);

	Hash bytes;
	for (size_t pos = 0; pos < bytes.length; pos++)
	{
		// Two hex digits per byte.
		auto digits = hash[pos*2..pos*2+2];
		bytes[pos] = to!ubyte(digits, 16);
	}
	return bytes;
}
501 
/// Convert a binary hash to its lowercase hexadecimal form.
string toString(ref Hash hash)
{
	static immutable hexDigits = "0123456789abcdef";

	auto text = new char[hash.length * 2];
	foreach (i, b; hash)
	{
		text[i*2  ] = hexDigits[b >> 4];
		text[i*2+1] = hexDigits[b & 0x0F];
	}
	return text.assumeUnique();
}
506 
unittest
{
	// Lowercase and uppercase hex digits are both accepted.
	immutable ubyte[20] expected = [
		0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
		0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
		0x01, 0x23, 0x45, 0x67,
	];
	auto actual = toCommitHash("0123456789abcdef0123456789ABCDEF01234567");
	assert(actual == expected);
}
511 
/// Tries to match the default destination of `git clone`.
///
/// The result is the last component of `url` (components being separated
/// by `/` or `:`), without a `.git` suffix. Trailing slashes and whitespace,
/// as well as a trailing `/.git` component, are ignored.
/// Returns: the repository name; an empty string if none can be determined.
string repositoryNameFromURL(string url)
{
	import std.ascii : isWhite;

	auto s = url;

	// Ignore trailing slashes and whitespace.
	while (s.length && (s[$-1] == '/' || isWhite(s[$-1])))
		s = s[0..$-1];

	// A path to a ".git" directory names the directory containing it.
	if (s.length > 5 && s.endsWith("/.git"))
	{
		s = s[0..$-5];
		while (s.length && s[$-1] == '/')
			s = s[0..$-1];
	}

	// Take whatever follows the last separator (the whole string if there is none,
	// as lastIndexOf then returns -1).
	auto sep = max(s.lastIndexOf('/'), s.lastIndexOf(':'));
	return s[sep+1..$].chomp(".git");
}

unittest
{
	assert(repositoryNameFromURL("https://github.com/CyberShadow/ae.git") == "ae");
	assert(repositoryNameFromURL("git@example.com:ae.git") == "ae");
	assert(repositoryNameFromURL("https://github.com/CyberShadow/ae") == "ae");
	assert(repositoryNameFromURL("https://github.com/CyberShadow/ae.git/") == "ae");
	assert(repositoryNameFromURL("/home/user/ae/.git") == "ae");
	assert(repositoryNameFromURL("ae") == "ae");
	assert(repositoryNameFromURL("") == "");
}
525 }