1 /**
2  * Wrappers for the git command-line tools.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.sys.git;
15 
16 import std.algorithm;
17 import std.array;
18 import std.conv;
19 import std.exception;
20 import std.file;
21 import std.format;
22 import std.path;
23 import std.process;
24 import std.string;
25 import std.typecons;
26 import std.utf;
27 
28 import ae.sys.cmd;
29 import ae.sys.file;
30 import ae.utils.aa;
31 import ae.utils.text;
32 
/// A handle to a git repository on disk.
/// All git commands issued through it are pointed at this
/// repository using explicit --work-tree / --git-dir arguments.
struct Repository
{
	/// Absolute path to the repository's work tree.
	string path;

	// TODO: replace this with using the std.process workDir parameter in 2.066
	/// Command-line prefix (`git --work-tree=... --git-dir=...`)
	/// which is prepended to the arguments of every command.
	string[] argsPrefix;

	/// Open the repository whose work tree is at `path`.
	/// If `.git` is a regular file, it is expected to be a link
	/// of the form "gitdir: <path>", which is then followed.
	/// Throws: Exception if `path` does not exist, or if `.git`
	/// is a file which does not start with "gitdir: ".
	this(string path)
	{
		path = path.absolutePath();
		enforce(path.exists, "Repository path does not exist");
		auto dotGit = path.buildPath(".git");
		if (dotGit.exists && dotGit.isFile)
		{
			// Validate the link prefix before slicing it off, so that
			// unexpected file contents produce a clear error instead
			// of a range violation or a bogus git directory.
			enum gitDirPrefix = "gitdir: ";
			auto link = dotGit.readText().strip();
			enforce(link.startsWith(gitDirPrefix), "Malformed .git file: " ~ dotGit);
			dotGit = path.buildPath(link[gitDirPrefix.length..$]);
		}
		//path = path.replace(`\`, `/`);
		this.path = path;
		this.argsPrefix = [`git`, `--work-tree=` ~ path, `--git-dir=` ~ dotGit];
	}

	invariant()
	{
		// A default-initialized Repository has no command prefix
		assert(argsPrefix.length, "Not initialized");
	}

	// Have just some primitives here.
	// Higher-level functionality can be added using UFCS.
	// The first argument is the git subcommand name; it selects the
	// directory passed to pushd (see workPath).
	// NOTE(review): for most commands workPath returns null -
	// confirm that pushd treats a null path as "stay where we are".

	/// Run a git command using the module-scope `run`.
	void   run  (string[] args...) { auto owd = pushd(workPath(args[0])); return .run  (argsPrefix ~ args); }
	/// Run a git command using the module-scope `query`, returning its result.
	string query(string[] args...) { auto owd = pushd(workPath(args[0])); return .query(argsPrefix ~ args); }
	/// Run a git command, returning true if it exited with status 0.
	bool   check(string[] args...) { auto owd = pushd(workPath(args[0])); return spawnProcess(argsPrefix ~ args).wait() == 0; }
	/// Start a git command with pipeProcess, returning the resulting pipes.
	auto   pipe (string[] args...) { auto owd = pushd(workPath(args[0])); return pipeProcess(argsPrefix ~ args); }

	/// Certain git commands (notably, bisect) must
	/// be run in the repository's root directory.
	/// Returns the work tree path for such commands, null otherwise.
	private string workPath(string cmd)
	{
		switch (cmd)
		{
			case "bisect":
			case "submodule":
				return path;
			default:
				return null;
		}
	}

	/// Load the commit graph (from `git log --all --pretty=raw`)
	/// and the refs (from `git show-ref --dereference`).
	/// Only refs which point at a commit seen in the log are recorded.
	History getHistory()
	{
		History history;

		// Look up a commit by hash, creating (and numbering) it on first use.
		Commit* getCommit(Hash hash)
		{
			auto pcommit = hash in history.commits;
			return pcommit ? *pcommit : (history.commits[hash] = new Commit(history.numCommits++, hash));
		}

		// The commit whose log entry is currently being parsed
		Commit* commit;

		foreach (line; query([`log`, `--all`, `--pretty=raw`]).splitLines())
		{
			if (!line.length)
				continue;

			if (line.startsWith("commit "))
			{
				auto hash = line[7..$].toCommitHash();
				commit = getCommit(hash);
			}
			else
			if (line.startsWith("tree "))
				continue;
			else
			if (line.startsWith("parent "))
			{
				auto hash = line[7..$].toCommitHash();
				auto parent = getCommit(hash);
				commit.parents ~= parent;
				parent.children ~= commit;
			}
			else
			if (line.startsWith("author "))
				commit.author = line[7..$];
			else
			if (line.startsWith("committer "))
			{
				commit.committer = line[10..$];
				// The timestamp is the second-to-last space-separated field
				commit.time = line.split(" ")[$-2].to!int();
			}
			else
			if (line.startsWith("    "))
				commit.message ~= line[4..$];
			else
			if (commit && commit.message.length)
			{
				// An unindented line after the message has begun is taken to be
				// the rest of the previous message line (splitLines also splits
				// on line terminators other than \n).
				//enforce(false, "Unknown line in git log: " ~ line);
				commit.message[$-1] ~= line;
			}
			// Otherwise, no message line exists yet: this is a header which is
			// not recorded here (e.g. "gpgsig" or "encoding") or one of its
			// continuation lines. There is nothing to append to, so skip it.
		}

		foreach (line; query([`show-ref`, `--dereference`]).splitLines())
		{
			// Each line is "<40 hex digits> <ref name>"
			auto h = line[0..40].toCommitHash();
			if (h in history.commits)
				history.refs[line[41..$]] = h;
		}

		return history;
	}

	/// Run a batch cat-file query.
	/// Returns the objects with the given hashes, in the same order.
	/// Throws: Exception on an unexpected reply (which includes
	/// git's reply for objects it cannot find), or if git fails.
	GitObject[] getObjects(Hash[] hashes)
	{
		GitObject[] result;
		result.reserve(hashes.length);

		auto pipes = this.pipe(`cat-file`, `--batch`);
		foreach (hash; hashes)
		{
			pipes.stdin.writeln(hash.toString());
			pipes.stdin.flush();

			// Reply header: "<hash> <type> <size>"
			auto headerLine = pipes.stdout.readln().strip();
			auto header = headerLine.split(" ");
			enforce(header.length == 3, "Malformed header during cat-file: " ~ headerLine);
			enforce(header[0].toCommitHash() == hash, "Unexpected object during cat-file");

			GitObject obj;
			obj.hash = hash;
			obj.type = header[1];
			auto size = to!size_t(header[2]);
			// File.rawRead refuses empty buffers, so only read
			// the contents of objects which have any (e.g. not
			// the empty blob). obj.data stays empty otherwise.
			if (size)
			{
				auto data = new ubyte[size];
				auto read = pipes.stdout.rawRead(data);
				enforce(read.length == size, "Unexpected EOF during cat-file");
				obj.data = data.assumeUnique();
			}

			// The object contents are followed by a single newline
			char[1] lf;
			pipes.stdout.rawRead(lf[]);
			enforce(lf[0] == '\n', "Terminating newline expected");

			result ~= obj;
		}
		pipes.stdin.close();
		enforce(pipes.pid.wait() == 0, "git cat-file exited with failure");
		return result;
	}

	/// Wraps a running `git hash-object --stdin-paths` process.
	/// Closing its input and waiting for it happens in the destructor.
	struct ObjectWriterImpl
	{
		ProcessPipes pipes;

		/// Write one object with the given contents.
		/// The data is handed to git through a named pipe, the
		/// file name of which is sent to git's standard input.
		/// Returns: the hash which git printed in reply.
		Hash write(in void[] data)
		{
			auto p = NamedPipe("ae-sys-git-writeObjects");
			pipes.stdin.writeln(p.fileName);
			pipes.stdin.flush();

			auto f = p.connect();
			f.rawWrite(data);
			f.flush();
			f.close();

			return pipes.stdout.readln().strip().toCommitHash();
		}

		~this()
		{
			pipes.stdin.close();
			enforce(pipes.pid.wait() == 0, "git hash-object exited with failure");
		}
	}
	/// Reference-counted handle to an ObjectWriterImpl
	alias ObjectWriter = RefCounted!ObjectWriterImpl;

	/// Spawn a hash-object process which can hash and write git objects on the fly.
	/// `type` is passed to git as the object type (`-t`).
	ObjectWriter createObjectWriter(string type)
	{
		auto pipes = this.pipe(`hash-object`, `-t`, type, `-w`, `--stdin-paths`);
		return ObjectWriter(pipes);
	}

	/// Batch-write the given objects to the database.
	/// The hashes are saved to the "hash" fields of the passed objects.
	void writeObjects(GitObject[] objects)
	{
		// One hash-object process is used per distinct object type
		string[] allTypes = objects.map!(obj => obj.type).toSet().keys;
		foreach (type; allTypes)
		{
			auto writer = createObjectWriter(type);
			foreach (ref obj; objects)
				if (obj.type == type)
					obj.hash = writer.write(obj.data);
		}
	}
}
222 
/// A git object: its hash, its type name, and its raw contents.
/// Also contains parsers and builders for commit and tree objects.
struct GitObject
{
	Hash hash;                /// The object's hash
	string type;              /// Object type name, e.g. "commit" or "tree"
	immutable(ubyte)[] data;  /// Raw object contents

	/// The fields of a commit object.
	struct ParsedCommit
	{
		Hash tree;
		Hash[] parents;
		string author, committer; /// entire lines - name, email and date
		string[] message;
	}

	/// Parse this object's data as a commit.
	/// Throws: Exception if this is not a commit object, or
	/// if it has a header field other than tree / parent /
	/// author / committer.
	ParsedCommit parseCommit()
	{
		enforce(type == "commit", "Wrong object type");
		ParsedCommit result;
		auto lines = (cast(string)data).split('\n');
		foreach (n, line; lines)
		{
			if (line == "")
			{
				// The first empty line ends the headers;
				// everything after it is the message.
				result.message = lines[n+1..$];
				break; // commit message begins
			}
			// Header lines have the form "<field> <value>"
			auto parts = line.findSplit(" ");
			auto field = parts[0];
			line = parts[2];
			switch (field)
			{
				case "tree":
					result.tree = line.toCommitHash();
					break;
				case "parent":
					result.parents ~= line.toCommitHash();
					break;
				case "author":
					result.author = line;
					break;
				case "committer":
					result.committer = line;
					break;
				default:
					throw new Exception("Unknown commit field: " ~ field);
			}
		}
		return result;
	}

	/// Build a commit object from the given fields.
	/// The hash field of the result is left at Hash.init.
	static GitObject createCommit(ParsedCommit commit)
	{
		// One "parent" line per parent; message lines are joined with newlines.
		auto s = "tree %s\n%-(parent %s\n%|%)author %s\ncommitter %s\n\n%-(%s\n%)".format(
				commit.tree.toString(),
				commit.parents.map!(ae.sys.git.toString),
				commit.author,
				commit.committer,
				commit.message,
			);
		return GitObject(Hash.init, "commit", cast(immutable(ubyte)[])s);
	}

	/// One entry of a tree object.
	struct TreeEntry
	{
		uint mode;    /// File mode (stored in the object as octal text)
		string name;  /// Entry name
		Hash hash;    /// Hash of the object the entry refers to
	}

	/// Parse this object's data as a tree.
	/// Each entry is "<octal mode> <name>", a zero byte,
	/// and then the raw bytes of the entry's hash.
	/// Throws: Exception if this is not a tree object,
	/// or if an entry is malformed or truncated.
	TreeEntry[] parseTree()
	{
		enforce(type == "tree", "Wrong object type");
		TreeEntry[] result;
		auto rem = data;
		while (rem.length)
		{
			auto si = rem.countUntil(' ');
			auto zi = rem.countUntil(0);
			auto ei = zi + 1 + Hash.sizeof;
			// Validate before slicing: countUntil returns -1 when the
			// terminator is missing, and slicing with that would fault
			// instead of reporting the malformed entry.
			enforce(0 < si && si < zi && ei <= rem.length, "Malformed tree entry:\n" ~ hexDump(rem));
			auto str = cast(string)rem[0..zi];
			result ~= TreeEntry(str[0..si].to!uint(8), str[si+1..zi], cast(Hash)rem[zi+1..ei][0..20]); // https://issues.dlang.org/show_bug.cgi?id=13112
			rem = rem[ei..$];
		}
		return result;
	}

	/// Build a tree object from the given entries, in the given order.
	/// The hash field of the result is left at Hash.init.
	static GitObject createTree(TreeEntry[] entries)
	{
		auto buf = appender!(ubyte[]);
		foreach (entry; entries)
		{
			buf.formattedWrite("%o %s\0", entry.mode, entry.name);
			buf.put(entry.hash[]);
		}
		return GitObject(Hash.init, "tree", buf.data.assumeUnique);
	}
}
321 
/// The commit graph and refs of a repository,
/// as built by Repository.getHistory.
struct History
{
	/// All known commits, indexed by their hash.
	Commit*[Hash] commits;
	/// Number of commits created so far.
	/// getHistory uses it to assign each new commit's id.
	uint numCommits = 0;
	/// Ref name -> hash of the commit it points at.
	Hash[string] refs;
}
328 
/// An object hash, as 20 raw bytes (40 hex digits in text form).
alias Hash = ubyte[20];
330 
/// A node in the commit graph built by Repository.getHistory.
struct Commit
{
	/// Sequence number, assigned in order of creation by getHistory.
	uint id;
	/// The commit's hash.
	Hash hash;
	/// Timestamp, taken from the second-to-last field of the committer line.
	uint time;
	/// The author / committer lines, without the leading field name.
	string author, committer;
	/// Commit message lines, with the log's four-space indent removed.
	string[] message;
	/// Links to the neighbouring commits in the graph.
	Commit*[] parents, children;
}
340 
/// Convert a hash from its 40-digit hexadecimal text form to raw bytes.
/// Both lowercase and uppercase digits are accepted.
/// Throws: Exception if the text is not exactly 40 characters long;
/// a conversion error if a digit pair is not valid hexadecimal.
Hash toCommitHash(in char[] hash)
{
	enforce(hash.length == 40, "Bad hash length: " ~ hash);

	Hash bytes;
	for (size_t pos = 0; pos < bytes.length; pos++)
	{
		// Each byte is written as two consecutive hex digits
		auto start = pos * 2;
		bytes[pos] = to!ubyte(hash[start .. start + 2], 16);
	}
	return bytes;
}
349 
/// Convert a hash to its text form: 40 lowercase hexadecimal digits.
string toString(ref Hash hash)
{
	static immutable hexDigits = "0123456789abcdef";

	auto text = new char[hash.length * 2];
	foreach (pos, octet; hash)
	{
		// High nibble first, then low nibble
		text[pos * 2    ] = hexDigits[octet >> 4];
		text[pos * 2 + 1] = hexDigits[octet & 0x0F];
	}
	return text.assumeUnique();
}
354 
unittest
{
	// Digits of either case must decode to the same byte values.
	immutable ubyte[20] expected = [0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0x01, 0x23, 0x45, 0x67];
	auto parsed = toCommitHash("0123456789abcdef0123456789ABCDEF01234567");
	assert(parsed == expected);
}
359 
/// Tries to match the default destination of `git clone`.
/// The result is the last component of the URL (components are
/// separated by '/' or ':'), without a ".git" suffix. Trailing
/// slashes and a trailing "/.git" component are ignored, so that
/// e.g. "host:dir/repo/" and "/dir/repo/.git" both yield "repo".
/// Returns an empty string for an empty URL.
string repositoryNameFromURL(string url)
{
	// Remove any trailing directory separators
	static string stripSlashes(string s)
	{
		while (s.endsWith("/"))
			s = s[0..$-1];
		return s;
	}

	url = stripSlashes(url);
	// A path to a repository's .git directory is named after its parent
	if (url.endsWith("/.git"))
		url = stripSlashes(url[0..$-"/.git".length]);

	// Take everything after the last '/' or ':' (the whole string if neither occurs)
	auto name = url;
	foreach_reverse (i, c; url)
		if (c == '/' || c == ':')
		{
			name = url[i+1..$];
			break;
		}

	return name.chomp(".git");
}

unittest
{
	assert(repositoryNameFromURL("https://github.com/CyberShadow/ae.git") == "ae");
	assert(repositoryNameFromURL("git@example.com:ae.git") == "ae");
	assert(repositoryNameFromURL("https://github.com/CyberShadow/ae/") == "ae");
	assert(repositoryNameFromURL("https://github.com/CyberShadow/ae.git/") == "ae");
	assert(repositoryNameFromURL("/path/to/repo/.git") == "repo");
	assert(repositoryNameFromURL("") == "");
}