/**
 * Code to manage a D component repository.
 *
 * License:
 * This Source Code Form is subject to the terms of
 * the Mozilla Public License, v. 2.0. If a copy of
 * the MPL was not distributed with this file, You
 * can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Authors:
 * Vladimir Panteleev <ae@cy.md>
 */

module ae.sys.d.repo;

import std.algorithm;
import std.conv : text;
import std.datetime : SysTime;
import std.exception;
import std.file;
import std.process : environment;
import std.range;
import std.regex;
import std.string;
import std.path;

import ae.sys.git;
import ae.utils.exception;
import ae.utils.json;
import ae.utils.regex;
import ae.utils.time : StdTime;

/// Base class for a managed repository.
class ManagedRepository
{
	/// The Git repository managed by this object.
	/// The repository is obtained from `getRepo` on first access; at that
	/// point a fixed author/committer identity and date are placed into
	/// its environment.
	public @property ref const(Git) git()
	{
		// Already initialized - nothing to do.
		if (gitRepo.path)
			return gitRepo;

		gitRepo = getRepo();
		assert(gitRepo.path, "No repository");

		// [variable name pattern, value] pairs applied to both identities.
		static immutable string[2][] identityVars = [
			["GIT_%s_DATE", "Thu, 01 Jan 1970 00:00:00 +0000"],
			["GIT_%s_NAME", "ae.sys.d"],
			["GIT_%s_EMAIL", "ae.sys.d\x40thecybershadow.net"],
		];
		foreach (role; ["AUTHOR", "COMMITTER"])
			foreach (entry; identityVars)
				gitRepo.environment[entry[0].format(role)] = entry[1];

		return gitRepo;
	}

	/// When set, nothing is fetched from remotes.
	public bool offline;

	/// When set, the working tree state is recorded and verified,
	/// to make sure we don't clobber user changes.
	public bool verify;

	/// Backing storage for the `git` property.
	private Git gitRepo;

	/// Supplies the repository to manage. Implemented by subclasses.
	abstract protected Git getRepo();

	/// Base name of the repository directory.
	public @property string name()
	{
		return baseName(git.path);
	}

	// Head

	/// Ensure the repository's HEAD is as indicated.
/// Cleans the working tree first. If a plain checkout of `hash` fails,
/// the merge cache is searched for a merge which produced `hash`,
/// and that merge is performed again.
/// Throws: Exception if the hash is neither available nor a known merge
/// result (the checkout error is attached as the cause), or if the
/// recreated merge results in a different commit.
public void needHead(string hash)
{
	needClean();
	if (getHead() == hash)
		return;

	try
		performCheckout(hash);
	catch (Exception e)
	{
		log("Error checking out %s: %s".format(hash, e));

		// Might be a GC-ed merge. Try to recreate the merge
		auto hit = mergeCache.find!(entry => entry.result == hash)();
		// Chain the checkout failure, so that the root cause is not
		// lost when we give up here.
		enforce(!hit.empty, new Exception("Unknown hash %s".format(hash), e));
		performMerge(hit.front.spec);
		enforce(getHead() == hash, "Unexpected merge result: expected %s, got %s".format(hash, getHead()));
	}
}

/// Cached result of getHead. null means "not known, query git".
private string currentHead = null;

/// Returns the SHA1 of the given named ref.
public string getRef(string name)
{
	return git.query("rev-parse", "--verify", "--quiet", name);
}

/// Return the commit the repository HEAD is pointing at.
/// Cache the result.
public string getHead()
{
	if (!currentHead)
		currentHead = getRef("HEAD");

	return currentHead;
}

/// Check out the given commit (fetching it first if needed),
/// then record the new working tree state and the new HEAD.
protected void performCheckout(string hash)
{
	needClean();
	needCommit(hash);

	log("Checking out %s commit %s...".format(name, hash));

	git.run("checkout", hash);

	saveState();
	currentHead = hash;
}

/// Ensure that the specified commit is fetched.
/// If the commit is not available locally, the remotes are updated
/// and the check is repeated once.
/// Throws: Exception if the commit is missing and we are in offline mode
/// (the failed check is attached as the cause), or if the commit is
/// still not available after updating.
protected void needCommit(string hash)
{
	// Throws unless `hash` names a commit object in the local repository.
	void check()
	{
		enforce(git.query(["cat-file", "-t", hash]) == "commit",
			"Unexpected object type");
	}

	try
		check();
	catch (Exception e)
	{
		if (offline)
		{
			log("Don't have commit " ~ hash ~ " and in offline mode, can't proceed.");
			// Keep the original failure as the cause instead of discarding it.
			throw new Exception("Giving up", e);
		}
		else
		{
			log("Don't have commit " ~ hash ~ ", updating and retrying...");
			update();
			check();
		}
	}
}

/// Update the remote.
/// Return true if any updates were fetched.
/// Does nothing and returns false when `offline` is set.
public bool update()
{
	if (!offline)
	{
		log("Updating " ~ name ~ "...");
		// Compare the full ref listing before and after fetching
		// to find out whether anything changed.
		auto oldRefs = git.query(["show-ref"]);
		git.run("-c", "fetch.recurseSubmodules=false", "remote", "update", "--prune");
		git.run("-c", "fetch.recurseSubmodules=false", "fetch", "--force", "--tags");
		auto newRefs = git.query(["show-ref"]);
		return oldRefs != newRefs;
	}
	else
		return false;
}

// Clean

bool clean = false; /// True when we know that the repository is currently clean.

/// Ensure the repository's working copy is clean.
/// The cleanup is skipped when the `clean` flag is already set.
public void needClean()
{
	if (clean)
		return;
	performCleanup();
	clean = true;
}

/// Discard all working tree changes and untracked files.
/// The saved working tree state is verified (and then deleted) first,
/// and a fresh state is saved afterwards.
/// Throws: RepositoryCleanException (wrapping the original error)
/// if any of the git commands fail.
private void performCleanup()
{
	checkState();
	clearState();

	log("Cleaning repository %s...".format(name));
	try
	{
		git.run("reset", "--hard");
		git.run("clean", "--force", "--force" /*Sic*/, "-x", "-d", "--quiet");
	}
	catch (Exception e)
		throw new RepositoryCleanException(e.msg, e);
	saveState();
}

// Merge cache

/// How to merge a branch into another
enum MergeMode
{
	merge, /// git merge (commit with multiple parents) of the target and branch tips
	cherryPick, /// apply the commits as a patch
}
/// Describes one merge or revert operation (the merge cache key).
private static struct MergeSpec
{
	string target;
	string[2] branch; // [base, tip]
	MergeMode mode;
	bool revert = false;
}
/// A merge cache entry: an operation and the commit it resulted in.
private static struct MergeInfo
{
	MergeSpec spec;
	string result;
	int mainline = 0; // git parent index of the "target", if any
}
private alias MergeCache = MergeInfo[];
private MergeCache mergeCacheData;
private bool haveMergeCache;

/// The merge cache. It is read from the file at `mergeCachePath`
/// (if that file exists) on first access.
private @property ref MergeCache mergeCache()
{
	if (!haveMergeCache)
	{
		if (mergeCachePath.exists)
			mergeCacheData = mergeCachePath.readText().jsonParse!MergeCache;
		haveMergeCache = true;
	}

	return mergeCacheData;
}

/// Write the merge cache to the file at `mergeCachePath`.
private void saveMergeCache()
{
	std.file.write(mergeCachePath(), toJson(mergeCache));
}

/// Location of the merge cache file, inside the git directory.
private @property string mergeCachePath()
{
	return buildPath(git.gitDir, "ae-sys-d-mergecache-v2.json");
}

// Merge

/// Returns the hash of the merge between the target and branch commits.
/// Performs the merge if necessary. Caches the result.
public string getMerge(string target, string[2] branch, MergeMode mode)
{
	return getMergeImpl(MergeSpec(target, branch, mode, false));
}

/// Returns the resulting hash when reverting the branch from the target commit.
/// Performs the revert if necessary. Caches the result.
/// In `MergeMode.merge` mode, the merge commit to revert and its
/// mainline index (as per `man git-revert`) are looked up
/// using `getChild`; they are not supplied by the caller.
public string getRevert(string target, string[2] branch, MergeMode mode)
{
	return getMergeImpl(MergeSpec(target, branch, mode, true));
}

/// Common implementation of getMerge and getRevert:
/// return the cached result for `spec`, or perform the operation,
/// then add the resulting HEAD to the cache and save the cache.
private string getMergeImpl(MergeSpec spec)
{
	auto hit = mergeCache.find!(entry => entry.spec == spec)();
	if (!hit.empty)
		return hit.front.result;

	performMerge(spec);

	auto head = getHead();
	mergeCache ~= MergeInfo(spec, head);
	saveMergeCache();
	return head;
}

private static const string mergeCommitMessage = "ae.sys.d merge";
private static const string revertCommitMessage = "ae.sys.d revert";

// Performs a merge or revert.
// On failure, the in-progress git operation is aborted and the "clean"
// flag is invalidated, so that the next needClean call cleans up again.
private void performMerge(MergeSpec spec)
{
	needHead(spec.target);
	// HEAD is about to move; make getHead query git again.
	currentHead = null;

	log("%s %s into %s.".format(spec.revert ? "Reverting" : "Merging", spec.branch, spec.target));

	scope(exit) saveState();

	scope (failure)
	{
		// Invalidate the flag before trying to abort: should the abort
		// itself throw (e.g. because no operation is in progress), the
		// flag must not keep claiming that the working tree is clean.
		clean = false;

		string op;
		final switch (spec.mode)
		{
			case MergeMode.merge:
				op = spec.revert ? "revert" : "merge";
				break;
			case MergeMode.cherryPick:
				op = spec.revert ? "revert" : "cherry-pick";
				break;
		}

		log("Aborting " ~ op ~ "...");
		git.run(op, "--abort");
	}

	void doMerge()
	{
		final switch (spec.mode)
		{
			case MergeMode.merge:
				if (!spec.revert)
					git.run("merge", "--no-ff", "-m", mergeCommitMessage, spec.branch[1]);
				else
				{
					// When reverting in merge mode, we try to
					// find the merge commit following the branch
					// tip, and revert only that merge commit.
					string mergeCommit; int mainline;
					getChild(spec.target, spec.branch[1], /*out*/mergeCommit, /*out*/mainline);

					string[] args = ["revert", "--no-edit"];
					if (mainline)
						args ~= ["--mainline", text(mainline)];
					args ~= [mergeCommit];
					git.run(args);
				}
				break;
			case MergeMode.cherryPick:
				enforce(spec.branch[0], "Must specify a branch base for a cherry-pick merge");
				auto range = spec.branch[0] ~ ".." ~ spec.branch[1];
				if (!spec.revert)
					git.run("cherry-pick", range);
				else
					git.run("revert", "--no-edit", range);
				break;
		}
	}

	// Only for the repository named "dmd": if the operation fails, take
	// the incoming version of the "test" directory and try to complete
	// the operation.
	if (git.path.baseName() == "dmd")
	{
		try
			doMerge();
		catch (Exception)
		{
			log("Merge failed. Attempting conflict resolution...");
			git.run("checkout", "--theirs", "test");
			git.run("add", "test");
			if (!spec.revert)
				git.run("-c", "rerere.enabled=false", "commit", "-m", mergeCommitMessage);
			else
				git.run("revert", "--continue");
		}
	}
	else
		doMerge();

	log("Merge successful.");
}

/// Finds and returns the merge parents of the given merge commit.
/// Queries the git repository if necessary. Caches the result.
/// Cache entries describing reverts are never returned.
/// Throws: Exception if the commit does not have exactly two parents.
public MergeInfo getMergeInfo(string merge)
{
	// First cached non-revert entry which produced this commit, if any.
	foreach (ref cached; mergeCache)
		if (cached.result == merge && !cached.spec.revert)
			return cached;

	// Not cached: ask git for the parent hashes of the commit.
	auto parentList = git.query(["log", "--pretty=%P", "-n", "1", merge]).split();
	enforce(parentList.length > 1, "Not a merge: " ~ merge);
	enforce(parentList.length == 2, "Too many parents: " ~ merge);

	// The first parent is the target; the second parent is the branch tip.
	auto spec = MergeSpec(parentList[0], [null, parentList[1]], MergeMode.merge, false);
	auto result = MergeInfo(spec, merge, 1);
	mergeCache ~= result;
	return result;
}

/// Walks the chain of merges beginning at the given head commit
/// until the merge of the given branch is found, then re-applies
/// (in their original order) all merges visited on the way,
/// leaving out the one with the given branch.
public string getUnMerge(string head, string[2] branch, MergeMode mode)
{
	// This could be optimized using an interactive rebase

	auto info = getMergeInfo(head);
	if (info.spec.branch[1] != branch[1])
	{
		// Not the merge we are looking for: un-merge below it first,
		// then put this merge back on top of the result.
		auto rebuiltTarget = getUnMerge(info.spec.target, branch, mode);
		return getMerge(rebuiltTarget, info.spec.branch, info.spec.mode);
	}

	// Found it: the state before this merge is simply its target.
	return info.spec.target;
}

// Branches, forks and customization

/// Return SHA1 of the given remote ref, as seen through localRef.
/// Unless offline mode is on, remoteRef is first fetched from
/// the remote into localRef (forced update).
string getRemoteRef(string remote, string remoteRef, string localRef)
{
	if (offline)
		return getRef(localRef);

	log("Fetching from %s (%s -> %s) ...".format(remote, remoteRef, localRef));
	auto refSpec = "+%s:%s".format(remoteRef, localRef);
	git.run("fetch", remote, refSpec);
	return getRef(localRef);
}

/// Return SHA1 of the given pull request #.
/// Fetches the pull request first, unless offline mode is on.
string getPullTip(int pull)
{
	auto remoteRef = "refs/pull/%d/head".format(pull);
	auto localRef = "refs/digger/pull/%d".format(pull);
	return getRemoteRef("origin", remoteRef, localRef);
}

/// True if `s` consists of exactly 40 lowercase hexadecimal digits.
private static bool isCommitHash(string s)
{
	if (s.length != 40)
		return false;

	foreach (char c; s)
	{
		immutable digit = c >= '0' && c <= '9';
		immutable lowerHex = c >= 'a' && c <= 'f';
		if (!digit && !lowerHex)
			return false;
	}
	return true;
}

/// Return SHA1 (base, tip) of the given branch (possibly of GitHub fork).
/// Fetches the fork first, unless offline mode is on.
/// A null user selects "dlang". When tip is a full commit hash, all
/// branches of the fork are fetched, and a null base is replaced
/// by the first parent of tip. Otherwise, tip is treated as a branch
/// name, and the returned base is null.
/// (This is a thin wrapper around getRemoteRef.)
string[2] getBranch(string user, string base, string tip)
{
	// Validate whatever was specified; only the tip is mandatory.
	if (user)
		enforce(user.match(re!`^\w[\w\-]*$`), "Bad remote name");
	if (base)
		enforce(base.match(re!`^\w[\w\-\.]*$`), "Bad branch base name");
	enforce(tip.match(re!`^\w[\w\-\.]*$`), "Bad branch tip name");

	if (!user)
		user = "dlang";

	if (!isCommitHash(tip))
	{
		// A branch name: fetch just that branch into a private ref.
		auto forkUrl = "https://github.com/%s/%s".format(user, name);
		auto remoteRef = "refs/heads/%s".format(tip);
		auto localRef = "refs/digger/fork/%s/%s".format(user, tip);
		return [null, getRemoteRef(forkUrl, remoteRef, localRef)];
	}

	if (!offline)
	{
		// We don't know which branch the commit will be in, so just grab everything.
		auto forkUrl = "https://github.com/%s/%s".format(user, name);
		log("Fetching everything from %s ...".format(forkUrl));
		git.run("fetch", forkUrl, "+refs/heads/*:refs/forks/%s/*".format(user));
	}

	if (!base)
		base = git.query("rev-parse", tip ~ "^");

	return [base, tip];
}

/// Find the child of a commit, and, if the commit was a merge,
/// the mainline index of said commit for the child.
/// Only children which are reachable from `branch` are considered.
/// Params:
///   branch   = commit whose history is searched
///   commit   = commit whose child is to be found
///   child    = receives the hash of the only child of `commit` within the history of `branch`
///   mainline = receives 0 if the child has a single parent; otherwise
///              2 if `commit` is the child's first parent, and 1 if not
/// Throws: Exception if either commit is not found in the history, if `commit`
/// does not have exactly one child reachable from `branch`, or if the child
/// has more than two parents.
void getChild(string branch, string commit, out string child, out int mainline)
{
	needCommit(branch);

	log("Querying history for commit children...");
	auto history = git.getHistory([branch]);

	// Collect the set of all commits reachable from `branch`.
	// NOTE(review): visit recurses once per commit along a parent chain;
	// presumably fine for the histories this is used on - confirm stack depth.
	bool[Git.CommitID] seen;
	void visit(Git.History.Commit* commit)
	{
		if (commit.oid !in seen)
		{
			seen[commit.oid] = true;
			foreach (parent; commit.parents)
				visit(parent);
		}
	}
	auto branchHash = Git.CommitID(branch);
	auto pBranchCommit = branchHash in history.commits;
	enforce(pBranchCommit, "Can't find commit " ~ branch ~" in history");
	visit(*pBranchCommit);

	auto commitHash = Git.CommitID(commit);
	auto pCommit = commitHash in history.commits;
	enforce(pCommit, "Can't find commit in history");
	auto children = (*pCommit).children;
	enforce(children.length, "Commit has no children");
	// Ignore children which are not part of the history of `branch`.
	children = children.filter!(child => child.oid in seen).array();
	enforce(children.length, "Commit has no children under specified branch");
	enforce(children.length == 1, "Commit has more than one child");
	auto childCommit = children[0];
	child = childCommit.oid.toString();

	if (childCommit.parents.length == 1)
		mainline = 0; // the child is not a merge commit
	else
	{
		enforce(childCommit.parents.length == 2, "Can't get mainline of multiple-branch merges");
		// The mainline is the parent which is NOT `commit` (1-based index).
		if (childCommit.parents[0] is *pCommit)
			mainline = 2;
		else
			mainline = 1;

		// Remember this merge in the merge cache.
		// NOTE(review): the second MergeSpec field (branch) is a string[2],
		// but a single string is passed here, whereas getMergeInfo passes
		// [null, tip] - confirm that this is what is intended.
		// NOTE(review): the entry's `result` is set to `commit`, not to the
		// child's hash - confirm against how cache entries are looked up.
		auto mergeInfo = MergeInfo(
			MergeSpec(
				childCommit.parents[0].oid.toString(),
				childCommit.parents[1].oid.toString(),
				MergeMode.merge,
				true),
			commit, mainline);
		if (!mergeCache.canFind(mergeInfo))
		{
			mergeCache ~= mergeInfo;
			saveMergeCache();
		}
	}
}

// State saving and checking

/// Properties of one working tree file which are recorded and later compared.
private struct FileState
{
	bool isLink;
	ulong size;
	StdTime modificationTime;
}

/// Query the current state of one file.
/// `file` is a path relative to the repository directory.
/// Must only be called when `verify` is set.
private FileState getFileState(string file)
{
	assert(verify);
	auto path = git.path.buildPath(file);
	auto de = DirEntry(path);
	return FileState(de.isSymlink, de.size, de.timeLastModified.stdTime);
}

/// Maps file names (as printed by "git ls-files") to their state.
private alias RepositoryState = FileState[string];

/// Return the working tree "state".
/// This returns a file list, along with size and modification time.
/// Files whose state cannot be queried are left out of the result.
RepositoryState getState()
{
	assert(verify);
	auto files = git.query(["ls-files"]).splitLines();
	RepositoryState state;
	foreach (file; files)
		try
			state[file] = getFileState(file);
		catch (Exception e) {} // best effort: skip files which can't be examined
	return state;
}

/// Location of the saved working tree state, inside the git directory.
/// Must only be called when `verify` is set.
private @property string workTreeStatePath()
{
	assert(verify);
	return buildPath(git.gitDir, "ae-sys-d-worktree.json");
}

/// Save the state of the working tree for versioned files
/// to a .json file, which can later be verified with checkState.
/// This should be called after any git command which mutates the git state.
/// Does nothing unless `verify` is set.
void saveState()
{
	if (!verify)
		return;
	std.file.write(workTreeStatePath, getState().toJson());
}

/// Save the state of just one file.
/// This should be called after automatic edits to repository files during a build.
/// The file parameter should be relative to the directory root, and use forward slashes.
/// Does nothing unless `verify` is set and a saved state already exists.
void saveFileState(string file)
{
	if (!verify)
		return;
	if (!workTreeStatePath.exists)
		return;
	// Load the saved state, replace the entry of this one file, and write it back.
	auto state = workTreeStatePath.readText.jsonParse!RepositoryState();
	state[file] = getFileState(file);
	std.file.write(workTreeStatePath, state.toJson());
}

/// Verify that the state of the working tree matches the one
/// when saveState was last called. Throw an exception otherwise.
/// This and clearState should be called before any git command
/// which destroys working directory changes.
/// Nothing is checked unless `verify` is set and a saved state exists.
/// Every file of the current working tree state must be present in the
/// saved state, with the same link flag and (except for symlinks)
/// the same size and modification time.
void checkState()
{
	if (!verify || !workTreeStatePath.exists)
		return;

	auto savedState = workTreeStatePath.readText.jsonParse!RepositoryState();
	auto currentState = getState();
	try
	{
		foreach (file, now; currentState)
		{
			auto pBefore = file in savedState;
			enforce(pBefore, "New file: " ~ file);
			auto before = *pBefore;

			enforce(before.isLink == now.isLink,
				"File modified: %s (is link changed, before: %s, after: %s)".format(file, before.isLink, now.isLink));

			// Correct lstat is too hard, just skip symlinks
			if (!now.isLink)
			{
				enforce(before.size == now.size,
					"File modified: %s (size changed, before: %s, after: %s)".format(file, before.size, now.size));
				enforce(before.modificationTime == now.modificationTime,
					"File modified: %s (modification time changed, before: %s, after: %s)".format(file, SysTime(before.modificationTime), SysTime(now.modificationTime)));
				assert(before == now);
			}
		}
	}
	catch (Exception e)
	{
		// Turn the first detected difference into instructions for the user.
		auto message =
			"The worktree has changed since the last time this software updated it.\n" ~
			"Specifically:\n" ~
			" " ~ e.msg ~ "\n\n" ~
			"Aborting to avoid overwriting your changes.\n" ~
			"To continue:\n" ~
			" 1. Commit / stash / back up your changes, if you wish to keep them\n" ~
			" 2. Delete " ~ workTreeStatePath ~ "\n" ~
			" 3. Try this operation again.";
		throw new Exception(message);
	}
}

/// Remove the saved working tree state file, if there is one.
/// Does nothing unless `verify` is set.
void clearState()
{
	if (verify && workTreeStatePath.exists)
		workTreeStatePath.remove();
}

// Misc

/// Forget everything cached in memory: the merge cache,
/// the "clean" flag, and the current HEAD.
protected void reset()
{
	mergeCacheData = null;
	haveMergeCache = false;
	clean = false;
	currentHead = null;
}

/// Override to add logging.
/// Params:
///   line = one log message (callers in this class pass a single formatted string)
protected abstract void log(string line);
} // end of class ManagedRepository

/// Used to communicate that a "reset --hard" failed.
/// Generally this indicates git repository corruption.
/// Thrown by performCleanup, which wraps any failure of its
/// "reset --hard" / "clean" commands in this exception type,
/// keeping the original exception as the cause.
mixin DeclareException!q{RepositoryCleanException};