1 /** 2 * Data structure for optimized "sparse" N-dimensional matrices 3 * 4 * License: 5 * This Source Code Form is subject to the terms of 6 * the Mozilla Public License, v. 2.0. If a copy of 7 * the MPL was not distributed with this file, You 8 * can obtain one at http://mozilla.org/MPL/2.0/. 9 * 10 * Authors: 11 * Vladimir Panteleev <vladimir@thecybershadow.net> 12 */ 13 14 module ae.utils.mapset; 15 16 static if (__VERSION__ >= 2083): 17 18 import std.algorithm.iteration; 19 import std.algorithm.searching; 20 import std.algorithm.sorting; 21 import std.array; 22 import std.exception; 23 import std.typecons : tuple; 24 25 import ae.utils.aa; 26 import ae.utils.array : amap; 27 28 /** 29 Data structure for holding optimized "sparse" N-dimensional matrices. 30 The number of dimensions is arbitrary and can be varied at runtime. 31 32 Unlike classical sparse arrays/matrices, this data structure is 33 optimized not just for arrays which are mostly zero (or any given 34 value), but for any matrices where data is likely to repeat across 35 dimensions. 36 37 This is done by storing the data as a tree, with each depth 38 representing one dimension. A sub-tree is, thus, a sub-matrix 39 (slice of the matrix represented by the top-level tree) along the 40 top-level dimension. Each sub-tree is individually immutable, and 41 can therefore be shared within the same tree or even by several 42 instances of this type. 43 44 Dimension order need not be the same for all sub-trees. Even the 45 number of dimensions in siblings' sub-trees need not be the same; 46 i.e, the data structure is itself sparse with regards to the 47 dimensions. 48 49 As there is no explicit "value" type (associated with every set of 50 coordinates), this data structure can be seen as a representation 51 of a set of points, but it can be simulated by adding a "value" 52 dimension (in the same way how a gray-scale image can be 53 represented as a set of 3D points like a height-map). 54 55 An alternative way to look at this data structure is as a set of 56 maps (i.e. associative arrays). Thus, each layer of the tree stores 57 one possible map key, and under it, all the maps that have this key 58 with a specific value. Note that for this use case, DimValue needs 59 to have a value (configured using nullValue) which indicates the 60 absence of a certain key (dimension) in a map. 61 62 Params: 63 DimName = The type used to indicate the name (or index) of a dimension. 64 DimValue = The type for indicating a point on a dimension's axis. 65 nullValue = A value for DimValue indicating that a dimension is unset 66 */ 67 struct MapSet(DimName, DimValue, DimValue nullValue = DimValue.init) 68 { 69 /// Logically, each MapSet node has a map of values to a subset. 70 /// However, it is faster to represent that map as an array of key-value pairs 71 /// rather than a D associative array, so we do that here. 72 struct Pair 73 { 74 DimValue value; 75 MapSet set; 76 77 int opCmp(ref const typeof(this) other) const 78 { 79 static if (is(typeof(value.opCmp(other.value)) : int)) 80 return value.opCmp(other.value); 81 else 82 return value < other.value ? -1 : value > other.value ? 1 : 0; 83 } 84 } 85 86 struct Node 87 { 88 DimName dim; 89 Pair[] children; 90 91 immutable this(DimName dim, immutable Pair[] children) 92 { 93 // Zero children doesn't make sense. 94 // It would be the equivalent of an empty set, 95 // but then we should just use MapSet.emptySet instead. 96 assert(!children.empty, "Node with zero children"); 97 98 // Nodes should be in their canonical form for 99 // memoization and deduplication to be effective. 100 assert(children.isSorted, "Children are not sorted"); 101 102 this.dim = dim; 103 this.children = children; 104 105 // Because each subset is immutable, we can 106 // pre-calculate the hash during construction. 107 hash = hashOf(dim) ^ hashOf(children); 108 109 size_t totalMembers = 0; 110 foreach (i, ref pair; children) 111 { 112 if (i) 113 assert(pair.value != children[i-1].value, "Duplicate value"); 114 115 pair.set.assertDeduplicated(); 116 // Same as "Node with zero children" 117 assert(pair.set !is emptySet, "Empty set as subset"); 118 119 totalMembers += pair.set.count; 120 } 121 this.totalMembers = totalMembers; 122 } 123 124 immutable this(DimName dim, immutable MapSet[DimValue] children) 125 { 126 auto childrenList = children.byKeyValue.map!(kv => Pair(kv.key, kv.value)).array; 127 childrenList.sort(); 128 this(dim, cast(immutable) childrenList); 129 } 130 131 void toString(scope void delegate(const(char)[]) sink) const 132 { 133 import std.format : formattedWrite; 134 sink.formattedWrite("{ %(%s%), [ ", (&dim)[0..1]); 135 bool first = true; 136 foreach (ref pair; children) 137 { 138 if (first) 139 first = false; 140 else 141 sink(", "); 142 sink.formattedWrite("%s : %s", pair.value, pair.set); 143 } 144 sink(" ] }"); 145 } 146 147 private hash_t hash; 148 size_t totalMembers; 149 150 hash_t toHash() const 151 { 152 return hash; 153 } 154 155 bool opEquals(ref const typeof(this) s) const 156 { 157 return hash == s.hash && dim == s.dim && children == s.children; 158 } 159 } 160 161 /// Indicates the empty set. 162 /// May only occur at the top level (never as a subset). 163 private enum emptySetRoot = cast(immutable(Node)*)1; 164 165 /// If emptySetRoot, zero values. 166 /// If null, one value with zero dimensions. 167 /// Otherwise, pointer to node describing the next dimension. 168 immutable(Node)* root = emptySetRoot; 169 170 /// A set containing a single nil-dimensional element. 171 /// Holds exactly one value (a point with all dimensions at 172 /// nullValue). 173 static immutable unitSet = MapSet(null); 174 175 /// The empty set. Represents a set which holds zero values. 176 static immutable emptySet = MapSet(emptySetRoot); 177 178 /// Return the total number of items in this set. 179 size_t count() const 180 { 181 if (this is emptySet) 182 return 0; 183 if (this is unitSet) 184 return 1; 185 return root.totalMembers; 186 } 187 188 private struct SetSetOp { MapSet a, b; } 189 private struct SetDimOp { MapSet set; DimName dim; } 190 private struct SetIdxOp { MapSet set; size_t index; } 191 private struct Cache 192 { 193 /// For deduplication - key is value 194 MapSet[MapSet] instances; 195 /// Operations - things that operate recursively on subtrees 196 /// should be memoized here 197 MapSet[SetSetOp] merge, subtract, cartesianProduct, reorderUsing; 198 MapSet[SetDimOp] remove, bringToFront; 199 MapSet[SetIdxOp] swapDepth; 200 MapSet[MapSet] optimize, completeSuperset; 201 size_t[MapSet] uniqueNodes, maxDepth; 202 } 203 204 /// Because subtrees can be reused within the tree, a way of 205 /// memoizing operations across the entire tree (instead of just 206 /// across children of a single node, or siblings) is crucial for 207 /// performance. 208 static Cache cache; 209 210 /// Clear the global operations cache. 211 static void clearCache() 212 { 213 cache = Cache.init; 214 } 215 216 /// Because MapSet operations benefit greatly from memoization, 217 /// maintaining a set of interned sets benefits performance. After 218 /// calling `clearCache`, call this method to re-register extant 219 /// live instances to the instance cache. 220 void addToCache() const 221 { 222 cache.instances.updateVoid( 223 this, 224 { 225 if (this !is emptySet && this !is unitSet) 226 foreach (ref pair; root.children) 227 pair.set.addToCache(); 228 return this; 229 }, 230 (ref MapSet set) 231 { 232 assert(set is this); 233 } 234 ); 235 } 236 237 /// Intern and deduplicate this MapSet. 238 /// Needs to be called only after constructing a MapSet manually 239 /// (by allocating, populating, and setting `root`). 240 MapSet deduplicate() const 241 { 242 MapSet deduplicated; 243 cache.instances.updateVoid( 244 this, 245 { 246 debug if (this !is emptySet && this !is unitSet) 247 foreach (ref pair; root.children) 248 pair.set.assertDeduplicated(); 249 deduplicated = this; 250 return this; 251 }, 252 (ref MapSet set) 253 { 254 deduplicated = set; 255 } 256 ); 257 return deduplicated; 258 } 259 260 private void assertDeduplicated() const { debug assert(this is emptySet || this is unitSet || cache.instances[this] is this); } 261 262 /// Count and return the total number of unique nodes in this MapSet. 263 size_t uniqueNodes() const 264 { 265 return cache.uniqueNodes.require(this, { 266 HashSet!MapSet seen; 267 void visit(MapSet set) 268 { 269 if (set is emptySet || set is unitSet || set in seen) 270 return; 271 seen.add(set); 272 foreach (ref pair; set.root.children) 273 visit(pair.set); 274 } 275 visit(this); 276 return seen.length; 277 }()); 278 } 279 280 /// Collect the names of all dimensions occurring in this tree. 281 DimName[] getDims() const 282 { 283 HashSet!DimName dims; 284 HashSet!MapSet seen; 285 void visit(MapSet set) 286 { 287 if (set is emptySet || set is unitSet || set in seen) 288 return; 289 seen.add(set); 290 dims.add(set.root.dim); 291 foreach (ref pair; set.root.children) 292 visit(pair.set); 293 } 294 visit(this); 295 return dims.keys; 296 } 297 298 /// Return all values for all dimensions occurring in this set. 299 /// The Cartesian product of these values would thus be a superset 300 /// of this set. 301 /// This is equivalent to, but faster than, calling `getDims` and 302 /// then `all` for each dim. 303 HashSet!DimValue[DimName] getDimsAndValues() const 304 { 305 if (this is emptySet) return null; 306 307 // Be careful to count the implicit nullValues on branches 308 // where a dim doesn't occur. 309 MapSet set = this.completeSuperset; 310 HashSet!DimValue[DimName] result; 311 while (set !is unitSet) 312 { 313 DimName dim = set.root.dim; 314 HashSet!DimValue values = set.root.children.map!((ref child) => child.value).toSet; 315 bool added = result.addNew(dim, values); 316 assert(added, "Duplicate dimension"); 317 set = set.root.children[0].set; 318 } 319 return result; 320 } 321 322 /// Combine two matrices together, returning their union. 323 /// If `other` is a subset of `this`, return `this` unchanged. 324 MapSet merge(MapSet other) const 325 { 326 if (this is emptySet) return other; 327 if (other is emptySet) return this; 328 if (this is other) return this; 329 if (!root) return bringToFront(other.root.dim).merge(other); 330 331 this.assertDeduplicated(); 332 other.assertDeduplicated(); 333 return cache.merge.require(SetSetOp(this, other), { 334 other = other.bringToFront(root.dim); 335 336 if (root.children.length == 1 && other.root.children.length == 1 && 337 root.children[0].value == other.root.children[0].value) 338 { 339 // Single-child optimization 340 auto mergeResult = root.children[0].set.merge(other.root.children[0].set); 341 if (mergeResult !is root.children[0].set) 342 return MapSet(new immutable Node(root.dim, [Pair(root.children[0].value, mergeResult)])).deduplicate; 343 else 344 return this; 345 } 346 347 MapSet[DimValue] newChildren; 348 foreach (ref pair; root.children) 349 newChildren[pair.value] = pair.set; 350 351 bool modified; 352 foreach (ref pair; other.root.children) 353 newChildren.updateVoid(pair.value, 354 { 355 modified = true; 356 return pair.set; 357 }, 358 (ref MapSet oldSubmatrix) 359 { 360 auto mergeResult = oldSubmatrix.merge(pair.set); 361 if (oldSubmatrix !is mergeResult) 362 { 363 oldSubmatrix = mergeResult; 364 modified = true; 365 } 366 } 367 ); 368 369 if (!modified) 370 return this; 371 372 return MapSet(new immutable Node(root.dim, cast(immutable) newChildren)).deduplicate; 373 }()); 374 } 375 376 /// Return the difference between `this` and the given set. 377 /// If `other` does not intersect with `this`, return `this` unchanged. 378 MapSet subtract(MapSet other) const 379 { 380 if (this is emptySet) return this; 381 if (other is emptySet) return this; 382 if (this is other) return emptySet; 383 if (!root) return bringToFront(other.root.dim).subtract(other); 384 385 this.assertDeduplicated(); 386 other.assertDeduplicated(); 387 return cache.subtract.require(SetSetOp(this, other), { 388 other = other.bringToFront(root.dim); 389 390 if (root.children.length == 1 && other.root.children.length == 1 && 391 root.children[0].value == other.root.children[0].value) 392 { 393 // Single-child optimization 394 auto subtractResult = root.children[0].set.subtract(other.root.children[0].set); 395 if (subtractResult is emptySet) 396 return emptySet; 397 else 398 if (subtractResult !is root.children[0].set) 399 return MapSet(new immutable Node(root.dim, [Pair(root.children[0].value, subtractResult)])).deduplicate; 400 else 401 return this; 402 } 403 404 MapSet[DimValue] newChildren; 405 foreach (ref pair; root.children) 406 newChildren[pair.value] = pair.set; 407 408 bool modified; 409 foreach (ref pair; other.root.children) 410 if (auto poldSubmatrix = pair.value in newChildren) 411 { 412 auto subtractResult = poldSubmatrix.subtract(pair.set); 413 if (*poldSubmatrix !is subtractResult) 414 { 415 *poldSubmatrix = subtractResult; 416 if (subtractResult is emptySet) 417 newChildren.remove(pair.value); 418 modified = true; 419 } 420 } 421 422 if (!modified) 423 return this; 424 if (!newChildren.length) 425 return emptySet; 426 427 return MapSet(new immutable Node(root.dim, cast(immutable) newChildren)).deduplicate; 428 }()); 429 } 430 431 /*private*/ MapSet lazyMap(scope MapSet delegate(MapSet) fn) const 432 { 433 // Defer allocation until the need to mutate 434 foreach (i, ref pair; root.children) // Read-only scan 435 { 436 auto newSet = fn(pair.set); 437 if (newSet !is pair.set) 438 { 439 auto newChildren = new Pair[root.children.length]; 440 // Known to not need mutation 441 newChildren[0 .. i] = root.children[0 .. i]; 442 // Reuse already calculated result 443 newChildren[i] = Pair(pair.value, newSet); 444 // Continue scan with mutation 445 foreach (j, ref pair2; root.children[i + 1 .. $]) 446 newChildren[i + 1 + j] = Pair(pair2.value, fn(pair2.set)); 447 return MapSet(new immutable Node(root.dim, cast(immutable) newChildren)).deduplicate; 448 } 449 } 450 return this; // No mutation necessary 451 } 452 453 /// "Unset" a given dimension, removing it from the matrix. 454 /// The result is the union of all sub-matrices for all values of `dim`. 455 MapSet remove(DimName dim) const 456 { 457 if (this is emptySet || this is unitSet) return this; 458 this.assertDeduplicated(); 459 return cache.remove.require(SetDimOp(this, dim), { 460 if (root.dim == dim) 461 { 462 MapSet result; 463 foreach (ref pair; root.children) 464 result = result.merge(pair.set); 465 return result; 466 } 467 return lazyMap(set => set.remove(dim)); 468 }()); 469 } 470 471 /// Unset dimensions according to a predicate. 472 /// This is faster than removing dimensions individually, however, 473 /// unlike the `DimName` overload, this one does not benefit from global memoization. 474 MapSet remove(bool delegate(DimName) pred) const 475 { 476 if (this is emptySet) return this; 477 this.assertDeduplicated(); 478 479 MapSet[MapSet] cache; 480 MapSet visit(MapSet set) 481 { 482 if (set is unitSet) 483 return set; 484 return cache.require(set, { 485 if (pred(set.root.dim)) 486 { 487 MapSet result; 488 foreach (ref pair; set.root.children) 489 result = result.merge(visit(pair.set)); 490 return result; 491 } 492 return set.lazyMap(set => visit(set)); 493 }()); 494 } 495 return visit(this); 496 } 497 498 /// Set the given dimension to always have the given value, 499 /// collapsing (returning the union of) all sub-matrices 500 /// for previously different values of `dim`. 501 MapSet set(DimName dim, DimValue value) const 502 { 503 return this.remove(dim).addDim(dim, value); 504 } 505 506 /// Adds a new dimension with the given value. 507 /// The dimension must not have previously existed in `this`. 508 private MapSet addDim(DimName dim, DimValue value) const 509 { 510 if (this is emptySet) return this; 511 this.assertDeduplicated(); 512 assert(this is this.remove(dim), "Duplicate dimension"); 513 if (value == nullValue) 514 return this; 515 return MapSet(new immutable Node(dim, [Pair(value, this)])).deduplicate; 516 } 517 518 /// Return a sub-matrix for all points where the given dimension has this value. 519 /// The dimension itself is not included in the result. 520 /// Note: if `dim` doesn't occur (e.g. because `this` is the unit set) and `value` is `nullValue`, 521 /// this returns the unit set (not the empty set). 522 MapSet slice(DimName dim, DimValue value) const 523 { 524 if (this is emptySet) return emptySet; 525 this.assertDeduplicated(); 526 foreach (ref pair; bringToFront(dim).root.children) 527 if (pair.value == value) 528 return pair.set; 529 return emptySet; 530 } 531 532 /// Return a subset of this set for all points where the given dimension has this value. 533 /// Unlike `slice`, the dimension itself is included in the result (with the given value). 534 MapSet get(DimName dim, DimValue value) const 535 { 536 if (this is emptySet) return emptySet; 537 this.assertDeduplicated(); 538 foreach (ref pair; bringToFront(dim).root.children) 539 if (pair.value == value) 540 return MapSet(new immutable Node(dim, [Pair(value, pair.set)])).deduplicate; 541 return emptySet; 542 } 543 544 /// Return all unique values occurring for a given dimension. 545 /// Unless this is the empty set, the return value is always non-empty. 546 /// If `dim` doesn't occur, it will be `[nullValue]`. 547 const(DimValue)[] all(DimName dim) const 548 { 549 // return bringToFront(dim).root.children.keys; 550 if (this is emptySet) return null; 551 if (this is unitSet) return [nullValue]; 552 if (root.dim == dim) return root.children.amap!(child => child.value); 553 this.assertDeduplicated(); 554 555 HashSet!DimValue allValues; 556 HashSet!MapSet seen; 557 void visit(MapSet set) 558 { 559 if (set is unitSet) 560 { 561 allValues.add(nullValue); 562 return; 563 } 564 if (set in seen) 565 return; 566 seen.add(set); 567 568 if (set.root.dim == dim) 569 foreach (ref pair; set.root.children) 570 allValues.add(pair.value); 571 else 572 foreach (ref pair; set.root.children) 573 visit(pair.set); 574 } 575 visit(this); 576 return allValues.keys; 577 } 578 579 /// Return a set which represents the Cartesian product between 580 /// this set and the given `values` across the specified 581 /// dimension. 582 MapSet cartesianProduct(DimName dim, DimValue[] values) const 583 { 584 if (this is emptySet) return emptySet; 585 if (values.length == 0) return emptySet; 586 this.assertDeduplicated(); 587 auto unset = this.remove(dim); 588 auto children = values.map!(value => Pair(value, unset)).array; 589 children.sort(); 590 return MapSet(new immutable Node(dim, cast(immutable) children)).deduplicate; 591 } 592 593 /// Return a set which represents the Cartesian product between 594 /// this and the given set. 595 /// Duplicate dimensions are first removed from `this` set. 596 /// For best performance, call `big.cartesianProduct(small)` 597 MapSet cartesianProduct(MapSet other) const 598 { 599 if (this is emptySet || other is emptySet) return emptySet; 600 if (this is unitSet) return other; 601 if (other is unitSet) return this; 602 603 MapSet unset = this; 604 foreach (dim; other.getDims()) 605 unset = unset.remove(dim); 606 607 return other.uncheckedCartesianProduct(unset); 608 } 609 610 // Assumes that the dimensions in `this` and `other` are disjoint. 611 private MapSet uncheckedCartesianProduct(MapSet other) const 612 { 613 if (this is unitSet) return other; 614 if (other is unitSet) return this; 615 616 this.assertDeduplicated(); 617 other.assertDeduplicated(); 618 619 return cache.cartesianProduct.require(SetSetOp(this, other), { 620 return lazyMap(set => set.uncheckedCartesianProduct(other)); 621 }()); 622 } 623 624 /// Return a superset of this set, consisting of a Cartesian 625 /// product of every value of every dimension. The total number of 626 /// unique nodes is thus equal to the number of dimensions. 627 MapSet completeSuperset() const 628 { 629 if (this is emptySet || this is unitSet) return this; 630 this.assertDeduplicated(); 631 632 return cache.completeSuperset.require(this, { 633 MapSet child = MapSet.emptySet; 634 foreach (ref pair; root.children) 635 child = child.merge(pair.set); 636 child = child.completeSuperset(); 637 auto newChildren = root.children.dup; 638 foreach (ref pair; newChildren) 639 pair.set = child; 640 return MapSet(new immutable Node(root.dim, cast(immutable) newChildren)).deduplicate; 641 }()); 642 } 643 644 /// Refactor this matrix into one with the same data, 645 /// but putting the given dimension in front. 646 /// This will speed up access to values with the given dimension. 647 /// If the dimension does not yet occur in the set (or any subset), 648 /// it is instantiated with a single `nullValue` value. 649 /// The set must be non-empty. 650 MapSet bringToFront(DimName dim) const 651 { 652 assert(this !is emptySet, "Empty sets may not have dimensions"); 653 654 if (this is unitSet) 655 { 656 // We reached the bottom, and did not find `dim` along the way. 657 // Create it now. 658 return MapSet(new immutable Node(dim, [Pair(nullValue, unitSet)])).deduplicate; 659 } 660 661 if (dim == root.dim) 662 { 663 // Already at the front. 664 return this; 665 } 666 667 // 1. Recurse. 668 // 2. After recursion, all children should have `dim` at the front. 669 // So, just swap this layer with the next one. 670 671 this.assertDeduplicated(); 672 return cache.bringToFront.require(SetDimOp(this, dim), { 673 if (root.children.length == 1) 674 { 675 auto newSubmatrix = root.children[0].set.bringToFront(dim); 676 auto newChildren = newSubmatrix.root.children.dup; 677 foreach (ref pair; newChildren) 678 pair.set = MapSet(new immutable Node(root.dim, [Pair(root.children[0].value, pair.set)])).deduplicate; 679 return MapSet(new immutable Node(dim, cast(immutable) newChildren)).deduplicate; 680 } 681 682 Pair[][DimValue] subsets; 683 foreach (ref pair; root.children) 684 { 685 auto newSubmatrix = pair.set.bringToFront(dim); 686 assert(newSubmatrix.root.dim == dim); 687 foreach (ref pair2; newSubmatrix.root.children) 688 subsets[pair2.value] ~= Pair(pair.value, pair2.set); 689 } 690 Pair[] newChildren; 691 foreach (value, children; subsets) 692 { 693 children.sort(); 694 newChildren ~= Pair(value, MapSet(new immutable Node(root.dim, cast(immutable) children)).deduplicate); 695 } 696 newChildren.sort(); 697 return MapSet(new immutable Node(dim, cast(immutable) newChildren)).deduplicate; 698 }()); 699 } 700 701 /// Refactor this matrix into one with the same data, 702 /// but attempting to lower the total number of nodes. 703 MapSet optimize() const 704 { 705 if (this is emptySet || this is unitSet) return this; 706 this.assertDeduplicated(); 707 708 return cache.optimize.require(this, { 709 struct Result { bool done; MapSet[MapSet] map; } 710 static Result optimizeLayer(HashSet!MapSet sets0) 711 { 712 // - At the bottom? 713 // - Yes: 714 // - return failure 715 // - No: 716 // - Try to swap this layer. Success? 717 // - Yes: 718 // - return success 719 // - No: 720 // - Recurse and try to swap next layer. Success? 721 // - Yes: Retry this layer 722 // - No: return failure (bottom reached) 723 724 assert(!sets0.empty); 725 if (sets0.byKey.front is unitSet) 726 return Result(true, null); // at the bottom 727 auto dim0 = sets0.byKey.front.root.dim; 728 assert(sets0.byKey.all!(set => set !is unitSet), "Leaf/non-leaf nodes mismatch"); 729 assert(sets0.byKey.all!(set => set.root.dim == dim0), "Dim mismatch"); 730 731 auto sets1 = sets0.byKey.map!(set => set.root.children.map!(function MapSet (ref child) => child.set)).joiner.toSet; 732 assert(!sets1.empty); 733 if (sets1.byKey.front is unitSet) 734 return Result(true, null); // one layer away from the bottom, nothing to swap with 735 auto dim1 = sets1.byKey.front.root.dim; 736 assert(sets1.byKey.all!(set => set !is unitSet), "Leaf/non-leaf nodes mismatch"); 737 assert(sets1.byKey.all!(set => set.root.dim == dim1), "Dim mismatch"); 738 739 auto currentNodes = sets0.length + sets1.length; 740 741 MapSet[MapSet] swappedSets; 742 HashSet!MapSet sets0new, sets1new; 743 744 foreach (set0; sets0.byKey) 745 { 746 Pair[][DimValue] subsets; 747 foreach (ref pair0; set0.root.children) 748 foreach (ref pair1; pair0.set.root.children) 749 subsets[pair1.value] ~= Pair(pair0.value, pair1.set); 750 751 Pair[] newChildren; 752 foreach (value, children; subsets) 753 { 754 children.sort(); 755 auto set1new = MapSet(new immutable Node(dim0, cast(immutable) children)).deduplicate; 756 sets1new.add(set1new); 757 newChildren ~= Pair(value, set1new); 758 } 759 newChildren.sort(); 760 auto set0new = MapSet(new immutable Node(dim1, cast(immutable) newChildren)).deduplicate; 761 sets0new.add(set0new); 762 swappedSets[set0] = set0new; 763 } 764 765 auto newNodes = sets0new.length + sets1new.length; 766 767 if (newNodes < currentNodes) 768 return Result(false, swappedSets); // Success, retry above layer 769 770 // Failure, descend 771 772 auto result1 = optimizeLayer(sets1); 773 if (!result1.map) 774 { 775 assert(result1.done); 776 return Result(true, null); // Done, bottom reached 777 } 778 779 // Apply result 780 sets0new.clear(); 781 foreach (set0; sets0.byKey) 782 { 783 set0.assertDeduplicated(); 784 auto newChildren = set0.root.children.dup; 785 foreach (ref pair0; newChildren) 786 pair0.set = result1.map[pair0.set]; 787 auto set0new = MapSet(new immutable Node(dim0, cast(immutable) newChildren)).deduplicate; 788 sets0new.add(set0new); 789 swappedSets[set0] = set0new; 790 } 791 792 if (result1.done) 793 return Result(true, swappedSets); 794 795 // Retry this layer 796 auto result0 = optimizeLayer(sets0new); 797 if (!result0.map) 798 { 799 assert(result0.done); 800 return Result(true, swappedSets); // Bottom was reached upon retry, just return our results unchanged 801 } 802 803 MapSet[MapSet] compoundedResult; 804 foreach (set0; sets0.byKey) 805 compoundedResult[set0] = result0.map[swappedSets[set0]]; 806 return Result(result0.done, compoundedResult); 807 } 808 809 MapSet[1] root = [this.normalize]; 810 while (true) 811 { 812 auto result = optimizeLayer(root[].toSet()); 813 if (result.map) 814 root[0] = result.map[root[0]]; 815 if (result.done) 816 return root[0]; 817 } 818 }()); 819 } 820 821 /// Return a set equivalent to a unit set (all dimensions 822 /// explicitly set to `nullValue`), with all dimensions in `this`, 823 /// in an order approximately following that of the dimensions in 824 /// `this`. Implicitly normalized. 825 private MapSet dimOrderReference() const 826 { 827 if (this is unitSet || this is emptySet) return this; 828 829 DimName[] dims; 830 HashSet!MapSet seen; 831 void visit(MapSet set, size_t pos) 832 { 833 if (set is unitSet) return; 834 if (set in seen) return; 835 seen.add(set); 836 if ((pos == dims.length || dims[pos] != set.root.dim) && !dims.canFind(set.root.dim)) 837 { 838 dims.insertInPlace(pos, set.root.dim); 839 pos++; 840 } 841 foreach (ref pair; set.root.children) 842 visit(pair.set, pos); 843 } 844 visit(this, 0); 845 846 MapSet result = unitSet; 847 foreach_reverse (dim; dims) 848 result = MapSet(new immutable Node(dim, [Pair(nullValue, result)])).deduplicate(); 849 850 return result; 851 } 852 853 /// Refactor this matrix into one in which dimensions always occur 854 /// in the same order, no matter what path is taken. 855 MapSet normalize() const 856 { 857 if (this is unitSet || this is emptySet) return this; 858 859 return reorderUsing(dimOrderReference()); 860 } 861 862 private size_t maxDepth() const 863 { 864 import std.algorithm.comparison : max; 865 866 if (this is emptySet || this is unitSet) 867 return 0; 868 this.assertDeduplicated(); 869 return cache.maxDepth.require(this, { 870 size_t maxDepth = 0; 871 foreach (ref pair; root.children) 872 maxDepth = max(maxDepth, pair.set.maxDepth()); 873 return 1 + maxDepth; 874 }()); 875 } 876 877 /*private*/ MapSet swapDepth(size_t depth) const 878 { 879 if (this is emptySet || this is unitSet) return this; 880 this.assertDeduplicated(); 881 882 return cache.swapDepth.require(SetIdxOp(this, depth), { 883 if (depth == 0) 884 { 885 foreach (ref pair; root.children) 886 if (pair.set !is unitSet) 887 return bringToFront(pair.set.root.dim); 888 return this; 889 } 890 else 891 { 892 auto newChildren = root.children.dup; 893 foreach (ref pair; newChildren) 894 pair.set = pair.set.swapDepth(depth - 1); 895 return MapSet(new immutable Node(root.dim, cast(immutable) newChildren)).deduplicate; 896 } 897 }()); 898 } 899 900 /// Refactor this matrix into one with the same data, but in which 901 /// the dimensions always occur as in `reference` (which is 902 /// assumed to be normalized). 903 MapSet reorderUsing(MapSet reference) const 904 { 905 if (this is emptySet || reference is emptySet || reference is unitSet) return this; 906 this.assertDeduplicated(); 907 reference.assertDeduplicated(); 908 909 return cache.reorderUsing.require(SetSetOp(this, reference), { 910 return bringToFront(reference.root.dim).lazyMap(set => set.reorderUsing(reference.root.children[0].set)); 911 }()); 912 } 913 914 void toString(scope void delegate(const(char)[]) sink) const 915 { 916 import std.format : formattedWrite; 917 if (this is emptySet) 918 sink("{}"); 919 else 920 if (this is unitSet) 921 sink("{[]}"); 922 else 923 sink.formattedWrite!"%s"(*root); 924 } 925 926 hash_t toHash() const 927 { 928 return 929 this is emptySet ? 0 : 930 this is unitSet ? 1 : 931 root.toHash(); 932 } 933 934 bool opEquals(const typeof(this) s) const 935 { 936 if (root is s.root) 937 return true; 938 if (this is emptySet || this is unitSet || s is emptySet || s is unitSet) 939 return this is s; 940 return *root == *s.root; 941 } 942 } 943 944 unittest 945 { 946 import std.algorithm.sorting : sort; 947 948 alias M = MapSet!(string, int); 949 M m, n; 950 m = m.merge(M.unitSet.set("x", 1).set("y", 5)); 951 m = m.merge(M.unitSet.set("x", 1).set("y", 6)); 952 assert(m.all("x") == [1]); 953 assert(m.all("y").dup.sort.release == [5, 6]); 954 955 m = m.merge(M.unitSet.set("x", 2).set("y", 6)); 956 assert(m.get("x", 1).all("y").dup.sort.release == [5, 6]); 957 assert(m.get("y", 6).all("x").dup.sort.release == [1, 2]); 958 959 m = m.subtract(M.unitSet.set("x", 1).set("y", 6)); 960 assert(m.all("x").dup.sort.release == [1, 2]); 961 assert(m.all("y").dup.sort.release == [5, 6]); 962 assert(m.get("x", 1).all("y") == [5]); 963 assert(m.get("y", 6).all("x") == [2]); 964 965 m = M.emptySet; 966 m = m.merge(M.unitSet.set("x", 10).set("y", 20)); 967 m = m.merge(M.unitSet.set("x", 10).set("y", 21)); 968 m = m.merge(M.unitSet.set("x", 11).set("y", 21)); 969 m = m.merge(M.unitSet.set("x", 11).set("y", 22)); 970 auto o = m.optimize(); 971 assert(o.uniqueNodes < m.uniqueNodes); 972 973 m = M.emptySet; 974 assert(m.all("x") == []); 975 m = M.unitSet; 976 assert(m.all("x") == [0]); 977 m = m.merge(M.unitSet.set("x", 1)); 978 assert(m.all("x").dup.sort.release == [0, 1]); 979 980 m = M.unitSet; 981 assert(m.set("x", 1).set("x", 1).all("x") == [1]); 982 983 m = M.unitSet; 984 m = m.cartesianProduct("x", [1, 2, 3]); 985 m = m.cartesianProduct("y", [1, 2, 3]); 986 m = m.cartesianProduct("z", [1, 2, 3]); 987 assert(m.count == 3 * 3 * 3); 988 assert(m .all("x").dup.sort.release == [1, 2, 3]); 989 assert(m.set("z", 1).all("x").dup.sort.release == [1, 2, 3]); 990 assert(m.set("x", 1).all("z").dup.sort.release == [1, 2, 3]); 991 992 m = M.unitSet; 993 m = m.cartesianProduct("a", [1, 2, 3]); 994 m = m.cartesianProduct("b", [1, 2, 3]); 995 n = M.unitSet; 996 n = n.cartesianProduct("c", [1, 2, 3]); 997 n = n.cartesianProduct("d", [1, 2, 3]); 998 m = m.cartesianProduct(n); 999 assert(m.count == 3 * 3 * 3 * 3); 1000 1001 assert(M.unitSet != M.unitSet.set("x", 1)); 1002 1003 m = M.emptySet; 1004 m = m.merge(M.unitSet.set("x", 1).set("y", 11)); 1005 m = m.merge(M.unitSet.set("x", 2).set("y", 12).set("z", 22)); 1006 m = m.completeSuperset(); 1007 assert(m.uniqueNodes == 3); 1008 assert(m.count == 8); // 2 ^^ 3 1009 assert(m.all("x").dup.sort.release == [1, 2]); 1010 assert(m.all("y").dup.sort.release == [11, 12]); 1011 assert(m.all("z").dup.sort.release == [0, 22]); 1012 } 1013 1014 1015 /// Allows executing a deterministic algorithm over all states in a given MapSet. 1016 /// If a variable is not queried by the algorithm, states for all 1017 /// variations of that variable are processed in one iteration. 1018 struct MapSetVisitor(A, V) 1019 { 1020 alias Set = MapSet!(A, V); 1021 Set set; 1022 1023 struct Var 1024 { 1025 A name; 1026 const(V)[] values; 1027 size_t pos; 1028 } 1029 Var[] stack; 1030 size_t stackPos; 1031 V[A] singularValues, resolvedValues; // Faster than workingSet.all(name)[0] 1032 private HashSet!A dirtyValues; // Accumulate MapSet.set calls 1033 private Set workingSet; 1034 1035 this(Set set) 1036 { 1037 this.set = set; 1038 foreach (dim, values; set.getDimsAndValues()) 1039 if (values.length == 1) 1040 singularValues[dim] = values.byKey.front; 1041 } 1042 1043 /// Resets iteration to the beginning. 1044 /// Equivalent to but faster than constructing a new MapSetVisitor 1045 /// instance (`visitor = MapSetVisitor(visitor.set)`). 1046 void reset() 1047 { 1048 workingSet = Set.emptySet; 1049 stack = null; 1050 } 1051 1052 /// Returns true if there are more states to iterate over, 1053 /// otherwise returns false 1054 bool next() 1055 { 1056 if (set is Set.emptySet) 1057 return false; 1058 if (workingSet is Set.emptySet) 1059 { 1060 // first iteration 1061 } 1062 else 1063 while (true) 1064 { 1065 if (!stack.length) 1066 return false; // All possibilities exhausted 1067 auto last = &stack[$-1]; 1068 last.pos++; 1069 if (last.pos == last.values.length) 1070 { 1071 stack = stack[0 .. $ - 1]; 1072 continue; 1073 } 1074 break; 1075 } 1076 1077 workingSet = set; 1078 resolvedValues = null; 1079 dirtyValues.clear(); 1080 stackPos = 0; 1081 return true; 1082 } 1083 1084 private void flush() 1085 { 1086 if (dirtyValues.empty) 1087 return; 1088 workingSet = workingSet.remove((A name) => name in dirtyValues); 1089 foreach (name; dirtyValues) 1090 workingSet = workingSet.addDim(name, resolvedValues[name]); 1091 dirtyValues.clear(); 1092 } 1093 1094 @property Set currentSubset() 1095 { 1096 assert(workingSet !is Set.emptySet, "Not iterating"); 1097 flush(); 1098 return workingSet; 1099 } 1100 1101 /// Algorithm interface - get a value by name 1102 V get(A name) 1103 { 1104 assert(workingSet !is Set.emptySet, "Not iterating"); 1105 if (auto pvalue = name in resolvedValues) 1106 return *pvalue; 1107 if (auto pvalue = name in singularValues) 1108 return *pvalue; 1109 1110 if (stackPos == stack.length) 1111 { 1112 // Expand new variable 1113 auto values = workingSet.all(name); 1114 auto value = values[0]; 1115 resolvedValues[name] = value; 1116 stack ~= Var(name, values, 0); 1117 stackPos++; 1118 if (values.length > 1) 1119 workingSet = workingSet.get(name, value); 1120 return value; 1121 } 1122 1123 // Iterate over known variable 1124 auto var = &stack[stackPos]; 1125 assert(var.name == name, "Mismatching get order"); 1126 auto value = var.values[var.pos]; 1127 workingSet = workingSet.get(var.name, value); 1128 assert(workingSet !is Set.emptySet, "Empty set after restoring"); 1129 resolvedValues[var.name] = value; 1130 stackPos++; 1131 return value; 1132 } 1133 1134 /// Algorithm interface - set a value by name 1135 void put(A name, V value) 1136 { 1137 assert(workingSet !is Set.emptySet, "Not iterating"); 1138 if (name !in resolvedValues) 1139 if (auto pvalue = name in singularValues) 1140 if (*pvalue == value) 1141 return; 1142 1143 resolvedValues[name] = value; 1144 dirtyValues.add(name); 1145 } 1146 1147 /// Apply a function over every possible value of the given 1148 /// variable, without resolving it (unless it's already resolved). 1149 void transform(A name, scope void delegate(ref V value) fun) 1150 { 1151 assert(workingSet !is Set.emptySet, "Not iterating"); 1152 if (auto pvalue = name in resolvedValues) 1153 { 1154 dirtyValues.add(name); 1155 return fun(*pvalue); 1156 } 1157 1158 workingSet = workingSet.bringToFront(name); 1159 Set[V] newChildren; 1160 foreach (ref child; workingSet.root.children) 1161 { 1162 V value = child.value; 1163 fun(value); 1164 newChildren.updateVoid(value, 1165 () => child.set, 1166 (ref Set set) 1167 { 1168 set = set.merge(child.set); 1169 }); 1170 } 1171 workingSet = Set(new immutable Set.Node(name, cast(immutable) newChildren)).deduplicate; 1172 } 1173 1174 /// Apply a function over every possible value of the given 1175 /// variable, without resolving it (unless it's already resolved). 1176 /// The function is assumed to be injective (does not produce 1177 /// duplicate outputs for distinct inputs). 1178 void injectiveTransform(A name, scope void delegate(ref V value) fun) 1179 { 1180 assert(workingSet !is Set.emptySet, "Not iterating"); 1181 if (auto pvalue = name in resolvedValues) 1182 { 1183 dirtyValues.add(name); 1184 return fun(*pvalue); 1185 } 1186 1187 workingSet = workingSet.bringToFront(name); 1188 auto newChildren = workingSet.root.children.dup; 1189 foreach (ref child; newChildren) 1190 fun(child.value); 1191 newChildren.sort(); 1192 workingSet = Set(new immutable Set.Node(name, cast(immutable) newChildren)).deduplicate; 1193 } 1194 1195 /// Inject a variable and values to iterate over. 1196 /// The variable must not have been resolved yet. 1197 void inject(A name, V[] values) 1198 { 1199 assert(name !in resolvedValues, "Already resolved"); 1200 assert(values.length > 0, "Injecting zero values would result in an empty set"); 1201 singularValues.remove(name); 1202 workingSet = workingSet.cartesianProduct(name, values); 1203 } 1204 } 1205 1206 /// An algorithm which divides two numbers. 1207 /// When the divisor is zero, we don't even query the dividend, 1208 /// therefore processing all dividends in one iteration. 1209 unittest 1210 { 1211 alias M = MapSet!(string, int); 1212 M m = M.unitSet 1213 .cartesianProduct("divisor" , [0, 1, 2]) 1214 .cartesianProduct("dividend", [0, 1, 2]); 1215 assert(m.count == 9); 1216 1217 auto v = MapSetVisitor!(string, int)(m); 1218 M results; 1219 int iterations; 1220 while (v.next()) 1221 { 1222 iterations++; 1223 auto divisor = v.get("divisor"); 1224 if (divisor == 0) 1225 continue; 1226 auto dividend = v.get("dividend"); 1227 v.put("quotient", dividend / divisor); 1228 results = results.merge(v.currentSubset); 1229 } 1230 1231 assert(iterations == 7); // 1 for division by zero + 3 for division by one + 3 for division by two 1232 assert(results.get("divisor", 2).get("dividend", 2).all("quotient") == [1]); 1233 assert(results.get("divisor", 0).count == 0); 1234 } 1235 1236 unittest 1237 { 1238 import std.algorithm.sorting : sort; 1239 1240 alias M = MapSet!(string, int); 1241 M m = M.unitSet.cartesianProduct("x", [1, 2, 3]); 1242 auto v = MapSetVisitor!(string, int)(m); 1243 v.next(); 1244 v.transform("x", (ref int v) { v *= 2; }); 1245 assert(v.currentSubset.all("x").dup.sort.release == [2, 4, 6]); 1246 } 1247 1248 unittest 1249 { 1250 alias M = MapSet!(string, int); 1251 M m = M.unitSet.cartesianProduct("x", [1, 2, 3]); 1252 auto v = MapSetVisitor!(string, int)(m); 1253 while (v.next()) 1254 { 1255 v.transform("x", (ref int v) { v *= 2; }); 1256 v.put("y", v.get("x")); 1257 } 1258 }