1 /** 2 * Data structure for optimized "sparse" N-dimensional matrices 3 * 4 * License: 5 * This Source Code Form is subject to the terms of 6 * the Mozilla Public License, v. 2.0. If a copy of 7 * the MPL was not distributed with this file, You 8 * can obtain one at http://mozilla.org/MPL/2.0/. 9 * 10 * Authors: 11 * Vladimir Panteleev <ae@cy.md> 12 */ 13 14 module ae.utils.mapset; 15 16 static if (__VERSION__ >= 2083): 17 18 import std.algorithm.iteration; 19 import std.algorithm.searching; 20 import std.algorithm.sorting; 21 import std.array; 22 import std.exception; 23 import std.typecons : tuple; 24 25 import ae.utils.aa; 26 import ae.utils.array : amap; 27 28 /** 29 Data structure for holding optimized "sparse" N-dimensional matrices. 30 The number of dimensions is arbitrary and can be varied at runtime. 31 32 Unlike classical sparse arrays/matrices, this data structure is 33 optimized not just for arrays which are mostly zero (or any given 34 value), but for any matrices where data is likely to repeat across 35 dimensions. 36 37 This is done by storing the data as a tree, with each depth 38 representing one dimension. A sub-tree is, thus, a sub-matrix 39 (slice of the matrix represented by the top-level tree) along the 40 top-level dimension. Each sub-tree is individually immutable, and 41 can therefore be shared within the same tree or even by several 42 instances of this type. 43 44 Dimension order need not be the same for all sub-trees. Even the 45 number of dimensions in siblings' sub-trees need not be the same; 46 i.e, the data structure is itself sparse with regards to the 47 dimensions. 48 49 As there is no explicit "value" type (associated with every set of 50 coordinates), this data structure can be seen as a representation 51 of a set of points, but it can be simulated by adding a "value" 52 dimension (in the same way how a gray-scale image can be 53 represented as a set of 3D points like a height-map). 54 55 An alternative way to look at this data structure is as a set of 56 maps (i.e. associative arrays). Thus, each layer of the tree stores 57 one possible map key, and under it, all the maps that have this key 58 with a specific value. Note that for this use case, DimValue needs 59 to have a value (configured using nullValue) which indicates the 60 absence of a certain key (dimension) in a map. 61 62 Params: 63 DimName = The type used to indicate the name (or index) of a dimension. 64 DimValue = The type for indicating a point on a dimension's axis. 65 nullValue = A value for DimValue indicating that a dimension is unset 66 */ 67 struct MapSet(DimName, DimValue, DimValue nullValue = DimValue.init) 68 { 69 /// Logically, each MapSet node has a map of values to a subset. 70 /// However, it is faster to represent that map as an array of key-value pairs 71 /// rather than a D associative array, so we do that here. 72 struct Pair 73 { 74 DimValue value; /// 75 MapSet set; /// 76 77 int opCmp(ref const typeof(this) other) const 78 { 79 static if (is(typeof(value.opCmp(other.value)) : int)) 80 return value.opCmp(other.value); 81 else 82 return value < other.value ? -1 : value > other.value ? 1 : 0; 83 } /// 84 } 85 86 struct Node 87 { 88 DimName dim; /// 89 Pair[] children; /// 90 91 immutable this(DimName dim, immutable Pair[] children) 92 { 93 // Zero children doesn't make sense. 94 // It would be the equivalent of an empty set, 95 // but then we should just use MapSet.emptySet instead. 96 assert(!children.empty, "Node with zero children"); 97 98 // Nodes should be in their canonical form for 99 // memoization and deduplication to be effective. 100 assert(children.isSorted, "Children are not sorted"); 101 102 this.dim = dim; 103 this.children = children; 104 105 // Because each subset is immutable, we can 106 // pre-calculate the hash during construction. 107 hash = hashOf(dim) ^ hashOf(children); 108 109 size_t totalMembers = 0; 110 foreach (i, ref pair; children) 111 { 112 if (i) 113 assert(pair.value != children[i-1].value, "Duplicate value"); 114 115 pair.set.assertDeduplicated(); 116 // Same as "Node with zero children" 117 assert(pair.set !is emptySet, "Empty set as subset"); 118 119 totalMembers += pair.set.count; 120 } 121 this.totalMembers = totalMembers; 122 } /// 123 124 immutable this(DimName dim, immutable MapSet[DimValue] children) 125 { 126 auto childrenList = children.byKeyValue.map!(kv => Pair(kv.key, kv.value)).array; 127 childrenList.sort(); 128 this(dim, cast(immutable) childrenList); 129 } /// 130 131 void toString(scope void delegate(const(char)[]) sink) const 132 { 133 import std.format : formattedWrite; 134 sink.formattedWrite("{ %(%s%), [ ", (&dim)[0..1]); 135 bool first = true; 136 foreach (ref pair; children) 137 { 138 if (first) 139 first = false; 140 else 141 sink(", "); 142 sink.formattedWrite("%s : %s", pair.value, pair.set); 143 } 144 sink(" ] }"); 145 } /// 146 147 private hash_t hash; 148 private size_t totalMembers; 149 150 hash_t toHash() const 151 { 152 return hash; 153 } /// 154 155 bool opEquals(ref const typeof(this) s) const 156 { 157 return hash == s.hash && dim == s.dim && children == s.children; 158 } /// 159 } /// 160 161 /// Indicates the empty set. 162 /// May only occur at the top level (never as a subset). 163 private enum emptySetRoot = cast(immutable(Node)*)1; 164 165 /// If emptySetRoot, zero values. 166 /// If null, one value with zero dimensions. 167 /// Otherwise, pointer to node describing the next dimension. 168 immutable(Node)* root = emptySetRoot; 169 170 /// A set containing a single nil-dimensional element. 171 /// Holds exactly one value (a point with all dimensions at 172 /// nullValue). 173 static immutable unitSet = MapSet(null); 174 175 /// The empty set. Represents a set which holds zero values. 176 static immutable emptySet = MapSet(emptySetRoot); 177 178 /// Return the total number of items in this set. 179 size_t count() const 180 { 181 if (this is emptySet) 182 return 0; 183 if (this is unitSet) 184 return 1; 185 return root.totalMembers; 186 } 187 188 private struct SetSetOp { MapSet a, b; } 189 private struct SetDimOp { MapSet set; DimName dim; } 190 private struct SetIdxOp { MapSet set; size_t index; } 191 private struct Cache 192 { 193 /// For deduplication - key is value 194 MapSet[MapSet] instances; 195 /// Operations - things that operate recursively on subtrees 196 /// should be memoized here 197 MapSet[SetSetOp] merge, subtract, cartesianProduct, reorderUsing; 198 MapSet[SetDimOp] remove, bringToFront; 199 MapSet[SetIdxOp] swapDepth; 200 MapSet[MapSet] optimize, completeSuperset; 201 size_t[MapSet] uniqueNodes, maxDepth; 202 } 203 204 /// Because subtrees can be reused within the tree, a way of 205 /// memoizing operations across the entire tree (instead of just 206 /// across children of a single node, or siblings) is crucial for 207 /// performance. 208 static Cache cache; 209 210 /// Clear the global operations cache. 211 static void clearCache() 212 { 213 cache = Cache.init; 214 } 215 216 /// Because MapSet operations benefit greatly from memoization, 217 /// maintaining a set of interned sets benefits performance. After 218 /// calling `clearCache`, call this method to re-register extant 219 /// live instances to the instance cache. 220 void addToCache() const 221 { 222 cache.instances.updateVoid( 223 this, 224 { 225 if (this !is emptySet && this !is unitSet) 226 foreach (ref pair; root.children) 227 pair.set.addToCache(); 228 return this; 229 }, 230 (ref MapSet set) 231 { 232 assert(set is this); 233 } 234 ); 235 } 236 237 /// Intern and deduplicate this MapSet. 238 /// Needs to be called only after constructing a MapSet manually 239 /// (by allocating, populating, and setting `root`). 240 MapSet deduplicate() const 241 { 242 MapSet deduplicated; 243 cache.instances.updateVoid( 244 this, 245 { 246 debug if (this !is emptySet && this !is unitSet) 247 foreach (ref pair; root.children) 248 pair.set.assertDeduplicated(); 249 deduplicated = this; 250 return this; 251 }, 252 (ref MapSet set) 253 { 254 deduplicated = set; 255 } 256 ); 257 return deduplicated; 258 } 259 260 private void assertDeduplicated() const { debug assert(this is emptySet || this is unitSet || cache.instances[this] is this); } 261 262 /// Count and return the total number of unique nodes in this MapSet. 263 size_t uniqueNodes() const 264 { 265 return cache.uniqueNodes.require(this, { 266 HashSet!MapSet seen; 267 void visit(MapSet set) 268 { 269 if (set is emptySet || set is unitSet || set in seen) 270 return; 271 seen.add(set); 272 foreach (ref pair; set.root.children) 273 visit(pair.set); 274 } 275 visit(this); 276 return seen.length; 277 }()); 278 } 279 280 /// Collect the names of all dimensions occurring in this tree. 281 DimName[] getDims() const 282 { 283 HashSet!DimName dims; 284 HashSet!MapSet seen; 285 void visit(MapSet set) 286 { 287 if (set is emptySet || set is unitSet || set in seen) 288 return; 289 seen.add(set); 290 dims.add(set.root.dim); 291 foreach (ref pair; set.root.children) 292 visit(pair.set); 293 } 294 visit(this); 295 return dims.keys; 296 } 297 298 /// Return all values for all dimensions occurring in this set. 299 /// The Cartesian product of these values would thus be a superset 300 /// of this set. 301 /// This is equivalent to, but faster than, calling `getDims` and 302 /// then `all` for each dim. 303 HashSet!(immutable(DimValue))[DimName] getDimsAndValues(this This)() 304 { 305 if (this is emptySet) return null; 306 307 // Be careful to count the implicit nullValues on branches 308 // where a dim doesn't occur. 309 MapSet set = this.completeSuperset; 310 HashSet!(immutable(DimValue))[DimName] result; 311 while (set !is unitSet) 312 { 313 DimName dim = set.root.dim; 314 HashSet!(immutable(DimValue)) values = set.root.children.map!((ref child) => child.value).toSet; 315 bool added = result.addNew(dim, values); 316 assert(added, "Duplicate dimension"); 317 set = set.root.children[0].set; 318 } 319 return result; 320 } 321 322 /// Combine two matrices together, returning their union. 323 /// If `other` is a subset of `this`, return `this` unchanged. 324 MapSet merge(MapSet other) const 325 { 326 if (this is emptySet) return other; 327 if (other is emptySet) return this; 328 if (this is other) return this; 329 if (!root) return bringToFront(other.root.dim).merge(other); 330 331 this.assertDeduplicated(); 332 other.assertDeduplicated(); 333 return cache.merge.require(SetSetOp(this, other), { 334 other = other.bringToFront(root.dim); 335 336 if (root.children.length == 1 && other.root.children.length == 1 && 337 root.children[0].value == other.root.children[0].value) 338 { 339 // Single-child optimization 340 auto mergeResult = root.children[0].set.merge(other.root.children[0].set); 341 if (mergeResult !is root.children[0].set) 342 return MapSet(new immutable Node(root.dim, [Pair(root.children[0].value, mergeResult)])).deduplicate; 343 else 344 return this; 345 } 346 347 MapSet[DimValue] newChildren; 348 foreach (ref pair; root.children) 349 newChildren[pair.value] = pair.set; 350 351 bool modified; 352 foreach (ref pair; other.root.children) 353 newChildren.updateVoid(pair.value, 354 { 355 modified = true; 356 return pair.set; 357 }, 358 (ref MapSet oldSubmatrix) 359 { 360 auto mergeResult = oldSubmatrix.merge(pair.set); 361 if (oldSubmatrix !is mergeResult) 362 { 363 oldSubmatrix = mergeResult; 364 modified = true; 365 } 366 } 367 ); 368 369 if (!modified) 370 return this; 371 372 return MapSet(new immutable Node(root.dim, cast(immutable) newChildren)).deduplicate; 373 }()); 374 } 375 376 /// Return the difference between `this` and the given set. 377 /// If `other` does not intersect with `this`, return `this` unchanged. 378 MapSet subtract(MapSet other) const 379 { 380 if (this is emptySet) return this; 381 if (other is emptySet) return this; 382 if (this is other) return emptySet; 383 if (!root) return bringToFront(other.root.dim).subtract(other); 384 385 this.assertDeduplicated(); 386 other.assertDeduplicated(); 387 return cache.subtract.require(SetSetOp(this, other), { 388 other = other.bringToFront(root.dim); 389 390 if (root.children.length == 1 && other.root.children.length == 1 && 391 root.children[0].value == other.root.children[0].value) 392 { 393 // Single-child optimization 394 auto subtractResult = root.children[0].set.subtract(other.root.children[0].set); 395 if (subtractResult is emptySet) 396 return emptySet; 397 else 398 if (subtractResult !is root.children[0].set) 399 return MapSet(new immutable Node(root.dim, [Pair(root.children[0].value, subtractResult)])).deduplicate; 400 else 401 return this; 402 } 403 404 MapSet[DimValue] newChildren; 405 foreach (ref pair; root.children) 406 newChildren[pair.value] = pair.set; 407 408 bool modified; 409 foreach (ref pair; other.root.children) 410 if (auto poldSubmatrix = pair.value in newChildren) 411 { 412 auto subtractResult = poldSubmatrix.subtract(pair.set); 413 if (*poldSubmatrix !is subtractResult) 414 { 415 *poldSubmatrix = subtractResult; 416 if (subtractResult is emptySet) 417 newChildren.remove(pair.value); 418 modified = true; 419 } 420 } 421 422 if (!modified) 423 return this; 424 if (!newChildren.length) 425 return emptySet; 426 427 return MapSet(new immutable Node(root.dim, cast(immutable) newChildren)).deduplicate; 428 }()); 429 } 430 431 /// Apply `fn` over subsets, and return a new `MapSet`. 432 /*private*/ MapSet lazyMap(scope MapSet delegate(MapSet) fn) const 433 { 434 // Defer allocation until the need to mutate 435 foreach (i, ref pair; root.children) // Read-only scan 436 { 437 auto newSet = fn(pair.set); 438 if (newSet !is pair.set) 439 { 440 auto newChildren = new Pair[root.children.length]; 441 // Known to not need mutation 442 newChildren[0 .. i] = root.children[0 .. i]; 443 // Reuse already calculated result 444 newChildren[i] = Pair(pair.value, newSet); 445 // Continue scan with mutation 446 foreach (j, ref pair2; root.children[i + 1 .. $]) 447 newChildren[i + 1 + j] = Pair(pair2.value, fn(pair2.set)); 448 return MapSet(new immutable Node(root.dim, cast(immutable) newChildren)).deduplicate; 449 } 450 } 451 return this; // No mutation necessary 452 } 453 454 /// "Unset" a given dimension, removing it from the matrix. 455 /// The result is the union of all sub-matrices for all values of `dim`. 456 MapSet remove(DimName dim) const 457 { 458 if (this is emptySet || this is unitSet) return this; 459 this.assertDeduplicated(); 460 return cache.remove.require(SetDimOp(this, dim), { 461 if (root.dim == dim) 462 { 463 MapSet result; 464 foreach (ref pair; root.children) 465 result = result.merge(pair.set); 466 return result; 467 } 468 return lazyMap(set => set.remove(dim)); 469 }()); 470 } 471 472 /// Unset dimensions according to a predicate. 473 /// This is faster than removing dimensions individually, however, 474 /// unlike the `DimName` overload, this one does not benefit from global memoization. 475 MapSet remove(bool delegate(DimName) pred) const 476 { 477 if (this is emptySet) return this; 478 this.assertDeduplicated(); 479 480 MapSet[MapSet] cache; 481 MapSet visit(MapSet set) 482 { 483 if (set is unitSet) 484 return set; 485 return cache.require(set, { 486 if (pred(set.root.dim)) 487 { 488 MapSet result; 489 foreach (ref pair; set.root.children) 490 result = result.merge(visit(pair.set)); 491 return result; 492 } 493 return set.lazyMap(set => visit(set)); 494 }()); 495 } 496 return visit(this); 497 } 498 499 /// Set the given dimension to always have the given value, 500 /// collapsing (returning the union of) all sub-matrices 501 /// for previously different values of `dim`. 502 MapSet set(DimName dim, DimValue value) const 503 { 504 return this.remove(dim).addDim(dim, value); 505 } 506 507 /// Adds a new dimension with the given value. 508 /// The dimension must not have previously existed in `this`. 509 private MapSet addDim(DimName dim, DimValue value) const 510 { 511 if (this is emptySet) return this; 512 this.assertDeduplicated(); 513 assert(this is this.remove(dim), "Duplicate dimension"); 514 if (value == nullValue) 515 return this; 516 return MapSet(new immutable Node(dim, [Pair(value, this)])).deduplicate; 517 } 518 519 /// Return a sub-matrix for all points where the given dimension has this value. 520 /// The dimension itself is not included in the result. 521 /// Note: if `dim` doesn't occur (e.g. because `this` is the unit set) and `value` is `nullValue`, 522 /// this returns the unit set (not the empty set). 523 MapSet slice(DimName dim, DimValue value) const 524 { 525 if (this is emptySet) return emptySet; 526 this.assertDeduplicated(); 527 foreach (ref pair; bringToFront(dim).root.children) 528 if (pair.value == value) 529 return pair.set; 530 return emptySet; 531 } 532 533 /// Return a subset of this set for all points where the given dimension has this value. 534 /// Unlike `slice`, the dimension itself is included in the result (with the given value). 535 MapSet get(DimName dim, DimValue value) const 536 { 537 if (this is emptySet) return emptySet; 538 this.assertDeduplicated(); 539 foreach (ref pair; bringToFront(dim).root.children) 540 if (pair.value == value) 541 return MapSet(new immutable Node(dim, [Pair(value, pair.set)])).deduplicate; 542 return emptySet; 543 } 544 545 /// Return all unique values occurring for a given dimension. 546 /// Unless this is the empty set, the return value is always non-empty. 547 /// If `dim` doesn't occur, it will be `[nullValue]`. 548 const(DimValue)[] all(DimName dim) const 549 { 550 // return bringToFront(dim).root.children.keys; 551 if (this is emptySet) return null; 552 if (this is unitSet) return [nullValue]; 553 if (root.dim == dim) return root.children.amap!(child => child.value); 554 this.assertDeduplicated(); 555 556 HashSet!DimValue allValues; 557 HashSet!MapSet seen; 558 void visit(MapSet set) 559 { 560 if (set is unitSet) 561 { 562 allValues.add(nullValue); 563 return; 564 } 565 if (set in seen) 566 return; 567 seen.add(set); 568 569 if (set.root.dim == dim) 570 foreach (ref pair; set.root.children) 571 allValues.add(pair.value); 572 else 573 foreach (ref pair; set.root.children) 574 visit(pair.set); 575 } 576 visit(this); 577 return allValues.keys; 578 } 579 580 /// Return a set which represents the Cartesian product between 581 /// this set and the given `values` across the specified 582 /// dimension. 583 MapSet cartesianProduct(DimName dim, DimValue[] values) const 584 { 585 if (this is emptySet) return emptySet; 586 if (values.length == 0) return emptySet; 587 this.assertDeduplicated(); 588 auto unset = this.remove(dim); 589 auto children = values.map!(value => Pair(value, unset)).array; 590 children.sort(); 591 return MapSet(new immutable Node(dim, cast(immutable) children)).deduplicate; 592 } 593 594 /// Return a set which represents the Cartesian product between 595 /// this and the given set. 596 /// Duplicate dimensions are first removed from `this` set. 597 /// For best performance, call `big.cartesianProduct(small)` 598 MapSet cartesianProduct(MapSet other) const 599 { 600 if (this is emptySet || other is emptySet) return emptySet; 601 if (this is unitSet) return other; 602 if (other is unitSet) return this; 603 604 MapSet unset = this; 605 foreach (dim; other.getDims()) 606 unset = unset.remove(dim); 607 608 return other.uncheckedCartesianProduct(unset); 609 } 610 611 // Assumes that the dimensions in `this` and `other` are disjoint. 612 private MapSet uncheckedCartesianProduct(MapSet other) const 613 { 614 if (this is unitSet) return other; 615 if (other is unitSet) return this; 616 617 this.assertDeduplicated(); 618 other.assertDeduplicated(); 619 620 return cache.cartesianProduct.require(SetSetOp(this, other), { 621 return lazyMap(set => set.uncheckedCartesianProduct(other)); 622 }()); 623 } 624 625 /// Return a superset of this set, consisting of a Cartesian 626 /// product of every value of every dimension. The total number of 627 /// unique nodes is thus equal to the number of dimensions. 628 MapSet completeSuperset() const 629 { 630 if (this is emptySet || this is unitSet) return this; 631 this.assertDeduplicated(); 632 633 return cache.completeSuperset.require(this, { 634 MapSet child = MapSet.emptySet; 635 foreach (ref pair; root.children) 636 child = child.merge(pair.set); 637 child = child.completeSuperset(); 638 auto newChildren = root.children.dup; 639 foreach (ref pair; newChildren) 640 pair.set = child; 641 return MapSet(new immutable Node(root.dim, cast(immutable) newChildren)).deduplicate; 642 }()); 643 } 644 645 /// Refactor this matrix into one with the same data, 646 /// but putting the given dimension in front. 647 /// This will speed up access to values with the given dimension. 648 /// If the dimension does not yet occur in the set (or any subset), 649 /// it is instantiated with a single `nullValue` value. 650 /// The set must be non-empty. 651 MapSet bringToFront(DimName dim) const 652 { 653 assert(this !is emptySet, "Empty sets may not have dimensions"); 654 655 if (this is unitSet) 656 { 657 // We reached the bottom, and did not find `dim` along the way. 658 // Create it now. 659 return MapSet(new immutable Node(dim, [Pair(nullValue, unitSet)])).deduplicate; 660 } 661 662 if (dim == root.dim) 663 { 664 // Already at the front. 665 return this; 666 } 667 668 // 1. Recurse. 669 // 2. After recursion, all children should have `dim` at the front. 670 // So, just swap this layer with the next one. 671 672 this.assertDeduplicated(); 673 return cache.bringToFront.require(SetDimOp(this, dim), { 674 if (root.children.length == 1) 675 { 676 auto newSubmatrix = root.children[0].set.bringToFront(dim); 677 auto newChildren = newSubmatrix.root.children.dup; 678 foreach (ref pair; newChildren) 679 pair.set = MapSet(new immutable Node(root.dim, [Pair(root.children[0].value, pair.set)])).deduplicate; 680 return MapSet(new immutable Node(dim, cast(immutable) newChildren)).deduplicate; 681 } 682 683 Pair[][DimValue] subsets; 684 foreach (ref pair; root.children) 685 { 686 auto newSubmatrix = pair.set.bringToFront(dim); 687 assert(newSubmatrix.root.dim == dim); 688 foreach (ref pair2; newSubmatrix.root.children) 689 subsets[pair2.value] ~= Pair(pair.value, pair2.set); 690 } 691 Pair[] newChildren; 692 foreach (value, children; subsets) 693 { 694 children.sort(); 695 newChildren ~= Pair(value, MapSet(new immutable Node(root.dim, cast(immutable) children)).deduplicate); 696 } 697 newChildren.sort(); 698 return MapSet(new immutable Node(dim, cast(immutable) newChildren)).deduplicate; 699 }()); 700 } 701 702 /// Refactor this matrix into one with the same data, 703 /// but attempting to lower the total number of nodes. 704 MapSet optimize() const 705 { 706 if (this is emptySet || this is unitSet) return this; 707 this.assertDeduplicated(); 708 709 return cache.optimize.require(this, { 710 struct Result { bool done; MapSet[MapSet] map; } 711 static Result optimizeLayer(HashSet!MapSet sets0) 712 { 713 // - At the bottom? 714 // - Yes: 715 // - return failure 716 // - No: 717 // - Try to swap this layer. Success? 718 // - Yes: 719 // - return success 720 // - No: 721 // - Recurse and try to swap next layer. Success? 722 // - Yes: Retry this layer 723 // - No: return failure (bottom reached) 724 725 assert(!sets0.empty); 726 if (sets0.byKey.front is unitSet) 727 return Result(true, null); // at the bottom 728 auto dim0 = sets0.byKey.front.root.dim; 729 assert(sets0.byKey.all!(set => set !is unitSet), "Leaf/non-leaf nodes mismatch"); 730 assert(sets0.byKey.all!(set => set.root.dim == dim0), "Dim mismatch"); 731 732 auto sets1 = sets0.byKey.map!(set => set.root.children.map!(function MapSet (ref child) => child.set)).joiner.toSet; 733 assert(!sets1.empty); 734 if (sets1.byKey.front is unitSet) 735 return Result(true, null); // one layer away from the bottom, nothing to swap with 736 auto dim1 = sets1.byKey.front.root.dim; 737 assert(sets1.byKey.all!(set => set !is unitSet), "Leaf/non-leaf nodes mismatch"); 738 assert(sets1.byKey.all!(set => set.root.dim == dim1), "Dim mismatch"); 739 740 auto currentNodes = sets0.length + sets1.length; 741 742 MapSet[MapSet] swappedSets; 743 HashSet!MapSet sets0new, sets1new; 744 745 foreach (set0; sets0.byKey) 746 { 747 Pair[][DimValue] subsets; 748 foreach (ref pair0; set0.root.children) 749 foreach (ref pair1; pair0.set.root.children) 750 subsets[pair1.value] ~= Pair(pair0.value, pair1.set); 751 752 Pair[] newChildren; 753 foreach (value, children; subsets) 754 { 755 children.sort(); 756 auto set1new = MapSet(new immutable Node(dim0, cast(immutable) children)).deduplicate; 757 sets1new.add(set1new); 758 newChildren ~= Pair(value, set1new); 759 } 760 newChildren.sort(); 761 auto set0new = MapSet(new immutable Node(dim1, cast(immutable) newChildren)).deduplicate; 762 sets0new.add(set0new); 763 swappedSets[set0] = set0new; 764 } 765 766 auto newNodes = sets0new.length + sets1new.length; 767 768 if (newNodes < currentNodes) 769 return Result(false, swappedSets); // Success, retry above layer 770 771 // Failure, descend 772 773 auto result1 = optimizeLayer(sets1); 774 if (!result1.map) 775 { 776 assert(result1.done); 777 return Result(true, null); // Done, bottom reached 778 } 779 780 // Apply result 781 sets0new.clear(); 782 foreach (set0; sets0.byKey) 783 { 784 set0.assertDeduplicated(); 785 auto newChildren = set0.root.children.dup; 786 foreach (ref pair0; newChildren) 787 pair0.set = result1.map[pair0.set]; 788 auto set0new = MapSet(new immutable Node(dim0, cast(immutable) newChildren)).deduplicate; 789 sets0new.add(set0new); 790 swappedSets[set0] = set0new; 791 } 792 793 if (result1.done) 794 return Result(true, swappedSets); 795 796 // Retry this layer 797 auto result0 = optimizeLayer(sets0new); 798 if (!result0.map) 799 { 800 assert(result0.done); 801 return Result(true, swappedSets); // Bottom was reached upon retry, just return our results unchanged 802 } 803 804 MapSet[MapSet] compoundedResult; 805 foreach (set0; sets0.byKey) 806 compoundedResult[set0] = result0.map[swappedSets[set0]]; 807 return Result(result0.done, compoundedResult); 808 } 809 810 MapSet[1] root = [this.normalize]; 811 while (true) 812 { 813 auto result = optimizeLayer(root[].toSet()); 814 if (result.map) 815 root[0] = result.map[root[0]]; 816 if (result.done) 817 return root[0]; 818 } 819 }()); 820 } 821 822 /// Return a set equivalent to a unit set (all dimensions 823 /// explicitly set to `nullValue`), with all dimensions in `this`, 824 /// in an order approximately following that of the dimensions in 825 /// `this`. Implicitly normalized. 826 private MapSet dimOrderReference() const 827 { 828 if (this is unitSet || this is emptySet) return this; 829 830 DimName[] dims; 831 HashSet!MapSet seen; 832 void visit(MapSet set, size_t pos) 833 { 834 if (set is unitSet) return; 835 if (set in seen) return; 836 seen.add(set); 837 if ((pos == dims.length || dims[pos] != set.root.dim) && !dims.canFind(set.root.dim)) 838 { 839 dims.insertInPlace(pos, set.root.dim); 840 pos++; 841 } 842 foreach (ref pair; set.root.children) 843 visit(pair.set, pos); 844 } 845 visit(this, 0); 846 847 MapSet result = unitSet; 848 foreach_reverse (dim; dims) 849 result = MapSet(new immutable Node(dim, [Pair(nullValue, result)])).deduplicate(); 850 851 return result; 852 } 853 854 /// Refactor this matrix into one in which dimensions always occur 855 /// in the same order, no matter what path is taken. 856 MapSet normalize() const 857 { 858 if (this is unitSet || this is emptySet) return this; 859 860 return reorderUsing(dimOrderReference()); 861 } 862 863 private size_t maxDepth() const 864 { 865 import std.algorithm.comparison : max; 866 867 if (this is emptySet || this is unitSet) 868 return 0; 869 this.assertDeduplicated(); 870 return cache.maxDepth.require(this, { 871 size_t maxDepth = 0; 872 foreach (ref pair; root.children) 873 maxDepth = max(maxDepth, pair.set.maxDepth()); 874 return 1 + maxDepth; 875 }()); 876 } 877 878 /// Swap the two adjacent dimensions which are `depth` dimensions away. 879 /*private*/ MapSet swapDepth(size_t depth) const 880 { 881 if (this is emptySet || this is unitSet) return this; 882 this.assertDeduplicated(); 883 884 return cache.swapDepth.require(SetIdxOp(this, depth), { 885 if (depth == 0) 886 { 887 foreach (ref pair; root.children) 888 if (pair.set !is unitSet) 889 return bringToFront(pair.set.root.dim); 890 return this; 891 } 892 else 893 { 894 auto newChildren = root.children.dup; 895 foreach (ref pair; newChildren) 896 pair.set = pair.set.swapDepth(depth - 1); 897 return MapSet(new immutable Node(root.dim, cast(immutable) newChildren)).deduplicate; 898 } 899 }()); 900 } 901 902 /// Refactor this matrix into one with the same data, but in which 903 /// the dimensions always occur as in `reference` (which is 904 /// assumed to be normalized). 905 MapSet reorderUsing(MapSet reference) const 906 { 907 if (this is emptySet || reference is emptySet || reference is unitSet) return this; 908 this.assertDeduplicated(); 909 reference.assertDeduplicated(); 910 911 return cache.reorderUsing.require(SetSetOp(this, reference), { 912 return bringToFront(reference.root.dim).lazyMap(set => set.reorderUsing(reference.root.children[0].set)); 913 }()); 914 } 915 916 void toString(scope void delegate(const(char)[]) sink) const 917 { 918 import std.format : formattedWrite; 919 if (this is emptySet) 920 sink("{}"); 921 else 922 if (this is unitSet) 923 sink("{[]}"); 924 else 925 sink.formattedWrite!"%s"(*root); 926 } /// 927 928 hash_t toHash() const 929 { 930 return 931 this is emptySet ? 0 : 932 this is unitSet ? 1 : 933 root.toHash(); 934 } /// 935 936 bool opEquals(const typeof(this) s) const 937 { 938 if (root is s.root) 939 return true; 940 if (this is emptySet || this is unitSet || s is emptySet || s is unitSet) 941 return this is s; 942 return *root == *s.root; 943 } /// 944 } 945 946 unittest 947 { 948 import std.algorithm.sorting : sort; 949 950 alias M = MapSet!(string, int); 951 M m, n; 952 m = m.merge(M.unitSet.set("x", 1).set("y", 5)); 953 m = m.merge(M.unitSet.set("x", 1).set("y", 6)); 954 assert(m.all("x") == [1]); 955 assert(m.all("y").dup.sort.release == [5, 6]); 956 957 m = m.merge(M.unitSet.set("x", 2).set("y", 6)); 958 assert(m.get("x", 1).all("y").dup.sort.release == [5, 6]); 959 assert(m.get("y", 6).all("x").dup.sort.release == [1, 2]); 960 961 m = m.subtract(M.unitSet.set("x", 1).set("y", 6)); 962 assert(m.all("x").dup.sort.release == [1, 2]); 963 assert(m.all("y").dup.sort.release == [5, 6]); 964 assert(m.get("x", 1).all("y") == [5]); 965 assert(m.get("y", 6).all("x") == [2]); 966 967 m = M.emptySet; 968 m = m.merge(M.unitSet.set("x", 10).set("y", 20)); 969 m = m.merge(M.unitSet.set("x", 10).set("y", 21)); 970 m = m.merge(M.unitSet.set("x", 11).set("y", 21)); 971 m = m.merge(M.unitSet.set("x", 11).set("y", 22)); 972 auto o = m.optimize(); 973 assert(o.uniqueNodes < m.uniqueNodes); 974 975 m = M.emptySet; 976 assert(m.all("x") == []); 977 m = M.unitSet; 978 assert(m.all("x") == [0]); 979 m = m.merge(M.unitSet.set("x", 1)); 980 assert(m.all("x").dup.sort.release == [0, 1]); 981 982 m = M.unitSet; 983 assert(m.set("x", 1).set("x", 1).all("x") == [1]); 984 985 m = M.unitSet; 986 m = m.cartesianProduct("x", [1, 2, 3]); 987 m = m.cartesianProduct("y", [1, 2, 3]); 988 m = m.cartesianProduct("z", [1, 2, 3]); 989 assert(m.count == 3 * 3 * 3); 990 assert(m .all("x").dup.sort.release == [1, 2, 3]); 991 assert(m.set("z", 1).all("x").dup.sort.release == [1, 2, 3]); 992 assert(m.set("x", 1).all("z").dup.sort.release == [1, 2, 3]); 993 994 m = M.unitSet; 995 m = m.cartesianProduct("a", [1, 2, 3]); 996 m = m.cartesianProduct("b", [1, 2, 3]); 997 n = M.unitSet; 998 n = n.cartesianProduct("c", [1, 2, 3]); 999 n = n.cartesianProduct("d", [1, 2, 3]); 1000 m = m.cartesianProduct(n); 1001 assert(m.count == 3 * 3 * 3 * 3); 1002 1003 assert(M.unitSet != M.unitSet.set("x", 1)); 1004 1005 m = M.emptySet; 1006 m = m.merge(M.unitSet.set("x", 1).set("y", 11)); 1007 m = m.merge(M.unitSet.set("x", 2).set("y", 12).set("z", 22)); 1008 m = m.completeSuperset(); 1009 assert(m.uniqueNodes == 3); 1010 assert(m.count == 8); // 2 ^^ 3 1011 assert(m.all("x").dup.sort.release == [1, 2]); 1012 assert(m.all("y").dup.sort.release == [11, 12]); 1013 assert(m.all("z").dup.sort.release == [0, 22]); 1014 } 1015 1016 1017 /// Allows executing a deterministic algorithm over all states in a given MapSet. 1018 /// If a variable is not queried by the algorithm, states for all 1019 /// variations of that variable are processed in one iteration. 1020 struct MapSetVisitor(A, V) 1021 { 1022 /// Underlying `MapSet`. 1023 alias Set = MapSet!(A, V); 1024 Set set; /// ditto 1025 1026 /// Internal state. 1027 /*private*/ public 1028 { 1029 struct Var 1030 { 1031 A name; /// 1032 const(V)[] values; /// 1033 size_t pos; /// 1034 } 1035 Var[] stack; 1036 size_t stackPos; 1037 V[A] singularValues, resolvedValues; // Faster than workingSet.all(name)[0] 1038 private HashSet!A dirtyValues; // Accumulate MapSet.set calls 1039 private Set workingSet; 1040 } 1041 1042 this(Set set) 1043 { 1044 this.set = set; 1045 foreach (dim, values; set.getDimsAndValues()) 1046 if (values.length == 1) 1047 singularValues[dim] = values.byKey.front; 1048 } /// 1049 1050 /// Resets iteration to the beginning. 1051 /// Equivalent to but faster than constructing a new MapSetVisitor 1052 /// instance (`visitor = MapSetVisitor(visitor.set)`). 1053 void reset() 1054 { 1055 workingSet = Set.emptySet; 1056 stack = null; 1057 } 1058 1059 /// Returns true if there are more states to iterate over, 1060 /// otherwise returns false 1061 bool next() 1062 { 1063 if (set is Set.emptySet) 1064 return false; 1065 if (workingSet is Set.emptySet) 1066 { 1067 // first iteration 1068 } 1069 else 1070 while (true) 1071 { 1072 if (!stack.length) 1073 return false; // All possibilities exhausted 1074 auto last = &stack[$-1]; 1075 last.pos++; 1076 if (last.pos == last.values.length) 1077 { 1078 stack = stack[0 .. $ - 1]; 1079 continue; 1080 } 1081 break; 1082 } 1083 1084 workingSet = set; 1085 resolvedValues = null; 1086 dirtyValues.clear(); 1087 stackPos = 0; 1088 return true; 1089 } 1090 1091 private void flush() 1092 { 1093 if (dirtyValues.empty) 1094 return; 1095 workingSet = workingSet.remove((A name) => name in dirtyValues); 1096 foreach (name; dirtyValues) 1097 workingSet = workingSet.addDim(name, resolvedValues[name]); 1098 dirtyValues.clear(); 1099 } 1100 1101 /// Peek at the subset the algorithm is currently working with. 1102 @property Set currentSubset() 1103 { 1104 assert(workingSet !is Set.emptySet, "Not iterating"); 1105 flush(); 1106 return workingSet; 1107 } 1108 1109 /// Algorithm interface - get a value by name 1110 V get(A name) 1111 { 1112 assert(workingSet !is Set.emptySet, "Not iterating"); 1113 if (auto pvalue = name in resolvedValues) 1114 return *pvalue; 1115 if (auto pvalue = name in singularValues) 1116 return *pvalue; 1117 1118 if (stackPos == stack.length) 1119 { 1120 // Expand new variable 1121 auto values = workingSet.all(name); 1122 auto value = values[0]; 1123 resolvedValues[name] = value; 1124 stack ~= Var(name, values, 0); 1125 stackPos++; 1126 if (values.length > 1) 1127 workingSet = workingSet.get(name, value); 1128 return value; 1129 } 1130 1131 // Iterate over known variable 1132 auto var = &stack[stackPos]; 1133 assert(var.name == name, "Mismatching get order"); 1134 auto value = var.values[var.pos]; 1135 workingSet = workingSet.get(var.name, value); 1136 assert(workingSet !is Set.emptySet, "Empty set after restoring"); 1137 resolvedValues[var.name] = value; 1138 stackPos++; 1139 return value; 1140 } 1141 1142 /// Algorithm interface - set a value by name 1143 void put(A name, V value) 1144 { 1145 assert(workingSet !is Set.emptySet, "Not iterating"); 1146 if (name !in resolvedValues) 1147 if (auto pvalue = name in singularValues) 1148 if (*pvalue == value) 1149 return; 1150 1151 resolvedValues[name] = value; 1152 dirtyValues.add(name); 1153 } 1154 1155 /// Apply a function over every possible value of the given 1156 /// variable, without resolving it (unless it's already resolved). 1157 void transform(A name, scope void delegate(ref V value) fun) 1158 { 1159 assert(workingSet !is Set.emptySet, "Not iterating"); 1160 if (auto pvalue = name in resolvedValues) 1161 { 1162 dirtyValues.add(name); 1163 return fun(*pvalue); 1164 } 1165 1166 workingSet = workingSet.bringToFront(name); 1167 Set[V] newChildren; 1168 foreach (ref child; workingSet.root.children) 1169 { 1170 V value = child.value; 1171 fun(value); 1172 newChildren.updateVoid(value, 1173 () => child.set, 1174 (ref Set set) 1175 { 1176 set = set.merge(child.set); 1177 }); 1178 } 1179 workingSet = Set(new immutable Set.Node(name, cast(immutable) newChildren)).deduplicate; 1180 } 1181 1182 /// Apply a function over every possible value of the given 1183 /// variable, without resolving it (unless it's already resolved). 1184 /// The function is assumed to be injective (does not produce 1185 /// duplicate outputs for distinct inputs). 1186 void injectiveTransform(A name, scope void delegate(ref V value) fun) 1187 { 1188 assert(workingSet !is Set.emptySet, "Not iterating"); 1189 if (auto pvalue = name in resolvedValues) 1190 { 1191 dirtyValues.add(name); 1192 return fun(*pvalue); 1193 } 1194 1195 workingSet = workingSet.bringToFront(name); 1196 auto newChildren = workingSet.root.children.dup; 1197 foreach (ref child; newChildren) 1198 fun(child.value); 1199 newChildren.sort(); 1200 workingSet = Set(new immutable Set.Node(name, cast(immutable) newChildren)).deduplicate; 1201 } 1202 1203 /// Inject a variable and values to iterate over. 1204 /// The variable must not have been resolved yet. 1205 void inject(A name, V[] values) 1206 { 1207 assert(name !in resolvedValues, "Already resolved"); 1208 assert(values.length > 0, "Injecting zero values would result in an empty set"); 1209 singularValues.remove(name); 1210 workingSet = workingSet.cartesianProduct(name, values); 1211 } 1212 } 1213 1214 /// An algorithm which divides two numbers. 1215 /// When the divisor is zero, we don't even query the dividend, 1216 /// therefore processing all dividends in one iteration. 1217 unittest 1218 { 1219 alias M = MapSet!(string, int); 1220 M m = M.unitSet 1221 .cartesianProduct("divisor" , [0, 1, 2]) 1222 .cartesianProduct("dividend", [0, 1, 2]); 1223 assert(m.count == 9); 1224 1225 auto v = MapSetVisitor!(string, int)(m); 1226 M results; 1227 int iterations; 1228 while (v.next()) 1229 { 1230 iterations++; 1231 auto divisor = v.get("divisor"); 1232 if (divisor == 0) 1233 continue; 1234 auto dividend = v.get("dividend"); 1235 v.put("quotient", dividend / divisor); 1236 results = results.merge(v.currentSubset); 1237 } 1238 1239 assert(iterations == 7); // 1 for division by zero + 3 for division by one + 3 for division by two 1240 assert(results.get("divisor", 2).get("dividend", 2).all("quotient") == [1]); 1241 assert(results.get("divisor", 0).count == 0); 1242 } 1243 1244 unittest 1245 { 1246 import std.algorithm.sorting : sort; 1247 1248 alias M = MapSet!(string, int); 1249 M m = M.unitSet.cartesianProduct("x", [1, 2, 3]); 1250 auto v = MapSetVisitor!(string, int)(m); 1251 v.next(); 1252 v.transform("x", (ref int v) { v *= 2; }); 1253 assert(v.currentSubset.all("x").dup.sort.release == [2, 4, 6]); 1254 } 1255 1256 unittest 1257 { 1258 alias M = MapSet!(string, int); 1259 M m = M.unitSet.cartesianProduct("x", [1, 2, 3]); 1260 auto v = MapSetVisitor!(string, int)(m); 1261 while (v.next()) 1262 { 1263 v.transform("x", (ref int v) { v *= 2; }); 1264 v.put("y", v.get("x")); 1265 } 1266 }