1 /**
2  * Data structure for optimized "sparse" N-dimensional matrices
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.utils.mapset;
15 
16 static if (__VERSION__ >= 2083):
17 
18 import std.algorithm.iteration;
19 import std.algorithm.searching;
20 import std.algorithm.sorting;
21 import std.array;
22 import std.exception;
23 import std.typecons : tuple;
24 
25 import ae.utils.aa;
26 import ae.utils.array : amap;
27 
28 /**
29    Data structure for holding optimized "sparse" N-dimensional matrices.
30    The number of dimensions is arbitrary and can be varied at runtime.
31 
32    Unlike classical sparse arrays/matrices, this data structure is
33    optimized not just for arrays which are mostly zero (or any given
34    value), but for any matrices where data is likely to repeat across
35    dimensions.
36 
37    This is done by storing the data as a tree, with each depth
38    representing one dimension. A sub-tree is, thus, a sub-matrix
39    (slice of the matrix represented by the top-level tree) along the
40    top-level dimension. Each sub-tree is individually immutable, and
41    can therefore be shared within the same tree or even by several
42    instances of this type.
43 
44    Dimension order need not be the same for all sub-trees. Even the
45    number of dimensions in siblings' sub-trees need not be the same;
46    i.e, the data structure is itself sparse with regards to the
47    dimensions.
48 
49    As there is no explicit "value" type (associated with every set of
50    coordinates), this data structure can be seen as a representation
51    of a set of points, but it can be simulated by adding a "value"
52    dimension (in the same way how a gray-scale image can be
53    represented as a set of 3D points like a height-map).
54 
55    An alternative way to look at this data structure is as a set of
56    maps (i.e. associative arrays). Thus, each layer of the tree stores
57    one possible map key, and under it, all the maps that have this key
58    with a specific value. Note that for this use case, DimValue needs
59    to have a value (configured using nullValue) which indicates the
60    absence of a certain key (dimension) in a map.
61 
62    Params:
63      DimName = The type used to indicate the name (or index) of a dimension.
64      DimValue = The type for indicating a point on a dimension's axis.
65      nullValue = A value for DimValue indicating that a dimension is unset
66  */
67 struct MapSet(DimName, DimValue, DimValue nullValue = DimValue.init)
68 {
69 	/// Logically, each MapSet node has a map of values to a subset.
70 	/// However, it is faster to represent that map as an array of key-value pairs
71 	/// rather than a D associative array, so we do that here.
72 	struct Pair
73 	{
74 		DimValue value;
75 		MapSet set;
76 
77 		int opCmp(ref const typeof(this) other) const
78 		{
79 			static if (is(typeof(value.opCmp(other.value)) : int))
80 				return value.opCmp(other.value);
81 			else
82 				return value < other.value ? -1 : value > other.value ? 1 : 0;
83 		}
84 	}
85 
86 	struct Node
87 	{
88 		DimName dim;
89 		Pair[] children;
90 
91 		immutable this(DimName dim, immutable Pair[] children)
92 		{
93 			// Zero children doesn't make sense.
94 			// It would be the equivalent of an empty set,
95 			// but then we should just use MapSet.emptySet instead.
96 			assert(!children.empty, "Node with zero children");
97 
98 			// Nodes should be in their canonical form for
99 			// memoization and deduplication to be effective.
100 			assert(children.isSorted, "Children are not sorted");
101 
102 			this.dim = dim;
103 			this.children = children;
104 
105 			// Because each subset is immutable, we can
106 			// pre-calculate the hash during construction.
107 			hash = hashOf(dim) ^ hashOf(children);
108 
109 			size_t totalMembers = 0;
110 			foreach (i, ref pair; children)
111 			{
112 				if (i)
113 					assert(pair.value != children[i-1].value, "Duplicate value");
114 
115 				pair.set.assertDeduplicated();
116 				// Same as "Node with zero children"
117 				assert(pair.set !is emptySet, "Empty set as subset");
118 
119 				totalMembers += pair.set.count;
120 			}
121 			this.totalMembers = totalMembers;
122 		}
123 
124 		immutable this(DimName dim, immutable MapSet[DimValue] children)
125 		{
126 			auto childrenList = children.byKeyValue.map!(kv => Pair(kv.key, kv.value)).array;
127 			childrenList.sort();
128 			this(dim, cast(immutable) childrenList);
129 		}
130 
131 		void toString(scope void delegate(const(char)[]) sink) const
132 		{
133 			import std.format : formattedWrite;
134 			sink.formattedWrite("{ %(%s%), [ ", (&dim)[0..1]);
135 			bool first = true;
136 			foreach (ref pair; children)
137 			{
138 				if (first)
139 					first = false;
140 				else
141 					sink(", ");
142 				sink.formattedWrite("%s : %s", pair.value, pair.set);
143 			}
144 			sink(" ] }");
145 		}
146 
147 		private hash_t hash;
148 		size_t totalMembers;
149 
150 		hash_t toHash() const
151 		{
152 			return hash;
153 		}
154 
155 		bool opEquals(ref const typeof(this) s) const
156 		{
157 			return hash == s.hash && dim == s.dim && children == s.children;
158 		}
159 	}
160 
161 	/// Indicates the empty set.
162 	/// May only occur at the top level (never as a subset).
163 	private enum emptySetRoot = cast(immutable(Node)*)1;
164 
165 	/// If emptySetRoot, zero values.
166 	/// If null, one value with zero dimensions.
167 	/// Otherwise, pointer to node describing the next dimension.
168 	immutable(Node)* root = emptySetRoot;
169 
170 	/// A set containing a single nil-dimensional element.
171 	/// Holds exactly one value (a point with all dimensions at
172 	/// nullValue).
173 	static immutable unitSet = MapSet(null);
174 
175 	/// The empty set. Represents a set which holds zero values.
176 	static immutable emptySet = MapSet(emptySetRoot);
177 
178 	/// Return the total number of items in this set.
179 	size_t count() const
180 	{
181 		if (this is emptySet)
182 			return 0;
183 		if (this is unitSet)
184 			return 1;
185 		return root.totalMembers;
186 	}
187 
188 	private struct SetSetOp { MapSet a, b; }
189 	private struct SetDimOp { MapSet set; DimName dim; }
190 	private struct SetIdxOp { MapSet set; size_t index; }
191 	private struct Cache
192 	{
193 		/// For deduplication - key is value
194 		MapSet[MapSet] instances;
195 		/// Operations - things that operate recursively on subtrees
196 		/// should be memoized here
197 		MapSet[SetSetOp] merge, subtract, cartesianProduct, reorderUsing;
198 		MapSet[SetDimOp] remove, bringToFront;
199 		MapSet[SetIdxOp] swapDepth;
200 		MapSet[MapSet] optimize, completeSuperset;
201 		size_t[MapSet] uniqueNodes, maxDepth;
202 	}
203 
204 	/// Because subtrees can be reused within the tree, a way of
205 	/// memoizing operations across the entire tree (instead of just
206 	/// across children of a single node, or siblings) is crucial for
207 	/// performance.
208 	static Cache cache;
209 
210 	/// Clear the global operations cache.
211 	static void clearCache()
212 	{
213 		cache = Cache.init;
214 	}
215 
216 	/// Because MapSet operations benefit greatly from memoization,
217 	/// maintaining a set of interned sets benefits performance. After
218 	/// calling `clearCache`, call this method to re-register extant
219 	/// live instances to the instance cache.
220 	void addToCache() const
221 	{
222 		cache.instances.updateVoid(
223 			this,
224 			{
225 				if (this !is emptySet && this !is unitSet)
226 					foreach (ref pair; root.children)
227 						pair.set.addToCache();
228 				return this;
229 			},
230 			(ref MapSet set)
231 			{
232 				assert(set is this);
233 			}
234 		);
235 	}
236 
237 	/// Intern and deduplicate this MapSet.
238 	/// Needs to be called only after constructing a MapSet manually
239 	/// (by allocating, populating, and setting `root`).
240 	MapSet deduplicate() const
241 	{
242 		MapSet deduplicated;
243 		cache.instances.updateVoid(
244 			this,
245 			{
246 				debug if (this !is emptySet && this !is unitSet)
247 					foreach (ref pair; root.children)
248 						pair.set.assertDeduplicated();
249 				deduplicated = this;
250 				return this;
251 			},
252 			(ref MapSet set)
253 			{
254 				deduplicated = set;
255 			}
256 		);
257 		return deduplicated;
258 	}
259 
260 	private void assertDeduplicated() const { debug assert(this is emptySet || this is unitSet || cache.instances[this] is this); }
261 
262 	/// Count and return the total number of unique nodes in this MapSet.
263 	size_t uniqueNodes() const
264 	{
265 		return cache.uniqueNodes.require(this, {
266 			HashSet!MapSet seen;
267 			void visit(MapSet set)
268 			{
269 				if (set is emptySet || set is unitSet || set in seen)
270 					return;
271 				seen.add(set);
272 				foreach (ref pair; set.root.children)
273 					visit(pair.set);
274 			}
275 			visit(this);
276 			return seen.length;
277 		}());
278 	}
279 
280 	/// Collect the names of all dimensions occurring in this tree.
281 	DimName[] getDims() const
282 	{
283 		HashSet!DimName dims;
284 		HashSet!MapSet seen;
285 		void visit(MapSet set)
286 		{
287 			if (set is emptySet || set is unitSet || set in seen)
288 				return;
289 			seen.add(set);
290 			dims.add(set.root.dim);
291 			foreach (ref pair; set.root.children)
292 				visit(pair.set);
293 		}
294 		visit(this);
295 		return dims.keys;
296 	}
297 
298 	/// Return all values for all dimensions occurring in this set.
299 	/// The Cartesian product of these values would thus be a superset
300 	/// of this set.
301 	/// This is equivalent to, but faster than, calling `getDims` and
302 	/// then `all` for each dim.
303 	HashSet!DimValue[DimName] getDimsAndValues() const
304 	{
305 		if (this is emptySet) return null;
306 
307 		// Be careful to count the implicit nullValues on branches
308 		// where a dim doesn't occur.
309 		MapSet set = this.completeSuperset;
310 		HashSet!DimValue[DimName] result;
311 		while (set !is unitSet)
312 		{
313 			DimName dim = set.root.dim;
314 			HashSet!DimValue values = set.root.children.map!((ref child) => child.value).toSet;
315 			bool added = result.addNew(dim, values);
316 			assert(added, "Duplicate dimension");
317 			set = set.root.children[0].set;
318 		}
319 		return result;
320 	}
321 
322 	/// Combine two matrices together, returning their union.
323 	/// If `other` is a subset of `this`, return `this` unchanged.
324 	MapSet merge(MapSet other) const
325 	{
326 		if (this is emptySet) return other;
327 		if (other is emptySet) return this;
328 		if (this is other) return this;
329 		if (!root) return bringToFront(other.root.dim).merge(other);
330 
331 		this.assertDeduplicated();
332 		other.assertDeduplicated();
333 		return cache.merge.require(SetSetOp(this, other), {
334 			other = other.bringToFront(root.dim);
335 
336 			if (root.children.length == 1 && other.root.children.length == 1 &&
337 				root.children[0].value == other.root.children[0].value)
338 			{
339 				// Single-child optimization
340 				auto mergeResult = root.children[0].set.merge(other.root.children[0].set);
341 				if (mergeResult !is root.children[0].set)
342 					return MapSet(new immutable Node(root.dim, [Pair(root.children[0].value, mergeResult)])).deduplicate;
343 				else
344 					return this;
345 			}
346 
347 			MapSet[DimValue] newChildren;
348 			foreach (ref pair; root.children)
349 				newChildren[pair.value] = pair.set;
350 
351 			bool modified;
352 			foreach (ref pair; other.root.children)
353 				newChildren.updateVoid(pair.value,
354 					{
355 						modified = true;
356 						return pair.set;
357 					},
358 					(ref MapSet oldSubmatrix)
359 					{
360 						auto mergeResult = oldSubmatrix.merge(pair.set);
361 						if (oldSubmatrix !is mergeResult)
362 						{
363 							oldSubmatrix = mergeResult;
364 							modified = true;
365 						}
366 					}
367 				);
368 
369 			if (!modified)
370 				return this;
371 
372 			return MapSet(new immutable Node(root.dim, cast(immutable) newChildren)).deduplicate;
373 		}());
374 	}
375 
376 	/// Return the difference between `this` and the given set.
377 	/// If `other` does not intersect with `this`, return `this` unchanged.
378 	MapSet subtract(MapSet other) const
379 	{
380 		if (this is emptySet) return this;
381 		if (other is emptySet) return this;
382 		if (this is other) return emptySet;
383 		if (!root) return bringToFront(other.root.dim).subtract(other);
384 
385 		this.assertDeduplicated();
386 		other.assertDeduplicated();
387 		return cache.subtract.require(SetSetOp(this, other), {
388 			other = other.bringToFront(root.dim);
389 
390 			if (root.children.length == 1 && other.root.children.length == 1 &&
391 				root.children[0].value == other.root.children[0].value)
392 			{
393 				// Single-child optimization
394 				auto subtractResult = root.children[0].set.subtract(other.root.children[0].set);
395 				if (subtractResult is emptySet)
396 					return emptySet;
397 				else
398 				if (subtractResult !is root.children[0].set)
399 					return MapSet(new immutable Node(root.dim, [Pair(root.children[0].value, subtractResult)])).deduplicate;
400 				else
401 					return this;
402 			}
403 
404 			MapSet[DimValue] newChildren;
405 			foreach (ref pair; root.children)
406 				newChildren[pair.value] = pair.set;
407 
408 			bool modified;
409 			foreach (ref pair; other.root.children)
410 				if (auto poldSubmatrix = pair.value in newChildren)
411 				{
412 					auto subtractResult = poldSubmatrix.subtract(pair.set);
413 					if (*poldSubmatrix !is subtractResult)
414 					{
415 						*poldSubmatrix = subtractResult;
416 						if (subtractResult is emptySet)
417 							newChildren.remove(pair.value);
418 						modified = true;
419 					}
420 				}
421 
422 			if (!modified)
423 				return this;
424 			if (!newChildren.length)
425 				return emptySet;
426 
427 			return MapSet(new immutable Node(root.dim, cast(immutable) newChildren)).deduplicate;
428 		}());
429 	}
430 
431 	/*private*/ MapSet lazyMap(scope MapSet delegate(MapSet) fn) const
432 	{
433 		// Defer allocation until the need to mutate
434 		foreach (i, ref pair; root.children) // Read-only scan
435 		{
436 			auto newSet = fn(pair.set);
437 			if (newSet !is pair.set)
438 			{
439 				auto newChildren = new Pair[root.children.length];
440 				// Known to not need mutation
441 				newChildren[0 .. i] = root.children[0 .. i];
442 				// Reuse already calculated result
443 				newChildren[i] = Pair(pair.value, newSet);
444 				// Continue scan with mutation
445 				foreach (j, ref pair2; root.children[i + 1 .. $])
446 					newChildren[i + 1 + j] = Pair(pair2.value, fn(pair2.set));
447 				return MapSet(new immutable Node(root.dim, cast(immutable) newChildren)).deduplicate;
448 			}
449 		}
450 		return this; // No mutation necessary
451 	}
452 
453 	/// "Unset" a given dimension, removing it from the matrix.
454 	/// The result is the union of all sub-matrices for all values of `dim`.
455 	MapSet remove(DimName dim) const
456 	{
457 		if (this is emptySet || this is unitSet) return this;
458 		this.assertDeduplicated();
459 		return cache.remove.require(SetDimOp(this, dim), {
460 			if (root.dim == dim)
461 			{
462 				MapSet result;
463 				foreach (ref pair; root.children)
464 					result = result.merge(pair.set);
465 				return result;
466 			}
467 			return lazyMap(set => set.remove(dim));
468 		}());
469 	}
470 
471 	/// Unset dimensions according to a predicate.
472 	/// This is faster than removing dimensions individually, however,
473 	/// unlike the `DimName` overload, this one does not benefit from global memoization.
474 	MapSet remove(bool delegate(DimName) pred) const
475 	{
476 		if (this is emptySet) return this;
477 		this.assertDeduplicated();
478 
479 		MapSet[MapSet] cache;
480 		MapSet visit(MapSet set)
481 		{
482 			if (set is unitSet)
483 				return set;
484 			return cache.require(set, {
485 				if (pred(set.root.dim))
486 				{
487 					MapSet result;
488 					foreach (ref pair; set.root.children)
489 						result = result.merge(visit(pair.set));
490 					return result;
491 				}
492 				return set.lazyMap(set => visit(set));
493 			}());
494 		}
495 		return visit(this);
496 	}
497 
498 	/// Set the given dimension to always have the given value,
499 	/// collapsing (returning the union of) all sub-matrices
500 	/// for previously different values of `dim`.
501 	MapSet set(DimName dim, DimValue value) const
502 	{
503 		return this.remove(dim).addDim(dim, value);
504 	}
505 
506 	/// Adds a new dimension with the given value.
507 	/// The dimension must not have previously existed in `this`.
508 	private MapSet addDim(DimName dim, DimValue value) const
509 	{
510 		if (this is emptySet) return this;
511 		this.assertDeduplicated();
512 		assert(this is this.remove(dim), "Duplicate dimension");
513 		if (value == nullValue)
514 			return this;
515 		return MapSet(new immutable Node(dim, [Pair(value, this)])).deduplicate;
516 	}
517 
518 	/// Return a sub-matrix for all points where the given dimension has this value.
519 	/// The dimension itself is not included in the result.
520 	/// Note: if `dim` doesn't occur (e.g. because `this` is the unit set) and `value` is `nullValue`,
521 	/// this returns the unit set (not the empty set).
522 	MapSet slice(DimName dim, DimValue value) const
523 	{
524 		if (this is emptySet) return emptySet;
525 		this.assertDeduplicated();
526 		foreach (ref pair; bringToFront(dim).root.children)
527 			if (pair.value == value)
528 				return pair.set;
529 		return emptySet;
530 	}
531 
532 	/// Return a subset of this set for all points where the given dimension has this value.
533 	/// Unlike `slice`, the dimension itself is included in the result (with the given value).
534 	MapSet get(DimName dim, DimValue value) const
535 	{
536 		if (this is emptySet) return emptySet;
537 		this.assertDeduplicated();
538 		foreach (ref pair; bringToFront(dim).root.children)
539 			if (pair.value == value)
540 				return MapSet(new immutable Node(dim, [Pair(value, pair.set)])).deduplicate;
541 		return emptySet;
542 	}
543 
544 	/// Return all unique values occurring for a given dimension.
545 	/// Unless this is the empty set, the return value is always non-empty.
546 	/// If `dim` doesn't occur, it will be `[nullValue]`.
547 	const(DimValue)[] all(DimName dim) const
548 	{
549 		// return bringToFront(dim).root.children.keys;
550 		if (this is emptySet) return null;
551 		if (this is unitSet) return [nullValue];
552 		if (root.dim == dim) return root.children.amap!(child => child.value);
553 		this.assertDeduplicated();
554 
555 		HashSet!DimValue allValues;
556 		HashSet!MapSet seen;
557 		void visit(MapSet set)
558 		{
559 			if (set is unitSet)
560 			{
561 				allValues.add(nullValue);
562 				return;
563 			}
564 			if (set in seen)
565 				return;
566 			seen.add(set);
567 
568 			if (set.root.dim == dim)
569 				foreach (ref pair; set.root.children)
570 					allValues.add(pair.value);
571 			else
572 				foreach (ref pair; set.root.children)
573 					visit(pair.set);
574 		}
575 		visit(this);
576 		return allValues.keys;
577 	}
578 
579 	/// Return a set which represents the Cartesian product between
580 	/// this set and the given `values` across the specified
581 	/// dimension.
582 	MapSet cartesianProduct(DimName dim, DimValue[] values) const
583 	{
584 		if (this is emptySet) return emptySet;
585 		if (values.length == 0) return emptySet;
586 		this.assertDeduplicated();
587 		auto unset = this.remove(dim);
588 		auto children = values.map!(value => Pair(value, unset)).array;
589 		children.sort();
590 		return MapSet(new immutable Node(dim, cast(immutable) children)).deduplicate;
591 	}
592 
593 	/// Return a set which represents the Cartesian product between
594 	/// this and the given set.
595 	/// Duplicate dimensions are first removed from `this` set.
596 	/// For best performance, call `big.cartesianProduct(small)`
597 	MapSet cartesianProduct(MapSet other) const
598 	{
599 		if (this is emptySet || other is emptySet) return emptySet;
600 		if (this is unitSet) return other;
601 		if (other is unitSet) return this;
602 
603 		MapSet unset = this;
604 		foreach (dim; other.getDims())
605 			unset = unset.remove(dim);
606 
607 		return other.uncheckedCartesianProduct(unset);
608 	}
609 
610 	// Assumes that the dimensions in `this` and `other` are disjoint.
611 	private MapSet uncheckedCartesianProduct(MapSet other) const
612 	{
613 		if (this is unitSet) return other;
614 		if (other is unitSet) return this;
615 
616 		this.assertDeduplicated();
617 		other.assertDeduplicated();
618 
619 		return cache.cartesianProduct.require(SetSetOp(this, other), {
620 			return lazyMap(set => set.uncheckedCartesianProduct(other));
621 		}());
622 	}
623 
624 	/// Return a superset of this set, consisting of a Cartesian
625 	/// product of every value of every dimension. The total number of
626 	/// unique nodes is thus equal to the number of dimensions.
627 	MapSet completeSuperset() const
628 	{
629 		if (this is emptySet || this is unitSet) return this;
630 		this.assertDeduplicated();
631 
632 		return cache.completeSuperset.require(this, {
633 			MapSet child = MapSet.emptySet;
634 			foreach (ref pair; root.children)
635 				child = child.merge(pair.set);
636 			child = child.completeSuperset();
637 			auto newChildren = root.children.dup;
638 			foreach (ref pair; newChildren)
639 				pair.set = child;
640 			return MapSet(new immutable Node(root.dim, cast(immutable) newChildren)).deduplicate;
641 		}());
642 	}
643 
644 	/// Refactor this matrix into one with the same data,
645 	/// but putting the given dimension in front.
646 	/// This will speed up access to values with the given dimension.
647 	/// If the dimension does not yet occur in the set (or any subset),
648 	/// it is instantiated with a single `nullValue` value.
649 	/// The set must be non-empty.
650 	MapSet bringToFront(DimName dim) const
651 	{
652 		assert(this !is emptySet, "Empty sets may not have dimensions");
653 
654 		if (this is unitSet)
655 		{
656 			// We reached the bottom, and did not find `dim` along the way.
657 			// Create it now.
658 			return MapSet(new immutable Node(dim, [Pair(nullValue, unitSet)])).deduplicate;
659 		}
660 
661 		if (dim == root.dim)
662 		{
663 			// Already at the front.
664 			return this;
665 		}
666 
667 		// 1. Recurse.
668 		// 2. After recursion, all children should have `dim` at the front.
669 		// So, just swap this layer with the next one.
670 
671 		this.assertDeduplicated();
672 		return cache.bringToFront.require(SetDimOp(this, dim), {
673 			if (root.children.length == 1)
674 			{
675 				auto newSubmatrix = root.children[0].set.bringToFront(dim);
676 				auto newChildren = newSubmatrix.root.children.dup;
677 				foreach (ref pair; newChildren)
678 					pair.set = MapSet(new immutable Node(root.dim, [Pair(root.children[0].value, pair.set)])).deduplicate;
679 				return MapSet(new immutable Node(dim, cast(immutable) newChildren)).deduplicate;
680 			}
681 
682 			Pair[][DimValue] subsets;
683 			foreach (ref pair; root.children)
684 			{
685 				auto newSubmatrix = pair.set.bringToFront(dim);
686 				assert(newSubmatrix.root.dim == dim);
687 				foreach (ref pair2; newSubmatrix.root.children)
688 					subsets[pair2.value] ~= Pair(pair.value, pair2.set);
689 			}
690 			Pair[] newChildren;
691 			foreach (value, children; subsets)
692 			{
693 				children.sort();
694 				newChildren ~= Pair(value, MapSet(new immutable Node(root.dim, cast(immutable) children)).deduplicate);
695 			}
696 			newChildren.sort();
697 			return MapSet(new immutable Node(dim, cast(immutable) newChildren)).deduplicate;
698 		}());
699 	}
700 
701 	/// Refactor this matrix into one with the same data,
702 	/// but attempting to lower the total number of nodes.
703 	MapSet optimize() const
704 	{
705 		if (this is emptySet || this is unitSet) return this;
706 		this.assertDeduplicated();
707 
708 		return cache.optimize.require(this, {
709 			struct Result { bool done; MapSet[MapSet] map; }
710 			static Result optimizeLayer(HashSet!MapSet sets0)
711 			{
712 				// - At the bottom?
713 				//   - Yes:
714 				//     - return failure
715 				//   - No:
716 				//     - Try to swap this layer. Success?
717 				//       - Yes:
718 				//         - return success
719 				//       - No:
720 				//         - Recurse and try to swap next layer. Success?
721 				//           - Yes: Retry this layer
722 				//           - No: return failure (bottom reached)
723 
724 				assert(!sets0.empty);
725 				if (sets0.byKey.front is unitSet)
726 					return Result(true, null); // at the bottom
727 				auto dim0 = sets0.byKey.front.root.dim;
728 				assert(sets0.byKey.all!(set => set !is unitSet), "Leaf/non-leaf nodes mismatch");
729 				assert(sets0.byKey.all!(set => set.root.dim == dim0), "Dim mismatch");
730 
731 				auto sets1 = sets0.byKey.map!(set => set.root.children.map!(function MapSet (ref child) => child.set)).joiner.toSet;
732 				assert(!sets1.empty);
733 				if (sets1.byKey.front is unitSet)
734 					return Result(true, null); // one layer away from the bottom, nothing to swap with
735 				auto dim1 = sets1.byKey.front.root.dim;
736 				assert(sets1.byKey.all!(set => set !is unitSet), "Leaf/non-leaf nodes mismatch");
737 				assert(sets1.byKey.all!(set => set.root.dim == dim1), "Dim mismatch");
738 
739 				auto currentNodes = sets0.length + sets1.length;
740 
741 				MapSet[MapSet] swappedSets;
742 				HashSet!MapSet sets0new, sets1new;
743 
744 				foreach (set0; sets0.byKey)
745 				{
746 					Pair[][DimValue] subsets;
747 					foreach (ref pair0; set0.root.children)
748 						foreach (ref pair1; pair0.set.root.children)
749 							subsets[pair1.value] ~= Pair(pair0.value, pair1.set);
750 
751 					Pair[] newChildren;
752 					foreach (value, children; subsets)
753 					{
754 						children.sort();
755 						auto set1new = MapSet(new immutable Node(dim0, cast(immutable) children)).deduplicate;
756 						sets1new.add(set1new);
757 						newChildren ~= Pair(value, set1new);
758 					}
759 					newChildren.sort();
760 					auto set0new = MapSet(new immutable Node(dim1, cast(immutable) newChildren)).deduplicate;
761 					sets0new.add(set0new);
762 					swappedSets[set0] = set0new;
763 				}
764 
765 				auto newNodes = sets0new.length + sets1new.length;
766 
767 				if (newNodes < currentNodes)
768 					return Result(false, swappedSets); // Success, retry above layer
769 
770 				// Failure, descend
771 
772 				auto result1 = optimizeLayer(sets1);
773 				if (!result1.map)
774 				{
775 					assert(result1.done);
776 					return Result(true, null); // Done, bottom reached
777 				}
778 
779 				// Apply result
780 				sets0new.clear();
781 				foreach (set0; sets0.byKey)
782 				{
783 					set0.assertDeduplicated();
784 					auto newChildren = set0.root.children.dup;
785 					foreach (ref pair0; newChildren)
786 						pair0.set = result1.map[pair0.set];
787 					auto set0new = MapSet(new immutable Node(dim0, cast(immutable) newChildren)).deduplicate;
788 					sets0new.add(set0new);
789 					swappedSets[set0] = set0new;
790 				}
791 
792 				if (result1.done)
793 					return Result(true, swappedSets);
794 
795 				// Retry this layer
796 				auto result0 = optimizeLayer(sets0new);
797 				if (!result0.map)
798 				{
799 					assert(result0.done);
800 					return Result(true, swappedSets); // Bottom was reached upon retry, just return our results unchanged
801 				}
802 
803 				MapSet[MapSet] compoundedResult;
804 				foreach (set0; sets0.byKey)
805 					compoundedResult[set0] = result0.map[swappedSets[set0]];
806 				return Result(result0.done, compoundedResult);
807 			}
808 
809 			MapSet[1] root = [this.normalize];
810 			while (true)
811 			{
812 				auto result = optimizeLayer(root[].toSet());
813 				if (result.map)
814 					root[0] = result.map[root[0]];
815 				if (result.done)
816 					return root[0];
817 			}
818 		}());
819 	}
820 
821 	/// Return a set equivalent to a unit set (all dimensions
822 	/// explicitly set to `nullValue`), with all dimensions in `this`,
823 	/// in an order approximately following that of the dimensions in
824 	/// `this`.  Implicitly normalized.
825 	private MapSet dimOrderReference() const
826 	{
827 		if (this is unitSet || this is emptySet) return this;
828 
829 		DimName[] dims;
830 		HashSet!MapSet seen;
831 		void visit(MapSet set, size_t pos)
832 		{
833 			if (set is unitSet) return;
834 			if (set in seen) return;
835 			seen.add(set);
836 			if ((pos == dims.length || dims[pos] != set.root.dim) && !dims.canFind(set.root.dim))
837 			{
838 				dims.insertInPlace(pos, set.root.dim);
839 				pos++;
840 			}
841 			foreach (ref pair; set.root.children)
842 				visit(pair.set, pos);
843 		}
844 		visit(this, 0);
845 
846 		MapSet result = unitSet;
847 		foreach_reverse (dim; dims)
848 			result = MapSet(new immutable Node(dim, [Pair(nullValue, result)])).deduplicate();
849 
850 		return result;
851 	}
852 
853 	/// Refactor this matrix into one in which dimensions always occur
854 	/// in the same order, no matter what path is taken.
855 	MapSet normalize() const
856 	{
857 		if (this is unitSet || this is emptySet) return this;
858 
859 		return reorderUsing(dimOrderReference());
860 	}
861 
862 	private size_t maxDepth() const
863 	{
864 		import std.algorithm.comparison : max;
865 
866 		if (this is emptySet || this is unitSet)
867 			return 0;
868 		this.assertDeduplicated();
869 		return cache.maxDepth.require(this, {
870 			size_t maxDepth = 0;
871 			foreach (ref pair; root.children)
872 				maxDepth = max(maxDepth, pair.set.maxDepth());
873 			return 1 + maxDepth;
874 		}());
875 	}
876 
877 	/*private*/ MapSet swapDepth(size_t depth) const
878 	{
879 		if (this is emptySet || this is unitSet) return this;
880 		this.assertDeduplicated();
881 
882 		return cache.swapDepth.require(SetIdxOp(this, depth), {
883 			if (depth == 0)
884 			{
885 				foreach (ref pair; root.children)
886 					if (pair.set !is unitSet)
887 						return bringToFront(pair.set.root.dim);
888 				return this;
889 			}
890 			else
891 			{
892 				auto newChildren = root.children.dup;
893 				foreach (ref pair; newChildren)
894 					pair.set = pair.set.swapDepth(depth - 1);
895 				return MapSet(new immutable Node(root.dim, cast(immutable) newChildren)).deduplicate;
896 			}
897 		}());
898 	}
899 
900 	/// Refactor this matrix into one with the same data, but in which
901 	/// the dimensions always occur as in `reference` (which is
902 	/// assumed to be normalized).
903 	MapSet reorderUsing(MapSet reference) const
904 	{
905 		if (this is emptySet || reference is emptySet || reference is unitSet) return this;
906 		this.assertDeduplicated();
907 		reference.assertDeduplicated();
908 
909 		return cache.reorderUsing.require(SetSetOp(this, reference), {
910 			return bringToFront(reference.root.dim).lazyMap(set => set.reorderUsing(reference.root.children[0].set));
911 		}());
912 	}
913 
914 	void toString(scope void delegate(const(char)[]) sink) const
915 	{
916 		import std.format : formattedWrite;
917 		if (this is emptySet)
918 			sink("{}");
919 		else
920 		if (this is unitSet)
921 			sink("{[]}");
922 		else
923 			sink.formattedWrite!"%s"(*root);
924 	}
925 
926 	hash_t toHash() const
927 	{
928 		return
929 			this is emptySet ? 0 :
930 			this is unitSet ? 1 :
931 			root.toHash();
932 	}
933 
934 	bool opEquals(const typeof(this) s) const
935 	{
936 		if (root is s.root)
937 			return true;
938 		if (this is emptySet || this is unitSet || s is emptySet || s is unitSet)
939 			return this is s;
940 		return *root == *s.root;
941 	}
942 }
943 
944 unittest
945 {
946 	import std.algorithm.sorting : sort;
947 
948 	alias M = MapSet!(string, int);
949 	M m, n;
950 	m = m.merge(M.unitSet.set("x", 1).set("y", 5));
951 	m = m.merge(M.unitSet.set("x", 1).set("y", 6));
952 	assert(m.all("x") == [1]);
953 	assert(m.all("y").dup.sort.release == [5, 6]);
954 
955 	m = m.merge(M.unitSet.set("x", 2).set("y", 6));
956 	assert(m.get("x", 1).all("y").dup.sort.release == [5, 6]);
957 	assert(m.get("y", 6).all("x").dup.sort.release == [1, 2]);
958 
959 	m = m.subtract(M.unitSet.set("x", 1).set("y", 6));
960 	assert(m.all("x").dup.sort.release == [1, 2]);
961 	assert(m.all("y").dup.sort.release == [5, 6]);
962 	assert(m.get("x", 1).all("y") == [5]);
963 	assert(m.get("y", 6).all("x") == [2]);
964 
965 	m = M.emptySet;
966 	m = m.merge(M.unitSet.set("x", 10).set("y", 20));
967 	m = m.merge(M.unitSet.set("x", 10).set("y", 21));
968 	m = m.merge(M.unitSet.set("x", 11).set("y", 21));
969 	m = m.merge(M.unitSet.set("x", 11).set("y", 22));
970 	auto o = m.optimize();
971 	assert(o.uniqueNodes < m.uniqueNodes);
972 
973 	m = M.emptySet;
974 	assert(m.all("x") == []);
975 	m = M.unitSet;
976 	assert(m.all("x") == [0]);
977 	m = m.merge(M.unitSet.set("x", 1));
978 	assert(m.all("x").dup.sort.release == [0, 1]);
979 
980 	m = M.unitSet;
981 	assert(m.set("x", 1).set("x", 1).all("x") == [1]);
982 
983 	m = M.unitSet;
984 	m = m.cartesianProduct("x", [1, 2, 3]);
985 	m = m.cartesianProduct("y", [1, 2, 3]);
986 	m = m.cartesianProduct("z", [1, 2, 3]);
987 	assert(m.count == 3 * 3 * 3);
988 	assert(m            .all("x").dup.sort.release == [1, 2, 3]);
989 	assert(m.set("z", 1).all("x").dup.sort.release == [1, 2, 3]);
990 	assert(m.set("x", 1).all("z").dup.sort.release == [1, 2, 3]);
991 
992 	m = M.unitSet;
993 	m = m.cartesianProduct("a", [1, 2, 3]);
994 	m = m.cartesianProduct("b", [1, 2, 3]);
995 	n = M.unitSet;
996 	n = n.cartesianProduct("c", [1, 2, 3]);
997 	n = n.cartesianProduct("d", [1, 2, 3]);
998 	m = m.cartesianProduct(n);
999 	assert(m.count == 3 * 3 * 3 * 3);
1000 
1001 	assert(M.unitSet != M.unitSet.set("x", 1));
1002 
1003 	m = M.emptySet;
1004 	m = m.merge(M.unitSet.set("x", 1).set("y", 11));
1005 	m = m.merge(M.unitSet.set("x", 2).set("y", 12).set("z", 22));
1006 	m = m.completeSuperset();
1007 	assert(m.uniqueNodes == 3);
1008 	assert(m.count == 8); // 2 ^^ 3
1009 	assert(m.all("x").dup.sort.release == [1, 2]);
1010 	assert(m.all("y").dup.sort.release == [11, 12]);
1011 	assert(m.all("z").dup.sort.release == [0, 22]);
1012 }
1013 
1014 
1015 /// Allows executing a deterministic algorithm over all states in a given MapSet.
1016 /// If a variable is not queried by the algorithm, states for all
1017 /// variations of that variable are processed in one iteration.
1018 struct MapSetVisitor(A, V)
1019 {
1020 	alias Set = MapSet!(A, V);
1021 	Set set;
1022 
1023 	struct Var
1024 	{
1025 		A name;
1026 		const(V)[] values;
1027 		size_t pos;
1028 	}
1029 	Var[] stack;
1030 	size_t stackPos;
1031 	V[A] singularValues, resolvedValues; // Faster than workingSet.all(name)[0]
1032 	private HashSet!A dirtyValues; // Accumulate MapSet.set calls
1033 	private Set workingSet;
1034 
1035 	this(Set set)
1036 	{
1037 		this.set = set;
1038 		foreach (dim, values; set.getDimsAndValues())
1039 			if (values.length == 1)
1040 				singularValues[dim] = values.byKey.front;
1041 	}
1042 
1043 	/// Resets iteration to the beginning.
1044 	/// Equivalent to but faster than constructing a new MapSetVisitor
1045 	/// instance (`visitor = MapSetVisitor(visitor.set)`).
1046 	void reset()
1047 	{
1048 		workingSet = Set.emptySet;
1049 		stack = null;
1050 	}
1051 
1052 	/// Returns true if there are more states to iterate over,
1053 	/// otherwise returns false
1054 	bool next()
1055 	{
1056 		if (set is Set.emptySet)
1057 			return false;
1058 		if (workingSet is Set.emptySet)
1059 		{
1060 			// first iteration
1061 		}
1062 		else
1063 			while (true)
1064 			{
1065 				if (!stack.length)
1066 					return false; // All possibilities exhausted
1067 				auto last = &stack[$-1];
1068 				last.pos++;
1069 				if (last.pos == last.values.length)
1070 				{
1071 					stack = stack[0 .. $ - 1];
1072 					continue;
1073 				}
1074 				break;
1075 			}
1076 
1077 		workingSet = set;
1078 		resolvedValues = null;
1079 		dirtyValues.clear();
1080 		stackPos = 0;
1081 		return true;
1082 	}
1083 
1084 	private void flush()
1085 	{
1086 		if (dirtyValues.empty)
1087 			return;
1088 		workingSet = workingSet.remove((A name) => name in dirtyValues);
1089 		foreach (name; dirtyValues)
1090 			workingSet = workingSet.addDim(name, resolvedValues[name]);
1091 		dirtyValues.clear();
1092 	}
1093 
1094 	@property Set currentSubset()
1095 	{
1096 		assert(workingSet !is Set.emptySet, "Not iterating");
1097 		flush();
1098 		return workingSet;
1099 	}
1100 
1101 	/// Algorithm interface - get a value by name
1102 	V get(A name)
1103 	{
1104 		assert(workingSet !is Set.emptySet, "Not iterating");
1105 		if (auto pvalue = name in resolvedValues)
1106 			return *pvalue;
1107 		if (auto pvalue = name in singularValues)
1108 			return *pvalue;
1109 
1110 		if (stackPos == stack.length)
1111 		{
1112 			// Expand new variable
1113 			auto values = workingSet.all(name);
1114 			auto value = values[0];
1115 			resolvedValues[name] = value;
1116 			stack ~= Var(name, values, 0);
1117 			stackPos++;
1118 			if (values.length > 1)
1119 				workingSet = workingSet.get(name, value);
1120 			return value;
1121 		}
1122 
1123 		// Iterate over known variable
1124 		auto var = &stack[stackPos];
1125 		assert(var.name == name, "Mismatching get order");
1126 		auto value = var.values[var.pos];
1127 		workingSet = workingSet.get(var.name, value);
1128 		assert(workingSet !is Set.emptySet, "Empty set after restoring");
1129 		resolvedValues[var.name] = value;
1130 		stackPos++;
1131 		return value;
1132 	}
1133 
1134 	/// Algorithm interface - set a value by name
1135 	void put(A name, V value)
1136 	{
1137 		assert(workingSet !is Set.emptySet, "Not iterating");
1138 		if (name !in resolvedValues)
1139 			if (auto pvalue = name in singularValues)
1140 				if (*pvalue == value)
1141 					return;
1142 
1143 		resolvedValues[name] = value;
1144 		dirtyValues.add(name);
1145 	}
1146 
1147 	/// Apply a function over every possible value of the given
1148 	/// variable, without resolving it (unless it's already resolved).
1149 	void transform(A name, scope void delegate(ref V value) fun)
1150 	{
1151 		assert(workingSet !is Set.emptySet, "Not iterating");
1152 		if (auto pvalue = name in resolvedValues)
1153 		{
1154 			dirtyValues.add(name);
1155 			return fun(*pvalue);
1156 		}
1157 
1158 		workingSet = workingSet.bringToFront(name);
1159 		Set[V] newChildren;
1160 		foreach (ref child; workingSet.root.children)
1161 		{
1162 			V value = child.value;
1163 			fun(value);
1164 			newChildren.updateVoid(value,
1165 				() => child.set,
1166 				(ref Set set)
1167 				{
1168 					set = set.merge(child.set);
1169 				});
1170 		}
1171 		workingSet = Set(new immutable Set.Node(name, cast(immutable) newChildren)).deduplicate;
1172 	}
1173 
1174 	/// Apply a function over every possible value of the given
1175 	/// variable, without resolving it (unless it's already resolved).
1176 	/// The function is assumed to be injective (does not produce
1177 	/// duplicate outputs for distinct inputs).
1178 	void injectiveTransform(A name, scope void delegate(ref V value) fun)
1179 	{
1180 		assert(workingSet !is Set.emptySet, "Not iterating");
1181 		if (auto pvalue = name in resolvedValues)
1182 		{
1183 			dirtyValues.add(name);
1184 			return fun(*pvalue);
1185 		}
1186 
1187 		workingSet = workingSet.bringToFront(name);
1188 		auto newChildren = workingSet.root.children.dup;
1189 		foreach (ref child; newChildren)
1190 			fun(child.value);
1191 		newChildren.sort();
1192 		workingSet = Set(new immutable Set.Node(name, cast(immutable) newChildren)).deduplicate;
1193 	}
1194 
1195 	/// Inject a variable and values to iterate over.
1196 	/// The variable must not have been resolved yet.
1197 	void inject(A name, V[] values)
1198 	{
1199 		assert(name !in resolvedValues, "Already resolved");
1200 		assert(values.length > 0, "Injecting zero values would result in an empty set");
1201 		singularValues.remove(name);
1202 		workingSet = workingSet.cartesianProduct(name, values);
1203 	}
1204 }
1205 
1206 /// An algorithm which divides two numbers.
1207 /// When the divisor is zero, we don't even query the dividend,
1208 /// therefore processing all dividends in one iteration.
1209 unittest
1210 {
1211 	alias M = MapSet!(string, int);
1212 	M m = M.unitSet
1213 		.cartesianProduct("divisor" , [0, 1, 2])
1214 		.cartesianProduct("dividend", [0, 1, 2]);
1215 	assert(m.count == 9);
1216 
1217 	auto v = MapSetVisitor!(string, int)(m);
1218 	M results;
1219 	int iterations;
1220 	while (v.next())
1221 	{
1222 		iterations++;
1223 		auto divisor = v.get("divisor");
1224 		if (divisor == 0)
1225 			continue;
1226 		auto dividend = v.get("dividend");
1227 		v.put("quotient", dividend / divisor);
1228 		results = results.merge(v.currentSubset);
1229 	}
1230 
1231 	assert(iterations == 7); // 1 for division by zero + 3 for division by one + 3 for division by two
1232 	assert(results.get("divisor", 2).get("dividend", 2).all("quotient") == [1]);
1233 	assert(results.get("divisor", 0).count == 0);
1234 }
1235 
1236 unittest
1237 {
1238 	import std.algorithm.sorting : sort;
1239 
1240 	alias M = MapSet!(string, int);
1241 	M m = M.unitSet.cartesianProduct("x", [1, 2, 3]);
1242 	auto v = MapSetVisitor!(string, int)(m);
1243 	v.next();
1244 	v.transform("x", (ref int v) { v *= 2; });
1245 	assert(v.currentSubset.all("x").dup.sort.release == [2, 4, 6]);
1246 }
1247 
1248 unittest
1249 {
1250 	alias M = MapSet!(string, int);
1251 	M m = M.unitSet.cartesianProduct("x", [1, 2, 3]);
1252 	auto v = MapSetVisitor!(string, int)(m);
1253 	while (v.next())
1254 	{
1255 		v.transform("x", (ref int v) { v *= 2; });
1256 		v.put("y", v.get("x"));
1257 	}
1258 }