1 /**
2  * Data structure for optimized "sparse" N-dimensional matrices
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <ae@cy.md>
12  */
13 
14 module ae.utils.mapset;
15 
16 static if (__VERSION__ >= 2083):
17 
18 import std.algorithm.iteration;
19 import std.algorithm.searching;
20 import std.algorithm.sorting;
21 import std.array;
22 import std.exception;
23 import std.typecons : tuple;
24 
25 import ae.utils.aa;
26 import ae.utils.array : amap;
27 
28 /**
29    Data structure for holding optimized "sparse" N-dimensional matrices.
30    The number of dimensions is arbitrary and can be varied at runtime.
31 
32    Unlike classical sparse arrays/matrices, this data structure is
33    optimized not just for arrays which are mostly zero (or any given
34    value), but for any matrices where data is likely to repeat across
35    dimensions.
36 
37    This is done by storing the data as a tree, with each depth
38    representing one dimension. A sub-tree is, thus, a sub-matrix
39    (slice of the matrix represented by the top-level tree) along the
40    top-level dimension. Each sub-tree is individually immutable, and
41    can therefore be shared within the same tree or even by several
42    instances of this type.
43 
44    Dimension order need not be the same for all sub-trees. Even the
45    number of dimensions in siblings' sub-trees need not be the same;
46    i.e, the data structure is itself sparse with regards to the
47    dimensions.
48 
49    As there is no explicit "value" type (associated with every set of
50    coordinates), this data structure can be seen as a representation
51    of a set of points, but it can be simulated by adding a "value"
52    dimension (in the same way how a gray-scale image can be
53    represented as a set of 3D points like a height-map).
54 
55    An alternative way to look at this data structure is as a set of
56    maps (i.e. associative arrays). Thus, each layer of the tree stores
57    one possible map key, and under it, all the maps that have this key
58    with a specific value. Note that for this use case, DimValue needs
59    to have a value (configured using nullValue) which indicates the
60    absence of a certain key (dimension) in a map.
61 
62    Params:
63      DimName = The type used to indicate the name (or index) of a dimension.
64      DimValue = The type for indicating a point on a dimension's axis.
65      nullValue = A value for DimValue indicating that a dimension is unset
66  */
67 struct MapSet(DimName, DimValue, DimValue nullValue = DimValue.init)
68 {
69 	/// Logically, each MapSet node has a map of values to a subset.
70 	/// However, it is faster to represent that map as an array of key-value pairs
71 	/// rather than a D associative array, so we do that here.
72 	struct Pair
73 	{
74 		DimValue value; ///
75 		MapSet set; ///
76 
77 		int opCmp(ref const typeof(this) other) const
78 		{
79 			static if (is(typeof(value.opCmp(other.value)) : int))
80 				return value.opCmp(other.value);
81 			else
82 				return value < other.value ? -1 : value > other.value ? 1 : 0;
83 		} ///
84 	}
85 
86 	struct Node
87 	{
88 		DimName dim; ///
89 		Pair[] children; ///
90 
91 		immutable this(DimName dim, immutable Pair[] children)
92 		{
93 			// Zero children doesn't make sense.
94 			// It would be the equivalent of an empty set,
95 			// but then we should just use MapSet.emptySet instead.
96 			assert(!children.empty, "Node with zero children");
97 
98 			// Nodes should be in their canonical form for
99 			// memoization and deduplication to be effective.
100 			assert(children.isSorted, "Children are not sorted");
101 
102 			this.dim = dim;
103 			this.children = children;
104 
105 			// Because each subset is immutable, we can
106 			// pre-calculate the hash during construction.
107 			hash = hashOf(dim) ^ hashOf(children);
108 
109 			size_t totalMembers = 0;
110 			foreach (i, ref pair; children)
111 			{
112 				if (i)
113 					assert(pair.value != children[i-1].value, "Duplicate value");
114 
115 				pair.set.assertDeduplicated();
116 				// Same as "Node with zero children"
117 				assert(pair.set !is emptySet, "Empty set as subset");
118 
119 				totalMembers += pair.set.count;
120 			}
121 			this.totalMembers = totalMembers;
122 		} ///
123 
124 		immutable this(DimName dim, immutable MapSet[DimValue] children)
125 		{
126 			auto childrenList = children.byKeyValue.map!(kv => Pair(kv.key, kv.value)).array;
127 			childrenList.sort();
128 			this(dim, cast(immutable) childrenList);
129 		} ///
130 
131 		void toString(scope void delegate(const(char)[]) sink) const
132 		{
133 			import std.format : formattedWrite;
134 			sink.formattedWrite("{ %(%s%), [ ", (&dim)[0..1]);
135 			bool first = true;
136 			foreach (ref pair; children)
137 			{
138 				if (first)
139 					first = false;
140 				else
141 					sink(", ");
142 				sink.formattedWrite("%s : %s", pair.value, pair.set);
143 			}
144 			sink(" ] }");
145 		} ///
146 
147 		private hash_t hash;
148 		private size_t totalMembers;
149 
150 		hash_t toHash() const
151 		{
152 			return hash;
153 		} ///
154 
155 		bool opEquals(ref const typeof(this) s) const
156 		{
157 			return hash == s.hash && dim == s.dim && children == s.children;
158 		} ///
159 	} ///
160 
161 	/// Indicates the empty set.
162 	/// May only occur at the top level (never as a subset).
163 	private enum emptySetRoot = cast(immutable(Node)*)1;
164 
165 	/// If emptySetRoot, zero values.
166 	/// If null, one value with zero dimensions.
167 	/// Otherwise, pointer to node describing the next dimension.
168 	immutable(Node)* root = emptySetRoot;
169 
170 	/// A set containing a single nil-dimensional element.
171 	/// Holds exactly one value (a point with all dimensions at
172 	/// nullValue).
173 	static immutable unitSet = MapSet(null);
174 
175 	/// The empty set. Represents a set which holds zero values.
176 	static immutable emptySet = MapSet(emptySetRoot);
177 
178 	/// Return the total number of items in this set.
179 	size_t count() const
180 	{
181 		if (this is emptySet)
182 			return 0;
183 		if (this is unitSet)
184 			return 1;
185 		return root.totalMembers;
186 	}
187 
188 	private struct SetSetOp { MapSet a, b; }
189 	private struct SetDimOp { MapSet set; DimName dim; }
190 	private struct SetIdxOp { MapSet set; size_t index; }
191 	private struct Cache
192 	{
193 		/// For deduplication - key is value
194 		MapSet[MapSet] instances;
195 		/// Operations - things that operate recursively on subtrees
196 		/// should be memoized here
197 		MapSet[SetSetOp] merge, subtract, cartesianProduct, reorderUsing;
198 		MapSet[SetDimOp] remove, bringToFront;
199 		MapSet[SetIdxOp] swapDepth;
200 		MapSet[MapSet] optimize, completeSuperset;
201 		size_t[MapSet] uniqueNodes, maxDepth;
202 	}
203 
204 	/// Because subtrees can be reused within the tree, a way of
205 	/// memoizing operations across the entire tree (instead of just
206 	/// across children of a single node, or siblings) is crucial for
207 	/// performance.
208 	static Cache cache;
209 
210 	/// Clear the global operations cache.
211 	static void clearCache()
212 	{
213 		cache = Cache.init;
214 	}
215 
216 	/// Because MapSet operations benefit greatly from memoization,
217 	/// maintaining a set of interned sets benefits performance. After
218 	/// calling `clearCache`, call this method to re-register extant
219 	/// live instances to the instance cache.
220 	void addToCache() const
221 	{
222 		cache.instances.updateVoid(
223 			this,
224 			{
225 				if (this !is emptySet && this !is unitSet)
226 					foreach (ref pair; root.children)
227 						pair.set.addToCache();
228 				return this;
229 			},
230 			(ref MapSet set)
231 			{
232 				assert(set is this);
233 			}
234 		);
235 	}
236 
237 	/// Intern and deduplicate this MapSet.
238 	/// Needs to be called only after constructing a MapSet manually
239 	/// (by allocating, populating, and setting `root`).
240 	MapSet deduplicate() const
241 	{
242 		MapSet deduplicated;
243 		cache.instances.updateVoid(
244 			this,
245 			{
246 				debug if (this !is emptySet && this !is unitSet)
247 					foreach (ref pair; root.children)
248 						pair.set.assertDeduplicated();
249 				deduplicated = this;
250 				return this;
251 			},
252 			(ref MapSet set)
253 			{
254 				deduplicated = set;
255 			}
256 		);
257 		return deduplicated;
258 	}
259 
260 	private void assertDeduplicated() const { debug assert(this is emptySet || this is unitSet || cache.instances[this] is this); }
261 
262 	/// Count and return the total number of unique nodes in this MapSet.
263 	size_t uniqueNodes() const
264 	{
265 		return cache.uniqueNodes.require(this, {
266 			HashSet!MapSet seen;
267 			void visit(MapSet set)
268 			{
269 				if (set is emptySet || set is unitSet || set in seen)
270 					return;
271 				seen.add(set);
272 				foreach (ref pair; set.root.children)
273 					visit(pair.set);
274 			}
275 			visit(this);
276 			return seen.length;
277 		}());
278 	}
279 
280 	/// Collect the names of all dimensions occurring in this tree.
281 	DimName[] getDims() const
282 	{
283 		HashSet!DimName dims;
284 		HashSet!MapSet seen;
285 		void visit(MapSet set)
286 		{
287 			if (set is emptySet || set is unitSet || set in seen)
288 				return;
289 			seen.add(set);
290 			dims.add(set.root.dim);
291 			foreach (ref pair; set.root.children)
292 				visit(pair.set);
293 		}
294 		visit(this);
295 		return dims.keys;
296 	}
297 
298 	/// Return all values for all dimensions occurring in this set.
299 	/// The Cartesian product of these values would thus be a superset
300 	/// of this set.
301 	/// This is equivalent to, but faster than, calling `getDims` and
302 	/// then `all` for each dim.
303 	HashSet!(immutable(DimValue))[DimName] getDimsAndValues(this This)()
304 	{
305 		if (this is emptySet) return null;
306 
307 		// Be careful to count the implicit nullValues on branches
308 		// where a dim doesn't occur.
309 		MapSet set = this.completeSuperset;
310 		HashSet!(immutable(DimValue))[DimName] result;
311 		while (set !is unitSet)
312 		{
313 			DimName dim = set.root.dim;
314 			HashSet!(immutable(DimValue)) values = set.root.children.map!((ref child) => child.value).toSet;
315 			bool added = result.addNew(dim, values);
316 			assert(added, "Duplicate dimension");
317 			set = set.root.children[0].set;
318 		}
319 		return result;
320 	}
321 
322 	/// Combine two matrices together, returning their union.
323 	/// If `other` is a subset of `this`, return `this` unchanged.
324 	MapSet merge(MapSet other) const
325 	{
326 		if (this is emptySet) return other;
327 		if (other is emptySet) return this;
328 		if (this is other) return this;
329 		if (!root) return bringToFront(other.root.dim).merge(other);
330 
331 		this.assertDeduplicated();
332 		other.assertDeduplicated();
333 		return cache.merge.require(SetSetOp(this, other), {
334 			other = other.bringToFront(root.dim);
335 
336 			if (root.children.length == 1 && other.root.children.length == 1 &&
337 				root.children[0].value == other.root.children[0].value)
338 			{
339 				// Single-child optimization
340 				auto mergeResult = root.children[0].set.merge(other.root.children[0].set);
341 				if (mergeResult !is root.children[0].set)
342 					return MapSet(new immutable Node(root.dim, [Pair(root.children[0].value, mergeResult)])).deduplicate;
343 				else
344 					return this;
345 			}
346 
347 			MapSet[DimValue] newChildren;
348 			foreach (ref pair; root.children)
349 				newChildren[pair.value] = pair.set;
350 
351 			bool modified;
352 			foreach (ref pair; other.root.children)
353 				newChildren.updateVoid(pair.value,
354 					{
355 						modified = true;
356 						return pair.set;
357 					},
358 					(ref MapSet oldSubmatrix)
359 					{
360 						auto mergeResult = oldSubmatrix.merge(pair.set);
361 						if (oldSubmatrix !is mergeResult)
362 						{
363 							oldSubmatrix = mergeResult;
364 							modified = true;
365 						}
366 					}
367 				);
368 
369 			if (!modified)
370 				return this;
371 
372 			return MapSet(new immutable Node(root.dim, cast(immutable) newChildren)).deduplicate;
373 		}());
374 	}
375 
376 	/// Return the difference between `this` and the given set.
377 	/// If `other` does not intersect with `this`, return `this` unchanged.
378 	MapSet subtract(MapSet other) const
379 	{
380 		if (this is emptySet) return this;
381 		if (other is emptySet) return this;
382 		if (this is other) return emptySet;
383 		if (!root) return bringToFront(other.root.dim).subtract(other);
384 
385 		this.assertDeduplicated();
386 		other.assertDeduplicated();
387 		return cache.subtract.require(SetSetOp(this, other), {
388 			other = other.bringToFront(root.dim);
389 
390 			if (root.children.length == 1 && other.root.children.length == 1 &&
391 				root.children[0].value == other.root.children[0].value)
392 			{
393 				// Single-child optimization
394 				auto subtractResult = root.children[0].set.subtract(other.root.children[0].set);
395 				if (subtractResult is emptySet)
396 					return emptySet;
397 				else
398 				if (subtractResult !is root.children[0].set)
399 					return MapSet(new immutable Node(root.dim, [Pair(root.children[0].value, subtractResult)])).deduplicate;
400 				else
401 					return this;
402 			}
403 
404 			MapSet[DimValue] newChildren;
405 			foreach (ref pair; root.children)
406 				newChildren[pair.value] = pair.set;
407 
408 			bool modified;
409 			foreach (ref pair; other.root.children)
410 				if (auto poldSubmatrix = pair.value in newChildren)
411 				{
412 					auto subtractResult = poldSubmatrix.subtract(pair.set);
413 					if (*poldSubmatrix !is subtractResult)
414 					{
415 						*poldSubmatrix = subtractResult;
416 						if (subtractResult is emptySet)
417 							newChildren.remove(pair.value);
418 						modified = true;
419 					}
420 				}
421 
422 			if (!modified)
423 				return this;
424 			if (!newChildren.length)
425 				return emptySet;
426 
427 			return MapSet(new immutable Node(root.dim, cast(immutable) newChildren)).deduplicate;
428 		}());
429 	}
430 
431 	/// Apply `fn` over subsets, and return a new `MapSet`.
432 	/*private*/ MapSet lazyMap(scope MapSet delegate(MapSet) fn) const
433 	{
434 		// Defer allocation until the need to mutate
435 		foreach (i, ref pair; root.children) // Read-only scan
436 		{
437 			auto newSet = fn(pair.set);
438 			if (newSet !is pair.set)
439 			{
440 				auto newChildren = new Pair[root.children.length];
441 				// Known to not need mutation
442 				newChildren[0 .. i] = root.children[0 .. i];
443 				// Reuse already calculated result
444 				newChildren[i] = Pair(pair.value, newSet);
445 				// Continue scan with mutation
446 				foreach (j, ref pair2; root.children[i + 1 .. $])
447 					newChildren[i + 1 + j] = Pair(pair2.value, fn(pair2.set));
448 				return MapSet(new immutable Node(root.dim, cast(immutable) newChildren)).deduplicate;
449 			}
450 		}
451 		return this; // No mutation necessary
452 	}
453 
454 	/// "Unset" a given dimension, removing it from the matrix.
455 	/// The result is the union of all sub-matrices for all values of `dim`.
456 	MapSet remove(DimName dim) const
457 	{
458 		if (this is emptySet || this is unitSet) return this;
459 		this.assertDeduplicated();
460 		return cache.remove.require(SetDimOp(this, dim), {
461 			if (root.dim == dim)
462 			{
463 				MapSet result;
464 				foreach (ref pair; root.children)
465 					result = result.merge(pair.set);
466 				return result;
467 			}
468 			return lazyMap(set => set.remove(dim));
469 		}());
470 	}
471 
472 	/// Unset dimensions according to a predicate.
473 	/// This is faster than removing dimensions individually, however,
474 	/// unlike the `DimName` overload, this one does not benefit from global memoization.
475 	MapSet remove(bool delegate(DimName) pred) const
476 	{
477 		if (this is emptySet) return this;
478 		this.assertDeduplicated();
479 
480 		MapSet[MapSet] cache;
481 		MapSet visit(MapSet set)
482 		{
483 			if (set is unitSet)
484 				return set;
485 			return cache.require(set, {
486 				if (pred(set.root.dim))
487 				{
488 					MapSet result;
489 					foreach (ref pair; set.root.children)
490 						result = result.merge(visit(pair.set));
491 					return result;
492 				}
493 				return set.lazyMap(set => visit(set));
494 			}());
495 		}
496 		return visit(this);
497 	}
498 
499 	/// Set the given dimension to always have the given value,
500 	/// collapsing (returning the union of) all sub-matrices
501 	/// for previously different values of `dim`.
502 	MapSet set(DimName dim, DimValue value) const
503 	{
504 		return this.remove(dim).addDim(dim, value);
505 	}
506 
507 	/// Adds a new dimension with the given value.
508 	/// The dimension must not have previously existed in `this`.
509 	private MapSet addDim(DimName dim, DimValue value) const
510 	{
511 		if (this is emptySet) return this;
512 		this.assertDeduplicated();
513 		assert(this is this.remove(dim), "Duplicate dimension");
514 		if (value == nullValue)
515 			return this;
516 		return MapSet(new immutable Node(dim, [Pair(value, this)])).deduplicate;
517 	}
518 
519 	/// Return a sub-matrix for all points where the given dimension has this value.
520 	/// The dimension itself is not included in the result.
521 	/// Note: if `dim` doesn't occur (e.g. because `this` is the unit set) and `value` is `nullValue`,
522 	/// this returns the unit set (not the empty set).
523 	MapSet slice(DimName dim, DimValue value) const
524 	{
525 		if (this is emptySet) return emptySet;
526 		this.assertDeduplicated();
527 		foreach (ref pair; bringToFront(dim).root.children)
528 			if (pair.value == value)
529 				return pair.set;
530 		return emptySet;
531 	}
532 
533 	/// Return a subset of this set for all points where the given dimension has this value.
534 	/// Unlike `slice`, the dimension itself is included in the result (with the given value).
535 	MapSet get(DimName dim, DimValue value) const
536 	{
537 		if (this is emptySet) return emptySet;
538 		this.assertDeduplicated();
539 		foreach (ref pair; bringToFront(dim).root.children)
540 			if (pair.value == value)
541 				return MapSet(new immutable Node(dim, [Pair(value, pair.set)])).deduplicate;
542 		return emptySet;
543 	}
544 
545 	/// Return all unique values occurring for a given dimension.
546 	/// Unless this is the empty set, the return value is always non-empty.
547 	/// If `dim` doesn't occur, it will be `[nullValue]`.
548 	const(DimValue)[] all(DimName dim) const
549 	{
550 		// return bringToFront(dim).root.children.keys;
551 		if (this is emptySet) return null;
552 		if (this is unitSet) return [nullValue];
553 		if (root.dim == dim) return root.children.amap!(child => child.value);
554 		this.assertDeduplicated();
555 
556 		HashSet!DimValue allValues;
557 		HashSet!MapSet seen;
558 		void visit(MapSet set)
559 		{
560 			if (set is unitSet)
561 			{
562 				allValues.add(nullValue);
563 				return;
564 			}
565 			if (set in seen)
566 				return;
567 			seen.add(set);
568 
569 			if (set.root.dim == dim)
570 				foreach (ref pair; set.root.children)
571 					allValues.add(pair.value);
572 			else
573 				foreach (ref pair; set.root.children)
574 					visit(pair.set);
575 		}
576 		visit(this);
577 		return allValues.keys;
578 	}
579 
580 	/// Return a set which represents the Cartesian product between
581 	/// this set and the given `values` across the specified
582 	/// dimension.
583 	MapSet cartesianProduct(DimName dim, DimValue[] values) const
584 	{
585 		if (this is emptySet) return emptySet;
586 		if (values.length == 0) return emptySet;
587 		this.assertDeduplicated();
588 		auto unset = this.remove(dim);
589 		auto children = values.map!(value => Pair(value, unset)).array;
590 		children.sort();
591 		return MapSet(new immutable Node(dim, cast(immutable) children)).deduplicate;
592 	}
593 
594 	/// Return a set which represents the Cartesian product between
595 	/// this and the given set.
596 	/// Duplicate dimensions are first removed from `this` set.
597 	/// For best performance, call `big.cartesianProduct(small)`
598 	MapSet cartesianProduct(MapSet other) const
599 	{
600 		if (this is emptySet || other is emptySet) return emptySet;
601 		if (this is unitSet) return other;
602 		if (other is unitSet) return this;
603 
604 		MapSet unset = this;
605 		foreach (dim; other.getDims())
606 			unset = unset.remove(dim);
607 
608 		return other.uncheckedCartesianProduct(unset);
609 	}
610 
611 	// Assumes that the dimensions in `this` and `other` are disjoint.
612 	private MapSet uncheckedCartesianProduct(MapSet other) const
613 	{
614 		if (this is unitSet) return other;
615 		if (other is unitSet) return this;
616 
617 		this.assertDeduplicated();
618 		other.assertDeduplicated();
619 
620 		return cache.cartesianProduct.require(SetSetOp(this, other), {
621 			return lazyMap(set => set.uncheckedCartesianProduct(other));
622 		}());
623 	}
624 
625 	/// Return a superset of this set, consisting of a Cartesian
626 	/// product of every value of every dimension. The total number of
627 	/// unique nodes is thus equal to the number of dimensions.
628 	MapSet completeSuperset() const
629 	{
630 		if (this is emptySet || this is unitSet) return this;
631 		this.assertDeduplicated();
632 
633 		return cache.completeSuperset.require(this, {
634 			MapSet child = MapSet.emptySet;
635 			foreach (ref pair; root.children)
636 				child = child.merge(pair.set);
637 			child = child.completeSuperset();
638 			auto newChildren = root.children.dup;
639 			foreach (ref pair; newChildren)
640 				pair.set = child;
641 			return MapSet(new immutable Node(root.dim, cast(immutable) newChildren)).deduplicate;
642 		}());
643 	}
644 
645 	/// Refactor this matrix into one with the same data,
646 	/// but putting the given dimension in front.
647 	/// This will speed up access to values with the given dimension.
648 	/// If the dimension does not yet occur in the set (or any subset),
649 	/// it is instantiated with a single `nullValue` value.
650 	/// The set must be non-empty.
651 	MapSet bringToFront(DimName dim) const
652 	{
653 		assert(this !is emptySet, "Empty sets may not have dimensions");
654 
655 		if (this is unitSet)
656 		{
657 			// We reached the bottom, and did not find `dim` along the way.
658 			// Create it now.
659 			return MapSet(new immutable Node(dim, [Pair(nullValue, unitSet)])).deduplicate;
660 		}
661 
662 		if (dim == root.dim)
663 		{
664 			// Already at the front.
665 			return this;
666 		}
667 
668 		// 1. Recurse.
669 		// 2. After recursion, all children should have `dim` at the front.
670 		// So, just swap this layer with the next one.
671 
672 		this.assertDeduplicated();
673 		return cache.bringToFront.require(SetDimOp(this, dim), {
674 			if (root.children.length == 1)
675 			{
676 				auto newSubmatrix = root.children[0].set.bringToFront(dim);
677 				auto newChildren = newSubmatrix.root.children.dup;
678 				foreach (ref pair; newChildren)
679 					pair.set = MapSet(new immutable Node(root.dim, [Pair(root.children[0].value, pair.set)])).deduplicate;
680 				return MapSet(new immutable Node(dim, cast(immutable) newChildren)).deduplicate;
681 			}
682 
683 			Pair[][DimValue] subsets;
684 			foreach (ref pair; root.children)
685 			{
686 				auto newSubmatrix = pair.set.bringToFront(dim);
687 				assert(newSubmatrix.root.dim == dim);
688 				foreach (ref pair2; newSubmatrix.root.children)
689 					subsets[pair2.value] ~= Pair(pair.value, pair2.set);
690 			}
691 			Pair[] newChildren;
692 			foreach (value, children; subsets)
693 			{
694 				children.sort();
695 				newChildren ~= Pair(value, MapSet(new immutable Node(root.dim, cast(immutable) children)).deduplicate);
696 			}
697 			newChildren.sort();
698 			return MapSet(new immutable Node(dim, cast(immutable) newChildren)).deduplicate;
699 		}());
700 	}
701 
702 	/// Refactor this matrix into one with the same data,
703 	/// but attempting to lower the total number of nodes.
704 	MapSet optimize() const
705 	{
706 		if (this is emptySet || this is unitSet) return this;
707 		this.assertDeduplicated();
708 
709 		return cache.optimize.require(this, {
710 			struct Result { bool done; MapSet[MapSet] map; }
711 			static Result optimizeLayer(HashSet!MapSet sets0)
712 			{
713 				// - At the bottom?
714 				//   - Yes:
715 				//     - return failure
716 				//   - No:
717 				//     - Try to swap this layer. Success?
718 				//       - Yes:
719 				//         - return success
720 				//       - No:
721 				//         - Recurse and try to swap next layer. Success?
722 				//           - Yes: Retry this layer
723 				//           - No: return failure (bottom reached)
724 
725 				assert(!sets0.empty);
726 				if (sets0.byKey.front is unitSet)
727 					return Result(true, null); // at the bottom
728 				auto dim0 = sets0.byKey.front.root.dim;
729 				assert(sets0.byKey.all!(set => set !is unitSet), "Leaf/non-leaf nodes mismatch");
730 				assert(sets0.byKey.all!(set => set.root.dim == dim0), "Dim mismatch");
731 
732 				auto sets1 = sets0.byKey.map!(set => set.root.children.map!(function MapSet (ref child) => child.set)).joiner.toSet;
733 				assert(!sets1.empty);
734 				if (sets1.byKey.front is unitSet)
735 					return Result(true, null); // one layer away from the bottom, nothing to swap with
736 				auto dim1 = sets1.byKey.front.root.dim;
737 				assert(sets1.byKey.all!(set => set !is unitSet), "Leaf/non-leaf nodes mismatch");
738 				assert(sets1.byKey.all!(set => set.root.dim == dim1), "Dim mismatch");
739 
740 				auto currentNodes = sets0.length + sets1.length;
741 
742 				MapSet[MapSet] swappedSets;
743 				HashSet!MapSet sets0new, sets1new;
744 
745 				foreach (set0; sets0.byKey)
746 				{
747 					Pair[][DimValue] subsets;
748 					foreach (ref pair0; set0.root.children)
749 						foreach (ref pair1; pair0.set.root.children)
750 							subsets[pair1.value] ~= Pair(pair0.value, pair1.set);
751 
752 					Pair[] newChildren;
753 					foreach (value, children; subsets)
754 					{
755 						children.sort();
756 						auto set1new = MapSet(new immutable Node(dim0, cast(immutable) children)).deduplicate;
757 						sets1new.add(set1new);
758 						newChildren ~= Pair(value, set1new);
759 					}
760 					newChildren.sort();
761 					auto set0new = MapSet(new immutable Node(dim1, cast(immutable) newChildren)).deduplicate;
762 					sets0new.add(set0new);
763 					swappedSets[set0] = set0new;
764 				}
765 
766 				auto newNodes = sets0new.length + sets1new.length;
767 
768 				if (newNodes < currentNodes)
769 					return Result(false, swappedSets); // Success, retry above layer
770 
771 				// Failure, descend
772 
773 				auto result1 = optimizeLayer(sets1);
774 				if (!result1.map)
775 				{
776 					assert(result1.done);
777 					return Result(true, null); // Done, bottom reached
778 				}
779 
780 				// Apply result
781 				sets0new.clear();
782 				foreach (set0; sets0.byKey)
783 				{
784 					set0.assertDeduplicated();
785 					auto newChildren = set0.root.children.dup;
786 					foreach (ref pair0; newChildren)
787 						pair0.set = result1.map[pair0.set];
788 					auto set0new = MapSet(new immutable Node(dim0, cast(immutable) newChildren)).deduplicate;
789 					sets0new.add(set0new);
790 					swappedSets[set0] = set0new;
791 				}
792 
793 				if (result1.done)
794 					return Result(true, swappedSets);
795 
796 				// Retry this layer
797 				auto result0 = optimizeLayer(sets0new);
798 				if (!result0.map)
799 				{
800 					assert(result0.done);
801 					return Result(true, swappedSets); // Bottom was reached upon retry, just return our results unchanged
802 				}
803 
804 				MapSet[MapSet] compoundedResult;
805 				foreach (set0; sets0.byKey)
806 					compoundedResult[set0] = result0.map[swappedSets[set0]];
807 				return Result(result0.done, compoundedResult);
808 			}
809 
810 			MapSet[1] root = [this.normalize];
811 			while (true)
812 			{
813 				auto result = optimizeLayer(root[].toSet());
814 				if (result.map)
815 					root[0] = result.map[root[0]];
816 				if (result.done)
817 					return root[0];
818 			}
819 		}());
820 	}
821 
822 	/// Return a set equivalent to a unit set (all dimensions
823 	/// explicitly set to `nullValue`), with all dimensions in `this`,
824 	/// in an order approximately following that of the dimensions in
825 	/// `this`.  Implicitly normalized.
826 	private MapSet dimOrderReference() const
827 	{
828 		if (this is unitSet || this is emptySet) return this;
829 
830 		DimName[] dims;
831 		HashSet!MapSet seen;
832 		void visit(MapSet set, size_t pos)
833 		{
834 			if (set is unitSet) return;
835 			if (set in seen) return;
836 			seen.add(set);
837 			if ((pos == dims.length || dims[pos] != set.root.dim) && !dims.canFind(set.root.dim))
838 			{
839 				dims.insertInPlace(pos, set.root.dim);
840 				pos++;
841 			}
842 			foreach (ref pair; set.root.children)
843 				visit(pair.set, pos);
844 		}
845 		visit(this, 0);
846 
847 		MapSet result = unitSet;
848 		foreach_reverse (dim; dims)
849 			result = MapSet(new immutable Node(dim, [Pair(nullValue, result)])).deduplicate();
850 
851 		return result;
852 	}
853 
854 	/// Refactor this matrix into one in which dimensions always occur
855 	/// in the same order, no matter what path is taken.
856 	MapSet normalize() const
857 	{
858 		if (this is unitSet || this is emptySet) return this;
859 
860 		return reorderUsing(dimOrderReference());
861 	}
862 
863 	private size_t maxDepth() const
864 	{
865 		import std.algorithm.comparison : max;
866 
867 		if (this is emptySet || this is unitSet)
868 			return 0;
869 		this.assertDeduplicated();
870 		return cache.maxDepth.require(this, {
871 			size_t maxDepth = 0;
872 			foreach (ref pair; root.children)
873 				maxDepth = max(maxDepth, pair.set.maxDepth());
874 			return 1 + maxDepth;
875 		}());
876 	}
877 
878 	/// Swap the two adjacent dimensions which are `depth` dimensions away.
879 	/*private*/ MapSet swapDepth(size_t depth) const
880 	{
881 		if (this is emptySet || this is unitSet) return this;
882 		this.assertDeduplicated();
883 
884 		return cache.swapDepth.require(SetIdxOp(this, depth), {
885 			if (depth == 0)
886 			{
887 				foreach (ref pair; root.children)
888 					if (pair.set !is unitSet)
889 						return bringToFront(pair.set.root.dim);
890 				return this;
891 			}
892 			else
893 			{
894 				auto newChildren = root.children.dup;
895 				foreach (ref pair; newChildren)
896 					pair.set = pair.set.swapDepth(depth - 1);
897 				return MapSet(new immutable Node(root.dim, cast(immutable) newChildren)).deduplicate;
898 			}
899 		}());
900 	}
901 
902 	/// Refactor this matrix into one with the same data, but in which
903 	/// the dimensions always occur as in `reference` (which is
904 	/// assumed to be normalized).
905 	MapSet reorderUsing(MapSet reference) const
906 	{
907 		if (this is emptySet || reference is emptySet || reference is unitSet) return this;
908 		this.assertDeduplicated();
909 		reference.assertDeduplicated();
910 
911 		return cache.reorderUsing.require(SetSetOp(this, reference), {
912 			return bringToFront(reference.root.dim).lazyMap(set => set.reorderUsing(reference.root.children[0].set));
913 		}());
914 	}
915 
916 	void toString(scope void delegate(const(char)[]) sink) const
917 	{
918 		import std.format : formattedWrite;
919 		if (this is emptySet)
920 			sink("{}");
921 		else
922 		if (this is unitSet)
923 			sink("{[]}");
924 		else
925 			sink.formattedWrite!"%s"(*root);
926 	} ///
927 
928 	hash_t toHash() const
929 	{
930 		return
931 			this is emptySet ? 0 :
932 			this is unitSet ? 1 :
933 			root.toHash();
934 	} ///
935 
936 	bool opEquals(const typeof(this) s) const
937 	{
938 		if (root is s.root)
939 			return true;
940 		if (this is emptySet || this is unitSet || s is emptySet || s is unitSet)
941 			return this is s;
942 		return *root == *s.root;
943 	} ///
944 }
945 
946 unittest
947 {
948 	import std.algorithm.sorting : sort;
949 
950 	alias M = MapSet!(string, int);
951 	M m, n;
952 	m = m.merge(M.unitSet.set("x", 1).set("y", 5));
953 	m = m.merge(M.unitSet.set("x", 1).set("y", 6));
954 	assert(m.all("x") == [1]);
955 	assert(m.all("y").dup.sort.release == [5, 6]);
956 
957 	m = m.merge(M.unitSet.set("x", 2).set("y", 6));
958 	assert(m.get("x", 1).all("y").dup.sort.release == [5, 6]);
959 	assert(m.get("y", 6).all("x").dup.sort.release == [1, 2]);
960 
961 	m = m.subtract(M.unitSet.set("x", 1).set("y", 6));
962 	assert(m.all("x").dup.sort.release == [1, 2]);
963 	assert(m.all("y").dup.sort.release == [5, 6]);
964 	assert(m.get("x", 1).all("y") == [5]);
965 	assert(m.get("y", 6).all("x") == [2]);
966 
967 	m = M.emptySet;
968 	m = m.merge(M.unitSet.set("x", 10).set("y", 20));
969 	m = m.merge(M.unitSet.set("x", 10).set("y", 21));
970 	m = m.merge(M.unitSet.set("x", 11).set("y", 21));
971 	m = m.merge(M.unitSet.set("x", 11).set("y", 22));
972 	auto o = m.optimize();
973 	assert(o.uniqueNodes < m.uniqueNodes);
974 
975 	m = M.emptySet;
976 	assert(m.all("x") == []);
977 	m = M.unitSet;
978 	assert(m.all("x") == [0]);
979 	m = m.merge(M.unitSet.set("x", 1));
980 	assert(m.all("x").dup.sort.release == [0, 1]);
981 
982 	m = M.unitSet;
983 	assert(m.set("x", 1).set("x", 1).all("x") == [1]);
984 
985 	m = M.unitSet;
986 	m = m.cartesianProduct("x", [1, 2, 3]);
987 	m = m.cartesianProduct("y", [1, 2, 3]);
988 	m = m.cartesianProduct("z", [1, 2, 3]);
989 	assert(m.count == 3 * 3 * 3);
990 	assert(m            .all("x").dup.sort.release == [1, 2, 3]);
991 	assert(m.set("z", 1).all("x").dup.sort.release == [1, 2, 3]);
992 	assert(m.set("x", 1).all("z").dup.sort.release == [1, 2, 3]);
993 
994 	m = M.unitSet;
995 	m = m.cartesianProduct("a", [1, 2, 3]);
996 	m = m.cartesianProduct("b", [1, 2, 3]);
997 	n = M.unitSet;
998 	n = n.cartesianProduct("c", [1, 2, 3]);
999 	n = n.cartesianProduct("d", [1, 2, 3]);
1000 	m = m.cartesianProduct(n);
1001 	assert(m.count == 3 * 3 * 3 * 3);
1002 
1003 	assert(M.unitSet != M.unitSet.set("x", 1));
1004 
1005 	m = M.emptySet;
1006 	m = m.merge(M.unitSet.set("x", 1).set("y", 11));
1007 	m = m.merge(M.unitSet.set("x", 2).set("y", 12).set("z", 22));
1008 	m = m.completeSuperset();
1009 	assert(m.uniqueNodes == 3);
1010 	assert(m.count == 8); // 2 ^^ 3
1011 	assert(m.all("x").dup.sort.release == [1, 2]);
1012 	assert(m.all("y").dup.sort.release == [11, 12]);
1013 	assert(m.all("z").dup.sort.release == [0, 22]);
1014 }
1015 
1016 
1017 /// Allows executing a deterministic algorithm over all states in a given MapSet.
1018 /// If a variable is not queried by the algorithm, states for all
1019 /// variations of that variable are processed in one iteration.
1020 struct MapSetVisitor(A, V)
1021 {
1022 	/// Underlying `MapSet`.
1023 	alias Set = MapSet!(A, V);
1024 	Set set; /// ditto
1025 
1026 	/// Internal state.
1027 	/*private*/ public
1028 	{
1029 		struct Var
1030 		{
1031 			A name; ///
1032 			const(V)[] values; ///
1033 			size_t pos; ///
1034 		}
1035 		Var[] stack;
1036 		size_t stackPos;
1037 		V[A] singularValues, resolvedValues; // Faster than workingSet.all(name)[0]
1038 		private HashSet!A dirtyValues; // Accumulate MapSet.set calls
1039 		private Set workingSet;
1040 	}
1041 
1042 	this(Set set)
1043 	{
1044 		this.set = set;
1045 		foreach (dim, values; set.getDimsAndValues())
1046 			if (values.length == 1)
1047 				singularValues[dim] = values.byKey.front;
1048 	} ///
1049 
1050 	/// Resets iteration to the beginning.
1051 	/// Equivalent to but faster than constructing a new MapSetVisitor
1052 	/// instance (`visitor = MapSetVisitor(visitor.set)`).
1053 	void reset()
1054 	{
1055 		workingSet = Set.emptySet;
1056 		stack = null;
1057 	}
1058 
1059 	/// Returns true if there are more states to iterate over,
1060 	/// otherwise returns false
1061 	bool next()
1062 	{
1063 		if (set is Set.emptySet)
1064 			return false;
1065 		if (workingSet is Set.emptySet)
1066 		{
1067 			// first iteration
1068 		}
1069 		else
1070 			while (true)
1071 			{
1072 				if (!stack.length)
1073 					return false; // All possibilities exhausted
1074 				auto last = &stack[$-1];
1075 				last.pos++;
1076 				if (last.pos == last.values.length)
1077 				{
1078 					stack = stack[0 .. $ - 1];
1079 					continue;
1080 				}
1081 				break;
1082 			}
1083 
1084 		workingSet = set;
1085 		resolvedValues = null;
1086 		dirtyValues.clear();
1087 		stackPos = 0;
1088 		return true;
1089 	}
1090 
1091 	private void flush()
1092 	{
1093 		if (dirtyValues.empty)
1094 			return;
1095 		workingSet = workingSet.remove((A name) => name in dirtyValues);
1096 		foreach (name; dirtyValues)
1097 			workingSet = workingSet.addDim(name, resolvedValues[name]);
1098 		dirtyValues.clear();
1099 	}
1100 
1101 	/// Peek at the subset the algorithm is currently working with.
1102 	@property Set currentSubset()
1103 	{
1104 		assert(workingSet !is Set.emptySet, "Not iterating");
1105 		flush();
1106 		return workingSet;
1107 	}
1108 
1109 	/// Algorithm interface - get a value by name
1110 	V get(A name)
1111 	{
1112 		assert(workingSet !is Set.emptySet, "Not iterating");
1113 		if (auto pvalue = name in resolvedValues)
1114 			return *pvalue;
1115 		if (auto pvalue = name in singularValues)
1116 			return *pvalue;
1117 
1118 		if (stackPos == stack.length)
1119 		{
1120 			// Expand new variable
1121 			auto values = workingSet.all(name);
1122 			auto value = values[0];
1123 			resolvedValues[name] = value;
1124 			stack ~= Var(name, values, 0);
1125 			stackPos++;
1126 			if (values.length > 1)
1127 				workingSet = workingSet.get(name, value);
1128 			return value;
1129 		}
1130 
1131 		// Iterate over known variable
1132 		auto var = &stack[stackPos];
1133 		assert(var.name == name, "Mismatching get order");
1134 		auto value = var.values[var.pos];
1135 		workingSet = workingSet.get(var.name, value);
1136 		assert(workingSet !is Set.emptySet, "Empty set after restoring");
1137 		resolvedValues[var.name] = value;
1138 		stackPos++;
1139 		return value;
1140 	}
1141 
1142 	/// Algorithm interface - set a value by name
1143 	void put(A name, V value)
1144 	{
1145 		assert(workingSet !is Set.emptySet, "Not iterating");
1146 		if (name !in resolvedValues)
1147 			if (auto pvalue = name in singularValues)
1148 				if (*pvalue == value)
1149 					return;
1150 
1151 		resolvedValues[name] = value;
1152 		dirtyValues.add(name);
1153 	}
1154 
1155 	/// Apply a function over every possible value of the given
1156 	/// variable, without resolving it (unless it's already resolved).
1157 	void transform(A name, scope void delegate(ref V value) fun)
1158 	{
1159 		assert(workingSet !is Set.emptySet, "Not iterating");
1160 		if (auto pvalue = name in resolvedValues)
1161 		{
1162 			dirtyValues.add(name);
1163 			return fun(*pvalue);
1164 		}
1165 
1166 		workingSet = workingSet.bringToFront(name);
1167 		Set[V] newChildren;
1168 		foreach (ref child; workingSet.root.children)
1169 		{
1170 			V value = child.value;
1171 			fun(value);
1172 			newChildren.updateVoid(value,
1173 				() => child.set,
1174 				(ref Set set)
1175 				{
1176 					set = set.merge(child.set);
1177 				});
1178 		}
1179 		workingSet = Set(new immutable Set.Node(name, cast(immutable) newChildren)).deduplicate;
1180 	}
1181 
1182 	/// Apply a function over every possible value of the given
1183 	/// variable, without resolving it (unless it's already resolved).
1184 	/// The function is assumed to be injective (does not produce
1185 	/// duplicate outputs for distinct inputs).
1186 	void injectiveTransform(A name, scope void delegate(ref V value) fun)
1187 	{
1188 		assert(workingSet !is Set.emptySet, "Not iterating");
1189 		if (auto pvalue = name in resolvedValues)
1190 		{
1191 			dirtyValues.add(name);
1192 			return fun(*pvalue);
1193 		}
1194 
1195 		workingSet = workingSet.bringToFront(name);
1196 		auto newChildren = workingSet.root.children.dup;
1197 		foreach (ref child; newChildren)
1198 			fun(child.value);
1199 		newChildren.sort();
1200 		workingSet = Set(new immutable Set.Node(name, cast(immutable) newChildren)).deduplicate;
1201 	}
1202 
1203 	/// Inject a variable and values to iterate over.
1204 	/// The variable must not have been resolved yet.
1205 	void inject(A name, V[] values)
1206 	{
1207 		assert(name !in resolvedValues, "Already resolved");
1208 		assert(values.length > 0, "Injecting zero values would result in an empty set");
1209 		singularValues.remove(name);
1210 		workingSet = workingSet.cartesianProduct(name, values);
1211 	}
1212 }
1213 
1214 /// An algorithm which divides two numbers.
1215 /// When the divisor is zero, we don't even query the dividend,
1216 /// therefore processing all dividends in one iteration.
1217 unittest
1218 {
1219 	alias M = MapSet!(string, int);
1220 	M m = M.unitSet
1221 		.cartesianProduct("divisor" , [0, 1, 2])
1222 		.cartesianProduct("dividend", [0, 1, 2]);
1223 	assert(m.count == 9);
1224 
1225 	auto v = MapSetVisitor!(string, int)(m);
1226 	M results;
1227 	int iterations;
1228 	while (v.next())
1229 	{
1230 		iterations++;
1231 		auto divisor = v.get("divisor");
1232 		if (divisor == 0)
1233 			continue;
1234 		auto dividend = v.get("dividend");
1235 		v.put("quotient", dividend / divisor);
1236 		results = results.merge(v.currentSubset);
1237 	}
1238 
1239 	assert(iterations == 7); // 1 for division by zero + 3 for division by one + 3 for division by two
1240 	assert(results.get("divisor", 2).get("dividend", 2).all("quotient") == [1]);
1241 	assert(results.get("divisor", 0).count == 0);
1242 }
1243 
1244 unittest
1245 {
1246 	import std.algorithm.sorting : sort;
1247 
1248 	alias M = MapSet!(string, int);
1249 	M m = M.unitSet.cartesianProduct("x", [1, 2, 3]);
1250 	auto v = MapSetVisitor!(string, int)(m);
1251 	v.next();
1252 	v.transform("x", (ref int v) { v *= 2; });
1253 	assert(v.currentSubset.all("x").dup.sort.release == [2, 4, 6]);
1254 }
1255 
1256 unittest
1257 {
1258 	alias M = MapSet!(string, int);
1259 	M m = M.unitSet.cartesianProduct("x", [1, 2, 3]);
1260 	auto v = MapSetVisitor!(string, int)(m);
1261 	while (v.next())
1262 	{
1263 		v.transform("x", (ref int v) { v *= 2; });
1264 		v.put("y", v.get("x"));
1265 	}
1266 }