1 /**
2  * Array utility functions
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.utils.array;
15 
16 import std.algorithm.iteration;
17 import std.algorithm.mutation;
18 import std.algorithm.searching;
19 import std.algorithm.sorting;
20 import std.array;
21 import std.exception;
22 import std.format;
23 import std.traits;
24 
25 import ae.utils.meta;
26 
27 public import ae.utils.aa;
28 public import ae.utils.appender;
29 
/// View a single variable as a one-element slice.
/// The slice aliases `v`: writing through it modifies `v`.
T[] toArray(T)(ref T v)
{
	auto address = &v;
	return address[0 .. 1];
}
35 
/// View a value that has no indirections as its raw bytes.
/// The value is first wrapped into a one-element slice, which is
/// then reinterpreted by the array overload of `bytes`.
@property inout(ubyte)[] bytes(T)(ref inout(T) value)
	if (!hasIndirections!T)
{
	auto asSlice = value.toArray();
	return asSlice.bytes;
}
42 
/// Reinterpret an array whose elements have no indirections
/// as a slice of bytes. No data is copied.
@property inout(ubyte)[] bytes(T)(inout(T) value)
	if (is(T U : U[]) && !hasIndirections!U)
{
	auto raw = cast(inout(ubyte)[])value;
	return raw;
}
49 
unittest
{
	// Scalar value
	ubyte scalar = 5;
	assert(scalar.bytes == [5]);

	// Struct without indirections
	struct Wrapper { ubyte b = 5; }
	Wrapper wrapped;
	assert(wrapped.bytes == [5]);

	// Static array
	ubyte[1] fixed = [5];
	assert(fixed.bytes == [5]);

	// Untyped slice
	void[] untyped = fixed[];
	assert(untyped.bytes == [5]);
}
65 
/// Inverse of `bytes` for single values: reinterpret a byte slice
/// as a reference to a `T`. The slice length must equal `T.sizeof`
/// (checked with an assertion).
ref inout(T) fromBytes(T)(inout(ubyte)[] bytes)
	if (!hasIndirections!T)
{
	assert(bytes.length == T.sizeof, "Data length mismatch for %s".format(T.stringof));
	auto typed = cast(inout(T)*)bytes.ptr;
	return *typed;
}
73 
/// Inverse of `bytes` for arrays: reinterpret a byte slice as an
/// array type `T` whose elements have no indirections.
inout(T) fromBytes(T)(inout(ubyte)[] bytes)
	if (is(T U : U[]) && !hasIndirections!U)
{
	auto typed = cast(inout(T))bytes;
	return typed;
}
80 
unittest
{
	struct Boxed { ubyte b; }

	// Mutable source
	{
		ubyte src = 5;
		assert(src.bytes.fromBytes!ubyte == 5);
	}
	// Const source
	{
		const ubyte src = 5;
		assert(src.bytes.fromBytes!ubyte == 5);
	}
	// Reinterpreting as a struct
	{
		ubyte src = 5;
		assert(src.bytes.fromBytes!Boxed == Boxed(5));
	}
}
88 
unittest
{
	struct Pair { ubyte a, b; }
	ubyte[] raw = [1, 2];

	// Whole buffer as one value
	assert(raw.fromBytes!Pair == Pair(1, 2));
	// Whole buffer as an array of values
	assert(raw.fromBytes!(Pair[]) == [Pair(1, 2)]);
}
96 
/// Returns an empty, but non-null slice of T.
auto emptySlice(T)() pure
{
	// A fixed non-null address is used rather than the address of a
	// zero-length static array, because LDC optimizes the latter out.
	// The pointer is never dereferenced: the slice has length 0.
	auto nonNullPtr = cast(T*)1;
	return nonNullPtr[0 .. 0];
}
109 
unittest
{
	// Mutable target
	int[] mutableSlice = emptySlice!int;
	assert(mutableSlice.ptr !is null);

	// Immutable target
	immutable int[] immutableSlice = emptySlice!int;
	assert(immutableSlice.ptr !is null);
}
117 
/// Compare two byte arrays of equal length using the C `memcmp`.
/// Returns the C function's result (zero when the contents match).
/// The lengths must match (checked with an assertion).
int memcmp(in ubyte[] a, in ubyte[] b)
{
	import core.stdc.string : cMemcmp = memcmp;

	assert(a.length == b.length);
	return cMemcmp(a.ptr, b.ptr, a.length);
}
124 
/// Like std.algorithm.copy, but copies raw memory and therefore
/// does not auto-decode.
/// https://issues.dlang.org/show_bug.cgi?id=13650
/// Both slices must have the same length (checked with an assertion).
void memmove(T)(T[] dst, in T[] src)
{
	import core.stdc.string : cMemmove = memmove;

	assert(src.length == dst.length);
	auto byteCount = dst.length * T.sizeof;
	cMemmove(dst.ptr, src.ptr, byteCount);
}
133 
/// Apply binary operator `op` element-wise to two arrays of equal
/// length, returning a newly allocated array of the results.
T[] vector(string op, T)(T[] a, T[] b)
{
	assert(a.length == b.length);
	auto result = new T[a.length];
	foreach (i; 0 .. result.length)
		result[i] = mixin("a[i]" ~ op ~ "b[i]");
	return result;
}
142 
/// Apply the compound assignment `op=` element-wise, updating `a`
/// in place with the elements of `b`. Returns `a`.
/// Both arrays must have the same length (checked with an assertion).
T[] vectorAssign(string op, T)(T[] a, T[] b)
{
	assert(a.length == b.length);
	foreach (i; 0 .. a.length)
		mixin("a[i] " ~ op ~ "= b[i];");
	return a;
}
150 
/// Extend `s` to length `l` by appending copies of `c`.
/// Arrays already at least `l` long are returned unchanged.
T[] padRight(T)(T[] s, size_t l, T c)
{
	immutable oldLength = s.length;
	if (oldLength >= l)
		return s;

	s.length = l;
	s[oldLength .. $] = c;
	return s;
}
161 
/// Allocate a new array of `l` elements, each set to `c`.
T[] repeatOne(T)(T c, size_t l)
{
	auto filled = new T[l];
	filled[] = c;
	return filled;
}
168 
/// Complement to std.string.indexOf which works with arrays
/// of non-character types.
/// Unlike std.algorithm.countUntil, it does not auto-decode,
/// and returns an index usable for array indexing/slicing.
/// Returns the value of `countUntil` for the first match
/// (negative when there is none).
sizediff_t indexOf(T, D)(in T[] arr, in D val)
	if (!isSomeChar!T && is(typeof(arr.countUntil(val))) && is(typeof(arr[0]==val)))
{
	auto position = arr.countUntil(val);
	return position;
}
180 
/// Sub-array search for arrays of non-character types:
/// returns the value of `countUntil` for the first occurrence of
/// `val` inside `arr` (negative when there is none).
sizediff_t indexOf(T)(in T[] arr, in T[] val)
	if (!isSomeChar!T && is(typeof(arr.countUntil(val))))
{
	auto position = arr.countUntil(val);
	return position;
}
186 
/// Plain linear search: index of the first element equal to
/// `val`, or -1 if no element matches.
sizediff_t indexOfElement(T, D)(in T[] arr, auto ref in D val)
	if (is(typeof(arr[0]==val)))
{
	foreach (i; 0 .. arr.length)
	{
		if (arr[i] == val)
			return i;
	}
	return -1;
}
196 
/// Whether the array contains an element equal to `val`
/// (plain linear search via `indexOfElement`).
bool contains(T, V)(in T[] arr, auto ref in V val)
	if (is(typeof(arr[0]==val)))
{
	auto position = arr.indexOfElement(val);
	return position >= 0;
}
203 
/// Whether `str` contains `what` as a contiguous sub-array
/// (substring). Element types must match ignoring qualifiers.
bool contains(T, U)(T[] str, U[] what)
if (is(Unqual!T == Unqual!U))
{
	auto position = str._indexOf(what);
	return position >= 0;
}
210 
unittest
{
	string text = "abc";

	// Single element
	assert( text.contains('b'));
	assert(!text.contains('x'));

	// Substring
	assert( text.contains("b"));
	assert(!text.contains("x"));
}
218 
/// Like startsWith, but with an offset.
/// Returns true if `needle` occurs in `haystack` starting exactly at
/// index `offset`. Returns false (never throws) when the needle
/// would not fit, including for offsets beyond the end of `haystack`.
bool containsAt(T)(in T[] haystack, in T[] needle, size_t offset)
{
	// Compare against the remaining length instead of computing
	// `offset + needle.length`, which can wrap around for huge offsets.
	if (offset > haystack.length || needle.length > haystack.length - offset)
		return false;
	return haystack[offset .. offset + needle.length] == needle;
}
225 
unittest
{
	string word = "abracadabra";

	// Match exactly at the offset
	assert( word.containsAt("ada", 5));
	// Off by one
	assert(!word.containsAt("ada", 6));
	// Offset past the end
	assert(!word.containsAt("ada", 99));
}
232 
/// Whether `val` is an element of `arr`
/// (`contains` with the arguments swapped).
bool isIn(T)(T val, in T[] arr)
{
	return contains(arr, val);
}
237 
/// Whether `val` equals one of the remaining (variadic) arguments.
bool isOneOf(T)(T val, T[] arr...)
{
	return contains(arr, val);
}
242 
/// Like AA.get - soft indexing. Returns a reference to the element
/// at `index`, or throws an Exception (not an Error) when the index
/// is out of bounds, even in release builds.
ref T get(T)(T[] arr, size_t index)
{
	enforce(arr.length > index, "Out-of-bounds array access");
	return arr[index];
}
251 
/// Like AA.get - soft indexing. Returns the element at `index`,
/// or `defaultValue` when the index is out of bounds.
auto get(T)(T[] arr, size_t index, auto ref T defaultValue)
{
	return index < arr.length ? arr[index] : defaultValue;
}
260 
/// Returns a reference to the element at `index`, first growing
/// the array to `index + 1` elements if the index is out of bounds.
ref T getExpand(T)(ref T[] arr, size_t index)
{
	if (arr.length <= index)
		arr.length = index + 1;
	return arr[index];
}
268 
/// Stores `value` at `index`, first growing the array to
/// `index + 1` elements if the index is out of bounds.
/// Returns a reference to the stored element.
ref T putExpand(T)(ref T[] arr, size_t index, auto ref T value)
{
	if (arr.length <= index)
		arr.length = index + 1;
	return arr[index] = value;
}
276 
/// Slices an array. Throws an Exception (not an Error)
/// on out-of-bounds, even in release builds.
/// Accepts exactly the ranges the built-in `arr[p0..p1]` accepts:
/// `p0 <= p1 <= arr.length`, so empty slices and slices that
/// extend to the end of the array are valid.
T[] slice(T)(T[] arr, size_t p0, size_t p1)
{
	enforce(p0 <= p1 && p1 <= arr.length, "Out-of-bounds array slice");
	return arr[p0..p1];
}
284 
/// Given an array and a slice of it, returns the index in the array
/// at which the slice starts. The slice's start must lie within the
/// array (or at its end); this is checked with an assertion.
size_t sliceIndex(T)(in T[] arr, in T[] slice)
{
	auto first = arr.ptr;
	auto pastLast = first + arr.length;
	auto target = slice.ptr;
	assert(first <= target && target <= pastLast, "Out-of-bounds array slice");
	return target - first;
}
295 
/// Like std.array.split, but returns null when `value` is empty.
auto splitEmpty(T, S)(T value, S separator)
{
	immutable isEmpty = value.length == 0;
	return isEmpty ? null : split(value, separator);
}
301 
/// Split `haystack` into chunks, each ending with (and including)
/// the separator. The last chunk has no separator if the input
/// does not end with one. The separator may be a single element
/// or a sub-array.
H[] splitWithSuffix(H, S)(H haystack, S separator)
{
	// How many elements the separator occupies once found.
	static if (is(typeof(haystack[0] == separator)))
		enum size_t separatorLength = 1;
	else
	static if (is(typeof(haystack[0..1] == separator)))
		immutable size_t separatorLength = separator.length;
	else
		static assert(false, "Don't know how to split " ~ H.stringof ~ " by " ~ S.stringof);

	H[] chunks;
	while (haystack.length)
	{
		auto found = haystack._indexOf(separator);
		// No further separator: the rest of the input is the last chunk.
		size_t cut = found < 0 ? haystack.length : found + separatorLength;
		chunks ~= haystack[0..cut];
		haystack = haystack[cut..$];
	}
	return chunks;
}
326 
unittest
{
	// Single-element separators
	assert("a\nb".splitWithSuffix('\n') == ["a\n", "b"]);
	assert([1, 0, 2].splitWithSuffix(0) == [[1, 0], [2]]);

	// Multi-element separators
	assert("a\r\nb".splitWithSuffix("\r\n") == ["a\r\n", "b"]);
	assert([1, 0, 0, 2].splitWithSuffix([0, 0]) == [[1, 0, 0], [2]]);
}
335 
/// Split `haystack` into chunks, each (except possibly the first)
/// beginning with the separator.
H[] splitWithPrefix(H, S)(H haystack, S separator)
{
	H[] chunks;
	while (haystack.length)
	{
		// Search from the second element on, so that a separator at
		// the very start of the remaining input begins this chunk
		// instead of producing an empty one.
		auto found = haystack[1..$]._indexOf(separator);
		size_t cut = found < 0 ? haystack.length : found + 1;
		chunks ~= haystack[0..cut];
		haystack = haystack[cut..$];
	}
	return chunks;
}
352 
unittest
{
	// Single-element separators
	assert("a\nb".splitWithPrefix('\n') == ["a", "\nb"]);
	assert([1, 0, 2].splitWithPrefix(0) == [[1], [0, 2]]);

	// Multi-element separators
	assert("a\r\nb".splitWithPrefix("\r\n") == ["a", "\r\nb"]);
	assert([1, 0, 0, 2].splitWithPrefix([0, 0]) == [[1], [0, 0, 2]]);
}
361 
/// Split `haystack` at every place where `suffix` is immediately
/// followed by `prefix`. Each split point falls between the two, so
/// the suffix stays at the end of one chunk and the prefix at the
/// start of the next.
S[] splitWithPrefixAndSuffix(S)(S haystack, S prefix, S suffix)
{
	auto boundary = suffix ~ prefix;
	S[] chunks;
	while (haystack.length)
	{
		auto found = haystack._indexOf(boundary);
		// Cut right after the suffix part of the boundary,
		// or take everything when no boundary remains.
		size_t cut = found < 0 ? haystack.length : found + suffix.length;
		chunks ~= haystack[0..cut];
		haystack = haystack[cut..$];
	}
	return chunks;
}
379 
///
unittest
{
	auto text = q"EOF
Section 1:
10
11
12
Section 2:
21
22
23
Section 3:
31
32
33
EOF";
	auto sections = text.splitWithPrefixAndSuffix("Section ", "\n");
	// Nothing is lost or duplicated by splitting.
	assert(sections.length == 3 && sections.join == text);
	// Every chunk keeps its header prefix and trailing newline.
	foreach (section; sections)
		assert(section.startsWith("Section ") && section.endsWith("\n"));
}
402 
/// Ensure that arr is non-null if empty: a null array is replaced
/// by an empty non-null slice, anything else is returned unchanged.
T nonNull(T)(T arr)
{
	if (arr is null)
		return emptySlice!(typeof(arr[0]));
	return arr;
}
410 
/// Null-preserving transformation: if arr is null, return null.
/// Otherwise apply dg to arr and make sure the result is non-null,
/// even when dg itself returns null.
template mapNull(alias dg)
{
	auto mapNull(T)(T arr)
	{
		if (arr is null)
			return null;
		auto mapped = dg(arr);
		return mapped.nonNull;
	}
}
422 
unittest
{
	// Null input stays null, whatever the delegate returns.
	assert(string.init.mapNull!(s => s) is null);
	assert(string.init.mapNull!(s => "") is null);

	// Non-null input stays non-null, even if the delegate returns null.
	assert("".mapNull!(s => s) !is null);
	assert("".mapNull!(s => string.init) !is null);
}
430 
/// Pick one element of the array at a uniformly random index
/// and return it.
auto ref sample(T)(T[] arr)
{
	import std.random;

	auto index = uniform(0, arr.length);
	return arr[index];
}
437 
unittest
{
	// All candidates equal, so the result is deterministic.
	assert([7, 7, 7].sample == 7);

	// Must compile for arrays of strings (Issue 13807)...
	auto word = ["foo", "bar"].sample();

	// ...and for arrays of const elements.
	const(int)[] constInts = [5];
	sample(constInts);
}
444 
/// Remove the element at a uniformly random index from the array
/// and return it. The array is updated through the `ref` parameter.
T pluck(T)(ref T[] arr)
{
	import std.random;

	auto index = uniform(0, arr.length);
	auto chosen = arr[index];
	arr = arr.remove(index);
	return chosen;
}
455 
unittest
{
	auto pool = [1, 2, 3];

	// Draw every element exactly once, in random order.
	int[] drawn;
	foreach (round; 0 .. 3)
		drawn ~= pool.pluck;

	drawn.sort();
	assert(drawn == [1, 2, 3]);
}
463 
464 import std.functional;
465 
/// Counting sort. Returns a newly allocated array with the elements
/// of `arr` ordered by the key computed by `value` (by default the
/// element itself). Elements with equal keys keep their relative
/// order. An empty input is returned as is.
T[] countSort(alias value = "a", T)(T[] arr)
{
	alias keyOf = unaryFun!value;
	alias Key = typeof(keyOf(arr[0]));

	if (arr.length == 0)
		return arr;

	// Find the range of keys present.
	Key lowest = keyOf(arr[0]), highest = keyOf(arr[0]);
	foreach (item; arr[1..$])
	{
		auto key = keyOf(item);
		if (lowest > key)
			lowest = key;
		if (highest < key)
			highest = key;
	}

	// Count how many elements have each key.
	auto span = highest-lowest+1;
	auto tally = new size_t[span];
	foreach (item; arr)
		tally[keyOf(item)-lowest]++;

	// Prefix sums: the output position of the first element of each key.
	auto offsets = new size_t[span];
	foreach (k; 1..span)
		offsets[k] = offsets[k-1] + tally[k-1];

	// Place every element at the next free position for its key.
	T[] sorted = new T[arr.length];
	foreach (item; arr)
		sorted[offsets[keyOf(item)-lowest]++] = item;
	return sorted;
}
492 
493 // ***************************************************************************
494 
/// Append `val` to the end of the array.
/// Serves as the push operation for both stacks and queues.
void stackPush(T)(ref T[] arr, auto ref T val)
{
	arr ~= val;
}
/// ditto
alias queuePush = stackPush;
500 
/// Reference to the last element (top of the stack), without removing it.
ref T stackPeek(T)(T[] arr)
{
	return arr[arr.length - 1];
}
502 
/// Remove the last element from the array and return a reference
/// to it. Only the slice is shortened; the element stays in memory.
ref T stackPop(T)(ref T[] arr)
{
	auto lastIndex = arr.length - 1;
	auto top = &arr[lastIndex];
	arr = arr[0 .. lastIndex];
	return *top;
}
509 
/// Reference to the first element (front of the queue), without removing it.
ref T queuePeek(T)(T[] arr)
{
	return arr[0];
}
511 
/// Reference to the last element (back of the queue), without removing it.
ref T queuePeekLast(T)(T[] arr)
{
	return arr[arr.length - 1];
}
513 
/// Remove the first element from the array and return a reference
/// to it. When the last element is removed, the array is set to null.
ref T queuePop(T)(ref T[] arr)
{
	auto front = &arr[0];
	auto rest = arr[1 .. $];
	arr = rest.length ? rest : null;
	return *front;
}
521 
/// Remove the first element from the array and return a reference to it.
ref T shift(T)(ref T[] arr)
{
	auto original = arr;
	arr = original[1 .. $];
	return original[0];
}

/// Remove the first `n` elements from the array and return them as a slice.
T[] shift(T)(ref T[] arr, size_t n)
{
	auto head = arr[0 .. n];
	arr = arr[n .. $];
	return head;
}

/// Remove the first `N` elements from the array and return them
/// as a static array.
T[N] shift(size_t N, T)(ref T[] arr)
{
	T[N] head = cast(T[N])(arr[0 .. N]);
	arr = arr[N .. $];
	return head;
}
/// Insert a single value at the front of the array.
void unshift(T)(ref T[] arr, T value)
{
	insertInPlace(arr, 0, value);
}

/// Insert all elements of `value` at the front of the array.
void unshift(T)(ref T[] arr, T[] value)
{
	insertInPlace(arr, 0, value);
}
527 
unittest
{
	// shift: single element, then a run of elements
	int[] items = [1, 2, 3];
	assert(items.shift == 1);
	assert(items == [2, 3]);
	assert(items.shift(2) == [2, 3]);
	assert(items == []);

	// unshift: a whole array, then a single element
	items = [3];
	items.unshift([1, 2]);
	assert(items == [1, 2, 3]);
	items.unshift(0);
	assert(items == [0, 1, 2, 3]);

	// shift with a compile-time count
	assert(items.shift!2 == [0, 1]);
	assert(items == [2, 3]);
}
545 
/// If arr starts with prefix, slice it off and return true.
/// Otherwise leave arr unchanged and return false.
deprecated("Use std.algorithm.skipOver instead")
bool eat(T)(ref T[] arr, T[] prefix)
{
	if (!arr.startsWith(prefix))
		return false;

	arr = arr[prefix.length..$];
	return true;
}
558 
// Overload disambiguator.
// Dispatches to this module's indexOf when it accepts the given
// argument types, and otherwise falls back to std.string.indexOf.
// Fails at compile time if neither overload set accepts the arguments.
private sizediff_t _indexOf(H, N)(H haystack, N needle)
{
	static import std.string;

	static if (is(typeof(ae.utils.array.indexOf(haystack, needle))))
		alias indexOf = ae.utils.array.indexOf;
	else
	static if (is(typeof(std.string.indexOf(haystack, needle))))
		alias indexOf = std.string.indexOf;
	else
		static assert(false, "No suitable indexOf overload found");
	return indexOf(haystack, needle);
}
573 
/// Returns the slice of source up to the first occurrence of delim,
/// and advances source to the point just after delim.
/// delim may be a single element or a sub-array of source's type.
/// If delim is not found, the behavior depends on orUntilEnd:
/// - If orUntilEnd is false (default), it returns null
///   and leaves source unchanged.
/// - If orUntilEnd is true, it returns source,
///   and then sets source to null.
T[] skipUntil(T, D)(ref T[] source, D delim, bool orUntilEnd = false)
{
	enum bool isSlice = is(typeof(source[0..1]==delim));
	enum bool isElem  = is(typeof(source[0]   ==delim));
	static assert(isSlice || isElem, "Can't skip " ~ T.stringof ~ " until " ~ D.stringof);
	static assert(isSlice != isElem, "Ambiguous types for skipUntil: " ~ T.stringof ~ " and " ~ D.stringof);

	// Number of elements to skip past once the delimiter is found.
	static if (isSlice)
		auto delimLength = delim.length;
	else
		enum delimLength = 1;

	static import std.string;

	auto found = _indexOf(source, delim);
	if (found >= 0)
	{
		auto head = source[0..found];
		source = source[found+delimLength..$];
		return head;
	}

	// Delimiter not found.
	if (!orUntilEnd)
		return null;

	auto remainder = source;
	source = null;
	return remainder;
}
610 
/// Selects what `eatUntil` does when the delimiter is not found:
/// return null, return the remainder of the source, or throw.
deprecated("Use skipUntil instead")
enum OnEof { returnNull, returnRemainder, throwException }

/// Deprecated wrapper around `skipUntil`. The `onEof` template
/// argument picks the behavior when the delimiter is missing;
/// by default an exception is thrown.
deprecated("Use skipUntil instead")
template eatUntil(OnEof onEof = OnEof.throwException)
{
	T[] eatUntil(T, D)(ref T[] source, D delim)
	{
		// Only returnRemainder consumes the source when the delimiter is missing.
		enum bool untilEnd = onEof == OnEof.returnRemainder;
		auto result = skipUntil(source, delim, untilEnd);

		static if (onEof == OnEof.throwException)
			return result.enforce("Delimiter not found in source");
		else
			return result;
	}
}
628 
deprecated unittest
{
	string sentence;

	// Sub-array and single-element delimiters
	sentence = "Mary had a little lamb";
	assert(sentence.eatUntil(" ") == "Mary");
	assert(sentence.eatUntil(" ") == "had");
	assert(sentence.eatUntil(' ') == "a");

	// Missing delimiter, under each OnEof policy
	assertThrown!Exception(sentence.eatUntil("#"));
	assert(sentence.eatUntil!(OnEof.returnNull)("#") is null);
	assert(sentence.eatUntil!(OnEof.returnRemainder)("#") == "little lamb");

	// Non-character arrays
	ubyte[] data = [1, 2, 0, 3, 4, 0, 0];
	assert(data.eatUntil(0) == [1, 2]);
	assert(data.eatUntil([ubyte(0), ubyte(0)]) == [3, 4]);
}
646 
647 // ***************************************************************************
648 
649 // Equivalents of array(xxx(...)), but less parens and UFCS-able.
/// Eager `map`: applies `pred` to every element and returns the
/// results as a new array.
auto amap(alias pred, T)(T[] arr)
{
	return arr.map!pred.array;
}
/// Eager `filter`: returns a new array with the elements for which
/// `pred` is true.
auto afilter(alias pred, T)(T[] arr)
{
	return arr.filter!pred.array;
}
/// Eager `uniq`: returns a new array in which runs of consecutive
/// equal elements are collapsed into one.
auto auniq(T)(T[] arr)
{
	return arr.uniq.array;
}
/// Sorts the array in place using `pred` and returns the same array.
auto asort(alias pred, T)(T[] arr)
{
	arr.sort!pred();
	return arr;
}
654 
unittest
{
	auto numbers = [1, 2, 3];

	// String predicate
	assert(numbers.amap!`a*2`() == [2, 4, 6]);
	// Lambda predicate
	assert(numbers.amap!(n => n*n)() == [1, 4, 9]);
}
660 
661 // ***************************************************************************
662 
/// Array with normalized comparison and hashing.
/// Two arrays compare equal (and hash equally) when their
/// normalized forms are equal, element by element.
/// Params:
///   T = array element type to wrap.
///   normalize = function which should return a range of normalized elements.
struct NormalizedArray(T, alias normalize)
{
	/// The wrapped array, stored as given (not normalized).
	T[] arr;

	this(T[] arr) { this.arr = arr; }

	int opCmp    (in T[]                 other) const { return compareNormalized(other    )   ; }
	int opCmp    (    const typeof(this) other) const { return compareNormalized(other.arr)   ; }
	int opCmp    (ref const typeof(this) other) const { return compareNormalized(other.arr)   ; }
	bool opEquals(in T[]                 other) const { return compareNormalized(other    )==0; }
	bool opEquals(    const typeof(this) other) const { return compareNormalized(other.arr)==0; }
	bool opEquals(ref const typeof(this) other) const { return compareNormalized(other.arr)==0; }

	/// Three-way comparison of the normalized form of this array
	/// against the normalized form of `other`.
	private int compareNormalized(in T[] other) const
	{
		// Imported locally so that `cmp` resolves no matter which
		// std.algorithm submodules the enclosing module imports.
		import std.algorithm.comparison : cmp;
		return cmp(normalize(arr), normalize(other));
	}

	/// Computes the hash as the CRC32 of the raw bytes of every
	/// normalized element.
	hash_t toHashReal() const
	{
		import std.digest.crc;
		CRC32 crc;
		foreach (c; normalize(arr))
			crc.put(cast(ubyte[])((&c)[0..1]));
		// Reinterpret the 4-byte digest as (part of) a hash_t.
		static union Result { ubyte[4] crcResult; hash_t hash; }
		return Result(crc.finish()).hash;
	}

	/// Hash hook. Forwards to toHashReal through a cast that
	/// asserts `nothrow @safe`, which toHashReal does not declare.
	hash_t toHash() const nothrow @trusted
	{
		return (cast(hash_t delegate() nothrow @safe)&toHashReal)();
	}
}
695 
696 // ***************************************************************************
697 
/// Equivalent of PHP's `list` language construct:
/// http://php.net/manual/en/function.list.php
/// Returns an object which, when assigned an array or tuple,
/// stores the value at each index into the corresponding argument.
/// Specify `null` as an argument to ignore that index
/// (equivalent of `list(x, , y)` in PHP).
auto list(Args...)(auto ref Args args)
{
	struct List
	{
		auto dummy() { return args[0]; } // https://issues.dlang.org/show_bug.cgi?id=11886

		/// Distribute the elements of `t` over the arguments of `list`.
		/// The number of elements must match (checked with an assertion).
		void opAssign(T)(auto ref T t)
		{
			assert(t.length == args.length,
				"Assigning %d elements to list with %d elements"
				.format(t.length, args.length));
			// Unrolled at compile time; `null` placeholders are skipped.
			foreach (i, Arg; Args)
				static if (!is(Arg == typeof(null)))
					args[i] = t[i];
		}
	}
	return List();
}
720 
///
unittest
{
	string name, value;
	// findSplit yields (before, separator, after); `null` skips the separator.
	auto parts = "NAME=VALUE".findSplit("=");
	list(name, null, value) = parts;
	assert(name == "NAME" && value == "VALUE");
}
728 
version(LittleEndian)
unittest
{
	// A single-element list, assigned from a one-element array.
	ubyte[] raw = [ubyte(42), 0, 0, 0];
	uint decoded;
	list(decoded) = cast(uint[])raw;
	assert(decoded == 42);
}