1 /**
2  * Array utility functions
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.utils.array;
15 
16 import std.algorithm.iteration;
17 import std.algorithm.mutation;
18 import std.algorithm.searching;
19 import std.algorithm.sorting;
20 import std.array;
21 import std.exception;
22 import std.format;
23 import std.traits;
24 
25 import ae.utils.meta;
26 
27 public import ae.utils.aa;
28 public import ae.utils.appender;
29 
30 /// Slice a variable.
31 T[] toArray(T)(ref T v)
32 {
33 	return (&v)[0..1];
34 }
35 
36 /// Return the value represented as an array of bytes.
37 @property inout(ubyte)[] bytes(T)(ref inout(T) value)
38 	if (!hasIndirections!T)
39 {
40 	return value.toArray().bytes;
41 }
42 
43 /// ditto
44 @property inout(ubyte)[] bytes(T)(inout(T) value)
45 	if (is(T U : U[]) && !hasIndirections!U)
46 {
47 	return cast(inout(ubyte)[])value;
48 }
49 
50 unittest
51 {
52 	ubyte b = 5;
53 	assert(b.bytes == [5]);
54 
55 	struct S { ubyte b = 5; }
56 	S s;
57 	assert(s.bytes == [5]);
58 
59 	ubyte[1] sa = [5];
60 	assert(sa.bytes == [5]);
61 
62 	void[] va = sa[];
63 	assert(va.bytes == [5]);
64 }
65 
66 /// Reverse of bytes()
67 ref inout(T) fromBytes(T)(inout(ubyte)[] bytes)
68 	if (!hasIndirections!T)
69 {
70 	assert(bytes.length == T.sizeof, "Data length mismatch for %s".format(T.stringof));
71 	return *cast(inout(T)*)bytes.ptr;
72 }
73 
74 /// ditto
75 inout(T) fromBytes(T)(inout(ubyte)[] bytes)
76 	if (is(T U : U[]) && !hasIndirections!U)
77 {
78 	return cast(inout(T))bytes;
79 }
80 
81 unittest
82 {
83 	{       ubyte b = 5; assert(b.bytes.fromBytes!ubyte == 5); }
84 	{ const ubyte b = 5; assert(b.bytes.fromBytes!ubyte == 5); }
85 	struct S { ubyte b; }
86 	{       ubyte b = 5; assert(b.bytes.fromBytes!S == S(5)); }
87 }
88 
89 unittest
90 {
91 	struct S { ubyte a, b; }
92 	ubyte[] arr = [1, 2];
93 	assert(arr.fromBytes!S == S(1, 2));
94 	assert(arr.fromBytes!(S[]) == [S(1, 2)]);
95 }
96 
97 /// Returns an empty, but non-null slice of T.
98 auto emptySlice(T)() pure
99 {
100 	T[0] arr;
101 	auto p = arr.ptr;
102 	return p[0..0];
103 }
104 
105 unittest
106 {
107 	int[] arr = emptySlice!int;
108 	assert(arr.ptr);
109 	immutable int[] iarr = emptySlice!int;
110 	assert(iarr.ptr);
111 }
112 
113 int memcmp(in ubyte[] a, in ubyte[] b)
114 {
115 	assert(a.length == b.length);
116 	import core.stdc.string : memcmp;
117 	return memcmp(a.ptr, b.ptr, a.length);
118 }
119 
120 /// Like std.algorithm.copy, but without the auto-decode bullshit.
121 /// https://issues.dlang.org/show_bug.cgi?id=13650
122 void memmove(T)(T[] dst, in T[] src)
123 {
124 	assert(src.length == dst.length);
125 	import core.stdc.string : memmove;
126 	memmove(dst.ptr, src.ptr, dst.length * T.sizeof);
127 }
128 
129 T[] vector(string op, T)(T[] a, T[] b)
130 {
131 	assert(a.length == b.length);
132 	T[] result = new T[a.length];
133 	foreach (i, ref r; result)
134 		r = mixin("a[i]" ~ op ~ "b[i]");
135 	return result;
136 }
137 
138 T[] vectorAssign(string op, T)(T[] a, T[] b)
139 {
140 	assert(a.length == b.length);
141 	foreach (i, ref r; a)
142 		mixin("r " ~ op ~ "= b[i];");
143 	return a;
144 }
145 
146 T[] padRight(T)(T[] s, size_t l, T c)
147 {
148 	auto ol = s.length;
149 	if (ol < l)
150 	{
151 		s.length = l;
152 		s[ol..$] = c;
153 	}
154 	return s;
155 }
156 
157 T[] repeatOne(T)(T c, size_t l)
158 {
159 	T[] result = new T[l];
160 	result[] = c;
161 	return result;
162 }
163 
164 /// Complement to std.string.indexOf which works with arrays
165 /// of non-character types.
166 /// Unlike std.algorithm.countUntil, it does not auto-decode,
167 /// and returns an index usable for array indexing/slicing.
168 sizediff_t indexOf(T, D)(in T[] arr, in D val)
169 //	if (!isSomeChar!T)
170 	if (!isSomeChar!T && is(typeof(arr.countUntil(val))) && is(typeof(arr[0]==val)))
171 {
172 	//assert(arr[0]==val);
173 	return arr.countUntil(val);
174 }
175 
176 sizediff_t indexOf(T)(in T[] arr, in T[] val) /// ditto
177 	if (!isSomeChar!T && is(typeof(arr.countUntil(val))))
178 {
179 	return arr.countUntil(val);
180 }
181 
182 /// Index of element, no BS.
183 sizediff_t indexOfElement(T, D)(in T[] arr, auto ref in D val)
184 	if (is(typeof(arr[0]==val)))
185 {
186 	foreach (i, ref v; arr)
187 		if (v == val)
188 			return i;
189 	return -1;
190 }
191 
192 /// Whether array contains value, no BS.
193 bool contains(T, V)(in T[] arr, auto ref in V val)
194 	if (is(typeof(arr[0]==val)))
195 {
196 	return arr.indexOfElement(val) >= 0;
197 }
198 
199 /// Ditto, for substrings
200 bool contains(T, U)(T[] str, U[] what)
201 if (is(Unqual!T == Unqual!U))
202 {
203 	return str._indexOf(what) >= 0;
204 }
205 
206 unittest
207 {
208 	assert( "abc".contains('b'));
209 	assert(!"abc".contains('x'));
210 	assert( "abc".contains("b"));
211 	assert(!"abc".contains("x"));
212 }
213 
214 /// Like startsWith, but with an offset.
215 bool containsAt(T)(in T[] haystack, in T[] needle, size_t offset)
216 {
217 	return haystack.length >= offset + needle.length
218 		&& haystack[offset..offset+needle.length] == needle;
219 }
220 
221 unittest
222 {
223 	assert( "abracadabra".containsAt("ada", 5));
224 	assert(!"abracadabra".containsAt("ada", 6));
225 	assert(!"abracadabra".containsAt("ada", 99));
226 }
227 
228 bool isIn(T)(T val, in T[] arr)
229 {
230 	return arr.contains(val);
231 }
232 
233 bool isOneOf(T)(T val, T[] arr...)
234 {
235 	return arr.contains(val);
236 }
237 
238 /// Like AA.get - soft indexing, throws an
239 /// Exception (not an Error) on out-of-bounds,
240 /// even in release builds.
241 ref T get(T)(T[] arr, size_t index)
242 {
243 	enforce(index < arr.length, "Out-of-bounds array access");
244 	return arr[index];
245 }
246 
247 /// Like AA.get - soft indexing, returns
248 /// default value on out-of-bounds.
249 auto get(T)(T[] arr, size_t index, auto ref T defaultValue)
250 {
251 	if (index >= arr.length)
252 		return defaultValue;
253 	return arr[index];
254 }
255 
256 /// Expand the array if index is out-of-bounds.
257 ref T getExpand(T)(ref T[] arr, size_t index)
258 {
259 	if (index >= arr.length)
260 		arr.length = index + 1;
261 	return arr[index];
262 }
263 
264 /// ditto
265 ref T putExpand(T)(ref T[] arr, size_t index, auto ref T value)
266 {
267 	if (index >= arr.length)
268 		arr.length = index + 1;
269 	return arr[index] = value;
270 }
271 
272 /// Slices an array. Throws an Exception (not an Error)
273 /// on out-of-bounds, even in release builds.
274 T[] slice(T)(T[] arr, size_t p0, size_t p1)
275 {
276 	enforce(p0 < p1 && p1 < arr.length, "Out-of-bounds array slice");
277 	return arr[p0..p1];
278 }
279 
280 /// Given an array and its slice, returns the
281 /// start index of the slice inside the array.
282 size_t sliceIndex(T)(in T[] arr, in T[] slice)
283 {
284 	auto a = arr.ptr;
285 	auto b = a + arr.length;
286 	auto p = slice.ptr;
287 	assert(a <= p && p <= b, "Out-of-bounds array slice");
288 	return p - a;
289 }
290 
291 /// Like std.array.split, but returns null if val was empty.
292 auto splitEmpty(T, S)(T value, S separator)
293 {
294 	return value.length ? split(value, separator) : null;
295 }
296 
297 /// Include delimiter in result chunks as suffix
298 H[] splitWithSuffix(H, S)(H haystack, S separator)
299 {
300 	H[] result;
301 	while (haystack.length)
302 	{
303 		auto pos = haystack._indexOf(separator);
304 		if (pos < 0)
305 			pos = haystack.length;
306 		else
307 		{
308 			static if (is(typeof(haystack[0] == separator)))
309 				pos += 1;
310 			else
311 			static if (is(typeof(haystack[0..1] == separator)))
312 				pos += separator.length;
313 			else
314 				static assert(false, "Don't know how to split " ~ H.stringof ~ " by " ~ S.stringof);
315 		}
316 		result ~= haystack[0..pos];
317 		haystack = haystack[pos..$];
318 	}
319 	return result;
320 }
321 
322 unittest
323 {
324 	assert("a\nb".splitWithSuffix('\n') == ["a\n", "b"]);
325 	assert([1, 0, 2].splitWithSuffix(0) == [[1, 0], [2]]);
326 
327 	assert("a\r\nb".splitWithSuffix("\r\n") == ["a\r\n", "b"]);
328 	assert([1, 0, 0, 2].splitWithSuffix([0, 0]) == [[1, 0, 0], [2]]);
329 }
330 
331 /// Include delimiter in result chunks as prefix
332 H[] splitWithPrefix(H, S)(H haystack, S separator)
333 {
334 	H[] result;
335 	while (haystack.length)
336 	{
337 		auto pos = haystack[1..$]._indexOf(separator);
338 		if (pos < 0)
339 			pos = haystack.length;
340 		else
341 			pos++;
342 		result ~= haystack[0..pos];
343 		haystack = haystack[pos..$];
344 	}
345 	return result;
346 }
347 
348 unittest
349 {
350 	assert("a\nb".splitWithPrefix('\n') == ["a", "\nb"]);
351 	assert([1, 0, 2].splitWithPrefix(0) == [[1], [0, 2]]);
352 
353 	assert("a\r\nb".splitWithPrefix("\r\n") == ["a", "\r\nb"]);
354 	assert([1, 0, 0, 2].splitWithPrefix([0, 0]) == [[1], [0, 0, 2]]);
355 }
356 
357 /// Ensure that arr is non-null if empty.
358 T nonNull(T)(T arr)
359 {
360 	if (arr !is null)
361 		return arr;
362 	typeof(arr[0])[0] v;
363 	auto p = v.ptr;
364 	return p[0..0];
365 }
366 
367 /// If arr is null, return null. Otherwise, return a non-null
368 /// transformation dg over arr.
369 template mapNull(alias dg)
370 {
371 	auto mapNull(T)(T arr)
372 	{
373 		if (arr is null)
374 			return null;
375 		return dg(arr).nonNull;
376 	}
377 }
378 
379 unittest
380 {
381 	assert(string.init.mapNull!(s => s          )  is null);
382 	assert(string.init.mapNull!(s => ""         )  is null);
383 	assert(""         .mapNull!(s => s          ) !is null);
384 	assert(""         .mapNull!(s => string.init) !is null);
385 }
386 
387 /// Select and return a random element from the array.
388 auto ref sample(T)(T[] arr)
389 {
390 	import std.random;
391 	return arr[uniform(0, $)];
392 }
393 
394 unittest
395 {
396 	assert([7, 7, 7].sample == 7);
397 	auto s = ["foo", "bar"].sample(); // Issue 13807
398 	const(int)[] a2 = [5]; sample(a2);
399 }
400 
401 /// Select and return a random element from the array,
402 /// and remove it from the array.
403 T pluck(T)(ref T[] arr)
404 {
405 	import std.random;
406 	auto pos = uniform(0, arr.length);
407 	auto result = arr[pos];
408 	arr = arr.remove(pos);
409 	return result;
410 }
411 
412 unittest
413 {
414 	auto arr = [1, 2, 3];
415 	auto res = [arr.pluck, arr.pluck, arr.pluck];
416 	res.sort();
417 	assert(res == [1, 2, 3]);
418 }
419 
420 import std.functional;
421 
422 T[] countSort(alias value = "a", T)(T[] arr)
423 {
424 	alias unaryFun!value getValue;
425 	alias typeof(getValue(arr[0])) V;
426 	if (arr.length == 0) return arr;
427 	V min = getValue(arr[0]), max = getValue(arr[0]);
428 	foreach (el; arr[1..$])
429 	{
430 		auto v = getValue(el);
431 		if (min > v)
432 			min = v;
433 		if (max < v)
434 			max = v;
435 	}
436 	auto n = max-min+1;
437 	auto counts = new size_t[n];
438 	foreach (el; arr)
439 		counts[getValue(el)-min]++;
440 	auto indices = new size_t[n];
441 	foreach (i; 1..n)
442 		indices[i] = indices[i-1] + counts[i-1];
443 	T[] result = new T[arr.length];
444 	foreach (el; arr)
445 		result[indices[getValue(el)-min]++] = el;
446 	return result;
447 }
448 
449 // ***************************************************************************
450 
451 void stackPush(T)(ref T[] arr, T val)
452 {
453 	arr ~= val;
454 }
455 alias stackPush queuePush;
456 
457 T stackPeek(T)(T[] arr) { return arr[$-1]; }
458 
459 T stackPop(T)(ref T[] arr)
460 {
461 	auto ret = arr[$-1];
462 	arr = arr[0..$-1];
463 	return ret;
464 }
465 
466 T queuePeek(T)(T[] arr) { return arr[0]; }
467 
468 T queuePeekLast(T)(T[] arr) { return arr[$-1]; }
469 
470 T queuePop(T)(ref T[] arr)
471 {
472 	auto ret = arr[0];
473 	arr = arr[1..$];
474 	if (!arr.length) arr = null;
475 	return ret;
476 }
477 
478 T shift(T)(ref T[] arr) { T result = arr[0]; arr = arr[1..$]; return result; }
479 T[] shift(T)(ref T[] arr, size_t n) { T[] result = arr[0..n]; arr = arr[n..$]; return result; }
480 T[N] shift(size_t N, T)(ref T[] arr) { T[N] result = cast(T[N])(arr[0..N]); arr = arr[N..$]; return result; }
481 void unshift(T)(ref T[] arr, T value) { arr.insertInPlace(0, value); }
482 void unshift(T)(ref T[] arr, T[] value) { arr.insertInPlace(0, value); }
483 
484 unittest
485 {
486 	int[] arr = [1, 2, 3];
487 	assert(arr.shift == 1);
488 	assert(arr == [2, 3]);
489 	assert(arr.shift(2) == [2, 3]);
490 	assert(arr == []);
491 
492 	arr = [3];
493 	arr.unshift([1, 2]);
494 	assert(arr == [1, 2, 3]);
495 	arr.unshift(0);
496 	assert(arr == [0, 1, 2, 3]);
497 
498 	assert(arr.shift!2 == [0, 1]);
499 	assert(arr == [2, 3]);
500 }
501 
502 /// If arr starts with prefix, slice it off and return true.
503 /// Otherwise leave arr unchaned and return false.
504 deprecated("Use std.algorithm.skipOver instead")
505 bool eat(T)(ref T[] arr, T[] prefix)
506 {
507 	if (arr.startsWith(prefix))
508 	{
509 		arr = arr[prefix.length..$];
510 		return true;
511 	}
512 	return false;
513 }
514 
515 // Overload disambiguator
516 private sizediff_t _indexOf(H, N)(H haystack, N needle)
517 {
518 	static import std.string;
519 
520 	static if (is(typeof(ae.utils.array.indexOf(haystack, needle))))
521 		alias indexOf = ae.utils.array.indexOf;
522 	else
523 	static if (is(typeof(std..string.indexOf(haystack, needle))))
524 		alias indexOf = std..string.indexOf;
525 	else
526 		static assert(false, "No suitable indexOf overload found");
527 	return indexOf(haystack, needle);
528 }
529 
530 /// Returns the slice of source up to the first occurrence of delim,
531 /// and fast-forwards source to the point after delim.
532 /// If delim is not found, the behavior depends on orUntilEnd:
533 /// - If orUntilEnd is false (default), it returns null
534 ///   and leaves source unchanged.
535 /// - If orUntilEnd is true, it returns source,
536 ///   and then sets source to null.
537 T[] skipUntil(T, D)(ref T[] source, D delim, bool orUntilEnd = false)
538 {
539 	enum bool isSlice = is(typeof(source[0..1]==delim));
540 	enum bool isElem  = is(typeof(source[0]   ==delim));
541 	static assert(isSlice || isElem, "Can't skip " ~ T.stringof ~ " until " ~ D.stringof);
542 	static assert(isSlice != isElem, "Ambiguous types for skipUntil: " ~ T.stringof ~ " and " ~ D.stringof);
543 	static if (isSlice)
544 		auto delimLength = delim.length;
545 	else
546 		enum delimLength = 1;
547 
548 	static import std.string;
549 
550 	auto i = _indexOf(source, delim);
551 	if (i < 0)
552 	{
553 		if (orUntilEnd)
554 		{
555 			auto result = source;
556 			source = null;
557 			return result;
558 		}
559 		else
560 			return null;
561 	}
562 	auto result = source[0..i];
563 	source = source[i+delimLength..$];
564 	return result;
565 }
566 
567 deprecated("Use skipUntil instead")
568 enum OnEof { returnNull, returnRemainder, throwException }
569 
570 deprecated("Use skipUntil instead")
571 template eatUntil(OnEof onEof = OnEof.throwException)
572 {
573 	T[] eatUntil(T, D)(ref T[] source, D delim)
574 	{
575 		static if (onEof == OnEof.returnNull)
576 			return skipUntil(source, delim, false);
577 		else
578 		static if (onEof == OnEof.returnRemainder)
579 			return skipUntil(source, delim, true);
580 		else
581 			return skipUntil(source, delim, false).enforce("Delimiter not found in source");
582 	}
583 }
584 
585 deprecated unittest
586 {
587 	string s;
588 
589 	s = "Mary had a little lamb";
590 	assert(s.eatUntil(" ") == "Mary");
591 	assert(s.eatUntil(" ") == "had");
592 	assert(s.eatUntil(' ') == "a");
593 
594 	assertThrown!Exception(s.eatUntil("#"));
595 	assert(s.eatUntil!(OnEof.returnNull)("#") is null);
596 	assert(s.eatUntil!(OnEof.returnRemainder)("#") == "little lamb");
597 
598 	ubyte[] bytes = [1, 2, 0, 3, 4, 0, 0];
599 	assert(bytes.eatUntil(0) == [1, 2]);
600 	assert(bytes.eatUntil([ubyte(0), ubyte(0)]) == [3, 4]);
601 }
602 
603 // ***************************************************************************
604 
605 // Equivalents of array(xxx(...)), but less parens and UFCS-able.
606 auto amap(alias pred, T)(T[] arr) { return array(map!pred(arr)); }
607 auto afilter(alias pred, T)(T[] arr) { return array(filter!pred(arr)); }
608 auto auniq(T)(T[] arr) { return array(uniq(arr)); }
609 auto asort(alias pred, T)(T[] arr) { sort!pred(arr); return arr; }
610 
611 unittest
612 {
613 	assert([1, 2, 3].amap!`a*2`() == [2, 4, 6]);
614 	assert([1, 2, 3].amap!(n => n*n)() == [1, 4, 9]);
615 }
616 
617 // ***************************************************************************
618 
619 /// Array with normalized comparison and hashing.
620 /// Params:
621 ///   T = array element type to wrap.
622 ///   normalize = function which should return a range of normalized elements.
623 struct NormalizedArray(T, alias normalize)
624 {
625 	T[] arr;
626 
627 	this(T[] arr) { this.arr = arr; }
628 
629 	int opCmp    (in T[]                 other) const { return std.algorithm.cmp(normalize(arr), normalize(other    ))   ; }
630 	int opCmp    (    const typeof(this) other) const { return std.algorithm.cmp(normalize(arr), normalize(other.arr))   ; }
631 	int opCmp    (ref const typeof(this) other) const { return std.algorithm.cmp(normalize(arr), normalize(other.arr))   ; }
632 	bool opEquals(in T[]                 other) const { return std.algorithm.cmp(normalize(arr), normalize(other    ))==0; }
633 	bool opEquals(    const typeof(this) other) const { return std.algorithm.cmp(normalize(arr), normalize(other.arr))==0; }
634 	bool opEquals(ref const typeof(this) other) const { return std.algorithm.cmp(normalize(arr), normalize(other.arr))==0; }
635 
636 	hash_t toHashReal() const
637 	{
638 		import std.digest.crc;
639 		CRC32 crc;
640 		foreach (c; normalize(arr))
641 			crc.put(cast(ubyte[])((&c)[0..1]));
642 		static union Result { ubyte[4] crcResult; hash_t hash; }
643 		return Result(crc.finish()).hash;
644 	}
645 
646 	hash_t toHash() const nothrow @trusted
647 	{
648 		return (cast(hash_t delegate() nothrow @safe)&toHashReal)();
649 	}
650 }
651 
652 // ***************************************************************************
653 
654 /// Equivalent of PHP's `list` language construct:
655 /// http://php.net/manual/en/function.list.php
656 /// Works with arrays and tuples.
657 /// Specify `null` as an argument to ignore that index
658 /// (equivalent of `list(x, , y)` in PHP).
659 auto list(Args...)(auto ref Args args)
660 {
661 	struct List
662 	{
663 		auto dummy() { return args[0]; }
664 		void opAssign(T)(auto ref T t)
665 		{
666 			assert(t.length == args.length,
667 				"Assigning %d elements to list with %d elements"
668 				.format(t.length, args.length));
669 			foreach (i; RangeTuple!(Args.length))
670 				static if (!is(Args[i] == typeof(null)))
671 					args[i] = t[i];
672 		}
673 	}
674 	return List();
675 }
676 
677 ///
678 unittest
679 {
680 	string name, value;
681 	list(name, null, value) = "NAME=VALUE".findSplit("=");
682 	assert(name == "NAME" && value == "VALUE");
683 }
684 
685 version(LittleEndian)
686 unittest
687 {
688 	uint onlyValue;
689 	ubyte[] data = [ubyte(42), 0, 0, 0];
690 	list(onlyValue) = cast(uint[])data;
691 	assert(onlyValue == 42);
692 }