1 /**
2  * Array utility functions
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.utils.array;
15 
16 import std.algorithm.iteration;
17 import std.algorithm.mutation;
18 import std.algorithm.searching;
19 import std.algorithm.sorting;
20 import std.array;
21 import std.exception;
22 import std.format;
23 import std.traits;
24 
25 import ae.utils.meta;
26 
27 public import ae.utils.aa;
28 public import ae.utils.appender;
29 
30 /// Slice a variable.
31 T[] toArray(T)(ref T v)
32 {
33 	return (&v)[0..1];
34 }
35 
36 /// Return the value represented as an array of bytes.
37 @property inout(ubyte)[] bytes(T)(ref inout(T) value)
38 	if (!hasIndirections!T)
39 {
40 	return value.toArray().bytes;
41 }
42 
43 /// ditto
44 @property inout(ubyte)[] bytes(T)(inout(T) value)
45 	if (is(T U : U[]) && !hasIndirections!U)
46 {
47 	return cast(inout(ubyte)[])value;
48 }
49 
50 unittest
51 {
52 	ubyte b = 5;
53 	assert(b.bytes == [5]);
54 
55 	struct S { ubyte b = 5; }
56 	S s;
57 	assert(s.bytes == [5]);
58 
59 	ubyte[1] sa = [5];
60 	assert(sa.bytes == [5]);
61 
62 	void[] va = sa[];
63 	assert(va.bytes == [5]);
64 }
65 
66 /// Reverse of bytes()
67 ref inout(T) fromBytes(T)(inout(ubyte)[] bytes)
68 	if (!hasIndirections!T)
69 {
70 	assert(bytes.length == T.sizeof, "Data length mismatch for %s".format(T.stringof));
71 	return *cast(inout(T)*)bytes.ptr;
72 }
73 
74 /// ditto
75 inout(T) fromBytes(T)(inout(ubyte)[] bytes)
76 	if (is(T U : U[]) && !hasIndirections!U)
77 {
78 	return cast(inout(T))bytes;
79 }
80 
81 unittest
82 {
83 	{       ubyte b = 5; assert(b.bytes.fromBytes!ubyte == 5); }
84 	{ const ubyte b = 5; assert(b.bytes.fromBytes!ubyte == 5); }
85 	struct S { ubyte b; }
86 	{       ubyte b = 5; assert(b.bytes.fromBytes!S == S(5)); }
87 }
88 
89 unittest
90 {
91 	struct S { ubyte a, b; }
92 	ubyte[] arr = [1, 2];
93 	assert(arr.fromBytes!S == S(1, 2));
94 	assert(arr.fromBytes!(S[]) == [S(1, 2)]);
95 }
96 
97 /// Returns an empty, but non-null slice of T.
98 auto emptySlice(T)() pure
99 {
100 	T[0] arr;
101 	auto p = arr.ptr;
102 	return p[0..0];
103 }
104 
105 unittest
106 {
107 	int[] arr = emptySlice!int;
108 	assert(arr.ptr);
109 	immutable int[] iarr = emptySlice!int;
110 	assert(iarr.ptr);
111 }
112 
113 int memcmp(in ubyte[] a, in ubyte[] b)
114 {
115 	assert(a.length == b.length);
116 	import core.stdc.string : memcmp;
117 	return memcmp(a.ptr, b.ptr, a.length);
118 }
119 
120 /// Like std.algorithm.copy, but without the auto-decode bullshit.
121 /// https://issues.dlang.org/show_bug.cgi?id=13650
122 void memmove(T)(T[] dst, in T[] src)
123 {
124 	assert(src.length == dst.length);
125 	import core.stdc.string : memmove;
126 	memmove(dst.ptr, src.ptr, dst.length * T.sizeof);
127 }
128 
129 T[] vector(string op, T)(T[] a, T[] b)
130 {
131 	assert(a.length == b.length);
132 	T[] result = new T[a.length];
133 	foreach (i, ref r; result)
134 		r = mixin("a[i]" ~ op ~ "b[i]");
135 	return result;
136 }
137 
138 T[] vectorAssign(string op, T)(T[] a, T[] b)
139 {
140 	assert(a.length == b.length);
141 	foreach (i, ref r; a)
142 		mixin("r " ~ op ~ "= b[i];");
143 	return a;
144 }
145 
146 T[] padRight(T)(T[] s, size_t l, T c)
147 {
148 	auto ol = s.length;
149 	if (ol < l)
150 	{
151 		s.length = l;
152 		s[ol..$] = c;
153 	}
154 	return s;
155 }
156 
157 T[] repeatOne(T)(T c, size_t l)
158 {
159 	T[] result = new T[l];
160 	result[] = c;
161 	return result;
162 }
163 
164 /// Complement to std.string.indexOf which works with arrays
165 /// of non-character types.
166 /// Unlike std.algorithm.countUntil, it does not auto-decode,
167 /// and returns an index usable for array indexing/slicing.
168 sizediff_t indexOf(T, D)(in T[] arr, in D val)
169 //	if (!isSomeChar!T)
170 	if (!isSomeChar!T && is(typeof(arr.countUntil(val))) && is(typeof(arr[0]==val)))
171 {
172 	//assert(arr[0]==val);
173 	return arr.countUntil(val);
174 }
175 
176 sizediff_t indexOf(T)(in T[] arr, in T[] val) /// ditto
177 	if (!isSomeChar!T && is(typeof(arr.countUntil(val))))
178 {
179 	return arr.countUntil(val);
180 }
181 
182 /// Index of element, no BS.
183 sizediff_t indexOfElement(T, D)(in T[] arr, auto ref in D val)
184 	if (is(typeof(arr[0]==val)))
185 {
186 	foreach (i, ref v; arr)
187 		if (v == val)
188 			return i;
189 	return -1;
190 }
191 
192 /// Whether array contains value, no BS.
193 bool contains(T, V)(in T[] arr, auto ref in V val)
194 	if (is(typeof(arr[0]==val)))
195 {
196 	return arr.indexOfElement(val) >= 0;
197 }
198 
199 /// Ditto, for substrings
200 bool contains(T, U)(T[] str, U[] what)
201 if (is(Unqual!T == Unqual!U))
202 {
203 	return str._indexOf(what) >= 0;
204 }
205 
206 unittest
207 {
208 	assert( "abc".contains('b'));
209 	assert(!"abc".contains('x'));
210 	assert( "abc".contains("b"));
211 	assert(!"abc".contains("x"));
212 }
213 
214 /// Like startsWith, but with an offset.
215 bool containsAt(T)(in T[] haystack, in T[] needle, size_t offset)
216 {
217 	return haystack.length >= offset + needle.length
218 		&& haystack[offset..offset+needle.length] == needle;
219 }
220 
221 unittest
222 {
223 	assert( "abracadabra".containsAt("ada", 5));
224 	assert(!"abracadabra".containsAt("ada", 6));
225 	assert(!"abracadabra".containsAt("ada", 99));
226 }
227 
228 bool isIn(T)(T val, in T[] arr)
229 {
230 	return arr.contains(val);
231 }
232 
233 bool isOneOf(T)(T val, T[] arr...)
234 {
235 	return arr.contains(val);
236 }
237 
238 /// Like AA.get - soft indexing, throws an
239 /// Exception (not an Error) on out-of-bounds,
240 /// even in release builds.
241 ref T get(T)(T[] arr, size_t index)
242 {
243 	enforce(index < arr.length, "Out-of-bounds array access");
244 	return arr[index];
245 }
246 
247 /// Like AA.get - soft indexing, returns
248 /// default value on out-of-bounds.
249 auto get(T)(T[] arr, size_t index, auto ref T defaultValue)
250 {
251 	if (index >= arr.length)
252 		return defaultValue;
253 	return arr[index];
254 }
255 
256 /// Expand the array if index is out-of-bounds.
257 ref T getExpand(T)(ref T[] arr, size_t index)
258 {
259 	if (index >= arr.length)
260 		arr.length = index + 1;
261 	return arr[index];
262 }
263 
264 /// ditto
265 ref T putExpand(T)(ref T[] arr, size_t index, auto ref T value)
266 {
267 	if (index >= arr.length)
268 		arr.length = index + 1;
269 	return arr[index] = value;
270 }
271 
272 /// Slices an array. Throws an Exception (not an Error)
273 /// on out-of-bounds, even in release builds.
274 T[] slice(T)(T[] arr, size_t p0, size_t p1)
275 {
276 	enforce(p0 < p1 && p1 < arr.length, "Out-of-bounds array slice");
277 	return arr[p0..p1];
278 }
279 
280 /// Given an array and its slice, returns the
281 /// start index of the slice inside the array.
282 size_t sliceIndex(T)(in T[] arr, in T[] slice)
283 {
284 	auto a = arr.ptr;
285 	auto b = a + arr.length;
286 	auto p = slice.ptr;
287 	assert(a <= p && p <= b, "Out-of-bounds array slice");
288 	return p - a;
289 }
290 
291 /// Like std.array.split, but returns null if val was empty.
292 auto splitEmpty(T, S)(T value, S separator)
293 {
294 	return value.length ? split(value, separator) : null;
295 }
296 
297 /// Include delimiter in result chunks as suffix
298 H[] splitWithSuffix(H, S)(H haystack, S separator)
299 {
300 	H[] result;
301 	while (haystack.length)
302 	{
303 		auto pos = haystack._indexOf(separator);
304 		if (pos < 0)
305 			pos = haystack.length;
306 		else
307 		{
308 			static if (is(typeof(haystack[0] == separator)))
309 				pos += 1;
310 			else
311 			static if (is(typeof(haystack[0..1] == separator)))
312 				pos += separator.length;
313 			else
314 				static assert(false, "Don't know how to split " ~ H.stringof ~ " by " ~ S.stringof);
315 		}
316 		result ~= haystack[0..pos];
317 		haystack = haystack[pos..$];
318 	}
319 	return result;
320 }
321 
322 unittest
323 {
324 	assert("a\nb".splitWithSuffix('\n') == ["a\n", "b"]);
325 	assert([1, 0, 2].splitWithSuffix(0) == [[1, 0], [2]]);
326 
327 	assert("a\r\nb".splitWithSuffix("\r\n") == ["a\r\n", "b"]);
328 	assert([1, 0, 0, 2].splitWithSuffix([0, 0]) == [[1, 0, 0], [2]]);
329 }
330 
331 /// Include delimiter in result chunks as prefix
332 H[] splitWithPrefix(H, S)(H haystack, S separator)
333 {
334 	H[] result;
335 	while (haystack.length)
336 	{
337 		auto pos = haystack[1..$]._indexOf(separator);
338 		if (pos < 0)
339 			pos = haystack.length;
340 		else
341 			pos++;
342 		result ~= haystack[0..pos];
343 		haystack = haystack[pos..$];
344 	}
345 	return result;
346 }
347 
348 unittest
349 {
350 	assert("a\nb".splitWithPrefix('\n') == ["a", "\nb"]);
351 	assert([1, 0, 2].splitWithPrefix(0) == [[1], [0, 2]]);
352 
353 	assert("a\r\nb".splitWithPrefix("\r\n") == ["a", "\r\nb"]);
354 	assert([1, 0, 0, 2].splitWithPrefix([0, 0]) == [[1], [0, 0, 2]]);
355 }
356 
357 /// Select and return a random element from the array.
358 auto ref sample(T)(T[] arr)
359 {
360 	import std.random;
361 	return arr[uniform(0, $)];
362 }
363 
364 unittest
365 {
366 	assert([7, 7, 7].sample == 7);
367 	auto s = ["foo", "bar"].sample(); // Issue 13807
368 	const(int)[] a2 = [5]; sample(a2);
369 }
370 
371 /// Select and return a random element from the array,
372 /// and remove it from the array.
373 T pluck(T)(ref T[] arr)
374 {
375 	import std.random;
376 	auto pos = uniform(0, arr.length);
377 	auto result = arr[pos];
378 	arr = arr.remove(pos);
379 	return result;
380 }
381 
382 unittest
383 {
384 	auto arr = [1, 2, 3];
385 	auto res = [arr.pluck, arr.pluck, arr.pluck];
386 	res.sort();
387 	assert(res == [1, 2, 3]);
388 }
389 
390 import std.functional;
391 
392 T[] countSort(alias value = "a", T)(T[] arr)
393 {
394 	alias unaryFun!value getValue;
395 	alias typeof(getValue(arr[0])) V;
396 	if (arr.length == 0) return arr;
397 	V min = getValue(arr[0]), max = getValue(arr[0]);
398 	foreach (el; arr[1..$])
399 	{
400 		auto v = getValue(el);
401 		if (min > v)
402 			min = v;
403 		if (max < v)
404 			max = v;
405 	}
406 	auto n = max-min+1;
407 	auto counts = new size_t[n];
408 	foreach (el; arr)
409 		counts[getValue(el)-min]++;
410 	auto indices = new size_t[n];
411 	foreach (i; 1..n)
412 		indices[i] = indices[i-1] + counts[i-1];
413 	T[] result = new T[arr.length];
414 	foreach (el; arr)
415 		result[indices[getValue(el)-min]++] = el;
416 	return result;
417 }
418 
419 // ***************************************************************************
420 
421 void stackPush(T)(ref T[] arr, T val)
422 {
423 	arr ~= val;
424 }
425 alias stackPush queuePush;
426 
427 T stackPeek(T)(T[] arr) { return arr[$-1]; }
428 
429 T stackPop(T)(ref T[] arr)
430 {
431 	auto ret = arr[$-1];
432 	arr = arr[0..$-1];
433 	return ret;
434 }
435 
436 T queuePeek(T)(T[] arr) { return arr[0]; }
437 
438 T queuePeekLast(T)(T[] arr) { return arr[$-1]; }
439 
440 T queuePop(T)(ref T[] arr)
441 {
442 	auto ret = arr[0];
443 	arr = arr[1..$];
444 	if (!arr.length) arr = null;
445 	return ret;
446 }
447 
448 T shift(T)(ref T[] arr) { T result = arr[0]; arr = arr[1..$]; return result; }
449 T[] shift(T)(ref T[] arr, size_t n) { T[] result = arr[0..n]; arr = arr[n..$]; return result; }
450 T[N] shift(size_t N, T)(ref T[] arr) { T[N] result = cast(T[N])(arr[0..N]); arr = arr[N..$]; return result; }
451 void unshift(T)(ref T[] arr, T value) { arr.insertInPlace(0, value); }
452 void unshift(T)(ref T[] arr, T[] value) { arr.insertInPlace(0, value); }
453 
454 unittest
455 {
456 	int[] arr = [1, 2, 3];
457 	assert(arr.shift == 1);
458 	assert(arr == [2, 3]);
459 	assert(arr.shift(2) == [2, 3]);
460 	assert(arr == []);
461 
462 	arr = [3];
463 	arr.unshift([1, 2]);
464 	assert(arr == [1, 2, 3]);
465 	arr.unshift(0);
466 	assert(arr == [0, 1, 2, 3]);
467 
468 	assert(arr.shift!2 == [0, 1]);
469 	assert(arr == [2, 3]);
470 }
471 
472 /// If arr starts with prefix, slice it off and return true.
473 /// Otherwise leave arr unchaned and return false.
474 deprecated("Use std.algorithm.skipOver instead")
475 bool eat(T)(ref T[] arr, T[] prefix)
476 {
477 	if (arr.startsWith(prefix))
478 	{
479 		arr = arr[prefix.length..$];
480 		return true;
481 	}
482 	return false;
483 }
484 
485 // Overload disambiguator
486 private sizediff_t _indexOf(H, N)(H haystack, N needle)
487 {
488 	static import std.string;
489 
490 	static if (is(typeof(ae.utils.array.indexOf(haystack, needle))))
491 		alias indexOf = ae.utils.array.indexOf;
492 	else
493 	static if (is(typeof(std..string.indexOf(haystack, needle))))
494 		alias indexOf = std..string.indexOf;
495 	else
496 		static assert(false, "No suitable indexOf overload found");
497 	return indexOf(haystack, needle);
498 }
499 
500 /// Returns the slice of source up to the first occurrence of delim,
501 /// and fast-forwards source to the point after delim.
502 /// If delim is not found, the behavior depends on orUntilEnd:
503 /// - If orUntilEnd is false (default), it returns null
504 ///   and leaves source unchanged.
505 /// - If orUntilEnd is true, it returns source,
506 ///   and then sets source to null.
507 T[] skipUntil(T, D)(ref T[] source, D delim, bool orUntilEnd = false)
508 {
509 	enum bool isSlice = is(typeof(source[0..1]==delim));
510 	enum bool isElem  = is(typeof(source[0]   ==delim));
511 	static assert(isSlice || isElem, "Can't skip " ~ T.stringof ~ " until " ~ D.stringof);
512 	static assert(isSlice != isElem, "Ambiguous types for skipUntil: " ~ T.stringof ~ " and " ~ D.stringof);
513 	static if (isSlice)
514 		auto delimLength = delim.length;
515 	else
516 		enum delimLength = 1;
517 
518 	static import std.string;
519 
520 	auto i = _indexOf(source, delim);
521 	if (i < 0)
522 	{
523 		if (orUntilEnd)
524 		{
525 			auto result = source;
526 			source = null;
527 			return result;
528 		}
529 		else
530 			return null;
531 	}
532 	auto result = source[0..i];
533 	source = source[i+delimLength..$];
534 	return result;
535 }
536 
537 deprecated("Use skipUntil instead")
538 enum OnEof { returnNull, returnRemainder, throwException }
539 
540 deprecated("Use skipUntil instead")
541 template eatUntil(OnEof onEof = OnEof.throwException)
542 {
543 	T[] eatUntil(T, D)(ref T[] source, D delim)
544 	{
545 		static if (onEof == OnEof.returnNull)
546 			return skipUntil(source, delim, false);
547 		else
548 		static if (onEof == OnEof.returnRemainder)
549 			return skipUntil(source, delim, true);
550 		else
551 			return skipUntil(source, delim, false).enforce("Delimiter not found in source");
552 	}
553 }
554 
555 deprecated unittest
556 {
557 	string s;
558 
559 	s = "Mary had a little lamb";
560 	assert(s.eatUntil(" ") == "Mary");
561 	assert(s.eatUntil(" ") == "had");
562 	assert(s.eatUntil(' ') == "a");
563 
564 	assertThrown!Exception(s.eatUntil("#"));
565 	assert(s.eatUntil!(OnEof.returnNull)("#") is null);
566 	assert(s.eatUntil!(OnEof.returnRemainder)("#") == "little lamb");
567 
568 	ubyte[] bytes = [1, 2, 0, 3, 4, 0, 0];
569 	assert(bytes.eatUntil(0) == [1, 2]);
570 	assert(bytes.eatUntil([ubyte(0), ubyte(0)]) == [3, 4]);
571 }
572 
573 // ***************************************************************************
574 
575 // Equivalents of array(xxx(...)), but less parens and UFCS-able.
576 auto amap(alias pred, T)(T[] arr) { return array(map!pred(arr)); }
577 auto afilter(alias pred, T)(T[] arr) { return array(filter!pred(arr)); }
578 auto auniq(T)(T[] arr) { return array(uniq(arr)); }
579 auto asort(alias pred, T)(T[] arr) { sort!pred(arr); return arr; }
580 
581 unittest
582 {
583 	assert([1, 2, 3].amap!`a*2`() == [2, 4, 6]);
584 	assert([1, 2, 3].amap!(n => n*n)() == [1, 4, 9]);
585 }
586 
587 // ***************************************************************************
588 
589 /// Array with normalized comparison and hashing.
590 /// Params:
591 ///   T = array element type to wrap.
592 ///   normalize = function which should return a range of normalized elements.
593 struct NormalizedArray(T, alias normalize)
594 {
595 	T[] arr;
596 
597 	this(T[] arr) { this.arr = arr; }
598 
599 	int opCmp    (in T[]                 other) const { return std.algorithm.cmp(normalize(arr), normalize(other    ))   ; }
600 	int opCmp    (    const typeof(this) other) const { return std.algorithm.cmp(normalize(arr), normalize(other.arr))   ; }
601 	int opCmp    (ref const typeof(this) other) const { return std.algorithm.cmp(normalize(arr), normalize(other.arr))   ; }
602 	bool opEquals(in T[]                 other) const { return std.algorithm.cmp(normalize(arr), normalize(other    ))==0; }
603 	bool opEquals(    const typeof(this) other) const { return std.algorithm.cmp(normalize(arr), normalize(other.arr))==0; }
604 	bool opEquals(ref const typeof(this) other) const { return std.algorithm.cmp(normalize(arr), normalize(other.arr))==0; }
605 
606 	hash_t toHashReal() const
607 	{
608 		import std.digest.crc;
609 		CRC32 crc;
610 		foreach (c; normalize(arr))
611 			crc.put(cast(ubyte[])((&c)[0..1]));
612 		static union Result { ubyte[4] crcResult; hash_t hash; }
613 		return Result(crc.finish()).hash;
614 	}
615 
616 	hash_t toHash() const nothrow @trusted
617 	{
618 		return (cast(hash_t delegate() nothrow @safe)&toHashReal)();
619 	}
620 }
621 
622 // ***************************************************************************
623 
624 /// Equivalent of PHP's `list` language construct:
625 /// http://php.net/manual/en/function.list.php
626 /// Works with arrays and tuples.
627 /// Specify `null` as an argument to ignore that index
628 /// (equivalent of `list(x, , y)` in PHP).
629 auto list(Args...)(auto ref Args args)
630 {
631 	struct List
632 	{
633 		auto dummy() { return args[0]; }
634 		void opAssign(T)(auto ref T t)
635 		{
636 			assert(t.length == args.length,
637 				"Assigning %d elements to list with %d elements"
638 				.format(t.length, args.length));
639 			foreach (i; RangeTuple!(Args.length))
640 				static if (!is(Args[i] == typeof(null)))
641 					args[i] = t[i];
642 		}
643 	}
644 	return List();
645 }
646 
647 ///
648 unittest
649 {
650 	string name, value;
651 	list(name, null, value) = "NAME=VALUE".findSplit("=");
652 	assert(name == "NAME" && value == "VALUE");
653 }
654 
655 version(LittleEndian)
656 unittest
657 {
658 	uint onlyValue;
659 	ubyte[] data = [ubyte(42), 0, 0, 0];
660 	list(onlyValue) = cast(uint[])data;
661 	assert(onlyValue == 42);
662 }