1 /** 2 * Array utility functions 3 * 4 * License: 5 * This Source Code Form is subject to the terms of 6 * the Mozilla Public License, v. 2.0. If a copy of 7 * the MPL was not distributed with this file, You 8 * can obtain one at http://mozilla.org/MPL/2.0/. 9 * 10 * Authors: 11 * Vladimir Panteleev <vladimir@thecybershadow.net> 12 */ 13 14 module ae.utils.array; 15 16 import std.algorithm.iteration; 17 import std.algorithm.mutation; 18 import std.algorithm.searching; 19 import std.algorithm.sorting; 20 import std.array; 21 import std.exception; 22 import std.format; 23 import std.functional; 24 import std.traits; 25 26 import ae.utils.meta; 27 28 public import ae.utils.aa; 29 public import ae.utils.appender; 30 31 /// Slice a variable. 32 T[] toArray(T)(ref T v) 33 { 34 return (&v)[0..1]; 35 } 36 37 /// std.array.staticArray shim 38 static if (__traits(hasMember, std.array, "staticArray")) 39 public import std.array : staticArray; 40 else 41 pragma(inline, true) T[n] staticArray(T, size_t n)(auto ref T[n] a) { return a; } 42 43 /// Return the value represented as an array of bytes. 44 @property inout(ubyte)[] bytes(T)(ref inout(T) value) 45 if (!hasIndirections!T) 46 { 47 return value.toArray().bytes; 48 } 49 50 /// ditto 51 @property inout(ubyte)[] bytes(T)(inout(T) value) 52 if (is(T U : U[]) && !hasIndirections!U) 53 { 54 return cast(inout(ubyte)[])value; 55 } 56 57 unittest 58 { 59 ubyte b = 5; 60 assert(b.bytes == [5]); 61 62 struct S { ubyte b = 5; } 63 S s; 64 assert(s.bytes == [5]); 65 66 ubyte[1] sa = [5]; 67 assert(sa.bytes == [5]); 68 69 void[] va = sa[]; 70 assert(va.bytes == [5]); 71 } 72 73 /// Reverse of bytes() 74 ref inout(T) fromBytes(T)(inout(ubyte)[] bytes) 75 if (!hasIndirections!T) 76 { 77 assert(bytes.length == T.sizeof, "Data length mismatch for %s".format(T.stringof)); 78 return *cast(inout(T)*)bytes.ptr; 79 } 80 81 /// ditto 82 inout(T) fromBytes(T)(inout(ubyte)[] bytes) 83 if (is(T U : U[]) && !hasIndirections!U) 84 { 85 return cast(inout(T))bytes; 86 } 87 88 unittest 89 { 90 { ubyte b = 5; assert(b.bytes.fromBytes!ubyte == 5); } 91 { const ubyte b = 5; assert(b.bytes.fromBytes!ubyte == 5); } 92 struct S { ubyte b; } 93 { ubyte b = 5; assert(b.bytes.fromBytes!S == S(5)); } 94 } 95 96 unittest 97 { 98 struct S { ubyte a, b; } 99 ubyte[] arr = [1, 2]; 100 assert(arr.fromBytes!S == S(1, 2)); 101 assert(arr.fromBytes!(S[]) == [S(1, 2)]); 102 } 103 104 /// Returns an empty, but non-null slice of T. 105 auto emptySlice(T)() pure 106 { 107 static if (false) // LDC optimizes this out 108 { 109 T[0] arr; 110 auto p = arr.ptr; 111 } 112 else 113 auto p = cast(T*)1; 114 return p[0..0]; 115 } 116 117 unittest 118 { 119 int[] arr = emptySlice!int; 120 assert(arr.ptr); 121 immutable int[] iarr = emptySlice!int; 122 assert(iarr.ptr); 123 } 124 125 int memcmp(in ubyte[] a, in ubyte[] b) 126 { 127 assert(a.length == b.length); 128 import core.stdc.string : memcmp; 129 return memcmp(a.ptr, b.ptr, a.length); 130 } 131 132 /// Like std.algorithm.copy, but without the auto-decode bullshit. 133 /// https://issues.dlang.org/show_bug.cgi?id=13650 134 void memmove(T)(T[] dst, in T[] src) 135 { 136 assert(src.length == dst.length); 137 import core.stdc.string : memmove; 138 memmove(dst.ptr, src.ptr, dst.length * T.sizeof); 139 } 140 141 T[] vector(string op, T)(T[] a, T[] b) 142 { 143 assert(a.length == b.length); 144 T[] result = new T[a.length]; 145 foreach (i, ref r; result) 146 r = mixin("a[i]" ~ op ~ "b[i]"); 147 return result; 148 } 149 150 T[] vectorAssign(string op, T)(T[] a, T[] b) 151 { 152 assert(a.length == b.length); 153 foreach (i, ref r; a) 154 mixin("r " ~ op ~ "= b[i];"); 155 return a; 156 } 157 158 T[] padRight(T)(T[] s, size_t l, T c) 159 { 160 auto ol = s.length; 161 if (ol < l) 162 { 163 s.length = l; 164 s[ol..$] = c; 165 } 166 return s; 167 } 168 169 T[] repeatOne(T)(T c, size_t l) 170 { 171 T[] result = new T[l]; 172 result[] = c; 173 return result; 174 } 175 176 /// Complement to std.string.indexOf which works with arrays 177 /// of non-character types. 178 /// Unlike std.algorithm.countUntil, it does not auto-decode, 179 /// and returns an index usable for array indexing/slicing. 180 sizediff_t indexOf(T, D)(in T[] arr, in D val) 181 // if (!isSomeChar!T) 182 if (!isSomeChar!T && is(typeof(arr.countUntil(val))) && is(typeof(arr[0]==val))) 183 { 184 //assert(arr[0]==val); 185 return arr.countUntil(val); 186 } 187 188 sizediff_t indexOf(T)(in T[] arr, in T[] val) /// ditto 189 if (!isSomeChar!T && is(typeof(arr.countUntil(val)))) 190 { 191 return arr.countUntil(val); 192 } 193 194 /// Index of element, no BS. 195 sizediff_t indexOfElement(T, D)(in T[] arr, auto ref const D val) 196 if (is(typeof(arr[0]==val))) 197 { 198 foreach (i, ref v; arr) 199 if (v == val) 200 return i; 201 return -1; 202 } 203 204 /// Whether array contains value, no BS. 205 bool contains(T, V)(in T[] arr, auto ref const V val) 206 if (is(typeof(arr[0]==val))) 207 { 208 return arr.indexOfElement(val) >= 0; 209 } 210 211 /// Ditto, for substrings 212 bool contains(T, U)(T[] str, U[] what) 213 if (is(Unqual!T == Unqual!U)) 214 { 215 return str._indexOf(what) >= 0; 216 } 217 218 unittest 219 { 220 assert( "abc".contains('b')); 221 assert(!"abc".contains('x')); 222 assert( "abc".contains("b")); 223 assert(!"abc".contains("x")); 224 } 225 226 /// Like startsWith, but with an offset. 227 bool containsAt(T)(in T[] haystack, in T[] needle, size_t offset) 228 { 229 return haystack.length >= offset + needle.length 230 && haystack[offset..offset+needle.length] == needle; 231 } 232 233 unittest 234 { 235 assert( "abracadabra".containsAt("ada", 5)); 236 assert(!"abracadabra".containsAt("ada", 6)); 237 assert(!"abracadabra".containsAt("ada", 99)); 238 } 239 240 bool isIn(T)(T val, in T[] arr) 241 { 242 return arr.contains(val); 243 } 244 245 bool isOneOf(T)(T val, T[] arr...) 246 { 247 return arr.contains(val); 248 } 249 250 /// Like AA.get - soft indexing, throws an 251 /// Exception (not an Error) on out-of-bounds, 252 /// even in release builds. 253 ref T get(T)(T[] arr, size_t index) 254 { 255 enforce(index < arr.length, "Out-of-bounds array access"); 256 return arr[index]; 257 } 258 259 /// Like AA.get - soft indexing, returns 260 /// default value on out-of-bounds. 261 auto get(T)(T[] arr, size_t index, auto ref T defaultValue) 262 { 263 if (index >= arr.length) 264 return defaultValue; 265 return arr[index]; 266 } 267 268 /// Expand the array if index is out-of-bounds. 269 ref T getExpand(T)(ref T[] arr, size_t index) 270 { 271 if (index >= arr.length) 272 arr.length = index + 1; 273 return arr[index]; 274 } 275 276 /// ditto 277 ref T putExpand(T)(ref T[] arr, size_t index, auto ref T value) 278 { 279 if (index >= arr.length) 280 arr.length = index + 1; 281 return arr[index] = value; 282 } 283 284 /// Slices an array. Throws an Exception (not an Error) 285 /// on out-of-bounds, even in release builds. 286 T[] slice(T)(T[] arr, size_t p0, size_t p1) 287 { 288 enforce(p0 < p1 && p1 < arr.length, "Out-of-bounds array slice"); 289 return arr[p0..p1]; 290 } 291 292 /// Given an array and its slice, returns the 293 /// start index of the slice inside the array. 294 size_t sliceIndex(T)(in T[] arr, in T[] slice) 295 { 296 auto a = arr.ptr; 297 auto b = a + arr.length; 298 auto p = slice.ptr; 299 assert(a <= p && p <= b, "Out-of-bounds array slice"); 300 return p - a; 301 } 302 303 /// Like std.array.split, but returns null if val was empty. 304 auto splitEmpty(T, S)(T value, S separator) 305 { 306 return value.length ? split(value, separator) : null; 307 } 308 309 /// Like std.array.split, but always returns a non-empty array. 310 auto split1(T, S)(T value, S separator) 311 { 312 auto result = split(value, separator); 313 return result.length ? result : [value]; 314 } 315 316 /// Include delimiter in result chunks as suffix 317 H[] splitWithSuffix(H, S)(H haystack, S separator) 318 { 319 H[] result; 320 while (haystack.length) 321 { 322 auto pos = haystack._indexOf(separator); 323 if (pos < 0) 324 pos = haystack.length; 325 else 326 { 327 static if (is(typeof(haystack[0] == separator))) 328 pos += 1; 329 else 330 static if (is(typeof(haystack[0..1] == separator))) 331 pos += separator.length; 332 else 333 static assert(false, "Don't know how to split " ~ H.stringof ~ " by " ~ S.stringof); 334 } 335 result ~= haystack[0..pos]; 336 haystack = haystack[pos..$]; 337 } 338 return result; 339 } 340 341 unittest 342 { 343 assert("a\nb".splitWithSuffix('\n') == ["a\n", "b"]); 344 assert([1, 0, 2].splitWithSuffix(0) == [[1, 0], [2]]); 345 346 assert("a\r\nb".splitWithSuffix("\r\n") == ["a\r\n", "b"]); 347 assert([1, 0, 0, 2].splitWithSuffix([0, 0]) == [[1, 0, 0], [2]]); 348 } 349 350 /// Include delimiter in result chunks as prefix 351 H[] splitWithPrefix(H, S)(H haystack, S separator) 352 { 353 H[] result; 354 while (haystack.length) 355 { 356 auto pos = haystack[1..$]._indexOf(separator); 357 if (pos < 0) 358 pos = haystack.length; 359 else 360 pos++; 361 result ~= haystack[0..pos]; 362 haystack = haystack[pos..$]; 363 } 364 return result; 365 } 366 367 unittest 368 { 369 assert("a\nb".splitWithPrefix('\n') == ["a", "\nb"]); 370 assert([1, 0, 2].splitWithPrefix(0) == [[1], [0, 2]]); 371 372 assert("a\r\nb".splitWithPrefix("\r\n") == ["a", "\r\nb"]); 373 assert([1, 0, 0, 2].splitWithPrefix([0, 0]) == [[1], [0, 0, 2]]); 374 } 375 376 /// Include delimiters in result chunks as prefix/suffix 377 S[] splitWithPrefixAndSuffix(S)(S haystack, S prefix, S suffix) 378 { 379 S[] result; 380 auto separator = suffix ~ prefix; 381 while (haystack.length) 382 { 383 auto pos = haystack._indexOf(separator); 384 if (pos < 0) 385 pos = haystack.length; 386 else 387 pos += suffix.length; 388 result ~= haystack[0..pos]; 389 haystack = haystack[pos..$]; 390 } 391 return result; 392 } 393 394 /// 395 unittest 396 { 397 auto s = q"EOF 398 Section 1: 399 10 400 11 401 12 402 Section 2: 403 21 404 22 405 23 406 Section 3: 407 31 408 32 409 33 410 EOF"; 411 auto parts = s.splitWithPrefixAndSuffix("Section ", "\n"); 412 assert(parts.length == 3 && parts.join == s); 413 foreach (part; parts) 414 assert(part.startsWith("Section ") && part.endsWith("\n")); 415 } 416 417 /// Ensure that arr is non-null if empty. 418 T[] nonNull(T)(T[] arr) 419 { 420 if (arr !is null) 421 return arr; 422 return emptySlice!(typeof(arr[0])); 423 } 424 425 /// If arr is null, return null. Otherwise, return a non-null 426 /// transformation dg over arr. 427 template mapNull(alias dg) 428 { 429 auto mapNull(T)(T arr) 430 { 431 if (arr is null) 432 return null; 433 return dg(arr).nonNull; 434 } 435 } 436 437 unittest 438 { 439 assert(string.init.mapNull!(s => s ) is null); 440 assert(string.init.mapNull!(s => "" ) is null); 441 assert("" .mapNull!(s => s ) !is null); 442 assert("" .mapNull!(s => string.init) !is null); 443 } 444 445 /// Select and return a random element from the array. 446 auto ref sample(T)(T[] arr) 447 { 448 import std.random; 449 return arr[uniform(0, $)]; 450 } 451 452 unittest 453 { 454 assert([7, 7, 7].sample == 7); 455 auto s = ["foo", "bar"].sample(); // Issue 13807 456 const(int)[] a2 = [5]; sample(a2); 457 } 458 459 /// Select and return a random element from the array, 460 /// and remove it from the array. 461 T pluck(T)(ref T[] arr) 462 { 463 import std.random; 464 auto pos = uniform(0, arr.length); 465 auto result = arr[pos]; 466 arr = arr.remove(pos); 467 return result; 468 } 469 470 unittest 471 { 472 auto arr = [1, 2, 3]; 473 auto res = [arr.pluck, arr.pluck, arr.pluck]; 474 res.sort(); 475 assert(res == [1, 2, 3]); 476 } 477 478 import std.functional; 479 480 T[] countSort(alias value = "a", T)(T[] arr) 481 { 482 alias unaryFun!value getValue; 483 alias typeof(getValue(arr[0])) V; 484 if (arr.length == 0) return arr; 485 V min = getValue(arr[0]), max = getValue(arr[0]); 486 foreach (el; arr[1..$]) 487 { 488 auto v = getValue(el); 489 if (min > v) 490 min = v; 491 if (max < v) 492 max = v; 493 } 494 auto n = max-min+1; 495 auto counts = new size_t[n]; 496 foreach (el; arr) 497 counts[getValue(el)-min]++; 498 auto indices = new size_t[n]; 499 foreach (i; 1..n) 500 indices[i] = indices[i-1] + counts[i-1]; 501 T[] result = new T[arr.length]; 502 foreach (el; arr) 503 result[indices[getValue(el)-min]++] = el; 504 return result; 505 } 506 507 // *************************************************************************** 508 509 void stackPush(T)(ref T[] arr, auto ref T val) 510 { 511 arr ~= val; 512 } 513 alias stackPush queuePush; 514 515 ref T stackPeek(T)(T[] arr) { return arr[$-1]; } 516 517 ref T stackPop(T)(ref T[] arr) 518 { 519 auto ret = &arr[$-1]; 520 arr = arr[0..$-1]; 521 return *ret; 522 } 523 524 ref T queuePeek(T)(T[] arr) { return arr[0]; } 525 526 ref T queuePeekLast(T)(T[] arr) { return arr[$-1]; } 527 528 ref T queuePop(T)(ref T[] arr) 529 { 530 auto ret = &arr[0]; 531 arr = arr[1..$]; 532 if (!arr.length) arr = null; 533 return *ret; 534 } 535 536 ref T shift(T)(ref T[] arr) { auto oldArr = arr; arr = arr[1..$]; return oldArr[0]; } 537 T[] shift(T)(ref T[] arr, size_t n) { T[] result = arr[0..n]; arr = arr[n..$]; return result; } 538 T[N] shift(size_t N, T)(ref T[] arr) { T[N] result = cast(T[N])(arr[0..N]); arr = arr[N..$]; return result; } 539 void unshift(T)(ref T[] arr, T value) { arr.insertInPlace(0, value); } 540 void unshift(T)(ref T[] arr, T[] value) { arr.insertInPlace(0, value); } 541 542 unittest 543 { 544 int[] arr = [1, 2, 3]; 545 assert(arr.shift == 1); 546 assert(arr == [2, 3]); 547 assert(arr.shift(2) == [2, 3]); 548 assert(arr == []); 549 550 arr = [3]; 551 arr.unshift([1, 2]); 552 assert(arr == [1, 2, 3]); 553 arr.unshift(0); 554 assert(arr == [0, 1, 2, 3]); 555 556 assert(arr.shift!2 == [0, 1]); 557 assert(arr == [2, 3]); 558 } 559 560 /// If arr starts with prefix, slice it off and return true. 561 /// Otherwise leave arr unchaned and return false. 562 deprecated("Use std.algorithm.skipOver instead") 563 bool eat(T)(ref T[] arr, T[] prefix) 564 { 565 if (arr.startsWith(prefix)) 566 { 567 arr = arr[prefix.length..$]; 568 return true; 569 } 570 return false; 571 } 572 573 // Overload disambiguator 574 private sizediff_t _indexOf(H, N)(H haystack, N needle) 575 { 576 static import std.string; 577 578 static if (is(typeof(ae.utils.array.indexOf(haystack, needle)))) 579 alias indexOf = ae.utils.array.indexOf; 580 else 581 static if (is(typeof(std..string.indexOf(haystack, needle)))) 582 alias indexOf = std..string.indexOf; 583 else 584 static assert(false, "No suitable indexOf overload found"); 585 return indexOf(haystack, needle); 586 } 587 588 /// Returns the slice of source up to the first occurrence of delim, 589 /// and fast-forwards source to the point after delim. 590 /// If delim is not found, the behavior depends on orUntilEnd: 591 /// - If orUntilEnd is false (default), it returns null 592 /// and leaves source unchanged. 593 /// - If orUntilEnd is true, it returns source, 594 /// and then sets source to null. 595 T[] skipUntil(T, D)(ref T[] source, D delim, bool orUntilEnd = false) 596 { 597 enum bool isSlice = is(typeof(source[0..1]==delim)); 598 enum bool isElem = is(typeof(source[0] ==delim)); 599 static assert(isSlice || isElem, "Can't skip " ~ T.stringof ~ " until " ~ D.stringof); 600 static assert(isSlice != isElem, "Ambiguous types for skipUntil: " ~ T.stringof ~ " and " ~ D.stringof); 601 static if (isSlice) 602 auto delimLength = delim.length; 603 else 604 enum delimLength = 1; 605 606 static import std.string; 607 608 auto i = _indexOf(source, delim); 609 if (i < 0) 610 { 611 if (orUntilEnd) 612 { 613 auto result = source; 614 source = null; 615 return result; 616 } 617 else 618 return null; 619 } 620 auto result = source[0..i]; 621 source = source[i+delimLength..$]; 622 return result; 623 } 624 625 deprecated("Use skipUntil instead") 626 enum OnEof { returnNull, returnRemainder, throwException } 627 628 deprecated("Use skipUntil instead") 629 template eatUntil(OnEof onEof = OnEof.throwException) 630 { 631 T[] eatUntil(T, D)(ref T[] source, D delim) 632 { 633 static if (onEof == OnEof.returnNull) 634 return skipUntil(source, delim, false); 635 else 636 static if (onEof == OnEof.returnRemainder) 637 return skipUntil(source, delim, true); 638 else 639 return skipUntil(source, delim, false).enforce("Delimiter not found in source"); 640 } 641 } 642 643 deprecated unittest 644 { 645 string s; 646 647 s = "Mary had a little lamb"; 648 assert(s.eatUntil(" ") == "Mary"); 649 assert(s.eatUntil(" ") == "had"); 650 assert(s.eatUntil(' ') == "a"); 651 652 assertThrown!Exception(s.eatUntil("#")); 653 assert(s.eatUntil!(OnEof.returnNull)("#") is null); 654 assert(s.eatUntil!(OnEof.returnRemainder)("#") == "little lamb"); 655 656 ubyte[] bytes = [1, 2, 0, 3, 4, 0, 0]; 657 assert(bytes.eatUntil(0) == [1, 2]); 658 assert(bytes.eatUntil([ubyte(0), ubyte(0)]) == [3, 4]); 659 } 660 661 // *************************************************************************** 662 663 // Equivalents of array(xxx(...)), but less parens and UFCS-able. 664 auto amap(alias pred, T)(T[] arr) { return array(map!pred(arr)); } 665 auto afilter(alias pred, T)(T[] arr) { return array(filter!pred(arr)); } 666 auto auniq(T)(T[] arr) { return array(uniq(arr)); } 667 auto asort(alias pred, T)(T[] arr) { sort!pred(arr); return arr; } 668 669 unittest 670 { 671 assert([1, 2, 3].amap!`a*2`() == [2, 4, 6]); 672 assert([1, 2, 3].amap!(n => n*n)() == [1, 4, 9]); 673 } 674 675 auto amap(alias pred, T, size_t n)(T[n] arr) 676 { 677 alias R = typeof(unaryFun!pred(arr[0])); 678 R[n] result; 679 foreach (i, ref r; result) 680 r = unaryFun!pred(arr[i]); 681 return result; 682 } 683 684 // *************************************************************************** 685 686 /// Array with normalized comparison and hashing. 687 /// Params: 688 /// T = array element type to wrap. 689 /// normalize = function which should return a range of normalized elements. 690 struct NormalizedArray(T, alias normalize) 691 { 692 T[] arr; 693 694 this(T[] arr) { this.arr = arr; } 695 696 int opCmp (in T[] other) const { return std.algorithm.cmp(normalize(arr), normalize(other )) ; } 697 int opCmp ( const typeof(this) other) const { return std.algorithm.cmp(normalize(arr), normalize(other.arr)) ; } 698 int opCmp (ref const typeof(this) other) const { return std.algorithm.cmp(normalize(arr), normalize(other.arr)) ; } 699 bool opEquals(in T[] other) const { return std.algorithm.cmp(normalize(arr), normalize(other ))==0; } 700 bool opEquals( const typeof(this) other) const { return std.algorithm.cmp(normalize(arr), normalize(other.arr))==0; } 701 bool opEquals(ref const typeof(this) other) const { return std.algorithm.cmp(normalize(arr), normalize(other.arr))==0; } 702 703 hash_t toHashReal() const 704 { 705 import std.digest.crc; 706 CRC32 crc; 707 foreach (c; normalize(arr)) 708 crc.put(cast(ubyte[])((&c)[0..1])); 709 static union Result { ubyte[4] crcResult; hash_t hash; } 710 return Result(crc.finish()).hash; 711 } 712 713 hash_t toHash() const nothrow @trusted 714 { 715 return (cast(hash_t delegate() nothrow @safe)&toHashReal)(); 716 } 717 } 718 719 // *************************************************************************** 720 721 /// Equivalent of PHP's `list` language construct: 722 /// http://php.net/manual/en/function.list.php 723 /// Works with arrays and tuples. 724 /// Specify `null` as an argument to ignore that index 725 /// (equivalent of `list(x, , y)` in PHP). 726 auto list(Args...)(auto ref Args args) 727 { 728 struct List 729 { 730 auto dummy() { return args[0]; } // https://issues.dlang.org/show_bug.cgi?id=11886 731 void opAssign(T)(auto ref T t) 732 { 733 assert(t.length == args.length, 734 "Assigning %d elements to list with %d elements" 735 .format(t.length, args.length)); 736 foreach (i; RangeTuple!(Args.length)) 737 static if (!is(Args[i] == typeof(null))) 738 args[i] = t[i]; 739 } 740 } 741 return List(); 742 } 743 744 /// 745 unittest 746 { 747 string name, value; 748 list(name, null, value) = "NAME=VALUE".findSplit("="); 749 assert(name == "NAME" && value == "VALUE"); 750 } 751 752 version(LittleEndian) 753 unittest 754 { 755 uint onlyValue; 756 ubyte[] data = [ubyte(42), 0, 0, 0]; 757 list(onlyValue) = cast(uint[])data; 758 assert(onlyValue == 42); 759 }