1 /** 2 * Array utility functions 3 * 4 * License: 5 * This Source Code Form is subject to the terms of 6 * the Mozilla Public License, v. 2.0. If a copy of 7 * the MPL was not distributed with this file, You 8 * can obtain one at http://mozilla.org/MPL/2.0/. 9 * 10 * Authors: 11 * Vladimir Panteleev <vladimir@thecybershadow.net> 12 */ 13 14 module ae.utils.array; 15 16 import std.algorithm.iteration; 17 import std.algorithm.mutation; 18 import std.algorithm.searching; 19 import std.algorithm.sorting; 20 import std.array; 21 import std.exception; 22 import std.format; 23 import std.traits; 24 25 import ae.utils.meta; 26 27 public import ae.utils.aa; 28 public import ae.utils.appender; 29 30 /// Slice a variable. 31 T[] toArray(T)(ref T v) 32 { 33 return (&v)[0..1]; 34 } 35 36 /// std.array.staticArray shim 37 static if (__traits(hasMember, std.array, "staticArray")) 38 public import std.array : staticArray; 39 else 40 pragma(inline, true) T[n] staticArray(T, size_t n)(auto ref T[n] a) { return a; } 41 42 /// Return the value represented as an array of bytes. 43 @property inout(ubyte)[] bytes(T)(ref inout(T) value) 44 if (!hasIndirections!T) 45 { 46 return value.toArray().bytes; 47 } 48 49 /// ditto 50 @property inout(ubyte)[] bytes(T)(inout(T) value) 51 if (is(T U : U[]) && !hasIndirections!U) 52 { 53 return cast(inout(ubyte)[])value; 54 } 55 56 unittest 57 { 58 ubyte b = 5; 59 assert(b.bytes == [5]); 60 61 struct S { ubyte b = 5; } 62 S s; 63 assert(s.bytes == [5]); 64 65 ubyte[1] sa = [5]; 66 assert(sa.bytes == [5]); 67 68 void[] va = sa[]; 69 assert(va.bytes == [5]); 70 } 71 72 /// Reverse of bytes() 73 ref inout(T) fromBytes(T)(inout(ubyte)[] bytes) 74 if (!hasIndirections!T) 75 { 76 assert(bytes.length == T.sizeof, "Data length mismatch for %s".format(T.stringof)); 77 return *cast(inout(T)*)bytes.ptr; 78 } 79 80 /// ditto 81 inout(T) fromBytes(T)(inout(ubyte)[] bytes) 82 if (is(T U : U[]) && !hasIndirections!U) 83 { 84 return cast(inout(T))bytes; 85 } 86 87 unittest 88 { 89 { ubyte b = 5; assert(b.bytes.fromBytes!ubyte == 5); } 90 { const ubyte b = 5; assert(b.bytes.fromBytes!ubyte == 5); } 91 struct S { ubyte b; } 92 { ubyte b = 5; assert(b.bytes.fromBytes!S == S(5)); } 93 } 94 95 unittest 96 { 97 struct S { ubyte a, b; } 98 ubyte[] arr = [1, 2]; 99 assert(arr.fromBytes!S == S(1, 2)); 100 assert(arr.fromBytes!(S[]) == [S(1, 2)]); 101 } 102 103 /// Returns an empty, but non-null slice of T. 104 auto emptySlice(T)() pure 105 { 106 static if (false) // LDC optimizes this out 107 { 108 T[0] arr; 109 auto p = arr.ptr; 110 } 111 else 112 auto p = cast(T*)1; 113 return p[0..0]; 114 } 115 116 unittest 117 { 118 int[] arr = emptySlice!int; 119 assert(arr.ptr); 120 immutable int[] iarr = emptySlice!int; 121 assert(iarr.ptr); 122 } 123 124 int memcmp(in ubyte[] a, in ubyte[] b) 125 { 126 assert(a.length == b.length); 127 import core.stdc.string : memcmp; 128 return memcmp(a.ptr, b.ptr, a.length); 129 } 130 131 /// Like std.algorithm.copy, but without the auto-decode bullshit. 132 /// https://issues.dlang.org/show_bug.cgi?id=13650 133 void memmove(T)(T[] dst, in T[] src) 134 { 135 assert(src.length == dst.length); 136 import core.stdc.string : memmove; 137 memmove(dst.ptr, src.ptr, dst.length * T.sizeof); 138 } 139 140 T[] vector(string op, T)(T[] a, T[] b) 141 { 142 assert(a.length == b.length); 143 T[] result = new T[a.length]; 144 foreach (i, ref r; result) 145 r = mixin("a[i]" ~ op ~ "b[i]"); 146 return result; 147 } 148 149 T[] vectorAssign(string op, T)(T[] a, T[] b) 150 { 151 assert(a.length == b.length); 152 foreach (i, ref r; a) 153 mixin("r " ~ op ~ "= b[i];"); 154 return a; 155 } 156 157 T[] padRight(T)(T[] s, size_t l, T c) 158 { 159 auto ol = s.length; 160 if (ol < l) 161 { 162 s.length = l; 163 s[ol..$] = c; 164 } 165 return s; 166 } 167 168 T[] repeatOne(T)(T c, size_t l) 169 { 170 T[] result = new T[l]; 171 result[] = c; 172 return result; 173 } 174 175 /// Complement to std.string.indexOf which works with arrays 176 /// of non-character types. 177 /// Unlike std.algorithm.countUntil, it does not auto-decode, 178 /// and returns an index usable for array indexing/slicing. 179 sizediff_t indexOf(T, D)(in T[] arr, in D val) 180 // if (!isSomeChar!T) 181 if (!isSomeChar!T && is(typeof(arr.countUntil(val))) && is(typeof(arr[0]==val))) 182 { 183 //assert(arr[0]==val); 184 return arr.countUntil(val); 185 } 186 187 sizediff_t indexOf(T)(in T[] arr, in T[] val) /// ditto 188 if (!isSomeChar!T && is(typeof(arr.countUntil(val)))) 189 { 190 return arr.countUntil(val); 191 } 192 193 /// Index of element, no BS. 194 sizediff_t indexOfElement(T, D)(in T[] arr, auto ref in D val) 195 if (is(typeof(arr[0]==val))) 196 { 197 foreach (i, ref v; arr) 198 if (v == val) 199 return i; 200 return -1; 201 } 202 203 /// Whether array contains value, no BS. 204 bool contains(T, V)(in T[] arr, auto ref in V val) 205 if (is(typeof(arr[0]==val))) 206 { 207 return arr.indexOfElement(val) >= 0; 208 } 209 210 /// Ditto, for substrings 211 bool contains(T, U)(T[] str, U[] what) 212 if (is(Unqual!T == Unqual!U)) 213 { 214 return str._indexOf(what) >= 0; 215 } 216 217 unittest 218 { 219 assert( "abc".contains('b')); 220 assert(!"abc".contains('x')); 221 assert( "abc".contains("b")); 222 assert(!"abc".contains("x")); 223 } 224 225 /// Like startsWith, but with an offset. 226 bool containsAt(T)(in T[] haystack, in T[] needle, size_t offset) 227 { 228 return haystack.length >= offset + needle.length 229 && haystack[offset..offset+needle.length] == needle; 230 } 231 232 unittest 233 { 234 assert( "abracadabra".containsAt("ada", 5)); 235 assert(!"abracadabra".containsAt("ada", 6)); 236 assert(!"abracadabra".containsAt("ada", 99)); 237 } 238 239 bool isIn(T)(T val, in T[] arr) 240 { 241 return arr.contains(val); 242 } 243 244 bool isOneOf(T)(T val, T[] arr...) 245 { 246 return arr.contains(val); 247 } 248 249 /// Like AA.get - soft indexing, throws an 250 /// Exception (not an Error) on out-of-bounds, 251 /// even in release builds. 252 ref T get(T)(T[] arr, size_t index) 253 { 254 enforce(index < arr.length, "Out-of-bounds array access"); 255 return arr[index]; 256 } 257 258 /// Like AA.get - soft indexing, returns 259 /// default value on out-of-bounds. 260 auto get(T)(T[] arr, size_t index, auto ref T defaultValue) 261 { 262 if (index >= arr.length) 263 return defaultValue; 264 return arr[index]; 265 } 266 267 /// Expand the array if index is out-of-bounds. 268 ref T getExpand(T)(ref T[] arr, size_t index) 269 { 270 if (index >= arr.length) 271 arr.length = index + 1; 272 return arr[index]; 273 } 274 275 /// ditto 276 ref T putExpand(T)(ref T[] arr, size_t index, auto ref T value) 277 { 278 if (index >= arr.length) 279 arr.length = index + 1; 280 return arr[index] = value; 281 } 282 283 /// Slices an array. Throws an Exception (not an Error) 284 /// on out-of-bounds, even in release builds. 285 T[] slice(T)(T[] arr, size_t p0, size_t p1) 286 { 287 enforce(p0 < p1 && p1 < arr.length, "Out-of-bounds array slice"); 288 return arr[p0..p1]; 289 } 290 291 /// Given an array and its slice, returns the 292 /// start index of the slice inside the array. 293 size_t sliceIndex(T)(in T[] arr, in T[] slice) 294 { 295 auto a = arr.ptr; 296 auto b = a + arr.length; 297 auto p = slice.ptr; 298 assert(a <= p && p <= b, "Out-of-bounds array slice"); 299 return p - a; 300 } 301 302 /// Like std.array.split, but returns null if val was empty. 303 auto splitEmpty(T, S)(T value, S separator) 304 { 305 return value.length ? split(value, separator) : null; 306 } 307 308 /// Like std.array.split, but always returns a non-empty array. 309 auto split1(T, S)(T value, S separator) 310 { 311 auto result = split(value, separator); 312 return result.length ? result : [value]; 313 } 314 315 /// Include delimiter in result chunks as suffix 316 H[] splitWithSuffix(H, S)(H haystack, S separator) 317 { 318 H[] result; 319 while (haystack.length) 320 { 321 auto pos = haystack._indexOf(separator); 322 if (pos < 0) 323 pos = haystack.length; 324 else 325 { 326 static if (is(typeof(haystack[0] == separator))) 327 pos += 1; 328 else 329 static if (is(typeof(haystack[0..1] == separator))) 330 pos += separator.length; 331 else 332 static assert(false, "Don't know how to split " ~ H.stringof ~ " by " ~ S.stringof); 333 } 334 result ~= haystack[0..pos]; 335 haystack = haystack[pos..$]; 336 } 337 return result; 338 } 339 340 unittest 341 { 342 assert("a\nb".splitWithSuffix('\n') == ["a\n", "b"]); 343 assert([1, 0, 2].splitWithSuffix(0) == [[1, 0], [2]]); 344 345 assert("a\r\nb".splitWithSuffix("\r\n") == ["a\r\n", "b"]); 346 assert([1, 0, 0, 2].splitWithSuffix([0, 0]) == [[1, 0, 0], [2]]); 347 } 348 349 /// Include delimiter in result chunks as prefix 350 H[] splitWithPrefix(H, S)(H haystack, S separator) 351 { 352 H[] result; 353 while (haystack.length) 354 { 355 auto pos = haystack[1..$]._indexOf(separator); 356 if (pos < 0) 357 pos = haystack.length; 358 else 359 pos++; 360 result ~= haystack[0..pos]; 361 haystack = haystack[pos..$]; 362 } 363 return result; 364 } 365 366 unittest 367 { 368 assert("a\nb".splitWithPrefix('\n') == ["a", "\nb"]); 369 assert([1, 0, 2].splitWithPrefix(0) == [[1], [0, 2]]); 370 371 assert("a\r\nb".splitWithPrefix("\r\n") == ["a", "\r\nb"]); 372 assert([1, 0, 0, 2].splitWithPrefix([0, 0]) == [[1], [0, 0, 2]]); 373 } 374 375 /// Include delimiters in result chunks as prefix/suffix 376 S[] splitWithPrefixAndSuffix(S)(S haystack, S prefix, S suffix) 377 { 378 S[] result; 379 auto separator = suffix ~ prefix; 380 while (haystack.length) 381 { 382 auto pos = haystack._indexOf(separator); 383 if (pos < 0) 384 pos = haystack.length; 385 else 386 pos += suffix.length; 387 result ~= haystack[0..pos]; 388 haystack = haystack[pos..$]; 389 } 390 return result; 391 } 392 393 /// 394 unittest 395 { 396 auto s = q"EOF 397 Section 1: 398 10 399 11 400 12 401 Section 2: 402 21 403 22 404 23 405 Section 3: 406 31 407 32 408 33 409 EOF"; 410 auto parts = s.splitWithPrefixAndSuffix("Section ", "\n"); 411 assert(parts.length == 3 && parts.join == s); 412 foreach (part; parts) 413 assert(part.startsWith("Section ") && part.endsWith("\n")); 414 } 415 416 /// Ensure that arr is non-null if empty. 417 T[] nonNull(T)(T[] arr) 418 { 419 if (arr !is null) 420 return arr; 421 return emptySlice!(typeof(arr[0])); 422 } 423 424 /// If arr is null, return null. Otherwise, return a non-null 425 /// transformation dg over arr. 426 template mapNull(alias dg) 427 { 428 auto mapNull(T)(T arr) 429 { 430 if (arr is null) 431 return null; 432 return dg(arr).nonNull; 433 } 434 } 435 436 unittest 437 { 438 assert(string.init.mapNull!(s => s ) is null); 439 assert(string.init.mapNull!(s => "" ) is null); 440 assert("" .mapNull!(s => s ) !is null); 441 assert("" .mapNull!(s => string.init) !is null); 442 } 443 444 /// Select and return a random element from the array. 445 auto ref sample(T)(T[] arr) 446 { 447 import std.random; 448 return arr[uniform(0, $)]; 449 } 450 451 unittest 452 { 453 assert([7, 7, 7].sample == 7); 454 auto s = ["foo", "bar"].sample(); // Issue 13807 455 const(int)[] a2 = [5]; sample(a2); 456 } 457 458 /// Select and return a random element from the array, 459 /// and remove it from the array. 460 T pluck(T)(ref T[] arr) 461 { 462 import std.random; 463 auto pos = uniform(0, arr.length); 464 auto result = arr[pos]; 465 arr = arr.remove(pos); 466 return result; 467 } 468 469 unittest 470 { 471 auto arr = [1, 2, 3]; 472 auto res = [arr.pluck, arr.pluck, arr.pluck]; 473 res.sort(); 474 assert(res == [1, 2, 3]); 475 } 476 477 import std.functional; 478 479 T[] countSort(alias value = "a", T)(T[] arr) 480 { 481 alias unaryFun!value getValue; 482 alias typeof(getValue(arr[0])) V; 483 if (arr.length == 0) return arr; 484 V min = getValue(arr[0]), max = getValue(arr[0]); 485 foreach (el; arr[1..$]) 486 { 487 auto v = getValue(el); 488 if (min > v) 489 min = v; 490 if (max < v) 491 max = v; 492 } 493 auto n = max-min+1; 494 auto counts = new size_t[n]; 495 foreach (el; arr) 496 counts[getValue(el)-min]++; 497 auto indices = new size_t[n]; 498 foreach (i; 1..n) 499 indices[i] = indices[i-1] + counts[i-1]; 500 T[] result = new T[arr.length]; 501 foreach (el; arr) 502 result[indices[getValue(el)-min]++] = el; 503 return result; 504 } 505 506 // *************************************************************************** 507 508 void stackPush(T)(ref T[] arr, auto ref T val) 509 { 510 arr ~= val; 511 } 512 alias stackPush queuePush; 513 514 ref T stackPeek(T)(T[] arr) { return arr[$-1]; } 515 516 ref T stackPop(T)(ref T[] arr) 517 { 518 auto ret = &arr[$-1]; 519 arr = arr[0..$-1]; 520 return *ret; 521 } 522 523 ref T queuePeek(T)(T[] arr) { return arr[0]; } 524 525 ref T queuePeekLast(T)(T[] arr) { return arr[$-1]; } 526 527 ref T queuePop(T)(ref T[] arr) 528 { 529 auto ret = &arr[0]; 530 arr = arr[1..$]; 531 if (!arr.length) arr = null; 532 return *ret; 533 } 534 535 ref T shift(T)(ref T[] arr) { auto oldArr = arr; arr = arr[1..$]; return oldArr[0]; } 536 T[] shift(T)(ref T[] arr, size_t n) { T[] result = arr[0..n]; arr = arr[n..$]; return result; } 537 T[N] shift(size_t N, T)(ref T[] arr) { T[N] result = cast(T[N])(arr[0..N]); arr = arr[N..$]; return result; } 538 void unshift(T)(ref T[] arr, T value) { arr.insertInPlace(0, value); } 539 void unshift(T)(ref T[] arr, T[] value) { arr.insertInPlace(0, value); } 540 541 unittest 542 { 543 int[] arr = [1, 2, 3]; 544 assert(arr.shift == 1); 545 assert(arr == [2, 3]); 546 assert(arr.shift(2) == [2, 3]); 547 assert(arr == []); 548 549 arr = [3]; 550 arr.unshift([1, 2]); 551 assert(arr == [1, 2, 3]); 552 arr.unshift(0); 553 assert(arr == [0, 1, 2, 3]); 554 555 assert(arr.shift!2 == [0, 1]); 556 assert(arr == [2, 3]); 557 } 558 559 /// If arr starts with prefix, slice it off and return true. 560 /// Otherwise leave arr unchaned and return false. 561 deprecated("Use std.algorithm.skipOver instead") 562 bool eat(T)(ref T[] arr, T[] prefix) 563 { 564 if (arr.startsWith(prefix)) 565 { 566 arr = arr[prefix.length..$]; 567 return true; 568 } 569 return false; 570 } 571 572 // Overload disambiguator 573 private sizediff_t _indexOf(H, N)(H haystack, N needle) 574 { 575 static import std.string; 576 577 static if (is(typeof(ae.utils.array.indexOf(haystack, needle)))) 578 alias indexOf = ae.utils.array.indexOf; 579 else 580 static if (is(typeof(std..string.indexOf(haystack, needle)))) 581 alias indexOf = std..string.indexOf; 582 else 583 static assert(false, "No suitable indexOf overload found"); 584 return indexOf(haystack, needle); 585 } 586 587 /// Returns the slice of source up to the first occurrence of delim, 588 /// and fast-forwards source to the point after delim. 589 /// If delim is not found, the behavior depends on orUntilEnd: 590 /// - If orUntilEnd is false (default), it returns null 591 /// and leaves source unchanged. 592 /// - If orUntilEnd is true, it returns source, 593 /// and then sets source to null. 594 T[] skipUntil(T, D)(ref T[] source, D delim, bool orUntilEnd = false) 595 { 596 enum bool isSlice = is(typeof(source[0..1]==delim)); 597 enum bool isElem = is(typeof(source[0] ==delim)); 598 static assert(isSlice || isElem, "Can't skip " ~ T.stringof ~ " until " ~ D.stringof); 599 static assert(isSlice != isElem, "Ambiguous types for skipUntil: " ~ T.stringof ~ " and " ~ D.stringof); 600 static if (isSlice) 601 auto delimLength = delim.length; 602 else 603 enum delimLength = 1; 604 605 static import std.string; 606 607 auto i = _indexOf(source, delim); 608 if (i < 0) 609 { 610 if (orUntilEnd) 611 { 612 auto result = source; 613 source = null; 614 return result; 615 } 616 else 617 return null; 618 } 619 auto result = source[0..i]; 620 source = source[i+delimLength..$]; 621 return result; 622 } 623 624 deprecated("Use skipUntil instead") 625 enum OnEof { returnNull, returnRemainder, throwException } 626 627 deprecated("Use skipUntil instead") 628 template eatUntil(OnEof onEof = OnEof.throwException) 629 { 630 T[] eatUntil(T, D)(ref T[] source, D delim) 631 { 632 static if (onEof == OnEof.returnNull) 633 return skipUntil(source, delim, false); 634 else 635 static if (onEof == OnEof.returnRemainder) 636 return skipUntil(source, delim, true); 637 else 638 return skipUntil(source, delim, false).enforce("Delimiter not found in source"); 639 } 640 } 641 642 deprecated unittest 643 { 644 string s; 645 646 s = "Mary had a little lamb"; 647 assert(s.eatUntil(" ") == "Mary"); 648 assert(s.eatUntil(" ") == "had"); 649 assert(s.eatUntil(' ') == "a"); 650 651 assertThrown!Exception(s.eatUntil("#")); 652 assert(s.eatUntil!(OnEof.returnNull)("#") is null); 653 assert(s.eatUntil!(OnEof.returnRemainder)("#") == "little lamb"); 654 655 ubyte[] bytes = [1, 2, 0, 3, 4, 0, 0]; 656 assert(bytes.eatUntil(0) == [1, 2]); 657 assert(bytes.eatUntil([ubyte(0), ubyte(0)]) == [3, 4]); 658 } 659 660 // *************************************************************************** 661 662 // Equivalents of array(xxx(...)), but less parens and UFCS-able. 663 auto amap(alias pred, T)(T[] arr) { return array(map!pred(arr)); } 664 auto afilter(alias pred, T)(T[] arr) { return array(filter!pred(arr)); } 665 auto auniq(T)(T[] arr) { return array(uniq(arr)); } 666 auto asort(alias pred, T)(T[] arr) { sort!pred(arr); return arr; } 667 668 unittest 669 { 670 assert([1, 2, 3].amap!`a*2`() == [2, 4, 6]); 671 assert([1, 2, 3].amap!(n => n*n)() == [1, 4, 9]); 672 } 673 674 // *************************************************************************** 675 676 /// Array with normalized comparison and hashing. 677 /// Params: 678 /// T = array element type to wrap. 679 /// normalize = function which should return a range of normalized elements. 680 struct NormalizedArray(T, alias normalize) 681 { 682 T[] arr; 683 684 this(T[] arr) { this.arr = arr; } 685 686 int opCmp (in T[] other) const { return std.algorithm.cmp(normalize(arr), normalize(other )) ; } 687 int opCmp ( const typeof(this) other) const { return std.algorithm.cmp(normalize(arr), normalize(other.arr)) ; } 688 int opCmp (ref const typeof(this) other) const { return std.algorithm.cmp(normalize(arr), normalize(other.arr)) ; } 689 bool opEquals(in T[] other) const { return std.algorithm.cmp(normalize(arr), normalize(other ))==0; } 690 bool opEquals( const typeof(this) other) const { return std.algorithm.cmp(normalize(arr), normalize(other.arr))==0; } 691 bool opEquals(ref const typeof(this) other) const { return std.algorithm.cmp(normalize(arr), normalize(other.arr))==0; } 692 693 hash_t toHashReal() const 694 { 695 import std.digest.crc; 696 CRC32 crc; 697 foreach (c; normalize(arr)) 698 crc.put(cast(ubyte[])((&c)[0..1])); 699 static union Result { ubyte[4] crcResult; hash_t hash; } 700 return Result(crc.finish()).hash; 701 } 702 703 hash_t toHash() const nothrow @trusted 704 { 705 return (cast(hash_t delegate() nothrow @safe)&toHashReal)(); 706 } 707 } 708 709 // *************************************************************************** 710 711 /// Equivalent of PHP's `list` language construct: 712 /// http://php.net/manual/en/function.list.php 713 /// Works with arrays and tuples. 714 /// Specify `null` as an argument to ignore that index 715 /// (equivalent of `list(x, , y)` in PHP). 716 auto list(Args...)(auto ref Args args) 717 { 718 struct List 719 { 720 auto dummy() { return args[0]; } // https://issues.dlang.org/show_bug.cgi?id=11886 721 void opAssign(T)(auto ref T t) 722 { 723 assert(t.length == args.length, 724 "Assigning %d elements to list with %d elements" 725 .format(t.length, args.length)); 726 foreach (i; RangeTuple!(Args.length)) 727 static if (!is(Args[i] == typeof(null))) 728 args[i] = t[i]; 729 } 730 } 731 return List(); 732 } 733 734 /// 735 unittest 736 { 737 string name, value; 738 list(name, null, value) = "NAME=VALUE".findSplit("="); 739 assert(name == "NAME" && value == "VALUE"); 740 } 741 742 version(LittleEndian) 743 unittest 744 { 745 uint onlyValue; 746 ubyte[] data = [ubyte(42), 0, 0, 0]; 747 list(onlyValue) = cast(uint[])data; 748 assert(onlyValue == 42); 749 }