1 /** 2 * Array utility functions 3 * 4 * License: 5 * This Source Code Form is subject to the terms of 6 * the Mozilla Public License, v. 2.0. If a copy of 7 * the MPL was not distributed with this file, You 8 * can obtain one at http://mozilla.org/MPL/2.0/. 9 * 10 * Authors: 11 * Vladimir Panteleev <vladimir@thecybershadow.net> 12 */ 13 14 module ae.utils.array; 15 16 import std.algorithm.iteration; 17 import std.algorithm.mutation; 18 import std.algorithm.searching; 19 import std.algorithm.sorting; 20 import std.array; 21 import std.exception; 22 import std.format; 23 import std.traits; 24 25 import ae.utils.meta; 26 27 public import ae.utils.aa; 28 public import ae.utils.appender; 29 30 /// Slice a variable. 31 T[] toArray(T)(ref T v) 32 { 33 return (&v)[0..1]; 34 } 35 36 /// Return the value represented as an array of bytes. 37 @property inout(ubyte)[] bytes(T)(ref inout(T) value) 38 if (!hasIndirections!T) 39 { 40 return value.toArray().bytes; 41 } 42 43 /// ditto 44 @property inout(ubyte)[] bytes(T)(inout(T) value) 45 if (is(T U : U[]) && !hasIndirections!U) 46 { 47 return cast(inout(ubyte)[])value; 48 } 49 50 unittest 51 { 52 ubyte b = 5; 53 assert(b.bytes == [5]); 54 55 struct S { ubyte b = 5; } 56 S s; 57 assert(s.bytes == [5]); 58 59 ubyte[1] sa = [5]; 60 assert(sa.bytes == [5]); 61 62 void[] va = sa[]; 63 assert(va.bytes == [5]); 64 } 65 66 /// Reverse of bytes() 67 ref inout(T) fromBytes(T)(inout(ubyte)[] bytes) 68 if (!hasIndirections!T) 69 { 70 assert(bytes.length == T.sizeof, "Data length mismatch for %s".format(T.stringof)); 71 return *cast(inout(T)*)bytes.ptr; 72 } 73 74 /// ditto 75 inout(T) fromBytes(T)(inout(ubyte)[] bytes) 76 if (is(T U : U[]) && !hasIndirections!U) 77 { 78 return cast(inout(T))bytes; 79 } 80 81 unittest 82 { 83 { ubyte b = 5; assert(b.bytes.fromBytes!ubyte == 5); } 84 { const ubyte b = 5; assert(b.bytes.fromBytes!ubyte == 5); } 85 struct S { ubyte b; } 86 { ubyte b = 5; assert(b.bytes.fromBytes!S == S(5)); } 87 } 88 89 unittest 90 { 91 struct S { ubyte a, b; } 92 ubyte[] arr = [1, 2]; 93 assert(arr.fromBytes!S == S(1, 2)); 94 assert(arr.fromBytes!(S[]) == [S(1, 2)]); 95 } 96 97 /// Returns an empty, but non-null slice of T. 98 auto emptySlice(T)() pure 99 { 100 static if (false) // LDC optimizes this out 101 { 102 T[0] arr; 103 auto p = arr.ptr; 104 } 105 else 106 auto p = cast(T*)1; 107 return p[0..0]; 108 } 109 110 unittest 111 { 112 int[] arr = emptySlice!int; 113 assert(arr.ptr); 114 immutable int[] iarr = emptySlice!int; 115 assert(iarr.ptr); 116 } 117 118 int memcmp(in ubyte[] a, in ubyte[] b) 119 { 120 assert(a.length == b.length); 121 import core.stdc.string : memcmp; 122 return memcmp(a.ptr, b.ptr, a.length); 123 } 124 125 /// Like std.algorithm.copy, but without the auto-decode bullshit. 126 /// https://issues.dlang.org/show_bug.cgi?id=13650 127 void memmove(T)(T[] dst, in T[] src) 128 { 129 assert(src.length == dst.length); 130 import core.stdc.string : memmove; 131 memmove(dst.ptr, src.ptr, dst.length * T.sizeof); 132 } 133 134 T[] vector(string op, T)(T[] a, T[] b) 135 { 136 assert(a.length == b.length); 137 T[] result = new T[a.length]; 138 foreach (i, ref r; result) 139 r = mixin("a[i]" ~ op ~ "b[i]"); 140 return result; 141 } 142 143 T[] vectorAssign(string op, T)(T[] a, T[] b) 144 { 145 assert(a.length == b.length); 146 foreach (i, ref r; a) 147 mixin("r " ~ op ~ "= b[i];"); 148 return a; 149 } 150 151 T[] padRight(T)(T[] s, size_t l, T c) 152 { 153 auto ol = s.length; 154 if (ol < l) 155 { 156 s.length = l; 157 s[ol..$] = c; 158 } 159 return s; 160 } 161 162 T[] repeatOne(T)(T c, size_t l) 163 { 164 T[] result = new T[l]; 165 result[] = c; 166 return result; 167 } 168 169 /// Complement to std.string.indexOf which works with arrays 170 /// of non-character types. 171 /// Unlike std.algorithm.countUntil, it does not auto-decode, 172 /// and returns an index usable for array indexing/slicing. 173 sizediff_t indexOf(T, D)(in T[] arr, in D val) 174 // if (!isSomeChar!T) 175 if (!isSomeChar!T && is(typeof(arr.countUntil(val))) && is(typeof(arr[0]==val))) 176 { 177 //assert(arr[0]==val); 178 return arr.countUntil(val); 179 } 180 181 sizediff_t indexOf(T)(in T[] arr, in T[] val) /// ditto 182 if (!isSomeChar!T && is(typeof(arr.countUntil(val)))) 183 { 184 return arr.countUntil(val); 185 } 186 187 /// Index of element, no BS. 188 sizediff_t indexOfElement(T, D)(in T[] arr, auto ref in D val) 189 if (is(typeof(arr[0]==val))) 190 { 191 foreach (i, ref v; arr) 192 if (v == val) 193 return i; 194 return -1; 195 } 196 197 /// Whether array contains value, no BS. 198 bool contains(T, V)(in T[] arr, auto ref in V val) 199 if (is(typeof(arr[0]==val))) 200 { 201 return arr.indexOfElement(val) >= 0; 202 } 203 204 /// Ditto, for substrings 205 bool contains(T, U)(T[] str, U[] what) 206 if (is(Unqual!T == Unqual!U)) 207 { 208 return str._indexOf(what) >= 0; 209 } 210 211 unittest 212 { 213 assert( "abc".contains('b')); 214 assert(!"abc".contains('x')); 215 assert( "abc".contains("b")); 216 assert(!"abc".contains("x")); 217 } 218 219 /// Like startsWith, but with an offset. 220 bool containsAt(T)(in T[] haystack, in T[] needle, size_t offset) 221 { 222 return haystack.length >= offset + needle.length 223 && haystack[offset..offset+needle.length] == needle; 224 } 225 226 unittest 227 { 228 assert( "abracadabra".containsAt("ada", 5)); 229 assert(!"abracadabra".containsAt("ada", 6)); 230 assert(!"abracadabra".containsAt("ada", 99)); 231 } 232 233 bool isIn(T)(T val, in T[] arr) 234 { 235 return arr.contains(val); 236 } 237 238 bool isOneOf(T)(T val, T[] arr...) 239 { 240 return arr.contains(val); 241 } 242 243 /// Like AA.get - soft indexing, throws an 244 /// Exception (not an Error) on out-of-bounds, 245 /// even in release builds. 246 ref T get(T)(T[] arr, size_t index) 247 { 248 enforce(index < arr.length, "Out-of-bounds array access"); 249 return arr[index]; 250 } 251 252 /// Like AA.get - soft indexing, returns 253 /// default value on out-of-bounds. 254 auto get(T)(T[] arr, size_t index, auto ref T defaultValue) 255 { 256 if (index >= arr.length) 257 return defaultValue; 258 return arr[index]; 259 } 260 261 /// Expand the array if index is out-of-bounds. 262 ref T getExpand(T)(ref T[] arr, size_t index) 263 { 264 if (index >= arr.length) 265 arr.length = index + 1; 266 return arr[index]; 267 } 268 269 /// ditto 270 ref T putExpand(T)(ref T[] arr, size_t index, auto ref T value) 271 { 272 if (index >= arr.length) 273 arr.length = index + 1; 274 return arr[index] = value; 275 } 276 277 /// Slices an array. Throws an Exception (not an Error) 278 /// on out-of-bounds, even in release builds. 279 T[] slice(T)(T[] arr, size_t p0, size_t p1) 280 { 281 enforce(p0 < p1 && p1 < arr.length, "Out-of-bounds array slice"); 282 return arr[p0..p1]; 283 } 284 285 /// Given an array and its slice, returns the 286 /// start index of the slice inside the array. 287 size_t sliceIndex(T)(in T[] arr, in T[] slice) 288 { 289 auto a = arr.ptr; 290 auto b = a + arr.length; 291 auto p = slice.ptr; 292 assert(a <= p && p <= b, "Out-of-bounds array slice"); 293 return p - a; 294 } 295 296 /// Like std.array.split, but returns null if val was empty. 297 auto splitEmpty(T, S)(T value, S separator) 298 { 299 return value.length ? split(value, separator) : null; 300 } 301 302 /// Like std.array.split, but always returns a non-empty array. 303 auto split1(T, S)(T value, S separator) 304 { 305 auto result = split(value, separator); 306 return result.length ? result : [value]; 307 } 308 309 /// Include delimiter in result chunks as suffix 310 H[] splitWithSuffix(H, S)(H haystack, S separator) 311 { 312 H[] result; 313 while (haystack.length) 314 { 315 auto pos = haystack._indexOf(separator); 316 if (pos < 0) 317 pos = haystack.length; 318 else 319 { 320 static if (is(typeof(haystack[0] == separator))) 321 pos += 1; 322 else 323 static if (is(typeof(haystack[0..1] == separator))) 324 pos += separator.length; 325 else 326 static assert(false, "Don't know how to split " ~ H.stringof ~ " by " ~ S.stringof); 327 } 328 result ~= haystack[0..pos]; 329 haystack = haystack[pos..$]; 330 } 331 return result; 332 } 333 334 unittest 335 { 336 assert("a\nb".splitWithSuffix('\n') == ["a\n", "b"]); 337 assert([1, 0, 2].splitWithSuffix(0) == [[1, 0], [2]]); 338 339 assert("a\r\nb".splitWithSuffix("\r\n") == ["a\r\n", "b"]); 340 assert([1, 0, 0, 2].splitWithSuffix([0, 0]) == [[1, 0, 0], [2]]); 341 } 342 343 /// Include delimiter in result chunks as prefix 344 H[] splitWithPrefix(H, S)(H haystack, S separator) 345 { 346 H[] result; 347 while (haystack.length) 348 { 349 auto pos = haystack[1..$]._indexOf(separator); 350 if (pos < 0) 351 pos = haystack.length; 352 else 353 pos++; 354 result ~= haystack[0..pos]; 355 haystack = haystack[pos..$]; 356 } 357 return result; 358 } 359 360 unittest 361 { 362 assert("a\nb".splitWithPrefix('\n') == ["a", "\nb"]); 363 assert([1, 0, 2].splitWithPrefix(0) == [[1], [0, 2]]); 364 365 assert("a\r\nb".splitWithPrefix("\r\n") == ["a", "\r\nb"]); 366 assert([1, 0, 0, 2].splitWithPrefix([0, 0]) == [[1], [0, 0, 2]]); 367 } 368 369 /// Include delimiters in result chunks as prefix/suffix 370 S[] splitWithPrefixAndSuffix(S)(S haystack, S prefix, S suffix) 371 { 372 S[] result; 373 auto separator = suffix ~ prefix; 374 while (haystack.length) 375 { 376 auto pos = haystack._indexOf(separator); 377 if (pos < 0) 378 pos = haystack.length; 379 else 380 pos += suffix.length; 381 result ~= haystack[0..pos]; 382 haystack = haystack[pos..$]; 383 } 384 return result; 385 } 386 387 /// 388 unittest 389 { 390 auto s = q"EOF 391 Section 1: 392 10 393 11 394 12 395 Section 2: 396 21 397 22 398 23 399 Section 3: 400 31 401 32 402 33 403 EOF"; 404 auto parts = s.splitWithPrefixAndSuffix("Section ", "\n"); 405 assert(parts.length == 3 && parts.join == s); 406 foreach (part; parts) 407 assert(part.startsWith("Section ") && part.endsWith("\n")); 408 } 409 410 /// Ensure that arr is non-null if empty. 411 T[] nonNull(T)(T[] arr) 412 { 413 if (arr !is null) 414 return arr; 415 return emptySlice!(typeof(arr[0])); 416 } 417 418 /// If arr is null, return null. Otherwise, return a non-null 419 /// transformation dg over arr. 420 template mapNull(alias dg) 421 { 422 auto mapNull(T)(T arr) 423 { 424 if (arr is null) 425 return null; 426 return dg(arr).nonNull; 427 } 428 } 429 430 unittest 431 { 432 assert(string.init.mapNull!(s => s ) is null); 433 assert(string.init.mapNull!(s => "" ) is null); 434 assert("" .mapNull!(s => s ) !is null); 435 assert("" .mapNull!(s => string.init) !is null); 436 } 437 438 /// Select and return a random element from the array. 439 auto ref sample(T)(T[] arr) 440 { 441 import std.random; 442 return arr[uniform(0, $)]; 443 } 444 445 unittest 446 { 447 assert([7, 7, 7].sample == 7); 448 auto s = ["foo", "bar"].sample(); // Issue 13807 449 const(int)[] a2 = [5]; sample(a2); 450 } 451 452 /// Select and return a random element from the array, 453 /// and remove it from the array. 454 T pluck(T)(ref T[] arr) 455 { 456 import std.random; 457 auto pos = uniform(0, arr.length); 458 auto result = arr[pos]; 459 arr = arr.remove(pos); 460 return result; 461 } 462 463 unittest 464 { 465 auto arr = [1, 2, 3]; 466 auto res = [arr.pluck, arr.pluck, arr.pluck]; 467 res.sort(); 468 assert(res == [1, 2, 3]); 469 } 470 471 import std.functional; 472 473 T[] countSort(alias value = "a", T)(T[] arr) 474 { 475 alias unaryFun!value getValue; 476 alias typeof(getValue(arr[0])) V; 477 if (arr.length == 0) return arr; 478 V min = getValue(arr[0]), max = getValue(arr[0]); 479 foreach (el; arr[1..$]) 480 { 481 auto v = getValue(el); 482 if (min > v) 483 min = v; 484 if (max < v) 485 max = v; 486 } 487 auto n = max-min+1; 488 auto counts = new size_t[n]; 489 foreach (el; arr) 490 counts[getValue(el)-min]++; 491 auto indices = new size_t[n]; 492 foreach (i; 1..n) 493 indices[i] = indices[i-1] + counts[i-1]; 494 T[] result = new T[arr.length]; 495 foreach (el; arr) 496 result[indices[getValue(el)-min]++] = el; 497 return result; 498 } 499 500 // *************************************************************************** 501 502 void stackPush(T)(ref T[] arr, auto ref T val) 503 { 504 arr ~= val; 505 } 506 alias stackPush queuePush; 507 508 ref T stackPeek(T)(T[] arr) { return arr[$-1]; } 509 510 ref T stackPop(T)(ref T[] arr) 511 { 512 auto ret = &arr[$-1]; 513 arr = arr[0..$-1]; 514 return *ret; 515 } 516 517 ref T queuePeek(T)(T[] arr) { return arr[0]; } 518 519 ref T queuePeekLast(T)(T[] arr) { return arr[$-1]; } 520 521 ref T queuePop(T)(ref T[] arr) 522 { 523 auto ret = &arr[0]; 524 arr = arr[1..$]; 525 if (!arr.length) arr = null; 526 return *ret; 527 } 528 529 ref T shift(T)(ref T[] arr) { auto oldArr = arr; arr = arr[1..$]; return oldArr[0]; } 530 T[] shift(T)(ref T[] arr, size_t n) { T[] result = arr[0..n]; arr = arr[n..$]; return result; } 531 T[N] shift(size_t N, T)(ref T[] arr) { T[N] result = cast(T[N])(arr[0..N]); arr = arr[N..$]; return result; } 532 void unshift(T)(ref T[] arr, T value) { arr.insertInPlace(0, value); } 533 void unshift(T)(ref T[] arr, T[] value) { arr.insertInPlace(0, value); } 534 535 unittest 536 { 537 int[] arr = [1, 2, 3]; 538 assert(arr.shift == 1); 539 assert(arr == [2, 3]); 540 assert(arr.shift(2) == [2, 3]); 541 assert(arr == []); 542 543 arr = [3]; 544 arr.unshift([1, 2]); 545 assert(arr == [1, 2, 3]); 546 arr.unshift(0); 547 assert(arr == [0, 1, 2, 3]); 548 549 assert(arr.shift!2 == [0, 1]); 550 assert(arr == [2, 3]); 551 } 552 553 /// If arr starts with prefix, slice it off and return true. 554 /// Otherwise leave arr unchaned and return false. 555 deprecated("Use std.algorithm.skipOver instead") 556 bool eat(T)(ref T[] arr, T[] prefix) 557 { 558 if (arr.startsWith(prefix)) 559 { 560 arr = arr[prefix.length..$]; 561 return true; 562 } 563 return false; 564 } 565 566 // Overload disambiguator 567 private sizediff_t _indexOf(H, N)(H haystack, N needle) 568 { 569 static import std.string; 570 571 static if (is(typeof(ae.utils.array.indexOf(haystack, needle)))) 572 alias indexOf = ae.utils.array.indexOf; 573 else 574 static if (is(typeof(std..string.indexOf(haystack, needle)))) 575 alias indexOf = std..string.indexOf; 576 else 577 static assert(false, "No suitable indexOf overload found"); 578 return indexOf(haystack, needle); 579 } 580 581 /// Returns the slice of source up to the first occurrence of delim, 582 /// and fast-forwards source to the point after delim. 583 /// If delim is not found, the behavior depends on orUntilEnd: 584 /// - If orUntilEnd is false (default), it returns null 585 /// and leaves source unchanged. 586 /// - If orUntilEnd is true, it returns source, 587 /// and then sets source to null. 588 T[] skipUntil(T, D)(ref T[] source, D delim, bool orUntilEnd = false) 589 { 590 enum bool isSlice = is(typeof(source[0..1]==delim)); 591 enum bool isElem = is(typeof(source[0] ==delim)); 592 static assert(isSlice || isElem, "Can't skip " ~ T.stringof ~ " until " ~ D.stringof); 593 static assert(isSlice != isElem, "Ambiguous types for skipUntil: " ~ T.stringof ~ " and " ~ D.stringof); 594 static if (isSlice) 595 auto delimLength = delim.length; 596 else 597 enum delimLength = 1; 598 599 static import std.string; 600 601 auto i = _indexOf(source, delim); 602 if (i < 0) 603 { 604 if (orUntilEnd) 605 { 606 auto result = source; 607 source = null; 608 return result; 609 } 610 else 611 return null; 612 } 613 auto result = source[0..i]; 614 source = source[i+delimLength..$]; 615 return result; 616 } 617 618 deprecated("Use skipUntil instead") 619 enum OnEof { returnNull, returnRemainder, throwException } 620 621 deprecated("Use skipUntil instead") 622 template eatUntil(OnEof onEof = OnEof.throwException) 623 { 624 T[] eatUntil(T, D)(ref T[] source, D delim) 625 { 626 static if (onEof == OnEof.returnNull) 627 return skipUntil(source, delim, false); 628 else 629 static if (onEof == OnEof.returnRemainder) 630 return skipUntil(source, delim, true); 631 else 632 return skipUntil(source, delim, false).enforce("Delimiter not found in source"); 633 } 634 } 635 636 deprecated unittest 637 { 638 string s; 639 640 s = "Mary had a little lamb"; 641 assert(s.eatUntil(" ") == "Mary"); 642 assert(s.eatUntil(" ") == "had"); 643 assert(s.eatUntil(' ') == "a"); 644 645 assertThrown!Exception(s.eatUntil("#")); 646 assert(s.eatUntil!(OnEof.returnNull)("#") is null); 647 assert(s.eatUntil!(OnEof.returnRemainder)("#") == "little lamb"); 648 649 ubyte[] bytes = [1, 2, 0, 3, 4, 0, 0]; 650 assert(bytes.eatUntil(0) == [1, 2]); 651 assert(bytes.eatUntil([ubyte(0), ubyte(0)]) == [3, 4]); 652 } 653 654 // *************************************************************************** 655 656 // Equivalents of array(xxx(...)), but less parens and UFCS-able. 657 auto amap(alias pred, T)(T[] arr) { return array(map!pred(arr)); } 658 auto afilter(alias pred, T)(T[] arr) { return array(filter!pred(arr)); } 659 auto auniq(T)(T[] arr) { return array(uniq(arr)); } 660 auto asort(alias pred, T)(T[] arr) { sort!pred(arr); return arr; } 661 662 unittest 663 { 664 assert([1, 2, 3].amap!`a*2`() == [2, 4, 6]); 665 assert([1, 2, 3].amap!(n => n*n)() == [1, 4, 9]); 666 } 667 668 // *************************************************************************** 669 670 /// Array with normalized comparison and hashing. 671 /// Params: 672 /// T = array element type to wrap. 673 /// normalize = function which should return a range of normalized elements. 674 struct NormalizedArray(T, alias normalize) 675 { 676 T[] arr; 677 678 this(T[] arr) { this.arr = arr; } 679 680 int opCmp (in T[] other) const { return std.algorithm.cmp(normalize(arr), normalize(other )) ; } 681 int opCmp ( const typeof(this) other) const { return std.algorithm.cmp(normalize(arr), normalize(other.arr)) ; } 682 int opCmp (ref const typeof(this) other) const { return std.algorithm.cmp(normalize(arr), normalize(other.arr)) ; } 683 bool opEquals(in T[] other) const { return std.algorithm.cmp(normalize(arr), normalize(other ))==0; } 684 bool opEquals( const typeof(this) other) const { return std.algorithm.cmp(normalize(arr), normalize(other.arr))==0; } 685 bool opEquals(ref const typeof(this) other) const { return std.algorithm.cmp(normalize(arr), normalize(other.arr))==0; } 686 687 hash_t toHashReal() const 688 { 689 import std.digest.crc; 690 CRC32 crc; 691 foreach (c; normalize(arr)) 692 crc.put(cast(ubyte[])((&c)[0..1])); 693 static union Result { ubyte[4] crcResult; hash_t hash; } 694 return Result(crc.finish()).hash; 695 } 696 697 hash_t toHash() const nothrow @trusted 698 { 699 return (cast(hash_t delegate() nothrow @safe)&toHashReal)(); 700 } 701 } 702 703 // *************************************************************************** 704 705 /// Equivalent of PHP's `list` language construct: 706 /// http://php.net/manual/en/function.list.php 707 /// Works with arrays and tuples. 708 /// Specify `null` as an argument to ignore that index 709 /// (equivalent of `list(x, , y)` in PHP). 710 auto list(Args...)(auto ref Args args) 711 { 712 struct List 713 { 714 auto dummy() { return args[0]; } // https://issues.dlang.org/show_bug.cgi?id=11886 715 void opAssign(T)(auto ref T t) 716 { 717 assert(t.length == args.length, 718 "Assigning %d elements to list with %d elements" 719 .format(t.length, args.length)); 720 foreach (i; RangeTuple!(Args.length)) 721 static if (!is(Args[i] == typeof(null))) 722 args[i] = t[i]; 723 } 724 } 725 return List(); 726 } 727 728 /// 729 unittest 730 { 731 string name, value; 732 list(name, null, value) = "NAME=VALUE".findSplit("="); 733 assert(name == "NAME" && value == "VALUE"); 734 } 735 736 version(LittleEndian) 737 unittest 738 { 739 uint onlyValue; 740 ubyte[] data = [ubyte(42), 0, 0, 0]; 741 list(onlyValue) = cast(uint[])data; 742 assert(onlyValue == 42); 743 }