/**
 * Utility code related to string and text processing.
 *
 * License:
 *   This Source Code Form is subject to the terms of
 *   the Mozilla Public License, v. 2.0. If a copy of
 *   the MPL was not distributed with this file, You
 *   can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Authors:
 *   Vladimir Panteleev <vladimir@thecybershadow.net>
 */

module ae.utils.text;

import std.algorithm;
import std.ascii;
import std.exception;
import std.conv;
import std.format;
import std.range.primitives;
import std.string;
import std.traits;
import std.typetuple;

import core.stdc.stdio : snprintf, sscanf;
import core.stdc.string;

import ae.utils.array;
import ae.utils.meta;
import ae.utils.text.parsefp;
import ae.utils.textout;

// Make the unqualified name `indexOf` refer to std.string's version.
alias indexOf = std.string.indexOf;

public import ae.utils.text.ascii : ascii, decimalSize, toDec, toDecFixed, asciiToLower, asciiToUpper;
deprecated public import ae.utils.text.ascii : DecimalSize;
public import ae.utils.array : contains;

// ************************************************************************

/// CTFE helper: formats `obj` using the format string `fmt`
/// (argument order swapped compared to `std.format.format`, for UFCS use).
string formatAs(T)(auto ref T obj, string fmt)
{
	return format(fmt, obj);
}

/// Lazily formatted object.
/// Returns a struct holding `values`; the values are only formatted
/// (using the compile-time format string `fmt`) when the struct's
/// `toString` is invoked with a sink delegate or an output range.
auto formatted(string fmt, T...)(auto ref T values)
{
	static struct Formatted
	{
		T values;

		/// Sink-delegate flavor of toString.
		void toString(void delegate(const(char)[]) sink) const
		{
			sink.formattedWrite!fmt(values);
		}

		/// Output-range flavor of toString.
		void toString(W)(ref W writer) const
		if (isOutputRange!(W, char))
		{
			writer.formattedWrite!fmt(values);
		}
	}
	return Formatted(values);
}

unittest
{
	assert(format!"%s%s%s"("<", formatted!"%x"(64), ">") == "<40>");
}

// ************************************************************************

/// Consume a LF or CRLF terminated line from s.
/// Sets s to null and returns the remainder
/// if there is no line terminator in s.
T[] eatLine(T)(ref T[] s, bool eatIncompleteLines = true)
{
	// Cut at the first '\n', then let chomp() drop a trailing line terminator
	// (this is what handles the CR of a CRLF pair).
	return s.skipUntil([T('\n')], eatIncompleteLines).chomp();
}

// Deprecated flavor of eatLine, parameterized by an OnEof policy
// which is forwarded to eatUntil.
deprecated template eatLine(OnEof onEof)
{
	T[] eatLine(T)(ref T[] s)
	{
		return s.eatUntil!onEof([T('\n')]).chomp();
	}
}

unittest
{
	string s = "Hello\nworld";
	assert(s.eatLine() == "Hello");
	assert(s.eatLine() == "world");
	assert(s is null);
	assert(s.eatLine() is null);
}

// Uses memchr (not Boyer-Moore), best for short strings.
// Replaces all occurrences of `from` in `what` with `to`.
// Returns `what` itself (no allocation) if there is nothing to replace.
T[] fastReplace(T)(T[] what, T[] from, T[] to)
	if (T.sizeof == 1) // TODO (uses memchr)
{
	alias Unqual!T U;

//	debug scope(failure) std.stdio.writeln("fastReplace crashed: ", [what, from, to]);
	// A null pointer typed as U*. Slicing it with two addresses
	// (RAM[a .. b]) yields a U[] covering the memory between them.
	enum RAM = cast(U*)null;

	// Nothing can possibly match.
	if (what.length < from.length || from.length==0)
		return what;

	// Special case: the needle is a single character.
	if (from.length==1)
	{
		auto fromc = from[0];
		if (to.length==1)
		{
			// Character-for-character replacement: duplicate once, patch in place.
			auto p = cast(T*)memchr(what.ptr, fromc, what.length);
			if (!p)
				return what;

			T[] result = what.dup;
			// Offset between the copy and the original: p walks the original,
			// p[delta] is the corresponding character in the copy.
			auto delta = result.ptr - what.ptr;
			auto toChar = to[0];
			auto end = what.ptr + what.length;
			do
			{
				(cast(U*)p)[delta] = toChar; // zomg hax lol
				p++;
				p = cast(T*)memchr(p, fromc, end - p);
			} while (p);
			return result;
		}
		else
		{
			// Replacement has a different length: rebuild through a StringBuilder.
			auto p = cast(immutable(T)*)memchr(what.ptr, fromc, what.length);
			if (!p)
				return what;

			auto sb = StringBuilder(what.length);
			do
			{
				// Emit the text before the match, then the replacement.
				sb.put(what[0..p-what.ptr], to);
				what = what[p-what.ptr+1..$];
				p = cast(immutable(T)*)memchr(what.ptr, fromc, what.length);
			}
			while (p);

			// Emit whatever follows the last match.
			sb.put(what);
			return sb.get();
		}
	}

	// General case: find the first character of the needle with memchr,
	// then compare the remainder of the needle.
	auto head = from[0];
	auto tail = from[1..$];

	auto p = cast(T*)what.ptr;
	// Last position (exclusive) at which `head` can start a complete match.
	auto end = p + what.length - tail.length;
	p = cast(T*)memchr(p, head, end-p);
	while (p)
	{
		p++;
		if (p[0..tail.length] == tail)
		{
			// First match found; only now is a result buffer allocated.
			if (from.length == to.length)
			{
				// Same length: duplicate once and overwrite matches in the copy.
				T[] result = what.dup;
				// p points one past `head` in the original, hence the extra -1.
				auto deltaMinusOne = (result.ptr - what.ptr) - 1;

				// Jump straight to the replacement for the match already found.
				goto replaceA;
			dummyA: // compiler complains

				do
				{
					p++;
					if (p[0..tail.length] == tail)
					{
					replaceA:
						(cast(U*)p+deltaMinusOne)[0..to.length] = to[];
					}
					p = cast(T*)memchr(p, head, end-p);
				}
				while (p);

				return result;
			}
			else
			{
				// Different length: rebuild through a StringBuilder.
				// `start` is the beginning of the text not yet emitted.
				auto start = cast(T*)what.ptr;
				auto sb = StringBuilder(what.length);
				// Jump straight to the replacement for the match already found.
				goto replaceB;
			dummyB: // compiler complains

				do
				{
					p++;
					if (p[0..tail.length] == tail)
					{
					replaceB:
						// Emit the text from `start` up to the match (p-1 is `head`),
						// followed by the replacement.
						sb.put(RAM[cast(size_t)start .. cast(size_t)p-1], to);
						start = p + tail.length;
						what = what[start-what.ptr..$];
					}
					else
					{
						// False alarm: continue searching after this `head`.
						what = what[p-what.ptr..$];
					}
					p = cast(T*)memchr(what.ptr, head, what.length);
				}
				while (p);

				//sb.put(what);
				// Emit the text after the last match.
				sb.put(RAM[cast(size_t)start..cast(size_t)(what.ptr+what.length)]);
				return sb.get();
			}

			assert(0);
		}
		p = cast(T*)memchr(p, head, end-p);
	}

	// No match at all: return the input untouched.
	return what;
}

unittest
{
	import std.array;
	// Compares fastReplace against std.array.replace, and checks that
	// no copy is made when the result equals the input.
	void test(string haystack, string from, string to)
	{
		auto description = `("` ~ haystack ~ `", "` ~ from ~ `", "` ~ to ~ `")`;

		auto r1 = fastReplace(haystack, from, to);
		auto r2 =     replace(haystack, from, to);
		assert(r1 == r2, `Bad replace: ` ~ description ~ ` == "` ~ r1 ~ `"`);

		if (r1 == haystack)
			assert(r1 is haystack, `Pointless reallocation: ` ~ description);
	}

	test("Mary had a little lamb", "a", "b");
	test("Mary had a little lamb", "a", "aaa");
	test("Mary had a little lamb", "Mary", "Lucy");
	test("Mary had a little lamb", "Mary", "Jimmy");
	test("Mary had a little lamb", "lamb", "goat");
	test("Mary had a little lamb", "lamb", "sheep");
	test("Mary had a little lamb", " l", " x");
	test("Mary had a little lamb", " l", " xx");

	test("Mary had a little lamb", "X" , "Y" );
	test("Mary had a little lamb", "XX", "Y" );
	test("Mary had a little lamb", "X" , "YY");
	test("Mary had a little lamb", "XX", "YY");
	test("Mary had a little lamb", "aX", "Y" );
	test("Mary had a little lamb", "aX", "YY");

	test("foo", "foobar", "bar");
}

// Splits `s` by the single delimiter element `d`, using memchr.
// Returns null for empty input; the result elements are slices of `s`.
// NOTE(review): memchr is given s.length as a byte count, so this
// presumably expects 1-byte element types - confirm against callers.
T[][] fastSplit(T, U)(T[] s, U d)
	if (is(Unqual!T == Unqual!U))
{
	if (!s.length)
		return null;

	auto p = cast(T*)memchr(s.ptr, d, s.length);
	if (!p)
		return [s];

	// First pass: count the delimiters, so the result is allocated only once.
	size_t n;
	auto end = s.ptr + s.length;
	do
	{
		n++;
		p++;
		p = cast(T*) memchr(p, d, end-p);
	}
	while (p);

	// Second pass: slice out the segments.
	auto result = new T[][n+1];
	n = 0;
	auto start = s.ptr;
	p = cast(T*) memchr(start, d, s.length);
	do
	{
		result[n++] = start[0..p-start];
		start = ++p;
		p = cast(T*) memchr(p, d, end-p);
	}
	while (p);
	// Segment after the last delimiter (possibly empty).
	result[n] = start[0..end-start];

	return result;
}

// Splits `text` by '\n', removing one trailing '\r' from each line.
T[][] splitAsciiLines(T)(T[] text)
	if (is(Unqual!T == char))
{
	auto lines = text.fastSplit('\n');
	foreach (ref line; lines)
		if (line.length && line[$-1]=='\r')
			line = line[0..$-1];
	return lines;
}

unittest
{
	assert(splitAsciiLines("a\nb\r\nc\r\rd\n\re\r\n\nf") == ["a", "b", "c\r\rd", "\re", "", "f"]);
	assert(splitAsciiLines(string.init) == splitLines(string.init));
}

/// Like std.string.split (one argument version, which splits by
/// whitespace), but only splits by ASCII and does not autodecode.
T[][] asciiSplit(T)(T[] text)
	if (is(Unqual!T == char))
{
	T[][] words;
	immutable total = text.length;
	size_t pos = 0;

	while (pos < total)
	{
		// Skip the run of whitespace preceding the next word
		while (pos < total && std.ascii.isWhite(text[pos]))
			pos++;
		if (pos == total)
			break;

		// Collect the word itself
		immutable wordStart = pos;
		while (pos < total && !std.ascii.isWhite(text[pos]))
			pos++;
		words ~= text[wordStart .. pos];
	}

	return words;
}

unittest
{
	// asciiSplit must agree with the whitespace-splitting std split on ASCII input
	foreach (s; ["", " ", "a", " a", "a ", "a b", " a b", "a b ", " a b ",
			" ", " a", "a ", "a b", "a b ", "a b c"])
	{
		auto expected = s.split;
		auto actual = s.asciiSplit;
		assert(expected == actual, format("Got %s, expected %s", actual, expected));
	}
}

/// Returns the slice of `s` without leading and trailing ASCII whitespace.
/// Never allocates; the result is always a slice of the input.
T[] asciiStrip(T)(T[] s)
	if (is(Unqual!T == char))
{
	size_t first = 0;
	size_t last = s.length;

	// Advance past leading whitespace
	while (first < last && isWhite(s[first]))
		first++;
	// Retreat past trailing whitespace
	while (last > first && isWhite(s[last - 1]))
		last--;

	return s[first .. last];
}

unittest
{
	string s = "Hello, world!";
	assert(asciiStrip(s) is s);
	assert(asciiStrip("\r\n\tHello ".dup) == "Hello");
}

/// Covering slice-list of s with interleaved whitespace.
T[][] segmentByWhitespace(T)(T[] s)
	if (is(Unqual!T == char))
{
	if (!s.length)
		return null;

	T[][] segments;
	bool wasWhite = isWhite(s[0]);
	size_t start = 0;
	foreach (p, char c; s)
	{
		// Named differently from the isWhite function, so the
		// local does not shadow the function it is initialized from.
		bool nowWhite = isWhite(c);
		if (nowWhite != wasWhite)
		{
			// Whitespace/non-whitespace boundary: close the current segment.
			segments ~= s[start..p];
			start = p;
		}
		wasWhite = nowWhite;
	}
	segments ~= s[start..$];

	return segments;
}

/// Replaces every whitespace run of `s` which contains a newline
/// with a single space. Other whitespace runs are left untouched.
T[] newlinesToSpaces(T)(T[] s)
	if (is(Unqual!T == char))
{
	auto slices = segmentByWhitespace(s);
	foreach (ref slice; slices)
		if (slice.contains("\n"))
			slice = " ";
	return slices.join();
}

/// Strips `s` and replaces every remaining whitespace run with a single space.
ascii normalizeWhitespace(ascii s)
{
	auto slices = segmentByWhitespace(strip(s));
	// The stripped string is expected to start with a non-whitespace segment,
	// so whitespace runs are at the odd indices.
	foreach (i, ref slice; slices)
		if (i & 1) // odd
			slice = " ";
	return slices.join();
}

unittest
{
	assert(normalizeWhitespace(" Mary had\ta\nlittle\r\n\tlamb") == "Mary had a little lamb");
}

/// Splits a camelCase identifier into its words.
/// A run of upper-case letters followed by a lower-case letter is split
/// before its last upper-case letter ("IPString" -> "IP", "String").
string[] splitByCamelCase(string s)
{
	string[] result;
	size_t start = 0;
	foreach (i; 1..s.length+1)
		if (i == s.length
		 || (isLower(s[i-1]) && isUpper(s[i]))
		 || (i+1 < s.length && isUpper(s[i-1]) && isUpper(s[i]) && isLower(s[i+1]))
		)
		{
			result ~= s[start..i];
			start = i;
		}
	return result;
}

unittest
{
	assert(splitByCamelCase("parseIPString") == ["parse", "IP", "String"]);
	assert(splitByCamelCase("IPString") == ["IP", "String"]);
}

/// Joins words into a camelCase identifier: the first word is used as is,
/// the first character of every following word is upper-cased.
/// Returns null for an empty array. Empty words are skipped.
string camelCaseJoin(string[] arr)
{
	if (!arr.length)
		return null;
	string result = arr[0];
	foreach (s; arr[1..$])
		if (s.length) // an empty word has no first character to capitalize
			result ~= std.ascii.toUpper(s[0]) ~ s[1..$];
	return result;
}

unittest
{
	assert("parse-IP-string".split('-').camelCaseJoin() == "parseIPString");
	assert("parse--IP".split('-').camelCaseJoin() == "parseIP");
}

// ************************************************************************

/// Like std.string.wrap, but preserves whitespace at line start and
/// between
/// (non-wrapped) words.
///
/// Params:
///   s           = text to wrap
///   columns     = line width after which a line is wrapped
///   firstIndent = prefix of the first output line
///   indent      = prefix of every following output line
///   tabWidth    = number of columns a tab character is counted as
/// Returns:
///   The wrapped text. A newline is appended if the last line is not empty.
///   An empty `s` is returned as is.
string verbatimWrap(
	string s,
	size_t columns = 80,
	string firstIndent = null,
	string indent = null,
	size_t tabWidth = 8,
)
{
	if (!s.length)
		return s;

	import std.uni : isWhite;
	import std.range;

	// Result buffer. Append-only (contains only text which has been wrapped).
	string result;
	// Index in `s` corresponding to the end of `result`
	size_t start;
	// Index in `s` corresponding to after the last newline in `result`
	size_t lineStart;
	// Current column
	size_t col;
	// Was the previous character we looked at whitespace?
	bool wasWhite;
	// We need to add an indent at the next (non-newline) character.
	bool needIndent;

	result = firstIndent;
	col = firstIndent.walkLength;
	auto indentWidth = indent.walkLength;

	// Append the pending text s[start .. pos] to the result,
	// moving it to a new line first if it does not fit on the current one.
	void flush(size_t pos)
	{
		if (col > columns && start > lineStart)
		{
			result ~= "\n" ~ indent;

			// Consume whitespace at line break.
			// Only the pending text is examined, so `start` can never pass `pos`;
			// iterating by code point avoids misreading UTF-8 continuation bytes.
			size_t numWhite = pos - start; // all whitespace, unless proven otherwise
			foreach (i, dchar c; s[start .. pos])
				if (!isWhite(c))
				{
					numWhite = i;
					break;
				}
			start += numWhite;
			lineStart = start;

			// The new line consists of the indent and the pending text.
			col = indentWidth;
			foreach (dchar c; s[start .. pos])
				col += c == '\t' ? tabWidth : 1;
		}
		result ~= s[start .. pos];
		start = pos;
	}

	foreach (pos, dchar c; s)
	{
		auto atWhite = isWhite(c);
		// A word just ended - this is a point where the line may be broken.
		if (atWhite && !wasWhite)
			flush(pos);
		if (c == '\n')
		{
			flush(pos);
			result ~= "\n";
			start++; // past newline
			lineStart = start;
			needIndent = true;
			col = 0;
		}
		else
		{
			if (needIndent)
			{
				assert(col == 0);
				result ~= indent;
				col += indentWidth;
				needIndent = false;
			}
			if (c == '\t')
				col += tabWidth;
			else
				col++;
		}
		wasWhite = atWhite;
	}
	flush(s.length);
	if (col)
		result ~= "\n"; // trailing newline

	return result;
}

// ************************************************************************

/// Case-insensitive ASCII string.
alias CIAsciiString = NormalizedArray!(immutable(char), s => s.byCodeUnit.map!(std.ascii.toLower));

///
unittest
{
	CIAsciiString s = "test";
	assert(s == "TEST");
	assert(s >= "Test" && s <= "Test");
	assert(CIAsciiString("a") == CIAsciiString("A"));
	assert(CIAsciiString("a") != CIAsciiString("B"));
	assert(CIAsciiString("a") <  CIAsciiString("B"));
	assert(CIAsciiString("A") <  CIAsciiString("b"));
	// Only ASCII letters are case-folded
	assert(CIAsciiString("я") != CIAsciiString("Я"));
}

/// Case-insensitive Unicode string.
alias CIUniString = NormalizedArray!(immutable(char), s => s.map!(std.uni.toLower));

///
unittest
{
	CIUniString s = "привет";
	assert(s == "ПРИВЕТ");
	assert(s >= "Привет" && s <= "Привет");
	assert(CIUniString("я") == CIUniString("Я"));
	assert(CIUniString("а") != CIUniString("Б"));
	assert(CIUniString("а") <  CIUniString("Б"));
	assert(CIUniString("А") <  CIUniString("б"));
}

// ************************************************************************

import std.utf;

/// Convert any data to a valid UTF-8 bytestream, so D's string functions can
/// properly work on it.
/// Every input byte is treated as the code point with the same value.
string rawToUTF8(in char[] s)
{
	dchar[] codePoints;
	codePoints.length = s.length;
	foreach (index; 0 .. s.length)
		codePoints[index] = s[index];
	return toUTF8(codePoints);
}

/// Undo rawToUTF8.
/// Every decoded code point must be below U+0100 (checked with an assert).
ascii UTF8ToRaw(in char[] r) pure
{
	// The result can not be longer than the input
	auto raw = new char[r.length];
	size_t count = 0;
	foreach (dchar codePoint; r)
	{
		assert(codePoint < '\u0100');
		raw[count] = cast(char)codePoint;
		++count;
	}
	return raw[0 .. count];
}

unittest
{
	// Every byte value must survive the round trip
	char[1] c;
	foreach (value; 0 .. 256)
	{
		c[0] = cast(char)value;
		auto encoded = rawToUTF8(c[]);
		auto decoded = UTF8ToRaw(encoded);
		assert(decoded == c[], format("%s -> %s -> %s", cast(ubyte[])c[], cast(ubyte[])encoded, cast(ubyte[])decoded));
	}
}

/// Where a delegate with this signature is required.
string nullStringTransform(in char[] s) { return to!string(s); }

/// Returns `s` unchanged if it is valid UTF-8.
/// Otherwise, returns the result of re-encoding its bytes with rawToUTF8.
string forceValidUTF8(string s)
{
	try
	{
		validate(s);
		return s;
	}
	catch (UTFException)
		return rawToUTF8(s);
}

// ************************************************************************

/// Return the slice up to the first NUL character,
/// or of the whole array if none is found.
// `n` must be a value parameter: as a type parameter,
// `C[n]` would denote an associative array instead of a static array.
C[] fromZArray(C, size_t n)(ref C[n] arr)
{
	// countUntil returns -1 if there is no NUL element
	auto p = arr.representation.countUntil(0);
	return arr[0 .. p<0 ? $ : p];
}

/// ditto
C[] fromZArray(C)(C[] arr)
{
	// countUntil returns -1 if there is no NUL element
	auto p = arr.representation.countUntil(0);
	return arr[0 .. p<0 ? $ : p];
}

unittest
{
	char[4] arr = "ab\0d";
	assert(arr.fromZArray == "ab");
	arr[] = "abcd";
	assert(arr.fromZArray == "abcd");
}

unittest
{
	string arr = "ab\0d";
	assert(arr.fromZArray == "ab");
	arr = "abcd";
	assert(arr.fromZArray == "abcd");
}

// ************************************************************************

/// Formats binary data as a hex dump (three-column layout consisting of hex
/// offset, byte values in hex, and printable low-ASCII characters).
string hexDump(const(void)[] b)
{
	auto data = cast(const(ubyte)[]) b;
	assert(data.length);
	size_t i=0;
	string s;
	// One output line per 16 input bytes
	while (i<data.length)
	{
		s ~= format("%08X: ", i);
		foreach (x; 0..16)
		{
			if (i+x<data.length)
				s ~= format("%02X ", data[i+x]);
			else
				s ~= "   "; // same width as "%02X ", to keep the text column aligned
			if (x==7)
				s ~= "| ";
		}
		s ~= " ";
		foreach (x; 0..16)
		{
			if (i+x<data.length)
			{
				if (data[i+x]==0)
					s ~= ' ';
				else
				if (data[i+x]<32 || data[i+x]>=128)
					s ~= '.'; // not printable low-ASCII
				else
					s ~= cast(char)data[i+x];
			}
			else
				s ~= ' ';
		}
		s ~= "\n";
		i += 16;
	}
	return s;
}

import std.conv;

/// Parses the entire string `s` as a hexadecimal number of type `T`.
/// Throws: ConvException if `s` contains anything but the number.
T fromHex(T : ulong = uint, C)(const(C)[] s)
{
	T result = parse!T(s, 16);
	enforce(s.length==0, new ConvException("Could not parse entire string"));
	return result;
}

/// Decodes a string of hex digit pairs to a newly allocated array of bytes.
ubyte[] arrayFromHex(in char[] hex)
{
	auto buf = new ubyte[hex.length/2];
	arrayFromHex(hex, buf);
	return buf;
}

/// Compile-time configuration for the hex digit parsing functions.
struct HexParseConfig
{
	bool checked = true; /// Throw on characters which are not hex digits
	bool lower = true;   /// Parse lower-case digits (a-f)
	bool upper = true;   /// Parse upper-case digits (A-F)
}

/// Converts one hex digit to its value.
/// If `config.checked` is set, throws an Exception for any other character;
/// otherwise, the result for any other character is meaningless.
// NOTE(review): in checked mode both letter cases are accepted,
// regardless of config.lower / config.upper - confirm whether that is intended.
ubyte parseHexDigit(HexParseConfig config = HexParseConfig.init)(char c)
{
	static assert(config.lower || config.upper,
		"Must parse at least either lower or upper case digits");
	static if (config.checked)
	{
		switch (c)
		{
			case '0': .. case '9': return cast(ubyte)(c - '0');
			case 'a': .. case 'f': return cast(ubyte)(c - 'a' + 10);
			case 'A': .. case 'F': return cast(ubyte)(c - 'A' + 10);
			default: throw new Exception("Bad hex digit: " ~ c);
		}
	}
	else
	{
		// Unchecked: classify by comparison only
		if (c <= '9')
			return cast(ubyte)(c - '0');
		static if (config.lower && config.upper)
		{
			if (c < 'a')
				return cast(ubyte)(c - 'A' + 10);
			else
				return cast(ubyte)(c - 'a' + 10);
		}
		else
			static if (config.lower)
				return cast(ubyte)(c - 'a' + 10);
			else
				return cast(ubyte)(c - 'A' + 10);
	}
}

/// Decodes a string of hex digit pairs into `buf`,
/// which must be exactly half as long as `hex`.
void arrayFromHex(HexParseConfig config = HexParseConfig.init)(in char[] hex, ubyte[] buf)
{
	assert(hex.length % 2 == 0, "Odd number of hex digits for arrayFromHex");
	assert(buf.length == hex.length/2, "Wrong buffer size for arrayFromHex");
	// Iterating over the output (with a size_t index) never reads past
	// the end of `hex`, and has no int / size_t comparison.
	foreach (i, ref b; buf)
		b = cast(ubyte)(
			parseHexDigit!config(hex[i*2  ])*16 +
			parseHexDigit!config(hex[i*2+1])
		);
}

/// Fast version for static arrays of known length.
// Eight hex digits are loaded at once as a ulong, and converted to four bytes.
// NOTE(review): the shift pattern looks like it assumes a little-endian host
// (and that the ulong load from `hex` is acceptable on the target) - confirm.
void sarrayFromHex(HexParseConfig config = HexParseConfig.init, size_t N, Hex)(ref const Hex hex, ref ubyte[N] buf)
if (is(Hex == char[N*2]))
{
	foreach (i; 0..N/4)
	{
		ulong chars = (cast(ulong*)hex.ptr)[i];
		uint res =
			(parseHexDigit!config((chars >> (8*0)) & 0xFF) << (4*1)) |
			(parseHexDigit!config((chars >> (8*1)) & 0xFF) << (4*0)) |
			(parseHexDigit!config((chars >> (8*2)) & 0xFF) << (4*3)) |
			(parseHexDigit!config((chars >> (8*3)) & 0xFF) << (4*2)) |
			(parseHexDigit!config((chars >> (8*4)) & 0xFF) << (4*5)) |
			(parseHexDigit!config((chars >> (8*5)) & 0xFF) << (4*4)) |
			(parseHexDigit!config((chars >> (8*6)) & 0xFF) << (4*7)) |
			(parseHexDigit!config((chars >> (8*7)) & 0xFF) << (4*6));
		(cast(uint*)buf.ptr)[i] = res;
	}
	// Remaining bytes (when N is not a multiple of 4), one at a time
	foreach (i; N/4*4..N)
		buf[i] = cast(ubyte)(
			parseHexDigit!config(hex[i*2  ])*16 +
			parseHexDigit!config(hex[i*2+1])
		);
}

unittest
{
	foreach (checked; TypeTuple!(false, true))
		foreach (lower; TypeTuple!(false, true))
			foreach (upper; TypeTuple!(false, true))
				static if (lower || upper)
				{
					enum config = HexParseConfig(checked, lower, upper);
					char[18] buf;
					foreach (n; 0..18)
						if (lower && upper ? n & 1 : upper)
							buf[n] = hexDigits[n % 16];
						else
							buf[n] = lowerHexDigits[n % 16];
					ubyte[9] res;
					sarrayFromHex!config(buf, res);
					assert(res == [0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0x01], text(res));
				}
}

/// Encodes bytes as hex digits, two per byte, high nibble first.
/// `digits` is the digit table (upper-case by default).
template toHex(alias digits = hexDigits)
{
	/// Encodes into `buf`, which must be exactly twice as long as `data`.
	char[] toHex(in ubyte[] data, char[] buf) pure
	{
		assert(buf.length == data.length*2);
		foreach (i, b; data)
		{
			buf[i*2  ] = digits[b>>4];
			buf[i*2+1] = digits[b&15];
		}
		return buf;
	}

	/// Static array version; does not allocate.
	char[n*2] toHex(size_t n)(in ubyte[n] data) pure
	{
		char[n*2] buf;
		foreach (i, b; data)
		{
			buf[i*2  ] = digits[b>>4];
			buf[i*2+1] = digits[b&15];
		}
		return buf;
	}

	/// Allocating version.
	string toHex(in ubyte[] data) pure
	{
		auto buf = new char[data.length*2];
		foreach (i, b; data)
		{
			buf[i*2  ] = digits[b>>4];
			buf[i*2+1] = digits[b&15];
		}
		return buf;
	}
}

/// toHex with lower-case digits.
alias toLowerHex = toHex!lowerHexDigits;

/// Writes the integer `n` to `buf` as zero-padded upper-case hex digits.
void toHex(T : ulong, size_t U = T.sizeof*2)(T n, ref char[U] buf)
{
	Unqual!T x = n;
	// Fill from the last (least significant) digit backwards
	foreach (i; Reverse!(RangeTuple!(T.sizeof*2)))
	{
		buf[i] = hexDigits[x & 0xF];
		x >>= 4;
	}
}

unittest
{
	ubyte[] bytes = [0x12, 0x34];
	assert(toHex(bytes) == "1234");
}

unittest
{
	ubyte[] bytes = [0x12, 0x34];
	char[] buf = new char[4];
	toHex(bytes, buf);
	assert(buf == "1234");
}

unittest
{
	char[8] buf;
	toHex(0x01234567, buf);
	assert(buf == "01234567");
}

/// Returns the integer `n` as zero-padded upper-case hex digits.
char[T.sizeof*2] toHex(T : ulong)(T n)
{
	char[T.sizeof*2] buf;
	toHex(n, buf);
	return buf;
}

unittest
{
	assert(toHex(0x01234567) == "01234567");
}

unittest
{
	ubyte[2] bytes = [0x12, 0x34];
	auto buf = bytes.toLowerHex();
	static assert(buf.length == 4);
	assert(buf == "1234");
}

/// How many significant decimal digits does a FP type have
/// (determined empirically - valid for all D FP types on x86/64)
enum significantDigits(T : real) = 2 + 2 * T.sizeof;

/// Format string for a FP type which includes all necessary
/// significant digits
enum fpFormatString(T) = "%." ~ text(significantDigits!T) ~ "g";
/// C printf length modifier used for the given FP type
template cWidthString(T)
{
	static if (is(Unqual!T == float))
		enum cWidthString = "";
	else
	static if (is(Unqual!T == double))
		enum cWidthString = "l";
	else
	static if (is(Unqual!T == real))
		enum cWidthString = "L";
}
/// Like fpFormatString, but for C's printf family (includes the length modifier)
enum fpCFormatString(T) = "%." ~ text(significantDigits!T) ~ cWidthString!T ~ "g";

// snprintf into a static buffer, passing the buffer's actual size.
private auto safeSprintf(size_t N, Args...)(ref char[N] buf, auto ref Args args) @trusted @nogc
{
	return snprintf(buf.ptr, N, args);
}

// Formats `val` with all significant digits, then shortens the text for
// as long as it still parses back to the same value. Returns the buffer.
private auto fpToBuf(Q)(Q val) @safe nothrow @nogc
{
	alias F = Unqual!Q;

	/// Bypass FPU register, which may contain a different precision
	static F forceType(F d) { static F n; n = d; return n; }

	enum isReal = is(F == real);

	StaticBuf!(char, 64) buf = void;

	// MSVC workaround from std.format:
	version (CRuntime_Microsoft)
	{
		import std.math : isNaN, isInfinity;
		immutable double v = val; // convert early to get "inf" in case of overflow
		{
			string s;
			if (isNaN(v))
				s = "nan"; // snprintf writes 1.#QNAN
			else if (isInfinity(v))
				s = val >= 0 ? "inf" : "-inf"; // snprintf writes 1.#INF
			else
				goto L1;
			buf.buf[0..s.length] = s;
			buf.pos = s.length;
			return buf;
		L1:
		}
	}
	else
		alias v = val;

	// Initial, full-precision representation
	buf.pos = safeSprintf(buf.buf, &fpCFormatString!F[0], forceType(v));
	char[] s = buf.data();

	// Parse back a string created by this function; it must parse completely.
	F parse(char[] s)
	{
		F f;
		auto res = tryParse(s, f);
		assert(res, "Failed to parse number we created");
		assert(!s.length, "Failed to completely parse number we created");
		return f;
	}

	if (s != "nan" && s != "-nan" && s != "inf" && s != "-inf")
	{
		if (forceType(parse(s)) != v)
		{
			static if (isReal)
			{
				// Something funny with DM libc real parsing... e.g. 0.6885036635121051783
				return buf;
			}
			else
			//	assert(false, "Initial conversion fails: " ~ format(fpFormatString!F, parse(s)) ~ " / " ~ s);
				assert(false, "Initial conversion fails");
		}

		// Try rounding up at each digit, from the end: if incrementing a digit
		// and cutting everything after it still yields v, keep the shorter text.
		foreach_reverse (i; 1..s.length)
			if (s[i]>='0' && s[i]<='8')
			{
				s[i]++;
				if (forceType(parse(s[0..i+1]))==v)
					s = s[0..i+1];
				else
					s[i]--;
			}
		// Try rounding down: drop trailing characters while the value is unchanged.
		while (s.length>2 && s[$-1]!='.' && forceType(parse(s[0..$-1]))==v)
			s = s[0..$-1];
	}
	buf.pos = s.length;
	return buf;
}

/// Writes the shortest exact string representation of `v` to `writer`.
void putFP(Writer, F)(auto ref Writer writer, F v)
{
	writer.put(fpToBuf(v).data);
}


/// Get shortest string representation of a FP type that still converts to exactly the same number.
template fpToString(F)
{
	string fpToString(F v) @safe nothrow
	{
		// Format into a temporary buffer, then copy to the GC heap
		auto formatted = fpToBuf(v);
		return formatted.data.idup;
	}

	// Round-trip test over random bit patterns
	static if (!is(Unqual!F == real))
	unittest
	{
		union U
		{
			ubyte[F.sizeof] bytes;
			Unqual!F d;
			string toString() const { return (fpFormatString!F ~ " %a [%(%02X %)]").format(d, d, bytes[]); }
		}
		import std.random : Xorshift, uniform;
		import std.stdio : stderr;
		Xorshift rng;
		foreach (iteration; 0..10000)
		{
			U input;
			foreach (ref octet; input.bytes[])
				octet = uniform!ubyte(rng);
			static if (is(Unqual!F == real))
				input.bytes[7] |= 0x80; // require normalized value
			scope(failure) stderr.writeln("Input:\t", input);
			auto repr = fpToString(input.d);
			scope(failure) stderr.writeln("Result:\t", repr);
			if (repr == "nan" || repr == "-nan")
				continue; // there are many NaNs...
			U roundTrip;
			roundTrip.d = to!F(repr);
			assert(roundTrip.bytes == input.bytes,
				"fpToString mismatch:\nOutput:\t%s".format(roundTrip));
		}
	}
}

/// fpToString for double
alias doubleToString = fpToString!double;

unittest
{
	// Check that the template instantiates for the other FP types
	alias floatToString = fpToString!float;
	alias realToString = fpToString!real;
	alias crealToString = fpToString!(const(real));
}

/// Wraps the result of a fpToString in a non-allocating stringifiable struct.
struct FPAsString(T)
{
	/// Buffer holding the formatted number
	typeof(fpToBuf(T.init)) buf;

	/// Formats `f` immediately; no memory is allocated.
	this(T f)
	{
		buf = fpToBuf(f);
	}

	/// Returns a newly allocated copy of the formatted number.
	string toString() const pure nothrow
	{
		return buf.data.idup;
	}

	/// Writes the formatted number to `w`, all at once if `w` accepts
	/// a slice, or else character by character.
	void toString(W)(ref W w) const
	{
		static if (is(typeof(w.put(buf.data))))
			w.put(buf.data);
		else
			foreach (c; buf.data)
				w.put(c);
	}
}
FPAsString!T fpAsString(T)(T f) { return FPAsString!T(f); } /// ditto

@safe //nothrow @nogc
unittest
{
	StaticBuf!(char, 1024) buf;
	buf.formattedWrite!"%s"(fpAsString(0.1));
	assert(buf.data == "0.1");
}

/// Converts a number to a string: toDec for types which implicitly
/// convert to ulong, fpToString for everything else.
string numberToString(T)(T v)
	if (isNumeric!T)
{
	static if (is(T : ulong))
		return toDec(v);
	else
		return fpToString(v);
}

// ************************************************************************

/// Simpler implementation of Levenshtein string distance
/// (operates on code units; insertions, deletions and substitutions cost 1).
int stringDistance(string s, string t)
{
	int n = cast(int)s.length;
	int m = cast(int)t.length;
	if (n == 0) return m;
	if (m == 0) return n;

	// Each matrix row depends only on the row above it, so
	// two rows are kept instead of the entire (n+1) x (m+1) matrix.
	auto prev = new int[m+1];
	auto curr = new int[m+1];
	foreach (j; 0..m+1)
		prev[j] = j; // distance from the empty prefix of s

	foreach (i; 1..n+1)
	{
		curr[0] = i; // distance to the empty prefix of t
		foreach (j; 1..m+1)
		{
			int cost = t[j-1] == s[i-1] ? 0 : 1;
			curr[j] = min(
				prev[j  ] + 1,   // deletion
				curr[j-1] + 1,   // insertion
				prev[j-1] + cost // substitution or match
			);
		}
		swap(prev, curr);
	}
	// After the final swap, the last computed row is in `prev`
	return prev[m];
}

/// Return a number between 0.0 and 1.0 indicating how similar two strings are
/// (1.0 if identical)
float stringSimilarity(string string1, string string2)
{
	float dis = stringDistance(string1, string2);
	float maxLen = string1.length;
	if (maxLen < string2.length)
		maxLen = string2.length;
	if (maxLen == 0)
		return 1; // two empty strings are identical
	else
		return 1f - dis/maxLen;
}

/// Select best match from a list of items.
/// Returns -1 if none are above the threshold.
/// The comparison is case-insensitive; among equally good matches, the last one wins.
sizediff_t findBestMatch(in string[] items, string target, float threshold = 0.7)
{
	sizediff_t found = -1;
	float best = 0;
	// Loop-invariant: lower-case the target only once
	auto lowerTarget = toLower(target);

	foreach (i, item; items)
	{
		float match = stringSimilarity(toLower(item), lowerTarget);
		if (match>threshold && match>=best)
		{
			best = match;
			found = i;
		}
	}

	return found;
}

/// Select best match from a list of items.
/// Returns null if none are above the threshold.
string selectBestFrom(in string[] items, string target, float threshold = 0.7)
{
	auto index = findBestMatch(items, target, threshold);
	return index < 0 ? null : items[index];
}

// ************************************************************************

/// Returns a string of `length` characters, each picked
/// uniformly at random from `chars`.
string randomString()(int length=20, string chars="abcdefghijklmnopqrstuvwxyz")
{
	import std.random;
	import std.range;

	return length.iota.map!(n => chars[uniform(0, $)]).array;
}