/**
 * Utility code related to string and text processing.
 *
 * License:
 *   This Source Code Form is subject to the terms of
 *   the Mozilla Public License, v. 2.0. If a copy of
 *   the MPL was not distributed with this file, You
 *   can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Authors:
 *   Vladimir Panteleev <vladimir@thecybershadow.net>
 */

module ae.utils.text;

import std.algorithm;
import std.ascii;
import std.exception;
import std.conv;
import std.format;
import std.range.primitives;
import std.string;
import std.traits;
import std.typetuple;

import core.stdc.stdio : snprintf, sscanf;
import core.stdc.string;

import ae.utils.array;
import ae.utils.meta;
import ae.utils.text.parsefp;
import ae.utils.textout;

// Make unqualified `indexOf` in this module resolve to the std.string version.
alias indexOf = std.string.indexOf;

public import ae.utils.text.ascii : ascii, DecimalSize, toDec, toDecFixed, asciiToLower, asciiToUpper;
public import ae.utils.array : contains;

// ************************************************************************

/// CTFE helper.
/// Formats `obj` using the format string `fmt`; the argument order is
/// swapped relative to `std.format.format` so it can be called as
/// `obj.formatAs(fmt)`.
string formatAs(T)(auto ref T obj, string fmt)
{
	return format(fmt, obj);
}

/// Lazily formatted object.
/// Returns a struct holding `values`; the values are only formatted
/// (using the compile-time format string `fmt`) when the struct's
/// `toString` is called.
auto formatted(string fmt, T...)(auto ref T values)
{
	static struct Formatted
	{
		T values;

		/// Delegate-sink flavor of toString.
		void toString(void delegate(const(char)[]) sink) const
		{
			sink.formattedWrite!fmt(values);
		}

		/// Output-range flavor of toString.
		void toString(W)(ref W writer) const
		if (isOutputRange!(W, char))
		{
			writer.formattedWrite!fmt(values);
		}
	}
	return Formatted(values);
}

unittest
{
	assert(format!"%s%s%s"("<", formatted!"%x"(64), ">") == "<40>");
}

// ************************************************************************

/// Consume a LF or CRLF terminated line from s.
/// Sets s to null and returns the remainder
/// if there is no line terminator in s.
T[] eatLine(T)(ref T[] s, bool eatIncompleteLines = true)
{
	return s.skipUntil([T('\n')], eatIncompleteLines).chomp();
}

/// Deprecated variant of `eatLine` taking the end-of-input policy as a
/// template argument, which is forwarded to `eatUntil`.
deprecated template eatLine(OnEof onEof)
{
	T[] eatLine(T)(ref T[] s)
	{
		return s.eatUntil!onEof([T('\n')]).chomp();
	}
}

unittest
{
	string s = "Hello\nworld";
	assert(s.eatLine() == "Hello");
	assert(s.eatLine() == "world");
	assert(s is null);
	assert(s.eatLine() is null);
}

/// Replace all occurrences of `from` in `what` with `to`.
/// Uses memchr (not Boyer-Moore), best for short strings.
/// Only available for one-byte element types.
/// Returns `what` itself (no allocation) if `from` is empty, longer than
/// `what`, or does not occur in `what`.
T[] fastReplace(T)(T[] what, T[] from, T[] to)
	if (T.sizeof == 1) // TODO (uses memchr)
{
	alias Unqual!T U;

//	debug scope(failure) std.stdio.writeln("fastReplace crashed: ", [what, from, to]);
	// Null base pointer: slicing it with two addresses yields the memory
	// between those addresses as a U[].
	enum RAM = cast(U*)null;

	if (what.length < from.length || from.length==0)
		return what;

	if (from.length==1)
	{
		auto fromc = from[0];
		if (to.length==1)
		{
			// Single character -> single character: patch a copy in place.
			auto p = cast(T*)memchr(what.ptr, fromc, what.length);
			if (!p)
				return what;

			T[] result = what.dup;
			// Offset from a position in `what` to the same position in `result`.
			auto delta = result.ptr - what.ptr;
			auto toChar = to[0];
			auto end = what.ptr + what.length;
			do
			{
				(cast(U*)p)[delta] = toChar; // zomg hax lol
				p++;
				p = cast(T*)memchr(p, fromc, end - p);
			} while (p);
			return result;
		}
		else
		{
			// Single character -> arbitrary string: rebuild via StringBuilder.
			auto p = cast(immutable(T)*)memchr(what.ptr, fromc, what.length);
			if (!p)
				return what;

			auto sb = StringBuilder(what.length);
			do
			{
				sb.put(what[0..p-what.ptr], to);
				what = what[p-what.ptr+1..$];
				p = cast(immutable(T)*)memchr(what.ptr, fromc, what.length);
			}
			while (p);

			sb.put(what);
			return sb.get();
		}
	}

	// Multi-character needle: find the first character with memchr,
	// then compare the remainder ("tail") directly.
	auto head = from[0];
	auto tail = from[1..$];

	auto p = cast(T*)what.ptr;
	// Last position (exclusive) where `head` can start a full match.
	auto end = p + what.length - tail.length;
	p = cast(T*)memchr(p, head, end-p);
	while (p)
	{
		p++;
		if (p[0..tail.length] == tail)
		{
			// First match found; nothing has been allocated before this point.
			if (from.length == to.length)
			{
				// Same length: patch a copy in place.
				T[] result = what.dup;
				// `p` points one past the head character, hence the extra -1.
				auto deltaMinusOne = (result.ptr - what.ptr) - 1;

				// Jump into the loop body to handle the match already found.
				goto replaceA;
			dummyA: // compiler complains

				do
				{
					p++;
					if (p[0..tail.length] == tail)
					{
					replaceA:
						(cast(U*)p+deltaMinusOne)[0..to.length] = to[];
					}
					p = cast(T*)memchr(p, head, end-p);
				}
				while (p);

				return result;
			}
			else
			{
				// Different length: copy unmatched spans and `to` into a builder.
				auto start = cast(T*)what.ptr;
				auto sb = StringBuilder(what.length);
				// Jump into the loop body to handle the match already found.
				goto replaceB;
			dummyB: // compiler complains

				do
				{
					p++;
					if (p[0..tail.length] == tail)
					{
					replaceB:
						// Emit the text since the previous match, then the replacement.
						sb.put(RAM[cast(size_t)start .. cast(size_t)p-1], to);
						start = p + tail.length;
						what = what[start-what.ptr..$];
					}
					else
					{
						what = what[p-what.ptr..$];
					}
					p = cast(T*)memchr(what.ptr, head, what.length);
				}
				while (p);

				//sb.put(what);
				sb.put(RAM[cast(size_t)start..cast(size_t)(what.ptr+what.length)]);
				return sb.get();
			}

			assert(0);
		}
		p = cast(T*)memchr(p, head, end-p);
	}

	return what;
}

unittest
{
	import std.array;
	void test(string haystack, string from, string to)
	{
		auto description = `("` ~ haystack ~ `", "` ~ from ~ `", "` ~ to ~ `")`;

		auto r1 = fastReplace(haystack, from, to);
		auto r2 =     replace(haystack, from, to);
		assert(r1 == r2, `Bad replace: ` ~ description ~ ` == "` ~ r1 ~ `"`);

		if (r1 == haystack)
			assert(r1 is haystack, `Pointless reallocation: ` ~ description);
	}

	test("Mary had a little lamb", "a", "b");
	test("Mary had a little lamb", "a", "aaa");
	test("Mary had a little lamb", "Mary", "Lucy");
	test("Mary had a little lamb", "Mary", "Jimmy");
	test("Mary had a little lamb", "lamb", "goat");
	test("Mary had a little lamb", "lamb", "sheep");
	test("Mary had a little lamb", " l", " x");
	test("Mary had a little lamb", " l", " xx");

	test("Mary had a little lamb", "X" , "Y" );
	test("Mary had a little lamb", "XX", "Y" );
	test("Mary had a little lamb", "X" , "YY");
	test("Mary had a little lamb", "XX", "YY");
	test("Mary had a little lamb", "aX", "Y" );
	test("Mary had a little lamb", "aX", "YY");

	test("foo", "foobar", "bar");
}

/// Split `s` by the single-element delimiter `d`.
/// Returns null for an empty `s`, and `[s]` if `d` does not occur.
/// The returned segments are slices of `s`.
/// One-byte element types are scanned with memchr; wider element types
/// use a plain element-wise scan (memchr compares bytes, not elements).
T[][] fastSplit(T, U)(T[] s, U d)
	if (is(Unqual!T == Unqual!U))
{
	if (!s.length)
		return null;

	static if (T.sizeof == 1)
	{
		auto p = cast(T*)memchr(s.ptr, d, s.length);
		if (!p)
			return [s];

		// First pass: count delimiters so the result can be allocated once.
		size_t n;
		auto end = s.ptr + s.length;
		do
		{
			n++;
			p++;
			p = cast(T*) memchr(p, d, end-p);
		}
		while (p);

		// Second pass: fill in the segments.
		auto result = new T[][n+1];
		n = 0;
		auto start = s.ptr;
		p = cast(T*) memchr(start, d, s.length);
		do
		{
			result[n++] = start[0..p-start];
			start = ++p;
			p = cast(T*) memchr(p, d, end-p);
		}
		while (p);
		result[n] = start[0..end-start];

		return result;
	}
	else
	{
		T[][] result;
		size_t start;
		foreach (i; 0 .. s.length)
			if (s[i] == d)
			{
				result ~= s[start .. i];
				start = i + 1;
			}
		result ~= s[start .. $];
		return result;
	}
}

/// Split `text` into lines on '\n', dropping one trailing '\r' from
/// each line if present.
T[][] splitAsciiLines(T)(T[] text)
	if (is(Unqual!T == char))
{
	auto lines = text.fastSplit('\n');
	foreach (ref line; lines)
		if (line.length && line[$-1]=='\r')
			line = line[0..$-1];
	return lines;
}

unittest
{
	assert(splitAsciiLines("a\nb\r\nc\r\rd\n\re\r\n\nf") == ["a", "b", "c\r\rd", "\re", "", "f"]);
	assert(splitAsciiLines(string.init) == splitLines(string.init));
}

/// Like std.string.split (one argument version, which splits by
/// whitespace), but only splits by ASCII and does not autodecode.
T[][] asciiSplit(T)(T[] text)
	if (is(Unqual!T == char))
{
	T[][] words;
	immutable len = text.length;
	size_t pos = 0;

	while (pos < len)
	{
		// Skip the whitespace run in front of the next word.
		while (pos < len && std.ascii.isWhite(text[pos]))
			pos++;
		if (pos == len)
			break;

		// Scan to the end of the word and record it.
		immutable wordBegin = pos;
		while (pos < len && !std.ascii.isWhite(text[pos]))
			pos++;
		words ~= text[wordBegin .. pos];
	}

	return words;
}

unittest
{
	foreach (s; ["", " ", "a", " a", "a ", "a b", " a b", "a b ", " a b ",
			" ", " a", "a ", "a b", "a b ", "a b c"])
		assert(s.split == s.asciiSplit, format("Got %s, expected %s", s.asciiSplit, s.split));
}

/// Return the slice of `s` with leading and trailing ASCII whitespace
/// removed. No copy is made.
T[] asciiStrip(T)(T[] s)
	if (is(Unqual!T == char))
{
	size_t lo = 0;
	size_t hi = s.length;

	while (lo < hi && isWhite(s[lo]))
		lo++;
	while (hi > lo && isWhite(s[hi - 1]))
		hi--;

	return s[lo .. hi];
}

unittest
{
	string s = "Hello, world!";
	assert(asciiStrip(s) is s);
	assert(asciiStrip("\r\n\tHello ".dup) == "Hello");
}

/// Covering slice-list of s with interleaved whitespace.
T[][] segmentByWhitespace(T)(T[] s)
	if (is(Unqual!T == char))
{
	if (!s.length)
		return null;

	T[][] segments;
	bool wasWhite = isWhite(s[0]);
	size_t start = 0;
	foreach (p, char c; s)
	{
		immutable nowWhite = isWhite(c);
		// A change of class (whitespace <-> non-whitespace) ends the segment.
		if (nowWhite != wasWhite)
		{
			segments ~= s[start..p];
			start = p;
		}
		wasWhite = nowWhite;
	}
	segments ~= s[start..$];

	return segments;
}

/// Replace every whitespace run which contains a newline with a
/// single space. Other whitespace runs are left as they are.
T[] newlinesToSpaces(T)(T[] s)
	if (is(Unqual!T == char))
{
	auto slices = segmentByWhitespace(s);
	foreach (ref slice; slices)
		if (slice.contains("\n"))
			slice = " ";
	return slices.join();
}

/// Strip `s` and replace every remaining whitespace run with a single space.
ascii normalizeWhitespace(ascii s)
{
	auto slices = segmentByWhitespace(strip(s));
	// After stripping, segment 0 is a word, so odd segments are whitespace.
	foreach (i, ref slice; slices)
		if (i & 1) // odd
			slice = " ";
	return slices.join();
}

unittest
{
	assert(normalizeWhitespace(" Mary had\ta\nlittle\r\n\tlamb") == "Mary had a little lamb");
}

/// Split a camelCase identifier into its words.
/// A split occurs between a lowercase and an uppercase letter, and
/// before the last letter of an uppercase run which is followed by a
/// lowercase letter (so "IPString" becomes "IP", "String").
string[] splitByCamelCase(string s)
{
	string[] result;
	size_t start = 0;
	foreach (i; 1..s.length+1)
		if (i == s.length
		 || (isLower(s[i-1]) && isUpper(s[i]))
		 || (i+1 < s.length && isUpper(s[i-1]) && isUpper(s[i]) && isLower(s[i+1]))
		)
		{
			result ~= s[start..i];
			start = i;
		}
	return result;
}

unittest
{
	assert(splitByCamelCase("parseIPString") == ["parse", "IP", "String"]);
	assert(splitByCamelCase("IPString") == ["IP", "String"]);
}

/// Join words into a camelCase identifier: the first word is kept
/// as is, and the first character of each following word is
/// upper-cased (ASCII only). Empty words are skipped.
/// Returns null for an empty array.
string camelCaseJoin(string[] arr)
{
	if (!arr.length)
		return null;
	string result = arr[0];
	foreach (s; arr[1..$])
	{
		if (!s.length)
			continue; // nothing to capitalize or append
		result ~= std.ascii.toUpper(s[0]) ~ s[1..$];
	}
	return result;
}

unittest
{
	assert("parse-IP-string".split('-').camelCaseJoin() == "parseIPString");
}

// ************************************************************************

/// Like std.string.wrap, but preserves whitespace at line start and
/// between (non-wrapped) words.
/// Params:
///   s           = text to wrap
///   columns     = column limit
///   firstIndent = prefix for the first line
///   indent      = prefix for every following line
///   tabWidth    = number of columns a tab character counts for
string verbatimWrap(
	string s,
	size_t columns = 80,
	string firstIndent = null,
	string indent = null,
	size_t tabWidth = 8,
)
{
	if (!s.length)
		return s;

	import std.uni : isWhite;
	import std.range;

	// Result buffer. Append-only (contains only text which has been wrapped).
	string result;
	// Index in `s` corresponding to the end of `result`
	size_t start;
	// Index in `s` corresponding to after the last newline in `result`
	size_t lineStart;
	// Current column
	size_t col;
	// Was the previous character we looked at whitespace?
	bool wasWhite;
	// We need to add an indent at the next (non-newline) character.
	bool needIndent;

	result = firstIndent;
	col = firstIndent.walkLength;
	auto indentWidth = indent.walkLength;

	// Append s[start .. pos] to the result, first breaking the line if
	// the pending text made the current line exceed the column limit.
	void flush(size_t pos)
	{
		if (col > columns && start > lineStart)
		{
			result ~= "\n" ~ indent;
			// NOTE(review): the width of the text carried over to the new
			// line is not added to `col` here - confirm this is intended.
			col = indentWidth;

			// Consume whitespace at line break
			size_t numWhite;
			foreach (i, c; s[start .. $])
				if (isWhite(c))
					numWhite = i + 1; // count of whitespace characters seen so far
				else
					break;
			start += numWhite;
			lineStart = start;
		}
		result ~= s[start .. pos];
		start = pos;
	}

	foreach (pos, dchar c; s)
	{
		auto atWhite = isWhite(c);
		// A word has just ended; commit it (and the whitespace before it).
		if (atWhite && !wasWhite)
			flush(pos);
		if (c == '\n')
		{
			flush(pos);
			result ~= "\n";
			start++; // past newline
			lineStart = start;
			needIndent = true;
			col = 0;
		}
		else
		{
			if (needIndent)
			{
				assert(col == 0);
				result ~= indent;
				col += indentWidth;
				needIndent = false;
			}
			if (c == '\t')
				col += tabWidth;
			else
				col++;
		}
		wasWhite = atWhite;
	}
	flush(s.length);
	if (col)
		result ~= "\n"; // trailing newline

	return result;
}

// ************************************************************************

/// Case-insensitive ASCII string.
alias CIAsciiString = NormalizedArray!(immutable(char), s => s.byCodeUnit.map!(std.ascii.toLower));

///
unittest
{
	CIAsciiString s = "test";
	assert(s == "TEST");
	assert(s >= "Test" && s <= "Test");
	assert(CIAsciiString("a") == CIAsciiString("A"));
	assert(CIAsciiString("a") != CIAsciiString("B"));
	assert(CIAsciiString("a") <  CIAsciiString("B"));
	assert(CIAsciiString("A") <  CIAsciiString("b"));
	assert(CIAsciiString("я") != CIAsciiString("Я"));
}

/// Case-insensitive Unicode string.
alias CIUniString = NormalizedArray!(immutable(char), s => s.map!(std.uni.toLower));

///
unittest
{
	CIUniString s = "привет";
	assert(s == "ПРИВЕТ");
	assert(s >= "Привет" && s <= "Привет");
	assert(CIUniString("я") == CIUniString("Я"));
	assert(CIUniString("а") != CIUniString("Б"));
	assert(CIUniString("а") <  CIUniString("Б"));
	assert(CIUniString("А") <  CIUniString("б"));
}

// ************************************************************************

import std.utf;

/// Convert any data to a valid UTF-8 bytestream, so D's string functions can
/// properly work on it.
/// Each input code unit is treated as the code point with the same value
/// and the resulting code points are encoded as UTF-8.
string rawToUTF8(in char[] s)
{
	dchar[] points;
	points.length = s.length;
	foreach (idx, char unit; s)
		points[idx] = unit;
	return toUTF8(points);
}

/// Undo rawToUTF8.
/// Every decoded code point must be below U+0100 (checked by assertion).
ascii UTF8ToRaw(in char[] r) pure
{
	auto raw = new char[r.length];
	size_t count = 0;
	foreach (dchar point; r)
	{
		assert(point < '\u0100');
		raw[count] = cast(char)point;
		++count;
	}
	return raw[0 .. count];
}

unittest
{
	char[1] c;
	for (int i=0; i<256; i++)
	{
		c[0] = cast(char)i;
		assert(UTF8ToRaw(rawToUTF8(c[])) == c[], format("%s -> %s -> %s", cast(ubyte[])c[], cast(ubyte[])rawToUTF8(c[]), cast(ubyte[])UTF8ToRaw(rawToUTF8(c[]))));
	}
}

/// Where a delegate with this signature is required.
string nullStringTransform(in char[] s) { return to!string(s); }

/// Return `s` unchanged if it is valid UTF-8; otherwise return the
/// result of passing it through `rawToUTF8`.
string forceValidUTF8(string s)
{
	try
	{
		validate(s);
		return s;
	}
	catch (UTFException)
		return rawToUTF8(s);
}

// ************************************************************************

/// Return the slice up to the first NUL character,
/// or of the whole array if none is found.
/// The static array is taken by reference, so the result is a slice
/// of the caller's array.
C[] fromZArray(C, size_t n)(ref C[n] arr)
{
	auto p = arr[].representation.countUntil(0);
	return arr[0 .. p<0 ? $ : p];
}

/// ditto
C[] fromZArray(C)(C[] arr)
{
	auto p = arr.representation.countUntil(0);
	return arr[0 .. p<0 ? $ : p];
}

unittest
{
	char[4] arr = "ab\0d";
	assert(arr.fromZArray == "ab");
	arr[] = "abcd";
	assert(arr.fromZArray == "abcd");
}

unittest
{
	string arr = "ab\0d";
	assert(arr.fromZArray == "ab");
	arr = "abcd";
	assert(arr.fromZArray == "abcd");
}

// ************************************************************************

/// Formats binary data as a hex dump (three-column layout consisting of hex
/// offset, byte values in hex, and printable low-ASCII characters).
/// Each output row covers 16 bytes. In the character column, a zero
/// byte is shown as a space and any other byte below 32 or above 127
/// as '.'. A partial last row is padded so the columns stay aligned.
/// `b` must not be empty (checked by assertion).
string hexDump(const(void)[] b)
{
	auto data = cast(const(ubyte)[]) b;
	assert(data.length);
	size_t i=0;
	string s;
	while (i<data.length)
	{
		s ~= format("%08X: ", i);
		foreach (x; 0..16)
		{
			if (i+x<data.length)
				s ~= format("%02X ", data[i+x]);
			else
				s ~= "   "; // same width as a "%02X " cell
			if (x==7)
				s ~= "| ";
		}
		s ~= " ";
		foreach (x; 0..16)
		{
			if (i+x<data.length)
			{
				if (data[i+x]==0)
					s ~= ' ';
				else
				if (data[i+x]<32 || data[i+x]>=128)
					s ~= '.';
				else
					s ~= cast(char)data[i+x];
			}
			else
				s ~= ' ';
		}
		s ~= "\n";
		i += 16;
	}
	return s;
}

import std.conv;

/// Parse the whole of `s` as a hexadecimal number of type `T`.
/// Throws: ConvException if `s` is not consumed entirely.
T fromHex(T : ulong = uint, C)(const(C)[] s)
{
	T result = parse!T(s, 16);
	enforce(s.length==0, new ConvException("Could not parse entire string"));
	return result;
}

/// Decode a string of hex digit pairs into a newly allocated byte array.
ubyte[] arrayFromHex(in char[] hex)
{
	auto buf = new ubyte[hex.length/2];
	arrayFromHex(hex, buf);
	return buf;
}

/// Compile-time options for the hex parsing functions below.
struct HexParseConfig
{
	bool checked = true; /// Throw on characters which are not hex digits.
	bool lower = true;   /// Input may contain lower-case digits.
	bool upper = true;   /// Input may contain upper-case digits.
}

/// Convert one hex digit character to its value (0..15).
/// In checked mode, an Exception is thrown for any other character.
/// In unchecked mode, no validation is done, and `config.lower` /
/// `config.upper` select how letters are decoded.
ubyte parseHexDigit(HexParseConfig config = HexParseConfig.init)(char c)
{
	static assert(config.lower || config.upper,
		"Must parse at least either lower or upper case digits");
	static if (config.checked)
	{
		switch (c)
		{
			case '0': .. case '9': return cast(ubyte)(c - '0');
			case 'a': .. case 'f': return cast(ubyte)(c - 'a' + 10);
			case 'A': .. case 'F': return cast(ubyte)(c - 'A' + 10);
			default: throw new Exception("Bad hex digit: " ~ c);
		}
	}
	else
	{
		if (c <= '9')
			return cast(ubyte)(c - '0');
		static if (config.lower && config.upper)
		{
			// Upper-case letters sort before lower-case ones in ASCII.
			if (c < 'a')
				return cast(ubyte)(c - 'A' + 10);
			else
				return cast(ubyte)(c - 'a' + 10);
		}
		else
			static if (config.lower)
				return cast(ubyte)(c - 'a' + 10);
			else
				return cast(ubyte)(c - 'A' + 10);
	}
}

/// Decode a string of hex digit pairs into `buf`, whose length must be
/// exactly half of `hex.length` (checked by assertion).
void arrayFromHex(HexParseConfig config = HexParseConfig.init)(in char[] hex, ubyte[] buf)
{
	assert(buf.length == hex.length/2, "Wrong buffer size for arrayFromHex");
	for (int i=0; i<hex.length; i+=2)
		buf[i/2] = cast(ubyte)(
			parseHexDigit!config(hex[i  ])*16 +
			parseHexDigit!config(hex[i+1])
		);
}

/// Fast version for static arrays of known length.
void sarrayFromHex(HexParseConfig config = HexParseConfig.init, size_t N, Hex)(in ref Hex hex, ref ubyte[N] buf)
if (is(Hex == char[N*2]))
{
	// Decode eight hex characters (four output bytes) per iteration.
	foreach (i; 0..N/4)
	{
		ulong chars = (cast(ulong*)hex.ptr)[i];
		uint res =
			(parseHexDigit!config((chars >> (8*0)) & 0xFF) << (4*1)) |
			(parseHexDigit!config((chars >> (8*1)) & 0xFF) << (4*0)) |
			(parseHexDigit!config((chars >> (8*2)) & 0xFF) << (4*3)) |
			(parseHexDigit!config((chars >> (8*3)) & 0xFF) << (4*2)) |
			(parseHexDigit!config((chars >> (8*4)) & 0xFF) << (4*5)) |
			(parseHexDigit!config((chars >> (8*5)) & 0xFF) << (4*4)) |
			(parseHexDigit!config((chars >> (8*6)) & 0xFF) << (4*7)) |
			(parseHexDigit!config((chars >> (8*7)) & 0xFF) << (4*6));
		(cast(uint*)buf.ptr)[i] = res;
	}
	// Decode the remaining (N % 4) bytes one at a time.
	foreach (i; N/4*4..N)
		buf[i] = cast(ubyte)(
			parseHexDigit!config(hex[i*2  ])*16 +
			parseHexDigit!config(hex[i*2+1])
		);
}

unittest
{
	foreach (checked; TypeTuple!(false, true))
		foreach (lower; TypeTuple!(false, true))
			foreach (upper; TypeTuple!(false, true))
				static if (lower || upper)
				{
					enum config = HexParseConfig(checked, lower, upper);
					char[18] buf;
					foreach (n; 0..18)
						if (lower && upper ? n & 1 : upper)
							buf[n] = hexDigits[n % 16];
						else
							buf[n] = lowerHexDigits[n % 16];
					ubyte[9] res;
					sarrayFromHex!config(buf, res);
					assert(res == [0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0x01], text(res));
				}
}

/// Convert bytes to hex digit pairs, using the digit table `digits`.
template toHex(alias digits = hexDigits)
{
	/// Write into `buf`, whose length must be exactly twice that of
	/// `data` (checked by assertion). Returns `buf`.
	char[] toHex(in ubyte[] data, char[] buf) pure
	{
		assert(buf.length == data.length*2);
		foreach (i, b; data)
		{
			buf[i*2  ] = digits[b>>4];
			buf[i*2+1] = digits[b&15];
		}
		return buf;
	}

	/// Static array version; returns a static array.
	char[n*2] toHex(size_t n)(in ubyte[n] data) pure
	{
		char[n*2] buf;
		foreach (i, b; data)
		{
			buf[i*2  ] = digits[b>>4];
			buf[i*2+1] = digits[b&15];
		}
		return buf;
	}

	/// Allocating version; returns a new string.
	string toHex(in ubyte[] data) pure
	{
		auto buf = new char[data.length*2];
		foreach (i, b; data)
		{
			buf[i*2  ] = digits[b>>4];
			buf[i*2+1] = digits[b&15];
		}
		return buf;
	}
}

/// `toHex` using lower-case digits.
alias toLowerHex = toHex!lowerHexDigits;

/// Write `n` to `buf` as a fixed-width hex number, most significant
/// digit first, using two digits per byte of `T`.
void toHex(T : ulong, size_t U = T.sizeof*2)(T n, ref char[U] buf)
{
	Unqual!T x = n;
	// Fill from the last digit backwards, consuming the lowest nibble each time.
	foreach (i; Reverse!(RangeTuple!(T.sizeof*2)))
	{
		buf[i] = hexDigits[x & 0xF];
		x >>= 4;
	}
}

unittest
{
	ubyte[] bytes = [0x12, 0x34];
	assert(toHex(bytes) == "1234");
}

unittest
{
	ubyte[] bytes = [0x12, 0x34];
	char[] buf = new char[4];
	toHex(bytes, buf);
	assert(buf == "1234");
}

unittest
{
	char[8] buf;
	toHex(0x01234567, buf);
	assert(buf == "01234567");
}

/// Return `n` as a fixed-width hex number in a static array.
char[T.sizeof*2] toHex(T : ulong)(T n)
{
	char[T.sizeof*2] buf;
	toHex(n, buf);
	return buf;
}

unittest
{
	assert(toHex(0x01234567) == "01234567");
}

unittest
{
	ubyte[2] bytes = [0x12, 0x34];
	auto buf = bytes.toLowerHex();
	static assert(buf.length == 4);
	assert(buf == "1234");
}

/// How many significant decimal digits does a FP type have
/// (determined empirically - valid for all D FP types on x86/64)
enum significantDigits(T : real) = 2 + 2 * T.sizeof;

/// Format string for a FP type which includes all necessary
/// significant digits
enum fpFormatString(T) = "%." ~ text(significantDigits!T) ~ "g";

/// Length modifier placed before the "g" conversion in the C format
/// string built for each FP type.
template cWidthString(T)
{
	static if (is(Unqual!T == float))
		enum cWidthString = "";
	else
	static if (is(Unqual!T == double))
		enum cWidthString = "l";
	else
	static if (is(Unqual!T == real))
		enum cWidthString = "L";
}

/// Like `fpFormatString`, but for use with C's printf family.
enum fpCFormatString(T) = "%." ~ text(significantDigits!T) ~ cWidthString!T ~ "g";

/// snprintf wrapper which takes the buffer size from the static
/// array's type.
private auto safeSprintf(size_t N, Args...)(ref char[N] buf, auto ref Args args) @trusted @nogc
{
	return snprintf(buf.ptr, N, args);
}

/// Format `val` into a static buffer, then shorten the text as far as
/// possible while it still parses back to the same value.
private auto fpToBuf(Q)(Q val) @safe nothrow @nogc
{
	alias F = Unqual!Q;

	/// Bypass FPU register, which may contain a different precision
	static F forceType(F d) { static F n; n = d; return n; }

	enum isReal = is(F == real);

	StaticBuf!(char, 64) buf = void;

	// MSVC workaround from std.format:
	version (CRuntime_Microsoft)
	{
		import std.math : isNaN, isInfinity;
		immutable double v = val; // convert early to get "inf" in case of overflow
		{
			string s;
			if (isNaN(v))
				s = "nan"; // snprintf writes 1.#QNAN
			else if (isInfinity(v))
				s = val >= 0 ? "inf" : "-inf"; // snprintf writes 1.#INF
			else
				goto L1;
			buf.buf[0..s.length] = s;
			buf.pos = s.length;
			return buf;
		L1:
		}
	}
	else
		alias v = val;

	// Start from the full-precision representation.
	buf.pos = safeSprintf(buf.buf, &fpCFormatString!F[0], forceType(v));
	char[] s = buf.data();

	// Parse all of `s` back into an F.
	F parse(char[] s)
	{
		F f;
		auto res = tryParse(s, f);
		assert(res, "Failed to parse number we created");
		assert(!s.length, "Failed to completely parse number we created");
		return f;
	}

	if (s != "nan" && s != "-nan" && s != "inf" && s != "-inf")
	{
		if (forceType(parse(s)) != v)
		{
			static if (isReal)
			{
				// Something funny with DM libc real parsing... e.g. 0.6885036635121051783
				return buf;
			}
			else
			//	assert(false, "Initial conversion fails: " ~ format(fpFormatString!F, parse(s)) ~ " / " ~ s);
				assert(false, "Initial conversion fails");
		}

		// From the end: try incrementing a digit and cutting the text
		// right after it; keep the cut if the value is unchanged,
		// otherwise restore the digit.
		foreach_reverse (i; 1..s.length)
			if (s[i]>='0' && s[i]<='8')
			{
				s[i]++;
				if (forceType(parse(s[0..i+1]))==v)
					s = s[0..i+1];
				else
					s[i]--;
			}
		// Drop trailing characters for as long as the value is unchanged.
		while (s.length>2 && s[$-1]!='.' && forceType(parse(s[0..$-1]))==v)
			s = s[0..$-1];
	}
	buf.pos = s.length;
	return buf;
}

/// Write the text produced by `fpToBuf` for `v` to `writer`.
void putFP(Writer, F)(auto ref Writer writer, F v)
{
	writer.put(fpToBuf(v).data);
}


/// Get shortest string representation of a FP type that still converts to exactly the same number.
template fpToString(F)
{
	/// Convert `v` to its text form, as a newly allocated string.
	string fpToString(F v) @safe nothrow
	{
		auto formatted = fpToBuf(v);
		return formatted.data.idup;
	}

	// Round-trip check with random bit patterns (not done for `real`).
	static if (!is(Unqual!F == real))
	unittest
	{
		// Gives access to the same value as raw bytes and as a number.
		union U
		{
			ubyte[F.sizeof] bytes;
			Unqual!F d;
			string toString() const { return (fpFormatString!F ~ " %a [%(%02X %)]").format(d, d, bytes[]); }
		}
		import std.random : Xorshift, uniform;
		import std.stdio : stderr;
		Xorshift rng;
		foreach (iteration; 0..10000)
		{
			U input;
			foreach (ref octet; input.bytes[])
				octet = uniform!ubyte(rng);
			static if (is(Unqual!F == real))
				input.bytes[7] |= 0x80; // require normalized value
			scope(failure) stderr.writeln("Input:\t", input);
			auto str = fpToString(input.d);
			scope(failure) stderr.writeln("Result:\t", str);
			if (str == "nan" || str == "-nan")
				continue; // there are many NaNs...
			U output;
			output.d = to!F(str);
			assert(output.bytes == input.bytes,
				"fpToString mismatch:\nOutput:\t%s".format(output));
		}
	}
}

alias doubleToString = fpToString!double;

// Check that the template instantiates for the other FP types.
unittest
{
	alias floatToString = fpToString!float;
	alias realToString = fpToString!real;
	alias crealToString = fpToString!(const(real));
}

/// Wraps the result of a fpToString in a non-allocating stringifiable struct.
struct FPAsString(T)
{
	/// Buffer holding the formatted text.
	typeof(fpToBuf(T.init)) buf;

	/// Format `f` into the internal buffer.
	this(T f)
	{
		buf = fpToBuf(f);
	}

	/// Return a newly allocated copy of the formatted text.
	string toString() const pure nothrow
	{
		return buf.data.idup;
	}

	/// Write the formatted text to `w` without allocating.
	void toString(W)(ref W w) const
	{
		// Write the whole slice at once if the writer accepts it,
		// otherwise fall back to one character at a time.
		static if (is(typeof(w.put(buf.data))))
			w.put(buf.data);
		else
			foreach (c; buf.data)
				w.put(c);
	}
}
FPAsString!T fpAsString(T)(T f) { return FPAsString!T(f); } /// ditto

@safe //nothrow @nogc
unittest
{
	StaticBuf!(char, 1024) buf;
	buf.formattedWrite!"%s"(fpAsString(0.1));
	assert(buf.data == "0.1");
}

/// Convert a number to a string: types implicitly convertible to
/// `ulong` go through `toDec`, everything else through `fpToString`.
string numberToString(T)(T v)
	if (isNumeric!T)
{
	static if (is(T : ulong))
		return toDec(v);
	else
		return fpToString(v);
}

// ************************************************************************

/// Simpler implementation of Levenshtein string distance
/// (compares code units; insertions, deletions and substitutions all
/// cost 1). Only two rows of the distance matrix are kept in memory.
int stringDistance(string s, string t)
{
	int n = cast(int)s.length;
	int m = cast(int)t.length;
	if (n == 0) return m;
	if (m == 0) return n;

	// prev = row i-1 of the distance matrix, curr = row i.
	auto prev = new int[m+1];
	auto curr = new int[m+1];
	foreach (j; 0..m+1) prev[j] = j;

	foreach (i; 1..n+1)
	{
		curr[0] = i;
		foreach (j; 1..m+1)
		{
			int cost = t[j-1] == s[i-1] ? 0 : 1;
			curr[j] = min(
				prev[j  ] + 1,
				curr[j-1] + 1,
				prev[j-1] + cost
			);
		}
		swap(prev, curr);
	}
	// After the final swap, `prev` holds the last computed row.
	return prev[m];
}

/// Return a number between 0.0 and 1.0 indicating how similar two strings are
/// (1.0 if identical)
float stringSimilarity(string string1, string string2)
{
	float dis = stringDistance(string1, string2);
	float maxLen = string1.length;
	if (maxLen < string2.length)
		maxLen = string2.length;
	if (maxLen == 0)
		return 1;
	else
		return 1f - dis/maxLen;
}

/// Select best match from a list of items.
/// Items are compared to `target` case-insensitively; if several items
/// are equally similar, the last one is selected.
/// Returns -1 if none are above the threshold.
sizediff_t findBestMatch(in string[] items, string target, float threshold = 0.7)
{
	sizediff_t found = -1;
	float best = 0;
	// The target does not change between iterations; lower-case it once.
	auto lowerTarget = toLower(target);

	foreach (i, item; items)
	{
		float match = stringSimilarity(toLower(item), lowerTarget);
		if (match>threshold && match>=best)
		{
			best = match;
			found = i;
		}
	}

	return found;
}

/// Select best match from a list of items.
/// Returns null if none are above the threshold.
string selectBestFrom(in string[] items, string target, float threshold = 0.7)
{
	auto index = findBestMatch(items, target, threshold);
	return index < 0 ? null : items[index];
}

// ************************************************************************

/// Return a string of `length` characters, each picked at random
/// from `chars`.
string randomString()(int length=20, string chars="abcdefghijklmnopqrstuvwxyz")
{
	import std.random;
	import std.range;

	return length.iota.map!(n => chars[uniform(0, $)]).array;
}