1 /** 2 * Utility code related to string and text processing. 3 * 4 * License: 5 * This Source Code Form is subject to the terms of 6 * the Mozilla Public License, v. 2.0. If a copy of 7 * the MPL was not distributed with this file, You 8 * can obtain one at http://mozilla.org/MPL/2.0/. 9 * 10 * Authors: 11 * Vladimir Panteleev <vladimir@thecybershadow.net> 12 */ 13 14 module ae.utils.text; 15 16 import std.algorithm; 17 import std.ascii; 18 import std.exception; 19 import std.conv; 20 import std.format; 21 import std.string; 22 import std.traits; 23 import std.typetuple; 24 25 import core.stdc.string; 26 27 import ae.utils.array; 28 import ae.utils.meta; 29 import ae.utils.textout; 30 31 public import ae.utils.regex; 32 33 alias indexOf = std..string.indexOf; 34 35 public import ae.utils.text.ascii : ascii, DecimalSize, toDec, toDecFixed; 36 37 // ************************************************************************ 38 39 /// Convenience helper 40 bool contains(T, U)(T[] str, U[] what) 41 if (is(Unqual!T == Unqual!U)) 42 { 43 return str.indexOf(what)>=0; 44 } 45 46 /// CTFE helper 47 string formatAs(T)(auto ref T obj, string fmt) 48 { 49 return format(fmt, obj); 50 } 51 52 /// Consume a LF or CRLF terminated line from s. 53 /// Sets s to null and returns the remainder 54 /// if there is no line terminator in s. 55 T[] eatLine(T)(ref T[] s, bool eatIncompleteLines = true) 56 { 57 return s.skipUntil([T('\n')], eatIncompleteLines).chomp(); 58 } 59 60 deprecated template eatLine(OnEof onEof) 61 { 62 T[] eatLine(T)(ref T[] s) 63 { 64 return s.eatUntil!onEof([T('\n')]).chomp(); 65 } 66 } 67 68 unittest 69 { 70 string s = "Hello\nworld"; 71 assert(s.eatLine() == "Hello"); 72 assert(s.eatLine() == "world"); 73 assert(s is null); 74 assert(s.eatLine() is null); 75 } 76 77 // Uses memchr (not Boyer-Moore), best for short strings. 78 T[] fastReplace(T)(T[] what, T[] from, T[] to) 79 if (T.sizeof == 1) // TODO (uses memchr) 80 { 81 alias Unqual!T U; 82 83 // debug scope(failure) std.stdio.writeln("fastReplace crashed: ", [what, from, to]); 84 enum RAM = cast(U*)null; 85 86 if (what.length < from.length || from.length==0) 87 return what; 88 89 if (from.length==1) 90 { 91 auto fromc = from[0]; 92 if (to.length==1) 93 { 94 auto p = cast(T*)memchr(what.ptr, fromc, what.length); 95 if (!p) 96 return what; 97 98 auto result = what.dup; 99 auto delta = result.ptr - what.ptr; 100 auto toChar = to[0]; 101 auto end = what.ptr + what.length; 102 do 103 { 104 (cast(U*)p)[delta] = toChar; // zomg hax lol 105 p++; 106 p = cast(T*)memchr(p, fromc, end - p); 107 } while (p); 108 return result; 109 } 110 else 111 { 112 auto p = cast(immutable(T)*)memchr(what.ptr, fromc, what.length); 113 if (!p) 114 return what; 115 116 auto sb = StringBuilder(what.length); 117 do 118 { 119 sb.put(what[0..p-what.ptr], to); 120 what = what[p-what.ptr+1..$]; 121 p = cast(immutable(T)*)memchr(what.ptr, fromc, what.length); 122 } 123 while (p); 124 125 sb.put(what); 126 return sb.get(); 127 } 128 } 129 130 auto head = from[0]; 131 auto tail = from[1..$]; 132 133 auto p = cast(T*)what.ptr; 134 auto end = p + what.length - tail.length; 135 p = cast(T*)memchr(p, head, end-p); 136 while (p) 137 { 138 p++; 139 if (p[0..tail.length] == tail) 140 { 141 if (from.length == to.length) 142 { 143 auto result = what.dup; 144 auto deltaMinusOne = (result.ptr - what.ptr) - 1; 145 146 goto replaceA; 147 dummyA: // compiler complains 148 149 do 150 { 151 p++; 152 if (p[0..tail.length] == tail) 153 { 154 replaceA: 155 (cast(U*)p+deltaMinusOne)[0..to.length] = to[]; 156 } 157 p = cast(T*)memchr(p, head, end-p); 158 } 159 while (p); 160 161 return result; 162 } 163 else 164 { 165 auto start = cast(T*)what.ptr; 166 auto sb = StringBuilder(what.length); 167 goto replaceB; 168 dummyB: // compiler complains 169 170 do 171 { 172 p++; 173 if (p[0..tail.length] == tail) 174 { 175 replaceB: 176 sb.put(RAM[cast(size_t)start .. cast(size_t)p-1], to); 177 start = p + tail.length; 178 what = what[start-what.ptr..$]; 179 } 180 else 181 { 182 what = what[p-what.ptr..$]; 183 } 184 p = cast(T*)memchr(what.ptr, head, what.length); 185 } 186 while (p); 187 188 //sb.put(what); 189 sb.put(RAM[cast(size_t)start..cast(size_t)(what.ptr+what.length)]); 190 return sb.get(); 191 } 192 193 assert(0); 194 } 195 p = cast(T*)memchr(p, head, end-p); 196 } 197 198 return what; 199 } 200 201 unittest 202 { 203 import std.array; 204 void test(string haystack, string from, string to) 205 { 206 auto description = `("` ~ haystack ~ `", "` ~ from ~ `", "` ~ to ~ `")`; 207 208 auto r1 = fastReplace(haystack, from, to); 209 auto r2 = replace(haystack, from, to); 210 assert(r1 == r2, `Bad replace: ` ~ description ~ ` == "` ~ r1 ~ `"`); 211 212 if (r1 == haystack) 213 assert(r1 is haystack, `Pointless reallocation: ` ~ description); 214 } 215 216 test("Mary had a little lamb", "a", "b"); 217 test("Mary had a little lamb", "a", "aaa"); 218 test("Mary had a little lamb", "Mary", "Lucy"); 219 test("Mary had a little lamb", "Mary", "Jimmy"); 220 test("Mary had a little lamb", "lamb", "goat"); 221 test("Mary had a little lamb", "lamb", "sheep"); 222 test("Mary had a little lamb", " l", " x"); 223 test("Mary had a little lamb", " l", " xx"); 224 225 test("Mary had a little lamb", "X" , "Y" ); 226 test("Mary had a little lamb", "XX", "Y" ); 227 test("Mary had a little lamb", "X" , "YY"); 228 test("Mary had a little lamb", "XX", "YY"); 229 test("Mary had a little lamb", "aX", "Y" ); 230 test("Mary had a little lamb", "aX", "YY"); 231 232 test("foo", "foobar", "bar"); 233 } 234 235 T[][] fastSplit(T, U)(T[] s, U d) 236 if (is(Unqual!T == Unqual!U)) 237 { 238 if (!s.length) 239 return null; 240 241 auto p = cast(T*)memchr(s.ptr, d, s.length); 242 if (!p) 243 return [s]; 244 245 size_t n; 246 auto end = s.ptr + s.length; 247 do 248 { 249 n++; 250 p++; 251 p = cast(T*) memchr(p, d, end-p); 252 } 253 while (p); 254 255 auto result = new T[][n+1]; 256 n = 0; 257 auto start = s.ptr; 258 p = cast(T*) memchr(start, d, s.length); 259 do 260 { 261 result[n++] = start[0..p-start]; 262 start = ++p; 263 p = cast(T*) memchr(p, d, end-p); 264 } 265 while (p); 266 result[n] = start[0..end-start]; 267 268 return result; 269 } 270 271 T[][] splitAsciiLines(T)(T[] text) 272 if (is(Unqual!T == char)) 273 { 274 auto lines = text.fastSplit('\n'); 275 foreach (ref line; lines) 276 if (line.length && line[$-1]=='\r') 277 line = line[0..$-1]; 278 return lines; 279 } 280 281 unittest 282 { 283 assert(splitAsciiLines("a\nb\r\nc\r\rd\n\re\r\n\nf") == ["a", "b", "c\r\rd", "\re", "", "f"]); 284 assert(splitAsciiLines(string.init) == splitLines(string.init)); 285 } 286 287 /// Like std.string.split (one argument version, which splits by 288 /// whitespace), but only splits by ASCII and does not autodecode. 289 T[][] asciiSplit(T)(T[] text) 290 if (is(Unqual!T == char)) 291 { 292 bool inWhitespace = true; 293 size_t wordStart; 294 T[][] result; 295 296 void endWord(size_t p) 297 { 298 if (!inWhitespace) 299 { 300 result ~= text[wordStart..p]; 301 inWhitespace = true; 302 } 303 } 304 305 foreach (p, c; text) 306 if (std.ascii.isWhite(c)) 307 endWord(p); 308 else 309 if (inWhitespace) 310 { 311 inWhitespace = false; 312 wordStart = p; 313 } 314 endWord(text.length); 315 return result; 316 } 317 318 unittest 319 { 320 foreach (s; ["", " ", "a", " a", "a ", "a b", " a b", "a b ", " a b ", 321 " ", " a", "a ", "a b", "a b ", "a b c"]) 322 assert(s.split == s.asciiSplit, format("Got %s, expected %s", s.asciiSplit, s.split)); 323 } 324 325 T[] asciiStrip(T)(T[] s) 326 if (is(Unqual!T == char)) 327 { 328 while (s.length && isWhite(s[0])) 329 s = s[1..$]; 330 while (s.length && isWhite(s[$-1])) 331 s = s[0..$-1]; 332 return s; 333 } 334 335 unittest 336 { 337 string s = "Hello, world!"; 338 assert(asciiStrip(s) is s); 339 assert(asciiStrip("\r\n\tHello ".dup) == "Hello"); 340 } 341 342 /// Covering slice-list of s with interleaved whitespace. 343 T[][] segmentByWhitespace(T)(T[] s) 344 if (is(Unqual!T == char)) 345 { 346 if (!s.length) 347 return null; 348 349 T[][] segments; 350 bool wasWhite = isWhite(s[0]); 351 size_t start = 0; 352 foreach (p, char c; s) 353 { 354 bool isWhite = isWhite(c); 355 if (isWhite != wasWhite) 356 segments ~= s[start..p], 357 start = p; 358 wasWhite = isWhite; 359 } 360 segments ~= s[start..$]; 361 362 return segments; 363 } 364 365 T[] newlinesToSpaces(T)(T[] s) 366 if (is(Unqual!T == char)) 367 { 368 auto slices = segmentByWhitespace(s); 369 foreach (ref slice; slices) 370 if (slice.contains("\n")) 371 slice = " "; 372 return slices.join(); 373 } 374 375 ascii normalizeWhitespace(ascii s) 376 { 377 auto slices = segmentByWhitespace(strip(s)); 378 foreach (i, ref slice; slices) 379 if (i & 1) // odd 380 slice = " "; 381 return slices.join(); 382 } 383 384 unittest 385 { 386 assert(normalizeWhitespace(" Mary had\ta\nlittle\r\n\tlamb") == "Mary had a little lamb"); 387 } 388 389 string[] splitByCamelCase(string s) 390 { 391 string[] result; 392 size_t start = 0; 393 foreach (i; 1..s.length+1) 394 if (i == s.length 395 || (isLower(s[i-1]) && isUpper(s[i])) 396 || (i+1 < s.length && isUpper(s[i-1]) && isUpper(s[i]) && isLower(s[i+1])) 397 ) 398 { 399 result ~= s[start..i]; 400 start = i; 401 } 402 return result; 403 } 404 405 unittest 406 { 407 assert(splitByCamelCase("parseIPString") == ["parse", "IP", "String"]); 408 assert(splitByCamelCase("IPString") == ["IP", "String"]); 409 } 410 411 string camelCaseJoin(string[] arr) 412 { 413 if (!arr.length) 414 return null; 415 string result = arr[0]; 416 foreach (s; arr[1..$]) 417 result ~= std.ascii.toUpper(s[0]) ~ s[1..$]; 418 return result; 419 } 420 421 unittest 422 { 423 assert("parse-IP-string".split('-').camelCaseJoin() == "parseIPString"); 424 } 425 426 // ************************************************************************ 427 428 private __gshared char[256] asciiLower, asciiUpper; 429 430 shared static this() 431 { 432 foreach (c; 0..256) 433 { 434 asciiLower[c] = cast(char)std.ascii.toLower(c); 435 asciiUpper[c] = cast(char)std.ascii.toUpper(c); 436 } 437 } 438 439 void xlat(alias TABLE, T)(T[] buf) 440 { 441 foreach (ref c; buf) 442 c = TABLE[c]; 443 } 444 445 alias xlat!(asciiLower, char) asciiToLower; 446 alias xlat!(asciiUpper, char) asciiToUpper; 447 448 // ************************************************************************ 449 450 /// Case-insensitive ASCII string. 451 alias CIAsciiString = NormalizedArray!(immutable(char), s => s.byCodeUnit.map!(std.ascii.toLower)); 452 453 /// 454 unittest 455 { 456 CIAsciiString s = "test"; 457 assert(s == "TEST"); 458 assert(s >= "Test" && s <= "Test"); 459 assert(CIAsciiString("a") == CIAsciiString("A")); 460 assert(CIAsciiString("a") != CIAsciiString("B")); 461 assert(CIAsciiString("a") < CIAsciiString("B")); 462 assert(CIAsciiString("A") < CIAsciiString("b")); 463 assert(CIAsciiString("я") != CIAsciiString("Я")); 464 } 465 466 /// Case-insensitive Unicode string. 467 alias CIUniString = NormalizedArray!(immutable(char), s => s.map!(std.uni.toLower)); 468 469 /// 470 unittest 471 { 472 CIUniString s = "привет"; 473 assert(s == "ПРИВЕТ"); 474 assert(s >= "Привет" && s <= "Привет"); 475 assert(CIUniString("я") == CIUniString("Я")); 476 assert(CIUniString("а") != CIUniString("Б")); 477 assert(CIUniString("а") < CIUniString("Б")); 478 assert(CIUniString("А") < CIUniString("б")); 479 } 480 481 // ************************************************************************ 482 483 import std.utf; 484 485 /// Convert any data to a valid UTF-8 bytestream, so D's string functions can 486 /// properly work on it. 487 string rawToUTF8(in char[] s) 488 { 489 auto d = new dchar[s.length]; 490 foreach (i, char c; s) 491 d[i] = c; 492 return toUTF8(d); 493 } 494 495 /// Undo rawToUTF8. 496 ascii UTF8ToRaw(in char[] r) pure 497 { 498 auto s = new char[r.length]; 499 size_t i = 0; 500 foreach (dchar c; r) 501 { 502 assert(c < '\u0100'); 503 s[i++] = cast(char)c; 504 } 505 return s[0..i]; 506 } 507 508 unittest 509 { 510 char[1] c; 511 for (int i=0; i<256; i++) 512 { 513 c[0] = cast(char)i; 514 assert(UTF8ToRaw(rawToUTF8(c[])) == c[], format("%s -> %s -> %s", cast(ubyte[])c[], cast(ubyte[])rawToUTF8(c[]), cast(ubyte[])UTF8ToRaw(rawToUTF8(c[])))); 515 } 516 } 517 518 /// Where a delegate with this signature is required. 519 string nullStringTransform(in char[] s) { return to!string(s); } 520 521 string forceValidUTF8(string s) 522 { 523 try 524 { 525 validate(s); 526 return s; 527 } 528 catch (UTFException) 529 return rawToUTF8(s); 530 } 531 532 // ************************************************************************ 533 534 /// Return the slice up to the first NUL character, 535 /// or of the whole array if none is found. 536 C[] fromZArray(C, n)(ref C[n] arr) 537 { 538 auto p = arr.representation.countUntil(0); 539 return arr[0 .. p<0 ? $ : p]; 540 } 541 542 /// ditto 543 C[] fromZArray(C)(C[] arr) 544 { 545 auto p = arr.representation.countUntil(0); 546 return arr[0 .. p<0 ? $ : p]; 547 } 548 549 unittest 550 { 551 char[4] arr = "ab\0d"; 552 assert(arr.fromZArray == "ab"); 553 arr[] = "abcd"; 554 assert(arr.fromZArray == "abcd"); 555 } 556 557 unittest 558 { 559 string arr = "ab\0d"; 560 assert(arr.fromZArray == "ab"); 561 arr = "abcd"; 562 assert(arr.fromZArray == "abcd"); 563 } 564 565 // ************************************************************************ 566 567 /// Formats binary data as a hex dump (three-column layout consisting of hex 568 /// offset, byte values in hex, and printable low-ASCII characters). 569 string hexDump(const(void)[] b) 570 { 571 auto data = cast(const(ubyte)[]) b; 572 assert(data.length); 573 size_t i=0; 574 string s; 575 while (i<data.length) 576 { 577 s ~= format("%08X: ", i); 578 foreach (x; 0..16) 579 { 580 if (i+x<data.length) 581 s ~= format("%02X ", data[i+x]); 582 else 583 s ~= " "; 584 if (x==7) 585 s ~= "| "; 586 } 587 s ~= " "; 588 foreach (x; 0..16) 589 { 590 if (i+x<data.length) 591 if (data[i+x]==0) 592 s ~= ' '; 593 else 594 if (data[i+x]<32 || data[i+x]>=128) 595 s ~= '.'; 596 else 597 s ~= cast(char)data[i+x]; 598 else 599 s ~= ' '; 600 } 601 s ~= "\n"; 602 i += 16; 603 } 604 return s; 605 } 606 607 import std.conv; 608 609 T fromHex(T : ulong = uint, C)(const(C)[] s) 610 { 611 T result = parse!T(s, 16); 612 enforce(s.length==0, new ConvException("Could not parse entire string")); 613 return result; 614 } 615 616 ubyte[] arrayFromHex(in char[] hex) 617 { 618 auto buf = new ubyte[hex.length/2]; 619 arrayFromHex(hex, buf); 620 return buf; 621 } 622 623 struct HexParseConfig 624 { 625 bool checked = true; 626 bool lower = true; 627 bool upper = true; 628 } 629 630 ubyte parseHexDigit(HexParseConfig config = HexParseConfig.init)(char c) 631 { 632 static assert(config.lower || config.upper, 633 "Must parse at least either lower or upper case digits"); 634 static if (config.checked) 635 { 636 switch (c) 637 { 638 case '0': .. case '9': return cast(ubyte)(c - '0'); 639 case 'a': .. case 'f': return cast(ubyte)(c - 'a' + 10); 640 case 'A': .. case 'F': return cast(ubyte)(c - 'A' + 10); 641 default: throw new Exception("Bad hex digit: " ~ c); 642 } 643 } 644 else 645 { 646 if (c <= '9') 647 return cast(ubyte)(c - '0'); 648 static if (config.lower && config.upper) 649 { 650 if (c < 'a') 651 return cast(ubyte)(c - 'A' + 10); 652 else 653 return cast(ubyte)(c - 'a' + 10); 654 } 655 else 656 static if (config.lower) 657 return cast(ubyte)(c - 'a' + 10); 658 else 659 return cast(ubyte)(c - 'A' + 10); 660 } 661 } 662 663 void arrayFromHex(HexParseConfig config = HexParseConfig.init)(in char[] hex, ubyte[] buf) 664 { 665 assert(buf.length == hex.length/2, "Wrong buffer size for arrayFromHex"); 666 for (int i=0; i<hex.length; i+=2) 667 buf[i/2] = cast(ubyte)( 668 parseHexDigit!config(hex[i ])*16 + 669 parseHexDigit!config(hex[i+1]) 670 ); 671 } 672 673 /// Fast version for static arrays of known length. 674 void sarrayFromHex(HexParseConfig config = HexParseConfig.init, size_t N, Hex)(in ref Hex hex, ref ubyte[N] buf) 675 if (is(Hex == char[N*2])) 676 { 677 foreach (i; 0..N/4) 678 { 679 ulong chars = (cast(ulong*)hex.ptr)[i]; 680 uint res = 681 (parseHexDigit!config((chars >> (8*0)) & 0xFF) << (4*1)) | 682 (parseHexDigit!config((chars >> (8*1)) & 0xFF) << (4*0)) | 683 (parseHexDigit!config((chars >> (8*2)) & 0xFF) << (4*3)) | 684 (parseHexDigit!config((chars >> (8*3)) & 0xFF) << (4*2)) | 685 (parseHexDigit!config((chars >> (8*4)) & 0xFF) << (4*5)) | 686 (parseHexDigit!config((chars >> (8*5)) & 0xFF) << (4*4)) | 687 (parseHexDigit!config((chars >> (8*6)) & 0xFF) << (4*7)) | 688 (parseHexDigit!config((chars >> (8*7)) & 0xFF) << (4*6)); 689 (cast(uint*)buf.ptr)[i] = res; 690 } 691 foreach (i; N/4*4..N) 692 buf[i] = cast(ubyte)( 693 parseHexDigit!config(hex[i*2 ])*16 + 694 parseHexDigit!config(hex[i*2+1]) 695 ); 696 } 697 698 unittest 699 { 700 foreach (checked; TypeTuple!(false, true)) 701 foreach (lower; TypeTuple!(false, true)) 702 foreach (upper; TypeTuple!(false, true)) 703 static if (lower || upper) 704 { 705 enum config = HexParseConfig(checked, lower, upper); 706 char[18] buf; 707 foreach (n; 0..18) 708 if (lower && upper ? n & 1 : upper) 709 buf[n] = hexDigits[n % 16]; 710 else 711 buf[n] = lowerHexDigits[n % 16]; 712 ubyte[9] res; 713 sarrayFromHex!config(buf, res); 714 assert(res == [0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0x01], text(res)); 715 } 716 } 717 718 template toHex(alias digits = hexDigits) 719 { 720 char[] toHex(in ubyte[] data, char[] buf) pure 721 { 722 assert(buf.length == data.length*2); 723 foreach (i, b; data) 724 { 725 buf[i*2 ] = digits[b>>4]; 726 buf[i*2+1] = digits[b&15]; 727 } 728 return buf; 729 } 730 731 string toHex(in ubyte[] data) pure 732 { 733 auto buf = new char[data.length*2]; 734 foreach (i, b; data) 735 { 736 buf[i*2 ] = digits[b>>4]; 737 buf[i*2+1] = digits[b&15]; 738 } 739 return buf; 740 } 741 } 742 743 alias toLowerHex = toHex!lowerHexDigits; 744 745 void toHex(T : ulong, size_t U = T.sizeof*2)(T n, ref char[U] buf) 746 { 747 foreach (i; Reverse!(RangeTuple!(T.sizeof*2))) 748 { 749 buf[i] = hexDigits[n & 0xF]; 750 n >>= 4; 751 } 752 } 753 754 unittest 755 { 756 ubyte[] bytes = [0x12, 0x34]; 757 assert(toHex(bytes) == "1234"); 758 } 759 760 unittest 761 { 762 ubyte[] bytes = [0x12, 0x34]; 763 char[] buf = new char[4]; 764 toHex(bytes, buf); 765 assert(buf == "1234"); 766 } 767 768 unittest 769 { 770 char[8] buf; 771 toHex(0x01234567, buf); 772 assert(buf == "01234567"); 773 } 774 775 /// How many significant decimal digits does a FP type have 776 /// (determined empirically) 777 enum significantDigits(T : real) = 2 + 2 * T.sizeof; 778 779 /// Format string for a FP type which includes all necessary 780 /// significant digits 781 enum fpFormatString(T) = "%." ~ text(significantDigits!T) ~ "g"; 782 783 /// Get shortest string representation of a FP type that still converts to exactly the same number. 784 template fpToString(F) 785 { 786 string fpToString(F v) 787 { 788 /// Bypass FPU register, which may contain a different precision 789 static F forceType(F d) { static F n; n = d; return n; } 790 791 StaticBuf!(char, 64) buf; 792 formattedWrite(&buf, fpFormatString!F, forceType(v)); 793 char[] s = buf.data(); 794 795 if (s != "nan" && s != "-nan" && s != "inf" && s != "-inf") 796 { 797 if (forceType(to!F(s)) != v) 798 { 799 static if (is(F == real)) 800 { 801 // Something funny with DM libc real parsing... e.g. 0.6885036635121051783 802 return s.idup; 803 } 804 else 805 assert(false, "Initial conversion fails: " ~ format(fpFormatString!F, to!F(s))); 806 } 807 808 foreach_reverse (i; 1..s.length) 809 if (s[i]>='0' && s[i]<='8') 810 { 811 s[i]++; 812 if (forceType(to!F(s[0..i+1]))==v) 813 s = s[0..i+1]; 814 else 815 s[i]--; 816 } 817 while (s.length>2 && s[$-1]!='.' && forceType(to!F(s[0..$-1]))==v) 818 s = s[0..$-1]; 819 } 820 return s.idup; 821 } 822 823 static if (!is(F == real)) 824 unittest 825 { 826 union U 827 { 828 ubyte[F.sizeof] bytes; 829 F d; 830 string toString() { return (fpFormatString!F ~ " %a [%(%02X %)]").format(d, d, bytes[]); } 831 } 832 import std.random : Xorshift, uniform; 833 import std.stdio : stderr; 834 Xorshift rng; 835 foreach (n; 0..10000) 836 { 837 U u; 838 foreach (ref b; u.bytes[]) 839 b = uniform!ubyte(rng); 840 static if (is(F == real)) 841 u.bytes[7] |= 0x80; // require normalized value 842 scope(failure) stderr.writeln("Input:\t", u); 843 auto s = fpToString(u.d); 844 scope(failure) stderr.writeln("Result:\t", s); 845 if (s == "nan" || s == "-nan") 846 continue; // there are many NaNs... 847 U r; 848 r.d = to!F(s); 849 assert(r.bytes == u.bytes, 850 "fpToString mismatch:\nOutput:\t%s".format(r)); 851 } 852 } 853 } 854 855 alias doubleToString = fpToString!double; 856 857 unittest 858 { 859 alias floatToString = fpToString!float; 860 alias realToString = fpToString!real; 861 } 862 863 string numberToString(T)(T v) 864 if (isNumeric!T) 865 { 866 static if (is(T : real)) 867 return fpToString(v); 868 else 869 return toDec(v); 870 } 871 872 // ************************************************************************ 873 874 /// Simpler implementation of Levenshtein string distance 875 int stringDistance(string s, string t) 876 { 877 int n = cast(int)s.length; 878 int m = cast(int)t.length; 879 if (n == 0) return m; 880 if (m == 0) return n; 881 int[][] distance = new int[][](n+1, m+1); // matrix 882 int cost=0; 883 //init1 884 foreach (i; 0..n+1) distance[i][0]=i; 885 foreach (j; 0..m+1) distance[0][j]=j; 886 //find min distance 887 foreach (i; 1..n+1) 888 foreach (j; 1..m+1) 889 { 890 cost = t[j-1] == s[i-1] ? 0 : 1; 891 distance[i][j] = min( 892 distance[i-1][j ] + 1, 893 distance[i ][j-1] + 1, 894 distance[i-1][j-1] + cost 895 ); 896 } 897 return distance[n][m]; 898 } 899 900 /// Return a number between 0.0 and 1.0 indicating how similar two strings are 901 /// (1.0 if identical) 902 float stringSimilarity(string string1, string string2) 903 { 904 float dis = stringDistance(string1, string2); 905 float maxLen = string1.length; 906 if (maxLen < string2.length) 907 maxLen = string2.length; 908 if (maxLen == 0) 909 return 1; 910 else 911 return 1f - dis/maxLen; 912 } 913 914 /// Select best match from a list of items. 915 /// Returns -1 if none are above the threshold. 916 sizediff_t findBestMatch(in string[] items, string target, float threshold = 0.7) 917 { 918 sizediff_t found = -1; 919 float best = 0; 920 921 foreach (i, item; items) 922 { 923 float match = stringSimilarity(toLower(item),toLower(target)); 924 if (match>threshold && match>=best) 925 { 926 best = match; 927 found = i; 928 } 929 } 930 931 return found; 932 } 933 934 /// Select best match from a list of items. 935 /// Returns null if none are above the threshold. 936 string selectBestFrom(in string[] items, string target, float threshold = 0.7) 937 { 938 auto index = findBestMatch(items, target, threshold); 939 return index < 0 ? null : items[index]; 940 } 941 942 // ************************************************************************ 943 944 945 string randomString(int length=20, string chars="abcdefghijklmnopqrstuvwxyz") 946 { 947 import std.random; 948 import std.range; 949 950 return length.iota.map!(n => chars[uniform(0, $)]).array; 951 }