/**
 * Utility code related to string and text processing.
 *
 * License:
 *   This Source Code Form is subject to the terms of
 *   the Mozilla Public License, v. 2.0. If a copy of
 *   the MPL was not distributed with this file, You
 *   can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Authors:
 *   Vladimir Panteleev <vladimir@thecybershadow.net>
 */

module ae.utils.text;

import std.algorithm;
import std.ascii;
import std.exception;
import std.conv;
import std.format;
import std.string;
import std.traits;
import std.typetuple;

import core.stdc.string;

import ae.utils.array;
import ae.utils.meta;
import ae.utils.textout;

// Pin the unqualified name `indexOf` to std.string's implementation
// (presumably to avoid ambiguity with same-named symbols from the other
// imports above - verify before removing).
alias indexOf = std.string.indexOf;

public import ae.utils.text.ascii : ascii, DecimalSize, toDec, toDecFixed;

// ************************************************************************

/// Convenience helper: returns true if `what` occurs anywhere in `str`
/// (as determined by `indexOf`).
bool contains(T, U)(T[] str, U[] what)
    if (is(Unqual!T == Unqual!U))
{
    return str.indexOf(what)>=0;
}

/// CTFE helper: formats `obj` using the format string `fmt`.
string formatAs(T)(auto ref T obj, string fmt)
{
    return format(fmt, obj);
}

/// Consume a LF or CRLF terminated line from s.
/// With the default `eatIncompleteLines`, sets s to null and returns the
/// remainder if there is no line terminator in s (see the unittest below).
/// `eatIncompleteLines` is forwarded unchanged to `skipUntil`, which is
/// defined outside this file.
T[] eatLine(T)(ref T[] s, bool eatIncompleteLines = true)
{
    return s.skipUntil([T('\n')], eatIncompleteLines).chomp();
}

/// Deprecated variant of `eatLine` taking the end-of-input policy as a
/// template argument (forwarded to `eatUntil`).
deprecated template eatLine(OnEof onEof)
{
    T[] eatLine(T)(ref T[] s)
    {
        return s.eatUntil!onEof([T('\n')]).chomp();
    }
}

unittest
{
    string s = "Hello\nworld";
    assert(s.eatLine() == "Hello");
    assert(s.eatLine() == "world");
    assert(s is null);
    assert(s.eatLine() is null);
}

// Uses memchr (not Boyer-Moore), best for short strings.
T[] fastReplace(T)(T[] what, T[] from, T[] to)
    if (T.sizeof == 1) // TODO (uses memchr)
{
    // Returns `what` with every non-overlapping occurrence of `from`
    // replaced by `to`. When nothing matches (or `from` is empty or longer
    // than `what`), the original slice is returned without allocating.

    alias Unqual!T U;

//  debug scope(failure) std.stdio.writeln("fastReplace crashed: ", [what, from, to]);
    // Null-based pointer: slicing it between two addresses (cast to size_t)
    // yields a U[] covering that address range.
    enum RAM = cast(U*)null;

    if (what.length < from.length || from.length==0)
        return what;

    if (from.length==1)
    {
        auto fromc = from[0];
        if (to.length==1)
        {
            // Single character -> single character: copy once, patch in place.
            auto p = cast(T*)memchr(what.ptr, fromc, what.length);
            if (!p)
                return what;

            // Declared as T[] (not `auto`): `dup` yields a mutable array,
            // which could not be returned as T[] when T is immutable.
            T[] result = what.dup;
            auto delta = result.ptr - what.ptr;
            auto toChar = to[0];
            auto end = what.ptr + what.length;
            do
            {
                // p walks `what`; p[delta] is the same position in `result`.
                (cast(U*)p)[delta] = toChar; // zomg hax lol
                p++;
                p = cast(T*)memchr(p, fromc, end - p);
            } while (p);
            return result;
        }
        else
        {
            // Single character -> arbitrary string: rebuild via StringBuilder.
            auto p = cast(immutable(T)*)memchr(what.ptr, fromc, what.length);
            if (!p)
                return what;

            auto sb = StringBuilder(what.length);
            do
            {
                sb.put(what[0..p-what.ptr], to);
                what = what[p-what.ptr+1..$];
                p = cast(immutable(T)*)memchr(what.ptr, fromc, what.length);
            }
            while (p);

            sb.put(what);
            return sb.get();
        }
    }

    // Multi-character needle: memchr for the first character, then compare
    // the remaining characters.
    auto head = from[0];
    auto tail = from[1..$];

    auto p = cast(T*)what.ptr;
    // Last position (exclusive) at which `head` can start a complete match.
    auto end = p + what.length - tail.length;
    p = cast(T*)memchr(p, head, end-p);
    while (p)
    {
        p++; // p now points just past the head character
        if (p[0..tail.length] == tail)
        {
            // First match found; nothing has been allocated before this point.
            if (from.length == to.length)
            {
                // Same length: duplicate once and overwrite matches in place.
                T[] result = what.dup;
                // Maps p (one past the head, in `what`) to the match start in `result`.
                auto deltaMinusOne = (result.ptr - what.ptr) - 1;

                // Jump into the loop body to handle the match already found.
                goto replaceA;
            dummyA: // compiler complains

                do
                {
                    p++;
                    if (p[0..tail.length] == tail)
                    {
                    replaceA:
                        (cast(U*)p+deltaMinusOne)[0..to.length] = to[];
                        // Skip the rest of the match so that occurrences
                        // overlapping it are not replaced as well.
                        p += tail.length;
                    }
                    // p may now be past `end`; never pass a negative length to memchr.
                    p = p < end ? cast(T*)memchr(p, head, end-p) : null;
                }
                while (p);

                return result;
            }
            else
            {
                // Different length: append unmatched spans and `to` to a builder.
                auto start = cast(T*)what.ptr; // start of the pending unmatched span
                auto sb = StringBuilder(what.length);
                // Jump into the loop body to handle the match already found.
                goto replaceB;
            dummyB: // compiler complains

                do
                {
                    p++;
                    if (p[0..tail.length] == tail)
                    {
                    replaceB:
                        // Emit [start, match start), then the replacement.
                        sb.put(RAM[cast(size_t)start .. cast(size_t)p-1], to);
                        start = p + tail.length;
                        what = what[start-what.ptr..$];
                    }
                    else
                    {
                        what = what[p-what.ptr..$];
                    }
                    // Only search positions where a complete match still fits;
                    // otherwise the tail comparison would read past the slice.
                    p = what.length > tail.length
                        ? cast(T*)memchr(what.ptr, head, what.length - tail.length)
                        : null;
                }
                while (p);

                //sb.put(what);
                sb.put(RAM[cast(size_t)start..cast(size_t)(what.ptr+what.length)]);
                return sb.get();
            }

            assert(0);
        }
        p = cast(T*)memchr(p, head, end-p);
    }

    return what;
}

unittest
{
    import std.array;
    void test(string haystack, string from, string to)
    {
        auto description = `("` ~ haystack ~ `", "` ~ from ~ `", "` ~ to ~ `")`;

        auto r1 = fastReplace(haystack, from, to);
        auto r2 = replace(haystack, from, to);
        assert(r1 == r2, `Bad replace: ` ~ description ~ ` == "` ~ r1 ~ `"`);

        if (r1 == haystack)
            assert(r1 is haystack, `Pointless reallocation: ` ~ description);
    }

    test("Mary had a little lamb", "a", "b");
    test("Mary had a little lamb", "a", "aaa");
    test("Mary had a little lamb", "Mary", "Lucy");
    test("Mary had a little lamb", "Mary", "Jimmy");
    test("Mary had a little lamb", "lamb", "goat");
    test("Mary had a little lamb", "lamb", "sheep");
    test("Mary had a little lamb", " l", " x");
    test("Mary had a little lamb", " l", " xx");

    test("Mary had a little lamb", "X" , "Y" );
    test("Mary had a little lamb", "XX", "Y" );
    test("Mary had a little lamb", "X" , "YY");
    test("Mary had a little lamb", "XX", "YY");
    test("Mary had a little lamb", "aX", "Y" );
    test("Mary had a little lamb", "aX", "YY");

    test("foo", "foobar", "bar");

    // Matches must not overlap (equal-length path).
    test("aaa", "aa", "bb");
    test("aaaa", "aa", "bb");
    // A lone head character at the very end of a slice taken from a larger
    // buffer must not be matched against bytes beyond the slice.
    test("ab ab"[0..4], "ab", "xyz");
}

/// Splits `s` on every occurrence of the delimiter `d` and returns slices
/// of `s` (no copying). Returns null for empty input and `[s]` when the
/// delimiter does not occur.
/// NOTE(review): the search uses memchr with `s.length` as the byte count,
/// which only looks correct for single-byte element types - confirm callers.
T[][] fastSplit(T, U)(T[] s, U d)
    if (is(Unqual!T == Unqual!U))
{
    if (!s.length)
        return null;

    auto p = cast(T*)memchr(s.ptr, d, s.length);
    if (!p)
        return [s];

    // First pass: count delimiters so the result can be allocated once.
    size_t n;
    auto end = s.ptr + s.length;
    do
    {
        n++;
        p++;
        p = cast(T*) memchr(p, d, end-p);
    }
    while (p);

    // Second pass: record the slice in front of each delimiter.
    auto result = new T[][n+1];
    n = 0;
    auto start = s.ptr;
    p = cast(T*) memchr(start, d, s.length);
    do
    {
        result[n++] = start[0..p-start];
        start = ++p;
        p = cast(T*) memchr(p, d, end-p);
    }
    while (p);
    // Whatever follows the last delimiter (possibly empty).
    result[n] = start[0..end-start];

    return result;
}

/// Splits `text` on '\n' and removes one trailing '\r' from each line.
T[][] splitAsciiLines(T)(T[] text)
    if (is(Unqual!T == char))
{
    auto lines = text.fastSplit('\n');
    foreach (ref line; lines)
        if (line.length && line[$-1]=='\r')
            line = line[0..$-1];
    return lines;
}

unittest
{
    assert(splitAsciiLines("a\nb\r\nc\r\rd\n\re\r\n\nf") == ["a", "b", "c\r\rd", "\re", "", "f"]);
    assert(splitAsciiLines(string.init) == splitLines(string.init));
}

/// Like std.string.split (one argument version, which splits by
/// whitespace), but only splits by ASCII and does not autodecode.
T[][] asciiSplit(T)(T[] text)
    if (is(Unqual!T == char))
{
    bool inWhitespace = true;
    size_t wordStart;
    T[][] result;

    // Closes the current word (if any) at index p.
    void endWord(size_t p)
    {
        if (!inWhitespace)
        {
            result ~= text[wordStart..p];
            inWhitespace = true;
        }
    }

    foreach (p, c; text)
        if (std.ascii.isWhite(c))
            endWord(p);
        else
        if (inWhitespace)
        {
            inWhitespace = false;
            wordStart = p;
        }
    endWord(text.length);
    return result;
}

unittest
{
    foreach (s; ["", " ", "a", " a", "a ", "a b", " a b", "a b ", " a b ",
            " ", " a", "a ", "a b", "a b ", "a b c"])
        assert(s.split == s.asciiSplit, format("Got %s, expected %s", s.asciiSplit, s.split));
}

/// Returns the slice of `s` without leading and trailing ASCII whitespace.
T[] asciiStrip(T)(T[] s)
    if (is(Unqual!T == char))
{
    while (s.length && isWhite(s[0]))
        s = s[1..$];
    while (s.length && isWhite(s[$-1]))
        s = s[0..$-1];
    return s;
}

unittest
{
    string s = "Hello, world!";
    assert(asciiStrip(s) is s);
    assert(asciiStrip("\r\n\tHello ".dup) == "Hello");
}

/// Covering slice-list of s with interleaved whitespace.
/// Each returned slice is a maximal run of either whitespace or
/// non-whitespace characters; returns null for empty input.
T[][] segmentByWhitespace(T)(T[] s)
    if (is(Unqual!T == char))
{
    if (!s.length)
        return null;

    T[][] segments;
    bool wasWhite = isWhite(s[0]);
    size_t start = 0;
    foreach (p, char c; s)
    {
        // Named differently from the `isWhite` function to avoid shadowing it.
        bool nowWhite = isWhite(c);
        if (nowWhite != wasWhite)
        {
            segments ~= s[start..p];
            start = p;
        }
        wasWhite = nowWhite;
    }
    segments ~= s[start..$];

    return segments;
}

/// Replaces every whitespace run that contains a newline with a single space.
T[] newlinesToSpaces(T)(T[] s)
    if (is(Unqual!T == char))
{
    auto slices = segmentByWhitespace(s);
    foreach (ref slice; slices)
        if (slice.contains("\n"))
            slice = " ";
    return slices.join();
}

/// Strips `s` and collapses every interior whitespace run to one space.
ascii normalizeWhitespace(ascii s)
{
    auto slices = segmentByWhitespace(strip(s));
    // After stripping, segment 0 is non-whitespace, so odd segments are whitespace.
    foreach (i, ref slice; slices)
        if (i & 1) // odd
            slice = " ";
    return slices.join();
}

unittest
{
    assert(normalizeWhitespace(" Mary had\ta\nlittle\r\n\tlamb") == "Mary had a little lamb");
}

/// Splits a camelCase identifier into its words. A word boundary is a
/// lower-to-upper transition, or the last capital of an upper-case run
/// that is followed by a lower-case letter.
string[] splitByCamelCase(string s)
{
    string[] result;
    size_t start = 0;
    foreach (i; 1..s.length+1)
        if (i == s.length
         || (isLower(s[i-1]) && isUpper(s[i]))
         || (i+1 < s.length && isUpper(s[i-1]) && isUpper(s[i]) && isLower(s[i+1]))
        )
        {
            result ~= s[start..i];
            start = i;
        }
    return result;
}

unittest
{
    assert(splitByCamelCase("parseIPString") == ["parse", "IP", "String"]);
    assert(splitByCamelCase("IPString") == ["IP", "String"]);
}

/// Joins words into a camelCase identifier: the first word is kept as is,
/// every following word gets its first character upper-cased.
/// Returns null for an empty array; empty words after the first are skipped.
string camelCaseJoin(string[] arr)
{
    if (!arr.length)
        return null;
    string result = arr[0];
    foreach (s; arr[1..$])
        if (s.length) // an empty word has no first character to capitalize
            result ~= std.ascii.toUpper(s[0]) ~ s[1..$];
    return result;
}

unittest
{
    assert("parse-IP-string".split('-').camelCaseJoin() == "parseIPString");
    assert(["a", "", "b"].camelCaseJoin() == "aB");
}
// ************************************************************************

/// Lookup tables mapping every code unit to its ASCII lower-case /
/// upper-case counterpart; filled in by the module constructor below.
private __gshared char[256] asciiLower, asciiUpper;

shared static this()
{
    foreach (code; 0..256)
    {
        asciiUpper[code] = cast(char)std.ascii.toUpper(code);
        asciiLower[code] = cast(char)std.ascii.toLower(code);
    }
}

/// Translates `buf` in place, replacing each element with `TABLE[element]`.
void xlat(alias TABLE, T)(T[] buf)
{
    foreach (pos; 0..buf.length)
        buf[pos] = TABLE[buf[pos]];
}

/// In-place ASCII case conversion of a mutable char buffer.
alias asciiToLower = xlat!(asciiLower, char);
alias asciiToUpper = xlat!(asciiUpper, char); /// ditto

// ************************************************************************

/// Case-insensitive ASCII string.
alias CIAsciiString = NormalizedArray!(immutable(char), s => s.byCodeUnit.map!(std.ascii.toLower));

///
unittest
{
    CIAsciiString s = "test";
    assert(s == "TEST");
    assert(s >= "Test" && s <= "Test");
    assert(CIAsciiString("a") == CIAsciiString("A"));
    assert(CIAsciiString("a") != CIAsciiString("B"));
    assert(CIAsciiString("a") < CIAsciiString("B"));
    assert(CIAsciiString("A") < CIAsciiString("b"));
    assert(CIAsciiString("я") != CIAsciiString("Я"));
}

/// Case-insensitive Unicode string.
alias CIUniString = NormalizedArray!(immutable(char), s => s.map!(std.uni.toLower));

///
unittest
{
    CIUniString s = "привет";
    assert(s == "ПРИВЕТ");
    assert(s >= "Привет" && s <= "Привет");
    assert(CIUniString("я") == CIUniString("Я"));
    assert(CIUniString("а") != CIUniString("Б"));
    assert(CIUniString("а") < CIUniString("Б"));
    assert(CIUniString("А") < CIUniString("б"));
}

// ************************************************************************

import std.utf;

/// Convert any data to a valid UTF-8 bytestream, so D's string functions can
/// properly work on it.
string rawToUTF8(in char[] s)
{
    // Widen every input byte to a code point of the same value, then
    // encode the result as UTF-8.
    auto d = new dchar[s.length];
    foreach (i, char c; s)
        d[i] = c;
    return toUTF8(d);
}

/// Undo rawToUTF8.
/// Every decoded code point must be below U+0100 (checked with an assert).
ascii UTF8ToRaw(in char[] r) pure
{
    // The output has at most as many bytes as the input has code units.
    auto s = new char[r.length];
    size_t i = 0;
    foreach (dchar c; r)
    {
        assert(c < '\u0100');
        s[i++] = cast(char)c;
    }
    return s[0..i];
}

unittest
{
    char[1] c;
    for (int i=0; i<256; i++)
    {
        c[0] = cast(char)i;
        assert(UTF8ToRaw(rawToUTF8(c[])) == c[], format("%s -> %s -> %s", cast(ubyte[])c[], cast(ubyte[])rawToUTF8(c[]), cast(ubyte[])UTF8ToRaw(rawToUTF8(c[]))));
    }
}

/// Where a delegate with this signature is required.
string nullStringTransform(in char[] s) { return to!string(s); }

/// Returns `s` unchanged if it is valid UTF-8; otherwise returns
/// `rawToUTF8(s)`, i.e. each byte re-encoded as its own code point.
string forceValidUTF8(string s)
{
    try
    {
        validate(s);
        return s;
    }
    catch (UTFException)
        return rawToUTF8(s);
}

// ************************************************************************

/// Return the slice up to the first NUL character,
/// or of the whole array if none is found.
C[] fromZArray(C, size_t n)(ref C[n] arr)
{
    // `n` must be a value parameter (the array length) for this overload to
    // match static arrays. Taken by ref so the returned slice refers to the
    // caller's array rather than a copy.
    auto p = arr[].representation.countUntil(0);
    return arr[0 .. p<0 ? $ : p];
}

/// ditto
C[] fromZArray(C)(C[] arr)
{
    auto p = arr.representation.countUntil(0);
    return arr[0 .. p<0 ? $ : p];
}

unittest
{
    char[4] arr = "ab\0d";
    assert(arr.fromZArray == "ab");
    arr[] = "abcd";
    assert(arr.fromZArray == "abcd");
}

unittest
{
    string arr = "ab\0d";
    assert(arr.fromZArray == "ab");
    arr = "abcd";
    assert(arr.fromZArray == "abcd");
}

// ************************************************************************

/// Formats binary data as a hex dump (three-column layout consisting of hex
/// offset, byte values in hex, and printable low-ASCII characters).
string hexDump(const(void)[] b)
{
    auto data = cast(const(ubyte)[]) b;
    assert(data.length);
    size_t i=0;
    string s;
    // One output row per 16 input bytes.
    while (i<data.length)
    {
        s ~= format("%08X: ", i);
        foreach (x; 0..16)
        {
            if (i+x<data.length)
                s ~= format("%02X ", data[i+x]);
            else
                s ~= "   "; // same width as "%02X ", keeps the text column aligned
            if (x==7)
                s ~= "| ";
        }
        s ~= " ";
        foreach (x; 0..16)
        {
            if (i+x<data.length)
            {
                if (data[i+x]==0)
                    s ~= ' ';
                else
                if (data[i+x]<32 || data[i+x]>=128)
                    s ~= '.'; // control or non-ASCII byte
                else
                    s ~= cast(char)data[i+x];
            }
            else
                s ~= ' ';
        }
        s ~= "\n";
        i += 16;
    }
    return s;
}

import std.conv;

/// Parses all of `s` as a base-16 number of type T.
/// Throws: ConvException if any part of the string is left unparsed.
T fromHex(T : ulong = uint, C)(const(C)[] s)
{
    T result = parse!T(s, 16);
    enforce(s.length==0, new ConvException("Could not parse entire string"));
    return result;
}

/// Decodes a string of hex digit pairs into a newly allocated byte array.
ubyte[] arrayFromHex(in char[] hex)
{
    auto buf = new ubyte[hex.length/2];
    arrayFromHex(hex, buf);
    return buf;
}

/// Compile-time options for the hex parsing functions below.
struct HexParseConfig
{
    bool checked = true; /// validate each digit (throws on bad input)
    bool lower = true;   /// input may contain lower-case digits (a-f)
    bool upper = true;   /// input may contain upper-case digits (A-F)
}

/// Converts one hex digit character to its value (0..15).
/// With `config.checked`, throws an Exception for a non-hex character
/// (both letter cases are accepted in this mode). Without it, no validation
/// is done and the result for invalid input is unspecified.
ubyte parseHexDigit(HexParseConfig config = HexParseConfig.init)(char c)
{
    static assert(config.lower || config.upper,
        "Must parse at least either lower or upper case digits");
    static if (config.checked)
    {
        switch (c)
        {
            case '0': .. case '9': return cast(ubyte)(c - '0');
            case 'a': .. case 'f': return cast(ubyte)(c - 'a' + 10);
            case 'A': .. case 'F': return cast(ubyte)(c - 'A' + 10);
            default: throw new Exception("Bad hex digit: " ~ c);
        }
    }
    else
    {
        // '0'..'9' sort below all letters in ASCII.
        if (c <= '9')
            return cast(ubyte)(c - '0');
        static if (config.lower && config.upper)
        {
            // Upper-case letters sort below lower-case ones.
            if (c < 'a')
                return cast(ubyte)(c - 'A' + 10);
            else
                return cast(ubyte)(c - 'a' + 10);
        }
        else
            static if (config.lower)
                return cast(ubyte)(c - 'a' + 10);
            else
                return cast(ubyte)(c - 'A' + 10);
    }
}

/// Decodes a string of hex digit pairs into the caller-supplied buffer,
/// which must hold exactly `hex.length/2` bytes.
/// Throws: Exception if `hex` has an odd number of digits.
void arrayFromHex(HexParseConfig config = HexParseConfig.init)(in char[] hex, ubyte[] buf)
{
    assert(buf.length == hex.length/2, "Wrong buffer size for arrayFromHex");
    // An unpaired trailing digit would otherwise be read out of bounds.
    enforce(hex.length % 2 == 0, "Odd number of hex digits");
    for (size_t i=0; i<hex.length; i+=2)
        buf[i/2] = cast(ubyte)(
            parseHexDigit!config(hex[i  ])*16 +
            parseHexDigit!config(hex[i+1])
        );
}

/// Fast version for static arrays of known length.
void sarrayFromHex(HexParseConfig config = HexParseConfig.init, size_t N, Hex)(in ref Hex hex, ref ubyte[N] buf)
    if (is(Hex == char[N*2]))
{
    // Handle 8 hex characters (4 output bytes) per iteration by loading them
    // as one ulong. NOTE(review): the shift pattern looks like it assumes a
    // little-endian target - confirm.
    foreach (i; 0..N/4)
    {
        ulong chars = (cast(ulong*)hex.ptr)[i];
        uint res =
            (parseHexDigit!config((chars >> (8*0)) & 0xFF) << (4*1)) |
            (parseHexDigit!config((chars >> (8*1)) & 0xFF) << (4*0)) |
            (parseHexDigit!config((chars >> (8*2)) & 0xFF) << (4*3)) |
            (parseHexDigit!config((chars >> (8*3)) & 0xFF) << (4*2)) |
            (parseHexDigit!config((chars >> (8*4)) & 0xFF) << (4*5)) |
            (parseHexDigit!config((chars >> (8*5)) & 0xFF) << (4*4)) |
            (parseHexDigit!config((chars >> (8*6)) & 0xFF) << (4*7)) |
            (parseHexDigit!config((chars >> (8*7)) & 0xFF) << (4*6));
        (cast(uint*)buf.ptr)[i] = res;
    }
    // Remaining (N % 4) bytes, one digit pair at a time.
    foreach (i; N/4*4..N)
        buf[i] = cast(ubyte)(
            parseHexDigit!config(hex[i*2  ])*16 +
            parseHexDigit!config(hex[i*2+1])
        );
}

unittest
{
    foreach (checked; TypeTuple!(false, true))
        foreach (lower; TypeTuple!(false, true))
            foreach (upper; TypeTuple!(false, true))
                static if (lower || upper)
                {
                    enum config = HexParseConfig(checked, lower, upper);
                    char[18] buf;
                    foreach (n; 0..18)
                        if (lower && upper ? n & 1 : upper)
                            buf[n] = hexDigits[n % 16];
                        else
                            buf[n] = lowerHexDigits[n % 16];
                    ubyte[9] res;
                    sarrayFromHex!config(buf, res);
                    assert(res == [0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0x01], text(res));
                }
}

/// Encodes bytes as hex text using the given digit alphabet
/// (upper-case `hexDigits` by default).
template toHex(alias digits = hexDigits)
{
    /// Writes the hex form of `data` into `buf`, which must be exactly
    /// `data.length*2` characters long, and returns `buf`.
    char[] toHex(in ubyte[] data, char[] buf) pure
    {
        assert(buf.length == data.length*2);
        foreach (i, b; data)
        {
            buf[i*2  ] = digits[b>>4];
            buf[i*2+1] = digits[b&15];
        }
        return buf;
    }

    /// Returns the hex form of `data` as a newly allocated string.
    string toHex(in ubyte[] data) pure
    {
        auto buf = new char[data.length*2];
        foreach (i, b; data)
        {
            buf[i*2  ] = digits[b>>4];
            buf[i*2+1] = digits[b&15];
        }
        return buf;
    }
}

/// Same as `toHex`, but with lower-case digits.
alias toLowerHex = toHex!lowerHexDigits;

/// Writes the integer `n` into `buf` as fixed-width, zero-padded
/// upper-case hex, most significant digit first.
void toHex(T : ulong, size_t U = T.sizeof*2)(T n, ref char[U] buf)
{
    // Fill from the last digit backwards, consuming 4 bits at a time.
    foreach (i; Reverse!(RangeTuple!(T.sizeof*2)))
    {
        buf[i] = hexDigits[n & 0xF];
        n >>= 4;
    }
}

unittest
{
    ubyte[] bytes = [0x12, 0x34];
    assert(toHex(bytes) == "1234");
}

unittest
{
    ubyte[] bytes = [0x12, 0x34];
    char[] buf = new char[4];
    toHex(bytes, buf);
    assert(buf == "1234");
}

unittest
{
    char[8] buf;
    toHex(0x01234567, buf);
    assert(buf == "01234567");
}

/// How many significant decimal digits does a FP type have
/// (determined empirically)
enum significantDigits(T : real) = 2 + 2 * T.sizeof;

/// Format string for a FP type which includes all necessary
/// significant digits
enum fpFormatString(T) = "%." ~ text(significantDigits!T) ~ "g";

/// Get shortest string representation of a FP type that still converts to exactly the same number.
template fpToString(F)
{
    string fpToString(F v)
    {
        /// Bypass FPU register, which may contain a different precision
        static F forceType(F d) { static F n; n = d; return n; }

        // Start from a representation with all significant digits.
        StaticBuf!(char, 64) buf;
        formattedWrite(&buf, fpFormatString!F, forceType(v));
        char[] s = buf.data();

        if (s != "nan" && s != "-nan" && s != "inf" && s != "-inf")
        {
            // The long form must round-trip before it can be shortened.
            if (forceType(to!F(s)) != v)
            {
                static if (is(F == real))
                {
                    // Something funny with DM libc real parsing... e.g. 0.6885036635121051783
                    return s.idup;
                }
                else
                    assert(false, "Initial conversion fails: " ~ format(fpFormatString!F, to!F(s)));
            }

            // Try cutting the string after each digit, from the right, with
            // that digit incremented; keep the cut if the prefix still
            // parses back to exactly v, otherwise restore the digit.
            foreach_reverse (i; 1..s.length)
                if (s[i]>='0' && s[i]<='8')
                {
                    s[i]++;
                    if (forceType(to!F(s[0..i+1]))==v)
                        s = s[0..i+1];
                    else
                        s[i]--;
                }
            // Drop trailing characters for as long as the value is unchanged.
            while (s.length>2 && s[$-1]!='.' && forceType(to!F(s[0..$-1]))==v)
                s = s[0..$-1];
        }
        return s.idup;
    }

    static if (!is(F == real))
    unittest
    {
        union U
        {
            ubyte[F.sizeof] bytes;
            F d;
            string toString() { return (fpFormatString!F ~ " %a [%(%02X %)]").format(d, d, bytes[]); }
        }
        import std.random : Xorshift, uniform;
        import std.stdio : stderr;
        Xorshift rng;
        // Round-trip random bit patterns through fpToString and back.
        foreach (n; 0..10000)
        {
            U u;
            foreach (ref b; u.bytes[])
                b = uniform!ubyte(rng);
            static if (is(F == real))
                u.bytes[7] |= 0x80; // require normalized value
            scope(failure) stderr.writeln("Input:\t", u);
            auto s = fpToString(u.d);
            scope(failure) stderr.writeln("Result:\t", s);
            if (s == "nan" || s == "-nan")
                continue; // there are many NaNs...
            U r;
            r.d = to!F(s);
            assert(r.bytes == u.bytes,
                "fpToString mismatch:\nOutput:\t%s".format(r));
        }
    }
}

/// `fpToString` instantiated for `double`.
alias doubleToString = fpToString!double;

unittest
{
    // Instantiate the remaining variants so that they are compiled (and,
    // where enabled, their unittests are run).
    alias floatToString = fpToString!float;
    alias realToString = fpToString!real;
}

/// Converts any numeric value to a string: floating-point values via
/// `fpToString`, everything else via `toDec`.
string numberToString(T)(T v)
    if (isNumeric!T)
{
    // `is(T : real)` would hold for every numeric type (integers convert
    // implicitly to real), so test for floating point explicitly.
    static if (isFloatingPoint!T)
        return fpToString(v);
    else
        return toDec(v);
}

// ************************************************************************

/// Simpler implementation of Levenshtein string distance
/// (operates on code units; string lengths are narrowed to int).
int stringDistance(string s, string t)
{
    int n = cast(int)s.length;
    int m = cast(int)t.length;
    if (n == 0) return m;
    if (m == 0) return n;
    int[][] distance = new int[][](n+1, m+1); // matrix
    int cost=0;
    //init1
    foreach (i; 0..n+1) distance[i][0]=i;
    foreach (j; 0..m+1) distance[0][j]=j;
    //find min distance
    foreach (i; 1..n+1)
        foreach (j; 1..m+1)
        {
            cost = t[j-1] == s[i-1] ? 0 : 1;
            distance[i][j] = min(
                distance[i-1][j  ] + 1,    // deletion
                distance[i  ][j-1] + 1,    // insertion
                distance[i-1][j-1] + cost  // substitution (or match)
            );
        }
    return distance[n][m];
}

/// Return a number between 0.0 and 1.0 indicating how similar two strings are
/// (1.0 if identical)
float stringSimilarity(string string1, string string2)
{
    float dis = stringDistance(string1, string2);
    // Normalize by the length of the longer string.
    float maxLen = string1.length;
    if (maxLen < string2.length)
        maxLen = string2.length;
    if (maxLen == 0)
        return 1;
    else
        return 1f - dis/maxLen;
}

/// Select best match from a list of items.
/// Returns -1 if none are above the threshold.
sizediff_t findBestMatch(in string[] items, string target, float threshold = 0.7)
{
    // Case-insensitive comparison; among equally similar items the last
    // one wins, because a tie replaces the current best.
    sizediff_t bestIndex = -1;
    float bestScore = 0;

    foreach (idx, candidate; items)
    {
        float score = stringSimilarity(toLower(candidate),toLower(target));
        if (!(score>threshold && score>=bestScore))
            continue;
        bestIndex = idx;
        bestScore = score;
    }

    return bestIndex;
}

/// Select best match from a list of items.
/// Returns null if none are above the threshold.
string selectBestFrom(in string[] items, string target, float threshold = 0.7)
{
    auto position = findBestMatch(items, target, threshold);
    if (position < 0)
        return null;
    return items[position];
}

// ************************************************************************


/// Returns a string of `length` characters, each drawn uniformly at random
/// from `chars`.
string randomString(int length=20, string chars="abcdefghijklmnopqrstuvwxyz")
{
    import std.random;
    import std.range;

    auto picks = iota(length).map!(n => chars[uniform(0, chars.length)]);
    return picks.array;
}