1 /** 2 * Utility code related to string and text processing. 3 * 4 * License: 5 * This Source Code Form is subject to the terms of 6 * the Mozilla Public License, v. 2.0. If a copy of 7 * the MPL was not distributed with this file, You 8 * can obtain one at http://mozilla.org/MPL/2.0/. 9 * 10 * Authors: 11 * Vladimir Panteleev <vladimir@thecybershadow.net> 12 */ 13 14 module ae.utils.text; 15 16 import std.algorithm; 17 import std.ascii; 18 import std.exception; 19 import std.conv; 20 import std.format; 21 import std.string; 22 import std.traits; 23 import std.typetuple; 24 25 import core.stdc.stdio : sprintf, sscanf; 26 import core.stdc.string; 27 28 import ae.utils.array; 29 import ae.utils.meta; 30 import ae.utils.text.parsefp; 31 import ae.utils.textout; 32 33 alias indexOf = std..string.indexOf; 34 35 public import ae.utils.text.ascii : ascii, DecimalSize, toDec, toDecFixed, asciiToLower, asciiToUpper; 36 public import ae.utils.array : contains; 37 38 // ************************************************************************ 39 40 /// CTFE helper 41 string formatAs(T)(auto ref T obj, string fmt) 42 { 43 return format(fmt, obj); 44 } 45 46 /// Consume a LF or CRLF terminated line from s. 47 /// Sets s to null and returns the remainder 48 /// if there is no line terminator in s. 49 T[] eatLine(T)(ref T[] s, bool eatIncompleteLines = true) 50 { 51 return s.skipUntil([T('\n')], eatIncompleteLines).chomp(); 52 } 53 54 deprecated template eatLine(OnEof onEof) 55 { 56 T[] eatLine(T)(ref T[] s) 57 { 58 return s.eatUntil!onEof([T('\n')]).chomp(); 59 } 60 } 61 62 unittest 63 { 64 string s = "Hello\nworld"; 65 assert(s.eatLine() == "Hello"); 66 assert(s.eatLine() == "world"); 67 assert(s is null); 68 assert(s.eatLine() is null); 69 } 70 71 // Uses memchr (not Boyer-Moore), best for short strings. 72 T[] fastReplace(T)(T[] what, T[] from, T[] to) 73 if (T.sizeof == 1) // TODO (uses memchr) 74 { 75 alias Unqual!T U; 76 77 // debug scope(failure) std.stdio.writeln("fastReplace crashed: ", [what, from, to]); 78 enum RAM = cast(U*)null; 79 80 if (what.length < from.length || from.length==0) 81 return what; 82 83 if (from.length==1) 84 { 85 auto fromc = from[0]; 86 if (to.length==1) 87 { 88 auto p = cast(T*)memchr(what.ptr, fromc, what.length); 89 if (!p) 90 return what; 91 92 T[] result = what.dup; 93 auto delta = result.ptr - what.ptr; 94 auto toChar = to[0]; 95 auto end = what.ptr + what.length; 96 do 97 { 98 (cast(U*)p)[delta] = toChar; // zomg hax lol 99 p++; 100 p = cast(T*)memchr(p, fromc, end - p); 101 } while (p); 102 return result; 103 } 104 else 105 { 106 auto p = cast(immutable(T)*)memchr(what.ptr, fromc, what.length); 107 if (!p) 108 return what; 109 110 auto sb = StringBuilder(what.length); 111 do 112 { 113 sb.put(what[0..p-what.ptr], to); 114 what = what[p-what.ptr+1..$]; 115 p = cast(immutable(T)*)memchr(what.ptr, fromc, what.length); 116 } 117 while (p); 118 119 sb.put(what); 120 return sb.get(); 121 } 122 } 123 124 auto head = from[0]; 125 auto tail = from[1..$]; 126 127 auto p = cast(T*)what.ptr; 128 auto end = p + what.length - tail.length; 129 p = cast(T*)memchr(p, head, end-p); 130 while (p) 131 { 132 p++; 133 if (p[0..tail.length] == tail) 134 { 135 if (from.length == to.length) 136 { 137 T[] result = what.dup; 138 auto deltaMinusOne = (result.ptr - what.ptr) - 1; 139 140 goto replaceA; 141 dummyA: // compiler complains 142 143 do 144 { 145 p++; 146 if (p[0..tail.length] == tail) 147 { 148 replaceA: 149 (cast(U*)p+deltaMinusOne)[0..to.length] = to[]; 150 } 151 p = cast(T*)memchr(p, head, end-p); 152 } 153 while (p); 154 155 return result; 156 } 157 else 158 { 159 auto start = cast(T*)what.ptr; 160 auto sb = StringBuilder(what.length); 161 goto replaceB; 162 dummyB: // compiler complains 163 164 do 165 { 166 p++; 167 if (p[0..tail.length] == tail) 168 { 169 replaceB: 170 sb.put(RAM[cast(size_t)start .. cast(size_t)p-1], to); 171 start = p + tail.length; 172 what = what[start-what.ptr..$]; 173 } 174 else 175 { 176 what = what[p-what.ptr..$]; 177 } 178 p = cast(T*)memchr(what.ptr, head, what.length); 179 } 180 while (p); 181 182 //sb.put(what); 183 sb.put(RAM[cast(size_t)start..cast(size_t)(what.ptr+what.length)]); 184 return sb.get(); 185 } 186 187 assert(0); 188 } 189 p = cast(T*)memchr(p, head, end-p); 190 } 191 192 return what; 193 } 194 195 unittest 196 { 197 import std.array; 198 void test(string haystack, string from, string to) 199 { 200 auto description = `("` ~ haystack ~ `", "` ~ from ~ `", "` ~ to ~ `")`; 201 202 auto r1 = fastReplace(haystack, from, to); 203 auto r2 = replace(haystack, from, to); 204 assert(r1 == r2, `Bad replace: ` ~ description ~ ` == "` ~ r1 ~ `"`); 205 206 if (r1 == haystack) 207 assert(r1 is haystack, `Pointless reallocation: ` ~ description); 208 } 209 210 test("Mary had a little lamb", "a", "b"); 211 test("Mary had a little lamb", "a", "aaa"); 212 test("Mary had a little lamb", "Mary", "Lucy"); 213 test("Mary had a little lamb", "Mary", "Jimmy"); 214 test("Mary had a little lamb", "lamb", "goat"); 215 test("Mary had a little lamb", "lamb", "sheep"); 216 test("Mary had a little lamb", " l", " x"); 217 test("Mary had a little lamb", " l", " xx"); 218 219 test("Mary had a little lamb", "X" , "Y" ); 220 test("Mary had a little lamb", "XX", "Y" ); 221 test("Mary had a little lamb", "X" , "YY"); 222 test("Mary had a little lamb", "XX", "YY"); 223 test("Mary had a little lamb", "aX", "Y" ); 224 test("Mary had a little lamb", "aX", "YY"); 225 226 test("foo", "foobar", "bar"); 227 } 228 229 T[][] fastSplit(T, U)(T[] s, U d) 230 if (is(Unqual!T == Unqual!U)) 231 { 232 if (!s.length) 233 return null; 234 235 auto p = cast(T*)memchr(s.ptr, d, s.length); 236 if (!p) 237 return [s]; 238 239 size_t n; 240 auto end = s.ptr + s.length; 241 do 242 { 243 n++; 244 p++; 245 p = cast(T*) memchr(p, d, end-p); 246 } 247 while (p); 248 249 auto result = new T[][n+1]; 250 n = 0; 251 auto start = s.ptr; 252 p = cast(T*) memchr(start, d, s.length); 253 do 254 { 255 result[n++] = start[0..p-start]; 256 start = ++p; 257 p = cast(T*) memchr(p, d, end-p); 258 } 259 while (p); 260 result[n] = start[0..end-start]; 261 262 return result; 263 } 264 265 T[][] splitAsciiLines(T)(T[] text) 266 if (is(Unqual!T == char)) 267 { 268 auto lines = text.fastSplit('\n'); 269 foreach (ref line; lines) 270 if (line.length && line[$-1]=='\r') 271 line = line[0..$-1]; 272 return lines; 273 } 274 275 unittest 276 { 277 assert(splitAsciiLines("a\nb\r\nc\r\rd\n\re\r\n\nf") == ["a", "b", "c\r\rd", "\re", "", "f"]); 278 assert(splitAsciiLines(string.init) == splitLines(string.init)); 279 } 280 281 /// Like std.string.split (one argument version, which splits by 282 /// whitespace), but only splits by ASCII and does not autodecode. 283 T[][] asciiSplit(T)(T[] text) 284 if (is(Unqual!T == char)) 285 { 286 bool inWhitespace = true; 287 size_t wordStart; 288 T[][] result; 289 290 void endWord(size_t p) 291 { 292 if (!inWhitespace) 293 { 294 result ~= text[wordStart..p]; 295 inWhitespace = true; 296 } 297 } 298 299 foreach (p, c; text) 300 if (std.ascii.isWhite(c)) 301 endWord(p); 302 else 303 if (inWhitespace) 304 { 305 inWhitespace = false; 306 wordStart = p; 307 } 308 endWord(text.length); 309 return result; 310 } 311 312 unittest 313 { 314 foreach (s; ["", " ", "a", " a", "a ", "a b", " a b", "a b ", " a b ", 315 " ", " a", "a ", "a b", "a b ", "a b c"]) 316 assert(s.split == s.asciiSplit, format("Got %s, expected %s", s.asciiSplit, s.split)); 317 } 318 319 T[] asciiStrip(T)(T[] s) 320 if (is(Unqual!T == char)) 321 { 322 while (s.length && isWhite(s[0])) 323 s = s[1..$]; 324 while (s.length && isWhite(s[$-1])) 325 s = s[0..$-1]; 326 return s; 327 } 328 329 unittest 330 { 331 string s = "Hello, world!"; 332 assert(asciiStrip(s) is s); 333 assert(asciiStrip("\r\n\tHello ".dup) == "Hello"); 334 } 335 336 /// Covering slice-list of s with interleaved whitespace. 337 T[][] segmentByWhitespace(T)(T[] s) 338 if (is(Unqual!T == char)) 339 { 340 if (!s.length) 341 return null; 342 343 T[][] segments; 344 bool wasWhite = isWhite(s[0]); 345 size_t start = 0; 346 foreach (p, char c; s) 347 { 348 bool isWhite = isWhite(c); 349 if (isWhite != wasWhite) 350 segments ~= s[start..p], 351 start = p; 352 wasWhite = isWhite; 353 } 354 segments ~= s[start..$]; 355 356 return segments; 357 } 358 359 T[] newlinesToSpaces(T)(T[] s) 360 if (is(Unqual!T == char)) 361 { 362 auto slices = segmentByWhitespace(s); 363 foreach (ref slice; slices) 364 if (slice.contains("\n")) 365 slice = " "; 366 return slices.join(); 367 } 368 369 ascii normalizeWhitespace(ascii s) 370 { 371 auto slices = segmentByWhitespace(strip(s)); 372 foreach (i, ref slice; slices) 373 if (i & 1) // odd 374 slice = " "; 375 return slices.join(); 376 } 377 378 unittest 379 { 380 assert(normalizeWhitespace(" Mary had\ta\nlittle\r\n\tlamb") == "Mary had a little lamb"); 381 } 382 383 string[] splitByCamelCase(string s) 384 { 385 string[] result; 386 size_t start = 0; 387 foreach (i; 1..s.length+1) 388 if (i == s.length 389 || (isLower(s[i-1]) && isUpper(s[i])) 390 || (i+1 < s.length && isUpper(s[i-1]) && isUpper(s[i]) && isLower(s[i+1])) 391 ) 392 { 393 result ~= s[start..i]; 394 start = i; 395 } 396 return result; 397 } 398 399 unittest 400 { 401 assert(splitByCamelCase("parseIPString") == ["parse", "IP", "String"]); 402 assert(splitByCamelCase("IPString") == ["IP", "String"]); 403 } 404 405 string camelCaseJoin(string[] arr) 406 { 407 if (!arr.length) 408 return null; 409 string result = arr[0]; 410 foreach (s; arr[1..$]) 411 result ~= std.ascii.toUpper(s[0]) ~ s[1..$]; 412 return result; 413 } 414 415 unittest 416 { 417 assert("parse-IP-string".split('-').camelCaseJoin() == "parseIPString"); 418 } 419 420 // ************************************************************************ 421 422 /// Case-insensitive ASCII string. 423 alias CIAsciiString = NormalizedArray!(immutable(char), s => s.byCodeUnit.map!(std.ascii.toLower)); 424 425 /// 426 unittest 427 { 428 CIAsciiString s = "test"; 429 assert(s == "TEST"); 430 assert(s >= "Test" && s <= "Test"); 431 assert(CIAsciiString("a") == CIAsciiString("A")); 432 assert(CIAsciiString("a") != CIAsciiString("B")); 433 assert(CIAsciiString("a") < CIAsciiString("B")); 434 assert(CIAsciiString("A") < CIAsciiString("b")); 435 assert(CIAsciiString("я") != CIAsciiString("Я")); 436 } 437 438 /// Case-insensitive Unicode string. 439 alias CIUniString = NormalizedArray!(immutable(char), s => s.map!(std.uni.toLower)); 440 441 /// 442 unittest 443 { 444 CIUniString s = "привет"; 445 assert(s == "ПРИВЕТ"); 446 assert(s >= "Привет" && s <= "Привет"); 447 assert(CIUniString("я") == CIUniString("Я")); 448 assert(CIUniString("а") != CIUniString("Б")); 449 assert(CIUniString("а") < CIUniString("Б")); 450 assert(CIUniString("А") < CIUniString("б")); 451 } 452 453 // ************************************************************************ 454 455 import std.utf; 456 457 /// Convert any data to a valid UTF-8 bytestream, so D's string functions can 458 /// properly work on it. 459 string rawToUTF8(in char[] s) 460 { 461 auto d = new dchar[s.length]; 462 foreach (i, char c; s) 463 d[i] = c; 464 return toUTF8(d); 465 } 466 467 /// Undo rawToUTF8. 468 ascii UTF8ToRaw(in char[] r) pure 469 { 470 auto s = new char[r.length]; 471 size_t i = 0; 472 foreach (dchar c; r) 473 { 474 assert(c < '\u0100'); 475 s[i++] = cast(char)c; 476 } 477 return s[0..i]; 478 } 479 480 unittest 481 { 482 char[1] c; 483 for (int i=0; i<256; i++) 484 { 485 c[0] = cast(char)i; 486 assert(UTF8ToRaw(rawToUTF8(c[])) == c[], format("%s -> %s -> %s", cast(ubyte[])c[], cast(ubyte[])rawToUTF8(c[]), cast(ubyte[])UTF8ToRaw(rawToUTF8(c[])))); 487 } 488 } 489 490 /// Where a delegate with this signature is required. 491 string nullStringTransform(in char[] s) { return to!string(s); } 492 493 string forceValidUTF8(string s) 494 { 495 try 496 { 497 validate(s); 498 return s; 499 } 500 catch (UTFException) 501 return rawToUTF8(s); 502 } 503 504 // ************************************************************************ 505 506 /// Return the slice up to the first NUL character, 507 /// or of the whole array if none is found. 508 C[] fromZArray(C, n)(ref C[n] arr) 509 { 510 auto p = arr.representation.countUntil(0); 511 return arr[0 .. p<0 ? $ : p]; 512 } 513 514 /// ditto 515 C[] fromZArray(C)(C[] arr) 516 { 517 auto p = arr.representation.countUntil(0); 518 return arr[0 .. p<0 ? $ : p]; 519 } 520 521 unittest 522 { 523 char[4] arr = "ab\0d"; 524 assert(arr.fromZArray == "ab"); 525 arr[] = "abcd"; 526 assert(arr.fromZArray == "abcd"); 527 } 528 529 unittest 530 { 531 string arr = "ab\0d"; 532 assert(arr.fromZArray == "ab"); 533 arr = "abcd"; 534 assert(arr.fromZArray == "abcd"); 535 } 536 537 // ************************************************************************ 538 539 /// Formats binary data as a hex dump (three-column layout consisting of hex 540 /// offset, byte values in hex, and printable low-ASCII characters). 541 string hexDump(const(void)[] b) 542 { 543 auto data = cast(const(ubyte)[]) b; 544 assert(data.length); 545 size_t i=0; 546 string s; 547 while (i<data.length) 548 { 549 s ~= format("%08X: ", i); 550 foreach (x; 0..16) 551 { 552 if (i+x<data.length) 553 s ~= format("%02X ", data[i+x]); 554 else 555 s ~= " "; 556 if (x==7) 557 s ~= "| "; 558 } 559 s ~= " "; 560 foreach (x; 0..16) 561 { 562 if (i+x<data.length) 563 if (data[i+x]==0) 564 s ~= ' '; 565 else 566 if (data[i+x]<32 || data[i+x]>=128) 567 s ~= '.'; 568 else 569 s ~= cast(char)data[i+x]; 570 else 571 s ~= ' '; 572 } 573 s ~= "\n"; 574 i += 16; 575 } 576 return s; 577 } 578 579 import std.conv; 580 581 T fromHex(T : ulong = uint, C)(const(C)[] s) 582 { 583 T result = parse!T(s, 16); 584 enforce(s.length==0, new ConvException("Could not parse entire string")); 585 return result; 586 } 587 588 ubyte[] arrayFromHex(in char[] hex) 589 { 590 auto buf = new ubyte[hex.length/2]; 591 arrayFromHex(hex, buf); 592 return buf; 593 } 594 595 struct HexParseConfig 596 { 597 bool checked = true; 598 bool lower = true; 599 bool upper = true; 600 } 601 602 ubyte parseHexDigit(HexParseConfig config = HexParseConfig.init)(char c) 603 { 604 static assert(config.lower || config.upper, 605 "Must parse at least either lower or upper case digits"); 606 static if (config.checked) 607 { 608 switch (c) 609 { 610 case '0': .. case '9': return cast(ubyte)(c - '0'); 611 case 'a': .. case 'f': return cast(ubyte)(c - 'a' + 10); 612 case 'A': .. case 'F': return cast(ubyte)(c - 'A' + 10); 613 default: throw new Exception("Bad hex digit: " ~ c); 614 } 615 } 616 else 617 { 618 if (c <= '9') 619 return cast(ubyte)(c - '0'); 620 static if (config.lower && config.upper) 621 { 622 if (c < 'a') 623 return cast(ubyte)(c - 'A' + 10); 624 else 625 return cast(ubyte)(c - 'a' + 10); 626 } 627 else 628 static if (config.lower) 629 return cast(ubyte)(c - 'a' + 10); 630 else 631 return cast(ubyte)(c - 'A' + 10); 632 } 633 } 634 635 void arrayFromHex(HexParseConfig config = HexParseConfig.init)(in char[] hex, ubyte[] buf) 636 { 637 assert(buf.length == hex.length/2, "Wrong buffer size for arrayFromHex"); 638 for (int i=0; i<hex.length; i+=2) 639 buf[i/2] = cast(ubyte)( 640 parseHexDigit!config(hex[i ])*16 + 641 parseHexDigit!config(hex[i+1]) 642 ); 643 } 644 645 /// Fast version for static arrays of known length. 646 void sarrayFromHex(HexParseConfig config = HexParseConfig.init, size_t N, Hex)(in ref Hex hex, ref ubyte[N] buf) 647 if (is(Hex == char[N*2])) 648 { 649 foreach (i; 0..N/4) 650 { 651 ulong chars = (cast(ulong*)hex.ptr)[i]; 652 uint res = 653 (parseHexDigit!config((chars >> (8*0)) & 0xFF) << (4*1)) | 654 (parseHexDigit!config((chars >> (8*1)) & 0xFF) << (4*0)) | 655 (parseHexDigit!config((chars >> (8*2)) & 0xFF) << (4*3)) | 656 (parseHexDigit!config((chars >> (8*3)) & 0xFF) << (4*2)) | 657 (parseHexDigit!config((chars >> (8*4)) & 0xFF) << (4*5)) | 658 (parseHexDigit!config((chars >> (8*5)) & 0xFF) << (4*4)) | 659 (parseHexDigit!config((chars >> (8*6)) & 0xFF) << (4*7)) | 660 (parseHexDigit!config((chars >> (8*7)) & 0xFF) << (4*6)); 661 (cast(uint*)buf.ptr)[i] = res; 662 } 663 foreach (i; N/4*4..N) 664 buf[i] = cast(ubyte)( 665 parseHexDigit!config(hex[i*2 ])*16 + 666 parseHexDigit!config(hex[i*2+1]) 667 ); 668 } 669 670 unittest 671 { 672 foreach (checked; TypeTuple!(false, true)) 673 foreach (lower; TypeTuple!(false, true)) 674 foreach (upper; TypeTuple!(false, true)) 675 static if (lower || upper) 676 { 677 enum config = HexParseConfig(checked, lower, upper); 678 char[18] buf; 679 foreach (n; 0..18) 680 if (lower && upper ? n & 1 : upper) 681 buf[n] = hexDigits[n % 16]; 682 else 683 buf[n] = lowerHexDigits[n % 16]; 684 ubyte[9] res; 685 sarrayFromHex!config(buf, res); 686 assert(res == [0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0x01], text(res)); 687 } 688 } 689 690 template toHex(alias digits = hexDigits) 691 { 692 char[] toHex(in ubyte[] data, char[] buf) pure 693 { 694 assert(buf.length == data.length*2); 695 foreach (i, b; data) 696 { 697 buf[i*2 ] = digits[b>>4]; 698 buf[i*2+1] = digits[b&15]; 699 } 700 return buf; 701 } 702 703 string toHex(in ubyte[] data) pure 704 { 705 auto buf = new char[data.length*2]; 706 foreach (i, b; data) 707 { 708 buf[i*2 ] = digits[b>>4]; 709 buf[i*2+1] = digits[b&15]; 710 } 711 return buf; 712 } 713 } 714 715 alias toLowerHex = toHex!lowerHexDigits; 716 717 void toHex(T : ulong, size_t U = T.sizeof*2)(T n, ref char[U] buf) 718 { 719 foreach (i; Reverse!(RangeTuple!(T.sizeof*2))) 720 { 721 buf[i] = hexDigits[n & 0xF]; 722 n >>= 4; 723 } 724 } 725 726 unittest 727 { 728 ubyte[] bytes = [0x12, 0x34]; 729 assert(toHex(bytes) == "1234"); 730 } 731 732 unittest 733 { 734 ubyte[] bytes = [0x12, 0x34]; 735 char[] buf = new char[4]; 736 toHex(bytes, buf); 737 assert(buf == "1234"); 738 } 739 740 unittest 741 { 742 char[8] buf; 743 toHex(0x01234567, buf); 744 assert(buf == "01234567"); 745 } 746 747 /// How many significant decimal digits does a FP type have 748 /// (determined empirically - valid for all D FP types on x86/64) 749 enum significantDigits(T : real) = 2 + 2 * T.sizeof; 750 751 /// Format string for a FP type which includes all necessary 752 /// significant digits 753 enum fpFormatString(T) = "%." ~ text(significantDigits!T) ~ "g"; 754 template cWidthString(T) 755 { 756 static if (is(T == float)) 757 enum cWidthString = ""; 758 else 759 static if (is(T == double)) 760 enum cWidthString = "l"; 761 else 762 static if (is(T == real)) 763 enum cWidthString = "L"; 764 } 765 enum fpCFormatString(T) = "%." ~ text(significantDigits!T) ~ cWidthString!T ~ "g"; 766 767 private auto fpToBuf(F)(F val) @nogc 768 { 769 /// Bypass FPU register, which may contain a different precision 770 static F forceType(F d) { static F n; n = d; return n; } 771 772 enum isReal = is(Unqual!F == real); 773 774 StaticBuf!(char, 64) buf = void; 775 776 // MSVC workaround from std.format: 777 version (CRuntime_Microsoft) 778 { 779 import std.math : isNaN, isInfinity; 780 immutable double v = val; // convert early to get "inf" in case of overflow 781 { 782 string s; 783 if (isNaN(v)) 784 s = "nan"; // snprintf writes 1.#QNAN 785 else if (isInfinity(v)) 786 s = val >= 0 ? "inf" : "-inf"; // snprintf writes 1.#INF 787 else 788 goto L1; 789 buf.buf[0..s.length] = s; 790 buf.pos = s.length; 791 return buf; 792 L1: 793 } 794 } 795 else 796 alias v = val; 797 798 buf.pos = sprintf(buf.buf.ptr, fpCFormatString!F.ptr, forceType(v)); 799 char[] s = buf.data(); 800 801 F parse(char[] s) 802 { 803 F f; 804 auto res = tryParse(s, f); 805 assert(res, "Failed to parse number we created"); 806 assert(!s.length, "Failed to completely parse number we created"); 807 return f; 808 } 809 810 if (s != "nan" && s != "-nan" && s != "inf" && s != "-inf") 811 { 812 if (forceType(parse(s)) != v) 813 { 814 static if (isReal) 815 { 816 // Something funny with DM libc real parsing... e.g. 0.6885036635121051783 817 return buf; 818 } 819 else 820 // assert(false, "Initial conversion fails: " ~ format(fpFormatString!F, parse(s)) ~ " / " ~ s); 821 assert(false, "Initial conversion fails"); 822 } 823 824 foreach_reverse (i; 1..s.length) 825 if (s[i]>='0' && s[i]<='8') 826 { 827 s[i]++; 828 if (forceType(parse(s[0..i+1]))==v) 829 s = s[0..i+1]; 830 else 831 s[i]--; 832 } 833 while (s.length>2 && s[$-1]!='.' && forceType(parse(s[0..$-1]))==v) 834 s = s[0..$-1]; 835 } 836 buf.pos = s.length; 837 return buf; 838 } 839 840 void putFP(Writer, F)(auto ref Writer writer, F v) @nogc 841 { 842 writer.put(fpToBuf(v).data); 843 } 844 845 846 /// Get shortest string representation of a FP type that still converts to exactly the same number. 847 template fpToString(F) 848 { 849 string fpToString(F v) 850 { 851 return fpToBuf(v).data.idup; 852 } 853 854 static if (!is(F == real)) 855 unittest 856 { 857 union U 858 { 859 ubyte[F.sizeof] bytes; 860 F d; 861 string toString() { return (fpFormatString!F ~ " %a [%(%02X %)]").format(d, d, bytes[]); } 862 } 863 import std.random : Xorshift, uniform; 864 import std.stdio : stderr; 865 Xorshift rng; 866 foreach (n; 0..10000) 867 { 868 U u; 869 foreach (ref b; u.bytes[]) 870 b = uniform!ubyte(rng); 871 static if (is(F == real)) 872 u.bytes[7] |= 0x80; // require normalized value 873 scope(failure) stderr.writeln("Input:\t", u); 874 auto s = fpToString(u.d); 875 scope(failure) stderr.writeln("Result:\t", s); 876 if (s == "nan" || s == "-nan") 877 continue; // there are many NaNs... 878 U r; 879 r.d = to!F(s); 880 assert(r.bytes == u.bytes, 881 "fpToString mismatch:\nOutput:\t%s".format(r)); 882 } 883 } 884 } 885 886 alias doubleToString = fpToString!double; 887 888 unittest 889 { 890 alias floatToString = fpToString!float; 891 alias realToString = fpToString!real; 892 } 893 894 string numberToString(T)(T v) 895 if (isNumeric!T) 896 { 897 static if (is(T : ulong)) 898 return toDec(v); 899 else 900 return fpToString(v); 901 } 902 903 // ************************************************************************ 904 905 /// Simpler implementation of Levenshtein string distance 906 int stringDistance(string s, string t) 907 { 908 int n = cast(int)s.length; 909 int m = cast(int)t.length; 910 if (n == 0) return m; 911 if (m == 0) return n; 912 int[][] distance = new int[][](n+1, m+1); // matrix 913 int cost=0; 914 //init1 915 foreach (i; 0..n+1) distance[i][0]=i; 916 foreach (j; 0..m+1) distance[0][j]=j; 917 //find min distance 918 foreach (i; 1..n+1) 919 foreach (j; 1..m+1) 920 { 921 cost = t[j-1] == s[i-1] ? 0 : 1; 922 distance[i][j] = min( 923 distance[i-1][j ] + 1, 924 distance[i ][j-1] + 1, 925 distance[i-1][j-1] + cost 926 ); 927 } 928 return distance[n][m]; 929 } 930 931 /// Return a number between 0.0 and 1.0 indicating how similar two strings are 932 /// (1.0 if identical) 933 float stringSimilarity(string string1, string string2) 934 { 935 float dis = stringDistance(string1, string2); 936 float maxLen = string1.length; 937 if (maxLen < string2.length) 938 maxLen = string2.length; 939 if (maxLen == 0) 940 return 1; 941 else 942 return 1f - dis/maxLen; 943 } 944 945 /// Select best match from a list of items. 946 /// Returns -1 if none are above the threshold. 947 sizediff_t findBestMatch(in string[] items, string target, float threshold = 0.7) 948 { 949 sizediff_t found = -1; 950 float best = 0; 951 952 foreach (i, item; items) 953 { 954 float match = stringSimilarity(toLower(item),toLower(target)); 955 if (match>threshold && match>=best) 956 { 957 best = match; 958 found = i; 959 } 960 } 961 962 return found; 963 } 964 965 /// Select best match from a list of items. 966 /// Returns null if none are above the threshold. 967 string selectBestFrom(in string[] items, string target, float threshold = 0.7) 968 { 969 auto index = findBestMatch(items, target, threshold); 970 return index < 0 ? null : items[index]; 971 } 972 973 // ************************************************************************ 974 975 976 string randomString()(int length=20, string chars="abcdefghijklmnopqrstuvwxyz") 977 { 978 import std.random; 979 import std.range; 980 981 return length.iota.map!(n => chars[uniform(0, $)]).array; 982 }