/**
 * Utility code related to string and text processing.
 *
 * License:
 *   This Source Code Form is subject to the terms of
 *   the Mozilla Public License, v. 2.0. If a copy of
 *   the MPL was not distributed with this file, You
 *   can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Authors:
 *   Vladimir Panteleev <vladimir@thecybershadow.net>
 */

module ae.utils.text;

import std.algorithm;
import std.ascii;
import std.exception;
import std.conv;
import std.format;
import std.string;
import std.traits;
import std.typetuple;

import core.stdc.stdio : snprintf, sscanf;
import core.stdc.string;

import ae.utils.array;
import ae.utils.meta;
import ae.utils.text.parsefp;
import ae.utils.textout;

// Pin the unqualified name `indexOf` to the std.string overload set
// (several of the modules imported above are imported wholesale).
alias indexOf = std.string.indexOf;

public import ae.utils.text.ascii : ascii, DecimalSize, toDec, toDecFixed, asciiToLower, asciiToUpper;
public import ae.utils.array : contains;

// ************************************************************************

/// CTFE helper: formats `obj` using the format string `fmt`
/// (a thin wrapper around `std.format.format`).
string formatAs(T)(auto ref T obj, string fmt)
{
	return format(fmt, obj);
}

/// Consume a LF or CRLF terminated line from s.
/// Sets s to null and returns the remainder
/// if there is no line terminator in s.
/// Params:
///   s                  = input; advanced past the consumed line
///   eatIncompleteLines = forwarded to `skipUntil`
///                        (NOTE(review): presumably controls whether an
///                        unterminated final line is consumed - confirm
///                        against ae.utils.array.skipUntil)
/// Returns: the consumed line, with the trailing line terminator removed
/// by `chomp`.
T[] eatLine(T)(ref T[] s, bool eatIncompleteLines = true)
{
	return s.skipUntil([T('\n')], eatIncompleteLines).chomp();
}

/// Deprecated variant which selects the end-of-input behavior at
/// compile time via `OnEof` and delegates to `eatUntil`.
deprecated template eatLine(OnEof onEof)
{
	T[] eatLine(T)(ref T[] s)
	{
		return s.eatUntil!onEof([T('\n')]).chomp();
	}
}

unittest
{
	string s = "Hello\nworld";
	assert(s.eatLine() == "Hello");
	assert(s.eatLine() == "world");
	assert(s is null);
	assert(s.eatLine() is null);
}

// Uses memchr (not Boyer-Moore), best for short strings.
/// Replace every occurrence of `from` in `what` with `to`.
/// Only available for element types of size 1, as the search uses memchr.
/// Returns: `what` itself (no allocation) when `from` is empty, is longer
/// than `what`, or does not occur in it; otherwise a newly built array.
T[] fastReplace(T)(T[] what, T[] from, T[] to)
	if (T.sizeof == 1) // TODO (uses memchr)
{
	alias Unqual!T U;

//	debug scope(failure) std.stdio.writeln("fastReplace crashed: ", [what, from, to]);
	// Null base pointer: RAM[a .. b], with addresses cast to size_t,
	// produces the slice spanning memory from address a to address b.
	enum RAM = cast(U*)null;

	if (what.length < from.length || from.length==0)
		return what;

	if (from.length==1)
	{
		auto fromc = from[0];
		if (to.length==1)
		{
			// Single character -> single character: copy once, patch in place.
			auto p = cast(T*)memchr(what.ptr, fromc, what.length);
			if (!p)
				return what;

			T[] result = what.dup;
			// Offset which maps a pointer into `what` to the same
			// position in `result`.
			auto delta = result.ptr - what.ptr;
			auto toChar = to[0];
			auto end = what.ptr + what.length;
			do
			{
				(cast(U*)p)[delta] = toChar; // zomg hax lol
				p++;
				p = cast(T*)memchr(p, fromc, end - p);
			} while (p);
			return result;
		}
		else
		{
			// Single character -> string of a different length:
			// the output length changes, so assemble it piecewise.
			auto p = cast(immutable(T)*)memchr(what.ptr, fromc, what.length);
			if (!p)
				return what;

			auto sb = StringBuilder(what.length);
			do
			{
				sb.put(what[0..p-what.ptr], to);
				what = what[p-what.ptr+1..$];
				p = cast(immutable(T)*)memchr(what.ptr, fromc, what.length);
			}
			while (p);

			sb.put(what);
			return sb.get();
		}
	}

	// Multi-character needle: find its first character with memchr,
	// then compare the remaining characters.
	auto head = from[0];
	auto tail = from[1..$];

	auto p = cast(T*)what.ptr;
	// Last position at which `head` may occur and still leave room for `tail`.
	auto end = p + what.length - tail.length;
	p = cast(T*)memchr(p, head, end-p);
	while (p)
	{
		p++;
		if (p[0..tail.length] == tail)
		{
			// First match found. Nothing is allocated before this point;
			// the code below jumps into the middle of its replacement
			// loop to handle this first match.
			if (from.length == to.length)
			{
				// Same length: copy once, overwrite matches in the copy.
				T[] result = what.dup;
				// p points one past the matched head, hence the extra -1.
				auto deltaMinusOne = (result.ptr - what.ptr) - 1;

				goto replaceA;
			dummyA: // compiler complains

				do
				{
					p++;
					if (p[0..tail.length] == tail)
					{
					replaceA:
						(cast(U*)p+deltaMinusOne)[0..to.length] = to[];
					}
					p = cast(T*)memchr(p, head, end-p);
				}
				while (p);

				return result;
			}
			else
			{
				// Different length: emit the unmatched stretches and `to`
				// alternately. `start` marks the beginning of the pending
				// unmatched stretch; `what` is shrunk to the part still
				// to be searched.
				auto start = cast(T*)what.ptr;
				auto sb = StringBuilder(what.length);
				goto replaceB;
			dummyB: // compiler complains

				do
				{
					p++;
					if (p[0..tail.length] == tail)
					{
					replaceB:
						sb.put(RAM[cast(size_t)start .. cast(size_t)p-1], to);
						start = p + tail.length;
						what = what[start-what.ptr..$];
					}
					else
					{
						what = what[p-what.ptr..$];
					}
					p = cast(T*)memchr(what.ptr, head, what.length);
				}
				while (p);

				//sb.put(what);
				sb.put(RAM[cast(size_t)start..cast(size_t)(what.ptr+what.length)]);
				return sb.get();
			}

			assert(0);
		}
		p = cast(T*)memchr(p, head, end-p);
	}

	return what;
}

unittest
{
	import std.array;
	// Checks fastReplace against std.array.replace, and that an input
	// without matches is returned as the very same slice.
	void test(string haystack, string from, string to)
	{
		auto description = `("` ~ haystack ~ `", "` ~ from ~ `", "` ~ to ~ `")`;

		auto r1 = fastReplace(haystack, from, to);
		auto r2 =     replace(haystack, from, to);
		assert(r1 == r2, `Bad replace: ` ~ description ~ ` == "` ~ r1 ~ `"`);

		if (r1 == haystack)
			assert(r1 is haystack, `Pointless reallocation: ` ~ description);
	}

	test("Mary had a little lamb", "a", "b");
	test("Mary had a little lamb", "a", "aaa");
	test("Mary had a little lamb", "Mary", "Lucy");
	test("Mary had a little lamb", "Mary", "Jimmy");
	test("Mary had a little lamb", "lamb", "goat");
	test("Mary had a little lamb", "lamb", "sheep");
	test("Mary had a little lamb", " l", " x");
	test("Mary had a little lamb", " l", " xx");

	test("Mary had a little lamb", "X" , "Y" );
	test("Mary had a little lamb", "XX", "Y" );
	test("Mary had a little lamb", "X" , "YY");
	test("Mary had a little lamb", "XX", "YY");
	test("Mary had a little lamb", "aX", "Y" );
	test("Mary had a little lamb", "aX", "YY");

	test("foo", "foobar", "bar");
}

/// Split `s` by the single-element delimiter `d`, using memchr.
/// Returns: `null` for empty input, `[s]` if the delimiter does not occur,
/// otherwise slices of `s` (the elements are not copied).
T[][] fastSplit(T, U)(T[] s, U d)
	if (is(Unqual!T == Unqual!U))
{
	if (!s.length)
		return null;

	auto p = cast(T*)memchr(s.ptr, d, s.length);
	if (!p)
		return [s];

	// First pass: count the delimiters, so that the result array
	// can be allocated with its final size.
	size_t n;
	auto end = s.ptr + s.length;
	do
	{
		n++;
		p++;
		p = cast(T*) memchr(p, d, end-p);
	}
	while (p);

	// Second pass: record the slice in front of every delimiter...
	auto result = new T[][n+1];
	n = 0;
	auto start = s.ptr;
	p = cast(T*) memchr(start, d, s.length);
	do
	{
		result[n++] = start[0..p-start];
		start = ++p;
		p = cast(T*) memchr(p, d, end-p);
	}
	while (p);
	// ...and the remainder after the last one.
	result[n] = start[0..end-start];

	return result;
}

/// Split `text` into lines on '\n', removing one trailing '\r'
/// from each resulting line.
T[][] splitAsciiLines(T)(T[] text)
	if (is(Unqual!T == char))
{
	auto lines = text.fastSplit('\n');
	foreach (ref line; lines)
		if (line.length && line[$-1]=='\r')
			line = line[0..$-1];
	return lines;
}

unittest
{
	assert(splitAsciiLines("a\nb\r\nc\r\rd\n\re\r\n\nf") == ["a", "b", "c\r\rd", "\re", "", "f"]);
	assert(splitAsciiLines(string.init) == splitLines(string.init));
}

/// Like std.string.split (one argument version, which splits by
/// whitespace), but only splits by ASCII and does not autodecode.
T[][] asciiSplit(T)(T[] text)
	if (is(Unqual!T == char))
{
	bool inWhitespace = true;
	size_t wordStart;
	T[][] result;

	// Close the current word (if any) at position p.
	void endWord(size_t p)
	{
		if (!inWhitespace)
		{
			result ~= text[wordStart..p];
			inWhitespace = true;
		}
	}

	foreach (p, c; text)
		if (std.ascii.isWhite(c))
			endWord(p);
		else
		if (inWhitespace)
		{
			inWhitespace = false;
			wordStart = p;
		}
	endWord(text.length);
	return result;
}

unittest
{
	// The second row exercises runs of more than one whitespace character.
	foreach (s; ["", " ", "a", " a", "a ", "a b", " a b", "a b ", " a b ",
			"  ", "  a", "a  ", "a  b", "a  b  ", "a  b  c"])
		assert(s.split == s.asciiSplit, format("Got %s, expected %s", s.asciiSplit, s.split));
}

/// Strip leading and trailing ASCII whitespace.
/// Returns a slice of `s` (no allocation).
T[] asciiStrip(T)(T[] s)
	if (is(Unqual!T == char))
{
	while (s.length && isWhite(s[0]))
		s = s[1..$];
	while (s.length && isWhite(s[$-1]))
		s = s[0..$-1];
	return s;
}

unittest
{
	string s = "Hello, world!";
	assert(asciiStrip(s) is s);
	assert(asciiStrip("\r\n\tHello ".dup) == "Hello");
}

/// Covering slice-list of s with interleaved whitespace.
/// The result alternates between runs of whitespace and runs of
/// non-whitespace; joining the segments yields `s` again.
/// Returns `null` for empty input.
T[][] segmentByWhitespace(T)(T[] s)
	if (is(Unqual!T == char))
{
	if (!s.length)
		return null;

	T[][] segments;
	bool wasWhite = isWhite(s[0]);
	size_t start = 0;
	foreach (p, char c; s)
	{
		// Deliberately not named `isWhite`, which would shadow the function.
		immutable bool nowWhite = isWhite(c);
		if (nowWhite != wasWhite)
		{
			// Whitespace class changed: close the previous segment.
			segments ~= s[start..p];
			start = p;
		}
		wasWhite = nowWhite;
	}
	segments ~= s[start..$];

	return segments;
}

/// Replace every whitespace run containing a newline with a single space.
/// Whitespace runs without a newline are left as they are.
T[] newlinesToSpaces(T)(T[] s)
	if (is(Unqual!T == char))
{
	auto slices = segmentByWhitespace(s);
	foreach (ref slice; slices)
		if (slice.contains("\n"))
			slice = " ";
	return slices.join();
}

/// Strip `s`, and collapse every inner whitespace run to a single space.
ascii normalizeWhitespace(ascii s)
{
	auto slices = segmentByWhitespace(strip(s));
	// After stripping, the first segment is non-whitespace,
	// so the whitespace runs are exactly the odd-indexed segments.
	foreach (i, ref slice; slices)
		if (i & 1) // odd
			slice = " ";
	return slices.join();
}

unittest
{
	assert(normalizeWhitespace(" Mary had\ta\nlittle\r\n\tlamb") == "Mary had a little lamb");
}

/// Split a camelCase identifier into its words.
/// A split occurs between a lower-case and an upper-case letter, and
/// before the last letter of an upper-case run which is followed by a
/// lower-case letter (so "IPString" becomes ["IP", "String"]).
string[] splitByCamelCase(string s)
{
	string[] result;
	size_t start = 0;
	foreach (i; 1..s.length+1)
		if (i == s.length
		 || (isLower(s[i-1]) && isUpper(s[i]))
		 || (i+1 < s.length && isUpper(s[i-1]) && isUpper(s[i]) && isLower(s[i+1]))
		)
		{
			result ~= s[start..i];
			start = i;
		}
	return result;
}

unittest
{
	assert(splitByCamelCase("parseIPString") == ["parse", "IP", "String"]);
	assert(splitByCamelCase("IPString") == ["IP", "String"]);
}

/// Join words into a camelCase identifier: the first word is used as is,
/// each following word gets its first character upper-cased (ASCII only).
/// Empty words are skipped. Returns `null` for an empty array.
string camelCaseJoin(string[] arr)
{
	if (!arr.length)
		return null;
	string result = arr[0];
	foreach (s; arr[1..$])
	{
		// An empty word (e.g. from two adjacent separators) contributes
		// nothing; indexing s[0] on it would be out of bounds.
		if (!s.length)
			continue;
		result ~= std.ascii.toUpper(s[0]) ~ s[1..$];
	}
	return result;
}

unittest
{
	assert("parse-IP-string".split('-').camelCaseJoin() == "parseIPString");
	assert("parse--string".split('-').camelCaseJoin() == "parseString");
}

// ************************************************************************

/// Case-insensitive ASCII string.
alias CIAsciiString = NormalizedArray!(immutable(char), s => s.byCodeUnit.map!(std.ascii.toLower));

///
unittest
{
	CIAsciiString s = "test";
	assert(s == "TEST");
	assert(s >= "Test" && s <= "Test");
	assert(CIAsciiString("a") == CIAsciiString("A"));
	assert(CIAsciiString("a") != CIAsciiString("B"));
	assert(CIAsciiString("a") <  CIAsciiString("B"));
	assert(CIAsciiString("A") <  CIAsciiString("b"));
	assert(CIAsciiString("я") != CIAsciiString("Я"));
}

// std.uni is referred to by its fully qualified name below;
// a static import makes that name resolvable without adding
// any unqualified symbols to this module's scope.
static import std.uni;

/// Case-insensitive Unicode string.
alias CIUniString = NormalizedArray!(immutable(char), s => s.map!(std.uni.toLower));

///
unittest
{
	CIUniString s = "привет";
	assert(s == "ПРИВЕТ");
	assert(s >= "Привет" && s <= "Привет");
	assert(CIUniString("я") == CIUniString("Я"));
	assert(CIUniString("а") != CIUniString("Б"));
	assert(CIUniString("а") <  CIUniString("Б"));
	assert(CIUniString("А") <  CIUniString("б"));
}

// ************************************************************************

import std.utf;

/// Convert any data to a valid UTF-8 bytestream, so D's string functions can
/// properly work on it.
/// Every input byte is treated as the code point with the same numeric
/// value, and the resulting code point sequence is encoded as UTF-8.
string rawToUTF8(in char[] s)
{
	auto d = new dchar[s.length];
	foreach (i, char c; s)
		d[i] = c;
	return toUTF8(d);
}

/// Undo rawToUTF8.
/// Every decoded code point must be below U+0100 (checked with an assert).
ascii UTF8ToRaw(in char[] r) pure
{
	// The output can not be longer than the input:
	// every code point takes at least one code unit.
	auto s = new char[r.length];
	size_t i = 0;
	foreach (dchar c; r)
	{
		assert(c < '\u0100');
		s[i++] = cast(char)c;
	}
	return s[0..i];
}

unittest
{
	// Round-trip every possible byte value.
	char[1] c;
	for (int i=0; i<256; i++)
	{
		c[0] = cast(char)i;
		assert(UTF8ToRaw(rawToUTF8(c[])) == c[], format("%s -> %s -> %s", cast(ubyte[])c[], cast(ubyte[])rawToUTF8(c[]), cast(ubyte[])UTF8ToRaw(rawToUTF8(c[]))));
	}
}

/// Where a delegate with this signature is required.
string nullStringTransform(in char[] s) { return to!string(s); }

/// Returns `s` itself if it is valid UTF-8 (as checked by `validate`);
/// otherwise returns `rawToUTF8(s)`.
string forceValidUTF8(string s)
{
	try
	{
		validate(s);
		return s;
	}
	catch (UTFException)
		return rawToUTF8(s);
}

// ************************************************************************

/// Return the slice up to the first NUL character,
/// or of the whole array if none is found.
// `n` must be declared as a value parameter (size_t): a bare `n` would
// be a type parameter, turning `C[n]` into an associative array type,
// which a static array argument can never match.
C[] fromZArray(C, size_t n)(ref C[n] arr)
{
	auto p = arr.representation.countUntil(0);
	// countUntil returns -1 when there is no NUL element
	return arr[0 .. p<0 ? $ : p];
}

/// ditto
C[] fromZArray(C)(C[] arr)
{
	auto p = arr.representation.countUntil(0);
	// countUntil returns -1 when there is no NUL element
	return arr[0 .. p<0 ? $ : p];
}

unittest
{
	char[4] arr = "ab\0d";
	assert(arr.fromZArray == "ab");
	arr[] = "abcd";
	assert(arr.fromZArray == "abcd");
}

unittest
{
	string arr = "ab\0d";
	assert(arr.fromZArray == "ab");
	arr = "abcd";
	assert(arr.fromZArray == "abcd");
}

// ************************************************************************

/// Formats binary data as a hex dump (three-column layout consisting of hex
/// offset, byte values in hex, and printable low-ASCII characters).
string hexDump(const(void)[] b)
{
	auto data = cast(const(ubyte)[]) b;
	assert(data.length);
	size_t i=0;
	string s;
	// One output line per 16 input bytes.
	while (i<data.length)
	{
		s ~= format("%08X: ", i);
		foreach (x; 0..16)
		{
			if (i+x<data.length)
				s ~= format("%02X ", data[i+x]);
			else
				s ~= "   "; // same width as "%02X ", keeps the last row aligned
			if (x==7)
				s ~= "| ";
		}
		s ~= " ";
		foreach (x; 0..16)
		{
			if (i+x<data.length)
				if (data[i+x]==0)
					s ~= ' '; // NUL bytes are shown as blanks
				else
				if (data[i+x]<32 || data[i+x]>=128)
					s ~= '.'; // control / non-ASCII bytes
				else
					s ~= cast(char)data[i+x];
			else
				s ~= ' ';
		}
		s ~= "\n";
		i += 16;
	}
	return s;
}

import std.conv;

/// Parse the entire string `s` as a hexadecimal number of type `T`.
/// Throws: ConvException if `s` is not completely consumed by the parse.
T fromHex(T : ulong = uint, C)(const(C)[] s)
{
	T result = parse!T(s, 16);
	enforce(s.length==0, new ConvException("Could not parse entire string"));
	return result;
}

/// Decode a string of hex digit pairs into a newly allocated byte array.
ubyte[] arrayFromHex(in char[] hex)
{
	auto buf = new ubyte[hex.length/2];
	arrayFromHex(hex, buf);
	return buf;
}

/// Compile-time options for the hex parsing functions.
struct HexParseConfig
{
	bool checked = true; /// validate digits (throws on a bad digit)
	bool lower = true;   /// accept a-f
	bool upper = true;   /// accept A-F
}

/// Convert one hex digit character to its value (0..15).
/// In checked mode, throws an Exception for any character which is not a
/// digit allowed by `config`. In unchecked mode the input is trusted, and
/// the result for an invalid character is unspecified.
ubyte parseHexDigit(HexParseConfig config = HexParseConfig.init)(char c)
{
	static assert(config.lower || config.upper,
		"Must parse at least either lower or upper case digits");
	static if (config.checked)
	{
		if (c >= '0' && c <= '9')
			return cast(ubyte)(c - '0');
		// Only accept the letter cases which the configuration enables,
		// consistently with the unchecked branch below.
		static if (config.lower)
			if (c >= 'a' && c <= 'f')
				return cast(ubyte)(c - 'a' + 10);
		static if (config.upper)
			if (c >= 'A' && c <= 'F')
				return cast(ubyte)(c - 'A' + 10);
		throw new Exception("Bad hex digit: " ~ c);
	}
	else
	{
		if (c <= '9')
			return cast(ubyte)(c - '0');
		static if (config.lower && config.upper)
		{
			// Upper-case letters sort before lower-case ones in ASCII.
			if (c < 'a')
				return cast(ubyte)(c - 'A' + 10);
			else
				return cast(ubyte)(c - 'a' + 10);
		}
		else
			static if (config.lower)
				return cast(ubyte)(c - 'a' + 10);
			else
				return cast(ubyte)(c - 'A' + 10);
	}
}

/// Decode a string of hex digit pairs into the caller-supplied buffer,
/// which must hold exactly `hex.length/2` bytes.
void arrayFromHex(HexParseConfig config = HexParseConfig.init)(in char[] hex, ubyte[] buf)
{
	assert(buf.length == hex.length/2, "Wrong buffer size for arrayFromHex");
	// A dangling half-byte would make the loop read hex[i+1] out of bounds.
	// Only throw in checked mode, so that unchecked instantiations stay
	// usable from nothrow code.
	static if (config.checked)
		enforce(hex.length % 2 == 0, "Odd number of hex digits");
	else
		assert(hex.length % 2 == 0, "Odd number of hex digits");
	for (size_t i=0; i<hex.length; i+=2)
		buf[i/2] = cast(ubyte)(
			parseHexDigit!config(hex[i  ])*16 +
			parseHexDigit!config(hex[i+1])
		);
}

/// Fast version for static arrays of known length.
/// Handles eight hex characters (four output bytes) per iteration by
/// loading them as one ulong; the remaining bytes are decoded one by one.
/// NOTE(review): the shift pattern treats the lowest byte of the loaded
/// ulong as the first character, i.e. it looks little-endian only - confirm.
void sarrayFromHex(HexParseConfig config = HexParseConfig.init, size_t N, Hex)(in ref Hex hex, ref ubyte[N] buf)
if (is(Hex == char[N*2]))
{
	foreach (i; 0..N/4)
	{
		ulong chars = (cast(ulong*)hex.ptr)[i];
		// Of every character pair, the first is the high nibble
		// and the second is the low nibble of one output byte.
		uint res =
			(parseHexDigit!config((chars >> (8*0)) & 0xFF) << (4*1)) |
			(parseHexDigit!config((chars >> (8*1)) & 0xFF) << (4*0)) |
			(parseHexDigit!config((chars >> (8*2)) & 0xFF) << (4*3)) |
			(parseHexDigit!config((chars >> (8*3)) & 0xFF) << (4*2)) |
			(parseHexDigit!config((chars >> (8*4)) & 0xFF) << (4*5)) |
			(parseHexDigit!config((chars >> (8*5)) & 0xFF) << (4*4)) |
			(parseHexDigit!config((chars >> (8*6)) & 0xFF) << (4*7)) |
			(parseHexDigit!config((chars >> (8*7)) & 0xFF) << (4*6));
		(cast(uint*)buf.ptr)[i] = res;
	}
	foreach (i; N/4*4..N)
		buf[i] = cast(ubyte)(
			parseHexDigit!config(hex[i*2  ])*16 +
			parseHexDigit!config(hex[i*2+1])
		);
}

unittest
{
	foreach (checked; TypeTuple!(false, true))
		foreach (lower; TypeTuple!(false, true))
			foreach (upper; TypeTuple!(false, true))
				static if (lower || upper)
				{
					enum config = HexParseConfig(checked, lower, upper);
					char[18] buf;
					foreach (n; 0..18)
						if (lower && upper ? n & 1 : upper)
							buf[n] = hexDigits[n % 16];
						else
							buf[n] = lowerHexDigits[n % 16];
					ubyte[9] res;
					sarrayFromHex!config(buf, res);
					assert(res == [0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0x01], text(res));
				}
}

/// Hex-encode a byte array, taking the digit characters from `digits`
/// (upper-case hex digits by default).
template toHex(alias digits = hexDigits)
{
	/// Encode into the caller-supplied buffer, which must hold exactly
	/// `data.length*2` characters. Returns the buffer.
	char[] toHex(in ubyte[] data, char[] buf) pure
	{
		assert(buf.length == data.length*2);
		foreach (i, b; data)
		{
			buf[i*2  ] = digits[b>>4];
			buf[i*2+1] = digits[b&15];
		}
		return buf;
	}

	/// Encode into a newly allocated string.
	string toHex(in ubyte[] data) pure
	{
		auto buf = new char[data.length*2];
		foreach (i, b; data)
		{
			buf[i*2  ] = digits[b>>4];
			buf[i*2+1] = digits[b&15];
		}
		return buf;
	}
}

/// Lower-case variant of toHex.
alias toLowerHex = toHex!lowerHexDigits;

/// Write the zero-padded upper-case hexadecimal representation of
/// the integer `n` to `buf`, most significant digit first.
void toHex(T : ulong, size_t U = T.sizeof*2)(T n, ref char[U] buf)
{
	Unqual!T x = n;
	// Fill from the last (least significant) digit backwards.
	foreach (i; Reverse!(RangeTuple!(T.sizeof*2)))
	{
		buf[i] = hexDigits[x & 0xF];
		x >>= 4;
	}
}

unittest
{
	ubyte[] bytes = [0x12, 0x34];
	assert(toHex(bytes) == "1234");
}

unittest
{
	ubyte[] bytes = [0x12, 0x34];
	char[] buf = new char[4];
	toHex(bytes, buf);
	assert(buf == "1234");
}

unittest
{
	char[8] buf;
	toHex(0x01234567, buf);
	assert(buf == "01234567");
}

/// Return the zero-padded hexadecimal representation of the integer `n`
/// as a static array.
char[T.sizeof*2] toHex(T : ulong)(T n)
{
	char[T.sizeof*2] buf;
	toHex(n, buf);
	return buf;
}

unittest
{
	assert(toHex(0x01234567) == "01234567");
}

/// How many significant decimal digits does a FP type have
/// (determined empirically - valid for all D FP types on x86/64)
enum significantDigits(T : real) = 2 + 2 * T.sizeof;

/// Format string for a FP type which includes all necessary
/// significant digits
enum fpFormatString(T) = "%." ~ text(significantDigits!T) ~ "g";
/// Length modifier placed in front of the conversion character
/// in C format strings for the FP type `T`.
template cWidthString(T)
{
	static if (is(Unqual!T == float))
		enum cWidthString = "";
	else
	static if (is(Unqual!T == double))
		enum cWidthString = "l";
	else
	static if (is(Unqual!T == real))
		enum cWidthString = "L";
}
/// Like fpFormatString, but for use with C's printf family.
enum fpCFormatString(T) = "%." ~ text(significantDigits!T) ~ cWidthString!T ~ "g";

/// snprintf wrapper which takes the size limit from the static
/// array's length.
private auto safeSprintf(size_t N, Args...)(ref char[N] buf, auto ref Args args) @trusted @nogc
{
	return snprintf(buf.ptr, N, args);
}

/// Format `val` into a fixed-size buffer, then shorten the text for as
/// long as it still parses back to exactly the same value.
private auto fpToBuf(Q)(Q val) @safe nothrow @nogc
{
	alias F = Unqual!Q;

	/// Bypass FPU register, which may contain a different precision
	static F forceType(F d) { static F n; n = d; return n; }

	enum isReal = is(F == real);

	StaticBuf!(char, 64) buf = void;

	// MSVC workaround from std.format:
	version (CRuntime_Microsoft)
	{
		import std.math : isNaN, isInfinity;
		immutable double v = val; // convert early to get "inf" in case of overflow
		{
			string s;
			if (isNaN(v))
				s = "nan"; // snprintf writes 1.#QNAN
			else if (isInfinity(v))
				s = val >= 0 ? "inf" : "-inf"; // snprintf writes 1.#INF
			else
				goto L1;
			buf.buf[0..s.length] = s;
			buf.pos = s.length;
			return buf;
		L1:
		}
	}
	else
		alias v = val;

	// Start with a representation which has all significant digits.
	buf.pos = safeSprintf(buf.buf, &fpCFormatString!F[0], forceType(v));
	char[] s = buf.data();

	// Parse a string which is expected to consist entirely of a valid number.
	F parse(char[] s)
	{
		F f;
		auto res = tryParse(s, f);
		assert(res, "Failed to parse number we created");
		assert(!s.length, "Failed to completely parse number we created");
		return f;
	}

	if (s != "nan" && s != "-nan" && s != "inf" && s != "-inf")
	{
		if (forceType(parse(s)) != v)
		{
			static if (isReal)
			{
				// Something funny with DM libc real parsing... e.g. 0.6885036635121051783
				return buf;
			}
			else
			//	assert(false, "Initial conversion fails: " ~ format(fpFormatString!F, parse(s)) ~ " / " ~ s);
				assert(false, "Initial conversion fails");
		}

		// Going from the end, try to increment each digit and cut the
		// string off right after it; keep the change only if the shorter
		// string still yields the same value, otherwise undo it.
		foreach_reverse (i; 1..s.length)
			if (s[i]>='0' && s[i]<='8')
			{
				s[i]++;
				if (forceType(parse(s[0..i+1]))==v)
					s = s[0..i+1];
				else
					s[i]--;
			}
		// Then drop trailing characters for as long as the value is unchanged.
		while (s.length>2 && s[$-1]!='.' && forceType(parse(s[0..$-1]))==v)
			s = s[0..$-1];
	}
	buf.pos = s.length;
	return buf;
}

/// Write the shortest exact string representation of `v` to `writer`.
void putFP(Writer, F)(auto ref Writer writer, F v)
{
	writer.put(fpToBuf(v).data);
}


/// Get shortest string representation of a FP type that still converts to exactly the same number.
template fpToString(F)
{
	string fpToString(F v) @safe nothrow
	{
		return fpToBuf(v).data.idup;
	}

	static if (!is(Unqual!F == real))
	unittest
	{
		// Round-trip random bit patterns through fpToString and to!F.
		union U
		{
			ubyte[F.sizeof] bytes;
			Unqual!F d;
			string toString() const { return (fpFormatString!F ~ " %a [%(%02X %)]").format(d, d, bytes[]); }
		}
		import std.random : Xorshift, uniform;
		import std.stdio : stderr;
		Xorshift rng;
		foreach (n; 0..10000)
		{
			U u;
			foreach (ref b; u.bytes[])
				b = uniform!ubyte(rng);
			static if (is(Unqual!F == real))
				u.bytes[7] |= 0x80; // require normalized value
			scope(failure) stderr.writeln("Input:\t", u);
			auto s = fpToString(u.d);
			scope(failure) stderr.writeln("Result:\t", s);
			if (s == "nan" || s == "-nan")
				continue; // there are many NaNs...
			U r;
			r.d = to!F(s);
			assert(r.bytes == u.bytes,
				"fpToString mismatch:\nOutput:\t%s".format(r));
		}
	}
}

alias doubleToString = fpToString!double;

unittest
{
	// Check that the other instantiations compile.
	alias floatToString = fpToString!float;
	alias realToString = fpToString!real;
	alias crealToString = fpToString!(const(real));
}

/// Wraps the result of a fpToString in a non-allocating stringifiable struct.
struct FPAsString(T)
{
	typeof(fpToBuf(T.init)) buf;

	this(T f)
	{
		buf = fpToBuf(f);
	}

	/// Allocating conversion to string.
	string toString() const pure nothrow
	{
		return buf.data.idup;
	}

	/// Sink-based conversion: writes the whole buffer at once if the
	/// sink accepts slices, and character by character otherwise.
	void toString(W)(ref W w) const
	{
		static if (is(typeof(w.put(buf.data))))
			w.put(buf.data);
		else
			foreach (c; buf.data)
				w.put(c);
	}
}
FPAsString!T fpAsString(T)(T f) { return FPAsString!T(f); } /// ditto

@safe //nothrow @nogc
unittest
{
	StaticBuf!(char, 1024) buf;
	buf.formattedWrite!"%s"(fpAsString(0.1));
	assert(buf.data == "0.1");
}

/// Convert a number to a string: via toDec for integral types,
/// and via fpToString for everything else.
string numberToString(T)(T v)
	if (isNumeric!T)
{
	static if (is(T : ulong))
		return toDec(v);
	else
		return fpToString(v);
}

// ************************************************************************

/// Simpler implementation of Levenshtein string distance
/// (operates on code units; uses an (n+1) x (m+1) matrix).
int stringDistance(string s, string t)
{
	int n = cast(int)s.length;
	int m = cast(int)t.length;
	if (n == 0) return m;
	if (m == 0) return n;
	int[][] distance = new int[][](n+1, m+1); // matrix
	int cost=0;
	//init1
	foreach (i; 0..n+1) distance[i][0]=i;
	foreach (j; 0..m+1) distance[0][j]=j;
	//find min distance
	foreach (i; 1..n+1)
		foreach (j; 1..m+1)
		{
			cost = t[j-1] == s[i-1] ? 0 : 1;
			distance[i][j] = min(
				distance[i-1][j  ] + 1,    // deletion
				distance[i  ][j-1] + 1,    // insertion
				distance[i-1][j-1] + cost  // substitution (or match)
			);
		}
	return distance[n][m];
}

/// Return a number between 0.0 and 1.0 indicating how similar two strings are
/// (1.0 if identical)
float stringSimilarity(string string1, string string2)
{
	float dis = stringDistance(string1, string2);
	float maxLen = string1.length;
	if (maxLen < string2.length)
		maxLen = string2.length;
	if (maxLen == 0)
		return 1; // two empty strings are identical
	else
		return 1f - dis/maxLen;
}

/// Select best match from a list of items.
/// Returns -1 if none are above the threshold.
/// The comparison is case-insensitive (both sides go through toLower).
/// Among equally good candidates, the last one is selected.
sizediff_t findBestMatch(in string[] items, string target, float threshold = 0.7)
{
	sizediff_t found = -1;
	float best = 0;

	// Loop-invariant, so computed only once.
	auto lowerTarget = toLower(target);

	foreach (i, item; items)
	{
		float match = stringSimilarity(toLower(item), lowerTarget);
		if (match>threshold && match>=best)
		{
			best = match;
			found = i;
		}
	}

	return found;
}

/// Select best match from a list of items.
/// Returns null if none are above the threshold.
string selectBestFrom(in string[] items, string target, float threshold = 0.7)
{
	auto index = findBestMatch(items, target, threshold);
	return index < 0 ? null : items[index];
}

// ************************************************************************


/// Generate a string of `length` characters, each picked uniformly
/// at random from `chars`.
string randomString()(int length=20, string chars="abcdefghijklmnopqrstuvwxyz")
{
	import std.random;
	import std.range;

	return length.iota.map!(n => chars[uniform(0, $)]).array;
}