1 /** 2 * Utility code related to string and text processing. 3 * 4 * License: 5 * This Source Code Form is subject to the terms of 6 * the Mozilla Public License, v. 2.0. If a copy of 7 * the MPL was not distributed with this file, You 8 * can obtain one at http://mozilla.org/MPL/2.0/. 9 * 10 * Authors: 11 * Vladimir Panteleev <vladimir@thecybershadow.net> 12 */ 13 14 module ae.utils.text; 15 16 import std.algorithm; 17 import std.ascii; 18 import std.exception; 19 import std.conv; 20 import std.format; 21 import std.string; 22 import std.traits; 23 import std.typetuple; 24 25 import core.stdc.string; 26 27 import ae.utils.array; 28 import ae.utils.meta; 29 import ae.utils.textout; 30 31 public import ae.utils.regex; 32 33 alias indexOf = std..string.indexOf; 34 35 // ************************************************************************ 36 37 /// Semantic alias for an array of immutable bytes containing some 38 /// ASCII-based 8-bit character encoding. Might be UTF-8, but not 39 /// necessarily - thus, is a semantic superset of the D "string" alias. 40 alias string ascii; 41 42 /// Convenience helper 43 bool contains(T, U)(T[] str, U[] what) 44 if (is(Unqual!T == Unqual!U)) 45 { 46 return str.indexOf(what)>=0; 47 } 48 49 /// CTFE helper 50 string formatAs(T)(auto ref T obj, string fmt) 51 { 52 return format(fmt, obj); 53 } 54 55 /// Consume a LF or CRLF terminated line from s. 56 /// Sets s to null and returns the remainder 57 /// if there is no line terminator in s. 58 T[] eatLine(T)(ref T[] s, bool eatIncompleteLines = true) 59 { 60 return s.skipUntil([T('\n')], eatIncompleteLines).chomp(); 61 } 62 63 deprecated template eatLine(OnEof onEof) 64 { 65 T[] eatLine(T)(ref T[] s) 66 { 67 return s.eatUntil!onEof([T('\n')]).chomp(); 68 } 69 } 70 71 unittest 72 { 73 string s = "Hello\nworld"; 74 assert(s.eatLine() == "Hello"); 75 assert(s.eatLine() == "world"); 76 assert(s is null); 77 assert(s.eatLine() is null); 78 } 79 80 // Uses memchr (not Boyer-Moore), best for short strings. 81 T[] fastReplace(T)(T[] what, T[] from, T[] to) 82 if (T.sizeof == 1) // TODO (uses memchr) 83 { 84 alias Unqual!T U; 85 86 // debug scope(failure) std.stdio.writeln("fastReplace crashed: ", [what, from, to]); 87 enum RAM = cast(U*)null; 88 89 if (what.length < from.length || from.length==0) 90 return what; 91 92 if (from.length==1) 93 { 94 auto fromc = from[0]; 95 if (to.length==1) 96 { 97 auto p = cast(T*)memchr(what.ptr, fromc, what.length); 98 if (!p) 99 return what; 100 101 auto result = what.dup; 102 auto delta = result.ptr - what.ptr; 103 auto toChar = to[0]; 104 auto end = what.ptr + what.length; 105 do 106 { 107 (cast(U*)p)[delta] = toChar; // zomg hax lol 108 p++; 109 p = cast(T*)memchr(p, fromc, end - p); 110 } while (p); 111 return assumeUnique(result); 112 } 113 else 114 { 115 auto p = cast(immutable(T)*)memchr(what.ptr, fromc, what.length); 116 if (!p) 117 return what; 118 119 auto sb = StringBuilder(what.length); 120 do 121 { 122 sb.put(what[0..p-what.ptr], to); 123 what = what[p-what.ptr+1..$]; 124 p = cast(immutable(T)*)memchr(what.ptr, fromc, what.length); 125 } 126 while (p); 127 128 sb.put(what); 129 return sb.get(); 130 } 131 } 132 133 auto head = from[0]; 134 auto tail = from[1..$]; 135 136 auto p = cast(T*)what.ptr; 137 auto end = p + what.length - tail.length; 138 p = cast(T*)memchr(p, head, end-p); 139 while (p) 140 { 141 p++; 142 if (p[0..tail.length] == tail) 143 { 144 if (from.length == to.length) 145 { 146 auto result = what.dup; 147 auto deltaMinusOne = (result.ptr - what.ptr) - 1; 148 149 goto replaceA; 150 dummyA: // compiler complains 151 152 do 153 { 154 p++; 155 if (p[0..tail.length] == tail) 156 { 157 replaceA: 158 (cast(U*)p+deltaMinusOne)[0..to.length] = to[]; 159 } 160 p = cast(T*)memchr(p, head, end-p); 161 } 162 while (p); 163 164 return assumeUnique(result); 165 } 166 else 167 { 168 auto start = cast(T*)what.ptr; 169 auto sb = StringBuilder(what.length); 170 goto replaceB; 171 dummyB: // compiler complains 172 173 do 174 { 175 p++; 176 if (p[0..tail.length] == tail) 177 { 178 replaceB: 179 sb.put(RAM[cast(size_t)start .. cast(size_t)p-1], to); 180 start = p + tail.length; 181 what = what[start-what.ptr..$]; 182 } 183 else 184 { 185 what = what[p-what.ptr..$]; 186 } 187 p = cast(T*)memchr(what.ptr, head, what.length); 188 } 189 while (p); 190 191 //sb.put(what); 192 sb.put(RAM[cast(size_t)start..cast(size_t)(what.ptr+what.length)]); 193 return sb.get(); 194 } 195 196 assert(0); 197 } 198 p = cast(T*)memchr(p, head, end-p); 199 } 200 201 return what; 202 } 203 204 unittest 205 { 206 import std.array; 207 void test(string haystack, string from, string to) 208 { 209 auto description = `("` ~ haystack ~ `", "` ~ from ~ `", "` ~ to ~ `")`; 210 211 auto r1 = fastReplace(haystack, from, to); 212 auto r2 = replace(haystack, from, to); 213 assert(r1 == r2, `Bad replace: ` ~ description ~ ` == "` ~ r1 ~ `"`); 214 215 if (r1 == haystack) 216 assert(r1 is haystack, `Pointless reallocation: ` ~ description); 217 } 218 219 test("Mary had a little lamb", "a", "b"); 220 test("Mary had a little lamb", "a", "aaa"); 221 test("Mary had a little lamb", "Mary", "Lucy"); 222 test("Mary had a little lamb", "Mary", "Jimmy"); 223 test("Mary had a little lamb", "lamb", "goat"); 224 test("Mary had a little lamb", "lamb", "sheep"); 225 test("Mary had a little lamb", " l", " x"); 226 test("Mary had a little lamb", " l", " xx"); 227 228 test("Mary had a little lamb", "X" , "Y" ); 229 test("Mary had a little lamb", "XX", "Y" ); 230 test("Mary had a little lamb", "X" , "YY"); 231 test("Mary had a little lamb", "XX", "YY"); 232 test("Mary had a little lamb", "aX", "Y" ); 233 test("Mary had a little lamb", "aX", "YY"); 234 235 test("foo", "foobar", "bar"); 236 } 237 238 T[][] fastSplit(T, U)(T[] s, U d) 239 if (is(Unqual!T == Unqual!U)) 240 { 241 if (!s.length) 242 return null; 243 244 auto p = cast(T*)memchr(s.ptr, d, s.length); 245 if (!p) 246 return [s]; 247 248 size_t n; 249 auto end = s.ptr + s.length; 250 do 251 { 252 n++; 253 p++; 254 p = cast(T*) memchr(p, d, end-p); 255 } 256 while (p); 257 258 auto result = new T[][n+1]; 259 n = 0; 260 auto start = s.ptr; 261 p = cast(T*) memchr(start, d, s.length); 262 do 263 { 264 result[n++] = start[0..p-start]; 265 start = ++p; 266 p = cast(T*) memchr(p, d, end-p); 267 } 268 while (p); 269 result[n] = start[0..end-start]; 270 271 return result; 272 } 273 274 T[][] splitAsciiLines(T)(T[] text) 275 if (is(Unqual!T == char)) 276 { 277 auto lines = text.fastSplit('\n'); 278 foreach (ref line; lines) 279 if (line.length && line[$-1]=='\r') 280 line = line[0..$-1]; 281 return lines; 282 } 283 284 unittest 285 { 286 assert(splitAsciiLines("a\nb\r\nc\r\rd\n\re\r\n\nf") == ["a", "b", "c\r\rd", "\re", "", "f"]); 287 assert(splitAsciiLines(string.init) == splitLines(string.init)); 288 } 289 290 T[] asciiStrip(T)(T[] s) 291 if (is(Unqual!T == char)) 292 { 293 while (s.length && isWhite(s[0])) 294 s = s[1..$]; 295 while (s.length && isWhite(s[$-1])) 296 s = s[0..$-1]; 297 return s; 298 } 299 300 unittest 301 { 302 string s = "Hello, world!"; 303 assert(asciiStrip(s) is s); 304 assert(asciiStrip("\r\n\tHello ".dup) == "Hello"); 305 } 306 307 /// Covering slice-list of s with interleaved whitespace. 308 T[][] segmentByWhitespace(T)(T[] s) 309 if (is(Unqual!T == char)) 310 { 311 if (!s.length) 312 return null; 313 314 T[][] segments; 315 bool wasWhite = isWhite(s[0]); 316 size_t start = 0; 317 foreach (p, char c; s) 318 { 319 bool isWhite = isWhite(c); 320 if (isWhite != wasWhite) 321 segments ~= s[start..p], 322 start = p; 323 wasWhite = isWhite; 324 } 325 segments ~= s[start..$]; 326 327 return segments; 328 } 329 330 T[] newlinesToSpaces(T)(T[] s) 331 if (is(Unqual!T == char)) 332 { 333 auto slices = segmentByWhitespace(s); 334 foreach (ref slice; slices) 335 if (slice.contains("\n")) 336 slice = " "; 337 return slices.join(); 338 } 339 340 ascii normalizeWhitespace(ascii s) 341 { 342 auto slices = segmentByWhitespace(strip(s)); 343 foreach (i, ref slice; slices) 344 if (i & 1) // odd 345 slice = " "; 346 return slices.join(); 347 } 348 349 unittest 350 { 351 assert(normalizeWhitespace(" Mary had\ta\nlittle\r\n\tlamb") == "Mary had a little lamb"); 352 } 353 354 string[] splitByCamelCase(string s) 355 { 356 string[] result; 357 size_t start = 0; 358 foreach (i; 1..s.length+1) 359 if (i == s.length 360 || (isLower(s[i-1]) && isUpper(s[i])) 361 || (i+1 < s.length && isUpper(s[i-1]) && isUpper(s[i]) && isLower(s[i+1])) 362 ) 363 { 364 result ~= s[start..i]; 365 start = i; 366 } 367 return result; 368 } 369 370 unittest 371 { 372 assert(splitByCamelCase("parseIPString") == ["parse", "IP", "String"]); 373 assert(splitByCamelCase("IPString") == ["IP", "String"]); 374 } 375 376 string camelCaseJoin(string[] arr) 377 { 378 if (!arr.length) 379 return null; 380 string result = arr[0]; 381 foreach (s; arr[1..$]) 382 result ~= std.ascii.toUpper(s[0]) ~ s[1..$]; 383 return result; 384 } 385 386 unittest 387 { 388 assert("parse-IP-string".split('-').camelCaseJoin() == "parseIPString"); 389 } 390 391 // ************************************************************************ 392 393 private __gshared char[256] asciiLower, asciiUpper; 394 395 shared static this() 396 { 397 foreach (c; 0..256) 398 { 399 asciiLower[c] = cast(char)std.ascii.toLower(c); 400 asciiUpper[c] = cast(char)std.ascii.toUpper(c); 401 } 402 } 403 404 void xlat(alias TABLE, T)(T[] buf) 405 { 406 foreach (ref c; buf) 407 c = TABLE[c]; 408 } 409 410 alias xlat!(asciiLower, char) asciiToLower; 411 alias xlat!(asciiUpper, char) asciiToUpper; 412 413 // ************************************************************************ 414 415 /// Case-insensitive ASCII string. 416 alias CIAsciiString = NormalizedArray!(immutable(char), s => s.byCodeUnit.map!(std.ascii.toLower)); 417 418 /// 419 unittest 420 { 421 CIAsciiString s = "test"; 422 assert(s == "TEST"); 423 assert(s >= "Test" && s <= "Test"); 424 assert(CIAsciiString("a") == CIAsciiString("A")); 425 assert(CIAsciiString("a") != CIAsciiString("B")); 426 assert(CIAsciiString("a") < CIAsciiString("B")); 427 assert(CIAsciiString("A") < CIAsciiString("b")); 428 assert(CIAsciiString("я") != CIAsciiString("Я")); 429 } 430 431 /// Case-insensitive Unicode string. 432 alias CIUniString = NormalizedArray!(immutable(char), s => s.map!(std.uni.toLower)); 433 434 /// 435 unittest 436 { 437 CIUniString s = "привет"; 438 assert(s == "ПРИВЕТ"); 439 assert(s >= "Привет" && s <= "Привет"); 440 assert(CIUniString("я") == CIUniString("Я")); 441 assert(CIUniString("а") != CIUniString("Б")); 442 assert(CIUniString("а") < CIUniString("Б")); 443 assert(CIUniString("А") < CIUniString("б")); 444 } 445 446 // ************************************************************************ 447 448 import std.utf; 449 450 /// Convert any data to a valid UTF-8 bytestream, so D's string functions can 451 /// properly work on it. 452 string rawToUTF8(in char[] s) 453 { 454 auto d = new dchar[s.length]; 455 foreach (i, char c; s) 456 d[i] = c; 457 return toUTF8(d); 458 } 459 460 /// Undo rawToUTF8. 461 ascii UTF8ToRaw(in char[] r) 462 { 463 auto s = new char[r.length]; 464 size_t i = 0; 465 foreach (dchar c; r) 466 { 467 assert(c < '\u0100'); 468 s[i++] = cast(char)c; 469 } 470 return assumeUnique(s[0..i]); 471 } 472 473 unittest 474 { 475 char[1] c; 476 for (int i=0; i<256; i++) 477 { 478 c[0] = cast(char)i; 479 assert(UTF8ToRaw(rawToUTF8(c[])) == c[], format("%s -> %s -> %s", cast(ubyte[])c[], cast(ubyte[])rawToUTF8(c[]), cast(ubyte[])UTF8ToRaw(rawToUTF8(c[])))); 480 } 481 } 482 483 /// Where a delegate with this signature is required. 484 string nullStringTransform(in char[] s) { return to!string(s); } 485 486 string forceValidUTF8(string s) 487 { 488 try 489 { 490 validate(s); 491 return s; 492 } 493 catch (UTFException) 494 return rawToUTF8(s); 495 } 496 497 // ************************************************************************ 498 499 /// Return the slice up to the first NUL character, 500 /// or of the whole array if none is found. 501 C[] fromZArray(C, n)(ref C[n] arr) 502 { 503 auto p = arr.representation.countUntil(0); 504 return arr[0 .. p<0 ? $ : p]; 505 } 506 507 /// ditto 508 C[] fromZArray(C)(C[] arr) 509 { 510 auto p = arr.representation.countUntil(0); 511 return arr[0 .. p<0 ? $ : p]; 512 } 513 514 unittest 515 { 516 char[4] arr = "ab\0d"; 517 assert(arr.fromZArray == "ab"); 518 arr[] = "abcd"; 519 assert(arr.fromZArray == "abcd"); 520 } 521 522 unittest 523 { 524 string arr = "ab\0d"; 525 assert(arr.fromZArray == "ab"); 526 arr = "abcd"; 527 assert(arr.fromZArray == "abcd"); 528 } 529 530 // ************************************************************************ 531 532 /// Formats binary data as a hex dump (three-column layout consisting of hex 533 /// offset, byte values in hex, and printable low-ASCII characters). 534 string hexDump(const(void)[] b) 535 { 536 auto data = cast(const(ubyte)[]) b; 537 assert(data.length); 538 size_t i=0; 539 string s; 540 while (i<data.length) 541 { 542 s ~= format("%08X: ", i); 543 foreach (x; 0..16) 544 { 545 if (i+x<data.length) 546 s ~= format("%02X ", data[i+x]); 547 else 548 s ~= " "; 549 if (x==7) 550 s ~= "| "; 551 } 552 s ~= " "; 553 foreach (x; 0..16) 554 { 555 if (i+x<data.length) 556 if (data[i+x]==0) 557 s ~= ' '; 558 else 559 if (data[i+x]<32 || data[i+x]>=128) 560 s ~= '.'; 561 else 562 s ~= cast(char)data[i+x]; 563 else 564 s ~= ' '; 565 } 566 s ~= "\n"; 567 i += 16; 568 } 569 return s; 570 } 571 572 import std.conv; 573 574 T fromHex(T : ulong = uint, C)(const(C)[] s) 575 { 576 T result = parse!T(s, 16); 577 enforce(s.length==0, new ConvException("Could not parse entire string")); 578 return result; 579 } 580 581 ubyte[] arrayFromHex(in char[] hex, ubyte[] buf = null) 582 { 583 if (buf is null) 584 buf = new ubyte[hex.length/2]; 585 else 586 assert(buf.length == hex.length/2); 587 for (int i=0; i<hex.length; i+=2) 588 buf[i/2] = cast(ubyte)( 589 hexDigits.indexOf(hex[i ], CaseSensitive.no)*16 + 590 hexDigits.indexOf(hex[i+1], CaseSensitive.no) 591 ); 592 return buf; 593 } 594 595 string toHex(alias digits = hexDigits)(in ubyte[] data, char[] buf = null) 596 { 597 if (buf is null) 598 buf = new char[data.length*2]; 599 else 600 assert(buf.length == data.length*2); 601 foreach (i, b; data) 602 { 603 buf[i*2 ] = digits[b>>4]; 604 buf[i*2+1] = digits[b&15]; 605 } 606 return assumeUnique(buf); 607 } 608 609 alias toLowerHex = toHex!lowerHexDigits; 610 611 void toHex(T : ulong, size_t U = T.sizeof*2)(T n, ref char[U] buf) 612 { 613 foreach (i; Reverse!(RangeTuple!(T.sizeof*2))) 614 { 615 buf[i] = hexDigits[n & 0xF]; 616 n >>= 4; 617 } 618 } 619 620 unittest 621 { 622 char[8] buf; 623 toHex(0x01234567, buf); 624 assert(buf == "01234567"); 625 } 626 627 /// How many significant decimal digits does a FP type have 628 /// (determined empirically) 629 enum significantDigits(T : real) = 2 + 2 * T.sizeof; 630 631 /// Format string for a FP type which includes all necessary 632 /// significant digits 633 enum fpFormatString(T) = "%." ~ text(significantDigits!T) ~ "g"; 634 635 /// Get shortest string representation of a FP type that still converts to exactly the same number. 636 template fpToString(F) 637 { 638 string fpToString(F v) 639 { 640 /// Bypass FPU register, which may contain a different precision 641 static F forceType(F d) { static F n; n = d; return n; } 642 643 StaticBuf!(char, 64) buf; 644 formattedWrite(&buf, fpFormatString!F, forceType(v)); 645 char[] s = buf.data(); 646 647 if (s != "nan" && s != "-nan" && s != "inf" && s != "-inf") 648 { 649 if (forceType(to!F(s)) != v) 650 { 651 static if (is(F == real)) 652 { 653 // Something funny with DM libc real parsing... e.g. 0.6885036635121051783 654 return s.idup; 655 } 656 else 657 assert(false, "Initial conversion fails: " ~ format(fpFormatString!F, to!F(s))); 658 } 659 660 foreach_reverse (i; 1..s.length) 661 if (s[i]>='0' && s[i]<='8') 662 { 663 s[i]++; 664 if (forceType(to!F(s[0..i+1]))==v) 665 s = s[0..i+1]; 666 else 667 s[i]--; 668 } 669 while (s.length>2 && s[$-1]!='.' && forceType(to!F(s[0..$-1]))==v) 670 s = s[0..$-1]; 671 } 672 return s.idup; 673 } 674 675 static if (!is(F == real)) 676 unittest 677 { 678 union U 679 { 680 ubyte[F.sizeof] bytes; 681 F d; 682 string toString() { return (fpFormatString!F ~ " %a [%(%02X %)]").format(d, d, bytes[]); } 683 } 684 import std.random : Xorshift, uniform; 685 import std.stdio : stderr; 686 Xorshift rng; 687 foreach (n; 0..10000) 688 { 689 U u; 690 foreach (ref b; u.bytes[]) 691 b = uniform!ubyte(rng); 692 static if (is(F == real)) 693 u.bytes[7] |= 0x80; // require normalized value 694 scope(failure) stderr.writeln("Input:\t", u); 695 auto s = fpToString(u.d); 696 scope(failure) stderr.writeln("Result:\t", s); 697 if (s == "nan" || s == "-nan") 698 continue; // there are many NaNs... 699 U r; 700 r.d = to!F(s); 701 assert(r.bytes == u.bytes, 702 "fpToString mismatch:\nOutput:\t%s".format(r)); 703 } 704 } 705 } 706 707 alias doubleToString = fpToString!double; 708 709 unittest 710 { 711 alias floatToString = fpToString!float; 712 alias realToString = fpToString!real; 713 } 714 715 import std.algorithm : max; 716 717 template DecimalSize(T : ulong) 718 { 719 enum DecimalSize = max(text(T.min).length, text(T.max).length); 720 } 721 722 static assert(DecimalSize!ubyte == 3); 723 static assert(DecimalSize!byte == 4); 724 static assert(DecimalSize!ushort == 5); 725 static assert(DecimalSize!short == 6); 726 static assert(DecimalSize!uint == 10); 727 static assert(DecimalSize!int == 11); 728 static assert(DecimalSize!ulong == 20); 729 static assert(DecimalSize!long == 20); 730 731 import std.typecons; 732 733 /// Writes n as decimal number to buf (right-aligned), returns slice of buf containing result. 734 char[] toDec(N : ulong, size_t U)(N o, ref char[U] buf) 735 { 736 static assert(U >= DecimalSize!N, "Buffer too small to fit any " ~ N.stringof ~ " value"); 737 738 Unqual!N n = o; 739 char* p = buf.ptr+buf.length; 740 741 if (isSigned!N && n<0) 742 { 743 do 744 { 745 *--p = '0' - n%10; 746 n = n/10; 747 } while (n); 748 *--p = '-'; 749 } 750 else 751 do 752 { 753 *--p = '0' + n%10; 754 n = n/10; 755 } while (n); 756 757 return p[0 .. buf.ptr + buf.length - p]; 758 } 759 760 string toDec(T : ulong)(T n) 761 { 762 static struct Buf { char[DecimalSize!T] buf; } // Can't put static array on heap, use struct 763 return assumeUnique(toDec(n, (new Buf).buf)); 764 } 765 766 unittest 767 { 768 assert(toDec(42) == "42"); 769 assert(toDec(int.min) == int.min.to!string()); 770 } 771 772 /// Print an unsigned integer as a zero-padded, right-aligned decimal number into a buffer 773 void toDecFixed(N : ulong, size_t U)(N n, ref char[U] buf) 774 if (!isSigned!N) 775 { 776 assert(n < 10^^U, "Number too large"); 777 778 foreach (i; Reverse!(RangeTuple!U)) 779 { 780 buf[i] = cast(char)('0' + (n % 10)); 781 n /= 10; 782 } 783 } 784 785 /// ditto 786 char[U] toDecFixed(size_t U, N : ulong)(N n) 787 if (!isSigned!N) 788 { 789 char[U] buf; 790 toDecFixed(n, buf); 791 return buf; 792 } 793 794 unittest 795 { 796 assert(toDecFixed!6(12345u) == "012345"); 797 } 798 799 string numberToString(T)(T v) 800 if (isNumeric!T) 801 { 802 static if (is(T : real)) 803 return fpToString(v); 804 else 805 return toDec(v); 806 } 807 808 // ************************************************************************ 809 810 /// Simpler implementation of Levenshtein string distance 811 int stringDistance(string s, string t) 812 { 813 int n = cast(int)s.length; 814 int m = cast(int)t.length; 815 if (n == 0) return m; 816 if (m == 0) return n; 817 int[][] distance = new int[][](n+1, m+1); // matrix 818 int cost=0; 819 //init1 820 foreach (i; 0..n+1) distance[i][0]=i; 821 foreach (j; 0..m+1) distance[0][j]=j; 822 //find min distance 823 foreach (i; 1..n+1) 824 foreach (j; 1..m+1) 825 { 826 cost = t[j-1] == s[i-1] ? 0 : 1; 827 distance[i][j] = min( 828 distance[i-1][j ] + 1, 829 distance[i ][j-1] + 1, 830 distance[i-1][j-1] + cost 831 ); 832 } 833 return distance[n][m]; 834 } 835 836 /// Return a number between 0.0 and 1.0 indicating how similar two strings are 837 /// (1.0 if identical) 838 float stringSimilarity(string string1, string string2) 839 { 840 float dis = stringDistance(string1, string2); 841 float maxLen = string1.length; 842 if (maxLen < string2.length) 843 maxLen = string2.length; 844 if (maxLen == 0) 845 return 1; 846 else 847 return 1f - dis/maxLen; 848 } 849 850 /// Select best match from a list of items. 851 /// Returns -1 if none are above the threshold. 852 sizediff_t findBestMatch(in string[] items, string target, float threshold = 0.7) 853 { 854 sizediff_t found = -1; 855 float best = 0; 856 857 foreach (i, item; items) 858 { 859 float match = stringSimilarity(toLower(item),toLower(target)); 860 if (match>threshold && match>=best) 861 { 862 best = match; 863 found = i; 864 } 865 } 866 867 return found; 868 } 869 870 /// Select best match from a list of items. 871 /// Returns null if none are above the threshold. 872 string selectBestFrom(in string[] items, string target, float threshold = 0.7) 873 { 874 auto index = findBestMatch(items, target, threshold); 875 return index < 0 ? null : items[index]; 876 } 877 878 // ************************************************************************ 879 880 import std.random; 881 882 string randomString(int length=20, string chars="abcdefghijklmnopqrstuvwxyz") 883 { 884 char[] result = new char[length]; 885 foreach (ref c; result) 886 c = chars[uniform(0, $)]; 887 return assumeUnique(result); 888 }