ae.utils.text source code

1 /**
2  * Utility code related to string and text processing.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.utils.text;
15 
16 import std.algorithm;
17 import std.ascii;
18 import std.exception;
19 import std.conv;
20 import std.format;
21 import std.string;
22 import std.traits;
23 import std.typetuple;
24 
25 import core.stdc.string;
26 
27 import ae.utils.array;
28 import ae.utils.meta;
29 import ae.utils.textout;
30 
31 public import ae.utils.regex;
32 
// Bind the unqualified name `indexOf` to std.string's implementation.
alias indexOf = std.string.indexOf;
34 
35 public import ae.utils.text.ascii : ascii, DecimalSize, toDec, toDecFixed;
36 
37 // ************************************************************************
38 
/// Convenience helper: true when `indexOf` finds `what` inside `str`.
bool contains(T, U)(T[] str, U[] what)
	if (is(Unqual!T == Unqual!U))
{
	if (str.indexOf(what) < 0)
		return false;
	return true;
}
45 
/// CTFE helper: formats `obj` according to the format string `fmt`.
string formatAs(T)(auto ref T obj, string fmt)
{
	return fmt.format(obj);
}
51 
/// Consume a LF or CRLF terminated line from s.
/// Sets s to null and returns the remainder
/// if there is no line terminator in s.
/// `eatIncompleteLines` is forwarded unchanged to `skipUntil`.
T[] eatLine(T)(ref T[] s, bool eatIncompleteLines = true)
{
	// Cut at the LF first, then let chomp remove a trailing line ending.
	auto line = s.skipUntil([T('\n')], eatIncompleteLines);
	return line.chomp();
}
59 
/// Deprecated variant taking the end-of-input policy as a template argument;
/// the policy is forwarded unchanged to `eatUntil`.
deprecated template eatLine(OnEof onEof)
{
	T[] eatLine(T)(ref T[] s)
	{
		auto line = s.eatUntil!onEof([T('\n')]);
		return line.chomp();
	}
}
67 
unittest
{
	string text = "Hello\nworld";

	auto first = text.eatLine();
	assert(first == "Hello");

	auto second = text.eatLine();
	assert(second == "world");
	assert(text is null);

	// Nothing left to consume.
	auto third = text.eatLine();
	assert(third is null);
}
76 
// Uses memchr (not Boyer-Moore), best for short strings.
/// Replace every occurrence of `from` in `what` with `to`.
///
/// Returns `what` itself (no allocation) when `from` is empty, when `from`
/// is longer than `what`, or when no occurrence is found; otherwise a newly
/// built array. Restricted to one-byte element types because the search is
/// done with memchr.
///
/// NOTE(review): in the equal-length path the search resumes one element
/// after the matched head, so overlapping occurrences appear to be rewritten
/// as well (e.g. "aaa" with "aa" -> "bb" looks like it yields "bbb") -
/// confirm whether that is intended.
/// NOTE(review): in the differing-length path the follow-up memchr scans all
/// of the remaining `what`; a head found within the last tail.length elements
/// appears to make `p[0..tail.length]` read past the end of the input -
/// confirm.
T[] fastReplace(T)(T[] what, T[] from, T[] to)
	if (T.sizeof == 1) // TODO (uses memchr)
{
	alias Unqual!T U;

//	debug scope(failure) std.stdio.writeln("fastReplace crashed: ", [what, from, to]);
	// Null base pointer: RAM[a .. b] turns two absolute addresses into a slice.
	enum RAM = cast(U*)null;

	// Nothing can match: hand back the input untouched.
	if (what.length < from.length || from.length==0)
		return what;

	// Special case: the needle is a single element.
	if (from.length==1)
	{
		auto fromc = from[0];
		if (to.length==1)
		{
			// One element for one element: copy once, then patch in place.
			auto p = cast(T*)memchr(what.ptr, fromc, what.length);
			if (!p)
				return what;

			auto result = what.dup;
			// Offset from `what` to the copy; lets positions found in `what`
			// be written through to `result`.
			auto delta = result.ptr - what.ptr;
			auto toChar = to[0];
			auto end = what.ptr + what.length;
			do
			{
				(cast(U*)p)[delta] = toChar; // zomg hax lol
				p++;
				p = cast(T*)memchr(p, fromc, end - p);
			} while (p);
			return result;
		}
		else
		{
			// One element for a run of a different length: rebuild via StringBuilder.
			auto p = cast(immutable(T)*)memchr(what.ptr, fromc, what.length);
			if (!p)
				return what;

			auto sb = StringBuilder(what.length);
			do
			{
				// Emit the text before the match followed by the replacement,
				// then continue with what comes after the matched element.
				sb.put(what[0..p-what.ptr], to);
				what = what[p-what.ptr+1..$];
				p = cast(immutable(T)*)memchr(what.ptr, fromc, what.length);
			}
			while (p);

			// Unmatched remainder.
			sb.put(what);
			return sb.get();
		}
	}

	// General case: find the first element with memchr, then compare the rest.
	auto head = from[0];
	auto tail = from[1..$];

	auto p = cast(T*)what.ptr;
	// Last position (exclusive) at which a head can still be followed by a full tail.
	auto end = p + what.length - tail.length;
	p = cast(T*)memchr(p, head, end-p);
	while (p)
	{
		p++;
		if (p[0..tail.length] == tail)
		{
			// First real match found; the code below jumps into the middle of
			// a loop so that this match is replaced without being re-tested.
			if (from.length == to.length)
			{
				// Same length: copy once and overwrite matches in the copy.
				auto result = what.dup;
				// p points one past the head, hence the extra -1.
				auto deltaMinusOne = (result.ptr - what.ptr) - 1;

				goto replaceA;
			dummyA: // compiler complains

				do
				{
					p++;
					if (p[0..tail.length] == tail)
					{
					replaceA:
						(cast(U*)p+deltaMinusOne)[0..to.length] = to[];
					}
					p = cast(T*)memchr(p, head, end-p);
				}
				while (p);

				return result;
			}
			else
			{
				// Different length: assemble the output piecewise.
				// `start` marks the beginning of the not-yet-copied text.
				auto start = cast(T*)what.ptr;
				auto sb = StringBuilder(what.length);
				goto replaceB;
			dummyB: // compiler complains

				do
				{
					p++;
					if (p[0..tail.length] == tail)
					{
					replaceB:
						// Copy everything up to the head (p-1), then the replacement.
						sb.put(RAM[cast(size_t)start .. cast(size_t)p-1], to);
						start = p + tail.length;
						what = what[start-what.ptr..$];
					}
					else
					{
						// False alarm: keep searching after this head.
						what = what[p-what.ptr..$];
					}
					p = cast(T*)memchr(what.ptr, head, what.length);
				}
				while (p);

				//sb.put(what);
				// Copy whatever follows the last replacement.
				sb.put(RAM[cast(size_t)start..cast(size_t)(what.ptr+what.length)]);
				return sb.get();
			}

			assert(0);
		}
		p = cast(T*)memchr(p, head, end-p);
	}

	// No occurrence at all.
	return what;
}
200 
unittest
{
	import std.array;

	// fastReplace must agree with `replace`, and must hand back the
	// very same slice when nothing was replaced.
	void check(string text, string needle, string replacement)
	{
		auto args = `("` ~ text ~ `", "` ~ needle ~ `", "` ~ replacement ~ `")`;

		auto actual   = fastReplace(text, needle, replacement);
		auto expected =     replace(text, needle, replacement);
		assert(actual == expected, `Bad replace: ` ~ args ~ ` == "` ~ actual ~ `"`);

		if (actual == text)
			assert(actual is text, `Pointless reallocation: ` ~ args);
	}

	enum lamb = "Mary had a little lamb";
	string[2][] cases = [
		// needle present
		["a", "b"],
		["a", "aaa"],
		["Mary", "Lucy"],
		["Mary", "Jimmy"],
		["lamb", "goat"],
		["lamb", "sheep"],
		[" l", " x"],
		[" l", " xx"],
		// needle absent
		["X" , "Y" ],
		["XX", "Y" ],
		["X" , "YY"],
		["XX", "YY"],
		["aX", "Y" ],
		["aX", "YY"],
	];
	foreach (pair; cases)
		check(lamb, pair[0], pair[1]);

	// needle longer than the text
	check("foo", "foobar", "bar");
}
234 
/// Split s at every occurrence of the delimiter element d.
///
/// Returns null for an empty s. Otherwise returns the slices of s lying
/// between delimiters (delimiters excluded, empty slices kept), i.e.
/// (number of delimiters)+1 slices; when d does not occur the result is [s].
T[][] fastSplit(T, U)(T[] s, U d)
	if (is(Unqual!T == Unqual!U))
{
	if (!s.length)
		return null;

	static if (T.sizeof == 1)
	{
		// Byte-sized elements: let memchr do the scanning.
		auto p = cast(T*)memchr(s.ptr, d, s.length);
		if (!p)
			return [s];

		// First pass: count delimiters so the result is allocated exactly once.
		size_t n;
		auto end = s.ptr + s.length;
		do
		{
			n++;
			p++;
			p = cast(T*) memchr(p, d, end-p);
		}
		while (p);

		// Second pass: record the slice in front of each delimiter.
		auto result = new T[][n+1];
		n = 0;
		auto start = s.ptr;
		p = cast(T*) memchr(start, d, s.length);
		do
		{
			result[n++] = start[0..p-start];
			start = ++p;
			p = cast(T*) memchr(p, d, end-p);
		}
		while (p);
		result[n] = start[0..end-start];

		return result;
	}
	else
	{
		// memchr compares single bytes and takes a byte count, so it cannot
		// be used for wider elements; scan element by element instead.
		size_t n;
		foreach (c; s)
			if (c == d)
				n++;
		if (!n)
			return [s];

		auto result = new T[][n+1];
		size_t k, start;
		foreach (i, c; s)
			if (c == d)
			{
				result[k++] = s[start..i];
				start = i+1;
			}
		result[k] = s[start..$];

		return result;
	}
}
270 
/// Split text at '\n' (using fastSplit) and drop one trailing '\r'
/// from every resulting line that ends with one.
T[][] splitAsciiLines(T)(T[] text)
	if (is(Unqual!T == char))
{
	auto lines = fastSplit(text, '\n');
	foreach (idx; 0 .. lines.length)
	{
		auto current = lines[idx];
		if (current.length == 0 || current[$-1] != '\r')
			continue;
		lines[idx] = current[0..$-1];
	}
	return lines;
}
280 
unittest
{
	auto lines = splitAsciiLines("a\nb\r\nc\r\rd\n\re\r\n\nf");
	assert(lines == ["a", "b", "c\r\rd", "\re", "", "f"]);

	// Empty input behaves like std.string.splitLines.
	auto fromEmpty = splitAsciiLines(string.init);
	assert(fromEmpty == splitLines(string.init));
}
286 
/// Like std.string.split (one argument version, which splits by
/// whitespace), but only splits by ASCII and does not autodecode.
/// Returns the maximal runs of non-whitespace characters, in order.
T[][] asciiSplit(T)(T[] text)
	if (is(Unqual!T == char))
{
	T[][] words;
	size_t pos = 0;

	while (pos < text.length)
	{
		// Skip the whitespace in front of the next word.
		while (pos < text.length && std.ascii.isWhite(text[pos]))
			pos++;
		if (pos == text.length)
			break;

		// Collect the word itself.
		immutable wordStart = pos;
		while (pos < text.length && !std.ascii.isWhite(text[pos]))
			pos++;
		words ~= text[wordStart..pos];
	}
	return words;
}
317 
unittest
{
	// asciiSplit must agree with the one-argument split on ASCII input.
	auto inputs = ["", " ", "a", " a", "a ", "a b", " a b", "a b ", " a b ",
			"  ", "  a", "a  ", "a  b", "a  b  ", "a b  c"];
	foreach (input; inputs)
	{
		auto expected = input.split;
		auto actual = input.asciiSplit;
		assert(expected == actual, format("Got %s, expected %s", actual, expected));
	}
}
324 
/// Return the slice of s without leading and trailing whitespace
/// (as classified by isWhite). The result always refers to s's memory.
T[] asciiStrip(T)(T[] s)
	if (is(Unqual!T == char))
{
	size_t lo = 0;
	size_t hi = s.length;

	while (lo < hi && isWhite(s[lo]))
		lo++;
	while (hi > lo && isWhite(s[hi-1]))
		hi--;

	return s[lo..hi];
}
334 
unittest
{
	// Nothing to strip: the very same slice comes back.
	string clean = "Hello, world!";
	auto untouched = asciiStrip(clean);
	assert(untouched is clean);

	// Works on mutable buffers too.
	char[] padded = "\r\n\tHello ".dup;
	assert(asciiStrip(padded) == "Hello");
}
341 
/// Covering slice-list of s with interleaved whitespace.
/// Cuts s into consecutive slices that are alternately all-whitespace and
/// all-non-whitespace; concatenating them gives back s. Returns null for
/// an empty s.
T[][] segmentByWhitespace(T)(T[] s)
	if (is(Unqual!T == char))
{
	if (s.length == 0)
		return null;

	T[][] pieces;
	size_t pieceStart = 0;
	bool prevWhite = isWhite(s[0]);

	foreach (pos, char c; s)
	{
		immutable curWhite = isWhite(c);
		if (curWhite != prevWhite)
		{
			// Character class changed: close the current piece here.
			pieces ~= s[pieceStart..pos];
			pieceStart = pos;
			prevWhite = curWhite;
		}
	}
	pieces ~= s[pieceStart..$];

	return pieces;
}
364 
/// Replace every segment of s (as produced by segmentByWhitespace) that
/// contains a '\n' with a single space; other segments are kept as they are.
T[] newlinesToSpaces(T)(T[] s)
	if (is(Unqual!T == char))
{
	auto parts = segmentByWhitespace(s);
	foreach (idx; 0 .. parts.length)
	{
		if (!parts[idx].contains("\n"))
			continue;
		parts[idx] = " ";
	}
	return parts.join();
}
374 
/// Strip s, then replace every odd-indexed segment returned by
/// segmentByWhitespace with a single space and join the segments again.
ascii normalizeWhitespace(ascii s)
{
	auto parts = segmentByWhitespace(strip(s));
	// Segments alternate; after stripping, the odd-indexed ones are the
	// whitespace runs.
	for (size_t idx = 1; idx < parts.length; idx += 2)
		parts[idx] = " ";
	return parts.join();
}
383 
unittest
{
	auto messy = " Mary  had\ta\nlittle\r\n\tlamb";
	auto tidy = normalizeWhitespace(messy);
	assert(tidy == "Mary had a little lamb");
}
388 
/// Split an identifier into its camel-case words. A word ends at the end of
/// the string, before an upper-case letter that follows a lower-case one,
/// and before the last letter of an upper-case run that is followed by a
/// lower-case letter (so "IPString" gives "IP", "String").
string[] splitByCamelCase(string s)
{
	string[] words;
	size_t wordStart = 0;

	for (size_t pos = 1; pos <= s.length; pos++)
	{
		bool boundary;
		if (pos == s.length)
			boundary = true;
		else if (isLower(s[pos-1]) && isUpper(s[pos]))
			boundary = true;
		else
			boundary = pos+1 < s.length
				&& isUpper(s[pos-1]) && isUpper(s[pos]) && isLower(s[pos+1]);

		if (!boundary)
			continue;

		words ~= s[wordStart..pos];
		wordStart = pos;
	}
	return words;
}
404 
unittest
{
	// An acronym in the middle stays together.
	auto middle = splitByCamelCase("parseIPString");
	assert(middle == ["parse", "IP", "String"]);

	// An acronym at the start as well.
	auto leading = splitByCamelCase("IPString");
	assert(leading == ["IP", "String"]);
}
410 
/// Join words into one camel-case identifier: the first word is kept as is,
/// every following word gets its first character upper-cased (ASCII only).
/// Returns null for an empty array. Empty words after the first are skipped.
string camelCaseJoin(string[] arr)
{
	if (!arr.length)
		return null;
	string result = arr[0];
	foreach (s; arr[1..$])
	{
		// An empty word has no first character to capitalize.
		if (!s.length)
			continue;
		result ~= std.ascii.toUpper(s[0]);
		result ~= s[1..$];
	}
	return result;
}
420 
unittest
{
	auto parts = "parse-IP-string".split('-');
	assert(parts.camelCaseJoin() == "parseIPString");
}
425 
426 // ************************************************************************
427 
/// Lookup tables indexed by byte value, holding the result of
/// std.ascii.toLower / std.ascii.toUpper for that value.
private __gshared char[256] asciiLower, asciiUpper;

/// Module constructor: fills both tables.
shared static this()
{
	foreach (code; 0..256)
	{
		asciiUpper[code] = cast(char)std.ascii.toUpper(code);
		asciiLower[code] = cast(char)std.ascii.toLower(code);
	}
}
438 
/// Translate buf in place: every element is replaced by TABLE[element].
void xlat(alias TABLE, T)(T[] buf)
{
	foreach (idx; 0 .. buf.length)
		buf[idx] = TABLE[buf[idx]];
}
444 
/// In-place conversion of a char[] buffer through the asciiLower table.
alias asciiToLower = xlat!(asciiLower, char);
/// In-place conversion of a char[] buffer through the asciiUpper table.
alias asciiToUpper = xlat!(asciiUpper, char);
447 
448 // ************************************************************************
449 
/// Case-insensitive ASCII string.
// The normalization function walks the string by code unit (byCodeUnit) and
// maps std.ascii.toLower over it, so only ASCII letters are case-folded.
// How NormalizedArray applies it is defined outside this section.
alias CIAsciiString = NormalizedArray!(immutable(char), s => s.byCodeUnit.map!(std.ascii.toLower));
452 
///
unittest
{
	CIAsciiString word = "test";

	// Comparison against plain strings ignores ASCII case.
	assert(word == "TEST");
	assert(word >= "Test");
	assert(word <= "Test");

	// Equality and ordering between two CIAsciiString values.
	assert(CIAsciiString("a") == CIAsciiString("A"));
	assert(CIAsciiString("a") != CIAsciiString("B"));
	assert(CIAsciiString("a") <  CIAsciiString("B"));
	assert(CIAsciiString("A") <  CIAsciiString("b"));

	// Non-ASCII letters are not case-folded.
	assert(CIAsciiString("я") != CIAsciiString("Я"));
}
465 
/// Case-insensitive Unicode string.
// The normalization function maps std.uni.toLower over the string treated as
// a range (presumably decoded code points - TODO confirm), so non-ASCII
// letters are case-folded too; see the unittest below.
alias CIUniString = NormalizedArray!(immutable(char), s => s.map!(std.uni.toLower));
468 
///
unittest
{
	CIUniString greeting = "привет";

	// Comparison against plain strings ignores case, also for Cyrillic.
	assert(greeting == "ПРИВЕТ");
	assert(greeting >= "Привет");
	assert(greeting <= "Привет");

	// Equality and ordering between two CIUniString values.
	assert(CIUniString("я") == CIUniString("Я"));
	assert(CIUniString("а") != CIUniString("Б"));
	assert(CIUniString("а") <  CIUniString("Б"));
	assert(CIUniString("А") <  CIUniString("б"));
}
480 
481 // ************************************************************************
482 
483 import std.utf;
484 
/// Convert any data to a valid UTF-8 bytestream, so D's string functions can
/// properly work on it.
/// Every input byte is treated as the code point of the same value and the
/// resulting code point sequence is encoded as UTF-8.
string rawToUTF8(in char[] s)
{
	auto widened = new dchar[s.length];
	foreach (idx; 0 .. s.length)
		widened[idx] = s[idx];
	return toUTF8(widened);
}
494 
/// Undo rawToUTF8.
/// Decodes r and stores every code point as a single byte; asserts that
/// each code point is below U+0100.
ascii UTF8ToRaw(in char[] r) pure
{
	// The output never needs more bytes than the input has code units.
	auto raw = new char[r.length];
	size_t count = 0;
	foreach (dchar codePoint; r)
	{
		assert(codePoint < '\u0100');
		raw[count] = cast(char)codePoint;
		++count;
	}
	return raw[0..count];
}
507 
unittest
{
	// Every single byte value must survive the round trip.
	char[1] unit;
	foreach (value; 0 .. 256)
	{
		unit[0] = cast(char)value;
		assert(UTF8ToRaw(rawToUTF8(unit[])) == unit[],
			format("%s -> %s -> %s",
				cast(ubyte[])unit[],
				cast(ubyte[])rawToUTF8(unit[]),
				cast(ubyte[])UTF8ToRaw(rawToUTF8(unit[]))));
	}
}
517 
/// Where a delegate with this signature is required.
/// Converts s to a string with std.conv.to, without any other processing.
string nullStringTransform(in char[] s)
{
	return s.to!string;
}
520 
/// Return s unchanged if std.utf.validate accepts it; otherwise return
/// rawToUTF8(s).
string forceValidUTF8(string s)
{
	try
		validate(s);
	catch (UTFException)
		return rawToUTF8(s);
	return s;
}
531 
532 // ************************************************************************
533 
/// Return the slice up to the first NUL character,
/// or of the whole array if none is found.
/// The array is taken by reference, so the result refers to the caller's
/// storage.
// `n` has to be a value parameter: declared as a bare identifier it would be
// a type parameter and `C[n]` would denote an associative array type, so
// static arrays would never select this overload.
C[] fromZArray(C, size_t n)(ref C[n] arr)
{
	auto p = arr[].representation.countUntil(0);
	return arr[0 .. p<0 ? $ : p];
}
541 
/// ditto
C[] fromZArray(C)(C[] arr)
{
	immutable nulPos = arr.representation.countUntil(0);
	if (nulPos < 0)
		return arr[0 .. $];
	return arr[0 .. nulPos];
}
548 
unittest
{
	// Fixed-size buffer with an embedded NUL.
	char[4] buffer = "ab\0d";
	auto terminated = buffer.fromZArray;
	assert(terminated == "ab");

	// Completely filled buffer: nothing is cut off.
	buffer[] = "abcd";
	auto full = buffer.fromZArray;
	assert(full == "abcd");
}
556 
unittest
{
	// Same checks for a dynamic array.
	string text = "ab\0d";
	auto terminated = text.fromZArray;
	assert(terminated == "ab");

	text = "abcd";
	auto full = text.fromZArray;
	assert(full == "abcd");
}
564 
565 // ************************************************************************
566 
/// Formats binary data as a hex dump (three-column layout consisting of hex
/// offset, byte values in hex, and printable low-ASCII characters).
/// Each output line covers 16 bytes and ends with '\n'. The input must not
/// be empty (checked with an assert).
string hexDump(const(void)[] b)
{
	auto bytes = cast(const(ubyte)[]) b;
	assert(bytes.length);

	string dump;
	for (size_t rowStart = 0; rowStart < bytes.length; rowStart += 16)
	{
		auto row = bytes[rowStart .. min(rowStart + 16, bytes.length)];

		// Column 1: offset.
		dump ~= format("%08X:  ", rowStart);

		// Column 2: hex values, padded on a short last row, with a
		// separator after the eighth value.
		foreach (col; 0..16)
		{
			dump ~= col < row.length ? format("%02X ", row[col]) : "   ";
			if (col == 7)
				dump ~= "| ";
		}
		dump ~= "  ";

		// Column 3: characters; NUL is shown as a space, other
		// non-printable or non-ASCII bytes as a dot.
		foreach (col; 0..16)
		{
			char shown = ' ';
			if (col < row.length)
			{
				immutable value = row[col];
				if (value == 0)
					shown = ' ';
				else if (value < 32 || value >= 128)
					shown = '.';
				else
					shown = cast(char)value;
			}
			dump ~= shown;
		}
		dump ~= "\n";
	}
	return dump;
}
606 
607 import std.conv;
608 
/// Parse s as a base-16 number of type T.
/// Throws: ConvException if s is not consumed completely (or if
/// std.conv.parse itself rejects the input).
T fromHex(T : ulong = uint, C)(const(C)[] s)
{
	// parse advances s past the characters it consumed.
	auto value = parse!T(s, 16);
	if (s.length != 0)
		throw new ConvException("Could not parse entire string");
	return value;
}
615 
/// Decode a string of hex digit pairs into a newly allocated array of
/// hex.length/2 bytes, using the buffer-filling overload of arrayFromHex.
ubyte[] arrayFromHex(in char[] hex)
{
	auto decoded = new ubyte[hex.length/2];
	arrayFromHex(hex, decoded);
	return decoded;
}
622 
/// Compile-time options for the hex parsing helpers
/// (passed to them as a template value parameter).
struct HexParseConfig
{
	// When true, parseHexDigit validates its argument and throws on a
	// character that is not a hex digit; when false, it converts
	// arithmetically without validating.
	bool checked = true;
	// In unchecked mode: the input may contain lower-case digits (a-f).
	bool lower = true;
	// In unchecked mode: the input may contain upper-case digits (A-F).
	bool upper = true;
}
629 
/// Convert one hex digit character to its value (0..15).
///
/// With config.checked, both letter cases are accepted and any other
/// character throws an Exception. Without it, no validation is done and
/// config.lower / config.upper select which letter cases are converted.
ubyte parseHexDigit(HexParseConfig config = HexParseConfig.init)(char c)
{
	static assert(config.lower || config.upper,
		"Must parse at least either lower or upper case digits");
	static if (config.checked)
	{
		if (c >= '0' && c <= '9')
			return cast(ubyte)(c - '0');
		if (c >= 'a' && c <= 'f')
			return cast(ubyte)(c - 'a' + 10);
		if (c >= 'A' && c <= 'F')
			return cast(ubyte)(c - 'A' + 10);
		throw new Exception("Bad hex digit: " ~ c);
	}
	else
	{
		// Pick the character that stands for the value 10.
		static if (config.lower && config.upper)
			immutable char base = c < 'a' ? 'A' : 'a';
		else static if (config.lower)
			enum char base = 'a';
		else
			enum char base = 'A';

		return c <= '9'
			? cast(ubyte)(c - '0')
			: cast(ubyte)(c - base + 10);
	}
}
662 
/// Decode pairs of hex digits from `hex` into `buf`.
///
/// `buf` must be exactly hex.length/2 bytes long (asserted). In checked
/// configurations an odd number of digits is rejected with an Exception
/// instead of indexing past the end of `hex`.
void arrayFromHex(HexParseConfig config = HexParseConfig.init)(in char[] hex, ubyte[] buf)
{
	static if (config.checked)
		enforce(hex.length % 2 == 0, "Odd number of hex digits");
	assert(buf.length == hex.length/2, "Wrong buffer size for arrayFromHex");
	// size_t index: the bound is a size_t length.
	for (size_t i=0; i<hex.length; i+=2)
		buf[i/2] = cast(ubyte)(
			parseHexDigit!config(hex[i  ])*16 +
			parseHexDigit!config(hex[i+1])
		);
}
672 
/// Fast version for static arrays of known length.
/// Decodes the 2*N hex digits in `hex` into the N bytes of `buf`.
// NOTE(review): the main loop reads `hex` through a ulong pointer and writes
// `buf` through a uint pointer; the shift pattern looks like it assumes a
// little-endian layout and that unaligned access is acceptable - confirm.
void sarrayFromHex(HexParseConfig config = HexParseConfig.init, size_t N, Hex)(in ref Hex hex, ref ubyte[N] buf)
if (is(Hex == char[N*2]))
{
	// Main loop: 8 hex characters (one ulong) become 4 bytes (one uint).
	foreach (i; 0..N/4)
	{
		ulong chars = (cast(ulong*)hex.ptr)[i];
		// Byte k of `chars` is one hex character; even k is the high nibble
		// and odd k the low nibble of output byte k/2.
		uint res =
			(parseHexDigit!config((chars >> (8*0)) & 0xFF) << (4*1)) |
			(parseHexDigit!config((chars >> (8*1)) & 0xFF) << (4*0)) |
			(parseHexDigit!config((chars >> (8*2)) & 0xFF) << (4*3)) |
			(parseHexDigit!config((chars >> (8*3)) & 0xFF) << (4*2)) |
			(parseHexDigit!config((chars >> (8*4)) & 0xFF) << (4*5)) |
			(parseHexDigit!config((chars >> (8*5)) & 0xFF) << (4*4)) |
			(parseHexDigit!config((chars >> (8*6)) & 0xFF) << (4*7)) |
			(parseHexDigit!config((chars >> (8*7)) & 0xFF) << (4*6));
		(cast(uint*)buf.ptr)[i] = res;
	}
	// Remaining N % 4 bytes, one digit pair at a time.
	foreach (i; N/4*4..N)
		buf[i] = cast(ubyte)(
			parseHexDigit!config(hex[i*2  ])*16 +
			parseHexDigit!config(hex[i*2+1])
		);
}
697 
// Exercise sarrayFromHex with every valid HexParseConfig combination.
unittest
{
	// The three loops run over compile-time tuples, so `checked`, `lower`
	// and `upper` can be used to build the config at compile time.
	foreach (checked; TypeTuple!(false, true))
		foreach (lower; TypeTuple!(false, true))
			foreach (upper; TypeTuple!(false, true))
				static if (lower || upper)
				{
					enum config = HexParseConfig(checked, lower, upper);
					// 18 digits -> 9 bytes: two passes of the 8-digit main
					// loop plus one byte for the remainder loop.
					char[18] buf;
					foreach (n; 0..18)
						// Both cases allowed: alternate between them;
						// otherwise use the single allowed case.
						if (lower && upper ? n & 1 : upper)
							buf[n] = hexDigits[n % 16];
						else
							buf[n] = lowerHexDigits[n % 16];
					ubyte[9] res;
					sarrayFromHex!config(buf, res);
					assert(res == [0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0x01], text(res));
				}
}
717 
/// Hex-encode bytes, two digits per byte (high nibble first), using the
/// digit table `digits` (std.ascii.hexDigits by default).
template toHex(alias digits = hexDigits)
{
	/// Encode into the caller-supplied buffer, which must be exactly
	/// data.length*2 characters long (asserted). Returns buf.
	char[] toHex(in ubyte[] data, char[] buf) pure
	{
		assert(buf.length == data.length*2);
		size_t pos = 0;
		foreach (octet; data)
		{
			buf[pos++] = digits[octet >> 4];
			buf[pos++] = digits[octet & 15];
		}
		return buf;
	}

	/// Encode into a newly allocated string.
	string toHex(in ubyte[] data) pure
	{
		auto hex = new char[data.length*2];
		foreach (idx; 0 .. data.length)
		{
			immutable octet = data[idx];
			hex[idx*2  ] = digits[octet >> 4];
			hex[idx*2+1] = digits[octet & 15];
		}
		return hex;
	}
}
742 
/// toHex instantiated with lowerHexDigits as its digit table.
alias toLowerHex = toHex!lowerHexDigits;
744 
/// Write n as T.sizeof*2 hex digits (taken from hexDigits) into buf.
/// The digit positions are visited in reverse order while n is shifted
/// right by one nibble per step.
void toHex(T : ulong, size_t U = T.sizeof*2)(T n, ref char[U] buf)
{
	foreach (pos; Reverse!(RangeTuple!(T.sizeof*2)))
	{
		immutable nibble = n & 0xF;
		buf[pos] = hexDigits[nibble];
		n >>= 4;
	}
}
753 
unittest
{
	// Allocating overload.
	ubyte[] input = [0x12, 0x34];
	auto hex = toHex(input);
	assert(hex == "1234");
}
759 
unittest
{
	// Overload writing into a caller-supplied buffer.
	ubyte[] input = [0x12, 0x34];
	auto target = new char[4];
	toHex(input, target);
	assert(target == "1234");
}
767 
unittest
{
	// Integer overload: fixed width, leading zero kept.
	char[8] hex;
	toHex(0x01234567, hex);
	assert(hex == "01234567");
}
774 
/// How many significant decimal digits does a FP type have
/// (determined empirically)
enum significantDigits(T : real) = 2 * (T.sizeof + 1);

/// Format string for a FP type which includes all necessary
/// significant digits
enum fpFormatString(T) = "%." ~ to!string(significantDigits!T) ~ "g";
782 
/// Get shortest string representation of a FP type that still converts to exactly the same number.
// Approach: format with the full number of significant digits, then try to
// shorten the text while `to!F` of the candidate still yields the input.
template fpToString(F)
{
	string fpToString(F v)
	{
		/// Bypass FPU register, which may contain a different precision
		// (done by storing the value to a static variable of type F and reading it back)
		static F forceType(F d) { static F n; n = d; return n; }

		// Full-precision rendering into a fixed 64-character buffer.
		StaticBuf!(char, 64) buf;
		formattedWrite(&buf, fpFormatString!F, forceType(v));
		char[] s = buf.data();

		// NaN and infinity are returned as formatted; there is nothing to shorten.
		if (s != "nan" && s != "-nan" && s != "inf" && s != "-inf")
		{
			// Sanity check: the full-precision text must convert back to v.
			if (forceType(to!F(s)) != v)
			{
				static if (is(F == real))
				{
					// Something funny with DM libc real parsing... e.g. 0.6885036635121051783
					return s.idup;
				}
				else
					assert(false, "Initial conversion fails: " ~ format(fpFormatString!F, to!F(s)));
			}

			// Walk from the last character towards the front. For a digit
			// '0'..'8', bump it by one and test whether the text cut off
			// right after it converts back to v; keep the shorter text if
			// so, otherwise restore the digit.
			foreach_reverse (i; 1..s.length)
				if (s[i]>='0' && s[i]<='8')
				{
					s[i]++;
					if (forceType(to!F(s[0..i+1]))==v)
						s = s[0..i+1];
					else
						s[i]--;
				}
			// Then drop trailing characters for as long as the value is
			// unchanged (never leaving a trailing '.' position behind it).
			while (s.length>2 && s[$-1]!='.' && forceType(to!F(s[0..$-1]))==v)
				s = s[0..$-1];
		}
		return s.idup;
	}

	// Round-trip test on random bit patterns (not compiled for real).
	static if (!is(F == real))
	unittest
	{
		// Gives access to the raw bytes of the value under test.
		union U
		{
			ubyte[F.sizeof] bytes;
			F d;
			string toString() { return (fpFormatString!F ~ " %a [%(%02X %)]").format(d, d, bytes[]); }
		}
		import std.random : Xorshift, uniform;
		import std.stdio : stderr;
		// Default-constructed generator (no explicit seed is given here).
		Xorshift rng;
		foreach (n; 0..10000)
		{
			U u;
			foreach (ref b; u.bytes[])
				b = uniform!ubyte(rng);
			// Never taken here: the enclosing static if excludes real.
			static if (is(F == real))
				u.bytes[7] |= 0x80; // require normalized value
			scope(failure) stderr.writeln("Input:\t", u);
			auto s = fpToString(u.d);
			scope(failure) stderr.writeln("Result:\t", s);
			if (s == "nan" || s == "-nan")
				continue; // there are many NaNs...
			// The parsed result must match the input bit for bit.
			U r;
			r.d = to!F(s);
			assert(r.bytes == u.bytes,
				"fpToString mismatch:\nOutput:\t%s".format(r));
		}
	}
}
854 
/// fpToString instantiated for double.
alias doubleToString = fpToString!double;
856 
// Instantiation check only: forces fpToString to be instantiated for
// float and real; the aliases themselves are not used.
unittest
{
	alias floatToString = fpToString!float;
	alias realToString = fpToString!real;
}
862 
/// Convert a number to its string form: floating-point values go through
/// fpToString, integral values through toDec.
string numberToString(T)(T v)
	if (isNumeric!T)
{
	// Integral types convert implicitly to real as well, so `is(T : real)`
	// cannot distinguish them from floating-point types.
	static if (isFloatingPoint!T)
		return fpToString(v);
	else
		return toDec(v);
}
871 
872 // ************************************************************************
873 
/// Simpler implementation of Levenshtein string distance
/// (compares code units; uses a full (s.length+1) x (t.length+1) table).
int stringDistance(string s, string t)
{
	immutable rows = cast(int)s.length;
	immutable cols = cast(int)t.length;
	if (rows == 0) return cols;
	if (cols == 0) return rows;

	// table[r][c] = distance between s[0..r] and t[0..c]
	auto table = new int[][](rows+1, cols+1);
	foreach (r, ref row; table)
		row[0] = cast(int)r;
	foreach (c, ref cell; table[0])
		cell = cast(int)c;

	foreach (r; 1..rows+1)
		foreach (c; 1..cols+1)
		{
			immutable deletion     = table[r-1][c  ] + 1;
			immutable insertion    = table[r  ][c-1] + 1;
			immutable substitution = table[r-1][c-1] + (s[r-1] == t[c-1] ? 0 : 1);
			table[r][c] = min(deletion, insertion, substitution);
		}
	return table[rows][cols];
}
899 
/// Return a number between 0.0 and 1.0 indicating how similar two strings are
/// (1.0 if identical)
/// Computed as 1 - stringDistance / (length of the longer string).
float stringSimilarity(string string1, string string2)
{
	float longest = max(string1.length, string2.length);
	// Two empty strings are identical.
	if (longest == 0)
		return 1;

	float distance = stringDistance(string1, string2);
	return 1f - distance/longest;
}
913 
/// Select best match from a list of items.
/// Returns -1 if none are above the threshold.
/// Items are compared to target case-insensitively (via toLower) using
/// stringSimilarity; among equally good items the last one wins.
sizediff_t findBestMatch(in string[] items, string target, float threshold = 0.7)
{
	sizediff_t found = -1;
	float best = 0;
	// The target is the same for every item: lower-case it only once.
	auto loweredTarget = toLower(target);

	foreach (i, item; items)
	{
		float match = stringSimilarity(toLower(item), loweredTarget);
		if (match>threshold && match>=best)
		{
			best = match;
			found = i;
		}
	}

	return found;
}
933 
/// Select best match from a list of items.
/// Returns null if none are above the threshold.
string selectBestFrom(in string[] items, string target, float threshold = 0.7)
{
	immutable bestIndex = findBestMatch(items, target, threshold);
	if (bestIndex < 0)
		return null;
	return items[bestIndex];
}
941 
942 // ************************************************************************
943 
944 
/// Build a string of `length` characters, each picked with
/// std.random.uniform from `chars`.
string randomString(int length=20, string chars="abcdefghijklmnopqrstuvwxyz")
{
	import std.random;

	string generated;
	foreach (unused; 0 .. length)
		generated ~= chars[uniform(0, chars.length)];
	return generated;
}