ae.utils.text source code

1 /**
2  * Utility code related to string and text processing.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.utils.text;
15 
16 import std.algorithm;
17 import std.ascii;
18 import std.exception;
19 import std.conv;
20 import std.format;
21 import std.string;
22 import std.traits;
23 import std.typetuple;
24 
25 import core.stdc.string;
26 
27 import ae.utils.array;
28 import ae.utils.meta;
29 import ae.utils.textout;
30 
// Pin the unqualified name `indexOf` to the std.string implementation.
alias indexOf = std.string.indexOf;
32 
33 public import ae.utils.text.ascii : ascii, DecimalSize, toDec, toDecFixed;
34 
35 // ************************************************************************
36 
/// Convenience helper: reports whether `what` occurs anywhere in `str`.
/// Both arrays must have the same unqualified element type.
bool contains(T, U)(T[] str, U[] what)
	if (is(Unqual!T == Unqual!U))
{
	// indexOf yields a negative value when there is no occurrence.
	immutable position = indexOf(str, what);
	return position >= 0;
}
43 
/// CTFE helper: formats a single object using the format string `fmt`.
/// Equivalent to `format(fmt, obj)` with the argument order swapped.
string formatAs(T)(auto ref T obj, string fmt)
{
	return fmt.format(obj);
}
49 
/// Consume a LF or CRLF terminated line from s.
/// Sets s to null and returns the remainder
/// if there is no line terminator in s.
/// The returned line has its terminator removed (via `chomp`).
T[] eatLine(T)(ref T[] s, bool eatIncompleteLines = true)
{
	auto line = s.skipUntil([T('\n')], eatIncompleteLines);
	return line.chomp();
}

/// Deprecated variant taking the end-of-input policy as a template argument.
deprecated template eatLine(OnEof onEof)
{
	T[] eatLine(T)(ref T[] s)
	{
		auto line = s.eatUntil!onEof([T('\n')]);
		return line.chomp();
	}
}

unittest
{
	string text = "Hello\nworld";
	assert(text.eatLine() == "Hello");
	assert(text.eatLine() == "world");
	assert(text is null);
	assert(text.eatLine() is null);
}
74 
// Uses memchr (not Boyer-Moore), best for short strings.
/// Replaces every non-overlapping occurrence of `from` in `what` with `to`,
/// scanning left to right.
///
/// Returns: `what` itself (same slice, no allocation) when `from` is empty,
/// longer than `what`, or does not occur; otherwise a freshly allocated array.
///
/// Only single-byte element types are supported because candidate positions
/// are located with `memchr`.
T[] fastReplace(T)(T[] what, T[] from, T[] to)
	if (T.sizeof == 1) // TODO (uses memchr)
{
	alias U = Unqual!T;

	if (what.length < from.length || from.length == 0)
		return what;

	auto base = cast(const(U)*)what.ptr;
	auto head = from[0];
	auto tail = from[1 .. $];
	// One past the last index at which a complete match can still begin.
	// Restricting memchr to this window keeps every comparison in bounds.
	immutable limit = what.length - from.length + 1;

	// Index of the first match starting at or after `pos`,
	// or what.length (never a valid match index) if there is none.
	size_t findMatch(size_t pos)
	{
		while (pos < limit)
		{
			auto p = cast(const(U)*)memchr(base + pos, head, limit - pos);
			if (!p)
				break;
			pos = cast(size_t)(p - base);
			if (what[pos + 1 .. pos + from.length] == tail)
				return pos;
			pos++;
		}
		return what.length;
	}

	immutable first = findMatch(0);
	if (first == what.length)
		return what; // nothing to replace: hand back the original slice

	// Pass 1: count matches so the result can be allocated exactly once.
	// Resuming at m + from.length makes matches non-overlapping.
	size_t count = 0;
	for (size_t m = first; m != what.length; m = findMatch(m + from.length))
		count++;

	auto result = new U[what.length - count * from.length + count * to.length];

	// Pass 2: copy the unmatched gaps and the replacement text.
	size_t src = 0, dst = 0;
	for (size_t m = first; m != what.length; m = findMatch(m + from.length))
	{
		immutable gap = m - src;
		result[dst .. dst + gap] = what[src .. m];
		dst += gap;
		result[dst .. dst + to.length] = to[];
		dst += to.length;
		src = m + from.length;
	}
	result[dst .. $] = what[src .. $];

	// The buffer was allocated above and is not referenced anywhere else.
	return cast(T[])result;
}

unittest
{
	import std.array;
	void test(string haystack, string from, string to)
	{
		auto description = `("` ~ haystack ~ `", "` ~ from ~ `", "` ~ to ~ `")`;

		auto r1 = fastReplace(haystack, from, to);
		auto r2 =     replace(haystack, from, to);
		assert(r1 == r2, `Bad replace: ` ~ description ~ ` == "` ~ r1 ~ `"`);

		if (r1 == haystack)
			assert(r1 is haystack, `Pointless reallocation: ` ~ description);
	}

	test("Mary had a little lamb", "a", "b");
	test("Mary had a little lamb", "a", "aaa");
	test("Mary had a little lamb", "Mary", "Lucy");
	test("Mary had a little lamb", "Mary", "Jimmy");
	test("Mary had a little lamb", "lamb", "goat");
	test("Mary had a little lamb", "lamb", "sheep");
	test("Mary had a little lamb", " l", " x");
	test("Mary had a little lamb", " l", " xx");

	test("Mary had a little lamb", "X" , "Y" );
	test("Mary had a little lamb", "XX", "Y" );
	test("Mary had a little lamb", "X" , "YY");
	test("Mary had a little lamb", "XX", "YY");
	test("Mary had a little lamb", "aX", "Y" );
	test("Mary had a little lamb", "aX", "YY");

	test("foo", "foobar", "bar");

	// Overlapping candidates must be replaced left to right, without overlap.
	test("aaa", "aa", "bb");
	test("aaaa", "aa", "b");
	// A partial match at the very end of the input must not be read past.
	test("abcab", "abc", "x");
	test("a b a ", " b", "xyz");
	// Deletion (empty replacement).
	test("Mary had a little lamb", "a", "");
}
232 
/// Splits `s` at every occurrence of the single element `d`.
///
/// Returns: null for empty input, `[s]` when `d` does not occur, otherwise
/// an array of slices of `s` (delimiters excluded, empty fields preserved).
///
/// Single-byte element types are scanned with `memchr`; wider element types
/// use a plain element-wise scan, since `memchr` compares individual bytes.
T[][] fastSplit(T, U)(T[] s, U d)
	if (is(Unqual!T == Unqual!U))
{
	if (!s.length)
		return null;

	// Index of the next delimiter at or after `pos`, or s.length if none.
	size_t next(size_t pos)
	{
		static if (T.sizeof == 1)
		{
			auto base = cast(const(ubyte)*)s.ptr;
			auto p = cast(const(ubyte)*)memchr(base + pos, d, s.length - pos);
			return p ? cast(size_t)(p - base) : s.length;
		}
		else
		{
			while (pos < s.length && s[pos] != d)
				pos++;
			return pos;
		}
	}

	immutable first = next(0);
	if (first == s.length)
		return [s];

	// Pass 1: count delimiters so the result is allocated exactly once.
	size_t count = 0;
	for (size_t q = first; q < s.length; q = next(q + 1))
		count++;

	// Pass 2: slice out the fields.
	auto result = new T[][count + 1];
	size_t start = 0, n = 0;
	for (size_t q = first; q < s.length; q = next(q + 1))
	{
		result[n++] = s[start .. q];
		start = q + 1;
	}
	result[n] = s[start .. $];

	return result;
}
268 
/// Splits `text` into lines at every LF, additionally dropping one
/// trailing CR from each resulting line (so CRLF line ends are handled).
/// A CR that is not directly followed by LF is left in place.
T[][] splitAsciiLines(T)(T[] text)
	if (is(Unqual!T == char))
{
	auto result = fastSplit(text, '\n');
	foreach (i; 0 .. result.length)
	{
		auto current = result[i];
		if (current.length > 0 && current[$ - 1] == '\r')
			result[i] = current[0 .. $ - 1];
	}
	return result;
}

unittest
{
	assert(splitAsciiLines("a\nb\r\nc\r\rd\n\re\r\n\nf") == ["a", "b", "c\r\rd", "\re", "", "f"]);
	assert(splitAsciiLines(string.init) == splitLines(string.init));
}
284 
/// Like std.string.split (one argument version, which splits by
/// whitespace), but only splits by ASCII and does not autodecode.
/// Returns slices of `text`; runs of whitespace never produce empty words.
T[][] asciiSplit(T)(T[] text)
	if (is(Unqual!T == char))
{
	T[][] words;
	size_t pos = 0;

	while (pos < text.length)
	{
		// Skip the whitespace run preceding the next word.
		while (pos < text.length && std.ascii.isWhite(text[pos]))
			pos++;
		if (pos == text.length)
			break;

		// Consume the word itself.
		immutable wordBegin = pos;
		while (pos < text.length && !std.ascii.isWhite(text[pos]))
			pos++;
		words ~= text[wordBegin .. pos];
	}

	return words;
}

unittest
{
	foreach (s; ["", " ", "a", " a", "a ", "a b", " a b", "a b ", " a b ",
			"  ", "  a", "a  ", "a  b", "a  b  ", "a b  c"])
		assert(s.split == s.asciiSplit, format("Got %s, expected %s", s.asciiSplit, s.split));
}
322 
/// Returns the sub-slice of `s` with leading and trailing whitespace
/// (as classified by `isWhite`) removed. No allocation takes place.
T[] asciiStrip(T)(T[] s)
	if (is(Unqual!T == char))
{
	size_t lo = 0;
	size_t hi = s.length;

	while (lo < hi && isWhite(s[lo]))
		lo++;
	while (hi > lo && isWhite(s[hi - 1]))
		hi--;

	return s[lo .. hi];
}

unittest
{
	string s = "Hello, world!";
	assert(asciiStrip(s) is s);
	assert(asciiStrip("\r\n\tHello ".dup) == "Hello");
}
339 
/// Covering slice-list of s with interleaved whitespace.
/// The result alternates between whitespace runs and non-whitespace runs,
/// and concatenating all segments reproduces `s`. Returns null for empty input.
T[][] segmentByWhitespace(T)(T[] s)
	if (is(Unqual!T == char))
{
	if (s.length == 0)
		return null;

	T[][] pieces;
	size_t runStart = 0;
	bool prevWhite = isWhite(s[0]);

	foreach (idx; 0 .. s.length)
	{
		immutable curWhite = isWhite(s[idx]);
		if (curWhite != prevWhite)
		{
			// Class changed: close the current run and open a new one here.
			pieces ~= s[runStart .. idx];
			runStart = idx;
			prevWhite = curWhite;
		}
	}
	pieces ~= s[runStart .. $];

	return pieces;
}
362 
/// Replaces every whitespace run of `s` that contains a line feed with a
/// single space; all other segments are kept unchanged.
T[] newlinesToSpaces(T)(T[] s)
	if (is(Unqual!T == char))
{
	auto parts = segmentByWhitespace(s);
	foreach (i; 0 .. parts.length)
	{
		if (contains(parts[i], "\n"))
			parts[i] = " ";
	}
	return join(parts);
}
372 
/// Strips `s` and collapses every interior whitespace run to a single space.
ascii normalizeWhitespace(ascii s)
{
	auto parts = segmentByWhitespace(strip(s));
	// After stripping, the segment list starts with a non-whitespace run,
	// so the odd-numbered entries are the whitespace runs.
	for (size_t i = 1; i < parts.length; i += 2)
		parts[i] = " ";
	return join(parts);
}

unittest
{
	assert(normalizeWhitespace(" Mary  had\ta\nlittle\r\n\tlamb") == "Mary had a little lamb");
}
386 
/// Splits an identifier into its camel-case words.
/// A word boundary is placed between a lower-case and an upper-case letter,
/// and before the last capital of an upper-case run that is followed by a
/// lower-case letter (so "IPString" becomes "IP", "String").
string[] splitByCamelCase(string s)
{
	string[] words;
	size_t wordStart = 0;

	for (size_t i = 1; i <= s.length; i++)
	{
		bool boundary;
		if (i == s.length)
			boundary = true; // end of input closes the last word
		else if (isLower(s[i-1]) && isUpper(s[i]))
			boundary = true;
		else
			boundary = i+1 < s.length && isUpper(s[i-1]) && isUpper(s[i]) && isLower(s[i+1]);

		if (boundary)
		{
			words ~= s[wordStart..i];
			wordStart = i;
		}
	}

	return words;
}

unittest
{
	assert(splitByCamelCase("parseIPString") == ["parse", "IP", "String"]);
	assert(splitByCamelCase("IPString") == ["IP", "String"]);
}
408 
/// Joins words into a camel-case identifier: the first word is used as-is,
/// and each following word has its first character upper-cased (ASCII only).
/// Empty words after the first contribute nothing to the result.
/// Returns null for an empty array.
string camelCaseJoin(string[] arr)
{
	if (!arr.length)
		return null;
	string result = arr[0];
	foreach (s; arr[1..$])
		if (s.length) // an empty component has no first character to capitalize
			result ~= std.ascii.toUpper(s[0]) ~ s[1..$];
	return result;
}

unittest
{
	assert("parse-IP-string".split('-').camelCaseJoin() == "parseIPString");
	assert("parse--string".split('-').camelCaseJoin() == "parseString");
}
423 
424 // ************************************************************************
425 
/// 256-entry lookup tables mapping each byte value to its ASCII
/// lower-case / upper-case counterpart. Filled once at program start.
private __gshared char[256] asciiLower, asciiUpper;

shared static this()
{
	for (int code = 0; code < 256; code++)
	{
		asciiUpper[code] = cast(char)std.ascii.toUpper(code);
		asciiLower[code] = cast(char)std.ascii.toLower(code);
	}
}

/// Translates `buf` in place, replacing each element with `TABLE[element]`.
void xlat(alias TABLE, T)(T[] buf)
{
	foreach (i; 0 .. buf.length)
		buf[i] = TABLE[buf[i]];
}

/// In-place ASCII case conversion of a mutable character buffer.
alias asciiToLower = xlat!(asciiLower, char);
alias asciiToUpper = xlat!(asciiUpper, char); /// ditto
445 
446 // ************************************************************************
447 
/// Case-insensitive ASCII string.
/// Values are normalized for comparison by lower-casing each code unit
/// with std.ascii.toLower, so non-ASCII characters are compared as-is.
alias CIAsciiString = NormalizedArray!(immutable(char), (s) => map!(std.ascii.toLower)(byCodeUnit(s)));

///
unittest
{
	CIAsciiString s = "test";
	assert(s == "TEST");
	assert(s >= "Test" && s <= "Test");
	assert(CIAsciiString("a") == CIAsciiString("A"));
	assert(CIAsciiString("a") != CIAsciiString("B"));
	assert(CIAsciiString("a") <  CIAsciiString("B"));
	assert(CIAsciiString("A") <  CIAsciiString("b"));
	// Non-ASCII letters are not case-folded.
	assert(CIAsciiString("я") != CIAsciiString("Я"));
}
463 
/// Case-insensitive Unicode string.
/// Values are normalized for comparison by lower-casing each element of
/// the mapped string with std.uni.toLower.
alias CIUniString = NormalizedArray!(immutable(char), (s) => map!(std.uni.toLower)(s));

///
unittest
{
	CIUniString s = "привет";
	assert(s == "ПРИВЕТ");
	assert(s >= "Привет" && s <= "Привет");
	assert(CIUniString("я") == CIUniString("Я"));
	assert(CIUniString("а") != CIUniString("Б"));
	assert(CIUniString("а") <  CIUniString("Б"));
	assert(CIUniString("А") <  CIUniString("б"));
}
478 
479 // ************************************************************************
480 
481 import std.utf;
482 
/// Convert any data to a valid UTF-8 bytestream, so D's string functions can
/// properly work on it.
/// Every input code unit is widened to the code point of the same numeric
/// value, and the resulting code point sequence is encoded as UTF-8.
string rawToUTF8(in char[] s)
{
	dchar[] widened;
	widened.length = s.length;
	foreach (idx; 0 .. s.length)
		widened[idx] = s[idx];
	return toUTF8(widened);
}
492 
/// Undo rawToUTF8.
/// Decodes `r` and stores each code point as one byte. Every decoded code
/// point must be below U+0100 (checked with an assert).
ascii UTF8ToRaw(in char[] r) pure
{
	// The output can never be longer than the input.
	auto raw = new char[r.length];
	size_t count = 0;
	foreach (dchar codePoint; r)
	{
		assert(codePoint < '\u0100');
		raw[count] = cast(char)codePoint;
		count++;
	}
	return raw[0..count];
}

unittest
{
	// Every byte value must survive the round trip.
	char[1] c;
	for (int i=0; i<256; i++)
	{
		c[0] = cast(char)i;
		assert(UTF8ToRaw(rawToUTF8(c[])) == c[], format("%s -> %s -> %s", cast(ubyte[])c[], cast(ubyte[])rawToUTF8(c[]), cast(ubyte[])UTF8ToRaw(rawToUTF8(c[]))));
	}
}
515 
/// Where a delegate with this signature is required.
/// Converts its argument to a `string` without otherwise changing it.
string nullStringTransform(in char[] s)
{
	return s.to!string;
}
518 
/// Returns `s` unchanged when it is valid UTF-8; otherwise returns the
/// result of passing it through `rawToUTF8`.
string forceValidUTF8(string s)
{
	bool valid = true;
	try
		validate(s);
	catch (UTFException)
		valid = false;

	return valid ? s : rawToUTF8(s);
}
529 
530 // ************************************************************************
531 
/// Return the slice up to the first NUL character,
/// or of the whole array if none is found.
/// This overload accepts a static array by reference; `n` is its length.
C[] fromZArray(C, size_t n)(ref C[n] arr)
{
	// Slice explicitly so the search runs over a dynamic-array view.
	auto p = arr[].representation.countUntil(0);
	return arr[0 .. p<0 ? $ : p];
}

/// ditto
C[] fromZArray(C)(C[] arr)
{
	auto p = arr.representation.countUntil(0);
	return arr[0 .. p<0 ? $ : p];
}

unittest
{
	char[4] arr = "ab\0d";
	assert(arr.fromZArray == "ab");
	arr[] = "abcd";
	assert(arr.fromZArray == "abcd");
	// The static-array overload must be instantiable explicitly.
	assert(fromZArray!(char, 4)(arr) == "abcd");
}

unittest
{
	string arr = "ab\0d";
	assert(arr.fromZArray == "ab");
	arr = "abcd";
	assert(arr.fromZArray == "abcd");
}
562 
563 // ************************************************************************
564 
/// Formats binary data as a hex dump (three-column layout consisting of hex
/// offset, byte values in hex, and printable low-ASCII characters).
/// Each output row covers 16 bytes and ends with a newline; the input must
/// not be empty (checked with an assert).
string hexDump(const(void)[] b)
{
	auto bytes = cast(const(ubyte)[]) b;
	assert(bytes.length);

	string dump;
	for (size_t rowStart = 0; rowStart < bytes.length; rowStart += 16)
	{
		// Column 1: offset of the row.
		dump ~= format("%08X:  ", rowStart);

		// Column 2: hex values, padded past the end of the data,
		// with a separator after the eighth byte.
		foreach (col; 0..16)
		{
			immutable idx = rowStart + col;
			if (idx < bytes.length)
				dump ~= format("%02X ", bytes[idx]);
			else
				dump ~= "   ";
			if (col == 7)
				dump ~= "| ";
		}
		dump ~= "  ";

		// Column 3: character view. NUL and padding show as a space,
		// bytes below 32 or at/above 128 show as a dot.
		foreach (col; 0..16)
		{
			immutable idx = rowStart + col;
			char shown;
			if (idx >= bytes.length)
				shown = ' ';
			else if (bytes[idx] == 0)
				shown = ' ';
			else if (bytes[idx] < 32 || bytes[idx] >= 128)
				shown = '.';
			else
				shown = cast(char)bytes[idx];
			dump ~= shown;
		}
		dump ~= "\n";
	}
	return dump;
}
604 
605 import std.conv;
606 
/// Parses the whole of `s` as a hexadecimal number of type `T`.
/// Throws: ConvException if `s` is not entirely consumed by the parse
/// (or if the underlying `parse` call itself fails).
T fromHex(T : ulong = uint, C)(const(C)[] s)
{
	auto value = s.parse!T(16);
	// parse advances `s`; anything left over is trailing garbage.
	if (s.length != 0)
		throw new ConvException("Could not parse entire string");
	return value;
}
613 
/// Decodes a string of hexadecimal digit pairs into a newly allocated
/// byte array (two digits per byte, most significant digit first).
/// Throws: Exception if `hex` has an odd length or contains a character
/// that is not a hexadecimal digit.
ubyte[] arrayFromHex(in char[] hex)
{
	auto buf = new ubyte[hex.length/2];
	arrayFromHex(hex, buf);
	return buf;
}

/// Compile-time options for hex digit parsing.
struct HexParseConfig
{
	bool checked = true; /// Validate each digit (throw on a bad character).
	bool lower = true;   /// Input may contain lower-case digits (used by the unchecked path).
	bool upper = true;   /// Input may contain upper-case digits (used by the unchecked path).
}

/// Converts one hexadecimal digit character to its value (0 .. 15).
/// In checked mode an invalid character throws an Exception; in unchecked
/// mode the input is assumed valid and no validation is performed.
ubyte parseHexDigit(HexParseConfig config = HexParseConfig.init)(char c)
{
	static assert(config.lower || config.upper,
		"Must parse at least either lower or upper case digits");
	static if (config.checked)
	{
		switch (c)
		{
			case '0': .. case '9': return cast(ubyte)(c - '0');
			case 'a': .. case 'f': return cast(ubyte)(c - 'a' + 10);
			case 'A': .. case 'F': return cast(ubyte)(c - 'A' + 10);
			default: throw new Exception("Bad hex digit: " ~ c);
		}
	}
	else
	{
		if (c <= '9')
			return cast(ubyte)(c - '0');
		static if (config.lower && config.upper)
		{
			// Upper-case letters sort before lower-case ones in ASCII.
			if (c < 'a')
				return cast(ubyte)(c - 'A' + 10);
			else
				return cast(ubyte)(c - 'a' + 10);
		}
		else
			static if (config.lower)
				return cast(ubyte)(c - 'a' + 10);
			else
				return cast(ubyte)(c - 'A' + 10);
	}
}

/// Decodes `hex` into the caller-supplied buffer `buf`, which must hold
/// exactly `hex.length/2` bytes.
/// In checked mode an odd-length `hex` throws an Exception; in unchecked
/// mode a trailing unpaired digit is ignored. Either way no element outside
/// `hex` or `buf` is accessed.
void arrayFromHex(HexParseConfig config = HexParseConfig.init)(in char[] hex, ubyte[] buf)
{
	assert(buf.length == hex.length/2, "Wrong buffer size for arrayFromHex");
	static if (config.checked)
		enforce(hex.length % 2 == 0, "Odd number of hex digits");
	// Iterate over complete digit pairs only, using a size_t index.
	foreach (i; 0 .. hex.length / 2)
		buf[i] = cast(ubyte)(
			parseHexDigit!config(hex[i*2  ])*16 +
			parseHexDigit!config(hex[i*2+1])
		);
}
670 
/// Fast version for static arrays of known length.
/// Processes eight hex characters (one `ulong` load) into four output bytes
/// (one `uint` store) at a time, then handles the remaining bytes one by one.
void sarrayFromHex(HexParseConfig config = HexParseConfig.init, size_t N, Hex)(in ref Hex hex, ref ubyte[N] buf)
if (is(Hex == char[N*2]))
{
	enum wordCount = N / 4;
	auto src = cast(ulong*)hex.ptr;
	auto dst = cast(uint*)buf.ptr;

	foreach (w; 0 .. wordCount)
	{
		ulong chars = src[w];
		uint res = 0;
		// Character k sits in bits 8*k .. 8*k+7 of the loaded word. Within
		// each pair the first character is the high nibble, hence k ^ 1.
		foreach (k; 0 .. 8)
			res |= parseHexDigit!config((chars >> (8*k)) & 0xFF) << (4*(k ^ 1));
		dst[w] = res;
	}

	// Tail: bytes that do not fill a whole 4-byte group.
	foreach (i; wordCount*4 .. N)
		buf[i] = cast(ubyte)(
			parseHexDigit!config(hex[i*2  ])*16 +
			parseHexDigit!config(hex[i*2+1])
		);
}

unittest
{
	// Exercise every valid combination of the configuration flags.
	foreach (checked; TypeTuple!(false, true))
		foreach (lower; TypeTuple!(false, true))
			foreach (upper; TypeTuple!(false, true))
				static if (lower || upper)
				{
					enum config = HexParseConfig(checked, lower, upper);
					char[18] buf;
					foreach (n; 0..18)
						if (lower && upper ? n & 1 : upper)
							buf[n] = hexDigits[n % 16];
						else
							buf[n] = lowerHexDigits[n % 16];
					ubyte[9] res;
					sarrayFromHex!config(buf, res);
					assert(res == [0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0x01], text(res));
				}
}
715 
/// Hex-encodes bytes using the digit table `digits`
/// (two characters per byte, most significant nibble first).
template toHex(alias digits = hexDigits)
{
	/// Encodes into `buf`, which must be exactly twice as long as `data`.
	/// Returns `buf`.
	char[] toHex(in ubyte[] data, char[] buf) pure
	{
		assert(buf.length == data.length*2);
		size_t o = 0;
		foreach (b; data)
		{
			buf[o++] = digits[b >> 4];
			buf[o++] = digits[b & 15];
		}
		return buf;
	}

	/// Encodes into a newly allocated string.
	string toHex(in ubyte[] data) pure
	{
		auto buf = new char[data.length*2];
		foreach (i; 0 .. data.length)
		{
			immutable b = data[i];
			buf[i*2  ] = digits[b >> 4];
			buf[i*2+1] = digits[b & 15];
		}
		return buf;
	}
}

/// Like `toHex`, but producing lower-case digits.
alias toLowerHex = toHex!lowerHexDigits;
742 
/// Writes the upper-case hexadecimal representation of the integer `n`
/// into `buf`, using two digits per byte of `T` (zero-padded on the left).
void toHex(T : ulong, size_t U = T.sizeof*2)(T n, ref char[U] buf)
{
	enum digitCount = T.sizeof*2;
	// Fill from the least significant digit (rightmost position) backwards.
	foreach (i; Reverse!(RangeTuple!digitCount))
	{
		buf[i] = hexDigits[n & 0xF];
		n >>= 4;
	}
}

unittest
{
	ubyte[] data = [0x12, 0x34];
	assert(toHex(data) == "1234");
}

unittest
{
	ubyte[] data = [0x12, 0x34];
	char[] output = new char[4];
	toHex(data, output);
	assert(output == "1234");
}

unittest
{
	char[8] output;
	toHex(0x01234567, output);
	assert(output == "01234567");
}
772 
/// How many significant decimal digits does a FP type have
/// (determined empirically)
enum significantDigits(T : real) = (T.sizeof + 1) * 2;

/// Format string for a FP type which includes all necessary
/// significant digits
enum fpFormatString(T) = text("%.", significantDigits!T, "g");
780 
/// Get shortest string representation of a FP type that still converts to exactly the same number.
template fpToString(F)
{
	string fpToString(F v)
	{
		/// Bypass FPU register, which may contain a different precision
		static F forceType(F d) { static F n; n = d; return n; }

		// Start from the full-precision rendering.
		StaticBuf!(char, 64) buf;
		formattedWrite(&buf, fpFormatString!F, forceType(v));
		char[] s = buf.data();

		// Non-finite values are returned exactly as formatted.
		immutable special = s == "nan" || s == "-nan" || s == "inf" || s == "-inf";
		if (special)
			return s.idup;

		if (forceType(to!F(s)) != v)
		{
			static if (is(F == real))
			{
				// Something funny with DM libc real parsing... e.g. 0.6885036635121051783
				return s.idup;
			}
			else
				assert(false, "Initial conversion fails: " ~ format(fpFormatString!F, to!F(s)));
		}

		// From right to left, try bumping each digit up by one and cutting
		// the string right after it; keep the cut if the value still
		// round-trips, otherwise restore the digit.
		foreach_reverse (i; 1..s.length)
		{
			if (s[i] < '0' || s[i] > '8')
				continue;
			s[i]++;
			if (forceType(to!F(s[0..i+1]))==v)
				s = s[0..i+1];
			else
				s[i]--;
		}

		// Drop trailing characters for as long as the value is unchanged.
		while (s.length>2 && s[$-1]!='.' && forceType(to!F(s[0..$-1]))==v)
			s = s[0..$-1];

		return s.idup;
	}

	static if (!is(F == real))
	unittest
	{
		// Round-trip random bit patterns through fpToString and to!F.
		union U
		{
			ubyte[F.sizeof] bytes;
			F d;
			string toString() { return (fpFormatString!F ~ " %a [%(%02X %)]").format(d, d, bytes[]); }
		}
		import std.random : Xorshift, uniform;
		import std.stdio : stderr;
		Xorshift rng;
		foreach (n; 0..10000)
		{
			U u;
			foreach (ref b; u.bytes[])
				b = uniform!ubyte(rng);
			static if (is(F == real))
				u.bytes[7] |= 0x80; // require normalized value
			scope(failure) stderr.writeln("Input:\t", u);
			auto s = fpToString(u.d);
			scope(failure) stderr.writeln("Result:\t", s);
			if (s == "nan" || s == "-nan")
				continue; // there are many NaNs...
			U r;
			r.d = to!F(s);
			assert(r.bytes == u.bytes,
				"fpToString mismatch:\nOutput:\t%s".format(r));
		}
	}
}

/// Shortest round-tripping representation of a `double`.
alias doubleToString = fpToString!double;

unittest
{
	// Instantiate the remaining floating-point variants.
	alias floatToString = fpToString!float;
	alias realToString = fpToString!real;
}
860 
/// Converts any numeric value to a string: floating-point values go through
/// `fpToString`, integral values through `toDec`.
string numberToString(T)(T v)
	if (isNumeric!T)
{
	// Note: `is(T : real)` would also be true for every integral type
	// (they convert implicitly to real), so test the category explicitly.
	static if (isFloatingPoint!T)
		return fpToString(v);
	else
		return toDec(v);
}
869 
870 // ************************************************************************
871 
/// Simpler implementation of Levenshtein string distance
/// Compares the strings code unit by code unit and returns the minimum
/// number of single-unit insertions, deletions and substitutions.
int stringDistance(string s, string t)
{
	immutable rows = cast(int)s.length;
	immutable cols = cast(int)t.length;
	if (rows == 0) return cols;
	if (cols == 0) return rows;

	// table[r][c] = distance between s[0..r] and t[0..c]
	auto table = new int[][](rows+1, cols+1);
	foreach (r; 0..rows+1) table[r][0] = r;
	foreach (c; 0..cols+1) table[0][c] = c;

	foreach (r; 1..rows+1)
		foreach (c; 1..cols+1)
		{
			immutable deletion     = table[r-1][c  ] + 1;
			immutable insertion    = table[r  ][c-1] + 1;
			immutable substitution = table[r-1][c-1] + (s[r-1] == t[c-1] ? 0 : 1);
			table[r][c] = min(deletion, insertion, substitution);
		}

	return table[rows][cols];
}
897 
/// Return a number between 0.0 and 1.0 indicating how similar two strings are
/// (1.0 if identical)
/// Computed as one minus the edit distance divided by the longer length;
/// two empty strings count as identical.
float stringSimilarity(string string1, string string2)
{
	immutable size_t longest =
		string1.length < string2.length ? string2.length : string1.length;
	if (longest == 0)
		return 1;

	float distance = stringDistance(string1, string2);
	float scale = longest;
	return 1f - distance/scale;
}
911 
/// Select best match from a list of items.
/// Returns -1 if none are above the threshold.
/// Comparison is case-insensitive (both sides are lower-cased). When several
/// items share the best score, the index of the last one is returned.
sizediff_t findBestMatch(in string[] items, string target, float threshold = 0.7)
{
	sizediff_t found = -1;
	float best = 0;

	// The target does not change between iterations; lower-case it once.
	auto loweredTarget = toLower(target);

	foreach (i, item; items)
	{
		float match = stringSimilarity(toLower(item), loweredTarget);
		if (match>threshold && match>=best)
		{
			best = match;
			found = i;
		}
	}

	return found;
}
931 
/// Select best match from a list of items.
/// Returns null if none are above the threshold.
string selectBestFrom(in string[] items, string target, float threshold = 0.7)
{
	immutable best = findBestMatch(items, target, threshold);
	if (best < 0)
		return null;
	return items[best];
}
939 
940 // ************************************************************************
941 
942 
/// Builds a string of `length` characters, each drawn uniformly at random
/// (via std.random's `uniform`) from `chars`.
string randomString(int length=20, string chars="abcdefghijklmnopqrstuvwxyz")
{
	import std.random;
	import std.range;

	auto picks = iota(length).map!(_ => chars[uniform(0, chars.length)]);
	return array(picks);
}