ae.utils.text source code

1 /**
2  * Utility code related to string and text processing.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.utils.text;
15 
16 import std.algorithm;
17 import std.ascii;
18 import std.exception;
19 import std.conv;
20 import std.format;
21 import std.string;
22 import std.traits;
23 import std.typetuple;
24 
25 import core.stdc.stdio : snprintf, sscanf;
26 import core.stdc.string;
27 
28 import ae.utils.array;
29 import ae.utils.meta;
30 import ae.utils.text.parsefp;
31 import ae.utils.textout;
32 
// Make the unqualified name `indexOf` refer to std.string's implementation
// (presumably to disambiguate between same-named symbols from the imports
// above - TODO confirm).
alias indexOf = std.string.indexOf;
34 
35 public import ae.utils.text.ascii : ascii, DecimalSize, toDec, toDecFixed, asciiToLower, asciiToUpper;
36 public import ae.utils.array : contains;
37 
38 // ************************************************************************
39 
/// CTFE helper: formats `obj` using the format string `fmt`.
/// Equivalent to `format(fmt, obj)`, with the value first so that it can be
/// used in UFCS chains (`value.formatAs("%05d")`).
string formatAs(T)(auto ref T obj, string fmt)
{
	string formatted = format(fmt, obj);
	return formatted;
}
45 
/// Removes the first LF- or CRLF-terminated line from the front of `s` and
/// returns it with the line terminator chomped off.
/// Searching for the LF and the treatment of input without a terminator
/// (controlled by `eatIncompleteLines`) are delegated to `skipUntil`.
/// With the default setting, an unterminated remainder is returned as the
/// last line and `s` is set to null (see the unittest below).
T[] eatLine(T)(ref T[] s, bool eatIncompleteLines = true)
{
	auto line = skipUntil(s, [T('\n')], eatIncompleteLines);
	return chomp(line);
}

/// Deprecated form which takes the end-of-input policy as a template
/// argument and forwards it to `eatUntil`.
deprecated template eatLine(OnEof onEof)
{
	T[] eatLine(T)(ref T[] s)
	{
		auto line = eatUntil!onEof(s, [T('\n')]);
		return chomp(line);
	}
}

unittest
{
	string s = "Hello\nworld";
	assert(s.eatLine() == "Hello");
	assert(s.eatLine() == "world");
	assert(s is null);
	assert(s.eatLine() is null);
}
70 
/// Replaces every non-overlapping occurrence of `from` in `what` with `to`,
/// scanning from left to right (same results as `std.array.replace`).
/// Uses memchr (not Boyer-Moore), best for short strings.
/// Params:
///   what = text to search in
///   from = text to search for; if empty or longer than `what`, nothing is replaced
///   to   = replacement text
/// Returns: `what` itself (no allocation) if nothing was replaced,
///   otherwise a newly allocated array.
T[] fastReplace(T)(T[] what, T[] from, T[] to)
	if (T.sizeof == 1) // TODO (uses memchr)
{
	alias U = Unqual!T;
	enum notFound = size_t.max;

	if (what.length < from.length || from.length == 0)
		return what;

	auto base = cast(const(ubyte)*)what.ptr;
	// Last index at which a complete occurrence of `from` can still begin.
	// Restricting memchr to [0 .. lastStart] guarantees that comparing the
	// tail never reads beyond the end of `what`.
	immutable lastStart = what.length - from.length;
	auto head = from[0];
	auto tail = from[1 .. $];

	// Index of the first occurrence of `from` starting at or after `pos`,
	// or `notFound`.
	size_t findNext(size_t pos)
	{
		while (pos <= lastStart)
		{
			auto hit = cast(const(ubyte)*)memchr(base + pos, head, lastStart - pos + 1);
			if (!hit)
				break;
			pos = hit - base;
			if (what[pos + 1 .. pos + from.length] == tail)
				return pos;
			pos++;
		}
		return notFound;
	}

	auto pos = findNext(0);
	if (pos == notFound)
		return what; // nothing to do - avoid a pointless reallocation

	if (from.length == to.length)
	{
		// Same size: patch a copy in place.
		U[] result = what.dup;
		do
		{
			result[pos .. pos + to.length] = to[];
			// Resume after the match, so that matches never overlap.
			pos = findNext(pos + from.length);
		}
		while (pos != notFound);
		return cast(T[])result;
	}

	// Different size: assemble the result piece by piece.
	U[] result;
	result.reserve(what.length);
	size_t copied = 0; // how much of `what` has been consumed so far
	do
	{
		result ~= what[copied .. pos];
		result ~= to;
		copied = pos + from.length;
		pos = findNext(copied);
	}
	while (pos != notFound);
	result ~= what[copied .. $];
	return cast(T[])result;
}

unittest
{
	import std.array;
	void test(string haystack, string from, string to)
	{
		auto description = `("` ~ haystack ~ `", "` ~ from ~ `", "` ~ to ~ `")`;

		auto r1 = fastReplace(haystack, from, to);
		auto r2 =     replace(haystack, from, to);
		assert(r1 == r2, `Bad replace: ` ~ description ~ ` == "` ~ r1 ~ `"`);

		if (r1 == haystack)
			assert(r1 is haystack, `Pointless reallocation: ` ~ description);
	}

	test("Mary had a little lamb", "a", "b");
	test("Mary had a little lamb", "a", "aaa");
	test("Mary had a little lamb", "Mary", "Lucy");
	test("Mary had a little lamb", "Mary", "Jimmy");
	test("Mary had a little lamb", "lamb", "goat");
	test("Mary had a little lamb", "lamb", "sheep");
	test("Mary had a little lamb", " l", " x");
	test("Mary had a little lamb", " l", " xx");

	test("Mary had a little lamb", "X" , "Y" );
	test("Mary had a little lamb", "XX", "Y" );
	test("Mary had a little lamb", "X" , "YY");
	test("Mary had a little lamb", "XX", "YY");
	test("Mary had a little lamb", "aX", "Y" );
	test("Mary had a little lamb", "aX", "YY");

	test("foo", "foobar", "bar");

	// Overlapping candidates must not be replaced twice
	test("aaa", "aa", "bb");
	test("aaaa", "aa", "b");
	test("abab", "ab", "ba");
	// The head character occurring at the very end of the haystack
	test("ab a", "ab", "xyz");
	test("ab a", "ab", "xy");
}
228 
/// Splits `s` at every occurrence of the delimiter element `d`.
/// The returned pieces are slices of `s` (nothing is copied) and do not
/// include the delimiter.
/// Returns: null for an empty `s`; `[s]` if `d` does not occur;
///   otherwise one slice more than there are delimiters (empty slices are
///   kept, including a trailing one if `s` ends with `d`).
T[][] fastSplit(T, U)(T[] s, U d)
	if (is(Unqual!T == Unqual!U))
{
	if (!s.length)
		return null;

	// Index of the first delimiter at or after `from`, or s.length if none.
	size_t nextDelim(size_t from)
	{
		static if (T.sizeof == 1)
		{
			auto base = cast(const(ubyte)*)s.ptr;
			auto hit = cast(const(ubyte)*)memchr(base + from, d, s.length - from);
			return hit ? hit - base : s.length;
		}
		else
		{
			// memchr compares single bytes, so it can't be used for wider elements
			while (from < s.length && s[from] != d)
				from++;
			return from;
		}
	}

	auto pos = nextDelim(0);
	if (pos == s.length)
		return [s];

	// First pass: count the delimiters, so the result is allocated only once
	size_t count = 0;
	for (auto q = pos; q != s.length; q = nextDelim(q + 1))
		count++;

	// Second pass: slice
	auto result = new T[][count + 1];
	size_t n = 0, start = 0;
	for (; pos != s.length; pos = nextDelim(start))
	{
		result[n++] = s[start .. pos];
		start = pos + 1;
	}
	result[n] = s[start .. $];

	return result;
}
264 
/// Splits `text` into lines at each LF (using `fastSplit`), removing a
/// single trailing CR from each line. A CR elsewhere in a line is kept.
T[][] splitAsciiLines(T)(T[] text)
	if (is(Unqual!T == char))
{
	auto result = fastSplit(text, '\n');
	foreach (i; 0 .. result.length)
	{
		auto current = result[i];
		if (current.length > 0 && current[$-1] == '\r')
			result[i] = current[0 .. $-1];
	}
	return result;
}

unittest
{
	assert(splitAsciiLines("a\nb\r\nc\r\rd\n\re\r\n\nf") == ["a", "b", "c\r\rd", "\re", "", "f"]);
	assert(splitAsciiLines(string.init) == splitLines(string.init));
}
280 
/// Like std.string.split (one argument version, which splits by
/// whitespace), but only splits by ASCII and does not autodecode.
/// Returns: slices of `text`, one per run of non-whitespace characters
///   (an empty result if there are none).
T[][] asciiSplit(T)(T[] text)
	if (is(Unqual!T == char))
{
	T[][] words;
	size_t pos = 0;

	while (pos < text.length)
	{
		// Skip the whitespace in front of the next word
		while (pos < text.length && std.ascii.isWhite(text[pos]))
			pos++;
		if (pos == text.length)
			break;

		// Consume the word itself
		auto wordStart = pos;
		while (pos < text.length && !std.ascii.isWhite(text[pos]))
			pos++;
		words ~= text[wordStart .. pos];
	}

	return words;
}

unittest
{
	foreach (s; ["", " ", "a", " a", "a ", "a b", " a b", "a b ", " a b ",
			"  ", "  a", "a  ", "a  b", "a  b  ", "a b  c"])
		assert(s.split == s.asciiSplit, format("Got %s, expected %s", s.asciiSplit, s.split));
}
318 
/// Returns the slice of `s` that remains after dropping leading and
/// trailing whitespace (as determined by `isWhite`). Nothing is copied;
/// if there is nothing to strip, the result is identical to `s`.
T[] asciiStrip(T)(T[] s)
	if (is(Unqual!T == char))
{
	size_t lo = 0;
	size_t hi = s.length;

	while (lo < hi && isWhite(s[lo]))
		lo++;
	while (hi > lo && isWhite(s[hi-1]))
		hi--;

	return s[lo .. hi];
}

unittest
{
	string s = "Hello, world!";
	assert(asciiStrip(s) is s);
	assert(asciiStrip("\r\n\tHello ".dup) == "Hello");
}
335 
/// Covering slice-list of s with interleaved whitespace:
/// cuts `s` into consecutive runs which alternate between whitespace-only
/// and whitespace-free; concatenating the runs gives back `s`.
/// Returns: null if `s` is empty.
T[][] segmentByWhitespace(T)(T[] s)
	if (is(Unqual!T == char))
{
	if (s.length == 0)
		return null;

	T[][] runs;
	size_t runStart = 0;
	bool prevWhite = isWhite(s[0]);

	foreach (i; 1 .. s.length)
	{
		immutable curWhite = isWhite(s[i]);
		if (curWhite == prevWhite)
			continue;

		// Whitespace-ness changed: close the current run, open the next
		runs ~= s[runStart .. i];
		runStart = i;
		prevWhite = curWhite;
	}
	runs ~= s[runStart .. $];

	return runs;
}
358 
/// Replaces each whitespace run of `s` that contains a newline with a
/// single space. Other whitespace runs and all other text are kept as is.
T[] newlinesToSpaces(T)(T[] s)
	if (is(Unqual!T == char))
{
	auto parts = segmentByWhitespace(s);
	foreach (i; 0 .. parts.length)
	{
		if (parts[i].contains("\n"))
			parts[i] = " ";
	}
	return parts.join();
}
368 
/// Strips `s` and replaces every remaining whitespace run with a single
/// space.
ascii normalizeWhitespace(ascii s)
{
	auto parts = segmentByWhitespace(strip(s));
	// The stripped text starts with a non-whitespace run, so the whitespace
	// runs are the ones at odd indices.
	for (size_t i = 1; i < parts.length; i += 2)
		parts[i] = " ";
	return parts.join();
}

unittest
{
	assert(normalizeWhitespace(" Mary  had\ta\nlittle\r\n\tlamb") == "Mary had a little lamb");
}
382 
/// Splits a camelCase identifier into its words.
/// A new word begins where a lowercase letter is followed by an uppercase
/// one, and before the last letter of an uppercase run that is followed by
/// a lowercase letter (so "IPString" gives "IP", "String").
/// Returns: slices of `s`; empty for an empty `s`.
string[] splitByCamelCase(string s)
{
	string[] words;
	size_t wordStart = 0;

	for (size_t i = 1; i <= s.length; i++)
	{
		bool boundary;
		if (i == s.length)
			boundary = true; // end of input closes the last word
		else if (isLower(s[i-1]) && isUpper(s[i]))
			boundary = true; // "aB"
		else
			boundary = i+1 < s.length
				&& isUpper(s[i-1]) && isUpper(s[i]) && isLower(s[i+1]); // "ABc"

		if (boundary)
		{
			words ~= s[wordStart .. i];
			wordStart = i;
		}
	}

	return words;
}

unittest
{
	assert(splitByCamelCase("parseIPString") == ["parse", "IP", "String"]);
	assert(splitByCamelCase("IPString") == ["IP", "String"]);
}
404 
/// Joins words into a camelCase identifier: the first word is used as is,
/// and every following word has its first character upper-cased (ASCII).
/// Empty words after the first one are skipped.
/// Returns: null if `arr` is empty.
string camelCaseJoin(string[] arr)
{
	if (!arr.length)
		return null;
	string result = arr[0];
	foreach (word; arr[1..$])
	{
		if (!word.length)
			continue; // e.g. from splitting "a--b" by '-'; there is no first character to capitalize
		result ~= std.ascii.toUpper(word[0]);
		result ~= word[1..$];
	}
	return result;
}

unittest
{
	assert("parse-IP-string".split('-').camelCaseJoin() == "parseIPString");
	assert(["parse", "", "string"].camelCaseJoin() == "parseString");
}
419 
420 // ************************************************************************
421 
/// Case-insensitive ASCII string.
/// A `NormalizedArray` over `immutable(char)` whose normalization function
/// maps every code unit through `std.ascii.toLower`. As the unittest below
/// demonstrates, only ASCII letters are case-folded: non-ASCII text such as
/// "я" / "Я" still compares as different.
alias CIAsciiString = NormalizedArray!(immutable(char), s => s.byCodeUnit.map!(std.ascii.toLower));

///
unittest
{
	CIAsciiString s = "test";
	assert(s == "TEST");
	assert(s >= "Test" && s <= "Test");
	assert(CIAsciiString("a") == CIAsciiString("A"));
	assert(CIAsciiString("a") != CIAsciiString("B"));
	assert(CIAsciiString("a") <  CIAsciiString("B"));
	assert(CIAsciiString("A") <  CIAsciiString("b"));
	assert(CIAsciiString("я") != CIAsciiString("Я"));
}
437 
/// Case-insensitive Unicode string.
/// A `NormalizedArray` over `immutable(char)` whose normalization function
/// maps the elements of the string through `std.uni.toLower`, so that
/// (as the unittest below demonstrates) non-ASCII letters such as
/// "я" / "Я" compare as equal too.
alias CIUniString = NormalizedArray!(immutable(char), s => s.map!(std.uni.toLower));

///
unittest
{
	CIUniString s = "привет";
	assert(s == "ПРИВЕТ");
	assert(s >= "Привет" && s <= "Привет");
	assert(CIUniString("я") == CIUniString("Я"));
	assert(CIUniString("а") != CIUniString("Б"));
	assert(CIUniString("а") <  CIUniString("Б"));
	assert(CIUniString("А") <  CIUniString("б"));
}
452 
453 // ************************************************************************
454 
455 import std.utf;
456 
/// Convert any data to a valid UTF-8 bytestream, so D's string functions can
/// properly work on it.
/// Every code unit (byte) of `s` is treated as the code point with the same
/// numeric value, and the resulting code points are encoded as UTF-8.
string rawToUTF8(in char[] s)
{
	dchar[] widened;
	widened.length = s.length;
	foreach (idx; 0 .. s.length)
		widened[idx] = s[idx];
	return toUTF8(widened);
}
466 
/// Undo rawToUTF8: decodes `r` and stores each code point as a single byte.
/// All code points in `r` must be below U+0100 (checked with an assert).
ascii UTF8ToRaw(in char[] r) pure
{
	// The decoded form can't have more elements than `r` has code units
	auto raw = new char[r.length];
	size_t count;
	foreach (dchar codePoint; r)
	{
		assert(codePoint < '\u0100');
		raw[count] = cast(char)codePoint;
		count++;
	}
	return raw[0 .. count];
}

unittest
{
	char[1] c;
	for (int i=0; i<256; i++)
	{
		c[0] = cast(char)i;
		assert(UTF8ToRaw(rawToUTF8(c[])) == c[], format("%s -> %s -> %s", cast(ubyte[])c[], cast(ubyte[])rawToUTF8(c[]), cast(ubyte[])UTF8ToRaw(rawToUTF8(c[]))));
	}
}
489 
/// Where a delegate with this signature is required.
/// Performs no transformation: just converts `s` to a `string`.
string nullStringTransform(in char[] s)
{
	return s.to!string;
}
492 
/// Returns `s` unchanged if it is valid UTF-8 (according to `validate`);
/// otherwise, returns it re-encoded with `rawToUTF8`.
string forceValidUTF8(string s)
{
	try
		validate(s);
	catch (UTFException)
		return rawToUTF8(s);
	return s;
}
503 
504 // ************************************************************************
505 
/// Return the slice up to the first NUL character,
/// or of the whole array if none is found.
/// This overload accepts static arrays by reference; the result is a slice
/// of `arr`.
C[] fromZArray(C, size_t n)(ref C[n] arr)
{
	// `n` must be a value parameter: as a type parameter, `C[n]` would
	// denote an associative array rather than a static array.
	auto p = arr[].representation.countUntil(0);
	return arr[0 .. p<0 ? $ : p];
}
513 
/// Return the slice of `arr` up to the first NUL character,
/// or the whole array if none is found.
C[] fromZArray(C)(C[] arr)
{
	auto nul = arr.representation.countUntil(0);
	if (nul < 0)
		return arr;
	return arr[0 .. nul];
}

unittest
{
	char[4] arr = "ab\0d";
	assert(arr.fromZArray == "ab");
	arr[] = "abcd";
	assert(arr.fromZArray == "abcd");
}

unittest
{
	string arr = "ab\0d";
	assert(arr.fromZArray == "ab");
	arr = "abcd";
	assert(arr.fromZArray == "abcd");
}
536 
537 // ************************************************************************
538 
/// Formats binary data as a hex dump (three-column layout consisting of hex
/// offset, byte values in hex, and printable low-ASCII characters).
/// Each row covers 16 bytes and ends with a newline. In the character
/// column, NUL bytes are shown as a space, and other bytes outside
/// 32..127 as a dot. `b` must not be empty (checked with an assert).
string hexDump(const(void)[] b)
{
	enum size_t rowSize = 16;

	auto bytes = cast(const(ubyte)[]) b;
	assert(bytes.length);

	string dump;
	for (size_t offset = 0; offset < bytes.length; offset += rowSize)
	{
		auto rowEnd = offset + rowSize < bytes.length ? offset + rowSize : bytes.length;
		auto row = bytes[offset .. rowEnd];

		// Column 1: offset
		dump ~= format("%08X:  ", offset);

		// Column 2: hex values, padded to full width, with a separator in the middle
		foreach (col; 0 .. rowSize)
		{
			dump ~= col < row.length ? format("%02X ", row[col]) : "   ";
			if (col == 7)
				dump ~= "| ";
		}
		dump ~= "  ";

		// Column 3: characters, padded to full width
		foreach (col; 0 .. rowSize)
		{
			char shown = ' ';
			if (col < row.length)
			{
				auto value = row[col];
				if (value == 0)
					shown = ' ';
				else if (value < 32 || value >= 128)
					shown = '.';
				else
					shown = cast(char)value;
			}
			dump ~= shown;
		}
		dump ~= "\n";
	}
	return dump;
}
578 
579 import std.conv;
580 
/// Parses the entire string `s` as a hexadecimal number of type `T`.
/// Throws: ConvException if `s` is not a hexadecimal number, or if
///   anything remains in `s` after the number.
T fromHex(T : ulong = uint, C)(const(C)[] s)
{
	auto value = parse!T(s, 16); // advances `s` past the parsed digits
	if (s.length != 0)
		throw new ConvException("Could not parse entire string");
	return value;
}
587 
/// Decodes the hex string `hex` (two digits per byte) into a newly
/// allocated array of `hex.length/2` bytes.
ubyte[] arrayFromHex(in char[] hex)
{
	auto decoded = new ubyte[hex.length / 2];
	arrayFromHex(hex, decoded);
	return decoded;
}
594 
/// Compile-time options for `parseHexDigit` and the functions built on it.
struct HexParseConfig
{
	/// Validate the input: `parseHexDigit` throws for characters which are
	/// not hex digits. When false, no validation is done.
	bool checked = true;
	/// Accept lower-case digits ('a'..'f').
	/// `parseHexDigit` consults this only when `checked` is false.
	bool lower = true;
	/// Accept upper-case digits ('A'..'F').
	/// `parseHexDigit` consults this only when `checked` is false.
	bool upper = true;
}
601 
/// Converts one hexadecimal digit character to its value (0..15).
/// With `config.checked`, digits of either case are accepted, and any other
/// character results in an exception.
/// Without it, no validation is done: the character is assumed to be a
/// valid digit of a case enabled in `config`, and the result for anything
/// else is meaningless.
ubyte parseHexDigit(HexParseConfig config = HexParseConfig.init)(char c)
{
	static assert(config.lower || config.upper,
		"Must parse at least either lower or upper case digits");

	static if (!config.checked)
	{
		if (c <= '9')
			return cast(ubyte)(c - '0');

		static if (config.lower && config.upper)
			return cast(ubyte)(c - (c < 'a' ? 'A' : 'a') + 10); // upper-case letters sort before 'a'
		else static if (config.lower)
			return cast(ubyte)(c - 'a' + 10);
		else
			return cast(ubyte)(c - 'A' + 10);
	}
	else
	{
		if (c >= '0' && c <= '9')
			return cast(ubyte)(c - '0');
		if (c >= 'a' && c <= 'f')
			return cast(ubyte)(c - 'a' + 10);
		if (c >= 'A' && c <= 'F')
			return cast(ubyte)(c - 'A' + 10);
		throw new Exception("Bad hex digit: " ~ c);
	}
}
634 
/// Decodes the hex string `hex` (two digits per byte) into `buf`.
/// `hex` must have an even length, and `buf` must be exactly
/// `hex.length/2` bytes long (both checked with asserts).
/// Individual digits are decoded with `parseHexDigit!config`.
void arrayFromHex(HexParseConfig config = HexParseConfig.init)(in char[] hex, ubyte[] buf)
{
	assert(hex.length % 2 == 0, "Odd number of hex digits for arrayFromHex");
	assert(buf.length == hex.length/2, "Wrong buffer size for arrayFromHex");
	// Iterating over the output keeps the index a size_t, so it can neither
	// overflow nor run past the last complete digit pair.
	foreach (i, ref b; buf)
		b = cast(ubyte)(
			parseHexDigit!config(hex[i*2  ])*16 +
			parseHexDigit!config(hex[i*2+1])
		);
}
644 
/// Fast version for static arrays of known length.
/// Decodes the `N*2` hex digits in `hex` into the `N` bytes of `buf`.
/// Most of the input is processed in groups of 8 digits, each read as one
/// `ulong` and written out as one `uint` (4 bytes); the remaining
/// `N % 4` bytes are decoded one digit pair at a time.
// NOTE(review): the grouped path takes the first digit of each group from
// the lowest 8 bits of the ulong and stores the first output byte in the
// lowest 8 bits of the uint, i.e. it looks like it relies on a
// little-endian target - confirm. It also accesses `hex` and `buf` through
// ulong/uint pointers without regard to their alignment.
void sarrayFromHex(HexParseConfig config = HexParseConfig.init, size_t N, Hex)(in ref Hex hex, ref ubyte[N] buf)
if (is(Hex == char[N*2]))
{
	// Whole groups: 8 hex digits -> 4 bytes
	foreach (i; 0..N/4)
	{
		ulong chars = (cast(ulong*)hex.ptr)[i];
		// Within each digit pair, the first digit is the high nibble,
		// hence the swapped (1,0), (3,2), ... nibble positions.
		uint res =
			(parseHexDigit!config((chars >> (8*0)) & 0xFF) << (4*1)) |
			(parseHexDigit!config((chars >> (8*1)) & 0xFF) << (4*0)) |
			(parseHexDigit!config((chars >> (8*2)) & 0xFF) << (4*3)) |
			(parseHexDigit!config((chars >> (8*3)) & 0xFF) << (4*2)) |
			(parseHexDigit!config((chars >> (8*4)) & 0xFF) << (4*5)) |
			(parseHexDigit!config((chars >> (8*5)) & 0xFF) << (4*4)) |
			(parseHexDigit!config((chars >> (8*6)) & 0xFF) << (4*7)) |
			(parseHexDigit!config((chars >> (8*7)) & 0xFF) << (4*6));
		(cast(uint*)buf.ptr)[i] = res;
	}
	// Leftover bytes which do not fill a whole group
	foreach (i; N/4*4..N)
		buf[i] = cast(ubyte)(
			parseHexDigit!config(hex[i*2  ])*16 +
			parseHexDigit!config(hex[i*2+1])
		);
}

unittest
{
	// Exercise every valid configuration; 9 bytes cover both the grouped
	// path (two groups) and the leftover path (one byte).
	foreach (checked; TypeTuple!(false, true))
		foreach (lower; TypeTuple!(false, true))
			foreach (upper; TypeTuple!(false, true))
				static if (lower || upper)
				{
					enum config = HexParseConfig(checked, lower, upper);
					char[18] buf;
					foreach (n; 0..18)
						if (lower && upper ? n & 1 : upper)
							buf[n] = hexDigits[n % 16];
						else
							buf[n] = lowerHexDigits[n % 16];
					ubyte[9] res;
					sarrayFromHex!config(buf, res);
					assert(res == [0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0x01], text(res));
				}
}
689 
/// Encodes bytes as hexadecimal text, two digits per byte, high nibble
/// first. `digits` is the digit alphabet, indexed by nibble value
/// (by default `hexDigits`, i.e. upper-case).
template toHex(alias digits = hexDigits)
{
	/// Writes the encoding of `data` to `buf`, whose length must be
	/// exactly `data.length*2` (checked with an assert). Returns `buf`.
	char[] toHex(in ubyte[] data, char[] buf) pure
	{
		assert(buf.length == data.length*2);
		size_t o = 0;
		foreach (b; data)
		{
			buf[o++] = digits[b >> 4];
			buf[o++] = digits[b & 0x0F];
		}
		return buf;
	}

	/// Returns the encoding of `data` as a newly allocated string.
	string toHex(in ubyte[] data) pure
	{
		auto buf = new char[data.length*2];
		size_t o = 0;
		foreach (b; data)
		{
			buf[o++] = digits[b >> 4];
			buf[o++] = digits[b & 0x0F];
		}
		return buf;
	}
}

/// `toHex` producing lower-case digits.
alias toLowerHex = toHex!lowerHexDigits;
716 
/// Writes the value of the integer `n` to `buf` as upper-case hexadecimal,
/// zero-padded to the full width of its type (`T.sizeof*2` digits),
/// most significant digit first.
void toHex(T : ulong, size_t U = T.sizeof*2)(T n, ref char[U] buf)
{
	Unqual!T remaining = n;
	// Fill from the last (least significant) digit backwards
	foreach_reverse (i; 0 .. T.sizeof*2)
	{
		buf[i] = hexDigits[remaining & 0xF];
		remaining >>= 4;
	}
}

unittest
{
	ubyte[] bytes = [0x12, 0x34];
	assert(toHex(bytes) == "1234");
}

unittest
{
	ubyte[] bytes = [0x12, 0x34];
	char[] buf = new char[4];
	toHex(bytes, buf);
	assert(buf == "1234");
}

unittest
{
	char[8] buf;
	toHex(0x01234567, buf);
	assert(buf == "01234567");
}
747 
/// Returns the value of the integer `n` as upper-case hexadecimal,
/// zero-padded to the full width of its type, in a static array
/// (no allocation).
char[T.sizeof*2] toHex(T : ulong)(T n)
{
	typeof(return) hex;
	toHex(n, hex);
	return hex;
}

unittest
{
	assert(toHex(0x01234567) == "01234567");
}
759 
/// How many significant decimal digits does a FP type have
/// (determined empirically - valid for all D FP types on x86/64)
enum significantDigits(T : real) = 2 + 2 * T.sizeof;

/// Format string for a FP type which includes all necessary
/// significant digits
enum fpFormatString(T) = "%." ~ text(significantDigits!T) ~ "g";
/// Length modifier used in the C format string for the FP type `T`:
/// empty for `float`, "l" for `double`, "L" for `real`.
/// Not defined for any other type.
template cWidthString(T)
{
	static if (is(Unqual!T == float))
		enum cWidthString = "";
	else
	static if (is(Unqual!T == double))
		enum cWidthString = "l";
	else
	static if (is(Unqual!T == real))
		enum cWidthString = "L";
}
/// Like `fpFormatString`, but with the length modifier from `cWidthString`
/// inserted before the conversion character (used with `snprintf`).
enum fpCFormatString(T) = "%." ~ text(significantDigits!T) ~ cWidthString!T ~ "g";
779 
/// Calls `snprintf` with a static array as the destination, passing the
/// array's length as the size limit. `args` (the format string followed by
/// its arguments) are forwarded as they are.
/// Returns: whatever `snprintf` returns.
private auto safeSprintf(size_t N, Args...)(ref char[N] buf, auto ref Args args) @trusted @nogc
{
	char* destination = buf.ptr;
	return snprintf(destination, N, args);
}
784 
/// Formats the floating-point value `val` into a fixed-size buffer, and
/// then attempts to shorten the text for as long as it still parses back
/// (using `tryParse`) to exactly the same value.
/// Returns: a `StaticBuf!(char, 64)` whose `pos` marks the end of the text.
private auto fpToBuf(Q)(Q val) @safe nothrow @nogc
{
	alias F = Unqual!Q;

	/// Bypass FPU register, which may contain a different precision
	static F forceType(F d) { static F n; n = d; return n; }

	enum isReal = is(F == real);

	StaticBuf!(char, 64) buf = void;

	// MSVC workaround from std.format:
	version (CRuntime_Microsoft)
	{
		import std.math : isNaN, isInfinity;
		immutable double v = val; // convert early to get "inf" in case of overflow
		{
			string s;
			if (isNaN(v))
				s = "nan"; // snprintf writes 1.#QNAN
			else if (isInfinity(v))
				s = val >= 0 ? "inf" : "-inf"; // snprintf writes 1.#INF
			else
				goto L1;
			// NaN / infinity: emit the fixed spelling and skip snprintf altogether
			buf.buf[0..s.length] = s;
			buf.pos = s.length;
			return buf;
		L1:
		}
	}
	else
		alias v = val;

	// Initial rendering, with significantDigits!F digits of precision
	buf.pos = safeSprintf(buf.buf, &fpCFormatString!F[0], forceType(v));
	char[] s = buf.data();

	// Parses the whole of `s` back into a number.
	F parse(char[] s)
	{
		F f;
		auto res = tryParse(s, f);
		assert(res, "Failed to parse number we created");
		assert(!s.length, "Failed to completely parse number we created");
		return f;
	}

	// The special values are left as snprintf printed them
	if (s != "nan" && s != "-nan" && s != "inf" && s != "-inf")
	{
		// Sanity check: the full-precision text must round-trip
		if (forceType(parse(s)) != v)
		{
			static if (isReal)
			{
				// Something funny with DM libc real parsing... e.g. 0.6885036635121051783
				return buf;
			}
			else
			//	assert(false, "Initial conversion fails: " ~ format(fpFormatString!F, parse(s)) ~ " / " ~ s);
				assert(false, "Initial conversion fails");
		}

		// Going from the end: try to round up at each digit '0'..'8', i.e.
		// increment it and cut off everything after it. Keep the shorter
		// text if it still round-trips, otherwise restore the digit.
		foreach_reverse (i; 1..s.length)
			if (s[i]>='0' && s[i]<='8')
			{
				s[i]++;
				if (forceType(parse(s[0..i+1]))==v)
					s = s[0..i+1];
				else
					s[i]--;
			}
		// Then drop trailing characters for as long as the text still
		// round-trips, stopping at a '.' or at a length of two.
		while (s.length>2 && s[$-1]!='.' && forceType(parse(s[0..$-1]))==v)
			s = s[0..$-1];
	}
	// `s` is a prefix of the buffer's contents, so only the length needs updating
	buf.pos = s.length;
	return buf;
}
859 
/// Writes the text produced by `fpToBuf` for the floating-point value `v`
/// to `writer` (with a single `put` call).
void putFP(Writer, F)(auto ref Writer writer, F v)
{
	auto formatted = fpToBuf(v);
	writer.put(formatted.data);
}
864 
865 
/// Get shortest string representation of a FP type that still converts to exactly the same number.
/// The text is produced by `fpToBuf` and returned as a newly allocated string.
template fpToString(F)
{
	string fpToString(F v) @safe nothrow
	{
		auto rendered = fpToBuf(v);
		return rendered.data.idup;
	}

	static if (!is(Unqual!F == real))
	unittest
	{
		union U
		{
			ubyte[F.sizeof] bytes;
			Unqual!F d;
			string toString() const { return (fpFormatString!F ~ " %a [%(%02X %)]").format(d, d, bytes[]); }
		}
		import std.random : Xorshift, uniform;
		import std.stdio : stderr;
		Xorshift rng;
		foreach (n; 0..10000)
		{
			U u;
			foreach (ref b; u.bytes[])
				b = uniform!ubyte(rng);
			static if (is(Unqual!F == real))
				u.bytes[7] |= 0x80; // require normalized value
			scope(failure) stderr.writeln("Input:\t", u);
			auto s = fpToString(u.d);
			scope(failure) stderr.writeln("Result:\t", s);
			if (s == "nan" || s == "-nan")
				continue; // there are many NaNs...
			U r;
			r.d = to!F(s);
			assert(r.bytes == u.bytes,
				"fpToString mismatch:\nOutput:\t%s".format(r));
		}
	}
}

/// `fpToString` for `double`.
alias doubleToString = fpToString!double;

unittest
{
	alias floatToString = fpToString!float;
	alias realToString = fpToString!real;
	alias crealToString = fpToString!(const(real));
}
914 
/// Wraps the result of a fpToString in a non-allocating stringifiable struct.
/// The text is produced once, on construction, and kept in a buffer stored
/// in the struct itself.
struct FPAsString(T)
{
	typeof(fpToBuf(T.init)) buf;

	this(T f)
	{
		this.buf = fpToBuf(f);
	}

	/// Returns a newly allocated copy of the text.
	string toString() const pure nothrow
	{
		auto chars = buf.data;
		return chars.idup;
	}

	/// Writes the text to the sink `w`: in one piece if `w.put` accepts
	/// the whole slice, otherwise character by character.
	void toString(W)(ref W w) const
	{
		static if (!is(typeof(w.put(buf.data))))
		{
			foreach (c; buf.data)
				w.put(c);
		}
		else
			w.put(buf.data);
	}
}
FPAsString!T fpAsString(T)(T f) { return FPAsString!T(f); } /// ditto

@safe //nothrow @nogc
unittest
{
	StaticBuf!(char, 1024) buf;
	buf.formattedWrite!"%s"(fpAsString(0.1));
	assert(buf.data == "0.1");
}
948 
/// Converts a number to a string: with `toDec` for types implicitly
/// convertible to `ulong`, and with `fpToString` for all other numeric
/// types.
string numberToString(T)(T v)
	if (isNumeric!T)
{
	static if (!is(T : ulong))
		return fpToString(v);
	else
		return toDec(v);
}
957 
958 // ************************************************************************
959 
/// Simpler implementation of Levenshtein string distance.
/// Operates on code units: the result is the minimum number of single
/// code unit insertions, deletions and substitutions needed to turn `s`
/// into `t`.
int stringDistance(string s, string t)
{
	immutable n = cast(int)s.length;
	immutable m = cast(int)t.length;
	if (n == 0) return m;
	if (m == 0) return n;

	// Each row of the distance matrix depends only on the row before it,
	// so two rows are enough.
	auto prev = new int[m+1]; // distances from s[0..i-1] to each prefix of t
	auto cur  = new int[m+1]; // distances from s[0..i]   to each prefix of t
	foreach (j; 0..m+1) prev[j] = j;

	foreach (i; 1..n+1)
	{
		cur[0] = i;
		foreach (j; 1..m+1)
		{
			immutable cost = t[j-1] == s[i-1] ? 0 : 1;
			cur[j] = min(
				prev[j  ] + 1,   // deletion
				cur [j-1] + 1,   // insertion
				prev[j-1] + cost // substitution (or match)
			);
		}
		swap(prev, cur);
	}
	return prev[m];
}
985 
/// Return a number between 0.0 and 1.0 indicating how similar two strings are
/// (1.0 if identical).
/// Calculated as one minus the ratio of `stringDistance` to the length of
/// the longer string; two empty strings are considered identical.
float stringSimilarity(string string1, string string2)
{
	float distance = stringDistance(string1, string2);
	float longest = max(string1.length, string2.length);
	return longest == 0 ? 1 : 1f - distance / longest;
}
999 
/// Select best match from a list of items.
/// Items are compared to `target` case-insensitively, using
/// `stringSimilarity`. If several items are equally good, the last one wins.
/// Returns: the index of the best item whose similarity is above
///   `threshold`, or -1 if none are above the threshold.
sizediff_t findBestMatch(in string[] items, string target, float threshold = 0.7)
{
	sizediff_t found = -1;
	float best = 0;
	// The same for every item, so lower-case it only once
	auto loweredTarget = toLower(target);

	foreach (i, item; items)
	{
		float match = stringSimilarity(toLower(item), loweredTarget);
		if (match>threshold && match>=best)
		{
			best = match;
			found = i;
		}
	}

	return found;
}
1019 
/// Select best match from a list of items (see `findBestMatch`).
/// Returns the matching item itself,
/// or null if none are above the threshold.
string selectBestFrom(in string[] items, string target, float threshold = 0.7)
{
	immutable best = findBestMatch(items, target, threshold);
	if (best < 0)
		return null;
	return items[best];
}
1027 
1028 // ************************************************************************
1029 
1030 
/// Returns a string of `length` characters, each picked independently
/// with `uniform` from `chars` (by default, the lower-case Latin letters).
string randomString()(int length=20, string chars="abcdefghijklmnopqrstuvwxyz")
{
	import std.random;
	import std.range;

	auto picked = iota(length).map!(_ => chars[uniform(0, chars.length)]);
	return array(picked);
}