ae.utils.text source code

1 /**
2  * Utility code related to string and text processing.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.utils.text;
15 
16 import std.algorithm;
17 import std.ascii;
18 import std.exception;
19 import std.conv;
20 import std.format;
21 import std.string;
22 import std.traits;
23 import std.typetuple;
24 
25 import core.stdc.stdio : sprintf, sscanf;
26 import core.stdc.string;
27 
28 import ae.utils.array;
29 import ae.utils.meta;
30 import ae.utils.text.parsefp;
31 import ae.utils.textout;
32 
33 alias indexOf = std..string.indexOf;
34 
35 public import ae.utils.text.ascii : ascii, DecimalSize, toDec, toDecFixed, asciiToLower, asciiToUpper;
36 public import ae.utils.array : contains;
37 
38 // ************************************************************************
39 
40 /// CTFE helper
41 string formatAs(T)(auto ref T obj, string fmt)
42 {
43 	return format(fmt, obj);
44 }
45 
46 /// Consume a LF or CRLF terminated line from s.
47 /// Sets s to null and returns the remainder
48 /// if there is no line terminator in s.
49 T[] eatLine(T)(ref T[] s, bool eatIncompleteLines = true)
50 {
51 	return s.skipUntil([T('\n')], eatIncompleteLines).chomp();
52 }
53 
54 deprecated template eatLine(OnEof onEof)
55 {
56 	T[] eatLine(T)(ref T[] s)
57 	{
58 		return s.eatUntil!onEof([T('\n')]).chomp();
59 	}
60 }
61 
62 unittest
63 {
64 	string s = "Hello\nworld";
65 	assert(s.eatLine() == "Hello");
66 	assert(s.eatLine() == "world");
67 	assert(s is null);
68 	assert(s.eatLine() is null);
69 }
70 
71 // Uses memchr (not Boyer-Moore), best for short strings.
72 T[] fastReplace(T)(T[] what, T[] from, T[] to)
73 	if (T.sizeof == 1) // TODO (uses memchr)
74 {
75 	alias Unqual!T U;
76 
77 //	debug scope(failure) std.stdio.writeln("fastReplace crashed: ", [what, from, to]);
78 	enum RAM = cast(U*)null;
79 
80 	if (what.length < from.length || from.length==0)
81 		return what;
82 
83 	if (from.length==1)
84 	{
85 		auto fromc = from[0];
86 		if (to.length==1)
87 		{
88 			auto p = cast(T*)memchr(what.ptr, fromc, what.length);
89 			if (!p)
90 				return what;
91 
92 			T[] result = what.dup;
93 			auto delta = result.ptr - what.ptr;
94 			auto toChar = to[0];
95 			auto end = what.ptr + what.length;
96 			do
97 			{
98 				(cast(U*)p)[delta] = toChar; // zomg hax lol
99 				p++;
100 				p = cast(T*)memchr(p, fromc, end - p);
101 			} while (p);
102 			return result;
103 		}
104 		else
105 		{
106 			auto p = cast(immutable(T)*)memchr(what.ptr, fromc, what.length);
107 			if (!p)
108 				return what;
109 
110 			auto sb = StringBuilder(what.length);
111 			do
112 			{
113 				sb.put(what[0..p-what.ptr], to);
114 				what = what[p-what.ptr+1..$];
115 				p = cast(immutable(T)*)memchr(what.ptr, fromc, what.length);
116 			}
117 			while (p);
118 
119 			sb.put(what);
120 			return sb.get();
121 		}
122 	}
123 
124 	auto head = from[0];
125 	auto tail = from[1..$];
126 
127 	auto p = cast(T*)what.ptr;
128 	auto end = p + what.length - tail.length;
129 	p = cast(T*)memchr(p, head, end-p);
130 	while (p)
131 	{
132 		p++;
133 		if (p[0..tail.length] == tail)
134 		{
135 			if (from.length == to.length)
136 			{
137 				T[] result = what.dup;
138 				auto deltaMinusOne = (result.ptr - what.ptr) - 1;
139 
140 				goto replaceA;
141 			dummyA: // compiler complains
142 
143 				do
144 				{
145 					p++;
146 					if (p[0..tail.length] == tail)
147 					{
148 					replaceA:
149 						(cast(U*)p+deltaMinusOne)[0..to.length] = to[];
150 					}
151 					p = cast(T*)memchr(p, head, end-p);
152 				}
153 				while (p);
154 
155 				return result;
156 			}
157 			else
158 			{
159 				auto start = cast(T*)what.ptr;
160 				auto sb = StringBuilder(what.length);
161 				goto replaceB;
162 			dummyB: // compiler complains
163 
164 				do
165 				{
166 					p++;
167 					if (p[0..tail.length] == tail)
168 					{
169 					replaceB:
170 						sb.put(RAM[cast(size_t)start .. cast(size_t)p-1], to);
171 						start = p + tail.length;
172 						what = what[start-what.ptr..$];
173 					}
174 					else
175 					{
176 						what = what[p-what.ptr..$];
177 					}
178 					p = cast(T*)memchr(what.ptr, head, what.length);
179 				}
180 				while (p);
181 
182 				//sb.put(what);
183 				sb.put(RAM[cast(size_t)start..cast(size_t)(what.ptr+what.length)]);
184 				return sb.get();
185 			}
186 
187 			assert(0);
188 		}
189 		p = cast(T*)memchr(p, head, end-p);
190 	}
191 
192 	return what;
193 }
194 
195 unittest
196 {
197 	import std.array;
198 	void test(string haystack, string from, string to)
199 	{
200 		auto description = `("` ~ haystack ~ `", "` ~ from ~ `", "` ~ to ~ `")`;
201 
202 		auto r1 = fastReplace(haystack, from, to);
203 		auto r2 =     replace(haystack, from, to);
204 		assert(r1 == r2, `Bad replace: ` ~ description ~ ` == "` ~ r1 ~ `"`);
205 
206 		if (r1 == haystack)
207 			assert(r1 is haystack, `Pointless reallocation: ` ~ description);
208 	}
209 
210 	test("Mary had a little lamb", "a", "b");
211 	test("Mary had a little lamb", "a", "aaa");
212 	test("Mary had a little lamb", "Mary", "Lucy");
213 	test("Mary had a little lamb", "Mary", "Jimmy");
214 	test("Mary had a little lamb", "lamb", "goat");
215 	test("Mary had a little lamb", "lamb", "sheep");
216 	test("Mary had a little lamb", " l", " x");
217 	test("Mary had a little lamb", " l", " xx");
218 
219 	test("Mary had a little lamb", "X" , "Y" );
220 	test("Mary had a little lamb", "XX", "Y" );
221 	test("Mary had a little lamb", "X" , "YY");
222 	test("Mary had a little lamb", "XX", "YY");
223 	test("Mary had a little lamb", "aX", "Y" );
224 	test("Mary had a little lamb", "aX", "YY");
225 
226 	test("foo", "foobar", "bar");
227 }
228 
229 T[][] fastSplit(T, U)(T[] s, U d)
230 	if (is(Unqual!T == Unqual!U))
231 {
232 	if (!s.length)
233 		return null;
234 
235 	auto p = cast(T*)memchr(s.ptr, d, s.length);
236 	if (!p)
237 		return [s];
238 
239 	size_t n;
240 	auto end = s.ptr + s.length;
241 	do
242 	{
243 		n++;
244 		p++;
245 		p = cast(T*) memchr(p, d, end-p);
246 	}
247 	while (p);
248 
249 	auto result = new T[][n+1];
250 	n = 0;
251 	auto start = s.ptr;
252 	p = cast(T*) memchr(start, d, s.length);
253 	do
254 	{
255 		result[n++] = start[0..p-start];
256 		start = ++p;
257 		p = cast(T*) memchr(p, d, end-p);
258 	}
259 	while (p);
260 	result[n] = start[0..end-start];
261 
262 	return result;
263 }
264 
265 T[][] splitAsciiLines(T)(T[] text)
266 	if (is(Unqual!T == char))
267 {
268 	auto lines = text.fastSplit('\n');
269 	foreach (ref line; lines)
270 		if (line.length && line[$-1]=='\r')
271 			line = line[0..$-1];
272 	return lines;
273 }
274 
275 unittest
276 {
277 	assert(splitAsciiLines("a\nb\r\nc\r\rd\n\re\r\n\nf") == ["a", "b", "c\r\rd", "\re", "", "f"]);
278 	assert(splitAsciiLines(string.init) == splitLines(string.init));
279 }
280 
281 /// Like std.string.split (one argument version, which splits by
282 /// whitespace), but only splits by ASCII and does not autodecode.
283 T[][] asciiSplit(T)(T[] text)
284 	if (is(Unqual!T == char))
285 {
286 	bool inWhitespace = true;
287 	size_t wordStart;
288 	T[][] result;
289 
290 	void endWord(size_t p)
291 	{
292 		if (!inWhitespace)
293 		{
294 			result ~= text[wordStart..p];
295 			inWhitespace = true;
296 		}
297 	}
298 
299 	foreach (p, c; text)
300 		if (std.ascii.isWhite(c))
301 			endWord(p);
302 		else
303 			if (inWhitespace)
304 			{
305 				inWhitespace = false;
306 				wordStart = p;
307 			}
308 	endWord(text.length);
309 	return result;
310 }
311 
312 unittest
313 {
314 	foreach (s; ["", " ", "a", " a", "a ", "a b", " a b", "a b ", " a b ",
315 			"  ", "  a", "a  ", "a  b", "a  b  ", "a b  c"])
316 		assert(s.split == s.asciiSplit, format("Got %s, expected %s", s.asciiSplit, s.split));
317 }
318 
319 T[] asciiStrip(T)(T[] s)
320 	if (is(Unqual!T == char))
321 {
322 	while (s.length && isWhite(s[0]))
323 		s = s[1..$];
324 	while (s.length && isWhite(s[$-1]))
325 		s = s[0..$-1];
326 	return s;
327 }
328 
329 unittest
330 {
331 	string s = "Hello, world!";
332 	assert(asciiStrip(s) is s);
333 	assert(asciiStrip("\r\n\tHello ".dup) == "Hello");
334 }
335 
336 /// Covering slice-list of s with interleaved whitespace.
337 T[][] segmentByWhitespace(T)(T[] s)
338 	if (is(Unqual!T == char))
339 {
340 	if (!s.length)
341 		return null;
342 
343 	T[][] segments;
344 	bool wasWhite = isWhite(s[0]);
345 	size_t start = 0;
346 	foreach (p, char c; s)
347 	{
348 		bool isWhite = isWhite(c);
349 		if (isWhite != wasWhite)
350 			segments ~= s[start..p],
351 			start = p;
352 		wasWhite = isWhite;
353 	}
354 	segments ~= s[start..$];
355 
356 	return segments;
357 }
358 
359 T[] newlinesToSpaces(T)(T[] s)
360 	if (is(Unqual!T == char))
361 {
362 	auto slices = segmentByWhitespace(s);
363 	foreach (ref slice; slices)
364 		if (slice.contains("\n"))
365 			slice = " ";
366 	return slices.join();
367 }
368 
369 ascii normalizeWhitespace(ascii s)
370 {
371 	auto slices = segmentByWhitespace(strip(s));
372 	foreach (i, ref slice; slices)
373 		if (i & 1) // odd
374 			slice = " ";
375 	return slices.join();
376 }
377 
378 unittest
379 {
380 	assert(normalizeWhitespace(" Mary  had\ta\nlittle\r\n\tlamb") == "Mary had a little lamb");
381 }
382 
383 string[] splitByCamelCase(string s)
384 {
385 	string[] result;
386 	size_t start = 0;
387 	foreach (i; 1..s.length+1)
388 		if (i == s.length
389 		 || (isLower(s[i-1]) && isUpper(s[i]))
390 		 || (i+1 < s.length && isUpper(s[i-1]) && isUpper(s[i]) && isLower(s[i+1]))
391 		)
392 		{
393 			result ~= s[start..i];
394 			start = i;
395 		}
396 	return result;
397 }
398 
399 unittest
400 {
401 	assert(splitByCamelCase("parseIPString") == ["parse", "IP", "String"]);
402 	assert(splitByCamelCase("IPString") == ["IP", "String"]);
403 }
404 
405 string camelCaseJoin(string[] arr)
406 {
407 	if (!arr.length)
408 		return null;
409 	string result = arr[0];
410 	foreach (s; arr[1..$])
411 		result ~= std.ascii.toUpper(s[0]) ~ s[1..$];
412 	return result;
413 }
414 
415 unittest
416 {
417 	assert("parse-IP-string".split('-').camelCaseJoin() == "parseIPString");
418 }
419 
420 // ************************************************************************
421 
422 /// Case-insensitive ASCII string.
423 alias CIAsciiString = NormalizedArray!(immutable(char), s => s.byCodeUnit.map!(std.ascii.toLower));
424 
425 ///
426 unittest
427 {
428 	CIAsciiString s = "test";
429 	assert(s == "TEST");
430 	assert(s >= "Test" && s <= "Test");
431 	assert(CIAsciiString("a") == CIAsciiString("A"));
432 	assert(CIAsciiString("a") != CIAsciiString("B"));
433 	assert(CIAsciiString("a") <  CIAsciiString("B"));
434 	assert(CIAsciiString("A") <  CIAsciiString("b"));
435 	assert(CIAsciiString("я") != CIAsciiString("Я"));
436 }
437 
438 /// Case-insensitive Unicode string.
439 alias CIUniString = NormalizedArray!(immutable(char), s => s.map!(std.uni.toLower));
440 
441 ///
442 unittest
443 {
444 	CIUniString s = "привет";
445 	assert(s == "ПРИВЕТ");
446 	assert(s >= "Привет" && s <= "Привет");
447 	assert(CIUniString("я") == CIUniString("Я"));
448 	assert(CIUniString("а") != CIUniString("Б"));
449 	assert(CIUniString("а") <  CIUniString("Б"));
450 	assert(CIUniString("А") <  CIUniString("б"));
451 }
452 
453 // ************************************************************************
454 
455 import std.utf;
456 
457 /// Convert any data to a valid UTF-8 bytestream, so D's string functions can
458 /// properly work on it.
459 string rawToUTF8(in char[] s)
460 {
461 	auto d = new dchar[s.length];
462 	foreach (i, char c; s)
463 		d[i] = c;
464 	return toUTF8(d);
465 }
466 
467 /// Undo rawToUTF8.
468 ascii UTF8ToRaw(in char[] r) pure
469 {
470 	auto s = new char[r.length];
471 	size_t i = 0;
472 	foreach (dchar c; r)
473 	{
474 		assert(c < '\u0100');
475 		s[i++] = cast(char)c;
476 	}
477 	return s[0..i];
478 }
479 
480 unittest
481 {
482 	char[1] c;
483 	for (int i=0; i<256; i++)
484 	{
485 		c[0] = cast(char)i;
486 		assert(UTF8ToRaw(rawToUTF8(c[])) == c[], format("%s -> %s -> %s", cast(ubyte[])c[], cast(ubyte[])rawToUTF8(c[]), cast(ubyte[])UTF8ToRaw(rawToUTF8(c[]))));
487 	}
488 }
489 
490 /// Where a delegate with this signature is required.
491 string nullStringTransform(in char[] s) { return to!string(s); }
492 
493 string forceValidUTF8(string s)
494 {
495 	try
496 	{
497 		validate(s);
498 		return s;
499 	}
500 	catch (UTFException)
501 		return rawToUTF8(s);
502 }
503 
504 // ************************************************************************
505 
506 /// Return the slice up to the first NUL character,
507 /// or of the whole array if none is found.
508 C[] fromZArray(C, n)(ref C[n] arr)
509 {
510 	auto p = arr.representation.countUntil(0);
511 	return arr[0 .. p<0 ? $ : p];
512 }
513 
514 /// ditto
515 C[] fromZArray(C)(C[] arr)
516 {
517 	auto p = arr.representation.countUntil(0);
518 	return arr[0 .. p<0 ? $ : p];
519 }
520 
521 unittest
522 {
523 	char[4] arr = "ab\0d";
524 	assert(arr.fromZArray == "ab");
525 	arr[] = "abcd";
526 	assert(arr.fromZArray == "abcd");
527 }
528 
529 unittest
530 {
531 	string arr = "ab\0d";
532 	assert(arr.fromZArray == "ab");
533 	arr = "abcd";
534 	assert(arr.fromZArray == "abcd");
535 }
536 
537 // ************************************************************************
538 
539 /// Formats binary data as a hex dump (three-column layout consisting of hex
540 /// offset, byte values in hex, and printable low-ASCII characters).
541 string hexDump(const(void)[] b)
542 {
543 	auto data = cast(const(ubyte)[]) b;
544 	assert(data.length);
545 	size_t i=0;
546 	string s;
547 	while (i<data.length)
548 	{
549 		s ~= format("%08X:  ", i);
550 		foreach (x; 0..16)
551 		{
552 			if (i+x<data.length)
553 				s ~= format("%02X ", data[i+x]);
554 			else
555 				s ~= "   ";
556 			if (x==7)
557 				s ~= "| ";
558 		}
559 		s ~= "  ";
560 		foreach (x; 0..16)
561 		{
562 			if (i+x<data.length)
563 				if (data[i+x]==0)
564 					s ~= ' ';
565 				else
566 				if (data[i+x]<32 || data[i+x]>=128)
567 					s ~= '.';
568 				else
569 					s ~= cast(char)data[i+x];
570 			else
571 				s ~= ' ';
572 		}
573 		s ~= "\n";
574 		i += 16;
575 	}
576 	return s;
577 }
578 
579 import std.conv;
580 
581 T fromHex(T : ulong = uint, C)(const(C)[] s)
582 {
583 	T result = parse!T(s, 16);
584 	enforce(s.length==0, new ConvException("Could not parse entire string"));
585 	return result;
586 }
587 
588 ubyte[] arrayFromHex(in char[] hex)
589 {
590 	auto buf = new ubyte[hex.length/2];
591 	arrayFromHex(hex, buf);
592 	return buf;
593 }
594 
595 struct HexParseConfig
596 {
597 	bool checked = true;
598 	bool lower = true;
599 	bool upper = true;
600 }
601 
602 ubyte parseHexDigit(HexParseConfig config = HexParseConfig.init)(char c)
603 {
604 	static assert(config.lower || config.upper,
605 		"Must parse at least either lower or upper case digits");
606 	static if (config.checked)
607 	{
608 		switch (c)
609 		{
610 			case '0': .. case '9': return cast(ubyte)(c - '0');
611 			case 'a': .. case 'f': return cast(ubyte)(c - 'a' + 10);
612 			case 'A': .. case 'F': return cast(ubyte)(c - 'A' + 10);
613 			default: throw new Exception("Bad hex digit: " ~ c);
614 		}
615 	}
616 	else
617 	{
618 		if (c <= '9')
619 			return cast(ubyte)(c - '0');
620 		static if (config.lower && config.upper)
621 		{
622 			if (c < 'a')
623 				return cast(ubyte)(c - 'A' + 10);
624 			else
625 				return cast(ubyte)(c - 'a' + 10);
626 		}
627 		else
628 			static if (config.lower)
629 				return cast(ubyte)(c - 'a' + 10);
630 			else
631 				return cast(ubyte)(c - 'A' + 10);
632 	}
633 }
634 
635 void arrayFromHex(HexParseConfig config = HexParseConfig.init)(in char[] hex, ubyte[] buf)
636 {
637 	assert(buf.length == hex.length/2, "Wrong buffer size for arrayFromHex");
638 	for (int i=0; i<hex.length; i+=2)
639 		buf[i/2] = cast(ubyte)(
640 			parseHexDigit!config(hex[i  ])*16 +
641 			parseHexDigit!config(hex[i+1])
642 		);
643 }
644 
645 /// Fast version for static arrays of known length.
646 void sarrayFromHex(HexParseConfig config = HexParseConfig.init, size_t N, Hex)(in ref Hex hex, ref ubyte[N] buf)
647 if (is(Hex == char[N*2]))
648 {
649 	foreach (i; 0..N/4)
650 	{
651 		ulong chars = (cast(ulong*)hex.ptr)[i];
652 		uint res =
653 			(parseHexDigit!config((chars >> (8*0)) & 0xFF) << (4*1)) |
654 			(parseHexDigit!config((chars >> (8*1)) & 0xFF) << (4*0)) |
655 			(parseHexDigit!config((chars >> (8*2)) & 0xFF) << (4*3)) |
656 			(parseHexDigit!config((chars >> (8*3)) & 0xFF) << (4*2)) |
657 			(parseHexDigit!config((chars >> (8*4)) & 0xFF) << (4*5)) |
658 			(parseHexDigit!config((chars >> (8*5)) & 0xFF) << (4*4)) |
659 			(parseHexDigit!config((chars >> (8*6)) & 0xFF) << (4*7)) |
660 			(parseHexDigit!config((chars >> (8*7)) & 0xFF) << (4*6));
661 		(cast(uint*)buf.ptr)[i] = res;
662 	}
663 	foreach (i; N/4*4..N)
664 		buf[i] = cast(ubyte)(
665 			parseHexDigit!config(hex[i*2  ])*16 +
666 			parseHexDigit!config(hex[i*2+1])
667 		);
668 }
669 
670 unittest
671 {
672 	foreach (checked; TypeTuple!(false, true))
673 		foreach (lower; TypeTuple!(false, true))
674 			foreach (upper; TypeTuple!(false, true))
675 				static if (lower || upper)
676 				{
677 					enum config = HexParseConfig(checked, lower, upper);
678 					char[18] buf;
679 					foreach (n; 0..18)
680 						if (lower && upper ? n & 1 : upper)
681 							buf[n] = hexDigits[n % 16];
682 						else
683 							buf[n] = lowerHexDigits[n % 16];
684 					ubyte[9] res;
685 					sarrayFromHex!config(buf, res);
686 					assert(res == [0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0x01], text(res));
687 				}
688 }
689 
690 template toHex(alias digits = hexDigits)
691 {
692 	char[] toHex(in ubyte[] data, char[] buf) pure
693 	{
694 		assert(buf.length == data.length*2);
695 		foreach (i, b; data)
696 		{
697 			buf[i*2  ] = digits[b>>4];
698 			buf[i*2+1] = digits[b&15];
699 		}
700 		return buf;
701 	}
702 
703 	string toHex(in ubyte[] data) pure
704 	{
705 		auto buf = new char[data.length*2];
706 		foreach (i, b; data)
707 		{
708 			buf[i*2  ] = digits[b>>4];
709 			buf[i*2+1] = digits[b&15];
710 		}
711 		return buf;
712 	}
713 }
714 
715 alias toLowerHex = toHex!lowerHexDigits;
716 
717 void toHex(T : ulong, size_t U = T.sizeof*2)(T n, ref char[U] buf)
718 {
719 	foreach (i; Reverse!(RangeTuple!(T.sizeof*2)))
720 	{
721 		buf[i] = hexDigits[n & 0xF];
722 		n >>= 4;
723 	}
724 }
725 
726 unittest
727 {
728 	ubyte[] bytes = [0x12, 0x34];
729 	assert(toHex(bytes) == "1234");
730 }
731 
732 unittest
733 {
734 	ubyte[] bytes = [0x12, 0x34];
735 	char[] buf = new char[4];
736 	toHex(bytes, buf);
737 	assert(buf == "1234");
738 }
739 
740 unittest
741 {
742 	char[8] buf;
743 	toHex(0x01234567, buf);
744 	assert(buf == "01234567");
745 }
746 
747 /// How many significant decimal digits does a FP type have
748 /// (determined empirically - valid for all D FP types on x86/64)
749 enum significantDigits(T : real) = 2 + 2 * T.sizeof;
750 
751 /// Format string for a FP type which includes all necessary
752 /// significant digits
753 enum fpFormatString(T) = "%." ~ text(significantDigits!T) ~ "g";
754 template cWidthString(T)
755 {
756 	static if (is(T == float))
757 		enum cWidthString = "";
758 	else
759 	static if (is(T == double))
760 		enum cWidthString = "l";
761 	else
762 	static if (is(T == real))
763 		enum cWidthString = "L";
764 }
765 enum fpCFormatString(T) = "%." ~ text(significantDigits!T) ~ cWidthString!T ~ "g";
766 
767 private auto fpToBuf(F)(F val) @nogc
768 {
769 	/// Bypass FPU register, which may contain a different precision
770 	static F forceType(F d) { static F n; n = d; return n; }
771 
772 	enum isReal = is(Unqual!F == real);
773 
774 	StaticBuf!(char, 64) buf = void;
775 
776 	// MSVC workaround from std.format:
777 	version (CRuntime_Microsoft)
778 	{
779 		import std.math : isNaN, isInfinity;
780 		immutable double v = val; // convert early to get "inf" in case of overflow
781 		{
782 			string s;
783 			if (isNaN(v))
784 				s = "nan"; // snprintf writes 1.#QNAN
785 			else if (isInfinity(v))
786 				s = val >= 0 ? "inf" : "-inf"; // snprintf writes 1.#INF
787 			else
788 				goto L1;
789 			buf.buf[0..s.length] = s;
790 			buf.pos = s.length;
791 			return buf;
792 		L1:
793 		}
794 	}
795 	else
796 		alias v = val;
797 
798 	buf.pos = sprintf(buf.buf.ptr, fpCFormatString!F.ptr, forceType(v));
799 	char[] s = buf.data();
800 
801 	F parse(char[] s)
802 	{
803 		F f;
804 		auto res = tryParse(s, f);
805 		assert(res, "Failed to parse number we created");
806 		assert(!s.length, "Failed to completely parse number we created");
807 		return f;
808 	}
809 
810 	if (s != "nan" && s != "-nan" && s != "inf" && s != "-inf")
811 	{
812 		if (forceType(parse(s)) != v)
813 		{
814 			static if (isReal)
815 			{
816 				// Something funny with DM libc real parsing... e.g. 0.6885036635121051783
817 				return buf;
818 			}
819 			else
820 			//	assert(false, "Initial conversion fails: " ~ format(fpFormatString!F, parse(s)) ~ " / " ~ s);
821 				assert(false, "Initial conversion fails");
822 		}
823 
824 		foreach_reverse (i; 1..s.length)
825 			if (s[i]>='0' && s[i]<='8')
826 			{
827 				s[i]++;
828 				if (forceType(parse(s[0..i+1]))==v)
829 					s = s[0..i+1];
830 				else
831 					s[i]--;
832 			}
833 		while (s.length>2 && s[$-1]!='.' && forceType(parse(s[0..$-1]))==v)
834 			s = s[0..$-1];
835 	}
836 	buf.pos = s.length;
837 	return buf;
838 }
839 
840 void putFP(Writer, F)(auto ref Writer writer, F v) @nogc
841 {
842 	writer.put(fpToBuf(v).data);
843 }
844 
845 
846 /// Get shortest string representation of a FP type that still converts to exactly the same number.
847 template fpToString(F)
848 {
849 	string fpToString(F v)
850 	{
851 		return fpToBuf(v).data.idup;
852 	}
853 
854 	static if (!is(F == real))
855 	unittest
856 	{
857 		union U
858 		{
859 			ubyte[F.sizeof] bytes;
860 			F d;
861 			string toString() { return (fpFormatString!F ~ " %a [%(%02X %)]").format(d, d, bytes[]); }
862 		}
863 		import std.random : Xorshift, uniform;
864 		import std.stdio : stderr;
865 		Xorshift rng;
866 		foreach (n; 0..10000)
867 		{
868 			U u;
869 			foreach (ref b; u.bytes[])
870 				b = uniform!ubyte(rng);
871 			static if (is(F == real))
872 				u.bytes[7] |= 0x80; // require normalized value
873 			scope(failure) stderr.writeln("Input:\t", u);
874 			auto s = fpToString(u.d);
875 			scope(failure) stderr.writeln("Result:\t", s);
876 			if (s == "nan" || s == "-nan")
877 				continue; // there are many NaNs...
878 			U r;
879 			r.d = to!F(s);
880 			assert(r.bytes == u.bytes,
881 				"fpToString mismatch:\nOutput:\t%s".format(r));
882 		}
883 	}
884 }
885 
886 alias doubleToString = fpToString!double;
887 
888 unittest
889 {
890 	alias floatToString = fpToString!float;
891 	alias realToString = fpToString!real;
892 }
893 
894 string numberToString(T)(T v)
895 	if (isNumeric!T)
896 {
897 	static if (is(T : ulong))
898 		return toDec(v);
899 	else
900 		return fpToString(v);
901 }
902 
903 // ************************************************************************
904 
905 /// Simpler implementation of Levenshtein string distance
906 int stringDistance(string s, string t)
907 {
908 	int n = cast(int)s.length;
909 	int m = cast(int)t.length;
910 	if (n == 0) return m;
911 	if (m == 0) return n;
912 	int[][] distance = new int[][](n+1, m+1); // matrix
913 	int cost=0;
914 	//init1
915 	foreach (i; 0..n+1) distance[i][0]=i;
916 	foreach (j; 0..m+1) distance[0][j]=j;
917 	//find min distance
918 	foreach (i; 1..n+1)
919 		foreach (j; 1..m+1)
920 		{
921 			cost = t[j-1] == s[i-1] ? 0 : 1;
922 			distance[i][j] = min(
923 				distance[i-1][j  ] + 1,
924 				distance[i  ][j-1] + 1,
925 				distance[i-1][j-1] + cost
926 			);
927 		}
928 	return distance[n][m];
929 }
930 
931 /// Return a number between 0.0 and 1.0 indicating how similar two strings are
932 /// (1.0 if identical)
933 float stringSimilarity(string string1, string string2)
934 {
935 	float dis = stringDistance(string1, string2);
936 	float maxLen = string1.length;
937 	if (maxLen < string2.length)
938 		maxLen = string2.length;
939 	if (maxLen == 0)
940 		return 1;
941 	else
942 		return 1f - dis/maxLen;
943 }
944 
945 /// Select best match from a list of items.
946 /// Returns -1 if none are above the threshold.
947 sizediff_t findBestMatch(in string[] items, string target, float threshold = 0.7)
948 {
949 	sizediff_t found = -1;
950 	float best = 0;
951 
952 	foreach (i, item; items)
953 	{
954 		float match = stringSimilarity(toLower(item),toLower(target));
955 		if (match>threshold && match>=best)
956 		{
957 			best = match;
958 			found = i;
959 		}
960 	}
961 
962 	return found;
963 }
964 
965 /// Select best match from a list of items.
966 /// Returns null if none are above the threshold.
967 string selectBestFrom(in string[] items, string target, float threshold = 0.7)
968 {
969 	auto index = findBestMatch(items, target, threshold);
970 	return index < 0 ? null : items[index];
971 }
972 
973 // ************************************************************************
974 
975 
976 string randomString()(int length=20, string chars="abcdefghijklmnopqrstuvwxyz")
977 {
978 	import std.random;
979 	import std.range;
980 
981 	return length.iota.map!(n => chars[uniform(0, $)]).array;
982 }