ae.utils.text source code

1 /**
2  * Utility code related to string and text processing.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.utils.text;
15 
16 import std.algorithm;
17 import std.ascii;
18 import std.exception;
19 import std.conv;
20 import std.format;
21 import std.range.primitives;
22 import std.string;
23 import std.traits;
24 import std.typetuple;
25 
26 import core.stdc.stdio : snprintf, sscanf;
27 import core.stdc.string;
28 
29 import ae.utils.array;
30 import ae.utils.meta;
31 import ae.utils.text.parsefp;
32 import ae.utils.textout;
33 
// Make the unqualified name `indexOf` refer to std.string's overload set within this module.
alias indexOf = std.string.indexOf;
35 
36 public import ae.utils.text.ascii : ascii, decimalSize, toDec, toDecFixed, asciiToLower, asciiToUpper;
37 deprecated public import ae.utils.text.ascii : DecimalSize;
38 public import ae.utils.array : contains;
39 
40 // ************************************************************************
41 
/// CTFE helper: renders `obj` using the format specification `fmt`
/// and returns the resulting string.
string formatAs(T)(auto ref T obj, string fmt)
{
	auto rendered = format(fmt, obj);
	return rendered;
}
47 
/// Lazily formatted object.
/// Captures `values` in a struct; the compile-time format string `fmt`
/// is only applied when the struct's `toString` is invoked with a sink.
auto formatted(string fmt, T...)(auto ref T values)
{
	static struct Formatted
	{
		T values;

		/// Writes the formatted text to a delegate sink.
		void toString(void delegate(const(char)[]) sink) const
		{
			formattedWrite!fmt(sink, values);
		}

		/// Writes the formatted text to any `char` output range.
		void toString(W)(ref W writer) const
		if (isOutputRange!(W, char))
		{
			formattedWrite!fmt(writer, values);
		}
	}

	return Formatted(values);
}

unittest
{
	auto rendered = format!"%s%s%s"("<", formatted!"%x"(64), ">");
	assert(rendered == "<40>");
}
73 
74 // ************************************************************************
75 
/// Consume a LF or CRLF terminated line from s.
/// Sets s to null and returns the remainder
/// if there is no line terminator in s.
/// `eatIncompleteLines` is forwarded unchanged to `skipUntil`.
T[] eatLine(T)(ref T[] s, bool eatIncompleteLines = true)
{
	auto line = s.skipUntil([T('\n')], eatIncompleteLines);
	// chomp removes the line terminator left at the end of the slice, if any.
	return line.chomp();
}

/// Deprecated form which selects the end-of-input behaviour
/// through the `OnEof` template argument passed on to `eatUntil`.
deprecated template eatLine(OnEof onEof)
{
	T[] eatLine(T)(ref T[] s)
	{
		auto line = s.eatUntil!onEof([T('\n')]);
		return line.chomp();
	}
}

unittest
{
	string text = "Hello\nworld";
	assert(text.eatLine() == "Hello");
	assert(text.eatLine() == "world");
	assert(text is null);
	assert(text.eatLine() is null);
}
100 
/// Replaces every non-overlapping occurrence of `from` in `what` with `to`,
/// scanning from left to right.
/// Uses memchr (not Boyer-Moore), best for short strings.
/// Returns `what` itself, without allocating, when there is nothing to replace
/// (including when `from` is empty or longer than `what`).
T[] fastReplace(T)(T[] what, T[] from, T[] to)
	if (T.sizeof == 1) // TODO (uses memchr)
{
	alias U = Unqual!T;

	if (what.length < from.length || from.length == 0)
		return what;

	auto head = from[0];
	auto tail = from[1 .. $];

	// Index of the first occurrence of `from` at or after `pos`,
	// or `what.length` if there is none.
	size_t findNext(size_t pos)
	{
		// A match can only begin where the whole needle still fits, so the
		// head byte is never searched for in the last `tail.length` bytes.
		// This keeps the tail comparison below inside the buffer.
		immutable limit = what.length - tail.length;
		while (pos < limit)
		{
			auto p = cast(const(U)*)memchr(what.ptr + pos, head, limit - pos);
			if (!p)
				break;
			pos = p - cast(const(U)*)what.ptr;
			if (what[pos + 1 .. pos + from.length] == tail)
				return pos;
			pos++;
		}
		return what.length;
	}

	auto pos = findNext(0);
	if (pos == what.length)
		return what; // no match - hand back the original slice

	if (from.length == to.length)
	{
		// Same size: patch a private copy in place.
		auto result = what.dup;
		do
		{
			result[pos .. pos + to.length] = to[];
			// Resume after the whole match so that overlapping
			// occurrences are not replaced a second time.
			pos = findNext(pos + from.length);
		}
		while (pos != what.length);
		return cast(T[])result;
	}

	// Different size: stitch the untouched stretches and the replacements together.
	auto sb = StringBuilder(what.length);
	size_t done = 0; // everything before this index has been emitted
	do
	{
		sb.put(what[done .. pos], to);
		done = pos + from.length;
		pos = findNext(done);
	}
	while (pos != what.length);
	sb.put(what[done .. $]);
	return sb.get();
}

unittest
{
	import std.array;
	void test(string haystack, string from, string to)
	{
		auto description = `("` ~ haystack ~ `", "` ~ from ~ `", "` ~ to ~ `")`;

		auto r1 = fastReplace(haystack, from, to);
		auto r2 =     replace(haystack, from, to);
		assert(r1 == r2, `Bad replace: ` ~ description ~ ` == "` ~ r1 ~ `"`);

		if (r1 == haystack)
			assert(r1 is haystack, `Pointless reallocation: ` ~ description);
	}

	test("Mary had a little lamb", "a", "b");
	test("Mary had a little lamb", "a", "aaa");
	test("Mary had a little lamb", "Mary", "Lucy");
	test("Mary had a little lamb", "Mary", "Jimmy");
	test("Mary had a little lamb", "lamb", "goat");
	test("Mary had a little lamb", "lamb", "sheep");
	test("Mary had a little lamb", " l", " x");
	test("Mary had a little lamb", " l", " xx");

	test("Mary had a little lamb", "X" , "Y" );
	test("Mary had a little lamb", "XX", "Y" );
	test("Mary had a little lamb", "X" , "YY");
	test("Mary had a little lamb", "XX", "YY");
	test("Mary had a little lamb", "aX", "Y" );
	test("Mary had a little lamb", "aX", "YY");

	test("foo", "foobar", "bar");

	// Overlapping occurrences must only be replaced once.
	test("aaa", "aa", "bb");
	test("aaaa", "aa", "b");

	// The needle's first byte at the very end of the haystack must not
	// cause a comparison beyond the end of the slice.
	test("abab"[0 .. 3], "ab", "xyz");
}
258 
/// Splits `s` on every occurrence of the single element `d`.
/// Returns null for an empty `s`, `[s]` when `d` does not occur, and
/// otherwise one slice of `s` per field (empty fields are preserved).
/// Byte-sized element types are searched with memchr; wider element
/// types are scanned element by element.
T[][] fastSplit(T, U)(T[] s, U d)
	if (is(Unqual!T == Unqual!U))
{
	if (!s.length)
		return null;

	// Index of the first `d` in `s[from .. $]`, or `s.length` if there is none.
	size_t next(size_t from)
	{
		static if (T.sizeof == 1)
		{
			auto p = cast(T*) memchr(s.ptr + from, d, s.length - from);
			return p ? p - s.ptr : s.length;
		}
		else
		{
			// memchr works on bytes, so it cannot be used for wchar/dchar data.
			foreach (i; from .. s.length)
				if (s[i] == d)
					return i;
			return s.length;
		}
	}

	// First pass: count the fields so the result is allocated exactly once.
	size_t pieces = 1;
	for (auto at = next(0); at < s.length; at = next(at + 1))
		pieces++;

	if (pieces == 1)
		return [s];

	// Second pass: record the slices.
	auto result = new T[][pieces];
	size_t start = 0;
	foreach (ref piece; result)
	{
		immutable stop = next(start);
		piece = s[start .. stop];
		start = stop + 1;
	}

	return result;
}
294 
/// Splits `text` on LF using `fastSplit`, then drops a single trailing CR
/// from each resulting line (so CRLF-terminated lines come out clean).
T[][] splitAsciiLines(T)(T[] text)
	if (is(Unqual!T == char))
{
	auto lines = text.fastSplit('\n');
	foreach (i; 0 .. lines.length)
	{
		auto line = lines[i];
		if (line.length > 0 && line[$ - 1] == '\r')
			lines[i] = line[0 .. $ - 1];
	}
	return lines;
}

unittest
{
	assert(splitAsciiLines("a\nb\r\nc\r\rd\n\re\r\n\nf") == ["a", "b", "c\r\rd", "\re", "", "f"]);
	assert(splitAsciiLines(string.init) == splitLines(string.init));
}
310 
/// Like std.string.split (one argument version, which splits by
/// whitespace), but only splits by ASCII and does not autodecode.
/// Returns slices of `text`; the result is null when `text` has no words.
T[][] asciiSplit(T)(T[] text)
	if (is(Unqual!T == char))
{
	T[][] words;
	size_t pos = 0;

	while (pos < text.length)
	{
		// Skip the whitespace run in front of the next word.
		while (pos < text.length && std.ascii.isWhite(text[pos]))
			pos++;
		if (pos == text.length)
			break;

		// Collect the word itself.
		immutable wordStart = pos;
		while (pos < text.length && !std.ascii.isWhite(text[pos]))
			pos++;
		words ~= text[wordStart .. pos];
	}

	return words;
}

unittest
{
	foreach (s; ["", " ", "a", " a", "a ", "a b", " a b", "a b ", " a b ",
			"  ", "  a", "a  ", "a  b", "a  b  ", "a b  c"])
		assert(s.split == s.asciiSplit, format("Got %s, expected %s", s.asciiSplit, s.split));
}
348 
/// Returns the slice of `s` that remains after removing leading and
/// trailing ASCII whitespace. No copy is made.
T[] asciiStrip(T)(T[] s)
	if (is(Unqual!T == char))
{
	size_t lo = 0;
	size_t hi = s.length;

	while (lo < hi && isWhite(s[lo]))
		lo++;
	while (hi > lo && isWhite(s[hi - 1]))
		hi--;

	return s[lo .. hi];
}

unittest
{
	string greeting = "Hello, world!";
	assert(asciiStrip(greeting) is greeting);
	assert(asciiStrip("\r\n\tHello ".dup) == "Hello");
}
365 
/// Covering slice-list of s with interleaved whitespace:
/// `s` is cut into consecutive runs that are either entirely ASCII
/// whitespace or entirely non-whitespace. Returns null for an empty `s`.
T[][] segmentByWhitespace(T)(T[] s)
	if (is(Unqual!T == char))
{
	if (s.length == 0)
		return null;

	T[][] pieces;
	size_t runStart = 0;
	bool runIsWhite = isWhite(s[0]);

	foreach (idx; 1 .. s.length)
	{
		immutable hereIsWhite = isWhite(s[idx]);
		if (hereIsWhite == runIsWhite)
			continue;

		// The character class changed: close the current run here.
		pieces ~= s[runStart .. idx];
		runStart = idx;
		runIsWhite = hereIsWhite;
	}
	pieces ~= s[runStart .. $];

	return pieces;
}
388 
/// Replaces every whitespace run of `s` that contains a line feed
/// with a single space; all other text is kept as is.
T[] newlinesToSpaces(T)(T[] s)
	if (is(Unqual!T == char))
{
	auto parts = segmentByWhitespace(s);
	foreach (i; 0 .. parts.length)
	{
		if (parts[i].contains("\n"))
			parts[i] = " ";
	}
	return parts.join();
}
398 
/// Strips `s` and replaces each interior whitespace run with a single space.
ascii normalizeWhitespace(ascii s)
{
	auto parts = segmentByWhitespace(strip(s));
	// The segments alternate between the two character classes; the
	// stripped text is expected to start with a non-whitespace run,
	// which makes the odd-numbered segments the whitespace runs.
	for (size_t i = 1; i < parts.length; i += 2)
		parts[i] = " ";
	return parts.join();
}

unittest
{
	assert(normalizeWhitespace(" Mary  had\ta\nlittle\r\n\tlamb") == "Mary had a little lamb");
}
412 
/// Splits a camelCase identifier into its words. A new word starts at a
/// lower-to-upper transition, and at the last capital of a capital run
/// that is followed by a lower-case letter (so "IPString" gives "IP", "String").
string[] splitByCamelCase(string s)
{
	// Does a new word begin at index `i` (0 < i < s.length)?
	bool startsWord(size_t i)
	{
		if (isLower(s[i - 1]) && isUpper(s[i]))
			return true;
		return i + 1 < s.length
			&& isUpper(s[i - 1]) && isUpper(s[i]) && isLower(s[i + 1]);
	}

	string[] words;
	size_t wordStart = 0;
	for (size_t i = 1; i <= s.length; i++)
	{
		if (i < s.length && !startsWord(i))
			continue;
		words ~= s[wordStart .. i];
		wordStart = i;
	}
	return words;
}

unittest
{
	assert(splitByCamelCase("parseIPString") == ["parse", "IP", "String"]);
	assert(splitByCamelCase("IPString") == ["IP", "String"]);
}
434 
/// Joins words into a camelCase identifier: the first word is used as is,
/// every following word gets its first character upper-cased (ASCII only).
/// Empty words are skipped. Returns null for an empty array.
string camelCaseJoin(string[] arr)
{
	if (!arr.length)
		return null;
	string result = arr[0];
	foreach (s; arr[1..$])
	{
		// An empty word (e.g. from two adjacent separators) has no first
		// character to capitalize; indexing it would be a range violation.
		if (!s.length)
			continue;
		result ~= std.ascii.toUpper(s[0]);
		result ~= s[1 .. $];
	}
	return result;
}

unittest
{
	assert("parse-IP-string".split('-').camelCaseJoin() == "parseIPString");
	assert("parse--string".split('-').camelCaseJoin() == "parseString");
}
449 
450 // ************************************************************************
451 
/// Like std.string.wrap, but preserves whitespace at line start and
/// between (non-wrapped) words.
/// Params:
///   s           = text to wrap; an empty string is returned unchanged
///   columns     = column after which a word is moved to the next line
///   firstIndent = prefix of the first output line
///   indent      = prefix of every following output line
///   tabWidth    = number of columns a tab character is counted as
/// Returns: the wrapped text; a newline is appended when the last line
/// is not empty. A word is only moved to a new line when something has
/// already been written to the current line, so an over-long word is
/// kept intact.
string verbatimWrap(
	string s,
	size_t columns = 80,
	string firstIndent = null,
	string indent = null,
	size_t tabWidth = 8,
)
{
	if (!s.length)
		return s;

	import std.uni : isWhite;
	import std.range;

	// Result buffer. Append-only (contains only text which has been wrapped).
	string result;
	// Index in `s` corresponding to the end of `result`
	size_t start;
	// Index in `s` corresponding to after the last newline in `result`
	size_t lineStart;
	// Current column
	size_t col;
	// Was the previous character we looked at whitespace?
	bool wasWhite;
	// We need to add an indent at the next (non-newline) character.
	bool needIndent;

	result = firstIndent;
	col = firstIndent.walkLength;
	auto indentWidth = indent.walkLength;

	// Appends the pending text `s[start .. pos]` (a whitespace run followed
	// by a word) to `result`, breaking the line in front of the word if the
	// word made the current line too long.
	void flush(size_t pos)
	{
		if (col > columns && start > lineStart)
		{
			// Find the word in the pending text. The search is limited to
			// the pending text so that `start` can never move past `pos`.
			size_t wordStart = pos;
			foreach (i, dchar c; s[start .. pos])
				if (!isWhite(c))
				{
					wordStart = start + i;
					break;
				}

			// Only break the line if there is a word to move; pending
			// text consisting of whitespace only is kept where it is.
			if (wordStart < pos)
			{
				result ~= "\n" ~ indent;

				// Consume whitespace at line break
				start = wordStart;
				lineStart = start;

				// The word now sits right after the indent of the new line.
				col = indentWidth + s[start .. pos].walkLength;
			}
		}
		result ~= s[start .. pos];
		start = pos;
	}

	foreach (pos, dchar c; s)
	{
		auto atWhite = isWhite(c);
		if (atWhite && !wasWhite)
			flush(pos);
		if (c == '\n')
		{
			flush(pos);
			result ~= "\n";
			start++; // past newline
			lineStart = start;
			needIndent = true;
			col = 0;
		}
		else
		{
			if (needIndent)
			{
				assert(col == 0);
				result ~= indent;
				col += indentWidth;
				needIndent = false;
			}
			if (c == '\t')
				col += tabWidth;
			else
				col++;
		}
		wasWhite = atWhite;
	}
	flush(s.length);
	if (col)
		result ~= "\n"; // trailing newline

	return result;
}
542 
543 // ************************************************************************
544 
/// Case-insensitive ASCII string.
/// The normalization lower-cases each UTF-8 code unit with
/// std.ascii.toLower, so only ASCII letters are case-folded.
alias CIAsciiString = NormalizedArray!(immutable(char), (s) => map!(std.ascii.toLower)(byCodeUnit(s)));

///
unittest
{
	CIAsciiString word = "test";
	assert(word == "TEST");
	assert(word >= "Test" && word <= "Test");
	assert(CIAsciiString("a") == CIAsciiString("A"));
	assert(CIAsciiString("a") != CIAsciiString("B"));
	assert(CIAsciiString("a") <  CIAsciiString("B"));
	assert(CIAsciiString("A") <  CIAsciiString("b"));
	// Non-ASCII letters are not case-folded.
	assert(CIAsciiString("я") != CIAsciiString("Я"));
}
560 
/// Case-insensitive Unicode string.
/// The normalization lower-cases each element with std.uni.toLower.
alias CIUniString = NormalizedArray!(immutable(char), (s) => map!(std.uni.toLower)(s));

///
unittest
{
	CIUniString word = "привет";
	assert(word == "ПРИВЕТ");
	assert(word >= "Привет" && word <= "Привет");
	assert(CIUniString("я") == CIUniString("Я"));
	assert(CIUniString("а") != CIUniString("Б"));
	assert(CIUniString("а") <  CIUniString("Б"));
	assert(CIUniString("А") <  CIUniString("б"));
}
575 
576 // ************************************************************************
577 
578 import std.utf;
579 
/// Convert any data to a valid UTF-8 bytestream, so D's string functions can
/// properly work on it.
/// Every input byte is taken as the code point of the same value and the
/// resulting code point sequence is encoded as UTF-8.
string rawToUTF8(in char[] s)
{
	auto codePoints = new dchar[s.length];
	for (size_t i = 0; i < s.length; i++)
		codePoints[i] = s[i];
	return codePoints.toUTF8();
}
589 
/// Undo rawToUTF8.
/// Decodes `r` and stores each code point as a single byte.
/// Every code point must be below U+0100 (checked with an assert).
ascii UTF8ToRaw(in char[] r) pure
{
	// One output byte per code point, so the input length is an upper bound.
	auto raw = new char[r.length];
	size_t count = 0;
	foreach (dchar codePoint; r)
	{
		assert(codePoint < '\u0100');
		raw[count] = cast(char)codePoint;
		count++;
	}
	return raw[0 .. count];
}

unittest
{
	char[1] c;
	for (int i=0; i<256; i++)
	{
		c[0] = cast(char)i;
		assert(UTF8ToRaw(rawToUTF8(c[])) == c[], format("%s -> %s -> %s", cast(ubyte[])c[], cast(ubyte[])rawToUTF8(c[]), cast(ubyte[])UTF8ToRaw(rawToUTF8(c[]))));
	}
}
612 
/// Where a delegate with this signature is required.
/// Performs no transformation: returns `s` converted to a `string`.
string nullStringTransform(in char[] s)
{
	return s.to!string();
}
615 
/// Returns `s` unchanged if it is valid UTF-8; otherwise returns
/// the result of passing it through `rawToUTF8`.
string forceValidUTF8(string s)
{
	bool isValid = true;
	try
		validate(s);
	catch (UTFException)
		isValid = false;

	return isValid ? s : rawToUTF8(s);
}
626 
627 // ************************************************************************
628 
/// Return the slice up to the first NUL character,
/// or of the whole array if none is found.
C[] fromZArray(C, size_t n)(ref C[n] arr)
{
	// `n` must be a value parameter: as a type parameter, `C[n]` would
	// denote an associative array and never match a static array.
	return .fromZArray(arr[]);
}

/// ditto
C[] fromZArray(C)(C[] arr)
{
	auto p = arr.representation.countUntil(0);
	return arr[0 .. p<0 ? $ : p];
}

unittest
{
	char[4] arr = "ab\0d";
	assert(arr.fromZArray == "ab");
	arr[] = "abcd";
	assert(arr.fromZArray == "abcd");

	// The static array overload must be instantiable.
	static assert(is(typeof(&fromZArray!(char, 4))));
}

unittest
{
	string arr = "ab\0d";
	assert(arr.fromZArray == "ab");
	arr = "abcd";
	assert(arr.fromZArray == "abcd");
}
659 
660 // ************************************************************************
661 
/// Formats binary data as a hex dump (three-column layout consisting of hex
/// offset, byte values in hex, and printable low-ASCII characters).
/// Each output line covers 16 bytes and ends with a newline. In the
/// character column, NUL is shown as a space and any other byte outside
/// the printable ASCII range as '.'. Empty input produces an empty string.
string hexDump(const(void)[] b)
{
	import std.array : appender;

	auto data = cast(const(ubyte)[]) b;
	auto s = appender!string();

	for (size_t i = 0; i < data.length; i += 16)
	{
		auto row = data[i .. min(i + 16, $)];

		// Offset column
		s.formattedWrite("%08X:  ", i);

		// Hex column; short rows are padded so the columns stay aligned.
		foreach (x; 0 .. 16)
		{
			if (x < row.length)
				s.formattedWrite("%02X ", row[x]);
			else
				s.put("   ");
			if (x == 7)
				s.put("| ");
		}
		s.put("  ");

		// Character column
		foreach (x; 0 .. 16)
		{
			if (x >= row.length)
				s.put(' ');
			else if (row[x] == 0)
				s.put(' ');
			else if (row[x] < 32 || row[x] >= 127) // control characters, DEL, non-ASCII
				s.put('.');
			else
				s.put(cast(char)row[x]);
		}
		s.put('\n');
	}
	return s.data;
}
701 
702 import std.conv;
703 
/// Parses the whole of `s` as a hexadecimal number of type `T`.
/// Throws: ConvException if `s` is not entirely a hexadecimal number.
T fromHex(T : ulong = uint, C)(const(C)[] s)
{
	auto rest = s;
	immutable value = parse!T(rest, 16);
	// parse stops at the first character it cannot use; anything left is an error.
	if (rest.length != 0)
		throw new ConvException("Could not parse entire string");
	return value;
}
710 
/// Decodes the hex string `hex` into a newly allocated byte array
/// (two hex digits per byte), using the default `HexParseConfig`.
ubyte[] arrayFromHex(in char[] hex)
{
	auto bytes = new ubyte[hex.length / 2];
	arrayFromHex(hex, bytes);
	return bytes;
}
717 
/// Compile-time options for the hex parsing functions.
struct HexParseConfig
{
	bool checked = true; /// throw on characters which are not hex digits
	bool lower = true;   /// the input may contain lower-case digits (a-f)
	bool upper = true;   /// the input may contain upper-case digits (A-F)
}

/// Converts one hex digit character to its value (0-15).
/// With `config.checked`, digits of either case are accepted and any other
/// character raises an Exception. Without it, no validation is done and
/// `config.lower` / `config.upper` select which letter case is assumed.
ubyte parseHexDigit(HexParseConfig config = HexParseConfig.init)(char c)
{
	static assert(config.lower || config.upper,
		"Must parse at least either lower or upper case digits");
	static if (config.checked)
	{
		if (c >= '0' && c <= '9')
			return cast(ubyte)(c - '0');
		if (c >= 'a' && c <= 'f')
			return cast(ubyte)(c - 'a' + 10);
		if (c >= 'A' && c <= 'F')
			return cast(ubyte)(c - 'A' + 10);
		throw new Exception("Bad hex digit: " ~ c);
	}
	else
	{
		// Unchecked: anything up to '9' is taken to be a decimal digit.
		if (c <= '9')
			return cast(ubyte)(c - '0');

		static if (config.lower && config.upper)
		{
			// Upper-case letters sort before lower-case ones in ASCII.
			immutable char base = c < 'a' ? 'A' : 'a';
			return cast(ubyte)(c - base + 10);
		}
		else static if (config.lower)
			return cast(ubyte)(c - 'a' + 10);
		else
			return cast(ubyte)(c - 'A' + 10);
	}
}
757 
/// Decodes the hex string `hex` into `buf`, two hex digits per byte.
/// `buf` must hold exactly `hex.length / 2` bytes.
/// Throws: Exception if `hex` has an odd number of digits
/// (and whatever `parseHexDigit!config` throws for bad digits).
void arrayFromHex(HexParseConfig config = HexParseConfig.init)(in char[] hex, ubyte[] buf)
{
	// With an odd length the last byte would need a digit beyond the end of `hex`.
	enforce(hex.length % 2 == 0, "Odd number of hex digits in arrayFromHex input");
	assert(buf.length == hex.length/2, "Wrong buffer size for arrayFromHex");
	foreach (i, ref b; buf)
		b = cast(ubyte)(
			parseHexDigit!config(hex[i*2  ])*16 +
			parseHexDigit!config(hex[i*2+1])
		);
}
767 
/// Fast version for static arrays of known length.
/// Decodes the `N*2` hex characters of `hex` into the `N` bytes of `buf`.
void sarrayFromHex(HexParseConfig config = HexParseConfig.init, size_t N, Hex)(ref const Hex hex, ref ubyte[N] buf)
if (is(Hex == char[N*2]))
{
	// Bulk part: read 8 hex characters as one ulong and write the
	// 4 bytes they encode as one uint.
	auto hexWords = cast(ulong*)hex.ptr;
	auto outWords = cast(uint*)buf.ptr;
	foreach (w; 0 .. N/4)
	{
		ulong chars = hexWords[w];
		uint res = 0;
		foreach (k; 0 .. 4)
		{
			// Output byte k is built from characters 2k (high nibble)
			// and 2k+1 (low nibble) of the loaded word.
			res |= parseHexDigit!config((chars >> (16*k    )) & 0xFF) << (8*k + 4);
			res |= parseHexDigit!config((chars >> (16*k + 8)) & 0xFF) << (8*k    );
		}
		outWords[w] = res;
	}

	// Remaining bytes (N not a multiple of 4) are decoded one at a time.
	foreach (i; N/4*4..N)
	{
		immutable hi = parseHexDigit!config(hex[i*2  ]);
		immutable lo = parseHexDigit!config(hex[i*2+1]);
		buf[i] = cast(ubyte)(hi*16 + lo);
	}
}

unittest
{
	foreach (checked; TypeTuple!(false, true))
		foreach (lower; TypeTuple!(false, true))
			foreach (upper; TypeTuple!(false, true))
				static if (lower || upper)
				{
					enum config = HexParseConfig(checked, lower, upper);
					char[18] buf;
					foreach (n; 0..18)
						if (lower && upper ? n & 1 : upper)
							buf[n] = hexDigits[n % 16];
						else
							buf[n] = lowerHexDigits[n % 16];
					ubyte[9] res;
					sarrayFromHex!config(buf, res);
					assert(res == [0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0x01], text(res));
				}
}
812 
/// Writes two characters per byte of `data` into `buf` (high nibble first),
/// taking the digit characters from `digits`.
private void hexEncode(alias digits)(in ubyte[] data, char[] buf)
{
	size_t o = 0;
	foreach (b; data)
	{
		buf[o++] = digits[b >> 4];
		buf[o++] = digits[b & 15];
	}
}

/// Hex-encodes bytes using the digit set `digits` (upper-case by default).
template toHex(alias digits = hexDigits)
{
	/// Encodes into the caller's buffer, which must be exactly
	/// twice as long as `data`; returns `buf`.
	char[] toHex(in ubyte[] data, char[] buf) pure
	{
		assert(buf.length == data.length*2);
		hexEncode!digits(data, buf);
		return buf;
	}

	/// Encodes a static array into a static array, without heap allocation.
	char[n*2] toHex(size_t n)(in ubyte[n] data) pure
	{
		char[n*2] buf;
		hexEncode!digits(data[], buf[]);
		return buf;
	}

	/// Encodes into a newly allocated string.
	string toHex(in ubyte[] data) pure
	{
		auto buf = new char[data.length*2];
		hexEncode!digits(data, buf);
		return buf;
	}
}

/// Same as `toHex`, with lower-case digits.
alias toLowerHex = toHex!lowerHexDigits;

/// Writes the value of `n` as `T.sizeof*2` upper-case hex digits
/// (zero-padded, most significant digit first) to the start of `buf`.
void toHex(T : ulong, size_t U = T.sizeof*2)(T n, ref char[U] buf)
{
	enum digitCount = T.sizeof * 2;
	static assert(U >= digitCount, "Buffer too small for toHex");

	Unqual!T rest = n;
	// Fill from the least significant digit backwards.
	foreach_reverse (i; 0 .. digitCount)
	{
		buf[i] = hexDigits[rest & 0xF];
		rest >>= 4;
	}
}

unittest
{
	ubyte[] bytes = [0x12, 0x34];
	assert(toHex(bytes) == "1234");
}

unittest
{
	ubyte[] bytes = [0x12, 0x34];
	char[] buf = new char[4];
	toHex(bytes, buf);
	assert(buf == "1234");
}

unittest
{
	char[8] buf;
	toHex(0x01234567, buf);
	assert(buf == "01234567");
}
881 
/// Returns the value of `n` as a static array of `T.sizeof*2`
/// upper-case hex digits (zero-padded).
char[T.sizeof*2] toHex(T : ulong)(T n)
{
	char[T.sizeof*2] digits;
	toHex(n, digits);
	return digits;
}

unittest
{
	assert(toHex(0x01234567) == "01234567");
}

unittest
{
	ubyte[2] bytes = [0x12, 0x34];
	auto encoded = bytes.toLowerHex();
	static assert(encoded.length == 4);
	assert(encoded == "1234");
}
901 
/// How many significant decimal digits does a FP type have
/// (determined empirically - valid for all D FP types on x86/64)
enum significantDigits(T : real) = 2 * (T.sizeof + 1);

/// Format string for a FP type which includes all necessary
/// significant digits
enum fpFormatString(T) = text("%.", significantDigits!T, "g");

/// Length modifier placed in front of the conversion character in the
/// C format string for each FP type. Only defined for float, double and real.
template cWidthString(T)
{
	static if (is(Unqual!T == float))
	{
		enum cWidthString = "";
	}
	else static if (is(Unqual!T == double))
	{
		enum cWidthString = "l";
	}
	else static if (is(Unqual!T == real))
	{
		enum cWidthString = "L";
	}
}

/// Like fpFormatString, but including the C length modifier for `T`.
enum fpCFormatString(T) = text("%.", significantDigits!T, cWidthString!T, "g");
921 
/// Calls snprintf on the static array `buf`, passing the array's own
/// length as the size limit. Returns snprintf's result.
private auto safeSprintf(size_t N, Args...)(ref char[N] buf, auto ref Args args) @trusted @nogc
{
	auto written = snprintf(buf.ptr, buf.length, args);
	return written;
}
926 
/// Renders `val` into a fixed-size buffer as a short decimal string which
/// still parses back to the same value: the full-precision snprintf output
/// is shortened for as long as `tryParse` still yields `val`.
private auto fpToBuf(Q)(Q val) @safe nothrow @nogc
{
	alias F = Unqual!Q;

	/// Bypass FPU register, which may contain a different precision
	static F forceType(F d) { static F n; n = d; return n; }

	enum isReal = is(F == real);

	StaticBuf!(char, 64) buf = void;

	// MSVC workaround from std.format:
	version (CRuntime_Microsoft)
	{
		import std.math : isNaN, isInfinity;
		immutable double v = val; // convert early to get "inf" in case of overflow
		{
			string s;
			if (isNaN(v))
				s = "nan"; // snprintf writes 1.#QNAN
			else if (isInfinity(v))
				s = val >= 0 ? "inf" : "-inf"; // snprintf writes 1.#INF
			else
				goto L1;
			buf.buf[0..s.length] = s;
			buf.pos = s.length;
			return buf;
		L1:
		}
	}
	else
		alias v = val;

	// Start from the full-precision rendering.
	buf.pos = safeSprintf(buf.buf, &fpCFormatString!F[0], forceType(v));
	char[] str = buf.data();

	// Parses a candidate string back to F; it must be consumed entirely.
	F reparse(char[] candidate)
	{
		F parsed;
		auto ok = tryParse(candidate, parsed);
		assert(ok, "Failed to parse number we created");
		assert(!candidate.length, "Failed to completely parse number we created");
		return parsed;
	}

	// Does the candidate string still denote exactly `v`?
	bool roundTrips(char[] candidate)
	{
		return forceType(reparse(candidate)) == v;
	}

	immutable isSpecial = str == "nan" || str == "-nan" || str == "inf" || str == "-inf";
	if (!isSpecial)
	{
		if (!roundTrips(str))
		{
			static if (isReal)
			{
				// Something funny with DM libc real parsing... e.g. 0.6885036635121051783
				return buf;
			}
			else
				assert(false, "Initial conversion fails");
		}

		// From the end, try bumping a digit and cutting off everything
		// after it; keep the shorter text if it still round-trips.
		foreach_reverse (i; 1 .. str.length)
		{
			if (str[i] < '0' || str[i] > '8')
				continue;
			str[i]++;
			if (roundTrips(str[0 .. i + 1]))
				str = str[0 .. i + 1];
			else
				str[i]--;
		}

		// Then drop trailing characters while the value is unchanged.
		while (str.length > 2 && str[$ - 1] != '.' && roundTrips(str[0 .. $ - 1]))
			str = str[0 .. $ - 1];
	}
	buf.pos = str.length;
	return buf;
}
1001 
/// Writes the `fpToBuf` text representation of `v` to `writer`.
void putFP(Writer, F)(auto ref Writer writer, F v)
{
	auto rendered = fpToBuf(v);
	writer.put(rendered.data);
}
1006 
1007 
/// Get shortest string representation of a FP type that still converts to exactly the same number.
template fpToString(F)
{
	string fpToString(F v) @safe nothrow
	{
		auto rendered = fpToBuf(v);
		return rendered.data.idup;
	}

	// Round-trip test on random bit patterns (not run for real).
	static if (!is(Unqual!F == real))
	unittest
	{
		union U
		{
			ubyte[F.sizeof] bytes;
			Unqual!F d;
			string toString() const { return (fpFormatString!F ~ " %a [%(%02X %)]").format(d, d, bytes[]); }
		}
		import std.random : Xorshift, uniform;
		import std.stdio : stderr;
		Xorshift rng;
		foreach (n; 0..10000)
		{
			U input;
			foreach (ref b; input.bytes[])
				b = uniform!ubyte(rng);
			static if (is(Unqual!F == real))
				input.bytes[7] |= 0x80; // require normalized value
			scope(failure) stderr.writeln("Input:\t", input);
			auto s = fpToString(input.d);
			scope(failure) stderr.writeln("Result:\t", s);
			if (s == "nan" || s == "-nan")
				continue; // there are many NaNs...
			U output;
			output.d = to!F(s);
			assert(output.bytes == input.bytes,
				"fpToString mismatch:\nOutput:\t%s".format(output));
		}
	}
}

/// `fpToString` for `double`.
alias doubleToString = fpToString!double;

unittest
{
	// Check that the template instantiates for the other FP types.
	alias floatToString = fpToString!float;
	alias realToString = fpToString!real;
	alias crealToString = fpToString!(const(real));
}
1056 
/// Wraps the result of a fpToString in a non-allocating stringifiable struct.
struct FPAsString(T)
{
	/// The rendered text, as produced by `fpToBuf`.
	typeof(fpToBuf(T.init)) buf;

	this(T f)
	{
		buf = fpToBuf(f);
	}

	/// Returns a copy of the rendered text.
	string toString() const pure nothrow
	{
		auto text = buf.data;
		return text.idup;
	}

	/// Writes the rendered text to `w`: in one call if `w.put` accepts
	/// the whole slice, otherwise character by character.
	void toString(W)(ref W w) const
	{
		static if (is(typeof(w.put(buf.data))))
		{
			w.put(buf.data);
		}
		else
		{
			foreach (c; buf.data)
				w.put(c);
		}
	}
}
FPAsString!T fpAsString(T)(T f) { return FPAsString!T(f); } /// ditto

@safe //nothrow @nogc
unittest
{
	StaticBuf!(char, 1024) sink;
	sink.formattedWrite!"%s"(fpAsString(0.1));
	assert(sink.data == "0.1");
}
1090 
/// Converts a number to a string: types implicitly convertible to `ulong`
/// go through `toDec`, all other numeric types through `fpToString`.
string numberToString(T)(T v)
	if (isNumeric!T)
{
	static if (!is(T : ulong))
		return fpToString(v);
	else
		return toDec(v);
}
1099 
1100 // ************************************************************************
1101 
/// Simpler implementation of Levenshtein string distance
/// (number of single-element insertions, deletions and substitutions
/// needed to turn `s` into `t`; the strings are compared code unit by code unit).
int stringDistance(string s, string t)
{
	immutable rows = cast(int)s.length;
	immutable cols = cast(int)t.length;
	if (rows == 0) return cols;
	if (cols == 0) return rows;

	// Only two rows of the distance matrix are needed at any time.
	auto prev = new int[cols + 1];
	auto curr = new int[cols + 1];
	foreach (j, ref cell; prev)
		cell = cast(int)j;

	foreach (i; 1 .. rows + 1)
	{
		curr[0] = i;
		foreach (j; 1 .. cols + 1)
		{
			immutable substitution = prev[j - 1] + (s[i - 1] == t[j - 1] ? 0 : 1);
			curr[j] = min(
				prev[j    ] + 1, // deletion
				curr[j - 1] + 1, // insertion
				substitution
			);
		}
		swap(prev, curr);
	}
	return prev[cols];
}
1127 
/// Return a number between 0.0 and 1.0 indicating how similar two strings are
/// (1.0 if identical)
float stringSimilarity(string string1, string string2)
{
	float longest = max(string1.length, string2.length);
	// Two empty strings are identical.
	if (longest == 0)
		return 1;

	float edits = stringDistance(string1, string2);
	return 1f - edits / longest;
}
1141 
/// Select best match from a list of items.
/// Returns -1 if none are above the threshold.
/// The comparison is done on lower-cased strings; when several items
/// share the best score, the index of the last one is returned.
sizediff_t findBestMatch(in string[] items, string target, float threshold = 0.7)
{
	// The lower-cased target is the same for every item.
	immutable loweredTarget = toLower(target);

	sizediff_t bestIndex = -1;
	float bestScore = 0;

	foreach (i, item; items)
	{
		immutable score = stringSimilarity(toLower(item), loweredTarget);
		if (score <= threshold || score < bestScore)
			continue;
		bestScore = score;
		bestIndex = i;
	}

	return bestIndex;
}
1161 
/// Select best match from a list of items.
/// Returns null if none are above the threshold.
string selectBestFrom(in string[] items, string target, float threshold = 0.7)
{
	immutable index = findBestMatch(items, target, threshold);
	if (index < 0)
		return null;
	return items[index];
}
1169 
1170 // ************************************************************************
1171 
/// Returns a string of `length` characters, each drawn independently
/// from `chars` with std.random's `uniform`.
string randomString()(int length=20, string chars="abcdefghijklmnopqrstuvwxyz")
{
	import std.random;
	import std.range;

	auto pickChar = (int _) => chars[uniform(0, chars.length)];
	return iota(length).map!pickChar.array;
}