ae.utils.text source code

1 /**
2  * Utility code related to string and text processing.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.utils.text;
15 
16 import std.algorithm;
17 import std.ascii;
18 import std.exception;
19 import std.conv;
20 import std.format;
21 import std.range.primitives;
22 import std.string;
23 import std.traits;
24 import std.typetuple;
25 
26 import core.stdc.stdio : snprintf, sscanf;
27 import core.stdc.string;
28 
29 import ae.utils.array;
30 import ae.utils.meta;
31 import ae.utils.text.parsefp;
32 import ae.utils.textout;
33 
34 alias indexOf = std..string.indexOf;
35 
36 public import ae.utils.text.ascii : ascii, DecimalSize, toDec, toDecFixed, asciiToLower, asciiToUpper;
37 public import ae.utils.array : contains;
38 
39 // ************************************************************************
40 
41 /// CTFE helper
42 string formatAs(T)(auto ref T obj, string fmt)
43 {
44 	return format(fmt, obj);
45 }
46 
47 /// Lazily formatted object
48 auto formatted(string fmt, T...)(auto ref T values)
49 {
50 	static struct Formatted
51 	{
52 		T values;
53 
54 		void toString(void delegate(const(char)[]) sink) const
55 		{
56 			sink.formattedWrite!fmt(values);
57 		}
58 
59 		void toString(W)(ref W writer) const
60 		if (isOutputRange!(W, char))
61 		{
62 			writer.formattedWrite!fmt(values);
63 		}
64 	}
65 	return Formatted(values);
66 }
67 
68 unittest
69 {
70 	assert(format!"%s%s%s"("<", formatted!"%x"(64), ">") == "<40>");
71 }
72 
73 // ************************************************************************
74 
75 /// Consume a LF or CRLF terminated line from s.
76 /// Sets s to null and returns the remainder
77 /// if there is no line terminator in s.
78 T[] eatLine(T)(ref T[] s, bool eatIncompleteLines = true)
79 {
80 	return s.skipUntil([T('\n')], eatIncompleteLines).chomp();
81 }
82 
83 deprecated template eatLine(OnEof onEof)
84 {
85 	T[] eatLine(T)(ref T[] s)
86 	{
87 		return s.eatUntil!onEof([T('\n')]).chomp();
88 	}
89 }
90 
91 unittest
92 {
93 	string s = "Hello\nworld";
94 	assert(s.eatLine() == "Hello");
95 	assert(s.eatLine() == "world");
96 	assert(s is null);
97 	assert(s.eatLine() is null);
98 }
99 
100 // Uses memchr (not Boyer-Moore), best for short strings.
101 T[] fastReplace(T)(T[] what, T[] from, T[] to)
102 	if (T.sizeof == 1) // TODO (uses memchr)
103 {
104 	alias Unqual!T U;
105 
106 //	debug scope(failure) std.stdio.writeln("fastReplace crashed: ", [what, from, to]);
107 	enum RAM = cast(U*)null;
108 
109 	if (what.length < from.length || from.length==0)
110 		return what;
111 
112 	if (from.length==1)
113 	{
114 		auto fromc = from[0];
115 		if (to.length==1)
116 		{
117 			auto p = cast(T*)memchr(what.ptr, fromc, what.length);
118 			if (!p)
119 				return what;
120 
121 			T[] result = what.dup;
122 			auto delta = result.ptr - what.ptr;
123 			auto toChar = to[0];
124 			auto end = what.ptr + what.length;
125 			do
126 			{
127 				(cast(U*)p)[delta] = toChar; // zomg hax lol
128 				p++;
129 				p = cast(T*)memchr(p, fromc, end - p);
130 			} while (p);
131 			return result;
132 		}
133 		else
134 		{
135 			auto p = cast(immutable(T)*)memchr(what.ptr, fromc, what.length);
136 			if (!p)
137 				return what;
138 
139 			auto sb = StringBuilder(what.length);
140 			do
141 			{
142 				sb.put(what[0..p-what.ptr], to);
143 				what = what[p-what.ptr+1..$];
144 				p = cast(immutable(T)*)memchr(what.ptr, fromc, what.length);
145 			}
146 			while (p);
147 
148 			sb.put(what);
149 			return sb.get();
150 		}
151 	}
152 
153 	auto head = from[0];
154 	auto tail = from[1..$];
155 
156 	auto p = cast(T*)what.ptr;
157 	auto end = p + what.length - tail.length;
158 	p = cast(T*)memchr(p, head, end-p);
159 	while (p)
160 	{
161 		p++;
162 		if (p[0..tail.length] == tail)
163 		{
164 			if (from.length == to.length)
165 			{
166 				T[] result = what.dup;
167 				auto deltaMinusOne = (result.ptr - what.ptr) - 1;
168 
169 				goto replaceA;
170 			dummyA: // compiler complains
171 
172 				do
173 				{
174 					p++;
175 					if (p[0..tail.length] == tail)
176 					{
177 					replaceA:
178 						(cast(U*)p+deltaMinusOne)[0..to.length] = to[];
179 					}
180 					p = cast(T*)memchr(p, head, end-p);
181 				}
182 				while (p);
183 
184 				return result;
185 			}
186 			else
187 			{
188 				auto start = cast(T*)what.ptr;
189 				auto sb = StringBuilder(what.length);
190 				goto replaceB;
191 			dummyB: // compiler complains
192 
193 				do
194 				{
195 					p++;
196 					if (p[0..tail.length] == tail)
197 					{
198 					replaceB:
199 						sb.put(RAM[cast(size_t)start .. cast(size_t)p-1], to);
200 						start = p + tail.length;
201 						what = what[start-what.ptr..$];
202 					}
203 					else
204 					{
205 						what = what[p-what.ptr..$];
206 					}
207 					p = cast(T*)memchr(what.ptr, head, what.length);
208 				}
209 				while (p);
210 
211 				//sb.put(what);
212 				sb.put(RAM[cast(size_t)start..cast(size_t)(what.ptr+what.length)]);
213 				return sb.get();
214 			}
215 
216 			assert(0);
217 		}
218 		p = cast(T*)memchr(p, head, end-p);
219 	}
220 
221 	return what;
222 }
223 
224 unittest
225 {
226 	import std.array;
227 	void test(string haystack, string from, string to)
228 	{
229 		auto description = `("` ~ haystack ~ `", "` ~ from ~ `", "` ~ to ~ `")`;
230 
231 		auto r1 = fastReplace(haystack, from, to);
232 		auto r2 =     replace(haystack, from, to);
233 		assert(r1 == r2, `Bad replace: ` ~ description ~ ` == "` ~ r1 ~ `"`);
234 
235 		if (r1 == haystack)
236 			assert(r1 is haystack, `Pointless reallocation: ` ~ description);
237 	}
238 
239 	test("Mary had a little lamb", "a", "b");
240 	test("Mary had a little lamb", "a", "aaa");
241 	test("Mary had a little lamb", "Mary", "Lucy");
242 	test("Mary had a little lamb", "Mary", "Jimmy");
243 	test("Mary had a little lamb", "lamb", "goat");
244 	test("Mary had a little lamb", "lamb", "sheep");
245 	test("Mary had a little lamb", " l", " x");
246 	test("Mary had a little lamb", " l", " xx");
247 
248 	test("Mary had a little lamb", "X" , "Y" );
249 	test("Mary had a little lamb", "XX", "Y" );
250 	test("Mary had a little lamb", "X" , "YY");
251 	test("Mary had a little lamb", "XX", "YY");
252 	test("Mary had a little lamb", "aX", "Y" );
253 	test("Mary had a little lamb", "aX", "YY");
254 
255 	test("foo", "foobar", "bar");
256 }
257 
258 T[][] fastSplit(T, U)(T[] s, U d)
259 	if (is(Unqual!T == Unqual!U))
260 {
261 	if (!s.length)
262 		return null;
263 
264 	auto p = cast(T*)memchr(s.ptr, d, s.length);
265 	if (!p)
266 		return [s];
267 
268 	size_t n;
269 	auto end = s.ptr + s.length;
270 	do
271 	{
272 		n++;
273 		p++;
274 		p = cast(T*) memchr(p, d, end-p);
275 	}
276 	while (p);
277 
278 	auto result = new T[][n+1];
279 	n = 0;
280 	auto start = s.ptr;
281 	p = cast(T*) memchr(start, d, s.length);
282 	do
283 	{
284 		result[n++] = start[0..p-start];
285 		start = ++p;
286 		p = cast(T*) memchr(p, d, end-p);
287 	}
288 	while (p);
289 	result[n] = start[0..end-start];
290 
291 	return result;
292 }
293 
294 T[][] splitAsciiLines(T)(T[] text)
295 	if (is(Unqual!T == char))
296 {
297 	auto lines = text.fastSplit('\n');
298 	foreach (ref line; lines)
299 		if (line.length && line[$-1]=='\r')
300 			line = line[0..$-1];
301 	return lines;
302 }
303 
304 unittest
305 {
306 	assert(splitAsciiLines("a\nb\r\nc\r\rd\n\re\r\n\nf") == ["a", "b", "c\r\rd", "\re", "", "f"]);
307 	assert(splitAsciiLines(string.init) == splitLines(string.init));
308 }
309 
310 /// Like std.string.split (one argument version, which splits by
311 /// whitespace), but only splits by ASCII and does not autodecode.
312 T[][] asciiSplit(T)(T[] text)
313 	if (is(Unqual!T == char))
314 {
315 	bool inWhitespace = true;
316 	size_t wordStart;
317 	T[][] result;
318 
319 	void endWord(size_t p)
320 	{
321 		if (!inWhitespace)
322 		{
323 			result ~= text[wordStart..p];
324 			inWhitespace = true;
325 		}
326 	}
327 
328 	foreach (p, c; text)
329 		if (std.ascii.isWhite(c))
330 			endWord(p);
331 		else
332 			if (inWhitespace)
333 			{
334 				inWhitespace = false;
335 				wordStart = p;
336 			}
337 	endWord(text.length);
338 	return result;
339 }
340 
341 unittest
342 {
343 	foreach (s; ["", " ", "a", " a", "a ", "a b", " a b", "a b ", " a b ",
344 			"  ", "  a", "a  ", "a  b", "a  b  ", "a b  c"])
345 		assert(s.split == s.asciiSplit, format("Got %s, expected %s", s.asciiSplit, s.split));
346 }
347 
348 T[] asciiStrip(T)(T[] s)
349 	if (is(Unqual!T == char))
350 {
351 	while (s.length && isWhite(s[0]))
352 		s = s[1..$];
353 	while (s.length && isWhite(s[$-1]))
354 		s = s[0..$-1];
355 	return s;
356 }
357 
358 unittest
359 {
360 	string s = "Hello, world!";
361 	assert(asciiStrip(s) is s);
362 	assert(asciiStrip("\r\n\tHello ".dup) == "Hello");
363 }
364 
365 /// Covering slice-list of s with interleaved whitespace.
366 T[][] segmentByWhitespace(T)(T[] s)
367 	if (is(Unqual!T == char))
368 {
369 	if (!s.length)
370 		return null;
371 
372 	T[][] segments;
373 	bool wasWhite = isWhite(s[0]);
374 	size_t start = 0;
375 	foreach (p, char c; s)
376 	{
377 		bool isWhite = isWhite(c);
378 		if (isWhite != wasWhite)
379 			segments ~= s[start..p],
380 			start = p;
381 		wasWhite = isWhite;
382 	}
383 	segments ~= s[start..$];
384 
385 	return segments;
386 }
387 
388 T[] newlinesToSpaces(T)(T[] s)
389 	if (is(Unqual!T == char))
390 {
391 	auto slices = segmentByWhitespace(s);
392 	foreach (ref slice; slices)
393 		if (slice.contains("\n"))
394 			slice = " ";
395 	return slices.join();
396 }
397 
398 ascii normalizeWhitespace(ascii s)
399 {
400 	auto slices = segmentByWhitespace(strip(s));
401 	foreach (i, ref slice; slices)
402 		if (i & 1) // odd
403 			slice = " ";
404 	return slices.join();
405 }
406 
407 unittest
408 {
409 	assert(normalizeWhitespace(" Mary  had\ta\nlittle\r\n\tlamb") == "Mary had a little lamb");
410 }
411 
412 string[] splitByCamelCase(string s)
413 {
414 	string[] result;
415 	size_t start = 0;
416 	foreach (i; 1..s.length+1)
417 		if (i == s.length
418 		 || (isLower(s[i-1]) && isUpper(s[i]))
419 		 || (i+1 < s.length && isUpper(s[i-1]) && isUpper(s[i]) && isLower(s[i+1]))
420 		)
421 		{
422 			result ~= s[start..i];
423 			start = i;
424 		}
425 	return result;
426 }
427 
428 unittest
429 {
430 	assert(splitByCamelCase("parseIPString") == ["parse", "IP", "String"]);
431 	assert(splitByCamelCase("IPString") == ["IP", "String"]);
432 }
433 
434 string camelCaseJoin(string[] arr)
435 {
436 	if (!arr.length)
437 		return null;
438 	string result = arr[0];
439 	foreach (s; arr[1..$])
440 		result ~= std.ascii.toUpper(s[0]) ~ s[1..$];
441 	return result;
442 }
443 
444 unittest
445 {
446 	assert("parse-IP-string".split('-').camelCaseJoin() == "parseIPString");
447 }
448 
449 // ************************************************************************
450 
451 /// Like std.string.wrap, but preserves whitespace at line start and
452 /// between (non-wrapped) words.
453 string verbatimWrap(
454 	string s,
455 	size_t columns = 80,
456 	string firstIndent = null,
457 	string indent = null,
458 	size_t tabWidth = 8,
459 )
460 {
461 	if (!s.length)
462 		return s;
463 
464 	import std.uni : isWhite;
465 	import std.range;
466 
467 	// Result buffer. Append-only (contains only text which has been wrapped).
468 	string result;
469 	// Index in `s` corresponding to the end of `result`
470 	size_t start;
471 	// Index in `s` corresponding to after the last newline in `result`
472 	size_t lineStart;
473 	// Current column
474 	size_t col;
475 	// Was the previous character we looked at whitespace?
476 	bool wasWhite;
477 	// We need to add an indent at the next (non-newline) character.
478 	bool needIndent;
479 
480 	result = firstIndent;
481 	col = firstIndent.walkLength;
482 	auto indentWidth = indent.walkLength;
483 
484 	void flush(size_t pos)
485 	{
486 		if (col > columns && start > lineStart)
487 		{
488 			result ~= "\n" ~ indent;
489 			col = indentWidth;
490 
491 			// Consume whitespace at line break
492 			size_t numWhite;
493 			foreach (i, c; s[start .. $])
494 				if (isWhite(c))
495 					numWhite = i;
496 				else
497 					break;
498 			start += numWhite;
499 			lineStart = start;
500 		}
501 		result ~= s[start .. pos];
502 		start = pos;
503 	}
504 
505 	foreach (pos, dchar c; s)
506 	{
507 		auto atWhite = isWhite(c);
508 		if (atWhite && !wasWhite)
509 			flush(pos);
510 		if (c == '\n')
511 		{
512 			flush(pos);
513 			result ~= "\n";
514 			start++; // past newline
515 			lineStart = start;
516 			needIndent = true;
517 			col = 0;
518 		}
519 		else
520 		{
521 			if (needIndent)
522 			{
523 				assert(col == 0);
524 				result ~= indent;
525 				col += indentWidth;
526 				needIndent = false;
527 			}
528 			if (c == '\t')
529 				col += tabWidth;
530 			else
531 				col++;
532 		}
533 		wasWhite = atWhite;
534 	}
535 	flush(s.length);
536 	if (col)
537 		result ~= "\n"; // trailing newline
538 
539 	return result;
540 }
541 
542 // ************************************************************************
543 
544 /// Case-insensitive ASCII string.
545 alias CIAsciiString = NormalizedArray!(immutable(char), s => s.byCodeUnit.map!(std.ascii.toLower));
546 
547 ///
548 unittest
549 {
550 	CIAsciiString s = "test";
551 	assert(s == "TEST");
552 	assert(s >= "Test" && s <= "Test");
553 	assert(CIAsciiString("a") == CIAsciiString("A"));
554 	assert(CIAsciiString("a") != CIAsciiString("B"));
555 	assert(CIAsciiString("a") <  CIAsciiString("B"));
556 	assert(CIAsciiString("A") <  CIAsciiString("b"));
557 	assert(CIAsciiString("я") != CIAsciiString("Я"));
558 }
559 
560 /// Case-insensitive Unicode string.
561 alias CIUniString = NormalizedArray!(immutable(char), s => s.map!(std.uni.toLower));
562 
563 ///
564 unittest
565 {
566 	CIUniString s = "привет";
567 	assert(s == "ПРИВЕТ");
568 	assert(s >= "Привет" && s <= "Привет");
569 	assert(CIUniString("я") == CIUniString("Я"));
570 	assert(CIUniString("а") != CIUniString("Б"));
571 	assert(CIUniString("а") <  CIUniString("Б"));
572 	assert(CIUniString("А") <  CIUniString("б"));
573 }
574 
575 // ************************************************************************
576 
577 import std.utf;
578 
579 /// Convert any data to a valid UTF-8 bytestream, so D's string functions can
580 /// properly work on it.
581 string rawToUTF8(in char[] s)
582 {
583 	auto d = new dchar[s.length];
584 	foreach (i, char c; s)
585 		d[i] = c;
586 	return toUTF8(d);
587 }
588 
589 /// Undo rawToUTF8.
590 ascii UTF8ToRaw(in char[] r) pure
591 {
592 	auto s = new char[r.length];
593 	size_t i = 0;
594 	foreach (dchar c; r)
595 	{
596 		assert(c < '\u0100');
597 		s[i++] = cast(char)c;
598 	}
599 	return s[0..i];
600 }
601 
602 unittest
603 {
604 	char[1] c;
605 	for (int i=0; i<256; i++)
606 	{
607 		c[0] = cast(char)i;
608 		assert(UTF8ToRaw(rawToUTF8(c[])) == c[], format("%s -> %s -> %s", cast(ubyte[])c[], cast(ubyte[])rawToUTF8(c[]), cast(ubyte[])UTF8ToRaw(rawToUTF8(c[]))));
609 	}
610 }
611 
612 /// Where a delegate with this signature is required.
613 string nullStringTransform(in char[] s) { return to!string(s); }
614 
615 string forceValidUTF8(string s)
616 {
617 	try
618 	{
619 		validate(s);
620 		return s;
621 	}
622 	catch (UTFException)
623 		return rawToUTF8(s);
624 }
625 
626 // ************************************************************************
627 
628 /// Return the slice up to the first NUL character,
629 /// or of the whole array if none is found.
630 C[] fromZArray(C, n)(ref C[n] arr)
631 {
632 	auto p = arr.representation.countUntil(0);
633 	return arr[0 .. p<0 ? $ : p];
634 }
635 
636 /// ditto
637 C[] fromZArray(C)(C[] arr)
638 {
639 	auto p = arr.representation.countUntil(0);
640 	return arr[0 .. p<0 ? $ : p];
641 }
642 
643 unittest
644 {
645 	char[4] arr = "ab\0d";
646 	assert(arr.fromZArray == "ab");
647 	arr[] = "abcd";
648 	assert(arr.fromZArray == "abcd");
649 }
650 
651 unittest
652 {
653 	string arr = "ab\0d";
654 	assert(arr.fromZArray == "ab");
655 	arr = "abcd";
656 	assert(arr.fromZArray == "abcd");
657 }
658 
659 // ************************************************************************
660 
661 /// Formats binary data as a hex dump (three-column layout consisting of hex
662 /// offset, byte values in hex, and printable low-ASCII characters).
663 string hexDump(const(void)[] b)
664 {
665 	auto data = cast(const(ubyte)[]) b;
666 	assert(data.length);
667 	size_t i=0;
668 	string s;
669 	while (i<data.length)
670 	{
671 		s ~= format("%08X:  ", i);
672 		foreach (x; 0..16)
673 		{
674 			if (i+x<data.length)
675 				s ~= format("%02X ", data[i+x]);
676 			else
677 				s ~= "   ";
678 			if (x==7)
679 				s ~= "| ";
680 		}
681 		s ~= "  ";
682 		foreach (x; 0..16)
683 		{
684 			if (i+x<data.length)
685 				if (data[i+x]==0)
686 					s ~= ' ';
687 				else
688 				if (data[i+x]<32 || data[i+x]>=128)
689 					s ~= '.';
690 				else
691 					s ~= cast(char)data[i+x];
692 			else
693 				s ~= ' ';
694 		}
695 		s ~= "\n";
696 		i += 16;
697 	}
698 	return s;
699 }
700 
701 import std.conv;
702 
703 T fromHex(T : ulong = uint, C)(const(C)[] s)
704 {
705 	T result = parse!T(s, 16);
706 	enforce(s.length==0, new ConvException("Could not parse entire string"));
707 	return result;
708 }
709 
710 ubyte[] arrayFromHex(in char[] hex)
711 {
712 	auto buf = new ubyte[hex.length/2];
713 	arrayFromHex(hex, buf);
714 	return buf;
715 }
716 
717 struct HexParseConfig
718 {
719 	bool checked = true;
720 	bool lower = true;
721 	bool upper = true;
722 }
723 
724 ubyte parseHexDigit(HexParseConfig config = HexParseConfig.init)(char c)
725 {
726 	static assert(config.lower || config.upper,
727 		"Must parse at least either lower or upper case digits");
728 	static if (config.checked)
729 	{
730 		switch (c)
731 		{
732 			case '0': .. case '9': return cast(ubyte)(c - '0');
733 			case 'a': .. case 'f': return cast(ubyte)(c - 'a' + 10);
734 			case 'A': .. case 'F': return cast(ubyte)(c - 'A' + 10);
735 			default: throw new Exception("Bad hex digit: " ~ c);
736 		}
737 	}
738 	else
739 	{
740 		if (c <= '9')
741 			return cast(ubyte)(c - '0');
742 		static if (config.lower && config.upper)
743 		{
744 			if (c < 'a')
745 				return cast(ubyte)(c - 'A' + 10);
746 			else
747 				return cast(ubyte)(c - 'a' + 10);
748 		}
749 		else
750 			static if (config.lower)
751 				return cast(ubyte)(c - 'a' + 10);
752 			else
753 				return cast(ubyte)(c - 'A' + 10);
754 	}
755 }
756 
757 void arrayFromHex(HexParseConfig config = HexParseConfig.init)(in char[] hex, ubyte[] buf)
758 {
759 	assert(buf.length == hex.length/2, "Wrong buffer size for arrayFromHex");
760 	for (int i=0; i<hex.length; i+=2)
761 		buf[i/2] = cast(ubyte)(
762 			parseHexDigit!config(hex[i  ])*16 +
763 			parseHexDigit!config(hex[i+1])
764 		);
765 }
766 
767 /// Fast version for static arrays of known length.
768 void sarrayFromHex(HexParseConfig config = HexParseConfig.init, size_t N, Hex)(in ref Hex hex, ref ubyte[N] buf)
769 if (is(Hex == char[N*2]))
770 {
771 	foreach (i; 0..N/4)
772 	{
773 		ulong chars = (cast(ulong*)hex.ptr)[i];
774 		uint res =
775 			(parseHexDigit!config((chars >> (8*0)) & 0xFF) << (4*1)) |
776 			(parseHexDigit!config((chars >> (8*1)) & 0xFF) << (4*0)) |
777 			(parseHexDigit!config((chars >> (8*2)) & 0xFF) << (4*3)) |
778 			(parseHexDigit!config((chars >> (8*3)) & 0xFF) << (4*2)) |
779 			(parseHexDigit!config((chars >> (8*4)) & 0xFF) << (4*5)) |
780 			(parseHexDigit!config((chars >> (8*5)) & 0xFF) << (4*4)) |
781 			(parseHexDigit!config((chars >> (8*6)) & 0xFF) << (4*7)) |
782 			(parseHexDigit!config((chars >> (8*7)) & 0xFF) << (4*6));
783 		(cast(uint*)buf.ptr)[i] = res;
784 	}
785 	foreach (i; N/4*4..N)
786 		buf[i] = cast(ubyte)(
787 			parseHexDigit!config(hex[i*2  ])*16 +
788 			parseHexDigit!config(hex[i*2+1])
789 		);
790 }
791 
792 unittest
793 {
794 	foreach (checked; TypeTuple!(false, true))
795 		foreach (lower; TypeTuple!(false, true))
796 			foreach (upper; TypeTuple!(false, true))
797 				static if (lower || upper)
798 				{
799 					enum config = HexParseConfig(checked, lower, upper);
800 					char[18] buf;
801 					foreach (n; 0..18)
802 						if (lower && upper ? n & 1 : upper)
803 							buf[n] = hexDigits[n % 16];
804 						else
805 							buf[n] = lowerHexDigits[n % 16];
806 					ubyte[9] res;
807 					sarrayFromHex!config(buf, res);
808 					assert(res == [0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0x01], text(res));
809 				}
810 }
811 
812 template toHex(alias digits = hexDigits)
813 {
814 	char[] toHex(in ubyte[] data, char[] buf) pure
815 	{
816 		assert(buf.length == data.length*2);
817 		foreach (i, b; data)
818 		{
819 			buf[i*2  ] = digits[b>>4];
820 			buf[i*2+1] = digits[b&15];
821 		}
822 		return buf;
823 	}
824 
825 	char[n*2] toHex(size_t n)(in ubyte[n] data) pure
826 	{
827 		char[n*2] buf;
828 		foreach (i, b; data)
829 		{
830 			buf[i*2  ] = digits[b>>4];
831 			buf[i*2+1] = digits[b&15];
832 		}
833 		return buf;
834 	}
835 
836 	string toHex(in ubyte[] data) pure
837 	{
838 		auto buf = new char[data.length*2];
839 		foreach (i, b; data)
840 		{
841 			buf[i*2  ] = digits[b>>4];
842 			buf[i*2+1] = digits[b&15];
843 		}
844 		return buf;
845 	}
846 }
847 
848 alias toLowerHex = toHex!lowerHexDigits;
849 
850 void toHex(T : ulong, size_t U = T.sizeof*2)(T n, ref char[U] buf)
851 {
852 	Unqual!T x = n;
853 	foreach (i; Reverse!(RangeTuple!(T.sizeof*2)))
854 	{
855 		buf[i] = hexDigits[x & 0xF];
856 		x >>= 4;
857 	}
858 }
859 
860 unittest
861 {
862 	ubyte[] bytes = [0x12, 0x34];
863 	assert(toHex(bytes) == "1234");
864 }
865 
866 unittest
867 {
868 	ubyte[] bytes = [0x12, 0x34];
869 	char[] buf = new char[4];
870 	toHex(bytes, buf);
871 	assert(buf == "1234");
872 }
873 
874 unittest
875 {
876 	char[8] buf;
877 	toHex(0x01234567, buf);
878 	assert(buf == "01234567");
879 }
880 
881 char[T.sizeof*2] toHex(T : ulong)(T n)
882 {
883 	char[T.sizeof*2] buf;
884 	toHex(n, buf);
885 	return buf;
886 }
887 
888 unittest
889 {
890 	assert(toHex(0x01234567) == "01234567");
891 }
892 
893 unittest
894 {
895 	ubyte[2] bytes = [0x12, 0x34];
896 	auto buf = bytes.toLowerHex();
897 	static assert(buf.length == 4);
898 	assert(buf == "1234");
899 }
900 
901 /// How many significant decimal digits does a FP type have
902 /// (determined empirically - valid for all D FP types on x86/64)
903 enum significantDigits(T : real) = 2 + 2 * T.sizeof;
904 
905 /// Format string for a FP type which includes all necessary
906 /// significant digits
907 enum fpFormatString(T) = "%." ~ text(significantDigits!T) ~ "g";
908 template cWidthString(T)
909 {
910 	static if (is(Unqual!T == float))
911 		enum cWidthString = "";
912 	else
913 	static if (is(Unqual!T == double))
914 		enum cWidthString = "l";
915 	else
916 	static if (is(Unqual!T == real))
917 		enum cWidthString = "L";
918 }
919 enum fpCFormatString(T) = "%." ~ text(significantDigits!T) ~ cWidthString!T ~ "g";
920 
921 private auto safeSprintf(size_t N, Args...)(ref char[N] buf, auto ref Args args) @trusted @nogc
922 {
923 	return snprintf(buf.ptr, N, args);
924 }
925 
926 private auto fpToBuf(Q)(Q val) @safe nothrow @nogc
927 {
928 	alias F = Unqual!Q;
929 
930 	/// Bypass FPU register, which may contain a different precision
931 	static F forceType(F d) { static F n; n = d; return n; }
932 
933 	enum isReal = is(F == real);
934 
935 	StaticBuf!(char, 64) buf = void;
936 
937 	// MSVC workaround from std.format:
938 	version (CRuntime_Microsoft)
939 	{
940 		import std.math : isNaN, isInfinity;
941 		immutable double v = val; // convert early to get "inf" in case of overflow
942 		{
943 			string s;
944 			if (isNaN(v))
945 				s = "nan"; // snprintf writes 1.#QNAN
946 			else if (isInfinity(v))
947 				s = val >= 0 ? "inf" : "-inf"; // snprintf writes 1.#INF
948 			else
949 				goto L1;
950 			buf.buf[0..s.length] = s;
951 			buf.pos = s.length;
952 			return buf;
953 		L1:
954 		}
955 	}
956 	else
957 		alias v = val;
958 
959 	buf.pos = safeSprintf(buf.buf, &fpCFormatString!F[0], forceType(v));
960 	char[] s = buf.data();
961 
962 	F parse(char[] s)
963 	{
964 		F f;
965 		auto res = tryParse(s, f);
966 		assert(res, "Failed to parse number we created");
967 		assert(!s.length, "Failed to completely parse number we created");
968 		return f;
969 	}
970 
971 	if (s != "nan" && s != "-nan" && s != "inf" && s != "-inf")
972 	{
973 		if (forceType(parse(s)) != v)
974 		{
975 			static if (isReal)
976 			{
977 				// Something funny with DM libc real parsing... e.g. 0.6885036635121051783
978 				return buf;
979 			}
980 			else
981 			//	assert(false, "Initial conversion fails: " ~ format(fpFormatString!F, parse(s)) ~ " / " ~ s);
982 				assert(false, "Initial conversion fails");
983 		}
984 
985 		foreach_reverse (i; 1..s.length)
986 			if (s[i]>='0' && s[i]<='8')
987 			{
988 				s[i]++;
989 				if (forceType(parse(s[0..i+1]))==v)
990 					s = s[0..i+1];
991 				else
992 					s[i]--;
993 			}
994 		while (s.length>2 && s[$-1]!='.' && forceType(parse(s[0..$-1]))==v)
995 			s = s[0..$-1];
996 	}
997 	buf.pos = s.length;
998 	return buf;
999 }
1000 
1001 void putFP(Writer, F)(auto ref Writer writer, F v)
1002 {
1003 	writer.put(fpToBuf(v).data);
1004 }
1005 
1006 
1007 /// Get shortest string representation of a FP type that still converts to exactly the same number.
1008 template fpToString(F)
1009 {
1010 	string fpToString(F v) @safe nothrow
1011 	{
1012 		return fpToBuf(v).data.idup;
1013 	}
1014 
1015 	static if (!is(Unqual!F == real))
1016 	unittest
1017 	{
1018 		union U
1019 		{
1020 			ubyte[F.sizeof] bytes;
1021 			Unqual!F d;
1022 			string toString() const { return (fpFormatString!F ~ " %a [%(%02X %)]").format(d, d, bytes[]); }
1023 		}
1024 		import std.random : Xorshift, uniform;
1025 		import std.stdio : stderr;
1026 		Xorshift rng;
1027 		foreach (n; 0..10000)
1028 		{
1029 			U u;
1030 			foreach (ref b; u.bytes[])
1031 				b = uniform!ubyte(rng);
1032 			static if (is(Unqual!F == real))
1033 				u.bytes[7] |= 0x80; // require normalized value
1034 			scope(failure) stderr.writeln("Input:\t", u);
1035 			auto s = fpToString(u.d);
1036 			scope(failure) stderr.writeln("Result:\t", s);
1037 			if (s == "nan" || s == "-nan")
1038 				continue; // there are many NaNs...
1039 			U r;
1040 			r.d = to!F(s);
1041 			assert(r.bytes == u.bytes,
1042 				"fpToString mismatch:\nOutput:\t%s".format(r));
1043 		}
1044 	}
1045 }
1046 
1047 alias doubleToString = fpToString!double;
1048 
1049 unittest
1050 {
1051 	alias floatToString = fpToString!float;
1052 	alias realToString = fpToString!real;
1053 	alias crealToString = fpToString!(const(real));
1054 }
1055 
1056 /// Wraps the result of a fpToString in a non-allocating stringifiable struct.
1057 struct FPAsString(T)
1058 {
1059 	typeof(fpToBuf(T.init)) buf;
1060 
1061 	this(T f)
1062 	{
1063 		buf = fpToBuf(f);
1064 	}
1065 
1066 	string toString() const pure nothrow
1067 	{
1068 		return buf.data.idup;
1069 	}
1070 
1071 	void toString(W)(ref W w) const
1072 	{
1073 		static if (is(typeof(w.put(buf.data))))
1074 			w.put(buf.data);
1075 		else
1076 			foreach (c; buf.data)
1077 				w.put(c);
1078 	}
1079 }
1080 FPAsString!T fpAsString(T)(T f) { return FPAsString!T(f); } /// ditto
1081 
1082 @safe //nothrow @nogc
1083 unittest
1084 {
1085 	StaticBuf!(char, 1024) buf;
1086 	buf.formattedWrite!"%s"(fpAsString(0.1));
1087 	assert(buf.data == "0.1");
1088 }
1089 
1090 string numberToString(T)(T v)
1091 	if (isNumeric!T)
1092 {
1093 	static if (is(T : ulong))
1094 		return toDec(v);
1095 	else
1096 		return fpToString(v);
1097 }
1098 
1099 // ************************************************************************
1100 
1101 /// Simpler implementation of Levenshtein string distance
1102 int stringDistance(string s, string t)
1103 {
1104 	int n = cast(int)s.length;
1105 	int m = cast(int)t.length;
1106 	if (n == 0) return m;
1107 	if (m == 0) return n;
1108 	int[][] distance = new int[][](n+1, m+1); // matrix
1109 	int cost=0;
1110 	//init1
1111 	foreach (i; 0..n+1) distance[i][0]=i;
1112 	foreach (j; 0..m+1) distance[0][j]=j;
1113 	//find min distance
1114 	foreach (i; 1..n+1)
1115 		foreach (j; 1..m+1)
1116 		{
1117 			cost = t[j-1] == s[i-1] ? 0 : 1;
1118 			distance[i][j] = min(
1119 				distance[i-1][j  ] + 1,
1120 				distance[i  ][j-1] + 1,
1121 				distance[i-1][j-1] + cost
1122 			);
1123 		}
1124 	return distance[n][m];
1125 }
1126 
1127 /// Return a number between 0.0 and 1.0 indicating how similar two strings are
1128 /// (1.0 if identical)
1129 float stringSimilarity(string string1, string string2)
1130 {
1131 	float dis = stringDistance(string1, string2);
1132 	float maxLen = string1.length;
1133 	if (maxLen < string2.length)
1134 		maxLen = string2.length;
1135 	if (maxLen == 0)
1136 		return 1;
1137 	else
1138 		return 1f - dis/maxLen;
1139 }
1140 
1141 /// Select best match from a list of items.
1142 /// Returns -1 if none are above the threshold.
1143 sizediff_t findBestMatch(in string[] items, string target, float threshold = 0.7)
1144 {
1145 	sizediff_t found = -1;
1146 	float best = 0;
1147 
1148 	foreach (i, item; items)
1149 	{
1150 		float match = stringSimilarity(toLower(item),toLower(target));
1151 		if (match>threshold && match>=best)
1152 		{
1153 			best = match;
1154 			found = i;
1155 		}
1156 	}
1157 
1158 	return found;
1159 }
1160 
1161 /// Select best match from a list of items.
1162 /// Returns null if none are above the threshold.
1163 string selectBestFrom(in string[] items, string target, float threshold = 0.7)
1164 {
1165 	auto index = findBestMatch(items, target, threshold);
1166 	return index < 0 ? null : items[index];
1167 }
1168 
1169 // ************************************************************************
1170 
1171 string randomString()(int length=20, string chars="abcdefghijklmnopqrstuvwxyz")
1172 {
1173 	import std.random;
1174 	import std.range;
1175 
1176 	return length.iota.map!(n => chars[uniform(0, $)]).array;
1177 }