1 /**
2  * Utility code related to string and text processing.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <ae@cy.md>
12  */
13 
14 module ae.utils.text;
15 
16 import core.stdc.stdio : snprintf, sscanf;
17 import core.stdc.string;
18 
19 import std.algorithm;
20 import std.ascii;
21 import std.exception;
22 import std.conv;
23 import std.format;
24 import std.range.primitives;
25 import std.string;
26 import std.traits;
27 import std.typetuple;
28 
29 import ae.utils.array;
30 import ae.utils.meta : rangeTuple;
31 import ae.utils.text.parsefp;
32 import ae.utils.textout;
33 
34 private alias indexOf = std..string.indexOf;
35 
36 public import ae.utils.array : contains;
37 public import ae.utils.text.ascii : ascii, decimalSize, toDec, toDecFixed, asciiToLower, asciiToUpper;
38 public import ae.utils.text.functor : formatted;
39 
40 deprecated("legacy transitive import - please `import ae.utils.text.ascii;`.")
41 public import ae.utils.text.ascii : DecimalSize;
42 
43 // ************************************************************************
44 
45 /// UFCS helper
46 string formatAs(T)(auto ref T obj, string fmt)
47 {
48 	return format(fmt, obj);
49 }
50 
51 ///
52 unittest
53 {
54 	assert(5.formatAs("%03d") == "005");
55 }
56 
57 // ************************************************************************
58 
59 /// Consume a LF or CRLF terminated line from s.
60 /// Sets s to null and returns the remainder
61 /// if there is no line terminator in s.
62 T[] eatLine(T)(ref T[] s, bool eatIncompleteLines = true)
63 {
64 	return s.skipUntil([T('\n')], eatIncompleteLines).chomp();
65 }
66 
67 deprecated template eatLine(OnEof onEof)
68 {
69 	T[] eatLine(T)(ref T[] s)
70 	{
71 		return s.eatUntil!onEof([T('\n')]).chomp();
72 	}
73 }
74 
75 unittest
76 {
77 	string s = "Hello\nworld";
78 	assert(s.eatLine() == "Hello");
79 	assert(s.eatLine() == "world");
80 	assert(s is null);
81 	assert(s.eatLine() is null);
82 }
83 
84 // Uses memchr (not Boyer-Moore), best for short strings.
85 /// An implementation of `replace` optimized for common cases (short strings).
86 T[] fastReplace(T)(T[] what, T[] from, T[] to)
87 	if (T.sizeof == 1) // TODO (uses memchr)
88 {
89 	alias Unqual!T U;
90 
91 //	debug scope(failure) std.stdio.writeln("fastReplace crashed: ", [what, from, to]);
92 	enum RAM = cast(U*)null;
93 
94 	if (what.length < from.length || from.length==0)
95 		return what;
96 
97 	if (from.length==1)
98 	{
99 		auto fromc = from[0];
100 		if (to.length==1)
101 		{
102 			auto p = cast(T*)memchr(what.ptr, fromc, what.length);
103 			if (!p)
104 				return what;
105 
106 			T[] result = what.dup;
107 			auto delta = result.ptr - what.ptr;
108 			auto toChar = to[0];
109 			auto end = what.ptr + what.length;
110 			do
111 			{
112 				(cast(U*)p)[delta] = toChar; // zomg hax lol
113 				p++;
114 				p = cast(T*)memchr(p, fromc, end - p);
115 			} while (p);
116 			return result;
117 		}
118 		else
119 		{
120 			auto p = cast(immutable(T)*)memchr(what.ptr, fromc, what.length);
121 			if (!p)
122 				return what;
123 
124 			auto sb = StringBuilder(what.length);
125 			do
126 			{
127 				sb.put(what[0..p-what.ptr], to);
128 				what = what[p-what.ptr+1..$];
129 				p = cast(immutable(T)*)memchr(what.ptr, fromc, what.length);
130 			}
131 			while (p);
132 
133 			sb.put(what);
134 			return sb.get();
135 		}
136 	}
137 
138 	auto head = from[0];
139 	auto tail = from[1..$];
140 
141 	auto p = cast(T*)what.ptr;
142 	auto end = p + what.length - tail.length;
143 	p = cast(T*)memchr(p, head, end-p);
144 	while (p)
145 	{
146 		p++;
147 		if (p[0..tail.length] == tail)
148 		{
149 			if (from.length == to.length)
150 			{
151 				T[] result = what.dup;
152 				auto deltaMinusOne = (result.ptr - what.ptr) - 1;
153 
154 				goto replaceA;
155 			dummyA: // compiler complains
156 
157 				do
158 				{
159 					p++;
160 					if (p[0..tail.length] == tail)
161 					{
162 					replaceA:
163 						(cast(U*)p+deltaMinusOne)[0..to.length] = to[];
164 					}
165 					p = cast(T*)memchr(p, head, end-p);
166 				}
167 				while (p);
168 
169 				return result;
170 			}
171 			else
172 			{
173 				auto start = cast(T*)what.ptr;
174 				auto sb = StringBuilder(what.length);
175 				goto replaceB;
176 			dummyB: // compiler complains
177 
178 				do
179 				{
180 					p++;
181 					if (p[0..tail.length] == tail)
182 					{
183 					replaceB:
184 						sb.put(RAM[cast(size_t)start .. cast(size_t)p-1], to);
185 						start = p + tail.length;
186 						what = what[start-what.ptr..$];
187 					}
188 					else
189 					{
190 						what = what[p-what.ptr..$];
191 					}
192 					p = cast(T*)memchr(what.ptr, head, what.length);
193 				}
194 				while (p);
195 
196 				//sb.put(what);
197 				sb.put(RAM[cast(size_t)start..cast(size_t)(what.ptr+what.length)]);
198 				return sb.get();
199 			}
200 
201 			assert(0);
202 		}
203 		p = cast(T*)memchr(p, head, end-p);
204 	}
205 
206 	return what;
207 }
208 
209 unittest
210 {
211 	import std.array;
212 	void test(string haystack, string from, string to)
213 	{
214 		auto description = `("` ~ haystack ~ `", "` ~ from ~ `", "` ~ to ~ `")`;
215 
216 		auto r1 = fastReplace(haystack, from, to);
217 		auto r2 =     replace(haystack, from, to);
218 		assert(r1 == r2, `Bad replace: ` ~ description ~ ` == "` ~ r1 ~ `"`);
219 
220 		if (r1 == haystack)
221 			assert(r1 is haystack, `Pointless reallocation: ` ~ description);
222 	}
223 
224 	test("Mary had a little lamb", "a", "b");
225 	test("Mary had a little lamb", "a", "aaa");
226 	test("Mary had a little lamb", "Mary", "Lucy");
227 	test("Mary had a little lamb", "Mary", "Jimmy");
228 	test("Mary had a little lamb", "lamb", "goat");
229 	test("Mary had a little lamb", "lamb", "sheep");
230 	test("Mary had a little lamb", " l", " x");
231 	test("Mary had a little lamb", " l", " xx");
232 
233 	test("Mary had a little lamb", "X" , "Y" );
234 	test("Mary had a little lamb", "XX", "Y" );
235 	test("Mary had a little lamb", "X" , "YY");
236 	test("Mary had a little lamb", "XX", "YY");
237 	test("Mary had a little lamb", "aX", "Y" );
238 	test("Mary had a little lamb", "aX", "YY");
239 
240 	test("foo", "foobar", "bar");
241 }
242 
243 /// An implementation of `split` optimized for common cases. Allocates only once.
244 T[][] fastSplit(T, U)(T[] s, U d)
245 	if (is(Unqual!T == Unqual!U))
246 {
247 	if (!s.length)
248 		return null;
249 
250 	auto p = cast(T*)memchr(s.ptr, d, s.length);
251 	if (!p)
252 		return [s];
253 
254 	size_t n;
255 	auto end = s.ptr + s.length;
256 	do
257 	{
258 		n++;
259 		p++;
260 		p = cast(T*) memchr(p, d, end-p);
261 	}
262 	while (p);
263 
264 	auto result = new T[][n+1];
265 	n = 0;
266 	auto start = s.ptr;
267 	p = cast(T*) memchr(start, d, s.length);
268 	do
269 	{
270 		result[n++] = start[0..p-start];
271 		start = ++p;
272 		p = cast(T*) memchr(p, d, end-p);
273 	}
274 	while (p);
275 	result[n] = start[0..end-start];
276 
277 	return result;
278 }
279 
280 /// Like `splitLines`, but does not attempt to split on Unicode line endings.
281 /// Only splits on `"\r"`, `"\n"`, and `"\r\n"`.
282 T[][] splitAsciiLines(T)(T[] text)
283 	if (is(Unqual!T == char))
284 {
285 	auto lines = text.fastSplit('\n');
286 	foreach (ref line; lines)
287 		if (line.length && line[$-1]=='\r')
288 			line = line[0..$-1];
289 	return lines;
290 }
291 
292 unittest
293 {
294 	assert(splitAsciiLines("a\nb\r\nc\r\rd\n\re\r\n\nf") == ["a", "b", "c\r\rd", "\re", "", "f"]);
295 	assert(splitAsciiLines(string.init) == splitLines(string.init));
296 }
297 
298 /// Like std.string.split (one argument version, which splits by
299 /// whitespace), but only splits by ASCII and does not autodecode.
300 T[][] asciiSplit(T)(T[] text)
301 	if (is(Unqual!T == char))
302 {
303 	bool inWhitespace = true;
304 	size_t wordStart;
305 	T[][] result;
306 
307 	void endWord(size_t p)
308 	{
309 		if (!inWhitespace)
310 		{
311 			result ~= text[wordStart..p];
312 			inWhitespace = true;
313 		}
314 	}
315 
316 	foreach (p, c; text)
317 		if (std.ascii.isWhite(c))
318 			endWord(p);
319 		else
320 			if (inWhitespace)
321 			{
322 				inWhitespace = false;
323 				wordStart = p;
324 			}
325 	endWord(text.length);
326 	return result;
327 }
328 
329 unittest
330 {
331 	foreach (s; ["", " ", "a", " a", "a ", "a b", " a b", "a b ", " a b ",
332 			"  ", "  a", "a  ", "a  b", "a  b  ", "a b  c"])
333 		assert(s.split == s.asciiSplit, format("Got %s, expected %s", s.asciiSplit, s.split));
334 }
335 
336 /// Like `strip`, but only removes ASCII whitespace.
337 T[] asciiStrip(T)(T[] s)
338 	if (is(Unqual!T == char))
339 {
340 	while (s.length && isWhite(s[0]))
341 		s = s[1..$];
342 	while (s.length && isWhite(s[$-1]))
343 		s = s[0..$-1];
344 	return s;
345 }
346 
347 ///
348 unittest
349 {
350 	string s = "Hello, world!";
351 	assert(asciiStrip(s) is s);
352 	assert(asciiStrip("\r\n\tHello ".dup) == "Hello");
353 }
354 
355 /// Covering slice-list of s with interleaved whitespace.
356 T[][] segmentByWhitespace(T)(T[] s)
357 	if (is(Unqual!T == char))
358 {
359 	if (!s.length)
360 		return null;
361 
362 	T[][] segments;
363 	bool wasWhite = isWhite(s[0]);
364 	size_t start = 0;
365 	foreach (p, char c; s)
366 	{
367 		bool isWhite = isWhite(c);
368 		if (isWhite != wasWhite)
369 			segments ~= s[start..p],
370 			start = p;
371 		wasWhite = isWhite;
372 	}
373 	segments ~= s[start..$];
374 
375 	return segments;
376 }
377 
378 /// Replaces runs of ASCII whitespace which contain a newline (`'\n'`) into a single space.
379 T[] newlinesToSpaces(T)(T[] s)
380 	if (is(Unqual!T == char))
381 {
382 	auto slices = segmentByWhitespace(s);
383 	foreach (ref slice; slices)
384 		if (slice.contains("\n"))
385 			slice = " ";
386 	return slices.join();
387 }
388 
389 /// Replaces all runs of ASCII whitespace with a single space.
390 ascii normalizeWhitespace(ascii s)
391 {
392 	auto slices = segmentByWhitespace(strip(s));
393 	foreach (i, ref slice; slices)
394 		if (i & 1) // odd
395 			slice = " ";
396 	return slices.join();
397 }
398 
399 ///
400 unittest
401 {
402 	assert(normalizeWhitespace(" Mary  had\ta\nlittle\r\n\tlamb") == "Mary had a little lamb");
403 }
404 
405 /// Splits out words from a camel-cased string.
406 /// All-uppercase words are returned as a single word.
407 string[] splitByCamelCase(string s)
408 {
409 	string[] result;
410 	size_t start = 0;
411 	foreach (i; 1..s.length+1)
412 		if (i == s.length
413 		 || (isLower(s[i-1]) && isUpper(s[i]))
414 		 || (i+1 < s.length && isUpper(s[i-1]) && isUpper(s[i]) && isLower(s[i+1]))
415 		)
416 		{
417 			result ~= s[start..i];
418 			start = i;
419 		}
420 	return result;
421 }
422 
423 ///
424 unittest
425 {
426 	assert(splitByCamelCase("parseIPString") == ["parse", "IP", "String"]);
427 	assert(splitByCamelCase("IPString") == ["IP", "String"]);
428 }
429 
430 /// Join an array of words into a camel-cased string.
431 string camelCaseJoin(string[] arr)
432 {
433 	if (!arr.length)
434 		return null;
435 	string result = arr[0];
436 	foreach (s; arr[1..$])
437 		result ~= std.ascii.toUpper(s[0]) ~ s[1..$];
438 	return result;
439 }
440 
441 unittest
442 {
443 	assert("parse-IP-string".split('-').camelCaseJoin() == "parseIPString");
444 }
445 
446 // ************************************************************************
447 
448 /// Like std.string.wrap, but preserves whitespace at line start and
449 /// between (non-wrapped) words.
450 string verbatimWrap(
451 	string s,
452 	size_t columns = 80,
453 	string firstIndent = null,
454 	string indent = null,
455 	size_t tabWidth = 8,
456 )
457 {
458 	if (!s.length)
459 		return s;
460 
461 	import std.uni : isWhite;
462 	import std.range;
463 
464 	// Result buffer. Append-only (contains only text which has been wrapped).
465 	string result;
466 	// Index in `s` corresponding to the end of `result`
467 	size_t start;
468 	// Index in `s` corresponding to after the last newline in `result`
469 	size_t lineStart;
470 	// Current column
471 	size_t col;
472 	// Was the previous character we looked at whitespace?
473 	bool wasWhite;
474 	// We need to add an indent at the next (non-newline) character.
475 	bool needIndent;
476 
477 	result = firstIndent;
478 	col = firstIndent.walkLength;
479 	auto indentWidth = indent.walkLength;
480 
481 	void flush(size_t pos)
482 	{
483 		if (col > columns && start > lineStart)
484 		{
485 			result ~= "\n" ~ indent;
486 			col = indentWidth + (pos - start);
487 
488 			// Consume whitespace at line break
489 			size_t numWhite;
490 			foreach (i, c; s[start .. $])
491 				if (isWhite(c))
492 					numWhite = i;
493 				else
494 					break;
495 			start += numWhite;
496 			lineStart = start;
497 		}
498 		result ~= s[start .. pos];
499 		start = pos;
500 	}
501 
502 	foreach (pos, dchar c; s)
503 	{
504 		auto atWhite = isWhite(c);
505 		if (atWhite && !wasWhite)
506 			flush(pos);
507 		if (c == '\n')
508 		{
509 			flush(pos);
510 			result ~= "\n";
511 			start++; // past newline
512 			lineStart = start;
513 			needIndent = true;
514 			col = 0;
515 		}
516 		else
517 		{
518 			if (needIndent)
519 			{
520 				assert(col == 0);
521 				result ~= indent;
522 				col += indentWidth;
523 				needIndent = false;
524 			}
525 			if (c == '\t')
526 				col += tabWidth;
527 			else
528 				col++;
529 		}
530 		wasWhite = atWhite;
531 	}
532 	flush(s.length);
533 	if (col)
534 		result ~= "\n"; // trailing newline
535 
536 	return result;
537 }
538 
539 // ************************************************************************
540 
541 /// Case-insensitive ASCII string.
542 alias CIAsciiString = NormalizedArray!(immutable(char), s => s.byCodeUnit.map!(std.ascii.toLower));
543 
544 ///
545 unittest
546 {
547 	CIAsciiString s = "test";
548 	assert(s == "TEST");
549 	assert(s >= "Test" && s <= "Test");
550 	assert(CIAsciiString("a") == CIAsciiString("A"));
551 	assert(CIAsciiString("a") != CIAsciiString("B"));
552 	assert(CIAsciiString("a") <  CIAsciiString("B"));
553 	assert(CIAsciiString("A") <  CIAsciiString("b"));
554 	assert(CIAsciiString("я") != CIAsciiString("Я"));
555 }
556 
557 import std.uni : toLower;
558 
559 /// Case-insensitive Unicode string.
560 alias CIUniString = NormalizedArray!(immutable(char), s => s.map!(toLower));
561 
562 ///
563 unittest
564 {
565 	CIUniString s = "привет";
566 	assert(s == "ПРИВЕТ");
567 	assert(s >= "Привет" && s <= "Привет");
568 	assert(CIUniString("я") == CIUniString("Я"));
569 	assert(CIUniString("а") != CIUniString("Б"));
570 	assert(CIUniString("а") <  CIUniString("Б"));
571 	assert(CIUniString("А") <  CIUniString("б"));
572 }
573 
574 // ************************************************************************
575 
576 import std.utf;
577 
578 /// Convert any data to a valid UTF-8 bytestream, so D's string functions can
579 /// properly work on it.
580 string rawToUTF8(in char[] s)
581 {
582 	auto d = new dchar[s.length];
583 	foreach (i, char c; s)
584 		d[i] = c;
585 	return toUTF8(d);
586 }
587 
588 /// Undo rawToUTF8.
589 ascii UTF8ToRaw(in char[] r) pure
590 {
591 	auto s = new char[r.length];
592 	size_t i = 0;
593 	foreach (dchar c; r)
594 	{
595 		assert(c < '\u0100');
596 		s[i++] = cast(char)c;
597 	}
598 	return s[0..i];
599 }
600 
601 unittest
602 {
603 	char[1] c;
604 	for (int i=0; i<256; i++)
605 	{
606 		c[0] = cast(char)i;
607 		assert(UTF8ToRaw(rawToUTF8(c[])) == c[], format("%s -> %s -> %s", cast(ubyte[])c[], cast(ubyte[])rawToUTF8(c[]), cast(ubyte[])UTF8ToRaw(rawToUTF8(c[]))));
608 	}
609 }
610 
611 /// Where a delegate with this signature is required.
612 string nullStringTransform(in char[] s) { return to!string(s); }
613 
614 /// Like readText, but with in-memory data.
615 /// Reverse of ae.utils.array.bytes (for strings).
616 inout(char)[] asText(inout(ubyte)[] bytes)
617 {
618 	auto s = cast(inout(char)[]) bytes;
619 	validate(s);
620 	return s;
621 }
622 
623 /// Lossily convert arbitrary data into a valid UTF-8 string.
624 string forceValidUTF8(ascii s)
625 {
626 	try
627 	{
628 		validate(s);
629 		return s;
630 	}
631 	catch (UTFException)
632 		return rawToUTF8(s);
633 }
634 
635 // ************************************************************************
636 
637 /// Return the slice up to the first NUL character,
638 /// or of the whole array if none is found.
639 C[] fromZArray(C, n)(ref C[n] arr)
640 {
641 	auto p = arr.representation.countUntil(0);
642 	return arr[0 .. p<0 ? $ : p];
643 }
644 
645 /// ditto
646 C[] fromZArray(C)(C[] arr)
647 {
648 	auto p = arr.representation.countUntil(0);
649 	return arr[0 .. p<0 ? $ : p];
650 }
651 
652 unittest
653 {
654 	char[4] arr = "ab\0d";
655 	assert(arr.fromZArray == "ab");
656 	arr[] = "abcd";
657 	assert(arr.fromZArray == "abcd");
658 }
659 
660 unittest
661 {
662 	string arr = "ab\0d";
663 	assert(arr.fromZArray == "ab");
664 	arr = "abcd";
665 	assert(arr.fromZArray == "abcd");
666 }
667 
668 // ************************************************************************
669 
670 /// Formats binary data as a hex dump (three-column layout consisting of hex
671 /// offset, byte values in hex, and printable low-ASCII characters).
672 string hexDump(const(void)[] b)
673 {
674 	auto data = cast(const(ubyte)[]) b;
675 	assert(data.length);
676 	size_t i=0;
677 	string s;
678 	while (i<data.length)
679 	{
680 		s ~= format("%08X:  ", i);
681 		foreach (x; 0..16)
682 		{
683 			if (i+x<data.length)
684 				s ~= format("%02X ", data[i+x]);
685 			else
686 				s ~= "   ";
687 			if (x==7)
688 				s ~= "| ";
689 		}
690 		s ~= "  ";
691 		foreach (x; 0..16)
692 		{
693 			if (i+x<data.length)
694 				if (data[i+x]==0)
695 					s ~= ' ';
696 				else
697 				if (data[i+x]<32 || data[i+x]>=128)
698 					s ~= '.';
699 				else
700 					s ~= cast(char)data[i+x];
701 			else
702 				s ~= ' ';
703 		}
704 		s ~= "\n";
705 		i += 16;
706 	}
707 	return s;
708 }
709 
710 import std.conv;
711 
712 /// Parses `s` as a hexadecimal number into an integer of type `T`.
713 T fromHex(T : ulong = uint, C)(const(C)[] s)
714 {
715 	T result = parse!T(s, 16);
716 	enforce(s.length==0, new ConvException("Could not parse entire string"));
717 	return result;
718 }
719 
720 /// Parses `hex` into an array of bytes.
721 /// `hex.length` should be even.
722 ubyte[] arrayFromHex(in char[] hex)
723 {
724 	auto buf = new ubyte[hex.length/2];
725 	arrayFromHex(hex, buf);
726 	return buf;
727 }
728 
729 /// Policy for `parseHexDigit`.
730 struct HexParseConfig
731 {
732 	bool checked = true; /// Throw on invalid digits.
733 	bool lower   = true; /// Accept lower-case digits.
734 	bool upper   = true; /// Accept upper-case digits.
735 }
736 
737 /// Parse a single hexadecimal digit according to the policy in `config`.
738 ubyte parseHexDigit(HexParseConfig config = HexParseConfig.init)(char c)
739 {
740 	static assert(config.lower || config.upper,
741 		"Must parse at least either lower or upper case digits");
742 	static if (config.checked)
743 	{
744 		switch (c)
745 		{
746 			case '0': .. case '9': return cast(ubyte)(c - '0');
747 			case 'a': .. case 'f': return cast(ubyte)(c - 'a' + 10);
748 			case 'A': .. case 'F': return cast(ubyte)(c - 'A' + 10);
749 			default: throw new Exception("Bad hex digit: " ~ c);
750 		}
751 	}
752 	else
753 	{
754 		if (c <= '9')
755 			return cast(ubyte)(c - '0');
756 		static if (config.lower && config.upper)
757 		{
758 			if (c < 'a')
759 				return cast(ubyte)(c - 'A' + 10);
760 			else
761 				return cast(ubyte)(c - 'a' + 10);
762 		}
763 		else
764 			static if (config.lower)
765 				return cast(ubyte)(c - 'a' + 10);
766 			else
767 				return cast(ubyte)(c - 'A' + 10);
768 	}
769 }
770 
771 /// Parses `hex` into the given array `buf`.
772 void arrayFromHex(HexParseConfig config = HexParseConfig.init)(in char[] hex, ubyte[] buf)
773 {
774 	assert(buf.length == hex.length/2, "Wrong buffer size for arrayFromHex");
775 	for (int i=0; i<hex.length; i+=2)
776 		buf[i/2] = cast(ubyte)(
777 			parseHexDigit!config(hex[i  ])*16 +
778 			parseHexDigit!config(hex[i+1])
779 		);
780 }
781 
782 /// Parses `hex` into the given array `buf`.
783 /// Fast version for static arrays of known length.
784 void sarrayFromHex(HexParseConfig config = HexParseConfig.init, size_t N, Hex)(ref const Hex hex, ref ubyte[N] buf)
785 if (is(Hex == char[N*2]))
786 {
787 	foreach (i; 0..N/4)
788 	{
789 		ulong chars = (cast(ulong*)hex.ptr)[i];
790 		uint res =
791 			(parseHexDigit!config((chars >> (8*0)) & 0xFF) << (4*1)) |
792 			(parseHexDigit!config((chars >> (8*1)) & 0xFF) << (4*0)) |
793 			(parseHexDigit!config((chars >> (8*2)) & 0xFF) << (4*3)) |
794 			(parseHexDigit!config((chars >> (8*3)) & 0xFF) << (4*2)) |
795 			(parseHexDigit!config((chars >> (8*4)) & 0xFF) << (4*5)) |
796 			(parseHexDigit!config((chars >> (8*5)) & 0xFF) << (4*4)) |
797 			(parseHexDigit!config((chars >> (8*6)) & 0xFF) << (4*7)) |
798 			(parseHexDigit!config((chars >> (8*7)) & 0xFF) << (4*6));
799 		(cast(uint*)buf.ptr)[i] = res;
800 	}
801 	foreach (i; N/4*4..N)
802 		buf[i] = cast(ubyte)(
803 			parseHexDigit!config(hex[i*2  ])*16 +
804 			parseHexDigit!config(hex[i*2+1])
805 		);
806 }
807 
808 unittest
809 {
810 	foreach (checked; TypeTuple!(false, true))
811 		foreach (lower; TypeTuple!(false, true))
812 			foreach (upper; TypeTuple!(false, true))
813 				static if (lower || upper)
814 				{
815 					enum config = HexParseConfig(checked, lower, upper);
816 					char[18] buf;
817 					foreach (n; 0..18)
818 						if (lower && upper ? n & 1 : upper)
819 							buf[n] = hexDigits[n % 16];
820 						else
821 							buf[n] = lowerHexDigits[n % 16];
822 					ubyte[9] res;
823 					sarrayFromHex!config(buf, res);
824 					assert(res == [0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0x01], text(res));
825 				}
826 }
827 
828 /// Conversion from bytes to hexadecimal strings.
829 template toHex(alias digits = hexDigits)
830 {
831 	// Note: using template constraints instead of overloads due to
832 	// https://issues.dlang.org/show_bug.cgi?id=21504
833 
834 	enum isHexifiable(T) =
835 		is(T : ulong) || // number
836 		is(T : const(ubyte)[]) || // dynamic array of bytes
837 		is(T : const(ubyte)[n], size_t n); // static array of bytes
838 
839 	enum isBuffer(T) =
840 		(is(T : C[], C) && isSomeChar!C) || // dynamic array of chars
841 		(is(T : C[n], n, C) && isSomeChar!C); // static array of chars
842 
843 	auto toHex(T, B)(auto ref T value, auto ref B buf)
844 	if (isHexifiable!T && isBuffer!B)
845 	{
846 		// Get result length
847 		static if (is(T : ulong))
848 			enum resultLength = T.sizeof * 2;
849 		else
850 		static if (is(T : const(ubyte)[n], size_t n))
851 			enum resultLength = T.length * 2;
852 		else
853 			auto resultLength = value.length * 2;
854 
855 		enum fixedResultLength = !is(typeof(&resultLength));
856 
857 		// Ensure buffer size
858 		{
859 			enum fixedBufferLength = !is(T : C[], C);
860 			static if (!fixedBufferLength && __traits(isRef, buf))
861 			{
862 				if (buf.length < resultLength)
863 					buf.length = resultLength;
864 			}
865 			else
866 			{
867 				static if (fixedResultLength && fixedBufferLength)
868 					static assert(resultLength <= buf.length, "Buffer size is insufficient");
869 				else
870 					assert(resultLength <= buf.length, "Buffer size is insufficient");
871 			}
872 		}
873 
874 		static if (is(T : ulong))
875 		{
876 			Unqual!T x = value;
877 			foreach (i; Reverse!(rangeTuple!(T.sizeof*2)))
878 			{
879 				buf[i] = hexDigits[x & 0xF];
880 				x >>= 4;
881 			}
882 		}
883 		else
884 		{
885 			foreach (i, b; value)
886 			{
887 				buf[i*2  ] = digits[b>>4];
888 				buf[i*2+1] = digits[b&15];
889 			}
890 		}
891 
892 		alias C = typeof(buf[0]);
893 
894 		static if (fixedResultLength)
895 			C[resultLength] result;
896 		else
897 			C[] result;
898 		result = buf[0 .. resultLength];
899 		return result;
900 	}
901 
902 	auto toHex(C = char, T)(auto ref T value)
903 	if (isSomeChar!C && isHexifiable!T)
904 	{
905 		static if (is(T : const(ubyte)[]))
906 			C[] buf;
907 		else
908 			C[T.sizeof * 2] buf;
909 		return toHex(value, buf);
910 	}
911 }
912 
913 alias toLowerHex = toHex!lowerHexDigits; /// ditto
914 
915 unittest
916 {
917 	ubyte[] bytes = [0x12, 0x34];
918 	assert(toHex(bytes) == "1234");
919 }
920 
921 unittest
922 {
923 	ubyte[] bytes = [0x12, 0x34];
924 	char[] buf = new char[4];
925 	toHex(bytes, buf);
926 	assert(buf == "1234");
927 }
928 
929 unittest
930 {
931 	char[8] buf;
932 	toHex(0x01234567, buf);
933 	assert(buf == "01234567");
934 }
935 
936 unittest
937 {
938 	assert(toHex(0x01234567) == "01234567");
939 }
940 
941 unittest
942 {
943 	ubyte[2] bytes = [0x12, 0x34];
944 	auto buf = bytes.toLowerHex();
945 	static assert(buf.length == 4);
946 	assert(buf == "1234");
947 }
948 
949 unittest
950 {
951 	import core.exception : AssertError;
952 
953 	ubyte[] a = new ubyte[10];
954 	char[] b = new char[10];
955 	toHex(a[0..1], b[0..2]);
956 	assertThrown!AssertError(toHex(a[0..1], b[0..1]));
957 }
958 
959 /// How many significant decimal digits does a FP type have
960 /// (determined empirically - valid for all D FP types on x86/64)
961 enum significantDigits(T : real) = 2 + 2 * T.sizeof;
962 
963 /// Format string for a FP type which includes all necessary
964 /// significant digits
965 enum fpFormatString(T) = "%." ~ text(significantDigits!T) ~ "g";
966 private template cWidthString(T)
967 {
968 	static if (is(Unqual!T == float))
969 		enum cWidthString = "";
970 	else
971 	static if (is(Unqual!T == double))
972 		enum cWidthString = "l";
973 	else
974 	static if (is(Unqual!T == real))
975 		enum cWidthString = "L";
976 }
977 /// C format string to exactly format a floating-point type `T`.
978 enum fpCFormatString(T) = "%." ~ text(significantDigits!T) ~ cWidthString!T ~ "g";
979 /// C format string to scan a floating-point type `T`.
980 enum fpCScanString(T) = "%" ~ cWidthString!T ~ "f";
981 
982 private auto safeSprintf(size_t N, Args...)(ref char[N] buf, auto ref Args args) @trusted @nogc
983 {
984 	return snprintf(buf.ptr, N, args);
985 }
986 
987 /// Parse a floating-point number using the C standard library.
988 /// Note: might produce slightly different results than e.g. `to!double`.
989 bool fpTryParse(F, C)(ref const(C)[] s, ref F f) @trusted @nogc nothrow
990 if (isFloatingPoint!F && isSomeChar!C)
991 {
992 	// Parity with glibc:
993 	version (Windows)
994 	{
995 		// https://issues.dlang.org/show_bug.cgi?id=22302
996 		if (s.representation.startsWith("nan".representation)) { s = s[3..$]; f = F.nan; return true; }
997 		if (s.representation.startsWith("inf".representation)) { s = s[3..$]; f = F.infinity; return true; }
998 		if (s.representation.startsWith("-inf".representation)) { s = s[4..$]; f = -F.infinity; return true; }
999 	}
1000 
1001 	import core.stdc.stdlib : malloc, free;
1002 	char[64] sbuf = void;
1003 	auto buf = s.length >= sbuf.length ? cast(char*)malloc(s.length + 1) : sbuf.ptr;
1004 	if (!buf)
1005 		assert(false, "Memory allocation failed");
1006 	scope(exit) if (buf != sbuf.ptr) free(buf);
1007 
1008 	foreach (i, c; s)
1009 	{
1010 		if (c >= 0x100)
1011 			return false; // Non-ASCII
1012 		buf[i] = cast(char)c;
1013 	}
1014 	buf[s.length] = 0;
1015 
1016 	static immutable fmt = fpCScanString!F ~ "%n\0";
1017 	int read;
1018 	if (!sscanf(buf, fmt.ptr, &f, &read))
1019 		return false;
1020 	s = s[read .. $];
1021 	return true;
1022 }
1023 
1024 /// ditto
1025 F fpParse(F, C)(const(C)[] s)
1026 if (isFloatingPoint!F && isSomeChar!C)
1027 {
1028 	F f;
1029 	fpTryParse(s, f).enforce("Failed to parse " ~ F.stringof);
1030 	enforce(s.length == 0, "Failed to completely parse " ~ F.stringof);
1031 	return f;
1032 }
1033 
1034 private F fpParseAssumeValid(F)(const(char)[] s) @nogc
1035 {
1036 	F f;
1037 	auto res = fpTryParse(s, f);
1038 	assert(res, "Failed to parse number we created");
1039 	assert(!s.length, "Failed to completely parse number we created");
1040 	return f;
1041 }
1042 
1043 private auto fpToBuf(Q)(Q val) @safe nothrow @nogc
1044 {
1045 	alias F = Unqual!Q;
1046 
1047 	/// Bypass FPU register, which may contain a different precision
1048 	static F forceType(F d) { static F n; n = d; return n; }
1049 
1050 	enum isReal = is(F == real);
1051 
1052 	StaticBuf!(char, 64) buf = void;
1053 
1054 	// MSVC workaround from std.format:
1055 	version (CRuntime_Microsoft)
1056 	{
1057 		import std.math : isNaN, isInfinity;
1058 		immutable double v = val; // convert early to get "inf" in case of overflow
1059 		{
1060 			string s;
1061 			if (isNaN(v))
1062 				s = "nan"; // snprintf writes 1.#QNAN
1063 			else if (isInfinity(v))
1064 				s = val >= 0 ? "inf" : "-inf"; // snprintf writes 1.#INF
1065 			else
1066 				goto L1;
1067 			buf.buf[0..s.length] = s;
1068 			buf.pos = s.length;
1069 			return buf;
1070 		L1:
1071 		}
1072 	}
1073 	else
1074 		alias v = val;
1075 
1076 	buf.pos = safeSprintf(buf.buf, &fpCFormatString!F[0], forceType(v));
1077 	char[] s = buf.data();
1078 
1079 	F parse(char[] s)
1080 	{
1081 		auto f = fpParseAssumeValid!F(s);
1082 		return f;
1083 	}
1084 
1085 	if (s != "nan" && s != "-nan" && s != "inf" && s != "-inf")
1086 	{
1087 		if (forceType(parse(s)) != v)
1088 		{
1089 			static if (isReal)
1090 			{
1091 				// Something funny with DM libc real parsing... e.g. 0.6885036635121051783
1092 				return buf;
1093 			}
1094 			else
1095 			//	assert(false, "Initial conversion fails: " ~ format(fpFormatString!F, parse(s)) ~ " / " ~ s);
1096 				assert(false, "Initial conversion fails");
1097 		}
1098 
1099 		auto suffixPos = s.length;
1100 		foreach (i, c; s)
1101 			if (c == 'e' || c == 'E')
1102 			{
1103 				suffixPos = i;
1104 				break;
1105 			}
1106 		auto suffix = s[suffixPos .. $];
1107 		s = s[0 .. suffixPos];
1108 
1109 		StaticBuf!(char, 64) testBuf;
1110 		void render(char[] prefix)
1111 		{
1112 			testBuf.pos = 0;
1113 			testBuf.put(prefix);
1114 			testBuf.put(suffix);
1115 		}
1116 		F tryPrefix(char[] prefix)
1117 		{
1118 			render(prefix);
1119 			return forceType(parse(testBuf.data));
1120 		}
1121 
1122 		foreach_reverse (i; 1..s.length)
1123 			if (s[i]>='0' && s[i]<='8')
1124 			{
1125 				s[i]++;
1126 				if (tryPrefix(s[0..i+1])==v)
1127 					s = s[0..i+1];
1128 				else
1129 					s[i]--;
1130 			}
1131 		while (s.length>2 && s[$-1]!='.' && tryPrefix(s[0..$-1])==v)
1132 			s = s[0..$-1];
1133 
1134 		render(s);
1135 		return testBuf;
1136 	}
1137 	buf.pos = s.length;
1138 	return buf;
1139 }
1140 
1141 /// Get shortest string representation of a FP type that still converts to exactly the same number.
1142 template fpToString(F)
1143 {
1144 	string fpToString(F v) @safe nothrow
1145 	{
1146 		return fpToBuf(v).data.idup;
1147 	}
1148 
1149 	static if (!is(Unqual!F == real))
1150 	unittest
1151 	{
1152 		union U
1153 		{
1154 			ubyte[F.sizeof] bytes;
1155 			Unqual!F d;
1156 			string toString() const { return (fpFormatString!F ~ " %a [%(%02X %)]").format(d, d, bytes[]); }
1157 		}
1158 		import std.random : Xorshift, uniform;
1159 		import std.stdio : stderr;
1160 		Xorshift rng;
1161 		foreach (n; 0..10000)
1162 		{
1163 			U u;
1164 			foreach (ref b; u.bytes[])
1165 				b = uniform!ubyte(rng);
1166 			static if (is(Unqual!F == real))
1167 				u.bytes[7] |= 0x80; // require normalized value
1168 			scope(failure) stderr.writeln("Input:\t", u);
1169 			auto s = fpToString(u.d);
1170 			scope(failure) stderr.writeln("Result:\t", s);
1171 			if (s == "nan" || s == "-nan")
1172 				continue; // there are many NaNs...
1173 			U r;
1174 			//r.d = to!F(s);
1175 			r.d = fpParse!F(s);
1176 			assert(r.bytes == u.bytes,
1177 				"fpToString mismatch:\nOutput:\t%s".format(r));
1178 		}
1179 	}
1180 }
1181 
1182 alias doubleToString = fpToString!double; ///
1183 
1184 unittest
1185 {
1186 	alias floatToString = fpToString!float;
1187 	alias realToString = fpToString!real;
1188 	alias crealToString = fpToString!(const(real));
1189 }
1190 
1191 unittest
1192 {
1193 	assert(2.3841857910156251e-07.doubleToString == "2.384185791015625e-07");
1194 	assert(1.3e-07.doubleToString == "1.3e-07");
1195 }
1196 
1197 /// Like `fpToString`, but writes the result to a sink.
1198 void putFP(Writer, F)(auto ref Writer writer, F v)
1199 {
1200 	writer.put(fpToBuf(v).data);
1201 }
1202 
1203 
1204 /// Wraps the result of `fpToString` in a non-allocating stringifiable struct.
1205 struct FPAsString(T)
1206 {
1207 	private typeof(fpToBuf(T.init)) buf;
1208 
1209 	this(T f)
1210 	{
1211 		buf = fpToBuf(f);
1212 	} ///
1213 
1214 	string toString() const pure nothrow
1215 	{
1216 		return buf.data.idup;
1217 	} ///
1218 
1219 	void toString(W)(ref W w) const
1220 	{
1221 		static if (is(typeof(w.put(buf.data))))
1222 			w.put(buf.data);
1223 		else
1224 			foreach (c; buf.data)
1225 				w.put(c);
1226 	} ///
1227 }
1228 FPAsString!T fpAsString(T)(T f) { return FPAsString!T(f); } /// ditto
1229 
1230 @safe //nothrow @nogc
1231 unittest
1232 {
1233 	StaticBuf!(char, 1024) buf;
1234 	buf.formattedWrite!"%s"(fpAsString(0.1));
1235 	assert(buf.data == "0.1", buf.data.idup);
1236 }
1237 
1238 /// Get shortest string representation of a numeric
1239 /// type that still converts to exactly the same number.
1240 string numberToString(T)(T v)
1241 	if (isNumeric!T)
1242 {
1243 	static if (is(T : ulong))
1244 		return toDec(v);
1245 	else
1246 		return fpToString(v);
1247 }
1248 
1249 // ************************************************************************
1250 
1251 /// Simpler implementation of Levenshtein string distance
1252 int stringDistance(string s, string t)
1253 {
1254 	int n = cast(int)s.length;
1255 	int m = cast(int)t.length;
1256 	if (n == 0) return m;
1257 	if (m == 0) return n;
1258 	int[][] distance = new int[][](n+1, m+1); // matrix
1259 	int cost=0;
1260 	//init1
1261 	foreach (i; 0..n+1) distance[i][0]=i;
1262 	foreach (j; 0..m+1) distance[0][j]=j;
1263 	//find min distance
1264 	foreach (i; 1..n+1)
1265 		foreach (j; 1..m+1)
1266 		{
1267 			cost = t[j-1] == s[i-1] ? 0 : 1;
1268 			distance[i][j] = min(
1269 				distance[i-1][j  ] + 1,
1270 				distance[i  ][j-1] + 1,
1271 				distance[i-1][j-1] + cost
1272 			);
1273 		}
1274 	return distance[n][m];
1275 }
1276 
1277 /// Return a number between 0.0 and 1.0 indicating how similar two strings are
1278 /// (1.0 if identical)
1279 float stringSimilarity(string string1, string string2)
1280 {
1281 	float dis = stringDistance(string1, string2);
1282 	float maxLen = string1.length;
1283 	if (maxLen < string2.length)
1284 		maxLen = string2.length;
1285 	if (maxLen == 0)
1286 		return 1;
1287 	else
1288 		return 1f - dis/maxLen;
1289 }
1290 
1291 /// Select best match from a list of items.
1292 /// Returns -1 if none are above the threshold.
1293 sizediff_t findBestMatch(in string[] items, string target, float threshold = 0.7)
1294 {
1295 	sizediff_t found = -1;
1296 	float best = 0;
1297 
1298 	foreach (i, item; items)
1299 	{
1300 		float match = stringSimilarity(toLower(item),toLower(target));
1301 		if (match>threshold && match>=best)
1302 		{
1303 			best = match;
1304 			found = i;
1305 		}
1306 	}
1307 
1308 	return found;
1309 }
1310 
1311 /// Select best match from a list of items.
1312 /// Returns null if none are above the threshold.
1313 string selectBestFrom(in string[] items, string target, float threshold = 0.7)
1314 {
1315 	auto index = findBestMatch(items, target, threshold);
1316 	return index < 0 ? null : items[index];
1317 }
1318 
1319 // ************************************************************************
1320 
1321 /// Generate a random string with the given parameters.
1322 /// `std.random` is used as the source of randomness.
1323 /// Not cryptographically secure.
1324 string randomString()(int length=20, string chars="abcdefghijklmnopqrstuvwxyz")
1325 {
1326 	import std.random;
1327 	import std.range;
1328 
1329 	return length.iota.map!(n => chars[uniform(0, $)]).array;
1330 }