ae.utils.text source code

1 /**
2  * Utility code related to string and text processing.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <ae@cy.md>
12  */
13 
14 module ae.utils.text;
15 
16 import std.algorithm;
17 import std.ascii;
18 import std.exception;
19 import std.conv;
20 import std.format;
21 import std.range.primitives;
22 import std.string;
23 import std.traits;
24 import std.typetuple;
25 
26 import core.stdc.stdio : snprintf, sscanf;
27 import core.stdc.string;
28 
29 import ae.utils.array;
30 import ae.utils.meta;
31 import ae.utils.text.parsefp;
32 import ae.utils.textout;
33 
// Make unqualified `indexOf` in this module refer to std.string's version
// (presumably to avoid ambiguity with other imported modules).
private alias indexOf = std.string.indexOf;
35 
36 public import ae.utils.text.ascii : ascii, DecimalSize, toDec, toDecFixed, asciiToLower, asciiToUpper;
37 public import ae.utils.array : contains;
38 
39 // ************************************************************************
40 
/// UFCS-friendly wrapper around `std.format.format` taking the value first
/// and the format string second, so it can be written `obj.formatAs(fmt)`.
/// Params:
///   obj = the single value to format
///   fmt = run-time format string applied to `obj`
/// Returns: the formatted text.
string formatAs(T)(auto ref T obj, string fmt)
{
	return fmt.format(obj);
}
46 
/// Wraps `values` in an object which renders them with the compile-time
/// format string `fmt` only when it is itself converted to text.
/// The returned struct stores a copy of `values` and exposes two `toString`
/// overloads: one for output ranges of `char` and one for a plain sink
/// delegate.
auto formatted(string fmt, T...)(auto ref T values)
{
	static struct Formatted
	{
		T values;

		// Output-range flavour.
		void toString(W)(ref W writer) const
		if (isOutputRange!(W, char))
		{
			formattedWrite!fmt(writer, values);
		}

		// Sink-delegate flavour.
		void toString(void delegate(const(char)[]) sink) const
		{
			formattedWrite!fmt(sink, values);
		}
	}
	return Formatted(values);
}
67 
// `formatted` can be embedded as an argument of another `format` call.
unittest
{
	assert(format!"%s%s%s"("<", formatted!"%x"(64), ">") == "<40>");
}
72 
73 // ************************************************************************
74 
/// Consume a LF or CRLF terminated line from the front of `s` and return it
/// without its line terminator.
/// The search for `'\n'` is delegated to `skipUntil`; `eatIncompleteLines`
/// is forwarded to it and controls what happens when `s` contains no line
/// terminator. With the default (`true`), the remainder is returned and `s`
/// is set to null.
T[] eatLine(T)(ref T[] s, bool eatIncompleteLines = true)
{
	auto line = skipUntil(s, [T('\n')], eatIncompleteLines);
	return chomp(line);
}
82 
// Deprecated variant of `eatLine` which selects the end-of-input behaviour
// at compile time through `onEof`; it forwards to `eatUntil!onEof` and
// removes the trailing line terminator with `chomp`.
deprecated template eatLine(OnEof onEof)
{
	T[] eatLine(T)(ref T[] s)
	{
		return s.eatUntil!onEof([T('\n')]).chomp();
	}
}
90 
// Lines are consumed one by one; once the input is exhausted `s` is null
// and further calls return null.
unittest
{
	string s = "Hello\nworld";
	assert(s.eatLine() == "Hello");
	assert(s.eatLine() == "world"); // no terminator: the remainder is returned
	assert(s is null);
	assert(s.eatLine() is null);
}
99 
// Uses memchr (not Boyer-Moore), best for short strings.
/// An implementation of `replace` optimized for common cases (short strings).
///
/// Replaces every occurrence of `from` in `what` with `to`.
/// When nothing is replaced, `what` itself is returned (no allocation).
/// Only instantiable for element types of size 1, because the search is
/// done with C `memchr`.
///
/// Params:
///   what = text to search in
///   from = text to search for; if empty or longer than `what`, `what` is returned as is
///   to   = replacement text
/// Returns: `what` if there was no match, otherwise a newly built array.
T[] fastReplace(T)(T[] what, T[] from, T[] to)
	if (T.sizeof == 1) // TODO (uses memchr)
{
	alias Unqual!T U;

//	debug scope(failure) std.stdio.writeln("fastReplace crashed: ", [what, from, to]);
	// Null pointer typed as U*: slicing it with two addresses (cast to
	// size_t) yields the memory between those addresses as a U[].
	enum RAM = cast(U*)null;

	if (what.length < from.length || from.length==0)
		return what;

	// --- Needle is a single character ---
	if (from.length==1)
	{
		auto fromc = from[0];
		if (to.length==1)
		{
			// Character-for-character replacement: duplicate once, then patch the copy.
			auto p = cast(T*)memchr(what.ptr, fromc, what.length);
			if (!p)
				return what;

			T[] result = what.dup;
			// Offset between the copy and the original, so that a pointer into
			// `what` indexed by `delta` addresses the same position in `result`.
			auto delta = result.ptr - what.ptr;
			auto toChar = to[0];
			auto end = what.ptr + what.length;
			do
			{
				(cast(U*)p)[delta] = toChar; // zomg hax lol
				p++;
				p = cast(T*)memchr(p, fromc, end - p);
			} while (p);
			return result;
		}
		else
		{
			// Replacement has a different length: rebuild the text piecewise.
			auto p = cast(immutable(T)*)memchr(what.ptr, fromc, what.length);
			if (!p)
				return what;

			auto sb = StringBuilder(what.length);
			do
			{
				// Text before the match, then the replacement.
				sb.put(what[0..p-what.ptr], to);
				// Continue after the matched character.
				what = what[p-what.ptr+1..$];
				p = cast(immutable(T)*)memchr(what.ptr, fromc, what.length);
			}
			while (p);

			sb.put(what); // unmatched tail
			return sb.get();
		}
	}

	// --- Needle has two or more characters ---
	// Search for the first character with memchr, then compare the rest.
	auto head = from[0];
	auto tail = from[1..$];

	auto p = cast(T*)what.ptr;
	// Last position (exclusive) at which `head` may occur and still leave
	// room for `tail`.
	auto end = p + what.length - tail.length;
	p = cast(T*)memchr(p, head, end-p);
	while (p)
	{
		p++; // p now points just past `head`
		if (p[0..tail.length] == tail)
		{
			// First match found; only now is anything allocated.
			if (from.length == to.length)
			{
				// Same length: duplicate and overwrite matches in the copy.
				T[] result = what.dup;
				// `p` points one past the start of the match, hence the extra -1.
				auto deltaMinusOne = (result.ptr - what.ptr) - 1;

				// Jump into the loop body to handle the match already found.
				goto replaceA;
			dummyA: // compiler complains

				do
				{
					p++;
					if (p[0..tail.length] == tail)
					{
					replaceA:
						(cast(U*)p+deltaMinusOne)[0..to.length] = to[];
					}
					p = cast(T*)memchr(p, head, end-p);
				}
				while (p);

				return result;
			}
			else
			{
				// Different length: rebuild the text piecewise.
				// `start` is the beginning of the text not yet copied to `sb`.
				auto start = cast(T*)what.ptr;
				auto sb = StringBuilder(what.length);
				// Jump into the loop body to handle the match already found.
				goto replaceB;
			dummyB: // compiler complains

				do
				{
					p++;
					if (p[0..tail.length] == tail)
					{
					replaceB:
						// Copy [start, match start) followed by the replacement.
						sb.put(RAM[cast(size_t)start .. cast(size_t)p-1], to);
						start = p + tail.length;
						what = what[start-what.ptr..$];
					}
					else
					{
						// False hit on `head`: keep searching after it.
						what = what[p-what.ptr..$];
					}
					p = cast(T*)memchr(what.ptr, head, what.length);
				}
				while (p);

				//sb.put(what);
				// Copy everything from `start` up to the end of the input.
				sb.put(RAM[cast(size_t)start..cast(size_t)(what.ptr+what.length)]);
				return sb.get();
			}

			assert(0);
		}
		p = cast(T*)memchr(p, head, end-p);
	}

	return what;
}
224 
// Checks `fastReplace` against `std.array.replace`, and that the input is
// returned as is (same slice) when nothing was replaced.
unittest
{
	import std.array;
	void test(string haystack, string from, string to)
	{
		auto description = `("` ~ haystack ~ `", "` ~ from ~ `", "` ~ to ~ `")`;

		auto r1 = fastReplace(haystack, from, to);
		auto r2 =     replace(haystack, from, to);
		assert(r1 == r2, `Bad replace: ` ~ description ~ ` == "` ~ r1 ~ `"`);

		if (r1 == haystack)
			assert(r1 is haystack, `Pointless reallocation: ` ~ description);
	}

	// Matching needles: 1->1, 1->n, n->n and n->m characters.
	test("Mary had a little lamb", "a", "b");
	test("Mary had a little lamb", "a", "aaa");
	test("Mary had a little lamb", "Mary", "Lucy");
	test("Mary had a little lamb", "Mary", "Jimmy");
	test("Mary had a little lamb", "lamb", "goat");
	test("Mary had a little lamb", "lamb", "sheep");
	test("Mary had a little lamb", " l", " x");
	test("Mary had a little lamb", " l", " xx");

	// Needles which do not occur.
	test("Mary had a little lamb", "X" , "Y" );
	test("Mary had a little lamb", "XX", "Y" );
	test("Mary had a little lamb", "X" , "YY");
	test("Mary had a little lamb", "XX", "YY");
	test("Mary had a little lamb", "aX", "Y" );
	test("Mary had a little lamb", "aX", "YY");

	// Needle longer than the haystack.
	test("foo", "foobar", "bar");
}
258 
/// An implementation of `split` optimized for common cases.
/// The result array is allocated exactly once: a first pass counts the
/// delimiters with `memchr`, a second pass stores the slices.
/// Params:
///   s = text to split; the returned slices point into it
///   d = delimiter character
/// Returns: `null` for an empty `s`, `[s]` when `d` does not occur,
/// otherwise one slice per delimited field (empty fields included).
T[][] fastSplit(T, U)(T[] s, U d)
	if (is(Unqual!T == Unqual!U))
{
	if (s.length == 0)
		return null;

	auto limit = s.ptr + s.length;
	auto firstHit = cast(T*)memchr(s.ptr, d, s.length);
	if (firstHit is null)
		return [s];

	// Pass 1: count delimiters.
	size_t hits = 0;
	for (auto q = firstHit; q !is null; q = cast(T*)memchr(q + 1, d, limit - (q + 1)))
		hits++;

	// Pass 2: fill in the fields.
	auto fields = new T[][hits + 1];
	size_t idx = 0;
	auto fieldStart = s.ptr;
	for (auto q = firstHit; q !is null; q = cast(T*)memchr(fieldStart, d, limit - fieldStart))
	{
		fields[idx++] = fieldStart[0 .. q - fieldStart];
		fieldStart = q + 1;
	}
	fields[idx] = fieldStart[0 .. limit - fieldStart];

	return fields;
}
295 
/// Like `splitLines`, but does not attempt to split on Unicode line endings.
/// Splits on `"\n"` and `"\r\n"` only: the text is cut at every `'\n'`, and
/// a single `'\r'` directly before the cut is dropped. A `'\r'` elsewhere
/// stays part of its line.
T[][] splitAsciiLines(T)(T[] text)
	if (is(Unqual!T == char))
{
	auto rows = fastSplit(text, '\n');
	foreach (ref row; rows)
	{
		if (row.length == 0 || row[$-1] != '\r')
			continue;
		row = row[0 .. $-1];
	}
	return rows;
}
307 
// Only "\n" and "\r\n" act as separators; other '\r' characters are kept.
unittest
{
	assert(splitAsciiLines("a\nb\r\nc\r\rd\n\re\r\n\nf") == ["a", "b", "c\r\rd", "\re", "", "f"]);
	assert(splitAsciiLines(string.init) == splitLines(string.init));
}
313 
/// Like std.string.split (one argument version, which splits by
/// whitespace), but only recognizes ASCII whitespace and works on
/// code units (no autodecoding).
/// Returns: the non-whitespace runs of `text`, as slices of `text`,
/// in order. Empty if there are none.
T[][] asciiSplit(T)(T[] text)
	if (is(Unqual!T == char))
{
	T[][] words;
	immutable len = text.length;
	size_t pos = 0;

	while (pos < len)
	{
		// Skip the whitespace in front of the next word.
		while (pos < len && std.ascii.isWhite(text[pos]))
			pos++;
		if (pos == len)
			break;

		// Consume the word itself.
		immutable wordStart = pos;
		while (pos < len && !std.ascii.isWhite(text[pos]))
			pos++;
		words ~= text[wordStart .. pos];
	}
	return words;
}
344 
// For ASCII-only inputs `asciiSplit` must agree with `std.string.split`.
unittest
{
	foreach (s; ["", " ", "a", " a", "a ", "a b", " a b", "a b ", " a b ",
			"  ", "  a", "a  ", "a  b", "a  b  ", "a b  c"])
		assert(s.split == s.asciiSplit, format("Got %s, expected %s", s.asciiSplit, s.split));
}
351 
/// Like `strip`, but only removes ASCII whitespace.
/// Returns: the slice of `s` without leading and trailing ASCII whitespace.
/// No copy is made.
T[] asciiStrip(T)(T[] s)
	if (is(Unqual!T == char))
{
	size_t lo = 0;
	size_t hi = s.length;
	while (lo < hi && isWhite(s[lo]))
		lo++;
	while (hi > lo && isWhite(s[hi - 1]))
		hi--;
	return s[lo .. hi];
}
362 
///
unittest
{
	string s = "Hello, world!";
	assert(asciiStrip(s) is s); // nothing to strip: same slice is returned
	assert(asciiStrip("\r\n\tHello ".dup) == "Hello");
}
370 
/// Covering slice-list of s with interleaved whitespace.
///
/// Splits `s` into consecutive slices which together cover all of `s`,
/// alternating between runs of ASCII whitespace and runs of other
/// characters. Whether the first slice is whitespace depends on `s[0]`.
/// Returns: `null` for an empty `s`, otherwise the list of slices
/// (all of them slices of `s`).
T[][] segmentByWhitespace(T)(T[] s)
	if (is(Unqual!T == char))
{
	if (!s.length)
		return null;

	T[][] segments;
	bool prevWhite = isWhite(s[0]);
	size_t start = 0;
	foreach (p, char c; s)
	{
		// Deliberately not named `isWhite`: a local of that name would
		// shadow the function being called in its own initializer.
		immutable curWhite = isWhite(c);
		if (curWhite != prevWhite)
		{
			// Class changed: close the current run and open a new one.
			segments ~= s[start .. p];
			start = p;
		}
		prevWhite = curWhite;
	}
	segments ~= s[start .. $];

	return segments;
}
393 
/// Replaces each run of ASCII whitespace which contains a newline (`'\n'`)
/// with a single space. Whitespace runs without a newline, and all other
/// text, are kept as they are.
T[] newlinesToSpaces(T)(T[] s)
	if (is(Unqual!T == char))
{
	auto parts = segmentByWhitespace(s);
	foreach (i; 0 .. parts.length)
		if (parts[i].contains("\n"))
			parts[i] = " ";
	return parts.join();
}
404 
/// Replaces all runs of ASCII whitespace with a single space.
/// Leading and trailing whitespace is removed first (with `strip`), so the
/// segments at odd positions are exactly the whitespace runs between words.
ascii normalizeWhitespace(ascii s)
{
	auto parts = segmentByWhitespace(strip(s));
	for (size_t i = 1; i < parts.length; i += 2)
		parts[i] = " ";
	return parts.join();
}
414 
///
unittest
{
	// Spaces, tabs, LF and CRLF all collapse; the leading space is dropped.
	assert(normalizeWhitespace(" Mary  had\ta\nlittle\r\n\tlamb") == "Mary had a little lamb");
}
420 
/// Splits out words from a camel-cased string.
/// All-uppercase words are returned as a single word.
///
/// A word boundary is placed before `s[i]` when a lower-case letter is
/// followed by an upper-case one, or when two upper-case letters are
/// followed by a lower-case one (the second upper-case letter then starts
/// the next word).
/// Returns: the words as slices of `s`. Empty for an empty `s`.
string[] splitByCamelCase(string s)
{
	string[] words;
	size_t wordStart = 0;
	foreach (i; 1 .. s.length + 1)
	{
		bool boundary;
		if (i == s.length)
			boundary = true; // end of input closes the last word
		else if (isLower(s[i-1]) && isUpper(s[i]))
			boundary = true; // "eI" in "parseIP"
		else
			boundary = i + 1 < s.length
				&& isUpper(s[i-1]) && isUpper(s[i]) && isLower(s[i+1]); // "PSt" in "IPString"

		if (!boundary)
			continue;
		words ~= s[wordStart .. i];
		wordStart = i;
	}
	return words;
}
438 
///
unittest
{
	// An acronym stays one word, whether in the middle or at the start.
	assert(splitByCamelCase("parseIPString") == ["parse", "IP", "String"]);
	assert(splitByCamelCase("IPString") == ["IP", "String"]);
}
445 
/// Join an array of words into a camel-cased string.
///
/// The first word is copied unchanged; the first character of every
/// following word is upper-cased (ASCII only) before it is appended.
/// Empty words after the first are skipped.
/// Returns: `null` for an empty array, otherwise the joined string.
string camelCaseJoin(string[] arr)
{
	if (!arr.length)
		return null;
	string result = arr[0];
	foreach (s; arr[1..$])
	{
		// An empty word has no first character to capitalize; indexing it
		// would be a range violation.
		if (!s.length)
			continue;
		result ~= std.ascii.toUpper(s[0]);
		result ~= s[1..$];
	}
	return result;
}
456 
// Words which are already upper-case ("IP") are kept as they are.
unittest
{
	assert("parse-IP-string".split('-').camelCaseJoin() == "parseIPString");
}
461 
462 // ************************************************************************
463 
/// Like std.string.wrap, but preserves whitespace at line start and
/// between (non-wrapped) words.
///
/// Params:
///   s           = text to wrap; an empty `s` is returned unchanged
///                 (without `firstIndent`)
///   columns     = column limit; a break is inserted before a word when the
///                 current column exceeds it and the line already has content
///   firstIndent = text placed at the very start of the result
///   indent      = text placed at the start of every following line, both
///                 after inserted breaks and after newlines present in `s`
///   tabWidth    = number of columns a tab character counts for
/// Returns: the wrapped text. A final newline is appended when the last
/// line is not empty.
string verbatimWrap(
	string s,
	size_t columns = 80,
	string firstIndent = null,
	string indent = null,
	size_t tabWidth = 8,
)
{
	if (!s.length)
		return s;

	import std.uni : isWhite;
	import std.range;

	// Result buffer. Append-only (contains only text which has been wrapped).
	string result;
	// Index in `s` corresponding to the end of `result`
	size_t start;
	// Index in `s` corresponding to after the last newline in `result`
	size_t lineStart;
	// Current column
	size_t col;
	// Was the previous character we looked at whitespace?
	bool wasWhite;
	// We need to add an indent at the next (non-newline) character.
	bool needIndent;

	result = firstIndent;
	// Indent widths are measured in code points.
	col = firstIndent.walkLength;
	auto indentWidth = indent.walkLength;

	// Appends the pending text s[start .. pos] to `result`, first inserting
	// a line break if that text made the current line too long.
	void flush(size_t pos)
	{
		// `start > lineStart`: only break if something is already on this line.
		if (col > columns && start > lineStart)
		{
			result ~= "\n" ~ indent;
			// NOTE(review): the width of the pending text moved to the new
			// line is not added to `col` here - confirm this is intended.
			col = indentWidth;

			// Consume whitespace at line break
			// NOTE(review): `numWhite` ends up as the index of the last
			// leading whitespace character, i.e. one less than their count,
			// so one whitespace character is kept - confirm this is intended.
			size_t numWhite;
			foreach (i, c; s[start .. $])
				if (isWhite(c))
					numWhite = i;
				else
					break;
			start += numWhite;
			lineStart = start;
		}
		result ~= s[start .. pos];
		start = pos;
	}

	foreach (pos, dchar c; s)
	{
		auto atWhite = isWhite(c);
		// A word just ended: decide whether it still fits.
		if (atWhite && !wasWhite)
			flush(pos);
		if (c == '\n')
		{
			// Newlines in the input are kept and start a fresh line.
			flush(pos);
			result ~= "\n";
			start++; // past newline
			lineStart = start;
			needIndent = true;
			col = 0;
		}
		else
		{
			// The indent is added lazily, so that empty lines do not get one.
			if (needIndent)
			{
				assert(col == 0);
				result ~= indent;
				col += indentWidth;
				needIndent = false;
			}
			// A tab always counts as `tabWidth` columns, wherever it occurs.
			if (c == '\t')
				col += tabWidth;
			else
				col++;
		}
		wasWhite = atWhite;
	}
	flush(s.length);
	if (col)
		result ~= "\n"; // trailing newline

	return result;
}
554 
555 // ************************************************************************
556 
/// Case-insensitive ASCII string.
/// The normalization maps every code unit through `std.ascii.toLower`, so
/// only ASCII letters compare case-insensitively.
alias CIAsciiString = NormalizedArray!(immutable(char), s => s.byCodeUnit.map!(std.ascii.toLower));
559 
///
unittest
{
	CIAsciiString s = "test";
	assert(s == "TEST");
	assert(s >= "Test" && s <= "Test");
	assert(CIAsciiString("a") == CIAsciiString("A"));
	assert(CIAsciiString("a") != CIAsciiString("B"));
	assert(CIAsciiString("a") <  CIAsciiString("B"));
	assert(CIAsciiString("A") <  CIAsciiString("b"));
	// Non-ASCII letters are not case-folded.
	assert(CIAsciiString("я") != CIAsciiString("Я"));
}
572 
573 import std.uni : toLower;
574 
/// Case-insensitive Unicode string.
/// The normalization maps the elements of the string through `toLower`.
alias CIUniString = NormalizedArray!(immutable(char), s => s.map!(toLower));
577 
///
unittest
{
	CIUniString s = "привет";
	assert(s == "ПРИВЕТ");
	assert(s >= "Привет" && s <= "Привет");
	// Unlike CIAsciiString, non-ASCII letters are case-folded too.
	assert(CIUniString("я") == CIUniString("Я"));
	assert(CIUniString("а") != CIUniString("Б"));
	assert(CIUniString("а") <  CIUniString("Б"));
	assert(CIUniString("А") <  CIUniString("б"));
}
589 
590 // ************************************************************************
591 
592 import std.utf;
593 
/// Convert any data to a valid UTF-8 bytestream, so D's string functions can
/// properly work on it.
/// Every input byte is treated as the code point of the same value
/// (0 to 255) and the resulting code points are encoded as UTF-8.
string rawToUTF8(in char[] s)
{
	dchar[] codePoints;
	codePoints.length = s.length;
	foreach (idx; 0 .. s.length)
		codePoints[idx] = s[idx];
	return toUTF8(codePoints);
}
603 
/// Undo rawToUTF8.
/// Decodes `r` as UTF-8 and stores each code point as one byte.
/// All code points must be below U+0100; this is checked with `assert` only.
ascii UTF8ToRaw(in char[] r) pure
{
	// The output can never be longer than the input.
	char[] raw = new char[r.length];
	size_t count;
	foreach (dchar codePoint; r)
	{
		assert(codePoint < '\u0100');
		raw[count] = cast(char)codePoint;
		count++;
	}
	return raw[0 .. count];
}
616 
// Round trip through rawToUTF8 / UTF8ToRaw for every possible byte value.
unittest
{
	char[1] c;
	for (int i=0; i<256; i++)
	{
		c[0] = cast(char)i;
		assert(UTF8ToRaw(rawToUTF8(c[])) == c[], format("%s -> %s -> %s", cast(ubyte[])c[], cast(ubyte[])rawToUTF8(c[]), cast(ubyte[])UTF8ToRaw(rawToUTF8(c[]))));
	}
}
626 
/// Identity transform, for use where a delegate with this signature is
/// required. Returns `s` converted to `string`.
string nullStringTransform(in char[] s)
{
	return s.to!string;
}
629 
/// Like readText, but with in-memory data.
/// Reverse of ae.utils.array.bytes (for strings).
/// Reinterprets `bytes` as text without copying, after checking with
/// `std.utf.validate` that it is well-formed UTF-8.
/// Throws: `UTFException` if `bytes` is not valid UTF-8.
inout(char)[] asText(inout(ubyte)[] bytes)
{
	auto text = cast(inout(char)[]) bytes;
	text.validate();
	return text;
}
638 
/// Lossily convert arbitrary data into a valid UTF-8 string.
/// Input which already is valid UTF-8 is returned unchanged; anything else
/// is converted with `rawToUTF8`.
string forceValidUTF8(ascii s)
{
	bool wellFormed = true;
	try
		validate(s);
	catch (UTFException)
		wellFormed = false;

	return wellFormed ? s : rawToUTF8(s);
}
650 
651 // ************************************************************************
652 
/// Return the slice up to the first NUL character,
/// or of the whole array if none is found.
///
/// Static-array overload. `arr` is taken by reference and the returned
/// slice refers to its storage.
C[] fromZArray(C, size_t n)(ref C[n] arr)
{
	// `n` must be a value parameter: declared as a bare identifier it would
	// be a type, making `C[n]` an associative array.
	auto p = arr[].representation.countUntil(0);
	return arr[0 .. p<0 ? $ : p];
}
660 
/// ditto
C[] fromZArray(C)(C[] arr)
{
	// Search the raw code units for the first zero.
	immutable nul = arr.representation.countUntil(0);
	if (nul < 0)
		return arr[0 .. $];
	return arr[0 .. nul];
}
667 
// fromZArray on a static array.
unittest
{
	char[4] arr = "ab\0d";
	assert(arr.fromZArray == "ab");
	arr[] = "abcd"; // no NUL: the whole array is returned
	assert(arr.fromZArray == "abcd");
}

// fromZArray on a dynamic array (string).
unittest
{
	string arr = "ab\0d";
	assert(arr.fromZArray == "ab");
	arr = "abcd"; // no NUL: the whole array is returned
	assert(arr.fromZArray == "abcd");
}
683 
684 // ************************************************************************
685 
/// Formats binary data as a hex dump (three-column layout consisting of hex
/// offset, byte values in hex, and printable low-ASCII characters).
///
/// Each output line covers 16 bytes. The hex column has a `|` separator
/// after the eighth byte, and positions past the end of the data are padded
/// with spaces. In the text column, zero bytes are shown as a space and
/// other bytes outside 32 to 127 as `.`.
/// `b` must not be empty (checked with `assert`).
string hexDump(const(void)[] b)
{
	auto data = cast(const(ubyte)[]) b;
	assert(data.length);
	string s;
	for (size_t row = 0; row < data.length; row += 16)
	{
		// Offset column
		s ~= format("%08X:  ", row);

		// Hex column
		foreach (col; 0 .. 16)
		{
			immutable idx = row + col;
			s ~= idx < data.length ? format("%02X ", data[idx]) : "   ";
			if (col == 7)
				s ~= "| ";
		}
		s ~= "  ";

		// Text column
		foreach (col; 0 .. 16)
		{
			immutable idx = row + col;
			char shown;
			if (idx >= data.length || data[idx] == 0)
				shown = ' ';
			else if (data[idx] < 32 || data[idx] >= 128)
				shown = '.';
			else
				shown = cast(char) data[idx];
			s ~= shown;
		}
		s ~= "\n";
	}
	return s;
}
725 
726 import std.conv;
727 
/// Parses `s` as a hexadecimal number into an integer of type `T`.
/// The whole string must be consumed by the number.
/// Throws: `ConvException` if `s` does not start with a hexadecimal number
/// that fits `T`, or if characters remain after it.
T fromHex(T : ulong = uint, C)(const(C)[] s)
{
	auto rest = s;
	T value = parse!T(rest, 16);
	if (rest.length != 0)
		throw new ConvException("Could not parse entire string");
	return value;
}
735 
/// Parses `hex` into a newly allocated array of bytes.
/// `hex.length` should be even; two hex digits make up one byte.
ubyte[] arrayFromHex(in char[] hex)
{
	ubyte[] decoded = new ubyte[hex.length / 2];
	arrayFromHex(hex, decoded);
	return decoded;
}
744 
/// Policy for `parseHexDigit`.
/// Passed as a template (compile-time) argument to the hex parsing functions.
struct HexParseConfig
{
	bool checked = true; /// Throw on invalid digits.
	bool lower   = true; /// Accept lower-case digits.
	bool upper   = true; /// Accept upper-case digits.
}
752 
/// Parse a single hexadecimal digit according to the policy in `config`.
///
/// With `config.checked`, both letter cases are accepted and any other
/// character throws an `Exception`. Without it, no validation is done:
/// characters up to `'9'` are treated as decimal digits and everything else
/// as a letter of the case(s) enabled in `config`.
/// Returns: the value of the digit (0 to 15 for valid input).
ubyte parseHexDigit(HexParseConfig config = HexParseConfig.init)(char c)
{
	static assert(config.lower || config.upper,
		"Must parse at least either lower or upper case digits");
	static if (config.checked)
	{
		if (c >= '0' && c <= '9')
			return cast(ubyte)(c - '0');
		if (c >= 'a' && c <= 'f')
			return cast(ubyte)(c - 'a' + 10);
		if (c >= 'A' && c <= 'F')
			return cast(ubyte)(c - 'A' + 10);
		throw new Exception("Bad hex digit: " ~ c);
	}
	else static if (config.lower && config.upper)
	{
		if (c <= '9')
			return cast(ubyte)(c - '0');
		// Upper-case letters sort before lower-case ones in ASCII.
		return cast(ubyte)(c - (c < 'a' ? 'A' : 'a') + 10);
	}
	else
	{
		// Exactly one letter case is enabled.
		enum char letterBase = config.lower ? 'a' : 'A';
		if (c <= '9')
			return cast(ubyte)(c - '0');
		return cast(ubyte)(c - letterBase + 10);
	}
}
786 
/// Parses `hex` into the given array `buf`.
/// Params:
///   hex = hexadecimal digits, two per output byte (high nibble first)
///   buf = destination; its length must be `hex.length/2` (checked with `assert`)
void arrayFromHex(HexParseConfig config = HexParseConfig.init)(in char[] hex, ubyte[] buf)
{
	assert(buf.length == hex.length/2, "Wrong buffer size for arrayFromHex");
	// size_t index: an `int` would be compared against the unsigned length
	// and could overflow on very large inputs.
	for (size_t i = 0; i < hex.length; i += 2)
		buf[i/2] = cast(ubyte)(
			parseHexDigit!config(hex[i  ])*16 +
			parseHexDigit!config(hex[i+1])
		);
}
797 
/// Parses `hex` into the given array `buf`.
/// Fast version for static arrays of known length.
/// `hex` must be a `char[N*2]`: two hex digits per output byte.
void sarrayFromHex(HexParseConfig config = HexParseConfig.init, size_t N, Hex)(ref const Hex hex, ref ubyte[N] buf)
if (is(Hex == char[N*2]))
{
	// Bulk part: read 8 hex digits at once as a ulong and write the 4
	// decoded bytes at once as a uint.
	// NOTE(review): digit k is taken from bits 8k..8k+7 of the word, which
	// matches memory order only on little-endian targets - confirm.
	foreach (i; 0..N/4)
	{
		ulong chars = (cast(ulong*)hex.ptr)[i];
		// Within each pair, the first digit is the high nibble of the byte.
		uint res =
			(parseHexDigit!config((chars >> (8*0)) & 0xFF) << (4*1)) |
			(parseHexDigit!config((chars >> (8*1)) & 0xFF) << (4*0)) |
			(parseHexDigit!config((chars >> (8*2)) & 0xFF) << (4*3)) |
			(parseHexDigit!config((chars >> (8*3)) & 0xFF) << (4*2)) |
			(parseHexDigit!config((chars >> (8*4)) & 0xFF) << (4*5)) |
			(parseHexDigit!config((chars >> (8*5)) & 0xFF) << (4*4)) |
			(parseHexDigit!config((chars >> (8*6)) & 0xFF) << (4*7)) |
			(parseHexDigit!config((chars >> (8*7)) & 0xFF) << (4*6));
		(cast(uint*)buf.ptr)[i] = res;
	}
	// Remainder: the last N % 4 bytes, one at a time.
	foreach (i; N/4*4..N)
		buf[i] = cast(ubyte)(
			parseHexDigit!config(hex[i*2  ])*16 +
			parseHexDigit!config(hex[i*2+1])
		);
}
823 
// Exercises sarrayFromHex with every valid HexParseConfig combination.
// 9 bytes are used so that both the bulk and the remainder loop run.
unittest
{
	foreach (checked; TypeTuple!(false, true))
		foreach (lower; TypeTuple!(false, true))
			foreach (upper; TypeTuple!(false, true))
				static if (lower || upper)
				{
					enum config = HexParseConfig(checked, lower, upper);
					char[18] buf;
					// Mix cases when both are enabled, else use the enabled one.
					foreach (n; 0..18)
						if (lower && upper ? n & 1 : upper)
							buf[n] = hexDigits[n % 16];
						else
							buf[n] = lowerHexDigits[n % 16];
					ubyte[9] res;
					sarrayFromHex!config(buf, res);
					assert(res == [0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0x01], text(res));
				}
}
843 
/// Conversion from bytes to hexadecimal strings.
/// `digits` is the table of 16 digit characters to use; every byte is
/// written as two digits, high nibble first.
template toHex(alias digits = hexDigits)
{
	/// Dynamic array version.
	/// Writes into `buf`, which must be exactly twice as long as `data`
	/// (checked with `assert`), and returns `buf`.
	char[] toHex(in ubyte[] data, char[] buf) pure
	{
		assert(buf.length == data.length*2);
		foreach (i; 0 .. data.length)
		{
			immutable octet = data[i];
			buf[2*i    ] = digits[octet >> 4];
			buf[2*i + 1] = digits[octet & 0x0F];
		}
		return buf;
	}

	/// Static array version.
	/// Returns the digits by value, without heap allocation.
	char[n*2] toHex(size_t n)(in ubyte[n] data) pure
	{
		char[n*2] buf;
		foreach (i; 0 .. n)
		{
			immutable octet = data[i];
			buf[2*i    ] = digits[octet >> 4];
			buf[2*i + 1] = digits[octet & 0x0F];
		}
		return buf;
	}

	/// Allocating version.
	/// Returns a newly allocated string.
	string toHex(in ubyte[] data) pure
	{
		auto buf = new char[data.length*2];
		foreach (i; 0 .. data.length)
		{
			immutable octet = data[i];
			buf[2*i    ] = digits[octet >> 4];
			buf[2*i + 1] = digits[octet & 0x0F];
		}
		return buf;
	}
}

alias toLowerHex = toHex!lowerHexDigits; /// ditto
885 
/// Converts an integer to a fixed-length hexadecimal string.
/// Writes `T.sizeof*2` upper-case digits (leading zeroes included) into
/// `buf`, most significant digit first.
void toHex(T : ulong, size_t U = T.sizeof*2)(T n, ref char[U] buf)
{
	Unqual!T x = n;
	// Iterate over the digit positions from last to first, as a
	// compile-time sequence; each step emits the lowest nibble.
	foreach (i; Reverse!(RangeTuple!(T.sizeof*2)))
	{
		buf[i] = hexDigits[x & 0xF];
		x >>= 4;
	}
}
896 
// Allocating byte-array version.
unittest
{
	ubyte[] bytes = [0x12, 0x34];
	assert(toHex(bytes) == "1234");
}

// Byte-array version writing into a caller-supplied buffer.
unittest
{
	ubyte[] bytes = [0x12, 0x34];
	char[] buf = new char[4];
	toHex(bytes, buf);
	assert(buf == "1234");
}

// Integer version writing into a fixed-size buffer; leading zero is kept.
unittest
{
	char[8] buf;
	toHex(0x01234567, buf);
	assert(buf == "01234567");
}
917 
/// ditto
char[T.sizeof*2] toHex(T : ulong)(T n)
{
	// Delegates to the buffer-filling overload and returns the digits by value.
	typeof(return) digits;
	toHex(n, digits);
	return digits;
}
925 
// Integer version returning a static array.
unittest
{
	assert(toHex(0x01234567) == "01234567");
}

// Static byte-array version: the result length is known at compile time.
unittest
{
	ubyte[2] bytes = [0x12, 0x34];
	auto buf = bytes.toLowerHex();
	static assert(buf.length == 4);
	assert(buf == "1234");
}
938 
/// How many significant decimal digits does a FP type have
/// (determined empirically - valid for all D FP types on x86/64).
/// Computed from the size of the type as `2 + 2 * T.sizeof`.
template significantDigits(T : real)
{
	enum significantDigits = T.sizeof * 2 + 2;
}
942 
/// Format string for a FP type which includes all necessary
/// significant digits.
/// Has the form `"%.<N>g"`, where N is `significantDigits!T`.
enum fpFormatString(T) = "%." ~ text(significantDigits!T) ~ "g";
// C printf length modifier for the floating-point type `T` (qualifiers are
// ignored): none for float, "l" for double, "L" for real.
// Not defined for any other type.
private template cWidthString(T)
{
	static if (is(Unqual!T == real))
		enum cWidthString = "L";
	else static if (is(Unqual!T == double))
		enum cWidthString = "l";
	else static if (is(Unqual!T == float))
		enum cWidthString = "";
}
/// C format string to exactly format a floating-point type `T`.
/// Like `fpFormatString`, with the C length modifier for `T` inserted
/// before the `g` conversion.
enum fpCFormatString(T) = "%." ~ text(significantDigits!T) ~ cWidthString!T ~ "g";
959 
// Calls C `snprintf` on a static buffer, always passing the buffer's real
// size as the limit. `args` are the format string and its arguments, passed
// through unchanged. Returns what `snprintf` returns.
private auto safeSprintf(size_t N, Args...)(ref char[N] buf, auto ref Args args) @trusted @nogc
{
	auto written = snprintf(buf.ptr, buf.length, args);
	return written;
}
964 
// Formats `val` into a fixed-size buffer using the shortest decimal text
// which still parses back to exactly the same value.
// The value is first printed with all significant digits, then the text is
// shortened step by step, re-parsing after every step to verify it.
// Returns the buffer (a StaticBuf!(char, 64)) with `pos` set to the length
// of the text.
private auto fpToBuf(Q)(Q val) @safe nothrow @nogc
{
	alias F = Unqual!Q;

	/// Bypass FPU register, which may contain a different precision
	static F forceType(F d) { static F n; n = d; return n; }

	enum isReal = is(F == real);

	// Left uninitialized; it is filled by snprintf below.
	StaticBuf!(char, 64) buf = void;

	// MSVC workaround from std.format:
	version (CRuntime_Microsoft)
	{
		import std.math : isNaN, isInfinity;
		immutable double v = val; // convert early to get "inf" in case of overflow
		{
			string s;
			if (isNaN(v))
				s = "nan"; // snprintf writes 1.#QNAN
			else if (isInfinity(v))
				s = val >= 0 ? "inf" : "-inf"; // snprintf writes 1.#INF
			else
				goto L1;
			buf.buf[0..s.length] = s;
			buf.pos = s.length;
			return buf;
		L1:
		}
	}
	else
		alias v = val;

	// Initial, full-precision rendering.
	buf.pos = safeSprintf(buf.buf, &fpCFormatString!F[0], forceType(v));
	char[] s = buf.data();

	// Parses text produced by this function back into an F; the text is
	// expected to be consumed completely.
	F parse(char[] s)
	{
		F f;
		auto res = tryParse(s, f);
		assert(res, "Failed to parse number we created");
		assert(!s.length, "Failed to completely parse number we created");
		return f;
	}

	if (s != "nan" && s != "-nan" && s != "inf" && s != "-inf")
	{
		// Sanity check: the full-precision text must round-trip.
		if (forceType(parse(s)) != v)
		{
			static if (isReal)
			{
				// Something funny with DM libc real parsing... e.g. 0.6885036635121051783
				return buf;
			}
			else
			//	assert(false, "Initial conversion fails: " ~ format(fpFormatString!F, parse(s)) ~ " / " ~ s);
				assert(false, "Initial conversion fails");
		}

		// Going from the last character towards the front: try to round up
		// the digit at position i and cut everything after it. Keep the
		// shorter text if it still round-trips, otherwise undo the change.
		foreach_reverse (i; 1..s.length)
			if (s[i]>='0' && s[i]<='8')
			{
				s[i]++;
				if (forceType(parse(s[0..i+1]))==v)
					s = s[0..i+1];
				else
					s[i]--;
			}
		// Then drop trailing characters for as long as the value round-trips.
		while (s.length>2 && s[$-1]!='.' && forceType(parse(s[0..$-1]))==v)
			s = s[0..$-1];
	}
	buf.pos = s.length;
	return buf;
}
1039 
/// Get shortest string representation of a FP type that still converts to exactly the same number.
/// Allocates the returned string; the work is done by `fpToBuf`.
template fpToString(F)
{
	string fpToString(F v) @safe nothrow
	{
		return fpToBuf(v).data.idup;
	}

	// Randomized round-trip test, compiled for every instantiated type
	// except `real`.
	static if (!is(Unqual!F == real))
	unittest
	{
		// Gives access to the raw bytes of a value, for generating random
		// bit patterns and for comparing results bit by bit.
		union U
		{
			ubyte[F.sizeof] bytes;
			Unqual!F d;
			string toString() const { return (fpFormatString!F ~ " %a [%(%02X %)]").format(d, d, bytes[]); }
		}
		import std.random : Xorshift, uniform;
		import std.stdio : stderr;
		Xorshift rng;
		foreach (n; 0..10000)
		{
			U u;
			foreach (ref b; u.bytes[])
				b = uniform!ubyte(rng);
			// Never taken: this unittest is not compiled for `real` (see above).
			static if (is(Unqual!F == real))
				u.bytes[7] |= 0x80; // require normalized value
			scope(failure) stderr.writeln("Input:\t", u);
			auto s = fpToString(u.d);
			scope(failure) stderr.writeln("Result:\t", s);
			if (s == "nan" || s == "-nan")
				continue; // there are many NaNs...
			U r;
			r.d = to!F(s);
			assert(r.bytes == u.bytes,
				"fpToString mismatch:\nOutput:\t%s".format(r));
		}
	}
}
1079 
alias doubleToString = fpToString!double; ///

// Checks that fpToString can be instantiated for the other FP types,
// including a qualified one.
unittest
{
	alias floatToString = fpToString!float;
	alias realToString = fpToString!real;
	alias crealToString = fpToString!(const(real));
}
1088 
/// Like `fpToString`, but writes the result to a sink.
/// The text is passed to `writer.put` in a single call.
void putFP(Writer, F)(auto ref Writer writer, F v)
{
	auto rendered = fpToBuf(v);
	writer.put(rendered.data);
}
1094 
1095 
/// Wraps the result of `fpToString` in a non-allocating stringifiable struct.
/// The number is formatted once, in the constructor, into a buffer stored
/// inside the struct.
struct FPAsString(T)
{
	// Formatted text, as returned by `fpToBuf`.
	private typeof(fpToBuf(T.init)) buf;

	this(T f)
	{
		buf = fpToBuf(f);
	} ///

	// Allocating conversion: returns a copy of the text.
	string toString() const pure nothrow
	{
		return buf.data.idup;
	} ///

	// Writes the text to `w`, in one `put` call if `w` accepts a slice,
	// otherwise character by character.
	void toString(W)(ref W w) const
	{
		static if (is(typeof(w.put(buf.data))))
			w.put(buf.data);
		else
			foreach (c; buf.data)
				w.put(c);
	} ///
}
FPAsString!T fpAsString(T)(T f) { return FPAsString!T(f); } /// ditto
1121 
// fpAsString can be formatted into a fixed-size buffer.
@safe //nothrow @nogc
unittest
{
	StaticBuf!(char, 1024) buf;
	buf.formattedWrite!"%s"(fpAsString(0.1));
	assert(buf.data == "0.1");
}
1129 
/// Get shortest string representation of a numeric
/// type that still converts to exactly the same number.
/// Types implicitly convertible to `ulong` are formatted with `toDec`,
/// all other numeric types with `fpToString`.
string numberToString(T)(T v)
	if (isNumeric!T)
{
	static if (!is(T : ulong))
		return fpToString(v);
	else
		return toDec(v);
}
1140 
1141 // ************************************************************************
1142 
/// Simpler implementation of Levenshtein string distance.
/// Compares code units (not code points) and uses a full
/// `(s.length+1) x (t.length+1)` table.
/// Returns: the minimum number of single-character insertions, deletions
/// and substitutions needed to turn `s` into `t`.
int stringDistance(string s, string t)
{
	immutable rows = cast(int)s.length;
	immutable cols = cast(int)t.length;
	if (rows == 0) return cols;
	if (cols == 0) return rows;

	auto table = new int[][](rows + 1, cols + 1);

	// Distance from or to the empty prefix.
	for (int r = 0; r <= rows; r++)
		table[r][0] = r;
	for (int c = 0; c <= cols; c++)
		table[0][c] = c;

	for (int r = 1; r <= rows; r++)
		for (int c = 1; c <= cols; c++)
		{
			immutable deletion     = table[r-1][c  ] + 1;
			immutable insertion    = table[r  ][c-1] + 1;
			immutable substitution = table[r-1][c-1] + (s[r-1] == t[c-1] ? 0 : 1);
			table[r][c] = min(deletion, insertion, substitution);
		}

	return table[rows][cols];
}
1168 
/// Return a number between 0.0 and 1.0 indicating how similar two strings are
/// (1.0 if identical).
/// Computed as one minus the edit distance divided by the length of the
/// longer string; two empty strings yield 1.
float stringSimilarity(string string1, string string2)
{
	immutable float distance = stringDistance(string1, string2);
	immutable float longest = max(string1.length, string2.length);
	return longest == 0 ? 1 : 1f - distance / longest;
}
1182 
/// Select best match from a list of items.
/// Returns -1 if none are above the threshold.
///
/// Items are compared to `target` case-insensitively (both are lower-cased)
/// with `stringSimilarity`. Among equally good candidates the last one wins.
/// Params:
///   items     = candidates to choose from
///   target    = text to look for
///   threshold = similarity a candidate must exceed to be considered
/// Returns: the index into `items` of the best match, or -1.
sizediff_t findBestMatch(in string[] items, string target, float threshold = 0.7)
{
	sizediff_t found = -1;
	float best = 0;

	// Loop-invariant: lower-case the target once, not once per item.
	auto loweredTarget = toLower(target);

	foreach (i, item; items)
	{
		float match = stringSimilarity(toLower(item), loweredTarget);
		if (match>threshold && match>=best)
		{
			best = match;
			found = i;
		}
	}

	return found;
}
1202 
/// Select best match from a list of items.
/// Returns null if none are above the threshold.
/// Same as `findBestMatch`, but yields the item itself instead of its index.
string selectBestFrom(in string[] items, string target, float threshold = 0.7)
{
	immutable at = findBestMatch(items, target, threshold);
	if (at < 0)
		return null;
	return items[at];
}
1210 
1211 // ************************************************************************
1212 
/// Generate a random string with the given parameters.
/// `std.random` is used as the source of randomness.
/// Not cryptographically secure.
/// Params:
///   length = number of characters to generate
///   chars  = alphabet; each output character is drawn from it uniformly
///            and independently
string randomString()(int length=20, string chars="abcdefghijklmnopqrstuvwxyz")
{
	import std.random;
	import std.range;

	auto pick = (int _) => chars[uniform(0, chars.length)];
	return iota(length).map!pick.array;
}
1222 }