1 /**
2  * Light read-only XML library
3  * May be deprecated in the future.
4  * See other XML modules for better implementations.
5  *
6  * License:
7  *   This Source Code Form is subject to the terms of
8  *   the Mozilla Public License, v. 2.0. If a copy of
9  *   the MPL was not distributed with this file, You
10  *   can obtain one at http://mozilla.org/MPL/2.0/.
11  *
12  * Authors:
13  *   Vladimir Panteleev <vladimir@thecybershadow.net>
14  *   Simon Arlott
15  */
16 
17 module ae.utils.xmllite;
18 
19 // TODO: better/safer handling of malformed XML
20 
21 import std.string;
22 import std.ascii;
23 import std.exception;
24 
25 import ae.utils.array;
26 import ae.utils.exception;
27 import ae.utils.xmlwriter;
28 
29 // ************************************************************************
30 
31 /// std.stream.Stream-like type with bonus speed
/// Minimal forward-reading cursor over a string
/// (std.stream.Stream-like, with bonus speed).
private struct StringStream
{
	/// The text being read.
	string s;
	/// Index of the next character that `read` will return.
	size_t position;

	@disable this();
	@disable this(this);

	/// Wraps `s`. The text is copied into a buffer that is followed by a
	/// few extra characters (quotes, '>' and NULs); `this.s` is a slice of
	/// that buffer which excludes the padding.
	this(string s)
	{
		enum ditch = "'\">\0\0\0\0\0"; // Dirty precaution
		auto padded = s ~ ditch;
		this.s = padded[0 .. padded.length - ditch.length];
	}

	/// Returns the character at `position` and advances past it.
	char read()
	{
		return s[position++];
	}

	/// Length of the wrapped text (padding not included).
	@property size_t size()
	{
		return s.length;
	}
}
48 
49 // ************************************************************************
50 
/// Declares `XmlParseException`, the exception type this module throws
/// for malformed input and failed lookups.
mixin DeclareException!q{XmlParseException};
52 
/// The kind of an `XmlNode`.
enum XmlNodeType
{
	None,    /// Not set (default for a freshly constructed node)
	Root,    /// Document root (used by `XmlDocument`)
	Node,    /// Regular element, `<tag ...>`
	Comment, /// `<!-- ... -->`
	Meta,    /// Processing instruction, `<?tag ...?>`
	DocType, /// Any other `<!...>` declaration, e.g. a doctype
	CData,   /// `<![CDATA[ ... ]]>`
	Text     /// Character data between tags
}
64 
/// Attribute storage for a node: a string-to-string `OrderedMap`.
alias XmlAttributes = OrderedMap!(string, string);
66 
/// A node of an XML tree, either produced by the parser or built by hand.
class XmlNode
{
	/// Element / processing-instruction name for `Node` and `Meta` nodes;
	/// the content itself for `Text`, `CData`, `Comment` and `DocType` nodes.
	string tag;
	/// Attributes of this node, by name.
	XmlAttributes attributes;
	/// The node this one was added to with `addChild` (null if none).
	XmlNode parent;
	/// Child nodes, in document order.
	XmlNode[] children;
	/// What kind of node this is.
	XmlNodeType type;
	/// Stream positions recorded by the parser: `startPos` is taken right
	/// after the node's first character was read, `endPos` right after the
	/// node was fully parsed.
	ulong startPos, endPos;

	/// Parses a single node from `s` using `XmlParseConfig`.
	this(ref StringStream s) { parseInto!XmlParseConfig(this, s, null); }
	/// ditto
	this(string s) { auto ss = StringStream(s); this(ss); }

	/// Creates an empty node of the given type and tag.
	this(XmlNodeType type = XmlNodeType.None, string tag = null)
	{
		this.type = type;
		this.tag = tag;
	}

	/// Sets attribute `name` to `value`. Returns `this` for chaining.
	XmlNode addAttribute(string name, string value)
	{
		attributes[name] = value;
		return this;
	}

	/// Appends `child` and makes this node its parent.
	/// Returns `this` (not the child) for chaining.
	XmlNode addChild(XmlNode child)
	{
		child.parent = this;
		children ~= child;
		return this;
	}

	/// Serializes this node using `XmlWriter`.
	override string toString() const
	{
		XmlWriter writer;
		writeTo(writer);
		return writer.output.get();
	}

	/// Serializes this node using `PrettyXmlWriter`.
	string toPrettyString() const
	{
		PrettyXmlWriter writer;
		writeTo(writer);
		return writer.output.get();
	}

	/// Writes this node and its descendants to `output`.
	/// The node's type must not be `XmlNodeType.None`.
	final void writeTo(XmlWriter)(ref XmlWriter output) const
	{
		void writeChildren()
		{
			foreach (child; children)
				child.writeTo(output);
		}

		void writeAttributes()
		{
			foreach (key, value; attributes)
				output.addAttribute(key, value);
		}

		final switch (type)
		{
			case XmlNodeType.None:
				assert(false);
			case XmlNodeType.Root:
				writeChildren();
				return;
			case XmlNodeType.Node:
				output.startTagWithAttributes(tag);
				writeAttributes();
				if (children.length)
				{
					// An element whose only child is text is written with
					// the formatter switched off, then re-enabled.
					bool oneLine = children.length == 1 && children[0].type == XmlNodeType.Text;
					if (oneLine)
						output.formatter.enabled = false;
					output.endAttributes();
					writeChildren();
					output.endTag(tag);
					if (oneLine)
					{
						output.formatter.enabled = true;
						output.newLine();
					}
				}
				else
					output.endAttributesAndTag();
				return;
			case XmlNodeType.Meta:
				assert(children.length == 0);
				output.startPI(tag);
				writeAttributes();
				output.endPI();
				return;
			case XmlNodeType.DocType:
				assert(children.length == 0);
				output.doctype(tag);
				return;
			case XmlNodeType.Text:
				output.startLine();
				output.text(tag);
				output.newLine();
				return;
			case XmlNodeType.Comment:
				output.startLine();
				output.comment(tag);
				return;
			case XmlNodeType.CData:
				// CDATA content is written through the same call as text.
				output.text(tag);
				return;
		}
	}

	/// The textual content of this node: the content of `Text` / `CData`
	/// nodes, the concatenated text of all children for `Node` / `Root`
	/// (with a newline prepended when the tag is "br"), and null for
	/// comments, processing instructions and doctypes.
	@property string text()
	{
		final switch (type)
		{
			case XmlNodeType.None:
				assert(false);
			case XmlNodeType.Text:
			case XmlNodeType.CData:
				return tag;
			case XmlNodeType.Node:
			case XmlNodeType.Root:
				string result;
				if (tag == "br")
					result = "\n";
				foreach (child; children)
					result ~= child.text();
				return result;
			case XmlNodeType.Comment:
			case XmlNodeType.Meta:
			case XmlNodeType.DocType:
				return null;
		}
	}

	/// Returns the first child element named `tag`, or null if there is none.
	final XmlNode findChild(string tag)
	{
		foreach (child; children)
			if (child.type == XmlNodeType.Node && child.tag == tag)
				return child;
		return null;
	}

	/// Returns all child elements named `tag`, in document order.
	final XmlNode[] findChildren(string tag)
	{
		XmlNode[] result;
		foreach (child; children)
			if (child.type == XmlNodeType.Node && child.tag == tag)
				result ~= child;
		return result;
	}

	/// Like `findChild`, but throws `XmlParseException` when no such child exists.
	final XmlNode opIndex(string tag)
	{
		auto node = findChild(tag);
		if (node is null)
			throw new XmlParseException("No such child: " ~ tag);
		return node;
	}

	/// Returns the `index`-th child element named `tag`.
	/// Throws `XmlParseException` if there are not that many.
	final XmlNode opIndex(string tag, size_t index)
	{
		auto nodes = findChildren(tag);
		if (index >= nodes.length)
			throw new XmlParseException(format("Can't get node with tag %s and index %d, there are only %d children with that tag", tag, index, nodes.length));
		return nodes[index];
	}

	/// Returns the `index`-th child (of any type) by reference.
	final ref XmlNode opIndex(size_t index)
	{
		return children[index];
	}

	/// Number of children.
	final @property size_t length() { return children.length; }
	alias opDollar = length;

	/// `foreach` support: iterates over the children. The length is
	/// re-read on every step, so children appended by `dg` are visited too.
	int opApply(int delegate(ref XmlNode) dg)
	{
		int result = 0;

		// size_t index: matches the type of children.length and avoids a
		// signed/unsigned comparison.
		for (size_t i = 0; i < children.length; i++)
		{
			result = dg(children[i]);
			if (result)
				break;
		}
		return result;
	}

	/// Returns a deep copy of this node: type, tag, attributes and
	/// (recursively) children. `parent`, `startPos` and `endPos` of the
	/// returned node itself are left at their defaults.
	final @property XmlNode dup()
	{
		auto result = new XmlNode(type, tag);
		result.attributes = attributes.dup;
		result.children.reserve(children.length);
		foreach (child; children)
			result.addChild(child.dup);
		return result;
	}
}
266 
/// Root of a parsed document: a `Root`-typed node tagged "<Root>" whose
/// children are the document's top-level nodes.
class XmlDocument : XmlNode
{
	/// Creates an empty document.
	this()
	{
		super(XmlNodeType.Root, "<Root>");
	}

	/// Parses every top-level node in `s` using `XmlParseConfig`.
	this(ref StringStream s)
	{
		this();
		parseInto!XmlParseConfig(this, s);
	}

	/// ditto
	this(string s)
	{
		auto stream = StringStream(s);
		this(stream);
	}
}
278 
279 /// The logic for how to handle a node's closing tags.
/// The logic for how to handle a node's closing tags.
/// A parse configuration returns one of these per tag name
/// from its `nodeCloseMode` function.
enum NodeCloseMode
{
	/// This element must always have an explicit closing tag
	/// (or a self-closing tag). An unclosed tag will lead to
	/// a parse error.
	/// In XML, all tags are "always".
	always,
/*
	/// Close tags are optional. When an element with a tag is
	/// encountered directly under an element with the same tag,
	/// it is assumed that the first element is closed before
	/// the second, so the two are siblings, not parent/child.
	/// Thus, `<p>a<p>b</p>` is parsed as `<p>a</p><p>b</p>`,
	/// not `<p>a<p>b</p></p>`, however `<p>a<div><p>b</div>` is
	/// still parsed as `<p>a<div><p>b</p></div></p>`.
	/// This mode can be used for relaxed HTML parsing.
	optional,
*/
	/// Close tags are optional, but are implied when absent.
	/// As a result, these elements cannot have any content,
	/// and any close tags must be adjacent to the open tag.
	implicit,

	/// This element is void and must never have a closing tag.
	/// It is always implicitly closed right after opening.
	/// A close tag is always an error.
	/// This mode can be used for strict parsing of HTML5 void
	/// elements.
	never,
}
310 
311 /// Configuration for parsing XML.
/// Default parse configuration (plain XML): every element needs an
/// explicit or self-closing end, whitespace around text is not preserved,
/// and every attribute must have a value.
struct XmlParseConfig
{
static:
	/// Whether attributes may appear without `=value`.
	enum optionalParameterValues = false;

	/// Close-tag policy for `tag`; always `NodeCloseMode.always`.
	NodeCloseMode nodeCloseMode(string tag)
	{
		return NodeCloseMode.always;
	}

	/// Whether whitespace inside `tag` is kept verbatim; never.
	bool preserveWhitespace(string tag)
	{
		return false;
	}
}
319 
320 /// Configuration for strict parsing of HTML5.
321 /// All void tags must never be closed, and all
322 /// non-void tags must always be explicitly closed.
323 /// Attributes must still be quoted like in XML.
/// Parse configuration for strict HTML5: elements listed in
/// `voidElements` must never be closed, all others must always be
/// closed explicitly. Attribute values are optional, but when present
/// they must still be quoted like in XML.
struct Html5StrictParseConfig
{
static:
	/// Tags treated as void elements.
	immutable voidElements = [
		"area"   , "base"  , "br"   , "col" ,
		"command", "embed" , "hr"   , "img" ,
		"input"  , "keygen", "link" , "meta",
		"param"  , "source", "track", "wbr" ,
	];

	/// `never` for void elements, `always` for everything else.
	NodeCloseMode nodeCloseMode(string tag)
	{
		if (tag.isOneOf(voidElements))
			return NodeCloseMode.never;
		return NodeCloseMode.always;
	}

	/// Attributes may appear without `=value`.
	enum optionalParameterValues = true;

	/// Whether whitespace inside `tag` is kept verbatim.
	bool preserveWhitespace(string tag)
	{
		return false; /*TODO*/
	}
}
345 
/// Parse an SGML-ish string into an XmlNode.
/// Instantiate with a parse configuration, e.g. `parse!XmlParseConfig(text)`.
alias parse = parseString!XmlNode;

/// Parse an SGML-ish string into an XmlDocument.
/// Instantiate with a parse configuration, e.g. `parseDocument!XmlParseConfig(text)`.
alias parseDocument = parseString!XmlDocument;

/// Parse a string into an XmlDocument using the default `XmlParseConfig`.
alias xmlParse = parseDocument!XmlParseConfig;
353 
354 private:
355 
/// Two-stage template: `parseString!Node` picks the result type,
/// the inner `(Config)` picks the parse configuration.
public // alias
template parseString(Node)
{
	/// Wraps `s` in a StringStream and parses it into a new `Node`.
	Node parseString(Config)(string s)
	{
		auto stream = StringStream(s);
		// Bind the node type first; the configuration is applied separately.
		alias parser = parseStream!Node;
		return parser!Config(stream);
	}
}
366 
/// Two-stage template: `parseStream!Node` picks the result type,
/// the inner `(Config)` picks the parse configuration.
template parseStream(Node)
{
	/// Allocates a new `Node` and fills it by parsing from `s`.
	Node parseStream(Config)(ref StringStream s)
	{
		auto result = new Node;
		parseInto!Config(result, s);
		return result;
	}
}
376 
/// Parse a single XmlNode from a StringStream (configuration supplied on use).
alias parseNode = parseStream!XmlNode;
378 
/// Parse an SGML-ish StringStream into an XmlDocument.
/// Top-level nodes are parsed one after another until the stream is
/// exhausted and appended to `d`. A parse error is re-thrown wrapped in
/// an XmlParseException carrying the row, column and offset the stream
/// had reached.
void parseInto(Config)(XmlDocument d, ref StringStream s)
{
	skipWhitespace(s);
	while (s.position < s.size)
	{
		try
		{
			auto child = new XmlNode;
			parseInto!Config(child, s, null);
			d.addChild(child);
			skipWhitespace(s);
		}
		catch (XmlParseException e)
		{
			import std.algorithm.searching;
			import std.range : retro;

			// Everything consumed so far; used to derive the position.
			auto consumed = s.s[0..s.position];
			auto row    = consumed.representation.count('\n');
			// Distance back to the previous newline = 0-based column.
			auto column = consumed.representation.retro.countUntil('\n');
			if (column < 0)
				column = consumed.length; // still on the first line
			throw new XmlParseException("Error at %d:%d (offset %d)".format(
				1 + row,
				1 + column,
				consumed.length,
			), e);
		}
	}
}
408 
/// Parse an SGML-ish StringStream into an XmlNode.
///
/// Reads exactly one node (text, comment, CDATA, doctype-like declaration,
/// processing instruction, or element with all of its children) starting
/// at the stream's current position, and stores it in `node`.
///
/// Params:
///   node               = the node to fill in
///   s                  = the stream to read from
///   parentTag          = tag of the enclosing element, passed to
///                        `Config.preserveWhitespace`
///   preserveWhitespace = true if an ancestor already requested that
///                        whitespace be preserved
///
/// Throws: XmlParseException on malformed input, including input that
/// ends in the middle of a text node.
void parseInto(Config)(XmlNode node, ref StringStream s, string parentTag = null, bool preserveWhitespace = false)
{
	char c;

	preserveWhitespace |= Config.preserveWhitespace(parentTag);
	if (preserveWhitespace)
		c = s.read();
	else
		do
			c = s.read();
		while (isWhiteChar[c]);

	node.startPos = s.position;
	if (c!='<')  // text node
	{
		node.type = XmlNodeType.Text;
		string text;
		while (c!='<')
		{
			text ~= c;
			// Text must be followed by a tag; running out of input here
			// is a parse error rather than a read past the end.
			enforce!XmlParseException(s.position < s.size, "Unexpected end of input in text node");
			c = s.read();
		}
		s.position--; // rewind to '<'
		if (!preserveWhitespace)
			while (text.length && isWhiteChar[text[$-1]])
				text = text[0..$-1];
		node.tag = decodeEntities(text);
		//tag = tag.strip();
	}
	else
	{
		c = s.read();
		if (c=='!')
		{
			c = s.read();
			if (c == '-') // comment
			{
				expect(s, '-');
				node.type = XmlNodeType.Comment;
				string tag;
				// Accumulate until the terminator shows up at the tail.
				do
				{
					c = s.read();
					tag ~= c;
				} while (tag.length<3 || tag[$-3..$] != "-->");
				tag = tag[0..$-3];
				node.tag = tag;
			}
			else
			if (c == '[') // CDATA
			{
				foreach (x; "CDATA[")
					expect(s, x);
				node.type = XmlNodeType.CData;
				string tag;
				do
				{
					c = s.read();
					tag ~= c;
				} while (tag.length<3 || tag[$-3..$] != "]]>");
				tag = tag[0..$-3];
				node.tag = tag;
			}
			else // doctype, etc.
			{
				node.type = XmlNodeType.DocType;
				// Everything between "<!" and the next '>' becomes the tag.
				while (c != '>')
				{
					node.tag ~= c;
					c = s.read();
				}
			}
		}
		else
		if (c=='?')
		{
			node.type = XmlNodeType.Meta;
			node.tag = readWord(s);
			if (node.tag.length==0) throw new XmlParseException("Invalid tag");
			while (true)
			{
				skipWhitespace(s);
				if (peek(s)=='?')
					break;
				readAttribute!Config(node, s);
			}
			c = s.read();
			expect(s, '>');
		}
		else
		if (c=='/')
			throw new XmlParseException("Unexpected close tag");
		else
		{
			node.type = XmlNodeType.Node;
			node.tag = c~readWord(s);
			while (true)
			{
				skipWhitespace(s);
				c = peek(s);
				if (c=='>' || c=='/')
					break;
				readAttribute!Config(node, s);
			}
			c = s.read();

			auto closeMode = Config.nodeCloseMode(node.tag);
			if (closeMode == NodeCloseMode.never)
				enforce!XmlParseException(c=='>', "Self-closing void tag <%s>".format(node.tag));
			else
			if (closeMode == NodeCloseMode.implicit)
			{
				if (c == '/')
					expect(s, '>');
			}
			else
			{
				if (c=='>')
				{
					// Outer loop: repeats after a stray close tag of an
					// implicitly-closed element has been skipped.
					while (true)
					{
						// Inner loop: parse children until a close tag.
						while (true)
						{
							if (!preserveWhitespace && !Config.preserveWhitespace(node.tag))
								skipWhitespace(s);
							if (peek(s)=='<' && peek(s, 2)=='/')
								break;
							try
							{
								auto child = new XmlNode;
								parseInto!Config(child, s, node.tag, preserveWhitespace);
								node.addChild(child);
							}
							catch (XmlParseException e)
								throw new XmlParseException("Error while processing child of "~node.tag, e);
						}
						expect(s, '<');
						expect(s, '/');
						auto word = readWord(s);
						if (word != node.tag)
						{
							auto closeMode2 = Config.nodeCloseMode(word);
							if (closeMode2 == NodeCloseMode.implicit)
							{
								// NOTE(review): `node.parent` is only assigned by
								// addChild, which the recursive call above runs
								// after this function returns, and the '>' of the
								// close tag is not consumed before `continue`.
								// Confirm this branch behaves as intended; no
								// configuration in this file returns `implicit`.
								auto parent = node.parent;
								enforce!XmlParseException(parent, "Top-level close tag for implicitly-closed node </%s>".format(word));
								enforce!XmlParseException(parent.children.length, "First-child close tag for implicitly-closed node </%s>".format(word));
								enforce!XmlParseException(parent.children[$-1].tag == word, "Non-empty implicitly-closed node <%s>".format(word));
								continue;
							}
							else
								enforce!XmlParseException(word == node.tag, "Expected </%s>, not </%s>".format(node.tag, word));
						}
						expect(s, '>');
						break;
					}
				}
				else // '/'
					expect(s, '>');
			}
		}
	}
	node.endPos = s.position;
}
575 
576 private:
577 
/// Reads one `name="value"` (or `name='value'`) attribute from `s` and
/// stores it, entity-decoded, in `node.attributes`. When
/// `Config.optionalParameterValues` is set, a bare `name` is accepted and
/// stored with a null value.
/// Throws: XmlParseException on a missing name, missing '=' or bad quote.
void readAttribute(Config)(XmlNode node, ref StringStream s)
{
	string name = readWord(s);
	enforce!XmlParseException(name.length != 0, "Invalid attribute");
	skipWhitespace(s);

	static if (Config.optionalParameterValues)
	{
		// Value-less attribute: nothing more to read.
		if (peek(s) != '=')
		{
			node.attributes[name] = null;
			return;
		}
	}

	expect(s, '=');
	skipWhitespace(s);

	char delim = s.read();
	immutable quoted = delim == '\'' || delim == '"';
	enforce!XmlParseException(quoted, "Expected ' or \", not %s".format(delim));

	string value = readUntil(s, delim);
	node.attributes[name] = decodeEntities(value);
}
602 
/// Returns the `n`-th upcoming character (1 = the next one) without
/// consuming anything.
char peek(ref StringStream s, int n=1)
{
	immutable index = s.position + n - 1;
	return s.s[index];
}
607 
/// Advances `s` past any whitespace at the current position.
/// Reads through the raw pointer, i.e. without bounds checking.
void skipWhitespace(ref StringStream s)
{
	auto chars = s.s.ptr;
	for (; isWhiteChar[chars[s.position]]; s.position++) {}
}
613 
/// Lookup tables indexed by character code: whether the character is
/// ASCII whitespace, and whether it may appear in a tag/attribute name
/// (ASCII letters and digits plus '-', '_' and ':').
__gshared bool[256] isWhiteChar, isWordChar;

/// Fills the lookup tables.
shared static this()
{
	foreach (code; 0..256)
	{
		immutable bool namePunctuation = code=='-' || code=='_' || code==':';
		isWhiteChar[code] = isWhite(code);
		isWordChar[code] = namePunctuation || isAlphaNum(code);
	}
}
624 
/// Consumes the longest run of word characters (see `isWordChar`) at the
/// current position and returns it as a slice of the stream's text.
/// Returns an empty string, consuming nothing, if there is none.
string readWord(ref StringStream stream)
{
	auto base = stream.s.ptr;
	immutable limit = stream.s.length;
	immutable first = stream.position;

	size_t cursor = first;
	while (cursor < limit && isWordChar[base[cursor]])
		cursor++;

	stream.position = cursor;
	return base[first .. cursor];
}
636 
/// Consumes one character from `s` and throws XmlParseException
/// unless it is `c`.
void expect(ref StringStream s, char c)
{
	immutable actual = s.read();
	enforce!XmlParseException(actual == c, "Expected " ~ c ~ ", got " ~ actual);
}
643 
/// Consumes characters up to and including the next occurrence of `until`
/// and returns the text before it as a slice of the stream's text.
/// Throws: XmlParseException if `until` does not occur before the end of
/// the input (previously the scan ran past the end of the text).
string readUntil(ref StringStream s, char until)
{
	auto start = s.s.ptr + s.position;
	auto end = s.s.ptr + s.s.length;
	auto p = start;
	// Bounded scan: never look beyond the end of the wrapped text.
	while (p < end && *p != until) p++;
	enforce!XmlParseException(p < end, "Unexpected end of input, expected " ~ until);
	auto len = p-start;
	s.position += len + 1; // +1 skips the delimiter itself
	return start[0..len];
}
653 
// Round trip: parsing a document and serializing it again must
// reproduce the input exactly.
unittest
{
	enum xmlText =
		`<?xml version="1.0" encoding="UTF-8"?>` ~
		`<quotes>` ~
			`<quote author="Alan Perlis">` ~
				`When someone says, "I want a programming language in which I need only say what I want done," give him a lollipop.` ~
			`</quote>` ~
		`</quotes>`;
	auto doc = new XmlDocument(xmlText);
	assert(doc.toString() == xmlText, doc.toString());
}
666 
// Whitespace handling: text is stripped when whitespace is not preserved
// and kept verbatim when it is.
unittest
{
	// Parses `<tag>s</tag>` with a configuration whose preserveWhitespace
	// always returns `preserve`, and returns the text of the first child
	// (null if there is none).
	string testOne(bool preserve)(string s)
	{
		static struct ParseConfig
		{
		static:
			NodeCloseMode nodeCloseMode(string tag) { return XmlParseConfig.nodeCloseMode(tag); }
			bool preserveWhitespace(string tag) { return preserve; }
			enum optionalParameterValues = XmlParseConfig.optionalParameterValues;
		}
		auto node = new XmlNode;
		auto str = StringStream("<tag>" ~ s ~ "</tag>");
		parseInto!ParseConfig(node, str, null);
		// import std.stdio; writeln(preserve, ": ", str.s, " -> ", node.toString);
		return node.children.length ? node.children[0].tag : null;
	}

	foreach (tag; ["a", " a", "a ", " a ", " a  a ", " ", ""])
	{
		assert(testOne!false(tag) == strip(tag),
			"Parsing <tag>" ~ tag ~ "</tag> while not preserving whitespace, expecting '" ~ strip(tag) ~ "', got '" ~ testOne!false(tag) ~ "'");
		assert(testOne!true(tag) == tag,
			"Parsing <tag>" ~ tag ~ "</tag> while preserving whitespace, expecting '" ~ tag ~ "', got '" ~ testOne!true(tag) ~ "'");
	}
}
693 
// Whitespace preservation requested for an element ("a") also applies
// to text inside its descendants ("b").
unittest
{
	static struct ParseConfig
	{
	static:
		NodeCloseMode nodeCloseMode(string tag) { return XmlParseConfig.nodeCloseMode(tag); }
		bool preserveWhitespace(string tag) { return tag == "a"; }
		enum optionalParameterValues = XmlParseConfig.optionalParameterValues;
	}
	auto node = new XmlNode;
	auto str = StringStream("<a><b> foo </b></a>");
	parseInto!ParseConfig(node, str, null);
	assert(node.children[0].children[0].tag == " foo ");
}
708 
/// Named character entities: entity name (without '&' and ';') -> character.
const dchar[string] entities;
/// Reverse of `entities`: character -> entity name.
/*const*/ string[dchar] entityNames;

// Fills `entities` from the table below, then derives `entityNames` from it.
// NOTE(review): this is a `shared static this` while the two tables are not
// declared shared/__gshared - confirm the tables are populated for threads
// other than the one that runs this constructor.
shared static this()
{
	entities =
	[
		"quot" : '\&quot;',
		"amp" : '\&amp;',
		"lt" : '\&lt;',
		"gt" : '\&gt;',

		"OElig" : '\&OElig;',
		"oelig" : '\&oelig;',
		"Scaron" : '\&Scaron;',
		"scaron" : '\&scaron;',
		"Yuml" : '\&Yuml;',
		"circ" : '\&circ;',
		"tilde" : '\&tilde;',
		"ensp" : '\&ensp;',
		"emsp" : '\&emsp;',
		"thinsp" : '\&thinsp;',
		"zwnj" : '\&zwnj;',
		"zwj" : '\&zwj;',
		"lrm" : '\&lrm;',
		"rlm" : '\&rlm;',
		"ndash" : '\&ndash;',
		"mdash" : '\&mdash;',
		"lsquo" : '\&lsquo;',
		"rsquo" : '\&rsquo;',
		"sbquo" : '\&sbquo;',
		"ldquo" : '\&ldquo;',
		"rdquo" : '\&rdquo;',
		"bdquo" : '\&bdquo;',
		"dagger" : '\&dagger;',
		"Dagger" : '\&Dagger;',
		"permil" : '\&permil;',
		"lsaquo" : '\&lsaquo;',
		"rsaquo" : '\&rsaquo;',
		"euro" : '\&euro;',

		"nbsp" : '\&nbsp;',
		"iexcl" : '\&iexcl;',
		"cent" : '\&cent;',
		"pound" : '\&pound;',
		"curren" : '\&curren;',
		"yen" : '\&yen;',
		"brvbar" : '\&brvbar;',
		"sect" : '\&sect;',
		"uml" : '\&uml;',
		"copy" : '\&copy;',
		"ordf" : '\&ordf;',
		"laquo" : '\&laquo;',
		"not" : '\&not;',
		"shy" : '\&shy;',
		"reg" : '\&reg;',
		"macr" : '\&macr;',
		"deg" : '\&deg;',
		"plusmn" : '\&plusmn;',
		"sup2" : '\&sup2;',
		"sup3" : '\&sup3;',
		"acute" : '\&acute;',
		"micro" : '\&micro;',
		"para" : '\&para;',
		"middot" : '\&middot;',
		"cedil" : '\&cedil;',
		"sup1" : '\&sup1;',
		"ordm" : '\&ordm;',
		"raquo" : '\&raquo;',
		"frac14" : '\&frac14;',
		"frac12" : '\&frac12;',
		"frac34" : '\&frac34;',
		"iquest" : '\&iquest;',
		"Agrave" : '\&Agrave;',
		"Aacute" : '\&Aacute;',
		"Acirc" : '\&Acirc;',
		"Atilde" : '\&Atilde;',
		"Auml" : '\&Auml;',
		"Aring" : '\&Aring;',
		"AElig" : '\&AElig;',
		"Ccedil" : '\&Ccedil;',
		"Egrave" : '\&Egrave;',
		"Eacute" : '\&Eacute;',
		"Ecirc" : '\&Ecirc;',
		"Euml" : '\&Euml;',
		"Igrave" : '\&Igrave;',
		"Iacute" : '\&Iacute;',
		"Icirc" : '\&Icirc;',
		"Iuml" : '\&Iuml;',
		"ETH" : '\&ETH;',
		"Ntilde" : '\&Ntilde;',
		"Ograve" : '\&Ograve;',
		"Oacute" : '\&Oacute;',
		"Ocirc" : '\&Ocirc;',
		"Otilde" : '\&Otilde;',
		"Ouml" : '\&Ouml;',
		"times" : '\&times;',
		"Oslash" : '\&Oslash;',
		"Ugrave" : '\&Ugrave;',
		"Uacute" : '\&Uacute;',
		"Ucirc" : '\&Ucirc;',
		"Uuml" : '\&Uuml;',
		"Yacute" : '\&Yacute;',
		"THORN" : '\&THORN;',
		"szlig" : '\&szlig;',
		"agrave" : '\&agrave;',
		"aacute" : '\&aacute;',
		"acirc" : '\&acirc;',
		"atilde" : '\&atilde;',
		"auml" : '\&auml;',
		"aring" : '\&aring;',
		"aelig" : '\&aelig;',
		"ccedil" : '\&ccedil;',
		"egrave" : '\&egrave;',
		"eacute" : '\&eacute;',
		"ecirc" : '\&ecirc;',
		"euml" : '\&euml;',
		"igrave" : '\&igrave;',
		"iacute" : '\&iacute;',
		"icirc" : '\&icirc;',
		"iuml" : '\&iuml;',
		"eth" : '\&eth;',
		"ntilde" : '\&ntilde;',
		"ograve" : '\&ograve;',
		"oacute" : '\&oacute;',
		"ocirc" : '\&ocirc;',
		"otilde" : '\&otilde;',
		"ouml" : '\&ouml;',
		"divide" : '\&divide;',
		"oslash" : '\&oslash;',
		"ugrave" : '\&ugrave;',
		"uacute" : '\&uacute;',
		"ucirc" : '\&ucirc;',
		"uuml" : '\&uuml;',
		"yacute" : '\&yacute;',
		"thorn" : '\&thorn;',
		"yuml" : '\&yuml;',

		"fnof" : '\&fnof;',
		"Alpha" : '\&Alpha;',
		"Beta" : '\&Beta;',
		"Gamma" : '\&Gamma;',
		"Delta" : '\&Delta;',
		"Epsilon" : '\&Epsilon;',
		"Zeta" : '\&Zeta;',
		"Eta" : '\&Eta;',
		"Theta" : '\&Theta;',
		"Iota" : '\&Iota;',
		"Kappa" : '\&Kappa;',
		"Lambda" : '\&Lambda;',
		"Mu" : '\&Mu;',
		"Nu" : '\&Nu;',
		"Xi" : '\&Xi;',
		"Omicron" : '\&Omicron;',
		"Pi" : '\&Pi;',
		"Rho" : '\&Rho;',
		"Sigma" : '\&Sigma;',
		"Tau" : '\&Tau;',
		"Upsilon" : '\&Upsilon;',
		"Phi" : '\&Phi;',
		"Chi" : '\&Chi;',
		"Psi" : '\&Psi;',
		"Omega" : '\&Omega;',
		"alpha" : '\&alpha;',
		"beta" : '\&beta;',
		"gamma" : '\&gamma;',
		"delta" : '\&delta;',
		"epsilon" : '\&epsilon;',
		"zeta" : '\&zeta;',
		"eta" : '\&eta;',
		"theta" : '\&theta;',
		"iota" : '\&iota;',
		"kappa" : '\&kappa;',
		"lambda" : '\&lambda;',
		"mu" : '\&mu;',
		"nu" : '\&nu;',
		"xi" : '\&xi;',
		"omicron" : '\&omicron;',
		"pi" : '\&pi;',
		"rho" : '\&rho;',
		"sigmaf" : '\&sigmaf;',
		"sigma" : '\&sigma;',
		"tau" : '\&tau;',
		"upsilon" : '\&upsilon;',
		"phi" : '\&phi;',
		"chi" : '\&chi;',
		"psi" : '\&psi;',
		"omega" : '\&omega;',
		"thetasym" : '\&thetasym;',
		"upsih" : '\&upsih;',
		"piv" : '\&piv;',
		"bull" : '\&bull;',
		"hellip" : '\&hellip;',
		"prime" : '\&prime;',
		"Prime" : '\&Prime;',
		"oline" : '\&oline;',
		"frasl" : '\&frasl;',
		"weierp" : '\&weierp;',
		"image" : '\&image;',
		"real" : '\&real;',
		"trade" : '\&trade;',
		"alefsym" : '\&alefsym;',
		"larr" : '\&larr;',
		"uarr" : '\&uarr;',
		"rarr" : '\&rarr;',
		"darr" : '\&darr;',
		"harr" : '\&harr;',
		"crarr" : '\&crarr;',
		"lArr" : '\&lArr;',
		"uArr" : '\&uArr;',
		"rArr" : '\&rArr;',
		"dArr" : '\&dArr;',
		"hArr" : '\&hArr;',
		"forall" : '\&forall;',
		"part" : '\&part;',
		"exist" : '\&exist;',
		"empty" : '\&empty;',
		"nabla" : '\&nabla;',
		"isin" : '\&isin;',
		"notin" : '\&notin;',
		"ni" : '\&ni;',
		"prod" : '\&prod;',
		"sum" : '\&sum;',
		"minus" : '\&minus;',
		"lowast" : '\&lowast;',
		"radic" : '\&radic;',
		"prop" : '\&prop;',
		"infin" : '\&infin;',
		"ang" : '\&ang;',
		"and" : '\&and;',
		"or" : '\&or;',
		"cap" : '\&cap;',
		"cup" : '\&cup;',
		"int" : '\&int;',
		"there4" : '\&there4;',
		"sim" : '\&sim;',
		"cong" : '\&cong;',
		"asymp" : '\&asymp;',
		"ne" : '\&ne;',
		"equiv" : '\&equiv;',
		"le" : '\&le;',
		"ge" : '\&ge;',
		"sub" : '\&sub;',
		"sup" : '\&sup;',
		"nsub" : '\&nsub;',
		"sube" : '\&sube;',
		"supe" : '\&supe;',
		"oplus" : '\&oplus;',
		"otimes" : '\&otimes;',
		"perp" : '\&perp;',
		"sdot" : '\&sdot;',
		"lceil" : '\&lceil;',
		"rceil" : '\&rceil;',
		"lfloor" : '\&lfloor;',
		"rfloor" : '\&rfloor;',
		"loz" : '\&loz;',
		"spades" : '\&spades;',
		"clubs" : '\&clubs;',
		"hearts" : '\&hearts;',
		"diams" : '\&diams;',
		"lang" : '\&lang;',
		"rang" : '\&rang;',

		"apos"  : '\''
	];
	// Build the reverse lookup used when encoding.
	foreach (name, c; entities)
		entityNames[c] = name;
}
976 
977 import core.stdc.stdio;
978 import std.utf;
979 import ae.utils.textout;
980 
/// Returns `str` with every character accepted by `pred` replaced by its
/// named entity. If no character matches, `str` itself is returned and
/// nothing is allocated. With `unicode` set the string is scanned by
/// decoded code point, otherwise char by char.
/*private*/ public string encodeEntitiesImpl(bool unicode, alias pred)(string str)
{
	for (size_t next = 0; next < str.length; )
	{
		immutable current = next;
		static if (unicode)
			dchar c = decode(str, next);
		else
			char c = str[next++];

		if (!pred(c))
			continue;

		// First match: copy the clean prefix, then encode the remainder.
		StringBuilder builder;
		builder.preallocate(str.length * 11 / 10);
		builder.put(str[0..current]);
		builder.putEncodedEntitiesImpl!(unicode, pred)(str[current..$]);
		return builder.get();
	}
	return str;
}
1003 
/// Writes `str` to `sink`, replacing every character accepted by `pred`
/// with `&name;` as looked up in `entityNames`. With `unicode` set the
/// string is scanned by decoded code point, otherwise char by char.
/*private*/ public template putEncodedEntitiesImpl(bool unicode, alias pred)
{
	void putEncodedEntitiesImpl(Sink, S)(ref Sink sink, S str)
	{
		size_t pending = 0; // start of the run not yet written to the sink
		for (size_t next = 0; next < str.length; )
		{
			immutable current = next;
			static if (unicode)
				dchar c = decode(str, next);
			else
				char c = str[next++];

			if (!pred(c))
				continue;

			sink.put(str[pending..current], '&', entityNames[c], ';');
			pending = next;
		}
		sink.put(str[pending..$]);
	}
}
1026 
/// Returns the string with the five characters `<`, `>`, `"`, `'` and `&` replaced by named entities.
public alias encodeEntities = encodeEntitiesImpl!(false, (char c) => c=='<' || c=='>' || c=='"' || c=='\'' || c=='&');
/// Like `encodeEntities`, but writes the result to a sink instead of returning it.
public alias putEncodedEntities = putEncodedEntitiesImpl!(false, (char c) => c=='<' || c=='>' || c=='"' || c=='\'' || c=='&');
1029 
/// Returns `str` with every character that has a named entity (see
/// `entityNames`) replaced by `&name;`. If no such character occurs,
/// `str` itself is returned.
public string encodeAllEntities(string str)
{
	import std.array : appender;

	// Single forward pass: copy unchanged runs and entity references into
	// one buffer instead of rebuilding the whole string per replacement.
	auto result = appender!string();
	size_t pending = 0; // start of the run not yet copied
	bool replaced = false;

	foreach (i, dchar c; str)
	{
		auto name = c in entityNames;
		if (!name)
			continue;

		result.put(str[pending..i]);
		result.put('&');
		result.put(*name);
		result.put(';');
		pending = i + stride(str, i); // skip the (possibly multi-byte) character
		replaced = true;
	}

	if (!replaced)
		return str;
	result.put(str[pending..$]);
	return result.data;
}
1041 
1042 import ae.utils.text;
1043 import std.conv;
1044 
/// Returns `str` with entity references replaced by the characters they
/// denote: named entities from `entities`, decimal `&#N;` and hexadecimal
/// `&#xN;`. A string without '&' is returned as is.
/// Throws: XmlParseException for an '&' that does not start a
/// `name;` reference and for unknown entity names.
public string decodeEntities(string str)
{
	auto pieces = str.fastSplit('&');
	if (pieces.length <= 1)
		return str;

	// Result layout: text, char, text, char, ..., text.
	auto parts = new string[pieces.length*2 - 1];
	// One 4-char scratch buffer per entity to hold its UTF-8 encoding.
	auto scratch = new char[4][pieces.length-1];
	parts[0] = pieces[0];

	foreach (n, piece; pieces[1..$])
	{
		auto semicolon = piece.indexOf(';');
		enforce!XmlParseException(semicolon>0, "Invalid entity (unescaped ampersand?)");

		dchar c;
		if (piece[0]!='#')
		{
			auto known = piece[0..semicolon] in entities;
			enforce!XmlParseException(known, "Unknown entity: " ~ piece[0..semicolon]);
			c = *known;
		}
		else
		if (piece[1]=='x')
			c = fromHex!uint(piece[2..semicolon]);
		else
			c = to!uint(piece[1..semicolon]);

		immutable encodedLength = std.utf.encode(scratch[n], c);
		parts[1+n*2] = cast(string) scratch[n][0..encodedLength];
		parts[2+n*2] = piece[semicolon+1..$];
	}

	return parts.join();
}
1081 
/// Deprecated name for `decodeEntities`.
deprecated alias decodeEntities convertEntities;
1083 
// Entity encoding (basic five and full table) and decoding.
unittest
{
	assert(encodeEntities(`The <Smith & Wesson> "lock'n'load"`) == `The &lt;Smith &amp; Wesson&gt; &quot;lock&apos;n&apos;load&quot;`);
	assert(encodeAllEntities("©,€") == "&copy;,&euro;");
	assert(decodeEntities("&copy;,&euro;") == "©,€");
}