1 /**
2  * Light read-only XML library
3  * May be deprecated in the future.
4  * See other XML modules for better implementations.
5  *
6  * License:
7  *   This Source Code Form is subject to the terms of
8  *   the Mozilla Public License, v. 2.0. If a copy of
9  *   the MPL was not distributed with this file, You
10  *   can obtain one at http://mozilla.org/MPL/2.0/.
11  *
12  * Authors:
13  *   Vladimir Panteleev <vladimir@thecybershadow.net>
14  *   Simon Arlott
15  */
16 
17 module ae.utils.xmllite;
18 
19 // TODO: better/safer handling of malformed XML
20 
21 import std.string;
22 import std.ascii;
23 import std.exception;
24 
25 import ae.utils.array;
26 import ae.utils.exception;
27 import ae.utils.xmlwriter;
28 
29 // ************************************************************************
30 
/// Minimal forward-reading cursor over a string; a faster stand-in
/// for a std.stream.Stream-like type.
private struct StringStream
{
	/// The text being read.
	string s;
	/// Index of the next character `read` will return.
	size_t position;

	@disable this();
	@disable this(this);

	/// Wraps `s`. The text is copied together with a few trailing
	/// guard bytes which are then sliced off again, so that the
	/// unchecked pointer reads done by the scanning helpers find a
	/// quote, a '>' or a NUL directly after the logical end.
	this(string s)
	{
		enum padding = "'\">\0\0\0\0\0"; // Dirty precaution
		auto padded = s ~ padding;
		this.s = padded[0 .. padded.length - padding.length];
	}

	/// Returns the character at `position` and advances past it.
	char read()
	{
		auto index = position++;
		return s[index];
	}

	/// Total number of characters in the stream.
	@property size_t size()
	{
		return s.length;
	}
}
48 
49 // ************************************************************************
50 
// Declares the XmlParseException class. This module throws it for
// malformed input, for unknown entities, and for failed child lookups
// through XmlNode.opIndex.
mixin DeclareException!q{XmlParseException};
52 
/// The kind of an XmlNode; decides how `XmlNode.tag` is interpreted
/// and how the node is serialized.
enum XmlNodeType
{
	/// Not yet initialized (the default for a freshly constructed node).
	None,
	/// Document root; only a container for top-level children.
	Root,
	/// Regular element; `tag` is the element name.
	Node,
	/// `<!-- ... -->`; `tag` holds the comment body.
	Comment,
	/// `<? ... ?>`; `tag` is the target name, with attributes.
	Meta,
	/// Any other `<! ... >` declaration; `tag` holds its contents.
	DocType,
	/// `<![CDATA[ ... ]]>`; `tag` holds the raw contents.
	CData,
	/// Character data; `tag` holds the entity-decoded text.
	Text
}
64 
/// Attribute storage used by XmlNode: maps attribute names to values.
alias XmlAttributes = OrderedMap!(string, string);
66 
/// A node of a parsed (or manually built) XML tree.
class XmlNode
{
	/// Meaning depends on `type`: element name for Node, target name
	/// for Meta, and the textual content for Text, CData, Comment and
	/// DocType nodes.
	string tag;
	/// Attributes of this node (filled for Node and Meta when parsing).
	XmlAttributes attributes;
	/// Node this one was added to with `addChild`, or null.
	XmlNode parent;
	/// Child nodes, in document order.
	XmlNode[] children;
	/// Kind of node; see XmlNodeType.
	XmlNodeType type;
	/// Stream positions recorded by the parser: `startPos` is taken
	/// right after the node's first character has been consumed,
	/// `endPos` once the node has been parsed completely.
	ulong startPos, endPos;

	/// Parses a single node from the stream using XmlParseConfig.
	this(ref StringStream s) { parseInto!XmlParseConfig(this, s, null); }
	/// Parses a single node from a string using XmlParseConfig.
	this(string s) { auto ss = StringStream(s); this(ss); }

	/// Creates an empty node of the given type and tag.
	this(XmlNodeType type = XmlNodeType.None, string tag = null)
	{
		this.type = type;
		this.tag = tag;
	}

	/// Sets attribute `name` to `value`. Returns `this` for chaining.
	XmlNode addAttribute(string name, string value)
	{
		attributes[name] = value;
		return this;
	}

	/// Appends `child` and makes this node its parent.
	/// Returns `this` (not the child) for chaining.
	XmlNode addChild(XmlNode child)
	{
		child.parent = this;
		children ~= child;
		return this;
	}

	/// Serializes this node and its descendants with a default XmlWriter.
	override string toString() const
	{
		XmlWriter writer;
		writeTo(writer);
		return writer.output.get();
	}

	/// Serializes this node and its descendants into `output`.
	/// Asserts if the node type is None, or if a Meta / DocType node
	/// has children.
	final void writeTo(XmlWriter)(ref XmlWriter output) const
	{
		void writeChildren()
		{
			foreach (child; children)
				child.writeTo(output);
		}

		void writeAttributes()
		{
			foreach (key, value; attributes)
				output.addAttribute(key, value);
		}

		final switch (type)
		{
			case XmlNodeType.None:
				assert(false);
			case XmlNodeType.Root:
				writeChildren();
				return;
			case XmlNodeType.Node:
				output.startTagWithAttributes(tag);
				writeAttributes();
				if (children.length)
				{
					// An element holding just one text node is written on
					// a single line: formatting is switched off around its
					// content and restored afterwards.
					bool oneLine = children.length == 1 && children[0].type == XmlNodeType.Text;
					if (oneLine)
						output.formatter.enabled = false;
					output.endAttributes();
					writeChildren();
					output.endTag(tag);
					if (oneLine)
					{
						output.formatter.enabled = true;
						output.newLine();
					}
				}
				else
					output.endAttributesAndTag();
				return;
			case XmlNodeType.Meta:
				assert(children.length == 0);
				output.startPI(tag);
				writeAttributes();
				output.endPI();
				return;
			case XmlNodeType.DocType:
				assert(children.length == 0);
				output.doctype(tag);
				return;
			case XmlNodeType.Text:
				output.startLine();
				output.text(tag);
				output.newLine();
				return;
			case XmlNodeType.Comment:
				output.startLine();
				output.comment(tag);
				return;
			case XmlNodeType.CData:
				// CDATA content is emitted through the writer's regular
				// text output, not as a CDATA section.
				output.text(tag);
				return;
		}
	}

	/// Textual content of this node: the stored text for Text / CData,
	/// the concatenated text of all children for Node / Root (an
	/// element named "br" additionally contributes a leading newline),
	/// and null for Comment, Meta and DocType. Asserts on type None.
	@property string text()
	{
		final switch (type)
		{
			case XmlNodeType.None:
				assert(false);
			case XmlNodeType.Text:
			case XmlNodeType.CData:
				return tag;
			case XmlNodeType.Node:
			case XmlNodeType.Root:
				string result;
				if (tag == "br")
					result = "\n";
				foreach (child; children)
					result ~= child.text();
				return result;
			case XmlNodeType.Comment:
			case XmlNodeType.Meta:
			case XmlNodeType.DocType:
				return null;
		}
	}

	/// Returns the first direct child element named `tag`, or null.
	final XmlNode findChild(string tag)
	{
		foreach (child; children)
			if (child.type == XmlNodeType.Node && child.tag == tag)
				return child;
		return null;
	}

	/// Returns all direct child elements named `tag`, in document order.
	final XmlNode[] findChildren(string tag)
	{
		XmlNode[] result;
		foreach (child; children)
			if (child.type == XmlNodeType.Node && child.tag == tag)
				result ~= child;
		return result;
	}

	/// Like `findChild`, but throws XmlParseException when there is
	/// no such child.
	final XmlNode opIndex(string tag)
	{
		auto node = findChild(tag);
		if (node is null)
			throw new XmlParseException("No such child: " ~ tag);
		return node;
	}

	/// Returns the `index`-th direct child element named `tag`.
	/// Throws XmlParseException when there are not enough of them.
	final XmlNode opIndex(string tag, size_t index)
	{
		auto nodes = findChildren(tag);
		if (index >= nodes.length)
			throw new XmlParseException(format("Can't get node with tag %s and index %d, there are only %d children with that tag", tag, index, nodes.length));
		return nodes[index];
	}

	/// Returns the `index`-th child of any type.
	final XmlNode opIndex(size_t index)
	{
		return children[index];
	}

	/// Number of direct children.
	final @property size_t length() { return children.length; }

	/// Enables `foreach (ref child; node)` over the direct children.
	int opApply(int delegate(ref XmlNode) dg)
	{
		int result = 0;

		// The index is size_t to match `children.length`; the length is
		// re-read on every iteration, so children appended by the
		// delegate while iterating are visited as well.
		for (size_t i = 0; i < children.length; i++)
		{
			result = dg(children[i]);
			if (result)
				break;
		}
		return result;
	}

	/// Deep copy of this node: type, tag, attributes and (recursively)
	/// children. The copy has no parent, and `startPos` / `endPos`
	/// are not carried over.
	final @property XmlNode dup()
	{
		auto result = new XmlNode(type, tag);
		result.attributes = attributes.dup;
		result.children.reserve(children.length);
		foreach (child; children)
			result.addChild(child.dup);
		return result;
	}
}
258 
/// Root of an XML tree: a Root-typed node whose children are the
/// top-level nodes of a document.
class XmlDocument : XmlNode
{
	/// Creates an empty document.
	this()
	{
		super(XmlNodeType.Root, "<Root>");
	}

	/// Parses all top-level nodes from the stream using XmlParseConfig.
	this(ref StringStream s)
	{
		this();
		parseInto!XmlParseConfig(this, s);
	}

	/// Parses all top-level nodes from a string using XmlParseConfig.
	this(string s)
	{
		auto stream = StringStream(s);
		this(stream);
	}
}
270 
/// The logic for how to handle a node's closing tags.
/// A parse configuration returns one of these per tag name from its
/// `nodeCloseMode` function.
enum NodeCloseMode
{
	/// This element must always have an explicit closing tag
	/// (or a self-closing tag). An unclosed tag will lead to
	/// a parse error.
	/// In XML, all tags are "always".
	always,
/*
	(Disabled: this mode is currently commented out.)

	/// Close tags are optional. When an element with a tag is
	/// encountered directly under an element with the same tag,
	/// it is assumed that the first element is closed before
	/// the second, so the two are siblings, not parent/child.
	/// Thus, `<p>a<p>b</p>` is parsed as `<p>a</p><p>b</p>`,
	/// not `<p>a<p>b</p></p>`, however `<p>a<div><p>b</div>` is
	/// still parsed as `<p>a<div><p>b</p></div></p>`.
	/// This mode can be used for relaxed HTML parsing.
	optional,
*/
	/// Close tags are optional, but are implied when absent.
	/// As a result, these elements cannot have any content,
	/// and any close tags must be adjacent to the open tag.
	implicit,

	/// This element is void and must never have a closing tag.
	/// It is always implicitly closed right after opening.
	/// A close tag is always an error.
	/// This mode can be used for strict parsing of HTML5 void
	/// elements.
	never,
}
302 
/// Configuration for parsing XML: every element needs an explicit
/// (or self-closing) end tag, whitespace around text is trimmed,
/// and every attribute must have a value.
struct XmlParseConfig
{
	/// Close-tag policy for `tag`; always NodeCloseMode.always.
	static NodeCloseMode nodeCloseMode(string tag)
	{
		return NodeCloseMode.always;
	}

	/// Whether whitespace inside `tag` is kept verbatim; never.
	static bool preserveWhitespace(string tag)
	{
		return false;
	}

	/// Whether attributes may be written without `=value`.
	enum optionalParameterValues = false;
}
311 
/// Configuration for strict parsing of HTML5.
/// All void tags must never be closed, and all
/// non-void tags must always be explicitly closed.
/// Attributes must still be quoted like in XML.
struct Html5StrictParseConfig
{
	/// Names of the elements treated as void.
	static immutable voidElements = [
		"area"   , "base"  , "br"   , "col" ,
		"command", "embed" , "hr"   , "img" ,
		"input"  , "keygen", "link" , "meta",
		"param"  , "source", "track", "wbr" ,
	];

	/// Close-tag policy for `tag`: `never` for void elements,
	/// `always` for everything else.
	static NodeCloseMode nodeCloseMode(string tag)
	{
		if (tag.isOneOf(voidElements))
			return NodeCloseMode.never;
		return NodeCloseMode.always;
	}

	/// Attributes may appear without `=value`.
	enum optionalParameterValues = true;

	/// Whether whitespace inside `tag` is kept verbatim; never (for now).
	static bool preserveWhitespace(string tag)
	{
		return false; /*TODO*/
	}
}
337 
/// Parse an SGML-ish string into an XmlNode.
/// Instantiate with a parse configuration: `parse!Config(text)`.
alias parse = parseString!XmlNode;

/// Parse an SGML-ish string into an XmlDocument.
/// Instantiate with a parse configuration: `parseDocument!Config(text)`.
alias parseDocument = parseString!XmlDocument;

/// Parse an XML string into an XmlDocument using XmlParseConfig.
alias xmlParse = parseDocument!XmlParseConfig;
345 
346 private:
347 
public // alias
template parseString(Node)
{
	/// Parses `s` into a newly created `Node`, using parse
	/// configuration `Config`.
	Node parseString(Config)(string s)
	{
		// The two template arguments are applied in two steps, by
		// binding `Node` through an alias first.
		alias parseAs = parseStream!Node;
		auto stream = StringStream(s);
		return parseAs!Config(stream);
	}
}
358 
template parseStream(Node)
{
	/// Creates a new `Node` and fills it from the stream via the
	/// `parseInto` overload matching its type.
	Node parseStream(Config)(ref StringStream s)
	{
		auto result = new Node;
		parseInto!Config(result, s);
		return result;
	}
}

/// Stream parser producing a plain XmlNode; still takes the parse
/// configuration as a template argument.
alias parseNode = parseStream!XmlNode;
370 
/// Parse an SGML-ish StringStream into an XmlDocument.
/// Reads top-level nodes until the stream is exhausted and appends
/// each to `d`. An XmlParseException raised while parsing is wrapped
/// in a new XmlParseException whose message gives the 1-based
/// row:column and the byte offset reached in the stream.
void parseInto(Config)(XmlDocument d, ref StringStream s)
{
	skipWhitespace(s);
	while (s.position < s.size)
	{
		try
		{
			auto topLevel = new XmlNode;
			parseInto!Config(topLevel, s, null);
			d.addChild(topLevel);
			skipWhitespace(s);
		}
		catch (XmlParseException e)
		{
			import std.algorithm.searching;
			import std.range : retro;

			// Everything consumed so far locates the error.
			auto consumed = s.s[0..s.position];
			auto bytes = consumed.representation;
			auto row = bytes.count('\n');
			// Column = number of bytes after the last newline.
			auto column = bytes.retro.countUntil('\n');
			if (column < 0)
				column = consumed.length;
			auto message = "Error at %d:%d (offset %d)".format(
				1 + row,
				1 + column,
				consumed.length,
			);
			throw new XmlParseException(message, e);
		}
	}
}
400 
/// Parse an SGML-ish StringStream into an XmlNode.
///
/// Reads exactly one node (text, comment, CDATA section, `<!...>`
/// declaration, `<?...?>` instruction, or element with all of its
/// children) starting at the stream's current position, and fills in
/// `node`'s type, tag, attributes, children and start/end positions.
///
/// Params:
///   node               = node to fill in
///   s                  = stream to read from; left just past the node
///   parentTag          = tag of the enclosing element (null at top
///                        level), passed to `Config.preserveWhitespace`
///   preserveWhitespace = true if an ancestor already requested
///                        whitespace preservation
///
/// Throws: XmlParseException on malformed input, including when the
/// input ends in the middle of a node on one of the reads done
/// directly by this function.
void parseInto(Config)(XmlNode node, ref StringStream s, string parentTag = null, bool preserveWhitespace = false)
{
	// Checked read: running out of input is reported as a parse error
	// instead of indexing past the end of the string.
	char next()
	{
		enforce!XmlParseException(s.position < s.size, "Unexpected end of input");
		return s.read();
	}

	char c;

	preserveWhitespace |= Config.preserveWhitespace(parentTag);
	if (preserveWhitespace)
		c = next();
	else
		do
			c = next();
		while (isWhiteChar[c]);

	// Note: recorded after the first character was consumed.
	node.startPos = s.position;
	if (c!='<')  // text node
	{
		node.type = XmlNodeType.Text;
		string text;
		while (c!='<')
		{
			text ~= c;
			c = next();
		}
		s.position--; // rewind to '<'
		if (!preserveWhitespace)
			while (text.length && isWhiteChar[text[$-1]])
				text = text[0..$-1];
		node.tag = decodeEntities(text);
		//tag = tag.strip();
	}
	else
	{
		c = next();
		if (c=='!')
		{
			c = next();
			if (c == '-') // comment
			{
				expect(s, '-');
				node.type = XmlNodeType.Comment;
				string tag;
				// Accumulate until the terminator, then cut it off.
				do
				{
					c = next();
					tag ~= c;
				} while (tag.length<3 || tag[$-3..$] != "-->");
				tag = tag[0..$-3];
				node.tag = tag;
			}
			else
			if (c == '[') // CDATA
			{
				foreach (x; "CDATA[")
					expect(s, x);
				node.type = XmlNodeType.CData;
				string tag;
				// Accumulate until the terminator, then cut it off.
				do
				{
					c = next();
					tag ~= c;
				} while (tag.length<3 || tag[$-3..$] != "]]>");
				tag = tag[0..$-3];
				node.tag = tag;
			}
			else // doctype, etc.
			{
				node.type = XmlNodeType.DocType;
				while (c != '>')
				{
					node.tag ~= c;
					c = next();
				}
			}
		}
		else
		if (c=='?')
		{
			node.type = XmlNodeType.Meta;
			node.tag = readWord(s);
			if (node.tag.length==0) throw new XmlParseException("Invalid tag");
			while (true)
			{
				skipWhitespace(s);
				if (peek(s)=='?')
					break;
				readAttribute!Config(node, s);
			}
			c = next(); // the '?' seen by peek above
			expect(s, '>');
		}
		else
		if (c=='/')
			throw new XmlParseException("Unexpected close tag");
		else
		{
			node.type = XmlNodeType.Node;
			node.tag = c~readWord(s);
			while (true)
			{
				skipWhitespace(s);
				c = peek(s);
				if (c=='>' || c=='/')
					break;
				readAttribute!Config(node, s);
			}
			c = next(); // '>' or '/'

			auto closeMode = Config.nodeCloseMode(node.tag);
			if (closeMode == NodeCloseMode.never)
				enforce!XmlParseException(c=='>', "Self-closing void tag <%s>".format(node.tag));
			else
			if (closeMode == NodeCloseMode.implicit)
			{
				if (c == '/')
					expect(s, '>');
			}
			else
			{
				if (c=='>')
				{
					// Outer loop: repeated only when a stray close tag of
					// an implicitly-closed element is skipped (`continue`).
					while (true)
					{
						// Inner loop: parse children until a close tag.
						while (true)
						{
							if (!preserveWhitespace && !Config.preserveWhitespace(node.tag))
								skipWhitespace(s);
							if (peek(s)=='<' && peek(s, 2)=='/')
								break;
							try
							{
								auto child = new XmlNode;
								parseInto!Config(child, s, node.tag, preserveWhitespace);
								node.addChild(child);
							}
							catch (XmlParseException e)
								throw new XmlParseException("Error while processing child of "~node.tag, e);
						}
						expect(s, '<');
						expect(s, '/');
						auto word = readWord(s);
						if (word != node.tag)
						{
							auto closeMode2 = Config.nodeCloseMode(word);
							if (closeMode2 == NodeCloseMode.implicit)
							{
								// NOTE(review): `node.parent` is only set by
								// addChild, which the callers in this module
								// invoke after parseInto returns, so `parent`
								// looks like it is null here for nodes parsed
								// by this module. Also, the '>' of the skipped
								// close tag is not consumed before `continue`.
								// Confirm the intended behavior.
								auto parent = node.parent;
								enforce!XmlParseException(parent, "Top-level close tag for implicitly-closed node </%s>".format(word));
								enforce!XmlParseException(parent.children.length, "First-child close tag for implicitly-closed node </%s>".format(word));
								enforce!XmlParseException(parent.children[$-1].tag == word, "Non-empty implicitly-closed node <%s>".format(word));
								continue;
							}
							else
								enforce!XmlParseException(word == node.tag, "Expected </%s>, not </%s>".format(node.tag, word));
						}
						expect(s, '>');
						break;
					}
				}
				else // '/'
					expect(s, '>');
			}
		}
	}
	node.endPos = s.position;
}
567 
568 private:
569 
/// Reads one `name="value"` (or `name='value'`) pair from the stream
/// and stores it, entity-decoded, in `node.attributes`. When
/// `Config.optionalParameterValues` is set, a name not followed by
/// '=' is stored with a null value.
/// Throws: XmlParseException if no name is present, '=' is missing,
/// or the value is not quoted.
void readAttribute(Config)(XmlNode node, ref StringStream s)
{
	auto name = readWord(s);
	enforce!XmlParseException(name.length != 0, "Invalid attribute");
	skipWhitespace(s);

	static if (Config.optionalParameterValues)
	{
		if (peek(s) != '=')
		{
			node.attributes[name] = null;
			return;
		}
	}

	expect(s, '=');
	skipWhitespace(s);

	// The opening quote also decides which character ends the value.
	char quote = s.read();
	bool quoted = quote == '\'' || quote == '"';
	if (!quoted)
		throw new XmlParseException("Expected ' or \", not %s".format(quote));

	auto raw = readUntil(s, quote);
	node.attributes[name] = decodeEntities(raw);
}
594 
/// Returns the `n`-th upcoming character (1 = the next one) without
/// advancing the stream.
char peek(ref StringStream s, int n=1)
{
	auto index = s.position + n - 1;
	return s.s[index];
}
599 
/// Advances the stream past any whitespace at the current position.
/// Reads through the raw pointer (no bounds check); the guard bytes
/// StringStream keeps after the text are not whitespace, so the scan
/// stops at the end of input.
void skipWhitespace(ref StringStream s)
{
	auto data = s.s.ptr;
	for (; isWhiteChar[data[s.position]]; s.position++)
	{
	}
}
605 
/// Per-byte lookup tables: `isWhiteChar[c]` is true for ASCII
/// whitespace; `isWordChar[c]` is true for ASCII letters and digits
/// and for '-', '_' and ':'.
__gshared bool[256] isWhiteChar, isWordChar;

/// Fills the lookup tables once at program start.
shared static this()
{
	foreach (c; 0..256)
	{
		immutable bool namePunctuation = c=='-' || c=='_' || c==':';
		isWordChar[c] = namePunctuation || isAlphaNum(c);
		isWhiteChar[c] = isWhite(c);
	}
}
616 
/// Consumes the longest run of word characters (see `isWordChar`) at
/// the current position and returns it as a slice of the input.
/// Returns an empty string, consuming nothing, if there is none.
string readWord(ref StringStream stream)
{
	auto base = stream.s.ptr;
	auto first = base + stream.position;
	auto limit = base + stream.s.length;

	auto cursor = first;
	for (; cursor < limit; cursor++)
		if (!isWordChar[*cursor])
			break;

	auto count = cursor - first;
	stream.position += count;
	return first[0..count];
}
628 
/// Consumes one character and throws XmlParseException unless it
/// equals `c`.
void expect(ref StringStream s, char c)
{
	char actual = s.read();
	if (actual != c)
		throw new XmlParseException("Expected " ~ c ~ ", got " ~ actual);
}
635 
/// Returns the text from the current position up to (not including)
/// the next occurrence of `until`, as a slice of the input, and
/// advances the stream past that delimiter.
/// Throws: XmlParseException if `until` does not occur before the
/// end of the input.
string readUntil(ref StringStream s, char until)
{
	auto start = s.position;
	auto end = start;
	// The scan is bounded by the real end of the text, so the guard
	// bytes behind it can never become part of the returned value.
	while (end < s.s.length && s.s[end] != until)
		end++;
	enforce!XmlParseException(end < s.s.length,
		"Unexpected end of input while looking for " ~ until);
	s.position = end + 1;
	return s.s[start..end];
}
645 
// Round trip: parsing a document and serializing it again must
// reproduce the input exactly (processing instruction, attribute,
// and &quot; entities in text).
unittest
{
	enum xmlText =
		`<?xml version="1.0" encoding="UTF-8"?>` ~
		`<quotes>` ~
			`<quote author="Alan Perlis">` ~
				`When someone says, &quot;I want a programming language in which I need only say what I want done,&quot; give him a lollipop.` ~
			`</quote>` ~
		`</quotes>`;
	auto doc = new XmlDocument(xmlText);
	assert(doc.toString() == xmlText, doc.toString());
}
658 
// Whitespace handling of element text: stripped when the config does
// not preserve whitespace, kept verbatim when it does.
unittest
{
	// Parses "<tag>s</tag>" with whitespace preservation switched by
	// `preserve` and returns the first child's text (null if none).
	string testOne(bool preserve)(string s)
	{
		static struct ParseConfig
		{
		static:
			NodeCloseMode nodeCloseMode(string tag) { return XmlParseConfig.nodeCloseMode(tag); }
			bool preserveWhitespace(string tag) { return preserve; }
			enum optionalParameterValues = XmlParseConfig.optionalParameterValues;
		}
		auto node = new XmlNode;
		auto str = StringStream("<tag>" ~ s ~ "</tag>");
		parseInto!ParseConfig(node, str, null);
		// import std.stdio; writeln(preserve, ": ", str.s, " -> ", node.toString);
		return node.children.length ? node.children[0].tag : null;
	}

	foreach (tag; ["a", " a", "a ", " a ", " a  a ", " ", ""])
	{
		assert(testOne!false(tag) == strip(tag),
			"Parsing <tag>" ~ tag ~ "</tag> while not preserving whitespace, expecting '" ~ strip(tag) ~ "', got '" ~ testOne!false(tag) ~ "'");
		assert(testOne!true(tag) == tag,
			"Parsing <tag>" ~ tag ~ "</tag> while preserving whitespace, expecting '" ~ tag ~ "', got '" ~ testOne!true(tag) ~ "'");
	}
}
685 
// Whitespace preservation requested for an element ("a") also applies
// to the text of its descendants ("b").
unittest
{
	static struct ParseConfig
	{
	static:
		NodeCloseMode nodeCloseMode(string tag) { return XmlParseConfig.nodeCloseMode(tag); }
		bool preserveWhitespace(string tag) { return tag == "a"; }
		enum optionalParameterValues = XmlParseConfig.optionalParameterValues;
	}
	auto node = new XmlNode;
	auto str = StringStream("<a><b> foo </b></a>");
	parseInto!ParseConfig(node, str, null);
	assert(node.children[0].children[0].tag == " foo ");
}
700 
/// Entity name (without '&' and ';') to the character it stands for.
const dchar[string] entities;
/// Reverse of `entities`: character to entity name.
/// Cannot be const because it is filled element by element. It is
/// `__gshared` because the only code filling it is a
/// `shared static this()`, which runs once per process: as a plain
/// (thread-local) module variable it would be empty in every thread
/// except the one that ran the constructor.
__gshared string[dchar] entityNames;
// Fills `entities` (name -> character) from the table below, then
// derives `entityNames` (character -> name) from it.
shared static this()
{
	entities =
	[
		// Markup-significant characters ("apos" is at the end of the table)
		"quot" : '\&quot;',
		"amp" : '\&amp;',
		"lt" : '\&lt;',
		"gt" : '\&gt;',

		// Assorted letters, spaces, dashes, quotes and other punctuation
		"OElig" : '\&OElig;',
		"oelig" : '\&oelig;',
		"Scaron" : '\&Scaron;',
		"scaron" : '\&scaron;',
		"Yuml" : '\&Yuml;',
		"circ" : '\&circ;',
		"tilde" : '\&tilde;',
		"ensp" : '\&ensp;',
		"emsp" : '\&emsp;',
		"thinsp" : '\&thinsp;',
		"zwnj" : '\&zwnj;',
		"zwj" : '\&zwj;',
		"lrm" : '\&lrm;',
		"rlm" : '\&rlm;',
		"ndash" : '\&ndash;',
		"mdash" : '\&mdash;',
		"lsquo" : '\&lsquo;',
		"rsquo" : '\&rsquo;',
		"sbquo" : '\&sbquo;',
		"ldquo" : '\&ldquo;',
		"rdquo" : '\&rdquo;',
		"bdquo" : '\&bdquo;',
		"dagger" : '\&dagger;',
		"Dagger" : '\&Dagger;',
		"permil" : '\&permil;',
		"lsaquo" : '\&lsaquo;',
		"rsaquo" : '\&rsaquo;',
		"euro" : '\&euro;',

		// Signs and accented Latin letters (nbsp .. yuml)
		"nbsp" : '\&nbsp;',
		"iexcl" : '\&iexcl;',
		"cent" : '\&cent;',
		"pound" : '\&pound;',
		"curren" : '\&curren;',
		"yen" : '\&yen;',
		"brvbar" : '\&brvbar;',
		"sect" : '\&sect;',
		"uml" : '\&uml;',
		"copy" : '\&copy;',
		"ordf" : '\&ordf;',
		"laquo" : '\&laquo;',
		"not" : '\&not;',
		"shy" : '\&shy;',
		"reg" : '\&reg;',
		"macr" : '\&macr;',
		"deg" : '\&deg;',
		"plusmn" : '\&plusmn;',
		"sup2" : '\&sup2;',
		"sup3" : '\&sup3;',
		"acute" : '\&acute;',
		"micro" : '\&micro;',
		"para" : '\&para;',
		"middot" : '\&middot;',
		"cedil" : '\&cedil;',
		"sup1" : '\&sup1;',
		"ordm" : '\&ordm;',
		"raquo" : '\&raquo;',
		"frac14" : '\&frac14;',
		"frac12" : '\&frac12;',
		"frac34" : '\&frac34;',
		"iquest" : '\&iquest;',
		"Agrave" : '\&Agrave;',
		"Aacute" : '\&Aacute;',
		"Acirc" : '\&Acirc;',
		"Atilde" : '\&Atilde;',
		"Auml" : '\&Auml;',
		"Aring" : '\&Aring;',
		"AElig" : '\&AElig;',
		"Ccedil" : '\&Ccedil;',
		"Egrave" : '\&Egrave;',
		"Eacute" : '\&Eacute;',
		"Ecirc" : '\&Ecirc;',
		"Euml" : '\&Euml;',
		"Igrave" : '\&Igrave;',
		"Iacute" : '\&Iacute;',
		"Icirc" : '\&Icirc;',
		"Iuml" : '\&Iuml;',
		"ETH" : '\&ETH;',
		"Ntilde" : '\&Ntilde;',
		"Ograve" : '\&Ograve;',
		"Oacute" : '\&Oacute;',
		"Ocirc" : '\&Ocirc;',
		"Otilde" : '\&Otilde;',
		"Ouml" : '\&Ouml;',
		"times" : '\&times;',
		"Oslash" : '\&Oslash;',
		"Ugrave" : '\&Ugrave;',
		"Uacute" : '\&Uacute;',
		"Ucirc" : '\&Ucirc;',
		"Uuml" : '\&Uuml;',
		"Yacute" : '\&Yacute;',
		"THORN" : '\&THORN;',
		"szlig" : '\&szlig;',
		"agrave" : '\&agrave;',
		"aacute" : '\&aacute;',
		"acirc" : '\&acirc;',
		"atilde" : '\&atilde;',
		"auml" : '\&auml;',
		"aring" : '\&aring;',
		"aelig" : '\&aelig;',
		"ccedil" : '\&ccedil;',
		"egrave" : '\&egrave;',
		"eacute" : '\&eacute;',
		"ecirc" : '\&ecirc;',
		"euml" : '\&euml;',
		"igrave" : '\&igrave;',
		"iacute" : '\&iacute;',
		"icirc" : '\&icirc;',
		"iuml" : '\&iuml;',
		"eth" : '\&eth;',
		"ntilde" : '\&ntilde;',
		"ograve" : '\&ograve;',
		"oacute" : '\&oacute;',
		"ocirc" : '\&ocirc;',
		"otilde" : '\&otilde;',
		"ouml" : '\&ouml;',
		"divide" : '\&divide;',
		"oslash" : '\&oslash;',
		"ugrave" : '\&ugrave;',
		"uacute" : '\&uacute;',
		"ucirc" : '\&ucirc;',
		"uuml" : '\&uuml;',
		"yacute" : '\&yacute;',
		"thorn" : '\&thorn;',
		"yuml" : '\&yuml;',

		// Greek letters, arrows, mathematical and miscellaneous symbols
		"fnof" : '\&fnof;',
		"Alpha" : '\&Alpha;',
		"Beta" : '\&Beta;',
		"Gamma" : '\&Gamma;',
		"Delta" : '\&Delta;',
		"Epsilon" : '\&Epsilon;',
		"Zeta" : '\&Zeta;',
		"Eta" : '\&Eta;',
		"Theta" : '\&Theta;',
		"Iota" : '\&Iota;',
		"Kappa" : '\&Kappa;',
		"Lambda" : '\&Lambda;',
		"Mu" : '\&Mu;',
		"Nu" : '\&Nu;',
		"Xi" : '\&Xi;',
		"Omicron" : '\&Omicron;',
		"Pi" : '\&Pi;',
		"Rho" : '\&Rho;',
		"Sigma" : '\&Sigma;',
		"Tau" : '\&Tau;',
		"Upsilon" : '\&Upsilon;',
		"Phi" : '\&Phi;',
		"Chi" : '\&Chi;',
		"Psi" : '\&Psi;',
		"Omega" : '\&Omega;',
		"alpha" : '\&alpha;',
		"beta" : '\&beta;',
		"gamma" : '\&gamma;',
		"delta" : '\&delta;',
		"epsilon" : '\&epsilon;',
		"zeta" : '\&zeta;',
		"eta" : '\&eta;',
		"theta" : '\&theta;',
		"iota" : '\&iota;',
		"kappa" : '\&kappa;',
		"lambda" : '\&lambda;',
		"mu" : '\&mu;',
		"nu" : '\&nu;',
		"xi" : '\&xi;',
		"omicron" : '\&omicron;',
		"pi" : '\&pi;',
		"rho" : '\&rho;',
		"sigmaf" : '\&sigmaf;',
		"sigma" : '\&sigma;',
		"tau" : '\&tau;',
		"upsilon" : '\&upsilon;',
		"phi" : '\&phi;',
		"chi" : '\&chi;',
		"psi" : '\&psi;',
		"omega" : '\&omega;',
		"thetasym" : '\&thetasym;',
		"upsih" : '\&upsih;',
		"piv" : '\&piv;',
		"bull" : '\&bull;',
		"hellip" : '\&hellip;',
		"prime" : '\&prime;',
		"Prime" : '\&Prime;',
		"oline" : '\&oline;',
		"frasl" : '\&frasl;',
		"weierp" : '\&weierp;',
		"image" : '\&image;',
		"real" : '\&real;',
		"trade" : '\&trade;',
		"alefsym" : '\&alefsym;',
		"larr" : '\&larr;',
		"uarr" : '\&uarr;',
		"rarr" : '\&rarr;',
		"darr" : '\&darr;',
		"harr" : '\&harr;',
		"crarr" : '\&crarr;',
		"lArr" : '\&lArr;',
		"uArr" : '\&uArr;',
		"rArr" : '\&rArr;',
		"dArr" : '\&dArr;',
		"hArr" : '\&hArr;',
		"forall" : '\&forall;',
		"part" : '\&part;',
		"exist" : '\&exist;',
		"empty" : '\&empty;',
		"nabla" : '\&nabla;',
		"isin" : '\&isin;',
		"notin" : '\&notin;',
		"ni" : '\&ni;',
		"prod" : '\&prod;',
		"sum" : '\&sum;',
		"minus" : '\&minus;',
		"lowast" : '\&lowast;',
		"radic" : '\&radic;',
		"prop" : '\&prop;',
		"infin" : '\&infin;',
		"ang" : '\&ang;',
		"and" : '\&and;',
		"or" : '\&or;',
		"cap" : '\&cap;',
		"cup" : '\&cup;',
		"int" : '\&int;',
		"there4" : '\&there4;',
		"sim" : '\&sim;',
		"cong" : '\&cong;',
		"asymp" : '\&asymp;',
		"ne" : '\&ne;',
		"equiv" : '\&equiv;',
		"le" : '\&le;',
		"ge" : '\&ge;',
		"sub" : '\&sub;',
		"sup" : '\&sup;',
		"nsub" : '\&nsub;',
		"sube" : '\&sube;',
		"supe" : '\&supe;',
		"oplus" : '\&oplus;',
		"otimes" : '\&otimes;',
		"perp" : '\&perp;',
		"sdot" : '\&sdot;',
		"lceil" : '\&lceil;',
		"rceil" : '\&rceil;',
		"lfloor" : '\&lfloor;',
		"rfloor" : '\&rfloor;',
		"loz" : '\&loz;',
		"spades" : '\&spades;',
		"clubs" : '\&clubs;',
		"hearts" : '\&hearts;',
		"diams" : '\&diams;',
		"lang" : '\&lang;',
		"rang" : '\&rang;',

		// Written as a plain character literal rather than a named one.
		"apos"  : '\''
	];
	// Build the reverse lookup used by the encoding functions.
	foreach (name, c; entities)
		entityNames[c] = name;
}
968 
969 import core.stdc.stdio;
970 import std.utf;
971 import ae.utils.textout;
972 
/// Escapes the five markup-significant characters (< > " ' &) in
/// `str` as named entities. Returns `str` itself, without allocating,
/// when it contains none of them.
public string encodeEntities(string str)
{
	// Find the first character that needs escaping, if any.
	size_t first = str.length;
	foreach (i, c; str)
	{
		immutable special = c=='<' || c=='>' || c=='"' || c=='\'' || c=='&';
		if (special)
		{
			first = i;
			break;
		}
	}
	if (first == str.length)
		return str;

	// Copy the clean prefix verbatim and encode the rest.
	StringBuilder sb;
	sb.preallocate(str.length * 11 / 10);
	sb.put(str[0..first]);
	sb.putEncodedEntities(str[first..$]);
	return sb.get();
}
986 
/// Writes `str` to `sink`, replacing each of < > " ' & with its named
/// entity. Runs of ordinary characters are passed on as slices.
public void putEncodedEntities(Sink, S)(ref Sink sink, S str)
{
	size_t flushed = 0; // start of the run not yet written out
	foreach (i, c; str)
	{
		immutable special = c=='<' || c=='>' || c=='"' || c=='\'' || c=='&';
		if (!special)
			continue;
		sink.put(str[flushed..i], '&', entityNames[c], ';');
		flushed = i+1;
	}
	sink.put(str[flushed..$]);
}
998 
/// Replaces every character of `str` that has a named entity in
/// `entityNames` with `&name;`. Returns `str` itself when nothing
/// needs replacing.
public string encodeAllEntities(string str)
{
	import std.array : appender;

	auto result = appender!string();
	size_t pending = 0; // start of the verbatim run not yet copied
	bool replaced = false;

	// Single forward pass: `i` is the byte offset at which the decoded
	// code point `c` starts.
	foreach (i, dchar c; str)
	{
		auto name = c in entityNames;
		if (name is null)
			continue;
		result.put(str[pending..i]);
		result.put('&');
		result.put(*name);
		result.put(';');
		pending = i + stride(str, i); // skip the whole encoded character
		replaced = true;
	}

	if (!replaced)
		return str;
	result.put(str[pending..$]);
	return result.data;
}
1010 
1011 import ae.utils.text;
1012 import std.conv;
1013 
/// Replaces named (`&amp;`), decimal (`&#38;`) and hexadecimal
/// (`&#x26;`) character references in `str` with the characters they
/// stand for. Returns `str` itself when it contains no '&'.
/// Throws: XmlParseException for an '&' that does not start a
/// well-formed reference, for an unknown entity name, and for a
/// numeric reference that is malformed or not a valid code point.
public string decodeEntities(string str)
{
	auto fragments = str.fastSplit('&');
	if (fragments.length <= 1)
		return str;

	// Result pieces: plain text at even indices, decoded characters at
	// odd indices. Each decoded character gets its own 4-byte buffer.
	auto interleaved = new string[fragments.length*2 - 1];
	auto buffers = new char[4][fragments.length-1];
	interleaved[0] = fragments[0];

	foreach (n, fragment; fragments[1..$])
	{
		auto p = fragment.indexOf(';');
		enforce!XmlParseException(p>0, "Invalid entity (unescaped ampersand?)");
		auto name = fragment[0..p];

		dchar c;
		if (name[0]=='#')
		{
			// Report unparsable numbers as parse errors instead of
			// letting the conversion exception escape.
			try
			{
				if (p > 1 && name[1]=='x')
					c = fromHex!uint(name[2..$]);
				else
					c = to!uint(name[1..$]);
			}
			catch (Exception e)
				throw new XmlParseException("Invalid numeric entity: &" ~ name ~ ";", e);
			// Surrogates and values above U+10FFFF cannot be encoded.
			enforce!XmlParseException(isValidDchar(c),
				"Numeric entity is not a valid code point: &" ~ name ~ ";");
		}
		else
		{
			auto pentity = name in entities;
			enforce!XmlParseException(pentity, "Unknown entity: " ~ name);
			c = *pentity;
		}

		interleaved[1+n*2] = cast(string) buffers[n][0..std.utf.encode(buffers[n], c)];
		interleaved[2+n*2] = fragment[p+1..$];
	}

	return interleaved.join();
}
1050 
/// Deprecated name for `decodeEntities`.
deprecated alias decodeEntities convertEntities;
1052 
// Entity helpers: basic escaping of markup characters, encoding of
// every character with a known name, and decoding of named entities.
unittest
{
	assert(encodeEntities(`The <Smith & Wesson> "lock'n'load"`) == `The &lt;Smith &amp; Wesson&gt; &quot;lock&apos;n&apos;load&quot;`);
	assert(encodeAllEntities("©,€") == "&copy;,&euro;");
	assert(decodeEntities("&copy;,&euro;") == "©,€");
}