1 /**
2  * Light read-only XML library
3  * May be deprecated in the future.
4  * See other XML modules for better implementations.
5  *
6  * License:
7  *   This Source Code Form is subject to the terms of
8  *   the Mozilla Public License, v. 2.0. If a copy of
9  *   the MPL was not distributed with this file, You
10  *   can obtain one at http://mozilla.org/MPL/2.0/.
11  *
12  * Authors:
13  *   Vladimir Panteleev <vladimir@thecybershadow.net>
14  *   Simon Arlott
15  */
16 
17 module ae.utils.xmllite;
18 
19 // TODO: better/safer handling of malformed XML
20 
21 import std.string;
22 import std.ascii;
23 import std.exception;
24 
25 import ae.utils.array;
26 import ae.utils.exception;
27 import ae.utils.xmlwriter;
28 
29 // ************************************************************************
30 
31 /// std.stream.Stream-like type with bonus speed
/// Minimal forward-reading cursor over an in-memory string
/// (a lightweight stand-in for a std.stream.Stream-like type).
private struct StringStream
{
	string s;        /// The text being read.
	size_t position; /// Index of the next character `read` will return.

	@disable this();
	@disable this(this);

	/// Wraps a copy of `s`, with the cursor at the start.
	this(string s)
	{
		// Dirty precaution: the text is copied with a few sentinel
		// characters appended and then sliced back to its own length, so
		// the sentinels sit in memory directly after the end of `this.s`.
		enum sentinel = "'\">\0\0\0\0\0";
		auto padded = s ~ sentinel;
		this.s = padded[0 .. s.length];
	}

	/// Returns the character under the cursor and advances past it.
	char read()
	{
		return s[position++];
	}

	/// Length of the wrapped text (sentinels not included).
	@property size_t size()
	{
		return s.length;
	}
}
48 
49 // ************************************************************************
50 
/// Exception type thrown by this module for malformed input and failed
/// lookups. Declared through the `DeclareException` mixin (presumably
/// provided by `ae.utils.exception` - confirm).
mixin DeclareException!q{XmlParseException};
52 
/// The kind of an `XmlNode`; decides how `XmlNode.tag` is interpreted.
enum XmlNodeType
{
	/// Not set yet (the default for a freshly constructed node).
	/// `XmlNode.writeTo` and `XmlNode.text` assert on this value.
	None,
	/// Document root; only a container for its children (see `XmlDocument`).
	Root,
	/// An element; `tag` is the element name.
	Node,
	/// `<!-- ... -->`; `tag` is the comment body.
	Comment,
	/// `<?name ...?>`; `tag` is the name, attributes are parsed as usual.
	Meta,
	/// Any other `<!...>` construct; `tag` is the text between `<!` and `>`.
	DocType,
	/// `<![CDATA[ ... ]]>`; `tag` is the raw content.
	CData,
	/// Character data; `tag` is the text with entities decoded.
	Text
}
64 
/// A node of a parsed (or manually built) document tree.
class XmlNode
{
	/// Element name for `Node`/`Meta` nodes; the content itself for
	/// `Text`, `CData`, `Comment` and `DocType` nodes.
	string tag;
	/// Attributes, in insertion order.
	OrderedMap!(string, string) attributes;
	/// The node this one was added to with `addChild` (null otherwise).
	XmlNode parent;
	/// Child nodes, in document order.
	XmlNode[] children;
	/// What kind of node this is.
	XmlNodeType type;
	/// Stream positions recorded by the parser when it started and
	/// finished reading this node.
	ulong startPos, endPos;

	/// Parses one node from the stream, using `XmlParseConfig`.
	this(ref StringStream s) { parseInto!XmlParseConfig(this, s); }
	/// Parses one node from the start of a string, using `XmlParseConfig`.
	this(string s) { auto ss = StringStream(s); this(ss); }

	/// Creates an empty node of the given type and tag.
	this(XmlNodeType type = XmlNodeType.None, string tag = null)
	{
		this.tag = tag;
		this.type = type;
	}

	/// Sets an attribute. Returns `this` to allow chaining.
	XmlNode addAttribute(string name, string value)
	{
		attributes[name] = value;
		return this;
	}

	/// Appends `child` and makes this node its parent.
	/// Returns `this` (not the child) to allow chaining.
	XmlNode addChild(XmlNode child)
	{
		children ~= child;
		child.parent = this;
		return this;
	}

	/// Serializes this node and its descendants using an `XmlWriter`.
	override string toString() const
	{
		XmlWriter xml;
		writeTo(xml);
		return xml.output.get();
	}

	/// Writes this node and its descendants to `output`.
	/// Comment nodes are currently skipped; CDATA content is written
	/// through `output.text`, the same way as a text node.
	final void writeTo(XmlWriter)(ref XmlWriter output) const
	{
		void emitChildren()
		{
			foreach (node; children)
				node.writeTo(output);
		}

		void emitAttributes()
		{
			foreach (name, value; attributes)
				output.addAttribute(name, value);
		}

		final switch (type)
		{
			case XmlNodeType.None:
				assert(false);

			case XmlNodeType.Root:
				emitChildren();
				return;

			case XmlNodeType.Node:
			{
				output.startTagWithAttributes(tag);
				emitAttributes();

				// Childless element: close it right away.
				if (!children.length)
				{
					output.endAttributesAndTag();
					return;
				}

				// An element holding a single text node is kept on one
				// line, by switching the formatter off around its content.
				bool oneLine = children.length == 1 && children[0].type == XmlNodeType.Text;
				if (oneLine)
					output.formatter.enabled = false;
				output.endAttributes();
				emitChildren();
				output.endTag(tag);
				if (oneLine)
				{
					output.formatter.enabled = true;
					output.newLine();
				}
				return;
			}

			case XmlNodeType.Meta:
				assert(children.length == 0);
				output.startPI(tag);
				emitAttributes();
				output.endPI();
				return;

			case XmlNodeType.DocType:
				assert(children.length == 0);
				output.doctype(tag);
				return;

			case XmlNodeType.Text:
			case XmlNodeType.CData:
				output.text(tag);
				return;

			case XmlNodeType.Comment:
				// TODO
				return;
		}
	}

	/// The text content of this node: the content of text/CDATA nodes,
	/// the concatenated text of all children for elements and the root
	/// (an element named "br" additionally contributes a leading
	/// newline), and null for comments, metas and doctypes.
	@property string text()
	{
		final switch (type)
		{
			case XmlNodeType.None:
				assert(false);

			case XmlNodeType.Comment:
			case XmlNodeType.Meta:
			case XmlNodeType.DocType:
				return null;

			case XmlNodeType.Text:
			case XmlNodeType.CData:
				return tag;

			case XmlNodeType.Root:
			case XmlNodeType.Node:
			{
				string combined = tag == "br" ? "\n" : null;
				foreach (node; children)
					combined ~= node.text();
				return combined;
			}
		}
	}

	/// Returns the first child element named `tag`, or null if none.
	final XmlNode findChild(string tag)
	{
		foreach (candidate; children)
		{
			if (candidate.type != XmlNodeType.Node)
				continue;
			if (candidate.tag == tag)
				return candidate;
		}
		return null;
	}

	/// Returns all child elements named `tag`, in document order.
	final XmlNode[] findChildren(string tag)
	{
		XmlNode[] matches;
		foreach (candidate; children)
		{
			if (candidate.type != XmlNodeType.Node)
				continue;
			if (candidate.tag == tag)
				matches ~= candidate;
		}
		return matches;
	}

	/// Returns the first child element named `tag`.
	/// Throws: `XmlParseException` if there is no such child.
	final XmlNode opIndex(string tag)
	{
		if (auto found = findChild(tag))
			return found;
		throw new XmlParseException("No such child: " ~ tag);
	}

	/// Returns the `index`-th (0-based) child element named `tag`.
	/// Throws: `XmlParseException` if there are not enough such children.
	final XmlNode opIndex(string tag, size_t index)
	{
		auto matches = findChildren(tag);
		if (index < matches.length)
			return matches[index];
		throw new XmlParseException(format("Can't get node with tag %s and index %d, there are only %d children with that tag", tag, index, matches.length));
	}

	/// Returns the `index`-th child, whatever its type.
	final XmlNode opIndex(size_t index)
	{
		return children[index];
	}

	/// Number of direct children.
	final @property size_t length() { return children.length; }

	/// Allows `foreach` over the direct children.
	int opApply(int delegate(ref XmlNode) dg)
	{
		for (int idx = 0; idx < children.length; idx++)
			if (auto status = dg(children[idx]))
				return status;
		return 0;
	}

	/// Returns a deep copy of this node: type, tag, attributes and
	/// (recursively) children. The copy has no parent, and the recorded
	/// stream positions are not carried over.
	final @property XmlNode dup()
	{
		auto copy = new XmlNode(type, tag);
		copy.attributes = attributes.dup;
		copy.children.reserve(children.length);
		foreach (node; children)
			copy.addChild(node.dup);
		return copy;
	}
}
253 
/// Root of a document tree: a `Root`-typed node whose children are the
/// document's top-level nodes.
class XmlDocument : XmlNode
{
	/// Creates an empty document.
	this()
	{
		super(XmlNodeType.Root, "<Root>");
	}

	/// Parses a whole document from the stream, using `XmlParseConfig`.
	this(ref StringStream s)
	{
		this();
		parseInto!XmlParseConfig(this, s);
	}

	/// Parses a whole document from a string, using `XmlParseConfig`.
	this(string s) { auto ss = StringStream(s); this(ss); }
}
265 
266 /// The logic for how to handle a node's closing tags.
/// How the parser treats the closing tag of an element. The mode is
/// chosen per tag name by the parse configuration's `nodeCloseMode`.
enum NodeCloseMode
{
	/// This element must always have an explicit closing tag
	/// (or a self-closing tag). An unclosed tag will lead to
	/// a parse error.
	/// In XML, all tags are "always".
	always,
// Not implemented: the `optional` mode below is commented out and kept
// only as a description of a possible future mode.
/*
	/// Close tags are optional. When an element with a tag is
	/// encountered directly under an element with the same tag,
	/// it is assumed that the first element is closed before
	/// the second, so the two are siblings, not parent/child.
	/// Thus, `<p>a<p>b</p>` is parsed as `<p>a</p><p>b</p>`,
	/// not `<p>a<p>b</p></p>`, however `<p>a<div><p>b</div>` is
	/// still parsed as `<p>a<div><p>b</p></div></p>`.
	/// This mode can be used for relaxed HTML parsing.
	optional,
*/
	/// Close tags are optional, but are implied when absent.
	/// As a result, these elements cannot have any content,
	/// and any close tags must be adjacent to the open tag.
	/// (Neither parse configuration defined in this module
	/// returns this mode.)
	implicit,

	/// This element is void and must never have a closing tag.
	/// It is always implicitly closed right after opening.
	/// A close tag is always an error.
	/// This mode can be used for strict parsing of HTML5 void
	/// elements.
	never,
}
297 
298 /// Configuration for parsing XML.
struct XmlParseConfig
{
	/// When false, every attribute must be written as `name="value"`;
	/// an attribute without `=` is a parse error.
	enum optionalParameterValues = false;

	/// Every element, whatever its name, needs an explicit closing tag
	/// or the self-closing form.
	static NodeCloseMode nodeCloseMode(string tag)
	{
		return NodeCloseMode.always;
	}
}
305 
306 /// Configuration for strict parsing of HTML5.
307 /// All void tags must never be closed, and all
308 /// non-void tags must always be explicitly closed.
309 /// Attributes must still be quoted like in XML.
struct Html5StrictParseConfig
{
	/// Names of the elements treated as void (never closed).
	static immutable voidElements = [
		"area"   , "base"  , "br"   , "col" ,
		"command", "embed" , "hr"   , "img" ,
		"input"  , "keygen", "link" , "meta",
		"param"  , "source", "track", "wbr" ,
	];

	/// `never` for the void elements listed above, `always` for
	/// everything else.
	static NodeCloseMode nodeCloseMode(string tag)
	{
		if (tag.isOneOf(voidElements))
			return NodeCloseMode.never;
		return NodeCloseMode.always;
	}

	/// Attributes may appear without a value (no `=`); such attributes
	/// are stored with a null value.
	enum optionalParameterValues = true;
}
330 
/// Parse an SGML-ish string into an XmlNode.
/// Instantiate with a parse configuration, e.g. `parse!XmlParseConfig(text)`.
alias parse = parseString!XmlNode;

/// Parse an SGML-ish string into an XmlDocument.
/// Instantiate with a parse configuration, e.g.
/// `parseDocument!Html5StrictParseConfig(text)`.
alias parseDocument = parseString!XmlDocument;

/// Parse a string into an XmlDocument using the strict XML rules of
/// `XmlParseConfig`.
alias xmlParse = parseDocument!XmlParseConfig;
338 
339 private:
340 
public // alias
template parseString(Node)
{
	/// Parses the text `s` into a new `Node`, following the rules of the
	/// parse configuration `Config`.
	Node parseString(Config)(string s)
	{
		// The node type and the configuration have to be supplied in two
		// separate instantiation steps, hence the intermediate alias.
		alias parser = parseStream!Node;
		auto stream = StringStream(s);
		return parser!Config(stream);
	}
}
351 
template parseStream(Node)
{
	/// Creates a new `Node` and fills it by parsing from the current
	/// position of `s`, following the rules of the parse configuration
	/// `Config`. The stream is left positioned after the parsed content.
	Node parseStream(Config)(ref StringStream s)
	{
		auto result = new Node;
		parseInto!Config(result, s);
		return result;
	}
}
361 
/// Parse a single XmlNode from a StringStream; instantiate with a parse
/// configuration (`parseNode!Config(stream)`). Used for parsing child nodes.
alias parseNode = parseStream!XmlNode;
363 
364 /// Parse an SGML-ish StringStream into an XmlDocument
/// Parse an SGML-ish StringStream into an XmlDocument.
///
/// Top-level nodes are parsed one after another until the end of the
/// text, and each is appended to `d`. Whitespace before, between and
/// after top-level nodes is skipped.
///
/// Throws: `XmlParseException` whose message gives the 1-based row and
/// column and the byte offset the stream had reached; the original
/// exception is attached as its cause.
void parseInto(Config)(XmlDocument d, ref StringStream s)
{
	skipWhitespace(s);
	while (s.position < s.size)
	{
		auto child = new XmlNode;
		try
			parseInto!Config(child, s);
		catch (XmlParseException e)
		{
			import std.algorithm.searching;
			import std.range : retro;

			// Everything consumed so far determines the reported location.
			auto consumed = s.s[0..s.position];
			auto bytes = consumed.representation;
			auto row = bytes.count('\n');
			// Distance back to the previous newline; if there is none,
			// we are still on the first line.
			auto column = bytes.retro.countUntil('\n');
			if (column < 0)
				column = consumed.length;
			throw new XmlParseException("Error at %d:%d (offset %d)".format(
				1 + row,
				1 + column,
				consumed.length,
			), e);
		}
		d.addChild(child);
		skipWhitespace(s);
	}
}
393 
394 /// Parse an SGML-ish StringStream into an XmlNode
/// Parse an SGML-ish StringStream into an XmlNode
///
/// Reads exactly one node starting at the current stream position and
/// fills in `node.type`, `node.tag`, `node.attributes` and (for elements)
/// `node.children`. `node.startPos`/`node.endPos` receive the stream
/// position before and after the node.
///
/// `Config` supplies `nodeCloseMode(tag)` and `optionalParameterValues`
/// (see `XmlParseConfig`).
///
/// Throws: `XmlParseException` on malformed markup detected by the checks
/// below. NOTE(review): none of the read loops test for end of input
/// themselves; what happens on truncated input depends on
/// `StringStream.read` - confirm.
void parseInto(Config)(XmlNode node, ref StringStream s)
{
	node.startPos = s.position;
	// Skip leading whitespace; c ends up holding the first significant character.
	char c;
	do
		c = s.read();
	while (isWhiteChar[c]);

	if (c!='<')  // text node
	{
		node.type = XmlNodeType.Text;
		// Collect everything up to (not including) the next '<'.
		string text;
		while (c!='<')
		{
			// TODO: check for EOF
			text ~= c;
			c = s.read();
		}
		s.position--; // rewind to '<'
		node.tag = decodeEntities(text);
		//tag = tag.strip();
	}
	else
	{
		// The character after '<' selects the kind of markup.
		c = s.read();
		if (c=='!')
		{
			c = s.read();
			if (c == '-') // comment
			{
				expect(s, '-');
				node.type = XmlNodeType.Comment;
				// Read until the accumulated text ends in "-->", then drop that terminator.
				string tag;
				do
				{
					c = s.read();
					tag ~= c;
				} while (tag.length<3 || tag[$-3..$] != "-->");
				tag = tag[0..$-3];
				node.tag = tag;
			}
			else
			if (c == '[') // CDATA
			{
				foreach (x; "CDATA[")
					expect(s, x);
				node.type = XmlNodeType.CData;
				// Read until the accumulated text ends in "]]>", then drop that
				// terminator. The content is stored raw (no entity decoding).
				string tag;
				do
				{
					c = s.read();
					tag ~= c;
				} while (tag.length<3 || tag[$-3..$] != "]]>");
				tag = tag[0..$-3];
				node.tag = tag;
			}
			else // doctype, etc.
			{
				node.type = XmlNodeType.DocType;
				// c already holds the first character after "<!"; everything up
				// to the first '>' becomes the tag, and that '>' is consumed.
				while (c != '>')
				{
					node.tag ~= c;
					c = s.read();
				}
			}
		}
		else
		if (c=='?')
		{
			// <?name attr="value" ...?>
			node.type = XmlNodeType.Meta;
			node.tag = readWord(s);
			if (node.tag.length==0) throw new XmlParseException("Invalid tag");
			while (true)
			{
				skipWhitespace(s);
				if (peek(s)=='?')
					break;
				readAttribute!Config(node, s);
			}
			c = s.read(); // consume the '?' seen by peek
			expect(s, '>');
		}
		else
		if (c=='/')
			throw new XmlParseException("Unexpected close tag");
		else
		{
			node.type = XmlNodeType.Node;
			// c is the first character of the element name.
			node.tag = c~readWord(s);
			// Attributes, up to the '>' or '/' that ends the opening tag.
			while (true)
			{
				skipWhitespace(s);
				c = peek(s);
				if (c=='>' || c=='/')
					break;
				readAttribute!Config(node, s);
			}
			c = s.read(); // consume the '>' or '/' seen by peek

			auto closeMode = Config.nodeCloseMode(node.tag);
			if (closeMode == NodeCloseMode.never)
				// Void element: "<tag/>" is rejected, and no content is parsed.
				enforce!XmlParseException(c=='>', "Self-closing void tag <%s>".format(node.tag));
			else
			if (closeMode == NodeCloseMode.implicit)
			{
				// Either "<tag>" or "<tag/>" is accepted; no content is parsed.
				if (c == '/')
					expect(s, '>');
			}
			else
			{
				if (c=='>')
				{
					// Outer loop: repeats only when a close tag belonging to an
					// implicitly-closed element was accepted (the `continue` below).
					while (true)
					{
						// Inner loop: parse children until a close tag ("</") is next.
						while (true)
						{
							skipWhitespace(s);
							if (peek(s)=='<' && peek(s, 2)=='/')
								break;
							try
								node.addChild(parseNode!Config(s));
							catch (XmlParseException e)
								throw new XmlParseException("Error while processing child of "~node.tag, e);
						}
						expect(s, '<');
						expect(s, '/');
						auto word = readWord(s);
						if (word != node.tag)
						{
							auto closeMode2 = Config.nodeCloseMode(word);
							if (closeMode2 == NodeCloseMode.implicit)
							{
								// NOTE(review): children are attached with addChild only
								// after parseNode returns (see the inner loop above), so
								// node.parent is presumably still null while this node is
								// being parsed, which would make the first check below
								// always fail - confirm whether node.children was intended.
								auto parent = node.parent;
								enforce!XmlParseException(parent, "Top-level close tag for implicitly-closed node </%s>".format(word));
								enforce!XmlParseException(parent.children.length, "First-child close tag for implicitly-closed node </%s>".format(word));
								enforce!XmlParseException(parent.children[$-1].tag == word, "Non-empty implicitly-closed node <%s>".format(word));
								// NOTE(review): the '>' of this close tag has not been
								// consumed at this point - confirm that is intended.
								continue;
							}
							else
								enforce!XmlParseException(word == node.tag, "Expected </%s>, not </%s>".format(node.tag, word));
						}
						expect(s, '>');
						break;
					}
				}
				else // '/'
					expect(s, '>');
			}
		}
	}
	node.endPos = s.position;
}
547 
548 private:
549 
/// Reads one attribute (`name = "value"` or `name = 'value'`) at the
/// current position and stores it in `node.attributes`, with entities in
/// the value decoded. If `Config.optionalParameterValues` is set, a name
/// not followed by `=` is stored with a null value instead.
/// Throws: `XmlParseException` if there is no name at the current
/// position, or the value is not quoted.
void readAttribute(Config)(XmlNode node, ref StringStream s)
{
	auto key = readWord(s);
	if (!key.length)
		throw new XmlParseException("Invalid attribute");
	skipWhitespace(s);

	static if (Config.optionalParameterValues)
	{
		// Value-less attribute.
		if (peek(s) != '=')
		{
			node.attributes[key] = null;
			return;
		}
	}

	expect(s, '=');
	skipWhitespace(s);

	// The opening quote decides which character closes the value.
	char quote = s.read();
	bool isQuote = quote == '\'' || quote == '"';
	if (!isQuote)
		throw new XmlParseException("Expected ' or \", not %s".format(quote));

	auto raw = readUntil(s, quote);
	node.attributes[key] = decodeEntities(raw);
}
574 
/// Returns the `n`-th upcoming character (1 = the very next one)
/// without advancing the stream.
char peek(ref StringStream s, int n=1)
{
	auto index = s.position + n - 1;
	return s.s[index];
}
579 
/// Advances the stream past any whitespace at the current position.
void skipWhitespace(ref StringStream s)
{
	// Read through the raw pointer, i.e. without bounds checks.
	auto base = s.s.ptr;
	auto pos = s.position;
	while (isWhiteChar[base[pos]])
		pos++;
	s.position = pos;
}
585 
/// Lookup tables indexed by byte value: whether the byte is ASCII
/// whitespace, and whether it may appear in a tag or attribute name
/// (ASCII letters and digits, '-', '_' and ':').
__gshared bool[256] isWhiteChar, isWordChar;

/// Fills the lookup tables once, at program startup.
shared static this()
{
	foreach (code; 0..256)
	{
		isWhiteChar[code] = isWhite(code);

		bool namePunctuation = code=='-' || code=='_' || code==':';
		isWordChar[code] = namePunctuation || isAlphaNum(code);
	}
}
596 
/// Reads the longest run of name characters (see `isWordChar`) at the
/// current position, advances the stream past it and returns it as a
/// slice of the stream's text. Returns an empty string, without
/// advancing, if the next character is not a name character.
string readWord(ref StringStream stream)
{
	auto first = stream.s.ptr + stream.position;
	auto limit = stream.s.ptr + stream.s.length;

	auto cursor = first;
	for (; cursor < limit; cursor++)
		if (!isWordChar[*cursor])
			break;

	auto count = cursor - first;
	stream.position += count;
	return first[0..count];
}
608 
/// Consumes the next character and checks that it is `c`.
/// Throws: `XmlParseException` if a different character was read.
void expect(ref StringStream s, char c)
{
	char actual = s.read();
	enforce!XmlParseException(actual == c, "Expected " ~ c ~ ", got " ~ actual);
}
615 
/// Reads characters up to the next occurrence of `until`, advances the
/// stream past that delimiter, and returns the text before it as a slice
/// of the stream's text (the delimiter itself is not included).
///
/// The scan stops at the end of the text, like `readWord` does, so that
/// an unterminated value is reported as a parse error instead of the scan
/// continuing into memory beyond the input.
///
/// Throws: `XmlParseException` if `until` does not occur in the rest of
/// the text.
string readUntil(ref StringStream s, char until)
{
	auto start = s.s.ptr + s.position;
	auto end = s.s.ptr + s.s.length;
	auto p = start;
	while (p < end && *p != until)
		p++;
	enforce!XmlParseException(p < end, "Unexpected end of input while looking for " ~ until);
	auto len = p-start;
	s.position += len + 1; // +1 to skip the delimiter
	return start[0..len];
}
625 
// Round trip: parsing a document and serializing it again reproduces the
// input exactly, including the encoded entities in the text.
unittest
{
	enum source =
		`<?xml version="1.0" encoding="UTF-8"?>` ~
		`<quotes>` ~
			`<quote author="Alan Perlis">` ~
				`When someone says, &quot;I want a programming language in which I need only say what I want done,&quot; give him a lollipop.` ~
			`</quote>` ~
		`</quotes>`;
	auto serialized = new XmlDocument(source).toString();
	assert(serialized == source);
}
638 
/// Named character entities: entity name (without '&' and ';') -> character.
const dchar[string] entities;

/// Reverse mapping of `entities`: character -> entity name.
///
/// This is `__gshared` because it is filled in by the `shared static this()`
/// below, which runs only once per process. A plain (thread-local) module
/// variable would be populated in the main thread only and stay empty in
/// every other thread, breaking entity encoding there.
/// It is not modified after initialization.
__gshared string[dchar] entityNames;

/// Builds both entity tables once, at program startup.
shared static this()
{
	entities =
	[
		"quot" : '\&quot;',
		"amp" : '\&amp;',
		"lt" : '\&lt;',
		"gt" : '\&gt;',

		"OElig" : '\&OElig;',
		"oelig" : '\&oelig;',
		"Scaron" : '\&Scaron;',
		"scaron" : '\&scaron;',
		"Yuml" : '\&Yuml;',
		"circ" : '\&circ;',
		"tilde" : '\&tilde;',
		"ensp" : '\&ensp;',
		"emsp" : '\&emsp;',
		"thinsp" : '\&thinsp;',
		"zwnj" : '\&zwnj;',
		"zwj" : '\&zwj;',
		"lrm" : '\&lrm;',
		"rlm" : '\&rlm;',
		"ndash" : '\&ndash;',
		"mdash" : '\&mdash;',
		"lsquo" : '\&lsquo;',
		"rsquo" : '\&rsquo;',
		"sbquo" : '\&sbquo;',
		"ldquo" : '\&ldquo;',
		"rdquo" : '\&rdquo;',
		"bdquo" : '\&bdquo;',
		"dagger" : '\&dagger;',
		"Dagger" : '\&Dagger;',
		"permil" : '\&permil;',
		"lsaquo" : '\&lsaquo;',
		"rsaquo" : '\&rsaquo;',
		"euro" : '\&euro;',

		"nbsp" : '\&nbsp;',
		"iexcl" : '\&iexcl;',
		"cent" : '\&cent;',
		"pound" : '\&pound;',
		"curren" : '\&curren;',
		"yen" : '\&yen;',
		"brvbar" : '\&brvbar;',
		"sect" : '\&sect;',
		"uml" : '\&uml;',
		"copy" : '\&copy;',
		"ordf" : '\&ordf;',
		"laquo" : '\&laquo;',
		"not" : '\&not;',
		"shy" : '\&shy;',
		"reg" : '\&reg;',
		"macr" : '\&macr;',
		"deg" : '\&deg;',
		"plusmn" : '\&plusmn;',
		"sup2" : '\&sup2;',
		"sup3" : '\&sup3;',
		"acute" : '\&acute;',
		"micro" : '\&micro;',
		"para" : '\&para;',
		"middot" : '\&middot;',
		"cedil" : '\&cedil;',
		"sup1" : '\&sup1;',
		"ordm" : '\&ordm;',
		"raquo" : '\&raquo;',
		"frac14" : '\&frac14;',
		"frac12" : '\&frac12;',
		"frac34" : '\&frac34;',
		"iquest" : '\&iquest;',
		"Agrave" : '\&Agrave;',
		"Aacute" : '\&Aacute;',
		"Acirc" : '\&Acirc;',
		"Atilde" : '\&Atilde;',
		"Auml" : '\&Auml;',
		"Aring" : '\&Aring;',
		"AElig" : '\&AElig;',
		"Ccedil" : '\&Ccedil;',
		"Egrave" : '\&Egrave;',
		"Eacute" : '\&Eacute;',
		"Ecirc" : '\&Ecirc;',
		"Euml" : '\&Euml;',
		"Igrave" : '\&Igrave;',
		"Iacute" : '\&Iacute;',
		"Icirc" : '\&Icirc;',
		"Iuml" : '\&Iuml;',
		"ETH" : '\&ETH;',
		"Ntilde" : '\&Ntilde;',
		"Ograve" : '\&Ograve;',
		"Oacute" : '\&Oacute;',
		"Ocirc" : '\&Ocirc;',
		"Otilde" : '\&Otilde;',
		"Ouml" : '\&Ouml;',
		"times" : '\&times;',
		"Oslash" : '\&Oslash;',
		"Ugrave" : '\&Ugrave;',
		"Uacute" : '\&Uacute;',
		"Ucirc" : '\&Ucirc;',
		"Uuml" : '\&Uuml;',
		"Yacute" : '\&Yacute;',
		"THORN" : '\&THORN;',
		"szlig" : '\&szlig;',
		"agrave" : '\&agrave;',
		"aacute" : '\&aacute;',
		"acirc" : '\&acirc;',
		"atilde" : '\&atilde;',
		"auml" : '\&auml;',
		"aring" : '\&aring;',
		"aelig" : '\&aelig;',
		"ccedil" : '\&ccedil;',
		"egrave" : '\&egrave;',
		"eacute" : '\&eacute;',
		"ecirc" : '\&ecirc;',
		"euml" : '\&euml;',
		"igrave" : '\&igrave;',
		"iacute" : '\&iacute;',
		"icirc" : '\&icirc;',
		"iuml" : '\&iuml;',
		"eth" : '\&eth;',
		"ntilde" : '\&ntilde;',
		"ograve" : '\&ograve;',
		"oacute" : '\&oacute;',
		"ocirc" : '\&ocirc;',
		"otilde" : '\&otilde;',
		"ouml" : '\&ouml;',
		"divide" : '\&divide;',
		"oslash" : '\&oslash;',
		"ugrave" : '\&ugrave;',
		"uacute" : '\&uacute;',
		"ucirc" : '\&ucirc;',
		"uuml" : '\&uuml;',
		"yacute" : '\&yacute;',
		"thorn" : '\&thorn;',
		"yuml" : '\&yuml;',

		"fnof" : '\&fnof;',
		"Alpha" : '\&Alpha;',
		"Beta" : '\&Beta;',
		"Gamma" : '\&Gamma;',
		"Delta" : '\&Delta;',
		"Epsilon" : '\&Epsilon;',
		"Zeta" : '\&Zeta;',
		"Eta" : '\&Eta;',
		"Theta" : '\&Theta;',
		"Iota" : '\&Iota;',
		"Kappa" : '\&Kappa;',
		"Lambda" : '\&Lambda;',
		"Mu" : '\&Mu;',
		"Nu" : '\&Nu;',
		"Xi" : '\&Xi;',
		"Omicron" : '\&Omicron;',
		"Pi" : '\&Pi;',
		"Rho" : '\&Rho;',
		"Sigma" : '\&Sigma;',
		"Tau" : '\&Tau;',
		"Upsilon" : '\&Upsilon;',
		"Phi" : '\&Phi;',
		"Chi" : '\&Chi;',
		"Psi" : '\&Psi;',
		"Omega" : '\&Omega;',
		"alpha" : '\&alpha;',
		"beta" : '\&beta;',
		"gamma" : '\&gamma;',
		"delta" : '\&delta;',
		"epsilon" : '\&epsilon;',
		"zeta" : '\&zeta;',
		"eta" : '\&eta;',
		"theta" : '\&theta;',
		"iota" : '\&iota;',
		"kappa" : '\&kappa;',
		"lambda" : '\&lambda;',
		"mu" : '\&mu;',
		"nu" : '\&nu;',
		"xi" : '\&xi;',
		"omicron" : '\&omicron;',
		"pi" : '\&pi;',
		"rho" : '\&rho;',
		"sigmaf" : '\&sigmaf;',
		"sigma" : '\&sigma;',
		"tau" : '\&tau;',
		"upsilon" : '\&upsilon;',
		"phi" : '\&phi;',
		"chi" : '\&chi;',
		"psi" : '\&psi;',
		"omega" : '\&omega;',
		"thetasym" : '\&thetasym;',
		"upsih" : '\&upsih;',
		"piv" : '\&piv;',
		"bull" : '\&bull;',
		"hellip" : '\&hellip;',
		"prime" : '\&prime;',
		"Prime" : '\&Prime;',
		"oline" : '\&oline;',
		"frasl" : '\&frasl;',
		"weierp" : '\&weierp;',
		"image" : '\&image;',
		"real" : '\&real;',
		"trade" : '\&trade;',
		"alefsym" : '\&alefsym;',
		"larr" : '\&larr;',
		"uarr" : '\&uarr;',
		"rarr" : '\&rarr;',
		"darr" : '\&darr;',
		"harr" : '\&harr;',
		"crarr" : '\&crarr;',
		"lArr" : '\&lArr;',
		"uArr" : '\&uArr;',
		"rArr" : '\&rArr;',
		"dArr" : '\&dArr;',
		"hArr" : '\&hArr;',
		"forall" : '\&forall;',
		"part" : '\&part;',
		"exist" : '\&exist;',
		"empty" : '\&empty;',
		"nabla" : '\&nabla;',
		"isin" : '\&isin;',
		"notin" : '\&notin;',
		"ni" : '\&ni;',
		"prod" : '\&prod;',
		"sum" : '\&sum;',
		"minus" : '\&minus;',
		"lowast" : '\&lowast;',
		"radic" : '\&radic;',
		"prop" : '\&prop;',
		"infin" : '\&infin;',
		"ang" : '\&ang;',
		"and" : '\&and;',
		"or" : '\&or;',
		"cap" : '\&cap;',
		"cup" : '\&cup;',
		"int" : '\&int;',
		"there4" : '\&there4;',
		"sim" : '\&sim;',
		"cong" : '\&cong;',
		"asymp" : '\&asymp;',
		"ne" : '\&ne;',
		"equiv" : '\&equiv;',
		"le" : '\&le;',
		"ge" : '\&ge;',
		"sub" : '\&sub;',
		"sup" : '\&sup;',
		"nsub" : '\&nsub;',
		"sube" : '\&sube;',
		"supe" : '\&supe;',
		"oplus" : '\&oplus;',
		"otimes" : '\&otimes;',
		"perp" : '\&perp;',
		"sdot" : '\&sdot;',
		"lceil" : '\&lceil;',
		"rceil" : '\&rceil;',
		"lfloor" : '\&lfloor;',
		"rfloor" : '\&rfloor;',
		"loz" : '\&loz;',
		"spades" : '\&spades;',
		"clubs" : '\&clubs;',
		"hearts" : '\&hearts;',
		"diams" : '\&diams;',
		"lang" : '\&lang;',
		"rang" : '\&rang;',

		"apos"  : '\''
	];
	foreach (name, c; entities)
		entityNames[c] = name;
}
906 
907 import core.stdc.stdio;
908 import std.utf;
909 import ae.utils.textout;
910 
/// Replaces the five markup-significant characters (`<`, `>`, `"`, `'`
/// and `&`) in `str` with their named entities.
/// If `str` contains none of them, it is returned as is, with no copy.
public string encodeEntities(string str)
{
	foreach (i, c; str)
	{
		bool special = c=='&' || c=='<' || c=='>' || c=='"' || c=='\'';
		if (!special)
			continue;

		// First character that needs encoding: copy the clean prefix, then
		// encode the remainder in one go.
		StringBuilder sb;
		sb.preallocate(str.length * 11 / 10);
		sb.put(str[0..i]);
		sb.putEncodedEntities(str[i..$]);
		return sb.get();
	}
	return str;
}
924 
/// Writes `str` to `sink`, replacing `<`, `>`, `"`, `'` and `&` with
/// their named entities. `sink.put` is called with several arguments at
/// once, so the sink must accept that form.
public void putEncodedEntities(Sink, S)(ref Sink sink, S str)
{
	size_t pending = 0; // start of the text not yet written to the sink
	foreach (i, c; str)
	{
		bool special = c=='&' || c=='<' || c=='>' || c=='"' || c=='\'';
		if (!special)
			continue;

		// Flush the untouched run before this character, then the entity.
		sink.put(str[pending..i], '&', entityNames[c], ';');
		pending = i+1;
	}
	sink.put(str[pending..$]);
}
936 
/// Replaces every character of `str` that has a named entity (see
/// `entityNames`) with `&name;`. Unlike `encodeEntities`, this is not
/// limited to the markup-significant characters.
/// If nothing needs encoding, `str` is returned as is, with no copy.
///
/// The result is built in a single forward pass; the string is not
/// re-concatenated for every replaced character.
public string encodeAllEntities(string str)
{
	StringBuilder sb;
	size_t start = 0; // start of the text not yet copied to sb
	foreach (i, dchar c; str)
	{
		auto name = c in entityNames;
		if (name is null)
			continue;

		// `start` is still 0 only before the first replacement (every
		// replacement moves it forward by at least one byte).
		if (start == 0)
			sb.preallocate(str.length * 11 / 10);
		sb.put(str[start..i], '&', *name, ';');
		// Skip all UTF-8 code units of the replaced character.
		start = i + stride(str, i);
	}
	if (start == 0)
		return str; // nothing was replaced
	sb.put(str[start..$]);
	return sb.get();
}
948 
949 import ae.utils.text;
950 import std.conv;
951 
/// Replaces entity references in `str` with the characters they stand
/// for: named entities (`&amp;`), decimal (`&#169;`) and hexadecimal
/// (`&#xA9;`) character references.
/// If `str` contains no `&`, it is returned as is.
/// Throws: `XmlParseException` if an `&` is not followed by a non-empty
/// reference terminated by `;`, or if a named entity is unknown.
public string decodeEntities(string str)
{
	auto pieces = str.fastSplit('&');
	if (pieces.length <= 1)
		return str;

	// Every piece after the first begins with an entity reference. The
	// output alternates between literal text and decoded characters:
	// text, char, text, char, ..., text.
	auto tail = pieces[1..$];
	auto output = new string[pieces.length*2 - 1];
	auto encoded = new char[4][tail.length]; // UTF-8 storage for each decoded character
	output[0] = pieces[0];

	foreach (n, piece; tail)
	{
		auto semicolon = piece.indexOf(';');
		enforce!XmlParseException(semicolon>0, "Invalid entity (unescaped ampersand?)");

		dchar c;
		if (piece[0]!='#')
		{
			// Named entity.
			auto found = piece[0..semicolon] in entities;
			enforce!XmlParseException(found, "Unknown entity: " ~ piece[0..semicolon]);
			c = *found;
		}
		else
		if (piece[1]=='x')
			c = fromHex!uint(piece[2..semicolon]);
		else
			c = to!uint(piece[1..semicolon]);

		auto len = std.utf.encode(encoded[n], c);
		output[1+n*2] = cast(string) encoded[n][0..len];
		output[2+n*2] = piece[semicolon+1..$];
	}

	return output.join();
}
988 
/// Deprecated former name of `decodeEntities`.
deprecated alias decodeEntities convertEntities;
990 
// Entity encoding and decoding.
unittest
{
	// encodeEntities touches only the five markup-significant characters.
	auto plain = `The <Smith & Wesson> "lock'n'load"`;
	auto escaped = `The &lt;Smith &amp; Wesson&gt; &quot;lock&apos;n&apos;load&quot;`;
	assert(encodeEntities(plain) == escaped);

	// encodeAllEntities and decodeEntities also handle non-ASCII characters.
	auto symbols = "©,€";
	auto symbolEntities = "&copy;,&euro;";
	assert(encodeAllEntities(symbols) == symbolEntities);
	assert(decodeEntities(symbolEntities) == symbols);
}