1 /**
2  * Light read-only XML library
3  * May be deprecated in the future.
4  * See other XML modules for better implementations.
5  *
6  * License:
7  *   This Source Code Form is subject to the terms of
8  *   the Mozilla Public License, v. 2.0. If a copy of
9  *   the MPL was not distributed with this file, You
10  *   can obtain one at http://mozilla.org/MPL/2.0/.
11  *
12  * Authors:
13  *   Vladimir Panteleev <vladimir@thecybershadow.net>
14  *   Simon Arlott
15  */
16 
17 module ae.utils.xmllite;
18 
19 // TODO: better/safer handling of malformed XML
20 
21 import std.string;
22 import std.ascii;
23 import std.exception;
24 
25 import ae.utils.array;
26 import ae.utils.exception;
27 import ae.utils.xmlwriter;
28 
29 // ************************************************************************
30 
/// Minimal forward cursor over a string: a std.stream.Stream-like type
/// with bonus speed.
private struct StringStream
{
	/// The text being read.
	string s;
	/// Index of the next character that `read` will return.
	size_t position;

	@disable this();
	@disable this(this);

	/// Wraps `s`. The text is copied into a fresh buffer that is followed
	/// by a few padding bytes, and `this.s` is sliced back to the original
	/// length, so the padding sits directly after the visible text.
	this(string s)
	{
		enum ditch = "'\">\0\0\0\0\0"; // Dirty precaution
		string padded = s ~ ditch;
		this.s = padded[0 .. s.length];
	}

	/// Returns the character at `position` and advances past it.
	char read()
	{
		return s[position++];
	}

	/// Number of characters in the wrapped text (padding excluded).
	@property size_t size()
	{
		return s.length;
	}
}
48 
49 // ************************************************************************
50 
/// Declares `XmlParseException`, the exception type this module throws for
/// malformed input and for failed child lookups.
mixin DeclareException!q{XmlParseException};

/// The kind of an `XmlNode`; decides how `tag`, `attributes` and `children`
/// are interpreted when the node is written or its text is extracted.
enum XmlNodeType
{
	None,    /// Not set yet (default for a freshly constructed node).
	Root,    /// Document root, as created by `XmlDocument`.
	Node,    /// Regular element: `<tag attr="...">...</tag>`.
	Comment, /// `<!-- ... -->`; `tag` holds the comment body.
	Meta,    /// `<?tag attr="..."?>`.
	DocType, /// Any other `<!...>` declaration; `tag` holds its content.
	CData,   /// `<![CDATA[ ... ]]>`; `tag` holds the raw content.
	Text     /// Character data; `tag` holds the entity-decoded text.
}

/// Attribute storage of a node: a map from attribute name to value.
alias XmlAttributes = OrderedMap!(string, string);
66 
/// A node of an XML tree, either produced by the parser in this module or
/// built by hand with `addAttribute` / `addChild`.
class XmlNode
{
	/// Element or processing-instruction name for `Node` / `Meta` nodes;
	/// the content itself for `Text`, `CData`, `Comment` and `DocType` nodes.
	string tag;
	/// Attributes of the node, keyed by name.
	XmlAttributes attributes;
	/// The node this one was added to with `addChild`, or null.
	XmlNode parent;
	/// Child nodes, in the order they were added.
	XmlNode[] children;
	/// What kind of node this is.
	XmlNodeType type;
	/// Stream positions at which parsing of this node started and ended
	/// (only set by the parser).
	ulong startPos, endPos;

	/// Parses one node from the stream using `XmlParseConfig`.
	this(ref StringStream s) { parseInto!XmlParseConfig(this, s); }
	/// Parses one node from the start of the string using `XmlParseConfig`.
	this(string s) { auto ss = StringStream(s); this(ss); }

	/// Creates an unparsed node with the given type and tag.
	this(XmlNodeType type = XmlNodeType.None, string tag = null)
	{
		this.type = type;
		this.tag = tag;
	}

	/// Sets attribute `name` to `value`. Returns `this` for chaining.
	XmlNode addAttribute(string name, string value)
	{
		attributes[name] = value;
		return this;
	}

	/// Appends `child` and sets its `parent` to this node.
	/// Returns `this` (not the child) for chaining.
	XmlNode addChild(XmlNode child)
	{
		child.parent = this;
		children ~= child;
		return this;
	}

	/// Serializes this node and its subtree with a default `XmlWriter`.
	override string toString() const
	{
		XmlWriter writer;
		writeTo(writer);
		return writer.output.get();
	}

	/// Serializes this node and its subtree into `output`.
	/// Comment nodes are currently not written at all.
	final void writeTo(XmlWriter)(ref XmlWriter output) const
	{
		void writeChildren()
		{
			foreach (child; children)
				child.writeTo(output);
		}

		void writeAttributes()
		{
			foreach (key, value; attributes)
				output.addAttribute(key, value);
		}

		final switch (type)
		{
			case XmlNodeType.None:
				assert(false);
			case XmlNodeType.Root:
				writeChildren();
				return;
			case XmlNodeType.Node:
				output.startTagWithAttributes(tag);
				writeAttributes();
				if (children.length)
				{
					// An element holding a single text node is written on one
					// line, so formatting is switched off around its content.
					bool oneLine = children.length == 1 && children[0].type == XmlNodeType.Text;
					if (oneLine)
						output.formatter.enabled = false;
					output.endAttributes();
					writeChildren();
					output.endTag(tag);
					if (oneLine)
					{
						output.formatter.enabled = true;
						output.newLine();
					}
				}
				else
					output.endAttributesAndTag();
				return;
			case XmlNodeType.Meta:
				assert(children.length == 0);
				output.startPI(tag);
				writeAttributes();
				output.endPI();
				return;
			case XmlNodeType.DocType:
				assert(children.length == 0);
				output.doctype(tag);
				return;
			case XmlNodeType.Text:
				output.startLine();
				output.text(tag);
				output.newLine();
				return;
			case XmlNodeType.Comment:
				// TODO
				return;
			case XmlNodeType.CData:
				// Written through the same text call as Text nodes, not as a
				// CDATA section.
				output.text(tag);
				return;
		}
	}

	/// Concatenated text content of this node and its descendants.
	/// A `Node` / `Root` whose tag is "br" contributes a leading newline;
	/// comments, processing instructions and doctypes yield null.
	@property string text()
	{
		final switch (type)
		{
			case XmlNodeType.None:
				assert(false);
			case XmlNodeType.Text:
			case XmlNodeType.CData:
				return tag;
			case XmlNodeType.Node:
			case XmlNodeType.Root:
				string result;
				if (tag == "br")
					result = "\n";
				foreach (child; children)
					result ~= child.text();
				return result;
			case XmlNodeType.Comment:
			case XmlNodeType.Meta:
			case XmlNodeType.DocType:
				return null;
		}
	}

	/// Returns the first direct child element named `tag`, or null.
	final XmlNode findChild(string tag)
	{
		foreach (child; children)
			if (child.type == XmlNodeType.Node && child.tag == tag)
				return child;
		return null;
	}

	/// Returns all direct child elements named `tag`, in document order.
	final XmlNode[] findChildren(string tag)
	{
		XmlNode[] result;
		foreach (child; children)
			if (child.type == XmlNodeType.Node && child.tag == tag)
				result ~= child;
		return result;
	}

	/// Like `findChild`, but throws `XmlParseException` when there is no
	/// such child.
	final XmlNode opIndex(string tag)
	{
		auto node = findChild(tag);
		if (node is null)
			throw new XmlParseException("No such child: " ~ tag);
		return node;
	}

	/// Returns the `index`-th direct child element named `tag`; throws
	/// `XmlParseException` when there are not that many.
	final XmlNode opIndex(string tag, size_t index)
	{
		auto nodes = findChildren(tag);
		if (index >= nodes.length)
			throw new XmlParseException(format("Can't get node with tag %s and index %d, there are only %d children with that tag", tag, index, nodes.length));
		return nodes[index];
	}

	/// Returns the `index`-th child of any type.
	final XmlNode opIndex(size_t index)
	{
		return children[index];
	}

	/// Number of direct children.
	final @property size_t length() { return children.length; }

	/// `foreach` support: iterates over the direct children.
	int opApply(int delegate(ref XmlNode) dg)
	{
		int result = 0;

		// The index is size_t to match children.length (an int index would
		// be a signed/unsigned comparison and could overflow). The length is
		// re-read every iteration, so children appended by dg are visited.
		for (size_t i = 0; i < children.length; i++)
		{
			result = dg(children[i]);
			if (result)
				break;
		}
		return result;
	}

	/// Deep copy of this node and its subtree. The copy has no parent, and
	/// `startPos` / `endPos` are not carried over.
	final @property XmlNode dup()
	{
		auto result = new XmlNode(type, tag);
		result.attributes = attributes.dup;
		result.children.reserve(children.length);
		foreach (child; children)
			result.addChild(child.dup);
		return result;
	}
}
257 
/// Root node of a document: an `XmlNode` of type `Root` whose children are
/// the top-level nodes of the parsed text.
class XmlDocument : XmlNode
{
	/// Creates an empty document.
	this()
	{
		super(XmlNodeType.Root, "<Root>");
	}

	/// Creates a document and parses the whole stream into it using
	/// `XmlParseConfig`.
	this(ref StringStream s)
	{
		this();
		parseInto!XmlParseConfig(this, s);
	}

	/// Creates a document and parses the whole string into it using
	/// `XmlParseConfig`.
	this(string s)
	{
		auto stream = StringStream(s);
		this(stream);
	}
}
269 
/// The logic for how to handle a node's closing tags.
/// A parse configuration returns one of these per tag name from its
/// `nodeCloseMode` function.
enum NodeCloseMode
{
	/// This element must always have an explicit closing tag
	/// (or a self-closing tag). An unclosed tag will lead to
	/// a parse error.
	/// In XML, all tags are "always".
	always,
/*
	(Disabled: this mode is commented out and is not available.)

	/// Close tags are optional. When an element with a tag is
	/// encountered directly under an element with the same tag,
	/// it is assumed that the first element is closed before
	/// the second, so the two are siblings, not parent/child.
	/// Thus, `<p>a<p>b</p>` is parsed as `<p>a</p><p>b</p>`,
	/// not `<p>a<p>b</p></p>`, however `<p>a<div><p>b</div>` is
	/// still parsed as `<p>a<div><p>b</p></div></p>`.
	/// This mode can be used for relaxed HTML parsing.
	optional,
*/
	/// Close tags are optional, but are implied when absent.
	/// As a result, these elements cannot have any content,
	/// and any close tags must be adjacent to the open tag.
	implicit,

	/// This element is void and must never have a closing tag.
	/// It is always implicitly closed right after opening.
	/// A close tag is always an error.
	/// This mode can be used for strict parsing of HTML5 void
	/// elements.
	never,
}
301 
/// Configuration for parsing XML: every element needs an explicit close
/// (or a self-closing tag), and every attribute needs a value.
struct XmlParseConfig
{
	/// Close mode for the element named `tag`; always `always` for XML.
	static NodeCloseMode nodeCloseMode(string tag)
	{
		return NodeCloseMode.always;
	}

	/// Whether an attribute may appear without `="value"`.
	enum optionalParameterValues = false;
}
309 
/// Configuration for strict parsing of HTML5.
/// All void tags must never be closed, and all
/// non-void tags must always be explicitly closed.
/// Attributes must still be quoted like in XML.
struct Html5StrictParseConfig
{
	/// Names of the elements treated as void (never closed).
	static immutable voidElements = [
		"area"   , "base"  , "br"   , "col" ,
		"command", "embed" , "hr"   , "img" ,
		"input"  , "keygen", "link" , "meta",
		"param"  , "source", "track", "wbr" ,
	];

	/// Close mode for the element named `tag`: `never` for the void
	/// elements listed above, `always` for everything else.
	static NodeCloseMode nodeCloseMode(string tag)
	{
		if (tag.isOneOf(voidElements))
			return NodeCloseMode.never;
		return NodeCloseMode.always;
	}

	/// Attributes may appear without a value (e.g. `<input disabled>`).
	enum optionalParameterValues = true;
}
334 
/// Parse an SGML-ish string into an XmlNode.
/// Still a template over the parse configuration: `parse!Config(text)`.
alias parse = parseString!XmlNode;

/// Parse an SGML-ish string into an XmlDocument.
/// Still a template over the parse configuration: `parseDocument!Config(text)`.
alias parseDocument = parseString!XmlDocument;

/// Parse a string into an XmlDocument using `XmlParseConfig`.
alias xmlParse = parseDocument!XmlParseConfig;
342 
343 private:
344 
public // alias
template parseString(Node)
{
	/// Parses the string `s` into a newly created `Node`, using the parse
	/// configuration `Config`, and returns that node.
	Node parseString(Config)(string s)
	{
		auto stream = StringStream(s);
		auto result = new Node;
		parseInto!Config(result, stream);
		return result;
	}
}
355 
template parseStream(Node)
{
	/// Creates a new `Node`, parses into it from the current position of
	/// `s` using the parse configuration `Config`, and returns it.
	Node parseStream(Config)(ref StringStream s)
	{
		Node result = new Node();
		parseInto!Config(result, s);
		return result;
	}
}

/// Parses a single `XmlNode` from a stream; used for child nodes.
alias parseNode = parseStream!XmlNode;
367 
/// Parse an SGML-ish StringStream into an XmlDocument.
///
/// Parses top-level nodes until the stream is exhausted and adds each one
/// to `d`. Whitespace between top-level nodes is skipped.
///
/// Throws: `XmlParseException` carrying the row, column and byte offset at
/// which parsing stopped, with the original exception chained to it.
void parseInto(Config)(XmlDocument d, ref StringStream s)
{
	skipWhitespace(s);
	while (s.position < s.size)
		try
		{
			auto n = new XmlNode;
			parseInto!Config(n, s);
			d.addChild(n);
			skipWhitespace(s);
		}
		catch (XmlParseException e)
		{
			import std.algorithm.comparison : min;
			import std.algorithm.searching;
			import std.range : retro;

			// After a failed parse the position is not guaranteed to lie
			// inside the text, so clamp it; otherwise building the error
			// report could itself fail on the slice below.
			auto head = s.s[0 .. min(s.position, s.size)];
			auto row    = head.representation.count('\n');
			// Column = number of bytes since the last newline.
			auto column = head.representation.retro.countUntil('\n');
			if (column < 0)
				column = head.length; // no newline yet: still on the first row
			throw new XmlParseException("Error at %d:%d (offset %d)".format(
				1 + row,
				1 + column,
				head.length,
			), e);
		}
}
397 
/// Parse an SGML-ish StringStream into an XmlNode.
///
/// Reads exactly one node starting at the current stream position (leading
/// whitespace is skipped) and fills in `node`: a text run, a comment, a
/// CDATA section, a `<!...>` declaration, a `<?...?>` instruction, or an
/// element together with its attributes and children.
/// `node.startPos` / `node.endPos` receive the stream positions before and
/// after the node.
///
/// A text run that reaches the end of the input ends there, and an element
/// whose close tag is missing at the end of the input is reported as an
/// `XmlParseException`. Other kinds of truncated input (for example input
/// ending right after `<`) are not handled specially.
///
/// Throws: `XmlParseException` on malformed input.
void parseInto(Config)(XmlNode node, ref StringStream s)
{
	node.startPos = s.position;
	char c;
	do
		c = s.read();
	while (isWhiteChar[c]);

	if (c!='<')  // text node
	{
		node.type = XmlNodeType.Text;
		string text;
		bool atEnd = false;
		while (c!='<')
		{
			text ~= c;
			if (s.position >= s.size)
			{
				// The input ends inside the text run: stop here instead of
				// reading past the end.
				atEnd = true;
				break;
			}
			c = s.read();
		}
		if (!atEnd)
			s.position--; // rewind to '<'
		node.tag = decodeEntities(text);
		//tag = tag.strip();
	}
	else
	{
		c = s.read();
		if (c=='!')
		{
			c = s.read();
			if (c == '-') // comment
			{
				expect(s, '-');
				node.type = XmlNodeType.Comment;
				string tag;
				// Accumulate until the terminator shows up, then strip it.
				do
				{
					c = s.read();
					tag ~= c;
				} while (tag.length<3 || tag[$-3..$] != "-->");
				tag = tag[0..$-3];
				node.tag = tag;
			}
			else
			if (c == '[') // CDATA
			{
				foreach (x; "CDATA[")
					expect(s, x);
				node.type = XmlNodeType.CData;
				string tag;
				// Accumulate until the terminator shows up, then strip it.
				do
				{
					c = s.read();
					tag ~= c;
				} while (tag.length<3 || tag[$-3..$] != "]]>");
				tag = tag[0..$-3];
				node.tag = tag;
			}
			else // doctype, etc.
			{
				// Everything up to the first '>' becomes the tag.
				node.type = XmlNodeType.DocType;
				while (c != '>')
				{
					node.tag ~= c;
					c = s.read();
				}
			}
		}
		else
		if (c=='?')
		{
			node.type = XmlNodeType.Meta;
			node.tag = readWord(s);
			if (node.tag.length==0) throw new XmlParseException("Invalid tag");
			while (true)
			{
				skipWhitespace(s);
				if (peek(s)=='?')
					break;
				readAttribute!Config(node, s);
			}
			c = s.read(); // consume the '?'
			expect(s, '>');
		}
		else
		if (c=='/')
			throw new XmlParseException("Unexpected close tag");
		else
		{
			node.type = XmlNodeType.Node;
			node.tag = c~readWord(s);
			while (true)
			{
				skipWhitespace(s);
				c = peek(s);
				if (c=='>' || c=='/')
					break;
				readAttribute!Config(node, s);
			}
			c = s.read(); // '>' or '/'

			auto closeMode = Config.nodeCloseMode(node.tag);
			if (closeMode == NodeCloseMode.never)
				enforce!XmlParseException(c=='>', "Self-closing void tag <%s>".format(node.tag));
			else
			if (closeMode == NodeCloseMode.implicit)
			{
				if (c == '/')
					expect(s, '>');
			}
			else
			{
				if (c=='>')
				{
					while (true)
					{
						// Parse children until a close tag ("</") is next.
						while (true)
						{
							skipWhitespace(s);
							// An element left open at the end of the input is
							// a parse error, not an out-of-range read.
							enforce!XmlParseException(s.position < s.size, "Unexpected end of input, expected </%s>".format(node.tag));
							if (peek(s)=='<' && peek(s, 2)=='/')
								break;
							try
								node.addChild(parseNode!Config(s));
							catch (XmlParseException e)
								throw new XmlParseException("Error while processing child of "~node.tag, e);
						}
						expect(s, '<');
						expect(s, '/');
						auto word = readWord(s);
						if (word != node.tag)
						{
							auto closeMode2 = Config.nodeCloseMode(word);
							if (closeMode2 == NodeCloseMode.implicit)
							{
								// NOTE(review): within this module a node gets
								// its parent only after parsing returns
								// (addChild runs on the finished node), so
								// `parent` looks like it is always null here;
								// the '>' of the stray close tag is also not
								// consumed before `continue`. Confirm intent.
								auto parent = node.parent;
								enforce!XmlParseException(parent, "Top-level close tag for implicitly-closed node </%s>".format(word));
								enforce!XmlParseException(parent.children.length, "First-child close tag for implicitly-closed node </%s>".format(word));
								enforce!XmlParseException(parent.children[$-1].tag == word, "Non-empty implicitly-closed node <%s>".format(word));
								continue;
							}
							else
								enforce!XmlParseException(word == node.tag, "Expected </%s>, not </%s>".format(node.tag, word));
						}
						expect(s, '>');
						break;
					}
				}
				else // '/'
					expect(s, '>');
			}
		}
	}
	node.endPos = s.position;
}
551 
552 private:
553 
/// Reads one attribute (`name = "value"` or `name = 'value'`) at the
/// current position and stores it, entity-decoded, in `node.attributes`.
/// When `Config.optionalParameterValues` is set, a name that is not followed
/// by `=` is stored with a null value.
/// Throws: `XmlParseException` if the name is empty or the value is not
/// quoted.
void readAttribute(Config)(XmlNode node, ref StringStream s)
{
	string attrName = readWord(s);
	if (!attrName.length)
		throw new XmlParseException("Invalid attribute");
	skipWhitespace(s);

	static if (Config.optionalParameterValues)
	{
		// Bare attribute without a value.
		if (peek(s) != '=')
		{
			node.attributes[attrName] = null;
			return;
		}
	}

	expect(s, '=');
	skipWhitespace(s);

	char quote = s.read();
	if (!(quote == '\'' || quote == '"'))
		throw new XmlParseException("Expected ' or \", not %s".format(quote));

	// The value runs up to the matching quote character.
	string rawValue = readUntil(s, quote);
	node.attributes[attrName] = decodeEntities(rawValue);
}
578 
/// Returns the `n`-th upcoming character (1 = the next one to be read)
/// without advancing the stream.
char peek(ref StringStream s, int n=1)
{
	auto index = s.position + n - 1;
	return s.s[index];
}
583 
/// Advances the stream past any whitespace at the current position.
/// Reads through the raw pointer, i.e. without bounds checks.
void skipWhitespace(ref StringStream s)
{
	auto chars = s.s.ptr;
	auto pos = s.position;
	while (isWhiteChar[chars[pos]])
		pos++;
	s.position = pos;
}
589 
/// Per-byte lookup tables: whether a byte is whitespace, and whether it may
/// appear in a tag or attribute name.
__gshared bool[256] isWhiteChar, isWordChar;

/// Fills the lookup tables once at program start.
shared static this()
{
	foreach (c; 0..256)
	{
		isWhiteChar[c] = isWhite(c);

		// Names may contain '-', '_' and ':' besides letters and digits.
		immutable bool namePunctuation = c=='-' || c=='_' || c==':';
		isWordChar[c] = namePunctuation || isAlphaNum(c);
	}
}
600 
/// Consumes the longest run of name characters (see `isWordChar`) at the
/// current position and returns it as a slice of the stream's text.
/// Returns an empty string, consuming nothing, if there is no such run.
string readWord(ref StringStream stream)
{
	auto first = stream.s.ptr + stream.position;
	auto limit = stream.s.ptr + stream.s.length;

	auto cursor = first;
	for (; cursor < limit; cursor++)
		if (!isWordChar[*cursor])
			break;

	auto count = cursor - first;
	stream.position += count;
	return first[0..count];
}
612 
/// Reads one character and checks that it is `c`.
/// Throws: `XmlParseException` naming both characters on a mismatch.
void expect(ref StringStream s, char c)
{
	char actual = s.read();
	if (actual != c)
		throw new XmlParseException("Expected " ~ c ~ ", got " ~ actual);
}
619 
/// Returns the text from the current position up to (not including) the
/// next occurrence of `until`, as a slice of the stream's text, and moves
/// the position just past that occurrence.
/// Throws: `XmlParseException` if `until` does not occur before the end of
/// the text.
string readUntil(ref StringStream s, char until)
{
	auto start = s.s.ptr + s.position;
	auto end = s.s.ptr + s.s.length;
	auto p = start;
	// Bounded like readWord: never scan (or return) bytes beyond the text.
	while (p < end && *p != until)
		p++;
	enforce!XmlParseException(p < end, "Unexpected end of input while looking for " ~ until);
	auto len = p-start;
	s.position += len + 1; // + 1 skips the terminator itself
	return start[0..len];
}
629 
// Round trip: a small document must serialize back to exactly its source.
unittest
{
	enum source =
		`<?xml version="1.0" encoding="UTF-8"?>` ~
		`<quotes>` ~
			`<quote author="Alan Perlis">` ~
				`When someone says, &quot;I want a programming language in which I need only say what I want done,&quot; give him a lollipop.` ~
			`</quote>` ~
		`</quotes>`;

	auto document = new XmlDocument(source);
	auto rendered = document.toString();
	assert(rendered == source);
}
642 
/// Named character entities: entity name (without `&` and `;`) to character.
///
/// Both tables are `__gshared`, like the character-class tables of this
/// module: they are filled once by the `shared static this` below, which
/// runs only once per process. As thread-local variables they would be left
/// empty in every thread other than the one that ran the constructor.
/// They are only read after initialization.
__gshared const dchar[string] entities;
/// Reverse of `entities`: character to entity name.
/*const*/ __gshared string[dchar] entityNames;
shared static this()
{
	entities =
	[
		"quot" : '\&quot;',
		"amp" : '\&amp;',
		"lt" : '\&lt;',
		"gt" : '\&gt;',

		"OElig" : '\&OElig;',
		"oelig" : '\&oelig;',
		"Scaron" : '\&Scaron;',
		"scaron" : '\&scaron;',
		"Yuml" : '\&Yuml;',
		"circ" : '\&circ;',
		"tilde" : '\&tilde;',
		"ensp" : '\&ensp;',
		"emsp" : '\&emsp;',
		"thinsp" : '\&thinsp;',
		"zwnj" : '\&zwnj;',
		"zwj" : '\&zwj;',
		"lrm" : '\&lrm;',
		"rlm" : '\&rlm;',
		"ndash" : '\&ndash;',
		"mdash" : '\&mdash;',
		"lsquo" : '\&lsquo;',
		"rsquo" : '\&rsquo;',
		"sbquo" : '\&sbquo;',
		"ldquo" : '\&ldquo;',
		"rdquo" : '\&rdquo;',
		"bdquo" : '\&bdquo;',
		"dagger" : '\&dagger;',
		"Dagger" : '\&Dagger;',
		"permil" : '\&permil;',
		"lsaquo" : '\&lsaquo;',
		"rsaquo" : '\&rsaquo;',
		"euro" : '\&euro;',

		"nbsp" : '\&nbsp;',
		"iexcl" : '\&iexcl;',
		"cent" : '\&cent;',
		"pound" : '\&pound;',
		"curren" : '\&curren;',
		"yen" : '\&yen;',
		"brvbar" : '\&brvbar;',
		"sect" : '\&sect;',
		"uml" : '\&uml;',
		"copy" : '\&copy;',
		"ordf" : '\&ordf;',
		"laquo" : '\&laquo;',
		"not" : '\&not;',
		"shy" : '\&shy;',
		"reg" : '\&reg;',
		"macr" : '\&macr;',
		"deg" : '\&deg;',
		"plusmn" : '\&plusmn;',
		"sup2" : '\&sup2;',
		"sup3" : '\&sup3;',
		"acute" : '\&acute;',
		"micro" : '\&micro;',
		"para" : '\&para;',
		"middot" : '\&middot;',
		"cedil" : '\&cedil;',
		"sup1" : '\&sup1;',
		"ordm" : '\&ordm;',
		"raquo" : '\&raquo;',
		"frac14" : '\&frac14;',
		"frac12" : '\&frac12;',
		"frac34" : '\&frac34;',
		"iquest" : '\&iquest;',
		"Agrave" : '\&Agrave;',
		"Aacute" : '\&Aacute;',
		"Acirc" : '\&Acirc;',
		"Atilde" : '\&Atilde;',
		"Auml" : '\&Auml;',
		"Aring" : '\&Aring;',
		"AElig" : '\&AElig;',
		"Ccedil" : '\&Ccedil;',
		"Egrave" : '\&Egrave;',
		"Eacute" : '\&Eacute;',
		"Ecirc" : '\&Ecirc;',
		"Euml" : '\&Euml;',
		"Igrave" : '\&Igrave;',
		"Iacute" : '\&Iacute;',
		"Icirc" : '\&Icirc;',
		"Iuml" : '\&Iuml;',
		"ETH" : '\&ETH;',
		"Ntilde" : '\&Ntilde;',
		"Ograve" : '\&Ograve;',
		"Oacute" : '\&Oacute;',
		"Ocirc" : '\&Ocirc;',
		"Otilde" : '\&Otilde;',
		"Ouml" : '\&Ouml;',
		"times" : '\&times;',
		"Oslash" : '\&Oslash;',
		"Ugrave" : '\&Ugrave;',
		"Uacute" : '\&Uacute;',
		"Ucirc" : '\&Ucirc;',
		"Uuml" : '\&Uuml;',
		"Yacute" : '\&Yacute;',
		"THORN" : '\&THORN;',
		"szlig" : '\&szlig;',
		"agrave" : '\&agrave;',
		"aacute" : '\&aacute;',
		"acirc" : '\&acirc;',
		"atilde" : '\&atilde;',
		"auml" : '\&auml;',
		"aring" : '\&aring;',
		"aelig" : '\&aelig;',
		"ccedil" : '\&ccedil;',
		"egrave" : '\&egrave;',
		"eacute" : '\&eacute;',
		"ecirc" : '\&ecirc;',
		"euml" : '\&euml;',
		"igrave" : '\&igrave;',
		"iacute" : '\&iacute;',
		"icirc" : '\&icirc;',
		"iuml" : '\&iuml;',
		"eth" : '\&eth;',
		"ntilde" : '\&ntilde;',
		"ograve" : '\&ograve;',
		"oacute" : '\&oacute;',
		"ocirc" : '\&ocirc;',
		"otilde" : '\&otilde;',
		"ouml" : '\&ouml;',
		"divide" : '\&divide;',
		"oslash" : '\&oslash;',
		"ugrave" : '\&ugrave;',
		"uacute" : '\&uacute;',
		"ucirc" : '\&ucirc;',
		"uuml" : '\&uuml;',
		"yacute" : '\&yacute;',
		"thorn" : '\&thorn;',
		"yuml" : '\&yuml;',

		"fnof" : '\&fnof;',
		"Alpha" : '\&Alpha;',
		"Beta" : '\&Beta;',
		"Gamma" : '\&Gamma;',
		"Delta" : '\&Delta;',
		"Epsilon" : '\&Epsilon;',
		"Zeta" : '\&Zeta;',
		"Eta" : '\&Eta;',
		"Theta" : '\&Theta;',
		"Iota" : '\&Iota;',
		"Kappa" : '\&Kappa;',
		"Lambda" : '\&Lambda;',
		"Mu" : '\&Mu;',
		"Nu" : '\&Nu;',
		"Xi" : '\&Xi;',
		"Omicron" : '\&Omicron;',
		"Pi" : '\&Pi;',
		"Rho" : '\&Rho;',
		"Sigma" : '\&Sigma;',
		"Tau" : '\&Tau;',
		"Upsilon" : '\&Upsilon;',
		"Phi" : '\&Phi;',
		"Chi" : '\&Chi;',
		"Psi" : '\&Psi;',
		"Omega" : '\&Omega;',
		"alpha" : '\&alpha;',
		"beta" : '\&beta;',
		"gamma" : '\&gamma;',
		"delta" : '\&delta;',
		"epsilon" : '\&epsilon;',
		"zeta" : '\&zeta;',
		"eta" : '\&eta;',
		"theta" : '\&theta;',
		"iota" : '\&iota;',
		"kappa" : '\&kappa;',
		"lambda" : '\&lambda;',
		"mu" : '\&mu;',
		"nu" : '\&nu;',
		"xi" : '\&xi;',
		"omicron" : '\&omicron;',
		"pi" : '\&pi;',
		"rho" : '\&rho;',
		"sigmaf" : '\&sigmaf;',
		"sigma" : '\&sigma;',
		"tau" : '\&tau;',
		"upsilon" : '\&upsilon;',
		"phi" : '\&phi;',
		"chi" : '\&chi;',
		"psi" : '\&psi;',
		"omega" : '\&omega;',
		"thetasym" : '\&thetasym;',
		"upsih" : '\&upsih;',
		"piv" : '\&piv;',
		"bull" : '\&bull;',
		"hellip" : '\&hellip;',
		"prime" : '\&prime;',
		"Prime" : '\&Prime;',
		"oline" : '\&oline;',
		"frasl" : '\&frasl;',
		"weierp" : '\&weierp;',
		"image" : '\&image;',
		"real" : '\&real;',
		"trade" : '\&trade;',
		"alefsym" : '\&alefsym;',
		"larr" : '\&larr;',
		"uarr" : '\&uarr;',
		"rarr" : '\&rarr;',
		"darr" : '\&darr;',
		"harr" : '\&harr;',
		"crarr" : '\&crarr;',
		"lArr" : '\&lArr;',
		"uArr" : '\&uArr;',
		"rArr" : '\&rArr;',
		"dArr" : '\&dArr;',
		"hArr" : '\&hArr;',
		"forall" : '\&forall;',
		"part" : '\&part;',
		"exist" : '\&exist;',
		"empty" : '\&empty;',
		"nabla" : '\&nabla;',
		"isin" : '\&isin;',
		"notin" : '\&notin;',
		"ni" : '\&ni;',
		"prod" : '\&prod;',
		"sum" : '\&sum;',
		"minus" : '\&minus;',
		"lowast" : '\&lowast;',
		"radic" : '\&radic;',
		"prop" : '\&prop;',
		"infin" : '\&infin;',
		"ang" : '\&ang;',
		"and" : '\&and;',
		"or" : '\&or;',
		"cap" : '\&cap;',
		"cup" : '\&cup;',
		"int" : '\&int;',
		"there4" : '\&there4;',
		"sim" : '\&sim;',
		"cong" : '\&cong;',
		"asymp" : '\&asymp;',
		"ne" : '\&ne;',
		"equiv" : '\&equiv;',
		"le" : '\&le;',
		"ge" : '\&ge;',
		"sub" : '\&sub;',
		"sup" : '\&sup;',
		"nsub" : '\&nsub;',
		"sube" : '\&sube;',
		"supe" : '\&supe;',
		"oplus" : '\&oplus;',
		"otimes" : '\&otimes;',
		"perp" : '\&perp;',
		"sdot" : '\&sdot;',
		"lceil" : '\&lceil;',
		"rceil" : '\&rceil;',
		"lfloor" : '\&lfloor;',
		"rfloor" : '\&rfloor;',
		"loz" : '\&loz;',
		"spades" : '\&spades;',
		"clubs" : '\&clubs;',
		"hearts" : '\&hearts;',
		"diams" : '\&diams;',
		"lang" : '\&lang;',
		"rang" : '\&rang;',

		"apos"  : '\''
	];
	// Build the reverse lookup used when encoding.
	foreach (name, c; entities)
		entityNames[c] = name;
}
910 
911 import core.stdc.stdio;
912 import std.utf;
913 import ae.utils.textout;
914 
/// Escapes the five XML special characters (`<`, `>`, `"`, `'`, `&`) in
/// `str` as named entities. Returns `str` itself when it contains none.
public string encodeEntities(string str)
{
	// Locate the first character that needs escaping, if any.
	size_t firstSpecial = str.length;
	foreach (i, c; str)
		if (c=='<' || c=='>' || c=='"' || c=='\'' || c=='&')
		{
			firstSpecial = i;
			break;
		}

	if (firstSpecial == str.length)
		return str;

	// Copy the clean prefix as is, and escape only the remainder.
	StringBuilder builder;
	builder.preallocate(str.length * 11 / 10);
	builder.put(str[0..firstSpecial]);
	builder.putEncodedEntities(str[firstSpecial..$]);
	return builder.get();
}
928 
/// Writes `str` to `sink`, replacing each of `<`, `>`, `"`, `'` and `&`
/// with its named entity. Stretches between special characters are written
/// as slices of `str`.
public void putEncodedEntities(Sink, S)(ref Sink sink, S str)
{
	size_t pending = 0; // start of the stretch not yet written
	foreach (i, c; str)
	{
		immutable bool special = c=='<' || c=='>' || c=='"' || c=='\'' || c=='&';
		if (!special)
			continue;

		sink.put(str[pending..i], '&', entityNames[c], ';');
		pending = i+1;
	}
	sink.put(str[pending..$]);
}
940 
/// Replaces every character of `str` that has a named entity (see
/// `entityNames`) with `&name;`. Returns `str` itself when nothing needs
/// replacing.
///
/// Works in a single forward pass, appending to one result string, instead
/// of rebuilding the whole string for each replaced character.
public string encodeAllEntities(string str)
{
	string result;
	size_t copied = 0; // start of the part of str not yet copied to result

	foreach (i, dchar c; str)
	{
		auto name = c in entityNames;
		if (name is null)
			continue;

		result ~= str[copied..i];
		result ~= '&';
		result ~= *name;
		result ~= ';';
		// Resume after the (possibly multi-byte) character just replaced.
		copied = i + stride(str, i);
	}

	// copied only moves forward on a replacement, so 0 means none happened.
	if (copied == 0)
		return str;

	result ~= str[copied..$];
	return result;
}
952 
953 import ae.utils.text;
954 import std.conv;
955 
/// Replaces entity references in `str` with the characters they stand for:
/// named entities (`&amp;`), decimal references (`&#169;`) and hexadecimal
/// references (`&#xA9;`). Returns `str` itself when it contains no `&`.
///
/// Throws: `XmlParseException` for an `&` without a following `;`, an
/// unknown entity name, a numeric reference that cannot be parsed, or a
/// numeric reference that is not an encodable code point. In the last two
/// cases the underlying exception is chained.
public string decodeEntities(string str)
{
	auto fragments = str.fastSplit('&');
	if (fragments.length <= 1)
		return str;

	// Result pieces: literal text and decoded characters, alternating.
	auto interleaved = new string[fragments.length*2 - 1];
	// One 4-byte UTF-8 buffer per decoded character.
	auto buffers = new char[4][fragments.length-1];
	interleaved[0] = fragments[0];

	foreach (n, fragment; fragments[1..$])
	{
		auto p = fragment.indexOf(';');
		enforce!XmlParseException(p>0, "Invalid entity (unescaped ampersand?)");

		dchar c;
		if (fragment[0]=='#')
		{
			// Report a malformed number as a parse error rather than
			// letting the conversion's own exception escape.
			try
			{
				if (fragment[1]=='x')
					c = fromHex!uint(fragment[2..p]);
				else
					c = to!uint(fragment[1..p]);
			}
			catch (Exception e)
				throw new XmlParseException("Invalid numeric entity: &" ~ fragment[0..p] ~ ";", e);
		}
		else
		{
			auto pentity = fragment[0..p] in entities;
			enforce!XmlParseException(pentity, "Unknown entity: " ~ fragment[0..p]);
			c = *pentity;
		}

		// Encoding rejects values that are not valid code points.
		size_t encodedLength;
		try
			encodedLength = std.utf.encode(buffers[n], c);
		catch (UTFException e)
			throw new XmlParseException("Invalid code point in entity: &" ~ fragment[0..p] ~ ";", e);

		interleaved[1+n*2] = cast(string) buffers[n][0..encodedLength];
		interleaved[2+n*2] = fragment[p+1..$];
	}

	return interleaved.join();
}
992 
/// Deprecated former name of `decodeEntities`.
deprecated alias convertEntities = decodeEntities;
994 
// Entity encoding and decoding on small fixed examples.
unittest
{
	enum raw = `The <Smith & Wesson> "lock'n'load"`;
	enum escaped = `The &lt;Smith &amp; Wesson&gt; &quot;lock&apos;n&apos;load&quot;`;
	assert(encodeEntities(raw) == escaped);

	enum symbols = "©,€";
	enum named = "&copy;,&euro;";
	assert(encodeAllEntities(symbols) == named);
	assert(decodeEntities(named) == symbols);
}