1 /**
2  * Light read-only XML library
3  * May be deprecated in the future.
4  * See other XML modules for better implementations.
5  *
6  * License:
7  *   This Source Code Form is subject to the terms of
8  *   the Mozilla Public License, v. 2.0. If a copy of
9  *   the MPL was not distributed with this file, You
10  *   can obtain one at http://mozilla.org/MPL/2.0/.
11  *
12  * Authors:
13  *   Vladimir Panteleev <vladimir@thecybershadow.net>
14  *   Simon Arlott
15  */
16 
17 module ae.utils.xmllite;
18 
19 // TODO: better/safer handling of malformed XML
20 
21 import std.string;
22 import std.ascii;
23 import std.exception;
24 
25 import ae.utils.array;
26 import ae.utils.exception;
27 import ae.utils.xmlwriter;
28 
29 // ************************************************************************
30 
31 /// std.stream.Stream-like type with bonus speed
/// Minimal forward-reading cursor over an in-memory string
/// (std.stream.Stream-like, but without the overhead).
private struct StringStream
{
	/// The text being read.
	string s;
	/// Index of the next character `read` will return.
	size_t position;

	@disable this();
	@disable this(this);

	/// Wraps `s`. The text is copied with a few terminator characters
	/// placed in memory directly after it, so that the unchecked pointer
	/// scans elsewhere in this module hit a stop character shortly after
	/// the logical end instead of running into unrelated memory.
	this(string s)
	{
		enum ditch = "'\">\0\0\0\0\0"; // Dirty precaution
		this.s = (s ~ ditch)[0..$-ditch.length];
	}

	/// Returns the character at `position` and advances past it.
	/// Throws: XmlParseException if the input is exhausted. `position`
	/// is left untouched in that case, so it can still be used to slice
	/// `s` when reporting the error location.
	char read()
	{
		if (position >= s.length)
			throw new XmlParseException("Unexpected end of input");
		return s[position++];
	}

	/// Length of the text (excluding the hidden terminator characters).
	@property size_t size() { return s.length; }
}
48 
49 // ************************************************************************
50 
/// Declares the XmlParseException type, which this module throws when
/// it encounters malformed input or a failed node lookup.
mixin DeclareException!q{XmlParseException};
52 
/// The kind of content an XmlNode represents.
enum XmlNodeType
{
	/// Not yet set (default for a freshly constructed node).
	None,
	/// Document root; only holds children.
	Root,
	/// Regular element (`<tag ...>`); `tag` is the element name.
	Node,
	/// `<!-- ... -->`; `tag` holds the comment body.
	Comment,
	/// `<?name ...?>`; `tag` is the name, with parsed attributes.
	Meta,
	/// Any other `<!...>` declaration; `tag` holds the raw content.
	DocType,
	/// `<![CDATA[ ... ]]>`; `tag` holds the raw content.
	CData,
	/// Character data; `tag` holds the entity-decoded text.
	Text
}
64 
/// A node of the parsed tree. What `tag` means depends on `type`:
/// the element / processing-instruction name for Node and Meta, and
/// the node's content for Text, CData, Comment and DocType.
class XmlNode
{
	string tag;
	string[string] attributes;
	/// Set by `addChild` on the node being added.
	XmlNode parent;
	XmlNode[] children;
	XmlNodeType type;
	/// Stream positions recorded by the parser when it entered and
	/// left this node.
	ulong startPos, endPos;

	/// Parse a single node from the stream using XmlParseConfig.
	this(ref StringStream s) { parseInto!XmlParseConfig(this, s); }
	/// Parse a single node from a string using XmlParseConfig.
	this(string s) { auto ss = StringStream(s); this(ss); }

	/// Create an empty node of the given type and tag.
	this(XmlNodeType type = XmlNodeType.None, string tag = null)
	{
		this.type = type;
		this.tag = tag;
	}

	/// Set (or overwrite) an attribute. Returns `this` for chaining.
	XmlNode addAttribute(string name, string value)
	{
		attributes[name] = value;
		return this;
	}

	/// Append `child` and make this node its parent.
	/// Returns `this` (not the child) for chaining.
	XmlNode addChild(XmlNode child)
	{
		child.parent = this;
		children ~= child;
		return this;
	}

	/// Serialize this node and its subtree using the default XmlWriter.
	override string toString() const
	{
		XmlWriter writer;
		writeTo(writer);
		return writer.output.get();
	}

	/// Serialize this node and its subtree into `output`.
	/// Comment nodes are currently not written at all, and CData
	/// content is emitted through the writer's `text` method.
	final void writeTo(XmlWriter)(ref XmlWriter output) const
	{
		void writeChildren()
		{
			foreach (child; children)
				child.writeTo(output);
		}

		void writeAttributes()
		{
			foreach (key, value; attributes)
				output.addAttribute(key, value);
		}

		final switch (type)
		{
			case XmlNodeType.None:
				assert(false);
			case XmlNodeType.Root:
				writeChildren();
				return;
			case XmlNodeType.Node:
				output.startTagWithAttributes(tag);
				writeAttributes();
				output.endAttributes();
				writeChildren();
				output.endTag(tag);
				return;
			case XmlNodeType.Meta:
				assert(children.length == 0);
				output.startPI(tag);
				writeAttributes();
				output.endPI();
				return;
			case XmlNodeType.DocType:
				assert(children.length == 0);
				output.doctype(tag);
				return;
			case XmlNodeType.Text:
				output.text(tag);
				return;
			case XmlNodeType.Comment:
				// TODO
				return;
			case XmlNodeType.CData:
				output.text(tag);
				return;
		}
	}

	/// The concatenated text content of this subtree.
	/// Text and CData nodes yield their content; elements yield the
	/// text of all descendants, with a `br` element contributing a
	/// leading newline; comments, PIs and doctypes yield null.
	@property string text()
	{
		final switch (type)
		{
			case XmlNodeType.None:
				assert(false);
			case XmlNodeType.Text:
			case XmlNodeType.CData:
				return tag;
			case XmlNodeType.Node:
			case XmlNodeType.Root:
				string result;
				if (tag == "br")
					result = "\n";
				foreach (child; children)
					result ~= child.text();
				return result;
			case XmlNodeType.Comment:
			case XmlNodeType.Meta:
			case XmlNodeType.DocType:
				return null;
		}
	}

	/// First direct child element named `tag`, or null if there is none.
	final XmlNode findChild(string tag)
	{
		foreach (child; children)
			if (child.type == XmlNodeType.Node && child.tag == tag)
				return child;
		return null;
	}

	/// All direct child elements named `tag`, in document order.
	final XmlNode[] findChildren(string tag)
	{
		XmlNode[] result;
		foreach (child; children)
			if (child.type == XmlNodeType.Node && child.tag == tag)
				result ~= child;
		return result;
	}

	/// Like `findChild`, but throws XmlParseException when no such
	/// child exists.
	final XmlNode opIndex(string tag)
	{
		auto node = findChild(tag);
		if (node is null)
			throw new XmlParseException("No such child: " ~ tag);
		return node;
	}

	/// The `index`-th direct child element named `tag`.
	/// Throws: XmlParseException if there are not that many.
	final XmlNode opIndex(string tag, size_t index)
	{
		auto nodes = findChildren(tag);
		if (index >= nodes.length)
			throw new XmlParseException(format("Can't get node with tag %s and index %d, there are only %d children with that tag", tag, index, nodes.length));
		return nodes[index];
	}

	/// The `index`-th direct child of any type.
	final XmlNode opIndex(size_t index)
	{
		return children[index];
	}

	/// Number of direct children.
	final @property size_t length() { return children.length; }

	/// Allows `foreach (child; node)` over the direct children.
	int opApply(int delegate(ref XmlNode) dg)
	{
		int result = 0;

		// size_t index: matches children.length and cannot overflow.
		// The length is re-read every iteration on purpose, so children
		// appended by the delegate are visited too.
		for (size_t i = 0; i < children.length; i++)
		{
			result = dg(children[i]);
			if (result)
				break;
		}
		return result;
	}

	/// Deep copy of this subtree (type, tag, attributes and children).
	/// The copy's root has no parent; startPos/endPos are not copied.
	final @property XmlNode dup()
	{
		auto result = new XmlNode(type, tag);
		result.attributes = attributes.dup;
		result.children.reserve(children.length);
		foreach (child; children)
			result.addChild(child.dup);
		return result;
	}
}
240 
/// Root node of a document: a Root-typed node tagged "<Root>" whose
/// children are the document's top-level nodes.
class XmlDocument : XmlNode
{
	/// Create an empty document.
	this()
	{
		super(XmlNodeType.Root, "<Root>");
	}

	/// Parse a whole document from a stream using XmlParseConfig.
	this(ref StringStream s)
	{
		this();
		parseInto!XmlParseConfig(this, s);
	}

	/// Parse a whole document from a string using XmlParseConfig.
	this(string s)
	{
		auto stream = StringStream(s);
		this(stream);
	}
}
252 
/// The logic for how to handle a node's closing tags.
/// A parse configuration picks one of these per tag name through its
/// `nodeCloseMode` function.
enum NodeCloseMode
{
	/// This element must always have an explicit closing tag
	/// (or a self-closing tag). An unclosed tag will lead to
	/// a parse error.
	/// In XML, all tags are "always".
	always,
/*
	(Currently disabled - kept here as a description of a possible mode.)

	/// Close tags are optional. When an element with a tag is
	/// encountered directly under an element with the same tag,
	/// it is assumed that the first element is closed before
	/// the second, so the two are siblings, not parent/child.
	/// Thus, `<p>a<p>b</p>` is parsed as `<p>a</p><p>b</p>`,
	/// not `<p>a<p>b</p></p>`, however `<p>a<div><p>b</div>` is
	/// still parsed as `<p>a<div><p>b</p></div></p>`.
	/// This mode can be used for relaxed HTML parsing.
	optional,
*/
	/// Close tags are optional, but are implied when absent.
	/// As a result, these elements cannot have any content,
	/// and any close tags must be adjacent to the open tag.
	implicit,

	/// This element is void and must never have a closing tag.
	/// It is always implicitly closed right after opening.
	/// A close tag is always an error.
	/// This mode can be used for strict parsing of HTML5 void
	/// elements.
	never,
}
284 
/// Configuration for parsing XML: every element needs an explicit
/// (or self-) close, and every attribute needs a value.
struct XmlParseConfig
{
	/// All tags use NodeCloseMode.always, regardless of name.
	static NodeCloseMode nodeCloseMode(string tag)
	{
		return NodeCloseMode.always;
	}

	/// Attributes without `=value` are rejected.
	enum optionalParameterValues = false;
}
292 
/// Configuration for strict parsing of HTML5.
/// Void tags must never be closed, and every non-void tag
/// must always be closed explicitly.
/// Attribute values, when present, must still be quoted like in XML.
struct Html5StrictParseConfig
{
	/// Tag names treated as void elements.
	static immutable voidElements = [
		"area"   , "base"  , "br"   , "col" ,
		"command", "embed" , "hr"   , "img" ,
		"input"  , "keygen", "link" , "meta",
		"param"  , "source", "track", "wbr" ,
	];

	/// `never` for the void elements above, `always` for everything else.
	static NodeCloseMode nodeCloseMode(string tag)
	{
		if (tag.isOneOf(voidElements))
			return NodeCloseMode.never;
		return NodeCloseMode.always;
	}

	/// Attributes may appear without a value (e.g. `<input disabled>`).
	enum optionalParameterValues = true;
}
317 
/// Parse an SGML-ish string into an XmlNode.
/// Instantiate with a parse configuration, e.g. `parse!XmlParseConfig(text)`.
alias parse = parseString!XmlNode;

/// Parse an SGML-ish string into an XmlDocument.
/// Instantiate with a parse configuration, e.g.
/// `parseDocument!Html5StrictParseConfig(text)`.
alias parseDocument = parseString!XmlDocument;

/// Parse a string into an XmlDocument using XmlParseConfig.
alias xmlParse = parseDocument!XmlParseConfig;
325 
326 private:
327 
public // alias
/// Parse the string `s` into a new `Node`, following the rules of the
/// `Config` parse configuration.
template parseString(Node)
{
	Node parseString(Config)(string s)
	{
		auto stream = StringStream(s);
		// Two-step instantiation: bind the node type first, then the config.
		alias parseWith = parseStream!Node;
		return parseWith!Config(stream);
	}
}
338 
/// Allocate a new `Node` and parse into it from the stream `s`,
/// following the rules of the `Config` parse configuration.
template parseStream(Node)
{
	Node parseStream(Config)(ref StringStream s)
	{
		auto result = new Node;
		parseInto!Config(result, s);
		return result;
	}
}

/// Parse one XmlNode from a stream (configuration supplied by the caller).
alias parseNode = parseStream!XmlNode;
350 
/// Parse an SGML-ish StringStream into an XmlDocument.
/// Top-level nodes are read until the stream is exhausted and appended
/// to `d`. A parse failure is rethrown as an XmlParseException carrying
/// the row, column and byte offset reached, chained to the original.
void parseInto(Config)(XmlDocument d, ref StringStream s)
{
	skipWhitespace(s);
	try
	{
		while (s.position < s.size)
		{
			auto child = new XmlNode;
			parseInto!Config(child, s);
			d.addChild(child);
			skipWhitespace(s);
		}
	}
	catch (XmlParseException e)
	{
		import std.algorithm.searching;
		import std.range : retro;

		// Everything consumed so far; used to locate the failure.
		auto consumed = s.s[0..s.position];
		auto consumedBytes = consumed.representation;
		auto newlines = consumedBytes.count('\n');
		// Distance back to the previous newline = column on the current
		// line; no newline at all means we are still on the first line.
		auto sinceNewline = consumedBytes.retro.countUntil('\n');
		if (sinceNewline < 0)
			sinceNewline = consumed.length;
		auto message = "Error at %d:%d (offset %d)".format(
			1 + newlines,
			1 + sinceNewline,
			consumed.length,
		);
		throw new XmlParseException(message, e);
	}
}
380 
/// Parse an SGML-ish StringStream into an XmlNode.
/// Reads exactly one node starting at the current stream position
/// (leading whitespace is skipped): a text run, a comment, a CDATA
/// section, a `<!...>` declaration, a `<?...?>` processing instruction,
/// or an element together with its attributes and children.
/// `node.startPos` / `node.endPos` receive the stream positions before
/// and after the node.
/// Throws: XmlParseException on malformed input.
void parseInto(Config)(XmlNode node, ref StringStream s)
{
	node.startPos = s.position;
	char c;
	// Skip leading whitespace; c ends up as the first significant character.
	do
		c = s.read();
	while (isWhiteChar[c]);

	if (c!='<')  // text node
	{
		node.type = XmlNodeType.Text;
		string text;
		// Collect everything up to (not including) the next '<'.
		while (c!='<')
		{
			// TODO: check for EOF
			text ~= c;
			c = s.read();
		}
		s.position--; // rewind to '<'
		node.tag = decodeEntities(text);
		//tag = tag.strip();
	}
	else
	{
		c = s.read();
		if (c=='!')
		{
			c = s.read();
			if (c == '-') // comment
			{
				expect(s, '-');
				node.type = XmlNodeType.Comment;
				string tag;
				// Accumulate until the buffer ends with the "-->" terminator,
				// then strip the terminator.
				do
				{
					c = s.read();
					tag ~= c;
				} while (tag.length<3 || tag[$-3..$] != "-->");
				tag = tag[0..$-3];
				node.tag = tag;
			}
			else
			if (c == '[') // CDATA
			{
				foreach (x; "CDATA[")
					expect(s, x);
				node.type = XmlNodeType.CData;
				string tag;
				// Accumulate until the buffer ends with the "]]>" terminator,
				// then strip the terminator. Content is kept verbatim.
				do
				{
					c = s.read();
					tag ~= c;
				} while (tag.length<3 || tag[$-3..$] != "]]>");
				tag = tag[0..$-3];
				node.tag = tag;
			}
			else // doctype, etc.
			{
				node.type = XmlNodeType.DocType;
				// Raw content between "<!" and the first '>'.
				while (c != '>')
				{
					node.tag ~= c;
					c = s.read();
				}
			}
		}
		else
		if (c=='?')
		{
			// Processing instruction: a name followed by attributes, up to "?>".
			node.type = XmlNodeType.Meta;
			node.tag = readWord(s);
			if (node.tag.length==0) throw new XmlParseException("Invalid tag");
			while (true)
			{
				skipWhitespace(s);
				if (peek(s)=='?')
					break;
				readAttribute!Config(node, s);
			}
			c = s.read(); // consume the '?'
			expect(s, '>');
		}
		else
		if (c=='/')
			throw new XmlParseException("Unexpected close tag");
		else
		{
			node.type = XmlNodeType.Node;
			// c is the first character of the tag name and is taken as-is;
			// only the remainder is restricted to word characters.
			node.tag = c~readWord(s);
			while (true)
			{
				skipWhitespace(s);
				c = peek(s);
				if (c=='>' || c=='/')
					break;
				readAttribute!Config(node, s);
			}
			c = s.read(); // '>' (open tag) or '/' (self-closing tag)

			auto closeMode = Config.nodeCloseMode(node.tag);
			if (closeMode == NodeCloseMode.never)
				// Void element: finished right after the open tag; "/>" is rejected.
				enforce!XmlParseException(c=='>', "Self-closing void tag <%s>".format(node.tag));
			else
			if (closeMode == NodeCloseMode.implicit)
			{
				// Implicitly closed: no content is parsed; just finish "/>" if present.
				if (c == '/')
					expect(s, '>');
			}
			else
			{
				if (c=='>')
				{
					// Outer loop: repeats only when a stray close tag of an
					// implicitly-closed element was accepted (see `continue`).
					while (true)
					{
						// Inner loop: parse children until a close tag ("</") is next.
						while (true)
						{
							skipWhitespace(s);
							if (peek(s)=='<' && peek(s, 2)=='/')
								break;
							try
								node.addChild(parseNode!Config(s));
							catch (XmlParseException e)
								throw new XmlParseException("Error while processing child of "~node.tag, e);
						}
						expect(s, '<');
						expect(s, '/');
						auto word = readWord(s);
						if (word != node.tag)
						{
							auto closeMode2 = Config.nodeCloseMode(word);
							if (closeMode2 == NodeCloseMode.implicit)
							{
								// NOTE(review): within this file node.parent is only set by
								// addChild, which callers invoke after this function returns,
								// so `parent` looks like it is always null here and the first
								// enforce always fires - possibly `node` itself was intended.
								// Neither configuration in this file returns `implicit`, so
								// this branch is not exercised by them. TODO confirm.
								auto parent = node.parent;
								enforce!XmlParseException(parent, "Top-level close tag for implicitly-closed node </%s>".format(word));
								enforce!XmlParseException(parent.children.length, "First-child close tag for implicitly-closed node </%s>".format(word));
								enforce!XmlParseException(parent.children[$-1].tag == word, "Non-empty implicitly-closed node <%s>".format(word));
								// NOTE(review): the '>' of the accepted close tag has not been
								// consumed at this point - verify this is intended.
								continue;
							}
							else
								enforce!XmlParseException(word == node.tag, "Expected </%s>, not </%s>".format(node.tag, word));
						}
						expect(s, '>');
						break;
					}
				}
				else // '/'
					expect(s, '>');
			}
		}
	}
	node.endPos = s.position;
}
534 
535 private:
536 
/// Read one `name="value"` (or `name='value'`) attribute at the current
/// stream position and store it, entity-decoded, in `node.attributes`.
/// When `Config.optionalParameterValues` is set, a name not followed by
/// `=` is stored with a null value.
/// Throws: XmlParseException if the name is empty or the value is not
/// quoted.
void readAttribute(Config)(XmlNode node, ref StringStream s)
{
	auto attrName = readWord(s);
	if (attrName.length==0) throw new XmlParseException("Invalid attribute");
	skipWhitespace(s);

	static if (Config.optionalParameterValues)
	{
		// Value-less attribute: record it and leave the stream where it is.
		immutable hasValue = peek(s) == '=';
		if (!hasValue)
		{
			node.attributes[attrName] = null;
			return;
		}
	}

	expect(s, '=');
	skipWhitespace(s);

	// The value is delimited by a matching pair of single or double quotes.
	immutable char quote = s.read();
	if (!(quote == '\'' || quote == '"'))
		throw new XmlParseException("Expected ' or \", not %s".format(quote));

	auto rawValue = readUntil(s, quote);
	node.attributes[attrName] = decodeEntities(rawValue);
}
561 
/// Return the `n`-th upcoming character (1 = the next one `read` would
/// return) without consuming anything.
/// Throws: XmlParseException if that character lies beyond the end of
/// the input, so truncated documents are reported as parse errors
/// instead of out-of-bounds accesses.
char peek(ref StringStream s, int n=1)
{
	immutable index = s.position + n - 1;
	enforce!XmlParseException(index < s.s.length, "Unexpected end of input");
	return s.s[index];
}
566 
/// Advance the stream past any whitespace at the current position.
/// Reads through the raw pointer (no bounds check).
void skipWhitespace(ref StringStream s)
{
	auto data = s.s.ptr;
	auto pos = s.position;
	while (isWhiteChar[data[pos]])
		pos++;
	s.position = pos;
}
572 
/// Per-byte lookup tables, filled once at program start:
/// `isWhiteChar[c]` - c is ASCII whitespace;
/// `isWordChar[c]`  - c may appear in a tag/attribute name
///                    (ASCII letters, digits, '-', '_' and ':').
__gshared bool[256] isWhiteChar, isWordChar;

shared static this()
{
	foreach (code; 0..256)
	{
		immutable isNamePunctuation = code=='-' || code=='_' || code==':';
		isWordChar[code] = isNamePunctuation || isAlphaNum(code);
		isWhiteChar[code] = isWhite(code);
	}
}
583 
/// Consume the longest run of word characters (see `isWordChar`) at the
/// current position and return it as a slice of the input.
/// Returns an empty string, consuming nothing, if the next character
/// is not a word character or the input is exhausted.
string readWord(ref StringStream stream)
{
	auto base = stream.s.ptr;
	auto first = base + stream.position;
	auto limit = base + stream.s.length;

	auto cursor = first;
	for (; cursor < limit && isWordChar[*cursor]; cursor++)
	{
	}

	auto consumed = cursor - first;
	stream.position += consumed;
	return first[0..consumed];
}
595 
/// Consume the next character and verify that it is `c`.
/// Throws: XmlParseException naming both characters on a mismatch.
void expect(ref StringStream s, char c)
{
	immutable char actual = s.read();
	enforce!XmlParseException(actual == c, "Expected " ~ c ~ ", got " ~ actual);
}
602 
/// Return the input from the current position up to (not including)
/// the next occurrence of `until`, and move the stream past that
/// terminator. The result is a slice of the input.
/// Throws: XmlParseException if `until` does not occur in the rest of
/// the input (e.g. an unterminated attribute value). The scan is
/// bounded by the end of the string and never reads beyond it.
string readUntil(ref StringStream s, char until)
{
	auto start = s.s.ptr + s.position;
	auto end = s.s.ptr + s.s.length;
	auto p = start;
	while (p < end && *p != until)
		p++;
	enforce!XmlParseException(p < end, "Unexpected end of input (missing " ~ until ~ ")");
	auto len = p-start;
	s.position += len + 1; // +1 skips the terminator itself
	return start[0..len];
}
612 
// Round trip: parsing a document and serializing it again must
// reproduce the input exactly (including the encoded entities).
unittest
{
	// Pieces are joined with `~`: implicit concatenation of adjacent
	// string literals is no longer accepted by current D compilers.
	enum xmlText =
		`<?xml version="1.0" encoding="UTF-8"?>` ~
		`<quotes>` ~
			`<quote author="Alan Perlis">` ~
				`When someone says, &quot;I want a programming language in which I need only say what I want done,&quot; give him a lollipop.` ~
			`</quote>` ~
		`</quotes>`;
	auto doc = new XmlDocument(xmlText);
	assert(doc.toString() == xmlText);
}
625 
// Both tables are filled exactly once, in the `shared static this`
// module constructor below. They are therefore declared __gshared:
// as plain (thread-local) module variables only the main thread's
// copies would be populated, and entity encoding/decoding would see
// empty tables in every other thread.

/// Entity name (without `&` and `;`) -> character.
__gshared const dchar[string] entities;
/// Character -> entity name; the reverse of `entities`.
__gshared /*const*/ string[dchar] entityNames;
// Fills `entities` with the named character entities known to this
// module (each value is written as a D named-character literal), then
// derives `entityNames` as the reverse mapping.
shared static this()
{
	entities =
	[
		"quot" : '\&quot;',
		"amp" : '\&amp;',
		"lt" : '\&lt;',
		"gt" : '\&gt;',

		"OElig" : '\&OElig;',
		"oelig" : '\&oelig;',
		"Scaron" : '\&Scaron;',
		"scaron" : '\&scaron;',
		"Yuml" : '\&Yuml;',
		"circ" : '\&circ;',
		"tilde" : '\&tilde;',
		"ensp" : '\&ensp;',
		"emsp" : '\&emsp;',
		"thinsp" : '\&thinsp;',
		"zwnj" : '\&zwnj;',
		"zwj" : '\&zwj;',
		"lrm" : '\&lrm;',
		"rlm" : '\&rlm;',
		"ndash" : '\&ndash;',
		"mdash" : '\&mdash;',
		"lsquo" : '\&lsquo;',
		"rsquo" : '\&rsquo;',
		"sbquo" : '\&sbquo;',
		"ldquo" : '\&ldquo;',
		"rdquo" : '\&rdquo;',
		"bdquo" : '\&bdquo;',
		"dagger" : '\&dagger;',
		"Dagger" : '\&Dagger;',
		"permil" : '\&permil;',
		"lsaquo" : '\&lsaquo;',
		"rsaquo" : '\&rsaquo;',
		"euro" : '\&euro;',

		"nbsp" : '\&nbsp;',
		"iexcl" : '\&iexcl;',
		"cent" : '\&cent;',
		"pound" : '\&pound;',
		"curren" : '\&curren;',
		"yen" : '\&yen;',
		"brvbar" : '\&brvbar;',
		"sect" : '\&sect;',
		"uml" : '\&uml;',
		"copy" : '\&copy;',
		"ordf" : '\&ordf;',
		"laquo" : '\&laquo;',
		"not" : '\&not;',
		"shy" : '\&shy;',
		"reg" : '\&reg;',
		"macr" : '\&macr;',
		"deg" : '\&deg;',
		"plusmn" : '\&plusmn;',
		"sup2" : '\&sup2;',
		"sup3" : '\&sup3;',
		"acute" : '\&acute;',
		"micro" : '\&micro;',
		"para" : '\&para;',
		"middot" : '\&middot;',
		"cedil" : '\&cedil;',
		"sup1" : '\&sup1;',
		"ordm" : '\&ordm;',
		"raquo" : '\&raquo;',
		"frac14" : '\&frac14;',
		"frac12" : '\&frac12;',
		"frac34" : '\&frac34;',
		"iquest" : '\&iquest;',
		"Agrave" : '\&Agrave;',
		"Aacute" : '\&Aacute;',
		"Acirc" : '\&Acirc;',
		"Atilde" : '\&Atilde;',
		"Auml" : '\&Auml;',
		"Aring" : '\&Aring;',
		"AElig" : '\&AElig;',
		"Ccedil" : '\&Ccedil;',
		"Egrave" : '\&Egrave;',
		"Eacute" : '\&Eacute;',
		"Ecirc" : '\&Ecirc;',
		"Euml" : '\&Euml;',
		"Igrave" : '\&Igrave;',
		"Iacute" : '\&Iacute;',
		"Icirc" : '\&Icirc;',
		"Iuml" : '\&Iuml;',
		"ETH" : '\&ETH;',
		"Ntilde" : '\&Ntilde;',
		"Ograve" : '\&Ograve;',
		"Oacute" : '\&Oacute;',
		"Ocirc" : '\&Ocirc;',
		"Otilde" : '\&Otilde;',
		"Ouml" : '\&Ouml;',
		"times" : '\&times;',
		"Oslash" : '\&Oslash;',
		"Ugrave" : '\&Ugrave;',
		"Uacute" : '\&Uacute;',
		"Ucirc" : '\&Ucirc;',
		"Uuml" : '\&Uuml;',
		"Yacute" : '\&Yacute;',
		"THORN" : '\&THORN;',
		"szlig" : '\&szlig;',
		"agrave" : '\&agrave;',
		"aacute" : '\&aacute;',
		"acirc" : '\&acirc;',
		"atilde" : '\&atilde;',
		"auml" : '\&auml;',
		"aring" : '\&aring;',
		"aelig" : '\&aelig;',
		"ccedil" : '\&ccedil;',
		"egrave" : '\&egrave;',
		"eacute" : '\&eacute;',
		"ecirc" : '\&ecirc;',
		"euml" : '\&euml;',
		"igrave" : '\&igrave;',
		"iacute" : '\&iacute;',
		"icirc" : '\&icirc;',
		"iuml" : '\&iuml;',
		"eth" : '\&eth;',
		"ntilde" : '\&ntilde;',
		"ograve" : '\&ograve;',
		"oacute" : '\&oacute;',
		"ocirc" : '\&ocirc;',
		"otilde" : '\&otilde;',
		"ouml" : '\&ouml;',
		"divide" : '\&divide;',
		"oslash" : '\&oslash;',
		"ugrave" : '\&ugrave;',
		"uacute" : '\&uacute;',
		"ucirc" : '\&ucirc;',
		"uuml" : '\&uuml;',
		"yacute" : '\&yacute;',
		"thorn" : '\&thorn;',
		"yuml" : '\&yuml;',

		"fnof" : '\&fnof;',
		"Alpha" : '\&Alpha;',
		"Beta" : '\&Beta;',
		"Gamma" : '\&Gamma;',
		"Delta" : '\&Delta;',
		"Epsilon" : '\&Epsilon;',
		"Zeta" : '\&Zeta;',
		"Eta" : '\&Eta;',
		"Theta" : '\&Theta;',
		"Iota" : '\&Iota;',
		"Kappa" : '\&Kappa;',
		"Lambda" : '\&Lambda;',
		"Mu" : '\&Mu;',
		"Nu" : '\&Nu;',
		"Xi" : '\&Xi;',
		"Omicron" : '\&Omicron;',
		"Pi" : '\&Pi;',
		"Rho" : '\&Rho;',
		"Sigma" : '\&Sigma;',
		"Tau" : '\&Tau;',
		"Upsilon" : '\&Upsilon;',
		"Phi" : '\&Phi;',
		"Chi" : '\&Chi;',
		"Psi" : '\&Psi;',
		"Omega" : '\&Omega;',
		"alpha" : '\&alpha;',
		"beta" : '\&beta;',
		"gamma" : '\&gamma;',
		"delta" : '\&delta;',
		"epsilon" : '\&epsilon;',
		"zeta" : '\&zeta;',
		"eta" : '\&eta;',
		"theta" : '\&theta;',
		"iota" : '\&iota;',
		"kappa" : '\&kappa;',
		"lambda" : '\&lambda;',
		"mu" : '\&mu;',
		"nu" : '\&nu;',
		"xi" : '\&xi;',
		"omicron" : '\&omicron;',
		"pi" : '\&pi;',
		"rho" : '\&rho;',
		"sigmaf" : '\&sigmaf;',
		"sigma" : '\&sigma;',
		"tau" : '\&tau;',
		"upsilon" : '\&upsilon;',
		"phi" : '\&phi;',
		"chi" : '\&chi;',
		"psi" : '\&psi;',
		"omega" : '\&omega;',
		"thetasym" : '\&thetasym;',
		"upsih" : '\&upsih;',
		"piv" : '\&piv;',
		"bull" : '\&bull;',
		"hellip" : '\&hellip;',
		"prime" : '\&prime;',
		"Prime" : '\&Prime;',
		"oline" : '\&oline;',
		"frasl" : '\&frasl;',
		"weierp" : '\&weierp;',
		"image" : '\&image;',
		"real" : '\&real;',
		"trade" : '\&trade;',
		"alefsym" : '\&alefsym;',
		"larr" : '\&larr;',
		"uarr" : '\&uarr;',
		"rarr" : '\&rarr;',
		"darr" : '\&darr;',
		"harr" : '\&harr;',
		"crarr" : '\&crarr;',
		"lArr" : '\&lArr;',
		"uArr" : '\&uArr;',
		"rArr" : '\&rArr;',
		"dArr" : '\&dArr;',
		"hArr" : '\&hArr;',
		"forall" : '\&forall;',
		"part" : '\&part;',
		"exist" : '\&exist;',
		"empty" : '\&empty;',
		"nabla" : '\&nabla;',
		"isin" : '\&isin;',
		"notin" : '\&notin;',
		"ni" : '\&ni;',
		"prod" : '\&prod;',
		"sum" : '\&sum;',
		"minus" : '\&minus;',
		"lowast" : '\&lowast;',
		"radic" : '\&radic;',
		"prop" : '\&prop;',
		"infin" : '\&infin;',
		"ang" : '\&ang;',
		"and" : '\&and;',
		"or" : '\&or;',
		"cap" : '\&cap;',
		"cup" : '\&cup;',
		"int" : '\&int;',
		"there4" : '\&there4;',
		"sim" : '\&sim;',
		"cong" : '\&cong;',
		"asymp" : '\&asymp;',
		"ne" : '\&ne;',
		"equiv" : '\&equiv;',
		"le" : '\&le;',
		"ge" : '\&ge;',
		"sub" : '\&sub;',
		"sup" : '\&sup;',
		"nsub" : '\&nsub;',
		"sube" : '\&sube;',
		"supe" : '\&supe;',
		"oplus" : '\&oplus;',
		"otimes" : '\&otimes;',
		"perp" : '\&perp;',
		"sdot" : '\&sdot;',
		"lceil" : '\&lceil;',
		"rceil" : '\&rceil;',
		"lfloor" : '\&lfloor;',
		"rfloor" : '\&rfloor;',
		"loz" : '\&loz;',
		"spades" : '\&spades;',
		"clubs" : '\&clubs;',
		"hearts" : '\&hearts;',
		"diams" : '\&diams;',
		"lang" : '\&lang;',
		"rang" : '\&rang;',

		"apos"  : '\''
	];
	// Build the reverse table (character -> entity name).
	foreach (name, c; entities)
		entityNames[c] = name;
}
893 
894 import core.stdc.stdio;
895 import std.utf;
896 import ae.utils.textout;
897 
/// Escape the five XML special characters (`<`, `>`, `"`, `'`, `&`)
/// in `str` as named entities.
/// Returns `str` itself, without allocating, when nothing needs escaping.
public string encodeEntities(string str)
{
	foreach (offset, ch; str)
	{
		switch (ch)
		{
			case '<', '>', '"', '\'', '&':
			{
				// First special character found: copy the clean prefix
				// verbatim and encode only the remainder.
				StringBuilder sb;
				sb.preallocate(str.length * 11 / 10);
				sb.put(str[0..offset]);
				sb.putEncodedEntities(str[offset..$]);
				return sb.get();
			}
			default:
				break;
		}
	}
	return str;
}
911 
/// Write `str` to `sink`, replacing each of the five XML special
/// characters (`<`, `>`, `"`, `'`, `&`) with its named entity.
/// Unescaped runs are written as whole slices.
public void putEncodedEntities(Sink, S)(ref Sink sink, S str)
{
	// Start of the pending run of characters that need no escaping.
	size_t runStart = 0;
	foreach (pos, ch; str)
	{
		immutable special = ch=='&' || ch=='<' || ch=='>' || ch=='"' || ch=='\'';
		if (!special)
			continue;
		// Flush the pending run, followed by the entity for this character.
		sink.put(str[runStart..pos], '&', entityNames[ch], ';');
		runStart = pos+1;
	}
	sink.put(str[runStart..$]);
}
923 
/// Replace every character of `str` that has a named entity (see
/// `entityNames`) with `&name;`.
/// Returns `str` itself when it contains no such character.
public string encodeAllEntities(string str)
{
	import std.array : appender;

	// Single forward pass: unescaped runs are copied as slices and the
	// output is built once, instead of rebuilding the whole string for
	// every replaced character.
	auto result = appender!string();
	size_t runStart = 0;
	bool replaced = false;

	foreach (i, dchar c; str)
	{
		if (auto name = c in entityNames)
		{
			result.put(str[runStart..i]);
			result.put('&');
			result.put(*name);
			result.put(';');
			// Skip the whole UTF-8 sequence of the replaced character.
			runStart = i + stride(str, i);
			replaced = true;
		}
	}

	if (!replaced)
		return str;
	result.put(str[runStart..$]);
	return result.data;
}
935 
936 import ae.utils.text;
937 import std.conv;
938 
/// Replace every `&name;`, `&#NNN;` and `&#xHHH;` reference in `str`
/// with the character it denotes.
/// Returns `str` itself when it contains no `&`.
/// Throws: XmlParseException on an `&` without a terminating `;`, an
/// unknown entity name, a malformed number, or a number that is not a
/// valid Unicode code point.
public string decodeEntities(string str)
{
	auto fragments = str.fastSplit('&');
	if (fragments.length <= 1)
		return str;

	// Result layout: text, char, text, char, ..., text.
	// Each decoded character is encoded into its own 4-byte buffer.
	auto interleaved = new string[fragments.length*2 - 1];
	auto buffers = new char[4][fragments.length-1];
	interleaved[0] = fragments[0];

	foreach (n, fragment; fragments[1..$])
	{
		auto p = fragment.indexOf(';');
		enforce!XmlParseException(p>0, "Invalid entity (unescaped ampersand?)");
		auto name = fragment[0..p];

		dchar c;
		if (name[0]=='#')
		{
			// Numeric reference. Conversion failures (no digits, bad
			// digits, overflow) are reported as parse errors.
			try
			{
				if (name.length > 1 && name[1]=='x')
					c = to!uint(name[2..$], 16);
				else
					c = to!uint(name[1..$]);
			}
			catch (ConvException e)
				throw new XmlParseException("Invalid numeric entity: " ~ name, e);
		}
		else
		{
			auto pentity = name in entities;
			enforce!XmlParseException(pentity, "Unknown entity: " ~ name);
			c = *pentity;
		}

		size_t encodedLength;
		try
			encodedLength = std.utf.encode(buffers[n], c);
		catch (UTFException e)
			throw new XmlParseException("Invalid code point in entity: " ~ name, e);

		interleaved[1+n*2] = cast(string) buffers[n][0..encodedLength];
		interleaved[2+n*2] = fragment[p+1..$];
	}

	return interleaved.join();
}

/// Old name of `decodeEntities`.
deprecated alias convertEntities = decodeEntities;
977 
// Entity encoding / decoding examples.
unittest
{
	// Only the five XML special characters are escaped.
	enum plain = `The <Smith & Wesson> "lock'n'load"`;
	enum escaped = `The &lt;Smith &amp; Wesson&gt; &quot;lock&apos;n&apos;load&quot;`;
	assert(encodeEntities(plain) == escaped);

	// Every character with a known entity name is converted, and back.
	enum symbols = "©,€";
	enum symbolEntities = "&copy;,&euro;";
	assert(encodeAllEntities(symbols) == symbolEntities);
	assert(decodeEntities(symbolEntities) == symbols);
}