1 /**
2  * XmlParser client which builds a DOM efficiently.
3  * WORK IN PROGRESS.
4  *
5  * License:
6  *   This Source Code Form is subject to the terms of
7  *   the Mozilla Public License, v. 2.0. If a copy of
8  *   the MPL was not distributed with this file, You
9  *   can obtain one at http://mozilla.org/MPL/2.0/.
10  *
11  * Authors:
12  *   Vladimir Panteleev <vladimir@thecybershadow.net>
13  */
14 
15 module ae.utils.xmldom;
16 
17 import std.exception;
18 
19 import ae.utils.xmlparser;
20 import ae.utils.alloc;
21 
22 enum XmlNodeType
23 {
24 	root,
25 	tag,
26 	attribute,
27 	directive,
28 	processingInstruction,
29 	text
30 }
31 
32 
33 struct XmlDom(STRING=string, XML_STRING=XmlString!STRING)
34 {
35 	struct Node
36 	{
37 		XmlNodeType type;
38 		union
39 		{
40 			STRING tag;
41 			alias tag attributeName;
42 			XML_STRING text;
43 		}
44 		union
45 		{
46 			XML_STRING attributeValue; /// Attribute tags only
47 			Node* firstChild; /// Non-attribute tags only
48 		}
49 		Node* nextSibling;
50 		Node* parent;
51 	}
52 
53 	Node* root;
54 
55 	struct Cursor
56 	{
57 		Node* currentParent, currentSibling;
58 
59 		void descend()
60 		{
61 			assert(currentSibling, "Nowhere to descend to");
62 			currentParent = currentSibling;
63 			//currentSibling = null;
64 			currentSibling = currentSibling.firstChild;
65 		}
66 
67 		void ascend()
68 		{
69 			assert(currentParent, "Nowhere to ascend to");
70 			currentSibling = currentParent;
71 			currentParent = currentSibling.parent;
72 		}
73 
74 		void insertNode(Node* n)
75 		{
76 			n.parent = currentParent;
77 			auto pNext = currentSibling ? &currentSibling.nextSibling : &currentParent.firstChild;
78 			n.nextSibling = *pNext;
79 		}
80 
81 		/// Assumes we are at the last sibling
82 		void appendNode(Node* n)
83 		{
84 			n.parent = currentParent;
85 			n.nextSibling = null;
86 			auto pNext = currentSibling ? &currentSibling.nextSibling : &currentParent.firstChild;
87 			assert(*pNext, "Cannot append when not at the last sibling");
88 			currentSibling = *pNext = n;
89 		}
90 
91 		@property bool empty() { return currentSibling !is null; }
92 		alias currentSibling front;
93 		void popFront() { currentSibling = currentSibling.nextSibling; }
94 	}
95 
96 	Cursor getCursor()
97 	{
98 		assert(root, "No root node");
99 		Cursor cursor;
100 		cursor.currentParent = root;
101 		cursor.currentSibling = null;
102 		return cursor;
103 	}
104 }
105 
106 static template XmlDomWriter(alias dom_, alias allocator=heapAllocator)
107 {
108 	alias dom = dom_;
109 	alias DOM = typeof(dom);
110 	alias DOM.Node Node;
111 
112 	private Node* newNode()
113 	{
114 		auto n = allocator.allocate!Node();
115 		n.nextSibling = null;
116 		return n;
117 	}
118 
119 	DOM.Cursor newDocument()
120 	{
121 		with (*(dom.root = newNode()))
122 			type = XmlNodeType.root,
123 			parent = null;
124 		return dom.getCursor();
125 	}
126 }
127 
128 /// STRING_FILTER is a policy type which determines how strings are
129 /// transformed for permanent storage inside the DOM.
130 /// By default, we store slices of the original XML document.
131 /// However, if the parsing is done using temporary buffers,
132 /// STRING_FILTER will want to copy (.idup) the strings before
133 /// letting us store them.
134 /// STRING_FILTER can also be used to implement a string pool, to
135 /// make a trade-off between memory consumption and speed
136 /// (an XML document is likely to contain many repeating strings,
137 /// such as tag and attribute names). Merging identical strings
138 /// to obtain unique string pointers or IDs would also allow very
139 /// quick tag/attribute name lookup, and avoid repeated entity
140 /// decoding.
141 struct XmlDomParser(alias WRITER, alias STRING_FILTER=NoopStringFilter, bool CHECKED=true)
142 {
143 	WRITER.DOM.Cursor cursor;
144 
145 	STRING_FILTER stringFilter;
146 
147 	alias WRITER.Node Node;
148 
149 	private Node* addNode(XmlNodeType type)
150 	{
151 		assert(cursor.currentParent.type != XmlNodeType.attribute);
152 		Node* n = WRITER.newNode();
153 		n.type = type;
154 		cursor.appendNode(n);
155 		return n;
156 	}
157 
158 	void startDocument()
159 	{
160 		cursor = WRITER.newDocument();
161 	}
162 
163 	void text(XML_STRING)(XML_STRING s)
164 	{
165 		with (*addNode(XmlNodeType.text))
166 			text = stringFilter.handleXmlString(s);
167 	}
168 
169 	void directive(XML_STRING)(XML_STRING s)
170 	{
171 		with (*addNode(XmlNodeType.directive))
172 			text = stringFilter.handleXmlString(s);
173 	}
174 
175 	void startProcessingInstruction(STRING)(STRING s)
176 	{
177 		with (*addNode(XmlNodeType.processingInstruction))
178 			tag = stringFilter.handleString(s);
179 		cursor.descend();
180 	}
181 
182 	void endProcessingInstruction()
183 	{
184 		cursor.ascend();
185 	}
186 
187 	void startTag(STRING)(STRING s)
188 	{
189 		with (*addNode(XmlNodeType.tag))
190 			tag = stringFilter.handleString(s);
191 		cursor.descend();
192 	}
193 
194 	void attribute(STRING, XML_STRING)(STRING name, XML_STRING value)
195 	{
196 		with (*addNode(XmlNodeType.attribute))
197 		{
198 			attributeName  = stringFilter.handleString   (name);
199 			attributeValue = stringFilter.handleXmlString(value);
200 		}
201 	}
202 
203 	void endAttributes() {}
204 
205 	void endAttributesAndTag() {}
206 
207 	void endTag(STRING)(STRING s)
208 	{
209 		cursor.ascend();
210 		static if (CHECKED)
211 			enforce(stringFilter.handleString(s) == cursor.currentSibling.tag);
212 	}
213 
214 	void endDocument()
215 	{
216 		cursor.ascend();
217 		enforce(cursor.currentSibling is WRITER.dom.root, "Unexpected end of document");
218 	}
219 }
220 
221 struct NoopStringFilter
222 {
223 	auto handleString   (S)(S s) { return s; }
224 	auto handleXmlString(S)(S s) { return s; }
225 }
226 
227 unittest
228 {
229 	// Test instantiation
230 	XmlDom!string dom;
231 	alias XmlDomWriter!dom WRITER;
232 	alias XmlDomParser!WRITER OUTPUT;
233 	alias XmlParser!(string, OUTPUT) PARSER;
234 	PARSER p;
235 }