1 /**
2  * An XML writer written for speed
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <ae@cy.md>
12  */
13 
14 module ae.utils.xmlwriter;
15 
16 import ae.utils.textout;
17 
/// Formatter which emits no formatting whitespace at all.
struct NullXmlFormatter
{
	/// Implementation of formatter interface.
	/// This formatter always reports itself as disabled.
	@property bool enabled()
	{
		return false;
	}

	/// ditto
	/// The setter accepts and discards the requested value.
	@property void enabled(bool value)
	{
	}

	/// Members mixed into the writer. Every formatting hook is an empty stub.
	mixin template Mixin(alias formatter)
	{
		/// Stubs.
		void newLine()
		{
		}

		/// ditto
		void startLine()
		{
		}

		/// ditto
		void indent()
		{
		}

		/// ditto
		void outdent()
		{
		}
	}
}
34 
/// Customizable formatter.
///
/// Params:
///   indentCharP = character repeated to build the indentation of a line
///   indentSizeP = number of `indentCharP` characters written per indent level
struct CustomXmlFormatter(char indentCharP, uint indentSizeP)
{
	enum indentChar = indentCharP; ///
	enum indentSize = indentSizeP; ///

	/// Implementation of formatter interface.
	/// When false, `newLine` and `startLine` write nothing.
	bool enabled = true;

	/// Members mixed into the writer. The mixin expects an `output` symbol
	/// in the scope it is mixed into, providing `put` and `allocate`.
	mixin template Mixin(alias formatter)
	{
		// Current nesting depth, changed by indent() / outdent().
		private uint indentLevel = 0;

		/// Implementation of formatter interface.
		void newLine()
		{
			if (!formatter.enabled)
				return;
			output.put('\n');
		}

		/// ditto
		void startLine()
		{
			if (!formatter.enabled)
				return;
			// Reserve room for the whole indentation in one go, then fill it.
			auto padding = output.allocate(indentLevel * formatter.indentSize);
			padding[] = formatter.indentChar;
		}

		/// ditto
		void indent()
		{
			++indentLevel;
		}

		/// ditto
		void outdent()
		{
			assert(indentLevel);
			--indentLevel;
		}
	}
}

/// Default formatter, configured with indentation consisting of one tab character.
alias DefaultXmlFormatter = CustomXmlFormatter!('\t', 1);
68 
/// Customizable XML writer.
///
/// Markup is written incrementally to `output`. Methods taking the tag or
/// attribute name as a template argument build the constant parts of the
/// markup at compile time; the overloads taking it as a runtime argument
/// write the pieces separately.
///
/// In debug builds, a stack of open tags and an "inside attribute list"
/// flag are maintained and checked with assertions.
///
/// Params:
///   WRITER    = output sink type; this struct calls `put` on it with
///               characters and strings (possibly several per call)
///   Formatter = formatter type whose `Mixin` template supplies
///               `newLine`, `startLine`, `indent` and `outdent`
struct CustomXmlWriter(WRITER, Formatter)
{
	/// You can set this to something to e.g. write to another buffer.
	WRITER output;

	/// Formatter instance.
	Formatter formatter;
	mixin Formatter.Mixin!formatter;

	private debug // verify well-formedness
	{
		// Names of the tags / processing instructions opened and not yet closed.
		string[] tagStack;
		void pushTag(string tag) { tagStack ~= tag; }
		// Pop the innermost open tag without checking its name.
		void popTag ()
		{
			assert(tagStack.length, "No tag to close");
			tagStack = tagStack[0..$-1];
		}
		// Pop the innermost open tag, checking that it is the one being closed.
		void popTag (string tag)
		{
			assert(tagStack.length, "No tag to close");
			assert(tagStack[$-1] == tag, "Closing wrong tag (" ~ tag ~ " instead of " ~ tagStack[$-1] ~ ")");
			tagStack = tagStack[0..$-1];
		}

		// True while an attribute list is open, i.e. between
		// startTagWithAttributes/startPI and endAttributes/endAttributesAndTag/endPI.
		bool inAttributes;
	}

	/// Write the beginning of an XML document.
	void startDocument()
	{
		output.put(`<?xml version="1.0" encoding="UTF-8"?>`);
		newLine();
		debug assert(tagStack.length==0);
	}

	deprecated alias text putText;

	/// Write plain text (escaping entities).
	void text(in char[] s)
	{
		escapedText!(EscapeScope.text)(s);
	}

	/// Write attribute contents.
	alias attrText = escapedText!(EscapeScope.attribute);

	// Write `s`, substituting every byte flagged in the escape table of the
	// given scope with its replacement string. Runs of unflagged bytes are
	// written in one `put` call each.
	private void escapedText(EscapeScope escapeScope)(in char[] s)
	{
		// https://gist.github.com/2192846

		// `start` marks the beginning of the pending run of literal bytes.
		auto start = s.ptr, p = start, end = start+s.length;

		alias E = Escapes!escapeScope;

		while (p < end)
		{
			auto c = *p++;
			if (E.escaped[c])
			{
				// Flush the pending literal run (which excludes c),
				// followed by the replacement for c.
				output.put(start[0..p-start-1], E.chars[c]);
				start = p;
			}
		}

		// Flush whatever literal bytes remain after the last escape.
		output.put(start[0..p-start]);
	}

	// Common

	// Shared body for opening a construct followed by attributes.
	// Expects `STATIC`, `OPEN` and `name` to be defined at the mixin site.
	private enum mixStartWithAttributesGeneric =
	q{
		debug assert(!inAttributes, "Tag attributes not ended");
		startLine();

		static if (STATIC)
			output.put(OPEN ~ name);
		else
			output.put(OPEN, name);

		debug inAttributes = true;
		debug pushTag(name);
	};

	// Shared body for closing an attribute list together with its construct.
	// Expects `CLOSE` to be defined at the mixin site.
	private enum mixEndAttributesAndTagGeneric =
	q{
		debug assert(inAttributes, "Tag attributes not started");
		output.put(CLOSE);
		newLine();
		debug inAttributes = false;
		debug popTag();
	};

	// startTag

	// Body of startTag. Expects `STATIC` and `name` at the mixin site.
	private enum mixStartTag =
	q{
		debug assert(!inAttributes, "Tag attributes not ended");
		startLine();

		static if (STATIC)
			output.put('<' ~ name ~ '>');
		else
			output.put('<', name, '>');

		newLine();
		indent();
		debug pushTag(name);
	};

	/// Write opening a tag (no attributes).
	void startTag(string name)() { enum STATIC = true;  mixin(mixStartTag); }
	void startTag()(string name) { enum STATIC = false; mixin(mixStartTag); } /// ditto

	// startTagWithAttributes

	/// Write opening a tag (attributes follow).
	void startTagWithAttributes(string name)() { enum STATIC = true;  enum OPEN = '<'; mixin(mixStartWithAttributesGeneric); }
	void startTagWithAttributes()(string name) { enum STATIC = false; enum OPEN = '<'; mixin(mixStartWithAttributesGeneric); } /// ditto

	// addAttribute

	// Body of addAttribute. Expects `STATIC`, `name` and `value` at the mixin site.
	// The name is written as-is; the value goes through attrText.
	private enum mixAddAttribute =
	q{
		debug assert(inAttributes, "Tag attributes not started");

		static if (STATIC)
			output.put(' ' ~ name ~ `="`);
		else
			output.put(' ', name, `="`);

		attrText(value);
		output.put('"');
	};

	/// Write tag attribute.
	void addAttribute(string name)(string value)   { enum STATIC = true;  mixin(mixAddAttribute); }
	void addAttribute()(string name, string value) { enum STATIC = false; mixin(mixAddAttribute); } /// ditto

	// endAttributes[AndTag]

	/// Write end of attributes and begin tag contents.
	void endAttributes()
	{
		debug assert(inAttributes, "Tag attributes not started");
		output.put('>');
		newLine();
		indent();
		debug inAttributes = false;
	}

	/// Write end of attributes and tag.
	void endAttributesAndTag() { enum CLOSE = "/>"; mixin(mixEndAttributesAndTagGeneric); }

	// endTag

	// Body of endTag. Expects `STATIC` and `name` at the mixin site.
	private enum mixEndTag =
	q{
		debug assert(!inAttributes, "Tag attributes not ended");
		outdent();
		startLine();

		static if (STATIC)
			output.put("</" ~ name ~ ">");
		else
			output.put("</", name, ">");

		newLine();
		debug popTag(name);
	};

	/// Write end of tag.
	void endTag(string name)() { enum STATIC = true;  mixin(mixEndTag); }
	void endTag()(string name) { enum STATIC = false; mixin(mixEndTag); } /// ditto

	// Processing instructions

	/// Write a processing instruction.
	/// Attributes may be added between startPI and endPI using addAttribute.
	void startPI(string name)() { enum STATIC = true;  enum OPEN = "<?"; mixin(mixStartWithAttributesGeneric); }
	void startPI()(string name) { enum STATIC = false; enum OPEN = "<?"; mixin(mixStartWithAttributesGeneric); } /// ditto
	void endPI() { enum CLOSE = "?>"; mixin(mixEndAttributesAndTagGeneric); } /// ditto

	// Doctypes

	deprecated alias doctype putDoctype;

	/// Write a DOCTYPE declaration.
	/// `text` is written verbatim between `<!` and `>`; no escaping is applied.
	void doctype(string text)
	{
		debug assert(!inAttributes, "Tag attributes not ended");
		// Indent like any other line-level construct, so that formatted
		// output stays aligned with the surrounding tags.
		startLine();
		output.put("<!", text, ">");
		newLine();
	}

	/// Write an XML comment.
	/// `text` is written verbatim between `<!--` and `-->`; no escaping is applied.
	void comment(string text)
	{
		debug assert(!inAttributes, "Tag attributes not ended");
		// Indent like any other line-level construct, so that formatted
		// output stays aligned with the surrounding tags.
		startLine();
		output.put("<!--", text, "-->");
		newLine();
	}
}
270 
/// Legacy way of selecting a writer: `pretty` chooses between
/// DefaultXmlFormatter (true) and NullXmlFormatter (false).
deprecated template CustomXmlWriter(Writer, bool pretty)
{
	static if (!pretty)
		alias CustomXmlWriter = CustomXmlWriter!(Writer, NullXmlFormatter);
	else
		alias CustomXmlWriter = CustomXmlWriter!(Writer, DefaultXmlFormatter);
}
278 
/// XML writer with no formatting (StringBuilder output, NullXmlFormatter).
alias XmlWriter = CustomXmlWriter!(StringBuilder, NullXmlFormatter);
/// XML writer with formatting (StringBuilder output, DefaultXmlFormatter).
alias PrettyXmlWriter = CustomXmlWriter!(StringBuilder, DefaultXmlFormatter);
283 
284 private:
285 
/// Context in which text is being written; selects the escaping rules.
enum EscapeScope
{
	text,      /// element content
	attribute, /// attribute value delimited by double quotes
}

/// Per-byte escaping tables for the given scope.
///
/// The tables are filled once by the shared static constructor:
/// `escaped[c]` tells whether byte `c` must be replaced, and `chars[c]`
/// holds what to write for it (the replacement, or the byte itself).
///
/// Always replaced: `<`, `>`, `&`, and control characters below 0x20 other
/// than TAB, LF and CR. In attribute scope, `"` is replaced as well, and so
/// are TAB, LF and CR (as character references): written literally inside
/// an attribute value they would be turned into spaces by attribute-value
/// normalization when the document is parsed.
private struct Escapes(EscapeScope escapeScope)
{
	static __gshared string[256] chars;
	static __gshared bool[256] escaped;

	shared static this()
	{
		import std.string : format;

		foreach (c; 0..256)
		{
			// Stays empty when the byte can be written as-is.
			string replacement;

			switch (c)
			{
				case '<':
					replacement = "&lt;";
					break;
				case '>':
					replacement = "&gt;";
					break;
				case '&':
					replacement = "&amp;";
					break;
				case '"':
					if (escapeScope == EscapeScope.attribute)
						replacement = "&quot;";
					break;
				case 0x09, 0x0A, 0x0D:
					// Whitespace is only significant to escape inside attribute values.
					if (escapeScope == EscapeScope.attribute)
						replacement = format("&#x%02X;", c);
					break;
				default:
					if (c < 0x20)
						replacement = format("&#x%02X;", c);
					break;
			}

			if (replacement.length)
			{
				chars[c] = replacement;
				escaped[c] = true;
			}
			else
			{
				chars[c] = [cast(char)c];
				escaped[c] = false;
			}
		}
	}
}
323 
// Writes a small document with the unformatted writer and checks the exact
// output, including that double quotes are left unescaped in element text.
unittest
{
	string[string] quotes = [
		"Alan Perlis" : "When someone says, \"I want a programming language in which I need only say what I want done,\" give him a lollipop.",
	];

	XmlWriter writer;
	writer.startDocument();
	writer.startTag!"quotes"();
	foreach (author, text; quotes)
	{
		writer.startTagWithAttributes!"quote"();
		writer.addAttribute!"author"(author);
		writer.endAttributes();
		writer.text(text);
		writer.endTag!"quote"();
	}
	writer.endTag!"quotes"();

	enum expected =
		`<?xml version="1.0" encoding="UTF-8"?>` ~
		`<quotes>` ~
			`<quote author="Alan Perlis">` ~
				`When someone says, "I want a programming language in which I need only say what I want done," give him a lollipop.` ~
			`</quote>` ~
		`</quotes>`;

	auto actual = writer.output.get();
	assert(actual == expected);
}
351 
352 // TODO: StringBuilder-compatible XML-encoding string sink/filter?
353 // e.g. to allow putTime to write directly to an XML node contents