1 /** 2 * XmlParser client which builds a DOM efficiently. 3 * WORK IN PROGRESS. 4 * 5 * License: 6 * This Source Code Form is subject to the terms of 7 * the Mozilla Public License, v. 2.0. If a copy of 8 * the MPL was not distributed with this file, You 9 * can obtain one at http://mozilla.org/MPL/2.0/. 10 * 11 * Authors: 12 * Vladimir Panteleev <vladimir@thecybershadow.net> 13 */ 14 15 module ae.utils.xmldom; 16 17 import std.exception; 18 19 import ae.utils.xmlparser; 20 import ae.utils.alloc; 21 22 enum XmlNodeType 23 { 24 root, 25 tag, 26 attribute, 27 directive, 28 processingInstruction, 29 text 30 } 31 32 33 struct XmlDom(STRING=string, XML_STRING=XmlString!STRING) 34 { 35 struct Node 36 { 37 XmlNodeType type; 38 union 39 { 40 STRING tag; 41 alias tag attributeName; 42 XML_STRING text; 43 } 44 union 45 { 46 XML_STRING attributeValue; /// Attribute tags only 47 Node* firstChild; /// Non-attribute tags only 48 } 49 Node* nextSibling; 50 Node* parent; 51 } 52 53 Node* root; 54 55 struct Cursor 56 { 57 Node* currentParent, currentSibling; 58 59 void descend() 60 { 61 assert(currentSibling, "Nowhere to descend to"); 62 currentParent = currentSibling; 63 //currentSibling = null; 64 currentSibling = currentSibling.firstChild; 65 } 66 67 void ascend() 68 { 69 assert(currentParent, "Nowhere to ascend to"); 70 currentSibling = currentParent; 71 currentParent = currentSibling.parent; 72 } 73 74 void insertNode(Node* n) 75 { 76 n.parent = currentParent; 77 auto pNext = currentSibling ? ¤tSibling.nextSibling : ¤tParent.firstChild; 78 n.nextSibling = *pNext; 79 } 80 81 /// Assumes we are at the last sibling 82 void appendNode(Node* n) 83 { 84 n.parent = currentParent; 85 n.nextSibling = null; 86 auto pNext = currentSibling ? ¤tSibling.nextSibling : ¤tParent.firstChild; 87 assert(*pNext, "Cannot append when not at the last sibling"); 88 currentSibling = *pNext = n; 89 } 90 91 @property bool empty() { return currentSibling !is null; } 92 alias currentSibling front; 93 void popFront() { currentSibling = currentSibling.nextSibling; } 94 } 95 96 Cursor getCursor() 97 { 98 assert(root, "No root node"); 99 Cursor cursor; 100 cursor.currentParent = root; 101 cursor.currentSibling = null; 102 return cursor; 103 } 104 } 105 106 static template XmlDomWriter(alias dom_, alias allocator=heapAllocator) 107 { 108 alias dom = dom_; 109 alias DOM = typeof(dom); 110 alias DOM.Node Node; 111 112 private Node* newNode() 113 { 114 auto n = allocator.allocate!Node(); 115 n.nextSibling = null; 116 return n; 117 } 118 119 DOM.Cursor newDocument() 120 { 121 with (*(dom.root = newNode())) 122 type = XmlNodeType.root, 123 parent = null; 124 return dom.getCursor(); 125 } 126 } 127 128 /// STRING_FILTER is a policy type which determines how strings are 129 /// transformed for permanent storage inside the DOM. 130 /// By default, we store slices of the original XML document. 131 /// However, if the parsing is done using temporary buffers, 132 /// STRING_FILTER will want to copy (.idup) the strings before 133 /// letting us store them. 134 /// STRING_FILTER can also be used to implement a string pool, to 135 /// make a trade-off between memory consumption and speed 136 /// (an XML document is likely to contain many repeating strings, 137 /// such as tag and attribute names). Merging identical strings 138 /// to obtain unique string pointers or IDs would also allow very 139 /// quick tag/attribute name lookup, and avoid repeated entity 140 /// decoding. 141 struct XmlDomParser(alias WRITER, alias STRING_FILTER=NoopStringFilter, bool CHECKED=true) 142 { 143 WRITER.DOM.Cursor cursor; 144 145 STRING_FILTER stringFilter; 146 147 alias WRITER.Node Node; 148 149 private Node* addNode(XmlNodeType type) 150 { 151 assert(cursor.currentParent.type != XmlNodeType.attribute); 152 Node* n = WRITER.newNode(); 153 n.type = type; 154 cursor.appendNode(n); 155 return n; 156 } 157 158 void startDocument() 159 { 160 cursor = WRITER.newDocument(); 161 } 162 163 void text(XML_STRING)(XML_STRING s) 164 { 165 with (*addNode(XmlNodeType.text)) 166 text = stringFilter.handleXmlString(s); 167 } 168 169 void directive(XML_STRING)(XML_STRING s) 170 { 171 with (*addNode(XmlNodeType.directive)) 172 text = stringFilter.handleXmlString(s); 173 } 174 175 void startProcessingInstruction(STRING)(STRING s) 176 { 177 with (*addNode(XmlNodeType.processingInstruction)) 178 tag = stringFilter.handleString(s); 179 cursor.descend(); 180 } 181 182 void endProcessingInstruction() 183 { 184 cursor.ascend(); 185 } 186 187 void startTag(STRING)(STRING s) 188 { 189 with (*addNode(XmlNodeType.tag)) 190 tag = stringFilter.handleString(s); 191 cursor.descend(); 192 } 193 194 void attribute(STRING, XML_STRING)(STRING name, XML_STRING value) 195 { 196 with (*addNode(XmlNodeType.attribute)) 197 { 198 attributeName = stringFilter.handleString (name); 199 attributeValue = stringFilter.handleXmlString(value); 200 } 201 } 202 203 void endAttributes() {} 204 205 void endAttributesAndTag() {} 206 207 void endTag(STRING)(STRING s) 208 { 209 cursor.ascend(); 210 static if (CHECKED) 211 enforce(stringFilter.handleString(s) == cursor.currentSibling.tag); 212 } 213 214 void endDocument() 215 { 216 cursor.ascend(); 217 enforce(cursor.currentSibling is WRITER.dom.root, "Unexpected end of document"); 218 } 219 } 220 221 struct NoopStringFilter 222 { 223 auto handleString (S)(S s) { return s; } 224 auto handleXmlString(S)(S s) { return s; } 225 } 226 227 unittest 228 { 229 // Test instantiation 230 XmlDom!string dom; 231 alias XmlDomWriter!dom WRITER; 232 alias XmlDomParser!WRITER OUTPUT; 233 alias XmlParser!(string, OUTPUT) PARSER; 234 PARSER p; 235 }