/**
 * SAX-like XML parser
 * WORK IN PROGRESS.
 *
 * License:
 *   This Source Code Form is subject to the terms of
 *   the Mozilla Public License, v. 2.0. If a copy of
 *   the MPL was not distributed with this file, You
 *   can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Authors:
 *   Vladimir Panteleev <ae@cy.md>
 */

deprecated module ae.utils.xmlparser;
deprecated:

import std.exception;
import std.functional;
import std.range;
import std.string;
import std.traits;

import ae.utils.range;

/// Does not allocate (except for exceptions).
/// No XML nesting state.
/// Does not check for premature stream end, paired tags, etc.
///
/// INPUT is an input range which needs to support the following
/// additional properties:
///   .ptr - returns a type usable with ptrSlice (used to save
///          the position in INPUT, then later take a slice
///          from that position until an end position).
/// WARNING: Using a narrow D string type for INPUT will result
/// in wasteful UTF decoding (due to std.array.front returning a
/// dchar).
///
/// OUTPUT accepts strings with the XML entities still encoded,
/// to allow for lazy decoding. The events it receives are:
/// startDocument, endDocument, text, directive,
/// startProcessingInstruction, endProcessingInstruction,
/// startTag, attribute, endAttributes, endAttributesAndTag, endTag.

// TODO: namespaces, CDATA

struct XmlParser(INPUT, OUTPUT)
{
    /// Character source; consumed by `run`.
    INPUT input;
    /// Receiver of the parse events.
    OUTPUT output;

    alias typeof(input.front) C;  /// Character type as yielded by `input.front`.
    alias std.traits.Unqual!C U;  /// Unqualified `C`, usable for mutable locals.

    /// Parse all of `input`, reporting every construct found to `output`.
    /// Throws (via `enforce`) when an expected character is not found.
    void run()
    {
        output.startDocument();
        skipWhitespace();

        while (!input.empty)
        {
            if (input.front != '<') // text node
                output.text(xmlString(readWhile!q{c != '<'}()));
            else
            {
                input.popFront(); // '<'
                if (input.front == '!')
                {
                    input.popFront();
                    if (input.front == '-') // comment
                    {
                        input.popFront();
                        expect('-');
                        // Slide a three-character window over the input
                        // until it contains the terminator "-->".
                        // Comments are skipped, not reported to output.
                        U c0, c1, c2;
                        do
                        {
                            c0 = c1; c1 = c2; c2 = input.front;
                            input.popFront();
                        } while (c0 != '-' || c1 != '-' || c2 != '>');
                    }
                    else // doctype, etc.
                    {
                        output.directive(xmlString(readWhile!q{c != '>'}()));
                        // Consume the terminating '>'; otherwise it would be
                        // reported as the start of a text node on the next
                        // iteration.
                        expect('>');
                    }
                }
                else
                if (input.front == '?')
                {
                    input.popFront();
                    output.startProcessingInstruction(readWord());
                    while (!input.empty)
                    {
                        skipWhitespace();
                        if (input.front == '?')
                            break;
                        readAttribute();
                    }
                    input.popFront(); // '?'
                    expect('>');
                    output.endProcessingInstruction();
                }
                else
                if (input.front == '/')
                {
                    input.popFront();
                    output.endTag(readWord());
                    expect('>');
                }
                else
                {
                    output.startTag(readWord());
                    while (!input.empty)
                    {
                        skipWhitespace();
                        if (input.front == '>' || input.front == '/')
                            break;
                        readAttribute();
                    }
                    // NOTE(review): a self-closing tag receives both
                    // endAttributes and endAttributesAndTag - confirm that
                    // this is what OUTPUT implementations expect.
                    output.endAttributes();
                    if (input.front == '/')
                    {
                        input.popFront();
                        output.endAttributesAndTag();
                        expect('>');
                    }
                    else
                        input.popFront(); // '>'
                }
            }
            skipWhitespace();
        }

        output.endDocument();
    }

private:
    /// Read one `name = "value"` (or `'value'`) pair and report it through
    /// `output.attribute`. The value is passed on with entities still
    /// encoded and without the surrounding quotes.
    void readAttribute()
    {
        auto name = readWord();
        skipWhitespace();
        expect('=');
        skipWhitespace();
        auto delim = input.front;
        enforce(delim == '\'' || delim == '"', format("Bad attribute delimiter. Expected ' or \", got %s", delim));
        // Step over the opening quote; without this the scan below would
        // stop immediately and yield an empty value.
        input.popFront();
        auto value = delim == '"' ? readWhile!q{c != '"'}() : readWhile!q{c != '\''}();
        // Step over the closing quote so the caller resumes after the attribute.
        expect(delim);
        output.attribute(name, xmlString(value));
    }

    /// Require the next character to be `c` and consume it;
    /// throws (via `enforce`) if it is anything else.
    void expect(C c)
    {
        enforce(input.front == c, format("Expected %s, got %s", c, input.front));
        input.popFront();
    }

    /// Consume characters while COND holds (or until the input is empty)
    /// and return the consumed span as a slice of the input.
    auto readWhile(alias COND)()
    {
        auto start = input.ptr;
        skipWhile!COND();
        return ptrSlice(start, input.ptr);
    }

    /// Consume characters while COND holds (or until the input is empty).
    /// COND is either a predicate or a string expression over `c`.
    void skipWhile(alias COND)()
    {
        alias unaryFun!(COND, "c") cond;
        while (!input.empty && cond(input.front))
            input.popFront();
    }

    alias skipWhile!xmlIsWhite skipWhitespace;
    alias readWhile!xmlIsWord readWord;
}

/// The type of a slice (using ptrSlice) of an input range used in XmlParser
template SliceType(INPUT)
{
    alias typeof(ptrSlice(INPUT.init.ptr, INPUT.init.ptr)) SliceType;
}

unittest
{
    // Just test compilation with a dummy receiver
    static struct DummyOutput
    {
        void opDispatch(string S, T...)(T args) { }
    }

    // Note: don't use string! This will do UTF-8 decoding.
    XmlParser!(string, DummyOutput) stringParser;

    // An example with more sensible performance
    XmlParser!(FastArrayRange!(immutable(char)), DummyOutput) fastParser;

    // SliceType must be instantiable
    static assert(is(SliceType!string));
}

unittest
{
    import std.conv : to;

    // Receiver which records the events it is sent, so that they can be
    // compared against the expected sequence.
    static struct Recorder
    {
        string[] events;

        void startTag(N)(N name) { events ~= "startTag " ~ to!string(name); }
        void attribute(N, V)(N name, V value) { events ~= "attribute " ~ to!string(name) ~ "=" ~ to!string(value.encoded); }
        void text(V)(V value) { events ~= "text " ~ to!string(value.encoded); }
        void directive(V)(V value) { events ~= "directive " ~ to!string(value.encoded); }
        void opDispatch(string S, T...)(T args) { events ~= S; }
    }

    // Attribute values are read between their delimiters
    {
        XmlParser!(string, Recorder) parser;
        parser.input = `<a x="1" y='2'/>`;
        parser.run();
        assert(parser.output.events == [
            "startDocument",
            "startTag a",
            "attribute x=1",
            "attribute y=2",
            "endAttributes",
            "endAttributesAndTag",
            "endDocument",
        ]);
    }

    // The '>' closing a directive is not reported as text
    {
        XmlParser!(string, Recorder) parser;
        parser.input = `<!DOCTYPE html><b>hi</b>`;
        parser.run();
        assert(parser.output.events == [
            "startDocument",
            "directive DOCTYPE html",
            "startTag b",
            "endAttributes",
            "text hi",
            "endTag",
            "endDocument",
        ]);
    }
}

// ***************************************************************************

/// Represents a string (slice of XmlParser input stream) which still contains
/// encoded XML entities.
struct XmlString(S)
{
    /// The text exactly as it appeared in the input; entities are not decoded.
    S encoded;
}

/// Wrap `s`, which is taken to be still entity-encoded, in an `XmlString`.
XmlString!S xmlString(S)(S s)
{
    return typeof(return)(s);
}

/+
import std.traits;

static import ae.utils.xmllite;

XmlString!S getXmlEncodedString(S)(S s)
    if (isSomeString!S)
{
    XmlString!S xmls;
    xmls.encoded = ae.utils.xmllite.encodeEntities(s);
    return xmls;
}

X getXmlEncodedString(X)(X x)
    if (is(X S : XmlString!S))
{
    return x;
}

auto getXmlDecodedString(X)(X x)
    if (is(X S : XmlString!S))
{
    return ae.utils.xmllite.decodeEntities(x.encoded);
}

S getXmlDecodedString(S)(S s)
    if (isSomeString!S)
{
    return s;
}

unittest
{
    auto s0 = "<";
    auto s1 = s0.getXmlDecodedString();
    assert(s0 is s1);
    auto x0 = s0.getXmlEncodedString();
    assert(x0.encoded == "&lt;");
    auto x1 = x0.getXmlEncodedString();
    assert(x0.encoded is x1.encoded);
    auto s2 = x0.getXmlDecodedString();
    assert(s0 == s2);
}
+/

// ***************************************************************************

/// Generate a fast table lookup function, which compiles to a single lookup
/// for small index types and an additional check + default value for larger
/// index types.
private template fastLookup(alias TABLE, bool DEFAULT)
{
    bool fastLookup(C)(C c) @trusted pure nothrow
    {
        import std.traits : isSigned;

        // The range check is compiled in only when a value of type C can
        // fall outside TABLE: either C.max reaches TABLE.length, or C is
        // signed (a negative value becomes a huge index once cast to
        // size_t). Types which always fit get a bare, unchecked lookup.
        static if (isSigned!C || cast(size_t)C.max >= TABLE.length)
        {
            if (cast(size_t)c >= TABLE.length)
                return DEFAULT;
        }
        // Unchecked access; safe because the index is known to be in range.
        return TABLE.ptr[cast(size_t)c];
    }
}

/// Character class predicates backed by the lookup tables below.
/// Characters beyond the tables are never whitespace and always word
/// characters.
alias fastLookup!(xmlWhiteChars, false) xmlIsWhite;
alias fastLookup!(xmlWordChars , true ) xmlIsWord ; /// ditto

/// Build a 256-entry table by evaluating the expression COND
/// (in terms of `c`) for every index.
bool[256] genTable(string COND)()
{
    import std.ascii;
    bool[256] table;
    foreach (uint c, ref b; table) b = mixin(COND);
    return table;
}

immutable bool[256] xmlWhiteChars = genTable!q{isWhite   (c)                              }();
immutable bool[256] xmlWordChars  = genTable!q{isAlphaNum(c) || c=='-' || c=='_' || c==':'}();

unittest
{
    assert( xmlIsWhite(' '));
    assert(!xmlIsWhite('a'));
    assert(!xmlIsWhite('я'));
    assert(!xmlIsWord (' '));
    assert( xmlIsWord ('a'));
    assert( xmlIsWord ('я'));

    // Negative values of small signed types must yield the default
    // instead of indexing outside the table.
    assert(!xmlIsWhite(cast(byte)-1));
    assert( xmlIsWord (cast(byte)-1));
}