/**
 * ae.utils.xmlsel
 *
 * License:
 *   This Source Code Form is subject to the terms of
 *   the Mozilla Public License, v. 2.0. If a copy of
 *   the MPL was not distributed with this file, You
 *   can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Authors:
 *   Vladimir Panteleev <ae@cy.md>
 */

module ae.utils.xmlsel;

import std.algorithm;
import std.conv;
import std.exception;
import std.string;

import ae.utils.xmllite;

/**
 * A slow and simple CSS "selector".
 *
 * The selector is a sequence of compound selectors joined by combinators.
 * Supported syntax:
 * $(UL
 *   $(LI `tag` - element name; `*` or an omitted name matches any element)
 *   $(LI `#id` - value of the `id` attribute)
 *   $(LI `.cls` - whitespace-separated token of the `class` attribute;
 *        may be repeated (`.a.b`), in which case all tokens must be present)
 *   $(LI `:nth-child(N)` - 1-based position among the parent's element children)
 *   $(LI `A B` - `B` anywhere below `A` (descendant combinator))
 *   $(LI `A > B` - `B` directly below `A` (child combinator))
 * )
 *
 * Matching starts at the $(I children) of `roots`; the root nodes
 * themselves are never part of the result.
 *
 * Params:
 *   roots      = nodes whose subtrees are searched
 *   selector   = selector text; an empty selector returns `roots` unchanged
 *   allowEmpty = when `false`, an exception is thrown as soon as one
 *                step of the selector matches nothing
 *
 * Returns:
 *   The matching nodes, in document order per root.
 *
 * Throws:
 *   `Exception` on an unknown pseudo-selector, or when nothing matches
 *   and `allowEmpty` is `false`. `std.conv.to` throws if the
 *   `:nth-child` argument is not an integer.
 */
XmlNode[] find(XmlNode[] roots, string selector, bool allowEmpty = true)
{
	selector = selector.strip();
	while (selector.length)
	{
		// A leading '>' restricts this step to direct children.
		bool recursive = true;
		if (selector[0] == '>')
		{
			recursive = false;
			selector = selector[1..$].stripLeft();
		}

		// Cut the next compound selector off the front of the input;
		// whatever follows the first combinator is left for later iterations.
		string spec = selector;
		selector = null;
		foreach (i, c; spec)
			if (c == ' ' || c == '>')
			{
				selector = spec[i..$].stripLeft();
				spec = spec[0..i];
				break;
			}

		string tag, id;
		string[] classes; // every listed class must be present
		string[] pss;     // pseudo-selectors

		// Characters are appended to whichever component was introduced last.
		// The pointer is refreshed right after every array append, so it
		// never refers to a slot of a reallocated array.
		string* tgt = &tag;
		foreach (c; spec)
			switch (c)
			{
				case '.':
					classes ~= null;
					tgt = &classes[$-1];
					break;
				case '#':
					tgt = &id;
					break;
				case ':':
					pss ~= null;
					tgt = &pss[$-1];
					break;
				default:
					*tgt ~= c;
					break;
			}

		int nthChild; // 0 means "no positional constraint"
		foreach (ps; pss)
		{
			auto parts = ps.findSplit("(");
			switch (parts[0])
			{
				case "nth-child":
					nthChild = parts[2].findSplit(")")[0].to!int();
					break;
				default:
					throw new Exception("Unknown pseudo-selector: " ~ ps);
			}
		}

		if (tag == "*")
			tag = null;

		// Tests one element against the current compound selector.
		// `position` is the element's 1-based index among its element siblings.
		bool matches(XmlNode node, int position)
		{
			if (tag.length && node.tag != tag)
				return false;
			if (id.length && node.attributes.get("id", null) != id)
				return false;
			if (classes.length)
			{
				auto nodeClasses = node.attributes.get("class", null).split();
				foreach (cls; classes)
					if (cls.length && !nodeClasses.canFind(cls)) // a bare "." adds no constraint
						return false;
			}
			if (nthChild && position != nthChild)
				return false;
			return true;
		}

		// Collects the matching children of `parent`, descending into
		// every element when the step is a descendant step.
		XmlNode[] findSpec(XmlNode parent)
		{
			XmlNode[] result;
			int position; // counts element children only, as CSS :nth-child does
			foreach (child; parent.children)
			{
				if (child.type != XmlNodeType.Node)
					continue;
				position++;

				if (matches(child, position))
					result ~= child;
				if (recursive)
					result ~= findSpec(child);
			}
			return result;
		}

		XmlNode[] newRoots;
		foreach (root; roots)
			newRoots ~= findSpec(root);
		roots = newRoots;

		if (!allowEmpty)
			enforce(roots.length, "Can't find " ~ spec);
	}

	return roots;
}

/// Returns the first node below `roots` matching `selector`,
/// throwing if there is no match.
XmlNode find(XmlNode roots, string selector)
{
	return find([roots], selector, false)[0];
} /// ditto

/// Returns all nodes below `roots` matching `selector`
/// (possibly none).
XmlNode[] findAll(XmlNode roots, string selector)
{
	return find([roots], selector);
} /// ditto

///
unittest
{
	enum xmlText =
		`<doc>` ~
			`<test>Test 1</test>` ~
			`<node id="test2">Test 2</node>` ~
			`<node class="test3">Test 3</node>` ~
		`</doc>`;
	auto doc = xmlText.xmlParse();

	assert(doc.find("test"  ).text == "Test 1");
	assert(doc.find("#test2").text == "Test 2");
	assert(doc.find(".test3").text == "Test 3");

	assert(doc.find("doc test"  ).text == "Test 1");
	assert(doc.find("doc>test"  ).text == "Test 1");
	assert(doc.find("doc> test" ).text == "Test 1");
	assert(doc.find("doc >test" ).text == "Test 1");
	assert(doc.find("doc > test").text == "Test 1");

	assert(![doc].find("foo").length);
	assert(![doc].find("#foo").length);
	assert(![doc].find(".foo").length);
	assert(![doc].find("doc foo").length);
	assert(![doc].find("foo test").length);

	assert(doc.find("doc > :nth-child(1)").text == "Test 1");
	assert(doc.find("doc > :nth-child(2)").text == "Test 2");
	assert(doc.find("doc > :nth-child(3)").text == "Test 3");
}

// Compound class selectors and :nth-child in mixed content.
unittest
{
	enum xmlText =
		`<doc>` ~
			`<node class="a b">AB</node>` ~
			`<node class="a">A</node>` ~
			`<p>lead <b>bold</b> middle <i>italic</i></p>` ~
		`</doc>`;
	auto doc = xmlText.xmlParse();

	// Every listed class must be present on the element.
	assert(doc.findAll(".a").length == 2);
	assert(doc.findAll(".a.b").length == 1);
	assert(doc.find(".a.b").text == "AB");
	assert(doc.find("node.b.a").text == "AB");
	assert(![doc].find(".a.c").length);

	// Text nodes between elements do not shift element positions.
	assert(doc.find("p > :nth-child(1)").tag == "b");
	assert(doc.find("p > :nth-child(2)").tag == "i");
}