1 /**
2 * ae.utils.xmlsel
3 *
4 * License:
5 * This Source Code Form is subject to the terms of
6 * the Mozilla Public License, v. 2.0. If a copy of
7 * the MPL was not distributed with this file, You
8 * can obtain one at http://mozilla.org/MPL/2.0/.
9 *
10 * Authors:
11 * Vladimir Panteleev <ae@cy.md>
12 */
13
14 module ae.utils.xmlsel;
15
16 import std.algorithm;
17 import std.conv;
18 import std.exception;
19 import std.string;
20
21 import ae.utils.xmllite;
22
/**
 * A slow and simple CSS "selector".
 *
 * The selector is a chain of steps. Each step is a compound selector made of
 * an optional tag name (`*` matches any tag), `#id`, any number of `.class`
 * parts (all of which must be present on the node) and `:nth-child(N)`.
 * Steps separated by a space match descendants at any depth; a step
 * preceded by `>` matches direct children only.
 *
 * `:nth-child(N)` compares `N` against the 1-based position of the node in
 * its parent's `children` array, so non-element children are counted too.
 * An `N` of 0 is treated as "no constraint".
 *
 * Params:
 *   roots      = nodes whose children (and, for descendant steps, deeper
 *                descendants) are searched
 *   selector   = the selector text; an empty selector returns `roots` as is
 *   allowEmpty = when `false`, an exception is thrown as soon as a step
 *                matches nothing
 *
 * Returns: the nodes matched by the last step, each node listed once.
 *
 * Throws: `Exception` on an unknown pseudo-selector, or (via `enforce`) when
 *   `allowEmpty` is `false` and a step has no matches. `to!int` throws if
 *   the `nth-child` argument is not an integer.
 */
XmlNode[] find(XmlNode[] roots, string selector, bool allowEmpty = true)
{
	selector = selector.strip();
	while (selector.length)
	{
		// A leading '>' limits this step to direct children of the roots.
		bool recursive = true;
		if (selector[0] == '>')
		{
			recursive = false;
			selector = selector[1..$].stripLeft();
		}

		// Cut the current step off the front; the rest stays in `selector`.
		string spec = selector; selector = null;
		foreach (i, c; spec)
			if (c == ' ' || c == '>')
			{
				selector = spec[i..$].stripLeft();
				spec = spec[0..i];
				break;
			}

		string tag, id;
		string[] classes; // every `.class` part of this step
		string[] pss; // pseudo-selectors

		// Characters are appended to whichever part was most recently opened.
		string* tgt = &tag;
		foreach (c; spec)
			if (c == '.')
			{
				// Each '.' starts a new class name instead of extending the
				// previous one, so ".a.b" requires both "a" and "b".
				classes ~= null;
				tgt = &classes[$-1];
			}
			else
			if (c == '#')
				tgt = &id;
			else
			if (c == ':')
			{
				pss ~= null;
				tgt = &pss[$-1];
			}
			else
				*tgt ~= c;

		int nthChild;
		foreach (ps; pss)
			switch (ps.findSplit("(")[0])
			{
				case "nth-child":
					nthChild = ps.findSplit("(")[2].findSplit(")")[0].to!int();
					break;
				default:
					throw new Exception("Unknown pseudo-selector: " ~ ps);
			}

		if (tag == "*")
			tag = null;

		// Whether child `c`, found at position `index` of its parent's
		// children, satisfies every constraint of the current step.
		bool matches(XmlNode c, size_t index)
		{
			if (tag && c.tag != tag)
				return false;
			if (id && c.attributes.get("id", null) != id)
				return false;
			if (classes.length)
			{
				auto present = c.attributes.get("class", null).split();
				foreach (wanted; classes)
					// An empty name (a bare '.') imposes no constraint.
					if (wanted.length && !present.canFind(wanted))
						return false;
			}
			if (nthChild && (index+1) != nthChild)
				return false;
			return true;
		}

		XmlNode[] newRoots;
		// Identity set of nodes already in newRoots. When one root is an
		// ancestor of another, a descendant step reaches the same node
		// through both; it must be reported only once.
		bool[void*] seen;

		void collect(XmlNode n)
		{
			foreach (i, c; n.children)
				if (c.type == XmlNodeType.Node)
				{
					if (matches(c, i))
					{
						auto key = cast(void*) c;
						if (key !in seen)
						{
							seen[key] = true;
							newRoots ~= c;
						}
					}

					// Descendant steps keep looking below this child,
					// whether or not the child itself matched.
					if (recursive)
						collect(c);
				}
		}

		foreach (root; roots)
			collect(root);
		roots = newRoots;
		if (!allowEmpty)
			enforce(roots.length, "Can't find " ~ spec);
	}

	return roots;
}
112
/**
 * Returns the first node under `roots` matched by `selector`.
 *
 * The search runs with `allowEmpty` set to `false`, so a selector step
 * that matches nothing raises an exception instead of returning an empty
 * result. An empty selector yields `roots` itself.
 */
XmlNode find(XmlNode roots, string selector)
{
	XmlNode[] matched = find([roots], selector, false);
	return matched[0];
}
117
/**
 * Returns every node under `roots` matched by `selector`.
 *
 * The result is empty when nothing matches; no exception is raised for
 * that case.
 */
XmlNode[] findAll(XmlNode roots, string selector)
{
	XmlNode[] start = [roots];
	return find(start, selector);
}
122
///
unittest
{
	enum xmlText =
		`<doc>` ~
			`<test>Test 1</test>` ~
			`<node id="test2">Test 2</node>` ~
			`<node class="test3">Test 3</node>` ~
		`</doc>`;
	auto doc = xmlText.xmlParse();

	// Selector -> text of the single node it must resolve to:
	// tag, id and class lookups, then every spacing variant of the
	// descendant and child combinators.
	static immutable string[2][] expected = [
		["test"      , "Test 1"],
		["#test2"    , "Test 2"],
		[".test3"    , "Test 3"],
		["doc test"  , "Test 1"],
		["doc>test"  , "Test 1"],
		["doc> test" , "Test 1"],
		["doc >test" , "Test 1"],
		["doc > test", "Test 1"],
	];
	foreach (pair; expected)
		assert(doc.find(pair[0]).text == pair[1]);

	// Selectors that match nothing give an empty array in the slice overload.
	static immutable string[] noMatch = [
		"foo",
		"#foo",
		".foo",
		"doc foo",
		"foo test",
	];
	foreach (sel; noMatch)
		assert(![doc].find(sel).length);

	// :nth-child is 1-based.
	foreach (n; 1 .. 4)
	{
		string num = n.to!string;
		assert(doc.find("doc > :nth-child(" ~ num ~ ")").text == "Test " ~ num);
	}
}