1 /**
2  * ae.utils.xmlsel
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <ae@cy.md>
12  */
13 
14 module ae.utils.xmlsel;
15 
16 import std.algorithm;
17 import std.conv;
18 import std.exception;
19 import std.string;
20 
21 import ae.utils.xmllite;
22 
/**
 * A slow and simple CSS "selector".
 *
 * Applies `selector` step by step: every step is evaluated against the
 * children (or, for descendant steps, all descendants) of the nodes
 * matched by the previous step, starting from `roots`.
 *
 * Supported syntax:
 * $(UL
 *   $(LI `tag`, `*`, `#id`, `.class` and combinations such as
 *        `tag#id.class1.class2` (all listed classes must be present))
 *   $(LI `:nth-child(N)` - the element is the N-th element child of its
 *        parent (1-based; text, comment and other non-element children
 *        are not counted))
 *   $(LI `a b` - descendant combinator)
 *   $(LI `a > b` - direct child combinator)
 * )
 *
 * Params:
 *   roots      = nodes whose children / descendants are searched
 *   selector   = the selector string
 *   allowEmpty = when `false`, a step which matches nothing throws
 *                instead of producing an empty result
 *
 * Returns: the nodes matched by the last step of the selector, or
 *          `roots` itself if the selector is empty.
 *
 * Throws: `Exception` on an unknown pseudo-selector, or (via `enforce`)
 *         when `allowEmpty` is `false` and a step matches no nodes.
 *         A malformed `:nth-child` argument propagates the conversion
 *         error raised by `to!int`.
 */
XmlNode[] find(XmlNode[] roots, string selector, bool allowEmpty = true)
{
	selector = selector.strip();
	while (selector.length)
	{
		// A leading '>' restricts this step to direct children.
		bool recursive = true;
		if (selector[0] == '>')
		{
			recursive = false;
			selector = selector[1..$].stripLeft();
		}

		// Cut off the current step at the next combinator;
		// the remainder is handled by the following iterations.
		string spec = selector; selector = null;
		foreach (i, c; spec)
			if (c == ' ' || c == '>')
			{
				selector = spec[i..$].stripLeft();
				spec = spec[0..i];
				break;
			}

		string tag, id;
		string[] classes; // every ".name" in the step gets its own entry
		string[] pss; // pseudo-selectors

		// Characters are appended to whichever component was started last.
		string* tgt = &tag;
		foreach (c; spec)
			if (c == '.')
			{
				classes ~= null;
				tgt = &classes[$-1];
			}
			else
			if (c == '#')
				tgt = &id;
			else
			if (c == ':')
			{
				pss ~= null;
				tgt = &pss[$-1];
			}
			else
				*tgt ~= c;

		int nthChild; // 0 means "no :nth-child constraint"
		foreach (ps; pss)
			switch (ps.findSplit("(")[0])
			{
				case "nth-child":
					nthChild = ps.findSplit("(")[2].findSplit(")")[0].to!int();
					break;
				default:
					throw new Exception("Unknown pseudo-selector: " ~ ps);
			}

		if (tag == "*")
			tag = null;

		XmlNode[] findSpec(XmlNode n)
		{
			XmlNode[] result;
			// 1-based position of the current child among its element
			// siblings; non-element children do not advance it.
			size_t elementIndex;
			foreach (c; n.children)
			{
				if (c.type != XmlNodeType.Node)
					continue;
				elementIndex++;

				bool matches =
					(!tag.length || c.tag == tag) &&
					(!id.length || c.attributes.get("id", null) == id) &&
					(!nthChild || elementIndex == nthChild);

				if (matches && classes.length)
				{
					auto nodeClasses = c.attributes.get("class", null).split();
					foreach (cls; classes)
						if (cls.length && !nodeClasses.canFind(cls))
						{
							matches = false;
							break;
						}
				}

				if (matches)
					result ~= c;

				// Descendant steps keep searching below every element,
				// whether or not the element itself matched.
				if (recursive)
					result ~= findSpec(c);
			}
			return result;
		}

		XmlNode[] newRoots;

		foreach (root; roots)
			newRoots ~= findSpec(root);
		roots = newRoots;
		if (!allowEmpty)
			enforce(roots.length, "Can't find " ~ spec);
	}

	return roots;
}
112 
/**
 * Single-result convenience overload: searches below one root node and
 * returns the first node matched by `selector`.
 *
 * The search is performed with `allowEmpty` set to `false`, so a
 * selector step which matches nothing throws instead of returning.
 */
XmlNode find(XmlNode roots, string selector)
{
	XmlNode[] matches = find([roots], selector, false);
	return matches[0];
}
117 
/**
 * Multi-result convenience overload: searches below one root node and
 * returns every node matched by `selector`.
 *
 * The result is empty when nothing matches.
 */
XmlNode[] findAll(XmlNode roots, string selector)
{
	XmlNode[] start = [roots];
	return find(start, selector, true);
}
122 
///
unittest
{
	string markup =
		`<doc>` ~
			`<test>Test 1</test>` ~
			`<node id="test2">Test 2</node>` ~
			`<node class="test3">Test 3</node>` ~
		`</doc>`;
	auto doc = markup.xmlParse();

	// Simple selectors: by tag name, by id, by class.
	assert(doc.find("test"  ).text == "Test 1");
	assert(doc.find("#test2").text == "Test 2");
	assert(doc.find(".test3").text == "Test 3");

	// Descendant and child combinators, with every whitespace variation.
	foreach (sel; ["doc test", "doc>test", "doc> test", "doc >test", "doc > test"])
		assert(doc.find(sel).text == "Test 1");

	// The array overload allows empty results for selectors without a match.
	foreach (sel; ["foo", "#foo", ".foo", "doc foo", "foo test"])
		assert(![doc].find(sel).length);

	// :nth-child is 1-based.
	foreach (idx, expected; ["Test 1", "Test 2", "Test 3"])
	{
		auto sel = "doc > :nth-child(" ~ (idx + 1).to!string() ~ ")";
		assert(doc.find(sel).text == expected);
	}
}