1 /**
2  * std.regex helpers
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.utils.regex;
15 
16 import std.algorithm;
17 import std.conv;
18 import std.exception;
19 import std.regex;
20 import std.string;
21 import std.traits;
22 import std.typecons;
23 
24 import ae.utils.text;
25 
26 // ************************************************************************
27 
/// Turns a pattern given as a template argument into a compiled regex
/// that is built on first use and kept in a function-local static, so
/// the pattern can be written inline without compiling it every time.
/// Example:
///   if (text.match(`^\d+$`)) {}    // old code - recompiles every time
///   if (text.match(re!`^\d+$`)) {} // new code - compiles on first use only

Regex!char re(string pattern, alias flags = [])()
{
	static Regex!char cached;
	// Already compiled by an earlier call of this instantiation.
	if (!cached.empty)
		return cached;
	cached = regex(pattern, flags);
	return cached;
}
41 
unittest
{
	// A digits-only pattern accepts a numeric string and rejects letters.
	auto digitsOnly = re!`^\d+$`;
	assert( "123".match(digitsOnly));
	assert(!"abc".match(digitsOnly));
}
47 
/// Converts the capture groups of a match into the given variables.
/// Group 0 (the whole match) is skipped: the N-th variable receives
/// capture group N+1, converted with `std.conv.to` to that variable's type.
/// The number of variables must equal the number of capture groups.
void convertCaptures(C, T...)(C captures, out T values)
{
	assert(values.length == captures.length-1, "Capture group count mismatch: %s arguments / %s capture groups".format(values.length, captures.length-1));
	// Unrolled at compile time, one assignment per target type.
	foreach (i, Target; T)
		values[i] = captures[i + 1].to!Target;
}
54 
/// Lua-like pattern matching.
/// Matches `s` against `r`; on success the capture groups are converted
/// and stored into `args`.
/// Returns: `true` if there was a match, `false` otherwise.
bool matchInto(S, R, Args...)(S s, R r, ref Args args)
{
	auto found = s.match(r);
	if (!found)
		return false;
	convertCaptures(found.captures, args);
	return true;
}
66 
///
unittest
{
	string name, fruit;
	int count;
	immutable matched = "Mary has 5 apples"
		.matchInto(`^(\w+) has (\d+) (\w+)$`, name, count, fruit);
	assert(matched);
	assert(name == "Mary");
	assert(count == 5);
	assert(fruit == "apples");
}
76 
/// Match into a delegate.
/// Matches `s` against `r`; on success the capture groups are converted
/// to the delegate's parameter types and `fun` is called with them.
/// Returns: `true` if there was a match (and `fun` was called), `false` otherwise.
bool matchCaptures(S, R, Ret, Args...)(S s, R r, Ret delegate(Args args) fun)
{
	auto found = s.match(r);
	if (!found)
		return false;
	Args converted;
	convertCaptures(found.captures, converted);
	fun(converted);
	return true;
}
90 
///
unittest
{
	immutable matched = "Mary has 5 apples".matchCaptures(
		`^(\w+) has (\d+) (\w+)$`,
		(string name, int count, string fruit)
		{
			assert(name == "Mary");
			assert(count == 5);
			assert(fruit == "apples");
		}
	);
	assert(matched);
}
103 
/// Call a delegate over all matches.
/// For every match of `r` in `s`, the capture groups are converted to the
/// delegate's parameter types and `fun` is called with them.
/// Returns: the number of matches processed.
size_t matchAllCaptures(S, R, Ret, Args...)(S s, R r, Ret delegate(Args args) fun)
{
	size_t count = 0;
	foreach (hit; s.matchAll(r))
	{
		Args converted;
		convertCaptures(hit.captures, converted);
		fun(converted);
		++count;
	}
	return count;
}
117 
/// Returns a range which extracts captures from text.
/// One element is produced per match of `r` in `s`:
/// with a single type in `T`, it is capture group 1 converted to that type;
/// with several types, it is a `Tuple!T` whose N-th field is capture
/// group N+1 converted to the N-th type.
template extractCaptures(T...)
{
	auto extractCaptures(S, R)(S s, R r)
	{
		auto matches = s.matchAll(r);
		return matches.map!(
			(hit)
			{
				static if (T.length == 1)
					return hit.captures[1].to!T;
				else
				{
					Tuple!T fields;
					foreach (i, Field; T)
						fields[i] = hit.captures[i + 1].to!Field;
					return fields;
				}
			});
	}
}
138 
alias extractCapture = extractCaptures;

/// Overload for when no target type is given explicitly:
/// captures are extracted as the type of the input string `S`.
auto extractCapture(S, R)(S s, R r)
if (isSomeString!S)
{
	// Instantiate the module-level template with the string type itself.
	alias sameTypeExtractor = .extractCaptures!S;
	return sameTypeExtractor(s, r);
}
147 
///
unittest
{
	string text = "One 2 three 42";
	string digits = `(\d+)`;

	// Without an explicit type the captures stay strings.
	auto asStrings = text.extractCapture(digits);
	assert(asStrings.equal(["2", "42"]));

	// With an explicit type the captures are converted.
	auto asInts = text.extractCapture!int(digits);
	assert(asInts.equal([ 2 ,  42 ]));
}
156 
///
unittest
{
	string text = "2.3 4.56 78.9";
	string decimal = `(\d+)\.(\d+)`;
	// Several target types yield one tuple per match.
	auto expected = [tuple(2, 3), tuple(4, 56), tuple(78, 9)];
	assert(text.extractCapture!(int, int)(decimal).equal(expected));
}
164 
165 // ************************************************************************
166 
/// Take a string, and return a regular expression that matches that string
/// exactly (escape RE metacharacters).
/// Every character from the escaped set is prefixed with a backslash;
/// all other characters are copied unchanged.
string escapeRE(string s)
{
	// TODO: test

	// Decides whether a character gets a backslash in front of it.
	// ASCII punctuation deliberately NOT escaped:
	//   ! " # % & , - : ; < = > @ _ ` ~
	static bool needsEscape(char ch)
	{
		switch (ch)
		{
			case '$', '\'', '(', ')', '*', '+', '.', '/':
			case '?', '[', '\\', ']', '^', '{', '|', '}':
				return true;
			default:
				return false;
		}
	}

	string escaped;
	foreach (char ch; s)
	{
		if (needsEscape(ch))
			escaped ~= '\\';
		escaped ~= ch;
	}
	return escaped;
}
216 
// We only need to make sure that there are no unescaped forward slashes
// in the regex, which would mean the end of the search pattern part of the
// regex transform. All escaped forward slashes will be unescaped during
// parsing of the regex transform (which won't affect the regex, as forward
// slashes have no special meaning, escaped or unescaped).
//
// Returns `s` with a backslash inserted before every forward slash that is
// not already preceded by an escaping backslash.
// Throws an Exception if `s` ends with a dangling escape (a backslash that
// does not escape anything).
private string escapeUnescapedSlashes(string s)
{
	bool escaped = false;
	string result;
	foreach (c; s)
	{
		if (escaped)
			escaped = false; // this character is consumed by the preceding backslash
		else
		if (c == '\\')
			escaped = true;
		else
		if (c == '/')
			result ~= '\\';

		result ~= c;
	}
	// The pattern comes from the caller, so it is validated with enforce
	// rather than assert: asserts are compiled out in release builds, and a
	// trailing backslash would then escape the separator appended after the
	// pattern, silently producing a transform that splits incorrectly.
	enforce(!escaped, "Regex ends with an escape");
	return result;
}
242 
// For the replacement part, all backslashes and forward slashes are escaped:
// `\` becomes `\\` and `/` becomes `\/`.
private string escapeSlashes(string s)
{
	// Backslashes are handled first, so the backslashes introduced in
	// front of forward slashes by the second step are not doubled.
	auto backslashesEscaped = s.fastReplace(`\`, `\\`);
	return backslashesEscaped.fastReplace(`/`, `\/`);
}
248 
// Reverse of escapeSlashes: `\/` becomes `/`, then `\\` becomes `\`.
private string unescapeSlashes(string s)
{
	auto slashesRestored = s.fastReplace(`\/`, `/`);
	return slashesRestored.fastReplace(`\\`, `\`);
}
254 
/// Build a RE search-and-replace transform (as used by applyRE).
/// The result has the form "s/SEARCH/REPLACEMENT/FLAGS", with the search
/// pattern and the replacement escaped so that neither contains an
/// unescaped forward slash.
string buildReplaceTransformation(string search, string replacement, string flags)
{
	auto pattern = escapeUnescapedSlashes(search);
	auto substitute = escapeSlashes(replacement);
	return "s/" ~ pattern ~ "/" ~ substitute ~ "/" ~ flags;
}
260 
// Splits a transformation such as "s/from/to/flags" into its parts.
// The first character is the command and the second one the boundary
// character; the rest is split on every boundary character that is not
// preceded by an escaping backslash. Escapes are left in place.
private string[] splitRETransformation(string t)
{
	enforce(t.length >= 2, "Bad transformation");

	string command = t[0..1];
	auto delimiter = t[1];
	string rest = t[2..$];

	string[] parts = [command];
	size_t segmentStart = 0;
	bool afterBackslash = false;
	foreach (pos, ch; rest)
	{
		if (afterBackslash)
		{
			// Escaped character: never a boundary.
			afterBackslash = false;
			continue;
		}
		if (ch == '\\')
		{
			afterBackslash = true;
			continue;
		}
		if (ch == delimiter)
		{
			parts ~= rest[segmentStart..pos];
			segmentStart = pos + 1;
		}
	}
	// Whatever follows the last boundary (possibly empty) is the final part.
	parts ~= rest[segmentStart..$];
	return parts;
}
284 
unittest
{
	// Command, search, replacement and (empty) flags.
	auto parts = splitRETransformation("s/from/to/");
	assert(parts == ["s", "from", "to", ""]);
}
289 
/// Apply regex transformation (in the form of "s/FROM/TO/FLAGS") to a string.
/// Only the "s" command is supported, and the transformation must split
/// into exactly four parts (command, search, replacement, flags);
/// otherwise an exception is thrown.
string applyRE()(string str, string transformation)
{
	import std.regex;
	auto parts = splitRETransformation(transformation);
	enforce(parts[0] == "s", "Unsupported regex transformation");
	enforce(parts.length == 4, "Wrong number of regex transformation parameters");

	string pattern = parts[1];
	string flags = parts[3];
	auto compiled = regex(pattern, flags);
	string substitute = unescapeSlashes(parts[2]);
	return replace(str, compiled, substitute);
}
300 
unittest
{
	// Thousands separators via lookbehind/lookahead.
	auto transformation = buildReplaceTransformation(`(?<=\d)(?=(\d\d\d)+\b)`, `,`, "g");
	assert("12000 + 42100 = 54100".applyRE(transformation) == "12,000 + 42,100 = 54,100");

	// Swapping slashes and backslashes must survive the escaping round trip.
	auto slashToBackslash = buildReplaceTransformation(`\/`, `\`, "g");
	auto backslashToSlash = buildReplaceTransformation(`\\`, `/`, "g");
	foreach (s; [`a/b\c`, `a//b\\c`, `a/\b\/c`, `a/\\b\//c`, `a//\b\\/c`])
	{
		assert(s.applyRE(slashToBackslash) == s.fastReplace(`/`, `\`));
		assert(s.applyRE(backslashToSlash) == s.fastReplace(`\`, `/`));
	}
}
317 
318 // ************************************************************************