1 /** 2 * std.regex helpers 3 * 4 * License: 5 * This Source Code Form is subject to the terms of 6 * the Mozilla Public License, v. 2.0. If a copy of 7 * the MPL was not distributed with this file, You 8 * can obtain one at http://mozilla.org/MPL/2.0/. 9 * 10 * Authors: 11 * Vladimir Panteleev <vladimir@thecybershadow.net> 12 */ 13 14 module ae.utils.regex; 15 16 import std.algorithm; 17 import std.conv; 18 import std.exception; 19 import std.regex; 20 import std.string; 21 import std.traits; 22 import std.typecons; 23 24 import ae.utils.text; 25 26 // ************************************************************************ 27 28 /// Allows specifying regular expression patterns in expressions, 29 /// without having to compile them each time. 30 /// Example: 31 /// if (text.match(`^\d+$`)) {} // old code - recompiles every time 32 /// if (text.match(re!`^\d+$`)) {} // new code - recompiles once 33 34 Regex!char re(string pattern, alias flags = [])() 35 { 36 static Regex!char r; 37 if (r.empty) 38 r = regex(pattern, flags); 39 return r; 40 } 41 42 unittest 43 { 44 assert( "123".match(re!`^\d+$`)); 45 assert(!"abc".match(re!`^\d+$`)); 46 } 47 48 void convertCaptures(C, T...)(C captures, out T values) 49 { 50 assert(values.length == captures.length-1, "Capture group count mismatch: %s arguments / %s capture groups".format(values.length, captures.length-1)); 51 foreach (n, ref value; values) 52 value = to!(T[n])(captures[n+1]); 53 } 54 55 /// Lua-like pattern matching. 56 bool matchInto(S, R, Args...)(S s, R r, ref Args args) 57 { 58 auto m = s.match(r); 59 if (m) 60 { 61 convertCaptures(m.captures, args); 62 return true; 63 } 64 return false; 65 } 66 67 /// 68 unittest 69 { 70 string name, fruit; 71 int count; 72 assert("Mary has 5 apples" 73 .matchInto(`^(\w+) has (\d+) (\w+)$`, name, count, fruit)); 74 assert(name == "Mary" && count == 5 && fruit == "apples"); 75 } 76 77 /// Match into a delegate. 78 bool matchCaptures(S, R, Ret, Args...)(S s, R r, Ret delegate(Args args) fun) 79 { 80 auto m = s.match(r); 81 if (m) 82 { 83 Args args; 84 convertCaptures(m.captures, args); 85 fun(args); 86 return true; 87 } 88 return false; 89 } 90 91 /// 92 unittest 93 { 94 assert("Mary has 5 apples" 95 .matchCaptures(`^(\w+) has (\d+) (\w+)$`, 96 (string name, int count, string fruit) 97 { 98 assert(name == "Mary" && count == 5 && fruit == "apples"); 99 } 100 ) 101 ); 102 } 103 104 /// Call a delegate over all matches. 105 size_t matchAllCaptures(S, R, Ret, Args...)(S s, R r, Ret delegate(Args args) fun) 106 { 107 size_t matches; 108 foreach (m; s.matchAll(r)) 109 { 110 Args args; 111 convertCaptures(m.captures, args); 112 fun(args); 113 matches++; 114 } 115 return matches; 116 } 117 118 /// Returns a range which extracts a capture from text. 119 template extractCaptures(T...) 120 { 121 auto extractCaptures(S, R)(S s, R r) 122 { 123 return s.matchAll(r).map!( 124 (m) 125 { 126 static if (T.length == 1) 127 return m.captures[1].to!T; 128 else 129 { 130 Tuple!T r; 131 foreach (n, TT; T) 132 r[n] = m.captures[1+n].to!TT; 133 return r; 134 } 135 }); 136 } 137 } 138 139 alias extractCapture = extractCaptures; 140 141 auto extractCapture(S, R)(S s, R r) 142 if (isSomeString!S) 143 { 144 alias x = .extractCaptures!S; 145 return x(s, r); 146 } 147 148 /// 149 unittest 150 { 151 auto s = "One 2 three 42"; 152 auto r = `(\d+)`; 153 assert(s.extractCapture (r).equal(["2", "42"])); 154 assert(s.extractCapture!int(r).equal([ 2 , 42 ])); 155 } 156 157 /// 158 unittest 159 { 160 auto s = "2.3 4.56 78.9"; 161 auto r = `(\d+)\.(\d+)`; 162 assert(s.extractCapture!(int, int)(r).equal([tuple(2, 3), tuple(4, 56), tuple(78, 9)])); 163 } 164 165 // ************************************************************************ 166 167 /// Take a string, and return a regular expression that matches that string 168 /// exactly (escape RE metacharacters). 169 string escapeRE(string s) 170 { 171 // TODO: test 172 173 string result; 174 foreach (c; s) 175 switch (c) 176 { 177 // case '!': 178 // case '"': 179 // case '#': 180 case '$': 181 // case '%': 182 // case '&': 183 case '\'': 184 case '(': 185 case ')': 186 case '*': 187 case '+': 188 // case ',': 189 // case '-': 190 case '.': 191 case '/': 192 // case ':': 193 // case ';': 194 // case '<': 195 // case '=': 196 // case '>': 197 case '?': 198 // case '@': 199 case '[': 200 case '\\': 201 case ']': 202 case '^': 203 // case '_': 204 // case '`': 205 case '{': 206 case '|': 207 case '}': 208 // case '~': 209 result ~= '\\'; 210 goto default; 211 default: 212 result ~= c; 213 } 214 return result; 215 } 216 217 // We only need to make sure that there are no unescaped forward slashes 218 // in the regex, which would mean the end of the search pattern part of the 219 // regex transform. All escaped forward slashes will be unescaped during 220 // parsing of the regex transform (which won't affect the regex, as forward 221 // slashes have no special meaning, escaped or unescaped). 222 private string escapeUnescapedSlashes(string s) 223 { 224 bool escaped = false; 225 string result; 226 foreach (c; s) 227 { 228 if (escaped) 229 escaped = false; 230 else 231 if (c == '\\') 232 escaped = true; 233 else 234 if (c == '/') 235 result ~= '\\'; 236 237 result ~= c; 238 } 239 assert(!escaped, "Regex ends with an escape"); 240 return result; 241 } 242 243 // For the replacement part, we just need to escape all forward and backslashes. 244 private string escapeSlashes(string s) 245 { 246 return s.fastReplace(`\`, `\\`).fastReplace(`/`, `\/`); 247 } 248 249 // Reverse of the above 250 private string unescapeSlashes(string s) 251 { 252 return s.fastReplace(`\/`, `/`).fastReplace(`\\`, `\`); 253 } 254 255 /// Build a RE search-and-replace transform (as used by applyRE). 256 string buildReplaceTransformation(string search, string replacement, string flags) 257 { 258 return "s/" ~ escapeUnescapedSlashes(search) ~ "/" ~ escapeSlashes(replacement) ~ "/" ~ flags; 259 } 260 261 private string[] splitRETransformation(string t) 262 { 263 enforce(t.length >= 2, "Bad transformation"); 264 string[] result = [t[0..1]]; 265 auto boundary = t[1]; 266 t = t[2..$]; 267 size_t start = 0; 268 bool escaped = false; 269 foreach (i, c; t) 270 if (escaped) 271 escaped = false; 272 else 273 if (c=='\\') 274 escaped = true; 275 else 276 if (c == boundary) 277 { 278 result ~= t[start..i]; 279 start = i+1; 280 } 281 result ~= t[start..$]; 282 return result; 283 } 284 285 unittest 286 { 287 assert(splitRETransformation("s/from/to/") == ["s", "from", "to", ""]); 288 } 289 290 /// Apply regex transformation (in the form of "s/FROM/TO/FLAGS") to a string. 291 string applyRE()(string str, string transformation) 292 { 293 import std.regex; 294 auto params = splitRETransformation(transformation); 295 enforce(params[0] == "s", "Unsupported regex transformation"); 296 enforce(params.length == 4, "Wrong number of regex transformation parameters"); 297 auto r = regex(params[1], params[3]); 298 return replace(str, r, unescapeSlashes(params[2])); 299 } 300 301 unittest 302 { 303 auto transformation = buildReplaceTransformation(`(?<=\d)(?=(\d\d\d)+\b)`, `,`, "g"); 304 assert("12000 + 42100 = 54100".applyRE(transformation) == "12,000 + 42,100 = 54,100"); 305 306 void testSlashes(string s) 307 { 308 assert(s.applyRE(buildReplaceTransformation(`\/`, `\`, "g")) == s.fastReplace(`/`, `\`)); 309 assert(s.applyRE(buildReplaceTransformation(`\\`, `/`, "g")) == s.fastReplace(`\`, `/`)); 310 } 311 testSlashes(`a/b\c`); 312 testSlashes(`a//b\\c`); 313 testSlashes(`a/\b\/c`); 314 testSlashes(`a/\\b\//c`); 315 testSlashes(`a//\b\\/c`); 316 } 317 318 // ************************************************************************