/**
 * std.regex helpers
 *
 * License:
 *   This Source Code Form is subject to the terms of
 *   the Mozilla Public License, v. 2.0. If a copy of
 *   the MPL was not distributed with this file, You
 *   can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Authors:
 *   Vladimir Panteleev <ae@cy.md>
 */

module ae.utils.regex;

import std.algorithm;
import std.conv;
import std.exception;
import std.regex;
import std.string;
import std.traits;
import std.typecons;

import ae.utils.text;

// ************************************************************************

/// Allows specifying regular expression patterns in expressions,
/// without having to compile them each time.
/// The compiled pattern is kept in a function-local static, so each
/// distinct `pattern` / `flags` instantiation gets its own cached regex.
/// Example:
///   if (text.match(`^\d+$`)) {}    // old code - recompiles every time
///   if (text.match(re!`^\d+$`)) {} // new code - recompiles once

Regex!char re(string pattern, alias flags = [])()
{
	static Regex!char cache;

	// Already compiled by an earlier call - reuse it.
	if (!cache.empty)
		return cache;

	cache = regex(pattern, flags);
	return cache;
}

unittest
{
	assert( "123".match(re!`^\d+$`));
	assert(!"abc".match(re!`^\d+$`));
}

// Converts capture groups 1..N of `captures` into the N output values,
// using std.conv.to with each value's own type. Group 0 (the whole match)
// is skipped. The number of values must equal the number of groups.
private void convertCaptures(C, T...)(C captures, out T values)
{
	immutable groupCount = captures.length - 1;
	assert(values.length == groupCount, "Capture group count mismatch: %s arguments / %s capture groups".format(values.length, groupCount));

	foreach (i, ref slot; values)
		slot = captures[i + 1].to!(T[i]);
}

/// Lua-like pattern matching.
/// Matches `s` against `r`; on success, each capture group is converted to
/// the type of the corresponding argument and stored in it.
/// Returns: `true` if the pattern matched, `false` otherwise (in which case
/// `args` are not written to).
bool matchInto(S, R, Args...)(S s, R r, ref Args args)
{
	auto found = s.match(r);
	if (!found)
		return false;

	convertCaptures(found.captures, args);
	return true;
}

///
unittest
{
	string name, fruit;
	int count;
	assert("Mary has 5 apples"
		.matchInto(`^(\w+) has (\d+) (\w+)$`, name, count, fruit));
	assert(name == "Mary" && count == 5 && fruit == "apples");
}

/// Match into a delegate.
bool matchCaptures(S, R, Ret, Args...)(S s, R r, Ret delegate(Args args) fun)
{
	auto found = s.match(r);
	if (!found)
		return false;

	// Convert the capture groups to the delegate's parameter types,
	// then hand them over. The delegate's return value is discarded.
	Args values;
	convertCaptures(found.captures, values);
	fun(values);
	return true;
}

///
unittest
{
	assert("Mary has 5 apples"
		.matchCaptures(`^(\w+) has (\d+) (\w+)$`,
			(string name, int count, string fruit)
			{
				assert(name == "Mary" && count == 5 && fruit == "apples");
			}
		)
	);
}

/// Call a delegate over all matches.
/// Each match's capture groups are converted to the delegate's parameter
/// types before the call.
/// Returns: the number of matches (i.e. the number of delegate calls).
size_t matchAllCaptures(S, R, Ret, Args...)(S s, R r, Ret delegate(Args args) fun)
{
	size_t count = 0;
	foreach (hit; s.matchAll(r))
	{
		Args values;
		convertCaptures(hit.captures, values);
		fun(values);
		++count;
	}
	return count;
}

/// Returns a range which extracts a capture from text.
/// With a single type in `T`, each element is capture group 1 converted to
/// that type; otherwise each element is a `Tuple!T` built from capture
/// groups 1 .. T.length.
template extractCaptures(T...)
{
	auto extractCaptures(S, R)(S s, R r)
	{
		auto hits = s.matchAll(r);
		return hits.map!(
			(hit)
			{
				static if (T.length != 1)
				{
					// One tuple field per requested type, filled from
					// the capture group at the same (1-based) position.
					Tuple!T fields;
					foreach (idx, Field; T)
						fields[idx] = hit.captures[idx + 1].to!Field;
					return fields;
				}
				else
					return hit.captures[1].to!T;
			});
	}
}

alias extractCapture = extractCaptures; /// ditto

// Overload used when no result type is given explicitly:
// the capture is extracted as the same string type as the input.
auto extractCapture(S, R)(S s, R r)
	if (isSomeString!S)
{
	alias sameTypeExtractor = .extractCaptures!S;
	return sameTypeExtractor(s, r);
} /// ditto

///
unittest
{
	auto s = "One 2 three 42";
	auto r = `(\d+)`;
	assert(s.extractCapture    (r).equal(["2", "42"]));
	assert(s.extractCapture!int(r).equal([ 2 ,  42 ]));
}

///
unittest
{
	auto s = "2.3 4.56 78.9";
	auto r = `(\d+)\.(\d+)`;
	assert(s.extractCapture!(int, int)(r).equal([tuple(2, 3), tuple(4, 56), tuple(78, 9)]));
}

// ************************************************************************

/// Take a string, and return a regular expression that matches that string
/// exactly (escape RE metacharacters).
string escapeRE(string s)
{
	// TODO: test

	// Characters which get a backslash prefix. All other characters,
	// including the remaining ASCII punctuation
	// (! " # % & , - : ; < = > @ _ ` ~), are copied through unchanged.
	static bool needsEscape(char c)
	{
		switch (c)
		{
			case '$', '\'', '(', ')', '*', '+', '.', '/', '?',
			     '[', '\\', ']', '^', '{', '|', '}':
				return true;
			default:
				return false;
		}
	}

	string escaped;
	foreach (ch; s)
	{
		if (needsEscape(ch))
			escaped ~= '\\';
		escaped ~= ch;
	}
	return escaped;
}

// We only need to make sure that there are no unescaped forward slashes
// in the regex, which would mean the end of the search pattern part of the
// regex transform. All escaped forward slashes will be unescaped during
// parsing of the regex transform (which won't affect the regex, as forward
// slashes have no special meaning, escaped or unescaped).
private string escapeUnescapedSlashes(string s)
{
	string output;
	bool afterBackslash = false;
	foreach (ch; s)
	{
		// A character directly following an unescaped backslash is
		// already escaped, and is copied through as-is.
		immutable alreadyEscaped = afterBackslash;
		afterBackslash = !alreadyEscaped && ch == '\\';

		if (!alreadyEscaped && ch == '/')
			output ~= '\\';

		output ~= ch;
	}
	assert(!afterBackslash, "Regex ends with an escape");
	return output;
}

// For the replacement part, we just need to escape all forward and backslashes.
// Backslashes are doubled first, so that the backslashes added in front of
// forward slashes are not doubled themselves.
private string escapeSlashes(string s)
{
	auto backslashesDoubled = s.fastReplace(`\`, `\\`);
	return backslashesDoubled.fastReplace(`/`, `\/`);
}

// Reverse of the above
private string unescapeSlashes(string s)
{
	auto slashesRestored = s.fastReplace(`\/`, `/`);
	return slashesRestored.fastReplace(`\\`, `\`);
}

/// Build a RE search-and-replace transform (as used by applyRE).
/// Params:
///   search      = regular expression to search for; unescaped forward
///                 slashes in it are escaped
///   replacement = replacement text; all forward slashes and backslashes
///                 in it are escaped
///   flags       = regex flags, appended verbatim
/// Returns: a string of the form "s/SEARCH/REPLACEMENT/FLAGS".
string buildReplaceTransformation(string search, string replacement, string flags)
{
	return "s/" ~ escapeUnescapedSlashes(search) ~ "/" ~ escapeSlashes(replacement) ~ "/" ~ flags;
}

// Parsed form of one command of a transformation string.
private struct Transformation
{
	enum Type
	{
		replace,
	}
	Type type;

	// Parameters of an "s" (search and replace) command. The strings are
	// stored exactly as they appear in the transformation string.
	struct Replace
	{
		string search, replacement, flags;
	}

	union
	{
		Replace replace;
	}
}

// Splits a transformation string (one or more commands separated by ';')
// into its parsed commands. Only the "s" (replace) command is supported;
// the character following "s" is used as the parameter boundary.
// Throws: Exception if the string is empty, a command is truncated or has
// too many parameters, or the command type is not supported.
private Transformation[] splitRETransformation(string s)
{
	enforce(s.length >= 2, "Bad transformation");
	Transformation[] result;
	while (s.length)
	{
		Transformation t;
		switch (s[0])
		{
			case 's':
			{
				// Every command - not only the first one - must have room
				// for the boundary character, otherwise s[0] below would
				// index past the end of the string (e.g. for "s/a/b/;s").
				enforce(s.length >= 2, "Bad transformation");

				t.type = Transformation.Type.replace;
				s = s[1..$];

				auto boundary = s[0];
				s = s[1..$];

				// Consumes and returns everything up to the next unescaped
				// boundary character (the boundary itself is dropped).
				string readString()
				{
					bool escaped = false;
					foreach (i, c; s)
						if (escaped)
							escaped = false;
						else
						if (c == '\\')
							escaped = true;
						else
						if (c == boundary)
						{
							auto piece = s[0..i];
							s = s[i+1..$];
							return piece;
						}
					throw new Exception("Unexpected end of regex replace transformation");
				}

				t.replace.search = readString();
				t.replace.replacement = readString();

				// The flags extend to the next ';' (which starts the next
				// command) or to the end of the string.
				size_t flagsEnd = s.length;
				foreach (i, c; s)
				{
					if (c == ';')
					{
						flagsEnd = i;
						break;
					}
					if (c == boundary)
						throw new Exception("Too many regex replace transformation parameters");
				}
				t.replace.flags = s[0..flagsEnd];
				s = flagsEnd < s.length ? s[flagsEnd+1..$] : null;

				result ~= t;
				break;
			}
			default:
				throw new Exception("Unsupported regex transformation: " ~ s[0]);
		}
	}
	return result;
}

unittest
{
	auto actual = splitRETransformation("s/from/to/");
	Transformation expected = { type : Transformation.Type.replace, replace : { search : "from", replacement : "to", flags : "" } };
	assert(actual.length == 1 && actual[0].tupleof == expected.tupleof);

	// Multiple commands, with and without flags
	auto multi = splitRETransformation("s/a/b/g;s/c/d/");
	assert(multi.length == 2);
	assert(multi[0].replace.search == "a" && multi[0].replace.replacement == "b" && multi[0].replace.flags == "g");
	assert(multi[1].replace.search == "c" && multi[1].replace.replacement == "d" && multi[1].replace.flags == "");

	// Malformed input is reported with an Exception, including a
	// truncated command which is not the first one
	assertThrown!Exception(splitRETransformation(""));
	assertThrown!Exception(splitRETransformation("s"));
	assertThrown!Exception(splitRETransformation("s/a/b/;s"));
	assertThrown!Exception(splitRETransformation("s/a/b"));
	assertThrown!Exception(splitRETransformation("s/a/b/g/"));
	assertThrown!Exception(splitRETransformation("x/a/b/"));
}

/// Apply sed-like regex
/// transformation (in the form of "s/FROM/TO/FLAGS") to a string.
/// Multiple commands can be separated by ';'.
/// Commands are applied in order, each one operating on the result of the
/// previous one. The regular expression of each command is compiled on
/// every call.
string applyRE()(string str, string transformation)
{
	import std.regex;

	foreach (step; splitRETransformation(transformation))
		final switch (step.type)
		{
			case Transformation.Type.replace:
			{
				auto pattern = regex(step.replace.search, step.replace.flags);
				// The replacement is stored in its escaped form; undo the
				// slash escaping before handing it to std.regex.
				auto replacement = unescapeSlashes(step.replace.replacement);
				str = replace(str, pattern, replacement);
			}
		}
	return str;
}

unittest
{
	auto transformation = buildReplaceTransformation(`(?<=\d)(?=(\d\d\d)+\b)`, `,`, "g");
	assert("12000 + 42100 = 54100".applyRE(transformation) == "12,000 + 42,100 = 54,100");

	void testSlashes(string s)
	{
		assert(s.applyRE(buildReplaceTransformation(`\/`, `\`, "g")) == s.fastReplace(`/`, `\`));
		assert(s.applyRE(buildReplaceTransformation(`\\`, `/`, "g")) == s.fastReplace(`\`, `/`));
	}
	testSlashes(`a/b\c`);
	testSlashes(`a//b\\c`);
	testSlashes(`a/\b\/c`);
	testSlashes(`a/\\b\//c`);
	testSlashes(`a//\b\\/c`);

	assert("babba".applyRE(`s/a/c/g;s/b/a/g;s/c/b/g`) == "abaab");
}

// ************************************************************************