/**
 * std.regex helpers
 *
 * License:
 *   This Source Code Form is subject to the terms of
 *   the Mozilla Public License, v. 2.0. If a copy of
 *   the MPL was not distributed with this file, You
 *   can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Authors:
 *   Vladimir Panteleev <vladimir@thecybershadow.net>
 */

module ae.utils.regex;

import std.algorithm;
import std.conv;
import std.exception;
import std.regex;
import std.string;

import ae.utils.text;

// ************************************************************************

/// Allows specifying regular expression patterns in expressions,
/// without having to compile them each time.
/// The pattern is compiled on the first call of a given instantiation
/// and the stored result is returned on later calls.
/// Example:
///   if (text.match(`^\d+$`)) {}    // old code - recompiles every time
///   if (text.match(re!`^\d+$`)) {} // new code - recompiles once

Regex!char re(string pattern, alias flags = [])()
{
	// Static local: kept between calls of this (pattern, flags) instantiation.
	static Regex!char compiled;
	if (!compiled.empty)
		return compiled;
	compiled = regex(pattern, flags);
	return compiled;
}

unittest
{
	// The same pattern is used twice, so the second use takes the cached path.
	assert( "123".match(re!`^\d+$`));
	assert(!"abc".match(re!`^\d+$`));
}

/// Converts capture groups into typed variables.
/// Element `n` of `values` receives `captures[n+1]` (index 0 is skipped),
/// converted with `std.conv.to` to that element's declared type.
/// The number of `values` must equal `captures.length - 1`; this is
/// checked with an `assert`.
void convertCaptures(C, T...)(C captures, out T values)
{
	immutable groupCount = captures.length - 1;
	assert(values.length == groupCount,
		"Capture group count mismatch: %s arguments / %s capture groups".format(values.length, groupCount));
	foreach (idx, ref slot; values)
		slot = captures[idx + 1].to!(T[idx]);
}

/// Lua-like pattern matching.
/// Matches `s` against `r`; on success the capture groups are converted
/// and stored into `args`.
/// Returns: `true` if `s` matched, `false` otherwise (in which case
/// `args` are not written to).
bool matchInto(S, R, Args...)(S s, R r, ref Args args)
{
	auto found = s.match(r);
	if (!found)
		return false;

	convertCaptures(found.captures, args);
	return true;
}

///
unittest
{
	string name, fruit;
	int count;
	assert("Mary has 5 apples"
		.matchInto(`^(\w+) has (\d+) (\w+)$`, name, count, fruit));
	assert(name == "Mary" && count == 5 && fruit == "apples");
}

/// Match into a delegate.
///
/// If `s.match(r)` succeeds, the capture groups are converted to the
/// delegate's parameter types and `fun` is called with them. Whatever
/// `fun` returns is discarded.
/// Returns: `true` if there was a match (and `fun` was called),
/// `false` otherwise.
bool matchCaptures(S, R, Ret, Args...)(S s, R r, Ret delegate(Args args) fun)
{
	auto found = s.match(r);
	if (!found)
		return false;

	Args converted;
	convertCaptures(found.captures, converted);
	fun(converted);
	return true;
}

///
unittest
{
	assert("Mary has 5 apples"
		.matchCaptures(`^(\w+) has (\d+) (\w+)$`,
			(string name, int count, string fruit)
			{
				assert(name == "Mary" && count == 5 && fruit == "apples");
			}
		)
	);
}

/// Call a delegate over all matches.
/// For every element of `s.matchAll(r)`, the capture groups are converted
/// to the delegate's parameter types and passed to `fun`.
/// Returns: the number of matches (i.e. the number of times `fun` was called).
size_t matchAllCaptures(S, R, Ret, Args...)(S s, R r, Ret delegate(Args args) fun)
{
	size_t count;
	foreach (hit; s.matchAll(r))
	{
		// Fresh, default-initialized arguments for every match.
		Args converted;
		convertCaptures(hit.captures, converted);
		fun(converted);
		++count;
	}
	return count;
}

/// Returns a range which extracts a capture from text.
/// Each element is capture group 1 of one match of `r` in `s`,
/// converted to `T`.
template extractCapture(T)
{
	auto extractCapture(S, R)(S s, R r)
	{
		auto allMatches = s.matchAll(r);
		return allMatches.map!(hit => to!T(hit.captures[1]));
	}
}

/// Overload without an explicit element type: forwards to
/// `extractCapture!S`, so the captures are converted to the type of `s`.
auto extractCapture(S, R)(S s, R r)
{
	alias typed = extractCapture!S;
	return typed(s, r);
}

///
unittest
{
	auto text = "One 2 three 42";
	auto digits = `(\d+)`;
	assert(text.extractCapture    (digits).equal(["2", "42"]));
	assert(text.extractCapture!int(digits).equal([ 2 ,  42 ]));
}

// ************************************************************************

/// Take a string, and return a regular expression that matches that string
/// exactly (escape RE metacharacters).
///
/// Every character listed in the `switch` below is prefixed with a
/// backslash; all other characters are copied through unchanged.
string escapeRE(string s)
{
	// TODO: test

	string escaped;
	foreach (ch; s)
	{
		switch (ch)
		{
			// Punctuation that gets a backslash prefix. The remaining ASCII
			// punctuation ( ! " # % & , - : ; < = > @ _ ` ~ ) is deliberately
			// left out and is emitted as-is.
			case '$', '\'', '(', ')', '*', '+', '.', '/', '?',
			     '[', '\\', ']', '^', '{', '|', '}':
				escaped ~= '\\';
				break;
			default:
				break;
		}
		escaped ~= ch;
	}
	return escaped;
}

// We only need to make sure that there are no unescaped forward slashes
// in the regex, which would mean the end of the search pattern part of the
// regex transform. All escaped forward slashes will be unescaped during
// parsing of the regex transform (which won't affect the regex, as forward
// slashes have no special meaning, escaped or unescaped).
private string escapeUnescapedSlashes(string s)
{
	// True while the previous character was a backslash that escapes
	// the current one.
	bool pendingEscape = false;
	string result;
	foreach (c; s)
	{
		if (pendingEscape)
			pendingEscape = false; // already escaped: copy verbatim
		else
			switch (c)
			{
				case '\\':
					pendingEscape = true;
					break;
				case '/':
					result ~= '\\'; // bare slash: add the missing escape
					break;
				default:
					break;
			}

		result ~= c;
	}
	assert(!pendingEscape, "Regex ends with an escape");
	return result;
}

// For the replacement part, we just need to escape all forward and backslashes.
private string escapeSlashes(string s)
{
	// Backslashes are handled first, so that the backslashes introduced
	// for the forward slashes are not doubled afterwards.
	auto backslashesDoubled = s.fastReplace(`\`, `\\`);
	return backslashesDoubled.fastReplace(`/`, `\/`);
}

// Reverse of the above
private string unescapeSlashes(string s)
{
	// Same two replacements as in escapeSlashes, undone in the opposite order.
	auto slashesRestored = s.fastReplace(`\/`, `/`);
	return slashesRestored.fastReplace(`\\`, `\`);
}

/// Build a RE search-and-replace transform (as used by applyRE).
///
/// The result has the form `s/SEARCH/REPLACEMENT/FLAGS`: unescaped forward
/// slashes in `search` are escaped, forward slashes and backslashes in
/// `replacement` are escaped, and `flags` is appended unchanged.
string buildReplaceTransformation(string search, string replacement, string flags)
{
	auto searchPart = escapeUnescapedSlashes(search);
	auto replacementPart = escapeSlashes(replacement);
	return "s/" ~ searchPart ~ "/" ~ replacementPart ~ "/" ~ flags;
}

// Splits a transformation such as "s/from/to/flags" into its fields.
// The first character becomes the first field and the second character is
// taken as the field boundary. A boundary character preceded by a backslash
// does not split; the backslash itself stays in the field.
// Throws (via enforce) if the input is shorter than two characters.
private string[] splitRETransformation(string t)
{
	enforce(t.length >= 2, "Bad transformation");

	immutable delimiter = t[1];
	auto command = t[0 .. 1];
	auto rest = t[2 .. $];

	string[] parts = [command];
	size_t fieldStart = 0;
	bool skipNext = false;
	foreach (pos, ch; rest)
	{
		if (skipNext)
		{
			// Character following a backslash: never treated as a boundary.
			skipNext = false;
			continue;
		}
		if (ch == '\\')
		{
			skipNext = true;
			continue;
		}
		if (ch == delimiter)
		{
			parts ~= rest[fieldStart .. pos];
			fieldStart = pos + 1;
		}
	}
	// Whatever follows the last boundary (possibly empty) is the final field.
	parts ~= rest[fieldStart .. $];
	return parts;
}

unittest
{
	assert(splitRETransformation("s/from/to/") == ["s", "from", "to", ""]);
}

/// Apply regex transformation (in the form of "s/FROM/TO/FLAGS") to a string.
/// Only the `s` command is supported, and the transformation must split
/// into exactly four fields; otherwise an exception is thrown via `enforce`.
/// FROM and FLAGS are passed to `regex`; TO has its slash escaping removed
/// before being used as the replacement.
string applyRE()(string str, string transformation)
{
	import std.regex;

	auto fields = splitRETransformation(transformation);
	enforce(fields[0] == "s", "Unsupported regex transformation");
	enforce(fields.length == 4, "Wrong number of regex transformation parameters");

	auto searchPattern = fields[1];
	auto replacementText = fields[2];
	auto regexFlags = fields[3];

	auto compiled = regex(searchPattern, regexFlags);
	return replace(str, compiled, unescapeSlashes(replacementText));
}

unittest
{
	// Insert thousands separators.
	auto transformation = buildReplaceTransformation(`(?<=\d)(?=(\d\d\d)+\b)`, `,`, "g");
	assert("12000 + 42100 = 54100".applyRE(transformation) == "12,000 + 42,100 = 54,100");

	// Swapping slashes and backslashes must survive the build/apply round trip.
	void testSlashes(string s)
	{
		assert(s.applyRE(buildReplaceTransformation(`\/`, `\`, "g")) == s.fastReplace(`/`, `\`));
		assert(s.applyRE(buildReplaceTransformation(`\\`, `/`, "g")) == s.fastReplace(`\`, `/`));
	}
	testSlashes(`a/b\c`);
	testSlashes(`a//b\\c`);
	testSlashes(`a/\b\/c`);
	testSlashes(`a/\\b\//c`);
	testSlashes(`a//\b\\/c`);
}

// ************************************************************************