/**
 * Time parsing functions.
 *
 * License:
 *   This Source Code Form is subject to the terms of
 *   the Mozilla Public License, v. 2.0. If a copy of
 *   the MPL was not distributed with this file, You
 *   can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Authors:
 *   Vladimir Panteleev <ae@cy.md>
 */

module ae.utils.time.parse;

import core.stdc.time : time_t;
import core.time : minutes, seconds, dur;

import std.exception : enforce;
import std.conv : to;
import std.ascii : isDigit, isWhite;
import std.datetime;
import std.string : indexOf;
import std.string : strip, startsWith;

import ae.utils.time.common;
import ae.utils.time.types : AbsTime;

/// Parser state: the fields collected so far, plus the not-yet-consumed input.
/// When `checked` is false, the explicit length checks in `need` (and in the
/// helpers built on it) are compiled out.
private struct ParseContext(Char, bool checked)
{
	int year=0, month=1, day=1, hour=0, minute=0, second=0, nsecs=0;
	int hour12 = 0; bool pm;
	TimeZone tz_;
	int dow = -1; // -1 means "no weekday was parsed"
	Char[] t;     // remaining (unparsed) input
	bool escaping; // true when the previous format character was the escape character

	// CTFE-compatible alternative to Rebindable
	@property immutable(TimeZone) tz() { return cast(immutable)tz_; }
	@property void tz(immutable(TimeZone) tz) { tz_ = cast()tz; }

	/// In checked mode, throw unless at least `n` characters of input remain.
	void need(size_t n)()
	{
		static if (checked)
			enforce(t.length >= n, "Not enough characters in date string");
	}

	/// Consume and return the next `n` characters.
	auto take(size_t n)()
	{
		need!n();
		auto result = t[0..n];
		t = t[n..$];
		return result;
	}

	/// Consume and return the next character.
	char takeOne()
	{
		need!1();
		auto result = t[0];
		t = t[1..$];
		return result;
	}

	/// Consume a number and convert it to `R`.
	/// The first `n` characters are mandatory and must each be a digit,
	/// whitespace, or (first position only) a minus sign. If `maxP` is given,
	/// further digits are consumed until the number is `maxP` characters long.
	/// Throws: if a mandatory character is not acceptable, or if the
	/// (whitespace-stripped) text cannot be converted to `R`.
	R takeNumber(size_t n, sizediff_t maxP = -1, R = int)()
	{
		enum max = maxP == -1 ? n : maxP;
		need!n();
		foreach (i, c; t[0..n])
			enforce((i==0 && c=='-') || isDigit(c) || isWhite(c), "Number expected");
		static if (n == max)
			enum i = n;
		else
		{
			auto i = n;
			while (i < max && (checked ? i < t.length : true) && isDigit(t[i]))
				i++;
		}
		auto s = t[0..i];
		t = t[i..$];
		return s.strip().to!R();
	}

	/// Consume the first entry of `words` that the remaining input starts with,
	/// and return its index.
	/// Throws: `Exception` with the message `name ~ " expected"` if none match.
	int takeWord(in string[] words, string name)
	{
		foreach (idx, string word; words)
		{
			static if (checked)
				bool b = t.startsWith(word);
			else
				bool b = t[0..word.length] == word;
			if (b)
			{
				t = t[word.length..$];
				return cast(int)idx;
			}
		}
		throw new Exception(name ~ " expected");
	}

	/// Return the next character without consuming it.
	char peek()
	{
		need!1();
		return *t.ptr; // read through the pointer instead of indexing
	}
}

/// Consume from `context` the part of the input which corresponds to the
/// format character `c`, storing any parsed value in the context's fields.
private void parseToken(alias c, alias context)()
{
	with (context)
	{
		// TODO: check if the compiler optimizes this check away
		// in the compile-time version. If not, "escaping" needs to
		// be moved into an alias parameter.
		if (escaping)
		{
			// An escaped format character must appear literally in the input
			enforce(takeOne() == c, c ~ " expected");
			escaping = false;
			return;
		}

		switch (c)
		{
			// Day
			case TimeFormatElement.dayOfMonthZeroPadded:
				day = takeNumber!(2)();
				break;
			case TimeFormatElement.dayOfWeekNameShort:
				dow = takeWord(WeekdayShortNames, "Weekday");
				break;
			case TimeFormatElement.dayOfMonth:
				day = takeNumber!(1, 2);
				break;
			case TimeFormatElement.dayOfWeekName:
				dow = takeWord(WeekdayLongNames, "Weekday");
				break;
			case TimeFormatElement.dayOfWeekIndexISO8601:
				dow = takeNumber!1 % 7;
				break;
			case TimeFormatElement.dayOfMonthOrdinalSuffix: // ordinal suffix
				take!2;
				break;
			case TimeFormatElement.dayOfWeekIndex:
				dow = takeNumber!1;
				break;
			//case TimeFormatElement.dayOfYear: TODO

			// Week
			//case TimeFormatElement.weekOfYear: TODO

			// Month
			case TimeFormatElement.monthName:
				month = takeWord(MonthLongNames, "Month") + 1;
				break;
			case TimeFormatElement.monthZeroPadded:
				month = takeNumber!2;
				break;
			case TimeFormatElement.monthNameShort:
				month = takeWord(MonthShortNames, "Month") + 1;
				break;
			case TimeFormatElement.month:
				month = takeNumber!(1, 2);
				break;
			case TimeFormatElement.daysInMonth:
				takeNumber!(1, 2); // TODO: validate DIM?
				break;

			// Year
			case TimeFormatElement.yearIsLeapYear:
				takeNumber!1; // TODO: validate leapness?
				break;
			// case TimeFormatElement.yearForWeekNumbering: TODO (ISO 8601 year number)
			case TimeFormatElement.year:
				year = takeNumber!4;
				break;
			case TimeFormatElement.yearOfCentury:
				year = takeNumber!2;
				if (year > 50) // TODO: find correct logic for this
					year += 1900;
				else
					year += 2000;
				break;

			// Time
			case TimeFormatElement.ampmLower:
				pm = takeWord(["am", "pm"], "am/pm")==1;
				break;
			case TimeFormatElement.ampmUpper:
				pm = takeWord(["AM", "PM"], "AM/PM")==1;
				break;
			// case TimeFormatElement.swatchInternetTime: TODO (Swatch Internet time)
			case TimeFormatElement.hour12:
				hour12 = takeNumber!(1, 2);
				break;
			case TimeFormatElement.hour:
				hour = takeNumber!(1, 2);
				break;
			case TimeFormatElement.hour12ZeroPadded:
				hour12 = takeNumber!2;
				break;
			case TimeFormatElement.hourZeroPadded:
				hour = takeNumber!2;
				break;
			case TimeFormatElement.minute:
				minute = takeNumber!2;
				break;
			case TimeFormatElement.second:
				second = takeNumber!2;
				break;
			case TimeFormatElement.milliseconds:
			case TimeFormatElement.millisecondsAlt: // not standard
				nsecs = takeNumber!3 * 1_000_000;
				break;
			case TimeFormatElement.microseconds:
				nsecs = takeNumber!6 * 1_000;
				break;
			case TimeFormatElement.nanoseconds: // not standard
				nsecs = takeNumber!9;
				break;

			// Timezone
			// case TimeFormatElement.timezoneName: ???
			case TimeFormatElement.isDST:
				takeNumber!1;
				break;
			case TimeFormatElement.timezoneOffsetWithoutColon:
			{
				if (peek() == 'Z')
				{
					t = t[1..$];
					tz = UTC();
				}
				else
				if (peek() == 'G')
				{
					enforce(take!3() == "GMT", "GMT expected");
					tz = UTC();
				}
				else
				{
					// +HHMM / -HHMM
					auto tzStr = take!5();
					enforce(tzStr[0]=='-' || tzStr[0]=='+', "- / + expected");
					auto n = (to!int(tzStr[1..3]) * 60 + to!int(tzStr[3..5])) * (tzStr[0]=='-' ? -1 : 1);
					tz = new immutable(SimpleTimeZone)(minutes(n));
				}
				break;
			}
			case TimeFormatElement.timezoneOffsetWithColon:
			{
				// +HH:MM / -HH:MM
				auto tzStr = take!6();
				enforce(tzStr[0]=='-' || tzStr[0]=='+', "- / + expected");
				enforce(tzStr[3]==':', ": expected");
				auto n = (to!int(tzStr[1..3]) * 60 + to!int(tzStr[4..6])) * (tzStr[0]=='-' ? -1 : 1);
				tz = new immutable(SimpleTimeZone)(minutes(n));
				break;
			}
			case TimeFormatElement.timezoneAbbreviation:
				// The entire remainder of the input is used as the time zone name
				version(Posix)
					tz = PosixTimeZone.getTimeZone(t.idup);
				else
				version(Windows)
					tz = WindowsTimeZone.getTimeZone(t.idup);

				t = null;
				break;
			case TimeFormatElement.timezoneOffsetSeconds:
			{
				// TODO: is this correct?
				auto n = takeNumber!(1, 6);
				tz = new immutable(SimpleTimeZone)(seconds(n));
				break;
			}

			// Full date/time
			//case TimeFormatElement.dateTimeISO8601: TODO
			//case TimeFormatElement.dateTimeRFC2822: TODO
			case TimeFormatElement.dateTimeUNIX:
			{
				auto unixTime = takeNumber!(1, 20, time_t);
				auto d = SysTime.fromUnixTime(unixTime, UTC()).to!DateTime;
				year = d.year;
				month = d.month;
				day = d.day;
				hour = d.hour;
				minute = d.minute;
				second = d.second;
				// The fields above are expressed in UTC, so they must also be
				// interpreted as UTC when the result is built. Otherwise they
				// would be read as local time (or as the caller's default time
				// zone), shifting the result by that zone's UTC offset.
				tz = UTC();
				break;
			}

			// Escape next character
			case TimeFormatElement.escapeNextCharacter:
				escaping = true;
				break;

			// Other characters (whitespace, delimiters)
			default:
			{
				enforce(t.length && t[0]==c, c~ " expected or unsupported format character");
				t = t[1..$];
			}
		}
	}
}

import ae.utils.meta;

/// Parse `t` according to the format `fmt`, and build a `T` from the result.
/// Params:
///   fmt       = format specification (compile-time or run-time value)
///   T         = result type: `SysTime`, `AbsTime`, `Date`, `TimeOfDay` or `DateTime`
///   checked   = whether to verify that enough input remains before each read
///   t         = the string to parse
///   defaultTZ = time zone to use if the input does not specify one;
///               must be `null` for `Date`, `TimeOfDay` and `DateTime`
/// Throws: if the input does not match the format, if characters are left
/// over after parsing, or (for `SysTime` / `AbsTime`, at run time) if a
/// parsed weekday contradicts the parsed date.
private T parseTimeImpl(alias fmt, T, bool checked, C)(C[] t, immutable TimeZone defaultTZ = null)
{
	ParseContext!(C, checked) context;
	context.t = t;
	context.tz = defaultTZ;
	// During CTFE, fall back to UTC when no time zone was given
	if (__ctfe && context.tz is null)
		context.tz = UTC();

	foreach (c; CTIterate!fmt)
		parseToken!(c, context)();

	enforce(context.t.length == 0, "Left-over characters: " ~ context.t);

	with (context)
	{
		// Convert 12-hour clock to 24-hour clock (12 AM -> 0, 12 PM -> 12)
		if (hour12)
			hour = hour12 % 12 + (pm ? 12 : 0);

		static if (is(T == SysTime))
		{
			// Compatibility with both <=2.066 and >=2.067
			static if (__traits(hasMember, SysTime, "fracSecs"))
				auto frac = dur!"nsecs"(nsecs);
			else
				auto frac = FracSec.from!"hnsecs"(nsecs / 100);

			SysTime result = SysTime(
				DateTime(year, month, day, hour, minute, second),
				frac,
				tz);

			if (dow >= 0 && !__ctfe)
				enforce(result.dayOfWeek == dow, "Mismatching weekday");

			return result;
		}
		else
		static if (is(T == AbsTime))
		{
			auto frac = dur!"nsecs"(nsecs);

			auto dt = DateTime(year, month, day, hour, minute, second);
			AbsTime result = AbsTime(dt, frac);

			if (dow >= 0 && !__ctfe)
				enforce(dt.dayOfWeek == dow, "Mismatching weekday");

			return result;
		}
		else
		static if (is(T == Date))
		{
			enforce(defaultTZ is null, "Date has no concept of time zone");
			return Date(year, month, day);
		}
		else
		static if (is(T == TimeOfDay))
		{
			enforce(defaultTZ is null, "TimeOfDay has no concept of time zone");
			return TimeOfDay(hour, minute, second);
		}
		else
		static if (is(T == DateTime))
		{
			enforce(defaultTZ is null, "DateTime has no concept of time zone");
			return DateTime(year, month, day, hour, minute, second);
		}
		else
			static assert(false, "Unsupported time type: " ~ T.stringof);
	}
}

/// Parsing functions which take the string to parse as the (last) run-time
/// string argument, producing a `T`.
/*private*/ template parseTimeLike(T)
{
	// Compile-time format string parsing
	/*private*/ T parseTimeLike(string fmt, C)(C[] str, immutable TimeZone tz = null)
	{
		// Omit length checks if we know the input string is long enough
		enum maxLength = timeFormatSize(fmt);
		if (str.length < maxLength)
			return parseTimeImpl!(fmt, T, true )(str, tz);
		else
			return parseTimeImpl!(fmt, T, false)(str, tz);
	}

	// Run-time format string parsing
	// Deprecated because the argument order is confusing for UFCS;
	// use the parseTimeLikeUsing aliases instead.
	/*private*/ deprecated T parseTimeLike(C)(in char[] fmt, C[] str, immutable TimeZone tz = null)
	{
		return parseTimeImpl!(fmt, T, true)(str, tz);
	}
}

/// Parsing function which takes the string to parse first and the run-time
/// format second (UFCS-friendly argument order), producing a `T`.
/*private*/ template parseTimeLikeUsing(T)
{
	// Run-time format string parsing
	/*private*/ T parseTimeLikeUsing(C)(C[] str, in char[] fmt, immutable TimeZone tz = null)
	{
		return parseTimeImpl!(fmt, T, true)(str, tz);
	}
}

/// Parse the given string into a SysTime, using the format spec fmt.
/// This version generates specialized code for the given fmt.
alias parseTime = parseTimeLike!SysTime;

/// Parse the given string into a SysTime, using the format spec fmt.
/// This version parses fmt at runtime.
alias parseTimeUsing = parseTimeLikeUsing!SysTime;

version(unittest) import ae.utils.time.format;

unittest
{
	const s0 = "Tue Jun 07 13:23:19 GMT+0100 2011";
	//enum t = s0.parseTime!(TimeFormats.STD_DATE); // https://issues.dlang.org/show_bug.cgi?id=12042
	auto t = s0.parseTime!(TimeFormats.STD_DATE);
	auto s1 = t.formatTime(TimeFormats.STD_DATE);
	assert(s0 == s1, s0 ~ "/" ~ s1);
	auto t1 = s0.parseTimeUsing(TimeFormats.STD_DATE);
	assert(t == t1);
}

unittest
{
	"Tue, 21 Nov 2006 21:19:46 +0000".parseTime!(TimeFormats.RFC2822);
	"Tue, 21 Nov 2006 21:19:46 +0000".parseTimeUsing(TimeFormats.RFC2822);
}

unittest
{
	const char[] s = "Tue, 21 Nov 2006 21:19:46 +0000";
	auto d = s.parseTime!(TimeFormats.RFC2822);
	assert(d.stdTime == d.formatTime!"U".parseTime!"U".stdTime);
}

unittest
{
	// A UNIX timestamp denotes an absolute instant,
	// independent of the local time zone.
	assert("0".parseTime!"U".stdTime == SysTime(DateTime(1970, 1, 1), UTC()).stdTime);
	assert("86400".parseTime!"U".stdTime == SysTime(DateTime(1970, 1, 2), UTC()).stdTime);
}

///
unittest
{
	enum buildTime = __TIMESTAMP__.parseTime!(TimeFormats.CTIME).stdTime;
}

/// Parse log timestamps generated by `ae.sys.log`,
/// including all previous versions of it.
SysTime parseLogTimestamp(string s)
{
	enforce(s.length, "Empty line");

	// A whole log line was passed in: the timestamp is the part
	// between the leading '[' and the first ']'.
	if (s[0] == '[')
	{
		auto closing = s.indexOf(']');
		enforce(closing > 0, "Unmatched [");
		s = s[1 .. closing];
	}

	// The format is identified solely by the length of the timestamp.

	// Fri Jun 29 05:44:13 GMT+0300 2007
	if (s.length == 33)
		return s.parseTime!(TimeFormats.STD_DATE)(UTC());

	// 2015-11-04 00:00:45.964983
	if (s.length == 26)
		return s.parseTime!"Y-m-d H:i:s.u"(UTC());

	if (s.length == 23)
	{
		// 2015.02.24 21:03:01.868
		if (s[4] == '.')
			return s.parseTime!"Y.m.d H:i:s.E"(UTC());

		// 2015-11-04 00:00:45.964
		return s.parseTime!"Y-m-d H:i:s.E"(UTC());
	}

	throw new Exception("Unknown log timestamp format: " ~ s);
}

/// Parse the given string into a DateTime, according to the format spec fmt.
/// This version generates code specialized for the given fmt.
/// Anything a DateTime cannot represent (e.g. the timezone or
/// milliseconds) is still parsed, but then silently dropped.
alias parseDateTime = parseTimeLike!DateTime;

/// Parse the given string into a DateTime, according to the format spec fmt.
/// This version interprets fmt at runtime.
/// Anything a DateTime cannot represent (e.g. the timezone or
/// milliseconds) is still parsed, but then silently dropped.
alias parseDateTimeUsing = parseTimeLikeUsing!DateTime;

unittest
{
	const char[] input = "Tue, 21 Nov 2006 21:19:46 +0000";
	auto parsed = input.parseDateTime!(TimeFormats.RFC2822);
	assert(parsed.year == 2006);
	assert(parsed.second == 46);
}

/// Parse the given string into a Date, according to the format spec fmt.
/// This version generates code specialized for the given fmt.
/// Anything a Date cannot represent (e.g. the timezone or the
/// time of day) is still parsed, but then silently dropped.
alias parseDate = parseTimeLike!Date;

/// Parse the given string into a Date, according to the format spec fmt.
/// This version interprets fmt at runtime.
/// Anything a Date cannot represent (e.g. the timezone or the
/// time of day) is still parsed, but then silently dropped.
alias parseDateUsing = parseTimeLikeUsing!Date;

unittest
{
	const char[] input = "Tue, 21 Nov 2006 21:19:46 +0000";
	auto parsed = input.parseDate!(TimeFormats.RFC2822);
	assert(parsed.year == 2006);
	assert(parsed.month == Month.nov);
}

/// Parse the given string into a TimeOfDay, according to the format spec fmt.
/// This version generates code specialized for the given fmt.
/// Anything a TimeOfDay cannot represent (e.g. year/month/day or
/// the timezone) is still parsed, but then silently dropped.
alias parseTimeOfDay = parseTimeLike!TimeOfDay;

/// Parse the given string into a TimeOfDay, according to the format spec fmt.
/// This version interprets fmt at runtime.
/// Anything a TimeOfDay cannot represent (e.g. year/month/day or
/// the timezone) is still parsed, but then silently dropped.
alias parseTimeOfDayUsing = parseTimeLikeUsing!TimeOfDay;

unittest
{
	const char[] input = "Tue, 21 Nov 2006 21:19:46 +0000";
	auto parsed = input.parseTimeOfDay!(TimeFormats.RFC2822);
	assert(parsed.hour == 21);
	assert(parsed.second == 46);
}

/// Parse the given string into an AbsTime, according to the format spec fmt.
/// This version generates code specialized for the given fmt.
/// Anything an AbsTime cannot represent (e.g. the timezone)
/// is still parsed, but then silently dropped.
alias parseAbsTime = parseTimeLike!AbsTime;

/// Parse the given string into an AbsTime, according to the format spec fmt.
/// This version interprets fmt at runtime.
/// Anything an AbsTime cannot represent (e.g. the timezone)
/// is still parsed, but then silently dropped.
alias parseAbsTimeUsing = parseTimeLikeUsing!AbsTime;

unittest
{
	const char[] input = "Tue, 21 Nov 2006 21:19:46 +0000";
	auto parsed = input.parseAbsTime!(TimeFormats.RFC2822);
	assert(parsed.sysTime.year == 2006);
	assert(parsed.sysTime.second == 46);
}