1 /**
2  * Time parsing functions.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <ae@cy.md>
12  */
13 
14 module ae.utils.time.parse;
15 
16 import core.stdc.time : time_t;
17 import core.time : minutes, seconds, dur;
18 
19 import std.exception : enforce;
20 import std.conv : to;
21 import std.ascii : isDigit, isWhite;
22 import std.datetime;
23 import std.string : indexOf;
24 import std.string : strip, startsWith;
25 
26 import ae.utils.time.common;
27 import ae.utils.time.types : AbsTime;
28 
/// Mutable state shared by the token parsers while one time string is decoded.
///
/// Params:
///   Char    = character type of the input slice `t`
///   checked = when `true`, `need` verifies with `enforce` that enough input
///             remains; when `false` that verification is compiled out
private struct ParseContext(Char, bool checked)
{
	// Calendar / clock fields collected so far, with their initial values.
	int year = 0;
	int month = 1;
	int day = 1;
	int hour = 0;
	int minute = 0;
	int second = 0;
	int nsecs = 0;

	// 12-hour clock value and its AM/PM flag.
	int hour12 = 0;
	bool pm;

	// Backing storage for the `tz` property below.
	TimeZone tz_;

	// Parsed day of week; -1 until a weekday token has been read.
	int dow = -1;

	// Input that has not been consumed yet.
	Char[] t;

	// Set after an escape token: the next format character is a literal.
	bool escaping;

	// CTFE-compatible alternative to Rebindable
	@property void tz(immutable(TimeZone) tz) { tz_ = cast()tz; }
	@property immutable(TimeZone) tz() { return cast(immutable)tz_; }

	/// In checked mode, throws unless at least `n` characters remain.
	void need(size_t n)()
	{
		static if (checked)
			enforce(t.length >= n, "Not enough characters in date string");
	}

	/// Consumes and returns the next `n` characters.
	auto take(size_t n)()
	{
		need!n();
		auto head = t[0 .. n];
		t = t[n .. $];
		return head;
	}

	/// Consumes and returns the next character.
	char takeOne()
	{
		need!1();
		auto first = t[0];
		t = t[1 .. $];
		return first;
	}

	/// Consumes a number of at least `n` and at most `maxP` characters
	/// (exactly `n` when `maxP` is -1) and converts it to `R`.
	/// Within the first `n` characters, digits, whitespace and a leading
	/// '-' are accepted; beyond that only digits extend the number.
	R takeNumber(size_t n, sizediff_t maxP = -1, R = int)()
	{
		static if (maxP == -1)
			enum size_t limit = n;
		else
			enum size_t limit = maxP;

		need!n();
		foreach (pos, ch; t[0 .. n])
		{
			immutable acceptable = (pos == 0 && ch == '-') || isDigit(ch) || isWhite(ch);
			enforce(acceptable, "Number expected");
		}

		static if (n == limit)
			enum size_t consumed = n;
		else
		{
			size_t consumed = n;
			for (; consumed < limit; consumed++)
			{
				// Only the checked variant guards against running off the end.
				static if (checked)
				{
					if (consumed >= t.length)
						break;
				}
				if (!isDigit(t[consumed]))
					break;
			}
		}

		auto digits = t[0 .. consumed];
		t = t[consumed .. $];
		return to!R(strip(digits));
	}

	/// Consumes the first entry of `words` that the input starts with and
	/// returns its index; throws "`name` expected" if none matches.
	int takeWord(in string[] words, string name)
	{
		foreach (index, string candidate; words)
		{
			static if (checked)
				immutable matched = startsWith(t, candidate);
			else
				immutable matched = t[0 .. candidate.length] == candidate;

			if (!matched)
				continue;

			t = t[candidate.length .. $];
			return cast(int)index;
		}
		throw new Exception(name ~ " expected");
	}

	/// Returns the next character without consuming it.
	char peek()
	{
		need!1();
		// Read through the pointer, as the original does, so that no
		// array bounds check is involved here.
		return t.ptr[0];
	}
}
106 
/// Consumes the input described by one format character and stores the
/// parsed value in the parse context.
///
/// Params:
///   c       = the format character to process (alias, so it may be a
///             compile-time constant or a run-time variable)
///   context = the `ParseContext` holding the remaining input and the
///             fields parsed so far
///
/// Throws: `Exception` (via `enforce`, `takeWord` or `std.conv.to`) when the
/// input does not match what the format character requires.
private void parseToken(alias c, alias context)()
{
	with (context)
	{
		// TODO: check if the compiler optimizes this check away
		// in the compile-time version. If not, "escaping" needs to
		// be moved into an alias parameter.
		if (escaping)
		{
			// The previous token was the escape character, so this
			// format character must appear literally in the input.
			enforce(takeOne() == c, c ~ " expected");
			escaping = false;
			return;
		}

		switch (c)
		{
			// Day
			case TimeFormatElement.dayOfMonthZeroPadded:
				day = takeNumber!(2)();
				break;
			case TimeFormatElement.dayOfWeekNameShort:
				dow = takeWord(WeekdayShortNames, "Weekday");
				break;
			case TimeFormatElement.dayOfMonth:
				day = takeNumber!(1, 2);
				break;
			case TimeFormatElement.dayOfWeekName:
				dow = takeWord(WeekdayLongNames, "Weekday");
				break;
			case TimeFormatElement.dayOfWeekIndexISO8601:
				// Fold the parsed index into the 0..6 range used by `dow`.
				dow = takeNumber!1 % 7;
				break;
			case TimeFormatElement.dayOfMonthOrdinalSuffix: // ordinal suffix
				// Two characters are skipped without validation.
				take!2;
				break;
			case TimeFormatElement.dayOfWeekIndex:
				dow = takeNumber!1;
				break;
			//case TimeFormatElement.dayOfYear: TODO

			// Week
			//case TimeFormatElement.weekOfYear: TODO

			// Month
			case TimeFormatElement.monthName:
				// takeWord returns a 0-based index; months are 1-based.
				month = takeWord(MonthLongNames, "Month") + 1;
				break;
			case TimeFormatElement.monthZeroPadded:
				month = takeNumber!2;
				break;
			case TimeFormatElement.monthNameShort:
				month = takeWord(MonthShortNames, "Month") + 1;
				break;
			case TimeFormatElement.month:
				month = takeNumber!(1, 2);
				break;
			case TimeFormatElement.daysInMonth:
				takeNumber!(1, 2); // TODO: validate DIM?
				break;

			// Year
			case TimeFormatElement.yearIsLeapYear:
				takeNumber!1; // TODO: validate leapness?
				break;
			// case TimeFormatElement.yearForWeekNumbering: TODO (ISO 8601 year number)
			case TimeFormatElement.year:
				year = takeNumber!4;
				break;
			case TimeFormatElement.yearOfCentury:
				year = takeNumber!2;
				if (year > 50) // TODO: find correct logic for this
					year += 1900;
				else
					year += 2000;
				break;

			// Time
			case TimeFormatElement.ampmLower:
				pm = takeWord(["am", "pm"], "am/pm")==1;
				break;
			case TimeFormatElement.ampmUpper:
				pm = takeWord(["AM", "PM"], "AM/PM")==1;
				break;
			// case TimeFormatElement.swatchInternetTime: TODO (Swatch Internet time)
			case TimeFormatElement.hour12:
				hour12 = takeNumber!(1, 2);
				break;
			case TimeFormatElement.hour:
				hour = takeNumber!(1, 2);
				break;
			case TimeFormatElement.hour12ZeroPadded:
				hour12 = takeNumber!2;
				break;
			case TimeFormatElement.hourZeroPadded:
				hour = takeNumber!2;
				break;
			case TimeFormatElement.minute:
				minute = takeNumber!2;
				break;
			case TimeFormatElement.second:
				second = takeNumber!2;
				break;
			case TimeFormatElement.milliseconds:
			case TimeFormatElement.millisecondsAlt: // not standard
				nsecs = takeNumber!3 * 1_000_000;
				break;
			case TimeFormatElement.microseconds:
				nsecs = takeNumber!6 * 1_000;
				break;
			case TimeFormatElement.nanoseconds: // not standard
				nsecs = takeNumber!9;
				break;

			// Timezone
			// case TimeFormatElement.timezoneName: ???
			case TimeFormatElement.isDST:
				takeNumber!1;
				break;
			case TimeFormatElement.timezoneOffsetWithoutColon:
			{
				// Accepts "Z", "GMT" or a signed "+hhmm" / "-hhmm" offset.
				if (peek() == 'Z')
				{
					t = t[1..$];
					tz = UTC();
				}
				else
				if (peek() == 'G')
				{
					enforce(take!3() == "GMT", "GMT expected");
					tz = UTC();
				}
				else
				{
					auto tzStr = take!5();
					enforce(tzStr[0]=='-' || tzStr[0]=='+', "- / + expected");
					auto n = (to!int(tzStr[1..3]) * 60 + to!int(tzStr[3..5])) * (tzStr[0]=='-' ? -1 : 1);
					tz = new immutable(SimpleTimeZone)(minutes(n));
				}
				break;
			}
			case TimeFormatElement.timezoneOffsetWithColon:
			{
				// Signed "+hh:mm" / "-hh:mm" offset.
				auto tzStr = take!6();
				enforce(tzStr[0]=='-' || tzStr[0]=='+', "- / + expected");
				enforce(tzStr[3]==':', ": expected");
				auto n = (to!int(tzStr[1..3]) * 60 + to!int(tzStr[4..6])) * (tzStr[0]=='-' ? -1 : 1);
				tz = new immutable(SimpleTimeZone)(minutes(n));
				break;
			}
			case TimeFormatElement.timezoneAbbreviation:
				// The whole remaining input is passed to the platform's
				// time zone lookup.
				version(Posix)
					tz = PosixTimeZone.getTimeZone(t.idup);
				else
				version(Windows)
					tz = WindowsTimeZone.getTimeZone(t.idup);

				t = null;
				break;
			case TimeFormatElement.timezoneOffsetSeconds:
			{
				// TODO: is this correct?
				auto n = takeNumber!(1, 6);
				tz = new immutable(SimpleTimeZone)(seconds(n));
				break;
			}

			// Full date/time
			//case TimeFormatElement.dateTimeISO8601: TODO
			//case TimeFormatElement.dateTimeRFC2822: TODO
			case TimeFormatElement.dateTimeUNIX:
			{
				auto unixTime = takeNumber!(1, 20, time_t);
				auto d = SysTime.fromUnixTime(unixTime, UTC()).to!DateTime;
				year = d.year;
				month = d.month;
				day = d.day;
				hour = d.hour;
				minute = d.minute;
				second = d.second;
				// The fields above are expressed in UTC, so the zone has to
				// be UTC as well. Without this, a SysTime built from these
				// fields would interpret them in the default zone and
				// denote a different instant than the Unix timestamp.
				tz = UTC();
				break;
			}

			// Escape next character
			case TimeFormatElement.escapeNextCharacter:
				escaping = true;
				break;

			// Other characters (whitespace, delimiters)
			default:
			{
				// Anything else must match the input literally.
				enforce(t.length && t[0]==c, c~ " expected or unsupported format character");
				t = t[1..$];
			}
		}
	}
}
303 
304 import ae.utils.meta;
305 
/// Parses `t` according to the format `fmt` and assembles the result as a `T`.
///
/// Params:
///   fmt       = format spec, iterated one character at a time
///   T         = result type: SysTime, AbsTime, Date, TimeOfDay or DateTime
///   checked   = whether the parse context verifies the remaining input length
///   t         = the text to parse
///   defaultTZ = time zone to start from; must be null for Date, TimeOfDay
///               and DateTime results
///
/// Throws: `Exception` on malformed input, left-over characters, a weekday
/// that contradicts the parsed date, or a non-null `defaultTZ` for a
/// zone-less result type.
private T parseTimeImpl(alias fmt, T, bool checked, C)(C[] t, immutable TimeZone defaultTZ = null)
{
	ParseContext!(C, checked) ctx;
	ctx.t = t;
	ctx.tz = defaultTZ;
	if (__ctfe && ctx.tz is null)
		ctx.tz = UTC();

	foreach (fmtChar; CTIterate!fmt)
		parseToken!(fmtChar, ctx)();

	enforce(ctx.t.length == 0, "Left-over characters: " ~ ctx.t);

	// A non-zero 12-hour value takes precedence over the 24-hour field.
	if (ctx.hour12)
	{
		ctx.hour = ctx.hour12 % 12;
		if (ctx.pm)
			ctx.hour += 12;
	}

	static if (is(T == SysTime))
	{
		// Compatibility with both <=2.066 and >=2.067
		static if (__traits(hasMember, SysTime, "fracSecs"))
			auto fraction = dur!"nsecs"(ctx.nsecs);
		else
			auto fraction = FracSec.from!"hnsecs"(ctx.nsecs / 100);

		SysTime parsed = SysTime(
			DateTime(ctx.year, ctx.month, ctx.day, ctx.hour, ctx.minute, ctx.second),
			fraction,
			ctx.tz);

		// The weekday check is skipped during CTFE.
		if (ctx.dow >= 0 && !__ctfe)
			enforce(parsed.dayOfWeek == ctx.dow, "Mismatching weekday");

		return parsed;
	}
	else static if (is(T == AbsTime))
	{
		auto fraction = dur!"nsecs"(ctx.nsecs);

		auto fields = DateTime(ctx.year, ctx.month, ctx.day, ctx.hour, ctx.minute, ctx.second);
		AbsTime parsed = AbsTime(fields, fraction);

		if (ctx.dow >= 0 && !__ctfe)
			enforce(fields.dayOfWeek == ctx.dow, "Mismatching weekday");

		return parsed;
	}
	else static if (is(T == Date))
	{
		enforce(defaultTZ is null, "Date has no concept of time zone");
		return Date(ctx.year, ctx.month, ctx.day);
	}
	else static if (is(T == TimeOfDay))
	{
		enforce(defaultTZ is null, "TimeOfDay has no concept of time zone");
		return TimeOfDay(ctx.hour, ctx.minute, ctx.second);
	}
	else static if (is(T == DateTime))
	{
		enforce(defaultTZ is null, "DateTime has no concept of time zone");
		return DateTime(ctx.year, ctx.month, ctx.day, ctx.hour, ctx.minute, ctx.second);
	}
}
375 
/// Entry points producing a `T`; see the public aliases of this template.
/*private*/ template parseTimeLike(T)
{
	/// Format known at compile time: the parser is specialized for `fmt`.
	/*private*/ T parseTimeLike(string fmt, C)(C[] str, immutable TimeZone tz = null)
	{
		// Inputs at least `timeFormatSize(fmt)` long are parsed by the
		// variant without length checks; shorter ones get the checked variant.
		enum maxLength = timeFormatSize(fmt);
		return str.length >= maxLength
			? parseTimeImpl!(fmt, T, false)(str, tz)
			: parseTimeImpl!(fmt, T, true )(str, tz);
	}

	/// Format supplied at run time; always uses the length-checked parser.
	/// Deprecated because the argument order is confusing for UFCS;
	/// use the parseTimeLikeUsing aliases instead.
	/*private*/ deprecated T parseTimeLike(C)(in char[] fmt, C[] str, immutable TimeZone tz = null)
	{
		enum bool withLengthChecks = true;
		return parseTimeImpl!(fmt, T, withLengthChecks)(str, tz);
	}
}
397 
/// Entry point producing a `T` from a format given at run time, with the
/// input string as the first argument.
/*private*/ template parseTimeLikeUsing(T)
{
	/// Parses `str` according to `fmt` using the length-checked parser.
	/*private*/ T parseTimeLikeUsing(C)(C[] str, in char[] fmt, immutable TimeZone tz = null)
	{
		enum bool withLengthChecks = true;
		return parseTimeImpl!(fmt, T, withLengthChecks)(str, tz);
	}
}
406 
/// Parses a string into a `SysTime` according to the format spec `fmt`.
/// `fmt` is a template argument, so code specialized for that format
/// is generated.
alias parseTime = parseTimeLike!(SysTime);

/// Parses a string into a `SysTime` according to the format spec `fmt`.
/// `fmt` is an ordinary argument and is interpreted at run time.
alias parseTimeUsing = parseTimeLikeUsing!(SysTime);
414 
415 version(unittest) import ae.utils.time.format;
416 
unittest
{
	// Parse, format back, and compare; then check that the run-time
	// format parser produces the same value.
	const original = "Tue Jun 07 13:23:19 GMT+0100 2011";
	//enum parsed = original.parseTime!(TimeFormats.STD_DATE); // https://issues.dlang.org/show_bug.cgi?id=12042
	auto parsed = parseTime!(TimeFormats.STD_DATE)(original);
	auto formatted = formatTime(parsed, TimeFormats.STD_DATE);
	assert(original == formatted, original ~ "/" ~ formatted);
	auto parsedAtRuntime = parseTimeUsing(original, TimeFormats.STD_DATE);
	assert(parsed == parsedAtRuntime);
}
427 
unittest
{
	// Both entry points must accept an RFC 2822 date without throwing.
	enum sample = "Tue, 21 Nov 2006 21:19:46 +0000";
	sample.parseTime!(TimeFormats.RFC2822)();
	sample.parseTimeUsing(TimeFormats.RFC2822);
}
433 
unittest
{
	// Formatting as a Unix timestamp and parsing it back must yield
	// the same point in time.
	const char[] rfc = "Tue, 21 Nov 2006 21:19:46 +0000";
	auto parsed = rfc.parseTime!(TimeFormats.RFC2822);
	auto unixText = parsed.formatTime!"U"();
	auto reparsed = unixText.parseTime!"U"();
	assert(parsed.stdTime == reparsed.stdTime);
}
440 
///
unittest
{
	// Initializing an enum forces the parse to run at compile time.
	enum buildTime = parseTime!(TimeFormats.CTIME)(__TIMESTAMP__).stdTime;
}
446 
/// Parse log timestamps generated by `ae.sys.log`,
/// including all previous versions of it.
///
/// `s` is either a bare timestamp or an entire log line beginning with a
/// bracketed timestamp. The format is selected by the timestamp's length,
/// and the result is parsed with UTC as the default time zone.
///
/// Throws: `Exception` if `s` is empty, an opening '[' has no matching ']',
/// or the timestamp length matches none of the known formats.
SysTime parseLogTimestamp(string s)
{
	enforce(s.length != 0, "Empty line");

	// Input is an entire line: keep only what is between the brackets.
	if (s[0] == '[')
	{
		immutable closing = s.indexOf(']');
		enforce(closing > 0, "Unmatched [");
		s = s[1 .. closing];
	}

	// Fri Jun 29 05:44:13 GMT+0300 2007
	if (s.length == 33)
		return parseTime!(TimeFormats.STD_DATE)(s, UTC());

	if (s.length == 23)
	{
		// 2015.02.24 21:03:01.868
		if (s[4] == '.')
			return parseTime!"Y.m.d H:i:s.E"(s, UTC());
		// 2015-11-04 00:00:45.964
		return parseTime!"Y-m-d H:i:s.E"(s, UTC());
	}

	// 2015-11-04 00:00:45.964983
	if (s.length == 26)
		return parseTime!"Y-m-d H:i:s.u"(s, UTC());

	throw new Exception("Unknown log timestamp format: " ~ s);
}
475 
/// Parses a string into a `DateTime` according to the format spec `fmt`.
/// `fmt` is a template argument, so code specialized for that format
/// is generated.
/// Parts of the input that a `DateTime` cannot hold (e.g. the time zone
/// or milliseconds) are still parsed, then silently dropped.
alias parseDateTime = parseTimeLike!(DateTime);

/// Parses a string into a `DateTime` according to the format spec `fmt`.
/// `fmt` is an ordinary argument and is interpreted at run time.
/// Parts of the input that a `DateTime` cannot hold (e.g. the time zone
/// or milliseconds) are still parsed, then silently dropped.
alias parseDateTimeUsing = parseTimeLikeUsing!(DateTime);
487 
unittest
{
	// The zone and weekday in the input are parsed but not part of the result.
	const char[] rfc = "Tue, 21 Nov 2006 21:19:46 +0000";
	auto parsed = parseDateTime!(TimeFormats.RFC2822)(rfc);
	assert(parsed.year == 2006);
	assert(parsed.second == 46);
}
494 
/// Parses a string into a `Date` according to the format spec `fmt`.
/// `fmt` is a template argument, so code specialized for that format
/// is generated.
/// Parts of the input that a `Date` cannot hold (e.g. the time zone
/// or the time of day) are still parsed, then silently dropped.
alias parseDate = parseTimeLike!(Date);

/// Parses a string into a `Date` according to the format spec `fmt`.
/// `fmt` is an ordinary argument and is interpreted at run time.
/// Parts of the input that a `Date` cannot hold (e.g. the time zone
/// or the time of day) are still parsed, then silently dropped.
alias parseDateUsing = parseTimeLikeUsing!(Date);
506 
unittest
{
	// Only the calendar date of the RFC 2822 input ends up in the result.
	const char[] rfc = "Tue, 21 Nov 2006 21:19:46 +0000";
	auto parsed = parseDate!(TimeFormats.RFC2822)(rfc);
	assert(parsed.year == 2006);
	assert(parsed.month == Month.nov);
}
513 
/// Parses a string into a `TimeOfDay` according to the format spec `fmt`.
/// `fmt` is a template argument, so code specialized for that format
/// is generated.
/// Parts of the input that a `TimeOfDay` cannot hold (e.g. year/month/day
/// or the time zone) are still parsed, then silently dropped.
alias parseTimeOfDay = parseTimeLike!(TimeOfDay);

/// Parses a string into a `TimeOfDay` according to the format spec `fmt`.
/// `fmt` is an ordinary argument and is interpreted at run time.
/// Parts of the input that a `TimeOfDay` cannot hold (e.g. year/month/day
/// or the time zone) are still parsed, then silently dropped.
alias parseTimeOfDayUsing = parseTimeLikeUsing!(TimeOfDay);
525 
unittest
{
	// Only the clock time of the RFC 2822 input ends up in the result.
	const char[] rfc = "Tue, 21 Nov 2006 21:19:46 +0000";
	auto parsed = parseTimeOfDay!(TimeFormats.RFC2822)(rfc);
	assert(parsed.hour == 21);
	assert(parsed.second == 46);
}
532 
/// Parses a string into an `AbsTime` according to the format spec `fmt`.
/// `fmt` is a template argument, so code specialized for that format
/// is generated.
/// Parts of the input that an `AbsTime` cannot hold (e.g. the time zone)
/// are still parsed, then silently dropped.
alias parseAbsTime = parseTimeLike!(AbsTime);

/// Parses a string into an `AbsTime` according to the format spec `fmt`.
/// `fmt` is an ordinary argument and is interpreted at run time.
/// Parts of the input that an `AbsTime` cannot hold (e.g. the time zone)
/// are still parsed, then silently dropped.
alias parseAbsTimeUsing = parseTimeLikeUsing!(AbsTime);
544 
unittest
{
	// The parsed AbsTime must expose the same year and second as the input.
	const char[] rfc = "Tue, 21 Nov 2006 21:19:46 +0000";
	auto parsed = parseAbsTime!(TimeFormats.RFC2822)(rfc);
	assert(parsed.sysTime.year == 2006);
	assert(parsed.sysTime.second == 46);
}