1 /**
2  * Time parsing functions.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <ae@cy.md>
12  */
13 
14 module ae.utils.time.parse;
15 
16 import core.stdc.time : time_t;
17 import core.time : minutes, seconds, dur;
18 
19 import std.exception : enforce;
20 import std.conv : to;
21 import std.ascii : isDigit, isWhite;
22 import std.datetime;
23 import std.string : indexOf;
24 import std.string : strip, startsWith;
25 
26 import ae.utils.time.common;
27 
/// Mutable state shared by all format-element parsers while a single
/// time string is being consumed.
///
/// Params:
///   Char    = element type of the input slice
///   checked = when true, every read is preceded by an explicit length
///             check that throws on short input; when false, those
///             checks are compiled out
private struct ParseContext(Char, bool checked)
{
	// Date/time components collected so far, with their defaults.
	int year=0, month=1, day=1, hour=0, minute=0, second=0, nsecs=0;
	// 12-hour clock value (0 = none seen) and the am/pm flag.
	int hour12 = 0; bool pm;
	// Backing storage for the `tz` property below.
	TimeZone tz_;
	// Day of week found in the input, or -1 if none was parsed.
	int dow = -1;
	// The part of the input that has not been consumed yet.
	Char[] t;
	// Set when the next format character is to be matched literally.
	bool escaping;

	// CTFE-compatible alternative to Rebindable:
	// the mutable field is re-qualified on the way in and out.
	@property immutable(TimeZone) tz() { return cast(immutable)tz_; }
	@property void tz(immutable(TimeZone) tz) { tz_ = cast()tz; }

	/// In checked mode, throw unless at least `n` characters remain.
	void need(size_t n)()
	{
		static if (checked)
		{
			enforce(t.length >= n, "Not enough characters in date string");
		}
	}

	/// Remove the first `n` characters from the input and return them.
	auto take(size_t n)()
	{
		need!n();
		auto head = t[0..n];
		t = t[n..$];
		return head;
	}

	/// Remove and return the first character of the input.
	char takeOne()
	{
		need!1();
		auto first = t[0];
		t = t[1..$];
		return first;
	}

	/// Consume a number that is at least `n` characters wide and, when
	/// `maxP` is given, up to `maxP` characters wide; convert it to `R`.
	/// The mandatory part may contain digits, whitespace and a leading '-';
	/// the optional part is extended only over digits.
	R takeNumber(size_t n, sizediff_t maxP = -1, R = int)()
	{
		enum limit = maxP == -1 ? n : maxP;
		need!n();
		foreach (pos, ch; t[0..n])
		{
			const leadingMinus = pos==0 && ch=='-';
			enforce(leadingMinus || isDigit(ch) || isWhite(ch), "Number expected");
		}
		static if (n == limit)
			enum stop = n;
		else
		{
			size_t stop = n;
			while (stop < limit)
			{
				// Only the checked variant guards against running off the input.
				static if (checked)
				{
					if (stop >= t.length)
						break;
				}
				if (!isDigit(t[stop]))
					break;
				stop++;
			}
		}
		auto text = t[0..stop];
		t = t[stop..$];
		return text.strip().to!R();
	}

	/// Consume whichever entry of `words` the input starts with and return
	/// its index; throws "`name` expected" when none matches.
	int takeWord(in string[] words, string name)
	{
		foreach (index, string candidate; words)
		{
			static if (checked)
				const matches = t.startsWith(candidate);
			else
				const matches = t[0..candidate.length] == candidate;
			if (!matches)
				continue;
			t = t[candidate.length..$];
			return cast(int)index;
		}
		throw new Exception(name ~ " expected");
	}

	/// Return the first character of the input without consuming it.
	char peek()
	{
		need!1();
		// Read through the pointer, exactly one element, no slicing involved.
		return *t.ptr;
	}
}
105 
/// Consume the part of the input that corresponds to one format
/// character and store the extracted value in `context`.
///
/// Params:
///   c       = the format character to process
///   context = the ParseContext holding the remaining input (`t`) and
///             the fields collected so far
///
/// Throws:
///   Exception when the input does not match what `c` requires
///   (wrong literal, missing number or word, malformed time zone).
private void parseToken(alias c, alias context)()
{
	with (context)
	{
		// TODO: check if the compiler optimizes this check away
		// in the compile-time version. If not, "escaping" needs to
		// be moved into an alias parameter.
		if (escaping)
		{
			// The previous format character was the escape marker:
			// this one must appear verbatim in the input.
			enforce(takeOne() == c, c ~ " expected");
			escaping = false;
			return;
		}

		switch (c)
		{
			// Day
			case TimeFormatElement.dayOfMonthZeroPadded:
				day = takeNumber!(2)();
				break;
			case TimeFormatElement.dayOfWeekNameShort:
				dow = takeWord(WeekdayShortNames, "Weekday");
				break;
			case TimeFormatElement.dayOfMonth:
				day = takeNumber!(1, 2);
				break;
			case TimeFormatElement.dayOfWeekName:
				dow = takeWord(WeekdayLongNames, "Weekday");
				break;
			case TimeFormatElement.dayOfWeekIndexISO8601:
				// Fold the value 7 onto 0 so both weekday index styles share one range.
				dow = takeNumber!1 % 7;
				break;
			case TimeFormatElement.dayOfMonthOrdinalSuffix: // ordinal suffix
				// Two characters are skipped; their content is not validated.
				take!2;
				break;
			case TimeFormatElement.dayOfWeekIndex:
				dow = takeNumber!1;
				break;
			//case TimeFormatElement.dayOfYear: TODO

			// Week
			//case TimeFormatElement.weekOfYear: TODO

			// Month
			case TimeFormatElement.monthName:
				month = takeWord(MonthLongNames, "Month") + 1;
				break;
			case TimeFormatElement.monthZeroPadded:
				month = takeNumber!2;
				break;
			case TimeFormatElement.monthNameShort:
				month = takeWord(MonthShortNames, "Month") + 1;
				break;
			case TimeFormatElement.month:
				month = takeNumber!(1, 2);
				break;
			case TimeFormatElement.daysInMonth:
				takeNumber!(1, 2); // TODO: validate DIM?
				break;

			// Year
			case TimeFormatElement.yearIsLeapYear:
				takeNumber!1; // TODO: validate leapness?
				break;
			// case TimeFormatElement.yearForWeekNumbering: TODO (ISO 8601 year number)
			case TimeFormatElement.year:
				year = takeNumber!4;
				break;
			case TimeFormatElement.yearOfCentury:
				year = takeNumber!2;
				if (year > 50) // TODO: find correct logic for this
					year += 1900;
				else
					year += 2000;
				break;

			// Time
			case TimeFormatElement.ampmLower:
				pm = takeWord(["am", "pm"], "am/pm")==1;
				break;
			case TimeFormatElement.ampmUpper:
				pm = takeWord(["AM", "PM"], "AM/PM")==1;
				break;
			// case TimeFormatElement.swatchInternetTime: TODO (Swatch Internet time)
			case TimeFormatElement.hour12:
				hour12 = takeNumber!(1, 2);
				break;
			case TimeFormatElement.hour:
				hour = takeNumber!(1, 2);
				break;
			case TimeFormatElement.hour12ZeroPadded:
				hour12 = takeNumber!2;
				break;
			case TimeFormatElement.hourZeroPadded:
				hour = takeNumber!2;
				break;
			case TimeFormatElement.minute:
				minute = takeNumber!2;
				break;
			case TimeFormatElement.second:
				second = takeNumber!2;
				break;
			case TimeFormatElement.milliseconds:
			case TimeFormatElement.millisecondsAlt: // not standard
				nsecs = takeNumber!3 * 1_000_000;
				break;
			case TimeFormatElement.microseconds:
				nsecs = takeNumber!6 * 1_000;
				break;
			case TimeFormatElement.nanoseconds: // not standard
				nsecs = takeNumber!9;
				break;

			// Timezone
			// case TimeFormatElement.timezoneName: ???
			case TimeFormatElement.isDST:
				takeNumber!1;
				break;
			case TimeFormatElement.timezoneOffsetWithoutColon:
			{
				// NOTE(review): the next *input* character is compared against
				// format-element enum members here. Judging by the "GMT expected"
				// check below, these presumably stand for the literal characters
				// 'Z' and 'G' -- confirm against ae.utils.time.common.
				if (peek() == TimeFormatElement.timezoneOffsetSeconds)
				{
					t = t[1..$];
					tz = UTC();
				}
				else
				if (peek() == TimeFormatElement.hour)
				{
					enforce(take!3() == "GMT", "GMT expected");
					tz = UTC();
				}
				else
				{
					// Sign, two hour digits, two minute digits.
					auto tzStr = take!5();
					enforce(tzStr[0]=='-' || tzStr[0]=='+', "- / + expected");
					auto n = (to!int(tzStr[1..3]) * 60 + to!int(tzStr[3..5])) * (tzStr[0]=='-' ? -1 : 1);
					tz = new immutable(SimpleTimeZone)(minutes(n));
				}
				break;
			}
			case TimeFormatElement.timezoneOffsetWithColon:
			{
				// Sign, two hour digits, ':', two minute digits.
				auto tzStr = take!6();
				enforce(tzStr[0]=='-' || tzStr[0]=='+', "- / + expected");
				enforce(tzStr[3]==':', ": expected");
				auto n = (to!int(tzStr[1..3]) * 60 + to!int(tzStr[4..6])) * (tzStr[0]=='-' ? -1 : 1);
				tz = new immutable(SimpleTimeZone)(minutes(n));
				break;
			}
			case TimeFormatElement.timezoneAbbreviation:
				// The whole remaining input is taken as the time zone name.
				version(Posix)
					tz = PosixTimeZone.getTimeZone(t.idup);
				else
				version(Windows)
					tz = WindowsTimeZone.getTimeZone(t.idup);

				t = null;
				break;
			case TimeFormatElement.timezoneOffsetSeconds:
			{
				// TODO: is this correct?
				auto n = takeNumber!(1, 6);
				tz = new immutable(SimpleTimeZone)(seconds(n));
				break;
			}

			// Full date/time
			//case TimeFormatElement.dateTimeISO8601: TODO
			//case TimeFormatElement.dateTimeRFC2822: TODO
			case TimeFormatElement.dateTimeUNIX:
			{
				auto unixTime = takeNumber!(1, 20, time_t);
				auto d = SysTime.fromUnixTime(unixTime, UTC()).to!DateTime;
				year = d.year;
				month = d.month;
				day = d.day;
				hour = d.hour;
				minute = d.minute;
				second = d.second;
				// The fields above are expressed in UTC, so the result has to be
				// assembled in UTC as well. Without this, the default time zone
				// (local time when none was given) would be applied to them and
				// the parsed instant would be off by that zone's UTC offset.
				tz = UTC();
				break;
			}

			// Escape next character
			case TimeFormatElement.escapeNextCharacter:
				escaping = true;
				break;

			// Other characters (whitespace, delimiters)
			default:
			{
				enforce(t.length && t[0]==c, c~ " expected or unsupported format character");
				t = t[1..$];
			}
		}
	}
}
302 
303 import ae.utils.meta;
304 
/// Shared implementation behind the public parsing functions.
///
/// Runs `parseToken` once per character of `fmt` over the input `t`,
/// then assembles the collected fields into a SysTime.
///
/// Params:
///   fmt       = the format specification
///   checked   = whether input length checks are compiled in
///   t         = the text to parse
///   defaultTZ = time zone to start with; format elements may replace it
///
/// Throws:
///   Exception on malformed input, on left-over characters, or when a
///   parsed weekday does not agree with the parsed date.
private SysTime parseTimeImpl(alias fmt, bool checked, C)(C[] t, immutable TimeZone defaultTZ = null)
{
	ParseContext!(C, checked) context;
	context.t = t;
	// During CTFE a missing default zone is replaced by UTC.
	if (__ctfe && defaultTZ is null)
		context.tz = UTC();
	else
		context.tz = defaultTZ;

	foreach (c; CTIterate!fmt)
		parseToken!(c, context)();

	enforce(context.t.length == 0, "Left-over characters: " ~ context.t);

	// A 12-hour value, if one was parsed, overrides the 24-hour field.
	if (context.hour12)
		context.hour = context.hour12%12 + (context.pm ? 12 : 0);

	// Compatibility with both <=2.066 and >=2.067
	static if (__traits(hasMember, SysTime, "fracSecs"))
		auto fraction = dur!"nsecs"(context.nsecs);
	else
		auto fraction = FracSec.from!"hnsecs"(context.nsecs / 100);

	auto fields = DateTime(
		context.year, context.month, context.day,
		context.hour, context.minute, context.second);
	SysTime result = SysTime(fields, fraction, context.tz);

	// The weekday cross-check is performed at run time only.
	if (!__ctfe && context.dow >= 0)
		enforce(result.dayOfWeek == context.dow, "Mismatching weekday");

	return result;
}
340 
/// Parse the given string into a SysTime, using the format spec fmt.
/// This version generates specialized code for the given fmt.
/// Params:
///   t  = the text to parse
///   tz = time zone handed to the implementation as its default
SysTime parseTime(string fmt, C)(C[] t, immutable TimeZone tz = null)
{
	// The length-checking variant is only needed when the input is
	// shorter than the size computed for this format.
	enum maxLength = timeFormatSize(fmt);
	return t.length >= maxLength
		? parseTimeImpl!(fmt, false)(t, tz)
		: parseTimeImpl!(fmt, true )(t, tz);
}
352 
/// Parse the given string into a SysTime, using the format spec fmt.
/// This version parses fmt at runtime.
/// Params:
///   t   = the text to parse
///   fmt = the format specification, interpreted at run time
///   tz  = time zone handed to the implementation as its default, as in
///         the compile-time `parseTime`; omitting it (null) keeps the
///         behavior this function had before the parameter existed
SysTime parseTimeUsing(C)(C[] t, in char[] fmt, immutable TimeZone tz = null)
{
	// Always use the length-checking variant: nothing is known about
	// fmt at compile time here.
	return parseTimeImpl!(fmt, true)(t, tz);
}
359 
/// Deprecated overload taking the format first and the text second;
/// forwards to `parseTimeUsing`.
deprecated SysTime parseTime(C)(const(char)[] fmt, C[] t)
{
	return parseTimeUsing(t, fmt);
}
364 
365 version(unittest) import ae.utils.time.format;
366 
unittest
{
	// Round trip through the compile-time format path, then check that
	// the runtime format path yields the same time.
	const input = "Tue Jun 07 13:23:19 GMT+0100 2011";
	//enum t = s0.parseTime!(TimeFormats.STD_DATE); // https://issues.dlang.org/show_bug.cgi?id=12042
	auto parsed = parseTime!(TimeFormats.STD_DATE)(input);
	auto formatted = parsed.formatTime(TimeFormats.STD_DATE);
	assert(input == formatted, input ~ "/" ~ formatted);
	auto parsedAtRuntime = parseTimeUsing(input, TimeFormats.STD_DATE);
	assert(parsed == parsedAtRuntime);
}
377 
unittest
{
	// Both entry points must accept an RFC 2822 date without throwing.
	enum sample = "Tue, 21 Nov 2006 21:19:46 +0000";
	sample.parseTime!(TimeFormats.RFC2822);
	sample.parseTimeUsing(TimeFormats.RFC2822);
}
383 
unittest
{
	// Formatting as a Unix timestamp and parsing it back must give
	// the same point in time.
	const char[] rfc2822 = "Tue, 21 Nov 2006 21:19:46 +0000";
	auto parsed = rfc2822.parseTime!(TimeFormats.RFC2822);
	auto unixText = parsed.formatTime!"U";
	auto reparsed = unixText.parseTime!"U";
	assert(parsed.stdTime == reparsed.stdTime);
}
390 
///
unittest
{
	// Declaring the result as an enum forces the parse to run at compile time.
	enum buildTime = parseTime!(TimeFormats.CTIME)(__TIMESTAMP__).stdTime;
}
396 
/// Parse log timestamps generated by `ae.sys.log`,
/// including all previous versions of it.
///
/// `s` is either a bare timestamp or a whole line that starts with a
/// bracketed timestamp; the layout is recognized by its length.
///
/// Throws:
///   Exception on empty input, on a '[' without a matching ']', or
///   when the timestamp length matches none of the known layouts.
SysTime parseLogTimestamp(string s)
{
	enforce(s.length, "Empty line");

	// Input is an entire line: keep only what is between the brackets.
	if (s[0] == '[')
	{
		auto closing = s.indexOf(']');
		enforce(closing > 0, "Unmatched [");
		s = s[1..closing];
	}

	// Fri Jun 29 05:44:13 GMT+0300 2007
	if (s.length == 33)
		return s.parseTime!(TimeFormats.STD_DATE)(UTC());

	if (s.length == 23)
	{
		// 2015.02.24 21:03:01.868
		if (s[4] == '.')
			return s.parseTime!"Y.m.d H:i:s.E"(UTC());
		// 2015-11-04 00:00:45.964
		return s.parseTime!"Y-m-d H:i:s.E"(UTC());
	}

	// 2015-11-04 00:00:45.964983
	if (s.length == 26)
		return s.parseTime!"Y-m-d H:i:s.u"(UTC());

	throw new Exception("Unknown log timestamp format: " ~ s);
}