1 /**
2  * Time parsing functions.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.utils.time.parse;
15 
16 import core.time : minutes, seconds, dur;
17 
18 import std.exception : enforce;
19 import std.conv : to;
20 import std.ascii : isDigit, isWhite;
21 import std.datetime;
22 import std.string : indexOf;
23 import std.string : strip, startsWith;
24 
25 import ae.utils.time.common;
26 
/// Mutable state for one parse run: the not-yet-consumed input (`t`) plus
/// the date/time components collected so far.
///
/// Params:
///   Char    = element type of the input slice
///   checked = when true, the `take*`/`peek` helpers verify (via `enforce`)
///             that enough input remains before reading it
private struct ParseContext(Char, bool checked)
{
	// Collected components; the initializers are the values used for
	// anything the format string never mentions.
	int year = 0, month = 1, day = 1;
	int hour = 0, minute = 0, second = 0, usecs = 0;
	int hour12 = 0;
	bool pm;
	TimeZone tz_;
	int dow = -1; // -1 means "no weekday was parsed"
	Char[] t;     // remaining input
	bool escaping;

	// CTFE-compatible alternative to Rebindable:
	// keep a mutable reference internally, expose it as immutable.
	@property immutable(TimeZone) tz()
	{
		return cast(immutable)tz_;
	}

	@property void tz(immutable(TimeZone) tz)
	{
		tz_ = cast()tz;
	}

	/// In checked mode, throws unless at least `n` characters remain.
	void need(size_t n)()
	{
		static if (checked)
			enforce(t.length >= n, "Not enough characters in date string");
	}

	/// Removes and returns the first `n` characters of the input.
	auto take(size_t n)()
	{
		need!n();
		auto head = t[0..n];
		t = t[n..$];
		return head;
	}

	/// Removes and returns the first character of the input.
	char takeOne()
	{
		need!1();
		auto first = t[0];
		t = t[1..$];
		return first;
	}

	/// Removes a number of at least `n` and at most `maxP` characters
	/// (exactly `n` when `maxP` is -1) and converts it to `R`.
	/// Within the mandatory first `n` characters, digits, whitespace and a
	/// leading '-' are accepted; the optional tail accepts digits only.
	R takeNumber(size_t n, sizediff_t maxP = -1, R = int)()
	{
		enum limit = maxP == -1 ? n : maxP;
		need!n();
		foreach (pos, ch; t[0..n])
		{
			immutable acceptable = isDigit(ch) || isWhite(ch) || (pos == 0 && ch == '-');
			enforce(acceptable, "Number expected");
		}

		// `static if` does not open a scope, so `len` stays visible below.
		static if (n == limit)
			enum len = n;
		else
		{
			auto len = n;
			// Extend over further digits, up to the limit
			// (and up to the end of input when checking is enabled).
			while (len < limit && (!checked || len < t.length) && isDigit(t[len]))
				++len;
		}

		auto digits = t[0..len];
		t = t[len..$];
		return digits.strip().to!R();
	}

	/// Removes whichever entry of `words` the input starts with and returns
	/// its index. Throws `name ~ " expected"` if none matches.
	int takeWord(in string[] words, string name)
	{
		foreach (index, string candidate; words)
		{
			static if (checked)
				immutable matched = t.startsWith(candidate);
			else
				immutable matched = t[0..candidate.length] == candidate;

			if (!matched)
				continue;

			t = t[candidate.length..$];
			return cast(int)index;
		}
		throw new Exception(name ~ " expected");
	}

	/// Returns the first character of the input without consuming it.
	char peek()
	{
		need!1();
		return *t.ptr;
	}
}
104 
/// Consumes, from the front of `context.t`, the input text that corresponds
/// to the single format element `c`, and stores the parsed value in the
/// matching field of `context`.
///
/// Params:
///   c       = the format character being processed (compared against the
///             members of `TimeFormatElement`)
///   context = the `ParseContext` holding the remaining input and the
///             components parsed so far
///
/// Throws: `Exception` (through `enforce` / `takeWord`) when the input does
/// not match the element; conversion failures from `std.conv.to` propagate.
private void parseToken(alias c, alias context)()
{
	with (context)
	{
		// TODO: check if the compiler optimizes this check away
		// in the compile-time version. If not, "escaping" needs to
		// be moved into an alias parameter.
		if (escaping)
		{
			// The previous element was the escape character:
			// this one must appear literally in the input.
			enforce(takeOne() == c, c ~ " expected");
			escaping = false;
			return;
		}

		switch (c)
		{
			// Day
			case TimeFormatElement.dayOfMonthZeroPadded:
				day = takeNumber!(2)();
				break;
			case TimeFormatElement.dayOfWeekNameShort:
				dow = takeWord(WeekdayShortNames, "Weekday");
				break;
			case TimeFormatElement.dayOfMonth:
				day = takeNumber!(1, 2);
				break;
			case TimeFormatElement.dayOfWeekName:
				dow = takeWord(WeekdayLongNames, "Weekday");
				break;
			case TimeFormatElement.dayOfWeekIndexISO8601:
				// Folds 7 onto 0 so the value matches the plain weekday index.
				dow = takeNumber!1 % 7;
				break;
			case TimeFormatElement.dayOfMonthOrdinalSuffix: // ordinal suffix
				take!2; // skipped without validation
				break;
			case TimeFormatElement.dayOfWeekIndex:
				dow = takeNumber!1;
				break;
			//case TimeFormatElement.dayOfYear: TODO

			// Week
			//case TimeFormatElement.weekOfYear: TODO

			// Month
			case TimeFormatElement.monthName:
				month = takeWord(MonthLongNames, "Month") + 1;
				break;
			case TimeFormatElement.monthZeroPadded:
				month = takeNumber!2;
				break;
			case TimeFormatElement.monthNameShort:
				month = takeWord(MonthShortNames, "Month") + 1;
				break;
			case TimeFormatElement.month:
				month = takeNumber!(1, 2);
				break;
			case TimeFormatElement.daysInMonth:
				takeNumber!(1, 2); // TODO: validate DIM?
				break;

			// Year
			case TimeFormatElement.yearIsLeapYear:
				takeNumber!1; // TODO: validate leapness?
				break;
			// case TimeFormatElement.yearForWeekNumbering: TODO (ISO 8601 year number)
			case TimeFormatElement.year:
				year = takeNumber!4;
				break;
			case TimeFormatElement.yearOfCentury:
				year = takeNumber!2;
				// Two-digit years: 51..99 map to 19xx, 00..50 to 20xx.
				if (year > 50) // TODO: find correct logic for this
					year += 1900;
				else
					year += 2000;
				break;

			// Time
			case TimeFormatElement.ampmLower:
				pm = takeWord(["am", "pm"], "am/pm")==1;
				break;
			case TimeFormatElement.ampmUpper:
				pm = takeWord(["AM", "PM"], "AM/PM")==1;
				break;
			// case TimeFormatElement.swatchInternetTime: TODO (Swatch Internet time)
			case TimeFormatElement.hour12:
				hour12 = takeNumber!(1, 2);
				break;
			case TimeFormatElement.hour:
				hour = takeNumber!(1, 2);
				break;
			case TimeFormatElement.hour12ZeroPadded:
				hour12 = takeNumber!2;
				break;
			case TimeFormatElement.hourZeroPadded:
				hour = takeNumber!2;
				break;
			case TimeFormatElement.minute:
				minute = takeNumber!2;
				break;
			case TimeFormatElement.second:
				second = takeNumber!2;
				break;
			case TimeFormatElement.microseconds:
				usecs = takeNumber!6;
				break;
			case TimeFormatElement.milliseconds:
			case TimeFormatElement.millisecondsAlt: // not standard
				usecs = 1000 * takeNumber!3;
				break;

			// Timezone
			// case TimeFormatElement.timezoneName: ???
			case TimeFormatElement.isDST:
				takeNumber!1; // value is read and discarded
				break;
			case TimeFormatElement.timezoneOffsetWithoutColon:
			{
				// Besides a numeric "+hhmm"/"-hhmm" offset, two spellings of UTC
				// are accepted. They are recognised by comparing the next input
				// character with format-element constants (presumably 'Z' and
				// 'G' respectively - confirm against TimeFormatElement).
				if (peek() == TimeFormatElement.timezoneOffsetSeconds)
				{
					t = t[1..$];
					tz = UTC();
				}
				else
				if (peek() == TimeFormatElement.hour)
				{
					enforce(take!3() == "GMT", "GMT expected");
					tz = UTC();
				}
				else
				{
					auto tzStr = take!5();
					enforce(tzStr[0]=='-' || tzStr[0]=='+', "- / + expected");
					auto n = (to!int(tzStr[1..3]) * 60 + to!int(tzStr[3..5])) * (tzStr[0]=='-' ? -1 : 1);
					tz = new immutable(SimpleTimeZone)(minutes(n));
				}
				break;
			}
			case TimeFormatElement.timezoneOffsetWithColon:
			{
				auto tzStr = take!6();
				enforce(tzStr[0]=='-' || tzStr[0]=='+', "- / + expected");
				enforce(tzStr[3]==':', ": expected");
				auto n = (to!int(tzStr[1..3]) * 60 + to!int(tzStr[4..6])) * (tzStr[0]=='-' ? -1 : 1);
				tz = new immutable(SimpleTimeZone)(minutes(n));
				break;
			}
			case TimeFormatElement.timezoneAbbreviation:
				// The whole remaining input is taken as the time zone name.
				version(Posix)
					tz = PosixTimeZone.getTimeZone(t.idup);
				else
				version(Windows)
					tz = WindowsTimeZone.getTimeZone(t.idup);

				t = null;
				break;
			case TimeFormatElement.timezoneOffsetSeconds:
			{
				// TODO: is this correct?
				auto n = takeNumber!(1, 6);
				tz = new immutable(SimpleTimeZone)(seconds(n));
				break;
			}

			// Full date/time
			//case TimeFormatElement.dateTimeISO8601: TODO
			//case TimeFormatElement.dateTimeRFC2822: TODO
			case TimeFormatElement.dateTimeUNIX:
			{
				// Read as long: up to 20 digits are accepted, and Unix times
				// past 2038 do not fit in an int.
				auto unixTime = takeNumber!(1, 20, long);
				// Break the instant down in the context's time zone (SysTime
				// treats a null zone as local time). parseTimeImpl rebuilds
				// the result from these fields using that same zone, so the
				// two conversions must agree or the instant is shifted.
				auto d = SysTime.fromUnixTime(unixTime, tz).to!DateTime;
				year = d.year;
				month = d.month;
				day = d.day;
				hour = d.hour;
				minute = d.minute;
				second = d.second;
				break;
			}

			// Escape next character
			case TimeFormatElement.escapeNextCharacter:
				escaping = true;
				break;

			// Other characters (whitespace, delimiters)
			default:
			{
				// Anything else must appear verbatim in the input.
				enforce(t.length && t[0]==c, c~ " expected or unsupported format character");
				t = t[1..$];
			}
		}
	}
}
298 
299 import ae.utils.meta;
300 
/// Shared implementation behind `parseTime` and `parseTimeUsing`.
///
/// Feeds every element of `fmt` to `parseToken`, requires that the whole
/// input was consumed, and assembles the collected components into a `SysTime`.
///
/// Params:
///   fmt       = format specification, iterated through `CTIterate`
///   checked   = whether the parse context verifies remaining input length
///   t         = the text to parse
///   defaultTZ = time zone used unless the input itself specifies one
///
/// Throws: `Exception` on left-over input or (at run time) when a parsed
/// weekday disagrees with the resulting date.
private SysTime parseTimeImpl(alias fmt, bool checked, C)(C[] t, immutable TimeZone defaultTZ = null)
{
	ParseContext!(C, checked) context;
	context.tz = defaultTZ;
	context.t = t;

	// During CTFE, fall back to UTC when no zone was supplied.
	if (__ctfe && context.tz is null)
		context.tz = UTC();

	foreach (element; CTIterate!fmt)
		parseToken!(element, context)();

	enforce(context.t.length == 0, "Left-over characters: " ~ context.t);

	// A 12-hour clock value, if one was parsed, overrides the 24-hour field.
	if (context.hour12)
		context.hour = context.hour12 % 12 + (context.pm ? 12 : 0);

	// Compatibility with both <=2.066 and >=2.067
	static if (__traits(hasMember, SysTime, "fracSecs"))
		auto fraction = dur!"usecs"(context.usecs);
	else
		auto fraction = FracSec.from!"usecs"(context.usecs);

	auto civil = DateTime(
		context.year, context.month, context.day,
		context.hour, context.minute, context.second);
	SysTime parsed = SysTime(civil, fraction, context.tz);

	// The weekday consistency check is skipped during CTFE.
	if (!__ctfe && context.dow >= 0)
		enforce(parsed.dayOfWeek == context.dow, "Mismatching weekday");

	return parsed;
}
336 
/// Parse the given string into a SysTime, using the format spec fmt.
/// This version generates specialized code for the given fmt.
///
/// Params:
///   t  = the text to parse
///   tz = time zone to use unless the text specifies one
SysTime parseTime(string fmt, C)(C[] t, immutable TimeZone tz = null)
{
	enum sizeThreshold = timeFormatSize(fmt);

	// Length checks are only compiled in for inputs shorter than the
	// threshold; inputs at least that long take the unchecked path.
	immutable needsChecks = t.length < sizeThreshold;
	return needsChecks
		? parseTimeImpl!(fmt, true )(t, tz)
		: parseTimeImpl!(fmt, false)(t, tz);
}
348 
/// Parse the given string into a SysTime, using the format spec fmt.
/// This version parses fmt at runtime.
///
/// Params:
///   t   = the text to parse
///   fmt = format specification
///   tz  = time zone to use unless the text specifies one; optional,
///         mirroring the compile-time `parseTime` overload
SysTime parseTimeUsing(C)(C[] t, in char[] fmt, immutable TimeZone tz = null)
{
	// Length is always checked here: fmt is only known at run time.
	return parseTimeImpl!(fmt, true)(t, tz);
}
355 
/// Legacy overload taking the format first; forwards to `parseTimeUsing`.
deprecated SysTime parseTime(C)(const(char)[] fmt, C[] t)
{
	return parseTimeUsing(t, fmt);
}
360 
361 version(unittest) import ae.utils.time.format;
362 
// Round trip through STD_DATE, and agreement between the compile-time
// and run-time format paths.
unittest
{
	const original = "Tue Jun 07 13:23:19 GMT+0100 2011";
	//enum parsed = original.parseTime!(TimeFormats.STD_DATE); // https://d.puremagic.com/issues/show_bug.cgi?id=12042
	auto parsed = original.parseTime!(TimeFormats.STD_DATE);
	auto formatted = parsed.formatTime(TimeFormats.STD_DATE);
	assert(original == formatted, original ~ "/" ~ formatted);

	auto parsedAtRuntime = original.parseTimeUsing(TimeFormats.STD_DATE);
	assert(parsed == parsedAtRuntime);
}
373 
// An RFC 2822 date must parse without throwing on both code paths.
unittest
{
	parseTime!(TimeFormats.RFC2822)("Tue, 21 Nov 2006 21:19:46 +0000");
	parseTimeUsing("Tue, 21 Nov 2006 21:19:46 +0000", TimeFormats.RFC2822);
}
379 
// Formatting as a Unix timestamp and parsing it back yields the same instant.
unittest
{
	const char[] input = "Tue, 21 Nov 2006 21:19:46 +0000";
	auto parsed = input.parseTime!(TimeFormats.RFC2822);
	auto unixText = parsed.formatTime!"U";
	auto reparsed = unixText.parseTime!"U";
	assert(parsed.stdTime == reparsed.stdTime);
}
386 
// Parsing must be usable at compile time (the enum forces CTFE).
unittest
{
	enum buildTime = parseTime!(TimeFormats.CTIME)(__TIMESTAMP__).stdTime;
}
391 
/// Parse log timestamps generated by ae.sys.log,
/// including all previous versions of it.
///
/// Params:
///   s = either a bare timestamp, or an entire log line that begins with
///       the timestamp enclosed in square brackets
///
/// Returns: the parsed time; UTC is passed as the default time zone.
///
/// Throws: `Exception` on empty input, an unmatched `[`, or a timestamp
/// whose length matches none of the known layouts.
SysTime parseLogTimestamp(string s)
{
	enforce(s.length, "Empty line");

	// Input is an entire line: keep only what is between the brackets.
	if (s[0] == '[')
	{
		auto closing = s.indexOf(']');
		enforce(closing > 0, "Unmatched [");
		s = s[1..closing];
	}

	// The layout is identified by the timestamp's length.

	// Fri Jun 29 05:44:13 GMT+0300 2007
	if (s.length == 33)
		return s.parseTime!(TimeFormats.STD_DATE)(UTC());

	// 2015-11-04 00:00:45.964983
	if (s.length == 26)
		return s.parseTime!"Y-m-d H:i:s.u"(UTC());

	if (s.length == 23)
	{
		// 2015.02.24 21:03:01.868
		if (s[4] == '.')
			return s.parseTime!"Y.m.d H:i:s.E"(UTC());
		// 2015-11-04 00:00:45.964
		return s.parseTime!"Y-m-d H:i:s.E"(UTC());
	}

	throw new Exception("Unknown log timestamp format: " ~ s);
}