1 /**
2  * Wrappers for raw _data located in unmanaged memory.
3  *
4  * Using the Data type will only place a small object in managed memory,
5  * keeping the actual _data in unmanaged memory.
6  * A proxy class (DataWrapper) is used to safely allow multiple references to
7  * the same block of unmanaged memory.
8  * When the DataWrapper object is destroyed (either manually or by the garbage
9  * collector when there are no remaining Data references), the unmanaged
10  * memory is deallocated.
11  *
12  * This has the following advantage over using managed memory:
13  * $(UL
14  *  $(LI Faster allocation and deallocation, since memory is requested from
15  *       the OS directly as whole pages)
16  *  $(LI Greatly reduced chance of memory leaks (on 32-bit platforms) due to
17  *       stray pointers)
18  *  $(LI Overall improved GC performance due to reduced size of managed heap)
19  *  $(LI Memory is immediately returned to the OS when _data is deallocated)
20  * )
21  * On the other hand, using Data has the following disadvantages:
22  * $(UL
23  *  $(LI This module is designed to store raw _data which does not have any
24  *       pointers. Storing objects containing pointers to managed memory is
25  *       unsupported, and may result in memory corruption.)
26  *  $(LI Small objects may be stored inefficiently, as the module requests
27  *       entire pages of memory from the OS. Considering allocating one large
28  *       object and use slices (Data instances) for individual objects.)
29  *  $(LI Incorrect usage (i.e. retaining/escaping references to wrapped memory
30  *       without keeping a reference to its corresponding DataWrapper) can
31  *       result in dangling pointers and hard-to-debug memory corruption.)
32  * )
33  *
34  * License:
35  *   This Source Code Form is subject to the terms of
36  *   the Mozilla Public License, v. 2.0. If a copy of
37  *   the MPL was not distributed with this file, You
38  *   can obtain one at http://mozilla.org/MPL/2.0/.
39  *
40  * Authors:
41  *   Vladimir Panteleev <vladimir@thecybershadow.net>
42  */
43 
44 module ae.sys.data;
45 
46 static import core.stdc.stdlib;
47 import core.stdc.string : memmove;
48 import std.traits;
49 import core.memory;
50 import core.exception;
51 debug import std.stdio;
52 debug import std.string;
53 public import ae.sys.dataset;
54 import ae.utils.math;
55 
56 debug(DATA) import core.stdc.stdio;
57 
58 // ideas/todo:
59 // * templatize (and forbid using aliased types)?
60 // * use heap (malloc/Windows heap API) for small objects?
61 // * reference counting?
62 // * "immutable" support?
63 
64 /**
65  * Wrapper for data located in external memory, to prevent faux references.
66  * Represents a slice of data, which may or may not be in unmanaged memory.
67  * Data in unmanaged memory is bound to a DataWrapper class instance.
68  *
69  * All operations on this class should be safe, except for accessing contents directly.
70  * All operations on contents must be accompanied by a live reference to the Data object,
71  * to keep a GC anchor towards the unmanaged data.
72  *
73  * Concatenations and appends to Data contents will cause reallocations on the heap, consider using Data instead.
74  *
75  * Be sure not to lose Data references while using their contents!
76  * For example, avoid code like this:
77  * ----
78  * fun(cast(string)transformSomeData(someData).contents);
79  * ----
80  * The Data return value may be unreachable once .contents is evaluated.
81  * Use .toHeap instead of .contents in such cases to get a safe heap copy.
82  */
83 struct Data
84 {
85 private:
86 	/// Wrapped data
87 	const(void)[] _contents;
88 	/// Reference to the wrapper of the actual data - may be null to indicate wrapped data in managed memory.
89 	/// Used as a GC anchor to unmanaged data, and for in-place expands (for appends).
90 	DataWrapper wrapper;
91 	/// Indicates whether we're allowed to modify the data contents.
92 	bool mutable;
93 
94 	/// Maximum preallocation for append operations.
95 	enum { MAX_PREALLOC = 4*1024*1024 } // must be power of 2
96 
97 public:
98 	/**
99 	 * Create new instance wrapping the given data.
100 	 * Params:
101 	 *   data = initial data
102 	 *   forceReallocation = when false, the contents will be duplicated to
103 	 *     unmanaged memory only when it's not on the managed heap; when true,
104 	 *     the contents will be reallocated always.
105 	 */
106 	this(const(void)[] data, bool forceReallocation = false)
107 	{
108 		if (data.length == 0)
109 			contents = null;
110 		else
111 		if (forceReallocation || GC.addrOf(data.ptr) is null)
112 		{
113 			// copy to unmanaged memory
114 			auto wrapper = unmanagedNew!MemoryDataWrapper(data.length, data.length);
115 			this.wrapper = wrapper;
116 			wrapper.contents[] = data[];
117 			contents = wrapper.contents;
118 			mutable = true;
119 		}
120 		else
121 		{
122 			// just save a reference
123 			contents = data;
124 			mutable = false;
125 		}
126 
127 		assert(this.length == data.length);
128 	}
129 
130 	/// ditto
131 	this(void[] data, bool forceReallocation = false)
132 	{
133 		const(void)[] cdata = data;
134 		this(cdata, forceReallocation);
135 		mutable = true;
136 	}
137 
138 	/// Create a new instance with given size/capacity. Capacity defaults to size.
139 	this(size_t size, size_t capacity = 0)
140 	in
141 	{
142 		assert(capacity == 0 || size <= capacity);
143 	}
144 	body
145 	{
146 		if (!capacity)
147 			capacity = size;
148 
149 		if (capacity)
150 		{
151 			auto wrapper = unmanagedNew!MemoryDataWrapper(size, capacity);
152 			this.wrapper = wrapper;
153 			contents = wrapper.contents;
154 			mutable = true;
155 		}
156 		else
157 		{
158 			wrapper = null;
159 			contents = null;
160 		}
161 
162 		assert(this.length == size);
163 	}
164 
165 	this(DataWrapper wrapper, bool mutable)
166 	{
167 		this.wrapper = wrapper;
168 		this.mutable = mutable;
169 		this.contents = wrapper.contents;
170 	}
171 
172 	this(this)
173 	{
174 		if (wrapper)
175 		{
176 			wrapper.references++;
177 			debug (DATA_REFCOUNT) debugLog("%p -> %p: Incrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references);
178 		}
179 		else
180 			debug (DATA_REFCOUNT) debugLog("%p -> %p: this(this) with no wrapper", cast(void*)&this, cast(void*)wrapper);
181 	}
182 
183 	~this() pure
184 	{
185 		//clear();
186 		// https://issues.dlang.org/show_bug.cgi?id=13809
187 		(cast(void delegate() pure)&clear)();
188 	}
189 
190 /*
191 	/// Create new instance as a slice over an existing DataWrapper.
192 	private this(DataWrapper wrapper, size_t start = 0, size_t end = size_t.max)
193 	{
194 		this.wrapper = wrapper;
195 		this.start = start;
196 		this.end = end==size_t.max ? wrapper.capacity : end;
197 	}
198 */
199 
200 	@property const(void)[] contents() const
201 	{
202 		return _contents;
203 	}
204 
205 	@property private const(void)[] contents(const(void)[] data)
206 	{
207 		return _contents = data;
208 	}
209 
210 	/// Get mutable contents
211 	@property void[] mcontents()
212 	{
213 		if (!mutable && length)
214 		{
215 			reallocate(length, length);
216 			assert(mutable);
217 		}
218 		return cast(void[])_contents;
219 	}
220 
221 	@property const(void)* ptr() const
222 	{
223 		return contents.ptr;
224 	}
225 
226 	@property void* mptr()
227 	{
228 		return mcontents.ptr;
229 	}
230 
231 	@property size_t length() const
232 	{
233 		return contents.length;
234 	}
235 
236 	@property bool empty() const
237 	{
238 		return contents is null;
239 	}
240 
241 	bool opCast(T)()
242 		if (is(T == bool))
243 	{
244 		return !empty;
245 	}
246 
247 	@property size_t capacity() const
248 	{
249 		if (wrapper is null)
250 			return length;
251 		// We can only safely expand if the memory slice is at the end of the used unmanaged memory block.
252 		auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data
253 		auto end = pos + length;               // end   position in wrapper data
254 		assert(end <= wrapper.size);
255 		if (end == wrapper.size && end < wrapper.capacity)
256 			return wrapper.capacity - pos;
257 		else
258 			return length;
259 	}
260 
261 	/// Put a copy of the data on D's managed heap, and return it.
262 	@property
263 	void[] toHeap() const
264 	{
265 		return _contents.dup;
266 	}
267 
268 	private void reallocate(size_t size, size_t capacity)
269 	{
270 		auto wrapper = unmanagedNew!MemoryDataWrapper(size, capacity);
271 		wrapper.contents[0..this.length] = contents[];
272 		//(cast(ubyte[])newWrapper.contents)[this.length..value] = 0;
273 
274 		clear();
275 		this.wrapper = wrapper;
276 		this.contents = wrapper.contents;
277 		mutable = true;
278 	}
279 
280 	private void expand(size_t newSize, size_t newCapacity)
281 	in
282 	{
283 		assert(length < newSize);
284 		assert(newSize <= newCapacity);
285 	}
286 	out
287 	{
288 		assert(length == newSize);
289 	}
290 	body
291 	{
292 		if (newCapacity <= capacity)
293 		{
294 			auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data
295 			wrapper.setSize(pos + newSize);
296 			contents = ptr[0..newSize];
297 		}
298 		else
299 			reallocate(newSize, newCapacity);
300 	}
301 
302 	@property void length(size_t value)
303 	{
304 		if (value == length) // no change
305 			return;
306 		if (value < length)  // shorten
307 			_contents = _contents[0..value];
308 		else                 // lengthen
309 			expand(value, value);
310 	}
311 	alias length opDollar;
312 
313 	@property Data dup() const
314 	{
315 		return Data(contents, true);
316 	}
317 
318 	/// This used to be an unsafe method which deleted the wrapped data.
319 	/// Now that Data is refcounted, this simply calls clear() and
320 	/// additionally asserts that this Data is the only Data holding
321 	/// a reference to the wrapper.
322 	void deleteContents()
323 	out
324 	{
325 		assert(wrapper is null);
326 	}
327 	body
328 	{
329 		if (wrapper)
330 		{
331 			assert(wrapper.references == 1, "Attempting to call deleteContents with ");
332 			clear();
333 		}
334 	}
335 
336 	void clear()
337 	{
338 		if (wrapper)
339 		{
340 			assert(wrapper.references > 0, "Dangling pointer to wrapper");
341 			wrapper.references--;
342 			debug (DATA_REFCOUNT) debugLog("%p -> %p: Decrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references);
343 			if (wrapper.references == 0)
344 				wrapper.destroy();
345 
346 			wrapper = null;
347 		}
348 
349 		contents = null;
350 	}
351 
352 	Data concat(const(void)[] data)
353 	{
354 		if (data.length==0)
355 			return this;
356 		Data result = Data(length + data.length);
357 		result.mcontents[0..this.length] = contents[];
358 		result.mcontents[this.length..$] = data[];
359 		return result;
360 	}
361 
362 	Data opCat(T)(const(T)[] data)
363 		if (!hasIndirections!T)
364 	{
365 		return concat(data);
366 	}
367 
368 	Data opCat()(Data data)
369 	{
370 		return concat(data.contents);
371 	}
372 
373 	Data prepend(const(void)[] data)
374 	{
375 		Data result = Data(data.length + length);
376 		result.mcontents[0..data.length] = data[];
377 		result.mcontents[data.length..$] = contents[];
378 		return result;
379 	}
380 
381 	Data opCat_r(T)(const(T)[] data)
382 		if (!hasIndirections!T)
383 	{
384 		return prepend(data);
385 	}
386 
387 	private static size_t getPreallocSize(size_t length)
388 	{
389 		if (length < MAX_PREALLOC)
390 			return nextPowerOfTwo(length);
391 		else
392 			return ((length-1) | (MAX_PREALLOC-1)) + 1;
393 	}
394 
395 	Data append(const(void)[] data)
396 	{
397 		if (data.length==0)
398 			return this;
399 		size_t oldLength = length;
400 		size_t newLength = length + data.length;
401 		expand(newLength, getPreallocSize(newLength));
402 		auto newContents = cast(void[])_contents[oldLength..$];
403 		newContents[] = (cast(void[])data)[];
404 		return this;
405 	}
406 
407 	/// Note that unlike opCat (a ~ b), opCatAssign (a ~= b) will preallocate.
408 	Data opCatAssign(T)(const(T)[] data)
409 		if (!hasIndirections!T)
410 	{
411 		return append(data);
412 	}
413 
414 	Data opCatAssign()(Data data)
415 	{
416 		return append(data.contents);
417 	}
418 
419 	Data opCatAssign()(ubyte value) // hack?
420 	{
421 		return append((&value)[0..1]);
422 	}
423 
424 	Data opSlice()
425 	{
426 		return this;
427 	}
428 
429 	Data opSlice(size_t x, size_t y)
430 	in
431 	{
432 		assert(x <= y);
433 		assert(y <= length);
434 	}
435 // https://issues.dlang.org/show_bug.cgi?id=13463
436 //	out(result)
437 //	{
438 //		assert(result.length == y-x);
439 //	}
440 	body
441 	{
442 		if (x == y)
443 			return Data();
444 		else
445 		{
446 			Data result = this;
447 			result.contents = result.contents[x..y];
448 			return result;
449 		}
450 	}
451 
452 	/// Return a new Data for the first size bytes, and slice this instance from size to end.
453 	Data popFront(size_t size)
454 	in
455 	{
456 		assert(size <= length);
457 	}
458 	body
459 	{
460 		Data result = this;
461 		result.contents = contents[0..size];
462 		this  .contents = contents[size..$];
463 		return result;
464 	}
465 }
466 
467 unittest
468 {
469 	Data d = Data("aaaaa");
470 	assert(d.wrapper.references == 1);
471 	Data s = d[1..4];
472 	assert(d.wrapper.references == 2);
473 }
474 
475 // ************************************************************************
476 
477 static /*thread-local*/ size_t dataMemory, dataMemoryPeak;
478 static /*thread-local*/ uint   dataCount, allocCount;
479 
480 // Abstract wrapper.
481 abstract class DataWrapper
482 {
483 	sizediff_t references = 1;
484 	abstract @property inout(void)[] contents() inout;
485 	abstract @property size_t size() const;
486 	abstract void setSize(size_t newSize);
487 	abstract @property size_t capacity() const;
488 
489 	deprecated new(size_t sz) { return unmanagedAlloc(sz); }
490 
491 	debug ~this()
492 	{
493 		debug(DATA_REFCOUNT) debugLog("%.*s.~this, references==%d", this.classinfo.name.length, this.classinfo.name.ptr, references);
494 		assert(references == 0, "Deleting DataWrapper with non-zero reference count");
495 	}
496 
497 	@nogc deprecated delete(void* p)
498 	{
499 		unmanagedFree(p);
500 	}
501 }
502 
503 void setGCThreshold(size_t value) { MemoryDataWrapper.collectThreshold = value; }
504 
505 C unmanagedNew(C, Args...)(auto ref Args args)
506 if (is(C == class))
507 {
508 	import std.conv : emplace;
509 	enum size = __traits(classInstanceSize, C);
510 	auto p = unmanagedAlloc(size);
511 	emplace!C(p[0..size], args);
512 	return cast(C)p;
513 }
514 
515 void unmanagedDelete(C)(C c)
516 if (is(C == class))
517 {
518 	c.__xdtor();
519 	unmanagedFree(p);
520 }
521 
522 private:
523 
524 void* unmanagedAlloc(size_t sz)
525 {
526 	auto p = core.stdc.stdlib.malloc(sz);
527 
528 	debug(DATA_REFCOUNT) debugLog("? -> %p: Allocating via malloc", p);
529 
530 	if (!p)
531 		throw new OutOfMemoryError();
532 
533 	//GC.addRange(p, sz);
534 	return p;
535 }
536 
537 void unmanagedFree(void* p) @nogc
538 {
539 	if (p)
540 	{
541 		debug(DATA_REFCOUNT) debugLog("? -> %p: Deleting via free", p);
542 
543 		//GC.removeRange(p);
544 		core.stdc.stdlib.free(p);
545 	}
546 }
547 
548 version (Windows)
549 	import core.sys.windows.windows;
550 else
551 {
552 	import core.sys.posix.unistd;
553 	import core.sys.posix.sys.mman;
554 }
555 
556 /// Wrapper for data in RAM, allocated from the OS.
557 final class MemoryDataWrapper : DataWrapper
558 {
559 	/// Pointer to actual data.
560 	void* data;
561 	/// Used size. Needed for safe appends.
562 	size_t _size;
563 	/// Allocated capacity.
564 	size_t _capacity;
565 
566 	/// Threshold of allocated memory to trigger a collect.
567 	__gshared size_t collectThreshold = 8*1024*1024; // 8MB
568 	/// Counter towards the threshold.
569 	static /*thread-local*/ size_t allocatedThreshold;
570 
571 	/// Create a new instance with given capacity.
572 	this(size_t size, size_t capacity)
573 	{
574 		data = malloc(/*ref*/ capacity);
575 		if (data is null)
576 		{
577 			debug(DATA) printf("Garbage collect triggered by failed Data allocation of %llu bytes... ", cast(ulong)capacity);
578 			GC.collect();
579 			debug(DATA) printf("Done\n");
580 			data = malloc(/*ref*/ capacity);
581 			allocatedThreshold = 0;
582 		}
583 		if (data is null)
584 			onOutOfMemoryError();
585 
586 		dataMemory += capacity;
587 		if (dataMemoryPeak < dataMemory)
588 			dataMemoryPeak = dataMemory;
589 		dataCount ++;
590 		allocCount ++;
591 
592 		this._size = size;
593 		this._capacity = capacity;
594 
595 		// also collect
596 		allocatedThreshold += capacity;
597 		if (allocatedThreshold > collectThreshold)
598 		{
599 			debug(DATA) printf("Garbage collect triggered by total allocated Data exceeding threshold... ");
600 			GC.collect();
601 			debug(DATA) printf("Done\n");
602 			allocatedThreshold = 0;
603 		}
604 	}
605 
606 	/// Destructor - destroys the wrapped data.
607 	~this()
608 	{
609 		free(data, capacity);
610 		data = null;
611 		// If DataWrapper is created and manually deleted, there is no need to cause frequent collections
612 		if (allocatedThreshold > capacity)
613 			allocatedThreshold -= capacity;
614 		else
615 			allocatedThreshold = 0;
616 
617 		dataMemory -= capacity;
618 		dataCount --;
619 	}
620 
621 	@property override
622 	size_t size() const { return _size; }
623 
624 	@property override
625 	size_t capacity() const { return _capacity; }
626 
627 	override void setSize(size_t newSize)
628 	{
629 		assert(newSize <= capacity);
630 		_size = newSize;
631 	}
632 
633 	@property override
634 	inout(void)[] contents() inout
635 	{
636 		return data[0..size];
637 	}
638 
639 	// https://github.com/D-Programming-Language/druntime/pull/759
640 	version(OSX)
641 		enum _SC_PAGE_SIZE = 29;
642 
643 	// https://github.com/D-Programming-Language/druntime/pull/1140
644 	version(FreeBSD)
645 		enum _SC_PAGE_SIZE = 47;
646 
647 	version(Windows)
648 	{
649 		static immutable size_t pageSize;
650 
651 		shared static this()
652 		{
653 			SYSTEM_INFO si;
654 			GetSystemInfo(&si);
655 			pageSize = si.dwPageSize;
656 		}
657 	}
658 	else
659 	static if (is(typeof(_SC_PAGE_SIZE)))
660 	{
661 		static immutable size_t pageSize;
662 
663 		shared static this()
664 		{
665 			pageSize = sysconf(_SC_PAGE_SIZE);
666 		}
667 	}
668 
669 	static void* malloc(ref size_t size)
670 	{
671 		if (is(typeof(pageSize)))
672 			size = ((size-1) | (pageSize-1))+1;
673 
674 		version(Windows)
675 		{
676 			return VirtualAlloc(null, size, MEM_COMMIT, PAGE_READWRITE);
677 		}
678 		else
679 		version(Posix)
680 		{
681 			version(linux)
682 				import core.sys.linux.sys.mman : MAP_ANON;
683 			auto p = mmap(null, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
684 			return (p == MAP_FAILED) ? null : p;
685 		}
686 		else
687 			return core.stdc.malloc(size);
688 	}
689 
690 	static void free(void* p, size_t size)
691 	{
692 		debug
693 		{
694 			(cast(ubyte*)p)[0..size] = 0xDB;
695 		}
696 		version(Windows)
697 			VirtualFree(p, 0, MEM_RELEASE);
698 		else
699 		version(Posix)
700 			munmap(p, size);
701 		else
702 			core.stdc.free(size);
703 	}
704 }
705 
706 // Source: Win32 bindings project
707 version(Windows)
708 {
709 	struct SYSTEM_INFO {
710 		union {
711 			DWORD dwOemId;
712 			struct {
713 				WORD wProcessorArchitecture;
714 				WORD wReserved;
715 			}
716 		}
717 		DWORD dwPageSize;
718 		PVOID lpMinimumApplicationAddress;
719 		PVOID lpMaximumApplicationAddress;
720 		DWORD dwActiveProcessorMask;
721 		DWORD dwNumberOfProcessors;
722 		DWORD dwProcessorType;
723 		DWORD dwAllocationGranularity;
724 		WORD  wProcessorLevel;
725 		WORD  wProcessorRevision;
726 	}
727 	alias SYSTEM_INFO* LPSYSTEM_INFO;
728 
729 	extern(Windows) VOID GetSystemInfo(LPSYSTEM_INFO);
730 }
731 
732 debug(DATA_REFCOUNT) import ae.utils.exception, ae.sys.memory, std.stdio;
733 
734 debug(DATA_REFCOUNT) void debugLog(Args...)(const char* s, Args args) @nogc
735 {
736 	printf(s, args);
737 	printf("\n");
738 	if (inCollect())
739 		printf("\t(in GC collect)\n");
740 	else
741 		(cast(void function() @nogc)&debugStackTrace)();
742 	fflush(core.stdc.stdio.stdout);
743 }
744 
745 debug(DATA_REFCOUNT) void debugStackTrace()
746 {
747 	try
748 		foreach (line; getStackTrace())
749 			writeln("\t", line);
750 	catch (Throwable e)
751 		writeln("\t(stacktrace error: ", e.msg, ")");
752 }