1 /**
2  * Wrappers for raw _data located in unmanaged memory.
3  *
4  * Using the Data type will only place a small object in managed memory,
5  * keeping the actual _data in unmanaged memory.
6  * A proxy class (DataWrapper) is used to safely allow multiple references to
7  * the same block of unmanaged memory.
8  * When the DataWrapper object is destroyed (either manually or by the garbage
9  * collector when there are no remaining Data references), the unmanaged
10  * memory is deallocated.
11  *
12  * This has the following advantage over using managed memory:
13  * $(UL
14  *  $(LI Faster allocation and deallocation, since memory is requested from
15  *       the OS directly as whole pages)
16  *  $(LI Greatly reduced chance of memory leaks (on 32-bit platforms) due to
17  *       stray pointers)
18  *  $(LI Overall improved GC performance due to reduced size of managed heap)
19  *  $(LI Memory is immediately returned to the OS when _data is deallocated)
20  * )
21  * On the other hand, using Data has the following disadvantages:
22  * $(UL
23  *  $(LI This module is designed to store raw _data which does not have any
24  *       pointers. Storing objects containing pointers to managed memory is
25  *       unsupported, and may result in memory corruption.)
26  *  $(LI Small objects may be stored inefficiently, as the module requests
27  *       entire pages of memory from the OS. Considering allocating one large
28  *       object and use slices (Data instances) for individual objects.)
29  *  $(LI Incorrect usage (i.e. retaining/escaping references to wrapped memory
30  *       without keeping a reference to its corresponding DataWrapper) can
31  *       result in dangling pointers and hard-to-debug memory corruption.)
32  * )
33  *
34  * License:
35  *   This Source Code Form is subject to the terms of
36  *   the Mozilla Public License, v. 2.0. If a copy of
37  *   the MPL was not distributed with this file, You
38  *   can obtain one at http://mozilla.org/MPL/2.0/.
39  *
40  * Authors:
41  *   Vladimir Panteleev <vladimir@thecybershadow.net>
42  */
43 
44 module ae.sys.data;
45 
46 static import core.stdc.stdlib;
47 import core.stdc..string : memmove;
48 import std.traits;
49 import core.memory;
50 import core.exception;
51 debug import std.stdio;
52 debug import std..string;
53 public import ae.sys.dataset;
54 import ae.utils.math;
55 
56 debug(DATA) import core.stdc.stdio;
57 
58 // ideas/todo:
59 // * templatize (and forbid using aliased types)?
60 // * use heap (malloc/Windows heap API) for small objects?
61 // * reference counting?
62 // * "immutable" support?
63 
64 /**
65  * Wrapper for data located in external memory, to prevent faux references.
66  * Represents a slice of data, which may or may not be in unmanaged memory.
67  * Data in unmanaged memory is bound to a DataWrapper class instance.
68  *
69  * All operations on this class should be safe, except for accessing contents directly.
70  * All operations on contents must be accompanied by a live reference to the Data object,
71  * to keep a GC anchor towards the unmanaged data.
72  *
73  * Concatenations and appends to Data contents will cause reallocations on the heap, consider using Data instead.
74  *
75  * Be sure not to lose Data references while using their contents!
76  * For example, avoid code like this:
77  * ----
78  * fun(cast(string)transformSomeData(someData).contents);
79  * ----
80  * The Data return value may be unreachable once .contents is evaluated.
81  * Use .toHeap instead of .contents in such cases to get a safe heap copy.
82  */
83 struct Data
84 {
85 private:
86 	/// Wrapped data
87 	const(void)[] _contents;
88 	/// Reference to the wrapper of the actual data - may be null to indicate wrapped data in managed memory.
89 	/// Used as a GC anchor to unmanaged data, and for in-place expands (for appends).
90 	DataWrapper wrapper;
91 	/// Indicates whether we're allowed to modify the data contents.
92 	bool mutable;
93 
94 	/// Maximum preallocation for append operations.
95 	enum { MAX_PREALLOC = 4*1024*1024 } // must be power of 2
96 
97 public:
98 	/**
99 	 * Create new instance wrapping the given data.
100 	 * Params:
101 	 *   data = initial data
102 	 *   forceReallocation = when false, the contents will be duplicated to
103 	 *     unmanaged memory only when it's not on the managed heap; when true,
104 	 *     the contents will be reallocated always.
105 	 */
106 	this(const(void)[] data, bool forceReallocation = false)
107 	{
108 		if (data.length == 0)
109 			contents = null;
110 		else
111 		if (forceReallocation || GC.addrOf(data.ptr) is null)
112 		{
113 			// copy to unmanaged memory
114 			auto wrapper = unmanagedNew!MemoryDataWrapper(data.length, data.length);
115 			this.wrapper = wrapper;
116 			wrapper.contents[] = data[];
117 			contents = wrapper.contents;
118 			mutable = true;
119 		}
120 		else
121 		{
122 			// just save a reference
123 			contents = data;
124 			mutable = false;
125 		}
126 
127 		assert(this.length == data.length);
128 	}
129 
130 	/// ditto
131 	this(void[] data, bool forceReallocation = false)
132 	{
133 		const(void)[] cdata = data;
134 		this(cdata, forceReallocation);
135 		mutable = true;
136 	}
137 
138 	/// Create a new instance with given size/capacity. Capacity defaults to size.
139 	this(size_t size, size_t capacity = 0)
140 	in
141 	{
142 		assert(capacity == 0 || size <= capacity);
143 	}
144 	do
145 	{
146 		if (!capacity)
147 			capacity = size;
148 
149 		if (capacity)
150 		{
151 			auto wrapper = unmanagedNew!MemoryDataWrapper(size, capacity);
152 			this.wrapper = wrapper;
153 			contents = wrapper.contents;
154 			mutable = true;
155 		}
156 		else
157 		{
158 			wrapper = null;
159 			contents = null;
160 		}
161 
162 		assert(this.length == size);
163 	}
164 
165 	this(DataWrapper wrapper, bool mutable)
166 	{
167 		this.wrapper = wrapper;
168 		this.mutable = mutable;
169 		this.contents = wrapper.contents;
170 	}
171 
172 	this(this)
173 	{
174 		if (wrapper)
175 		{
176 			wrapper.references++;
177 			debug (DATA_REFCOUNT) debugLog("%p -> %p: Incrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references);
178 		}
179 		else
180 			debug (DATA_REFCOUNT) debugLog("%p -> %p: this(this) with no wrapper", cast(void*)&this, cast(void*)wrapper);
181 	}
182 
183 	~this() pure
184 	{
185 		//clear();
186 		// https://issues.dlang.org/show_bug.cgi?id=13809
187 		(cast(void delegate() pure)&clear)();
188 	}
189 
190 	debug(DATA) invariant
191 	{
192 		if (wrapper)
193 			assert(wrapper.references > 0, "Data referencing DataWrapper with bad reference count");
194 	}
195 
196 /*
197 	/// Create new instance as a slice over an existing DataWrapper.
198 	private this(DataWrapper wrapper, size_t start = 0, size_t end = size_t.max)
199 	{
200 		this.wrapper = wrapper;
201 		this.start = start;
202 		this.end = end==size_t.max ? wrapper.capacity : end;
203 	}
204 */
205 
206 	@property const(void)[] contents() const
207 	{
208 		return _contents;
209 	}
210 
211 	@property private const(void)[] contents(const(void)[] data)
212 	{
213 		return _contents = data;
214 	}
215 
216 	/// Get mutable contents
217 	@property void[] mcontents()
218 	{
219 		if (!mutable && length)
220 		{
221 			reallocate(length, length);
222 			assert(mutable);
223 		}
224 		return cast(void[])_contents;
225 	}
226 
227 	@property const(void)* ptr() const
228 	{
229 		return contents.ptr;
230 	}
231 
232 	@property void* mptr()
233 	{
234 		return mcontents.ptr;
235 	}
236 
237 	@property size_t length() const
238 	{
239 		return contents.length;
240 	}
241 
242 	@property bool empty() const
243 	{
244 		return contents is null;
245 	}
246 
247 	bool opCast(T)()
248 		if (is(T == bool))
249 	{
250 		return !empty;
251 	}
252 
253 	@property size_t capacity() const
254 	{
255 		if (wrapper is null)
256 			return length;
257 		// We can only safely expand if the memory slice is at the end of the used unmanaged memory block.
258 		auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data
259 		auto end = pos + length;               // end   position in wrapper data
260 		assert(end <= wrapper.size);
261 		if (end == wrapper.size && end < wrapper.capacity)
262 			return wrapper.capacity - pos;
263 		else
264 			return length;
265 	}
266 
267 	/// Put a copy of the data on D's managed heap, and return it.
268 	@property
269 	void[] toHeap() const
270 	{
271 		return _contents.dup;
272 	}
273 
274 	private void reallocate(size_t size, size_t capacity)
275 	{
276 		auto wrapper = unmanagedNew!MemoryDataWrapper(size, capacity);
277 		wrapper.contents[0..this.length] = contents[];
278 		//(cast(ubyte[])newWrapper.contents)[this.length..value] = 0;
279 
280 		clear();
281 		this.wrapper = wrapper;
282 		this.contents = wrapper.contents;
283 		mutable = true;
284 	}
285 
286 	private void expand(size_t newSize, size_t newCapacity)
287 	in
288 	{
289 		assert(length < newSize);
290 		assert(newSize <= newCapacity);
291 	}
292 	out
293 	{
294 		assert(length == newSize);
295 	}
296 	do
297 	{
298 		if (newCapacity <= capacity)
299 		{
300 			auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data
301 			wrapper.setSize(pos + newSize);
302 			contents = ptr[0..newSize];
303 		}
304 		else
305 			reallocate(newSize, newCapacity);
306 	}
307 
308 	@property void length(size_t value)
309 	{
310 		if (value == length) // no change
311 			return;
312 		if (value < length)  // shorten
313 			_contents = _contents[0..value];
314 		else                 // lengthen
315 			expand(value, value);
316 	}
317 	alias length opDollar;
318 
319 	@property Data dup() const
320 	{
321 		return Data(contents, true);
322 	}
323 
324 	/// This used to be an unsafe method which deleted the wrapped data.
325 	/// Now that Data is refcounted, this simply calls clear() and
326 	/// additionally asserts that this Data is the only Data holding
327 	/// a reference to the wrapper.
328 	void deleteContents()
329 	out
330 	{
331 		assert(wrapper is null);
332 	}
333 	do
334 	{
335 		if (wrapper)
336 		{
337 			assert(wrapper.references == 1, "Attempting to call deleteContents with ");
338 			clear();
339 		}
340 	}
341 
342 	void clear()
343 	{
344 		if (wrapper)
345 		{
346 			assert(wrapper.references > 0, "Dangling pointer to wrapper");
347 			wrapper.references--;
348 			debug (DATA_REFCOUNT) debugLog("%p -> %p: Decrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references);
349 			if (wrapper.references == 0)
350 				wrapper.destroy();
351 
352 			wrapper = null;
353 		}
354 
355 		contents = null;
356 	}
357 
358 	Data concat(const(void)[] data)
359 	{
360 		if (data.length==0)
361 			return this;
362 		Data result = Data(length + data.length);
363 		result.mcontents[0..this.length] = contents[];
364 		result.mcontents[this.length..$] = data[];
365 		return result;
366 	}
367 
368 	template opBinary(string op) if (op == "~")
369 	{
370 		Data opBinary(T)(const(T)[] data)
371 		if (!hasIndirections!T)
372 		{
373 			return concat(data);
374 		}
375 
376 		Data opBinary()(Data data)
377 		{
378 			return concat(data.contents);
379 		}
380 	}
381 
382 	Data prepend(const(void)[] data)
383 	{
384 		Data result = Data(data.length + length);
385 		result.mcontents[0..data.length] = data[];
386 		result.mcontents[data.length..$] = contents[];
387 		return result;
388 	}
389 
390 	template opBinaryRight(string op) if (op == "~")
391 	{
392 		Data opBinaryRight(T)(const(T)[] data)
393 		if (!hasIndirections!T)
394 		{
395 			return prepend(data);
396 		}
397 	}
398 
399 	private static size_t getPreallocSize(size_t length)
400 	{
401 		if (length < MAX_PREALLOC)
402 			return nextPowerOfTwo(length);
403 		else
404 			return ((length-1) | (MAX_PREALLOC-1)) + 1;
405 	}
406 
407 	Data append(const(void)[] data)
408 	{
409 		if (data.length==0)
410 			return this;
411 		size_t oldLength = length;
412 		size_t newLength = length + data.length;
413 		expand(newLength, getPreallocSize(newLength));
414 		auto newContents = cast(void[])_contents[oldLength..$];
415 		newContents[] = (cast(void[])data)[];
416 		return this;
417 	}
418 
419 	/// Note that unlike concatenation (a ~ b), appending (a ~= b) will preallocate.
420 	template opOpAssign(string op) if (op == "~")
421 	{
422 		Data opOpAssign(T)(const(T)[] data)
423 		if (!hasIndirections!T)
424 		{
425 			return append(data);
426 		}
427 
428 		Data opOpAssign()(Data data)
429 		{
430 			return append(data.contents);
431 		}
432 
433 		Data opOpAssign()(ubyte value) // hack?
434 		{
435 			return append((&value)[0..1]);
436 		}
437 	}
438 
439 	Data opSlice()
440 	{
441 		return this;
442 	}
443 
444 	Data opSlice(size_t x, size_t y)
445 	in
446 	{
447 		assert(x <= y);
448 		assert(y <= length);
449 	}
450 // https://issues.dlang.org/show_bug.cgi?id=13463
451 //	out(result)
452 //	{
453 //		assert(result.length == y-x);
454 //	}
455 	do
456 	{
457 		if (x == y)
458 			return Data();
459 		else
460 		{
461 			Data result = this;
462 			result.contents = result.contents[x..y];
463 			return result;
464 		}
465 	}
466 
467 	/// Return a new Data for the first size bytes, and slice this instance from size to end.
468 	Data popFront(size_t size)
469 	in
470 	{
471 		assert(size <= length);
472 	}
473 	do
474 	{
475 		Data result = this;
476 		result.contents = contents[0..size];
477 		this  .contents = contents[size..$];
478 		return result;
479 	}
480 }
481 
482 unittest
483 {
484 	Data d = Data("aaaaa");
485 	assert(d.wrapper.references == 1);
486 	Data s = d[1..4];
487 	assert(d.wrapper.references == 2);
488 }
489 
490 // ************************************************************************
491 
492 static /*thread-local*/ size_t dataMemory, dataMemoryPeak;
493 static /*thread-local*/ uint   dataCount, allocCount;
494 
495 // Abstract wrapper.
496 abstract class DataWrapper
497 {
498 	sizediff_t references = 1;
499 	abstract @property inout(void)[] contents() inout;
500 	abstract @property size_t size() const;
501 	abstract void setSize(size_t newSize);
502 	abstract @property size_t capacity() const;
503 
504 	debug ~this()
505 	{
506 		debug(DATA_REFCOUNT) debugLog("%.*s.~this, references==%d", this.classinfo.name.length, this.classinfo.name.ptr, references);
507 		assert(references == 0, "Deleting DataWrapper with non-zero reference count");
508 	}
509 }
510 
511 void setGCThreshold(size_t value) { MemoryDataWrapper.collectThreshold = value; }
512 
513 C unmanagedNew(C, Args...)(auto ref Args args)
514 if (is(C == class))
515 {
516 	import std.conv : emplace;
517 	enum size = __traits(classInstanceSize, C);
518 	auto p = unmanagedAlloc(size);
519 	emplace!C(p[0..size], args);
520 	return cast(C)p;
521 }
522 
523 void unmanagedDelete(C)(C c)
524 if (is(C == class))
525 {
526 	c.__xdtor();
527 	unmanagedFree(p);
528 }
529 
530 private:
531 
532 void* unmanagedAlloc(size_t sz)
533 {
534 	auto p = core.stdc.stdlib.malloc(sz);
535 
536 	debug(DATA_REFCOUNT) debugLog("? -> %p: Allocating via malloc (%d bytes)", p, cast(uint)sz);
537 
538 	if (!p)
539 		throw new OutOfMemoryError();
540 
541 	//GC.addRange(p, sz);
542 	return p;
543 }
544 
545 void unmanagedFree(void* p) @nogc
546 {
547 	if (p)
548 	{
549 		debug(DATA_REFCOUNT) debugLog("? -> %p: Deleting via free", p);
550 
551 		//GC.removeRange(p);
552 		core.stdc.stdlib.free(p);
553 	}
554 }
555 
556 version (Windows)
557 	import core.sys.windows.windows;
558 else
559 {
560 	import core.sys.posix.unistd;
561 	import core.sys.posix.sys.mman;
562 }
563 
564 /// Wrapper for data in RAM, allocated from the OS.
565 final class MemoryDataWrapper : DataWrapper
566 {
567 	/// Pointer to actual data.
568 	void* data;
569 	/// Used size. Needed for safe appends.
570 	size_t _size;
571 	/// Allocated capacity.
572 	size_t _capacity;
573 
574 	/// Threshold of allocated memory to trigger a collect.
575 	__gshared size_t collectThreshold = 8*1024*1024; // 8MB
576 	/// Counter towards the threshold.
577 	static /*thread-local*/ size_t allocatedThreshold;
578 
579 	/// Create a new instance with given capacity.
580 	this(size_t size, size_t capacity)
581 	{
582 		data = malloc(/*ref*/ capacity);
583 		if (data is null)
584 		{
585 			debug(DATA) printf("Garbage collect triggered by failed Data allocation of %llu bytes... ", cast(ulong)capacity);
586 			GC.collect();
587 			debug(DATA) printf("Done\n");
588 			data = malloc(/*ref*/ capacity);
589 			allocatedThreshold = 0;
590 		}
591 		if (data is null)
592 			onOutOfMemoryError();
593 
594 		dataMemory += capacity;
595 		if (dataMemoryPeak < dataMemory)
596 			dataMemoryPeak = dataMemory;
597 		dataCount ++;
598 		allocCount ++;
599 
600 		this._size = size;
601 		this._capacity = capacity;
602 
603 		// also collect
604 		allocatedThreshold += capacity;
605 		if (allocatedThreshold > collectThreshold)
606 		{
607 			debug(DATA) printf("Garbage collect triggered by total allocated Data exceeding threshold... ");
608 			GC.collect();
609 			debug(DATA) printf("Done\n");
610 			allocatedThreshold = 0;
611 		}
612 	}
613 
614 	/// Destructor - destroys the wrapped data.
615 	~this()
616 	{
617 		free(data, capacity);
618 		data = null;
619 		// If DataWrapper is created and manually deleted, there is no need to cause frequent collections
620 		if (allocatedThreshold > capacity)
621 			allocatedThreshold -= capacity;
622 		else
623 			allocatedThreshold = 0;
624 
625 		dataMemory -= capacity;
626 		dataCount --;
627 	}
628 
629 	@property override
630 	size_t size() const { return _size; }
631 
632 	@property override
633 	size_t capacity() const { return _capacity; }
634 
635 	override void setSize(size_t newSize)
636 	{
637 		assert(newSize <= capacity);
638 		_size = newSize;
639 	}
640 
641 	@property override
642 	inout(void)[] contents() inout
643 	{
644 		return data[0..size];
645 	}
646 
647 	// https://github.com/D-Programming-Language/druntime/pull/759
648 	version(OSX)
649 		enum _SC_PAGE_SIZE = 29;
650 
651 	// https://github.com/D-Programming-Language/druntime/pull/1140
652 	version(FreeBSD)
653 		enum _SC_PAGE_SIZE = 47;
654 
655 	version(Windows)
656 	{
657 		static immutable size_t pageSize;
658 
659 		shared static this()
660 		{
661 			SYSTEM_INFO si;
662 			GetSystemInfo(&si);
663 			pageSize = si.dwPageSize;
664 		}
665 	}
666 	else
667 	static if (is(typeof(_SC_PAGE_SIZE)))
668 	{
669 		static immutable size_t pageSize;
670 
671 		shared static this()
672 		{
673 			pageSize = sysconf(_SC_PAGE_SIZE);
674 		}
675 	}
676 
677 	static void* malloc(ref size_t size)
678 	{
679 		if (is(typeof(pageSize)))
680 			size = ((size-1) | (pageSize-1))+1;
681 
682 		version(Windows)
683 		{
684 			return VirtualAlloc(null, size, MEM_COMMIT, PAGE_READWRITE);
685 		}
686 		else
687 		version(Posix)
688 		{
689 			version(linux)
690 				import core.sys.linux.sys.mman : MAP_ANON;
691 			auto p = mmap(null, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
692 			return (p == MAP_FAILED) ? null : p;
693 		}
694 		else
695 			return core.stdc.malloc(size);
696 	}
697 
698 	static void free(void* p, size_t size)
699 	{
700 		debug
701 		{
702 			(cast(ubyte*)p)[0..size] = 0xDB;
703 		}
704 		version(Windows)
705 			VirtualFree(p, 0, MEM_RELEASE);
706 		else
707 		version(Posix)
708 			munmap(p, size);
709 		else
710 			core.stdc.free(size);
711 	}
712 }
713 
714 // Source: Win32 bindings project
715 version(Windows)
716 {
717 	struct SYSTEM_INFO {
718 		union {
719 			DWORD dwOemId;
720 			struct {
721 				WORD wProcessorArchitecture;
722 				WORD wReserved;
723 			}
724 		}
725 		DWORD dwPageSize;
726 		PVOID lpMinimumApplicationAddress;
727 		PVOID lpMaximumApplicationAddress;
728 		DWORD dwActiveProcessorMask;
729 		DWORD dwNumberOfProcessors;
730 		DWORD dwProcessorType;
731 		DWORD dwAllocationGranularity;
732 		WORD  wProcessorLevel;
733 		WORD  wProcessorRevision;
734 	}
735 	alias SYSTEM_INFO* LPSYSTEM_INFO;
736 
737 	extern(Windows) VOID GetSystemInfo(LPSYSTEM_INFO);
738 }
739 
740 debug(DATA_REFCOUNT) import ae.utils.exception, ae.sys.memory, std.stdio;
741 
742 debug(DATA_REFCOUNT) void debugLog(Args...)(const char* s, Args args) @nogc
743 {
744 	printf(s, args);
745 	printf("\n");
746 	if (inCollect())
747 		printf("\t(in GC collect)\n");
748 	else
749 		(cast(void function() @nogc)&debugStackTrace)();
750 	fflush(core.stdc.stdio.stdout);
751 }
752 
753 debug(DATA_REFCOUNT) void debugStackTrace()
754 {
755 	try
756 		foreach (line; getStackTrace())
757 			writeln("\t", line);
758 	catch (Throwable e)
759 		writeln("\t(stacktrace error: ", e.msg, ")");
760 }