1 /**
2  * Wrappers for raw _data located in unmanaged memory.
3  *
4  * Using the Data type will only place a small object in managed memory,
5  * keeping the actual _data in unmanaged memory.
6  * A proxy class (DataWrapper) is used to safely allow multiple references to
7  * the same block of unmanaged memory.
8  * When the DataWrapper object is destroyed (either manually or by the garbage
9  * collector when there are no remaining Data references), the unmanaged
10  * memory is deallocated.
11  *
12  * This has the following advantage over using managed memory:
13  * $(UL
14  *  $(LI Faster allocation and deallocation, since memory is requested from
15  *       the OS directly as whole pages)
16  *  $(LI Greatly reduced chance of memory leaks (on 32-bit platforms) due to
17  *       stray pointers)
18  *  $(LI Overall improved GC performance due to reduced size of managed heap)
19  *  $(LI Memory is immediately returned to the OS when _data is deallocated)
20  * )
21  * On the other hand, using Data has the following disadvantages:
22  * $(UL
23  *  $(LI This module is designed to store raw _data which does not have any
24  *       pointers. Storing objects containing pointers to managed memory is
25  *       unsupported, and may result in memory corruption.)
26  *  $(LI Small objects may be stored inefficiently, as the module requests
27  *       entire pages of memory from the OS. Considering allocating one large
28  *       object and use slices (Data instances) for individual objects.)
29  *  $(LI Incorrect usage (i.e. retaining/escaping references to wrapped memory
30  *       without keeping a reference to its corresponding DataWrapper) can
31  *       result in dangling pointers and hard-to-debug memory corruption.)
32  * )
33  *
34  * License:
35  *   This Source Code Form is subject to the terms of
36  *   the Mozilla Public License, v. 2.0. If a copy of
37  *   the MPL was not distributed with this file, You
38  *   can obtain one at http://mozilla.org/MPL/2.0/.
39  *
40  * Authors:
41  *   Vladimir Panteleev <vladimir@thecybershadow.net>
42  */
43 
44 module ae.sys.data;
45 
46 static import core.stdc.stdlib;
47 import core.stdc.string : memmove;
48 import std.traits;
49 import core.memory;
50 import core.exception;
51 debug import std.string;
52 public import ae.sys.dataset;
53 import ae.utils.math;
54 
55 debug(DATA) import core.stdc.stdio;
56 
57 // ideas/todo:
58 // * templatize (and forbid using aliased types)?
59 // * use heap (malloc/Windows heap API) for small objects?
60 // * reference counting?
61 // * "immutable" support?
62 
63 /**
64  * Wrapper for data located in external memory, to prevent faux references.
65  * Represents a slice of data, which may or may not be in unmanaged memory.
66  * Data in unmanaged memory is bound to a DataWrapper class instance.
67  *
68  * All operations on this class should be safe, except for accessing contents directly.
69  * All operations on contents must be accompanied by a live reference to the Data object,
70  * to keep a GC anchor towards the unmanaged data.
71  *
72  * Concatenations and appends to Data contents will cause reallocations on the heap, consider using Data instead.
73  *
74  * Be sure not to lose Data references while using their contents!
75  * For example, avoid code like this:
76  * ----
77  * fun(cast(string)transformSomeData(someData).contents);
78  * ----
79  * The Data return value may be unreachable once .contents is evaluated.
80  * Use .toHeap instead of .contents in such cases to get a safe heap copy.
81  */
82 struct Data
83 {
84 private:
85 	/// Wrapped data
86 	const(void)[] _contents;
87 	/// Reference to the wrapper of the actual data - may be null to indicate wrapped data in managed memory.
88 	/// Used as a GC anchor to unmanaged data, and for in-place expands (for appends).
89 	DataWrapper wrapper;
90 	/// Indicates whether we're allowed to modify the data contents.
91 	bool mutable;
92 
93 	/// Maximum preallocation for append operations.
94 	enum { MAX_PREALLOC = 4*1024*1024 } // must be power of 2
95 
96 public:
97 	/**
98 	 * Create new instance wrapping the given data.
99 	 * Params:
100 	 *   data = initial data
101 	 *   forceReallocation = when false, the contents will be duplicated to
102 	 *     unmanaged memory only when it's not on the managed heap; when true,
103 	 *     the contents will be reallocated always.
104 	 */
105 	this(const(void)[] data, bool forceReallocation = false)
106 	{
107 		if (data.length == 0)
108 			contents = null;
109 		else
110 		if (forceReallocation || GC.addrOf(data.ptr) is null)
111 		{
112 			// copy to unmanaged memory
113 			auto wrapper = unmanagedNew!MemoryDataWrapper(data.length, data.length);
114 			this.wrapper = wrapper;
115 			wrapper.contents[] = data[];
116 			contents = wrapper.contents;
117 			mutable = true;
118 		}
119 		else
120 		{
121 			// just save a reference
122 			contents = data;
123 			mutable = false;
124 		}
125 
126 		assert(this.length == data.length);
127 	}
128 
129 	/// ditto
130 	this(void[] data, bool forceReallocation = false)
131 	{
132 		const(void)[] cdata = data;
133 		this(cdata, forceReallocation);
134 		mutable = true;
135 	}
136 
137 	/// Create a new instance with given size/capacity. Capacity defaults to size.
138 	this(size_t size, size_t capacity = 0)
139 	in
140 	{
141 		assert(capacity == 0 || size <= capacity);
142 	}
143 	do
144 	{
145 		if (!capacity)
146 			capacity = size;
147 
148 		if (capacity)
149 		{
150 			auto wrapper = unmanagedNew!MemoryDataWrapper(size, capacity);
151 			this.wrapper = wrapper;
152 			contents = wrapper.contents;
153 			mutable = true;
154 		}
155 		else
156 		{
157 			wrapper = null;
158 			contents = null;
159 		}
160 
161 		assert(this.length == size);
162 	}
163 
164 	this(DataWrapper wrapper, bool mutable)
165 	{
166 		this.wrapper = wrapper;
167 		this.mutable = mutable;
168 		this.contents = wrapper.contents;
169 	}
170 
171 	this(this)
172 	{
173 		if (wrapper)
174 		{
175 			wrapper.references++;
176 			debug (DATA_REFCOUNT) debugLog("%p -> %p: Incrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references);
177 		}
178 		else
179 			debug (DATA_REFCOUNT) debugLog("%p -> %p: this(this) with no wrapper", cast(void*)&this, cast(void*)wrapper);
180 	}
181 
182 	~this() pure
183 	{
184 		//clear();
185 		// https://issues.dlang.org/show_bug.cgi?id=13809
186 		(cast(void delegate() pure)&clear)();
187 	}
188 
189 	debug(DATA) invariant
190 	{
191 		if (wrapper)
192 			assert(wrapper.references > 0, "Data referencing DataWrapper with bad reference count");
193 	}
194 
195 /*
196 	/// Create new instance as a slice over an existing DataWrapper.
197 	private this(DataWrapper wrapper, size_t start = 0, size_t end = size_t.max)
198 	{
199 		this.wrapper = wrapper;
200 		this.start = start;
201 		this.end = end==size_t.max ? wrapper.capacity : end;
202 	}
203 */
204 
205 	@property const(void)[] contents() const
206 	{
207 		return _contents;
208 	}
209 
210 	@property private const(void)[] contents(const(void)[] data)
211 	{
212 		return _contents = data;
213 	}
214 
215 	/// Get mutable contents
216 	@property void[] mcontents()
217 	{
218 		if (!mutable && length)
219 		{
220 			reallocate(length, length);
221 			assert(mutable);
222 		}
223 		return cast(void[])_contents;
224 	}
225 
226 	@property const(void)* ptr() const
227 	{
228 		return contents.ptr;
229 	}
230 
231 	@property void* mptr()
232 	{
233 		return mcontents.ptr;
234 	}
235 
236 	@property size_t length() const
237 	{
238 		return contents.length;
239 	}
240 
241 	@property bool empty() const
242 	{
243 		return contents is null;
244 	}
245 
246 	bool opCast(T)()
247 		if (is(T == bool))
248 	{
249 		return !empty;
250 	}
251 
252 	@property size_t capacity() const
253 	{
254 		if (wrapper is null)
255 			return length;
256 		// We can only safely expand if the memory slice is at the end of the used unmanaged memory block.
257 		auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data
258 		auto end = pos + length;               // end   position in wrapper data
259 		assert(end <= wrapper.size);
260 		if (end == wrapper.size && end < wrapper.capacity)
261 			return wrapper.capacity - pos;
262 		else
263 			return length;
264 	}
265 
266 	/// Put a copy of the data on D's managed heap, and return it.
267 	@property
268 	void[] toHeap() const
269 	{
270 		return _contents.dup;
271 	}
272 
273 	private void reallocate(size_t size, size_t capacity)
274 	{
275 		auto wrapper = unmanagedNew!MemoryDataWrapper(size, capacity);
276 		wrapper.contents[0..this.length] = contents[];
277 		//(cast(ubyte[])newWrapper.contents)[this.length..value] = 0;
278 
279 		clear();
280 		this.wrapper = wrapper;
281 		this.contents = wrapper.contents;
282 		mutable = true;
283 	}
284 
285 	private void expand(size_t newSize, size_t newCapacity)
286 	in
287 	{
288 		assert(length < newSize);
289 		assert(newSize <= newCapacity);
290 	}
291 	out
292 	{
293 		assert(length == newSize);
294 	}
295 	do
296 	{
297 		if (newCapacity <= capacity)
298 		{
299 			auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data
300 			wrapper.setSize(pos + newSize);
301 			contents = ptr[0..newSize];
302 		}
303 		else
304 			reallocate(newSize, newCapacity);
305 	}
306 
307 	@property void length(size_t value)
308 	{
309 		if (value == length) // no change
310 			return;
311 		if (value < length)  // shorten
312 			_contents = _contents[0..value];
313 		else                 // lengthen
314 			expand(value, value);
315 	}
316 	alias length opDollar;
317 
318 	@property Data dup() const
319 	{
320 		return Data(contents, true);
321 	}
322 
323 	/// This used to be an unsafe method which deleted the wrapped data.
324 	/// Now that Data is refcounted, this simply calls clear() and
325 	/// additionally asserts that this Data is the only Data holding
326 	/// a reference to the wrapper.
327 	void deleteContents()
328 	out
329 	{
330 		assert(wrapper is null);
331 	}
332 	do
333 	{
334 		if (wrapper)
335 		{
336 			assert(wrapper.references == 1, "Attempting to call deleteContents with ");
337 			clear();
338 		}
339 	}
340 
341 	void clear()
342 	{
343 		if (wrapper)
344 		{
345 			assert(wrapper.references > 0, "Dangling pointer to wrapper");
346 			wrapper.references--;
347 			debug (DATA_REFCOUNT) debugLog("%p -> %p: Decrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references);
348 			if (wrapper.references == 0)
349 				wrapper.destroy();
350 
351 			wrapper = null;
352 		}
353 
354 		contents = null;
355 	}
356 
357 	Data concat(const(void)[] data)
358 	{
359 		if (data.length==0)
360 			return this;
361 		Data result = Data(length + data.length);
362 		result.mcontents[0..this.length] = contents[];
363 		result.mcontents[this.length..$] = data[];
364 		return result;
365 	}
366 
367 	template opBinary(string op) if (op == "~")
368 	{
369 		Data opBinary(T)(const(T)[] data)
370 		if (!hasIndirections!T)
371 		{
372 			return concat(data);
373 		}
374 
375 		Data opBinary()(Data data)
376 		{
377 			return concat(data.contents);
378 		}
379 	}
380 
381 	Data prepend(const(void)[] data)
382 	{
383 		Data result = Data(data.length + length);
384 		result.mcontents[0..data.length] = data[];
385 		result.mcontents[data.length..$] = contents[];
386 		return result;
387 	}
388 
389 	template opBinaryRight(string op) if (op == "~")
390 	{
391 		Data opBinaryRight(T)(const(T)[] data)
392 		if (!hasIndirections!T)
393 		{
394 			return prepend(data);
395 		}
396 	}
397 
398 	private static size_t getPreallocSize(size_t length)
399 	{
400 		if (length < MAX_PREALLOC)
401 			return nextPowerOfTwo(length);
402 		else
403 			return ((length-1) | (MAX_PREALLOC-1)) + 1;
404 	}
405 
406 	Data append(const(void)[] data)
407 	{
408 		if (data.length==0)
409 			return this;
410 		size_t oldLength = length;
411 		size_t newLength = length + data.length;
412 		expand(newLength, getPreallocSize(newLength));
413 		auto newContents = cast(void[])_contents[oldLength..$];
414 		newContents[] = (cast(void[])data)[];
415 		return this;
416 	}
417 
418 	/// Note that unlike concatenation (a ~ b), appending (a ~= b) will preallocate.
419 	template opOpAssign(string op) if (op == "~")
420 	{
421 		Data opOpAssign(T)(const(T)[] data)
422 		if (!hasIndirections!T)
423 		{
424 			return append(data);
425 		}
426 
427 		Data opOpAssign()(Data data)
428 		{
429 			return append(data.contents);
430 		}
431 
432 		Data opOpAssign()(ubyte value) // hack?
433 		{
434 			return append((&value)[0..1]);
435 		}
436 	}
437 
438 	Data opSlice()
439 	{
440 		return this;
441 	}
442 
443 	Data opSlice(size_t x, size_t y)
444 	in
445 	{
446 		assert(x <= y);
447 		assert(y <= length);
448 	}
449 // https://issues.dlang.org/show_bug.cgi?id=13463
450 //	out(result)
451 //	{
452 //		assert(result.length == y-x);
453 //	}
454 	do
455 	{
456 		if (x == y)
457 			return Data();
458 		else
459 		{
460 			Data result = this;
461 			result.contents = result.contents[x..y];
462 			return result;
463 		}
464 	}
465 
466 	/// Return a new Data for the first size bytes, and slice this instance from size to end.
467 	Data popFront(size_t size)
468 	in
469 	{
470 		assert(size <= length);
471 	}
472 	do
473 	{
474 		Data result = this;
475 		result.contents = contents[0..size];
476 		this  .contents = contents[size..$];
477 		return result;
478 	}
479 }
480 
481 unittest
482 {
483 	Data d = Data("aaaaa");
484 	assert(d.wrapper.references == 1);
485 	Data s = d[1..4];
486 	assert(d.wrapper.references == 2);
487 }
488 
489 // ************************************************************************
490 
491 static /*thread-local*/ size_t dataMemory, dataMemoryPeak;
492 static /*thread-local*/ uint   dataCount, allocCount;
493 
494 // Abstract wrapper.
495 abstract class DataWrapper
496 {
497 	sizediff_t references = 1;
498 	abstract @property inout(void)[] contents() inout;
499 	abstract @property size_t size() const;
500 	abstract void setSize(size_t newSize);
501 	abstract @property size_t capacity() const;
502 
503 	debug ~this()
504 	{
505 		debug(DATA_REFCOUNT) debugLog("%.*s.~this, references==%d", this.classinfo.name.length, this.classinfo.name.ptr, references);
506 		assert(references == 0, "Deleting DataWrapper with non-zero reference count");
507 	}
508 }
509 
510 void setGCThreshold(size_t value) { MemoryDataWrapper.collectThreshold = value; }
511 
512 C unmanagedNew(C, Args...)(auto ref Args args)
513 if (is(C == class))
514 {
515 	import std.conv : emplace;
516 	enum size = __traits(classInstanceSize, C);
517 	auto p = unmanagedAlloc(size);
518 	emplace!C(p[0..size], args);
519 	return cast(C)p;
520 }
521 
522 void unmanagedDelete(C)(C c)
523 if (is(C == class))
524 {
525 	c.__xdtor();
526 	unmanagedFree(p);
527 }
528 
529 private:
530 
531 void* unmanagedAlloc(size_t sz)
532 {
533 	auto p = core.stdc.stdlib.malloc(sz);
534 
535 	debug(DATA_REFCOUNT) debugLog("? -> %p: Allocating via malloc (%d bytes)", p, cast(uint)sz);
536 
537 	if (!p)
538 		throw new OutOfMemoryError();
539 
540 	//GC.addRange(p, sz);
541 	return p;
542 }
543 
544 void unmanagedFree(void* p) @nogc
545 {
546 	if (p)
547 	{
548 		debug(DATA_REFCOUNT) debugLog("? -> %p: Deleting via free", p);
549 
550 		//GC.removeRange(p);
551 		core.stdc.stdlib.free(p);
552 	}
553 }
554 
555 version (Windows)
556 	import core.sys.windows.windows;
557 else
558 {
559 	import core.sys.posix.unistd;
560 	import core.sys.posix.sys.mman;
561 }
562 
563 /// Wrapper for data in RAM, allocated from the OS.
564 final class MemoryDataWrapper : DataWrapper
565 {
566 	/// Pointer to actual data.
567 	void* data;
568 	/// Used size. Needed for safe appends.
569 	size_t _size;
570 	/// Allocated capacity.
571 	size_t _capacity;
572 
573 	/// Threshold of allocated memory to trigger a collect.
574 	__gshared size_t collectThreshold = 8*1024*1024; // 8MB
575 	/// Counter towards the threshold.
576 	static /*thread-local*/ size_t allocatedThreshold;
577 
578 	/// Create a new instance with given capacity.
579 	this(size_t size, size_t capacity)
580 	{
581 		data = malloc(/*ref*/ capacity);
582 		if (data is null)
583 		{
584 			debug(DATA) fprintf(stderr, "Garbage collect triggered by failed Data allocation of %llu bytes... ", cast(ulong)capacity);
585 			GC.collect();
586 			debug(DATA) fprintf(stderr, "Done\n");
587 			data = malloc(/*ref*/ capacity);
588 			allocatedThreshold = 0;
589 		}
590 		if (data is null)
591 			onOutOfMemoryError();
592 
593 		dataMemory += capacity;
594 		if (dataMemoryPeak < dataMemory)
595 			dataMemoryPeak = dataMemory;
596 		dataCount ++;
597 		allocCount ++;
598 
599 		this._size = size;
600 		this._capacity = capacity;
601 
602 		// also collect
603 		allocatedThreshold += capacity;
604 		if (allocatedThreshold > collectThreshold)
605 		{
606 			debug(DATA) fprintf(stderr, "Garbage collect triggered by total allocated Data exceeding threshold... ");
607 			GC.collect();
608 			debug(DATA) fprintf(stderr, "Done\n");
609 			allocatedThreshold = 0;
610 		}
611 	}
612 
613 	/// Destructor - destroys the wrapped data.
614 	~this()
615 	{
616 		free(data, capacity);
617 		data = null;
618 		// If DataWrapper is created and manually deleted, there is no need to cause frequent collections
619 		if (allocatedThreshold > capacity)
620 			allocatedThreshold -= capacity;
621 		else
622 			allocatedThreshold = 0;
623 
624 		dataMemory -= capacity;
625 		dataCount --;
626 	}
627 
628 	@property override
629 	size_t size() const { return _size; }
630 
631 	@property override
632 	size_t capacity() const { return _capacity; }
633 
634 	override void setSize(size_t newSize)
635 	{
636 		assert(newSize <= capacity);
637 		_size = newSize;
638 	}
639 
640 	@property override
641 	inout(void)[] contents() inout
642 	{
643 		return data[0..size];
644 	}
645 
646 	// https://github.com/D-Programming-Language/druntime/pull/759
647 	version(OSX)
648 		enum _SC_PAGE_SIZE = 29;
649 
650 	// https://github.com/D-Programming-Language/druntime/pull/1140
651 	version(FreeBSD)
652 		enum _SC_PAGE_SIZE = 47;
653 
654 	version(Windows)
655 	{
656 		static immutable size_t pageSize;
657 
658 		shared static this()
659 		{
660 			SYSTEM_INFO si;
661 			GetSystemInfo(&si);
662 			pageSize = si.dwPageSize;
663 		}
664 	}
665 	else
666 	static if (is(typeof(_SC_PAGE_SIZE)))
667 	{
668 		static immutable size_t pageSize;
669 
670 		shared static this()
671 		{
672 			pageSize = sysconf(_SC_PAGE_SIZE);
673 		}
674 	}
675 
676 	static void* malloc(ref size_t size)
677 	{
678 		if (is(typeof(pageSize)))
679 			size = ((size-1) | (pageSize-1))+1;
680 
681 		version(Windows)
682 		{
683 			return VirtualAlloc(null, size, MEM_COMMIT, PAGE_READWRITE);
684 		}
685 		else
686 		version(Posix)
687 		{
688 			version(linux)
689 				import core.sys.linux.sys.mman : MAP_ANON;
690 			auto p = mmap(null, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
691 			return (p == MAP_FAILED) ? null : p;
692 		}
693 		else
694 			return core.stdc.malloc(size);
695 	}
696 
697 	static void free(void* p, size_t size)
698 	{
699 		debug
700 		{
701 			(cast(ubyte*)p)[0..size] = 0xDB;
702 		}
703 		version(Windows)
704 			VirtualFree(p, 0, MEM_RELEASE);
705 		else
706 		version(Posix)
707 			munmap(p, size);
708 		else
709 			core.stdc.free(size);
710 	}
711 }
712 
713 // Source: Win32 bindings project
714 version(Windows)
715 {
716 	struct SYSTEM_INFO {
717 		union {
718 			DWORD dwOemId;
719 			struct {
720 				WORD wProcessorArchitecture;
721 				WORD wReserved;
722 			}
723 		}
724 		DWORD dwPageSize;
725 		PVOID lpMinimumApplicationAddress;
726 		PVOID lpMaximumApplicationAddress;
727 		DWORD dwActiveProcessorMask;
728 		DWORD dwNumberOfProcessors;
729 		DWORD dwProcessorType;
730 		DWORD dwAllocationGranularity;
731 		WORD  wProcessorLevel;
732 		WORD  wProcessorRevision;
733 	}
734 	alias SYSTEM_INFO* LPSYSTEM_INFO;
735 
736 	extern(Windows) VOID GetSystemInfo(LPSYSTEM_INFO);
737 }
738 
739 debug(DATA_REFCOUNT) import ae.utils.exception, ae.sys.memory, std.stdio;
740 
741 debug(DATA_REFCOUNT) void debugLog(Args...)(const char* s, Args args) @nogc
742 {
743 	fprintf(stderr, s, args);
744 	fprintf(stderr, "\n");
745 	if (inCollect())
746 		fprintf(stderr, "\t(in GC collect)\n");
747 	else
748 		(cast(void function() @nogc)&debugStackTrace)();
749 	fflush(core.stdc.stdio.stderr);
750 }
751 
752 debug(DATA_REFCOUNT) void debugStackTrace()
753 {
754 	try
755 		foreach (line; getStackTrace())
756 			writeln("\t", line);
757 	catch (Throwable e)
758 		writeln("\t(stacktrace error: ", e.msg, ")");
759 }