1 /**
2  * Wrappers for raw _data located in unmanaged memory.
3  *
4  * Using the Data type will only place a small object in managed memory,
5  * keeping the actual _data in unmanaged memory.
6  * A proxy class (DataWrapper) is used to safely allow multiple references to
7  * the same block of unmanaged memory.
8  * When the DataWrapper object is destroyed (either manually or by the garbage
9  * collector when there are no remaining Data references), the unmanaged
10  * memory is deallocated.
11  *
12  * This has the following advantage over using managed memory:
13  * $(UL
14  *  $(LI Faster allocation and deallocation, since memory is requested from
15  *       the OS directly as whole pages)
16  *  $(LI Greatly reduced chance of memory leaks (on 32-bit platforms) due to
17  *       stray pointers)
18  *  $(LI Overall improved GC performance due to reduced size of managed heap)
19  *  $(LI Memory is immediately returned to the OS when _data is deallocated)
20  * )
21  * On the other hand, using Data has the following disadvantages:
22  * $(UL
23  *  $(LI This module is designed to store raw _data which does not have any
24  *       pointers. Storing objects containing pointers to managed memory is
25  *       unsupported, and may result in memory corruption.)
26  *  $(LI Small objects may be stored inefficiently, as the module requests
27  *       entire pages of memory from the OS. Considering allocating one large
28  *       object and use slices (Data instances) for individual objects.)
29  * )
30  *
31  * License:
32  *   This Source Code Form is subject to the terms of
33  *   the Mozilla Public License, v. 2.0. If a copy of
34  *   the MPL was not distributed with this file, You
35  *   can obtain one at http://mozilla.org/MPL/2.0/.
36  *
37  * Authors:
38  *   Vladimir Panteleev <vladimir@thecybershadow.net>
39  */
40 
41 module ae.sys.data;
42 
43 static import core.stdc.stdlib;
44 import core.stdc.string : memmove;
45 import std.traits;
46 import core.memory;
47 import core.exception;
48 debug import std.stdio;
49 debug import std.string;
50 public import ae.sys.dataset;
51 import ae.utils.math;
52 
53 debug(DATA) import core.stdc.stdio;
54 
55 // ideas/todo:
56 // * templatize (and forbid using aliased types)?
57 // * use heap (malloc/Windows heap API) for small objects?
58 // * reference counting?
59 // * "immutable" support?
60 
61 /**
62  * Wrapper for data located in external memory, to prevent faux references.
63  * Represents a slice of data, which may or may not be in unmanaged memory.
64  * Data in unmanaged memory is bound to a DataWrapper class instance.
65  *
66  * All operations on this class should be safe, except for accessing contents directly.
67  * All operations on contents must be accompanied by a live reference to the Data object,
68  * to keep a GC anchor towards the unmanaged data.
69  *
70  * Concatenations and appends to Data contents will cause reallocations on the heap, consider using Data instead.
71  *
72  * Be sure not to lose Data references while using their contents!
73  * For example, avoid code like this:
74  * ----
75  * fun(cast(string)transformSomeData(someData).contents);
76  * ----
77  * The Data return value may be unreachable once .contents is evaluated.
78  * Use .toHeap instead of .contents in such cases to get a safe heap copy.
79  */
80 struct Data
81 {
82 private:
83 	/// Wrapped data
84 	const(void)[] _contents;
85 	/// Reference to the wrapper of the actual data - may be null to indicate wrapped data in managed memory.
86 	/// Used as a GC anchor to unmanaged data, and for in-place expands (for appends).
87 	DataWrapper wrapper;
88 	/// Indicates whether we're allowed to modify the data contents.
89 	bool mutable;
90 
91 	/// Maximum preallocation for append operations.
92 	enum { MAX_PREALLOC = 4*1024*1024 } // must be power of 2
93 
94 public:
95 	/**
96 	 * Create new instance wrapping the given data.
97 	 * Params:
98 	 *   data = initial data
99 	 *   forceReallocation = when false, the contents will be duplicated to
100 	 *     unmanaged memory only when it's not on the managed heap; when true,
101 	 *     the contents will be reallocated always.
102 	 */
103 	this(const(void)[] data, bool forceReallocation = false)
104 	{
105 		if (data.length == 0)
106 			contents = null;
107 		else
108 		if (forceReallocation || GC.addrOf(data.ptr) is null)
109 		{
110 			// copy to unmanaged memory
111 			auto wrapper = new MemoryDataWrapper(data.length, data.length);
112 			this.wrapper = wrapper;
113 			wrapper.contents[] = data[];
114 			contents = wrapper.contents;
115 			mutable = true;
116 		}
117 		else
118 		{
119 			// just save a reference
120 			contents = data;
121 			mutable = false;
122 		}
123 
124 		assert(this.length == data.length);
125 	}
126 
127 	/// ditto
128 	this(void[] data, bool forceReallocation = false)
129 	{
130 		const(void)[] cdata = data;
131 		this(cdata, forceReallocation);
132 		mutable = true;
133 	}
134 
135 	/// Create a new instance with given size/capacity. Capacity defaults to size.
136 	this(size_t size, size_t capacity = 0)
137 	in
138 	{
139 		assert(capacity == 0 || size <= capacity);
140 	}
141 	body
142 	{
143 		if (!capacity)
144 			capacity = size;
145 
146 		if (capacity)
147 		{
148 			auto wrapper = new MemoryDataWrapper(size, capacity);
149 			this.wrapper = wrapper;
150 			contents = wrapper.contents;
151 			mutable = true;
152 		}
153 		else
154 		{
155 			wrapper = null;
156 			contents = null;
157 		}
158 
159 		assert(this.length == size);
160 	}
161 
162 	this(DataWrapper wrapper, bool mutable)
163 	{
164 		this.wrapper = wrapper;
165 		this.mutable = mutable;
166 		this.contents = wrapper.contents;
167 	}
168 
169 	this(this)
170 	{
171 		if (wrapper)
172 		{
173 			wrapper.references++;
174 			debug (DATA_REFCOUNT) debugLog("%p -> %p: Incrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references);
175 		}
176 		else
177 			debug (DATA_REFCOUNT) debugLog("%p -> %p: this(this) with no wrapper", cast(void*)&this, cast(void*)wrapper);
178 	}
179 
180 	~this() pure
181 	{
182 		//clear();
183 		// https://issues.dlang.org/show_bug.cgi?id=13809
184 		(cast(void delegate() pure)&clear)();
185 	}
186 
187 /*
188 	/// Create new instance as a slice over an existing DataWrapper.
189 	private this(DataWrapper wrapper, size_t start = 0, size_t end = size_t.max)
190 	{
191 		this.wrapper = wrapper;
192 		this.start = start;
193 		this.end = end==size_t.max ? wrapper.capacity : end;
194 	}
195 */
196 
197 	@property const(void)[] contents() const
198 	{
199 		return _contents;
200 	}
201 
202 	@property private const(void)[] contents(const(void)[] data)
203 	{
204 		return _contents = data;
205 	}
206 
207 	/// Get mutable contents
208 	@property void[] mcontents()
209 	{
210 		if (!mutable && length)
211 		{
212 			reallocate(length, length);
213 			assert(mutable);
214 		}
215 		return cast(void[])_contents;
216 	}
217 
218 	@property const(void)* ptr() const
219 	{
220 		return contents.ptr;
221 	}
222 
223 	@property void* mptr()
224 	{
225 		return mcontents.ptr;
226 	}
227 
228 	@property size_t length() const
229 	{
230 		return contents.length;
231 	}
232 
233 	@property bool empty() const
234 	{
235 		return contents is null;
236 	}
237 
238 	bool opCast(T)()
239 		if (is(T == bool))
240 	{
241 		return !empty;
242 	}
243 
244 	@property size_t capacity() const
245 	{
246 		if (wrapper is null)
247 			return length;
248 		// We can only safely expand if the memory slice is at the end of the used unmanaged memory block.
249 		auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data
250 		auto end = pos + length;               // end   position in wrapper data
251 		assert(end <= wrapper.size);
252 		if (end == wrapper.size && end < wrapper.capacity)
253 			return wrapper.capacity - pos;
254 		else
255 			return length;
256 	}
257 
258 	/// Put a copy of the data on D's managed heap, and return it.
259 	@property
260 	void[] toHeap() const
261 	{
262 		return _contents.dup;
263 	}
264 
265 	private void reallocate(size_t size, size_t capacity)
266 	{
267 		auto wrapper = new MemoryDataWrapper(size, capacity);
268 		wrapper.contents[0..this.length] = contents[];
269 		//(cast(ubyte[])newWrapper.contents)[this.length..value] = 0;
270 
271 		clear();
272 		this.wrapper = wrapper;
273 		this.contents = wrapper.contents;
274 		mutable = true;
275 	}
276 
277 	private void expand(size_t newSize, size_t newCapacity)
278 	in
279 	{
280 		assert(length < newSize);
281 		assert(newSize <= newCapacity);
282 	}
283 	out
284 	{
285 		assert(length == newSize);
286 	}
287 	body
288 	{
289 		if (newCapacity <= capacity)
290 		{
291 			auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data
292 			wrapper.setSize(pos + newSize);
293 			contents = ptr[0..newSize];
294 		}
295 		else
296 			reallocate(newSize, newCapacity);
297 	}
298 
299 	@property void length(size_t value)
300 	{
301 		if (value == length) // no change
302 			return;
303 		if (value < length)  // shorten
304 			_contents = _contents[0..value];
305 		else                 // lengthen
306 			expand(value, value);
307 	}
308 	alias length opDollar;
309 
310 	@property Data dup() const
311 	{
312 		return Data(contents, true);
313 	}
314 
315 	/// This used to be an unsafe method which deleted the wrapped data.
316 	/// Now that Data is refcounted, this simply calls clear() and
317 	/// additionally asserts that this Data is the only Data holding
318 	/// a reference to the wrapper.
319 	void deleteContents()
320 	out
321 	{
322 		assert(wrapper is null);
323 	}
324 	body
325 	{
326 		if (wrapper)
327 		{
328 			assert(wrapper.references == 1, "Attempting to call deleteContents with ");
329 			clear();
330 		}
331 	}
332 
333 	void clear()
334 	{
335 		if (wrapper)
336 		{
337 			assert(wrapper.references > 0, "Dangling pointer to wrapper");
338 			wrapper.references--;
339 			debug (DATA_REFCOUNT) debugLog("%p -> %p: Decrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references);
340 			if (wrapper.references == 0)
341 				wrapper.destroy();
342 
343 			wrapper = null;
344 		}
345 
346 		contents = null;
347 	}
348 
349 	Data concat(const(void)[] data)
350 	{
351 		if (data.length==0)
352 			return this;
353 		Data result = Data(length + data.length);
354 		result.mcontents[0..this.length] = contents[];
355 		result.mcontents[this.length..$] = data[];
356 		return result;
357 	}
358 
359 	Data opCat(T)(const(T)[] data)
360 		if (!hasIndirections!T)
361 	{
362 		return concat(data);
363 	}
364 
365 	Data opCat()(Data data)
366 	{
367 		return concat(data.contents);
368 	}
369 
370 	Data prepend(const(void)[] data)
371 	{
372 		Data result = Data(data.length + length);
373 		result.mcontents[0..data.length] = data[];
374 		result.mcontents[data.length..$] = contents[];
375 		return result;
376 	}
377 
378 	Data opCat_r(T)(const(T)[] data)
379 		if (!hasIndirections!T)
380 	{
381 		return prepend(data);
382 	}
383 
384 	private static size_t getPreallocSize(size_t length)
385 	{
386 		if (length < MAX_PREALLOC)
387 			return nextPowerOfTwo(length);
388 		else
389 			return ((length-1) | (MAX_PREALLOC-1)) + 1;
390 	}
391 
392 	Data append(const(void)[] data)
393 	{
394 		if (data.length==0)
395 			return this;
396 		size_t oldLength = length;
397 		size_t newLength = length + data.length;
398 		expand(newLength, getPreallocSize(newLength));
399 		auto newContents = cast(void[])_contents[oldLength..$];
400 		newContents[] = (cast(void[])data)[];
401 		return this;
402 	}
403 
404 	/// Note that unlike opCat (a ~ b), opCatAssign (a ~= b) will preallocate.
405 	Data opCatAssign(T)(const(T)[] data)
406 		if (!hasIndirections!T)
407 	{
408 		return append(data);
409 	}
410 
411 	Data opCatAssign()(Data data)
412 	{
413 		return append(data.contents);
414 	}
415 
416 	Data opCatAssign()(ubyte value) // hack?
417 	{
418 		return append((&value)[0..1]);
419 	}
420 
421 	Data opSlice()
422 	{
423 		return this;
424 	}
425 
426 	Data opSlice(size_t x, size_t y)
427 	in
428 	{
429 		assert(x <= y);
430 		assert(y <= length);
431 	}
432 // https://issues.dlang.org/show_bug.cgi?id=13463
433 //	out(result)
434 //	{
435 //		assert(result.length == y-x);
436 //	}
437 	body
438 	{
439 		if (x == y)
440 			return Data();
441 		else
442 		{
443 			Data result = this;
444 			result.contents = result.contents[x..y];
445 			return result;
446 		}
447 	}
448 
449 	/// Return a new Data for the first size bytes, and slice this instance from size to end.
450 	Data popFront(size_t size)
451 	in
452 	{
453 		assert(size <= length);
454 	}
455 	body
456 	{
457 		Data result = this;
458 		result.contents = contents[0..size];
459 		this  .contents = contents[size..$];
460 		return result;
461 	}
462 }
463 
464 unittest
465 {
466 	Data d = Data("aaaaa");
467 	assert(d.wrapper.references == 1);
468 	Data s = d[1..4];
469 	assert(d.wrapper.references == 2);
470 }
471 
472 // ************************************************************************
473 
474 static /*thread-local*/ size_t dataMemory, dataMemoryPeak;
475 static /*thread-local*/ uint   dataCount, allocCount;
476 
477 // Abstract wrapper.
478 abstract class DataWrapper
479 {
480 	sizediff_t references = 1;
481 	abstract @property inout(void)[] contents() inout;
482 	abstract @property size_t size() const;
483 	abstract void setSize(size_t newSize);
484 	abstract @property size_t capacity() const;
485 
486 	new(size_t sz)
487 	{
488 		void* p = core.stdc.stdlib.malloc(sz);
489 
490 		debug(DATA_REFCOUNT) debugLog("? -> %p: Allocating via malloc", p);
491 
492 		if (!p)
493 			throw new OutOfMemoryError();
494 
495 		//GC.addRange(p, sz);
496 		return p;
497 	}
498 
499 	debug ~this()
500 	{
501 		debug(DATA_REFCOUNT) debugLog("%.*s.~this, references==%d", this.classinfo.name.length, this.classinfo.name.ptr, references);
502 		assert(references == 0, "Deleting DataWrapper with non-zero reference count");
503 	}
504 
505 	@nogc delete(void* p)
506 	{
507 		if (p)
508 		{
509 			debug(DATA_REFCOUNT) debugLog("? -> %p: Deleting via free", p);
510 
511 			//GC.removeRange(p);
512 			core.stdc.stdlib.free(p);
513 		}
514 	}
515 }
516 
517 void setGCThreshold(size_t value) { MemoryDataWrapper.collectThreshold = value; }
518 
519 private:
520 
521 version (Windows)
522 	import core.sys.windows.windows;
523 else
524 {
525 	import core.sys.posix.unistd;
526 	import core.sys.posix.sys.mman;
527 }
528 
529 /// Wrapper for data in RAM, allocated from the OS.
530 final class MemoryDataWrapper : DataWrapper
531 {
532 	/// Pointer to actual data.
533 	void* data;
534 	/// Used size. Needed for safe appends.
535 	size_t _size;
536 	/// Allocated capacity.
537 	size_t _capacity;
538 
539 	/// Threshold of allocated memory to trigger a collect.
540 	__gshared size_t collectThreshold = 8*1024*1024; // 8MB
541 	/// Counter towards the threshold.
542 	static /*thread-local*/ size_t allocatedThreshold;
543 
544 	/// Create a new instance with given capacity.
545 	this(size_t size, size_t capacity)
546 	{
547 		data = malloc(/*ref*/ capacity);
548 		if (data is null)
549 		{
550 			debug(DATA) printf("Garbage collect triggered by failed Data allocation of %llu bytes... ", cast(ulong)capacity);
551 			GC.collect();
552 			debug(DATA) printf("Done\n");
553 			data = malloc(/*ref*/ capacity);
554 			allocatedThreshold = 0;
555 		}
556 		if (data is null)
557 			onOutOfMemoryError();
558 
559 		dataMemory += capacity;
560 		if (dataMemoryPeak < dataMemory)
561 			dataMemoryPeak = dataMemory;
562 		dataCount ++;
563 		allocCount ++;
564 
565 		this._size = size;
566 		this._capacity = capacity;
567 
568 		// also collect
569 		allocatedThreshold += capacity;
570 		if (allocatedThreshold > collectThreshold)
571 		{
572 			debug(DATA) printf("Garbage collect triggered by total allocated Data exceeding threshold... ");
573 			GC.collect();
574 			debug(DATA) printf("Done\n");
575 			allocatedThreshold = 0;
576 		}
577 	}
578 
579 	/// Destructor - destroys the wrapped data.
580 	~this()
581 	{
582 		free(data, capacity);
583 		data = null;
584 		// If DataWrapper is created and manually deleted, there is no need to cause frequent collections
585 		if (allocatedThreshold > capacity)
586 			allocatedThreshold -= capacity;
587 		else
588 			allocatedThreshold = 0;
589 
590 		dataMemory -= capacity;
591 		dataCount --;
592 	}
593 
594 	@property override
595 	size_t size() const { return _size; }
596 
597 	@property override
598 	size_t capacity() const { return _capacity; }
599 
600 	override void setSize(size_t newSize)
601 	{
602 		assert(newSize <= capacity);
603 		_size = newSize;
604 	}
605 
606 	@property override
607 	inout(void)[] contents() inout
608 	{
609 		return data[0..size];
610 	}
611 
612 	// https://github.com/D-Programming-Language/druntime/pull/759
613 	version(OSX)
614 		enum _SC_PAGE_SIZE = 29;
615 
616 	// https://github.com/D-Programming-Language/druntime/pull/1140
617 	version(FreeBSD)
618 		enum _SC_PAGE_SIZE = 47;
619 
620 	version(Windows)
621 	{
622 		static immutable size_t pageSize;
623 
624 		shared static this()
625 		{
626 			SYSTEM_INFO si;
627 			GetSystemInfo(&si);
628 			pageSize = si.dwPageSize;
629 		}
630 	}
631 	else
632 	static if (is(typeof(_SC_PAGE_SIZE)))
633 	{
634 		static immutable size_t pageSize;
635 
636 		shared static this()
637 		{
638 			pageSize = sysconf(_SC_PAGE_SIZE);
639 		}
640 	}
641 
642 	static void* malloc(ref size_t size)
643 	{
644 		if (is(typeof(pageSize)))
645 			size = ((size-1) | (pageSize-1))+1;
646 
647 		version(Windows)
648 		{
649 			return VirtualAlloc(null, size, MEM_COMMIT, PAGE_READWRITE);
650 		}
651 		else
652 		version(Posix)
653 		{
654 			version(linux)
655 				import core.sys.linux.sys.mman : MAP_ANON;
656 			auto p = mmap(null, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
657 			return (p == MAP_FAILED) ? null : p;
658 		}
659 		else
660 			return core.stdc.malloc(size);
661 	}
662 
663 	static void free(void* p, size_t size)
664 	{
665 		debug
666 		{
667 			(cast(ubyte*)p)[0..size] = 0xDB;
668 		}
669 		version(Windows)
670 			VirtualFree(p, 0, MEM_RELEASE);
671 		else
672 		version(Posix)
673 			munmap(p, size);
674 		else
675 			core.stdc.free(size);
676 	}
677 }
678 
679 // Source: Win32 bindings project
680 version(Windows)
681 {
682 	struct SYSTEM_INFO {
683 		union {
684 			DWORD dwOemId;
685 			struct {
686 				WORD wProcessorArchitecture;
687 				WORD wReserved;
688 			}
689 		}
690 		DWORD dwPageSize;
691 		PVOID lpMinimumApplicationAddress;
692 		PVOID lpMaximumApplicationAddress;
693 		DWORD dwActiveProcessorMask;
694 		DWORD dwNumberOfProcessors;
695 		DWORD dwProcessorType;
696 		DWORD dwAllocationGranularity;
697 		WORD  wProcessorLevel;
698 		WORD  wProcessorRevision;
699 	}
700 	alias SYSTEM_INFO* LPSYSTEM_INFO;
701 
702 	extern(Windows) VOID GetSystemInfo(LPSYSTEM_INFO);
703 }
704 
705 debug(DATA_REFCOUNT) import ae.utils.exception, ae.sys.memory, std.stdio;
706 
707 debug(DATA_REFCOUNT) void debugLog(Args...)(const char* s, Args args) @nogc
708 {
709 	printf(s, args);
710 	printf("\n");
711 	if (inCollect())
712 		printf("\t(in GC collect)\n");
713 	else
714 		(cast(void function() @nogc)&debugStackTrace)();
715 	fflush(core.stdc.stdio.stdout);
716 }
717 
718 debug(DATA_REFCOUNT) void debugStackTrace()
719 {
720 	try
721 		foreach (line; getStackTrace())
722 			writeln("\t", line);
723 	catch (Throwable e)
724 		writeln("\t(stacktrace error: ", e.msg, ")");
725 }