1 /**
2  * Wrappers for raw _data located in unmanaged memory.
3  *
4  * Using the Data type will only place a small object in managed memory,
5  * keeping the actual _data in unmanaged memory.
6  * A proxy class (DataWrapper) is used to safely allow multiple references to
7  * the same block of unmanaged memory.
8  * When the DataWrapper object is destroyed (either manually or by the garbage
9  * collector when there are no remaining Data references), the unmanaged
10  * memory is deallocated.
11  *
12  * This has the following advantage over using managed memory:
13  * $(UL
14  *  $(LI Faster allocation and deallocation, since memory is requested from
15  *       the OS directly as whole pages)
16  *  $(LI Greatly reduced chance of memory leaks (on 32-bit platforms) due to
17  *       stray pointers)
18  *  $(LI Overall improved GC performance due to reduced size of managed heap)
19  *  $(LI Memory is immediately returned to the OS when _data is deallocated)
20  * )
21  * On the other hand, using Data has the following disadvantages:
22  * $(UL
23  *  $(LI This module is designed to store raw _data which does not have any
24  *       pointers. Storing objects containing pointers to managed memory is
25  *       unsupported, and may result in memory corruption.)
26  *  $(LI Small objects may be stored inefficiently, as the module requests
27  *       entire pages of memory from the OS. Considering allocating one large
28  *       object and use slices (Data instances) for individual objects.)
29  * )
30  *
31  * License:
32  *   This Source Code Form is subject to the terms of
33  *   the Mozilla Public License, v. 2.0. If a copy of
34  *   the MPL was not distributed with this file, You
35  *   can obtain one at http://mozilla.org/MPL/2.0/.
36  *
37  * Authors:
38  *   Vladimir Panteleev <vladimir@thecybershadow.net>
39  */
40 
41 module ae.sys.data;
42 
43 static import core.stdc.stdlib;
44 import core.stdc.string : memmove;
45 import std.traits;
46 import core.memory;
47 import core.exception;
48 debug import std.stdio;
49 debug import std.string;
50 public import ae.sys.dataset;
51 import ae.utils.math;
52 
53 debug(DATA) import core.stdc.stdio;
54 
55 // ideas/todo:
56 // * templatize (and forbid using aliased types)?
57 // * use heap (malloc/Windows heap API) for small objects?
58 // * reference counting?
59 // * "immutable" support?
60 
61 /**
62  * Wrapper for data located in external memory, to prevent faux references.
63  * Represents a slice of data, which may or may not be in unmanaged memory.
64  * Data in unmanaged memory is bound to a DataWrapper class instance.
65  *
66  * All operations on this class should be safe, except for accessing contents directly.
67  * All operations on contents must be accompanied by a live reference to the Data object,
68  * to keep a GC anchor towards the unmanaged data.
69  *
70  * Concatenations and appends to Data contents will cause reallocations on the heap, consider using Data instead.
71  *
72  * Be sure not to lose Data references while using their contents!
73  * For example, avoid code like this:
74  * ----
75  * fun(cast(string)transformSomeData(someData).contents);
76  * ----
77  * The Data return value may be unreachable once .contents is evaluated.
78  * Use .toHeap instead of .contents in such cases to get a safe heap copy.
79  */
80 struct Data
81 {
82 private:
83 	/// Wrapped data
84 	const(void)[] _contents;
85 	/// Reference to the wrapper of the actual data - may be null to indicate wrapped data in managed memory.
86 	/// Used as a GC anchor to unmanaged data, and for in-place expands (for appends).
87 	DataWrapper wrapper;
88 	/// Indicates whether we're allowed to modify the data contents.
89 	bool mutable;
90 
91 	/// Maximum preallocation for append operations.
92 	enum { MAX_PREALLOC = 4*1024*1024 } // must be power of 2
93 
94 public:
95 	/**
96 	 * Create new instance wrapping the given data.
97 	 * Params:
98 	 *   data = initial data
99 	 *   forceReallocation = when false, the contents will be duplicated to
100 	 *     unmanaged memory only when it's not on the managed heap; when true,
101 	 *     the contents will be reallocated always.
102 	 */
103 	this(const(void)[] data, bool forceReallocation = false)
104 	{
105 		if (data.length == 0)
106 			contents = null;
107 		else
108 		if (forceReallocation || GC.addrOf(data.ptr) is null)
109 		{
110 			// copy to unmanaged memory
111 			auto wrapper = new MemoryDataWrapper(data.length, data.length);
112 			this.wrapper = wrapper;
113 			wrapper.contents[] = data[];
114 			contents = wrapper.contents;
115 			mutable = true;
116 		}
117 		else
118 		{
119 			// just save a reference
120 			contents = data;
121 			mutable = false;
122 		}
123 
124 		assert(this.length == data.length);
125 	}
126 
127 	/// ditto
128 	this(void[] data, bool forceReallocation = false)
129 	{
130 		const(void)[] cdata = data;
131 		this(cdata, forceReallocation);
132 		mutable = true;
133 	}
134 
135 	/// Create a new instance with given size/capacity. Capacity defaults to size.
136 	this(size_t size, size_t capacity = 0)
137 	in
138 	{
139 		assert(capacity == 0 || size <= capacity);
140 	}
141 	body
142 	{
143 		if (!capacity)
144 			capacity = size;
145 
146 		if (capacity)
147 		{
148 			auto wrapper = new MemoryDataWrapper(size, capacity);
149 			this.wrapper = wrapper;
150 			contents = wrapper.contents;
151 			mutable = true;
152 		}
153 		else
154 		{
155 			wrapper = null;
156 			contents = null;
157 		}
158 
159 		assert(this.length == size);
160 	}
161 
162 	this(DataWrapper wrapper, bool mutable)
163 	{
164 		this.wrapper = wrapper;
165 		this.mutable = mutable;
166 		this.contents = wrapper.contents;
167 	}
168 
169 	this(this)
170 	{
171 		if (wrapper)
172 		{
173 			wrapper.references++;
174 			debug (DATA_REFCOUNT) debugLog("%p -> %p: Incrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references);
175 		}
176 		else
177 			debug (DATA_REFCOUNT) debugLog("%p -> %p: this(this) with no wrapper", cast(void*)&this, cast(void*)wrapper);
178 	}
179 
180 	~this() pure
181 	{
182 		//clear();
183 		// https://issues.dlang.org/show_bug.cgi?id=13809
184 		(cast(void delegate() pure)&clear)();
185 	}
186 
187 /*
188 	/// Create new instance as a slice over an existing DataWrapper.
189 	private this(DataWrapper wrapper, size_t start = 0, size_t end = size_t.max)
190 	{
191 		this.wrapper = wrapper;
192 		this.start = start;
193 		this.end = end==size_t.max ? wrapper.capacity : end;
194 	}
195 */
196 
197 	@property const(void)[] contents() const
198 	{
199 		return _contents;
200 	}
201 
202 	@property private const(void)[] contents(const(void)[] data)
203 	{
204 		return _contents = data;
205 	}
206 
207 	/// Get mutable contents
208 	@property void[] mcontents()
209 	{
210 		if (!mutable && length)
211 		{
212 			reallocate(length, length);
213 			assert(mutable);
214 		}
215 		return cast(void[])_contents;
216 	}
217 
218 	@property const(void)* ptr() const
219 	{
220 		return contents.ptr;
221 	}
222 
223 	@property void* mptr()
224 	{
225 		return mcontents.ptr;
226 	}
227 
228 	@property size_t length() const
229 	{
230 		return contents.length;
231 	}
232 
233 	@property bool empty() const
234 	{
235 		return contents is null;
236 	}
237 
238 	bool opCast(T)()
239 		if (is(T == bool))
240 	{
241 		return !empty;
242 	}
243 
244 	@property size_t capacity() const
245 	{
246 		if (wrapper is null)
247 			return length;
248 		// We can only safely expand if the memory slice is at the end of the used unmanaged memory block.
249 		auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data
250 		auto end = pos + length;               // end   position in wrapper data
251 		assert(end <= wrapper.size);
252 		if (end == wrapper.size && end < wrapper.capacity)
253 			return wrapper.capacity - pos;
254 		else
255 			return length;
256 	}
257 
258 	/// Put a copy of the data on D's managed heap, and return it.
259 	@property
260 	void[] toHeap() const
261 	{
262 		return _contents.dup;
263 	}
264 
265 	private void reallocate(size_t size, size_t capacity)
266 	{
267 		auto wrapper = new MemoryDataWrapper(size, capacity);
268 		wrapper.contents[0..this.length] = contents[];
269 		//(cast(ubyte[])newWrapper.contents)[this.length..value] = 0;
270 
271 		clear();
272 		this.wrapper = wrapper;
273 		this.contents = wrapper.contents;
274 		mutable = true;
275 	}
276 
277 	private void expand(size_t newSize, size_t newCapacity)
278 	in
279 	{
280 		assert(length < newSize);
281 		assert(newSize <= newCapacity);
282 	}
283 	out
284 	{
285 		assert(length == newSize);
286 	}
287 	body
288 	{
289 		if (newCapacity <= capacity)
290 		{
291 			auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data
292 			wrapper.setSize(pos + newSize);
293 			contents = ptr[0..newSize];
294 		}
295 		else
296 			reallocate(newSize, newCapacity);
297 	}
298 
299 	@property void length(size_t value)
300 	{
301 		if (value == length) // no change
302 			return;
303 		if (value < length)  // shorten
304 			_contents = _contents[0..value];
305 		else                 // lengthen
306 			expand(value, value);
307 	}
308 	alias length opDollar;
309 
310 	@property Data dup() const
311 	{
312 		return Data(contents, true);
313 	}
314 
315 	/// This used to be an unsafe method which deleted the wrapped data.
316 	/// Now that Data is refcounted, this simply calls clear() and
317 	/// additionally asserts that this Data is the only Data holding
318 	/// a reference to the wrapper.
319 	void deleteContents()
320 	out
321 	{
322 		assert(wrapper is null);
323 	}
324 	body
325 	{
326 		if (wrapper)
327 		{
328 			assert(wrapper.references == 1, "Attempting to call deleteContents with ");
329 			clear();
330 		}
331 	}
332 
333 	void clear()
334 	{
335 		if (wrapper)
336 		{
337 			assert(wrapper.references > 0, "Dangling pointer to wrapper");
338 			wrapper.references--;
339 			debug (DATA_REFCOUNT) debugLog("%p -> %p: Decrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references);
340 			if (wrapper.references == 0)
341 				delete wrapper;
342 
343 			wrapper = null;
344 		}
345 
346 		contents = null;
347 	}
348 
349 	Data concat(const(void)[] data)
350 	{
351 		if (data.length==0)
352 			return this;
353 		Data result = Data(length + data.length);
354 		result.mcontents[0..this.length] = contents[];
355 		result.mcontents[this.length..$] = data[];
356 		return result;
357 	}
358 
359 	Data opCat(T)(const(T)[] data)
360 		if (!hasIndirections!T)
361 	{
362 		return concat(data);
363 	}
364 
365 	Data opCat()(Data data)
366 	{
367 		return concat(data.contents);
368 	}
369 
370 	Data prepend(const(void)[] data)
371 	{
372 		Data result = Data(data.length + length);
373 		result.mcontents[0..data.length] = data[];
374 		result.mcontents[data.length..$] = contents[];
375 		return result;
376 	}
377 
378 	Data opCat_r(T)(const(T)[] data)
379 		if (!hasIndirections!T)
380 	{
381 		return prepend(data);
382 	}
383 
384 	private static size_t getPreallocSize(size_t length)
385 	{
386 		if (length < MAX_PREALLOC)
387 			return nextPowerOfTwo(length);
388 		else
389 			return ((length-1) | (MAX_PREALLOC-1)) + 1;
390 	}
391 
392 	Data append(const(void)[] data)
393 	{
394 		if (data.length==0)
395 			return this;
396 		size_t oldLength = length;
397 		size_t newLength = length + data.length;
398 		expand(newLength, getPreallocSize(newLength));
399 		auto newContents = cast(void[])_contents[oldLength..$];
400 		newContents[] = (cast(void[])data)[];
401 		return this;
402 	}
403 
404 	/// Note that unlike opCat (a ~ b), opCatAssign (a ~= b) will preallocate.
405 	Data opCatAssign(T)(const(T)[] data)
406 		if (!hasIndirections!T)
407 	{
408 		return append(data);
409 	}
410 
411 	Data opCatAssign()(Data data)
412 	{
413 		return append(data.contents);
414 	}
415 
416 	Data opCatAssign()(ubyte value) // hack?
417 	{
418 		return append((&value)[0..1]);
419 	}
420 
421 	Data opSlice()
422 	{
423 		return this;
424 	}
425 
426 	Data opSlice(size_t x, size_t y)
427 	in
428 	{
429 		assert(x <= y);
430 		assert(y <= length);
431 	}
432 // https://issues.dlang.org/show_bug.cgi?id=13463
433 //	out(result)
434 //	{
435 //		assert(result.length == y-x);
436 //	}
437 	body
438 	{
439 		if (x == y)
440 			return Data();
441 		else
442 		{
443 			Data result = this;
444 			result.contents = result.contents[x..y];
445 			return result;
446 		}
447 	}
448 
449 	/// Return a new Data for the first size bytes, and slice this instance from size to end.
450 	Data popFront(size_t size)
451 	in
452 	{
453 		assert(size <= length);
454 	}
455 	body
456 	{
457 		Data result = this;
458 		result.contents = contents[0..size];
459 		this  .contents = contents[size..$];
460 		return result;
461 	}
462 }
463 
464 unittest
465 {
466 	Data d = Data("aaaaa");
467 	assert(d.wrapper.references == 1);
468 	Data s = d[1..4];
469 	assert(d.wrapper.references == 2);
470 }
471 
472 // ************************************************************************
473 
474 static /*thread-local*/ size_t dataMemory, dataMemoryPeak;
475 static /*thread-local*/ uint   dataCount, allocCount;
476 
477 // Abstract wrapper.
478 abstract class DataWrapper
479 {
480 	sizediff_t references = 1;
481 	abstract @property inout(void)[] contents() inout;
482 	abstract @property size_t size() const;
483 	abstract void setSize(size_t newSize);
484 	abstract @property size_t capacity() const;
485 
486 	new(size_t sz)
487 	{
488 		void* p = core.stdc.stdlib.malloc(sz);
489 
490 		debug(DATA_REFCOUNT) debugLog("? -> %p: Allocating via malloc", p);
491 
492 		if (!p)
493 			throw new OutOfMemoryError();
494 
495 		//GC.addRange(p, sz);
496 		return p;
497 	}
498 
499 	debug ~this()
500 	{
501 		debug(DATA_REFCOUNT) debugLog("%.*s.~this, references==%d", this.classinfo.name.length, this.classinfo.name.ptr, references);
502 		assert(references == 0, "Deleting DataWrapper with non-zero reference count");
503 	}
504 
505 	@nogc delete(void* p)
506 	{
507 		if (p)
508 		{
509 			debug(DATA_REFCOUNT) debugLog("? -> %p: Deleting via free", p);
510 
511 			//GC.removeRange(p);
512 			core.stdc.stdlib.free(p);
513 		}
514 	}
515 }
516 
517 private:
518 
519 version (Windows)
520 	import core.sys.windows.windows;
521 else
522 {
523 	import core.sys.posix.unistd;
524 	import core.sys.posix.sys.mman;
525 }
526 
527 /// Wrapper for data in RAM, allocated from the OS.
528 final class MemoryDataWrapper : DataWrapper
529 {
530 	/// Pointer to actual data.
531 	void* data;
532 	/// Used size. Needed for safe appends.
533 	size_t _size;
534 	/// Allocated capacity.
535 	size_t _capacity;
536 
537 	/// Threshold of allocated memory to trigger a collect.
538 	enum { COLLECT_THRESHOLD = 8*1024*1024 } // 8MB
539 	/// Counter towards the threshold.
540 	static /*thread-local*/ size_t allocatedThreshold;
541 
542 	/// Create a new instance with given capacity.
543 	this(size_t size, size_t capacity)
544 	{
545 		data = malloc(/*ref*/ capacity);
546 		if (data is null)
547 		{
548 			debug(DATA) printf("Garbage collect triggered by failed Data allocation of %llu bytes... ", cast(ulong)capacity);
549 			GC.collect();
550 			debug(DATA) printf("Done\n");
551 			data = malloc(/*ref*/ capacity);
552 			allocatedThreshold = 0;
553 		}
554 		if (data is null)
555 			onOutOfMemoryError();
556 
557 		dataMemory += capacity;
558 		if (dataMemoryPeak < dataMemory)
559 			dataMemoryPeak = dataMemory;
560 		dataCount ++;
561 		allocCount ++;
562 
563 		this._size = size;
564 		this._capacity = capacity;
565 
566 		// also collect
567 		allocatedThreshold += capacity;
568 		if (allocatedThreshold > COLLECT_THRESHOLD)
569 		{
570 			debug(DATA) printf("Garbage collect triggered by total allocated Data exceeding threshold... ");
571 			GC.collect();
572 			debug(DATA) printf("Done\n");
573 			allocatedThreshold = 0;
574 		}
575 	}
576 
577 	/// Destructor - destroys the wrapped data.
578 	~this()
579 	{
580 		free(data, capacity);
581 		data = null;
582 		// If DataWrapper is created and manually deleted, there is no need to cause frequent collections
583 		if (allocatedThreshold > capacity)
584 			allocatedThreshold -= capacity;
585 		else
586 			allocatedThreshold = 0;
587 
588 		dataMemory -= capacity;
589 		dataCount --;
590 	}
591 
592 	@property override
593 	size_t size() const { return _size; }
594 
595 	@property override
596 	size_t capacity() const { return _capacity; }
597 
598 	override void setSize(size_t newSize)
599 	{
600 		assert(newSize <= capacity);
601 		_size = newSize;
602 	}
603 
604 	@property override
605 	inout(void)[] contents() inout
606 	{
607 		return data[0..size];
608 	}
609 
610 	// https://github.com/D-Programming-Language/druntime/pull/759
611 	version(OSX)
612 		enum _SC_PAGE_SIZE = 29;
613 
614 	// https://github.com/D-Programming-Language/druntime/pull/1140
615 	version(FreeBSD)
616 		enum _SC_PAGE_SIZE = 47;
617 
618 	version(Windows)
619 	{
620 		static immutable size_t pageSize;
621 
622 		shared static this()
623 		{
624 			SYSTEM_INFO si;
625 			GetSystemInfo(&si);
626 			pageSize = si.dwPageSize;
627 		}
628 	}
629 	else
630 	static if (is(typeof(_SC_PAGE_SIZE)))
631 	{
632 		static immutable size_t pageSize;
633 
634 		shared static this()
635 		{
636 			pageSize = sysconf(_SC_PAGE_SIZE);
637 		}
638 	}
639 
640 	static void* malloc(ref size_t size)
641 	{
642 		if (is(typeof(pageSize)))
643 			size = ((size-1) | (pageSize-1))+1;
644 
645 		version(Windows)
646 		{
647 			return VirtualAlloc(null, size, MEM_COMMIT, PAGE_READWRITE);
648 		}
649 		else
650 		version(Posix)
651 		{
652 			version(linux)
653 				import core.sys.linux.sys.mman : MAP_ANON;
654 			auto p = mmap(null, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
655 			return (p == MAP_FAILED) ? null : p;
656 		}
657 		else
658 			return core.stdc.malloc(size);
659 	}
660 
661 	static void free(void* p, size_t size)
662 	{
663 		debug
664 		{
665 			(cast(ubyte*)p)[0..size] = 0xDB;
666 		}
667 		version(Windows)
668 			VirtualFree(p, 0, MEM_RELEASE);
669 		else
670 		version(Posix)
671 			munmap(p, size);
672 		else
673 			core.stdc.free(size);
674 	}
675 }
676 
677 // Source: Win32 bindings project
678 version(Windows)
679 {
680 	struct SYSTEM_INFO {
681 		union {
682 			DWORD dwOemId;
683 			struct {
684 				WORD wProcessorArchitecture;
685 				WORD wReserved;
686 			}
687 		}
688 		DWORD dwPageSize;
689 		PVOID lpMinimumApplicationAddress;
690 		PVOID lpMaximumApplicationAddress;
691 		DWORD dwActiveProcessorMask;
692 		DWORD dwNumberOfProcessors;
693 		DWORD dwProcessorType;
694 		DWORD dwAllocationGranularity;
695 		WORD  wProcessorLevel;
696 		WORD  wProcessorRevision;
697 	}
698 	alias SYSTEM_INFO* LPSYSTEM_INFO;
699 
700 	extern(Windows) VOID GetSystemInfo(LPSYSTEM_INFO);
701 }
702 
703 debug(DATA_REFCOUNT) import ae.utils.exception, ae.sys.memory, std.stdio;
704 
705 debug(DATA_REFCOUNT) void debugLog(Args...)(const char* s, Args args) @nogc
706 {
707 	printf(s, args);
708 	printf("\n");
709 	if (inCollect())
710 		printf("\t(in GC collect)\n");
711 	else
712 		(cast(void function() @nogc)&debugStackTrace)();
713 	fflush(core.stdc.stdio.stdout);
714 }
715 
716 debug(DATA_REFCOUNT) void debugStackTrace()
717 {
718 	try
719 		foreach (line; getStackTrace())
720 			writeln("\t", line);
721 	catch (Throwable e)
722 		writeln("\t(stacktrace error: ", e.msg, ")");
723 }