1 /**
2  * Wrappers for raw _data located in unmanaged memory.
3  *
4  * Using the Data type will only place a small object in managed memory,
5  * keeping the actual _data in unmanaged memory.
6  * A proxy class (DataWrapper) is used to safely allow multiple references to
7  * the same block of unmanaged memory.
8  * When the DataWrapper object is destroyed (either manually or by the garbage
9  * collector when there are no remaining Data references), the unmanaged
10  * memory is deallocated.
11  *
12  * This has the following advantage over using managed memory:
13  * $(UL
14  *  $(LI Faster allocation and deallocation, since memory is requested from
15  *       the OS directly as whole pages)
16  *  $(LI Greatly reduced chance of memory leaks (on 32-bit platforms) due to
17  *       stray pointers)
18  *  $(LI Overall improved GC performance due to reduced size of managed heap)
19  *  $(LI Memory is immediately returned to the OS when _data is deallocated)
20  * )
21  * On the other hand, using Data has the following disadvantages:
22  * $(UL
23  *  $(LI This module is designed to store raw _data which does not have any
24  *       pointers. Storing objects containing pointers to managed memory is
25  *       unsupported, and may result in memory corruption.)
26  *  $(LI Small objects may be stored inefficiently, as the module requests
27  *       entire pages of memory from the OS. Considering allocating one large
28  *       object and use slices (Data instances) for individual objects.)
29  * )
30  *
31  * License:
32  *   This Source Code Form is subject to the terms of
33  *   the Mozilla Public License, v. 2.0. If a copy of
34  *   the MPL was not distributed with this file, You
35  *   can obtain one at http://mozilla.org/MPL/2.0/.
36  *
37  * Authors:
38  *   Vladimir Panteleev <vladimir@thecybershadow.net>
39  */
40 
41 module ae.sys.data;
42 
43 static import core.stdc.stdlib;
44 import core.stdc.string : memmove;
45 import std.traits;
46 import core.memory;
47 import core.exception;
48 debug import std.stdio;
49 debug import std.string;
50 public import ae.sys.dataset;
51 import ae.utils.math;
52 
53 // ideas/todo:
54 // * templatize (and forbid using aliased types)?
55 // * use heap (malloc/Windows heap API) for small objects?
56 // * reference counting?
57 // * "immutable" support?
58 
59 /**
60  * Wrapper for data located in external memory, to prevent faux references.
61  * Represents a slice of data, which may or may not be in unmanaged memory.
62  * Data in unmanaged memory is bound to a DataWrapper class instance.
63  *
64  * All operations on this class should be safe, except for accessing contents directly.
65  * All operations on contents must be accompanied by a live reference to the Data object,
66  * to keep a GC anchor towards the unmanaged data.
67  *
68  * Concatenations and appends to Data contents will cause reallocations on the heap, consider using Data instead.
69  *
70  * Be sure not to lose Data references while using their contents!
71  * For example, avoid code like this:
72  * ----
73  * fun(cast(string)transformSomeData(someData).contents);
74  * ----
75  * The Data return value may be unreachable once .contents is evaluated.
76  * Use .toHeap instead of .contents in such cases to get a safe heap copy.
77  */
78 struct Data
79 {
80 private:
81 	/// Wrapped data
82 	const(void)[] _contents;
83 	/// Reference to the wrapper of the actual data - may be null to indicate wrapped data in managed memory.
84 	/// Used as a GC anchor to unmanaged data, and for in-place expands (for appends).
85 	DataWrapper wrapper;
86 	/// Indicates whether we're allowed to modify the data contents.
87 	bool mutable;
88 
89 	/// Maximum preallocation for append operations.
90 	enum { MAX_PREALLOC = 4*1024*1024 } // must be power of 2
91 
92 public:
93 	/**
94 	 * Create new instance wrapping the given data.
95 	 * Params:
96 	 *   data = initial data
97 	 *   forceReallocation = when false, the contents will be duplicated to
98 	 *     unmanaged memory only when it's not on the managed heap; when true,
99 	 *     the contents will be reallocated always.
100 	 */
101 	this(const(void)[] data, bool forceReallocation = false)
102 	{
103 		if (data.length == 0)
104 			contents = null;
105 		else
106 		if (forceReallocation || GC.addrOf(data.ptr) is null)
107 		{
108 			// copy to unmanaged memory
109 			auto wrapper = new MemoryDataWrapper(data.length, data.length);
110 			this.wrapper = wrapper;
111 			wrapper.contents[] = data[];
112 			contents = wrapper.contents;
113 			mutable = true;
114 		}
115 		else
116 		{
117 			// just save a reference
118 			contents = data;
119 			mutable = false;
120 		}
121 
122 		assert(this.length == data.length);
123 	}
124 
125 	/// ditto
126 	this(void[] data, bool forceReallocation = false)
127 	{
128 		const(void)[] cdata = data;
129 		this(cdata, forceReallocation);
130 		mutable = true;
131 	}
132 
133 	/// Create a new instance with given size/capacity. Capacity defaults to size.
134 	this(size_t size = 0, size_t capacity = 0)
135 	in
136 	{
137 		assert(capacity == 0 || size <= capacity);
138 	}
139 	body
140 	{
141 		if (!capacity)
142 			capacity = size;
143 
144 		if (capacity)
145 		{
146 			auto wrapper = new MemoryDataWrapper(size, capacity);
147 			this.wrapper = wrapper;
148 			contents = wrapper.contents;
149 			mutable = true;
150 		}
151 		else
152 		{
153 			wrapper = null;
154 			contents = null;
155 		}
156 
157 		assert(this.length == size);
158 	}
159 
160 	this(DataWrapper wrapper, bool mutable)
161 	{
162 		this.wrapper = wrapper;
163 		this.mutable = mutable;
164 		this.contents = wrapper.contents;
165 	}
166 
167 	this(this)
168 	{
169 		if (wrapper)
170 		{
171 			wrapper.references++;
172 			debug (DATA_REFCOUNT) debugLog(format("%s -> %s: Incrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references));
173 		}
174 		else
175 			debug (DATA_REFCOUNT) debugLog(format("%s -> %s: this(this) with no wrapper", cast(void*)&this, cast(void*)wrapper));
176 	}
177 
178 	~this() pure
179 	{
180 		//clear();
181 		// https://issues.dlang.org/show_bug.cgi?id=13809
182 		(cast(void delegate() pure)&clear)();
183 	}
184 
185 /*
186 	/// Create new instance as a slice over an existing DataWrapper.
187 	private this(DataWrapper wrapper, size_t start = 0, size_t end = size_t.max)
188 	{
189 		this.wrapper = wrapper;
190 		this.start = start;
191 		this.end = end==size_t.max ? wrapper.capacity : end;
192 	}
193 */
194 
195 	@property const(void)[] contents() const
196 	{
197 		return _contents;
198 	}
199 
200 	@property private const(void)[] contents(const(void)[] data)
201 	{
202 		return _contents = data;
203 	}
204 
205 	/// Get mutable contents
206 	@property void[] mcontents()
207 	{
208 		if (!mutable && length)
209 		{
210 			reallocate(length, length);
211 			assert(mutable);
212 		}
213 		return cast(void[])_contents;
214 	}
215 
216 	@property const(void)* ptr() const
217 	{
218 		return contents.ptr;
219 	}
220 
221 	@property void* mptr()
222 	{
223 		return mcontents.ptr;
224 	}
225 
226 	@property size_t length() const
227 	{
228 		return contents.length;
229 	}
230 
231 	@property bool empty() const
232 	{
233 		return contents is null;
234 	}
235 
236 	bool opCast(T)()
237 		if (is(T == bool))
238 	{
239 		return !empty;
240 	}
241 
242 	@property size_t capacity() const
243 	{
244 		if (wrapper is null)
245 			return length;
246 		// We can only safely expand if the memory slice is at the end of the used unmanaged memory block.
247 		auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data
248 		auto end = pos + length;               // end   position in wrapper data
249 		assert(end <= wrapper.size);
250 		if (end == wrapper.size && end < wrapper.capacity)
251 			return wrapper.capacity - pos;
252 		else
253 			return length;
254 	}
255 
256 	/// Put a copy of the data on D's managed heap, and return it.
257 	@property
258 	void[] toHeap() const
259 	{
260 		return _contents.dup;
261 	}
262 
263 	private void reallocate(size_t size, size_t capacity)
264 	{
265 		auto wrapper = new MemoryDataWrapper(size, capacity);
266 		wrapper.contents[0..this.length] = contents[];
267 		//(cast(ubyte[])newWrapper.contents)[this.length..value] = 0;
268 
269 		clear();
270 		this.wrapper = wrapper;
271 		this.contents = wrapper.contents;
272 		mutable = true;
273 	}
274 
275 	private void expand(size_t newSize, size_t newCapacity)
276 	in
277 	{
278 		assert(length < newSize);
279 		assert(newSize <= newCapacity);
280 	}
281 	out
282 	{
283 		assert(length == newSize);
284 	}
285 	body
286 	{
287 		if (newCapacity <= capacity)
288 		{
289 			auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data
290 			wrapper.setSize(pos + newSize);
291 			contents = ptr[0..newSize];
292 		}
293 		else
294 			reallocate(newSize, newCapacity);
295 	}
296 
297 	@property void length(size_t value)
298 	{
299 		if (value == length) // no change
300 			return;
301 		if (value < length)  // shorten
302 			_contents = _contents[0..value];
303 		else                 // lengthen
304 			expand(value, value);
305 	}
306 	alias length opDollar;
307 
308 	@property Data dup() const
309 	{
310 		return Data(contents, true);
311 	}
312 
313 	/// UNSAFE! Use only when you know there is only one reference to the data.
314 	void deleteContents()
315 	out
316 	{
317 		assert(wrapper is null);
318 	}
319 	body
320 	{
321 		delete wrapper;
322 		contents = null;
323 	}
324 
325 	void clear()
326 	{
327 		if (wrapper)
328 		{
329 			assert(wrapper.references > 0, "Dangling pointer to wrapper");
330 			wrapper.references--;
331 			debug (DATA_REFCOUNT) debugLog(format("%s -> %s: Decrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references));
332 			if (wrapper.references == 0)
333 				destroy(wrapper);
334 		}
335 
336 		wrapper = null;
337 		contents = null;
338 	}
339 
340 	Data concat(const(void)[] data)
341 	{
342 		if (data.length==0)
343 			return this;
344 		Data result = Data(length + data.length);
345 		result.mcontents[0..this.length] = contents[];
346 		result.mcontents[this.length..$] = data[];
347 		return result;
348 	}
349 
350 	Data opCat(T)(const(T)[] data)
351 		if (!hasIndirections!T)
352 	{
353 		return concat(data);
354 	}
355 
356 	Data opCat()(Data data)
357 	{
358 		return concat(data.contents);
359 	}
360 
361 	Data prepend(const(void)[] data)
362 	{
363 		Data result = Data(data.length + length);
364 		result.mcontents[0..data.length] = data[];
365 		result.mcontents[data.length..$] = contents[];
366 		return result;
367 	}
368 
369 	Data opCat_r(T)(const(T)[] data)
370 		if (!hasIndirections!T)
371 	{
372 		return prepend(data);
373 	}
374 
375 	private static size_t getPreallocSize(size_t length)
376 	{
377 		if (length < MAX_PREALLOC)
378 			return nextPowerOfTwo(length);
379 		else
380 			return ((length-1) | (MAX_PREALLOC-1)) + 1;
381 	}
382 
383 	Data append(const(void)[] data)
384 	{
385 		if (data.length==0)
386 			return this;
387 		size_t oldLength = length;
388 		size_t newLength = length + data.length;
389 		expand(newLength, getPreallocSize(newLength));
390 		auto newContents = cast(void[])_contents[oldLength..$];
391 		newContents[] = (cast(void[])data)[];
392 		return this;
393 	}
394 
395 	/// Note that unlike opCat (a ~ b), opCatAssign (a ~= b) will preallocate.
396 	Data opCatAssign(T)(const(T)[] data)
397 		if (!hasIndirections!T)
398 	{
399 		return append(data);
400 	}
401 
402 	Data opCatAssign()(Data data)
403 	{
404 		return append(data.contents);
405 	}
406 
407 	Data opCatAssign()(ubyte value) // hack?
408 	{
409 		return append((&value)[0..1]);
410 	}
411 
412 	Data opSlice(size_t x, size_t y)
413 	in
414 	{
415 		assert(x <= y);
416 		assert(y <= length);
417 	}
418 // https://issues.dlang.org/show_bug.cgi?id=13463
419 //	out(result)
420 //	{
421 //		assert(result.length == y-x);
422 //	}
423 	body
424 	{
425 		if (x == y)
426 			return Data();
427 		else
428 		{
429 			Data result = this;
430 			result.contents = result.contents[x..y];
431 			return result;
432 		}
433 	}
434 
435 	/// Return a new Data for the first size bytes, and slice this instance from size to end.
436 	Data popFront(size_t size)
437 	in
438 	{
439 		assert(size <= length);
440 	}
441 	body
442 	{
443 		Data result = this;
444 		result.contents = contents[0..size];
445 		this  .contents = contents[size..$];
446 		return result;
447 	}
448 }
449 
450 unittest
451 {
452 	Data d = Data("aaaaa");
453 	assert(d.wrapper.references == 1);
454 	Data s = d[1..4];
455 	assert(d.wrapper.references == 2);
456 }
457 
458 // ************************************************************************
459 
460 static /*thread-local*/ size_t dataMemory, dataMemoryPeak;
461 static /*thread-local*/ uint   dataCount, allocCount;
462 
463 // Abstract wrapper.
464 abstract class DataWrapper
465 {
466 	size_t references = 1;
467 	abstract @property inout(void)[] contents() inout;
468 	abstract @property size_t size() const;
469 	abstract void setSize(size_t newSize);
470 	abstract @property size_t capacity() const;
471 }
472 
473 private:
474 
475 version (Windows)
476 	import core.sys.windows.windows;
477 else
478 {
479 	import core.sys.posix.unistd;
480 	import core.sys.posix.sys.mman;
481 }
482 
483 /// Wrapper for data in RAM, allocated from the OS.
484 final class MemoryDataWrapper : DataWrapper
485 {
486 	/// Pointer to actual data.
487 	void* data;
488 	/// Used size. Needed for safe appends.
489 	size_t _size;
490 	/// Allocated capacity.
491 	size_t _capacity;
492 
493 	/// Threshold of allocated memory to trigger a collect.
494 	enum { COLLECT_THRESHOLD = 8*1024*1024 } // 8MB
495 	/// Counter towards the threshold.
496 	static /*thread-local*/ size_t allocatedThreshold;
497 
498 	/// Create a new instance with given capacity.
499 	this(size_t size, size_t capacity)
500 	{
501 		data = malloc(/*ref*/ capacity);
502 		if (data is null)
503 		{
504 			debug(DATA) printf("Garbage collect triggered by failed Data allocation... ");
505 			GC.collect();
506 			debug(DATA) printf("Done\n");
507 			data = malloc(/*ref*/ capacity);
508 			allocatedThreshold = 0;
509 		}
510 		if (data is null)
511 			onOutOfMemoryError();
512 
513 		dataMemory += capacity;
514 		if (dataMemoryPeak < dataMemory)
515 			dataMemoryPeak = dataMemory;
516 		dataCount ++;
517 		allocCount ++;
518 
519 		this._size = size;
520 		this._capacity = capacity;
521 
522 		// also collect
523 		allocatedThreshold += capacity;
524 		if (allocatedThreshold > COLLECT_THRESHOLD)
525 		{
526 			debug(DATA) printf("Garbage collect triggered by total allocated Data exceeding threshold... ");
527 			GC.collect();
528 			debug(DATA) printf("Done\n");
529 			allocatedThreshold = 0;
530 		}
531 	}
532 
533 	/// Destructor - destroys the wrapped data.
534 	~this()
535 	{
536 		free(data, capacity);
537 		data = null;
538 		// If DataWrapper is created and manually deleted, there is no need to cause frequent collections
539 		if (allocatedThreshold > capacity)
540 			allocatedThreshold -= capacity;
541 		else
542 			allocatedThreshold = 0;
543 
544 		dataMemory -= capacity;
545 		dataCount --;
546 	}
547 
548 	@property override
549 	size_t size() const { return _size; }
550 
551 	@property override
552 	size_t capacity() const { return _capacity; }
553 
554 	override void setSize(size_t newSize)
555 	{
556 		assert(newSize <= capacity);
557 		_size = newSize;
558 	}
559 
560 	@property override
561 	inout(void)[] contents() inout
562 	{
563 		return data[0..size];
564 	}
565 
566 	// https://github.com/D-Programming-Language/druntime/pull/759
567 	version(OSX)
568 		enum _SC_PAGE_SIZE = 29;
569 
570 	version(Windows)
571 	{
572 		static immutable size_t pageSize;
573 
574 		shared static this()
575 		{
576 			SYSTEM_INFO si;
577 			GetSystemInfo(&si);
578 			pageSize = si.dwPageSize;
579 		}
580 	}
581 	else
582 	static if (is(typeof(_SC_PAGE_SIZE)))
583 	{
584 		static immutable size_t pageSize;
585 
586 		shared static this()
587 		{
588 			pageSize = sysconf(_SC_PAGE_SIZE);
589 		}
590 	}
591 
592 	static void* malloc(ref size_t size)
593 	{
594 		if (is(typeof(pageSize)))
595 			size = ((size-1) | (pageSize-1))+1;
596 
597 		version(Windows)
598 		{
599 			return VirtualAlloc(null, size, MEM_COMMIT, PAGE_READWRITE);
600 		}
601 		else
602 		version(Posix)
603 		{
604 			version(linux)
605 				import core.sys.linux.sys.mman : MAP_ANON;
606 			auto p = mmap(null, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
607 			return (p == MAP_FAILED) ? null : p;
608 		}
609 		else
610 			return core.stdc.malloc(size);
611 	}
612 
613 	static void free(void* p, size_t size)
614 	{
615 		debug
616 		{
617 			(cast(ubyte*)p)[0..size] = 0xDB;
618 		}
619 		version(Windows)
620 			VirtualFree(p, 0, MEM_RELEASE);
621 		else
622 		version(Posix)
623 			munmap(p, size);
624 		else
625 			core.stdc.free(size);
626 	}
627 }
628 
629 // Source: Win32 bindings project
630 version(Windows)
631 {
632 	struct SYSTEM_INFO {
633 		union {
634 			DWORD dwOemId;
635 			struct {
636 				WORD wProcessorArchitecture;
637 				WORD wReserved;
638 			}
639 		}
640 		DWORD dwPageSize;
641 		PVOID lpMinimumApplicationAddress;
642 		PVOID lpMaximumApplicationAddress;
643 		DWORD dwActiveProcessorMask;
644 		DWORD dwNumberOfProcessors;
645 		DWORD dwProcessorType;
646 		DWORD dwAllocationGranularity;
647 		WORD  wProcessorLevel;
648 		WORD  wProcessorRevision;
649 	}
650 	alias SYSTEM_INFO* LPSYSTEM_INFO;
651 
652 	extern(Windows) VOID GetSystemInfo(LPSYSTEM_INFO);
653 }
654 
655 debug(DATA_REFCOUNT) import ae.utils.exception;
656 
657 debug(DATA_REFCOUNT) void debugLog(string s)
658 {
659 	writeln(s);
660 	foreach (line; getStackTrace())
661 		writeln("\t", line);
662 }