1 /**
2 * Wrappers for raw _data located in unmanaged memory.
3 *
4 * Using the Data type will only place a small object in managed memory,
5 * keeping the actual _data in unmanaged memory.
6 * A proxy class (DataWrapper) is used to safely allow multiple references to
7 * the same block of unmanaged memory.
8 * When the DataWrapper object is destroyed (either manually or by the garbage
9 * collector when there are no remaining Data references), the unmanaged
10 * memory is deallocated.
11 *
12 * This has the following advantage over using managed memory:
13 * $(UL
14 * $(LI Faster allocation and deallocation, since memory is requested from
15 * the OS directly as whole pages)
16 * $(LI Greatly reduced chance of memory leaks (on 32-bit platforms) due to
17 * stray pointers)
18 * $(LI Overall improved GC performance due to reduced size of managed heap)
19 * $(LI Memory is immediately returned to the OS when _data is deallocated)
20 * )
21 * On the other hand, using Data has the following disadvantages:
22 * $(UL
23 * $(LI This module is designed to store raw _data which does not have any
24 * pointers. Storing objects containing pointers to managed memory is
25 * unsupported, and may result in memory corruption.)
26 * $(LI Small objects may be stored inefficiently, as the module requests
27 * entire pages of memory from the OS. Considering allocating one large
28 * object and use slices (Data instances) for individual objects.)
29 * )
30 *
31 * License:
32 * This Source Code Form is subject to the terms of
33 * the Mozilla Public License, v. 2.0. If a copy of
34 * the MPL was not distributed with this file, You
35 * can obtain one at http://mozilla.org/MPL/2.0/.
36 *
37 * Authors:
38 * Vladimir Panteleev <vladimir@thecybershadow.net>
39 */
40
41 module ae.sys.data;
42
43 static import core.stdc.stdlib;
44 import core.stdc.string : memmove;
45 import std.traits;
46 import core.memory;
47 import core.exception;
48 debug import std.stdio;
49 debug import std.string;
50 public import ae.sys.dataset;
51 import ae.utils.math;
52
53 debug(DATA) import core.stdc.stdio;
54
55 // ideas/todo:
56 // * templatize (and forbid using aliased types)?
57 // * use heap (malloc/Windows heap API) for small objects?
58 // * reference counting?
59 // * "immutable" support?
60
61 /**
62 * Wrapper for data located in external memory, to prevent faux references.
63 * Represents a slice of data, which may or may not be in unmanaged memory.
64 * Data in unmanaged memory is bound to a DataWrapper class instance.
65 *
66 * All operations on this class should be safe, except for accessing contents directly.
67 * All operations on contents must be accompanied by a live reference to the Data object,
68 * to keep a GC anchor towards the unmanaged data.
69 *
70 * Concatenations and appends to Data contents will cause reallocations on the heap, consider using Data instead.
71 *
72 * Be sure not to lose Data references while using their contents!
73 * For example, avoid code like this:
74 * ----
75 * fun(cast(string)transformSomeData(someData).contents);
76 * ----
77 * The Data return value may be unreachable once .contents is evaluated.
78 * Use .toHeap instead of .contents in such cases to get a safe heap copy.
79 */
80 struct Data
81 {
82 private:
83 /// Wrapped data
84 const(void)[] _contents;
85 /// Reference to the wrapper of the actual data - may be null to indicate wrapped data in managed memory.
86 /// Used as a GC anchor to unmanaged data, and for in-place expands (for appends).
87 DataWrapper wrapper;
88 /// Indicates whether we're allowed to modify the data contents.
89 bool mutable;
90
91 /// Maximum preallocation for append operations.
92 enum { MAX_PREALLOC = 4*1024*1024 } // must be power of 2
93
94 public:
95 /**
96 * Create new instance wrapping the given data.
97 * Params:
98 * data = initial data
99 * forceReallocation = when false, the contents will be duplicated to
100 * unmanaged memory only when it's not on the managed heap; when true,
101 * the contents will be reallocated always.
102 */
103 this(const(void)[] data, bool forceReallocation = false)
104 {
105 if (data.length == 0)
106 contents = null;
107 else
108 if (forceReallocation || GC.addrOf(data.ptr) is null)
109 {
110 // copy to unmanaged memory
111 auto wrapper = new MemoryDataWrapper(data.length, data.length);
112 this.wrapper = wrapper;
113 wrapper.contents[] = data[];
114 contents = wrapper.contents;
115 mutable = true;
116 }
117 else
118 {
119 // just save a reference
120 contents = data;
121 mutable = false;
122 }
123
124 assert(this.length == data.length);
125 }
126
127 /// ditto
128 this(void[] data, bool forceReallocation = false)
129 {
130 const(void)[] cdata = data;
131 this(cdata, forceReallocation);
132 mutable = true;
133 }
134
135 /// Create a new instance with given size/capacity. Capacity defaults to size.
136 this(size_t size, size_t capacity = 0)
137 in
138 {
139 assert(capacity == 0 || size <= capacity);
140 }
141 body
142 {
143 if (!capacity)
144 capacity = size;
145
146 if (capacity)
147 {
148 auto wrapper = new MemoryDataWrapper(size, capacity);
149 this.wrapper = wrapper;
150 contents = wrapper.contents;
151 mutable = true;
152 }
153 else
154 {
155 wrapper = null;
156 contents = null;
157 }
158
159 assert(this.length == size);
160 }
161
162 this(DataWrapper wrapper, bool mutable)
163 {
164 this.wrapper = wrapper;
165 this.mutable = mutable;
166 this.contents = wrapper.contents;
167 }
168
169 this(this)
170 {
171 if (wrapper)
172 {
173 wrapper.references++;
174 debug (DATA_REFCOUNT) debugLog("%p -> %p: Incrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references);
175 }
176 else
177 debug (DATA_REFCOUNT) debugLog("%p -> %p: this(this) with no wrapper", cast(void*)&this, cast(void*)wrapper);
178 }
179
180 ~this() pure
181 {
182 //clear();
183 // https://issues.dlang.org/show_bug.cgi?id=13809
184 (cast(void delegate() pure)&clear)();
185 }
186
187 /*
188 /// Create new instance as a slice over an existing DataWrapper.
189 private this(DataWrapper wrapper, size_t start = 0, size_t end = size_t.max)
190 {
191 this.wrapper = wrapper;
192 this.start = start;
193 this.end = end==size_t.max ? wrapper.capacity : end;
194 }
195 */
196
197 @property const(void)[] contents() const
198 {
199 return _contents;
200 }
201
202 @property private const(void)[] contents(const(void)[] data)
203 {
204 return _contents = data;
205 }
206
207 /// Get mutable contents
208 @property void[] mcontents()
209 {
210 if (!mutable && length)
211 {
212 reallocate(length, length);
213 assert(mutable);
214 }
215 return cast(void[])_contents;
216 }
217
218 @property const(void)* ptr() const
219 {
220 return contents.ptr;
221 }
222
223 @property void* mptr()
224 {
225 return mcontents.ptr;
226 }
227
228 @property size_t length() const
229 {
230 return contents.length;
231 }
232
233 @property bool empty() const
234 {
235 return contents is null;
236 }
237
238 bool opCast(T)()
239 if (is(T == bool))
240 {
241 return !empty;
242 }
243
244 @property size_t capacity() const
245 {
246 if (wrapper is null)
247 return length;
248 // We can only safely expand if the memory slice is at the end of the used unmanaged memory block.
249 auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data
250 auto end = pos + length; // end position in wrapper data
251 assert(end <= wrapper.size);
252 if (end == wrapper.size && end < wrapper.capacity)
253 return wrapper.capacity - pos;
254 else
255 return length;
256 }
257
258 /// Put a copy of the data on D's managed heap, and return it.
259 @property
260 void[] toHeap() const
261 {
262 return _contents.dup;
263 }
264
265 private void reallocate(size_t size, size_t capacity)
266 {
267 auto wrapper = new MemoryDataWrapper(size, capacity);
268 wrapper.contents[0..this.length] = contents[];
269 //(cast(ubyte[])newWrapper.contents)[this.length..value] = 0;
270
271 clear();
272 this.wrapper = wrapper;
273 this.contents = wrapper.contents;
274 mutable = true;
275 }
276
277 private void expand(size_t newSize, size_t newCapacity)
278 in
279 {
280 assert(length < newSize);
281 assert(newSize <= newCapacity);
282 }
283 out
284 {
285 assert(length == newSize);
286 }
287 body
288 {
289 if (newCapacity <= capacity)
290 {
291 auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data
292 wrapper.setSize(pos + newSize);
293 contents = ptr[0..newSize];
294 }
295 else
296 reallocate(newSize, newCapacity);
297 }
298
299 @property void length(size_t value)
300 {
301 if (value == length) // no change
302 return;
303 if (value < length) // shorten
304 _contents = _contents[0..value];
305 else // lengthen
306 expand(value, value);
307 }
308 alias length opDollar;
309
310 @property Data dup() const
311 {
312 return Data(contents, true);
313 }
314
315 /// This used to be an unsafe method which deleted the wrapped data.
316 /// Now that Data is refcounted, this simply calls clear() and
317 /// additionally asserts that this Data is the only Data holding
318 /// a reference to the wrapper.
319 void deleteContents()
320 out
321 {
322 assert(wrapper is null);
323 }
324 body
325 {
326 if (wrapper)
327 {
328 assert(wrapper.references == 1, "Attempting to call deleteContents with ");
329 clear();
330 }
331 }
332
333 void clear()
334 {
335 if (wrapper)
336 {
337 assert(wrapper.references > 0, "Dangling pointer to wrapper");
338 wrapper.references--;
339 debug (DATA_REFCOUNT) debugLog("%p -> %p: Decrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references);
340 if (wrapper.references == 0)
341 delete wrapper;
342
343 wrapper = null;
344 }
345
346 contents = null;
347 }
348
349 Data concat(const(void)[] data)
350 {
351 if (data.length==0)
352 return this;
353 Data result = Data(length + data.length);
354 result.mcontents[0..this.length] = contents[];
355 result.mcontents[this.length..$] = data[];
356 return result;
357 }
358
359 Data opCat(T)(const(T)[] data)
360 if (!hasIndirections!T)
361 {
362 return concat(data);
363 }
364
365 Data opCat()(Data data)
366 {
367 return concat(data.contents);
368 }
369
370 Data prepend(const(void)[] data)
371 {
372 Data result = Data(data.length + length);
373 result.mcontents[0..data.length] = data[];
374 result.mcontents[data.length..$] = contents[];
375 return result;
376 }
377
378 Data opCat_r(T)(const(T)[] data)
379 if (!hasIndirections!T)
380 {
381 return prepend(data);
382 }
383
384 private static size_t getPreallocSize(size_t length)
385 {
386 if (length < MAX_PREALLOC)
387 return nextPowerOfTwo(length);
388 else
389 return ((length-1) | (MAX_PREALLOC-1)) + 1;
390 }
391
392 Data append(const(void)[] data)
393 {
394 if (data.length==0)
395 return this;
396 size_t oldLength = length;
397 size_t newLength = length + data.length;
398 expand(newLength, getPreallocSize(newLength));
399 auto newContents = cast(void[])_contents[oldLength..$];
400 newContents[] = (cast(void[])data)[];
401 return this;
402 }
403
404 /// Note that unlike opCat (a ~ b), opCatAssign (a ~= b) will preallocate.
405 Data opCatAssign(T)(const(T)[] data)
406 if (!hasIndirections!T)
407 {
408 return append(data);
409 }
410
411 Data opCatAssign()(Data data)
412 {
413 return append(data.contents);
414 }
415
416 Data opCatAssign()(ubyte value) // hack?
417 {
418 return append((&value)[0..1]);
419 }
420
421 Data opSlice()
422 {
423 return this;
424 }
425
426 Data opSlice(size_t x, size_t y)
427 in
428 {
429 assert(x <= y);
430 assert(y <= length);
431 }
432 // https://issues.dlang.org/show_bug.cgi?id=13463
433 // out(result)
434 // {
435 // assert(result.length == y-x);
436 // }
437 body
438 {
439 if (x == y)
440 return Data();
441 else
442 {
443 Data result = this;
444 result.contents = result.contents[x..y];
445 return result;
446 }
447 }
448
449 /// Return a new Data for the first size bytes, and slice this instance from size to end.
450 Data popFront(size_t size)
451 in
452 {
453 assert(size <= length);
454 }
455 body
456 {
457 Data result = this;
458 result.contents = contents[0..size];
459 this .contents = contents[size..$];
460 return result;
461 }
462 }
463
464 unittest
465 {
466 Data d = Data("aaaaa");
467 assert(d.wrapper.references == 1);
468 Data s = d[1..4];
469 assert(d.wrapper.references == 2);
470 }
471
472 // ************************************************************************
473
474 static /*thread-local*/ size_t dataMemory, dataMemoryPeak;
475 static /*thread-local*/ uint dataCount, allocCount;
476
477 // Abstract wrapper.
478 abstract class DataWrapper
479 {
480 sizediff_t references = 1;
481 abstract @property inout(void)[] contents() inout;
482 abstract @property size_t size() const;
483 abstract void setSize(size_t newSize);
484 abstract @property size_t capacity() const;
485
486 new(size_t sz)
487 {
488 void* p = core.stdc.stdlib.malloc(sz);
489
490 debug(DATA_REFCOUNT) debugLog("? -> %p: Allocating via malloc", p);
491
492 if (!p)
493 throw new OutOfMemoryError();
494
495 //GC.addRange(p, sz);
496 return p;
497 }
498
499 debug ~this()
500 {
501 debug(DATA_REFCOUNT) debugLog("%.*s.~this, references==%d", this.classinfo.name.length, this.classinfo.name.ptr, references);
502 assert(references == 0, "Deleting DataWrapper with non-zero reference count");
503 }
504
505 @nogc delete(void* p)
506 {
507 if (p)
508 {
509 debug(DATA_REFCOUNT) debugLog("? -> %p: Deleting via free", p);
510
511 //GC.removeRange(p);
512 core.stdc.stdlib.free(p);
513 }
514 }
515 }
516
517 private:
518
519 version (Windows)
520 import core.sys.windows.windows;
521 else
522 {
523 import core.sys.posix.unistd;
524 import core.sys.posix.sys.mman;
525 }
526
527 /// Wrapper for data in RAM, allocated from the OS.
528 final class MemoryDataWrapper : DataWrapper
529 {
530 /// Pointer to actual data.
531 void* data;
532 /// Used size. Needed for safe appends.
533 size_t _size;
534 /// Allocated capacity.
535 size_t _capacity;
536
537 /// Threshold of allocated memory to trigger a collect.
538 enum { COLLECT_THRESHOLD = 8*1024*1024 } // 8MB
539 /// Counter towards the threshold.
540 static /*thread-local*/ size_t allocatedThreshold;
541
542 /// Create a new instance with given capacity.
543 this(size_t size, size_t capacity)
544 {
545 data = malloc(/*ref*/ capacity);
546 if (data is null)
547 {
548 debug(DATA) printf("Garbage collect triggered by failed Data allocation of %llu bytes... ", cast(ulong)capacity);
549 GC.collect();
550 debug(DATA) printf("Done\n");
551 data = malloc(/*ref*/ capacity);
552 allocatedThreshold = 0;
553 }
554 if (data is null)
555 onOutOfMemoryError();
556
557 dataMemory += capacity;
558 if (dataMemoryPeak < dataMemory)
559 dataMemoryPeak = dataMemory;
560 dataCount ++;
561 allocCount ++;
562
563 this._size = size;
564 this._capacity = capacity;
565
566 // also collect
567 allocatedThreshold += capacity;
568 if (allocatedThreshold > COLLECT_THRESHOLD)
569 {
570 debug(DATA) printf("Garbage collect triggered by total allocated Data exceeding threshold... ");
571 GC.collect();
572 debug(DATA) printf("Done\n");
573 allocatedThreshold = 0;
574 }
575 }
576
577 /// Destructor - destroys the wrapped data.
578 ~this()
579 {
580 free(data, capacity);
581 data = null;
582 // If DataWrapper is created and manually deleted, there is no need to cause frequent collections
583 if (allocatedThreshold > capacity)
584 allocatedThreshold -= capacity;
585 else
586 allocatedThreshold = 0;
587
588 dataMemory -= capacity;
589 dataCount --;
590 }
591
592 @property override
593 size_t size() const { return _size; }
594
595 @property override
596 size_t capacity() const { return _capacity; }
597
598 override void setSize(size_t newSize)
599 {
600 assert(newSize <= capacity);
601 _size = newSize;
602 }
603
604 @property override
605 inout(void)[] contents() inout
606 {
607 return data[0..size];
608 }
609
610 // https://github.com/D-Programming-Language/druntime/pull/759
611 version(OSX)
612 enum _SC_PAGE_SIZE = 29;
613
614 // https://github.com/D-Programming-Language/druntime/pull/1140
615 version(FreeBSD)
616 enum _SC_PAGE_SIZE = 47;
617
618 version(Windows)
619 {
620 static immutable size_t pageSize;
621
622 shared static this()
623 {
624 SYSTEM_INFO si;
625 GetSystemInfo(&si);
626 pageSize = si.dwPageSize;
627 }
628 }
629 else
630 static if (is(typeof(_SC_PAGE_SIZE)))
631 {
632 static immutable size_t pageSize;
633
634 shared static this()
635 {
636 pageSize = sysconf(_SC_PAGE_SIZE);
637 }
638 }
639
640 static void* malloc(ref size_t size)
641 {
642 if (is(typeof(pageSize)))
643 size = ((size-1) | (pageSize-1))+1;
644
645 version(Windows)
646 {
647 return VirtualAlloc(null, size, MEM_COMMIT, PAGE_READWRITE);
648 }
649 else
650 version(Posix)
651 {
652 version(linux)
653 import core.sys.linux.sys.mman : MAP_ANON;
654 auto p = mmap(null, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
655 return (p == MAP_FAILED) ? null : p;
656 }
657 else
658 return core.stdc.malloc(size);
659 }
660
661 static void free(void* p, size_t size)
662 {
663 debug
664 {
665 (cast(ubyte*)p)[0..size] = 0xDB;
666 }
667 version(Windows)
668 VirtualFree(p, 0, MEM_RELEASE);
669 else
670 version(Posix)
671 munmap(p, size);
672 else
673 core.stdc.free(size);
674 }
675 }
676
677 // Source: Win32 bindings project
678 version(Windows)
679 {
680 struct SYSTEM_INFO {
681 union {
682 DWORD dwOemId;
683 struct {
684 WORD wProcessorArchitecture;
685 WORD wReserved;
686 }
687 }
688 DWORD dwPageSize;
689 PVOID lpMinimumApplicationAddress;
690 PVOID lpMaximumApplicationAddress;
691 DWORD dwActiveProcessorMask;
692 DWORD dwNumberOfProcessors;
693 DWORD dwProcessorType;
694 DWORD dwAllocationGranularity;
695 WORD wProcessorLevel;
696 WORD wProcessorRevision;
697 }
698 alias SYSTEM_INFO* LPSYSTEM_INFO;
699
700 extern(Windows) VOID GetSystemInfo(LPSYSTEM_INFO);
701 }
702
703 debug(DATA_REFCOUNT) import ae.utils.exception, ae.sys.memory, std.stdio;
704
705 debug(DATA_REFCOUNT) void debugLog(Args...)(const char* s, Args args) @nogc
706 {
707 printf(s, args);
708 printf("\n");
709 if (inCollect())
710 printf("\t(in GC collect)\n");
711 else
712 (cast(void function() @nogc)&debugStackTrace)();
713 fflush(core.stdc.stdio.stdout);
714 }
715
716 debug(DATA_REFCOUNT) void debugStackTrace()
717 {
718 try
719 foreach (line; getStackTrace())
720 writeln("\t", line);
721 catch (Throwable e)
722 writeln("\t(stacktrace error: ", e.msg, ")");
723 }