/**
 * Wrappers for raw _data located in unmanaged memory.
 *
 * Using the Data type will only place a small object in managed memory,
 * keeping the actual _data in unmanaged memory.
 * A proxy class (DataWrapper) is used to safely allow multiple references to
 * the same block of unmanaged memory.
 * When the DataWrapper object is destroyed (either manually or by the garbage
 * collector when there are no remaining Data references), the unmanaged
 * memory is deallocated.
 *
 * This has the following advantages over using managed memory:
 * $(UL
 *  $(LI Faster allocation and deallocation, since memory is requested from
 *       the OS directly as whole pages)
 *  $(LI Greatly reduced chance of memory leaks (on 32-bit platforms) due to
 *       stray pointers)
 *  $(LI Overall improved GC performance due to reduced size of managed heap)
 *  $(LI Memory is immediately returned to the OS when _data is deallocated)
 * )
 * On the other hand, using Data has the following disadvantages:
 * $(UL
 *  $(LI This module is designed to store raw _data which does not have any
 *       pointers. Storing objects containing pointers to managed memory is
 *       unsupported, and may result in memory corruption.)
 *  $(LI Small objects may be stored inefficiently, as the module requests
 *       entire pages of memory from the OS. Consider allocating one large
 *       object and using slices (Data instances) for individual objects.)
 *  $(LI Incorrect usage (i.e. retaining/escaping references to wrapped memory
 *       without keeping a reference to its corresponding DataWrapper) can
 *       result in dangling pointers and hard-to-debug memory corruption.)
 * )
 *
 * License:
 *   This Source Code Form is subject to the terms of
 *   the Mozilla Public License, v. 2.0. If a copy of
 *   the MPL was not distributed with this file, You
 *   can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Authors:
 *   Vladimir Panteleev <ae@cy.md>
 */

module ae.sys.data;

static import core.stdc.stdlib;
import core.stdc.string : memmove;
import std.traits;
import core.memory;
import core.exception;
debug import std.string;
public import ae.sys.dataset;
import ae.utils.math;

debug(DATA) import core.stdc.stdio;

// ideas/todo:
// * templatize (and forbid using aliased types)?
// * use heap (malloc/Windows heap API) for small objects?
// * reference counting?
// * "immutable" support?

/**
 * Wrapper for data located in external memory, to prevent faux references.
 * Represents a slice of data, which may or may not be in unmanaged memory.
 * Data in unmanaged memory is bound to a DataWrapper class instance.
 *
 * All operations on this class should be safe, except for accessing contents directly.
 * All operations on contents must be accompanied by a live reference to the Data object,
 * to keep a GC anchor towards the unmanaged data.
 *
 * Concatenating or appending to the raw `contents` slice will cause
 * reallocations on the managed heap; consider using the `~` / `~=`
 * operators of `Data` itself instead.
 *
 * Be sure not to lose Data references while using their contents!
 * For example, avoid code like this:
 * ----
 * fun(cast(string)transformSomeData(someData).contents);
 * ----
 * The Data return value may be unreachable once .contents is evaluated.
 * Use .toHeap instead of .contents in such cases to get a safe heap copy.
 */
struct Data
{
private:
    /// Wrapped data
    const(void)[] _contents;
    /// Reference to the wrapper of the actual data - may be null to indicate wrapped data in managed memory.
    /// Used as a GC anchor to unmanaged data, and for in-place expands (for appends).
    DataWrapper wrapper;
    /// Indicates whether we're allowed to modify the data contents.
    bool mutable;

    /// Maximum preallocation for append operations.
    enum { MAX_PREALLOC = 4*1024*1024 } // must be power of 2

public:
    /**
     * Create new instance wrapping the given data.
     * Params:
     *   data = initial data
     *   forceReallocation = when false, the contents will be duplicated to
     *     unmanaged memory only when it's not on the managed heap; when true,
     *     the contents will be reallocated always.
     */
    this(const(void)[] data, bool forceReallocation = false)
    {
        if (data is null)
            contents = null;
        else
        if (data.length == 0)
        {
            // Empty but non-null: anchor to the shared empty wrapper so that
            // the instance stays distinguishable from a null Data.
            wrapper = emptyDataWrapper;
            wrapper.references++;
            contents = data;
        }
        else
        if (forceReallocation || GC.addrOf(data.ptr) is null)
        {
            // copy to unmanaged memory
            auto wrapper = unmanagedNew!MemoryDataWrapper(data.length, data.length);
            this.wrapper = wrapper;
            wrapper.contents[] = data[];
            contents = wrapper.contents;
            mutable = true;
        }
        else
        {
            // just save a reference
            contents = data;
            mutable = false;
        }

        assert(this.length == data.length);
    }

    /// ditto
    this(void[] data, bool forceReallocation = false)
    {
        const(void)[] cdata = data;
        this(cdata, forceReallocation);
        // The caller handed us mutable memory, so in-place writes are allowed.
        mutable = true;
    }

    /// Create a new instance with given size/capacity. Capacity defaults to size.
    this(size_t size, size_t capacity = 0)
    in
    {
        assert(capacity == 0 || size <= capacity);
    }
    do
    {
        if (!capacity)
            capacity = size;

        if (capacity)
        {
            auto wrapper = unmanagedNew!MemoryDataWrapper(size, capacity);
            this.wrapper = wrapper;
            contents = wrapper.contents;
            mutable = true;
        }
        else
        {
            wrapper = null;
            contents = null;
        }

        assert(this.length == size);
    }

    /// Create a new instance slicing all of the given wrapper's contents.
    /// The wrapper's reference count is not incremented here.
    this(DataWrapper wrapper, bool mutable)
    {
        this.wrapper = wrapper;
        this.mutable = mutable;
        this.contents = wrapper.contents;
    }

    /// Postblit: a copy is one more reference to the same wrapper (if any).
    this(this)
    {
        if (wrapper)
        {
            wrapper.references++;
            debug (DATA_REFCOUNT) debugLog("%p -> %p: Incrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references);
        }
        else
            debug (DATA_REFCOUNT) debugLog("%p -> %p: this(this) with no wrapper", cast(void*)&this, cast(void*)wrapper);
    }

    /// Destructor: drops this instance's reference via `clear`.
    ~this() pure
    {
        //clear();
        // https://issues.dlang.org/show_bug.cgi?id=13809
        (cast(void delegate() pure)&clear)();
    }

    debug(DATA) invariant
    {
        if (wrapper)
            assert(wrapper.references > 0, "Data referencing DataWrapper with bad reference count");
    }

/*
    /// Create new instance as a slice over an existing DataWrapper.
    private this(DataWrapper wrapper, size_t start = 0, size_t end = size_t.max)
    {
        this.wrapper = wrapper;
        this.start = start;
        this.end = end==size_t.max ? wrapper.capacity : end;
    }
*/

    /// Get contents
    @property const(void)[] contents() const
    {
        return _contents;
    }

    /// Replace the wrapped slice (internal use only).
    @property private const(void)[] contents(const(void)[] data)
    {
        return _contents = data;
    }

    /// Get mutable contents.
    /// If the data is not marked mutable (and is non-empty), it is first
    /// copied into a freshly allocated unmanaged block.
    @property void[] mcontents()
    {
        if (!mutable && length)
        {
            reallocate(length, length);
            assert(mutable);
        }
        return cast(void[])_contents;
    }

    /// Get pointer to contents
    @property const(void)* ptr() const
    {
        return contents.ptr;
    }

    /// Get pointer to mutable contents
    @property void* mptr()
    {
        return mcontents.ptr;
    }

    /// Size in bytes of contents
    @property size_t length() const
    {
        return contents.length;
    }
    alias opDollar = length; /// ditto

    /// True if contents is unset
    @property bool empty() const
    {
        return contents is null;
    }

    bool opCast(T)() const
        if (is(T == bool))
    {
        return !empty;
    } ///

    /// Return maximum value that can be set to `length` without causing a reallocation
    @property size_t capacity() const
    {
        // Without a wrapper there is nothing to grow into. A wrapper with zero
        // capacity (such as the shared empty wrapper) can never be expanded
        // either, and the zero-length slice anchored to it by the constructor
        // is the caller's own slice, so it need not lie inside the wrapper's
        // block. The pointer arithmetic below would be meaningless for it and
        // would trip the assertion.
        if (wrapper is null || wrapper.capacity == 0)
            return length;
        // We can only safely expand if the memory slice is at the end of the used unmanaged memory block.
        auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data
        auto end = pos + length;               // end position in wrapper data
        assert(end <= wrapper.size);
        if (end == wrapper.size && end < wrapper.capacity)
            return wrapper.capacity - pos;
        else
            return length;
    }

    /// Put a copy of the data on D's managed heap, and return it.
    @property
    void[] toHeap() const
    {
        return _contents.dup;
    }

    /// Move the contents into a new unmanaged block of the given size and
    /// capacity, dropping the reference to the previous storage.
    /// `size` must be at least the current length.
    private void reallocate(size_t size, size_t capacity)
    {
        auto wrapper = unmanagedNew!MemoryDataWrapper(size, capacity);
        wrapper.contents[0..this.length] = contents[];
        //(cast(ubyte[])newWrapper.contents)[this.length..value] = 0;

        // clear() acts on the member `wrapper` (the old storage); the local
        // `wrapper` declared above is the new block.
        clear();
        this.wrapper = wrapper;
        this.contents = wrapper.contents;
        mutable = true;
    }

    /// Grow to `newSize` bytes, in place when the wrapper has enough spare
    /// capacity, otherwise by reallocating with capacity `newCapacity`.
    private void expand(size_t newSize, size_t newCapacity)
    in
    {
        assert(length < newSize);
        assert(newSize <= newCapacity);
    }
    out
    {
        assert(length == newSize);
    }
    do
    {
        if (newCapacity <= capacity)
        {
            auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data
            wrapper.setSize(pos + newSize);
            contents = ptr[0..newSize];
        }
        else
            reallocate(newSize, newCapacity);
    }

    /// Resize contents
    @property void length(size_t value)
    {
        if (value == length) // no change
            return;
        if (value < length)  // shorten
            _contents = _contents[0..value];
        else                 // lengthen
            expand(value, value);
    }

    /// Create a copy of the data
    @property Data dup() const
    {
        return Data(contents, true);
    }

    /// This used to be an unsafe method which deleted the wrapped data.
    /// Now that Data is refcounted, this simply calls clear() and
    /// additionally asserts that this Data is the only Data holding
    /// a reference to the wrapper.
    void deleteContents()
    out
    {
        assert(wrapper is null);
    }
    do
    {
        if (wrapper)
        {
            assert(wrapper.references == 1, "Attempting to call deleteContents with more than one reference");
            clear();
        }
    }

    /// Unreference contents, freeing it if this was the last reference.
    void clear()
    {
        if (wrapper)
        {
            assert(wrapper.references > 0, "Dangling pointer to wrapper");
            wrapper.references--;
            debug (DATA_REFCOUNT) debugLog("%p -> %p: Decrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references);
            if (wrapper.references == 0)
                wrapper.destroy();

            wrapper = null;
        }

        contents = null;
    }

    /// Create a new `Data` containing the concatenation of `this` and `data`.
    /// Does not preallocate for successive appends.
    Data concat(const(void)[] data)
    {
        if (data.length==0)
            return this;
        Data result = Data(length + data.length);
        result.mcontents[0..this.length] = contents[];
        result.mcontents[this.length..$] = data[];
        return result;
    }

    /// ditto
    template opBinary(string op) if (op == "~")
    {
        Data opBinary(T)(const(T)[] data)
        if (!hasIndirections!T)
        {
            return concat(data);
        } ///

        Data opBinary()(Data data)
        {
            return concat(data.contents);
        } ///
    }

    /// Create a new `Data` containing the concatenation of `data` and `this`.
    /// Does not preallocate for successive appends.
    Data prepend(const(void)[] data)
    {
        Data result = Data(data.length + length);
        result.mcontents[0..data.length] = data[];
        result.mcontents[data.length..$] = contents[];
        return result;
    }

    /// ditto
    template opBinaryRight(string op) if (op == "~")
    {
        Data opBinaryRight(T)(const(T)[] data)
        if (!hasIndirections!T)
        {
            return prepend(data);
        } ///
    }

    /// Capacity to request when appending up to `length` bytes:
    /// `nextPowerOfTwo(length)` below `MAX_PREALLOC`, otherwise `length`
    /// rounded up to a multiple of `MAX_PREALLOC`.
    private static size_t getPreallocSize(size_t length)
    {
        if (length < MAX_PREALLOC)
            return nextPowerOfTwo(length);
        else
            return ((length-1) | (MAX_PREALLOC-1)) + 1;
    }

    /// Append data to this `Data`.
    /// Unlike concatenation (`a ~ b`), appending (`a ~= b`) will preallocate.
    Data append(const(void)[] data)
    {
        if (data.length==0)
            return this;
        size_t oldLength = length;
        size_t newLength = length + data.length;
        expand(newLength, getPreallocSize(newLength));
        // Only the newly added tail is written to.
        auto newContents = cast(void[])_contents[oldLength..$];
        newContents[] = (cast(void[])data)[];
        return this;
    }

    /// ditto
    template opOpAssign(string op) if (op == "~")
    {
        Data opOpAssign(T)(const(T)[] data)
        if (!hasIndirections!T)
        {
            return append(data);
        } ///

        Data opOpAssign()(Data data)
        {
            return append(data.contents);
        } ///

        Data opOpAssign()(ubyte value) // hack?
        {
            return append((&value)[0..1]);
        } ///
    }

    /// Returns a `Data` pointing at a slice of this `Data`'s contents.
    Data opSlice()
    {
        return this;
    }

    /// ditto
    Data opSlice(size_t x, size_t y)
    in
    {
        assert(x <= y);
        assert(y <= length);
    }
// https://issues.dlang.org/show_bug.cgi?id=13463
//  out(result)
//  {
//      assert(result.length == y-x);
//  }
    do
    {
        if (x == y)
            return Data(emptyDataWrapper.data[]);
        else
        {
            Data result = this;
            result.contents = result.contents[x..y];
            return result;
        }
    }

    /// Return a new `Data` for the first `size` bytes, and slice this instance from size to end.
    Data popFront(size_t size)
    in
    {
        assert(size <= length);
    }
    do
    {
        Data result = this;
        result.contents = contents[0..size];
        this.contents   = contents[size..$];
        return result;
    }
}

unittest
{
    Data d = Data("aaaaa");
    assert(d.wrapper.references == 1);
    Data s = d[1..4];
    assert(d.wrapper.references == 2);
}

// Regression: growing an empty (non-null) Data whose zero-length slice is not
// inside the shared empty wrapper must reallocate instead of asserting.
unittest
{
    Data e = Data("");
    assert(e.capacity == 0);
    e.length = 3;
    assert(e.length == 3);
}

// ************************************************************************

/// How many bytes are currently in `Data`-owned memory.
static /*thread-local*/ size_t dataMemory, dataMemoryPeak;
/// How many `DataWrapper` instances there are live currently.
static /*thread-local*/ uint dataCount;
/// How many allocations have been done so far.
static /*thread-local*/ uint allocCount;

/// Base abstract class which owns a block of memory.
abstract class DataWrapper
{
    sizediff_t references = 1; /// Reference count.
    abstract @property inout(void)[] contents() inout; /// The owned memory
    abstract @property size_t size() const;  /// Length of `contents`.
    abstract void setSize(size_t newSize); /// Resize `contents` up to `capacity`.
    abstract @property size_t capacity() const; /// Maximum possible size.

    /// Debug-only check that a wrapper is destroyed only after its
    /// reference count has reached zero.
    debug ~this() @nogc
    {
        debug(DATA_REFCOUNT) debugLog("%.*s.~this, references==%d", this.classinfo.name.length, this.classinfo.name.ptr, references);
        assert(references == 0, "Deleting DataWrapper with non-zero reference count");
    }
}

/// Set threshold of allocated memory to trigger a garbage collection.
void setGCThreshold(size_t value) { MemoryDataWrapper.collectThreshold = value; }

/// Allocate and construct a new class in `malloc`'d memory.
C unmanagedNew(C, Args...)(auto ref Args args)
if (is(C == class))
{
    import std.conv : emplace;
    enum size = __traits(classInstanceSize, C);
    auto p = unmanagedAlloc(size);
    emplace!C(p[0..size], args);
    return cast(C)p;
}

/// Delete a class instance created with `unmanagedNew`.
/// Runs the instance's destructor, then releases its `malloc`'d storage.
void unmanagedDelete(C)(C c)
if (is(C == class))
{
    c.__xdtor();
    // A class reference is the address of the instance that
    // unmanagedNew obtained from unmanagedAlloc.
    unmanagedFree(cast(void*)c);
}

private:

/// Allocate `sz` bytes with the C `malloc`.
/// Throws: `OutOfMemoryError` if the allocation fails.
void* unmanagedAlloc(size_t sz)
{
    auto p = core.stdc.stdlib.malloc(sz);

    debug(DATA_REFCOUNT) debugLog("? -> %p: Allocating via malloc (%d bytes)", p, cast(uint)sz);

    if (!p)
        throw new OutOfMemoryError();

    //GC.addRange(p, sz);
    return p;
}

/// Release memory obtained from `unmanagedAlloc`. A null pointer is ignored.
void unmanagedFree(void* p) @nogc
{
    if (p)
    {
        debug(DATA_REFCOUNT) debugLog("? -> %p: Deleting via free", p);

        //GC.removeRange(p);
        core.stdc.stdlib.free(p);
    }
}

version (Windows)
    import core.sys.windows.windows;
else
{
    import core.sys.posix.unistd;
    import core.sys.posix.sys.mman;
}

/// Wrapper for data in RAM, allocated from the OS.
final class MemoryDataWrapper : DataWrapper
{
    /// Pointer to actual data.
    void* data;
    /// Used size. Needed for safe appends.
    size_t _size;
    /// Allocated capacity.
    size_t _capacity;

    /// Threshold of allocated memory to trigger a collect.
    __gshared size_t collectThreshold = 8*1024*1024; // 8MB
    /// Counter towards the threshold.
    static /*thread-local*/ size_t allocatedThreshold;

    /// Create a new instance with given capacity.
    /// `capacity` may be rounded up by `malloc` (it is passed by reference);
    /// the rounded value is what gets recorded and accounted for.
    this(size_t size, size_t capacity)
    {
        data = malloc(/*ref*/ capacity);
        if (data is null)
        {
            // Allocation failed: run a collection and retry once.
            debug(DATA) fprintf(stderr, "Garbage collect triggered by failed Data allocation of %llu bytes... ", cast(ulong)capacity);
            GC.collect();
            debug(DATA) fprintf(stderr, "Done\n");
            data = malloc(/*ref*/ capacity);
            allocatedThreshold = 0;
        }
        if (data is null)
            onOutOfMemoryError();

        dataMemory += capacity;
        if (dataMemoryPeak < dataMemory)
            dataMemoryPeak = dataMemory;
        dataCount ++;
        allocCount ++;

        this._size = size;
        this._capacity = capacity;

        // also collect
        allocatedThreshold += capacity;
        if (allocatedThreshold > collectThreshold)
        {
            debug(DATA) fprintf(stderr, "Garbage collect triggered by total allocated Data exceeding threshold... ");
            GC.collect();
            debug(DATA) fprintf(stderr, "Done\n");
            allocatedThreshold = 0;
        }
    }

    /// Destructor - destroys the wrapped data.
    ~this() @nogc
    {
        free(data, capacity);
        data = null;
        // If DataWrapper is created and manually deleted, there is no need to cause frequent collections
        if (allocatedThreshold > capacity)
            allocatedThreshold -= capacity;
        else
            allocatedThreshold = 0;

        dataMemory -= capacity;
        dataCount --;
    }

    @property override
    size_t size() const { return _size; }

    @property override
    size_t capacity() const @nogc { return _capacity; }

    override void setSize(size_t newSize)
    {
        assert(newSize <= capacity);
        _size = newSize;
    }

    @property override
    inout(void)[] contents() inout
    {
        return data[0..size];
    }

    // https://github.com/D-Programming-Language/druntime/pull/759
    version(OSX)
        enum _SC_PAGE_SIZE = 29;

    // https://github.com/D-Programming-Language/druntime/pull/1140
    version(FreeBSD)
        enum _SC_PAGE_SIZE = 47;

    version(Windows)
    {
        static immutable size_t pageSize;

        shared static this()
        {
            SYSTEM_INFO si;
            GetSystemInfo(&si);
            pageSize = si.dwPageSize;
        }
    }
    else
    static if (is(typeof(_SC_PAGE_SIZE)))
    {
        static immutable size_t pageSize;

        shared static this()
        {
            pageSize = sysconf(_SC_PAGE_SIZE);
        }
    }

    /// Allocate at least `size` bytes from the OS.
    /// When `pageSize` is available, `size` is rounded up to a whole number
    /// of pages and the rounded value is written back through the `ref`.
    /// Returns: the block, or null if the allocation failed.
    static void* malloc(ref size_t size)
    {
        // Must be a compile-time test: on a platform where `pageSize` is not
        // declared, the rounding expression would not compile at all.
        static if (is(typeof(pageSize)))
            size = ((size-1) | (pageSize-1))+1;

        version(Windows)
        {
            return VirtualAlloc(null, size, MEM_COMMIT, PAGE_READWRITE);
        }
        else
        version(Posix)
        {
            version(linux)
                import core.sys.linux.sys.mman : MAP_ANON;
            auto p = mmap(null, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
            return (p == MAP_FAILED) ? null : p;
        }
        else
            return core.stdc.stdlib.malloc(size);
    }

    /// Release a block obtained from `malloc`; `size` is the (rounded) size
    /// that `malloc` reported. Debug builds fill the block with 0xDB first.
    static void free(void* p, size_t size) @nogc
    {
        debug
        {
            (cast(ubyte*)p)[0..size] = 0xDB;
        }
        version(Windows)
            VirtualFree(p, 0, MEM_RELEASE);
        else
        version(Posix)
            munmap(p, size);
        else
            core.stdc.stdlib.free(p);
    }
}

// ************************************************************************

/// DataWrapper implementation used for the empty (but non-null) Data slice.
class EmptyDataWrapper : DataWrapper
{
    void[0] data;

    override @property inout(void)[] contents() inout { return data[]; }
    override @property size_t size() const { return data.length; }
    override void setSize(size_t newSize) { assert(false); }
    override @property size_t capacity() const { return data.length; }
}

/// The single shared `EmptyDataWrapper` instance.
__gshared EmptyDataWrapper emptyDataWrapper = new EmptyDataWrapper;

// ************************************************************************

// Source: Win32 bindings project
version(Windows)
{
    struct SYSTEM_INFO {
        union {
            DWORD dwOemId;
            struct {
                WORD wProcessorArchitecture;
                WORD wReserved;
            }
        }
        DWORD dwPageSize;
        PVOID lpMinimumApplicationAddress;
        PVOID lpMaximumApplicationAddress;
        DWORD dwActiveProcessorMask;
        DWORD dwNumberOfProcessors;
        DWORD dwProcessorType;
        DWORD dwAllocationGranularity;
        WORD  wProcessorLevel;
        WORD  wProcessorRevision;
    }
    alias SYSTEM_INFO* LPSYSTEM_INFO;

    extern(Windows) VOID GetSystemInfo(LPSYSTEM_INFO);
}

debug(DATA_REFCOUNT) import ae.utils.exception, ae.sys.memory, std.stdio;

/// Print a printf-style diagnostic line to stderr, followed either by a note
/// that a GC collection is in progress or by a stack trace.
debug(DATA_REFCOUNT) void debugLog(Args...)(const char* s, Args args) @nogc
{
    fprintf(stderr, s, args);
    fprintf(stderr, "\n");
    if (inCollect())
        fprintf(stderr, "\t(in GC collect)\n");
    else
        // debugStackTrace is not @nogc; the cast lets it be called from here.
        (cast(void function() @nogc)&debugStackTrace)();
    fflush(core.stdc.stdio.stderr);
}

/// Write the current stack trace, one tab-indented line per frame.
debug(DATA_REFCOUNT) void debugStackTrace()
{
    try
        foreach (line; getStackTrace())
            writeln("\t", line);
    catch (Throwable e)
        writeln("\t(stacktrace error: ", e.msg, ")");
}