1 /** 2 * Wrappers for raw _data located in unmanaged memory. 3 * 4 * Using the Data type will only place a small object in managed memory, 5 * keeping the actual _data in unmanaged memory. 6 * A proxy class (DataWrapper) is used to safely allow multiple references to 7 * the same block of unmanaged memory. 8 * When the DataWrapper object is destroyed (either manually or by the garbage 9 * collector when there are no remaining Data references), the unmanaged 10 * memory is deallocated. 11 * 12 * This has the following advantage over using managed memory: 13 * $(UL 14 * $(LI Faster allocation and deallocation, since memory is requested from 15 * the OS directly as whole pages) 16 * $(LI Greatly reduced chance of memory leaks (on 32-bit platforms) due to 17 * stray pointers) 18 * $(LI Overall improved GC performance due to reduced size of managed heap) 19 * $(LI Memory is immediately returned to the OS when _data is deallocated) 20 * ) 21 * On the other hand, using Data has the following disadvantages: 22 * $(UL 23 * $(LI This module is designed to store raw _data which does not have any 24 * pointers. Storing objects containing pointers to managed memory is 25 * unsupported, and may result in memory corruption.) 26 * $(LI Small objects may be stored inefficiently, as the module requests 27 * entire pages of memory from the OS. Considering allocating one large 28 * object and use slices (Data instances) for individual objects.) 29 * $(LI Incorrect usage (i.e. retaining/escaping references to wrapped memory 30 * without keeping a reference to its corresponding DataWrapper) can 31 * result in dangling pointers and hard-to-debug memory corruption.) 32 * ) 33 * 34 * License: 35 * This Source Code Form is subject to the terms of 36 * the Mozilla Public License, v. 2.0. If a copy of 37 * the MPL was not distributed with this file, You 38 * can obtain one at http://mozilla.org/MPL/2.0/. 39 * 40 * Authors: 41 * Vladimir Panteleev <vladimir@thecybershadow.net> 42 */ 43 44 module ae.sys.data; 45 46 static import core.stdc.stdlib; 47 import core.stdc..string : memmove; 48 import std.traits; 49 import core.memory; 50 import core.exception; 51 debug import std.stdio; 52 debug import std..string; 53 public import ae.sys.dataset; 54 import ae.utils.math; 55 56 debug(DATA) import core.stdc.stdio; 57 58 // ideas/todo: 59 // * templatize (and forbid using aliased types)? 60 // * use heap (malloc/Windows heap API) for small objects? 61 // * reference counting? 62 // * "immutable" support? 63 64 /** 65 * Wrapper for data located in external memory, to prevent faux references. 66 * Represents a slice of data, which may or may not be in unmanaged memory. 67 * Data in unmanaged memory is bound to a DataWrapper class instance. 68 * 69 * All operations on this class should be safe, except for accessing contents directly. 70 * All operations on contents must be accompanied by a live reference to the Data object, 71 * to keep a GC anchor towards the unmanaged data. 72 * 73 * Concatenations and appends to Data contents will cause reallocations on the heap, consider using Data instead. 74 * 75 * Be sure not to lose Data references while using their contents! 76 * For example, avoid code like this: 77 * ---- 78 * fun(cast(string)transformSomeData(someData).contents); 79 * ---- 80 * The Data return value may be unreachable once .contents is evaluated. 81 * Use .toHeap instead of .contents in such cases to get a safe heap copy. 82 */ 83 struct Data 84 { 85 private: 86 /// Wrapped data 87 const(void)[] _contents; 88 /// Reference to the wrapper of the actual data - may be null to indicate wrapped data in managed memory. 89 /// Used as a GC anchor to unmanaged data, and for in-place expands (for appends). 90 DataWrapper wrapper; 91 /// Indicates whether we're allowed to modify the data contents. 92 bool mutable; 93 94 /// Maximum preallocation for append operations. 95 enum { MAX_PREALLOC = 4*1024*1024 } // must be power of 2 96 97 public: 98 /** 99 * Create new instance wrapping the given data. 100 * Params: 101 * data = initial data 102 * forceReallocation = when false, the contents will be duplicated to 103 * unmanaged memory only when it's not on the managed heap; when true, 104 * the contents will be reallocated always. 105 */ 106 this(const(void)[] data, bool forceReallocation = false) 107 { 108 if (data.length == 0) 109 contents = null; 110 else 111 if (forceReallocation || GC.addrOf(data.ptr) is null) 112 { 113 // copy to unmanaged memory 114 auto wrapper = unmanagedNew!MemoryDataWrapper(data.length, data.length); 115 this.wrapper = wrapper; 116 wrapper.contents[] = data[]; 117 contents = wrapper.contents; 118 mutable = true; 119 } 120 else 121 { 122 // just save a reference 123 contents = data; 124 mutable = false; 125 } 126 127 assert(this.length == data.length); 128 } 129 130 /// ditto 131 this(void[] data, bool forceReallocation = false) 132 { 133 const(void)[] cdata = data; 134 this(cdata, forceReallocation); 135 mutable = true; 136 } 137 138 /// Create a new instance with given size/capacity. Capacity defaults to size. 139 this(size_t size, size_t capacity = 0) 140 in 141 { 142 assert(capacity == 0 || size <= capacity); 143 } 144 do 145 { 146 if (!capacity) 147 capacity = size; 148 149 if (capacity) 150 { 151 auto wrapper = unmanagedNew!MemoryDataWrapper(size, capacity); 152 this.wrapper = wrapper; 153 contents = wrapper.contents; 154 mutable = true; 155 } 156 else 157 { 158 wrapper = null; 159 contents = null; 160 } 161 162 assert(this.length == size); 163 } 164 165 this(DataWrapper wrapper, bool mutable) 166 { 167 this.wrapper = wrapper; 168 this.mutable = mutable; 169 this.contents = wrapper.contents; 170 } 171 172 this(this) 173 { 174 if (wrapper) 175 { 176 wrapper.references++; 177 debug (DATA_REFCOUNT) debugLog("%p -> %p: Incrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references); 178 } 179 else 180 debug (DATA_REFCOUNT) debugLog("%p -> %p: this(this) with no wrapper", cast(void*)&this, cast(void*)wrapper); 181 } 182 183 ~this() pure 184 { 185 //clear(); 186 // https://issues.dlang.org/show_bug.cgi?id=13809 187 (cast(void delegate() pure)&clear)(); 188 } 189 190 debug(DATA) invariant 191 { 192 if (wrapper) 193 assert(wrapper.references > 0, "Data referencing DataWrapper with bad reference count"); 194 } 195 196 /* 197 /// Create new instance as a slice over an existing DataWrapper. 198 private this(DataWrapper wrapper, size_t start = 0, size_t end = size_t.max) 199 { 200 this.wrapper = wrapper; 201 this.start = start; 202 this.end = end==size_t.max ? wrapper.capacity : end; 203 } 204 */ 205 206 @property const(void)[] contents() const 207 { 208 return _contents; 209 } 210 211 @property private const(void)[] contents(const(void)[] data) 212 { 213 return _contents = data; 214 } 215 216 /// Get mutable contents 217 @property void[] mcontents() 218 { 219 if (!mutable && length) 220 { 221 reallocate(length, length); 222 assert(mutable); 223 } 224 return cast(void[])_contents; 225 } 226 227 @property const(void)* ptr() const 228 { 229 return contents.ptr; 230 } 231 232 @property void* mptr() 233 { 234 return mcontents.ptr; 235 } 236 237 @property size_t length() const 238 { 239 return contents.length; 240 } 241 242 @property bool empty() const 243 { 244 return contents is null; 245 } 246 247 bool opCast(T)() 248 if (is(T == bool)) 249 { 250 return !empty; 251 } 252 253 @property size_t capacity() const 254 { 255 if (wrapper is null) 256 return length; 257 // We can only safely expand if the memory slice is at the end of the used unmanaged memory block. 258 auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data 259 auto end = pos + length; // end position in wrapper data 260 assert(end <= wrapper.size); 261 if (end == wrapper.size && end < wrapper.capacity) 262 return wrapper.capacity - pos; 263 else 264 return length; 265 } 266 267 /// Put a copy of the data on D's managed heap, and return it. 268 @property 269 void[] toHeap() const 270 { 271 return _contents.dup; 272 } 273 274 private void reallocate(size_t size, size_t capacity) 275 { 276 auto wrapper = unmanagedNew!MemoryDataWrapper(size, capacity); 277 wrapper.contents[0..this.length] = contents[]; 278 //(cast(ubyte[])newWrapper.contents)[this.length..value] = 0; 279 280 clear(); 281 this.wrapper = wrapper; 282 this.contents = wrapper.contents; 283 mutable = true; 284 } 285 286 private void expand(size_t newSize, size_t newCapacity) 287 in 288 { 289 assert(length < newSize); 290 assert(newSize <= newCapacity); 291 } 292 out 293 { 294 assert(length == newSize); 295 } 296 do 297 { 298 if (newCapacity <= capacity) 299 { 300 auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data 301 wrapper.setSize(pos + newSize); 302 contents = ptr[0..newSize]; 303 } 304 else 305 reallocate(newSize, newCapacity); 306 } 307 308 @property void length(size_t value) 309 { 310 if (value == length) // no change 311 return; 312 if (value < length) // shorten 313 _contents = _contents[0..value]; 314 else // lengthen 315 expand(value, value); 316 } 317 alias length opDollar; 318 319 @property Data dup() const 320 { 321 return Data(contents, true); 322 } 323 324 /// This used to be an unsafe method which deleted the wrapped data. 325 /// Now that Data is refcounted, this simply calls clear() and 326 /// additionally asserts that this Data is the only Data holding 327 /// a reference to the wrapper. 328 void deleteContents() 329 out 330 { 331 assert(wrapper is null); 332 } 333 do 334 { 335 if (wrapper) 336 { 337 assert(wrapper.references == 1, "Attempting to call deleteContents with "); 338 clear(); 339 } 340 } 341 342 void clear() 343 { 344 if (wrapper) 345 { 346 assert(wrapper.references > 0, "Dangling pointer to wrapper"); 347 wrapper.references--; 348 debug (DATA_REFCOUNT) debugLog("%p -> %p: Decrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references); 349 if (wrapper.references == 0) 350 wrapper.destroy(); 351 352 wrapper = null; 353 } 354 355 contents = null; 356 } 357 358 Data concat(const(void)[] data) 359 { 360 if (data.length==0) 361 return this; 362 Data result = Data(length + data.length); 363 result.mcontents[0..this.length] = contents[]; 364 result.mcontents[this.length..$] = data[]; 365 return result; 366 } 367 368 template opBinary(string op) if (op == "~") 369 { 370 Data opBinary(T)(const(T)[] data) 371 if (!hasIndirections!T) 372 { 373 return concat(data); 374 } 375 376 Data opBinary()(Data data) 377 { 378 return concat(data.contents); 379 } 380 } 381 382 Data prepend(const(void)[] data) 383 { 384 Data result = Data(data.length + length); 385 result.mcontents[0..data.length] = data[]; 386 result.mcontents[data.length..$] = contents[]; 387 return result; 388 } 389 390 template opBinaryRight(string op) if (op == "~") 391 { 392 Data opBinaryRight(T)(const(T)[] data) 393 if (!hasIndirections!T) 394 { 395 return prepend(data); 396 } 397 } 398 399 private static size_t getPreallocSize(size_t length) 400 { 401 if (length < MAX_PREALLOC) 402 return nextPowerOfTwo(length); 403 else 404 return ((length-1) | (MAX_PREALLOC-1)) + 1; 405 } 406 407 Data append(const(void)[] data) 408 { 409 if (data.length==0) 410 return this; 411 size_t oldLength = length; 412 size_t newLength = length + data.length; 413 expand(newLength, getPreallocSize(newLength)); 414 auto newContents = cast(void[])_contents[oldLength..$]; 415 newContents[] = (cast(void[])data)[]; 416 return this; 417 } 418 419 /// Note that unlike concatenation (a ~ b), appending (a ~= b) will preallocate. 420 template opOpAssign(string op) if (op == "~") 421 { 422 Data opOpAssign(T)(const(T)[] data) 423 if (!hasIndirections!T) 424 { 425 return append(data); 426 } 427 428 Data opOpAssign()(Data data) 429 { 430 return append(data.contents); 431 } 432 433 Data opOpAssign()(ubyte value) // hack? 434 { 435 return append((&value)[0..1]); 436 } 437 } 438 439 Data opSlice() 440 { 441 return this; 442 } 443 444 Data opSlice(size_t x, size_t y) 445 in 446 { 447 assert(x <= y); 448 assert(y <= length); 449 } 450 // https://issues.dlang.org/show_bug.cgi?id=13463 451 // out(result) 452 // { 453 // assert(result.length == y-x); 454 // } 455 do 456 { 457 if (x == y) 458 return Data(); 459 else 460 { 461 Data result = this; 462 result.contents = result.contents[x..y]; 463 return result; 464 } 465 } 466 467 /// Return a new Data for the first size bytes, and slice this instance from size to end. 468 Data popFront(size_t size) 469 in 470 { 471 assert(size <= length); 472 } 473 do 474 { 475 Data result = this; 476 result.contents = contents[0..size]; 477 this .contents = contents[size..$]; 478 return result; 479 } 480 } 481 482 unittest 483 { 484 Data d = Data("aaaaa"); 485 assert(d.wrapper.references == 1); 486 Data s = d[1..4]; 487 assert(d.wrapper.references == 2); 488 } 489 490 // ************************************************************************ 491 492 static /*thread-local*/ size_t dataMemory, dataMemoryPeak; 493 static /*thread-local*/ uint dataCount, allocCount; 494 495 // Abstract wrapper. 496 abstract class DataWrapper 497 { 498 sizediff_t references = 1; 499 abstract @property inout(void)[] contents() inout; 500 abstract @property size_t size() const; 501 abstract void setSize(size_t newSize); 502 abstract @property size_t capacity() const; 503 504 debug ~this() 505 { 506 debug(DATA_REFCOUNT) debugLog("%.*s.~this, references==%d", this.classinfo.name.length, this.classinfo.name.ptr, references); 507 assert(references == 0, "Deleting DataWrapper with non-zero reference count"); 508 } 509 } 510 511 void setGCThreshold(size_t value) { MemoryDataWrapper.collectThreshold = value; } 512 513 C unmanagedNew(C, Args...)(auto ref Args args) 514 if (is(C == class)) 515 { 516 import std.conv : emplace; 517 enum size = __traits(classInstanceSize, C); 518 auto p = unmanagedAlloc(size); 519 emplace!C(p[0..size], args); 520 return cast(C)p; 521 } 522 523 void unmanagedDelete(C)(C c) 524 if (is(C == class)) 525 { 526 c.__xdtor(); 527 unmanagedFree(p); 528 } 529 530 private: 531 532 void* unmanagedAlloc(size_t sz) 533 { 534 auto p = core.stdc.stdlib.malloc(sz); 535 536 debug(DATA_REFCOUNT) debugLog("? -> %p: Allocating via malloc (%d bytes)", p, cast(uint)sz); 537 538 if (!p) 539 throw new OutOfMemoryError(); 540 541 //GC.addRange(p, sz); 542 return p; 543 } 544 545 void unmanagedFree(void* p) @nogc 546 { 547 if (p) 548 { 549 debug(DATA_REFCOUNT) debugLog("? -> %p: Deleting via free", p); 550 551 //GC.removeRange(p); 552 core.stdc.stdlib.free(p); 553 } 554 } 555 556 version (Windows) 557 import core.sys.windows.windows; 558 else 559 { 560 import core.sys.posix.unistd; 561 import core.sys.posix.sys.mman; 562 } 563 564 /// Wrapper for data in RAM, allocated from the OS. 565 final class MemoryDataWrapper : DataWrapper 566 { 567 /// Pointer to actual data. 568 void* data; 569 /// Used size. Needed for safe appends. 570 size_t _size; 571 /// Allocated capacity. 572 size_t _capacity; 573 574 /// Threshold of allocated memory to trigger a collect. 575 __gshared size_t collectThreshold = 8*1024*1024; // 8MB 576 /// Counter towards the threshold. 577 static /*thread-local*/ size_t allocatedThreshold; 578 579 /// Create a new instance with given capacity. 580 this(size_t size, size_t capacity) 581 { 582 data = malloc(/*ref*/ capacity); 583 if (data is null) 584 { 585 debug(DATA) printf("Garbage collect triggered by failed Data allocation of %llu bytes... ", cast(ulong)capacity); 586 GC.collect(); 587 debug(DATA) printf("Done\n"); 588 data = malloc(/*ref*/ capacity); 589 allocatedThreshold = 0; 590 } 591 if (data is null) 592 onOutOfMemoryError(); 593 594 dataMemory += capacity; 595 if (dataMemoryPeak < dataMemory) 596 dataMemoryPeak = dataMemory; 597 dataCount ++; 598 allocCount ++; 599 600 this._size = size; 601 this._capacity = capacity; 602 603 // also collect 604 allocatedThreshold += capacity; 605 if (allocatedThreshold > collectThreshold) 606 { 607 debug(DATA) printf("Garbage collect triggered by total allocated Data exceeding threshold... "); 608 GC.collect(); 609 debug(DATA) printf("Done\n"); 610 allocatedThreshold = 0; 611 } 612 } 613 614 /// Destructor - destroys the wrapped data. 615 ~this() 616 { 617 free(data, capacity); 618 data = null; 619 // If DataWrapper is created and manually deleted, there is no need to cause frequent collections 620 if (allocatedThreshold > capacity) 621 allocatedThreshold -= capacity; 622 else 623 allocatedThreshold = 0; 624 625 dataMemory -= capacity; 626 dataCount --; 627 } 628 629 @property override 630 size_t size() const { return _size; } 631 632 @property override 633 size_t capacity() const { return _capacity; } 634 635 override void setSize(size_t newSize) 636 { 637 assert(newSize <= capacity); 638 _size = newSize; 639 } 640 641 @property override 642 inout(void)[] contents() inout 643 { 644 return data[0..size]; 645 } 646 647 // https://github.com/D-Programming-Language/druntime/pull/759 648 version(OSX) 649 enum _SC_PAGE_SIZE = 29; 650 651 // https://github.com/D-Programming-Language/druntime/pull/1140 652 version(FreeBSD) 653 enum _SC_PAGE_SIZE = 47; 654 655 version(Windows) 656 { 657 static immutable size_t pageSize; 658 659 shared static this() 660 { 661 SYSTEM_INFO si; 662 GetSystemInfo(&si); 663 pageSize = si.dwPageSize; 664 } 665 } 666 else 667 static if (is(typeof(_SC_PAGE_SIZE))) 668 { 669 static immutable size_t pageSize; 670 671 shared static this() 672 { 673 pageSize = sysconf(_SC_PAGE_SIZE); 674 } 675 } 676 677 static void* malloc(ref size_t size) 678 { 679 if (is(typeof(pageSize))) 680 size = ((size-1) | (pageSize-1))+1; 681 682 version(Windows) 683 { 684 return VirtualAlloc(null, size, MEM_COMMIT, PAGE_READWRITE); 685 } 686 else 687 version(Posix) 688 { 689 version(linux) 690 import core.sys.linux.sys.mman : MAP_ANON; 691 auto p = mmap(null, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); 692 return (p == MAP_FAILED) ? null : p; 693 } 694 else 695 return core.stdc.malloc(size); 696 } 697 698 static void free(void* p, size_t size) 699 { 700 debug 701 { 702 (cast(ubyte*)p)[0..size] = 0xDB; 703 } 704 version(Windows) 705 VirtualFree(p, 0, MEM_RELEASE); 706 else 707 version(Posix) 708 munmap(p, size); 709 else 710 core.stdc.free(size); 711 } 712 } 713 714 // Source: Win32 bindings project 715 version(Windows) 716 { 717 struct SYSTEM_INFO { 718 union { 719 DWORD dwOemId; 720 struct { 721 WORD wProcessorArchitecture; 722 WORD wReserved; 723 } 724 } 725 DWORD dwPageSize; 726 PVOID lpMinimumApplicationAddress; 727 PVOID lpMaximumApplicationAddress; 728 DWORD dwActiveProcessorMask; 729 DWORD dwNumberOfProcessors; 730 DWORD dwProcessorType; 731 DWORD dwAllocationGranularity; 732 WORD wProcessorLevel; 733 WORD wProcessorRevision; 734 } 735 alias SYSTEM_INFO* LPSYSTEM_INFO; 736 737 extern(Windows) VOID GetSystemInfo(LPSYSTEM_INFO); 738 } 739 740 debug(DATA_REFCOUNT) import ae.utils.exception, ae.sys.memory, std.stdio; 741 742 debug(DATA_REFCOUNT) void debugLog(Args...)(const char* s, Args args) @nogc 743 { 744 printf(s, args); 745 printf("\n"); 746 if (inCollect()) 747 printf("\t(in GC collect)\n"); 748 else 749 (cast(void function() @nogc)&debugStackTrace)(); 750 fflush(core.stdc.stdio.stdout); 751 } 752 753 debug(DATA_REFCOUNT) void debugStackTrace() 754 { 755 try 756 foreach (line; getStackTrace()) 757 writeln("\t", line); 758 catch (Throwable e) 759 writeln("\t(stacktrace error: ", e.msg, ")"); 760 }