/**
 * Wrappers for raw _data located in unmanaged memory.
 *
 * Using the Data type will only place a small object in managed memory,
 * keeping the actual _data in unmanaged memory.
 * A proxy class (DataWrapper) is used to safely allow multiple references to
 * the same block of unmanaged memory.
 * When the DataWrapper object is destroyed (either manually or by the garbage
 * collector when there are no remaining Data references), the unmanaged
 * memory is deallocated.
 *
 * This has the following advantages over using managed memory:
 * $(UL
 *  $(LI Faster allocation and deallocation, since memory is requested from
 *       the OS directly as whole pages)
 *  $(LI Greatly reduced chance of memory leaks (on 32-bit platforms) due to
 *       stray pointers)
 *  $(LI Overall improved GC performance due to reduced size of managed heap)
 *  $(LI Memory is immediately returned to the OS when _data is deallocated)
 * )
 * On the other hand, using Data has the following disadvantages:
 * $(UL
 *  $(LI This module is designed to store raw _data which does not have any
 *       pointers. Storing objects containing pointers to managed memory is
 *       unsupported, and may result in memory corruption.)
 *  $(LI Small objects may be stored inefficiently, as the module requests
 *       entire pages of memory from the OS. Consider allocating one large
 *       object and using slices (Data instances) for individual objects.)
 *  $(LI Incorrect usage (i.e. retaining/escaping references to wrapped memory
 *       without keeping a reference to its corresponding DataWrapper) can
 *       result in dangling pointers and hard-to-debug memory corruption.)
 * )
 *
 * License:
 *   This Source Code Form is subject to the terms of
 *   the Mozilla Public License, v. 2.0. If a copy of
 *   the MPL was not distributed with this file, You
 *   can obtain one at http://mozilla.org/MPL/2.0/.
39 * 40 * Authors: 41 * Vladimir Panteleev <vladimir@thecybershadow.net> 42 */ 43 44 module ae.sys.data; 45 46 static import core.stdc.stdlib; 47 import core.stdc.string : memmove; 48 import std.traits; 49 import core.memory; 50 import core.exception; 51 debug import std.stdio; 52 debug import std.string; 53 public import ae.sys.dataset; 54 import ae.utils.math; 55 56 debug(DATA) import core.stdc.stdio; 57 58 // ideas/todo: 59 // * templatize (and forbid using aliased types)? 60 // * use heap (malloc/Windows heap API) for small objects? 61 // * reference counting? 62 // * "immutable" support? 63 64 /** 65 * Wrapper for data located in external memory, to prevent faux references. 66 * Represents a slice of data, which may or may not be in unmanaged memory. 67 * Data in unmanaged memory is bound to a DataWrapper class instance. 68 * 69 * All operations on this class should be safe, except for accessing contents directly. 70 * All operations on contents must be accompanied by a live reference to the Data object, 71 * to keep a GC anchor towards the unmanaged data. 72 * 73 * Concatenations and appends to Data contents will cause reallocations on the heap, consider using Data instead. 74 * 75 * Be sure not to lose Data references while using their contents! 76 * For example, avoid code like this: 77 * ---- 78 * fun(cast(string)transformSomeData(someData).contents); 79 * ---- 80 * The Data return value may be unreachable once .contents is evaluated. 81 * Use .toHeap instead of .contents in such cases to get a safe heap copy. 82 */ 83 struct Data 84 { 85 private: 86 /// Wrapped data 87 const(void)[] _contents; 88 /// Reference to the wrapper of the actual data - may be null to indicate wrapped data in managed memory. 89 /// Used as a GC anchor to unmanaged data, and for in-place expands (for appends). 90 DataWrapper wrapper; 91 /// Indicates whether we're allowed to modify the data contents. 92 bool mutable; 93 94 /// Maximum preallocation for append operations. 
95 enum { MAX_PREALLOC = 4*1024*1024 } // must be power of 2 96 97 public: 98 /** 99 * Create new instance wrapping the given data. 100 * Params: 101 * data = initial data 102 * forceReallocation = when false, the contents will be duplicated to 103 * unmanaged memory only when it's not on the managed heap; when true, 104 * the contents will be reallocated always. 105 */ 106 this(const(void)[] data, bool forceReallocation = false) 107 { 108 if (data.length == 0) 109 contents = null; 110 else 111 if (forceReallocation || GC.addrOf(data.ptr) is null) 112 { 113 // copy to unmanaged memory 114 auto wrapper = unmanagedNew!MemoryDataWrapper(data.length, data.length); 115 this.wrapper = wrapper; 116 wrapper.contents[] = data[]; 117 contents = wrapper.contents; 118 mutable = true; 119 } 120 else 121 { 122 // just save a reference 123 contents = data; 124 mutable = false; 125 } 126 127 assert(this.length == data.length); 128 } 129 130 /// ditto 131 this(void[] data, bool forceReallocation = false) 132 { 133 const(void)[] cdata = data; 134 this(cdata, forceReallocation); 135 mutable = true; 136 } 137 138 /// Create a new instance with given size/capacity. Capacity defaults to size. 
139 this(size_t size, size_t capacity = 0) 140 in 141 { 142 assert(capacity == 0 || size <= capacity); 143 } 144 body 145 { 146 if (!capacity) 147 capacity = size; 148 149 if (capacity) 150 { 151 auto wrapper = unmanagedNew!MemoryDataWrapper(size, capacity); 152 this.wrapper = wrapper; 153 contents = wrapper.contents; 154 mutable = true; 155 } 156 else 157 { 158 wrapper = null; 159 contents = null; 160 } 161 162 assert(this.length == size); 163 } 164 165 this(DataWrapper wrapper, bool mutable) 166 { 167 this.wrapper = wrapper; 168 this.mutable = mutable; 169 this.contents = wrapper.contents; 170 } 171 172 this(this) 173 { 174 if (wrapper) 175 { 176 wrapper.references++; 177 debug (DATA_REFCOUNT) debugLog("%p -> %p: Incrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references); 178 } 179 else 180 debug (DATA_REFCOUNT) debugLog("%p -> %p: this(this) with no wrapper", cast(void*)&this, cast(void*)wrapper); 181 } 182 183 ~this() pure 184 { 185 //clear(); 186 // https://issues.dlang.org/show_bug.cgi?id=13809 187 (cast(void delegate() pure)&clear)(); 188 } 189 190 /* 191 /// Create new instance as a slice over an existing DataWrapper. 192 private this(DataWrapper wrapper, size_t start = 0, size_t end = size_t.max) 193 { 194 this.wrapper = wrapper; 195 this.start = start; 196 this.end = end==size_t.max ? 
wrapper.capacity : end; 197 } 198 */ 199 200 @property const(void)[] contents() const 201 { 202 return _contents; 203 } 204 205 @property private const(void)[] contents(const(void)[] data) 206 { 207 return _contents = data; 208 } 209 210 /// Get mutable contents 211 @property void[] mcontents() 212 { 213 if (!mutable && length) 214 { 215 reallocate(length, length); 216 assert(mutable); 217 } 218 return cast(void[])_contents; 219 } 220 221 @property const(void)* ptr() const 222 { 223 return contents.ptr; 224 } 225 226 @property void* mptr() 227 { 228 return mcontents.ptr; 229 } 230 231 @property size_t length() const 232 { 233 return contents.length; 234 } 235 236 @property bool empty() const 237 { 238 return contents is null; 239 } 240 241 bool opCast(T)() 242 if (is(T == bool)) 243 { 244 return !empty; 245 } 246 247 @property size_t capacity() const 248 { 249 if (wrapper is null) 250 return length; 251 // We can only safely expand if the memory slice is at the end of the used unmanaged memory block. 252 auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data 253 auto end = pos + length; // end position in wrapper data 254 assert(end <= wrapper.size); 255 if (end == wrapper.size && end < wrapper.capacity) 256 return wrapper.capacity - pos; 257 else 258 return length; 259 } 260 261 /// Put a copy of the data on D's managed heap, and return it. 
262 @property 263 void[] toHeap() const 264 { 265 return _contents.dup; 266 } 267 268 private void reallocate(size_t size, size_t capacity) 269 { 270 auto wrapper = unmanagedNew!MemoryDataWrapper(size, capacity); 271 wrapper.contents[0..this.length] = contents[]; 272 //(cast(ubyte[])newWrapper.contents)[this.length..value] = 0; 273 274 clear(); 275 this.wrapper = wrapper; 276 this.contents = wrapper.contents; 277 mutable = true; 278 } 279 280 private void expand(size_t newSize, size_t newCapacity) 281 in 282 { 283 assert(length < newSize); 284 assert(newSize <= newCapacity); 285 } 286 out 287 { 288 assert(length == newSize); 289 } 290 body 291 { 292 if (newCapacity <= capacity) 293 { 294 auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data 295 wrapper.setSize(pos + newSize); 296 contents = ptr[0..newSize]; 297 } 298 else 299 reallocate(newSize, newCapacity); 300 } 301 302 @property void length(size_t value) 303 { 304 if (value == length) // no change 305 return; 306 if (value < length) // shorten 307 _contents = _contents[0..value]; 308 else // lengthen 309 expand(value, value); 310 } 311 alias length opDollar; 312 313 @property Data dup() const 314 { 315 return Data(contents, true); 316 } 317 318 /// This used to be an unsafe method which deleted the wrapped data. 319 /// Now that Data is refcounted, this simply calls clear() and 320 /// additionally asserts that this Data is the only Data holding 321 /// a reference to the wrapper. 
322 void deleteContents() 323 out 324 { 325 assert(wrapper is null); 326 } 327 body 328 { 329 if (wrapper) 330 { 331 assert(wrapper.references == 1, "Attempting to call deleteContents with "); 332 clear(); 333 } 334 } 335 336 void clear() 337 { 338 if (wrapper) 339 { 340 assert(wrapper.references > 0, "Dangling pointer to wrapper"); 341 wrapper.references--; 342 debug (DATA_REFCOUNT) debugLog("%p -> %p: Decrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references); 343 if (wrapper.references == 0) 344 wrapper.destroy(); 345 346 wrapper = null; 347 } 348 349 contents = null; 350 } 351 352 Data concat(const(void)[] data) 353 { 354 if (data.length==0) 355 return this; 356 Data result = Data(length + data.length); 357 result.mcontents[0..this.length] = contents[]; 358 result.mcontents[this.length..$] = data[]; 359 return result; 360 } 361 362 Data opCat(T)(const(T)[] data) 363 if (!hasIndirections!T) 364 { 365 return concat(data); 366 } 367 368 Data opCat()(Data data) 369 { 370 return concat(data.contents); 371 } 372 373 Data prepend(const(void)[] data) 374 { 375 Data result = Data(data.length + length); 376 result.mcontents[0..data.length] = data[]; 377 result.mcontents[data.length..$] = contents[]; 378 return result; 379 } 380 381 Data opCat_r(T)(const(T)[] data) 382 if (!hasIndirections!T) 383 { 384 return prepend(data); 385 } 386 387 private static size_t getPreallocSize(size_t length) 388 { 389 if (length < MAX_PREALLOC) 390 return nextPowerOfTwo(length); 391 else 392 return ((length-1) | (MAX_PREALLOC-1)) + 1; 393 } 394 395 Data append(const(void)[] data) 396 { 397 if (data.length==0) 398 return this; 399 size_t oldLength = length; 400 size_t newLength = length + data.length; 401 expand(newLength, getPreallocSize(newLength)); 402 auto newContents = cast(void[])_contents[oldLength..$]; 403 newContents[] = (cast(void[])data)[]; 404 return this; 405 } 406 407 /// Note that unlike opCat (a ~ b), opCatAssign (a ~= b) will preallocate. 
408 Data opCatAssign(T)(const(T)[] data) 409 if (!hasIndirections!T) 410 { 411 return append(data); 412 } 413 414 Data opCatAssign()(Data data) 415 { 416 return append(data.contents); 417 } 418 419 Data opCatAssign()(ubyte value) // hack? 420 { 421 return append((&value)[0..1]); 422 } 423 424 Data opSlice() 425 { 426 return this; 427 } 428 429 Data opSlice(size_t x, size_t y) 430 in 431 { 432 assert(x <= y); 433 assert(y <= length); 434 } 435 // https://issues.dlang.org/show_bug.cgi?id=13463 436 // out(result) 437 // { 438 // assert(result.length == y-x); 439 // } 440 body 441 { 442 if (x == y) 443 return Data(); 444 else 445 { 446 Data result = this; 447 result.contents = result.contents[x..y]; 448 return result; 449 } 450 } 451 452 /// Return a new Data for the first size bytes, and slice this instance from size to end. 453 Data popFront(size_t size) 454 in 455 { 456 assert(size <= length); 457 } 458 body 459 { 460 Data result = this; 461 result.contents = contents[0..size]; 462 this .contents = contents[size..$]; 463 return result; 464 } 465 } 466 467 unittest 468 { 469 Data d = Data("aaaaa"); 470 assert(d.wrapper.references == 1); 471 Data s = d[1..4]; 472 assert(d.wrapper.references == 2); 473 } 474 475 // ************************************************************************ 476 477 static /*thread-local*/ size_t dataMemory, dataMemoryPeak; 478 static /*thread-local*/ uint dataCount, allocCount; 479 480 // Abstract wrapper. 
/// Base class for objects owning a block of memory referenced by Data instances.
/// Data keeps the reference count up to date and destroys the wrapper
/// when the count reaches zero.
abstract class DataWrapper
{
	/// Number of Data instances referencing this wrapper.
	sizediff_t references = 1;
	/// The used part of the wrapped memory.
	abstract @property inout(void)[] contents() inout;
	/// Number of used bytes.
	abstract @property size_t size() const;
	/// Change the number of used bytes (used by Data for in-place appends).
	abstract void setSize(size_t newSize);
	/// Number of allocated bytes.
	abstract @property size_t capacity() const;

	debug ~this()
	{
		// printf's "%.*s" takes the precision as an int, not a size_t.
		debug(DATA_REFCOUNT) debugLog("%.*s.~this, references==%d", cast(int)this.classinfo.name.length, this.classinfo.name.ptr, references);
		assert(references == 0, "Deleting DataWrapper with non-zero reference count");
	}
}

/// Set the threshold of allocated memory after which MemoryDataWrapper triggers a GC collect.
void setGCThreshold(size_t value) { MemoryDataWrapper.collectThreshold = value; }

/// Construct an instance of class C in memory obtained from unmanagedAlloc
/// (i.e. outside of the GC heap), forwarding args to its constructor.
C unmanagedNew(C, Args...)(auto ref Args args)
	if (is(C == class))
{
	import std.conv : emplace;
	enum size = __traits(classInstanceSize, C);
	auto p = unmanagedAlloc(size);
	emplace!C(p[0..size], args);
	return cast(C)p;
}

/// Run the destructor of an instance created with unmanagedNew,
/// and release its memory.
void unmanagedDelete(C)(C c)
	if (is(C == class))
{
	c.__xdtor();
	// The instance pointer is the start of the block returned by unmanagedAlloc.
	unmanagedFree(cast(void*)c);
}

private:

/// Allocate sz bytes using the C heap. Raises an OutOfMemoryError on failure.
void* unmanagedAlloc(size_t sz)
{
	auto p = core.stdc.stdlib.malloc(sz);

	debug(DATA_REFCOUNT) debugLog("? -> %p: Allocating via malloc", p);

	// Do not allocate a new exception object while out of memory;
	// this matches the error handling in MemoryDataWrapper's constructor.
	if (!p)
		onOutOfMemoryError();

	//GC.addRange(p, sz);
	return p;
}

/// Release memory obtained from unmanagedAlloc. Null pointers are ignored.
void unmanagedFree(void* p) @nogc
{
	if (p)
	{
		debug(DATA_REFCOUNT) debugLog("? -> %p: Deleting via free", p);

		//GC.removeRange(p);
		core.stdc.stdlib.free(p);
	}
}

version (Windows)
	import core.sys.windows.windows;
else
{
	import core.sys.posix.unistd;
	import core.sys.posix.sys.mman;
}

/// Wrapper for data in RAM, allocated from the OS.
final class MemoryDataWrapper : DataWrapper
{
	/// Pointer to actual data.
	void* data;
	/// Used size. Needed for safe appends.
	size_t _size;
	/// Allocated capacity.
	size_t _capacity;

	/// Threshold of allocated memory to trigger a collect.
	__gshared size_t collectThreshold = 8*1024*1024; // 8MB
	/// Counter towards the threshold.
	static /*thread-local*/ size_t allocatedThreshold;

	/// Create a new instance with given capacity.
	/// The capacity may be rounded up by malloc (to a whole number of pages).
	/// If the allocation fails, a GC collect is attempted before retrying once.
	this(size_t size, size_t capacity)
	{
		data = malloc(/*ref*/ capacity);
		if (data is null)
		{
			debug(DATA) printf("Garbage collect triggered by failed Data allocation of %llu bytes... ", cast(ulong)capacity);
			GC.collect();
			debug(DATA) printf("Done\n");
			data = malloc(/*ref*/ capacity);
			allocatedThreshold = 0;
		}
		if (data is null)
			onOutOfMemoryError();

		dataMemory += capacity;
		if (dataMemoryPeak < dataMemory)
			dataMemoryPeak = dataMemory;
		dataCount ++;
		allocCount ++;

		this._size = size;
		this._capacity = capacity;

		// also collect
		allocatedThreshold += capacity;
		if (allocatedThreshold > collectThreshold)
		{
			debug(DATA) printf("Garbage collect triggered by total allocated Data exceeding threshold... ");
			GC.collect();
			debug(DATA) printf("Done\n");
			allocatedThreshold = 0;
		}
	}

	/// Destructor - destroys the wrapped data.
	~this()
	{
		free(data, capacity);
		data = null;
		// If DataWrapper is created and manually deleted, there is no need to cause frequent collections
		if (allocatedThreshold > capacity)
			allocatedThreshold -= capacity;
		else
			allocatedThreshold = 0;

		dataMemory -= capacity;
		dataCount --;
	}

	@property override
	size_t size() const { return _size; }

	@property override
	size_t capacity() const { return _capacity; }

	override void setSize(size_t newSize)
	{
		assert(newSize <= capacity);
		_size = newSize;
	}

	@property override
	inout(void)[] contents() inout
	{
		return data[0..size];
	}

	// https://github.com/D-Programming-Language/druntime/pull/759
	version(OSX)
		enum _SC_PAGE_SIZE = 29;

	// https://github.com/D-Programming-Language/druntime/pull/1140
	version(FreeBSD)
		enum _SC_PAGE_SIZE = 47;

	version(Windows)
	{
		/// Size of a memory page, as reported by the OS.
		static immutable size_t pageSize;

		shared static this()
		{
			SYSTEM_INFO si;
			GetSystemInfo(&si);
			pageSize = si.dwPageSize;
		}
	}
	else
	static if (is(typeof(_SC_PAGE_SIZE)))
	{
		/// Size of a memory page, as reported by the OS.
		static immutable size_t pageSize;

		shared static this()
		{
			pageSize = sysconf(_SC_PAGE_SIZE);
		}
	}

	/// Allocate memory from the OS.
	/// Params:
	///   size = requested size; when the page size is known, it is rounded
	///          up (in place) to a multiple of the page size.
	/// Returns: pointer to the allocated memory, or null on failure.
	static void* malloc(ref size_t size)
	{
		// Must be a compile-time check: pageSize is not declared on every platform.
		static if (is(typeof(pageSize)))
			size = ((size-1) | (pageSize-1))+1;

		version(Windows)
		{
			return VirtualAlloc(null, size, MEM_COMMIT, PAGE_READWRITE);
		}
		else
		version(Posix)
		{
			version(linux)
				import core.sys.linux.sys.mman : MAP_ANON;
			auto p = mmap(null, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
			return (p == MAP_FAILED) ? null : p;
		}
		else
			return core.stdc.stdlib.malloc(size);
	}

	/// Return memory obtained from malloc to the OS.
	/// Params:
	///   p    = pointer returned by malloc
	///   size = size of the block, as updated by malloc
	static void free(void* p, size_t size)
	{
		debug
		{
			// Overwrite the block so that use-after-free is easier to spot.
			(cast(ubyte*)p)[0..size] = 0xDB;
		}
		version(Windows)
			VirtualFree(p, 0, MEM_RELEASE);
		else
		version(Posix)
			munmap(p, size);
		else
			core.stdc.stdlib.free(p);
	}
}

// Source: Win32 bindings project
version(Windows)
{
	struct SYSTEM_INFO {
		union {
			DWORD dwOemId;
			struct {
				WORD wProcessorArchitecture;
				WORD wReserved;
			}
		}
		DWORD dwPageSize;
		PVOID lpMinimumApplicationAddress;
		PVOID lpMaximumApplicationAddress;
		DWORD dwActiveProcessorMask;
		DWORD dwNumberOfProcessors;
		DWORD dwProcessorType;
		DWORD dwAllocationGranularity;
		WORD  wProcessorLevel;
		WORD  wProcessorRevision;
	}
	alias SYSTEM_INFO* LPSYSTEM_INFO;

	extern(Windows) VOID GetSystemInfo(LPSYSTEM_INFO);
}

debug(DATA_REFCOUNT) import ae.utils.exception, ae.sys.memory, std.stdio;

/// Print a printf-style formatted message followed by either a note that a
/// GC collect is in progress, or a stack trace.
debug(DATA_REFCOUNT) void debugLog(Args...)(const char* s, Args args) @nogc
{
	printf(s, args);
	printf("\n");
	if (inCollect())
		printf("\t(in GC collect)\n");
	else
		// debugStackTrace is not @nogc; the cast lets this debug helper call it anyway.
		(cast(void function() @nogc)&debugStackTrace)();
	fflush(core.stdc.stdio.stdout);
}

/// Print the current stack trace, one indented line per frame.
debug(DATA_REFCOUNT) void debugStackTrace()
{
	try
		foreach (line; getStackTrace())
			writeln("\t", line);
	catch (Throwable e)
		writeln("\t(stacktrace error: ", e.msg, ")");
}