1 /** 2 * Wrappers for raw _data located in unmanaged memory. 3 * 4 * Using the Data type will only place a small object in managed memory, 5 * keeping the actual _data in unmanaged memory. 6 * A proxy class (DataWrapper) is used to safely allow multiple references to 7 * the same block of unmanaged memory. 8 * When the DataWrapper object is destroyed (either manually or by the garbage 9 * collector when there are no remaining Data references), the unmanaged 10 * memory is deallocated. 11 * 12 * This has the following advantage over using managed memory: 13 * $(UL 14 * $(LI Faster allocation and deallocation, since memory is requested from 15 * the OS directly as whole pages) 16 * $(LI Greatly reduced chance of memory leaks (on 32-bit platforms) due to 17 * stray pointers) 18 * $(LI Overall improved GC performance due to reduced size of managed heap) 19 * $(LI Memory is immediately returned to the OS when _data is deallocated) 20 * ) 21 * On the other hand, using Data has the following disadvantages: 22 * $(UL 23 * $(LI This module is designed to store raw _data which does not have any 24 * pointers. Storing objects containing pointers to managed memory is 25 * unsupported, and may result in memory corruption.) 26 * $(LI Small objects may be stored inefficiently, as the module requests 27 * entire pages of memory from the OS. Considering allocating one large 28 * object and use slices (Data instances) for individual objects.) 29 * $(LI Incorrect usage (i.e. retaining/escaping references to wrapped memory 30 * without keeping a reference to its corresponding DataWrapper) can 31 * result in dangling pointers and hard-to-debug memory corruption.) 32 * ) 33 * 34 * License: 35 * This Source Code Form is subject to the terms of 36 * the Mozilla Public License, v. 2.0. If a copy of 37 * the MPL was not distributed with this file, You 38 * can obtain one at http://mozilla.org/MPL/2.0/. 39 * 40 * Authors: 41 * Vladimir Panteleev <vladimir@thecybershadow.net> 42 */ 43 44 module ae.sys.data; 45 46 static import core.stdc.stdlib; 47 import core.stdc.string : memmove; 48 import std.traits; 49 import core.memory; 50 import core.exception; 51 debug import std.stdio; 52 debug import std.string; 53 public import ae.sys.dataset; 54 import ae.utils.math; 55 56 debug(DATA) import core.stdc.stdio; 57 58 // ideas/todo: 59 // * templatize (and forbid using aliased types)? 60 // * use heap (malloc/Windows heap API) for small objects? 61 // * reference counting? 62 // * "immutable" support? 63 64 /** 65 * Wrapper for data located in external memory, to prevent faux references. 66 * Represents a slice of data, which may or may not be in unmanaged memory. 67 * Data in unmanaged memory is bound to a DataWrapper class instance. 68 * 69 * All operations on this class should be safe, except for accessing contents directly. 70 * All operations on contents must be accompanied by a live reference to the Data object, 71 * to keep a GC anchor towards the unmanaged data. 72 * 73 * Concatenations and appends to Data contents will cause reallocations on the heap, consider using Data instead. 74 * 75 * Be sure not to lose Data references while using their contents! 76 * For example, avoid code like this: 77 * ---- 78 * fun(cast(string)transformSomeData(someData).contents); 79 * ---- 80 * The Data return value may be unreachable once .contents is evaluated. 81 * Use .toHeap instead of .contents in such cases to get a safe heap copy. 82 */ 83 struct Data 84 { 85 private: 86 /// Wrapped data 87 const(void)[] _contents; 88 /// Reference to the wrapper of the actual data - may be null to indicate wrapped data in managed memory. 89 /// Used as a GC anchor to unmanaged data, and for in-place expands (for appends). 90 DataWrapper wrapper; 91 /// Indicates whether we're allowed to modify the data contents. 92 bool mutable; 93 94 /// Maximum preallocation for append operations. 95 enum { MAX_PREALLOC = 4*1024*1024 } // must be power of 2 96 97 public: 98 /** 99 * Create new instance wrapping the given data. 100 * Params: 101 * data = initial data 102 * forceReallocation = when false, the contents will be duplicated to 103 * unmanaged memory only when it's not on the managed heap; when true, 104 * the contents will be reallocated always. 105 */ 106 this(const(void)[] data, bool forceReallocation = false) 107 { 108 if (data.length == 0) 109 contents = null; 110 else 111 if (forceReallocation || GC.addrOf(data.ptr) is null) 112 { 113 // copy to unmanaged memory 114 auto wrapper = unmanagedNew!MemoryDataWrapper(data.length, data.length); 115 this.wrapper = wrapper; 116 wrapper.contents[] = data[]; 117 contents = wrapper.contents; 118 mutable = true; 119 } 120 else 121 { 122 // just save a reference 123 contents = data; 124 mutable = false; 125 } 126 127 assert(this.length == data.length); 128 } 129 130 /// ditto 131 this(void[] data, bool forceReallocation = false) 132 { 133 const(void)[] cdata = data; 134 this(cdata, forceReallocation); 135 mutable = true; 136 } 137 138 /// Create a new instance with given size/capacity. Capacity defaults to size. 139 this(size_t size, size_t capacity = 0) 140 in 141 { 142 assert(capacity == 0 || size <= capacity); 143 } 144 body 145 { 146 if (!capacity) 147 capacity = size; 148 149 if (capacity) 150 { 151 auto wrapper = unmanagedNew!MemoryDataWrapper(size, capacity); 152 this.wrapper = wrapper; 153 contents = wrapper.contents; 154 mutable = true; 155 } 156 else 157 { 158 wrapper = null; 159 contents = null; 160 } 161 162 assert(this.length == size); 163 } 164 165 this(DataWrapper wrapper, bool mutable) 166 { 167 this.wrapper = wrapper; 168 this.mutable = mutable; 169 this.contents = wrapper.contents; 170 } 171 172 this(this) 173 { 174 if (wrapper) 175 { 176 wrapper.references++; 177 debug (DATA_REFCOUNT) debugLog("%p -> %p: Incrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references); 178 } 179 else 180 debug (DATA_REFCOUNT) debugLog("%p -> %p: this(this) with no wrapper", cast(void*)&this, cast(void*)wrapper); 181 } 182 183 ~this() pure 184 { 185 //clear(); 186 // https://issues.dlang.org/show_bug.cgi?id=13809 187 (cast(void delegate() pure)&clear)(); 188 } 189 190 /* 191 /// Create new instance as a slice over an existing DataWrapper. 192 private this(DataWrapper wrapper, size_t start = 0, size_t end = size_t.max) 193 { 194 this.wrapper = wrapper; 195 this.start = start; 196 this.end = end==size_t.max ? wrapper.capacity : end; 197 } 198 */ 199 200 @property const(void)[] contents() const 201 { 202 return _contents; 203 } 204 205 @property private const(void)[] contents(const(void)[] data) 206 { 207 return _contents = data; 208 } 209 210 /// Get mutable contents 211 @property void[] mcontents() 212 { 213 if (!mutable && length) 214 { 215 reallocate(length, length); 216 assert(mutable); 217 } 218 return cast(void[])_contents; 219 } 220 221 @property const(void)* ptr() const 222 { 223 return contents.ptr; 224 } 225 226 @property void* mptr() 227 { 228 return mcontents.ptr; 229 } 230 231 @property size_t length() const 232 { 233 return contents.length; 234 } 235 236 @property bool empty() const 237 { 238 return contents is null; 239 } 240 241 bool opCast(T)() 242 if (is(T == bool)) 243 { 244 return !empty; 245 } 246 247 @property size_t capacity() const 248 { 249 if (wrapper is null) 250 return length; 251 // We can only safely expand if the memory slice is at the end of the used unmanaged memory block. 252 auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data 253 auto end = pos + length; // end position in wrapper data 254 assert(end <= wrapper.size); 255 if (end == wrapper.size && end < wrapper.capacity) 256 return wrapper.capacity - pos; 257 else 258 return length; 259 } 260 261 /// Put a copy of the data on D's managed heap, and return it. 262 @property 263 void[] toHeap() const 264 { 265 return _contents.dup; 266 } 267 268 private void reallocate(size_t size, size_t capacity) 269 { 270 auto wrapper = unmanagedNew!MemoryDataWrapper(size, capacity); 271 wrapper.contents[0..this.length] = contents[]; 272 //(cast(ubyte[])newWrapper.contents)[this.length..value] = 0; 273 274 clear(); 275 this.wrapper = wrapper; 276 this.contents = wrapper.contents; 277 mutable = true; 278 } 279 280 private void expand(size_t newSize, size_t newCapacity) 281 in 282 { 283 assert(length < newSize); 284 assert(newSize <= newCapacity); 285 } 286 out 287 { 288 assert(length == newSize); 289 } 290 body 291 { 292 if (newCapacity <= capacity) 293 { 294 auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data 295 wrapper.setSize(pos + newSize); 296 contents = ptr[0..newSize]; 297 } 298 else 299 reallocate(newSize, newCapacity); 300 } 301 302 @property void length(size_t value) 303 { 304 if (value == length) // no change 305 return; 306 if (value < length) // shorten 307 _contents = _contents[0..value]; 308 else // lengthen 309 expand(value, value); 310 } 311 alias length opDollar; 312 313 @property Data dup() const 314 { 315 return Data(contents, true); 316 } 317 318 /// This used to be an unsafe method which deleted the wrapped data. 319 /// Now that Data is refcounted, this simply calls clear() and 320 /// additionally asserts that this Data is the only Data holding 321 /// a reference to the wrapper. 322 void deleteContents() 323 out 324 { 325 assert(wrapper is null); 326 } 327 body 328 { 329 if (wrapper) 330 { 331 assert(wrapper.references == 1, "Attempting to call deleteContents with "); 332 clear(); 333 } 334 } 335 336 void clear() 337 { 338 if (wrapper) 339 { 340 assert(wrapper.references > 0, "Dangling pointer to wrapper"); 341 wrapper.references--; 342 debug (DATA_REFCOUNT) debugLog("%p -> %p: Decrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references); 343 if (wrapper.references == 0) 344 wrapper.destroy(); 345 346 wrapper = null; 347 } 348 349 contents = null; 350 } 351 352 Data concat(const(void)[] data) 353 { 354 if (data.length==0) 355 return this; 356 Data result = Data(length + data.length); 357 result.mcontents[0..this.length] = contents[]; 358 result.mcontents[this.length..$] = data[]; 359 return result; 360 } 361 362 Data opCat(T)(const(T)[] data) 363 if (!hasIndirections!T) 364 { 365 return concat(data); 366 } 367 368 Data opCat()(Data data) 369 { 370 return concat(data.contents); 371 } 372 373 Data prepend(const(void)[] data) 374 { 375 Data result = Data(data.length + length); 376 result.mcontents[0..data.length] = data[]; 377 result.mcontents[data.length..$] = contents[]; 378 return result; 379 } 380 381 Data opCat_r(T)(const(T)[] data) 382 if (!hasIndirections!T) 383 { 384 return prepend(data); 385 } 386 387 private static size_t getPreallocSize(size_t length) 388 { 389 if (length < MAX_PREALLOC) 390 return nextPowerOfTwo(length); 391 else 392 return ((length-1) | (MAX_PREALLOC-1)) + 1; 393 } 394 395 Data append(const(void)[] data) 396 { 397 if (data.length==0) 398 return this; 399 size_t oldLength = length; 400 size_t newLength = length + data.length; 401 expand(newLength, getPreallocSize(newLength)); 402 auto newContents = cast(void[])_contents[oldLength..$]; 403 newContents[] = (cast(void[])data)[]; 404 return this; 405 } 406 407 /// Note that unlike opCat (a ~ b), opCatAssign (a ~= b) will preallocate. 408 Data opCatAssign(T)(const(T)[] data) 409 if (!hasIndirections!T) 410 { 411 return append(data); 412 } 413 414 Data opCatAssign()(Data data) 415 { 416 return append(data.contents); 417 } 418 419 Data opCatAssign()(ubyte value) // hack? 420 { 421 return append((&value)[0..1]); 422 } 423 424 Data opSlice() 425 { 426 return this; 427 } 428 429 Data opSlice(size_t x, size_t y) 430 in 431 { 432 assert(x <= y); 433 assert(y <= length); 434 } 435 // https://issues.dlang.org/show_bug.cgi?id=13463 436 // out(result) 437 // { 438 // assert(result.length == y-x); 439 // } 440 body 441 { 442 if (x == y) 443 return Data(); 444 else 445 { 446 Data result = this; 447 result.contents = result.contents[x..y]; 448 return result; 449 } 450 } 451 452 /// Return a new Data for the first size bytes, and slice this instance from size to end. 453 Data popFront(size_t size) 454 in 455 { 456 assert(size <= length); 457 } 458 body 459 { 460 Data result = this; 461 result.contents = contents[0..size]; 462 this .contents = contents[size..$]; 463 return result; 464 } 465 } 466 467 unittest 468 { 469 Data d = Data("aaaaa"); 470 assert(d.wrapper.references == 1); 471 Data s = d[1..4]; 472 assert(d.wrapper.references == 2); 473 } 474 475 // ************************************************************************ 476 477 static /*thread-local*/ size_t dataMemory, dataMemoryPeak; 478 static /*thread-local*/ uint dataCount, allocCount; 479 480 // Abstract wrapper. 481 abstract class DataWrapper 482 { 483 sizediff_t references = 1; 484 abstract @property inout(void)[] contents() inout; 485 abstract @property size_t size() const; 486 abstract void setSize(size_t newSize); 487 abstract @property size_t capacity() const; 488 489 deprecated new(size_t sz) { return unmanagedAlloc(sz); } 490 491 debug ~this() 492 { 493 debug(DATA_REFCOUNT) debugLog("%.*s.~this, references==%d", this.classinfo.name.length, this.classinfo.name.ptr, references); 494 assert(references == 0, "Deleting DataWrapper with non-zero reference count"); 495 } 496 497 @nogc deprecated delete(void* p) 498 { 499 unmanagedFree(p); 500 } 501 } 502 503 void setGCThreshold(size_t value) { MemoryDataWrapper.collectThreshold = value; } 504 505 C unmanagedNew(C, Args...)(auto ref Args args) 506 if (is(C == class)) 507 { 508 import std.conv : emplace; 509 enum size = __traits(classInstanceSize, C); 510 auto p = unmanagedAlloc(size); 511 emplace!C(p[0..size], args); 512 return cast(C)p; 513 } 514 515 void unmanagedDelete(C)(C c) 516 if (is(C == class)) 517 { 518 c.__xdtor(); 519 unmanagedFree(p); 520 } 521 522 private: 523 524 void* unmanagedAlloc(size_t sz) 525 { 526 auto p = core.stdc.stdlib.malloc(sz); 527 528 debug(DATA_REFCOUNT) debugLog("? -> %p: Allocating via malloc", p); 529 530 if (!p) 531 throw new OutOfMemoryError(); 532 533 //GC.addRange(p, sz); 534 return p; 535 } 536 537 void unmanagedFree(void* p) @nogc 538 { 539 if (p) 540 { 541 debug(DATA_REFCOUNT) debugLog("? -> %p: Deleting via free", p); 542 543 //GC.removeRange(p); 544 core.stdc.stdlib.free(p); 545 } 546 } 547 548 version (Windows) 549 import core.sys.windows.windows; 550 else 551 { 552 import core.sys.posix.unistd; 553 import core.sys.posix.sys.mman; 554 } 555 556 /// Wrapper for data in RAM, allocated from the OS. 557 final class MemoryDataWrapper : DataWrapper 558 { 559 /// Pointer to actual data. 560 void* data; 561 /// Used size. Needed for safe appends. 562 size_t _size; 563 /// Allocated capacity. 564 size_t _capacity; 565 566 /// Threshold of allocated memory to trigger a collect. 567 __gshared size_t collectThreshold = 8*1024*1024; // 8MB 568 /// Counter towards the threshold. 569 static /*thread-local*/ size_t allocatedThreshold; 570 571 /// Create a new instance with given capacity. 572 this(size_t size, size_t capacity) 573 { 574 data = malloc(/*ref*/ capacity); 575 if (data is null) 576 { 577 debug(DATA) printf("Garbage collect triggered by failed Data allocation of %llu bytes... ", cast(ulong)capacity); 578 GC.collect(); 579 debug(DATA) printf("Done\n"); 580 data = malloc(/*ref*/ capacity); 581 allocatedThreshold = 0; 582 } 583 if (data is null) 584 onOutOfMemoryError(); 585 586 dataMemory += capacity; 587 if (dataMemoryPeak < dataMemory) 588 dataMemoryPeak = dataMemory; 589 dataCount ++; 590 allocCount ++; 591 592 this._size = size; 593 this._capacity = capacity; 594 595 // also collect 596 allocatedThreshold += capacity; 597 if (allocatedThreshold > collectThreshold) 598 { 599 debug(DATA) printf("Garbage collect triggered by total allocated Data exceeding threshold... "); 600 GC.collect(); 601 debug(DATA) printf("Done\n"); 602 allocatedThreshold = 0; 603 } 604 } 605 606 /// Destructor - destroys the wrapped data. 607 ~this() 608 { 609 free(data, capacity); 610 data = null; 611 // If DataWrapper is created and manually deleted, there is no need to cause frequent collections 612 if (allocatedThreshold > capacity) 613 allocatedThreshold -= capacity; 614 else 615 allocatedThreshold = 0; 616 617 dataMemory -= capacity; 618 dataCount --; 619 } 620 621 @property override 622 size_t size() const { return _size; } 623 624 @property override 625 size_t capacity() const { return _capacity; } 626 627 override void setSize(size_t newSize) 628 { 629 assert(newSize <= capacity); 630 _size = newSize; 631 } 632 633 @property override 634 inout(void)[] contents() inout 635 { 636 return data[0..size]; 637 } 638 639 // https://github.com/D-Programming-Language/druntime/pull/759 640 version(OSX) 641 enum _SC_PAGE_SIZE = 29; 642 643 // https://github.com/D-Programming-Language/druntime/pull/1140 644 version(FreeBSD) 645 enum _SC_PAGE_SIZE = 47; 646 647 version(Windows) 648 { 649 static immutable size_t pageSize; 650 651 shared static this() 652 { 653 SYSTEM_INFO si; 654 GetSystemInfo(&si); 655 pageSize = si.dwPageSize; 656 } 657 } 658 else 659 static if (is(typeof(_SC_PAGE_SIZE))) 660 { 661 static immutable size_t pageSize; 662 663 shared static this() 664 { 665 pageSize = sysconf(_SC_PAGE_SIZE); 666 } 667 } 668 669 static void* malloc(ref size_t size) 670 { 671 if (is(typeof(pageSize))) 672 size = ((size-1) | (pageSize-1))+1; 673 674 version(Windows) 675 { 676 return VirtualAlloc(null, size, MEM_COMMIT, PAGE_READWRITE); 677 } 678 else 679 version(Posix) 680 { 681 version(linux) 682 import core.sys.linux.sys.mman : MAP_ANON; 683 auto p = mmap(null, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); 684 return (p == MAP_FAILED) ? null : p; 685 } 686 else 687 return core.stdc.malloc(size); 688 } 689 690 static void free(void* p, size_t size) 691 { 692 debug 693 { 694 (cast(ubyte*)p)[0..size] = 0xDB; 695 } 696 version(Windows) 697 VirtualFree(p, 0, MEM_RELEASE); 698 else 699 version(Posix) 700 munmap(p, size); 701 else 702 core.stdc.free(size); 703 } 704 } 705 706 // Source: Win32 bindings project 707 version(Windows) 708 { 709 struct SYSTEM_INFO { 710 union { 711 DWORD dwOemId; 712 struct { 713 WORD wProcessorArchitecture; 714 WORD wReserved; 715 } 716 } 717 DWORD dwPageSize; 718 PVOID lpMinimumApplicationAddress; 719 PVOID lpMaximumApplicationAddress; 720 DWORD dwActiveProcessorMask; 721 DWORD dwNumberOfProcessors; 722 DWORD dwProcessorType; 723 DWORD dwAllocationGranularity; 724 WORD wProcessorLevel; 725 WORD wProcessorRevision; 726 } 727 alias SYSTEM_INFO* LPSYSTEM_INFO; 728 729 extern(Windows) VOID GetSystemInfo(LPSYSTEM_INFO); 730 } 731 732 debug(DATA_REFCOUNT) import ae.utils.exception, ae.sys.memory, std.stdio; 733 734 debug(DATA_REFCOUNT) void debugLog(Args...)(const char* s, Args args) @nogc 735 { 736 printf(s, args); 737 printf("\n"); 738 if (inCollect()) 739 printf("\t(in GC collect)\n"); 740 else 741 (cast(void function() @nogc)&debugStackTrace)(); 742 fflush(core.stdc.stdio.stdout); 743 } 744 745 debug(DATA_REFCOUNT) void debugStackTrace() 746 { 747 try 748 foreach (line; getStackTrace()) 749 writeln("\t", line); 750 catch (Throwable e) 751 writeln("\t(stacktrace error: ", e.msg, ")"); 752 }