1 /** 2 * Wrappers for raw _data located in unmanaged memory. 3 * 4 * Using the Data type will only place a small object in managed memory, 5 * keeping the actual _data in unmanaged memory. 6 * A proxy class (DataWrapper) is used to safely allow multiple references to 7 * the same block of unmanaged memory. 8 * When the DataWrapper object is destroyed (either manually or by the garbage 9 * collector when there are no remaining Data references), the unmanaged 10 * memory is deallocated. 11 * 12 * This has the following advantage over using managed memory: 13 * $(UL 14 * $(LI Faster allocation and deallocation, since memory is requested from 15 * the OS directly as whole pages) 16 * $(LI Greatly reduced chance of memory leaks (on 32-bit platforms) due to 17 * stray pointers) 18 * $(LI Overall improved GC performance due to reduced size of managed heap) 19 * $(LI Memory is immediately returned to the OS when _data is deallocated) 20 * ) 21 * On the other hand, using Data has the following disadvantages: 22 * $(UL 23 * $(LI This module is designed to store raw _data which does not have any 24 * pointers. Storing objects containing pointers to managed memory is 25 * unsupported, and may result in memory corruption.) 26 * $(LI Small objects may be stored inefficiently, as the module requests 27 * entire pages of memory from the OS. Considering allocating one large 28 * object and use slices (Data instances) for individual objects.) 29 * ) 30 * 31 * License: 32 * This Source Code Form is subject to the terms of 33 * the Mozilla Public License, v. 2.0. If a copy of 34 * the MPL was not distributed with this file, You 35 * can obtain one at http://mozilla.org/MPL/2.0/. 36 * 37 * Authors: 38 * Vladimir Panteleev <vladimir@thecybershadow.net> 39 */ 40 41 module ae.sys.data; 42 43 static import core.stdc.stdlib; 44 import core.stdc.string : memmove; 45 import std.traits; 46 import core.memory; 47 import core.exception; 48 debug import std.stdio; 49 debug import std.string; 50 public import ae.sys.dataset; 51 import ae.utils.math; 52 53 debug(DATA) import core.stdc.stdio; 54 55 // ideas/todo: 56 // * templatize (and forbid using aliased types)? 57 // * use heap (malloc/Windows heap API) for small objects? 58 // * reference counting? 59 // * "immutable" support? 60 61 /** 62 * Wrapper for data located in external memory, to prevent faux references. 63 * Represents a slice of data, which may or may not be in unmanaged memory. 64 * Data in unmanaged memory is bound to a DataWrapper class instance. 65 * 66 * All operations on this class should be safe, except for accessing contents directly. 67 * All operations on contents must be accompanied by a live reference to the Data object, 68 * to keep a GC anchor towards the unmanaged data. 69 * 70 * Concatenations and appends to Data contents will cause reallocations on the heap, consider using Data instead. 71 * 72 * Be sure not to lose Data references while using their contents! 73 * For example, avoid code like this: 74 * ---- 75 * fun(cast(string)transformSomeData(someData).contents); 76 * ---- 77 * The Data return value may be unreachable once .contents is evaluated. 78 * Use .toHeap instead of .contents in such cases to get a safe heap copy. 79 */ 80 struct Data 81 { 82 private: 83 /// Wrapped data 84 const(void)[] _contents; 85 /// Reference to the wrapper of the actual data - may be null to indicate wrapped data in managed memory. 86 /// Used as a GC anchor to unmanaged data, and for in-place expands (for appends). 87 DataWrapper wrapper; 88 /// Indicates whether we're allowed to modify the data contents. 89 bool mutable; 90 91 /// Maximum preallocation for append operations. 92 enum { MAX_PREALLOC = 4*1024*1024 } // must be power of 2 93 94 public: 95 /** 96 * Create new instance wrapping the given data. 97 * Params: 98 * data = initial data 99 * forceReallocation = when false, the contents will be duplicated to 100 * unmanaged memory only when it's not on the managed heap; when true, 101 * the contents will be reallocated always. 102 */ 103 this(const(void)[] data, bool forceReallocation = false) 104 { 105 if (data.length == 0) 106 contents = null; 107 else 108 if (forceReallocation || GC.addrOf(data.ptr) is null) 109 { 110 // copy to unmanaged memory 111 auto wrapper = unmanagedNew!MemoryDataWrapper(data.length, data.length); 112 this.wrapper = wrapper; 113 wrapper.contents[] = data[]; 114 contents = wrapper.contents; 115 mutable = true; 116 } 117 else 118 { 119 // just save a reference 120 contents = data; 121 mutable = false; 122 } 123 124 assert(this.length == data.length); 125 } 126 127 /// ditto 128 this(void[] data, bool forceReallocation = false) 129 { 130 const(void)[] cdata = data; 131 this(cdata, forceReallocation); 132 mutable = true; 133 } 134 135 /// Create a new instance with given size/capacity. Capacity defaults to size. 136 this(size_t size, size_t capacity = 0) 137 in 138 { 139 assert(capacity == 0 || size <= capacity); 140 } 141 body 142 { 143 if (!capacity) 144 capacity = size; 145 146 if (capacity) 147 { 148 auto wrapper = unmanagedNew!MemoryDataWrapper(size, capacity); 149 this.wrapper = wrapper; 150 contents = wrapper.contents; 151 mutable = true; 152 } 153 else 154 { 155 wrapper = null; 156 contents = null; 157 } 158 159 assert(this.length == size); 160 } 161 162 this(DataWrapper wrapper, bool mutable) 163 { 164 this.wrapper = wrapper; 165 this.mutable = mutable; 166 this.contents = wrapper.contents; 167 } 168 169 this(this) 170 { 171 if (wrapper) 172 { 173 wrapper.references++; 174 debug (DATA_REFCOUNT) debugLog("%p -> %p: Incrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references); 175 } 176 else 177 debug (DATA_REFCOUNT) debugLog("%p -> %p: this(this) with no wrapper", cast(void*)&this, cast(void*)wrapper); 178 } 179 180 ~this() pure 181 { 182 //clear(); 183 // https://issues.dlang.org/show_bug.cgi?id=13809 184 (cast(void delegate() pure)&clear)(); 185 } 186 187 /* 188 /// Create new instance as a slice over an existing DataWrapper. 189 private this(DataWrapper wrapper, size_t start = 0, size_t end = size_t.max) 190 { 191 this.wrapper = wrapper; 192 this.start = start; 193 this.end = end==size_t.max ? wrapper.capacity : end; 194 } 195 */ 196 197 @property const(void)[] contents() const 198 { 199 return _contents; 200 } 201 202 @property private const(void)[] contents(const(void)[] data) 203 { 204 return _contents = data; 205 } 206 207 /// Get mutable contents 208 @property void[] mcontents() 209 { 210 if (!mutable && length) 211 { 212 reallocate(length, length); 213 assert(mutable); 214 } 215 return cast(void[])_contents; 216 } 217 218 @property const(void)* ptr() const 219 { 220 return contents.ptr; 221 } 222 223 @property void* mptr() 224 { 225 return mcontents.ptr; 226 } 227 228 @property size_t length() const 229 { 230 return contents.length; 231 } 232 233 @property bool empty() const 234 { 235 return contents is null; 236 } 237 238 bool opCast(T)() 239 if (is(T == bool)) 240 { 241 return !empty; 242 } 243 244 @property size_t capacity() const 245 { 246 if (wrapper is null) 247 return length; 248 // We can only safely expand if the memory slice is at the end of the used unmanaged memory block. 249 auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data 250 auto end = pos + length; // end position in wrapper data 251 assert(end <= wrapper.size); 252 if (end == wrapper.size && end < wrapper.capacity) 253 return wrapper.capacity - pos; 254 else 255 return length; 256 } 257 258 /// Put a copy of the data on D's managed heap, and return it. 259 @property 260 void[] toHeap() const 261 { 262 return _contents.dup; 263 } 264 265 private void reallocate(size_t size, size_t capacity) 266 { 267 auto wrapper = unmanagedNew!MemoryDataWrapper(size, capacity); 268 wrapper.contents[0..this.length] = contents[]; 269 //(cast(ubyte[])newWrapper.contents)[this.length..value] = 0; 270 271 clear(); 272 this.wrapper = wrapper; 273 this.contents = wrapper.contents; 274 mutable = true; 275 } 276 277 private void expand(size_t newSize, size_t newCapacity) 278 in 279 { 280 assert(length < newSize); 281 assert(newSize <= newCapacity); 282 } 283 out 284 { 285 assert(length == newSize); 286 } 287 body 288 { 289 if (newCapacity <= capacity) 290 { 291 auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data 292 wrapper.setSize(pos + newSize); 293 contents = ptr[0..newSize]; 294 } 295 else 296 reallocate(newSize, newCapacity); 297 } 298 299 @property void length(size_t value) 300 { 301 if (value == length) // no change 302 return; 303 if (value < length) // shorten 304 _contents = _contents[0..value]; 305 else // lengthen 306 expand(value, value); 307 } 308 alias length opDollar; 309 310 @property Data dup() const 311 { 312 return Data(contents, true); 313 } 314 315 /// This used to be an unsafe method which deleted the wrapped data. 316 /// Now that Data is refcounted, this simply calls clear() and 317 /// additionally asserts that this Data is the only Data holding 318 /// a reference to the wrapper. 319 void deleteContents() 320 out 321 { 322 assert(wrapper is null); 323 } 324 body 325 { 326 if (wrapper) 327 { 328 assert(wrapper.references == 1, "Attempting to call deleteContents with "); 329 clear(); 330 } 331 } 332 333 void clear() 334 { 335 if (wrapper) 336 { 337 assert(wrapper.references > 0, "Dangling pointer to wrapper"); 338 wrapper.references--; 339 debug (DATA_REFCOUNT) debugLog("%p -> %p: Decrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references); 340 if (wrapper.references == 0) 341 wrapper.destroy(); 342 343 wrapper = null; 344 } 345 346 contents = null; 347 } 348 349 Data concat(const(void)[] data) 350 { 351 if (data.length==0) 352 return this; 353 Data result = Data(length + data.length); 354 result.mcontents[0..this.length] = contents[]; 355 result.mcontents[this.length..$] = data[]; 356 return result; 357 } 358 359 Data opCat(T)(const(T)[] data) 360 if (!hasIndirections!T) 361 { 362 return concat(data); 363 } 364 365 Data opCat()(Data data) 366 { 367 return concat(data.contents); 368 } 369 370 Data prepend(const(void)[] data) 371 { 372 Data result = Data(data.length + length); 373 result.mcontents[0..data.length] = data[]; 374 result.mcontents[data.length..$] = contents[]; 375 return result; 376 } 377 378 Data opCat_r(T)(const(T)[] data) 379 if (!hasIndirections!T) 380 { 381 return prepend(data); 382 } 383 384 private static size_t getPreallocSize(size_t length) 385 { 386 if (length < MAX_PREALLOC) 387 return nextPowerOfTwo(length); 388 else 389 return ((length-1) | (MAX_PREALLOC-1)) + 1; 390 } 391 392 Data append(const(void)[] data) 393 { 394 if (data.length==0) 395 return this; 396 size_t oldLength = length; 397 size_t newLength = length + data.length; 398 expand(newLength, getPreallocSize(newLength)); 399 auto newContents = cast(void[])_contents[oldLength..$]; 400 newContents[] = (cast(void[])data)[]; 401 return this; 402 } 403 404 /// Note that unlike opCat (a ~ b), opCatAssign (a ~= b) will preallocate. 405 Data opCatAssign(T)(const(T)[] data) 406 if (!hasIndirections!T) 407 { 408 return append(data); 409 } 410 411 Data opCatAssign()(Data data) 412 { 413 return append(data.contents); 414 } 415 416 Data opCatAssign()(ubyte value) // hack? 417 { 418 return append((&value)[0..1]); 419 } 420 421 Data opSlice() 422 { 423 return this; 424 } 425 426 Data opSlice(size_t x, size_t y) 427 in 428 { 429 assert(x <= y); 430 assert(y <= length); 431 } 432 // https://issues.dlang.org/show_bug.cgi?id=13463 433 // out(result) 434 // { 435 // assert(result.length == y-x); 436 // } 437 body 438 { 439 if (x == y) 440 return Data(); 441 else 442 { 443 Data result = this; 444 result.contents = result.contents[x..y]; 445 return result; 446 } 447 } 448 449 /// Return a new Data for the first size bytes, and slice this instance from size to end. 450 Data popFront(size_t size) 451 in 452 { 453 assert(size <= length); 454 } 455 body 456 { 457 Data result = this; 458 result.contents = contents[0..size]; 459 this .contents = contents[size..$]; 460 return result; 461 } 462 } 463 464 unittest 465 { 466 Data d = Data("aaaaa"); 467 assert(d.wrapper.references == 1); 468 Data s = d[1..4]; 469 assert(d.wrapper.references == 2); 470 } 471 472 // ************************************************************************ 473 474 static /*thread-local*/ size_t dataMemory, dataMemoryPeak; 475 static /*thread-local*/ uint dataCount, allocCount; 476 477 // Abstract wrapper. 478 abstract class DataWrapper 479 { 480 sizediff_t references = 1; 481 abstract @property inout(void)[] contents() inout; 482 abstract @property size_t size() const; 483 abstract void setSize(size_t newSize); 484 abstract @property size_t capacity() const; 485 486 deprecated new(size_t sz) { return unmanagedAlloc(sz); } 487 488 debug ~this() 489 { 490 debug(DATA_REFCOUNT) debugLog("%.*s.~this, references==%d", this.classinfo.name.length, this.classinfo.name.ptr, references); 491 assert(references == 0, "Deleting DataWrapper with non-zero reference count"); 492 } 493 494 @nogc deprecated delete(void* p) 495 { 496 unmanagedFree(p); 497 } 498 } 499 500 void setGCThreshold(size_t value) { MemoryDataWrapper.collectThreshold = value; } 501 502 C unmanagedNew(C, Args...)(auto ref Args args) 503 if (is(C == class)) 504 { 505 import std.conv : emplace; 506 enum size = __traits(classInstanceSize, C); 507 auto p = unmanagedAlloc(size); 508 emplace!C(p[0..size], args); 509 return cast(C)p; 510 } 511 512 void unmanagedDelete(C)(C c) 513 if (is(C == class)) 514 { 515 c.__xdtor(); 516 unmanagedFree(p); 517 } 518 519 private: 520 521 void* unmanagedAlloc(size_t sz) 522 { 523 auto p = core.stdc.stdlib.malloc(sz); 524 525 debug(DATA_REFCOUNT) debugLog("? -> %p: Allocating via malloc", p); 526 527 if (!p) 528 throw new OutOfMemoryError(); 529 530 //GC.addRange(p, sz); 531 return p; 532 } 533 534 void unmanagedFree(void* p) @nogc 535 { 536 if (p) 537 { 538 debug(DATA_REFCOUNT) debugLog("? -> %p: Deleting via free", p); 539 540 //GC.removeRange(p); 541 core.stdc.stdlib.free(p); 542 } 543 } 544 545 version (Windows) 546 import core.sys.windows.windows; 547 else 548 { 549 import core.sys.posix.unistd; 550 import core.sys.posix.sys.mman; 551 } 552 553 /// Wrapper for data in RAM, allocated from the OS. 554 final class MemoryDataWrapper : DataWrapper 555 { 556 /// Pointer to actual data. 557 void* data; 558 /// Used size. Needed for safe appends. 559 size_t _size; 560 /// Allocated capacity. 561 size_t _capacity; 562 563 /// Threshold of allocated memory to trigger a collect. 564 __gshared size_t collectThreshold = 8*1024*1024; // 8MB 565 /// Counter towards the threshold. 566 static /*thread-local*/ size_t allocatedThreshold; 567 568 /// Create a new instance with given capacity. 569 this(size_t size, size_t capacity) 570 { 571 data = malloc(/*ref*/ capacity); 572 if (data is null) 573 { 574 debug(DATA) printf("Garbage collect triggered by failed Data allocation of %llu bytes... ", cast(ulong)capacity); 575 GC.collect(); 576 debug(DATA) printf("Done\n"); 577 data = malloc(/*ref*/ capacity); 578 allocatedThreshold = 0; 579 } 580 if (data is null) 581 onOutOfMemoryError(); 582 583 dataMemory += capacity; 584 if (dataMemoryPeak < dataMemory) 585 dataMemoryPeak = dataMemory; 586 dataCount ++; 587 allocCount ++; 588 589 this._size = size; 590 this._capacity = capacity; 591 592 // also collect 593 allocatedThreshold += capacity; 594 if (allocatedThreshold > collectThreshold) 595 { 596 debug(DATA) printf("Garbage collect triggered by total allocated Data exceeding threshold... "); 597 GC.collect(); 598 debug(DATA) printf("Done\n"); 599 allocatedThreshold = 0; 600 } 601 } 602 603 /// Destructor - destroys the wrapped data. 604 ~this() 605 { 606 free(data, capacity); 607 data = null; 608 // If DataWrapper is created and manually deleted, there is no need to cause frequent collections 609 if (allocatedThreshold > capacity) 610 allocatedThreshold -= capacity; 611 else 612 allocatedThreshold = 0; 613 614 dataMemory -= capacity; 615 dataCount --; 616 } 617 618 @property override 619 size_t size() const { return _size; } 620 621 @property override 622 size_t capacity() const { return _capacity; } 623 624 override void setSize(size_t newSize) 625 { 626 assert(newSize <= capacity); 627 _size = newSize; 628 } 629 630 @property override 631 inout(void)[] contents() inout 632 { 633 return data[0..size]; 634 } 635 636 // https://github.com/D-Programming-Language/druntime/pull/759 637 version(OSX) 638 enum _SC_PAGE_SIZE = 29; 639 640 // https://github.com/D-Programming-Language/druntime/pull/1140 641 version(FreeBSD) 642 enum _SC_PAGE_SIZE = 47; 643 644 version(Windows) 645 { 646 static immutable size_t pageSize; 647 648 shared static this() 649 { 650 SYSTEM_INFO si; 651 GetSystemInfo(&si); 652 pageSize = si.dwPageSize; 653 } 654 } 655 else 656 static if (is(typeof(_SC_PAGE_SIZE))) 657 { 658 static immutable size_t pageSize; 659 660 shared static this() 661 { 662 pageSize = sysconf(_SC_PAGE_SIZE); 663 } 664 } 665 666 static void* malloc(ref size_t size) 667 { 668 if (is(typeof(pageSize))) 669 size = ((size-1) | (pageSize-1))+1; 670 671 version(Windows) 672 { 673 return VirtualAlloc(null, size, MEM_COMMIT, PAGE_READWRITE); 674 } 675 else 676 version(Posix) 677 { 678 version(linux) 679 import core.sys.linux.sys.mman : MAP_ANON; 680 auto p = mmap(null, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); 681 return (p == MAP_FAILED) ? null : p; 682 } 683 else 684 return core.stdc.malloc(size); 685 } 686 687 static void free(void* p, size_t size) 688 { 689 debug 690 { 691 (cast(ubyte*)p)[0..size] = 0xDB; 692 } 693 version(Windows) 694 VirtualFree(p, 0, MEM_RELEASE); 695 else 696 version(Posix) 697 munmap(p, size); 698 else 699 core.stdc.free(size); 700 } 701 } 702 703 // Source: Win32 bindings project 704 version(Windows) 705 { 706 struct SYSTEM_INFO { 707 union { 708 DWORD dwOemId; 709 struct { 710 WORD wProcessorArchitecture; 711 WORD wReserved; 712 } 713 } 714 DWORD dwPageSize; 715 PVOID lpMinimumApplicationAddress; 716 PVOID lpMaximumApplicationAddress; 717 DWORD dwActiveProcessorMask; 718 DWORD dwNumberOfProcessors; 719 DWORD dwProcessorType; 720 DWORD dwAllocationGranularity; 721 WORD wProcessorLevel; 722 WORD wProcessorRevision; 723 } 724 alias SYSTEM_INFO* LPSYSTEM_INFO; 725 726 extern(Windows) VOID GetSystemInfo(LPSYSTEM_INFO); 727 } 728 729 debug(DATA_REFCOUNT) import ae.utils.exception, ae.sys.memory, std.stdio; 730 731 debug(DATA_REFCOUNT) void debugLog(Args...)(const char* s, Args args) @nogc 732 { 733 printf(s, args); 734 printf("\n"); 735 if (inCollect()) 736 printf("\t(in GC collect)\n"); 737 else 738 (cast(void function() @nogc)&debugStackTrace)(); 739 fflush(core.stdc.stdio.stdout); 740 } 741 742 debug(DATA_REFCOUNT) void debugStackTrace() 743 { 744 try 745 foreach (line; getStackTrace()) 746 writeln("\t", line); 747 catch (Throwable e) 748 writeln("\t(stacktrace error: ", e.msg, ")"); 749 }