1 /** 2 * Wrappers for raw _data located in unmanaged memory. 3 * 4 * Using the Data type will only place a small object in managed memory, 5 * keeping the actual _data in unmanaged memory. 6 * A proxy class (DataWrapper) is used to safely allow multiple references to 7 * the same block of unmanaged memory. 8 * When the DataWrapper object is destroyed (either manually or by the garbage 9 * collector when there are no remaining Data references), the unmanaged 10 * memory is deallocated. 11 * 12 * This has the following advantage over using managed memory: 13 * $(UL 14 * $(LI Faster allocation and deallocation, since memory is requested from 15 * the OS directly as whole pages) 16 * $(LI Greatly reduced chance of memory leaks (on 32-bit platforms) due to 17 * stray pointers) 18 * $(LI Overall improved GC performance due to reduced size of managed heap) 19 * $(LI Memory is immediately returned to the OS when _data is deallocated) 20 * ) 21 * On the other hand, using Data has the following disadvantages: 22 * $(UL 23 * $(LI This module is designed to store raw _data which does not have any 24 * pointers. Storing objects containing pointers to managed memory is 25 * unsupported, and may result in memory corruption.) 26 * $(LI Small objects may be stored inefficiently, as the module requests 27 * entire pages of memory from the OS. Considering allocating one large 28 * object and use slices (Data instances) for individual objects.) 29 * $(LI Incorrect usage (i.e. retaining/escaping references to wrapped memory 30 * without keeping a reference to its corresponding DataWrapper) can 31 * result in dangling pointers and hard-to-debug memory corruption.) 32 * ) 33 * 34 * License: 35 * This Source Code Form is subject to the terms of 36 * the Mozilla Public License, v. 2.0. If a copy of 37 * the MPL was not distributed with this file, You 38 * can obtain one at http://mozilla.org/MPL/2.0/. 39 * 40 * Authors: 41 * Vladimir Panteleev <vladimir@thecybershadow.net> 42 */ 43 44 module ae.sys.data; 45 46 static import core.stdc.stdlib; 47 import core.stdc.string : memmove; 48 import std.traits; 49 import core.memory; 50 import core.exception; 51 debug import std.string; 52 public import ae.sys.dataset; 53 import ae.utils.math; 54 55 debug(DATA) import core.stdc.stdio; 56 57 // ideas/todo: 58 // * templatize (and forbid using aliased types)? 59 // * use heap (malloc/Windows heap API) for small objects? 60 // * reference counting? 61 // * "immutable" support? 62 63 /** 64 * Wrapper for data located in external memory, to prevent faux references. 65 * Represents a slice of data, which may or may not be in unmanaged memory. 66 * Data in unmanaged memory is bound to a DataWrapper class instance. 67 * 68 * All operations on this class should be safe, except for accessing contents directly. 69 * All operations on contents must be accompanied by a live reference to the Data object, 70 * to keep a GC anchor towards the unmanaged data. 71 * 72 * Concatenations and appends to Data contents will cause reallocations on the heap, consider using Data instead. 73 * 74 * Be sure not to lose Data references while using their contents! 75 * For example, avoid code like this: 76 * ---- 77 * fun(cast(string)transformSomeData(someData).contents); 78 * ---- 79 * The Data return value may be unreachable once .contents is evaluated. 80 * Use .toHeap instead of .contents in such cases to get a safe heap copy. 81 */ 82 struct Data 83 { 84 private: 85 /// Wrapped data 86 const(void)[] _contents; 87 /// Reference to the wrapper of the actual data - may be null to indicate wrapped data in managed memory. 88 /// Used as a GC anchor to unmanaged data, and for in-place expands (for appends). 89 DataWrapper wrapper; 90 /// Indicates whether we're allowed to modify the data contents. 91 bool mutable; 92 93 /// Maximum preallocation for append operations. 94 enum { MAX_PREALLOC = 4*1024*1024 } // must be power of 2 95 96 public: 97 /** 98 * Create new instance wrapping the given data. 99 * Params: 100 * data = initial data 101 * forceReallocation = when false, the contents will be duplicated to 102 * unmanaged memory only when it's not on the managed heap; when true, 103 * the contents will be reallocated always. 104 */ 105 this(const(void)[] data, bool forceReallocation = false) 106 { 107 if (data.length == 0) 108 contents = null; 109 else 110 if (forceReallocation || GC.addrOf(data.ptr) is null) 111 { 112 // copy to unmanaged memory 113 auto wrapper = unmanagedNew!MemoryDataWrapper(data.length, data.length); 114 this.wrapper = wrapper; 115 wrapper.contents[] = data[]; 116 contents = wrapper.contents; 117 mutable = true; 118 } 119 else 120 { 121 // just save a reference 122 contents = data; 123 mutable = false; 124 } 125 126 assert(this.length == data.length); 127 } 128 129 /// ditto 130 this(void[] data, bool forceReallocation = false) 131 { 132 const(void)[] cdata = data; 133 this(cdata, forceReallocation); 134 mutable = true; 135 } 136 137 /// Create a new instance with given size/capacity. Capacity defaults to size. 138 this(size_t size, size_t capacity = 0) 139 in 140 { 141 assert(capacity == 0 || size <= capacity); 142 } 143 do 144 { 145 if (!capacity) 146 capacity = size; 147 148 if (capacity) 149 { 150 auto wrapper = unmanagedNew!MemoryDataWrapper(size, capacity); 151 this.wrapper = wrapper; 152 contents = wrapper.contents; 153 mutable = true; 154 } 155 else 156 { 157 wrapper = null; 158 contents = null; 159 } 160 161 assert(this.length == size); 162 } 163 164 this(DataWrapper wrapper, bool mutable) 165 { 166 this.wrapper = wrapper; 167 this.mutable = mutable; 168 this.contents = wrapper.contents; 169 } 170 171 this(this) 172 { 173 if (wrapper) 174 { 175 wrapper.references++; 176 debug (DATA_REFCOUNT) debugLog("%p -> %p: Incrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references); 177 } 178 else 179 debug (DATA_REFCOUNT) debugLog("%p -> %p: this(this) with no wrapper", cast(void*)&this, cast(void*)wrapper); 180 } 181 182 ~this() pure 183 { 184 //clear(); 185 // https://issues.dlang.org/show_bug.cgi?id=13809 186 (cast(void delegate() pure)&clear)(); 187 } 188 189 debug(DATA) invariant 190 { 191 if (wrapper) 192 assert(wrapper.references > 0, "Data referencing DataWrapper with bad reference count"); 193 } 194 195 /* 196 /// Create new instance as a slice over an existing DataWrapper. 197 private this(DataWrapper wrapper, size_t start = 0, size_t end = size_t.max) 198 { 199 this.wrapper = wrapper; 200 this.start = start; 201 this.end = end==size_t.max ? wrapper.capacity : end; 202 } 203 */ 204 205 @property const(void)[] contents() const 206 { 207 return _contents; 208 } 209 210 @property private const(void)[] contents(const(void)[] data) 211 { 212 return _contents = data; 213 } 214 215 /// Get mutable contents 216 @property void[] mcontents() 217 { 218 if (!mutable && length) 219 { 220 reallocate(length, length); 221 assert(mutable); 222 } 223 return cast(void[])_contents; 224 } 225 226 @property const(void)* ptr() const 227 { 228 return contents.ptr; 229 } 230 231 @property void* mptr() 232 { 233 return mcontents.ptr; 234 } 235 236 @property size_t length() const 237 { 238 return contents.length; 239 } 240 241 @property bool empty() const 242 { 243 return contents is null; 244 } 245 246 bool opCast(T)() 247 if (is(T == bool)) 248 { 249 return !empty; 250 } 251 252 @property size_t capacity() const 253 { 254 if (wrapper is null) 255 return length; 256 // We can only safely expand if the memory slice is at the end of the used unmanaged memory block. 257 auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data 258 auto end = pos + length; // end position in wrapper data 259 assert(end <= wrapper.size); 260 if (end == wrapper.size && end < wrapper.capacity) 261 return wrapper.capacity - pos; 262 else 263 return length; 264 } 265 266 /// Put a copy of the data on D's managed heap, and return it. 267 @property 268 void[] toHeap() const 269 { 270 return _contents.dup; 271 } 272 273 private void reallocate(size_t size, size_t capacity) 274 { 275 auto wrapper = unmanagedNew!MemoryDataWrapper(size, capacity); 276 wrapper.contents[0..this.length] = contents[]; 277 //(cast(ubyte[])newWrapper.contents)[this.length..value] = 0; 278 279 clear(); 280 this.wrapper = wrapper; 281 this.contents = wrapper.contents; 282 mutable = true; 283 } 284 285 private void expand(size_t newSize, size_t newCapacity) 286 in 287 { 288 assert(length < newSize); 289 assert(newSize <= newCapacity); 290 } 291 out 292 { 293 assert(length == newSize); 294 } 295 do 296 { 297 if (newCapacity <= capacity) 298 { 299 auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data 300 wrapper.setSize(pos + newSize); 301 contents = ptr[0..newSize]; 302 } 303 else 304 reallocate(newSize, newCapacity); 305 } 306 307 @property void length(size_t value) 308 { 309 if (value == length) // no change 310 return; 311 if (value < length) // shorten 312 _contents = _contents[0..value]; 313 else // lengthen 314 expand(value, value); 315 } 316 alias length opDollar; 317 318 @property Data dup() const 319 { 320 return Data(contents, true); 321 } 322 323 /// This used to be an unsafe method which deleted the wrapped data. 324 /// Now that Data is refcounted, this simply calls clear() and 325 /// additionally asserts that this Data is the only Data holding 326 /// a reference to the wrapper. 327 void deleteContents() 328 out 329 { 330 assert(wrapper is null); 331 } 332 do 333 { 334 if (wrapper) 335 { 336 assert(wrapper.references == 1, "Attempting to call deleteContents with "); 337 clear(); 338 } 339 } 340 341 void clear() 342 { 343 if (wrapper) 344 { 345 assert(wrapper.references > 0, "Dangling pointer to wrapper"); 346 wrapper.references--; 347 debug (DATA_REFCOUNT) debugLog("%p -> %p: Decrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references); 348 if (wrapper.references == 0) 349 wrapper.destroy(); 350 351 wrapper = null; 352 } 353 354 contents = null; 355 } 356 357 Data concat(const(void)[] data) 358 { 359 if (data.length==0) 360 return this; 361 Data result = Data(length + data.length); 362 result.mcontents[0..this.length] = contents[]; 363 result.mcontents[this.length..$] = data[]; 364 return result; 365 } 366 367 template opBinary(string op) if (op == "~") 368 { 369 Data opBinary(T)(const(T)[] data) 370 if (!hasIndirections!T) 371 { 372 return concat(data); 373 } 374 375 Data opBinary()(Data data) 376 { 377 return concat(data.contents); 378 } 379 } 380 381 Data prepend(const(void)[] data) 382 { 383 Data result = Data(data.length + length); 384 result.mcontents[0..data.length] = data[]; 385 result.mcontents[data.length..$] = contents[]; 386 return result; 387 } 388 389 template opBinaryRight(string op) if (op == "~") 390 { 391 Data opBinaryRight(T)(const(T)[] data) 392 if (!hasIndirections!T) 393 { 394 return prepend(data); 395 } 396 } 397 398 private static size_t getPreallocSize(size_t length) 399 { 400 if (length < MAX_PREALLOC) 401 return nextPowerOfTwo(length); 402 else 403 return ((length-1) | (MAX_PREALLOC-1)) + 1; 404 } 405 406 Data append(const(void)[] data) 407 { 408 if (data.length==0) 409 return this; 410 size_t oldLength = length; 411 size_t newLength = length + data.length; 412 expand(newLength, getPreallocSize(newLength)); 413 auto newContents = cast(void[])_contents[oldLength..$]; 414 newContents[] = (cast(void[])data)[]; 415 return this; 416 } 417 418 /// Note that unlike concatenation (a ~ b), appending (a ~= b) will preallocate. 419 template opOpAssign(string op) if (op == "~") 420 { 421 Data opOpAssign(T)(const(T)[] data) 422 if (!hasIndirections!T) 423 { 424 return append(data); 425 } 426 427 Data opOpAssign()(Data data) 428 { 429 return append(data.contents); 430 } 431 432 Data opOpAssign()(ubyte value) // hack? 433 { 434 return append((&value)[0..1]); 435 } 436 } 437 438 Data opSlice() 439 { 440 return this; 441 } 442 443 Data opSlice(size_t x, size_t y) 444 in 445 { 446 assert(x <= y); 447 assert(y <= length); 448 } 449 // https://issues.dlang.org/show_bug.cgi?id=13463 450 // out(result) 451 // { 452 // assert(result.length == y-x); 453 // } 454 do 455 { 456 if (x == y) 457 return Data(); 458 else 459 { 460 Data result = this; 461 result.contents = result.contents[x..y]; 462 return result; 463 } 464 } 465 466 /// Return a new Data for the first size bytes, and slice this instance from size to end. 467 Data popFront(size_t size) 468 in 469 { 470 assert(size <= length); 471 } 472 do 473 { 474 Data result = this; 475 result.contents = contents[0..size]; 476 this .contents = contents[size..$]; 477 return result; 478 } 479 } 480 481 unittest 482 { 483 Data d = Data("aaaaa"); 484 assert(d.wrapper.references == 1); 485 Data s = d[1..4]; 486 assert(d.wrapper.references == 2); 487 } 488 489 // ************************************************************************ 490 491 static /*thread-local*/ size_t dataMemory, dataMemoryPeak; 492 static /*thread-local*/ uint dataCount, allocCount; 493 494 // Abstract wrapper. 495 abstract class DataWrapper 496 { 497 sizediff_t references = 1; 498 abstract @property inout(void)[] contents() inout; 499 abstract @property size_t size() const; 500 abstract void setSize(size_t newSize); 501 abstract @property size_t capacity() const; 502 503 debug ~this() 504 { 505 debug(DATA_REFCOUNT) debugLog("%.*s.~this, references==%d", this.classinfo.name.length, this.classinfo.name.ptr, references); 506 assert(references == 0, "Deleting DataWrapper with non-zero reference count"); 507 } 508 } 509 510 void setGCThreshold(size_t value) { MemoryDataWrapper.collectThreshold = value; } 511 512 C unmanagedNew(C, Args...)(auto ref Args args) 513 if (is(C == class)) 514 { 515 import std.conv : emplace; 516 enum size = __traits(classInstanceSize, C); 517 auto p = unmanagedAlloc(size); 518 emplace!C(p[0..size], args); 519 return cast(C)p; 520 } 521 522 void unmanagedDelete(C)(C c) 523 if (is(C == class)) 524 { 525 c.__xdtor(); 526 unmanagedFree(p); 527 } 528 529 private: 530 531 void* unmanagedAlloc(size_t sz) 532 { 533 auto p = core.stdc.stdlib.malloc(sz); 534 535 debug(DATA_REFCOUNT) debugLog("? -> %p: Allocating via malloc (%d bytes)", p, cast(uint)sz); 536 537 if (!p) 538 throw new OutOfMemoryError(); 539 540 //GC.addRange(p, sz); 541 return p; 542 } 543 544 void unmanagedFree(void* p) @nogc 545 { 546 if (p) 547 { 548 debug(DATA_REFCOUNT) debugLog("? -> %p: Deleting via free", p); 549 550 //GC.removeRange(p); 551 core.stdc.stdlib.free(p); 552 } 553 } 554 555 version (Windows) 556 import core.sys.windows.windows; 557 else 558 { 559 import core.sys.posix.unistd; 560 import core.sys.posix.sys.mman; 561 } 562 563 /// Wrapper for data in RAM, allocated from the OS. 564 final class MemoryDataWrapper : DataWrapper 565 { 566 /// Pointer to actual data. 567 void* data; 568 /// Used size. Needed for safe appends. 569 size_t _size; 570 /// Allocated capacity. 571 size_t _capacity; 572 573 /// Threshold of allocated memory to trigger a collect. 574 __gshared size_t collectThreshold = 8*1024*1024; // 8MB 575 /// Counter towards the threshold. 576 static /*thread-local*/ size_t allocatedThreshold; 577 578 /// Create a new instance with given capacity. 579 this(size_t size, size_t capacity) 580 { 581 data = malloc(/*ref*/ capacity); 582 if (data is null) 583 { 584 debug(DATA) fprintf(stderr, "Garbage collect triggered by failed Data allocation of %llu bytes... ", cast(ulong)capacity); 585 GC.collect(); 586 debug(DATA) fprintf(stderr, "Done\n"); 587 data = malloc(/*ref*/ capacity); 588 allocatedThreshold = 0; 589 } 590 if (data is null) 591 onOutOfMemoryError(); 592 593 dataMemory += capacity; 594 if (dataMemoryPeak < dataMemory) 595 dataMemoryPeak = dataMemory; 596 dataCount ++; 597 allocCount ++; 598 599 this._size = size; 600 this._capacity = capacity; 601 602 // also collect 603 allocatedThreshold += capacity; 604 if (allocatedThreshold > collectThreshold) 605 { 606 debug(DATA) fprintf(stderr, "Garbage collect triggered by total allocated Data exceeding threshold... "); 607 GC.collect(); 608 debug(DATA) fprintf(stderr, "Done\n"); 609 allocatedThreshold = 0; 610 } 611 } 612 613 /// Destructor - destroys the wrapped data. 614 ~this() 615 { 616 free(data, capacity); 617 data = null; 618 // If DataWrapper is created and manually deleted, there is no need to cause frequent collections 619 if (allocatedThreshold > capacity) 620 allocatedThreshold -= capacity; 621 else 622 allocatedThreshold = 0; 623 624 dataMemory -= capacity; 625 dataCount --; 626 } 627 628 @property override 629 size_t size() const { return _size; } 630 631 @property override 632 size_t capacity() const { return _capacity; } 633 634 override void setSize(size_t newSize) 635 { 636 assert(newSize <= capacity); 637 _size = newSize; 638 } 639 640 @property override 641 inout(void)[] contents() inout 642 { 643 return data[0..size]; 644 } 645 646 // https://github.com/D-Programming-Language/druntime/pull/759 647 version(OSX) 648 enum _SC_PAGE_SIZE = 29; 649 650 // https://github.com/D-Programming-Language/druntime/pull/1140 651 version(FreeBSD) 652 enum _SC_PAGE_SIZE = 47; 653 654 version(Windows) 655 { 656 static immutable size_t pageSize; 657 658 shared static this() 659 { 660 SYSTEM_INFO si; 661 GetSystemInfo(&si); 662 pageSize = si.dwPageSize; 663 } 664 } 665 else 666 static if (is(typeof(_SC_PAGE_SIZE))) 667 { 668 static immutable size_t pageSize; 669 670 shared static this() 671 { 672 pageSize = sysconf(_SC_PAGE_SIZE); 673 } 674 } 675 676 static void* malloc(ref size_t size) 677 { 678 if (is(typeof(pageSize))) 679 size = ((size-1) | (pageSize-1))+1; 680 681 version(Windows) 682 { 683 return VirtualAlloc(null, size, MEM_COMMIT, PAGE_READWRITE); 684 } 685 else 686 version(Posix) 687 { 688 version(linux) 689 import core.sys.linux.sys.mman : MAP_ANON; 690 auto p = mmap(null, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); 691 return (p == MAP_FAILED) ? null : p; 692 } 693 else 694 return core.stdc.malloc(size); 695 } 696 697 static void free(void* p, size_t size) 698 { 699 debug 700 { 701 (cast(ubyte*)p)[0..size] = 0xDB; 702 } 703 version(Windows) 704 VirtualFree(p, 0, MEM_RELEASE); 705 else 706 version(Posix) 707 munmap(p, size); 708 else 709 core.stdc.free(size); 710 } 711 } 712 713 // Source: Win32 bindings project 714 version(Windows) 715 { 716 struct SYSTEM_INFO { 717 union { 718 DWORD dwOemId; 719 struct { 720 WORD wProcessorArchitecture; 721 WORD wReserved; 722 } 723 } 724 DWORD dwPageSize; 725 PVOID lpMinimumApplicationAddress; 726 PVOID lpMaximumApplicationAddress; 727 DWORD dwActiveProcessorMask; 728 DWORD dwNumberOfProcessors; 729 DWORD dwProcessorType; 730 DWORD dwAllocationGranularity; 731 WORD wProcessorLevel; 732 WORD wProcessorRevision; 733 } 734 alias SYSTEM_INFO* LPSYSTEM_INFO; 735 736 extern(Windows) VOID GetSystemInfo(LPSYSTEM_INFO); 737 } 738 739 debug(DATA_REFCOUNT) import ae.utils.exception, ae.sys.memory, std.stdio; 740 741 debug(DATA_REFCOUNT) void debugLog(Args...)(const char* s, Args args) @nogc 742 { 743 fprintf(stderr, s, args); 744 fprintf(stderr, "\n"); 745 if (inCollect()) 746 fprintf(stderr, "\t(in GC collect)\n"); 747 else 748 (cast(void function() @nogc)&debugStackTrace)(); 749 fflush(core.stdc.stdio.stderr); 750 } 751 752 debug(DATA_REFCOUNT) void debugStackTrace() 753 { 754 try 755 foreach (line; getStackTrace()) 756 writeln("\t", line); 757 catch (Throwable e) 758 writeln("\t(stacktrace error: ", e.msg, ")"); 759 }