1 /** 2 * Wrappers for raw _data located in unmanaged memory. 3 * 4 * Using the Data type will only place a small object in managed memory, 5 * keeping the actual _data in unmanaged memory. 6 * A proxy class (DataWrapper) is used to safely allow multiple references to 7 * the same block of unmanaged memory. 8 * When the DataWrapper object is destroyed (either manually or by the garbage 9 * collector when there are no remaining Data references), the unmanaged 10 * memory is deallocated. 11 * 12 * This has the following advantage over using managed memory: 13 * $(UL 14 * $(LI Faster allocation and deallocation, since memory is requested from 15 * the OS directly as whole pages) 16 * $(LI Greatly reduced chance of memory leaks (on 32-bit platforms) due to 17 * stray pointers) 18 * $(LI Overall improved GC performance due to reduced size of managed heap) 19 * $(LI Memory is immediately returned to the OS when _data is deallocated) 20 * ) 21 * On the other hand, using Data has the following disadvantages: 22 * $(UL 23 * $(LI This module is designed to store raw _data which does not have any 24 * pointers. Storing objects containing pointers to managed memory is 25 * unsupported, and may result in memory corruption.) 26 * $(LI Small objects may be stored inefficiently, as the module requests 27 * entire pages of memory from the OS. Considering allocating one large 28 * object and use slices (Data instances) for individual objects.) 29 * ) 30 * 31 * License: 32 * This Source Code Form is subject to the terms of 33 * the Mozilla Public License, v. 2.0. If a copy of 34 * the MPL was not distributed with this file, You 35 * can obtain one at http://mozilla.org/MPL/2.0/. 36 * 37 * Authors: 38 * Vladimir Panteleev <vladimir@thecybershadow.net> 39 */ 40 41 module ae.sys.data; 42 43 static import core.stdc.stdlib; 44 import core.stdc.string : memmove; 45 import std.traits; 46 import core.memory; 47 import core.exception; 48 debug import std.stdio; 49 debug import std.string; 50 public import ae.sys.dataset; 51 import ae.utils.math; 52 53 debug(DATA) import core.stdc.stdio; 54 55 // ideas/todo: 56 // * templatize (and forbid using aliased types)? 57 // * use heap (malloc/Windows heap API) for small objects? 58 // * reference counting? 59 // * "immutable" support? 60 61 /** 62 * Wrapper for data located in external memory, to prevent faux references. 63 * Represents a slice of data, which may or may not be in unmanaged memory. 64 * Data in unmanaged memory is bound to a DataWrapper class instance. 65 * 66 * All operations on this class should be safe, except for accessing contents directly. 67 * All operations on contents must be accompanied by a live reference to the Data object, 68 * to keep a GC anchor towards the unmanaged data. 69 * 70 * Concatenations and appends to Data contents will cause reallocations on the heap, consider using Data instead. 71 * 72 * Be sure not to lose Data references while using their contents! 73 * For example, avoid code like this: 74 * ---- 75 * fun(cast(string)transformSomeData(someData).contents); 76 * ---- 77 * The Data return value may be unreachable once .contents is evaluated. 78 * Use .toHeap instead of .contents in such cases to get a safe heap copy. 79 */ 80 struct Data 81 { 82 private: 83 /// Wrapped data 84 const(void)[] _contents; 85 /// Reference to the wrapper of the actual data - may be null to indicate wrapped data in managed memory. 86 /// Used as a GC anchor to unmanaged data, and for in-place expands (for appends). 87 DataWrapper wrapper; 88 /// Indicates whether we're allowed to modify the data contents. 89 bool mutable; 90 91 /// Maximum preallocation for append operations. 92 enum { MAX_PREALLOC = 4*1024*1024 } // must be power of 2 93 94 public: 95 /** 96 * Create new instance wrapping the given data. 97 * Params: 98 * data = initial data 99 * forceReallocation = when false, the contents will be duplicated to 100 * unmanaged memory only when it's not on the managed heap; when true, 101 * the contents will be reallocated always. 102 */ 103 this(const(void)[] data, bool forceReallocation = false) 104 { 105 if (data.length == 0) 106 contents = null; 107 else 108 if (forceReallocation || GC.addrOf(data.ptr) is null) 109 { 110 // copy to unmanaged memory 111 auto wrapper = new MemoryDataWrapper(data.length, data.length); 112 this.wrapper = wrapper; 113 wrapper.contents[] = data[]; 114 contents = wrapper.contents; 115 mutable = true; 116 } 117 else 118 { 119 // just save a reference 120 contents = data; 121 mutable = false; 122 } 123 124 assert(this.length == data.length); 125 } 126 127 /// ditto 128 this(void[] data, bool forceReallocation = false) 129 { 130 const(void)[] cdata = data; 131 this(cdata, forceReallocation); 132 mutable = true; 133 } 134 135 /// Create a new instance with given size/capacity. Capacity defaults to size. 136 this(size_t size, size_t capacity = 0) 137 in 138 { 139 assert(capacity == 0 || size <= capacity); 140 } 141 body 142 { 143 if (!capacity) 144 capacity = size; 145 146 if (capacity) 147 { 148 auto wrapper = new MemoryDataWrapper(size, capacity); 149 this.wrapper = wrapper; 150 contents = wrapper.contents; 151 mutable = true; 152 } 153 else 154 { 155 wrapper = null; 156 contents = null; 157 } 158 159 assert(this.length == size); 160 } 161 162 this(DataWrapper wrapper, bool mutable) 163 { 164 this.wrapper = wrapper; 165 this.mutable = mutable; 166 this.contents = wrapper.contents; 167 } 168 169 this(this) 170 { 171 if (wrapper) 172 { 173 wrapper.references++; 174 debug (DATA_REFCOUNT) debugLog("%p -> %p: Incrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references); 175 } 176 else 177 debug (DATA_REFCOUNT) debugLog("%p -> %p: this(this) with no wrapper", cast(void*)&this, cast(void*)wrapper); 178 } 179 180 ~this() pure 181 { 182 //clear(); 183 // https://issues.dlang.org/show_bug.cgi?id=13809 184 (cast(void delegate() pure)&clear)(); 185 } 186 187 /* 188 /// Create new instance as a slice over an existing DataWrapper. 189 private this(DataWrapper wrapper, size_t start = 0, size_t end = size_t.max) 190 { 191 this.wrapper = wrapper; 192 this.start = start; 193 this.end = end==size_t.max ? wrapper.capacity : end; 194 } 195 */ 196 197 @property const(void)[] contents() const 198 { 199 return _contents; 200 } 201 202 @property private const(void)[] contents(const(void)[] data) 203 { 204 return _contents = data; 205 } 206 207 /// Get mutable contents 208 @property void[] mcontents() 209 { 210 if (!mutable && length) 211 { 212 reallocate(length, length); 213 assert(mutable); 214 } 215 return cast(void[])_contents; 216 } 217 218 @property const(void)* ptr() const 219 { 220 return contents.ptr; 221 } 222 223 @property void* mptr() 224 { 225 return mcontents.ptr; 226 } 227 228 @property size_t length() const 229 { 230 return contents.length; 231 } 232 233 @property bool empty() const 234 { 235 return contents is null; 236 } 237 238 bool opCast(T)() 239 if (is(T == bool)) 240 { 241 return !empty; 242 } 243 244 @property size_t capacity() const 245 { 246 if (wrapper is null) 247 return length; 248 // We can only safely expand if the memory slice is at the end of the used unmanaged memory block. 249 auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data 250 auto end = pos + length; // end position in wrapper data 251 assert(end <= wrapper.size); 252 if (end == wrapper.size && end < wrapper.capacity) 253 return wrapper.capacity - pos; 254 else 255 return length; 256 } 257 258 /// Put a copy of the data on D's managed heap, and return it. 259 @property 260 void[] toHeap() const 261 { 262 return _contents.dup; 263 } 264 265 private void reallocate(size_t size, size_t capacity) 266 { 267 auto wrapper = new MemoryDataWrapper(size, capacity); 268 wrapper.contents[0..this.length] = contents[]; 269 //(cast(ubyte[])newWrapper.contents)[this.length..value] = 0; 270 271 clear(); 272 this.wrapper = wrapper; 273 this.contents = wrapper.contents; 274 mutable = true; 275 } 276 277 private void expand(size_t newSize, size_t newCapacity) 278 in 279 { 280 assert(length < newSize); 281 assert(newSize <= newCapacity); 282 } 283 out 284 { 285 assert(length == newSize); 286 } 287 body 288 { 289 if (newCapacity <= capacity) 290 { 291 auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data 292 wrapper.setSize(pos + newSize); 293 contents = ptr[0..newSize]; 294 } 295 else 296 reallocate(newSize, newCapacity); 297 } 298 299 @property void length(size_t value) 300 { 301 if (value == length) // no change 302 return; 303 if (value < length) // shorten 304 _contents = _contents[0..value]; 305 else // lengthen 306 expand(value, value); 307 } 308 alias length opDollar; 309 310 @property Data dup() const 311 { 312 return Data(contents, true); 313 } 314 315 /// This used to be an unsafe method which deleted the wrapped data. 316 /// Now that Data is refcounted, this simply calls clear() and 317 /// additionally asserts that this Data is the only Data holding 318 /// a reference to the wrapper. 319 void deleteContents() 320 out 321 { 322 assert(wrapper is null); 323 } 324 body 325 { 326 if (wrapper) 327 { 328 assert(wrapper.references == 1, "Attempting to call deleteContents with "); 329 clear(); 330 } 331 } 332 333 void clear() 334 { 335 if (wrapper) 336 { 337 assert(wrapper.references > 0, "Dangling pointer to wrapper"); 338 wrapper.references--; 339 debug (DATA_REFCOUNT) debugLog("%p -> %p: Decrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references); 340 if (wrapper.references == 0) 341 wrapper.destroy(); 342 343 wrapper = null; 344 } 345 346 contents = null; 347 } 348 349 Data concat(const(void)[] data) 350 { 351 if (data.length==0) 352 return this; 353 Data result = Data(length + data.length); 354 result.mcontents[0..this.length] = contents[]; 355 result.mcontents[this.length..$] = data[]; 356 return result; 357 } 358 359 Data opCat(T)(const(T)[] data) 360 if (!hasIndirections!T) 361 { 362 return concat(data); 363 } 364 365 Data opCat()(Data data) 366 { 367 return concat(data.contents); 368 } 369 370 Data prepend(const(void)[] data) 371 { 372 Data result = Data(data.length + length); 373 result.mcontents[0..data.length] = data[]; 374 result.mcontents[data.length..$] = contents[]; 375 return result; 376 } 377 378 Data opCat_r(T)(const(T)[] data) 379 if (!hasIndirections!T) 380 { 381 return prepend(data); 382 } 383 384 private static size_t getPreallocSize(size_t length) 385 { 386 if (length < MAX_PREALLOC) 387 return nextPowerOfTwo(length); 388 else 389 return ((length-1) | (MAX_PREALLOC-1)) + 1; 390 } 391 392 Data append(const(void)[] data) 393 { 394 if (data.length==0) 395 return this; 396 size_t oldLength = length; 397 size_t newLength = length + data.length; 398 expand(newLength, getPreallocSize(newLength)); 399 auto newContents = cast(void[])_contents[oldLength..$]; 400 newContents[] = (cast(void[])data)[]; 401 return this; 402 } 403 404 /// Note that unlike opCat (a ~ b), opCatAssign (a ~= b) will preallocate. 405 Data opCatAssign(T)(const(T)[] data) 406 if (!hasIndirections!T) 407 { 408 return append(data); 409 } 410 411 Data opCatAssign()(Data data) 412 { 413 return append(data.contents); 414 } 415 416 Data opCatAssign()(ubyte value) // hack? 417 { 418 return append((&value)[0..1]); 419 } 420 421 Data opSlice() 422 { 423 return this; 424 } 425 426 Data opSlice(size_t x, size_t y) 427 in 428 { 429 assert(x <= y); 430 assert(y <= length); 431 } 432 // https://issues.dlang.org/show_bug.cgi?id=13463 433 // out(result) 434 // { 435 // assert(result.length == y-x); 436 // } 437 body 438 { 439 if (x == y) 440 return Data(); 441 else 442 { 443 Data result = this; 444 result.contents = result.contents[x..y]; 445 return result; 446 } 447 } 448 449 /// Return a new Data for the first size bytes, and slice this instance from size to end. 450 Data popFront(size_t size) 451 in 452 { 453 assert(size <= length); 454 } 455 body 456 { 457 Data result = this; 458 result.contents = contents[0..size]; 459 this .contents = contents[size..$]; 460 return result; 461 } 462 } 463 464 unittest 465 { 466 Data d = Data("aaaaa"); 467 assert(d.wrapper.references == 1); 468 Data s = d[1..4]; 469 assert(d.wrapper.references == 2); 470 } 471 472 // ************************************************************************ 473 474 static /*thread-local*/ size_t dataMemory, dataMemoryPeak; 475 static /*thread-local*/ uint dataCount, allocCount; 476 477 // Abstract wrapper. 478 abstract class DataWrapper 479 { 480 sizediff_t references = 1; 481 abstract @property inout(void)[] contents() inout; 482 abstract @property size_t size() const; 483 abstract void setSize(size_t newSize); 484 abstract @property size_t capacity() const; 485 486 new(size_t sz) 487 { 488 void* p = core.stdc.stdlib.malloc(sz); 489 490 debug(DATA_REFCOUNT) debugLog("? -> %p: Allocating via malloc", p); 491 492 if (!p) 493 throw new OutOfMemoryError(); 494 495 //GC.addRange(p, sz); 496 return p; 497 } 498 499 debug ~this() 500 { 501 debug(DATA_REFCOUNT) debugLog("%.*s.~this, references==%d", this.classinfo.name.length, this.classinfo.name.ptr, references); 502 assert(references == 0, "Deleting DataWrapper with non-zero reference count"); 503 } 504 505 @nogc delete(void* p) 506 { 507 if (p) 508 { 509 debug(DATA_REFCOUNT) debugLog("? -> %p: Deleting via free", p); 510 511 //GC.removeRange(p); 512 core.stdc.stdlib.free(p); 513 } 514 } 515 } 516 517 void setGCThreshold(size_t value) { MemoryDataWrapper.collectThreshold = value; } 518 519 private: 520 521 version (Windows) 522 import core.sys.windows.windows; 523 else 524 { 525 import core.sys.posix.unistd; 526 import core.sys.posix.sys.mman; 527 } 528 529 /// Wrapper for data in RAM, allocated from the OS. 530 final class MemoryDataWrapper : DataWrapper 531 { 532 /// Pointer to actual data. 533 void* data; 534 /// Used size. Needed for safe appends. 535 size_t _size; 536 /// Allocated capacity. 537 size_t _capacity; 538 539 /// Threshold of allocated memory to trigger a collect. 540 __gshared size_t collectThreshold = 8*1024*1024; // 8MB 541 /// Counter towards the threshold. 542 static /*thread-local*/ size_t allocatedThreshold; 543 544 /// Create a new instance with given capacity. 545 this(size_t size, size_t capacity) 546 { 547 data = malloc(/*ref*/ capacity); 548 if (data is null) 549 { 550 debug(DATA) printf("Garbage collect triggered by failed Data allocation of %llu bytes... ", cast(ulong)capacity); 551 GC.collect(); 552 debug(DATA) printf("Done\n"); 553 data = malloc(/*ref*/ capacity); 554 allocatedThreshold = 0; 555 } 556 if (data is null) 557 onOutOfMemoryError(); 558 559 dataMemory += capacity; 560 if (dataMemoryPeak < dataMemory) 561 dataMemoryPeak = dataMemory; 562 dataCount ++; 563 allocCount ++; 564 565 this._size = size; 566 this._capacity = capacity; 567 568 // also collect 569 allocatedThreshold += capacity; 570 if (allocatedThreshold > collectThreshold) 571 { 572 debug(DATA) printf("Garbage collect triggered by total allocated Data exceeding threshold... "); 573 GC.collect(); 574 debug(DATA) printf("Done\n"); 575 allocatedThreshold = 0; 576 } 577 } 578 579 /// Destructor - destroys the wrapped data. 580 ~this() 581 { 582 free(data, capacity); 583 data = null; 584 // If DataWrapper is created and manually deleted, there is no need to cause frequent collections 585 if (allocatedThreshold > capacity) 586 allocatedThreshold -= capacity; 587 else 588 allocatedThreshold = 0; 589 590 dataMemory -= capacity; 591 dataCount --; 592 } 593 594 @property override 595 size_t size() const { return _size; } 596 597 @property override 598 size_t capacity() const { return _capacity; } 599 600 override void setSize(size_t newSize) 601 { 602 assert(newSize <= capacity); 603 _size = newSize; 604 } 605 606 @property override 607 inout(void)[] contents() inout 608 { 609 return data[0..size]; 610 } 611 612 // https://github.com/D-Programming-Language/druntime/pull/759 613 version(OSX) 614 enum _SC_PAGE_SIZE = 29; 615 616 // https://github.com/D-Programming-Language/druntime/pull/1140 617 version(FreeBSD) 618 enum _SC_PAGE_SIZE = 47; 619 620 version(Windows) 621 { 622 static immutable size_t pageSize; 623 624 shared static this() 625 { 626 SYSTEM_INFO si; 627 GetSystemInfo(&si); 628 pageSize = si.dwPageSize; 629 } 630 } 631 else 632 static if (is(typeof(_SC_PAGE_SIZE))) 633 { 634 static immutable size_t pageSize; 635 636 shared static this() 637 { 638 pageSize = sysconf(_SC_PAGE_SIZE); 639 } 640 } 641 642 static void* malloc(ref size_t size) 643 { 644 if (is(typeof(pageSize))) 645 size = ((size-1) | (pageSize-1))+1; 646 647 version(Windows) 648 { 649 return VirtualAlloc(null, size, MEM_COMMIT, PAGE_READWRITE); 650 } 651 else 652 version(Posix) 653 { 654 version(linux) 655 import core.sys.linux.sys.mman : MAP_ANON; 656 auto p = mmap(null, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); 657 return (p == MAP_FAILED) ? null : p; 658 } 659 else 660 return core.stdc.malloc(size); 661 } 662 663 static void free(void* p, size_t size) 664 { 665 debug 666 { 667 (cast(ubyte*)p)[0..size] = 0xDB; 668 } 669 version(Windows) 670 VirtualFree(p, 0, MEM_RELEASE); 671 else 672 version(Posix) 673 munmap(p, size); 674 else 675 core.stdc.free(size); 676 } 677 } 678 679 // Source: Win32 bindings project 680 version(Windows) 681 { 682 struct SYSTEM_INFO { 683 union { 684 DWORD dwOemId; 685 struct { 686 WORD wProcessorArchitecture; 687 WORD wReserved; 688 } 689 } 690 DWORD dwPageSize; 691 PVOID lpMinimumApplicationAddress; 692 PVOID lpMaximumApplicationAddress; 693 DWORD dwActiveProcessorMask; 694 DWORD dwNumberOfProcessors; 695 DWORD dwProcessorType; 696 DWORD dwAllocationGranularity; 697 WORD wProcessorLevel; 698 WORD wProcessorRevision; 699 } 700 alias SYSTEM_INFO* LPSYSTEM_INFO; 701 702 extern(Windows) VOID GetSystemInfo(LPSYSTEM_INFO); 703 } 704 705 debug(DATA_REFCOUNT) import ae.utils.exception, ae.sys.memory, std.stdio; 706 707 debug(DATA_REFCOUNT) void debugLog(Args...)(const char* s, Args args) @nogc 708 { 709 printf(s, args); 710 printf("\n"); 711 if (inCollect()) 712 printf("\t(in GC collect)\n"); 713 else 714 (cast(void function() @nogc)&debugStackTrace)(); 715 fflush(core.stdc.stdio.stdout); 716 } 717 718 debug(DATA_REFCOUNT) void debugStackTrace() 719 { 720 try 721 foreach (line; getStackTrace()) 722 writeln("\t", line); 723 catch (Throwable e) 724 writeln("\t(stacktrace error: ", e.msg, ")"); 725 }