1 /** 2 * Wrappers for raw _data located in unmanaged memory. 3 * 4 * Using the Data type will only place a small object in managed memory, 5 * keeping the actual _data in unmanaged memory. 6 * A proxy class (DataWrapper) is used to safely allow multiple references to 7 * the same block of unmanaged memory. 8 * When the DataWrapper object is destroyed (either manually or by the garbage 9 * collector when there are no remaining Data references), the unmanaged 10 * memory is deallocated. 11 * 12 * This has the following advantage over using managed memory: 13 * $(UL 14 * $(LI Faster allocation and deallocation, since memory is requested from 15 * the OS directly as whole pages) 16 * $(LI Greatly reduced chance of memory leaks (on 32-bit platforms) due to 17 * stray pointers) 18 * $(LI Overall improved GC performance due to reduced size of managed heap) 19 * $(LI Memory is immediately returned to the OS when _data is deallocated) 20 * ) 21 * On the other hand, using Data has the following disadvantages: 22 * $(UL 23 * $(LI This module is designed to store raw _data which does not have any 24 * pointers. Storing objects containing pointers to managed memory is 25 * unsupported, and may result in memory corruption.) 26 * $(LI Small objects may be stored inefficiently, as the module requests 27 * entire pages of memory from the OS. Considering allocating one large 28 * object and use slices (Data instances) for individual objects.) 29 * ) 30 * 31 * License: 32 * This Source Code Form is subject to the terms of 33 * the Mozilla Public License, v. 2.0. If a copy of 34 * the MPL was not distributed with this file, You 35 * can obtain one at http://mozilla.org/MPL/2.0/. 36 * 37 * Authors: 38 * Vladimir Panteleev <vladimir@thecybershadow.net> 39 */ 40 41 module ae.sys.data; 42 43 static import core.stdc.stdlib; 44 import core.stdc.string : memmove; 45 import std.traits; 46 import core.memory; 47 import core.exception; 48 debug import std.stdio; 49 debug import std.string; 50 public import ae.sys.dataset; 51 import ae.utils.math; 52 53 debug(DATA) import core.stdc.stdio; 54 55 // ideas/todo: 56 // * templatize (and forbid using aliased types)? 57 // * use heap (malloc/Windows heap API) for small objects? 58 // * reference counting? 59 // * "immutable" support? 60 61 /** 62 * Wrapper for data located in external memory, to prevent faux references. 63 * Represents a slice of data, which may or may not be in unmanaged memory. 64 * Data in unmanaged memory is bound to a DataWrapper class instance. 65 * 66 * All operations on this class should be safe, except for accessing contents directly. 67 * All operations on contents must be accompanied by a live reference to the Data object, 68 * to keep a GC anchor towards the unmanaged data. 69 * 70 * Concatenations and appends to Data contents will cause reallocations on the heap, consider using Data instead. 71 * 72 * Be sure not to lose Data references while using their contents! 73 * For example, avoid code like this: 74 * ---- 75 * fun(cast(string)transformSomeData(someData).contents); 76 * ---- 77 * The Data return value may be unreachable once .contents is evaluated. 78 * Use .toHeap instead of .contents in such cases to get a safe heap copy. 79 */ 80 struct Data 81 { 82 private: 83 /// Wrapped data 84 const(void)[] _contents; 85 /// Reference to the wrapper of the actual data - may be null to indicate wrapped data in managed memory. 86 /// Used as a GC anchor to unmanaged data, and for in-place expands (for appends). 87 DataWrapper wrapper; 88 /// Indicates whether we're allowed to modify the data contents. 89 bool mutable; 90 91 /// Maximum preallocation for append operations. 92 enum { MAX_PREALLOC = 4*1024*1024 } // must be power of 2 93 94 public: 95 /** 96 * Create new instance wrapping the given data. 97 * Params: 98 * data = initial data 99 * forceReallocation = when false, the contents will be duplicated to 100 * unmanaged memory only when it's not on the managed heap; when true, 101 * the contents will be reallocated always. 102 */ 103 this(const(void)[] data, bool forceReallocation = false) 104 { 105 if (data.length == 0) 106 contents = null; 107 else 108 if (forceReallocation || GC.addrOf(data.ptr) is null) 109 { 110 // copy to unmanaged memory 111 auto wrapper = new MemoryDataWrapper(data.length, data.length); 112 this.wrapper = wrapper; 113 wrapper.contents[] = data[]; 114 contents = wrapper.contents; 115 mutable = true; 116 } 117 else 118 { 119 // just save a reference 120 contents = data; 121 mutable = false; 122 } 123 124 assert(this.length == data.length); 125 } 126 127 /// ditto 128 this(void[] data, bool forceReallocation = false) 129 { 130 const(void)[] cdata = data; 131 this(cdata, forceReallocation); 132 mutable = true; 133 } 134 135 /// Create a new instance with given size/capacity. Capacity defaults to size. 136 this(size_t size, size_t capacity = 0) 137 in 138 { 139 assert(capacity == 0 || size <= capacity); 140 } 141 body 142 { 143 if (!capacity) 144 capacity = size; 145 146 if (capacity) 147 { 148 auto wrapper = new MemoryDataWrapper(size, capacity); 149 this.wrapper = wrapper; 150 contents = wrapper.contents; 151 mutable = true; 152 } 153 else 154 { 155 wrapper = null; 156 contents = null; 157 } 158 159 assert(this.length == size); 160 } 161 162 this(DataWrapper wrapper, bool mutable) 163 { 164 this.wrapper = wrapper; 165 this.mutable = mutable; 166 this.contents = wrapper.contents; 167 } 168 169 this(this) 170 { 171 if (wrapper) 172 { 173 wrapper.references++; 174 debug (DATA_REFCOUNT) debugLog("%p -> %p: Incrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references); 175 } 176 else 177 debug (DATA_REFCOUNT) debugLog("%p -> %p: this(this) with no wrapper", cast(void*)&this, cast(void*)wrapper); 178 } 179 180 ~this() pure 181 { 182 //clear(); 183 // https://issues.dlang.org/show_bug.cgi?id=13809 184 (cast(void delegate() pure)&clear)(); 185 } 186 187 /* 188 /// Create new instance as a slice over an existing DataWrapper. 189 private this(DataWrapper wrapper, size_t start = 0, size_t end = size_t.max) 190 { 191 this.wrapper = wrapper; 192 this.start = start; 193 this.end = end==size_t.max ? wrapper.capacity : end; 194 } 195 */ 196 197 @property const(void)[] contents() const 198 { 199 return _contents; 200 } 201 202 @property private const(void)[] contents(const(void)[] data) 203 { 204 return _contents = data; 205 } 206 207 /// Get mutable contents 208 @property void[] mcontents() 209 { 210 if (!mutable && length) 211 { 212 reallocate(length, length); 213 assert(mutable); 214 } 215 return cast(void[])_contents; 216 } 217 218 @property const(void)* ptr() const 219 { 220 return contents.ptr; 221 } 222 223 @property void* mptr() 224 { 225 return mcontents.ptr; 226 } 227 228 @property size_t length() const 229 { 230 return contents.length; 231 } 232 233 @property bool empty() const 234 { 235 return contents is null; 236 } 237 238 bool opCast(T)() 239 if (is(T == bool)) 240 { 241 return !empty; 242 } 243 244 @property size_t capacity() const 245 { 246 if (wrapper is null) 247 return length; 248 // We can only safely expand if the memory slice is at the end of the used unmanaged memory block. 249 auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data 250 auto end = pos + length; // end position in wrapper data 251 assert(end <= wrapper.size); 252 if (end == wrapper.size && end < wrapper.capacity) 253 return wrapper.capacity - pos; 254 else 255 return length; 256 } 257 258 /// Put a copy of the data on D's managed heap, and return it. 259 @property 260 void[] toHeap() const 261 { 262 return _contents.dup; 263 } 264 265 private void reallocate(size_t size, size_t capacity) 266 { 267 auto wrapper = new MemoryDataWrapper(size, capacity); 268 wrapper.contents[0..this.length] = contents[]; 269 //(cast(ubyte[])newWrapper.contents)[this.length..value] = 0; 270 271 clear(); 272 this.wrapper = wrapper; 273 this.contents = wrapper.contents; 274 mutable = true; 275 } 276 277 private void expand(size_t newSize, size_t newCapacity) 278 in 279 { 280 assert(length < newSize); 281 assert(newSize <= newCapacity); 282 } 283 out 284 { 285 assert(length == newSize); 286 } 287 body 288 { 289 if (newCapacity <= capacity) 290 { 291 auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data 292 wrapper.setSize(pos + newSize); 293 contents = ptr[0..newSize]; 294 } 295 else 296 reallocate(newSize, newCapacity); 297 } 298 299 @property void length(size_t value) 300 { 301 if (value == length) // no change 302 return; 303 if (value < length) // shorten 304 _contents = _contents[0..value]; 305 else // lengthen 306 expand(value, value); 307 } 308 alias length opDollar; 309 310 @property Data dup() const 311 { 312 return Data(contents, true); 313 } 314 315 /// This used to be an unsafe method which deleted the wrapped data. 316 /// Now that Data is refcounted, this simply calls clear() and 317 /// additionally asserts that this Data is the only Data holding 318 /// a reference to the wrapper. 319 void deleteContents() 320 out 321 { 322 assert(wrapper is null); 323 } 324 body 325 { 326 if (wrapper) 327 { 328 assert(wrapper.references == 1, "Attempting to call deleteContents with "); 329 clear(); 330 } 331 } 332 333 void clear() 334 { 335 if (wrapper) 336 { 337 assert(wrapper.references > 0, "Dangling pointer to wrapper"); 338 wrapper.references--; 339 debug (DATA_REFCOUNT) debugLog("%p -> %p: Decrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references); 340 if (wrapper.references == 0) 341 delete wrapper; 342 343 wrapper = null; 344 } 345 346 contents = null; 347 } 348 349 Data concat(const(void)[] data) 350 { 351 if (data.length==0) 352 return this; 353 Data result = Data(length + data.length); 354 result.mcontents[0..this.length] = contents[]; 355 result.mcontents[this.length..$] = data[]; 356 return result; 357 } 358 359 Data opCat(T)(const(T)[] data) 360 if (!hasIndirections!T) 361 { 362 return concat(data); 363 } 364 365 Data opCat()(Data data) 366 { 367 return concat(data.contents); 368 } 369 370 Data prepend(const(void)[] data) 371 { 372 Data result = Data(data.length + length); 373 result.mcontents[0..data.length] = data[]; 374 result.mcontents[data.length..$] = contents[]; 375 return result; 376 } 377 378 Data opCat_r(T)(const(T)[] data) 379 if (!hasIndirections!T) 380 { 381 return prepend(data); 382 } 383 384 private static size_t getPreallocSize(size_t length) 385 { 386 if (length < MAX_PREALLOC) 387 return nextPowerOfTwo(length); 388 else 389 return ((length-1) | (MAX_PREALLOC-1)) + 1; 390 } 391 392 Data append(const(void)[] data) 393 { 394 if (data.length==0) 395 return this; 396 size_t oldLength = length; 397 size_t newLength = length + data.length; 398 expand(newLength, getPreallocSize(newLength)); 399 auto newContents = cast(void[])_contents[oldLength..$]; 400 newContents[] = (cast(void[])data)[]; 401 return this; 402 } 403 404 /// Note that unlike opCat (a ~ b), opCatAssign (a ~= b) will preallocate. 405 Data opCatAssign(T)(const(T)[] data) 406 if (!hasIndirections!T) 407 { 408 return append(data); 409 } 410 411 Data opCatAssign()(Data data) 412 { 413 return append(data.contents); 414 } 415 416 Data opCatAssign()(ubyte value) // hack? 417 { 418 return append((&value)[0..1]); 419 } 420 421 Data opSlice() 422 { 423 return this; 424 } 425 426 Data opSlice(size_t x, size_t y) 427 in 428 { 429 assert(x <= y); 430 assert(y <= length); 431 } 432 // https://issues.dlang.org/show_bug.cgi?id=13463 433 // out(result) 434 // { 435 // assert(result.length == y-x); 436 // } 437 body 438 { 439 if (x == y) 440 return Data(); 441 else 442 { 443 Data result = this; 444 result.contents = result.contents[x..y]; 445 return result; 446 } 447 } 448 449 /// Return a new Data for the first size bytes, and slice this instance from size to end. 450 Data popFront(size_t size) 451 in 452 { 453 assert(size <= length); 454 } 455 body 456 { 457 Data result = this; 458 result.contents = contents[0..size]; 459 this .contents = contents[size..$]; 460 return result; 461 } 462 } 463 464 unittest 465 { 466 Data d = Data("aaaaa"); 467 assert(d.wrapper.references == 1); 468 Data s = d[1..4]; 469 assert(d.wrapper.references == 2); 470 } 471 472 // ************************************************************************ 473 474 static /*thread-local*/ size_t dataMemory, dataMemoryPeak; 475 static /*thread-local*/ uint dataCount, allocCount; 476 477 // Abstract wrapper. 478 abstract class DataWrapper 479 { 480 sizediff_t references = 1; 481 abstract @property inout(void)[] contents() inout; 482 abstract @property size_t size() const; 483 abstract void setSize(size_t newSize); 484 abstract @property size_t capacity() const; 485 486 new(size_t sz) 487 { 488 void* p = core.stdc.stdlib.malloc(sz); 489 490 debug(DATA_REFCOUNT) debugLog("? -> %p: Allocating via malloc", p); 491 492 if (!p) 493 throw new OutOfMemoryError(); 494 495 //GC.addRange(p, sz); 496 return p; 497 } 498 499 debug ~this() 500 { 501 debug(DATA_REFCOUNT) debugLog("%.*s.~this, references==%d", this.classinfo.name.length, this.classinfo.name.ptr, references); 502 assert(references == 0, "Deleting DataWrapper with non-zero reference count"); 503 } 504 505 @nogc delete(void* p) 506 { 507 if (p) 508 { 509 debug(DATA_REFCOUNT) debugLog("? -> %p: Deleting via free", p); 510 511 //GC.removeRange(p); 512 core.stdc.stdlib.free(p); 513 } 514 } 515 } 516 517 private: 518 519 version (Windows) 520 import core.sys.windows.windows; 521 else 522 { 523 import core.sys.posix.unistd; 524 import core.sys.posix.sys.mman; 525 } 526 527 /// Wrapper for data in RAM, allocated from the OS. 528 final class MemoryDataWrapper : DataWrapper 529 { 530 /// Pointer to actual data. 531 void* data; 532 /// Used size. Needed for safe appends. 533 size_t _size; 534 /// Allocated capacity. 535 size_t _capacity; 536 537 /// Threshold of allocated memory to trigger a collect. 538 enum { COLLECT_THRESHOLD = 8*1024*1024 } // 8MB 539 /// Counter towards the threshold. 540 static /*thread-local*/ size_t allocatedThreshold; 541 542 /// Create a new instance with given capacity. 543 this(size_t size, size_t capacity) 544 { 545 data = malloc(/*ref*/ capacity); 546 if (data is null) 547 { 548 debug(DATA) printf("Garbage collect triggered by failed Data allocation of %llu bytes... ", cast(ulong)capacity); 549 GC.collect(); 550 debug(DATA) printf("Done\n"); 551 data = malloc(/*ref*/ capacity); 552 allocatedThreshold = 0; 553 } 554 if (data is null) 555 onOutOfMemoryError(); 556 557 dataMemory += capacity; 558 if (dataMemoryPeak < dataMemory) 559 dataMemoryPeak = dataMemory; 560 dataCount ++; 561 allocCount ++; 562 563 this._size = size; 564 this._capacity = capacity; 565 566 // also collect 567 allocatedThreshold += capacity; 568 if (allocatedThreshold > COLLECT_THRESHOLD) 569 { 570 debug(DATA) printf("Garbage collect triggered by total allocated Data exceeding threshold... "); 571 GC.collect(); 572 debug(DATA) printf("Done\n"); 573 allocatedThreshold = 0; 574 } 575 } 576 577 /// Destructor - destroys the wrapped data. 578 ~this() 579 { 580 free(data, capacity); 581 data = null; 582 // If DataWrapper is created and manually deleted, there is no need to cause frequent collections 583 if (allocatedThreshold > capacity) 584 allocatedThreshold -= capacity; 585 else 586 allocatedThreshold = 0; 587 588 dataMemory -= capacity; 589 dataCount --; 590 } 591 592 @property override 593 size_t size() const { return _size; } 594 595 @property override 596 size_t capacity() const { return _capacity; } 597 598 override void setSize(size_t newSize) 599 { 600 assert(newSize <= capacity); 601 _size = newSize; 602 } 603 604 @property override 605 inout(void)[] contents() inout 606 { 607 return data[0..size]; 608 } 609 610 // https://github.com/D-Programming-Language/druntime/pull/759 611 version(OSX) 612 enum _SC_PAGE_SIZE = 29; 613 614 // https://github.com/D-Programming-Language/druntime/pull/1140 615 version(FreeBSD) 616 enum _SC_PAGE_SIZE = 47; 617 618 version(Windows) 619 { 620 static immutable size_t pageSize; 621 622 shared static this() 623 { 624 SYSTEM_INFO si; 625 GetSystemInfo(&si); 626 pageSize = si.dwPageSize; 627 } 628 } 629 else 630 static if (is(typeof(_SC_PAGE_SIZE))) 631 { 632 static immutable size_t pageSize; 633 634 shared static this() 635 { 636 pageSize = sysconf(_SC_PAGE_SIZE); 637 } 638 } 639 640 static void* malloc(ref size_t size) 641 { 642 if (is(typeof(pageSize))) 643 size = ((size-1) | (pageSize-1))+1; 644 645 version(Windows) 646 { 647 return VirtualAlloc(null, size, MEM_COMMIT, PAGE_READWRITE); 648 } 649 else 650 version(Posix) 651 { 652 version(linux) 653 import core.sys.linux.sys.mman : MAP_ANON; 654 auto p = mmap(null, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); 655 return (p == MAP_FAILED) ? null : p; 656 } 657 else 658 return core.stdc.malloc(size); 659 } 660 661 static void free(void* p, size_t size) 662 { 663 debug 664 { 665 (cast(ubyte*)p)[0..size] = 0xDB; 666 } 667 version(Windows) 668 VirtualFree(p, 0, MEM_RELEASE); 669 else 670 version(Posix) 671 munmap(p, size); 672 else 673 core.stdc.free(size); 674 } 675 } 676 677 // Source: Win32 bindings project 678 version(Windows) 679 { 680 struct SYSTEM_INFO { 681 union { 682 DWORD dwOemId; 683 struct { 684 WORD wProcessorArchitecture; 685 WORD wReserved; 686 } 687 } 688 DWORD dwPageSize; 689 PVOID lpMinimumApplicationAddress; 690 PVOID lpMaximumApplicationAddress; 691 DWORD dwActiveProcessorMask; 692 DWORD dwNumberOfProcessors; 693 DWORD dwProcessorType; 694 DWORD dwAllocationGranularity; 695 WORD wProcessorLevel; 696 WORD wProcessorRevision; 697 } 698 alias SYSTEM_INFO* LPSYSTEM_INFO; 699 700 extern(Windows) VOID GetSystemInfo(LPSYSTEM_INFO); 701 } 702 703 debug(DATA_REFCOUNT) import ae.utils.exception, ae.sys.memory, std.stdio; 704 705 debug(DATA_REFCOUNT) void debugLog(Args...)(const char* s, Args args) @nogc 706 { 707 printf(s, args); 708 printf("\n"); 709 if (inCollect()) 710 printf("\t(in GC collect)\n"); 711 else 712 (cast(void function() @nogc)&debugStackTrace)(); 713 fflush(core.stdc.stdio.stdout); 714 } 715 716 debug(DATA_REFCOUNT) void debugStackTrace() 717 { 718 try 719 foreach (line; getStackTrace()) 720 writeln("\t", line); 721 catch (Throwable e) 722 writeln("\t(stacktrace error: ", e.msg, ")"); 723 }