1 /** 2 * Wrappers for raw _data located in unmanaged memory. 3 * 4 * Using the Data type will only place a small object in managed memory, 5 * keeping the actual _data in unmanaged memory. 6 * A proxy class (DataWrapper) is used to safely allow multiple references to 7 * the same block of unmanaged memory. 8 * When the DataWrapper object is destroyed (either manually or by the garbage 9 * collector when there are no remaining Data references), the unmanaged 10 * memory is deallocated. 11 * 12 * This has the following advantage over using managed memory: 13 * $(UL 14 * $(LI Faster allocation and deallocation, since memory is requested from 15 * the OS directly as whole pages) 16 * $(LI Greatly reduced chance of memory leaks (on 32-bit platforms) due to 17 * stray pointers) 18 * $(LI Overall improved GC performance due to reduced size of managed heap) 19 * $(LI Memory is immediately returned to the OS when _data is deallocated) 20 * ) 21 * On the other hand, using Data has the following disadvantages: 22 * $(UL 23 * $(LI This module is designed to store raw _data which does not have any 24 * pointers. Storing objects containing pointers to managed memory is 25 * unsupported, and may result in memory corruption.) 26 * $(LI Small objects may be stored inefficiently, as the module requests 27 * entire pages of memory from the OS. Considering allocating one large 28 * object and use slices (Data instances) for individual objects.) 29 * ) 30 * 31 * License: 32 * This Source Code Form is subject to the terms of 33 * the Mozilla Public License, v. 2.0. If a copy of 34 * the MPL was not distributed with this file, You 35 * can obtain one at http://mozilla.org/MPL/2.0/. 36 * 37 * Authors: 38 * Vladimir Panteleev <vladimir@thecybershadow.net> 39 */ 40 41 module ae.sys.data; 42 43 static import core.stdc.stdlib; 44 import core.stdc.string : memmove; 45 import std.traits; 46 import core.memory; 47 import core.exception; 48 debug import std.stdio; 49 debug import std.string; 50 public import ae.sys.dataset; 51 import ae.utils.math; 52 53 // ideas/todo: 54 // * templatize (and forbid using aliased types)? 55 // * use heap (malloc/Windows heap API) for small objects? 56 // * reference counting? 57 // * "immutable" support? 58 59 /** 60 * Wrapper for data located in external memory, to prevent faux references. 61 * Represents a slice of data, which may or may not be in unmanaged memory. 62 * Data in unmanaged memory is bound to a DataWrapper class instance. 63 * 64 * All operations on this class should be safe, except for accessing contents directly. 65 * All operations on contents must be accompanied by a live reference to the Data object, 66 * to keep a GC anchor towards the unmanaged data. 67 * 68 * Concatenations and appends to Data contents will cause reallocations on the heap, consider using Data instead. 69 * 70 * Be sure not to lose Data references while using their contents! 71 * For example, avoid code like this: 72 * ---- 73 * fun(cast(string)transformSomeData(someData).contents); 74 * ---- 75 * The Data return value may be unreachable once .contents is evaluated. 76 * Use .toHeap instead of .contents in such cases to get a safe heap copy. 77 */ 78 struct Data 79 { 80 private: 81 /// Wrapped data 82 const(void)[] _contents; 83 /// Reference to the wrapper of the actual data - may be null to indicate wrapped data in managed memory. 84 /// Used as a GC anchor to unmanaged data, and for in-place expands (for appends). 85 DataWrapper wrapper; 86 /// Indicates whether we're allowed to modify the data contents. 87 bool mutable; 88 89 /// Maximum preallocation for append operations. 90 enum { MAX_PREALLOC = 4*1024*1024 } // must be power of 2 91 92 public: 93 /** 94 * Create new instance wrapping the given data. 95 * Params: 96 * data = initial data 97 * forceReallocation = when false, the contents will be duplicated to 98 * unmanaged memory only when it's not on the managed heap; when true, 99 * the contents will be reallocated always. 100 */ 101 this(const(void)[] data, bool forceReallocation = false) 102 { 103 if (data.length == 0) 104 contents = null; 105 else 106 if (forceReallocation || GC.addrOf(data.ptr) is null) 107 { 108 // copy to unmanaged memory 109 auto wrapper = new MemoryDataWrapper(data.length, data.length); 110 this.wrapper = wrapper; 111 wrapper.contents[] = data[]; 112 contents = wrapper.contents; 113 mutable = true; 114 } 115 else 116 { 117 // just save a reference 118 contents = data; 119 mutable = false; 120 } 121 122 assert(this.length == data.length); 123 } 124 125 /// ditto 126 this(void[] data, bool forceReallocation = false) 127 { 128 const(void)[] cdata = data; 129 this(cdata, forceReallocation); 130 mutable = true; 131 } 132 133 /// Create a new instance with given size/capacity. Capacity defaults to size. 134 this(size_t size = 0, size_t capacity = 0) 135 in 136 { 137 assert(capacity == 0 || size <= capacity); 138 } 139 body 140 { 141 if (!capacity) 142 capacity = size; 143 144 if (capacity) 145 { 146 auto wrapper = new MemoryDataWrapper(size, capacity); 147 this.wrapper = wrapper; 148 contents = wrapper.contents; 149 mutable = true; 150 } 151 else 152 { 153 wrapper = null; 154 contents = null; 155 } 156 157 assert(this.length == size); 158 } 159 160 this(DataWrapper wrapper, bool mutable) 161 { 162 this.wrapper = wrapper; 163 this.mutable = mutable; 164 this.contents = wrapper.contents; 165 } 166 167 this(this) 168 { 169 if (wrapper) 170 { 171 wrapper.references++; 172 debug (DATA_REFCOUNT) debugLog(format("%s -> %s: Incrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references)); 173 } 174 else 175 debug (DATA_REFCOUNT) debugLog(format("%s -> %s: this(this) with no wrapper", cast(void*)&this, cast(void*)wrapper)); 176 } 177 178 ~this() pure 179 { 180 //clear(); 181 // https://issues.dlang.org/show_bug.cgi?id=13809 182 (cast(void delegate() pure)&clear)(); 183 } 184 185 /* 186 /// Create new instance as a slice over an existing DataWrapper. 187 private this(DataWrapper wrapper, size_t start = 0, size_t end = size_t.max) 188 { 189 this.wrapper = wrapper; 190 this.start = start; 191 this.end = end==size_t.max ? wrapper.capacity : end; 192 } 193 */ 194 195 @property const(void)[] contents() const 196 { 197 return _contents; 198 } 199 200 @property private const(void)[] contents(const(void)[] data) 201 { 202 return _contents = data; 203 } 204 205 /// Get mutable contents 206 @property void[] mcontents() 207 { 208 if (!mutable && length) 209 { 210 reallocate(length, length); 211 assert(mutable); 212 } 213 return cast(void[])_contents; 214 } 215 216 @property const(void)* ptr() const 217 { 218 return contents.ptr; 219 } 220 221 @property void* mptr() 222 { 223 return mcontents.ptr; 224 } 225 226 @property size_t length() const 227 { 228 return contents.length; 229 } 230 231 @property bool empty() const 232 { 233 return contents is null; 234 } 235 236 bool opCast(T)() 237 if (is(T == bool)) 238 { 239 return !empty; 240 } 241 242 @property size_t capacity() const 243 { 244 if (wrapper is null) 245 return length; 246 // We can only safely expand if the memory slice is at the end of the used unmanaged memory block. 247 auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data 248 auto end = pos + length; // end position in wrapper data 249 assert(end <= wrapper.size); 250 if (end == wrapper.size && end < wrapper.capacity) 251 return wrapper.capacity - pos; 252 else 253 return length; 254 } 255 256 /// Put a copy of the data on D's managed heap, and return it. 257 @property 258 void[] toHeap() const 259 { 260 return _contents.dup; 261 } 262 263 private void reallocate(size_t size, size_t capacity) 264 { 265 auto wrapper = new MemoryDataWrapper(size, capacity); 266 wrapper.contents[0..this.length] = contents[]; 267 //(cast(ubyte[])newWrapper.contents)[this.length..value] = 0; 268 269 clear(); 270 this.wrapper = wrapper; 271 this.contents = wrapper.contents; 272 mutable = true; 273 } 274 275 private void expand(size_t newSize, size_t newCapacity) 276 in 277 { 278 assert(length < newSize); 279 assert(newSize <= newCapacity); 280 } 281 out 282 { 283 assert(length == newSize); 284 } 285 body 286 { 287 if (newCapacity <= capacity) 288 { 289 auto pos = ptr - wrapper.contents.ptr; // start position in wrapper data 290 wrapper.setSize(pos + newSize); 291 contents = ptr[0..newSize]; 292 } 293 else 294 reallocate(newSize, newCapacity); 295 } 296 297 @property void length(size_t value) 298 { 299 if (value == length) // no change 300 return; 301 if (value < length) // shorten 302 _contents = _contents[0..value]; 303 else // lengthen 304 expand(value, value); 305 } 306 alias length opDollar; 307 308 @property Data dup() const 309 { 310 return Data(contents, true); 311 } 312 313 /// UNSAFE! Use only when you know there is only one reference to the data. 314 void deleteContents() 315 out 316 { 317 assert(wrapper is null); 318 } 319 body 320 { 321 delete wrapper; 322 contents = null; 323 } 324 325 void clear() 326 { 327 if (wrapper) 328 { 329 assert(wrapper.references > 0, "Dangling pointer to wrapper"); 330 wrapper.references--; 331 debug (DATA_REFCOUNT) debugLog(format("%s -> %s: Decrementing refcount to %d", cast(void*)&this, cast(void*)wrapper, wrapper.references)); 332 if (wrapper.references == 0) 333 destroy(wrapper); 334 } 335 336 wrapper = null; 337 contents = null; 338 } 339 340 Data concat(const(void)[] data) 341 { 342 if (data.length==0) 343 return this; 344 Data result = Data(length + data.length); 345 result.mcontents[0..this.length] = contents[]; 346 result.mcontents[this.length..$] = data[]; 347 return result; 348 } 349 350 Data opCat(T)(const(T)[] data) 351 if (!hasIndirections!T) 352 { 353 return concat(data); 354 } 355 356 Data opCat()(Data data) 357 { 358 return concat(data.contents); 359 } 360 361 Data prepend(const(void)[] data) 362 { 363 Data result = Data(data.length + length); 364 result.mcontents[0..data.length] = data[]; 365 result.mcontents[data.length..$] = contents[]; 366 return result; 367 } 368 369 Data opCat_r(T)(const(T)[] data) 370 if (!hasIndirections!T) 371 { 372 return prepend(data); 373 } 374 375 private static size_t getPreallocSize(size_t length) 376 { 377 if (length < MAX_PREALLOC) 378 return nextPowerOfTwo(length); 379 else 380 return ((length-1) | (MAX_PREALLOC-1)) + 1; 381 } 382 383 Data append(const(void)[] data) 384 { 385 if (data.length==0) 386 return this; 387 size_t oldLength = length; 388 size_t newLength = length + data.length; 389 expand(newLength, getPreallocSize(newLength)); 390 auto newContents = cast(void[])_contents[oldLength..$]; 391 newContents[] = (cast(void[])data)[]; 392 return this; 393 } 394 395 /// Note that unlike opCat (a ~ b), opCatAssign (a ~= b) will preallocate. 396 Data opCatAssign(T)(const(T)[] data) 397 if (!hasIndirections!T) 398 { 399 return append(data); 400 } 401 402 Data opCatAssign()(Data data) 403 { 404 return append(data.contents); 405 } 406 407 Data opCatAssign()(ubyte value) // hack? 408 { 409 return append((&value)[0..1]); 410 } 411 412 Data opSlice(size_t x, size_t y) 413 in 414 { 415 assert(x <= y); 416 assert(y <= length); 417 } 418 // https://issues.dlang.org/show_bug.cgi?id=13463 419 // out(result) 420 // { 421 // assert(result.length == y-x); 422 // } 423 body 424 { 425 if (x == y) 426 return Data(); 427 else 428 { 429 Data result = this; 430 result.contents = result.contents[x..y]; 431 return result; 432 } 433 } 434 435 /// Return a new Data for the first size bytes, and slice this instance from size to end. 436 Data popFront(size_t size) 437 in 438 { 439 assert(size <= length); 440 } 441 body 442 { 443 Data result = this; 444 result.contents = contents[0..size]; 445 this .contents = contents[size..$]; 446 return result; 447 } 448 } 449 450 unittest 451 { 452 Data d = Data("aaaaa"); 453 assert(d.wrapper.references == 1); 454 Data s = d[1..4]; 455 assert(d.wrapper.references == 2); 456 } 457 458 // ************************************************************************ 459 460 static /*thread-local*/ size_t dataMemory, dataMemoryPeak; 461 static /*thread-local*/ uint dataCount, allocCount; 462 463 // Abstract wrapper. 464 abstract class DataWrapper 465 { 466 size_t references = 1; 467 abstract @property inout(void)[] contents() inout; 468 abstract @property size_t size() const; 469 abstract void setSize(size_t newSize); 470 abstract @property size_t capacity() const; 471 } 472 473 private: 474 475 version (Windows) 476 import core.sys.windows.windows; 477 else 478 { 479 import core.sys.posix.unistd; 480 import core.sys.posix.sys.mman; 481 } 482 483 /// Wrapper for data in RAM, allocated from the OS. 484 final class MemoryDataWrapper : DataWrapper 485 { 486 /// Pointer to actual data. 487 void* data; 488 /// Used size. Needed for safe appends. 489 size_t _size; 490 /// Allocated capacity. 491 size_t _capacity; 492 493 /// Threshold of allocated memory to trigger a collect. 494 enum { COLLECT_THRESHOLD = 8*1024*1024 } // 8MB 495 /// Counter towards the threshold. 496 static /*thread-local*/ size_t allocatedThreshold; 497 498 /// Create a new instance with given capacity. 499 this(size_t size, size_t capacity) 500 { 501 data = malloc(/*ref*/ capacity); 502 if (data is null) 503 { 504 debug(DATA) printf("Garbage collect triggered by failed Data allocation... "); 505 GC.collect(); 506 debug(DATA) printf("Done\n"); 507 data = malloc(/*ref*/ capacity); 508 allocatedThreshold = 0; 509 } 510 if (data is null) 511 onOutOfMemoryError(); 512 513 dataMemory += capacity; 514 if (dataMemoryPeak < dataMemory) 515 dataMemoryPeak = dataMemory; 516 dataCount ++; 517 allocCount ++; 518 519 this._size = size; 520 this._capacity = capacity; 521 522 // also collect 523 allocatedThreshold += capacity; 524 if (allocatedThreshold > COLLECT_THRESHOLD) 525 { 526 debug(DATA) printf("Garbage collect triggered by total allocated Data exceeding threshold... "); 527 GC.collect(); 528 debug(DATA) printf("Done\n"); 529 allocatedThreshold = 0; 530 } 531 } 532 533 /// Destructor - destroys the wrapped data. 534 ~this() 535 { 536 free(data, capacity); 537 data = null; 538 // If DataWrapper is created and manually deleted, there is no need to cause frequent collections 539 if (allocatedThreshold > capacity) 540 allocatedThreshold -= capacity; 541 else 542 allocatedThreshold = 0; 543 544 dataMemory -= capacity; 545 dataCount --; 546 } 547 548 @property override 549 size_t size() const { return _size; } 550 551 @property override 552 size_t capacity() const { return _capacity; } 553 554 override void setSize(size_t newSize) 555 { 556 assert(newSize <= capacity); 557 _size = newSize; 558 } 559 560 @property override 561 inout(void)[] contents() inout 562 { 563 return data[0..size]; 564 } 565 566 // https://github.com/D-Programming-Language/druntime/pull/759 567 version(OSX) 568 enum _SC_PAGE_SIZE = 29; 569 570 version(Windows) 571 { 572 static immutable size_t pageSize; 573 574 shared static this() 575 { 576 SYSTEM_INFO si; 577 GetSystemInfo(&si); 578 pageSize = si.dwPageSize; 579 } 580 } 581 else 582 static if (is(typeof(_SC_PAGE_SIZE))) 583 { 584 static immutable size_t pageSize; 585 586 shared static this() 587 { 588 pageSize = sysconf(_SC_PAGE_SIZE); 589 } 590 } 591 592 static void* malloc(ref size_t size) 593 { 594 if (is(typeof(pageSize))) 595 size = ((size-1) | (pageSize-1))+1; 596 597 version(Windows) 598 { 599 return VirtualAlloc(null, size, MEM_COMMIT, PAGE_READWRITE); 600 } 601 else 602 version(Posix) 603 { 604 version(linux) 605 import core.sys.linux.sys.mman : MAP_ANON; 606 auto p = mmap(null, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); 607 return (p == MAP_FAILED) ? null : p; 608 } 609 else 610 return core.stdc.malloc(size); 611 } 612 613 static void free(void* p, size_t size) 614 { 615 debug 616 { 617 (cast(ubyte*)p)[0..size] = 0xDB; 618 } 619 version(Windows) 620 VirtualFree(p, 0, MEM_RELEASE); 621 else 622 version(Posix) 623 munmap(p, size); 624 else 625 core.stdc.free(size); 626 } 627 } 628 629 // Source: Win32 bindings project 630 version(Windows) 631 { 632 struct SYSTEM_INFO { 633 union { 634 DWORD dwOemId; 635 struct { 636 WORD wProcessorArchitecture; 637 WORD wReserved; 638 } 639 } 640 DWORD dwPageSize; 641 PVOID lpMinimumApplicationAddress; 642 PVOID lpMaximumApplicationAddress; 643 DWORD dwActiveProcessorMask; 644 DWORD dwNumberOfProcessors; 645 DWORD dwProcessorType; 646 DWORD dwAllocationGranularity; 647 WORD wProcessorLevel; 648 WORD wProcessorRevision; 649 } 650 alias SYSTEM_INFO* LPSYSTEM_INFO; 651 652 extern(Windows) VOID GetSystemInfo(LPSYSTEM_INFO); 653 } 654 655 debug(DATA_REFCOUNT) import ae.utils.exception; 656 657 debug(DATA_REFCOUNT) void debugLog(string s) 658 { 659 writeln(s); 660 foreach (line; getStackTrace()) 661 writeln("\t", line); 662 }