1 /** 2 Stuff having to do with memory management. Mostly a copy of RegionAllocator 3 for now until it gets into Phobos, as well as some RegionAllocator-specific 4 data structures. 5 6 Author: David Simcha*/ 7 /* 8 * License: 9 * Boost Software License - Version 1.0 - August 17th, 2003 10 * 11 * Permission is hereby granted, free of charge, to any person or organization 12 * obtaining a copy of the software and accompanying documentation covered by 13 * this license (the "Software") to use, reproduce, display, distribute, 14 * execute, and transmit the Software, and to prepare derivative works of the 15 * Software, and to permit third-parties to whom the Software is furnished to 16 * do so, all subject to the following: 17 * 18 * The copyright notices in the Software and this entire statement, including 19 * the above license grant, this restriction and the following disclaimer, 20 * must be included in all copies of the Software, in whole or in part, and 21 * all derivative works of the Software, unless such copies or derivative 22 * works are solely in the form of machine-executable object code generated by 23 * a source language processor. 24 * 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 26 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 27 * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 28 * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 29 * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 30 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 31 * DEALINGS IN THE SOFTWARE. 32 */ 33 34 module dstats.alloc; 35 36 import std.traits, core.memory, std.array, std.range, core.exception, 37 std.functional, std.math, std.algorithm : max; 38 39 static import core.stdc.stdlib; 40 import core.stdc.string : memcpy; 41 42 import dstats.base; 43 44 version(unittest) { 45 import std.stdio, std.conv, std.random, dstats.sort; 46 } 47 48 private template Appends(T, U) { 49 enum bool Appends = AppendsImpl!(T, U).ret; 50 } 51 52 private template AppendsImpl(T, U) { 53 T[] a; 54 U b; 55 enum bool ret = is(typeof(a ~= b)); 56 } 57 58 ///Appends to an array, deleting the old array if it has to be reallocated. 59 void appendDelOld(T, U)(ref T[] to, U from) 60 if(Appends!(T, U)) { 61 auto old = to; 62 to ~= from; 63 if (old.ptr !is to.ptr && old.ptr !is null) delete old; 64 } 65 66 unittest { 67 uint[] foo; 68 foo.appendDelOld(5); 69 foo.appendDelOld(4); 70 foo.appendDelOld(3); 71 foo.appendDelOld(2); 72 foo.appendDelOld(1); 73 assert(foo == cast(uint[]) [5,4,3,2,1]); 74 } 75 76 private struct SHNode(K, V) { 77 alias SHNode!(K, V) SomeType; 78 SomeType* next; 79 Unqual!(K) key; 80 Unqual!(V) val; 81 } 82 83 /**Forward range struct for iterating over the keys or values of a 84 * StackHash or StackSet.
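 * Instances of this range are returned by StackHash.keys, StackHash.values,
 * and StackSet.elems; the constructor is private, so they are not meant to be
 * created directly.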
The lifetime of this object must not exceed that 85 * of the underlying StackHash or StackSet.*/ 86 struct HashRange(K, S, bool vals = false) { 87 private: 88 S* set; 89 size_t index; 90 S.Node* next; 91 K* frontElem; 92 size_t _length; 93 94 this(S* set) { 95 this.set = set; 96 if(set.rNext[0] == set.usedSentinel) { 97 this.popFront; 98 } else { 99 static if(vals) { 100 frontElem = set.rVals.ptr; 101 } else { 102 frontElem = set.rKeys.ptr; 103 } 104 next = set.rNext[0]; 105 } 106 this._length = set.length; 107 } 108 109 public: 110 /// 111 void popFront() 112 in { 113 assert(!empty); 114 } body { 115 this._length--; 116 if(next is null) { 117 do { 118 index++; 119 if(index >= set.rNext.length) { 120 index = size_t.max; // Sentinel for empty. 121 return; 122 } 123 next = set.rNext[index]; 124 } while(set.rNext[index] == set.usedSentinel); 125 static if(vals) { 126 frontElem = &(set.rVals[index]); 127 } else { 128 frontElem = &(set.rKeys[index]); 129 } 130 } else { 131 static if(vals) { 132 frontElem = &(next.val); 133 } else { 134 frontElem = &(next.key); 135 } 136 next = next.next; 137 } 138 } 139 140 /// 141 static if(vals) { 142 @property ref Unqual!(K) front() 143 in { 144 assert(!empty); 145 } body { 146 return *frontElem; 147 } 148 } else { 149 @property Unqual!(K) front() 150 in { 151 assert(!empty); 152 } body { 153 return *frontElem; 154 } 155 } 156 157 /// 158 @property bool empty() { 159 return index == size_t.max; 160 } 161 162 /// 163 @property size_t length() { 164 return _length; 165 } 166 167 /// 168 @property typeof(this) save() { 169 return this; 170 } 171 } 172 173 /**A hash table that allocates its memory on RegionAllocator. Good for building a 174 * temporary hash table that will not escape the current scope. 175 * 176 * Examples: 177 * --- 178 * auto alloc = newRegionAllocator(); 179 * auto ss = StackHash!(uint, uint)(5, alloc); 180 * foreach(i; 0..5) { 181 * ss[i]++; 182 * } 183 * assert(ss[3] == 1); 184 * --- 185 * 186 * Warning: 187 * This implementation places removed nodes on an internal free list and 188 * recycles them, since there is no way to delete RegionAllocator-allocated data 189 * in a non-LIFO order. Therefore, you may not retain the address of a 190 * variable stored in a StackHash after deleting it from the StackHash. 191 * For example, DO NOT do this: 192 * --- 193 * SomeType* myPtr = &(myStackHash["foo"]); 194 * myStackHash.remove("foo"); 195 * *myPtr = someValue; 196 * --- 197 */ 198 struct StackHash(K, V) { 199 private: 200 alias SHNode!(K, V) Node; 201 202 // Using parallel arrays instead of structs to save on alignment overhead: 203 Unqual!(K)[] rKeys; 204 Unqual!(V)[] rVals; 205 Unqual!(Node*)[] rNext; 206 207 // Holds nodes that were deleted by remove(). 208 Node** freeList; 209 210 RegionAllocator alloc; 211 size_t _length; 212 213 // Tries to allocate off the free list. Otherwise allocates off 214 // RegionAllocator. 215 Node* allocNode() { 216 if(*freeList is null) { 217 return cast(Node*) alloc.allocate(Node.sizeof); 218 } 219 auto ret = *freeList; 220 *freeList = (*freeList).next; 221 return ret; 222 } 223 224 // Add a removed node to the free list. 225 void pushFreeList(Node* node) { 226 if(*freeList is null) { 227 node.next = null; // Sentinel 228 *freeList = node; 229 } else { 230 node.next = *freeList; 231 *freeList = node; 232 } 233 } 234 235 // rNext.ptr is stored in elements of rNext as a sentinel to indicate 236 // that the corresponding slot is unused.
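    // This is safe because, for as long as the table is alive, that address
    // holds the rNext array itself, so no node can ever be allocated there.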
237 Node* usedSentinel() @property { 238 return cast(Node*) rNext.ptr; 239 } 240 241 Node* newNode(K key) { 242 Node* ret = allocNode(); 243 ret.key = key; 244 ret.val = V.init; 245 ret.next = null; 246 return ret; 247 } 248 249 Node* newNode(K key, V val) { 250 Node* ret = allocNode(); 251 ret.key = key; 252 ret.val = val; 253 ret.next = null; 254 return ret; 255 } 256 257 hash_t getHash(K key) { 258 static if(is(K : long) && K.sizeof <= hash_t.sizeof) { 259 hash_t hash = cast(hash_t) key; 260 } else static if(__traits(compiles, key.toHash())) { 261 hash_t hash = key.toHash(); 262 } else { 263 hash_t hash = typeid(K).getHash(&key); 264 } 265 hash %= rNext.length; 266 return hash; 267 } 268 269 270 public: 271 /**Due to the nature of RegionAllocator, you must specify on object creation 272 * the approximate number of elements your table will have. Too large a 273 * number will waste space and incur poor cache performance. Too low a 274 * number will make this struct perform like a linked list. Generally, 275 * if you're building a table from some other range, some fraction of the 276 * size of that range is a good guess.*/ 277 this(size_t nElem, RegionAllocator alloc) { 278 // Obviously, the caller can never mean zero, because this struct 279 // can't work at all with nElem == 0, so assume it's a mistake and fix 280 // it here. 281 this.alloc = alloc; 282 283 if(nElem == 0) nElem++; 284 rKeys = alloc.uninitializedArray!(K[])(nElem); 285 rVals = alloc.uninitializedArray!(V[])(nElem); 286 287 // Allocate free list in same block with Node ptrs. That's what the 288 // + 1 is for. 289 rNext = alloc.uninitializedArray!(Node*[])(nElem + 1); 290 freeList = &(rNext[$ - 1]); 291 *freeList = null; 292 rNext = rNext[0..$ - 1]; 293 294 foreach(ref rKey; rKeys) { 295 rKey = K.init; 296 } 297 foreach(ref rVal; rVals) { 298 rVal = V.init; 299 } 300 foreach(ref r; rNext) { 301 r = usedSentinel; 302 } 303 304 305 } 306 307 /**Index an element of the range. If it does not exist, it will be created 308 * and initialized to V.init.*/ 309 ref V opIndex(K key) { 310 hash_t hash = getHash(key); 311 312 if(rNext[hash] == usedSentinel) { 313 rKeys[hash] = key; 314 rNext[hash] = null; 315 _length++; 316 return rVals[hash]; 317 } else if(rKeys[hash] == key) { 318 return rVals[hash]; 319 } else { // Collision. Start chaining. 320 Node** next = &(rNext[hash]); 321 while(*next !is null) { 322 if((**next).key == key) { 323 return (**next).val; 324 } 325 next = &((**next).next); 326 } 327 *next = newNode(key); 328 _length++; 329 return (**next).val; 330 } 331 } 332 333 /// 334 V opIndexAssign(V val, K key) { 335 hash_t hash = getHash(key); 336 337 if(rNext[hash] == usedSentinel) { 338 rKeys[hash] = key; 339 rVals[hash] = val; 340 rNext[hash] = null; 341 _length++; 342 return val; 343 } else if(rKeys[hash] == key) { 344 rVals[hash] = val; 345 return val; 346 } else { // Collision. Start chaining. 347 Node** next = &(rNext[hash]); 348 while(*next !is null) { 349 if((**next).key == key) { 350 (**next).val = val; 351 return val; 352 } 353 next = &((**next).next); 354 } 355 _length++; 356 *next = newNode(key, val); 357 return val; 358 } 359 } 360 361 /// 362 V* opIn_r(K key) { 363 hash_t hash = getHash(key); 364 365 if(rNext[hash] == usedSentinel) { 366 return null; 367 } else if(rKeys[hash] == key) { 368 return &(rVals[hash]); 369 } else { // Collision. Start chaining. 
370 Node* next = rNext[hash]; 371 while(next !is null) { 372 if(next.key == key) { 373 return &(next.val); 374 } 375 next = next.next; 376 } 377 return null; 378 } 379 } 380 381 /// 382 void remove(K key) { 383 hash_t hash = getHash(key); 384 385 Node** next = &(rNext[hash]); 386 if(rNext[hash] == usedSentinel) { 387 return; 388 } else if(rKeys[hash] == key) { 389 _length--; 390 if(rNext[hash] is null) { 391 rKeys[hash] = K.init; 392 rVals[hash] = V.init; 393 rNext[hash] = usedSentinel; 394 return; 395 } else { 396 Node* toPush = *next; 397 398 rKeys[hash] = (**next).key; 399 rVals[hash] = (**next).val; 400 rNext[hash] = (**next).next; 401 402 pushFreeList(toPush); 403 return; 404 } 405 } else { // Collision. Start chaining. 406 while(*next !is null) { 407 if((**next).key == key) { 408 _length--; 409 410 Node* toPush = *next; 411 *next = (**next).next; 412 413 pushFreeList(toPush); 414 break; 415 } 416 next = &((**next).next); 417 } 418 return; 419 } 420 } 421 422 /**Returns a forward range to iterate over the keys of this table. 423 * The lifetime of this range must not exceed the lifetime of this 424 * StackHash.*/ 425 auto keys() { 426 return HashRange!(K, StackHash!(K, V))(&this); 427 } 428 429 /**Returns a forward range to iterate over the values of this table. 430 * The lifetime of this range must not exceed the lifetime of this 431 * StackHash.*/ 432 auto values() { 433 return HashRange!(V, StackHash!(K, V), true)(&this); 434 } 435 436 /// 437 @property size_t length() const { 438 return _length; 439 } 440 441 /** 442 Attempt to look up a key and return a default value if the key is not 443 present. 444 */ 445 V get(K key, lazy V defaultValue) { 446 auto ptr = key in this; 447 if(ptr) return *ptr; 448 return defaultValue; 449 } 450 451 int opApply(int delegate(ref K, ref V) dg) { 452 auto k = this.keys; 453 auto v = this.values; 454 int res; 455 456 while(!k.empty) { 457 auto kFront = k.front; 458 res = dg(kFront, v.front); 459 k.popFront; 460 v.popFront; 461 if(res) { 462 break; 463 } 464 } 465 466 return res; 467 } 468 469 real efficiency() { 470 uint used = 0; 471 foreach(root; rNext) { 472 if(root != usedSentinel) { 473 used++; 474 } 475 } 476 return cast(real) used / rNext.length; 477 } 478 } 479 480 unittest { 481 alias StackHash!(string, uint) mySh; 482 483 { // Basic sanity checks. 484 auto alloc = newRegionAllocator(); 485 auto data = mySh(2, alloc); // Make sure we get some collisions. 486 data["foo"] = 1; 487 data["bar"] = 2; 488 data["baz"] = 3; 489 data["waldo"] = 4; 490 assert(!("foobar" in data)); 491 assert(*("foo" in data) == 1); 492 assert(*("bar" in data) == 2); 493 assert(*("baz" in data) == 3); 494 assert(*("waldo" in data) == 4); 495 assert(data["foo"] == 1); 496 assert(data["bar"] == 2); 497 assert(data["baz"] == 3); 498 assert(data["waldo"] == 4); 499 auto myKeys = array(data.keys); 500 qsort(myKeys); 501 assert(myKeys == cast(string[]) ["bar", "baz", "foo", "waldo"]); 502 auto myValues = array(data.values); 503 qsort(myValues); 504 assert(myValues == [1U, 2, 3, 4]); 505 { 506 auto k = data.keys; 507 auto v = data.values; 508 while(!k.empty) { 509 assert(data[k.front] == v.front); 510 k.popFront; 511 v.popFront; 512 } 513 } 514 foreach(v; data.values) { 515 assert(v > 0 && v < 5); 516 } 517 } 518 519 alias StackHash!(uint, uint) mySh2; 520 { // Test remove. 
521 auto alloc = newRegionAllocator(); 522 523 auto foo = mySh2(7, alloc); 524 for(uint i = 0; i < 200; i++) { 525 foo[i] = i; 526 } 527 assert(foo.length == 200); 528 for(uint i = 0; i < 200; i += 2) { 529 foo.remove(i); 530 } 531 foreach(i; 20..200) { 532 foo.remove(i); 533 } 534 for(uint i = 0; i < 20; i++) { 535 if(i & 1) { 536 assert(i in foo); 537 assert(*(i in foo) == i); 538 } else { 539 assert(!(i in foo)); 540 } 541 } 542 auto vals = array(foo.values); 543 assert(foo.length == 10); 544 assert(vals.qsort == [1U, 3, 5, 7, 9, 11, 13, 15, 17, 19]); 545 } 546 547 { // Monte carlo unittesting against builtin hash table. 548 auto alloc = newRegionAllocator(); 549 uint[uint] builtin; 550 auto monteSh = mySh2(20_000, alloc); 551 uint[] nums = alloc.uninitializedArray!(uint[])(100_000); 552 foreach(ref num; nums) { 553 num = uniform(0U, uint.max); 554 } 555 556 foreach(i; 0..1_000_000) { 557 auto index = uniform(0, cast(uint) nums.length); 558 if(index in builtin) { 559 assert(index in monteSh); 560 assert(builtin[index] == nums[index]); 561 assert(monteSh[index] == nums[index]); 562 builtin.remove(index); 563 monteSh.remove(index); 564 } else { 565 assert(!(index in monteSh)); 566 builtin[index] = nums[index]; 567 monteSh[index] = nums[index]; 568 } 569 } 570 571 assert(builtin.length == monteSh.length); 572 foreach(k, v; builtin) { 573 assert(k in monteSh); 574 assert(*(k in builtin) == *(k in monteSh)); 575 assert(monteSh[k] == v); 576 } 577 578 // Make sure nothing is missed in iteration. Since both keys and 579 // values use the same struct, just with a few static if statements, 580 // if it works for keys and simple tests work for values, it works. 581 foreach(k; monteSh.keys) { 582 builtin.remove(k); 583 } 584 assert(builtin.length == 0); 585 586 } 587 } 588 589 /**A hash set that allocates its memory on RegionAllocator. Good for building a 590 * temporary set that will not escape the current scope. 591 * 592 * Examples: 593 * --- 594 * auto alloc = newRegionAllocator(); 595 * auto ss = StackSet!(uint)(5, alloc); 596 * foreach(i; 0..5) { 597 * ss.insert(i); 598 * } 599 * assert(3 in ss); 600 * --- 601 */ 602 struct StackSet(K) { 603 private: 604 // Choose smallest representation of the data. 605 struct Node1 { 606 Node1* next; 607 K key; 608 } 609 610 struct Node2 { 611 K key; 612 Node2* next; 613 } 614 615 static if(Node1.sizeof < Node2.sizeof) { 616 alias Node1 Node; 617 } else { 618 alias Node2 Node; 619 } 620 621 Unqual!(K)[] rKeys; 622 Node*[] rNext; 623 624 Node** freeList; 625 size_t _length; 626 RegionAllocator alloc; 627 628 Node* usedSentinel() { 629 return cast(Node*) rNext.ptr; 630 } 631 632 // Tries to allocate off the free list. Otherwise allocates off 633 // RegionAllocator. 634 Node* allocNode() { 635 if(*freeList is null) { 636 return cast(Node*) alloc.allocate(Node.sizeof); 637 } 638 auto ret = *freeList; 639 *freeList = (*freeList).next; 640 return ret; 641 } 642 643 // Add a removed node to the free list. 
644 void pushFreeList(Node* node) { 645 if(*freeList is null) { 646 node.next = null; // Sentinel 647 *freeList = node; 648 } else { 649 node.next = *freeList; 650 *freeList = node; 651 } 652 } 653 654 Node* newNode(K key) { 655 Node* ret = allocNode(); 656 ret.key = key; 657 ret.next = null; 658 return ret; 659 } 660 661 hash_t getHash(K key) { 662 static if(is(K : long) && K.sizeof <= hash_t.sizeof) { 663 hash_t hash = cast(hash_t) key; 664 } else static if(__traits(compiles, key.toHash())) { 665 hash_t hash = key.toHash(); 666 } else { 667 hash_t hash = typeid(K).getHash(&key); 668 } 669 hash %= rNext.length; 670 return hash; 671 } 672 673 public: 674 /**Due to the nature of RegionAllocator, you must specify on object creation 675 * the approximate number of elements your set will have. Too large a 676 * number will waste space and incur poor cache performance. Too low a 677 * number will make this struct perform like a linked list. Generally, 678 * if you're building a set from some other range, some fraction of the 679 * size of that range is a good guess.*/ 680 this(size_t nElem, RegionAllocator alloc) { 681 this.alloc = alloc; 682 683 // Obviously, the caller can never mean zero, because this struct 684 // can't work at all with nElem == 0, so assume it's a mistake and fix 685 // it here. 686 if(nElem == 0) nElem++; 687 688 // Allocate the free list as the last element of rNext. 689 rNext = alloc.uninitializedArray!(Node*[])(nElem + 1); 690 freeList = &(rNext[$ - 1]); 691 *freeList = null; 692 rNext = rNext[0..$ - 1]; 693 694 foreach(ref root; rNext) { 695 root = usedSentinel; 696 } 697 698 rKeys = alloc.uninitializedArray!(Unqual!(K)[])(nElem); 699 foreach(ref root; rKeys) { 700 root = K.init; 701 } 702 } 703 704 /// 705 void insert(K key) { 706 hash_t hash = getHash(key); 707 708 if(rNext[hash] == usedSentinel) { 709 rKeys[hash] = key; 710 rNext[hash] = null; 711 _length++; 712 return; 713 } else if(rKeys[hash] == key) { 714 return; 715 } else { // Collision. Start chaining. 716 Node** next = &(rNext[hash]); 717 while(*next !is null) { 718 if((**next).key == key) { 719 return; 720 } 721 next = &((**next).next); 722 } 723 *next = newNode(key); 724 _length++; 725 return; 726 } 727 } 728 729 /**Returns a forward range of the elements of this struct. The range's 730 * lifetime must not exceed the lifetime of this object.*/ 731 auto elems() { 732 return HashRange!(K, typeof(this))(&this); 733 } 734 735 /// 736 bool opIn_r(K key) { 737 hash_t hash = getHash(key); 738 739 if(rNext[hash] == usedSentinel) { 740 return false; 741 } else if(rKeys[hash] == key) { 742 return true; 743 } else { // Collision. Start chaining. 744 Node* next = rNext[hash]; 745 while(next !is null) { 746 if(next.key == key) { 747 return true; 748 } 749 next = next.next; 750 } 751 return false; 752 } 753 } 754 755 /// 756 void remove(K key) { 757 hash_t hash = getHash(key); 758 759 Node** next = &(rNext[hash]); 760 if(rNext[hash] == usedSentinel) { 761 return; 762 } else if(rKeys[hash] == key) { 763 _length--; 764 if(rNext[hash] is null) { 765 rKeys[hash] = K.init; 766 rNext[hash] = usedSentinel; 767 return; 768 } else { 769 Node* toPush = *next; 770 771 rKeys[hash] = (**next).key; 772 rNext[hash] = (**next).next; 773 774 pushFreeList(toPush); 775 return; 776 } 777 } else { // Collision. Start chaining. 
778 while(*next !is null) { 779 if((**next).key == key) { 780 _length--; 781 Node* toPush = *next; 782 783 *next = (**next).next; 784 pushFreeList(toPush); 785 break; 786 } 787 next = &((**next).next); 788 } 789 return; 790 } 791 } 792 793 /// 794 @property size_t length() { 795 return _length; 796 } 797 } 798 799 unittest { 800 { // "Normal" unittesting. 801 auto alloc = newRegionAllocator(); 802 alias StackSet!(uint) mySS; 803 mySS set = mySS(12, alloc); 804 foreach(i; 0..20) { 805 set.insert(i); 806 } 807 assert(array(set.elems).qsort == seq(0U, 20U)); 808 809 for(uint i = 0; i < 20; i += 2) { 810 set.remove(i); 811 } 812 813 foreach(i; 0..20) { 814 if(i & 1) { 815 assert(i in set); 816 } else { 817 assert(!(i in set)); 818 } 819 } 820 uint[] contents; 821 822 foreach(elem; set.elems) { 823 contents ~= elem; 824 } 825 assert(contents.qsort == [1U,3,5,7,9,11,13,15,17,19]); 826 } 827 828 { // Monte carlo unittesting against builtin hash table. 829 auto alloc = newRegionAllocator(); 830 bool[uint] builtin; 831 auto monteSh = StackSet!uint(20_000, alloc); 832 833 foreach(i; 0..1_000_000) { 834 auto index = uniform(0, 100_000); 835 if(index in builtin) { 836 assert(index in monteSh); 837 builtin.remove(index); 838 monteSh.remove(index); 839 } else { 840 assert(!(index in monteSh)); 841 builtin[index] = 1; 842 monteSh.insert(index); 843 } 844 } 845 846 assert(builtin.length == monteSh.length); 847 foreach(k, v; builtin) { 848 assert(k in monteSh); 849 } 850 851 foreach(k; monteSh.elems) { 852 builtin.remove(k); 853 } 854 assert(builtin.length == 0); 855 } 856 } 857 858 private int height(T)(const T node) nothrow { 859 return (node is null) ? 0 : node.height; 860 } 861 862 struct AVLNodeRealHeight(T) { 863 T payload; 864 typeof(this)* left; 865 typeof(this)* right; 866 int height; 867 868 int balance() const nothrow @property { 869 return .height(left) - .height(right); 870 } 871 872 void fixHeight() nothrow { 873 auto leftHeight = .height(left); 874 auto rightHeight = .height(right); 875 876 height = ((leftHeight > rightHeight) ? leftHeight : rightHeight) + 1; 877 } 878 879 bool isLeaf() nothrow @property { 880 return left is null && right is null; 881 } 882 } 883 884 /* Store the height in the low order bits of the pointers to save space, 885 * since RegionAllocator allocates 16-byte aligned memory anyhow, but only if 886 * this would be smaller after considering alignment. 
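 * With 16-byte alignment the low four bits of each pointer are always zero,
 * so the height is packed into them as two four-bit nibbles: the high nibble
 * in the low bits of _left and the low nibble in the low bits of _right.
 * Eight bits of height are plenty, since no AVL tree that fits in memory
 * comes anywhere near height 255.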
887 */ 888 struct AVLNodeBitwise(T) { 889 T payload; 890 size_t _left; 891 size_t _right; 892 893 enum size_t mask = 0b1111; 894 enum size_t notMask = ~mask; 895 896 typeof(this)* left() nothrow @property { 897 return cast(typeof(return)) (_left & notMask); 898 } 899 900 const(typeof(this))* left() const nothrow @property { 901 return cast(typeof(return)) (_left & notMask); 902 } 903 904 void left(typeof(this)* newLeft) nothrow @property 905 in { 906 assert((cast(size_t) newLeft & mask) == 0); 907 } body { 908 _left &= mask; 909 _left |= cast(size_t) newLeft; 910 assert(left is newLeft); 911 } 912 913 typeof(this)* right() nothrow @property { 914 return cast(typeof(return)) (_right & notMask); 915 } 916 917 const(typeof(this))* right() const nothrow @property { 918 return cast(typeof(return)) (_right & notMask); 919 } 920 921 void right(typeof(this)* newRight) nothrow @property 922 in { 923 assert((cast(size_t) newRight & mask) == 0); 924 } body { 925 _right &= mask; 926 _right |= cast(size_t) newRight; 927 assert(right is newRight); 928 } 929 930 int height() const nothrow @property { 931 return (((_left & mask) << 4) | 932 (_right & mask)); 933 } 934 935 void height(int newHeight) nothrow @property { 936 _right &= notMask; 937 _right |= (newHeight & mask); 938 newHeight >>= 4; 939 _left &= notMask; 940 _left |= (newHeight & mask); 941 } 942 943 int balance() const nothrow @property { 944 return .height(left) - .height(right); 945 } 946 947 void fixHeight() nothrow { 948 auto leftHeight = .height(left); 949 auto rightHeight = .height(right); 950 951 height = ((leftHeight > rightHeight) ? leftHeight : rightHeight) + 1; 952 } 953 954 bool isLeaf() const nothrow @property { 955 return left is null && right is null; 956 } 957 } 958 959 private template GetAligned(uint size) { 960 static if(size % RegionAllocator.alignBytes(size) == 0) { 961 enum GetAligned = 0; 962 } else { 963 enum GetAligned = 964 size - size % RegionAllocator.alignBytes(size) + 965 RegionAllocator.alignBytes(size); 966 } 967 } 968 969 /**An AVL tree implementation on top of RegionAllocator. If elements are removed, 970 * they are stored on an internal free list and recycled when new elements 971 * are added to the tree. 972 * 973 * Template parameters: 974 * 975 * T = The type to be stored in the tree. 976 * 977 * key = Function to access the key that the stored elements are to be compared 978 * on. 979 * 980 * compFun = The function for comparing keys. 981 * 982 * Examples: 983 * --- 984 * struct StringNum { 985 * string someString; 986 * uint num; 987 * } 988 * 989 * // Create a StackTree of StringNums, sorted in descending order, using 990 * // someString for comparison. 991 * auto alloc = newRegionAllocator(); 992 * auto myTree = StackTree!(StringNum, "a.someString", "a > b")(alloc); 993 * 994 * // Add some elements. 995 * myTree.insert( StringNum("foo", 1)); 996 * myTree.insert( StringNum("bar", 2)); 997 * myTree.insert( StringNum("foo", 3)); 998 * 999 * assert(myTree.find("foo") == StringNum("foo", 3)); 1000 * assert(myTree.find("bar") == StringNum("bar", 2)); 1001 * --- 1002 * 1003 * Note: This tree supports a compile-time interface similar to StackSet 1004 * and can be used as a finite set implementation. 1005 * 1006 * Warning: 1007 * This implementation places removed nodes on an internal free list and 1008 * recycles them, since there is no way to delete RegionAllocator-allocated data 1009 * in a non-LIFO order.
Therefore, you may not retain the address of a 1010 * variable stored in a StackTree after deleting it from the StackTree. 1011 * For example, DO NOT do this: 1012 * --- 1013 * SomeType* myPtr = "foo" in myTree; 1014 * myTree.remove("foo"); 1015 * *myPtr = someValue; 1016 * --- 1017 */ 1018 struct StackTree(T, alias key = "a", alias compFun = "a < b") { 1019 private: 1020 1021 alias AVLNodeBitwise!(T) BitwiseNode; 1022 alias AVLNodeRealHeight!(T) RealNode; 1023 1024 enum size_t bitSize = GetAligned!(BitwiseNode.sizeof); 1025 enum size_t realHeightSize = GetAligned!(RealNode.sizeof); 1026 1027 static if(bitSize < realHeightSize ) { 1028 alias AVLNodeBitwise!(T) Node; 1029 } else { 1030 alias AVLNodeRealHeight!(T) Node; 1031 } 1032 1033 alias binaryFun!(compFun) comp; 1034 alias unaryFun!(key) getKey; 1035 1036 Node* head; 1037 Node** freeList; 1038 size_t _length; 1039 RegionAllocator alloc; 1040 1041 static bool insertComp(T lhs, T rhs) { 1042 return comp( getKey(lhs), getKey(rhs)); 1043 } 1044 1045 static Node* rotateRight(Node* node) 1046 in { 1047 assert(node.left !is null); 1048 assert( abs(node.balance) <= 2); 1049 1050 } body { 1051 Node* newHead = node.left; 1052 node.left = newHead.right; 1053 newHead.right = node; 1054 1055 node.fixHeight(); 1056 newHead.fixHeight(); 1057 1058 assert( abs(node.balance) < 2); 1059 return newHead; 1060 } 1061 1062 static Node* rotateLeft(Node* node) 1063 in { 1064 assert(node.right !is null); 1065 assert( abs(node.balance) <= 2); 1066 } body { 1067 Node* newHead = node.right; 1068 node.right = newHead.left; 1069 newHead.left = node; 1070 1071 node.fixHeight(); 1072 newHead.fixHeight(); 1073 1074 assert( abs(node.balance) < 2); 1075 return newHead; 1076 } 1077 1078 static Node* rebalance(Node* node) 1079 in { 1080 assert(node is null || abs(node.balance) <= 2); 1081 } out(ret) { 1082 assert( abs(ret.balance) < 2); 1083 } body { 1084 if(node is null) { 1085 return null; 1086 } 1087 1088 immutable balance = node.balance; 1089 if(abs(balance) <= 1) { 1090 return node; 1091 } 1092 1093 if(balance == -2) { 1094 1095 // It should be impossible to have a balance factor of -2 if 1096 // node.right is null. 1097 assert(node.right !is null); 1098 immutable rightBalance = node.right.balance; 1099 assert( abs(rightBalance) < 2); 1100 1101 if(rightBalance == 1) { 1102 node.right = rotateRight(node.right); 1103 node.fixHeight(); 1104 } 1105 1106 assert(node.balance == -2); 1107 return rotateLeft(node); 1108 1109 } else if(balance == 2) { 1110 // It should be impossible to have a balance factor of 2 if 1111 // node.left is null. 1112 assert(node.left !is null); 1113 immutable leftBalance = node.left.balance; 1114 assert( abs(leftBalance) < 2); 1115 1116 if(leftBalance == -1) { 1117 node.left = rotateLeft(node.left); 1118 node.fixHeight(); 1119 } 1120 1121 assert(node.balance == 2); 1122 return rotateRight(node); 1123 } 1124 1125 // AVL tree invariant is that abs(balance) <= 2 even during 1126 // insertion/deletion. 
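        // Unreachable: every balance factor in [-2, 2] is handled above, so
        // getting here means the in-contract was violated.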
1127 assert(0); 1128 } 1129 1130 void pushFreeList(Node* node) { 1131 node.left = null; 1132 node.right = *freeList; 1133 *freeList = node; 1134 } 1135 1136 Node* popFreeList() 1137 in { 1138 assert(freeList); 1139 assert(*freeList); 1140 } body { 1141 auto ret = *freeList; 1142 *freeList = ret.right; 1143 return ret; 1144 } 1145 1146 Node* newNode(T payload) 1147 in { 1148 assert(freeList, "Uninitialized StackTree!(" ~ T.stringof ~ ")"); 1149 } body { 1150 Node* ret; 1151 if(*freeList !is null) { 1152 ret = popFreeList(); 1153 } else { 1154 ret = cast(Node*) alloc.allocate(Node.sizeof); 1155 } 1156 1157 ret.payload = payload; 1158 ret.left = null; 1159 ret.right = null; 1160 ret.height = 1; 1161 return ret; 1162 } 1163 1164 public: 1165 /// 1166 this(RegionAllocator alloc) { 1167 this.alloc = alloc; 1168 this.freeList = alloc.uninitializedArray!(Node*[])(1).ptr; 1169 *(this.freeList) = null; 1170 } 1171 1172 /**Insert an element.*/ 1173 void insert(T toInsert) { 1174 if(head is null) { 1175 head = newNode(toInsert); 1176 _length++; 1177 } else { 1178 head = insertImpl(toInsert, head); 1179 } 1180 } 1181 1182 Node* insertImpl(T toInsert, Node* insertInto) { 1183 if( insertComp(toInsert, insertInto.payload) ) { 1184 if(insertInto.left is null) { 1185 insertInto.left = newNode(toInsert); 1186 _length++; 1187 } else { 1188 insertInto.left = insertImpl(toInsert, insertInto.left); 1189 } 1190 } else if( insertComp(insertInto.payload, toInsert) ) { 1191 if(insertInto.right is null) { 1192 insertInto.right = newNode(toInsert); 1193 _length++; 1194 } else { 1195 insertInto.right = insertImpl(toInsert, insertInto.right); 1196 } 1197 } else { 1198 // This is correct: If the comparison key is only part of the 1199 // payload, the old payload may not be equal to the new payload, 1200 // even if the comparison keys are equal. 1201 insertInto.payload = toInsert; 1202 return insertInto; 1203 } 1204 1205 insertInto.fixHeight(); 1206 return rebalance(insertInto); 1207 } 1208 1209 /**Remove an element from this tree. The type of U is expected to be the 1210 * type of the key that this tree is sorted on. 
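 *
 * Examples:
 * ---
 * // A minimal sketch, assuming a tree of uints keyed on the elements
 * // themselves (the default key and comparison functions):
 * auto alloc = newRegionAllocator();
 * auto tree = StackTree!(uint)(alloc);
 * tree.insert(42);
 * assert(42 in tree);
 * tree.remove(42);
 * assert(!(42 in tree));
 * ---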
1211 */ 1212 void remove(U)(U whatToRemove) { 1213 Node* removedNode; 1214 Node* leftMost; 1215 1216 Node* removeLeftMost(Node* node) { 1217 if(node.left is null) { 1218 auto ret = node.right; 1219 node.right = null; 1220 leftMost = node; 1221 return ret; 1222 } 1223 1224 node.left = removeLeftMost(node.left); 1225 node.fixHeight(); 1226 return rebalance(node); 1227 } 1228 1229 Node* removeSuccessor(Node* node) { 1230 if(node.right is null) { 1231 assert(node.left.isLeaf); 1232 leftMost = node.left; 1233 1234 node.left = null; 1235 return node; 1236 } 1237 1238 node.right = removeLeftMost(node.right); 1239 node.fixHeight(); 1240 return node; 1241 } 1242 1243 Node* removeImpl(U whatToRemove, Node* whereToRemove) { 1244 static bool findComp(V, W)(V lhs, W rhs) { 1245 static if(is(V == T)) { 1246 static assert(is(W == U)); 1247 return comp( getKey(lhs), rhs); 1248 } else { 1249 static assert(is(V == U)); 1250 static assert(is(W == T)); 1251 return comp(lhs, getKey(rhs) ); 1252 } 1253 } 1254 1255 if(whereToRemove is null) { 1256 return null; 1257 } 1258 1259 if( findComp(whatToRemove, whereToRemove.payload) ){ 1260 whereToRemove.left = removeImpl(whatToRemove, whereToRemove.left); 1261 whereToRemove.fixHeight(); 1262 return rebalance(whereToRemove); 1263 } else if( findComp(whereToRemove.payload, whatToRemove) ) { 1264 whereToRemove.right = removeImpl(whatToRemove, whereToRemove.right); 1265 whereToRemove.fixHeight(); 1266 return rebalance(whereToRemove); 1267 } else { 1268 // We've found it. 1269 _length--; 1270 removedNode = whereToRemove; 1271 if(whereToRemove.isLeaf) { 1272 return null; 1273 } 1274 1275 whereToRemove = removeSuccessor(whereToRemove); 1276 if(leftMost is null) { 1277 return null; 1278 } 1279 1280 leftMost.left = whereToRemove.left; 1281 leftMost.right = whereToRemove.right; 1282 leftMost.fixHeight(); 1283 return rebalance(leftMost); 1284 } 1285 } 1286 1287 head = removeImpl(whatToRemove, head); 1288 1289 debug(EXPENSIVE) assertAvl(head); 1290 1291 if(removedNode !is null) { 1292 pushFreeList(removedNode); 1293 } 1294 } 1295 1296 /**Find an element and return it. Throw an exception if it is not 1297 * present. U is expected to be the type of the key that this tree is 1298 * sorted on.*/ 1299 T find(U)(U whatToFind) { 1300 T* ptr = dstatsEnforce( opIn_r!(U)(whatToFind), 1301 "Item not found: " ~ to!string(whatToFind)); 1302 return *ptr; 1303 } 1304 1305 /**Find an element and return a pointer to it, or null if not present.*/ 1306 T* opIn_r(U)(U whatToFind) { 1307 auto ret = findImpl!(U)(whatToFind, head); 1308 if(ret is null) { 1309 return null; 1310 } 1311 return &(ret.payload); 1312 } 1313 1314 Node* findImpl(U)(U whatToFind, Node* whereToFind) { 1315 static bool findComp(V, W)(V lhs, W rhs) { 1316 static if(is(V == T)) { 1317 static assert(is(W == U)); 1318 return comp( getKey(lhs), rhs ); 1319 } else { 1320 static assert(is(V == U)); 1321 static assert(is(W == T)); 1322 return comp( lhs, getKey(rhs) ); 1323 } 1324 } 1325 1326 if(whereToFind is null) { 1327 return null; 1328 } 1329 1330 if( findComp(whatToFind, whereToFind.payload) ){ 1331 return findImpl!(U)(whatToFind, whereToFind.left); 1332 } else if( findComp(whereToFind.payload, whatToFind) ) { 1333 return findImpl!(U)(whatToFind, whereToFind.right); 1334 } else { 1335 // We've found it. 
1336 return whereToFind; 1337 } 1338 1339 assert(0); 1340 } 1341 1342 /**Iterate over the elements of this tree in sorted order.*/ 1343 int opApply( int delegate(ref T) dg) { 1344 int res; 1345 int opApplyImpl(Node* node) { 1346 if(node is null) { 1347 return 0; 1348 } 1349 res = opApplyImpl(node.left); 1350 if(res) { 1351 return res; 1352 } 1353 res = dg(node.payload); 1354 if(res) { 1355 return res; 1356 } 1357 res = opApplyImpl(node.right); 1358 return res; 1359 } 1360 1361 return opApplyImpl(head); 1362 } 1363 1364 /**Number of elements in the tree.*/ 1365 @property size_t length() const pure nothrow { 1366 return _length; 1367 } 1368 } 1369 1370 private int assertAvl(T)(T node) { 1371 if(node is null) { 1372 return 0; 1373 } 1374 1375 int leftHeight = assertAvl(node.left); 1376 int rightHeight = assertAvl(node.right); 1377 assert(node.height == max(leftHeight, rightHeight) + 1); 1378 assert( abs(node.balance) < 2, 1379 text( height(node.left), '\t', height(node.right))); 1380 1381 if(node.left) { 1382 assert(node.left.payload < node.payload); 1383 } 1384 1385 if(node.right) { 1386 assert(node.right.payload > node.payload, 1387 text(node.payload, ' ', node.right.payload)); 1388 } 1389 1390 return node.height; 1391 } 1392 1393 1394 unittest { 1395 // Test against StackSet on random data. 1396 auto alloc = newRegionAllocator(); 1397 StackTree!(uint) myTree = StackTree!(uint)(alloc); 1398 StackSet!(uint) ss = StackSet!(uint)(500, alloc); 1399 foreach(i; 0..1_000_000) { 1400 uint num = uniform(0, 1_000); 1401 if(num in ss) { 1402 assert(num in myTree); 1403 assert(*(num in myTree) == num); 1404 ss.remove(num); 1405 myTree.remove(num); 1406 } else { 1407 assert(!(num in myTree)); 1408 ss.insert(num); 1409 myTree.insert(num); 1410 } 1411 } 1412 assertAvl(myTree.head); 1413 } 1414 1415 /**Struct that iterates over keys or values of a StackTreeAA. 1416 * 1417 * Bugs: Uses opApply instead of the more flexible ranges, because I 1418 * haven't figured out how to iterate efficiently and in sorted order over a 1419 * tree without control of the stack. 1420 */ 1421 struct TreeAaIter(T, alias mapFun) { 1422 alias unaryFun!(mapFun) mFun; 1423 T tree; 1424 alias typeof(*(tree.head)) Node; 1425 1426 // TreeRange!(T, mapFun) asRange() { 1427 // dstatsEnforce(0, "Not implemented yet."); 1428 // } 1429 1430 alias typeof( mFun( tree.head.payload ) ) IterType; 1431 1432 /// 1433 int opApply( int delegate(ref IterType) dg) { 1434 int res; 1435 int opApplyImpl(Node* node) { 1436 if(node is null) { 1437 return 0; 1438 } 1439 res = opApplyImpl(node.left); 1440 if(res) { 1441 return res; 1442 } 1443 1444 static if(__traits(compiles, dg(mFun(node.payload)))) { 1445 res = dg(mFun(node.payload)); 1446 } else { 1447 auto toDg = mFun(node.payload); 1448 res = dg(toDg); 1449 } 1450 1451 if(res) { 1452 return res; 1453 } 1454 res = opApplyImpl(node.right); 1455 return res; 1456 } 1457 1458 return opApplyImpl(tree.head); 1459 } 1460 1461 /// 1462 @property size_t length() const pure nothrow { 1463 return tree.length; 1464 } 1465 } 1466 1467 private struct StackTreeAANode(K, V) { 1468 Unqual!(K) key; 1469 Unqual!(V) value; 1470 } 1471 1472 /**An associative array implementation based on StackTree. Lookups and 1473 * insertions are O(log N). This is significantly slower in both theory and 1474 * practice than StackHash, but you may want to use it if: 1475 * 1476 * 1. You don't know the approximate size of the table you will be creating 1477 * in advance. 
Unlike StackHash, this AA implementation does not need 1478 * to pre-allocate anything. 1479 * 1480 * 2. You care more about worst-case performance than average-case 1481 * performance. 1482 * 1483 * 3. You have a good comparison function for your type, but not a good hash 1484 * function. 1485 * 1486 */ 1487 struct StackTreeAA(K, V) { 1488 alias StackTreeAANode!(K, V) Node; 1489 StackTree!(Node, "a.key") tree; 1490 1491 /// 1492 this(RegionAllocator alloc) { 1493 this.tree = typeof(tree)(alloc); 1494 } 1495 1496 /**Looks up key in the table, returns it by reference. If it does not 1497 * exist, it will be created and initialized to V.init. This is handy, 1498 * for example, when counting things with integer types. 1499 */ 1500 ref V opIndex(K key) { 1501 Node* result = key in tree; 1502 if(result is null) { 1503 tree.insert( Node(key, V.init)); 1504 result = key in tree; 1505 } 1506 1507 return result.value; 1508 } 1509 1510 /// 1511 V opIndexAssign(V val, K key) { 1512 tree.insert( Node(key, val)); 1513 return val; 1514 } 1515 1516 /// 1517 V* opIn_r(K key) { 1518 auto nodePtr = key in tree; 1519 if(nodePtr is null) { 1520 return null; 1521 } 1522 1523 return &(nodePtr.value); 1524 } 1525 1526 /// 1527 void remove(K key) { 1528 tree.remove(key); 1529 } 1530 1531 /// 1532 @property size_t length() const pure nothrow { 1533 return tree.length; 1534 } 1535 1536 /// 1537 TreeAaIter!( typeof(tree), "a.key") keys() @property { 1538 typeof(return) ret; 1539 ret.tree = tree; 1540 return ret; 1541 } 1542 1543 private static ref Unqual!(V) getVal(ref Node node) { 1544 return node.value; 1545 } 1546 1547 /// 1548 TreeAaIter!( typeof(tree), getVal) values() @property { 1549 return typeof(return)(tree); 1550 } 1551 1552 /**Iterate over both the keys and values of this associative array.*/ 1553 int opApply( int delegate(ref Unqual!(K), ref Unqual!(V)) dg) { 1554 alias typeof(*(tree.head)) TreeNode; 1555 int res; 1556 int opApplyImpl(TreeNode* node) { 1557 if(node is null) { 1558 return 0; 1559 } 1560 res = opApplyImpl(node.left); 1561 if(res) { 1562 return res; 1563 } 1564 res = dg(node.payload.key, node.payload.value); 1565 if(res) { 1566 return res; 1567 } 1568 res = opApplyImpl(node.right); 1569 return res; 1570 } 1571 1572 return opApplyImpl(tree.head); 1573 } 1574 1575 } 1576 1577 unittest { 1578 1579 // Test against builtin AA on random data. 1580 { 1581 auto alloc = newRegionAllocator(); 1582 alias StackTreeAA!(string, uint) mySh; 1583 auto data = mySh(alloc); 1584 data["foo"] = 1; 1585 data["bar"] = 2; 1586 data["baz"] = 3; 1587 data["waldo"] = 4; 1588 assert(!("foobar" in data)); 1589 assert(*("foo" in data) == 1); 1590 assert(*("bar" in data) == 2); 1591 assert(*("baz" in data) == 3); 1592 assert(*("waldo" in data) == 4); 1593 assert(data["foo"] == 1); 1594 assert(data["bar"] == 2); 1595 assert(data["baz"] == 3); 1596 assert(data["waldo"] == 4); 1597 1598 assert(data.length == 4); 1599 auto myKeys = array(data.keys); 1600 qsort(myKeys); 1601 assert(myKeys == cast(string[]) ["bar", "baz", "foo", "waldo"]); 1602 auto myValues = array(data.values); 1603 qsort(myValues); 1604 assert(myValues == [1U, 2, 3, 4]); 1605 1606 foreach(v; data.values) { 1607 assert(v > 0 && v < 5); 1608 } 1609 } 1610 1611 alias StackTreeAA!(uint, uint) mySh2; 1612 { // Test remove. 
auto alloc = newRegionAllocator(); 1614 1615 auto foo = mySh2(alloc); 1616 for(uint i = 0; i < 200; i++) { 1617 foo[i] = i; 1618 } 1619 assert(foo.length == 200); 1620 for(uint i = 0; i < 200; i += 2) { 1621 foo.remove(i); 1622 } 1623 foreach(i; 20..200) { 1624 foo.remove(i); 1625 } 1626 for(uint i = 0; i < 20; i++) { 1627 if(i & 1) { 1628 assert(i in foo); 1629 assert(*(i in foo) == i); 1630 } else { 1631 assert(!(i in foo)); 1632 } 1633 } 1634 auto vals = array(foo.values); 1635 assert(foo.length == 10); 1636 assert(vals.qsort == [1U, 3, 5, 7, 9, 11, 13, 15, 17, 19]); 1637 } 1638 1639 { // Monte carlo unittesting against builtin hash table. 1640 auto alloc = newRegionAllocator(); 1641 uint[uint] builtin; 1642 auto monteSh = mySh2(alloc); 1643 uint[] nums = alloc.uninitializedArray!(uint[])(100_000); 1644 foreach(ref num; nums) { 1645 num = uniform(0U, uint.max); 1646 } 1647 1648 foreach(i; 0..10_000) { 1649 auto index = uniform(0, cast(uint) nums.length); 1650 if(index in builtin) { 1651 assert(index in monteSh); 1652 assert(builtin[index] == nums[index]); 1653 assert(monteSh[index] == nums[index]); 1654 builtin.remove(index); 1655 monteSh.remove(index); 1656 } else { 1657 assert(!(index in monteSh)); 1658 builtin[index] = nums[index]; 1659 monteSh[index] = nums[index]; 1660 } 1661 } 1662 1663 assert(builtin.length == monteSh.length); 1664 foreach(k, v; builtin) { 1665 assert(k in monteSh); 1666 assert(*(k in builtin) == *(k in monteSh)); 1667 assert(monteSh[k] == v); 1668 } 1669 1670 // Make sure nothing is missed in iteration. Since both keys and 1671 // values use the same struct, just with a few static if statements, 1672 // if it works for keys and simple tests work for values, it works. 1673 foreach(k; monteSh.keys) { 1674 builtin.remove(k); 1675 } 1676 assert(builtin.length == 0); 1677 } 1678 } 1679 1680 version(scid) { 1681 public import scid.internal.regionallocator; 1682 } else { 1683 version = noscid; 1684 } 1685 1686 version(noscid): 1687 1688 import std.traits, core.memory, std.range, core.exception, std.conv, 1689 std.algorithm, std.typetuple, std.exception, std.typecons; 1690 1691 static import core.stdc.stdlib; 1692 1693 // This is just for convenience/code readability/saving typing. 1694 private enum ptrSize = (void*).sizeof; 1695 1696 // This was accidentally assumed in a few places and I'm too lazy to fix it 1697 // until I see proof that it needs to be fixed. 1698 static assert(bool.sizeof == 1); 1699 1700 enum size_t defaultSegmentSize = 4 * 1_024 * 1_024; 1701 1702 /** 1703 The exception that is thrown on invalid use of $(D RegionAllocator) and 1704 $(D RegionAllocatorStack). This exception is not thrown on 1705 out-of-memory conditions; an $(D OutOfMemoryError) is thrown instead. 1706 */ 1707 class RegionAllocatorException : Exception { 1708 this(string msg, string file, int line) @safe { 1709 super(msg, file, line); 1710 } 1711 } 1712 1713 /** 1714 This flag determines whether a given $(D RegionAllocatorStack) is scanned for 1715 pointers by the garbage collector (GC). If yes, the entire stack is scanned, 1716 not just the part currently in use, since there is currently no efficient way to 1717 modify the bounds of a GC region. The stack is scanned conservatively, meaning 1718 that any bit pattern that would point to GC-allocated memory if interpreted as 1719 a pointer is considered to be a pointer. This can result in GC-allocated 1720 memory being retained when it should be freed. Due to these caveats,
Due to these caveats, 1721 it is recommended that any stack scanned by the GC be small and/or short-lived. 1722 */ 1723 enum GCScan : bool { 1724 /// 1725 no = false, 1726 1727 /// 1728 yes = true 1729 } 1730 1731 /** 1732 This object represents a segmented stack. Memory can be allocated from this 1733 stack using a $(XREF regionallocator RegionAllocator) object. Multiple 1734 $(D RegionAllocator) objects may be created per 1735 $(D RegionAllocatorStack) but each $(D RegionAllocator) uses a single 1736 $(D RegionAllocatorStack). 1737 1738 For most use cases it's convenient to use the default thread-local 1739 instance of $(D RegionAllocatorStack), which is lazily instantiated on 1740 the first call to the global function 1741 $(XREF regionallocator, newRegionAllocator). Occasionally it may be useful 1742 to have multiple independent stacks in one thread, in which case a 1743 $(D RegionAllocatorStack) can be created manually. 1744 1745 $(D RegionAllocatorStack) is reference counted and has reference semantics. 1746 When the last copy of a given instance goes out of scope, the memory 1747 held by the $(D RegionAllocatorStack) instance is released back to the 1748 heap. This cannot happen before memory allocated to a $(D RegionAllocator) 1749 instance is released back to the stack, because a $(D RegionAllocator) 1750 holds a copy of the $(D RegionAllocatorStack) instance it uses. 1751 1752 Examples: 1753 --- 1754 import std.regionallocator; 1755 1756 void main() { 1757 fun1(); 1758 } 1759 1760 void fun1() { 1761 auto stack = RegionAllocatorStack(1_048_576, GCScan.no); 1762 fun2(stack); 1763 1764 // At the end of fun1, the last copy of the RegionAllocatorStack 1765 // instance pointed to by stack goes out of scope. The memory 1766 // held by stack is released back to the heap. 1767 } 1768 1769 void fun2(RegionAllocatorStack stack) { 1770 auto alloc = stack.newRegionAllocator(); 1771 auto arr = alloc.newArray!(double[])(1_024); 1772 1773 // At the end of fun2, the last copy of the RegionAllocator instance 1774 // pointed to by alloc goes out of scope. The memory used by arr 1775 // is released back to stack. 1776 } 1777 --- 1778 */ 1779 struct RegionAllocatorStack { 1780 private: 1781 RefCounted!(RegionAllocatorStackImpl, RefCountedAutoInitialize.no) impl; 1782 bool initialized; 1783 bool _gcScanned; 1784 1785 public: 1786 /** 1787 Create a new $(D RegionAllocatorStack) with a given segment size in bytes. 1788 */ 1789 this(size_t segmentSize, GCScan shouldScan) { 1790 this._gcScanned = shouldScan; 1791 if(segmentSize == 0) { 1792 throw new RegionAllocatorException( 1793 "Cannot create a RegionAllocatorStack with segment size of 0.", 1794 __FILE__, __LINE__ 1795 ); 1796 } 1797 1798 impl = typeof(impl)(segmentSize, shouldScan); 1799 initialized = true; 1800 } 1801 1802 /** 1803 Creates a new $(D RegionAllocator) region using this stack. 1804 */ 1805 RegionAllocator newRegionAllocator() { 1806 if(!initialized) { 1807 throw new RegionAllocatorException( 1808 "Cannot create a RegionAllocator from an " ~ 1809 "uninitialized RegionAllocatorStack. Did you call " ~ 1810 "RegionAllocatorStack's constructor?", 1811 __FILE__, 1812 __LINE__ 1813 ); 1814 } 1815 1816 return RegionAllocator(this); 1817 } 1818 1819 /** 1820 Whether this stack is scanned by the garbage collector. 
1821 */ 1822 bool gcScanned() @property const pure nothrow @safe { 1823 return _gcScanned; 1824 } 1825 } 1826 1827 private struct RegionAllocatorStackImpl { 1828 1829 this(size_t segmentSize, GCScan shouldScan) { 1830 this.segmentSize = segmentSize; 1831 space = alignedMalloc(segmentSize, shouldScan); 1832 1833 // We don't need 16-byte alignment for the bookkeeping array. 1834 immutable nBookKeep = segmentSize / alignBytes; 1835 bookkeep = (cast(SizetPtr*) core.stdc.stdlib.malloc(nBookKeep)) 1836 [0..nBookKeep / SizetPtr.sizeof]; 1837 1838 if(!bookkeep.ptr) { 1839 outOfMemory(); 1840 } 1841 1842 nBlocks++; 1843 } 1844 1845 size_t segmentSize; // The size of each segment. 1846 1847 size_t used; 1848 void* space; 1849 size_t bookkeepIndex; 1850 SizetPtr[] bookkeep; 1851 uint nBlocks; 1852 uint nFree; 1853 size_t regionIndex = size_t.max; 1854 1855 // inUse holds info for all blocks except the one currently being 1856 // allocated from. freeList holds space ptrs for all free blocks. 1857 1858 static struct Block { 1859 size_t used = 0; 1860 void* space = null; 1861 } 1862 1863 SimpleStack!(Block) inUse; 1864 SimpleStack!(void*) freeList; 1865 1866 void doubleSize(ref SizetPtr[] bookkeep) { 1867 size_t newSize = bookkeep.length * 2; 1868 auto ptr = cast(SizetPtr*) core.stdc.stdlib.realloc( 1869 bookkeep.ptr, newSize * SizetPtr.sizeof); 1870 1871 if(!ptr) { 1872 outOfMemory(); 1873 } 1874 1875 bookkeep = ptr[0..newSize]; 1876 } 1877 1878 // Add an element to bookkeep, checking length first. 1879 void putLast(void* last) { 1880 if (bookkeepIndex == bookkeep.length) doubleSize(bookkeep); 1881 bookkeep[bookkeepIndex].p = last; 1882 bookkeepIndex++; 1883 } 1884 1885 void putLast(size_t num) { 1886 return putLast(cast(void*) num); 1887 } 1888 1889 // The number of objects allocated on the C heap instead of here. 1890 ref size_t nLargeObjects() @property pure nothrow { 1891 return bookkeep[regionIndex - 4].i; 1892 } 1893 1894 // The number of extra segments that have been allocated in the current 1895 // region beyond the base segment of the region. 1896 ref size_t nExtraSegments() @property pure nothrow { 1897 return bookkeep[regionIndex - 3].i; 1898 } 1899 1900 // Pushes a segment to the internal free list and frees segments to the 1901 // heap if there are more than 2x as many segments on the free list as 1902 // in use. 1903 void freeSegment() { 1904 nExtraSegments--; 1905 freeList.push(space); 1906 auto newHead = inUse.pop(); 1907 space = newHead.space; 1908 used = newHead.used; 1909 nBlocks--; 1910 nFree++; 1911 1912 if (nFree >= nBlocks * 2) { 1913 foreach(i; 0..nFree / 2) { 1914 alignedFree(freeList.pop); 1915 nFree--; 1916 } 1917 } 1918 } 1919 1920 void destroy() { 1921 if(space) { 1922 alignedFree(space); 1923 space = null; 1924 } 1925 1926 if(bookkeep) { 1927 core.stdc.stdlib.free(bookkeep.ptr); 1928 bookkeep = null; 1929 } 1930 1931 while(inUse.index > 0) { 1932 auto toFree = inUse.pop(); 1933 alignedFree(toFree.space); 1934 } 1935 1936 inUse.destroy(); 1937 1938 while(freeList.index > 0) { 1939 auto toFree = freeList.pop(); 1940 alignedFree(toFree); 1941 } 1942 1943 freeList.destroy(); 1944 } 1945 1946 ~this() { 1947 destroy(); 1948 } 1949 } 1950 1951 /** 1952 These properties get and set the segment size of the default thread-local 1953 $(D RegionAllocatorStack) instance. The default size is 4 megabytes. 1954 The setter is only effective before the global function 1955 $(D newRegionAllocator) has been called for the first time in the current 1956 thread. 
Attempts to set this property after the first call to this 1957 function from the current thread throw a $(D RegionAllocatorException). 1958 */ 1959 size_t threadLocalSegmentSize() @property nothrow @safe { 1960 return _threadLocalSegmentSize; 1961 } 1962 1963 /// Ditto 1964 size_t threadLocalSegmentSize(size_t newSize) @property @safe { 1965 if(threadLocalInitialized) { 1966 throw new RegionAllocatorException( 1967 "Cannot set threadLocalSegmentSize after the thread-local " ~ 1968 "RegionAllocatorStack has been used for the first time.", 1969 __FILE__, 1970 __LINE__ 1971 ); 1972 } 1973 1974 return _threadLocalSegmentSize = newSize; 1975 } 1976 1977 /** 1978 These properties determine whether the default thread-local 1979 $(D RegionAllocatorStack) instance is scanned by the garbage collector. 1980 The default is no. In most cases, scanning a stack this long-lived is not 1981 recommended, as it will cause too many false pointers. (See $(XREF 1982 regionallocator, GCScan) for details.) 1983 1984 The setter is only effective before the global function 1985 $(D newRegionAllocator) has been called for the first time in the current 1986 thread. Attempts to set this property after the first call to this 1987 function from the current thread throw a $(D RegionAllocatorException). 1988 */ 1989 bool scanThreadLocalStack() @property nothrow @safe { 1990 return _scanThreadLocalStack; 1991 } 1992 1993 /// Ditto 1994 bool scanThreadLocalStack(bool shouldScan) @property @safe { 1995 if(threadLocalInitialized) { 1996 throw new RegionAllocatorException( 1997 "Cannot set scanThreadLocalStack after the thread-local " ~ 1998 "RegionAllocatorStack has been used for the first time.", 1999 __FILE__, 2000 __LINE__ 2001 ); 2002 } 2003 2004 return _scanThreadLocalStack = shouldScan; 2005 } 2006 2007 private size_t _threadLocalSegmentSize = defaultSegmentSize; 2008 private RegionAllocatorStack threadLocalStack; 2009 private bool threadLocalInitialized; 2010 private bool _scanThreadLocalStack = false; 2011 2012 // Ensures the thread-local stack is initialized, then returns it. 2013 private ref RegionAllocatorStack getThreadLocal() { 2014 if(!threadLocalInitialized) { 2015 threadLocalInitialized = true; 2016 threadLocalStack = RegionAllocatorStack( 2017 threadLocalSegmentSize, cast(GCScan) scanThreadLocalStack 2018 ); 2019 } 2020 2021 return threadLocalStack; 2022 } 2023 2024 static ~this() { 2025 if(threadLocalInitialized) { 2026 threadLocalStack.impl.refCountedPayload.destroy(); 2027 } 2028 } 2029 2030 /** 2031 This struct provides an interface to the $(D RegionAllocator) functionality 2032 and enforces scoped deletion. A new instance using the thread-local 2033 $(D RegionAllocatorStack) instance is created using the global 2034 $(D newRegionAllocator) function. A new instance using 2035 an explicitly created $(D RegionAllocatorStack) is created using 2036 $(D RegionAllocatorStack.newRegionAllocator). 2037 2038 Each instance has reference semantics in that any copy will allocate from the 2039 same memory. When the last copy of an instance goes out of scope, all memory 2040 allocated via that instance is freed. Only the most recently created 2041 still-existing $(D RegionAllocator) using a given $(D RegionAllocatorStack) 2042 may be used to allocate and free memory at any given time. Deviations 2043 from this model result in a $(D RegionAllocatorException) being thrown. 2044 2045 An uninitialized $(D RegionAllocator) (for example $(D RegionAllocator.init)) 2046 has semantics similar to a null pointer. 
It may be assigned to or passed to 2047 a function. However, any attempt to call a method will result in a 2048 $(D RegionAllocatorException) being thrown. 2049 2050 Examples: 2051 --- 2052 void foo() { 2053 auto alloc = newRegionAllocator(); 2054 auto ptr1 = bar(alloc); 2055 auto ptr2 = alloc.allocate(42); 2056 2057 // The last copy of the RegionAllocator object used to allocate ptr1 2058 // and ptr2 is going out of scope here. The memory pointed to by 2059 // both ptr1 and ptr2 will be freed. 2060 } 2061 2062 void* bar(RegionAllocator alloc) { 2063 auto ret = alloc.allocate(42); 2064 2065 auto alloc2 = newRegionAllocator(); 2066 auto ptr3 = alloc2.allocate(42); 2067 2068 // ptr3 was allocated using alloc2, which is going out of scope. 2069 // Its memory will therefore be freed. ret was allocated using alloc. 2070 // A copy of this RegionAllocator is still alive in foo() after 2071 // bar() executes. Therefore, ret will not be freed on returning and 2072 // is still valid after bar() returns. 2073 2074 return ret; 2075 } 2076 2077 void* thisIsSafe() { 2078 // This is safe because the two RegionAllocator objects being used 2079 // are using two different RegionAllocatorStack objects. 2080 auto alloc = newRegionAllocator(); 2081 auto ptr1 = alloc.allocate(42); 2082 2083 auto stack = RegionAllocatorStack(1_048_576, GCScan.no); 2084 auto alloc2 = stack.newRegionAllocator(); 2085 2086 auto ptr2 = alloc2.allocate(42); 2087 auto ptr3 = alloc.allocate(42); 2088 } 2089 2090 void* dontDoThis() { 2091 auto alloc = newRegionAllocator(); 2092 auto ptr1 = alloc.allocate(42); 2093 auto alloc2 = newRegionAllocator(); 2094 2095 // Error: Allocating from a RegionAllocator instance other than the 2096 // most recently created one that's still alive from a given stack. 2097 auto ptr = alloc.allocate(42); 2098 } 2099 2100 void uninitialized() { 2101 RegionAllocator alloc; 2102 auto ptr = alloc.allocate(42); // Error: alloc is not initialized. 2103 auto alloc2 = alloc; // Ok. Both alloc, alloc2 are uninitialized. 2104 2105 alloc2 = newRegionAllocator(); 2106 auto ptr2 = alloc2.allocate(42); // Ok. 2107 auto ptr3 = alloc.allocate(42); // Error: alloc is still uninitialized. 2108 2109 alloc = alloc2; 2110 auto ptr4 = alloc.allocate(42); // Ok. 2111 } 2112 --- 2113 2114 Note: Allocations larger than $(D this.segmentSize) are handled as a special 2115 case and fall back to allocating directly from the C heap. These large 2116 allocations are freed as if they were allocated on a $(D RegionAllocatorStack) 2117 when $(D free) or $(D freeLast) is called or the last copy of a 2118 $(D RegionAllocator) instance goes out of scope. However, due to the extra 2119 bookkeeping required, destroying a region (as happens when the last copy of 2120 a $(D RegionAllocator) instance goes out of scope) will require time linear 2121 instead of constant in the number of allocations for regions where these 2122 large allocations are present. 2123 */ 2124 struct RegionAllocator { 2125 private: 2126 RegionAllocatorStack stack; 2127 2128 // The region index that should be current anytime this instance is 2129 // being used. This is checked for in allocate() and free(). 2130 size_t correctRegionIndex = size_t.max; 2131 2132 this(ref RegionAllocatorStack stack) { 2133 assert(stack.initialized); 2134 auto impl = &(stack.impl.refCountedPayload()); 2135 this.stack = stack; 2136 2137 with(*impl) { 2138 putLast(0); // nLargeObjects. 2139 putLast(0); // nExtraSegments. 2140 putLast(regionIndex); // Old regionIndex. 
2141 putLast(1); // Ref count of current RegionAllocator. 2142 regionIndex = bookkeepIndex; 2143 correctRegionIndex = regionIndex; 2144 } 2145 } 2146 2147 // CTFE function, for static assertions. Can't use bsr/bsf b/c it has 2148 // to be usable at compile time. 2149 static bool isPowerOf2(size_t num) pure nothrow @safe { 2150 return num && !(num & (num - 1)); 2151 } 2152 2153 alias RegionAllocatorStackImpl Impl; // Save typing. 2154 2155 // This is written as a mixin instead of a function because it's 2156 // performance critical and it wouldn't be inlinable because it throws. 2157 // By using a mixin, initialized can be checked all the time instead of 2158 // just in debug mode, for negligible performance cost. 2159 enum string getStackImplMixin = q{ 2160 if(!initialized) { 2161 throw new RegionAllocatorException( 2162 "RegionAllocator instance not initialized. Please use " ~ 2163 "newRegionAllocator() to create a RegionAllocator object.", 2164 __FILE__, 2165 __LINE__ 2166 ); 2167 } 2168 2169 auto impl = &(stack.impl.refCountedPayload()); 2170 }; 2171 2172 void incrementRefCount() { 2173 mixin(getStackImplMixin); 2174 impl.bookkeep[correctRegionIndex - 1].i++; 2175 } 2176 2177 void decrementRefCount() { 2178 mixin(getStackImplMixin); 2179 impl.bookkeep[correctRegionIndex - 1].i--; 2180 2181 if(impl.bookkeep[correctRegionIndex - 1].i == 0) { 2182 if(impl.regionIndex != correctRegionIndex) { 2183 throw new RegionAllocatorException( 2184 "Cannot free RegionAlloc regions in non-last in first " ~ 2185 "out order. Did you return a RegionAllocator from a " ~ 2186 "function?", 2187 __FILE__, 2188 __LINE__ 2189 ); 2190 } 2191 2192 with(*impl) { 2193 // Free allocations one at a time until we don't have any 2194 // more large objects. 2195 while (nLargeObjects > 0 && bookkeepIndex > regionIndex) { 2196 assert(bookkeepIndex > regionIndex); 2197 freeLast(); 2198 } 2199 2200 // Free any extra segments that were used by this region. 2201 while(nExtraSegments > 0) { 2202 freeSegment(); 2203 } 2204 2205 if(bookkeepIndex > regionIndex) { 2206 // Then there's something left to free. 2207 used = bookkeep[regionIndex].i - cast(size_t) space; 2208 } 2209 2210 bookkeepIndex = regionIndex - 4; 2211 regionIndex = bookkeep[regionIndex - 2].i; 2212 } 2213 } 2214 } 2215 2216 bool initialized() @property const pure nothrow @safe { 2217 return correctRegionIndex < size_t.max; 2218 } 2219 2220 Unqual!(ElementType!(R))[] arrayImplStack(R)(R range) { 2221 alias ElementType!(R) E; 2222 alias Unqual!(E) U; 2223 static if(hasLength!(R)) { 2224 U[] ret = uninitializedArray!(U[])(range.length); 2225 copy(range, ret); 2226 return ret; 2227 } else { 2228 mixin(getStackImplMixin); 2229 auto startPtr = allocate(0); 2230 size_t bytesCopied = 0; 2231 2232 while(!range.empty) { 2233 auto elem = range.front; 2234 if(impl.used + U.sizeof <= segmentSize) { 2235 range.popFront; 2236 *(cast(U*) (startPtr + bytesCopied)) = elem; 2237 bytesCopied += U.sizeof; 2238 impl.used += U.sizeof; 2239 } else { 2240 if(bytesCopied + U.sizeof >= segmentSize / 2) { 2241 // Then just heap-allocate. 2242 U[] result = (cast(U*) 2243 alignedMalloc(bytesCopied * 2, gcScanned)) 2244 [0..bytesCopied / U.sizeof * 2]; 2245 2246 immutable elemsCopied = bytesCopied / U.sizeof; 2247 result[0..elemsCopied] = (cast(U*) startPtr) 2248 [0..elemsCopied]; 2249 finishCopy(result, range, elemsCopied); 2250 freeLast(); 2251 impl.putLast(result.ptr); 2252 impl.nLargeObjects++; 2253 return result; 2254 } else { 2255 // Force allocation of a new segment. 
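                        // Back out the partially built array from the current
                        // segment (rewind used and drop its bookkeeping
                        // entry), then rebuild it one element larger via
                        // uninitializedArray.  The current segment has no room
                        // left, so allocate() supplies a fresh segment and
                        // startPtr is rebased onto impl.space below.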
2256 U[] oldData = (cast(U*) startPtr) 2257 [0..bytesCopied / U.sizeof]; 2258 impl.used -= bytesCopied; 2259 impl.bookkeepIndex--; 2260 U[] arr = uninitializedArray!(U[]) 2261 (bytesCopied / U.sizeof + 1); 2262 arr[0..oldData.length] = oldData[]; 2263 startPtr = impl.space; 2264 arr[$ - 1] = elem; 2265 bytesCopied += U.sizeof; 2266 range.popFront; 2267 } 2268 } 2269 } 2270 auto rem = bytesCopied % .alignBytes; 2271 if(rem != 0) { 2272 auto toAdd = .alignBytes - rem; 2273 if(impl.used + toAdd < RegionAllocator.segmentSize) { 2274 impl.used += toAdd; 2275 } else { 2276 impl.used = RegionAllocator.segmentSize; 2277 } 2278 } 2279 return (cast(U*) startPtr)[0..bytesCopied / U.sizeof]; 2280 } 2281 } 2282 2283 Unqual!(ElementType!(R))[] arrayImplHeap(R)(R range) { 2284 // Initial guess of how much space to allocate. It's relatively large 2285 // b/c the object will be short lived, so speed is more important than 2286 // space efficiency. 2287 enum initialGuess = 128; 2288 2289 mixin(getStackImplMixin); 2290 alias Unqual!(ElementType!R) E; 2291 auto arr = (cast(E*) alignedMalloc(E.sizeof * initialGuess, true)) 2292 [0..initialGuess]; 2293 2294 finishCopy(arr, range, 0); 2295 impl.putLast(arr.ptr); 2296 impl.nLargeObjects++; 2297 return arr; 2298 } 2299 2300 public: 2301 2302 this(this) { 2303 if(initialized) incrementRefCount(); 2304 } 2305 2306 ~this() { 2307 if(initialized) decrementRefCount(); 2308 } 2309 2310 void opAssign(RegionAllocator rhs) { 2311 if(initialized) decrementRefCount(); 2312 this.stack = rhs.stack; 2313 this.correctRegionIndex = rhs.correctRegionIndex; 2314 if(initialized) incrementRefCount(); 2315 } 2316 2317 /** 2318 Allocates $(D nBytes) bytes on the $(D RegionAllocatorStack) used by this 2319 $(D RegionAllocator) instance. The last block allocated from this 2320 $(D RegionAllocator) instance can be freed by calling 2321 $(D RegionAllocator.free) or $(D RegionAllocator.freeLast) or will be 2322 automatically freed when the last copy of this $(D RegionAllocator) 2323 instance goes out of scope. 2324 2325 Allocation requests larger than $(D segmentSize) are 2326 allocated directly on the C heap, are scanned by the GC iff 2327 the $(D RegionAllocatorStack) instance that this object uses is scanned by 2328 the GC, and are freed according to the same rules as described above. 2329 */ 2330 void* allocate(size_t nBytes) { 2331 mixin(getStackImplMixin); 2332 if(impl.regionIndex != this.correctRegionIndex) { 2333 throw new RegionAllocatorException( 2334 "Cannot allocate memory from a RegionAllocator that is not " ~ 2335 "currently at the top of the stack.", 2336 __FILE__, 2337 __LINE__ 2338 ); 2339 } 2340 2341 nBytes = allocSize(nBytes); 2342 with(*impl) { 2343 void* ret; 2344 if (segmentSize - used >= nBytes) { 2345 ret = space + used; 2346 used += nBytes; 2347 } else if (nBytes > segmentSize) { 2348 ret = alignedMalloc(nBytes, gcScanned); 2349 impl.nLargeObjects++; 2350 } else if (nFree > 0) { 2351 nExtraSegments++; 2352 inUse.push(Block(used, space)); 2353 space = freeList.pop; 2354 used = nBytes; 2355 nFree--; 2356 nBlocks++; 2357 ret = space; 2358 } else { // Allocate more space. 2359 nExtraSegments++; 2360 inUse.push(Block(used, space)); 2361 space = alignedMalloc(segmentSize, gcScanned); 2362 nBlocks++; 2363 used = nBytes; 2364 ret = space; 2365 } 2366 putLast(ret); 2367 return ret; 2368 } 2369 } 2370 2371 /** 2372 Frees the last block of memory allocated by the current 2373 $(D RegionAllocator). 
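    A minimal usage sketch:
    ---
    auto alloc = newRegionAllocator();
    auto a = alloc.allocate(16);
    auto b = alloc.allocate(32);
    alloc.freeLast();  // Frees the block pointed to by b.
    alloc.freeLast();  // Frees the block pointed to by a.
    ---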
Throws a $(D RegionAllocatorException) if 2374 this $(D RegionAllocator) is not the most recently created still-existing 2375 $(D RegionAllocator) using its $(D RegionAllocatorStack) instance. 2376 */ 2377 void freeLast() { 2378 mixin(getStackImplMixin); 2379 if(impl.regionIndex != this.correctRegionIndex) { 2380 throw new RegionAllocatorException( 2381 "Cannot free memory to a RegionAllocator that is not " ~ 2382 "currently at the top of the stack, or memory that has not " ~ 2383 "been allocated with this instance.", 2384 __FILE__, 2385 __LINE__ 2386 ); 2387 } 2388 2389 with(*impl) { 2390 auto lastPos = bookkeep[--bookkeepIndex].p; 2391 2392 // Handle large blocks. 2393 if(lastPos > space + segmentSize || lastPos < space) { 2394 alignedFree(lastPos); 2395 impl.nLargeObjects--; 2396 return; 2397 } 2398 2399 used = (cast(size_t) lastPos) - (cast(size_t) space); 2400 if (nBlocks > 1 && used == 0) { 2401 impl.freeSegment(); 2402 } 2403 } 2404 } 2405 2406 /** 2407 Checks that $(D ptr) is a pointer to the block that would be freed by 2408 $(D freeLast) then calls $(D freeLast). Throws a 2409 $(D RegionAllocatorException) if the pointer does not point to the 2410 block that would be freed by $(D freeLast). 2411 */ 2412 void free(void* ptr) { 2413 mixin(getStackImplMixin); 2414 auto lastPos = impl.bookkeep[impl.bookkeepIndex - 1].p; 2415 if(ptr !is lastPos) { 2416 throw new RegionAllocatorException( 2417 "Cannot free RegionAllocator memory in non-LIFO order.", 2418 __FILE__, 2419 __LINE__ 2420 ); 2421 } 2422 2423 freeLast(); 2424 } 2425 2426 /** 2427 Attempts to resize a previously allocated block of memory in place. 2428 This is possible only if $(D ptr) points to the beginning of the last 2429 block allocated by this $(D RegionAllocator) instance and, in the 2430 case where $(D newSize) is greater than the old size, there is 2431 additional space in the segment that $(D ptr) was allocated from. 2432 Additionally, blocks larger than this $(D RegionAllocator)'s segment size 2433 cannot be grown or shrunk. 2434 2435 Returns: True if the block was successfully resized, false otherwise. 2436 */ 2437 bool resize(const(void)* ptr, size_t newSize) { 2438 mixin(getStackImplMixin); 2439 2440 // This works since we always allocate in increments of alignBytes 2441 // no matter what the allocation size. 2442 newSize = allocSize(newSize); 2443 2444 with(*impl) { 2445 auto lastPos = bookkeep[bookkeepIndex - 1].p; 2446 if(ptr !is lastPos) { 2447 return false; 2448 } 2449 2450 // If it's a large block directly allocated on the heap, 2451 // then we definitely can't resize it in place. 2452 if(lastPos > space + segmentSize || lastPos < space) { 2453 return false; 2454 } 2455 2456 immutable blockSize = used - ((cast(size_t) lastPos) - 2457 cast(size_t) space); 2458 immutable sizediff_t diff = newSize - blockSize; 2459 2460 if(cast(sizediff_t) (impl.segmentSize - used) >= diff) { 2461 used += diff; 2462 return true; 2463 } 2464 } 2465 2466 return false; 2467 } 2468 2469 /** 2470 Returns whether the $(D RegionAllocatorStack) used by this 2471 $(D RegionAllocator) instance is scanned by the garbage collector. 2472 */ 2473 bool gcScanned() @property const pure nothrow @safe { 2474 return stack.gcScanned; 2475 } 2476 2477 /**Allocates an array of type $(D T). $(D T) may be a multidimensional 2478 array. In this case sizes may be specified for any number of dimensions 2479 from 1 to the number in $(D T). 
2480 2481 Examples: 2482 --- 2483 auto alloc = newRegionAllocator(); 2484 double[] arr = alloc.newArray!(double[])(100); 2485 assert(arr.length == 100); 2486 2487 double[][] matrix = alloc.newArray!(double[][])(42, 31); 2488 assert(matrix.length == 42); 2489 assert(matrix[0].length == 31); 2490 --- 2491 */ 2492 auto newArray(T, I...)(I sizes) 2493 if(allSatisfy!(isIntegral, I)) { 2494 2495 static void initialize(R)(R toInitialize) { 2496 static if(isArray!(ElementType!R)) { 2497 foreach(elem; toInitialize) { 2498 initialize(elem); 2499 } 2500 } else { 2501 toInitialize[] = ElementType!(R).init; 2502 } 2503 } 2504 2505 auto ret = uninitializedArray!(T, I)(sizes); 2506 initialize(ret); 2507 return ret; 2508 } 2509 2510 /** 2511 Same as $(D newArray), except skips initialization of elements for 2512 performance reasons. 2513 */ 2514 auto uninitializedArray(T, I...)(I sizes) 2515 if(allSatisfy!(isIntegral, I)) { 2516 static assert(sizes.length >= 1, 2517 "Cannot allocate an array without the size of at least the first " ~ 2518 " dimension."); 2519 static assert(sizes.length <= nDims!T, 2520 to!string(sizes.length) ~ " dimensions specified for a " ~ 2521 to!string(nDims!T) ~ " dimensional array."); 2522 2523 alias typeof(T.init[0]) E; 2524 2525 auto ptr = cast(E*) allocate(sizes[0] * E.sizeof); 2526 auto ret = ptr[0..sizes[0]]; 2527 2528 static if(sizes.length > 1) { 2529 foreach(ref elem; ret) { 2530 elem = uninitializedArray!(E)(sizes[1..$]); 2531 } 2532 } 2533 2534 return ret; 2535 } 2536 2537 /** 2538 Returns the number of bytes to which an allocation of size nBytes is 2539 guaranteed to be aligned. 2540 */ 2541 static size_t alignBytes(size_t nBytes) { 2542 return .alignBytes; 2543 } 2544 2545 /** 2546 Returns the number of bytes used to satisfy an allocation request 2547 of $(D nBytes). Will return a value greater than or equal to 2548 $(D nBytes) to account for alignment overhead. 2549 */ 2550 static size_t allocSize(size_t nBytes) pure nothrow { 2551 static assert(isPowerOf2(.alignBytes)); 2552 return (nBytes + (.alignBytes - 1)) & (~(.alignBytes - 1)); 2553 } 2554 2555 /** 2556 False because memory allocated by this allocator is not automatically 2557 reclaimed by the garbage collector. 2558 */ 2559 enum isAutomatic = false; 2560 2561 /** 2562 True because, when the last last copy of a $(RegionAllocator) instance 2563 goes out of scope, the memory it references is automatically freed. 2564 */ 2565 enum isScoped = true; 2566 2567 /** 2568 True because if memory is freed via $(D free()) instead of $(D freeLast()) 2569 then the pointer is checked for validity. 2570 */ 2571 enum freeIsChecked = true; 2572 2573 /** 2574 Returns the segment size of the $(D RegionAllocatorStack) used by this 2575 $(D RegionAllocator). 2576 */ 2577 size_t segmentSize() @property { 2578 mixin(getStackImplMixin); 2579 return impl.segmentSize; 2580 } 2581 2582 /** 2583 Returns the maximum number of bytes that may be allocated in the 2584 current segment. 2585 */ 2586 size_t segmentSlack() @property { 2587 mixin(getStackImplMixin); 2588 return impl.segmentSize - impl.used; 2589 } 2590 2591 /** 2592 Copies $(D range) to an array. The array will be located on the 2593 $(D RegionAllocator) stack if any of the following conditions apply: 2594 2595 1. $(D std.traits.hasIndirections!(ElementType!R)) is false. 2596 2597 2. $(D R) is a builtin array. In this case $(D range) maintains pointers 2598 to all elements at least until $(D array) returns, preventing the 2599 elements from being freed by the garbage collector. 
A similar 2600 assumption cannot be made for ranges other than builtin arrays. 2601 2602 3. The $(D RegionAllocatorStack) instance used by this 2603 $(D RegionAllocator) is scanned by the garbage collector. 2604 2605 If none of these conditions is met, the array is returned on the C heap 2606 and $(D GC.addRange) is called. In either case, $(D RegionAllocator.free), 2607 $(D RegionAllocator.freeLast), or the last copy of this $(D RegionAllocator) 2608 instance going out of scope will free the array as if it had been 2609 allocated on the $(D RegionAllocator) stack. 2610 2611 Rationale: The most common reason to call $(D array) on a builtin array is 2612 to modify its contents inside a function without affecting the 2613 caller's view. In this case $(D range) is not modified and 2614 prevents the elements from being freed by the garbage 2615 collector. Furthermore, if the copy returned does need 2616 to be scanned, the client can call $(D GC.addRange) before 2617 modifying the original array. 2618 2619 Examples: 2620 --- 2621 auto alloc = newRegionAllocator(); 2622 auto arr = alloc.array(iota(5)); 2623 assert(arr == [0, 1, 2, 3, 4]); 2624 --- 2625 */ 2626 Unqual!(ElementType!(R))[] array(R)(R range) if(isInputRange!R) { 2627 alias Unqual!(ElementType!(R)) E; 2628 if(gcScanned || !hasIndirections!E || isArray!R) { 2629 return arrayImplStack(range); 2630 } else { 2631 return arrayImplHeap(range); 2632 } 2633 } 2634 } 2635 2636 /** 2637 Returns a new $(D RegionAllocator) that uses the default thread-local 2638 $(D RegionAllocatorStack) instance. 2639 */ 2640 RegionAllocator newRegionAllocator() { 2641 return RegionAllocator(getThreadLocal()); 2642 } 2643 2644 // Finishes copying a range to a C heap allocated array. Assumes the first 2645 // half of the input array is stuff already copied and the second half is 2646 // free space. 2647 private void finishCopy(T, U)(ref T[] result, U range, size_t alreadyCopied) { 2648 void doRealloc() { 2649 auto newPtr = cast(T*) alignedRealloc( 2650 result.ptr, result.length * T.sizeof * 2, result.length * T.sizeof 2651 ); 2652 result = newPtr[0..result.length * 2]; 2653 } 2654 2655 auto index = alreadyCopied; 2656 foreach(elem; range) { 2657 if(index == result.length) doRealloc(); 2658 result[index++] = elem; 2659 } 2660 2661 result = result[0..index]; 2662 } 2663 2664 unittest { 2665 auto alloc = newRegionAllocator(); 2666 auto arr = alloc.array(iota(5)); 2667 assert(arr == [0, 1, 2, 3, 4]); 2668 2669 // Create quick and dirty finite but lengthless range. 
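    // Because Count has no length property, array() cannot take the hasLength
    // fast path in arrayImplStack and has to grow the copy element by
    // element, exercising the incremental copy logic instead.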
2670 static struct Count { 2671 uint num; 2672 uint upTo; 2673 @property size_t front() { 2674 return num; 2675 } 2676 void popFront() { 2677 num++; 2678 } 2679 @property bool empty() { 2680 return num >= upTo; 2681 } 2682 } 2683 2684 alloc.allocate(1024 * 1024 * 3); 2685 Count count; 2686 count.upTo = 1024 * 1025; 2687 auto asArray = alloc.array(count); 2688 foreach(i, elem; asArray) { 2689 assert(i == elem, to!(string)(i) ~ "\t" ~ to!(string)(elem)); 2690 } 2691 assert(asArray.length == 1024 * 1025); 2692 alloc.freeLast(); 2693 alloc.freeLast(); 2694 2695 while(alloc.stack.impl.refCountedPayload.freeList.index > 0) { 2696 alignedFree(alloc.stack.impl.refCountedPayload.freeList.pop()); 2697 } 2698 } 2699 2700 unittest { 2701 auto alloc = newRegionAllocator(); 2702 double[] arr = alloc.uninitializedArray!(double[])(100); 2703 assert(arr.length == 100); 2704 2705 double[][] matrix = alloc.uninitializedArray!(double[][])(42, 31); 2706 assert(matrix.length == 42); 2707 assert(matrix[0].length == 31); 2708 2709 double[][] mat2 = alloc.newArray!(double[][])(3, 1); 2710 assert(mat2.length == 3); 2711 assert(mat2[0].length == 1); 2712 2713 import std.math; 2714 assert(isNaN(mat2[0][0])); 2715 } 2716 2717 unittest { 2718 /* Not a particularly good unittest in that it depends on knowing the 2719 * internals of RegionAllocator, but it's the best I could come up w/. This 2720 * is really more of a stress test/sanity check than a normal unittest.*/ 2721 2722 // Make sure state is completely reset. 2723 destroy(threadLocalStack.impl); 2724 threadLocalStack = RegionAllocatorStack.init; 2725 threadLocalInitialized = false; 2726 2727 // First test to make sure a large number of allocations does what it's 2728 // supposed to in terms of reallocing bookkeep[], etc. 2729 enum nIter = defaultSegmentSize * 5 / alignBytes; 2730 2731 { 2732 auto alloc = newRegionAllocator(); 2733 foreach(i; 0..nIter) { 2734 alloc.allocate(alignBytes); 2735 } 2736 assert(alloc.stack.impl.refCountedPayload.nBlocks == 5, 2737 to!string(alloc.stack.impl.refCountedPayload.nBlocks)); 2738 assert(alloc.stack.impl.refCountedPayload.nFree == 0); 2739 foreach(i; 0..nIter) { 2740 alloc.freeLast(); 2741 } 2742 assert(alloc.stack.impl.refCountedPayload.nBlocks == 1); 2743 assert(alloc.stack.impl.refCountedPayload.nFree == 2); 2744 2745 // Make sure logic for freeing excess blocks works. If it doesn't this 2746 // test will run out of memory. 2747 enum allocSize = defaultSegmentSize / 2; 2748 foreach(i; 0..50) { 2749 foreach(j; 0..50) { 2750 alloc.allocate(allocSize); 2751 } 2752 foreach(j; 0..50) { 2753 alloc.freeLast(); 2754 } 2755 } 2756 2757 // Make sure data is stored properly. 2758 foreach(i; 0..10) { 2759 alloc.allocate(allocSize); 2760 } 2761 foreach(i; 0..5) { 2762 alloc.freeLast(); 2763 } 2764 void* space = alloc.stack.impl.refCountedPayload.space; 2765 size_t used = alloc.stack.impl.refCountedPayload.used; 2766 2767 { 2768 auto alloc2 = newRegionAllocator(); 2769 auto arrays = new uint[][10]; 2770 2771 foreach(i; 0..10) { 2772 uint[] data = alloc2.uninitializedArray!(uint[])(250_000); 2773 foreach(j, ref e; data) { 2774 e = cast(uint) (j * (i + 1)); 2775 } 2776 arrays[i] = data; 2777 } 2778 2779 // Make stuff get overwrriten if blocks are getting GC'd when 2780 // they're not supposed to. 2781 GC.minimize; // Free up all excess pools. 
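            // Churn the GC heap: if the segments holding the data above were
            // (incorrectly) collectable, these allocations would likely reuse
            // that memory and the content checks below would fail.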
2782 uint[][] foo; 2783 foreach(i; 0..40) { 2784 foo ~= new uint[1_048_576]; 2785 } 2786 foo = null; 2787 2788 for(size_t i = 9; i != size_t.max; i--) { 2789 foreach(j, e; arrays[i]) { 2790 assert(e == j * (i + 1)); 2791 } 2792 } 2793 } 2794 2795 assert(space == alloc.stack.impl.refCountedPayload.space, 2796 text(space, '\t', alloc.stack.impl.refCountedPayload.space)); 2797 assert(used == alloc.stack.impl.refCountedPayload.used, 2798 text(used, '\t', alloc.stack.impl.refCountedPayload.used)); 2799 while(alloc.stack.impl.refCountedPayload.nBlocks > 1 || 2800 alloc.stack.impl.refCountedPayload.used > 0) { 2801 alloc.freeLast(); 2802 } 2803 } 2804 2805 // Test that everything is really getting destroyed properly when 2806 // destroy() is called. If not then this test will run out of memory. 2807 foreach(i; 0..1000) { 2808 destroy(threadLocalStack.impl); 2809 threadLocalInitialized = false; 2810 2811 auto alloc = newRegionAllocator(); 2812 foreach(j; 0..1_000) { 2813 auto ptr = alloc.allocate(20_000); 2814 assert((cast(size_t) ptr) % alignBytes == 0); 2815 } 2816 2817 foreach(j; 0..500) { 2818 alloc.freeLast(); 2819 } 2820 } 2821 } 2822 2823 unittest { 2824 // Make sure the basics of using explicit stacks work. 2825 auto stack = RegionAllocatorStack(4 * 1024 * 1024, GCScan.no); 2826 auto alloc = stack.newRegionAllocator(); 2827 auto arr = alloc.array(iota(5)); 2828 assert(arr == [0, 1, 2, 3, 4]); 2829 auto ptr = alloc.allocate(5); 2830 2831 auto alloc2 = newRegionAllocator(); 2832 auto ptr2 = alloc2.allocate(5); 2833 auto ptr3 = alloc.allocate(5); 2834 } 2835 2836 unittest { 2837 // Make sure the stacks get freed properly when they go out of scope. 2838 // If they don't then this will run out of memory. 2839 foreach(i; 0..100_000) { 2840 auto stack = RegionAllocatorStack(4 * 1024 * 1024, GCScan.no); 2841 } 2842 } 2843 2844 unittest { 2845 // Make sure resize works properly. 2846 auto alloc = newRegionAllocator(); 2847 auto arr1 = alloc.array(iota(4)); 2848 auto res = alloc.resize(arr1.ptr, 8 * int.sizeof); 2849 assert(res); 2850 arr1 = arr1.ptr[0..8]; 2851 copy(iota(4, 8), arr1[4..$]); 2852 2853 auto arr2 = alloc.newArray!(int[])(8); 2854 2855 // If resizing resizes to something too small, this will have been 2856 // overwritten: 2857 assert(arr1 == [0, 1, 2, 3, 4, 5, 6, 7], text(arr1)); 2858 2859 alloc.free(arr2.ptr); 2860 auto res2 = alloc.resize(arr1.ptr, 4 * int.sizeof); 2861 assert(res2); 2862 arr2 = alloc.newArray!(int[])(8); 2863 2864 // This time the memory in arr1 should have been overwritten. 2865 assert(arr1 == [0, 1, 2, 3, 0, 0, 0, 0]); 2866 } 2867 2868 unittest { 2869 // Make sure that default thread-local stacks get freed properly at the 2870 // termination of a thread. If they don't then this will run out of 2871 // memory. 2872 2873 import core.thread; 2874 foreach(i; 0..100) { 2875 auto fun = { 2876 threadLocalSegmentSize = 100 * 1024 * 1024; 2877 newRegionAllocator(); 2878 }; 2879 2880 auto t = new Thread(fun); 2881 t.start(); 2882 t.join(); 2883 } 2884 } 2885 2886 unittest { 2887 // Make sure assignment works as advertised. 2888 RegionAllocator alloc; 2889 auto alloc2 = newRegionAllocator(); 2890 auto ptr = alloc2.allocate(8); 2891 alloc = alloc2; 2892 alloc.freeLast(); 2893 auto ptr2= alloc2.allocate(8); 2894 assert(ptr is ptr2); 2895 } 2896 2897 // Simple, fast stack w/o error checking. 
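// Used internally by the RegionAllocatorStack implementation (for example,
// for its list of free segments).  Grows by doubling via C realloc and
// assumes the caller never pops an empty stack.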
2898 static struct SimpleStack(T) { 2899 private size_t capacity; 2900 private size_t index; 2901 private T* data; 2902 private enum sz = T.sizeof; 2903 2904 private static size_t max(size_t lhs, size_t rhs) pure nothrow { 2905 return (rhs > lhs) ? rhs : lhs; 2906 } 2907 2908 void push(T elem) { 2909 if (capacity == index) { 2910 capacity = max(16, capacity * 2); 2911 data = cast(T*) core.stdc.stdlib.realloc(data, capacity * sz); 2912 } 2913 data[index++] = elem; 2914 } 2915 2916 T pop() { 2917 index--; 2918 auto ret = data[index]; 2919 return ret; 2920 } 2921 2922 void destroy() { 2923 if(data) { 2924 core.stdc.stdlib.free(data); 2925 data = null; 2926 } 2927 } 2928 } 2929 2930 private void outOfMemory() { 2931 throw new OutOfMemoryError("Out of memory in RegionAllocator."); 2932 } 2933 2934 // Memory allocation routines. These wrap allocate(), free() and realloc(), 2935 // and guarantee alignment. 2936 private enum size_t alignBytes = 16; 2937 2938 private void* alignedMalloc(size_t size, bool shouldAddRange = false) { 2939 // We need (alignBytes - 1) extra bytes to guarantee alignment, 1 byte 2940 // to store the shouldAddRange flag, and ptrSize bytes to store 2941 // the pointer to the beginning of the block. 2942 void* toFree = core.stdc.stdlib.malloc( 2943 alignBytes + ptrSize + size 2944 ); 2945 2946 if(toFree is null) outOfMemory(); 2947 2948 // Add the offset for the flag and the base pointer. 2949 auto intPtr = cast(size_t) toFree + ptrSize + 1; 2950 2951 // Align it. 2952 intPtr = (intPtr + alignBytes - 1) & (~(alignBytes - 1)); 2953 auto ret = cast(void**) intPtr; 2954 2955 // Store base pointer. 2956 (cast(void**) ret)[-1] = toFree; 2957 2958 // Store flag. 2959 (cast(bool*) ret)[-1 - ptrSize] = shouldAddRange; 2960 2961 if(shouldAddRange) { 2962 GC.addRange(ret, size); 2963 } 2964 2965 return ret; 2966 } 2967 2968 private void alignedFree(void* ptr) { 2969 // If it was allocated with alignedMalloc() then the pointer to the 2970 // beginning is at ptr[-1]. 2971 auto addedRange = (cast(bool*) ptr)[-1 - ptrSize]; 2972 2973 if(addedRange) { 2974 GC.removeRange(ptr); 2975 } 2976 2977 core.stdc.stdlib.free( (cast(void**) ptr)[-1]); 2978 } 2979 2980 // This is used by RegionAllocator, but I'm not sure enough that its interface 2981 // isn't going to change to make it public and document it. 2982 private void* alignedRealloc(void* ptr, size_t newLen, size_t oldLen) { 2983 auto storedRange = (cast(bool*) ptr)[-1 - ptrSize]; 2984 auto newPtr = alignedMalloc(newLen, storedRange); 2985 memcpy(newPtr, ptr, oldLen); 2986 2987 alignedFree(ptr); 2988 return newPtr; 2989 } 2990 2991 private union SizetPtr { 2992 size_t i; 2993 void* p; 2994 }
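// The following unittest is an illustrative sketch rather than part of the
// original test suite.  It uses only names defined in this module and checks
// the documented relationship between allocSize, alignBytes and segmentSlack:
// every request is rounded up to a multiple of alignBytes and consumes exactly
// that much of the current segment.
unittest {
    auto stack = RegionAllocatorStack(1_048_576, GCScan.no);
    auto alloc = stack.newRegionAllocator();

    immutable slackBefore = alloc.segmentSlack;
    alloc.allocate(1);

    // A 1-byte request is rounded up to the alignment granularity...
    assert(RegionAllocator.allocSize(1) == alignBytes);

    // ...and the current segment shrinks by exactly that rounded-up amount.
    assert(slackBefore - alloc.segmentSlack == RegionAllocator.allocSize(1));

    // Freeing the block restores the previous amount of slack.
    alloc.freeLast();
    assert(alloc.segmentSlack == slackBefore);
}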