1 /**
2 Stuff having to do with memory management.  Mostly a copy of RegionAllocator
3 for now until it gets into Phobos, as well as some RegionAllocator-specific
4 data structures.
5 
6 Author:  David Simcha*/
7  /*
8  * License:
9  * Boost Software License - Version 1.0 - August 17th, 2003
10  *
11  * Permission is hereby granted, free of charge, to any person or organization
12  * obtaining a copy of the software and accompanying documentation covered by
13  * this license (the "Software") to use, reproduce, display, distribute,
14  * execute, and transmit the Software, and to prepare derivative works of the
15  * Software, and to permit third-parties to whom the Software is furnished to
16  * do so, all subject to the following:
17  *
18  * The copyright notices in the Software and this entire statement, including
19  * the above license grant, this restriction and the following disclaimer,
20  * must be included in all copies of the Software, in whole or in part, and
21  * all derivative works of the Software, unless such copies or derivative
22  * works are solely in the form of machine-executable object code generated by
23  * a source language processor.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27  * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
28  * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
29  * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
30  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
31  * DEALINGS IN THE SOFTWARE.
32  */
33 
34 module dstats.alloc;
35 
import std.traits, core.memory, std.array, std.range, core.exception,
    std.functional, std.math, std.conv, std.algorithm : max;
38 
39 static import core.stdc.stdlib;
40 import core.stdc.string : memcpy;
41 
42 import dstats.base;
43 
44 version(unittest) {
45     import std.stdio, std.conv, std.random, dstats.sort;
46 }
47 
48 private template Appends(T, U) {
49     enum bool Appends = AppendsImpl!(T, U).ret;
50 }
51 
52 private template AppendsImpl(T, U) {
53     T[] a;
54     U b;
55     enum bool ret = is(typeof(a ~= b));
56 }
57 
///Appends to an array, deleting the old array if it had to be reallocated.
59 void appendDelOld(T, U)(ref T[] to, U from)
60 if(Appends!(T, U)) {
61     auto old = to;
62     to ~= from;
63     if (old.ptr !is to.ptr && old.ptr !is null) delete old;
64 }
65 
66 unittest {
67     uint[] foo;
68     foo.appendDelOld(5);
69     foo.appendDelOld(4);
70     foo.appendDelOld(3);
71     foo.appendDelOld(2);
72     foo.appendDelOld(1);
73     assert(foo == cast(uint[]) [5,4,3,2,1]);
74 }
75 
76 private struct SHNode(K, V) {
77     alias SHNode!(K, V) SomeType;
78     SomeType* next;
79     Unqual!(K) key;
80     Unqual!(V) val;
81 }
82 
83 /**Forward range struct for iterating over the keys or values of a
84  * StackHash or StackSet.  The lifetime of this object must not exceed that
85  * of the underlying StackHash or StackSet.*/
86 struct HashRange(K, S, bool vals = false) {
87 private:
88     S* set;
89     size_t index;
90     S.Node* next;
91     K* frontElem;
92     size_t _length;
93 
94     this(S* set) {
95         this.set = set;
96         if(set.rNext[0] == set.usedSentinel) {
97             this.popFront;
98         } else {
99             static if(vals) {
100                 frontElem = set.rVals.ptr;
101             } else {
102                 frontElem = set.rKeys.ptr;
103             }
104             next = set.rNext[0];
105         }
106         this._length = set.length;
107     }
108 
109 public:
110     ///
111     void popFront()
112     in {
113         assert(!empty);
114     } body {
115         this._length--;
116         if(next is null) {
117             do {
118                 index++;
119                 if(index >= set.rNext.length) {
120                     index = size_t.max;  // Sentinel for empty.
121                     return;
122                 }
123                 next = set.rNext[index];
124             } while(set.rNext[index] == set.usedSentinel);
125             static if(vals) {
126                 frontElem = &(set.rVals[index]);
127             } else {
128                 frontElem = &(set.rKeys[index]);
129             }
130         } else {
131             static if(vals) {
132                 frontElem = &(next.val);
133             } else {
134                 frontElem = &(next.key);
135             }
136             next = next.next;
137         }
138     }
139 
140     ///
141     static if(vals) {
142         @property ref Unqual!(K) front()
143         in {
144             assert(!empty);
145         } body {
146             return *frontElem;
147         }
    } else {
        @property Unqual!(K) front()
        in {
            assert(!empty);
        } body {
            return *frontElem;
        }
    }
156 
157     ///
158     @property bool empty() {
159         return index == size_t.max;
160     }
161 
162     ///
163     @property size_t length() {
164         return _length;
165     }
166 
167     ///
168     @property typeof(this) save() {
169         return this;
170     }
171 }
172 
/**A hash table that allocates its memory on RegionAllocator.  Good for
 * building temporary hash tables that will not escape the current scope.
175  *
176  * Examples:
177  * ---
178  * auto alloc = newRegionAllocator();
179  * auto ss = StackHash!(uint)(5, alloc);
180  * foreach(i; 0..5) {
181  *     ss[i]++;
182  * }
183  * assert(ss[3] == 1);
184  * ---
185  *
186  * Warning:
187  * This implementation places removed nodes on an internal free list and
188  * recycles them, since there is no way to delete RegionAllocator-allocated data
189  * in a non-LIFO order.  Therefore, you may not retain the address of a
 * variable stored in a StackHash after deleting it from the StackHash.
191  * For example, DO NOT do this:
192  * ---
193  * SomeType* myPtr = &(myStackHash["foo"]);
194  * myStackHash.remove("foo");
195  * *myPtr = someValue;
196  * ---
197  */
198 struct StackHash(K, V) {
199 private:
200     alias SHNode!(K, V) Node;
201 
202     // Using parallel arrays instead of structs to save on alignment overhead:
203     Unqual!(K)[] rKeys;
204     Unqual!(V)[] rVals;
205     Unqual!(Node*)[] rNext;
206 
207     // Holds nodes that were deleted by remove().
208     Node** freeList;
209 
210     RegionAllocator alloc;
211     size_t _length;
212 
213     // Tries to allocate off the free list.  Otherwise allocates off
214     // RegionAllocator.
215     Node* allocNode() {
216         if(*freeList is null) {
217             return cast(Node*) alloc.allocate(Node.sizeof);
218         }
219         auto ret = *freeList;
220         *freeList = (*freeList).next;
221         return ret;
222     }
223 
224     // Add a removed node to the free list.
225     void pushFreeList(Node* node) {
226         if(*freeList is null) {
227             node.next = null;  // Sentinel
228             *freeList = node;
229         } else {
230             node.next = *freeList;
231             *freeList = node;
232         }
233     }
234 
235     // rNext.ptr is stored in elements of rNext as a sentinel to indicate
236     // that the corresponding slot is unused.
237     Node* usedSentinel() @property {
238         return cast(Node*) rNext.ptr;
239     }
240 
    Node* newNode(K key) {
        Node* ret = allocNode();
        ret.key = key;
        ret.val = V.init;
        ret.next = null;
        return ret;
    }

    Node* newNode(K key, V val) {
        Node* ret = allocNode();
        ret.key = key;
        ret.val = val;
        ret.next = null;
        return ret;
    }
256 
257     hash_t getHash(K key) {
258         static if(is(K : long) && K.sizeof <= hash_t.sizeof) {
259             hash_t hash = cast(hash_t) key;
260         } else static if(__traits(compiles, key.toHash())) {
261             hash_t hash = key.toHash();
262         } else {
263             hash_t hash = typeid(K).getHash(&key);
264         }
265         hash %= rNext.length;
266         return hash;
267     }
268 
269 
270 public:
271     /**Due to the nature of RegionAllocator, you must specify on object creation
272      * the approximate number of elements your table will have.  Too large a
273      * number will waste space and incur poor cache performance.  Too low a
274      * number will make this struct perform like a linked list.  Generally,
275      * if you're building a table from some other range, some fraction of the
276      * size of that range is a good guess.*/
    this(size_t nElem, RegionAllocator alloc) {
        this.alloc = alloc;

        // The caller can never mean zero, because this struct can't work at
        // all with nElem == 0, so assume it's a mistake and fix it here.
        if(nElem == 0) nElem++;
        rKeys = alloc.uninitializedArray!(Unqual!(K)[])(nElem);
        rVals = alloc.uninitializedArray!(Unqual!(V)[])(nElem);
286 
        // Allocate the free list pointer in the same block as the Node
        // pointers.  That's what the + 1 is for.
289         rNext = alloc.uninitializedArray!(Node*[])(nElem + 1);
290         freeList = &(rNext[$ - 1]);
291         *freeList = null;
292         rNext = rNext[0..$ - 1];
293 
294         foreach(ref rKey; rKeys) {
295             rKey =  K.init;
296         }
297         foreach(ref rVal; rVals) {
298             rVal = V.init;
299         }
        foreach(ref r; rNext) {
            r = usedSentinel;
        }
    }
306 
307     /**Index an element of the range.  If it does not exist, it will be created
308      * and initialized to V.init.*/
309     ref V opIndex(K key) {
310         hash_t hash = getHash(key);
311 
312         if(rNext[hash] == usedSentinel) {
            rKeys[hash] = key;
314             rNext[hash] = null;
315             _length++;
316             return rVals[hash];
317         } else if(rKeys[hash] == key) {
318             return rVals[hash];
319         } else {  // Collision.  Start chaining.
320             Node** next = &(rNext[hash]);
321             while(*next !is null) {
                if((**next).key == key) {
323                     return (**next).val;
324                 }
325                 next = &((**next).next);
326             }
327             *next = newNode(key);
328             _length++;
329             return (**next).val;
330         }
331     }
332 
333     ///
334     V opIndexAssign(V val, K key) {
335         hash_t hash = getHash(key);
336 
337         if(rNext[hash] == usedSentinel) {
            rKeys[hash] = key;
339             rVals[hash] = val;
340             rNext[hash] = null;
341             _length++;
342             return val;
        } else if(rKeys[hash] == key) {
344             rVals[hash] = val;
345             return val;
346         } else {  // Collision.  Start chaining.
347             Node** next = &(rNext[hash]);
348             while(*next !is null) {
349                 if((**next).key == key) {
350                     (**next).val = val;
351                     return val;
352                 }
353                 next = &((**next).next);
354             }
355             _length++;
356             *next = newNode(key, val);
357             return val;
358         }
359     }
360 
361     ///
362     V* opIn_r(K key) {
363         hash_t hash = getHash(key);
364 
365         if(rNext[hash] == usedSentinel) {
366             return null;
367         } else if(rKeys[hash] == key) {
368             return &(rVals[hash]);
369         } else {  // Collision.  Start chaining.
370             Node* next = rNext[hash];
371             while(next !is null) {
372                 if(next.key == key) {
373                     return &(next.val);
374                 }
375                 next = next.next;
376             }
377             return null;
378         }
379    }
380 
381     ///
382     void remove(K key) {
383         hash_t hash = getHash(key);
384 
385         Node** next = &(rNext[hash]);
386         if(rNext[hash] == usedSentinel) {
387             return;
388         } else if(rKeys[hash] == key) {
389             _length--;
390             if(rNext[hash] is null) {
391                 rKeys[hash] = K.init;
392                 rVals[hash] = V.init;
393                 rNext[hash] = usedSentinel;
394                 return;
395             } else {
396                 Node* toPush = *next;
397 
398                 rKeys[hash] = (**next).key;
399                 rVals[hash] = (**next).val;
400                 rNext[hash] = (**next).next;
401 
402                 pushFreeList(toPush);
403                 return;
404             }
405         } else {  // Collision.  Start chaining.
406             while(*next !is null) {
407                 if((**next).key == key) {
408                     _length--;
409 
410                     Node* toPush = *next;
411                     *next = (**next).next;
412 
413                     pushFreeList(toPush);
414                     break;
415                 }
416                 next = &((**next).next);
417             }
418             return;
419         }
420    }
421 
422     /**Returns a forward range to iterate over the keys of this table.
423      * The lifetime of this range must not exceed the lifetime of this
424      * StackHash.*/
425     auto keys() {
426         return HashRange!(K, StackHash!(K, V))(&this);
427     }
428 
429     /**Returns a forward range to iterate over the values of this table.
430      * The lifetime of this range must not exceed the lifetime of this
431      * StackHash.*/
432     auto values() {
        return HashRange!(V, StackHash!(K, V), true)(&this);
434     }
435 
436     ///
437     @property size_t length() const {
438         return _length;
439     }
440 
441     /**
442     Attempt to look up a key and return a default value if the key is not
443     present.
444     */
445     V get(K key, lazy V defaultValue) {
446         auto ptr = key in this;
447         if(ptr) return *ptr;
448         return defaultValue;
449     }
450 
    ///Iterate over the key/value pairs of this table.
    int opApply(int delegate(ref K, ref V) dg) {
452         auto k = this.keys;
453         auto v = this.values;
454         int res;
455 
456         while(!k.empty) {
457             auto kFront = k.front;
458             res = dg(kFront, v.front);
459             k.popFront;
460             v.popFront;
461             if(res) {
462                 break;
463             }
464         }
465 
466         return res;
467     }
468 
    /**Returns the fraction of buckets that hold at least one element
     * directly.*/
    real efficiency() {
        uint used = 0;
        foreach(root; rNext) {
            if(root != usedSentinel) {
                used++;
            }
        }
        return cast(real) used / rNext.length;
    }
478 }
479 
480 unittest {
481     alias StackHash!(string, uint) mySh;
482 
483     {  // Basic sanity checks.
484         auto alloc = newRegionAllocator();
485         auto data = mySh(2, alloc);  // Make sure we get some collisions.
486         data["foo"] = 1;
487         data["bar"] = 2;
488         data["baz"] = 3;
489         data["waldo"] = 4;
490         assert(!("foobar" in data));
491         assert(*("foo" in data) == 1);
492         assert(*("bar" in data) == 2);
493         assert(*("baz" in data) == 3);
494         assert(*("waldo" in data) == 4);
495         assert(data["foo"] == 1);
496         assert(data["bar"] == 2);
497         assert(data["baz"] == 3);
498         assert(data["waldo"] == 4);
499         auto myKeys = array(data.keys);
500         qsort(myKeys);
501         assert(myKeys == cast(string[]) ["bar", "baz", "foo", "waldo"]);
502         auto myValues = array(data.values);
503         qsort(myValues);
504         assert(myValues == [1U, 2, 3, 4]);
505         {
506             auto k = data.keys;
507             auto v = data.values;
508             while(!k.empty) {
509                 assert(data[k.front] == v.front);
510                 k.popFront;
511                 v.popFront;
512             }
513         }
514         foreach(v; data.values) {
515             assert(v > 0 && v < 5);
516         }
517     }
518 
519     alias StackHash!(uint, uint) mySh2;
520     {   // Test remove.
521         auto alloc = newRegionAllocator();
522 
523         auto foo = mySh2(7, alloc);
524         for(uint i = 0; i < 200; i++) {
525             foo[i] = i;
526         }
527         assert(foo.length == 200);
528         for(uint i = 0; i < 200; i += 2) {
529             foo.remove(i);
530         }
531         foreach(i; 20..200) {
532             foo.remove(i);
533         }
534         for(uint i = 0; i < 20; i++) {
535             if(i & 1) {
536                 assert(i in foo);
537                 assert(*(i in foo) == i);
538             } else {
539                 assert(!(i in foo));
540             }
541         }
542         auto vals = array(foo.values);
543         assert(foo.length == 10);
544         assert(vals.qsort == [1U, 3, 5, 7, 9, 11, 13, 15, 17, 19]);
545     }
546 
    { // Monte Carlo unittesting against builtin hash table.
548         auto alloc = newRegionAllocator();
549         uint[uint] builtin;
550         auto monteSh = mySh2(20_000, alloc);
551         uint[] nums = alloc.uninitializedArray!(uint[])(100_000);
552         foreach(ref num; nums) {
553             num = uniform(0U, uint.max);
554         }
555 
556         foreach(i; 0..1_000_000) {
557             auto index = uniform(0, cast(uint) nums.length);
558             if(index in builtin) {
559                 assert(index in monteSh);
560                 assert(builtin[index] == nums[index]);
561                 assert(monteSh[index] == nums[index]);
562                 builtin.remove(index);
563                 monteSh.remove(index);
564             } else {
565                 assert(!(index in monteSh));
566                 builtin[index] = nums[index];
567                 monteSh[index] = nums[index];
568             }
569         }
570 
571         assert(builtin.length == monteSh.length);
572         foreach(k, v; builtin) {
573             assert(k in monteSh);
574             assert(*(k in builtin) == *(k in monteSh));
575             assert(monteSh[k] == v);
576         }
577 
578         // Make sure nothing is missed in iteration.  Since both keys and
579         // values use the same struct, just with a few static if statements,
580         // if it works for keys and simple tests work for values, it works.
581         foreach(k; monteSh.keys) {
582             builtin.remove(k);
583         }
584         assert(builtin.length == 0);
585 
586     }
587 }
588 
589 /**A hash set that allocates its memory on RegionAllocator.  Good for building a
590  * temporary set that will not escape the current scope.
591  *
592  * Examples:
593  * ---
594  * auto alloc = newRegionAllocator();
595  * auto ss = StackSet!(uint)(5, alloc);
596  * foreach(i; 0..5) {
597  *     ss.insert(i);
598  * }
599  * assert(3 in ss);
600  * ---
601  */
602 struct StackSet(K) {
603 private:
    // Choose whichever field layout gives the smaller node.
605     struct Node1 {
606         Node1* next;
607         K key;
608     }
609 
610     struct Node2 {
611         K key;
612         Node2* next;
613     }
614 
615     static if(Node1.sizeof < Node2.sizeof) {
616         alias Node1 Node;
617     } else {
618         alias Node2 Node;
619     }
620 
621     Unqual!(K)[] rKeys;
622     Node*[] rNext;
623 
624     Node** freeList;
625     size_t _length;
626     RegionAllocator alloc;
627 
628     Node* usedSentinel() {
629         return cast(Node*) rNext.ptr;
630     }
631 
632     // Tries to allocate off the free list.  Otherwise allocates off
633     // RegionAllocator.
634     Node* allocNode() {
635         if(*freeList is null) {
636             return cast(Node*) alloc.allocate(Node.sizeof);
637         }
638         auto ret = *freeList;
639         *freeList = (*freeList).next;
640         return ret;
641     }
642 
643     // Add a removed node to the free list.
644     void pushFreeList(Node* node) {
645         if(*freeList is null) {
646             node.next = null;  // Sentinel
647             *freeList = node;
648         } else {
649             node.next = *freeList;
650             *freeList = node;
651         }
652     }
653 
654     Node* newNode(K key) {
655         Node* ret = allocNode();
656         ret.key = key;
657         ret.next = null;
658         return ret;
659     }
660 
661     hash_t getHash(K key) {
662         static if(is(K : long) && K.sizeof <= hash_t.sizeof) {
663             hash_t hash = cast(hash_t) key;
664         } else static if(__traits(compiles, key.toHash())) {
665             hash_t hash = key.toHash();
666         } else {
667             hash_t hash = typeid(K).getHash(&key);
668         }
669         hash %= rNext.length;
670         return hash;
671     }
672 
673 public:
674     /**Due to the nature of RegionAllocator, you must specify on object creation
675      * the approximate number of elements your set will have.  Too large a
676      * number will waste space and incur poor cache performance.  Too low a
677      * number will make this struct perform like a linked list.  Generally,
678      * if you're building a set from some other range, some fraction of the
679      * size of that range is a good guess.*/
680     this(size_t nElem, RegionAllocator alloc) {
681         this.alloc = alloc;
682 
        // The caller can never mean zero, because this struct can't work at
        // all with nElem == 0, so assume it's a mistake and fix it here.
686         if(nElem == 0) nElem++;
687 
688         // Allocate the free list as the last element of rNext.
689         rNext = alloc.uninitializedArray!(Node*[])(nElem + 1);
690         freeList = &(rNext[$ - 1]);
691         *freeList = null;
692         rNext = rNext[0..$ - 1];
693 
694         foreach(ref root; rNext) {
695             root = usedSentinel;
696         }
697 
698         rKeys = alloc.uninitializedArray!(Unqual!(K)[])(nElem);
699         foreach(ref root; rKeys) {
700             root = K.init;
701         }
702     }
703 
704     ///
705     void insert(K key) {
706         hash_t hash = getHash(key);
707 
708         if(rNext[hash] == usedSentinel) {
709             rKeys[hash] = key;
710             rNext[hash] = null;
711             _length++;
712             return;
713         } else if(rKeys[hash] == key) {
714             return;
715         } else {  // Collision.  Start chaining.
716             Node** next = &(rNext[hash]);
717             while(*next !is null) {
718                 if((**next).key == key) {
719                     return;
720                 }
721                 next = &((**next).next);
722             }
723             *next = newNode(key);
724             _length++;
725             return;
726         }
727     }
728 
729     /**Returns a forward range of the elements of this struct.  The range's
730      * lifetime must not exceed the lifetime of this object.*/
731     auto elems() {
732         return HashRange!(K, typeof(this))(&this);
733     }
734 
735     ///
736     bool opIn_r(K key) {
737         hash_t hash = getHash(key);
738 
739         if(rNext[hash] == usedSentinel) {
740             return false;
741         } else if(rKeys[hash] == key) {
742             return true;
743         } else {  // Collision.  Start chaining.
744             Node* next = rNext[hash];
745             while(next !is null) {
746                 if(next.key == key) {
747                     return true;
748                 }
749                 next = next.next;
750             }
751             return false;
752         }
753    }
754 
755     ///
756     void remove(K key) {
757         hash_t hash = getHash(key);
758 
759         Node** next = &(rNext[hash]);
760         if(rNext[hash] == usedSentinel) {
761             return;
762         } else if(rKeys[hash] == key) {
763             _length--;
764             if(rNext[hash] is null) {
765                 rKeys[hash] = K.init;
766                 rNext[hash] = usedSentinel;
767                 return;
768             } else {
769                 Node* toPush = *next;
770 
771                 rKeys[hash] = (**next).key;
772                 rNext[hash] = (**next).next;
773 
774                 pushFreeList(toPush);
775                 return;
776             }
777         } else {  // Collision.  Start chaining.
778             while(*next !is null) {
779                 if((**next).key == key) {
780                     _length--;
781                     Node* toPush = *next;
782 
783                     *next = (**next).next;
784                     pushFreeList(toPush);
785                     break;
786                 }
787                 next = &((**next).next);
788             }
789             return;
790         }
791    }
792 
793     ///
794     @property size_t length() {
        return _length;
796     }
797 }
798 
799 unittest {
800     { // "Normal" unittesting.
801         auto alloc = newRegionAllocator();
802         alias StackSet!(uint) mySS;
803         mySS set = mySS(12, alloc);
804         foreach(i; 0..20) {
805             set.insert(i);
806         }
807         assert(array(set.elems).qsort == seq(0U, 20U));
808 
809         for(uint i = 0; i < 20; i += 2) {
810             set.remove(i);
811         }
812 
813         foreach(i; 0..20) {
814             if(i & 1) {
815                 assert(i in set);
816             } else {
817                 assert(!(i in set));
818             }
819         }
820         uint[] contents;
821 
822         foreach(elem; set.elems) {
823             contents ~= elem;
824         }
825         assert(contents.qsort == [1U,3,5,7,9,11,13,15,17,19]);
826     }
827 
    { // Monte Carlo unittesting against builtin hash table.
829         auto alloc = newRegionAllocator();
830         bool[uint] builtin;
831         auto monteSh = StackSet!uint(20_000, alloc);
832 
833         foreach(i; 0..1_000_000) {
834             auto index = uniform(0, 100_000);
835             if(index in builtin) {
836                 assert(index in monteSh);
837                 builtin.remove(index);
838                 monteSh.remove(index);
839             } else {
840                 assert(!(index in monteSh));
841                 builtin[index] = 1;
842                 monteSh.insert(index);
843             }
844         }
845 
846         assert(builtin.length == monteSh.length);
847         foreach(k, v; builtin) {
848             assert(k in monteSh);
849         }
850 
851         foreach(k; monteSh.elems) {
852             builtin.remove(k);
853         }
854         assert(builtin.length == 0);
855     }
856 }
857 
858 private int height(T)(const T node) nothrow {
859     return (node is null) ? 0 : node.height;
860 }
861 
862 struct AVLNodeRealHeight(T) {
863     T payload;
864     typeof(this)* left;
865     typeof(this)* right;
866     int height;
867 
868     int balance() const nothrow @property {
869         return .height(left) - .height(right);
870     }
871 
872     void fixHeight() nothrow {
873         auto leftHeight = .height(left);
874         auto rightHeight = .height(right);
875 
876         height = ((leftHeight > rightHeight) ? leftHeight : rightHeight) + 1;
877     }
878 
879     bool isLeaf() nothrow @property {
880         return left is null && right is null;
881     }
882 }
883 
884 /* Store the height in the low order bits of the pointers to save space,
885  * since RegionAllocator allocates 16-byte aligned memory anyhow, but only if
886  * this would be smaller after considering alignment.
887  */
888 struct AVLNodeBitwise(T) {
889     T payload;
890     size_t _left;
891     size_t _right;
892 
893     enum size_t mask = 0b1111;
894     enum size_t notMask = ~mask;
895 
896     typeof(this)* left() nothrow @property {
897         return cast(typeof(return)) (_left & notMask);
898     }
899 
900     const(typeof(this))* left() const nothrow @property {
901         return cast(typeof(return)) (_left & notMask);
902     }
903 
904     void left(typeof(this)* newLeft) nothrow @property
905     in {
906         assert((cast(size_t) newLeft & mask) == 0);
907     } body {
908         _left &= mask;
909         _left |= cast(size_t) newLeft;
910         assert(left is newLeft);
911     }
912 
913     typeof(this)* right() nothrow @property {
914         return cast(typeof(return)) (_right & notMask);
915     }
916 
917     const(typeof(this))* right() const nothrow @property {
918         return cast(typeof(return)) (_right & notMask);
919     }
920 
921     void right(typeof(this)* newRight) nothrow @property
922     in {
923         assert((cast(size_t) newRight & mask) == 0);
924     } body {
925         _right &= mask;
926         _right |= cast(size_t) newRight;
927         assert(right is newRight);
928     }
929 
930     int height() const nothrow @property {
931         return (((_left & mask) << 4) |
932                     (_right & mask));
933     }
934 
935     void height(int newHeight) nothrow @property {
936         _right &= notMask;
937         _right |= (newHeight & mask);
938         newHeight >>= 4;
939         _left &= notMask;
940         _left |= (newHeight & mask);
941     }
942 
943     int balance() const nothrow @property {
944         return .height(left) - .height(right);
945     }
946 
947     void fixHeight() nothrow {
948         auto leftHeight = .height(left);
949         auto rightHeight = .height(right);
950 
951         height = ((leftHeight > rightHeight) ? leftHeight : rightHeight) + 1;
952     }
953 
954     bool isLeaf() const nothrow @property {
955         return left is null && right is null;
956     }
957 }
958 
// Rounds size up to the nearest multiple of RegionAllocator's alignment.
private template GetAligned(uint size) {
    static if(size % RegionAllocator.alignBytes(size) == 0) {
        enum GetAligned = size;
    } else {
        enum GetAligned =
            size - size % RegionAllocator.alignBytes(size) +
                RegionAllocator.alignBytes(size);
    }
}
968 
969 /**An AVL tree implementation on top of RegionAllocator.  If elements are removed,
970  * they are stored on an internal free list and recycled when new elements
971  * are added to the tree.
972  *
 * Template parameters:
 *
 * T = The type to be stored in the tree.
 *
 * key = Function to access the key on which stored elements are compared.
979  *
980  * compFun = The function for comparing keys.
981  *
982  * Examples:
983  * ---
984  * struct StringNum {
985  *     string someString;
986  *     uint num;
987  * }
988  *
989  * // Create a StackTree of StringNums, sorted in descending order, using
990  * // someString for comparison.
991  * auto alloc = newRegionAllocator();
992  * auto myTree = StackTree!(StringNum, "a.someString", "a > b")(alloc);
993  *
994  * // Add some elements.
995  * myTree.insert( StringNum("foo", 1));
996  * myTree.insert( StringNum("bar", 2));
997  * myTree.insert( StringNum("foo", 3));
998  *
999  * assert(myTree.find("foo") == StringNum("foo", 3));
1000  * assert(myTree.find("bar") == StringNum("bar", 2));
1001  * ---
1002  *
1003  * Note:  This tree supports a compile-time interface similar to StackSet
1004  * and can be used as a finite set implementation.
1005  *
1006  * Warning:
1007  * This implementation places removed nodes on an internal free list and
1008  * recycles them, since there is no way to delete RegionAllocator-allocated data
1009  * in a non-LIFO order.  Therefore, you may not retain the address of a
1010  * variable stored in a StackTree after deleting it from the StackTree.
1011  * For example, DO NOT do this:
1012  * ---
1013  * SomeType* myPtr = "foo" in myTree;
1014  * myTree.remove("foo");
1015  * *myPtr = someValue;
1016  * ---
1017  */
1018 struct StackTree(T, alias key = "a", alias compFun = "a < b") {
1019 private:
1020 
1021     alias AVLNodeBitwise!(T) BitwiseNode;
1022     alias AVLNodeRealHeight!(T) RealNode;
1023 
1024     enum size_t bitSize = GetAligned!(BitwiseNode.sizeof);
1025     enum size_t realHeightSize = GetAligned!(RealNode.sizeof);
1026 
    static if(bitSize < realHeightSize) {
1028         alias AVLNodeBitwise!(T) Node;
1029     } else {
1030         alias AVLNodeRealHeight!(T) Node;
1031     }
1032 
1033     alias binaryFun!(compFun) comp;
1034     alias unaryFun!(key) getKey;
1035 
1036     Node* head;
1037     Node** freeList;
1038     size_t _length;
1039     RegionAllocator alloc;
1040 
1041     static bool insertComp(T lhs, T rhs) {
1042         return comp( getKey(lhs), getKey(rhs));
1043     }
1044 
1045     static Node* rotateRight(Node* node)
    in {
        assert(node.left !is null);
        assert( abs(node.balance) <= 2);
    } body {
1051         Node* newHead = node.left;
1052         node.left = newHead.right;
1053         newHead.right = node;
1054 
1055         node.fixHeight();
1056         newHead.fixHeight();
1057 
1058         assert( abs(node.balance) < 2);
1059         return newHead;
1060     }
1061 
1062     static Node* rotateLeft(Node* node)
1063     in {
1064         assert(node.right !is null);
1065         assert( abs(node.balance) <= 2);
1066     } body {
1067         Node* newHead = node.right;
1068         node.right = newHead.left;
1069         newHead.left = node;
1070 
1071         node.fixHeight();
1072         newHead.fixHeight();
1073 
1074         assert( abs(node.balance) < 2);
1075         return newHead;
1076     }
1077 
1078     static Node* rebalance(Node* node)
1079     in {
1080         assert(node is null || abs(node.balance) <= 2);
1081     } out(ret) {
1082         assert( abs(ret.balance) < 2);
1083     } body {
1084         if(node is null) {
1085             return null;
1086         }
1087 
1088         immutable balance = node.balance;
1089         if(abs(balance) <= 1) {
1090             return node;
1091         }
1092 
1093         if(balance == -2) {
1094 
1095             // It should be impossible to have a balance factor of -2 if
1096             // node.right is null.
1097             assert(node.right !is null);
1098             immutable rightBalance = node.right.balance;
1099             assert( abs(rightBalance) < 2);
1100 
1101             if(rightBalance == 1) {
1102                 node.right = rotateRight(node.right);
1103                 node.fixHeight();
1104             }
1105 
1106             assert(node.balance == -2);
1107             return rotateLeft(node);
1108 
1109         } else if(balance == 2) {
1110             // It should be impossible to have a balance factor of 2 if
1111             // node.left is null.
1112             assert(node.left !is null);
1113             immutable leftBalance = node.left.balance;
1114             assert( abs(leftBalance) < 2);
1115 
1116             if(leftBalance == -1) {
1117                 node.left = rotateLeft(node.left);
1118                 node.fixHeight();
1119             }
1120 
1121             assert(node.balance == 2);
1122             return rotateRight(node);
1123         }
1124 
1125         // AVL tree invariant is that abs(balance) <= 2 even during
1126         // insertion/deletion.
1127         assert(0);
1128     }
1129 
1130     void pushFreeList(Node* node) {
1131         node.left = null;
1132         node.right = *freeList;
1133         *freeList = node;
1134     }
1135 
1136     Node* popFreeList()
1137     in {
1138         assert(freeList);
1139         assert(*freeList);
1140     } body {
1141         auto ret = *freeList;
1142         *freeList = ret.right;
1143         return ret;
1144     }
1145 
1146     Node* newNode(T payload)
1147     in {
1148         assert(freeList, "Uninitialized StackTree!(" ~ T.stringof ~ ")");
1149     } body {
1150         Node* ret;
1151         if(*freeList !is null) {
1152             ret = popFreeList();
1153         } else {
1154             ret = cast(Node*) alloc.allocate(Node.sizeof);
1155         }
1156 
1157         ret.payload = payload;
1158         ret.left = null;
1159         ret.right = null;
1160         ret.height = 1;
1161         return ret;
1162     }
1163 
1164 public:
1165     ///
1166     this(RegionAllocator alloc) {
1167         this.alloc = alloc;
1168         this.freeList = alloc.uninitializedArray!(Node*[])(1).ptr;
1169         *(this.freeList) = null;
1170     }
1171 
1172     /**Insert an element.*/
1173     void insert(T toInsert) {
1174         if(head is null) {
1175             head = newNode(toInsert);
1176             _length++;
1177         } else {
1178             head = insertImpl(toInsert, head);
1179         }
1180     }
1181 
1182     Node* insertImpl(T toInsert, Node* insertInto) {
1183         if( insertComp(toInsert, insertInto.payload) ) {
1184             if(insertInto.left is null) {
1185                 insertInto.left = newNode(toInsert);
1186                 _length++;
1187             } else {
1188                 insertInto.left = insertImpl(toInsert, insertInto.left);
1189             }
1190         } else if( insertComp(insertInto.payload, toInsert) ) {
1191             if(insertInto.right is null) {
1192                 insertInto.right = newNode(toInsert);
1193                 _length++;
1194             } else {
1195                 insertInto.right = insertImpl(toInsert, insertInto.right);
1196             }
1197         } else {
1198             // This is correct:  If the comparison key is only part of the
1199             // payload, the old payload may not be equal to the new payload,
1200             // even if the comparison keys are equal.
1201             insertInto.payload = toInsert;
1202             return insertInto;
1203         }
1204 
1205         insertInto.fixHeight();
1206         return rebalance(insertInto);
1207     }
1208 
    /**Remove an element from this tree.  U is expected to be the type of the
     * key that this tree is sorted on.
     */
1212     void remove(U)(U whatToRemove) {
1213         Node* removedNode;
1214         Node* leftMost;
1215 
1216         Node* removeLeftMost(Node* node) {
1217             if(node.left is null) {
1218                 auto ret = node.right;
1219                 node.right = null;
1220                 leftMost = node;
1221                 return ret;
1222             }
1223 
1224             node.left = removeLeftMost(node.left);
1225             node.fixHeight();
1226             return rebalance(node);
1227         }
1228 
1229         Node* removeSuccessor(Node* node) {
1230             if(node.right is null) {
1231                 assert(node.left.isLeaf);
1232                 leftMost = node.left;
1233 
1234                 node.left = null;
1235                 return node;
1236             }
1237 
1238             node.right = removeLeftMost(node.right);
1239             node.fixHeight();
1240             return node;
1241         }
1242 
1243         Node* removeImpl(U whatToRemove, Node* whereToRemove) {
1244             static bool findComp(V, W)(V lhs, W rhs) {
1245                 static if(is(V == T)) {
1246                     static assert(is(W == U));
1247                     return comp( getKey(lhs), rhs);
1248                 } else {
1249                     static assert(is(V == U));
1250                     static assert(is(W == T));
1251                     return comp(lhs, getKey(rhs) );
1252                 }
1253             }
1254 
1255             if(whereToRemove is null) {
1256                 return null;
1257             }
1258 
1259             if( findComp(whatToRemove, whereToRemove.payload) ){
1260                 whereToRemove.left = removeImpl(whatToRemove, whereToRemove.left);
1261                 whereToRemove.fixHeight();
1262                 return rebalance(whereToRemove);
1263             } else if( findComp(whereToRemove.payload, whatToRemove) ) {
1264                 whereToRemove.right = removeImpl(whatToRemove, whereToRemove.right);
1265                 whereToRemove.fixHeight();
1266                 return rebalance(whereToRemove);
1267             } else {
1268                 // We've found it.
1269                 _length--;
1270                 removedNode = whereToRemove;
1271                 if(whereToRemove.isLeaf) {
1272                     return null;
1273                 }
1274 
1275                 whereToRemove = removeSuccessor(whereToRemove);
1276                 if(leftMost is null) {
1277                     return null;
1278                 }
1279 
1280                 leftMost.left = whereToRemove.left;
1281                 leftMost.right = whereToRemove.right;
1282                 leftMost.fixHeight();
1283                 return rebalance(leftMost);
1284             }
1285         }
1286 
1287         head = removeImpl(whatToRemove, head);
1288 
1289         debug(EXPENSIVE) assertAvl(head);
1290 
1291         if(removedNode !is null) {
1292             pushFreeList(removedNode);
1293         }
1294     }
1295 
1296     /**Find an element and return it.  Throw an exception if it is not
1297      * present.  U is expected to be the type of the key that this tree is
1298      * sorted on.*/
1299     T find(U)(U whatToFind) {
1300         T* ptr = dstatsEnforce( opIn_r!(U)(whatToFind),
1301             "Item not found:  " ~ to!string(whatToFind));
1302         return *ptr;
1303     }
1304 
1305     /**Find an element and return a pointer to it, or null if not present.*/
1306     T* opIn_r(U)(U whatToFind) {
1307         auto ret = findImpl!(U)(whatToFind, head);
1308         if(ret is null) {
1309             return null;
1310         }
1311         return &(ret.payload);
1312     }
1313 
1314     Node* findImpl(U)(U whatToFind, Node* whereToFind) {
1315         static bool findComp(V, W)(V lhs, W rhs) {
1316             static if(is(V == T)) {
1317                 static assert(is(W == U));
1318                 return comp( getKey(lhs), rhs );
1319             } else {
1320                 static assert(is(V == U));
1321                 static assert(is(W == T));
1322                 return comp( lhs, getKey(rhs) );
1323             }
1324         }
1325 
1326         if(whereToFind is null) {
1327             return null;
1328         }
1329 
1330         if( findComp(whatToFind, whereToFind.payload) ){
1331             return findImpl!(U)(whatToFind, whereToFind.left);
1332         } else if( findComp(whereToFind.payload, whatToFind) ) {
1333             return findImpl!(U)(whatToFind, whereToFind.right);
1334         } else {
1335             // We've found it.
1336             return whereToFind;
1337         }
1338 
1339         assert(0);
1340     }
1341 
1342     /**Iterate over the elements of this tree in sorted order.*/
1343     int opApply( int delegate(ref T) dg) {
1344         int res;
1345         int opApplyImpl(Node* node) {
1346             if(node is null) {
1347                 return 0;
1348             }
1349             res = opApplyImpl(node.left);
1350             if(res) {
1351                 return res;
1352             }
1353             res = dg(node.payload);
1354             if(res) {
1355                 return res;
1356             }
1357             res = opApplyImpl(node.right);
1358             return res;
1359         }
1360 
1361         return opApplyImpl(head);
1362     }
1363 
1364     /**Number of elements in the tree.*/
1365     @property size_t length() const pure nothrow {
1366         return _length;
1367     }
1368 }
1369 
1370 private int assertAvl(T)(T node) {
1371     if(node is null) {
1372         return 0;
1373     }
1374 
1375     int leftHeight = assertAvl(node.left);
1376     int rightHeight = assertAvl(node.right);
1377     assert(node.height == max(leftHeight, rightHeight) + 1);
1378     assert( abs(node.balance) < 2,
1379         text( height(node.left), '\t', height(node.right)));
1380 
1381     if(node.left) {
1382         assert(node.left.payload < node.payload);
1383     }
1384 
1385     if(node.right) {
1386         assert(node.right.payload > node.payload,
1387             text(node.payload, ' ', node.right.payload));
1388     }
1389 
1390     return node.height;
1391 }
1392 
1393 
1394 unittest {
1395     // Test against StackSet on random data.
1396     auto alloc = newRegionAllocator();
1397     StackTree!(uint) myTree = StackTree!(uint)(alloc);
1398     StackSet!(uint) ss = StackSet!(uint)(500, alloc);
1399     foreach(i; 0..1_000_000) {
1400         uint num = uniform(0, 1_000);
1401         if(num in ss) {
1402             assert(num in myTree);
1403             assert(*(num in myTree) == num);
1404             ss.remove(num);
1405             myTree.remove(num);
1406         } else {
1407             assert(!(num in myTree));
1408             ss.insert(num);
1409             myTree.insert(num);
1410         }
1411     }
1412     assertAvl(myTree.head);
1413 }
1414 
1415 /**Struct that iterates over keys or values of a StackTreeAA.
1416  *
1417  * Bugs:  Uses opApply instead of the more flexible ranges, because I
1418  * haven't figured out how to iterate efficiently and in sorted order over a
1419  * tree without control of the stack.
1420  */
1421 struct TreeAaIter(T, alias mapFun) {
1422     alias unaryFun!(mapFun) mFun;
1423     T tree;
1424     alias typeof(*(tree.head)) Node;
1425 
1426 //    TreeRange!(T, mapFun) asRange() {
1427 //        dstatsEnforce(0, "Not implemented yet.");
1428 //    }
1429 
1430     alias typeof( mFun( tree.head.payload ) ) IterType;
1431 
1432     ///
1433     int opApply( int delegate(ref IterType) dg) {
1434         int res;
1435         int opApplyImpl(Node* node) {
1436             if(node is null) {
1437                 return 0;
1438             }
1439             res = opApplyImpl(node.left);
1440             if(res) {
1441                 return res;
1442             }
1443 
1444             static if(__traits(compiles, dg(mFun(node.payload)))) {
1445                 res = dg(mFun(node.payload));
1446             } else {
1447                 auto toDg = mFun(node.payload);
1448                 res = dg(toDg);
1449             }
1450 
1451             if(res) {
1452                 return res;
1453             }
1454             res = opApplyImpl(node.right);
1455             return res;
1456         }
1457 
1458         return opApplyImpl(tree.head);
1459     }
1460 
1461     ///
1462     @property size_t length() const pure nothrow {
1463         return tree.length;
1464     }
1465 }
1466 
1467 private struct StackTreeAANode(K, V) {
1468     Unqual!(K) key;
1469     Unqual!(V) value;
1470 }
1471 
1472 /**An associative array implementation based on StackTree.  Lookups and
1473  * insertions are O(log N).  This is significantly slower in both theory and
1474  * practice than StackHash, but you may want to use it if:
1475  *
1476  * 1.  You don't know the approximate size of the table you will be creating
1477  *     in advance.  Unlike StackHash, this AA implementation does not need
1478  *     to pre-allocate anything.
1479  *
1480  * 2.  You care more about worst-case performance than average-case
1481  *     performance.
1482  *
1483  * 3.  You have a good comparison function for your type, but not a good hash
1484  *     function.
1485  *
1486  */
1487 struct StackTreeAA(K, V) {
1488     alias StackTreeAANode!(K, V) Node;
1489     StackTree!(Node, "a.key") tree;
1490 
1491     ///
1492     this(RegionAllocator alloc) {
1493         this.tree = typeof(tree)(alloc);
1494     }
1495 
1496     /**Looks up key in the table, returns it by reference.  If it does not
1497      * exist, it will be created and initialized to V.init.  This is handy,
1498      * for example, when counting things with integer types.
1499      */
1500     ref V opIndex(K key) {
1501         Node* result = key in tree;
1502         if(result is null) {
1503             tree.insert( Node(key, V.init));
1504             result = key in tree;
1505         }
1506 
1507         return result.value;
1508     }
1509 
1510     ///
1511     V opIndexAssign(V val, K key) {
1512         tree.insert( Node(key, val));
1513         return val;
1514     }
1515 
1516     ///
1517     V* opIn_r(K key) {
1518         auto nodePtr = key in tree;
1519         if(nodePtr is null) {
1520             return null;
1521         }
1522 
1523         return &(nodePtr.value);
1524     }
1525 
1526     ///
1527     void remove(K key) {
1528         tree.remove(key);
1529     }
1530 
1531     ///
1532     @property size_t length() const pure nothrow {
1533         return tree.length;
1534     }
1535 
1536     ///
1537     TreeAaIter!( typeof(tree), "a.key") keys() @property {
1538         typeof(return) ret;
1539         ret.tree = tree;
1540         return ret;
1541     }
1542 
1543     private static ref Unqual!(V) getVal(ref Node node) {
1544         return node.value;
1545     }
1546 
1547     ///
1548     TreeAaIter!( typeof(tree), getVal) values() @property {
1549         return typeof(return)(tree);
1550     }
1551 
1552     /**Iterate over both the keys and values of this associative array.*/
1553     int opApply( int delegate(ref Unqual!(K), ref Unqual!(V)) dg) {
1554         alias typeof(*(tree.head)) TreeNode;
1555         int res;
1556         int opApplyImpl(TreeNode* node) {
1557             if(node is null) {
1558                 return 0;
1559             }
1560             res = opApplyImpl(node.left);
1561             if(res) {
1562                 return res;
1563             }
1564             res = dg(node.payload.key, node.payload.value);
1565             if(res) {
1566                 return res;
1567             }
1568             res = opApplyImpl(node.right);
1569             return res;
1570         }
1571 
1572         return opApplyImpl(tree.head);
1573     }
1574 
1575 }
1576 
1577 unittest {
1578 
1579     // Test against builtin AA on random data.
1580     {
1581         auto alloc = newRegionAllocator();
1582         alias StackTreeAA!(string, uint) mySh;
1583         auto data = mySh(alloc);
1584         data["foo"] = 1;
1585         data["bar"] = 2;
1586         data["baz"] = 3;
1587         data["waldo"] = 4;
1588         assert(!("foobar" in data));
1589         assert(*("foo" in data) == 1);
1590         assert(*("bar" in data) == 2);
1591         assert(*("baz" in data) == 3);
1592         assert(*("waldo" in data) == 4);
1593         assert(data["foo"] == 1);
1594         assert(data["bar"] == 2);
1595         assert(data["baz"] == 3);
1596         assert(data["waldo"] == 4);
1597 
1598         assert(data.length == 4);
1599         auto myKeys = array(data.keys);
1600         qsort(myKeys);
1601         assert(myKeys == cast(string[]) ["bar", "baz", "foo", "waldo"]);
1602         auto myValues = array(data.values);
1603         qsort(myValues);
1604         assert(myValues == [1U, 2, 3, 4]);
1605 
1606         foreach(v; data.values) {
1607             assert(v > 0 && v < 5);
1608         }
1609     }
1610 
1611     alias StackTreeAA!(uint, uint) mySh2;
1612     {   // Test remove.
1613         auto alloc = newRegionAllocator();
1614 
1615         auto foo = mySh2(alloc);
1616         for(uint i = 0; i < 200; i++) {
1617             foo[i] = i;
1618         }
1619         assert(foo.length == 200);
1620         for(uint i = 0; i < 200; i += 2) {
1621             foo.remove(i);
1622         }
1623         foreach(i; 20..200) {
1624             foo.remove(i);
1625         }
1626         for(uint i = 0; i < 20; i++) {
1627             if(i & 1) {
1628                 assert(i in foo);
1629                 assert(*(i in foo) == i);
1630             } else {
1631                 assert(!(i in foo));
1632             }
1633         }
1634         auto vals = array(foo.values);
1635         assert(foo.length == 10);
1636         assert(vals.qsort == [1U, 3, 5, 7, 9, 11, 13, 15, 17, 19]);
1637     }
1638 
    { // Monte Carlo unittesting against builtin hash table.
1640         auto alloc = newRegionAllocator();
1641         uint[uint] builtin;
1642         auto monteSh = mySh2(alloc);
1643         uint[] nums = alloc.uninitializedArray!(uint[])(100_000);
1644         foreach(ref num; nums) {
1645             num = uniform(0U, uint.max);
1646         }
1647 
1648         foreach(i; 0..10_000) {
1649             auto index = uniform(0, cast(uint) nums.length);
1650             if(index in builtin) {
1651                 assert(index in monteSh);
1652                 assert(builtin[index] == nums[index]);
1653                 assert(monteSh[index] == nums[index]);
1654                 builtin.remove(index);
1655                 monteSh.remove(index);
1656             } else {
1657                 assert(!(index in monteSh));
1658                 builtin[index] = nums[index];
1659                 monteSh[index] = nums[index];
1660             }
1661         }
1662 
1663         assert(builtin.length == monteSh.length);
1664         foreach(k, v; builtin) {
1665             assert(k in monteSh);
1666             assert(*(k in builtin) == *(k in monteSh));
1667             assert(monteSh[k] == v);
1668         }
1669 
1670         // Make sure nothing is missed in iteration.  Since both keys and
1671         // values use the same struct, just with a few static if statements,
1672         // if it works for keys and simple tests work for values, it works.
1673         foreach(k; monteSh.keys) {
1674             builtin.remove(k);
1675         }
1676         assert(builtin.length == 0);
1677     }
1678 }
1679 
1680 version(scid) {
1681     public import scid.internal.regionallocator;
1682 } else {
1683     version = noscid;
1684 }
1685 
1686 version(noscid):
1687 
1688 import std.traits, core.memory, std.range, core.exception, std.conv,
1689     std.algorithm, std.typetuple, std.exception, std.typecons;
1690 
1691 static import core.stdc.stdlib;
1692 
1693 // This is just for convenience/code readability/saving typing.
1694 private enum ptrSize = (void*).sizeof;
1695 
1696 // This was accidentally assumed in a few places and I'm too lazy to fix it
1697 // until I see proof that it needs to be fixed.
1698 static assert(bool.sizeof == 1);
1699 
1700 enum size_t defaultSegmentSize = 4 * 1_024 * 1_024;
1701 
1702 /**
1703 The exception that is thrown on invalid use of $(RegionAllocator) and
1704 $(D RegionAllocatorStack).  This exception is not thrown on out of memory.
1705 An $(D OutOfMemoryError) is thrown instead.
1706 */
1707 class RegionAllocatorException : Exception {
1708     this(string msg, string file, int line) @safe {
1709         super(msg, file, line);
1710     }
1711 }
1712 
1713 /**
1714 This flag determines whether a given $(D RegionAllocatorStack) is scanned for
1715 pointers by the garbage collector (GC).  If yes, the entire stack is scanned,
1716 not just the part currently in use, since there is currently no efficient way to
1717 modify the bounds of a GC region.  The stack is scanned conservatively, meaning
1718 that any bit pattern that would point to GC-allocated memory if interpreted as
1719 a pointer is considered to be a pointer.  This can result in GC-allocated
1720 memory being retained when it should be freed.  Due to these caveats,
1721 it is recommended that any stack scanned by the GC be small and/or short-lived.
1722 */
1723 enum GCScan : bool {
1724     ///
1725     no = false,
1726 
1727     ///
1728     yes = true
1729 }
1730 
1731 /**
1732 This object represents a segmented stack.  Memory can be allocated from this
stack using a $(XREF regionallocator, RegionAllocator) object.  Multiple
1734 $(D RegionAllocator) objects may be created per
1735 $(D RegionAllocatorStack) but each $(D RegionAllocator) uses a single
1736 $(D RegionAllocatorStack).
1737 
1738 For most use cases it's convenient to use the default thread-local
1739 instance of $(D RegionAllocatorStack), which is lazily instantiated on
1740 the first call to the global function
1741 $(XREF regionallocator, newRegionAllocator).  Occasionally it may be useful
1742 to have multiple independent stacks in one thread, in which case a
1743 $(D RegionAllocatorStack) can be created manually.
1744 
1745 $(D RegionAllocatorStack) is reference counted and has reference semantics.
1746 When the last copy of a given instance goes out of scope, the memory
1747 held by the $(D RegionAllocatorStack) instance is released back to the
1748 heap.  This cannot happen before memory allocated to a $(D RegionAllocator)
1749 instance is released back to the stack, because a $(D RegionAllocator)
1750 holds a copy of the $(D RegionAllocatorStack) instance it uses.
1751 
1752 Examples:
1753 ---
import dstats.alloc;
1755 
1756 void main() {
1757     fun1();
1758 }
1759 
1760 void fun1() {
1761     auto stack = RegionAllocatorStack(1_048_576, GCScan.no);
1762     fun2(stack);
1763 
1764     // At the end of fun1, the last copy of the RegionAllocatorStack
1765     // instance pointed to by stack goes out of scope.  The memory
1766     // held by stack is released back to the heap.
1767 }
1768 
1769 void fun2(RegionAllocatorStack stack) {
1770     auto alloc = stack.newRegionAllocator();
1771     auto arr = alloc.newArray!(double[])(1_024);
1772 
1773     // At the end of fun2, the last copy of the RegionAllocator instance
1774     // pointed to by alloc goes out of scope.  The memory used by arr
1775     // is released back to stack.
1776 }
1777 ---
1778 */
1779 struct RegionAllocatorStack {
1780 private:
1781     RefCounted!(RegionAllocatorStackImpl, RefCountedAutoInitialize.no) impl;
1782     bool initialized;
1783     bool _gcScanned;
1784 
1785 public:
    /**
    Create a new $(D RegionAllocatorStack) with a given segment size in bytes,
    specifying whether it is scanned by the garbage collector.
    */
1789     this(size_t segmentSize, GCScan shouldScan) {
1790         this._gcScanned = shouldScan;
1791         if(segmentSize == 0) {
1792             throw new RegionAllocatorException(
1793                 "Cannot create a RegionAllocatorStack with segment size of 0.",
1794                 __FILE__, __LINE__
1795             );
1796         }
1797 
1798         impl = typeof(impl)(segmentSize, shouldScan);
1799         initialized = true;
1800     }
1801 
1802     /**
1803     Creates a new $(D RegionAllocator) region using this stack.
1804     */
1805     RegionAllocator newRegionAllocator() {
1806         if(!initialized) {
1807             throw new RegionAllocatorException(
1808                 "Cannot create a RegionAllocator from an " ~
1809                 "uninitialized RegionAllocatorStack.  Did you call " ~
1810                 "RegionAllocatorStack's constructor?",
1811                 __FILE__,
1812                 __LINE__
1813             );
1814         }
1815 
1816         return RegionAllocator(this);
1817     }
1818 
1819     /**
1820     Whether this stack is scanned by the garbage collector.
1821     */
1822     bool gcScanned() @property const pure nothrow @safe {
1823         return _gcScanned;
1824     }
1825 }
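
// A minimal sketch of the construction contract documented above:  a
// manually created stack reports its scanning policy, and a
// default-initialized stack refuses to hand out allocators.
unittest {
    auto stack = RegionAllocatorStack(65_536, GCScan.no);
    assert(!stack.gcScanned);

    RegionAllocatorStack uninit;
    bool threw = false;
    try {
        uninit.newRegionAllocator();
    } catch(RegionAllocatorException) {
        threw = true;
    }
    assert(threw);
}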
1826 
1827 private struct RegionAllocatorStackImpl {
1828 
1829     this(size_t segmentSize, GCScan shouldScan) {
1830         this.segmentSize = segmentSize;
1831         space = alignedMalloc(segmentSize, shouldScan);
1832 
1833         // We don't need 16-byte alignment for the bookkeeping array.
1834         immutable nBookKeep = segmentSize / alignBytes;
1835         bookkeep = (cast(SizetPtr*) core.stdc.stdlib.malloc(nBookKeep))
1836                     [0..nBookKeep / SizetPtr.sizeof];
1837 
1838         if(!bookkeep.ptr) {
1839             outOfMemory();
1840         }
1841 
1842         nBlocks++;
1843     }
1844 
1845     size_t segmentSize;  // The size of each segment.
1846 
1847     size_t used;
1848     void* space;
1849     size_t bookkeepIndex;
1850     SizetPtr[] bookkeep;
1851     uint nBlocks;
1852     uint nFree;
1853     size_t regionIndex = size_t.max;
1854 
1855     // inUse holds info for all blocks except the one currently being
1856     // allocated from.  freeList holds space ptrs for all free blocks.
1857 
1858     static struct Block {
1859         size_t used = 0;
1860         void* space = null;
1861     }
1862 
1863     SimpleStack!(Block) inUse;
1864     SimpleStack!(void*) freeList;
1865 
1866     void doubleSize(ref SizetPtr[] bookkeep) {
1867         size_t newSize = bookkeep.length * 2;
1868         auto ptr = cast(SizetPtr*) core.stdc.stdlib.realloc(
1869             bookkeep.ptr, newSize * SizetPtr.sizeof);
1870 
1871         if(!ptr) {
1872             outOfMemory();
1873         }
1874 
1875         bookkeep = ptr[0..newSize];
1876     }
1877 
1878     // Add an element to bookkeep, checking length first.
1879     void putLast(void* last) {
1880         if (bookkeepIndex == bookkeep.length) doubleSize(bookkeep);
1881         bookkeep[bookkeepIndex].p = last;
1882         bookkeepIndex++;
1883     }
1884 
1885     void putLast(size_t num) {
1886         return putLast(cast(void*) num);
1887     }
1888 
1889     // The number of objects allocated on the C heap instead of here.
1890     ref size_t nLargeObjects() @property pure nothrow {
1891         return bookkeep[regionIndex - 4].i;
1892     }
1893 
1894     // The number of extra segments that have been allocated in the current
1895     // region beyond the base segment of the region.
1896     ref size_t nExtraSegments() @property pure nothrow {
1897         return bookkeep[regionIndex - 3].i;
1898     }
1899 
1900     // Pushes a segment to the internal free list and frees segments to the
1901     // heap if there are more than 2x as many segments on the free list as
1902     // in use.
1903     void freeSegment() {
1904         nExtraSegments--;
1905         freeList.push(space);
1906         auto newHead = inUse.pop();
1907         space = newHead.space;
1908         used = newHead.used;
1909         nBlocks--;
1910         nFree++;
1911 
1912         if (nFree >= nBlocks * 2) {
1913             foreach(i; 0..nFree / 2) {
1914                 alignedFree(freeList.pop);
1915                 nFree--;
1916             }
1917         }
1918     }
1919 
1920     void destroy() {
1921         if(space) {
1922             alignedFree(space);
1923             space = null;
1924         }
1925 
1926         if(bookkeep) {
1927             core.stdc.stdlib.free(bookkeep.ptr);
1928             bookkeep = null;
1929         }
1930 
1931         while(inUse.index > 0) {
1932             auto toFree = inUse.pop();
1933             alignedFree(toFree.space);
1934         }
1935 
1936         inUse.destroy();
1937 
1938         while(freeList.index > 0) {
1939             auto toFree = freeList.pop();
1940             alignedFree(toFree);
1941         }
1942 
1943         freeList.destroy();
1944     }
1945 
1946     ~this() {
1947         destroy();
1948     }
1949 }
1950 
1951 /**
1952 These properties get and set the segment size of the default thread-local
1953 $(D RegionAllocatorStack) instance.  The default size is 4 megabytes.
1954 The setter is only effective before the global function
1955 $(D newRegionAllocator) has been called for the first time in the current
1956 thread.  Attempts to set this property after the first call to this
1957 function from the current thread throw a $(D RegionAllocatorException).
1958 */
1959 size_t threadLocalSegmentSize() @property nothrow @safe {
1960     return _threadLocalSegmentSize;
1961 }
1962 
1963 /// Ditto
1964 size_t threadLocalSegmentSize(size_t newSize) @property @safe {
1965     if(threadLocalInitialized) {
1966         throw new RegionAllocatorException(
1967             "Cannot set threadLocalSegmentSize after the thread-local " ~
1968             "RegionAllocatorStack has been used for the first time.",
1969             __FILE__,
1970             __LINE__
1971         );
1972     }
1973 
1974     return _threadLocalSegmentSize = newSize;
1975 }
1976 
1977 /**
1978 These properties determine whether the default thread-local
1979 $(D RegionAllocatorStack) instance is scanned by the garbage collector.
1980 The default is no.  In most cases, scanning a stack this long-lived is not
1981 recommended, as it will cause too many false pointers.  (See $(XREF
1982 regionallocator, GCScan) for details.)
1983 
1984 The setter is only effective before the global function
1985 $(D newRegionAllocator) has been called for the first time in the current
1986 thread.  Attempts to set this property after the first call to this
1987 function from the current thread throw a $(D RegionAllocatorException).
1988 */
1989 bool scanThreadLocalStack() @property nothrow @safe {
1990     return _scanThreadLocalStack;
1991 }
1992 
1993 /// Ditto
1994 bool scanThreadLocalStack(bool shouldScan) @property @safe {
1995     if(threadLocalInitialized) {
1996         throw new RegionAllocatorException(
1997             "Cannot set scanThreadLocalStack after the thread-local " ~
1998             "RegionAllocatorStack has been used for the first time.",
1999             __FILE__,
2000             __LINE__
2001         );
2002     }
2003 
2004     return _scanThreadLocalStack = shouldScan;
2005 }
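
// A brief sketch of the configuration pattern implied by
// threadLocalSegmentSize and scanThreadLocalStack:  both must be set before
// a thread's first call to newRegionAllocator().  This runs in a fresh
// thread so the current thread's state is left untouched.
unittest {
    import core.thread;

    auto t = new Thread({
        threadLocalSegmentSize = 64 * 1024;
        scanThreadLocalStack = false;
        auto alloc = newRegionAllocator();
        assert(alloc.segmentSize == 64 * 1024);
        assert(!alloc.gcScanned);
    });
    t.start();
    t.join();
}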
2006 
2007 private size_t _threadLocalSegmentSize = defaultSegmentSize;
2008 private RegionAllocatorStack threadLocalStack;
2009 private bool threadLocalInitialized;
2010 private bool _scanThreadLocalStack = false;
2011 
2012 // Ensures the thread-local stack is initialized, then returns it.
2013 private ref RegionAllocatorStack getThreadLocal() {
2014     if(!threadLocalInitialized) {
2015         threadLocalInitialized = true;
2016         threadLocalStack = RegionAllocatorStack(
2017             threadLocalSegmentSize, cast(GCScan) scanThreadLocalStack
2018         );
2019     }
2020 
2021     return threadLocalStack;
2022 }
2023 
2024 static ~this() {
2025     if(threadLocalInitialized) {
2026         threadLocalStack.impl.refCountedPayload.destroy();
2027     }
2028 }
2029 
2030 /**
2031 This struct provides an interface to the $(D RegionAllocator) functionality
2032 and enforces scoped deletion.  A new instance using the thread-local
2033 $(D RegionAllocatorStack) instance is created using the global
2034 $(D newRegionAllocator) function.  A new instance using
2035 an explicitly created $(D RegionAllocatorStack) is created using
2036 $(D RegionAllocatorStack.newRegionAllocator).
2037 
2038 Each instance has reference semantics in that any copy will allocate from the
2039 same memory.  When the last copy of an instance goes out of scope, all memory
2040 allocated via that instance is freed.  Only the most recently created
2041 still-existing $(D RegionAllocator) using a given $(D RegionAllocatorStack)
2042 may be used to allocate and free memory at any given time.  Deviations
2043 from this model result in a $(D RegionAllocatorException) being thrown.
2044 
2045 An uninitialized $(D RegionAllocator) (for example $(D RegionAllocator.init))
2046 has semantics similar to a null pointer.  It may be assigned to or passed to
2047 a function.  However, any attempt to call a method will result in a
2048 $(D RegionAllocatorException) being thrown.
2049 
2050 Examples:
2051 ---
2052 void foo() {
2053     auto alloc = newRegionAllocator();
2054     auto ptr1 = bar(alloc);
2055     auto ptr2 = alloc.allocate(42);
2056 
2057     // The last copy of the RegionAllocator object used to allocate ptr1
2058     // and ptr2 is going out of scope here.  The memory pointed to by
2059     // both ptr1 and ptr2 will be freed.
2060 }
2061 
2062 void* bar(RegionAllocator alloc) {
2063     auto ret = alloc.allocate(42);
2064 
2065     auto alloc2 = newRegionAllocator();
2066     auto ptr3 = alloc2.allocate(42);
2067 
2068     // ptr3 was allocated using alloc2, which is going out of scope.
2069     // Its memory will therefore be freed.  ret was allocated using alloc.
2070     // A copy of this RegionAllocator is still alive in foo() after
2071     // bar() executes.  Therefore, ret will not be freed on returning and
2072     // is still valid after bar() returns.
2073 
2074     return ret;
2075 }
2076 
void thisIsSafe() {
2078     // This is safe because the two RegionAllocator objects being used
2079     // are using two different RegionAllocatorStack objects.
2080     auto alloc = newRegionAllocator();
2081     auto ptr1 = alloc.allocate(42);
2082 
2083     auto stack = RegionAllocatorStack(1_048_576, GCScan.no);
2084     auto alloc2 = stack.newRegionAllocator();
2085 
2086     auto ptr2 = alloc2.allocate(42);
2087     auto ptr3 = alloc.allocate(42);
2088 }
2089 
void dontDoThis() {
2091     auto alloc = newRegionAllocator();
2092     auto ptr1 = alloc.allocate(42);
2093     auto alloc2 = newRegionAllocator();
2094 
2095     // Error:  Allocating from a RegionAllocator instance other than the
2096     // most recently created one that's still alive from a given stack.
2097     auto ptr = alloc.allocate(42);
2098 }
2099 
2100 void uninitialized() {
2101     RegionAllocator alloc;
2102     auto ptr = alloc.allocate(42);  // Error:  alloc is not initialized.
2103     auto alloc2 = alloc;  // Ok.  Both alloc, alloc2 are uninitialized.
2104 
2105     alloc2 = newRegionAllocator();
2106     auto ptr2 = alloc2.allocate(42);  // Ok.
2107     auto ptr3 = alloc.allocate(42);  // Error:  alloc is still uninitialized.
2108 
2109     alloc = alloc2;
2110     auto ptr4 = alloc.allocate(42);  // Ok.
2111 }
2112 ---
2113 
Note:  Allocations larger than $(D this.segmentSize) are handled as a special
case and fall back to allocating directly from the C heap.  These large
allocations are freed as if they were allocated on a $(D RegionAllocatorStack)
when $(D free) or $(D freeLast) is called or the last copy of a
$(D RegionAllocator) instance goes out of scope.  However, due to the extra
bookkeeping required, destroying a region (as happens when the last copy of
a $(D RegionAllocator) instance goes out of scope) takes time linear, rather
than constant, in the number of allocations for regions containing such
large allocations.
2123 */
2124 struct RegionAllocator {
2125 private:
2126     RegionAllocatorStack stack;
2127 
2128     // The region index that should be current anytime this instance is
2129     // being used.  This is checked for in allocate() and free().
2130     size_t correctRegionIndex = size_t.max;
2131 
2132     this(ref RegionAllocatorStack stack) {
2133         assert(stack.initialized);
2134         auto impl = &(stack.impl.refCountedPayload());
2135         this.stack = stack;
2136 
2137         with(*impl) {
2138             putLast(0);            // nLargeObjects.
2139             putLast(0);            // nExtraSegments.
2140             putLast(regionIndex);  // Old regionIndex.
2141             putLast(1);            // Ref count of current RegionAllocator.
2142             regionIndex = bookkeepIndex;
2143             correctRegionIndex = regionIndex;
2144         }
2145     }
2146 
    // CTFE-able function used in static assertions.  Can't use
    // core.bitop.bsr/bsf because this must be usable at compile time.
2149     static bool isPowerOf2(size_t num) pure nothrow @safe {
2150         return num && !(num & (num - 1));
2151     }
2152 
2153     alias RegionAllocatorStackImpl Impl;  // Save typing.
2154 
2155     // This is written as a mixin instead of a function because it's
2156     // performance critical and it wouldn't be inlinable because it throws.
2157     // By using a mixin, initialized can be checked all the time instead of
2158     // just in debug mode, for negligible performance cost.
2159     enum string getStackImplMixin = q{
2160         if(!initialized) {
2161             throw new RegionAllocatorException(
2162                 "RegionAllocator instance not initialized.  Please use " ~
2163                 "newRegionAllocator() to create a RegionAllocator object.",
2164                 __FILE__,
2165                 __LINE__
2166             );
2167         }
2168 
2169         auto impl = &(stack.impl.refCountedPayload());
2170     };
2171 
2172     void incrementRefCount() {
2173         mixin(getStackImplMixin);
2174         impl.bookkeep[correctRegionIndex - 1].i++;
2175     }
2176 
2177     void decrementRefCount() {
2178         mixin(getStackImplMixin);
2179         impl.bookkeep[correctRegionIndex - 1].i--;
2180 
2181         if(impl.bookkeep[correctRegionIndex - 1].i == 0) {
2182             if(impl.regionIndex != correctRegionIndex) {
2183                 throw new RegionAllocatorException(
                    "Cannot free RegionAllocator regions in non-last-in-" ~
                    "first-out order.  Did you return a RegionAllocator " ~
                    "from a function?",
2187                     __FILE__,
2188                     __LINE__
2189                 );
2190             }
2191 
2192             with(*impl) {
2193                 // Free allocations one at a time until we don't have any
2194                 // more large objects.
2195                 while (nLargeObjects > 0 && bookkeepIndex > regionIndex) {
2196                     assert(bookkeepIndex > regionIndex);
2197                     freeLast();
2198                 }
2199 
2200                 // Free any extra segments that were used by this region.
2201                 while(nExtraSegments > 0) {
2202                     freeSegment();
2203                 }
2204 
2205                 if(bookkeepIndex > regionIndex) {
2206                     // Then there's something left to free.
2207                     used = bookkeep[regionIndex].i - cast(size_t) space;
2208                 }
2209 
2210                 bookkeepIndex = regionIndex - 4;
2211                 regionIndex = bookkeep[regionIndex - 2].i;
2212             }
2213         }
2214     }
2215 
2216     bool initialized() @property const pure nothrow @safe {
2217         return correctRegionIndex < size_t.max;
2218     }
2219 
2220     Unqual!(ElementType!(R))[] arrayImplStack(R)(R range) {
2221         alias ElementType!(R) E;
2222         alias Unqual!(E) U;
2223         static if(hasLength!(R)) {
2224             U[] ret = uninitializedArray!(U[])(range.length);
2225             copy(range, ret);
2226             return ret;
2227         } else {
2228             mixin(getStackImplMixin);
2229             auto startPtr = allocate(0);
2230             size_t bytesCopied = 0;
2231 
2232             while(!range.empty) {
2233                 auto elem = range.front;
2234                 if(impl.used + U.sizeof <= segmentSize) {
2235                     range.popFront;
2236                     *(cast(U*) (startPtr + bytesCopied)) = elem;
2237                     bytesCopied += U.sizeof;
2238                     impl.used += U.sizeof;
2239                 } else {
2240                     if(bytesCopied + U.sizeof >= segmentSize / 2) {
2241                         // Then just heap-allocate.
2242                         U[] result = (cast(U*)
2243                             alignedMalloc(bytesCopied * 2, gcScanned))
2244                             [0..bytesCopied / U.sizeof * 2];
2245 
2246                         immutable elemsCopied = bytesCopied / U.sizeof;
2247                         result[0..elemsCopied] = (cast(U*) startPtr)
2248                             [0..elemsCopied];
2249                         finishCopy(result, range, elemsCopied);
2250                         freeLast();
2251                         impl.putLast(result.ptr);
2252                         impl.nLargeObjects++;
2253                         return result;
2254                     } else {
2255                         // Force allocation of a new segment.
2256                         U[] oldData = (cast(U*) startPtr)
2257                             [0..bytesCopied / U.sizeof];
2258                         impl.used -= bytesCopied;
2259                         impl.bookkeepIndex--;
2260                         U[] arr = uninitializedArray!(U[])
2261                             (bytesCopied / U.sizeof + 1);
2262                         arr[0..oldData.length] = oldData[];
2263                         startPtr = impl.space;
2264                         arr[$ - 1] = elem;
2265                         bytesCopied += U.sizeof;
2266                         range.popFront;
2267                     }
2268                 }
2269             }
2270             auto rem = bytesCopied % .alignBytes;
2271             if(rem != 0) {
2272                 auto toAdd = .alignBytes - rem;
2273                 if(impl.used + toAdd < RegionAllocator.segmentSize) {
2274                     impl.used += toAdd;
2275                 } else {
2276                     impl.used = RegionAllocator.segmentSize;
2277                 }
2278             }
2279             return (cast(U*) startPtr)[0..bytesCopied / U.sizeof];
2280         }
2281     }
2282 
2283     Unqual!(ElementType!(R))[] arrayImplHeap(R)(R range) {
        // Initial guess of how much space to allocate.  It's relatively
        // large because the object will be short-lived, so speed matters
        // more than space efficiency.
2287         enum initialGuess = 128;
2288 
2289         mixin(getStackImplMixin);
2290         alias Unqual!(ElementType!R) E;
2291         auto arr = (cast(E*) alignedMalloc(E.sizeof * initialGuess, true))
2292             [0..initialGuess];
2293 
2294         finishCopy(arr, range, 0);
2295         impl.putLast(arr.ptr);
2296         impl.nLargeObjects++;
2297         return arr;
2298     }
2299 
2300 public:
2301 
2302     this(this) {
2303         if(initialized) incrementRefCount();
2304     }
2305 
2306     ~this() {
2307         if(initialized) decrementRefCount();
2308     }
2309 
2310     void opAssign(RegionAllocator rhs) {
2311         if(initialized) decrementRefCount();
2312         this.stack = rhs.stack;
2313         this.correctRegionIndex = rhs.correctRegionIndex;
2314         if(initialized) incrementRefCount();
2315     }
2316 
2317     /**
2318     Allocates $(D nBytes) bytes on the $(D RegionAllocatorStack) used by this
2319     $(D RegionAllocator) instance.  The last block allocated from this
2320     $(D RegionAllocator) instance can be freed by calling
2321     $(D RegionAllocator.free) or $(D RegionAllocator.freeLast) or will be
2322     automatically freed when the last copy of this $(D RegionAllocator)
2323     instance goes out of scope.
2324 
    Allocation requests larger than $(D segmentSize) are
    allocated directly on the C heap, are scanned by the GC if and only if
    the $(D RegionAllocatorStack) instance that this object uses is scanned by
    the GC, and are freed according to the same rules as described above.
2329     */
2330     void* allocate(size_t nBytes) {
2331         mixin(getStackImplMixin);
2332         if(impl.regionIndex != this.correctRegionIndex) {
2333             throw new RegionAllocatorException(
2334                 "Cannot allocate memory from a RegionAllocator that is not " ~
2335                 "currently at the top of the stack.",
2336                 __FILE__,
2337                 __LINE__
2338             );
2339         }
2340 
2341         nBytes = allocSize(nBytes);
2342         with(*impl) {
2343             void* ret;
2344             if (segmentSize - used >= nBytes) {
2345                 ret = space + used;
2346                 used += nBytes;
2347             } else if (nBytes > segmentSize) {
2348                 ret = alignedMalloc(nBytes, gcScanned);
2349                 impl.nLargeObjects++;
2350             } else if (nFree > 0) {
2351                 nExtraSegments++;
2352                 inUse.push(Block(used, space));
2353                 space = freeList.pop;
2354                 used = nBytes;
2355                 nFree--;
2356                 nBlocks++;
2357                 ret = space;
2358             } else { // Allocate more space.
2359                 nExtraSegments++;
2360                 inUse.push(Block(used, space));
2361                 space = alignedMalloc(segmentSize, gcScanned);
2362                 nBlocks++;
2363                 used = nBytes;
2364                 ret = space;
2365             }
2366             putLast(ret);
2367             return ret;
2368         }
2369     }
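
    // A quick sketch of the large-allocation special case documented above:
    // a request bigger than segmentSize falls back to the C heap but is
    // still aligned and freed through the normal bookkeeping.
    unittest {
        auto alloc = newRegionAllocator();
        auto ptr = alloc.allocate(alloc.segmentSize + 1);
        assert((cast(size_t) ptr) % .alignBytes == 0);
        alloc.freeLast();
    }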
2370 
2371     /**
2372     Frees the last block of memory allocated by the current
2373     $(D RegionAllocator).  Throws a $(D RegionAllocatorException) if
2374     this $(D RegionAllocator) is not the most recently created still-existing
2375     $(D RegionAllocator) using its $(D RegionAllocatorStack) instance.
2376     */
2377     void freeLast() {
2378         mixin(getStackImplMixin);
2379         if(impl.regionIndex != this.correctRegionIndex) {
2380             throw new RegionAllocatorException(
2381                 "Cannot free memory to a RegionAllocator that is not " ~
2382                 "currently at the top of the stack, or memory that has not " ~
2383                 "been allocated with this instance.",
2384                 __FILE__,
2385                 __LINE__
2386             );
2387         }
2388 
2389         with(*impl) {
2390             auto lastPos = bookkeep[--bookkeepIndex].p;
2391 
2392             // Handle large blocks.
2393             if(lastPos > space + segmentSize || lastPos < space) {
2394                 alignedFree(lastPos);
2395                 impl.nLargeObjects--;
2396                 return;
2397             }
2398 
2399             used = (cast(size_t) lastPos) - (cast(size_t) space);
2400             if (nBlocks > 1 && used == 0) {
2401                 impl.freeSegment();
2402             }
2403         }
2404     }
2405 
2406     /**
    Checks that $(D ptr) points to the block that would be freed by
    $(D freeLast), then calls $(D freeLast).  Throws a
2409     $(D RegionAllocatorException) if the pointer does not point to the
2410     block that would be freed by $(D freeLast).
2411     */
2412     void free(void* ptr) {
2413         mixin(getStackImplMixin);
2414         auto lastPos = impl.bookkeep[impl.bookkeepIndex - 1].p;
2415         if(ptr !is lastPos) {
2416             throw new RegionAllocatorException(
2417                 "Cannot free RegionAllocator memory in non-LIFO order.",
2418                 __FILE__,
2419                 __LINE__
2420             );
2421         }
2422 
2423         freeLast();
2424     }
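
    // A short sketch of the checked-free discipline described above:  free()
    // accepts only the pointer that freeLast() would release next.
    unittest {
        auto alloc = newRegionAllocator();
        auto a = alloc.allocate(16);
        auto b = alloc.allocate(16);

        bool threw = false;
        try {
            alloc.free(a);  // Non-LIFO free must throw.
        } catch(RegionAllocatorException) {
            threw = true;
        }
        assert(threw);

        alloc.free(b);  // LIFO order works.
        alloc.free(a);
    }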
2425 
2426     /**
2427     Attempts to resize a previously allocated block of memory in place.
2428     This is possible only if $(D ptr) points to the beginning of the last
2429     block allocated by this $(D RegionAllocator) instance and, in the
2430     case where $(D newSize) is greater than the old size, there is
2431     additional space in the segment that $(D ptr) was allocated from.
2432     Additionally, blocks larger than this $(D RegionAllocator)'s segment size
2433     cannot be grown or shrunk.
2434 
2435     Returns:  True if the block was successfully resized, false otherwise.
2436     */
2437     bool resize(const(void)* ptr, size_t newSize) {
2438         mixin(getStackImplMixin);
2439 
2440         // This works since we always allocate in increments of alignBytes
2441         // no matter what the allocation size.
2442         newSize = allocSize(newSize);
2443 
2444         with(*impl) {
2445             auto lastPos = bookkeep[bookkeepIndex - 1].p;
2446             if(ptr !is lastPos) {
2447                 return false;
2448             }
2449 
2450             // If it's a large block directly allocated on the heap,
2451             // then we definitely can't resize it in place.
2452             if(lastPos > space + segmentSize || lastPos < space) {
2453                 return false;
2454             }
2455 
2456             immutable blockSize = used - ((cast(size_t) lastPos) -
2457                 cast(size_t) space);
2458             immutable sizediff_t diff = newSize - blockSize;
2459 
2460             if(cast(sizediff_t) (impl.segmentSize - used) >= diff) {
2461                 used += diff;
2462                 return true;
2463             }
2464         }
2465 
2466         return false;
2467     }
2468 
2469     /**
2470     Returns whether the $(D RegionAllocatorStack) used by this
2471     $(D RegionAllocator) instance is scanned by the garbage collector.
2472     */
2473     bool gcScanned() @property const pure nothrow @safe {
2474         return stack.gcScanned;
2475     }
2476 
2477     /**Allocates an array of type $(D T).  $(D T) may be a multidimensional
2478     array.  In this case sizes may be specified for any number of dimensions
2479     from 1 to the number in $(D T).
2480 
2481     Examples:
2482     ---
2483     auto alloc = newRegionAllocator();
2484     double[] arr = alloc.newArray!(double[])(100);
2485     assert(arr.length == 100);
2486 
2487     double[][] matrix = alloc.newArray!(double[][])(42, 31);
2488     assert(matrix.length == 42);
2489     assert(matrix[0].length == 31);
2490     ---
2491     */
2492     auto newArray(T, I...)(I sizes)
2493     if(allSatisfy!(isIntegral, I)) {
2494 
2495         static void initialize(R)(R toInitialize) {
2496             static if(isArray!(ElementType!R)) {
2497                 foreach(elem; toInitialize) {
2498                     initialize(elem);
2499                 }
2500             } else {
2501                 toInitialize[] = ElementType!(R).init;
2502             }
2503         }
2504 
2505         auto ret = uninitializedArray!(T, I)(sizes);
2506         initialize(ret);
2507         return ret;
2508     }
2509 
2510     /**
2511     Same as $(D newArray), except skips initialization of elements for
2512     performance reasons.
2513     */
2514     auto uninitializedArray(T, I...)(I sizes)
2515     if(allSatisfy!(isIntegral, I)) {
        static assert(sizes.length >= 1,
            "Cannot allocate an array without specifying the size of at " ~
            "least the first dimension.");
2519         static assert(sizes.length <= nDims!T,
2520             to!string(sizes.length) ~ " dimensions specified for a " ~
2521             to!string(nDims!T) ~ " dimensional array.");
2522 
2523         alias typeof(T.init[0]) E;
2524 
2525         auto ptr = cast(E*) allocate(sizes[0] * E.sizeof);
2526         auto ret = ptr[0..sizes[0]];
2527 
2528         static if(sizes.length > 1) {
2529             foreach(ref elem; ret) {
2530                 elem = uninitializedArray!(E)(sizes[1..$]);
2531             }
2532         }
2533 
2534         return ret;
2535     }
2536 
2537     /**
    Returns the number of bytes to which an allocation of size $(D nBytes)
    is guaranteed to be aligned.
2540     */
2541     static size_t alignBytes(size_t nBytes) {
2542         return .alignBytes;
2543     }
2544 
2545     /**
2546     Returns the number of bytes used to satisfy an allocation request
2547     of $(D nBytes).  Will return a value greater than or equal to
2548     $(D nBytes) to account for alignment overhead.
2549     */
2550     static size_t allocSize(size_t nBytes) pure nothrow {
2551         static assert(isPowerOf2(.alignBytes));
2552         return (nBytes + (.alignBytes - 1)) & (~(.alignBytes - 1));
2553     }
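
    // Worked examples of the rounding rule above, given this module's
    // 16-byte alignBytes.
    unittest {
        assert(allocSize(0) == 0);
        assert(allocSize(1) == 16);
        assert(allocSize(16) == 16);
        assert(allocSize(17) == 32);
    }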
2554 
2555     /**
2556     False because memory allocated by this allocator is not automatically
2557     reclaimed by the garbage collector.
2558     */
2559     enum isAutomatic = false;
2560 
2561     /**
    True because, when the last copy of a $(D RegionAllocator) instance
    goes out of scope, the memory it references is automatically freed.
2564     */
2565     enum isScoped = true;
2566 
2567     /**
2568     True because if memory is freed via $(D free()) instead of $(D freeLast())
2569     then the pointer is checked for validity.
2570     */
2571     enum freeIsChecked = true;
2572 
2573     /**
2574     Returns the segment size of the $(D RegionAllocatorStack) used by this
2575     $(D RegionAllocator).
2576     */
2577     size_t segmentSize() @property {
2578         mixin(getStackImplMixin);
2579         return impl.segmentSize;
2580     }
2581 
2582     /**
2583     Returns the maximum number of bytes that may be allocated in the
2584     current segment.
2585     */
2586     size_t segmentSlack() @property {
2587         mixin(getStackImplMixin);
2588         return impl.segmentSize - impl.used;
2589     }
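
    // A sketch relating segmentSlack to allocate():  when a request fits in
    // the current segment, it consumes exactly allocSize(nBytes) of slack.
    // Guarded so it still passes if the current segment happens to be full.
    unittest {
        auto alloc = newRegionAllocator();
        immutable slack = alloc.segmentSlack;
        if(slack >= allocSize(1)) {
            alloc.allocate(1);
            assert(slack - alloc.segmentSlack == allocSize(1));
            alloc.freeLast();
        }
    }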
2590 
2591     /**
2592     Copies $(D range) to an array.  The array will be located on the
2593     $(D RegionAllocator) stack if any of the following conditions apply:
2594 
2595     1.  $(D std.traits.hasIndirections!(ElementType!R)) is false.
2596 
2597     2.  $(D R) is a builtin array.  In this case $(D range) maintains pointers
2598         to all elements at least until $(D array) returns, preventing the
2599         elements from being freed by the garbage collector.  A similar
2600         assumption cannot be made for ranges other than builtin arrays.
2601 
2602     3.  The $(D RegionAllocatorStack) instance used by this
2603         $(D RegionAllocator) is scanned by the garbage collector.
2604 
2605     If none of these conditions is met, the array is returned on the C heap
2606     and $(D GC.addRange) is called.  In either case, $(D RegionAllocator.free),
2607     $(D RegionAllocator.freeLast), or the last copy of this $(D RegionAllocator)
2608     instance going out of scope will free the array as if it had been
2609     allocated on the $(D RegionAllocator) stack.
2610 
    Rationale:  The most common reason to call $(D array) on a builtin array is
                to modify its contents inside a function without affecting the
                caller's view.  In this case $(D range) is not modified and
                keeps the elements reachable, preventing them from being freed
                by the garbage collector.  Furthermore, if the copy returned
                does need to be scanned, the client can call $(D GC.addRange)
                before modifying the original array.
2618 
2619     Examples:
2620     ---
2621     auto alloc = newRegionAllocator();
2622     auto arr = alloc.array(iota(5));
2623     assert(arr == [0, 1, 2, 3, 4]);
2624     ---
2625     */
2626     Unqual!(ElementType!(R))[] array(R)(R range) if(isInputRange!R) {
2627         alias Unqual!(ElementType!(R)) E;
2628         if(gcScanned || !hasIndirections!E || isArray!R) {
2629             return arrayImplStack(range);
2630         } else {
2631             return arrayImplHeap(range);
2632         }
2633     }
2634 }
2635 
2636 /**
2637 Returns a new $(D RegionAllocator) that uses the default thread-local
2638 $(D RegionAllocatorStack) instance.
2639 */
2640 RegionAllocator newRegionAllocator() {
2641     return RegionAllocator(getThreadLocal());
2642 }
2643 
// Finishes copying a range to a C heap-allocated array.  Assumes the first
// alreadyCopied elements of result are already filled in and the rest of
// the array is free space.
2647 private void finishCopy(T, U)(ref T[] result, U range, size_t alreadyCopied) {
2648     void doRealloc() {
2649         auto newPtr = cast(T*) alignedRealloc(
2650             result.ptr, result.length * T.sizeof * 2, result.length * T.sizeof
2651         );
2652         result = newPtr[0..result.length * 2];
2653     }
2654 
2655     auto index = alreadyCopied;
2656     foreach(elem; range) {
2657         if(index == result.length) doRealloc();
2658         result[index++] = elem;
2659     }
2660 
2661     result = result[0..index];
2662 }
2663 
2664 unittest {
2665     auto alloc = newRegionAllocator();
2666     auto arr = alloc.array(iota(5));
2667     assert(arr == [0, 1, 2, 3, 4]);
2668 
    // Create a quick-and-dirty finite range that has no length property.
2670     static struct Count {
2671         uint num;
2672         uint upTo;
2673         @property size_t front() {
2674             return num;
2675         }
2676         void popFront() {
2677             num++;
2678         }
2679         @property bool empty() {
2680             return num >= upTo;
2681         }
2682     }
2683 
2684     alloc.allocate(1024 * 1024 * 3);
2685     Count count;
2686     count.upTo = 1024 * 1025;
2687     auto asArray = alloc.array(count);
2688     foreach(i, elem; asArray) {
2689         assert(i == elem, to!(string)(i) ~ "\t" ~ to!(string)(elem));
2690     }
2691     assert(asArray.length == 1024 * 1025);
2692     alloc.freeLast();
2693     alloc.freeLast();
2694 
2695     while(alloc.stack.impl.refCountedPayload.freeList.index > 0) {
2696         alignedFree(alloc.stack.impl.refCountedPayload.freeList.pop());
2697     }
2698 }
2699 
2700 unittest {
2701     auto alloc = newRegionAllocator();
2702     double[] arr = alloc.uninitializedArray!(double[])(100);
2703     assert(arr.length == 100);
2704 
2705     double[][] matrix = alloc.uninitializedArray!(double[][])(42, 31);
2706     assert(matrix.length == 42);
2707     assert(matrix[0].length == 31);
2708 
2709     double[][] mat2 = alloc.newArray!(double[][])(3, 1);
2710     assert(mat2.length == 3);
2711     assert(mat2[0].length == 1);
2712 
2713     import std.math;
2714     assert(isNaN(mat2[0][0]));
2715 }
2716 
2717 unittest {
    /* Not a particularly good unittest in that it depends on knowing the
     * internals of RegionAllocator, but it's the best I could come up with.
     * This is really more of a stress test/sanity check than a normal
     * unittest.*/
2721 
2722     // Make sure state is completely reset.
2723     destroy(threadLocalStack.impl);
2724     threadLocalStack = RegionAllocatorStack.init;
2725     threadLocalInitialized = false;
2726 
    // First test to make sure a large number of allocations does what it's
    // supposed to in terms of reallocing bookkeep[], etc.
    enum nIter = defaultSegmentSize * 5 / alignBytes;

    {
        auto alloc = newRegionAllocator();
        foreach(i; 0..nIter) {
            alloc.allocate(alignBytes);
        }
        assert(alloc.stack.impl.refCountedPayload.nBlocks == 5,
            to!string(alloc.stack.impl.refCountedPayload.nBlocks));
        assert(alloc.stack.impl.refCountedPayload.nFree == 0);
        foreach(i; 0..nIter) {
            alloc.freeLast();
        }
2742         assert(alloc.stack.impl.refCountedPayload.nBlocks == 1);
2743         assert(alloc.stack.impl.refCountedPayload.nFree == 2);
2744 
2745         // Make sure logic for freeing excess blocks works.  If it doesn't this
2746         // test will run out of memory.
2747         enum allocSize = defaultSegmentSize / 2;
2748         foreach(i; 0..50) {
2749             foreach(j; 0..50) {
2750                 alloc.allocate(allocSize);
2751             }
2752             foreach(j; 0..50) {
2753                 alloc.freeLast();
2754             }
2755         }
2756 
2757         // Make sure data is stored properly.
2758         foreach(i; 0..10) {
2759             alloc.allocate(allocSize);
2760         }
2761         foreach(i; 0..5) {
2762             alloc.freeLast();
2763         }
2764         void* space = alloc.stack.impl.refCountedPayload.space;
2765         size_t used = alloc.stack.impl.refCountedPayload.used;
2766 
2767         {
2768             auto alloc2 = newRegionAllocator();
2769             auto arrays = new uint[][10];
2770 
2771             foreach(i; 0..10) {
2772                 uint[] data = alloc2.uninitializedArray!(uint[])(250_000);
                foreach(j, ref e; data) {
                    e = cast(uint) (j * (i + 1));
                }
2776                 arrays[i] = data;
2777             }
2778 
            // Make sure stuff gets overwritten if blocks are getting GC'd
            // when they're not supposed to be.
2781             GC.minimize;  // Free up all excess pools.
2782             uint[][] foo;
2783             foreach(i; 0..40) {
2784                 foo ~= new uint[1_048_576];
2785             }
2786             foo = null;
2787 
2788             for(size_t i = 9; i != size_t.max; i--) {
2789                 foreach(j, e; arrays[i]) {
2790                     assert(e == j * (i + 1));
2791                 }
2792             }
2793         }
2794 
2795         assert(space == alloc.stack.impl.refCountedPayload.space,
2796             text(space, '\t', alloc.stack.impl.refCountedPayload.space));
2797         assert(used == alloc.stack.impl.refCountedPayload.used,
2798             text(used, '\t', alloc.stack.impl.refCountedPayload.used));
2799         while(alloc.stack.impl.refCountedPayload.nBlocks > 1 ||
2800         alloc.stack.impl.refCountedPayload.used > 0) {
2801             alloc.freeLast();
2802         }
2803     }
2804 
2805     // Test that everything is really getting destroyed properly when
2806     // destroy() is called.  If not then this test will run out of memory.
2807     foreach(i; 0..1000) {
2808         destroy(threadLocalStack.impl);
2809         threadLocalInitialized = false;
2810 
2811         auto alloc = newRegionAllocator();
2812         foreach(j; 0..1_000) {
2813             auto ptr = alloc.allocate(20_000);
2814             assert((cast(size_t) ptr) % alignBytes == 0);
2815         }
2816 
2817         foreach(j; 0..500) {
2818             alloc.freeLast();
2819         }
2820     }
2821 }
2822 
2823 unittest {
2824     // Make sure the basics of using explicit stacks work.
2825     auto stack = RegionAllocatorStack(4 * 1024 * 1024, GCScan.no);
2826     auto alloc = stack.newRegionAllocator();
2827     auto arr = alloc.array(iota(5));
2828     assert(arr == [0, 1, 2, 3, 4]);
2829     auto ptr = alloc.allocate(5);
2830 
2831     auto alloc2 = newRegionAllocator();
2832     auto ptr2 = alloc2.allocate(5);
2833     auto ptr3 = alloc.allocate(5);
2834 }
2835 
2836 unittest {
2837     // Make sure the stacks get freed properly when they go out of scope.
2838     // If they don't then this will run out of memory.
2839     foreach(i; 0..100_000) {
2840         auto stack = RegionAllocatorStack(4 * 1024 * 1024, GCScan.no);
2841     }
2842 }
2843 
2844 unittest {
2845     // Make sure resize works properly.
2846     auto alloc = newRegionAllocator();
2847     auto arr1 = alloc.array(iota(4));
2848     auto res = alloc.resize(arr1.ptr, 8 * int.sizeof);
2849     assert(res);
2850     arr1 = arr1.ptr[0..8];
2851     copy(iota(4, 8), arr1[4..$]);
2852 
2853     auto arr2 = alloc.newArray!(int[])(8);
2854 
    // If resize() made the block too small, arr2 will have overwritten
    // part of arr1:
2857     assert(arr1 == [0, 1, 2, 3, 4, 5, 6, 7], text(arr1));
2858 
2859     alloc.free(arr2.ptr);
2860     auto res2 = alloc.resize(arr1.ptr, 4 * int.sizeof);
2861     assert(res2);
2862     arr2 = alloc.newArray!(int[])(8);
2863 
2864     // This time the memory in arr1 should have been overwritten.
2865     assert(arr1 == [0, 1, 2, 3, 0, 0, 0, 0]);
2866 }
2867 
2868 unittest {
2869     // Make sure that default thread-local stacks get freed properly at the
2870     // termination of a thread.  If they don't then this will run out of
2871     // memory.
2872 
2873     import core.thread;
2874     foreach(i; 0..100) {
2875         auto fun = {
2876             threadLocalSegmentSize = 100 * 1024 * 1024;
2877             newRegionAllocator();
2878         };
2879 
2880         auto t = new Thread(fun);
2881         t.start();
2882         t.join();
2883     }
2884 }
2885 
2886 unittest {
2887     // Make sure assignment works as advertised.
2888     RegionAllocator alloc;
2889     auto alloc2 = newRegionAllocator();
2890     auto ptr = alloc2.allocate(8);
2891     alloc = alloc2;
2892     alloc.freeLast();
    auto ptr2 = alloc2.allocate(8);
2894     assert(ptr is ptr2);
2895 }
2896 
// Simple, fast stack without error checking.
2898 static struct SimpleStack(T) {
2899     private size_t capacity;
2900     private size_t index;
2901     private T* data;
2902     private enum sz = T.sizeof;
2903 
2904     private static size_t max(size_t lhs, size_t rhs) pure nothrow {
2905         return (rhs > lhs) ? rhs : lhs;
2906     }
2907 
2908     void push(T elem) {
2909         if (capacity == index) {
2910             capacity = max(16, capacity * 2);
2911             data = cast(T*) core.stdc.stdlib.realloc(data, capacity * sz);
2912         }
2913         data[index++] = elem;
2914     }
2915 
2916     T pop() {
2917         index--;
2918         auto ret = data[index];
2919         return ret;
2920     }
2921 
2922     void destroy() {
2923         if(data) {
2924             core.stdc.stdlib.free(data);
2925             data = null;
2926         }
2927     }
2928 }
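
// A minimal exercise of SimpleStack's contract:  LIFO order, growth past
// the initial 16-element capacity, and manual cleanup via destroy().
unittest {
    SimpleStack!int s;
    foreach(i; 0..100) s.push(i);
    foreach_reverse(i; 0..100) assert(s.pop() == i);
    s.destroy();
}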
2929 
private void outOfMemory() {
2931     throw new OutOfMemoryError("Out of memory in RegionAllocator.");
2932 }
2933 
// Memory allocation routines.  These wrap malloc(), free() and realloc(),
// and guarantee alignBytes-aligned results.
2936 private enum size_t alignBytes = 16;
2937 
2938 private void* alignedMalloc(size_t size, bool shouldAddRange = false) {
2939     // We need (alignBytes - 1) extra bytes to guarantee alignment, 1 byte
2940     // to store the shouldAddRange flag, and ptrSize bytes to store
2941     // the pointer to the beginning of the block.
2942     void* toFree = core.stdc.stdlib.malloc(
2943         alignBytes + ptrSize + size
2944     );
2945 
2946     if(toFree is null) outOfMemory();
2947 
2948     // Add the offset for the flag and the base pointer.
2949     auto intPtr = cast(size_t) toFree + ptrSize + 1;
2950 
2951     // Align it.
2952     intPtr = (intPtr + alignBytes - 1) & (~(alignBytes - 1));
2953     auto ret = cast(void**) intPtr;
2954 
2955     // Store base pointer.
2956     (cast(void**) ret)[-1] = toFree;
2957 
2958     // Store flag.
2959     (cast(bool*) ret)[-1 - ptrSize] = shouldAddRange;
2960 
2961     if(shouldAddRange) {
2962         GC.addRange(ret, size);
2963     }
2964 
2965     return ret;
2966 }
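
// A quick check of the layout described above:  pointers returned by
// alignedMalloc() are alignBytes-aligned for a spread of request sizes.
unittest {
    foreach(size; [1, 7, 16, 100, 4097]) {
        auto ptr = alignedMalloc(size);
        assert((cast(size_t) ptr) % alignBytes == 0);
        alignedFree(ptr);
    }
}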
2967 
2968 private void alignedFree(void* ptr) {
2969     // If it was allocated with alignedMalloc() then the pointer to the
2970     // beginning is at ptr[-1].
2971     auto addedRange = (cast(bool*) ptr)[-1 - ptrSize];
2972 
2973     if(addedRange) {
2974         GC.removeRange(ptr);
2975     }
2976 
2977     core.stdc.stdlib.free( (cast(void**) ptr)[-1]);
2978 }
2979 
// This is used by RegionAllocator, but its interface isn't stable enough
// yet for it to be made public and documented.
2982 private void* alignedRealloc(void* ptr, size_t newLen, size_t oldLen) {
2983     auto storedRange = (cast(bool*) ptr)[-1 - ptrSize];
2984     auto newPtr = alignedMalloc(newLen, storedRange);
2985     memcpy(newPtr, ptr, oldLen);
2986 
2987     alignedFree(ptr);
2988     return newPtr;
2989 }
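
// A short sketch of alignedRealloc()'s semantics:  old contents survive
// the move to the new, larger block.
unittest {
    auto p = cast(size_t*) alignedMalloc(4 * size_t.sizeof);
    foreach(i; 0..4) p[i] = i + 1;

    auto q = cast(size_t*) alignedRealloc(p, 8 * size_t.sizeof,
        4 * size_t.sizeof);
    foreach(i; 0..4) assert(q[i] == i + 1);
    alignedFree(q);
}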
2990 
2991 private union SizetPtr {
2992     size_t i;
2993     void* p;
2994 }