1 /**Probability distribution CDFs, PDFs/PMFs, and a few inverse CDFs.
2  *
3  * Authors:  David Simcha, Don Clugston*/
4  /*
5  * Acknowledgements:  Some of this module was borrowed the mathstat module
6  * of Don Clugston's MathExtra library.  This was done to create a
7  * coherent, complete library without massive dependencies, and without
8  * reinventing the wheel.  These functions have been renamed to
9  * fit the naming conventions of this library, and are noted below.
10  * The code from Don Clugston's MathExtra library was based on the Cephes
11  * library by Stephen Moshier.
12  *
13  * Conventions:
14  * Cumulative distribution functions are named <distribution>CDF.  For
15  * discrete distributions, are the P(X <= x) where X is the random variable,
16  * NOT P(X < x).
17  *
18  * All CDFs have a complement, named <distribution>CDFR, which stands for
19  * "Cumulative Distribution Function Right".  For discrete distributions,
20  * this is P(X >= x), NOT P(X > x) and is therefore NOT equal to
21  * 1 - <distribution>CDF.  Also, even for continuous distributions, the
22  * numerical accuracy is higher for small p-values if the CDFR is used than
23  * if 1 - CDF is used.
24  *
25  * If a PDF/PMF function is included for a distribution, it is named
26  * <distribution>PMF or <distribution>PDF (PMF for discrete, PDF for
27  * continuous distributions).
28  *
29  * If an inverse CDF is included, it is named inv<Distribution>CDF.
30  *
31  * For all distributions, the test statistic is the first function parameter
32  * and the distribution parameters are further down the function parameter
33  * list.  This is important for certain generic code, such as  tests and
34  * the parametrize template.
35  *
36  * The following functions are identical or functionally equivalent to
37  * functions found in MathExtra/Tango.Math.probability.  This information
38  * might be useful if someone is trying to integrate this library into other code:
39  *
40  * normalCDF <=> normalDistribution
41  *
42  * normalCDFR <=> normalDistributionCompl
43  *
44  * invNormalCDF <=> normalDistributionComplInv
45  *
46  * studentsTCDF <=> studentsTDistribution  (Note reversal in argument order)
47  *
48  * invStudentsTCDF <=> studentsTDistributionInv (Again, arg order reversed)
49  *
50  * binomialCDF <=> binomialDistribution
51  *
52  * negBinomCDF <=> negativeBinomialDistribution
53  *
54  * poissonCDF <=> poissonDistribution
55  *
56  * chiSqrCDF <=> chiSqrDistribution (Note reversed arg order)
57  *
58  * chiSqrCDFR <=> chiSqrDistributionCompl (Note reversed arg order)
59  *
60  * invChiSqCDFR <=> chiSqrDistributionComplInv
61  *
62  * fisherCDF <=> fDistribution (Note reversed arg order)
63  *
64  * fisherCDFR <=> fDistributionCompl (Note reversed arg order)
65  *
66  * invFisherCDFR <=> fDistributionComplInv
67  *
68  * gammaCDF <=> gammaDistribution  (Note arg reversal)
69  *
70  * gammaCDFR <=> gammaDistributionCompl  (Note arg reversal)
71  *
72  * Note that CDFRs/Compls of continuous distributions are not equivalent,
73  * because in Tango/MathExtra they represent P(X > x) while in dstats they
74  * represent P(X >= x).
75  *
76  *
77  * Copyright (c) 2008-2009, David Simcha and Don Clugston
78  * All rights reserved.
79  *
80  * Redistribution and use in source and binary forms, with or without
81  * modification, are permitted provided that the following conditions are met:
82  *
83  *     * Redistributions of source code must retain the above copyright
84  *       notice, this list of conditions and the following disclaimer.
85  *
86  *     * Redistributions in binary form must reproduce the above copyright
87  *       notice, this list of conditions and the following disclaimer in the
88  *       documentation and/or other materials provided with the distribution.
89  *
90  *     * Neither the name of the authors nor the
91  *       names of its contributors may be used to endorse or promote products
92  *       derived from this software without specific prior written permission.
93  *
94  * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
95  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
96  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
97  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
98  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
99  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
100  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
101  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
102  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
103  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
104 
105 module dstats.distrib;
106 
107 import std.algorithm, std.conv, std.exception, std.math, std.traits,
108     std.mathspecial, std.range;
109 
110 alias std.mathspecial.erfc erfc;
111 alias std.mathspecial.erf erf;
112 
113 import dstats.base;
114 
115 // CTFE doesn't work yet for sqrt() in GDC.  This value is sqrt(2 * PI).
116 enum SQ2PI = 2.50662827463100050241576528481104525300698674060993831662992;
117 
118 version(unittest) {
119     import std.stdio, std.random;
120 
121     alias std.math.approxEqual ae;
122 }
123 
124 /**Takes a distribution function (CDF or PDF/PMF) as a template argument, and
125  * parameters as function arguments in the order that they appear in the
126  * function declaration and returns a delegate that binds the supplied
127  * parameters to the distribution function.  Assumes the non-parameter
128  * argument is the first argument to the distribution function.
129  *
130  * Examples:
131  * ---
132  * auto stdNormal = parametrize!(normalCDF)(0.0L, 1.0L);
133  * ---
134  *
135  * stdNormal is now a delegate for the normal(0, 1) distribution.*/
136 double delegate(ParameterTypeTuple!(distrib)[0])
137               parametrize(alias distrib)(ParameterTypeTuple!(distrib)[1..$]
138               parameters) {
139 
140     double calculate(ParameterTypeTuple!(distrib)[0] arg) {
141         return distrib(arg, parameters);
142     }
143 
144     return &calculate;
145 }
146 
147 unittest {
148     // Just basically see if this compiles.
149     auto stdNormal = parametrize!normalCDF(0, 1);
150     assert(approxEqual(stdNormal(2.5), normalCDF(2.5, 0, 1)));
151 }
152 
153 ///
154 struct ParamFunctor(alias distrib) {
155     ParameterTypeTuple!(distrib)[1..$] parameters;
156 
157     double opCall(ParameterTypeTuple!(distrib)[0] arg) {
158         return distrib(arg, parameters);
159     }
160 }
161 
162 /**Takes a distribution function (CDF or PDF/PMF) as a template argument, and
163  * parameters as function arguments in the order that they appear in the
164  * function declaration and returns a functor that binds the supplied
165  * parameters to the distribution function.  Assumes the non-parameter
166  * argument is the first argument to the distribution function.
167  *
168  * Examples:
169  * ---
170  * auto stdNormal = paramFunctor!(normalCDF)(0.0L, 1.0L);
171  * ---
172  *
173  * stdNormal is now a functor for the normal(0, 1) distribution.*/
174 ParamFunctor!(distrib) paramFunctor(alias distrib)
175                        (ParameterTypeTuple!(distrib)[1..$] parameters) {
176     ParamFunctor!(distrib) ret;
177     foreach(ti, elem; parameters) {
178         ret.tupleof[ti] = elem;
179     }
180     return ret;
181 }
182 
183 unittest {
184     // Just basically see if this compiles.
185     auto stdNormal = paramFunctor!normalCDF(0, 1);
186     assert(approxEqual(stdNormal(2.5), normalCDF(2.5, 0, 1)));
187 }
188 
189 ///
190 double uniformPDF(double X, double lower, double upper) {
191     dstatsEnforce(X >= lower, "Can't have X < lower bound in uniform distribution.");
192     dstatsEnforce(X <= upper, "Can't have X > upper bound in uniform distribution.");
193     return 1.0L / (upper - lower);
194 }
195 
196 ///
197 double uniformCDF(double X, double lower, double upper) {
198     dstatsEnforce(X >= lower, "Can't have X < lower bound in uniform distribution.");
199     dstatsEnforce(X <= upper, "Can't have X > upper bound in uniform distribution.");
200 
201     return (X - lower) / (upper - lower);
202 }
203 
204 ///
205 double uniformCDFR(double X, double lower, double upper) {
206     dstatsEnforce(X >= lower, "Can't have X < lower bound in uniform distribution.");
207     dstatsEnforce(X <= upper, "Can't have X > upper bound in uniform distribution.");
208 
209     return (upper - X) / (upper - lower);
210 }
211 
212 ///
213 double poissonPMF(ulong k, double lambda) {
214     dstatsEnforce(lambda > 0, "Cannot have a Poisson with lambda <= 0 or nan.");
215 
216     return exp(cast(double) k * log(lambda) -
217             (lambda + logFactorial(k)));  //Grouped for best precision.
218 }
219 
220 unittest {
221     assert(approxEqual(poissonPMF(1, .1), .0904837));
222 }
223 
224 enum POISSON_NORMAL = 1UL << 12;  // Where to switch to normal approx.
225 
226 // The gamma incomplete function is too unstable and the distribution
227 // is for all practical purposes normal anyhow.
228 private double normApproxPoisCDF(ulong k, double lambda)
229 in {
230     assert(lambda > 0);
231 } body {
232     double sd = sqrt(lambda);
233     // mean == lambda.
234     return normalCDF(k + 0.5L, lambda, sd);
235 }
236 
237 /**P(K <= k) where K is r.v.*/
238 double poissonCDF(ulong k, double lambda) {
239     dstatsEnforce(lambda > 0, "Cannot have a poisson with lambda <= 0 or nan.");
240 
241     return (max(k, lambda) >= POISSON_NORMAL) ?
242            normApproxPoisCDF(k, lambda) :
243            gammaIncompleteCompl(k + 1, lambda);
244 }
245 
246 unittest {
247     // Make sure this jives with adding up PMF elements, since this is a
248     // discrete distribution.
249     static double pmfSum(uint k, double lambda) {
250         double ret = 0;
251         foreach(i; 0..k + 1) {
252             ret += poissonPMF(i, lambda);
253         }
254         return ret;
255     }
256 
257     assert(approxEqual(poissonCDF(1, 0.5), pmfSum(1, 0.5)));
258     assert(approxEqual(poissonCDF(3, 0.7), pmfSum(3, 0.7)));
259 
260     // Absurdly huge values:  Test normal approximation.
261     // Values from R.
262     double ans = poissonCDF( (1UL << 50) - 10_000_000, 1UL << 50);
263     assert(approxEqual(ans, 0.3828427));
264 
265     // Make sure cutoff is reasonable, i.e. make sure gamma incomplete branch
266     // and normal branch get roughly the same answer near the cutoff.
267     for(double lambda = POISSON_NORMAL / 2; lambda <= POISSON_NORMAL * 2; lambda += 100) {
268         for(ulong k = POISSON_NORMAL / 2; k <= POISSON_NORMAL * 2; k += 100) {
269             double normAns = normApproxPoisCDF(k, lambda);
270             double gammaAns = gammaIncompleteCompl(k + 1, lambda);
271             assert(abs(normAns - gammaAns) < 0.01, text(normAns, '\t', gammaAns));
272         }
273     }
274 }
275 
276 // The gamma incomplete function is too unstable and the distribution
277 // is for all practical purposes normal anyhow.
278 private double normApproxPoisCDFR(ulong k, double lambda)
279 in {
280     assert(lambda > 0);
281 } body {
282     double sd = sqrt(lambda);
283     // mean == lambda.
284     return normalCDFR(k - 0.5L, lambda, sd);
285 }
286 
287 /**P(K >= k) where K is r.v.*/
288 double poissonCDFR(ulong k, double lambda) {
289     dstatsEnforce(lambda > 0, "Can't have a poisson with lambda <= 0 or nan.");
290 
291     return (max(k, lambda) >= POISSON_NORMAL) ?
292             normApproxPoisCDFR(k, lambda) :
293             gammaIncomplete(k, lambda);
294 }
295 
296 unittest {
297     // Make sure this jives with adding up PMF elements, since this is a
298     // discrete distribution.
299     static double pmfSum(uint k, double lambda) {
300         double ret = 0;
301         foreach(i; 0..k + 1)  {
302             ret += poissonPMF(i, lambda);
303         }
304         return ret;
305     }
306 
307     assert(approxEqual(poissonCDFR(1, 0.5), 1 - pmfSum(0, 0.5)));
308     assert(approxEqual(poissonCDFR(3, 0.7), 1 - pmfSum(2, 0.7)));
309 
310     // Absurdly huge value to test normal approximation.
311     // Values from R.
312     double ans = poissonCDFR( (1UL << 50) - 10_000_000, 1UL << 50);
313     assert(approxEqual(ans, 0.6171573));
314 
315     // Make sure cutoff is reasonable, i.e. make sure gamma incomplete branch
316     // and normal branch get roughly the same answer near the cutoff.
317     for(double lambda = POISSON_NORMAL / 2; lambda <= POISSON_NORMAL * 2; lambda += 100) {
318         for(ulong k = POISSON_NORMAL / 2; k <= POISSON_NORMAL * 2; k += 100) {
319             double normAns = normApproxPoisCDFR(k, lambda);
320             double gammaAns = gammaIncomplete(k, lambda);
321             assert(abs(normAns - gammaAns) < 0.01, text(normAns, '\t', gammaAns));
322         }
323     }
324 }
325 
326 /**Returns the value of k for the given p-value and lambda.  If p-val
327  * doesn't exactly map to a value of k, the k for which poissonCDF(k, lambda)
328  * is closest to pVal is used.*/
329 uint invPoissonCDF(double pVal, double lambda) {
330     dstatsEnforce(lambda > 0, "Cannot have a poisson with lambda <= 0 or nan.");
331     dstatsEnforce(pVal >= 0 && pVal <= 1, "P-values must be between 0, 1.");
332 
333     // Use normal approximation to get approx answer, then brute force search.
334     // This works better than you think because for small n, there's not much
335     // search space and for large n, the normal approx. is doublely good.
336     uint guess = cast(uint) max(round(
337           invNormalCDF(pVal, lambda, sqrt(lambda)) + 0.5), 0.0L);
338     double guessP = poissonCDF(guess, lambda);
339 
340     if(guessP == pVal) {
341         return guess;
342     } else if(guessP < pVal) {
343         for(uint k = guess + 1; ; k++) {
344             double newP = guessP + poissonPMF(k, lambda);
345             if(newP >= 1)
346                 return k;
347             if(abs(newP - pVal) > abs(guessP - pVal)) {
348                 return k - 1;
349             } else {
350                 guessP = newP;
351             }
352         }
353     } else {
354         for(uint k = guess - 1; k != uint.max; k--) {
355             double newP = guessP - poissonPMF(k + 1, lambda);
356             if(abs(newP - pVal) > abs(guessP - pVal)) {
357                 return k + 1;
358             } else {
359                 guessP = newP;
360             }
361         }
362         return 0;
363     }
364 }
365 
366 unittest {
367    foreach(i; 0..1_000) {
368        // Restricted variable ranges are because, in the tails, more than one
369        // value of k can map to the same p-value at machine precision.
370        // Obviously, this is one of those corner cases that nothing can be
371        // done about.
372        double lambda = uniform(.05L, 8.0L);
373        uint k = uniform(0U, cast(uint) ceil(3.0L * lambda));
374        double pVal = poissonCDF(k, lambda);
375        assert(invPoissonCDF(pVal, lambda) == k);
376    }
377 }
378 
379 ///
380 double binomialPMF(ulong k, ulong n, double p) {
381     dstatsEnforce(k <= n, "k cannot be > n in binomial distribution.");
382     dstatsEnforce(p >= 0 && p <= 1, "p must be between 0, 1 in binomial distribution.");
383     return exp(logNcomb(n, k) + k * log(p) + (n - k) * log(1 - p));
384 }
385 
386 unittest {
387     assert(approxEqual(binomialPMF(0, 10, .5), cast(double) 1/1024));
388     assert(approxEqual(binomialPMF(100, 1000, .11), .024856));
389 }
390 
391 // Determines what value of n we switch to normal approximation at b/c
392 // betaIncomplete becomes unstable.
393 private enum BINOM_APPROX = 1UL << 24;
394 
395 // Cutoff value of n * p for deciding whether to go w/ normal or poisson approx
396 // when betaIncomplete becomes unstable.
397 private enum BINOM_POISSON = 1_024;
398 
399 // betaIncomplete is numerically unstable for huge values of n.
400 // Luckily this is exactly when the normal approximation becomes
401 // for all practical purposes exact.
402 private double normApproxBinomCDF(double k, double n, double p)
403 in {
404     assert(k <= n);
405     assert(p >= 0 && p <= 1);
406 } body {
407     double mu = p * n;
408     double sd = sqrt( to!double(n) ) * sqrt(p) * sqrt(1 - p);
409     double xCC = k + 0.5L;
410     return normalCDF(xCC, mu, sd);
411 }
412 
413 ///P(K <= k) where K is random variable.
414 double binomialCDF(ulong k, ulong n, double p) {
415     dstatsEnforce(k <= n, "k cannot be > n in binomial distribution.");
416     dstatsEnforce(p >= 0 && p <= 1, "p must be between 0, 1 in binomial distribution.");
417 
418     if(k == n) {
419         return 1;
420     } else if(k == 0) {
421         return pow(1.0 - p,  cast(double) n);
422     }
423 
424     if(n > BINOM_APPROX) {
425         if(n * p < BINOM_POISSON) {
426             return poissonCDF(k, n * p);
427         } else if(n * (1 - p) < BINOM_POISSON) {
428             return poissonCDFR(n - k, n * (1 - p));
429         } else {
430             return normApproxBinomCDF(k, n, p);
431         }
432     }
433 
434     return betaIncomplete(n - k, k + 1, 1.0 - p);
435 }
436 
437 unittest {
438     assert(approxEqual(binomialCDF(10, 100, .11), 0.4528744401));
439     assert(approxEqual(binomialCDF(15, 100, .12), 0.8585510507));
440     assert(approxEqual(binomialCDF(50, 1000, .04), 0.95093595));
441     assert(approxEqual(binomialCDF(7600, 15000, .5), .9496193045414));
442     assert(approxEqual(binomialCDF(0, 10, 0.2), 0.1073742));
443 
444     // Absurdly huge numbers:
445     {
446         ulong k = (1UL << 60) - 100_000_000;
447         ulong n = 1UL << 61;
448         assert(approxEqual(binomialCDF(k, n, 0.5L), 0.4476073));
449     }
450 
451     // Test Poisson branch.
452     double poisAns = binomialCDF(85, 1UL << 26, 1.49e-6);
453     assert(approxEqual(poisAns, 0.07085327));
454 
455     // Test poissonCDFR branch.
456     poisAns = binomialCDF( (1UL << 25) - 100, 1UL << 25, 0.9999975L);
457     assert(approxEqual(poisAns, 0.04713316));
458 
459     // Make sure cutoff is reasonable:  Just below it, we should get similar
460     // results for normal, exact.
461     for(ulong n = BINOM_APPROX / 2; n < BINOM_APPROX; n += 200_000) {
462         for(double p = 0.01; p <= 0.99; p += 0.05) {
463 
464             long lowerK = roundTo!long( n * p * 0.99);
465             long upperK = roundTo!long( n * p / 0.99);
466 
467             for(ulong k = lowerK; k <= min(n, upperK); k += 1_000) {
468                 double normRes = normApproxBinomCDF(k, n, p);
469                 double exactRes = binomialCDF(k, n, p);
470                 assert(abs(normRes - exactRes) < 0.001,
471                     text(normRes, '\t', exactRes));
472             }
473         }
474     }
475 
476 }
477 
478 // betaIncomplete is numerically unstable for huge values of n.
479 // Luckily this is exactly when the normal approximation becomes
480 // for all practical purposes exact.
481 private double normApproxBinomCDFR(ulong k, ulong n, double p)
482 in {
483     assert(k <= n);
484     assert(p >= 0 && p <= 1);
485 } body {
486     double mu = p * n;
487     double sd = sqrt( to!double(n) ) * sqrt(p)  * sqrt(1 - p);
488     double xCC = k - 0.5L;
489     return normalCDFR(xCC, mu, sd);
490 }
491 
492 ///P(K >= k) where K is random variable.
493 double binomialCDFR(ulong k, ulong n, double p) {
494     dstatsEnforce(k <= n, "k cannot be > n in binomial distribution.");
495     dstatsEnforce(p >= 0 && p <= 1, "p must be between 0, 1 in binomial distribution.");
496 
497     if(k == 0) {
498         return 1;
499     } else if(k == n) {
500         return pow(p, cast(double) n);
501     }
502 
503     if(n > BINOM_APPROX) {
504         if(n * p < BINOM_POISSON) {
505             return poissonCDFR(k, n * p);
506         } else if(n * (1 - p) < BINOM_POISSON) {
507             return poissonCDF(n - k, n * (1 - p));
508         } else {
509             return normApproxBinomCDFR(k, n, p);
510         }
511     }
512 
513     return betaIncomplete(k, n - k + 1, p);
514 }
515 
516 unittest {
517     // Values from R, Maxima.
518     assert(approxEqual(binomialCDF(10, 100, .11), 1 -
519                       binomialCDFR(11, 100, .11)));
520     assert(approxEqual(binomialCDF(15, 100, .12), 1 -
521                        binomialCDFR(16, 100, .12)));
522     assert(approxEqual(binomialCDF(50, 1000, .04), 1 -
523                        binomialCDFR(51, 1000, .04)));
524     assert(approxEqual(binomialCDF(7600, 15000, .5), 1 -
525                        binomialCDFR(7601, 15000, .5)));
526     assert(approxEqual(binomialCDF(9, 10, 0.3), 1 -
527                        binomialCDFR(10, 10, 0.3)));
528 
529     // Absurdly huge numbers, test normal branch.
530     {
531         ulong k = (1UL << 60) - 100_000_000;
532         ulong n = 1UL << 61;
533         assert(approxEqual(binomialCDFR(k, n, 0.5L), 0.5523927));
534     }
535 
536     // Test Poisson inversion branch.
537     double poisRes = binomialCDFR((1UL << 25) - 70, 1UL << 25, 0.9999975L);
538     assert(approxEqual(poisRes, 0.06883905));
539 
540     // Test Poisson branch.
541     poisRes = binomialCDFR(350, 1UL << 25, 1e-5);
542     assert(approxEqual(poisRes, 0.2219235));
543 
544     // Make sure cutoff is reasonable:  Just below it, we should get similar
545     // results for normal, exact.
546     for(ulong n = BINOM_APPROX / 2; n < BINOM_APPROX; n += 200_000) {
547         for(double p = 0.01; p <= 0.99; p += 0.05) {
548 
549             long lowerK = roundTo!long( n * p * 0.99);
550             long upperK = roundTo!long( n * p / 0.99);
551 
552             for(ulong k = lowerK; k <= min(n, upperK); k += 1_000) {
553                 double normRes = normApproxBinomCDFR(k, n, p);
554                 double exactRes = binomialCDFR(k, n, p);
555                 assert(abs(normRes - exactRes) < 0.001,
556                     text(normRes, '\t', exactRes));
557             }
558         }
559     }
560 }
561 
562 /**Returns the value of k for the given p-value, n and p.  If p-value does
563  * not exactly map to a value of k, the value for which binomialCDF(k, n, p)
564  * is closest to pVal is used.*/
565 uint invBinomialCDF(double pVal, uint n, double p) {
566     dstatsEnforce(pVal >= 0 && pVal <= 1, "p-values must be between 0, 1.");
567     dstatsEnforce(p >= 0 && p <= 1, "p must be between 0, 1 in binomial distribution.");
568 
569     // Use normal approximation to get approx answer, then brute force search.
570     // This works better than you think because for small n, there's not much
571     // search space and for large n, the normal approx. is doublely good.
572     uint guess = cast(uint) max(round(
573           invNormalCDF(pVal, n * p, sqrt(n * p * (1 - p)))) + 0.5, 0);
574     if(guess > n) {
575         if(pVal < 0.5)  // Numerical issues/overflow.
576             guess = 0;
577         else guess = n;
578     }
579     double guessP = binomialCDF(guess, n, p);
580 
581     if(guessP == pVal) {
582         return guess;
583     } else if(guessP < pVal) {
584         for(uint k = guess + 1; k <= n; k++) {
585             double newP = guessP + binomialPMF(k, n, p);
586             if(abs(newP - pVal) > abs(guessP - pVal)) {
587                 return k - 1;
588             } else {
589                 guessP = newP;
590             }
591         }
592         return n;
593     } else {
594         for(uint k = guess - 1; k != uint.max; k--) {
595             double newP = guessP - binomialPMF(k + 1, n, p);
596             if(abs(newP - pVal) > abs(guessP - pVal)) {
597                 return k + 1;
598             } else {
599                 guessP = newP;
600             }
601         }
602         return 0;
603     }
604 }
605 
606 unittest {
607    Random gen = Random(unpredictableSeed);
608    foreach(i; 0..1_000) {
609        // Restricted variable ranges are because, in the tails, more than one
610        // value of k can map to the same p-value at machine precision.
611        // Obviously, this is one of those corner cases that nothing can be
612        // done about.  Using small n's, moderate p's prevents this.
613        uint n = uniform(5U, 10U);
614        uint k = uniform(0U, n);
615        double p = uniform(0.1L, 0.9L);
616        double pVal = binomialCDF(k, n, p);
617        assert(invBinomialCDF(pVal, n, p) == k);
618    }
619 }
620 
621 ///
622 double hypergeometricPMF(long x, long n1, long n2, long n)
623 in {
624     assert(x <= n);
625 } body {
626     if(x > n1 || x < (n - n2)) {
627         return 0;
628     }
629     double result = logNcomb(n1, x) + logNcomb(n2, n - x) - logNcomb(n1 + n2, n);
630     return exp(result);
631 }
632 
633 unittest {
634     assert(approxEqual(hypergeometricPMF(5, 10, 10, 10), .3437182));
635     assert(approxEqual(hypergeometricPMF(9, 12, 10, 15), .27089783));
636     assert(approxEqual(hypergeometricPMF(9, 100, 100, 15), .15500003));
637 }
638 
639 /**P(X <= x), where X is random variable.  Uses either direct summation,
640  * normal or binomial approximation depending on parameters.*/
641 // If anyone knows a better algorithm for this, feel free...
642 // I've read a decent amount about it, though, and getting hypergeometric
643 // CDFs that are both accurate and fast is just plain hard.  This
644 // implementation attempts to strike a balance between the two, so that
645 // both speed and accuracy are "good enough" for most practical purposes.
646 double hypergeometricCDF(long x, long n1, long n2, long n) {
647     dstatsEnforce(x <= n, "x must be <= n in hypergeometric distribution.");
648     dstatsEnforce(n <= n1 + n2, "n must be <= n1 + n2 in hypergeometric distribution.");
649     dstatsEnforce(x >= 0, "x must be >= 0 in hypergeometric distribution.");
650 
651     ulong expec = (n1 * n) / (n1 + n2);
652     long nComp = n1 + n2 - n, xComp = n2 + x - n;
653 
654     // Try to reduce number of calculations using identities.
655     if(x >= n1 || x == n) {
656         return 1;
657     } else if(x > expec && x > n / 2) {
658         return 1 - hypergeometricCDF(n - x - 1, n2, n1, n);
659     } else if(xComp < x && xComp > 0) {
660         return hypergeometricCDF(xComp, n2, n1, nComp);
661     }
662 
663     // Speed depends on x mostly, so always use exact for small x.
664     if(x <= 100) {
665         return hyperExact(x, n1, n2, n);
666     }
667 
668     // Determine whether to use exact, normal approx or binomial approx.
669     // Using obviously arbitrary but relatively stringent standards
670     // for determining whether to approximate.
671     enum NEXACT = 50L;
672 
673     double p = cast(double) n1 / (n1 + n2);
674     double pComp = cast(double) n2 / (n1 + n2);
675     double pMin = min(p, pComp);
676     if(min(n, nComp) * pMin >= 100) {
677         // Since high relative error in the lower tail is a significant problem,
678         // this is a hack to improve the normal approximation:  Use the normal
679         // approximation, except calculate the last NEXACT elements exactly,
680         // since elements around the e.v. are where absolute error is highest.
681         // For large x, gives most of the accuracy of an exact calculation in
682         // only a small fraction of the time.
683         if(x <= expec + NEXACT / 2) {
684             return min(1, normApproxHyper(x - NEXACT, n1, n2, n) +
685                 hyperExact(x, n1, n2, n, x - NEXACT + 1));
686         } else {
687             // Just use plain old normal approx.  Since P is large, the
688             // relative error won't be so bad anyhow.
689             return normApproxHyper(x, n1, n2, n);
690         }
691     }
692     // Try to make n as small as possible by applying mathematically equivalent
693     // transformations so that binomial approx. works as well as possible.
694     ulong bSc1 = (n1 + n2) / n, bSc2 = (n1 + n2) / n1;
695 
696     if(bSc1 >= 50 && bSc1 > bSc2) {
697         // Same hack as normal approximation for rel. acc. in lower tail.
698         if(x <= expec + NEXACT / 2) {
699             return min(1, binomialCDF(cast(uint) (x - NEXACT), cast(uint) n, p) +
700                 hyperExact(x, n1, n2, n, x - NEXACT + 1));
701         } else {
702             return binomialCDF(cast(uint) x, cast(uint)  n, p);
703         }
704     } else if(bSc2 >= 50 && bSc2 > bSc1) {
705         double p2 = cast(double) n / (n1 + n2);
706         if(x <= expec + NEXACT / 2) {
707             return min(1, binomialCDF(cast(uint) (x - NEXACT), cast(uint)  n1, p2) +
708                 hyperExact(x, n1, n2, n, x - NEXACT + 1));
709         } else {
710             return binomialCDF(cast(uint) x, cast(uint) n1, p2);
711         }
712     } else {
713         return hyperExact(x, n1, n2, n);
714     }
715 }
716 
717 unittest {
718     // Values from R and the Maxima CAS.
719     // Test exact branch, including reversing, complementing.
720     assert(approxEqual(hypergeometricCDF(5, 10, 10, 10), 0.6718591));
721     assert(approxEqual(hypergeometricCDF(3, 11, 15, 10), 0.27745322));
722     assert(approxEqual(hypergeometricCDF(18, 27, 31, 35), 0.88271714));
723     assert(approxEqual(hypergeometricCDF(21, 29, 31, 35), 0.99229253));
724 
725     // Normal branch.
726     assert(approxEqual(hypergeometricCDF(501, 2000, 1000, 800), 0.002767073));
727     assert(approxEqual(hypergeometricCDF(565, 2000, 1000, 800), 0.9977068));
728     assert(approxEqual(hypergeometricCDF(2700, 10000, 20000, 8000), 0.825652));
729 
730     // Binomial branch.  One for each transformation.
731     assert(approxEqual(hypergeometricCDF(110, 5000, 7000, 239), 0.9255627));
732     assert(approxEqual(hypergeometricCDF(19840, 2950998, 12624, 19933), 0.2020618));
733     assert(approxEqual(hypergeometricCDF(130, 24195, 52354, 295), 0.9999973));
734     assert(approxEqual(hypergeometricCDF(103, 901, 49014, 3522), 0.999999));
735 }
736 
737 ///P(X >= x), where X is random variable.
738 double hypergeometricCDFR(ulong x, ulong n1, ulong n2, ulong n) {
739     dstatsEnforce(x <= n, "x must be <= n in hypergeometric distribution.");
740     dstatsEnforce(n <= n1 + n2, "n must be <= n1 + n2 in hypergeometric distribution.");
741     dstatsEnforce(x >= 0, "x must be >= 0 in hypergeometric distribution.");
742 
743     return hypergeometricCDF(n - x, n2, n1, n);
744 }
745 
746 unittest {
747     //Reverses n1, n2 and subtracts x from n to get mirror image.
748     assert(approxEqual(hypergeometricCDF(5,10,10,10),
749                        hypergeometricCDFR(5,10,10,10)));
750     assert(approxEqual(hypergeometricCDF(3, 11, 15, 10),
751                        hypergeometricCDFR(7, 15, 11, 10)));
752     assert(approxEqual(hypergeometricCDF(18, 27, 31, 35),
753                        hypergeometricCDFR(17, 31, 27, 35)));
754     assert(approxEqual(hypergeometricCDF(21, 29, 31, 35),
755                        hypergeometricCDFR(14, 31, 29, 35)));
756 }
757 
758 double hyperExact(ulong x, ulong n1, ulong n2, ulong n, ulong startAt = 0) {
759     dstatsEnforce(x <= n, "x must be <= n in hypergeometric distribution.");
760     dstatsEnforce(n <= n1 + n2, "n must be <= n1 + n2 in hypergeometric distribution.");
761     dstatsEnforce(x >= 0, "x must be >= 0 in hypergeometric distribution.");
762 
763     immutable double constPart = logFactorial(n1) + logFactorial(n2) +
764         logFactorial(n) + logFactorial(n1 + n2 - n) - logFactorial(n1 + n2);
765     double sum = 0;
766     for(ulong i = x; i != startAt - 1; i--) {
767         double oldSum = sum;
768         sum += exp(constPart - logFactorial(i) - logFactorial(n1 - i) -
769                logFactorial(n2 + i - n) - logFactorial(n - i));
770         if(isIdentical(sum, oldSum)) { // At full machine precision.
771             break;
772         }
773     }
774     return sum;
775 }
776 
777 double normApproxHyper(ulong x, ulong n1, ulong n2, ulong n) {
778     double p1 = cast(double) n1 / (n1 + n2);
779     double p2 = cast(double) n2 / (n1 + n2);
780     double numer = x + 0.5L - n * p1;
781     double denom = sqrt(n * p1 * p2 * (n1 + n2 - n) / (n1 + n2 - 1));
782     return normalCDF(numer / denom);
783 }
784 
785 // Aliases for old names.  Not documented because new names should be used.
786 deprecated {
787     alias chiSquareCDF chiSqrCDF;
788     alias chiSquareCDFR chiSqrCDFR;
789     alias invChiSquareCDFR invChiSqCDFR;
790 }
791 
792 ///
793 double chiSquarePDF(double x, double v) {
794     dstatsEnforce(x >= 0, "x must be >= 0 in chi-square distribution.");
795     dstatsEnforce(v >= 1.0, "Must have at least 1 degree of freedom for chi-square.");
796 
797     // Calculate in log space for stability.
798     immutable logX = log(x);
799     immutable numerator = logX * (0.5 * v - 1) - 0.5 * x;
800     immutable denominator = LN2 * (0.5 * v) + logGamma(0.5 * v);
801     return exp(numerator - denominator);
802 }
803 
804 unittest {
805     assert( approxEqual(chiSquarePDF(1, 2), 0.3032653));
806     assert( approxEqual(chiSquarePDF(2, 1), 0.1037769));
807 }
808 
809 /**
810  *  $(POWER &chi;,2) distribution function and its complement.
811  *
812  * Returns the area under the left hand tail (from 0 to x)
813  * of the Chi square probability density function with
814  * v degrees of freedom. The complement returns the area under
815  * the right hand tail (from x to &infin;).
816  *
817  *  chiSquareCDF(x | v) = ($(INTEGRATE 0, x)
818  *          $(POWER t, v/2-1) $(POWER e, -t/2) dt )
819  *             / $(POWER 2, v/2) $(GAMMA)(v/2)
820  *
821  *  chiSquareCDFR(x | v) = ($(INTEGRATE x, &infin;)
822  *          $(POWER t, v/2-1) $(POWER e, -t/2) dt )
823  *             / $(POWER 2, v/2) $(GAMMA)(v/2)
824  *
825  * Params:
826  *  v  = degrees of freedom. Must be positive.
827  *  x  = the $(POWER &chi;,2) variable. Must be positive.
828  *
829  */
830 double chiSquareCDF(double x, double v) {
831     dstatsEnforce(x >= 0, "x must be >= 0 in chi-square distribution.");
832     dstatsEnforce(v >= 1.0, "Must have at least 1 degree of freedom for chi-square.");
833 
834     // These are very common special cases where we can make the calculation
835     // a lot faster and/or more accurate.
836     if(v == 1) {
837         // Then it's the square of a normal(0, 1).
838         return 1.0L - erfc(sqrt(x) * SQRT1_2);
839     } else if(v == 2) {
840         // Then it's an exponential w/ lambda == 1/2.
841         return 1.0L - exp(-0.5 * x);
842     } else {
843         return gammaIncomplete(0.5 * v, 0.5 * x);
844     }
845 }
846 
847 ///
848 double chiSquareCDFR(double x, double v) {
849     dstatsEnforce(x >= 0, "x must be >= 0 in chi-square distribution.");
850     dstatsEnforce(v >= 1.0, "Must have at least 1 degree of freedom for chi-square.");
851 
852     // These are very common special cases where we can make the calculation
853     // a lot faster and/or more accurate.
854     if(v == 1) {
855         // Then it's the square of a normal(0, 1).
856         return erfc(sqrt(x) * SQRT1_2);
857     } else if(v == 2) {
858         // Then it's an exponential w/ lambda == 1/2.
859         return exp(-0.5 * x);
860     } else {
861         return gammaIncompleteCompl(0.5 * v, 0.5 * x);
862     }
863 }
864 
865 /**
866  *  Inverse of complemented $(POWER &chi;, 2) distribution
867  *
868  * Finds the $(POWER &chi;, 2) argument x such that the integral
869  * from x to &infin; of the $(POWER &chi;, 2) density is equal
870  * to the given cumulative probability p.
871  *
872  * Params:
873  * p = Cumulative probability. 0<= p <=1.
874  * v = Degrees of freedom. Must be positive.
875  *
876  */
877 double invChiSquareCDFR(double v, double p) {
878     dstatsEnforce(v >= 1.0, "Must have at least 1 degree of freedom for chi-square.");
879     dstatsEnforce(p >= 0 && p <= 1, "P-values must be between 0, 1.");
880     return  2.0 * gammaIncompleteComplInverse( 0.5*v, p);
881 }
882 
883 unittest {
884     assert(feqrel(chiSquareCDFR(invChiSquareCDFR(3.5, 0.1), 3.5), 0.1)>=double.mant_dig-3);
885     assert(approxEqual(
886         chiSquareCDF(0.4L, 19.02L) + chiSquareCDFR(0.4L, 19.02L), 1.0L));
887     assert(ae( invChiSquareCDFR( 3, chiSquareCDFR(1, 3)), 1));
888 
889     assert(ae(chiSquareCDFR(0.2, 1), 0.6547208));
890     assert(ae(chiSquareCDFR(0.2, 2), 0.9048374));
891     assert(ae(chiSquareCDFR(0.8, 1), 0.3710934));
892     assert(ae(chiSquareCDFR(0.8, 2), 0.67032));
893 
894     assert(ae(chiSquareCDF(0.2, 1), 0.3452792));
895     assert(ae(chiSquareCDF(0.2, 2), 0.09516258));
896     assert(ae(chiSquareCDF(0.8, 1), 0.6289066));
897     assert(ae(chiSquareCDF(0.8, 2), 0.3296800));
898 }
899 
900 ///
901 double normalPDF(double x, double mean = 0, double sd = 1) {
902     dstatsEnforce(sd > 0, "Standard deviation must be > 0 for normal distribution.");
903     double dev = x - mean;
904     return exp(-(dev * dev) / (2 * sd * sd)) / (sd * SQ2PI);
905 }
906 
907 unittest {
908     assert(approxEqual(normalPDF(3, 1, 2), 0.1209854));
909 }
910 
911 ///P(X < x) for normal distribution where X is random var.
912 double normalCDF(double x, double mean = 0, double stdev = 1) {
913     dstatsEnforce(stdev > 0, "Standard deviation must be > 0 for normal distribution.");
914 
915     // Using a slightly non-obvious implementation in terms of erfc because
916     // it seems more accurate than erf for very small values of Z.
917 
918     double Z = (-x + mean) / stdev;
919     return erfc(Z*SQRT1_2)/2;
920 }
921 
922 unittest {
923     assert(approxEqual(normalCDF(2), .9772498));
924     assert(approxEqual(normalCDF(-2), .02275013));
925     assert(approxEqual(normalCDF(1.3), .90319951));
926 }
927 
928 ///P(X > x) for normal distribution where X is random var.
929 double normalCDFR(double x, double mean = 0, double stdev = 1) {
930     dstatsEnforce(stdev > 0, "Standard deviation must be > 0 for normal distribution.");
931 
932     double Z = (x - mean) / stdev;
933     return erfc(Z * SQRT1_2) / 2;
934 }
935 
936 unittest {
937     //Should be essentially a mirror image of normalCDF.
938     for(double i = -8; i < 8; i += .1) {
939         assert(approxEqual(normalCDF(i), normalCDFR(-i)));
940     }
941 }
942 
943 private enum SQRT2PI =   0x1.40d931ff62705966p+1;    // 2.5066282746310005024
944 private enum EXP_2  = 0.13533528323661269189L; /* exp(-2) */
945 
946 /******************************
947  * Inverse of Normal distribution function
948  *
949  * Returns the argument, x, for which the area under the
950  * Normal probability density function (integrated from
951  * minus infinity to x) is equal to p.
952  */
953 double invNormalCDF(double p, double mean = 0, double sd = 1) {
954     dstatsEnforce(p >= 0 && p <= 1, "P-values must be between 0, 1.");
955     dstatsEnforce(sd > 0, "Standard deviation must be > 0 for normal distribution.");
956 
957     return normalDistributionInverse(p) * sd + mean;
958 }
959 
960 
961 unittest {
962     // The values below are from Excel 2003.
963     assert(fabs(invNormalCDF(0.001) - (-3.09023230616779))< 0.00000000000005);
964     assert(fabs(invNormalCDF(1e-50) - (-14.9333375347885))< 0.00000000000005);
965     assert(feqrel(invNormalCDF(0.999), -invNormalCDF(0.001))>double.mant_dig-6);
966 
967     // Excel 2003 gets all the following values wrong!
968     assert(invNormalCDF(0.0)==-double.infinity);
969     assert(invNormalCDF(1.0)==double.infinity);
970     assert(invNormalCDF(0.5)==0);
971 
972     // I don't know the correct result for low values
973     // (Excel 2003 returns norminv(p) = -30 for all p < 1e-200).
974     // The value tested here is the one the function returned in Jan 2006.
975     double unknown1 = invNormalCDF(1e-250L);
976     assert( fabs(unknown1 -(-33.79958617269L) ) < 0.00000005);
977 
978     Random gen;
979     gen.seed(unpredictableSeed);
980     // normalCDF function trivial given ERF, unlikely to contain subtle bugs.
981     // Just make sure invNormalCDF works like it should as the inverse.
982     foreach(i; 0..1000) {
983         double x = uniform(0.0L, 1.0L);
984         double mean = uniform(0.0L, 100.0L);
985         double sd = uniform(1.0L, 3.0L);
986         double inv = invNormalCDF(x, mean, sd);
987         double rec = normalCDF(inv, mean, sd);
988         assert(approxEqual(x, rec));
989     }
990 }
991 
992 ///
993 double logNormalPDF(double x, double mu = 0, double sigma = 1) {
994     dstatsEnforce(sigma > 0, "sigma must be > 0 for log-normal distribution.");
995 
996     immutable mulTerm = 1.0L / (x * sigma * SQ2PI);
997     double expTerm = log(x) - mu;
998     expTerm *= expTerm;
999     expTerm /= 2 * sigma * sigma;
1000     return mulTerm * exp(-expTerm);
1001 }
1002 
1003 unittest {
1004     // Values from R.
1005     assert(approxEqual(logNormalPDF(1, 0, 1), 0.3989423));
1006     assert(approxEqual(logNormalPDF(2, 2, 3), 0.06047173));
1007 }
1008 
1009 ///
1010 double logNormalCDF(double x, double mu = 0, double sigma = 1) {
1011     dstatsEnforce(sigma > 0, "sigma must be > 0 for log-normal distribution.");
1012 
1013     return 0.5L + 0.5L * erf((log(x) - mu) / (sigma * SQRT2));
1014 }
1015 
1016 unittest {
1017     assert(approxEqual(logNormalCDF(4), 0.9171715));
1018     assert(approxEqual(logNormalCDF(1, -2, 3), 0.7475075));
1019 }
1020 
1021 ///
1022 double logNormalCDFR(double x, double mu = 0, double sigma = 1) {
1023     dstatsEnforce(sigma > 0, "sigma must be > 0 for log-normal distribution.");
1024 
1025     return 0.5L - 0.5L * erf((log(x) - mu) / (sigma * SQRT2));
1026 }
1027 
1028 unittest {
1029     assert(approxEqual(logNormalCDF(4) + logNormalCDFR(4), 1));
1030     assert(approxEqual(logNormalCDF(1, -2, 3) + logNormalCDFR(1, -2, 3), 1));
1031 }
1032 
1033 ///
1034 double weibullPDF(double x, double shape, double scale = 1) {
1035     dstatsEnforce(shape > 0, "shape must be > 0 for weibull distribution.");
1036     dstatsEnforce(scale > 0, "scale must be > 0 for weibull distribution.");
1037 
1038     if(x < 0) {
1039         return 0;
1040     }
1041     double ret = pow(x / scale, shape - 1) * exp( -pow(x / scale, shape));
1042     return ret * (shape / scale);
1043 }
1044 
1045 unittest {
1046     assert(approxEqual(weibullPDF(2,1,3), 0.1711390));
1047 }
1048 
1049 ///
1050 double weibullCDF(double x, double shape, double scale = 1) {
1051     dstatsEnforce(shape > 0, "shape must be > 0 for weibull distribution.");
1052     dstatsEnforce(scale > 0, "scale must be > 0 for weibull distribution.");
1053 
1054     double exponent = pow(x / scale, shape);
1055     return 1 - exp(-exponent);
1056 }
1057 
1058 unittest {
1059     assert(approxEqual(weibullCDF(2, 3, 4), 0.1175031));
1060 }
1061 
1062 ///
1063 double weibullCDFR(double x, double shape, double scale = 1) {
1064     dstatsEnforce(shape > 0, "shape must be > 0 for weibull distribution.");
1065     dstatsEnforce(scale > 0, "scale must be > 0 for weibull distribution.");
1066 
1067     double exponent = pow(x / scale, shape);
1068     return exp(-exponent);
1069 }
1070 
1071 unittest {
1072     assert(approxEqual(weibullCDF(2, 3, 4) + weibullCDFR(2, 3, 4), 1));
1073 }
1074 
1075 // For K-S tests in dstats.random.  Todo:  Flesh out.
1076 double waldCDF(double x, double mu, double lambda) {
1077     double sqr = sqrt(lambda / (2 * x));
1078     double term1 = 1 + erf(sqr * (x / mu - 1));
1079     double term2 = exp(2 * lambda / mu);
1080     double term3 = 1 - erf(sqr * (x / mu + 1));
1081     return 0.5L * term1 + 0.5L * term2 * term3;
1082 }
1083 
1084 // ditto.
1085 double rayleighCDF(double x, double mode) {
1086     return 1.0L - exp(-x * x / (2 * mode * mode));
1087 }
1088 
1089 ///
1090 double studentsTPDF(double t, double df) {
1091     dstatsEnforce(df > 0, "Student's T must have >0 degrees of freedom.");
1092 
1093     immutable logPart = logGamma(0.5 * df + 0.5) - logGamma(0.5 * df);
1094     immutable term1 = exp(logPart) / sqrt(df * PI);
1095     immutable term2 = (1.0 + t / df * t) ^^ (-0.5 * df - 0.5);
1096     return term1 * term2;
1097 }
1098 
1099 ///
1100 double studentsTCDF(double t, double df) {
1101     dstatsEnforce(df > 0, "Student's T must have >0 degrees of freedom.");
1102 
1103     double x = (t + sqrt(t * t + df)) / (2 * sqrt(t * t + df));
1104     return betaIncomplete(df * 0.5L, df * 0.5L, x);
1105 }
1106 
1107 ///
1108 double studentsTCDFR(double t, double df)   {
1109     return studentsTCDF(-t, df);
1110 }
1111 
1112 unittest {
1113     assert(approxEqual(studentsTPDF(1, 1), 0.1591549));
1114     assert(approxEqual(studentsTPDF(3, 10), 0.0114055));
1115     assert(approxEqual(studentsTPDF(-4, 5), 0.005123727));
1116 
1117     assert(approxEqual(studentsTCDF(1, 1), 0.75));
1118     assert(approxEqual(studentsTCDF(1.061, 2), 0.8));
1119     assert(approxEqual(studentsTCDF(5.959, 5), 0.9995));
1120     assert(approxEqual(studentsTCDF(.667, 20), 0.75));
1121     assert(approxEqual(studentsTCDF(2.353, 3), 0.95));
1122 }
1123 
1124 /******************************************
1125 *   Inverse of Student's t distribution
1126 *
1127 * Given probability p and degrees of freedom df,
1128 * finds the argument t such that the one-sided
1129 * studentsDistribution(nu,t) is equal to p.
1130 * Used to test whether two distributions have the same
1131 * standard deviation.
1132 *
1133 * Params:
1134 * df = degrees of freedom. Must be >1
1135 * p  = probability. 0 < p < 1
1136 */
1137 double invStudentsTCDF(double p, double df) {
1138     dstatsEnforce(p >= 0 && p <= 1, "P-values must be between 0, 1.");
1139     dstatsEnforce(df > 0, "Student's T must have >0 degrees of freedom.");
1140 
1141     if (p==0) return -double.infinity;
1142     if (p==1) return double.infinity;
1143 
1144     double rk, z;
1145     rk =  df;
1146 
1147     if ( p > 0.25L && p < 0.75L ) {
1148         if ( p == 0.5L ) return 0;
1149         z = 1.0L - 2.0L * p;
1150         z = betaIncompleteInverse( 0.5L, 0.5L*rk, fabs(z) );
1151         double t = sqrt( rk*z/(1.0L-z) );
1152         if( p < 0.5L )
1153             t = -t;
1154         return t;
1155     }
1156     int rflg = -1; // sign of the result
1157     if (p >= 0.5L) {
1158         p = 1.0L - p;
1159         rflg = 1;
1160     }
1161     z = betaIncompleteInverse( 0.5L*rk, 0.5L, 2.0L*p );
1162 
1163     if (z<0) return rflg * double.infinity;
1164     return rflg * sqrt( rk/z - rk );
1165 }
1166 
1167 unittest {
1168     // The remaining values listed here are from Excel, and are unlikely to be accurate
1169     // in the last decimal places. However, they are helpful as a sanity check.
1170 
1171     //  Microsoft Excel 2003 gives TINV(2*(1-0.995), 10) == 3.16927267160917
1172     assert(approxEqual(invStudentsTCDF(0.995, 10), 3.169_272_67L));
1173     assert(approxEqual(invStudentsTCDF(0.6, 8), 0.261_921_096_769_043L));
1174     assert(approxEqual(invStudentsTCDF(0.4, 18), -0.257_123_042_655_869L));
1175     assert(approxEqual(studentsTCDF(invStudentsTCDF(0.4L, 18), 18), .4L));
1176     assert(approxEqual(studentsTCDF( invStudentsTCDF(0.9L, 11), 11), 0.9L));
1177 }
1178 
1179 /**
1180  * The Fisher distribution, its complement, and inverse.
1181  *
1182  * The F density function (also known as Snedcor's density or the
1183  * variance ratio density) is the density
1184  * of x = (u1/df1)/(u2/df2), where u1 and u2 are random
1185  * variables having $(POWER &chi;,2) distributions with df1
1186  * and df2 degrees of freedom, respectively.
1187  *
1188  * fisherCDF returns the area from zero to x under the F density
1189  * function.   The complementary function,
1190  * fisherCDFR, returns the area from x to &infin; under the F density function.
1191  *
1192  * The inverse of the complemented Fisher distribution,
1193  * invFisherCDFR, finds the argument x such that the integral
1194  * from x to infinity of the F density is equal to the given probability y.
1195 
1196  * Params:
1197  *  df1 = Degrees of freedom of the first variable. Must be >= 1
1198  *  df2 = Degrees of freedom of the second variable. Must be >= 1
1199  *  x  = Must be >= 0
1200  */
1201 double fisherCDF(double x, double df1, double df2) {
1202     dstatsEnforce(df1 > 0 && df2 > 0,
1203         "Fisher distribution must have >0 degrees of freedom.");
1204     dstatsEnforce(x >= 0, "x must be >=0 for Fisher distribution.");
1205 
1206     double a = cast(double)(df1);
1207     double b = cast(double)(df2);
1208     double w = a * x;
1209     w = w/(b + w);
1210     return betaIncomplete(0.5L*a, 0.5L*b, w);
1211 }
1212 
1213 /** ditto */
1214 double fisherCDFR(double x, double df1, double df2) {
1215     dstatsEnforce(df1 > 0 && df2 > 0,
1216         "Fisher distribution must have >0 degrees of freedom.");
1217     dstatsEnforce(x >= 0, "x must be >=0 for Fisher distribution.");
1218 
1219     double a = cast(double)(df1);
1220     double b = cast(double)(df2);
1221     double w = b / (b + a * x);
1222     return betaIncomplete( 0.5L*b, 0.5L*a, w );
1223 }
1224 
1225 /**
1226  * Inverse of complemented Fisher distribution
1227  *
1228  * Finds the F density argument x such that the integral
1229  * from x to infinity of the F density is equal to the
1230  * given probability p.
1231  *
1232  * This is accomplished using the inverse beta integral
1233  * function and the relations
1234  *
1235  *      z = betaIncompleteInverse( df2/2, df1/2, p ),
1236  *      x = df2 (1-z) / (df1 z).
1237  *
1238  * Note that the following relations hold for the inverse of
1239  * the uncomplemented F distribution:
1240  *
1241  *      z = betaIncompleteInverse( df1/2, df2/2, p ),
1242  *      x = df2 z / (df1 (1-z)).
1243 */
1244 double invFisherCDFR(double df1, double df2, double p ) {
1245     dstatsEnforce(df1 > 0 && df2 > 0,
1246         "Fisher distribution must have >0 degrees of freedom.");
1247     dstatsEnforce(p >= 0 && p <= 1, "P-values must be between 0, 1.");
1248 
1249     double a = df1;
1250     double b = df2;
1251     /* Compute probability for x = 0.5.  */
1252     double w = betaIncomplete( 0.5L*b, 0.5L*a, 0.5L );
1253     /* If that is greater than p, then the solution w < .5.
1254        Otherwise, solve at 1-p to remove cancellation in (b - b*w).  */
1255     if ( w > p || p < 0.001L) {
1256         w = betaIncompleteInverse( 0.5L*b, 0.5L*a, p );
1257         return (b - b*w)/(a*w);
1258     } else {
1259         w = betaIncompleteInverse( 0.5L*a, 0.5L*b, 1.0L - p );
1260         return b*w/(a*(1.0L-w));
1261     }
1262 }
1263 
1264 unittest {
1265     // fDistCompl(df1, df2, x) = Excel's FDIST(x, df1, df2)
1266       assert(fabs(fisherCDFR(16.5, 6, 4) - 0.00858719177897249L)< 0.0000000000005L);
1267       assert(fabs((1-fisherCDF(0.1, 12, 23)) - 0.99990562845505L)< 0.0000000000005L);
1268       assert(fabs(invFisherCDFR(8, 34, 0.2) - 1.48267037661408L)< 0.0000000005L);
1269       assert(fabs(invFisherCDFR(4, 16, 0.008) - 5.043_537_593_48596L)< 0.0000000005L);
1270       // This one used to fail because of a bug in the definition of MINLOG.
1271       assert(approxEqual(fisherCDFR(invFisherCDFR(4,16, 0.008), 4, 16), 0.008));
1272 }
1273 
1274 ///
1275 double negBinomPMF(ulong k, ulong n, double p) {
1276     dstatsEnforce(p >= 0 && p <= 1,
1277         "p must be between 0, 1 for negative binomial distribution.");
1278 
1279     return exp(logNcomb(k - 1 + n, k) +  n * log(p) + k * log(1 - p));
1280 }
1281 
1282 unittest {
1283     // Values from R.
1284     assert(approxEqual(negBinomPMF(1, 8, 0.7), 0.1383552));
1285     assert(approxEqual(negBinomPMF(3, 2, 0.5), 0.125));
1286 }
1287 
1288 
1289 /**********************
1290  * Negative binomial distribution.
1291  *
1292  * Returns the sum of the terms 0 through k of the negative
1293  * binomial distribution:
1294  *
1295  * $(BIGSUM j=0, k) $(CHOOSE n+j-1, j) $(POWER p, n) $(POWER (1-p), j)
1296  * ???? In mathworld, it is
1297  * $(BIGSUM j=0, k) $(CHOOSE n+j-1, j-1) $(POWER p, j) $(POWER (1-p), n)
1298  *
1299  * In a sequence of Bernoulli trials, this is the probability
1300  * that k or fewer failures precede the n-th success.
1301  *
1302  * The arguments must be positive, with 0 < p < 1 and r>0.
1303  *
1304  * The Geometric Distribution is a special case of the negative binomial
1305  * distribution.
1306  * -----------------------
1307  * geometricDistribution(k, p) = negativeBinomialDistribution(k, 1, p);
1308  * -----------------------
1309  * References:
1310  * $(LINK http://mathworld.wolfram.com/NegativeBinomialDistribution.html)
1311  */
1312 double negBinomCDF(ulong k, ulong n, double p ) {
1313     dstatsEnforce(p >= 0 && p <= 1,
1314         "p must be between 0, 1 for negative binomial distribution.");
1315     if ( k == 0 ) return pow(p, cast(double) n);
1316     return  betaIncomplete( n, k + 1, p );
1317 }
1318 
1319 unittest {
1320     // Values from R.
1321     assert(approxEqual(negBinomCDF(50, 50, 0.5), 0.5397946));
1322     assert(approxEqual(negBinomCDF(2, 1, 0.5), 0.875));
1323 }
1324 
1325 /**Probability that k or more failures precede the nth success.*/
1326 double negBinomCDFR(ulong k, ulong n, double p) {
1327     dstatsEnforce(p >= 0 && p <= 1,
1328         "p must be between 0, 1 for negative binomial distribution.");
1329 
1330     if(k == 0)
1331         return 1;
1332     return betaIncomplete(k, n, 1.0L - p);
1333 }
1334 
1335 unittest {
1336     assert(approxEqual(negBinomCDFR(10, 20, 0.5), 1 - negBinomCDF(9, 20, 0.5)));
1337 }
1338 
1339 ///
1340 ulong invNegBinomCDF(double pVal, ulong n, double p) {
1341     dstatsEnforce(p >= 0 && p <= 1,
1342         "p must be between 0, 1 for negative binomial distribution.");
1343     dstatsEnforce(pVal >= 0 && pVal <= 1,
1344         "P-values must be between 0, 1.");
1345 
1346     // Normal or gamma approx, then adjust.
1347     double mean = n * (1 - p) / p;
1348     double var = n * (1 - p) / (p * p);
1349     double skew = (2 - p) / sqrt(n * (1 - p));
1350     double kk = 4.0L / (skew * skew);
1351     double theta = sqrt(var / kk);
1352     double offset = (kk * theta) - mean + 0.5L;
1353     ulong guess;
1354 
1355     // invGammaCDFR is very expensive, but worth it in cases where normal approx
1356     // would be the worst.  Otherwise, use normal b/c it's *MUCH* cheaper to
1357     // calculate.
1358     if(skew > 1.5 && var > 1_048_576)
1359         guess = cast(long) max(round(
1360          invGammaCDFR(1 - pVal, 1 / theta, kk) - offset), 0.0L);
1361     else
1362         guess = cast(long) max(round(
1363            invNormalCDF(pVal, mean, sqrt(var)) + 0.5), 0.0L);
1364 
1365     // This is pretty arbitrary behavior, but I don't want to use exceptions
1366     // and it has to be handled as a special case.
1367     if(pVal > 1 - double.epsilon)
1368         return ulong.max;
1369     double guessP = negBinomCDF(guess, n, p);
1370 
1371     if(guessP == pVal) {
1372         return guess;
1373     } else if(guessP < pVal) {
1374         for(ulong k = guess + 1; ; k++) {
1375             double newP = guessP + negBinomPMF(k, n, p);
1376             // Test for aliasing.
1377             if(newP == guessP)
1378                 return k - 1;
1379             if(abs(pVal - newP) > abs(guessP - pVal)) {
1380                 return k - 1;
1381             } else if(newP >= 1) {
1382                 return k;
1383             } else {
1384                 guessP = newP;
1385             }
1386         }
1387     } else {
1388         for(ulong k = guess - 1; guess != ulong.max; k--) {
1389             double newP = guessP - negBinomPMF(k + 1, n, p);
1390             // Test for aliasing.
1391             if(newP == guessP)
1392                 return k + 1;
1393             if(abs(newP - pVal) > abs(guessP - pVal)) {
1394                 return k + 1;
1395             } else {
1396                 guessP = newP;
1397             }
1398         }
1399         return 0;
1400     }
1401 }
1402 
1403 unittest {
1404     Random gen = Random(unpredictableSeed);
1405     uint nSkipped;
1406     foreach(i; 0..1000) {
1407         uint n = uniform(1u, 10u);
1408         double p = uniform(0.0L, 1L);
1409         uint k = uniform(0, 20);
1410         double pVal = negBinomCDF(k, n, p);
1411 
1412         // In extreme tails, p-values can alias, giving incorrect results.
1413         // This is a corner case that nothing can be done about.  Just skip
1414         // these.
1415         if(pVal >= 1 - 10 * double.epsilon) {
1416             nSkipped++;
1417             continue;
1418         }
1419         assert(invNegBinomCDF(pVal, n, p) == k);
1420     }
1421 }
1422 
1423 ///
1424 double exponentialPDF(double x, double lambda) {
1425     dstatsEnforce(x >= 0, "x must be >0 in exponential distribution");
1426     dstatsEnforce(lambda > 0, "lambda must be >0 in exponential distribution");
1427 
1428     return lambda * exp(-lambda * x);
1429 }
1430 
1431 ///
1432 double exponentialCDF(double x, double lambda) {
1433     dstatsEnforce(x >= 0, "x must be >0 in exponential distribution");
1434     dstatsEnforce(lambda > 0, "lambda must be >0 in exponential distribution");
1435 
1436     return 1.0 - exp(-lambda * x);
1437 }
1438 
1439 ///
1440 double exponentialCDFR(double x, double lambda) {
1441     dstatsEnforce(x >= 0, "x must be >0 in exponential distribution");
1442     dstatsEnforce(lambda > 0, "lambda must be >0 in exponential distribution");
1443 
1444     return exp(-lambda * x);
1445 }
1446 
1447 ///
1448 double invExponentialCDF(double p, double lambda) {
1449     dstatsEnforce(p >= 0 && p <= 1, "p must be between 0, 1 in exponential distribution");
1450     dstatsEnforce(lambda > 0, "lambda must be >0 in exponential distribution");
1451     return log(-1.0 / (p - 1.0)) / lambda;
1452 }
1453 
1454 unittest {
1455     // Values from R.
1456     assert(approxEqual(exponentialPDF(0.75, 3), 0.3161977));
1457     assert(approxEqual(exponentialCDF(0.75, 3), 0.8946008));
1458     assert(approxEqual(exponentialCDFR(0.75, 3), 0.1053992));
1459 
1460     assert(approxEqual(invExponentialCDF(0.8, 2), 0.804719));
1461     assert(approxEqual(invExponentialCDF(0.2, 7), 0.03187765));
1462 }
1463 
1464 ///
1465 double gammaPDF(double x, double rate, double shape) {
1466     dstatsEnforce(x > 0, "x must be >0 in gamma distribution.");
1467     dstatsEnforce(rate > 0, "rate must be >0 in gamma distribution.");
1468     dstatsEnforce(shape > 0, "shape must be >0 in gamma distribution.");
1469 
1470     immutable scale = 1.0 / rate;
1471     immutable firstPart = x ^^ (shape - 1);
1472     immutable logNumer = -x / scale;
1473     immutable logDenom = logGamma(shape) + shape * log(scale);
1474     return firstPart * exp(logNumer - logDenom);
1475 }
1476 
1477 ///
1478 double gammaCDF(double x, double rate, double shape) {
1479     dstatsEnforce(x > 0, "x must be >0 in gamma distribution.");
1480     dstatsEnforce(rate > 0, "rate must be >0 in gamma distribution.");
1481     dstatsEnforce(shape > 0, "shape must be >0 in gamma distribution.");
1482 
1483     return gammaIncomplete(shape, rate * x);
1484 }
1485 
1486 ///
1487 double gammaCDFR(double x, double rate, double shape) {
1488     dstatsEnforce(x > 0, "x must be >0 in gamma distribution.");
1489     dstatsEnforce(rate > 0, "rate must be >0 in gamma distribution.");
1490     dstatsEnforce(shape > 0, "shape must be >0 in gamma distribution.");
1491 
1492     return gammaIncompleteCompl(shape, rate * x);
1493 }
1494 
1495 /**This just calls invGammaCDFR w/ 1 - p b/c invGammaCDFR is more accurate,
1496  * but this function is necessary for consistency.
1497  */
1498 double invGammaCDF(double p, double rate, double shape) {
1499     return invGammaCDFR(1.0 - p, rate, shape);
1500 }
1501 
1502 ///
1503 double invGammaCDFR(double p, double rate, double shape) {
1504     dstatsEnforce(p >= 0 && p <= 1, "p must be between 0, 1 in gamma distribution.");
1505     dstatsEnforce(rate > 0, "rate must be >0 in gamma distribution.");
1506     dstatsEnforce(shape > 0, "shape must be >0 in gamma distribution.");
1507 
1508     double ratex = gammaIncompleteComplInverse(shape, p);
1509     return ratex / rate;
1510 }
1511 
1512 unittest {
1513     assert(approxEqual(gammaPDF(1, 2, 5), 0.1804470));
1514     assert(approxEqual(gammaPDF(0.5, 8, 4), 1.562935));
1515     assert(approxEqual(gammaPDF(3, 2, 7), 0.3212463));
1516     assert(approxEqual(gammaCDF(1, 2, 5), 0.05265302));
1517     assert(approxEqual(gammaCDFR(1, 2, 5), 0.947347));
1518 
1519     double inv = invGammaCDFR(0.78, 2, 1);
1520     assert(approxEqual(gammaCDFR(inv, 2, 1), 0.78));
1521 
1522     double inv2 = invGammaCDF(0.78, 2, 1);
1523     assert(approxEqual(gammaCDF(inv2, 2, 1), 0.78));
1524 }
1525 
1526 ///
1527 double betaPDF(double x, double alpha, double beta) {
1528     dstatsEnforce(alpha > 0, "Alpha must be >0 for beta distribution.");
1529     dstatsEnforce(beta > 0, "Beta must be >0 for beta distribution.");
1530     dstatsEnforce(x >= 0 && x <= 1, "x must be between 0, 1 for beta distribution.");
1531 
1532     return x ^^ (alpha - 1) * (1 - x) ^^ (beta - 1) /
1533         std.mathspecial.beta(alpha, beta);
1534 }
1535 
1536 ///
1537 double betaCDF(double x, double alpha, double beta) {
1538     dstatsEnforce(alpha > 0, "Alpha must be >0 for beta distribution.");
1539     dstatsEnforce(beta > 0, "Beta must be >0 for beta distribution.");
1540     dstatsEnforce(x >= 0 && x <= 1, "x must be between 0, 1 for beta distribution.");
1541 
1542     return std.mathspecial.betaIncomplete(alpha, beta, x);
1543 }
1544 
1545 ///
1546 double betaCDFR(double x, double alpha, double beta) {
1547     dstatsEnforce(alpha > 0, "Alpha must be >0 for beta distribution.");
1548     dstatsEnforce(beta > 0, "Beta must be >0 for beta distribution.");
1549     dstatsEnforce(x >= 0 && x <= 1, "x must be between 0, 1 for beta distribution.");
1550 
1551     return std.mathspecial.betaIncomplete(beta, alpha, 1 - x);
1552 }
1553 
1554 ///
1555 double invBetaCDF(double p, double alpha, double beta) {
1556     dstatsEnforce(alpha > 0, "Alpha must be >0 for beta distribution.");
1557     dstatsEnforce(beta > 0, "Beta must be >0 for beta distribution.");
1558     dstatsEnforce(p >= 0 && p <= 1, "p must be between 0, 1 for beta distribution.");
1559 
1560     return std.mathspecial.betaIncompleteInverse(alpha, beta, p);
1561 }
1562 
1563 unittest {
1564     // Values from R.
1565     assert(approxEqual(betaPDF(0.3, 2, 3), 1.764));
1566     assert(approxEqual(betaPDF(0.78, 0.9, 4), 0.03518569));
1567 
1568     assert(approxEqual(betaCDF(0.3, 2, 3), 0.3483));
1569     assert(approxEqual(betaCDF(0.78, 0.9, 4), 0.9980752));
1570 
1571     assert(approxEqual(betaCDFR(0.3, 2, 3), 0.6517));
1572     assert(approxEqual(betaCDFR(0.78, 0.9, 4), 0.001924818));
1573 
1574     assert(approxEqual(invBetaCDF(0.3483, 2, 3), 0.3));
1575     assert(approxEqual(invBetaCDF(0.9980752, 0.9, 4), 0.78));
1576 }
1577 
1578 /**
1579 The Dirichlet probability density.
1580 
1581 Params:
1582 
1583 x = An input range of observed values.  All must be between [0, 1].  They
1584 must also sum to 1, though this is not checked because small deviations from
1585 this may result due to numerical error.
1586 
1587 alpha = A forward range of parameters.  This must have the same length as
1588 x.
1589 */
1590 double dirichletPDF(X, A)(X x, A alpha)
1591 if(isInputRange!X && isForwardRange!A && is(ElementType!X : double) &&
1592 is(ElementType!A : double)) {
1593 
1594     // Evaluating the multinomial beta function = product(gamma(alpha_1)) over
1595     // gamma(sum(alpha)), in log space.
1596     double logNormalizer = 0;
1597     double sumAlpha = 0;
1598 
1599     foreach(a; alpha.save) {
1600         dstatsEnforce(a > 0, "All alpha values must be > 0 for Dirichlet distribution.");
1601         logNormalizer += logGamma(a);
1602         sumAlpha += a;
1603     }
1604 
1605     logNormalizer -= logGamma(sumAlpha);
1606     double sum = 0;
1607     foreach(xElem, a; lockstep(x, alpha)) {
1608         dstatsEnforce(xElem > 0, "All x values must be > 0 for Dirichlet distribution.");
1609         sum += log(xElem) * (a - 1);
1610     }
1611 
1612     sum -= logNormalizer;
1613     return exp(sum);
1614 }
1615 
1616 unittest {
1617     // Test against beta
1618     assert(approxEqual(dirichletPDF([0.1, 0.9], [2, 3]), betaPDF(0.1, 2, 3)));
1619 
1620     // A few values from R's gregmisc package
1621     assert(approxEqual(dirichletPDF([0.1, 0.2, 0.7], [4, 5, 6]), 1.356672));
1622     assert(approxEqual(dirichletPDF([0.8, 0.05, 0.15], [8, 5, 6]), 0.04390199));
1623 }
1624 
1625 ///
1626 double cauchyPDF(double X, double X0 = 0, double gamma = 1) {
1627     dstatsEnforce(gamma > 0, "gamma must be > 0 for Cauchy distribution.");
1628 
1629     double toSquare = (X - X0) / gamma;
1630     return 1.0L / (
1631         PI * gamma * (1 + toSquare * toSquare));
1632 }
1633 
1634 unittest {
1635     assert(approxEqual(cauchyPDF(5), 0.01224269));
1636     assert(approxEqual(cauchyPDF(2), 0.06366198));
1637 }
1638 
1639 
1640 ///
1641 double cauchyCDF(double X, double X0 = 0, double gamma = 1) {
1642     dstatsEnforce(gamma > 0, "gamma must be > 0 for Cauchy distribution.");
1643 
1644     return M_1_PI * atan((X - X0) / gamma) + 0.5L;
1645 }
1646 
1647 unittest {
1648     // Values from R
1649     assert(approxEqual(cauchyCDF(-10), 0.03172552));
1650     assert(approxEqual(cauchyCDF(1), 0.75));
1651 }
1652 
1653 ///
1654 double cauchyCDFR(double X, double X0 = 0, double gamma = 1) {
1655     dstatsEnforce(gamma > 0, "gamma must be > 0 for Cauchy distribution.");
1656 
1657     return M_1_PI * atan((X0 - X) / gamma) + 0.5L;
1658 }
1659 
1660 unittest {
1661     // Values from R
1662     assert(approxEqual(1 - cauchyCDFR(-10), 0.03172552));
1663     assert(approxEqual(1 - cauchyCDFR(1), 0.75));
1664 }
1665 
1666 ///
1667 double invCauchyCDF(double p, double X0 = 0, double gamma = 1) {
1668     dstatsEnforce(gamma > 0, "gamma must be > 0 for Cauchy distribution.");
1669     dstatsEnforce(p >= 0 && p <= 1, "P-values must be between 0, 1.");
1670 
1671     return X0 + gamma * tan(PI * (p - 0.5L));
1672 }
1673 
1674 unittest {
1675     // cauchyCDF already tested.  Just make sure this is the inverse.
1676     assert(approxEqual(invCauchyCDF(cauchyCDF(.5)), .5));
1677     assert(approxEqual(invCauchyCDF(cauchyCDF(.99)), .99));
1678     assert(approxEqual(invCauchyCDF(cauchyCDF(.03)), .03));
1679 }
1680 
1681 // For K-S tests in dstats.random.  To be fleshed out later.  Intentionally
1682 // lacking ddoc.
1683 double logisticCDF(double x, double loc, double shape) {
1684     return 1.0L / (1 + exp(-(x - loc) / shape));
1685 }
1686 
1687 ///
1688 double laplacePDF(double x, double mu = 0, double b = 1) {
1689     dstatsEnforce(b > 0, "b must be > 0 for laplace distribution.");
1690 
1691     return (exp(-abs(x - mu) / b)) / (2 * b);
1692 }
1693 
1694 unittest {
1695     // Values from Maxima.
1696     assert(approxEqual(laplacePDF(3, 2, 1), 0.18393972058572));
1697     assert(approxEqual(laplacePDF(-8, 6, 7), 0.0096668059454723));
1698 }
1699 
1700 ///
1701 double laplaceCDF(double X, double mu = 0, double b = 1) {
1702     dstatsEnforce(b > 0, "b must be > 0 for laplace distribution.");
1703 
1704     double diff = (X - mu);
1705     double sign = (diff > 0) ? 1 : -1;
1706     return 0.5L *(1 + sign * (1 - exp(-abs(diff) / b)));
1707 }
1708 
1709 unittest {
1710     // Values from Octave.
1711     assert(approxEqual(laplaceCDF(5), 0.9963));
1712     assert(approxEqual(laplaceCDF(-3.14), .021641));
1713     assert(approxEqual(laplaceCDF(0.012), 0.50596));
1714 }
1715 
1716 ///
1717 double laplaceCDFR(double X, double mu = 0, double b = 1) {
1718     dstatsEnforce(b > 0, "b must be > 0 for laplace distribution.");
1719 
1720     double diff = (mu - X);
1721     double sign = (diff > 0) ? 1 : -1;
1722     return 0.5L *(1 + sign * (1 - exp(-abs(diff) / b)));
1723 }
1724 
1725 unittest {
1726     // Values from Octave.
1727     assert(approxEqual(1 - laplaceCDFR(5), 0.9963));
1728     assert(approxEqual(1 - laplaceCDFR(-3.14), .021641));
1729     assert(approxEqual(1 - laplaceCDFR(0.012), 0.50596));
1730 }
1731 
1732 ///
1733 double invLaplaceCDF(double p, double mu = 0, double b = 1) {
1734     dstatsEnforce(p >= 0 && p <= 1, "P-values must be between 0, 1.");
1735     dstatsEnforce(b > 0, "b must be > 0 for laplace distribution.");
1736 
1737     double p05 = p - 0.5L;
1738     double sign = (p05 < 0) ? -1.0L : 1.0L;
1739     return mu - b * sign * log(1.0L - 2 * abs(p05));
1740 }
1741 
1742 unittest {
1743     assert(approxEqual(invLaplaceCDF(0.012), -3.7297));
1744     assert(approxEqual(invLaplaceCDF(0.82), 1.0217));
1745 }
1746 
1747 double kolmDist()(double x) {
1748     pragma(msg, "kolmDist is scheduled for deprecation.  Please use " ~
1749         "kolmogorovDistrib instead.");
1750 
1751     return kolmogorovDistrib(x);
1752 }
1753 
1754 /**Kolmogorov distribution.  Used in Kolmogorov-Smirnov testing.
1755  *
1756  * References: http://en.wikipedia.org/wiki/Kolmogorov-Smirnov
1757  */
1758 double kolmogorovDistrib(immutable double x) {
1759     dstatsEnforce(x >= 0, "x must be >= 0 for Kolmogorov distribution.");
1760 
1761     if(x == 0) {
1762         //Handle as a special case.  Otherwise, get NAN b/c of divide by zero.
1763         return 0;
1764     }
1765 
1766     double result = 0;
1767     double i = 1;
1768     immutable xSquared = x * x;
1769     while(true) {
1770         immutable delta = exp(-(2 * i - 1) * (2 * i - 1) * PI * PI  / (8 * xSquared));
1771         i++;
1772 
1773         immutable oldResult = result;
1774         result += delta;
1775         if(isNaN(result) || oldResult == result) {
1776             break;
1777         }
1778     }
1779     result *= (sqrt(2 * PI) / x);
1780     return result;
1781 }
1782 
1783 unittest {
1784     assert(approxEqual(1 - kolmogorovDistrib(.75), 0.627167));
1785     assert(approxEqual(1 - kolmogorovDistrib(.5), 0.9639452436));
1786     assert(approxEqual(1 - kolmogorovDistrib(.9), 0.39273070));
1787     assert(approxEqual(1 - kolmogorovDistrib(1.2), 0.112249666));
1788 }