From 4ec4e95819b06931fc8ae5df9a8020b32ce40762 Mon Sep 17 00:00:00 2001
From: Dennis Eichhorn
Date: Mon, 4 May 2020 22:39:21 +0200
Subject: [PATCH] continue impl. of distributions/stochastics/statistics

---
 Math/Statistic/Correlation.php                |  2 +-
 Math/Statistic/MeasureOfDispersion.php        | 32 +++++++++--
 .../HypergeometricDistribution.php            | 20 +++++++
 .../Distribution/LogNormalDistribution.php    | 16 ++++++
 .../Distribution/NormalDistribution.php       | 23 ++++++++
 .../Distribution/PoissonDistribution.php      | 27 ++++++++++
 .../Stochastic/Distribution/TDistribution.php | 53 +++++++++++++++++++
 .../Distribution/WeibullDistribution.php      | 19 +++++++
 Math/Stochastic/Distribution/ZTest.php        | 25 ++++-----
 .../Statistic/MeasureOfDispersionTest.php     |  4 +-
 10 files changed, 201 insertions(+), 20 deletions(-)

diff --git a/Math/Statistic/Correlation.php b/Math/Statistic/Correlation.php
index 470e0dab8..abbed73e8 100644
--- a/Math/Statistic/Correlation.php
+++ b/Math/Statistic/Correlation.php
@@ -48,7 +48,7 @@ final class Correlation
      */
     public static function bravaisPersonCorrelationCoefficient(array $x, array $y) : float
     {
-        return MeasureOfDispersion::empiricalCovariance($x, $y) / (MeasureOfDispersion::standardDeviation($x) * MeasureOfDispersion::standardDeviation($y));
+        return MeasureOfDispersion::empiricalCovariance($x, $y) / (MeasureOfDispersion::standardDeviationSample($x) * MeasureOfDispersion::standardDeviationSample($y));
     }
 
     /**
diff --git a/Math/Statistic/MeasureOfDispersion.php b/Math/Statistic/MeasureOfDispersion.php
index 67af1f184..65b106478 100644
--- a/Math/Statistic/MeasureOfDispersion.php
+++ b/Math/Statistic/MeasureOfDispersion.php
@@ -79,11 +79,11 @@ final class MeasureOfDispersion
             throw new ZeroDivisionException();
         }
 
-        return self::standardDeviation($values) / $mean;
+        return self::standardDeviationSample($values) / $mean;
     }
 
     /**
-     * Calculage standard deviation.
+     * Calculate standard deviation of sample.
      *
      * Example: ([4, 5, 9, 1, 3])
      *
@@ -96,7 +96,7 @@ final class MeasureOfDispersion
      *
      * @since 1.0.0
      */
-    public static function standardDeviation(array $values, float $mean = null) : float
+    public static function standardDeviationSample(array $values, ?float $mean = null) : float
     {
         $mean = $mean !== null ? $mean : Average::arithmeticMean($values);
         $sum  = 0.0;
@@ -108,6 +108,32 @@ final class MeasureOfDispersion
         return \sqrt($sum / (\count($values) - 1));
     }
 
+    /**
+     * Calculate standard deviation of entire population (divides by n, not n - 1).
+     *
+     * Example: ([4, 5, 9, 1, 3])
+     *
+     * @latex \sigma = \sqrt{\sigma^{2}} = \sqrt{Var(X)}
+     *
+     * @param array      $values Values
+     * @param null|float $mean   Mean (arithmetic mean of $values is used if null)
+     *
+     * @return float
+     *
+     * @since 1.0.0
+     */
+    public static function standardDeviationPopulation(array $values, ?float $mean = null) : float
+    {
+        $mean = $mean !== null ? $mean : Average::arithmeticMean($values);
+        $sum  = 0.0;
+
+        foreach ($values as $value) {
+            $sum += ($value - $mean) ** 2;
+        }
+
+        return \sqrt($sum / \count($values));
+    }
+
     /**
      * Calculage sample variance.
      *
diff --git a/Math/Stochastic/Distribution/HypergeometricDistribution.php b/Math/Stochastic/Distribution/HypergeometricDistribution.php
index 5442cbefd..c680661fd 100644
--- a/Math/Stochastic/Distribution/HypergeometricDistribution.php
+++ b/Math/Stochastic/Distribution/HypergeometricDistribution.php
@@ -139,4 +139,24 @@ final class HypergeometricDistribution
 
         return (($N - 1) * $N ** 2 * ($N * ($N + 1) - 6 * $K * ($N - $K) - 6 * $n * ($N - $n)) + 6 * $n * $K * ($N - $K) * ($N - $n) * (5 * $N - 6)) / ($n * $K * ($N - $K) * ($N - $n) * ($N - 2) * ($N - 3));
     }
+
+    /**
+     * Probability of drawing exactly $sampleSuccesses successes in $samples draws without replacement.
+     *
+     * @param int $sampleSuccesses     Amount of sample successes
+     * @param int $samples             Sample size
+     * @param int $populationSuccesses Amount of population successes
+     * @param int $population          Population size
+     *
+     * @return float
+     *
+     * @since 1.0.0
+     */
+    public static function dist(int $sampleSuccesses, int $samples, int $populationSuccesses, int $population) : float
+    {
+        // P(X = k) = C(K, k) * C(N - K, n - k) / C(N, n); every "round(fact / fact) / fact" term below is one binomial coefficient.
+        return (\round(Functions::fact($populationSuccesses) / Functions::fact($populationSuccesses - $sampleSuccesses)) / Functions::fact($sampleSuccesses)
+            * \round(Functions::fact($population - $populationSuccesses) / Functions::fact($population - $populationSuccesses - ($samples - $sampleSuccesses))) / Functions::fact($samples - $sampleSuccesses))
+            / (\round(Functions::fact($population) / Functions::fact($population - $samples)) / Functions::fact($samples));
+    }
 }
diff --git a/Math/Stochastic/Distribution/LogNormalDistribution.php b/Math/Stochastic/Distribution/LogNormalDistribution.php
index 6267685ec..ead320dff 100644
--- a/Math/Stochastic/Distribution/LogNormalDistribution.php
+++ b/Math/Stochastic/Distribution/LogNormalDistribution.php
@@ -173,4 +173,20 @@ final class LogNormalDistribution
             [0, 1 / (2 * $sigma ** 2)],
         ];
     }
+
+    /**
+     * Cumulative log-normal distribution, evaluated through the standard normal distribution.
+     *
+     * @param float $value             Value
+     * @param float $mean              Mean
+     * @param float $standardDeviation Standard deviation
+     *
+     * @return float
+     *
+     * @since 1.0.0
+     */
+    public static function dist(float $value, float $mean, float $standardDeviation) : float
+    {
+        return NormalDistribution::dist((\log($value) - $mean) / $standardDeviation, 0.0, 1.0, true);
+    }
 }
diff --git a/Math/Stochastic/Distribution/NormalDistribution.php b/Math/Stochastic/Distribution/NormalDistribution.php
index b1c2f4f2c..f7d0d6300 100644
--- a/Math/Stochastic/Distribution/NormalDistribution.php
+++ b/Math/Stochastic/Distribution/NormalDistribution.php
@@ -14,6 +14,8 @@ declare(strict_types=1);
 
 namespace phpOMS\Math\Stochastic\Distribution;
 
+use phpOMS\Math\Functions\Functions;
+
 /**
  * Normal distribution.
  *
@@ -109,6 +111,8 @@ final class NormalDistribution
      *
      * @return float
      *
+     * @todo: compare with Functions::getErf($x);
+     *
      * @since 1.0.0
      */
     private static function erf(float $x) : float
@@ -245,4 +249,23 @@ final class NormalDistribution
     {
         return 0;
     }
+
+    /**
+     * Normal distribution: cumulative distribution if $isCumulative, probability density otherwise.
+     *
+     * @param float $value             Value
+     * @param float $mean              Mean
+     * @param float $standardDeviation Standard deviation
+     * @param bool  $isCumulative      Cumulative
+     *
+     * @return float
+     *
+     * @since 1.0.0
+     */
+    public static function dist(float $value, float $mean, float $standardDeviation, bool $isCumulative = true) : float
+    {
+        return $isCumulative
+            ? 0.5 * (1 + Functions::getErf(($value - $mean) / ($standardDeviation * \sqrt(2))))
+            : 1 / (\sqrt(2 * \M_PI) * $standardDeviation) * \exp(-\pow($value - $mean, 2) / (2 * $standardDeviation * $standardDeviation));
+    }
 }
diff --git a/Math/Stochastic/Distribution/PoissonDistribution.php b/Math/Stochastic/Distribution/PoissonDistribution.php
index 4a06bc75d..e8f57a32b 100644
--- a/Math/Stochastic/Distribution/PoissonDistribution.php
+++ b/Math/Stochastic/Distribution/PoissonDistribution.php
@@ -191,4 +191,31 @@ final class PoissonDistribution
     {
         return \pow($lambda, -1);
     }
+
+    /**
+     * Poisson distribution: cumulative up to floor($value) if $isCumulative, probability mass at floor($value) otherwise.
+     *
+     * @param float $value        Value
+     * @param float $mean         Mean
+     * @param bool  $isCumulative Cumulative
+     *
+     * @return float
+     *
+     * @since 1.0.0
+     */
+    public static function dist(float $value, float $mean, bool $isCumulative = true) : float
+    {
+        if (!$isCumulative) {
+            return \exp(-$mean) * \pow($mean, \floor($value)) / Functions::fact((int) \floor($value));
+        }
+
+        $sum   = 0.0;
+        $limit = \floor($value);
+
+        for ($i = 0; $i <= $limit; ++$i) {
+            $sum += \pow($mean, $i) / Functions::fact($i);
+        }
+
+        return \exp(-$mean) * $sum;
+    }
 }
diff --git a/Math/Stochastic/Distribution/TDistribution.php b/Math/Stochastic/Distribution/TDistribution.php
index e848b1a3f..e4ef90fde 100644
--- a/Math/Stochastic/Distribution/TDistribution.php
+++ b/Math/Stochastic/Distribution/TDistribution.php
@@ -159,4 +159,57 @@ final class TDistribution
     {
         return $nu < 5 && $nu > 2 ? \PHP_FLOAT_MAX : 6 / ($nu - 4);
     }
+
+    /**
+     * Tail probability of the t-distribution (returns 0.0 for a negative value or invalid degrees/tails).
+     *
+     * @param float $value   Value
+     * @param int   $degrees Degrees of freedom
+     * @param int   $tails   Tails (1 or 2)
+     *
+     * @return float
+     *
+     * @since 1.0.0
+     */
+    public static function dist(float $value, int $degrees, int $tails = 2) : float
+    {
+        if ($value < 0.0 || $degrees < 1 || $tails < 1 || $tails > 2) {
+            return 0.0;
+        }
+
+        /**
+         * "AS 3" by B E Cooper of the Atlas Computer Laboratory
+         * Ellis Horwood Ltd.; W. Sussex, England
+         */
+        $term  = $degrees;
+        $theta = \atan2($value, \sqrt($term));
+        $cos   = \cos($theta);
+        $sin   = \sin($theta);
+        $sum   = 0.0;
+
+        if ($degrees % 2 === 1) {
+            $i    = 3;
+            $term = $cos;
+        } else {
+            $i    = 2;
+            $term = 1;
+        }
+
+        $sum = $term;
+        while ($i < $degrees) {
+            $term *= $cos ** 2 * ($i - 1) / $i;
+            $sum  += $term;
+            $i    += 2;
+        }
+
+        $sum *= $sin;
+
+        if ($degrees % 2 === 1) {
+            $sum = 2 / \M_PI * ($sum + $theta);
+        }
+
+        $t = 0.5 * (1 + $sum);
+
+        return $tails === 1 ? 1 - \abs($t) : 1 - \abs(1 - $t - $t);
+    }
 }
diff --git a/Math/Stochastic/Distribution/WeibullDistribution.php b/Math/Stochastic/Distribution/WeibullDistribution.php
index ce7d8af8e..dcc49433c 100644
--- a/Math/Stochastic/Distribution/WeibullDistribution.php
+++ b/Math/Stochastic/Distribution/WeibullDistribution.php
@@ -101,4 +101,23 @@ final class WeibullDistribution
 
         return $gamma * (1 - 1 / $k) + \log($lambda / $k) + 1;
     }
+
+    /**
+     * Weibull distribution: cumulative distribution if $isCumulative, probability density otherwise.
+     *
+     * @param float $value        Value
+     * @param float $alpha        Alpha
+     * @param float $beta         Beta
+     * @param bool  $isCumulative Cumulative
+     *
+     * @return float
+     *
+     * @since 1.0.0
+     */
+    public static function dist(float $value, float $alpha, float $beta, bool $isCumulative = true) : float
+    {
+        return $isCumulative
+            ? 1 - \exp(-\pow($value / $beta, $alpha))
+            : $alpha / \pow($beta, $alpha) * \pow($value, $alpha - 1) * \exp(-\pow($value / $beta, $alpha));
+    }
 }
diff --git a/Math/Stochastic/Distribution/ZTest.php b/Math/Stochastic/Distribution/ZTest.php
index 384a02a8d..288b043e8 100644
--- a/Math/Stochastic/Distribution/ZTest.php
+++ b/Math/Stochastic/Distribution/ZTest.php
@@ -14,6 +14,9 @@ declare(strict_types=1);
 
 namespace phpOMS\Math\Stochastic\Distribution;
 
+use phpOMS\Math\Statistic\Average;
+use phpOMS\Math\Statistic\MeasureOfDispersion;
+
 /**
  * ZTest
  *
@@ -38,26 +41,20 @@ final class ZTest
     /**
      * Test hypthesis.
      *
-     * @param float $dataset      Value observed
-     * @param float $expected     Expected value
-     * @param float $total        Observed dataset size
-     * @param float $significance Significance
+     * @param array      $data  Data
+     * @param float      $alpha Hypothesized mean the data is tested against
+     * @param null|float $sigma Standard deviation (sample standard deviation of $data is used if null)
      *
-     * @return bool
+     * @return float
      *
      * @since 1.0.0
      */
-    public static function testHypothesis(float $dataset, float $expected, float $total, float $significance = 0.95) : bool
+    public static function testHypothesis(array $data, float $alpha, ?float $sigma = null) : float
     {
-        $z = ($dataset - $expected) / \sqrt($expected * (1 - $expected) / $total);
-
-        $zSignificance = 0.0;
-        foreach (self::TABLE as $key => $value) {
-            if ($significance === $value) {
-                $zSignificance = (float) $key;
-            }
+        if ($sigma === null) {
+            $sigma = MeasureOfDispersion::standardDeviationSample($data);
         }
 
-        return $z > -$key && $z < $key;
+        return 1 - NormalDistribution::dist((Average::arithmeticMean($data) - $alpha) / ($sigma / \sqrt(\count($data))), 0.0, 1.0, true);
     }
 }
diff --git a/tests/Math/Statistic/MeasureOfDispersionTest.php b/tests/Math/Statistic/MeasureOfDispersionTest.php
index a3946d24e..79f7c2ff7 100644
--- a/tests/Math/Statistic/MeasureOfDispersionTest.php
+++ b/tests/Math/Statistic/MeasureOfDispersionTest.php
@@ -36,9 +36,9 @@ class MeasureOfDispersionTest extends \PHPUnit\Framework\TestCase
      * @testdox The standard deviation is correctly calculated
      * @group framework
      */
-    public function testStandardDeviation() : void
+    public function testStandardDeviationSample() : void
     {
-        self::assertEqualsWithDelta(2.160246, MeasureOfDispersion::standardDeviation([1, 2, 3, 4, 5, 6, 7]), 0.01);
+        self::assertEqualsWithDelta(2.160246, MeasureOfDispersion::standardDeviationSample([1, 2, 3, 4, 5, 6, 7]), 0.01);
     }
 
     /**