From 577adab8574098775b74f39f863889ce6f62b4d1 Mon Sep 17 00:00:00 2001 From: Dennis Eichhorn Date: Sat, 12 Oct 2019 10:29:26 +0200 Subject: [PATCH] phpcs fixes and classifier bug fixes/searching --- Algorithm/Sort/CycleSort.php | 7 ++-- Algorithm/Sort/StoogeSort.php | 10 +++--- Algorithm/Sort/TimSort.php | 3 +- ...yesFilter.php => NaiveBayesClassifier.php} | 34 ++++++++++++++----- ...rTest.php => NaiveBayesClassifierTest.php} | 16 ++++----- 5 files changed, 42 insertions(+), 28 deletions(-) rename Math/Stochastic/{NaiveBayesFilter.php => NaiveBayesClassifier.php} (76%) rename tests/Math/Stochastic/{NaiveBayesFilterTest.php => NaiveBayesClassifierTest.php} (81%) diff --git a/Algorithm/Sort/CycleSort.php b/Algorithm/Sort/CycleSort.php index 745f3068d..72cbbb32e 100644 --- a/Algorithm/Sort/CycleSort.php +++ b/Algorithm/Sort/CycleSort.php @@ -29,14 +29,13 @@ class CycleSort implements SortInterface */ public static function sort(array $list, int $order = SortOrder::ASC) : array { - $writes = 0; - $n = \count($list); + $n = \count($list); if ($n < 2) { return $list; } - for ($start = 0; $start < \count($list) - 1; ++$start) { + for ($start = 0; $start < $n - 1; ++$start) { $item = $list[$start]; $pos = $start; @@ -58,7 +57,6 @@ class CycleSort implements SortInterface $old = $list[$pos]; $list[$pos] = $item; $item = $old; - ++$writes; while ($pos !== $start) { $pos = $start; @@ -76,7 +74,6 @@ class CycleSort implements SortInterface $old = $list[$pos]; $list[$pos] = $item; $item = $old; - ++$writes; } } diff --git a/Algorithm/Sort/StoogeSort.php b/Algorithm/Sort/StoogeSort.php index a5e88216b..988174311 100644 --- a/Algorithm/Sort/StoogeSort.php +++ b/Algorithm/Sort/StoogeSort.php @@ -36,7 +36,7 @@ class StoogeSort implements SortInterface } $copy = $list; - self::stoogeSort($copy, 0, $n - 1, $order); + self::stooge($copy, 0, $n - 1, $order); return $copy; } @@ -53,7 +53,7 @@ class StoogeSort implements SortInterface * * @since 1.0.0 */ - private static function stoogeSort(array &$list, int $lo, int $hi, int $order) : void + private static function stooge(array &$list, int $lo, int $hi, int $order) : void { if ($lo >= $hi) { return; @@ -68,9 +68,9 @@ class StoogeSort implements SortInterface if ($hi - $lo + 1 > 2) { $t = (int) (($hi - $lo + 1) / 3); - self::stoogeSort($list, $lo, $hi - $t, $order); - self::stoogeSort($list, $lo + $t, $hi, $order); - self::stoogeSort($list, $lo, $hi - $t, $order); + self::stooge($list, $lo, $hi - $t, $order); + self::stooge($list, $lo + $t, $hi, $order); + self::stooge($list, $lo, $hi - $t, $order); } } } diff --git a/Algorithm/Sort/TimSort.php b/Algorithm/Sort/TimSort.php index cf735a55c..b68c27a70 100644 --- a/Algorithm/Sort/TimSort.php +++ b/Algorithm/Sort/TimSort.php @@ -50,8 +50,7 @@ class TimSort implements SortInterface $temp = $list[$j]; $c = $j - 1; - while ($c >= $lo && $list[$c]->compare($temp, $order)) - { + while ($c >= $lo && $list[$c]->compare($temp, $order)) { $list[$c + 1] = $list[$c]; --$c; } diff --git a/Math/Stochastic/NaiveBayesFilter.php b/Math/Stochastic/NaiveBayesClassifier.php similarity index 76% rename from Math/Stochastic/NaiveBayesFilter.php rename to Math/Stochastic/NaiveBayesClassifier.php index e7c80df1d..212ed849c 100644 --- a/Math/Stochastic/NaiveBayesFilter.php +++ b/Math/Stochastic/NaiveBayesClassifier.php @@ -24,7 +24,7 @@ use phpOMS\Math\Statistic\MeasureOfDispersion; * @link https://orange-management.org * @since 1.0.0 */ -class NaiveBayesFilter +class NaiveBayesClassifier { /** * Dictionary of different criterias. @@ -85,7 +85,7 @@ class NaiveBayesFilter if (!isset($this->probabilities['attr'][$attr])) { $this->probabilities['attr'][$attr] = [ 'count' => 0, - 'evidence' => 0.0, + 'data' => [], ]; } @@ -152,15 +152,19 @@ class NaiveBayesFilter if (isset($this->dict[$criteria][$attr]['data'][$word]) && $this->dict[$criteria][$attr]['data'][$word] >= $minimum ) { - $p = $this->dict[$criteria][$attr]['data'][$word] / \array_sum($this->dict[$criteria][$attr]['data']) - / $this->probabilities['attr'][$attr]['evidence']; + $p = ($this->dict[$criteria][$attr]['data'][$word] / \array_sum($this->dict[$criteria][$attr]['data'])) + * ($this->probabilities['criteria'][$criteria]['count'] / $this->probabilities['count']) + / $this->probabilities['attr'][$attr]['data'][$word]; $n += \log(1 - $p) - \log($p); } } } else { + // todo: add probability of criteria / total? $p = 1 / \sqrt(2 * \M_PI * $this->probabilities['criteria'][$criteria]['attr'][$attr]['variance']) - * \exp(-($value - $this->probabilities['criteria'][$criteria]['attr'][$attr]['mean']) ** 2 / (2 * $this->probabilities['criteria'][$criteria]['attr'][$attr]['variance'] ** 2)); + * \exp(-($value - $this->probabilities['criteria'][$criteria]['attr'][$attr]['mean']) / (2 * $this->probabilities['criteria'][$criteria]['attr'][$attr]['variance'])); + + var_dump($p); $n += \log(1 - $p) - \log($p); } @@ -178,20 +182,34 @@ class NaiveBayesFilter */ private function cache() : void { + $this->probabilities['attr'] = []; + foreach ($this->dict as $criteria => $subDict) { foreach ($subDict as $attr => $valueArray) { if ($valueArray['type'] === 2) { $this->probabilities['criteria'][$criteria]['attr'][$attr]['mean'] = Average::arithmeticMean($this->dict[$criteria][$attr]['data']); - $this->probabilities['criteria'][$criteria]['attr'][$attr]['variance'] = MeasureOfDispersion::empiricalVariance($this->dict[$criteria][$attr]['data']); + $this->probabilities['criteria'][$criteria]['attr'][$attr]['variance'] = MeasureOfDispersion::empiricalVariance($this->dict[$criteria][$attr]['data'], [], $this->probabilities['criteria'][$criteria]['attr'][$attr]['mean']); + + // \var_dump($criteria); + // \var_dump($attr); + // \var_dump($this->probabilities['criteria'][$criteria]['attr'][$attr]['mean']); // good + // \var_dump($this->probabilities['criteria'][$criteria]['attr'][$attr]['variance']); // bad } else { - $this->probabilities['attr'][$attr]['evidence'] = 0.0; + if (!isset( $this->probabilities['attr'][$attr])) { + $this->probabilities['attr'] = [$attr => ['data' => []]]; + } foreach ($valueArray['data'] as $word => $count) { if (!isset($this->dict[$criteria][$attr]['data'][$word])) { continue; } - $this->probabilities['attr'][$attr]['evidence'] += $this->dict[$criteria][$attr]['data'][$word] / \array_sum($this->dict[$criteria][$attr]['data']); + if (!isset($this->probabilities['attr'][$attr]['data'][$word])) { + $this->probabilities['attr'][$attr]['data'][$word] = 0.0; + } + + $this->probabilities['attr'][$attr]['data'][$word] += ($this->dict[$criteria][$attr]['data'][$word] / \array_sum($this->dict[$criteria][$attr]['data'])) + * ($this->probabilities['criteria'][$criteria]['count'] / $this->probabilities['count']); } } } diff --git a/tests/Math/Stochastic/NaiveBayesFilterTest.php b/tests/Math/Stochastic/NaiveBayesClassifierTest.php similarity index 81% rename from tests/Math/Stochastic/NaiveBayesFilterTest.php rename to tests/Math/Stochastic/NaiveBayesClassifierTest.php index 04d3c0ff3..0fcaff098 100644 --- a/tests/Math/Stochastic/NaiveBayesFilterTest.php +++ b/tests/Math/Stochastic/NaiveBayesClassifierTest.php @@ -14,12 +14,12 @@ namespace phpOMS\tests\Math\Stochastic; -use phpOMS\Math\Stochastic\NaiveBayesFilter; +use phpOMS\Math\Stochastic\NaiveBayesClassifier; /** * @internal */ -class NaiveBayesFilterTest extends \PHPUnit\Framework\TestCase +class NaiveBayesClassifierTest extends \PHPUnit\Framework\TestCase { const PLAY = [ ['weather' => ['Overcast']], @@ -55,28 +55,28 @@ class NaiveBayesFilterTest extends \PHPUnit\Framework\TestCase ['height' => 5.75, 'weight' => 150, 'foot' => 9], ]; - public function testTextFilter() : void + public function testTextClassifier() : void { - $filter = new NaiveBayesFilter(); + $filter = new NaiveBayesClassifier(); $filter->train('play', self::PLAY); $filter->train('noplay', self::NO_PLAY); self::assertEqualsWithDelta( - 0.64, + 0.6, $filter->match('play', ['weather' => ['Sunny']], 1), 0.01 ); } - public function testNumericFilter() : void + public function testNumericClassifier() : void { - $filter = new NaiveBayesFilter(); + $filter = new NaiveBayesClassifier(); $filter->train('male', self::MALE); $filter->train('female', self::FEMALE); self::assertEqualsWithDelta( 0.64, - $filter->match('play', ['height' => 6, 'weight' => 130, 'foot' => 8]), + $filter->match('female', ['height' => 6, 'weight' => 130, 'foot' => 8]), 0.01 ); }