phpcs fixes and classifier bug fixes/searching

This commit is contained in:
Dennis Eichhorn 2019-10-12 10:29:26 +02:00
parent 85abb6c520
commit 577adab857
5 changed files with 42 additions and 28 deletions

View File

@ -29,14 +29,13 @@ class CycleSort implements SortInterface
*/ */
public static function sort(array $list, int $order = SortOrder::ASC) : array public static function sort(array $list, int $order = SortOrder::ASC) : array
{ {
$writes = 0; $n = \count($list);
$n = \count($list);
if ($n < 2) { if ($n < 2) {
return $list; return $list;
} }
for ($start = 0; $start < \count($list) - 1; ++$start) { for ($start = 0; $start < $n - 1; ++$start) {
$item = $list[$start]; $item = $list[$start];
$pos = $start; $pos = $start;
@ -58,7 +57,6 @@ class CycleSort implements SortInterface
$old = $list[$pos]; $old = $list[$pos];
$list[$pos] = $item; $list[$pos] = $item;
$item = $old; $item = $old;
++$writes;
while ($pos !== $start) { while ($pos !== $start) {
$pos = $start; $pos = $start;
@ -76,7 +74,6 @@ class CycleSort implements SortInterface
$old = $list[$pos]; $old = $list[$pos];
$list[$pos] = $item; $list[$pos] = $item;
$item = $old; $item = $old;
++$writes;
} }
} }

View File

@ -36,7 +36,7 @@ class StoogeSort implements SortInterface
} }
$copy = $list; $copy = $list;
self::stoogeSort($copy, 0, $n - 1, $order); self::stooge($copy, 0, $n - 1, $order);
return $copy; return $copy;
} }
@ -53,7 +53,7 @@ class StoogeSort implements SortInterface
* *
* @since 1.0.0 * @since 1.0.0
*/ */
private static function stoogeSort(array &$list, int $lo, int $hi, int $order) : void private static function stooge(array &$list, int $lo, int $hi, int $order) : void
{ {
if ($lo >= $hi) { if ($lo >= $hi) {
return; return;
@ -68,9 +68,9 @@ class StoogeSort implements SortInterface
if ($hi - $lo + 1 > 2) { if ($hi - $lo + 1 > 2) {
$t = (int) (($hi - $lo + 1) / 3); $t = (int) (($hi - $lo + 1) / 3);
self::stoogeSort($list, $lo, $hi - $t, $order); self::stooge($list, $lo, $hi - $t, $order);
self::stoogeSort($list, $lo + $t, $hi, $order); self::stooge($list, $lo + $t, $hi, $order);
self::stoogeSort($list, $lo, $hi - $t, $order); self::stooge($list, $lo, $hi - $t, $order);
} }
} }
} }

View File

@ -50,8 +50,7 @@ class TimSort implements SortInterface
$temp = $list[$j]; $temp = $list[$j];
$c = $j - 1; $c = $j - 1;
while ($c >= $lo && $list[$c]->compare($temp, $order)) while ($c >= $lo && $list[$c]->compare($temp, $order)) {
{
$list[$c + 1] = $list[$c]; $list[$c + 1] = $list[$c];
--$c; --$c;
} }

View File

@ -24,7 +24,7 @@ use phpOMS\Math\Statistic\MeasureOfDispersion;
* @link https://orange-management.org * @link https://orange-management.org
* @since 1.0.0 * @since 1.0.0
*/ */
class NaiveBayesFilter class NaiveBayesClassifier
{ {
/** /**
* Dictionary of different criterias. * Dictionary of different criterias.
@ -85,7 +85,7 @@ class NaiveBayesFilter
if (!isset($this->probabilities['attr'][$attr])) { if (!isset($this->probabilities['attr'][$attr])) {
$this->probabilities['attr'][$attr] = [ $this->probabilities['attr'][$attr] = [
'count' => 0, 'count' => 0,
'evidence' => 0.0, 'data' => [],
]; ];
} }
@ -152,15 +152,19 @@ class NaiveBayesFilter
if (isset($this->dict[$criteria][$attr]['data'][$word]) if (isset($this->dict[$criteria][$attr]['data'][$word])
&& $this->dict[$criteria][$attr]['data'][$word] >= $minimum && $this->dict[$criteria][$attr]['data'][$word] >= $minimum
) { ) {
$p = $this->dict[$criteria][$attr]['data'][$word] / \array_sum($this->dict[$criteria][$attr]['data']) $p = ($this->dict[$criteria][$attr]['data'][$word] / \array_sum($this->dict[$criteria][$attr]['data']))
/ $this->probabilities['attr'][$attr]['evidence']; * ($this->probabilities['criteria'][$criteria]['count'] / $this->probabilities['count'])
/ $this->probabilities['attr'][$attr]['data'][$word];
$n += \log(1 - $p) - \log($p); $n += \log(1 - $p) - \log($p);
} }
} }
} else { } else {
// todo: add probability of criteria / total?
$p = 1 / \sqrt(2 * \M_PI * $this->probabilities['criteria'][$criteria]['attr'][$attr]['variance']) $p = 1 / \sqrt(2 * \M_PI * $this->probabilities['criteria'][$criteria]['attr'][$attr]['variance'])
* \exp(-($value - $this->probabilities['criteria'][$criteria]['attr'][$attr]['mean']) ** 2 / (2 * $this->probabilities['criteria'][$criteria]['attr'][$attr]['variance'] ** 2)); * \exp(-($value - $this->probabilities['criteria'][$criteria]['attr'][$attr]['mean']) / (2 * $this->probabilities['criteria'][$criteria]['attr'][$attr]['variance']));
var_dump($p);
$n += \log(1 - $p) - \log($p); $n += \log(1 - $p) - \log($p);
} }
@ -178,20 +182,34 @@ class NaiveBayesFilter
*/ */
private function cache() : void private function cache() : void
{ {
$this->probabilities['attr'] = [];
foreach ($this->dict as $criteria => $subDict) { foreach ($this->dict as $criteria => $subDict) {
foreach ($subDict as $attr => $valueArray) { foreach ($subDict as $attr => $valueArray) {
if ($valueArray['type'] === 2) { if ($valueArray['type'] === 2) {
$this->probabilities['criteria'][$criteria]['attr'][$attr]['mean'] = Average::arithmeticMean($this->dict[$criteria][$attr]['data']); $this->probabilities['criteria'][$criteria]['attr'][$attr]['mean'] = Average::arithmeticMean($this->dict[$criteria][$attr]['data']);
$this->probabilities['criteria'][$criteria]['attr'][$attr]['variance'] = MeasureOfDispersion::empiricalVariance($this->dict[$criteria][$attr]['data']); $this->probabilities['criteria'][$criteria]['attr'][$attr]['variance'] = MeasureOfDispersion::empiricalVariance($this->dict[$criteria][$attr]['data'], [], $this->probabilities['criteria'][$criteria]['attr'][$attr]['mean']);
// \var_dump($criteria);
// \var_dump($attr);
// \var_dump($this->probabilities['criteria'][$criteria]['attr'][$attr]['mean']); // good
// \var_dump($this->probabilities['criteria'][$criteria]['attr'][$attr]['variance']); // bad
} else { } else {
$this->probabilities['attr'][$attr]['evidence'] = 0.0; if (!isset( $this->probabilities['attr'][$attr])) {
$this->probabilities['attr'] = [$attr => ['data' => []]];
}
foreach ($valueArray['data'] as $word => $count) { foreach ($valueArray['data'] as $word => $count) {
if (!isset($this->dict[$criteria][$attr]['data'][$word])) { if (!isset($this->dict[$criteria][$attr]['data'][$word])) {
continue; continue;
} }
$this->probabilities['attr'][$attr]['evidence'] += $this->dict[$criteria][$attr]['data'][$word] / \array_sum($this->dict[$criteria][$attr]['data']); if (!isset($this->probabilities['attr'][$attr]['data'][$word])) {
$this->probabilities['attr'][$attr]['data'][$word] = 0.0;
}
$this->probabilities['attr'][$attr]['data'][$word] += ($this->dict[$criteria][$attr]['data'][$word] / \array_sum($this->dict[$criteria][$attr]['data']))
* ($this->probabilities['criteria'][$criteria]['count'] / $this->probabilities['count']);
} }
} }
} }

View File

@ -14,12 +14,12 @@
namespace phpOMS\tests\Math\Stochastic; namespace phpOMS\tests\Math\Stochastic;
use phpOMS\Math\Stochastic\NaiveBayesFilter; use phpOMS\Math\Stochastic\NaiveBayesClassifier;
/** /**
* @internal * @internal
*/ */
class NaiveBayesFilterTest extends \PHPUnit\Framework\TestCase class NaiveBayesClassifierTest extends \PHPUnit\Framework\TestCase
{ {
const PLAY = [ const PLAY = [
['weather' => ['Overcast']], ['weather' => ['Overcast']],
@ -55,28 +55,28 @@ class NaiveBayesFilterTest extends \PHPUnit\Framework\TestCase
['height' => 5.75, 'weight' => 150, 'foot' => 9], ['height' => 5.75, 'weight' => 150, 'foot' => 9],
]; ];
public function testTextFilter() : void public function testTextClassifier() : void
{ {
$filter = new NaiveBayesFilter(); $filter = new NaiveBayesClassifier();
$filter->train('play', self::PLAY); $filter->train('play', self::PLAY);
$filter->train('noplay', self::NO_PLAY); $filter->train('noplay', self::NO_PLAY);
self::assertEqualsWithDelta( self::assertEqualsWithDelta(
0.64, 0.6,
$filter->match('play', ['weather' => ['Sunny']], 1), $filter->match('play', ['weather' => ['Sunny']], 1),
0.01 0.01
); );
} }
public function testNumericFilter() : void public function testNumericClassifier() : void
{ {
$filter = new NaiveBayesFilter(); $filter = new NaiveBayesClassifier();
$filter->train('male', self::MALE); $filter->train('male', self::MALE);
$filter->train('female', self::FEMALE); $filter->train('female', self::FEMALE);
self::assertEqualsWithDelta( self::assertEqualsWithDelta(
0.64, 0.64,
$filter->match('play', ['height' => 6, 'weight' => 130, 'foot' => 8]), $filter->match('female', ['height' => 6, 'weight' => 130, 'foot' => 8]),
0.01 0.01
); );
} }