phpcs fixes and classifier bug fixes/searching

This commit is contained in:
Dennis Eichhorn 2019-10-12 10:29:26 +02:00
parent 85abb6c520
commit 577adab857
5 changed files with 42 additions and 28 deletions

View File

@ -29,14 +29,13 @@ class CycleSort implements SortInterface
*/
public static function sort(array $list, int $order = SortOrder::ASC) : array
{
$writes = 0;
$n = \count($list);
$n = \count($list);
if ($n < 2) {
return $list;
}
for ($start = 0; $start < \count($list) - 1; ++$start) {
for ($start = 0; $start < $n - 1; ++$start) {
$item = $list[$start];
$pos = $start;
@ -58,7 +57,6 @@ class CycleSort implements SortInterface
$old = $list[$pos];
$list[$pos] = $item;
$item = $old;
++$writes;
while ($pos !== $start) {
$pos = $start;
@ -76,7 +74,6 @@ class CycleSort implements SortInterface
$old = $list[$pos];
$list[$pos] = $item;
$item = $old;
++$writes;
}
}

View File

@ -36,7 +36,7 @@ class StoogeSort implements SortInterface
}
$copy = $list;
self::stoogeSort($copy, 0, $n - 1, $order);
self::stooge($copy, 0, $n - 1, $order);
return $copy;
}
@ -53,7 +53,7 @@ class StoogeSort implements SortInterface
*
* @since 1.0.0
*/
private static function stoogeSort(array &$list, int $lo, int $hi, int $order) : void
private static function stooge(array &$list, int $lo, int $hi, int $order) : void
{
if ($lo >= $hi) {
return;
@ -68,9 +68,9 @@ class StoogeSort implements SortInterface
if ($hi - $lo + 1 > 2) {
$t = (int) (($hi - $lo + 1) / 3);
self::stoogeSort($list, $lo, $hi - $t, $order);
self::stoogeSort($list, $lo + $t, $hi, $order);
self::stoogeSort($list, $lo, $hi - $t, $order);
self::stooge($list, $lo, $hi - $t, $order);
self::stooge($list, $lo + $t, $hi, $order);
self::stooge($list, $lo, $hi - $t, $order);
}
}
}

View File

@ -50,8 +50,7 @@ class TimSort implements SortInterface
$temp = $list[$j];
$c = $j - 1;
while ($c >= $lo && $list[$c]->compare($temp, $order))
{
while ($c >= $lo && $list[$c]->compare($temp, $order)) {
$list[$c + 1] = $list[$c];
--$c;
}

View File

@ -24,7 +24,7 @@ use phpOMS\Math\Statistic\MeasureOfDispersion;
* @link https://orange-management.org
* @since 1.0.0
*/
class NaiveBayesFilter
class NaiveBayesClassifier
{
/**
* Dictionary of different criteria.
@ -85,7 +85,7 @@ class NaiveBayesFilter
if (!isset($this->probabilities['attr'][$attr])) {
$this->probabilities['attr'][$attr] = [
'count' => 0,
'evidence' => 0.0,
'data' => [],
];
}
@ -152,15 +152,19 @@ class NaiveBayesFilter
if (isset($this->dict[$criteria][$attr]['data'][$word])
&& $this->dict[$criteria][$attr]['data'][$word] >= $minimum
) {
$p = $this->dict[$criteria][$attr]['data'][$word] / \array_sum($this->dict[$criteria][$attr]['data'])
/ $this->probabilities['attr'][$attr]['evidence'];
$p = ($this->dict[$criteria][$attr]['data'][$word] / \array_sum($this->dict[$criteria][$attr]['data']))
* ($this->probabilities['criteria'][$criteria]['count'] / $this->probabilities['count'])
/ $this->probabilities['attr'][$attr]['data'][$word];
$n += \log(1 - $p) - \log($p);
}
}
} else {
// todo: also weight by the prior of the criteria (criteria count / total count), as is done for text attributes?
$p = 1 / \sqrt(2 * \M_PI * $this->probabilities['criteria'][$criteria]['attr'][$attr]['variance'])
* \exp(-($value - $this->probabilities['criteria'][$criteria]['attr'][$attr]['mean']) ** 2 / (2 * $this->probabilities['criteria'][$criteria]['attr'][$attr]['variance'] ** 2));
* \exp(-($value - $this->probabilities['criteria'][$criteria]['attr'][$attr]['mean']) / (2 * $this->probabilities['criteria'][$criteria]['attr'][$attr]['variance']));
var_dump($p);
$n += \log(1 - $p) - \log($p);
}
@ -178,20 +182,34 @@ class NaiveBayesFilter
*/
private function cache() : void
{
$this->probabilities['attr'] = [];
foreach ($this->dict as $criteria => $subDict) {
foreach ($subDict as $attr => $valueArray) {
if ($valueArray['type'] === 2) {
$this->probabilities['criteria'][$criteria]['attr'][$attr]['mean'] = Average::arithmeticMean($this->dict[$criteria][$attr]['data']);
$this->probabilities['criteria'][$criteria]['attr'][$attr]['variance'] = MeasureOfDispersion::empiricalVariance($this->dict[$criteria][$attr]['data']);
$this->probabilities['criteria'][$criteria]['attr'][$attr]['variance'] = MeasureOfDispersion::empiricalVariance($this->dict[$criteria][$attr]['data'], [], $this->probabilities['criteria'][$criteria]['attr'][$attr]['mean']);
// \var_dump($criteria);
// \var_dump($attr);
// \var_dump($this->probabilities['criteria'][$criteria]['attr'][$attr]['mean']); // good
// \var_dump($this->probabilities['criteria'][$criteria]['attr'][$attr]['variance']); // bad
} else {
$this->probabilities['attr'][$attr]['evidence'] = 0.0;
if (!isset( $this->probabilities['attr'][$attr])) {
$this->probabilities['attr'] = [$attr => ['data' => []]];
}
foreach ($valueArray['data'] as $word => $count) {
if (!isset($this->dict[$criteria][$attr]['data'][$word])) {
continue;
}
$this->probabilities['attr'][$attr]['evidence'] += $this->dict[$criteria][$attr]['data'][$word] / \array_sum($this->dict[$criteria][$attr]['data']);
if (!isset($this->probabilities['attr'][$attr]['data'][$word])) {
$this->probabilities['attr'][$attr]['data'][$word] = 0.0;
}
$this->probabilities['attr'][$attr]['data'][$word] += ($this->dict[$criteria][$attr]['data'][$word] / \array_sum($this->dict[$criteria][$attr]['data']))
* ($this->probabilities['criteria'][$criteria]['count'] / $this->probabilities['count']);
}
}
}

View File

@ -14,12 +14,12 @@
namespace phpOMS\tests\Math\Stochastic;
use phpOMS\Math\Stochastic\NaiveBayesFilter;
use phpOMS\Math\Stochastic\NaiveBayesClassifier;
/**
* @internal
*/
class NaiveBayesFilterTest extends \PHPUnit\Framework\TestCase
class NaiveBayesClassifierTest extends \PHPUnit\Framework\TestCase
{
const PLAY = [
['weather' => ['Overcast']],
@ -55,28 +55,28 @@ class NaiveBayesFilterTest extends \PHPUnit\Framework\TestCase
['height' => 5.75, 'weight' => 150, 'foot' => 9],
];
public function testTextFilter() : void
public function testTextClassifier() : void
{
$filter = new NaiveBayesFilter();
$filter = new NaiveBayesClassifier();
$filter->train('play', self::PLAY);
$filter->train('noplay', self::NO_PLAY);
self::assertEqualsWithDelta(
0.64,
0.6,
$filter->match('play', ['weather' => ['Sunny']], 1),
0.01
);
}
public function testNumericFilter() : void
public function testNumericClassifier() : void
{
$filter = new NaiveBayesFilter();
$filter = new NaiveBayesClassifier();
$filter->train('male', self::MALE);
$filter->train('female', self::FEMALE);
self::assertEqualsWithDelta(
0.64,
$filter->match('play', ['height' => 6, 'weight' => 130, 'foot' => 8]),
$filter->match('female', ['height' => 6, 'weight' => 130, 'foot' => 8]),
0.01
);
}