mirror of
https://github.com/Karaka-Management/phpOMS.git
synced 2026-01-22 06:18:41 +00:00
phpcs fixes and classifier bug fixes/searching
This commit is contained in:
parent
85abb6c520
commit
577adab857
|
|
@ -29,14 +29,13 @@ class CycleSort implements SortInterface
|
|||
*/
|
||||
public static function sort(array $list, int $order = SortOrder::ASC) : array
|
||||
{
|
||||
$writes = 0;
|
||||
$n = \count($list);
|
||||
$n = \count($list);
|
||||
|
||||
if ($n < 2) {
|
||||
return $list;
|
||||
}
|
||||
|
||||
for ($start = 0; $start < \count($list) - 1; ++$start) {
|
||||
for ($start = 0; $start < $n - 1; ++$start) {
|
||||
$item = $list[$start];
|
||||
|
||||
$pos = $start;
|
||||
|
|
@ -58,7 +57,6 @@ class CycleSort implements SortInterface
|
|||
$old = $list[$pos];
|
||||
$list[$pos] = $item;
|
||||
$item = $old;
|
||||
++$writes;
|
||||
|
||||
while ($pos !== $start) {
|
||||
$pos = $start;
|
||||
|
|
@ -76,7 +74,6 @@ class CycleSort implements SortInterface
|
|||
$old = $list[$pos];
|
||||
$list[$pos] = $item;
|
||||
$item = $old;
|
||||
++$writes;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ class StoogeSort implements SortInterface
|
|||
}
|
||||
|
||||
$copy = $list;
|
||||
self::stoogeSort($copy, 0, $n - 1, $order);
|
||||
self::stooge($copy, 0, $n - 1, $order);
|
||||
|
||||
return $copy;
|
||||
}
|
||||
|
|
@ -53,7 +53,7 @@ class StoogeSort implements SortInterface
|
|||
*
|
||||
* @since 1.0.0
|
||||
*/
|
||||
private static function stoogeSort(array &$list, int $lo, int $hi, int $order) : void
|
||||
private static function stooge(array &$list, int $lo, int $hi, int $order) : void
|
||||
{
|
||||
if ($lo >= $hi) {
|
||||
return;
|
||||
|
|
@ -68,9 +68,9 @@ class StoogeSort implements SortInterface
|
|||
if ($hi - $lo + 1 > 2) {
|
||||
$t = (int) (($hi - $lo + 1) / 3);
|
||||
|
||||
self::stoogeSort($list, $lo, $hi - $t, $order);
|
||||
self::stoogeSort($list, $lo + $t, $hi, $order);
|
||||
self::stoogeSort($list, $lo, $hi - $t, $order);
|
||||
self::stooge($list, $lo, $hi - $t, $order);
|
||||
self::stooge($list, $lo + $t, $hi, $order);
|
||||
self::stooge($list, $lo, $hi - $t, $order);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -50,8 +50,7 @@ class TimSort implements SortInterface
|
|||
$temp = $list[$j];
|
||||
$c = $j - 1;
|
||||
|
||||
while ($c >= $lo && $list[$c]->compare($temp, $order))
|
||||
{
|
||||
while ($c >= $lo && $list[$c]->compare($temp, $order)) {
|
||||
$list[$c + 1] = $list[$c];
|
||||
--$c;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ use phpOMS\Math\Statistic\MeasureOfDispersion;
|
|||
* @link https://orange-management.org
|
||||
* @since 1.0.0
|
||||
*/
|
||||
class NaiveBayesFilter
|
||||
class NaiveBayesClassifier
|
||||
{
|
||||
/**
|
||||
* Dictionary of different criterias.
|
||||
|
|
@ -85,7 +85,7 @@ class NaiveBayesFilter
|
|||
if (!isset($this->probabilities['attr'][$attr])) {
|
||||
$this->probabilities['attr'][$attr] = [
|
||||
'count' => 0,
|
||||
'evidence' => 0.0,
|
||||
'data' => [],
|
||||
];
|
||||
}
|
||||
|
||||
|
|
@ -152,15 +152,19 @@ class NaiveBayesFilter
|
|||
if (isset($this->dict[$criteria][$attr]['data'][$word])
|
||||
&& $this->dict[$criteria][$attr]['data'][$word] >= $minimum
|
||||
) {
|
||||
$p = $this->dict[$criteria][$attr]['data'][$word] / \array_sum($this->dict[$criteria][$attr]['data'])
|
||||
/ $this->probabilities['attr'][$attr]['evidence'];
|
||||
$p = ($this->dict[$criteria][$attr]['data'][$word] / \array_sum($this->dict[$criteria][$attr]['data']))
|
||||
* ($this->probabilities['criteria'][$criteria]['count'] / $this->probabilities['count'])
|
||||
/ $this->probabilities['attr'][$attr]['data'][$word];
|
||||
|
||||
$n += \log(1 - $p) - \log($p);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// todo: add probability of criteria / total?
|
||||
$p = 1 / \sqrt(2 * \M_PI * $this->probabilities['criteria'][$criteria]['attr'][$attr]['variance'])
|
||||
* \exp(-($value - $this->probabilities['criteria'][$criteria]['attr'][$attr]['mean']) ** 2 / (2 * $this->probabilities['criteria'][$criteria]['attr'][$attr]['variance'] ** 2));
|
||||
* \exp(-($value - $this->probabilities['criteria'][$criteria]['attr'][$attr]['mean']) / (2 * $this->probabilities['criteria'][$criteria]['attr'][$attr]['variance']));
|
||||
|
||||
var_dump($p);
|
||||
|
||||
$n += \log(1 - $p) - \log($p);
|
||||
}
|
||||
|
|
@ -178,20 +182,34 @@ class NaiveBayesFilter
|
|||
*/
|
||||
private function cache() : void
|
||||
{
|
||||
$this->probabilities['attr'] = [];
|
||||
|
||||
foreach ($this->dict as $criteria => $subDict) {
|
||||
foreach ($subDict as $attr => $valueArray) {
|
||||
if ($valueArray['type'] === 2) {
|
||||
$this->probabilities['criteria'][$criteria]['attr'][$attr]['mean'] = Average::arithmeticMean($this->dict[$criteria][$attr]['data']);
|
||||
$this->probabilities['criteria'][$criteria]['attr'][$attr]['variance'] = MeasureOfDispersion::empiricalVariance($this->dict[$criteria][$attr]['data']);
|
||||
$this->probabilities['criteria'][$criteria]['attr'][$attr]['variance'] = MeasureOfDispersion::empiricalVariance($this->dict[$criteria][$attr]['data'], [], $this->probabilities['criteria'][$criteria]['attr'][$attr]['mean']);
|
||||
|
||||
// \var_dump($criteria);
|
||||
// \var_dump($attr);
|
||||
// \var_dump($this->probabilities['criteria'][$criteria]['attr'][$attr]['mean']); // good
|
||||
// \var_dump($this->probabilities['criteria'][$criteria]['attr'][$attr]['variance']); // bad
|
||||
} else {
|
||||
$this->probabilities['attr'][$attr]['evidence'] = 0.0;
|
||||
if (!isset( $this->probabilities['attr'][$attr])) {
|
||||
$this->probabilities['attr'] = [$attr => ['data' => []]];
|
||||
}
|
||||
|
||||
foreach ($valueArray['data'] as $word => $count) {
|
||||
if (!isset($this->dict[$criteria][$attr]['data'][$word])) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$this->probabilities['attr'][$attr]['evidence'] += $this->dict[$criteria][$attr]['data'][$word] / \array_sum($this->dict[$criteria][$attr]['data']);
|
||||
if (!isset($this->probabilities['attr'][$attr]['data'][$word])) {
|
||||
$this->probabilities['attr'][$attr]['data'][$word] = 0.0;
|
||||
}
|
||||
|
||||
$this->probabilities['attr'][$attr]['data'][$word] += ($this->dict[$criteria][$attr]['data'][$word] / \array_sum($this->dict[$criteria][$attr]['data']))
|
||||
* ($this->probabilities['criteria'][$criteria]['count'] / $this->probabilities['count']);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -14,12 +14,12 @@
|
|||
|
||||
namespace phpOMS\tests\Math\Stochastic;
|
||||
|
||||
use phpOMS\Math\Stochastic\NaiveBayesFilter;
|
||||
use phpOMS\Math\Stochastic\NaiveBayesClassifier;
|
||||
|
||||
/**
|
||||
* @internal
|
||||
*/
|
||||
class NaiveBayesFilterTest extends \PHPUnit\Framework\TestCase
|
||||
class NaiveBayesClassifierTest extends \PHPUnit\Framework\TestCase
|
||||
{
|
||||
const PLAY = [
|
||||
['weather' => ['Overcast']],
|
||||
|
|
@ -55,28 +55,28 @@ class NaiveBayesFilterTest extends \PHPUnit\Framework\TestCase
|
|||
['height' => 5.75, 'weight' => 150, 'foot' => 9],
|
||||
];
|
||||
|
||||
public function testTextFilter() : void
|
||||
public function testTextClassifier() : void
|
||||
{
|
||||
$filter = new NaiveBayesFilter();
|
||||
$filter = new NaiveBayesClassifier();
|
||||
$filter->train('play', self::PLAY);
|
||||
$filter->train('noplay', self::NO_PLAY);
|
||||
|
||||
self::assertEqualsWithDelta(
|
||||
0.64,
|
||||
0.6,
|
||||
$filter->match('play', ['weather' => ['Sunny']], 1),
|
||||
0.01
|
||||
);
|
||||
}
|
||||
|
||||
public function testNumericFilter() : void
|
||||
public function testNumericClassifier() : void
|
||||
{
|
||||
$filter = new NaiveBayesFilter();
|
||||
$filter = new NaiveBayesClassifier();
|
||||
$filter->train('male', self::MALE);
|
||||
$filter->train('female', self::FEMALE);
|
||||
|
||||
self::assertEqualsWithDelta(
|
||||
0.64,
|
||||
$filter->match('play', ['height' => 6, 'weight' => 130, 'foot' => 8]),
|
||||
$filter->match('female', ['height' => 6, 'weight' => 130, 'foot' => 8]),
|
||||
0.01
|
||||
);
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user