diff --git a/Algorithm/Clustering/AffinityPropagation.php b/Algorithm/Clustering/AffinityPropagation.php
new file mode 100644
index 000000000..52d5fc5be
--- /dev/null
+++ b/Algorithm/Clustering/AffinityPropagation.php
@@ -0,0 +1,30 @@
+metric = $metric ?? function (PointInterface $a, PointInterface $b) {
+            $aCoordinates = $a->coordinates;
+            $bCoordinates = $b->coordinates;
+
+            return MetricsND::euclidean($aCoordinates, $bCoordinates);
+        };
+    }
+
+    /**
+     * Add a starting point and every point reachable from it to a cluster.
+     *
+     * The neighbor list grows while it is being walked: whenever a neighbor has at least
+     * $minPoints neighbors of its own, those are appended and visited as well.
+     *
+     * @param PointInterface   $point     Point that starts the cluster
+     * @param PointInterface[] $neighbors Neighbors of $point, indexed by point name
+     * @param int              $c         Id of the cluster to fill
+     * @param float            $epsilon   Neighborhood radius
+     * @param int              $minPoints Minimum neighbor count for a point to extend the search
+     *
+     * @return void
+     *
+     * @since 1.0.0
+     */
+    private function expandCluster(PointInterface $point, array $neighbors, int $c, float $epsilon, int $minPoints) : void
+    {
+        $this->clusters[$c][]    = $point;
+        $this->clusteredPoints[] = $point;
+        $nPoint                  = reset($neighbors);
+
+        while ($nPoint) {
+            $neighbors2 = $this->findNeighbors($nPoint, $epsilon);
+
+            if (\count($neighbors2) >= $minPoints) {
+                foreach ($neighbors2 as $nPoint2) {
+                    if (!isset($neighbors[$nPoint2->name])) {
+                        $neighbors[$nPoint2->name] = $nPoint2;
+                    }
+                }
+            }
+
+            // clusteredPoints stores the point objects themselves, so membership has to be
+            // an identity check on the object; looking up the name never matched.
+            if (!\in_array($nPoint, $this->clusteredPoints, true)) {
+                $this->clusters[$c][]    = $nPoint;
+                $this->clusteredPoints[] = $nPoint;
+            }
+
+            $nPoint = next($neighbors);
+        }
+    }
+
+    /**
+     * Find all other points that are closer than epsilon to a point.
+     *
+     * The point itself is not part of the result.
+     *
+     * @param PointInterface $point   Point to find the neighbors for
+     * @param float          $epsilon Neighborhood radius (exclusive)
+     *
+     * @return PointInterface[] Neighbors indexed by point name
+     *
+     * @since 1.0.0
+     */
+    private function findNeighbors(PointInterface $point, float $epsilon) : array
+    {
+        $neighbors = [];
+        foreach ($this->points as $point2) {
+            // Only compare against *other* points; the previous condition was inverted and
+            // therefore only ever looked at the point itself.
+            if (!$point->isEquals($point2)) {
+                $distance = isset($this->distanceMatrix[$point->name])
+                    ? $this->distanceMatrix[$point->name][$point2->name]
+                    : $this->distanceMatrix[$point2->name][$point->name];
+
+                if ($distance < $epsilon) {
+                    $neighbors[$point2->name] = $point2;
+                }
+            }
+        }
+
+        return $neighbors;
+    }
+
+    /**
+     * Calculate the distance between every pair of points with the configured metric.
+     *
+     * @param PointInterface[] $points Points to measure
+     *
+     * @return array<int|string, array<int|string, mixed>> Distances indexed by [name][name]
+     *
+     * @since 1.0.0
+     */
+    private function generateDistanceMatrix(array $points) : array
+    {
+        $distances = [];
+        foreach ($points as $point) {
+            $distances[$point->name] = [];
+            foreach ($points as $point2) {
+                $distances[$point->name][$point2->name] = ($this->metric)($point, $point2);
+            }
+        }
+
+        return $distances;
+    }
+
+    /**
+     * Find the cluster whose convex hull contains a point.
+     *
+     * The hulls are built on the first call from the first and last coordinate of every
+     * clustered point, i.e. the lookup works on two coordinates only.
+     *
+     * @param PointInterface $point Point to find the cluster for
+     *
+     * @return int Cluster id or -1 if the point is in none of the hulls
+     *
+     * @since 1.0.0
+     */
+    public function cluster(PointInterface $point) : int
+    {
+        if ($this->convexHulls === []) {
+            foreach ($this->clusters as $c => $cluster) {
+                // generateClusters() always leaves one empty trailing cluster behind;
+                // there is nothing to build a hull from in that case.
+                if ($cluster === []) {
+                    continue;
+                }
+
+                $points = [];
+                foreach ($cluster as $p) {
+                    $points[] = [
+                        'x' => \reset($p->coordinates),
+                        'y' => \end($p->coordinates),
+                    ];
+                }
+
+                $this->convexHulls[$c] = MonotoneChain::createConvexHull($points);
+            }
+        }
+
+        foreach ($this->convexHulls as $c => $hull) {
+            if (Polygon::isPointInPolygon(
+                    [
+                        'x' => \reset($point->coordinates),
+                        'y' => \end($point->coordinates)
+                    ],
+                    $hull
+                ) <= 0
+            ) {
+                return $c;
+            }
+        }
+
+        return -1;
+    }
+
+    /**
+     * Cluster the points.
+     *
+     * Resets all previous results. Points with fewer than $minPoints neighbors are recorded
+     * as noise (by name); every other point that is not clustered yet starts a new cluster.
+     *
+     * @param PointInterface[] $points    Points to cluster
+     * @param float            $epsilon   Neighborhood radius
+     * @param int              $minPoints Minimum amount of neighbors a point needs to start a cluster
+     *
+     * @return void
+     *
+     * @since 1.0.0
+     */
+    public function generateClusters(array $points, float $epsilon, int $minPoints) : void
+    {
+        $this->noisePoints     = [];
+        $this->clusters        = [];
+        $this->clusteredPoints = [];
+        $this->points          = $points;
+        $this->convexHulls     = [];
+
+        $this->distanceMatrix = $this->generateDistanceMatrix($points);
+
+        $c                  = 0;
+        $this->clusters[$c] = [];
+        foreach ($this->points as $point) {
+            $neighbors = $this->findNeighbors($point, $epsilon);
+
+            if (\count($neighbors) < $minPoints) {
+                $this->noisePoints[] = $point->name;
+            } elseif (!\in_array($point, $this->clusteredPoints, true)) {
+                // expandCluster() expects the point object, not its name
+                $this->expandCluster($point, $neighbors, $c, $epsilon, $minPoints);
+                ++$c;
+                $this->clusters[$c] = [];
+            }
+        }
+    }
+}
diff --git a/Algorithm/Clustering/Kmeans.php b/Algorithm/Clustering/Kmeans.php
index 08041cae7..0b465e1b6 100755
--- a/Algorithm/Clustering/Kmeans.php
+++ b/Algorithm/Clustering/Kmeans.php
@@ -14,6 +14,8 @@ declare(strict_types=1);
 
 namespace phpOMS\Algorithm\Clustering;
 
+use phpOMS\Math\Topology\MetricsND;
+
 /**
  * Clustering points
  *
@@ -21,6 +23,7 @@ namespace phpOMS\Algorithm\Clustering;
  * @license OMS License 2.0
  * @link    https://jingga.app
  * @since   1.0.0
+ * @see     ./clustering_overview.png
  */
 final class Kmeans
 {
@@ -35,10 +38,10 @@ final class Kmeans
     /**
      * Metric to calculate the distance between two points
      *
-     * @var Callable
+     * @var \Closure
      * @since 1.0.0
      */
-    private Callable $metric;
+    private \Closure $metric;
 
     /**
      * Points of the cluster centers
@@ -51,29 +54,20 @@ final class Kmeans
     /**
      * Constructor
      *
-     * @param PointInterface[] $points   Points to cluster
-     * @param int<0, max>      $clusters Amount of clusters
-     * @param null|Callable    $metric   metric to use for the distance between two points
+     * @param null|\Closure $metric metric to use for the distance between two points
      *
      * @since 1.0.0
      */
-    public function __construct(array $points, int $clusters, Callable $metric = null)
+    public function __construct(?\Closure $metric = null)
     {
         $this->metric = $metric ?? function (PointInterface $a, PointInterface $b) {
-            $aCoordinates = $a->getCoordinates();
-            $bCoordinates = $b->getCoordinates();
+            $aCoordinates = $a->coordinates;
+            $bCoordinates = $b->coordinates;
 
-            $n   = \count($aCoordinates);
-            $sum = 0;
-
-            for ($i = 0; $i < $n; ++$i) {
-                $sum = ($aCoordinates[$i] - $bCoordinates[$i]) * ($aCoordinates[$i] - $bCoordinates[$i]);
-            }
-
-            return $sum;
+            return MetricsND::euclidean($aCoordinates, $bCoordinates);
         };
 
-        $this->generateClusters($points, $clusters);
+        //$this->generateClusters($points, $clusters);
     }
 
     /**
@@ -81,7 +75,7 @@ final class Kmeans
      *
      * @param PointInterface $point Point to find the cluster for
      *
-     * @return null|PointInterface
+     * @return null|PointInterface Cluster center point
      *
      * @since 1.0.0
      */
@@ -122,11 +116,11 @@ final class Kmeans
      *
      * @since 1.0.0
      */
-    private function generateClusters(array $points, int $clusters) : void
+    public function generateClusters(array $points, int $clusters) : void
     {
         $n              = \count($points);
         $clusterCenters = $this->kpp($points, $clusters);
-        $coordinates    = \count($points[0]->getCoordinates());
+        $coordinates    = \count($points[0]->coordinates);
 
         while (true) {
             foreach ($clusterCenters as $center) {
diff --git a/Algorithm/Clustering/MeanShift.php b/Algorithm/Clustering/MeanShift.php
new file mode 100644
index 000000000..25550b317
--- /dev/null
+++ b/Algorithm/Clustering/MeanShift.php
@@ -0,0 +1,227 @@
+metric = $metric ?? function (PointInterface $a, PointInterface $b) {
+            $aCoordinates = $a->coordinates;
+            $bCoordinates = $b->coordinates;
+
+            return MetricsND::euclidean($aCoordinates, $bCoordinates);
+        };
+
+        $this->kernel = $kernel ?? function (array $distances, array $bandwidths) {
+            return KernelsND::gaussianKernel($distances, $bandwidths);
+        };
+    }
+
+    /**
+     * Shift all points until none of them moves further than MIN_DISTANCE and group the result.
+     *
+     * Afterwards the cluster centers hold one shifted point per input point and the clusters
+     * hold the group id of every input point (see groupPoints()).
+     *
+     * @param PointInterface[] $points    Points to cluster, expected to be indexed 0..n-1
+     * @param array            $bandwidth Bandwidth handed to the kernel
+     *
+     * @return void
+     *
+     * @since 1.0.0
+     */
+    public function generateClusters(array $points, array $bandwidth) : void
+    {
+        $shiftPoints = $points;
+        $maxMinDist  = 1;
+
+        $stillShifting = \array_fill(0, \count($points), true);
+
+        $pointLength = \count($shiftPoints);
+
+        while ($maxMinDist > self::MIN_DISTANCE) {
+            $maxMinDist = 0;
+
+            for ($i = 0; $i < $pointLength; ++$i) {
+                if (!$stillShifting[$i]) {
+                    continue;
+                }
+
+                $pNew      = $shiftPoints[$i];
+                $pNewStart = $pNew;
+                $pNew      = $this->shiftPoint($pNew, $points, $bandwidth);
+                $dist      = ($this->metric)($pNew, $pNewStart);
+
+                if ($dist > $maxMinDist) {
+                    $maxMinDist = $dist;
+                }
+
+                if ($dist < self::MIN_DISTANCE) {
+                    $stillShifting[$i] = false;
+                }
+
+                $shiftPoints[$i] = $pNew;
+            }
+        }
+
+        // @todo create an array of noisePoints like in the DBSCAN. That array can be empty or not depending on the bandwidth defined
+
+        $this->clusters       = $this->groupPoints($shiftPoints);
+        $this->clusterCenters = $shiftPoints;
+    }
+
+    /**
+     * Move a point to the kernel-weighted mean of all points.
+     *
+     * @param PointInterface   $point     Point to shift
+     * @param PointInterface[] $points    Points that attract the point
+     * @param array            $bandwidth Bandwidth handed to the kernel
+     *
+     * @return PointInterface Shifted copy of the point
+     *
+     * @since 1.0.0
+     */
+    private function shiftPoint(PointInterface $point, array $points, array $bandwidth) : PointInterface
+    {
+        $scaleFactor = 0.0;
+
+        // The weighted sum has to start at zero. A clone still carries the original
+        // coordinates, which would otherwise be added on top of the sum.
+        $shifted              = clone $point;
+        $shifted->coordinates = \array_fill_keys(\array_keys($point->coordinates), 0);
+
+        foreach ($points as $pTemp) {
+            $dist = ($this->metric)($point, $pTemp);
+            // NOTE(review): the default kernel closure declares `array $distances` but receives
+            // the scalar metric result here - confirm the intended kernel signature.
+            $weight = ($this->kernel)($dist, $bandwidth);
+
+            foreach ($point->coordinates as $idx => $_) {
+                $shifted->coordinates[$idx] += $pTemp->coordinates[$idx] * $weight;
+            }
+
+            $scaleFactor += $weight;
+        }
+
+        // Without any weight there is no mean to move to; keep the point where it is
+        if ($scaleFactor === 0.0) {
+            return clone $point;
+        }
+
+        foreach ($shifted->coordinates as $idx => $_) {
+            $shifted->coordinates[$idx] /= $scaleFactor;
+        }
+
+        return $shifted;
+    }
+
+    /**
+     * Assign every point to the first group that has a member within GROUP_DISTANCE_TOLERANCE.
+     *
+     * A point that is not close enough to any existing group opens a new one.
+     *
+     * @param PointInterface[] $points Shifted points
+     *
+     * @return int[] Group id per point, in the order of $points
+     *
+     * @since 1.0.0
+     */
+    private function groupPoints(array $points) : array
+    {
+        $groupAssignment = [];
+        $groups          = [];
+        $groupIndex      = 0;
+
+        foreach ($points as $point) {
+            $nearestGroupIndex = $this->findNearestGroup($point, $groups);
+
+            if ($nearestGroupIndex === -1) {
+                // create new group
+                $groups[]          = [$point];
+                $groupAssignment[] = $groupIndex;
+                ++$groupIndex;
+            } else {
+                $groupAssignment[]            = $nearestGroupIndex;
+                $groups[$nearestGroupIndex][] = $point;
+            }
+        }
+
+        return $groupAssignment;
+    }
+
+    /**
+     * Find the first group with a member closer than GROUP_DISTANCE_TOLERANCE to a point.
+     *
+     * @param PointInterface     $point  Point to find the group for
+     * @param PointInterface[][] $groups Groups of points
+     *
+     * @return int Position of the group or -1 if no group is close enough
+     *
+     * @since 1.0.0
+     */
+    private function findNearestGroup(PointInterface $point, array $groups) : int
+    {
+        $nearestGroupIndex = -1;
+        $index             = 0;
+
+        foreach ($groups as $group) {
+            $distanceToGroup = $this->distanceToGroup($point, $group);
+
+            if ($distanceToGroup < self::GROUP_DISTANCE_TOLERANCE) {
+                $nearestGroupIndex = $index;
+                break;
+            }
+
+            ++$index;
+        }
+
+        return $nearestGroupIndex;
+    }
+
+    /**
+     * Distance between a point and the closest member of a group.
+     *
+     * @param PointInterface   $point Point to measure from
+     * @param PointInterface[] $group Group members
+     *
+     * @return float Smallest distance or \PHP_FLOAT_MAX for an empty group
+     *
+     * @since 1.0.0
+     */
+    private function distanceToGroup(PointInterface $point, array $group) : float
+    {
+        $minDistance = \PHP_FLOAT_MAX;
+
+        foreach ($group as $pt) {
+            $dist = ($this->metric)($point, $pt);
+
+            if ($dist < $minDistance) {
+                $minDistance = $dist;
+            }
+        }
+
+        return $minDistance;
+    }
+
+    /**
+     * Find the cluster for a point
+     *
+     * @param PointInterface $point Point to find the cluster for
+     *
+     * @return null|PointInterface Cluster center point
+     *
+     * @since 1.0.0
+     */
+    public function cluster(PointInterface $point) : ?PointInterface
+    {
+        // $this->clusters holds the integer group ids returned by groupPoints(), so it cannot
+        // be handed to findNearestGroup(), which expects lists of points. Pick the closest of
+        // the stored cluster centers instead.
+        $nearest     = null;
+        $minDistance = \PHP_FLOAT_MAX;
+
+        foreach ($this->clusterCenters as $center) {
+            $dist = ($this->metric)($point, $center);
+
+            if ($dist < $minDistance) {
+                $minDistance = $dist;
+                $nearest     = $center;
+            }
+        }
+
+        return $nearest;
+    }
+}
diff --git a/Algorithm/Clustering/Point.php b/Algorithm/Clustering/Point.php
index 72ae2d465..f11ee27db 100755
--- a/Algorithm/Clustering/Point.php
+++ b/Algorithm/Clustering/Point.php
@@ -30,7 +30,7 @@ class Point implements PointInterface
      * @var array
      * @sicne 1.0.0
      */
-    private array $coordinates = [];
+    public array $coordinates = [];
 
     /**
      * Group or cluster this point belongs to
@@ -85,4 +85,18 @@ class Point implements PointInterface
     {
         $this->coordinates[$index] = $value;
     }
+
+    /**
+     * Check whether another point has the same name and the same coordinates.
+     *
+     * @param PointInterface $point Point to compare with
+     *
+     * @return bool
+     *
+     * @since 1.0.0
+     */
+    public function isEquals(PointInterface $point) : bool
+    {
+        return $this->name === $point->name && $this->coordinates === $point->coordinates;
+    }
 }
diff --git a/Algorithm/Clustering/PointInterface.php b/Algorithm/Clustering/PointInterface.php
index 7fa256428..d59123e1a 100755
--- a/Algorithm/Clustering/PointInterface.php
+++ b/Algorithm/Clustering/PointInterface.php
@@ -59,4 +59,15 @@ interface PointInterface
      * @since 1.0.0
      */
     public function setCoordinate(int $index, int | float $value) : void;
+
+    /**
+     * Check whether another point is equal to this point.
+     *
+     * @param self $point Point to compare with
+     *
+     * @return bool
+     *
+     * @since 1.0.0
+     */
+    public function isEquals(self $point) : bool;
 }
diff --git a/Algorithm/Clustering/SpectralClustering.php b/Algorithm/Clustering/SpectralClustering.php
new file mode 100644
index 000000000..46b863cc0
--- /dev/null
+++ b/Algorithm/Clustering/SpectralClustering.php
@@ -0,0 +1,30 @@
+realDuration = $this->duration;
+    }
+}
diff --git a/Algorithm/JobScheduling/v2/Dependency/Machine.php b/Algorithm/JobScheduling/v2/Dependency/Machine.php
new file mode 100644
index 000000000..bccc9f8c7
--- /dev/null
+++ b/Algorithm/JobScheduling/v2/Dependency/Machine.php
@@ -0,0 +1,34 @@
+deadline = new \DateTime('now');
+    }
+
+    public function getProfit()
+    {
+        return $this->value - $this->cost;
+    }
+}
diff --git a/Algorithm/JobScheduling/v2/Notes.md b/Algorithm/JobScheduling/v2/Notes.md
new file mode 100644
index 000000000..8c2f41b33
--- /dev/null
+++ b/Algorithm/JobScheduling/v2/Notes.md
@@ -0,0 +1,128 @@
+# Notes
+
+## Job / Item
+
+1. output item
+2. output quantity
+3. output scale factor
+4. 
instruction manuals []
+5. steps []
+
+### Data
+
+For single item, for this specific job (quantity plays a role) and for the current state
+
+1. Work time planned/actual
+    1.1. Per worker type
+    1.2. Total
+2. Machine time planned/actual
+    2.1. Per machine type
+    2.2. Total
+3. Total duration planned/actual (is NOT work time + machine time)
+4. Machine types required incl. quantity
+5. Worker types required incl. quantity
+6. Material costs
+7. Worker costs
+    7.1. Per worker type
+    7.2. Total
+8. Machine costs
+    8.1. Per machine type
+    8.2. Total
+9. Progress status in %
+10. Progress type (time based, step based, manual)
+11. Value planned/actual
+12. Costs planned/actual
+13. Current step
+
+## Steps
+
+1. Setup machine
+    1.1. worker types required []
+        1.1.1. qualifications required by worker type []
+        1.1.2. defined after algorithm: workers []
+            1.1.2.1. worker specific qualifications available []
+    1.2. amount of workers per type required
+    1.3. worker scale factor (0 = no scaling, 1 = 100% scaling)
+    1.4. machine types required []
+        1.4.1. qualifications required by machine type []
+        1.4.2. min capacity
+        1.4.3. max capacity
+        1.4.4. defined after algorithm: machines []
+            1.4.4.1. machine specific qualifications required by machine type []
+            1.4.4.2. machine specific min capacity
+            1.4.4.3. machine specific max capacity
+    1.5. amount of machines per type required
+    1.6. machine scale factor (0 = no scaling, 1 = 100% scaling)
+    1.7. worker / machine correlation (1 = equal scaling required, > 1 = more workers required per machine scale, < 1 = fewer workers required per machine scale (e.g. 1.5 -> 150% additional worker required if machines are scaled by 100%, 0.8 -> 80% additional worker required if machines are scaled by 100%))
+    1.8. worker duration
+        1.8.1. planned
+        1.8.2. current/actual
+    1.9. machine duration
+        1.9.1. planned
+        1.9.2. current/actual
+    1.10. total duration
+        1.10.1. planned
+        1.10.2. current/actual
+    1.11. 
duration scale factor (1 = duration equally scaled as machine/worker scaling, > 1 = longer duration with scaling, < 1 = shorter duration with scaling (e.g. 1.1 -> 110% additional duration if scaled by 100%, 0.9 -> 90% additional duration if scaled by 100%)). The scale factor is max(worker scale, machine scale).
+    1.12. depends on steps []
+    1.13. try to parallelize? (planned/actual)
+    1.14. material required []
+        1.14.1. material id
+        1.14.2. planned quantity
+        1.14.3. actual quantity
+    1.15. instruction checklist []
+    1.16. hold time during
+    1.17. hold time until next step
+
+2. Insert material 1
+3. Insert material 2
+4. Mix material
+5. Quality control
+6. Average correction
+7. Insert material 3
+8. Insert material 4
+9. Mix material
+10. Quality control
+11. Average correction
+12. Fill into large bindings
+13. Fill into smaller bindings
+14. Quality control
+15. Packaging
+
+## Algorithm
+
+1. Try to manufacture in one go (no large breaks in between)
+2. Try to parallelize (minimize time needed for production)
+3. Match deadline (if no deadline available go to "find earliest possible deadline")
+    3.1. Prioritize close or early to deadline finish (settings dependent)
+    3.2. If not possible re-adjust pending production
+        3.2.1. Focus on (value, cost, ...) (settings dependent)
+        3.2.2. If not possible re-adjust ongoing production
+            3.2.2.1. Focus on (value, cost, ...) (settings dependent)
+            3.2.2.2. If not possible find earliest possible deadline
+
+Constraints / To consider
+
+1. Deadline (maybe not defined)
+2. Machines
+    2.1. Available
+        2.1.1. Other jobs
+        2.1.2. General maintenance cleaning
+        2.1.3. Unforeseeable maintenance
+    2.2. Scalability by a factor
+3. Worker
+    3.1. Available
+        3.1.1. Other jobs
+        3.1.2. General maintenance cleaning
+        3.1.3. Vacation/sick
+    3.2. Qualification
+    3.3. Scalability by a factor
+4. Job variance (multiple corrections required)
+5. Material
+    5.1. Available
+    5.2. Delivery time
+6. Parallelizability
+7. Stock space
+8. 
Putting job steps on hold
+9. max/min capacities
+10. Scaling factors
diff --git a/Algorithm/JobScheduling/v2/PriorityMode.php b/Algorithm/JobScheduling/v2/PriorityMode.php
new file mode 100644
index 000000000..6ed957ccb
--- /dev/null
+++ b/Algorithm/JobScheduling/v2/PriorityMode.php
@@ -0,0 +1,46 @@
+queue);
+
+        switch ($type) {
+            case PriorityMode::FIFO:
+                // Keep the job ids as keys like every other mode does; pop() removes the
+                // returned jobs from the queue by these keys.
+                $jobs = \array_slice($this->queue, 0, $size, true);
+
+                break;
+            case PriorityMode::LIFO:
+                // Newest entries first, again indexed by job id
+                $jobs = \array_slice(\array_reverse($this->queue, true), 0, $size, true);
+
+                break;
+            case PriorityMode::PRIORITY:
+                $queue = $this->queue;
+                \uasort($queue, function (Job $a, Job $b) {
+                    return $a->priority <=> $b->priority;
+                });
+
+                $jobs = \array_slice($queue, 0, $size, true);
+
+                break;
+            case PriorityMode::VALUE:
+                $queue = $this->queue;
+                \uasort($queue, function (Job $a, Job $b) {
+                    return $b->value <=> $a->value;
+                });
+
+                $jobs = \array_slice($queue, 0, $size, true);
+
+                break;
+            case PriorityMode::COST:
+                $queue = $this->queue;
+                \uasort($queue, function (Job $a, Job $b) {
+                    return $a->cost <=> $b->cost;
+                });
+
+                $jobs = \array_slice($queue, 0, $size, true);
+
+                break;
+            case PriorityMode::PROFIT:
+                $queue = $this->queue;
+                \uasort($queue, function (Job $a, Job $b) {
+                    return $b->getProfit() <=> $a->getProfit();
+                });
+
+                $jobs = \array_slice($queue, 0, $size, true);
+
+                break;
+            case PriorityMode::HOLD:
+                $queue = $this->queue;
+                \uasort($queue, function (Job $a, Job $b) {
+                    return $b->onhold <=> $a->onhold;
+                });
+
+                $jobs = \array_slice($queue, 0, $size, true);
+
+                break;
+            case PriorityMode::EARLIEST_DEADLINE:
+                $queue = $this->queue;
+                \uasort($queue, function (Job $a, Job $b) {
+                    return $a->deadline->getTimestamp() <=> $b->deadline->getTimestamp();
+                });
+
+                $jobs = \array_slice($queue, 0, $size, true);
+
+                break;
+        }
+
+        return $jobs;
+    }
+
+    /**
+     * Add a job to the queue, replacing a job that is already stored under the same id.
+     *
+     * @param int $id  Job id
+     * @param Job $job Job to queue
+     *
+     * @return void
+     *
+     * @since 1.0.0
+     */
+    public function insert(int $id, Job $job) : void
+    {
+        $this->queue[$id] = $job;
+    }
+
+    /**
+     * Get jobs from the queue and remove them from it.
+     *
+     * @param int $size Amount of jobs to take
+     * @param int $type Selection mode (see PriorityMode)
+     *
+     * @return Job[] Removed jobs indexed by job id
+     *
+     * @since 1.0.0
+     */
+    public function pop(int $size = 1, int $type = PriorityMode::FIFO) : array
+    {
+        $jobs = $this->get($size, $type);
+        foreach ($jobs as $id => $_) {
+            unset($this->queue[$id]);
+        }
+
+        return $jobs;
+    }
+
+    /**
+     * Increase the on-hold counter of one job or of all jobs.
+     *
+     * @param int $id Job id, 0 = all jobs; unknown ids are ignored
+     *
+     * @return void
+     *
+     * @since 1.0.0
+     */
+    public function bumpHold(int $id = 0) : void
+    {
+        if ($id === 0) {
+            foreach ($this->queue as $job) {
+                ++$job->onhold;
+            }
+
+            return;
+        }
+
+        if (isset($this->queue[$id])) {
+            ++$this->queue[$id]->onhold;
+        }
+    }
+
+    /**
+     * Add a value to the priority of one job or of all jobs.
+     *
+     * @param int   $id       Job id, 0 = all jobs; unknown ids are ignored
+     * @param float $priority Value added to the current priority
+     *
+     * @return void
+     *
+     * @since 1.0.0
+     */
+    public function adjustPriority(int $id = 0, float $priority = 0.1) : void
+    {
+        if ($id === 0) {
+            foreach ($this->queue as $job) {
+                $job->priority += $priority;
+            }
+
+            return;
+        }
+
+        if (isset($this->queue[$id])) {
+            $this->queue[$id]->priority += $priority;
+        }
+    }
+
+    /**
+     * Remove a job from the queue.
+     *
+     * @param int|string $id Job id (insert() stores jobs under integer ids)
+     *
+     * @return void
+     *
+     * @since 1.0.0
+     */
+    public function remove(int | string $id) : void
+    {
+        unset($this->queue[$id]);
+    }
+
+
+}
diff --git a/Algorithm/Optimization/AntColonyOptimization.php b/Algorithm/Optimization/AntColonyOptimization.php
new file mode 100644
index 000000000..e69de29bb
diff --git a/Algorithm/Optimization/BeesAlgorithm.php b/Algorithm/Optimization/BeesAlgorithm.php
new file mode 100644
index 000000000..e69de29bb
diff --git a/Algorithm/Optimization/FireflyAlgorithm.php b/Algorithm/Optimization/FireflyAlgorithm.php
new file mode 100644
index 000000000..e69de29bb
diff --git a/Algorithm/Optimization/GeneticOptimization.php b/Algorithm/Optimization/GeneticOptimization.php
new file mode 100644
index 000000000..5d99fc7b3
--- /dev/null
+++ b/Algorithm/Optimization/GeneticOptimization.php
@@ -0,0 +1,130 @@
+ $fitnessScores[$parentIndex2]
+                    ? $population[$parentIndex1]
+                    : $population[$parentIndex2];
+            }
+
+            // Crossover and mutation to create next generation
+            $newPopulation = [];
+            for ($i = 0; $i < $populationSize; $i += 2) {
+                // Keep the offspring in its own variable: assigning it to $crossover replaced
+                // the callable with its result and broke the second iteration.
+                $offspring = ($crossover)($parents[$i], $parents[$i + 1], $parameterCount);
+
+                $child1 = ($mutate)($offspring[0], $mutationRate);
+                $child2 = ($mutate)($offspring[1], $mutationRate);
+
+                $newPopulation[] = $child1;
+                $newPopulation[] = $child2;
+            }
+
+            $population = $newPopulation;
+        }
+
+        $fitnesses = [];
+
+        // Index the fitness by the position in the population; the population array itself
+        // is not a valid array key.
+        foreach ($population as $idx => $parameters) {
+            $fitnesses[$idx] = ($fitness)($parameters);
+        }
+
+        \asort($fitnesses);
+
+        return [
+            'solutions' => $population,
+            'fitnesses' => $fitnesses,
+        ];
+    }
+}
diff --git a/Algorithm/Optimization/HarmonySearch.php b/Algorithm/Optimization/HarmonySearch.php
new file mode 100644
index 000000000..e69de29bb
diff --git a/Algorithm/Optimization/IntelligentWaterDrops.php b/Algorithm/Optimization/IntelligentWaterDrops.php
new file mode 100644
index 000000000..e69de29bb
diff --git a/Algorithm/Optimization/SimulatedAnnealing.php b/Algorithm/Optimization/SimulatedAnnealing.php
new file mode 100644
index 000000000..fd9c9c223
--- /dev/null
+++ b/Algorithm/Optimization/SimulatedAnnealing.php
@@ -0,0 +1,84 @@
+ $currentGeneration,
+            'costs' => $currentCost
+        ];
+    }
+}
diff --git a/Algorithm/Optimization/TabuSearch.php b/Algorithm/Optimization/TabuSearch.php
new file mode 100644
index 000000000..bfb3f799c
--- /dev/null
+++ b/Algorithm/Optimization/TabuSearch.php
@@ -0,0 +1,86 @@
+ ($fitness)($bestNeighbor))
+                ) {
+                    $bestNeighbor = $neighbor;
+                }
+            }
+
+            if (\is_null($bestNeighbor)) {
+                break;
+            }
+
+            $tabuList[] = $bestNeighbor;
+            if (\count($tabuList) > $tabuListSize) {
+                \array_shift($tabuList);
+            }
+
+            $currentSolution = $bestNeighbor;
+
+
+            if (($score = ($fitness)($bestNeighbor)) > $bestFitness) {
+                $bestSolution = $bestNeighbor;
+                $bestFitness  = $score;
+            }
+        }
+
+        return $bestSolution;
+    }
+}
diff --git a/Algorithm/Rating/Elo.php 
b/Algorithm/Rating/Elo.php
index 0a3cb6b9c..c1d596a02 100644
--- a/Algorithm/Rating/Elo.php
+++ b/Algorithm/Rating/Elo.php
@@ -31,7 +31,7 @@ final class Elo
 
     public int $MIN_ELO = 100;
 
-    public function rating(int $elo, array $oElo, array $s)
+    public function rating(int $elo, array $oElo, array $s) : array
     {
         $eloNew = $elo;
         foreach ($oElo as $idx => $o) {
diff --git a/Business/Marketing/ArticleCorrelationAffinity.php b/Business/Recommendation/ArticleCorrelationAffinity.php
old mode 100755
new mode 100644
similarity index 92%
rename from Business/Marketing/ArticleCorrelationAffinity.php
rename to Business/Recommendation/ArticleCorrelationAffinity.php
index 3e486770d..b3163add2
--- a/Business/Marketing/ArticleCorrelationAffinity.php
+++ b/Business/Recommendation/ArticleCorrelationAffinity.php
@@ -4,7 +4,7 @@
  *
  * PHP Version 8.1
  *
- * @package   phpOMS\Business\Marketing
+ * @package   phpOMS\Business\Recommendation
  * @copyright Dennis Eichhorn
  * @license   OMS License 2.0
  * @version   1.0.0
@@ -12,16 +12,16 @@
  */
 declare(strict_types=1);
 
-namespace phpOMS\Business\Marketing;
+namespace phpOMS\Business\Recommendation;
 
 use phpOMS\Math\Statistic\Correlation;
 
 /**
- * Marketing ArticleAffinity
+ * Article Affinity
  *
- * This class provided basic marketing metric calculations
+ * You can consider this as a "purchased with" or "customers also purchased" algorithm
  *
- * @package phpOMS\Business\Marketing
+ * @package phpOMS\Business\Recommendation
  * @license OMS License 2.0
  * @link    https://jingga.app
  * @since   1.0.0
diff --git a/Business/Recommendation/BayesianPersonalizedRanking.php b/Business/Recommendation/BayesianPersonalizedRanking.php
new file mode 100644
index 000000000..56778436b
--- /dev/null
+++ b/Business/Recommendation/BayesianPersonalizedRanking.php
@@ -0,0 +1,94 @@
+numFactors = $numFactors;
+        $this->learningRate   = $learningRate;
+        $this->regularization = $regularization;
+    }
+
+    /**
+     * Create a factor vector with numFactors random values between 0 and 1.
+     *
+     * @return float[]
+     *
+     * @since 1.0.0
+     */
+    private function generateRandomFactors() : array
+    {
+        $factors = [];
+        for ($i = 0; $i < $this->numFactors; ++$i) {
+            $factors[$i] = \mt_rand() / \mt_getrandmax();
+        }
+
+        return $factors;
+    }
+
+    /**
+     * Score an item for a user as the dot product of their factor vectors.
+     *
+     * Both ids must already have factors, i.e. they must have been part of an
+     * updateFactors() call before.
+     *
+     * @param int|string $userId User id
+     * @param int|string $itemId Item id
+     *
+     * @return float
+     *
+     * @since 1.0.0
+     */
+    public function predict($userId, $itemId) : float
+    {
+        $userFactor = $this->userFactors[$userId];
+        $itemFactor = $this->itemFactors[$itemId];
+        $score      = 0.0;
+
+        for ($i = 0; $i < $this->numFactors; ++$i) {
+            $score += $userFactor[$i] * $itemFactor[$i];
+        }
+
+        return $score;
+    }
+
+    /**
+     * Update the factors for one (user, preferred item, less preferred item) sample.
+     *
+     * Ids without factors get random factors first.
+     *
+     * @param int|string $userId    User id
+     * @param int|string $posItemId Item the user prefers
+     * @param int|string $negItemId Item the user prefers less
+     *
+     * @return void
+     *
+     * @since 1.0.0
+     */
+    public function updateFactors($userId, $posItemId, $negItemId) : void
+    {
+        if (!isset($this->userFactors[$userId])) {
+            $this->userFactors[$userId] = $this->generateRandomFactors();
+        }
+
+        if (!isset($this->itemFactors[$posItemId])) {
+            $this->itemFactors[$posItemId] = $this->generateRandomFactors();
+        }
+
+        if (!isset($this->itemFactors[$negItemId])) {
+            $this->itemFactors[$negItemId] = $this->generateRandomFactors();
+        }
+
+        $userFactor    = $this->userFactors[$userId];
+        $posItemFactor = $this->itemFactors[$posItemId];
+        $negItemFactor = $this->itemFactors[$negItemId];
+
+        // The item updates below use the user factor that was already updated in the same step
+        for ($i = 0; $i < $this->numFactors; ++$i) {
+            $userFactor[$i]    += $this->learningRate * ($posItemFactor[$i] - $negItemFactor[$i]) - $this->regularization * $userFactor[$i];
+            $posItemFactor[$i] += $this->learningRate * $userFactor[$i] - $this->regularization * $posItemFactor[$i];
+            $negItemFactor[$i] += $this->learningRate * (-$userFactor[$i]) - $this->regularization * $negItemFactor[$i];
+        }
+
+        $this->userFactors[$userId]    = $userFactor;
+        $this->itemFactors[$posItemId] = $posItemFactor;
+        $this->itemFactors[$negItemId] = $negItemFactor;
+    }
+}
diff --git a/Business/Recommendation/MemoryCF.php b/Business/Recommendation/MemoryCF.php
new file mode 100644
index 000000000..72f0c719e
--- /dev/null
+++ b/Business/Recommendation/MemoryCF.php
@@ -0,0 +1,146 @@
+rankings = $this->normalizeRanking($rankings);
+    }
+
+    /**
+     * Center every ranking row around zero by subtracting the row average.
+     *
+     * @param array<int|string, array<int|string, int|float>> $rankings Rankings to normalize
+     *
+     * @return array<int|string, array<int|string, int|float>>
+     *
+     * @since 1.0.0
+     */
+    private function normalizeRanking(array $rankings) : array
+    {
+        foreach ($rankings as $idx => $items) {
+            $count = \count($items);
+
+            // An empty row has no average
+            if ($count === 0) {
+                continue;
+            }
+
+            $avg = \array_sum($items) / $count;
+
+            foreach ($items as $idx2 => $_) {
+                $rankings[$idx][$idx2] -= $avg;
+            }
+        }
+
+        return $rankings;
+    }
+
+    /**
+     * Euclidean distance between a ranking and every row of a ranking table.
+     *
+     * Used to find similar users
+     *
+     * @param array $ranking  Ranking to compare
+     * @param array $rankings Rankings to compare with
+     *
+     * @return float[] Distances, indexed like $rankings
+     *
+     * @since 1.0.0
+     */
+    public function euclideanDistance(array $ranking, array $rankings) : array
+    {
+        $distances = [];
+        foreach ($rankings as $idx => $r) {
+            $distances[$idx] = \abs(MetricsND::euclidean($ranking, $r));
+        }
+
+        return $distances;
+    }
+
+    /**
+     * Absolute MetricsND::cosine() value between a ranking and every row of a ranking table.
+     *
+     * Used to find similar users
+     *
+     * NOTE(review): bestMatch() sorts these values ascending like a distance - confirm that
+     * MetricsND::cosine() yields a value where smaller means more similar.
+     *
+     * @param array $ranking  Ranking to compare
+     * @param array $rankings Rankings to compare with
+     *
+     * @return float[] Values, indexed like $rankings
+     *
+     * @since 1.0.0
+     */
+    public function cosineDistance(array $ranking, array $rankings) : array
+    {
+        $distances = [];
+        foreach ($rankings as $idx => $r) {
+            $distances[$idx] = \abs(MetricsND::cosine($ranking, $r));
+        }
+
+        return $distances;
+    }
+
+    /**
+     * Average rank of an item among the first users (in the order of $distances) who ranked it.
+     *
+     * @param int|string $itemId    Item to rank
+     * @param array      $distances Distances indexed by user id, closest users first
+     * @param array      $users     Rankings indexed by [userId][itemId]
+     * @param int        $size      Maximum amount of users to consider
+     *
+     * @return float Average rank, 0.0 if none of the users ranked the item
+     *
+     * @since 1.0.0
+     */
+    private function weightedItemRank(int | string $itemId, array $distances, array $users, int $size) : float
+    {
+        $rank  = 0.0;
+        $count = 0;
+        foreach ($distances as $uId => $_) {
+            if ($count >= $size) {
+                break;
+            }
+
+            // Look the item up in the rankings of the current user
+            if (!isset($users[$uId][$itemId])) {
+                continue;
+            }
+
+            ++$count;
+            $rank += $users[$uId][$itemId];
+        }
+
+        return $count === 0 ? 0.0 : $rank / $count;
+    }
+
+    /**
+     * Find the best matches for a ranking.
+     *
+     * This can be used to find items for a specific user (aka might be interested in) or to find users who might be interested in this item
+     *
+     * option 1 - find items
+     *      ranking[itemId] = itemRank (how much does specific user like item)
+     *      rankings[userId][itemId] = itemRank
+     *
+     * option 2 - find user
+     *      ranking[userId] = itemRank (how much does user like specific item)
+     *      rankings[itemId][userId] = itemRank
+     *
+     * option 1 searches for items, option 2 searches for users
+     *
+     * @param array $ranking Known rankings of the subject
+     * @param int   $size    Amount of comparable rows to inspect
+     *
+     * @return array<int|string, float> Expected rank per new id, best first
+     *
+     * @since 1.0.0
+     */
+    public function bestMatch(array $ranking, int $size = 10) : array
+    {
+        $ranking = $this->normalizeRanking([$ranking]);
+        $ranking = $ranking[0];
+
+        $euclidean = $this->euclideanDistance($ranking, $this->rankings);
+        $cosine    = $this->cosineDistance($ranking, $this->rankings);
+
+        \asort($euclidean);
+        \asort($cosine);
+
+        $size    = \min($size, \count($this->rankings));
+        $matches = [];
+
+        $distancePointer = \array_keys($euclidean);
+        $anglePointer    = \array_keys($cosine);
+
+        // Inspect items of the top n comparable users
+        for ($i = 1; $i <= $size; ++$i) {
+            // Alternate between both lists: 1st euclidean, 1st cosine, 2nd euclidean, ...
+            // (the previous formula produced index -1 for the first iteration)
+            $index = \intdiv($i - 1, 2);
+
+            $uId       = $i % 2 === 1 ? $distancePointer[$index] : $anglePointer[$index];
+            $distances = $i % 2 === 1 ? $euclidean : $cosine;
+            foreach ($this->rankings[$uId] as $iId => $_) {
+                // Item is not already in dataset and not in historic dataset (we are only interested in new)
+                if (isset($matches[$iId]) || isset($ranking[$iId])) {
+                    continue;
+                }
+
+                // Calculate the expected rating the user would give based on what the best comparable users did
+                $matches[$iId] = $this->weightedItemRank($iId, $distances, $this->rankings, $size);
+            }
+        }
+
+        \asort($matches);
+        $matches = \array_reverse($matches, true);
+
+        return $matches;
+    }
+}
diff --git a/Business/Recommendation/ModelCF.php b/Business/Recommendation/ModelCF.php
new file mode 100644
index 000000000..a0f189bba
--- /dev/null
+++ b/Business/Recommendation/ModelCF.php
@@ -0,0 +1,50 @@
+mult($items)->getMatrix();
+    }
+}
diff --git a/Business/Recommendation/README.md b/Business/Recommendation/README.md
new file mode 100644
index 000000000..dad7c7152
--- /dev/null
+++ b/Business/Recommendation/README.md
@@ -0,0 +1,3 @@
+# Recommendation
+
+Additional recommendation algorithms not included in this directory are Clustering and Classifier available in this library.
\ No newline at end of file
diff --git a/Math/Geometry/Shape/D2/Polygon.php b/Math/Geometry/Shape/D2/Polygon.php
index fd11b0511..5fbfdeaa3 100755
--- a/Math/Geometry/Shape/D2/Polygon.php
+++ b/Math/Geometry/Shape/D2/Polygon.php
@@ -61,7 +61,7 @@ final class Polygon implements D2ShapeInterface
      *
      * @param array{x:int|float, y:int|float} $point Point location
      *
-     * @return int
+     * @return int -1 inside polygon 0 on vertice 1 outside
      *
      * @since 1.0.0
      */
diff --git a/Math/Matrix/Matrix.php b/Math/Matrix/Matrix.php
index c4a41e382..12f21cb63 100755
--- a/Math/Matrix/Matrix.php
+++ b/Math/Matrix/Matrix.php
@@ -43,7 +43,7 @@ class Matrix implements \ArrayAccess, \Iterator
      * @var array>
      * @since 1.0.0
      */
-    protected array $matrix = [];
+    public array $matrix = [];
 
     /**
      * Columns.
@@ -436,7 +436,7 @@ class Matrix implements \ArrayAccess, \Iterator
         $newMatrixArr = $this->matrix;
 
         foreach ($newMatrixArr as $i => $vector) {
-            foreach ($vector as $j => $value) {
+            foreach ($vector as $j => $_) {
                 $newMatrixArr[$i][$j] += $matrixArr[$i][$j];
             }
         }
@@ -691,6 +691,232 @@ class Matrix implements \ArrayAccess, \Iterator
         return $L->det();
     }
 
+    /**
+     * Dot product of this matrix with another matrix.
+     *
+     * @param self $B Right-hand operand
+     *
+     * @return self
+     *
+     * @throws InvalidDimensionException If the operand dimensions do not match
+     * @throws \InvalidArgumentException If the left operand is a vector and the right one a matrix
+     *
+     * @since 1.0.0
+     */
+    public function dot(self $B) : self
+    {
+        $value1 = $this->matrix;
+        $value2 = $B->getMatrix();
+
+        $m1 = \count($value1);
+        $n1 = ($isMatrix1 = \is_array($value1[0])) ? \count($value1[0]) : 1;
+
+        $m2 = \count($value2);
+        $n2 = ($isMatrix2 = \is_array($value2[0])) ? \count($value2[0]) : 1;
+
+        $result = null;
+
+        if ($isMatrix1 && $isMatrix2) {
+            if ($m2 !== $n1) {
+                throw new InvalidDimensionException($m2 . 'x' . $n2 . ' not compatible with ' . $m1 . 'x' . $n1);
+            }
+
+            $result = [[]];
+            for ($i = 0; $i < $m1; ++$i) { // Row of 1
+                for ($c = 0; $c < $n2; ++$c) { // Column of 2
+                    $temp = 0;
+
+                    for ($j = 0; $j < $m2; ++$j) { // Row of 2
+                        $temp += $value1[$i][$j] * $value2[$j][$c];
+                    }
+
+                    $result[$i][$c] = $temp;
+                }
+            }
+        } elseif (!$isMatrix1 && !$isMatrix2) {
+            if ($m1 !== $m2) {
+                throw new InvalidDimensionException($m1 . 'x' . $m2);
+            }
+
+            $result = 0;
+            for ($i = 0; $i < $m1; ++$i) {
+                /** @var array $value1 */
+                /** @var array $value2 */
+                $result += $value1[$i] * $value2[$i];
+            }
+
+            // fromArray() is only ever given arrays in this class; wrap the scalar as 1x1
+            $result = [[$result]];
+        } elseif ($isMatrix1 && !$isMatrix2) {
+            $result = [];
+            for ($i = 0; $i < $m1; ++$i) { // Row of 1
+                $temp = 0;
+
+                for ($c = 0; $c < $m2; ++$c) { // Row of 2
+                    /** @var array $value2 */
+                    $temp += $value1[$i][$c] * $value2[$c];
+                }
+
+                $result[$i] = $temp;
+            }
+        } else {
+            throw new \InvalidArgumentException();
+        }
+
+        return self::fromArray($result);
+    }
+
+    /**
+     * Sum the matrix elements.
+     *
+     * @param int $axis -1 = all elements, 0 = one sum per column, 1 = one sum per row
+     *
+     * @return int|float|self Total for axis -1, a matrix built from the sums for axis 0 and 1, an empty matrix otherwise
+     *
+     * @since 1.0.0
+     */
+    public function sum(int $axis = -1) : int | float | self
+    {
+        if ($axis === -1) {
+            $sum = 0;
+
+            foreach ($this->matrix as $row) {
+                $sum += \array_sum($row);
+            }
+
+            return $sum;
+        } elseif ($axis === 0) {
+            $sum = [];
+            foreach ($this->matrix as $row) {
+                foreach ($row as $idx2 => $value) {
+                    if (!isset($sum[$idx2])) {
+                        $sum[$idx2] = 0;
+                    }
+
+                    $sum[$idx2] += $value;
+                }
+            }
+
+            return self::fromArray($sum);
+        } elseif ($axis === 1) {
+            $sum = [];
+            foreach ($this->matrix as $idx => $row) {
+                $sum[$idx] = \array_sum($row);
+            }
+
+            return self::fromArray($sum);
+        }
+
+        return new self();
+    }
+
+    /**
+     * Check if the matrix is square and all elements off the diagonal are within EPSILON of zero.
+     *
+     * @return bool
+     *
+     * @since 1.0.0
+     */
+    public function isDiagonal() : bool
+    {
+        if ($this->m !== $this->n) {
+            return false;
+        }
+
+        for ($i = 0; $i < $this->m; ++$i) {
+            for ($j = 0; $j < $this->n; ++$j) {
+                if ($i !== $j && \abs($this->matrix[$i][$j]) > self::EPSILON) {
+                    return false;
+                }
+            }
+        }
+
+        return true;
+    }
+
+    /**
+     * Raise the matrix to a power.
+     *
+     * Diagonal matrices are raised element by element on the diagonal (any exponent),
+     * all other matrices must be square and only support integer exponents.
+     *
+     * @param int|float $exponent Exponent
+     *
+     * @return self
+     *
+     * @throws InvalidDimensionException If a non-diagonal matrix is not square
+     * @throws \Exception                If a non-integer exponent is used with a non-diagonal matrix
+     *
+     * @since 1.0.0
+     */
+    public function pow(int | float $exponent) : self
+    {
+        if ($this->isDiagonal()) {
+            $matrix = [];
+
+            for ($i = 0; $i < $this->m; ++$i) {
+                $row     = \array_fill(0, $this->m, 0);
+                $row[$i] = \pow($this->matrix[$i][$i], $exponent);
+
+                $matrix[] = $row;
+            }
+
+            return self::fromArray($matrix);
+        } elseif (\is_int($exponent)) {
+            if ($this->m !== $this->n) {
+                throw new InvalidDimensionException($this->m . 'x' . $this->n);
+            }
+
+            // A^(-k) = (A^-1)^k; negative exponents used to return the identity matrix
+            $base  = $exponent < 0 ? $this->inverse() : $this;
+            $steps = \abs($exponent);
+
+            $matrix = new IdentityMatrix($this->m);
+            for ($i = 0; $i < $steps; ++$i) {
+                $matrix = $matrix->mult($base);
+            }
+
+            return $matrix;
+        }
+
+        // @todo: implement
+        throw new \Exception('Not yet implemented');
+    }
+
+    /**
+     * Matrix exponential approximated by a truncated Taylor series.
+     *
+     * Evaluates I + A + A^2 / 2! + ... + A^n / n! with n = $iterations.
+     *
+     * @param int $iterations Amount of series terms after the identity matrix
+     *
+     * @return self
+     *
+     * @throws InvalidDimensionException If the matrix is not square
+     *
+     * @since 1.0.0
+     */
+    public function exp(int $iterations = 10) : self
+    {
+        if ($this->m !== $this->n) {
+            throw new InvalidDimensionException($this->m . 'x' . $this->n);
+        }
+
+        $matrix    = new IdentityMatrix($this->m);
+        $pow       = $matrix;
+        $factorial = 1;
+
+        for ($i = 1; $i <= $iterations; ++$i) {
+            // $pow is A^i: multiply the previous power by the matrix itself, not by the running sum
+            $pow        = $pow->mult($this);
+            $factorial *= $i;
+
+            $matrix = $matrix->add($pow->mult(1 / $factorial));
+        }
+
+        return $matrix;
+    }
+
     /**
      * {@inheritdoc}
      */
diff --git a/Math/Matrix/Vector.php b/Math/Matrix/Vector.php
index d241e4473..25ecc014c 100755
--- a/Math/Matrix/Vector.php
+++ b/Math/Matrix/Vector.php
@@ -88,6 +88,42 @@ final class Vector extends Matrix
         return $this;
     }
 
+    /**
+     * Cosine of the angle between this vector and another vector.
+     *
+     * @param self $v Vector to compare with
+     *
+     * @return float Cosine, or \PHP_FLOAT_MAX if one of the vectors has no magnitude
+     *
+     * @since 1.0.0
+     */
+    public function cosine(self $v) : float
+    {
+        $dotProduct = 0.0;
+        $squares1   = 0.0;
+        $squares2   = 0.0;
+
+        foreach ($this->matrix as $i => $value) {
+            // Read the other vector the same way the magnitude loops do
+            $dotProduct += $value[0] * $v->matrix[$i][0];
+            $squares1   += $value[0] * $value[0];
+        }
+
+        foreach ($v->matrix as $value) {
+            $squares2 += $value[0] * $value[0];
+        }
+
+        $magnitude1 = \sqrt($squares1);
+        $magnitude2 = \sqrt($squares2);
+
+        // \sqrt() returns a float, a strict comparison with the integer 0 never matches
+        if ($magnitude1 === 0.0 || $magnitude2 === 0.0) {
+            return \PHP_FLOAT_MAX;
+        }
+
+        return $dotProduct / ($magnitude1 * $magnitude2);
+    }
+
     /**
      * Calculate the cross product
      *
diff --git a/Math/Topology/Kernel2D.php b/Math/Topology/Kernel2D.php
new file mode 100644
index 000000000..58af4c02b
--- /dev/null
+++ b/Math/Topology/Kernel2D.php
@@ -0,0 +1,112 @@
+mult($identityMatrix);
+
+        $exponent = $distnaceMatrix->dot($cov->inverse())->mult($distnaceMatrix)->sum(1)->mult(-0.5);
+
+        return $exponent->exp()->mult((1 / \pow(2 * \M_PI, $dim / 2)) * \pow($cov->det(), 0.5))->matrix;
+    }
+}
diff --git a/Math/Topology/MetricsND.php b/Math/Topology/MetricsND.php
index 6afe50b77..8081ecd09 100755
--- a/Math/Topology/MetricsND.php
+++ b/Math/Topology/MetricsND.php
@@ -93,6 +93,45 @@ final class MetricsND
         return \sqrt($dist);
     }
 
+    /**
+     * Cosine of the angle between two vectors.
+     *
+     * The components are matched by array key, which works for lists as well as for
+     * key-indexed vectors; a key that is missing in $b counts as 0.
+     *
+     * @param array<int|string, int|float> $a n-D array
+     * @param array<int|string, int|float> $b n-D array
+     *
+     * @return float Cosine, or \PHP_FLOAT_MAX if one of the vectors has no magnitude
+     *
+     * @since 1.0.0
+     */
+    public static function cosine(array $a, array $b) : float
+    {
+        $dotProduct = 0.0;
+        $squaresA   = 0.0;
+        $squaresB   = 0.0;
+
+        foreach ($a as $key => $value) {
+            $dotProduct += $value * ($b[$key] ?? 0);
+            $squaresA   += $value * $value;
+        }
+
+        foreach ($b as $value) {
+            $squaresB += $value * $value;
+        }
+
+        $magnitude1 = \sqrt($squaresA);
+        $magnitude2 = \sqrt($squaresB);
+
+        // \sqrt() returns a float, a strict comparison with the integer 0 never matches
+        if ($magnitude1 === 0.0 || $magnitude2 === 0.0) {
+            return \PHP_FLOAT_MAX;
+        }
+
+        return $dotProduct / ($magnitude1 * $magnitude2);
+    }
+
     /**
      * Chebyshev metric.
      *