metric = $metric ?? function (PointInterface $a, PointInterface $b) { $aCoordinates = $a->coordinates; $bCoordinates = $b->coordinates; return MetricsND::euclidean($aCoordinates, $bCoordinates); }; }

    /**
     * {@inheritdoc}
     */
    public function cluster(PointInterface $point) : ?PointInterface
    {
        $bestCluster  = null;
        $bestDistance = \PHP_FLOAT_MAX;

        // Linear scan for the center with the smallest metric distance to the point.
        // Stays null when no centers have been generated yet.
        foreach ($this->clusterCenters as $center) {
            if (($distance = ($this->metric)($center, $point)) < $bestDistance) {
                $bestCluster  = $center;
                $bestDistance = $distance;
            }
        }

        return $bestCluster;
    }

    /**
     * {@inheritdoc}
     */
    public function getCentroids() : array
    {
        return $this->clusterCenters;
    }

    /**
     * {@inheritdoc}
     */
    public function getNoise() : array
    {
        // This implementation never classifies points as noise
        return [];
    }

    /**
     * Generate the clusters of the points
     *
     * Seeds the centers with kpp() and then alternates between moving every
     * center to the mean of the points assigned to it and re-assigning every
     * point to its nearest center until the stop condition is met.
     *
     * The group property of the passed points is overwritten with the key of
     * the center they end up in. If kpp() cannot find enough distinct points,
     * fewer than $clusters centers are generated.
     *
     * @param PointInterface[] $points   Points to cluster
     * @param int<1, max>      $clusters Amount of clusters
     *
     * @return void
     *
     * @since 1.0.0
     */
    public function generateClusters(array $points, int $clusters) : void
    {
        $this->points = $points;

        // Drop the result memoised by getClusters() so it is rebuilt for the new centers
        $this->clusters = [];

        if ($points === []) {
            // Nothing to seed from; kpp() would fail on an empty array
            $this->clusterCenters = [];

            return;
        }

        $n              = \count($points);
        $clusterCenters = $this->kpp($points, $clusters);
        $coordinates    = \count($points[\array_key_first($points)]->coordinates);

        while (true) {
            // Per-center coordinate sums and member counts for this pass. They are
            // rebuilt on every pass so counts never leak from one pass into the next.
            $sums   = [];
            $counts = [];
            foreach (\array_keys($clusterCenters) as $key) {
                $sums[$key]   = \array_fill(0, $coordinates, 0);
                $counts[$key] = 0;
            }

            foreach ($points as $point) {
                $key = $point->group;
                ++$counts[$key];

                for ($i = 0; $i < $coordinates; ++$i) {
                    $sums[$key][$i] += $point->getCoordinate($i);
                }
            }

            foreach ($clusterCenters as $key => $center) {
                // A center without members keeps its position (avoids a division by zero)
                if ($counts[$key] === 0) {
                    continue;
                }

                for ($i = 0; $i < $coordinates; ++$i) {
                    $center->setCoordinate($i, $sums[$key][$i] / $counts[$key]);
                }
            }

            $changed = 0;
            foreach ($points as $point) {
                $min = $this->nearestClusterCenter($point, $clusterCenters)[0];

                // A point only counts as changed if its nearest center differs
                // from the center it is currently assigned to
                if ($min !== $point->group) {
                    ++$changed;
                    $point->group = $min;
                }
            }

            // NOTE(review): the second condition ends the loop after a single pass
            // whenever $n * EPSILON < 2; EPSILON is defined outside this section,
            // confirm that this is intended.
            if ($changed <= $n * self::EPSILON || $n * self::EPSILON < 2) {
                break;
            }
        }

        // Label every center with its own key
        foreach ($clusterCenters as $key => $center) {
            $center->group = $key;
            $center->name  = (string) $key;
        }

        $this->clusterCenters = $clusterCenters;
    }

    /**
     * Get the index and distance to the nearest cluster center
     *
     * If no center is closer than PHP_FLOAT_MAX (e.g. the list is empty) the
     * current group of the point is returned as index.
     *
     * @param PointInterface   $point          Point to get the cluster for
     * @param PointInterface[] $clusterCenters All cluster centers
     *
     * @return array [index, distance]
     *
     * @since 1.0.0
     */
    private function nearestClusterCenter(PointInterface $point, array $clusterCenters) : array
    {
        $index = $point->group;
        $dist  = \PHP_FLOAT_MAX;

        foreach ($clusterCenters as $key => $cPoint) {
            $d = ($this->metric)($cPoint, $point);

            if ($dist > $d) {
                $dist  = $d;
                $index = $key;
            }
        }

        return [$index, $dist];
    }

    /**
     * Initialize cluster centers
     *
     * The first center is a clone of a random point. Every further center is
     * a clone of a point picked at random, weighted by its distance to the
     * nearest center chosen so far. Afterwards the group of every point is
     * set to the key of its nearest center.
     *
     * Fewer than $n centers are returned if the points do not contain enough
     * distinct candidates.
     *
     * @param PointInterface[] $points Points to use for the cluster center initialization
     * @param int<0, max>      $n      Amount of clusters to use
     *
     * @return PointInterface[]
     *
     * @since 1.0.0
     */
    private function kpp(array $points, int $n) : array
    {
        $clusters = [clone $points[\array_rand($points, 1)]];

        // Distance of every point to its nearest center, keyed like $points
        $d = [];

        for ($i = 1; $i < $n; ++$i) {
            $sum = 0;
            foreach ($points as $key => $point) {
                $d[$key] = $this->nearestClusterCenter($point, $clusters)[1];
                $sum    += $d[$key];
            }

            // Random threshold in [0, total distance]
            $sum *= \mt_rand(0, \mt_getrandmax()) / \mt_getrandmax();

            $found = false;
            foreach ($d as $key => $di) {
                $sum -= $di;

                // The in array check is important to avoid duplicate cluster centers.
                // It must stay a loose comparison: the centers are clones, so an
                // identity (strict) comparison would never match.
                if ($sum <= 0 && !\in_array($c = $points[$key], $clusters)) {
                    $clusters[$i] = clone $c;
                    $found        = true;

                    // Stop at the first hit, otherwise later points overwrite the pick
                    break;
                }
            }

            if (!$found) {
                // Fallback: uniform pick among the points not yet used as a center
                $candidates = \array_filter(
                    $points,
                    static fn (PointInterface $p) : bool => !\in_array($p, $clusters)
                );

                // No distinct point left, further centers cannot be created
                if ($candidates === []) {
                    break;
                }

                $clusters[$i] = clone $candidates[\array_rand($candidates)];
            }
        }

        foreach ($points as $point) {
            $point->group = $this->nearestClusterCenter($point, $clusters)[0];
        }

        return $clusters;
    }

    /**
     * {@inheritdoc}
     */
    public function getClusters() : array
    {
        // Result is memoised after the first call
        if (!empty($this->clusters)) {
            return $this->clusters;
        }

        foreach ($this->points as $point) {
            $c = $this->cluster($point);

            // NOTE(review): this assignment overwrites earlier entries, so every
            // cluster name ends up mapped to the last point assigned to it rather
            // than to a list of points - confirm against the callers before changing.
            $this->clusters[$c?->name] = $point;
        }

        return $this->clusters;
    }
}