mirror of
https://github.com/Karaka-Management/phpOMS.git
synced 2026-01-13 18:48:40 +00:00
Implement kmeans algorithm
This commit is contained in:
parent
f5208e95fe
commit
549bbfbccf
246
Algorithm/Clustering/Kmeans.php
Normal file
246
Algorithm/Clustering/Kmeans.php
Normal file
|
|
@ -0,0 +1,246 @@
|
|||
<?php
|
||||
/**
|
||||
* Orange Management
|
||||
*
|
||||
* PHP Version 7.4
|
||||
*
|
||||
* @package phpOMS\Algorithm\Clustering
|
||||
* @copyright Dennis Eichhorn
|
||||
* @license OMS License 1.0
|
||||
* @version 1.0.0
|
||||
* @link https://orange-management.org
|
||||
*/
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace phpOMS\Algorithm\Clustering;
|
||||
|
||||
/**
|
||||
* Clustering points
|
||||
*
|
||||
* @package phpOMS\Algorithm\Clustering
|
||||
* @license OMS License 1.0
|
||||
* @link https://orange-management.org
|
||||
* @since 1.0.0
|
||||
*/
|
||||
final class Kmeans
|
||||
{
|
||||
/**
|
||||
* Metric to calculate the distance between two points
|
||||
*
|
||||
* @var \Closure
|
||||
* @since 1.0.0
|
||||
*/
|
||||
private \Closure $metric;
|
||||
|
||||
/**
|
||||
* Amount of different clusters
|
||||
*
|
||||
* @var int
|
||||
* @since 1.0.0
|
||||
*/
|
||||
private int $clusters = 1;
|
||||
|
||||
/**
|
||||
* Points of the cluster centers
|
||||
*
|
||||
* @var PointInterface[]
|
||||
* @since 1.0.0
|
||||
*/
|
||||
private $clusterCenters = [];
|
||||
|
||||
/**
|
||||
* Points to clusterize
|
||||
*
|
||||
* @var PointInterface[]
|
||||
* @since 1.0.0
|
||||
*/
|
||||
private array $points = [];
|
||||
|
||||
/**
 * Constructor
 *
 * Stores the arguments and immediately computes the cluster centers for the given points.
 *
 * @param PointInterface[] $points   Points to cluster
 * @param int              $clusters Amount of clusters
 * @param null|\Closure    $metric   Metric to use for the distance between two points.
 *                                   It is called with two PointInterface instances and its result is
 *                                   compared numerically. If omitted, the squared Euclidean distance
 *                                   over all coordinates is used.
 *
 * @since 1.0.0
 */
public function __construct(array $points, int $clusters, ?\Closure $metric = null)
{
    $this->points   = $points;
    $this->clusters = $clusters;
    $this->metric   = $metric ?? static function (PointInterface $a, PointInterface $b) {
        $aCoordinates = $a->getCoordinates();
        $bCoordinates = $b->getCoordinates();

        $n   = \count($aCoordinates);
        $sum = 0;

        for ($i = 0; $i < $n; ++$i) {
            $difference = $aCoordinates[$i] - $bCoordinates[$i];

            // Accumulate over every dimension; a plain assignment here would
            // leave only the last dimension in the result.
            $sum += $difference * $difference;
        }

        // No square root: the squared distance orders points the same way.
        return $sum;
    };

    $this->generateClusters($points, $clusters);
}
|
||||
|
||||
/**
 * Determine the cluster center closest to a point
 *
 * Every stored cluster center is compared with the point through the configured metric
 * and the center with the smallest result is returned.
 *
 * @param PointInterface $point Point to find the cluster for
 *
 * @return PointInterface Cluster center nearest to the point
 *
 * @since 1.0.0
 */
public function cluster(PointInterface $point) : PointInterface
{
    $nearest  = null;
    $shortest = \PHP_FLOAT_MAX;

    foreach ($this->clusterCenters as $candidate) {
        $distance = ($this->metric)($candidate, $point);

        // Strictly smaller only, so the first center wins on equal distances.
        if ($distance < $shortest) {
            $shortest = $distance;
            $nearest  = $candidate;
        }
    }

    // NOTE(review): stays null if no center yields a distance below PHP_FLOAT_MAX,
    // which conflicts with the declared return type.
    return $nearest;
}
|
||||
|
||||
/**
 * Generate the clusters of the points
 *
 * Seeds the centers through kpp() and then alternates between two steps: moving every center
 * to the mean of the points assigned to it, and re-assigning every point to its nearest center.
 * The group of every passed point is updated to the index of its cluster, and each center
 * receives its index as group and name.
 *
 * @param PointInterface[] $points   Points to cluster
 * @param int              $clusters Amount of clusters
 *
 * @return void
 *
 * @throws \InvalidArgumentException If no points are passed
 *
 * @since 1.0.0
 */
private function generateClusters(array $points, int $clusters) : void
{
    $n = \count($points);

    if ($n === 0) {
        throw new \InvalidArgumentException('At least one point is required for clustering.');
    }

    $clusterCenters = $this->kpp($points, $clusters);
    $coordinates    = \count($points[0]->getCoordinates());

    // Stop as soon as at most 0.1% of the points switched cluster during one pass.
    // For small inputs this means "no point switched at all".
    $tolerance = $n * 0.001;

    // Safety net so that an assignment that keeps flipping can never loop forever.
    $maxIterations = 1000;

    for ($iteration = 0; $iteration < $maxIterations; ++$iteration) {
        // Per cluster: running coordinate sums and number of assigned points.
        $sums   = [];
        $counts = [];

        foreach ($clusterCenters as $key => $center) {
            $sums[$key]   = \array_fill(0, $coordinates, 0.0);
            $counts[$key] = 0;
        }

        foreach ($points as $point) {
            $group = $point->getGroup();
            ++$counts[$group];

            for ($i = 0; $i < $coordinates; ++$i) {
                $sums[$group][$i] += $point->getCoordinate($i);
            }
        }

        foreach ($clusterCenters as $key => $center) {
            // A center without points keeps its position; dividing by its
            // zero count would fail.
            if ($counts[$key] === 0) {
                continue;
            }

            for ($i = 0; $i < $coordinates; ++$i) {
                $center->setCoordinate($i, $sums[$key][$i] / $counts[$key]);
            }
        }

        $changed = 0;

        foreach ($points as $point) {
            $nearest = $this->nearestClusterCenter($point, $clusterCenters)[0];

            if ($nearest !== $point->getGroup()) {
                ++$changed;
                $point->setGroup($nearest);
            }
        }

        if ($changed <= $tolerance) {
            break;
        }
    }

    foreach ($clusterCenters as $key => $center) {
        $center->setGroup($key);
        $center->setName((string) $key);
    }

    $this->clusterCenters = $clusterCenters;
}
|
||||
|
||||
/**
 * Find the nearest cluster center for a point
 *
 * @param PointInterface   $point          Point to get the cluster for
 * @param PointInterface[] $clusterCenters All cluster centers
 *
 * @return array [index, distance] Key of the nearest center within $clusterCenters and the
 *                                 metric result for it. If no center is closer than
 *                                 PHP_FLOAT_MAX the current group of the point and
 *                                 PHP_FLOAT_MAX are returned.
 *
 * @since 1.0.0
 */
private function nearestClusterCenter(PointInterface $point, array $clusterCenters) : array
{
    $nearestKey      = $point->getGroup();
    $nearestDistance = \PHP_FLOAT_MAX;

    foreach ($clusterCenters as $key => $center) {
        $distance = ($this->metric)($center, $point);

        // Strict comparison: on equal distances the earlier center is kept.
        if ($distance < $nearestDistance) {
            $nearestKey      = $key;
            $nearestDistance = $distance;
        }
    }

    return [$nearestKey, $nearestDistance];
}
|
||||
|
||||
/**
 * Initialize cluster centers
 *
 * The first center is a copy of a randomly picked point. Every further center is a copy of a
 * point drawn at random, where the chance of a point being drawn is proportional to the metric
 * result between it and its nearest already chosen center (k-means++ style seeding).
 * Afterwards the group of every passed point is set to the index of its nearest center.
 *
 * @param PointInterface[] $points Points to use for the cluster center initialization
 * @param int              $n      Amount of clusters to use
 *
 * @return PointInterface[] Cluster centers (clones of input points), indexed from 0
 *
 * @since 1.0.0
 */
private function kpp(array $points, int $n) : array
{
    $clusters = [clone $points[\mt_rand(0, \count($points) - 1)]];

    // Distance of every point to its nearest center, keyed like $points.
    $d = [];

    for ($i = 1; $i < $n; ++$i) {
        $sum = 0.0;

        foreach ($points as $key => $point) {
            // Every center chosen so far has to be considered, not just the first few.
            $d[$key] = $this->nearestClusterCenter($point, $clusters)[1];
            $sum    += $d[$key];
        }

        // Random threshold in [0, sum of all distances].
        $sum *= \mt_rand(0, \mt_getrandmax()) / \mt_getrandmax();

        // Fallback in case floating point rounding keeps the threshold above zero.
        $chosen = \array_key_last($d);

        foreach ($d as $key => $di) {
            $sum -= $di;

            if ($sum <= 0) {
                // Stop at the first hit, otherwise every later point would
                // overwrite the choice.
                $chosen = $key;

                break;
            }
        }

        $clusters[$i] = clone $points[$chosen];
    }

    foreach ($points as $point) {
        $point->setGroup($this->nearestClusterCenter($point, $clusters)[0]);
    }

    return $clusters;
}
|
||||
}
|
||||
120
Algorithm/Clustering/Point.php
Normal file
120
Algorithm/Clustering/Point.php
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
<?php
|
||||
/**
|
||||
* Orange Management
|
||||
*
|
||||
* PHP Version 7.4
|
||||
*
|
||||
* @package phpOMS\Algorithm\Clustering
|
||||
* @copyright Dennis Eichhorn
|
||||
* @license OMS License 1.0
|
||||
* @version 1.0.0
|
||||
* @link https://orange-management.org
|
||||
*/
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace phpOMS\Algorithm\Clustering;
|
||||
|
||||
/**
|
||||
* Point for clustering
|
||||
*
|
||||
* @package phpOMS\Algorithm\Clustering
|
||||
* @license OMS License 1.0
|
||||
* @link https://orange-management.org
|
||||
* @since 1.0.0
|
||||
*/
|
||||
class Point implements PointInterface
{
    /**
     * Coordinates of the point
     *
     * @var array
     * @since 1.0.0
     */
    private array $coordinates = [];

    /**
     * Group or cluster this point belongs to
     *
     * @var int
     * @since 1.0.0
     */
    private int $group = 0;

    /**
     * Name of the point
     *
     * @var string
     * @since 1.0.0
     */
    private string $name = '';

    /**
     * Constructor.
     *
     * @param array  $coordinates Coordinates of the point
     * @param string $name        Name of the point
     *
     * @since 1.0.0
     */
    public function __construct(array $coordinates, string $name = '')
    {
        $this->name        = $name;
        $this->coordinates = $coordinates;
    }

    /**
     * Get all coordinates of the point
     *
     * @return array
     *
     * @since 1.0.0
     */
    public function getCoordinates() : array
    {
        return $this->coordinates;
    }

    /**
     * Get a single coordinate of the point
     *
     * @param mixed $index Index of the coordinate
     *
     * @return mixed Value stored under the index
     *
     * @since 1.0.0
     */
    public function getCoordinate($index)
    {
        return $this->coordinates[$index];
    }

    /**
     * Set a single coordinate of the point
     *
     * @param mixed $index Index of the coordinate
     * @param mixed $value Value of the coordinate
     *
     * @return void
     *
     * @since 1.0.0
     */
    public function setCoordinate($index, $value)
    {
        $this->coordinates[$index] = $value;
    }

    /**
     * Get the group this point belongs to
     *
     * @return int
     *
     * @since 1.0.0
     */
    public function getGroup() : int
    {
        return $this->group;
    }

    /**
     * Set the group this point belongs to
     *
     * @param int $group Group or cluster this point belongs to
     *
     * @return void
     *
     * @since 1.0.0
     */
    public function setGroup(int $group) : void
    {
        $this->group = $group;
    }

    /**
     * Set the name of the point
     *
     * @param string $name Name of the point
     *
     * @return void
     *
     * @since 1.0.0
     */
    public function setName(string $name) : void
    {
        $this->name = $name;
    }

    /**
     * Get the name of the point
     *
     * @return string
     *
     * @since 1.0.0
     */
    public function getName() : string
    {
        return $this->name;
    }
}
|
||||
99
Algorithm/Clustering/PointInterface.php
Normal file
99
Algorithm/Clustering/PointInterface.php
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
<?php
|
||||
/**
|
||||
* Orange Management
|
||||
*
|
||||
* PHP Version 7.4
|
||||
*
|
||||
* @package phpOMS\Algorithm\Clustering
|
||||
* @copyright Dennis Eichhorn
|
||||
* @license OMS License 1.0
|
||||
* @version 1.0.0
|
||||
* @link https://orange-management.org
|
||||
*/
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace phpOMS\Algorithm\Clustering;
|
||||
|
||||
/**
|
||||
* Point interface.
|
||||
*
|
||||
* @package phpOMS\Algorithm\Clustering
|
||||
* @license OMS License 1.0
|
||||
* @link https://orange-management.org
|
||||
* @since 1.0.0
|
||||
*/
|
||||
interface PointInterface
{
    /**
     * Get the point coordinates
     *
     * @return array All coordinates of the point
     *
     * @since 1.0.0
     */
    public function getCoordinates() : array;

    /**
     * Get a single coordinate of the point
     *
     * @param mixed $index Index of the coordinate (e.g. 0 = x)
     *
     * @return mixed Value of the coordinate at the given index
     *
     * @since 1.0.0
     */
    public function getCoordinate($index);

    /**
     * Set a single coordinate of the point
     *
     * @param mixed $index Index of the coordinate (e.g. 0 = x)
     * @param mixed $value Value of the coordinate
     *
     * @return void
     *
     * @since 1.0.0
     */
    public function setCoordinate($index, $value);

    /**
     * Get group this point belongs to
     *
     * @return int
     *
     * @since 1.0.0
     */
    public function getGroup() : int;

    /**
     * Set the group this point belongs to
     *
     * @param int $group Group or cluster this point belongs to
     *
     * @return void
     *
     * @since 1.0.0
     */
    public function setGroup(int $group) : void;

    /**
     * Set the point name
     *
     * @param string $name Name of the point
     *
     * @return void
     *
     * @since 1.0.0
     */
    public function setName(string $name) : void;

    /**
     * Get the name of the point
     *
     * @return string
     *
     * @since 1.0.0
     */
    public function getName() : string;
}
|
||||
49
tests/Algorithm/Clustering/KmeansTest.php
Normal file
49
tests/Algorithm/Clustering/KmeansTest.php
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
<?php
|
||||
/**
|
||||
* Orange Management
|
||||
*
|
||||
* PHP Version 7.4
|
||||
*
|
||||
* @package tests
|
||||
* @copyright Dennis Eichhorn
|
||||
* @license OMS License 1.0
|
||||
* @version 1.0.0
|
||||
* @link https://orange-management.org
|
||||
*/
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace phpOMS\Algorithm\Clustering;
|
||||
|
||||
use phpOMS\Algorithm\Clustering\Kmeans;
|
||||
|
||||
/**
|
||||
* @testdox phpOMS\Algorithm\Clustering\Kmeans: Test the kmeans clustering implementation
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
class KmeansTest extends \PHPUnit\Framework\TestCase
{
    /**
     * Two visibly separated groups of points have to end up in two different clusters
     */
    public function testKmeans() : void
    {
        $points = [
            new Point([1.0, 1.0], '1'),
            new Point([1.5, 2.0], '2'),
            new Point([3.0, 4.0], '3'),
            new Point([5.0, 7.0], '4'),
            new Point([3.5, 5.0], '5'),
            new Point([4.5, 5.0], '6'),
            new Point([3.5, 4.5], '7'),
        ];

        $kmeans = new Kmeans($points, 2);

        $groups = [];
        foreach ($points as $point) {
            $groups[] = $kmeans->cluster($point)->getGroup();
        }

        // Which numeric id a cluster receives depends on the random seeding,
        // so only the partition of the points is compared.
        self::assertNotEquals($groups[0], $groups[2]);

        self::assertEquals($groups[0], $groups[1]);

        self::assertEquals($groups[2], $groups[3]);
        self::assertEquals($groups[2], $groups[4]);
        self::assertEquals($groups[2], $groups[5]);
        self::assertEquals($groups[2], $groups[6]);
    }
}
|
||||
51
tests/Algorithm/Clustering/PointTest.php
Normal file
51
tests/Algorithm/Clustering/PointTest.php
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
<?php
|
||||
/**
|
||||
* Orange Management
|
||||
*
|
||||
* PHP Version 7.4
|
||||
*
|
||||
* @package tests
|
||||
* @copyright Dennis Eichhorn
|
||||
* @license OMS License 1.0
|
||||
* @version 1.0.0
|
||||
* @link https://orange-management.org
|
||||
*/
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace phpOMS\Algorithm\Clustering;
|
||||
|
||||
use phpOMS\Algorithm\Clustering\Point;
|
||||
|
||||
/**
|
||||
* @testdox phpOMS\Algorithm\Clustering\Point: Test the point for the clustering implementation
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
class PointTest extends \PHPUnit\Framework\TestCase
{
    /**
     * A freshly constructed point exposes its constructor arguments and group 0
     */
    public function testDefault() : void
    {
        $coordinates = [3.0, 2.0];
        $subject     = new Point($coordinates, 'abc');

        self::assertEquals($coordinates, $subject->getCoordinates());
        self::assertEquals(3.0, $subject->getCoordinate(0));
        self::assertEquals(2.0, $subject->getCoordinate(1));
        self::assertEquals(0, $subject->getGroup());
        self::assertEquals('abc', $subject->getName());
    }

    /**
     * Coordinates and group can be overwritten and read back
     */
    public function testSetGet() : void
    {
        $subject = new Point([3.0, 2.0], 'abc');

        $subject->setCoordinate(0, 4.0);
        $subject->setCoordinate(1, 1.0);

        self::assertEquals([4.0, 1.0], $subject->getCoordinates());
        self::assertEquals(4.0, $subject->getCoordinate(0));
        self::assertEquals(1.0, $subject->getCoordinate(1));

        $subject->setGroup(2);
        self::assertEquals(2, $subject->getGroup());
    }
}
|
||||
Loading…
Reference in New Issue
Block a user