From ad2634e32ac80e32d69b5fc0451363cd384215a0 Mon Sep 17 00:00:00 2001 From: Dennis Eichhorn Date: Sun, 14 Jun 2020 00:08:16 +0200 Subject: [PATCH] implemented pagerank algorithm --- Business/Marketing/PageRank.php | 133 ++++++++++++++++++++++ tests/Business/Marketing/PageRankTest.php | 77 +++++++++++++ 2 files changed, 210 insertions(+) create mode 100644 Business/Marketing/PageRank.php create mode 100644 tests/Business/Marketing/PageRankTest.php diff --git a/Business/Marketing/PageRank.php b/Business/Marketing/PageRank.php new file mode 100644 index 000000000..cf629918b --- /dev/null +++ b/Business/Marketing/PageRank.php @@ -0,0 +1,133 @@ + + * @since 1.0.0 + */ + private array $pageRanks = []; + + /** + * Relation array + * + * Array of elements where every element has an array of incoming links/relations + * + * @var array[] + * @since 1.0.0 + */ + private array $relations = []; + + /** + * Amount of outgoing links from an element + * + * @var int[] + * @since 1.0.0 + */ + private array $outgoing = []; + + /** + * Constructor. 
/**
 * Constructor.
 *
 * Stores the damping value and converts the passed "element => elements it links to" map into
 * the two internal structures used by the algorithm: the incoming links per element
 * ($this->relations) and the number of outgoing links per element ($this->outgoing).
 * Every element that is only mentioned as a link target is registered as well
 * (no incoming links yet, zero outgoing links).
 *
 * @param array[] $relations Relations between elements (keys => link from, array => link to)
 * @param bool    $isUnique  Only consider unique relations (repeated links from one element to the same target count once)
 * @param float   $damping   Damping value
 *
 * @since 1.0.0
 */
public function __construct(array $relations, bool $isUnique = true, float $damping = 0.85)
{
    $this->damping = $damping;

    foreach ($relations as $key => $relation) {
        // Counted link by link below, so the divisor used in calculateRanks() always matches
        // the links that were really stored (a plain count($relation) would also count the
        // duplicates that are skipped in unique mode).
        $this->outgoing[$key] = 0;

        if (!isset($this->relations[$key])) {
            $this->relations[$key] = [];
        }

        foreach ($relation as $linkTo) {
            if (!isset($this->relations[$linkTo])) {
                $this->relations[$linkTo] = [];
            }

            if (!isset($this->outgoing[$linkTo])) {
                $this->outgoing[$linkTo] = 0;
            }

            // Strict comparison: array keys are ints or strings, a loose comparison would treat
            // e.g. the key 0 and the key 'A' as the same element on PHP 7.x.
            if ($isUnique && \in_array($key, $this->relations[$linkTo], true)) {
                continue;
            }

            $this->relations[$linkTo][] = $key;
            ++$this->outgoing[$key];
        }
    }
}

/**
 * Calculate the rank based on a start rank for the different elements
 *
 * A different start rank for different elements might make sense if the elements are not uniform from the very beginning.
 * Elements without a start rank begin with 0.0. The ranks are updated in place, a rank calculated earlier
 * in an iteration is already used for the elements handled later in the same iteration.
 *
 * @param int        $iterations Algorithm iterations
 * @param null|array $startRank  Start rank for an element (element => rank)
 *
 * @return array Rank per element (element => rank), including any additional entries passed in $startRank
 *
 * @since 1.0.0
 */
public function calculateRanks(int $iterations = 20, ?array $startRank = null) : array
{
    // Start from the passed ranks only, results of an earlier call must not leak into this one
    $this->pageRanks = $startRank ?? [];

    foreach (\array_keys($this->relations) as $key) {
        // Every known element needs a rank, otherwise the sum below reads an undefined index
        $this->pageRanks[$key] ??= 0.0;
    }

    for ($i = 0; $i < $iterations; ++$i) {
        foreach ($this->relations as $key => $incoming) {
            $sum = 0.0;

            foreach ($incoming as $linkFrom) {
                // $linkFrom has at least one stored outgoing link (the one to $key), the divisor is never 0
                $sum += $this->pageRanks[$linkFrom] / $this->outgoing[$linkFrom];
            }

            $this->pageRanks[$key] = 1 - $this->damping + $this->damping * $sum;
        }
    }

    return $this->pageRanks;
}
b/tests/Business/Marketing/PageRankTest.php @@ -0,0 +1,77 @@ + ['B', 'C'], + 'B' => ['C'], + 'C' => ['A'], + 'D' => ['C'], + ]; + + $ranking = new PageRank($relations, true); + + self::assertEqualsWithDelta( + [ + 'A' => 1.49, + 'B' => 0.78, + 'C' => 1.58, + 'D' => 0.15, + ], + $ranking->calculateRanks(20, null), + 0.01 + ); + } + + /** + * @testdox Test the correctness of the page rank algorithm with custom damping and starting values + * @group framework + */ + public function testPageRankCustomDampingAndStart() : void + { + $relations = [ + 'A' => ['B', 'C'], + 'B' => ['C'], + 'C' => ['A'], + ]; + + $ranking = new PageRank($relations, true, 0.5); + + self::assertEqualsWithDelta( + [ + 'A' => 1.0769, + 'B' => 0.769, + 'C' => 1.1538, + ], + $ranking->calculateRanks(20, ['A' => 1.0, 'B' => 1.0, 'C' => 1.0]), + 0.01 + ); + } +}