* @since 1.0.0
     */
    private array $rankings = [];

    /**
     * Constructor.
     *
     * The given ratings are mean-centered per row (see normalizeRanking) before they are stored.
     *
     * @param array $rankings Array of item ratings by users (or reverse to find users)
     *
     * @since 1.0.0
     */
    public function __construct(array $rankings)
    {
        $this->rankings = $this->normalizeRanking($rankings);
    }

    /**
     * Normalize all ratings.
     *
     * This is necessary because some users may give lower or higher ratings on average (bias).
     * Every row is mean-centered: the row average is subtracted from each of its ratings.
     * Rows without any rating are returned unchanged.
     *
     * @param array $rankings Item ratings/rankings (rankings[rowId][columnId] = rating)
     *
     * @return array Ratings with the same keys, centered around the row average
     *
     * @since 1.0.0
     */
    private function normalizeRanking(array $rankings) : array
    {
        foreach ($rankings as $idx => $items) {
            $count = \count($items);

            // An empty row has no average; skipping it avoids a division by zero
            if ($count === 0) {
                continue;
            }

            $avg = \array_sum($items) / $count;

            foreach ($items as $idx2 => $_) {
                $rankings[$idx][$idx2] -= $avg;
            }
        }

        return $rankings;
    }

    /**
     * Euclidean distance between users
     *
     * @param array $ranking  Rating to find the distance for
     * @param array $rankings All ratings to find the distance to
     *
     * @return float[] Absolute euclidean metric per entry, keyed like $rankings
     *
     * @since 1.0.0
     */
    public function euclideanDistance(array $ranking, array $rankings) : array
    {
        $distances = [];
        foreach ($rankings as $idx => $r) {
            $distances[$idx] = \abs(MetricsND::euclidean($ranking, $r));
        }

        return $distances;
    }

    /**
     * Cosine distance between users
     *
     * NOTE(review): the absolute value of MetricsND::cosine() is stored as is. If that helper returns
     * a similarity (higher = closer) rather than a distance, callers sorting ascending would prefer
     * the least similar entries - confirm against MetricsND.
     *
     * @param array $ranking  Rating to find the distance for
     * @param array $rankings All ratings to find the distance to
     *
     * @return float[] Absolute cosine metric per entry, keyed like $rankings
     *
     * @since 1.0.0
     */
    public function cosineDistance(array $ranking, array $rankings) : array
    {
        $distances = [];
        foreach ($rankings as $idx => $r) {
            $distances[$idx] = \abs(MetricsND::cosine($ranking, $r));
        }

        return $distances;
    }

    /**
     * Assign a item rank/rating based on the distance to other items
     *
     * Walks the users in the order given by $distances and averages the rating of the first
     * $size users who actually rated the item. The distance values themselves are not used as
     * weights; only the iteration order of $distances matters.
     *
     * @param string $itemId    Id of the item to rank
     * @param array  $distances Distance to other users (keyed by user id, best matches first)
     * @param array  $users     All user ratings (users[userId][itemId] = rating)
     * @param int    $size      Only consider the top n distances (best matches with other users)
     *
     * @return float Estimated item rank/rating based on similarity to other users (0.0 if no considered user rated the item)
     *
     * @since 1.0.0
     */
    private function weightedItemRank(string $itemId, array $distances, array $users, int $size) : float
    {
        $rank  = 0.0;
        $count = 0;

        foreach ($distances as $uId => $_) {
            if ($count >= $size) {
                break;
            }

            // Skip users who never rated this item (check the user's own ratings, not the user list)
            if (!isset($users[$uId][$itemId])) {
                continue;
            }

            ++$count;
            $rank += $users[$uId][$itemId];
        }

        return $count === 0 ? 0.0 : $rank / $count;
    }

    /**
     * Find potential items/users which are a good match for a user/item.
     *
     * The algorithm uses the ratings of a user and tries to find other users who have similar rating behavior and then searches for high rated items that the user doesn't have yet.
     *
     * This can be used to find items for a specific user (aka might be interested in) or to find users who might be interested in this item
     *
     *  option 1 - find items
     *      ranking[itemId] = itemRank (how much does specific user like item)
     *      rankings[userId][itemId] = itemRank
     *
     *  option 2 - find user
     *      ranking[userId] = itemRank (how much does user like specific item)
     *      rankings[itemId][userId] = itemRank
     *
     *  option 1 searches for items, option 2 searches for users
     *
     * @param array $ranking Array of item ratings (e.g. products, movies, ...)
     * @param int   $size    Number of comparable users to inspect (capped at the number of stored rankings)
     *
     * @return array Estimated ratings keyed by item id, highest estimate first
     *
     * @since 1.0.0
     */
    public function bestMatch(array $ranking, int $size = 10) : array
    {
        // Center the input the same way the stored rankings were centered
        $ranking = $this->normalizeRanking([$ranking]);
        $ranking = $ranking[0];

        $euclidean = $this->euclideanDistance($ranking, $this->rankings);
        $cosine    = $this->cosineDistance($ranking, $this->rankings);

        \asort($euclidean);
        \asort($cosine);

        $size    = \min($size, \count($this->rankings));
        $matches = [];

        $distancePointer = \array_keys($euclidean);
        $anglePointer    = \array_keys($cosine);

        // Inspect items of the top n comparable users, alternating between the two metrics
        for ($i = 1; $i <= $size; ++$i) {
            // Iterations 1,2 -> position 0; 3,4 -> position 1; ...
            $index        = \intdiv($i - 1, 2);
            $useEuclidean = $i % 2 === 1;

            $uId       = $useEuclidean ? $distancePointer[$index] : $anglePointer[$index];
            $distances = $useEuclidean ? $euclidean : $cosine;

            foreach ($this->rankings[$uId] as $iId => $_) {
                // Item is not already in dataset and not in historic dataset (we are only interested in new)
                if (isset($matches[$iId]) || isset($ranking[$iId])) {
                    continue;
                }

                // Calculate the expected rating the user would give based on what the best comparable users did.
                // Numeric array keys are returned as int, the helper expects a string id.
                $matches[$iId] = $this->weightedItemRank((string) $iId, $distances, $this->rankings, $size);
            }
        }

        \asort($matches);

        return \array_reverse($matches, true);
    }
}