From d14bb89583e79bb60ac23b9964e66a216d4a4c72 Mon Sep 17 00:00:00 2001
From: Dennis Eichhorn
Date: Sat, 2 Sep 2017 20:41:09 +0200
Subject: [PATCH] fixes #70

---
 Utils/StringCompare.php | 168 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 168 insertions(+)
 create mode 100644 Utils/StringCompare.php

diff --git a/Utils/StringCompare.php b/Utils/StringCompare.php
new file mode 100644
index 000000000..c78fa86db
--- /dev/null
+++ b/Utils/StringCompare.php
@@ -0,0 +1,168 @@
+dictionary = $dictionary;
+    }
+
+    /**
+     * Match word against dictionary.
+     *
+     * Every dictionary entry is scored with fuzzyMatch() against $match and
+     * the entry with the lowest score is returned. On equal scores the entry
+     * that was seen first is kept. If no entry is scored, an empty string is
+     * returned.
+     *
+     * @param string $match Word to match against dictionary
+     *
+     * @return string Best match
+     *
+     * @since 1.0.0
+     */
+    public function matchDictionary(string $match) : string
+    {
+        $bestScore = PHP_INT_MAX;
+        $bestMatch = '';
+
+        // The word list lives on the instance (assigned in the constructor);
+        // a bare $dictionary would be an undefined local variable here.
+        foreach($this->dictionary as $word) {
+            $score = self::fuzzyMatch($word, $match);
+
+            if($score < $bestScore) {
+                // Remember the new minimum, otherwise every later word that
+                // scores below PHP_INT_MAX would replace the best match.
+                $bestScore = $score;
+                $bestMatch = $word;
+            }
+        }
+
+        return $bestMatch;
+    }
+
+    /**
+     * Calculate word match score.
+     *
+     * Both strings are split into words on space, underscore and hyphen. For
+     * every word of $s1 the smallest Levenshtein distance to any word of $s2
+     * is determined (starting from strlen($s2) as upper bound) and these
+     * per-word minima are summed up.
+     *
+     * @param string $s1 Word 1
+     * @param string $s2 Word 2
+     *
+     * @return int Sum of the per-word minimum distances
+     *
+     * @since 1.0.0
+     */
+    public static function valueWords(string $s1, string $s2) : int
+    {
+        $words1 = preg_split('/[ _-]/', $s1);
+        $words2 = preg_split('/[ _-]/', $s2);
+        $total  = 0;
+
+        foreach($words1 as $word1) {
+            $best = strlen($s2);
+
+            foreach($words2 as $word2) {
+                $wordDist = levenshtein($word1, $word2);
+
+                if($wordDist < $best) {
+                    $best = $wordDist;
+                }
+
+                // An exact word match cannot be improved, stop searching.
+                if($wordDist === 0) {
+                    break;
+                }
+            }
+
+            // Plain accumulation; "$total += $total + $best" doubled the
+            // running total on every iteration.
+            $total += $best;
+        }
+
+        return $total;
+    }
+
+    /**
+     * Calculate phrase match score.
+     *
+     * The score is the Levenshtein distance between the two complete strings.
+     *
+     * @param string $s1 Word 1
+     * @param string $s2 Word 2
+     *
+     * @return int
+     *
+     * @since 1.0.0
+     */
+    public static function valuePhrase(string $s1, string $s2) : int
+    {
+        return levenshtein($s1, $s2);
+    }
+
+    /**
+     * Calculate word length score.
+     *
+     * The score is the absolute difference between the byte lengths
+     * (strlen) of both strings.
+     *
+     * @param string $s1 Word 1
+     * @param string $s2 Word 2
+     *
+     * @return int
+     *
+     * @since 1.0.0
+     */
+    public static function valueLength(string $s1, string $s2) : int
+    {
+        return abs(strlen($s1) - strlen($s2));
+    }
+
+    /**
+     * Calculate fuzzy match score.
+     *
+     * The phrase score and the word score are multiplied by their weights.
+     * The smaller of the two weighted values is multiplied by $minWeight, the
+     * larger one by $maxWeight, and the weighted length score is added. With
+     * the default (negative) $lengthWeight a larger length difference lowers
+     * the result.
+     *
+     * @param string $s1           Word 1
+     * @param string $s2           Word 2
+     * @param float  $phraseWeight Weighting for phrase score
+     * @param float  $wordWeight   Weighting for word score
+     * @param float  $minWeight    Min weight
+     * @param float  $maxWeight    Max weight
+     * @param float  $lengthWeight Weighting for word length
+     *
+     * @return float
+     *
+     * @since 1.0.0
+     */
+    public static function fuzzyMatch(string $s1, string $s2, float $phraseWeight = 0.5, float $wordWeight = 1, float $minWeight = 10, float $maxWeight = 1, float $lengthWeight = -0.3) : float
+    {
+        // The scorers are static methods of this class; without self:: PHP
+        // looks for global functions of that name and aborts with an
+        // "undefined function" error.
+        $phraseValue = self::valuePhrase($s1, $s2);
+        $wordValue   = self::valueWords($s1, $s2);
+        $lengthValue = self::valueLength($s1, $s2);
+
+        $weightedPhrase = $phraseValue * $phraseWeight;
+        $weightedWord   = $wordValue * $wordWeight;
+
+        return min($weightedPhrase, $weightedWord) * $minWeight
+            + max($weightedPhrase, $weightedWord) * $maxWeight
+            + $lengthValue * $lengthWeight;
+    }
+}
\ No newline at end of file