mirror of
https://github.com/Karaka-Management/phpOMS.git
synced 2026-01-11 17:58:41 +00:00
fixes #70
This commit is contained in:
parent
d22ff59b82
commit
d14bb89583
168
Utils/StringCompare.php
Normal file
168
Utils/StringCompare.php
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
<?php
|
||||
/**
|
||||
* Orange Management
|
||||
*
|
||||
* PHP Version 7.1
|
||||
*
|
||||
* @category TBD
|
||||
* @package TBD
|
||||
* @copyright Dennis Eichhorn
|
||||
* @license OMS License 1.0
|
||||
* @version 1.0.0
|
||||
* @link http://orange-management.com
|
||||
*/
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace phpOMS\Utils;
|
||||
|
||||
/**
|
||||
* String comparison class.
|
||||
*
|
||||
* This class helps to compare two strings
|
||||
*
|
||||
* @category Framework
|
||||
* @package phpOMS\Utils
|
||||
* @license OMS License 1.0
|
||||
* @link http://orange-management.com
|
||||
* @since 1.0.0
|
||||
*/
|
||||
class StringCompare
{
    /**
     * Dictionary.
     *
     * List of candidate words that matchDictionary() compares against.
     *
     * @var array
     * @since 1.0.0
     */
    private $dictionary = [];

    /**
     * Constructor.
     *
     * @param array $dictionary Dictionary (list of candidate strings)
     *
     * @since 1.0.0
     */
    public function __construct(array $dictionary)
    {
        $this->dictionary = $dictionary;
    }

    /**
     * Match word against dictionary.
     *
     * Every dictionary entry is scored with fuzzyMatch() (default weights);
     * the entry with the lowest score is returned. On equal scores the entry
     * that appears first in the dictionary wins.
     *
     * @param string $match Word to match against dictionary
     *
     * @return string Best match, or an empty string if the dictionary is empty
     *
     * @since 1.0.0
     */
    public function matchDictionary(string $match) : string
    {
        $bestScore = PHP_INT_MAX;
        $bestMatch = '';

        foreach ($this->dictionary as $word) {
            $score = self::fuzzyMatch($word, $match);

            if ($score < $bestScore) {
                // Remember the score as well, otherwise every later word would replace the match.
                $bestScore = $score;
                $bestMatch = $word;
            }
        }

        return $bestMatch;
    }

    /**
     * Calculate word match score.
     *
     * Both strings are split into words on space, underscore and hyphen.
     * For every word of $s1 the smallest levenshtein distance to any word of
     * $s2 is determined (capped at the byte length of $s2) and these minima
     * are summed up. Lower values mean a better match.
     *
     * @param string $s1 Word 1
     * @param string $s2 Word 2
     *
     * @return int
     *
     * @since 1.0.0
     */
    public static function valueWords(string $s1, string $s2) : int
    {
        // preg_split returns false on a PCRE error; treat that as "no words".
        $words1 = preg_split('/[ _-]/', $s1) ?: [];
        $words2 = preg_split('/[ _-]/', $s2) ?: [];
        $total  = 0;

        foreach ($words1 as $word1) {
            // Upper bound for the distance of this word.
            $best = strlen($s2);

            foreach ($words2 as $word2) {
                $wordDist = levenshtein($word1, $word2);

                if ($wordDist < $best) {
                    $best = $wordDist;
                }

                // Exact word match, nothing can be better.
                if ($wordDist === 0) {
                    break;
                }
            }

            $total += $best;
        }

        return $total;
    }

    /**
     * Calculate phrase match score.
     *
     * This is the levenshtein distance between the two complete strings.
     *
     * @param string $s1 Word 1
     * @param string $s2 Word 2
     *
     * @return int
     *
     * @since 1.0.0
     */
    public static function valuePhrase(string $s1, string $s2) : int
    {
        return levenshtein($s1, $s2);
    }

    /**
     * Calculate word length score.
     *
     * This is the absolute difference of the byte lengths of the two strings.
     *
     * @param string $s1 Word 1
     * @param string $s2 Word 2
     *
     * @return int
     *
     * @since 1.0.0
     */
    public static function valueLength(string $s1, string $s2) : int
    {
        return abs(strlen($s1) - strlen($s2));
    }

    /**
     * Calculate fuzzy match score.
     *
     * The score combines the weighted phrase score and the weighted word
     * score: the smaller of the two is multiplied by $minWeight, the larger
     * one by $maxWeight, and the weighted length difference is added.
     * matchDictionary() treats a lower score as the better match.
     *
     * @param string $s1           Word 1
     * @param string $s2           Word 2
     * @param float  $phraseWeight Weighting for phrase score
     * @param float  $wordWeight   Weighting for word score
     * @param float  $minWeight    Min weight
     * @param float  $maxWeight    Max weight
     * @param float  $lengthWeight Weighting for word length
     *
     * @return float
     *
     * @since 1.0.0
     */
    public static function fuzzyMatch(string $s1, string $s2, float $phraseWeight = 0.5, float $wordWeight = 1, float $minWeight = 10, float $maxWeight = 1, float $lengthWeight = -0.3) : float
    {
        // These are static methods of this class, not global functions.
        $phraseValue = self::valuePhrase($s1, $s2);
        $wordValue   = self::valueWords($s1, $s2);
        $lengthValue = self::valueLength($s1, $s2);

        $weightedPhrase = $phraseValue * $phraseWeight;
        $weightedWord   = $wordValue * $wordWeight;

        return min($weightedPhrase, $weightedWord) * $minWeight
            + max($weightedPhrase, $weightedWord) * $maxWeight
            + $lengthValue * $lengthWeight;
    }
}
|
||||
Loading…
Reference in New Issue
Block a user