mirror of
https://github.com/Karaka-Management/phpOMS.git
synced 2026-01-19 21:18:39 +00:00
Add search and comparison functionality
This commit is contained in:
parent
33db2fd2da
commit
1cb7a1ed77
186
System/Search/StringSearch.php
Normal file
186
System/Search/StringSearch.php
Normal file
|
|
@ -0,0 +1,186 @@
|
|||
<?php
|
||||
/**
|
||||
* Orange Management
|
||||
*
|
||||
* PHP Version 7.4
|
||||
*
|
||||
* @package phpOMS\System\Search
|
||||
* @copyright Dennis Eichhorn
|
||||
* @license OMS License 1.0
|
||||
* @version 1.0.0
|
||||
* @link https://orange-management.org
|
||||
*/
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace phpOMS\System\Search;
|
||||
|
||||
/**
 * Basic string search algorithms.
 *
 * All searches operate on bytes (`strlen`, byte offsets), return the byte
 * offset of the first occurrence of the pattern, and return -1 if the pattern
 * does not occur. An empty pattern is treated as matching at offset 0.
 *
 * @package phpOMS\System\Search
 * @license OMS License 1.0
 * @link    https://orange-management.org
 * @since   1.0.0
 */
abstract class StringSearch
{
    /**
     * Constructor.
     *
     * Private and empty: the class only exposes static methods.
     *
     * @since 1.0.0
     * @codeCoverageIgnore
     */
    private function __construct()
    {
    }

    /**
     * Find pattern in string (Knuth-Morris-Pratt)
     *
     * @param string $pattern Pattern
     * @param string $text    Text to search in
     *
     * @return int Byte offset of the first match, 0 for an empty pattern, -1 if there is no match
     *
     * @since 1.0.0
     */
    public static function knuthMorrisPrattSearch(string $pattern, string $text) : int
    {
        $patternSize = \strlen($pattern);
        $textSize    = \strlen($text);

        // An empty pattern must not reach the comparison loop: $pattern[0] would be out of range.
        if ($patternSize === 0) {
            return 0;
        }

        if ($patternSize > $textSize) {
            return -1;
        }

        $shift = self::knuthMorrisPrattShift($pattern);

        // $i is the candidate match offset in $text, $j the number of pattern bytes already matched.
        // The scan has to start at offset 0, otherwise a match at the very beginning is missed.
        $i = 0;
        $j = 0;
        while ($i + $patternSize <= $textSize) {
            while ($text[$i + $j] === $pattern[$j]) {
                ++$j;
                if ($j >= $patternSize) {
                    return $i;
                }
            }

            if ($j > 0) {
                // Advance the window by the precomputed shift and keep the bytes that still match.
                $step = $shift[$j - 1];
                $i   += $step;
                $j    = \max($j - $step, 0);
            } else {
                ++$i;
            }
        }

        return -1;
    }

    /**
     * Create shift array
     *
     * Entry k holds how far the search window is advanced after the first
     * k + 1 bytes of the pattern matched and the following byte did not.
     *
     * @param string $pattern Pattern
     *
     * @return int[]
     *
     * @since 1.0.0
     */
    private static function knuthMorrisPrattShift(string $pattern) : array
    {
        $patternSize = \strlen($pattern);
        $shift       = [1];

        // The pattern is matched against itself: $i is the offset of the shifted copy,
        // $j the number of bytes of the copy that currently match.
        $i = 1;
        $j = 0;
        while ($i + $j < $patternSize) {
            if ($pattern[$i + $j] === $pattern[$j]) {
                $shift[$i + $j] = $i;
                ++$j;

                continue;
            }

            if ($j === 0) {
                // No overlap at all at this offset: the whole prefix can be skipped.
                $shift[$i] = $i + 1;
                ++$i;

                continue;
            }

            $step = $shift[$j - 1];
            $i   += $step;
            $j    = \max($j - $step, 0);
        }

        return $shift;
    }

    /**
     * Find pattern in string (right-to-left comparison, window advanced one byte at a time)
     *
     * @param string $pattern Pattern
     * @param string $text    Text to search in
     *
     * @return int Byte offset of the first match, 0 for an empty pattern, -1 if there is no match
     *
     * @since 1.0.0
     */
    public static function boyerMooreHorspoolSimpleSearch(string $pattern, string $text) : int
    {
        $patternSize = \strlen($pattern);
        $textSize    = \strlen($text);

        // Without this guard $j would start at -1 and index both strings out of range.
        if ($patternSize === 0) {
            return 0;
        }

        for ($i = 0; $i + $patternSize <= $textSize; ++$i) {
            $j = $patternSize - 1;
            while ($j >= 0 && $text[$i + $j] === $pattern[$j]) {
                --$j;
            }

            if ($j < 0) {
                return $i;
            }
        }

        return -1;
    }

    /**
     * Find pattern in string (Boyer-Moore-Horspool with a 256 entry bad-character table)
     *
     * @param string $pattern Pattern
     * @param string $text    Text to search in
     *
     * @return int Byte offset of the first match, 0 for an empty pattern, -1 if there is no match
     *
     * @since 1.0.0
     */
    public static function boyerMooreHorspoolSearch(string $pattern, string $text) : int
    {
        $patternSize = \strlen($pattern);
        $textSize    = \strlen($text);

        // An empty pattern would produce an all-zero shift table and the window would never advance.
        if ($patternSize === 0) {
            return 0;
        }

        // Default shift for bytes that do not occur in the pattern (ignoring its last byte).
        $shift = \array_fill(0, 256, $patternSize);

        for ($k = 0; $k < $patternSize - 1; ++$k) {
            $shift[\ord($pattern[$k])] = $patternSize - 1 - $k;
        }

        $i = 0;
        while ($i + $patternSize <= $textSize) {
            $j = $patternSize - 1;
            while ($j >= 0 && $text[$i + $j] === $pattern[$j]) {
                --$j;
            }

            if ($j < 0) {
                return $i;
            }

            // The shift is chosen by the text byte aligned with the last pattern position.
            $i += $shift[\ord($text[$i + $patternSize - 1])];
        }

        return -1;
    }
}
|
||||
|
|
@ -26,10 +26,6 @@ use phpOMS\System\CharsetType;
|
|||
* @link https://orange-management.org
|
||||
* @since 1.0.0
|
||||
*
|
||||
* @todo Orange-Management/phpOMS#119
|
||||
* Create jaro winkler distance
|
||||
* https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
|
||||
*
|
||||
* @SuppressWarnings(PHPMD.CamelCaseMethodName)
|
||||
*/
|
||||
final class MbStringUtils
|
||||
|
|
|
|||
|
|
@ -86,6 +86,83 @@ final class StringCompare
|
|||
return $bestMatch;
|
||||
}
|
||||
|
||||
/**
 * Jaro string distance
 *
 * Compares the two strings byte by byte. Returns 1.0 when both strings are
 * empty and 0.0 when no byte of one string can be matched in the other.
 *
 * @param string $s1 String1
 * @param string $s2 String2
 *
 * @return float
 *
 * @since 1.0.0
 */
public static function jaro(string $s1, string $s2) : float
{
    $len1 = \strlen($s1);
    $len2 = \strlen($s2);

    if ($len1 === 0) {
        if ($len2 === 0) {
            return 1.0;
        }

        return 0.0;
    }

    // Two bytes only count as a match if their positions differ by at most $window.
    $window = (int) (\max($len1, $len2) / 2 - 1);

    $matchedInS1 = [];
    $matchedInS2 = [];
    $common      = 0;

    for ($a = 0; $a < $len1; ++$a) {
        $upper = \min($a + $window + 1, $len2);

        for ($b = \max(0, $a - $window); $b < $upper; ++$b) {
            // Every position of $s2 can be claimed by one position of $s1 only.
            if (!isset($matchedInS2[$b]) && $s1[$a] === $s2[$b]) {
                $matchedInS1[$a] = true;
                $matchedInS2[$b] = true;
                ++$common;

                break;
            }
        }
    }

    if ($common === 0) {
        return 0.0;
    }

    // Walk both match lists in order and count positions where the matched bytes differ.
    $outOfOrder = 0;
    $cursor     = 0;
    for ($a = 0; $a < $len1; ++$a) {
        if (isset($matchedInS1[$a])) {
            while (!isset($matchedInS2[$cursor])) {
                ++$cursor;
            }

            if ($s1[$a] !== $s2[$cursor]) {
                ++$outOfOrder;
            }

            ++$cursor;
        }
    }

    $transposed = $outOfOrder / 2.0;

    $shareOfS1 = $common / $len1;
    $shareOfS2 = $common / $len2;
    $inOrder   = ($common - $transposed) / $common;

    return ($shareOfS1 + $shareOfS2 + $inOrder) / 3.0;
}
|
||||
|
||||
/**
|
||||
* Calculate word match score.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -26,10 +26,6 @@ use phpOMS\Contract\RenderableInterface;
|
|||
* @link https://orange-management.org
|
||||
* @since 1.0.0
|
||||
*
|
||||
* @todo Orange-Management/phpOMS#119
|
||||
* Create jaro winkler distance
|
||||
* https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
|
||||
*
|
||||
* @SuppressWarnings(PHPMD.CamelCaseMethodName)
|
||||
*/
|
||||
final class StringUtils
|
||||
|
|
|
|||
54
tests/System/CharsetTypeTest.php
Normal file
54
tests/System/CharsetTypeTest.php
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
<?php
|
||||
/**
|
||||
* Orange Management
|
||||
*
|
||||
* PHP Version 7.4
|
||||
*
|
||||
* @package tests
|
||||
* @copyright Dennis Eichhorn
|
||||
* @license OMS License 1.0
|
||||
* @version 1.0.0
|
||||
* @link https://orange-management.org
|
||||
*/
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace phpOMS\tests\System;
|
||||
|
||||
require_once __DIR__ . '/../Autoloader.php';
|
||||
|
||||
use phpOMS\System\CharsetType;
|
||||
|
||||
/**
 * Checks the constants exposed by CharsetType.
 *
 * @internal
 */
class CharsetTypeTest extends \PHPUnit\Framework\TestCase
{
    /**
     * The enum must expose exactly three constants.
     *
     * @group framework
     * @coversNothing
     */
    public function testEnumCount() : void
    {
        $constants = CharsetType::getConstants();

        self::assertCount(3, $constants);
    }

    /**
     * Removing duplicate values must not change the constant list.
     *
     * @group framework
     * @coversNothing
     */
    public function testUnique() : void
    {
        $all          = CharsetType::getConstants();
        $deduplicated = \array_unique(CharsetType::getConstants());

        self::assertEquals($all, $deduplicated);
    }

    /**
     * Every constant must carry its expected charset name.
     *
     * @group framework
     * @coversNothing
     */
    public function testEnums() : void
    {
        $expectations = [
            'us-ascii'   => CharsetType::ASCII,
            'iso-8859-1' => CharsetType::ISO_8859_1,
            'utf-8'      => CharsetType::UTF_8,
        ];

        foreach ($expectations as $expected => $actual) {
            self::assertEquals($expected, $actual);
        }
    }
}
|
||||
55
tests/System/Search/StringSearchTest.php
Normal file
55
tests/System/Search/StringSearchTest.php
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
<?php
|
||||
/**
|
||||
* Orange Management
|
||||
*
|
||||
* PHP Version 7.4
|
||||
*
|
||||
* @package tests
|
||||
* @copyright Dennis Eichhorn
|
||||
* @license OMS License 1.0
|
||||
* @version 1.0.0
|
||||
* @link https://orange-management.org
|
||||
*/
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace phpOMS\tests\System\Search;
|
||||
|
||||
use phpOMS\System\Search\StringSearch;
|
||||
|
||||
/**
 * @testdox phpOMS\tests\System\Search\StringSearchTest: Search utilities
 *
 * Every search is run against the same text, once with a pattern that is
 * contained in it at offset 15 and once with a pattern that is not contained.
 *
 * @internal
 */
class StringSearchTest extends \PHPUnit\Framework\TestCase
{
    /** Text that is searched in all tests. */
    private const HAYSTACK = 'ABC ABCDAB ABCDABCDABDE';

    /** Pattern that occurs in the text. */
    private const PRESENT = 'ABCDABD';

    /** Pattern that does not occur in the text. */
    private const ABSENT = 'ABCDABDZ';

    public function testKnutMorrisPrattSearch() : void
    {
        $position = StringSearch::knuthMorrisPrattSearch(self::PRESENT, self::HAYSTACK);

        self::assertEquals(15, $position);
    }

    public function testInvalidKnutMorrisPrattSearch() : void
    {
        $position = StringSearch::knuthMorrisPrattSearch(self::ABSENT, self::HAYSTACK);

        self::assertEquals(-1, $position);
    }

    public function testBoyerMooreHorspoolSimpleSearch() : void
    {
        $position = StringSearch::boyerMooreHorspoolSimpleSearch(self::PRESENT, self::HAYSTACK);

        self::assertEquals(15, $position);
    }

    public function testInvalidBoyerMooreHorspoolSimpleSearch() : void
    {
        $position = StringSearch::boyerMooreHorspoolSimpleSearch(self::ABSENT, self::HAYSTACK);

        self::assertEquals(-1, $position);
    }

    public function testBoyerMooreHorspoolSearch() : void
    {
        $position = StringSearch::boyerMooreHorspoolSearch(self::PRESENT, self::HAYSTACK);

        self::assertEquals(15, $position);
    }

    public function testInvalidBoyerMooreHorspoolSearch() : void
    {
        $position = StringSearch::boyerMooreHorspoolSearch(self::ABSENT, self::HAYSTACK);

        self::assertEquals(-1, $position);
    }
}
|
||||
|
|
@ -94,4 +94,18 @@ class StringCompareTest extends \PHPUnit\Framework\TestCase
|
|||
// a is compared to is which has a distance of 2
|
||||
self::assertEquals(2, StringCompare::valueWords('This is a test', 'This is not test'));
|
||||
}
|
||||
|
||||
public function testJaro() : void
{
    // Each entry: first string, second string, expected score (checked with a delta of 0.01).
    $cases = [
        ['MARTHA', 'MARHTA', 0.944444],
        ['DIXON', 'DICKSONX', 0.766667],
        ['JELLYFISH', 'SMELLYFISH', 0.896296],
    ];

    foreach ($cases as [$first, $second, $expected]) {
        self::assertEqualsWithDelta($expected, StringCompare::jaro($first, $second), 0.01);
    }
}
|
||||
|
||||
public function testJaroEmpty() : void
{
    // Two empty strings get the full score.
    $bothEmpty = StringCompare::jaro('', '');
    self::assertEquals(1.0, $bothEmpty);

    // A single empty operand, on either side, gets a score of zero.
    $firstEmpty = StringCompare::jaro('', 'test');
    self::assertEquals(0.0, $firstEmpty);

    $secondEmpty = StringCompare::jaro('test', '');
    self::assertEquals(0.0, $secondEmpty);
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user