Add search and comparison functionality

This commit is contained in:
Dennis Eichhorn 2020-10-07 20:57:08 +02:00
parent 33db2fd2da
commit 1cb7a1ed77
7 changed files with 386 additions and 8 deletions

View File

@ -0,0 +1,186 @@
<?php
/**
* Orange Management
*
* PHP Version 7.4
*
* @package phpOMS\System\Search
* @copyright Dennis Eichhorn
* @license OMS License 1.0
* @version 1.0.0
* @link https://orange-management.org
*/
declare(strict_types=1);
namespace phpOMS\System\Search;
/**
* Basic string search algorithms.
*
* @package phpOMS\System\Search
* @license OMS License 1.0
* @link https://orange-management.org
* @since 1.0.0
*/
abstract class StringSearch
{
    /**
     * Constructor.
     *
     * Private because the class only offers static search helpers.
     *
     * @since 1.0.0
     * @codeCoverageIgnore
     */
    private function __construct()
    {
    }

    /**
     * Find pattern in string (Knuth-Morris-Pratt)
     *
     * The comparison is byte based (\strlen and string offsets).
     *
     * @param string $pattern Pattern
     * @param string $text    Text to search in
     *
     * @return int Byte offset of the first match, 0 for an empty pattern, -1 if the pattern is not found
     *
     * @since 1.0.0
     */
    public static function knuthMorrisPrattSearch(string $pattern, string $text) : int
    {
        $patternSize = \strlen($pattern);
        $textSize    = \strlen($text);

        // An empty pattern matches right at the start; this also avoids reading $pattern[0] below.
        if ($patternSize === 0) {
            return 0;
        }

        $shift = self::knuthMorrisPrattShift($pattern);

        // The scan has to start at offset 0, otherwise a match at the very beginning is skipped.
        $i = 0;
        $j = 0;

        while ($i + $patternSize <= $textSize) {
            // $j < $patternSize always holds here, so $i + $j stays inside $text.
            while ($text[$i + $j] === $pattern[$j]) {
                ++$j;

                if ($j >= $patternSize) {
                    return $i;
                }
            }

            if ($j > 0) {
                // Re-use the already matched prefix: move the window by the pre-computed
                // shift and keep the part of the prefix that still lines up.
                $i += $shift[$j - 1];
                $j  = \max($j - $shift[$j - 1], 0);
            } else {
                ++$i;
            }
        }

        return -1;
    }

    /**
     * Create shift array
     *
     * Entry k holds how far the search window is moved after the first k + 1
     * pattern bytes matched and the following byte did not.
     *
     * @param string $pattern Pattern
     *
     * @return int[]
     *
     * @since 1.0.0
     */
    private static function knuthMorrisPrattShift(string $pattern) : array
    {
        $patternSize = \strlen($pattern);
        $shift       = [1];

        $i = 1;
        $j = 0;

        while ($i + $j < $patternSize) {
            if ($pattern[$i + $j] === $pattern[$j]) {
                $shift[$i + $j] = $i;
                ++$j;

                continue;
            }

            if ($j === 0) {
                $shift[$i] = $i + 1;
            }

            if ($j > 0) {
                $i += $shift[$j - 1];
                $j  = \max($j - $shift[$j - 1], 0);
            } else {
                ++$i;
            }
        }

        return $shift;
    }

    /**
     * Find pattern in string (right-to-left comparison, window moved one byte at a time)
     *
     * The comparison is byte based (\strlen and string offsets).
     *
     * @param string $pattern Pattern
     * @param string $text    Text to search in
     *
     * @return int Byte offset of the first match, 0 for an empty pattern, -1 if the pattern is not found
     *
     * @since 1.0.0
     */
    public static function boyerMooreHorspoolSimpleSearch(string $pattern, string $text) : int
    {
        $patternSize = \strlen($pattern);
        $textSize    = \strlen($text);

        // Without this guard $j would start at -1 and index both strings with a negative offset.
        if ($patternSize === 0) {
            return 0;
        }

        $i = 0;

        while ($i + $patternSize <= $textSize) {
            // Compare the window from its last byte backwards.
            $j = $patternSize - 1;

            while ($text[$i + $j] === $pattern[$j]) {
                --$j;

                if ($j < 0) {
                    return $i;
                }
            }

            ++$i;
        }

        return -1;
    }

    /**
     * Find pattern in string (Boyer-Moore-Horspool)
     *
     * The comparison is byte based (\strlen, \ord and string offsets).
     *
     * @param string $pattern Pattern
     * @param string $text    Text to search in
     *
     * @return int Byte offset of the first match, 0 for an empty pattern, -1 if the pattern is not found
     *
     * @since 1.0.0
     */
    public static function boyerMooreHorspoolSearch(string $pattern, string $text) : int
    {
        $patternSize = \strlen($pattern);
        $textSize    = \strlen($text);

        // Without this guard $j would start at -1 and index both strings with a negative offset.
        if ($patternSize === 0) {
            return 0;
        }

        // Default: a byte that is not part of the pattern allows skipping the whole window.
        $shift = \array_fill(0, 256, $patternSize);

        // The last pattern byte is left out on purpose, a shift of 0 would never advance the window.
        for ($k = 0; $k < $patternSize - 1; ++$k) {
            $shift[\ord($pattern[$k])] = $patternSize - 1 - $k;
        }

        $i = 0;

        while ($i + $patternSize <= $textSize) {
            // Compare the window from its last byte backwards.
            $j = $patternSize - 1;

            while ($text[$i + $j] === $pattern[$j]) {
                --$j;

                if ($j < 0) {
                    return $i;
                }
            }

            // The text byte under the end of the window decides how far the window can move.
            $i += $shift[\ord($text[$i + $patternSize - 1])];
        }

        return -1;
    }
}

View File

@ -26,10 +26,6 @@ use phpOMS\System\CharsetType;
* @link https://orange-management.org
* @since 1.0.0
*
* @todo Orange-Management/phpOMS#119
* Create jaro winkler distance
* https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
*
* @SuppressWarnings(PHPMD.CamelCaseMethodName)
*/
final class MbStringUtils

View File

@ -86,6 +86,83 @@ final class StringCompare
return $bestMatch;
}
/**
 * Jaro similarity of two strings
 *
 * Both strings are compared byte by byte. Bytes count as matching when they are
 * equal and not further apart than the match window; matched bytes that appear
 * in a different order in both strings are counted as transpositions.
 *
 * @param string $s1 String1
 * @param string $s2 String2
 *
 * @return float 1.0 if both strings are empty, 0.0 if no bytes match, otherwise the Jaro score
 *
 * @since 1.0.0
 */
public static function jaro(string $s1, string $s2) : float
{
    $len1 = \strlen($s1);
    $len2 = \strlen($s2);

    if ($len1 === 0) {
        return $len2 === 0 ? 1.0 : 0.0;
    }

    // Maximum index distance at which two equal bytes still count as a match.
    $window = (int) (\max($len1, $len2) / 2 - 1);

    $matched1 = [];
    $matched2 = [];
    $common   = 0;

    for ($a = 0; $a < $len1; ++$a) {
        $upper = \min($a + $window + 1, $len2);

        for ($b = \max(0, $a - $window); $b < $upper; ++$b) {
            // Every byte of $s2 may be paired only once.
            if (isset($matched2[$b]) || $s1[$a] !== $s2[$b]) {
                continue;
            }

            $matched1[$a] = true;
            $matched2[$b] = true;
            ++$common;

            break;
        }
    }

    if ($common === 0) {
        return 0.0;
    }

    // Walk both match lists in order; the keys of $matched1 were inserted in ascending order.
    $outOfOrder = 0;
    $b          = 0;

    foreach ($matched1 as $a => $_) {
        while (!isset($matched2[$b])) {
            ++$b;
        }

        if ($s1[$a] !== $s2[$b]) {
            ++$outOfOrder;
        }

        ++$b;
    }

    // Two out-of-order bytes make up one transposition.
    $transposed = $outOfOrder / 2.0;

    return ($common / $len1
        + $common / $len2
        + ($common - $transposed) / $common)
        / 3.0;
}
/**
* Calculate word match score.
*

View File

@ -26,10 +26,6 @@ use phpOMS\Contract\RenderableInterface;
* @link https://orange-management.org
* @since 1.0.0
*
* @todo Orange-Management/phpOMS#119
* Create jaro winkler distance
* https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
*
* @SuppressWarnings(PHPMD.CamelCaseMethodName)
*/
final class StringUtils

View File

@ -0,0 +1,54 @@
<?php
/**
* Orange Management
*
* PHP Version 7.4
*
* @package tests
* @copyright Dennis Eichhorn
* @license OMS License 1.0
* @version 1.0.0
* @link https://orange-management.org
*/
declare(strict_types=1);
namespace phpOMS\tests\System;
require_once __DIR__ . '/../Autoloader.php';
use phpOMS\System\CharsetType;
/**
* @internal
*/
class CharsetTypeTest extends \PHPUnit\Framework\TestCase
{
    /**
     * The enum has to expose exactly three constants.
     *
     * @group framework
     * @coversNothing
     */
    public function testEnumCount() : void
    {
        $constants = CharsetType::getConstants();

        self::assertCount(3, $constants);
    }

    /**
     * No two constants may share the same value.
     *
     * @group framework
     * @coversNothing
     */
    public function testUnique() : void
    {
        $all          = CharsetType::getConstants();
        $deduplicated = \array_unique(CharsetType::getConstants());

        self::assertEquals($all, $deduplicated);
    }

    /**
     * Every constant has to carry the expected charset name.
     *
     * @group framework
     * @coversNothing
     */
    public function testEnums() : void
    {
        $expected = [
            'us-ascii'   => CharsetType::ASCII,
            'iso-8859-1' => CharsetType::ISO_8859_1,
            'utf-8'      => CharsetType::UTF_8,
        ];

        foreach ($expected as $name => $constant) {
            self::assertEquals($name, $constant);
        }
    }
}

View File

@ -0,0 +1,55 @@
<?php
/**
* Orange Management
*
* PHP Version 7.4
*
* @package tests
* @copyright Dennis Eichhorn
* @license OMS License 1.0
* @version 1.0.0
* @link https://orange-management.org
*/
declare(strict_types=1);
namespace phpOMS\tests\System\Search;
use phpOMS\System\Search\StringSearch;
/**
* @testdox phpOMS\tests\System\Search\StringSearchTest: Search utilities
*
* @internal
*/
class StringSearchTest extends \PHPUnit\Framework\TestCase
{
    /** Text all searches run against. */
    private const HAYSTACK = 'ABC ABCDAB ABCDABCDABDE';

    /** Pattern contained in the text. */
    private const KNOWN_PATTERN = 'ABCDABD';

    /** Pattern not contained in the text. */
    private const UNKNOWN_PATTERN = 'ABCDABDZ';

    /** Expected position of the contained pattern. */
    private const KNOWN_POSITION = 15;

    /**
     * Knuth-Morris-Pratt reports the position of a contained pattern.
     */
    public function testKnutMorrisPrattSearch() : void
    {
        $position = StringSearch::knuthMorrisPrattSearch(self::KNOWN_PATTERN, self::HAYSTACK);

        self::assertEquals(self::KNOWN_POSITION, $position);
    }

    /**
     * Knuth-Morris-Pratt reports -1 for a pattern that is not contained.
     */
    public function testInvalidKnutMorrisPrattSearch() : void
    {
        $position = StringSearch::knuthMorrisPrattSearch(self::UNKNOWN_PATTERN, self::HAYSTACK);

        self::assertEquals(-1, $position);
    }

    /**
     * The simple Boyer-Moore-Horspool variant reports the position of a contained pattern.
     */
    public function testBoyerMooreHorspoolSimpleSearch() : void
    {
        $position = StringSearch::boyerMooreHorspoolSimpleSearch(self::KNOWN_PATTERN, self::HAYSTACK);

        self::assertEquals(self::KNOWN_POSITION, $position);
    }

    /**
     * The simple Boyer-Moore-Horspool variant reports -1 for a pattern that is not contained.
     */
    public function testInvalidBoyerMooreHorspoolSimpleSearch() : void
    {
        $position = StringSearch::boyerMooreHorspoolSimpleSearch(self::UNKNOWN_PATTERN, self::HAYSTACK);

        self::assertEquals(-1, $position);
    }

    /**
     * Boyer-Moore-Horspool reports the position of a contained pattern.
     */
    public function testBoyerMooreHorspoolSearch() : void
    {
        $position = StringSearch::boyerMooreHorspoolSearch(self::KNOWN_PATTERN, self::HAYSTACK);

        self::assertEquals(self::KNOWN_POSITION, $position);
    }

    /**
     * Boyer-Moore-Horspool reports -1 for a pattern that is not contained.
     */
    public function testInvalidBoyerMooreHorspoolSearch() : void
    {
        $position = StringSearch::boyerMooreHorspoolSearch(self::UNKNOWN_PATTERN, self::HAYSTACK);

        self::assertEquals(-1, $position);
    }
}

View File

@ -94,4 +94,18 @@ class StringCompareTest extends \PHPUnit\Framework\TestCase
// a is compared to is which has a distance of 2
self::assertEquals(2, StringCompare::valueWords('This is a test', 'This is not test'));
}
public function testJaro() : void
{
    // Expected score followed by the two strings to compare.
    $cases = [
        [0.944444, 'MARTHA', 'MARHTA'],
        [0.766667, 'DIXON', 'DICKSONX'],
        [0.896296, 'JELLYFISH', 'SMELLYFISH'],
    ];

    foreach ($cases as [$expected, $first, $second]) {
        self::assertEqualsWithDelta($expected, StringCompare::jaro($first, $second), 0.01);
    }
}
public function testJaroEmpty() : void
{
    // Two empty strings are treated as identical.
    $bothEmpty = StringCompare::jaro('', '');
    self::assertEquals(1.0, $bothEmpty);

    // An empty string has nothing in common with a non-empty one, whichever side it is on.
    $firstEmpty = StringCompare::jaro('', 'test');
    self::assertEquals(0.0, $firstEmpty);

    $secondEmpty = StringCompare::jaro('test', '');
    self::assertEquals(0.0, $secondEmpty);
}
}