mirror of
https://github.com/Karaka-Management/phpOMS.git
synced 2026-01-11 17:58:41 +00:00
46 lines
1.1 KiB
PHP
Executable File
46 lines
1.1 KiB
PHP
Executable File
<?php
|
|
/**
|
|
* Karaka
|
|
*
|
|
* PHP Version 8.1
|
|
*
|
|
* @package phpOMS\Localization\LanguageDetection\Tokenizer
|
|
* @author Patrick Schur <patrick_schur@outlook.de>
|
|
* @copyright Patrick Schur
|
|
* @license https://opensource.org/licenses/mit-license.html MIT
|
|
* @link https://github.com/patrickschur/language-detection
|
|
*/
|
|
declare(strict_types = 1);
|
|
|
|
namespace phpOMS\Localization\LanguageDetection\Tokenizer;
|
|
|
|
/**
|
|
* Whitespace tokenizer
|
|
*
|
|
* @package phpOMS\Localization\LanguageDetection\Tokenizer
|
|
* @license https://opensource.org/licenses/mit-license.html MIT
|
|
* @link https://github.com/patrickschur/language-detection
|
|
* @since 1.0.0
|
|
*/
|
|
class WhitespaceTokenizer
{
    /**
     * Tokenize string
     *
     * Splits the string on runs of non-letter characters and wraps every
     * resulting word in underscores (e.g. "Hello world" => ["_Hello_", "_world_"]).
     *
     * A separator run is never allowed to end in an apostrophe (U+0027), a
     * backtick (U+0060) or a right single quotation mark (U+2019), which is
     * why a word such as "don't" stays a single token.
     *
     * @param string $str String to tokenize
     *
     * @return string[] Words wrapped in underscores; empty if the string
     *                  contains no words or cannot be split (e.g. malformed UTF-8)
     *
     * @since 1.0.0
     */
    public function tokenize(string $str): array
    {
        $words = \preg_split('/[^\pL]+(?<![\x27\x60\x{2019}])/u', $str, -1, \PREG_SPLIT_NO_EMPTY);

        // preg_split() returns false on failure (e.g. malformed UTF-8 in
        // combination with the /u modifier). Passing that on to array_map()
        // would raise a TypeError, so treat it as "no tokens".
        if ($words === false) {
            return [];
        }

        return \array_map(
            static fn (string $word): string => "_{$word}_",
            $words
        );
    }
}
|