From 4b2ab3af739b4b1faf66bf7718929486dcc4e01e Mon Sep 17 00:00:00 2001 From: Dennis Eichhorn Date: Fri, 15 Dec 2017 23:48:52 +0100 Subject: [PATCH] Add markdown --- Utils/Parser/Markdown/License.txt | 2 +- Utils/Parser/Markdown/Markdown.php | 1171 ++++++++++++++++++++++++++-- 2 files changed, 1118 insertions(+), 55 deletions(-) diff --git a/Utils/Parser/Markdown/License.txt b/Utils/Parser/Markdown/License.txt index b5a9d32b1..baca86f5b 100644 --- a/Utils/Parser/Markdown/License.txt +++ b/Utils/Parser/Markdown/License.txt @@ -17,4 +17,4 @@ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/Utils/Parser/Markdown/Markdown.php b/Utils/Parser/Markdown/Markdown.php index 94276dff8..2e84af41b 100644 --- a/Utils/Parser/Markdown/Markdown.php +++ b/Utils/Parser/Markdown/Markdown.php @@ -8,6 +8,7 @@ * @package TBD * @copyright Dennis Eichhorn * @license OMS License 1.0 + * @license Original license Emanuil Rusev, erusev.com (MIT) * @version 1.0.0 * @link http://website.orange-management.de */ @@ -16,17 +17,18 @@ declare(strict_types = 1); namespace phpOMS\Utils\Parser\Markdown; /** - * Array utils. + * Markdown parser class. * * @category Framework - * @package phpOMS\Utils + * @package phpOMS\Utils\Parser * @license OMS License 1.0 + * @license Original license Emanuil Rusev, erusev.com (MIT) * @link http://website.orange-management.de * @since 1.0.0 */ class Markdown { - private static $blockTypes = [ + protected static $blockTypes = [ '#' => ['Header'], '*' => ['Rule', 'List'], '+' => ['List'], @@ -52,72 +54,1133 @@ class Markdown '~' => ['FencedCode'], ]; - private static $inlineTypes = [ - '"' => ['SpecialCharacter'], - '!' => ['Image'], - '&' => ['SpecialCharacter'], - '*' => ['Emphasis'], - ':' => ['Url'], - '<' => ['UrlTag', 'EmailTag', 'Markup', 'SpecialCharacter'], - '>' => ['SpecialCharacter'], - '[' => ['Link'], - '_' => ['Emphasis'], - '`' => ['Code'], - '~' => ['Strikethrough'], + protected static $unmarkedBlockTypes = [ + 'Code', + ]; + + protected static $specialCharacters = [ + '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!', '|', + ]; + + protected static $strongRegex = [ + '*' => '/^[*]{2}((?:\\\\\*|[^*]|[*][^*]*[*])+?)[*]{2}(?![*])/s', + '_' => '/^__((?:\\\\_|[^_]|_[^_]*_)+?)__(?!_)/us', + ]; + + protected static $emRegex = [ + '*' => '/^[*]((?:\\\\\*|[^*]|[*][*][^*]+?[*][*])+?)[*](?![*])/s', + '_' => '/^_((?:\\\\_|[^_]|__[^_]*__)+?)_(?!_)\b/us', + ]; + + protected static $regexHtmlAttribute = '[a-zA-Z_:][\w:.-]*(?:\s*=\s*(?:[^"\'=<>`\s]+|"[^"]*"|\'[^\']*\'))?'; + + protected static $voidElements = [ + 'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', + ]; + + protected static $textLevelElements = [ + 'a', 'br', 'bdo', 'abbr', 'blink', 'nextid', 'acronym', 'basefont', + 'b', 'em', 'big', 'cite', 'small', 'spacer', 'listing', + 'i', 'rp', 'del', 'code', 'strike', 'marquee', + 'q', 'rt', 'ins', 'font', 'strong', + 's', 'tt', 'kbd', 'mark', + 'u', 'xm', 'sub', 'nobr', + 'sup', 'ruby', + 'var', 'span', + 'wbr', 'time', + ]; + + protected static $inlineTypes = [ + '"' => ['SpecialCharacter'], + '!' => ['Image'], + '&' => ['SpecialCharacter'], + '*' => ['Emphasis'], + ':' => ['Url'], + '<' => ['UrlTag', 'EmailTag', 'Markup', 'SpecialCharacter'], + '>' => ['SpecialCharacter'], + '[' => ['Link'], + '_' => ['Emphasis'], + '`' => ['Code'], + '~' => ['Strikethrough'], '\\' => ['EscapeSequence'], ]; - private static $tags = [ - 'calendar' => [ - 'match' => 'regex here', - 'parsed' => 'output here', - ], + protected static $inlineMarkerList = '!"*_&[:<>`~\\'; + + private static $continuable = [ + 'Code', 'Comment', 'FencedCode', 'List', 'Quote', 'Markup', 'Table' ]; - public function __construct() + private static $completable = [ + 'Code', 'FencedCode' + ]; + + private static $definitionData = []; + + public static function parse(string $text) : string { + self::$definitionData = []; + $text = str_replace(["\r\n", "\r"], "\n", $text); + $text = trim($text, "\n"); + $lines = explode("\n", $text); + $markup = self::lines($lines); + + return trim($markup, "\n"); } - public static function parse(string $raw) : string + protected static function lines(array $lines) : string { - /*$raw = $this->cleanup($raw); - $lines = explode("\n", $raw); - - return trim($this->parseLines($lines), " \n");*/ - - return $raw; - } - - private static function cleanup(string $raw) : string - { - $raw = str_replace(["\r\n", "\r", "\t"], ["\n", "\n", ' '], $raw); - $raw = trim($raw); - $raw = trim($raw, "\n"); - - return $raw; - } - - private static function parseLines(array $lines) : string - { - $block = array_keys(self::$blockTypes); - $inline = array_keys(self::$inlineTypes); + $currentBlock = null; foreach ($lines as $line) { - foreach ($line as $character) { - + if (chop($line) === '') { + if (isset($currentBlock)) { + $currentBlock['interrupted'] = true; + } + + continue; + } + + if (strpos($line, "\t") !== false) { + $parts = explode("\t", $line); + $line = $parts[0]; + + unset($parts[0]); + + foreach ($parts as $part) { + $shortage = 4 - mb_strlen($line, 'utf-8') % 4; + + $line .= str_repeat(' ', $shortage); + $line .= $part; + } + } + + $indent = 0; + while (isset($line[$indent]) && $line[$indent] === ' ') { + $indent ++; + } + + $text = $indent > 0 ? substr($line, $indent) : $line; + $lineArray = ['body' => $line, 'indent' => $indent, 'text' => $text]; + + if (isset($currentBlock['continuable'])) { + $block = self::{'block' . $currentBlock['type'] . 'Continue'}($lineArray, $currentBlock); + + if (isset($block)) { + $currentBlock = $block; + + continue; + } elseif (in_array($currentBlock['type'], self::$completable)) { + $currentBlock = self::{'block' . $currentBlock['type'] . 'Complete'}($currentBlock); + } + } + + $marker = $text[0]; + $blockTypes = self::$unmarkedBlockTypes; + + if (isset(self::$blockTypes[$marker])) { + foreach (self::$blockTypes[$marker] as $blockType) { + $blockTypes[] = $blockType; + } + } + + foreach ($blockTypes as $blockType) { + $block = self::{'block' . $blockType}($lineArray, $currentBlock); + + if (isset($block)) { + $block['type'] = $blockType; + + if (!isset($block['identified'])) { + $blocks[] = $currentBlock; + + $block['identified'] = true; + } + + if (in_array($blockType, self::$continuable)) { + $block['continuable'] = true; + } + + $currentBlock = $block; + + continue 2; + } + } + + if (isset($currentBlock) && !isset($currentBlock['type']) && !isset($currentBlock['interrupted'])) { + $currentBlock['element']['text'] .= "\n" . $text; + } else { + $blocks[] = $currentBlock; + $currentBlock = self::paragraph($lineArray); + $currentBlock['identified'] = true; } } - return ''; - } - - private static function countIndention(string $line) : int - { - $indent = 0; - while (isset($line[$indent]) && $line[$indent] === ' ') { - $indent++; + if (isset($currentBlock['continuable']) && in_array($currentBlock['type'], self::$completable)) { + $currentBlock = self::{'block' . $currentBlock['type'] . 'Complete'}($currentBlock); } - return $indent; + $blocks[] = $currentBlock; + unset($blocks[0]); + $markup = ''; + + foreach ($blocks as $block) { + if (isset($block['hidden'])) { + continue; + } + + $markup .= "\n"; + $markup .= isset($block['markup']) ? $block['markup'] : self::element($block['element']); + } + + $markup .= "\n"; + + return $markup; + } + + protected static function blockCode(array $lineArray, array $block = null) /* : ?array */ + { + if (isset($block) && !isset($block['type']) && !isset($block['interrupted'])) { + return; + } + + if ($lineArray['indent'] < 4) { + return; + } + + $text = substr($lineArray['body'], 4); + $block = [ + 'element' => [ + 'name' => 'pre', + 'handler' => 'element', + 'text' => [ + 'name' => 'code', + 'text' => $text, + ], + ], + ]; + + return $block; + } + + protected static function blockCodeContinue(array $lineArray, array $block) /* : ?array */ + { + if ($lineArray['indent'] < 4) { + return; + } + + if (isset($block['interrupted'])) { + $block['element']['text']['text'] .= "\n"; + + unset($block['interrupted']); + } + + $block['element']['text']['text'] .= "\n"; + $text = substr($lineArray['body'], 4); + $block['element']['text']['text'] .= $text; + + return $block; + } + + protected static function blockCodeComplete(array $block) : array + { + $text = $block['element']['text']['text']; + $text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8'); + $block['element']['text']['text'] = $text; + + return $block; + } + + protected static function blockComment(array $lineArray) /* : ?array */ + { + if ( + isset($lineArray['text'][3]) + && $lineArray['text'][3] === '-' + && $lineArray['text'][2] === '-' + && $lineArray['text'][1] === '!' + ) { + $block = ['markup' => $lineArray['body']]; + + if (preg_match('/-->$/', $lineArray['text'])) { + $block['closed'] = true; + } + + return $block; + } + } + + protected static function blockCommentContinue(array $lineArray, array $block) /* : ?array */ + { + if (isset($block['closed'])) { + return; + } + + $block['markup'] .= "\n" . $lineArray['body']; + + if (preg_match('/-->$/', $lineArray['text'])) { + $block['closed'] = true; + } + + return $block; + } + + protected static function blockFencedCode(array $lineArray) /* : ?array */ + { + if (!preg_match('/^[' . $lineArray['text'][0] . ']{3,}[ ]*([\w-]+)?[ ]*$/', $lineArray['text'], $matches)) { + return; + } + + $elementArray = [ + 'name' => 'code', + 'text' => '', + ]; + + if (isset($matches[1])) { + $class = 'language-' . $matches[1]; + + $elementArray['attributes'] = [ + 'class' => $class, + ]; + } + + $block = [ + 'char' => $lineArray['text'][0], + 'element' => [ + 'name' => 'pre', + 'handler' => 'element', + 'text' => $elementArray, + ] + ]; + + return $block; + } + + protected static function blockFencedCodeContinue(array $lineArray, array $block) : array + { + if (isset($block['complete'])) { + return; + } + + if (isset($block['interrupted'])) { + $block['element']['text']['text'] .= "\n"; + + unset($block['interrupted']); + } + + if (preg_match('/^' . $block['char'] . '{3,}[ ]*$/', $lineArray['text'])) { + $block['element']['text']['text'] = substr($block['element']['text']['text'], 1); + $block['complete'] = true; + + return $block; + } + + $block['element']['text']['text'] .= "\n" . $lineArray['body']; + + return $block; + } + + protected static function blockFencedCodeComplete(array $block) : array + { + $text = $block['element']['text']['text']; + $text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8'); + $block['element']['text']['text'] = $text; + + return $block; + } + + protected static function blockHeader(array $lineArray) /* : ?array */ + { + if (!isset($lineArray['text'][1])) { + return; + } + + $level = 1; + + while (isset($lineArray['text'][$level]) && $lineArray['text'][$level] === '#') { + $level ++; + } + + if ($level > 6) { + return; + } + + $text = trim($lineArray['text'], '# '); + $block = [ + 'element' => [ + 'name' => 'h' . min(6, $level), + 'text' => $text, + 'handler' => 'line', + ], + ]; + + return $block; + } + + protected static function blockList(array $lineArray) /* : ?array */ + { + list($name, $pattern) = $lineArray['text'][0] <= '-' ? ['ul', '[*+-]'] : ['ol', '[0-9]+[.]']; + + if (!preg_match('/^(' . $pattern . '[ ]+)(.*)/', $lineArray['text'], $matches)) { + return; + } + + $block = [ + 'indent' => $lineArray['indent'], + 'pattern' => $pattern, + 'element' => [ + 'name' => $name, + 'handler' => 'elements', + ], + ]; + + if($name === 'ol') { + $listStart = stristr($matches[0], '.', true); + + if($listStart !== '1') { + $block['element']['attributes'] = ['start' => $listStart]; + } + } + + $block['li'] = [ + 'name' => 'li', + 'handler' => 'li', + 'text' => [ + $matches[2], + ], + ]; + + $block['element']['text'][] = & $block['li']; + + return $block; + } + + protected static function blockListContinue(array $lineArray, array $block) /* : ?array */ + { + if ($block['indent'] === $lineArray['indent'] && preg_match('/^' . $block['pattern'] . '(?:[ ]+(.*)|$)/', $lineArray['text'], $matches)) { + if (isset($block['interrupted'])) { + $block['li']['text'][] = ''; + + unset($block['interrupted']); + } + + unset($block['li']); + + $text = isset($matches[1]) ? $matches[1] : ''; + $block['li'] = [ + 'name' => 'li', + 'handler' => 'li', + 'text' => [ + $text, + ], + ]; + + $block['element']['text'][] = & $block['li']; + + return $block; + } + + if ($lineArray['text'][0] === '[' && self::blockReference($lineArray)) { + return $block; + } + + if (!isset($block['interrupted'])) { + $text = preg_replace('/^[ ]{0,4}/', '', $lineArray['body']); + $block['li']['text'][] = $text; + + return $block; + } + + if ($lineArray['indent'] > 0) { + $block['li']['text'][] = ''; + $text = preg_replace('/^[ ]{0,4}/', '', $lineArray['body']); + $block['li']['text'][] = $text; + + unset($block['interrupted']); + + return $block; + } + } + + protected static function blockQuote(array $lineArray) /* : ?array */ + { + if (!preg_match('/^>[ ]?(.*)/', $lineArray['text'], $matches)) { + return; + } + + $block = [ + 'element' => [ + 'name' => 'blockquote', + 'handler' => 'lines', + 'text' => (array) $matches[1], + ], + ]; + + return $block; + } + + protected static function blockQuoteContinue(array $lineArray, array $block) /* : ?array */ + { + if ($lineArray['text'][0] === '>' && preg_match('/^>[ ]?(.*)/', $lineArray['text'], $matches)) { + if (isset($block['interrupted'])) { + $block['element']['text'][] = ''; + + unset($block['interrupted']); + } + + $block['element']['text'][] = $matches[1]; + + return $block; + } + + if (!isset($block['interrupted'])) { + $block['element']['text'][] = $lineArray['text']; + + return $block; + } + } + + protected static function blockRule(array $lineArray) /* : ?array */ + { + if (!preg_match('/^([' . $lineArray['text'][0] . '])([ ]*\1){2,}[ ]*$/', $lineArray['text'])) { + return; + } + + $block = [ + 'element' => [ + 'name' => 'hr' + ], + ]; + + return $block; + } + + protected static function blockSetextHeader(array $lineArray, array $block = null) /* : ?array */ + { + if (!isset($block) || isset($block['type']) || isset($block['interrupted'])) { + return; + } + + if (chop($lineArray['text'], $lineArray['text'][0]) !== '') { + return; + } + + $block['element']['name'] = $lineArray['text'][0] === '=' ? 'h1' : 'h2'; + + return $block; + } + + protected static function blockMarkup(array $lineArray) /* : ?array */ + { + if (!preg_match('/^<(\w[\w-]*)(?:[ ]*' . self::$regexHtmlAttribute . ')*[ ]*(\/)?>/', $lineArray['text'], $matches)) { + return; + } + + $element = strtolower($matches[1]); + + if (in_array($element, self::$textLevelElements)) { + return; + } + + $block = [ + 'name' => $matches[1], + 'depth' => 0, + 'markup' => $lineArray['text'], + ]; + + $length = strlen($matches[0]); + $remainder = substr($lineArray['text'], $length); + + if (trim($remainder) === '') { + if (isset($matches[2]) || in_array($matches[1], self::$voidElements)) { + $block['closed'] = true; + $block['void'] = true; + } + } else { + if (isset($matches[2]) || in_array($matches[1], self::$voidElements)) { + return; + } + + if (preg_match('/<\/' . $matches[1] . '>[ ]*$/i', $remainder)) { + $block['closed'] = true; + } + } + + return $block; + } + + protected static function blockMarkupContinue(array $lineArray, array $block) /* : ?array */ + { + if (isset($block['closed'])) { + return; + } + + if (preg_match('/^<' . $block['name'] . '(?:[ ]*' . self::$regexHtmlAttribute . ')*[ ]*>/i', $lineArray['text'])) { + $block['depth']++; + } + + if (preg_match('/(.*?)<\/' . $block['name'] . '>[ ]*$/i', $lineArray['text'], $matches)) { + if ($block['depth'] > 0) { + $block['depth']--; + } else { + $block['closed'] = true; + } + } + + if (isset($block['interrupted'])) { + $block['markup'] .= "\n"; + + unset($block['interrupted']); + } + + $block['markup'] .= "\n".$lineArray['body']; + + return $block; + } + + protected static function blockReference(array $lineArray) /* : ?array */ + { + if (!preg_match('/^\[(.+?)\]:[ ]*?(?:[ ]+["\'(](.+)["\')])?[ ]*$/', $lineArray['text'], $matches)) { + return; + } + + $id = strtolower($matches[1]); + $Data = [ + 'url' => $matches[2], + 'title' => null, + ]; + + if (isset($matches[3])) { + $Data['title'] = $matches[3]; + } + + self::$definitionData['Reference'][$id] = $Data; + + $block = ['hidden' => true]; + + return $block; + } + + protected static function blockTable($lineArray, array $block = null) /* : ?array */ + { + if (!isset($block) || isset($block['type']) || isset($block['interrupted'])) { + return; + } + + if (strpos($block['element']['text'], '|') !== false && chop($lineArray['text'], ' -:|') === '') { + $alignments = []; + $divider = $lineArray['text']; + $divider = trim($divider); + $divider = trim($divider, '|'); + $dividerCells = explode('|', $divider); + + foreach ($dividerCells as $dividerCell) { + $dividerCell = trim($dividerCell); + + if ($dividerCell === '') { + continue; + } + + $alignment = null; + + if ($dividerCell[0] === ':') { + $alignment = 'left'; + } + + if (substr($dividerCell, - 1) === ':') { + $alignment = $alignment === 'left' ? 'center' : 'right'; + } + + $alignments[] = $alignment; + } + + $HeaderElements = []; + $header = $block['element']['text']; + $header = trim($header); + $header = trim($header, '|'); + $headerCells = explode('|', $header); + + foreach ($headerCells as $index => $headerCell) { + $headerCell = trim($headerCell); + $HeaderElement = [ + 'name' => 'th', + 'text' => $headerCell, + 'handler' => 'line', + ]; + + if (isset($alignments[$index])) { + $alignment = $alignments[$index]; + $HeaderElement['attributes'] = [ + 'style' => 'text-align: ' . $alignment . ';', + ]; + } + + $HeaderElements[] = $HeaderElement; + } + + $block = [ + 'alignments' => $alignments, + 'identified' => true, + 'element' => [ + 'name' => 'table', + 'handler' => 'elements', + ], + ]; + + $block['element']['text'][] = [ + 'name' => 'thead', + 'handler' => 'elements', + ]; + + $block['element']['text'][] = [ + 'name' => 'tbody', + 'handler' => 'elements', + 'text' => [], + ]; + + $block['element']['text'][0]['text'][] = [ + 'name' => 'tr', + 'handler' => 'elements', + 'text' => $HeaderElements, + ]; + + return $block; + } + } + + protected static function blockTableContinue(array $lineArray, array $block) /* : ?array */ + { + if (isset($block['interrupted'])) { + return; + } + + if ($lineArray['text'][0] === '|' || strpos($lineArray['text'], '|')) { + $elements = []; + $row = $lineArray['text']; + $row = trim($row); + $row = trim($row, '|'); + + preg_match_all('/(?:(\\\\[|])|[^|`]|`[^`]+`|`)+/', $row, $matches); + + foreach ($matches[0] as $index => $cell) { + $cell = trim($cell); + $element = [ + 'name' => 'td', + 'handler' => 'line', + 'text' => $cell, + ]; + + if (isset($block['alignments'][$index])) { + $element['attributes'] = [ + 'style' => 'text-align: ' . $block['alignments'][$index] . ';', + ]; + } + + $elements[] = $element; + } + + $element = [ + 'name' => 'tr', + 'handler' => 'elements', + 'text' => $elements, + ]; + $block['element']['text'][1]['text'][] = $element; + + return $block; + } + } + + protected static function paragraph(array $lineArray) : array + { + $block = [ + 'element' => [ + 'name' => 'p', + 'text' => $lineArray['text'], + 'handler' => 'line', + ], + ]; + + return $block; + } + + public function line(string $text) : string + { + $markup = ''; + + while ($excerpt = strpbrk($text, self::$inlineMarkerList)) { + $marker = $excerpt[0]; + $markerPosition = strpos($text, $marker); + $Excerpt = ['text' => $excerpt, 'context' => $text]; + + foreach (self::$inlineTypes[$marker] as $inlineType) { + $inline = self::{'inline' . $inlineType}($Excerpt); + + if (!isset($inline)) { + continue; + } + + if (isset($inline['position']) && $inline['position'] > $markerPosition) { + continue; + } + + if (!isset($inline['position'])) { + $inline['position'] = $markerPosition; + } + + $unmarkedText = substr($text, 0, $inline['position']); + $markup .= self::unmarkedText($unmarkedText); + $markup .= isset($inline['markup']) ? $inline['markup'] : self::element($inline['element']); + $text = substr($text, $inline['position'] + $inline['extent']); + + continue 2; + } + + $unmarkedText = substr($text, 0, $markerPosition + 1); + $markup .= self::unmarkedText($unmarkedText); + $text = substr($text, $markerPosition + 1); + } + + $markup .= self::unmarkedText($text); + + return $markup; + } + + protected static function inlineCode(array $excerpt) /* : ?array */ + { + $marker = $excerpt['text'][0]; + + if (!preg_match('/^(' . $marker . '+)[ ]*(.+?)[ ]*(? strlen($matches[0]), + 'element' => [ + 'name' => 'code', + 'text' => $text, + ], + ]; + } + + protected static function inlineEmailTag(array $excerpt) /* : ?array */ + { + if (strpos($excerpt['text'], '>') === false || !preg_match('/^<((mailto:)?\S+?@\S+?)>/i', $excerpt['text'], $matches)) { + return; + } + + $url = $matches[1]; + + if (!isset($matches[2])) { + $url = 'mailto:' . $url; + } + + return [ + 'extent' => strlen($matches[0]), + 'element' => [ + 'name' => 'a', + 'text' => $matches[1], + 'attributes' => [ + 'href' => $url, + ], + ], + ]; + } + + protected static function inlineEmphasis(array $excerpt) /* : ?array */ + { + if (!isset($excerpt['text'][1])) { + return; + } + + $marker = $excerpt['text'][0]; + + if ($excerpt['text'][1] === $marker && preg_match(self::$strongRegex[$marker], $excerpt['text'], $matches)) { + $emphasis = 'strong'; + } elseif (preg_match(self::$emRegex[$marker], $excerpt['text'], $matches)) { + $emphasis = 'em'; + } else { + return; + } + + return [ + 'extent' => strlen($matches[0]), + 'element' => [ + 'name' => $emphasis, + 'handler' => 'line', + 'text' => $matches[1], + ], + ]; + } + + protected static function inlineEscapeSequence(array $excerpt) /* : ?array */ + { + if (!isset($excerpt['text'][1]) || !in_array($excerpt['text'][1], self::$specialCharacters)) { + return; + } + + return [ + 'markup' => $excerpt['text'][1], + 'extent' => 2, + ]; + } + + protected static function inlineImage(array $excerpt) /* : ?array */ + { + if (!isset($excerpt['text'][1]) || $excerpt['text'][1] !== '[') { + return; + } + + $excerpt['text']= substr($excerpt['text'], 1); + $link = self::inlineLink($excerpt); + + if (!isset($link)) { + return; + } + + $inline = [ + 'extent' => $link['extent'] + 1, + 'element' => [ + 'name' => 'img', + 'attributes' => [ + 'src' => $link['element']['attributes']['href'], + 'alt' => $link['element']['text'], + ], + ], + ]; + + $inline['element']['attributes'] += $link['element']['attributes']; + + unset($inline['element']['attributes']['href']); + + return $inline; + } + + protected static function inlineLink(array $excerpt) /* : ?array */ + { + $element = [ + 'name' => 'a', + 'handler' => 'line', + 'text' => null, + 'attributes' => [ + 'href' => null, + 'title' => null, + ], + ]; + $extent = 0; + $remainder = $excerpt['text']; + + if (!preg_match('/\[((?:[^][]++|(?R))*+)\]/', $remainder, $matches)) { + return; + } + + $element['text'] = $matches[1]; + $extent += strlen($matches[0]); + $remainder = substr($remainder, $extent); + + if (preg_match('/^[(]\s*+((?:[^ ()]++|[(][^ )]+[)])++)(?:[ ]+("[^"]*"|\'[^\']*\'))?\s*[)]/', $remainder, $matches)) { + $element['attributes']['href'] = $matches[1]; + + if (isset($matches[2])) { + $element['attributes']['title'] = substr($matches[2], 1, - 1); + } + + $extent += strlen($matches[0]); + } else { + if (preg_match('/^\s*\[(.*?)\]/', $remainder, $matches)) { + $definition = strlen($matches[1]) ? $matches[1] : $element['text']; + $definition = strtolower($definition); + $extent += strlen($matches[0]); + } else { + $definition = strtolower($element['text']); + } + + if (!isset(self::$definitionData['Reference'][$definition])) { + return; + } + + $def = self::$definitionData['Reference'][$definition]; + $element['attributes']['href'] = $def['url']; + $element['attributes']['title'] = $def['title']; + } + + $element['attributes']['href'] = str_replace(['&', '<'], ['&', '<'], $element['attributes']['href']); + + return [ + 'extent' => $extent, + 'element' => $element, + ]; + } + + protected static function inlineMarkup(array $excerpt) /* : ?array */ + { + if (strpos($excerpt['text'], '>') === false) { + return; + } + + if ($excerpt['text'][1] === '/' && preg_match('/^<\/\w[\w-]*[ ]*>/s', $excerpt['text'], $matches)) { + return [ + 'markup' => $matches[0], + 'extent' => strlen($matches[0]), + ]; + } + + if ($excerpt['text'][1] === '!' && preg_match('/^/s', $excerpt['text'], $matches)) { + return [ + 'markup' => $matches[0], + 'extent' => strlen($matches[0]), + ]; + } + + if ($excerpt['text'][1] !== ' ' && preg_match('/^<\w[\w-]*(?:[ ]*' . self::$regexHtmlAttribute . ')*[ ]*\/?>/s', $excerpt['text'], $matches)) { + return [ + 'markup' => $matches[0], + 'extent' => strlen($matches[0]), + ]; + } + } + + protected static function inlineSpecialCharacter(array $excerpt) /* : ?array */ + { + if ($excerpt['text'][0] === '&' && ! preg_match('/^&#?\w+;/', $excerpt['text'])) { + return [ + 'markup' => '&', + 'extent' => 1, + ]; + } + + $specialChar = ['>' => 'gt', '<' => 'lt', '"' => 'quot']; + + if (isset($specialChar[$excerpt['text'][0]])) { + return [ + 'markup' => '&' . $specialChar[$excerpt['text'][0]] . ';', + 'extent' => 1, + ]; + } + } + + protected static function inlineStrikethrough(array $excerpt) /* : ?array */ + { + if (!isset($excerpt['text'][1])) { + return; + } + + if ($excerpt['text'][1] !== '~' || !preg_match('/^~~(?=\S)(.+?)(?<=\S)~~/', $excerpt['text'], $matches)) { + return; + } + + return [ + 'extent' => strlen($matches[0]), + 'element' => [ + 'name' => 'del', + 'text' => $matches[1], + 'handler' => 'line', + ], + ]; + } + + protected static function inlineUrl(array $excerpt) /* : ?array */ + { + if (self::$urlsLinked !== true || !isset($excerpt['text'][2]) || $excerpt['text'][2] !== '/') { + return; + } + + if (!preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $excerpt['context'], $matches, PREG_OFFSET_CAPTURE)) { + return; + } + + return [ + 'extent' => strlen($matches[0][0]), + 'position' => $matches[0][1], + 'element' => [ + 'name' => 'a', + 'text' => $matches[0][0], + 'attributes' => [ + 'href' => $matches[0][0], + ], + ], + ]; + } + + protected static function inlineUrlTag(array $excerpt) /* : ?array */ + { + if (strpos($excerpt['text'], '>') === false || !preg_match('/^<(\w+:\/{2}[^ >]+)>/i', $excerpt['text'], $matches)) { + return; + } + + $url = str_replace(['&', '<'], ['&', '<'], $matches[1]); + + return [ + 'extent' => strlen($matches[0]), + 'element' => [ + 'name' => 'a', + 'text' => $url, + 'attributes' => [ + 'href' => $url, + ], + ], + ]; + } + + protected static function unmarkedText(string $text) : string + { + $text = preg_replace('/(?:[ ][ ]+|[ ]*\\\\)\n/', "
\n", $text); + $text = str_replace(" \n", "\n", $text); + + return $text; + } + + protected static function element(array $element) : string + { + $markup = '<' . $element['name']; + + if (isset($element['attributes'])) { + foreach ($element['attributes'] as $name => $value) { + if ($value === null) { + continue; + } + + $markup .= ' ' . $name . '="' . $value . '"'; + } + } + + if (isset($element['text'])) { + $markup .= '>'; + $markup .= isset($element['handler']) ? self::{$element['handler']}($element['text']) : $element['text']; + $markup .= ''; + } else { + $markup .= ' />'; + } + + return $markup; + } + + protected static function elements(array $elements) : string + { + $markup = ''; + + foreach ($elements as $element) { + $markup .= "\n" . self::element($element); + } + + $markup .= "\n"; + + return $markup; + } + + protected static function li($lines) : string + { + $markup = self::lines($lines); + $trimmedMarkup = trim($markup); + + if (!in_array('', $lines) && substr($trimmedMarkup, 0, 3) === '

') { + $markup = $trimmedMarkup; + $markup = substr($markup, 3); + $position = strpos($markup, "

"); + $markup = substr_replace($markup, '', $position, 4); + } + + return $markup; } } \ No newline at end of file