diff --git a/Utils/Parser/Markdown/Markdown.php b/Utils/Parser/Markdown/Markdown.php index e9c07ba08..88d93dc6f 100755 --- a/Utils/Parser/Markdown/Markdown.php +++ b/Utils/Parser/Markdown/Markdown.php @@ -5,7 +5,9 @@ * PHP Version 8.1 * * @package phpOMS\Utils\Parser\Markdown - * @license Original license Emanuil Rusev, erusev.com (MIT) + * @license Original & extra license Emanuil Rusev, erusev.com (MIT) + * @license Extended license Benjamin Hoegh (MIT) + * @license Extreme license doowzs (MIT) * @license This version: OMS License 2.0 * @version 1.0.0 * @link https://jingga.app @@ -14,6 +16,7 @@ declare(strict_types=1); namespace phpOMS\Utils\Parser\Markdown; +use NullElo; use phpOMS\Uri\UriFactory; /** @@ -22,11 +25,13 @@ use phpOMS\Uri\UriFactory; * @package phpOMS\Utils\Parser\Markdown * @license Original & extra license Emanuil Rusev, erusev.com (MIT) * @license Extended license Benjamin Hoegh (MIT) + * @license Extreme license doowzs (MIT) * @license This version: OMS License 2.0 * @link https://jingga.app * @see https://github.com/erusev/parsedown * @see https://github.com/erusev/parsedown-extra * @see https://github.com/BenjaminHoegh/ParsedownExtended + * @see https://github.com/doowzs/parsedown-extreme * @since 1.0.0 */ class Markdown @@ -88,6 +93,14 @@ class Markdown */ protected string $regexHtmlAttribute = '[a-zA-Z_:][\w:.-]*+(?:\s*+=\s*+(?:[^"\'=<>`\s]+|"[^"]*+"|\'[^\']*+\'))?+'; + /** + * Regex for for classes and ids + * + * @var string + * @since 1.0.0 + */ + protected string $regexAttribute = '(?:[#.][-\w]+[ ]*)'; + /** * Elements without closing * @@ -116,6 +129,146 @@ class Markdown 'wbr', 'time', ]; + /** + * Inline special characters (not block elements) + * + * @var array + * @since 1.0.0 + */ + protected array $inlineTypes = [ + '!' => ['Image'], + '*' => ['Emphasis'], + '_' => ['Emphasis'], + '&' => ['SpecialCharacter'], + '[' => ['FootnoteMarker', 'Link'], + ':' => ['Url'], + '<' => ['UrlTag', 'EmailTag', 'Markup'], + '`' => ['Code'], + '~' => ['Strikethrough'], + '\\' => ['EscapeSequence'], + ]; + + /** + * Inline special characters (not block elements) (see $inlineTypes) + * + * @var string + * @since 1.0.0 + */ + protected string $inlineMarkerList = '!*_&[:<`~\\'; + + /** + * Uses strict mode? + * + * Less forgiving with regards to formatting. + * + * @var bool + * @since 1.0.0 + */ + public bool $strictMode = false; + + /** + * Uses safe mode (true -> no html allowed)? + * + * Important for parsing or sanitizing html. + * + * @var bool + * @since 1.0.0 + */ + public bool $safeMode = false; + + /** + * Urls are always handled as links + * + * @var bool + * @since 1.0.0 + */ + public bool $urlsLinked = true; + + /** + * Should html get escaped? + * + * @var bool + * @since 1.0.0 + */ + public bool $markupEscaped = false; + + /** + * Replaces new lines with
in inline elements + * + * true -> replaces new line with br + * false -> replaces double whitespace followed with new line with br + * + * @var bool + * @since 1.0.0 + */ + public bool $breaksEnabled = false; + + /** + * Save link prefixes + * + * @var string[] + * @since 1.0.0 + */ + protected array $safeLinksWhitelist = [ + 'http://', + 'https://', + 'ftp://', + 'ftps://', + 'mailto:', + 'tel:', + 'data:image/png;base64,', + 'data:image/gif;base64,', + 'data:image/jpeg;base64,', + 'irc:', + 'ircs:', + 'git:', + 'ssh:', + 'news:', + 'steam:', + ]; + + /** + * Block special characters + * + * @var array + * @since 1.0.0 + */ + protected array $blockTypes = [ + '#' => ['Header'], + '*' => ['Rule', 'List', 'Abbreviation'], + '+' => ['List'], + '-' => ['SetextHeader', 'Table', 'Rule', 'List'], + '0' => ['List'], + '1' => ['List'], + '2' => ['List'], + '3' => ['List'], + '4' => ['List'], + '5' => ['List'], + '6' => ['List'], + '7' => ['List'], + '8' => ['List'], + '9' => ['List'], + ':' => ['Table', 'DefinitionList'], + '<' => ['Comment', 'Markup'], + '=' => ['SetextHeader'], + '>' => ['Quote'], + '[' => ['Footnote', 'Reference'], + '_' => ['Rule'], + '`' => ['FencedCode'], + '|' => ['Table'], + '~' => ['FencedCode'], + ]; + + /** + * Unmarked block types + * + * @var string[] + * @since 1.0.0 + */ + protected array $unmarkedBlockTypes = [ + 'Code', + ]; + /** * Parsing options * @@ -141,7 +294,7 @@ class Markdown * @var string * @since 1.0.0 */ - private string $idToc = ''; + public string $idToc = 'toc'; /** * TOC array after parsing headers @@ -166,7 +319,6 @@ class Markdown * @since 1.0.0 */ protected int $firstHeadLevel = 0; - // TOC: end /** * Is header blacklist (for table of contents/TOC) initialized @@ -183,6 +335,31 @@ class Markdown * @since 1.0.0 */ protected $anchorDuplicates = []; + // TOC: end + + /** + * Footnote count + * + * @var int + * @since 1.0.0 + */ + private int $footnoteCount = 0; + + /** + * Current abbreviation + * + * @var string + * @since 1.0.0 + */ + private string $currentAbreviation; + + /** + * Current abbreviation meaning + * + * @var string + * @since 1.0.0 + */ + private string $currentMeaning; /** * Instances @@ -229,114 +406,112 @@ class Markdown // Marks $state = $this->options['mark'] ?? true; if ($state !== false) { - $this->InlineTypes['='][] = 'mark'; + $this->inlineTypes['='][] = 'mark'; $this->inlineMarkerList .= '='; } // Keystrokes $state = $this->options['keystrokes'] ?? true; if ($state !== false) { - $this->InlineTypes['['][] = 'Keystrokes'; + $this->inlineTypes['['][] = 'Keystrokes'; $this->inlineMarkerList .= '['; } // Inline Math $state = $this->options['math'] ?? false; if ($state !== false) { - $this->InlineTypes['\\'][] = 'Math'; + $this->inlineTypes['\\'][] = 'Math'; $this->inlineMarkerList .= '\\'; - $this->InlineTypes['$'][] = 'Math'; + $this->inlineTypes['$'][] = 'Math'; $this->inlineMarkerList .= '$'; } // Superscript $state = $this->options['sup'] ?? false; if ($state !== false) { - $this->InlineTypes['^'][] = 'Superscript'; + $this->inlineTypes['^'][] = 'Superscript'; $this->inlineMarkerList .= '^'; } // Subscript $state = $this->options['sub'] ?? false; if ($state !== false) { - $this->InlineTypes['~'][] = 'Subscript'; + $this->inlineTypes['~'][] = 'Subscript'; } // Emojis $state = $this->options['emojis'] ?? true; if ($state !== false) { - $this->InlineTypes[':'][] = 'Emojis'; + $this->inlineTypes[':'][] = 'Emojis'; $this->inlineMarkerList .= ':'; } // Typographer $state = $this->options['typographer'] ?? false; if ($state !== false) { - $this->InlineTypes['('][] = 'Typographer'; + $this->inlineTypes['('][] = 'Typographer'; $this->inlineMarkerList .= '('; - $this->InlineTypes['.'][] = 'Typographer'; + $this->inlineTypes['.'][] = 'Typographer'; $this->inlineMarkerList .= '.'; - $this->InlineTypes['+'][] = 'Typographer'; + $this->inlineTypes['+'][] = 'Typographer'; $this->inlineMarkerList .= '+'; - $this->InlineTypes['!'][] = 'Typographer'; + $this->inlineTypes['!'][] = 'Typographer'; $this->inlineMarkerList .= '!'; - $this->InlineTypes['?'][] = 'Typographer'; + $this->inlineTypes['?'][] = 'Typographer'; $this->inlineMarkerList .= '?'; } // Smartypants $state = $this->options['smarty'] ?? false; if ($state !== false) { - $this->InlineTypes['<'][] = 'Smartypants'; + $this->inlineTypes['<'][] = 'Smartypants'; $this->inlineMarkerList .= '<'; - $this->InlineTypes['>'][] = 'Smartypants'; + $this->inlineTypes['>'][] = 'Smartypants'; $this->inlineMarkerList .= '>'; - $this->InlineTypes['-'][] = 'Smartypants'; + $this->inlineTypes['-'][] = 'Smartypants'; $this->inlineMarkerList .= '-'; - $this->InlineTypes['.'][] = 'Smartypants'; + $this->inlineTypes['.'][] = 'Smartypants'; $this->inlineMarkerList .= '.'; - $this->InlineTypes["'"][] = 'Smartypants'; + $this->inlineTypes["'"][] = 'Smartypants'; $this->inlineMarkerList .= "'"; - $this->InlineTypes['"'][] = 'Smartypants'; + $this->inlineTypes['"'][] = 'Smartypants'; $this->inlineMarkerList .= '"'; - $this->InlineTypes['`'][] = 'Smartypants'; + $this->inlineTypes['`'][] = 'Smartypants'; $this->inlineMarkerList .= '`'; } // Block Math $state = $this->options['math'] ?? false; if ($state !== false) { - $this->BlockTypes['\\'][] = 'Math'; - $this->BlockTypes['$'][] = 'Math'; + $this->blockTypes['\\'][] = 'Math'; + $this->blockTypes['$'][] = 'Math'; } // Task $state = $this->options['lists']['tasks'] ?? true; if ($state !== false) { - $this->BlockTypes['['][] = 'Checkbox'; + $this->blockTypes['['][] = 'Checkbox'; } } - public function textParent($text) : string - { - $Elements = $this->textElements($text); - $markup = $this->elements($Elements); - $markup = \trim($markup, "\n"); - - // Merge consecutive dl elements - $markup = \preg_replace('/<\/dl>\s+
\s+/', '', $markup); - - // Add footnotes - if (isset($this->definitionData['Footnote'])) { - $Element = $this->buildFootnoteElement(); - $markup .= "\n" . $this->element($Element); - } - - return $markup; - } - /** - * Parses the given markdown string to an HTML string but it ignores ToC + * Parses the given markdown string to a HTML + * + * @param string $text Markdown text to parse + * + * @return string + * + * @since 1.0.0 + */ + public static function parse($text) : string + { + $parsedown = new self(); + + return $parsedown->text($text); + } + + /** + * Parses the given markdown string to a HTML string but it ignores ToC * * @param string $text Markdown text to parse * @@ -346,17 +521,34 @@ class Markdown */ public function body(string $text) : string { - $text = $this->encodeTagToHash($text); // Escapes ToC tag temporary - $html = $this->textParent($text); // Parses the markdown text + $text = $this->encodeToCTagToHash($text); // Escapes ToC tag temporary - return $this->decodeTagFromHash($html); // Unescape the ToC tag + $elements = $this->textElements($text); + $html = $this->elements($elements); + $html = \trim($html, "\n"); + + // Merge consecutive dl elements + $html = \preg_replace('/<\/dl>\s+
\s+/', '', $html); + + // Add footnotes + if (isset($this->definitionData['Footnote'])) { + $element = $this->buildFootnoteElement(); + $html .= "\n" . $this->element($element); + } + + return $this->decodeToCTagFromHash($html); // Unescape the ToC tag } /** * Parses markdown string to HTML and also the "[toc]" tag as well. - * It overrides the parent method: \Parsedown::text(). + * + * @param string $text Markdown text to parse + * + * @return string + * + * @since 1.0.0 */ - public function text($text) : string + public function text(string $text) : string { // Parses the markdown text except the ToC tag. This also searches // the list of contents and available to get from "contentsList()" @@ -368,16 +560,15 @@ class Markdown } // Handle toc - $tagOrigin = $this->getTagToC(); + $tagOrigin = $this->options['toc']['set_toc_tag'] ?? '[toc]'; if (\strpos($text, $tagOrigin) === false) { return $html; } $tocData = $this->contentsList(); - $tocId = $this->getIdAttributeToC(); $needle = '

' . $tagOrigin . '

'; - $replace = '
' . $tocData . '
'; + $replace = '
' . $tocData . '
'; return \str_replace($needle, $replace, $html); } @@ -1319,7 +1510,12 @@ class Markdown protected function blockFootnote(array $line, array $_ = null) : ?array { return ($this->options['footnotes'] ?? true) - ? $this->blockFootnoteBase($line) + && \preg_match('/^\[\^(.+?)\]:[ ]?(.*)$/', $line['text'], $matches) == 1 + ? [ + 'label' => $matches[1], + 'text' => $matches[2], + 'hidden' => true, + ] : null; } @@ -1335,9 +1531,34 @@ class Markdown */ protected function blockDefinitionList(array $line, array $block = null) : ?array { - return ($this->options['definition_lists'] ?? true) - ? $this->blockDefinitionListBase($line, $block) - : null; + if (!($this->options['definition_lists'] ?? true) + || $block === null + || $block['type'] !== 'Paragraph' + ) { + return null; + } + + $element = [ + 'name' => 'dl', + 'elements' => [], + ]; + + $terms = \explode("\n", $block['element']['handler']['argument']); + + foreach ($terms as $term) { + $element['elements'][] = [ + 'name' => 'dt', + 'handler' => [ + 'function' => 'lineElements', + 'argument' => $term, + 'destination' => 'elements', + ], + ]; + } + + $block['element'] = $element; + + return $this->addDdElement($line, $block); } /** @@ -1354,11 +1575,23 @@ class Markdown { if (!($this->options['code']['blocks'] ?? true) || !($this->options['code'] ?? true) + || ($block !== null && $block['type'] === 'Paragraph' && !isset($block['interrupted'])) + || $line['indent'] < 4 ) { return null; } - return $this->blockCodeBase($line, $block); + $text = \substr($line['body'], 4); + + return [ + 'element' => [ + 'name' => 'pre', + 'element' => [ + 'name' => 'code', + 'text' => $text, + ], + ], + ]; } /** @@ -1373,9 +1606,25 @@ class Markdown */ protected function blockComment(array $line, array $_ = null) : ?array { - return ($this->options['comments'] ?? true) - ? $this->blockCommentBase($line) - : null; + if (!($this->options['comments'] ?? true) + || $this->markupEscaped || $this->safeMode + || !\str_starts_with($line['text'], '') !== false) { + $block['closed'] = true; + } + + return $block; } /** @@ -1394,11 +1643,36 @@ class Markdown return null; } - $block = $this->blockHeaderBase($line); - if (empty($block)) { + $level = \strspn($line['text'], '#'); + if ($level > 6) { return null; } + $text = \trim($line['text'], '#'); + if ($this->strictMode && isset($text[0]) && $text[0] !== ' ') { + return null; + } + + $text = \trim($text, ' '); + + $block = [ + 'element' => [ + 'name' => 'h' . $level, + 'handler' => [ + 'function' => 'lineElements', + 'argument' => $text, + 'destination' => 'elements', + ], + ], + ]; + + if (preg_match('/[ #]*{(' . $this->regexAttribute . '+)}[ ]*$/', $block['element']['handler']['argument'], $matches, \PREG_OFFSET_CAPTURE)) { + $attributeString = $matches[1][0]; + + $block['element']['attributes'] = $this->parseAttributeData($attributeString); + $block['element']['handler']['argument'] = \substr($block['element']['handler']['argument'], 0, (int) $matches[0][1]); + } + // Get the text of the heading if (isset($block['element']['handler']['argument'])) { $text = $block['element']['handler']['argument']; @@ -1446,9 +1720,72 @@ class Markdown */ protected function blockList(array $line, array $block = null) : ?array { - return ($this->options['lists'] ?? true) - ? $this->blockListBase($line, $block) - : null; + if (!($this->options['lists'] ?? true)) { + return null; + } + + list($name, $pattern) = $line['text'][0] <= '-' ? ['ul', '[*+-]'] : ['ol', '[0-9]{1,9}+[.\)]']; + + if (\preg_match('/^(' . $pattern . '([ ]++|$))(.*+)/', $line['text'], $matches) !== 1) { + return null; + } + + $contentIndent = \strlen($matches[2]); + if ($contentIndent >= 5) { + --$contentIndent; + + $matches[1] = \substr($matches[1], 0, -$contentIndent); + $matches[3] = \str_repeat(' ', $contentIndent) . $matches[3]; + } + elseif ($contentIndent === 0) { + $matches[1] .= ' '; + } + + $markerWithoutWhitespace = \strstr($matches[1], ' ', true); + + $block = [ + 'indent' => $line['indent'], + 'pattern' => $pattern, + 'data' => [ + 'type' => $name, + 'marker' => $matches[1], + 'markerType' => ($name === 'ul' ? $markerWithoutWhitespace : \substr($markerWithoutWhitespace, -1)), + ], + 'element' => [ + 'name' => $name, + 'elements' => [], + ], + ]; + + $block['data']['markerTypeRegex'] = \preg_quote($block['data']['markerType'], '/'); + + if ($name === 'ol') { + $listStart = \ltrim(\strstr($matches[1], $block['data']['markerType'], true), '0') ?: '0'; + + if ($listStart !== '1') { + if (isset($currentBlock) + && $currentBlock['type'] === 'Paragraph' + && !isset($currentBlock['interrupted']) + ) { + return null; + } + + $block['element']['attributes'] = ['start' => $listStart]; + } + } + + $block['li'] = [ + 'name' => 'li', + 'handler' => [ + 'function' => 'li', + 'argument' => empty($matches[3]) ? [] : [$matches[3]], + 'destination' => 'elements', + ], + ]; + + $block['element']['elements'][] = &$block['li']; + + return $block; } /** @@ -1463,9 +1800,23 @@ class Markdown */ protected function blockQuote(array $line, array $_ = null) : ?array { - return ($this->options['qoutes'] ?? true) - ? $this->blockQuoteBase($line) - : null; + if (!($this->options['qoutes'] ?? true) + || \preg_match('/^>[ ]?+(.*+)/', $line['text'], $matches) !== 1 + ) { + return null; + } + + + return [ + 'element' => [ + 'name' => 'blockquote', + 'handler' => [ + 'function' => 'linesElements', + 'argument' => (array) $matches[1], + 'destination' => 'elements', + ], + ], + ]; } /** @@ -1480,9 +1831,20 @@ class Markdown */ protected function blockRule(array $line, array $_ = null) : ?array { - return ($this->options['thematic_breaks'] ?? true) - ? $this->blockRuleBase($line) - : null; + if (!($this->options['thematic_breaks'] ?? true)) { + return null; + } + + $marker = $line['text'][0]; + if (\substr_count($line['text'], $marker) >= 3 && \rtrim($line['text'], " {$marker}") === '') { + return [ + 'element' => [ + 'name' => 'hr', + ], + ]; + } + + return null; } /** @@ -1497,13 +1859,23 @@ class Markdown */ protected function blockSetextHeader(array $line, array $block = null) : ?array { - if (!($this->options['headings'] ?? true)) { + if (!($this->options['headings'] ?? true) + || $block === null + || $block['type'] !== 'Paragraph' + || isset($block['interrupted']) + ) { return null; } - $block = $this->blockSetextHeaderBase($line, $block); - if (empty($block)) { - return null; + if ($line['indent'] < 4 && \rtrim(\rtrim($line['text'], ' '), $line['text'][0]) === '') { + $block['element']['name'] = $line['text'][0] === '=' ? 'h1' : 'h2'; + } + + if (\preg_match('/[ ]*{(' . $this->regexAttribute . '+)}[ ]*$/', $block['element']['handler']['argument'], $matches, \PREG_OFFSET_CAPTURE)) { + $attributeString = $matches[1][0]; + + $block['element']['attributes'] = $this->parseAttributeData($attributeString); + $block['element']['handler']['argument'] = \substr($block['element']['handler']['argument'], 0, (int) $matches[0][1]); } // Get the text of the heading @@ -1554,9 +1926,47 @@ class Markdown */ protected function blockMarkup(array $line, array $_ = null) : ?array { - return ($this->options['markup'] ?? true) - ? $this->blockMarkupBase($line) - : null; + if (!($this->options['markup'] ?? true) + || $this->markupEscaped || $this->safeMode + || \preg_match('/^<(\w[\w-]*)(?:[ ]*' . $this->regexHtmlAttribute . ')*[ ]*(\/)?>/', $line['text'], $matches) !== 1 + ) { + return null; + } + + $element = \strtolower($matches[1]); + + if (\in_array($element, $this->textLevelElements)) { + return null; + } + + $block = [ + 'name' => $matches[1], + 'depth' => 0, + 'element' => [ + 'rawHtml' => $line['text'], + 'autobreak' => true, + ], + ]; + + $length = \strlen($matches[0]); + $remainder = \substr($line['text'], $length); + + if (\trim($remainder) === '') { + if (isset($matches[2]) || \in_array($matches[1], $this->voidElements)) { + $block['closed'] = true; + $block['void'] = true; + } + } else { + if (isset($matches[2]) || \in_array($matches[1], $this->voidElements)) { + return null; + } + + if (\preg_match('/<\/' . $matches[1] . '>[ ]*$/i', $remainder)) { + $block['closed'] = true; + } + } + + return $block; } /** @@ -1571,9 +1981,24 @@ class Markdown */ protected function blockReference(array $line, array $_ = null) : ?array { - return ($this->options['references'] ?? true) - ? $this->blockReferenceBase($line) - : null; + if (!($this->options['references'] ?? true) + || \strpos($line['text'], ']') === false + || \preg_match('/^\[(.+?)\]:[ ]*+?(?:[ ]+["\'(](.+)["\')])?[ ]*+$/', $line['text'], $matches) !== 1 + + ) { + return null; + } + + $id = \strtolower($matches[1]); + + $this->definitionData['Reference'][$id] = [ + 'url' => UriFactory::build($matches[2]), + 'title' => isset($matches[3]) ? $matches[3] : null, + ]; + + return [ + 'element' => [], + ]; } /** @@ -1588,9 +2013,104 @@ class Markdown */ protected function blockTable(array $line, array $block = null) : ?array { - return ($this->options['tables'] ?? true) - ? $this->blockTableBase($line, $block) - : null; + if (!($this->options['tables'] ?? true) + || $block === null || $block['type'] !== 'Paragraph' || isset($block['interrupted']) + || (\strpos($block['element']['handler']['argument'], '|') === false + && \strpos($line['text'], '|') === false + && \strpos($line['text'], ':') === false + || \strpos($block['element']['handler']['argument'], "\n") !== false) + || \rtrim($line['text'], ' -:|') !== '' + ) { + return null; + } + + $alignments = []; + + $divider = $line['text']; + $divider = \trim($divider); + $divider = \trim($divider, '|'); + + $dividerCells = \explode('|', $divider); + + foreach ($dividerCells as $dividerCell) { + $dividerCell = \trim($dividerCell); + + if ($dividerCell === '') { + return null; + } + + $alignment = null; + + if ($dividerCell[0] === ':') { + $alignment = 'left'; + } + + if (\substr($dividerCell, - 1) === ':') { + $alignment = $alignment === 'left' ? 'center' : 'right'; + } + + $alignments [] = $alignment; + } + + $headerElements = []; + + $header = $block['element']['handler']['argument']; + $header = \trim($header); + $header = \trim($header, '|'); + + $headerCells = \explode('|', $header); + + if (\count($headerCells) !== \count($alignments)) { + return null; + } + + foreach ($headerCells as $index => $headerCell) { + $headerCell = \trim($headerCell); + + $HeaderElement = [ + 'name' => 'th', + 'handler' => [ + 'function' => 'lineElements', + 'argument' => $headerCell, + 'destination' => 'elements', + ], + ]; + + if (isset($alignments[$index])) { + $alignment = $alignments[$index]; + + $HeaderElement['attributes'] = [ + 'style' => "text-align: {$alignment};", + ]; + } + + $headerElements [] = $HeaderElement; + } + + $block = [ + 'alignments' => $alignments, + 'identified' => true, + 'element' => [ + 'name' => 'table', + 'elements' => [], + ], + ]; + + $block['element']['elements'][] = [ + 'name' => 'thead', + ]; + + $block['element']['elements'][] = [ + 'name' => 'tbody', + 'elements' => [], + ]; + + $block['element']['elements'][0]['elements'][] = [ + 'name' => 'tr', + 'elements' => $headerElements, + ]; + + return $block; } /** @@ -1617,9 +2137,17 @@ class Markdown } } - return $allowCustomAbbr - ? $this->blockAbbreviationBase($line) - : null; + if (!$allowCustomAbbr + || \preg_match('/^\*\[(.+?)\]:[ ]*(.+?)[ ]*$/', $line['text'], $matches) !== 1 + ) { + return null; + } + + $this->definitionData['Abbreviation'][$matches[1]] = $matches[2]; + + return [ + 'hidden' => true, + ]; } /** @@ -1702,11 +2230,11 @@ class Markdown * * @param array $block Current block * - * @return null|array + * @return array * * @since 1.0.0 */ - protected function blockMathComplete(array $block) : ?array + protected function blockMathComplete(array $block) : array { return $block; } @@ -1729,14 +2257,58 @@ class Markdown return null; } - $block = $this->blockFencedCodeBase($line); - if (!($this->options['diagrams'] ?? true)) { - return $block; - } - $marker = $line['text'][0]; $openerLength = \strspn($line['text'], $marker); - $language = \trim(\preg_replace('/^`{3}([^\s]+)(.+)?/s', '$1', $line['text'])); + + if ($openerLength < 3) { + return null; + } + + // @todo: We are parsing the language here and further down. Shouldn't one time be enough? + // Both variations seem to result in the same result?! + $language = \trim(\preg_replace('/^`{3}([^\s]+)(.+)?/s', '$1', $line['text'])); + + // Handle diagrams + if (!($this->options['diagrams'] ?? true) + || !\in_array($language, ['mermaid', 'chart']) + ) { + $infostring = \trim(\substr($line['text'], $openerLength), "\t "); + if (\strpos($infostring, '`') !== false) { + return null; + } + + $element = [ + 'name' => 'code', + 'text' => '', + ]; + + if ($infostring !== '') { + /** + * https://www.w3.org/TR/2011/WD-html5-20110525/elements.html#classes + * Every HTML element may have a class attribute specified. + * The attribute, if specified, must have a value that is a set + * of space-separated tokens representing the various classes + * that the element belongs to. + * [...] + * The space characters, for the purposes of this specification, + * are U+0020 SPACE, U+0009 CHARACTER TABULATION (tab), + * U+000A LINE FEED (LF), U+000C FORM FEED (FF), and + * U+000D CARRIAGE RETURN (CR). + */ + $language = \substr($infostring, 0, \strcspn($infostring, " \t\n\f\r")); + + $element['attributes'] = ['class' => "language-{$language}"]; + } + + return [ + 'char' => $marker, + 'openerLength' => $openerLength, + 'element' => [ + 'name' => 'pre', + 'element' => $element, + ], + ]; + } if (\strtolower($language) === 'mermaid') { // Mermaid.js https://mermaidjs.github.io @@ -1774,7 +2346,7 @@ class Markdown ]; } - return $block; + return null; } /** @@ -1792,10 +2364,6 @@ class Markdown return $block; } - if (!isset($block)) { - return null; - } - $headerElements = &$block['element']['elements'][0]['elements'][0]['elements']; $headerElementsCount = \count($headerElements); @@ -1987,7 +2555,7 @@ class Markdown protected function checkboxUnchecked(string $text) : string { if ($this->markupEscaped || $this->safeMode) { - $text = self::escape($text); + $text = \htmlspecialchars($text, \ENT_QUOTES, 'UTF-8'); } return ' ' . $this->formatOnce($text); @@ -2005,7 +2573,7 @@ class Markdown protected function checkboxChecked(string $text) : string { if ($this->markupEscaped || $this->safeMode) { - $text = self::escape($text); + $text = \htmlspecialchars($text, \ENT_QUOTES, 'UTF-8'); } return ' ' . $this->formatOnce($text); @@ -2027,15 +2595,15 @@ class Markdown $safeMode = $this->safeMode; // disable rules to prevent double escaping. - $this->setMarkupEscaped(false); - $this->setSafeMode(false); + $this->markupEscaped = false; + $this->safeMode = false; // format line - $text = $this->line($text); + $text = $this->elements($this->lineElements($text)); // reset old values - $this->setMarkupEscaped($markupEscaped); - $this->setSafeMode($safeMode); + $this->markupEscaped = $markupEscaped; + $this->safeMode = $safeMode; return $text; } @@ -2051,28 +2619,51 @@ class Markdown */ protected function parseAttributeData(string $attribute) : array { - return ($this->options['special_attributes'] ?? true) - ? $this->parseAttributeDataBase($attribute) - : []; + if (!($this->options['special_attributes'] ?? true)) { + return []; + } + + $data = []; + $attributes = \preg_split('/[ ]+/', $attribute, - 1, \PREG_SPLIT_NO_EMPTY); + $classes = []; + + foreach ($attributes as $attribute) { + if ($attribute[0] === '#') { + $data['id'] = \substr($attribute, 1); + } else { // "." + $classes[] = \substr($attribute, 1); + } + } + + if (!empty($classes)) { + $data['class'] = \implode(' ', $classes); + } + + return $data; } /** * Encodes the ToC tag to a hashed tag and replace. * * This is used to avoid parsing user defined ToC tag which includes "_" in - * their tag such as "[[_toc_]]". Unless it will be parsed as: - * "

[[TOC]]

" + * their tag such as "[[_toc_]]". + * + * @param string $text Tag text to encode + * + * @return string + * + * @since 1.0.0 */ - protected function encodeTagToHash($text) + protected function encodeToCTagToHash(string $text) : string { - $salt = $this->getSalt(); - $tagOrigin = $this->getTagToC(); + $salt = \bin2hex(\random_bytes(4)); + $tagOrigin = $this->options['toc']['set_toc_tag'] ?? '[toc]'; if (\strpos($text, $tagOrigin) === false) { return $text; } - $tagHashed = \hash('sha256', $salt.$tagOrigin); + $tagHashed = \hash('sha256', $salt . $tagOrigin); return \str_replace($tagOrigin, $tagHashed, $text); } @@ -2081,14 +2672,19 @@ class Markdown * Decodes the hashed ToC tag to an original tag and replaces. * * This is used to avoid parsing user defined ToC tag which includes "_" in - * their tag such as "[[_toc_]]". Unless it will be parsed as: - * "

[[TOC]]

" + * their tag such as "[[_toc_]]". + * + * @param string $text Tag text to encode + * + * @return string + * + * @since 1.0.0 */ - protected function decodeTagFromHash($text) + protected function decodeToCTagFromHash(string $text) : string { - $salt = $this->getSalt(); - $tagOrigin = $this->getTagToC(); - $tagHashed = \hash('sha256', $salt.$tagOrigin); + $salt = \bin2hex(\random_bytes(4)); + $tagOrigin = $this->options['toc']['set_toc_tag'] ?? '[toc]'; + $tagHashed = \hash('sha256', $salt . $tagOrigin); if (\strpos($text, $tagHashed) === false) { return $text; @@ -2098,49 +2694,17 @@ class Markdown } /** - * Unique string to use as a salt value. + * Generates an anchor text that are link-able even if the heading is not in ASCII. + * + * @param string $str Header text + * + * @return string + * + * @since 1.0.0 */ - protected function getSalt() + protected function createAnchorID(string $str) : string { - static $salt; - if (isset($salt)) { - return $salt; - } - - $salt = \hash('md5', (string) \time()); - - return $salt; - } - - /** - * Gets the markdown tag for ToC. - */ - protected function getTagToC() - { - return $this->options['toc']['set_toc_tag'] ?? '[toc]'; - } - - /** - * Gets the ID attribute of the ToC for HTML tags. - */ - protected function getIdAttributeToC() - { - if (!empty($this->idToc)) { - return $this->idToc; - } - - return 'toc'; - } - - /** - * Generates an anchor text that are link-able even if the heading is not in - * ASCII. - */ - protected function createAnchorID($str) : string - { - $optionUrlEncode = $this->options['toc']['urlencode'] ?? false; - if ($optionUrlEncode) { - // Check AnchorID is unique + if ($this->options['toc']['urlencode'] ?? false) { $str = $this->incrementAnchorId($str); return \urlencode($str); @@ -2212,8 +2776,7 @@ class Markdown ]; // Transliterate characters to ASCII - $optionTransliterate = $this->options['toc']['transliterate'] ?? false; - if ($optionTransliterate) { + if ($this->options['toc']['transliterate'] ?? false) { $str = \str_replace(\array_keys($charMap), $charMap, $str); } @@ -2222,7 +2785,7 @@ class Markdown $str = \preg_replace('/[^\p{L}\p{Nd}]+/u', $optionDelimiter, $str); // Remove duplicate delimiters - $str = \preg_replace('/('.\preg_quote($optionDelimiter, '/').'){2,}/', '$1', $str); + $str = \preg_replace('/(' . \preg_quote($optionDelimiter, '/') . '){2,}/', '$1', $str); // Truncate slug to max. characters $optionLimit = $this->options['toc']['limit'] ?? \mb_strlen($str, 'UTF-8'); @@ -2237,51 +2800,26 @@ class Markdown return $this->incrementAnchorId($str); } - /** - * Get only the text from a markdown string. - * It parses to HTML once then trims the tags to get the text. - */ - protected function fetchText($text) : string - { - return \trim(\strip_tags($this->line($text))); - } - /** * Set/stores the heading block to ToC list in a string and array format. + * + * @param array $content ToC content + * + * @return void + * + * @since 1.0.0 */ - protected function setContentsList(array $Content) : void + protected function setContentsList(array $content) : void { - // Stores as an array - $this->setContentsListAsArray($Content); - // Stores as string in markdown list format. - $this->setContentsListAsString($Content); - } + $this->contentsListArray[] = $content; - /** - * Sets/stores the heading block info as an array. - */ - protected function setContentsListAsArray(array $Content) : void - { - $this->contentsListArray[] = $Content; - } - - /** - * Sets/stores the heading block info as a list in markdown format. - */ - protected function setContentsListAsString(array $Content) : void - { - $text = $this->fetchText($Content['text']); - $id = $Content['id']; - $level = (int) \trim($Content['level'], 'h'); - $link = "[{$text}](#{$id})"; + $text = \trim(\strip_tags($this->elements($this->lineElements($content['text'])))); + $id = $content['id']; + $level = (int) \trim($content['level'], 'h'); if ($this->firstHeadLevel === 0) { $this->firstHeadLevel = $level; } - $cutIndent = $this->firstHeadLevel - 1; - $level = $cutIndent > $level ? 1 : $level - $cutIndent; - - $indent = \str_repeat(' ', $level); // Stores in markdown list format as below: // - [Header1](#Header1) @@ -2289,14 +2827,22 @@ class Markdown // - [Header3](#Header3) // - [Header2-2](#Header2-2) // ... - $this->contentsListString .= "{$indent}- {$link}".\PHP_EOL; + $this->contentsListString .= \str_repeat(' ', $this->firstHeadLevel - 1 > $level ? 1 : $level - ($this->firstHeadLevel - 1)) + . ' - ' . '[' . $text . '](#' . $id . ')' . \PHP_EOL; } /** - * Collect and count anchors in use to prevent duplicated ids. Return string - * with incremental, numeric suffix. Also init optional blacklist of ids. + * Collect and count anchors in use to prevent duplicated ids. + * + * Also init optional blacklist of ids. + * + * @param string $str Header anchor + * + * @return string Incremental, numeric suffix + * + * @since 1.0.0 */ - protected function incrementAnchorId($str) + protected function incrementAnchorId(string $str) : string { // add blacklist to list of used anchors if (!$this->isBlacklistInitialized) { @@ -2308,7 +2854,7 @@ class Markdown $newStr = $str; if ($count = $this->anchorDuplicates[$str]) { - $newStr .= "-{$count}"; + $newStr .= '-' . $count; // increment until conversion doesn't produce new duplicates anymore if (isset($this->anchorDuplicates[$newStr])) { @@ -2323,6 +2869,10 @@ class Markdown /** * Add blacklisted ids to anchor list. + * + * @return void + * + * @since 1.0.0 */ protected function initBlacklist() : void { @@ -2332,28 +2882,34 @@ class Markdown if (!empty($this->options['headings']['blacklist']) && \is_array($this->options['headings']['blacklist'])) { foreach ($this->options['headings']['blacklist'] as $v) { - if (\is_string($v)) { - $this->anchorDuplicates[$v] = 0; - } + $this->anchorDuplicates[$v] = 0; } } $this->isBlacklistInitialized = true; } - protected function lineElements($text, $nonNestables = []) + /** + * Parse inline elements + * + * @param string $text Text to parse + * @param array $nonNestables Inline elements that are not allowed to be nested + * + * @return array + * + * @since 1.0.0 + */ + protected function lineElements(string $text, array $nonNestables = []) : array { - $Elements = []; + $elements = []; - $nonNestables = empty($nonNestables) - ? [] - : \array_combine($nonNestables, $nonNestables); - - // $excerpt is based on the first occurrence of a marker - - while ($exc = \strpbrk($text, $this->inlineMarkerList)) { - $marker = $exc[0]; + if (!empty($nonNestables)) { + $nonNestables = \array_combine($nonNestables, $nonNestables); + } + // $exc is based on the first occurrence of a marker + while (($exc = \strpbrk($text, $this->inlineMarkerList)) !== false) { + $marker = $exc[0]; $markerPosition = \strlen($text) - \strlen($exc); // Get the first char before the marker @@ -2362,457 +2918,356 @@ class Markdown $excerpt = ['text' => $exc, 'context' => $text, 'before' => $charBeforeMarker]; - foreach ($this->InlineTypes[$marker] as $inlineType) { + foreach ($this->inlineTypes[$marker] as $inlineType) { // check to see if the current inline type is nestable in the current context - if (isset($nonNestables[$inlineType])) { continue; } - $Inline = $this->{"inline{$inlineType}"}($excerpt); - - if (!isset($Inline)) { + $inline = $this->{"inline{$inlineType}"}($excerpt); + if ($inline === null) { continue; } // makes sure that the inline belongs to "our" marker - - if (isset($Inline['position']) && $Inline['position'] > $markerPosition) { + if (isset($inline['position']) && $inline['position'] > $markerPosition) { continue; } // sets a default inline position - - if (!isset($Inline['position'])) { - $Inline['position'] = $markerPosition; + if (!isset($inline['position'])) { + $inline['position'] = $markerPosition; } // cause the new element to 'inherit' our non nestables - - $Inline['element']['nonNestables'] = isset($Inline['element']['nonNestables']) - ? \array_merge($Inline['element']['nonNestables'], $nonNestables) + $inline['element']['nonNestables'] = isset($inline['element']['nonNestables']) + ? \array_merge($inline['element']['nonNestables'], $nonNestables) : $nonNestables; // the text that comes before the inline - $unmarkedText = \substr($text, 0, $Inline['position']); + $unmarkedText = \substr($text, 0, $inline['position']); // compile the unmarked text - $InlineText = $this->inlineText($unmarkedText); - $Elements[] = $InlineText['element']; + $inlineText = $this->inlineText($unmarkedText); + $elements[] = $inlineText['element']; // compile the inline - $Elements[] = $this->extractElement($Inline); + $elements[] = $this->extractElement($inline); // remove the examined text - $text = \substr($text, $Inline['position'] + $Inline['extent']); + $text = \substr($text, $inline['position'] + $inline['extent']); continue 2; } // the marker does not belong to an inline - $unmarkedText = \substr($text, 0, $markerPosition + 1); - $InlineText = $this->inlineText($unmarkedText); - $Elements[] = $InlineText['element']; + $inlineText = $this->inlineText($unmarkedText); + $elements[] = $inlineText['element']; $text = \substr($text, $markerPosition + 1); } - $InlineText = $this->inlineText($text); - $Elements[] = $InlineText['element']; + $inlineText = $this->inlineText($text); + $elements[] = $inlineText['element']; - foreach ($Elements as &$Element) { - if (!isset($Element['autobreak'])) { - $Element['autobreak'] = false; + foreach ($elements as &$element) { + if (!isset($element['autobreak'])) { + $element['autobreak'] = false; } } - return $Elements; + return $elements; } - private function pregReplaceAssoc(array $replace, $subject) + /** + * Continue block footnote + * + * @param array{body:string, indent:int, text:string} $line Line data + * @param array $block Current block + * + * @return null|array + * + * @since 1.0.0 + */ + protected function blockFootnoteContinue(array $line, array $block) : ?array { - return \preg_replace(\array_keys($replace), \array_values($replace), $subject); - } - - // - // Blocks - // - - // - // Abbreviation - - protected function blockAbbreviationBase($Line) - { - if (\preg_match('/^\*\[(.+?)\]:[ ]*(.+?)[ ]*$/', $Line['text'], $matches)) - { - $this->definitionData['Abbreviation'][$matches[1]] = $matches[2]; - - return [ - 'hidden' => true, - ]; - } - } - - // - // Footnote - - protected function blockFootnoteBase($Line) - { - if (\preg_match('/^\[\^(.+?)\]:[ ]?(.*)$/', $Line['text'], $matches)) - { - return [ - 'label' => $matches[1], - 'text' => $matches[2], - 'hidden' => true, - ]; - } - } - - protected function blockFootnoteContinue($Line, $Block) - { - if ($Line['text'][0] === '[' && \preg_match('/^\[\^(.+?)\]:/', $Line['text'])) - { - return; + if ($line['text'][0] === '[' + && \preg_match('/^\[\^(.+?)\]:/', $line['text']) + ) { + return null; } - if (isset($Block['interrupted'])) - { - if ($Line['indent'] >= 4) - { - $Block['text'] .= "\n\n" . $Line['text']; + if (isset($block['interrupted'])) { + if ($line['indent'] >= 4) { + $block['text'] .= "\n\n" . $line['text']; - return $Block; + return $block; } } else { - $Block['text'] .= "\n" . $Line['text']; + $block['text'] .= "\n" . $line['text']; - return $Block; + return $block; } + + return null; } - protected function blockFootnoteComplete($Block) + /** + * Complete block footnote + * + * @param array $block Current block + * + * @return array + * + * @since 1.0.0 + */ + protected function blockFootnoteComplete(array $block) : array { - $this->definitionData['Footnote'][$Block['label']] = [ - 'text' => $Block['text'], + $this->definitionData['Footnote'][$block['label']] = [ + 'text' => $block['text'], 'count' => null, 'number' => null, ]; - return $Block; + return $block; } - // - // Definition List - - protected function blockDefinitionListBase($Line, $Block) + /** + * Continue block footnote + * + * @param array{body:string, indent:int, text:string} $line Line data + * @param array $block Current block + * + * @return null|array + * + * @since 1.0.0 + */ + protected function blockDefinitionListContinue(array $line, array $block) : ?array { - if (!isset($Block) || $Block['type'] !== 'Paragraph') - { - return; + if ($line['text'][0] === ':') { + return $this->addDdElement($line, $block); } - $Element = [ - 'name' => 'dl', - 'elements' => [], + if (isset($block['interrupted']) && $line['indent'] === 0) { + return null; + } + + if (isset($block['interrupted'])) { + $block['dd']['handler']['function'] = 'textElements'; + $block['dd']['handler']['argument'] .= "\n\n"; + + $block['dd']['handler']['destination'] = 'elements'; + + unset($block['interrupted']); + } + + $text = \substr($line['body'], \min($line['indent'], 4)); + + $block['dd']['handler']['argument'] .= "\n" . $text; + + return $block; + } + + /** + * Continue block markup + * + * @param array{body:string, indent:int, text:string} $line Line data + * @param array $block Current block + * + * @return null|array + * + * @since 1.0.0 + */ + protected function blockMarkupContinue(array $line, array $block) : ?array + { + if (isset($block['closed'])) { + return null; + } + + if (\preg_match('/^<' . $block['name'] . '(?:[ ]*' . $this->regexHtmlAttribute . ')*[ ]*>/i', $line['text'])) { + // open + ++$block['depth']; + } + + if (\preg_match('/(.*?)<\/' . $block['name'] . '>[ ]*$/i', $line['text'], $matches)) { + // close + if ($block['depth'] > 0) { + --$block['depth']; + } else { + $block['closed'] = true; + } + } + + if (isset($block['interrupted'])) { + $block['element']['rawHtml'] .= "\n"; + unset($block['interrupted']); + } + + $block['element']['rawHtml'] .= "\n".$line['body']; + + return $block; + } + + /** + * Complete block markup + * + * @param array $block Current block + * + * @return array + * + * @since 1.0.0 + */ + protected function blockMarkupComplete(array $block) : array + { + if (!isset($block['void'])) { + $block['element']['rawHtml'] = $this->processTag($block['element']['rawHtml']); + } + + return $block; + } + + /** + * Handle footnote marker + * + * @param array{text:string, context:string, before:string} $excerpt Inline data + * + * @return null|array{extent:int, element:array} + * + * @since 1.0.0 + */ + protected function inlineFootnoteMarker(array $excerpt) : ?array + { + if (\preg_match('/^\[\^(.+?)\]/', $excerpt['text'], $matches) !== 1) { + return null; + } + + $name = $matches[1]; + + if (!isset($this->definitionData['Footnote'][$name])) { + return null; + } + + ++$this->definitionData['Footnote'][$name]['count']; + + if (!isset($this->definitionData['Footnote'][$name]['number'])) + { + $this->definitionData['Footnote'][$name]['number'] = ++ $this->footnoteCount; // » & + } + + $element = [ + 'name' => 'sup', + 'attributes' => ['id' => 'fnref' . $this->definitionData['Footnote'][$name]['count'] . ':' . $name], + 'element' => [ + 'name' => 'a', + 'attributes' => ['href' => '#fn:' . $name, 'class' => 'footnote-ref'], + 'text' => $this->definitionData['Footnote'][$name]['number'], + ], ]; - $terms = \explode("\n", $Block['element']['handler']['argument']); - - foreach ($terms as $term) - { - $Element['elements'][] = [ - 'name' => 'dt', - 'handler' => [ - 'function' => 'lineElements', - 'argument' => $term, - 'destination' => 'elements', - ], - ]; - } - - $Block['element'] = $Element; - - return $this->addDdElement($Line, $Block); + return [ + 'extent' => \strlen($matches[0]), + 'element' => $element, + ]; } - protected function blockDefinitionListContinue($Line, array $Block) + /** + * Insert/replace text with abbreviation + * + * @param array $element Element to insert abbreviation into + * + * @return array + * + * @since 1.0.0 + */ + protected function insertAbreviation(array $element) : array { - if ($Line['text'][0] === ':') - { - return $this->addDdElement($Line, $Block); - } else { - if (isset($Block['interrupted']) && $Line['indent'] === 0) - { - return; - } - - if (isset($Block['interrupted'])) - { - $Block['dd']['handler']['function'] = 'textElements'; - $Block['dd']['handler']['argument'] .= "\n\n"; - - $Block['dd']['handler']['destination'] = 'elements'; - - unset($Block['interrupted']); - } - - $text = \substr($Line['body'], \min($Line['indent'], 4)); - - $Block['dd']['handler']['argument'] .= "\n" . $text; - - return $Block; - } - } - - // - // Header - - protected function blockHeaderBase($Line) - { - $Block = $this->blockHeaderParent($Line); - - if ($Block !== null && \preg_match('/[ #]*{('.$this->regexAttribute.'+)}[ ]*$/', $Block['element']['handler']['argument'], $matches, \PREG_OFFSET_CAPTURE)) - { - $attributeString = $matches[1][0]; - - $Block['element']['attributes'] = $this->parseAttributeData($attributeString); - - $Block['element']['handler']['argument'] = \substr($Block['element']['handler']['argument'], 0, (int) $matches[0][1]); + if (!isset($element['text'])) { + return $element; } - return $Block; - } - - // - // Markup - - protected function blockMarkupBase($Line) - { - if ($this->markupEscaped || $this->safeMode) - { - return; - } - - if (\preg_match('/^<(\w[\w-]*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches)) - { - $element = \strtolower($matches[1]); - - if (\in_array($element, $this->textLevelElements)) - { - return; - } - - $Block = [ - 'name' => $matches[1], - 'depth' => 0, - 'element' => [ - 'rawHtml' => $Line['text'], - 'autobreak' => true, - ], - ]; - - $length = \strlen($matches[0]); - $remainder = \substr($Line['text'], $length); - - if (\trim($remainder) === '') - { - if (isset($matches[2]) || \in_array($matches[1], $this->voidElements)) - { - $Block['closed'] = true; - $Block['void'] = true; - } - } - else - { - if (isset($matches[2]) || \in_array($matches[1], $this->voidElements)) - { - return; - } - if (\preg_match('/<\/'.$matches[1].'>[ ]*$/i', $remainder)) - { - $Block['closed'] = true; - } - } - - return $Block; - } - } - - protected function blockMarkupContinue($Line, array $Block) - { - if (isset($Block['closed'])) - { - return; - } - - if (\preg_match('/^<'.$Block['name'].'(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*>/i', $Line['text'])) // open - { - ++$Block['depth']; - } - - if (\preg_match('/(.*?)<\/'.$Block['name'].'>[ ]*$/i', $Line['text'], $matches)) // close - { - if ($Block['depth'] > 0) - { - --$Block['depth']; - } - else - { - $Block['closed'] = true; - } - } - - if (isset($Block['interrupted'])) - { - $Block['element']['rawHtml'] .= "\n"; - unset($Block['interrupted']); - } - - $Block['element']['rawHtml'] .= "\n".$Line['body']; - - return $Block; - } - - protected function blockMarkupComplete($Block) - { - if (!isset($Block['void'])) - { - $Block['element']['rawHtml'] = $this->processTag($Block['element']['rawHtml']); - } - - return $Block; - } - - // - // Setext - - protected function blockSetextHeaderBase($Line, array $Block = null) - { - $Block = $this->blockSetextHeaderParent($Line, $Block); - - if ($Block !== null && \preg_match('/[ ]*{('.$this->regexAttribute.'+)}[ ]*$/', $Block['element']['handler']['argument'], $matches, \PREG_OFFSET_CAPTURE)) - { - $attributeString = $matches[1][0]; - - $Block['element']['attributes'] = $this->parseAttributeData($attributeString); - - $Block['element']['handler']['argument'] = \substr($Block['element']['handler']['argument'], 0, (int) $matches[0][1]); - } - - return $Block; - } - - // - // Inline Elements - // - - // - // Footnote Marker - - protected function inlineFootnoteMarker($Excerpt) - { - if (\preg_match('/^\[\^(.+?)\]/', $Excerpt['text'], $matches)) - { - $name = $matches[1]; - - if (!isset($this->definitionData['Footnote'][$name])) - { - return; - } - - ++$this->definitionData['Footnote'][$name]['count']; - - if (!isset($this->definitionData['Footnote'][$name]['number'])) - { - $this->definitionData['Footnote'][$name]['number'] = ++ $this->footnoteCount; // » & - } - - $Element = [ - 'name' => 'sup', - 'attributes' => ['id' => 'fnref'.$this->definitionData['Footnote'][$name]['count'].':'.$name], - 'element' => [ - 'name' => 'a', - 'attributes' => ['href' => '#fn:'.$name, 'class' => 'footnote-ref'], - 'text' => $this->definitionData['Footnote'][$name]['number'], - ], - ]; - - return [ - 'extent' => \strlen($matches[0]), - 'element' => $Element, - ]; - } - } - - private $footnoteCount = 0; - - // - // ~ - // - - private $currentAbreviation; - - private $currentMeaning; - - protected function insertAbreviation(array $Element) - { - if (isset($Element['text'])) - { - $Element['elements'] = self::pregReplaceElements( - '/\b'.\preg_quote($this->currentAbreviation, '/').'\b/', + $element['elements'] = self::pregReplaceElements( + '/\b' . \preg_quote($this->currentAbreviation, '/') . '\b/', + [ [ - [ - 'name' => 'abbr', - 'attributes' => [ - 'title' => $this->currentMeaning, - ], - 'text' => $this->currentAbreviation, + 'name' => 'abbr', + 'attributes' => [ + 'title' => $this->currentMeaning, ], + 'text' => $this->currentAbreviation, ], - $Element['text'] - ); + ], + $element['text'] + ); - unset($Element['text']); - } + unset($element['text']); - return $Element; + return $element; } - protected function inlineText($text) + /** + * Inline elements in text + * + * @param string $text Text to search for inlinable elements + * + * @return array + * + * @since 1.0.0 + */ + protected function inlineText(string $text) : array { - $Inline = $this->inlineTextParent($text); + $inline = [ + 'extent' => \strlen($text), + 'element' => [], + ]; - if (isset($this->definitionData['Abbreviation'])) + $inline['element']['elements'] = self::pregReplaceElements( + $this->breaksEnabled ? '/[ ]*+\n/' : '/(?:[ ]*+\\\\|[ ]{2,}+)\n/', + [ + ['name' => 'br'], + ['text' => "\n"], + ], + $text + ); + + // Handle abbreviations + if (!isset($this->definitionData['Abbreviation'])) { + return $inline; + } + + foreach ($this->definitionData['Abbreviation'] as $abbreviation => $meaning) { - foreach ($this->definitionData['Abbreviation'] as $abbreviation => $meaning) - { - $this->currentAbreviation = $abbreviation; - $this->currentMeaning = $meaning; + $this->currentAbreviation = $abbreviation; + $this->currentMeaning = $meaning; - $Inline['element'] = $this->elementApplyRecursiveDepthFirst( - [$this, 'insertAbreviation'], - $Inline['element'] - ); - } + $inline['element'] = $this->elementApplyRecursiveDepthFirst( + [$this, 'insertAbreviation'], + $inline['element'] + ); } - return $Inline; + return $inline; } - // - // Util Methods - // - - protected function addDdElement(array $Line, array $Block) + /** + * Handle block list + * + * @param array $line Line data + * @param array $block Current block + * + * @return array + * + * @since 1.0.0 + */ + protected function addDdElement(array $line, array $block) : array { - $text = \substr($Line['text'], 1); + $text = \substr($line['text'], 1); $text = \trim($text); - unset($Block['dd']); + unset($block['dd']); - $Block['dd'] = [ + $block['dd'] = [ 'name' => 'dd', 'handler' => [ 'function' => 'lineElements', @@ -2821,21 +3276,27 @@ class Markdown ], ]; - if (isset($Block['interrupted'])) - { - $Block['dd']['handler']['function'] = 'textElements'; + if (isset($block['interrupted'])) { + $block['dd']['handler']['function'] = 'textElements'; - unset($Block['interrupted']); + unset($block['interrupted']); } - $Block['element']['elements'][] = & $Block['dd']; + $block['element']['elements'][] = &$block['dd']; - return $Block; + return $block; } - protected function buildFootnoteElement() + /** + * Create footnotes + * + * @return array + * + * @since 1.0.0 + */ + protected function buildFootnoteElement() : array { - $Element = [ + $element = [ 'name' => 'div', 'attributes' => ['class' => 'footnotes'], 'elements' => [ @@ -2849,23 +3310,17 @@ class Markdown \uasort($this->definitionData['Footnote'], 'self::sortFootnotes'); - foreach ($this->definitionData['Footnote'] as $definitionId => $definitionData) - { - if (!isset($definitionData['number'])) - { + foreach ($this->definitionData['Footnote'] as $definitionId => $definitionData) { + if (!isset($definitionData['number'])) { continue; } - $text = $definitionData['text']; - - $textElements = $this->textElements($text); - - $numbers = \range(1, $definitionData['count']); - + $text = $definitionData['text']; + $textElements = $this->textElements($text); + $numbers = \range(1, $definitionData['count']); $backLinkElements = []; - foreach ($numbers as $number) - { + foreach ($numbers as $number) { $backLinkElements[] = ['text' => ' ']; $backLinkElements[] = [ 'name' => 'a', @@ -2884,8 +3339,7 @@ class Markdown $n = \count($textElements) - 1; - if ($textElements[$n]['name'] === 'p') - { + if ($textElements[$n]['name'] === 'p') { $backLinkElements = \array_merge( [ [ @@ -2905,122 +3359,105 @@ class Markdown $backLinkElements ), ]; - } - else - { + } else { $textElements[] = [ 'name' => 'p', 'elements' => $backLinkElements, ]; } - $Element['elements'][1]['elements'][] = [ + $element['elements'][1]['elements'][] = [ 'name' => 'li', - 'attributes' => ['id' => 'fn:'.$definitionId], + 'attributes' => ['id' => 'fn:' . $definitionId], 'elements' => \array_merge( $textElements ), ]; } - return $Element; + return $element; } - // ~ - - protected function parseAttributeDataBase($attributeString) - { - $Data = []; - - $attributes = \preg_split('/[ ]+/', $attributeString, - 1, \PREG_SPLIT_NO_EMPTY); - - foreach ($attributes as $attribute) - { - if ($attribute[0] === '#') - { - $Data['id'] = \substr($attribute, 1); - } - else // "." - { - $classes [] = \substr($attribute, 1); - } - } - - if (isset($classes)) - { - $Data['class'] = \implode(' ', $classes); - } - - return $Data; - } - - // ~ - - protected function processTag($elementMarkup) // recursive + /** + * Handle markup/html. + * + * Ensures that html is well formed. + * + * This function is called recursively + * + * @param string $elementMarkup Markup + * + * @return string + * + * @since 1.0.0 + */ + protected function processTag(string $elementMarkup) : string { // http://stackoverflow.com/q/1148928/200145 \libxml_use_internal_errors(true); - $DOMDocument = new \DOMDocument(); + $dom = new \DOMDocument(); // http://stackoverflow.com/q/11309194/200145 $elementMarkup = \mb_convert_encoding($elementMarkup, 'HTML-ENTITIES', 'UTF-8'); // http://stackoverflow.com/q/4879946/200145 - $DOMDocument->loadHTML($elementMarkup); - $DOMDocument->removeChild($DOMDocument->doctype); - $DOMDocument->replaceChild($DOMDocument->firstChild->firstChild->firstChild, $DOMDocument->firstChild); + $dom->loadHTML($elementMarkup); + $dom->removeChild($dom->doctype); + $dom->replaceChild($dom->firstChild->firstChild->firstChild, $dom->firstChild); $elementText = ''; - if ($DOMDocument->documentElement->getAttribute('markdown') === '1') - { - foreach ($DOMDocument->documentElement->childNodes as $Node) - { - $elementText .= $DOMDocument->saveHTML($Node); + if ($dom->documentElement->getAttribute('markdown') === '1') { + foreach ($dom->documentElement->childNodes as $node) { + $elementText .= $dom->saveHTML($node); } - $DOMDocument->documentElement->removeAttribute('markdown'); + $dom->documentElement->removeAttribute('markdown'); - $elementText = "\n".$this->text($elementText)."\n"; + $elementText = "\n" . $this->text($elementText) . "\n"; } else { - foreach ($DOMDocument->documentElement->childNodes as $Node) - { - $nodeMarkup = $DOMDocument->saveHTML($Node); - - if ($Node instanceof \DOMElement && ! \in_array($Node->nodeName, $this->textLevelElements)) - { - $elementText .= $this->processTag($nodeMarkup); - } - else - { - $elementText .= $nodeMarkup; - } + foreach ($dom->documentElement->childNodes as $node) { + $nodeMarkup = $dom->saveHTML($node); + $elementText .= $node instanceof \DOMElement && !\in_array($node->nodeName, $this->textLevelElements) + ? $this->processTag($nodeMarkup) + : $nodeMarkup; } } // because we don't want for markup to get encoded - $DOMDocument->documentElement->nodeValue = 'placeholder\x1A'; + $dom->documentElement->nodeValue = 'placeholder\x1A'; - $markup = $DOMDocument->saveHTML($DOMDocument->documentElement); + $markup = $dom->saveHTML($dom->documentElement); return \str_replace('placeholder\x1A', $elementText, $markup); } - // ~ - - protected function sortFootnotes($A, $B) // callback + /** + * Footnote sort function + * + * @param array $a First element + * @param array $b Second element + * + * @return int + * + * @since 1.0.0 + */ + protected function sortFootnotes(array $a, array $b) : int { - return $A['number'] - $B['number']; + return $a['number'] <=> $b['number']; } - // - // Fields - // - - protected $regexAttribute = '(?:[#.][-\w]+[ ]*)'; - - protected function textElements($text) + /** + * Parse text elements to lines and then handle lines. + * + * @param string $text Text to parse + * + * @return array + * + * @since 1.0.0 + */ + protected function textElements(string $text) : array { // make sure no definitions are set $this->definitionData = []; @@ -3038,128 +3475,25 @@ class Markdown return $this->linesElements($lines); } - // - // Setters - // - - public function setBreaksEnabled($breaksEnabled) + /** + * Handle lines of elements + * + * @param string[] $lines Lines to parse + * + * @return array + * + * @since 1.0.0 + */ + protected function linesElements(array $lines) : array { - $this->breaksEnabled = $breaksEnabled; - - return $this; - } - - protected $breaksEnabled; - - public function setMarkupEscaped($markupEscaped) - { - $this->markupEscaped = $markupEscaped; - - return $this; - } - - protected $markupEscaped; - - public function setUrlsLinked($urlsLinked) - { - $this->urlsLinked = $urlsLinked; - - return $this; - } - - protected $urlsLinked = true; - - public function setSafeMode($safeMode) - { - $this->safeMode = (bool) $safeMode; - - return $this; - } - - protected $safeMode; - - public function setStrictMode($strictMode) - { - $this->strictMode = (bool) $strictMode; - - return $this; - } - - protected $strictMode; - - protected $safeLinksWhitelist = [ - 'http://', - 'https://', - 'ftp://', - 'ftps://', - 'mailto:', - 'tel:', - 'data:image/png;base64,', - 'data:image/gif;base64,', - 'data:image/jpeg;base64,', - 'irc:', - 'ircs:', - 'git:', - 'ssh:', - 'news:', - 'steam:', - ]; - - // - // Lines - // - - protected $BlockTypes = [ - '#' => ['Header'], - '*' => ['Rule', 'List', 'Abbreviation'], - '+' => ['List'], - '-' => ['SetextHeader', 'Table', 'Rule', 'List'], - '0' => ['List'], - '1' => ['List'], - '2' => ['List'], - '3' => ['List'], - '4' => ['List'], - '5' => ['List'], - '6' => ['List'], - '7' => ['List'], - '8' => ['List'], - '9' => ['List'], - ':' => ['Table', 'DefinitionList'], - '<' => ['Comment', 'Markup'], - '=' => ['SetextHeader'], - '>' => ['Quote'], - '[' => ['Footnote', 'Reference'], - '_' => ['Rule'], - '`' => ['FencedCode'], - '|' => ['Table'], - '~' => ['FencedCode'], - ]; - - // ~ - - protected $unmarkedBlockTypes = [ - 'Code', - ]; - - // - // Blocks - // - - protected function lines(array $lines) - { - return $this->elements($this->linesElements($lines)); - } - - protected function linesElements(array $lines) - { - $Elements = []; - $CurrentBlock = null; + $elements = []; + $currentBlock = null; foreach ($lines as $line) { if (\rtrim($line) === '') { - if (isset($CurrentBlock)) { - $CurrentBlock['interrupted'] = (isset($CurrentBlock['interrupted']) - ? $CurrentBlock['interrupted'] + 1 : 1 + if (isset($currentBlock)) { + $currentBlock['interrupted'] = (isset($currentBlock['interrupted']) + ? $currentBlock['interrupted'] + 1 : 1 ); } @@ -3175,433 +3509,309 @@ class Markdown } $indent = \strspn($line, ' '); + $text = $indent > 0 ? \substr($line, $indent) : $line; + $line = ['body' => $line, 'indent' => $indent, 'text' => $text]; - $text = $indent > 0 ? \substr($line, $indent) : $line; + if (isset($currentBlock['continuable'])) { + $methodName = 'block' . $currentBlock['type'] . 'Continue'; + $block = $this->{$methodName}($line, $currentBlock); - $Line = ['body' => $line, 'indent' => $indent, 'text' => $text]; - - if (isset($CurrentBlock['continuable'])) { - $methodName = 'block' . $CurrentBlock['type'] . 'Continue'; - $Block = $this->{$methodName}($Line, $CurrentBlock); - - if (isset($Block)) { - $CurrentBlock = $Block; + if (isset($block)) { + $currentBlock = $block; continue; - } elseif ($this->isBlockCompletable($CurrentBlock['type'])) { - $methodName = 'block' . $CurrentBlock['type'] . 'Complete'; - $CurrentBlock = $this->{$methodName}($CurrentBlock); + } elseif ($this->isBlockCompletable($currentBlock['type'])) { + $methodName = 'block' . $currentBlock['type'] . 'Complete'; + $currentBlock = $this->{$methodName}($currentBlock); } } - $marker = $text[0]; - + $marker = $text[0]; $blockTypes = $this->unmarkedBlockTypes; - if (isset($this->BlockTypes[$marker])) { - foreach ($this->BlockTypes[$marker] as $blockType) { + if (isset($this->blockTypes[$marker])) { + foreach ($this->blockTypes[$marker] as $blockType) { $blockTypes [] = $blockType; } } foreach ($blockTypes as $blockType) { - $Block = $this->{"block{$blockType}"}($Line, $CurrentBlock); + $block = $this->{"block{$blockType}"}($line, $currentBlock); - if (isset($Block)) { - $Block['type'] = $blockType; + if (isset($block)) { + $block['type'] = $blockType; - if (!isset($Block['identified'])) { - if (isset($CurrentBlock)) { - $Elements[] = $this->extractElement($CurrentBlock); + if (!isset($block['identified'])) { + if (isset($currentBlock)) { + $elements[] = $this->extractElement($currentBlock); } - $Block['identified'] = true; + $block['identified'] = true; } if ($this->isBlockContinuable($blockType)) { - $Block['continuable'] = true; + $block['continuable'] = true; } - $CurrentBlock = $Block; + $currentBlock = $block; continue 2; } } - if (isset($CurrentBlock) && $CurrentBlock['type'] === 'Paragraph') { - $Block = $this->paragraphContinue($Line, $CurrentBlock); + if (isset($currentBlock) && $currentBlock['type'] === 'Paragraph') { + $block = $this->paragraphContinue($line, $currentBlock); } - if (isset($Block)) { - $CurrentBlock = $Block; + if (isset($block)) { + $currentBlock = $block; } else { - if (isset($CurrentBlock)) { - $Elements[] = $this->extractElement($CurrentBlock); + if (isset($currentBlock)) { + $elements[] = $this->extractElement($currentBlock); } - $CurrentBlock = $this->paragraph($Line); - - $CurrentBlock['identified'] = true; - } - } - - if (isset($CurrentBlock['continuable']) && $this->isBlockCompletable($CurrentBlock['type'])) { - $methodName = 'block' . $CurrentBlock['type'] . 'Complete'; - $CurrentBlock = $this->{$methodName}($CurrentBlock); - } - - if (isset($CurrentBlock)) { - $Elements[] = $this->extractElement($CurrentBlock); - } - - return $Elements; - } - - protected function extractElement(array $Component) - { - if (!isset($Component['element'])) - { - if (isset($Component['markup'])) { - $Component['element'] = ['rawHtml' => $Component['markup']]; - } elseif (isset($Component['hidden'])) { - $Component['element'] = []; - } - } - - return $Component['element']; - } - - protected function isBlockContinuable($Type) : bool - { - return \method_exists($this, 'block' . $Type . 'Continue'); - } - - protected function isBlockCompletable($Type) : bool - { - return \method_exists($this, 'block' . $Type . 'Complete'); - } - - protected function blockCodeBase($Line, $Block = null) - { - if (isset($Block) && $Block['type'] === 'Paragraph' && !isset($Block['interrupted'])) { - return; - } - - if ($Line['indent'] >= 4) - { - $text = \substr($Line['body'], 4); - - return [ - 'element' => [ - 'name' => 'pre', + $currentBlock = [ + 'type' => 'Paragraph', 'element' => [ - 'name' => 'code', - 'text' => $text, + 'name' => 'p', + 'handler' => [ + 'function' => 'lineElements', + 'argument' => $line['text'], + 'destination' => 'elements', + ], ], - ], - ]; - } - } + ]; - protected function blockCodeContinue($Line, $Block) - { - if ($Line['indent'] >= 4) - { - if (isset($Block['interrupted'])) - { - $Block['element']['element']['text'] .= \str_repeat("\n", $Block['interrupted']); - - unset($Block['interrupted']); + $currentBlock['identified'] = true; } - - $Block['element']['element']['text'] .= "\n"; - - $text = \substr($Line['body'], 4); - - $Block['element']['element']['text'] .= $text; - - return $Block; - } - } - - protected function blockCodeComplete($Block) - { - return $Block; - } - - // - // Comment - - protected function blockCommentBase($Line) - { - if ($this->markupEscaped || $this->safeMode) - { - return; } - if (\str_starts_with($Line['text'], '') !== false) - { - $Block['closed'] = true; + if (isset($currentBlock)) { + $elements[] = $this->extractElement($currentBlock); + } + + return $elements; + } + + /** + * Extract element from block + * + * @param array $block Block + * + * @return array + * + * @since 1.0.0 + */ + protected function extractElement(array $block) : array + { + if (!isset($block['element'])) { + if (isset($block['markup'])) { + $block['element'] = ['rawHtml' => $block['markup']]; + } elseif (isset($block['hidden'])) { + $block['element'] = []; } - - return $Block; } + + return $block['element']; } - protected function blockCommentContinue($Line, array $Block) + /** + * Can the block continue? + * + * @param string $type Block type + * + * @return bool + * + * @todo Consider to create lookup table + * + * @since 1.0.0 + */ + protected function isBlockContinuable(string $type) : bool { - if (isset($Block['closed'])) - { - return; - } - - $Block['element']['rawHtml'] .= "\n" . $Line['body']; - - if (\strpos($Line['text'], '-->') !== false) - { - $Block['closed'] = true; - } - - return $Block; + return \method_exists($this, 'block' . $type . 'Continue'); } - // - // Fenced Code - - protected function blockFencedCodeBase($Line) + /** + * Is the block finished + * + * @param string $type Block type + * + * @return bool + * + * @todo Consider to create lookup table + * + * @since 1.0.0 + */ + protected function isBlockCompletable($type) : bool { - $marker = $Line['text'][0]; - - $openerLength = \strspn($Line['text'], $marker); - - if ($openerLength < 3) - { - return; - } - - $infostring = \trim(\substr($Line['text'], $openerLength), "\t "); - - if (\strpos($infostring, '`') !== false) - { - return; - } - - $Element = [ - 'name' => 'code', - 'text' => '', - ]; - - if ($infostring !== '') - { - /** - * https://www.w3.org/TR/2011/WD-html5-20110525/elements.html#classes - * Every HTML element may have a class attribute specified. - * The attribute, if specified, must have a value that is a set - * of space-separated tokens representing the various classes - * that the element belongs to. - * [...] - * The space characters, for the purposes of this specification, - * are U+0020 SPACE, U+0009 CHARACTER TABULATION (tab), - * U+000A LINE FEED (LF), U+000C FORM FEED (FF), and - * U+000D CARRIAGE RETURN (CR). - */ - $language = \substr($infostring, 0, \strcspn($infostring, " \t\n\f\r")); - - $Element['attributes'] = ['class' => "language-{$language}"]; - } - - return [ - 'char' => $marker, - 'openerLength' => $openerLength, - 'element' => [ - 'name' => 'pre', - 'element' => $Element, - ], - ]; + return \method_exists($this, 'block' . $type . 'Complete'); } - protected function blockFencedCodeContinue($Line, $Block) + /** + * Continue block code + * + * @param array{body:string, indent:int, text:string} $line Line data + * @param array $block Current block + * + * @return null|array + * + * @since 1.0.0 + */ + protected function blockCodeContinue(array $line, array $block) : ?array { - if (isset($Block['complete'])) - { - return; - } - - if (isset($Block['interrupted'])) - { - $Block['element']['element']['text'] .= \str_repeat("\n", $Block['interrupted']); - - unset($Block['interrupted']); - } - - if (($len = \strspn($Line['text'], $Block['char'])) >= $Block['openerLength'] - && \rtrim(\substr($Line['text'], $len), ' ') === '' - ) { - $Block['element']['element']['text'] = \substr($Block['element']['element']['text'], 1); - - $Block['complete'] = true; - - return $Block; - } - - $Block['element']['element']['text'] .= "\n" . $Line['body']; - - return $Block; - } - - protected function blockFencedCodeComplete($Block) - { - return $Block; - } - - // - // Header - - protected function blockHeaderParent($Line) - { - $level = \strspn($Line['text'], '#'); - - if ($level > 6) - { - return; - } - - $text = \trim($Line['text'], '#'); - - if ($this->strictMode && isset($text[0]) && $text[0] !== ' ') - { - return; - } - - $text = \trim($text, ' '); - - return [ - 'element' => [ - 'name' => 'h' . $level, - 'handler' => [ - 'function' => 'lineElements', - 'argument' => $text, - 'destination' => 'elements', - ], - ], - ]; - } - - // - // List - - protected function blockListBase($Line, array $CurrentBlock = null) - { - list($name, $pattern) = $Line['text'][0] <= '-' ? ['ul', '[*+-]'] : ['ol', '[0-9]{1,9}+[.\)]']; - - if (\preg_match('/^('.$pattern.'([ ]++|$))(.*+)/', $Line['text'], $matches)) - { - $contentIndent = \strlen($matches[2]); - - if ($contentIndent >= 5) - { - --$contentIndent; - $matches[1] = \substr($matches[1], 0, -$contentIndent); - $matches[3] = \str_repeat(' ', $contentIndent) . $matches[3]; - } - elseif ($contentIndent === 0) - { - $matches[1] .= ' '; - } - - $markerWithoutWhitespace = \strstr($matches[1], ' ', true); - - $Block = [ - 'indent' => $Line['indent'], - 'pattern' => $pattern, - 'data' => [ - 'type' => $name, - 'marker' => $matches[1], - 'markerType' => ($name === 'ul' ? $markerWithoutWhitespace : \substr($markerWithoutWhitespace, -1)), - ], - 'element' => [ - 'name' => $name, - 'elements' => [], - ], - ]; - $Block['data']['markerTypeRegex'] = \preg_quote($Block['data']['markerType'], '/'); - - if ($name === 'ol') - { - $listStart = \ltrim(\strstr($matches[1], $Block['data']['markerType'], true), '0') ?: '0'; - - if ($listStart !== '1') - { - if (isset($CurrentBlock) - && $CurrentBlock['type'] === 'Paragraph' - && !isset($CurrentBlock['interrupted']) - ) { - return; - } - - $Block['element']['attributes'] = ['start' => $listStart]; - } - } - - $Block['li'] = [ - 'name' => 'li', - 'handler' => [ - 'function' => 'li', - 'argument' => empty($matches[3]) ? [] : [$matches[3]], - 'destination' => 'elements', - ], - ]; - - $Block['element']['elements'][] = & $Block['li']; - - return $Block; - } - } - - protected function blockListContinue($Line, array $Block) - { - if (isset($Block['interrupted']) && empty($Block['li']['handler']['argument'])) - { + if ($line['indent'] < 4) { return null; } - $requiredIndent = ($Block['indent'] + \strlen($Block['data']['marker'])); + if (isset($block['interrupted'])) { + $block['element']['element']['text'] .= \str_repeat("\n", $block['interrupted']); - if ($Line['indent'] < $requiredIndent - && ( - ( - $Block['data']['type'] === 'ol' - && \preg_match('/^[0-9]++'.$Block['data']['markerTypeRegex'].'(?:[ ]++(.*)|$)/', $Line['text'], $matches) - ) || ( - $Block['data']['type'] === 'ul' - && \preg_match('/^'.$Block['data']['markerTypeRegex'].'(?:[ ]++(.*)|$)/', $Line['text'], $matches) - ) - ) + unset($block['interrupted']); + } + + $block['element']['element']['text'] .= "\n"; + + $text = \substr($line['body'], 4); + + $block['element']['element']['text'] .= $text; + + return $block; + } + + /** + * Complete block code + * + * @param array $block Current block + * + * @return array + * + * @since 1.0.0 + */ + protected function blockCodeComplete(array $block) : array + { + return $block; + } + + /** + * Continue block code + * + * @param array{body:string, indent:int, text:string} $line Line data + * @param array $block Current block + * + * @return null|array + * + * @since 1.0.0 + */ + protected function blockCommentContinue(array $line, array $block) : ?array + { + if (isset($block['closed'])) { + return null; + } + + $block['element']['rawHtml'] .= "\n" . $line['body']; + + if (\strpos($line['text'], '-->') !== false) { + $block['closed'] = true; + } + + return $block; + } + + /** + * Continue block fenced code + * + * @param array{body:string, indent:int, text:string} $line Line data + * @param array $block Current block + * + * @return null|array + * + * @since 1.0.0 + */ + protected function blockFencedCodeContinue(array $line, array $block) : ?array + { + if (isset($block['complete'])) { + return null; + } + + if (isset($block['interrupted'])) { + $block['element']['element']['text'] .= \str_repeat("\n", $block['interrupted']); + + unset($block['interrupted']); + } + + if (($len = \strspn($line['text'], $block['char'])) >= $block['openerLength'] + && \rtrim(\substr($line['text'], $len), ' ') === '' ) { - if (isset($Block['interrupted'])) - { - $Block['li']['handler']['argument'][] = ''; + $block['element']['element']['text'] = \substr($block['element']['element']['text'], 1); - $Block['loose'] = true; + $block['complete'] = true; - unset($Block['interrupted']); + return $block; + } + + $block['element']['element']['text'] .= "\n" . $line['body']; + + return $block; + } + + /** + * Complete block fenced code + * + * @param array $block Current block + * + * @return array + * + * @since 1.0.0 + */ + protected function blockFencedCodeComplete(array $block) : array + { + return $block; + } + + /** + * Continue block list + * + * @param array{body:string, indent:int, text:string} $line Line data + * @param array $block Current block + * + * @return null|array + * + * @since 1.0.0 + */ + protected function blockListContinue(array $line, array $block) : ?array + { + if (isset($block['interrupted']) && empty($block['li']['handler']['argument'])) { + return null; + } + + $requiredIndent = ($block['indent'] + \strlen($block['data']['marker'])); + + if ($line['indent'] < $requiredIndent + && (($block['data']['type'] === 'ol' + && \preg_match('/^[0-9]++' . $block['data']['markerTypeRegex'] . '(?:[ ]++(.*)|$)/', $line['text'], $matches) + ) || ($block['data']['type'] === 'ul' + && \preg_match('/^' . $block['data']['markerTypeRegex'] . '(?:[ ]++(.*)|$)/', $line['text'], $matches) + )) + ) { + if (isset($block['interrupted'])) { + $block['li']['handler']['argument'][] = ''; + + $block['loose'] = true; + + unset($block['interrupted']); } - unset($Block['li']); + unset($block['li']); $text = isset($matches[1]) ? $matches[1] : ''; - $Block['indent'] = $Line['indent']; + $block['indent'] = $line['indent']; - $Block['li'] = [ + $block['li'] = [ 'name' => 'li', 'handler' => [ 'function' => 'li', @@ -3610,529 +3820,192 @@ class Markdown ], ]; - $Block['element']['elements'][] = & $Block['li']; + $block['element']['elements'][] = &$block['li']; - return $Block; - } - elseif ($Line['indent'] < $requiredIndent && $this->blockList($Line)) - { + return $block; + } elseif ($line['indent'] < $requiredIndent && $this->blockList($line)) { return null; } - if ($Line['text'][0] === '[' && $this->blockReference($Line)) - { - return $Block; + if ($line['text'][0] === '[' && $this->blockReference($line)) { + return $block; } - if ($Line['indent'] >= $requiredIndent) - { - if (isset($Block['interrupted'])) - { - $Block['li']['handler']['argument'][] = ''; + if ($line['indent'] >= $requiredIndent) { + if (isset($block['interrupted'])) { + $block['li']['handler']['argument'][] = ''; - $Block['loose'] = true; + $block['loose'] = true; - unset($Block['interrupted']); + unset($block['interrupted']); } - $text = \substr($Line['body'], $requiredIndent); + $text = \substr($line['body'], $requiredIndent); - $Block['li']['handler']['argument'][] = $text; + $block['li']['handler']['argument'][] = $text; - return $Block; + return $block; } - if (!isset($Block['interrupted'])) - { - $text = \preg_replace('/^[ ]{0,'.$requiredIndent.'}+/', '', $Line['body']); + if (!isset($block['interrupted'])) { + $text = \preg_replace('/^[ ]{0,' . $requiredIndent . '}+/', '', $line['body']); - $Block['li']['handler']['argument'][] = $text; + $block['li']['handler']['argument'][] = $text; - return $Block; + return $block; } + + return null; } - protected function blockListComplete(array $Block) + /** + * Complete block list + * + * @param array $block Current block + * + * @return array + * + * @since 1.0.0 + */ + protected function blockListComplete(array $block) : array { - if (isset($Block['loose'])) - { - foreach ($Block['element']['elements'] as &$li) - { - if (\end($li['handler']['argument']) !== '') - { - $li['handler']['argument'][] = ''; - } + if (!isset($block['loose'])) { + return $block; + } + + foreach ($block['element']['elements'] as &$li) { + if (\end($li['handler']['argument']) !== '') { + $li['handler']['argument'][] = ''; } } - return $Block; + return $block; } - // - // Quote - - protected function blockQuoteBase($Line) + /** + * Continue block quote + * + * @param array{body:string, indent:int, text:string} $line Line data + * @param array $block Current block + * + * @return null|array + * + * @since 1.0.0 + */ + protected function blockQuoteContinue(array $line, array $block) : ?array { - if (\preg_match('/^>[ ]?+(.*+)/', $Line['text'], $matches)) - { - return [ - 'element' => [ - 'name' => 'blockquote', - 'handler' => [ - 'function' => 'linesElements', - 'argument' => (array) $matches[1], - 'destination' => 'elements', - ], - ], - ]; + if (isset($block['interrupted'])) { + return null; } + + if ($line['text'][0] === '>' && \preg_match('/^>[ ]?+(.*+)/', $line['text'], $matches)) { + $block['element']['handler']['argument'][] = $matches[1]; + + return $block; + } + + if (!isset($block['interrupted'])) { + $block['element']['handler']['argument'][] = $line['text']; + + return $block; + } + + return null; } - protected function blockQuoteContinue($Line, array $Block) + /** + * Continue block table + * + * @param array{body:string, indent:int, text:string} $line Line data + * @param array $block Current block + * + * @return null|array + * + * @since 1.0.0 + */ + protected function blockTableContinue(array $line, array $block) : ?array { - if (isset($Block['interrupted'])) - { - return; - } - - if ($Line['text'][0] === '>' && \preg_match('/^>[ ]?+(.*+)/', $Line['text'], $matches)) - { - $Block['element']['handler']['argument'][] = $matches[1]; - - return $Block; - } - - if (!isset($Block['interrupted'])) - { - $Block['element']['handler']['argument'][] = $Line['text']; - - return $Block; - } - } - - // - // Rule - - protected function blockRuleBase($Line) - { - $marker = $Line['text'][0]; - - if (\substr_count($Line['text'], $marker) >= 3 && \rtrim($Line['text'], " {$marker}") === '') - { - return [ - 'element' => [ - 'name' => 'hr', - ], - ]; - } - } - - // - // Setext - - protected function blockSetextHeaderParent($Line, array $Block = null) - { - if (!isset($Block) || $Block['type'] !== 'Paragraph' || isset($Block['interrupted'])) - { - return; - } - - if ($Line['indent'] < 4 && \rtrim(\rtrim($Line['text'], ' '), $Line['text'][0]) === '') - { - $Block['element']['name'] = $Line['text'][0] === '=' ? 'h1' : 'h2'; - - return $Block; - } - } - - // - // Reference - - protected function blockReferenceBase($Line) - { - if (\strpos($Line['text'], ']') !== false - && \preg_match('/^\[(.+?)\]:[ ]*+?(?:[ ]+["\'(](.+)["\')])?[ ]*+$/', $Line['text'], $matches) + if (isset($block['interrupted']) + || (\count($block['alignments']) !== 1 && $line['text'][0] !== '|' && \strpos($line['text'], '|') === false) ) { - $id = \strtolower($matches[1]); - - $Data = [ - 'url' => UriFactory::build($matches[2]), - 'title' => isset($matches[3]) ? $matches[3] : null, - ]; - - $this->definitionData['Reference'][$id] = $Data; - - return [ - 'element' => [], - ]; - } - } - - // - // Table - - protected function blockTableBase($Line, array $Block = null) - { - if (!isset($Block) || $Block['type'] !== 'Paragraph' || isset($Block['interrupted'])) - { - return; + return null; } - if (\strpos($Block['element']['handler']['argument'], '|') === false - && \strpos($Line['text'], '|') === false - && \strpos($Line['text'], ':') === false - || \strpos($Block['element']['handler']['argument'], "\n") !== false - ) { - return; - } + $elements = []; - if (\rtrim($Line['text'], ' -:|') !== '') - { - return; - } + $row = $line['text']; - $alignments = []; + $row = \trim($row); + $row = \trim($row, '|'); - $divider = $Line['text']; + \preg_match_all('/(?:(\\\\[|])|[^|`]|`[^`]++`|`)++/', $row, $matches); - $divider = \trim($divider); - $divider = \trim($divider, '|'); + $cells = \array_slice($matches[0], 0, \count($block['alignments'])); - $dividerCells = \explode('|', $divider); + foreach ($cells as $index => $cell) { + $cell = \trim($cell); - foreach ($dividerCells as $dividerCell) - { - $dividerCell = \trim($dividerCell); - - if ($dividerCell === '') - { - return; - } - - $alignment = null; - - if ($dividerCell[0] === ':') - { - $alignment = 'left'; - } - - if (\substr($dividerCell, - 1) === ':') - { - $alignment = $alignment === 'left' ? 'center' : 'right'; - } - - $alignments [] = $alignment; - } - - $HeaderElements = []; - - $header = $Block['element']['handler']['argument']; - - $header = \trim($header); - $header = \trim($header, '|'); - - $headerCells = \explode('|', $header); - - if (\count($headerCells) !== \count($alignments)) - { - return; - } - - foreach ($headerCells as $index => $headerCell) - { - $headerCell = \trim($headerCell); - - $HeaderElement = [ - 'name' => 'th', + $element = [ + 'name' => 'td', 'handler' => [ 'function' => 'lineElements', - 'argument' => $headerCell, + 'argument' => $cell, 'destination' => 'elements', ], ]; - if (isset($alignments[$index])) - { - $alignment = $alignments[$index]; - - $HeaderElement['attributes'] = [ - 'style' => "text-align: {$alignment};", + if (isset($block['alignments'][$index])) { + $element['attributes'] = [ + 'style' => 'text-align: ' . $block['alignments'][$index] . ';', ]; } - $HeaderElements [] = $HeaderElement; + $elements [] = $element; } - $Block = [ - 'alignments' => $alignments, - 'identified' => true, - 'element' => [ - 'name' => 'table', - 'elements' => [], - ], - ]; - - $Block['element']['elements'][] = [ - 'name' => 'thead', - ]; - - $Block['element']['elements'][] = [ - 'name' => 'tbody', - 'elements' => [], - ]; - - $Block['element']['elements'][0]['elements'][] = [ + $element = [ 'name' => 'tr', - 'elements' => $HeaderElements, + 'elements' => $elements, ]; - return $Block; + $block['element']['elements'][1]['elements'][] = $element; + + return $block; } - protected function blockTableContinue($Line, array $Block) + /** + * Continue block paragraph + * + * @param array{body:string, indent:int, text:string} $line Line data + * @param array $block Current block + * + * @return null|array + * + * @since 1.0.0 + */ + protected function paragraphContinue(array $line, array $block) : ?array { - if (isset($Block['interrupted'])) - { - return; + if (isset($block['interrupted'])) { + return null; } - if (\count($Block['alignments']) === 1 || $Line['text'][0] === '|' || \strpos($Line['text'], '|')) - { - $Elements = []; + $block['element']['handler']['argument'] .= "\n".$line['text']; - $row = $Line['text']; - - $row = \trim($row); - $row = \trim($row, '|'); - - \preg_match_all('/(?:(\\\\[|])|[^|`]|`[^`]++`|`)++/', $row, $matches); - - $cells = \array_slice($matches[0], 0, \count($Block['alignments'])); - - foreach ($cells as $index => $cell) - { - $cell = \trim($cell); - - $Element = [ - 'name' => 'td', - 'handler' => [ - 'function' => 'lineElements', - 'argument' => $cell, - 'destination' => 'elements', - ], - ]; - - if (isset($Block['alignments'][$index])) - { - $Element['attributes'] = [ - 'style' => 'text-align: ' . $Block['alignments'][$index] . ';', - ]; - } - - $Elements [] = $Element; - } - - $Element = [ - 'name' => 'tr', - 'elements' => $Elements, - ]; - - $Block['element']['elements'][1]['elements'][] = $Element; - - return $Block; - } + return $block; } - // - // ~ - // - - protected function paragraph($Line) + /** + * Handle link + * + * @param array{text:string, context:string, before:string} $excerpt Inline data + * + * @return null|array + * + * @since 1.0.0 + */ + protected function inlineLinkParent(array $excerpt) : ?array { - return [ - 'type' => 'Paragraph', - 'element' => [ - 'name' => 'p', - 'handler' => [ - 'function' => 'lineElements', - 'argument' => $Line['text'], - 'destination' => 'elements', - ], - ], - ]; - } - - protected function paragraphContinue($Line, array $Block) - { - if (isset($Block['interrupted'])) - { - return; - } - - $Block['element']['handler']['argument'] .= "\n".$Line['text']; - - return $Block; - } - - // - // Inline Elements - // - - protected $InlineTypes = [ - '!' => ['Image'], - '&' => ['SpecialCharacter'], - '*' => ['Emphasis'], - ':' => ['Url'], - '<' => ['UrlTag', 'EmailTag', 'Markup'], - '[' => ['FootnoteMarker', 'Link'], - '_' => ['Emphasis'], - '`' => ['Code'], - '~' => ['Strikethrough'], - '\\' => ['EscapeSequence'], - '=' => ['mark'], - ]; - - // ~ - - protected $inlineMarkerList = '!*_&[:<`~\\'; - - // - // ~ - // - - public function line($text, $nonNestables = []) - { - return $this->elements($this->lineElements($text, $nonNestables)); - } - - /* - protected function lineElements($text, $nonNestables = array()) - { - # standardize line breaks - $text = str_replace(array("\r\n", "\r"), "\n", $text); - - $Elements = array(); - - $nonNestables = (empty($nonNestables) - ? array() - : array_combine($nonNestables, $nonNestables) - ); - - # $excerpt is based on the first occurrence of a marker - - while ($excerpt = strpbrk($text, $this->inlineMarkerList)) - { - $marker = $excerpt[0]; - - $markerPosition = strlen($text) - strlen($excerpt); - - $Excerpt = array('text' => $excerpt, 'context' => $text); - - foreach ($this->InlineTypes[$marker] as $inlineType) - { - # check to see if the current inline type is nestable in the current context - - if (isset($nonNestables[$inlineType])) - { - continue; - } - - $Inline = $this->{"inline$inlineType"}($Excerpt); - - if ( !isset($Inline)) - { - continue; - } - - # makes sure that the inline belongs to "our" marker - - if (isset($Inline['position']) and $Inline['position'] > $markerPosition) - { - continue; - } - - # sets a default inline position - - if ( !isset($Inline['position'])) - { - $Inline['position'] = $markerPosition; - } - - # cause the new element to 'inherit' our non nestables - - - $Inline['element']['nonNestables'] = isset($Inline['element']['nonNestables']) - ? array_merge($Inline['element']['nonNestables'], $nonNestables) - : $nonNestables - ; - - # the text that comes before the inline - $unmarkedText = substr($text, 0, $Inline['position']); - - # compile the unmarked text - $InlineText = $this->inlineText($unmarkedText); - $Elements[] = $InlineText['element']; - - # compile the inline - $Elements[] = $this->extractElement($Inline); - - # remove the examined text - $text = substr($text, $Inline['position'] + $Inline['extent']); - - continue 2; - } - - # the marker does not belong to an inline - - $unmarkedText = substr($text, 0, $markerPosition + 1); - - $InlineText = $this->inlineText($unmarkedText); - $Elements[] = $InlineText['element']; - - $text = substr($text, $markerPosition + 1); - } - - $InlineText = $this->inlineText($text); - $Elements[] = $InlineText['element']; - - foreach ($Elements as &$Element) - { - if ( !isset($Element['autobreak'])) - { - $Element['autobreak'] = false; - } - } - - return $Elements; - } - */ - - // - // ~ - // - - protected function inlineTextParent($text) - { - $Inline = [ - 'extent' => \strlen($text), - 'element' => [], - ]; - - $Inline['element']['elements'] = self::pregReplaceElements( - $this->breaksEnabled ? '/[ ]*+\n/' : '/(?:[ ]*+\\\\|[ ]{2,}+)\n/', - [ - ['name' => 'br'], - ['text' => "\n"], - ], - $text - ); - - return $Inline; - } - - protected function inlineLinkParent($Excerpt) - { - $Element = [ + $element = [ 'name' => 'a', 'handler' => [ 'function' => 'lineElements', @@ -4146,325 +4019,359 @@ class Markdown ], ]; - $extent = 0; + $extent = 0; + $remainder = $excerpt['text']; - $remainder = $Excerpt['text']; - - if (\preg_match('/\[((?:[^][]++|(?R))*+)\]/', $remainder, $matches)) - { - $Element['handler']['argument'] = $matches[1]; + if (\preg_match('/\[((?:[^][]++|(?R))*+)\]/', $remainder, $matches)) { + $element['handler']['argument'] = $matches[1]; $extent += \strlen($matches[0]); $remainder = \substr($remainder, $extent); } else { - return; + return null; } - if (\preg_match('/^[(]\s*+((?:[^ ()]++|[(][^ )]+[)])++)(?:[ ]+("[^"]*+"|\'[^\']*+\'))?\s*+[)]/', $remainder, $matches)) - { - $Element['attributes']['href'] = UriFactory::build($matches[1]); + if (\preg_match('/^[(]\s*+((?:[^ ()]++|[(][^ )]+[)])++)(?:[ ]+("[^"]*+"|\'[^\']*+\'))?\s*+[)]/', $remainder, $matches)) { + $element['attributes']['href'] = UriFactory::build($matches[1]); - if (isset($matches[2])) - { - $Element['attributes']['title'] = \substr($matches[2], 1, - 1); + if (isset($matches[2])) { + $element['attributes']['title'] = \substr($matches[2], 1, - 1); } $extent += \strlen($matches[0]); } else { - if (\preg_match('/^\s*\[(.*?)\]/', $remainder, $matches)) - { - $definition = \strlen($matches[1]) !== 0 ? $matches[1] : $Element['handler']['argument']; + if (\preg_match('/^\s*\[(.*?)\]/', $remainder, $matches)) { + $definition = \strlen($matches[1]) !== 0 ? $matches[1] : $element['handler']['argument']; $definition = \strtolower($definition); $extent += \strlen($matches[0]); - } - else - { - $definition = \strtolower($Element['handler']['argument']); + } else { + $definition = \strtolower($element['handler']['argument']); } - if (!isset($this->definitionData['Reference'][$definition])) - { - return; + if (!isset($this->definitionData['Reference'][$definition])) { + return null; } $Definition = $this->definitionData['Reference'][$definition]; - $Element['attributes']['href'] = $Definition['url']; - $Element['attributes']['title'] = $Definition['title']; + $element['attributes']['href'] = $Definition['url']; + $element['attributes']['title'] = $Definition['title']; } return [ 'extent' => $extent, - 'element' => $Element, + 'element' => $element, ]; } - protected function inlineSpecialCharacter($Excerpt) + /** + * Handle special character + * + * @param array{text:string, context:string, before:string} $excerpt Inline data + * + * @return null|array + * + * @since 1.0.0 + */ + protected function inlineSpecialCharacter(array $excerpt) : ?array { - if (\substr($Excerpt['text'], 1, 1) !== ' ' && \strpos($Excerpt['text'], ';') !== false - && \preg_match('/^&(#?+[0-9a-zA-Z]++);/', $Excerpt['text'], $matches) + if (\substr($excerpt['text'], 1, 1) !== ' ' && \strpos($excerpt['text'], ';') !== false + && \preg_match('/^&(#?+[0-9a-zA-Z]++);/', $excerpt['text'], $matches) ) { return [ 'element' => ['rawHtml' => '&' . $matches[1] . ';'], 'extent' => \strlen($matches[0]), ]; } + + return null; } - // ~ - + /* protected function unmarkedText($text) { - $Inline = $this->inlineText($text); - return $this->element($Inline['element']); + $inline = $this->inlineText($text); + return $this->element($inline['element']); + } + */ + + /** + * Handle "handler" + * + * @param array $element Element to handle + * + * @return array + * + * @since 1.0.0 + */ + protected function handle(array $element) : array + { + if (!isset($element['handler'])) { + return $element; + } + + if (!isset($element['nonNestables'])) { + $element['nonNestables'] = []; + } + + if (\is_string($element['handler'])) { + $function = $element['handler']; + $argument = $element['text']; + unset($element['text']); + $destination = 'rawHtml'; + } else { + $function = $element['handler']['function']; + $argument = $element['handler']['argument']; + $destination = $element['handler']['destination']; + } + + $element[$destination] = $this->{$function}($argument, $element['nonNestables']); + + if ($destination === 'handler') { + $element = $this->handle($element); + } + + unset($element['handler']); + + return $element; } - // - // Handlers - // - - protected function handle(array $Element) + /* + protected function handleElementRecursive(array $element) { - if (isset($Element['handler'])) - { - if (!isset($Element['nonNestables'])) - { - $Element['nonNestables'] = []; + return $this->elementApplyRecursive([$this, 'handle'], $element); + } + + protected function handleElementsRecursive(array $elements) + { + return $this->elementsApplyRecursive([$this, 'handle'], $elements); + } + */ + + /** + * Handle element recursiveley + * + * @param \Closure $closure Closure for handling element + * @param array $element Element to handle + * + * @return array + * + * @since 1.0.0 + */ + protected function elementApplyRecursive(\Closure $closure, array $element) : array + { + $element = $closure($element); + + if (isset($element['elements'])) { + //$element['elements'] = $this->elementsApplyRecursive($closure, $element['elements']); + foreach ($element['elements'] as &$e) { + $e = $this->elementApplyRecursive($closure, $e); } + } elseif (isset($element['element'])) { + $element['element'] = $this->elementApplyRecursive($closure, $element['element']); + } - if (\is_string($Element['handler'])) - { - $function = $Element['handler']; - $argument = $Element['text']; - unset($Element['text']); - $destination = 'rawHtml'; + return $element; + } + + /** + * Handle element recursiveley + * + * @param \Closure $closure Closure for handling element + * @param array $element Element to handle + * + * @return array + * + * @since 1.0.0 + */ + protected function elementApplyRecursiveDepthFirst(\Closure $closure, array $element) : array + { + if (isset($element['elements'])) { + //$element['elements'] = $this->elementsApplyRecursiveDepthFirst($closure, $element['elements']); + foreach ($element['elements'] as &$e) { + $e = $this->elementApplyRecursiveDepthFirst($closure, $e); } - else - { - $function = $Element['handler']['function']; - $argument = $Element['handler']['argument']; - $destination = $Element['handler']['destination']; + } elseif (isset($element['element'])) { + //$element['element'] = $this->elementsApplyRecursiveDepthFirst($closure, $element['element']); + foreach ($element['element'] as &$e) { + $e = $this->elementApplyRecursiveDepthFirst($closure, $e); } - - $Element[$destination] = $this->{$function}($argument, $Element['nonNestables']); - - if ($destination === 'handler') - { - $Element = $this->handle($Element); - } - - unset($Element['handler']); } - return $Element; + return $closure($element); } - protected function handleElementRecursive(array $Element) + /* + protected function elementsApplyRecursive($closure, array $elements) { - return $this->elementApplyRecursive([$this, 'handle'], $Element); - } - - protected function handleElementsRecursive(array $Elements) - { - return $this->elementsApplyRecursive([$this, 'handle'], $Elements); - } - - protected function elementApplyRecursive($closure, array $Element) - { - $Element = $closure($Element); - - if (isset($Element['elements'])) - { - $Element['elements'] = $this->elementsApplyRecursive($closure, $Element['elements']); - } - elseif (isset($Element['element'])) - { - $Element['element'] = $this->elementApplyRecursive($closure, $Element['element']); + foreach ($elements as &$element) { + $element = $this->elementApplyRecursive($closure, $element); } - return $Element; + return $elements; } - protected function elementApplyRecursiveDepthFirst($closure, array $Element) + + protected function elementsApplyRecursiveDepthFirst($closure, array $elements) { - if (isset($Element['elements'])) - { - $Element['elements'] = $this->elementsApplyRecursiveDepthFirst($closure, $Element['elements']); - } - elseif (isset($Element['element'])) - { - $Element['element'] = $this->elementsApplyRecursiveDepthFirst($closure, $Element['element']); + foreach ($elements as &$element) { + $element = $this->elementApplyRecursiveDepthFirst($closure, $element); } - return $closure($Element); + return $elements; } + */ - protected function elementsApplyRecursive($closure, array $Elements) + /** + * Render element + * + * @param array $element Element to render + * + * @return : string + * + * @since 1.0.0 + */ + protected function element(array $element) : string { - foreach ($Elements as &$Element) - { - $Element = $this->elementApplyRecursive($closure, $Element); - } - - return $Elements; - } - - protected function elementsApplyRecursiveDepthFirst($closure, array $Elements) - { - foreach ($Elements as &$Element) - { - $Element = $this->elementApplyRecursiveDepthFirst($closure, $Element); - } - - return $Elements; - } - - protected function element(array $Element) - { - if ($this->safeMode) - { - $Element = $this->sanitiseElement($Element); + if ($this->safeMode) { + $element = $this->sanitiseElement($element); } // identity map if element has no handler - $Element = $this->handle($Element); + $element = $this->handle($element); + $hasName = isset($element['name']); + $markup = ''; - $hasName = isset($Element['name']); + if ($hasName) { + $markup .= '<' . $element['name']; - $markup = ''; - - if ($hasName) - { - $markup .= '<' . $Element['name']; - - if (isset($Element['attributes'])) - { - foreach ($Element['attributes'] as $name => $value) - { - if ($value === null) - { + if (isset($element['attributes'])) { + foreach ($element['attributes'] as $name => $value) { + if ($value === null) { continue; } - $markup .= " {$name}=\"".self::escape((string) $value).'"'; + $markup .= ' ' . $name . '="' . \htmlspecialchars((string) $value, \ENT_QUOTES, 'UTF-8') . '"'; } } } $permitRawHtml = false; - if (isset($Element['text'])) - { - $text = $Element['text']; - } - // very strongly consider an alternative if you're writing an - // extension - elseif (isset($Element['rawHtml'])) - { - $text = $Element['rawHtml']; + if (isset($element['text'])) { + $text = $element['text']; + } elseif (isset($element['rawHtml'])) { + // very strongly consider an alternative if you're writing an extension + $text = $element['rawHtml']; - $allowRawHtmlInSafeMode = isset($Element['allowRawHtmlInSafeMode']) && $Element['allowRawHtmlInSafeMode']; - $permitRawHtml = !$this->safeMode || $allowRawHtmlInSafeMode; + $permitRawHtml = !$this->safeMode || ($element['allowRawHtmlInSafeMode'] ?? false); } - $hasContent = isset($text) || isset($Element['element']) || isset($Element['elements']); + $hasContent = isset($text) || isset($element['element']) || isset($element['elements']); - if ($hasContent) - { + if ($hasContent) { $markup .= $hasName ? '>' : ''; - if (isset($Element['elements'])) - { - $markup .= $this->elements($Element['elements']); - } - elseif (isset($Element['element'])) - { - $markup .= $this->element($Element['element']); + if (isset($element['elements'])) { + $markup .= $this->elements($element['elements']); + } elseif (isset($element['element'])) { + $markup .= $this->element($element['element']); } elseif (!$permitRawHtml) { - $markup .= self::escape((string) $text, true); - } - else - { + $markup .= \htmlspecialchars((string) $text, \ENT_NOQUOTES, 'UTF-8'); + } else { $markup .= $text; } - $markup .= $hasName ? '' : ''; - } - elseif ($hasName) - { + $markup .= $hasName ? '' : ''; + } elseif ($hasName) { $markup .= ' />'; } return $markup; } - protected function elements(array $Elements) : string + /** + * Render elements + * + * @param array $elements Elements to render + * + * @return string + * + * @since 1.0.0 + */ + protected function elements(array $elements) : string { - $markup = ''; - + $markup = ''; $autoBreak = true; - foreach ($Elements as $Element) - { - if (empty($Element)) - { + foreach ($elements as $element) { + if (empty($element)) { continue; } - $autoBreakNext = (isset($Element['autobreak']) - ? $Element['autobreak'] : isset($Element['name']) + $autoBreakNext = (isset($element['autobreak']) + ? $element['autobreak'] : isset($element['name']) ); + // (autobreak === false) covers both sides of an element $autoBreak = $autoBreak ? $autoBreakNext : $autoBreak; - $markup .= ($autoBreak ? "\n" : '') . $this->element($Element); + $markup .= ($autoBreak ? "\n" : '') . $this->element($element); $autoBreak = $autoBreakNext; } return $markup . ($autoBreak ? "\n" : ''); } - // ~ - - protected function li($lines) + /** + * Handle list + * + * @param string[] $lines Lines + * + * @return array + * + * @since 1.0.0 + */ + protected function li(array $lines) : array { - $Elements = $this->linesElements($lines); + $elements = $this->linesElements($lines); - if (! \in_array('', $lines) - && isset($Elements[0], $Elements[0]['name']) - && $Elements[0]['name'] === 'p' + if (!\in_array('', $lines) + && isset($elements[0], $elements[0]['name']) + && $elements[0]['name'] === 'p' ) { - unset($Elements[0]['name']); + unset($elements[0]['name']); } - return $Elements; + return $elements; } - // - // AST Convenience - // - /** - * Replace occurrences $regexp with $Elements in $text. Return an array of - * elements representing the replacement. + * Replace occurrences $regexp with $elements in $text. + * + * @param string $regexp Regex + * @param array $elements Elements to replace + * @param string $text Text to match against regex + * + * @return array + * + * @since 1.0.0 */ - protected static function pregReplaceElements($regexp, $Elements, $text) + protected static function pregReplaceElements(string $regexp, array $elements, string $text) : array { $newElements = []; - while (\preg_match($regexp, $text, $matches, \PREG_OFFSET_CAPTURE)) - { + while (\preg_match($regexp, $text, $matches, \PREG_OFFSET_CAPTURE)) { $offset = (int) $matches[0][1]; $before = \substr($text, 0, $offset); $after = \substr($text, $offset + \strlen($matches[0][0])); $newElements[] = ['text' => $before]; - foreach ($Elements as $Element) - { - $newElements[] = $Element; + foreach ($elements as $element) { + $newElements[] = $element; } $text = $after; @@ -4475,18 +4382,16 @@ class Markdown return $newElements; } - // - // Deprecated Methods - // - - public static function parse($text) - { - $parsedown = new self(); - - return $parsedown->text($text); - } - - protected function sanitiseElement(array $Element) + /** + * Sanitize element + * + * @param array $element Element to sanitize + * + * @return array + * + * @since 1.0.0 + */ + protected function sanitiseElement(array $element) : array { static $goodAttribute = '/^[a-zA-Z0-9][a-zA-Z0-9-_]*+$/'; static $safeUrlNameToAtt = [ @@ -4494,70 +4399,51 @@ class Markdown 'img' => 'src', ]; - if (!isset($Element['name'])) - { - unset($Element['attributes']); - return $Element; + if (!isset($element['name'])) { + unset($element['attributes']); + + return $element; } - if (isset($safeUrlNameToAtt[$Element['name']])) - { - $Element = $this->filterUnsafeUrlInAttribute($Element, $safeUrlNameToAtt[$Element['name']]); + if (isset($safeUrlNameToAtt[$element['name']])) { + $element = $this->filterUnsafeUrlInAttribute($element, $safeUrlNameToAtt[$element['name']]); } - if (! empty($Element['attributes'])) - { - foreach ($Element['attributes'] as $att => $val) - { - // filter out badly parsed attribute - if (! \preg_match($goodAttribute, $att)) - { - unset($Element['attributes'][$att]); - } - // dump onevent attribute - elseif (self::striAtStart($att, 'on')) - { - unset($Element['attributes'][$att]); + if (!empty($element['attributes'])) { + foreach ($element['attributes'] as $att => $_) { + if (!\preg_match($goodAttribute, $att)) { + // filter out badly parsed attribute + unset($element['attributes'][$att]); + } elseif (\str_starts_with($att, 'on')) { + // dump onevent attribute + unset($element['attributes'][$att]); } } } - return $Element; + return $element; } - protected function filterUnsafeUrlInAttribute(array $Element, $attribute) + /** + * Sanitize url in attribute + * + * @param array $element Element to sanitize + * @param string $attribute Attribute to sanitize + * + * @return array + * + * @since 1.0.0 + */ + protected function filterUnsafeUrlInAttribute(array $element, string $attribute) : array { - foreach ($this->safeLinksWhitelist as $scheme) - { - if (self::striAtStart($Element['attributes'][$attribute], $scheme)) - { - return $Element; + foreach ($this->safeLinksWhitelist as $scheme) { + if (\str_starts_with($element['attributes'][$attribute], $scheme)) { + return $element; } } - $Element['attributes'][$attribute] = \str_replace(':', '%3A', $Element['attributes'][$attribute]); + $element['attributes'][$attribute] = \str_replace(':', '%3A', $element['attributes'][$attribute]); - return $Element; - } - - // - // Static Methods - // - - protected static function escape(string $text, bool $allowQuotes = false) : string - { - return \htmlspecialchars($text, $allowQuotes ? \ENT_NOQUOTES : \ENT_QUOTES, 'UTF-8'); - } - - protected static function striAtStart($string, $needle) - { - $len = \strlen($needle); - - if ($len > \strlen($string)) - { - return false; - } else { - return \strtolower(\substr($string, 0, $len)) === \strtolower($needle); - } + return $element; } } diff --git a/tests/Utils/Parser/Markdown/MarkdownTest.php b/tests/Utils/Parser/Markdown/MarkdownTest.php index 6b9465a55..abf8c207e 100755 --- a/tests/Utils/Parser/Markdown/MarkdownTest.php +++ b/tests/Utils/Parser/Markdown/MarkdownTest.php @@ -48,7 +48,7 @@ final class MarkdownTest extends \PHPUnit\Framework\TestCase public function testSafeMode() : void { $parser = new Markdown(); - $parser->setSafeMode(true); + $parser->safeMode = true; self::assertEquals( \file_get_contents(__DIR__ . '/manualdata/xss_bad_url.html'),