started with markdown formatting

This commit is contained in:
Dennis Eichhorn 2023-11-09 00:25:34 +00:00
parent 9018cf5b57
commit 4645d59b8a

View File

@ -29,20 +29,40 @@ use phpOMS\Uri\UriFactory;
*/ */
class Markdown class Markdown
{ {
# ~ /**
* Parsedown version
*
* @var string
* @since 1.0.0
*/
public const version = '1.8.0-beta-7'; public const version = '1.8.0-beta-7';
/**
* Parsing options
*
* @var array
* @since 1.0.0
*/
private array $options = []; private array $options = [];
# ~ /**
* Table of content id
*
* @var string
* @since 1.0.0
*/
private string $idToc = ''; private string $idToc = '';
/**
* Constructor.
*
* @param array $params Parameters
*
* @since 1.0.0
*/
public function __construct(array $params = []) public function __construct(array $params = [])
{ {
$this->options = $params; $this->options = $params;
$this->options['toc'] = $this->options['toc'] ?? false; $this->options['toc'] = $this->options['toc'] ?? false;
// Marks // Marks
@ -122,11 +142,6 @@ class Markdown
$this->inlineMarkerList .= '`'; $this->inlineMarkerList .= '`';
} }
/*
* Blocks
* ------------------------------------------------------------------------
*/
// Block Math // Block Math
$state = $this->options['math'] ?? false; $state = $this->options['math'] ?? false;
if ($state !== false) { if ($state !== false) {
@ -141,26 +156,18 @@ class Markdown
} }
} }
public function textParent($text) public function textParent($text) : string
{ {
$Elements = $this->textElements($text); $Elements = $this->textElements($text);
# convert to markup
$markup = $this->elements($Elements); $markup = $this->elements($Elements);
# trim line breaks
$markup = \trim($markup, "\n"); $markup = \trim($markup, "\n");
# merge consecutive dl elements // Merge consecutive dl elements
$markup = \preg_replace('/<\/dl>\s+<dl>\s+/', '', $markup); $markup = \preg_replace('/<\/dl>\s+<dl>\s+/', '', $markup);
# add footnotes // Add footnotes
if (isset($this->DefinitionData['Footnote'])) {
if (isset($this->DefinitionData['Footnote']))
{
$Element = $this->buildFootnoteElement(); $Element = $this->buildFootnoteElement();
$markup .= "\n" . $this->element($Element); $markup .= "\n" . $this->element($Element);
} }
@ -168,10 +175,15 @@ class Markdown
} }
/** /**
* Parses the given markdown string to an HTML string but it leaves the ToC * Parses the given markdown string to an HTML string but it ignores ToC
* tag as is. It's an alias of the parent method "\DynamicParent::text()". *
* @param string $text Markdown text to parse
*
* @return string
*
* @since 1.0.0
*/ */
public function body($text) : string public function body(string $text) : string
{ {
$text = $this->encodeTagToHash($text); // Escapes ToC tag temporary $text = $this->encodeTagToHash($text); // Escapes ToC tag temporary
$html = $this->textParent($text); // Parses the markdown text $html = $this->textParent($text); // Parses the markdown text
@ -183,7 +195,7 @@ class Markdown
* Parses markdown string to HTML and also the "[toc]" tag as well. * Parses markdown string to HTML and also the "[toc]" tag as well.
* It overrides the parent method: \Parsedown::text(). * It overrides the parent method: \Parsedown::text().
*/ */
public function text($text) public function text($text) : string
{ {
// Parses the markdown text except the ToC tag. This also searches // Parses the markdown text except the ToC tag. This also searches
// the list of contents and available to get from "contentsList()" // the list of contents and available to get from "contentsList()"
@ -194,6 +206,7 @@ class Markdown
return $html; return $html;
} }
// Handle toc
$tagOrigin = $this->getTagToC(); $tagOrigin = $this->getTagToC();
if (\strpos($text, $tagOrigin) === false) { if (\strpos($text, $tagOrigin) === false) {
@ -202,8 +215,8 @@ class Markdown
$tocData = $this->contentsList(); $tocData = $this->contentsList();
$tocId = $this->getIdAttributeToC(); $tocId = $this->getIdAttributeToC();
$needle = '<p>'.$tagOrigin.'</p>'; $needle = '<p>' . $tagOrigin . '</p>';
$replace = "<div id=\"{$tocId}\">{$tocData}</div>"; $replace = '<div id="' . $tocId . '">' . $tocData . '</div>';
return \str_replace($needle, $replace, $html); return \str_replace($needle, $replace, $html);
} }
@ -214,8 +227,10 @@ class Markdown
* @param string $typeReturn Type of the return format. "html" or "json". * @param string $typeReturn Type of the return format. "html" or "json".
* *
* @return string HTML/JSON string of ToC * @return string HTML/JSON string of ToC
*
* @since 1.0.0
*/ */
public function contentsList($typeReturn = 'html') public function contentsList($typeReturn = 'html') : string
{ {
if (\strtolower($typeReturn) === 'html') { if (\strtolower($typeReturn) === 'html') {
$result = ''; $result = '';
@ -225,42 +240,40 @@ class Markdown
} }
return $result; return $result;
} } elseif (\strtolower($typeReturn) === 'json') {
if (\strtolower($typeReturn) === 'json') {
return \json_encode($this->contentsListArray); return \json_encode($this->contentsListArray);
} }
// Forces to return ToC as "html"
\error_log(
'Unknown return type given while parsing ToC.'
.' At: '.__FUNCTION__.'() '
.' in Line:'.__LINE__.' (Using default type)'
);
return $this->contentsList('html'); return $this->contentsList('html');
} }
/** /**
* ------------------------------------------------------------------------ * Handle inline code
* Inline *
* ------------------------------------------------------------------------. * @param array{text:string, context:string, before:string} $excerpt Inline data
*
* @return null|array
*
* @since 1.0.0
*/ */
protected function inlineCode(array $excerpt) : ?array
// inlineCode
protected function inlineCode($Excerpt)
{ {
$codeSnippets = $this->options['code']['inline'] ?? true; if (($this->options['code']['inline'] ?? true) !== true
$codeMain = $this->options['code'] ?? true; || ($this->options['code'] ?? true) !== true
) {
if ($codeSnippets !== true || $codeMain !== true) { return null;
return;
} }
$marker = $Excerpt['text'][0]; $marker = $excerpt['text'][0];
if (\preg_match(
'/^([' . $marker . ']++)[ ]*+(.+?)[ ]*+(?<![' . $marker . '])\1(?!' . $marker . ')/s',
$excerpt['text'], $matches
) !== 1
) {
return null;
}
if (\preg_match('/^(['.$marker.']++)[ ]*+(.+?)[ ]*+(?<!['.$marker.'])\1(?!'.$marker.')/s', $Excerpt['text'], $matches))
{
$text = $matches[2]; $text = $matches[2];
$text = \preg_replace('/[ ]*+\n/', ' ', $text); $text = \preg_replace('/[ ]*+\n/', ' ', $text);
@ -272,29 +285,36 @@ class Markdown
], ],
]; ];
} }
}
protected function inlineEmailTag($Excerpt) /**
* Handle inline email
*
* @param array{text:string, context:string, before:string} $excerpt Inline data
*
* @return null|array
*
* @since 1.0.0
*/
protected function inlineEmailTag(array $excerpt) : ?array
{ {
$mainState = $this->options['links'] ?? true; if (!($this->options['links'] ?? true)
$state = $this->options['links']['email_links'] ?? true; || !($this->options['links']['email_links'] ?? true)
) {
if (!$mainState || !$state) { return null;
return;
} }
$hostnameLabel = '[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?'; $hostnameLabel = '[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?';
$commonMarkEmail = '[a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]++@' . $hostnameLabel . '(?:\.' . $hostnameLabel . ')*';
$commonMarkEmail = '[a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]++@' if (\strpos($excerpt['text'], '>') === false
. $hostnameLabel . '(?:\.' . $hostnameLabel . ')*'; || \preg_match('/^<((mailto:)?{' . $commonMarkEmail . '})>/i', $excerpt['text'], $matches) !== 1
) {
return null;
}
if (\strpos($Excerpt['text'], '>') !== false
&& \preg_match("/^<((mailto:)?{$commonMarkEmail})>/i", $Excerpt['text'], $matches)
){
$url = UriFactory::build($matches[1]); $url = UriFactory::build($matches[1]);
if (!isset($matches[2])) if (!isset($matches[2])) {
{
$url = "mailto:{$url}"; $url = "mailto:{$url}";
} }
@ -309,37 +329,38 @@ class Markdown
], ],
]; ];
} }
/**
* Inline emphasis
*
* @param array{text:string, context:string, before:string} $excerpt Inline data
*
* @return null|array
*
* @since 1.0.0
*/
protected function inlineEmphasis(array $excerpt) : ?array
{
if (!($this->options['emphasis'] ?? true)
|| !isset($excerpt['text'][1])
) {
return null;
} }
protected function inlineEmphasis($Excerpt) $marker = $excerpt['text'][0];
{
$state = $this->options['emphasis'] ?? true;
if (!$state) {
return;
}
if (!isset($Excerpt['text'][1])) if ($excerpt['text'][1] === $marker
{ && isset($this->StrongRegex[$marker]) && \preg_match($this->StrongRegex[$marker], $excerpt['text'], $matches)
return; ) {
}
$marker = $Excerpt['text'][0];
if ($Excerpt['text'][1] === $marker && isset($this->StrongRegex[$marker]) && \preg_match($this->StrongRegex[$marker], $Excerpt['text'], $matches))
{
$emphasis = 'strong'; $emphasis = 'strong';
} } elseif ($excerpt['text'][1] === $marker
elseif ($Excerpt['text'][1] === $marker && isset($this->UnderlineRegex[$marker]) && \preg_match($this->UnderlineRegex[$marker], $Excerpt['text'], $matches)) && isset($this->UnderlineRegex[$marker]) && \preg_match($this->UnderlineRegex[$marker], $excerpt['text'], $matches)
{ ) {
$emphasis = 'u'; $emphasis = 'u';
} } elseif (\preg_match($this->EmRegex[$marker], $excerpt['text'], $matches)) {
elseif (\preg_match($this->EmRegex[$marker], $Excerpt['text'], $matches))
{
$emphasis = 'em'; $emphasis = 'em';
} } else {
else return null;
{
return;
} }
return [ return [
@ -355,118 +376,124 @@ class Markdown
]; ];
} }
protected function inlineImage($Excerpt) /**
* Handle image
*
* @param array{text:string, context:string, before:string} $excerpt Inline data
*
* @return null|array
*
* @since 1.0.0
*/
protected function inlineImage(array $excerpt) : ?array
{ {
$state = $this->options['images'] ?? true; if (!($this->options['images'] ?? true)
if (!$state) { || !isset($excerpt['text'][1]) || $excerpt['text'][1] !== '['
return; ) {
return null;
} }
if (!isset($Excerpt['text'][1]) || $Excerpt['text'][1] !== '[') $excerpt['text'] = \substr($excerpt['text'], 1);
{ $link = $this->inlineLink($excerpt);
return;
if ($link === null) {
return null;
} }
$Excerpt['text']= \substr($Excerpt['text'], 1); $inline = [
'extent' => $link['extent'] + 1,
$Link = $this->inlineLink($Excerpt);
if ($Link === null)
{
return;
}
$Inline = [
'extent' => $Link['extent'] + 1,
'element' => [ 'element' => [
'name' => 'img', 'name' => 'img',
'attributes' => [ 'attributes' => [
'src' => $Link['element']['attributes']['href'], 'src' => $link['element']['attributes']['href'],
'alt' => $Link['element']['handler']['argument'], 'alt' => $link['element']['handler']['argument'],
], ],
'autobreak' => true, 'autobreak' => true,
], ],
]; ];
$Inline['element']['attributes'] += $Link['element']['attributes']; $inline['element']['attributes'] += $link['element']['attributes'];
unset($Inline['element']['attributes']['href']); unset($inline['element']['attributes']['href']);
return $Inline; return $inline;
} }
protected function inlineLink($Excerpt) /**
* Handle link
*
* @param array{text:string, context:string, before:string} $excerpt Inline data
*
* @return null|array
*
* @since 1.0.0
*/
protected function inlineLink(array $excerpt) : ?array
{ {
$state = $this->options['links'] ?? true; if (!($this->options['links'] ?? true)) {
if (!$state) { return null;
return;
} }
$Link = $this->inlineLinkParent($Excerpt); $link = $this->inlineLinkParent($excerpt);
$remainder = $link !== null ? \substr($excerpt['text'], $link['extent']) : '';
$remainder = $Link !== null ? \substr($Excerpt['text'], $Link['extent']) : ''; if (\preg_match('/^[ ]*{(' . $this->regexAttribute . '+)}/', $remainder, $matches)) {
$link['element']['attributes'] += $this->parseAttributeData($matches[1]);
$link['extent'] += \strlen($matches[0]);
}
if (\preg_match('/^[ ]*{('.$this->regexAttribute.'+)}/', $remainder, $matches)) return $link;
}
/**
* Handle markup
*
* @param array{text:string, context:string, before:string} $excerpt Inline data
*
* @return null|array
*
* @since 1.0.0
*/
protected function inlineMarkup(array $excerpt) : ?array
{ {
$Link['element']['attributes'] += $this->parseAttributeData($matches[1]); if (!($this->options['markup'] ?? true)
|| $this->markupEscaped || $this->safeMode || \strpos($excerpt['text'], '>') === false
$Link['extent'] += \strlen($matches[0]); ) {
return null;
} }
return $Link; if (($excerpt['text'][1] === '/' && \preg_match('/^<\/\w[\w-]*+[ ]*+>/s', $excerpt['text'], $matches))
} || ($excerpt['text'][1] === '!' && \preg_match('/^<!---?[^>-](?:-?+[^-])*-->/s', $excerpt['text'], $matches))
|| ($excerpt['text'][1] !== ' ' && \preg_match('/^<\w[\w-]*+(?:[ ]*+' . $this->regexHtmlAttribute . ')*+[ ]*+\/?>/s', $excerpt['text'], $matches))
protected function inlineMarkup($Excerpt) ) {
{
$state = $this->options['markup'] ?? true;
if (!$state) {
return;
}
if ($this->markupEscaped || $this->safeMode || \strpos($Excerpt['text'], '>') === false)
{
return;
}
if ($Excerpt['text'][1] === '/' && \preg_match('/^<\/\w[\w-]*+[ ]*+>/s', $Excerpt['text'], $matches))
{
return [ return [
'element' => ['rawHtml' => $matches[0]], 'element' => ['rawHtml' => $matches[0]],
'extent' => \strlen($matches[0]), 'extent' => \strlen($matches[0]),
]; ];
} }
if ($Excerpt['text'][1] === '!' && \preg_match('/^<!---?[^>-](?:-?+[^-])*-->/s', $Excerpt['text'], $matches)) return null;
{
return [
'element' => ['rawHtml' => $matches[0]],
'extent' => \strlen($matches[0]),
];
} }
if ($Excerpt['text'][1] !== ' ' && \preg_match('/^<\w[\w-]*+(?:[ ]*+'.$this->regexHtmlAttribute.')*+[ ]*+\/?>/s', $Excerpt['text'], $matches)) /**
* Handle strikethrough
*
* @param array{text:string, context:string, before:string} $excerpt Inline data
*
* @return null|array
*
* @since 1.0.0
*/
protected function inlineStrikethrough($excerpt) : ?array
{ {
return [ if (!($this->options['strikethroughs'] ?? true)
'element' => ['rawHtml' => $matches[0]], || !isset($excerpt['text'][1])
'extent' => \strlen($matches[0]), || $excerpt['text'][1] !== '~'
]; || \preg_match('/^~~(?=\S)(.+?)(?<=\S)~~/', $excerpt['text'], $matches) !== 1
} ) {
return null;
} }
protected function inlineStrikethrough($Excerpt)
{
$state = $this->options['strikethroughs'] ?? true;
if (!$state) {
return;
}
if (!isset($Excerpt['text'][1]))
{
return;
}
if ($Excerpt['text'][1] === '~' && \preg_match('/^~~(?=\S)(.+?)(?<=\S)~~/', $Excerpt['text'], $matches))
{
return [ return [
'extent' => \strlen($matches[0]), 'extent' => \strlen($matches[0]),
'element' => [ 'element' => [
@ -479,23 +506,26 @@ class Markdown
], ],
]; ];
} }
}
protected function inlineUrl($Excerpt) /**
* Handle url
*
* @param array{text:string, context:string, before:string} $excerpt Inline data
*
* @return null|array
*
* @since 1.0.0
*/
protected function inlineUrl($excerpt) : ?array
{ {
$state = $this->options['links'] ?? true; if (!($this->options['links'] ?? true)
if (!$state) { || $this->urlsLinked !== true || !isset($excerpt['text'][2]) || $excerpt['text'][2] !== '/'
return; || \strpos($excerpt['context'], 'http') === false
} || \preg_match('/\bhttps?+:[\/]{2}[^\s<]+\b\/*+/ui', $excerpt['context'], $matches, \PREG_OFFSET_CAPTURE) !== 1
if ($this->urlsLinked !== true || !isset($Excerpt['text'][2]) || $Excerpt['text'][2] !== '/')
{
return;
}
if (\strpos($Excerpt['context'], 'http') !== false
&& \preg_match('/\bhttps?+:[\/]{2}[^\s<]+\b\/*+/ui', $Excerpt['context'], $matches, \PREG_OFFSET_CAPTURE)
) { ) {
return null;
}
$url = UriFactory::build($matches[0][0]); $url = UriFactory::build($matches[0][0]);
return [ return [
@ -510,7 +540,6 @@ class Markdown
], ],
]; ];
} }
}
protected function inlineUrlTag($Excerpt) protected function inlineUrlTag($Excerpt)
{ {
@ -1847,24 +1876,22 @@ class Markdown
{ {
$Elements = []; $Elements = [];
$nonNestables = ( $nonNestables = empty($nonNestables)
empty($nonNestables)
? [] ? []
: \array_combine($nonNestables, $nonNestables) : \array_combine($nonNestables, $nonNestables);
);
// $excerpt is based on the first occurrence of a marker // $excerpt is based on the first occurrence of a marker
while ($excerpt = \strpbrk($text, $this->inlineMarkerList)) { while ($exc = \strpbrk($text, $this->inlineMarkerList)) {
$marker = $excerpt[0]; $marker = $exc[0];
$markerPosition = \strlen($text) - \strlen($excerpt); $markerPosition = \strlen($text) - \strlen($exc);
// Get the first char before the marker // Get the first char before the marker
$beforeMarkerPosition = $markerPosition - 1; $beforeMarkerPosition = $markerPosition - 1;
$charBeforeMarker = $beforeMarkerPosition >= 0 ? $text[$markerPosition - 1] : ''; $charBeforeMarker = $beforeMarkerPosition >= 0 ? $text[$markerPosition - 1] : '';
$Excerpt = ['text' => $excerpt, 'context' => $text, 'before' => $charBeforeMarker]; $excerpt = ['text' => $exc, 'context' => $text, 'before' => $charBeforeMarker];
foreach ($this->InlineTypes[$marker] as $inlineType) { foreach ($this->InlineTypes[$marker] as $inlineType) {
// check to see if the current inline type is nestable in the current context // check to see if the current inline type is nestable in the current context
@ -1873,7 +1900,7 @@ class Markdown
continue; continue;
} }
$Inline = $this->{"inline{$inlineType}"}($Excerpt); $Inline = $this->{"inline{$inlineType}"}($excerpt);
if (!isset($Inline)) { if (!isset($Inline)) {
continue; continue;
@ -3552,6 +3579,7 @@ class Markdown
'`' => ['Code'], '`' => ['Code'],
'~' => ['Strikethrough'], '~' => ['Strikethrough'],
'\\' => ['EscapeSequence'], '\\' => ['EscapeSequence'],
'=' => ['mark'],
]; ];
# ~ # ~