Updated Obsidian-style list processing to improve formatting; also implemented nested lists properly
This commit is contained in:
parent
dc25acd5da
commit
1a8a3132f4
|
@ -3,6 +3,7 @@
|
|||
"""The Markdown parser and its extensions."""
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import re
|
||||
import xml.etree.ElementTree as etree # noqa: N813
|
||||
from typing import Any
|
||||
|
@ -11,6 +12,8 @@ from urllib.parse import quote as urlquote
|
|||
from urllib.parse import urlparse
|
||||
|
||||
import markdown
|
||||
from markdown import Markdown
|
||||
from markdown.blockprocessors import BlockProcessor, BlockParser
|
||||
from markdown.extensions import Extension
|
||||
from markdown.extensions.footnotes import (FootnoteExtension, FootnoteBlockProcessor, FootnoteInlineProcessor,
|
||||
FootnoteTreeprocessor, FootnotePostTreeprocessor, FootnotePostprocessor)
|
||||
|
@ -20,6 +23,8 @@ from markdown.util import STX, ETX
|
|||
|
||||
from .config import Context
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
PRIO_BASE = 10000 # priority base for our extensions
|
||||
|
||||
# Patterns for ObsidianImages
|
||||
|
@ -383,6 +388,240 @@ class ObsidianLinks(Extension):
|
|||
'obsidian_generic_links', PRIO_BASE + 100)
|
||||
|
||||
|
||||
class ObsidianLists(Extension):
    """
    An extension to process Obsidian lists, including making nested lists.

    A preprocessor brackets every run of list lines with the ``LIST_START`` and ``LIST_END`` marker lines, and a
    block processor later turns each bracketed run into nested ``ul``/``ol``/``li`` elements.
    """

    # An unordered list head: leading spaces (group 1), a bullet character, then the item text (group 2).
    UL_RE = re.compile(r'^([ ]*)[-+*][ ]+(.*)')
    # An ordered list head: leading spaces (group 1), the item number (group 2), a period, then the text (group 3).
    OL_RE = re.compile(r'^([ ]*)([1-9][0-9]*)\.[ ]+(.*)')
    # Marker lines inserted by the preprocessor before and after each run of list lines.
    LIST_START = STX + "erbosoft:lstart" + ETX
    LIST_END = STX + "erbosoft:lend" + ETX

    def _find_listhead(self, line: str) -> tuple[str, int, int]:
        """
        Find if the specified line is a list element head.

        Args:
            line (str): The line to be checked.

        Returns:
            str: The list type, "ol" or "ul", or "" if this line is not a list head.
            int: The indent level of the list, or -1 if this line is not a list head.
            int: The start index of the list, or -1 if this line is not a list head for an ordered list.
        """
        m = self.UL_RE.match(line)
        if m:
            return 'ul', len(m.group(1)), -1
        m = self.OL_RE.match(line)
        if m:
            return 'ol', len(m.group(1)), int(m.group(2))
        return '', -1, -1

    class ObsidianListFinder(Preprocessor):
        """The preprocessor that brackets each run of list lines with the list start and end markers."""

        def __init__(self, extref: Any) -> None:
            super().__init__()
            self._extref = extref

        def run(self, lines: list[str]) -> list[str]:
            """
            Inserts the list start and end marker lines around each list found in the text.

            A list begins at the first line recognized as a list head and runs up to (not including) the next
            blank line, or to the end of the text if no blank line follows.

            Args:
                lines (list[str]): The lines of the document. This list is updated in place.

            Returns:
                list[str]: The same list, with the marker lines inserted.
            """
            marked: list[str] = []
            in_list = False
            for line in lines:
                if in_list:
                    if not line.strip():
                        # a blank line terminates the list; the end marker goes ahead of it
                        marked.append(self._extref.LIST_END)
                        in_list = False
                elif self._extref._find_listhead(line)[0]:
                    # first list head outside a list; the start marker goes ahead of it
                    marked.append(self._extref.LIST_START)
                    in_list = True
                marked.append(line)
            if in_list:
                # The text ended inside a list. Close it so the block processor always finds an end marker.
                marked.append(self._extref.LIST_END)
            # Keep the in-place contract: the caller's list object is the one that gets updated and returned.
            lines[:] = marked
            return lines

    class ObsidianListBlock(BlockProcessor):
        """The actual block processor that generates lists."""

        def __init__(self, parser: BlockParser, extref: Any) -> None:
            super().__init__(parser)
            self._extref = extref

        @staticmethod
        def _extract(blocks: list[str]) -> list[str]:
            """
            Extract all lines up to the next blank line, which terminates the list.

            The consumed lines are removed from ``blocks``; the blank line and whatever follows it in the same
            block are pushed back onto the front of ``blocks``.

            Args:
                blocks (list[str]): List of blocks to be parsed.

            Returns:
                list[str]: The block of lines to be parsed for a list.
            """
            out_lines: list[str] = []
            while blocks:
                cur_lines = blocks.pop(0).split('\n')
                for idx, line in enumerate(cur_lines):
                    if not line.strip():
                        # put the blank line and the rest of this block back for later processing
                        blocks.insert(0, '\n'.join(cur_lines[idx:]))
                        return out_lines
                    out_lines.append(line)
            return out_lines

        def _build_element(self, parent: etree.Element, listtype: str, indent: int, lines: list[str]) -> None:
            """
            Builds a list element and adds it to the specified parent list.

            Args:
                parent (etree.element): The parent list element to insert "li" items under.
                listtype (str): The list type, either "ol" or "ul".
                indent (int): The indent level of the list, which is greater than or equal to 0.
                lines (list[str]): The lines comprising the list element.
            """
            textdata: list[str] = []

            # Parse the header line to get the first bit of text.
            if listtype == 'ol':
                m = self._extref.OL_RE.match(lines[0])
                assert len(m.group(1)) == indent
                textdata.append(m.group(3))
            elif listtype == 'ul':
                m = self._extref.UL_RE.match(lines[0])
                assert len(m.group(1)) == indent
                textdata.append(m.group(2))

            # Build the list element
            listelement = etree.SubElement(parent, 'li')
            my_lines = list(lines)
            i = 1
            last_sublist: etree.Element | None = None
            while i < len(my_lines):
                subtype, subindent, _ = self._extref._find_listhead(my_lines[i])
                if not subtype:
                    # ordinary text line - append it to the text data
                    textdata.append(my_lines[i])
                    i += 1
                else:
                    # start of new sublist
                    assert subindent > indent
                    # flush text data: it follows the previous sublist if there is one, else it is the item text
                    if last_sublist is not None:
                        last_sublist.tail = '\n'.join(textdata)
                    else:
                        listelement.text = '\n'.join(textdata)
                    textdata = []
                    # parse the sublist and reset to parse the "excess"
                    last_sublist, my_lines = self._build_list(listelement, my_lines[i:])
                    i = 0

            # flush the text data one more time
            if textdata:
                if last_sublist is not None:
                    last_sublist.tail = '\n'.join(textdata)
                else:
                    listelement.text = '\n'.join(textdata)

        def _build_list(self, parent: etree.Element, lines: list[str]) -> tuple[etree.Element, list[str]]:
            """
            Build a new list attached to the specified parent element.

            Args:
                parent (etree.Element): The parent element to build the list under.
                lines (list[str]): The lines of text comprising the list. The first line must be a list head.

            Returns:
                etree.Element: The new list element.
                list[str]: The excess lines that can't be parsed as part of this list.
            """
            # Establish the start of the list.
            listtype, indent_level, start_index = self._extref._find_listhead(lines[0])
            assert indent_level >= 0
            list_top = etree.SubElement(parent, listtype)
            if listtype == 'ol' and start_index > 1:
                list_top.attrib['start'] = str(start_index)

            # The start index is set to -1 so, the first time around, we'll pick it up and set the start point.
            st = -1
            for i, line in enumerate(lines):
                new_type, new_indent, _ = self._extref._find_listhead(line)
                if not new_type:
                    continue  # plain text line; it belongs to the element currently being collected
                if new_indent < indent_level:
                    # if the list head is lesser indented, this is the end of this element and the list
                    if st >= 0:
                        self._build_element(list_top, listtype, indent_level, lines[st:i])
                    return list_top, lines[i:]
                if new_indent == indent_level:
                    # this is the end of this element
                    if st >= 0:
                        self._build_element(list_top, listtype, indent_level, lines[st:i])
                    if new_type != listtype:
                        # this is also the end of the list
                        return list_top, lines[i:]
                    st = i  # start of next element
                # a more deeply indented head stays within the current element's lines
            if st >= 0:  # end the final element
                self._build_element(list_top, listtype, indent_level, lines[st:])
            return list_top, []

        def test(self, parent: etree.Element, block: str) -> bool:
            """
            Tests to see whether the current block can be handled by this processor.

            Args:
                parent (etree.Element): The current parent element.
                block (str): The current block to be tested.

            Returns:
                bool: ``True`` if this processor can handle the block, ``False`` if not.
            """
            if self._extref.LIST_START in block:
                logger.debug("DETECT")
                return True
            return False

        def run(self, parent: etree.Element, blocks: list[str]) -> bool:
            """
            Processes the text in the current block and adds elements to the parent element.

            Args:
                parent (etree.Element): Parent element to add new subelements to.
                blocks (list[str]): Blocks of text to be processed. The first block in this list is the one for which
                    the ``test`` method returned ``True``. This method should remove any parsed text from the
                    ``block`` list before it returns.

            Returns:
                bool: ``True`` if text was parsed by this method, ``False`` if not.
            """
            start_marker = self._extref.LIST_START
            end_marker = self._extref.LIST_END

            chunk = blocks.pop(0)
            p = chunk.find(start_marker)
            # Anything ahead of the start marker is not part of the list; parse it normally.
            self.parser.parseChunk(parent, chunk[:p])
            # Skip the marker and the newline that follows it.
            chunk = chunk[p + len(start_marker) + 1:]

            # Pull in following blocks until the end marker shows up, stopping if the blocks run out.
            p = chunk.find(end_marker)
            while p < 0 and blocks:
                chunk += '\n'
                chunk += blocks.pop(0)
                p = chunk.find(end_marker)
            if p < 0:
                # No end marker anywhere: treat everything that was collected as the list.
                list_text = chunk
                remainder = ''
            else:
                list_text = chunk[:p]
                # Skip the marker and the newline that follows it.
                remainder = chunk[p + len(end_marker) + 1:]
            blocks.insert(0, remainder)

            list_lines = list_text.rstrip().split('\n')
            assert len(list_lines) > 0
            logger.debug("*** Found list: %s", list_lines)
            _, excess = self._build_list(parent, list_lines)
            if excess:
                # lines the list could not absorb go back to the front of the queue for other processors
                blocks.insert(0, '\n'.join(excess))
            return True

    def extendMarkdown(self, md: Markdown) -> None:  # noqa: N802
        """
        Registers the list preprocessor and the list block processor with the Markdown parser.

        Args:
            md (markdown.Markdown): The Markdown parser to register the processors with.
        """
        md.preprocessors.register(ObsidianLists.ObsidianListFinder(self), 'obsidian-list-finder', 18)
        md.parser.blockprocessors.register(ObsidianLists.ObsidianListBlock(md.parser, self),
                                           'obsidian-lists', PRIO_BASE + 50)
|
||||
|
||||
|
||||
class ObsidianInlines(Extension):
|
||||
"""An extension that handles the special Obsidian markdown format sequences."""
|
||||
def extendMarkdown(self, md: markdown.Markdown) -> None: # noqa: N802
|
||||
|
@ -529,7 +768,7 @@ class ObsidianStyleFootnotes(FootnoteExtension):
|
|||
)
|
||||
return sup, m.start(0), etxpoint + 1
|
||||
|
||||
def extendMarkdown(self, md):
|
||||
def extendMarkdown(self, md) -> None:
|
||||
"""
|
||||
Registers the footnote processor with the Markdown parser.
|
||||
|
||||
|
@ -583,11 +822,13 @@ def create_markdown_parser(context: Context) -> markdown.Markdown:
|
|||
}
|
||||
return markdown.Markdown(extensions=['fenced_code',
|
||||
'codehilite',
|
||||
'sane_lists',
|
||||
'tables',
|
||||
MetaStripper(),
|
||||
ObsidianComments(),
|
||||
ObsidianStyleFootnotes(SUPERSCRIPT_TEXT='[{}]', SEPARATOR='-'),
|
||||
ObsidianImages(context),
|
||||
ObsidianLinks(context),
|
||||
ObsidianLists(),
|
||||
ObsidianInlines()],
|
||||
extension_configs=extconfig)
|
||||
|
|
Loading…
Reference in New Issue
Block a user