added basic Markdown parsing and some support for Obsidian constructs

This commit is contained in:
Amy G. Bowersox 2024-02-27 22:56:46 -07:00
parent 772fe46a5b
commit d8cd92764f
3 changed files with 70 additions and 0 deletions

View File

@ -4,6 +4,7 @@ import argparse
import sys import sys
from pathlib import Path from pathlib import Path
from tree import SourceNode from tree import SourceNode
from mparse import create_markdown_parser
# The command line parser # The command line parser
parser = argparse.ArgumentParser(prog='dragonglass') parser = argparse.ArgumentParser(prog='dragonglass')
@ -28,11 +29,19 @@ def main():
for node in nodes: for node in nodes:
node.load_metadata(source_dir) node.load_metadata(source_dir)
mdparse = create_markdown_parser()
for node in nodes:
node.parse_markdown(source_dir, mdparse)
# TEMP # TEMP
for node in nodes: for node in nodes:
print(node) print(node)
if node.metadata: if node.metadata:
print(f"Metadata: {node.metadata}") print(f"Metadata: {node.metadata}")
if node.text:
print("----- BEGIN TEXT -----")
print(node.text)
print("------ END TEXT ------")
return 0 return 0

54
src/dragonglass/mparse.py Normal file
View File

@ -0,0 +1,54 @@
#!/usr/bin/env python3
import markdown
import xml.etree.ElementTree as etree
from markdown.extensions import Extension
from markdown.inlinepatterns import InlineProcessor, SimpleTagInlineProcessor
from markdown.preprocessors import Preprocessor
class MetaStripper(Extension):
    """Markdown extension that removes a leading ``---`` delimited metadata block.

    The block is only recognised when the very first line of the document is
    exactly ``---``; everything up to and including the next ``---`` line is
    dropped before the rest of the document is parsed.
    """

    class MetaStripperProc(Preprocessor):
        """Preprocessor that strips the metadata block from the line list."""

        def run(self, lines):
            """Return *lines* with the leading metadata block removed.

            Args:
                lines: The document as a list of lines (no trailing newlines).

            Returns:
                The same list object, with the metadata block deleted in
                place. The list is returned unchanged when it is empty, does
                not start with ``---``, or has no closing ``---`` line.
            """
            if not lines or lines[0] != '---':
                return lines
            try:
                closing = lines.index('---', 1)
            except ValueError:
                # Unterminated block: keep the document intact rather than
                # consuming every line and crashing on an empty list.
                return lines
            # One slice deletion instead of repeated pop(0), which is O(n^2).
            del lines[:closing + 1]
            return lines

    def extendMarkdown(self, md):
        """Register the metadata-stripping preprocessor on *md*."""
        # NOTE(review): priority 0 is lower than the priorities Python-Markdown
        # gives its built-in preprocessors, so this should run after them --
        # confirm that ordering is intended.
        md.preprocessors.register(MetaStripper.MetaStripperProc(md), 'metastripper', 0)
class ObsidianLinks(Extension):
    """Markdown extension that turns ``[[wikilinks]]`` into ``<a>`` elements.

    Both the plain form ``[[target]]`` and the aliased form
    ``[[target|label]]`` are handled.
    """

    class ObsidianLinksProc(InlineProcessor):
        """Inline processor that builds the anchor element for one wikilink."""

        def handleMatch(self, m, data):
            """Convert one ``[[...]]`` match into an ``<a>`` element.

            Args:
                m: The regex match covering the whole ``[[...]]`` construct.
                data: The text block being processed (unused).

            Returns:
                A tuple ``(element, start, end)`` where *element* is the
                anchor and *start*/*end* delimit the matched text to replace.
            """
            # Strip the surrounding "[[" and "]]".
            inner = m.group(0)[2:-2]
            # "[[target|label]]": the part before the first pipe is the link
            # destination and the part after it is the display text.
            target, _, label = inner.partition('|')
            el = etree.Element('a')
            el.set('href', target)
            # Without a (non-empty) label, the target doubles as the text.
            el.text = label or target
            return el, m.start(0), m.end(0)

    def extendMarkdown(self, md):
        """Register the wikilink inline pattern on *md*."""
        OBSLINK_PATTERN = r'\[\[(.*?)\]\]'
        md.inlinePatterns.register(ObsidianLinks.ObsidianLinksProc(OBSLINK_PATTERN, md), 'obsidian_links', 0)
class Strikeout(Extension):
    """Markdown extension that renders ``~~text~~`` as a ``<del>`` element."""

    def extendMarkdown(self, md):
        """Register the strikeout inline pattern on *md*."""
        # The empty first group leaves the wrapped text in group 2.
        pattern = r'()~~(.*?)~~'
        processor = SimpleTagInlineProcessor(pattern, 'del')
        md.inlinePatterns.register(processor, 'strikeout', 0)
class Highlight(Extension):
    """Markdown extension that renders ``==text==`` as an ``<ins>`` element."""

    def extendMarkdown(self, md):
        """Register the highlight inline pattern on *md*."""
        # The empty first group leaves the wrapped text in group 2.
        # NOTE(review): highlights are emitted as <ins>; confirm that <mark>
        # was not the intended tag.
        pattern = r'()\=\=(.*?)\=\='
        processor = SimpleTagInlineProcessor(pattern, 'ins')
        md.inlinePatterns.register(processor, 'highlight', 0)
def create_markdown_parser():
    """Build a ``markdown.Markdown`` converter with this module's extensions.

    Returns:
        A converter configured with the metadata stripper, wikilink,
        strikeout and highlight extensions, in that order.
    """
    extensions = [
        MetaStripper(),
        ObsidianLinks(),
        Strikeout(),
        Highlight(),
    ]
    return markdown.Markdown(extensions=extensions)

View File

@ -16,6 +16,7 @@ class SourceNode:
self._is_dir = is_dir self._is_dir = is_dir
self._is_md = path.match(MARKDOWN_PAT) self._is_md = path.match(MARKDOWN_PAT)
self.metadata = None self.metadata = None
self.text = None
def __str__(self): def __str__(self):
return f"SourceNode({self._path}, {self._is_dir}) [is_md={self._is_md}]" return f"SourceNode({self._path}, {self._is_dir}) [is_md={self._is_md}]"
@ -52,3 +53,9 @@ class SourceNode:
metalines.append(l) metalines.append(l)
l = f.readline() l = f.readline()
self.metadata = yaml.full_load(''.join(metalines)) self.metadata = yaml.full_load(''.join(metalines))
def parse_markdown(self, source_dir, markdown_parser):
    """Convert this node's Markdown file and store the result in ``self.text``.

    Nodes that are not Markdown files are left untouched.

    Args:
        source_dir: Directory that ``self._path`` is joined onto.
        markdown_parser: Object providing ``reset()`` and ``convert(str)``;
            it is reset before the file is read.
    """
    if not self._is_md:
        return
    # Clear parser state first so nothing carries over from a previous node.
    markdown_parser.reset()
    source_file = source_dir / self._path
    with open(source_file, "r", encoding="utf-8") as handle:
        raw = handle.read()
    self.text = markdown_parser.convert(raw)