diff --git a/.coverage b/.coverage index 07e5941..3ca192c 100644 Binary files a/.coverage and b/.coverage differ diff --git a/poetry.lock b/poetry.lock index 9e5b6f3..66f2303 100644 --- a/poetry.lock +++ b/poetry.lock @@ -99,6 +99,27 @@ python-versions = ">=3.6" [package.dependencies] markdown = ">=3" +[[package]] +name = "marko" +version = "1.2.0" +description = "A markdown parser with high extensibility." +category = "main" +optional = false +python-versions = ">=3.6" + +[package.extras] +benchmark = ["commonmark (>=0.9,<1.0)", "markdown (>=3.3,<4.0)", "markdown-it-py (>=2.0,<3.0)", "mistune (>=2.0,<3.0)", "mistletoe (>=0.7,<1.0)"] +codehilite = ["pygments"] +toc = ["python-slugify"] + +[[package]] +name = "mistletoe" +version = "0.8.2" +description = "A fast, extensible Markdown parser in pure Python." +category = "main" +optional = false +python-versions = "~=3.3" + [[package]] name = "packaging" version = "21.3" @@ -297,7 +318,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest- [metadata] lock-version = "1.1" python-versions = "^3.8" -content-hash = "029333515754c994e12f242b002d6a6437ca5441bebefb5a017e55fdad40cfdc" +content-hash = "ac905618e17d6e6ae95e55be9dbbceb825c567de657348c82a27ad5849a1b559" [metadata.files] atomicwrites = [ @@ -374,6 +395,14 @@ markdown-link-attr-modifier = [ {file = "markdown_link_attr_modifier-0.2.0-py3-none-any.whl", hash = "sha256:01b07d7a466ff7c721f97d66b08ec60a54e8d46c287695b649507ae57a42ff4b"}, {file = "markdown_link_attr_modifier-0.2.0.tar.gz", hash = "sha256:94eacc4b7cfda9b5c883fb8c9f218b0602889c64d12977b31994d10b7b720a59"}, ] +marko = [ + {file = "marko-1.2.0-py3-none-any.whl", hash = "sha256:aa9fd81b02f64ab192404cbae5dfe812e337d3d04cefff8800a079c9dc9e5d28"}, + {file = "marko-1.2.0.tar.gz", hash = "sha256:0befa554da3d14b7a38ac7473ffde60f1916fd1d0aaf67181ae70bfe00fdba18"}, +] +mistletoe = [ + {file = "mistletoe-0.8.2-py3-none-any.whl", hash = "sha256:3c82cbcf01ed0a79944ba7ba2b715e6da633d1eef24812904e9008160a74e93e"}, + {file = "mistletoe-0.8.2.tar.gz", hash = "sha256:a2e354e653f29bcaf5599d8948512119ac6037e9c2f8ee77f39d55450e7d1a6c"}, +] packaging = [ {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, diff --git a/pyproject.toml b/pyproject.toml index 5405a1d..ec3c2aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,8 @@ python-dotenv = "^0.19.2" python-frontmatter = "^1.0.0" python-slugify = "^5.0.2" watchdog = "^2.1.6" +mistletoe = "^0.8.2" +marko = "^1.2.0" [tool.poetry.dev-dependencies] pytest = "^7.0.1" diff --git a/src/entities/matcher/abstract_matcher.py b/src/entities/matcher/abstract_matcher.py new file mode 100644 index 0000000..1a3b1dd --- /dev/null +++ b/src/entities/matcher/abstract_matcher.py @@ -0,0 +1,7 @@ +from src.entities.matcher.match import Match + + +class AbstractMatcher: + @classmethod + def match(cls, content: str): + raise NotImplementedError('Should have implemented this') diff --git a/src/entities/matcher/entities_matcher.py b/src/entities/matcher/entities_matcher.py new file mode 100644 index 0000000..9c81edb --- /dev/null +++ b/src/entities/matcher/entities_matcher.py @@ -0,0 +1,50 @@ +from enum import Enum + +from .markdown_inline_images_matcher import MarkdownInlineImageMatcher + + +class Target(Enum): + MARKDOWN_INLINE_IMAGE = 'MARKDOWN_INLINE_IMAGE' + MARKDOWN_REFERENCE_IMAGE = 'MARKDOWN_REFERENCE_IMAGE' + OBSIDIAN_EMBED = 'OBSIDIAN_EMBED' + OBSIDIAN_LINK = 'OBSIDIAN_LINK' + + +class Category(Enum): + EMBED = 'EMBED' + LINK = 'LINK' + + +class EntitiesMatcher: + matchers = [ + MarkdownInlineImageMatcher, + ] + + @classmethod + def get_matches(cls, content: str): + return [ + match + for matcher in cls.matchers + for match in matcher.match(content) + ] + + @classmethod + def match_all(cls, content: str): + return [ + *cls.get_obsidian_embed(content), + *cls.get_obsidian_link(content), + ] + + @classmethod + def get_obsidian_embed(cls, content: str): + MW_IMG_REGEXP = r'(\!\[\[(.*)\]\])' + MW_INCLUDE_REGEXP = r'^(\[\[([\s\w\d_\-&|]*)\]\])$' + return [] + + @classmethod + def get_obsidian_link(cls, content: str): + MATCHERS = [ + r'^(\[\[([\s\w\d_\-&|]*)\]\])(?:.+)$', + r'(?!^)(\[\[([\s\w\d_\-&|]*)\]\])', + ] + return [] diff --git a/src/entities/matcher/markdown_inline_image_matcher.py b/src/entities/matcher/markdown_inline_image_matcher.py new file mode 100644 index 0000000..6ba374d --- /dev/null +++ b/src/entities/matcher/markdown_inline_image_matcher.py @@ -0,0 +1,30 @@ +import os +import re + +from .abstract_matcher import AbstractMatcher +from .match import Match + + +class MarkdownInlineImageMatcher(AbstractMatcher): + matcher_id = 'OBSIDIAN_BLOG/MARKDOWN/INLINE_IMAGE/Mather' + + @classmethod + def match(cls, content: str): + REGEX = r'(\!\[(.*)\]\((.*)\))' + + matches = [] + re_matches = re.findall(REGEX, content) + + for match in re_matches: + placeholder, title, url = match + _, ext = os.path.splitext(url) + match = Match( + matcher_id=cls.matcher_id, + placeholder=placeholder, + url=url, + title=title, + ext=ext, + ) + matches.append(match) + + return matches diff --git a/src/entities/matcher/markdown_reference_image_matcher.py b/src/entities/matcher/markdown_reference_image_matcher.py new file mode 100644 index 0000000..aa3acef --- /dev/null +++ b/src/entities/matcher/markdown_reference_image_matcher.py @@ -0,0 +1,38 @@ +import os +import re +from src.entities.matcher.abstract_matcher import AbstractMatcher +from src.entities.matcher.match import Match + + +class MarkdownReferenceImageMatcher(AbstractMatcher): + matcher_id = 'OBSIDIAN_BLOG/MARKDOWN/REFERENCE_IMAGE/MATCHER' + + @classmethod + def match(cls, content): + """parse all reference image entities from a given page model""" + REGEX = r'(\!\[(.*)]\[(.*)\])' + + matches = [] + re_matches = re.findall(REGEX, content) + + for match in re_matches: + placeholder, title, key = match + link_re = re.compile('\\[' + key + '\\]:\\s(.*)') + [url] = re.findall(link_re, content) + _, ext = os.path.splitext(url) + + match = Match( + matcher_id=cls.matcher_id, + placeholder=placeholder, + url=cls.normalize_path(url), + title=title, + ext=ext, + ) + matches.append(match) + return matches + + @staticmethod + def normalize_path(path: str): + if path[0] == '/': + return os.path.realpath(path) + return path diff --git a/src/entities/matcher/match.py b/src/entities/matcher/match.py new file mode 100644 index 0000000..04db5b8 --- /dev/null +++ b/src/entities/matcher/match.py @@ -0,0 +1,12 @@ +from typing import Optional +from dataclasses import dataclass + + +@dataclass +class Match: + matcher_id: str + is_embed: bool = False + placeholder: Optional[str] = None + url: Optional[str] = None + title: Optional[str] = None + ext: Optional[str] = None diff --git a/src/entities/matcher/obsidian_link_matcher.py b/src/entities/matcher/obsidian_link_matcher.py new file mode 100644 index 0000000..310a256 --- /dev/null +++ b/src/entities/matcher/obsidian_link_matcher.py @@ -0,0 +1,48 @@ +import os +import re +from .match import Match + + +class ObsidianMatcher: + matcher_id = 'OBSIDIAN_BLOG/OBSIDIAN/MATCHER' + + @classmethod + def match(cls, content): + REGEXP = r'((?:!)?\[\[([\s\w\d_\-&|\.]*)\]\])' + + matches = [] + re_matches = re.findall(REGEXP, content, flags=re.MULTILINE) + + for re_match in re_matches: + placeholder, _inner = re_match + + title = None + url = None + ext = None + is_embed = placeholder.startswith('!') + + res = _inner.split('|') + + + if len(res) >= 1: + url = res[0].strip() + + if len(res) >= 2: + title = res[1].strip() + + + if url: + _, ext = os.path.splitext(url) + + match = Match( + matcher_id=cls.matcher_id, + is_embed=is_embed, + placeholder=placeholder, + url=url, + title=title, + ext=ext or None, + ) + + matches.append(match) + + return matches diff --git a/src/lib/fs.py b/src/lib/fs.py index 9dd01d3..d90a3ba 100644 --- a/src/lib/fs.py +++ b/src/lib/fs.py @@ -76,6 +76,7 @@ def load(filename): def normalize_path(path: str): + print('Deprecated!') if path[0] == '/': return os.path.realpath(path) return path diff --git a/src/markdown/markdown.py b/src/markdown/markdown.py new file mode 100644 index 0000000..17136be --- /dev/null +++ b/src/markdown/markdown.py @@ -0,0 +1,31 @@ +from marko import inline, Markdown, ast_renderer, html_renderer + + +class ObsidianLink(inline.InlineElement): + pattern = r'\[\[\s*(.+?)\s*(?:\|\s*(.+?)\s*)?]\]' + parse_children = True + + def __init__(self, match): + self.target = match.group(1) + self.title = match.group(2) + self.document = None + + +class ObsidianEmbed(inline.InlineElement): + pattern = r'\!\[\[\s*(.+?)\s*(?:\|\s*(.+?)\s*)?]\]' + parse_children = True + + def __init__(self, match): + self.target = match.group(1) + self.title = match.group(2) + self.document = None + + +class Obsidian: + elements = [ObsidianEmbed, ObsidianLink] + + +markdown = Markdown() +markdown.use(Obsidian) + +# HTMLRenderer = html_renderer.HTMLRenderer() diff --git a/tests/entities/entities_matcher_test.py b/tests/entities/entities_matcher_test.py new file mode 100644 index 0000000..2fabc4e --- /dev/null +++ b/tests/entities/entities_matcher_test.py @@ -0,0 +1,57 @@ +# from src.entities.matcher.match import EntitiesMatcher, Target + + +# def test_entities_matcher(): +# content = """ +# ![Image Alt](https://example.com/inline_image) +# """ +# +# """ +# ![Image No Link Alt]() +# ![Image Local Alt](./local_image.png) +# ![Image Ref Alt][ref_id] +# ![Image Ref Local Alt][ref_local_id] +# +# [ref_id]: https:example.com/ref_image +# [ref_local_id]: ./local_image_png +# +# [[Obsidian Link]] +# [[Link with Spec Characters,._-]] +# [[Link with Title | Placeholder Title]] +# +# ![[Obsidian Embed]] +# ![[Obsidian Embed with Spec Characters,._-]] +# ![[Obsidian Embed JPG.png]] +# ![[Obsidian Embed PNG.png]] +# ![[Obsidian Embed PDF.pdf]] +# ![[Obsidian Embed with Alt | Placeholder Title]] +# """ +# +# res = EntitiesMatcher().get_matches(content) +# +# assert res == [ +# { +# 'matcher_id': +# 'placeholder': 'Image Alt', +# 'url': 'https://example.com/inline_image', +# }, +# # {'placeholder': 'Image No Link Alt', 'url': None}, +# # {'placeholder': 'Image Local Alt', 'url': './local_image.png'}, +# # {'placeholder': 'Image Ref Alt', 'url': 'https:example.com/ref_image'}, +# # {'placeholder': 'Image Ref Local Alt', 'url': './local_image_png'}, +# # {'placeholder': 'Obsidian Link'}, +# # {'placeholder': 'Link with Spec Characters,._-'}, +# # { +# # 'placeholder': 'Link with Title', +# # 'placeholder_title': 'Placeholder Title', +# # }, +# # {'placeholder': 'Obsidian Embed'}, +# # {'placeholder': 'Obsidian Embed with Spec Characters,._-'}, +# # {'placeholder': 'Obsidian Embed JPG', 'ext': 'png'}, +# # {'placeholder': 'Obsidian Embed PNG', 'ext': 'png'}, +# # {'placeholder': 'Obsidian Embed PDF', 'ext': 'pdf'}, +# # { +# # 'placeholder': 'Obsidian Embed with Alt', +# # 'placeholder_title': 'Placeholder Title', +# # }, +# ] diff --git a/tests/entities/matcher/markdown_inline_images_matcher_test.py b/tests/entities/matcher/markdown_inline_images_matcher_test.py new file mode 100644 index 0000000..ca8553d --- /dev/null +++ b/tests/entities/matcher/markdown_inline_images_matcher_test.py @@ -0,0 +1,26 @@ +import pytest + +from src.entities.matcher.markdown_inline_image_matcher import ( + MarkdownInlineImageMatcher, +) +from src.entities.matcher.match import Match + + +@pytest.mark.parametrize( + 'content,expected_result', + [ + ( + ' ![Title](link.png)', + Match( + matcher_id=MarkdownInlineImageMatcher.matcher_id, + placeholder='![Title](link.png)', + url='link.png', + title='Title', + ext='.png', + ), + ) + ], +) +def test_markdown_inline_image_matcher(content, expected_result): + [res] = MarkdownInlineImageMatcher.match(content) + assert res == expected_result diff --git a/tests/entities/matcher/markdown_reference_image_matcher_test.py b/tests/entities/matcher/markdown_reference_image_matcher_test.py new file mode 100644 index 0000000..30ec347 --- /dev/null +++ b/tests/entities/matcher/markdown_reference_image_matcher_test.py @@ -0,0 +1,26 @@ +import pytest + +from src.entities.matcher.markdown_reference_image_matcher import ( + MarkdownReferenceImageMatcher, +) +from src.entities.matcher.match import Match + + +@pytest.mark.parametrize( + 'content,expected_result', + [ + ( + ' ![title][reference]\n[reference]: image.png', + Match( + matcher_id=MarkdownReferenceImageMatcher.matcher_id, + placeholder='![title][reference]', + url='image.png', + title='title', + ext='.png', + ), + ) + ], +) +def test_markdown_reference_image_matcher(content, expected_result): + [res] = MarkdownReferenceImageMatcher.match(content) + assert res == expected_result diff --git a/tests/entities/matcher/obsidian_link_matcher_test.py b/tests/entities/matcher/obsidian_link_matcher_test.py new file mode 100644 index 0000000..e6f24ff --- /dev/null +++ b/tests/entities/matcher/obsidian_link_matcher_test.py @@ -0,0 +1,45 @@ +import pytest + +from src.entities.matcher.obsidian_link_matcher import ObsidianLinkMatcher +from src.entities.matcher.match import Match + + +@pytest.mark.parametrize( + 'content,expected_result', + [ + ( + ' [[title.png]] ', + Match( + matcher_id=ObsidianLinkMatcher.matcher_id, + placeholder='[[title.png]]', + url='title.png', + title=None, + ext='.png', + ), + ), + ( + ' [[url | title]] ', + Match( + matcher_id=ObsidianLinkMatcher.matcher_id, + placeholder='[[url | title]]', + url='url', + title='title', + ext=None, + ), + ), + ( + ' ![[url | title]] ', + Match( + matcher_id=ObsidianLinkMatcher.matcher_id, + is_embed=True, + placeholder='![[url | title]]', + url='url', + title='title', + ext=None, + ), + ), + ], +) +def test_markdown_reference_image_matcher(content, expected_result): + [res] = ObsidianLinkMatcher.match(content) + assert res == expected_result diff --git a/tests/mistletoe_test.py b/tests/mistletoe_test.py new file mode 100644 index 0000000..d8e6703 --- /dev/null +++ b/tests/mistletoe_test.py @@ -0,0 +1,24 @@ +# import mistletoe +# from mistletoe import Document +# from mistletoe.ast_renderer import ASTRenderer +# import src.markdown.markdown +import json +from src.markdown.markdown import markdown + + + +def test_marko(): + content = """\ +![[Hello]] +![[Hello | World]] +[[Hello]] +[[Hello | World]] +[[One | Two]] [[Three | Four]]\ +""" + + ast = markdown.parse(content) + html = markdown.render(ast) + print(html) + # res = HTMLRenderer.render_children(res) + # print(res) + assert True == False