A · A · Feb 18, 2022 · Feb 19, 2022 · Feb 20, 2022
diff --git a/.coverage b/.coverage
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -19,6 +19,8 @@ python-dotenv = "^0.19.2"
 python-frontmatter = "^1.0.0"
 python-slugify = "^5.0.2"
 watchdog = "^2.1.6"
+mistletoe = "^0.8.2"
+marko = "^1.2.0"
 
 [tool.poetry.dev-dependencies]
 pytest = "^7.0.1"

diff --git a/src/entities/matcher/abstract_matcher.py b/src/entities/matcher/abstract_matcher.py
@@ -0,0 +1,7 @@
+from src.entities.matcher.match import Match
+
+
+class AbstractMatcher:
+    """Base class for content matchers.
+
+    Subclasses override :meth:`match` to scan a piece of text and return
+    the entities they recognise.
+    """
+
+    @classmethod
+    def match(cls, content: str) -> 'list[Match]':
+        """Return the matches found in ``content``.
+
+        Args:
+            content: Raw text to scan.
+
+        Returns:
+            A list of ``Match`` objects (in subclasses; the base
+            implementation never returns).
+
+        Raises:
+            NotImplementedError: Always, until a subclass overrides this.
+        """
+        # Name the offending class so a missing override is easy to locate.
+        raise NotImplementedError(
+            f'{cls.__name__}.match() must be implemented by the subclass'
+        )
diff --git a/src/entities/matcher/entities_matcher.py b/src/entities/matcher/entities_matcher.py
@@ -0,0 +1,50 @@
+from enum import Enum
+
+# The sibling module is ``markdown_inline_image_matcher`` (singular "image").
+from .markdown_inline_image_matcher import MarkdownInlineImageMatcher
+
+
+class Target(Enum):
+    """Entity syntaxes the matchers are meant to recognise."""
+
+    MARKDOWN_INLINE_IMAGE = 'MARKDOWN_INLINE_IMAGE'
+    MARKDOWN_REFERENCE_IMAGE = 'MARKDOWN_REFERENCE_IMAGE'
+    OBSIDIAN_EMBED = 'OBSIDIAN_EMBED'
+    OBSIDIAN_LINK = 'OBSIDIAN_LINK'
+
+
+class Category(Enum):
+    """Coarse entity category: embedded content or plain link."""
+
+    EMBED = 'EMBED'
+    LINK = 'LINK'
+
+
+class EntitiesMatcher:
+    """Run every registered matcher over a piece of content."""
+
+    # Matcher classes consulted by ``get_matches``, in this order.
+    matchers = [
+        MarkdownInlineImageMatcher,
+    ]
+
+    @classmethod
+    def get_matches(cls, content: str):
+        """Return the matches of all registered matchers as one flat list.
+
+        Args:
+            content: Raw text to scan.
+
+        Returns:
+            The results of each matcher's ``match(content)``, concatenated
+            in the order the matchers appear in ``matchers``.
+        """
+        return [
+            match
+            for matcher in cls.matchers
+            for match in matcher.match(content)
+        ]
+
+    @classmethod
+    def match_all(cls, content: str):
+        """Return Obsidian embeds followed by Obsidian links.
+
+        Currently always an empty list, because both helpers it calls are
+        unimplemented placeholders.
+        """
+        return [
+            *cls.get_obsidian_embed(content),
+            *cls.get_obsidian_link(content),
+        ]
+
+    @classmethod
+    def get_obsidian_embed(cls, content: str):
+        """Placeholder for Obsidian embed detection; always returns ``[]``."""
+        # TODO: the patterns below are kept as a starting point but are not
+        # applied to ``content`` yet.
+        MW_IMG_REGEXP = r'(\!\[\[(.*)\]\])'
+        MW_INCLUDE_REGEXP = r'^(\[\[([\s\w\d_\-&|]*)\]\])$'
+        return []
+
+    @classmethod
+    def get_obsidian_link(cls, content: str):
+        """Placeholder for Obsidian link detection; always returns ``[]``."""
+        # TODO: the patterns below are kept as a starting point but are not
+        # applied to ``content`` yet.
+        MATCHERS = [
+            r'^(\[\[([\s\w\d_\-&|]*)\]\])(?:.+)$',
+            r'(?!^)(\[\[([\s\w\d_\-&|]*)\]\])',
+        ]
+        return []
diff --git a/src/entities/matcher/markdown_inline_image_matcher.py b/src/entities/matcher/markdown_inline_image_matcher.py
@@ -0,0 +1,30 @@
+import os
+import re
+
+from .abstract_matcher import AbstractMatcher
+from .match import Match
+
+
+class MarkdownInlineImageMatcher(AbstractMatcher):
+    """Find Markdown inline images of the form ``![title](url)``."""
+
+    # Spelled like the other matchers' ids (".../MATCHER").
+    matcher_id = 'OBSIDIAN_BLOG/MARKDOWN/INLINE_IMAGE/MATCHER'
+
+    # Negated character classes instead of greedy ``.*``: with ``.*`` two
+    # images on one line were merged into a single match running from the
+    # first ``![`` to the last ``)``. Newlines are excluded so an image still
+    # has to sit on one line, as before.
+    _IMAGE_RE = re.compile(r'(!\[([^\]\n]*)\]\(([^)\n]*)\))')
+
+    @classmethod
+    def match(cls, content: str):
+        """Return one ``Match`` per inline image found in ``content``.
+
+        Args:
+            content: Raw Markdown text.
+
+        Returns:
+            A list of ``Match`` objects in document order. ``placeholder``
+            is the full ``![title](url)`` text, ``title`` the text between
+            the square brackets, ``url`` the text between the parentheses
+            and ``ext`` the result of ``os.path.splitext(url)`` (including
+            the leading dot, or ``''`` when there is none).
+
+        Note:
+            A URL that itself contains ``)`` is cut at the first ``)``.
+        """
+        matches = []
+        for placeholder, title, url in cls._IMAGE_RE.findall(content):
+            _, ext = os.path.splitext(url)
+            matches.append(
+                Match(
+                    matcher_id=cls.matcher_id,
+                    placeholder=placeholder,
+                    url=url,
+                    title=title,
+                    ext=ext,
+                )
+            )
+        return matches
diff --git a/src/entities/matcher/markdown_reference_image_matcher.py b/src/entities/matcher/markdown_reference_image_matcher.py
@@ -0,0 +1,38 @@
+import os
+import re
+from src.entities.matcher.abstract_matcher import AbstractMatcher
+from src.entities.matcher.match import Match
+
+
+class MarkdownReferenceImageMatcher(AbstractMatcher):
+    """Find Markdown reference-style images such as ``![title][key]``."""
+
+    matcher_id = 'OBSIDIAN_BLOG/MARKDOWN/REFERENCE_IMAGE/MATCHER'
+
+    # Negated character classes instead of greedy ``.*`` so that several
+    # images on one line are matched separately.
+    _IMAGE_RE = re.compile(r'(!\[([^\]\n]*)\]\[([^\]\n]*)\])')
+
+    @classmethod
+    def match(cls, content):
+        """Return one ``Match`` per resolvable reference image in ``content``.
+
+        Args:
+            content: Raw Markdown text containing both the image references
+                and their ``[key]: url`` definitions.
+
+        Returns:
+            A list of ``Match`` objects in document order. ``url`` is the
+            definition's target passed through :meth:`normalize_path`;
+            ``ext`` comes from ``os.path.splitext`` on the raw target.
+            References without a matching definition are skipped.
+        """
+        matches = []
+        for placeholder, title, key in cls._IMAGE_RE.findall(content):
+            # Collapsed form ``![title][]`` uses the title as the key.
+            url = cls._resolve_reference(key or title, content)
+            if url is None:
+                # No definition: not an image, so nothing to report.
+                continue
+            _, ext = os.path.splitext(url)
+            matches.append(
+                Match(
+                    matcher_id=cls.matcher_id,
+                    placeholder=placeholder,
+                    url=cls.normalize_path(url),
+                    title=title,
+                    ext=ext,
+                )
+            )
+        return matches
+
+    @staticmethod
+    def _resolve_reference(key, content):
+        """Return the target of the first ``[key]: target`` line, or None."""
+        # The key is escaped so characters such as ``.`` or ``(`` in it are
+        # matched literally instead of being interpreted as regex syntax.
+        link_re = re.compile(r'\[' + re.escape(key) + r'\]:\s(.*)')
+        found = link_re.search(content)
+        return found.group(1) if found else None
+
+    @staticmethod
+    def normalize_path(path: str):
+        """Resolve absolute paths with ``os.path.realpath``.
+
+        Paths that do not start with ``/`` (including the empty string) are
+        returned unchanged.
+        """
+        if path.startswith('/'):
+            return os.path.realpath(path)
+        return path
diff --git a/src/entities/matcher/match.py b/src/entities/matcher/match.py
@@ -0,0 +1,12 @@
+from typing import Optional
+from dataclasses import dataclass
+
+
+@dataclass
+class Match:
+    """A single entity occurrence reported by a matcher."""
+
+    # Identifier of the matcher class that produced this match.
+    matcher_id: str
+    # True for embeds; ObsidianMatcher sets it when the placeholder starts
+    # with '!'. The Markdown image matchers leave the default.
+    is_embed: bool = False
+    # The full matched source text, e.g. '![Title](link.png)'.
+    placeholder: Optional[str] = None
+    # Target of the image or link.
+    url: Optional[str] = None
+    # Alt text or display title, when present.
+    title: Optional[str] = None
+    # Extension of ``url`` as returned by os.path.splitext (with leading dot).
+    ext: Optional[str] = None
diff --git a/src/entities/matcher/obsidian_link_matcher.py b/src/entities/matcher/obsidian_link_matcher.py
@@ -0,0 +1,48 @@
+import os
+import re
+from .match import Match
+
+
+class ObsidianMatcher:
+    """Find Obsidian wiki links ``[[target]]`` and embeds ``![[target]]``."""
+
+    matcher_id = 'OBSIDIAN_BLOG/OBSIDIAN/MATCHER'
+
+    # Anything except square brackets may appear between ``[[`` and ``]]``.
+    # The earlier whitelist rejected targets containing characters such as
+    # ``,``, ``/`` or ``#``; every text it accepted is still accepted here.
+    _LINK_RE = re.compile(r'((?:!)?\[\[([^\[\]]*)\]\])')
+
+    @classmethod
+    def match(cls, content):
+        """Return one ``Match`` per Obsidian link or embed in ``content``.
+
+        Args:
+            content: Raw note text.
+
+        Returns:
+            A list of ``Match`` objects in document order. The text inside
+            the brackets is split on ``|``: the first part (stripped) is
+            ``url``, the second part (stripped) is ``title`` or ``None``
+            when absent. ``is_embed`` is True when the placeholder starts
+            with ``!``. ``ext`` is the extension of ``url`` including the
+            leading dot, or ``None`` when there is none.
+        """
+        matches = []
+        for placeholder, inner in cls._LINK_RE.findall(content):
+            target, *rest = inner.split('|')
+            url = target.strip()
+            title = rest[0].strip() if rest else None
+            ext = os.path.splitext(url)[1] if url else None
+
+            matches.append(
+                Match(
+                    matcher_id=cls.matcher_id,
+                    is_embed=placeholder.startswith('!'),
+                    placeholder=placeholder,
+                    url=url,
+                    title=title,
+                    # Normalise "no extension" ('') to None.
+                    ext=ext or None,
+                )
+            )
+
+        return matches
diff --git a/src/lib/fs.py b/src/lib/fs.py
@@ -76,6 +76,7 @@ def load(filename):
 
 
 def normalize_path(path: str):
+    """Resolve absolute paths with ``os.path.realpath``.
+
+    Deprecated: every call emits a ``DeprecationWarning``.
+
+    Args:
+        path: A non-empty path string; an empty string raises IndexError.
+
+    Returns:
+        ``os.path.realpath(path)`` when ``path`` starts with ``/``,
+        otherwise ``path`` unchanged.
+    """
+    import warnings
+
+    # A warning (not print) keeps stdout clean and lets callers filter it;
+    # stacklevel=2 attributes it to the caller of this function.
+    warnings.warn(
+        'normalize_path() is deprecated',
+        DeprecationWarning,
+        stacklevel=2,
+    )
     if path[0] == '/':
         return os.path.realpath(path)
     return path
diff --git a/src/markdown/markdown.py b/src/markdown/markdown.py
@@ -0,0 +1,31 @@
+from marko import inline, Markdown, ast_renderer, html_renderer
+
+
+class ObsidianLink(inline.InlineElement):
+    """Inline element for Obsidian wiki links: [[target]] or [[target|title]]."""
+
+    # Group 1 is the link target, optional group 2 the text after '|'.
+    # Whitespace next to the brackets and the '|' is left out of both groups.
+    # The pattern has no guard against a preceding '!', so it also matches
+    # the '[[...]]' part of an embed.
+    # NOTE(review): confirm how marko resolves that overlap with ObsidianEmbed.
+    pattern = r'\[\[\s*(.+?)\s*(?:\|\s*(.+?)\s*)?]\]'
+    # NOTE(review): presumably asks marko to parse the matched text as child
+    # inline elements — confirm against marko's InlineElement contract.
+    parse_children = True
+
+    def __init__(self, match):
+        # ``match`` is the regex match object for ``pattern``.
+        self.target = match.group(1)
+        # None when the link has no '|title' part.
+        self.title = match.group(2)
+        # Initialised empty; nothing in this file assigns it afterwards.
+        self.document = None
+
+
+class ObsidianEmbed(inline.InlineElement):
+    """Inline element for Obsidian embeds: ![[target]] or ![[target|title]]."""
+
+    # Same shape as the wiki-link pattern but requires a leading '!'.
+    # Group 1 is the embed target, optional group 2 the text after '|'.
+    pattern = r'\!\[\[\s*(.+?)\s*(?:\|\s*(.+?)\s*)?]\]'
+    # NOTE(review): presumably asks marko to parse the matched text as child
+    # inline elements — confirm against marko's InlineElement contract.
+    parse_children = True
+
+    def __init__(self, match):
+        # ``match`` is the regex match object for ``pattern``.
+        self.target = match.group(1)
+        # None when the embed has no '|title' part.
+        self.title = match.group(2)
+        # Initialised empty; nothing in this file assigns it afterwards.
+        self.document = None
+
+
+class Obsidian:
+    """Container listing the custom Obsidian inline elements.
+
+    Passed to ``Markdown.use`` at module level in this file.
+    """
+
+    # NOTE(review): the embed is listed before the link; confirm whether
+    # marko gives the order any significance.
+    elements = [ObsidianEmbed, ObsidianLink]
+
+
+# Module-level Markdown instance with the Obsidian elements registered;
+# created once at import time.
+markdown = Markdown()
+markdown.use(Obsidian)
+
+# HTMLRenderer = html_renderer.HTMLRenderer()
diff --git a/tests/entities/entities_matcher_test.py b/tests/entities/entities_matcher_test.py
@@ -0,0 +1,57 @@
+# from src.entities.matcher.entities_matcher import EntitiesMatcher, Target
+
+
+# def test_entities_matcher():
+#     content = """
+# ![Image Alt](https://example.com/inline_image)
+# """
+# 
+#     """
+# ![Image No Link Alt]()
+# ![Image Local Alt](./local_image.png)
+# ![Image Ref Alt][ref_id]
+# ![Image Ref Local Alt][ref_local_id]
+# 
+# [ref_id]: https:example.com/ref_image
+# [ref_local_id]: ./local_image_png
+# 
+# [[Obsidian Link]]
+# [[Link with Spec Characters,._-]]
+# [[Link with Title | Placeholder Title]]
+# 
+# ![[Obsidian Embed]]
+# ![[Obsidian Embed with Spec Characters,._-]]
+# ![[Obsidian Embed JPG.png]]
+# ![[Obsidian Embed PNG.png]]
+# ![[Obsidian Embed PDF.pdf]]
+# ![[Obsidian Embed with Alt | Placeholder Title]]
+# """
+# 
+#     res = EntitiesMatcher().get_matches(content)
+# 
+#     assert res == [
+#         {
+#             'matcher_id': 
+#             'placeholder': 'Image Alt',
+#             'url': 'https://example.com/inline_image',
+#         },
+#         # {'placeholder': 'Image No Link Alt', 'url': None},
+#         # {'placeholder': 'Image Local Alt', 'url': './local_image.png'},
+#         # {'placeholder': 'Image Ref Alt', 'url': 'https:example.com/ref_image'},
+#         # {'placeholder': 'Image Ref Local Alt', 'url': './local_image_png'},
+#         # {'placeholder': 'Obsidian Link'},
+#         # {'placeholder': 'Link with Spec Characters,._-'},
+#         # {
+#         #     'placeholder': 'Link with Title',
+#         #     'placeholder_title': 'Placeholder Title',
+#         # },
+#         # {'placeholder': 'Obsidian Embed'},
+#         # {'placeholder': 'Obsidian Embed with Spec Characters,._-'},
+#         # {'placeholder': 'Obsidian Embed JPG', 'ext': 'png'},
+#         # {'placeholder': 'Obsidian Embed PNG', 'ext': 'png'},
+#         # {'placeholder': 'Obsidian Embed PDF', 'ext': 'pdf'},
+#         # {
+#         #     'placeholder': 'Obsidian Embed with Alt',
+#         #     'placeholder_title': 'Placeholder Title',
+#         # },
+#     ]
diff --git a/tests/entities/matcher/markdown_inline_images_matcher_test.py b/tests/entities/matcher/markdown_inline_images_matcher_test.py
@@ -0,0 +1,26 @@
+import pytest
+
+from src.entities.matcher.markdown_inline_image_matcher import (
+    MarkdownInlineImageMatcher,
+)
+from src.entities.matcher.match import Match
+
+
+# Each case pairs raw Markdown with the single Match the matcher must return.
+@pytest.mark.parametrize(
+    'content,expected_result',
+    [
+        # Image preceded by a space: the placeholder excludes that space.
+        (
+            ' ![Title](link.png)',
+            Match(
+                matcher_id=MarkdownInlineImageMatcher.matcher_id,
+                placeholder='![Title](link.png)',
+                url='link.png',
+                title='Title',
+                ext='.png',
+            ),
+        )
+    ],
+)
+def test_markdown_inline_image_matcher(content, expected_result):
+    """The matcher returns exactly one match equal to ``expected_result``."""
+    # Unpacking into a one-element list also fails the test when the matcher
+    # returns zero or several matches.
+    [res] = MarkdownInlineImageMatcher.match(content)
+    assert res == expected_result