Skip to content

[wip]Refactor matchers #29

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified .coverage
Binary file not shown.
31 changes: 30 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ python-dotenv = "^0.19.2"
python-frontmatter = "^1.0.0"
python-slugify = "^5.0.2"
watchdog = "^2.1.6"
mistletoe = "^0.8.2"
marko = "^1.2.0"

[tool.poetry.dev-dependencies]
pytest = "^7.0.1"
Expand Down
7 changes: 7 additions & 0 deletions src/entities/matcher/abstract_matcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from src.entities.matcher.match import Match


class AbstractMatcher:
    """Base class for content matchers.

    Subclasses are expected to override :meth:`match`; the implementation
    here does nothing except raise.
    """

    @classmethod
    def match(cls, content: str):
        """Scan ``content`` for entities (to be provided by subclasses).

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        message = 'Should have implemented this'
        raise NotImplementedError(message)
50 changes: 50 additions & 0 deletions src/entities/matcher/entities_matcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from enum import Enum

# The module file is ``markdown_inline_image_matcher.py`` (singular "image").
from .markdown_inline_image_matcher import MarkdownInlineImageMatcher


class Target(Enum):
    """String-valued identifiers; every member's value equals its name.

    NOTE(review): presumably the kinds of entity a matcher can target --
    nothing in this module consumes the enum yet, so confirm with callers.
    """

    # Markdown image syntaxes.
    MARKDOWN_INLINE_IMAGE = 'MARKDOWN_INLINE_IMAGE'
    MARKDOWN_REFERENCE_IMAGE = 'MARKDOWN_REFERENCE_IMAGE'

    # Obsidian double-bracket syntaxes.
    OBSIDIAN_EMBED = 'OBSIDIAN_EMBED'
    OBSIDIAN_LINK = 'OBSIDIAN_LINK'


class Category(Enum):
    """Two string-valued categories; every member's value equals its name.

    NOTE(review): presumably distinguishes embedded entities from plain
    links -- not referenced elsewhere in this module, so confirm with callers.
    """

    EMBED = 'EMBED'
    LINK = 'LINK'


class EntitiesMatcher:
    """Runs a list of matcher classes over a content string and pools results."""

    # Matcher classes consulted, in this order, by get_matches().
    matchers = [
        MarkdownInlineImageMatcher,
    ]

    @classmethod
    def get_matches(cls, content: str):
        """Return the matches of every registered matcher as one flat list.

        Results keep the order of ``cls.matchers`` and, within a matcher,
        the order in which that matcher returned them.
        """
        collected = []
        for matcher in cls.matchers:
            collected.extend(matcher.match(content))
        return collected

    @classmethod
    def match_all(cls, content: str):
        """Return Obsidian embed results followed by Obsidian link results."""
        combined = []
        combined.extend(cls.get_obsidian_embed(content))
        combined.extend(cls.get_obsidian_link(content))
        return combined

    @classmethod
    def get_obsidian_embed(cls, content: str):
        """Stub: always returns an empty list.

        The patterns below are declared but not applied to ``content`` yet.
        """
        image_pattern = r'(\!\[\[(.*)\]\])'
        include_pattern = r'^(\[\[([\s\w\d_\-&|]*)\]\])$'
        return []

    @classmethod
    def get_obsidian_link(cls, content: str):
        """Stub: always returns an empty list.

        The patterns below are declared but not applied to ``content`` yet.
        """
        link_patterns = [
            r'^(\[\[([\s\w\d_\-&|]*)\]\])(?:.+)$',
            r'(?!^)(\[\[([\s\w\d_\-&|]*)\]\])',
        ]
        return []
30 changes: 30 additions & 0 deletions src/entities/matcher/markdown_inline_image_matcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import os
import re

from .abstract_matcher import AbstractMatcher
from .match import Match


class MarkdownInlineImageMatcher(AbstractMatcher):
    """Finds inline Markdown images written as ``![title](url)``."""

    # NOTE(review): 'Mather' looks like a typo for 'MATCHER'. It is left
    # unchanged because the ID is a runtime value other code may compare to.
    matcher_id = 'OBSIDIAN_BLOG/MARKDOWN/INLINE_IMAGE/Mather'

    # Groups: 1 = whole placeholder, 2 = title, 3 = url.
    # The title and url parts stop at their own closing delimiter instead of
    # using a greedy ``.*``; otherwise two images on one line collapse into a
    # single match. One level of nested ``[...]`` in the title and ``(...)``
    # in the url is still accepted (e.g. ``![x](file_(1).png)``).
    _IMAGE_RE = re.compile(
        r'(!\[((?:[^\[\]\n]|\[[^\[\]\n]*\])*)\]'
        r'\(((?:[^()\n]|\([^()\n]*\))*)\))'
    )

    @classmethod
    def match(cls, content: str):
        """Return one ``Match`` per inline image found in ``content``.

        Args:
            content: Markdown text to scan.

        Returns:
            A list of ``Match`` objects in document order. ``placeholder`` is
            the full ``![title](url)`` text and ``ext`` is the url's
            extension as returned by ``os.path.splitext`` (``''`` when the
            url has none).
        """
        found = []
        for placeholder, title, url in cls._IMAGE_RE.findall(content):
            _, ext = os.path.splitext(url)
            found.append(
                Match(
                    matcher_id=cls.matcher_id,
                    placeholder=placeholder,
                    url=url,
                    title=title,
                    ext=ext,
                )
            )
        return found
38 changes: 38 additions & 0 deletions src/entities/matcher/markdown_reference_image_matcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import os
import re
from src.entities.matcher.abstract_matcher import AbstractMatcher
from src.entities.matcher.match import Match


class MarkdownReferenceImageMatcher(AbstractMatcher):
    """Finds reference-style Markdown images written as ``![title][key]``.

    Each key is resolved against a ``[key]: url`` definition found in the
    same content.
    """

    matcher_id = 'OBSIDIAN_BLOG/MARKDOWN/REFERENCE_IMAGE/MATCHER'

    @classmethod
    def match(cls, content):
        """Return one ``Match`` per resolvable reference image in ``content``.

        Args:
            content: Markdown text holding both the image usages and their
                ``[key]: url`` definitions.

        Returns:
            A list of ``Match`` objects in document order. Usages whose key
            has no definition are skipped; when a key is defined more than
            once the first definition is used.
        """
        # Groups: 1 = whole placeholder, 2 = title, 3 = reference key.
        # Bounded character classes (instead of a greedy ``.*``) keep two
        # images on the same line from being merged into one match.
        usage_re = (
            r'(!\[((?:[^\[\]\n]|\[[^\[\]\n]*\])*)\]'
            r'\[([^\[\]\n]*)\])'
        )

        matches = []
        for placeholder, title, key in re.findall(usage_re, content):
            # Escape the key: it is user text and may contain regex
            # metacharacters such as '.', '+' or '('.
            definition_re = r'\[' + re.escape(key) + r'\]:\s(.*)'
            urls = re.findall(definition_re, content)
            if not urls:
                # No definition for this key, so there is nothing to link to.
                continue
            url = urls[0]
            _, ext = os.path.splitext(url)

            matches.append(
                Match(
                    matcher_id=cls.matcher_id,
                    placeholder=placeholder,
                    url=cls.normalize_path(url),
                    title=title,
                    ext=ext,
                )
            )
        return matches

    @staticmethod
    def normalize_path(path: str):
        """Resolve ``path`` with ``os.path.realpath`` when it starts with '/'.

        Any other path, including the empty string, is returned unchanged.
        """
        # startswith() is safe on an empty string, unlike ``path[0]``.
        if path.startswith('/'):
            return os.path.realpath(path)
        return path
12 changes: 12 additions & 0 deletions src/entities/matcher/match.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from typing import Optional
from dataclasses import dataclass


@dataclass
class Match:
    """One entity found in a piece of content by a matcher.

    Only ``matcher_id`` is required; every other field has a default.
    """

    # ``matcher_id`` attribute of the matcher class that produced this match.
    matcher_id: str
    # Defaults to False; ObsidianMatcher sets it when the placeholder
    # starts with '!'.
    is_embed: bool = False
    # Exact source text that was matched.
    placeholder: Optional[str] = None
    # Link or image target extracted from the placeholder.
    url: Optional[str] = None
    # Title / alt text extracted from the placeholder.
    title: Optional[str] = None
    # Extension of ``url`` as returned by os.path.splitext (leading dot kept).
    ext: Optional[str] = None
48 changes: 48 additions & 0 deletions src/entities/matcher/obsidian_link_matcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import os
import re
from .match import Match


class ObsidianMatcher:
    """Finds Obsidian links (``[[target]]``) and embeds (``![[target]]``).

    An optional title may follow the target after a pipe:
    ``[[target | title]]``.
    """

    matcher_id = 'OBSIDIAN_BLOG/OBSIDIAN/MATCHER'

    @classmethod
    def match(cls, content):
        """Return one ``Match`` per Obsidian link or embed in ``content``.

        Args:
            content: text to scan.

        Returns:
            A list of ``Match`` objects in document order. ``is_embed`` is
            True when the placeholder starts with '!'. ``url`` is the
            stripped text before the first '|', ``title`` the stripped text
            between the first and second '|' (``None`` without a pipe), and
            ``ext`` the url's extension or ``None`` when it has none.
        """
        # Groups: 1 = whole placeholder, 2 = text between the brackets.
        # Anything except brackets is accepted inside, so targets with
        # ',', '/', '#' and similar characters are no longer silently dropped.
        pattern = r'(!?\[\[([^\[\]]*)\]\])'

        matches = []
        for placeholder, inner in re.findall(pattern, content):
            # split() always yields at least one element, so the target part
            # is always present (possibly as an empty string).
            target, *rest = inner.split('|')
            url = target.strip()
            title = rest[0].strip() if rest else None
            # splitext gives '' for no extension; store that as None.
            ext = os.path.splitext(url)[1] or None

            matches.append(
                Match(
                    matcher_id=cls.matcher_id,
                    is_embed=placeholder.startswith('!'),
                    placeholder=placeholder,
                    url=url,
                    title=title,
                    ext=ext,
                )
            )

        return matches
1 change: 1 addition & 0 deletions src/lib/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def load(filename):


def normalize_path(path: str):
    """Resolve ``path`` with ``os.path.realpath`` when it starts with '/'.

    Deprecated: every call emits a ``DeprecationWarning``.

    Args:
        path: filesystem path; may be empty.

    Returns:
        The real path for inputs starting with '/', otherwise ``path``
        unchanged (including the empty string).
    """
    import warnings

    # A warning (rather than print) keeps stdout clean and lets callers
    # filter it or turn it into an error; stacklevel=2 points at the caller.
    warnings.warn('Deprecated!', DeprecationWarning, stacklevel=2)
    # startswith() is safe on an empty string, unlike ``path[0]``.
    if path.startswith('/'):
        return os.path.realpath(path)
    return path
31 changes: 31 additions & 0 deletions src/markdown/markdown.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from marko import inline, Markdown, ast_renderer, html_renderer


class ObsidianLink(inline.InlineElement):
    """Inline element for ``[[target]]`` and ``[[target | title]]``."""

    # Group 1 captures the target, optional group 2 the title after '|'.
    pattern = r'\[\[\s*(.+?)\s*(?:\|\s*(.+?)\s*)?]\]'
    parse_children = True

    def __init__(self, match):
        """Store the captured target and title from ``match``."""
        self.document = None
        # ``title`` is None when the placeholder has no '| title' part.
        self.title = match.group(2)
        self.target = match.group(1)


class ObsidianEmbed(inline.InlineElement):
    """Inline element for ``![[target]]`` and ``![[target | title]]``."""

    # Same shape as a wiki link, prefixed with '!'. Group 1 captures the
    # target, optional group 2 the title after '|'.
    pattern = r'\!\[\[\s*(.+?)\s*(?:\|\s*(.+?)\s*)?]\]'
    parse_children = True

    def __init__(self, match):
        """Store the captured target and title from ``match``."""
        self.document = None
        # ``title`` is None when the placeholder has no '| title' part.
        self.title = match.group(2)
        self.target = match.group(1)


class Obsidian:
    """Extension object bundling the Obsidian inline elements."""

    # Embed is listed before link, as in the original declaration.
    elements = [
        ObsidianEmbed,
        ObsidianLink,
    ]


# Module-level Markdown instance with the Obsidian extension registered.
markdown = Markdown()
markdown.use(Obsidian)

# HTMLRenderer = html_renderer.HTMLRenderer()
57 changes: 57 additions & 0 deletions tests/entities/entities_matcher_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# from src.entities.matcher.match import EntitiesMatcher, Target


# def test_entities_matcher():
# content = """
# ![Image Alt](https://example.com/inline_image)
# """
#
# """
# ![Image No Link Alt]()
# ![Image Local Alt](./local_image.png)
# ![Image Ref Alt][ref_id]
# ![Image Ref Local Alt][ref_local_id]
#
# [ref_id]: https:example.com/ref_image
# [ref_local_id]: ./local_image_png
#
# [[Obsidian Link]]
# [[Link with Spec Characters,._-]]
# [[Link with Title | Placeholder Title]]
#
# ![[Obsidian Embed]]
# ![[Obsidian Embed with Spec Characters,._-]]
# ![[Obsidian Embed JPG.png]]
# ![[Obsidian Embed PNG.png]]
# ![[Obsidian Embed PDF.pdf]]
# ![[Obsidian Embed with Alt | Placeholder Title]]
# """
#
# res = EntitiesMatcher().get_matches(content)
#
# assert res == [
# {
# 'matcher_id':
# 'placeholder': 'Image Alt',
# 'url': 'https://example.com/inline_image',
# },
# # {'placeholder': 'Image No Link Alt', 'url': None},
# # {'placeholder': 'Image Local Alt', 'url': './local_image.png'},
# # {'placeholder': 'Image Ref Alt', 'url': 'https:example.com/ref_image'},
# # {'placeholder': 'Image Ref Local Alt', 'url': './local_image_png'},
# # {'placeholder': 'Obsidian Link'},
# # {'placeholder': 'Link with Spec Characters,._-'},
# # {
# # 'placeholder': 'Link with Title',
# # 'placeholder_title': 'Placeholder Title',
# # },
# # {'placeholder': 'Obsidian Embed'},
# # {'placeholder': 'Obsidian Embed with Spec Characters,._-'},
# # {'placeholder': 'Obsidian Embed JPG', 'ext': 'png'},
# # {'placeholder': 'Obsidian Embed PNG', 'ext': 'png'},
# # {'placeholder': 'Obsidian Embed PDF', 'ext': 'pdf'},
# # {
# # 'placeholder': 'Obsidian Embed with Alt',
# # 'placeholder_title': 'Placeholder Title',
# # },
# ]
26 changes: 26 additions & 0 deletions tests/entities/matcher/markdown_inline_images_matcher_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import pytest

from src.entities.matcher.markdown_inline_image_matcher import (
MarkdownInlineImageMatcher,
)
from src.entities.matcher.match import Match


@pytest.mark.parametrize(
    'content,expected_result',
    [
        (
            ' ![Title](link.png)',
            Match(
                matcher_id=MarkdownInlineImageMatcher.matcher_id,
                placeholder='![Title](link.png)',
                url='link.png',
                title='Title',
                ext='.png',
            ),
        )
    ],
)
def test_markdown_inline_image_matcher(content, expected_result):
    """The matcher yields exactly one match, equal to the expected one."""
    found = MarkdownInlineImageMatcher.match(content)
    assert found == [expected_result]
Loading