Skip to content

Commit

Permalink
chg: improved documentation and code cleanup.
Browse files Browse the repository at this point in the history
  • Loading branch information
AlbertWeichselbraun committed Feb 16, 2024
1 parent 8d9861f commit 0f3280d
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 9 deletions.
4 changes: 2 additions & 2 deletions examples/custom-html-handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ def my_handle_end_b(state: HtmlDocumentState):


MY_MAPPING = CustomHtmlTagHandlerMapping(
- start_tag_handler_mapping={"b": my_handle_start_b},
- end_tag_handler_mapping={"b": my_handle_end_b},
+ start_tag_mapping={"b": my_handle_start_b},
+ end_tag_mapping={"b": my_handle_end_b},
)


Expand Down
8 changes: 4 additions & 4 deletions src/inscriptis/html_engine.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# coding:utf-8
"""The HTML Engine is responsible for converting HTML to text."""
- from typing import List, Dict, Callable, Any
+ from typing import List, Dict, Callable

import lxml.html
from lxml.etree import Comment
Expand Down Expand Up @@ -57,7 +57,7 @@ def __init__(self, html_tree: lxml.html.HtmlElement, config: ParserConfig = None

# setup start and end tag call tables
self.start_tag_handler_dict: Dict[
- str, Callable[[HtmlDocumentState, Any], None]
+ str, Callable[[HtmlDocumentState, Dict], None]
] = {
"table": table_start_handler,
"tr": tr_start_handler,
Expand All @@ -81,10 +81,10 @@ def __init__(self, html_tree: lxml.html.HtmlElement, config: ParserConfig = None

if config.custom_html_tag_handler_mapping:
self.start_tag_handler_dict.update(
- config.custom_html_tag_handler_mapping.start_tag_handler_mapping
+ config.custom_html_tag_handler_mapping.start_tag_mapping
)
self.end_tag_handler_dict.update(
- config.custom_html_tag_handler_mapping.end_tag_handler_mapping
+ config.custom_html_tag_handler_mapping.end_tag_mapping
)

# parse the HTML tree
Expand Down
11 changes: 8 additions & 3 deletions src/inscriptis/model/tag/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@


class CustomHtmlTagHandlerMapping(NamedTuple):
- """Provide a custom HTML Tag handler mapping."""
+ """Refine the standard HTML Tag handling with the provided mapping.
- start_tag_handler_mapping: Dict[str, Callable[[HtmlDocumentState, Dict], None]]
- end_tag_handler_mapping: Dict[str, Callable[[HtmlDocumentState], None]]
+ Attributes:
+ start_tag_mapping: a dictionary of custom start tag handlers.
+ end_tag_mapping: a dictionary of custom end tag handlers.
+ """
+
+ start_tag_mapping: Dict[str, Callable[[HtmlDocumentState, Dict], None]]
+ end_tag_mapping: Dict[str, Callable[[HtmlDocumentState], None]]

0 comments on commit 0f3280d

Please sign in to comment.