Skip to content

Commit 446a621

Browse files
committed
(no commit message provided)
1 parent cb1c40f commit 446a621

File tree

8 files changed

+1644
-19
lines changed

8 files changed

+1644
-19
lines changed

.gitignore

+2-1
Original file line numberDiff line numberDiff line change
@@ -159,4 +159,5 @@ cython_debug/
159159
# and can be added to the global gitignore or merged into this file. For a more nuclear
160160
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
161161
.idea/
162-
162+
scratch
163+
.aider*

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
# accfix - EPUB accessibility fixer
1+
# accfix - Automated accessibility fixes for EPUB

accfix/__init__.py

Whitespace-only changes.

accfix/app.py

+115
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
from pathlib import Path
2+
from loguru import logger as log
3+
import streamlit as st
4+
import tempfile
5+
import subprocess
6+
import shutil
7+
import re
8+
9+
# Resolve the Ace CLI once at import time. ``shutil.which`` returns ``None`` when
# the executable is missing, and ``Path(None)`` would fail with an opaque
# ``TypeError``; raise an explicit error instead.
_ace_executable = shutil.which("ace")
if _ace_executable is None:
    raise FileNotFoundError("The 'ace' executable was not found on PATH")
ace_path = Path(_ace_executable)
10+
# Matches an ESC character followed by one character in ``@``-``_``, any number
# of characters in ``0``-``?``, any number in space-``/`` and one final
# character in ``@``-``~``. check_epub() uses it to strip such sequences from
# each line of the ace process output.
ansi_escape = re.compile(r"\x1B[@-_][0-?]*[ -/]*[@-~]")
11+
12+
13+
def check_epub(fp):
    """Run the Ace checker on an EPUB and stream its console output.

    This is a generator: every line the ``ace`` process writes to stdout or
    stderr is stripped of ANSI escape sequences and surrounding whitespace,
    logged, and yielded as soon as it arrives.

    :param fp: Path to the EPUB file to check.
    :return: Generator yielding the cleaned output lines. If the process exits
        with a non-zero return code, a final failure message is yielded too.
    """
    fp = Path(fp)
    # Directory handed to Ace via ``-o``; created next to the EPUB.
    report_dir = fp.parent / f"{fp.stem}_report"
    report_dir.mkdir(exist_ok=True)

    # Run ACE Check
    cmd = [str(ace_path), "-f", "-o", str(report_dir), str(fp)]
    # The context manager closes the pipe and waits for the child even when
    # the consumer stops iterating early or an exception is raised.
    with subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
    ) as process:
        for line in process.stdout:
            stripped_line = ansi_escape.sub("", line).strip()
            log.info(stripped_line)
            yield stripped_line

    # Leaving the ``with`` block has waited for the process, so the return
    # code is available here.
    if process.returncode != 0:
        yield f"Ace command failed with return code {process.returncode}"
35+
36+
37+
# def main():
38+
# st.title("EPUB Accessibility Fixer")
39+
# uploaded_file = st.file_uploader("Upload an EPUB file", type=["epub"])
40+
#
41+
# if uploaded_file is not None:
42+
# with tempfile.NamedTemporaryFile(delete=False, suffix=".epub") as tmp_file:
43+
# tmp_file.write(uploaded_file.getbuffer())
44+
# tmp_file_path = tmp_file.name
45+
#
46+
# st.text("Checking EPUB for accessibility violations...")
47+
#
48+
# # Placeholder for log messages
49+
# log_placeholder = st.empty()
50+
# log_lines = []
51+
#
52+
# # Define the maximum number of lines to display
53+
# max_log_lines = 6
54+
#
55+
# for log_line in check_epub(tmp_file_path):
56+
# # Add new log line to the list
57+
# log_lines.append(log_line)
58+
#
59+
# # Keep only the last 'max_log_lines' lines
60+
# if len(log_lines) > max_log_lines:
61+
# log_lines = log_lines[-max_log_lines:]
62+
#
63+
# # Join the log lines into a single string
64+
# log_messages = "\n".join(log_lines)
65+
#
66+
# # Update the log placeholder
67+
# log_placeholder.text(log_messages)
68+
#
69+
# st.text("Accessibility check completed.")
70+
# # Placeholder for fixing process
71+
# st.text("Fixing issues... (implementation required)")
72+
# fixed_file_path = tmp_file_path.replace(".epub", "_fixed.epub")
73+
#
74+
# # Example of saving fixed file (this should be replaced with actual fixing logic)
75+
# with open(fixed_file_path, "wb") as fixed_file:
76+
# fixed_file.write(uploaded_file.getbuffer())
77+
#
78+
# st.text("Download the fixed EPUB file:")
79+
# with open(fixed_file_path, "rb") as fixed_file:
80+
# st.download_button(label="Download Fixed EPUB", data=fixed_file, file_name="fixed.epub")
81+
82+
83+
def main():
    """Streamlit page: upload an EPUB, run the Ace check, offer a download.

    The uploaded file is written to a temporary ``.epub`` file, checked with
    check_epub() while the checker output is shown live, and then copied
    unchanged to a ``*_fixed.epub`` file that is offered for download (the
    actual fixing logic is not implemented yet).
    """
    st.title("EPUB Accessibility Fixer")
    uploaded_file = st.file_uploader("Upload an EPUB file", type=["epub"])
    if uploaded_file is None:
        return

    # check_epub() needs a path on disk, so persist the upload first.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".epub") as tmp_file:
        tmp_file.write(uploaded_file.getbuffer())
        tmp_file_path = tmp_file.name

    st.text("Checking EPUB for accessibility violations...")

    # Placeholder for log messages
    log_placeholder = st.empty()
    log_lines = []
    for log_line in check_epub(tmp_file_path):
        # ``text`` has no ``append`` option; keep the lines ourselves and
        # re-render the whole log into the placeholder on every update.
        log_lines.append(log_line)
        log_placeholder.text("\n".join(log_lines))

    st.text("Accessibility check completed.")
    # Placeholder for fixing process
    st.text("Fixing issues... (implementation required)")
    # Derive the output name from the file name only, so a ".epub" occurring
    # elsewhere in the path is never rewritten.
    source_path = Path(tmp_file_path)
    fixed_file_path = source_path.with_name(f"{source_path.stem}_fixed.epub")

    # Example of saving fixed file (this should be replaced with actual fixing logic)
    with open(fixed_file_path, "wb") as fixed_file:
        fixed_file.write(uploaded_file.getbuffer())

    st.text("Download the fixed EPUB file:")
    with open(fixed_file_path, "rb") as fixed_file:
        st.download_button(label="Download Fixed EPUB", data=fixed_file, file_name="fixed.epub")


if __name__ == "__main__":
    main()

accfix/check.py

+90
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
from pathlib import Path
2+
import zipfile
3+
from typing import Optional
4+
from lingua import LanguageDetectorBuilder
5+
from lxml import etree
6+
from subprocess import run
7+
import json
8+
9+
10+
# Module-level language detector used by detect_lang(); built once at import
# time from all languages with low-accuracy mode enabled.
detector = LanguageDetectorBuilder.from_all_languages().with_low_accuracy_mode().build()
11+
12+
13+
def is_epub(fp: str | Path) -> bool:
14+
"""Check if a file is an epub."""
15+
fp = Path(fp)
16+
with zipfile.ZipFile(fp, "r") as zf:
17+
infolist = zf.infolist()
18+
if not infolist or infolist[0].filename != "mimetype":
19+
return False
20+
21+
info = infolist[0]
22+
if info.file_size == 20 and zf.read(info) == b"application/epub+zip":
23+
return True
24+
return False
25+
26+
27+
def opf_path(fp: str | Path) -> Path:
    """Determine OPF-File path within epub archive

    Reads ``META-INF/container.xml`` from the archive and returns the
    ``full-path`` attribute of the first ``rootfile`` element.

    :param fp: Path to the EPUB archive.
    :return: Path of the OPF file relative to the archive root.
    :raises KeyError: If the archive has no ``META-INF/container.xml`` entry.
    :raises ValueError: If container.xml declares no ``rootfile`` element with
        a ``full-path`` attribute.
    """
    namespace = {"ns": "urn:oasis:names:tc:opendocument:xmlns:container"}
    with zipfile.ZipFile(Path(fp), "r") as z:
        xml_data = z.read("META-INF/container.xml")

    tree = etree.fromstring(xml_data)
    rootfile_element = tree.find(".//ns:rootfiles/ns:rootfile", namespaces=namespace)
    # Compare against None explicitly; a missing element would otherwise
    # surface as an AttributeError on ``None``.
    full_path = None if rootfile_element is None else rootfile_element.get("full-path")
    if full_path is None:
        raise ValueError("META-INF/container.xml does not declare a rootfile with a full-path")
    return Path(full_path)
38+
39+
40+
def read_opf(fp: str | Path):
    """Load and parse the OPF file stored inside an EPUB archive.

    :param fp: Path to the EPUB archive.
    :return: Root element of the parsed OPF XML, as produced by
        ``etree.fromstring``.
    """
    archive_path = Path(fp)
    # opf_path() locates the OPF entry; as_posix() gives the forward-slash
    # form used to look the member up in the archive.
    member_name = opf_path(archive_path).as_posix()
    with zipfile.ZipFile(archive_path, "r") as archive:
        raw_opf = archive.read(member_name)
    return etree.fromstring(raw_opf)
49+
50+
51+
def xml_text(xml: etree._Element) -> str | None:
    """Collect the non-blank text nodes of an XML tree as plain text.

    Every text node matched by the ``//text()`` XPath is stripped of
    surrounding whitespace; empty results are dropped and the rest are joined
    with newlines.

    :param xml: Element on which the XPath query is evaluated.
    :return: The newline-joined text (an empty string when nothing matches).
    """
    pieces = []
    for node in xml.xpath("//text()"):
        cleaned = node.strip()
        if cleaned:
            pieces.append(cleaned)
    return "\n".join(pieces)
55+
56+
57+
def detect_lang(text: str) -> Optional[str]:
    """Detect the language of a text and return its lower-case ISO 639-1 code.

    :param text: Text to analyse with the module-level ``detector``.
    :return: The ISO 639-1 code in lower case, or ``None`` when the detector
        reports no language.
    """
    match = detector.detect_language_of(text)
    return match.iso_code_639_1.name.lower() if match else None
63+
64+
65+
def check(fp: str | Path) -> dict:
66+
# Prepare output path
67+
fp = Path(fp)
68+
report_dir = fp.parent / f"{fp.stem}_report"
69+
report_dir.mkdir(exist_ok=True)
70+
report_file = report_dir / "report.json"
71+
72+
# Run ACE Check
73+
cmd = ["ace", "-f", "-o", report_dir, fp]
74+
run(cmd, shell=True)
75+
76+
# Return Result data
77+
return json.load(report_file.open(encoding="utf-8"))
78+
79+
80+
if __name__ == "__main__":
    # Manual run against a local scratch file; the returned report is not used.
    check(Path("../scratch/test1.epub"))
83+
# print(is_epub(file))
84+
# opf_fp = opf_path(Path("../scratch/test1_fix.epub"))
85+
# print(opf_fp)
86+
# opf_tree = read_opf(file)
87+
# print(opf_tree)
88+
# print(detect_lang("Guten Tag. Wie geht es ihnen"))
89+
# print(xml_text(opf_tree))
90+
# print(check(Path("../scratch/test1_fix.epub")))

accfix/epub.py

+4-15
Original file line numberDiff line numberDiff line change
@@ -18,30 +18,19 @@ def __init__(self, path, clone=True):
1818
"""
1919
self._path = Path(path)
2020
self.name = self._path.name
21-
log.debug("Opening EPUB file: {}".format(self._path.name))
21+
log.debug(f"Opening EPUB file: {self._path.name}")
2222
self._clone = None
2323
if clone:
2424
temp_dir = tempfile.mkdtemp()
2525
self._clone = Path(temp_dir) / self._path.name
2626
shutil.copy2(self._path, self._clone)
27-
log.debug("Cloning EPUB file to: {}".format(self._clone.parent))
27+
log.debug(f"Cloning EPUB file to: {self._clone.parent}")
2828
self._fs = fsspec.filesystem("zip", fo=str(self._clone if self._clone else self._path))
2929

3030
def __repr__(self):
31-
return 'Epub("{}")'.format(self._path.name)
31+
return f'Epub("{self._path.name}")'
3232

33-
def write(self, path, content, mode="wb"):
34-
# type: (str|Path, str|bytes, str) -> None
35-
"""Write content to a file within the EPUB.
36-
37-
:param path: The relative path of the file within the EPUB.
38-
:param content: The content to write to the file.
39-
:param mode: The mode to open the file.
40-
"""
41-
path = Path(path)
42-
log.debug(f"Writing to: {self.name}/{path}")
43-
with self._fs.open(path.as_posix(), mode=mode) as file:
44-
file.write(content)
33+
def read(self, path, mode="rb"):
4534
# type: (str|Path, str) -> str | bytes
4635
"""Read the content of a file from the EPUB.
4736

0 commit comments

Comments
 (0)