Skip to content

Commit

Permalink
Merge branch 'main' into setup.py-to-pyproject.toml
Browse files: browse the repository at this point in the history
  • Loading branch information
cclauss authored Jan 4, 2025
2 parents e380921 + 26a7cac commit 5a849ac
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 18 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
run: git fetch origin ${{ github.base_ref }}
- uses: actions/setup-python@v5
with:
-python-version: "3.8"
+python-version: "3.9"
architecture: x64
- name: Get pip cache dir
id: pip-cache
Expand All @@ -33,7 +33,7 @@ jobs:
${{ runner.os }}-pip-pre-commit
- name: pre-commit
run: |
-pip install -U pre-commit
+pip install --upgrade pre-commit
pre-commit install --install-hooks
pre-commit run --all-files
whisper-test:
Expand Down
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
-rev: v4.0.1
+rev: v5.0.0
hooks:
- id: check-json
- id: end-of-file-fixer
Expand All @@ -11,17 +11,17 @@ repos:
- id: check-added-large-files
args: [--maxkb=4096]
- repo: https://github.com/psf/black
-rev: 23.7.0
+rev: 24.10.0
hooks:
- id: black
- repo: https://github.com/pycqa/isort
-rev: 5.12.0
+rev: 5.13.2
hooks:
- id: isort
name: isort (python)
args: ["--profile", "black", "-l", "88", "--trailing-comma", "--multi-line", "3"]
- repo: https://github.com/pycqa/flake8.git
-rev: 6.0.0
+rev: 7.1.1
hooks:
- id: flake8
types: [python]
Expand Down
22 changes: 13 additions & 9 deletions whisper/normalizers/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,19 @@ def remove_symbols_and_diacritics(s: str, keep=""):
and drop any diacritics (category 'Mn' and some manual mappings)
"""
return "".join(
-c
-if c in keep
-else ADDITIONAL_DIACRITICS[c]
-if c in ADDITIONAL_DIACRITICS
-else ""
-if unicodedata.category(c) == "Mn"
-else " "
-if unicodedata.category(c)[0] in "MSP"
-else c
+(
+c
+if c in keep
+else (
+ADDITIONAL_DIACRITICS[c]
+if c in ADDITIONAL_DIACRITICS
+else (
+""
+if unicodedata.category(c) == "Mn"
+else " " if unicodedata.category(c)[0] in "MSP" else c
+)
+)
+)
for c in unicodedata.normalize("NFKD", s)
)

Expand Down
8 changes: 5 additions & 3 deletions whisper/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,11 @@ def iterate_subtitles():

yield start, end, "".join(
[
-re.sub(r"^(\s*)(.*)$", r"\1<u>\2</u>", word)
-if j == i
-else word
+(
+re.sub(r"^(\s*)(.*)$", r"\1<u>\2</u>", word)
+if j == i
+else word
+)
for j, word in enumerate(all_words)
]
)
Expand Down

0 comments on commit 5a849ac

Please sign in to comment.