Skip to content

Commit 61b94ae

Browse files
committed
Fix UnicodeDecodeError in case of invalid UTF-8 in input file
1 parent 36759d5 commit 61b94ae

File tree

4 files changed

+58
-1
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@
6 6

7 7
- Use file README.md as package long description
8 8

9+
### Fixed
10+
11+
- Fix UnicodeDecodeError in case of invalid UTF-8 in input file
12+
9 13
## Version 4.0.0 (2022-01-23)
10 14

11 15
### Changed

msgcheck/po.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,7 @@ def read(self): # pylint: disable=too-many-locals
468 468
"""
469 469
self.msgs = []
470 470
checker = Checker()
471-
with open(self.filename, "r", encoding="utf-8") as po_file:
471+
with open(self.filename, "r", encoding="utf-8", errors="ignore") as po_file:
472 472
for line in po_file:
473 473
message = checker.check_line(line.strip())
474 474
if message:

tests/fr_invalid_utf8.po

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#
2+
# Copyright (C) 2024 Sébastien Helleu <[email protected]>
3+
#
4+
# This file is part of msgcheck.
5+
#
6+
# Msgcheck is free software; you can redistribute it and/or modify
7+
# it under the terms of the GNU General Public License as published by
8+
# the Free Software Foundation; either version 3 of the License, or
9+
# (at your option) any later version.
10+
#
11+
# Msgcheck is distributed in the hope that it will be useful,
12+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
# GNU General Public License for more details.
15+
#
16+
# You should have received a copy of the GNU General Public License
17+
# along with msgcheck. If not, see <https://www.gnu.org/licenses/>.
18+
#
19+
20+
#
21+
# Gettext file with invalid UTF-8 chars.
22+
#
23+
24+
msgid ""
25+
msgstr ""
26+
"Project-Id-Version: msgcheck\n"
27+
"Report-Msgid-Bugs-To: [email protected]\n"
28+
"POT-Creation-Date: 2014-05-03 12:00+0200\n"
29+
"PO-Revision-Date: 2024-09-12 17:02+0200\n"
30+
"Last-Translator: Sébastien Helleu <[email protected]>\n"
31+
"Language-Team: [email protected]\n"
32+
"Language: fr\n"
33+
"MIME-Version: 1.0\n"
34+
"Content-Type: text/plain; charset=iso-8859-13\n"
35+
"Content-Transfer-Encoding: 8bit\n"
36+
"Plural-Forms: nplurals=2; plural=(n > 1);\n"
37+
38+
# Normal string with special chars
39+
msgid "id-õäöü"
40+
msgstr "str-þð"

tests/test_msgcheck.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -434,3 +434,16 @@ def test_punct_full_stop_ja_zh(language, msgid, msgstr, error_message):
434 434
assert error_message in errors[0].message
435 435
else:
436 436
assert not errors
437+
438+
439+
def test_invalid_utf8():
440+
"""Test checks on a file with invalid UTF-8 chars."""
441+
po_check = PoCheck()
442+
po_check.set_check("fuzzy", True)
443+
result = po_check.check_files([local_path("fr_invalid_utf8.po")])
444+
445+
# be sure we have one file in result
446+
assert len(result) == 1
447+
448+
# the file has no errors
449+
assert len(result[0][1]) == 0

0 commit comments

Comments
 (0)