Skip to content

Commit 0f920a0

Browse files
authored
Sanitize manager output properly (#1396)
Evaluation outcome is stored as a formatted string with "%" serving as a formatting control character. We therefore need to escape percent signs in manager output. Previously, some ANSI escape sequences (ESC + arbitrary characters + m, which is a superset of SGR sequences) were removed from the messages. We argue that all programs emitting such sequences without checking that the output is a terminal are broken and we should not sweep the problem under the rug. Instead, we firmly reject all control characters in C0 and C1 ranges except for TAB.
1 parent 4b79731 commit 0f920a0

1 file changed

Lines changed: 19 additions & 14 deletions

File tree

cms/grading/steps/trusted.py

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
"""
3737

3838
import logging
39+
import re
3940

4041
from cms import config
4142
from cms.grading.Sandbox import Sandbox
@@ -55,24 +56,25 @@
5556
CHECKER_FILENAME = "checker"
5657

5758

58-
def _filter_ansi_escape(string: str) -> str:
59-
"""Filter out ANSI commands from the given string.
59+
def _sanitize_message(string: str) -> str:
60+
"""Sanitize a message read from manager output.
61+
62+
Percent signs are escaped (so they are not treated as formatting specifiers
63+
later). Control characters are rejected.
6064
6165
string: string to process.
6266
63-
return: string with ANSI commands stripped.
67+
return: sanitized string.
68+
69+
raise (ValueError): if invalid characters were found.
6470
6571
"""
66-
ansi_mode = False
67-
res = ''
68-
for char in string:
69-
if char == u'\033':
70-
ansi_mode = True
71-
if not ansi_mode:
72-
res += char
73-
if char == u'm':
74-
ansi_mode = False
75-
return res
72+
match = re.search('[\x00-\x08\x0a-\x1f\x7f-\xbf]', string)
73+
if match:
74+
ch = ord(match[0])
75+
raise ValueError(f'Invalid character in outcome: 0x{ch:02x}')
76+
77+
return string.replace('%', '%%')
7678

7779

7880
def extract_outcome_and_text(sandbox: Sandbox) -> tuple[float, list[str]]:
@@ -98,10 +100,13 @@ def extract_outcome_and_text(sandbox: Sandbox) -> tuple[float, list[str]]:
98100

99101
with sandbox.get_file_text(sandbox.stderr_file) as stderr_file:
100102
try:
101-
text = _filter_ansi_escape(stderr_file.readline().strip())
103+
text = _sanitize_message(stderr_file.readline().strip())
102104
except UnicodeDecodeError as error:
103105
logger.error("Manager stderr (text) is not valid UTF-8. %r", error)
104106
raise ValueError("Cannot decode the text.")
107+
except ValueError as error:
108+
logger.error("Manager stderr (text) is malformed. %r", error)
109+
raise error
105110

106111
try:
107112
outcome = float(outcome)

0 commit comments

Comments
 (0)