Skip to content

Commit 50d9b40

Browse files
authored
Merge pull request #377 from Matistjati/typo-checking
Check if there are incorrect submission directories
2 parents 8e7148e + 7938046 commit 50d9b40

File tree

2 files changed

+43
-0
lines changed

2 files changed

+43
-0
lines changed

problemtools/formatversion.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,16 @@ def output_validator_directory(self) -> str:
3131
case FormatVersion.V_2023_07:
3232
return 'output_validator'
3333

34+
@property
def submission_directories(self) -> list[str]:
    """Return the submission directory names defined for this format version."""
    # Guard clauses instead of a match statement; the comparisons are the
    # same equality checks a value pattern would perform.
    if self == FormatVersion.LEGACY:
        return ['accepted', 'partially_accepted', 'wrong_answer', 'time_limit_exceeded', 'run_time_error']

    if self == FormatVersion.V_2023_07:
        # TODO: parse submissions.yaml if applicable, since
        # 2023-07 and later formats support adding more submission directories
        return ['accepted', 'rejected', 'wrong_answer', 'time_limit_exceeded', 'run_time_error', 'brute_force']
43+
3444
# Support 2023-07 and 2023-07-draft strings.
3545
# This method should be replaced with an alias once we require python 3.13
3646
@classmethod

problemtools/verifyproblem.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import random
2424
import traceback
2525
import uuid
26+
import difflib
2627
from pathlib import Path
2728

2829
import colorlog
@@ -2074,6 +2075,7 @@ def check(self) -> tuple[int, int]:
20742075

20752076
self._check_symlinks()
20762077
self._check_file_and_directory_names()
2078+
self._check_submission_directory_names()
20772079

20782080
run.limit.check_limit_capabilities(self)
20792081

@@ -2097,6 +2099,37 @@ def check(self) -> tuple[int, int]:
20972099
context.wait_for_background_work()
20982100
return self.errors, self.warnings
20992101

2102+
def _check_submission_directory_names(self):
    """Heuristically check if submissions contain any directories that will be ignored because of typos or format mismatches

    For every directory directly inside ``submissions/`` whose name is not one
    of the submission directories of the problem's format version, a warning
    is emitted when either:

    * the name closely resembles one of the expected names (probable typo), or
    * the name is a submission directory of a different format version
      (probable format mismatch).

    Any other unknown directory is left alone without a warning.
    """
    # Sorted so that warnings are reported in the same order on every run;
    # the order in which glob() yields entries depends on the filesystem.
    present_dirs = sorted(p.name for p in (Path(self.probdir) / 'submissions').glob('*') if p.is_dir())
    if not present_dirs:
        return

    # Similarity ratio (0..1) at or above which a name is treated as a typo.
    typo_threshold = 0.75
    expected_dirs = self.format.submission_directories

    def most_similar(present_dir: str, candidates: list[str]):
        """Return (closest candidate, similarity ratio); (None, 0.0) if there are no candidates."""
        scored = (
            (candidate, difflib.SequenceMatcher(None, present_dir, candidate).ratio())
            for candidate in candidates
        )
        # max() keeps the first of several equally good candidates.
        return max(scored, key=lambda pair: pair[1], default=(None, 0.0))

    for present_dir in present_dirs:
        if present_dir in expected_dirs:
            # Exact match, no typo
            continue

        most_similar_dir, max_similarity = most_similar(present_dir, expected_dirs)
        if max_similarity >= typo_threshold:
            self.warning(f'Potential typo: directory submissions/{present_dir} is similar to {most_similar_dir}')
            continue

        # Not close to anything in this format: see whether the name belongs
        # to another format version, and report the first one that has it.
        for other_version in FormatVersion:
            if other_version != self.format and present_dir in other_version.submission_directories:
                self.warning(
                    f'Directory submissions/{present_dir} is not part of format version {self.format}, but part of {other_version}'
                )
                break
2132+
21002133
def _check_symlinks(self):
21012134
"""Check that all symlinks point to something existing within the problem package"""
21022135
probdir = os.path.realpath(self.probdir)

0 commit comments

Comments
 (0)