Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Format: [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) · SemVer.
- Accept a bare `.tex` master file as input: `latex2ufdissertation path/to/main.tex` is now valid when the file contains `\documentclass{ufdissertation}`; the parent directory is used as the project root and the file is forced as the master (#80)

### Changed
- `UF-F3` (PDF layer) severity is now calibrated so `must-fix` means certain rejection: a deviating page is `must-fix` only when the document-wide modal body size is itself off 12pt (a global override) or the page's body text is larger than 12pt (never a float); an undersized page on an otherwise-12pt document (a `\footnotesize` table or `\small` figure sub-caption) is demoted to `review`. Previously every per-page deviation was `must-fix`, false-failing real submissions the Graduate School accepts (#82)
- Bundled-PDF progress message now shows the fully-resolved path (not just the filename) and a stale-source caveat: "may not reflect source edits since it was last compiled; delete it to force recompile" (#80)
- UF-A2 accessibility advisory is now suppressed on source-only (`--dry-run`) runs; it appears only when the PDF layer ran, avoiding a misleading note when no PDF was checked (#80)
- Human report now includes a severity guide ("must-fix = will cause UF Graduate School rejection; review = discretionary") and a scope disclaimer after the Summary line on every run; a "PDF layer did not run" note is appended on `--dry-run` or source-only runs (#80)
Expand Down
3 changes: 2 additions & 1 deletion docs/uf-rules.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,8 @@ The UF LaTeX template (`ufdissertation.cls`) does heavy lifting. Most formatting
- `\fontsize{...}{...}\selectfont` in body
- `\tiny`, `\scriptsize`, `\footnotesize`, `\small`, `\large`, `\Large`, `\LARGE`, `\huge`, `\Huge` in body text
- Allowed contexts: captions, headings (template-handled — flag only direct user override)
- **PDF backup:** per-page **body-mode** size; flag a page whose body-mode size ≠ 12 pt (tolerance ±0.5 pt). This is the authoritative check: the source scan over-fires on *localized-legal* sizing (a one-off `\fontsize` on a title/caption — issue #47), whereas body-mode ignores non-body runs. Verified: `\fontsize{20}{24}\selectfont` moves the body-mode to ~20 pt; legitimate captions/headings do not. Source therefore fires as `review`; PDF holds the `must-fix` verdict.
- **PDF backup:** per-page **body-mode** size; flag a page whose body-mode size ≠ 12 pt (tolerance ±0.5 pt). This is the authoritative check: the source scan over-fires on *localized-legal* sizing (a one-off `\fontsize` on a title/caption — issue #47), whereas body-mode ignores non-body runs. Verified: `\fontsize{20}{24}\selectfont` moves the body-mode to ~20 pt; legitimate captions/headings do not. Source therefore fires as `review`.
- **PDF severity calibration (#82):** `must-fix` is reserved for *certain* rejection; uncertain deviations are demoted to `review`. A deviating page is `must-fix` when **either** the document-wide modal body size is itself off 12 pt (a global override) **or** the page's body text is *larger* than 12 pt (oversized text is never a tolerated float). A page that renders *smaller* than 12 pt while the document-wide modal is 12 pt is almost always a float — a `\footnotesize` table or `\small` figure sub-caption, which the per-page modal cannot distinguish from running text and which UF tolerates in practice — so it emits `review`. Rationale: a real first submission with `\footnotesize` appendix tables and `\small` sub-captions was accepted by the Graduate School, so these are not rejection drivers.

### UF-F4 — Line spacing

Expand Down
86 changes: 64 additions & 22 deletions latex2ufdissertation/pipeline/pdf_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from dataclasses import dataclass
from pathlib import Path

from latex2ufdissertation.pipeline.rules import PDF
from latex2ufdissertation.pipeline.rules import MUST_FIX, PDF, REVIEW
from latex2ufdissertation.pipeline.types import Issues, MissingToolchain, UnreadableInput

# Random 6-uppercase-letter subset prefix added by the PDF engine per compile;
Expand Down Expand Up @@ -199,35 +199,77 @@ def _check_f2(pages: list[PageData], issues: Issues) -> None:
)


def _document_body_size(pages: list[PageData]) -> float | None:
    """Compute the modal ``body_size`` over all *pages*.

    Pages whose ``body_size`` is None are skipped. Among the sizes that share
    the highest page count, the one closest to the required 12pt body size
    wins, and any remaining tie goes to the smaller size. An ambiguous
    document is therefore treated as conforming, so its deviating pages are
    routed to review rather than must-fix.

    Returns:
        The winning size, or None when no page carries a body size (an
        all-image or empty document).
    """
    from collections import Counter

    tally: Counter[float] = Counter()
    for page in pages:
        if page.body_size is not None:
            tally[page.body_size] += 1
    if not tally:
        return None

    # Keep only the sizes that share the top frequency, then apply the
    # tie-break: nearest to the compliant size first, smaller size second.
    top_count = max(tally.values())
    contenders = [size for size, count in tally.items() if count == top_count]
    contenders.sort(key=lambda size: (abs(size - _F3_REQUIRED_BODY_PT), size))
    return contenders[0]


def _check_f3(pages: list[PageData], issues: Issues) -> None:
    r"""UF-F3: body-mode size must be 12pt throughout (PDF layer).

    Every page whose ``body_size`` is not None and deviates from
    ``_F3_REQUIRED_BODY_PT`` by more than ``_F3_SIZE_TOLERANCE_PT`` produces
    exactly one finding with ``layer=PDF``. Severity is calibrated so that
    must-fix means *certain* rejection and any uncertain case is a gentler
    review:

    * If the **document-wide** modal body size deviates from 12pt, the
      template's 12-point default was overridden globally, so every deviating
      page emits **must-fix**.
    * If a page's modal body text is *larger* than 12pt, it cannot be a
      tolerated float (sub-captions and tables only ever shrink text), so an
      oversized page emits **must-fix** regardless of the document-wide size.
    * If the document-wide modal size *is* 12pt but an individual page renders
      *smaller*, that page is almost always a float (a ``\footnotesize`` table
      or a ``\small`` figure sub-caption). The per-page modal size cannot
      distinguish such float text from running body text, so the deviation is
      uncertain and emits **review**.

    The source-layer half (checks.py UF-F3 emit sites) likewise emits REVIEW so
    that localized legal sizing does not produce a false must-fix without a
    compiled PDF.

    Args:
        pages: Per-page data; pages with ``body_size`` of None are skipped.
        issues: Collector that receives the UF-F3 findings.
    """
    doc_size = _document_body_size(pages)
    if doc_size is None:
        # No page has measurable body text, so there is nothing to judge.
        return
    doc_deviates = abs(doc_size - _F3_REQUIRED_BODY_PT) > _F3_SIZE_TOLERANCE_PT
    for page in pages:
        if page.body_size is None:
            continue
        if abs(page.body_size - _F3_REQUIRED_BODY_PT) <= _F3_SIZE_TOLERANCE_PT:
            continue
        # Certain rejection: a global override (doc-wide size off 12pt) or an
        # oversized page (text larger than 12pt is never a caption/table float).
        # An undersized page on an otherwise-12pt document is an uncertain
        # float artifact and is demoted to review.
        oversized = page.body_size - _F3_REQUIRED_BODY_PT > _F3_SIZE_TOLERANCE_PT
        severity = MUST_FIX if (doc_deviates or oversized) else REVIEW
        issues.add(
            "UF-F3",
            layer=PDF,
            severity=severity,
            location=f"p.{page.page_num}",
            observed=f"{page.body_size}pt body text",
            required="12-point body text",
            fix_hint=(
                "Rendered body text is not 12 pt; "
                "check for a \\fontsize{...}{...}\\selectfont override affecting the body."
            ),
        )


def _check_s1(pages: list[PageData], issues: Issues) -> None:
Expand Down
214 changes: 214 additions & 0 deletions tests/test_pdf_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,220 @@ def test_check_f3_violation_fixture_fires_must_fix() -> None:
assert body_size > 14.0, f"Expected body_size >14pt, got {body_size}pt"


# ---------------------------------------------------------------------------
# F3 severity calibration (3+4 hybrid): a deviation is must-fix ONLY when the
# document-wide body size is itself non-12pt (a global override = certain
# rejection) or the page's body text is larger than 12pt (oversized text is
# never a float). An undersized page on an otherwise-12pt document (a
# \footnotesize table or \small figure sub-caption) is uncertain and routes
# to review, not must-fix.
# ---------------------------------------------------------------------------


def test_check_f3_localized_deviation_is_review(tmp_path: Path) -> None:
    """A 12pt document with two smaller float pages (a \\footnotesize table at
    10pt and a \\small sub-caption at 10.9pt) yields REVIEW findings on exactly
    those pages and no must-fix at all.
    """
    from unittest.mock import patch

    from latex2ufdissertation.pipeline.pdf_checks import PageData, run_pdf_checks
    from latex2ufdissertation.pipeline.rules import REVIEW
    from latex2ufdissertation.pipeline.types import Issues

    pdf_path = tmp_path / "test.pdf"
    pdf_path.write_bytes(b"")

    # Three genuine 12pt body pages followed by two deviating float pages.
    sizes = (12.0, 12.0, 12.0, 10.9, 10.0)
    fake_pages = [
        PageData(page_num=number, body_font="TeXGyreTermesX-Regular", body_size=size)
        for number, size in enumerate(sizes, start=1)
    ]

    issues = Issues()
    with patch(
        "latex2ufdissertation.pipeline.pdf_checks._extract_pages",
        return_value=fake_pages,
    ):
        run_pdf_checks(pdf_path, issues)

    f3 = [entry for entry in issues.findings if entry.rule_id == "UF-F3"]
    assert len(f3) == 2, f"expected 2 UF-F3 findings (p.4, p.5), got {len(f3)}"
    flagged_locations = {entry.location for entry in f3}
    assert flagged_locations == {"p.4", "p.5"}
    for finding in f3:
        assert finding.severity == REVIEW, (
            f"localized deviation must be review, got {finding.severity} at {finding.location}"
        )
    assert issues.must_fix_count() == 0, "no must-fix expected on an otherwise-12pt document"


def test_check_f3_global_override_is_must_fix(tmp_path: Path) -> None:
    """A document whose modal body size is not 12pt (a global \\fontsize
    override) reports must-fix on every deviating page.
    """
    from unittest.mock import patch

    from latex2ufdissertation.pipeline.pdf_checks import PageData, run_pdf_checks
    from latex2ufdissertation.pipeline.rules import MUST_FIX
    from latex2ufdissertation.pipeline.types import Issues

    pdf_path = tmp_path / "test.pdf"
    pdf_path.write_bytes(b"")

    # Three pages shrunk to 11pt dominate the document; page 4 is a compliant
    # 12pt outlier and must not be flagged.
    sizes = (11.0, 11.0, 11.0, 12.0)
    fake_pages = [
        PageData(page_num=number, body_font="TeXGyreTermesX-Regular", body_size=size)
        for number, size in enumerate(sizes, start=1)
    ]

    issues = Issues()
    with patch(
        "latex2ufdissertation.pipeline.pdf_checks._extract_pages",
        return_value=fake_pages,
    ):
        run_pdf_checks(pdf_path, issues)

    f3 = [entry for entry in issues.findings if entry.rule_id == "UF-F3"]
    assert len(f3) == 3, f"expected 3 must-fix UF-F3 findings, got {len(f3)}"
    flagged_locations = {entry.location for entry in f3}
    assert flagged_locations == {"p.1", "p.2", "p.3"}
    for finding in f3:
        assert finding.severity == MUST_FIX
    assert issues.must_fix_count() == 3


def test_check_f3_global_undersize_is_must_fix(tmp_path: Path) -> None:
    """With every page at 10pt the body text is shrunk document-wide, which is
    a global override: each page reports must-fix and none is demoted to
    review. Pins the lower end of the deviation spectrum.
    """
    from unittest.mock import patch

    from latex2ufdissertation.pipeline.pdf_checks import PageData, run_pdf_checks
    from latex2ufdissertation.pipeline.rules import MUST_FIX
    from latex2ufdissertation.pipeline.types import Issues

    pdf_path = tmp_path / "test.pdf"
    pdf_path.write_bytes(b"")

    # Pages 1 through 5, all rendered at 10pt.
    fake_pages = []
    for number in range(1, 6):
        fake_pages.append(
            PageData(page_num=number, body_font="TeXGyreTermesX-Regular", body_size=10.0)
        )

    issues = Issues()
    with patch(
        "latex2ufdissertation.pipeline.pdf_checks._extract_pages",
        return_value=fake_pages,
    ):
        run_pdf_checks(pdf_path, issues)

    f3 = [entry for entry in issues.findings if entry.rule_id == "UF-F3"]
    assert len(f3) == 5, f"expected 5 must-fix findings, got {len(f3)}"
    for finding in f3:
        assert finding.severity == MUST_FIX
    assert issues.must_fix_count() == 5


def test_check_f3_tie_breaks_toward_12pt_review(tmp_path: Path) -> None:
    """If 12pt and a smaller size are equally common, the document-wide size
    resolves to 12pt, so the smaller page is the uncertain case and is
    reported as review rather than must-fix.
    """
    from unittest.mock import patch

    from latex2ufdissertation.pipeline.pdf_checks import PageData, run_pdf_checks
    from latex2ufdissertation.pipeline.rules import REVIEW
    from latex2ufdissertation.pipeline.types import Issues

    pdf_path = tmp_path / "test.pdf"
    pdf_path.write_bytes(b"")

    # One page each at 12.0 and 10.0: a tie that must resolve toward 12pt.
    compliant_page = PageData(page_num=1, body_font="TeXGyreTermesX-Regular", body_size=12.0)
    small_page = PageData(page_num=2, body_font="TeXGyreTermesX-Regular", body_size=10.0)
    fake_pages = [compliant_page, small_page]

    issues = Issues()
    with patch(
        "latex2ufdissertation.pipeline.pdf_checks._extract_pages",
        return_value=fake_pages,
    ):
        run_pdf_checks(pdf_path, issues)

    f3 = [entry for entry in issues.findings if entry.rule_id == "UF-F3"]
    assert len(f3) == 1 and f3[0].location == "p.2"
    only_finding = f3[0]
    assert only_finding.severity == REVIEW
    assert issues.must_fix_count() == 0


def test_check_f3_oversized_page_is_must_fix_even_if_doc_is_12pt(tmp_path: Path) -> None:
    """Body text LARGER than 12pt is a certain override, because no float
    enlarges text, so such a page reports must-fix even though the
    document-wide modal size is 12pt. Mirrors the \\fontsize{20} violation
    fixture at unit speed.
    """
    from unittest.mock import patch

    from latex2ufdissertation.pipeline.pdf_checks import PageData, run_pdf_checks
    from latex2ufdissertation.pipeline.rules import MUST_FIX
    from latex2ufdissertation.pipeline.types import Issues

    pdf_path = tmp_path / "test.pdf"
    pdf_path.write_bytes(b"")

    # Three 12pt pages form the majority; pages 4 and 5 render at ~20pt.
    sizes = (12.0, 12.0, 12.0, 19.9, 19.9)
    fake_pages = [
        PageData(page_num=number, body_font="TeXGyreTermesX-Regular", body_size=size)
        for number, size in enumerate(sizes, start=1)
    ]

    issues = Issues()
    with patch(
        "latex2ufdissertation.pipeline.pdf_checks._extract_pages",
        return_value=fake_pages,
    ):
        run_pdf_checks(pdf_path, issues)

    f3 = [entry for entry in issues.findings if entry.rule_id == "UF-F3"]
    flagged_locations = {entry.location for entry in f3}
    assert flagged_locations == {"p.4", "p.5"}
    for finding in f3:
        assert finding.severity == MUST_FIX, "oversized body text must be must-fix"


def test_document_body_size_helper() -> None:
    """_document_body_size yields the modal body size over pages that have
    one, skips None pages, breaks ties toward 12pt, and returns None for a
    document with no sized pages.
    """
    from latex2ufdissertation.pipeline.pdf_checks import PageData, _document_body_size

    # No pages at all, and a single page without a body size.
    assert _document_body_size([]) is None
    unsized_only = [PageData(1, None, None)]
    assert _document_body_size(unsized_only) is None

    # Clear majority at 12.0.
    mostly_twelve = [PageData(1, "F", 12.0), PageData(2, "F", 12.0), PageData(3, "F", 10.0)]
    assert _document_body_size(mostly_twelve) == 12.0

    # Global shrink: 11.0 dominates.
    mostly_eleven = [PageData(1, "F", 11.0), PageData(2, "F", 11.0), PageData(3, "F", 12.0)]
    assert _document_body_size(mostly_eleven) == 11.0

    # Tie between 12.0 and 10.0 resolves toward 12.0 (compliant side).
    tied = [PageData(1, "F", 12.0), PageData(2, "F", 10.0)]
    assert _document_body_size(tied) == 12.0


# ---------------------------------------------------------------------------
# S5 PDF-layer check (UF-S5, hyperlink annotations / outline present)
# ---------------------------------------------------------------------------
Expand Down