diff --git a/packages/python-packages/apiview-copilot/.github/skills/report-apiview-metrics/SKILL.md b/packages/python-packages/apiview-copilot/.github/skills/report-apiview-metrics/SKILL.md index e27973753fb..edc4f17776a 100644 --- a/packages/python-packages/apiview-copilot/.github/skills/report-apiview-metrics/SKILL.md +++ b/packages/python-packages/apiview-copilot/.github/skills/report-apiview-metrics/SKILL.md @@ -1,6 +1,6 @@ --- name: report-apiview-metrics -description: "Run APIView platform metrics (versioned revisions and cross-language compliance). Use for: apiview metrics, platform metrics, version coverage, versioned revisions, cross-language compliance, compliance metrics, PackageVersion coverage, CrossLanguagePackageId, apiview-metrics, parser compliance." +description: "Run APIView platform metrics (versioned revisions, cross-language compliance, and duplicate line ID compliance). Use for: apiview metrics, platform metrics, version coverage, versioned revisions, cross-language compliance, compliance metrics, PackageVersion coverage, CrossLanguagePackageId, apiview-metrics, parser compliance, duplicate line IDs, HasDuplicateLineIds." argument-hint: "Optional: --months N, --end-date YYYY-MM-DD, --languages Python Java, --chart, --summary" --- @@ -9,19 +9,21 @@ argument-hint: "Optional: --months N, --end-date YYYY-MM-DD, --languages Python ## When to Use - Monitoring progress toward 100% versioned revisions across languages - Checking cross-language metadata compliance (CrossLanguagePackageId) +- Checking duplicate line ID compliance (HasDuplicateLineIds) - Generating trend charts for APIView platform health - Reviewing parser compliance over time ## What It Produces -A combined report with two metric buckets: +A combined report with three metric buckets: | Bucket | What it measures | -|--------|-----------------| +|--------|------------------| | **versions** | % of revisions with a valid `PackageVersion`, broken out by language and revision type (Automatic, Manual, PullRequest) | | **compliance** | % of reviews whose latest revision includes `CrossLanguagePackageId` (from `CrossLanguageMetadata`) | +| **duplicate_line_ids** | % of evaluated reviews whose latest revision does NOT have `HasDuplicateLineIds=true`. Revisions missing the field are tracked as "unknown" and excluded from the percentage. | -Output is JSON with top-level `"versions"` and `"compliance"` keys. With `--summary`, human-readable tables are printed to stderr. +Output is JSON with top-level `"versions"`, `"compliance"`, and `"duplicate_line_ids"` keys. With `--summary`, human-readable tables are printed to stderr. ## Defaults @@ -44,6 +46,7 @@ New-Item -ItemType Directory -Path output -Force | Out-Null; New-Item -ItemType After the command completes, **read the output file** with `read_file` to get the JSON results. Then use `view_image` to display charts at: - `output/charts/apiview_version_trends.png` - `output/charts/cross_language_compliance.png` +- `output/charts/duplicate_line_ids.png` ### Examples @@ -72,7 +75,8 @@ python cli.py report apiview-metrics --chart --environment staging After reading the output file: 1. Summarize the version-coverage trends (highlight languages below 100%) 2. Summarize the compliance trends (highlight languages below 100%) -3. Show the chart images with `view_image` +3. Summarize the duplicate line ID compliance trends (highlight languages below 100%) +4. Show the chart images with `view_image` ### Step 3: Answer Follow-up Questions diff --git a/packages/python-packages/apiview-copilot/AGENTS.md b/packages/python-packages/apiview-copilot/AGENTS.md index 6cded781648..6d9f552cf8f 100644 --- a/packages/python-packages/apiview-copilot/AGENTS.md +++ b/packages/python-packages/apiview-copilot/AGENTS.md @@ -93,7 +93,7 @@ Invoked via `avc` (or `python cli.py`): - `avc report active-reviews` — Query active reviews for a language and date range. - `avc report feedback` / `avc report memory` — Audit feedback and memories. - `avc report architect-comments` — Retrieve human architect review comments for a language and date range. -- `avc report apiview-metrics` — Track APIView platform metrics (versioned revision coverage and cross-language compliance). +- `avc report apiview-metrics` — Track APIView platform metrics (versioned revision coverage, cross-language compliance, and duplicate line ID compliance). - `avc ops deploy` — Deploy to Azure App Service. - `avc ops check` — Health check the deployed service. - `avc ops grant` / `avc ops revoke` — Manage Azure RBAC permissions. diff --git a/packages/python-packages/apiview-copilot/cli.py b/packages/python-packages/apiview-copilot/cli.py index ceb487ff812..485827c5626 100644 --- a/packages/python-packages/apiview-copilot/cli.py +++ b/packages/python-packages/apiview-copilot/cli.py @@ -49,18 +49,18 @@ from src._apiview_reviewer import SUPPORTED_LANGUAGES, ApiViewReview from src._database_manager import ContainerNames, DatabaseManager from src._garbage_collector import GarbageCollector -from src._apiview_metrics import ( - DEFAULT_OUTPUT_PATH as DEFAULT_VERSION_TRENDS_OUTPUT_PATH, -) from src._apiview_metrics import ( DEFAULT_COMPLIANCE_OUTPUT_PATH, -) -from src._apiview_metrics import ( + DEFAULT_DUPLICATE_LINEIDS_OUTPUT_PATH, + DEFAULT_OUTPUT_PATH as DEFAULT_VERSION_TRENDS_OUTPUT_PATH, build_compliance_reports, + build_duplicate_lineid_reports, build_version_reports, generate_compliance_chart, + generate_duplicate_lineid_chart, generate_version_chart, print_compliance_report, + print_duplicate_lineid_report, print_version_report, ) from src._comment_bucket_trends import ( @@ -2044,7 +2044,7 @@ def report_apiview_metrics( chart: bool = False, summary: bool = False, ) -> None: - """Generate APIView platform metrics (versioned-revision tracking and cross-language compliance).""" + """Generate APIView platform metrics (versioned-revision tracking, cross-language compliance, and duplicate line ID compliance).""" parsed_end_date = None if end_date: try: @@ -2070,8 +2070,16 @@ def report_apiview_metrics( environment=environment, ) + duplicate_lineid_reports = build_duplicate_lineid_reports( + languages=normalized_languages, + months=months, + end_date=parsed_end_date, + environment=environment, + ) + version_chart_path = None compliance_chart_path = None + duplicate_lineid_chart_path = None if chart: version_chart_path = generate_version_chart( version_reports, @@ -2083,14 +2091,20 @@ def report_apiview_metrics( output_path=DEFAULT_COMPLIANCE_OUTPUT_PATH, environment=environment, ) + duplicate_lineid_chart_path = generate_duplicate_lineid_chart( + duplicate_lineid_reports, + output_path=DEFAULT_DUPLICATE_LINEIDS_OUTPUT_PATH, + environment=environment, + ) - output = {"versions": version_reports, "compliance": compliance_reports} + output = {"versions": version_reports, "compliance": compliance_reports, "duplicate_line_ids": duplicate_lineid_reports} sys.stdout.buffer.write(json.dumps(output, indent=2, ensure_ascii=False, default=str).encode("utf-8")) sys.stdout.buffer.write(b"\n") if summary: print_version_report(version_reports, version_chart_path, environment=environment, file=sys.stderr) print_compliance_report(compliance_reports, compliance_chart_path, environment=environment, file=sys.stderr) + print_duplicate_lineid_report(duplicate_lineid_reports, duplicate_lineid_chart_path, environment=environment, file=sys.stderr) def grant_permissions(assignee_id: str = None): diff --git a/packages/python-packages/apiview-copilot/docs/metrics.md b/packages/python-packages/apiview-copilot/docs/metrics.md index 68db0a5e625..d62d89d75c5 100644 --- a/packages/python-packages/apiview-copilot/docs/metrics.md +++ b/packages/python-packages/apiview-copilot/docs/metrics.md @@ -172,6 +172,7 @@ avc report apiview-metrics [--months 6] [--end-date 2026-04-30] [--languages Pyt - **versions** — Percentage of revisions with a valid `PackageVersion`, broken out by language and revision type (Automatic, Manual, PullRequest). - **compliance** — Percentage of reviews whose latest revision includes `CrossLanguagePackageId`. +- **duplicate_line_ids** — Percentage of evaluated reviews whose latest revision does NOT have `HasDuplicateLineIds=true`. Revisions where the field is missing are tracked as "unknown" and excluded from the percentage calculation. ## OpenTelemetry Metrics diff --git a/packages/python-packages/apiview-copilot/src/_apiview_metrics.py b/packages/python-packages/apiview-copilot/src/_apiview_metrics.py index e6184b15552..18f77509a8d 100644 --- a/packages/python-packages/apiview-copilot/src/_apiview_metrics.py +++ b/packages/python-packages/apiview-copilot/src/_apiview_metrics.py @@ -23,6 +23,7 @@ DEFAULT_MONTHS = 6 DEFAULT_OUTPUT_PATH = Path("output/charts/apiview_version_trends.png") DEFAULT_COMPLIANCE_OUTPUT_PATH = Path("output/charts/cross_language_compliance.png") +DEFAULT_DUPLICATE_LINEIDS_OUTPUT_PATH = Path("output/charts/duplicate_line_ids.png") OMIT_LANGUAGES = ["c++", "c", "typespec", "swagger", "xml"] @@ -619,3 +620,241 @@ def print_compliance_report( print(f"\nSaved chart: {output_path}", file=file) else: print("\nChart was not generated.", file=file) + + +# --------------------------------------------------------------------------- +# Duplicate Line ID metrics +# --------------------------------------------------------------------------- + + +@dataclass +class MonthlyDuplicateLineIdPoint: + """Monthly duplicate line ID compliance data for a single language.""" + + label: str + start_date: str + end_date: str + clean: int = 0 + has_duplicates: int = 0 + unknown: int = 0 + total: int = 0 + clean_pct: float = 0.0 + + +def build_duplicate_lineid_reports( + languages: Optional[list[str]] = None, + months: int = DEFAULT_MONTHS, + end_date: Optional[date] = None, + *, + environment: str = PRODUCTION_ENVIRONMENT, +) -> dict[str, list[dict]]: + """Build per-language duplicate line ID compliance reports. + + For each month, groups revisions by ReviewId, picks the latest revision per review, + and checks whether ``HasDuplicateLineIds`` is set to true. + + Returns: + A dict mapping language name to a list of monthly data-point dicts. + """ + selected_languages = languages or DEFAULT_LANGUAGES + month_ranges = get_last_n_month_ranges(months=months, end_date=end_date) + if not month_ranges: + return {lang: [] for lang in selected_languages} + + full_start = month_ranges[0][0] + full_end = month_ranges[-1][1] + + start_iso = to_iso8601(full_start.isoformat()) + end_iso = to_iso8601(full_end.isoformat(), end_of_day=True) + + revisions_container = get_apiview_cosmos_client(container_name="APIRevisions", environment=environment) + + query = ( + "SELECT c.ReviewId, c.Language, c.HasDuplicateLineIds, c.CreatedOn " + "FROM c " + "WHERE (NOT IS_DEFINED(c.IsDeleted) OR c.IsDeleted = false) " + "AND c.CreatedOn >= @start AND c.CreatedOn <= @end" + ) + params = [ + {"name": "@start", "value": start_iso}, + {"name": "@end", "value": end_iso}, + ] + + all_revisions = list( + revisions_container.query_items(query=query, parameters=params, enable_cross_partition_query=True) + ) + + # Bucket revisions by month in a single pass O(revisions) + bucketed: dict[str, list[dict]] = {f"{start.year}-{start.month:02d}": [] for start, _ in month_ranges} + for rev in all_revisions: + created_on = rev.get("CreatedOn", "") + label = created_on[:7] # "YYYY-MM" slice from ISO8601 + if label in bucketed: + bucketed[label].append(rev) + + omit_lower = {lang.lower() for lang in OMIT_LANGUAGES} + + reports: dict[str, list[dict]] = {lang: [] for lang in selected_languages} + for start, end in month_ranges: + label = f"{start.year}-{start.month:02d}" + + month_revisions = bucketed[label] + + # Group by ReviewId and keep only the latest revision per review + latest_by_review: dict[str, dict] = {} + for rev in month_revisions: + review_id = rev.get("ReviewId") + if not review_id: + continue + existing = latest_by_review.get(review_id) + if existing is None or rev.get("CreatedOn", "") > existing.get("CreatedOn", ""): + latest_by_review[review_id] = rev + + # Compute per language + by_language: dict[str, dict] = {} + for rev in latest_by_review.values(): + lang = get_language_pretty_name(rev.get("Language", "Unknown")) + if lang.lower() in omit_lower: + continue + entry = by_language.setdefault(lang, {"clean": 0, "has_duplicates": 0, "unknown": 0, "total": 0}) + entry["total"] += 1 + has_dup = rev.get("HasDuplicateLineIds") + if has_dup is None: + entry["unknown"] += 1 + elif has_dup: + entry["has_duplicates"] += 1 + else: + entry["clean"] += 1 + + for entry in by_language.values(): + evaluated = entry["clean"] + entry["has_duplicates"] + entry["clean_pct"] = round((entry["clean"] / evaluated) * 100, 2) if evaluated else 0.0 + + for language in selected_languages: + entry = by_language.get( + language, {"clean": 0, "has_duplicates": 0, "unknown": 0, "total": 0, "clean_pct": 0.0} + ) + point = MonthlyDuplicateLineIdPoint( + label=label, + start_date=start.isoformat(), + end_date=end.isoformat(), + clean=entry["clean"], + has_duplicates=entry["has_duplicates"], + unknown=entry["unknown"], + total=entry["total"], + clean_pct=entry["clean_pct"], + ) + reports[language].append(asdict(point)) + + return reports + + +def generate_duplicate_lineid_chart( + reports: dict[str, list[dict]], + output_path: Path = DEFAULT_DUPLICATE_LINEIDS_OUTPUT_PATH, + *, + environment: str = PRODUCTION_ENVIRONMENT, +) -> Optional[Path]: + """Render a PNG chart showing duplicate line ID compliance percentage trends per language.""" + output_path.parent.mkdir(parents=True, exist_ok=True) + + try: + import matplotlib.pyplot as plt + except ImportError: + print("matplotlib is not installed; skipping chart generation.") + return None + + languages = list(reports.keys()) + month_count = len(next(iter(reports.values()), [])) if reports else 0 + if month_count == 0: + return None + + cols = 2 if len(languages) > 1 else 1 + rows = max(1, math.ceil(len(languages) / cols)) + + figure, axes = plt.subplots(rows, cols, figsize=(8 * cols, 5 * rows), sharey=True) + if not isinstance(axes, (list, tuple)): + try: + axes = axes.flatten() + except AttributeError: + axes = [axes] + else: + axes = list(axes) + + for index, language in enumerate(languages): + axis = axes[index] + report = reports[language] + labels = [item["label"] for item in report] + x_positions = list(range(len(labels))) + pcts = [item["clean_pct"] for item in report] + + _bars = axis.bar(x_positions, pcts, color="#2196F3", width=0.6) + + # Annotate each bar with count + for bar_pos, item in zip(x_positions, report): + evaluated = item["clean"] + item["has_duplicates"] + if evaluated > 0: + axis.annotate( + f"{item['clean']}/{evaluated}", + (bar_pos, item["clean_pct"]), + textcoords="offset points", + xytext=(0, 4), + ha="center", + fontsize=7, + ) + + axis.axhline(y=100, color="gray", linestyle=":", linewidth=1.0, alpha=0.5) + axis.set_title(language) + axis.set_xticks(x_positions, labels, rotation=45, ha="right") + axis.set_ylim(0, 115) + axis.grid(True, axis="y", linestyle="--", alpha=0.4) + + for index in range(len(languages), len(axes)): + figure.delaxes(axes[index]) + + environment_label = (environment or PRODUCTION_ENVIRONMENT).strip().lower() + figure.suptitle( + f"No-Duplicate Line ID Compliance %\nLast {month_count} Calendar Months (APIView {environment_label})", + fontsize=14, + y=0.985, + ) + figure.supxlabel("Month") + figure.supylabel("% Reviews Without Duplicate Line IDs") + plt.tight_layout(rect=(0.02, 0.03, 1, 0.90)) + figure.savefig(output_path, dpi=150) + plt.close(figure) + return output_path + + +def print_duplicate_lineid_report( + reports: dict[str, list[dict]], + output_path: Optional[Path], + *, + environment: str = PRODUCTION_ENVIRONMENT, + file=None, +) -> None: + """Print a compact terminal summary of duplicate line ID compliance.""" + environment_label = (environment or PRODUCTION_ENVIRONMENT).strip().lower() + print(f"No-duplicate line ID compliance % by month (APIView {environment_label})", file=file) + + for language, report in reports.items(): + print(f"\n{language}", file=file) + header = ["Month", "Clean", "Has Dupes", "Unknown", "Total", "Clean %"] + print(" ".join(f"{col:>14}" for col in header), file=file) + print(" ".join(["----------"] * len(header)), file=file) + + for item in report: + values = [ + f"{item['label']:>14}", + f"{item['clean']:>14}", + f"{item['has_duplicates']:>14}", + f"{item['unknown']:>14}", + f"{item['total']:>14}", + f"{item['clean_pct']:>14.1f}", + ] + print(" ".join(values), file=file) + + if output_path and output_path.exists(): + print(f"\nSaved chart: {output_path}", file=file) + else: + print("\nChart was not generated.", file=file) diff --git a/packages/python-packages/apiview-copilot/tests/duplicate_lineid_test.py b/packages/python-packages/apiview-copilot/tests/duplicate_lineid_test.py new file mode 100644 index 00000000000..a5f02db38c2 --- /dev/null +++ b/packages/python-packages/apiview-copilot/tests/duplicate_lineid_test.py @@ -0,0 +1,218 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. +# -------------------------------------------------------------------------- + +# pylint: disable=no-member,redefined-outer-name + +"""Tests for duplicate line ID compliance metrics (build_duplicate_lineid_reports).""" + +import sys +from datetime import date +from io import StringIO +from unittest.mock import MagicMock, patch + +# Mock azure.cosmos before importing modules +sys.modules["azure.cosmos"] = MagicMock() +sys.modules["azure.cosmos.exceptions"] = MagicMock() + +from src._apiview_metrics import ( # pylint: disable=wrong-import-position + build_duplicate_lineid_reports, + print_duplicate_lineid_report, +) + + +def _make_revision(review_id, language, has_duplicate_line_ids=None, created_on="2026-04-15T00:00:00Z"): + """Helper to create a fake revision dict.""" + rev = { + "ReviewId": review_id, + "Language": language, + "CreatedOn": created_on, + } + if has_duplicate_line_ids is not None: + rev["HasDuplicateLineIds"] = has_duplicate_line_ids + return rev + + +class TestBuildDuplicateLineidReports: + """Tests for build_duplicate_lineid_reports.""" + + @patch("src._apiview_metrics.get_apiview_cosmos_client") + def test_latest_revision_per_review(self, mock_cosmos): + """When a review has multiple revisions, only the latest is counted.""" + container = MagicMock() + container.query_items.return_value = [ + _make_revision("r1", "Python", True, created_on="2026-04-10T00:00:00Z"), + _make_revision("r1", "Python", False, created_on="2026-04-20T00:00:00Z"), + ] + mock_cosmos.return_value = container + + reports = build_duplicate_lineid_reports( + languages=["Python"], + months=1, + end_date=date(2026, 4, 30), + ) + + # The later revision has HasDuplicateLineIds=False, so should be clean + assert reports["Python"][0]["clean"] == 1 + assert reports["Python"][0]["has_duplicates"] == 0 + assert reports["Python"][0]["unknown"] == 0 + + @patch("src._apiview_metrics.get_apiview_cosmos_client") + def test_missing_field_tracked_as_unknown(self, mock_cosmos): + """Revisions without HasDuplicateLineIds are tracked as unknown.""" + container = MagicMock() + container.query_items.return_value = [ + _make_revision("r1", "Python", False, created_on="2026-04-10T00:00:00Z"), + _make_revision("r2", "Python", None, created_on="2026-04-15T00:00:00Z"), # field absent + _make_revision("r3", "Python", True, created_on="2026-04-20T00:00:00Z"), + ] + mock_cosmos.return_value = container + + reports = build_duplicate_lineid_reports( + languages=["Python"], + months=1, + end_date=date(2026, 4, 30), + ) + + point = reports["Python"][0] + assert point["clean"] == 1 + assert point["has_duplicates"] == 1 + assert point["unknown"] == 1 + assert point["total"] == 3 + + @patch("src._apiview_metrics.get_apiview_cosmos_client") + def test_unknown_excluded_from_percentage(self, mock_cosmos): + """Clean percentage is calculated only from evaluated (clean + has_duplicates) revisions.""" + container = MagicMock() + container.query_items.return_value = [ + _make_revision("r1", "Python", False, created_on="2026-04-05T00:00:00Z"), + _make_revision("r2", "Python", True, created_on="2026-04-10T00:00:00Z"), + _make_revision("r3", "Python", None, created_on="2026-04-15T00:00:00Z"), # absent + _make_revision("r4", "Python", None, created_on="2026-04-20T00:00:00Z"), # absent + ] + mock_cosmos.return_value = container + + reports = build_duplicate_lineid_reports( + languages=["Python"], + months=1, + end_date=date(2026, 4, 30), + ) + + point = reports["Python"][0] + # 1 clean out of 2 evaluated = 50% + assert point["clean_pct"] == 50.0 + + @patch("src._apiview_metrics.get_apiview_cosmos_client") + def test_all_unknown_yields_zero_pct(self, mock_cosmos): + """If all revisions are unknown, clean_pct is 0.""" + container = MagicMock() + container.query_items.return_value = [ + _make_revision("r1", "Python", None, created_on="2026-04-10T00:00:00Z"), + _make_revision("r2", "Python", None, created_on="2026-04-15T00:00:00Z"), + ] + mock_cosmos.return_value = container + + reports = build_duplicate_lineid_reports( + languages=["Python"], + months=1, + end_date=date(2026, 4, 30), + ) + + point = reports["Python"][0] + assert point["unknown"] == 2 + assert point["clean_pct"] == 0.0 + + @patch("src._apiview_metrics.get_apiview_cosmos_client") + def test_omits_excluded_languages(self, mock_cosmos): + """Languages in OMIT_LANGUAGES (e.g., TypeSpec) are not counted.""" + container = MagicMock() + container.query_items.return_value = [ + _make_revision("r1", "Python", False, created_on="2026-04-10T00:00:00Z"), + _make_revision("r2", "TypeSpec", False, created_on="2026-04-12T00:00:00Z"), + _make_revision("r3", "Swagger", True, created_on="2026-04-14T00:00:00Z"), + ] + mock_cosmos.return_value = container + + reports = build_duplicate_lineid_reports( + languages=["Python"], + months=1, + end_date=date(2026, 4, 30), + ) + + # Only Python should be in the report + assert reports["Python"][0]["total"] == 1 + assert reports["Python"][0]["clean"] == 1 + + @patch("src._apiview_metrics.get_apiview_cosmos_client") + def test_buckets_by_month(self, mock_cosmos): + """Revisions are correctly bucketed into their respective months.""" + container = MagicMock() + container.query_items.return_value = [ + _make_revision("r1", "Python", False, created_on="2026-03-15T00:00:00Z"), + _make_revision("r2", "Python", True, created_on="2026-04-10T00:00:00Z"), + ] + mock_cosmos.return_value = container + + reports = build_duplicate_lineid_reports( + languages=["Python"], + months=2, + end_date=date(2026, 4, 30), + ) + + assert len(reports["Python"]) == 2 + assert reports["Python"][0]["label"] == "2026-03" + assert reports["Python"][0]["clean"] == 1 + assert reports["Python"][0]["has_duplicates"] == 0 + assert reports["Python"][1]["label"] == "2026-04" + assert reports["Python"][1]["clean"] == 0 + assert reports["Python"][1]["has_duplicates"] == 1 + + @patch("src._apiview_metrics.get_apiview_cosmos_client") + def test_empty_results(self, mock_cosmos): + """Empty query results produce zeroed reports.""" + container = MagicMock() + container.query_items.return_value = [] + mock_cosmos.return_value = container + + reports = build_duplicate_lineid_reports( + languages=["Go"], + months=1, + end_date=date(2026, 4, 30), + ) + + assert reports["Go"][0]["clean"] == 0 + assert reports["Go"][0]["has_duplicates"] == 0 + assert reports["Go"][0]["unknown"] == 0 + assert reports["Go"][0]["total"] == 0 + assert reports["Go"][0]["clean_pct"] == 0.0 + + +class TestPrintDuplicateLineidReport: + """Tests for print_duplicate_lineid_report.""" + + def test_prints_unknown_column(self): + """The printed table includes the Unknown column.""" + reports = { + "Python": [ + { + "label": "2026-04", + "start_date": "2026-04-01", + "end_date": "2026-04-30", + "clean": 5, + "has_duplicates": 2, + "unknown": 3, + "total": 10, + "clean_pct": 71.43, + } + ] + } + + output = StringIO() + print_duplicate_lineid_report(reports, None, file=output) + text = output.getvalue() + + assert "Unknown" in text + assert "Python" in text + assert "2026-04" in text diff --git a/src/dotnet/APIView/APIViewUnitTests/ReviewsControllerTests.cs b/src/dotnet/APIView/APIViewUnitTests/ReviewsControllerTests.cs index b2edba85252..091e9b8fd07 100644 --- a/src/dotnet/APIView/APIViewUnitTests/ReviewsControllerTests.cs +++ b/src/dotnet/APIView/APIViewUnitTests/ReviewsControllerTests.cs @@ -1,7 +1,9 @@ using System.Collections.Generic; using System.Security.Claims; using System.Threading.Tasks; +using APIView; using APIView.Identity; +using APIView.Model.V2; using APIViewWeb; using APIViewWeb.Hubs; using APIViewWeb.LeanControllers; @@ -397,5 +399,139 @@ public async Task DeleteReviewAsync_WhenAuthorizationFails_ReturnsForbidden() objectResult!.StatusCode.Should().Be(403); objectResult.Value.Should().Be("You are not authorized to delete this review."); } + + [Fact] + public async Task GetReviewContentAsync_WhenHasDuplicateLineIdsIsNull_SelfHealsAndPersists() + { + // Arrange + string reviewId = "test-review-id"; + string revisionId = "test-revision-id"; + string fileId = "test-file-id"; + + var activeRevision = new APIRevisionListItemModel + { + Id = revisionId, + Language = "C#", + HasDuplicateLineIds = null, + Files = new List + { + new() { FileId = fileId, ParserStyle = ParserStyle.Tree } + } + }; + + var codeFile = new CodeFile + { + ReviewLines = new List + { + new() { LineId = "line1" }, + new() { LineId = "line2" } + }, + ContentGenerationInProgress = false + }; + + var user = new ClaimsPrincipal(new ClaimsIdentity(new Claim[] + { + new(ClaimConstants.Login, "testuser"), + new(System.Security.Claims.ClaimTypes.Name, "Test User") + }, "mock")); + + _controller.ControllerContext = new ControllerContext + { + HttpContext = new DefaultHttpContext { User = user } + }; + + _mockApiRevisionsManager + .Setup(m => m.GetAPIRevisionAsync(user, revisionId)) + .ReturnsAsync(activeRevision); + + _mockCodeFileRepository + .Setup(m => m.GetCodeFileFromStorageAsync(revisionId, fileId)) + .ReturnsAsync(codeFile); + + _mockCommentsManager + .Setup(m => m.GetCommentsAsync(reviewId, false, CommentType.APIRevision, false)) + .ReturnsAsync(new List()); + + _mockCommentsManager + .Setup(m => m.SyncDiagnosticCommentsAsync(It.IsAny(), It.IsAny(), It.IsAny>())) + .ReturnsAsync(new List()); + + _mockApiRevisionsManager + .Setup(m => m.UpdateAPIRevisionAsync(It.IsAny())) + .Returns(Task.CompletedTask); + + // Act + await _controller.GetReviewContentAsync(reviewId, activeApiRevisionId: revisionId); + + // Assert - verify self-healing persisted the revision + _mockApiRevisionsManager.Verify( + m => m.UpdateAPIRevisionAsync(It.Is(r => r.HasDuplicateLineIds == false)), + Times.Once); + } + + [Fact] + public async Task GetReviewContentAsync_WhenHasDuplicateLineIdsIsNotNull_DoesNotUpdate() + { + // Arrange + string reviewId = "test-review-id"; + string revisionId = "test-revision-id"; + string fileId = "test-file-id"; + + var activeRevision = new APIRevisionListItemModel + { + Id = revisionId, + Language = "C#", + HasDuplicateLineIds = false, + Files = new List + { + new() { FileId = fileId, ParserStyle = ParserStyle.Tree } + } + }; + + var codeFile = new CodeFile + { + ReviewLines = new List + { + new() { LineId = "line1" }, + new() { LineId = "line2" } + }, + ContentGenerationInProgress = false + }; + + var user = new ClaimsPrincipal(new ClaimsIdentity(new Claim[] + { + new(ClaimConstants.Login, "testuser"), + new(System.Security.Claims.ClaimTypes.Name, "Test User") + }, "mock")); + + _controller.ControllerContext = new ControllerContext + { + HttpContext = new DefaultHttpContext { User = user } + }; + + _mockApiRevisionsManager + .Setup(m => m.GetAPIRevisionAsync(user, revisionId)) + .ReturnsAsync(activeRevision); + + _mockCodeFileRepository + .Setup(m => m.GetCodeFileFromStorageAsync(revisionId, fileId)) + .ReturnsAsync(codeFile); + + _mockCommentsManager + .Setup(m => m.GetCommentsAsync(reviewId, false, CommentType.APIRevision, false)) + .ReturnsAsync(new List()); + + _mockCommentsManager + .Setup(m => m.SyncDiagnosticCommentsAsync(It.IsAny(), It.IsAny(), It.IsAny>())) + .ReturnsAsync(new List()); + + // Act + await _controller.GetReviewContentAsync(reviewId, activeApiRevisionId: revisionId); + + // Assert - UpdateAPIRevisionAsync should NOT be called since value is already set + _mockApiRevisionsManager.Verify( + m => m.UpdateAPIRevisionAsync(It.IsAny()), + Times.Never); + } } } diff --git a/src/dotnet/APIView/APIViewWeb/Extensions/APIRevisionExtensions.cs b/src/dotnet/APIView/APIViewWeb/Extensions/APIRevisionExtensions.cs new file mode 100644 index 00000000000..3ccd087be77 --- /dev/null +++ b/src/dotnet/APIView/APIViewWeb/Extensions/APIRevisionExtensions.cs @@ -0,0 +1,36 @@ +using System; +using System.Threading.Tasks; +using APIView; +using APIViewWeb.LeanModels; +using APIViewWeb.Managers; +using APIViewWeb.Managers.Interfaces; + +namespace APIViewWeb.Extensions +{ + public static class APIRevisionExtensions + { + /// + /// Evaluates and persists HasDuplicateLineIds for revisions that were never evaluated. + /// Persistence is best-effort; failures are swallowed so callers can still return content. + /// + public static async Task SelfHealHasDuplicateLineIdsAsync( + this IAPIRevisionsManager revisionsManager, + APIRevisionListItemModel revision, + CodeFile codeFile) + { + if (revision.HasDuplicateLineIds != null) + return; + + revision.HasDuplicateLineIds = CodeFileManager.HasDuplicateLineIds(codeFile); + try + { + await revisionsManager.UpdateAPIRevisionAsync(revision); + } + catch (Exception) + { + // Best-effort persist; swallow transient Cosmos/network errors + // so review content can still be returned to the caller. + } + } + } +} diff --git a/src/dotnet/APIView/APIViewWeb/Helpers/PageModelHelpers.cs b/src/dotnet/APIView/APIViewWeb/Helpers/PageModelHelpers.cs index 77e3432821b..045116dc997 100644 --- a/src/dotnet/APIView/APIViewWeb/Helpers/PageModelHelpers.cs +++ b/src/dotnet/APIView/APIViewWeb/Helpers/PageModelHelpers.cs @@ -4,6 +4,7 @@ using System.Security.Claims; using APIView.Diff; using APIView; +using APIViewWeb.Extensions; using APIViewWeb.Managers; using APIViewWeb.Models; using APIViewWeb.Repositories; @@ -299,6 +300,10 @@ public static async Task GetReviewContentAsync( var comments = await commentManager.GetReviewCommentsAsync(review.Id); var activeRevisionRenderableCodeFile = await codeFileRepository.GetCodeFileAsync(activeRevision.Id, activeRevision.Files[0], activeRevision.Language); var activeRevisionReviewCodeFile = activeRevisionRenderableCodeFile.CodeFile; + + // Self-heal HasDuplicateLineIds for older revisions that were never evaluated + await reviewRevisionsManager.SelfHealHasDuplicateLineIdsAsync(activeRevision, activeRevisionReviewCodeFile); + var fileDiagnostics = activeRevisionReviewCodeFile.Diagnostics ?? Array.Empty(); var activeRevisionHtmlLines = activeRevisionRenderableCodeFile.Render(showDocumentation: showDocumentation); var codeLines = new CodeLineModel[0]; diff --git a/src/dotnet/APIView/APIViewWeb/LeanControllers/ReviewsController.cs b/src/dotnet/APIView/APIViewWeb/LeanControllers/ReviewsController.cs index feacee1b9ae..bbf7ff208a0 100644 --- a/src/dotnet/APIView/APIViewWeb/LeanControllers/ReviewsController.cs +++ b/src/dotnet/APIView/APIViewWeb/LeanControllers/ReviewsController.cs @@ -259,6 +259,9 @@ public async Task> GetReviewContentAsync(string revi return new LeanJsonResult("Content generation in progress", StatusCodes.Status202Accepted, languageServices.ReviewGenerationPipelineUrl); } + // Self-heal HasDuplicateLineIds for older revisions that were never evaluated + await _apiRevisionsManager.SelfHealHasDuplicateLineIdsAsync(activeAPIRevision, activeRevisionReviewCodeFile); + IEnumerable allCommentsFromDb = await _commentsManager.GetCommentsAsync(reviewId, commentType: CommentType.APIRevision); List diagnosticComments = await _commentsManager.SyncDiagnosticCommentsAsync( activeAPIRevision, diff --git a/src/dotnet/APIView/APIViewWeb/LeanModels/ReviewListModels.cs b/src/dotnet/APIView/APIViewWeb/LeanModels/ReviewListModels.cs index a3c81aa2835..b884b38f177 100644 --- a/src/dotnet/APIView/APIViewWeb/LeanModels/ReviewListModels.cs +++ b/src/dotnet/APIView/APIViewWeb/LeanModels/ReviewListModels.cs @@ -150,6 +150,7 @@ public string PackageVersion public Dictionary> HeadingsOfSectionsWithDiff { get; set; } = new Dictionary>(); public List AssignedReviewers { get; set; } = new List(); public bool IsApproved { get; set; } + public bool? HasDuplicateLineIds { get; set; } public bool HasAutoGeneratedComments { get; set; } public bool CopilotReviewInProgress { get; set; } public string CopilotReviewJobId { get; set; } diff --git a/src/dotnet/APIView/APIViewWeb/Managers/APIRevisionsManager.cs b/src/dotnet/APIView/APIViewWeb/Managers/APIRevisionsManager.cs index 98fe8b2ecdd..584899409ed 100644 --- a/src/dotnet/APIView/APIViewWeb/Managers/APIRevisionsManager.cs +++ b/src/dotnet/APIView/APIViewWeb/Managers/APIRevisionsManager.cs @@ -657,6 +657,7 @@ private async Task AddAPIRevisionCoreAsync( // Reuse pre-parsed code file to avoid duplicate parsing codeFileModel = await _codeFileManager.CreateReviewCodeFileModel(apiRevision.Id, preParsedMemoryStream, preParsedCodeFile); codeFileModel.FileName = name; + apiRevision.HasDuplicateLineIds = CodeFileManager.HasDuplicateLineIds(preParsedCodeFile); } else { @@ -682,16 +683,21 @@ private async Task AddAPIRevisionCoreAsync( else { CodeFile codeFile = await _codeFileRepository.GetCodeFileFromStorageAsync(apiRevision.Id, codeFileModel.FileId); - if (codeFile?.Diagnostics != null && codeFile.Diagnostics.Length > 0) + if (codeFile != null) { - DiagnosticSyncResult diagnosticResult = await _diagnosticCommentService.SyncDiagnosticCommentsAsync( - review.Id, - apiRevision.Id, - null, // No existing hash for new revisions - codeFile.Diagnostics, - []); + apiRevision.HasDuplicateLineIds = CodeFileManager.HasDuplicateLineIds(codeFile); + + if (codeFile.Diagnostics != null && codeFile.Diagnostics.Length > 0) + { + DiagnosticSyncResult diagnosticResult = await _diagnosticCommentService.SyncDiagnosticCommentsAsync( + review.Id, + apiRevision.Id, + null, // No existing hash for new revisions + codeFile.Diagnostics, + []); - apiRevision.DiagnosticsHash = diagnosticResult.DiagnosticsHash; + apiRevision.DiagnosticsHash = diagnosticResult.DiagnosticsHash; + } } } @@ -911,6 +917,7 @@ public async Task UpdateAPIRevisionCodeFileAsync(string repoName, string buildId file.PackageVersion = codeFile.PackageVersion; file.ParserStyle = codeFile.ReviewLines.Count > 0 ? ParserStyle.Tree : ParserStyle.Flat; file.ContentHash = await _codeFileManager.ComputeAPIContentHashAsync(codeFile); + apiRevision.HasDuplicateLineIds = CodeFileManager.HasDuplicateLineIds(codeFile); await _reviewsRepository.UpsertReviewAsync(review); await _apiRevisionsRepository.UpsertAPIRevisionAsync(apiRevision); @@ -1035,6 +1042,7 @@ public async Task UpdateAPIRevisionAsync(APIRevisionListItemModel revision, Lang file.ParserStyle = ParserStyle.Tree; } file.ContentHash = await _codeFileManager.ComputeAPIContentHashAsync(codeFile); + revision.HasDuplicateLineIds = CodeFileManager.HasDuplicateLineIds(codeFile); await _apiRevisionsRepository.UpsertAPIRevisionAsync(revision); _telemetryClient.TrackTrace($"Successfully Updated {revision.Language} revision with id {revision.Id}"); } @@ -1358,6 +1366,8 @@ public async Task CreateAPIRevisionAsync(string userNa apiRevisionCodeFile.FileName = originalName; } + apiRevision.HasDuplicateLineIds = CodeFileManager.HasDuplicateLineIds(codeFile); + DiagnosticSyncResult diagnosticResult = await _diagnosticCommentService.SyncDiagnosticCommentsAsync( reviewId, apiRevision.Id, @@ -1577,6 +1587,7 @@ private async Task UpgradeAPIRevisionIfRequired(APIRev codeFileDetails.ParserStyle = ParserStyle.Tree; codeFileDetails.IsConvertedTokenModel = true; codeFileDetails.ContentHash = await _codeFileManager.ComputeAPIContentHashAsync(codeFile); + revisionModel.HasDuplicateLineIds = CodeFileManager.HasDuplicateLineIds(codeFile); await _apiRevisionsRepository.UpsertAPIRevisionAsync(revisionModel); } } diff --git a/src/dotnet/APIView/APIViewWeb/Managers/CodeFileManager.cs b/src/dotnet/APIView/APIViewWeb/Managers/CodeFileManager.cs index 7482c5cf97f..e140ab34b27 100644 --- a/src/dotnet/APIView/APIViewWeb/Managers/CodeFileManager.cs +++ b/src/dotnet/APIView/APIViewWeb/Managers/CodeFileManager.cs @@ -318,5 +318,38 @@ private static void InitializeFromCodeFile(APICodeFileModel file, CodeFile codeF file.CrossLanguagePackageId = codeFile.CrossLanguageMetadata != null ? codeFile.CrossLanguageMetadata.CrossLanguagePackageId : codeFile.CrossLanguagePackageId; file.ParserStyle = (codeFile.ReviewLines.Count > 0) ? ParserStyle.Tree : ParserStyle.Flat; } + + /// + /// Checks whether a CodeFile contains duplicate LineId values across its ReviewLines. + /// + /// The CodeFile to check. + /// True if duplicate LineId values are found; otherwise false. + public static bool HasDuplicateLineIds(CodeFile codeFile) + { + if (codeFile?.ReviewLines == null || codeFile.ReviewLines.Count == 0) + return false; + + var lineIds = new HashSet(StringComparer.Ordinal); + return HasDuplicateLineIdsRecursive(codeFile.ReviewLines, lineIds); + } + + private static bool HasDuplicateLineIdsRecursive(IEnumerable lines, HashSet lineIds) + { + foreach (var line in lines) + { + if (!string.IsNullOrEmpty(line.LineId)) + { + if (!lineIds.Add(line.LineId)) + return true; + } + + if (line.Children != null && line.Children.Count > 0) + { + if (HasDuplicateLineIdsRecursive(line.Children, lineIds)) + return true; + } + } + return false; + } } }