Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
name: report-apiview-metrics
description: "Run APIView platform metrics (versioned revisions and cross-language compliance). Use for: apiview metrics, platform metrics, version coverage, versioned revisions, cross-language compliance, compliance metrics, PackageVersion coverage, CrossLanguagePackageId, apiview-metrics, parser compliance."
description: "Run APIView platform metrics (versioned revisions, cross-language compliance, and duplicate line ID compliance). Use for: apiview metrics, platform metrics, version coverage, versioned revisions, cross-language compliance, compliance metrics, PackageVersion coverage, CrossLanguagePackageId, apiview-metrics, parser compliance, duplicate line IDs, HasDuplicateLineIds."
argument-hint: "Optional: --months N, --end-date YYYY-MM-DD, --languages Python Java, --chart, --summary"
---

Expand All @@ -9,19 +9,21 @@ argument-hint: "Optional: --months N, --end-date YYYY-MM-DD, --languages Python
## When to Use
- Monitoring progress toward 100% versioned revisions across languages
- Checking cross-language metadata compliance (CrossLanguagePackageId)
- Checking duplicate line ID compliance (HasDuplicateLineIds)
- Generating trend charts for APIView platform health
- Reviewing parser compliance over time

## What It Produces

A combined report with two metric buckets:
A combined report with three metric buckets:

| Bucket | What it measures |
|--------|-----------------|
|--------|------------------|
| **versions** | % of revisions with a valid `PackageVersion`, broken out by language and revision type (Automatic, Manual, PullRequest) |
| **compliance** | % of reviews whose latest revision includes `CrossLanguagePackageId` (from `CrossLanguageMetadata`) |
| **duplicate_line_ids** | % of evaluated reviews whose latest revision does NOT have `HasDuplicateLineIds=true`. Revisions missing the field are tracked as "unknown" and excluded from the percentage. |

Output is JSON with top-level `"versions"` and `"compliance"` keys. With `--summary`, human-readable tables are printed to stderr.
Output is JSON with top-level `"versions"`, `"compliance"`, and `"duplicate_line_ids"` keys. With `--summary`, human-readable tables are printed to stderr.

## Defaults

Expand All @@ -44,6 +46,7 @@ New-Item -ItemType Directory -Path output -Force | Out-Null; New-Item -ItemType
After the command completes, **read the output file** with `read_file` to get the JSON results. Then use `view_image` to display charts at:
- `output/charts/apiview_version_trends.png`
- `output/charts/cross_language_compliance.png`
- `output/charts/duplicate_line_ids.png`

### Examples

Expand Down Expand Up @@ -72,7 +75,8 @@ python cli.py report apiview-metrics --chart --environment staging
After reading the output file:
1. Summarize the version-coverage trends (highlight languages below 100%)
2. Summarize the compliance trends (highlight languages below 100%)
3. Show the chart images with `view_image`
3. Summarize the duplicate line ID compliance trends (highlight languages below 100%)
4. Show the chart images with `view_image`

### Step 3: Answer Follow-up Questions

Expand Down
2 changes: 1 addition & 1 deletion packages/python-packages/apiview-copilot/AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ Invoked via `avc` (or `python cli.py`):
- `avc report active-reviews` — Query active reviews for a language and date range.
- `avc report feedback` / `avc report memory` — Audit feedback and memories.
- `avc report architect-comments` — Retrieve human architect review comments for a language and date range.
- `avc report apiview-metrics` — Track APIView platform metrics (versioned revision coverage and cross-language compliance).
- `avc report apiview-metrics` — Track APIView platform metrics (versioned revision coverage, cross-language compliance, and duplicate line ID compliance).
- `avc ops deploy` — Deploy to Azure App Service.
- `avc ops check` — Health check the deployed service.
- `avc ops grant` / `avc ops revoke` — Manage Azure RBAC permissions.
Expand Down
28 changes: 21 additions & 7 deletions packages/python-packages/apiview-copilot/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,18 +49,18 @@
from src._apiview_reviewer import SUPPORTED_LANGUAGES, ApiViewReview
from src._database_manager import ContainerNames, DatabaseManager
from src._garbage_collector import GarbageCollector
from src._apiview_metrics import (
DEFAULT_OUTPUT_PATH as DEFAULT_VERSION_TRENDS_OUTPUT_PATH,
)
from src._apiview_metrics import (
DEFAULT_COMPLIANCE_OUTPUT_PATH,
)
from src._apiview_metrics import (
DEFAULT_DUPLICATE_LINEIDS_OUTPUT_PATH,
DEFAULT_OUTPUT_PATH as DEFAULT_VERSION_TRENDS_OUTPUT_PATH,
build_compliance_reports,
build_duplicate_lineid_reports,
build_version_reports,
generate_compliance_chart,
generate_duplicate_lineid_chart,
generate_version_chart,
print_compliance_report,
print_duplicate_lineid_report,
print_version_report,
)
from src._comment_bucket_trends import (
Expand Down Expand Up @@ -2044,7 +2044,7 @@ def report_apiview_metrics(
chart: bool = False,
summary: bool = False,
) -> None:
"""Generate APIView platform metrics (versioned-revision tracking and cross-language compliance)."""
"""Generate APIView platform metrics (versioned-revision tracking, cross-language compliance, and duplicate line ID compliance)."""
parsed_end_date = None
if end_date:
try:
Expand All @@ -2070,8 +2070,16 @@ def report_apiview_metrics(
environment=environment,
)

duplicate_lineid_reports = build_duplicate_lineid_reports(
languages=normalized_languages,
months=months,
end_date=parsed_end_date,
environment=environment,
)

version_chart_path = None
compliance_chart_path = None
duplicate_lineid_chart_path = None
if chart:
version_chart_path = generate_version_chart(
version_reports,
Expand All @@ -2083,14 +2091,20 @@ def report_apiview_metrics(
output_path=DEFAULT_COMPLIANCE_OUTPUT_PATH,
environment=environment,
)
duplicate_lineid_chart_path = generate_duplicate_lineid_chart(
duplicate_lineid_reports,
output_path=DEFAULT_DUPLICATE_LINEIDS_OUTPUT_PATH,
environment=environment,
)

output = {"versions": version_reports, "compliance": compliance_reports}
output = {"versions": version_reports, "compliance": compliance_reports, "duplicate_line_ids": duplicate_lineid_reports}
sys.stdout.buffer.write(json.dumps(output, indent=2, ensure_ascii=False, default=str).encode("utf-8"))
sys.stdout.buffer.write(b"\n")

if summary:
print_version_report(version_reports, version_chart_path, environment=environment, file=sys.stderr)
print_compliance_report(compliance_reports, compliance_chart_path, environment=environment, file=sys.stderr)
print_duplicate_lineid_report(duplicate_lineid_reports, duplicate_lineid_chart_path, environment=environment, file=sys.stderr)


def grant_permissions(assignee_id: str = None):
Expand Down
1 change: 1 addition & 0 deletions packages/python-packages/apiview-copilot/docs/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ avc report apiview-metrics [--months 6] [--end-date 2026-04-30] [--languages Pyt

- **versions** — Percentage of revisions with a valid `PackageVersion`, broken out by language and revision type (Automatic, Manual, PullRequest).
- **compliance** — Percentage of reviews whose latest revision includes `CrossLanguagePackageId`.
- **duplicate_line_ids** — Percentage of evaluated reviews whose latest revision does NOT have `HasDuplicateLineIds=true`. Revisions where the field is missing are tracked as "unknown" and excluded from the percentage calculation.

## OpenTelemetry Metrics

Expand Down
239 changes: 239 additions & 0 deletions packages/python-packages/apiview-copilot/src/_apiview_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
# Default number of trailing calendar months covered by a report.
DEFAULT_MONTHS = 6
# Default PNG destination for the versioned-revision trend chart.
DEFAULT_OUTPUT_PATH = Path("output/charts/apiview_version_trends.png")
# Default PNG destination for the cross-language compliance chart.
DEFAULT_COMPLIANCE_OUTPUT_PATH = Path("output/charts/cross_language_compliance.png")
# Default PNG destination for the duplicate line ID compliance chart.
DEFAULT_DUPLICATE_LINEIDS_OUTPUT_PATH = Path("output/charts/duplicate_line_ids.png")
# Languages skipped when tallying per-language results; compared
# case-insensitively in build_duplicate_lineid_reports.
OMIT_LANGUAGES = ["c++", "c", "typespec", "swagger", "xml"]


Expand Down Expand Up @@ -619,3 +620,241 @@ def print_compliance_report(
print(f"\nSaved chart: {output_path}", file=file)
else:
print("\nChart was not generated.", file=file)


# ---------------------------------------------------------------------------
# Duplicate Line ID metrics
# ---------------------------------------------------------------------------


@dataclass
class MonthlyDuplicateLineIdPoint:
    """Monthly duplicate line ID compliance data for a single language.

    One instance describes one calendar month; only the latest revision of
    each review created in that month is counted.
    """

    # Month label in "YYYY-MM" form.
    label: str
    # ISO-format date on which the month range starts.
    start_date: str
    # ISO-format date on which the month range ends.
    end_date: str
    # Latest revisions whose HasDuplicateLineIds value is present and falsy.
    clean: int = 0
    # Latest revisions whose HasDuplicateLineIds value is truthy.
    has_duplicates: int = 0
    # Latest revisions where HasDuplicateLineIds is missing or null.
    unknown: int = 0
    # All counted latest revisions: clean + has_duplicates + unknown.
    total: int = 0
    # clean / (clean + has_duplicates) * 100, rounded to 2 decimals;
    # 0.0 when nothing was evaluated. "unknown" is excluded from the ratio.
    clean_pct: float = 0.0


def build_duplicate_lineid_reports(
    languages: Optional[list[str]] = None,
    months: int = DEFAULT_MONTHS,
    end_date: Optional[date] = None,
    *,
    environment: str = PRODUCTION_ENVIRONMENT,
) -> dict[str, list[dict]]:
    """Assemble monthly duplicate line ID compliance data for each language.

    All non-deleted revisions created inside the reporting window are fetched
    with a single query and split by calendar month. Within a month, only the
    most recently created revision of each review is inspected, and its
    ``HasDuplicateLineIds`` value decides whether it counts as clean, as having
    duplicates, or as unknown (field missing or null).

    Args:
        languages: Languages to report on; falls back to ``DEFAULT_LANGUAGES``
            when empty or ``None``.
        months: Number of months passed to ``get_last_n_month_ranges``.
        end_date: Optional end date passed to ``get_last_n_month_ranges``.
        environment: APIView environment whose ``APIRevisions`` container is
            queried.

    Returns:
        A dict mapping each requested language to a list of monthly data-point
        dicts (one per month range, in range order).
    """
    target_languages = languages or DEFAULT_LANGUAGES
    windows = get_last_n_month_ranges(months=months, end_date=end_date)
    if not windows:
        return {name: [] for name in target_languages}

    def month_key(day) -> str:
        # "YYYY-MM", matching the first seven characters of an ISO8601 timestamp.
        return f"{day.year}-{day.month:02d}"

    # The query spans from the first day of the oldest month to the last day
    # of the newest month.
    range_start_iso = to_iso8601(windows[0][0].isoformat())
    range_end_iso = to_iso8601(windows[-1][1].isoformat(), end_of_day=True)

    container = get_apiview_cosmos_client(container_name="APIRevisions", environment=environment)
    fetched = list(
        container.query_items(
            query=(
                "SELECT c.ReviewId, c.Language, c.HasDuplicateLineIds, c.CreatedOn "
                "FROM c "
                "WHERE (NOT IS_DEFINED(c.IsDeleted) OR c.IsDeleted = false) "
                "AND c.CreatedOn >= @start AND c.CreatedOn <= @end"
            ),
            parameters=[
                {"name": "@start", "value": range_start_iso},
                {"name": "@end", "value": range_end_iso},
            ],
            enable_cross_partition_query=True,
        )
    )

    # Single pass over the results: drop each revision into its month bucket.
    revisions_by_month: dict[str, list[dict]] = {month_key(first_day): [] for first_day, _ in windows}
    for revision in fetched:
        bucket = revisions_by_month.get(revision.get("CreatedOn", "")[:7])
        if bucket is not None:
            bucket.append(revision)

    omitted = {name.lower() for name in OMIT_LANGUAGES}

    reports: dict[str, list[dict]] = {name: [] for name in target_languages}
    for first_day, last_day in windows:
        key = month_key(first_day)

        # Reduce the month's revisions to the newest one per review.
        newest: dict[str, dict] = {}
        for revision in revisions_by_month[key]:
            review_id = revision.get("ReviewId")
            if not review_id:
                continue
            current = newest.get(review_id)
            if current is not None and not (revision.get("CreatedOn", "") > current.get("CreatedOn", "")):
                continue
            newest[review_id] = revision

        # Tally the outcome of each newest revision under its language.
        tallies: dict[str, dict] = {}
        for revision in newest.values():
            pretty = get_language_pretty_name(revision.get("Language", "Unknown"))
            if pretty.lower() in omitted:
                continue
            if pretty not in tallies:
                tallies[pretty] = {"clean": 0, "has_duplicates": 0, "unknown": 0, "total": 0}
            counts = tallies[pretty]
            counts["total"] += 1

            flag = revision.get("HasDuplicateLineIds")
            if flag is None:
                outcome = "unknown"
            elif flag:
                outcome = "has_duplicates"
            else:
                outcome = "clean"
            counts[outcome] += 1

        # Emit one point per requested language, zero-filled when the language
        # had no revisions this month.
        for language in target_languages:
            counts = tallies.get(language, {"clean": 0, "has_duplicates": 0, "unknown": 0, "total": 0})
            # "unknown" revisions are left out of the percentage denominator.
            evaluated = counts["clean"] + counts["has_duplicates"]
            clean_pct = round((counts["clean"] / evaluated) * 100, 2) if evaluated else 0.0
            point = MonthlyDuplicateLineIdPoint(
                label=key,
                start_date=first_day.isoformat(),
                end_date=last_day.isoformat(),
                clean_pct=clean_pct,
                **counts,
            )
            reports[language].append(asdict(point))

    return reports


def generate_duplicate_lineid_chart(
    reports: dict[str, list[dict]],
    output_path: Path = DEFAULT_DUPLICATE_LINEIDS_OUTPUT_PATH,
    *,
    environment: str = PRODUCTION_ENVIRONMENT,
) -> Optional[Path]:
    """Render a PNG chart showing duplicate line ID compliance percentage trends per language.

    One bar-chart panel is drawn per language (two panels per row when there
    is more than one language). Each bar is the month's ``clean_pct`` and is
    annotated with ``clean/evaluated`` counts when anything was evaluated.

    Args:
        reports: Mapping of language name to its list of monthly data-point
            dicts, as produced by ``build_duplicate_lineid_reports``.
        output_path: Destination of the PNG; its parent directory is created
            if needed.
        environment: Environment name shown in the chart title.

    Returns:
        ``output_path`` once the chart is saved, or ``None`` when matplotlib
        is not installed or the reports contain no monthly data.
    """
    # Local import keeps this block self-contained.
    import sys

    output_path.parent.mkdir(parents=True, exist_ok=True)

    try:
        import matplotlib.pyplot as plt
    except ImportError:
        # The CLI writes the JSON payload to stdout, so diagnostics must go to
        # stderr or they would corrupt the captured JSON output.
        print("matplotlib is not installed; skipping chart generation.", file=sys.stderr)
        return None

    languages = list(reports.keys())
    # The month count is taken from the first language's report.
    month_count = len(next(iter(reports.values()), [])) if reports else 0
    if month_count == 0:
        return None

    cols = 2 if len(languages) > 1 else 1
    rows = max(1, math.ceil(len(languages) / cols))

    figure, axes = plt.subplots(rows, cols, figsize=(8 * cols, 5 * rows), sharey=True)
    # Normalize whatever subplots() returned (array of axes or a single axes
    # object) into a flat, indexable sequence.
    if not isinstance(axes, (list, tuple)):
        try:
            axes = axes.flatten()
        except AttributeError:
            axes = [axes]
    else:
        axes = list(axes)

    for index, language in enumerate(languages):
        axis = axes[index]
        report = reports[language]
        labels = [item["label"] for item in report]
        x_positions = list(range(len(labels)))
        pcts = [item["clean_pct"] for item in report]

        axis.bar(x_positions, pcts, color="#2196F3", width=0.6)

        # Annotate each bar with its clean/evaluated count; months with
        # nothing evaluated get no annotation.
        for bar_pos, item in zip(x_positions, report):
            evaluated = item["clean"] + item["has_duplicates"]
            if evaluated > 0:
                axis.annotate(
                    f"{item['clean']}/{evaluated}",
                    (bar_pos, item["clean_pct"]),
                    textcoords="offset points",
                    xytext=(0, 4),
                    ha="center",
                    fontsize=7,
                )

        # Reference line at the 100% target; the y-limit leaves headroom for
        # the annotations above full-height bars.
        axis.axhline(y=100, color="gray", linestyle=":", linewidth=1.0, alpha=0.5)
        axis.set_title(language)
        axis.set_xticks(x_positions, labels, rotation=45, ha="right")
        axis.set_ylim(0, 115)
        axis.grid(True, axis="y", linestyle="--", alpha=0.4)

    # Remove the unused trailing panel when the grid has more cells than languages.
    for index in range(len(languages), len(axes)):
        figure.delaxes(axes[index])

    environment_label = (environment or PRODUCTION_ENVIRONMENT).strip().lower()
    figure.suptitle(
        f"No-Duplicate Line ID Compliance %\nLast {month_count} Calendar Months (APIView {environment_label})",
        fontsize=14,
        y=0.985,
    )
    figure.supxlabel("Month")
    figure.supylabel("% Reviews Without Duplicate Line IDs")
    plt.tight_layout(rect=(0.02, 0.03, 1, 0.90))
    figure.savefig(output_path, dpi=150)
    plt.close(figure)
    return output_path


def print_duplicate_lineid_report(
    reports: dict[str, list[dict]],
    output_path: Optional[Path],
    *,
    environment: str = PRODUCTION_ENVIRONMENT,
    file=None,
) -> None:
    """Write a plain-text table of duplicate line ID compliance per language.

    Args:
        reports: Mapping of language name to its list of monthly data-point
            dicts.
        output_path: Chart location; reported as saved only if the file exists.
        environment: Environment name shown in the heading.
        file: Stream passed to ``print``; ``None`` means ``print``'s default.
    """
    env_name = (environment or PRODUCTION_ENVIRONMENT).strip().lower()
    print(f"No-duplicate line ID compliance % by month (APIView {env_name})", file=file)

    # The column header and rule are the same for every language.
    columns = ["Month", "Clean", "Has Dupes", "Unknown", "Total", "Clean %"]
    header_line = " ".join(f"{col:>14}" for col in columns)
    rule_line = " ".join(["----------"] * len(columns))
    count_keys = ("clean", "has_duplicates", "unknown", "total")

    for language, rows in reports.items():
        print(f"\n{language}", file=file)
        print(header_line, file=file)
        print(rule_line, file=file)

        for row in rows:
            cells = [f"{row['label']:>14}"]
            cells.extend(f"{row[key]:>14}" for key in count_keys)
            cells.append(f"{row['clean_pct']:>14.1f}")
            print(" ".join(cells), file=file)

    if not output_path or not output_path.exists():
        print("\nChart was not generated.", file=file)
    else:
        print(f"\nSaved chart: {output_path}", file=file)
Loading