diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index b77b29628..a056b0c6d 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -23,7 +23,7 @@ "name": "jira", "source": "./plugins/jira", "description": "A plugin to automate tasks with Jira", - "version": "0.4.6" + "version": "0.4.8" }, { "name": "ci", diff --git a/plugins/jira/.claude-plugin/plugin.json b/plugins/jira/.claude-plugin/plugin.json index 347e9d6c5..f2c1d250f 100644 --- a/plugins/jira/.claude-plugin/plugin.json +++ b/plugins/jira/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "jira", "description": "A plugin to automate tasks with Jira", - "version": "0.4.6", + "version": "0.4.8", "author": { "name": "github.com/openshift-eng" } diff --git a/plugins/jira/commands/candidates-from-pr.md b/plugins/jira/commands/candidates-from-pr.md new file mode 100644 index 000000000..7b422fbbb --- /dev/null +++ b/plugins/jira/commands/candidates-from-pr.md @@ -0,0 +1,191 @@ +--- +description: Given a GitHub PR, find candidate open Jira issues (matching component and target release) that the PR may fix +argument-hint: " [--repo ] [--project ] [--target-release ] [--component ] [--limit ] [--min-score <0-100>] [--include-explicit] [--output text|json]" +--- + +## Name +jira:candidates-from-pr + +## Synopsis +```text +/jira:candidates-from-pr [options] +``` + +## Description + +The `jira:candidates-from-pr` command takes a GitHub Pull Request and produces a ranked list of **open Jira issues that the PR may fix**, scoped by component and target release. It is intended for triage workflows where a PR was opened without an explicit Jira reference (or with an incomplete one) and a maintainer needs to decide which open bugs/stories the PR actually closes. + +This is the inverse direction of `jira:extract-prs` (which goes Jira → PRs). Here the input is a PR and the output is a candidate set of Jiras with a confidence score and matched signals. 
+ +### What it does + +1. **Fetches the PR** (title, body, labels, commits, file paths, and diff hunks). +2. **Extracts already-referenced Jira keys** from the PR title, body, commits, and branch name (e.g. `OCPBUGS-12345`, `SDN-1234`). These are validated and reported separately as "explicitly referenced" — by default they are **not** re-evaluated as candidates unless `--include-explicit` is passed. +3. **Derives triage signals** from the PR: + - **Component(s)**: inferred from changed file paths and the repository (e.g. `openshift/ovn-kubernetes` → component `Networking / ovn-kubernetes`). The user can override with `--component`. + - **Target release**: inferred from the PR's base branch (e.g. `release-4.18` → `4.18`, `main` → the current development version). The user can override with `--target-release`. + - **Keywords/symbols**: function names, error strings, log messages, CLI flags, CRD/API field names touched by the diff. +4. **Queries open Jiras** via JQL, filtered by project, component, target release / fix version, and `statusCategory != Done`. +5. **Scores each candidate** by semantic match against the PR signals (symbol overlap, error-string match, keyword/title overlap, component agreement, recent activity). Each score includes a 1-2 sentence rationale and the matched signals. +6. **Outputs a ranked list** of candidates above `--min-score` (default `40`), capped at `--limit` (default `10`). + +### What it does not do + +- Does **not** modify any Jira issue, post comments, or link the PR. It is a read-only triage helper. +- Does **not** create new Jira issues. Use `/utils:process-renovate-pr` or `/jira:create` for creation. +- Does **not** replace human judgement. Confidence scores are advisory; a maintainer must confirm the link. + +## Prerequisites + +- `gh` CLI installed and authenticated with read access to the PR's repository. +- Jira MCP server configured (see `plugins/jira/README.md`). +- `jq` installed for JSON processing. 
+ +## Implementation + +Load the skill file for detailed implementation guidance: + +```text +plugins/jira/skills/candidates-from-pr/SKILL.md +``` + +### Process Flow + +1. **Parse arguments**: + - `$1` is required: a PR URL (`https://github.com/<org>/<repo>/pull/<number>`) or a PR number (in which case `--repo` must also be given). + - Parse optional flags: `--repo`, `--project`, `--target-release`, `--component`, `--limit`, `--min-score`, `--include-explicit`, `--output`. + - Defaults: `--limit 10`, `--min-score 40`, `--output text`. + +2. **Fetch PR data**: + ```bash + gh pr view <number> --repo <org>/<repo> \ + --json number,url,title,body,headRefName,baseRefName,labels,author,commits,files + gh pr diff <number> --repo <org>/<repo> + ``` + +3. **Extract explicitly referenced Jira keys**: + - Regex `\b[A-Z][A-Z0-9_]+-[0-9]+\b` against title, body, commit messages, and `headRefName`. + - Validate each via `mcp__atlassian__jira_get_issue` (fields: `summary,status,issuetype,components,fixVersions,customfield_10855,assignee`). + - Report these in a separate "Explicit references" block. + +4. **Derive component**: + - If `--component` provided, use it directly. + - Otherwise, map repo + changed paths to component(s) using `plugins/teams/skills/list-components` or the `team_component_map.json` lookup. For OpenShift repos, default mappings include: + - `openshift/ovn-kubernetes` → `Networking / ovn-kubernetes` + - `openshift/cluster-network-operator` → `Networking / cluster-network-operator` + - `openshift/origin` → component derived from changed test paths + - If no mapping is confident, fall back to component-free search and warn the user. + +5. **Derive target release**: + - If `--target-release` provided, use it directly. + - Otherwise, parse `baseRefName`: + - `release-X.Y` → `X.Y` + - `main` / `master` → the current development version (look up via `mcp__atlassian__jira_get_project_versions` and pick the latest unreleased version).
+ - For OCPBUGS, target release is in `customfield_10855` (Target Version), not `fixVersions` (managed by release team — see `plugins/jira/reference/mcp-tools.md:295`). Query both for safety. + +6. **Build JQL** and search for candidate Jiras: + ```jql + project = + AND statusCategory != Done + AND component in () + AND ("Target Version" = "" OR fixVersion = "") + ORDER BY updated DESC + ``` + Use `mcp__atlassian__jira_search` with `fields=summary,status,issuetype,components,fixVersions,customfield_10855,priority,description,updated,labels` and a generous limit (e.g. 50 — narrowed by JQL filters). + +7. **Score candidates** (see SKILL for full rubric). Briefly: + - **Symbol/identifier overlap** (function, struct, CRD field, error string): high weight. + - **Keyword overlap** (PR title vs. Jira summary, error message in description): medium weight. + - **Component agreement**: required for non-zero score unless component derivation was skipped. + - **Recency**: small bonus for issues updated within the last 90 days. + - **Penalties**: candidate is an Epic/Initiative (rarely "fixed by" a single PR) or has a different `Target Version`. + +8. **Output**: + - **Text** (default): a table with one row per candidate; columns: `#`, verdict, score, key, status, priority, **assignee** (required — show `unassigned` if null), summary, top matched signals, and Jira URL. End with the JQL used and a one-line verdict per candidate (`likely`, `possible`, `unlikely`). + - **JSON** (`--output json`): structured payload with `pr`, `explicit_references`, `candidates[]`, and `metadata`. Each candidate object includes `assignee`. + +9. **Always print the JQL used** so the user can iterate. + +## Examples + +1. **Basic usage with a PR URL**: + ```text + /jira:candidates-from-pr https://github.com/openshift/ovn-kubernetes/pull/4567 + ``` + +2. **Override the target release** (e.g. 
when triaging a backport before the base branch is final): + ```text + /jira:candidates-from-pr https://github.com/openshift/ovn-kubernetes/pull/4567 --target-release 4.18 + ``` + +3. **Restrict to a specific component**: + ```text + /jira:candidates-from-pr 4567 --repo openshift/ovn-kubernetes \ + --component "Networking / ovn-kubernetes" --project OCPBUGS + ``` + +4. **Tighter results, JSON output for downstream tooling**: + ```text + /jira:candidates-from-pr https://github.com/openshift/ovn-kubernetes/pull/4567 \ + --limit 5 --min-score 60 --output json + ``` + +5. **Re-score even keys already mentioned in the PR description**: + ```text + /jira:candidates-from-pr https://github.com/openshift/ovn-kubernetes/pull/4567 --include-explicit + ``` + +## Return Value + +- **Claude agent text** (default): grouped report with two sections: + 1. **Explicit references** — Jira keys already mentioned in the PR, with status and target release. + 2. **Candidate matches** — ranked list of open Jiras the PR may fix, each with score, rationale, matched signals, and link. +- **JSON** (`--output json`): a structured object suitable for piping into other commands. Schema: + ```json + { + "schema_version": "1.0", + "metadata": { "generated_at": "...", "command": "candidates-from-pr" }, + "pr": { "url": "...", "number": 0, "title": "...", "base_ref": "...", "head_ref": "..." }, + "derived": { "components": ["..."], "target_release": "..." }, + "explicit_references": [ { "key": "...", "summary": "...", "status": "...", "target_release": "...", "assignee": "..." 
} ], + "candidates": [ + { + "key": "OCPBUGS-12345", + "summary": "...", + "url": "https://issues.redhat.com/browse/OCPBUGS-12345", + "status": "New", + "issuetype": "Bug", + "priority": "Major", + "assignee": { "display_name": "Jane Doe", "email": "jane@example.com" }, + "components": ["..."], + "target_release": "4.18", + "score": 78, + "verdict": "likely", + "rationale": "...", + "matched_signals": ["error string 'failed to add subnet'", "function ensureSubnet", "title keyword 'subnet'"] + } + ] + } + ``` + +## Notes + +- **Read-only**: the command never mutates Jira state. +- **Component mapping** is best-effort; pass `--component` for precision when the auto-derivation is wrong. +- **Target Version vs Fix Version**: for OCPBUGS the user-facing field is `Target Version` (`customfield_10855`); `fixVersions` is set by the release team. The query checks both, but the report displays `Target Version` when present. +- **Companion commands**: + - `/jira:extract-prs` — opposite direction (Jira → PRs). + - `/jira:reconcile-github` — state-mismatch reconciliation between linked GitHub issues and Jira. + - `/utils:process-renovate-pr` — creates a *new* Jira from a Konflux/Renovate PR (does not match existing). + +## Arguments + +- `$1` (required): GitHub PR URL (`https://github.com///pull/`) or PR number. +- `--repo `: required if `$1` is just a number; ignored if a full URL is given. +- `--project `: Jira project key to search (default: `OCPBUGS`). +- `--target-release `: override auto-detected target release (e.g. `4.18`). +- `--component `: override auto-detected component. Repeatable. +- `--limit `: maximum candidates to return (default: `10`). +- `--min-score <0-100>`: drop candidates scoring below this (default: `40`). +- `--include-explicit`: also score Jira keys already referenced in the PR (default: list them separately, do not re-score). +- `--output text|json`: output format (default: `text`). 
diff --git a/plugins/jira/skills/candidates-from-pr/SKILL.md b/plugins/jira/skills/candidates-from-pr/SKILL.md new file mode 100644 index 000000000..a54c42bc1 --- /dev/null +++ b/plugins/jira/skills/candidates-from-pr/SKILL.md @@ -0,0 +1,203 @@ +--- +name: PR to Jira Candidate Matcher +description: Implementation guide for /jira:candidates-from-pr — analyze a GitHub PR and surface ranked open Jira candidates filtered by component and target release +--- + +# PR to Jira Candidate Matcher + +Implementation for `/jira:candidates-from-pr`. Inverse direction of `extract-prs`: input is a GitHub PR, output is a ranked, triage-ready table of open Jira issues the PR may fix. + +**IMPORTANT FOR AI**: This skill delegates all mechanical work to the scripts under `scripts/`. When invoked you MUST: + +- Run the scripts as shown in the "Implementation" section. Do NOT replace them with inline `gh` / `jq` / `grep` calls. +- Make exactly the MCP calls listed in steps 2, 3 (conditional), and 4 — no more, no less. +- Treat `fetch_pr.py` as the single entry point for PR data; never call `gh pr view` or `gh pr diff` directly. +- Pipe data through the scripts using temp files in `.work/candidates-from-pr/{pr-number}/`. + +The skill caller (this AI) is responsible only for the steps that need MCP tool calls or natural-language judgment (rationales). + +## Scripts + +All scripts read JSON on stdin or via `--` flags and write JSON/text to stdout. None of them call APIs except `fetch_pr.py` (which shells out to `gh`). + +| Script | Purpose | +|---|---| +| `scripts/fetch_pr.py` | Resolve PR URL/number + run `gh pr view` and `gh pr diff`; emit normalized PR JSON. Caps the diff (default 4000 lines). | +| `scripts/extract_jira_keys.py` | Regex over PR title/body/branch/commits; emit deduplicated `[{key, sources[]}]` filtered by `--projects`. | +| `scripts/derive_filters.py` | Map repo + base ref to `components[]` + `target_release` (with sources and warnings). 
Sets `target_release_source=needs_lookup` when base ref is `main`/`master`. | +| `scripts/build_jql.py` | Assemble the final JQL from the filters JSON. Component and version clauses are conditional. | +| `scripts/extract_signals.py` | Local signal extraction — symbols, error strings, log messages, title keywords, file-path tags. No API calls. | +| `scripts/score_candidates.py` | Apply the scoring rubric + penalties to candidate Jiras and emit ranked JSON with `verdict` and `matched_signals`. | +| `scripts/render_report.py` | Render the final text table or canonical JSON payload. Accepts an optional `rationales.json` produced by the caller. | + +## When to Use This Skill + +- A PR is opened without a Jira reference and a triager needs to identify what it fixes. +- A PR mentions a Jira but you suspect it also addresses other open bugs in the same target release. +- Preparing a release-readiness report and want to map merged PRs to closeable Jiras. + +**Read-only**: the skill never mutates Jira or GitHub state. + +## Prerequisites + +- `gh` CLI authenticated with read access to the target repo. +- Jira MCP server configured (`plugins/jira/README.md`). +- Python 3.10+ (no third-party deps; standard library only). +- `jq` CLI for JSON field extraction in shell snippets. + +## Output Format + +Schema version `1.0`, produced by `render_report.py --format json`: + +```json +{ + "schema_version": "1.0", + "metadata": { "generated_at": "...", "command": "candidates-from-pr", "jql": "..." }, + "pr": { "url": "...", "number": 0, "title": "...", "base_ref": "...", "head_ref": "...", "labels": [], "files_changed": 0 }, + "derived": { "components": [], "target_release": "...", "component_source": "...", "target_release_source": "..." }, + "explicit_references": [ + { "key": "...", "summary": "...", "status": "...", "target_release": "...", "assignee": "...", "url": "..." 
} + ], + "candidates": [ + { + "key": "OCPBUGS-22222", "summary": "...", "url": "...", + "status": "...", "issuetype": "...", "priority": "...", + "assignee": { "display_name": "Jane Doe", "email": "..." }, + "components": [], "target_release": "...", "fix_versions": [], + "score": 78, "verdict": "likely", + "rationale": "Optional 1-2 sentence prose written by the caller.", + "matched_signals": [{ "type": "error_string", "value": "..." }] + } + ] +} +``` + +The text format renders the same data as a markdown-style table; the **assignee column is required** and prints `unassigned` when null. + +## Implementation + +Work in `.work/candidates-from-pr/{pr}/` for intermediate JSON. All paths below are relative to that working directory unless stated. + +### 1. Fetch PR + +```bash +scripts/fetch_pr.py "$PR_ARG" ${REPO:+--repo "$REPO"} > pr.json +``` + +If `pr.diff_truncated` is `true`, mention it in the final report (the rest of the pipeline is unaffected — signals are extracted from the capped diff only). If `pr.diff_unavailable_reason` is set (e.g., `gh` returned HTTP 406 because the PR is too large), `extract_signals.py` automatically falls back to commit headlines/bodies; mention the fallback in the report so the user knows scoring is weaker. + +### 2. Extract explicit Jira references + +```bash +scripts/extract_jira_keys.py --projects "$PROJECTS" < pr.json > explicit_keys.json +``` + +For each `key` in the output, call: + +```text +mcp__atlassian__jira_get_issue( + issue_key=, + fields="summary,status,issuetype,components,fixVersions,customfield_10855,assignee" +) +``` + +On success, project the result to `{key, summary, status, target_release, assignee, url}` and append to `explicit.json`. On 404, drop the key with a warning. + +### 3. 
Derive filters + +```bash +scripts/derive_filters.py \ + "${COMPONENTS_OVERRIDE[@]/#/--component=}" \ + ${TARGET_RELEASE:+--target-release "$TARGET_RELEASE"} \ + < pr.json > filters.json +``` + +The quoted `--component=` form keeps component names that contain spaces (e.g. `Networking / ovn-kubernetes`) as a single argument. + +If `filters.json.target_release_source == "needs_lookup"`: + +```text +mcp__atlassian__jira_get_project_versions(project_key=<PROJECT>) +``` + +Pick the **highest** numeric version where `released == false` (the current development release for `main`/`master`) and rewrite `filters.json` with it (set `target_release_source = "project_versions"`). + +### 4. Build JQL and search + +```bash +scripts/build_jql.py --project "$PROJECT" < filters.json > jql.txt +``` + +Run the search: + +```text +mcp__atlassian__jira_search( + jql=<contents of jql.txt>, + fields="summary,status,issuetype,priority,assignee,components,fixVersions,customfield_10855,description,updated,labels", + limit=50 +) +``` + +Project each issue to the input shape expected by `score_candidates.py` (see its docstring) and write to `candidates_raw.json`. Drop any keys also present in `explicit.json` unless `--include-explicit` was passed. + +### 5. Extract signals (local) + +```bash +scripts/extract_signals.py < pr.json > signals.json +``` + +### 6. Score candidates (local) + +```bash +COMPS=$(jq -r '.components | join(",")' filters.json) +COMP_FLAG=$([ -n "$COMPS" ] && echo --component-filter-used) +scripts/score_candidates.py \ + --signals signals.json \ + --candidates candidates_raw.json \ + --components-derived "$COMPS" $COMP_FLAG \ + --min-score "$MIN_SCORE" \ + --limit "$LIMIT" \ + > scored.json +``` + +### 7. Write rationales (caller, optional) + +For each candidate in `scored.json`, write a 1–2 sentence rationale referencing the **top three** entries in `matched_signals`. Save to `rationales.json` as `{"OCPBUGS-1234": "..."}`. If you skip this step, `render_report.py` falls back to a comma-joined list of matched signal values. + +### 8. 
Render + +```bash +scripts/render_report.py \ + --pr pr.json --filters filters.json --jql "$(cat jql.txt)" \ + --explicit explicit.json --candidates scored.json \ + ${RATIONALES:+--rationales rationales.json} \ + --format "$FORMAT" +``` + +## Scoring Rubric (implemented in `score_candidates.py`) + +| Signal | Weight | +|---|---| +| Error string from PR diff appears verbatim in Jira summary or description | +35 | +| Symbol from PR diff (function/struct) appears in Jira summary or description | +25 per unique match, capped at +40 | +| ≥ 2 PR title keywords overlap Jira summary/description | +15 | +| Component agreement (Jira components ∩ derived components ≠ ∅) | +10 (required for non-zero when component filter active) | +| Recency: Jira `updated` within last 90 days | +5 | +| File-path tag overlaps Jira summary/description | +5 each, capped at +10 | +| Penalty: Jira issuetype is Epic/Feature/Initiative | −15 | +| Penalty: derived component empty AND no symbol/error-string overlap | −20 | +| Drop: Jira status is Verified/Closed | drop | + +Final score is clamped to [0, 100]. Verdict mapping: `≥75` likely, `≥50` possible, `≥min_score` unlikely, otherwise dropped. + +## Error Handling + +- **PR not found / private** — `fetch_pr.py` propagates the `gh` error and exits non-zero. Surface verbatim to the user and stop. +- **MCP unavailable** — direct the user to `plugins/jira/README.md` and stop. +- **No candidates after scoring** — print explicit references (if any), the JQL, and suggest `--component`, `--target-release`, or `--min-score 0`. +- **Diff truncated** — keep going; mention truncation in the report. +- **Project not OCPBUGS** — works as long as the project has either `Target Version` or `fixVersions`; the JQL combines both. + +## Companion Skills + +- `plugins/jira/skills/extract-prs/SKILL.md` — opposite direction (Jira → PRs). +- `plugins/teams/skills/list-components/SKILL.md` — component lookup helpers. 
def quote(value: str) -> str:
    """Return *value* as a double-quoted JQL string literal.

    Backslashes are escaped before double quotes. Escaping only the quotes
    (as the previous implementation did) lets a backslash in the input
    combine with the following character and yields malformed JQL.
    """
    escaped = value.replace("\\", "\\\\").replace('"', '\\"')
    return f'"{escaped}"'


def build_jql(filters: dict, project: str = "OCPBUGS") -> str:
    """Assemble the candidate-search JQL from a derive_filters.py payload.

    Args:
        filters: Mapping with optional ``components`` (list of names) and
            ``target_release`` (string or None) keys.
        project: Jira project key to search.

    Returns:
        A JQL string. The project and ``statusCategory != Done`` clauses are
        always present; the component and version clauses are added only when
        the corresponding filter is non-empty.
    """
    clauses = [f"project = {quote(project)}", "statusCategory != Done"]

    # Skip blank names so an empty entry never becomes `component in ("")`.
    components = [
        c.strip()
        for c in (filters.get("components") or [])
        if isinstance(c, str) and c.strip()
    ]
    if components:
        joined = ", ".join(quote(c) for c in components)
        clauses.append(f"component in ({joined})")

    # Emit the stripped value: the emptiness check and the clause must agree.
    release = (filters.get("target_release") or "").strip()
    if release:
        version = quote(release)
        clauses.append(f'("Target Version" = {version} OR fixVersion = {version})')

    return " AND ".join(clauses) + " ORDER BY updated DESC"


def main() -> None:
    """Read the filters JSON from stdin and print the JQL string to stdout."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--project", default="OCPBUGS")
    args = parser.parse_args()

    filters = json.load(sys.stdin)
    sys.stdout.write(build_jql(filters, args.project) + "\n")


if __name__ == "__main__":
    main()
target release for a PR. + +Reads PR JSON (fetch_pr.py output) on stdin and writes a JSON object with +derived filters. Pure local logic — no API calls. When the base ref is +`main`/`master` the target_release is left null with source="needs_lookup", +signalling the skill caller to query Jira project versions. + +Usage: + fetch_pr.py ... | derive_filters.py \ + [--component "Networking / ovn-kubernetes" ...] \ + [--target-release X.Y] + +Output: + { + "components": ["..."], + "component_source": "auto" | "override" | "path-heuristic" | "none", + "target_release": "4.18" | null, + "target_release_source": "override" | "base_ref" | "needs_lookup" | "unknown", + "warnings": ["..."] + } +""" + +from __future__ import annotations + +import argparse +import json +import re +import sys + +# Repo (org/name) -> default component(s). +REPO_COMPONENTS: dict[str, list[str]] = { + "openshift/ovn-kubernetes": ["Networking / ovn-kubernetes"], + "openshift/cluster-network-operator": ["Networking / cluster-network-operator"], + "openshift/sdn": ["Networking / openshift-sdn"], + "openshift/cluster-ingress-operator": ["Networking / router"], + "openshift/hypershift": ["HyperShift"], + "openshift/cluster-version-operator": ["Cluster Version Operator"], +} + +# Path-prefix heuristics applied if no repo mapping exists. 
+PATH_HEURISTICS: list[tuple[re.Pattern[str], list[str]]] = [ + (re.compile(r"^pkg/network/"), ["Networking"]), + (re.compile(r"^pkg/operator/"), ["Operator"]), +] + +RELEASE_BRANCH_RE = re.compile(r"^release[-/](\d+\.\d+)$") +DEV_BRANCHES = {"main", "master"} + + +def derive_components( + org: str, + repo: str, + files: list[dict], + overrides: list[str], +) -> tuple[list[str], str, list[str]]: + warnings: list[str] = [] + if overrides: + return overrides, "override", warnings + + slug = f"{org}/{repo}" + if slug in REPO_COMPONENTS: + return REPO_COMPONENTS[slug], "auto", warnings + + paths = [f.get("path", "") for f in files] + matched: list[str] = [] + for pattern, comps in PATH_HEURISTICS: + if any(pattern.search(p) for p in paths): + for c in comps: + if c not in matched: + matched.append(c) + if matched: + warnings.append( + f"no repo mapping for {slug}; derived components from file paths" + ) + return matched, "path-heuristic", warnings + + warnings.append( + f"no component mapping known for {slug}; JQL will omit component filter" + ) + return [], "none", warnings + + +def derive_target_release( + base_ref: str, override: str | None +) -> tuple[str | None, str, list[str]]: + if override: + return override, "override", [] + m = RELEASE_BRANCH_RE.match(base_ref or "") + if m: + return m.group(1), "base_ref", [] + if base_ref in DEV_BRANCHES: + return None, "needs_lookup", [] + return ( + None, + "unknown", + [ + f"could not derive target release from base ref '{base_ref}'; " + "skill caller should pass --target-release or skip version filter" + ], + ) + + +def main() -> None: + p = argparse.ArgumentParser() + p.add_argument("--component", action="append", default=[]) + p.add_argument("--target-release", default=None) + args = p.parse_args() + + pr = json.load(sys.stdin) + + components, csrc, cwarn = derive_components( + pr.get("org", ""), pr.get("repo", ""), pr.get("files") or [], args.component + ) + release, rsrc, rwarn = derive_target_release( + 
pr.get("base_ref", ""), args.target_release + ) + + json.dump( + { + "components": components, + "component_source": csrc, + "target_release": release, + "target_release_source": rsrc, + "warnings": cwarn + rwarn, + }, + sys.stdout, + ) + sys.stdout.write("\n") + + +if __name__ == "__main__": + main() diff --git a/plugins/jira/skills/candidates-from-pr/scripts/extract_jira_keys.py b/plugins/jira/skills/candidates-from-pr/scripts/extract_jira_keys.py new file mode 100755 index 000000000..d886b5f77 --- /dev/null +++ b/plugins/jira/skills/candidates-from-pr/scripts/extract_jira_keys.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 +"""Extract explicitly referenced Jira keys from a PR. + +Reads PR JSON (output of fetch_pr.py) on stdin, scans the title, body, branch +name, and each commit headline+body for keys matching a set of project +prefixes, and emits a deduplicated JSON array on stdout. + +Usage: + fetch_pr.py ... | extract_jira_keys.py [--projects OCPBUGS,SDN] + +Output: + [{"key": "OCPBUGS-1234", "sources": ["title", "commit:abcd1234"]}, ...] + +The Jira keys are *not* validated here — the skill caller is expected to +validate via mcp__atlassian__jira_get_issue. 
KEY_RE = re.compile(r"\b([A-Z][A-Z0-9_]+)-([0-9]+)\b")


def scan(text: str, source: str, projects: set[str]) -> list[tuple[str, str]]:
    """Find Jira keys in *text* whose project prefix is in *projects*.

    Args:
        text: Text to scan; ``None`` or empty yields no matches.
        source: Label recorded alongside every key found in this text.
        projects: Accepted project prefixes (e.g. ``{"OCPBUGS"}``).

    Returns:
        ``(key, source)`` tuples in order of appearance (duplicates kept).
    """
    found: list[tuple[str, str]] = []
    for m in KEY_RE.finditer(text or ""):
        proj, num = m.group(1), m.group(2)
        # Issue numbers start at 1; reject "0", "00", "000", ... and not
        # just the single-character "0".
        if not num.strip("0"):
            continue
        if proj not in projects:
            continue
        found.append((f"{proj}-{num}", source))
    return found


def collect_keys(pr: dict, projects: set[str]) -> list[dict]:
    """Collect de-duplicated Jira keys from every text field of a PR.

    Scans ``title``, ``body``, ``head_ref`` and each commit's ``headline``
    and ``body``. Source labels are ``"title"``, ``"body"``, ``"branch"``,
    ``"commit:<oid8>:headline"`` and ``"commit:<oid8>:body"``, where
    ``<oid8>`` is the first eight characters of the commit oid.

    Returns:
        ``[{"key": ..., "sources": [...]}]`` sorted by key, with each
        ``sources`` list sorted.
    """
    hits: dict[str, set[str]] = {}

    def add(matches: list[tuple[str, str]]) -> None:
        for key, src in matches:
            hits.setdefault(key, set()).add(src)

    add(scan(pr.get("title", ""), "title", projects))
    add(scan(pr.get("body", ""), "body", projects))
    add(scan(pr.get("head_ref", ""), "branch", projects))
    for c in pr.get("commits") or []:
        oid = (c.get("oid") or "")[:8]
        add(scan(c.get("headline", ""), f"commit:{oid}:headline", projects))
        add(scan(c.get("body", ""), f"commit:{oid}:body", projects))

    return [{"key": k, "sources": sorted(hits[k])} for k in sorted(hits)]


def main() -> None:
    """Read PR JSON on stdin and write the referenced Jira keys to stdout."""
    p = argparse.ArgumentParser()
    p.add_argument(
        "--projects",
        default="OCPBUGS",
        help="Comma-separated Jira project prefixes to recognize (default: OCPBUGS)",
    )
    args = p.parse_args()
    projects = {x.strip() for x in args.projects.split(",") if x.strip()}

    pr = json.load(sys.stdin)
    json.dump(collect_keys(pr, projects), sys.stdout)
    sys.stdout.write("\n")


if __name__ == "__main__":
    main()
# (extract_signals.py module documentation, continued)
#
# All extraction is local regex/string work — no API calls and no model
# judgement.
#
# Usage:
#     fetch_pr.py ... | extract_signals.py
#
# Output (JSON object on stdout):
#     {
#       "symbols":          [{"value": "ensureSubnet"}, ...],
#       "error_strings":    [{"value": "failed to add subnet"}, ...],
#       "log_messages":     [{"value": "..."}, ...],
#       "labels":           [{"value": "kind/bug"}, ...],
#       "title_keywords":   [{"value": "subnet"}, ...],
#       "path_tags":        [{"value": "subnet"}, ...],
#       "commit_keywords":  [{"value": "subnet"}, ...],
#       "diff_unavailable": false
#     }

from __future__ import annotations

import json
import re
import sys

# Lower-cased tokens that carry no triage signal in a PR title / commit
# headline. "jira" is listed on its own because the tokenizer in
# title_keywords() splits on '-', so a "NO-JIRA:" prefix yields the token
# "jira" (the "no-jira" entry can never match a token by itself).
STOP_WORDS = {
    "the", "a", "an", "and", "or", "but", "of", "for", "to", "in", "on",
    "with", "from", "by", "is", "are", "was", "were", "be", "been", "fix",
    "fixes", "bug", "bugs", "add", "adds", "added", "remove", "removes",
    "removed", "update", "updates", "updated", "use", "uses", "using",
    "this", "that", "these", "those", "into", "make", "makes", "made",
    "merge", "branch", "master", "main", "feat", "feature", "chore", "docs",
    "doc", "test", "tests", "ci", "build", "release", "version", "support",
    "no-jira", "jira", "downstream", "upstream",
}

# Go declarations. These run on the output of added_lines(), i.e. on text
# whose leading '+' diff marker has ALREADY been removed, so the patterns must
# not require that marker (requiring it made them match nothing).
GO_FUNC_RE = re.compile(
    r"^.*\bfunc\s+(?:\(\s*\w+\s+\*?\w+\s*\)\s+)?(\w+)\s*\(", re.M
)
GO_TYPE_RE = re.compile(r"^.*\btype\s+(\w+)\s+(?:struct|interface)\b", re.M)

# String literals passed to common Go error / logging / panic calls.
ERR_NEW_RE = re.compile(r'errors\.New\(\s*"([^"]{4,200})"\s*\)')
FMT_ERRORF_RE = re.compile(r'fmt\.Errorf\(\s*"([^"]{4,200})"')
LOG_RE = re.compile(
    r'(?:klog|log|logger|logrus)\.[A-Za-z]+\(\s*"([^"]{12,200})"'
)
PANIC_RE = re.compile(r'panic\(\s*"([^"]{4,200})"\s*\)')

# Loose identifier extraction from the added text.
CAMEL_RE = re.compile(r"\b([A-Z][a-zA-Z0-9]{3,})\b")
SNAKE_RE = re.compile(r"\b([a-z][a-zA-Z0-9_]{4,})\b")

# Identifiers we never want to treat as signals (too generic to be useful).
SYMBOL_BLOCKLIST = {
    "string", "error", "context", "Context", "true", "false", "nil",
    "return", "interface", "struct", "package", "import", "func",
    "default", "switch", "select", "channel", "range", "map", "make",
    "panic", "recover", "println", "Sprintf", "Errorf",
}


def added_lines(diff: str) -> str:
    """Return the added lines of a unified diff with the '+' marker removed.

    Lines starting with '+++' (file headers) are skipped. The result is the
    remaining '+' lines, marker stripped, joined with newlines.
    """
    return "\n".join(
        line[1:]
        for line in diff.splitlines()
        if line.startswith("+") and not line.startswith("+++")
    )


def uniq(items: list[str], cap: int | None = None) -> list[str]:
    """Return *items* de-duplicated in first-seen order.

    When *cap* is a positive number, stop as soon as that many unique items
    have been collected. A falsy cap (None or 0) means "no limit".
    """
    seen: set[str] = set()
    kept: list[str] = []
    for item in items:
        if item in seen:
            continue
        seen.add(item)
        kept.append(item)
        if cap and len(kept) >= cap:
            break
    return kept


def extract_symbols(diff_added: str) -> list[str]:
    """Return up to 200 unique identifiers found in *diff_added*.

    Declared Go function and type names come first (so they survive the cap),
    followed by CamelCase and then lower-case-leading identifiers that are not
    in SYMBOL_BLOCKLIST.
    """
    declared = GO_FUNC_RE.findall(diff_added) + GO_TYPE_RE.findall(diff_added)
    loose = CAMEL_RE.findall(diff_added) + SNAKE_RE.findall(diff_added)
    return uniq(
        declared + [name for name in loose if name not in SYMBOL_BLOCKLIST],
        cap=200,
    )


def extract_error_strings(diff_added: str) -> list[str]:
    """Return up to 100 unique literals passed to errors.New/fmt.Errorf/panic."""
    found: list[str] = []
    for pattern in (ERR_NEW_RE, FMT_ERRORF_RE, PANIC_RE):
        found.extend(pattern.findall(diff_added))
    return uniq(found, cap=100)


def extract_log_messages(diff_added: str) -> list[str]:
    """Return up to 100 unique literals passed to klog/log/logger/logrus calls."""
    return uniq(LOG_RE.findall(diff_added), cap=100)


def title_keywords(title: str) -> list[str]:
    """Return unique lower-cased tokens (4+ chars) of *title*, minus STOP_WORDS."""
    tokens = re.findall(r"[A-Za-z][A-Za-z0-9]{3,}", title or "")
    lowered = (token.lower() for token in tokens)
    return uniq([token for token in lowered if token not in STOP_WORDS])


def path_tags(files: list[dict]) -> list[str]:
    """Return up to 50 unique lower-cased path segments from *files*.

    Each entry's "path" is split on '/'; segments that are empty, contain a
    '.' or are shorter than four characters are ignored.
    """
    tags: list[str] = []
    for entry in files:
        for part in (entry.get("path") or "").split("/"):
            if part and "." not in part and len(part) >= 4:
                tags.append(part.lower())
    return uniq(tags, cap=50)


def collect_signals(pr: dict) -> dict:
    """Build the signals payload from the PR JSON produced by fetch_pr.py.

    When the diff has no added lines (diff unavailable, or a pure deletion),
    commit headlines and bodies are used as the text to mine instead. Every
    line of every commit message is used: the text is assembled directly
    rather than being round-tripped through added_lines(), which would keep
    only lines that start with '+'.
    """
    diff = pr.get("diff") or ""
    commits = pr.get("commits") or []

    added = added_lines(diff)
    if not added:
        added = "\n".join(
            text
            for commit in commits
            for text in (commit.get("headline") or "", commit.get("body") or "")
            if text
        )

    commit_keywords = uniq(
        [
            keyword
            for commit in commits
            for keyword in title_keywords(commit.get("headline", ""))
        ],
        cap=100,
    )

    def wrap(values: list) -> list[dict]:
        return [{"value": value} for value in values]

    return {
        "symbols": wrap(extract_symbols(added)),
        "error_strings": wrap(extract_error_strings(added)),
        "log_messages": wrap(extract_log_messages(added)),
        "labels": wrap(pr.get("labels") or []),
        "title_keywords": wrap(title_keywords(pr.get("title", ""))),
        "path_tags": wrap(path_tags(pr.get("files") or [])),
        "commit_keywords": wrap(commit_keywords),
        # True when fetch_pr.py recorded a failure reason, or when there is no
        # diff text at all but there are commits to fall back on.
        "diff_unavailable": pr.get("diff_unavailable_reason") is not None
        or (not diff and bool(commits)),
    }


def main() -> None:
    """Read PR JSON from stdin and write the signals JSON to stdout."""
    pr = json.load(sys.stdin)
    json.dump(collect_signals(pr), sys.stdout)
    sys.stdout.write("\n")


if __name__ == "__main__":
    main()

# ---------------------------------------------------------------------------
# Next file in this patch (its module documentation continues after this
# point):
#
#   diff --git a/plugins/jira/skills/candidates-from-pr/scripts/fetch_pr.py b/plugins/jira/skills/candidates-from-pr/scripts/fetch_pr.py
#   new file mode 100755
#   index 000000000..e0bf8983b
#   --- /dev/null
#   +++ b/plugins/jira/skills/candidates-from-pr/scripts/fetch_pr.py
#   @@ -0,0 +1,116 @@
#   #!/usr/bin/env python3
#
# Fetch GitHub PR metadata and diff for candidates-from-pr.
#
# Resolves a PR URL or (number + --repo) into normalized JSON containing the
# fields the rest of the pipeline needs. Diff is capped to keep downstream
# processing bounded.
# (fetch_pr.py module documentation, continued)
#
# Usage:
#     fetch_pr.py <pr-url-or-number> [--repo <org/repo>] [--diff-max-lines N]
#
# Output: JSON on stdout with keys:
#     org, repo, number, url, title, body, base_ref, head_ref,
#     labels[], author, files[{path}], commits[{oid, headline, body}],
#     diff (capped), diff_truncated (bool), diff_total_lines,
#     diff_unavailable_reason (null when the diff was fetched)

from __future__ import annotations

import argparse
import json
import re
import subprocess
import sys
from typing import Any

# Accepts the canonical PR URL plus the variants people actually paste:
# a trailing sub-page ("/files", "/commits"), a query string or a fragment.
PR_URL_RE = re.compile(
    r"^https?://(?:www\.)?github\.com/([^/]+)/([^/]+)/pull/(\d+)(?:[/?#].*)?$"
)


def resolve_pr(arg: str, repo: str | None) -> tuple[str, str, int]:
    """Return (org, repo, number) from a PR URL, or from a number plus --repo.

    Exits the process (SystemExit with a message) when *arg* is neither a
    GitHub PR URL nor a plain number, when a number is given without *repo*,
    or when *repo* is not of the form "org/repo".
    """
    text = arg.strip()
    url_match = PR_URL_RE.match(text)
    if url_match:
        return url_match.group(1), url_match.group(2), int(url_match.group(3))

    # ASCII digits only: str.isdigit() also accepts characters such as
    # superscripts that int() cannot convert.
    if not re.fullmatch(r"[0-9]+", text):
        sys.exit(f"error: '{arg}' is not a PR URL or numeric PR number")
    if not repo:
        sys.exit("error: --repo is required when passing a numeric PR")

    org, slash, name = repo.strip().partition("/")
    if not (slash and org and name) or "/" in name:
        sys.exit(f"error: --repo must be in org/repo form, got '{repo}'")
    return org, name, int(text)


def run_command(args: list[str]) -> subprocess.CompletedProcess:
    """Run *args* capturing text output; exit cleanly if the binary is missing."""
    try:
        return subprocess.run(args, capture_output=True, text=True)
    except FileNotFoundError:
        sys.exit(f"error: '{args[0]}' executable not found on PATH")


def gh_json(args: list[str]) -> Any:
    """Run a command and return its stdout parsed as JSON.

    Exits the process with an error message when the command fails or when
    its output is not valid JSON.
    """
    res = run_command(args)
    if res.returncode != 0:
        sys.exit(f"error: {' '.join(args)} failed:\n{res.stderr}")
    try:
        return json.loads(res.stdout)
    except json.JSONDecodeError as exc:
        sys.exit(f"error: {' '.join(args)} returned invalid JSON: {exc}")


def gh_text(args: list[str]) -> tuple[str, str, int]:
    """Run a command and return (stdout, stderr, returncode) without raising
    on a non-zero exit status."""
    res = run_command(args)
    return res.stdout, res.stderr, res.returncode


def normalize_pr(
    org: str,
    repo: str,
    number: int,
    pr: dict,
    diff: str,
    diff_unavailable_reason: str | None,
    diff_max_lines: int,
) -> dict:
    """Assemble the normalized output payload.

    *pr* is the object returned by ``gh pr view --json ...``. *diff* is capped
    to the first *diff_max_lines* lines; ``diff_truncated`` and
    ``diff_total_lines`` describe what was cut.
    """
    diff_lines = diff.splitlines()
    limit = max(0, diff_max_lines)
    return {
        "org": org,
        "repo": repo,
        "number": number,
        "url": pr.get("url"),
        "title": pr.get("title") or "",
        "body": pr.get("body") or "",
        "base_ref": pr.get("baseRefName") or "",
        "head_ref": pr.get("headRefName") or "",
        "labels": [
            lab.get("name") for lab in pr.get("labels") or [] if lab.get("name")
        ],
        "author": (pr.get("author") or {}).get("login"),
        "files": [
            {"path": f.get("path")} for f in pr.get("files") or [] if f.get("path")
        ],
        "commits": [
            {
                "oid": c.get("oid"),
                "headline": c.get("messageHeadline") or "",
                "body": c.get("messageBody") or "",
            }
            for c in pr.get("commits") or []
        ],
        "diff": "\n".join(diff_lines[:limit]),
        "diff_truncated": len(diff_lines) > limit,
        "diff_total_lines": len(diff_lines),
        "diff_unavailable_reason": diff_unavailable_reason,
    }


def main() -> None:
    """Parse arguments, query ``gh`` and print the normalized PR JSON."""
    p = argparse.ArgumentParser()
    p.add_argument("pr", help="PR URL or number")
    p.add_argument("--repo", help="org/repo (required when pr is numeric)")
    p.add_argument("--diff-max-lines", type=int, default=4000)
    args = p.parse_args()
    if args.diff_max_lines < 0:
        # A negative value would be used as a slice bound and silently drop
        # lines from the END of the diff.
        p.error("--diff-max-lines must be >= 0")

    org, repo, number = resolve_pr(args.pr, args.repo)
    slug = f"{org}/{repo}"

    pr = gh_json(
        [
            "gh", "pr", "view", str(number), "--repo", slug,
            "--json",
            "number,url,title,body,headRefName,baseRefName,labels,author,commits,files",
        ]
    )

    diff, diff_err, rc = gh_text(["gh", "pr", "diff", str(number), "--repo", slug])
    diff_unavailable_reason: str | None = None
    if rc != 0:
        # Common failure modes: PR too large (HTTP 406, >300 files), private,
        # network glitch. Don't fail the whole pipeline — signals can still be
        # extracted from titles/bodies/file paths/commit messages.
        diff = ""
        err_lines = (diff_err or "").strip().splitlines()
        diff_unavailable_reason = err_lines[-1] if err_lines else "unknown error"

    out = normalize_pr(
        org, repo, number, pr, diff, diff_unavailable_reason, args.diff_max_lines
    )
    json.dump(out, sys.stdout)
    sys.stdout.write("\n")


if __name__ == "__main__":
    main()

# ---------------------------------------------------------------------------
# Next file in this patch (its module documentation continues after this
# point):
#
#   diff --git a/plugins/jira/skills/candidates-from-pr/scripts/render_report.py b/plugins/jira/skills/candidates-from-pr/scripts/render_report.py
#   new file mode 100755
#   index 000000000..b9916b9cd
#   --- /dev/null
#   +++ b/plugins/jira/skills/candidates-from-pr/scripts/render_report.py
#   @@ -0,0 +1,207 @@
#   #!/usr/bin/env python3
#
# Render the final report (text table or JSON passthrough).
#
# Reads several intermediate JSON files and emits either a human-readable text
# report or the canonical JSON payload to stdout.
# (render_report.py module documentation, continued)
#
# Usage:
#     render_report.py \
#         --pr pr.json \
#         --filters filters.json \
#         --jql "<jql string>" \
#         --explicit explicit.json \
#         --candidates scored.json \
#         [--rationales rationales.json] \
#         [--format text|json]
#
# `rationales.json` (optional) maps {"OCPBUGS-1234": "1-2 sentence rationale"}
# produced by the skill caller. If absent, the "Top signals" column falls back
# to a short summary of the first matched signals.

from __future__ import annotations

import argparse
import json
import sys
from datetime import datetime, timezone


COL_HEADERS = ["#", "Verdict", "Score", "Key", "Status", "Pri", "Assignee", "Summary", "Top signals"]


def load(path: str | None) -> object:
    """Return the parsed JSON content of *path*, or None when no path is given."""
    if not path:
        return None
    # JSON interchange text is UTF-8; don't depend on the platform default.
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def cell(value: object) -> str:
    """Return *value* as table text; None (a JSON null) becomes ''."""
    return "" if value is None else str(value)


def assignee_str(value: object) -> str:
    """Return a display string for an assignee given as a dict or a scalar.

    Falsy values yield "unassigned". For a dict the first non-empty of
    "display_name", "email", "name" is used.
    """
    if not value:
        return "unassigned"
    if isinstance(value, dict):
        return (
            value.get("display_name")
            or value.get("email")
            or value.get("name")
            or "unassigned"
        )
    return str(value)


def truncate(value: str, length: int) -> str:
    """Flatten newlines in *value* and cut it to at most *length* characters.

    A cut string ends with an ellipsis that counts towards *length*. A
    non-positive *length* yields the empty string.
    """
    text = cell(value).replace("\n", " ").strip()
    if length <= 0:
        return ""
    if len(text) <= length:
        return text
    return text[: length - 1] + "…"


def signals_summary(matched: list[dict]) -> str:
    """Return the first three matched signals as "type=value; type=value"."""
    parts: list[str] = []
    for signal in matched[:3]:
        kind = signal.get("type") or ""
        value = cell(signal.get("value"))
        parts.append(f"{kind}={value}" if kind else value)
    return "; ".join(parts)


def render_text(
    pr: dict,
    filters: dict,
    jql: str,
    explicit: list[dict],
    cands: list[dict],
    rationales: dict[str, str] | None,
) -> str:
    """Render the human-readable report.

    Sections, in order: PR header, derived target release and components,
    warnings, explicit Jira references, the candidate table (followed by one
    URL line per candidate that has a URL) and the JQL that was used.
    Candidate fields that are missing or null render as empty cells.
    """
    rationale_by_key = rationales or {}
    lines: list[str] = []

    lines.append(
        f"PR: {pr.get('org', '?')}/{pr.get('repo', '?')}#{pr.get('number', '?')}"
        f" — {pr.get('title') or ''}"
    )
    lines.append(
        f"Base ref: {pr.get('base_ref') or ''} → "
        f"target release {filters.get('target_release') or 'unknown'} "
        f"({filters.get('target_release_source') or ''})"
    )
    components = filters.get("components") or []
    if components:
        lines.append(
            f"Component(s): {', '.join(components)} "
            f"({filters.get('component_source') or ''})"
        )
    else:
        lines.append("Component(s): (none — JQL did not filter by component)")
    for warning in filters.get("warnings") or []:
        lines.append(f"Warning: {warning}")
    lines.append("")

    lines.append(f"Explicit references in PR ({len(explicit)}):")
    if explicit:
        for ref in explicit:
            lines.append(
                f" {ref.get('key')} [{ref.get('status') or '?'}, "
                f"target {ref.get('target_release') or '?'}, "
                f"assignee: {assignee_str(ref.get('assignee'))}] — "
                f"{ref.get('summary') or ''}"
            )
            if ref.get("url"):
                lines.append(f" {ref['url']}")
    else:
        lines.append(" (none)")
    lines.append("")

    lines.append(f"Candidate matches ({len(cands)}):")
    if cands:
        rows: list[list[str]] = [list(COL_HEADERS)]
        for position, cand in enumerate(cands, 1):
            rationale = rationale_by_key.get(cand.get("key") or "")
            top = rationale or signals_summary(cand.get("matched_signals") or [])
            rows.append([
                str(position),
                cell(cand.get("verdict")),
                cell(cand.get("score")),
                cell(cand.get("key")),
                cell(cand.get("status")),
                cell(cand.get("priority")),
                assignee_str(cand.get("assignee")),
                truncate(cand.get("summary"), 50),
                truncate(top, 60),
            ])
        # Every cell is a str at this point, so width/ljust are safe.
        widths = [
            max(len(row[col]) for row in rows) for col in range(len(COL_HEADERS))
        ]
        sep = "+".join("-" * (w + 2) for w in widths)
        sep = f"+{sep}+"
        for idx, row in enumerate(rows):
            cells = " | ".join(text.ljust(widths[col]) for col, text in enumerate(row))
            lines.append(f"| {cells} |")
            if idx == 0:
                lines.append(sep)
        lines.append("")
        for cand in cands:
            if cand.get("url"):
                lines.append(f" {cell(cand.get('key'))}: {cand['url']}")
    else:
        lines.append(" (none above min-score)")
    lines.append("")

    lines.append("JQL used:")
    lines.append(f" {jql}")
    return "\n".join(lines) + "\n"


def render_json(
    pr: dict,
    filters: dict,
    jql: str,
    explicit: list[dict],
    cands: list[dict],
    rationales: dict[str, str] | None,
) -> str:
    """Render the canonical JSON payload (schema_version "1.0").

    A candidate whose key has an entry in *rationales* gets a "rationale"
    field. The dicts in *cands* are copied first, so the caller's data is not
    modified.
    """
    rationale_by_key = rationales or {}
    enriched: list[dict] = []
    for cand in cands:
        item = dict(cand)
        rationale = rationale_by_key.get(cand.get("key") or "")
        if rationale:
            item["rationale"] = rationale
        enriched.append(item)

    payload = {
        "schema_version": "1.0",
        "metadata": {
            "generated_at": datetime.now(timezone.utc).isoformat(),
            "command": "candidates-from-pr",
            "jql": jql,
        },
        "pr": {
            "url": pr.get("url"),
            "number": pr.get("number"),
            "title": pr.get("title"),
            "base_ref": pr.get("base_ref"),
            "head_ref": pr.get("head_ref"),
            "labels": pr.get("labels") or [],
            "files_changed": len(pr.get("files") or []),
        },
        "derived": {
            "components": filters.get("components") or [],
            "target_release": filters.get("target_release"),
            "component_source": filters.get("component_source"),
            "target_release_source": filters.get("target_release_source"),
        },
        "explicit_references": explicit,
        "candidates": enriched,
    }
    return json.dumps(payload, indent=2) + "\n"


def main() -> None:
    """Load the intermediate JSON files and write the report to stdout."""
    p = argparse.ArgumentParser()
    p.add_argument("--pr", required=True)
    p.add_argument("--filters", required=True)
    p.add_argument("--jql", required=True)
    p.add_argument("--explicit", required=True)
    p.add_argument("--candidates", required=True)
    p.add_argument("--rationales", default=None)
    p.add_argument("--format", choices=("text", "json"), default="text")
    args = p.parse_args()

    # A file holding JSON null is treated like an empty object / list.
    pr = load(args.pr) or {}
    filters = load(args.filters) or {}
    explicit = load(args.explicit) or []
    cands = load(args.candidates) or []
    rationales = load(args.rationales) if args.rationales else None

    render = render_json if args.format == "json" else render_text
    sys.stdout.write(render(pr, filters, args.jql, explicit, cands, rationales))


if __name__ == "__main__":
    main()

# ---------------------------------------------------------------------------
# Next file in this patch (its module documentation continues after this
# point):
#
#   diff --git a/plugins/jira/skills/candidates-from-pr/scripts/score_candidates.py b/plugins/jira/skills/candidates-from-pr/scripts/score_candidates.py
#   new file mode 100755
#   index 000000000..db861f66b
#   --- /dev/null
#   +++ b/plugins/jira/skills/candidates-from-pr/scripts/score_candidates.py
#   @@ -0,0 +1,227 @@
#   #!/usr/bin/env python3
#
# Score candidate Jiras against PR signals.
#
# Reads two files and produces a ranked candidates JSON for rendering.
# (score_candidates.py module documentation, continued)
#
# All scoring is local string matching; no API calls.
#
# Usage:
#     score_candidates.py \
#         --signals signals.json \
#         --candidates candidates.json \
#         --components-derived "Networking / ovn-kubernetes,..." \
#         [--component-filter-used] \
#         [--min-score 40] [--limit 10]
#
# Inputs:
#     signals.json    — output of extract_signals.py
#     candidates.json — JSON list, each element with keys:
#                       key, summary, status, issuetype, priority, assignee,
#                       components, fix_versions, target_release, description,
#                       updated, url
#                       (skill caller fills this from mcp__atlassian__jira_search)
#
# Output (stdout):
#     [
#       {
#         "key": "...", "summary": "...", "url": "...",
#         "status": "...", "issuetype": "...", "priority": "...",
#         "assignee": "...", "components": [...],
#         "target_release": "...", "fix_versions": [...],
#         "score": 82, "verdict": "likely",
#         "matched_signals": [{"type": "...", "value": "..."}],
#       },
#       ...
#     ]
#
# The skill caller is responsible for turning matched_signals into a 1-2
# sentence rationale.

from __future__ import annotations

import argparse
import json
import re
import sys
from datetime import datetime, timedelta, timezone

DROPLIKE_TYPES = {"Epic", "Feature", "Initiative"}
DROP_STATUSES = {"Verified", "Closed"}

# A trailing UTC offset written without a colon, e.g. "+0000" or "-0530".
TZ_NO_COLON_RE = re.compile(r"([+-]\d{2})(\d{2})$")


def load(path: str) -> object:
    """Return the parsed JSON content of the file at *path*."""
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def find_in_text(needles: list[str], text: str) -> list[str]:
    """Return the needles that appear verbatim (case-sensitively) in *text*.

    Used for symbols and error strings, which tend to be exact tokens.
    """
    if not needles or not text:
        return []
    return [n for n in needles if n and n in text]


def find_lower(needles: list[str], text_lower: str) -> list[str]:
    """Return the needles whose lower-cased form occurs in *text_lower*.

    *text_lower* must already be lower-cased by the caller.
    """
    if not needles or not text_lower:
        return []
    return [n for n in needles if n and n.lower() in text_lower]


def signal_values(signals: dict, name: str) -> list[str]:
    """Return the "value" of every entry of signals[name] (missing -> [])."""
    return [entry["value"] for entry in signals.get(name) or []]


def parse_updated(raw: object) -> datetime | None:
    """Parse an ISO-8601 timestamp into an aware datetime, or return None.

    Accepts a trailing "Z" and offsets written without a colon ("+0000"),
    neither of which datetime.fromisoformat() understands before Python 3.11.
    A timestamp without any offset is taken to be UTC so it can be compared
    with an aware "now".
    """
    if not isinstance(raw, str) or not raw.strip():
        return None
    text = raw.strip()
    if text.endswith("Z"):
        text = text[:-1] + "+00:00"
    text = TZ_NO_COLON_RE.sub(r"\1:\2", text)
    try:
        parsed = datetime.fromisoformat(text)
    except ValueError:
        return None
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def score_one(
    cand: dict,
    signals: dict,
    derived_components: set[str],
    component_filter_used: bool,
) -> tuple[int, list[dict]]:
    """Score one candidate issue against the PR signals.

    Returns (score, matched_signals). The score is -1 when the candidate's
    status is in DROP_STATUSES (the caller drops those), 0 when a component
    filter was used but the candidate shares no component with
    *derived_components*, and otherwise a value clamped to 0..100.
    """
    matched: list[dict] = []

    # Checked first so a Closed/Verified issue is dropped on every path,
    # including the component-mismatch early return below.
    if cand.get("status") in DROP_STATUSES:
        return -1, matched

    haystack = (cand.get("summary") or "") + "\n" + (cand.get("description") or "")
    haystack_lower = haystack.lower()

    score = 0

    # Error strings: +35 for any match.
    err_hits = find_in_text(signal_values(signals, "error_strings"), haystack)
    if err_hits:
        score += 35
        matched.extend({"type": "error_string", "value": v} for v in err_hits[:3])

    # Symbols: +25 per match, capped at +40.
    sym_hits = find_in_text(signal_values(signals, "symbols"), haystack)
    if sym_hits:
        score += min(40, 25 * len(sym_hits))
        matched.extend({"type": "symbol", "value": v} for v in sym_hits[:5])

    # Title keyword overlap: +15 if 2+ tokens match.
    title_hits = find_lower(signal_values(signals, "title_keywords"), haystack_lower)
    if len(title_hits) >= 2:
        score += 15
        matched.extend({"type": "title_keyword", "value": v} for v in title_hits[:3])

    # Component agreement: +10. Required for non-zero unless filter skipped.
    shared = set(cand.get("components") or []) & derived_components
    if shared:
        score += 10
        # sorted() so the reported component does not depend on set order.
        matched.append({"type": "component_match", "value": sorted(shared)[0]})
    elif component_filter_used:
        # JQL should have prevented this; guard anyway.
        return 0, matched

    # Recency: +5 when updated within the last 90 days.
    updated = parse_updated(cand.get("updated"))
    if updated is not None and datetime.now(timezone.utc) - updated < timedelta(days=90):
        score += 5
        matched.append({"type": "recency", "value": "updated<=90d"})

    # Path-tag overlap: +5 per match, capped at +10.
    path_hits = find_lower(signal_values(signals, "path_tags"), haystack_lower)
    if path_hits:
        score += min(10, 5 * len(path_hits))
        matched.extend({"type": "path_tag", "value": v} for v in path_hits[:3])

    # Commit-keyword overlap: +5 if 3+ commit headline tokens match.
    # Useful for downstream-merge / large PRs where the diff is unavailable.
    commit_hits = find_lower(signal_values(signals, "commit_keywords"), haystack_lower)
    if len(commit_hits) >= 3:
        score += 5
        matched.extend({"type": "commit_keyword", "value": v} for v in commit_hits[:3])

    # Penalties.
    if cand.get("issuetype") in DROPLIKE_TYPES:
        score -= 15
    if not derived_components and not (err_hits or sym_hits):
        score -= 20

    return max(0, min(100, score)), matched


def verdict_for(score: int, min_score: int) -> str | None:
    """Map a score to "likely" / "possible" / "unlikely".

    Returns None for any score below *min_score*, so a threshold above 50 (or
    above 75) is honoured instead of being bypassed by the fixed bands.
    """
    if score < min_score:
        return None
    if score >= 75:
        return "likely"
    if score >= 50:
        return "possible"
    return "unlikely"


def main() -> None:
    """Score every candidate and print the ranked, capped list as JSON."""
    p = argparse.ArgumentParser()
    p.add_argument("--signals", required=True)
    p.add_argument("--candidates", required=True)
    p.add_argument(
        "--components-derived",
        default="",
        help="Comma-separated derived components (used for component-agreement scoring)",
    )
    p.add_argument(
        "--component-filter-used",
        action="store_true",
        help="Pass when the JQL included a component clause",
    )
    p.add_argument("--min-score", type=int, default=40)
    p.add_argument("--limit", type=int, default=10)
    args = p.parse_args()

    signals = load(args.signals)
    cands = load(args.candidates)
    derived = {c.strip() for c in args.components_derived.split(",") if c.strip()}

    scored: list[dict] = []
    for cand in cands:
        score, matched = score_one(cand, signals, derived, args.component_filter_used)
        if score < 0:
            continue
        verdict = verdict_for(score, args.min_score)
        if verdict is None:
            continue
        scored.append(
            {
                "key": cand.get("key"),
                "summary": cand.get("summary"),
                "url": cand.get("url"),
                "status": cand.get("status"),
                "issuetype": cand.get("issuetype"),
                "priority": cand.get("priority"),
                "assignee": cand.get("assignee"),
                "components": cand.get("components") or [],
                "target_release": cand.get("target_release"),
                "fix_versions": cand.get("fix_versions") or [],
                "score": score,
                "verdict": verdict,
                "matched_signals": matched,
            }
        )

    scored.sort(key=lambda item: item["score"], reverse=True)
    json.dump(scored[: args.limit], sys.stdout)
    sys.stdout.write("\n")


if __name__ == "__main__":
    main()