Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions gittensor/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
# =============================================================================
# Repository & PR Scoring
# =============================================================================
PR_LOOKBACK_DAYS = 35 # rolling window for scoring
PR_LOOKBACK_DAYS = 30 # rolling window for scoring (per-repo default, overridable in the scoring config)
MERGED_PR_BASE_SCORE = 25
MIN_TOKEN_SCORE_FOR_BASE_SCORE = 5 # PRs below this get 0 base score
MAX_CONTRIBUTION_BONUS = 25
Expand Down Expand Up @@ -129,13 +129,9 @@
# =============================================================================
# Eligibility gate — per-repo defaults, overridable in master_repositories.json.
MIN_VALID_SOLVED_ISSUES = 3 # minimum solved issues where solving PR has token_score >= MIN_TOKEN_SCORE_FOR_VALID_ISSUE
MIN_ISSUE_CREDIBILITY = 0.70 # minimum issue credibility ratio
MIN_ISSUE_CREDIBILITY = 0.80 # minimum issue credibility ratio
MIN_TOKEN_SCORE_FOR_VALID_ISSUE = 5 # solving-PR token_score for a solved issue to count as "valid"

# Review quality cliff model (different from OSS: has clean bonus + steeper penalty)
ISSUE_REVIEW_CLEAN_BONUS = 1.1 # multiplier when 0 CHANGES_REQUESTED rounds
ISSUE_REVIEW_PENALTY_RATE = 0.15 # per CHANGES_REQUESTED round after cliff

# Open issue spam threshold (per-repo: counts a repo's own open issues)
OPEN_ISSUE_SPAM_BASE_THRESHOLD = 2
OPEN_ISSUE_SPAM_TOKEN_SCORE_PER_SLOT = 300.0 # +1 allowed open issue per this much token score
Expand Down
5 changes: 0 additions & 5 deletions gittensor/utils/github_api_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import time
from dataclasses import dataclass
from enum import Enum
from math import ceil
from typing import Any, Dict, List, Optional

import bittensor as bt
Expand All @@ -12,14 +11,10 @@
from gittensor.constants import (
BASE_GITHUB_API_URL,
GITHUB_HTTP_TIMEOUT_SECONDS,
REVIEW_PENALTY_RATE,
)
from gittensor.utils.models import PRInfo
from gittensor.utils.utils import backoff_seconds

# Beyond this many CHANGES_REQUESTED reviews the quality multiplier is already 0
_MAX_CHANGES_REQUESTED_REVIEWS = ceil(1 / REVIEW_PENALTY_RATE)


class GitHubIdentityStatus(Enum):
VALID = 'VALID'
Expand Down
76 changes: 53 additions & 23 deletions gittensor/utils/mirror/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import time
from datetime import datetime, timezone
from typing import Optional
from typing import Dict, Optional

import bittensor as bt
import requests
Expand Down Expand Up @@ -64,35 +64,40 @@ def __exit__(self, exc_type, exc, tb) -> None:
def get_miner_pulls(
self,
github_id: str,
since: Optional[datetime] = None,
since_by_repo: Optional[Dict[str, datetime]] = None,
) -> MirrorPullRequestsResponse:
"""Fetch every tracked PR authored by ``github_id`` since the given
datetime. If ``since`` is omitted the mirror defaults to 35 days back.
Response contains all mirror-tracked repos; caller must filter to the
scoring config's registered subset.
"""Fetch tracked PRs authored by ``github_id``.

With ``since_by_repo`` (repo full name -> cutoff datetime), POSTs the
per-repo window map; the response is restricted to those repos, each
windowed to its own cutoff. Without it, GETs the mirror's default
window across all tracked repos.
"""
path = f'/api/v1/miners/{github_id}/pulls'
params = {'since': since.astimezone(timezone.utc).isoformat()} if since else None
data = self._get(path, params=params)
data = self._fetch_windowed(path, since_by_repo)
try:
return MirrorPullRequestsResponse.from_dict(data)
except Exception as e:
raise MirrorRequestError(f'Mirror GET {path} returned invalid mirror response: {e}') from e
raise MirrorRequestError(f'Mirror response from {path} was invalid: {e}') from e

def get_miner_issues(
self,
github_id: str,
since: Optional[datetime] = None,
since_by_repo: Optional[Dict[str, datetime]] = None,
) -> MirrorIssuesResponse:
"""Fetch issues authored by ``github_id`` since the given datetime,
each with an inline ``solving_pr`` when ``solved_by_pr`` is populated."""
"""Fetch issues authored by ``github_id``, each with an inline
``solving_pr`` when ``solved_by_pr`` is populated.

With ``since_by_repo``, POSTs the per-repo window map (the scoring
window). Without it, GETs all currently-open issues unbounded — the
open-issue-count path.
"""
path = f'/api/v1/miners/{github_id}/issues'
params = {'since': since.astimezone(timezone.utc).isoformat()} if since else None
data = self._get(path, params=params)
data = self._fetch_windowed(path, since_by_repo)
try:
return MirrorIssuesResponse.from_dict(data)
except Exception as e:
raise MirrorRequestError(f'Mirror GET {path} returned invalid mirror response: {e}') from e
raise MirrorRequestError(f'Mirror response from {path} was invalid: {e}') from e

def get_pr_files(
self,
Expand All @@ -109,7 +114,7 @@ def get_pr_files(
try:
return MirrorPullRequestFilesResponse.from_dict(data)
except Exception as e:
raise MirrorRequestError(f'Mirror GET {path} returned invalid mirror response: {e}') from e
raise MirrorRequestError(f'Mirror response from {path} was invalid: {e}') from e

def get_repo_maintainers(self, repo_full_name: str) -> MirrorRepoMaintainersResponse:
"""Fetch users whose latest known GitHub association for
Expand All @@ -123,21 +128,46 @@ def get_repo_maintainers(self, repo_full_name: str) -> MirrorRepoMaintainersResp
try:
return MirrorRepoMaintainersResponse.from_dict(data)
except Exception as e:
raise MirrorRequestError(f'Mirror GET {path} returned invalid mirror response: {e}') from e
raise MirrorRequestError(f'Mirror response from {path} was invalid: {e}') from e

def _fetch_windowed(self, path: str, since_by_repo: Optional[Dict[str, datetime]]) -> dict:
"""POST a per-repo ``since`` map when one is given, else GET the
mirror's default window."""
if since_by_repo:
body = {
'since_by_repo': {repo: dt.astimezone(timezone.utc).isoformat() for repo, dt in since_by_repo.items()}
}
return self._post(path, body)
return self._get(path)

def _get(self, path: str, params: Optional[dict] = None) -> dict:
return self._request('GET', path, params=params)

def _post(self, path: str, json_body: dict) -> dict:
return self._request('POST', path, json_body=json_body)

def _request(
self,
method: str,
path: str,
params: Optional[dict] = None,
json_body: Optional[dict] = None,
) -> dict:
url = f'{self.base_url}{path}'
last_error: Optional[str] = None

for attempt in range(self.max_attempts):
try:
response = self.session.get(url, params=params, timeout=self.timeout)
if method == 'POST':
response = self.session.post(url, json=json_body, timeout=self.timeout)
else:
response = self.session.get(url, params=params, timeout=self.timeout)
except requests.RequestException as e:
last_error = f'request exception: {e}'
if attempt < self.max_attempts - 1:
backoff = backoff_seconds(attempt)
bt.logging.warning(
f'Mirror GET {path} raised {e} '
f'Mirror {method} {path} raised {e} '
f'(attempt {attempt + 1}/{self.max_attempts}), retrying in {backoff}s...'
)
time.sleep(backoff)
Expand All @@ -151,7 +181,7 @@ def _get(self, path: str, params: Optional[dict] = None) -> dict:
if attempt < self.max_attempts - 1:
backoff = backoff_seconds(attempt)
bt.logging.warning(
f'Mirror GET {path} failed ({last_error}) '
f'Mirror {method} {path} failed ({last_error}) '
f'(attempt {attempt + 1}/{self.max_attempts}), retrying in {backoff}s...'
)
time.sleep(backoff)
Expand All @@ -160,16 +190,16 @@ def _get(self, path: str, params: Optional[dict] = None) -> dict:
# 4xx except 429 are not retryable — fail fast so callers see the real error.
if 400 <= response.status_code < 500 and response.status_code != 429:
raise MirrorRequestError(
f'Mirror GET {path} returned {response.status_code}: {_body_preview(response)}'
f'Mirror {method} {path} returned {response.status_code}: {_body_preview(response)}'
)

last_error = f'status {response.status_code}: {_body_preview(response)}'
if attempt < self.max_attempts - 1:
backoff = backoff_seconds(attempt)
bt.logging.warning(
f'Mirror GET {path} failed ({last_error}) '
f'Mirror {method} {path} failed ({last_error}) '
f'(attempt {attempt + 1}/{self.max_attempts}), retrying in {backoff}s...'
)
time.sleep(backoff)

raise MirrorRequestError(f'Mirror GET {path} failed after {self.max_attempts} attempts: {last_error}')
raise MirrorRequestError(f'Mirror {method} {path} failed after {self.max_attempts} attempts: {last_error}')
25 changes: 19 additions & 6 deletions gittensor/validator/issue_discovery/scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
from gittensor.constants import (
MAINTAINER_ASSOCIATIONS,
MIN_TOKEN_SCORE_FOR_BASE_SCORE,
PR_LOOKBACK_DAYS,
)
from gittensor.utils.mirror.client import MirrorClient, MirrorRequestError
from gittensor.utils.mirror.models import MirrorIssue, MirrorSolvingPR
Expand All @@ -63,6 +62,7 @@
RepositoryConfig,
TokenConfig,
resolve_eligibility,
resolve_scoring,
)


Expand Down Expand Up @@ -137,7 +137,7 @@ async def run_issue_discovery(

For each miner, fetches their authored issues via the mirror and classifies
each. Issues in repos not present in ``mirror_repos`` are filtered out
client-side (mirror returns all tracked repos; the master list may be narrower).
client-side.

Depends on OSS scoring (``score_miner_prs``) having already run for
this cycle — the cross-miner solving-PR cache is built by walking every
Expand All @@ -153,7 +153,12 @@ async def run_issue_discovery(
return

client = client or MirrorClient()
lookback_date = datetime.now(timezone.utc) - timedelta(days=PR_LOOKBACK_DAYS)
now = datetime.now(timezone.utc)
# Each repo is windowed by its own pr_lookback_days; the mirror applies the
# per-repo cutoffs server-side for the scoring fetch.
since_by_repo = {
name: now - timedelta(days=resolve_scoring(rc.scoring).pr_lookback_days) for name, rc in mirror_repos.items()
}
enabled_names: Set[str] = set(mirror_repos.keys())

solving_pr_cache: Dict[Tuple[str, int], CachedSolvingPR] = _build_solving_pr_cache(miner_evaluations)
Expand Down Expand Up @@ -181,7 +186,9 @@ async def run_issue_discovery(
continue

try:
response = await asyncio.to_thread(client.get_miner_issues, evaluation.github_id, since=lookback_date)
response = await asyncio.to_thread(
client.get_miner_issues, evaluation.github_id, since_by_repo=since_by_repo
)
except MirrorRequestError as e:
bt.logging.warning(f'├─ UID {uid}: issue fetch failed ({e}) — skipped this miner')
_restore_issue_discovery_from_cache(evaluation, evaluation_cache)
Expand Down Expand Up @@ -733,10 +740,16 @@ def _mirror_issue_for_scoring(
body_or_title_edited_at=None,
)

scoring_cfg = resolve_scoring(repo_config.scoring)
adapted.discovery_base_score = base_score
adapted.discovery_time_decay_multiplier = round(calculate_time_decay(solving_pr.merged_at), 2)
adapted.discovery_time_decay_multiplier = round(
calculate_time_decay(solving_pr.merged_at, scoring_cfg.time_decay), 2
)
adapted.discovery_review_quality_multiplier = round(
calculate_issue_review_quality_multiplier(solving_pr.review_summary.maintainer_changes_requested_count),
calculate_issue_review_quality_multiplier(
solving_pr.review_summary.maintainer_changes_requested_count,
scoring_cfg.review_penalty_rate,
),
2,
)

Expand Down
17 changes: 4 additions & 13 deletions gittensor/validator/issue_discovery/scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,27 +15,18 @@

import bittensor as bt

from gittensor.constants import (
ISSUE_REVIEW_CLEAN_BONUS,
ISSUE_REVIEW_PENALTY_RATE,
)

if TYPE_CHECKING:
from gittensor.validator.utils.load_weights import ResolvedEligibility


def calculate_issue_review_quality_multiplier(changes_requested_count: int) -> float:
"""Cliff model: clean bonus when 0 changes requested, then linear penalty.
def calculate_issue_review_quality_multiplier(changes_requested_count: int, review_penalty_rate: float) -> float:
"""Linear penalty on the solving PR's maintainer CHANGES_REQUESTED rounds.

0 rounds → 1.1 (clean bonus)
0 rounds → 1.0
1 round → 0.85 (this and the examples below assume review_penalty_rate = 0.15)
2 rounds → 0.70
7+ rounds → 0.0
"""
if changes_requested_count == 0:
multiplier = ISSUE_REVIEW_CLEAN_BONUS
else:
multiplier = max(0.0, 1.0 - ISSUE_REVIEW_PENALTY_RATE * changes_requested_count)
multiplier = max(0.0, 1.0 - review_penalty_rate * changes_requested_count)
bt.logging.info(
f'{changes_requested_count} solving-PR CHANGES_REQUESTED review(s) → '
f'issue_review_quality_multiplier={multiplier:.2f}'
Expand Down
39 changes: 22 additions & 17 deletions gittensor/validator/oss_contributions/mirror/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

The mirror returns one bundle per PR (with all scoring inputs inlined), so
loading is a single HTTP call regardless of how many repos the miner has
touched.
touched. The call sends each repo's ``pr_lookback_days`` window, so the mirror
applies the per-repo time cutoffs server-side and returns only in-window PRs.

Filtering applied at load time:
- Repo not in master_repositories: dropped (mirror returns all tracked repos).
- Repo not in master_repositories: dropped (defensive — the per-repo request
already scopes the response to the registered repos).
- PR author is a maintainer (OWNER/MEMBER/COLLABORATOR): silently dropped.
- CLOSED PRs created before the lookback window: dropped — closing an old PR
shouldn't trigger a fresh credibility penalty.
- MERGED PRs that fail ``_should_skip_merged_mirror_pr`` (base_ref, head_ref,
self-merge w/o approval, etc.): dropped. Applied at LOAD time so the
merged_count used by ``check_eligibility`` isn't inflated by ineligible PRs.
Expand All @@ -21,12 +21,12 @@
import bittensor as bt

from gittensor.classes import MinerEvaluation
from gittensor.constants import MAINTAINER_ASSOCIATIONS, PR_LOOKBACK_DAYS
from gittensor.constants import MAINTAINER_ASSOCIATIONS
from gittensor.utils.mirror.client import MirrorClient, MirrorRequestError
from gittensor.utils.mirror.models import MirrorPullRequest
from gittensor.validator.oss_contributions.mirror.scored_pr import ScoredPR
from gittensor.validator.oss_contributions.mirror.scoring import _should_skip_merged_mirror_pr
from gittensor.validator.utils.load_weights import RepositoryConfig
from gittensor.validator.utils.load_weights import RepositoryConfig, resolve_scoring


def load_miner_prs(
Expand Down Expand Up @@ -56,10 +56,16 @@ def load_miner_prs(
return

client = client or MirrorClient()
lookback_date = datetime.now(timezone.utc) - timedelta(days=PR_LOOKBACK_DAYS)
now = datetime.now(timezone.utc)
# Each repo is windowed by its own pr_lookback_days; the mirror applies the
# per-repo cutoffs server-side and returns only in-window PRs.
since_by_repo = {
name: now - timedelta(days=resolve_scoring(rc.scoring).pr_lookback_days)
for name, rc in master_repositories.items()
}

try:
response = client.get_miner_pulls(eval_.github_id, since=lookback_date)
response = client.get_miner_pulls(eval_.github_id, since_by_repo=since_by_repo)
except MirrorRequestError as e:
bt.logging.error(f'PR fetch failed for UID {eval_.uid}: {e}')
eval_.mirror_pr_fetch_failed = True
Expand All @@ -68,7 +74,7 @@ def load_miner_prs(

for pr in response.pull_requests:
try:
_maybe_add_pr(eval_, pr, master_repositories, lookback_date)
_maybe_add_pr(eval_, pr, master_repositories)
except Exception as e:
bt.logging.warning(f'Error processing PR #{pr.pr_number} ({pr.repo_full_name}): {e}')

Expand All @@ -81,14 +87,17 @@ def _maybe_add_pr(
eval_: MinerEvaluation,
pr: MirrorPullRequest,
master_repositories: Dict[str, RepositoryConfig],
lookback_date: datetime,
) -> None:
"""Apply load-time filters and bucket pr by state if it passes."""
"""Apply load-time filters and bucket pr by state if it passes.

Time-windowing (each repo's ``pr_lookback_days``) is applied by the mirror,
so every PR here is already inside its repo's window.
"""

repo_config = master_repositories.get(pr.repo_full_name)
if repo_config is None:
# Mirror tracks more repos than the scoring set; skip-noise dominates the
# log at info level when master_repositories is small. Demoted to debug.
# Defensive: the per-repo request already scopes the response, but a
# stray repo would otherwise have no config to score against.
bt.logging.debug(f'Skipping PR #{pr.pr_number} in {pr.repo_full_name} - not in master_repositories')
return

Expand All @@ -100,10 +109,6 @@ def _maybe_add_pr(
if pr.state == 'OPEN':
eval_.open_prs.append(ScoredPR(pr=pr))
elif pr.state == 'CLOSED':
# Skip stale CLOSED PRs created before the lookback window — closing an
# old PR shouldn't trigger a fresh credibility penalty.
if pr.created_at < lookback_date:
return
eval_.closed_prs.append(ScoredPR(pr=pr))
elif pr.state == 'MERGED':
# Apply the merge-eligibility gate at LOAD time so the merged_count used
Expand Down
Loading
Loading