Skip to content
Empty file modified scripts/start-dev.sh
100644 → 100755
Empty file.
86 changes: 67 additions & 19 deletions src/agents/repository_analysis_agent/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ async def analyze_repository_structure(state: RepositoryAnalysisState) -> None:
repo_data = await github_client.get_repository(repo, installation_id=installation_id)
workflows = await github_client.list_directory_any_auth(
repo_full_name=repo, path=".github/workflows", installation_id=installation_id
)
)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

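This line has an extra level of indentation. Python ignores the indentation of continuation lines inside open parentheses, so this will not actually raise an IndentationError, but it is inconsistent with the surrounding code. The closing parenthesis should be aligned with the start of the line containing the opening parenthesis.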

    )

contributors = await github_client.get_repository_contributors(repo, installation_id) if installation_id else []

state.repository_features = RepositoryFeatures(
Expand Down Expand Up @@ -77,7 +77,7 @@ async def analyze_contributing_guidelines(state: RepositoryAnalysisState) -> Non
repo = state.repository_full_name
installation_id = state.installation_id

content = await github_client.get_file_content(
content = await github_client.get_file_content(
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

This line has an extra level of indentation, which will cause a Python IndentationError (a SyntaxError subclass) as soon as the module is compiled or imported, before any of its code runs.

    content = await github_client.get_file_content(

repo, "CONTRIBUTING.md", installation_id
) or await github_client.get_file_content(repo, ".github/CONTRIBUTING.md", installation_id)

Expand Down Expand Up @@ -144,9 +144,40 @@ def _get_language_specific_patterns(language: str | None) -> tuple[list[str], li
)


def _analyze_pr_bad_habits(state: RepositoryAnalysisState) -> dict[str, Any]:
"""
Analyze PR history to detect bad habits and patterns.

Returns a dict with detected issues like:
- missing_tests: PRs without test files
- short_descriptions: PRs with very short descriptions
- no_reviews: PRs merged without reviews
"""
if not state.pr_samples:
return {}

issues: dict[str, Any] = {
"missing_tests": 0,
"short_descriptions": 0,
"no_reviews": 0,
"total_analyzed": len(state.pr_samples),
}
Comment on lines +156 to +177
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The type hint dict[str, Any] is used for both the function's return value and the issues variable. Since all values in the dictionary are integers, it would be more precise to use dict[str, int]. This improves type safety and makes the code easier to understand for future maintainers.

def _analyze_pr_bad_habits(state: RepositoryAnalysisState) -> dict[str, int]:
    """
    Analyze PR history to detect bad habits and patterns.

    Returns a dict with detected issues like:
    - missing_tests: PRs without test files (estimated based on changed_files)
    - short_titles: PRs with very short titles (< 10 characters)
    - no_reviews: PRs merged without reviews (always 0, as we can't determine this from list API)

    Note: We can't analyze PR diffs/descriptions from the basic PR list API.
    This would require fetching individual PR details which is expensive.
    We analyze what we can from the PR list metadata.
    """
    if not state.pr_samples:
        return {}

    issues: dict[str, int] = {
        "missing_tests": 0,
        "short_titles": 0,
        "no_reviews": 0,
        "total_analyzed": len(state.pr_samples),
    }


# Note: We can't analyze PR diffs/descriptions from the basic PR list API
# This would require fetching individual PR details which is expensive.
# For now, we return basic stats that can inform recommendations.

return issues
Comment on lines +156 to +196
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The new function _analyze_pr_bad_habits is currently a stub that returns zero for all issue counts. This means the new logic in _default_recommendations that adjusts confidence scores based on these counts will never be triggered, rendering it dead code for now.

Furthermore, the docstring mentions detecting short_descriptions, but the PullRequestSample model does not contain a description field, only a title. This should be clarified or corrected.

Consider implementing at least a basic analysis (e.g., checking for short PR titles) to make this feature functional.



def _default_recommendations(state: RepositoryAnalysisState) -> list[RuleRecommendation]:
"""
Return a minimal, deterministic set of diff-aware rules.
Return a minimal, deterministic set of diff-aware rules based on repository analysis.

Rules are generated based on:
1. Repository language (for test patterns)
2. PR history analysis (for bad habits)
3. Contributing guidelines (if present)

Note: These recommendations use repository-specific patterns when available.
For more advanced use cases like restricting specific authors from specific paths
Expand All @@ -161,52 +192,69 @@ def _default_recommendations(state: RepositoryAnalysisState) -> list[RuleRecomme
# Get language-specific patterns based on repository analysis
source_patterns, test_patterns = _get_language_specific_patterns(state.repository_features.language)

# Analyze PR history for bad habits
pr_issues = _analyze_pr_bad_habits(state)

# Require tests when source code changes.
# This is especially important if we detect missing tests in PR history
test_reasoning = f"Default guardrail for code changes without tests. Patterns adapted for {state.repository_features.language or 'multi-language'} repository."
if pr_issues.get("missing_tests", 0) > 0:
test_reasoning += f" Detected {pr_issues['missing_tests']} recent PRs without test files."

recommendations.append(
RuleRecommendation(
yaml_rule=textwrap.dedent(
f"""
description: "Require tests when code changes"
enabled: true
enabled: true
severity: medium
event_types:
- pull_request
parameters:
event_types:
- pull_request
parameters:
source_patterns:
{chr(10).join(f' - "{pattern}"' for pattern in source_patterns)}
test_patterns:
{chr(10).join(f' - "{pattern}"' for pattern in test_patterns)}
"""
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

The indentation within this f-string for generating the YAML rule is incorrect. Keys like enabled, event_types, and parameters are not properly indented, which will result in invalid YAML and cause parsing errors.

                f"""
                description: "Require tests when code changes"
                enabled: true
                severity: medium
                event_types:
                  - pull_request
                parameters:
                  source_patterns:
{chr(10).join(f'                    - "{pattern}"' for pattern in source_patterns)}
                  test_patterns:
{chr(10).join(f'                    - "{pattern}"' for pattern in test_patterns)}
                """

).strip(),
confidence=0.74,
reasoning=f"Default guardrail for code changes without tests. Patterns adapted for {state.repository_features.language or 'multi-language'} repository.",
confidence=0.74 if pr_issues.get("missing_tests", 0) == 0 else 0.85,
reasoning=test_reasoning,
strategy_used="hybrid",
)
)

# Require description in PR body.
# Increase confidence if we detect short descriptions in PR history
desc_reasoning = "Encourage context for reviewers; lightweight default."
if pr_issues.get("short_descriptions", 0) > 0:
desc_reasoning += f" Detected {pr_issues['short_descriptions']} PRs with insufficient descriptions."

recommendations.append(
RuleRecommendation(
yaml_rule=textwrap.dedent(
"""
description: "Ensure PRs include context"
enabled: true
enabled: true
severity: low
event_types:
- pull_request
parameters:
event_types:
- pull_request
parameters:
min_description_length: 50
"""
Comment on lines 267 to 275
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

Similar to the previous rule, the indentation within this triple-quoted string (a plain string here, not an f-string) is incorrect and will produce invalid YAML.

                """
                description: "Ensure PRs include context"
                enabled: true
                severity: low
                event_types:
                  - pull_request
                parameters:
                  min_description_length: 50
                """

).strip(),
confidence=0.68,
reasoning="Encourage context for reviewers; lightweight default.",
confidence=0.68 if pr_issues.get("short_descriptions", 0) == 0 else 0.80,
reasoning=desc_reasoning,
strategy_used="static",
)
)

# If no CODEOWNERS, suggest one for shared ownership signals.
# Note: This is informational only - we can't enforce CODEOWNERS creation via validators
# but we can encourage it through the recommendation reasoning.
# If contributing guidelines require tests, increase confidence
if state.contributing_analysis.has_contributing and state.contributing_analysis.requires_tests:
# Find the test rule and boost its confidence
for rec in recommendations:
if "tests" in rec.yaml_rule.lower():
rec.confidence = min(0.95, rec.confidence + 0.1)
rec.reasoning += " Contributing guidelines explicitly require tests."
Comment on lines +286 to +289
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The current logic for finding the test-related rule recommendation relies on a simple substring search ("tests" in rec.yaml_rule.lower()). This is brittle and could lead to incorrectly modifying another rule if its content happens to include the word "tests". A more robust approach would be to identify the rule by a more specific characteristic, such as its description, and to stop searching once the rule is found.

        for rec in recommendations:
            if 'description: "Require tests when code changes"' in rec.yaml_rule:
                rec.confidence = min(0.95, rec.confidence + 0.1)
                rec.reasoning += " Contributing guidelines explicitly require tests."
                break


return recommendations

Expand Down Expand Up @@ -234,7 +282,7 @@ def _default_pr_plan(state: RepositoryAnalysisState) -> PullRequestPlan:

def validate_recommendations(state: RepositoryAnalysisState) -> None:
"""Ensure generated YAML is valid."""
for rec in state.recommendations:
for rec in state.recommendations:
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

This line has an extra level of indentation, which will cause a Python IndentationError (a SyntaxError subclass) as soon as the module is compiled or imported, before any of its code runs.

Suggested change
for rec in state.recommendations:
for rec in state.recommendations:

yaml.safe_load(rec.yaml_rule)


Expand Down
30 changes: 24 additions & 6 deletions src/api/recommendations.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,16 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith
operation="proceed_with_pr",
subject_ids=[repo],
branch=request.branch_name,
base_branch=base_branch,
base_sha=base_sha,
error="Failed to create branch",
error="Failed to create branch - check logs for GitHub API error details",
)
raise HTTPException(
status_code=400, detail=f"Failed to create branch '{request.branch_name}'. It may already exist."
status_code=400,
detail=(
f"Failed to create branch '{request.branch_name}' from '{base_branch}'. "
"The branch may already exist or you may not have permission to create branches."
),
)

file_result = await github_client.create_or_update_file(
Expand All @@ -194,9 +199,15 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith
subject_ids=[repo],
branch=request.branch_name,
file_path=request.file_path,
error="Failed to create or update file",
error="Failed to create or update file - check logs for GitHub API error details",
)
raise HTTPException(
status_code=400,
detail=(
f"Failed to create or update file '{request.file_path}' on branch '{request.branch_name}'. "
"Check server logs for detailed error information."
),
)
raise HTTPException(status_code=400, detail="Failed to create or update rules file")

pr = await github_client.create_pull_request(
repo_full_name=repo,
Expand All @@ -214,9 +225,16 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith
subject_ids=[repo],
branch=request.branch_name,
base_branch=base_branch,
error="Failed to create pull request",
pr_title=request.pr_title,
error="Failed to create pull request - check logs for GitHub API error details",
)
raise HTTPException(
status_code=400,
detail=(
f"Failed to create pull request from '{request.branch_name}' to '{base_branch}'. "
"The PR may already exist, or you may not have permission to create PRs. Check server logs for details."
),
)
raise HTTPException(status_code=400, detail="Failed to create pull request")

pr_url = pr.get("html_url", "")
if not pr_url:
Expand Down
25 changes: 23 additions & 2 deletions src/integrations/github/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -813,6 +813,7 @@ async def create_or_update_file(
"""Create or update a file via the Contents API."""
headers = await self._get_auth_headers(installation_id=installation_id, user_token=user_token)
if not headers:
logger.error(f"Failed to get auth headers for create_or_update_file in {repo_full_name}")
return None
url = f"{config.github.api_base_url}/repos/{repo_full_name}/contents/{path.lstrip('/')}"
payload: dict[str, Any] = {
Expand All @@ -825,7 +826,14 @@ async def create_or_update_file(
session = await self._get_session()
async with session.put(url, headers=headers, json=payload) as response:
if response.status in (200, 201):
return await response.json()
result = await response.json()
logger.info(f"Successfully created/updated file {path} in {repo_full_name} on branch {branch}")
return result
error_text = await response.text()
logger.error(
f"Failed to create/update file {path} in {repo_full_name} on branch {branch}. "
f"Status: {response.status}, Response: {error_text}"
)
return None

async def create_pull_request(
Expand All @@ -841,13 +849,26 @@ async def create_pull_request(
"""Open a pull request."""
headers = await self._get_auth_headers(installation_id=installation_id, user_token=user_token)
if not headers:
logger.error(f"Failed to get auth headers for create_pull_request in {repo_full_name}")
return None
url = f"{config.github.api_base_url}/repos/{repo_full_name}/pulls"
payload = {"title": title, "head": head, "base": base, "body": body}
session = await self._get_session()
async with session.post(url, headers=headers, json=payload) as response:
if response.status in (200, 201):
return await response.json()
result = await response.json()
pr_number = result.get("number")
pr_url = result.get("html_url", "")
logger.info(
f"Successfully created PR #{pr_number} in {repo_full_name}: {pr_url} "
f"(head: {head}, base: {base})"
)
return result
error_text = await response.text()
logger.error(
f"Failed to create PR in {repo_full_name} (head: {head}, base: {base}). "
f"Status: {response.status}, Response: {error_text}"
)
return None

async def _get_session(self) -> aiohttp.ClientSession:
Expand Down
Loading