-
Notifications
You must be signed in to change notification settings - Fork 26
feat: add repository analysis with automated PR creation #33
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
abb38ae
5cdf7db
b06500b
24c9b3a
4a3bdd4
8932233
12ba5eb
513b5bd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -34,7 +34,7 @@ async def analyze_repository_structure(state: RepositoryAnalysisState) -> None: | |
| repo_data = await github_client.get_repository(repo, installation_id=installation_id) | ||
| workflows = await github_client.list_directory_any_auth( | ||
| repo_full_name=repo, path=".github/workflows", installation_id=installation_id | ||
| ) | ||
| ) | ||
| contributors = await github_client.get_repository_contributors(repo, installation_id) if installation_id else [] | ||
|
|
||
| state.repository_features = RepositoryFeatures( | ||
|
|
@@ -77,7 +77,7 @@ async def analyze_contributing_guidelines(state: RepositoryAnalysisState) -> Non | |
| repo = state.repository_full_name | ||
| installation_id = state.installation_id | ||
|
|
||
| content = await github_client.get_file_content( | ||
| content = await github_client.get_file_content( | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
| repo, "CONTRIBUTING.md", installation_id | ||
| ) or await github_client.get_file_content(repo, ".github/CONTRIBUTING.md", installation_id) | ||
|
|
||
|
|
@@ -144,9 +144,40 @@ def _get_language_specific_patterns(language: str | None) -> tuple[list[str], li | |
| ) | ||
|
|
||
|
|
||
| def _analyze_pr_bad_habits(state: RepositoryAnalysisState) -> dict[str, Any]: | ||
| """ | ||
| Analyze PR history to detect bad habits and patterns. | ||
|
|
||
| Returns a dict with detected issues like: | ||
| - missing_tests: PRs without test files | ||
| - short_descriptions: PRs with very short descriptions | ||
| - no_reviews: PRs merged without reviews | ||
| """ | ||
| if not state.pr_samples: | ||
| return {} | ||
|
|
||
| issues: dict[str, Any] = { | ||
| "missing_tests": 0, | ||
| "short_descriptions": 0, | ||
| "no_reviews": 0, | ||
| "total_analyzed": len(state.pr_samples), | ||
| } | ||
|
Comment on lines
+156
to
+177
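There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The type hint `dict[str, Any]` used for the return value and for the `issues` dictionary can be narrowed to `dict[str, int]`, since every value stored in it is an integer count. Suggested change:
def _analyze_pr_bad_habits(state: RepositoryAnalysisState) -> dict[str, int]: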
"""
Analyze PR history to detect bad habits and patterns.
Returns a dict with detected issues like:
- missing_tests: PRs without test files (estimated based on changed_files)
- short_titles: PRs with very short titles (< 10 characters)
- no_reviews: PRs merged without reviews (always 0, as we can't determine this from list API)
Note: We can't analyze PR diffs/descriptions from the basic PR list API.
This would require fetching individual PR details which is expensive.
We analyze what we can from the PR list metadata.
"""
if not state.pr_samples:
return {}
issues: dict[str, int] = {
"missing_tests": 0,
"short_titles": 0,
"no_reviews": 0,
"total_analyzed": len(state.pr_samples),
} |
||
|
|
||
| # Note: We can't analyze PR diffs/descriptions from the basic PR list API | ||
| # This would require fetching individual PR details which is expensive. | ||
| # For now, we return basic stats that can inform recommendations. | ||
|
|
||
| return issues | ||
|
Comment on lines
+156
to
+196
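There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The new function `_analyze_pr_bad_habits` does not yet perform any analysis: it initializes the counters to zero and returns them unchanged. Furthermore, the docstring mentions detecting `missing_tests`, `short_descriptions`, and `no_reviews`, but none of these is actually computed. Consider implementing at least a basic analysis (e.g., checking for short PR titles) to make this feature functional. |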
||
|
|
||
|
|
||
| def _default_recommendations(state: RepositoryAnalysisState) -> list[RuleRecommendation]: | ||
| """ | ||
| Return a minimal, deterministic set of diff-aware rules. | ||
| Return a minimal, deterministic set of diff-aware rules based on repository analysis. | ||
|
|
||
| Rules are generated based on: | ||
| 1. Repository language (for test patterns) | ||
| 2. PR history analysis (for bad habits) | ||
| 3. Contributing guidelines (if present) | ||
|
|
||
| Note: These recommendations use repository-specific patterns when available. | ||
| For more advanced use cases like restricting specific authors from specific paths | ||
|
|
@@ -161,52 +192,69 @@ def _default_recommendations(state: RepositoryAnalysisState) -> list[RuleRecomme | |
| # Get language-specific patterns based on repository analysis | ||
| source_patterns, test_patterns = _get_language_specific_patterns(state.repository_features.language) | ||
|
|
||
| # Analyze PR history for bad habits | ||
| pr_issues = _analyze_pr_bad_habits(state) | ||
|
|
||
| # Require tests when source code changes. | ||
| # This is especially important if we detect missing tests in PR history | ||
| test_reasoning = f"Default guardrail for code changes without tests. Patterns adapted for {state.repository_features.language or 'multi-language'} repository." | ||
| if pr_issues.get("missing_tests", 0) > 0: | ||
| test_reasoning += f" Detected {pr_issues['missing_tests']} recent PRs without test files." | ||
|
|
||
| recommendations.append( | ||
| RuleRecommendation( | ||
| yaml_rule=textwrap.dedent( | ||
| f""" | ||
| description: "Require tests when code changes" | ||
| enabled: true | ||
| enabled: true | ||
| severity: medium | ||
| event_types: | ||
| - pull_request | ||
| parameters: | ||
| event_types: | ||
| - pull_request | ||
| parameters: | ||
| source_patterns: | ||
| {chr(10).join(f' - "{pattern}"' for pattern in source_patterns)} | ||
| test_patterns: | ||
| {chr(10).join(f' - "{pattern}"' for pattern in test_patterns)} | ||
| """ | ||
|
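There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The indentation within this f-string for generating the YAML rule is incorrect. Keys like `event_types` and `parameters` must be aligned with the other top-level keys (`description`, `enabled`, `severity`) so that the generated YAML parses as intended. Suggested change:
f"""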
description: "Require tests when code changes"
enabled: true
severity: medium
event_types:
- pull_request
parameters:
source_patterns:
{chr(10).join(f' - "{pattern}"' for pattern in source_patterns)}
test_patterns:
{chr(10).join(f' - "{pattern}"' for pattern in test_patterns)}
""" |
||
| ).strip(), | ||
| confidence=0.74, | ||
| reasoning=f"Default guardrail for code changes without tests. Patterns adapted for {state.repository_features.language or 'multi-language'} repository.", | ||
| confidence=0.74 if pr_issues.get("missing_tests", 0) == 0 else 0.85, | ||
| reasoning=test_reasoning, | ||
| strategy_used="hybrid", | ||
| ) | ||
| ) | ||
|
|
||
| # Require description in PR body. | ||
| # Increase confidence if we detect short descriptions in PR history | ||
| desc_reasoning = "Encourage context for reviewers; lightweight default." | ||
| if pr_issues.get("short_descriptions", 0) > 0: | ||
| desc_reasoning += f" Detected {pr_issues['short_descriptions']} PRs with insufficient descriptions." | ||
|
|
||
| recommendations.append( | ||
| RuleRecommendation( | ||
| yaml_rule=textwrap.dedent( | ||
| """ | ||
| description: "Ensure PRs include context" | ||
| enabled: true | ||
| enabled: true | ||
| severity: low | ||
| event_types: | ||
| - pull_request | ||
| parameters: | ||
| event_types: | ||
| - pull_request | ||
| parameters: | ||
| min_description_length: 50 | ||
| """ | ||
|
Comment on lines
267
to
275
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
| ).strip(), | ||
| confidence=0.68, | ||
| reasoning="Encourage context for reviewers; lightweight default.", | ||
| confidence=0.68 if pr_issues.get("short_descriptions", 0) == 0 else 0.80, | ||
| reasoning=desc_reasoning, | ||
| strategy_used="static", | ||
| ) | ||
| ) | ||
|
|
||
| # If no CODEOWNERS, suggest one for shared ownership signals. | ||
| # Note: This is informational only - we can't enforce CODEOWNERS creation via validators | ||
| # but we can encourage it through the recommendation reasoning. | ||
| # If contributing guidelines require tests, increase confidence | ||
| if state.contributing_analysis.has_contributing and state.contributing_analysis.requires_tests: | ||
| # Find the test rule and boost its confidence | ||
| for rec in recommendations: | ||
| if "tests" in rec.yaml_rule.lower(): | ||
| rec.confidence = min(0.95, rec.confidence + 0.1) | ||
| rec.reasoning += " Contributing guidelines explicitly require tests." | ||
|
Comment on lines
+286
to
+289
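There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The current logic for finding the test-related rule recommendation relies on a simple substring search (`"tests" in rec.yaml_rule.lower()`), which is brittle: it could match any rule whose YAML happens to contain the word "tests". Matching on the rule's exact description and stopping after the first match is more robust:
for rec in recommendations: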
if 'description: "Require tests when code changes"' in rec.yaml_rule:
rec.confidence = min(0.95, rec.confidence + 0.1)
rec.reasoning += " Contributing guidelines explicitly require tests."
break |
||
|
|
||
| return recommendations | ||
|
|
||
|
|
@@ -234,7 +282,7 @@ def _default_pr_plan(state: RepositoryAnalysisState) -> PullRequestPlan: | |
|
|
||
| def validate_recommendations(state: RepositoryAnalysisState) -> None: | ||
| """Ensure generated YAML is valid.""" | ||
| for rec in state.recommendations: | ||
| for rec in state.recommendations: | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
| yaml.safe_load(rec.yaml_rule) | ||
|
|
||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This line has an extra level of indentation, which will cause a Python
`IndentationError` at runtime. The closing parenthesis should be aligned with the start of the line containing the opening parenthesis.