Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 23 additions & 5 deletions agents/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,19 @@
)
from claude_agent_sdk.types import McpHttpServerConfig


# ANSI color codes for terminal output
class Colors:
    """ANSI escape sequences used to colorize terminal output.

    Each attribute is a complete escape sequence that can be embedded
    directly in a string. Emit ``RESET`` after colored text so the styling
    does not leak into subsequent output.
    """

    # Foreground colors.
    BLUE = '\033[0;34m'
    CYAN = '\033[0;36m'
    GREEN = '\033[0;32m'
    YELLOW = '\033[1;33m'  # note: this one also sets the bold attribute
    MAGENTA = '\033[0;35m'
    RED = '\033[0;31m'

    # Text attributes.
    RESET = '\033[0m'
    BOLD = '\033[1m'
    # Kept in step with the Colors palette in agents/orchestrator.py, which
    # defines DIM; without it the two palettes are not interchangeable.
    DIM = '\033[2m'


# Script directory for resolving relative paths
SCRIPT_DIR = Path(__file__).parent

Expand Down Expand Up @@ -197,13 +210,18 @@ async def agent_query_fn(user_prompt: str, system_prompt: str):
)

# Print final summary
print("\n" + "=" * 60)
print(f"\n{Colors.CYAN}{Colors.BOLD}{'=' * 60}")
print("WORKFLOW SUMMARY")
print("=" * 60)
print(f"Overall Status: {result['overall_status']}")
print(f"{'=' * 60}{Colors.RESET}")

if result['overall_status'] == 'success':
print(f"{Colors.GREEN}{Colors.BOLD}Overall Status: {result['overall_status']}{Colors.RESET}")
else:
print(f"{Colors.RED}{Colors.BOLD}Overall Status: {result['overall_status']}{Colors.RESET}")

if result["errors"]:
print(f"Errors: {result['errors']}")
print("=" * 60 + "\n")
print(f"{Colors.RED}Errors: {result['errors']}{Colors.RESET}")
print(f"{Colors.CYAN}{'=' * 60}{Colors.RESET}\n")


async def _find_conference_monolithic(conference_name: str) -> None:
Expand Down
43 changes: 28 additions & 15 deletions agents/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,19 @@
from utils.yaml_utils import YAMLUtils


# ANSI color codes for terminal output
class Colors:
    """Palette of ANSI escape sequences for the orchestrator's console output.

    Every attribute is a ready-to-print escape sequence; follow styled text
    with ``RESET`` to return the terminal to its default rendering.
    """

    # --- text attributes -------------------------------------------------
    RESET = '\033[0m'
    BOLD = '\033[1m'
    DIM = '\033[2m'

    # --- foreground colors -----------------------------------------------
    RED = '\033[0;31m'
    GREEN = '\033[0;32m'
    YELLOW = '\033[1;33m'  # bold attribute is part of this sequence
    BLUE = '\033[0;34m'
    MAGENTA = '\033[0;35m'
    CYAN = '\033[0;36m'


class Orchestrator:
"""Main orchestrator for conference deadline agent workflow.

Expand Down Expand Up @@ -98,16 +111,16 @@ async def orchestrate_conference_update(
"errors": [],
}

print(f"\n{'='*60}")
print(f"\n{Colors.CYAN}{Colors.BOLD}{'='*60}")
print(f"Starting conference update: {conference_name}")
print(f"{'='*60}\n")
print(f"{'='*60}{Colors.RESET}\n")

# Load existing data first for reference/hints
existing_data = await self._load_existing_conference_data(conference_name)
previous_url = existing_data.get("link") if existing_data else None

# Stage 1: Search for conference website
print("[Stage 1] Searching for conference website...")
print(f"{Colors.CYAN}🔍 [Stage 1] Searching for conference website...{Colors.RESET}")
search_result = await self._stage_search(
conference_name, previous_url, agent_query_fn
)
Expand All @@ -116,14 +129,14 @@ async def orchestrate_conference_update(
if search_result["status"] != "success":
result["overall_status"] = "failed"
result["errors"].append(f"Search failed: {search_result.get('search_notes')}")
print(f"❌ Search failed: {search_result.get('search_notes')}")
print(f"{Colors.RED}❌ Search failed: {search_result.get('search_notes')}{Colors.RESET}")
return result

conference_url = search_result["url"]
print(f"✓ Found conference URL: {conference_url}\n")
print(f"{Colors.GREEN}✓ Found conference URL: {conference_url}{Colors.RESET}\n")

# Stage 2: Extract data from website
print("[Stage 2] Extracting data from conference website...")
print(f"{Colors.MAGENTA}📝 [Stage 2] Extracting data from conference website...{Colors.RESET}")

extraction_result = await self._stage_extract(
conference_name,
Expand All @@ -142,13 +155,13 @@ async def orchestrate_conference_update(

extracted_data = extraction_result.get("extracted_data", {})
extracted_field_names = set(extracted_data.keys()) # Track which fields were actually extracted
print(f"✓ Extracted {len(extracted_data)} fields")
print(f"{Colors.GREEN}✓ Extracted {len(extracted_data)} fields{Colors.RESET}")
if extracted_data:
print(f" Fields: {list(extracted_data.keys())}")
print(f"{Colors.DIM} Fields: {list(extracted_data.keys())}{Colors.RESET}")
print()

# Stage 3: Validate extracted data
print("[Stage 3] Validating extracted data (strict mode)...")
print(f"{Colors.YELLOW}✓ [Stage 3] Validating extracted data (strict mode)...{Colors.RESET}")
validation_result = await self._stage_validate(
conference_name,
extracted_data,
Expand All @@ -159,9 +172,9 @@ async def orchestrate_conference_update(
result["stages"]["validation"] = validation_result

# If validation has errors, try re-extraction with explicit instructions
print(f"Validation status: {validation_result['status']}")
print(f"Validation errors: {validation_result.get('validation_errors', [])}")
print(f"Approved data: {list(validation_result.get('approved_data', {}).keys())}")
print(f"{Colors.DIM}Validation status: {validation_result['status']}{Colors.RESET}")
print(f"{Colors.DIM}Validation errors: {validation_result.get('validation_errors', [])}{Colors.RESET}")
print(f"{Colors.DIM}Approved data: {list(validation_result.get('approved_data', {}).keys())}{Colors.RESET}")

if validation_result["status"] == "invalid":
rejected_fields = validation_result.get("rejected_fields", [])
Expand Down Expand Up @@ -212,11 +225,11 @@ async def orchestrate_conference_update(
print("❌ No validated data available for update")
return result

print(f"✓ Validation passed with {len(approved_data)} approved fields")
print(f" Extracted fields: {list(extracted_fields_in_approved)}\n")
print(f"{Colors.GREEN}✓ Validation passed with {len(approved_data)} approved fields{Colors.RESET}")
print(f"{Colors.DIM} Extracted fields: {list(extracted_fields_in_approved)}{Colors.RESET}\n")

# Stage 4: Update YAML file
print("[Stage 4] Generating YAML update...")
print(f"{Colors.BLUE}📄 [Stage 4] Generating YAML update...{Colors.RESET}")
yaml_file_path = self.project_root / "src" / "data" / "conferences" / f"{conference_name}.yml"

# Determine if we should append (new year) or update (existing year)
Expand Down
14 changes: 12 additions & 2 deletions agents/skills/git_skill.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,18 @@ async def execute(
self._run_git_command(["checkout", base_branch], cwd=git_root)
self._run_git_command(["pull", "origin", base_branch], cwd=git_root)

# Step 2: Create feature branch
self._run_git_command(["checkout", "-b", branch_name], cwd=git_root)
# Step 2: Create feature branch (or switch to it if it exists)
try:
self._run_git_command(["checkout", "-b", branch_name], cwd=git_root)
except Exception as e:
# Branch might already exist, try to switch to it
if "already exists" in str(e):
print(f"[Git] Branch {branch_name} already exists, switching to it and resetting...")
self._run_git_command(["checkout", branch_name], cwd=git_root)
# Reset to base branch to start fresh
self._run_git_command(["reset", "--hard", base_branch], cwd=git_root)
else:
raise

# Step 3: Validate and fix YAML before writing
print("[Git] Validating YAML content...")
Expand Down
66 changes: 55 additions & 11 deletions agents/utils/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,15 @@ class ConferenceValidator:
}

# Required fields that must always be present
REQUIRED_FIELDS = {"title", "id", "year", "link"}
# Only truly critical fields - agent should warn but not fail on missing optional fields
REQUIRED_FIELDS = {"title", "id", "year"} # Removed 'link' - agent can work without it

# Fields that should have high confidence scores if present
# Validation will reject fields with low confidence
CONFIDENCE_REQUIRED_FIELDS = {"deadlines", "date", "start", "end", "city", "country"}

# Deadline type values (standardized types)
# This is now managed dynamically in yaml_schema.py
VALID_DEADLINE_TYPES = {
"abstract",
"paper",
Expand All @@ -60,6 +62,8 @@ class ConferenceValidator:
"notification",
"camera_ready",
"registration",
"panel", # Added
"poster", # Added
}

# Valid ERA ratings
Expand All @@ -68,6 +72,8 @@ class ConferenceValidator:
@staticmethod
def validate_required_fields(data: dict[str, Any]) -> tuple[bool, list[str]]:
"""Validate that all required fields are present.

Only fails on truly critical fields. Warns about missing optional fields.

Args:
data: Conference data dictionary.
Expand All @@ -76,6 +82,13 @@ def validate_required_fields(data: dict[str, Any]) -> tuple[bool, list[str]]:
Tuple of (is_valid, missing_fields).
"""
missing = [field for field in ConferenceValidator.REQUIRED_FIELDS if field not in data]

# Warn about recommended fields but don't fail
recommended = ["link", "date", "city", "country"]
missing_recommended = [f for f in recommended if f not in data and f not in missing]
if missing_recommended:
print(f"⚠️ Recommended fields missing (not critical): {', '.join(missing_recommended)}")

return len(missing) == 0, missing

@staticmethod
Expand Down Expand Up @@ -103,29 +116,48 @@ def validate_iso_date(date_str: str) -> bool:

@staticmethod
def validate_timezone(tz: str) -> bool:
    """Validate a timezone string by format rather than by a fixed list only.

    Accepted values are ``"AoE"`` (Anywhere on Earth), ``"UTC"``/``"GMT"``,
    any entry of ``ConferenceValidator.VALID_TIMEZONES``, and any IANA-style
    identifier of the form ``Region/City`` or ``Region/Subregion/City``.
    The format check is purely syntactic: it does not confirm that the zone
    exists in the tz database.

    Args:
        tz: Timezone string to validate.

    Returns:
        True if ``tz`` is accepted, False otherwise (including when ``tz``
        is not a string). The result is always a real ``bool``.
    """
    if not isinstance(tz, str):
        return False

    # Fixed aliases that do not follow the Region/City shape.
    if tz in ("AoE", "UTC", "GMT"):
        return True

    # Anything already on the known list stays valid even if it would not
    # pass the format check below.
    if tz in ConferenceValidator.VALID_TIMEZONES:
        return True

    # IANA format: Region/City or Region/Subregion/City. fullmatch (rather
    # than match + "$") rejects a trailing newline, and the component
    # character class admits the digits, "-" and "+" that occur in real
    # identifiers such as "Etc/GMT+5" or "America/Port-au-Prince".
    if re.fullmatch(r"[A-Za-z]+(?:/[A-Za-z0-9_+\-]+)+", tz):
        # Log new timezones for awareness.
        print(f"📋 New timezone encountered: {tz} (auto-accepted)")
        return True

    return False

@staticmethod
def validate_deadline(deadline: dict[str, Any]) -> tuple[bool, list[str]]:
"""Validate a single deadline object.
"""Validate a single deadline object with intelligent fallbacks.

Args:
deadline: Deadline dictionary.

Returns:
Tuple of (is_valid, errors).
Tuple of (is_valid, errors). Only returns False for critical errors.
"""
errors = []
warnings = []

if not isinstance(deadline, dict):
return False, ["Deadline must be a dictionary"]
Expand All @@ -134,12 +166,17 @@ def validate_deadline(deadline: dict[str, Any]) -> tuple[bool, list[str]]:
if "type" not in deadline:
errors.append("Deadline missing 'type' field")
elif deadline["type"] not in ConferenceValidator.VALID_DEADLINE_TYPES:
errors.append(f"Invalid deadline type: {deadline['type']}")
# Try to import and use dynamic type validation
from agents.utils.yaml_schema import add_deadline_type
if add_deadline_type(deadline["type"]):
warnings.append(f"New deadline type accepted: {deadline['type']}")
else:
errors.append(f"Invalid deadline type: {deadline['type']}")

if "label" not in deadline:
errors.append("Deadline missing 'label' field")
warnings.append("Deadline missing 'label' field (recommended)")
elif not isinstance(deadline["label"], str) or not deadline["label"].strip():
errors.append("Deadline 'label' must be non-empty string")
warnings.append("Deadline 'label' should be non-empty string")

if "date" not in deadline:
errors.append("Deadline missing 'date' field")
Expand All @@ -148,6 +185,13 @@ def validate_deadline(deadline: dict[str, Any]) -> tuple[bool, list[str]]:

if "timezone" in deadline:
if not ConferenceValidator.validate_timezone(deadline["timezone"]):
warnings.append(f"Unusual timezone format: {deadline['timezone']}")

# Print warnings but don't fail
for warning in warnings:
print(f"⚠️ {warning}")

return len(errors) == 0, errors
errors.append(f"Invalid timezone in deadline: {deadline['timezone']}")
else:
errors.append("Deadline missing 'timezone' field")
Expand Down
Loading
Loading