Added optional tags field and filtering support

thomas-bartlett · thomas-bartlett · commit 0850522ce877 · 2025-11-17T13:41:58.000-05:00
diff --git a/sources/core/codeguard-0-api-web-services.md b/sources/core/codeguard-0-api-web-services.md
@@ -12,6 +12,10 @@ languages:
 - typescript
 - xml
 - yaml
+tags:
+- api
+- web-security
+- microservices
 alwaysApply: false
 ---
 
diff --git a/sources/core/codeguard-0-authentication-mfa.md b/sources/core/codeguard-0-authentication-mfa.md
@@ -13,6 +13,9 @@ languages:
 - ruby
 - swift
 - typescript
+tags:
+- authentication
+- web-security
 alwaysApply: false
 ---
 
diff --git a/src/convert_to_ide_formats.py b/src/convert_to_ide_formats.py
@@ -36,6 +36,23 @@ def sync_plugin_metadata(version: str) -> None:
     print(f"✅ Synced plugin metadata to {version}")
 
 
+def matches_tag_filter(rule_tags: list[str], filter_tags: list[str]) -> bool:
+    """
+    Check whether a rule carries every requested tag (case-insensitive AND).
+
+    Args:
+        rule_tags: Tags attached to the rule; compared case-insensitively
+        filter_tags: Tags the rule must all have; empty or None disables filtering
+
+    Returns:
+        True if every filter tag is present on the rule (or no filter is given)
+    """
+    if not filter_tags:
+        return True  # No filter means every rule passes
+    # Fold both sides so the match stays case-insensitive for any caller
+    return {tag.lower() for tag in filter_tags} <= {tag.lower() for tag in rule_tags or ()}
+
+
 def update_skill_md(language_to_rules: dict[str, list[str]], skill_path: str) -> None:
     """
     Update SKILL.md with language-to-rules mapping table.
@@ -81,7 +98,7 @@ def update_skill_md(language_to_rules: dict[str, list[str]], skill_path: str) ->
     print(f"Updated SKILL.md with language mappings")
 
 
-def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode: bool = True, version: str = None) -> dict[str, list[str]]:
+def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode: bool = True, version: str = None, filter_tags: list[str] = None) -> dict[str, list[str]]:
     """
     Convert rule file(s) to all supported IDE formats using RuleConverter.
 
@@ -90,6 +107,7 @@ def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode:
         output_dir: Output directory (default: 'dist/')
         include_claudecode: Whether to generate Claude Code plugin (default: True, only for core rules)
         version: Version string to use (default: read from pyproject.toml)
+        filter_tags: Optional list of tags to filter by (AND logic, case-insensitive)
 
     Returns:
         Dictionary with 'success' and 'errors' lists:
@@ -138,14 +156,19 @@ def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode:
     # Setup output directory
     output_base = Path(output_dir)
 
-    results = {"success": [], "errors": []}
+    results = {"success": [], "errors": [], "skipped": []}
     language_to_rules = defaultdict(list)
 
     # Process each file
     for md_file in md_files:
         try:
             # Convert the file (raises exceptions on error)
             result = converter.convert(md_file)
+            
+            # Apply tag filter if specified
+            if filter_tags and not matches_tag_filter(result.tags, filter_tags):
+                results["skipped"].append(result.filename)
+                continue
 
             # Write each format
             output_files = []
@@ -192,9 +215,14 @@ def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode:
             results["errors"].append(error_msg)
 
     # Summary
-    print(
-        f"\nResults: {len(results['success'])} success, {len(results['errors'])} errors"
-    )
+    if filter_tags:
+        print(
+            f"\nResults: {len(results['success'])} success, {len(results['skipped'])} skipped (tag filter), {len(results['errors'])} errors"
+        )
+    else:
+        print(
+            f"\nResults: {len(results['success'])} success, {len(results['errors'])} errors"
+        )
 
     # Generate SKILL.md with language mappings (only if Claude Code is included)
     if include_claudecode and language_to_rules:
@@ -256,6 +284,12 @@ def _resolve_source_paths(args) -> list[Path]:
         default="dist",
         help="Output directory for generated bundles (default: dist).",
     )
+    parser.add_argument(
+        "--tag",
+        "--tags",
+        dest="tags",
+        help="Filter rules by tags (comma-separated, case-insensitive, AND logic). Example: --tag api,web-security",
+    )
     
     cli_args = parser.parse_args()
     source_paths = _resolve_source_paths(cli_args)
@@ -316,7 +350,16 @@ def _resolve_source_paths(args) -> list[Path]:
         print()
     
     # Convert all sources
-    aggregated = {"success": [], "errors": []}
+    aggregated = {"success": [], "errors": [], "skipped": []}
+    # Parse comma-separated tags
+    filter_tags = None
+    if cli_args.tags:
+        filter_tags = [tag.strip() for tag in cli_args.tags.split(",") if tag.strip()]
+    
+    # Print tag filter info if active
+    if filter_tags:
+        print(f"Tag filter active: {', '.join(filter_tags)} (AND logic - rules must have all tags)\n")
+    
     for source_path in source_paths:
         is_core = source_path == Path("sources/core")
         
@@ -325,11 +368,14 @@ def _resolve_source_paths(args) -> list[Path]:
             str(source_path), 
             cli_args.output_dir, 
             include_claudecode=is_core,
-            version=version
+            version=version,
+            filter_tags=filter_tags
         )
         
         aggregated["success"].extend(results["success"])
         aggregated["errors"].extend(results["errors"])
+        if "skipped" in results:
+            aggregated["skipped"].extend(results["skipped"])
         print("")
     
     if aggregated["errors"]:
diff --git a/src/converter.py b/src/converter.py
@@ -12,7 +12,7 @@
 from pathlib import Path
 
 from language_mappings import languages_to_globs
-from utils import parse_frontmatter_and_content
+from utils import parse_frontmatter_and_content, validate_tags
 from formats import (
     BaseFormat,
     ProcessedRule,
@@ -45,6 +45,7 @@ class ConversionResult:
         basename: Filename without extension (e.g., 'my-rule')
         outputs: Dictionary mapping format names to their outputs
         languages: List of programming languages the rule applies to, empty list if always applies
+        tags: List of tags for categorizing and filtering rules
     Example:
         result = ConversionResult(
             filename="my-rule.md",
@@ -56,14 +57,16 @@ class ConversionResult:
                     subpath=".cursor/rules"
                 )
             },
-            languages=["python", "javascript"]
+            languages=["python", "javascript"],
+            tags=["authentication", "web-security"]
         )
     """
 
     filename: str
     basename: str
     outputs: dict[str, FormatOutput]
     languages: list[str]
+    tags: list[str]
 
 
 class RuleConverter:
@@ -159,6 +162,11 @@ def parse_rule(self, content: str, filename: str) -> ProcessedRule:
                     f"'languages' must be a non-empty list in {filename} when alwaysApply is false"
                 )
 
+        # Parse and validate tags (optional field)
+        tags = []
+        if "tags" in frontmatter:
+            tags = validate_tags(frontmatter["tags"], filename)
+
         # Adding rule_id to the beginning of the content
         rule_id = Path(filename).stem
         markdown_content = f"rule_id: {rule_id}\n\n{markdown_content}"
@@ -169,6 +177,7 @@ def parse_rule(self, content: str, filename: str) -> ProcessedRule:
             always_apply=always_apply,
             content=markdown_content,
             filename=filename,
+            tags=tags,
         )
 
     def generate_globs(self, languages: list[str]) -> str:
@@ -242,4 +251,5 @@ def convert(self, filepath: str) -> ConversionResult:
             basename=basename,
             outputs=outputs,
             languages=rule.languages,
+            tags=rule.tags,
         )
diff --git a/src/formats/base.py b/src/formats/base.py
@@ -25,13 +25,15 @@ class ProcessedRule:
         always_apply: Whether this rule should apply to all files
         content: The actual rule content in markdown format
         filename: Original filename of the rule
+        tags: List of tags for categorizing and filtering rules
     """
 
     description: str
     languages: list[str]
     always_apply: bool
     content: str
     filename: str
+    tags: list[str]
 
 
 class BaseFormat(ABC):
diff --git a/src/utils.py b/src/utils.py
@@ -57,6 +57,41 @@ def parse_frontmatter_and_content(content: str) -> tuple[dict | None, str]:
     return frontmatter, markdown_content.strip()
 
 
+def validate_tags(tags, filename=None) -> list[str]:
+    """
+    Check a rule's ``tags`` value and return the tags lowercased.
+
+    Args:
+        tags: Value of the ``tags`` frontmatter field; must be a list of strings
+        filename: Optional rule filename appended to error messages
+
+    Returns:
+        The tags in their original order, each converted to lowercase
+
+    Raises:
+        ValueError: If ``tags`` is not a list, or any tag is not a string,
+            contains whitespace, or is empty
+    """
+    # Suffix that points error messages at the offending file, when known
+    where = f" in {filename}" if filename else ""
+
+    if not isinstance(tags, list):
+        raise ValueError(f"'tags' must be a list{where}")
+
+    def _checked(item) -> str:
+        # Reject the first problem found: wrong type, whitespace, then emptiness
+        if not isinstance(item, str):
+            raise ValueError(f"All tags must be strings{where}, found: {type(item).__name__}")
+        if any(map(str.isspace, item)):
+            raise ValueError(f"Tags cannot contain whitespace: '{item}'{where}")
+        if item == "":
+            raise ValueError(f"Empty tag found{where}")
+        return item.lower()
+
+    # Validation is all-or-nothing: the first bad tag aborts with ValueError
+    return [_checked(item) for item in tags]
+
+
 def get_version_from_pyproject() -> str:
     """
     Read version from pyproject.toml using Python's built-in TOML parser.
diff --git a/src/validate_unified_rules.py b/src/validate_unified_rules.py
@@ -12,7 +12,7 @@
 from pathlib import Path
 
 from language_mappings import LANGUAGE_TO_EXTENSIONS
-from utils import parse_frontmatter_and_content
+from utils import parse_frontmatter_and_content, validate_tags
 
 
 def validate_rule(file_path: Path) -> dict[str, list[str]]:
@@ -54,6 +54,13 @@ def validate_rule(file_path: Path) -> dict[str, list[str]]:
             if unknown:
                 warnings.append(f"Unknown languages: {', '.join(unknown)}")
 
+        # Validate tags if present
+        if "tags" in frontmatter:
+            try:
+                validate_tags(frontmatter["tags"], file_path.name)
+            except ValueError as e:
+                errors.append(str(e))
+
         # Check content exists
         if not markdown_content.strip():
             errors.append("Rule content cannot be empty")