#!/usr/bin/env python3 """ Spec Compliance Validator for the Agent Skills Open Standard. Validates a skill directory against the Agent Skills Open Standard by checking SKILL.md existence, frontmatter structure, naming conventions, and best practices. Usage: python3 scripts/validate.py path/to/skill/ python3 scripts/validate.py path/to/skill/ --json Exit codes: 0 - Valid (no errors, may have warnings) 1 - Invalid (one or more errors found) """ import json import re import sys from pathlib import Path from typing import Optional # --- Constants --- MAX_NAME_LENGTH = 64 MAX_DESCRIPTION_LENGTH = 1024 MAX_BODY_LINES_WARNING = 500 # Pattern for valid skill names: lowercase letters, numbers, hyphens NAME_PATTERN = re.compile(r"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$") CONSECUTIVE_HYPHENS_PATTERN = re.compile(r"--") # Pattern for YYYY-MM-DD date format DATE_FORMAT_PATTERN = re.compile(r"^\d{4}-\d{2}-\d{2}$") # Pattern for local file references in markdown: [text](path) excluding http/https/mailto/# LOCAL_LINK_PATTERN = re.compile( r"\[([^\]]*)\]\(([^)]+)\)" ) def _parse_frontmatter(content: str) -> tuple[Optional[str], Optional[str]]: """ Extract frontmatter and body from SKILL.md content. Args: content: Full text content of SKILL.md. Returns: Tuple of (frontmatter_text, body_text). Either may be None if frontmatter is missing or malformed. """ if not content.startswith("---"): return None, None # Find the closing --- (skip the opening one at position 0) closing_index = content.find("---", 3) if closing_index == -1: return None, None frontmatter = content[3:closing_index].strip() body = content[closing_index + 3:].strip() return frontmatter, body def _parse_yaml_field(frontmatter: str, field: str) -> Optional[str]: """ Extract a top-level scalar field value from YAML frontmatter using simple parsing. Handles both inline values (``name: value``) and YAML block scalars (``description: >-`` followed by indented continuation lines). Args: frontmatter: The frontmatter text (without ``---`` delimiters). field: The field name to look for. Returns: The field value as a string, or None if the field is not present. """ lines = frontmatter.split("\n") for i, line in enumerate(lines): stripped = line.strip() if stripped.startswith(f"{field}:"): value = stripped[len(field) + 1:].strip() # Check for YAML block scalar indicators (>- , |-, >, |) if value in (">-", "|-", ">", "|", ">+", "|+"): # Collect indented continuation lines parts: list[str] = [] for j in range(i + 1, len(lines)): continuation = lines[j] # Continuation lines must be indented if continuation and (continuation[0] == " " or continuation[0] == "\t"): parts.append(continuation.strip()) else: break return " ".join(parts) if parts else "" return value return None def _field_exists_in_frontmatter(frontmatter: str, field: str) -> bool: """ Check whether a field name appears as a top-level key in frontmatter. Args: frontmatter: The frontmatter text. field: The field name to look for. Returns: True if the field is present. """ for line in frontmatter.split("\n"): stripped = line.strip() if stripped.startswith(f"{field}:"): return True return False def _subfield_exists(frontmatter: str, parent: str, child: str) -> bool: """ Check whether a sub-field exists under a parent field in YAML frontmatter. Args: frontmatter: The frontmatter text. parent: The parent field name (e.g., ``metadata``). child: The child field name (e.g., ``author``). Returns: True if the sub-field is found under the parent. """ lines = frontmatter.split("\n") in_parent = False for line in lines: stripped = line.strip() # Detect the parent field if stripped.startswith(f"{parent}:"): in_parent = True continue if in_parent: # Still inside the parent block if line is indented if line and (line[0] == " " or line[0] == "\t"): if stripped.startswith(f"{child}:"): return True else: # Left the parent block in_parent = False return False def _parse_subfield_value(frontmatter: str, parent: str, child: str) -> Optional[str]: """ Extract a sub-field value from under a parent field in YAML frontmatter. Args: frontmatter: The frontmatter text. parent: The parent field name (e.g., ``metadata``). child: The child field name (e.g., ``author``). Returns: The sub-field value as a string, or None if not found. """ lines = frontmatter.split("\n") in_parent = False for line in lines: stripped = line.strip() if stripped.startswith(f"{parent}:"): in_parent = True continue if in_parent: if line and (line[0] == " " or line[0] == "\t"): if stripped.startswith(f"{child}:"): return stripped[len(child) + 1:].strip() else: in_parent = False return None def _extract_local_links(body: str) -> list[str]: """ Extract local file paths referenced in markdown links within the body. Filters out URLs (http, https, mailto) and anchor links (#). Args: body: The markdown body text. Returns: List of relative file paths referenced in the body. """ paths: list[str] = [] for match in LOCAL_LINK_PATTERN.finditer(body): target = match.group(2).strip() # Skip external URLs and anchors if target.startswith(("http://", "https://", "mailto:", "#")): continue # Strip any anchor fragment from the path if "#" in target: target = target.split("#")[0] if target: paths.append(target) return paths def validate_skill(skill_path: str) -> dict: """ Validate a skill directory against the Agent Skills Open Standard. Performs both required checks (errors) and recommended checks (warnings). Args: skill_path: Path to the skill directory to validate. Returns: Dictionary with keys: - ``valid`` (bool): True if no errors were found. - ``errors`` (list[str]): List of error messages (must fix). - ``warnings`` (list[str]): List of warning messages (should fix). """ errors: list[str] = [] warnings: list[str] = [] skill_dir = Path(skill_path).resolve() # --- Check: directory exists --- if not skill_dir.exists(): errors.append(f"Path does not exist: {skill_dir}") return {"valid": False, "errors": errors, "warnings": warnings} if not skill_dir.is_dir(): errors.append(f"Path is not a directory: {skill_dir}") return {"valid": False, "errors": errors, "warnings": warnings} # --- Check: SKILL.md exists --- skill_md = skill_dir / "SKILL.md" if not skill_md.exists(): errors.append("SKILL.md not found in skill directory") return {"valid": False, "errors": errors, "warnings": warnings} # --- Read SKILL.md --- try: content = skill_md.read_text(encoding="utf-8") except Exception as exc: errors.append(f"Could not read SKILL.md: {exc}") return {"valid": False, "errors": errors, "warnings": warnings} # --- Check: frontmatter exists --- if not content.startswith("---"): errors.append("SKILL.md must start with '---' frontmatter delimiter") return {"valid": False, "errors": errors, "warnings": warnings} frontmatter, body = _parse_frontmatter(content) if frontmatter is None: errors.append("SKILL.md frontmatter is not properly closed (missing closing '---')") return {"valid": False, "errors": errors, "warnings": warnings} # --- Check: name field --- name_value = _parse_yaml_field(frontmatter, "name") if name_value is None: errors.append("'name' field is missing from frontmatter") else: name_value = name_value.strip() if len(name_value) == 0: errors.append("'name' field is empty") elif len(name_value) > MAX_NAME_LENGTH: errors.append( f"'name' field exceeds {MAX_NAME_LENGTH} characters " f"(found {len(name_value)})" ) else: # Validate name format if not NAME_PATTERN.match(name_value): errors.append( f"'name' field must contain only lowercase letters, numbers, " f"and hyphens (found: '{name_value}')" ) if name_value.startswith("-"): errors.append(f"'name' must not start with a hyphen (found: '{name_value}')") if name_value.endswith("-"): errors.append(f"'name' must not end with a hyphen (found: '{name_value}')") if CONSECUTIVE_HYPHENS_PATTERN.search(name_value): errors.append( f"'name' must not contain consecutive hyphens (found: '{name_value}')" ) # --- Check: directory name matches name field --- dir_name = skill_dir.name if dir_name != name_value: errors.append( f"Directory name '{dir_name}' does not match 'name' field " f"'{name_value}' in frontmatter" ) # --- Check: description field --- description_value = _parse_yaml_field(frontmatter, "description") if description_value is None: errors.append("'description' field is missing from frontmatter") else: description_value = description_value.strip() if len(description_value) == 0: errors.append("'description' field is empty") elif len(description_value) > MAX_DESCRIPTION_LENGTH: errors.append( f"'description' field exceeds {MAX_DESCRIPTION_LENGTH} characters " f"(found {len(description_value)})" ) # --- Check: -cskill suffix is deprecated --- if name_value is not None and name_value.endswith("-cskill"): errors.append( f"'name' uses the deprecated '-cskill' suffix. " f"Use '-skill' instead (found: '{name_value}')" ) # --- Warnings --- # Naming convention: -skill suffix (or -suite for suites) if name_value is not None and len(name_value) > 0: if not name_value.endswith("-skill") and not name_value.endswith("-suite"): warnings.append( f"'name' should end with '-skill' for discoverability " f"(found: '{name_value}')" ) # Body line count if body is not None: body_lines = body.split("\n") body_line_count = len(body_lines) if body_line_count > MAX_BODY_LINES_WARNING: warnings.append( f"SKILL.md body exceeds {MAX_BODY_LINES_WARNING} lines " f"({body_line_count} lines). Consider moving content to references/." ) # license field if not _field_exists_in_frontmatter(frontmatter, "license"): warnings.append("'license' field is missing from frontmatter") # metadata field if not _field_exists_in_frontmatter(frontmatter, "metadata"): warnings.append("'metadata' field is missing from frontmatter") else: if not _subfield_exists(frontmatter, "metadata", "author"): warnings.append("'metadata.author' sub-field is missing") if not _subfield_exists(frontmatter, "metadata", "version"): warnings.append("'metadata.version' sub-field is missing") # Temporal metadata validation (optional, warnings only) created_val = _parse_subfield_value(frontmatter, "metadata", "created") reviewed_val = _parse_subfield_value(frontmatter, "metadata", "last_reviewed") interval_val = _parse_subfield_value(frontmatter, "metadata", "review_interval_days") if created_val and not DATE_FORMAT_PATTERN.match(created_val.strip()): warnings.append( f"'metadata.created' should be YYYY-MM-DD format (found: '{created_val}')" ) if reviewed_val and not DATE_FORMAT_PATTERN.match(reviewed_val.strip()): warnings.append( f"'metadata.last_reviewed' should be YYYY-MM-DD format (found: '{reviewed_val}')" ) if interval_val: try: int(interval_val.strip()) except ValueError: warnings.append( f"'metadata.review_interval_days' should be an integer (found: '{interval_val}')" ) has_temporal = bool(created_val or reviewed_val or interval_val) if not has_temporal: warnings.append( "Consider adding temporal metadata (metadata.created, metadata.last_reviewed, " "metadata.review_interval_days) for staleness tracking" ) # Referenced local files if body is not None: local_links = _extract_local_links(body) for link_path in local_links: resolved = skill_dir / link_path if not resolved.exists(): warnings.append( f"Referenced file does not exist: '{link_path}'" ) return { "valid": len(errors) == 0, "errors": errors, "warnings": warnings, } def _print_human_readable(result: dict, skill_path: str) -> None: """ Print validation results in a human-readable format. Args: result: The validation result dictionary. skill_path: The path that was validated (for display). """ print(f"Validating: {skill_path}") print(f"{'=' * 60}") if result["valid"]: print("Status: VALID") else: print("Status: INVALID") if result["errors"]: print(f"\nErrors ({len(result['errors'])}):") for error in result["errors"]: print(f" [ERROR] {error}") if result["warnings"]: print(f"\nWarnings ({len(result['warnings'])}):") for warning in result["warnings"]: print(f" [WARN] {warning}") if not result["errors"] and not result["warnings"]: print("\nNo issues found.") print(f"{'=' * 60}") def main() -> None: """CLI entry point for the spec compliance validator.""" if len(sys.argv) < 2: print( "Usage: python3 scripts/validate.py [--json]\n" "\n" "Arguments:\n" " skill-path Path to the skill directory to validate\n" "\n" "Options:\n" " --json Output results as JSON to stdout\n" "\n" "Exit codes:\n" " 0 Valid (no errors)\n" " 1 Invalid (one or more errors)\n", file=sys.stderr, ) sys.exit(1) skill_path = sys.argv[1] use_json = "--json" in sys.argv result = validate_skill(skill_path) if use_json: print(json.dumps(result, indent=2)) else: _print_human_readable(result, skill_path) sys.exit(0 if result["valid"] else 1) if __name__ == "__main__": main()