#!/usr/bin/env python3 """ Security Scanner for Generated Agent Skills. Scans a skill directory for hardcoded API keys, sensitive files, and dangerous Python patterns that could pose security risks. Usage: python3 scripts/security_scan.py path/to/skill/ python3 scripts/security_scan.py path/to/skill/ --json Exit codes: 0 - Clean (no issues found) 1 - Issues found (one or more security issues detected) """ import json import os import re import sys from pathlib import Path # --- API Key Patterns --- # Each entry: (pattern_name, compiled_regex, description, severity) API_KEY_PATTERNS: list[tuple[str, re.Pattern, str, str]] = [ ( "OpenAI API Key", re.compile(r"sk-[a-zA-Z0-9]{20,}"), "Hardcoded OpenAI API key detected", "high", ), ( "AWS Access Key", re.compile(r"AKIA[A-Z0-9]{16}"), "Hardcoded AWS access key ID detected", "high", ), ( "GitHub Personal Access Token", re.compile(r"ghp_[a-zA-Z0-9]{36}"), "Hardcoded GitHub personal access token detected", "high", ), ( "GitLab Personal Access Token", re.compile(r"glpat-[a-zA-Z0-9\-]{20}"), "Hardcoded GitLab personal access token detected", "high", ), ( "Slack Token", re.compile(r"xox[bprs]-[a-zA-Z0-9\-]+"), "Hardcoded Slack token detected", "high", ), ( "Generic Secret", re.compile( r"""(api[_\-]?key|secret|token|password)\s*[:=]\s*["'][^"']{8,}["']""", re.IGNORECASE, ), "Possible hardcoded secret (generic key/token/password pattern)", "medium", ), ] # --- Sensitive File Names --- SENSITIVE_FILES: dict[str, str] = { ".env": "Environment file may contain secrets", "credentials.json": "Credentials file may contain API keys or passwords", "secrets.json": "Secrets file may contain sensitive data", "api_keys.json": "API keys file may contain hardcoded keys", } # --- Dangerous Python Patterns --- # Each entry: (pattern_name, compiled_regex, description, severity) PYTHON_DANGER_PATTERNS: list[tuple[str, re.Pattern, str, str]] = [ ( "eval() usage", re.compile(r"\beval\s*\("), "Use of eval() can execute arbitrary code; avoid unless strictly necessary", "high", ), ( "exec() usage", re.compile(r"\bexec\s*\("), "Use of exec() can execute arbitrary code; avoid unless strictly necessary", "high", ), ( "os.system() with concatenation", re.compile(r"os\.system\s*\([^)]*[\+f\"']"), "os.system() with string concatenation is vulnerable to shell injection", "high", ), ( "subprocess with shell=True", re.compile(r"subprocess\.call\s*\([^)]*shell\s*=\s*True"), "subprocess.call() with shell=True is vulnerable to shell injection", "high", ), ( "__import__() dynamic import", re.compile(r"__import__\s*\("), "Dynamic imports via __import__() can load arbitrary modules", "medium", ), ] # File extensions to scan for content patterns TEXT_EXTENSIONS: set[str] = { ".py", ".md", ".txt", ".json", ".yaml", ".yml", ".toml", ".cfg", ".ini", ".sh", ".bash", ".zsh", ".env", ".conf", ".xml", ".html", ".css", ".js", ".ts", ".jsx", ".tsx", ".sql", ".csv", ".rst", } # Maximum file size to scan (skip very large files to avoid performance issues) MAX_FILE_SIZE_BYTES = 10 * 1024 * 1024 # 10 MB # Directories to skip during scanning SKIP_DIRS: set[str] = { ".git", "__pycache__", "node_modules", ".venv", "venv", "env", ".pytest_cache", ".mypy_cache", "dist", "build", } def _is_text_file(file_path: Path) -> bool: """ Determine if a file is likely a text file that should be scanned. Uses the file extension to decide. Falls back to attempting to read a small portion of the file if the extension is unrecognized. Args: file_path: Path to the file. Returns: True if the file should be scanned for content patterns. """ if file_path.suffix.lower() in TEXT_EXTENSIONS: return True # For files with no extension or unrecognized extensions, try reading a sample if file_path.suffix == "" or file_path.suffix.lower() not in { ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".svg", ".pdf", ".zip", ".tar", ".gz", ".bz2", ".xz", ".exe", ".dll", ".so", ".dylib", ".whl", ".egg", ".pyc", ".pyo", ".class", ".o", ".a", ".mp3", ".mp4", ".wav", ".avi", ".mov", ".ttf", ".otf", ".woff", ".woff2", ".eot", ".sqlite", ".db", }: try: with open(file_path, "rb") as f: chunk = f.read(1024) # Check for null bytes (binary indicator) if b"\x00" in chunk: return False return True except (OSError, PermissionError): return False return False def _scan_file_content( file_path: Path, skill_dir: Path, ) -> list[dict]: """ Scan a single file for security issues in its content. Args: file_path: Absolute path to the file. skill_dir: Root directory of the skill (for relative path display). Returns: List of issue dictionaries found in this file. """ issues: list[dict] = [] relative_path = str(file_path.relative_to(skill_dir)) try: file_size = file_path.stat().st_size except OSError: return issues if file_size > MAX_FILE_SIZE_BYTES: return issues if not _is_text_file(file_path): return issues try: lines = file_path.read_text(encoding="utf-8", errors="replace").splitlines() except (OSError, PermissionError): return issues is_python = file_path.suffix.lower() == ".py" for line_num, line in enumerate(lines, start=1): # Check API key patterns against all text files for pattern_name, regex, description, severity in API_KEY_PATTERNS: match = regex.search(line) if match: issues.append({ "severity": severity, "file": relative_path, "line": line_num, "pattern": pattern_name, "description": description, }) # Check Python-specific patterns only in .py files if is_python: for pattern_name, regex, description, severity in PYTHON_DANGER_PATTERNS: match = regex.search(line) if match: issues.append({ "severity": severity, "file": relative_path, "line": line_num, "pattern": pattern_name, "description": description, }) return issues def security_scan(skill_path: str) -> dict: """ Perform a security scan on a skill directory. Checks for hardcoded API keys, sensitive files, and dangerous code patterns. Args: skill_path: Path to the skill directory to scan. Returns: Dictionary with keys: - ``clean`` (bool): True if no issues were found. - ``issues`` (list[dict]): List of issue dictionaries. Each has: - ``severity`` (str): "high", "medium", or "low" - ``file`` (str): Relative file path - ``line`` (int): Line number (0 for file-level issues) - ``pattern`` (str): Pattern name that triggered the issue - ``description`` (str): Human-readable description """ issues: list[dict] = [] skill_dir = Path(skill_path).resolve() # --- Check: directory exists --- if not skill_dir.exists(): return { "clean": False, "issues": [{ "severity": "high", "file": str(skill_dir), "line": 0, "pattern": "missing_directory", "description": f"Path does not exist: {skill_dir}", }], } if not skill_dir.is_dir(): return { "clean": False, "issues": [{ "severity": "high", "file": str(skill_dir), "line": 0, "pattern": "not_a_directory", "description": f"Path is not a directory: {skill_dir}", }], } # --- Check: sensitive files --- for sensitive_name, description in SENSITIVE_FILES.items(): sensitive_path = skill_dir / sensitive_name if sensitive_path.exists(): issues.append({ "severity": "high", "file": sensitive_name, "line": 0, "pattern": "Sensitive file", "description": description, }) # Also check subdirectories for .env files for root, dirs, files in os.walk(skill_dir): root_path = Path(root) # Skip excluded directories dirs[:] = [d for d in dirs if d not in SKIP_DIRS] for filename in files: file_path = root_path / filename relative = str(file_path.relative_to(skill_dir)) # Check for .env files anywhere in the tree if filename == ".env" and relative != ".env": issues.append({ "severity": "high", "file": relative, "line": 0, "pattern": "Sensitive file", "description": "Environment file may contain secrets", }) # Check for sensitive JSON files in subdirectories if filename in ("credentials.json", "secrets.json", "api_keys.json"): if relative != filename: # Not already caught at root level issues.append({ "severity": "high", "file": relative, "line": 0, "pattern": "Sensitive file", "description": SENSITIVE_FILES.get( filename, "Sensitive file detected" ), }) # --- Scan file contents --- for root, dirs, files in os.walk(skill_dir): root_path = Path(root) # Skip excluded directories dirs[:] = [d for d in dirs if d not in SKIP_DIRS] for filename in files: file_path = root_path / filename file_issues = _scan_file_content(file_path, skill_dir) issues.extend(file_issues) # Sort issues: high first, then medium, then low severity_order = {"high": 0, "medium": 1, "low": 2} issues.sort(key=lambda x: (severity_order.get(x["severity"], 3), x["file"], x["line"])) return { "clean": len(issues) == 0, "issues": issues, } def _print_human_readable(result: dict, skill_path: str) -> None: """ Print security scan results in a human-readable format. Args: result: The scan result dictionary. skill_path: The path that was scanned (for display). """ print(f"Security scan: {skill_path}") print(f"{'=' * 60}") if result["clean"]: print("Status: CLEAN") print("\nNo security issues found.") else: print(f"Status: ISSUES FOUND ({len(result['issues'])})") # Count by severity high = sum(1 for i in result["issues"] if i["severity"] == "high") medium = sum(1 for i in result["issues"] if i["severity"] == "medium") low = sum(1 for i in result["issues"] if i["severity"] == "low") print(f"\n High: {high} Medium: {medium} Low: {low}") print() for issue in result["issues"]: severity_label = issue["severity"].upper().ljust(6) location = issue["file"] if issue["line"] > 0: location += f":{issue['line']}" print(f" [{severity_label}] {location}") print(f" Pattern: {issue['pattern']}") print(f" {issue['description']}") print() print(f"{'=' * 60}") def main() -> None: """CLI entry point for the security scanner.""" if len(sys.argv) < 2: print( "Usage: python3 scripts/security_scan.py [--json]\n" "\n" "Arguments:\n" " skill-path Path to the skill directory to scan\n" "\n" "Options:\n" " --json Output results as JSON to stdout\n" "\n" "Exit codes:\n" " 0 Clean (no issues)\n" " 1 Issues found (one or more security issues)\n", file=sys.stderr, ) sys.exit(1) skill_path = sys.argv[1] use_json = "--json" in sys.argv result = security_scan(skill_path) if use_json: print(json.dumps(result, indent=2)) else: _print_human_readable(result, skill_path) sys.exit(0 if result["clean"] else 1) if __name__ == "__main__": main()