Files
bi-agents/.agents/skills/bmad-agent-builder/scripts/prepass-structure-capabilities.py
Cassel 647cbec54f docs: update all documentation and add AI tooling configs
- Rewrite README.md with current architecture, features and stack
- Update docs/API.md with all current endpoints (corporate, BI, client 360)
- Update docs/ARCHITECTURE.md with cache, modular queries, services, ETL
- Update docs/GUIA-USUARIO.md for all roles (admin, corporate, agente)
- Add docs/INDEX.md documentation index
- Add PROJETO.md comprehensive project reference
- Add BI-CCC-Implementation-Guide.md
- Include AI agent configs (.claude, .agents, .gemini, _bmad)
- Add netbird VPN configuration
- Add status report

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-19 13:29:03 -04:00

637 lines
22 KiB
Python

#!/usr/bin/env python3
"""Deterministic pre-pass for agent structure and capabilities scanner.
Extracts structural metadata from a BMad agent skill that the LLM scanner
can use instead of reading all files itself. Covers:
- Frontmatter parsing and validation
- Section inventory (H2/H3 headers)
- Template artifact detection
- Agent name validation (bmad-{code}-agent-{name} or bmad-agent-{name})
- Required agent sections (Overview, Identity, Communication Style, Principles, On Activation)
- bmad-manifest.json validation (persona field for agent detection, capabilities)
- Capability cross-referencing with prompt files at skill root
- Memory path consistency checking
- Language/directness pattern grep
- On Exit / Exiting section detection (invalid)
"""
# /// script
# requires-python = ">=3.9"
# dependencies = [
# "pyyaml>=6.0",
# ]
# ///
from __future__ import annotations
import argparse
import json
import re
import sys
from datetime import datetime, timezone
from pathlib import Path
try:
import yaml
except ImportError:
print("Error: pyyaml required. Run with: uv run prepass-structure-capabilities.py", file=sys.stderr)
sys.exit(2)
# Template artifacts that should NOT appear in finalized skills.
# Each entry is a regex for a template conditional/placeholder that the
# skill generator should have resolved before the skill was finalized.
TEMPLATE_ARTIFACTS = [
    r'\{if-complex-workflow\}', r'\{/if-complex-workflow\}',
    r'\{if-simple-workflow\}', r'\{/if-simple-workflow\}',
    r'\{if-simple-utility\}', r'\{/if-simple-utility\}',
    r'\{if-module\}', r'\{/if-module\}',
    r'\{if-headless\}', r'\{/if-headless\}',
    r'\{if-autonomous\}', r'\{/if-autonomous\}',
    r'\{if-sidecar\}', r'\{/if-sidecar\}',
    r'\{displayName\}', r'\{skillName\}',
]
# Runtime variables that ARE expected (not artifacts): these are substituted
# at activation time, so their presence in a finalized skill is legitimate.
RUNTIME_VARS = {
    '{user_name}', '{communication_language}', '{document_output_language}',
    '{project-root}', '{output_folder}', '{planning_artifacts}',
    '{headless_mode}',
}
# Directness anti-patterns: (regex, reviewer message) pairs; matched
# case-insensitively against prompt/skill text to flag indirect phrasing.
DIRECTNESS_PATTERNS = [
    (r'\byou should\b', 'Suggestive "you should" — use direct imperative'),
    (r'\bplease\b(?! note)', 'Polite "please" — use direct imperative'),
    (r'\bhandle appropriately\b', 'Ambiguous "handle appropriately" — specify how'),
    (r'\bwhen ready\b', 'Vague "when ready" — specify testable condition'),
]
# Invalid sections: (regex matched against the rendered "## Title" line, message).
# No exit hooks exist in the runtime, so these sections could never execute.
INVALID_SECTIONS = [
    (r'^##\s+On\s+Exit\b', 'On Exit section found — no exit hooks exist in the system, this will never run'),
    (r'^##\s+Exiting\b', 'Exiting section found — no exit hooks exist in the system, this will never run'),
]
def parse_frontmatter(content: str) -> tuple[dict | None, list[dict]]:
    """Parse and validate the YAML frontmatter block of SKILL.md.

    Validates the required ``name`` and ``description`` fields, the agent
    naming pattern, and rejects unexpected frontmatter keys.

    Args:
        content: Full text of SKILL.md.

    Returns:
        Tuple of (frontmatter mapping or None when unparseable,
        list of finding dicts).
    """
    findings: list[dict] = []

    def _issue(severity: str, message: str) -> dict:
        # All frontmatter findings are anchored to SKILL.md line 1.
        return {'file': 'SKILL.md', 'line': 1,
                'severity': severity, 'category': 'frontmatter',
                'issue': message}

    fm_match = re.match(r'^---\s*\n(.*?)\n---\s*\n', content, re.DOTALL)
    if not fm_match:
        findings.append(_issue('critical', 'No YAML frontmatter found'))
        return None, findings
    try:
        fm = yaml.safe_load(fm_match.group(1))
    except yaml.YAMLError as e:
        findings.append(_issue('critical', f'Invalid YAML frontmatter: {e}'))
        return None, findings
    if not isinstance(fm, dict):
        findings.append(_issue('critical', 'Frontmatter is not a YAML mapping'))
        return None, findings

    # name check
    name = fm.get('name')
    if not name:
        findings.append(_issue('critical', 'Missing "name" field in frontmatter'))
    elif not isinstance(name, str):
        # YAML parses unquoted scalars like `name: 123` as int/float/bool;
        # re.match would raise TypeError on those, so report a finding
        # instead of crashing the scanner.
        findings.append(_issue(
            'critical',
            f'"name" field must be a string, got {type(name).__name__}'))
    elif not re.match(r'^[a-z0-9]+(-[a-z0-9]+)*$', name):
        findings.append(_issue('high', f'Name "{name}" is not kebab-case'))
    elif not (re.match(r'^bmad-[a-z0-9]+-agent-[a-z0-9]+(-[a-z0-9]+)*$', name)
              or re.match(r'^bmad-agent-[a-z0-9]+(-[a-z0-9]+)*$', name)):
        findings.append(_issue(
            'medium',
            f'Name "{name}" does not follow bmad-{{code}}-agent-{{name}} or bmad-agent-{{name}} pattern'))

    # description check
    desc = fm.get('description')
    if not desc:
        findings.append(_issue('high', 'Missing "description" field in frontmatter'))
    elif not isinstance(desc, str):
        # Same YAML-scalar hazard as `name`: `in` on a non-str can TypeError.
        findings.append(_issue(
            'high',
            f'"description" field must be a string, got {type(desc).__name__}'))
    elif 'use when' not in desc.lower():
        # Case-insensitive so variants like "Use When..." are not falsely flagged.
        findings.append(_issue('medium', 'Description missing "Use when..." trigger phrase'))

    # Extra fields check — only name and description allowed for agents
    extra = set(fm.keys()) - {'name', 'description'}
    if extra:
        findings.append(_issue('low', f'Extra frontmatter fields: {", ".join(sorted(extra))}'))
    return fm, findings
def extract_sections(content: str) -> list[dict]:
    """Collect every H2/H3 markdown header with its 1-based line number.

    Args:
        content: Full markdown text.

    Returns:
        List of {'level': 2|3, 'title': str, 'line': int} dicts in file order.
    """
    header_re = re.compile(r'^(#{2,3})\s+(.+)$')
    result: list[dict] = []
    for line_no, raw in enumerate(content.split('\n'), start=1):
        match = header_re.match(raw)
        if match is None:
            continue
        hashes, title = match.groups()
        result.append({
            'level': len(hashes),
            'title': title.strip(),
            'line': line_no,
        })
    return result
def check_required_sections(sections: list[dict]) -> list[dict]:
    """Verify mandatory H2 sections exist and flag forbidden exit sections.

    Args:
        sections: Header dicts as produced by extract_sections().

    Returns:
        List of finding dicts (missing required sections, invalid sections).
    """
    findings: list[dict] = []
    h2_titles = {entry['title'] for entry in sections if entry['level'] == 2}

    # Every agent SKILL.md must carry these five H2 sections.
    for heading in ('Overview', 'Identity', 'Communication Style',
                    'Principles', 'On Activation'):
        if heading in h2_titles:
            continue
        findings.append({
            'file': 'SKILL.md', 'line': 1,
            'severity': 'high', 'category': 'sections',
            'issue': f'Missing ## {heading} section',
        })

    # Some sections are forbidden: no runtime hook exists to ever run them.
    for entry in sections:
        if entry['level'] != 2:
            continue
        rendered = f"## {entry['title']}"
        for pattern, message in INVALID_SECTIONS:
            if re.match(pattern, rendered):
                findings.append({
                    'file': 'SKILL.md', 'line': entry['line'],
                    'severity': 'high', 'category': 'invalid-section',
                    'issue': message,
                })
    return findings
def find_template_artifacts(filepath: Path, rel_path: str) -> list[dict]:
    """Report orphaned template placeholders left behind by skill generation.

    Args:
        filepath: File to scan.
        rel_path: Path string to report in findings.

    Returns:
        List of finding dicts, one per orphaned placeholder occurrence.
    """
    text = filepath.read_text(encoding='utf-8')
    findings: list[dict] = []
    for pattern in TEMPLATE_ARTIFACTS:
        for hit in re.finditer(pattern, text):
            token = hit.group()
            # Expected runtime variables are legitimate, not artifacts.
            if token in RUNTIME_VARS:
                continue
            findings.append({
                'file': rel_path,
                'line': text.count('\n', 0, hit.start()) + 1,
                'severity': 'high',
                'category': 'artifacts',
                'issue': f'Orphaned template artifact: {token}',
                'fix': 'Resolve or remove this template conditional/placeholder',
            })
    return findings
def validate_manifest(skill_path: Path) -> tuple[dict, list[dict]]:
    """Validate bmad-manifest.json: agent persona, capabilities, menu codes.

    Args:
        skill_path: Skill root directory expected to contain bmad-manifest.json.

    Returns:
        Tuple of (validation summary dict, list of finding dicts).
    """
    findings: list[dict] = []
    validation = {
        'found': False,
        'valid_json': False,
        'is_agent': False,
        'has_capabilities': False,
        'capability_count': 0,
        'menu_codes': [],
        'duplicate_menu_codes': [],
        'capability_issues': [],
    }

    def _issue(severity: str, message: str) -> dict:
        # Manifest findings carry line 0 (JSON line numbers are not tracked).
        return {'file': 'bmad-manifest.json', 'line': 0,
                'severity': severity, 'category': 'manifest',
                'issue': message}

    manifest_file = skill_path / 'bmad-manifest.json'
    if not manifest_file.exists():
        findings.append(_issue('high', 'bmad-manifest.json not found at skill root'))
        return validation, findings
    validation['found'] = True

    try:
        data = json.loads(manifest_file.read_text(encoding='utf-8'))
    except json.JSONDecodeError as e:
        findings.append(_issue('critical', f'Invalid JSON in bmad-manifest.json: {e}'))
        return validation, findings
    validation['valid_json'] = True

    # Agents are distinguished from plain skills by the presence of "persona".
    validation['is_agent'] = 'persona' in data
    if not validation['is_agent']:
        findings.append(_issue(
            'high',
            'Missing "persona" field — agents are identified by having a persona field'))

    capabilities = data.get('capabilities')
    if capabilities is None:
        findings.append(_issue('high', 'Missing "capabilities" field'))
        return validation, findings
    if not isinstance(capabilities, list):
        findings.append(_issue('high', '"capabilities" is not an array'))
        return validation, findings
    validation['has_capabilities'] = True
    validation['capability_count'] = len(capabilities)

    # Every capability object needs name, menu-code and description.
    menu_codes: list = []
    for idx, cap in enumerate(capabilities):
        if not isinstance(cap, dict):
            findings.append(_issue('high', f'Capability at index {idx} is not an object'))
            continue
        missing = {'name', 'menu-code', 'description'} - cap.keys()
        if missing:
            label = cap.get('name', f'index-{idx}')
            findings.append(_issue(
                'high',
                f'Capability "{label}" missing required fields: {", ".join(sorted(missing))}'))
        code = cap.get('menu-code')
        if code:
            menu_codes.append(code)
    validation['menu_codes'] = menu_codes

    # Menu codes must be unique across the agent's capability list.
    duplicated = sorted({code for pos, code in enumerate(menu_codes)
                         if code in menu_codes[:pos]})
    if duplicated:
        validation['duplicate_menu_codes'] = duplicated
        findings.append(_issue('high', f'Duplicate menu codes: {", ".join(duplicated)}'))
    return validation, findings
def cross_reference_capabilities(skill_path: Path) -> tuple[dict, list[dict]]:
    """Match prompt capabilities in the manifest against .md files at skill root.

    Args:
        skill_path: Skill root directory.

    Returns:
        Tuple of (cross-reference summary dict, list of finding dicts).
        Silently returns empty results when the manifest is absent or unreadable
        (those problems are reported by validate_manifest).
    """
    findings: list[dict] = []
    crossref = {
        'manifest_prompt_caps': [],
        'missing_prompt_files': [],
        'orphaned_prompt_files': [],
    }
    manifest_file = skill_path / 'bmad-manifest.json'
    # EAFP: a missing file raises FileNotFoundError, a subclass of OSError.
    try:
        manifest = json.loads(manifest_file.read_text(encoding='utf-8'))
    except (json.JSONDecodeError, OSError):
        return crossref, findings

    caps = manifest.get('capabilities', [])
    if not isinstance(caps, list):
        return crossref, findings

    # Capability names declared with type "prompt" in the manifest.
    declared: set = set()
    for cap in caps:
        if not isinstance(cap, dict) or cap.get('type') != 'prompt':
            continue
        cap_name = cap.get('name')
        if cap_name:
            declared.add(cap_name)
            crossref['manifest_prompt_caps'].append(cap_name)

    # Markdown files actually present at the skill root (stems only).
    skip = {'SKILL.md', 'bmad-manifest.json', 'bmad-skill-manifest.yaml'}
    on_disk = {entry.stem for entry in skill_path.iterdir()
               if entry.is_file() and entry.suffix == '.md' and entry.name not in skip}

    # Declared but no file on disk.
    for cap_name in sorted(declared - on_disk):
        crossref['missing_prompt_files'].append(cap_name)
        findings.append({
            'file': 'bmad-manifest.json', 'line': 0,
            'severity': 'high', 'category': 'capability-crossref',
            'issue': f'Prompt capability "{cap_name}" has no matching file {cap_name}.md at skill root',
        })

    # File on disk but never declared.
    for cap_name in sorted(on_disk - declared):
        crossref['orphaned_prompt_files'].append(cap_name)
        findings.append({
            'file': f'{cap_name}.md', 'line': 0,
            'severity': 'medium', 'category': 'capability-crossref',
            'issue': f'Prompt file {cap_name}.md not referenced as a prompt capability in manifest',
        })
    return crossref, findings
def extract_memory_paths(skill_path: Path) -> tuple[list[str], list[dict]]:
    """Collect memory/sidecar path references and flag mixed prefix styles.

    Scans SKILL.md plus .md/.json/.yaml/.yml files under prompts/ and
    resources/ for paths starting with memory/, .memory/, sidecar/ or .sidecar/.

    Args:
        skill_path: Skill root directory.

    Returns:
        Tuple of (sorted unique path strings, list of finding dicts).
    """
    findings: list[dict] = []
    path_re = re.compile(r'(?:memory/|sidecar/|\.memory/|\.sidecar/)[\w\-/]+(?:\.\w+)?')

    # Build the scan list: SKILL.md first, then prompts/ and resources/ content.
    candidates: list[tuple[str, Path]] = []
    root_skill = skill_path / 'SKILL.md'
    if root_skill.exists():
        candidates.append(('SKILL.md', root_skill))
    for sub in ('prompts', 'resources'):
        folder = skill_path / sub
        if not folder.exists():
            continue
        for entry in sorted(folder.iterdir()):
            if entry.is_file() and entry.suffix in ('.md', '.json', '.yaml', '.yml'):
                candidates.append((f'{sub}/{entry.name}', entry))

    references: set = set()
    for _rel, entry in candidates:
        references.update(path_re.findall(entry.read_text(encoding='utf-8')))
    ordered = sorted(references)

    # Mixing e.g. memory/ and .memory/ suggests copy-paste drift — flag it.
    first_segments = {p.split('/')[0] for p in ordered}
    mem_variants = {p for p in first_segments if 'memory' in p.lower()}
    side_variants = {p for p in first_segments if 'sidecar' in p.lower()}
    if len(mem_variants) > 1:
        findings.append({
            'file': 'multiple', 'line': 0,
            'severity': 'medium', 'category': 'memory-paths',
            'issue': f'Inconsistent memory path prefixes: {", ".join(sorted(mem_variants))}',
        })
    if len(side_variants) > 1:
        findings.append({
            'file': 'multiple', 'line': 0,
            'severity': 'medium', 'category': 'memory-paths',
            'issue': f'Inconsistent sidecar path prefixes: {", ".join(sorted(side_variants))}',
        })
    return ordered, findings
def check_prompt_basics(skill_path: Path) -> tuple[list[dict], list[dict]]:
    """Audit root-level prompt files: config header, progression, tone, artifacts.

    Args:
        skill_path: Skill root directory.

    Returns:
        Tuple of (per-prompt detail dicts, list of finding dicts).
    """
    findings: list[dict] = []
    prompt_details: list[dict] = []
    skip = {'SKILL.md', 'bmad-manifest.json', 'bmad-skill-manifest.yaml'}
    prompts = [entry for entry in sorted(skill_path.iterdir())
               if entry.is_file() and entry.suffix == '.md' and entry.name not in skip]

    # Keywords indicating the prompt defines when/how to move forward.
    progression_markers = ('progress', 'advance', 'move to', 'next stage',
                           'when complete', 'proceed to', 'transition',
                           'completion criteria')

    for prompt in prompts:
        text = prompt.read_text(encoding='utf-8')
        rel = prompt.name
        detail = {'file': prompt.name,
                  'has_config_header': False,
                  'has_progression': False}

        # Config header: at least one language runtime variable must appear.
        if ('{communication_language}' in text
                or '{document_output_language}' in text):
            detail['has_config_header'] = True
        else:
            findings.append({
                'file': rel, 'line': 1,
                'severity': 'medium', 'category': 'config-header',
                'issue': 'No config header with language variables found',
            })

        # Progression condition: keyword scan over the lowercased body.
        lowered = text.lower()
        if any(marker in lowered for marker in progression_markers):
            detail['has_progression'] = True
        else:
            findings.append({
                'file': rel, 'line': text.count('\n') + 1,
                'severity': 'high', 'category': 'progression',
                'issue': 'No progression condition keywords found',
            })

        # Tone: flag indirect/vague phrasing (case-insensitive).
        for pattern, message in DIRECTNESS_PATTERNS:
            for hit in re.finditer(pattern, text, re.IGNORECASE):
                findings.append({
                    'file': rel,
                    'line': text.count('\n', 0, hit.start()) + 1,
                    'severity': 'low', 'category': 'language',
                    'issue': message,
                })

        # Leftover template placeholders are structural issues.
        findings.extend(find_template_artifacts(prompt, rel))
        prompt_details.append(detail)
    return prompt_details, findings
def scan_structure_capabilities(skill_path: Path) -> dict:
    """Run every deterministic structure/capability check and assemble the report.

    Args:
        skill_path: Skill root directory.

    Returns:
        Report dict with scanner metadata, findings, and a severity summary.
        Status is 'fail' on any critical finding, 'warning' on any high
        finding, otherwise 'pass'.
    """
    skill_md = skill_path / 'SKILL.md'
    if not skill_md.exists():
        # Without SKILL.md nothing else is meaningful — short-circuit report.
        return {
            'scanner': 'structure-capabilities-prepass',
            'script': 'prepass-structure-capabilities.py',
            'version': '1.0.0',
            'skill_path': str(skill_path),
            'timestamp': datetime.now(timezone.utc).isoformat(),
            'status': 'fail',
            'issues': [{'file': 'SKILL.md', 'line': 1, 'severity': 'critical',
                        'category': 'missing-file', 'issue': 'SKILL.md does not exist'}],
            'summary': {'total_issues': 1, 'by_severity': {'critical': 1, 'high': 0, 'medium': 0, 'low': 0}},
        }

    text = skill_md.read_text(encoding='utf-8')
    issues: list[dict] = []

    frontmatter, fm_issues = parse_frontmatter(text)
    issues.extend(fm_issues)

    sections = extract_sections(text)
    issues.extend(check_required_sections(sections))
    issues.extend(find_template_artifacts(skill_md, 'SKILL.md'))

    # Tone scan over SKILL.md itself (prompt files are scanned separately).
    for pattern, message in DIRECTNESS_PATTERNS:
        for hit in re.finditer(pattern, text, re.IGNORECASE):
            issues.append({
                'file': 'SKILL.md',
                'line': text.count('\n', 0, hit.start()) + 1,
                'severity': 'low', 'category': 'language',
                'issue': message,
            })

    manifest_validation, manifest_issues = validate_manifest(skill_path)
    issues.extend(manifest_issues)

    capability_crossref, crossref_issues = cross_reference_capabilities(skill_path)
    issues.extend(crossref_issues)

    memory_paths, memory_issues = extract_memory_paths(skill_path)
    issues.extend(memory_issues)

    prompt_details, prompt_issues = check_prompt_basics(skill_path)
    issues.extend(prompt_issues)

    # Severity tally; unknown severities are ignored rather than crashing.
    tally = {'critical': 0, 'high': 0, 'medium': 0, 'low': 0}
    for item in issues:
        if item['severity'] in tally:
            tally[item['severity']] += 1

    if tally['critical']:
        status = 'fail'
    elif tally['high']:
        status = 'warning'
    else:
        status = 'pass'

    return {
        'scanner': 'structure-capabilities-prepass',
        'script': 'prepass-structure-capabilities.py',
        'version': '1.0.0',
        'skill_path': str(skill_path),
        'timestamp': datetime.now(timezone.utc).isoformat(),
        'status': status,
        'metadata': {
            'frontmatter': frontmatter,
            'sections': sections,
            'has_manifest': manifest_validation['found'],
            'manifest_validation': manifest_validation,
            'capability_crossref': capability_crossref,
        },
        'prompt_details': prompt_details,
        'memory_paths': memory_paths,
        'issues': issues,
        'summary': {
            'total_issues': len(issues),
            'by_severity': tally,
        },
    }
def main() -> int:
    """CLI entry point: scan a skill directory and emit the JSON report.

    Returns:
        0 on a clean pass, 1 on warnings/failures, 2 on bad arguments.
    """
    parser = argparse.ArgumentParser(
        description='Deterministic pre-pass for agent structure and capabilities scanning',
    )
    parser.add_argument('skill_path', type=Path,
                        help='Path to the skill directory to scan')
    parser.add_argument('--output', '-o', type=Path,
                        help='Write JSON output to file instead of stdout')
    args = parser.parse_args()

    if not args.skill_path.is_dir():
        print(f"Error: {args.skill_path} is not a directory", file=sys.stderr)
        return 2

    report = scan_structure_capabilities(args.skill_path)
    rendered = json.dumps(report, indent=2)
    if args.output:
        args.output.parent.mkdir(parents=True, exist_ok=True)
        args.output.write_text(rendered)
        print(f"Results written to {args.output}", file=sys.stderr)
    else:
        print(rendered)
    # Non-zero exit lets CI treat warnings/failures as actionable.
    return 0 if report['status'] == 'pass' else 1


if __name__ == '__main__':
    sys.exit(main())