docs: update all documentation and add AI tooling configs
- Rewrite README.md with current architecture, features and stack - Update docs/API.md with all current endpoints (corporate, BI, client 360) - Update docs/ARCHITECTURE.md with cache, modular queries, services, ETL - Update docs/GUIA-USUARIO.md for all roles (admin, corporate, agente) - Add docs/INDEX.md documentation index - Add PROJETO.md comprehensive project reference - Add BI-CCC-Implementation-Guide.md - Include AI agent configs (.claude, .agents, .gemini, _bmad) - Add netbird VPN configuration - Add status report Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,476 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Deterministic pre-pass for prompt craft scanner (agent builder).
|
||||
|
||||
Extracts metrics and flagged patterns from SKILL.md and prompt files
|
||||
so the LLM scanner can work from compact data instead of reading raw files.
|
||||
|
||||
Covers:
|
||||
- SKILL.md line count and section inventory
|
||||
- Overview section size
|
||||
- Inline data detection (tables, fenced code blocks)
|
||||
- Defensive padding pattern grep
|
||||
- Meta-explanation pattern grep
|
||||
- Back-reference detection ("as described above")
|
||||
- Config header and progression condition presence per prompt
|
||||
- File-level token estimates (chars / 4 rough approximation)
|
||||
- Prompt frontmatter validation (name, description, menu-code)
|
||||
- Manifest alignment check (frontmatter vs bmad-manifest.json entries)
|
||||
- Wall-of-text detection
|
||||
- Suggestive loading grep
|
||||
"""
|
||||
|
||||
# /// script
|
||||
# requires-python = ">=3.9"
|
||||
# ///
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Defensive padding / filler patterns
|
||||
WASTE_PATTERNS = [
|
||||
(r'\b[Mm]ake sure (?:to|you)\b', 'defensive-padding', 'Defensive: "make sure to/you"'),
|
||||
(r"\b[Dd]on'?t forget (?:to|that)\b", 'defensive-padding', "Defensive: \"don't forget\""),
|
||||
(r'\b[Rr]emember (?:to|that)\b', 'defensive-padding', 'Defensive: "remember to/that"'),
|
||||
(r'\b[Bb]e sure to\b', 'defensive-padding', 'Defensive: "be sure to"'),
|
||||
(r'\b[Pp]lease ensure\b', 'defensive-padding', 'Defensive: "please ensure"'),
|
||||
(r'\b[Ii]t is important (?:to|that)\b', 'defensive-padding', 'Defensive: "it is important"'),
|
||||
(r'\b[Yy]ou are an AI\b', 'meta-explanation', 'Meta: "you are an AI"'),
|
||||
(r'\b[Aa]s a language model\b', 'meta-explanation', 'Meta: "as a language model"'),
|
||||
(r'\b[Aa]s an AI assistant\b', 'meta-explanation', 'Meta: "as an AI assistant"'),
|
||||
(r'\b[Tt]his (?:workflow|skill|process) is designed to\b', 'meta-explanation', 'Meta: "this workflow is designed to"'),
|
||||
(r'\b[Tt]he purpose of this (?:section|step) is\b', 'meta-explanation', 'Meta: "the purpose of this section is"'),
|
||||
(r"\b[Ll]et'?s (?:think about|begin|start)\b", 'filler', "Filler: \"let's think/begin\""),
|
||||
(r'\b[Nn]ow we(?:\'ll| will)\b', 'filler', "Filler: \"now we'll\""),
|
||||
]
|
||||
|
||||
# Back-reference patterns (self-containment risk)
|
||||
BACKREF_PATTERNS = [
|
||||
(r'\bas described above\b', 'Back-reference: "as described above"'),
|
||||
(r'\bper the overview\b', 'Back-reference: "per the overview"'),
|
||||
(r'\bas mentioned (?:above|in|earlier)\b', 'Back-reference: "as mentioned above/in/earlier"'),
|
||||
(r'\bsee (?:above|the overview)\b', 'Back-reference: "see above/the overview"'),
|
||||
(r'\brefer to (?:the )?(?:above|overview|SKILL)\b', 'Back-reference: "refer to above/overview"'),
|
||||
]
|
||||
|
||||
# Suggestive loading patterns
|
||||
SUGGESTIVE_LOADING_PATTERNS = [
|
||||
(r'\b[Ll]oad (?:the |all )?(?:relevant|necessary|needed|required)\b', 'Suggestive loading: "load relevant/necessary"'),
|
||||
(r'\b[Rr]ead (?:the |all )?(?:relevant|necessary|needed|required)\b', 'Suggestive loading: "read relevant/necessary"'),
|
||||
(r'\b[Gg]ather (?:the |all )?(?:relevant|necessary|needed)\b', 'Suggestive loading: "gather relevant/necessary"'),
|
||||
]
|
||||
|
||||
|
||||
def count_tables(content: str) -> tuple[int, int]:
|
||||
"""Count markdown tables and their total lines."""
|
||||
table_count = 0
|
||||
table_lines = 0
|
||||
in_table = False
|
||||
for line in content.split('\n'):
|
||||
if '|' in line and re.match(r'^\s*\|', line):
|
||||
if not in_table:
|
||||
table_count += 1
|
||||
in_table = True
|
||||
table_lines += 1
|
||||
else:
|
||||
in_table = False
|
||||
return table_count, table_lines
|
||||
|
||||
|
||||
def count_fenced_blocks(content: str) -> tuple[int, int]:
|
||||
"""Count fenced code blocks and their total lines."""
|
||||
block_count = 0
|
||||
block_lines = 0
|
||||
in_block = False
|
||||
for line in content.split('\n'):
|
||||
if line.strip().startswith('```'):
|
||||
if in_block:
|
||||
in_block = False
|
||||
else:
|
||||
in_block = True
|
||||
block_count += 1
|
||||
elif in_block:
|
||||
block_lines += 1
|
||||
return block_count, block_lines
|
||||
|
||||
|
||||
def extract_overview_size(content: str) -> int:
|
||||
"""Count lines in the ## Overview section."""
|
||||
lines = content.split('\n')
|
||||
in_overview = False
|
||||
overview_lines = 0
|
||||
for line in lines:
|
||||
if re.match(r'^##\s+Overview\b', line):
|
||||
in_overview = True
|
||||
continue
|
||||
elif in_overview and re.match(r'^##\s', line):
|
||||
break
|
||||
elif in_overview:
|
||||
overview_lines += 1
|
||||
return overview_lines
|
||||
|
||||
|
||||
def detect_wall_of_text(content: str) -> list[dict]:
|
||||
"""Detect long runs of text without headers or breaks."""
|
||||
walls = []
|
||||
lines = content.split('\n')
|
||||
run_start = None
|
||||
run_length = 0
|
||||
|
||||
for i, line in enumerate(lines, 1):
|
||||
stripped = line.strip()
|
||||
is_break = (
|
||||
not stripped
|
||||
or re.match(r'^#{1,6}\s', stripped)
|
||||
or re.match(r'^[-*]\s', stripped)
|
||||
or re.match(r'^\d+\.\s', stripped)
|
||||
or stripped.startswith('```')
|
||||
or stripped.startswith('|')
|
||||
)
|
||||
|
||||
if is_break:
|
||||
if run_length >= 15:
|
||||
walls.append({
|
||||
'start_line': run_start,
|
||||
'length': run_length,
|
||||
})
|
||||
run_start = None
|
||||
run_length = 0
|
||||
else:
|
||||
if run_start is None:
|
||||
run_start = i
|
||||
run_length += 1
|
||||
|
||||
if run_length >= 15:
|
||||
walls.append({
|
||||
'start_line': run_start,
|
||||
'length': run_length,
|
||||
})
|
||||
|
||||
return walls
|
||||
|
||||
|
||||
def parse_prompt_frontmatter(filepath: Path) -> dict:
|
||||
"""Parse YAML frontmatter from a prompt file and validate."""
|
||||
content = filepath.read_text(encoding='utf-8')
|
||||
result = {
|
||||
'has_frontmatter': False,
|
||||
'fields': {},
|
||||
'missing_fields': [],
|
||||
}
|
||||
|
||||
fm_match = re.match(r'^---\s*\n(.*?)\n---\s*\n', content, re.DOTALL)
|
||||
if not fm_match:
|
||||
result['missing_fields'] = ['name', 'description', 'menu-code']
|
||||
return result
|
||||
|
||||
result['has_frontmatter'] = True
|
||||
|
||||
try:
|
||||
import yaml
|
||||
fm = yaml.safe_load(fm_match.group(1))
|
||||
except Exception:
|
||||
# Fallback: simple key-value parsing
|
||||
fm = {}
|
||||
for line in fm_match.group(1).split('\n'):
|
||||
if ':' in line:
|
||||
key, _, val = line.partition(':')
|
||||
fm[key.strip()] = val.strip()
|
||||
|
||||
if not isinstance(fm, dict):
|
||||
result['missing_fields'] = ['name', 'description', 'menu-code']
|
||||
return result
|
||||
|
||||
expected_fields = ['name', 'description', 'menu-code']
|
||||
for field in expected_fields:
|
||||
if field in fm:
|
||||
result['fields'][field] = fm[field]
|
||||
else:
|
||||
result['missing_fields'].append(field)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def check_manifest_alignment(skill_path: Path, prompt_frontmatters: dict[str, dict]) -> dict:
|
||||
"""Compare prompt frontmatter against bmad-manifest.json entries."""
|
||||
alignment = {
|
||||
'manifest_found': False,
|
||||
'mismatches': [],
|
||||
'manifest_only': [],
|
||||
'prompt_only': [],
|
||||
}
|
||||
|
||||
manifest_path = skill_path / 'bmad-manifest.json'
|
||||
if not manifest_path.exists():
|
||||
return alignment
|
||||
|
||||
try:
|
||||
data = json.loads(manifest_path.read_text(encoding='utf-8'))
|
||||
except (json.JSONDecodeError, OSError):
|
||||
return alignment
|
||||
|
||||
alignment['manifest_found'] = True
|
||||
|
||||
capabilities = data.get('capabilities', [])
|
||||
if not isinstance(capabilities, list):
|
||||
return alignment
|
||||
|
||||
# Build manifest lookup by name
|
||||
manifest_caps = {}
|
||||
for cap in capabilities:
|
||||
if isinstance(cap, dict) and cap.get('name'):
|
||||
manifest_caps[cap['name']] = cap
|
||||
|
||||
# Compare
|
||||
prompt_names = set(prompt_frontmatters.keys())
|
||||
manifest_names = set(manifest_caps.keys())
|
||||
|
||||
alignment['manifest_only'] = sorted(manifest_names - prompt_names)
|
||||
alignment['prompt_only'] = sorted(prompt_names - manifest_names)
|
||||
|
||||
# Check field mismatches for overlapping entries
|
||||
for name in sorted(prompt_names & manifest_names):
|
||||
pfm = prompt_frontmatters[name]
|
||||
mcap = manifest_caps[name]
|
||||
|
||||
issues = []
|
||||
# Compare name field
|
||||
pfm_name = pfm.get('fields', {}).get('name')
|
||||
if pfm_name and pfm_name != mcap.get('name'):
|
||||
issues.append(f'name mismatch: frontmatter="{pfm_name}" manifest="{mcap.get("name")}"')
|
||||
|
||||
# Compare menu-code
|
||||
pfm_mc = pfm.get('fields', {}).get('menu-code')
|
||||
mcap_mc = mcap.get('menu-code')
|
||||
if pfm_mc and mcap_mc and pfm_mc != mcap_mc:
|
||||
issues.append(f'menu-code mismatch: frontmatter="{pfm_mc}" manifest="{mcap_mc}"')
|
||||
|
||||
if issues:
|
||||
alignment['mismatches'].append({
|
||||
'name': name,
|
||||
'issues': issues,
|
||||
})
|
||||
|
||||
return alignment
|
||||
|
||||
|
||||
def scan_file_patterns(filepath: Path, rel_path: str) -> dict:
|
||||
"""Extract metrics and pattern matches from a single file."""
|
||||
content = filepath.read_text(encoding='utf-8')
|
||||
lines = content.split('\n')
|
||||
line_count = len(lines)
|
||||
|
||||
# Token estimate (rough: chars / 4)
|
||||
token_estimate = len(content) // 4
|
||||
|
||||
# Section inventory
|
||||
sections = []
|
||||
for i, line in enumerate(lines, 1):
|
||||
m = re.match(r'^(#{2,3})\s+(.+)$', line)
|
||||
if m:
|
||||
sections.append({'level': len(m.group(1)), 'title': m.group(2).strip(), 'line': i})
|
||||
|
||||
# Tables and code blocks
|
||||
table_count, table_lines = count_tables(content)
|
||||
block_count, block_lines = count_fenced_blocks(content)
|
||||
|
||||
# Pattern matches
|
||||
waste_matches = []
|
||||
for pattern, category, label in WASTE_PATTERNS:
|
||||
for m in re.finditer(pattern, content):
|
||||
line_num = content[:m.start()].count('\n') + 1
|
||||
waste_matches.append({
|
||||
'line': line_num,
|
||||
'category': category,
|
||||
'pattern': label,
|
||||
'context': lines[line_num - 1].strip()[:100],
|
||||
})
|
||||
|
||||
backref_matches = []
|
||||
for pattern, label in BACKREF_PATTERNS:
|
||||
for m in re.finditer(pattern, content, re.IGNORECASE):
|
||||
line_num = content[:m.start()].count('\n') + 1
|
||||
backref_matches.append({
|
||||
'line': line_num,
|
||||
'pattern': label,
|
||||
'context': lines[line_num - 1].strip()[:100],
|
||||
})
|
||||
|
||||
# Suggestive loading
|
||||
suggestive_loading = []
|
||||
for pattern, label in SUGGESTIVE_LOADING_PATTERNS:
|
||||
for m in re.finditer(pattern, content, re.IGNORECASE):
|
||||
line_num = content[:m.start()].count('\n') + 1
|
||||
suggestive_loading.append({
|
||||
'line': line_num,
|
||||
'pattern': label,
|
||||
'context': lines[line_num - 1].strip()[:100],
|
||||
})
|
||||
|
||||
# Config header
|
||||
has_config_header = '{communication_language}' in content or '{document_output_language}' in content
|
||||
|
||||
# Progression condition
|
||||
prog_keywords = ['progress', 'advance', 'move to', 'next stage',
|
||||
'when complete', 'proceed to', 'transition', 'completion criteria']
|
||||
has_progression = any(kw in content.lower() for kw in prog_keywords)
|
||||
|
||||
# Wall-of-text detection
|
||||
walls = detect_wall_of_text(content)
|
||||
|
||||
result = {
|
||||
'file': rel_path,
|
||||
'line_count': line_count,
|
||||
'token_estimate': token_estimate,
|
||||
'sections': sections,
|
||||
'table_count': table_count,
|
||||
'table_lines': table_lines,
|
||||
'fenced_block_count': block_count,
|
||||
'fenced_block_lines': block_lines,
|
||||
'waste_patterns': waste_matches,
|
||||
'back_references': backref_matches,
|
||||
'suggestive_loading': suggestive_loading,
|
||||
'has_config_header': has_config_header,
|
||||
'has_progression': has_progression,
|
||||
'wall_of_text': walls,
|
||||
}
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def scan_prompt_metrics(skill_path: Path) -> dict:
|
||||
"""Extract metrics from all prompt-relevant files."""
|
||||
files_data = []
|
||||
|
||||
# SKILL.md
|
||||
skill_md = skill_path / 'SKILL.md'
|
||||
if skill_md.exists():
|
||||
data = scan_file_patterns(skill_md, 'SKILL.md')
|
||||
content = skill_md.read_text(encoding='utf-8')
|
||||
data['overview_lines'] = extract_overview_size(content)
|
||||
data['is_skill_md'] = True
|
||||
files_data.append(data)
|
||||
|
||||
# Prompt files at skill root — also extract frontmatter
|
||||
prompt_frontmatters: dict[str, dict] = {}
|
||||
skip_files = {'SKILL.md', 'bmad-manifest.json', 'bmad-skill-manifest.yaml'}
|
||||
|
||||
for f in sorted(skill_path.iterdir()):
|
||||
if f.is_file() and f.suffix == '.md' and f.name not in skip_files and f.name != 'SKILL.md':
|
||||
data = scan_file_patterns(f, f.name)
|
||||
data['is_skill_md'] = False
|
||||
|
||||
# Parse prompt frontmatter
|
||||
pfm = parse_prompt_frontmatter(f)
|
||||
data['prompt_frontmatter'] = pfm
|
||||
|
||||
# Use stem as key for manifest alignment
|
||||
prompt_name = pfm.get('fields', {}).get('name', f.stem)
|
||||
prompt_frontmatters[prompt_name] = pfm
|
||||
|
||||
files_data.append(data)
|
||||
|
||||
# Resources (just sizes, for progressive disclosure assessment)
|
||||
resources_dir = skill_path / 'resources'
|
||||
resource_sizes = {}
|
||||
if resources_dir.exists():
|
||||
for f in sorted(resources_dir.iterdir()):
|
||||
if f.is_file() and f.suffix in ('.md', '.json', '.yaml', '.yml'):
|
||||
content = f.read_text(encoding='utf-8')
|
||||
resource_sizes[f.name] = {
|
||||
'lines': len(content.split('\n')),
|
||||
'tokens': len(content) // 4,
|
||||
}
|
||||
|
||||
# Manifest alignment
|
||||
manifest_alignment = check_manifest_alignment(skill_path, prompt_frontmatters)
|
||||
|
||||
# Aggregate stats
|
||||
total_waste = sum(len(f['waste_patterns']) for f in files_data)
|
||||
total_backrefs = sum(len(f['back_references']) for f in files_data)
|
||||
total_suggestive = sum(len(f.get('suggestive_loading', [])) for f in files_data)
|
||||
total_tokens = sum(f['token_estimate'] for f in files_data)
|
||||
total_walls = sum(len(f.get('wall_of_text', [])) for f in files_data)
|
||||
prompts_with_config = sum(1 for f in files_data if not f.get('is_skill_md') and f['has_config_header'])
|
||||
prompts_with_progression = sum(1 for f in files_data if not f.get('is_skill_md') and f['has_progression'])
|
||||
total_prompts = sum(1 for f in files_data if not f.get('is_skill_md'))
|
||||
|
||||
skill_md_data = next((f for f in files_data if f.get('is_skill_md')), None)
|
||||
|
||||
return {
|
||||
'scanner': 'prompt-craft-prepass',
|
||||
'script': 'prepass-prompt-metrics.py',
|
||||
'version': '1.0.0',
|
||||
'skill_path': str(skill_path),
|
||||
'timestamp': datetime.now(timezone.utc).isoformat(),
|
||||
'status': 'info',
|
||||
'skill_md_summary': {
|
||||
'line_count': skill_md_data['line_count'] if skill_md_data else 0,
|
||||
'token_estimate': skill_md_data['token_estimate'] if skill_md_data else 0,
|
||||
'overview_lines': skill_md_data.get('overview_lines', 0) if skill_md_data else 0,
|
||||
'table_count': skill_md_data['table_count'] if skill_md_data else 0,
|
||||
'table_lines': skill_md_data['table_lines'] if skill_md_data else 0,
|
||||
'fenced_block_count': skill_md_data['fenced_block_count'] if skill_md_data else 0,
|
||||
'fenced_block_lines': skill_md_data['fenced_block_lines'] if skill_md_data else 0,
|
||||
'section_count': len(skill_md_data['sections']) if skill_md_data else 0,
|
||||
},
|
||||
'prompt_health': {
|
||||
'total_prompts': total_prompts,
|
||||
'prompts_with_config_header': prompts_with_config,
|
||||
'prompts_with_progression': prompts_with_progression,
|
||||
},
|
||||
'aggregate': {
|
||||
'total_files_scanned': len(files_data),
|
||||
'total_token_estimate': total_tokens,
|
||||
'total_waste_patterns': total_waste,
|
||||
'total_back_references': total_backrefs,
|
||||
'total_suggestive_loading': total_suggestive,
|
||||
'total_wall_of_text': total_walls,
|
||||
},
|
||||
'resource_sizes': resource_sizes,
|
||||
'manifest_alignment': manifest_alignment,
|
||||
'files': files_data,
|
||||
}
|
||||
|
||||
|
||||
def main() -> int:
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Extract prompt craft metrics for LLM scanner pre-pass (agent builder)',
|
||||
)
|
||||
parser.add_argument(
|
||||
'skill_path',
|
||||
type=Path,
|
||||
help='Path to the skill directory to scan',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--output', '-o',
|
||||
type=Path,
|
||||
help='Write JSON output to file instead of stdout',
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.skill_path.is_dir():
|
||||
print(f"Error: {args.skill_path} is not a directory", file=sys.stderr)
|
||||
return 2
|
||||
|
||||
result = scan_prompt_metrics(args.skill_path)
|
||||
output = json.dumps(result, indent=2)
|
||||
|
||||
if args.output:
|
||||
args.output.parent.mkdir(parents=True, exist_ok=True)
|
||||
args.output.write_text(output)
|
||||
print(f"Results written to {args.output}", file=sys.stderr)
|
||||
else:
|
||||
print(output)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
Reference in New Issue
Block a user