docs: update all documentation and add AI tooling configs

- Rewrite README.md with current architecture, features and stack
- Update docs/API.md with all current endpoints (corporate, BI, client 360)
- Update docs/ARCHITECTURE.md with cache, modular queries, services, ETL
- Update docs/GUIA-USUARIO.md for all roles (admin, corporate, agente)
- Add docs/INDEX.md documentation index
- Add PROJETO.md comprehensive project reference
- Add BI-CCC-Implementation-Guide.md
- Include AI agent configs (.claude, .agents, .gemini, _bmad)
- Add netbird VPN configuration
- Add status report

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-19 13:29:03 -04:00
parent c5b377e788
commit 647cbec54f
3246 changed files with 479789 additions and 983 deletions

View File

@@ -0,0 +1,300 @@
# /// script
# requires-python = ">=3.10"
# dependencies = []
# ///
"""Analyze source documents for the distillation generator.
Enumerates files from paths/folders/globs, computes sizes and token estimates,
detects document types from naming conventions, and suggests groupings for
related documents (e.g., a brief paired with its discovery notes).
Accepts: file paths, folder paths (scans recursively for .md/.txt/.yaml/.yml/.json),
or glob patterns. Skips node_modules, .git, __pycache__, .venv, _bmad-output.
Output JSON structure:
status: "ok" | "error"
files[]: path, filename, size_bytes, estimated_tokens, doc_type
summary: total_files, total_size_bytes, total_estimated_tokens
groups[]: group_key, files[] with role (primary/companion/standalone)
- Groups related docs by naming convention (e.g., brief + discovery-notes)
routing: recommendation ("single" | "fan-out"), reason
- single: ≤3 files AND ≤15K estimated tokens
- fan-out: >3 files OR >15K estimated tokens
split_prediction: prediction ("likely" | "unlikely"), reason, estimated_distillate_tokens
- Estimates distillate at ~1/3 source size; splits if >5K tokens
"""
from __future__ import annotations
import argparse
import glob
import json
import os
import re
import sys
from pathlib import Path
# File extensions treated as documents when scanning folders recursively.
INCLUDE_EXTENSIONS: set[str] = {".md", ".txt", ".yaml", ".yml", ".json"}
# Directory names pruned during folder scans (VCS, tooling, and output dirs).
SKIP_DIRS: set[str] = {
    "node_modules", ".git", "__pycache__", ".venv", "venv",
    ".claude", "_bmad-output", ".cursor", ".vscode",
}
# Approximate characters per token used for token estimation — a rough
# heuristic; real tokenizers vary by model and content.
CHARS_PER_TOKEN: int = 4
# Routing thresholds: inputs at or below these limits suit a single
# compressor; larger inputs are fanned out / predicted to split.
SINGLE_COMPRESSOR_MAX_TOKENS: int = 15_000
SINGLE_DISTILLATE_MAX_TOKENS: int = 5_000
# (regex, doc_type) pairs for document type detection from filenames.
# Order matters: the FIRST matching pattern wins (see detect_doc_type), so
# more specific patterns (e.g. discovery-notes) precede generic ones.
DOC_TYPE_PATTERNS: list[tuple[str, str]] = [
    (r"discovery[_-]notes", "discovery-notes"),
    (r"product[_-]brief", "product-brief"),
    (r"research[_-]report", "research-report"),
    (r"architecture", "architecture-doc"),
    (r"prd", "prd"),
    (r"distillate", "distillate"),
    (r"changelog", "changelog"),
    (r"readme", "readme"),
    (r"spec", "specification"),
    (r"requirements", "requirements"),
    (r"design[_-]doc", "design-doc"),
    (r"meeting[_-]notes", "meeting-notes"),
    (r"brainstorm", "brainstorming"),
    (r"interview", "interview-notes"),
]
# (companion_regex, base_template) pairs for grouping related documents.
# The template rewrites a companion filename into its base filename via
# re.sub backreferences (e.g. "foo-discovery-notes.md" -> "foo.md").
GROUP_PATTERNS: list[tuple[str, str]] = [
    # base document + discovery notes
    (r"^(.+?)(?:-discovery-notes|-discovery_notes)\.(\w+)$", r"\1.\2"),
    # base document + appendix
    (r"^(.+?)(?:-appendix|-addendum)(?:-\w+)?\.(\w+)$", r"\1.\2"),
    # base document + review/feedback
    (r"^(.+?)(?:-review|-feedback)\.(\w+)$", r"\1.\2"),
]
def resolve_inputs(inputs: list[str]) -> list[Path]:
    """Expand file/folder/glob arguments into an ordered, de-duplicated list.

    Directories are walked recursively (pruning SKIP_DIRS) and filtered to
    INCLUDE_EXTENSIONS; anything that is neither a file nor a directory is
    treated as a glob pattern (also extension-filtered). Explicit file paths
    bypass the extension filter. All results are resolved to absolute paths.
    """
    collected: list[Path] = []

    def _is_document(p: Path) -> bool:
        # Extension filter applied to scanned and globbed files only.
        return p.suffix.lower() in INCLUDE_EXTENSIONS

    for raw in inputs:
        candidate = Path(raw)
        if candidate.is_file():
            collected.append(candidate.resolve())
            continue
        if candidate.is_dir():
            for root, subdirs, names in os.walk(candidate):
                # Prune excluded directories in place so os.walk skips them.
                subdirs[:] = [d for d in subdirs if d not in SKIP_DIRS]
                base = Path(root)
                collected.extend(
                    (base / name).resolve()
                    for name in sorted(names)
                    if _is_document(base / name)
                )
            continue
        # Neither file nor folder: fall back to glob expansion.
        for hit in sorted(glob.glob(raw, recursive=True)):
            hit_path = Path(hit)
            if hit_path.is_file() and _is_document(hit_path):
                collected.append(hit_path.resolve())
    # Order-preserving de-duplication (dict keys keep insertion order).
    return list(dict.fromkeys(collected))
def detect_doc_type(filename: str) -> str:
    """Classify a filename by the first matching DOC_TYPE_PATTERNS entry.

    Matching is case-insensitive (the name is lowercased first); returns
    "unknown" when no pattern applies.
    """
    lowered = filename.lower()
    return next(
        (label for regex, label in DOC_TYPE_PATTERNS if re.search(regex, lowered)),
        "unknown",
    )
def suggest_groups(files: list[Path]) -> list[dict]:
    """Suggest document groupings based on naming conventions.

    Each GROUP_PATTERNS entry recognizes a "companion" filename (e.g.
    ``foo-discovery-notes.md``) and rewrites it to its base filename
    (``foo.md``). Companions are grouped under the base; when the base file is
    present it is listed first with role "primary". Everything else becomes a
    standalone single-file group.

    Returns a list of ``{"group_key", "files": [{"path", "filename", "role"}]}``
    where role is "primary", "companion", or "standalone".
    """
    groups: dict[str, list[dict]] = {}
    ungrouped: list[dict] = []
    # Base lookup is by filename only — naming conventions can't see paths.
    # NOTE(review): if two inputs share a filename in different directories,
    # the last one wins as the base-lookup target.
    file_map = {f.name: f for f in files}
    # Fix: track assignment by full path, not by filename. The previous
    # name-keyed set silently dropped a second same-named file from a
    # different directory (it was skipped as "already assigned").
    assigned: set[Path] = set()
    for f in files:
        if f in assigned:
            continue  # already claimed as a group's primary
        matched = False
        for pattern, base_pattern in GROUP_PATTERNS:
            if re.match(pattern, f.name, re.IGNORECASE):
                # This file is a companion — derive its base filename.
                base_name = re.sub(pattern, base_pattern, f.name, flags=re.IGNORECASE)
                members = groups.setdefault(base_name, [])
                # List the base file first (role: primary) if it exists.
                base_file = file_map.get(base_name)
                if base_file is not None and base_file not in assigned:
                    members.append({
                        "path": str(base_file),
                        "filename": base_name,
                        "role": "primary",
                    })
                    assigned.add(base_file)
                members.append({
                    "path": str(f),
                    "filename": f.name,
                    "role": "companion",
                })
                assigned.add(f)
                matched = True
                break
        if not matched:
            # Possibly a base file; if a later companion claims it, the
            # assigned-check below keeps it out of the standalone list.
            ungrouped.append({"path": str(f), "filename": f.name})
    result = [
        {"group_key": group_key, "files": members}
        for group_key, members in groups.items()
    ]
    for ug in ungrouped:
        if Path(ug["path"]) not in assigned:
            result.append({
                "group_key": ug["filename"],
                "files": [{"path": ug["path"], "filename": ug["filename"], "role": "standalone"}],
            })
    return result
def analyze(inputs: list[str], output_path: str | None = None) -> None:
    """Run the end-to-end source analysis and emit a JSON report.

    Resolves the input arguments to files, computes per-file sizes and token
    estimates, suggests groupings, and attaches a routing recommendation plus
    a split prediction. The report is written via output_json().
    """
    files = resolve_inputs(inputs)
    if not files:
        # Nothing resolvable: emit an error payload echoing the inputs.
        output_json(
            {
                "status": "error",
                "error": "No readable files found from provided inputs",
                "inputs": inputs,
            },
            output_path,
        )
        return
    # Per-file stats; byte size stands in for character count when estimating.
    file_details = []
    total_chars = 0
    for doc in files:
        byte_size = doc.stat().st_size
        total_chars += byte_size
        file_details.append({
            "path": str(doc),
            "filename": doc.name,
            "size_bytes": byte_size,
            "estimated_tokens": byte_size // CHARS_PER_TOKEN,
            "doc_type": detect_doc_type(doc.name),
        })
    total_tokens = total_chars // CHARS_PER_TOKEN
    groups = suggest_groups(files)
    # Routing: small batches fit a single compressor, otherwise fan out.
    fits_single = len(files) <= 3 and total_tokens <= SINGLE_COMPRESSOR_MAX_TOKENS
    if fits_single:
        routing = "single"
        routing_reason = (
            f"{len(files)} file(s), ~{total_tokens:,} estimated tokens — "
            "within single compressor threshold"
        )
    else:
        routing = "fan-out"
        # Name whichever limit was exceeded (token count takes precedence).
        if total_tokens > SINGLE_COMPRESSOR_MAX_TOKENS:
            overage = ">" + str(SINGLE_COMPRESSOR_MAX_TOKENS) + " tokens"
        else:
            overage = "> 3 files"
        routing_reason = (
            f"{len(files)} file(s), ~{total_tokens:,} estimated tokens — "
            "exceeds single compressor threshold "
            f"({overage})"
        )
    # Split prediction: assume the distillate lands at ~1/3 of the source.
    estimated_distillate_tokens = total_tokens // 3
    will_split = estimated_distillate_tokens > SINGLE_DISTILLATE_MAX_TOKENS
    split_prediction = "likely" if will_split else "unlikely"
    split_reason = (
        f"Estimated distillate ~{estimated_distillate_tokens:,} tokens "
        f"{'exceeds' if will_split else 'within'} "
        f"{SINGLE_DISTILLATE_MAX_TOKENS:,} threshold"
    )
    result = {
        "status": "ok",
        "files": file_details,
        "summary": {
            "total_files": len(files),
            "total_size_bytes": total_chars,
            "total_estimated_tokens": total_tokens,
        },
        "groups": groups,
        "routing": {
            "recommendation": routing,
            "reason": routing_reason,
        },
        "split_prediction": {
            "prediction": split_prediction,
            "reason": split_reason,
            "estimated_distillate_tokens": estimated_distillate_tokens,
        },
    }
    output_json(result, output_path)
def output_json(data: dict, output_path: str | None) -> None:
    """Serialize *data* as pretty-printed JSON to *output_path* or stdout.

    When writing to a file, parent directories are created as needed and a
    trailing newline is appended; a confirmation goes to stderr so stdout
    stays clean for piped JSON. With no output path, the JSON is printed to
    stdout.
    """
    json_str = json.dumps(data, indent=2)
    if output_path:
        out = Path(output_path)
        out.parent.mkdir(parents=True, exist_ok=True)
        # Fix: pin the encoding — write_text otherwise uses the locale's
        # preferred encoding, which is not UTF-8 on every platform.
        out.write_text(json_str + "\n", encoding="utf-8")
        print(f"Results written to {output_path}", file=sys.stderr)
    else:
        print(json_str)
def main() -> None:
    """CLI entry point: parse arguments and run the analysis."""
    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument(
        "inputs",
        nargs="+",
        help="File paths, folder paths, or glob patterns to analyze",
    )
    cli.add_argument(
        "-o", "--output",
        help="Output JSON to file instead of stdout",
    )
    opts = cli.parse_args()
    analyze(opts.inputs, opts.output)
    # Explicit success exit, mirroring the original behavior.
    sys.exit(0)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,204 @@
"""Tests for analyze_sources.py"""
import json
import os
import tempfile
from pathlib import Path
from unittest.mock import patch
import pytest
# Add parent dir to path so we can import the script
import sys
sys.path.insert(0, str(Path(__file__).parent.parent))
from analyze_sources import (
resolve_inputs,
detect_doc_type,
suggest_groups,
analyze,
INCLUDE_EXTENSIONS,
SKIP_DIRS,
)
@pytest.fixture
def temp_dir():
    """Create a temp directory populated with representative sample files."""
    with tempfile.TemporaryDirectory() as d:
        root = Path(d)
        # Flat sample documents covering grouped, standalone, and plain-text.
        samples = {
            "product-brief-foo.md": "# Product Brief\nContent here",
            "product-brief-foo-discovery-notes.md": "# Discovery\nNotes",
            "architecture-doc.md": "# Architecture\nDesign here",
            "research-report.md": "# Research\nFindings",
            "random.txt": "Some text content",
        }
        for name, body in samples.items():
            (root / name).write_text(body)
        # Binary file: folder scans must ignore it.
        (root / "image.png").write_bytes(b"\x89PNG")
        # Nested markdown: folder scans must recurse into subdirectories.
        sub = root / "subdir"
        sub.mkdir()
        (sub / "prd-v2.md").write_text("# PRD\nRequirements")
        # Excluded directory: its contents must never be picked up.
        skip = root / "node_modules"
        skip.mkdir()
        (skip / "junk.md").write_text("Should be skipped")
        yield d
class TestResolveInputs:
    """Input expansion: explicit files, folder recursion, globs, dedup."""

    def test_single_file(self, temp_dir):
        """An explicit file path resolves to exactly that file."""
        target = str(Path(temp_dir) / "product-brief-foo.md")
        resolved = resolve_inputs([target])
        assert [p.name for p in resolved] == ["product-brief-foo.md"]

    def test_folder_recursion(self, temp_dir):
        """Folder scans pick up top-level and nested documents."""
        found = {p.name for p in resolve_inputs([temp_dir])}
        assert {"product-brief-foo.md", "prd-v2.md", "random.txt"} <= found

    def test_folder_skips_excluded_dirs(self, temp_dir):
        """Files under SKIP_DIRS (node_modules) are never included."""
        found = {p.name for p in resolve_inputs([temp_dir])}
        assert "junk.md" not in found

    def test_folder_skips_non_text_files(self, temp_dir):
        """Extensions outside INCLUDE_EXTENSIONS are filtered out."""
        found = {p.name for p in resolve_inputs([temp_dir])}
        assert "image.png" not in found

    def test_glob_pattern(self, temp_dir):
        """A glob pattern matches exactly the files it names."""
        resolved = resolve_inputs([str(Path(temp_dir) / "product-brief-*.md")])
        assert len(resolved) == 2
        found = {p.name for p in resolved}
        assert found == {"product-brief-foo.md", "product-brief-foo-discovery-notes.md"}

    def test_deduplication(self, temp_dir):
        """Repeated arguments collapse to a single resolved entry."""
        target = str(Path(temp_dir) / "product-brief-foo.md")
        assert len(resolve_inputs([target] * 3)) == 1

    def test_mixed_inputs(self, temp_dir):
        """File and folder arguments may be combined freely."""
        args = [
            str(Path(temp_dir) / "architecture-doc.md"),
            str(Path(temp_dir) / "subdir"),
        ]
        found = {p.name for p in resolve_inputs(args)}
        assert {"architecture-doc.md", "prd-v2.md"} <= found

    def test_nonexistent_path(self):
        """Unresolvable inputs yield an empty result, not an error."""
        assert resolve_inputs(["/nonexistent/path/file.md"]) == []
class TestDetectDocType:
    """Table-driven checks for filename-based document type detection."""
    # Covers both hyphen and underscore separators, embedded matches
    # (e.g. "research-report-q4"), and the "unknown" fallback.
    @pytest.mark.parametrize("filename,expected", [
        ("product-brief-foo.md", "product-brief"),
        ("product_brief_bar.md", "product-brief"),
        ("foo-discovery-notes.md", "discovery-notes"),
        ("foo-discovery_notes.md", "discovery-notes"),
        ("architecture-overview.md", "architecture-doc"),
        ("my-prd.md", "prd"),
        ("research-report-q4.md", "research-report"),
        ("foo-distillate.md", "distillate"),
        ("changelog.md", "changelog"),
        ("readme.md", "readme"),
        ("api-spec.md", "specification"),
        ("design-doc-v2.md", "design-doc"),
        ("meeting-notes-2026.md", "meeting-notes"),
        ("brainstorm-session.md", "brainstorming"),
        ("user-interview-notes.md", "interview-notes"),
        # No pattern matches -> fallback value.
        ("random-file.md", "unknown"),
    ])
    def test_detection(self, filename, expected):
        assert detect_doc_type(filename) == expected
class TestSuggestGroups:
    """Grouping of companion documents with their base files."""

    def test_groups_brief_with_discovery_notes(self, temp_dir):
        """A brief and its discovery notes end up in one shared group."""
        base = Path(temp_dir) / "product-brief-foo.md"
        notes = Path(temp_dir) / "product-brief-foo-discovery-notes.md"
        groups = suggest_groups([base, notes])
        multi = [g for g in groups if len(g["files"]) > 1]
        assert len(multi) == 1
        names = {entry["filename"] for entry in multi[0]["files"]}
        assert names == {"product-brief-foo.md", "product-brief-foo-discovery-notes.md"}

    def test_standalone_files(self, temp_dir):
        """Unrelated documents each form their own single-file group."""
        groups = suggest_groups([
            Path(temp_dir) / "architecture-doc.md",
            Path(temp_dir) / "research-report.md",
        ])
        assert len(groups) == 2
        assert all(len(g["files"]) == 1 for g in groups)

    def test_mixed_grouped_and_standalone(self, temp_dir):
        """Grouped pairs and standalone files can coexist in one result."""
        groups = suggest_groups([
            Path(temp_dir) / "product-brief-foo.md",
            Path(temp_dir) / "product-brief-foo-discovery-notes.md",
            Path(temp_dir) / "architecture-doc.md",
        ])
        sizes = sorted(len(g["files"]) for g in groups)
        assert sizes == [1, 2]
class TestAnalyze:
    """End-to-end tests for analyze(): report contents, routing, output modes."""

    def test_basic_analysis(self, temp_dir):
        """A single known file yields an ok report with correct per-file stats."""
        f = str(Path(temp_dir) / "product-brief-foo.md")
        output_file = str(Path(temp_dir) / "output.json")
        analyze([f], output_file)
        result = json.loads(Path(output_file).read_text())
        assert result["status"] == "ok"
        assert result["summary"]["total_files"] == 1
        assert result["files"][0]["doc_type"] == "product-brief"
        assert result["files"][0]["estimated_tokens"] > 0

    def test_routing_single_small_input(self, temp_dir):
        """One small file stays within the single-compressor threshold."""
        f = str(Path(temp_dir) / "product-brief-foo.md")
        output_file = str(Path(temp_dir) / "output.json")
        analyze([f], output_file)
        result = json.loads(Path(output_file).read_text())
        assert result["routing"]["recommendation"] == "single"

    def test_routing_fanout_many_files(self, temp_dir):
        """More than three files triggers the fan-out recommendation."""
        # Create enough files to trigger fan-out (> 3 files)
        for i in range(5):
            (Path(temp_dir) / f"doc-{i}.md").write_text("x" * 1000)
        output_file = str(Path(temp_dir) / "output.json")
        analyze([temp_dir], output_file)
        result = json.loads(Path(output_file).read_text())
        assert result["routing"]["recommendation"] == "fan-out"

    def test_folder_analysis(self, temp_dir):
        """Folder input picks up the fixture's documents and groups them."""
        output_file = str(Path(temp_dir) / "output.json")
        analyze([temp_dir], output_file)
        result = json.loads(Path(output_file).read_text())
        assert result["status"] == "ok"
        assert result["summary"]["total_files"] >= 4  # at least the base files
        assert len(result["groups"]) > 0

    def test_no_files_found(self, temp_dir):
        """Unresolvable inputs produce an error report.

        Fix: write into the per-test temp dir instead of a hard-coded
        /tmp path — portable (Windows has no /tmp) and safe under parallel
        runs; cleanup is handled by the fixture, not a manual os.unlink().
        """
        output_file = str(Path(temp_dir) / "empty_result.json")
        analyze(["/nonexistent/path"], output_file)
        result = json.loads(Path(output_file).read_text())
        assert result["status"] == "error"

    def test_stdout_output(self, temp_dir, capsys):
        """With no output path, the JSON report goes to stdout."""
        f = str(Path(temp_dir) / "product-brief-foo.md")
        analyze([f])
        captured = capsys.readouterr()
        result = json.loads(captured.out)
        assert result["status"] == "ok"