"""
CrewAI Service - Canonical Multi-Role Orchestration v2.0

Variant A: Profiles per top-level agent
"""
|
||
import os
|
||
import json
|
||
import time
|
||
import asyncio
|
||
import logging
|
||
import re
|
||
import httpx
|
||
from typing import Dict, Any, List, Optional
|
||
from fastapi import FastAPI, HTTPException
|
||
from pydantic import BaseModel
|
||
|
||
# Setup logging (module-level logger; level is fixed at INFO here, not env-driven)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
||
|
||
# Import registry loader (v2 with profiles)
|
||
from registry_loader import (
|
||
load_teams_config,
|
||
get_team_members, get_synthesis_config, get_delegation_config,
|
||
get_team_settings, can_delegate_to, is_orchestrator,
|
||
get_agent_profiles, get_default_profile, select_profile,
|
||
get_profile_config, get_all_agents_summary
|
||
)
|
||
|
||
app = FastAPI(title="CrewAI Service", version="2.0.0")

# Configuration
_router_url = os.getenv("ROUTER_URL", "http://router:8000")
# Backward compatibility for older envs injecting unreachable hostname.
ROUTER_URL = _router_url.replace("dagi-staging-router", "router")
# Semaphore size for parallel role execution (see run_crew_canonical).
DEFAULT_MAX_CONCURRENCY = int(os.getenv("MAX_CONCURRENT_ROLES", "3"))
# Per-request timeout (seconds) for Router LLM calls.
LLM_TIMEOUT = int(os.getenv("LLM_TIMEOUT", "120"))
# LLM profile overrides: the orchestrator profile drives synthesis and voice
# rewrites, the worker profile drives individual team roles. NOTE(review):
# both defaults are non-empty, so the per-config llm_profile fallbacks in
# execute_role / run_crew_canonical are effectively never used — confirm the
# env-level override is intended to always win.
CREWAI_ORCHESTRATOR_LLM_PROFILE = os.getenv("CREWAI_ORCHESTRATOR_LLM_PROFILE", "cloud_deepseek").strip()
CREWAI_WORKER_LLM_PROFILE = os.getenv("CREWAI_WORKER_LLM_PROFILE", "local_qwen3_8b").strip()
# Orchestrators allowed to answer in collective ("we") voice.
TEAM_VOICE_ORCHESTRATORS = {"daarwizz"}
# Detects collective-voice markers (English/Ukrainian/Russian) in final output;
# a match on a non-whitelisted orchestrator triggers a voice rewrite pass.
TEAM_VOICE_MARKERS_RE = re.compile(
    r"(\bwe\b|\bour\b|\bour team\b|наша команда|\bми\b|\bмы\b|\bнаш\w*\b)",
    flags=re.IGNORECASE,
)
|
||
# Closed vocabularies accepted by the runtime envelope.
VISIBILITY_LEVELS = {"public", "interclan", "incircle", "soulsafe", "sacred"}
CONSENT_STATUSES = {"none", "pending", "confirmed"}


def validate_runtime_envelope(envelope: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Lightweight envelope guard for the /crew/run boundary.

    Returns None when the envelope is acceptable, otherwise a dict with a
    fixed stop_code ("STOP_SCHEMA_ENVELOPE") and a list of violation details.
    """
    def stop(details: List[str]) -> Dict[str, Any]:
        # Every violation shares one stop code; only the details vary.
        return {"stop_code": "STOP_SCHEMA_ENVELOPE", "details": details}

    if not isinstance(envelope, dict):
        return stop(["runtime_envelope_not_object"])

    required_keys = (
        "request_id",
        "visibility_level_target",
        "consent_status",
        "allowed_actions",
        "expected_output",
        "input_text",
    )
    absent = [key for key in required_keys if key not in envelope]
    if absent:
        return stop([f"missing:{key}" for key in absent])

    if envelope.get("visibility_level_target") not in VISIBILITY_LEVELS:
        return stop(["invalid:visibility_level_target"])

    if envelope.get("consent_status") not in CONSENT_STATUSES:
        return stop(["invalid:consent_status"])

    actions = envelope.get("allowed_actions")
    if not isinstance(actions, list) or not actions:
        return stop(["invalid:allowed_actions"])

    text = envelope.get("input_text")
    if not isinstance(text, str) or not text.strip():
        return stop(["invalid:input_text"])

    return None
|
||
|
||
|
||
# Request/Response models
|
||
class CrewRunRequest(BaseModel):
    """Request body for POST /crew/run."""
    orchestrator_id: str  # top-level agent whose team is executed
    task: str  # user task to orchestrate
    context: Dict[str, Any] = {}  # optional context (metadata, runtime_envelope, memory_brief)
    profile: Optional[str] = None  # NEW: explicit profile selection; auto-selected when None
|
||
|
||
class CrewRunResponse(BaseModel):
    """Response body for /crew/run (also used for envelope-validation stops)."""
    success: bool
    result: Optional[str] = None  # final synthesized answer when success is True
    error: Optional[str] = None  # error message or stop_code when success is False
    meta: Dict[str, Any] = {}  # run metadata: profile, team_name, role counts, timings
|
||
|
||
class DelegationRequest(BaseModel):
    """Request body for POST /crew/delegate."""
    orchestrator_id: str  # delegating agent
    target_agent_id: str  # agent that receives the task
    task: str
    context: Dict[str, Any] = {}  # accepted, but not forwarded by delegate_to_agent
    hops_remaining: int = 2  # delegation hop budget; decremented per hop
|
||
|
||
|
||
async def call_internal_llm(
    prompt: str,
    system_prompt: Optional[str] = None,
    role_context: Optional[str] = None,
    llm_profile: str = "reasoning",
    max_tokens: int = 1200,
    temperature: float = 0.3,
) -> str:
    """Call Router internal LLM endpoint for a single role.

    POSTs to {ROUTER_URL}/internal/llm/complete and returns the completion
    text (empty string when the response carries no "text" field).

    Args:
        prompt: User-level prompt for the role.
        system_prompt: Optional system prompt; omitted from payload when falsy.
        role_context: Optional role label forwarded to Router; omitted when falsy.
        llm_profile: Router-side LLM profile name.
        max_tokens: Generation cap forwarded to Router.
        temperature: Sampling temperature forwarded to Router.

    Raises:
        Re-raises any transport or HTTP-status error after logging; callers
        (execute_role, run_crew_canonical) catch and degrade gracefully.
    """
    url = f"{ROUTER_URL}/internal/llm/complete"

    payload = {
        "prompt": prompt,
        "llm_profile": llm_profile,
        "max_tokens": max_tokens,
        "temperature": temperature,
    }
    # Optional fields are only attached when truthy, keeping the payload minimal.
    if system_prompt:
        payload["system_prompt"] = system_prompt
    if role_context:
        payload["role_context"] = role_context

    async with httpx.AsyncClient(timeout=LLM_TIMEOUT) as client:
        try:
            resp = await client.post(url, json=payload)
            resp.raise_for_status()
            data = resp.json()
            return data.get("text", "")
        except Exception as e:
            logger.error(f"Internal LLM call failed: {e}")
            raise
|
||
|
||
|
||
def resolve_generation_controls(context: Dict[str, Any]) -> Dict[str, Any]:
    """Soft generation controls from Gateway metadata (no hard policy lock).

    Maps context["metadata"] flags onto generation settings; "concise" wins
    over "training" when both flags are set. Non-dict/None context yields
    the defaults.
    """
    if isinstance(context, dict):
        metadata = (context or {}).get("metadata", {})
    else:
        metadata = {}

    if metadata.get("force_concise"):
        return {"max_tokens": 220, "temperature": 0.2, "mode": "concise"}
    if metadata.get("is_training_group"):
        return {"max_tokens": 520, "temperature": 0.25, "mode": "training"}
    return {"max_tokens": 1200, "temperature": 0.3, "mode": "default"}
|
||
|
||
|
||
async def delegate_to_agent(
    orchestrator_id: str,
    target_agent_id: str,
    task: str,
    context: Optional[Dict[str, Any]] = None,
    hops_remaining: int = 2,
    profile: Optional[str] = None,
) -> Optional[str]:
    """Delegate task to another top-level agent via Router.

    Checks the delegation policy and hop budget, then POSTs the task to the
    target agent's /infer endpoint with handoff metadata.

    Args:
        orchestrator_id: Delegating agent.
        target_agent_id: Agent that receives the task.
        task: Task text forwarded as the prompt.
        context: Accepted for interface symmetry with /crew/delegate, but not
            currently forwarded to the target agent.
        hops_remaining: Remaining delegation hop budget; forwarded decremented.
        profile: Optional profile used to resolve delegation config.

    Returns:
        The target agent's response text, or None when delegation is not
        allowed, the hop budget is exhausted, or the HTTP call fails.
    """
    # Fix: previous signature used a mutable default (context={}); None is the
    # conventional sentinel and behavior is unchanged since context is unused.
    if not can_delegate_to(orchestrator_id, target_agent_id, profile):
        logger.warning(f"Delegation not allowed: {orchestrator_id} -> {target_agent_id}")
        return None

    if hops_remaining <= 0:
        logger.warning(f"Max delegation hops reached for {orchestrator_id}")
        return None

    url = f"{ROUTER_URL}/v1/agents/{target_agent_id}/infer"

    delegation_cfg = get_delegation_config(orchestrator_id, profile) or {}
    attach_headers = delegation_cfg.get("attach_headers", {})

    payload = {
        "prompt": task,
        "metadata": {
            # Config may override who the handoff is attributed to.
            "handoff_from": attach_headers.get("handoff_from", orchestrator_id),
            "hops_remaining": hops_remaining - 1,
        }
    }

    logger.info(f"DELEGATION: {orchestrator_id} -> {target_agent_id} (hops={hops_remaining})")

    async with httpx.AsyncClient(timeout=180) as client:
        try:
            resp = await client.post(url, json=payload)
            resp.raise_for_status()
            data = resp.json()
            return data.get("response", "")
        except Exception as e:
            # Delegation is best-effort: failures are logged and reported as None.
            logger.error(f"Delegation to {target_agent_id} failed: {e}")
            return None
|
||
|
||
|
||
async def execute_role(
    role_config: Dict[str, Any],
    task: str,
    context: Dict[str, Any],
    semaphore: asyncio.Semaphore
) -> Dict[str, Any]:
    """Execute a single role with rate limiting.

    Runs one team-member role through the Router LLM, bounded by `semaphore`
    so at most N roles hit the Router concurrently.

    Returns:
        A dict with role_id, role_context, result, elapsed_seconds, success,
        plus an "error" string on failure. Never raises — failures are
        contained so the crew run can proceed with the remaining roles.
    """
    role_id = role_config.get("id", "unknown")
    role_context = role_config.get("role_context", role_id)
    # NOTE(review): CREWAI_WORKER_LLM_PROFILE defaults to a non-empty string,
    # so the per-role llm_profile fallback below is effectively dead code —
    # confirm the env-level override is intended to always win.
    llm_profile = CREWAI_WORKER_LLM_PROFILE or role_config.get("llm_profile", "reasoning")
    system_prompt = role_config.get("system_prompt", "")

    # Compact memory brief (truncated to 500 chars) keeps role prompts small.
    memory_brief = context.get("memory_brief", {})
    memory_str = json.dumps(memory_brief, ensure_ascii=False)[:500] if memory_brief else ""

    prompt = f"Task: {task}\n\nContext: {memory_str}\n\nYour role: {role_context}\n\nProvide your analysis and recommendations."
    controls = resolve_generation_controls(context)

    async with semaphore:
        t0 = time.time()
        try:
            logger.info(f"ROLE START: {role_context} (profile={llm_profile})")
            result = await call_internal_llm(
                prompt=prompt,
                system_prompt=system_prompt,
                role_context=role_context,
                llm_profile=llm_profile,
                max_tokens=controls["max_tokens"],
                temperature=controls["temperature"],
            )
            elapsed = time.time() - t0
            logger.info(f"ROLE DONE: {role_context} ({elapsed:.1f}s)")
            return {
                "role_id": role_id,
                "role_context": role_context,
                "result": result,
                "elapsed_seconds": elapsed,
                "success": True
            }
        except Exception as e:
            # Contained failure: report the role as failed instead of raising.
            elapsed = time.time() - t0
            logger.error(f"ROLE ERROR: {role_context}: {e}")
            return {
                "role_id": role_id,
                "role_context": role_context,
                "result": None,
                "error": str(e),
                "elapsed_seconds": elapsed,
                "success": False
            }
|
||
|
||
|
||
async def run_crew_canonical(
    orchestrator_id: str,
    task: str,
    context: Dict[str, Any],
    profile: Optional[str] = None
) -> CrewRunResponse:
    """Execute multi-role orchestration for an agent with profile selection.

    Pipeline:
      1. Resolve the profile (explicit argument or select_profile heuristic).
      2. Fan the task out to every team-member role (parallel or sequential).
      3. Synthesize role outputs into one answer via the orchestrator LLM.
      4. Optionally rewrite the answer to enforce single-agent voice.

    Role and synthesis failures are reported in the response rather than
    raised; only the missing-team case returns success=False.
    """

    # Select profile (auto or explicit)
    if profile is None:
        profile = select_profile(orchestrator_id, task)

    logger.info(f"CREW RUN: {orchestrator_id} profile={profile}")

    # Get team config for selected profile
    team_members = get_team_members(orchestrator_id, profile)
    synthesis_config = get_synthesis_config(orchestrator_id, profile)
    team_settings = get_team_settings(orchestrator_id, profile)
    # NOTE(review): synthesis_config/team_settings are used with .get() below,
    # which assumes the registry loader returns dicts (possibly empty), never
    # None — confirm against registry_loader (delegate_to_agent guards with
    # `or {}`, this path does not).

    if not team_members:
        return CrewRunResponse(
            success=False,
            error=f"No team members for {orchestrator_id}.{profile}",
            meta={"orchestrator_id": orchestrator_id, "profile": profile}
        )

    team_name = team_settings.get("team_name", f"{orchestrator_id} team")
    parallel_roles = team_settings.get("parallel_roles", True)
    max_concurrency = team_settings.get("max_concurrency", DEFAULT_MAX_CONCURRENCY)

    logger.info(f"Team: {team_name}, Roles: {len(team_members)}, Parallel: {parallel_roles}, MaxConc: {max_concurrency}")

    t0 = time.time()
    role_results = []

    if parallel_roles:
        # Parallel execution with semaphore
        semaphore = asyncio.Semaphore(max_concurrency)
        tasks = [
            execute_role(member, task, context, semaphore)
            for member in team_members
        ]
        role_results = await asyncio.gather(*tasks, return_exceptions=True)
        # Normalize raw exceptions from gather into failure dicts (these carry
        # only error/success — they are counted below but never synthesized).
        role_results = [r if isinstance(r, dict) else {"error": str(r), "success": False} for r in role_results]
    else:
        # Sequential execution
        semaphore = asyncio.Semaphore(1)
        for member in team_members:
            res = await execute_role(member, task, context, semaphore)
            role_results.append(res)

    # Build synthesis context
    successful_results = [r for r in role_results if r.get("success")]
    failed_results = [r for r in role_results if not r.get("success")]

    synthesis_context = "\n\n".join([
        f"## {r['role_context']}\n{r['result']}"
        for r in successful_results
    ])

    # Synthesis
    synthesis_prompt = synthesis_config.get("system_prompt", "")
    synthesis_role = synthesis_config.get("role_context", "Synthesis")
    # NOTE(review): env override precedes config; with its non-empty default
    # the config llm_profile fallback is effectively unused — confirm intended.
    synthesis_llm = CREWAI_ORCHESTRATOR_LLM_PROFILE or synthesis_config.get("llm_profile", "reasoning")
    controls = resolve_generation_controls(context)

    # Voice rule: only whitelisted orchestrators may speak as a team.
    if orchestrator_id in TEAM_VOICE_ORCHESTRATORS:
        voice_rule = (
            "You may speak as an orchestrator team when appropriate."
        )
    else:
        voice_rule = (
            "CRITICAL STYLE: Write only in first-person singular as this single agent "
            "(I/me in English; я in Ukrainian/Russian). "
            "Do not present yourself as a team, group, council, or collective. "
            "Do not use phrases like 'we', 'our team', 'наша команда', 'мы'."
        )
    if orchestrator_id == "nutra":
        # NUTRA persona requires feminine grammatical gender in UK/RU output.
        voice_rule += (
            " CRITICAL GENDER: NUTRA must always use feminine first-person wording "
            "in Ukrainian/Russian (e.g., 'я підготувала', 'я готова', 'я зрозуміла'); "
            "never masculine forms like 'понял/готов'."
        )

    # Prompt shape depends on the generation mode (concise/training vs default).
    if controls["mode"] in ("concise", "training"):
        final_prompt = f"""Task: {task}

Team Analysis:
{synthesis_context}

{voice_rule}

Return a concise user-facing answer in the user's language.
Format:
- 2-4 short bullets with key points
- 1 short next step
Avoid long reports and verbose section headers."""
    else:
        final_prompt = f"""Task: {task}

Team Analysis:
{synthesis_context}

{voice_rule}

Synthesize the above into a coherent, actionable response. Include:
- Key findings
- Recommendations
- Risks/limitations
- Next steps"""

    try:
        final_result = await call_internal_llm(
            prompt=final_prompt,
            system_prompt=synthesis_prompt,
            role_context=synthesis_role,
            llm_profile=synthesis_llm,
            max_tokens=controls["max_tokens"],
            temperature=controls["temperature"],
        )
    except Exception as e:
        # Degrade gracefully: surface the raw role outputs if synthesis fails.
        final_result = f"Synthesis failed: {e}\n\nRaw team results:\n{synthesis_context}"

    # Enforce single-agent voice for non-network orchestrators.
    if orchestrator_id not in TEAM_VOICE_ORCHESTRATORS and TEAM_VOICE_MARKERS_RE.search(final_result or ""):
        rewrite_prompt = f"""Rewrite the text below in the user's language.

Hard constraints:
- First-person singular only (I/me; я).
- Never use collective/team voice: no "we", "our", "our team", "ми", "мы", "наша команда", "наш*".
- Keep original meaning and structure concise.
{"- For NUTRA: strictly feminine first-person in Ukrainian/Russian; never masculine forms." if orchestrator_id == "nutra" else ""}

Text:
{final_result}"""
        try:
            final_result = await call_internal_llm(
                prompt=rewrite_prompt,
                system_prompt="You are a strict style editor for agent voice consistency.",
                role_context="Voice Consistency Editor",
                llm_profile=CREWAI_ORCHESTRATOR_LLM_PROFILE or "reasoning",
                max_tokens=min(600, controls["max_tokens"] + 120),
                temperature=0.1,
            )
        except Exception as e:
            # Best-effort pass: keep the unrewritten synthesis on failure.
            logger.warning(f"Voice rewrite skipped due to error: {e}")

    elapsed = time.time() - t0

    return CrewRunResponse(
        success=True,
        result=final_result,
        meta={
            "orchestrator_id": orchestrator_id,
            "profile": profile,
            "team_name": team_name,
            "roles_count": len(team_members),
            "roles_success": len(successful_results),
            "roles_failed": len(failed_results),
            "elapsed_seconds": round(elapsed, 2),
            "parallel": parallel_roles
        }
    )
|
||
|
||
|
||
# === ENDPOINTS ===
|
||
|
||
@app.get("/health")
async def health():
    # Static liveness payload; performs no downstream (Router/registry) checks.
    return {"status": "ok", "version": "2.0.0", "variant": "A-profiles"}
|
||
|
||
|
||
@app.get("/crew/agents")
async def list_agents():
    """List all agents with their profiles"""
    # Thin passthrough over the registry loader's summary view.
    return get_all_agents_summary()
|
||
|
||
|
||
@app.get("/crew/teams")
async def list_teams():
    """List all teams (for backwards compatibility, returns profiles view)"""
    teams: Dict[str, Any] = {}

    for agent_id, info in get_all_agents_summary().items():
        for profile_name in info.get("profiles", []):
            # The "default" profile keeps the bare agent id as its key;
            # other profiles are keyed as "<agent>.<profile>".
            if profile_name == "default":
                key = f"{agent_id}"
            else:
                key = f"{agent_id}.{profile_name}"

            settings = get_team_settings(agent_id, profile_name)
            deleg = get_delegation_config(agent_id, profile_name)
            members = get_team_members(agent_id, profile_name)

            teams[key] = {
                "agent_id": agent_id,
                "profile": profile_name,
                "team_name": settings.get("team_name"),
                "member_count": len(members),
                "members": [m.get("role_context") for m in members],
                "parallel_roles": settings.get("parallel_roles"),
                "max_concurrency": settings.get("max_concurrency"),
                "has_delegation": deleg.get("enabled", False)
            }

    return teams
|
||
|
||
|
||
@app.post("/crew/run", response_model=CrewRunResponse)
async def run_crew(request: CrewRunRequest):
    """Execute multi-role orchestration for an agent"""
    orchestrator_id = request.orchestrator_id

    # Runtime envelope is optional; it is validated only when present.
    runtime_envelope = request.context.get("runtime_envelope") if isinstance(request.context, dict) else None
    if runtime_envelope is not None:
        envelope_error = validate_runtime_envelope(runtime_envelope)
        if envelope_error:
            # Schema violations come back as a structured stop (HTTP 200 with
            # success=False), not an HTTP error.
            return CrewRunResponse(
                success=False,
                error=envelope_error["stop_code"],
                meta={
                    "stop_code": envelope_error["stop_code"],
                    "details": envelope_error.get("details", []),
                    "request_id": runtime_envelope.get("request_id") if isinstance(runtime_envelope, dict) else None,
                },
            )

    if not is_orchestrator(orchestrator_id):
        raise HTTPException(status_code=404, detail=f"Agent {orchestrator_id} not found or not an orchestrator")

    return await run_crew_canonical(
        orchestrator_id=orchestrator_id,
        task=request.task,
        context=request.context,
        profile=request.profile
    )
|
||
|
||
|
||
@app.post("/crew/delegate")
async def delegate(request: DelegationRequest):
    """Delegate task to another top-level agent (for DAARWIZZ)"""
    result = await delegate_to_agent(
        orchestrator_id=request.orchestrator_id,
        target_agent_id=request.target_agent_id,
        task=request.task,
        context=request.context,
        hops_remaining=request.hops_remaining
    )

    # delegate_to_agent returns None both for policy denial and transport
    # failure; both are surfaced as HTTP 400 here.
    if result is None:
        raise HTTPException(status_code=400, detail="Delegation failed or not allowed")

    return {"success": True, "result": result}
|
||
|
||
|
||
# NOTE(review): @app.on_event is deprecated in newer FastAPI releases in favor
# of lifespan handlers — consider migrating when upgrading the framework.
@app.on_event("startup")
async def startup():
    """Load the team config at boot and log an agent/profile inventory."""
    config = load_teams_config()
    version = config.get("version", "unknown")
    summary = get_all_agents_summary()

    total_profiles = sum(len(info.get("profiles", [])) for info in summary.values())

    logger.info(f"=== CrewAI Service v2.0.0 (Variant A) ===")
    logger.info(f"Config version: {version}")
    logger.info(f"Agents: {len(summary)}, Total profiles: {total_profiles}")

    for agent_id, info in summary.items():
        profiles = info.get("profiles", [])
        logger.info(f"  {agent_id}: {profiles}")
|