"""
CrewAI Service - Canonical Multi-Role Orchestration v2.0
Variant A: Profiles per top-level agent
"""

import os
import json
import time
import asyncio
import logging
import re
import httpx
from typing import Dict, Any, List, Optional
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Import registry loader (v2 with profiles)
from registry_loader import (
    load_teams_config,
    get_team_members,
    get_synthesis_config,
    get_delegation_config,
    get_team_settings,
    can_delegate_to,
    is_orchestrator,
    get_agent_profiles,
    get_default_profile,
    select_profile,
    get_profile_config,
    get_all_agents_summary,
)

app = FastAPI(title="CrewAI Service", version="2.0.0")

# Configuration
_router_url = os.getenv("ROUTER_URL", "http://router:8000")
# Backward compatibility for older envs injecting unreachable hostname.
ROUTER_URL = _router_url.replace("dagi-staging-router", "router")
DEFAULT_MAX_CONCURRENCY = int(os.getenv("MAX_CONCURRENT_ROLES", "3"))
LLM_TIMEOUT = int(os.getenv("LLM_TIMEOUT", "120"))
CREWAI_ORCHESTRATOR_LLM_PROFILE = os.getenv("CREWAI_ORCHESTRATOR_LLM_PROFILE", "cloud_deepseek").strip()
CREWAI_WORKER_LLM_PROFILE = os.getenv("CREWAI_WORKER_LLM_PROFILE", "local_qwen3_8b").strip()
# Orchestrators that are allowed to answer in a collective ("we") voice.
TEAM_VOICE_ORCHESTRATORS = {"daarwizz"}
# Matches collective-voice markers in English / Ukrainian / Russian output.
TEAM_VOICE_MARKERS_RE = re.compile(
    r"(\bwe\b|\bour\b|\bour team\b|наша команда|\bми\b|\bмы\b|\bнаш\w*\b)",
    flags=re.IGNORECASE,
)
VISIBILITY_LEVELS = {"public", "interclan", "incircle", "soulsafe", "sacred"}
CONSENT_STATUSES = {"none", "pending", "confirmed"}


def _envelope_stop(details: List[str]) -> Dict[str, Any]:
    """Build the STOP_SCHEMA_ENVELOPE error payload with the given details."""
    return {"stop_code": "STOP_SCHEMA_ENVELOPE", "details": details}


def validate_runtime_envelope(envelope: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Lightweight envelope guard for /crew/run boundary.

    Checks are applied in order and the first failure is returned:
    envelope is a dict, all required keys are present, the visibility level
    and consent status are known values, ``allowed_actions`` is a non-empty
    list, and ``input_text`` is a non-blank string.

    Returns:
        ``None`` when the envelope is acceptable, otherwise a dict with
        ``stop_code`` ("STOP_SCHEMA_ENVELOPE") and a ``details`` list.
    """
    if not isinstance(envelope, dict):
        return _envelope_stop(["runtime_envelope_not_object"])

    required = [
        "request_id",
        "visibility_level_target",
        "consent_status",
        "allowed_actions",
        "expected_output",
        "input_text",
    ]
    missing = [k for k in required if k not in envelope]
    if missing:
        return _envelope_stop([f"missing:{m}" for m in missing])

    # The isinstance checks matter: an unhashable JSON value (list/dict) would
    # make the set-membership test raise TypeError instead of being rejected.
    visibility = envelope.get("visibility_level_target")
    if not isinstance(visibility, str) or visibility not in VISIBILITY_LEVELS:
        return _envelope_stop(["invalid:visibility_level_target"])

    consent = envelope.get("consent_status")
    if not isinstance(consent, str) or consent not in CONSENT_STATUSES:
        return _envelope_stop(["invalid:consent_status"])

    allowed_actions = envelope.get("allowed_actions")
    if not isinstance(allowed_actions, list) or not allowed_actions:
        return _envelope_stop(["invalid:allowed_actions"])

    input_text = envelope.get("input_text")
    if not isinstance(input_text, str) or not input_text.strip():
        return _envelope_stop(["invalid:input_text"])

    return None


# Request/Response models
class CrewRunRequest(BaseModel):
    # Request body for POST /crew/run.
    orchestrator_id: str
    task: str
    context: Dict[str, Any] = {}
    profile: Optional[str] = None  # NEW: explicit profile selection


class CrewRunResponse(BaseModel):
    # Response body for POST /crew/run; `error` is set when success is False.
    success: bool
    result: Optional[str] = None
    error: Optional[str] = None
    meta: Dict[str, Any] = {}


class DelegationRequest(BaseModel):
    # Request body for POST /crew/delegate.
    orchestrator_id: str
    target_agent_id: str
    task: str
    context: Dict[str, Any] = {}
    hops_remaining: int = 2


async def call_internal_llm(
    prompt: str,
    system_prompt: Optional[str] = None,
    role_context: Optional[str] = None,
    llm_profile: str = "reasoning",
    max_tokens: int = 1200,
    temperature: float = 0.3,
) -> str:
    """Call Router internal LLM endpoint for a single role.

    POSTs to ``{ROUTER_URL}/internal/llm/complete``. ``system_prompt`` and
    ``role_context`` are only included in the payload when truthy.

    Returns:
        The ``text`` field of the JSON response, or "" when it is absent.

    Raises:
        Any exception from the HTTP call, status check, or JSON decoding is
        logged and re-raised to the caller.
    """
    url = f"{ROUTER_URL}/internal/llm/complete"
    payload = {
        "prompt": prompt,
        "llm_profile": llm_profile,
        "max_tokens": max_tokens,
        "temperature": temperature,
    }
    if system_prompt:
        payload["system_prompt"] = system_prompt
    if role_context:
        payload["role_context"] = role_context

    async with httpx.AsyncClient(timeout=LLM_TIMEOUT) as client:
        try:
            resp = await client.post(url, json=payload)
            resp.raise_for_status()
            data = resp.json()
            return data.get("text", "")
        except Exception as e:
            logger.error(f"Internal LLM call failed: {e}")
            raise


def resolve_generation_controls(context: Dict[str, Any]) -> Dict[str, Any]:
    """Soft generation controls from Gateway metadata (no hard policy lock).

    Reads ``context["metadata"]``; ``force_concise`` takes precedence over
    ``is_training_group``. A missing/non-dict context or metadata falls back
    to the default controls instead of raising.

    Returns:
        Dict with ``max_tokens``, ``temperature`` and ``mode``
        ("concise", "training" or "default").
    """
    metadata = context.get("metadata") if isinstance(context, dict) else None
    if not isinstance(metadata, dict):
        # e.g. "metadata": null in the request body
        metadata = {}

    if metadata.get("force_concise"):
        return {"max_tokens": 220, "temperature": 0.2, "mode": "concise"}
    if metadata.get("is_training_group"):
        return {"max_tokens": 520, "temperature": 0.25, "mode": "training"}
    return {"max_tokens": 1200, "temperature": 0.3, "mode": "default"}


async def delegate_to_agent(
    orchestrator_id: str,
    target_agent_id: str,
    task: str,
    context: Optional[Dict[str, Any]] = None,
    hops_remaining: int = 2,
    profile: Optional[str] = None,
) -> Optional[str]:
    """Delegate task to another top-level agent via Router.

    The delegation is refused (returns ``None``) when ``can_delegate_to``
    denies it or when ``hops_remaining`` is not positive. Otherwise the task
    is POSTed to ``{ROUTER_URL}/v1/agents/{target_agent_id}/infer`` with
    ``handoff_from`` and a decremented ``hops_remaining`` in the metadata.

    ``context`` is accepted for interface compatibility; it is not forwarded
    in the request payload.

    Returns:
        The ``response`` field of the Router reply ("" if absent), or ``None``
        when delegation is not allowed or the request fails.
    """
    if not can_delegate_to(orchestrator_id, target_agent_id, profile):
        logger.warning(f"Delegation not allowed: {orchestrator_id} -> {target_agent_id}")
        return None

    if hops_remaining <= 0:
        logger.warning(f"Max delegation hops reached for {orchestrator_id}")
        return None

    url = f"{ROUTER_URL}/v1/agents/{target_agent_id}/infer"
    delegation_cfg = get_delegation_config(orchestrator_id, profile) or {}
    attach_headers = delegation_cfg.get("attach_headers") or {}

    payload = {
        "prompt": task,
        "metadata": {
            "handoff_from": attach_headers.get("handoff_from", orchestrator_id),
            "hops_remaining": hops_remaining - 1,
        }
    }

    logger.info(f"DELEGATION: {orchestrator_id} -> {target_agent_id} (hops={hops_remaining})")

    async with httpx.AsyncClient(timeout=180) as client:
        try:
            resp = await client.post(url, json=payload)
            resp.raise_for_status()
            data = resp.json()
            return data.get("response", "")
        except Exception as e:
            # Best-effort: a failed delegation is reported as None, not raised.
            logger.error(f"Delegation to {target_agent_id} failed: {e}")
            return None


async def execute_role(
    role_config: Dict[str, Any],
    task: str,
    context: Dict[str, Any],
    semaphore: asyncio.Semaphore
) -> Dict[str, Any]:
    """Execute a single role with rate limiting.

    Builds the role prompt from the task, up to 500 characters of the
    JSON-encoded ``context["memory_brief"]``, and the role context, then calls
    the internal LLM while holding ``semaphore``.

    Returns:
        Dict with ``role_id``, ``role_context``, ``result``,
        ``elapsed_seconds`` and ``success``; on failure ``result`` is ``None``
        and an ``error`` string is included. LLM errors are not raised.
    """
    role_id = role_config.get("id", "unknown")
    role_context = role_config.get("role_context", role_id)
    # The worker profile from the environment overrides the per-role profile
    # whenever it is non-empty.
    llm_profile = CREWAI_WORKER_LLM_PROFILE or role_config.get("llm_profile", "reasoning")
    system_prompt = role_config.get("system_prompt", "")

    memory_brief = context.get("memory_brief", {}) if isinstance(context, dict) else {}
    memory_str = json.dumps(memory_brief, ensure_ascii=False)[:500] if memory_brief else ""

    prompt = f"Task: {task}\n\nContext: {memory_str}\n\nYour role: {role_context}\n\nProvide your analysis and recommendations."
    controls = resolve_generation_controls(context)

    async with semaphore:
        t0 = time.time()
        try:
            logger.info(f"ROLE START: {role_context} (profile={llm_profile})")
            result = await call_internal_llm(
                prompt=prompt,
                system_prompt=system_prompt,
                role_context=role_context,
                llm_profile=llm_profile,
                max_tokens=controls["max_tokens"],
                temperature=controls["temperature"],
            )
            elapsed = time.time() - t0
            logger.info(f"ROLE DONE: {role_context} ({elapsed:.1f}s)")
            return {
                "role_id": role_id,
                "role_context": role_context,
                "result": result,
                "elapsed_seconds": elapsed,
                "success": True
            }
        except Exception as e:
            elapsed = time.time() - t0
            logger.error(f"ROLE ERROR: {role_context}: {e}")
            return {
                "role_id": role_id,
                "role_context": role_context,
                "result": None,
                "error": str(e),
                "elapsed_seconds": elapsed,
                "success": False
            }


def _resolve_max_concurrency(team_settings: Dict[str, Any]) -> int:
    """Return a usable (>= 1) concurrency limit from team settings."""
    raw = team_settings.get("max_concurrency", DEFAULT_MAX_CONCURRENCY)
    try:
        value = int(raw)
    except (TypeError, ValueError):
        logger.warning(f"Invalid max_concurrency {raw!r}; using default {DEFAULT_MAX_CONCURRENCY}")
        value = DEFAULT_MAX_CONCURRENCY
    # Semaphore(0) would block every role forever and a negative value raises,
    # so never go below one concurrent role.
    return max(1, value)


def _build_voice_rule(orchestrator_id: str) -> str:
    """Return the voice/style instruction appended to the synthesis prompt."""
    if orchestrator_id in TEAM_VOICE_ORCHESTRATORS:
        voice_rule = (
            "You may speak as an orchestrator team when appropriate."
        )
    else:
        voice_rule = (
            "CRITICAL STYLE: Write only in first-person singular as this single agent "
            "(I/me in English; я in Ukrainian/Russian). "
            "Do not present yourself as a team, group, council, or collective. "
            "Do not use phrases like 'we', 'our team', 'наша команда', 'мы'."
        )
    if orchestrator_id == "nutra":
        voice_rule += (
            " CRITICAL GENDER: NUTRA must always use feminine first-person wording "
            "in Ukrainian/Russian (e.g., 'я підготувала', 'я готова', 'я зрозуміла'); "
            "never masculine forms like 'понял/готов'."
        )
    return voice_rule


async def run_crew_canonical(
    orchestrator_id: str,
    task: str,
    context: Dict[str, Any],
    profile: Optional[str] = None
) -> CrewRunResponse:
    """Execute multi-role orchestration for an agent with profile selection.

    Steps: select the profile (when not given), run every team member role
    (in parallel behind a semaphore, or sequentially), synthesize the
    successful role outputs with one more LLM call, and, for orchestrators
    outside ``TEAM_VOICE_ORCHESTRATORS``, rewrite the answer when it contains
    collective-voice markers.

    Returns:
        ``CrewRunResponse`` with ``success=False`` when the profile has no
        team members; otherwise ``success=True`` with the final text and run
        statistics in ``meta``. A failed synthesis call does not fail the run:
        the result then carries the error text plus the raw role outputs.
    """
    # Select profile (auto or explicit)
    if profile is None:
        profile = select_profile(orchestrator_id, task)

    logger.info(f"CREW RUN: {orchestrator_id} profile={profile}")

    # Get team config for selected profile
    team_members = get_team_members(orchestrator_id, profile)
    synthesis_config = get_synthesis_config(orchestrator_id, profile) or {}
    team_settings = get_team_settings(orchestrator_id, profile) or {}

    if not team_members:
        return CrewRunResponse(
            success=False,
            error=f"No team members for {orchestrator_id}.{profile}",
            meta={"orchestrator_id": orchestrator_id, "profile": profile}
        )

    team_name = team_settings.get("team_name", f"{orchestrator_id} team")
    parallel_roles = team_settings.get("parallel_roles", True)
    max_concurrency = _resolve_max_concurrency(team_settings)

    logger.info(f"Team: {team_name}, Roles: {len(team_members)}, Parallel: {parallel_roles}, MaxConc: {max_concurrency}")

    t0 = time.time()
    role_results = []

    if parallel_roles:
        # Parallel execution with semaphore
        semaphore = asyncio.Semaphore(max_concurrency)
        tasks = [
            execute_role(member, task, context, semaphore)
            for member in team_members
        ]
        role_results = await asyncio.gather(*tasks, return_exceptions=True)
        # Anything gather returned that is not a result dict (i.e. an
        # exception) is recorded as a failed role.
        role_results = [r if isinstance(r, dict) else {"error": str(r), "success": False} for r in role_results]
    else:
        # Sequential execution
        semaphore = asyncio.Semaphore(1)
        for member in team_members:
            res = await execute_role(member, task, context, semaphore)
            role_results.append(res)

    # Build synthesis context
    successful_results = [r for r in role_results if r.get("success")]
    failed_results = [r for r in role_results if not r.get("success")]

    synthesis_context = "\n\n".join([
        f"## {r['role_context']}\n{r['result']}"
        for r in successful_results
    ])

    # Synthesis
    synthesis_prompt = synthesis_config.get("system_prompt", "")
    synthesis_role = synthesis_config.get("role_context", "Synthesis")
    # The orchestrator profile from the environment overrides the configured
    # synthesis profile whenever it is non-empty.
    synthesis_llm = CREWAI_ORCHESTRATOR_LLM_PROFILE or synthesis_config.get("llm_profile", "reasoning")
    controls = resolve_generation_controls(context)

    voice_rule = _build_voice_rule(orchestrator_id)

    # NOTE(review): line breaks inside the prompt templates below were
    # reconstructed from a whitespace-collapsed copy of this file; the wording
    # is unchanged, but confirm the exact layout against the deployed prompts.
    if controls["mode"] in ("concise", "training"):
        final_prompt = f"""Task: {task}

Team Analysis:
{synthesis_context}

{voice_rule}

Return a concise user-facing answer in the user's language.
Format:
- 2-4 short bullets with key points
- 1 short next step
Avoid long reports and verbose section headers."""
    else:
        final_prompt = f"""Task: {task}

Team Analysis:
{synthesis_context}

{voice_rule}

Synthesize the above into a coherent, actionable response.
Include:
- Key findings
- Recommendations
- Risks/limitations
- Next steps"""

    try:
        final_result = await call_internal_llm(
            prompt=final_prompt,
            system_prompt=synthesis_prompt,
            role_context=synthesis_role,
            llm_profile=synthesis_llm,
            max_tokens=controls["max_tokens"],
            temperature=controls["temperature"],
        )
    except Exception as e:
        # Best-effort: fall back to the raw role outputs rather than failing.
        logger.error(f"Synthesis failed for {orchestrator_id}: {e}")
        final_result = f"Synthesis failed: {e}\n\nRaw team results:\n{synthesis_context}"

    # Enforce single-agent voice for non-network orchestrators.
    if orchestrator_id not in TEAM_VOICE_ORCHESTRATORS and TEAM_VOICE_MARKERS_RE.search(final_result or ""):
        nutra_rule = (
            "- For NUTRA: strictly feminine first-person in Ukrainian/Russian; never masculine forms."
            if orchestrator_id == "nutra"
            else ""
        )
        rewrite_prompt = f"""Rewrite the text below in the user's language.

Hard constraints:
- First-person singular only (I/me; я).
- Never use collective/team voice: no "we", "our", "our team", "ми", "мы", "наша команда", "наш*".
- Keep original meaning and structure concise.
{nutra_rule}

Text:
{final_result}"""
        try:
            final_result = await call_internal_llm(
                prompt=rewrite_prompt,
                system_prompt="You are a strict style editor for agent voice consistency.",
                role_context="Voice Consistency Editor",
                llm_profile=CREWAI_ORCHESTRATOR_LLM_PROFILE or "reasoning",
                max_tokens=min(600, controls["max_tokens"] + 120),
                temperature=0.1,
            )
        except Exception as e:
            # Keep the un-rewritten synthesis result if the rewrite call fails.
            logger.warning(f"Voice rewrite skipped due to error: {e}")

    elapsed = time.time() - t0

    return CrewRunResponse(
        success=True,
        result=final_result,
        meta={
            "orchestrator_id": orchestrator_id,
            "profile": profile,
            "team_name": team_name,
            "roles_count": len(team_members),
            "roles_success": len(successful_results),
            "roles_failed": len(failed_results),
            "elapsed_seconds": round(elapsed, 2),
            "parallel": parallel_roles
        }
    )


# === ENDPOINTS ===

@app.get("/health")
async def health():
    """Return a static liveness payload with service version and variant."""
    return {"status": "ok", "version": "2.0.0", "variant": "A-profiles"}


@app.get("/crew/agents")
async def list_agents():
    """List all agents with their profiles"""
    return get_all_agents_summary()


@app.get("/crew/teams")
async def list_teams():
    """List all teams (for backwards compatibility, returns profiles view).

    Keys are ``agent_id`` for the "default" profile and
    ``agent_id.profile_name`` for every other profile.
    """
    summary = get_all_agents_summary()
    result = {}
    for agent_id, info in summary.items():
        for profile_name in info.get("profiles", []):
            key = f"{agent_id}" if profile_name == "default" else f"{agent_id}.{profile_name}"
            # `or` fallbacks: tolerate a registry getter returning None, as
            # delegate_to_agent already does for the delegation config.
            settings = get_team_settings(agent_id, profile_name) or {}
            deleg = get_delegation_config(agent_id, profile_name) or {}
            members = get_team_members(agent_id, profile_name) or []
            result[key] = {
                "agent_id": agent_id,
                "profile": profile_name,
                "team_name": settings.get("team_name"),
                "member_count": len(members),
                "members": [m.get("role_context") for m in members],
                "parallel_roles": settings.get("parallel_roles"),
                "max_concurrency": settings.get("max_concurrency"),
                "has_delegation": deleg.get("enabled", False)
            }
    return result


@app.post("/crew/run", response_model=CrewRunResponse)
async def run_crew(request: CrewRunRequest):
    """Execute multi-role orchestration for an agent.

    When ``context["runtime_envelope"]`` is present it is validated first and
    a failed validation is returned as an unsuccessful ``CrewRunResponse``
    (not an HTTP error). An unknown or non-orchestrator agent yields HTTP 404.
    """
    orchestrator_id = request.orchestrator_id
    runtime_envelope = request.context.get("runtime_envelope") if isinstance(request.context, dict) else None

    if runtime_envelope is not None:
        envelope_error = validate_runtime_envelope(runtime_envelope)
        if envelope_error:
            return CrewRunResponse(
                success=False,
                error=envelope_error["stop_code"],
                meta={
                    "stop_code": envelope_error["stop_code"],
                    "details": envelope_error.get("details", []),
                    "request_id": runtime_envelope.get("request_id") if isinstance(runtime_envelope, dict) else None,
                },
            )

    if not is_orchestrator(orchestrator_id):
        raise HTTPException(status_code=404, detail=f"Agent {orchestrator_id} not found or not an orchestrator")

    return await run_crew_canonical(
        orchestrator_id=orchestrator_id,
        task=request.task,
        context=request.context,
        profile=request.profile
    )


@app.post("/crew/delegate")
async def delegate(request: DelegationRequest):
    """Delegate task to another top-level agent (for DAARWIZZ).

    Responds with HTTP 400 when the delegation is refused or fails.
    """
    result = await delegate_to_agent(
        orchestrator_id=request.orchestrator_id,
        target_agent_id=request.target_agent_id,
        task=request.task,
        context=request.context,
        hops_remaining=request.hops_remaining
    )

    if result is None:
        raise HTTPException(status_code=400, detail="Delegation failed or not allowed")

    return {"success": True, "result": result}


@app.on_event("startup")
async def startup():
    """Load the teams config and log a summary of agents and profiles."""
    config = load_teams_config()
    version = config.get("version", "unknown")
    summary = get_all_agents_summary()
    total_profiles = sum(len(info.get("profiles", [])) for info in summary.values())

    logger.info("=== CrewAI Service v2.0.0 (Variant A) ===")
    logger.info(f"Config version: {version}")
    logger.info(f"Agents: {len(summary)}, Total profiles: {total_profiles}")

    for agent_id, info in summary.items():
        profiles = info.get("profiles", [])
        logger.info(f"  {agent_id}: {profiles}")