"""
CrewAI Service - Canonical Multi-Role Orchestration v2.0

Variant A: Profiles per top-level agent
"""
|
||
import os
|
||
import json
|
||
import time
|
||
import asyncio
|
||
import logging
|
||
import re
|
||
import httpx
|
||
from typing import Dict, Any, List, Optional
|
||
from fastapi import FastAPI, HTTPException
|
||
from pydantic import BaseModel
|
||
|
||
# Setup logging (module-level logger; level is fixed at INFO here, not env-driven)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
||
|
||
# Import registry loader (v2 with profiles)
|
||
from registry_loader import (
|
||
load_teams_config,
|
||
get_team_members, get_synthesis_config, get_delegation_config,
|
||
get_team_settings, can_delegate_to, is_orchestrator,
|
||
get_agent_profiles, get_default_profile, select_profile,
|
||
get_profile_config, get_all_agents_summary
|
||
)
|
||
|
||
app = FastAPI(title="CrewAI Service", version="2.0.0")

# Configuration
_router_url = os.getenv("ROUTER_URL", "http://router:8000")
# Backward compatibility for older envs injecting unreachable hostname.
ROUTER_URL = _router_url.replace("dagi-staging-router", "router")
# Semaphore size for parallel role execution (see run_crew_canonical).
DEFAULT_MAX_CONCURRENCY = int(os.getenv("MAX_CONCURRENT_ROLES", "3"))
# Per-request timeout (seconds) for Router LLM calls.
LLM_TIMEOUT = int(os.getenv("LLM_TIMEOUT", "120"))
# LLM profile overrides: the orchestrator profile drives synthesis and voice
# rewrites, the worker profile drives individual team roles. NOTE(review):
# both defaults are non-empty, so the per-config llm_profile fallbacks in
# execute_role / run_crew_canonical are effectively never used — confirm the
# env-level override is intended to always win.
CREWAI_ORCHESTRATOR_LLM_PROFILE = os.getenv("CREWAI_ORCHESTRATOR_LLM_PROFILE", "cloud_deepseek").strip()
CREWAI_WORKER_LLM_PROFILE = os.getenv("CREWAI_WORKER_LLM_PROFILE", "local_qwen3_8b").strip()
# Orchestrators allowed to answer in collective ("we") voice.
TEAM_VOICE_ORCHESTRATORS = {"daarwizz"}
# Detects collective-voice markers (English/Ukrainian/Russian) in final output;
# a match on a non-whitelisted orchestrator triggers a voice rewrite pass.
TEAM_VOICE_MARKERS_RE = re.compile(
    r"(\bwe\b|\bour\b|\bour team\b|наша команда|\bми\b|\bмы\b|\bнаш\w*\b)",
    flags=re.IGNORECASE,
)
|
||
# Closed vocabularies accepted by the runtime envelope.
VISIBILITY_LEVELS = {"public", "interclan", "incircle", "soulsafe", "sacred"}
CONSENT_STATUSES = {"none", "pending", "confirmed"}


def validate_runtime_envelope(envelope: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Lightweight envelope guard for the /crew/run boundary.

    Returns None when the envelope is acceptable, otherwise a dict with a
    fixed stop_code ("STOP_SCHEMA_ENVELOPE") and a list of violation details.
    """
    def stop(details: List[str]) -> Dict[str, Any]:
        # Every violation shares one stop code; only the details vary.
        return {"stop_code": "STOP_SCHEMA_ENVELOPE", "details": details}

    if not isinstance(envelope, dict):
        return stop(["runtime_envelope_not_object"])

    required_keys = (
        "request_id",
        "visibility_level_target",
        "consent_status",
        "allowed_actions",
        "expected_output",
        "input_text",
    )
    absent = [key for key in required_keys if key not in envelope]
    if absent:
        return stop([f"missing:{key}" for key in absent])

    if envelope.get("visibility_level_target") not in VISIBILITY_LEVELS:
        return stop(["invalid:visibility_level_target"])

    if envelope.get("consent_status") not in CONSENT_STATUSES:
        return stop(["invalid:consent_status"])

    actions = envelope.get("allowed_actions")
    if not isinstance(actions, list) or not actions:
        return stop(["invalid:allowed_actions"])

    text = envelope.get("input_text")
    if not isinstance(text, str) or not text.strip():
        return stop(["invalid:input_text"])

    return None
|
||
|
||
|
||
# Request/Response models
|
||
class CrewRunRequest(BaseModel):
    """Request body for POST /crew/run."""
    orchestrator_id: str  # top-level agent whose team is executed
    task: str  # user task to orchestrate
    context: Dict[str, Any] = {}  # optional context (metadata, runtime_envelope, memory_brief)
    profile: Optional[str] = None  # NEW: explicit profile selection; auto-selected when None
|
||
|
||
class CrewRunResponse(BaseModel):
    """Response body for /crew/run (also used for envelope-validation stops)."""
    success: bool
    result: Optional[str] = None  # final synthesized answer when success is True
    error: Optional[str] = None  # error message or stop_code when success is False
    meta: Dict[str, Any] = {}  # run metadata: profile, team_name, role counts, timings
|
||
|
||
class DelegationRequest(BaseModel):
    """Request body for POST /crew/delegate."""
    orchestrator_id: str  # delegating agent
    target_agent_id: str  # agent that receives the task
    task: str
    context: Dict[str, Any] = {}  # accepted, but not forwarded by delegate_to_agent
    hops_remaining: int = 2  # delegation hop budget; decremented per hop
|
||
|
||
|
||
async def call_internal_llm(
    prompt: str,
    system_prompt: Optional[str] = None,
    role_context: Optional[str] = None,
    llm_profile: str = "reasoning",
    max_tokens: int = 1200,
    temperature: float = 0.3,
) -> str:
    """Call Router internal LLM endpoint for a single role.

    POSTs to {ROUTER_URL}/internal/llm/complete and returns the completion
    text (empty string when the response carries no "text" field).

    Args:
        prompt: User-level prompt for the role.
        system_prompt: Optional system prompt; omitted from payload when falsy.
        role_context: Optional role label forwarded to Router; omitted when falsy.
        llm_profile: Router-side LLM profile name.
        max_tokens: Generation cap forwarded to Router.
        temperature: Sampling temperature forwarded to Router.

    Raises:
        Re-raises any transport or HTTP-status error after logging; callers
        (execute_role, run_crew_canonical) catch and degrade gracefully.
    """
    url = f"{ROUTER_URL}/internal/llm/complete"

    payload = {
        "prompt": prompt,
        "llm_profile": llm_profile,
        "max_tokens": max_tokens,
        "temperature": temperature,
    }
    # Optional fields are only attached when truthy, keeping the payload minimal.
    if system_prompt:
        payload["system_prompt"] = system_prompt
    if role_context:
        payload["role_context"] = role_context

    async with httpx.AsyncClient(timeout=LLM_TIMEOUT) as client:
        try:
            resp = await client.post(url, json=payload)
            resp.raise_for_status()
            data = resp.json()
            return data.get("text", "")
        except Exception as e:
            logger.error(f"Internal LLM call failed: {e}")
            raise
|
||
|
||
|
||
def resolve_generation_controls(context: Dict[str, Any]) -> Dict[str, Any]:
    """Soft generation controls from Gateway metadata (no hard policy lock).

    Maps context["metadata"] flags onto generation settings; "concise" wins
    over "training" when both flags are set. Non-dict/None context yields
    the defaults.
    """
    if isinstance(context, dict):
        metadata = (context or {}).get("metadata", {})
    else:
        metadata = {}

    if metadata.get("force_concise"):
        return {"max_tokens": 220, "temperature": 0.2, "mode": "concise"}
    if metadata.get("is_training_group"):
        return {"max_tokens": 520, "temperature": 0.25, "mode": "training"}
    return {"max_tokens": 1200, "temperature": 0.3, "mode": "default"}
|
||
|
||
|
||
async def delegate_to_agent(
    orchestrator_id: str,
    target_agent_id: str,
    task: str,
    context: Optional[Dict[str, Any]] = None,
    hops_remaining: int = 2,
    profile: Optional[str] = None,
) -> Optional[str]:
    """Delegate task to another top-level agent via Router.

    Checks the delegation policy and hop budget, then POSTs the task to the
    target agent's /infer endpoint with handoff metadata.

    Args:
        orchestrator_id: Delegating agent.
        target_agent_id: Agent that receives the task.
        task: Task text forwarded as the prompt.
        context: Accepted for interface symmetry with /crew/delegate, but not
            currently forwarded to the target agent.
        hops_remaining: Remaining delegation hop budget; forwarded decremented.
        profile: Optional profile used to resolve delegation config.

    Returns:
        The target agent's response text, or None when delegation is not
        allowed, the hop budget is exhausted, or the HTTP call fails.
    """
    # Fix: previous signature used a mutable default (context={}); None is the
    # conventional sentinel and behavior is unchanged since context is unused.
    if not can_delegate_to(orchestrator_id, target_agent_id, profile):
        logger.warning(f"Delegation not allowed: {orchestrator_id} -> {target_agent_id}")
        return None

    if hops_remaining <= 0:
        logger.warning(f"Max delegation hops reached for {orchestrator_id}")
        return None

    url = f"{ROUTER_URL}/v1/agents/{target_agent_id}/infer"

    delegation_cfg = get_delegation_config(orchestrator_id, profile) or {}
    attach_headers = delegation_cfg.get("attach_headers", {})

    payload = {
        "prompt": task,
        "metadata": {
            # Config may override who the handoff is attributed to.
            "handoff_from": attach_headers.get("handoff_from", orchestrator_id),
            "hops_remaining": hops_remaining - 1,
        }
    }

    logger.info(f"DELEGATION: {orchestrator_id} -> {target_agent_id} (hops={hops_remaining})")

    async with httpx.AsyncClient(timeout=180) as client:
        try:
            resp = await client.post(url, json=payload)
            resp.raise_for_status()
            data = resp.json()
            return data.get("response", "")
        except Exception as e:
            # Delegation is best-effort: failures are logged and reported as None.
            logger.error(f"Delegation to {target_agent_id} failed: {e}")
            return None
|
||
|
||
|
||
async def execute_role(
    role_config: Dict[str, Any],
    task: str,
    context: Dict[str, Any],
    semaphore: asyncio.Semaphore
) -> Dict[str, Any]:
    """Execute a single role with rate limiting.

    Runs one team-member role through the Router LLM, bounded by `semaphore`
    so at most N roles hit the Router concurrently.

    Returns:
        A dict with role_id, role_context, result, elapsed_seconds, success,
        plus an "error" string on failure. Never raises — failures are
        contained so the crew run can proceed with the remaining roles.
    """
    role_id = role_config.get("id", "unknown")
    role_context = role_config.get("role_context", role_id)
    # NOTE(review): CREWAI_WORKER_LLM_PROFILE defaults to a non-empty string,
    # so the per-role llm_profile fallback below is effectively dead code —
    # confirm the env-level override is intended to always win.
    llm_profile = CREWAI_WORKER_LLM_PROFILE or role_config.get("llm_profile", "reasoning")
    system_prompt = role_config.get("system_prompt", "")

    # Compact memory brief (truncated to 500 chars) keeps role prompts small.
    memory_brief = context.get("memory_brief", {})
    memory_str = json.dumps(memory_brief, ensure_ascii=False)[:500] if memory_brief else ""

    prompt = f"Task: {task}\n\nContext: {memory_str}\n\nYour role: {role_context}\n\nProvide your analysis and recommendations."
    controls = resolve_generation_controls(context)

    async with semaphore:
        t0 = time.time()
        try:
            logger.info(f"ROLE START: {role_context} (profile={llm_profile})")
            result = await call_internal_llm(
                prompt=prompt,
                system_prompt=system_prompt,
                role_context=role_context,
                llm_profile=llm_profile,
                max_tokens=controls["max_tokens"],
                temperature=controls["temperature"],
            )
            elapsed = time.time() - t0
            logger.info(f"ROLE DONE: {role_context} ({elapsed:.1f}s)")
            return {
                "role_id": role_id,
                "role_context": role_context,
                "result": result,
                "elapsed_seconds": elapsed,
                "success": True
            }
        except Exception as e:
            # Contained failure: report the role as failed instead of raising.
            elapsed = time.time() - t0
            logger.error(f"ROLE ERROR: {role_context}: {e}")
            return {
                "role_id": role_id,
                "role_context": role_context,
                "result": None,
                "error": str(e),
                "elapsed_seconds": elapsed,
                "success": False
            }
|
||
|
||
|
||
async def run_crew_canonical(
    orchestrator_id: str,
    task: str,
    context: Dict[str, Any],
    profile: Optional[str] = None
) -> CrewRunResponse:
    """Execute multi-role orchestration for an agent with profile selection.

    Pipeline:
      1. Resolve the profile (explicit argument or select_profile heuristic).
      2. Fan the task out to every team-member role (parallel or sequential).
      3. Synthesize role outputs into one answer via the orchestrator LLM.
      4. Optionally rewrite the answer to enforce single-agent voice.

    Role and synthesis failures are reported in the response rather than
    raised; only the missing-team case returns success=False.
    """

    # Select profile (auto or explicit)
    if profile is None:
        profile = select_profile(orchestrator_id, task)

    logger.info(f"CREW RUN: {orchestrator_id} profile={profile}")

    # Get team config for selected profile
    team_members = get_team_members(orchestrator_id, profile)
    synthesis_config = get_synthesis_config(orchestrator_id, profile)
    team_settings = get_team_settings(orchestrator_id, profile)
    # NOTE(review): synthesis_config/team_settings are used with .get() below,
    # which assumes the registry loader returns dicts (possibly empty), never
    # None — confirm against registry_loader (delegate_to_agent guards with
    # `or {}`, this path does not).

    if not team_members:
        return CrewRunResponse(
            success=False,
            error=f"No team members for {orchestrator_id}.{profile}",
            meta={"orchestrator_id": orchestrator_id, "profile": profile}
        )

    team_name = team_settings.get("team_name", f"{orchestrator_id} team")
    parallel_roles = team_settings.get("parallel_roles", True)
    max_concurrency = team_settings.get("max_concurrency", DEFAULT_MAX_CONCURRENCY)

    logger.info(f"Team: {team_name}, Roles: {len(team_members)}, Parallel: {parallel_roles}, MaxConc: {max_concurrency}")

    t0 = time.time()
    role_results = []

    if parallel_roles:
        # Parallel execution with semaphore
        semaphore = asyncio.Semaphore(max_concurrency)
        tasks = [
            execute_role(member, task, context, semaphore)
            for member in team_members
        ]
        role_results = await asyncio.gather(*tasks, return_exceptions=True)
        # Normalize raw exceptions from gather into failure dicts (these carry
        # only error/success — they are counted below but never synthesized).
        role_results = [r if isinstance(r, dict) else {"error": str(r), "success": False} for r in role_results]
    else:
        # Sequential execution
        semaphore = asyncio.Semaphore(1)
        for member in team_members:
            res = await execute_role(member, task, context, semaphore)
            role_results.append(res)

    # Build synthesis context
    successful_results = [r for r in role_results if r.get("success")]
    failed_results = [r for r in role_results if not r.get("success")]

    synthesis_context = "\n\n".join([
        f"## {r['role_context']}\n{r['result']}"
        for r in successful_results
    ])

    # Synthesis
    synthesis_prompt = synthesis_config.get("system_prompt", "")
    synthesis_role = synthesis_config.get("role_context", "Synthesis")
    # NOTE(review): env override precedes config; with its non-empty default
    # the config llm_profile fallback is effectively unused — confirm intended.
    synthesis_llm = CREWAI_ORCHESTRATOR_LLM_PROFILE or synthesis_config.get("llm_profile", "reasoning")
    controls = resolve_generation_controls(context)

    # Voice rule: only whitelisted orchestrators may speak as a team.
    if orchestrator_id in TEAM_VOICE_ORCHESTRATORS:
        voice_rule = (
            "You may speak as an orchestrator team when appropriate."
        )
    else:
        voice_rule = (
            "CRITICAL STYLE: Write only in first-person singular as this single agent "
            "(I/me in English; я in Ukrainian/Russian). "
            "Do not present yourself as a team, group, council, or collective. "
            "Do not use phrases like 'we', 'our team', 'наша команда', 'мы'."
        )
    if orchestrator_id == "nutra":
        # NUTRA persona requires feminine grammatical gender in UK/RU output.
        voice_rule += (
            " CRITICAL GENDER: NUTRA must always use feminine first-person wording "
            "in Ukrainian/Russian (e.g., 'я підготувала', 'я готова', 'я зрозуміла'); "
            "never masculine forms like 'понял/готов'."
        )

    # Prompt shape depends on the generation mode (concise/training vs default).
    if controls["mode"] in ("concise", "training"):
        final_prompt = f"""Task: {task}

Team Analysis:
{synthesis_context}

{voice_rule}

Return a concise user-facing answer in the user's language.
Format:
- 2-4 short bullets with key points
- 1 short next step
Avoid long reports and verbose section headers."""
    else:
        final_prompt = f"""Task: {task}

Team Analysis:
{synthesis_context}

{voice_rule}

Synthesize the above into a coherent, actionable response. Include:
- Key findings
- Recommendations
- Risks/limitations
- Next steps"""

    try:
        final_result = await call_internal_llm(
            prompt=final_prompt,
            system_prompt=synthesis_prompt,
            role_context=synthesis_role,
            llm_profile=synthesis_llm,
            max_tokens=controls["max_tokens"],
            temperature=controls["temperature"],
        )
    except Exception as e:
        # Degrade gracefully: surface the raw role outputs if synthesis fails.
        final_result = f"Synthesis failed: {e}\n\nRaw team results:\n{synthesis_context}"

    # Enforce single-agent voice for non-network orchestrators.
    if orchestrator_id not in TEAM_VOICE_ORCHESTRATORS and TEAM_VOICE_MARKERS_RE.search(final_result or ""):
        rewrite_prompt = f"""Rewrite the text below in the user's language.

Hard constraints:
- First-person singular only (I/me; я).
- Never use collective/team voice: no "we", "our", "our team", "ми", "мы", "наша команда", "наш*".
- Keep original meaning and structure concise.
{"- For NUTRA: strictly feminine first-person in Ukrainian/Russian; never masculine forms." if orchestrator_id == "nutra" else ""}

Text:
{final_result}"""
        try:
            final_result = await call_internal_llm(
                prompt=rewrite_prompt,
                system_prompt="You are a strict style editor for agent voice consistency.",
                role_context="Voice Consistency Editor",
                llm_profile=CREWAI_ORCHESTRATOR_LLM_PROFILE or "reasoning",
                max_tokens=min(600, controls["max_tokens"] + 120),
                temperature=0.1,
            )
        except Exception as e:
            # Best-effort pass: keep the unrewritten synthesis on failure.
            logger.warning(f"Voice rewrite skipped due to error: {e}")

    elapsed = time.time() - t0

    return CrewRunResponse(
        success=True,
        result=final_result,
        meta={
            "orchestrator_id": orchestrator_id,
            "profile": profile,
            "team_name": team_name,
            "roles_count": len(team_members),
            "roles_success": len(successful_results),
            "roles_failed": len(failed_results),
            "elapsed_seconds": round(elapsed, 2),
            "parallel": parallel_roles
        }
    )
|
||
|
||
|
||
# === ENDPOINTS ===
|
||
|
||
@app.get("/health")
async def health():
    # Static liveness payload; performs no downstream (Router/registry) checks.
    return {"status": "ok", "version": "2.0.0", "variant": "A-profiles"}
|
||
|
||
|
||
@app.get("/crew/agents")
async def list_agents():
    """List all agents with their profiles"""
    # Thin passthrough over the registry loader's summary view.
    return get_all_agents_summary()
|
||
|
||
|
||
@app.get("/crew/teams")
async def list_teams():
    """List all teams (for backwards compatibility, returns profiles view)"""
    teams: Dict[str, Any] = {}

    for agent_id, info in get_all_agents_summary().items():
        for profile_name in info.get("profiles", []):
            # The "default" profile keeps the bare agent id as its key;
            # other profiles are keyed as "<agent>.<profile>".
            if profile_name == "default":
                key = f"{agent_id}"
            else:
                key = f"{agent_id}.{profile_name}"

            settings = get_team_settings(agent_id, profile_name)
            deleg = get_delegation_config(agent_id, profile_name)
            members = get_team_members(agent_id, profile_name)

            teams[key] = {
                "agent_id": agent_id,
                "profile": profile_name,
                "team_name": settings.get("team_name"),
                "member_count": len(members),
                "members": [m.get("role_context") for m in members],
                "parallel_roles": settings.get("parallel_roles"),
                "max_concurrency": settings.get("max_concurrency"),
                "has_delegation": deleg.get("enabled", False)
            }

    return teams
|
||
|
||
|
||
@app.post("/crew/run", response_model=CrewRunResponse)
async def run_crew(request: CrewRunRequest):
    """Execute multi-role orchestration for an agent"""
    orchestrator_id = request.orchestrator_id

    # Runtime envelope is optional; it is validated only when present.
    runtime_envelope = request.context.get("runtime_envelope") if isinstance(request.context, dict) else None
    if runtime_envelope is not None:
        envelope_error = validate_runtime_envelope(runtime_envelope)
        if envelope_error:
            # Schema violations come back as a structured stop (HTTP 200 with
            # success=False), not an HTTP error.
            return CrewRunResponse(
                success=False,
                error=envelope_error["stop_code"],
                meta={
                    "stop_code": envelope_error["stop_code"],
                    "details": envelope_error.get("details", []),
                    "request_id": runtime_envelope.get("request_id") if isinstance(runtime_envelope, dict) else None,
                },
            )

    if not is_orchestrator(orchestrator_id):
        raise HTTPException(status_code=404, detail=f"Agent {orchestrator_id} not found or not an orchestrator")

    return await run_crew_canonical(
        orchestrator_id=orchestrator_id,
        task=request.task,
        context=request.context,
        profile=request.profile
    )
|
||
|
||
|
||
@app.post("/crew/delegate")
async def delegate(request: DelegationRequest):
    """Delegate task to another top-level agent (for DAARWIZZ)"""
    result = await delegate_to_agent(
        orchestrator_id=request.orchestrator_id,
        target_agent_id=request.target_agent_id,
        task=request.task,
        context=request.context,
        hops_remaining=request.hops_remaining
    )

    # delegate_to_agent returns None both for policy denial and transport
    # failure; both are surfaced as HTTP 400 here.
    if result is None:
        raise HTTPException(status_code=400, detail="Delegation failed or not allowed")

    return {"success": True, "result": result}
|
||
|
||
|
||
# NOTE(review): @app.on_event is deprecated in newer FastAPI releases in favor
# of lifespan handlers — consider migrating when upgrading the framework.
@app.on_event("startup")
async def startup():
    """Load the team config at boot and log an agent/profile inventory."""
    config = load_teams_config()
    version = config.get("version", "unknown")
    summary = get_all_agents_summary()

    total_profiles = sum(len(info.get("profiles", [])) for info in summary.values())

    logger.info(f"=== CrewAI Service v2.0.0 (Variant A) ===")
    logger.info(f"Config version: {version}")
    logger.info(f"Agents: {len(summary)}, Total profiles: {total_profiles}")

    for agent_id, info in summary.items():
        profiles = info.get("profiles", [])
        logger.info(f"  {agent_id}: {profiles}")
|