helion: deepseek-first, on-demand CrewAI, local subagent profiles, concise post-synthesis

This commit is contained in:
Apple
2026-02-18 09:21:47 -08:00
parent 343bdc2d11
commit 635f2d7e37
6 changed files with 117 additions and 80 deletions

View File

@@ -64,7 +64,7 @@ helion:
synthesis:
role_context: HELION Orchestrator
system_prompt_ref: roles/helion/orchestrator_synthesis.md
llm_profile: reasoning
llm_profile: science
team:
- id: energy_researcher
role_context: Energy Researcher
@@ -73,7 +73,7 @@ helion:
- id: systems_modeler
role_context: Systems Modeler
system_prompt_ref: roles/helion/systems_modeler.md
llm_profile: reasoning
llm_profile: science
- id: policy_analyst
role_context: Policy Analyst
system_prompt_ref: roles/helion/policy_analyst.md
@@ -81,7 +81,7 @@ helion:
- id: risk_assessor
role_context: Risk Assessor
system_prompt_ref: roles/helion/risk_assessor.md
llm_profile: reasoning
llm_profile: science
- id: communicator
role_context: Communicator
system_prompt_ref: roles/helion/communicator.md
@@ -95,12 +95,12 @@ helion:
synthesis:
role_context: Executive Synthesis (CEO-mode)
system_prompt_ref: roles/helion/HELION_CORE/orchestrator_synthesis.md
llm_profile: reasoning
llm_profile: science
team:
- id: orchestrator_front_desk_router
role_context: Orchestrator (Front Desk / Router)
system_prompt_ref: roles/helion/HELION_CORE/orchestrator_front_desk_router.md
llm_profile: reasoning
llm_profile: science
- id: knowledge_curator_rag_librarian
role_context: Knowledge Curator (L1–L3 RAG Librarian)
system_prompt_ref: roles/helion/HELION_CORE/knowledge_curator_rag_librarian.md
@@ -108,15 +108,15 @@ helion:
- id: safety_anti_hallucination_gate
role_context: Safety & Anti-Hallucination Gate
system_prompt_ref: roles/helion/HELION_CORE/safety_anti_hallucination_gate.md
llm_profile: reasoning
llm_profile: science
- id: legal_compliance_gdpr_mica_aml_kyc
role_context: Legal & Compliance (GDPR/MiCA/AML/KYC)
system_prompt_ref: roles/helion/HELION_CORE/legal_compliance_gdpr_mica_aml_kyc.md
llm_profile: reasoning
llm_profile: science
- id: security_anti_fraud_anti_fake
role_context: Security & Anti-Fraud / Anti-Fake
system_prompt_ref: roles/helion/HELION_CORE/security_anti_fraud_anti_fake.md
llm_profile: reasoning
llm_profile: science
- id: energy_systems_engineer
role_context: Energy Systems Engineer (GGU/BioMiner/SES)
system_prompt_ref: roles/helion/HELION_CORE/energy_systems_engineer.md
@@ -124,7 +124,7 @@ helion:
- id: finance_roi_modeler
role_context: Finance & ROI Modeler
system_prompt_ref: roles/helion/HELION_CORE/finance_roi_modeler.md
llm_profile: reasoning
llm_profile: science
- id: dao_guide_governance_onboarding
role_context: DAO Guide (Governance & Onboarding)
system_prompt_ref: roles/helion/HELION_CORE/dao_guide_governance_onboarding.md
@@ -132,7 +132,7 @@ helion:
- id: tokenization_rwa_nft_architect
role_context: Tokenization & RWA/NFT Architect
system_prompt_ref: roles/helion/HELION_CORE/tokenization_rwa_nft_architect.md
llm_profile: reasoning
llm_profile: science
- id: growth_soft_selling_cx
role_context: Growth & Soft-Selling CX
system_prompt_ref: roles/helion/HELION_CORE/growth_soft_selling_cx.md

View File

@@ -469,6 +469,18 @@ SENPAI_CONFIG = load_agent_config(
default_prompt="Ти — Гордон Сенпай (Gordon Senpai), радник з ринків капіталу та цифрових активів. Допомагаєш з трейдингом, ризик-менеджментом, аналізом ринків.",
)
# 1OK Configuration
ONEOK_CONFIG = load_agent_config(
agent_id="oneok",
name=os.getenv("ONEOK_NAME", "1OK"),
prompt_path=os.getenv(
"ONEOK_PROMPT_PATH",
str(Path(__file__).parent / "oneok_prompt.txt"),
),
telegram_token_env="ONEOK_TELEGRAM_BOT_TOKEN",
default_prompt="Ти — 1OK, асистент віконного майстра. Допомагаєш з кваліфікацією ліда, підготовкою заміру та формуванням комерційної пропозиції.",
)
# SOUL / Athena Configuration
SOUL_CONFIG = load_agent_config(
agent_id="soul",
@@ -517,6 +529,7 @@ AGENT_REGISTRY: Dict[str, AgentConfig] = {
"clan": CLAN_CONFIG,
"eonarch": EONARCH_CONFIG,
"senpai": SENPAI_CONFIG,
"oneok": ONEOK_CONFIG,
"soul": SOUL_CONFIG,
"yaromir": YAROMIR_CONFIG,
"sofiia": SOFIIA_CONFIG,
@@ -707,6 +720,11 @@ async def eonarch_telegram_webhook(update: TelegramUpdate):
async def senpai_telegram_webhook(update: TelegramUpdate):
return await handle_telegram_webhook(SENPAI_CONFIG, update)
# 1OK webhook endpoint
@router.post("/oneok/telegram/webhook")
async def oneok_telegram_webhook(update: TelegramUpdate):
return await handle_telegram_webhook(ONEOK_CONFIG, update)
# SOUL / Athena webhook endpoint
@router.post("/soul/telegram/webhook")
@@ -897,50 +915,6 @@ def _resolve_stt_upload_url() -> str:
# Helper Functions
# ========================================
async def send_telegram_message(chat_id: str, text: str, bot_token: Optional[str] = None) -> bool:
"""
Відправити повідомлення в Telegram.
Args:
chat_id: ID чату
text: Текст повідомлення
bot_token: Telegram bot token (якщо None, використовується TELEGRAM_BOT_TOKEN)
Returns:
True якщо успішно, False інакше
"""
try:
token = bot_token or os.getenv("TELEGRAM_BOT_TOKEN")
if not token:
logger.error("TELEGRAM_BOT_TOKEN not set")
return False
# Strip <think>...</think> tags (DeepSeek reasoning leak)
import re
text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
text = re.sub(r'<think>.*$', '', text, flags=re.DOTALL) # unclosed tag
# Strip any DSML/XML-like markup
text = re.sub(r'</?(?:function_calls|invoke|parameter)[^>]*>', '', text)
text = text.strip()
if not text:
text = "..."
url = f"https://api.telegram.org/bot{token}/sendMessage"
payload = {
"chat_id": chat_id,
"text": text,
"parse_mode": "Markdown"
}
async with httpx.AsyncClient() as client:
response = await client.post(url, json=payload, timeout=10.0)
response.raise_for_status()
return True
except Exception as e:
logger.error(f"Failed to send Telegram message: {e}")
return False
async def get_telegram_file_path(file_id: str, bot_token: Optional[str] = None) -> Optional[str]:
"""
Отримати шлях до файлу з Telegram API.
@@ -2491,7 +2465,11 @@ async def handle_telegram_webhook(
+ "\n(Не потрібно щоразу представлятися по імені або писати шаблонне: 'чим можу допомогти'.)"
)
if needs_complex_reasoning:
# Helion policy: DeepSeek-first primary response path.
if agent_config.agent_id == "helion":
router_request["metadata"]["provider"] = "cloud_deepseek"
router_request["metadata"]["reason"] = "helion_primary_deepseek"
elif needs_complex_reasoning:
router_request["metadata"]["provider"] = "cloud_deepseek"
router_request["metadata"]["reason"] = "auto_complex"
@@ -3546,27 +3524,52 @@ async def _artifact_job_done(job_id: str, note: str) -> None:
raise HTTPException(status_code=502, detail=f"Job done error: {resp.text[:200]}")
async def send_telegram_message(chat_id: str, text: str, bot_token: str = None):
"""Send message to Telegram chat"""
async def send_telegram_message(chat_id: str, text: str, bot_token: Optional[str] = None) -> bool:
"""Send message to Telegram chat with explicit error diagnostics."""
telegram_token = bot_token or os.getenv("TELEGRAM_BOT_TOKEN")
if not telegram_token:
logger.error("TELEGRAM_BOT_TOKEN not set")
return
return False
# Defensive cleanup for occasional reasoning/markup leaks.
import re
safe_text = re.sub(r'<think>.*?</think>', '', text or "", flags=re.DOTALL)
safe_text = re.sub(r'<think>.*$', '', safe_text, flags=re.DOTALL)
safe_text = safe_text.strip() or "..."
token_id = telegram_token.split(":", 1)[0] if ":" in telegram_token else "unknown"
url = f"https://api.telegram.org/bot{telegram_token}/sendMessage"
payload = {
"chat_id": chat_id,
"text": text,
# "parse_mode": "Markdown", # Removed to prevent 400 errors
"chat_id": str(chat_id),
"text": safe_text,
"disable_web_page_preview": True,
}
try:
async with httpx.AsyncClient() as client:
response = await client.post(url, json=payload, timeout=10.0)
response.raise_for_status()
logger.info(f"Telegram message sent to chat {chat_id}")
response = await client.post(url, json=payload, timeout=15.0)
if response.status_code >= 400:
err_desc = response.text[:300]
try:
body = response.json()
err_desc = body.get("description") or err_desc
except Exception:
pass
logger.error(
"Telegram sendMessage failed: bot_id=%s chat_id=%s status=%s desc=%s",
token_id,
chat_id,
response.status_code,
err_desc,
)
return False
logger.info("Telegram message sent: bot_id=%s chat_id=%s", token_id, chat_id)
return True
except Exception as e:
logger.error(f"Error sending Telegram message: {e}")
logger.error("Telegram sendMessage exception: bot_id=%s chat_id=%s error=%s", token_id, chat_id, e)
return False
# ========================================

View File

@@ -25,6 +25,7 @@ GATEWAY_MAX_TOKENS_CONCISE = int(os.getenv("GATEWAY_MAX_TOKENS_CONCISE", "220"))
GATEWAY_MAX_TOKENS_TRAINING = int(os.getenv("GATEWAY_MAX_TOKENS_TRAINING", "900"))
GATEWAY_TEMPERATURE_DEFAULT = float(os.getenv("GATEWAY_TEMPERATURE_DEFAULT", "0.4"))
GATEWAY_MAX_TOKENS_SENPAI_DEFAULT = int(os.getenv("GATEWAY_MAX_TOKENS_SENPAI_DEFAULT", "320"))
GATEWAY_MAX_TOKENS_HELION_DEFAULT = int(os.getenv("GATEWAY_MAX_TOKENS_HELION_DEFAULT", "240"))
GATEWAY_MAX_TOKENS_DETAILED = int(os.getenv("GATEWAY_MAX_TOKENS_DETAILED", "900"))
@@ -87,6 +88,8 @@ async def send_to_router(body: Dict[str, Any]) -> Dict[str, Any]:
# Senpai tends to over-verbose responses in Telegram; use lower default unless user asked details.
if agent_id == "senpai":
max_tokens = GATEWAY_MAX_TOKENS_SENPAI_DEFAULT
elif agent_id == "helion":
max_tokens = min(max_tokens, GATEWAY_MAX_TOKENS_HELION_DEFAULT)
if metadata.get("is_training_group"):
max_tokens = GATEWAY_MAX_TOKENS_TRAINING

View File

@@ -61,7 +61,7 @@ helion:
synthesis:
role_context: HELION Orchestrator
system_prompt_ref: roles/helion/orchestrator_synthesis.md
llm_profile: reasoning
llm_profile: science
team:
- id: energy_researcher
role_context: Energy Researcher
@@ -70,7 +70,7 @@ helion:
- id: systems_modeler
role_context: Systems Modeler
system_prompt_ref: roles/helion/systems_modeler.md
llm_profile: reasoning
llm_profile: science
- id: policy_analyst
role_context: Policy Analyst
system_prompt_ref: roles/helion/policy_analyst.md
@@ -78,7 +78,7 @@ helion:
- id: risk_assessor
role_context: Risk Assessor
system_prompt_ref: roles/helion/risk_assessor.md
llm_profile: reasoning
llm_profile: science
- id: communicator
role_context: Communicator
system_prompt_ref: roles/helion/communicator.md
@@ -92,12 +92,12 @@ helion:
synthesis:
role_context: Executive Synthesis (CEO-mode)
system_prompt_ref: roles/helion/HELION_CORE/orchestrator_synthesis.md
llm_profile: reasoning
llm_profile: science
team:
- id: orchestrator_front_desk_router
role_context: Orchestrator (Front Desk / Router)
system_prompt_ref: roles/helion/HELION_CORE/orchestrator_front_desk_router.md
llm_profile: reasoning
llm_profile: science
- id: knowledge_curator_rag_librarian
role_context: Knowledge Curator (L1–L3 RAG Librarian)
system_prompt_ref: roles/helion/HELION_CORE/knowledge_curator_rag_librarian.md
@@ -105,15 +105,15 @@ helion:
- id: safety_anti_hallucination_gate
role_context: Safety & Anti-Hallucination Gate
system_prompt_ref: roles/helion/HELION_CORE/safety_anti_hallucination_gate.md
llm_profile: reasoning
llm_profile: science
- id: legal_compliance_gdpr_mica_aml_kyc
role_context: Legal & Compliance (GDPR/MiCA/AML/KYC)
system_prompt_ref: roles/helion/HELION_CORE/legal_compliance_gdpr_mica_aml_kyc.md
llm_profile: reasoning
llm_profile: science
- id: security_anti_fraud_anti_fake
role_context: Security & Anti-Fraud / Anti-Fake
system_prompt_ref: roles/helion/HELION_CORE/security_anti_fraud_anti_fake.md
llm_profile: reasoning
llm_profile: science
- id: energy_systems_engineer
role_context: Energy Systems Engineer (GGU/BioMiner/SES)
system_prompt_ref: roles/helion/HELION_CORE/energy_systems_engineer.md
@@ -121,7 +121,7 @@ helion:
- id: finance_roi_modeler
role_context: Finance & ROI Modeler
system_prompt_ref: roles/helion/HELION_CORE/finance_roi_modeler.md
llm_profile: reasoning
llm_profile: science
- id: dao_guide_governance_onboarding
role_context: DAO Guide (Governance & Onboarding)
system_prompt_ref: roles/helion/HELION_CORE/dao_guide_governance_onboarding.md
@@ -129,7 +129,7 @@ helion:
- id: tokenization_rwa_nft_architect
role_context: Tokenization & RWA/NFT Architect
system_prompt_ref: roles/helion/HELION_CORE/tokenization_rwa_nft_architect.md
llm_profile: reasoning
llm_profile: science
- id: growth_soft_selling_cx
role_context: Growth & Soft-Selling CX
system_prompt_ref: roles/helion/HELION_CORE/growth_soft_selling_cx.md

View File

@@ -14,6 +14,7 @@ logger = logging.getLogger(__name__)
CREWAI_URL = os.getenv("CREWAI_URL", "http://dagi-staging-crewai-service:9010")
CREWAI_ENABLED = os.getenv("CREWAI_ENABLED", "true").lower() == "true"
CREWAI_ORCHESTRATORS_ALWAYS = os.getenv("CREWAI_ORCHESTRATORS_ALWAYS", "true").lower() == "true"
HELION_CREWAI_TEAM_LIMIT = int(os.getenv("HELION_CREWAI_TEAM_LIMIT", "3"))
CREWAI_AGENTS_PATH = os.getenv("CREWAI_AGENTS_PATH", "/config/crewai_agents.json")
FALLBACK_CREWAI_PATH = "/app/config/crewai_agents.json"
@@ -90,6 +91,19 @@ def should_use_crewai(agent_id, prompt, agent_config, metadata=None, force_crewa
if not team:
return False, "agent_has_no_team"
metadata = metadata or {}
force_detailed = bool(metadata.get("force_detailed"))
requires_complex = bool(metadata.get("requires_complex_reasoning"))
# Helion policy: DeepSeek direct path by default; CrewAI only on-demand.
# This keeps first-touch replies fast and concise.
if agent_id == "helion":
prompt_lower = prompt.lower()
has_complexity = any(kw in prompt_lower for kw in COMPLEXITY_KEYWORDS)
if force_detailed or requires_complex or has_complexity:
return True, "helion_complex_or_detailed"
return False, "helion_direct_deepseek_first"
# Architecture mode: top-level orchestrators go through CrewAI API by default.
if CREWAI_ORCHESTRATORS_ALWAYS:
return True, "orchestrator_default_crewai"
@@ -111,9 +125,15 @@ async def call_crewai(agent_id, task, context=None, team=None, profile=None):
if not team:
crewai_info = get_agent_crewai_info(agent_id)
team = crewai_info.get("team", [])
effective_context = context or {}
metadata = (effective_context.get("metadata", {}) or {})
force_detailed = bool(metadata.get("force_detailed"))
# Helion policy: limit CrewAI participants unless user requested detailed mode.
if agent_id == "helion" and not force_detailed and HELION_CREWAI_TEAM_LIMIT > 0 and len(team) > HELION_CREWAI_TEAM_LIMIT:
team = team[:HELION_CREWAI_TEAM_LIMIT]
async with httpx.AsyncClient(timeout=600.0) as client:
effective_context = context or {}
effective_profile = profile or (effective_context.get("metadata", {}) or {}).get("crewai_profile")
if not effective_profile and agent_id == "clan":
effective_profile = "zhos_mvp"

View File

@@ -1412,6 +1412,17 @@ async def agent_infer(agent_id: str, request: InferRequest):
if isinstance(row, dict):
logger.info(json.dumps(row, ensure_ascii=False))
logger.info(f"✅ CrewAI success for {agent_id}: {latency:.2f}s")
final_response_text = crew_result["result"]
# Helion: keep first-touch answers short by default, even after CrewAI.
if (
agent_id == "helion"
and isinstance(final_response_text, str)
and effective_metadata.get("force_concise")
and not effective_metadata.get("force_detailed")
):
parts = re.split(r"(?<=[.!?])\s+", final_response_text.strip())
if len(parts) > 3:
final_response_text = " ".join(parts[:3]).strip()
# Store interaction in memory
if MEMORY_RETRIEVAL_AVAILABLE and memory_retrieval and chat_id and user_id:
@@ -1423,13 +1434,13 @@ async def agent_infer(agent_id: str, request: InferRequest):
agent_id=request_agent_id,
username=username,
user_message=request.prompt,
assistant_response=crew_result["result"]
assistant_response=final_response_text
)
except Exception as e:
logger.warning(f"⚠️ Memory storage failed: {e}")
return InferResponse(
response=crew_result["result"],
response=final_response_text,
model="crewai-" + agent_id,
backend="crewai",
tokens_used=0