feat(platform): add new services, tools, tests and crews modules

New router intelligence modules (26 files): alert_ingest/store, audit_store, architecture_pressure, backlog_generator/store, cost_analyzer, data_governance, dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment, platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files), signature_state_store, sofiia_auto_router, tool_governance New services: - sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static - memory-service: integration_endpoints, integrations, voice_endpoints, static UI - aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents) - sofiia-supervisor: new supervisor service - aistalk-bridge-lite: Telegram bridge lite - calendar-service: CalDAV calendar service with reminders - mlx-stt-service / mlx-tts-service: Apple Silicon speech services - binance-bot-monitor: market monitor service - node-worker: STT/TTS memory providers New tools (9): agent_email, browser_tool, contract_tool, observability_tool, oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault New crews: agromatrix_crew (12 modules: depth_classifier, doc_facts, doc_focus, farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine, session_context, style_adapter, telemetry) Tests: 85+ test files for all new modules Made-with: Cursor
2026-03-03 07:14:14 -08:00
parent e9dedffa48
commit 129e4ea1fc
241 changed files with 69349 additions and 0 deletions
--- a/tests/test_stepan_v3_session_proactivity_stability.py
+++ b/tests/test_stepan_v3_session_proactivity_stability.py
@@ -0,0 +1,350 @@
+"""
+Tests for Humanized Stepan v3 — Session Context + Soft Proactivity + Intent Stability Guard.
+
+Covers:
+1. Session TTL expiry
+2. Session last_messages max 3
+3. Session isolation per chat_id
+4. Session update/load cycle
+5. Stability guard: last_depth=light + short → light reason=stability_guard
+6. Stability guard: action verb → deep (guard does not trigger)
+7. Stability guard: urgent → deep (guard does not trigger)
+8. Stability guard: >6 words → guard does not trigger
+9. Proactivity: conditions met → added=True, phrase ≤120, no "!"
+10. Proactivity: depth=light → not added
+11. Proactivity: not 10th interaction → not added
+12. Proactivity: confidence < 0.7 → not added
+13. Proactivity: brief + "?" → not added
+14. Proactivity: intent_freq < 3 → not added
+15. Telemetry: AGX_STEPAN_METRIC session_updated / session_expired / stability_guard_triggered / proactivity_added
+"""
+
+import logging
+import sys
+import time
+from copy import deepcopy
+from pathlib import Path
+from unittest.mock import patch
+
+# Put the directory one level above this tests/ folder, and its
+# packages/agromatrix-tools subdirectory, at the front of sys.path so the
+# project imports below resolve. The second insert ends up first on the path.
+root = Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(root))
+sys.path.insert(0, str(root / 'packages' / 'agromatrix-tools'))
+
+from crews.agromatrix_crew.session_context import (
+    load_session, update_session, clear_session, _STORE, SESSION_TTL, _default_session,
+)
+from crews.agromatrix_crew.proactivity import (
+    maybe_add_proactivity, _top_intent,
+)
+from crews.agromatrix_crew.depth_classifier import classify_depth
+from crews.agromatrix_crew.memory_manager import _default_user_profile
+from crews.agromatrix_crew.telemetry import TELEMETRY_TAG
+
+
+# ─── Helper ───────────────────────────────────────────────────────────────────
+
+class _CaptureHandler(logging.Handler):
+    """Logging handler that keeps every emitted record in an in-memory list."""
+
+    def __init__(self):
+        super().__init__()
+        # Raw LogRecord objects, in the order they were emitted.
+        self.records: list[logging.LogRecord] = []
+
+    def emit(self, record):
+        """Store *record* instead of writing it anywhere."""
+        self.records += [record]
+
+    @property
+    def messages(self):
+        """Formatted message text of each captured record, in emission order."""
+        return [captured.getMessage() for captured in self.records]
+
+
+def _attach(module_path: str) -> tuple[logging.Logger, _CaptureHandler]:
+    """Hook a fresh ``_CaptureHandler`` onto the logger named *module_path*.
+
+    The logger's level is set to DEBUG. Returns the ``(logger, handler)``
+    pair; the handler is not detached here.
+    """
+    capture = _CaptureHandler()
+    target = logging.getLogger(module_path)
+    target.setLevel(logging.DEBUG)
+    target.addHandler(capture)
+    return target, capture
+
+
+def _profile(interaction_count=0, known_intents=None, style="conversational"):
+    """Return the default "test_user" profile with three fields overridden.
+
+    A falsy *known_intents* (``None`` or empty) is stored as a new empty list.
+    """
+    profile = _default_user_profile("test_user")
+    overrides = (
+        ("interaction_count", interaction_count),
+        ("known_intents", known_intents if known_intents else []),
+        ("style", style),
+    )
+    for field, value in overrides:
+        profile[field] = value
+    return profile
+
+
+# ─── 1. Session TTL expiry ────────────────────────────────────────────────────
+
+def test_session_ttl_expiry_returns_default():
+    """A session read after SESSION_TTL has passed has no depth and no messages."""
+    chat_id = "ttl_test_chat"
+    clear_session(chat_id)
+    update_session(chat_id, "повідомлення", "deep")
+
+    # Fast-forward the clock seen by session_context so the entry goes stale.
+    with patch("crews.agromatrix_crew.session_context.time") as fake_time:
+        fake_time.time.return_value = time.time() + SESSION_TTL + 1
+        loaded = load_session(chat_id)
+
+    assert loaded["last_depth"] is None
+    assert loaded["last_messages"] == []
+
+
+def test_session_ttl_expiry_logs_expired(caplog):
+    """Loading a stale session is expected to log a tagged "session_expired" line."""
+    chat_id = "ttl_log_chat"
+    clear_session(chat_id)
+    update_session(chat_id, "msg", "deep")
+
+    with patch("crews.agromatrix_crew.session_context.time") as fake_time:
+        with caplog.at_level(logging.INFO, logger="crews.agromatrix_crew.session_context"):
+            # Pretend the current time is already past the TTL window.
+            fake_time.time.return_value = time.time() + SESSION_TTL + 10
+            load_session(chat_id)
+
+    rendered = (rec.getMessage() for rec in caplog.records)
+    tagged = [text for text in rendered if TELEMETRY_TAG in text]
+    assert any("session_expired" in m for m in tagged), f"No session_expired. Got: {tagged}"
+
+
+# ─── 2. Session last_messages max 3 ──────────────────────────────────────────
+
+def test_session_last_messages_max_3():
+    """After four updates, last_messages is expected to hold exactly three items."""
+    chat_id = "msg_max_chat"
+    clear_session(chat_id)
+    for idx in range(4):
+        update_session(chat_id, f"msg_{idx}", "light")
+    assert len(load_session(chat_id)["last_messages"]) == 3
+
+
+def test_session_last_messages_keeps_newest():
+    """Of four stored messages, the first is dropped and the last is kept."""
+    chat_id = "msg_newest_chat"
+    clear_session(chat_id)
+    for idx in range(4):
+        update_session(chat_id, f"msg_{idx}", "light")
+    kept = load_session(chat_id)["last_messages"]
+    # Expected survivors: msg_1, msg_2, msg_3.
+    assert "msg_0" not in kept
+    assert "msg_3" in kept
+
+
+# ─── 3. Session isolation per chat_id ────────────────────────────────────────
+
+def test_session_isolation():
+    """Two chat ids keep separate depth and message history."""
+    chat_a, chat_b = "iso_chat_A", "iso_chat_B"
+    for cid in (chat_a, chat_b):
+        clear_session(cid)
+    update_session(chat_a, "msg_a", "deep")
+    update_session(chat_b, "msg_b", "light")
+
+    session_a = load_session(chat_a)
+    session_b = load_session(chat_b)
+    assert session_a["last_depth"] == "deep"
+    assert session_b["last_depth"] == "light"
+    # Neither chat may see the other's message.
+    assert "msg_a" not in session_b["last_messages"]
+    assert "msg_b" not in session_a["last_messages"]
+
+
+# ─── 4. Session update/load cycle ────────────────────────────────────────────
+
+def test_session_update_load_roundtrip():
+    """Values passed to update_session come back unchanged from load_session."""
+    chat_id = "roundtrip_chat"
+    clear_session(chat_id)
+    update_session(
+        chat_id,
+        "план на тиждень",
+        "deep",
+        agents=["ops", "iot"],
+        last_question="Уточни поле?",
+    )
+    stored = load_session(chat_id)
+    assert stored["last_depth"] == "deep"
+    assert "план на тиждень" in stored["last_messages"]
+    assert stored["last_agents"] == ["ops", "iot"]
+    assert stored["last_question"] == "Уточни поле?"
+
+
+def test_session_agents_max_5():
+    """Seven agents go in; last_agents is expected to hold five."""
+    chat_id = "agents_max_chat"
+    clear_session(chat_id)
+    seven_agents = ["a", "b", "c", "d", "e", "f", "g"]
+    update_session(chat_id, "msg", "deep", agents=seven_agents)
+    assert len(load_session(chat_id)["last_agents"]) == 5
+
+
+def test_session_telemetry_updated(caplog):
+    """update_session is expected to log a tagged "session_updated" line."""
+    chat_id = "tlog_update_chat"
+    clear_session(chat_id)
+    with caplog.at_level(logging.INFO, logger="crews.agromatrix_crew.session_context"):
+        update_session(chat_id, "тест", "light")
+    rendered = (rec.getMessage() for rec in caplog.records)
+    tagged = [text for text in rendered if TELEMETRY_TAG in text]
+    assert any("session_updated" in m for m in tagged), f"No session_updated. Got: {tagged}"
+
+
+def test_session_no_crash_empty_chat_id():
+    """load/update with an empty chat_id must not raise."""
+    assert load_session("") is not None
+    # Passes as long as no exception escapes the call.
+    update_session("", "msg", "deep")
+
+
+# ─── 5–8. Stability Guard ─────────────────────────────────────────────────────
+
+def test_stability_guard_short_after_light():
+    """last_depth=light + at most 6 words + no action verbs → light (stability guard)."""
+    prior = dict(
+        last_depth="light",
+        last_messages=[],
+        last_agents=[],
+        last_question=None,
+        updated_at=time.time(),
+    )
+    assert classify_depth("а на завтра?", session=prior) == "light"
+
+
+def test_stability_guard_action_verb_overrides():
+    """An action verb beats the guard → deep."""
+    prior = dict(
+        last_depth="light",
+        last_messages=[],
+        last_agents=[],
+        last_question=None,
+        updated_at=time.time(),
+    )
+    assert classify_depth("зроби план на завтра", session=prior) == "deep"
+
+
+def test_stability_guard_urgent_overrides():
+    """An urgent word beats the guard → deep."""
+    prior = dict(
+        last_depth="light",
+        last_messages=[],
+        last_agents=[],
+        last_question=None,
+        updated_at=time.time(),
+    )
+    assert classify_depth("терміново на завтра?", session=prior) == "deep"
+
+
+def test_stability_guard_long_text_no_guard():
+    """7+ words → the guard is not expected to apply (normal logic)."""
+    prior = dict(
+        last_depth="light",
+        last_messages=[],
+        last_agents=[],
+        last_question=None,
+        updated_at=time.time(),
+    )
+    # The input has eight words and opens with the action verb "перевір".
+    # NOTE(review): the action verb alone should give deep, so this case does
+    # not isolate the word-count cutoff from the action-verb override.
+    long_text = "перевір статус поля один два три чотири п'ять"
+    assert classify_depth(long_text, session=prior) == "deep"
+
+
+def test_stability_guard_no_session_works_normally():
+    """With no session the classification logic is unchanged."""
+    depth = classify_depth("а на завтра?", session=None, last_topic="plan_day")
+    # Expected via the short follow-up on last_topic path.
+    assert depth == "light"
+
+
+def test_stability_guard_last_depth_deep_no_guard():
+    """last_depth=deep → the guard is not expected to trigger."""
+    prior = dict(
+        last_depth="deep",
+        last_messages=[],
+        last_agents=[],
+        last_question=None,
+        updated_at=time.time(),
+    )
+    # The message is short but the previous turn was deep, so the normal
+    # short follow-up on last_topic rule is what should yield light here.
+    depth = classify_depth("а завтра?", session=prior, last_topic="plan_day")
+    assert depth == "light"
+
+
+def test_stability_guard_telemetry(caplog):
+    """Stability guard → AGX_STEPAN_METRIC stability_guard_triggered is logged."""
+    prior = dict(
+        last_depth="light",
+        last_messages=[],
+        last_agents=[],
+        last_question=None,
+        updated_at=time.time(),
+    )
+    with caplog.at_level(logging.INFO, logger="crews.agromatrix_crew.depth_classifier"):
+        classify_depth("а завтра?", session=prior)
+    rendered = (rec.getMessage() for rec in caplog.records)
+    tagged = [text for text in rendered if TELEMETRY_TAG in text]
+    assert any("stability_guard_triggered" in m for m in tagged), \
+        f"No stability_guard_triggered. Got: {tagged}"
+
+
+# ─── 9–14. Proactivity ───────────────────────────────────────────────────────
+
+def _profile_with_intent(intent: str, count: int, interaction_count: int = 10) -> dict:
+    """Return the default "u_proact" profile with *intent* repeated *count* times.
+
+    ``interaction_count`` is overridden as well; all other fields keep the
+    values produced by ``_default_user_profile``.
+    """
+    profile = _default_user_profile("u_proact")
+    profile["interaction_count"] = interaction_count
+    profile["known_intents"] = [intent for _ in range(count)]
+    return profile
+
+
+def test_proactivity_all_conditions_met():
+    """All conditions hold → added=True, phrase of at most 120 chars, no '!'."""
+    profile = _profile_with_intent("plan_day", count=5, interaction_count=10)
+    base = "Ось твій план на день."
+    result, added = maybe_add_proactivity(base, profile, depth="deep", reflection=None)
+    assert added is True
+    # Whatever follows the first len(base) characters is treated as the added phrase.
+    suffix = result[len(base):].strip()
+    assert len(suffix) <= 120, f"Added phrase too long: {len(suffix)}"
+    assert "!" not in suffix, f"Exclamation found: {suffix!r}"
+
+
+def test_proactivity_light_depth_not_added():
+    """depth="light" → nothing is expected to be added."""
+    profile = _profile_with_intent("plan_day", count=5, interaction_count=10)
+    _reply, was_added = maybe_add_proactivity("Відповідь.", profile, depth="light")
+    assert was_added is False
+
+
+def test_proactivity_not_tenth_interaction():
+    """interaction_count=7 (not a 10th interaction) → nothing is expected to be added."""
+    profile = _profile_with_intent("plan_day", count=5, interaction_count=7)
+    _reply, was_added = maybe_add_proactivity("Відповідь.", profile, depth="deep")
+    assert was_added is False
+
+
+def test_proactivity_zero_interaction_not_added():
+    """interaction_count=0 → nothing is expected to be added."""
+    profile = _profile_with_intent("plan_day", count=5, interaction_count=0)
+    _reply, was_added = maybe_add_proactivity("Відповідь.", profile, depth="deep")
+    assert was_added is False
+
+
+def test_proactivity_low_confidence_not_added():
+    """Reflection confidence of 0.5 → nothing is expected to be added."""
+    profile = _profile_with_intent("plan_day", count=5, interaction_count=10)
+    low_confidence = dict(confidence=0.5, new_facts={}, clarifying_question=None)
+    _reply, was_added = maybe_add_proactivity(
+        "Відповідь.", profile, depth="deep", reflection=low_confidence
+    )
+    assert was_added is False
+
+
+def test_proactivity_brief_with_question_not_added():
+    """style="concise" and a response containing "?" → nothing is expected to be added."""
+    profile = _profile_with_intent("plan_day", count=5, interaction_count=10)
+    profile["style"] = "concise"
+    _reply, was_added = maybe_add_proactivity("Ось план. Чи уточнити?", profile, depth="deep")
+    assert was_added is False
+
+
+def test_proactivity_intent_freq_low_not_added():
+    """Top intent seen only twice (fewer than 3) → nothing is expected to be added."""
+    profile = _profile_with_intent("plan_day", count=2, interaction_count=10)
+    _reply, was_added = maybe_add_proactivity("Відповідь.", profile, depth="deep")
+    assert was_added is False
+
+
+def test_proactivity_confidence_ok_added():
+    """confidence >= 0.7 → added=True."""
+    profile = _profile_with_intent("iot_sensors", count=4, interaction_count=10)
+    confident = dict(confidence=0.8, new_facts={}, clarifying_question=None)
+    _reply, was_added = maybe_add_proactivity(
+        "Статус датчиків перевірено.", profile, depth="deep", reflection=confident
+    )
+    assert was_added is True
+
+
+def test_proactivity_telemetry_added(caplog):
+    """A successful proactivity add is expected to log a tagged "proactivity_added" line."""
+    profile = _profile_with_intent("plan_day", count=5, interaction_count=10)
+    with caplog.at_level(logging.INFO, logger="crews.agromatrix_crew.proactivity"):
+        maybe_add_proactivity("Відповідь.", profile, depth="deep")
+    rendered = (rec.getMessage() for rec in caplog.records)
+    tagged = [text for text in rendered if TELEMETRY_TAG in text]
+    assert any("proactivity_added" in m for m in tagged), \
+        f"No proactivity_added telemetry. Got: {tagged}"
+
+
+def test_proactivity_telemetry_skipped_light(caplog):
+    """With depth="light" a tagged "proactivity_skipped" line is expected (captured at DEBUG)."""
+    profile = _profile_with_intent("plan_day", count=5, interaction_count=10)
+    with caplog.at_level(logging.DEBUG, logger="crews.agromatrix_crew.proactivity"):
+        maybe_add_proactivity("Відповідь.", profile, depth="light")
+    rendered = (rec.getMessage() for rec in caplog.records)
+    tagged = [text for text in rendered if TELEMETRY_TAG in text]
+    assert any("proactivity_skipped" in m for m in tagged), \
+        f"No proactivity_skipped telemetry. Got: {tagged}"
+
+
+# ─── _top_intent helper ───────────────────────────────────────────────────────
+
+def test_top_intent_returns_most_frequent():
+    """The intent occurring five times wins over the one occurring twice."""
+    history = ["plan_day"] * 5 + ["iot_sensors"] * 2
+    top, freq = _top_intent(history)
+    assert top == "plan_day"
+    assert freq == 5
+
+
+def test_top_intent_empty_returns_none():
+    """An empty intent list is expected to give ``(None, 0)``."""
+    top, freq = _top_intent([])
+    assert top is None
+    assert freq == 0
+
+
+def test_top_intent_none_returns_none():
+    """``_top_intent(None)`` is expected to give no intent and a zero count.
+
+    The count is checked as well, so the unpacked value is not left
+    unverified and the case matches the empty-list test.
+    """
+    intent, count = _top_intent(None)
+    assert intent is None
+    assert count == 0