New router intelligence modules (26 files): alert_ingest/store, audit_store, architecture_pressure, backlog_generator/store, cost_analyzer, data_governance, dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment, platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files), signature_state_store, sofiia_auto_router, tool_governance New services: - sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static - memory-service: integration_endpoints, integrations, voice_endpoints, static UI - aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents) - sofiia-supervisor: new supervisor service - aistalk-bridge-lite: Telegram bridge lite - calendar-service: CalDAV calendar service with reminders - mlx-stt-service / mlx-tts-service: Apple Silicon speech services - binance-bot-monitor: market monitor service - node-worker: STT/TTS memory providers New tools (9): agent_email, browser_tool, contract_tool, observability_tool, oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault New crews: agromatrix_crew (10 modules: depth_classifier, doc_facts, doc_focus, farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine, session_context, style_adapter, telemetry) Tests: 85+ test files for all new modules Made-with: Cursor
351 lines
14 KiB
Python
351 lines
14 KiB
Python
"""
|
||
Tests for Humanized Stepan v3 — Session Context + Soft Proactivity + Intent Stability Guard.

Covers:
  1. Session TTL expiry
  2. Session last_messages max 3
  3. Session isolation per chat_id
  4. Session update/load cycle
  5. Stability guard: last_depth=light + short → light reason=stability_guard
  6. Stability guard: action verb → deep (guard does not fire)
  7. Stability guard: urgent → deep (guard does not fire)
  8. Stability guard: >6 words → guard does not fire
  9. Proactivity: all conditions met → added=True, phrase ≤120, no "!"
  10. Proactivity: depth=light → not added
  11. Proactivity: not 10th interaction → not added
  12. Proactivity: confidence < 0.7 → not added
  13. Proactivity: brief + "?" → not added
  14. Proactivity: intent_freq < 3 → not added
  15. Telemetry: AGX_STEPAN_METRIC session_updated / session_expired / stability_guard_triggered / proactivity_added
|
||
"""
|
||
|
||
import logging
|
||
import sys
|
||
import time
|
||
from copy import deepcopy
|
||
from pathlib import Path
|
||
from unittest.mock import patch
|
||
|
||
root = Path(__file__).resolve().parents[1]
|
||
sys.path.insert(0, str(root))
|
||
sys.path.insert(0, str(root / 'packages' / 'agromatrix-tools'))
|
||
|
||
from crews.agromatrix_crew.session_context import (
|
||
load_session, update_session, clear_session, _STORE, SESSION_TTL, _default_session,
|
||
)
|
||
from crews.agromatrix_crew.proactivity import (
|
||
maybe_add_proactivity, _top_intent,
|
||
)
|
||
from crews.agromatrix_crew.depth_classifier import classify_depth
|
||
from crews.agromatrix_crew.memory_manager import _default_user_profile
|
||
from crews.agromatrix_crew.telemetry import TELEMETRY_TAG
|
||
|
||
|
||
# ─── Helper ───────────────────────────────────────────────────────────────────
|
||
|
||
class _CaptureHandler(logging.Handler):
    """Logging handler that keeps every emitted record in memory.

    Attributes:
        records: Log records, in the order they were emitted.
    """

    def __init__(self):
        """Initialise the base handler and start with an empty record list."""
        super().__init__()
        self.records: list[logging.LogRecord] = []

    def emit(self, record):
        """Store ``record`` instead of writing it anywhere."""
        self.records += [record]

    @property
    def messages(self):
        """Return the formatted message of each captured record, in order."""
        formatted = []
        for captured in self.records:
            formatted.append(captured.getMessage())
        return formatted
|
||
|
||
|
||
def _attach(module_path: str) -> tuple[logging.Logger, _CaptureHandler]:
    """Hook a fresh capturing handler onto the logger named ``module_path``.

    The logger's level is set to DEBUG. The handler is not removed here;
    detaching it is left to the caller.

    Returns:
        The ``(logger, handler)`` pair.
    """
    capture = _CaptureHandler()
    target_logger = logging.getLogger(module_path)
    target_logger.setLevel(logging.DEBUG)
    target_logger.addHandler(capture)
    return target_logger, capture
|
||
|
||
|
||
def _profile(interaction_count=0, known_intents=None, style="conversational"):
    """Build a default profile for ``"test_user"`` with selected fields overridden.

    A falsy ``known_intents`` (``None`` or empty) is stored as a new empty list.
    """
    profile = _default_user_profile("test_user")
    profile["interaction_count"] = interaction_count
    profile["known_intents"] = known_intents if known_intents else []
    profile["style"] = style
    return profile
|
||
|
||
|
||
# ─── 1. Session TTL expiry ────────────────────────────────────────────────────
|
||
|
||
def test_session_ttl_expiry_returns_default():
    """A session loaded after its TTL has elapsed has no depth and no messages."""
    expired_chat = "ttl_test_chat"
    clear_session(expired_chat)
    update_session(expired_chat, "повідомлення", "deep")

    # Move the clock seen by session_context just past the TTL.
    future_now = time.time() + SESSION_TTL + 1
    with patch("crews.agromatrix_crew.session_context.time") as fake_clock:
        fake_clock.time.return_value = future_now
        loaded = load_session(expired_chat)

    assert loaded["last_depth"] is None
    assert loaded["last_messages"] == []
|
||
|
||
|
||
def test_session_ttl_expiry_logs_expired(caplog):
    """Loading an expired session emits a tagged ``session_expired`` log line."""
    expired_chat = "ttl_log_chat"
    clear_session(expired_chat)
    update_session(expired_chat, "msg", "deep")

    future_now = time.time() + SESSION_TTL + 10
    with patch("crews.agromatrix_crew.session_context.time") as fake_clock:
        with caplog.at_level(logging.INFO, logger="crews.agromatrix_crew.session_context"):
            fake_clock.time.return_value = future_now
            load_session(expired_chat)

    # Keep only the log lines that carry the telemetry tag.
    tagged = []
    for record in caplog.records:
        text = record.getMessage()
        if TELEMETRY_TAG in text:
            tagged.append(text)
    expired_lines = [line for line in tagged if "session_expired" in line]
    assert expired_lines, f"No session_expired. Got: {tagged}"
|
||
|
||
|
||
# ─── 2. Session last_messages max 3 ──────────────────────────────────────────
|
||
|
||
def test_session_last_messages_max_3():
    """Four consecutive updates leave exactly three entries in ``last_messages``."""
    chat = "msg_max_chat"
    clear_session(chat)
    for text in ("msg_0", "msg_1", "msg_2", "msg_3"):
        update_session(chat, text, "light")
    history = load_session(chat)["last_messages"]
    assert len(history) == 3
|
||
|
||
|
||
def test_session_last_messages_keeps_newest():
    """After four updates the oldest message is dropped and the newest is kept."""
    chat = "msg_newest_chat"
    clear_session(chat)
    index = 0
    while index < 4:
        update_session(chat, f"msg_{index}", "light")
        index += 1
    history = load_session(chat)["last_messages"]
    # Expected survivors are the newest three: msg_1, msg_2, msg_3.
    assert "msg_0" not in history
    assert "msg_3" in history
|
||
|
||
|
||
# ─── 3. Session isolation per chat_id ────────────────────────────────────────
|
||
|
||
def test_session_isolation():
    """Updates to one chat_id are not visible in another chat_id's session."""
    first_chat, second_chat = "iso_chat_A", "iso_chat_B"
    for chat in (first_chat, second_chat):
        clear_session(chat)
    update_session(first_chat, "msg_a", "deep")
    update_session(second_chat, "msg_b", "light")

    first_session = load_session(first_chat)
    second_session = load_session(second_chat)

    # Each chat keeps its own depth ...
    assert first_session["last_depth"] == "deep"
    assert second_session["last_depth"] == "light"
    # ... and neither history contains the other chat's message.
    assert "msg_a" not in second_session["last_messages"]
    assert "msg_b" not in first_session["last_messages"]
|
||
|
||
|
||
# ─── 4. Session update/load cycle ────────────────────────────────────────────
|
||
|
||
def test_session_update_load_roundtrip():
    """Everything written by ``update_session`` is readable via ``load_session``."""
    chat = "roundtrip_chat"
    clear_session(chat)
    update_session(
        chat,
        "план на тиждень",
        "deep",
        agents=["ops", "iot"],
        last_question="Уточни поле?",
    )
    stored = load_session(chat)
    assert stored["last_depth"] == "deep"
    assert "план на тиждень" in stored["last_messages"]
    assert stored["last_agents"] == ["ops", "iot"]
    assert stored["last_question"] == "Уточни поле?"
|
||
|
||
|
||
def test_session_agents_max_5():
    """Seven agents passed in one update are stored as a list of five."""
    chat = "agents_max_chat"
    clear_session(chat)
    seven_agents = list("abcdefg")
    update_session(chat, "msg", "deep", agents=seven_agents)
    stored_agents = load_session(chat)["last_agents"]
    assert len(stored_agents) == 5
|
||
|
||
|
||
def test_session_telemetry_updated(caplog):
    """``update_session`` emits a tagged ``session_updated`` log line."""
    chat = "tlog_update_chat"
    clear_session(chat)
    with caplog.at_level(logging.INFO, logger="crews.agromatrix_crew.session_context"):
        update_session(chat, "тест", "light")

    # Keep only the log lines that carry the telemetry tag.
    tagged = []
    for record in caplog.records:
        text = record.getMessage()
        if TELEMETRY_TAG in text:
            tagged.append(text)
    updated_lines = [line for line in tagged if "session_updated" in line]
    assert updated_lines, f"No session_updated. Got: {tagged}"
|
||
|
||
|
||
def test_session_no_crash_empty_chat_id():
    """load/update with an empty chat_id must not raise."""
    empty_id = ""
    loaded = load_session(empty_id)
    assert loaded is not None
    # Passing here simply means the call returned without raising.
    update_session(empty_id, "msg", "deep")
|
||
|
||
|
||
# ─── 5–8. Stability Guard ─────────────────────────────────────────────────────
|
||
|
||
def test_stability_guard_short_after_light():
    """last_depth=light + ≤6 words + no action verbs → light.

    The intended reason is ``stability_guard``; only the depth is asserted here.
    """
    prior_turn = dict(
        last_depth="light",
        last_messages=[],
        last_agents=[],
        last_question=None,
        updated_at=time.time(),
    )
    assert classify_depth("а на завтра?", session=prior_turn) == "light"
|
||
|
||
|
||
def test_stability_guard_action_verb_overrides():
    """An action verb overrides the guard → deep."""
    prior_turn = dict(
        last_depth="light",
        last_messages=[],
        last_agents=[],
        last_question=None,
        updated_at=time.time(),
    )
    depth = classify_depth("зроби план на завтра", session=prior_turn)
    assert depth == "deep"
|
||
|
||
|
||
def test_stability_guard_urgent_overrides():
    """An urgency word overrides the guard → deep."""
    prior_turn = dict(
        last_depth="light",
        last_messages=[],
        last_agents=[],
        last_question=None,
        updated_at=time.time(),
    )
    depth = classify_depth("терміново на завтра?", session=prior_turn)
    assert depth == "deep"
|
||
|
||
|
||
def test_stability_guard_long_text_no_guard():
    """7+ words → the guard does not fire (regular logic applies)."""
    prior_turn = dict(
        last_depth="light",
        last_messages=[],
        last_agents=[],
        last_question=None,
        updated_at=time.time(),
    )
    # The input has eight whitespace-separated tokens and opens with the
    # action verb "перевір", so "deep" is expected regardless of the guard.
    long_request = "перевір статус поля один два три чотири п'ять"
    assert classify_depth(long_request, session=prior_turn) == "deep"
|
||
|
||
|
||
def test_stability_guard_no_session_works_normally():
    """Without a session the classification logic is unchanged."""
    depth = classify_depth("а на завтра?", session=None, last_topic="plan_day")
    # Expected path: short_followup_last_topic.
    assert depth == "light"
|
||
|
||
|
||
def test_stability_guard_last_depth_deep_no_guard():
    """last_depth=deep → the guard does not fire."""
    prior_turn = dict(
        last_depth="deep",
        last_messages=[],
        last_agents=[],
        last_question=None,
        updated_at=time.time(),
    )
    # Short message after a deep turn: regular classification applies, and the
    # short_followup_last_topic path is expected to yield "light".
    depth = classify_depth("а завтра?", session=prior_turn, last_topic="plan_day")
    assert depth == "light"
|
||
|
||
|
||
def test_stability_guard_telemetry(caplog):
    """Stability guard → AGX_STEPAN_METRIC stability_guard_triggered."""
    prior_turn = dict(
        last_depth="light",
        last_messages=[],
        last_agents=[],
        last_question=None,
        updated_at=time.time(),
    )
    with caplog.at_level(logging.INFO, logger="crews.agromatrix_crew.depth_classifier"):
        classify_depth("а завтра?", session=prior_turn)

    # Keep only the log lines that carry the telemetry tag.
    tagged = []
    for record in caplog.records:
        text = record.getMessage()
        if TELEMETRY_TAG in text:
            tagged.append(text)
    guard_lines = [line for line in tagged if "stability_guard_triggered" in line]
    assert guard_lines, f"No stability_guard_triggered. Got: {tagged}"
|
||
|
||
|
||
# ─── 9–14. Proactivity ───────────────────────────────────────────────────────
|
||
|
||
def _profile_with_intent(intent: str, count: int, interaction_count: int = 10) -> dict:
    """Build a default profile for ``"u_proact"`` whose history is ``intent`` × ``count``.

    Args:
        intent: Intent name repeated in ``known_intents``.
        count: How many times ``intent`` appears.
        interaction_count: Value stored under ``interaction_count``.
    """
    profile = _default_user_profile("u_proact")
    profile["interaction_count"] = interaction_count
    profile["known_intents"] = [intent for _ in range(count)]
    return profile
|
||
|
||
|
||
def test_proactivity_all_conditions_met():
    """All conditions met → added=True, phrase ≤120 characters, no '!'."""
    profile = _profile_with_intent("plan_day", count=5, interaction_count=10)
    base_reply = "Ось твій план на день."
    full_reply, added = maybe_add_proactivity(
        base_reply, profile, depth="deep", reflection=None
    )
    assert added is True

    # Whatever follows the original reply is treated as the proactive phrase.
    tail = full_reply[len(base_reply):]
    added_part = tail.strip()
    assert len(added_part) <= 120, f"Added phrase too long: {len(added_part)}"
    assert "!" not in added_part, f"Exclamation found: {added_part!r}"
|
||
|
||
|
||
def test_proactivity_light_depth_not_added():
    """depth=light → no proactive phrase is added."""
    profile = _profile_with_intent("plan_day", count=5, interaction_count=10)
    _reply, was_added = maybe_add_proactivity("Відповідь.", profile, depth="light")
    assert was_added is False
|
||
|
||
|
||
def test_proactivity_not_tenth_interaction():
    """interaction_count=7 → no proactive phrase is added."""
    profile = _profile_with_intent("plan_day", count=5, interaction_count=7)
    _reply, was_added = maybe_add_proactivity("Відповідь.", profile, depth="deep")
    assert was_added is False
|
||
|
||
|
||
def test_proactivity_zero_interaction_not_added():
    """interaction_count=0 → no proactive phrase is added."""
    profile = _profile_with_intent("plan_day", count=5, interaction_count=0)
    _reply, was_added = maybe_add_proactivity("Відповідь.", profile, depth="deep")
    assert was_added is False
|
||
|
||
|
||
def test_proactivity_low_confidence_not_added():
    """Reflection confidence 0.5 → no proactive phrase is added."""
    profile = _profile_with_intent("plan_day", count=5, interaction_count=10)
    low_confidence = dict(confidence=0.5, new_facts={}, clarifying_question=None)
    _reply, was_added = maybe_add_proactivity(
        "Відповідь.", profile, depth="deep", reflection=low_confidence
    )
    assert was_added is False
|
||
|
||
|
||
def test_proactivity_brief_with_question_not_added():
    """style="concise" plus a reply containing "?" → no proactive phrase is added."""
    profile = _profile_with_intent("plan_day", count=5, interaction_count=10)
    profile["style"] = "concise"
    reply_with_question = "Ось план. Чи уточнити?"
    _reply, was_added = maybe_add_proactivity(reply_with_question, profile, depth="deep")
    assert was_added is False
|
||
|
||
|
||
def test_proactivity_intent_freq_low_not_added():
    """Top intent seen only twice (< 3) → no proactive phrase is added."""
    profile = _profile_with_intent("plan_day", count=2, interaction_count=10)
    _reply, was_added = maybe_add_proactivity("Відповідь.", profile, depth="deep")
    assert was_added is False
|
||
|
||
|
||
def test_proactivity_confidence_ok_added():
    """confidence >= 0.7 → added=True."""
    profile = _profile_with_intent("iot_sensors", count=4, interaction_count=10)
    good_confidence = dict(confidence=0.8, new_facts={}, clarifying_question=None)
    _reply, was_added = maybe_add_proactivity(
        "Статус датчиків перевірено.",
        profile,
        depth="deep",
        reflection=good_confidence,
    )
    assert was_added is True
|
||
|
||
|
||
def test_proactivity_telemetry_added(caplog):
    """A successful proactivity call emits a tagged ``proactivity_added`` log line."""
    profile = _profile_with_intent("plan_day", count=5, interaction_count=10)
    with caplog.at_level(logging.INFO, logger="crews.agromatrix_crew.proactivity"):
        maybe_add_proactivity("Відповідь.", profile, depth="deep")

    # Keep only the log lines that carry the telemetry tag.
    tagged = []
    for record in caplog.records:
        text = record.getMessage()
        if TELEMETRY_TAG in text:
            tagged.append(text)
    added_lines = [line for line in tagged if "proactivity_added" in line]
    assert added_lines, f"No proactivity_added telemetry. Got: {tagged}"
|
||
|
||
|
||
def test_proactivity_telemetry_skipped_light(caplog):
    """With depth=light a tagged ``proactivity_skipped`` line is logged (captured at DEBUG)."""
    profile = _profile_with_intent("plan_day", count=5, interaction_count=10)
    with caplog.at_level(logging.DEBUG, logger="crews.agromatrix_crew.proactivity"):
        maybe_add_proactivity("Відповідь.", profile, depth="light")

    # Keep only the log lines that carry the telemetry tag.
    tagged = []
    for record in caplog.records:
        text = record.getMessage()
        if TELEMETRY_TAG in text:
            tagged.append(text)
    skipped_lines = [line for line in tagged if "proactivity_skipped" in line]
    assert skipped_lines, f"No proactivity_skipped telemetry. Got: {tagged}"
|
||
|
||
|
||
# ─── _top_intent helper ───────────────────────────────────────────────────────
|
||
|
||
def test_top_intent_returns_most_frequent():
    """The most frequent intent is returned together with its count."""
    history = ["plan_day"] * 5
    history.extend(["iot_sensors"] * 2)
    top, freq = _top_intent(history)
    assert top == "plan_day"
    assert freq == 5
|
||
|
||
|
||
def test_top_intent_empty_returns_none():
    """An empty intent list yields ``(None, 0)``."""
    no_history = []
    top, freq = _top_intent(no_history)
    assert top is None
    assert freq == 0
|
||
|
||
|
||
def test_top_intent_none_returns_none():
|
||
intent, count = _top_intent(None)
|
||
assert intent is None
|