"""Tests for Humanized Stepan v3 — Session Context + Soft Proactivity + Intent Stability Guard.

Covers:
  1. Session TTL expiry
  2. Session last_messages max 3
  3. Session isolation per chat_id
  4. Session update/load cycle
  5. Stability guard: last_depth=light + short → light reason=stability_guard
  6. Stability guard: action verb → deep (guard does not fire)
  7. Stability guard: urgent → deep (guard does not fire)
  8. Stability guard: >6 words → guard does not fire
  9. Proactivity: conditions met → added=True, phrase ≤120, no "!"
  10. Proactivity: depth=light → not added
  11. Proactivity: not 10th interaction → not added
  12. Proactivity: confidence < 0.7 → not added
  13. Proactivity: brief + "?" → not added
  14. Proactivity: intent_freq < 3 → not added
  15. Telemetry: AGX_STEPAN_METRIC session_updated / session_expired /
      stability_guard_triggered / proactivity_added
"""

import logging
import sys
import time
from copy import deepcopy
from pathlib import Path
from unittest.mock import patch

# Make the repository root and the bundled tools package importable before
# the project imports below run.
root = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(root))
sys.path.insert(0, str(root / 'packages' / 'agromatrix-tools'))

from crews.agromatrix_crew.session_context import (
    load_session,
    update_session,
    clear_session,
    _STORE,
    SESSION_TTL,
    _default_session,
)
from crews.agromatrix_crew.proactivity import (
    maybe_add_proactivity,
    _top_intent,
)
from crews.agromatrix_crew.depth_classifier import classify_depth
from crews.agromatrix_crew.memory_manager import _default_user_profile
from crews.agromatrix_crew.telemetry import TELEMETRY_TAG


# ─── Helper ───────────────────────────────────────────────────────────────────


class _CaptureHandler(logging.Handler):
    """Logging handler that keeps every record it receives in memory."""

    def __init__(self):
        super().__init__()
        # Records are stored in arrival order.
        self.records: list[logging.LogRecord] = []

    def emit(self, record):
        """Store *record* instead of writing it anywhere."""
        self.records.append(record)

    @property
    def messages(self):
        """Formatted message text of every stored record, in arrival order."""
        return [entry.getMessage() for entry in self.records]


def _attach(module_path: str) -> tuple[logging.Logger, _CaptureHandler]:
    """Attach a fresh capture handler to the logger named *module_path*.

    The logger level is set to DEBUG. Returns the logger together with the
    handler that was added to it.
    """
    capture = _CaptureHandler()
    target = logging.getLogger(module_path)
    target.setLevel(logging.DEBUG)
    target.addHandler(capture)
    return target, capture


def _profile(interaction_count=0, known_intents=None, style="conversational"):
    """Build a default profile for "test_user" with the given fields overridden."""
    profile = _default_user_profile("test_user")
    profile["interaction_count"] = interaction_count
    # A falsy known_intents (None or empty) becomes a fresh empty list.
    profile["known_intents"] = known_intents if known_intents else []
    profile["style"] = style
    return profile


# ─── 1. Session TTL expiry ────────────────────────────────────────────────────


def test_session_ttl_expiry_returns_default():
    chat = "ttl_test_chat"
    clear_session(chat)
    update_session(chat, "повідомлення", "deep")

    # Fast-forward the clock seen by session_context so the entry is stale.
    with patch("crews.agromatrix_crew.session_context.time") as fake_clock:
        fake_clock.time.return_value = time.time() + SESSION_TTL + 1
        loaded = load_session(chat)

    assert loaded["last_depth"] is None
    assert loaded["last_messages"] == []


def test_session_ttl_expiry_logs_expired(caplog):
    chat = "ttl_log_chat"
    clear_session(chat)
    update_session(chat, "msg", "deep")

    with patch("crews.agromatrix_crew.session_context.time") as fake_clock:
        with caplog.at_level(logging.INFO, logger="crews.agromatrix_crew.session_context"):
            fake_clock.time.return_value = time.time() + SESSION_TTL + 10
            load_session(chat)

    # Only telemetry-tagged messages are of interest here.
    tagged = [rec.getMessage() for rec in caplog.records if TELEMETRY_TAG in rec.getMessage()]
    assert any("session_expired" in line for line in tagged), f"No session_expired. Got: {tagged}"


# ─── 2. Session last_messages max 3 ──────────────────────────────────────────


def test_session_last_messages_max_3():
    chat = "msg_max_chat"
    clear_session(chat)
    for idx in range(4):
        update_session(chat, f"msg_{idx}", "light")

    history = load_session(chat)["last_messages"]
    assert len(history) == 3


def test_session_last_messages_keeps_newest():
    chat = "msg_newest_chat"
    clear_session(chat)
    for idx in range(4):
        update_session(chat, f"msg_{idx}", "light")

    history = load_session(chat)["last_messages"]
    # Expected to hold the newest three: msg_1, msg_2, msg_3.
    assert "msg_0" not in history
    assert "msg_3" in history


# ─── 3.
# Session isolation per chat_id ────────────────────────────────────────


def test_session_isolation():
    first, second = "iso_chat_A", "iso_chat_B"
    clear_session(first)
    clear_session(second)

    update_session(first, "msg_a", "deep")
    update_session(second, "msg_b", "light")

    session_first = load_session(first)
    session_second = load_session(second)

    assert session_first["last_depth"] == "deep"
    assert session_second["last_depth"] == "light"
    # Messages written to one chat must not show up in the other.
    assert "msg_a" not in session_second["last_messages"]
    assert "msg_b" not in session_first["last_messages"]


# ─── 4. Session update/load cycle ────────────────────────────────────────────


def test_session_update_load_roundtrip():
    chat = "roundtrip_chat"
    clear_session(chat)
    update_session(
        chat,
        "план на тиждень",
        "deep",
        agents=["ops", "iot"],
        last_question="Уточни поле?",
    )

    loaded = load_session(chat)
    assert loaded["last_depth"] == "deep"
    assert "план на тиждень" in loaded["last_messages"]
    assert loaded["last_agents"] == ["ops", "iot"]
    assert loaded["last_question"] == "Уточни поле?"


def test_session_agents_max_5():
    chat = "agents_max_chat"
    clear_session(chat)
    # Seven agents go in; the stored list is expected to hold five.
    seven_agents = ["a", "b", "c", "d", "e", "f", "g"]
    update_session(chat, "msg", "deep", agents=seven_agents)

    stored_agents = load_session(chat)["last_agents"]
    assert len(stored_agents) == 5


def test_session_telemetry_updated(caplog):
    chat = "tlog_update_chat"
    clear_session(chat)

    with caplog.at_level(logging.INFO, logger="crews.agromatrix_crew.session_context"):
        update_session(chat, "тест", "light")

    tagged = [rec.getMessage() for rec in caplog.records if TELEMETRY_TAG in rec.getMessage()]
    assert any("session_updated" in line for line in tagged), f"No session_updated. Got: {tagged}"


def test_session_no_crash_empty_chat_id():
    """load/update with an empty chat_id must not raise."""
    loaded = load_session("")
    assert loaded is not None
    update_session("", "msg", "deep")  # passing means no exception was raised


# ─── 5–8.
# Stability Guard ─────────────────────────────────────────────────────


def _session_after(depth):
    """Build a minimal session dict whose previous turn had the given depth."""
    return {
        "last_depth": depth,
        "last_messages": [],
        "last_agents": [],
        "last_question": None,
        "updated_at": time.time(),
    }


def _telemetry_lines(caplog):
    """Return captured log messages that contain TELEMETRY_TAG."""
    every_message = (rec.getMessage() for rec in caplog.records)
    return [text for text in every_message if TELEMETRY_TAG in text]


def test_stability_guard_short_after_light():
    """last_depth=light + ≤6 words + no action verbs → light (reason=stability_guard)."""
    previous = _session_after("light")
    depth = classify_depth("а на завтра?", session=previous)
    assert depth == "light"


def test_stability_guard_action_verb_overrides():
    """An action verb overrides the guard → deep."""
    previous = _session_after("light")
    depth = classify_depth("зроби план на завтра", session=previous)
    assert depth == "deep"


def test_stability_guard_urgent_overrides():
    """An urgent word overrides the guard → deep."""
    previous = _session_after("light")
    depth = classify_depth("терміново на завтра?", session=previous)
    assert depth == "deep"


def test_stability_guard_long_text_no_guard():
    """7+ words → the guard does not fire (regular logic applies)."""
    previous = _session_after("light")
    # The message has eight words and opens with the action verb "перевір",
    # so the expected result is deep regardless of the guard.
    depth = classify_depth("перевір статус поля один два три чотири п'ять", session=previous)
    assert depth == "deep"


def test_stability_guard_no_session_works_normally():
    """Without a session the logic is unchanged."""
    depth = classify_depth("а на завтра?", session=None, last_topic="plan_day")
    assert depth == "light"  # short_followup_last_topic


def test_stability_guard_last_depth_deep_no_guard():
    """last_depth=deep → the guard does not fire."""
    previous = _session_after("deep")
    # Short message, but the previous turn was deep — normal classification.
    depth = classify_depth("а завтра?", session=previous, last_topic="plan_day")
    # Normal short_followup_last_topic → light
    assert depth == "light"


def test_stability_guard_telemetry(caplog):
    """Stability guard → AGX_STEPAN_METRIC stability_guard_triggered."""
    previous = _session_after("light")
    with caplog.at_level(logging.INFO, logger="crews.agromatrix_crew.depth_classifier"):
        classify_depth("а завтра?", session=previous)

    tagged = _telemetry_lines(caplog)
    assert any("stability_guard_triggered" in line for line in tagged), \
        f"No stability_guard_triggered. Got: {tagged}"


# ─── 9–14. Proactivity ───────────────────────────────────────────────────────


def _profile_with_intent(intent: str, count: int, interaction_count: int = 10) -> dict:
    """Build a default "u_proact" profile whose known_intents repeats *intent* *count* times."""
    profile = _default_user_profile("u_proact")
    profile["interaction_count"] = interaction_count
    profile["known_intents"] = [intent] * count
    return profile


def test_proactivity_all_conditions_met():
    """All conditions met → added=True, phrase ≤120 characters, no '!'."""
    profile = _profile_with_intent("plan_day", count=5, interaction_count=10)
    response = "Ось твій план на день."

    updated, added = maybe_add_proactivity(response, profile, depth="deep", reflection=None)
    assert added is True

    # Whatever follows the original response text is treated as the added phrase.
    suffix = updated[len(response):].strip()
    assert len(suffix) <= 120, f"Added phrase too long: {len(suffix)}"
    assert "!" not in suffix, f"Exclamation found: {suffix!r}"


def test_proactivity_light_depth_not_added():
    profile = _profile_with_intent("plan_day", count=5, interaction_count=10)
    outcome = maybe_add_proactivity("Відповідь.", profile, depth="light")
    _, added = outcome
    assert added is False


def test_proactivity_not_tenth_interaction():
    profile = _profile_with_intent("plan_day", count=5, interaction_count=7)
    outcome = maybe_add_proactivity("Відповідь.", profile, depth="deep")
    _, added = outcome
    assert added is False


def test_proactivity_zero_interaction_not_added():
    profile = _profile_with_intent("plan_day", count=5, interaction_count=0)
    outcome = maybe_add_proactivity("Відповідь.", profile, depth="deep")
    _, added = outcome
    assert added is False


def test_proactivity_low_confidence_not_added():
    profile = _profile_with_intent("plan_day", count=5, interaction_count=10)
    low_confidence = {"confidence": 0.5, "new_facts": {}, "clarifying_question": None}
    outcome = maybe_add_proactivity("Відповідь.", profile, depth="deep", reflection=low_confidence)
    _, added = outcome
    assert added is False


def test_proactivity_brief_with_question_not_added():
    profile = _profile_with_intent("plan_day", count=5, interaction_count=10)
    profile["style"] = "concise"
    response = "Ось план. Чи уточнити?"
    outcome = maybe_add_proactivity(response, profile, depth="deep")
    _, added = outcome
    assert added is False


def test_proactivity_intent_freq_low_not_added():
    # Only two occurrences of the intent, i.e. fewer than three.
    profile = _profile_with_intent("plan_day", count=2, interaction_count=10)
    outcome = maybe_add_proactivity("Відповідь.", profile, depth="deep")
    _, added = outcome
    assert added is False


def test_proactivity_confidence_ok_added():
    """confidence >= 0.7 → added=True."""
    profile = _profile_with_intent("iot_sensors", count=4, interaction_count=10)
    confident = {"confidence": 0.8, "new_facts": {}, "clarifying_question": None}
    outcome = maybe_add_proactivity(
        "Статус датчиків перевірено.", profile, depth="deep", reflection=confident
    )
    _, added = outcome
    assert added is True


def test_proactivity_telemetry_added(caplog):
    profile = _profile_with_intent("plan_day", count=5, interaction_count=10)
    with caplog.at_level(logging.INFO, logger="crews.agromatrix_crew.proactivity"):
        maybe_add_proactivity("Відповідь.", profile, depth="deep")

    tagged = _telemetry_lines(caplog)
    assert any("proactivity_added" in line for line in tagged), \
        f"No proactivity_added telemetry. Got: {tagged}"


def test_proactivity_telemetry_skipped_light(caplog):
    profile = _profile_with_intent("plan_day", count=5, interaction_count=10)
    # Captured at DEBUG level, unlike the INFO level used for the "added" test.
    with caplog.at_level(logging.DEBUG, logger="crews.agromatrix_crew.proactivity"):
        maybe_add_proactivity("Відповідь.", profile, depth="light")

    tagged = _telemetry_lines(caplog)
    assert any("proactivity_skipped" in line for line in tagged), \
        f"No proactivity_skipped telemetry. Got: {tagged}"


# ─── _top_intent helper ───────────────────────────────────────────────────────


def test_top_intent_returns_most_frequent():
    history = ["plan_day"] * 5 + ["iot_sensors"] * 2
    winner, occurrences = _top_intent(history)
    assert winner == "plan_day"
    assert occurrences == 5


def test_top_intent_empty_returns_none():
    winner, occurrences = _top_intent([])
    assert winner is None
    assert occurrences == 0


def test_top_intent_none_returns_none():
    winner, occurrences = _top_intent(None)
    assert winner is None