feat(platform): add new services, tools, tests and crews modules

New router intelligence modules (26 files): alert_ingest/store, audit_store,
architecture_pressure, backlog_generator/store, cost_analyzer, data_governance,
dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment,
platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files),
signature_state_store, sofiia_auto_router, tool_governance

New services:
- sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static
- memory-service: integration_endpoints, integrations, voice_endpoints, static UI
- aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents)
- sofiia-supervisor: new supervisor service
- aistalk-bridge-lite: Telegram bridge lite
- calendar-service: CalDAV calendar service with reminders
- mlx-stt-service / mlx-tts-service: Apple Silicon speech services
- binance-bot-monitor: market monitor service
- node-worker: STT/TTS memory providers

New tools (9): agent_email, browser_tool, contract_tool, observability_tool,
oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault

New crews: agromatrix_crew (12 modules: depth_classifier, doc_facts, doc_focus,
farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine,
session_context, style_adapter, telemetry)

Tests: 85+ test files for all new modules
Made-with: Cursor
This commit is contained in:
Apple
2026-03-03 07:14:14 -08:00
parent e9dedffa48
commit 129e4ea1fc
241 changed files with 69349 additions and 0 deletions

View File

@@ -0,0 +1,350 @@
"""
Tests для Humanized Stepan v3 — Session Context + Soft Proactivity + Intent Stability Guard.
Покриває:
1. Session TTL expiry
2. Session last_messages max 3
3. Session isolation per chat_id
4. Session update/load cycle
5. Stability guard: last_depth=light + short → light reason=stability_guard
6. Stability guard: action verb → deep (guard не спрацьовує)
7. Stability guard: urgent → deep (guard не спрацьовує)
8. Stability guard: >6 слів → guard не спрацьовує
9. Proactivity: умови виконані → added=True, фраза ≤120, без "!"
10. Proactivity: depth=light → not added
11. Proactivity: not 10th interaction → not added
12. Proactivity: confidence < 0.7 → not added
13. Proactivity: brief + "?" → not added
14. Proactivity: intent_freq < 3 → not added
15. Telemetry: AGX_STEPAN_METRIC session_updated / session_expired / stability_guard_triggered / proactivity_added
"""
import logging
import sys
import time
from copy import deepcopy
from pathlib import Path
from unittest.mock import patch
root = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(root))
sys.path.insert(0, str(root / 'packages' / 'agromatrix-tools'))
from crews.agromatrix_crew.session_context import (
load_session, update_session, clear_session, _STORE, SESSION_TTL, _default_session,
)
from crews.agromatrix_crew.proactivity import (
maybe_add_proactivity, _top_intent,
)
from crews.agromatrix_crew.depth_classifier import classify_depth
from crews.agromatrix_crew.memory_manager import _default_user_profile
from crews.agromatrix_crew.telemetry import TELEMETRY_TAG
# ─── Helper ───────────────────────────────────────────────────────────────────
class _CaptureHandler(logging.Handler):
    """In-memory logging handler that stores every record it is given."""

    def __init__(self):
        super().__init__()
        # Records are kept in arrival order so tests can inspect them later.
        self.records: list[logging.LogRecord] = []

    def emit(self, record):
        """Remember *record* instead of writing it anywhere."""
        self.records.append(record)

    @property
    def messages(self):
        """Return the rendered message of each captured record, oldest first."""
        rendered = []
        for captured in self.records:
            rendered.append(captured.getMessage())
        return rendered
def _attach(module_path: str) -> tuple[logging.Logger, _CaptureHandler]:
    """Hook a fresh capture handler onto the logger named *module_path*.

    The logger's level is set to DEBUG before the handler is added.  The
    handler is not removed here; the caller owns its lifetime.

    Returns:
        The ``(logger, handler)`` pair.
    """
    capture = _CaptureHandler()
    target = logging.getLogger(module_path)
    target.setLevel(logging.DEBUG)
    target.addHandler(capture)
    return target, capture
def _profile(interaction_count=0, known_intents=None, style="conversational"):
    """Build the default profile for "test_user" with three fields overridden.

    A falsy *known_intents* (``None`` or an empty list) is stored as a new
    empty list.
    """
    profile = _default_user_profile("test_user")
    overrides = (
        ("interaction_count", interaction_count),
        ("known_intents", known_intents if known_intents else []),
        ("style", style),
    )
    for field, value in overrides:
        profile[field] = value
    return profile
# ─── 1. Session TTL expiry ────────────────────────────────────────────────────
def test_session_ttl_expiry_returns_default():
    """A session read after SESSION_TTL has passed has no depth and no messages."""
    chat_id = "ttl_test_chat"
    clear_session(chat_id)
    update_session(chat_id, "повідомлення", "deep")

    # Move the session module's clock just past the TTL so the entry is stale.
    with patch("crews.agromatrix_crew.session_context.time") as fake_time:
        stale_moment = time.time() + SESSION_TTL + 1
        fake_time.time.return_value = stale_moment
        session = load_session(chat_id)

    assert session["last_depth"] is None
    assert session["last_messages"] == []
def test_session_ttl_expiry_logs_expired(caplog):
    """Loading a stale session logs a telemetry-tagged ``session_expired`` message."""
    chat_id = "ttl_log_chat"
    clear_session(chat_id)
    update_session(chat_id, "msg", "deep")

    with patch("crews.agromatrix_crew.session_context.time") as fake_time:
        with caplog.at_level(logging.INFO, logger="crews.agromatrix_crew.session_context"):
            # Ten seconds beyond the TTL, as seen by the session module.
            fake_time.time.return_value = time.time() + SESSION_TTL + 10
            load_session(chat_id)

    # Keep only the messages that carry the telemetry tag.
    tagged = []
    for entry in caplog.records:
        text = entry.getMessage()
        if TELEMETRY_TAG in text:
            tagged.append(text)
    assert any("session_expired" in m for m in tagged), f"No session_expired. Got: {tagged}"
# ─── 2. Session last_messages max 3 ──────────────────────────────────────────
def test_session_last_messages_max_3():
    """After four updates the session holds exactly three messages."""
    chat_id = "msg_max_chat"
    clear_session(chat_id)
    outgoing = [f"msg_{i}" for i in range(4)]
    for text in outgoing:
        update_session(chat_id, text, "light")
    stored = load_session(chat_id)["last_messages"]
    assert len(stored) == 3
def test_session_last_messages_keeps_newest():
    """Of msg_0..msg_3, the first is dropped and the last is retained."""
    chat_id = "msg_newest_chat"
    clear_session(chat_id)
    outgoing = [f"msg_{i}" for i in range(4)]
    for text in outgoing:
        update_session(chat_id, text, "light")
    kept = load_session(chat_id)["last_messages"]
    # Expected survivors are msg_1, msg_2 and msg_3.
    assert "msg_0" not in kept
    assert "msg_3" in kept
# ─── 3. Session isolation per chat_id ────────────────────────────────────────
def test_session_isolation():
    """Two chat ids keep separate depth and message history."""
    chat_a, chat_b = "iso_chat_A", "iso_chat_B"
    for chat in (chat_a, chat_b):
        clear_session(chat)

    update_session(chat_a, "msg_a", "deep")
    update_session(chat_b, "msg_b", "light")

    session_a = load_session(chat_a)
    session_b = load_session(chat_b)

    assert session_a["last_depth"] == "deep"
    assert session_b["last_depth"] == "light"
    # Neither session may contain the other chat's message.
    assert "msg_a" not in session_b["last_messages"]
    assert "msg_b" not in session_a["last_messages"]
# ─── 4. Session update/load cycle ────────────────────────────────────────────
def test_session_update_load_roundtrip():
    """Values written by update_session are read back by load_session."""
    chat_id = "roundtrip_chat"
    clear_session(chat_id)
    update_session(
        chat_id,
        "план на тиждень",
        "deep",
        agents=["ops", "iot"],
        last_question="Уточни поле?",
    )
    loaded = load_session(chat_id)
    assert loaded["last_depth"] == "deep"
    assert "план на тиждень" in loaded["last_messages"]
    assert loaded["last_agents"] == ["ops", "iot"]
    assert loaded["last_question"] == "Уточни поле?"
def test_session_agents_max_5():
    """Seven agents go in; the stored ``last_agents`` list has five."""
    chat_id = "agents_max_chat"
    clear_session(chat_id)
    seven_agents = list("abcdefg")
    update_session(chat_id, "msg", "deep", agents=seven_agents)
    stored_agents = load_session(chat_id)["last_agents"]
    assert len(stored_agents) == 5
def test_session_telemetry_updated(caplog):
    """update_session logs a telemetry-tagged ``session_updated`` message."""
    chat_id = "tlog_update_chat"
    clear_session(chat_id)
    with caplog.at_level(logging.INFO, logger="crews.agromatrix_crew.session_context"):
        update_session(chat_id, "тест", "light")

    # Keep only the messages that carry the telemetry tag.
    tagged = []
    for entry in caplog.records:
        text = entry.getMessage()
        if TELEMETRY_TAG in text:
            tagged.append(text)
    assert any("session_updated" in m for m in tagged), f"No session_updated. Got: {tagged}"
def test_session_no_crash_empty_chat_id():
    """load_session/update_session accept an empty chat_id without raising."""
    empty_id = ""
    loaded = load_session(empty_id)
    assert loaded is not None
    # Passing means this call simply returns; nothing is asserted on its result.
    update_session(empty_id, "msg", "deep")
# ─── 5–8. Stability Guard ─────────────────────────────────────────────────────
def test_stability_guard_short_after_light():
    """A short follow-up with no action verb after a light turn stays light."""
    previous_turn = {
        "last_depth": "light",
        "last_messages": [],
        "last_agents": [],
        "last_question": None,
        "updated_at": time.time(),
    }
    depth = classify_depth("а на завтра?", session=previous_turn)
    assert depth == "light"
def test_stability_guard_action_verb_overrides():
    """An action verb wins over the guard: the message is classified deep."""
    previous_turn = {
        "last_depth": "light",
        "last_messages": [],
        "last_agents": [],
        "last_question": None,
        "updated_at": time.time(),
    }
    depth = classify_depth("зроби план на завтра", session=previous_turn)
    assert depth == "deep"
def test_stability_guard_urgent_overrides():
    """An urgency word wins over the guard: the message is classified deep."""
    previous_turn = {
        "last_depth": "light",
        "last_messages": [],
        "last_agents": [],
        "last_question": None,
        "updated_at": time.time(),
    }
    depth = classify_depth("терміново на завтра?", session=previous_turn)
    assert depth == "deep"
def test_stability_guard_long_text_no_guard():
    """A long message after a light turn is classified deep.

    NOTE(review): the message below has eight words and begins with the action
    verb "перевір", so this does not isolate the guard's word-count limit from
    the action-verb override. Consider a verb-free variant once the expected
    depth for it is confirmed.
    """
    previous_turn = {
        "last_depth": "light",
        "last_messages": [],
        "last_agents": [],
        "last_question": None,
        "updated_at": time.time(),
    }
    long_text = "перевір статус поля один два три чотири п'ять"
    depth = classify_depth(long_text, session=previous_turn)
    assert depth == "deep"
def test_stability_guard_no_session_works_normally():
    """With no session, a short follow-up on a known last topic is light."""
    depth = classify_depth("а на завтра?", session=None, last_topic="plan_day")
    # Expected reason per the original author: short_followup_last_topic.
    assert depth == "light"
def test_stability_guard_last_depth_deep_no_guard():
    """After a deep turn, a short follow-up on a known topic is still light.

    The original author attributes the result to the regular
    short_followup_last_topic path rather than to the stability guard.
    """
    previous_turn = {
        "last_depth": "deep",
        "last_messages": [],
        "last_agents": [],
        "last_question": None,
        "updated_at": time.time(),
    }
    depth = classify_depth("а завтра?", session=previous_turn, last_topic="plan_day")
    assert depth == "light"
def test_stability_guard_telemetry(caplog):
    """The guard logs a telemetry-tagged ``stability_guard_triggered`` message."""
    previous_turn = {
        "last_depth": "light",
        "last_messages": [],
        "last_agents": [],
        "last_question": None,
        "updated_at": time.time(),
    }
    with caplog.at_level(logging.INFO, logger="crews.agromatrix_crew.depth_classifier"):
        classify_depth("а завтра?", session=previous_turn)

    # Keep only the messages that carry the telemetry tag.
    tagged = []
    for entry in caplog.records:
        text = entry.getMessage()
        if TELEMETRY_TAG in text:
            tagged.append(text)
    assert any("stability_guard_triggered" in m for m in tagged), \
        f"No stability_guard_triggered. Got: {tagged}"
# ─── 9–14. Proactivity ───────────────────────────────────────────────────────
def _profile_with_intent(intent: str, count: int, interaction_count: int = 10) -> dict:
    """Return the default "u_proact" profile with *intent* repeated *count* times.

    ``known_intents`` becomes a list holding *intent* exactly *count* times and
    ``interaction_count`` is set to the given value.
    """
    profile = _default_user_profile("u_proact")
    repeated_intents = [intent] * count
    profile["interaction_count"] = interaction_count
    profile["known_intents"] = repeated_intents
    return profile
def test_proactivity_all_conditions_met():
    """All conditions hold: a phrase of at most 120 chars without '!' is added."""
    response = "Ось твій план на день."
    profile = _profile_with_intent("plan_day", count=5, interaction_count=10)

    new_resp, added = maybe_add_proactivity(
        response, profile, depth="deep", reflection=None
    )

    assert added is True
    # Whatever follows the original response text is treated as the added phrase.
    added_part = new_resp[len(response):].strip()
    assert len(added_part) <= 120, f"Added phrase too long: {len(added_part)}"
    assert "!" not in added_part, f"Exclamation found: {added_part!r}"
def test_proactivity_light_depth_not_added():
    """Nothing is added when depth is "light"."""
    profile = _profile_with_intent("plan_day", count=5, interaction_count=10)
    _reply, added = maybe_add_proactivity("Відповідь.", profile, depth="light")
    assert added is False
def test_proactivity_not_tenth_interaction():
    """Nothing is added when interaction_count is 7."""
    profile = _profile_with_intent("plan_day", count=5, interaction_count=7)
    _reply, added = maybe_add_proactivity("Відповідь.", profile, depth="deep")
    assert added is False
def test_proactivity_zero_interaction_not_added():
    """Nothing is added when interaction_count is 0."""
    profile = _profile_with_intent("plan_day", count=5, interaction_count=0)
    _reply, added = maybe_add_proactivity("Відповідь.", profile, depth="deep")
    assert added is False
def test_proactivity_low_confidence_not_added():
    """Nothing is added when the reflection reports confidence 0.5."""
    profile = _profile_with_intent("plan_day", count=5, interaction_count=10)
    low_confidence = {
        "confidence": 0.5,
        "new_facts": {},
        "clarifying_question": None,
    }
    _reply, added = maybe_add_proactivity(
        "Відповідь.", profile, depth="deep", reflection=low_confidence
    )
    assert added is False
def test_proactivity_brief_with_question_not_added():
    """Nothing is added for a "concise" profile when the reply contains '?'."""
    profile = _profile_with_intent("plan_day", count=5, interaction_count=10)
    profile["style"] = "concise"
    reply_with_question = "Ось план. Чи уточнити?"
    _reply, added = maybe_add_proactivity(reply_with_question, profile, depth="deep")
    assert added is False
def test_proactivity_intent_freq_low_not_added():
    """Nothing is added when the intent appears only twice (below three)."""
    profile = _profile_with_intent("plan_day", count=2, interaction_count=10)
    _reply, added = maybe_add_proactivity("Відповідь.", profile, depth="deep")
    assert added is False
def test_proactivity_confidence_ok_added():
    """A reflection confidence of 0.8 (>= 0.7) allows the phrase to be added."""
    profile = _profile_with_intent("iot_sensors", count=4, interaction_count=10)
    confident = {
        "confidence": 0.8,
        "new_facts": {},
        "clarifying_question": None,
    }
    _reply, added = maybe_add_proactivity(
        "Статус датчиків перевірено.", profile, depth="deep", reflection=confident
    )
    assert added is True
def test_proactivity_telemetry_added(caplog):
    """A telemetry-tagged ``proactivity_added`` message is logged."""
    profile = _profile_with_intent("plan_day", count=5, interaction_count=10)
    with caplog.at_level(logging.INFO, logger="crews.agromatrix_crew.proactivity"):
        maybe_add_proactivity("Відповідь.", profile, depth="deep")

    # Keep only the messages that carry the telemetry tag.
    tagged = []
    for entry in caplog.records:
        text = entry.getMessage()
        if TELEMETRY_TAG in text:
            tagged.append(text)
    assert any("proactivity_added" in m for m in tagged), \
        f"No proactivity_added telemetry. Got: {tagged}"
def test_proactivity_telemetry_skipped_light(caplog):
    """With depth "light", a tagged ``proactivity_skipped`` message is logged.

    Records are captured from DEBUG level upward for this check.
    """
    profile = _profile_with_intent("plan_day", count=5, interaction_count=10)
    with caplog.at_level(logging.DEBUG, logger="crews.agromatrix_crew.proactivity"):
        maybe_add_proactivity("Відповідь.", profile, depth="light")

    # Keep only the messages that carry the telemetry tag.
    tagged = []
    for entry in caplog.records:
        text = entry.getMessage()
        if TELEMETRY_TAG in text:
            tagged.append(text)
    assert any("proactivity_skipped" in m for m in tagged), \
        f"No proactivity_skipped telemetry. Got: {tagged}"
# ─── _top_intent helper ───────────────────────────────────────────────────────
def test_top_intent_returns_most_frequent():
    """_top_intent reports the most common intent together with its count."""
    history = ["plan_day"] * 5
    history.extend(["iot_sensors"] * 2)
    intent, count = _top_intent(history)
    assert intent == "plan_day"
    assert count == 5
def test_top_intent_empty_returns_none():
    """An empty intent list yields ``(None, 0)``."""
    no_history = []
    intent, count = _top_intent(no_history)
    assert intent is None
    assert count == 0
def test_top_intent_none_returns_none():
intent, count = _top_intent(None)
assert intent is None