New router intelligence modules (26 files): alert_ingest/store, audit_store, architecture_pressure, backlog_generator/store, cost_analyzer, data_governance, dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment, platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files), signature_state_store, sofiia_auto_router, tool_governance New services: - sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static - memory-service: integration_endpoints, integrations, voice_endpoints, static UI - aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents) - sofiia-supervisor: new supervisor service - aistalk-bridge-lite: Telegram bridge lite - calendar-service: CalDAV calendar service with reminders - mlx-stt-service / mlx-tts-service: Apple Silicon speech services - binance-bot-monitor: market monitor service - node-worker: STT/TTS memory providers New tools (9): agent_email, browser_tool, contract_tool, observability_tool, oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault New crews: agromatrix_crew (10 modules: depth_classifier, doc_facts, doc_focus, farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine, session_context, style_adapter, telemetry) Tests: 85+ test files for all new modules Made-with: Cursor
359 lines
16 KiB
Python
359 lines
16 KiB
Python
"""
|
||
Acceptance tests для Humanized Stepan v2.5
|
||
|
||
Тести без crewai — перевіряємо:
|
||
1. 5-крокова сесія: привіт → план → а на післязавтра? → ок → дякую
|
||
Очікування: 1=light, 2=deep, 3=light, 4=light, 5=light
|
||
Жодного зайвого crew launch між кроками 1,3,4,5
|
||
2. Greeting рівні за interaction_count (neutral/soft/contextual)
|
||
3. RNG стабільний в межах дня, різний між днями
|
||
4. Weather mini-knowledge (дощ + FarmProfile phase)
|
||
5. Jaccard guard для summary
|
||
6. tone_constraints у default профілі
|
||
"""
|
||
|
||
import sys
|
||
from pathlib import Path
|
||
root = Path(__file__).resolve().parents[1]
|
||
sys.path.insert(0, str(root))
|
||
sys.path.insert(0, str(root / 'packages' / 'agromatrix-tools'))
|
||
|
||
from crews.agromatrix_crew.depth_classifier import classify_depth
|
||
from crews.agromatrix_crew.light_reply import (
|
||
build_light_reply,
|
||
classify_light_event,
|
||
_seeded_rng,
|
||
_weather_reply,
|
||
)
|
||
from crews.agromatrix_crew.memory_manager import (
|
||
_default_user_profile,
|
||
build_interaction_summary,
|
||
_jaccard_similarity,
|
||
_summary_changed_enough,
|
||
_should_update_summary,
|
||
push_recent_topic,
|
||
migrate_profile_topics,
|
||
summarize_topic_label,
|
||
)
|
||
|
||
|
||
# ─── 5-крокова сесія ─────────────────────────────────────────────────────────
|
||
|
||
def test_session_5_steps_depth():
    """Check the depth classification of every message in a 5-message session.

    Expected sequence:
        1. "привіт"                 -> light
        2. "сплануй день по полю 5" -> deep  (action verb)
        3. "а на післязавтра?"      -> light (short follow-up with a last_topic)
        4. "ок"                     -> light
        5. "дякую"                  -> light
    """
    # Each row: (message, last_topic given to the classifier, expected depth,
    # failure message). Steps 1-2 run without a topic; from step 3 on the
    # topic is "plan_day", as it would be after the plan request of step 2.
    session = (
        ("привіт", None, "light", "step 1 should be light"),
        ("сплануй день по полю 5", None, "deep", "step 2 should be deep"),
        ("а на післязавтра?", "plan_day", "light", "step 3 should be light"),
        ("ок", "plan_day", "light", "step 4 should be light"),
        ("дякую", "plan_day", "light", "step 5 should be light"),
    )
    for message, topic, expected_depth, failure_message in session:
        assert classify_depth(message, last_topic=topic) == expected_depth, failure_message
|
||
|
||
|
||
def test_session_no_crew_for_light_steps():
    """Steps 1, 3, 4 and 5 must get a non-None reply from build_light_reply."""
    fresh_profile = {"user_id": "session_user", "name": None, "last_topic": None, "interaction_count": 0}

    # Step 1: greeting while no topic is stored yet.
    greeting_reply = build_light_reply("привіт", fresh_profile)
    assert greeting_reply is not None, "step 1: greeting should return no-LLM reply"

    # Profile as the test models it after the plan request of step 2.
    planned_profile = {**fresh_profile, "last_topic": "plan_day", "interaction_count": 1}

    # Steps 3-5: short follow-up, acknowledgement, thanks.
    later_steps = (
        ("а на післязавтра?", "step 3: short followup should return no-LLM reply"),
        ("ок", "step 4: ack should return no-LLM reply"),
        ("дякую", "step 5: thanks should return no-LLM reply"),
    )
    for text, failure_message in later_steps:
        step_reply = build_light_reply(text, planned_profile)
        assert step_reply is not None, failure_message
|
||
|
||
|
||
# ─── Greeting рівні за interaction_count ─────────────────────────────────────
|
||
|
||
def test_greeting_neutral_new_user():
    """interaction_count=0: the greeting must not use contextual-bank wording."""
    newcomer = {"user_id": "u_new", "name": None, "last_topic": None, "interaction_count": 0}
    reply = build_light_reply("привіт", newcomer)
    assert reply is not None

    # A neutral greeting is expected here, so the words that mark the
    # contextual question must be absent from the reply.
    lowered = reply.lower()
    has_contextual_word = "планування" in lowered or "датчики" in lowered
    assert not has_contextual_word, (
        f"New user should not get contextual greeting: {reply!r}"
    )
|
||
|
||
|
||
def test_greeting_soft_mid_user():
    """interaction_count=5: a greeting must still produce a reply.

    Only the presence of a reply is verified. The original comment described
    the soft bank as containing an open question but not the contextual
    binary choice; that wording is NOT asserted here.
    """
    profile = {"user_id": "u_mid", "name": None, "last_topic": None, "interaction_count": 5}
    reply = build_light_reply("привіт", profile)
    # The original asserted this twice in a row; a single check is equivalent.
    assert reply is not None
|
||
|
||
|
||
def test_greeting_contextual_experienced_user():
    """interaction_count=10: the greeting must contain a contextual-bank marker."""
    veteran = {"user_id": "u_exp", "name": None, "last_topic": None, "interaction_count": 10}
    reply = build_light_reply("привіт", veteran)
    assert reply is not None

    # Word stems the test treats as markers of a contextual greeting
    # (e.g. "По плануванню чи по датчиках?").
    markers = ["плануванн", "датчик", "операції", "аналітик", "план чи факт", "польові"]
    found = False
    for marker in markers:
        if marker in reply.lower():
            found = True
            break
    assert found, f"Experienced user should get contextual greeting: {reply!r}"
|
||
|
||
|
||
def test_greeting_with_name_neutral():
    """A profile with a stored name must get a greeting that contains the name."""
    named_profile = {"user_id": "u_named", "name": "Іван", "last_topic": None, "interaction_count": 1}
    greeting = build_light_reply("привіт", named_profile)
    assert greeting is not None
    assert "Іван" in greeting
|
||
|
||
|
||
# ─── RNG за днем ─────────────────────────────────────────────────────────────
|
||
|
||
def test_rng_stable_same_day():
    """Same user and same day: two generators must produce the same first draw."""
    first, second = (_seeded_rng("user_a", day="2026-02-24") for _ in range(2))
    draw_first = first.randint(0, 1000)
    draw_second = second.randint(0, 1000)
    assert draw_first == draw_second
|
||
|
||
|
||
def test_rng_different_days():
    """Same user on two different days must yield different random sequences.

    The original version only checked that both generators could produce
    three numbers, so it passed even if the day was ignored. Comparing whole
    sequences makes the day dependence observable; for two independently
    seeded generators, five matching draws from 0..1000 is a roughly
    1-in-10**15 event, and since both seeds are fixed the outcome is
    deterministic from run to run.
    """
    rng_first_day = _seeded_rng("user_a", day="2026-02-24")
    rng_next_day = _seeded_rng("user_a", day="2026-02-25")

    draws_first_day = [rng_first_day.randint(0, 1000) for _ in range(5)]
    draws_next_day = [rng_next_day.randint(0, 1000) for _ in range(5)]

    # Both generators must be usable...
    assert len(draws_first_day) == 5 and len(draws_next_day) == 5
    # ...and the day must actually influence the generated sequence.
    assert draws_first_day != draws_next_day, (
        f"RNG should differ between days: {draws_first_day!r} == {draws_next_day!r}"
    )
|
||
|
||
|
||
def test_rng_different_users_same_day():
    """Different users on the same day must get different random sequences.

    The original drew one value from each generator but never used it; its
    only assertion compared two sha256 digests computed inside the test, so
    `_seeded_rng` itself was not verified. The sequences produced by the two
    generators are now compared directly. A single draw may coincide by
    chance, so several draws are compared as a whole.
    """
    import hashlib  # local import: needed only by the seed sanity check below

    rng_a = _seeded_rng("user_aaa", day="2026-02-24")
    rng_z = _seeded_rng("user_zzz", day="2026-02-24")

    draws_a = [rng_a.randint(0, 10000) for _ in range(5)]
    draws_z = [rng_z.randint(0, 10000) for _ in range(5)]
    assert draws_a != draws_z, (
        f"RNG should differ between users: {draws_a!r} == {draws_z!r}"
    )

    # Sanity check kept from the original test.
    # NOTE(review): presumably mirrors how _seeded_rng derives its seed
    # ("<user_id>:<day>" -> sha256 -> mod 2**32) — confirm against light_reply.
    seed_a = int(hashlib.sha256(b"user_aaa:2026-02-24").hexdigest(), 16) % (2**32)
    seed_z = int(hashlib.sha256(b"user_zzz:2026-02-24").hexdigest(), 16) % (2**32)
    assert seed_a != seed_z
|
||
|
||
|
||
# ─── Weather mini-knowledge ───────────────────────────────────────────────────
|
||
|
||
def test_weather_rain_growing_phase():
    """A rain question with season_state "growing" must yield a reply with an expected stem."""
    farm_profile = {"season_state": "growing"}
    answer = _weather_reply("а якщо дощ?", farm_profile)
    assert answer is not None
    lowered = answer.lower()
    # Either of the two stems accepted by the test is enough.
    assert any(stem in lowered for stem in ("обробк", "перенос"))
|
||
|
||
|
||
def test_weather_frost_sowing():
    """A frost message with season_state "sowing" must yield a reply with an expected stem."""
    farm_profile = {"season_state": "sowing"}
    answer = _weather_reply("буде мороз", farm_profile)
    assert answer is not None
    lowered = answer.lower()
    # Any one of the three accepted stems is enough.
    assert any(stem in lowered for stem in ("сів", "мороз", "призупиняємо"))
|
||
|
||
|
||
def test_weather_wind_no_phase():
    """A wind message without a farm profile must still yield a weather reply."""
    answer = _weather_reply("сильний вітер", None)
    assert answer is not None
    lowered = answer.lower()
    assert any(stem in lowered for stem in ("обприск", "вітер"))
|
||
|
||
|
||
def test_no_weather_trigger():
    """A message without a weather word must make _weather_reply return None."""
    answer = _weather_reply("а на завтра?", None)
    assert answer is None, f"No weather trigger, should be None: {answer!r}"
|
||
|
||
|
||
def test_weather_reply_in_followup_overrides_topic():
    """A weather follow-up must not be answered with the topic follow-up template."""
    user_profile = {
        "user_id": "u_w",
        "name": None,
        "last_topic": "plan_day",
        "interaction_count": 5,
        "preferences": {},
    }
    farm_profile = {"season_state": "growing"}

    answer = build_light_reply("а якщо дощ?", user_profile, farm_profile=farm_profile)
    assert answer is not None

    # The phrase below marks the topic follow-up template; a weather answer
    # is expected instead, so the phrase must be absent.
    template_marker = "по план на день"
    assert template_marker not in answer.lower()
|
||
|
||
|
||
# ─── Jaccard guard ────────────────────────────────────────────────────────────
|
||
|
||
def test_jaccard_similar_no_update():
    """Two summaries that differ only in the interaction count must not trigger an update."""
    previous_summary = "Іван — агроном. Надає перевагу стислим відповідям. Взаємодій: 10."
    candidate_summary = "Іван — агроном. Надає перевагу стислим відповідям. Взаємодій: 20."
    # Only the trailing number changed, so the guard must report "not enough change".
    changed = _summary_changed_enough(previous_summary, candidate_summary)
    assert not changed
|
||
|
||
|
||
def test_jaccard_different_update():
    """A substantially different summary must trigger an update."""
    previous_summary = "Оператор. Спілкується в розмовному стилі. Взаємодій: 10."
    candidate_summary = (
        "Іван — агроном. Надає перевагу стислим відповідям. Частіше питає про план/факт. Взаємодій: 20."
    )
    changed = _summary_changed_enough(previous_summary, candidate_summary)
    assert changed
|
||
|
||
|
||
def test_jaccard_none_old_always_update():
    """With no previous summary (None) the new summary must always be accepted."""
    accepted = _summary_changed_enough(None, "Будь-яке резюме.")
    assert accepted
|
||
|
||
|
||
def test_jaccard_similarity_values():
    """Check _jaccard_similarity for identical, empty and partially overlapping inputs."""
    # Identical texts -> 1.0.
    identical = _jaccard_similarity("привіт степан", "привіт степан")
    assert identical == 1.0

    # One empty text -> 0.0.
    with_empty = _jaccard_similarity("", "щось")
    assert with_empty == 0.0

    # Partial overlap -> strictly between the two extremes.
    partial = _jaccard_similarity("a b c d", "a b e f")
    assert 0.0 < partial < 1.0
|
||
|
||
|
||
# ─── tone_constraints у default профілі ─────────────────────────────────────
|
||
|
||
def test_default_profile_has_tone_constraints():
    """The default profile must carry tone_constraints with both flags set to False."""
    profile = _default_user_profile("u1")
    preferences = profile["preferences"]
    assert "tone_constraints" in preferences

    constraints = preferences["tone_constraints"]
    flags = ("no_emojis", "no_exclamations")
    # First make sure both flags exist, then that each is exactly False.
    for flag in flags:
        assert flag in constraints
    for flag in flags:
        assert constraints[flag] is False
|
||
|
||
|
||
def test_default_profile_version_4():
    """The default profile must report _version 4 (bumped when recent_topics was added)."""
    profile = _default_user_profile("u2")
    version = profile.get("_version")
    assert version == 4
|
||
|
||
|
||
# ─── v2.7: recent_topics + topic_label acceptance ────────────────────────────
|
||
|
||
def test_v27_session_topic_label_recorded():
    """A deep request records a topic label; a light follow-up adds no topic.

    Step 1: a plan request for field 12 is classified as deep, and
            push_recent_topic stores its label.
    Step 2: "а на післязавтра?" is classified as light; recent_topics stays
            the same size.
    """
    profile = _default_user_profile("u_v27_session")

    # Step 1: deep request.
    deep_message = "сплануй завтра по полю 12"
    assert classify_depth(deep_message, last_topic=None) == "deep"
    topic_label = summarize_topic_label(deep_message)
    push_recent_topic(profile, "plan_day", topic_label)

    assert len(profile["recent_topics"]) == 1
    assert profile["last_topic"] == "plan_day"
    assert profile["last_topic_label"] == topic_label
    lowered_label = topic_label.lower()
    assert "завтра" in lowered_label or "план" in lowered_label or "12" in topic_label

    # Step 2: light follow-up.
    followup_message = "а на післязавтра?"
    assert classify_depth(followup_message, last_topic="plan_day") == "light"
    count_before = len(profile["recent_topics"])
    # NOTE(review): nothing touches the profile between the two length reads,
    # so this check only simulates the light path (which is described as not
    # calling push_recent_topic) and cannot fail on its own.
    count_after = len(profile["recent_topics"])
    assert count_after == count_before  # no change
|
||
|
||
|
||
def test_v27_session_thanks_short():
    """A reply to "дякую" must exist and be at most 40 characters long."""
    profile = _default_user_profile("u_v27_thanks")
    answer = build_light_reply("дякую", profile)
    assert answer is not None
    max_length = 40
    assert len(answer) <= max_length
|
||
|
||
|
||
def test_v27_session_greeting_with_label():
    """A greeting after a stored topic must mention part of that topic's label."""
    profile = _default_user_profile("u_v27_greet")
    push_recent_topic(profile, "plan_day", "план на завтра поле 12")
    profile["interaction_count"] = 3

    greeting = build_light_reply("привіт", profile)
    assert greeting is not None

    # Any one of the label's words in the reply is accepted.
    lowered = greeting.lower()
    assert any(word in lowered for word in ("план", "поле", "завтра"))
|
||
|
||
|
||
def test_v27_session_horizon_after_6_more_deep():
    """Pushing seven topics must leave exactly the five most recent ones.

    The function name and original docstring speak of six requests, but
    seven topics are pushed here. The two oldest labels must be evicted and
    each of the last five must remain. The original checked only three of
    those five, although its comment said all five must be present.
    """
    profile = _default_user_profile("u_v27_horizon")
    topics_data = [
        ("plan_day", "план на завтра"),
        ("plan_vs_fact", "план vs факт поле 3"),
        ("iot_sensors", "датчики вологості поле 7"),
        ("sustainability", "звіт по стійкості"),
        ("operations", "операції по збиранню"),
        ("plan_week", "тижневий план посів"),
        ("iot_sensors", "датчики температура поле 2"),
    ]
    for intent, label in topics_data:
        push_recent_topic(profile, intent, label)

    horizon = 5
    assert len(profile["recent_topics"]) == horizon
    labels = [topic["label"] for topic in profile["recent_topics"]]

    # Everything older than the horizon must have been pushed out.
    for _, evicted_label in topics_data[:-horizon]:
        assert evicted_label not in labels, f"stale topic kept: {evicted_label!r}"

    # Every one of the last five labels must still be present. Order is not
    # asserted because the storage order is not visible from this test.
    for _, kept_label in topics_data[-horizon:]:
        assert kept_label in labels, f"recent topic missing: {kept_label!r}"
|
||
|
||
|
||
def test_v27_migration_old_profile():
    """Backward compatibility: a v3 profile without recent_topics gets migrated in place."""
    tone_constraints = {"no_emojis": False, "no_exclamations": False}
    legacy_profile = {
        "_version": 3,
        "user_id": "u_v3",
        "last_topic": "plan_day",
        "interaction_count": 15,
        "preferences": {"tone_constraints": tone_constraints},
    }

    was_changed = migrate_profile_topics(legacy_profile)

    assert was_changed is True
    # The stored last_topic becomes the first recent_topics entry.
    first_topic = legacy_profile["recent_topics"][0]
    assert first_topic["intent"] == "plan_day"
    assert legacy_profile["last_topic_label"] is not None
|
||
|
||
|
||
def test_v27_no_recent_topics_duplication():
    """Pushing the same intent and label twice must keep a single recent_topics entry."""
    profile = _default_user_profile("u_dedup")
    # The second push is identical to the first and must be de-duplicated.
    for _ in range(2):
        push_recent_topic(profile, "plan_day", "план на завтра")
    assert len(profile["recent_topics"]) == 1
|
||
|
||
|
||
def test_v27_summarize_topic_label_removes_action():
    """The leading action verb must not appear in the summarized topic label."""
    # Each row: (action verb, full message that starts with that verb).
    cases = (
        ("зроби", "зроби план на завтра"),
        ("перевір", "перевір датчики вологості поле 5"),
    )
    for verb, message in cases:
        label = summarize_topic_label(message)
        assert verb not in label.lower()
|
||
|
||
|
||
def test_v27_last_topic_aliases_sync():
    """push_recent_topic must keep last_topic and last_topic_label in sync with the latest push."""
    profile = _default_user_profile("u_alias")
    pushes = (
        ("iot", "датчики поле 3"),
        ("plan_day", "план на тиждень"),
    )
    # After every push both aliases must reflect the topic just pushed.
    for intent, label in pushes:
        push_recent_topic(profile, intent, label)
        assert profile["last_topic"] == intent
        assert profile["last_topic_label"] == label
|