# microdao-daarion/tests/test_stepan_acceptance.py

"""
Acceptance tests for Humanized Stepan v2.5 (plus the v2.7 recent_topics additions).

Tests run without crewai and verify:
  1. A 5-step session: привіт → план → а на післязавтра? → ок → дякую
     Expected depths: 1=light, 2=deep, 3=light, 4=light, 5=light
     No extra crew launch between steps 1, 3, 4, 5
  2. Greeting tiers by interaction_count (neutral/soft/contextual)
  3. RNG is stable within a day and differs between days
  4. Weather mini-knowledge (rain + FarmProfile phase)
  5. Jaccard guard for the summary
  6. tone_constraints in the default profile
  7. v2.7: recent_topics and topic_label handling
"""

import sys
from pathlib import Path
root = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(root))
sys.path.insert(0, str(root / 'packages' / 'agromatrix-tools'))

from crews.agromatrix_crew.depth_classifier import classify_depth
from crews.agromatrix_crew.light_reply import (
    build_light_reply,
    classify_light_event,
    _seeded_rng,
    _weather_reply,
)
from crews.agromatrix_crew.memory_manager import (
    _default_user_profile,
    build_interaction_summary,
    _jaccard_similarity,
    _summary_changed_enough,
    _should_update_summary,
    push_recent_topic,
    migrate_profile_topics,
    summarize_topic_label,
)


# ─── 5-крокова сесія ─────────────────────────────────────────────────────────

def test_session_5_steps_depth():
    """
    Classify each message of the five-step session and check its depth.

    Steps:
      1. "привіт"                    -> light
      2. "сплануй день по полю 5"    -> deep (action verb)
      3. "а на післязавтра?"         -> light (<=6 words + last_topic)
      4. "ок"                        -> light
      5. "дякую"                     -> light
    """
    # Each row: (message, last_topic passed in, expected depth, failure text).
    # Steps 1-2 run without a topic; from step 3 on last_topic is "plan_day",
    # i.e. the topic left behind by the deep request of step 2.
    session = (
        ("привіт", None, "light", "step 1 should be light"),
        ("сплануй день по полю 5", None, "deep", "step 2 should be deep"),
        ("а на післязавтра?", "plan_day", "light", "step 3 should be light"),
        ("ок", "plan_day", "light", "step 4 should be light"),
        ("дякую", "plan_day", "light", "step 5 should be light"),
    )
    for text, topic, expected, failure in session:
        assert classify_depth(text, last_topic=topic) == expected, failure


def test_session_no_crew_for_light_steps():
    """Steps 1, 3, 4 and 5 must not launch the crew; checked via build_light_reply."""
    fresh = {"user_id": "session_user", "name": None, "last_topic": None, "interaction_count": 0}

    # step 1 - greeting with no topic yet: a non-None result is the no-LLM reply
    greeting = build_light_reply("привіт", fresh)
    assert greeting is not None, "step 1: greeting should return no-LLM reply"

    # Profile used for the remaining steps: same user, now with last_topic set
    # to "plan_day" and one interaction counted.
    planned = {**fresh, "last_topic": "plan_day", "interaction_count": 1}

    # step 3 - short follow-up
    followup = build_light_reply("а на післязавтра?", planned)
    assert followup is not None, "step 3: short followup should return no-LLM reply"

    # step 4 - acknowledgement
    ack = build_light_reply("ок", planned)
    assert ack is not None, "step 4: ack should return no-LLM reply"

    # step 5 - thanks
    thanks = build_light_reply("дякую", planned)
    assert thanks is not None, "step 5: thanks should return no-LLM reply"


# ─── Greeting рівні за interaction_count ─────────────────────────────────────

def test_greeting_neutral_new_user():
    """
    interaction_count=0 -> the greeting must not come from the contextual bank.

    The check matches on word stems rather than full word forms: the
    contextual phrases are inflected (e.g. "По плануванню чи по датчиках?"),
    so looking for "планування" / "датчики" verbatim would never match them.
    """
    profile = {"user_id": "u_new", "name": None, "last_topic": None, "interaction_count": 0}
    reply = build_light_reply("привіт", profile)
    assert reply is not None

    lowered = reply.lower()
    # Same stems the contextual-greeting test looks for.
    contextual_stems = ("плануванн", "датчик")
    assert not any(stem in lowered for stem in contextual_stems), \
        f"New user should not get contextual greeting: {reply!r}"


def test_greeting_soft_mid_user():
    """interaction_count=5 -> a greeting is produced without the LLM (soft bank)."""
    profile = {"user_id": "u_mid", "name": None, "last_topic": None, "interaction_count": 5}
    reply = build_light_reply("привіт", profile)
    # Only the presence of a reply is checked here.
    # TODO(review): soft phrases are expected to carry an open question but NOT
    # the contextual binary choice; assert that once the soft bank wording is
    # confirmed against light_reply.
    assert reply is not None


def test_greeting_contextual_experienced_user():
    """interaction_count=10 -> the greeting comes from the contextual bank."""
    experienced = {"user_id": "u_exp", "name": None, "last_topic": None, "interaction_count": 10}
    greeting = build_light_reply("привіт", experienced)
    assert greeting is not None

    # Stems/fragments expected in contextual phrases
    # such as "По плануванню чи по датчиках?".
    markers = ("плануванн", "датчик", "операції", "аналітик", "план чи факт", "польові")
    lowered = greeting.lower()
    matched = [marker for marker in markers if marker in lowered]
    assert matched, f"Experienced user should get contextual greeting: {greeting!r}"


def test_greeting_with_name_neutral():
    """A known user name shows up in the greeting."""
    named_user = {
        "user_id": "u_named",
        "name": "Іван",
        "last_topic": None,
        "interaction_count": 1,
    }
    greeting = build_light_reply("привіт", named_user)
    assert greeting is not None
    assert "Іван" in greeting


# ─── RNG за днем ─────────────────────────────────────────────────────────────

def test_rng_stable_same_day():
    """Same user on the same day -> the same draw."""
    day = "2026-02-24"
    first_rng = _seeded_rng("user_a", day=day)
    second_rng = _seeded_rng("user_a", day=day)

    first_draw = first_rng.randint(0, 1000)
    second_draw = second_rng.randint(0, 1000)
    assert first_draw == second_draw


def test_rng_different_days():
    """
    Same user on different days -> different random sequences.

    The generators are seeded, so the outcome is deterministic: comparing a
    short sequence (rather than a single draw) makes an accidental match of
    two differently seeded generators practically impossible.
    """
    rng_first_day = _seeded_rng("user_a", day="2026-02-24")
    rng_next_day = _seeded_rng("user_a", day="2026-02-25")

    draws_first_day = [rng_first_day.randint(0, 1000) for _ in range(8)]
    draws_next_day = [rng_next_day.randint(0, 1000) for _ in range(8)]

    assert draws_first_day != draws_next_day, \
        f"RNG should depend on the day, got identical draws: {draws_first_day!r}"


def test_rng_different_users_same_day():
    """
    Different users on the same day -> independent random sequences.

    The check exercises _seeded_rng itself by comparing the sequences the two
    generators produce, instead of re-deriving a seed outside the function.
    """
    rng_a = _seeded_rng("user_aaa", day="2026-02-24")
    rng_z = _seeded_rng("user_zzz", day="2026-02-24")

    # A single value may coincide by chance; a short sequence will not.
    draws_a = [rng_a.randint(0, 10000) for _ in range(8)]
    draws_z = [rng_z.randint(0, 10000) for _ in range(8)]

    assert draws_a != draws_z, \
        f"RNG should depend on the user, got identical draws: {draws_a!r}"


# ─── Weather mini-knowledge ───────────────────────────────────────────────────

def test_weather_rain_growing_phase():
    """A rain question in the "growing" season state yields a weather reply."""
    answer = _weather_reply("а якщо дощ?", {"season_state": "growing"})
    assert answer is not None
    lowered = answer.lower()
    assert any(stem in lowered for stem in ("обробк", "перенос"))


def test_weather_frost_sowing():
    """A frost message in the "sowing" season state yields a weather reply."""
    answer = _weather_reply("буде мороз", {"season_state": "sowing"})
    assert answer is not None
    lowered = answer.lower()
    assert any(stem in lowered for stem in ("сів", "мороз", "призупиняємо"))


def test_weather_wind_no_phase():
    """A wind message is answered even when no farm profile is given."""
    answer = _weather_reply("сильний вітер", None)
    assert answer is not None
    lowered = answer.lower()
    assert any(stem in lowered for stem in ("обприск", "вітер"))


def test_no_weather_trigger():
    """A message with no weather word gets no weather reply (None)."""
    outcome = _weather_reply("а на завтра?", None)
    assert outcome is None, f"No weather trigger, should be None: {outcome!r}"


def test_weather_reply_in_followup_overrides_topic():
    """A weather follow-up is answered by the weather reply, not by _SHORT_FOLLOWUP_WITH_TOPIC."""
    user = {
        "user_id": "u_w",
        "name": None,
        "last_topic": "plan_day",
        "interaction_count": 5,
        "preferences": {},
    }
    growing_farm = {"season_state": "growing"}

    answer = build_light_reply("а якщо дощ?", user, farm_profile=growing_farm)
    assert answer is not None
    # The reply must be the weather answer, not a topic follow-up template.
    lowered = answer.lower()
    assert "по план на день" not in lowered


# ─── Jaccard guard ────────────────────────────────────────────────────────────

def test_jaccard_similar_no_update():
    """A new summary that is nearly identical to the old one must not trigger an update."""
    # The two summaries differ only in the trailing interaction counter.
    previous, candidate = (
        "Іван — агроном. Надає перевагу стислим відповідям. Взаємодій: 10.",
        "Іван — агроном. Надає перевагу стислим відповідям. Взаємодій: 20.",
    )
    changed = _summary_changed_enough(previous, candidate)
    assert not changed


def test_jaccard_different_update():
    """A substantially different summary must trigger an update."""
    previous, candidate = (
        "Оператор. Спілкується в розмовному стилі. Взаємодій: 10.",
        "Іван — агроном. Надає перевагу стислим відповідям. Частіше питає про план/факт. Взаємодій: 20.",
    )
    changed = _summary_changed_enough(previous, candidate)
    assert changed


def test_jaccard_none_old_always_update():
    """With no previous summary, the new one is always stored."""
    changed = _summary_changed_enough(None, "Будь-яке резюме.")
    assert changed


def test_jaccard_similarity_values():
    """Check similarity values for identical, empty and partially overlapping inputs."""
    identical = _jaccard_similarity("привіт степан", "привіт степан")
    assert identical == 1.0

    with_empty = _jaccard_similarity("", "щось")
    assert with_empty == 0.0

    # Two of the tokens are shared, so the value is strictly between 0 and 1.
    partial = _jaccard_similarity("a b c d", "a b e f")
    assert partial > 0.0 and partial < 1.0


# ─── tone_constraints у default профілі ─────────────────────────────────────

def test_default_profile_has_tone_constraints():
    """The default profile carries tone_constraints with both flags set to False."""
    preferences = _default_user_profile("u1")["preferences"]
    assert "tone_constraints" in preferences

    constraints = preferences["tone_constraints"]
    flags = ("no_emojis", "no_exclamations")
    # Presence first, then values - same order as the checks are meant to fail in.
    for flag in flags:
        assert flag in constraints
    for flag in flags:
        assert constraints[flag] is False


def test_default_profile_version_4():
    """The profile version is 4 after recent_topics was added."""
    version = _default_user_profile("u2").get("_version")
    assert version == 4


# ─── v2.7: recent_topics + topic_label acceptance ────────────────────────────

def test_v27_session_topic_label_recorded():
    """
    A deep request records its topic label; a light follow-up leaves topics alone.

    Step 1: "сплануй завтра по полю 12" is deep -> push_recent_topic stores the label.
    Step 2: "а на післязавтра?" is light -> producing the light reply must not
            touch recent_topics or last_topic.
    """
    profile = _default_user_profile("u_v27_session")

    # Step 1: deep request
    msg1 = "сплануй завтра по полю 12"
    assert classify_depth(msg1, last_topic=None) == "deep"
    label1 = summarize_topic_label(msg1)
    push_recent_topic(profile, "plan_day", label1)

    assert len(profile["recent_topics"]) == 1
    assert profile["last_topic"] == "plan_day"
    assert profile["last_topic_label"] == label1
    assert "завтра" in label1.lower() or "план" in label1.lower() or "12" in label1

    # Step 2: light follow-up
    msg2 = "а на післязавтра?"
    assert classify_depth(msg2, last_topic="plan_day") == "light"

    # Snapshot the topics, actually run the light path, then compare.
    # Without the call in between, the comparison would check nothing.
    topics_before = [dict(topic) for topic in profile["recent_topics"]]
    build_light_reply(msg2, profile)
    assert profile["recent_topics"] == topics_before
    assert profile["last_topic"] == "plan_day"
    assert profile["last_topic_label"] == label1


def test_v27_session_thanks_short():
    """Step 3: a thank-you gets a short reply (at most 40 characters)."""
    user = _default_user_profile("u_v27_thanks")
    answer = build_light_reply("дякую", user)
    assert answer is not None
    assert not len(answer) > 40


def test_v27_session_greeting_with_label():
    """Step 4: a greeting after a stored topic mentions that topic's label."""
    user = _default_user_profile("u_v27_greet")
    push_recent_topic(user, "plan_day", "план на завтра поле 12")
    user["interaction_count"] = 3

    greeting = build_light_reply("привіт", user)
    assert greeting is not None

    # The reply has to contain the label or at least one word of it.
    lowered = greeting.lower()
    assert any(word in lowered for word in ("план", "поле", "завтра"))


def test_v27_session_horizon_after_6_more_deep():
    """
    Step 5: pushing more deep topics than the horizon keeps exactly five.

    Seven topics are pushed in a row. The two pushed first must have been
    evicted, and every one of the five pushed last must still be present.
    """
    profile = _default_user_profile("u_v27_horizon")
    topics_data = [
        ("plan_day",      "план на завтра"),
        ("plan_vs_fact",  "план vs факт поле 3"),
        ("iot_sensors",   "датчики вологості поле 7"),
        ("sustainability","звіт по стійкості"),
        ("operations",    "операції по збиранню"),
        ("plan_week",     "тижневий план посів"),
        ("iot_sensors",   "датчики температура поле 2"),
    ]
    for intent, label in topics_data:
        push_recent_topic(profile, intent, label)

    assert len(profile["recent_topics"]) == 5
    labels = [t["label"] for t in profile["recent_topics"]]
    pushed_labels = [label for _, label in topics_data]

    # The first two pushes fall outside the horizon.
    for evicted in pushed_labels[:-5]:
        assert evicted not in labels, f"should have been evicted: {evicted!r}"
    # All of the last five pushes must be kept (order is not checked).
    for kept in pushed_labels[-5:]:
        assert kept in labels, f"should have been kept: {kept!r}"


def test_v27_migration_old_profile():
    """Backward compatibility: a v3 profile (no recent_topics) after migration."""
    legacy_profile = {
        "_version": 3,
        "user_id": "u_v3",
        "last_topic": "plan_day",
        "interaction_count": 15,
        "preferences": {"tone_constraints": {"no_emojis": False, "no_exclamations": False}},
    }

    was_changed = migrate_profile_topics(legacy_profile)
    assert was_changed is True

    # The profile is migrated in place: the old last_topic becomes the first
    # recent_topics entry and a label is filled in.
    first_topic = legacy_profile["recent_topics"][0]
    assert first_topic["intent"] == "plan_day"
    assert legacy_profile["last_topic_label"] is not None


def test_v27_no_recent_topics_duplication():
    """push_recent_topic does not add a duplicate when intent and label are the same."""
    user = _default_user_profile("u_dedup")
    # Push the identical topic twice; the second push must be de-duplicated.
    for _ in range(2):
        push_recent_topic(user, "plan_day", "план на завтра")
    assert len(user["recent_topics"]) == 1


def test_v27_summarize_topic_label_removes_action():
    """summarize_topic_label drops the leading action verb from the message."""
    # (action verb that must disappear, full message)
    cases = (
        ("зроби", "зроби план на завтра"),
        ("перевір", "перевір датчики вологості поле 5"),
    )
    for verb, message in cases:
        label = summarize_topic_label(message)
        assert verb not in label.lower()


def test_v27_last_topic_aliases_sync():
    """push_recent_topic keeps last_topic and last_topic_label in sync."""
    user = _default_user_profile("u_alias")
    pushes = (
        ("iot", "датчики поле 3"),
        ("plan_day", "план на тиждень"),
    )
    # After every push both aliases must reflect the topic just pushed.
    for intent, label in pushes:
        push_recent_topic(user, intent, label)
        assert user["last_topic"] == intent
        assert user["last_topic_label"] == label