# microdao-daarion/config/incident_intelligence_policy.yml

# Incident Intelligence Policy
# Controls correlation scoring, recurrence detection, and digest generation.

correlation:
  lookback_days: 30
  max_related: 10
  # Matches that score below this value are discarded.
  min_score: 20
  # Each rule pairs a weight with the match conditions it applies to.
  # NOTE(review): how the weights of several matching rules combine is not
  # visible in this file — confirm against the scoring code.
  rules:
    - name: 'same_signature'
      weight: 100
      match:
        signature: true

    - name: 'same_service_and_kind'
      weight: 60
      match:
        same_service: true
        same_kind: true

    - name: 'same_service_time_cluster'
      weight: 40
      match:
        same_service: true
        within_minutes: 180

    - name: 'same_kind_cross_service'
      weight: 30
      match:
        same_kind: true
        within_minutes: 120

recurrence:
  windows_days:
    - 7
    - 30
  thresholds:
    signature:
      # ≥ 3 occurrences in a window → warn; ≥ 6 occurrences → high.
      warn: 3
      high: 6
    kind:
      warn: 5
      high: 10
  # Top N entries per category.
  top_n: 15

  # Fixed (deterministic) recommendation text, one entry per recurrence level.
  # NOTE(review): "{kind}" looks like a placeholder substituted by the
  # consumer — confirm against the code that renders these strings.
  recommendations:
    signature_high: 'Create permanent fix: add regression test + SLO guard for this failure type'
    signature_warn: 'Review root cause history; consider adding monitoring threshold'
    kind_high: 'Systemic issue with kind={kind}: review architecture / add circuit breaker'
    kind_warn: 'Recurring kind={kind}: validate if alert thresholds are tuned correctly'

digest:
  weekly_day: 'Mon'
  include_closed: true
  include_open: true
  output_dir: 'ops/reports/incidents'
  markdown_max_chars: 8000
  # Maximum number of incidents in the weekly listing.
  top_incidents: 20

# ── Root-Cause Buckets ─────────────────────────────────────────────────────
buckets:
  # Bucketing strategy; the accepted values are listed in the inline comment.
  mode: "service_kind"         # service_kind | signature_prefix
  # NOTE(review): presumably only consulted when mode is "signature_prefix"
  # — confirm against the consumer.
  signature_prefix_len: 12
  top_n: 10
  # Minimum incident count a bucket needs, keyed by window length in days.
  # NOTE(review): the keys are unquoted, so YAML loads them as integers
  # (7, 30), not strings — confirm the consumer looks them up as ints.
  min_count:
    7: 3                       # bucket must have ≥ 3 incidents in last 7d
    30: 6                      # or ≥ 6 in last 30d
  # Incident statuses included when building buckets.
  include_statuses: ["open", "mitigating", "resolved", "closed"]

# ── Auto Follow-ups (policy-driven, no LLM) ───────────────────────────────
autofollowups:
  enabled: true
  # Follow-ups are created only for HIGH recurrence buckets.
  only_when_high: true
  owner: 'oncall'
  priority: 'P1'
  due_days: 7
  # Deduplicated by week + bucket_key.
  max_followups_per_bucket_per_week: 1
  dedupe_key_prefix: 'intel_recur'

# ── Release Gate: recurrence_watch ────────────────────────────────────────
release_gate:
  recurrence_watch:
    enabled: true
    # Accepted values: target_service | all
    service_scope: 'target_service'
    windows_days:
      - 7
      - 30
    fail_on:
      # Used only in strict mode.
      severity_in:
        - 'P0'
        - 'P1'
      high_recurrence: true
    warn_on:
      warn_recurrence: true