""" backlog_store.py — Engineering Backlog Storage Layer. DAARION.city | deterministic, no LLM. Backends: MemoryBacklogStore — in-process (tests + fallback) JsonlBacklogStore — filesystem append-only JSONL (MVP) PostgresBacklogStore — Postgres primary (psycopg2 sync) AutoBacklogStore — Postgres → JSONL → Memory cascade Factory: get_backlog_store() → respects BACKLOG_BACKEND env var. BACKLOG_BACKEND: auto | postgres | jsonl | memory | null """ from __future__ import annotations import datetime import json import logging import os import threading import uuid from abc import ABC, abstractmethod from dataclasses import dataclass, field, asdict from pathlib import Path from typing import Any, Dict, List, Optional logger = logging.getLogger(__name__) # ─── Data model ─────────────────────────────────────────────────────────────── _VALID_STATUSES = {"open", "in_progress", "blocked", "done", "canceled"} _VALID_PRIORITIES = {"P0", "P1", "P2", "P3"} def _now_iso() -> str: return datetime.datetime.utcnow().isoformat() def _new_id(prefix: str = "bl") -> str: return f"{prefix}_{uuid.uuid4().hex[:12]}" @dataclass class BacklogItem: id: str created_at: str updated_at: str env: str service: str category: str # arch_review / refactor / slo_hardening / cleanup_followups / security title: str description: str priority: str # P0..P3 status: str # open / in_progress / blocked / done / canceled owner: str due_date: str # YYYY-MM-DD source: str # risk | pressure | digest | manual dedupe_key: str evidence_refs: Dict = field(default_factory=dict) # alerts, incidents, release_checks, ... 
tags: List[str] = field(default_factory=list) meta: Dict = field(default_factory=dict) def to_dict(self) -> Dict: return asdict(self) @classmethod def from_dict(cls, d: Dict) -> "BacklogItem": return cls( id=d.get("id", _new_id()), created_at=d.get("created_at", _now_iso()), updated_at=d.get("updated_at", _now_iso()), env=d.get("env", "prod"), service=d.get("service", ""), category=d.get("category", ""), title=d.get("title", ""), description=d.get("description", ""), priority=d.get("priority", "P2"), status=d.get("status", "open"), owner=d.get("owner", "oncall"), due_date=d.get("due_date", ""), source=d.get("source", "manual"), dedupe_key=d.get("dedupe_key", ""), evidence_refs=d.get("evidence_refs") or {}, tags=d.get("tags") or [], meta=d.get("meta") or {}, ) @dataclass class BacklogEvent: id: str item_id: str ts: str type: str # created | status_change | comment | auto_update message: str actor: str meta: Dict = field(default_factory=dict) def to_dict(self) -> Dict: return asdict(self) @classmethod def from_dict(cls, d: Dict) -> "BacklogEvent": return cls( id=d.get("id", _new_id("ev")), item_id=d.get("item_id", ""), ts=d.get("ts", _now_iso()), type=d.get("type", "comment"), message=d.get("message", ""), actor=d.get("actor", "system"), meta=d.get("meta") or {}, ) # ─── Abstract base ──────────────────────────────────────────────────────────── class BacklogStore(ABC): @abstractmethod def create(self, item: BacklogItem) -> BacklogItem: ... @abstractmethod def get(self, item_id: str) -> Optional[BacklogItem]: ... @abstractmethod def get_by_dedupe_key(self, key: str) -> Optional[BacklogItem]: ... @abstractmethod def update(self, item: BacklogItem) -> BacklogItem: ... @abstractmethod def list_items(self, filters: Optional[Dict] = None, limit: int = 50, offset: int = 0) -> List[BacklogItem]: ... @abstractmethod def add_event(self, event: BacklogEvent) -> BacklogEvent: ... @abstractmethod def get_events(self, item_id: str, limit: int = 50) -> List[BacklogEvent]: ... 
@abstractmethod def cleanup(self, retention_days: int = 180) -> int: ... def upsert(self, item: BacklogItem) -> Dict: """Create or update by dedupe_key. Returns {"action": created|updated, "item": ...}""" existing = self.get_by_dedupe_key(item.dedupe_key) if existing is None: created = self.create(item) return {"action": "created", "item": created} # Update title/description/evidence_refs/tags/meta; preserve status/owner existing.title = item.title existing.description = item.description existing.evidence_refs = item.evidence_refs existing.tags = list(set(existing.tags + item.tags)) existing.meta.update(item.meta or {}) existing.updated_at = _now_iso() updated = self.update(existing) return {"action": "updated", "item": updated} def dashboard(self, env: str = "prod") -> Dict: """Return aggregated backlog counts.""" items = self.list_items({"env": env}, limit=1000) today = datetime.datetime.utcnow().strftime("%Y-%m-%d") status_counts: Dict[str, int] = {} priority_counts: Dict[str, int] = {} category_counts: Dict[str, int] = {} overdue: List[Dict] = [] service_counts: Dict[str, int] = {} for it in items: status_counts[it.status] = status_counts.get(it.status, 0) + 1 priority_counts[it.priority] = priority_counts.get(it.priority, 0) + 1 category_counts[it.category] = category_counts.get(it.category, 0) + 1 service_counts[it.service] = service_counts.get(it.service, 0) + 1 if (it.status not in ("done", "canceled") and it.due_date and it.due_date < today): overdue.append({ "id": it.id, "service": it.service, "title": it.title, "priority": it.priority, "due_date": it.due_date, "owner": it.owner, }) overdue.sort(key=lambda x: (x["priority"], x["due_date"])) top_services = sorted(service_counts.items(), key=lambda x: -x[1])[:10] return { "env": env, "total": len(items), "status_counts": status_counts, "priority_counts": priority_counts, "category_counts": category_counts, "overdue": overdue[:20], "overdue_count": len(overdue), "top_services": [{"service": s, "count": c} 
for s, c in top_services], } # ─── Workflow helper ────────────────────────────────────────────────────────── def validate_transition(current_status: str, new_status: str, policy: Optional[Dict] = None) -> bool: """Return True if transition is allowed, False otherwise.""" defaults = _builtin_workflow() if policy is None: allowed = defaults else: allowed = policy.get("workflow", {}).get("allowed_transitions", defaults) return new_status in allowed.get(current_status, []) def _builtin_workflow() -> Dict: return { "open": ["in_progress", "blocked", "canceled"], "in_progress": ["blocked", "done", "canceled"], "blocked": ["open", "in_progress", "canceled"], "done": [], "canceled": [], } # ─── Memory backend ─────────────────────────────────────────────────────────── class MemoryBacklogStore(BacklogStore): def __init__(self) -> None: self._items: Dict[str, BacklogItem] = {} self._events: List[BacklogEvent] = [] self._lock = threading.Lock() def create(self, item: BacklogItem) -> BacklogItem: with self._lock: self._items[item.id] = item return item def get(self, item_id: str) -> Optional[BacklogItem]: with self._lock: return self._items.get(item_id) def get_by_dedupe_key(self, key: str) -> Optional[BacklogItem]: with self._lock: for it in self._items.values(): if it.dedupe_key == key: return it return None def update(self, item: BacklogItem) -> BacklogItem: with self._lock: self._items[item.id] = item return item def list_items(self, filters: Optional[Dict] = None, limit: int = 50, offset: int = 0) -> List[BacklogItem]: filters = filters or {} with self._lock: items = list(self._items.values()) items = _apply_filters(items, filters) items.sort(key=lambda x: (x.priority, x.due_date or "9999")) return items[offset: offset + limit] def add_event(self, event: BacklogEvent) -> BacklogEvent: with self._lock: self._events.append(event) return event def get_events(self, item_id: str, limit: int = 50) -> List[BacklogEvent]: with self._lock: evs = [e for e in self._events if 
e.item_id == item_id] return evs[-limit:] def cleanup(self, retention_days: int = 180) -> int: cutoff = ( datetime.datetime.utcnow() - datetime.timedelta(days=retention_days) ).isoformat() with self._lock: to_delete = [ iid for iid, it in self._items.items() if it.status in ("done", "canceled") and it.updated_at < cutoff ] for iid in to_delete: del self._items[iid] return len(to_delete) # ─── JSONL backend ──────────────────────────────────────────────────────────── _JSONL_ITEMS = "ops/backlog/items.jsonl" _JSONL_EVENTS = "ops/backlog/events.jsonl" _JSONL_CACHE_MAX = 50_000 # lines to scan class JsonlBacklogStore(BacklogStore): """ Append-only JSONL filesystem store. Last-write-wins: items keyed by id, updates appended (read returns latest). """ def __init__( self, items_path: str = _JSONL_ITEMS, events_path: str = _JSONL_EVENTS, ) -> None: self._items_path = Path(items_path) self._events_path = Path(events_path) self._lock = threading.Lock() self._items_path.parent.mkdir(parents=True, exist_ok=True) self._events_path.parent.mkdir(parents=True, exist_ok=True) def _load_items(self) -> Dict[str, BacklogItem]: """Scan file, last-write-wins per id.""" items: Dict[str, BacklogItem] = {} if not self._items_path.exists(): return items try: with open(self._items_path, "r", encoding="utf-8") as f: for line in f: line = line.strip() if not line: continue try: d = json.loads(line) items[d["id"]] = BacklogItem.from_dict(d) except Exception: pass except Exception as e: logger.warning("JsonlBacklogStore: load_items error: %s", e) return items def _append_item(self, item: BacklogItem) -> None: with open(self._items_path, "a", encoding="utf-8") as f: f.write(json.dumps(item.to_dict(), default=str) + "\n") def create(self, item: BacklogItem) -> BacklogItem: with self._lock: self._append_item(item) return item def get(self, item_id: str) -> Optional[BacklogItem]: with self._lock: items = self._load_items() return items.get(item_id) def get_by_dedupe_key(self, key: str) -> 
Optional[BacklogItem]: with self._lock: items = self._load_items() for it in items.values(): if it.dedupe_key == key: return it return None def update(self, item: BacklogItem) -> BacklogItem: item.updated_at = _now_iso() with self._lock: self._append_item(item) return item def list_items(self, filters: Optional[Dict] = None, limit: int = 50, offset: int = 0) -> List[BacklogItem]: with self._lock: items = list(self._load_items().values()) items = _apply_filters(items, filters or {}) items.sort(key=lambda x: (x.priority, x.due_date or "9999")) return items[offset: offset + limit] def add_event(self, event: BacklogEvent) -> BacklogEvent: with self._lock: if not self._events_path.parent.exists(): self._events_path.parent.mkdir(parents=True, exist_ok=True) with open(self._events_path, "a", encoding="utf-8") as f: f.write(json.dumps(event.to_dict(), default=str) + "\n") return event def get_events(self, item_id: str, limit: int = 50) -> List[BacklogEvent]: events: List[BacklogEvent] = [] if not self._events_path.exists(): return events try: with open(self._events_path, "r", encoding="utf-8") as f: for line in f: line = line.strip() if not line: continue try: d = json.loads(line) if d.get("item_id") == item_id: events.append(BacklogEvent.from_dict(d)) except Exception: pass except Exception as e: logger.warning("JsonlBacklogStore: get_events error: %s", e) return events[-limit:] def cleanup(self, retention_days: int = 180) -> int: cutoff = ( datetime.datetime.utcnow() - datetime.timedelta(days=retention_days) ).isoformat() with self._lock: items = self._load_items() to_keep = { iid: it for iid, it in items.items() if not (it.status in ("done", "canceled") and it.updated_at < cutoff) } deleted = len(items) - len(to_keep) if deleted: # Rewrite the file with open(self._items_path, "w", encoding="utf-8") as f: for it in to_keep.values(): f.write(json.dumps(it.to_dict(), default=str) + "\n") return deleted # ─── Postgres backend 
class PostgresBacklogStore(BacklogStore):
    """
    Postgres-backed store using psycopg2 (sync).

    Tables: backlog_items, backlog_events (created by migration script).
    A fresh connection is opened per operation (no pooling); per psycopg2
    semantics, ``with conn:`` commits on success and rolls back on error.
    """

    def __init__(self, dsn: Optional[str] = None) -> None:
        # DSN precedence: explicit arg > BACKLOG_POSTGRES_DSN > POSTGRES_DSN > local default.
        self._dsn = dsn or os.environ.get(
            "BACKLOG_POSTGRES_DSN",
            os.environ.get("POSTGRES_DSN", "postgresql://localhost/daarion"),
        )
        self._lock = threading.Lock()

    def _conn(self):
        """Open a new connection (lazy import keeps psycopg2 optional)."""
        import psycopg2
        return psycopg2.connect(self._dsn)

    def create(self, item: BacklogItem) -> BacklogItem:
        """Insert the item.

        NOTE: ON CONFLICT (dedupe_key) DO NOTHING means a duplicate key is
        silently skipped — callers go through upsert() for merge behavior.
        The passed item is returned either way.
        """
        sql = """
            INSERT INTO backlog_items
                (id, created_at, updated_at, env, service, category, title,
                 description, priority, status, owner, due_date, source,
                 dedupe_key, evidence_refs, tags, meta)
            VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
            ON CONFLICT (dedupe_key) DO NOTHING
        """
        with self._conn() as conn:
            with conn.cursor() as cur:
                cur.execute(sql, (
                    item.id, item.created_at, item.updated_at, item.env,
                    item.service, item.category, item.title, item.description,
                    item.priority, item.status, item.owner,
                    item.due_date or None, item.source, item.dedupe_key,
                    json.dumps(item.evidence_refs), json.dumps(item.tags),
                    json.dumps(item.meta),
                ))
        return item

    def get(self, item_id: str) -> Optional[BacklogItem]:
        with self._conn() as conn:
            with conn.cursor() as cur:
                cur.execute("SELECT * FROM backlog_items WHERE id=%s", (item_id,))
                row = cur.fetchone()
                if row:
                    return self._row_to_item(row, cur.description)
        return None

    def get_by_dedupe_key(self, key: str) -> Optional[BacklogItem]:
        with self._conn() as conn:
            with conn.cursor() as cur:
                cur.execute("SELECT * FROM backlog_items WHERE dedupe_key=%s", (key,))
                row = cur.fetchone()
                if row:
                    return self._row_to_item(row, cur.description)
        return None

    def update(self, item: BacklogItem) -> BacklogItem:
        """Persist mutable fields by id; bumps updated_at as a side effect."""
        item.updated_at = _now_iso()
        sql = """
            UPDATE backlog_items SET
                updated_at=%s, title=%s, description=%s, priority=%s,
                status=%s, owner=%s, due_date=%s, evidence_refs=%s,
                tags=%s, meta=%s
            WHERE id=%s
        """
        with self._conn() as conn:
            with conn.cursor() as cur:
                cur.execute(sql, (
                    item.updated_at, item.title, item.description,
                    item.priority, item.status, item.owner,
                    item.due_date or None, json.dumps(item.evidence_refs),
                    json.dumps(item.tags), json.dumps(item.meta), item.id,
                ))
        return item

    def list_items(self, filters: Optional[Dict] = None, limit: int = 50,
                   offset: int = 0) -> List[BacklogItem]:
        filters = filters or {}
        where, params = _pg_where_clause(filters)
        sql = f"""
            SELECT * FROM backlog_items {where}
            ORDER BY priority ASC, due_date ASC NULLS LAST
            LIMIT %s OFFSET %s
        """
        with self._conn() as conn:
            with conn.cursor() as cur:
                cur.execute(sql, params + [limit, offset])
                rows = cur.fetchall()
                desc = cur.description
        return [self._row_to_item(r, desc) for r in rows]

    def add_event(self, event: BacklogEvent) -> BacklogEvent:
        sql = """
            INSERT INTO backlog_events (id, item_id, ts, type, message, actor, meta)
            VALUES (%s,%s,%s,%s,%s,%s,%s)
        """
        with self._conn() as conn:
            with conn.cursor() as cur:
                cur.execute(sql, (
                    event.id, event.item_id, event.ts, event.type,
                    event.message, event.actor, json.dumps(event.meta),
                ))
        return event

    def get_events(self, item_id: str, limit: int = 50) -> List[BacklogEvent]:
        with self._conn() as conn:
            with conn.cursor() as cur:
                cur.execute(
                    "SELECT * FROM backlog_events WHERE item_id=%s ORDER BY ts DESC LIMIT %s",
                    (item_id, limit),
                )
                rows = cur.fetchall()
                desc = cur.description
        return [self._row_to_event(r, desc) for r in rows]

    def cleanup(self, retention_days: int = 180) -> int:
        """Delete stale done/canceled rows; return number of rows removed."""
        cutoff = (
            datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
            - datetime.timedelta(days=retention_days)
        ).isoformat()
        with self._conn() as conn:
            with conn.cursor() as cur:
                cur.execute(
                    """DELETE FROM backlog_items
                       WHERE status IN ('done','canceled') AND updated_at < %s""",
                    (cutoff,),
                )
                return cur.rowcount

    @staticmethod
    def _row_to_item(row, description) -> BacklogItem:
        """Map a cursor row to a BacklogItem, decoding JSON-as-text columns."""
        d = {col.name: val for col, val in zip(description, row)}
        for json_key in ("evidence_refs", "tags", "meta"):
            v = d.get(json_key)
            if isinstance(v, str):
                try:
                    d[json_key] = json.loads(v)
                except Exception:
                    # Corrupt JSON degrades to an empty container of the right kind.
                    d[json_key] = {} if json_key != "tags" else []
        return BacklogItem.from_dict(d)

    @staticmethod
    def _row_to_event(row, description) -> BacklogEvent:
        """Map a cursor row to a BacklogEvent, decoding the meta column."""
        d = {col.name: val for col, val in zip(description, row)}
        if isinstance(d.get("meta"), str):
            try:
                d["meta"] = json.loads(d["meta"])
            except Exception:
                d["meta"] = {}
        return BacklogEvent.from_dict(d)


def _pg_where_clause(filters: Dict):
    """Build ('WHERE ...', params) for backlog_items from a filters dict.

    All values are passed as bound parameters — never interpolated into SQL.
    """
    clauses, params = [], []
    if filters.get("env"):
        clauses.append("env=%s")
        params.append(filters["env"])
    if filters.get("service"):
        clauses.append("service=%s")
        params.append(filters["service"])
    if filters.get("status"):
        if isinstance(filters["status"], list):
            ph = ",".join(["%s"] * len(filters["status"]))
            clauses.append(f"status IN ({ph})")
            params.extend(filters["status"])
        else:
            clauses.append("status=%s")
            params.append(filters["status"])
    if filters.get("owner"):
        clauses.append("owner=%s")
        params.append(filters["owner"])
    if filters.get("category"):
        clauses.append("category=%s")
        params.append(filters["category"])
    if filters.get("due_before"):
        clauses.append("due_date < %s")
        params.append(filters["due_before"])
    return ("WHERE " + " AND ".join(clauses)) if clauses else "", params


# ─── Null backend ─────────────────────────────────────────────────────────────


class NullBacklogStore(BacklogStore):
    """No-op backend: accepts writes, returns nothing. For disabling storage."""

    def create(self, item):
        return item

    def get(self, item_id):
        return None

    def get_by_dedupe_key(self, key):
        return None

    def update(self, item):
        return item

    def list_items(self, filters=None, limit=50, offset=0):
        return []

    def add_event(self, event):
        return event

    def get_events(self, item_id, limit=50):
        return []

    def cleanup(self, retention_days=180):
        return 0


# ─── Auto backend (Postgres → JSONL fallback) ─────────────────────────────────


class AutoBacklogStore(BacklogStore):
    """Postgres primary with JSONL fallback. Retries Postgres after 5 min."""

    _RETRY_SEC = 300

    def __init__(
        self,
        postgres_dsn: Optional[str] = None,
        jsonl_items: str = _JSONL_ITEMS,
        jsonl_events: str = _JSONL_EVENTS,
    ) -> None:
        self._pg: Optional[PostgresBacklogStore] = None
        self._jsonl = JsonlBacklogStore(jsonl_items, jsonl_events)
        self._dsn = postgres_dsn
        self._pg_failed_at: Optional[float] = None
        self._lock = threading.Lock()
        self._try_init_pg()

    def _try_init_pg(self) -> None:
        """Attempt to (re)activate Postgres; fall back to JSONL on failure."""
        try:
            self._pg = PostgresBacklogStore(self._dsn)
            self._pg._conn().close()  # test connection
            self._pg_failed_at = None
            logger.info("AutoBacklogStore: Postgres backend active")
        except Exception as e:
            logger.warning("AutoBacklogStore: Postgres unavailable, using JSONL: %s", e)
            self._pg = None
            import time
            self._pg_failed_at = time.time()

    def _backend(self) -> BacklogStore:
        """Pick the active backend, retrying Postgres after the cool-down."""
        if self._pg is not None:
            return self._pg
        import time
        # Serialize the retry: without the lock, concurrent callers past the
        # cool-down would all re-run _try_init_pg() simultaneously.
        with self._lock:
            if self._pg is None and (
                self._pg_failed_at is None
                or time.time() - self._pg_failed_at >= self._RETRY_SEC
            ):
                self._try_init_pg()
        return self._pg if self._pg is not None else self._jsonl

    def create(self, item):
        return self._backend().create(item)

    def get(self, item_id):
        return self._backend().get(item_id)

    def get_by_dedupe_key(self, key):
        return self._backend().get_by_dedupe_key(key)

    def update(self, item):
        return self._backend().update(item)

    def list_items(self, filters=None, limit=50, offset=0):
        return self._backend().list_items(filters, limit, offset)

    def add_event(self, event):
        return self._backend().add_event(event)

    def get_events(self, item_id, limit=50):
        return self._backend().get_events(item_id, limit)

    def cleanup(self, retention_days=180):
        return self._backend().cleanup(retention_days)


# ─── Filters helper ───────────────────────────────────────────────────────────


def _apply_filters(items: List[BacklogItem], filters: Dict) -> List[BacklogItem]:
    """In-memory equivalent of _pg_where_clause for Memory/JSONL backends."""
    result = []
    for it in items:
        if filters.get("env") and it.env != filters["env"]:
            continue
        if filters.get("service") and it.service != filters["service"]:
            continue
        if filters.get("status"):
            statuses = (filters["status"] if isinstance(filters["status"], list)
                        else [filters["status"]])
            if it.status not in statuses:
                continue
        if filters.get("owner") and it.owner != filters["owner"]:
            continue
        if filters.get("category") and it.category != filters["category"]:
            continue
        if filters.get("due_before") and it.due_date and it.due_date >= filters["due_before"]:
            continue
        result.append(it)
    return result


# ─── Factory ──────────────────────────────────────────────────────────────────

_STORE_INSTANCE: Optional[BacklogStore] = None
_STORE_LOCK = threading.Lock()


def get_backlog_store() -> BacklogStore:
    """Process-wide singleton; backend selected by BACKLOG_BACKEND env var."""
    global _STORE_INSTANCE
    with _STORE_LOCK:
        if _STORE_INSTANCE is not None:
            return _STORE_INSTANCE
        backend = os.environ.get("BACKLOG_BACKEND", "auto").lower()
        if backend == "memory":
            _STORE_INSTANCE = MemoryBacklogStore()
        elif backend == "jsonl":
            _STORE_INSTANCE = JsonlBacklogStore()
        elif backend == "postgres":
            _STORE_INSTANCE = PostgresBacklogStore()
        elif backend == "null":
            _STORE_INSTANCE = NullBacklogStore()
        else:  # auto
            _STORE_INSTANCE = AutoBacklogStore()
        logger.info("backlog_store: using %s backend", type(_STORE_INSTANCE).__name__)
        return _STORE_INSTANCE


def _reset_store_for_tests() -> None:
    """Drop the singleton so the next get_backlog_store() re-creates it."""
    global _STORE_INSTANCE
    with _STORE_LOCK:
        _STORE_INSTANCE = None