docs(platform): add policy configs, runbooks, ops scripts and platform documentation
Config policies (16 files): alert_routing, architecture_pressure, backlog, cost_weights, data_governance, incident_escalation, incident_intelligence, network_allowlist, nodes_registry, observability_sources, rbac_tools_matrix, release_gate, risk_attribution, risk_policy, slo_policy, tool_limits, tools_rollout

Ops (22 files): Caddyfile, calendar compose, grafana voice dashboard, deployments/incidents logs, runbooks for alerts/audit/backlog/incidents/sofiia/voice, cron jobs, scripts (alert_triage, audit_cleanup, migrate_*, governance, schedule), task_registry, voice alerts/ha/latency/policy

Docs (30+ files): HUMANIZED_STEPAN v2.7-v3 changelogs and runbooks, NODA1/NODA2 status and setup, audit index and traces, backlog, incident, supervisor, tools, voice, opencode, release, risk, aistalk, spacebot

Made-with: Cursor
This commit is contained in:
133
config/cost_weights.yml
Normal file
133
config/cost_weights.yml
Normal file
@@ -0,0 +1,133 @@
|
||||
# Cost Weights — DAARION FinOps MVP
#
# "cost_units" = cost_per_call + duration_ms * cost_per_ms
# These are RELATIVE units for ranking, not actual dollars.
#
# Update weights as actual cost data becomes available.
|
||||
|
||||
# Baseline weights, in the same relative cost units as the formula in the
# file header.
# NOTE(review): presumably applied to any tool that has no entry under
# `tools` — confirm against the consumer.
defaults:
  cost_per_call: 1.0  # baseline: 1 unit per call
  cost_per_ms: 0.001  # 0.001 units per ms elapsed
|
||||
|
||||
# Per-tool weights, keyed by tool name. Every entry carries the same three
# fields:
#   cost_per_call  fixed units charged for each invocation
#   cost_per_ms    units charged per millisecond of call duration
#   category       grouping label for the tool
# NOTE(review): entries presumably override `defaults` for the named tool —
# confirm against the consumer.
tools:
  # ─── Heavy GPU/compute (high cost) ───────────────────────────────────────
  comfy_generate_video:
    cost_per_call: 120.0
    cost_per_ms: 0.005
    category: media

  comfy_generate_image:
    cost_per_call: 50.0
    cost_per_ms: 0.003
    category: media

  # ─── Release / governance tools ──────────────────────────────────────────
  pr_reviewer_tool:
    cost_per_call: 10.0
    cost_per_ms: 0.002
    category: release

  contract_tool:
    cost_per_call: 5.0
    cost_per_ms: 0.001
    category: release

  threatmodel_tool:
    cost_per_call: 5.0
    cost_per_ms: 0.001
    category: release

  dependency_scanner_tool:
    cost_per_call: 3.0
    cost_per_ms: 0.001
    category: release

  drift_analyzer_tool:
    cost_per_call: 4.0
    cost_per_ms: 0.001
    category: release

  cost_analyzer_tool:
    cost_per_call: 2.0
    cost_per_ms: 0.001
    category: finops

  # ─── Observability (moderate cost, often called) ─────────────────────────
  observability_tool:
    cost_per_call: 2.0
    cost_per_ms: 0.001
    category: observability

  # ─── Jobs / orchestration ────────────────────────────────────────────────
  job_orchestrator_tool:
    cost_per_call: 3.0
    cost_per_ms: 0.001
    category: ops

  # ─── Web / external (network cost) ───────────────────────────────────────
  web_search:
    cost_per_call: 2.0
    cost_per_ms: 0.001
    category: web

  web_extract:
    cost_per_call: 1.5
    cost_per_ms: 0.001
    category: web

  crawl4ai_scrape:
    cost_per_call: 3.0
    cost_per_ms: 0.001
    category: web

  # ─── Knowledge / memory (low cost) ───────────────────────────────────────
  memory_search:
    cost_per_call: 0.5
    cost_per_ms: 0.0005
    category: memory

  remember_fact:
    cost_per_call: 0.5
    cost_per_ms: 0.0005
    category: memory

  graph_query:
    cost_per_call: 0.5
    cost_per_ms: 0.0005
    category: memory

  kb_tool:
    cost_per_call: 1.0
    cost_per_ms: 0.001
    category: knowledge

  # ─── Repo / code tools ───────────────────────────────────────────────────
  repo_tool:
    cost_per_call: 1.5
    cost_per_ms: 0.001
    category: dev

  config_linter_tool:
    cost_per_call: 2.0
    cost_per_ms: 0.001
    category: release

  # ─── Oncall / incident ───────────────────────────────────────────────────
  oncall_tool:
    cost_per_call: 1.0
    cost_per_ms: 0.001
    category: ops
|
||||
|
||||
# ─── Anomaly detection thresholds ────────────────────────────────────────────
|
||||
# Settings for flagging cost anomalies.
anomaly:
  # Spike: window_cost / baseline_avg_cost >= ratio_threshold
  spike_ratio_threshold: 3.0
  # Must have at least this many calls in window to be an anomaly
  min_calls_threshold: 10
  # High-priority tools for cost_watch gate in release_check
  # NOTE(review): nested under `anomaly` because no blank line separates it
  # from the thresholds above — confirm against the cost_watch consumer.
  priority_tools:
    - comfy_generate_video
    - comfy_generate_image
    - pr_reviewer_tool
    - job_orchestrator_tool
    - observability_tool
|
||||
Reference in New Issue
Block a user