Config policies (16 files): alert_routing, architecture_pressure, backlog, cost_weights, data_governance, incident_escalation, incident_intelligence, network_allowlist, nodes_registry, observability_sources, rbac_tools_matrix, release_gate, risk_attribution, risk_policy, slo_policy, tool_limits, tools_rollout Ops (22 files): Caddyfile, calendar compose, grafana voice dashboard, deployments/incidents logs, runbooks for alerts/audit/backlog/incidents/sofiia/voice, cron jobs, scripts (alert_triage, audit_cleanup, migrate_*, governance, schedule), task_registry, voice alerts/ha/latency/policy Docs (30+ files): HUMANIZED_STEPAN v2.7-v3 changelogs and runbooks, NODA1/NODA2 status and setup, audit index and traces, backlog, incident, supervisor, tools, voice, opencode, release, risk, aistalk, spacebot Made-with: Cursor
741 lines
24 KiB
YAML
741 lines
24 KiB
YAML
# Job Orchestrator Task Registry
|
|
# Defines allowlisted operational tasks that can be executed via job_orchestrator_tool
|
|
# Only tasks defined here can be run - no arbitrary command execution
|
|
|
|
tasks:
|
|
# === Smoke Tests ===
|
|
- id: "smoke_gateway"
  # Script-backed smoke test scoped to the gateway service.
  title: "Smoke test gateway"
  description: "Run smoke tests against the gateway service"
  tags: ["smoke", "ops"]
  service: "gateway"
  runner: "script"
  command_ref: "ops/smoke_helion_stack.sh"
  timeout_sec: 300
  # The task takes no inputs: the schema declares no properties and
  # disallows additional ones.
  inputs_schema:
    type: "object"
    properties: {}
    additionalProperties: false
  permissions:
    entitlements_required:
      - "tools.jobs.run.smoke"
  dry_run_behavior: "show_help"
|
|
|
|
- id: "smoke_all"
  # Stack-wide smoke test; no `service` key is set at task level.
  title: "Smoke test all services"
  description: "Run smoke tests against all services in the stack"
  tags: ["smoke", "ops"]
  runner: "script"
  command_ref: "ops/canary_all.sh"
  timeout_sec: 600
  inputs_schema:
    type: "object"
    properties:
      # Optional input (not listed under `required`).
      service:
        type: "string"
        description: "Optional specific service to test"
    additionalProperties: false
  permissions:
    entitlements_required:
      - "tools.jobs.run.smoke"
  dry_run_behavior: "validation_only"
|
|
|
|
# === Drift Checks ===
|
|
- id: "drift_check_node1"
  # Drift check for the production node, backed by ops/status.sh.
  title: "Drift check NODE1"
  description: "Check infrastructure drift on production node"
  tags: ["drift", "ops"]
  service: "infrastructure"
  runner: "script"
  command_ref: "ops/status.sh"
  timeout_sec: 300
  inputs_schema:
    type: "object"
    properties:
      mode:
        type: "string"
        enum: ["quick", "full"]
        default: "quick"
    # NOTE(review): `mode` is required although it declares a default, and
    # this schema does not set additionalProperties — confirm both are
    # intentional.
    required: ["mode"]
  permissions:
    entitlements_required:
      - "tools.jobs.run.drift"
  dry_run_behavior: "validation_only"
|
|
|
|
# === Backup Validation ===
|
|
- id: "backup_validate"
  # Backup presence/validity check for the storage service.
  title: "Validate backup integrity"
  description: "Verify backup files are present and valid"
  tags: ["backup", "ops"]
  service: "storage"
  runner: "script"
  # NOTE(review): the script name does not obviously relate to backup
  # validation — confirm this is the intended command.
  command_ref: "ops/check_daarwizz_awareness.sh"
  timeout_sec: 600
  inputs_schema:
    type: "object"
    properties:
      backup_path:
        type: "string"
        description: "Path to backup directory"
      check_integrity:
        type: "boolean"
        default: true
    additionalProperties: false
  permissions:
    entitlements_required:
      - "tools.jobs.run.backup"
  dry_run_behavior: "list_files"
|
|
|
|
# === Contract Checks ===
|
|
- id: "contract_check_router"
  # OpenAPI contract compatibility check for the router service.
  title: "Contract check router"
  description: "Verify OpenAPI contract compatibility for router"
  # NOTE(review): tagged "migrate" and gated by the migrate entitlement even
  # though this is a contract check — confirm this grouping is intended.
  tags: ["migrate", "ops"]
  service: "router"
  runner: "script"
  command_ref: "ops/canary_router_contract.sh"
  timeout_sec: 300
  inputs_schema:
    type: "object"
    properties:
      strict:
        type: "boolean"
        default: false
    additionalProperties: false
  permissions:
    entitlements_required:
      - "tools.jobs.run.migrate"
  dry_run_behavior: "validation_only"
|
|
|
|
# === Delivery Priority Check ===
|
|
- id: "delivery_priority_check"
  # Gateway delivery-priority configuration check; takes no inputs.
  title: "Delivery priority check"
  description: "Verify message delivery priority configuration"
  tags: ["ops"]
  service: "gateway"
  runner: "script"
  command_ref: "ops/canary_gateway_delivery_priority.sh"
  timeout_sec: 180
  inputs_schema:
    type: "object"
    properties: {}
    additionalProperties: false
  permissions:
    entitlements_required:
      - "tools.jobs.run.ops"
  dry_run_behavior: "show_help"
|
|
|
|
# === Monitor ===
|
|
- id: "monitor_notification"
  # Monitoring notification check; takes no inputs.
  title: "Monitor notification check"
  description: "Check if monitoring notifications are working"
  tags: ["ops"]
  service: "monitoring"
  runner: "script"
  command_ref: "ops/monitor_notify_sofiia.sh"
  timeout_sec: 120
  inputs_schema:
    type: "object"
    properties: {}
    additionalProperties: false
  permissions:
    entitlements_required:
      - "tools.jobs.run.ops"
  dry_run_behavior: "show_help"
|
|
|
|
# === Release Gate (internal runner: invokes tool endpoints sequentially) ===
|
|
- id: "release_check"
  # Release gate orchestration task. Only `service_name` is required; each
  # optional gate is toggled by its own run_* boolean input.
  title: "Release Gate Check"
  # NOTE(review): the description lists fewer gates than the schema exposes
  # (deps, SLO, follow-up, privacy, cost, risk, risk-delta) — consider
  # refreshing it.
  description: >
    Orchestrates all release gates: PR review, config lint, contract diff,
    threat model, optional smoke/drift. Returns one structured pass/fail verdict.
  tags: ["release", "gate", "ops"]
  runner: "internal"  # handled by release_check_runner.py, not a shell script
  command_ref: null  # internal Python runner, so there is no shell command
  timeout_sec: 600  # 10-minute ceiling across all gates
  inputs_schema:
    type: "object"
    properties:
      # --- Release target and static-gate inputs ---
      diff_text:
        type: "string"
        description: "Unified diff text (optional if repo_path provided)"
      service_name:
        type: "string"
        description: "Name of the service being released"
      openapi_base:
        type: "string"
        description: "Base OpenAPI spec (text or repo path)"
      openapi_head:
        type: "string"
        description: "Head OpenAPI spec (text or repo path)"
      risk_profile:
        type: "string"
        enum: ["default", "agentic_tools", "public_api"]
        default: "default"
        description: "Threat model risk profile"
      fail_fast:
        type: "boolean"
        default: false
        description: "Stop at first failing gate"
      run_smoke:
        type: "boolean"
        default: false
        description: "Run smoke tests after static gates pass"

      # --- Dependency vulnerability scan ---
      run_deps:
        type: "boolean"
        default: true
        description: "Run dependency vulnerability scan (gate 3)"
      deps_targets:
        type: "array"
        items:
          type: "string"
          enum: ["python", "node"]
        description: "Ecosystems to scan (default: python + node)"
      deps_vuln_mode:
        type: "string"
        enum: ["online", "offline_cache"]
        default: "offline_cache"
        description: "OSV query mode: online or offline_cache"
      deps_fail_on:
        type: "array"
        items:
          type: "string"
          enum: ["CRITICAL", "HIGH", "MEDIUM", "LOW"]
        description: "Severity levels that block release (default: CRITICAL, HIGH)"
      deps_timeout_sec:
        type: "number"
        default: 40
        description: "Timeout for dependency scan in seconds"

      # --- Gate strictness profile ---
      gate_profile:
        type: "string"
        enum: ["dev", "staging", "prod"]
        default: "dev"
        description: "Gate strictness profile (dev=warn-first, staging/prod=strict privacy)"

      # --- SLO watch ---
      run_slo_watch:
        type: "boolean"
        default: true
        description: "Run SLO watch gate (warns/blocks if service has active SLO violations)"
      slo_watch_window_minutes:
        type: "integer"
        default: 60
        description: "SLO evaluation window in minutes"

      # --- Follow-up / incident watch ---
      run_followup_watch:
        type: "boolean"
        default: true
        description: "Run follow-up watch gate (checks open P0/P1 incidents and overdue follow-ups)"
      followup_watch_window_days:
        type: "integer"
        default: 30
        description: "Window for follow-up/incident scan in days"
      followup_watch_env:
        type: "string"
        enum: ["prod", "staging", "any"]
        default: "any"
        description: "Filter incidents by environment"

      # --- Privacy / data-governance watch ---
      run_privacy_watch:
        type: "boolean"
        default: true
        description: "Run privacy/data-governance warning gate (always pass=true, adds recommendations)"
      privacy_watch_mode:
        type: "string"
        enum: ["fast", "full"]
        default: "fast"
        description: "Scan mode: fast=.py/.yml/.json only, full=all extensions"
      privacy_audit_window_hours:
        type: "integer"
        default: 24
        description: "Time window for audit stream scan in hours"

      # --- Cost watch ---
      run_cost_watch:
        type: "boolean"
        default: true
        description: "Run cost_watch warning gate (always pass=true, adds recommendations)"
      cost_watch_window_hours:
        type: "integer"
        default: 24
        description: "Window for anomaly detection in hours (default 24)"
      cost_spike_ratio_threshold:
        type: "number"
        default: 3.0
        description: "Cost spike ratio to flag as warning (default 3.0x baseline)"
      cost_min_calls_threshold:
        type: "integer"
        default: 50
        description: "Min calls in window to qualify as anomaly (default 50)"

      # --- Risk score watch ---
      run_risk_watch:
        type: "boolean"
        default: true
        description: "Run risk_watch gate: warn/block if service risk score exceeds threshold"
      risk_watch_env:
        type: "string"
        enum: ["prod", "staging"]
        default: "prod"
        description: "Environment for risk score evaluation"
      risk_watch_warn_at:
        type: "integer"
        description: "Override warn threshold (default from risk_policy.yml)"
      risk_watch_fail_at:
        type: "integer"
        description: "Override fail threshold (default from risk_policy.yml per-service override)"

      # --- Risk delta watch ---
      run_risk_delta_watch:
        type: "boolean"
        default: true
        description: "Run risk_delta_watch gate: block staging for p0_services if score rose >= fail_delta in 24h"
      risk_delta_env:
        type: "string"
        enum: ["prod", "staging"]
        default: "prod"
        description: "Environment for risk delta evaluation"
      risk_delta_hours:
        type: "integer"
        default: 24
        description: "Baseline window in hours (default 24h)"
      risk_delta_warn:
        type: "integer"
        description: "Override delta warn threshold (default from risk_policy.yml)"
      risk_delta_fail:
        type: "integer"
        description: "Override delta fail threshold (default from risk_policy.yml)"

      # --- Drift check ---
      run_drift:
        type: "boolean"
        default: false
        description: "Run drift check after static gates pass"
    required: ["service_name"]
  permissions:
    entitlements_required:
      - "tools.pr_review.gate"
      - "tools.contract.gate"
      - "tools.config_lint.gate"
      - "tools.threatmodel.gate"
      - "tools.deps.gate"
      - "tools.cost.read"
      - "tools.data_gov.read"
      - "tools.risk.read"
      - "tools.risk.write"
  dry_run_behavior: "validation_only"
|
|
|
|
# === Audit Retention & Compaction ===
|
|
|
|
- id: "audit_cleanup"
  # Retention job for audit JSONL files. Callers must pass both
  # `retention_days` and `dry_run` explicitly (see `required`).
  title: "Audit JSONL Cleanup"
  description: "Delete or gzip-archive audit JSONL files older than retention_days. Enforces data governance policy."
  tags: ["ops", "retention", "audit"]
  service: "infrastructure"
  runner: "script"
  command_ref: "ops/scripts/audit_cleanup.py"
  timeout_sec: 300
  inputs_schema:
    type: "object"
    properties:
      retention_days:
        type: "integer"
        minimum: 1
        maximum: 365
        default: 30
        description: "Delete/archive files older than this many days (from data_governance_policy.yml default)"
      dry_run:
        type: "boolean"
        default: true
        description: "If true: report only, no changes"
      archive_gzip:
        type: "boolean"
        default: false
        description: "Compress to .jsonl.gz before deleting"
      audit_dir:
        type: "string"
        default: "ops/audit"
        description: "Path to audit JSONL directory (relative to repo root)"
    required: ["retention_days", "dry_run"]
    additionalProperties: false
  permissions:
    entitlements_required:
      - "tools.jobs.run.ops"
  dry_run_behavior: "report_only"
|
|
|
|
- id: "audit_compact"
  # Compaction job for recent audit JSONL files. Callers must pass both
  # `window_days` and `dry_run` explicitly (see `required`).
  title: "Audit JSONL Compaction"
  description: "Merge last N days of audit JSONL into a single compressed artifact for forensics or fast analysis."
  tags: ["ops", "retention", "audit"]
  service: "infrastructure"
  runner: "script"
  command_ref: "ops/scripts/audit_compact.py"
  timeout_sec: 180
  inputs_schema:
    type: "object"
    properties:
      window_days:
        type: "integer"
        minimum: 1
        maximum: 30
        default: 7
        description: "Compact files from last N days"
      output_path:
        type: "string"
        description: "Output directory for compact file (default: ops/audit/compact)"
      dry_run:
        type: "boolean"
        default: true
        description: "If true: count lines only, do not write"
      audit_dir:
        type: "string"
        default: "ops/audit"
    required: ["window_days", "dry_run"]
    additionalProperties: false
  permissions:
    entitlements_required:
      - "tools.jobs.run.ops"
  dry_run_behavior: "report_only"
|
|
|
|
# === Scheduled Operational Jobs (daily/weekly) ===
|
|
#
|
|
# Schedule guidance (add to your cron / systemd timer):
|
|
# Daily 03:30: audit_cleanup
|
|
# Daily 09:00: daily_cost_digest
|
|
# Daily 09:10: daily_privacy_digest
|
|
# Weekly Mon 02:00: weekly_drift_full
|
|
# Weekly Mon 08:00: weekly_incident_digest
|
|
#
|
|
# Example cron (NODE1, as ops user):
|
|
# 30 3 * * * /usr/local/bin/job_runner.sh audit_cleanup '{"retention_days":30,"dry_run":false}'
|
|
# 0 9 * * * /usr/local/bin/job_runner.sh daily_cost_digest '{}'
|
|
# 10 9 * * * /usr/local/bin/job_runner.sh daily_privacy_digest '{}'
|
|
# 0 2 * * 1 /usr/local/bin/job_runner.sh weekly_drift_full '{}'
|
|
# 0 8 * * 1 /usr/local/bin/job_runner.sh weekly_incident_digest '{}'
|
|
|
|
- id: "daily_cost_digest"
  # Internal-runner digest job. Every input is optional, and no
  # dry_run_behavior is declared for this task.
  title: "Daily Cost & FinOps Digest"
  description: "Runs cost_analyzer_tool.digest for last 24h (backend=auto) and saves markdown + JSON artifacts."
  tags: ["ops", "finops", "scheduled", "daily"]
  service: "infrastructure"
  runner: "internal"
  timeout_sec: 60
  inputs_schema:
    type: "object"
    properties:
      window_hours:
        type: "integer"
        default: 24
        description: "Analysis window in hours"
      baseline_hours:
        type: "integer"
        default: 168
        description: "Baseline window for anomaly comparison (7d)"
      top_n:
        type: "integer"
        default: 10
        description: "Top-N tools/agents to include"
      backend:
        type: "string"
        enum: ["auto", "jsonl", "postgres"]
        default: "auto"
        description: "Audit data source"
      output_dir:
        type: "string"
        default: "ops/reports/cost"
        description: "Directory to write YYYY-MM-DD.json and .md artifacts"
    required: []
    additionalProperties: false
  permissions:
    entitlements_required:
      - "tools.cost.read"
      - "tools.jobs.run.ops"
|
|
|
|
- id: "daily_privacy_digest"
  # Internal-runner digest job. Every input is optional, and no
  # dry_run_behavior is declared for this task.
  title: "Daily Privacy & Audit Digest"
  description: "Runs data_governance_tool.digest_audit for last 24h (backend=auto) and saves markdown + JSON artifacts."
  tags: ["ops", "privacy", "scheduled", "daily"]
  service: "infrastructure"
  runner: "internal"
  timeout_sec: 60
  inputs_schema:
    type: "object"
    properties:
      window_hours:
        type: "integer"
        default: 24
        description: "Audit scan window in hours"
      max_findings:
        type: "integer"
        default: 20
        description: "Max findings to include in digest"
      backend:
        type: "string"
        enum: ["auto", "jsonl", "postgres"]
        default: "auto"
        description: "Audit data source"
      output_dir:
        type: "string"
        default: "ops/reports/privacy"
        description: "Directory to write YYYY-MM-DD.json and .md artifacts"
    required: []
    additionalProperties: false
  permissions:
    entitlements_required:
      - "tools.data_gov.read"
      - "tools.jobs.run.ops"
|
|
|
|
- id: "weekly_drift_full"
  # Internal-runner drift analysis. Every input is optional, and no
  # dry_run_behavior is declared for this task.
  title: "Weekly Full Drift Analysis"
  description: "Runs drift_analyzer_tool with all categories and saves JSON artifact to ops/reports/drift/."
  tags: ["ops", "drift", "scheduled", "weekly"]
  service: "infrastructure"
  runner: "internal"
  timeout_sec: 120
  inputs_schema:
    type: "object"
    properties:
      drift_categories:
        type: "array"
        items:
          type: "string"
          enum: ["services", "openapi", "nats", "tools"]
        # The default covers every value allowed by the enum above.
        default: ["services", "openapi", "nats", "tools"]
        description: "Categories to analyze"
      drift_profile:
        type: "string"
        enum: ["dev", "release_gate"]
        default: "dev"
        description: "Severity profile for drift analysis"
      output_dir:
        type: "string"
        default: "ops/reports/drift"
        description: "Directory for week-YYYY-WW.json artifact"
    required: []
    additionalProperties: false
  permissions:
    entitlements_required:
      - "tools.drift.read"
      - "tools.jobs.run.ops"
|
|
|
|
# === Weekly Incident Intelligence Digest (every Monday 08:00) ===
|
|
- id: "weekly_incident_digest"
  # Weekly incident digest on the internal runner. Carries its own cron
  # `schedule`, a concurrency limit of 1, and a log-and-continue failure policy.
  title: "Weekly Incident Intelligence Digest"
  description: "Generates weekly incident digest: correlation stats, recurrence tables (7d/30d), and recommendations. Saves md+json to ops/reports/incidents/weekly/."
  tags: ["incidents", "intelligence", "scheduled", "weekly"]
  runner: "internal"
  schedule: "0 8 * * 1"  # Monday 08:00 UTC
  timeout_sec: 120
  concurrency: 1
  on_failure: "log_and_continue"
  inputs_schema:
    type: "object"
    properties:
      save_artifacts:
        type: "boolean"
        default: true
        # This schema has no `output_dir` input (and forbids extra
        # properties), so the description names the fixed directory given in
        # the task description and output_artifacts.
        description: "Write md+json artifacts to ops/reports/incidents/weekly/"
      workspace_id:
        type: "string"
        default: "default"
      agent_id:
        type: "string"
        default: "sofiia"
    required: []
    additionalProperties: false
  permissions:
    entitlements_required:
      - "tools.oncall.incident_write"
      - "tools.jobs.run.ops"
  output_artifacts:
    - pattern: "ops/reports/incidents/weekly/YYYY-WW.json"
    - pattern: "ops/reports/incidents/weekly/YYYY-WW.md"
|
|
|
|
# === Alert Triage Loop (scheduled, every 5 min, 0 LLM tokens) ===
|
|
- id: "alert_triage_loop"
  # Script-backed triage loop on a 5-minute cron with a concurrency limit
  # of 1. The 240 s timeout is shorter than the 300 s schedule interval.
  title: "Alert Triage Loop"
  description: "Poll unacked alerts and create/update incidents deterministically. 0 LLM tokens in steady state (llm_mode=off)."
  tags: ["alerts", "incidents", "scheduled"]
  runner: "script"
  command_ref: "ops/scripts/alert_triage_loop.py"
  schedule: "*/5 * * * *"
  timeout_sec: 240
  concurrency: 1
  on_failure: "log_and_continue"
  inputs_schema:
    type: "object"
    properties:
      policy_profile:
        type: "string"
        default: "default"
        description: "Routing policy profile"
      # NOTE(review): a `dry_run` input exists but the task declares no
      # dry_run_behavior — confirm how the orchestrator treats dry runs here.
      dry_run:
        type: "boolean"
        default: false
        description: "Simulate without writes"
      workspace_id:
        type: "string"
        default: "default"
      agent_id:
        type: "string"
        default: "sofiia"
    required: []
    additionalProperties: false
  permissions:
    entitlements_required:
      - "tools.alerts.read"
      - "tools.alerts.ack"
      - "tools.oncall.incident_write"
|
|
|
|
# === Deploy (requires explicit entitlement) ===
|
|
- id: "deploy_canary"
  # Canary deployment task, gated by its own deploy entitlement.
  title: "Deploy canary"
  description: "Deploy canary version of services"
  tags: ["deploy"]
  service: "infrastructure"
  runner: "script"
  # NOTE(review): this is the same script that smoke_all references —
  # confirm it is the intended deploy entry point.
  command_ref: "ops/canary_all.sh"
  timeout_sec: 600
  inputs_schema:
    type: "object"
    properties:
      service:
        type: "string"
        description: "Service to deploy"
      version:
        type: "string"
        description: "Version tag to deploy"
      percentage:
        type: "integer"
        minimum: 1
        maximum: 100
        default: 10
    # NOTE(review): additionalProperties is not set, so undeclared inputs are
    # not rejected by this schema — confirm this is acceptable for a deploy task.
    required: ["service", "version"]
  permissions:
    entitlements_required:
      - "tools.jobs.run.deploy"
  dry_run_behavior: "show_plan"
|
|
|
|
# === Risk History & Digest ===
|
|
|
|
- id: "hourly_risk_snapshot"
  # Hourly internal job with a single optional `env` input.
  title: "Hourly Risk Snapshot"
  description: "Compute and persist risk scores for all known services into risk_history store."
  tags: ["risk", "ops", "scheduled"]
  service: "infrastructure"
  runner: "internal"
  schedule: "0 * * * *"  # top of every hour
  timeout_sec: 120
  inputs_schema:
    type: "object"
    properties:
      env:
        type: "string"
        enum: ["prod", "staging"]
        default: "prod"
        description: "Environment to snapshot"
  permissions:
    entitlements_required:
      - "tools.risk.write"
  dry_run_behavior: "report_only"
|
|
|
|
- id: "daily_risk_digest"
  # Daily internal job; both inputs (`env`, `date`) are optional.
  title: "Daily Risk Digest"
  description: "Generate daily risk digest (md+json) in ops/reports/risk/. Runs at policy.digest.daily_hour_utc (default 09:00 UTC)."
  tags: ["risk", "ops", "digest", "scheduled"]
  service: "infrastructure"
  runner: "internal"
  schedule: "0 9 * * *"  # every day, 09:00 UTC
  timeout_sec: 60
  inputs_schema:
    type: "object"
    properties:
      env:
        type: "string"
        enum: ["prod", "staging"]
        default: "prod"
      # Plain string: the schema itself does not enforce the date format.
      date:
        type: "string"
        description: "Override date (YYYY-MM-DD). Default: today UTC."
  permissions:
    entitlements_required:
      - "tools.risk.write"
  dry_run_behavior: "report_only"
|
|
|
|
- id: "risk_history_cleanup"
  # Daily retention job for risk_history; `retention_days` is bounded to
  # the range 7..365.
  title: "Risk History Cleanup"
  description: "Delete risk_history records older than retention_days (default 90d)."
  tags: ["risk", "ops", "retention", "scheduled"]
  service: "infrastructure"
  runner: "internal"
  schedule: "20 3 * * *"  # every day, 03:20 UTC
  timeout_sec: 60
  inputs_schema:
    type: "object"
    properties:
      retention_days:
        type: "integer"
        minimum: 7
        maximum: 365
        default: 90
        description: "Retention period in days"
  permissions:
    entitlements_required:
      - "tools.risk.write"
  dry_run_behavior: "report_only"
|
|
|
|
- id: "weekly_platform_priority_digest"
  # Weekly internal job; all three inputs are optional.
  title: "Weekly Platform Priority Digest"
  description: "Generate Architecture Pressure digest for all services. Outputs ops/reports/platform/YYYY-WW.md + .json. Auto-creates architecture-review followups for services with pressure >= require_arch_review_at."
  tags: ["pressure", "architecture", "digest", "scheduled"]
  service: "infrastructure"
  runner: "internal"
  schedule: "0 6 * * 1"  # Mondays, 06:00 UTC
  timeout_sec: 120
  inputs_schema:
    type: "object"
    properties:
      env:
        type: "string"
        enum: ["prod", "staging", "dev"]
        default: "prod"
      auto_followup:
        type: "boolean"
        default: true
        description: "Auto-create architecture-review followups"
      top_n:
        type: "integer"
        default: 10
  permissions:
    entitlements_required:
      - "tools.pressure.write"
  dry_run_behavior: "report_only"
|
|
|
|
- id: "weekly_backlog_generate"
  # Weekly internal job, scheduled 20 minutes after
  # weekly_platform_priority_digest (06:00 UTC Monday).
  title: "Weekly Backlog Auto-Generation"
  description: "Auto-generate Engineering Backlog items from latest weekly Platform Priority Digest. Runs after weekly_platform_priority_digest (06:00 UTC Monday)."
  tags: ["backlog", "platform", "scheduled"]
  service: "infrastructure"
  runner: "internal"
  schedule: "20 6 * * 1"  # Mondays, 06:20 UTC (20 min after the digest)
  timeout_sec: 120
  inputs_schema:
    type: "object"
    properties:
      env:
        type: "string"
        enum: ["prod", "staging", "dev"]
        default: "prod"
      week_str:
        type: "string"
        description: "Override ISO week (YYYY-WNN). Default: current week."
  permissions:
    entitlements_required:
      - "tools.backlog.admin"
  dry_run_behavior: "report_only"
|
|
|
|
- id: "daily_backlog_cleanup"
  # Daily retention job for backlog items; `retention_days` is bounded to
  # the range 7..730.
  title: "Daily Backlog Cleanup"
  description: "Remove done/canceled backlog items older than retention_days (default 180d)."
  tags: ["backlog", "ops", "retention", "scheduled"]
  service: "infrastructure"
  runner: "internal"
  schedule: "40 3 * * *"  # every day, 03:40 UTC
  timeout_sec: 60
  inputs_schema:
    type: "object"
    properties:
      retention_days:
        type: "integer"
        minimum: 7
        maximum: 730
        default: 180
  permissions:
    entitlements_required:
      - "tools.backlog.admin"
  dry_run_behavior: "report_only"
|