Config policies (16 files): alert_routing, architecture_pressure, backlog, cost_weights, data_governance, incident_escalation, incident_intelligence, network_allowlist, nodes_registry, observability_sources, rbac_tools_matrix, release_gate, risk_attribution, risk_policy, slo_policy, tool_limits, tools_rollout

Ops (22 files): Caddyfile, calendar compose, grafana voice dashboard, deployments/incidents logs, runbooks for alerts/audit/backlog/incidents/sofiia/voice, cron jobs, scripts (alert_triage, audit_cleanup, migrate_*, governance, schedule), task_registry, voice alerts/ha/latency/policy

Docs (30+ files): HUMANIZED_STEPAN v2.7-v3 changelogs and runbooks, NODA1/NODA2 status and setup, audit index and traces, backlog, incident, supervisor, tools, voice, opencode, release, risk, aistalk, spacebot

Made-with: Cursor
193 lines
5.5 KiB
YAML
193 lines
5.5 KiB
YAML
# Data Governance & Privacy Policy — DAARION.city
#
# Used by data_governance_tool to scan for PII/secrets/logging/retention risks.
# Severity: "error"   = high risk (reported, but non-blocking while
#                       gate_mode=warning_only).
#           "warning" = medium risk.
#           "info"    = low risk / informational.

# ─── Retention policies ───────────────────────────────────────────────────────
# Retention periods; every *_days value is a number of days.
retention:
  audit_jsonl_days: 30
  audit_postgres_days: 90
  memory_events_days: 90
  logs_days: 14
  # Anomaly threshold: an audit record whose out_size is >= this many bytes
  # is flagged as a large output.
  large_output_bytes: 65536  # 64KB

# ─── PII patterns ─────────────────────────────────────────────────────────────
# Each pattern carries a regex, a severity level (see the legend in the file
# header), a finding id and a human-readable description.
# The regexes are double-quoted YAML scalars, so every regex backslash is
# written doubled ("\\b" in this file is "\b" once parsed).
pii_patterns:
  email:
    regex: "(?i)\\b[A-Z0-9._%+\\-]+@[A-Z0-9.\\-]+\\.[A-Z]{2,}\\b"
    severity: "warning"
    id: "DG-PII-001"
    description: "Email address detected"

  phone_ua_intl:
    # "+" is a non-word character, so a word boundary cannot sit between
    # whitespace (or line start) and "+". The previous "\b\+?" prefix therefore
    # never captured the leading "+" of an international number. The
    # alternation starts the match either at a literal "+" or at a word
    # boundary, so the same numbers are reported but the "+" is now included.
    regex: "(?:\\+|\\b)[0-9][0-9\\-\\s()]{7,}[0-9]\\b"
    severity: "warning"
    id: "DG-PII-002"
    description: "Phone-like number detected"

  credit_card:
    # 13 to 19 digits, each optionally followed by spaces or hyphens.
    regex: "\\b(?:\\d[ \\-]*?){13,19}\\b"
    severity: "error"
    id: "DG-PII-003"
    description: "Credit card-like number detected"

  passport_like:
    # Two capital letters followed by 6 or 7 digits.
    regex: "\\b[A-Z]{2}\\d{6,7}\\b"
    severity: "warning"
    id: "DG-PII-004"
    description: "Passport-like identifier detected"

  tax_id_ua:
    # Any standalone run of exactly 10 digits.
    regex: "\\b\\d{10}\\b"
    severity: "info"
    id: "DG-PII-005"
    description: "Possible Ukrainian tax ID (10 digits)"

# ─── Extra secret patterns (supplement tool_governance._SECRET_PATTERNS) ──────
secret_patterns:
  # NOTE(review): presumably true means the patterns from
  # tool_governance._SECRET_PATTERNS are applied in addition to "extra" —
  # confirm against the scanner.
  inherit_from_tool_governance: true
  extra:
    # Header line of a PEM private-key block.
    - name: "private_key_block"
      regex: "-----BEGIN [A-Z ]*PRIVATE KEY-----"
      severity: "error"
      id: "DG-SEC-001"
    # "mfa token" assignment with a 6-8 character alphanumeric value,
    # case-insensitive.
    - name: "aws_mfa_token"
      regex: "(?i)mfa[_\\-]?token[\\s=:]+['\"`]?[\\dA-Z]{6,8}['\"`]?"
      severity: "warning"
      id: "DG-SEC-002"
    # Header line of a PEM certificate.
    - name: "pem_certificate"
      regex: "-----BEGIN CERTIFICATE-----"
      severity: "info"
      id: "DG-SEC-003"

# ─── Logging safety rules ─────────────────────────────────────────────────────
logging_rules:
  # Field names that must never show up unmasked in a logger call.
  forbid_logging_fields:
    - password
    - passwd
    - token
    - secret
    - private_key
    - api_key
    - access_key
    - credential
    - auth_header
    - bearer

  # Fields expected in hashed form only; logging them raw produces a warning.
  sensitive_fields_warn:
    - user_id
    - chat_id
    - telegram_id
    - session_id
    - workspace_id

  # Call names whose presence indicates that redaction is applied (good sign).
  redaction_calls:
    - redact
    - mask
    - sanitize
    - anonymize
    - _hash
    - sha256

  # Payload field names suggesting that raw content is being logged or stored.
  raw_payload_indicators:
    - payload
    - diff_text
    - openapi_text
    - request_body
    - response_body
    - prompt
    - messages
    - content
    - transcript
    - conversation
    - full_text

# ─── Storage / retention keywords ─────────────────────────────────────────────
storage_keywords:
  # Names of write/persist operations.
  write_patterns:
    - save_message
    - store_event
    - insert_record
    - append_event
    - write_event
    - write_record
    - persist
    - bulk_insert
    - executemany
  # Keywords that count as evidence of a retention mechanism.
  retention_indicators:
    - ttl
    - expire
    - retention
    - cleanup
    - delete_old
    - purge
    - rotate
    - max_age
    - expiry
  # Number of lines before/after to search for a retention indicator.
  context_window: 20

# ─── Scan paths ───────────────────────────────────────────────────────────────
paths:
  # Directory prefixes that are scanned.
  include:
    - "services/"
    - "docs/"
    - "ops/"
    - "config/"
  # Glob patterns that are skipped.
  exclude:
    - "**/node_modules/**"
    - "**/.git/**"
    - "**/dist/**"
    - "**/build/**"
    - "**/.venv/**"
    - "**/__pycache__/**"
    - "**/*.pyc"
    - "**/*.lock"  # dependency lock files (high false-positive risk)
    - "**/*.min.js"

  # File extensions to scan.
  # NOTE(review): ".env.example" also matches the never_scan glob ".env.*"
  # below — confirm which list takes precedence in the scanner.
  scan_extensions:
    - ".py"
    - ".ts"
    - ".js"
    - ".yml"
    - ".yaml"
    - ".json"
    - ".env.example"
    - ".md"
    - ".txt"
    - ".sh"

  # Never scan these (sensitive or binary).
  never_scan:
    - "*.env"
    - ".env.*"
    - "*.pem"
    - "*.key"
    - "*.pfx"
    - "*.p12"
    - "*.crt"

# ─── Gate behaviour ───────────────────────────────────────────────────────────
severity_behavior:
  # gate_mode values:
  #   warning_only — gate always pass=True (adds recommendations only)
  #   strict       — gate pass=False on any error finding
  gate_mode: "warning_only"
  # Severities listed under recommend_on.
  recommend_on:
    - "warning"
    - "error"

# ─── Limits ───────────────────────────────────────────────────────────────────
# Upper bounds on scan size and output.
limits:
  max_files_fast: 200
  max_files_full: 500
  max_bytes_per_file: 262144  # 256KB
  max_findings: 200  # cap before truncating
  max_evidence_chars: 200  # mask and truncate evidence snippets