---
# Data Governance & Privacy Policy — DAARION.city
#
# Used by data_governance_tool to scan for PII/secrets/logging/retention risks.
# Severity: "error"   = high risk (still warning-only in gate_mode=warning_only).
#           "warning" = medium risk.
#           "info"    = low risk / informational.

# ─── Retention policies ───────────────────────────────────────────────────────

# Retention windows, expressed in days per storage target.
retention:
  audit_jsonl_days: 30
  audit_postgres_days: 90
  memory_events_days: 90
  logs_days: 14
  # Large output threshold: if audit out_size >= this, flag as anomaly.
  # NOTE(review): assumed to be nested under `retention` — confirm against the
  # consumer's lookup path.
  large_output_bytes: 65536  # 64KB

# ─── PII patterns ─────────────────────────────────────────────────────────────

# Each entry carries a regex, a severity (see header) and a stable finding id.
pii_patterns:
  email:
    regex: "(?i)\\b[A-Z0-9._%+\\-]+@[A-Z0-9.\\-]+\\.[A-Z]{2,}\\b"
    severity: "warning"
    id: "DG-PII-001"
    description: "Email address detected"

  phone_ua_intl:
    # NOTE(review): because of the leading \b, an optional "+" is only captured
    # when it directly follows a word character; after whitespace the match
    # starts at the first digit. Confirm this is acceptable for evidence output.
    regex: "\\b\\+?[0-9][0-9\\-\\s()]{7,}[0-9]\\b"
    severity: "warning"
    id: "DG-PII-002"
    description: "Phone-like number detected"

  credit_card:
    regex: "\\b(?:\\d[ \\-]*?){13,19}\\b"
    severity: "error"
    id: "DG-PII-003"
    description: "Credit card-like number detected"

  passport_like:
    regex: "\\b[A-Z]{2}\\d{6,7}\\b"
    severity: "warning"
    id: "DG-PII-004"
    description: "Passport-like identifier detected"

  tax_id_ua:
    regex: "\\b\\d{10}\\b"
    severity: "info"
    id: "DG-PII-005"
    description: "Possible Ukrainian tax ID (10 digits)"

# ─── Extra secret patterns (supplement tool_governance._SECRET_PATTERNS) ──────

secret_patterns:
  inherit_from_tool_governance: true
  extra:
    - name: "private_key_block"
      regex: "-----BEGIN [A-Z ]*PRIVATE KEY-----"
      severity: "error"
      id: "DG-SEC-001"

    - name: "aws_mfa_token"
      regex: "(?i)mfa[_\\-]?token[\\s=:]+['\"`]?[\\dA-Z]{6,8}['\"`]?"
      severity: "warning"
      id: "DG-SEC-002"

    - name: "pem_certificate"
      regex: "-----BEGIN CERTIFICATE-----"
      severity: "info"
      id: "DG-SEC-003"

# ─── Logging safety rules ─────────────────────────────────────────────────────

logging_rules:
  # Field names that must NOT appear unmasked in logger calls
  forbid_logging_fields:
    - password
    - passwd
    - token
    - secret
    - private_key
    - api_key
    - access_key
    - credential
    - auth_header
    - bearer

  # Fields that should appear as hash-only (warn if logged raw)
  sensitive_fields_warn:
    - user_id
    - chat_id
    - telegram_id
    - session_id
    - workspace_id

  # Calls that indicate redaction is applied (good)
  redaction_calls:
    - redact
    - mask
    - sanitize
    - anonymize
    - _hash
    - sha256

  # Payload field names that indicate raw content is being logged/stored
  raw_payload_indicators:
    - payload
    - diff_text
    - openapi_text
    - request_body
    - response_body
    - prompt
    - messages
    - content
    - transcript
    - conversation
    - full_text

# ─── Storage / retention keywords ─────────────────────────────────────────────

storage_keywords:
  write_patterns:
    - save_message
    - store_event
    - insert_record
    - append_event
    - write_event
    - write_record
    - persist
    - bulk_insert
    - executemany

  retention_indicators:
    - ttl
    - expire
    - retention
    - cleanup
    - delete_old
    - purge
    - rotate
    - max_age
    - expiry

  # NOTE(review): assumed to be nested under `storage_keywords` — confirm.
  context_window: 20  # lines before/after to search for retention indicator

# ─── Scan paths ───────────────────────────────────────────────────────────────

paths:
  include:
    - "services/"
    - "docs/"
    - "ops/"
    - "config/"

  exclude:
    - "**/node_modules/**"
    - "**/.git/**"
    - "**/dist/**"
    - "**/build/**"
    - "**/.venv/**"
    - "**/__pycache__/**"
    - "**/*.pyc"
    - "**/*.lock"  # dependency lock files (high false-positive risk)
    - "**/*.min.js"

  # File extensions to scan
  # NOTE(review): `scan_extensions` and `never_scan` are assumed to be nested
  # under `paths` — confirm against the consumer.
  scan_extensions:
    - ".py"
    - ".ts"
    - ".js"
    - ".yml"
    - ".yaml"
    - ".json"
    - ".env.example"
    - ".md"
    - ".txt"
    - ".sh"

  # Never scan these (sensitive or binary)
  # NOTE(review): ".env.*" also matches ".env.example", which is listed in
  # scan_extensions above — confirm which list takes precedence in the scanner.
  never_scan:
    - "*.env"
    - ".env.*"
    - "*.pem"
    - "*.key"
    - "*.pfx"
    - "*.p12"
    - "*.crt"

# ─── Gate behaviour ───────────────────────────────────────────────────────────

severity_behavior:
  # warning_only: gate always pass=True (adds recommendations only)
  # strict:       gate pass=False on any error finding
  gate_mode: "warning_only"
  recommend_on:
    - "warning"
    - "error"

# ─── Limits ───────────────────────────────────────────────────────────────────

limits:
  max_files_fast: 200
  max_files_full: 500
  max_bytes_per_file: 262144  # 256KB
  max_findings: 200  # cap before truncating
  max_evidence_chars: 200  # mask and truncate evidence snippets