snapshot: NODE1 production state 2026-02-09
Complete snapshot of /opt/microdao-daarion/ from NODE1 (144.76.224.179).
This represents the actual running production code that has diverged
significantly from the previous main branch.
Key changes from old main:
- Gateway (http_api.py): expanded from ~40KB to 164KB with full agent support
- Router: new /v1/agents/{id}/infer endpoint with vision + DeepSeek routing
- Behavior Policy: SOWA v2.2 (3-level: FULL/ACK/SILENT)
- Agent Registry: config/agent_registry.yml as single source of truth
- 13 agents configured (was 3)
- Memory service integration
- CrewAI teams and roles
Excluded from snapshot: venv/, .env, data/, backups, .tgz archives
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
240
shared/idempotency_redis.py
Normal file
240
shared/idempotency_redis.py
Normal file
@@ -0,0 +1,240 @@
|
||||
"""
|
||||
Idempotency Middleware for NATS Workers
|
||||
========================================
|
||||
Redis-based deduplication for async jobs.
|
||||
|
||||
Usage:
|
||||
from idempotency_redis import check_idempotency, mark_completed, mark_failed
|
||||
|
||||
async def process_job(job_id: str, payload: dict):
|
||||
# Check if already processed
|
||||
status, result = await check_idempotency(job_id)
|
||||
if status == "completed":
|
||||
return result # Return cached result
|
||||
if status == "in_progress":
|
||||
raise AlreadyProcessingError("Job already in progress")
|
||||
|
||||
# Mark as in progress
|
||||
await mark_in_progress(job_id)
|
||||
|
||||
try:
|
||||
# Process job
|
||||
result = await do_work(payload)
|
||||
await mark_completed(job_id, result)
|
||||
return result
|
||||
except Exception as e:
|
||||
await mark_failed(job_id, str(e))
|
||||
raise
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import logging
|
||||
from typing import Optional, Tuple, Dict, Any
|
||||
from datetime import datetime, timedelta
|
||||
import redis.asyncio as redis
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Redis connection
|
||||
REDIS_URL = os.getenv("REDIS_URL", "redis://dagi-redis:6379")
|
||||
REDIS_CLIENT = None
|
||||
|
||||
# TTLs
|
||||
IDEMPOTENCY_TTL_HOURS = int(os.getenv("IDEMPOTENCY_TTL_HOURS", "24"))
|
||||
IN_PROGRESS_TTL_MINUTES = int(os.getenv("IN_PROGRESS_TTL_MINUTES", "30"))
|
||||
|
||||
|
||||
async def get_redis() -> redis.Redis:
    """Return the process-wide Redis client, creating it lazily on first use."""
    global REDIS_CLIENT
    if REDIS_CLIENT is not None:
        return REDIS_CLIENT
    # Awaiting the client triggers redis-py's async pool initialization
    # (redis.asyncio.Redis implements __await__ via initialize()).
    REDIS_CLIENT = await redis.from_url(REDIS_URL, decode_responses=True)
    return REDIS_CLIENT
|
||||
|
||||
|
||||
async def check_idempotency(job_id: str) -> Tuple[str, Optional[Dict[str, Any]]]:
    """
    Look up the idempotency record for *job_id*.

    Returns:
        (status, result_ref) where status is one of
        "new" | "in_progress" | "completed" | "failed",
        and result_ref is None or a dict with result data.
    """
    client = await get_redis()
    raw = await client.get(f"idemp:{job_id}")
    if raw is None:
        return ("new", None)

    try:
        record = json.loads(raw)
        status = record.get("status")
        result_ref = record.get("result_ref")

        if status == "completed":
            # Completed jobs may have a full result payload stored under
            # a separate key — prefer it over the bare reference.
            stored = await client.get(f"idemp:result:{job_id}")
            if stored:
                result_ref = json.loads(stored)

        return (status, result_ref)
    except json.JSONDecodeError:
        # Legacy records stored a bare status string instead of JSON.
        return (raw, None)
|
||||
|
||||
|
||||
async def mark_in_progress(job_id: str, metadata: Dict[str, Any] = None):
    """Record *job_id* as in progress, expiring after IN_PROGRESS_TTL_MINUTES."""
    client = await get_redis()
    record = json.dumps({
        "status": "in_progress",
        "started_at": datetime.utcnow().isoformat(),
        "metadata": metadata or {},
    })
    await client.setex(
        f"idemp:{job_id}",
        timedelta(minutes=IN_PROGRESS_TTL_MINUTES),
        record,
    )
    logger.info(f"Marked job {job_id} as in_progress")
|
||||
|
||||
|
||||
async def mark_completed(job_id: str, result: Dict[str, Any] = None, result_ref: str = None):
    """
    Mark job as completed.

    Args:
        job_id: Job identifier
        result: Full result data (stored under a separate key for retrieval)
        result_ref: Reference to result (e.g., NATS subject, file path)
    """
    r = await get_redis()
    key = f"idemp:{job_id}"

    data = {
        "status": "completed",
        "completed_at": datetime.utcnow().isoformat(),
        "result_ref": result_ref or "stored"
    }

    # Store result separately if provided (for retrieval).
    # Explicit None check: `if result:` silently dropped falsy-but-valid
    # results such as {} — those are legitimate completed outcomes.
    if result is not None:
        result_key = f"idemp:result:{job_id}"
        await r.setex(
            result_key,
            timedelta(hours=IDEMPOTENCY_TTL_HOURS),
            json.dumps(result)
        )

    # Mark as completed with the long idempotency TTL
    await r.setex(
        key,
        timedelta(hours=IDEMPOTENCY_TTL_HOURS),
        json.dumps(data)
    )

    logger.info(f"Marked job {job_id} as completed")
|
||||
|
||||
|
||||
async def mark_failed(job_id: str, error: str, allow_retry: bool = True):
    """
    Mark job as failed.

    Args:
        job_id: Job identifier
        error: Error message
        allow_retry: If True, delete the key so the job may run again.
            If False, keep a short-lived "failed" record instead.
    """
    client = await get_redis()
    key = f"idemp:{job_id}"

    if not allow_retry:
        # Keep a short-lived failure record to prevent immediate retry spam.
        record = json.dumps({
            "status": "failed",
            "failed_at": datetime.utcnow().isoformat(),
            "error": error[:500],  # Truncate long errors
        })
        await client.setex(key, timedelta(minutes=5), record)
        logger.warning(f"Marked job {job_id} as failed (no retry): {error[:100]}")
    else:
        # Drop the key entirely so the next attempt starts fresh.
        await client.delete(key)
        logger.info(f"Marked job {job_id} as failed (retry allowed), deleted key")
|
||||
|
||||
|
||||
async def get_job_status(job_id: str) -> Dict[str, Any]:
    """Return the raw idempotency record for *job_id* (debugging helper)."""
    client = await get_redis()
    raw = await client.get(f"idemp:{job_id}")
    if raw is None:
        return {"status": "not_found"}

    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        # Legacy records stored a bare status string.
        return {"status": raw, "raw": raw}
|
||||
|
||||
|
||||
# ==================== Decorator for Workers ====================
|
||||
|
||||
def idempotent_job(job_id_field: str = "job_id"):
    """
    Decorator to make a worker function idempotent.

    The wrapped coroutine must take the job payload dict as its first
    argument, and the payload must contain *job_id_field*.

    Usage:
        @idempotent_job("job_id")
        async def process_workflow(payload: dict):
            # payload must contain job_id
            ...

    Raises (from the wrapper):
        ValueError: payload lacks *job_id_field*
        RuntimeError: job is already in progress
    """
    from functools import wraps  # stdlib; local import keeps module imports unchanged

    def decorator(func):
        @wraps(func)  # preserve func.__name__/__doc__ for logging & introspection
        async def wrapper(payload: dict, *args, **kwargs):
            job_id = payload.get(job_id_field)
            if not job_id:
                raise ValueError(f"Payload must contain '{job_id_field}'")

            # Check idempotency
            status, result = await check_idempotency(job_id)

            if status == "completed":
                logger.info(f"Job {job_id} already completed, returning cached result")
                return result or {"status": "already_completed", "job_id": job_id}

            if status == "in_progress":
                logger.warning(f"Job {job_id} already in progress, skipping")
                raise RuntimeError(f"Job {job_id} already in progress")

            # Mark as in progress
            await mark_in_progress(job_id, {"function": func.__name__})

            try:
                # Execute function
                result = await func(payload, *args, **kwargs)

                # Mark as completed
                await mark_completed(job_id, result=result)

                return result
            except Exception as e:
                # Mark as failed (allow retry)
                await mark_failed(job_id, str(e), allow_retry=True)
                raise

        return wrapper
    return decorator
|
||||
186
shared/service_auth.py
Normal file
186
shared/service_auth.py
Normal file
@@ -0,0 +1,186 @@
|
||||
"""
|
||||
Service-to-Service Authentication
|
||||
=================================
|
||||
JWT-based authentication between internal services.
|
||||
|
||||
Usage:
|
||||
from service_auth import create_service_token, verify_service_token, require_service_auth
|
||||
|
||||
# Create token for service
|
||||
token = create_service_token("router", "router")
|
||||
|
||||
# Verify in endpoint
|
||||
@app.get("/protected")
|
||||
@require_service_auth(allowed_roles=["router", "gateway"])
|
||||
async def protected_endpoint():
|
||||
return {"status": "ok"}
|
||||
"""
|
||||
|
||||
import os
|
||||
import jwt
|
||||
import time
|
||||
from typing import List, Optional, Dict, Any
|
||||
from functools import wraps
|
||||
from fastapi import HTTPException, Header, Request
|
||||
|
||||
# Configuration
|
||||
JWT_SECRET = os.getenv("JWT_SECRET", "change-me-in-production")
|
||||
JWT_ALGORITHM = "HS256"
|
||||
JWT_AUDIENCE = os.getenv("SERVICE_AUD", "microdao-internal")
|
||||
JWT_ISSUER = os.getenv("SERVICE_ISS", "microdao")
|
||||
|
||||
# Service roles and permissions
|
||||
SERVICE_ROLES = {
|
||||
"gateway": ["gateway", "router", "worker", "parser"],
|
||||
"router": ["router", "worker"],
|
||||
"worker": ["worker"],
|
||||
"memory": ["memory"],
|
||||
"control-plane": ["control-plane"],
|
||||
"parser": ["parser"],
|
||||
"ingest": ["ingest"]
|
||||
}
|
||||
|
||||
# Service-to-service access matrix
|
||||
SERVICE_ACCESS = {
|
||||
"gateway": ["memory", "control-plane", "router"],
|
||||
"router": ["memory", "control-plane", "swapper"],
|
||||
"worker": ["memory", "router"],
|
||||
"parser": ["memory"],
|
||||
"ingest": ["memory"]
|
||||
}
|
||||
|
||||
|
||||
def create_service_token(service_id: str, service_role: str, expires_in: int = 900) -> str:
    """
    Create JWT token for service-to-service authentication.

    Args:
        service_id: Unique service identifier (e.g., "router", "gateway")
        service_role: Service role (e.g., "router", "gateway")
        expires_in: Token expiration in seconds (default: 900 = 15 minutes)

    Returns:
        JWT token string
    """
    now = int(time.time())
    payload = {
        "sub": service_id,
        "role": service_role,
        "aud": JWT_AUDIENCE,
        "iss": JWT_ISSUER,
        "iat": now,
        "exp": now + expires_in,
        # service_id / service_role duplicate sub / role so consumers can
        # read them without knowing the registered-claim mapping
        "service_id": service_id,
        "service_role": service_role
    }

    token = jwt.encode(payload, JWT_SECRET, algorithm=JWT_ALGORITHM)
    return token
|
||||
|
||||
|
||||
def verify_service_token(token: str) -> Dict[str, Any]:
    """
    Verify a service JWT and return its decoded payload.

    Audience and issuer are validated against JWT_AUDIENCE / JWT_ISSUER.

    Raises:
        HTTPException: 401 when the token is expired or otherwise invalid.
    """
    try:
        return jwt.decode(
            token,
            JWT_SECRET,
            algorithms=[JWT_ALGORITHM],
            audience=JWT_AUDIENCE,
            issuer=JWT_ISSUER,
        )
    except jwt.ExpiredSignatureError:
        raise HTTPException(status_code=401, detail="Token expired")
    except jwt.InvalidTokenError as e:
        raise HTTPException(status_code=401, detail=f"Invalid token: {e}")
|
||||
|
||||
|
||||
def require_service_auth(allowed_roles: List[str] = None, allowed_services: List[str] = None):
    """
    Decorator to require service authentication on a FastAPI endpoint.

    Args:
        allowed_roles: List of allowed service roles (None = any role)
        allowed_services: List of allowed service IDs (None = any service)

    The decorated endpoint must accept the Request as its first argument.
    On success, request.state.service_id / service_role are populated.
    """
    def decorator(func):
        @wraps(func)
        async def wrapper(request: Request, *args, **kwargs):
            # Get Authorization header
            auth_header = request.headers.get("Authorization", "")

            if not auth_header.startswith("Bearer "):
                raise HTTPException(
                    status_code=401,
                    detail="Missing or invalid Authorization header"
                )

            # Strip only the leading scheme. str.replace("Bearer ", "")
            # removed EVERY occurrence of the substring, which corrupts any
            # token that happens to contain "Bearer " inside it.
            token = auth_header[len("Bearer "):]

            try:
                payload = verify_service_token(token)
                service_id = payload.get("service_id")
                service_role = payload.get("role")

                # Check if service is allowed
                if allowed_roles and service_role not in allowed_roles:
                    raise HTTPException(
                        status_code=403,
                        detail=f"Service role '{service_role}' not allowed"
                    )

                if allowed_services and service_id not in allowed_services:
                    raise HTTPException(
                        status_code=403,
                        detail=f"Service '{service_id}' not allowed"
                    )

                # Add service info to request state
                request.state.service_id = service_id
                request.state.service_role = service_role

                return await func(request, *args, **kwargs)

            except HTTPException:
                raise
            except Exception as e:
                # NOTE(review): func() runs inside this try, so unexpected
                # endpoint errors are also converted to 401 — confirm intended.
                raise HTTPException(status_code=401, detail=f"Authentication failed: {e}")

        return wrapper
    return decorator
|
||||
|
||||
|
||||
def get_service_token() -> str:
    """
    Create a token for the current service, configured via the SERVICE_ID
    and SERVICE_ROLE environment variables (role defaults to the id).

    Raises:
        ValueError: if SERVICE_ID is not set.
    """
    service_id = os.getenv("SERVICE_ID")
    if not service_id:
        raise ValueError("SERVICE_ID environment variable not set")

    role = os.getenv("SERVICE_ROLE", service_id)
    return create_service_token(service_id, role)
|
||||
|
||||
|
||||
# FastAPI dependency for service auth
|
||||
async def verify_service(request: Request, authorization: str = Header(None)):
    """FastAPI dependency for service authentication.

    Verifies the Bearer token, stores service_id / role on request.state,
    and returns the decoded JWT payload.
    """
    if not authorization or not authorization.startswith("Bearer "):
        raise HTTPException(status_code=401, detail="Missing Authorization header")

    # Strip only the leading scheme. str.replace("Bearer ", "") removed
    # EVERY occurrence of the substring, corrupting tokens containing it.
    token = authorization[len("Bearer "):]
    payload = verify_service_token(token)

    request.state.service_id = payload.get("service_id")
    request.state.service_role = payload.get("role")

    return payload
|
||||
282
shared/trace_middleware.py
Normal file
282
shared/trace_middleware.py
Normal file
@@ -0,0 +1,282 @@
|
||||
"""
|
||||
Trace Middleware
|
||||
================
|
||||
Стандартизована кореляція запитів через всі сервіси.
|
||||
|
||||
Headers:
|
||||
- X-Trace-ID: uuid (весь шлях)
|
||||
- X-Request-ID: uuid (HTTP request)
|
||||
- X-Job-ID: uuid (async NATS job)
|
||||
- X-User-ID: user identifier
|
||||
- X-Agent-ID: target agent
|
||||
- X-Mode: public|team|private|confidential
|
||||
- X-Policy-Version: version hash
|
||||
- X-Prompt-Version: version hash
|
||||
|
||||
Використання:
|
||||
1. Gateway генерує trace_id
|
||||
2. Всі сервіси передають у headers
|
||||
3. NATS messages містять у metadata
|
||||
4. Logs структуровані з trace_id
|
||||
"""
|
||||
|
||||
import uuid
|
||||
import logging
|
||||
from typing import Optional, Dict, Any
|
||||
from datetime import datetime
|
||||
from contextvars import ContextVar
|
||||
from functools import wraps
|
||||
|
||||
from fastapi import Request, Response
|
||||
from starlette.middleware.base import BaseHTTPMiddleware
|
||||
|
||||
# Context variables for trace propagation
|
||||
trace_context: ContextVar[Dict[str, str]] = ContextVar('trace_context', default={})
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TraceContext:
    """Immutable-by-convention correlation data for one logical request.

    Carries the ids and versions propagated via HTTP and NATS headers so
    logs and audit events across services can be joined by trace_id.
    """

    def __init__(
        self,
        trace_id: str = None,
        request_id: str = None,
        job_id: str = None,
        user_id: str = None,
        agent_id: str = None,
        mode: str = "public",
        policy_version: str = None,
        prompt_version: str = None,
        source_service: str = None
    ):
        # Generate fresh ids when this service is the origin of the trace.
        self.trace_id = trace_id if trace_id else str(uuid.uuid4())
        self.request_id = request_id if request_id else str(uuid.uuid4())
        self.job_id = job_id
        self.user_id = user_id
        self.agent_id = agent_id
        self.mode = mode
        self.policy_version = policy_version
        self.prompt_version = prompt_version
        self.source_service = source_service
        self.timestamp = datetime.utcnow().isoformat()

    def to_headers(self) -> Dict[str, str]:
        """Serialize to HTTP headers, omitting optional fields that are unset."""
        headers = {
            "X-Trace-ID": self.trace_id,
            "X-Request-ID": self.request_id,
        }
        for name, value in (
            ("X-Job-ID", self.job_id),
            ("X-User-ID", self.user_id),
            ("X-Agent-ID", self.agent_id),
            ("X-Mode", self.mode),
            ("X-Policy-Version", self.policy_version),
            ("X-Prompt-Version", self.prompt_version),
        ):
            if value:
                headers[name] = value
        return headers

    def to_nats_headers(self) -> Dict[str, str]:
        """Serialize to NATS headers (all keys present, empty-string fallbacks)."""
        return {
            "Nats-Trace-ID": self.trace_id,
            "Nats-Job-ID": self.job_id or self.request_id,
            "Nats-User-ID": self.user_id or "",
            "Nats-Agent-ID": self.agent_id or "",
            "Nats-Mode": self.mode,
            "Nats-Timestamp": self.timestamp
        }

    def to_log_context(self) -> Dict[str, Any]:
        """Serialize to a flat dict for structured logging."""
        return {
            "trace_id": self.trace_id,
            "request_id": self.request_id,
            "job_id": self.job_id,
            "user_id": self.user_id,
            "agent_id": self.agent_id,
            "mode": self.mode,
            "policy_version": self.policy_version,
            "prompt_version": self.prompt_version,
            "timestamp": self.timestamp
        }

    @classmethod
    def from_headers(cls, headers: Dict[str, str]) -> "TraceContext":
        """Build a context from HTTP headers (accepts exact or lower-case keys)."""
        def pick(name):
            return headers.get(name) or headers.get(name.lower())

        return cls(
            trace_id=pick("X-Trace-ID"),
            request_id=pick("X-Request-ID"),
            job_id=pick("X-Job-ID"),
            user_id=pick("X-User-ID"),
            agent_id=pick("X-Agent-ID"),
            mode=pick("X-Mode") or "public",
            # Version headers were only ever read in exact case upstream.
            policy_version=headers.get("X-Policy-Version"),
            prompt_version=headers.get("X-Prompt-Version")
        )

    @classmethod
    def from_nats(cls, headers: Dict[str, str]) -> "TraceContext":
        """Build a context from NATS message headers."""
        return cls(
            trace_id=headers.get("Nats-Trace-ID"),
            job_id=headers.get("Nats-Job-ID"),
            user_id=headers.get("Nats-User-ID"),
            agent_id=headers.get("Nats-Agent-ID"),
            mode=headers.get("Nats-Mode", "public")
        )
|
||||
|
||||
|
||||
class TraceMiddleware(BaseHTTPMiddleware):
    """FastAPI middleware that attaches a trace context to every request."""

    def __init__(self, app, service_name: str):
        super().__init__(app)
        self.service_name = service_name

    async def dispatch(self, request: Request, call_next):
        # Reuse incoming trace headers when present; otherwise start a new trace.
        ctx = TraceContext.from_headers(dict(request.headers))
        ctx.source_service = self.service_name

        # Make the context visible to downstream code via the context var.
        trace_context.set(ctx.to_log_context())

        common = {
            "trace_id": ctx.trace_id,
            "request_id": ctx.request_id,
            "service": self.service_name,
        }

        logger.info(
            "Request started",
            extra={**common, "method": request.method, "path": request.url.path},
        )

        response = await call_next(request)

        # Echo correlation ids back to the caller.
        response.headers["X-Trace-ID"] = ctx.trace_id
        response.headers["X-Request-ID"] = ctx.request_id

        logger.info(
            "Request completed",
            extra={**common, "status_code": response.status_code},
        )

        return response
|
||||
|
||||
|
||||
def get_current_trace() -> Dict[str, str]:
    """Return the trace context dict for the current task/request.

    Outside a traced request this returns the ContextVar default ({}).
    NOTE(review): that default is a single shared dict — callers must not
    mutate the returned mapping.
    """
    return trace_context.get()
|
||||
|
||||
|
||||
def with_trace(func):
    """Decorator that passes the current trace context to *func* as the
    keyword argument ``trace_context``."""
    @wraps(func)
    async def inner(*args, **kwargs):
        current = get_current_trace()
        return await func(*args, trace_context=current, **kwargs)
    return inner
|
||||
|
||||
|
||||
# ==================== Structured Logging ====================
|
||||
|
||||
class TraceLogFormatter(logging.Formatter):
    """JSON formatter that enriches each record with the trace context."""

    # Attributes present on every LogRecord; anything else on a record
    # arrived via the `extra=` kwarg of a logging call.
    _STANDARD_ATTRS = frozenset(
        vars(logging.LogRecord("", 0, "", 0, "", (), None)).keys()
    ) | {"message", "asctime", "service", "taskName"}

    def format(self, record):
        import json  # local import: module does not import json at top level

        # Get trace context
        ctx = trace_context.get()

        log_entry = {
            "timestamp": datetime.utcnow().isoformat(),
            "level": record.levelname,
            "message": record.getMessage(),
            "service": getattr(record, 'service', 'unknown'),
            "trace_id": ctx.get('trace_id', ''),
            "request_id": ctx.get('request_id', ''),
            "user_id": ctx.get('user_id', ''),
            "agent_id": ctx.get('agent_id', ''),
        }

        # `logging` merges `extra=` kwargs straight into record.__dict__,
        # so the original `hasattr(record, 'extra')` check never fired and
        # fields like method/path/status_code were dropped. Pick up any
        # non-standard record attributes as the extra fields instead.
        for key, value in record.__dict__.items():
            if key not in self._STANDARD_ATTRS and key not in log_entry:
                log_entry[key] = value

        # default=str keeps logging crash-free when an extra value is not
        # JSON-serializable (deliberate: a lossy log beats a lost log).
        return json.dumps(log_entry, default=str)
|
||||
|
||||
|
||||
def setup_trace_logging(service_name: str):
    """Install the JSON trace formatter on the root logger and tag every
    record with *service_name* via a custom record factory."""
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(TraceLogFormatter())

    root = logging.getLogger()
    root.handlers = [stream_handler]
    root.setLevel(logging.INFO)

    # Wrap the existing factory so each record carries the service name.
    previous_factory = logging.getLogRecordFactory()

    def tagged_factory(*args, **kwargs):
        rec = previous_factory(*args, **kwargs)
        rec.service = service_name
        return rec

    logging.setLogRecordFactory(tagged_factory)
|
||||
|
||||
|
||||
# ==================== NATS Integration ====================
|
||||
|
||||
async def publish_with_trace(js, subject: str, payload: bytes, ctx: TraceContext):
    """Publish *payload* on *subject* via the JetStream context *js*,
    attaching the trace correlation headers from *ctx*.
    """
    headers = ctx.to_nats_headers()
    await js.publish(subject, payload, headers=headers)
|
||||
|
||||
|
||||
def extract_trace_from_msg(msg) -> TraceContext:
    """Rebuild a TraceContext from a NATS message's headers.

    Messages without headers yield a fresh context with generated ids.
    """
    headers = dict(msg.headers) if msg.headers else {}
    return TraceContext.from_nats(headers)
|
||||
|
||||
|
||||
# ==================== Audit Event ====================
|
||||
|
||||
def create_audit_event(
    action: str,
    ctx: TraceContext,
    details: Dict[str, Any] = None
) -> Dict[str, Any]:
    """Build a standardized audit event dict for *action* from the trace
    context, with a fresh event_id and UTC timestamp."""
    # Copy the correlation fields off the trace context in a fixed order
    # so the serialized event keeps a stable key layout.
    correlation = {name: getattr(ctx, name) for name in (
        "trace_id", "request_id", "job_id", "user_id",
        "agent_id", "mode", "policy_version", "prompt_version",
    )}
    return {
        "event_id": str(uuid.uuid4()),
        "event_type": f"audit.action.{action}",
        "timestamp": datetime.utcnow().isoformat(),
        **correlation,
        "action": action,
        "details": details or {},
    }
|
||||
Reference in New Issue
Block a user