feat(platform): add new services, tools, tests and crews modules

New router intelligence modules (26 files): alert_ingest/store, audit_store,
architecture_pressure, backlog_generator/store, cost_analyzer, data_governance,
dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment,
platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files),
signature_state_store, sofiia_auto_router, tool_governance

New services:
- sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static
- memory-service: integration_endpoints, integrations, voice_endpoints, static UI
- aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents)
- sofiia-supervisor: new supervisor service
- aistalk-bridge-lite: Telegram bridge lite
- calendar-service: CalDAV calendar service with reminders
- mlx-stt-service / mlx-tts-service: Apple Silicon speech services
- binance-bot-monitor: market monitor service
- node-worker: STT/TTS memory providers

New tools (9): agent_email, browser_tool, contract_tool, observability_tool,
oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault

New crews: agromatrix_crew (12 modules: depth_classifier, doc_facts, doc_focus,
farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine,
session_context, style_adapter, telemetry)

Tests: 85+ test files for all new modules
Made-with: Cursor
This commit is contained in:
Apple
2026-03-03 07:14:14 -08:00
parent e9dedffa48
commit 129e4ea1fc
241 changed files with 69349 additions and 0 deletions

View File

@@ -0,0 +1,18 @@
"""
SafeCodeExecutor - Sandboxed code execution for AI agents
Fully self-hosted, privacy-by-default
"""
from .safe_code_executor import (
SafeCodeExecutor,
ExecutionConfig,
register_tools
)
__all__ = [
"SafeCodeExecutor",
"ExecutionConfig",
"register_tools"
]
__version__ = "1.0.0"

View File

@@ -0,0 +1,221 @@
"""
SafeCodeExecutor API Handler
POST /v1/tools/safe-exec - Execute code synchronously
POST /v1/tools/safe-exec/async - Execute code asynchronously (returns job_id)
GET /v1/jobs/{job_id} - Get async job result
Security:
- RBAC check (agent must have safe_exec scope)
- Rate limiting
- Request validation
"""
from typing import Dict, Any, Optional
from dataclasses import dataclass
import hashlib
import time
@dataclass
class ExecRequest:
    """Inbound execution request for the safe-exec endpoints."""
    # Target runtime: "python" or "javascript"/"js"
    language: str
    # Source code to run inside the sandbox
    code: str
    # Optional text piped to the child process's stdin
    stdin: Optional[str] = None
    # Per-request overrides for resource limits
    # (keys like timeout_ms, max_memory_mb, max_stdout_bytes)
    limits: Optional[Dict[str, Any]] = None
    # Caller metadata (e.g. req_id) merged with agent_id and forwarded
    # to the executor for auditing
    context: Optional[Dict[str, Any]] = None
@dataclass
class ExecResponse:
    """Outbound execution response mirroring the executor's result dict."""
    # One of: succeeded/failed/timeout/unauthorized/rate_limited/validation_failed
    status: str
    # Executor-assigned id; empty string when execution never started
    execution_id: str
    # Language echoed back from the request/executor
    language: str
    # Captured (possibly truncated) stdout text
    stdout: Optional[str] = None
    # Captured (possibly truncated) stderr text
    stderr: Optional[str] = None
    # JSON object parsed from stdout, when the executor found one
    result_json: Optional[Dict] = None
    # Human-readable error for non-success statuses
    error: Optional[str] = None
    # Timing/size metrics reported by the executor
    metrics: Optional[Dict] = None
    # Populated only for async executions
    job_id: Optional[str] = None
class SafeExecAPI:
    """HTTP-facing wrapper around a SafeCodeExecutor.

    Runs optional RBAC and rate-limit checks before delegating to the
    executor, and converts the executor's raw result dicts into
    ExecResponse objects for the sync endpoint.
    """

    def __init__(self, executor, auth_check=None, rate_limiter=None):
        self.executor = executor
        # Optional RBAC callable: (agent_id, scopes) -> bool
        self.auth_check = auth_check
        # Optional limiter exposing is_allowed(agent_id) -> bool
        self.rate_limiter = rate_limiter

    def _check_auth(self, agent_id: str, scopes: list) -> bool:
        """True when the agent holds the required scopes (or auth is disabled)."""
        # Dev mode: no auth hook configured means everything is permitted.
        return self.auth_check(agent_id, scopes) if self.auth_check else True

    def _check_rate_limit(self, agent_id: str) -> bool:
        """True when the agent is under its rate limit (or limiting is disabled)."""
        return self.rate_limiter.is_allowed(agent_id) if self.rate_limiter else True

    def execute(
        self,
        agent_id: str,
        request: ExecRequest
    ) -> ExecResponse:
        """
        Execute code synchronously.
        POST /v1/tools/safe-exec
        """
        def _deny(status: str, message: str) -> ExecResponse:
            # Shared shape for every early-rejection response.
            return ExecResponse(
                status=status,
                execution_id="",
                language=request.language,
                error=message
            )

        if not self._check_auth(agent_id, ["safe_exec"]):
            return _deny("unauthorized", "Agent lacks safe_exec scope")
        if not self._check_rate_limit(agent_id):
            return _deny("rate_limited", "Rate limit exceeded")

        # Pre-validate so policy violations get a distinct status.
        problem = self.executor.validate(request.language, request.code)
        if problem:
            return _deny("validation_failed", problem)

        raw = self.executor.execute(
            language=request.language,
            code=request.code,
            stdin=request.stdin,
            limits=request.limits,
            context={"agent_id": agent_id, **(request.context or {})}
        )
        # Map the executor's dict onto the response dataclass.
        return ExecResponse(
            status=raw.get("status", "failed"),
            execution_id=raw.get("execution_id", ""),
            language=raw.get("language", request.language),
            stdout=raw.get("stdout"),
            stderr=raw.get("stderr"),
            result_json=raw.get("result_json"),
            error=raw.get("error"),
            metrics=raw.get("metrics")
        )

    def execute_async(
        self,
        agent_id: str,
        request: ExecRequest
    ) -> Dict[str, Any]:
        """
        Execute code asynchronously.
        POST /v1/tools/safe-exec/async
        Returns job_id for polling
        """
        if not self._check_auth(agent_id, ["safe_exec"]):
            return {"status": "unauthorized", "error": "Agent lacks safe_exec scope"}
        if not self._check_rate_limit(agent_id):
            return {"status": "rate_limited", "error": "Rate limit exceeded"}

        job_id = self.executor.execute_async(
            language=request.language,
            code=request.code,
            stdin=request.stdin,
            limits=request.limits,
            context={"agent_id": agent_id, **(request.context or {})}
        )
        return {
            "status": "queued",
            "job_id": job_id,
            "check_url": f"/v1/jobs/{job_id}"
        }

    def get_job(
        self,
        agent_id: str,
        job_id: str
    ) -> Dict[str, Any]:
        """
        Get async job result.
        GET /v1/jobs/{job_id}

        NOTE(review): no auth/rate-limit check here — any caller can poll
        any job_id. Confirm whether that is intentional.
        """
        outcome = self.executor.get_job_result(job_id)
        return outcome or {"status": "not_found", "error": "Job not found"}
# =============================================================================
# FASTAPI EXAMPLE (optional integration)
# =============================================================================
def create_api_routes(executor) -> "APIRouter":
    """Create FastAPI routes for safe-exec.

    Wires a SafeExecAPI around *executor* and exposes the three endpoints.
    NOTE(review): SafeExecAPI is constructed without auth_check/rate_limiter,
    so these routes run unauthenticated — presumably dev-only; confirm.
    """
    # Imported lazily so the core module has no hard fastapi dependency.
    # NOTE(review): HTTPException and Optional are imported but unused.
    from fastapi import APIRouter, HTTPException, Header
    from typing import Optional
    router = APIRouter()
    api = SafeExecAPI(executor)

    @router.post("/v1/tools/safe-exec")
    async def exec_code(
        request: ExecRequest,
        x_agent_id: str = Header(default="anonymous")
    ):
        """Execute code synchronously"""
        result = api.execute(x_agent_id, request)
        # Dataclass -> plain dict so FastAPI can serialize it.
        return result.__dict__

    @router.post("/v1/tools/safe-exec/async")
    async def exec_code_async(
        request: ExecRequest,
        x_agent_id: str = Header(default="anonymous")
    ):
        """Execute code asynchronously"""
        return api.execute_async(x_agent_id, request)

    @router.get("/v1/jobs/{job_id}")
    async def get_job(
        job_id: str,
        x_agent_id: str = Header(default="anonymous")
    ):
        """Get job result"""
        return api.get_job(x_agent_id, job_id)

    return router

View File

@@ -0,0 +1,157 @@
# SafeCodeExecutor Documentation
## Overview
SafeCodeExecutor is a sandboxed code execution engine for AI agents in the DAARION stack. It allows agents like Sofiia to execute code safely for engineering tasks (parsing, transformation, calculations) without access to secrets, network, or host filesystem.
## Security Model
### Isolation
- **Subprocess-based**: Code runs in isolated subprocess
- **No network access**: All network operations blocked
- **No filesystem access**: Only temp directory for execution
- **Resource limits**: CPU time, memory, output size
### Blocked Imports and Builtins (Python)
```
os, sys, subprocess, socket, requests, http, ftplib, smtplib,
pty, tty, termios, fcntl, importlib, pathlib, glob, shutil,
tempfile, cryptography, ssl, eval, exec, compile, open,
pickle, marshal, yaml, __import__, getattr, setattr
```
### Allowed Imports
```
json, math, re, datetime, time, calendar, collections,
functools, itertools, random, statistics, string, base64,
hashlib, hmac, secrets, urllib.parse, html, xml.etree.ElementTree,
typing, types, copy, pprint, textwrap
```
## Limits
| Limit | Default | Max |
|-------|---------|-----|
| Timeout | 5s | 30s |
| Memory | 256MB | 1GB |
| Stdout | 64KB | 1MB |
| Stderr | 8KB | 64KB |
## Usage
### Python
```python
from safe_code_executor import SafeCodeExecutor
executor = SafeCodeExecutor()
# Basic execution
result = executor.execute(
language="python",
code="""
import json
data = {"a": 1, "b": 2}
print(json.dumps({"sum": data["a"] + data["b"]}))
"""
)
print(result["status"]) # "succeeded"
print(result["stdout"]) # output
print(result["result_json"]) # parsed JSON
```
### JavaScript
```python
result = executor.execute(
language="javascript",
code="""
const data = {a: 1, b: 2};
console.log(JSON.stringify({sum: data.a + data.b}));
"""
)
```
## API
### POST /v1/tools/safe-exec
Execute code synchronously.
```json
{
"language": "python",
"code": "print('hello')",
"limits": {
"timeout_ms": 3000,
"max_memory_mb": 256
}
}
```
Response:
```json
{
"status": "succeeded",
"execution_id": "abc123",
"stdout": "hello\n",
"stderr": "",
"result_json": null,
"metrics": {
"execution_time_ms": 45,
"stdout_bytes": 6
}
}
```
### POST /v1/tools/safe-exec/async
Execute code asynchronously, returns job_id.
### GET /v1/jobs/{job_id}
Get async job result.
## Integration with Sofiia
```python
# In Sofiia's tool registry
from safe_code_executor import SafeCodeExecutor
tools = {
"safe_exec": SafeCodeExecutor()
}
```
## Threat Model
### What We Protect Against
- Agent escaping sandbox to access host
- Agent accessing secrets from environment
- Agent making network calls
- Agent reading sensitive files
- Resource exhaustion attacks
### What We Don't Protect Against
- Malicious code that doesn't try to escape
- Side-channel attacks
- Timing attacks on specific operations
## Running Locally
```bash
# Run unit tests
python tools/safe_code_executor/tests/test_unit.py
# Run security tests
python tools/safe_code_executor/tests/test_security.py
```
## Production Deployment
For production, consider:
1. **Docker sandbox**: Run each execution in ephemeral container
2. **gVisor**: Lightweight kernel isolation
3. **firejail/nsjail**: Process-level isolation
4. **Ephemeral VMs**: Kata Containers for stronger isolation

View File

@@ -0,0 +1,2 @@
# SafeCodeExecutor Dependencies
# No external dependencies - uses stdlib only

View File

@@ -0,0 +1,602 @@
"""
SafeCodeExecutor - Secure sandboxed code execution for AI agents
Fully self-hosted, privacy-by-default
Security:
- Subprocess-based sandbox with resource limits
- Import allowlist (whitelist)
- No network access
- No filesystem access (except temp)
- Strict resource limits (CPU, memory, timeout)
"""
import os
import sys
import re
import json
import time
import uuid
import signal
import subprocess
import resource
import tempfile
import logging
import hashlib
from pathlib import Path
from typing import Optional, Dict, Any, List
from datetime import datetime
from dataclasses import dataclass, field
from threading import Lock
from collections import defaultdict
logger = logging.getLogger(__name__)
# ============================================================================
# CONFIGURATION
# ============================================================================
@dataclass
class ExecutionConfig:
    """Sandbox execution configuration.

    The resource-limit fields are defaults; callers may override them per
    request via the `limits` argument of SafeCodeExecutor.execute().
    """
    # Resource limits
    timeout_ms: int = 5000            # wall-clock timeout per execution
    max_memory_mb: int = 256          # address-space cap (RLIMIT_AS)
    max_cpu_ms: int = 5000            # advisory CPU budget
    max_stdout_bytes: int = 65536     # stdout truncation threshold
    max_result_bytes: int = 65536     # max size of parsed result_json payload
    max_stderr_bytes: int = 8192      # stderr truncation threshold
    # Security
    allowed_languages: List[str] = field(default_factory=lambda: ["python", "javascript"])
    # NOTE(review): allowed_modules is currently advisory — the validator only
    # enforces blocked_modules. TODO: wire up a true allowlist check.
    allowed_modules: List[str] = field(default_factory=lambda: [
        # Python standard library (safe modules only)
        "json", "math", "re", "datetime", "time", "calendar",
        "collections", "functools", "itertools", "random", "statistics",
        "string", "base64", "hashlib", "hmac", "secrets",
        "urllib.parse", "html", "xml.etree.ElementTree",
        "typing", "types", "copy", "pprint", "textwrap",
        # JavaScript/Node
        "console", "JSON", "Math", "Date", "Array", "Object", "String", "Number", "Boolean"
    ])
    blocked_modules: List[str] = field(default_factory=lambda: [
        # Python - OS/System access
        "os", "subprocess", "socket", "requests", "urllib.request",
        "http", "ftplib", "smtplib", "telnetlib", "telnet",
        "pty", "tty", "termios", "fcntl", "resource",
        "importlib", "pkgutil", "setuptools", "pip",
        "pathlib", "glob", "fnmatch", "shutil",
        "tempfile", "tempdir",  # Blocked for security
        # FIX: "hashlib" removed from this list — it is on allowed_modules
        # (and documented as allowed), but the validator rejects anything
        # listed here, so `import hashlib` was being blocked.
        "cryptography", "ssl",
        "eval", "exec", "compile",  # Code execution
        "open", "file", "io",  # File I/O
        "__import__", "getattr", "setattr", "delattr",  # Dynamic access
        "pickle", "marshal", "yaml",  # Serialization (code execution risk)
        # JavaScript
        "child_process", "fs", "net", "http", "https", "tls",
        # FIX: "http Agent" (garbled entry) corrected to "http.Agent"
        "crypto", "dns", "dgram", "domain", "http2", "http.Agent",
        "fetch", "XMLHttpRequest", "WebSocket", "Worker", "eval", "Function"
    ])
class SafeCodeExecutor:
    """
    Safe code execution sandbox.

    Security features:
    - Subprocess isolation
    - Import blocklist + dangerous-pattern screening (see validate())
    - No network access
    - No filesystem access (stripped env, scratch cwd)
    - Resource limits (CPU, memory, timeout)
    - Output limits

    Usage:
        executor = SafeCodeExecutor()
        result = executor.execute(
            language="python",
            code="print('Hello')"
        )
    """

    # In-process job-result store shared by all instances (execute_async).
    # NOTE(review): class-level and unbounded — in production this should be
    # Redis/DB backed; results are lost on restart and never evicted.
    _job_results: Dict[str, Dict] = {}

    def __init__(self, config: Optional[ExecutionConfig] = None):
        """Create an executor; *config* defaults to ExecutionConfig()."""
        self.config = config or ExecutionConfig()
        self._lock = Lock()
        self._active_processes: Dict[str, subprocess.Popen] = {}
        # Scratch directory where per-execution source files are written
        self._sandbox_dir = Path("/tmp/safe_exec_sandbox")
        self._sandbox_dir.mkdir(parents=True, exist_ok=True)
        # Audit logger (never records code/stdin content)
        self._audit = AuditLogger("/tmp/safe_exec_logs")

    # =========================================================================
    # CODE VALIDATION
    # =========================================================================
    def _validate_python_code(self, code: str) -> Optional[str]:
        """Return an error message if the Python source violates policy, else None."""
        # Static import screen: any module on the blocklist is rejected outright.
        for blocked in self.config.blocked_modules:
            if re.search(rf'\bimport\s+{blocked}\b', code):
                return f"Blocked import: {blocked}"
            if re.search(rf'\bfrom\s+{blocked}\s+import', code):
                return f"Blocked import from: {blocked}"
        # Pattern screen for dangerous builtins / call shapes.
        # (A duplicate `eval` entry present in the original list was removed;
        # the first entry already covers it.)
        dangerous_patterns = [
            (r'__import__\s*\(', "Dynamic import not allowed"),
            (r'eval\s*\(', "eval not allowed"),
            (r'exec\s*\(', "exec not allowed"),
            (r'compile\s*\(', "compile not allowed"),
            (r'open\s*\(', "File I/O not allowed"),
            (r'with\s+open', "File I/O not allowed"),
            (r'subprocess\.', "subprocess not allowed"),
            (r'os\.system', "os.system not allowed"),
            (r'os\.popen', "os.popen not allowed"),
            (r'socket\.', "socket not allowed"),
            (r'requests\.', "requests not allowed"),
            (r'urllib\.request', "urllib not allowed"),
            (r'pickle\.load', "pickle not allowed"),
            (r'yaml\.load', "yaml not allowed"),
            (r'getattr\s*\([^,]+,[^,]+,[^,]+\)', "getattr with 3 args not allowed"),
            (r'setattr\s*\([^,]+,[^,]+,[^,]+\)', "setattr with 3 args not allowed"),
            (r'class\s*\(.*meta', "metaclass not allowed"),
            (r'lambda\s*.*:.*exec', "lambda with exec not allowed"),
            # Fork bombs
            (r'while\s+True.*fork', "fork not allowed"),
            (r'\[\s*lambda.*\]*\s*\(', "Lambda in list comp may be dangerous"),
        ]
        for pattern, msg in dangerous_patterns:
            if re.search(pattern, code, re.IGNORECASE):
                return f"Dangerous pattern detected: {msg}"
        return None

    def _validate_javascript_code(self, code: str) -> Optional[str]:
        """Return an error message if the JavaScript source violates policy, else None."""
        dangerous_patterns = [
            (r'require\s*\(\s*[\'"]child_process[\'"]\)', "child_process not allowed"),
            (r'require\s*\(\s*[\'"]fs[\'"]\)', "fs not allowed"),
            (r'require\s*\(\s*[\'"]net[\'"]\)', "net not allowed"),
            (r'require\s*\(\s*[\'"]http[\'"]\)', "http not allowed"),
            (r'require\s*\(\s*[\'"]https[\'"]\)', "https not allowed"),
            (r'require\s*\(\s*[\'"]crypto[\'"]\)', "crypto not allowed"),
            (r'require\s*\(\s*[\'"]dns[\'"]\)', "dns not allowed"),
            (r'eval\s*\(', "eval not allowed"),
            (r'Function\s*\(', "Function constructor not allowed"),
            (r'process\.exit', "process.exit not allowed"),
            (r'process\.kill', "process.kill not allowed"),
            (r'child_process\.exec', "child_process.exec not allowed"),
            (r'child_process\.spawn', "child_process.spawn not allowed"),
            (r'__dirname', "__dirname not allowed"),
            (r'__filename', "__filename not allowed"),
            (r'global\.', "global access not allowed"),
            (r'window\.', "window not allowed"),
            (r'document\.', "document not allowed"),
        ]
        for pattern, msg in dangerous_patterns:
            if re.search(pattern, code, re.IGNORECASE):
                return f"Dangerous pattern detected: {msg}"
        return None

    def validate(self, language: str, code: str) -> Optional[str]:
        """Validate code for security; returns an error message or None."""
        if language == "python":
            return self._validate_python_code(code)
        elif language == "javascript" or language == "js":
            return self._validate_javascript_code(code)
        else:
            return f"Unsupported language: {language}"

    # =========================================================================
    # CODE EXECUTION
    # =========================================================================
    def execute(
        self,
        language: str,
        code: str,
        stdin: Optional[str] = None,
        limits: Optional[Dict[str, Any]] = None,
        context: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Execute code in sandbox.

        Args:
            language: "python" or "javascript"
            code: Source code to execute
            stdin: Optional stdin input
            limits: Override default limits
            context: Execution context (agent_id, req_id, etc.)

        Returns:
            Dict with execution result
        """
        # NOTE(review): the lock is held for the entire run (including the
        # subprocess wait), serializing all executions on this instance —
        # confirm this is intentional before relying on concurrency.
        with self._lock:
            # Generate execution ID
            execution_id = hashlib.sha256(
                f"{time.time()}_{uuid.uuid4()}".encode()
            ).hexdigest()[:16]
            # Apply per-request limit overrides (fall back to config defaults)
            timeout_ms = limits.get("timeout_ms", self.config.timeout_ms) if limits else self.config.timeout_ms
            max_memory_mb = limits.get("max_memory_mb", self.config.max_memory_mb) if limits else self.config.max_memory_mb
            max_stdout = limits.get("max_stdout_bytes", self.config.max_stdout_bytes) if limits else self.config.max_stdout_bytes
            # Validate code first
            validation_error = self.validate(language, code)
            if validation_error:
                return {
                    "status": "failed",
                    "error": validation_error,
                    "execution_id": execution_id,
                    "language": language
                }
            # Execute based on language
            if language == "python":
                return self._execute_python(execution_id, code, stdin, timeout_ms, max_memory_mb, max_stdout, context)
            elif language in ("javascript", "js"):
                return self._execute_javascript(execution_id, code, stdin, timeout_ms, max_memory_mb, max_stdout, context)
            else:
                return {
                    "status": "failed",
                    "error": f"Unsupported language: {language}",
                    "execution_id": execution_id
                }

    def _execute_python(
        self,
        execution_id: str,
        code: str,
        stdin: Optional[str],
        timeout_ms: int,
        max_memory_mb: int,
        max_stdout: int,
        context: Optional[Dict]
    ) -> Dict[str, Any]:
        """Execute Python code in sandbox"""
        start_time = time.time()
        # Create temp file for code
        code_file = self._sandbox_dir / f"{execution_id}.py"
        try:
            code_file.write_text(code)
            # Run with resource limits (POSIX only)
            try:
                process = subprocess.Popen(
                    [sys.executable, str(code_file)],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    stdin=subprocess.PIPE if stdin else None,
                    cwd="/tmp",
                    env={
                        "PATH": "/usr/bin:/bin",
                        "PYTHONPATH": "",
                        "HOME": "/tmp",
                        "TMPDIR": "/tmp",
                    },
                    # FIX: resource limits were previously only applied to the
                    # JavaScript path; Python code ran with no memory/CPU/file
                    # caps despite the fallback comment below implying otherwise.
                    preexec_fn=self._set_resource_limits(max_memory_mb)
                )
            except Exception:
                # Fallback without preexec_fn (e.g. non-POSIX platforms)
                process = subprocess.Popen(
                    [sys.executable, str(code_file)],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    stdin=subprocess.PIPE if stdin else None,
                    cwd="/tmp"
                )
            self._active_processes[execution_id] = process
            # Wait for completion
            try:
                stdout, stderr = process.communicate(
                    input=stdin.encode() if stdin else None,
                    timeout=timeout_ms / 1000
                )
            except subprocess.TimeoutExpired:
                process.kill()
                # Drain pipes after kill so the child is fully reaped
                stdout, stderr = process.communicate()
                return {
                    "status": "timeout",
                    "execution_id": execution_id,
                    "language": "python",
                    "timeout_ms": timeout_ms,
                    "metrics": self._get_metrics(start_time, stdout, stderr)
                }
            # Truncate output to the configured caps
            stdout = stdout[:max_stdout]
            stderr = stderr[:self.config.max_stderr_bytes]
            # Try to parse result_json from the captured stdout
            result_json = self._extract_result_json(stdout.decode("utf-8", errors="replace"))
            return {
                "status": "succeeded" if process.returncode == 0 else "failed",
                "exit_code": process.returncode,
                "stdout": stdout.decode("utf-8", errors="replace"),
                "stderr": stderr.decode("utf-8", errors="replace"),
                "result_json": result_json,
                "execution_id": execution_id,
                "language": "python",
                "metrics": self._get_metrics(start_time, stdout, stderr)
            }
        except Exception as e:
            return {
                "status": "failed",
                "error": str(e),
                "execution_id": execution_id,
                "language": "python"
            }
        finally:
            # Cleanup: remove source file and de-register the process
            if code_file.exists():
                code_file.unlink()
            self._active_processes.pop(execution_id, None)

    def _execute_javascript(
        self,
        execution_id: str,
        code: str,
        stdin: Optional[str],
        timeout_ms: int,
        max_memory_mb: int,
        max_stdout: int,
        context: Optional[Dict]
    ) -> Dict[str, Any]:
        """Execute JavaScript code in sandbox"""
        start_time = time.time()
        # Create temp file
        code_file = self._sandbox_dir / f"{execution_id}.js"
        # Wrap code
        # FIX: the previous wrapper used `const console = {...console}`, which
        # reads the new `console` binding inside its own initializer (temporal
        # dead zone) and threw a ReferenceError before user code ever ran.
        # Reading the property off globalThis avoids the TDZ.
        wrapped_code = f'''
'use strict';
// Restricted globals
const console = {{...globalThis.console}};
delete console.debug;
delete console.trace;
// Execute
{code}
'''
        try:
            code_file.write_text(wrapped_code)
            # Run with Node (if available), applying resource limits on POSIX
            try:
                process = subprocess.Popen(
                    ["node", str(code_file)],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    stdin=subprocess.PIPE if stdin else None,
                    cwd="/tmp",
                    env={"PATH": "/usr/bin:/bin", "HOME": "/tmp"},
                    preexec_fn=self._set_resource_limits(max_memory_mb)
                )
            except FileNotFoundError:
                # Node missing — let the dedicated handler below report it
                raise
            except Exception:
                # Fallback without preexec_fn (mirrors the Python path)
                process = subprocess.Popen(
                    ["node", str(code_file)],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    stdin=subprocess.PIPE if stdin else None,
                    cwd="/tmp",
                    env={"PATH": "/usr/bin:/bin", "HOME": "/tmp"}
                )
            self._active_processes[execution_id] = process
            try:
                stdout, stderr = process.communicate(
                    input=stdin.encode() if stdin else None,
                    timeout=timeout_ms / 1000
                )
            except subprocess.TimeoutExpired:
                process.kill()
                stdout, stderr = process.communicate()
                return {
                    "status": "timeout",
                    "execution_id": execution_id,
                    "language": "javascript",
                    "timeout_ms": timeout_ms,
                    "metrics": self._get_metrics(start_time, stdout, stderr)
                }
            stdout = stdout[:max_stdout]
            stderr = stderr[:self.config.max_stderr_bytes]
            result_json = self._extract_result_json(stdout.decode("utf-8", errors="replace"))
            return {
                "status": "succeeded" if process.returncode == 0 else "failed",
                "exit_code": process.returncode,
                "stdout": stdout.decode("utf-8", errors="replace"),
                "stderr": stderr.decode("utf-8", errors="replace"),
                "result_json": result_json,
                "execution_id": execution_id,
                "language": "javascript",
                "metrics": self._get_metrics(start_time, stdout, stderr)
            }
        except FileNotFoundError:
            return {
                "status": "failed",
                "error": "Node.js not available",
                "execution_id": execution_id,
                "language": "javascript"
            }
        except Exception as e:
            return {
                "status": "failed",
                "error": str(e),
                "execution_id": execution_id,
                "language": "javascript"
            }
        finally:
            if code_file.exists():
                code_file.unlink()
            self._active_processes.pop(execution_id, None)

    def _set_resource_limits(self, max_memory_mb: int):
        """Return a preexec_fn that applies POSIX rlimits in the child process."""
        def set_limits():
            # Memory limit (address space)
            resource.setrlimit(resource.RLIMIT_AS, (max_memory_mb * 1024 * 1024, max_memory_mb * 1024 * 1024))
            # CPU time limit (60 seconds max)
            resource.setrlimit(resource.RLIMIT_CPU, (60, 60))
            # File size limit (10MB)
            resource.setrlimit(resource.RLIMIT_FSIZE, (10 * 1024 * 1024, 10 * 1024 * 1024))
        return set_limits

    def _extract_result_json(self, stdout: str) -> Optional[Dict]:
        """Best-effort extraction of a JSON payload from stdout (last line wins)."""
        lines = stdout.strip().split('\n')
        # Try last line as JSON
        if lines:
            last_line = lines[-1].strip()
            try:
                return json.loads(last_line)
            except ValueError:
                pass
        # Scan backwards for anything that looks like JSON
        for line in reversed(lines):
            line = line.strip()
            if line.startswith('{') or line.startswith('['):
                try:
                    return json.loads(line)
                except ValueError:
                    continue
        return None

    def _get_metrics(self, start_time: float, stdout: bytes, stderr: bytes) -> Dict:
        """Get execution metrics (wall time and raw output sizes)."""
        return {
            "execution_time_ms": int((time.time() - start_time) * 1000),
            "stdout_bytes": len(stdout),
            "stderr_bytes": len(stderr)
        }

    # =========================================================================
    # JOB-BASED EXECUTION
    # =========================================================================
    def execute_async(
        self,
        language: str,
        code: str,
        stdin: Optional[str] = None,
        limits: Optional[Dict] = None,
        context: Optional[Dict] = None
    ) -> str:
        """
        Execute code asynchronously, returns job_id.
        For integration with job system.

        NOTE(review): currently runs synchronously and only stores the result
        under a job_id; a production deployment should queue to a worker.
        """
        job_id = hashlib.sha256(f"{time.time()}_{uuid.uuid4()}".encode()).hexdigest()[:16]
        result = self.execute(language, code, stdin, limits, context)
        result["job_id"] = job_id
        self._store_job_result(job_id, result)
        return job_id

    def get_job_result(self, job_id: str) -> Optional[Dict]:
        """Get result of async execution (None when unknown)."""
        return self._job_results.get(job_id)

    def _store_job_result(self, job_id: str, result: Dict) -> None:
        """Store job result in the shared in-process store."""
        self._job_results[job_id] = result

    # =========================================================================
    # ADMIN
    # =========================================================================
    def kill_process(self, execution_id: str) -> bool:
        """Kill a running process; returns True if one was found."""
        process = self._active_processes.get(execution_id)
        if process:
            process.kill()
            return True
        return False

    def get_stats(self) -> Dict:
        """Get executor statistics and effective default limits."""
        return {
            "active_processes": len(self._active_processes),
            "config": {
                "timeout_ms": self.config.timeout_ms,
                "max_memory_mb": self.config.max_memory_mb,
                "max_stdout_bytes": self.config.max_stdout_bytes
            }
        }
# ============================================================================
# AUDIT LOGGING
# ============================================================================
class AuditLogger:
    """Append-only JSONL audit trail for the executor.

    Only metadata is recorded — the `code` and `stdin` keys are stripped so
    user source never lands on disk.
    """

    def __init__(self, log_dir: str):
        self.log_dir = Path(log_dir)
        self.log_dir.mkdir(parents=True, exist_ok=True)
        # Serializes appends from concurrent executions
        self._lock = Lock()

    def log(self, execution_id: str, event: str, details: Dict) -> None:
        """Append one audit record to today's log file."""
        # Never log code content: copy and drop the sensitive keys.
        scrubbed = dict(details)
        scrubbed.pop("code", None)
        scrubbed.pop("stdin", None)
        record = {
            "timestamp": datetime.utcnow().isoformat(),
            "execution_id": execution_id,
            "event": event,
            "details": scrubbed
        }
        with self._lock:
            # One JSONL file per UTC day
            day_file = self.log_dir / f"exec_{datetime.utcnow().strftime('%Y%m%d')}.jsonl"
            with open(day_file, "a") as fh:
                fh.write(json.dumps(record) + "\n")
# ============================================================================
# REGISTRATION FOR OCTOTOOLS
# ============================================================================
def register_tools() -> Dict[str, Any]:
    """Describe this tool for the OctoTools registry."""
    exposed = [
        "execute",
        "execute_async",
        "get_job_result",
        "validate",
        "kill_process",
        "get_stats",
    ]
    registration = {
        "class": SafeCodeExecutor,
        "description": "Sandboxed code execution - Python/JS with resource limits, no network/filesystem",
        "methods": exposed,
    }
    return {"safe_code_executor": registration}

View File

@@ -0,0 +1 @@
# Tests

View File

@@ -0,0 +1,162 @@
"""
Security tests for SafeCodeExecutor
"""
import os
import sys

# FIX: `sys.path.insert(0, "..")` resolved against the current working
# directory, so the import below only worked when the tests were launched
# from inside this directory. Anchor on this file's location instead.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from safe_code_executor import SafeCodeExecutor
def test_fork_bomb_blocked():
    """Test that fork bombs are blocked"""
    sandbox = SafeCodeExecutor()
    # An infinite loop must be cut off by the wall-clock timeout.
    outcome = sandbox.execute(
        language="python",
        code="while True: pass",
        limits={"timeout_ms": 1000},
    )
    assert outcome["status"] == "timeout"
    print("✓ Fork bomb / infinite loop blocked by timeout")
def test_memory_exhaustion_blocked():
    """Test that memory exhaustion is blocked"""
    sandbox = SafeCodeExecutor()
    # Attempt a multi-gigabyte allocation under a 128 MB cap.
    outcome = sandbox.execute(
        language="python",
        code="x = [0] * 1000000000",
        limits={"timeout_ms": 3000, "max_memory_mb": 128},
    )
    # Either the memory limit kills it or the timeout does.
    assert outcome["status"] in ["timeout", "failed", "killed"]
    print("✓ Memory exhaustion blocked")
def test_read_proc_blocked():
    """Test that /proc access is blocked"""
    sandbox = SafeCodeExecutor()
    # open() is rejected by the static validator before execution.
    outcome = sandbox.execute(
        language="python",
        code="print(open('/proc/self/environ').read())",
    )
    assert outcome["status"] == "failed"
    print("✓ /proc access blocked")
def test_read_env_blocked():
    """Test that env access is blocked"""
    sandbox = SafeCodeExecutor()
    # `import os` is on the blocklist, so validation must fail this.
    outcome = sandbox.execute(
        language="python",
        code="import os; print(os.environ.get('SECRET'))",
    )
    assert outcome["status"] == "failed"
    print("✓ Environment access blocked")
def test_subprocess_blocked():
    """Test that subprocess is blocked"""
    sandbox = SafeCodeExecutor()
    outcome = sandbox.execute(
        language="python",
        code="import subprocess; subprocess.run(['cat', '/etc/passwd'])",
    )
    assert outcome["status"] == "failed"
    # The rejection reason should name the offending module.
    assert "subprocess" in outcome["error"].lower()
    print("✓ Subprocess blocked")
def test_requests_blocked():
    """Test that requests library is blocked"""
    sandbox = SafeCodeExecutor()
    outcome = sandbox.execute(
        language="python",
        code="import requests; print(requests.get('http://evil.com'))",
    )
    assert outcome["status"] == "failed"
    print("✓ Requests library blocked")
def test_pickle_blocked():
    """Test that pickle is blocked"""
    sandbox = SafeCodeExecutor()
    outcome = sandbox.execute(
        language="python",
        code="import pickle; pickle.loads(b'...')",
    )
    assert outcome["status"] == "failed"
    print("✓ Pickle blocked")
def test_yaml_blocked():
    """Test that yaml is blocked"""
    sandbox = SafeCodeExecutor()
    outcome = sandbox.execute(
        language="python",
        code="import yaml; yaml.load('!!python/object/apply:os.system', Loader=None)",
    )
    assert outcome["status"] == "failed"
    print("✓ YAML blocked")
def test_output_limit():
    """Test that output is limited"""
    sandbox = SafeCodeExecutor()
    # Produce far more stdout than the 1000-byte cap allows.
    outcome = sandbox.execute(
        language="python",
        code="print('x' * 100000)",
        limits={"max_stdout_bytes": 1000},
    )
    # Truncation should keep the captured output near the cap.
    assert len(outcome.get("stdout", "")) <= 1100
    print("✓ Output limit enforced")
def test_input_validation():
    """Test language validation"""
    sandbox = SafeCodeExecutor()
    # Ruby is not an allowed language; validation must reject it.
    outcome = sandbox.execute(language="ruby", code="puts 'hello'")
    assert outcome["status"] == "failed"
    assert "Unsupported" in outcome["error"]
    print("✓ Language validation works")
if __name__ == "__main__":
print("=== Running Security Tests ===\n")
test_fork_bomb_blocked()
test_memory_exhaustion_blocked()
test_read_proc_blocked()
test_read_env_blocked()
test_subprocess_blocked()
test_requests_blocked()
test_pickle_blocked()
test_yaml_blocked()
test_output_limit()
test_input_validation()
print("\n✅ All security tests passed!")

View File

@@ -0,0 +1,174 @@
"""
Unit tests for SafeCodeExecutor
"""
import os
import sys

# FIX: `sys.path.insert(0, "..")` resolved against the current working
# directory, so the import below only worked when the tests were launched
# from inside this directory. Anchor on this file's location instead.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from safe_code_executor import SafeCodeExecutor
def test_basic_python_execution():
    """Test basic Python execution"""
    sandbox = SafeCodeExecutor()
    outcome = sandbox.execute(
        language="python",
        code="print('Hello, World!')",
    )
    assert outcome["status"] == "succeeded"
    assert "Hello, World!" in outcome["stdout"]
    print("✓ Basic execution works")
def test_json_transform():
    """Test JSON transformation"""
    sandbox = SafeCodeExecutor()
    snippet = '''
import json
data = {"a": 1, "b": 2, "c": 3}
result = {"sum": data["a"] + data["b"] + data["c"]}
print(json.dumps(result))
'''
    outcome = sandbox.execute(language="python", code=snippet)
    assert outcome["status"] == "succeeded"
    # The executor parses the last stdout line as JSON.
    assert outcome["result_json"]["sum"] == 6
    print("✓ JSON transform works")
def test_regex_parse():
    """Test regex parsing"""
    sandbox = SafeCodeExecutor()
    snippet = '''
import re
text = "Email: test@example.com"
match = re.search(r'[\w.-]+@[\w.-]+', text)
print(match.group() if match else "None")
'''
    outcome = sandbox.execute(language="python", code=snippet)
    assert outcome["status"] == "succeeded"
    assert "test@example.com" in outcome["stdout"]
    print("✓ Regex parse works")
def test_validation_blocks_os_import():
    """Test that os import is blocked"""
    sandbox = SafeCodeExecutor()
    outcome = sandbox.execute(
        language="python",
        code="import os; print(os.getcwd())",
    )
    assert outcome["status"] == "failed"
    # Error message should mention the blocked module.
    assert "import os" in outcome["error"] or "os" in outcome["error"]
    print("✓ OS import blocked")
def test_validation_blocks_subprocess():
    """Test that subprocess is blocked"""
    sandbox = SafeCodeExecutor()
    outcome = sandbox.execute(
        language="python",
        code="import subprocess; subprocess.run(['ls'])",
    )
    assert outcome["status"] == "failed"
    print("✓ Subprocess blocked")
def test_validation_blocks_socket():
    """Test that socket is blocked"""
    sandbox = SafeCodeExecutor()
    outcome = sandbox.execute(
        language="python",
        code="import socket; s = socket.socket()",
    )
    assert outcome["status"] == "failed"
    print("✓ Socket blocked")
def test_validation_blocks_eval():
    """Test that eval is blocked"""
    sandbox = SafeCodeExecutor()
    outcome = sandbox.execute(
        language="python",
        code="eval('1+1')",
    )
    assert outcome["status"] == "failed"
    print("✓ Eval blocked")
def test_validation_blocks_file_io():
    """Test that file I/O is blocked"""
    sandbox = SafeCodeExecutor()
    outcome = sandbox.execute(
        language="python",
        code="open('/etc/passwd').read()",
    )
    assert outcome["status"] == "failed"
    print("✓ File I/O blocked")
def test_math_operations():
    """Test math operations work"""
    sandbox = SafeCodeExecutor()
    snippet = '''
import math
print(math.sqrt(16))
print(math.pi)
'''
    outcome = sandbox.execute(language="python", code=snippet)
    assert outcome["status"] == "succeeded"
    print("✓ Math operations work")
def test_statistics():
    """Test statistics module"""
    sandbox = SafeCodeExecutor()
    snippet = '''
import statistics
data = [1, 2, 3, 4, 5]
print(statistics.mean(data))
print(statistics.median(data))
'''
    outcome = sandbox.execute(language="python", code=snippet)
    assert outcome["status"] == "succeeded"
    print("✓ Statistics module works")
if __name__ == "__main__":
print("=== Running Unit Tests ===\n")
test_basic_python_execution()
test_json_transform()
test_regex_parse()
test_validation_blocks_os_import()
test_validation_blocks_subprocess()
test_validation_blocks_socket()
test_validation_blocks_eval()
test_validation_blocks_file_io()
test_math_operations()
test_statistics()
print("\n✅ All unit tests passed!")