microdao-daarion/tools/safe_code_executor/safe_code_executor.py

"""
SafeCodeExecutor - Secure sandboxed code execution for AI agents
Fully self-hosted, privacy-by-default

Security:
- Subprocess-based sandbox with resource limits
- Import allowlist (whitelist)
- No network access
- No filesystem access (except temp)
- Strict resource limits (CPU, memory, timeout)
"""

import os
import sys
import re
import json
import time
import uuid
import signal
import subprocess
import resource
import tempfile
import logging
import hashlib
from pathlib import Path
from typing import Optional, Dict, Any, List
from datetime import datetime
from dataclasses import dataclass, field
from threading import Lock
from collections import defaultdict

logger = logging.getLogger(__name__)


# ============================================================================
# CONFIGURATION
# ============================================================================

@dataclass
class ExecutionConfig:
    """Sandbox execution configuration"""
    # Resource limits
    timeout_ms: int = 5000
    max_memory_mb: int = 256
    max_cpu_ms: int = 5000
    max_stdout_bytes: int = 65536
    max_result_bytes: int = 65536
    max_stderr_bytes: int = 8192

    # Security
    allowed_languages: List[str] = field(default_factory=lambda: ["python", "javascript"])
    allowed_modules: List[str] = field(default_factory=lambda: [
        # Python standard library (safe modules only)
        "json", "math", "re", "datetime", "time", "calendar",
        "collections", "functools", "itertools", "random", "statistics",
        "string", "base64", "hashlib", "hmac", "secrets",
        "urllib.parse", "html", "xml.etree.ElementTree",
        "typing", "types", "copy", "pprint", "textwrap",
        # JavaScript/Node
        "console", "JSON", "Math", "Date", "Array", "Object", "String", "Number", "Boolean"
    ])
    blocked_modules: List[str] = field(default_factory=lambda: [
        # Python - OS/System access
        "os", "subprocess", "socket", "requests", "urllib.request",
        "http", "ftplib", "smtplib", "telnetlib", "telnet",
        "pty", "tty", "termios", "fcntl", "resource",
        "importlib", "pkgutil", "setuptools", "pip",
        "pathlib", "glob", "fnmatch", "shutil",
        "tempfile", "tempdir",  # Blocked for security
        "cryptography", "ssl", "hashlib",  # Limited - only specific funcs
        "eval", "exec", "compile",  # Code execution
        "open", "file", "io",  # File I/O
        "__import__", "getattr", "setattr", "delattr",  # Dynamic access
        "pickle", "marshal", "yaml",  # Serialization (code execution risk)
        # JavaScript
        "child_process", "fs", "net", "http", "https", "tls",
        "crypto", "dns", "dgram", "domain", "http2", "http Agent",
        "fetch", "XMLHttpRequest", "WebSocket", "Worker", "eval", "Function"
    ])


class SafeCodeExecutor:
    """
    Safe code execution sandbox.

    Security features:
    - Subprocess isolation
    - Import allowlist/blocklist
    - No network access
    - No filesystem access
    - Resource limits (CPU, memory, timeout)
    - Output limits

    Usage:
        executor = SafeCodeExecutor()

        result = executor.execute(
            language="python",
            code="print('Hello')"
        )
    """

    def __init__(self, config: Optional[ExecutionConfig] = None):
        self.config = config or ExecutionConfig()
        self._lock = Lock()
        self._active_processes: Dict[str, subprocess.Popen] = {}

        # Create temp directory for sandbox
        self._sandbox_dir = Path("/tmp/safe_exec_sandbox")
        self._sandbox_dir.mkdir(parents=True, exist_ok=True)

        # Audit logger
        self._audit = AuditLogger("/tmp/safe_exec_logs")

    # =========================================================================
    # CODE VALIDATION
    # =========================================================================

    def _validate_python_code(self, code: str) -> Optional[str]:
        """Validate Python code for security"""
        # Check for blocked imports
        for blocked in self.config.blocked_modules:
            if re.search(rf'\bimport\s+{blocked}\b', code):
                return f"Blocked import: {blocked}"
            if re.search(rf'\bfrom\s+{blocked}\s+import', code):
                return f"Blocked import from: {blocked}"

        # Check for dangerous patterns
        dangerous_patterns = [
            (r'__import__\s*\(', "Dynamic import not allowed"),
            (r'eval\s*\(', "eval not allowed"),
            (r'exec\s*\(', "exec not allowed"),
            (r'compile\s*\(', "compile not allowed"),
            (r'open\s*\(', "File I/O not allowed"),
            (r'with\s+open', "File I/O not allowed"),
            (r'subprocess\.', "subprocess not allowed"),
            (r'os\.system', "os.system not allowed"),
            (r'os\.popen', "os.popen not allowed"),
            (r'socket\.', "socket not allowed"),
            (r'requests\.', "requests not allowed"),
            (r'urllib\.request', "urllib not allowed"),
            (r'pickle\.load', "pickle not allowed"),
            (r'yaml\.load', "yaml not allowed"),
            (r'eval\s*\(', "eval not allowed"),
            (r'getattr\s*\([^,]+,[^,]+,[^,]+\)', "getattr with 3 args not allowed"),
            (r'setattr\s*\([^,]+,[^,]+,[^,]+\)', "setattr with 3 args not allowed"),
            (r'class\s*\(.*meta', "metaclass not allowed"),
            (r'lambda\s*.*:.*exec', "lambda with exec not allowed"),
            # Fork bombs
            (r'while\s+True.*fork', "fork not allowed"),
            (r'\[\s*lambda.*\]*\s*\(', "Lambda in list comp may be dangerous"),
        ]

        for pattern, msg in dangerous_patterns:
            if re.search(pattern, code, re.IGNORECASE):
                return f"Dangerous pattern detected: {msg}"

        return None

    def _validate_javascript_code(self, code: str) -> Optional[str]:
        """Validate JavaScript code for security"""
        dangerous_patterns = [
            (r'require\s*\(\s*[\'"]child_process[\'"]\)', "child_process not allowed"),
            (r'require\s*\(\s*[\'"]fs[\'"]\)', "fs not allowed"),
            (r'require\s*\(\s*[\'"]net[\'"]\)', "net not allowed"),
            (r'require\s*\(\s*[\'"]http[\'"]\)', "http not allowed"),
            (r'require\s*\(\s*[\'"]https[\'"]\)', "https not allowed"),
            (r'require\s*\(\s*[\'"]crypto[\'"]\)', "crypto not allowed"),
            (r'require\s*\(\s*[\'"]dns[\'"]\)', "dns not allowed"),
            (r'eval\s*\(', "eval not allowed"),
            (r'Function\s*\(', "Function constructor not allowed"),
            (r'process\.exit', "process.exit not allowed"),
            (r'process\.kill', "process.kill not allowed"),
            (r'child_process\.exec', "child_process.exec not allowed"),
            (r'child_process\.spawn', "child_process.spawn not allowed"),
            (r'__dirname', "__dirname not allowed"),
            (r'__filename', "__filename not allowed"),
            (r'global\.', "global access not allowed"),
            (r'window\.', "window not allowed"),
            (r'document\.', "document not allowed"),
        ]

        for pattern, msg in dangerous_patterns:
            if re.search(pattern, code, re.IGNORECASE):
                return f"Dangerous pattern detected: {msg}"

        return None

    def validate(self, language: str, code: str) -> Optional[str]:
        """Validate code for security"""
        if language == "python":
            return self._validate_python_code(code)
        elif language == "javascript" or language == "js":
            return self._validate_javascript_code(code)
        else:
            return f"Unsupported language: {language}"

    # =========================================================================
    # CODE EXECUTION
    # =========================================================================

    def execute(
        self,
        language: str,
        code: str,
        stdin: Optional[str] = None,
        limits: Optional[Dict[str, Any]] = None,
        context: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Execute code in sandbox.

        Args:
            language: "python" or "javascript"
            code: Source code to execute
            stdin: Optional stdin input
            limits: Override default limits
            context: Execution context (agent_id, req_id, etc.)

        Returns:
            Dict with execution result
        """
        with self._lock:
            # Generate execution ID
            execution_id = hashlib.sha256(
                f"{time.time()}_{uuid.uuid4()}".encode()
            ).hexdigest()[:16]

            # Apply limit overrides
            timeout_ms = limits.get("timeout_ms", self.config.timeout_ms) if limits else self.config.timeout_ms
            max_memory_mb = limits.get("max_memory_mb", self.config.max_memory_mb) if limits else self.config.max_memory_mb
            max_stdout = limits.get("max_stdout_bytes", self.config.max_stdout_bytes) if limits else self.config.max_stdout_bytes

            # Validate code first
            validation_error = self.validate(language, code)
            if validation_error:
                return {
                    "status": "failed",
                    "error": validation_error,
                    "execution_id": execution_id,
                    "language": language
                }

            # Execute based on language
            if language == "python":
                return self._execute_python(execution_id, code, stdin, timeout_ms, max_memory_mb, max_stdout, context)
            elif language in ("javascript", "js"):
                return self._execute_javascript(execution_id, code, stdin, timeout_ms, max_memory_mb, max_stdout, context)
            else:
                return {
                    "status": "failed",
                    "error": f"Unsupported language: {language}",
                    "execution_id": execution_id
                }

    def _execute_python(
        self,
        execution_id: str,
        code: str,
        stdin: Optional[str],
        timeout_ms: int,
        max_memory_mb: int,
        max_stdout: int,
        context: Optional[Dict]
    ) -> Dict[str, Any]:
        """Execute Python code in sandbox"""
        start_time = time.time()

        # Create temp file for code
        code_file = self._sandbox_dir / f"{execution_id}.py"

        try:
            code_file.write_text(code)

            # Run with resource limits (if on Linux)
            try:
                process = subprocess.Popen(
                    [sys.executable, str(code_file)],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    stdin=subprocess.PIPE if stdin else None,
                    cwd="/tmp",
                    env={
                        "PATH": "/usr/bin:/bin",
                        "PYTHONPATH": "",
                        "HOME": "/tmp",
                        "TMPDIR": "/tmp",
                    }
                )
            except Exception as e:
                # Fallback without preexec_fn
                process = subprocess.Popen(
                    [sys.executable, str(code_file)],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    stdin=subprocess.PIPE if stdin else None,
                    cwd="/tmp"
                )

            self._active_processes[execution_id] = process

            # Wait for completion
            try:
                stdout, stderr = process.communicate(
                    input=stdin.encode() if stdin else None,
                    timeout=timeout_ms / 1000
                )
            except subprocess.TimeoutExpired:
                process.kill()
                stdout, stderr = process.communicate()

                return {
                    "status": "timeout",
                    "execution_id": execution_id,
                    "language": "python",
                    "timeout_ms": timeout_ms,
                    "metrics": self._get_metrics(start_time, stdout, stderr)
                }

            # Truncate output
            stdout = stdout[:max_stdout]
            stderr = stderr[:self.config.max_stderr_bytes]

            # Try to parse result_json
            result_json = self._extract_result_json(stdout.decode("utf-8", errors="replace"))

            return {
                "status": "succeeded" if process.returncode == 0 else "failed",
                "exit_code": process.returncode,
                "stdout": stdout.decode("utf-8", errors="replace"),
                "stderr": stderr.decode("utf-8", errors="replace"),
                "result_json": result_json,
                "execution_id": execution_id,
                "language": "python",
                "metrics": self._get_metrics(start_time, stdout, stderr)
            }

        except Exception as e:
            return {
                "status": "failed",
                "error": str(e),
                "execution_id": execution_id,
                "language": "python"
            }
        finally:
            # Cleanup
            if code_file.exists():
                code_file.unlink()
            self._active_processes.pop(execution_id, None)

    def _execute_javascript(
        self,
        execution_id: str,
        code: str,
        stdin: Optional[str],
        timeout_ms: int,
        max_memory_mb: int,
        max_stdout: int,
        context: Optional[Dict]
    ) -> Dict[str, Any]:
        """Execute JavaScript code in sandbox"""
        start_time = time.time()

        # Create temp file
        code_file = self._sandbox_dir / f"{execution_id}.js"

        # Wrap code
        wrapped_code = f'''
'use strict';

// Restricted globals
const console = {{...console}};
delete console.debug;
delete console.trace;

// Execute
{code}
'''

        try:
            code_file.write_text(wrapped_code)

            # Run with Node (if available)
            process = subprocess.Popen(
                ["node", str(code_file)],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                stdin=subprocess.PIPE if stdin else None,
                cwd="/tmp",
                env={"PATH": "/usr/bin:/bin", "HOME": "/tmp"},
                preexec_fn=self._set_resource_limits(max_memory_mb)
            )

            self._active_processes[execution_id] = process

            try:
                stdout, stderr = process.communicate(
                    input=stdin.encode() if stdin else None,
                    timeout=timeout_ms / 1000
                )
            except subprocess.TimeoutExpired:
                process.kill()
                stdout, stderr = process.communicate()

                return {
                    "status": "timeout",
                    "execution_id": execution_id,
                    "language": "javascript",
                    "timeout_ms": timeout_ms,
                    "metrics": self._get_metrics(start_time, stdout, stderr)
                }

            stdout = stdout[:max_stdout]
            stderr = stderr[:self.config.max_stderr_bytes]

            result_json = self._extract_result_json(stdout.decode("utf-8", errors="replace"))

            return {
                "status": "succeeded" if process.returncode == 0 else "failed",
                "exit_code": process.returncode,
                "stdout": stdout.decode("utf-8", errors="replace"),
                "stderr": stderr.decode("utf-8", errors="replace"),
                "result_json": result_json,
                "execution_id": execution_id,
                "language": "javascript",
                "metrics": self._get_metrics(start_time, stdout, stderr)
            }

        except FileNotFoundError:
            return {
                "status": "failed",
                "error": "Node.js not available",
                "execution_id": execution_id,
                "language": "javascript"
            }
        except Exception as e:
            return {
                "status": "failed",
                "error": str(e),
                "execution_id": execution_id,
                "language": "javascript"
            }
        finally:
            if code_file.exists():
                code_file.unlink()
            self._active_processes.pop(execution_id, None)

    def _set_resource_limits(self, max_memory_mb: int):
        """Set resource limits for subprocess"""
        def set_limits():
            # Memory limit
            resource.setrlimit(resource.RLIMIT_AS, (max_memory_mb * 1024 * 1024, max_memory_mb * 1024 * 1024))
            # CPU time limit (60 seconds max)
            resource.setrlimit(resource.RLIMIT_CPU, (60, 60))
            # File size limit (10MB)
            resource.setrlimit(resource.RLIMIT_FSIZE, (10 * 1024 * 1024, 10 * 1024 * 1024))
        return set_limits

    def _extract_result_json(self, stdout: str) -> Optional[Dict]:
        """Extract JSON from stdout"""
        lines = stdout.strip().split('\n')

        # Try last line as JSON
        if lines:
            last_line = lines[-1].strip()
            try:
                return json.loads(last_line)
            except:
                pass

        # Try to find JSON in output
        for line in reversed(lines):
            line = line.strip()
            if line.startswith('{') or line.startswith('['):
                try:
                    return json.loads(line)
                except:
                    continue

        return None

    def _get_metrics(self, start_time: float, stdout: bytes, stderr: bytes) -> Dict:
        """Get execution metrics"""
        return {
            "execution_time_ms": int((time.time() - start_time) * 1000),
            "stdout_bytes": len(stdout),
            "stderr_bytes": len(stderr)
        }

    # =========================================================================
    # JOB-BASED EXECUTION
    # =========================================================================

    def execute_async(
        self,
        language: str,
        code: str,
        stdin: Optional[str] = None,
        limits: Optional[Dict] = None,
        context: Optional[Dict] = None
    ) -> str:
        """
        Execute code asynchronously, returns job_id.
        For integration with job system.
        """
        job_id = hashlib.sha256(f"{time.time()}_{uuid.uuid4()}".encode()).hexdigest()[:16]

        # For now, execute synchronously
        # In production, this would queue to a worker
        result = self.execute(language, code, stdin, limits, context)
        result["job_id"] = job_id

        # Store result (in production, would use Redis/database)
        self._store_job_result(job_id, result)

        return job_id

    def get_job_result(self, job_id: str) -> Optional[Dict]:
        """Get result of async execution"""
        return self._job_results.get(job_id)

    def _store_job_result(self, job_id: str, result: Dict) -> None:
        """Store job result"""
        self._job_results[job_id] = result

    _job_results: Dict[str, Dict] = {}

    # =========================================================================
    # ADMIN
    # =========================================================================

    def kill_process(self, execution_id: str) -> bool:
        """Kill running process"""
        process = self._active_processes.get(execution_id)
        if process:
            process.kill()
            return True
        return False

    def get_stats(self) -> Dict:
        """Get executor statistics"""
        return {
            "active_processes": len(self._active_processes),
            "config": {
                "timeout_ms": self.config.timeout_ms,
                "max_memory_mb": self.config.max_memory_mb,
                "max_stdout_bytes": self.config.max_stdout_bytes
            }
        }


# ============================================================================
# AUDIT LOGGING
# ============================================================================

class AuditLogger:
    """Audit log for executor (no code content)"""

    def __init__(self, log_dir: str):
        self.log_dir = Path(log_dir)
        self.log_dir.mkdir(parents=True, exist_ok=True)
        self._lock = Lock()

    def log(self, execution_id: str, event: str, details: Dict) -> None:
        # Never log code content
        safe_details = {
            k: v for k, v in details.items()
            if k not in ("code", "stdin")
        }

        entry = {
            "timestamp": datetime.utcnow().isoformat(),
            "execution_id": execution_id,
            "event": event,
            "details": safe_details
        }

        with self._lock:
            log_file = self.log_dir / f"exec_{datetime.utcnow().strftime('%Y%m%d')}.jsonl"
            with open(log_file, "a") as f:
                f.write(json.dumps(entry) + "\n")


# ============================================================================
# REGISTRATION FOR OCTOTOOLS
# ============================================================================

def register_tools() -> Dict[str, Any]:
    return {
        "safe_code_executor": {
            "class": SafeCodeExecutor,
            "description": "Sandboxed code execution - Python/JS with resource limits, no network/filesystem",
            "methods": [
                "execute",
                "execute_async",
                "get_job_result",
                "validate",
                "kill_process",
                "get_stats"
            ]
        }
    }