feat: Add Alateya, Clan, Eonarch agents + fix gateway-router connection

## Agents Added
- Alateya: R&D, biotech, innovations
- Clan (Spirit): Community spirit agent
- Eonarch: Consciousness evolution agent

## Changes
- docker-compose.node1.yml: Added tokens for all 3 new agents
- gateway-bot/http_api.py: Added configs and webhook endpoints
- gateway-bot/clan_prompt.txt: New prompt file
- gateway-bot/eonarch_prompt.txt: New prompt file

## Fixes
- Fixed ROUTER_URL from :9102 to :8000 (internal container port)
- All 9 Telegram agents now working

## Documentation
- Created PROJECT-MASTER-INDEX.md - single entry point
- Added various status documents and scripts

Tokens configured:
- Helion, NUTRA, Agromatrix (existing)
- Alateya, Clan, Eonarch (new)
- Druid, GreenFood, DAARWIZZ (configured)
This commit is contained in:
Apple
2026-01-28 06:40:34 -08:00
parent 4aeb69e7ae
commit 0c8bef82f4
120 changed files with 21905 additions and 425 deletions

View File

@@ -20,6 +20,10 @@ from fastapi import FastAPI, HTTPException, BackgroundTasks, File, UploadFile, F
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import httpx
import csv
import zipfile
from io import BytesIO
import mimetypes
import yaml
# Optional imports for HuggingFace models
@@ -33,6 +37,129 @@ except ImportError:
Image = None
logger = logging.getLogger(__name__)
# ========== Document Helpers ==========
def _decode_text_bytes(content: bytes) -> str:
"""Decode text with best-effort fallback."""
try:
import chardet
detected = chardet.detect(content)
encoding = detected.get("encoding") or "utf-8"
return content.decode(encoding, errors="replace")
except Exception:
try:
return content.decode("utf-8", errors="replace")
except Exception:
return content.decode("latin-1", errors="replace")
def _csv_to_markdown(content: bytes) -> str:
    """Render CSV bytes as a Markdown table.

    The first row becomes the table header. Pipe characters inside cells
    are escaped (and newlines flattened) so a cell cannot break the
    Markdown table layout — previously a '|' in any cell corrupted the
    column structure. Returns an empty string for an empty CSV.
    """
    def _cell(value: str) -> str:
        # Escape '|' and flatten newlines so a cell cannot split the row.
        return value.replace("|", "\\|").replace("\n", " ")

    text = _decode_text_bytes(content)
    rows = list(csv.reader(text.splitlines()))
    if not rows:
        return ""
    header, body = rows[0], rows[1:]
    lines = [
        "| " + " | ".join(_cell(c) for c in header) + " |",
        "| " + " | ".join(["---"] * len(header)) + " |",
    ]
    for row in body:
        lines.append("| " + " | ".join(_cell(c) for c in row) + " |")
    return "\n".join(lines)
def _xlsx_to_markdown(content: bytes) -> str:
    """Convert an XLSX workbook to Markdown, one table per sheet.

    Each worksheet is emitted under a "## Sheet: <title>" heading; its first
    row is used as the table header and None cells render as empty strings.

    Raises:
        HTTPException: 500 if openpyxl is not installed.
    """
    try:
        import openpyxl
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"openpyxl not available: {e}")

    workbook = openpyxl.load_workbook(filename=BytesIO(content), data_only=True)

    def stringify(cell):
        # None means an empty cell — render it as an empty string.
        return "" if cell is None else str(cell)

    chunks = []
    for worksheet in workbook.worksheets:
        chunks.append(f"## Sheet: {worksheet.title}")
        all_rows = list(worksheet.iter_rows(values_only=True))
        if not all_rows:
            chunks.append("_Empty sheet_")
            continue
        head, rest = all_rows[0], all_rows[1:]
        chunks.append("| " + " | ".join(stringify(c) for c in head) + " |")
        chunks.append("| " + " | ".join(["---"] * len(head)) + " |")
        for row in rest:
            chunks.append("| " + " | ".join(stringify(c) for c in row) + " |")
    return "\n".join(chunks)
def _docx_to_text(content: bytes) -> str:
    """Extract plain text from a DOCX file.

    Returns the non-empty paragraph texts joined with newlines.

    Raises:
        HTTPException: 500 if python-docx is not installed.
    """
    try:
        from docx import Document
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"python-docx not available: {e}")
    document = Document(BytesIO(content))
    paragraphs = [paragraph.text for paragraph in document.paragraphs if paragraph.text]
    return "\n".join(paragraphs)
def _pdf_to_text(content: bytes) -> str:
    """Extract text from a PDF, joining non-empty pages with blank lines.

    Raises:
        HTTPException: 500 if pdfplumber is not installed.
    """
    try:
        import pdfplumber
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"pdfplumber not available: {e}")
    pages = []
    with pdfplumber.open(BytesIO(content)) as pdf:
        for page in pdf.pages:
            # extract_text() may return None for image-only pages.
            extracted = page.extract_text() or ""
            if extracted:
                pages.append(extracted)
    return "\n\n".join(pages)
def _extract_text_by_ext(filename: str, content: bytes) -> str:
    """Dispatch text extraction based on the file extension.

    Supported: txt/md (plain decode), csv/xlsx (Markdown tables),
    docx/pdf (plain text).

    Raises:
        HTTPException: 400 for an unsupported extension.
    """
    ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
    handlers = {
        "txt": _decode_text_bytes,
        "md": _decode_text_bytes,
        "csv": _csv_to_markdown,
        "xlsx": _xlsx_to_markdown,
        "docx": _docx_to_text,
        "pdf": _pdf_to_text,
    }
    handler = handlers.get(ext)
    if handler is None:
        raise HTTPException(status_code=400, detail=f"Unsupported file type: .{ext}")
    return handler(content)
def _zip_to_markdown(content: bytes, max_files: int = 50, max_total_mb: int = 100) -> str:
    """Extract every supported member of a ZIP archive into one Markdown document.

    Each supported member (txt, md, csv, xlsx, docx, pdf) is rendered under a
    "## <name>" heading; unsupported members are listed as skipped. A summary
    header listing processed and skipped files is prepended.

    Args:
        content: Raw bytes of the ZIP archive.
        max_files: Maximum number of (non-directory) members allowed.
        max_total_mb: Maximum combined uncompressed size allowed, in MB.

    Raises:
        HTTPException: 400 when the archive exceeds the file-count or size limit.
    """
    allowed_exts = {"txt", "md", "csv", "xlsx", "docx", "pdf"}
    parts = []
    processed = []
    skipped = []
    # Use a context manager so the archive handle is always closed
    # (the previous version leaked the open ZipFile).
    with zipfile.ZipFile(BytesIO(content)) as zf:
        members = [m for m in zf.infolist() if not m.is_dir()]
        if len(members) > max_files:
            raise HTTPException(status_code=400, detail=f"ZIP has слишком много файлов: {len(members)}")
        # file_size is the declared uncompressed size from the archive headers.
        total_size = sum(m.file_size for m in members)
        if total_size > max_total_mb * 1024 * 1024:
            raise HTTPException(status_code=400, detail=f"ZIP слишком большой: {total_size / 1024 / 1024:.1f} MB")
        for member in members:
            name = member.filename
            ext = name.split(".")[-1].lower() if "." in name else ""
            if ext not in allowed_exts:
                skipped.append(name)
                parts.append(f"## {name}\n_Skipped unsupported file type_")
                continue
            file_bytes = zf.read(member)
            extracted = _extract_text_by_ext(name, file_bytes)
            processed.append(name)
            parts.append(f"## {name}\n{extracted}")
    header_lines = ["# ZIP summary", "Processed files:"]
    header_lines.extend([f"- {name}" for name in processed] or ["- (none)"])
    if skipped:
        header_lines.append("Skipped files:")
        header_lines.extend([f"- {name}" for name in skipped])
    return "\n\n".join(["\n".join(header_lines), *parts])
# ========== Configuration ==========
@@ -719,14 +846,21 @@ class SwapperService:
logger.info(f"🎨 Generating image with {model_name}: {prompt[:50]}...")
with torch.no_grad():
result = pipeline(
prompt=prompt,
negative_prompt=negative_prompt if negative_prompt else None,
num_inference_steps=num_inference_steps,
guidance_scale=guidance_scale,
width=width,
height=height,
)
# FLUX Klein doesn't support negative_prompt, check pipeline type
pipeline_kwargs = {
"prompt": prompt,
"num_inference_steps": num_inference_steps,
"guidance_scale": guidance_scale,
"width": width,
"height": height,
}
# Only add negative_prompt for models that support it (not FLUX)
is_flux = "flux" in model_name.lower()
if negative_prompt and not is_flux:
pipeline_kwargs["negative_prompt"] = negative_prompt
result = pipeline(**pipeline_kwargs)
image = result.images[0]
# Convert to base64
@@ -978,6 +1112,117 @@ async def generate(request: GenerateRequest):
logger.error(f"❌ Error in generate: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
# ========== VISION API Endpoints ==========
class VisionRequest(BaseModel):
    """Vision (image description) request body for POST /vision.

    Images are forwarded to Ollama as raw base64 (any data: URL prefix
    is stripped by the endpoint).
    """
    # API-facing model name; the endpoint maps it to an Ollama tag.
    model: str = "qwen3-vl-8b"
    # Default prompt (Ukrainian): "Describe this image briefly (2-3 sentences)."
    prompt: str = "Опиши це зображення коротко (2-3 речення)."
    # List of base64 encoded images (can include data: prefix)
    images: List[str]
    # Optional system prompt forwarded to Ollama as-is.
    system: Optional[str] = None
    # Passed through to Ollama options as num_predict / temperature.
    max_tokens: int = 1024
    temperature: float = 0.7
@app.post("/vision")
async def vision_endpoint(request: VisionRequest):
    """
    Vision endpoint - analyze images with Vision-Language models.

    Models:
    - qwen3-vl-8b: Qwen3 Vision-Language model (8GB VRAM)

    Images should be base64 encoded. Can include data:image/... prefix or raw base64.

    Returns a dict with the generated text, timing and image count.
    Raises HTTPException(500) on Ollama errors or unexpected failures.
    """
    try:
        import time
        start_time = time.time()

        model_name = request.model

        # Convert data URLs to raw base64 (Ollama expects base64 without prefix)
        processed_images = []
        for img in request.images:
            if img.startswith("data:"):
                # Extract base64 part from data URL
                base64_part = img.split(",", 1)[1] if "," in img else img
                processed_images.append(base64_part)
            else:
                processed_images.append(img)

        logger.info(f"🖼️ Vision request: model={model_name}, images={len(processed_images)}, prompt={request.prompt[:50]}...")

        # Map the API model name to an Ollama tag. Ollama tags contain exactly
        # one colon ("name:tag"), so only the LAST dash is converted; the old
        # blanket replace("-", ":") produced invalid tags such as "qwen3:vl:8b"
        # for names with more than one dash.
        ollama_model = ":".join(model_name.rsplit("-", 1)) if "-" in model_name else model_name
        if model_name == "qwen3-vl-8b":
            ollama_model = "qwen3-vl:8b"

        # Build Ollama request
        ollama_payload = {
            "model": ollama_model,
            "prompt": request.prompt,
            "images": processed_images,
            "stream": False,
            "options": {
                "num_predict": request.max_tokens,
                "temperature": request.temperature
            }
        }
        if request.system:
            ollama_payload["system"] = request.system

        # Send to Ollama
        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.post(
                f"{OLLAMA_BASE_URL}/api/generate",
                json=ollama_payload
            )

            if response.status_code != 200:
                logger.error(f"❌ Ollama vision error: {response.status_code} - {response.text[:200]}")
                raise HTTPException(status_code=500, detail=f"Ollama error: {response.status_code}")

            result = response.json()
            vision_text = result.get("response", "")

            # Debug logging for silently empty generations
            if not vision_text:
                logger.warning(f"⚠️ Empty response from Ollama! Result keys: {list(result.keys())}, error: {result.get('error', 'none')}")

            processing_time_ms = (time.time() - start_time) * 1000
            logger.info(f"✅ Vision response: {len(vision_text)} chars in {processing_time_ms:.0f}ms")

            return {
                "success": True,
                "model": model_name,
                "text": vision_text,
                "processing_time_ms": processing_time_ms,
                "images_count": len(processed_images)
            }
    except HTTPException:
        # Re-raise FastAPI errors untouched so their status codes survive.
        raise
    except Exception as e:
        logger.error(f"❌ Vision endpoint error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/vision/models")
async def vision_models():
    """Return all registered models of type "vision" with their load status."""
    available = [
        {
            "name": model.name,
            "type": model.type,
            "status": model.status.value,
            "size_gb": model.size_gb,
        }
        for model in swapper.models.values()
        if model.type == "vision"
    ]
    return {"models": available}
# ========== OCR API Endpoints ==========
class OCRRequest(BaseModel):
@@ -1426,6 +1671,31 @@ async def document_endpoint(
# Determine file type
filename = file.filename if file else "document"
file_ext = filename.split(".")[-1].lower() if "." in filename else "pdf"
# Handle text-based formats without Docling
if file_ext in ["txt", "md", "csv", "xlsx", "zip"]:
try:
if file_ext == "zip":
content = _zip_to_markdown(doc_data)
output_format = "markdown"
else:
content = _extract_text_by_ext(filename, doc_data)
output_format = "markdown" if file_ext in ["md", "csv", "xlsx"] else "text"
processing_time_ms = (time.time() - start_time) * 1000
return {
"success": True,
"model": "text-extract",
"output_format": output_format,
"result": content,
"filename": filename,
"processing_time_ms": processing_time_ms,
"device": swapper.device
}
except HTTPException:
raise
except Exception as e:
logger.error(f"❌ Text extraction failed: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=f"Text extraction failed: {e}")
# Save to temp file
import tempfile
@@ -1478,8 +1748,8 @@ async def document_endpoint(
}
except ImportError:
# Fallback to trafilatura for simpler extraction
logger.warning("⚠️ Docling not installed, falling back to basic extraction")
# Fallback to pdfplumber/OCR for simpler extraction
logger.warning("⚠️ Docling not installed, using fallback extraction")
# For images, use OCR
if file_ext in ["png", "jpg", "jpeg", "gif", "webp"]:
@@ -1494,10 +1764,50 @@ async def document_endpoint(
"device": swapper.device
}
# For DOCX, try python-docx
if file_ext == "docx":
try:
content = _docx_to_text(doc_data)
return {
"success": True,
"model": "python-docx (fallback)",
"output_format": "text",
"result": content,
"filename": filename,
"processing_time_ms": (time.time() - start_time) * 1000,
"device": swapper.device
}
except Exception as e:
logger.error(f"DOCX fallback failed: {e}")
raise HTTPException(status_code=500, detail="DOCX extraction failed")
# For PDFs, try pdfplumber
if file_ext == "pdf":
try:
import pdfplumber
text_content = []
with pdfplumber.open(tmp_path) as pdf:
for page in pdf.pages:
text = page.extract_text()
if text:
text_content.append(text)
content = "\n\n".join(text_content)
return {
"success": True,
"model": "pdfplumber (fallback)",
"output_format": "text",
"result": content,
"filename": filename,
"processing_time_ms": (time.time() - start_time) * 1000,
"device": "cpu"
}
except ImportError:
pass
# For other documents, return error
raise HTTPException(
status_code=503,
detail="Docling not installed. Run: pip install docling"
detail="Document processing not available. Supported: PDF (with pdfplumber), images (with OCR)"
)
finally:
@@ -1712,10 +2022,33 @@ async def web_extract(request: WebExtractRequest):
)
if response.status_code == 200:
data = response.json()
result = data.get("results", [{}])[0]
# Get markdown - can be string or dict with raw_markdown
markdown_data = result.get("markdown", "")
if isinstance(markdown_data, dict):
content = markdown_data.get("raw_markdown", "") or markdown_data.get("fit_markdown", "")
else:
content = markdown_data
# Fallback to cleaned_html
if not content:
content = result.get("cleaned_html", "") or result.get("extracted_content", "")
# Last resort: strip HTML tags
if not content and result.get("html"):
import re
content = re.sub(r'<[^>]+>', ' ', result.get("html", ""))
content = re.sub(r'\s+', ' ', content).strip()
# Limit size for LLM context
if len(content) > 50000:
content = content[:50000] + "\n\n[... truncated ...]"
return {
"success": True,
"success": bool(content),
"method": "crawl4ai",
"content": data.get("results", [{}])[0].get("markdown", ""),
"content": content,
"url": url
}
return {"success": False, "error": f"Crawl4AI returned {response.status_code}"}
@@ -1746,39 +2079,66 @@ async def web_extract(request: WebExtractRequest):
@app.post("/web/search")
async def web_search(request: WebSearchRequest):
"""
Search the web using DuckDuckGo (free, no API key needed).
Search the web using multiple engines with fallback.
Priority: 1) DDGS (DuckDuckGo) 2) Google Search
"""
formatted_results = []
engine_used = "none"
# Method 1: Try DDGS (new package name)
try:
from duckduckgo_search import DDGS
from ddgs import DDGS
ddgs = DDGS()
results = ddgs.text(
results = list(ddgs.text(
request.query,
max_results=request.max_results
)
formatted_results = []
for idx, result in enumerate(results):
formatted_results.append({
"position": idx + 1,
"title": result.get("title", ""),
"url": result.get("href", ""),
"snippet": result.get("body", "")
})
return {
"success": True,
"query": request.query,
"results": formatted_results,
"total": len(formatted_results),
"engine": "duckduckgo"
}
max_results=request.max_results,
region="wt-wt" # Worldwide
))
if results:
for idx, result in enumerate(results):
formatted_results.append({
"position": idx + 1,
"title": result.get("title", ""),
"url": result.get("href", result.get("link", "")),
"snippet": result.get("body", result.get("snippet", ""))
})
engine_used = "ddgs"
logger.info(f"✅ DDGS search found {len(formatted_results)} results for: {request.query[:50]}")
except ImportError:
raise HTTPException(status_code=503, detail="DuckDuckGo search not installed")
logger.warning("DDGS not installed, trying Google search")
except Exception as e:
logger.error(f"❌ Web search error: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
logger.warning(f"DDGS search failed: {e}, trying Google search")
# Method 2: Fallback to Google search
if not formatted_results:
try:
from googlesearch import search as google_search
results = list(google_search(request.query, num_results=request.max_results, lang="uk"))
if results:
for idx, url in enumerate(results):
formatted_results.append({
"position": idx + 1,
"title": url.split("/")[-1].replace("-", " ").replace("_", " ")[:60] or "Result",
"url": url,
"snippet": ""
})
engine_used = "google"
logger.info(f"✅ Google search found {len(formatted_results)} results for: {request.query[:50]}")
except ImportError:
logger.warning("Google search not installed")
except Exception as e:
logger.warning(f"Google search failed: {e}")
# Return results or empty
return {
"success": len(formatted_results) > 0,
"query": request.query,
"results": formatted_results,
"total": len(formatted_results),
"engine": engine_used
}
@app.get("/web/read/{url:path}")
async def web_read_simple(url: str):
@@ -1827,6 +2187,90 @@ async def web_status():
}
}
# ========== Video Generation API (Grok xAI) ==========
# xAI (Grok) API credentials/endpoint. An empty key makes /video/generate
# return 503 and is surfaced via /video/status as api_key_configured=False.
GROK_API_KEY = os.getenv("GROK_API_KEY", "")
GROK_API_URL = "https://api.x.ai/v1"
class VideoGenerateRequest(BaseModel):
    """Video generation request via Grok (POST /video/generate).

    NOTE(review): the handler currently produces a single image via the Grok
    images endpoint; `duration` and `aspect_ratio` appear unused — confirm.
    """
    prompt: str
    duration: int = 6  # seconds (max 6 for Grok)
    style: str = "cinematic"  # cinematic, anime, realistic, abstract
    aspect_ratio: str = "16:9"  # 16:9, 9:16, 1:1
@app.post("/video/generate")
async def video_generate(request: VideoGenerateRequest):
    """
    Generate image using Grok (xAI) API.

    Note: Grok API currently supports image generation only (not video).
    For video-like content, generate multiple frames and combine externally.

    Raises:
        HTTPException: 503 when no API key is configured, 504 on timeout,
        the upstream status code on a Grok API error, 500 otherwise.
    """
    if not GROK_API_KEY:
        raise HTTPException(status_code=503, detail="GROK_API_KEY not configured")

    try:
        async with httpx.AsyncClient(timeout=120.0) as client:
            # Grok image generation endpoint
            response = await client.post(
                f"{GROK_API_URL}/images/generations",
                headers={
                    "Authorization": f"Bearer {GROK_API_KEY}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": "grok-2-image-1212",  # Correct model name
                    "prompt": f"{request.prompt}, {request.style} style",
                    "n": 1,
                    "response_format": "url"
                }
            )

            if response.status_code == 200:
                data = response.json()
                return {
                    "success": True,
                    "prompt": request.prompt,
                    "style": request.style,
                    "type": "image",  # Note: video not available via API
                    "result": data,
                    "provider": "grok-xai",
                    "note": "Grok API supports image generation. Video generation is available only in xAI app."
                }
            else:
                logger.error(f"Grok API error: {response.status_code} - {response.text}")
                raise HTTPException(
                    status_code=response.status_code,
                    detail=f"Grok API error: {response.text}"
                )
    except HTTPException:
        # Propagate the intended status code: previously this fell into the
        # generic handler below and was collapsed to 500. Matches the
        # passthrough pattern used by the /vision endpoint.
        raise
    except httpx.TimeoutException:
        raise HTTPException(status_code=504, detail="Image generation timeout (>120s)")
    except Exception as e:
        logger.error(f"Image generation error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/video/status")
async def video_status():
    """Report Grok (xAI) media-generation capabilities, models and styles."""
    capabilities = {
        "image_generation": True,   # grok-2-image-1212
        "video_generation": False,  # Not available via API (only in xAI app)
        "vision_analysis": True,    # grok-2-vision-1212
    }
    models = {
        "image": "grok-2-image-1212",
        "vision": "grok-2-vision-1212",
        "chat": ["grok-3", "grok-3-mini", "grok-4-0709"],
    }
    styles = ["cinematic", "anime", "realistic", "abstract", "photorealistic"]
    return {
        "service": "grok-xai",
        "api_key_configured": bool(GROK_API_KEY),
        "capabilities": capabilities,
        "models": models,
        "supported_styles": styles,
    }
# ========== Multimodal Stack Summary ==========
@app.get("/multimodal")
@@ -1856,7 +2300,8 @@ async def get_multimodal_stack():
"stt": get_models_by_type("stt"),
"tts": get_models_by_type("tts"),
"embedding": get_models_by_type("embedding"),
"image_generation": get_models_by_type("image_generation")
"image_generation": get_models_by_type("image_generation"),
"video_generation": {"provider": "grok-xai", "available": bool(GROK_API_KEY)}
},
"active_models": {
"llm": swapper.active_model,