## Agents Added - Alateya: R&D, biotech, innovations - Clan (Spirit): Community spirit agent - Eonarch: Consciousness evolution agent ## Changes - docker-compose.node1.yml: Added tokens for all 3 new agents - gateway-bot/http_api.py: Added configs and webhook endpoints - gateway-bot/clan_prompt.txt: New prompt file - gateway-bot/eonarch_prompt.txt: New prompt file ## Fixes - Fixed ROUTER_URL from :9102 to :8000 (internal container port) - All 9 Telegram agents now working ## Documentation - Created PROJECT-MASTER-INDEX.md - single entry point - Added various status documents and scripts Tokens configured: - Helion, NUTRA, Agromatrix (existing) - Alateya, Clan, Eonarch (new) - Druid, GreenFood, DAARWIZZ (configured)
310 lines
9.1 KiB
Python
310 lines
9.1 KiB
Python
"""
|
|
Brand Intake Service
|
|
- Detects and attributes brand identity from inputs
|
|
- Stores sources and snapshots (MVP file-based)
|
|
"""
|
|
|
|
from fastapi import FastAPI, HTTPException
|
|
from pydantic import BaseModel
|
|
from typing import Any, Dict, List, Optional, Tuple
|
|
from datetime import datetime
|
|
import json
|
|
import logging
|
|
import os
|
|
import re
|
|
import uuid
|
|
from pathlib import Path
|
|
|
|
import yaml
|
|
|
|
# Module-wide logging setup; one logger per module, per stdlib convention.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# File-store root and BrandMap config location, both overridable via environment.
DATA_DIR = Path(os.getenv("BRAND_INTAKE_DATA", "/data/brand-intake"))
BRAND_MAP_PATH = Path(os.getenv("BRAND_MAP_PATH", "/app/config/BrandMap.yaml"))
|
|
|
|
# FastAPI application exposing the intake and lookup endpoints defined below.
app = FastAPI(
    title="Brand Intake Service",
    description="Detects, attributes and stores brand sources",
    version="0.1.0"
)
|
|
|
|
|
|
class IntakeRequest(BaseModel):
    """Request body for POST /brand/intake.

    Which payload fields are meaningful depends on source_type
    (accepted values are validated in brand_intake()).
    """

    # One of: "url", "text", "file", "figma", "drive", "notion".
    source_type: str
    # Free-form text content; mined for brand aliases/keywords during attribution.
    text: Optional[str] = None
    # Source URL; matched against per-brand domains during attribution.
    url: Optional[str] = None
    # Original filename of an uploaded attachment (also mined for aliases/keywords).
    filename: Optional[str] = None
    # Opaque reference to the raw payload when it is stored elsewhere.
    raw_ref: Optional[str] = None
    mime_type: Optional[str] = None
    # Caller context, compared against BrandMap context_rules.
    agent_id: Optional[str] = None
    workspace_id: Optional[str] = None
    project_id: Optional[str] = None
    # Free-form labels copied onto the stored source document.
    tags: Optional[List[str]] = None
|
|
|
|
|
|
class IntakeResponse(BaseModel):
    """Response body for POST /brand/intake."""

    # Id of the persisted source document (hex uuid).
    id: str
    # Full attribution verdict: status, brand_id, confidence, candidates.
    attribution: Dict[str, Any]
    # Convenience copy of attribution["status"].
    status: str
    # Creation timestamp, ISO-8601 string with trailing "Z".
    created_at: str
|
|
|
|
|
|
class BrandMap:
    """In-memory view of the BrandMap config: brand records plus global defaults."""

    def __init__(self, data: Dict[str, Any]):
        self.data = data
        self.defaults = data.get("defaults", {})
        self.brands = data.get("brands", [])

    @property
    def min_confidence(self) -> float:
        """Score a candidate needs to be auto-attributed."""
        threshold = self.defaults.get("min_confidence", 0.72)
        return float(threshold)

    @property
    def min_confidence_context_override(self) -> float:
        """Lower score bar that applies when a context rule matched."""
        threshold = self.defaults.get("min_confidence_context_override", 0.55)
        return float(threshold)

    @property
    def weights(self) -> Dict[str, float]:
        """Per-signal scoring weights (domain/alias/keyword/etc.)."""
        return self.defaults.get("weights", {})
|
|
|
|
|
|
# Lazily-loaded singleton; populated by load_brand_map() on first use.
BRAND_MAP: Optional[BrandMap] = None
|
|
|
|
|
|
def load_brand_map() -> BrandMap:
    """(Re)load the BrandMap YAML into the module-level BRAND_MAP singleton.

    Returns:
        The freshly built BrandMap.

    Raises:
        FileNotFoundError: when BRAND_MAP_PATH does not exist.
    """
    global BRAND_MAP
    if not BRAND_MAP_PATH.exists():
        raise FileNotFoundError(f"BrandMap not found: {BRAND_MAP_PATH}")
    data = yaml.safe_load(BRAND_MAP_PATH.read_text(encoding="utf-8"))
    # safe_load returns None for an empty document; fall back to an empty
    # mapping so BrandMap(...) does not fail with AttributeError.
    BRAND_MAP = BrandMap(data if data is not None else {})
    return BRAND_MAP
|
|
|
|
|
|
def _ensure_dirs() -> None:
    """Create the file-store layout under DATA_DIR (idempotent)."""
    for subdir in ("sources", "snapshots"):
        (DATA_DIR / subdir).mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
def _norm(text: str) -> str:
|
|
return re.sub(r"\s+", " ", text.strip().lower())
|
|
|
|
|
|
def _match_any(text: str, patterns: List[str]) -> List[str]:
    """Return the patterns that occur in *text*, compared case- and
    whitespace-insensitively (empty/falsy patterns are skipped)."""
    if not text:
        return []
    haystack = _norm(text)
    return [pattern for pattern in patterns if pattern and _norm(pattern) in haystack]
|
|
|
|
|
|
def _domain_from_url(url: str) -> str:
|
|
try:
|
|
return re.sub(r"^www\.", "", re.split(r"/|:\/\/", url)[-1].split("/")[0])
|
|
except Exception:
|
|
return ""
|
|
|
|
|
|
def _score_brand(brand: Dict[str, Any], req: IntakeRequest, weights: Dict[str, float]) -> Tuple[float, List[str], bool]:
    """Score one brand record against the request.

    Signals, each weighted via the BrandMap "weights" mapping: a brand domain
    appearing in the URL, alias/keyword hits in the combined text/filename/url
    blob, a filename-based attachment hint, and agent_id/workspace_id context
    rules.

    Returns:
        (score capped at 1.0, human-readable reasons, whether any context
        rule matched).
    """
    score = 0.0
    reasons: List[str] = []
    has_context_match = False

    # Single searchable blob from all free-text inputs (None values dropped).
    text_blob = " ".join(filter(None, [req.text, req.filename, req.url]))

    domains = brand.get("domains", [])
    aliases = brand.get("aliases", [])
    keywords = brand.get("keywords", [])

    # Domain signal: only the first matching domain counts.
    if req.url:
        url_lower = req.url.lower()
        for d in domains:
            if d and d.lower() in url_lower:
                score += weights.get("domain_match", 0)
                reasons.append(f"domain:{d}")
                break

    alias_hits = _match_any(text_blob, aliases)
    if alias_hits:
        score += weights.get("alias_match", 0)
        reasons.append("alias")

    keyword_hits = _match_any(text_blob, keywords)
    if keyword_hits:
        score += weights.get("keyword_match", 0)
        reasons.append("keyword")

    # Attachment hint: filename present and some alias/keyword matched.
    if req.filename and (alias_hits or keyword_hits):
        score += weights.get("attachment_hint", 0)
        reasons.append("attachment_hint")

    # Context rules: each matching rule adds the context weight once.
    # (Unified from two duplicated agent_id/workspace_id branches.)
    for rule in brand.get("context_rules", []):
        rule_type = rule.get("type")
        if rule_type not in ("agent_id", "workspace_id"):
            continue
        req_value = getattr(req, rule_type)
        if req_value and _norm(rule.get("value", "")) == _norm(req_value):
            score += weights.get("context_match", 0)
            reasons.append(f"context:{rule_type}")
            has_context_match = True

    return min(score, 1.0), reasons, has_context_match
|
|
|
|
|
|
def _attribute(req: IntakeRequest) -> Dict[str, Any]:
    """Score every brand in the BrandMap against the request and decide attribution.

    Returns a dict with "status" ("attributed" / "needs_review" /
    "unattributed"), the winning "brand_id" (or None), the top "confidence",
    and all scored "candidates" sorted best-first.
    """
    if BRAND_MAP is None:
        load_brand_map()
    assert BRAND_MAP is not None

    scored: List[Dict[str, Any]] = []
    context_override = False
    for brand in BRAND_MAP.brands:
        score, reasons, has_context = _score_brand(brand, req, BRAND_MAP.weights)
        if score > 0:
            scored.append({
                "brand_id": brand.get("brand_id"),
                "score": round(score, 3),
                "reasons": reasons
            })
        # A context match lowers the attribution bar, if the score clears
        # the (smaller) override threshold.
        if has_context and score >= BRAND_MAP.min_confidence_context_override:
            context_override = True

    scored.sort(key=lambda candidate: candidate["score"], reverse=True)

    status, brand_id, confidence = "unattributed", None, 0.0
    if scored:
        best = scored[0]
        confidence = float(best["score"])
        if confidence >= BRAND_MAP.min_confidence or context_override:
            status, brand_id = "attributed", best["brand_id"]
        else:
            status = "needs_review"

    return {
        "status": status,
        "brand_id": brand_id,
        "confidence": confidence,
        "candidates": scored
    }
|
|
|
|
|
|
@app.get("/")
|
|
async def root() -> Dict[str, Any]:
|
|
_ensure_dirs()
|
|
return {
|
|
"service": "brand-intake",
|
|
"status": "running",
|
|
"brand_map": str(BRAND_MAP_PATH),
|
|
"version": "0.1.0"
|
|
}
|
|
|
|
|
|
@app.get("/health")
|
|
async def health() -> Dict[str, Any]:
|
|
return {"status": "healthy"}
|
|
|
|
|
|
@app.post("/brand/intake", response_model=IntakeResponse)
|
|
async def brand_intake(req: IntakeRequest) -> IntakeResponse:
|
|
_ensure_dirs()
|
|
if req.source_type not in {"url", "text", "file", "figma", "drive", "notion"}:
|
|
raise HTTPException(status_code=400, detail="Unsupported source_type")
|
|
|
|
attribution = _attribute(req)
|
|
source_id = uuid.uuid4().hex
|
|
created_at = datetime.utcnow().isoformat() + "Z"
|
|
|
|
source_doc = {
|
|
"id": source_id,
|
|
"created_at": created_at,
|
|
"created_by": "brand-intake",
|
|
"workspace_id": req.workspace_id,
|
|
"project_id": req.project_id,
|
|
"agent_id": req.agent_id,
|
|
"source_type": req.source_type,
|
|
"payload": {
|
|
"raw_ref": req.raw_ref or req.url or req.text or "",
|
|
"mime_type": req.mime_type,
|
|
"filename": req.filename,
|
|
"url": req.url,
|
|
"text_excerpt": (req.text or "")[:2000]
|
|
},
|
|
"attribution": attribution,
|
|
"tags": req.tags or []
|
|
}
|
|
|
|
(DATA_DIR / "sources" / f"{source_id}.json").write_text(
|
|
json.dumps(source_doc, ensure_ascii=False, indent=2),
|
|
encoding="utf-8"
|
|
)
|
|
|
|
snapshot_id = uuid.uuid4().hex
|
|
snapshot_doc = {
|
|
"id": snapshot_id,
|
|
"created_at": created_at,
|
|
"brand_id": attribution.get("brand_id") or "unattributed",
|
|
"source_id": source_id,
|
|
"quality": {
|
|
"confidence": attribution.get("confidence", 0.0),
|
|
"warnings": ["extraction_not_implemented"],
|
|
"needs_review": attribution.get("status") != "attributed"
|
|
},
|
|
"extracted": {
|
|
"palette": {},
|
|
"typography": {},
|
|
"logos": [],
|
|
"web_tokens": {},
|
|
"documents": {},
|
|
"licensing": {}
|
|
}
|
|
}
|
|
(DATA_DIR / "snapshots" / f"{snapshot_id}.json").write_text(
|
|
json.dumps(snapshot_doc, ensure_ascii=False, indent=2),
|
|
encoding="utf-8"
|
|
)
|
|
|
|
return IntakeResponse(
|
|
id=source_id,
|
|
attribution=attribution,
|
|
status=attribution.get("status", "unattributed"),
|
|
created_at=created_at
|
|
)
|
|
|
|
|
|
@app.get("/brand/sources/{source_id}")
|
|
async def get_source(source_id: str) -> Dict[str, Any]:
|
|
path = DATA_DIR / "sources" / f"{source_id}.json"
|
|
if not path.exists():
|
|
raise HTTPException(status_code=404, detail="Source not found")
|
|
return json.loads(path.read_text(encoding="utf-8"))
|
|
|
|
|
|
@app.get("/brand/brands/{brand_id}/latest")
|
|
async def latest_brand_snapshot(brand_id: str) -> Dict[str, Any]:
|
|
snapshot_dir = DATA_DIR / "snapshots"
|
|
if not snapshot_dir.exists():
|
|
raise HTTPException(status_code=404, detail="No snapshots")
|
|
candidates = []
|
|
for path in snapshot_dir.glob("*.json"):
|
|
data = json.loads(path.read_text(encoding="utf-8"))
|
|
if data.get("brand_id") == brand_id:
|
|
candidates.append(data)
|
|
if not candidates:
|
|
raise HTTPException(status_code=404, detail="No snapshots for brand")
|
|
candidates.sort(key=lambda x: x.get("created_at", ""))
|
|
return candidates[-1]
|