snapshot: NODE1 production state 2026-02-09

Complete snapshot of /opt/microdao-daarion/ from NODE1 (144.76.224.179). This represents the actual running production code, which has diverged significantly from the previous main branch.

Key changes from old main:
- Gateway (http_api.py): expanded from ~40KB to 164KB with full agent support
- Router: new /v1/agents/{id}/infer endpoint with vision + DeepSeek routing
- Behavior Policy: SOWA v2.2 (3-level: FULL/ACK/SILENT)
- Agent Registry: config/agent_registry.yml as single source of truth
- 13 agents configured (was 3)
- Memory service integration
- CrewAI teams and roles

Excluded from snapshot: venv/, .env, data/, backups, .tgz archives

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-09 08:46:46 -08:00
parent 134c044c21
commit ef3473db21
9473 changed files with 408933 additions and 2769877 deletions
--- a/gateway-bot/services/doc_service.py
+++ b/gateway-bot/services/doc_service.py
@@ -209,7 +209,7 @@ class DocumentService:
                doc_content = doc_response.content
            
            # Send directly to Swapper /document endpoint
-            async with httpx.AsyncClient(timeout=120.0) as client:
+            async with httpx.AsyncClient(timeout=30.0) as client:
                # Map output_mode: qa_pairs -> text (Swapper doesn't support qa_pairs directly)
                swapper_mode = "markdown" if output_mode in ["qa_pairs", "markdown"] else "text"
                
@@ -287,7 +287,49 @@ class DocumentService:
            )
            
        except Exception as e:
-            logger.error(f"Document parsing failed: {e}", exc_info=True)
+            logger.error(f"Document parsing via Swapper failed: {e}")
+            
+            # === FALLBACK: Try PyPDF2 for PDF files ===
+            if file_name and file_name.lower().endswith(".pdf"):
+                try:
+                    logger.info(f"Fallback: parsing PDF with PyPDF2: {file_name}")
+                    import io
+                    import PyPDF2
+                    
+                    reader = PyPDF2.PdfReader(io.BytesIO(doc_content))
+                    parsed_text = ""
+                    for page in reader.pages:
+                        text = page.extract_text() or ""
+                        parsed_text += text + "\n"
+                    parsed_text = parsed_text.strip()
+                    
+                    if len(parsed_text) > 30:
+                        logger.info(f"PyPDF2 fallback success: {len(parsed_text)} chars from {len(reader.pages)} pages")
+                        doc_id = hashlib.md5(f"{file_name}:{datetime.utcnow().isoformat()}".encode()).hexdigest()[:12]
+                        
+                        await self.save_doc_context(
+                            session_id=session_id,
+                            doc_id=doc_id,
+                            doc_url=doc_url,
+                            file_name=file_name,
+                            dao_id=dao_id
+                        )
+                        
+                        return ParsedResult(
+                            success=True,
+                            doc_id=doc_id,
+                            qa_pairs=None,
+                            markdown=parsed_text,
+                            chunks_meta=None,
+                            raw={"model": "PyPDF2-fallback", "pages": len(reader.pages)},
+                            error=None
+                        )
+                    else:
+                        logger.warning(f"PyPDF2 fallback: too little text ({len(parsed_text)} chars)")
+                except Exception as pdf_err:
+                    logger.error(f"PyPDF2 fallback also failed: {pdf_err}")
+            # === END FALLBACK ===
+            
            return ParsedResult(
                success=False,
                error=str(e)