New router intelligence modules (26 files): alert_ingest/store, audit_store, architecture_pressure, backlog_generator/store, cost_analyzer, data_governance, dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment, platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files), signature_state_store, sofiia_auto_router, tool_governance New services: - sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static - memory-service: integration_endpoints, integrations, voice_endpoints, static UI - aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents) - sofiia-supervisor: new supervisor service - aistalk-bridge-lite: Telegram bridge lite - calendar-service: CalDAV calendar service with reminders - mlx-stt-service / mlx-tts-service: Apple Silicon speech services - binance-bot-monitor: market monitor service - node-worker: STT/TTS memory providers New tools (9): agent_email, browser_tool, contract_tool, observability_tool, oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault New crews: agromatrix_crew (10 modules: depth_classifier, doc_facts, doc_focus, farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine, session_context, style_adapter, telemetry) Tests: 85+ test files for all new modules Made-with: Cursor
483 lines
17 KiB
Python
483 lines
17 KiB
Python
"""
|
|
DAARION Memory Service - Integrations
|
|
Obsidian та Google Drive інтеграції
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import json
|
|
import logging
|
|
import hashlib
|
|
import shutil
|
|
import io
|
|
from pathlib import Path
|
|
from typing import List, Dict, Set, Optional, Any, Tuple
|
|
from datetime import datetime
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class ObsidianIntegrator:
    """Obsidian vault integrator for the Memory Service.

    Scans a local Obsidian vault, parses Markdown notes (YAML frontmatter,
    headings, [[wiki-links]], #tags and ^block anchors) and maintains
    in-memory indexes used for link-graph queries and note search.
    """

    # Container-internal mount points mapped to their host-side locations,
    # so the same configured path resolves both inside and outside Docker.
    HOST_VAULT_MAPPINGS = {
        '/vault/rd': '/Users/apple/Desktop/R&D',
        '/vault/obsidian': '/Users/apple/Documents/Obsidian Vault',
    }

    def __init__(self, vault_path: Optional[str] = None):
        # Resolved vault root, or None until configured / auto-discovered.
        self.vault_path: Optional[Path] = (
            self._resolve_vault_path(vault_path) if vault_path else None
        )
        # note title (file stem) -> parsed note dict, see _parse_note().
        self.notes_cache: Dict[str, Dict[str, Any]] = {}
        # note title -> {'outbound': [...], 'inbound': [...]}
        self.links_graph: Dict[str, Dict[str, List[str]]] = {}
        # tag -> list of note titles carrying that tag.
        self.tags_index: Dict[str, List[str]] = {}

    def _resolve_vault_path(self, path: str) -> Path:
        """Resolve *path*, falling back to the Docker-to-host mapping table."""
        path_obj = Path(path)
        if path_obj.exists():
            return path_obj
        if path in self.HOST_VAULT_MAPPINGS:
            resolved = Path(self.HOST_VAULT_MAPPINGS[path])
            if resolved.exists():
                return resolved
        # Return the original (possibly non-existent) path; callers check .exists().
        return path_obj

    def find_vault(self) -> Optional[Path]:
        """Auto-discover an Obsidian vault in common locations.

        A directory qualifies when it contains an ``.obsidian`` folder.
        Returns the first match, or None when nothing is found.
        """
        possible_paths = [
            Path.home() / "Documents" / "Obsidian Vault",
            Path.home() / "Documents" / "Notes",
            Path.home() / "Desktop" / "Obsidian Vault",
            Path.home() / "Obsidian",
            Path.home() / "Notes",
        ]

        # Also consider any Documents subfolder that already is a vault.
        documents_path = Path.home() / "Documents"
        if documents_path.exists():
            for item in documents_path.iterdir():
                if item.is_dir() and (item / ".obsidian").exists():
                    possible_paths.append(item)

        for path in possible_paths:
            if path.exists() and (path / ".obsidian").exists():
                logger.info("Found Obsidian vault at: %s", path)
                return path

        return None

    def set_vault_path(self, vault_path: str) -> bool:
        """Validate *vault_path* and set it as the vault root.

        Returns True on success, False when the path is missing or is not
        a real Obsidian vault (no ``.obsidian`` directory).
        """
        resolved = self._resolve_vault_path(vault_path)
        if not resolved.exists():
            logger.error("Vault path does not exist: %s (resolved: %s)", vault_path, resolved)
            return False

        if not (resolved / ".obsidian").exists():
            logger.error("Not a valid Obsidian vault: %s (resolved: %s)", vault_path, resolved)
            return False

        self.vault_path = resolved
        logger.info("Vault path set to: %s", resolved)
        return True

    def scan_vault(self) -> Dict[str, Any]:
        """Walk the vault, parse all Markdown notes and rebuild the indexes.

        Returns aggregate statistics, or an empty dict when no vault is
        configured.

        Fixes over the previous version:
        - the notes cache is cleared first, so repeated scans no longer keep
          stale entries for deleted or renamed notes;
        - files under hidden directories (e.g. ``.obsidian/`` config/cache)
          are skipped entirely — the old check only looked at the file name,
          so Obsidian's own config files were counted as attachments.
        """
        if not self.vault_path:
            return {}

        # Rebuild from scratch on every scan.
        self.notes_cache = {}

        stats: Dict[str, Any] = {
            'total_notes': 0,
            'total_attachments': 0,
            'total_links': 0,
            'total_tags': 0,
            'folders': set(),
            'file_types': {},
            'notes': []
        }

        for file_path in self.vault_path.rglob('*'):
            if not file_path.is_file():
                continue
            relative = file_path.relative_to(self.vault_path)
            # Skip hidden files AND anything nested under a hidden directory.
            if any(part.startswith('.') for part in relative.parts):
                continue

            suffix = file_path.suffix.lower()
            stats['file_types'][suffix] = stats['file_types'].get(suffix, 0) + 1

            relative_folder = relative.parent
            if relative_folder != Path('.'):
                stats['folders'].add(str(relative_folder))

            if suffix == '.md':
                note_data = self._parse_note(file_path)
                if note_data:
                    self.notes_cache[file_path.stem] = note_data
                    stats['notes'].append(note_data)
                    stats['total_notes'] += 1
                    stats['total_links'] += len(note_data['links'])
                    stats['total_tags'] += len(note_data['tags'])
            else:
                stats['total_attachments'] += 1

        self._build_links_graph()
        self._build_tags_index()

        stats['folders'] = list(stats['folders'])
        return stats

    def _parse_note(self, file_path: Path) -> Optional[Dict[str, Any]]:
        """Parse a single Markdown note into a metadata dict.

        Extracts YAML frontmatter (best effort), headings, wiki-links, tags
        and ^block anchors. Returns None on any read/parse failure.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            note_data: Dict[str, Any] = {
                'title': file_path.stem,
                'path': str(file_path.relative_to(self.vault_path)),
                'full_path': str(file_path),
                'size': len(content),
                'created': datetime.fromtimestamp(file_path.stat().st_ctime),
                'modified': datetime.fromtimestamp(file_path.stat().st_mtime),
                'content': content,
                # md5 is acceptable here: it is only a change-detection
                # fingerprint, not a security primitive.
                'content_hash': hashlib.md5(content.encode()).hexdigest(),
                'frontmatter': {},
                'headings': [],
                'links': [],
                'tags': [],
                'backlinks': [],  # filled in later by _build_links_graph()
                'blocks': []
            }

            # Optional YAML frontmatter delimited by '---' lines at the top.
            frontmatter_match = re.match(r'^---\s*\n(.*?)\n---\s*\n', content, re.DOTALL)
            if frontmatter_match:
                try:
                    import yaml  # optional dependency; ignore if missing/invalid
                    note_data['frontmatter'] = yaml.safe_load(frontmatter_match.group(1))
                except Exception:
                    pass

            # Markdown headings, stored as (level, text) tuples.
            headings = re.findall(r'^(#{1,6})\s+(.+)$', content, re.MULTILINE)
            note_data['headings'] = [(len(h[0]), h[1].strip()) for h in headings]

            # [[Target|alias]] wiki-links; keep only the target part.
            internal_links = re.findall(r'\[\[([^\]]+)\]\]', content)
            note_data['links'] = [link.split('|')[0].strip() for link in internal_links]

            # #tags (including nested like #a/b), de-duplicated.
            tags = re.findall(r'(?:^|\s)#([\w\-\/]+)', content)
            note_data['tags'] = list(set(tags))

            # ^block-id anchors.
            note_data['blocks'] = re.findall(r'\^([\w\-]+)', content)

            return note_data

        except Exception as e:
            logger.error("Error parsing note %s: %s", file_path, e)
            return None

    def _build_links_graph(self):
        """Rebuild the bidirectional links graph from the notes cache."""
        self.links_graph = {}

        for note_title, note_data in self.notes_cache.items():
            self.links_graph[note_title] = {
                'outbound': note_data['links'],
                'inbound': []
            }

        # Second pass: record inbound links and per-note backlinks.
        for note_title, note_data in self.notes_cache.items():
            for linked_note in note_data['links']:
                if linked_note in self.links_graph:
                    self.links_graph[linked_note]['inbound'].append(note_title)
                if linked_note in self.notes_cache:
                    self.notes_cache[linked_note]['backlinks'].append(note_title)

    def _build_tags_index(self):
        """Rebuild the tag -> note titles index from the notes cache."""
        self.tags_index = {}
        for note_title, note_data in self.notes_cache.items():
            for tag in note_data['tags']:
                self.tags_index.setdefault(tag, []).append(note_title)

    def search_notes(self, query: str, search_content: bool = True) -> List[Dict[str, Any]]:
        """Search cached notes by title, tags and (optionally) content.

        Scoring: title match = 10, tag match = 5 each, content match = 1.
        Returns note dicts (with 'match_score') sorted best-first.
        """
        results = []
        query_lower = query.lower()

        for note_title, note_data in self.notes_cache.items():
            match_score = 0

            if query_lower in note_title.lower():
                match_score += 10

            for tag in note_data['tags']:
                if query_lower in tag.lower():
                    match_score += 5

            if search_content and query_lower in note_data['content'].lower():
                match_score += 1

            if match_score > 0:
                result = note_data.copy()
                result['match_score'] = match_score
                results.append(result)

        results.sort(key=lambda x: x['match_score'], reverse=True)
        return results

    def get_status(self) -> Dict[str, Any]:
        """Return a status snapshot for health/diagnostics endpoints."""
        return {
            'available': True,
            'vault_configured': self.vault_path is not None,
            'vault_path': str(self.vault_path) if self.vault_path else None,
            'notes_count': len(self.notes_cache),
            'tags_count': len(self.tags_index)
        }
|
|
|
|
|
|
class GoogleDriveIntegrator:
    """Google Drive integrator for the Memory Service.

    Uses the Drive v3 API (read-only scope) to list, download and sync
    text-bearing documents. Google API client libraries are imported lazily
    so the service degrades gracefully when they are not installed.
    """

    SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
    CREDENTIALS_DIR = Path.home() / '.daarion'
    CREDENTIALS_FILE = CREDENTIALS_DIR / 'google_credentials.json'
    TOKEN_FILE = CREDENTIALS_DIR / 'google_token.json'

    # Source MIME type -> export MIME type for Google-native documents;
    # None means the file is downloaded as-is via get_media.
    SUPPORTED_MIMETYPES = {
        'application/vnd.google-apps.document': 'text/plain',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document': None,
        'application/pdf': None,
        'text/plain': None,
        'text/markdown': None,
        'application/vnd.google-apps.spreadsheet': 'text/csv',
    }

    def __init__(self):
        # Drive API client; created lazily by authenticate().
        self.service = None
        self.CREDENTIALS_DIR.mkdir(exist_ok=True)

    def authenticate(self) -> bool:
        """Authenticate against Google Drive and build the API client.

        Returns False when the client libraries or client_secrets.json are
        missing. Fix: a revoked/expired refresh token no longer raises —
        we fall back to a fresh interactive OAuth flow instead.
        """
        try:
            from googleapiclient.discovery import build
            from google_auth_oauthlib.flow import InstalledAppFlow
            from google.auth.transport.requests import Request
            from google.oauth2.credentials import Credentials
        except ImportError:
            logger.warning("Google API libraries not installed")
            return False

        creds = None

        if self.TOKEN_FILE.exists():
            creds = Credentials.from_authorized_user_file(str(self.TOKEN_FILE), self.SCOPES)

        if not creds or not creds.valid:
            if creds and creds.expired and creds.refresh_token:
                try:
                    creds.refresh(Request())
                except Exception as e:
                    # Refresh token revoked/expired: discard and re-authorize.
                    logger.warning("Token refresh failed, re-authenticating: %s", e)
                    creds = None

            if not creds or not creds.valid:
                client_secrets = self.CREDENTIALS_DIR / 'client_secrets.json'
                if not client_secrets.exists():
                    logger.warning("Google client_secrets.json not found")
                    return False

                flow = InstalledAppFlow.from_client_secrets_file(
                    str(client_secrets), self.SCOPES)
                creds = flow.run_local_server(port=0)

            # Persist the (new or refreshed) credentials for next run.
            with open(self.TOKEN_FILE, 'w') as token:
                token.write(creds.to_json())

        self.service = build('drive', 'v3', credentials=creds)
        logger.info("Google Drive API authenticated")
        return True

    def list_files(self, folder_id: Optional[str] = None,
                   max_results: int = 100) -> List[Dict]:
        """List supported (non-folder, non-trashed) files.

        Fix: results are now paginated via nextPageToken until *max_results*
        files are collected — the old code requested the token in `fields`
        but never used it, silently truncating large folders.
        """
        if not self.service:
            if not self.authenticate():
                return []

        search_query = []

        if folder_id:
            search_query.append(f"'{folder_id}' in parents")

        # Restrict to MIME types we know how to turn into text.
        mime_conditions = [f"mimeType='{m}'" for m in self.SUPPORTED_MIMETYPES]
        if mime_conditions:
            search_query.append(f"({' or '.join(mime_conditions)})")

        search_query.extend([
            "trashed=false",
            "mimeType!='application/vnd.google-apps.folder'"
        ])

        final_query = ' and '.join(search_query)

        try:
            files: List[Dict] = []
            page_token = None
            while len(files) < max_results:
                results = self.service.files().list(
                    q=final_query,
                    pageSize=min(max_results - len(files), 100),
                    pageToken=page_token,
                    fields="nextPageToken, files(id, name, mimeType, size, createdTime, modifiedTime, parents, webViewLink)"
                ).execute()
                files.extend(results.get('files', []))
                page_token = results.get('nextPageToken')
                if not page_token:
                    break
            return files[:max_results]

        except Exception as e:
            logger.error("Error listing Google Drive files: %s", e)
            return []

    def download_file(self, file_id: str, mime_type: str) -> Optional[str]:
        """Download a file's content as text, or None on failure.

        Google-native documents are exported to their mapped text format;
        everything else is fetched verbatim and decoded best-effort.
        """
        if not self.service:
            return None

        try:
            from googleapiclient.http import MediaIoBaseDownload

            export_mime_type = self.SUPPORTED_MIMETYPES.get(mime_type)

            if export_mime_type:
                request = self.service.files().export_media(
                    fileId=file_id,
                    mimeType=export_mime_type
                )
            else:
                request = self.service.files().get_media(fileId=file_id)

            file_io = io.BytesIO()
            downloader = MediaIoBaseDownload(file_io, request)

            done = False
            while not done:
                _status, done = downloader.next_chunk()

            content = file_io.getvalue()

            # Best-effort decode; latin-1 always succeeds as a last resort.
            for encoding in ['utf-8', 'utf-16', 'latin-1']:
                try:
                    return content.decode(encoding)
                except UnicodeDecodeError:
                    continue

            return None

        except Exception as e:
            logger.error("Error downloading file %s: %s", file_id, e)
            return None

    def get_status(self) -> Dict[str, Any]:
        """Return a status snapshot for health/diagnostics endpoints.

        NOTE(review): 'authenticated' only means a token file exists on
        disk — the token may still be expired or revoked.
        """
        available = False
        authenticated = False

        try:
            from googleapiclient.discovery import build
            from google.oauth2.credentials import Credentials
            available = True

            if self.TOKEN_FILE.exists():
                authenticated = True
        except ImportError:
            pass

        return {
            'available': available,
            'authenticated': authenticated,
            'credentials_configured': self.CREDENTIALS_DIR.exists()
        }

    def sync_to_daarion(self, output_dir: Path,
                        folder_ids: Optional[List[str]] = None,
                        file_extensions: Optional[List[str]] = None) -> Dict[str, Any]:
        """Sync files from Google Drive to DAARION.

        Downloads each supported file and writes it as an annotated text
        file under *output_dir*. Fix: the output directory is created if
        missing instead of failing every write.
        """
        stats: Dict[str, Any] = {
            'total_files': 0,
            'downloaded': 0,
            'errors': 0,
            'skipped': 0,
            'files': []
        }

        # Ensure the destination exists before any write.
        output_dir.mkdir(parents=True, exist_ok=True)

        all_files = []
        if folder_ids:
            for folder_id in folder_ids:
                all_files.extend(self.list_files(folder_id=folder_id))
        else:
            all_files = self.list_files()

        stats['total_files'] = len(all_files)

        for file_data in all_files:
            file_id = file_data['id']
            file_name = file_data['name']
            mime_type = file_data['mimeType']

            # Optional extension filter (e.g. ['.md', '.txt']).
            if file_extensions:
                file_ext = Path(file_name).suffix.lower()
                if file_ext not in file_extensions:
                    stats['skipped'] += 1
                    continue

            content = self.download_file(file_id, mime_type)

            if content:
                # Sanitize the name for the local filesystem.
                safe_filename = "".join(c for c in file_name if c.isalnum() or c in (' ', '-', '_', '.')).rstrip()
                file_path = output_dir / f"gdrive_{file_id}_{safe_filename}.txt"

                try:
                    with open(file_path, 'w', encoding='utf-8') as f:
                        f.write(f"# Google Drive: {file_name}\n")
                        f.write(f"Source: {file_data.get('webViewLink', 'N/A')}\n")
                        f.write(f"Modified: {file_data.get('modifiedTime', 'N/A')}\n")
                        f.write(f"MIME Type: {mime_type}\n\n")
                        f.write("---\n\n")
                        f.write(content)

                    stats['downloaded'] += 1
                    stats['files'].append({
                        'original_name': file_name,
                        'saved_path': str(file_path),
                        'file_id': file_id,
                        'size': len(content),
                        'url': file_data.get('webViewLink')
                    })

                except Exception as e:
                    logger.error("Error saving %s: %s", file_name, e)
                    stats['errors'] += 1
            else:
                stats['errors'] += 1

        return stats

    def get_folder_structure(self, folder_id: Optional[str] = None, level: int = 0) -> Dict:
        """Return the Drive folder tree, recursing at most 3 levels deep."""
        if not self.service:
            if not self.authenticate():
                return {}

        try:
            query = "mimeType='application/vnd.google-apps.folder' and trashed=false"
            if folder_id:
                query += f" and '{folder_id}' in parents"

            results = self.service.files().list(
                q=query,
                fields="files(id, name, parents)"
            ).execute()

            structure = {}
            for folder in results.get('files', []):
                fid = folder['id']
                structure[folder['name']] = {
                    'id': fid,
                    # Depth cap prevents unbounded recursion on huge drives.
                    'subfolders': self.get_folder_structure(fid, level + 1) if level < 3 else {}
                }

            return structure

        except Exception as e:
            logger.error("Error getting folder structure: %s", e)
            return {}
|
|
|
|
|
|
# Module-level singletons shared by the service endpoints.
obsidian_integrator = ObsidianIntegrator()
gdrive_integrator = GoogleDriveIntegrator()