"""Review workflow for pending dictionary terms.

Pending records are read from a JSONL file, review decisions are appended to a
second JSONL file, and approved decisions can be merged into a YAML dictionary
file. All locations are configurable through ``AGX_*`` environment variables.
"""
import hashlib
import json
import os
from datetime import datetime, timezone
from pathlib import Path

try:
    import yaml
except ImportError:
    # Only the dictionary read/write helpers need PyYAML. Keeping the import
    # soft lets the review functions (list/approve/reject/stats) work without
    # it; _require_yaml() raises as soon as YAML is actually needed.
    yaml = None

PENDING_PATH = Path(os.getenv(
    'AGX_PENDING_PATH',
    '/opt/microdao-daarion/data/dictionaries/pending.jsonl'))
RESOLVED_PATH = Path(os.getenv(
    'AGX_PENDING_RESOLVED_PATH',
    '/opt/microdao-daarion/data/dictionaries/pending_resolved.jsonl'))
DICT_PATH = Path(os.getenv(
    'AGX_DICTIONARY_PATH',
    '/opt/microdao-daarion/data/dictionaries/dictionaries.yaml'))
LOCK_PATH = Path(os.getenv(
    'AGX_DICTIONARY_LOCK',
    '/opt/microdao-daarion/data/dictionaries/locks/dictionaries.lock'))

# Lower-cased dict keys whose values are masked before a record is persisted.
# Empty fragments (e.g. from a trailing comma) are dropped so that an empty
# key is never treated as sensitive by accident.
REDACT_KEYS = {
    k.strip().lower()
    for k in os.getenv(
        'AGX_AUDIT_REDACT_KEYS',
        'token,secret,password,authorization,cookie,api_key,signature',
    ).split(',')
    if k.strip()
}

# Categories that are always stored under their plural key in the dictionary.
_PLURAL_CATEGORIES = ('field', 'crop', 'operation', 'material', 'unit')


def _now():
    """Return the current UTC time as an ISO-8601 string with a 'Z' suffix."""
    # datetime.utcnow() is deprecated; dropping tzinfo keeps the exact output
    # format the old call produced (no '+00:00' offset before the 'Z').
    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + 'Z'


def _sanitize(obj):
    """Return a copy of *obj* with values of sensitive dict keys masked.

    Dicts and lists are walked recursively; any other value is returned as is.
    A key is sensitive when its lower-cased text is in ``REDACT_KEYS``.
    """
    if isinstance(obj, dict):
        return {
            # str() so non-string keys (ints, None, ...) cannot crash .lower()
            k: ('***REDACTED***' if str(k).lower() in REDACT_KEYS else _sanitize(v))
            for k, v in obj.items()
        }
    if isinstance(obj, list):
        return [_sanitize(v) for v in obj]
    return obj


def _read_jsonl_lines(path):
    """Return ``(line_number, stripped_text)`` for each non-blank line of *path*.

    Line numbers are 1-based and count blank lines. A missing file yields an
    empty list.
    """
    try:
        text = path.read_text(encoding='utf-8')
    except FileNotFoundError:
        return []
    # Split on '\n' only: str.splitlines() also breaks on characters such as
    # U+2028, which can appear unescaped inside a JSON string and would shift
    # the line numbers used in pending refs.
    return [
        (line_no, line.strip())
        for line_no, line in enumerate(text.split('\n'), start=1)
        if line.strip()
    ]


def _load_pending():
    """Load all pending records from ``PENDING_PATH``.

    Each record is the parsed JSON object plus two identifiers:
    ``pending_ref`` (``pending.jsonl:<line number>``) and ``pending_id``
    (``sha256:`` + hex digest of the stripped line). Keys already present in
    the JSON object take precedence over the generated ones.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    items = []
    # Previously this read RESOLVED_PATH, so the queue was built from the
    # wrong file (or failed when no decision had been recorded yet).
    for line_no, raw in _read_jsonl_lines(PENDING_PATH):
        obj = json.loads(raw)
        items.append({
            'pending_ref': f"pending.jsonl:{line_no}",
            'pending_id': 'sha256:' + hashlib.sha256(raw.encode()).hexdigest(),
            **obj,
        })
    return items


def _load_resolved():
    """Load all decision records from ``RESOLVED_PATH`` (empty if missing)."""
    return [json.loads(raw) for _, raw in _read_jsonl_lines(RESOLVED_PATH)]


def _find_pending(pending, pending_ref_or_id):
    """Return the first record whose ref or id equals the argument, else None."""
    return next(
        (
            p for p in pending
            if p.get('pending_ref') == pending_ref_or_id
            or p.get('pending_id') == pending_ref_or_id
        ),
        None,
    )


def get_pending_detail(pending_ref_or_id: str):
    """Return a summary of one pending record, or None if it does not exist.

    The summary holds the record's ref, category, raw term, timestamp, up to
    five suggestions (``id`` and ``score`` only) and its review state:
    ``status`` is ``'open'`` until a decision record matches the item, then the
    decision value (or ``'resolved'`` when the decision is empty).
    """
    pending = _load_pending()
    resolved = _load_resolved()
    target = _find_pending(pending, pending_ref_or_id)
    if not target:
        return None

    resolved_match = next(
        (
            r for r in resolved
            if r.get('pending_id') == target.get('pending_id')
            or r.get('pending_ref') == target.get('pending_ref')
        ),
        None,
    )
    status, decision, reason = 'open', '', ''
    if resolved_match:
        status = resolved_match.get('decision') or 'resolved'
        decision = resolved_match.get('decision') or ''
        reason = resolved_match.get('reason') or ''

    suggestions = [
        {'id': s.get('id'), 'score': s.get('score')}
        for s in (target.get('suggestions') or [])[:5]
    ]
    return {
        'ref': target.get('pending_ref'),
        'category': target.get('category'),
        'raw_term': target.get('raw_term'),
        'ts': target.get('ts'),
        'suggestions': suggestions,
        'status': status,
        'decision': decision,
        'reason': reason,
    }


def list_pending(limit=50, category=None, status='open'):
    """Return up to *limit* pending records.

    Args:
        limit: maximum number of records returned (slice bound).
        category: when truthy, keep only records of this category.
        status: ``'open'`` hides items that already have a decision record;
            any other value returns resolved items as well.
    """
    pending = _load_pending()
    if status == 'open':
        resolved_ids = {r.get('pending_id') for r in _load_resolved()}
        pending = [p for p in pending if p['pending_id'] not in resolved_ids]
    if category:
        pending = [p for p in pending if p.get('category') == category]
    return pending[:limit]


def _append_resolved(record: dict):
    """Append *record*, with sensitive keys masked, to ``RESOLVED_PATH``."""
    RESOLVED_PATH.parent.mkdir(parents=True, exist_ok=True)
    with RESOLVED_PATH.open('a', encoding='utf-8') as f:
        f.write(json.dumps(_sanitize(record), ensure_ascii=False) + "\n")


def _record_decision(target, decision, action, reason, reviewer, trace_id):
    """Build a decision record for *target*, persist it and return it."""
    record = {
        'ts': _now(),
        'pending_ref': target['pending_ref'],
        'pending_id': target['pending_id'],
        'decision': decision,
        'category': target.get('category'),
        'raw_term': target.get('raw_term'),
        'action': action,
        'reviewer': reviewer,
        'reason': reason,
        'trace_id': trace_id or '',
    }
    _append_resolved(record)
    return record


def approve_pending(pending_ref_or_id: str, action: dict, trace_id=None, reviewer='local'):
    """Record an approval for a pending item and return the decision record.

    Args:
        pending_ref_or_id: the item's ``pending_ref`` or ``pending_id``.
        action: what to do with the term; its optional ``reason`` is copied
            into the record.
        trace_id: optional correlation id, stored as ``''`` when missing.
        reviewer: name stored with the decision.

    Raises:
        ValueError: ``'pending_not_found'`` if no pending item matches.
    """
    target = _find_pending(_load_pending(), pending_ref_or_id)
    if not target:
        raise ValueError('pending_not_found')
    return _record_decision(
        target, 'approved', action,
        action.get('reason', '') or '', reviewer, trace_id,
    )


def reject_pending(pending_ref_or_id: str, reason: str, trace_id=None, reviewer='local'):
    """Record a rejection for a pending item and return the decision record.

    Raises:
        ValueError: ``'pending_not_found'`` if no pending item matches.
    """
    target = _find_pending(_load_pending(), pending_ref_or_id)
    if not target:
        raise ValueError('pending_not_found')
    return _record_decision(
        target, 'rejected', {'type': 'reject'}, reason, reviewer, trace_id,
    )


def _require_yaml():
    """Raise ImportError when PyYAML could not be imported."""
    if yaml is None:
        raise ImportError('PyYAML is required to read or write the dictionary file')


def _load_dict():
    """Parse ``DICT_PATH`` as YAML; an empty document yields an empty dict."""
    _require_yaml()
    return yaml.safe_load(DICT_PATH.read_text(encoding='utf-8')) or {}


def _write_dict(data):
    """Write *data* to ``DICT_PATH`` as YAML while holding the lock file.

    Raises:
        RuntimeError: ``'lock_exists'`` if the lock file is already present.
    """
    _require_yaml()
    LOCK_PATH.parent.mkdir(parents=True, exist_ok=True)
    try:
        # Mode 'x' creates the file atomically and fails if it exists, so two
        # writers cannot both pass an exists() check and then both proceed.
        with LOCK_PATH.open('x', encoding='utf-8') as lock:
            lock.write('locked')
    except FileExistsError:
        raise RuntimeError('lock_exists') from None
    try:
        DICT_PATH.write_text(
            yaml.safe_dump(data, allow_unicode=True, sort_keys=False),
            encoding='utf-8',
        )
    finally:
        try:
            LOCK_PATH.unlink()
        except OSError:
            # Best effort: raising here would mask an error from the write
            # above; a leftover lock surfaces as 'lock_exists' next time.
            pass


def _append_synonym(item, raw_term):
    """Add *raw_term* to ``item['synonyms']`` unless it is already there."""
    if item.get('synonyms') is None:  # missing key or explicit null in YAML
        item['synonyms'] = []
    if raw_term not in item['synonyms']:
        item['synonyms'].append(raw_term)


def _add_synonym(data, category, canonical_id, raw_term):
    """Attach *raw_term* as a synonym of the entry with id *canonical_id*.

    Known categories are looked up under their plural key only; other
    categories fall back to the singular key when the plural one is absent.

    Returns:
        True if the entry was found (even when the synonym already existed),
        False otherwise.
    """
    if not category:
        return False
    plural = category + 's'
    if category in _PLURAL_CATEGORIES:
        items = data.get(plural)
    else:
        items = data.get(plural, data.get(category))
    for item in items or []:
        if item.get('id') == canonical_id:
            _append_synonym(item, raw_term)
            return True
    return False


def _create_entry(data, category, name, new_id, raw_term):
    """Create a dictionary entry under ``category + 's'`` and return it.

    If an entry with the same non-null id already exists, it is reused and only
    gains the synonym. apply_resolutions() replays every approved record on
    each run, so an unconditional append would duplicate the entry each time.
    """
    key = category + 's'
    if data.get(key) is None:
        data[key] = []
    if new_id is not None:
        for existing in data[key]:
            if existing.get('id') == new_id:
                _append_synonym(existing, raw_term)
                return existing
    entry = {'id': new_id, 'name': name, 'synonyms': [raw_term]}
    data[key].append(entry)
    return entry


def apply_resolutions():
    """Merge all approved decisions into the dictionary file.

    ``map_to_existing`` / ``add_synonym`` actions add the raw term as a synonym
    of an existing entry; ``create_new_entry`` adds a new entry. Records
    without a category or raw term are skipped. The dictionary is rewritten
    even when nothing was applied.

    Returns:
        The number of decision records that were applied.
    """
    resolved = _load_resolved()
    data = _load_dict()
    applied = 0
    for r in resolved:
        if r.get('decision') != 'approved':
            continue
        action = r.get('action') or {}
        raw_term = r.get('raw_term')
        category = r.get('category')
        if not category or raw_term is None:
            continue  # nothing meaningful to store for this record
        action_type = action.get('type')
        if action_type in ('map_to_existing', 'add_synonym'):
            if _add_synonym(data, category, action.get('canonical_id'), raw_term):
                applied += 1
        elif action_type == 'create_new_entry':
            _create_entry(
                data, category,
                action.get('canonical_name'), action.get('canonical_id'), raw_term,
            )
            applied += 1
    _write_dict(data)
    return applied


def stats():
    """Return counts of open items and of approved / rejected decisions."""
    pending = _load_pending()
    resolved = _load_resolved()
    resolved_ids = {r.get('pending_id') for r in resolved}
    return {
        'open': sum(1 for p in pending if p['pending_id'] not in resolved_ids),
        'approved': sum(1 for r in resolved if r.get('decision') == 'approved'),
        'rejected': sum(1 for r in resolved if r.get('decision') == 'rejected'),
    }


def auto_approve(min_score=0.97, category=None, dry_run=True, reviewer='local'):
    """Approve open items whose first suggestion scores at least *min_score*.

    Only the first 1000 open items are considered. With ``dry_run`` true,
    nothing is written and the proposed actions are only reported.

    Returns:
        A list of ``{'pending_ref', 'action', 'dry_run'}`` dicts, one per item
        that met the threshold.
    """
    results = []
    for p in list_pending(limit=1000, category=category, status='open'):
        suggestions = p.get('suggestions') or []
        if not suggestions:
            continue
        top = suggestions[0]
        # A missing or null score counts as 0 instead of raising on comparison.
        if (top.get('score') or 0) < min_score:
            continue
        action = {
            'type': 'map_to_existing',
            'canonical_id': top.get('id'),
            'canonical_name': top.get('name'),
        }
        if not dry_run:
            # Record directly from the item already in hand rather than
            # re-reading the whole pending file for every approval.
            _record_decision(
                p, 'approved', action,
                action.get('reason', '') or '', reviewer, None,
            )
        results.append({
            'pending_ref': p['pending_ref'],
            'action': action,
            'dry_run': bool(dry_run),
        })
    return results