Files
microdao-daarion/scripts/docs/jupyter_sync.sh
2026-02-16 02:21:49 -08:00

154 lines
3.7 KiB
Bash
Executable File

#!/usr/bin/env bash
# jupyter_sync.sh — probe locally running Jupyter servers and inventory the
# top-level *.ipynb files in a notebooks directory. Default mode (--dry-run)
# prints a summary; --apply writes a markdown report plus a CSV index under
# docs/consolidation/jupyter.
set -euo pipefail
# Repository root: two directories above this script (scripts/docs/ -> root).
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# All --apply outputs land here.
OUT_DIR="$ROOT_DIR/docs/consolidation/jupyter"
# Default notebook location; can be overridden with --notebooks-dir PATH.
NOTEBOOKS_DIR="/Users/apple/notebooks"
# Mutually exclusive mode flags; dry-run is the default.
DRY_RUN=1
APPLY=0
# Print CLI usage to stdout.
usage() {
  printf '%s\n' \
    'Usage:' \
    'bash scripts/docs/jupyter_sync.sh [--dry-run] [--apply] [--notebooks-dir PATH]' \
    'Behavior:' \
    '- default mode is --dry-run' \
    '- --apply writes markdown and csv outputs under docs/consolidation/jupyter'
}
# ---- argument parsing -------------------------------------------------------
# Later flags win (e.g. "--dry-run --apply" ends up in apply mode).
while [[ $# -gt 0 ]]; do
  case "$1" in
    --dry-run)
      DRY_RUN=1
      APPLY=0
      shift
      ;;
    --apply)
      APPLY=1
      DRY_RUN=0
      shift
      ;;
    --notebooks-dir)
      # Check arity explicitly: with a missing value, "$2" would trip
      # `set -u` with an unhelpful "unbound variable" message.
      if [[ $# -lt 2 ]]; then
        echo "Missing PATH argument for --notebooks-dir" >&2
        usage
        exit 2
      fi
      NOTEBOOKS_DIR="$2"
      shift 2
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "Unknown arg: $1" >&2
      usage
      exit 2
      ;;
  esac
done
# ---- locate a usable jupyter CLI -------------------------------------------
# JUPYTER_CMD keeps the printable form used in the report; JUPYTER_CMD_ARGV
# holds the same command as an array so it can be run without eval.
JUPYTER_CMD=""
JUPYTER_CMD_ARGV=()
if command -v jupyter >/dev/null 2>&1; then
  JUPYTER_CMD="jupyter"
  JUPYTER_CMD_ARGV=(jupyter)
elif python3 -m jupyter --version >/dev/null 2>&1; then
  JUPYTER_CMD="python3 -m jupyter"
  JUPYTER_CMD_ARGV=(python3 -m jupyter)
fi

# ---- enumerate running servers and probe their /api/status endpoints -------
server_list=""
if [[ -n "$JUPYTER_CMD" ]]; then
  # "jupyter server list" prints one "http...  /dir" row per running server.
  server_list="$("${JUPYTER_CMD_ARGV[@]}" server list 2>/dev/null || true)"
fi
server_rows="$(echo "$server_list" | awk '/^http/ {print}' || true)"
# [[:space:]] instead of GNU-only \s so BSD sed (macOS) also drops blanks.
server_count="$(echo "$server_rows" | sed '/^[[:space:]]*$/d' | wc -l | tr -d ' ')"
ok_api=0
total_api=0
api_lines=""
if [[ "$server_count" -gt 0 ]]; then
  while IFS= read -r line; do
    [[ -z "$line" ]] && continue
    # First whitespace-separated field is the server URL (may carry ?token=).
    url="$(echo "$line" | awk '{print $1}')"
    token="$(echo "$url" | sed -n 's/.*token=\([^&]*\).*/\1/p')"
    base="${url%%\?*}"
    status_url="${base%/}/api/status"
    if [[ -n "$token" ]]; then
      probe_url="${status_url}?token=${token}"
      # Redact the auth token in anything we log or write into reports.
      shown_url="${status_url}?token=***"
    else
      probe_url="$status_url"
      shown_url="$status_url"
    fi
    # The response body is never read; discard it rather than leaving a
    # predictable world-readable file in /tmp. Only the HTTP code matters.
    code="$(curl -sS -o /dev/null -w '%{http_code}' "$probe_url" || true)"
    total_api=$((total_api + 1))
    if [[ "$code" == "200" ]]; then
      ok_api=$((ok_api + 1))
    fi
    api_lines+="- ${shown_url} -> HTTP ${code}"$'\n'
  done <<< "$server_rows"
fi
# ---- count top-level notebooks (0 if the directory is absent) ---------------
nb_count=0
if [[ -d "$NOTEBOOKS_DIR" ]]; then
  nb_count=$(find "$NOTEBOOKS_DIR" -maxdepth 1 -type f -name '*.ipynb' | wc -l | tr -d ' ')
fi

# ---- dry-run: print the summary and stop before touching any files ----------
if [[ "$DRY_RUN" -eq 1 ]]; then
  printf '%s\n' \
    "[dry-run] jupyter_cmd: ${JUPYTER_CMD:-<not-found>}" \
    "[dry-run] server_count: $server_count" \
    "[dry-run] api_ok: $ok_api/$total_api" \
    "[dry-run] notebooks_dir: $NOTEBOOKS_DIR" \
    "[dry-run] notebooks_count: $nb_count"
  if [[ -n "$api_lines" ]]; then
    printf '%s\n' "[dry-run] api_probes:"
    printf "%s" "$api_lines"
  fi
  exit 0
fi
# ---- apply mode: write markdown report + CSV notebook index -----------------
mkdir -p "$OUT_DIR"
STAMP="$(date +%Y%m%d-%H%M%S)"
REPORT="$OUT_DIR/JUPYTER_SYNC_${STAMP}.md"
LATEST="$OUT_DIR/JUPYTER_SYNC_LATEST.md"
INDEX="$OUT_DIR/notebooks_index_${STAMP}.csv"
INDEX_LATEST="$OUT_DIR/notebooks_index_latest.csv"

# stat(1) flavors differ: GNU coreutils (Linux) uses -c, BSD (macOS) uses -f.
# Detect once via --version (GNU-only flag) and dispatch through helpers.
if stat --version >/dev/null 2>&1; then
  # GNU coreutils stat.
  stat_size() { stat -c '%s' "$1"; }
  stat_mtime() { stat -c '%Y' "$1"; }
else
  # BSD stat.
  stat_size() { stat -f '%z' "$1"; }
  stat_mtime() { stat -f '%m' "$1"; }
fi

# Markdown summary report.
{
  echo "# Jupyter Sync Report"
  echo
  echo "Generated: $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
  echo
  echo "- jupyter_cmd: ${JUPYTER_CMD:-not-found}"
  echo "- server_count: $server_count"
  echo "- api_ok: $ok_api/$total_api"
  echo "- notebooks_dir: $NOTEBOOKS_DIR"
  echo "- notebooks_count: $nb_count"
  echo
  echo "## API Probes"
  echo
  if [[ -n "$api_lines" ]]; then
    printf "%s" "$api_lines"
  else
    echo "- no active jupyter servers discovered"
  fi
} > "$REPORT"

# CSV index of top-level notebooks; header is written even when the
# directory is missing so consumers always get a valid (possibly empty) CSV.
if [[ -d "$NOTEBOOKS_DIR" ]]; then
  {
    echo "path,size_bytes,mtime_epoch"
    find "$NOTEBOOKS_DIR" -maxdepth 1 -type f -name '*.ipynb' -print0 | \
    while IFS= read -r -d '' file; do
      size="$(stat_size "$file")"
      mtime="$(stat_mtime "$file")"
      # Double any embedded quotes so the quoted CSV field stays well-formed.
      echo "\"${file//\"/\"\"}\",$size,$mtime"
    done
  } > "$INDEX"
else
  echo "path,size_bytes,mtime_epoch" > "$INDEX"
fi

# Refresh the stable "latest" aliases alongside the timestamped files.
cp "$REPORT" "$LATEST"
cp "$INDEX" "$INDEX_LATEST"
echo "Wrote: $REPORT"
echo "Updated: $LATEST"
echo "Wrote: $INDEX"
echo "Updated: $INDEX_LATEST"