Scaffold SimpleX Orchestrate: supervisor over official binaries

A standalone control-plane app that spawns and drives the official SimpleX
binaries (never modifies simplex source). Validated against simplex-chat
built from source (stable v6.5.4, GHC 9.6.3).

- CLAUDE.md: architecture notes mined from the upstream docs (WebSocket bot API,
  per-profile DBs, directory/broadcast bot config)
- supervisor/: process registry + port allocation (supervisor.py), corrId/cmd<->resp
  WebSocket client (ws_client.py), binary locator (binaries.py), FastAPI front with
  REST control + /events stream (server.py)
- smoke_test.py: Pattern-1 handshake (spawn simplex-chat -p, create+read user) — PASS
- group_test.py: two accounts, invitation connect + group invite/join, verified
  membership over the real SMP network — PASS
- build_chat.sh / install_ghc.sh: reproducible toolchain + from-source build

Key finding: fresh DB prompts for a display name on stdin; spawn with
--create-bot-display-name to start the WebSocket server non-interactively.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Jon
2026-06-04 12:31:37 +01:00
commit 38ff96c576
12 changed files with 704 additions and 0 deletions

9
supervisor/__init__.py Normal file
View File

@@ -0,0 +1,9 @@
"""SimpleX Orchestrate supervisor — spawns and drives the official SimpleX binaries.
It never modifies or rebuilds simplex-chat; it only invokes prebuilt binaries.
"""
from .supervisor import Managed, Supervisor
from .ws_client import SimplexWSClient
__all__ = ["Supervisor", "Managed", "SimplexWSClient"]

24
supervisor/binaries.py Normal file
View File

@@ -0,0 +1,24 @@
"""Locate the official SimpleX binaries — never built/modified here, only invoked.
Resolution order: $SIMPLEX_BIN dir, then ./bin/, then PATH. Place prebuilt
binaries (simplex-chat, simplex-directory-service, simplex-broadcast-bot) in ./bin/
or install via the upstream install script. We never compile or alter them.
"""
import os
import shutil
from pathlib import Path
BIN_DIR = Path(os.environ.get("SIMPLEX_BIN", Path(__file__).resolve().parent.parent / "bin"))
KNOWN = ("simplex-chat", "simplex-directory-service", "simplex-broadcast-bot")
def binary_path(name: str) -> str:
local = BIN_DIR / name
if local.exists():
return str(local)
found = shutil.which(name)
if found:
return found
raise FileNotFoundError(f"{name!r} not found in {BIN_DIR} or PATH")

90
supervisor/server.py Normal file
View File

@@ -0,0 +1,90 @@
"""Front-facing API the website talks to.
The browser:
- opens a WebSocket to /events to receive live chat events from every profile
(each event is tagged with the originating profile name);
- uses REST to spawn/stop profiles and to send commands to cli profiles.
This process sits between the website and the SimpleX binaries; it is the only
thing that touches the binaries. Run: uvicorn supervisor.server:app
"""
import asyncio
import contextlib
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from .supervisor import Supervisor
app = FastAPI(title="SimpleX Orchestrate")
_browser_clients: set[WebSocket] = set()
async def _broadcast(name: str, event: dict) -> None:
dead = []
for ws in _browser_clients:
try:
await ws.send_json({"profile": name, "event": event})
except Exception:
dead.append(ws)
for ws in dead:
_browser_clients.discard(ws)
sup = Supervisor(on_event=_broadcast)
@app.on_event("shutdown")
async def _shutdown() -> None:
await sup.stop_all()
# ── Control (REST) ──────────────────────────────────────────────────────────────
@app.get("/profiles")
async def profiles() -> dict:
return {"profiles": sup.list()}
@app.post("/profiles/{name}/start-cli")
async def start_cli(name: str) -> dict:
await sup.start_cli(name)
return {"ok": True}
@app.post("/profiles/{name}/start-directory")
async def start_directory(name: str, body: dict) -> dict:
await sup.start_directory(name, body["super_users"], body["web_folder"])
return {"ok": True}
@app.post("/profiles/{name}/start-broadcast")
async def start_broadcast(name: str, body: dict) -> dict:
await sup.start_broadcast(name, body["display_name"], body["publishers"])
return {"ok": True}
@app.post("/profiles/{name}/cmd")
async def cmd(name: str, body: dict) -> dict:
"""Send a raw chat command string to a cli profile (e.g. '/_send @1 text hi')."""
return {"resp": await sup.send(name, body["cmd"])}
@app.post("/profiles/{name}/stop")
async def stop(name: str) -> dict:
await sup.stop(name)
return {"ok": True}
# ── Live events (WebSocket) ──────────────────────────────────────────────────────
@app.websocket("/events")
async def events(ws: WebSocket) -> None:
await ws.accept()
_browser_clients.add(ws)
try:
while True:
await ws.receive_text() # keep the socket open; ignore inbound
except WebSocketDisconnect:
pass
finally:
_browser_clients.discard(ws)

153
supervisor/supervisor.py Normal file
View File

@@ -0,0 +1,153 @@
"""Process supervisor: spawns and tracks the official SimpleX binaries.
Two kinds of managed process (see ../CLAUDE.md):
- cli : `simplex-chat -p PORT -d PREFIX` → we hold a WebSocket and stream commands
- directory : `simplex-directory-service ...` → autonomous; lifecycle + read its web-folder
- broadcast : `simplex-broadcast-bot ...` → autonomous; lifecycle only
The DB belongs to each process; we never open it ourselves while the process runs.
NOTE: exact flag spellings for the autonomous bots should be confirmed with
`<binary> --help` once binaries are in ./bin — the directory flags here follow
apps/simplex-directory-service/src/Directory/Options.hs.
"""
import asyncio
import contextlib
from collections.abc import Awaitable, Callable
from dataclasses import dataclass
from pathlib import Path
from .binaries import binary_path
from .ws_client import SimplexWSClient
# (profile_name, event_record) -> None
EventSink = Callable[[str, dict], Awaitable[None]]
@dataclass
class Managed:
name: str
kind: str # 'cli' | 'directory' | 'broadcast'
proc: asyncio.subprocess.Process
port: int | None = None
client: SimplexWSClient | None = None
class Supervisor:
def __init__(self, data_dir: str = "data", base_port: int = 5300, on_event: EventSink | None = None):
self.data_dir = Path(data_dir)
self.data_dir.mkdir(parents=True, exist_ok=True)
self._next_port = base_port
self._procs: dict[str, Managed] = {}
self._on_event = on_event
def _alloc_port(self) -> int:
port = self._next_port
self._next_port += 1
return port
def list(self) -> list[dict]:
return [
{"name": m.name, "kind": m.kind, "port": m.port, "running": m.proc.returncode is None}
for m in self._procs.values()
]
# ── Pattern 1: interactive CLI (driven over WebSocket) ──────────────────────
async def start_cli(self, name: str, display_name: str | None = None,
allow_files: bool = True, extra_args: tuple[str, ...] = ()) -> Managed:
if name in self._procs:
return self._procs[name]
port = self._alloc_port()
prefix = str(self.data_dir / name)
# On a fresh DB the CLI prompts for a display name on stdin and won't start
# the WS server. --create-bot-display-name creates the profile non-interactively
# (no-op once the DB exists, so it's safe to always pass).
args = ["-p", str(port), "-d", prefix, "--create-bot-display-name", display_name or name]
if allow_files:
args.append("--create-bot-allow-files")
args += list(extra_args)
proc = await asyncio.create_subprocess_exec(
binary_path("simplex-chat"), *args,
stdin=asyncio.subprocess.DEVNULL,
stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.STDOUT,
)
client = SimplexWSClient(port, on_event=lambda r: self._emit(name, r))
await self._connect_with_retry(client)
m = Managed(name=name, kind="cli", proc=proc, port=port, client=client)
self._procs[name] = m
return m
async def _connect_with_retry(self, client: SimplexWSClient, attempts: int = 40, delay: float = 0.25) -> None:
for _ in range(attempts):
try:
await client.connect()
return
except OSError:
await asyncio.sleep(delay) # WS server not up yet
raise RuntimeError("simplex-chat websocket did not come up")
async def send(self, name: str, cmd: str) -> dict:
m = self._procs.get(name)
if not m or not m.client:
raise RuntimeError(f"{name!r} is not a running cli profile")
return await m.client.send_cmd(cmd)
# ── Pattern 2: autonomous official bots (lifecycle + config-at-launch) ──────
async def start_directory(self, name: str, super_users: str, web_folder: str,
extra_args: tuple[str, ...] = ()) -> Managed:
if name in self._procs:
return self._procs[name]
prefix = str(self.data_dir / name)
proc = await asyncio.create_subprocess_exec(
binary_path("simplex-directory-service"),
"-d", prefix,
"--super-users", super_users, # CONTACT_ID:NAME,...
"--directory-file", f"{prefix}_directory.log", # append-only state log
"--web-folder", web_folder, # bot writes listing files here
*extra_args,
stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.STDOUT,
)
m = Managed(name=name, kind="directory", proc=proc) # no WS — autonomous
self._procs[name] = m
return m
async def start_broadcast(self, name: str, display_name: str, publishers: str,
extra_args: tuple[str, ...] = ()) -> Managed:
if name in self._procs:
return self._procs[name]
prefix = str(self.data_dir / name)
proc = await asyncio.create_subprocess_exec(
binary_path("simplex-broadcast-bot"),
"-d", prefix,
"--display-name", display_name,
"--publishers", publishers, # CONTACT_ID:NAME,...
*extra_args,
stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.STDOUT,
)
m = Managed(name=name, kind="broadcast", proc=proc)
self._procs[name] = m
return m
# ── Lifecycle ───────────────────────────────────────────────────────────────
async def stop(self, name: str) -> None:
m = self._procs.pop(name, None)
if not m:
return
if m.client:
await m.client.close()
with contextlib.suppress(ProcessLookupError):
m.proc.terminate()
with contextlib.suppress(asyncio.TimeoutError):
await asyncio.wait_for(m.proc.wait(), 5)
with contextlib.suppress(ProcessLookupError):
if m.proc.returncode is None:
m.proc.kill()
async def stop_all(self) -> None:
for name in list(self._procs):
await self.stop(name)
async def _emit(self, name: str, resp: dict) -> None:
if self._on_event:
await self._on_event(name, resp)

73
supervisor/ws_client.py Normal file
View File

@@ -0,0 +1,73 @@
"""WebSocket client for a `simplex-chat -p <port>` process (Pattern 1 control).
Protocol (simplex-chat/bots/README.md):
- send {"corrId": "<unique>", "cmd": "<command string>"}
- response{"corrId": "<same>", "resp": {"type": ..., ...}} (matched by corrId)
- event {"resp": {"type": ..., ...}} (no corrId)
We keep one long-lived connection per profile. Responses resolve futures keyed by
corrId; events (no corrId) are pushed to an async callback.
"""
import asyncio
import itertools
import json
from collections.abc import Awaitable, Callable
import websockets
EventHandler = Callable[[dict], Awaitable[None]]
class SimplexWSClient:
def __init__(self, port: int, on_event: EventHandler | None = None):
self.url = f"ws://localhost:{port}/"
self._on_event = on_event
self._ws: websockets.WebSocketClientProtocol | None = None
self._corr = itertools.count(1)
self._pending: dict[str, asyncio.Future] = {}
self._reader: asyncio.Task | None = None
async def connect(self) -> None:
# max_size=None: chat responses (chat lists, profiles w/ avatars) can be large.
self._ws = await websockets.connect(self.url, max_size=None)
self._reader = asyncio.create_task(self._read_loop())
async def _read_loop(self) -> None:
assert self._ws is not None
async for raw in self._ws:
try:
msg = json.loads(raw)
except Exception:
continue
corr = msg.get("corrId")
resp = msg.get("resp")
fut = self._pending.pop(corr, None) if corr else None
if fut is not None:
if not fut.done():
fut.set_result(resp)
elif resp is not None and self._on_event is not None:
# Forward-compat: never fail on unknown event types (bots/README.md)
try:
await self._on_event(resp)
except Exception:
pass
async def send_cmd(self, cmd: str, timeout: float = 30.0) -> dict:
"""Send a chat command string, await its matching response record."""
if self._ws is None:
raise RuntimeError("not connected")
corr = str(next(self._corr))
fut: asyncio.Future = asyncio.get_event_loop().create_future()
self._pending[corr] = fut
await self._ws.send(json.dumps({"corrId": corr, "cmd": cmd}))
try:
return await asyncio.wait_for(fut, timeout)
finally:
self._pending.pop(corr, None)
async def close(self) -> None:
if self._reader:
self._reader.cancel()
if self._ws:
await self._ws.close()