Add 'llm' bot: OpenAI-compatible chat (Ollama-ready)

Add a new 'llm' bot type that takes a startup context (system prompt) and replies to each message via an OpenAI-compatible endpoint. It works with a local Ollama (`ollama serve`, http://localhost:11434/v1), OpenAI, Grok, etc.

Generalize the support LLM handler into _handle_llm_message, shared by the support and llm bots, with a per-bot default prompt. The create form reuses the LLM fields (URL/key/model/context) for both support and llm.

Add llm_test.py, an end-to-end test against a mock OpenAI-compatible backend; it passes.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-05 18:58:54 +01:00
parent 3f0338041c
commit aaf3c23a18
3 changed files with 186 additions and 22 deletions
--- a/manager/llm_test.py
+++ b/manager/llm_test.py
@@ -0,0 +1,150 @@
+"""End-to-end test of the 'llm' bot (Pattern 3, in-process FFI).
+
+Stands up a tiny OpenAI-compatible mock server (so no Ollama needed), starts an
+llm bot pointed at it with a known context, connects a customer, sends a message,
+and verifies the bot's reply reflects both the configured context and the message.
+
+Run:  .venv/bin/python llm_test.py
+"""
+
+import asyncio
+import json
+import sys
+import threading
+import time
+from http.server import BaseHTTPRequestHandler, HTTPServer
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).resolve().parent))
+import profiles as pm  # noqa: E402
+from simplex_chat import ChatApi, SqliteDb  # noqa: E402
+
+DATA = Path("data")  # directory scanned by cleanup() for leftover test files
+BOT_PREFIX = str(DATA / "llmtest_bot")  # db_prefix of the bot profile under test
+CUST_PREFIX = str(DATA / "llmtest_cust")  # file_prefix of the customer's SqliteDb
+BOT_PID = 99003  # profile id used to start and stop the test bot
+CONTEXT = "TESTCTX"  # system prompt; the mock echoes it back so the test can check it
+
+
+def cleanup():
+    """Delete every entry under DATA that matches the bot's or customer's DB prefix."""
+    patterns = ("llmtest_bot_*", "llmtest_cust_*")
+    for stale in (path for pat in patterns for path in DATA.glob(pat)):
+        stale.unlink()
+
+
+class MockLLM(BaseHTTPRequestHandler):
+    """Minimal stand-in for an OpenAI-compatible chat endpoint.
+
+    Every POST is answered with a single assistant message of the form
+    ``ctx:<system prompt>|got:<last user message>``, so the caller can verify
+    that both parts of the request reached the backend.
+    """
+
+    def do_POST(self):
+        """Parse the JSON request body and echo its prompt parts back as JSON."""
+        length = int(self.headers.get("Content-Length", 0))
+        payload = json.loads(self.rfile.read(length) or b"{}")
+        turns = payload.get("messages", [])
+
+        def pick(role, ordered):
+            # Content of the first message in `ordered` carrying this role, else "".
+            return next((t["content"] for t in ordered if t["role"] == role), "")
+
+        system = pick("system", turns)
+        last_user = pick("user", reversed(turns))
+        reply = {"role": "assistant", "content": f"ctx:{system}|got:{last_user}"}
+        encoded = json.dumps({"choices": [{"message": reply}]}).encode()
+
+        self.send_response(200)
+        self.send_header("Content-Type", "application/json")
+        self.end_headers()
+        self.wfile.write(encoded)
+
+    def log_message(self, *a):
+        """Discard request log lines so the test output stays readable."""
+
+
+async def wait_until(fn, timeout=120, every=1):
+    """Poll ``await fn()`` until it yields a truthy value or the timeout expires.
+
+    Args:
+        fn: zero-argument callable returning an awaitable.
+        timeout: maximum number of seconds to keep polling.
+        every: seconds to sleep between two polls.
+
+    Returns:
+        The first truthy result of ``fn``, or ``None`` once ``timeout`` has elapsed.
+    """
+    # Monotonic clock: the deadline is unaffected by wall-clock adjustments.
+    deadline = time.monotonic() + timeout
+    while time.monotonic() < deadline:
+        v = await fn()
+        if v:
+            return v
+        await asyncio.sleep(every)
+    return None
+
+
+async def incoming_texts(chat, contact_id):
+    """Return the message text of every received item in a direct chat.
+
+    Fetches the chat via ``api_get_chat("direct", contact_id, 50)`` and keeps the
+    items whose ``chatDir`` type ends in "Rcv"; an item without text yields "".
+    """
+    conversation = await chat.api_get_chat("direct", contact_id, 50)
+    texts = []
+    for entry in conversation.get("chatItems", []):
+        direction = entry.get("chatDir", {}).get("type", "")
+        if not direction.endswith("Rcv"):
+            continue
+        texts.append(entry.get("content", {}).get("msgContent", {}).get("text", ""))
+    return texts
+
+
+async def main() -> int:
+    """Run the llm-bot round trip against the mock backend.
+
+    Starts the mock LLM server and an ``llm`` bot configured to use it, connects
+    a customer profile to the bot's address, sends "ping" and checks that the
+    reply carries both the configured context and the message.
+
+    Returns:
+        0 if every assertion held, 1 if one failed. Any other exception
+        propagates once the clean-up in ``finally`` has run.
+    """
+    cleanup()
+    srv = HTTPServer(("127.0.0.1", 0), MockLLM)  # port 0: let the OS pick a free port
+    port = srv.server_address[1]
+    threading.Thread(target=srv.serve_forever, daemon=True).start()
+    print("mock LLM on port", port)
+
+    addr_box = {}
+
+    async def on_address(pid, addr):
+        # Callback handed to start_bot; stores the address it is given.
+        addr_box["addr"] = addr
+
+    async def published_address():
+        # Polled by wait_until until on_address has stored an address.
+        return addr_box.get("addr")
+
+    profile = {
+        "id": BOT_PID, "name": "llmtestbot", "bot_type": "llm",
+        "db_prefix": BOT_PREFIX,
+        "config": json.dumps({
+            "api_base": f"http://127.0.0.1:{port}/v1",
+            "model": "test-model", "api_key": "x", "system_prompt": CONTEXT,
+        }),
+    }
+    cust = None
+    ok = True
+    try:
+        await pm.start_bot(profile, on_address)
+        addr = await wait_until(published_address, timeout=90)
+        print("bot address:", bool(addr))
+        assert addr, "llm bot never published an address"
+
+        cust = await ChatApi.init(SqliteDb(file_prefix=CUST_PREFIX))
+        if not await cust.api_get_active_user():
+            await cust.api_create_active_user({"displayName": "customer", "fullName": ""})
+        await cust.start_chat()
+        await cust.send_chat_cmd(f"/connect {addr}")
+
+        u = await cust.api_get_active_user()
+        cid = await wait_until(
+            lambda: _first_contact(cust, u["userId"]), timeout=90, every=2
+        )
+        assert cid, "customer did not connect"
+        await asyncio.sleep(2)  # brief pause before sending the first message
+
+        await cust.api_send_text_message({"chatType": "direct", "chatId": cid}, "ping")
+        reply = await wait_until(
+            lambda: _find_reply(cust, cid), timeout=60, every=2
+        )
+        print("bot reply:", reply)
+        assert reply and "ctx:TESTCTX" in reply and "got:ping" in reply, \
+            "reply did not reflect context + message"
+    except AssertionError as e:
+        ok = False
+        print("ASSERT FAIL:", e)
+    finally:
+        try:
+            await pm.stop_bot(BOT_PID)
+        finally:
+            # Runs even if stop_bot raises, so the customer, the mock server's
+            # socket and the test files are not left behind.
+            if cust:
+                try:
+                    await cust.close()
+                except Exception:
+                    pass  # best effort: a close failure must not mask the result
+            srv.shutdown()
+            srv.server_close()  # shutdown() only stops serve_forever; this frees the socket
+            cleanup()
+
+    print("\nRESULT:", "PASS — llm bot replies using its context" if ok else "FAIL")
+    return 0 if ok else 1
+
+
+async def _first_contact(chat, uid):
+    """Return the contactId of the first contact listed for user `uid`, or None if there is none."""
+    contacts = await chat.api_list_contacts(uid)
+    if not contacts:
+        return None
+    return contacts[0]["contactId"]
+
+
+async def _find_reply(chat, cid):
+    """Return the first received text that starts with "ctx:" (the mock LLM's marker), or None."""
+    received = await incoming_texts(chat, cid)
+    return next((text for text in received if text.startswith("ctx:")), None)
+
+
+# Script entry point: the process exit status is main()'s return value
+# (0 on PASS, 1 on FAIL).
+if __name__ == "__main__":
+    raise SystemExit(asyncio.run(main()))
--- a/manager/profiles.py
+++ b/manager/profiles.py
@@ -78,11 +78,15 @@ def mark_all_read() -> None:
    for n in _notifications:
        n["read"] = True

-# Default system prompt for LLM-backed support bots when none is configured.
+# Default system prompts when none is configured.
 DEFAULT_SUPPORT_PROMPT = (
    "You are a helpful customer-support assistant. Answer concisely and politely. "
    "If you don't know something, say so rather than guessing."
 )
+DEFAULT_LLM_PROMPT = (
+    "You are a helpful assistant. Answer concisely. "
+    "If you don't know something, say so rather than guessing."
+)


 async def llm_chat(
@@ -132,7 +136,7 @@ def group_member_count(g: dict) -> int:
    return g.get("groupSummary", {}).get("currentMembers", 0)


-BOT_TYPES      = ["echo", "broadcast", "support", "directory", "deadmans"]
+BOT_TYPES      = ["echo", "llm", "broadcast", "support", "directory", "deadmans"]
 USER_TYPES     = ["user"]
 BUSINESS_TYPES = ["business"]  # cli accounts with a business address (per-customer group chats)
 ALL_TYPES      = BOT_TYPES + USER_TYPES + BUSINESS_TYPES
@@ -483,7 +487,7 @@ async def _run_bot(
        elif bot_type == "broadcast":
            # auto-reply greets each new contact (default lists allowed publishers)
            settings["autoReply"] = {"type": "text", "text": _bc_welcome(config, name)}
-        elif bot_type in ("echo", "directory", "deadmans"):
+        elif bot_type in ("echo", "llm", "directory", "deadmans"):
            welcome = config.get("welcome_message", f"Connected to {name}.")
            settings["autoReply"] = {"type": "text", "text": welcome}

@@ -593,8 +597,13 @@ async def _run_bot(
                                pass

                        elif bot_type == "support" and text:
-                            await _handle_support_message(
-                                b, chat, config, item, chat_info, text
+                            await _handle_llm_message(
+                                b, chat, config, item, chat_info, text, DEFAULT_SUPPORT_PROMPT
+                            )
+
+                        elif bot_type == "llm" and text:
+                            await _handle_llm_message(
+                                b, chat, config, item, chat_info, text, DEFAULT_LLM_PROMPT
                            )

                        elif bot_type == "directory" and text:
@@ -765,20 +774,22 @@ async def _fire_deadmans(chat: Any, user_id: int, config: dict) -> int:
    return sent


-async def _handle_support_message(
-    b: RunningBot, chat: Any, config: dict, item: dict, chat_info: dict, text: str
+async def _handle_llm_message(
+    b: RunningBot, chat: Any, config: dict, item: dict, chat_info: dict, text: str,
+    default_prompt: str = DEFAULT_LLM_PROMPT,
 ) -> None:
-    """Answer an incoming support message via the configured OpenAI-compatible LLM.
+    """Reply to an incoming message via the configured OpenAI-compatible LLM
+    (works with Ollama's `ollama serve` at http://localhost:11434/v1, OpenAI, Grok…).

-    If no api_base is configured the bot stays silent (the static welcome auto-reply
-    has already greeted the contact) — so support bots without an LLM behave as before.
+    `system_prompt` (the startup context), `api_base` (the API URL), `model` and an
+    optional `api_key` come from config. If no api_base is set the bot stays silent.
    """
    api_base = (config.get("api_base") or "").strip()
    if not api_base:
        return  # no LLM configured for this bot

    contact_id = chat_info.get("contact", {}).get("contactId")
-    system_prompt = config.get("system_prompt") or DEFAULT_SUPPORT_PROMPT
+    system_prompt = config.get("system_prompt") or default_prompt
    model = config.get("model") or "grok-2"
    api_key = config.get("api_key") or ""

@@ -792,7 +803,7 @@ async def _handle_support_message(
    try:
        reply = await llm_chat(api_base, api_key, model, messages)
    except Exception as e:
-        log.error("support LLM error: %s", e)
+        log.error("LLM error: %s", e)
        _append_log(b, f"LLM error: {e}")
        try:
            await chat.api_send_text_reply(
@@ -807,7 +818,7 @@ async def _handle_support_message(
    try:
        await chat.api_send_text_reply(item, reply)
    except Exception:
-        log.exception("failed to send support reply")
+        log.exception("failed to send LLM reply")


 async def global_status() -> dict:
--- a/manager/templates/list.html
+++ b/manager/templates/list.html
@@ -47,6 +47,7 @@
  <h2 style="font-size:15px;margin-bottom:12px;">Available bot types</h2>
  <table>
    <tr><td><span class="tag">echo</span></td><td class="muted">Repeats every message back to the sender — handy for testing a connection end to end.</td></tr>
+    <tr><td><span class="tag">llm</span></td><td class="muted">Chat with a local or remote LLM (OpenAI-compatible, e.g. Ollama). Give it context, it replies to your messages.</td></tr>
    <tr><td><span class="tag">broadcast</span></td><td class="muted">Relays messages from authorized publishers out to all of the bot's contacts.</td></tr>
    <tr><td><span class="tag">support</span></td><td class="muted">Business inbox — auto-replies with a welcome message and collects incoming inquiries.</td></tr>
    <tr><td><span class="tag">directory</span></td><td class="muted">Directory service for discovering and listing groups or contacts.</td></tr>
@@ -150,24 +151,26 @@
    <div id="support-fields" style="display:none;">
      <div style="border-top:1px solid var(--border);margin:4px 0 14px;padding-top:14px;">
        <p class="muted" style="margin-bottom:12px;">
-          LLM backend (OpenAI-compatible). Leave the URL blank for a static welcome-only bot.
+          LLM backend (OpenAI-compatible — works with a local Ollama via <code>ollama serve</code>,
+          OpenAI, Grok…). The LLM bot needs the URL; support bots may leave it blank for a
+          welcome-only inbox.
        </p>
      </div>
      <div class="field">
        <label>API Base URL</label>
-        <input type="text" name="api_base" placeholder="https://api.x.ai/v1   (Ollama: http://localhost:11434/v1)">
+        <input type="text" name="api_base" placeholder="http://localhost:11434/v1   (Ollama)   ·   https://api.x.ai/v1">
      </div>
      <div class="field">
-        <label>API Key</label>
-        <input type="password" name="api_key" placeholder="xai-…  (any value for Ollama)">
+        <label>API Key <span class="muted" style="font-weight:400;">(any value for Ollama)</span></label>
+        <input type="password" name="api_key" placeholder="ollama   ·   xai-…">
      </div>
      <div class="field">
        <label>Model</label>
-        <input type="text" name="model" placeholder="grok-2   (Ollama: llama3.2)">
+        <input type="text" name="model" placeholder="llama3.2   (Ollama)   ·   grok-2">
      </div>
      <div class="field">
-        <label>System Prompt</label>
-        <textarea name="system_prompt" rows="3" placeholder="You are a helpful customer-support assistant…"></textarea>
+        <label>Context <span class="muted" style="font-weight:400;">(system prompt given on start-up)</span></label>
+        <textarea name="system_prompt" rows="3" placeholder="You are a helpful assistant…"></textarea>
      </div>
    </div>
    <div id="deadmans-fields" style="display:none;">
@@ -289,7 +292,7 @@ function copyAddr(ev, btn, addr) {
 function onTypeChange() {
  const val = document.getElementById('type-select').value;
  document.getElementById('welcome-field').style.display = (val === 'echo') ? 'none' : '';
-  document.getElementById('support-fields').style.display = (val === 'support') ? 'block' : 'none';
+  document.getElementById('support-fields').style.display = (val === 'support' || val === 'llm') ? 'block' : 'none';
  document.getElementById('deadmans-fields').style.display = (val === 'deadmans') ? 'block' : 'none';
  document.getElementById('directory-fields').style.display = (val === 'directory') ? 'block' : 'none';
  document.getElementById('broadcast-fields').style.display = (val === 'broadcast') ? 'block' : 'none';
@@ -307,7 +310,7 @@ document.getElementById('create-form').addEventListener('submit', async (e) => {
  const config = {};
  const welcome = fd.get('welcome_message');
  if (welcome) config.welcome_message = welcome;
-  if (botType === 'support') {
+  if (botType === 'support' || botType === 'llm') {
    const apiBase = (fd.get('api_base') || '').trim();
    if (apiBase) config.api_base = apiBase;
    const apiKey = (fd.get('api_key') || '').trim();