Add 'rss' bot: broadcasts an RSS/Atom feed to a channel

New rss bot type: on start it creates a broadcast channel (observer group with
recent history on) and polls a configured feed URL; new posts are broadcast to
the channel. Subscribers join the channel link (seen on the bot's profile);
direct contacts get a welcome + the latest items and can send /new for the
latest. Stdlib-only feed parsing (urllib + ElementTree), seeds existing items on
startup so it doesn't replay the whole feed. Config: feed_url, poll_seconds.
Adds rss_test.py (mock feed) — passes.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Jon
2026-06-05 21:08:05 +01:00
parent 12d21e6de5
commit 908d16bfc3
3 changed files with 302 additions and 2 deletions

View File

@@ -3,7 +3,9 @@
import asyncio import asyncio
import json import json
import logging import logging
import time
import urllib.request import urllib.request
import xml.etree.ElementTree as ET
from dataclasses import dataclass, field from dataclasses import dataclass, field
from datetime import datetime, timezone from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
@@ -136,7 +138,7 @@ def group_member_count(g: dict) -> int:
return g.get("groupSummary", {}).get("currentMembers", 0) return g.get("groupSummary", {}).get("currentMembers", 0)
BOT_TYPES = ["echo", "llm", "broadcast", "support", "directory", "deadmans"] BOT_TYPES = ["echo", "llm", "rss", "broadcast", "support", "directory", "deadmans"]
USER_TYPES = ["user"] USER_TYPES = ["user"]
BUSINESS_TYPES = ["business"] # cli accounts with a business address (per-customer group chats) BUSINESS_TYPES = ["business"] # cli accounts with a business address (per-customer group chats)
ALL_TYPES = BOT_TYPES + USER_TYPES + BUSINESS_TYPES ALL_TYPES = BOT_TYPES + USER_TYPES + BUSINESS_TYPES
@@ -155,6 +157,11 @@ class RunningBot:
chat: Any = None # simplex_chat ChatApi instance chat: Any = None # simplex_chat ChatApi instance
# Per-contact LLM conversation history (contactId → [{role, content}, ...]) # Per-contact LLM conversation history (contactId → [{role, content}, ...])
histories: dict[int, list[dict]] = field(default_factory=dict) histories: dict[int, list[dict]] = field(default_factory=dict)
# RSS bot state
rss_seen: set = field(default_factory=set) # entry ids already posted
rss_items: list = field(default_factory=list) # latest fetched entries (newest first)
rss_next_poll: float = 0.0
rss_gid: int | None = None # broadcast channel group id
# profile_id → RunningBot # profile_id → RunningBot
@@ -426,6 +433,110 @@ async def _handle_broadcast_message(
await reply("Broadcast failed.") await reply("Broadcast failed.")
# ── RSS bot helpers ──────────────────────────────────────────────────────────────
def _fetch_feed(url: str) -> list[dict]:
"""Fetch + parse an RSS 2.0 or Atom feed → newest-first list of {id,title,link}.
Uses only the stdlib (urllib + ElementTree), no extra dependencies."""
req = urllib.request.Request(url, headers={"User-Agent": "simplex-rss-bot/1.0"})
with urllib.request.urlopen(req, timeout=20) as r: # noqa: S310 - user-configured feed
data = r.read()
root = ET.fromstring(data)
out: list[dict] = []
items = root.findall(".//item") # RSS 2.0
if items:
for it in items:
title = (it.findtext("title") or "").strip()
link = (it.findtext("link") or "").strip()
eid = (it.findtext("guid") or link or title).strip()
out.append({"id": eid, "title": title, "link": link})
else: # Atom
ns = "{http://www.w3.org/2005/Atom}"
for e in root.findall(f".//{ns}entry"):
title = (e.findtext(f"{ns}title") or "").strip()
le = e.find(f"{ns}link")
link = (le.get("href") if le is not None else "") or ""
eid = (e.findtext(f"{ns}id") or link or title).strip()
out.append({"id": eid, "title": title, "link": link})
return out
def _rss_format(e: dict) -> str:
title = e.get("title") or "(untitled)"
return f"{title}\n{e['link']}" if e.get("link") else title
async def _rss_ensure_channel(profile_id: int, b: "RunningBot", chat: Any, user_id: int,
name: str, config: dict) -> int | None:
"""Find or create the broadcast channel (observer group) for this feed; persist its id."""
import db as _db
gid = config.get("channel_gid")
if gid:
try:
groups = await chat.api_list_groups(user_id)
if any(group_id(g) == gid for g in groups):
return gid
except Exception:
return gid # assume still valid if the lookup failed
try:
info = await chat.api_new_group(user_id, {
"displayName": f"{name} Feed", "fullName": "",
"groupPreferences": {"history": {"enable": "on"}}, # new subscribers see recent posts
})
gid = info["groupId"]
await chat.api_create_group_link(gid, "observer") # channel = observer link
config["channel_gid"] = gid
_db.update_config(profile_id, config)
_append_log(b, f"RSS channel created (group {gid})")
return gid
except Exception:
log.exception("rss: failed to create channel")
return None
async def _rss_poll(b: "RunningBot", chat: Any, gid: int | None, config: dict, seed: bool) -> None:
"""Fetch the feed; on the seed run just record existing ids, otherwise broadcast new items."""
url = (config.get("feed_url") or "").strip()
if not url:
return
try:
entries = await asyncio.to_thread(_fetch_feed, url)
except Exception as e:
log.error("rss fetch error: %s", e)
_append_log(b, f"RSS fetch error: {e}")
return
b.rss_items = entries
new = [e for e in entries if e["id"] not in b.rss_seen]
for e in new:
b.rss_seen.add(e["id"])
if seed:
_append_log(b, f"RSS seeded {len(entries)} existing item(s)")
return
if not gid or not new:
return
for e in reversed(new): # post oldest → newest
try:
await chat.api_send_text_message({"chatType": "group", "chatId": gid}, _rss_format(e))
except Exception:
log.exception("rss: failed to post to channel")
_append_log(b, f"RSS posted {len(new)} new item(s)")
async def _rss_send_latest(chat: Any, item: dict, b: "RunningBot", n: int = 3) -> None:
"""Reply to a direct request (e.g. /new) with the latest feed items."""
items = b.rss_items[:n]
if not items:
try:
await chat.api_send_text_reply(item, "No items yet — check back soon.")
except Exception:
pass
return
for e in items:
try:
await chat.api_send_text_reply(item, _rss_format(e))
except Exception:
pass
async def _run_bot( async def _run_bot(
profile_id: int, profile_id: int,
name: str, name: str,
@@ -487,6 +598,9 @@ async def _run_bot(
elif bot_type == "broadcast": elif bot_type == "broadcast":
# auto-reply greets each new contact (default lists allowed publishers) # auto-reply greets each new contact (default lists allowed publishers)
settings["autoReply"] = {"type": "text", "text": _bc_welcome(config, name)} settings["autoReply"] = {"type": "text", "text": _bc_welcome(config, name)}
elif bot_type == "rss":
welcome = config.get("welcome_message") or f"Subscribed to {name}. Send /new for the latest posts."
settings["autoReply"] = {"type": "text", "text": welcome}
elif bot_type in ("echo", "llm", "directory", "deadmans"): elif bot_type in ("echo", "llm", "directory", "deadmans"):
welcome = config.get("welcome_message", f"Connected to {name}.") welcome = config.get("welcome_message", f"Connected to {name}.")
settings["autoReply"] = {"type": "text", "text": welcome} settings["autoReply"] = {"type": "text", "text": welcome}
@@ -516,10 +630,24 @@ async def _run_bot(
dir_tick = 0 # directory bots periodically scan for newly-added groups dir_tick = 0 # directory bots periodically scan for newly-added groups
# RSS bot: ensure a broadcast channel and seed seen-items so we don't replay the
# whole feed on startup; then poll on an interval.
rss_poll_s = float(config.get("poll_seconds", 300))
if bot_type == "rss":
b.rss_gid = await _rss_ensure_channel(profile_id, b, chat, user_id, name, config)
await _rss_poll(b, chat, b.rss_gid, config, seed=True)
b.rss_next_poll = time.time() + rss_poll_s
await refresh()
# Event loop # Event loop
while True: while True:
evt = await chat.recv_chat_event(500_000) evt = await chat.recv_chat_event(500_000)
# RSS bot: poll the feed on its interval and broadcast new items to the channel
if bot_type == "rss" and time.time() >= b.rss_next_poll:
await _rss_poll(b, chat, b.rss_gid, config, seed=False)
b.rss_next_poll = time.time() + rss_poll_s
# Directory bot: ~every 30s, refresh groups and register/auto-join new ones # Directory bot: ~every 30s, refresh groups and register/auto-join new ones
if bot_type == "directory": if bot_type == "directory":
dir_tick += 1 dir_tick += 1
@@ -551,7 +679,16 @@ async def _run_bot(
_append_log(b, f"Contact connected: {ct_name}") _append_log(b, f"Contact connected: {ct_name}")
# echo replies on message; broadcast/others greet via the auto-reply # echo replies on message; broadcast/others greet via the auto-reply
# configured in address settings, so nothing to do on connect here. # configured in address settings. RSS also sends the latest items on connect.
if bot_type == "rss":
cid = ct.get("contactId")
if cid is not None: # send the latest items directly to the new contact
for e in b.rss_items[:3]:
try:
await chat.api_send_text_message(
{"chatType": "direct", "chatId": cid}, _rss_format(e))
except Exception:
pass
elif tag == "newChatItems": elif tag == "newChatItems":
items = evt.get("chatItems", []) items = evt.get("chatItems", [])
@@ -606,6 +743,10 @@ async def _run_bot(
b, chat, config, item, chat_info, text, DEFAULT_LLM_PROMPT b, chat, config, item, chat_info, text, DEFAULT_LLM_PROMPT
) )
elif bot_type == "rss" and text:
if text.strip().lower() == "/new":
await _rss_send_latest(chat, item, b)
elif bot_type == "directory" and text: elif bot_type == "directory" and text:
await _handle_directory_message( await _handle_directory_message(
b, chat, config, name, item, chat_info, text b, chat, config, name, item, chat_info, text

133
manager/rss_test.py Normal file
View File

@@ -0,0 +1,133 @@
"""End-to-end test of the RSS bot (Pattern 3, in-process FFI).
Serves a mock RSS feed locally, starts an rss bot pointed at it, and checks:
- the bot creates a broadcast channel (observer group)
- the initial feed item is seeded (not re-posted)
- a newly-added feed item is broadcast to the channel
Verifies via the bot's own view of the channel chat (no subscriber needed).
Run: .venv/bin/python rss_test.py
"""
import asyncio
import json
import sys
import threading
import time
from http.server import BaseHTTPRequestHandler, HTTPServer
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent))
import profiles as pm # noqa: E402
DATA = Path("data")
BOT_PREFIX = str(DATA / "rsstest_bot")
BOT_PID = 99004
# mutable feed state the mock server serves
FEED = {"items": [("First post", "https://example.com/1")]}
def feed_xml():
items = "".join(
f"<item><title>{t}</title><link>{l}</link><guid>{l}</guid></item>"
for t, l in FEED["items"]
)
return (f'<?xml version="1.0"?><rss version="2.0"><channel>'
f'<title>Test Feed</title>{items}</channel></rss>').encode()
class FeedHandler(BaseHTTPRequestHandler):
def do_GET(self):
body = feed_xml()
self.send_response(200)
self.send_header("Content-Type", "application/rss+xml")
self.end_headers()
self.wfile.write(body)
def log_message(self, *a):
pass
def cleanup():
for p in DATA.glob("rsstest_bot_*"):
p.unlink()
bf = DATA / "manager.db" # leave the manager db alone; we don't use it here
async def wait_until(fn, timeout=120, every=1):
start = time.time()
while time.time() - start < timeout:
v = await fn()
if v:
return v
await asyncio.sleep(every)
return None
async def channel_texts(chat, gid):
c = await chat.api_get_chat("group", gid, 50)
out = []
for ci in c.get("chatItems", []):
out.append(ci.get("content", {}).get("msgContent", {}).get("text", ""))
return out
async def main() -> int:
cleanup()
srv = HTTPServer(("127.0.0.1", 0), FeedHandler)
port = srv.server_address[1]
threading.Thread(target=srv.serve_forever, daemon=True).start()
url = f"http://127.0.0.1:{port}/feed.xml"
print("mock feed at", url)
async def on_address(pid, addr):
pass
profile = {
"id": BOT_PID, "name": "rsstestbot", "bot_type": "rss",
"db_prefix": BOT_PREFIX,
"config": json.dumps({"feed_url": url, "poll_seconds": 5}),
}
ok = True
try:
await pm.start_bot(profile, on_address)
b = pm.get_running(BOT_PID)
# 1) channel created
gid = await wait_until(lambda: asyncio.sleep(0, b.rss_gid), timeout=90)
print("channel created:", bool(gid), "gid", gid)
assert gid, "rss bot did not create a channel"
# 2) the first item was seeded, not posted
await asyncio.sleep(2)
texts = await channel_texts(b.chat, gid)
assert not any("First post" in t for t in texts), "seeded item was wrongly broadcast"
print("seed OK (first item not broadcast)")
# 3) add a new item → it should be broadcast on the next poll
FEED["items"].insert(0, ("Breaking news", "https://example.com/2"))
got = await wait_until(
lambda: _contains(channel_texts(b.chat, gid), "Breaking news"), timeout=30, every=2
)
print("new item broadcast to channel:", bool(got))
ok = bool(got)
except AssertionError as e:
ok = False
print("ASSERT FAIL:", e)
finally:
await pm.stop_bot(BOT_PID)
srv.shutdown()
cleanup()
print("\nRESULT:", "PASS — rss bot broadcasts new feed posts" if ok else "FAIL")
return 0 if ok else 1
async def _contains(coro, needle):
return any(needle in (t or "") for t in await coro)
if __name__ == "__main__":
raise SystemExit(asyncio.run(main()))

View File

@@ -49,6 +49,7 @@
<table> <table>
<tr><td><span class="tag">echo</span></td><td class="muted">Repeats every message back to the sender — handy for testing a connection end to end.</td></tr> <tr><td><span class="tag">echo</span></td><td class="muted">Repeats every message back to the sender — handy for testing a connection end to end.</td></tr>
<tr><td><span class="tag">llm</span></td><td class="muted">Chat with a local or remote LLM (OpenAI-compatible, e.g. Ollama). Give it context, it replies to your messages.</td></tr> <tr><td><span class="tag">llm</span></td><td class="muted">Chat with a local or remote LLM (OpenAI-compatible, e.g. Ollama). Give it context, it replies to your messages.</td></tr>
<tr><td><span class="tag">rss</span></td><td class="muted">Watches an RSS/Atom feed and broadcasts new posts to a channel it creates. Subscribers join the channel; send /new for the latest.</td></tr>
<tr><td><span class="tag">broadcast</span></td><td class="muted">Relays messages from authorized publishers out to all of the bot's contacts.</td></tr> <tr><td><span class="tag">broadcast</span></td><td class="muted">Relays messages from authorized publishers out to all of the bot's contacts.</td></tr>
<tr><td><span class="tag">support</span></td><td class="muted">Business inbox — auto-replies with a welcome message and collects incoming inquiries.</td></tr> <tr><td><span class="tag">support</span></td><td class="muted">Business inbox — auto-replies with a welcome message and collects incoming inquiries.</td></tr>
<tr><td><span class="tag">directory</span></td><td class="muted">Directory service for discovering and listing groups or contacts.</td></tr> <tr><td><span class="tag">directory</span></td><td class="muted">Directory service for discovering and listing groups or contacts.</td></tr>
@@ -224,6 +225,23 @@
<input type="text" name="prohibited_message" placeholder="Only publishers can broadcast. Your message is deleted."> <input type="text" name="prohibited_message" placeholder="Only publishers can broadcast. Your message is deleted.">
</div> </div>
</div> </div>
<div id="rss-fields" style="display:none;">
<div style="border-top:1px solid var(--border);margin:4px 0 14px;padding-top:14px;">
<p class="muted" style="margin-bottom:12px;">
The bot watches this feed and broadcasts new posts to a channel it creates.
Share the channel link (from the bot's profile) with subscribers; anyone messaging
the bot can send <code>/new</code> for the latest.
</p>
</div>
<div class="field">
<label>Feed URL</label>
<input type="text" name="feed_url" placeholder="https://example.com/feed.xml">
</div>
<div class="field">
<label>Poll interval <span class="muted" style="font-weight:400;">(seconds)</span></label>
<input type="number" name="poll_seconds" min="30" value="300">
</div>
</div>
{% endif %} {% endif %}
<div class="flex gap-8 mt-16" style="justify-content:flex-end;"> <div class="flex gap-8 mt-16" style="justify-content:flex-end;">
<button type="button" class="btn btn-ghost" <button type="button" class="btn btn-ghost"
@@ -297,6 +315,7 @@ function onTypeChange() {
document.getElementById('deadmans-fields').style.display = (val === 'deadmans') ? 'block' : 'none'; document.getElementById('deadmans-fields').style.display = (val === 'deadmans') ? 'block' : 'none';
document.getElementById('directory-fields').style.display = (val === 'directory') ? 'block' : 'none'; document.getElementById('directory-fields').style.display = (val === 'directory') ? 'block' : 'none';
document.getElementById('broadcast-fields').style.display = (val === 'broadcast') ? 'block' : 'none'; document.getElementById('broadcast-fields').style.display = (val === 'broadcast') ? 'block' : 'none';
document.getElementById('rss-fields').style.display = (val === 'rss') ? 'block' : 'none';
} }
{% endif %} {% endif %}
@@ -341,6 +360,13 @@ document.getElementById('create-form').addEventListener('submit', async (e) => {
const prohibited = (fd.get('prohibited_message') || '').trim(); const prohibited = (fd.get('prohibited_message') || '').trim();
if (prohibited) config.prohibited_message = prohibited; if (prohibited) config.prohibited_message = prohibited;
} }
if (botType === 'rss') {
const url = (fd.get('feed_url') || '').trim();
if (!url) { alert('Feed URL is required for an RSS bot'); return; }
config.feed_url = url;
const ps = parseInt(fd.get('poll_seconds'), 10);
if (!isNaN(ps) && ps >= 30) config.poll_seconds = ps;
}
{% endif %} {% endif %}
// Shared profile fields (users and bots) // Shared profile fields (users and bots)
const bio = (fd.get('bio') || '').trim(); const bio = (fd.get('bio') || '').trim();