commit e31282cbb51c249643f77d6c721eda7c29e40bf5 Author: Dave Campbell Date: Mon Jun 8 12:41:55 2026 +0000 Initial release of Papernews Daily AI-curated newspaper PDF delivered via Telegram. Fetches RSS feeds, summarizes top stories with Claude, compiles a LaTeX PDF in newspaper layout. Co-Authored-By: Claude Sonnet 4.6 diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..8c0ad91 --- /dev/null +++ b/.env.example @@ -0,0 +1,29 @@ +# Papernews — environment configuration +# Copy this file to .env and fill in your values. + +# Telegram bot token — create a bot via @BotFather on Telegram +TELEGRAM_BOT_TOKEN=your_bot_token_here + +# Your Telegram chat ID — send a message to @userinfobot to find it +TELEGRAM_CHAT_ID=your_chat_id_here + +# Anthropic API key — https://console.anthropic.com +ANTHROPIC_API_KEY=your_anthropic_api_key_here + +# Location shown in the newspaper masthead +LOCATION=Your City + +# Delivery schedule (24-hour time, default: 7:00am) +SCHEDULE_HOUR=7 +SCHEDULE_MINUTE=0 + +# Timezone for the schedule — any IANA timezone name +# Examples: America/Chicago, America/New_York, America/Los_Angeles, Europe/London +TIMEZONE=America/Chicago + +# How many days to keep PDFs before auto-pruning (default: 5) +RETENTION_DAYS=5 + +# Set to true to run the pipeline immediately on container start (for testing). +# Reset to false after testing so it doesn't fire on every restart. +RUN_NOW=false diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a6306e7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +# Credentials — never commit these +.env + +# Generated PDFs +output/ + +# Python +__pycache__/ +*.pyc +*.pyo diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..cdceeb2 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,31 @@ +# Papernews — Dockerfile +# Python 3.12 slim + a minimal LaTeX install for newspaper PDF generation. +# texlive-latex-extra adds titlesec, microtype, and other layout packages. 
+# Image is large (~700MB) due to texlive — this is expected and a one-time cost. +# +# Rebuild after code changes: docker compose up -d --build + +FROM python:3.12-slim + +# Install LaTeX. --no-install-recommends keeps the layer as lean as possible. +RUN apt-get update && apt-get install -y --no-install-recommends \ + texlive-latex-base \ + texlive-latex-recommended \ + texlive-fonts-recommended \ + texlive-latex-extra \ + lmodern \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +# Install Python deps first (layer-cached until requirements.txt changes) +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY papernews.py . + +# /output is mounted from the host — PDFs are written and pruned here +VOLUME ["/output"] + +CMD ["python", "papernews.py"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..ef0e6e1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 Rock Campbell + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..07d9395 --- /dev/null +++ b/README.md @@ -0,0 +1,108 @@ +# Papernews + +A self-hosted daily newspaper delivered to your Telegram. Fetches RSS feeds from your chosen news sources, summarizes the top stories with Claude AI, and compiles everything into a clean newspaper-style PDF — once a day, no feeds to scroll, no algorithm pulling you back. + +Inspired by a Reddit post on r/RemarkableTablet. + +![Sample Papernews PDF showing newspaper-style layout with sections](screenshot.png) + +## How it works + +1. RSS feeds are fetched from configured news sources +2. Claude picks and summarizes the 4–6 most newsworthy stories per section +3. A LaTeX PDF is compiled in newspaper layout (two-column, masthead header) +4. The PDF is delivered to your Telegram chat +5. PDFs older than 5 days are automatically pruned + +## Requirements + +- Docker + Docker Compose +- A Telegram bot token (create one via [@BotFather](https://t.me/botfather)) +- An [Anthropic API key](https://console.anthropic.com) + +## Setup + +**1. Clone the repo** +```bash +git clone https://github.com/yourusername/papernews.git +cd papernews +``` + +**2. Configure** +```bash +cp .env.example .env +``` + +Edit `.env` and fill in: +- `TELEGRAM_BOT_TOKEN` — from @BotFather +- `TELEGRAM_CHAT_ID` — your Telegram user ID (send `/start` to [@userinfobot](https://t.me/userinfobot) to find it) +- `ANTHROPIC_API_KEY` — from [console.anthropic.com](https://console.anthropic.com) +- `LOCATION` — shown in the masthead (e.g. `Kansas City, Missouri`) + +**3. Build and run** +```bash +docker compose up -d --build +``` + +The container will start and wait for the scheduled time. 
To test immediately: +```bash +docker compose run --rm -e RUN_NOW=true papernews +``` + +## Configuration + +All configuration is via environment variables in `.env`: + +| Variable | Default | Description | +|---|---|---| +| `TELEGRAM_BOT_TOKEN` | required | Bot token from @BotFather | +| `TELEGRAM_CHAT_ID` | required | Your Telegram chat ID | +| `ANTHROPIC_API_KEY` | required | Anthropic API key | +| `LOCATION` | `Your City` | City/region shown in the masthead | +| `SCHEDULE_HOUR` | `7` | Hour to deliver (24-hour) | +| `SCHEDULE_MINUTE` | `0` | Minute to deliver | +| `TIMEZONE` | `America/Chicago` | Any IANA timezone name | +| `RETENTION_DAYS` | `5` | Days to keep PDFs before pruning | +| `RUN_NOW` | `false` | Set `true` to run immediately on start | + +## Customizing news sources + +Edit the `SECTIONS` list in `papernews.py`. Each section has a title and a list of RSS feed URLs: + +```python +SECTIONS = [ + { + "title": "Local --- My City", + "feeds": [ + ("Local Paper", "https://localpaper.com/feed"), + ("Local TV", "https://localtv.com/rss"), + ], + }, + { + "title": "National", + "feeds": [ + ("AP News", "https://feeds.apnews.com/rss/apf-topnews"), + ("NPR", "https://feeds.npr.org/1001/rss.xml"), + ], + }, + # add as many sections as you like +] +``` + +The `---` in section titles renders as an em dash in the PDF. + +## Checking logs + +```bash +docker logs papernews +``` + +## Notes + +- The Docker image is ~700MB due to the LaTeX install. This is a one-time cost. +- PDFs are saved to `./output/` and pruned automatically. +- The bot token is only used to send outbound documents — the container does not poll for incoming messages. + +## License + +MIT — see [LICENSE](LICENSE). 
diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..6140169 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,22 @@ +# Papernews — Docker Compose +# Daily AI-curated newspaper PDF: fetches RSS, summarizes with Claude, +# compiles LaTeX, delivers to Telegram, prunes PDFs older than 5 days. +# +# Schedule: 7:00am CT daily. +# To test immediately: set RUN_NOW=true in .env, then docker compose up +# Rebuild after code changes: docker compose up -d --build + +services: + papernews: + build: . + container_name: papernews + restart: unless-stopped + env_file: .env + volumes: + # PDFs are written here and auto-pruned after 5 days + - ./output:/output + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" diff --git a/papernews.py b/papernews.py new file mode 100644 index 0000000..f66f06b --- /dev/null +++ b/papernews.py @@ -0,0 +1,356 @@ +#!/usr/bin/env python3 +""" +papernews.py — Daily AI-curated newspaper PDF + +Fetches RSS feeds from configurable news sources, asks Claude to pick and +summarize the top stories, renders a newspaper-style PDF via LaTeX, delivers +it to Telegram, and prunes PDFs older than a configurable number of days. + +Schedule: configurable via SCHEDULE_HOUR/SCHEDULE_MINUTE (default: 7:00am). +Timezone: configurable via TIMEZONE (default: America/Chicago). +Set RUN_NOW=true in the environment to fire once immediately on startup (for testing). 
+""" + +import json +import logging +import os +import re +import shutil +import subprocess +import tempfile +from datetime import datetime, timedelta +from pathlib import Path +from zoneinfo import ZoneInfo + +import anthropic +import feedparser +import requests +from apscheduler.schedulers.blocking import BlockingScheduler + +# ── Configuration ────────────────────────────────────────────────────────── +BOT_TOKEN = os.environ["TELEGRAM_BOT_TOKEN"] +CHAT_ID = os.environ["TELEGRAM_CHAT_ID"] +ANTHROPIC_KEY = os.environ["ANTHROPIC_API_KEY"] +OUTPUT_DIR = Path("/output") +LOCATION = os.environ.get("LOCATION", "Your City") +TZ = ZoneInfo(os.environ.get("TIMEZONE", "America/Chicago")) +RETENTION_DAYS = int(os.environ.get("RETENTION_DAYS", "5")) +SCHEDULE_HOUR = int(os.environ.get("SCHEDULE_HOUR", "7")) +SCHEDULE_MIN = int(os.environ.get("SCHEDULE_MINUTE", "0")) +RUN_NOW = os.environ.get("RUN_NOW", "").lower() == "true" + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", +) +log = logging.getLogger(__name__) + +# Convenience alias — used in run() and the scheduler +CT = TZ + + +# ── News sections and RSS feeds ──────────────────────────────────────────── +# Each section has a display title and a list of (source_name, rss_url) pairs. 
SECTIONS = [
    {
        "title": "Local --- Kansas City",
        "feeds": [
            ("KCUR", "https://www.kcur.org/feed"),
            ("Fox4KC", "https://www.fox4kc.com/feed/"),
            ("KSHB", "https://www.kshb.com/news.rss"),
        ],
    },
    {
        "title": "National",
        "feeds": [
            ("AP News", "https://feeds.apnews.com/rss/apf-topnews"),
            ("NPR", "https://feeds.npr.org/1001/rss.xml"),
            ("PBS", "https://www.pbs.org/newshour/feeds/rss/headlines"),
        ],
    },
    {
        "title": "International",
        "feeds": [
            ("BBC World", "http://feeds.bbci.co.uk/news/world/rss.xml"),
            ("The Guardian", "https://www.theguardian.com/world/rss"),
            ("Reuters", "https://feeds.reuters.com/reuters/topNews"),
        ],
    },
    {
        "title": "Technology",
        "feeds": [
            ("Hacker News", "https://news.ycombinator.com/rss"),
            ("Ars Technica", "http://feeds.arstechnica.com/arstechnica/index"),
        ],
    },
]

# How many headlines to collect from each feed before handing off to Claude
MAX_ARTICLES_PER_FEED = 6


# ── LaTeX helpers ──────────────────────────────────────────────────────────
# Maps each special LaTeX character to its escaped equivalent.
# translate() processes each character independently — no double-substitution risk
# (the backslash inside an inserted "\&" is never itself re-escaped).
_LATEX_ESCAPES = str.maketrans({
    "\\": r"\textbackslash{}",
    "&": r"\&",
    "%": r"\%",
    "$": r"\$",
    "#": r"\#",
    "_": r"\_",
    "{": r"\{",
    "}": r"\}",
    "~": r"\textasciitilde{}",
    "^": r"\textasciicircum{}",
})


def esc(text: object) -> str:
    """Escape special LaTeX characters in user-supplied text.

    Args:
        text: Value to render. Non-string values are converted with str();
            None is treated as missing text.

    Returns:
        The text with every character listed in _LATEX_ESCAPES replaced by its
        LaTeX-safe form, or "" when text is None.
    """
    # A JSON null from the summarizer arrives here as None; str(None) would
    # otherwise print the literal word "None" in the newspaper.
    if text is None:
        return ""
    return str(text).translate(_LATEX_ESCAPES)


# Raw string template — avoids double-escaping LaTeX braces.
# The literal markers PAPERLOCATION, PAPERDATE and PAPERBODY are substituted at render time.
LATEX_TEMPLATE = r"""\documentclass[10pt]{article}
\usepackage[margin=0.65in, top=0.4in, columnsep=0.3in]{geometry}
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage{lmodern}
\usepackage{multicol}
\usepackage{parskip}
\usepackage{microtype}
\usepackage{titlesec}

%% Section header: centered, bold, with a rule below
\titleformat{\section}[block]{\large\bfseries\filcenter}{}{0pt}{}[\titlerule]
\titlespacing*{\section}{0pt}{14pt}{6pt}

\setlength{\parindent}{0pt}
\setlength{\parskip}{4pt}
\pagestyle{empty}

\begin{document}

%% Masthead — full width above the two-column body
\begin{center}
  {\fontsize{42}{50}\selectfont\textbf{Papernews}}\\[2pt]
  \rule{\linewidth}{2pt}\\[2pt]
  {\footnotesize PAPERLOCATION\hfill\textit{PAPERDATE}}\\
  \rule{\linewidth}{0.4pt}
\end{center}
\vspace{6pt}

%% Two-column newspaper content
\begin{multicols}{2}

PAPERBODY

\end{multicols}
\end{document}
"""


def build_section_latex(title: str, stories: list) -> str:
    """Render one newspaper section from its AI-summarized stories.

    Args:
        title: Section heading. "---" is left untouched by esc(), so LaTeX
            renders it as an em dash.
        stories: Items shaped like {"headline": ..., "body": ...}. Entries that
            are not dicts, or that carry neither a headline nor a body, are
            skipped instead of aborting the whole edition.

    Returns:
        LaTeX source: a \\section* heading followed by, for each story, a bold
        headline paragraph and a body paragraph separated by blank lines.
    """
    # \section and \textbf take non-\long arguments: a blank line inside them
    # ends the paragraph mid-argument and stops pdflatex, so fold all
    # whitespace runs in titles and headlines into single spaces.
    heading = " ".join(str(title).split())
    lines = [rf"\section*{{{esc(heading)}}}"]
    for story in stories:
        if not isinstance(story, dict):
            continue  # the model occasionally emits stray strings in the array
        # "or ''" maps JSON null to empty text rather than the word "None".
        headline = " ".join(str(story.get("headline") or "").split())
        body = str(story.get("body") or "").strip()
        if not (headline or body):
            continue
        if headline:
            lines.append(rf"\textbf{{{esc(headline)}}}")
            lines.append("")
        if body:
            lines.append(esc(body))
            lines.append("")
    return "\n".join(lines)


# ── RSS fetching ───────────────────────────────────────────────────────────
# Fetch via requests (with explicit timeout) then parse the downloaded content,
# so feedparser never makes its own unguarded network calls that can hang forever.
_FEED_HEADERS = {"User-Agent": "Papernews/1.0 (rocklab daily digest)"}

# Upper bound for one pdflatex invocation, in seconds.
_PDFLATEX_TIMEOUT = 120


def fetch_articles(feeds: list) -> list:
    """Pull the top headlines from each feed in a section.

    Args:
        feeds: (source_name, rss_url) pairs.

    Returns:
        A list of {"source", "title", "summary"} dicts, at most
        MAX_ARTICLES_PER_FEED per feed. A feed that fails to download or parse
        is logged and skipped; the remaining feeds are still processed.
    """
    articles = []
    for source, url in feeds:
        try:
            resp = requests.get(url, timeout=12, headers=_FEED_HEADERS)
            resp.raise_for_status()
            # Pass raw bytes, not resp.text: requests decodes text from the
            # HTTP headers alone, which garbles feeds served without a charset.
            # feedparser detects the encoding from the document itself.
            feed = feedparser.parse(resp.content)
            for entry in feed.entries[:MAX_ARTICLES_PER_FEED]:
                title = (entry.get("title") or "").strip()
                summary = entry.get("summary") or entry.get("description") or ""
                # Strip HTML tags — Claude handles messy plain text fine
                summary = re.sub(r"<[^>]+>", " ", summary)
                # Collapse whitespace so the 500-char budget is spent on text.
                summary = " ".join(summary.split())
                if title:
                    articles.append({
                        "source": source,
                        "title": title,
                        "summary": summary[:500],
                    })
        except Exception as exc:  # best-effort: one bad feed must not sink the section
            log.warning("Feed fetch failed — %s (%s): %s", source, url, exc)
    return articles


# ── Claude summarization ───────────────────────────────────────────────────
def _parse_stories(raw: str) -> list:
    """Extract the list of {"headline", "body"} dicts from the model's reply.

    Raises:
        ValueError: if the reply contains no parseable JSON array
            (json.JSONDecodeError is a ValueError subclass).
    """
    # Take the outermost [...] span: this tolerates markdown code fences and
    # any stray prose the model adds around the array.
    start, end = raw.find("["), raw.rfind("]")
    if start == -1 or end < start:
        raise ValueError("no JSON array in model reply")
    parsed = json.loads(raw[start:end + 1])
    if not isinstance(parsed, list):
        raise ValueError("model reply is not a JSON array")

    stories = []
    for item in parsed:
        if not isinstance(item, dict):
            continue  # ignore stray strings/numbers in the array
        headline = str(item.get("headline") or "").strip()
        body = str(item.get("body") or "").strip()
        if headline or body:
            stories.append({"headline": headline, "body": body})
    return stories


def summarize_section(client: "anthropic.Anthropic", section_title: str, articles: list) -> list:
    """Ask Claude to select and summarize the top stories for one section.

    Args:
        client: Anthropic API client.
        section_title: Section name, used in the prompt and in log messages.
        articles: Dicts produced by fetch_articles().

    Returns:
        A list of {"headline": str, "body": str} dicts. Empty when there are no
        articles, the API call fails, or the reply cannot be parsed.
    """
    if not articles:
        log.warning("No articles fetched for section: %s", section_title)
        return []

    article_text = "\n\n".join(
        f"[{a['source']}] {a['title']}\n{a['summary']}" for a in articles
    )

    prompt = (
        f"You are the {section_title} editor for a daily newspaper called Papernews.\n\n"
        "From the articles below, select the 4–6 most newsworthy stories and write a concise digest.\n\n"
        "Return ONLY a JSON array with no other text:\n"
        '[\n  {"headline": "Short headline", "body": "1–3 sentences of newspaper prose."},\n  ...\n]\n\n'
        "Rules: headlines under 10 words, body text factual and tight, no source names or URLs.\n\n"
        f"Articles:\n{article_text}"
    )

    try:
        msg = client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=2048,
            messages=[{"role": "user", "content": prompt}],
        )
        return _parse_stories(msg.content[0].text)
    except Exception as exc:  # API, network and parse errors all degrade to "no stories"
        log.error("Claude summarization failed for '%s': %s", section_title, exc)
        return []


# ── PDF compilation ────────────────────────────────────────────────────────
def compile_pdf(latex: str, output_path: Path) -> bool:
    """Compile a LaTeX string to PDF and copy it to output_path.

    Returns:
        True when pdflatex exits cleanly and the PDF was copied; False when
        pdflatex is missing, times out, reports an error, or produces no PDF.
    """
    with tempfile.TemporaryDirectory() as tmp:
        tex_path = Path(tmp) / "papernews.tex"
        tex_path.write_text(latex, encoding="utf-8")

        try:
            result = subprocess.run(
                ["pdflatex", "-interaction=nonstopmode", "-output-directory", tmp, str(tex_path)],
                capture_output=True,
                text=True,
                # pdflatex echoes raw 8-bit bytes in its log output; never let
                # decoding that output abort the run.
                errors="replace",
                timeout=_PDFLATEX_TIMEOUT,
            )
        except subprocess.TimeoutExpired:
            log.error("pdflatex timed out after %d seconds.", _PDFLATEX_TIMEOUT)
            return False
        except OSError as exc:
            log.error("pdflatex could not be started: %s", exc)
            return False

        pdf_src = Path(tmp) / "papernews.pdf"
        if result.returncode != 0 or not pdf_src.exists():
            log.error("pdflatex failed. Last 3000 chars of output:\n%s", result.stdout[-3000:])
            return False

        # Copy before the temporary directory is removed on leaving the with-block.
        shutil.copy2(pdf_src, output_path)
        return True


# ── Telegram delivery ──────────────────────────────────────────────────────
def _redact(value: object) -> str:
    """Render value for logging with the Telegram bot token masked.

    requests embeds the request URL, and therefore the token, in its
    exception messages.
    """
    text = str(value)
    return text.replace(BOT_TOKEN, "<bot-token>") if BOT_TOKEN else text


def send_pdf(path: Path, caption: str) -> bool:
    """Upload the PDF to the configured Telegram chat.

    Returns:
        True if Telegram accepted the document, False on any failure
        (failures are logged, never raised).
    """
    url = f"https://api.telegram.org/bot{BOT_TOKEN}/sendDocument"
    try:
        with open(path, "rb") as f:
            resp = requests.post(
                url,
                data={"chat_id": CHAT_ID, "caption": caption},
                files={"document": f},
                timeout=60,
            )
    except Exception as exc:
        log.error("Telegram send error: %s", _redact(exc))
        return False

    if resp.ok:
        log.info("PDF delivered to Telegram.")
        return True
    log.error("Telegram sendDocument failed: %s — %s", resp.status_code, resp.text)
    return False


def send_message(text: str) -> None:
    """Send a plain-text message to Telegram (used for error alerts).

    Best-effort: failures are logged and never raised.
    """
    url = f"https://api.telegram.org/bot{BOT_TOKEN}/sendMessage"
    try:
        resp = requests.post(url, json={"chat_id": CHAT_ID, "text": text}, timeout=15)
    except Exception as exc:
        log.error("Telegram message error: %s", _redact(exc))
        return
    if not resp.ok:
        # A rejected alert (bad chat id, revoked token) should still leave a trace.
        log.error("Telegram sendMessage failed: %s — %s", resp.status_code, resp.text)


# ── Cleanup ────────────────────────────────────────────────────────────────
def prune_old_pdfs() -> None:
    """Delete papernews PDFs from OUTPUT_DIR older than RETENTION_DAYS.

    Age is judged by file modification time. A file that cannot be inspected
    or removed is logged and skipped so the rest are still pruned.
    """
    cutoff = datetime.now() - timedelta(days=RETENTION_DAYS)
    for pdf in OUTPUT_DIR.glob("papernews-*.pdf"):
        try:
            if datetime.fromtimestamp(pdf.stat().st_mtime) < cutoff:
                pdf.unlink()
                log.info("Pruned: %s", pdf.name)
        except OSError as exc:
            log.warning("Could not prune %s: %s", pdf.name, exc)


# ── Main pipeline ──────────────────────────────────────────────────────────
def _render_latex(body: str, date_str: str) -> str:
    """Fill the PAPERLOCATION / PAPERDATE / PAPERBODY markers in LATEX_TEMPLATE."""
    fields = {
        # LOCATION is free text from the environment ("Town & Country"), so it
        # needs the same escaping as any other content.
        "PAPERLOCATION": esc(LOCATION),
        "PAPERDATE": esc(date_str),
        "PAPERBODY": body,  # already LaTeX, built by build_section_latex()
    }
    # One pass over the template: substituted text is never re-scanned for
    # markers, and a callable replacement keeps backslashes literal.
    return re.sub("|".join(fields), lambda m: fields[m.group(0)], LATEX_TEMPLATE)


def _build_and_deliver() -> "Path | None":
    """Fetch, summarize, render and deliver today's edition.

    Returns:
        The path of the delivered PDF, or None if any stage failed
        (the failure has already been logged and, where possible, alerted).
    """
    now = datetime.now(CT)
    # Built from parts rather than "%-d", which is a glibc-only strftime extension.
    date_str = f"{now:%A, %B} {now.day}, {now:%Y}"
    pdf_path = OUTPUT_DIR / f"papernews-{now.strftime('%Y-%m-%d')}.pdf"

    log.info("Starting Papernews run for %s", date_str)
    client = anthropic.Anthropic(api_key=ANTHROPIC_KEY)

    body_parts = []
    for section in SECTIONS:
        log.info("Processing section: %s", section["title"])
        articles = fetch_articles(section["feeds"])
        log.info("  Fetched %d articles", len(articles))
        stories = summarize_section(client, section["title"], articles)
        log.info("  Summarized to %d stories", len(stories))
        if stories:
            body_parts.append(build_section_latex(section["title"], stories))

    if not body_parts:
        msg = "Papernews: no content generated today — check container logs."
        log.error(msg)
        send_message(msg)
        return None

    latex = _render_latex("\n\n".join(body_parts), date_str)

    if not compile_pdf(latex, pdf_path):
        send_message("Papernews: LaTeX compile failed — check container logs.")
        return None

    if not send_pdf(pdf_path, f"Papernews — {date_str}"):
        log.error("PDF compiled but Telegram delivery failed: %s", pdf_path)
        return None

    return pdf_path


def run() -> None:
    """Full pipeline: fetch → summarize → render → deliver → prune.

    Never raises: an unexpected error is logged with its traceback and reported
    to Telegram, so a failing RUN_NOW start-up does not crash the container
    into a restart loop. Pruning runs on every path, so old PDFs are removed
    even on days when delivery fails.
    """
    delivered = None
    try:
        delivered = _build_and_deliver()
    except Exception:
        log.exception("Papernews run failed unexpectedly.")
        send_message("Papernews: unexpected error — check container logs.")

    prune_old_pdfs()
    if delivered is not None:
        log.info("Papernews complete: %s", delivered.name)


# ── Entry point ────────────────────────────────────────────────────────────
if __name__ == "__main__":
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    if RUN_NOW:
        log.info("RUN_NOW=true — running immediately.")
        run()

    scheduler = BlockingScheduler(timezone=CT)
    scheduler.add_job(run, "cron", hour=SCHEDULE_HOUR, minute=SCHEDULE_MIN, id="papernews_daily")
    log.info("Papernews scheduled for %02d:%02d daily.", SCHEDULE_HOUR, SCHEDULE_MIN)
    scheduler.start()

# ── End of papernews.py. The commit's next file follows, kept verbatim: ────
# diff --git a/requirements.txt b/requirements.txt
# new file mode 100644
# index 0000000..b76201b
# --- /dev/null
# +++ b/requirements.txt
# @@ -0,0 +1,4 @@
# +feedparser>=6.0
# +anthropic>=0.50.0
# +apscheduler>=3.10,<4.0
# +requests>=2.28.0