# hermes-mcp/product/animaGen/render_squaremcp_video.py

#!/usr/bin/env python3
from __future__ import annotations

import argparse
import json
import math
import re
import textwrap
from dataclasses import dataclass
from pathlib import Path
from typing import List, Tuple, Optional

import numpy as np
from PIL import Image, ImageDraw, ImageFont, ImageFilter

try:
    from moviepy.editor import VideoClip, concatenate_videoclips, AudioFileClip
except Exception as exc:  # pragma: no cover
    raise SystemExit(
        "MoviePy is required. Install dependencies with: pip install -r requirements.txt\n"
        f"Import error: {exc}"
    )


# Shared colour palette. Values are RGB tuples unless noted otherwise.
BG = (10, 14, 22)  # background gradient colour at the top of the frame
BG2 = (16, 22, 34)  # background gradient colour at the bottom of the frame
TEXT = (244, 247, 251)  # primary text colour
MUTED = (144, 156, 178)  # secondary text and placeholder lines
ACCENT = (112, 166, 255)  # highlight colour (active node, connector)
ACCENT_2 = (107, 228, 183)  # second highlight colour (connectors, toggle knobs)
CARD = (20, 28, 44)  # card / panel fill
CARD_2 = (28, 38, 58)  # nested row and inactive node fill
STROKE = (44, 58, 86)  # default outline colour
SHADOW = (0, 0, 0, 110)  # RGBA fill of the drop-shadow shape before blurring


@dataclass
class Scene:
    """One shot-list entry: when a scene plays and what text belongs to it."""

    scene: int  # scene number; selects the visual and the optional scene<N>.png asset
    start_s: float  # start time on the video timeline
    end_s: float  # end time on the video timeline
    on_screen_text: str  # headline text; " | " is rendered as a line break
    voiceover: str  # not read by the rendering code visible in this file
    visual: str  # not read by the rendering code visible in this file

    @property
    def duration(self) -> float:
        """Return the scene length, i.e. ``end_s - start_s``."""
        return self.end_s - self.start_s


@dataclass
class Subtitle:
    """A single caption cue with its display window on the video timeline."""

    start_s: float  # cue start, in seconds (see ts_to_seconds)
    end_s: float  # cue end, in seconds; the cue is shown while start_s <= t < end_s
    text: str  # caption text; multiple lines are joined with "\n"


def parse_args() -> argparse.Namespace:
    """Build the command-line interface and parse ``sys.argv``.

    Returns:
        The parsed namespace with ``shotlist``, ``captions``, ``output``,
        ``voiceover``, ``assets_dir``, ``width``, ``height``, ``fps``,
        ``no_captions`` and ``draft`` attributes.
    """
    cli = argparse.ArgumentParser(
        description="Render the SquareMCP launch video locally from the generated shot list and captions."
    )

    # Options are registered group by group; the overall order matches the
    # order in which they appear in --help.
    text_options = (
        ("--shotlist", "squaremcp_shotlist.json", "Path to shot list JSON"),
        ("--captions", "squaremcp_launch_captions.srt", "Path to captions SRT"),
        ("--output", "squaremcp_launch.mp4", "Output MP4 path"),
        ("--voiceover", None, "Optional voiceover audio file"),
        ("--assets-dir", None, "Optional directory containing scene1.png ... scene7.png"),
    )
    for flag, fallback, description in text_options:
        cli.add_argument(flag, default=fallback, help=description)

    integer_options = (
        ("--width", 1080, "Video width"),
        ("--height", 1920, "Video height"),
        ("--fps", 24, "Frames per second"),
    )
    for flag, fallback, description in integer_options:
        cli.add_argument(flag, type=int, default=fallback, help=description)

    switches = (
        ("--no-captions", "Disable burned-in captions"),
        ("--draft", "Lower-resolution faster render"),
    )
    for flag, description in switches:
        cli.add_argument(flag, action="store_true", help=description)

    return cli.parse_args()


def load_scenes(path: Path) -> List[Scene]:
    """Read the shot list at *path* and build one ``Scene`` per JSON entry.

    The file is decoded as UTF-8 and every entry is expanded into ``Scene``
    keyword arguments, so its keys must match the ``Scene`` field names.
    """
    raw_entries = json.loads(path.read_text(encoding="utf-8"))
    scenes: List[Scene] = []
    for entry in raw_entries:
        scenes.append(Scene(**entry))
    return scenes


# Matches an SRT timing line such as "00:00:01,000 --> 00:00:04,500".
# Groups h/m/s/ms capture the start timestamp; h2/m2/s2/ms2 capture the end.
_TIME_RE = re.compile(
    r"(?P<h>\d\d):(?P<m>\d\d):(?P<s>\d\d),(?P<ms>\d\d\d)\s+-->\s+"
    r"(?P<h2>\d\d):(?P<m2>\d\d):(?P<s2>\d\d),(?P<ms2>\d\d\d)"
)


def ts_to_seconds(h: str, m: str, s: str, ms: str) -> float:
    """Convert the textual fields of a timestamp into seconds.

    Args:
        h: Hours, as a decimal string.
        m: Minutes, as a decimal string.
        s: Seconds, as a decimal string.
        ms: Milliseconds, as a decimal string.

    Returns:
        The total number of seconds as a float.
    """
    # Integer arithmetic first keeps the whole-second part exact.
    whole_seconds = (int(h) * 60 + int(m)) * 60 + int(s)
    fraction = int(ms) / 1000.0
    return whole_seconds + fraction


def parse_srt(path: Optional[Path]) -> List[Subtitle]:
    """Parse an SRT captions file into a list of ``Subtitle`` cues.

    Cues are separated by blank lines. Each cue may start with a numeric
    index line, followed by a timing line and then the caption text. Cues
    with fewer than two non-empty lines, or whose timing line cannot be
    parsed, are skipped.

    Args:
        path: Location of the SRT file, or ``None``.

    Returns:
        The cues in file order; an empty list when *path* is ``None`` or does
        not exist.
    """
    if not path or not path.exists():
        return []

    # "utf-8-sig" drops a leading byte-order mark. Decoded as plain UTF-8 the
    # BOM stays glued to the first index line, that line no longer passes
    # isdigit(), and the first cue is silently lost.
    raw = path.read_text(encoding="utf-8-sig").strip()

    subtitles: List[Subtitle] = []
    for block in re.split(r"\n\s*\n", raw):
        lines = [ln.rstrip() for ln in block.splitlines() if ln.strip()]
        if len(lines) < 2:
            continue
        # The index line is optional; when present the timing line follows it.
        timing_at = 1 if lines[0].strip().isdigit() else 0
        # The pattern is anchored at the start, so tolerate indentation.
        match = _TIME_RE.match(lines[timing_at].lstrip())
        if not match:
            continue
        start = ts_to_seconds(match["h"], match["m"], match["s"], match["ms"])
        end = ts_to_seconds(match["h2"], match["m2"], match["s2"], match["ms2"])
        subtitles.append(Subtitle(start, end, "\n".join(lines[timing_at + 1:])))
    return subtitles


def ease_in_out(x: float) -> float:
    """Return the smoothstep easing ``3x^2 - 2x^3`` of *x* clamped to [0, 1]."""
    clamped = min(1.0, x)
    clamped = max(0.0, clamped)
    return clamped * clamped * (3 - 2 * clamped)


def lerp(a: float, b: float, x: float) -> float:
    """Linearly interpolate from *a* (at ``x == 0``) to *b* (at ``x == 1``).

    *x* is not clamped, so values outside [0, 1] extrapolate.
    """
    span = b - a
    return a + span * x


# Fonts already loaded by get_font, keyed by (size, bold). get_font is called
# many times for every rendered frame, so re-reading the font file each time
# is wasted disk and parsing work.
_FONT_CACHE: dict = {}


def get_font(size: int, bold: bool = False) -> ImageFont.FreeTypeFont | ImageFont.ImageFont:
    """Return a font of the requested pixel size, loading it at most once.

    DejaVu Sans is tried first, then Arial. When none of the candidates can
    be loaded, Pillow's built-in default font is used instead.

    Args:
        size: Requested font size.
        bold: Prefer the bold face when true.

    Returns:
        The first font that loads, or Pillow's default font as a last resort.
    """
    key = (size, bool(bold))
    cached = _FONT_CACHE.get(key)
    if cached is not None:
        return cached

    if bold:
        candidates = [
            "DejaVuSans-Bold.ttf",
            "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
            "Arial Bold.ttf",
            "Arial.ttf",
        ]
    else:
        candidates = [
            "DejaVuSans.ttf",
            "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
            "Arial.ttf",
        ]

    font = None
    for cand in candidates:
        try:
            font = ImageFont.truetype(cand, size=size)
        except (OSError, ImportError, ValueError):
            # Font file missing/unreadable, FreeType support unavailable, or
            # an invalid size: move on to the next candidate.
            continue
        break

    if font is None:
        try:
            # Newer Pillow releases (10.1+) can scale the built-in font;
            # without this the fallback ignores the requested size.
            font = ImageFont.load_default(size=size)
        except (TypeError, OSError, ImportError, ValueError):
            # Older Pillow has no ``size`` argument: fixed-size bitmap font.
            font = ImageFont.load_default()

    _FONT_CACHE[key] = font
    return font


def text_size(draw: ImageDraw.ImageDraw, text: str, font) -> Tuple[int, int]:
    """Measure *text* rendered with *font*, returning ``(width, height)``.

    The bounding box is computed for multi-line text with a fixed line
    spacing of 10.
    """
    left, top, right, bottom = draw.multiline_textbbox((0, 0), text, font=font, spacing=10)
    return right - left, bottom - top


def wrap_text(draw: ImageDraw.ImageDraw, text: str, font, max_width: int) -> str:
    """Greedily word-wrap *text* so each line fits within *max_width*.

    Existing line breaks are kept as paragraph boundaries (runs of newlines
    collapse into one). Words are never split, so a single word wider than
    *max_width* stays on a line of its own.

    Returns:
        The wrapped text with lines joined by ``"\\n"``.
    """

    def fits(candidate_line: str) -> bool:
        # The right edge of the bounding box is used as the line width.
        return draw.textbbox((0, 0), candidate_line, font=font)[2] <= max_width

    wrapped_paragraphs = []
    for paragraph in re.split(r"\n+", text):
        tokens = paragraph.split()
        if not tokens:
            wrapped_paragraphs.append("")
            continue

        rows = []
        row = tokens[0]
        for token in tokens[1:]:
            extended = f"{row} {token}"
            if fits(extended):
                row = extended
            else:
                rows.append(row)
                row = token
        rows.append(row)
        wrapped_paragraphs.append("\n".join(rows))

    return "\n".join(wrapped_paragraphs)


def rounded_box(draw: ImageDraw.ImageDraw, box, fill, outline=None, radius=24, width=2):
    """Draw a rounded rectangle on *draw*.

    Thin wrapper around ``rounded_rectangle`` that supplies this file's
    default corner radius (24) and outline width (2).
    """
    style = {"fill": fill, "outline": outline, "width": width}
    draw.rounded_rectangle(box, radius=radius, **style)


def add_shadow(base: Image.Image, box, radius=28):
    """Composite a blurred drop shadow for *box* onto *base*, in place.

    Args:
        base: RGBA image that receives the shadow.
        box: ``(left, top, right, bottom)`` of the shape casting the shadow.
        radius: Gaussian blur radius. The shadow shape's corner radius is
            always 28 and is not affected by this argument.
    """
    layer = Image.new("RGBA", base.size, (0, 0, 0, 0))
    ImageDraw.Draw(layer).rounded_rectangle(box, radius=28, fill=SHADOW)
    blurred = layer.filter(ImageFilter.GaussianBlur(radius=radius))
    base.alpha_composite(blurred)


def draw_gradient_background(img: Image.Image, t: float):
    """Paint the animated gradient backdrop and a faint grid onto *img*.

    The gradient runs from ``BG`` at the top to ``BG2`` at the bottom, with a
    slow left-to-right tint whose strength pulses with *t*.

    The grid is blended into the pixel data rather than drawn with
    ``ImageDraw``. Drawing on an RGBA image replaces pixels outright,
    alpha included, and the finished frame is later flattened with
    ``convert("RGB")``, which discards alpha. Lines drawn with a low-alpha
    white would therefore end up as solid white in the video.

    Args:
        img: RGBA frame to paint on; modified in place.
        t: Time on the video timeline driving the pulse.
    """
    grid_alpha = 30
    pulse_gain = (8, 10, 20)  # per-channel strength of the horizontal tint

    y = np.linspace(0, 1, img.height)[:, None]
    x = np.linspace(0, 1, img.width)[None, :]
    pulse = 0.5 + 0.5 * math.sin(t * 0.55)

    rgb = np.empty((img.height, img.width, 3), dtype=np.float64)
    for channel in range(3):
        rgb[:, :, channel] = (
            BG[channel] * (1 - y) + BG2[channel] * y + pulse_gain[channel] * pulse * x
        )
    np.clip(rgb, 0, 255, out=rgb)

    # Mark every pixel that lies on a vertical or horizontal grid line, then
    # blend white into those pixels at grid_alpha / 255 opacity.
    step = max(40, img.width // 18)
    on_grid = np.zeros((img.height, img.width), dtype=bool)
    on_grid[:, ::step] = True
    on_grid[::step, :] = True
    opacity = grid_alpha / 255.0
    rgb[on_grid] = rgb[on_grid] * (1.0 - opacity) + 255.0 * opacity

    # A (H, W, 3) uint8 array is interpreted as RGB without an explicit mode.
    bg = Image.fromarray(rgb.astype(np.uint8)).convert("RGBA")
    img.alpha_composite(bg)


# Scene artwork already decoded and scaled, keyed by (path, frame size).
# paste_asset_background runs once per frame; without this cache every frame
# would reopen the PNG and repeat a full LANCZOS resize.
_ASSET_CACHE: dict = {}


def paste_asset_background(img: Image.Image, asset_path: Optional[Path], t: float):
    """Composite optional scene artwork over *img* with a slow drifting pan.

    The artwork is scaled to cover the frame with 8% extra so it can pan,
    cropped to the frame size at a time-dependent offset, and dimmed with a
    dark translucent overlay. Nothing happens when *asset_path* is ``None``
    or the file does not exist.

    Args:
        img: RGBA frame to composite onto; modified in place.
        asset_path: Path of the artwork image, or ``None``.
        t: Time on the video timeline driving the pan.
    """
    if not asset_path:
        return

    key = (str(asset_path), img.size)
    asset = _ASSET_CACHE.get(key)
    if asset is None:
        if not asset_path.exists():
            return
        # The context manager releases the file handle as soon as the pixel
        # data has been converted.
        with Image.open(asset_path) as source:
            rgba = source.convert("RGBA")
        scale = max(img.width / rgba.width, img.height / rgba.height) * 1.08
        new_size = (int(rgba.width * scale), int(rgba.height * scale))
        asset = rgba.resize(new_size, Image.LANCZOS)
        _ASSET_CACHE[key] = asset

    pan_x = int((asset.width - img.width) * (0.04 + 0.02 * math.sin(t * 0.3)))
    pan_y = int((asset.height - img.height) * (0.04 + 0.02 * math.cos(t * 0.2)))
    # crop() returns a new image, so the cached artwork is never modified.
    frame = asset.crop((pan_x, pan_y, pan_x + img.width, pan_y + img.height))
    overlay = Image.new("RGBA", img.size, (5, 8, 14, 150))
    img.alpha_composite(frame)
    img.alpha_composite(overlay)


def draw_top_label(draw, width, height):
    """Draw the "SQUAREMCP" brand pill near the top-left corner of the frame."""
    brand = "SQUAREMCP"
    brand_font = get_font(max(18, width // 42), bold=True)
    left = width * 0.08
    top = height * 0.06
    text_right = draw.textbbox((0, 0), brand, font=brand_font)[2]

    # Pill behind the label, padded around the text origin.
    pill = (left - 18, top - 12, left + text_right + 18, top + 42)
    draw.rounded_rectangle(pill, radius=18, fill=(18, 24, 38, 215), outline=(55, 75, 110), width=2)
    draw.text((left, top), brand, font=brand_font, fill=TEXT)


def draw_main_text(draw, width, height, text, progress):
    """Draw the scene headline inside a panel that slides up into place.

    Args:
        draw: Drawing context of the frame.
        width: Frame width in pixels.
        height: Frame height in pixels.
        text: Headline text; ``" | "`` is treated as a line break.
        progress: Scene progress in [0, 1]; the slide-in finishes at 0.625.

    Returns:
        A ``(next_y, body_font)`` tuple: the y coordinate just below the
        headline and the font intended for body text.
    """
    line_spacing = 12
    font = get_font(max(34, width // 16), bold=True)
    body_font = get_font(max(22, width // 32), bold=False)
    max_width = int(width * 0.78)
    wrapped = wrap_text(draw, text.replace(" | ", "\n"), font, max_width)

    # Measure with the same line spacing that is used for drawing below;
    # measuring with a smaller spacing makes the panel too short for
    # multi-line headlines.
    bbox = draw.multiline_textbbox((0, 0), wrapped, font=font, spacing=line_spacing)
    tw, th = bbox[2] - bbox[0], bbox[3] - bbox[1]

    x = int(width * 0.08)
    # Start 40 px low and ease up to the resting position.
    y = int(height * 0.18 + (1 - ease_in_out(min(progress * 1.6, 1.0))) * 40)
    box = (x - 26, y - 22, x + tw + 28, y + th + 26)
    draw.rounded_rectangle(box, radius=30, fill=(14, 18, 30, 185), outline=(52, 68, 98), width=2)
    draw.multiline_text((x, y), wrapped, font=font, fill=TEXT, spacing=line_spacing)
    return y + th + 30, body_font


def draw_footer_caption(draw, width, height, subtitles: List[Subtitle], current_t: float):
    """Draw the caption that is active at *current_t* near the bottom of the frame.

    The first cue whose ``start_s <= current_t < end_s`` is used. Nothing is
    drawn when no cue is active or the active cue has no text.

    Args:
        draw: Drawing context of the frame.
        width: Frame width in pixels.
        height: Frame height in pixels.
        subtitles: Caption cues to search.
        current_t: Time on the video timeline.
    """
    active = next(
        (sub.text for sub in subtitles if sub.start_s <= current_t < sub.end_s),
        None,
    )
    if not active:
        return

    line_spacing = 8
    font = get_font(max(24, width // 28), bold=True)
    max_width = int(width * 0.78)
    active = wrap_text(draw, active, font, max_width)

    # Measure with the same line spacing that is used for drawing below so
    # the backing panel matches the rendered text height.
    bbox = draw.multiline_textbbox((0, 0), active, font=font, spacing=line_spacing)
    tw, th = bbox[2] - bbox[0], bbox[3] - bbox[1]

    x = (width - tw) // 2
    y = int(height * 0.82)
    pad = 22
    draw.rounded_rectangle((x - pad, y - pad, x + tw + pad, y + th + pad), radius=26, fill=(8, 10, 16, 205), outline=(56, 72, 110), width=2)
    draw.multiline_text((x, y), active, font=font, fill=TEXT, spacing=line_spacing, align="center")


def draw_node(draw, cx, cy, r, label, progress, active=False):
    """Draw a labelled circular node centred on ``(cx, cy)`` with radius *r*.

    Active nodes use the accent fill with a light outline; inactive nodes use
    the card fill with the default stroke. *progress* is accepted for
    signature compatibility but is not used.
    """
    if active:
        fill, outline = ACCENT, (160, 208, 255)
    else:
        fill, outline = CARD_2, STROKE
    circle = (cx - r, cy - r, cx + r, cy + r)
    draw.ellipse(circle, fill=fill, outline=outline, width=4)

    # Centre the label using the size of its bounding box.
    label_font = get_font(max(22, int(r * 0.33)), bold=True)
    left, top, right, bottom = draw.textbbox((0, 0), label, font=label_font)
    origin = (cx - (right - left) / 2, cy - (bottom - top) / 2)
    draw.text(origin, label, font=label_font, fill=TEXT)


def _draw_scene_request_flow(scene, draw, img, local_t, progress, width, height):
    """Scene 1: Agent -> Gateway -> Tools row with a connector growing from the agent."""
    cy = int(height * 0.58)
    x1, x2, x3 = int(width * 0.18), int(width * 0.5), int(width * 0.82)
    node_r = int(width * 0.07)
    draw_node(draw, x1, cy, node_r, "Agent", progress, active=True)
    rounded_box(draw, (x2 - 120, cy - 90, x2 + 120, cy + 90), fill=CARD, outline=(90, 116, 168), radius=30, width=3)
    mid_font = get_font(max(28, width // 28), bold=True)
    label = "Gateway"
    bbox = draw.textbbox((0, 0), label, font=mid_font)
    draw.text((x2 - (bbox[2] - bbox[0]) / 2, cy - 15), label, font=mid_font, fill=TEXT)
    draw_node(draw, x3, cy, node_r, "Tools", progress, active=False)
    # The connector reaches the gateway after 70% of the scene.
    p = ease_in_out(min(local_t / max(scene.duration * 0.7, 0.001), 1.0))
    # Offsets are relative to the node radius so the connector starts at the
    # node's edge at any frame width (node_r + 1 == 76 and node_r + 3 == 78
    # at the default width of 1080).
    stop_x = lerp(x1 + node_r + 1, x2 - 124, p)
    draw.line((x1 + node_r + 3, cy, stop_x, cy), fill=ACCENT_2, width=10)
    if p > 0.96:
        draw.ellipse((stop_x - 10, cy - 10, stop_x + 10, cy + 10), fill=ACCENT_2)


def _draw_scene_integration_cards(scene, draw, img, local_t, progress, width, height):
    """Scene 2: four integration cards that rise into a 2x2 grid, staggered."""
    cards = ["APIs", "Databases", "Workflows", "Ops Tools"]
    positions = [(0.08, 0.56), (0.54, 0.56), (0.08, 0.75), (0.54, 0.75)]
    w = int(width * 0.32)
    h = int(height * 0.11)
    f = get_font(max(26, width // 30), bold=True)
    for idx, (label, pos) in enumerate(zip(cards, positions)):
        delay = idx * 0.12
        p = ease_in_out((progress - delay) / 0.65)
        if p <= 0:
            continue  # this card has not started animating yet
        x = int(width * pos[0])
        y = int(height * pos[1] + (1 - p) * 80)
        add_shadow(img, (x, y, x + w, y + h), radius=18)
        rounded_box(draw, (x, y, x + w, y + h), fill=CARD, outline=STROKE, radius=28)
        draw.text((x + 28, y + 26), label, font=f, fill=TEXT)
        # Fake interface lines
        for line_idx in range(3):
            yy = y + 64 + line_idx * 14
            draw.line((x + 28, yy, x + w - 28 - line_idx * 40, yy), fill=MUTED, width=4)


def _draw_scene_policy_and_audit(scene, draw, img, local_t, progress, width, height):
    """Scene 3: a policy-toggle panel next to an audit-trail panel."""
    left = (int(width * 0.08), int(height * 0.52), int(width * 0.46), int(height * 0.76))
    right = (int(width * 0.52), int(height * 0.52), int(width * 0.92), int(height * 0.76))
    add_shadow(img, left, radius=20)
    add_shadow(img, right, radius=20)
    rounded_box(draw, left, fill=CARD, outline=STROKE, radius=28)
    rounded_box(draw, right, fill=CARD, outline=STROKE, radius=28)
    title_font = get_font(max(26, width // 28), bold=True)
    small = get_font(max(20, width // 36), bold=False)

    draw.text((left[0] + 26, left[1] + 22), "Policy Controls", font=title_font, fill=TEXT)
    for i, label in enumerate(["OAuth required", "Scoped tools", "Write actions blocked"]):
        yy = left[1] + 84 + i * 54
        track_left = left[0] + 26
        track_right = left[0] + 74
        draw.rounded_rectangle((track_left, yy, track_right, yy + 28), radius=14, fill=(36, 56, 86), outline=(98, 132, 188))
        enabled = i != 2
        # Keep the 20 px knob inside the track with a 4 px inset: at the
        # right end when enabled, at the left end otherwise.
        knob_x = track_right - 24 if enabled else track_left + 4
        draw.ellipse((knob_x, yy + 4, knob_x + 20, yy + 24), fill=ACCENT_2 if enabled else MUTED)
        draw.text((left[0] + 92, yy - 2), label, font=small, fill=TEXT)

    draw.text((right[0] + 26, right[1] + 22), "Audit Trail", font=title_font, fill=TEXT)
    for i, item in enumerate(["tool.call github.list_prs", "auth.ok service_account", "policy.allow read_only"]):
        yy = right[1] + 84 + i * 52
        draw.rounded_rectangle((right[0] + 22, yy, right[2] - 22, yy + 36), radius=16, fill=CARD_2, outline=(60, 78, 116))
        draw.text((right[0] + 36, yy + 6), item, font=small, fill=TEXT)


def _draw_scene_product_reveal(scene, draw, img, local_t, progress, width, height):
    """Scene 4: product name and tagline above a fully connected flow diagram."""
    title = "SquareMCP"
    subtitle = "Managed MCP gateway for internal tools"
    title_font = get_font(max(56, width // 10), bold=True)
    subtitle_font = get_font(max(28, width // 28), bold=False)
    bbox = draw.textbbox((0, 0), title, font=title_font)
    tw = bbox[2] - bbox[0]
    draw.text(((width - tw) / 2, height * 0.52), title, font=title_font, fill=TEXT)
    bbox2 = draw.textbbox((0, 0), subtitle, font=subtitle_font)
    sw = bbox2[2] - bbox2[0]
    draw.text(((width - sw) / 2, height * 0.52 + 120), subtitle, font=subtitle_font, fill=MUTED)

    cy = int(height * 0.74)
    x1, x2, x3 = int(width * 0.16), int(width * 0.5), int(width * 0.84)
    node_r = int(width * 0.05)
    draw_node(draw, x1, cy, node_r, "Agent", progress, active=True)
    rounded_box(draw, (x2 - 130, cy - 74, x2 + 130, cy + 74), fill=CARD, outline=(90, 116, 168), radius=26, width=3)
    mid_font = get_font(max(24, width // 32), bold=True)
    label = "SquareMCP"
    bbox = draw.textbbox((0, 0), label, font=mid_font)
    draw.text((x2 - (bbox[2] - bbox[0]) / 2, cy - 12), label, font=mid_font, fill=TEXT)
    draw_node(draw, x3, cy, node_r, "Tools", progress, active=False)
    # Connectors start 4 px outside each node's edge at any frame width
    # (node_r + 4 == 58 at the default width of 1080).
    draw.line((x1 + node_r + 4, cy, x2 - 132, cy), fill=ACCENT_2, width=8)
    draw.line((x2 + 132, cy, x3 - node_r - 4, cy), fill=ACCENT, width=8)


def _draw_scene_runtime_dashboard(scene, draw, img, local_t, progress, width, height):
    """Scene 5: "Gateway Runtime" window with tools, access and audit-log panels."""
    shell = (int(width * 0.08), int(height * 0.5), int(width * 0.92), int(height * 0.8))
    add_shadow(img, shell, radius=24)
    rounded_box(draw, shell, fill=(15, 21, 34), outline=(64, 85, 126), radius=34)
    title_font = get_font(max(26, width // 28), bold=True)
    small = get_font(max(20, width // 38), bold=False)
    draw.text((shell[0] + 28, shell[1] + 22), "Gateway Runtime", font=title_font, fill=TEXT)

    # Three side-by-side panels separated by 22 px gutters.
    left_panel = (shell[0] + 28, shell[1] + 72, shell[0] + int(width * 0.28), shell[3] - 28)
    mid_panel = (left_panel[2] + 22, shell[1] + 72, left_panel[2] + 22 + int(width * 0.22), shell[3] - 28)
    right_panel = (mid_panel[2] + 22, shell[1] + 72, shell[2] - 28, shell[3] - 28)
    for box, label in [(left_panel, "Connected Tools"), (mid_panel, "Access"), (right_panel, "Audit Log")]:
        rounded_box(draw, box, fill=CARD, outline=STROKE, radius=24)
        draw.text((box[0] + 18, box[1] + 14), label, font=small, fill=MUTED)

    for i, tool in enumerate(["postgres.query", "github.repos", "zendesk.ticket"]):
        yy = left_panel[1] + 52 + i * 58
        draw.rounded_rectangle((left_panel[0] + 14, yy, left_panel[2] - 14, yy + 42), radius=16, fill=CARD_2, outline=(58, 76, 110))
        draw.text((left_panel[0] + 28, yy + 10), tool, font=small, fill=TEXT)

    for i, item in enumerate(["Auth OK", "Read only", "Tenant scoped"]):
        yy = mid_panel[1] + 56 + i * 70
        draw.rounded_rectangle((mid_panel[0] + 16, yy, mid_panel[2] - 16, yy + 48), radius=18, fill=(25, 37, 56), outline=(74, 96, 138))
        draw.text((mid_panel[0] + 28, yy + 13), item, font=small, fill=ACCENT_2 if i == 0 else TEXT)

    logs = [
        "12:01 auth.ok",
        "12:01 tool.call postgres.query",
        "12:01 policy.allow",
        "12:02 result.ok",
    ]
    for i, log in enumerate(logs):
        yy = right_panel[1] + 48 + i * 44
        draw.text((right_panel[0] + 18, yy), log, font=small, fill=TEXT if i % 2 == 0 else MUTED)
        draw.line((right_panel[0] + 18, yy + 28, right_panel[2] - 18, yy + 28), fill=(50, 66, 96), width=2)


def _draw_scene_audience_cards(scene, draw, img, local_t, progress, width, height):
    """Scene 6: three audience cards in a row that rise into place, staggered."""
    titles = ["AI startups", "Copilot teams", "Regulated teams"]
    subs = ["Move fast with guardrails", "Connect real internal systems", "Keep logs and policy boundaries"]
    widths = [0.26, 0.26, 0.26]
    start_x = 0.08
    gap = 0.03
    top = int(height * 0.56)
    title_font = get_font(max(24, width // 34), bold=True)
    body_font = get_font(max(20, width // 42), bold=False)
    x = start_x
    for idx, (title, sub, w_pct) in enumerate(zip(titles, subs, widths)):
        # Reserve this card's slot up front so hidden cards still take space.
        card_x = x
        x += w_pct + gap
        p = ease_in_out((progress - idx * 0.12) / 0.7)
        if p <= 0:
            continue  # this card has not started animating yet
        box_w = int(width * w_pct)
        box_h = int(height * 0.18)
        xx = int(width * card_x)
        yy = int(top + (1 - p) * 70)
        add_shadow(img, (xx, yy, xx + box_w, yy + box_h), radius=18)
        rounded_box(draw, (xx, yy, xx + box_w, yy + box_h), fill=CARD, outline=STROKE, radius=26)
        draw.text((xx + 20, yy + 18), title, font=title_font, fill=TEXT)
        wrapped = wrap_text(draw, sub, body_font, box_w - 40)
        draw.multiline_text((xx + 20, yy + 66), wrapped, font=body_font, fill=MUTED, spacing=8)


def _draw_scene_call_to_action(scene, draw, img, local_t, progress, width, height):
    """Scene 7: the site address in a pill with the tagline above it."""
    title = "squaremcp.com"
    font = get_font(max(52, width // 12), bold=True)
    bbox = draw.textbbox((0, 0), title, font=font)
    tw = bbox[2] - bbox[0]
    th = bbox[3] - bbox[1]
    x = (width - tw) / 2
    y = height * 0.60
    draw.rounded_rectangle((x - 30, y - 24, x + tw + 30, y + th + 24), radius=30, fill=(14, 18, 30, 190), outline=(70, 94, 140), width=2)
    draw.text((x, y), title, font=font, fill=TEXT)
    small = get_font(max(24, width // 32), bold=False)
    msg = "Managed MCP gateway for internal tools"
    bbox2 = draw.textbbox((0, 0), msg, font=small)
    draw.text(((width - (bbox2[2] - bbox2[0])) / 2, y - 80), msg, font=small, fill=MUTED)


# Scene number -> function that draws that scene's illustration.
_SCENE_RENDERERS = {
    1: _draw_scene_request_flow,
    2: _draw_scene_integration_cards,
    3: _draw_scene_policy_and_audit,
    4: _draw_scene_product_reveal,
    5: _draw_scene_runtime_dashboard,
    6: _draw_scene_audience_cards,
    7: _draw_scene_call_to_action,
}


def draw_scene_visual(scene: Scene, draw: ImageDraw.ImageDraw, img: Image.Image, local_t: float, progress: float, width: int, height: int):
    """Draw the illustration that belongs to ``scene.scene``.

    Scene numbers without a registered illustration draw nothing.

    Args:
        scene: The scene being rendered.
        draw: Drawing context bound to *img*.
        img: RGBA frame; drop shadows are composited onto it directly.
        local_t: Time since the scene started.
        progress: ``local_t`` as a fraction of the scene duration, in [0, 1].
        width: Frame width in pixels.
        height: Frame height in pixels.
    """
    renderer = _SCENE_RENDERERS.get(scene.scene)
    if renderer is not None:
        renderer(scene, draw, img, local_t, progress, width, height)


def make_scene_clip(scene: Scene, subtitles: List[Subtitle], width: int, height: int, assets_dir: Optional[Path], show_captions: bool) -> VideoClip:
    """Build the procedurally rendered clip for one scene.

    Args:
        scene: Scene to render.
        subtitles: Caption cues for the whole video.
        width: Frame width in pixels.
        height: Frame height in pixels.
        assets_dir: Directory searched for ``scene<N>.png`` artwork, or ``None``.
        show_captions: Whether captions are burned into the frames.

    Returns:
        A ``VideoClip`` lasting ``scene.duration`` whose frames are RGB arrays.
    """
    if assets_dir:
        asset_path = assets_dir / f"scene{scene.scene}.png"
    else:
        asset_path = None

    def frame_fn(t: float):
        # Clamp the clip-local time into the scene, then derive the position
        # on the overall video timeline used by backgrounds and captions.
        local_t = max(0.0, min(scene.duration, t))
        progress = local_t / max(scene.duration, 0.001)
        timeline_t = scene.start_s + local_t

        canvas = Image.new("RGBA", (width, height), (0, 0, 0, 255))
        draw_gradient_background(canvas, timeline_t)
        paste_asset_background(canvas, asset_path, timeline_t)

        pen = ImageDraw.Draw(canvas)
        draw_top_label(pen, width, height)
        draw_main_text(pen, width, height, scene.on_screen_text, progress)
        draw_scene_visual(scene, pen, canvas, local_t, progress, width, height)
        if show_captions:
            draw_footer_caption(pen, width, height, subtitles, timeline_t)

        return np.array(canvas.convert("RGB"))

    return VideoClip(frame_fn, duration=scene.duration)


def attach_audio(clip: VideoClip, voiceover_path: Optional[Path]) -> VideoClip:
    """Return *clip* with the voiceover attached, or *clip* itself if none is given.

    Audio longer than the clip is trimmed to the clip's duration; shorter
    audio is attached unchanged.
    """
    if voiceover_path:
        track = AudioFileClip(str(voiceover_path))
        runs_past_video = track.duration > clip.duration
        if runs_past_video:
            track = track.subclip(0, clip.duration)
        return clip.set_audio(track)
    return clip


def main() -> None:
    """Render the video described by the command-line arguments.

    Loads the shot list and captions, renders one clip per scene,
    concatenates them, optionally attaches the voiceover and encodes the
    result as H.264.

    Raises:
        SystemExit: If the shot list or voiceover file is missing, or the
            shot list contains no scenes.
    """
    args = parse_args()
    # Draft mode renders at a fixed 720x1280 regardless of --width/--height.
    width = 720 if args.draft else args.width
    height = 1280 if args.draft else args.height

    shotlist_path = Path(args.shotlist)
    captions_path = Path(args.captions) if args.captions else None
    output_path = Path(args.output)
    assets_dir = Path(args.assets_dir) if args.assets_dir else None
    voiceover_path = Path(args.voiceover) if args.voiceover else None

    # Fail early with a readable message instead of a traceback from deep
    # inside the JSON or media loading code.
    if not shotlist_path.is_file():
        raise SystemExit(f"Shot list not found: {shotlist_path}")
    if voiceover_path is not None and not voiceover_path.is_file():
        raise SystemExit(f"Voiceover file not found: {voiceover_path}")

    scenes = load_scenes(shotlist_path)
    if not scenes:
        # Concatenating zero clips fails with an unrelated-looking error.
        raise SystemExit(f"Shot list contains no scenes: {shotlist_path}")
    subtitles = [] if args.no_captions else parse_srt(captions_path)

    clips = [make_scene_clip(scene, subtitles, width, height, assets_dir, not args.no_captions) for scene in scenes]
    final = concatenate_videoclips(clips, method="compose")
    final = attach_audio(final, voiceover_path)

    final.write_videofile(
        str(output_path),
        fps=args.fps,
        codec="libx264",
        audio_codec="aac" if voiceover_path else None,
        preset="medium" if not args.draft else "ultrafast",
        threads=4,
    )


# Run the renderer only when this file is executed as a script.
if __name__ == "__main__":
    main()