Files
hermes-mcp/product/animaGen/render_squaremcp_video.py
2026-04-29 09:52:53 -04:00

482 lines
21 KiB
Python

#!/usr/bin/env python3
from __future__ import annotations

import argparse
import functools
import json
import math
import re
import textwrap
from dataclasses import dataclass
from pathlib import Path
from typing import List, Tuple, Optional

import numpy as np
from PIL import Image, ImageDraw, ImageFont, ImageFilter

try:
    from moviepy.editor import VideoClip, concatenate_videoclips, AudioFileClip
except Exception as exc:  # pragma: no cover
    raise SystemExit(
        "MoviePy is required. Install dependencies with: pip install -r requirements.txt\n"
        f"Import error: {exc}"
    )
# Colour palette shared by the drawing helpers. All entries are RGB tuples
# except SHADOW, which carries an alpha channel.
BG = (10, 14, 22)  # background gradient colour at the top of the frame
BG2 = (16, 22, 34)  # background gradient colour at the bottom of the frame
TEXT = (244, 247, 251)  # primary text
MUTED = (144, 156, 178)  # secondary text and placeholder lines
ACCENT = (112, 166, 255)  # fill of an active node; second connector in scene 4
ACCENT_2 = (107, 228, 183)  # connectors, enabled toggles, highlighted status text
CARD = (20, 28, 44)  # card / panel fill
CARD_2 = (28, 38, 58)  # inactive node fill and nested row fill
STROKE = (44, 58, 86)  # default outline for cards and inactive nodes
SHADOW = (0, 0, 0, 110)  # RGBA fill used by add_shadow before blurring
@dataclass
class Scene:
    """One timed entry of the shot list.

    ``load_scenes`` builds instances with ``Scene(**item)``, so the field
    names double as the keys expected in each shot-list JSON object.
    """

    scene: int  # scene number; selects the visual and the scene<N>.png asset
    start_s: float  # start of the scene on the caption timeline, in seconds
    end_s: float  # end of the scene on the caption timeline, in seconds
    on_screen_text: str  # headline drawn by draw_main_text (" | " marks a line break)
    voiceover: str  # narration text; stored but not read by the renderer
    visual: str  # visual description; stored but not read by the renderer

    @property
    def duration(self) -> float:
        """Length of the scene in seconds, i.e. ``end_s - start_s``."""
        span = self.end_s - self.start_s
        return span
@dataclass
class Subtitle:
    """A single caption cue parsed from the SRT file."""

    start_s: float  # time at which the cue becomes visible, in seconds
    end_s: float  # time at which the cue disappears (exclusive), in seconds
    text: str  # cue text; multiple lines are joined with "\n"
def parse_args() -> argparse.Namespace:
    """Parse the renderer's command-line options from ``sys.argv``.

    Returns:
        The populated namespace with attributes ``shotlist``, ``captions``,
        ``output``, ``voiceover``, ``assets_dir``, ``width``, ``height``,
        ``fps``, ``no_captions`` and ``draft``.
    """
    cli = argparse.ArgumentParser(
        description="Render the SquareMCP launch video locally from the generated shot list and captions."
    )
    # Registration order is kept stable because it determines the --help layout.
    option_table = [
        ("--shotlist", {"default": "squaremcp_shotlist.json", "help": "Path to shot list JSON"}),
        ("--captions", {"default": "squaremcp_launch_captions.srt", "help": "Path to captions SRT"}),
        ("--output", {"default": "squaremcp_launch.mp4", "help": "Output MP4 path"}),
        ("--voiceover", {"default": None, "help": "Optional voiceover audio file"}),
        ("--assets-dir", {"default": None, "help": "Optional directory containing scene1.png ... scene7.png"}),
        ("--width", {"type": int, "default": 1080, "help": "Video width"}),
        ("--height", {"type": int, "default": 1920, "help": "Video height"}),
        ("--fps", {"type": int, "default": 24, "help": "Frames per second"}),
        ("--no-captions", {"action": "store_true", "help": "Disable burned-in captions"}),
        ("--draft", {"action": "store_true", "help": "Lower-resolution faster render"}),
    ]
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
def load_scenes(path: Path) -> List[Scene]:
    """Read the shot-list JSON file and build one ``Scene`` per entry.

    Args:
        path: Location of a UTF-8 JSON file holding a sequence of objects
            whose keys match the ``Scene`` field names.

    Returns:
        The scenes in file order.
    """
    raw_text = path.read_text(encoding="utf-8")
    scenes: List[Scene] = []
    for entry in json.loads(raw_text):
        scenes.append(Scene(**entry))
    return scenes
# Matches an SRT timing line such as "00:00:01,500 --> 00:00:04,000".
# Groups h/m/s/ms capture the start timestamp and h2/m2/s2/ms2 the end one.
_TIME_RE = re.compile(
    r"(?P<h>\d\d):(?P<m>\d\d):(?P<s>\d\d),(?P<ms>\d\d\d)\s+-->\s+"
    r"(?P<h2>\d\d):(?P<m2>\d\d):(?P<s2>\d\d),(?P<ms2>\d\d\d)"
)
def ts_to_seconds(h: str, m: str, s: str, ms: str) -> float:
    """Convert the string parts of a timestamp into seconds.

    Args:
        h: Hours digits.
        m: Minutes digits.
        s: Seconds digits.
        ms: Milliseconds digits.

    Returns:
        The total number of seconds as a float.
    """
    whole_seconds = int(h) * 3600 + int(m) * 60 + int(s)
    fraction = int(ms) / 1000.0
    return whole_seconds + fraction
def parse_srt(path: Optional[Path]) -> List[Subtitle]:
    """Parse an SRT captions file into ``Subtitle`` cues.

    Cues are separated by blank lines. Each cue may start with a numeric
    index line; the timing line follows it (or comes first when the index is
    absent) and every remaining non-empty line is caption text. Cues with a
    missing or malformed timing line are skipped.

    Args:
        path: Location of the SRT file, or ``None``.

    Returns:
        The cues in file order; an empty list when ``path`` is falsy or does
        not exist.
    """
    if not path or not path.exists():
        return []
    # "utf-8-sig" strips a leading byte-order mark. With plain "utf-8" the BOM
    # stays glued to the first index line, which then fails isdigit() and
    # causes the first cue to be dropped.
    content = path.read_text(encoding="utf-8-sig").strip()
    subtitles: List[Subtitle] = []
    for block in re.split(r"\n\s*\n", content):
        lines = [ln.rstrip() for ln in block.splitlines() if ln.strip()]
        if len(lines) < 2:
            continue
        has_index = lines[0].strip().isdigit()
        timing_line = lines[1] if has_index else lines[0]
        # _TIME_RE is anchored at the start, so tolerate leading whitespace.
        match = _TIME_RE.match(timing_line.lstrip())
        if not match:
            continue
        start = ts_to_seconds(match["h"], match["m"], match["s"], match["ms"])
        end = ts_to_seconds(match["h2"], match["m2"], match["s2"], match["ms2"])
        text_lines = lines[2:] if has_index else lines[1:]
        subtitles.append(Subtitle(start, end, "\n".join(text_lines)))
    return subtitles
def ease_in_out(x: float) -> float:
    """Map ``x`` to a smooth 0-to-1 curve with zero slope at both ends.

    The input is clamped to the range 0..1 first, then passed through the
    cubic ``3x^2 - 2x^3``.
    """
    clamped = max(0.0, min(1.0, x))
    return clamped * clamped * (3 - 2 * clamped)
def lerp(a: float, b: float, x: float) -> float:
    """Linearly interpolate from ``a`` (at ``x == 0``) to ``b`` (at ``x == 1``).

    ``x`` is not clamped, so values outside 0..1 extrapolate.
    """
    span = b - a
    return a + span * x
@functools.lru_cache(maxsize=64)
def get_font(size: int, bold: bool = False) -> ImageFont.FreeTypeFont | ImageFont.ImageFont:
    """Return a font of the requested pixel size, trying known TrueType files.

    The drawing helpers call this many times per frame with a small set of
    distinct sizes, so results are memoised instead of re-reading the font
    file from disk on every call.

    Args:
        size: Font size passed to ``ImageFont.truetype``.
        bold: Prefer a bold face when ``True``.

    Returns:
        The first candidate font that loads. If none does, Pillow's built-in
        default font is returned, sized to ``size`` when the installed Pillow
        supports that and at its fixed size otherwise.
    """
    if bold:
        candidates = (
            "DejaVuSans-Bold.ttf",
            "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
            "Arial Bold.ttf",
            "Arial.ttf",
        )
    else:
        candidates = (
            "DejaVuSans.ttf",
            "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
            "Arial.ttf",
        )
    for cand in candidates:
        try:
            return ImageFont.truetype(cand, size=size)
        except (OSError, ImportError):
            # OSError: the font file is missing or unreadable.
            # ImportError: Pillow was built without FreeType support.
            continue
    try:
        # Newer Pillow releases accept a size for the built-in font; without
        # it the fallback text would be drawn at a tiny fixed size.
        return ImageFont.load_default(size=size)
    except (TypeError, OSError, ImportError):
        # Older Pillow (no ``size`` argument) or no FreeType available.
        return ImageFont.load_default()
def text_size(draw: ImageDraw.ImageDraw, text: str, font) -> Tuple[int, int]:
    """Measure (possibly multi-line) ``text`` using a line spacing of 10.

    Returns:
        ``(width, height)`` of the text's bounding box.
    """
    left, top, right, bottom = draw.multiline_textbbox((0, 0), text, font=font, spacing=10)
    return right - left, bottom - top
def wrap_text(draw: ImageDraw.ImageDraw, text: str, font, max_width: int) -> str:
    """Greedily word-wrap ``text`` so each line fits within ``max_width``.

    Existing newlines are kept as hard breaks (runs of newlines collapse into
    one break). Words are never split, so a single word wider than
    ``max_width`` still occupies its own, over-long line.

    Args:
        draw: Drawing context used to measure candidate lines.
        text: Text to wrap.
        font: Font the text will be drawn with.
        max_width: Maximum allowed right edge of a line, in pixels.

    Returns:
        The wrapped text with lines joined by ``"\\n"``.
    """

    def fits(candidate: str) -> bool:
        # Index 2 of the bounding box is the right edge of the rendered text.
        return draw.textbbox((0, 0), candidate, font=font)[2] <= max_width

    paragraphs = []
    for paragraph in re.split(r"\n+", text):
        words = paragraph.split()
        if not words:
            paragraphs.append("")
            continue
        first_word, *remaining = words
        rows = []
        row = first_word
        for word in remaining:
            extended = f"{row} {word}"
            if fits(extended):
                row = extended
            else:
                rows.append(row)
                row = word
        rows.append(row)
        paragraphs.append("\n".join(rows))
    return "\n".join(paragraphs)
def rounded_box(draw: ImageDraw.ImageDraw, box, fill, outline=None, radius=24, width=2):
    """Draw a rounded rectangle; a thin wrapper that supplies the file's defaults.

    Args:
        draw: Drawing context to paint on.
        box: ``(x0, y0, x1, y1)`` rectangle.
        fill: Fill colour.
        outline: Outline colour, or ``None`` for no outline.
        radius: Corner radius.
        width: Outline width.
    """
    style = {"radius": radius, "fill": fill, "outline": outline, "width": width}
    draw.rounded_rectangle(box, **style)
def add_shadow(base: Image.Image, box, radius=28):
    """Composite a blurred drop shadow for ``box`` onto ``base`` in place.

    Args:
        base: RGBA image that receives the shadow.
        box: ``(x0, y0, x1, y1)`` rectangle the shadow is cast for.
        radius: Gaussian blur radius. The corner radius of the shadow shape
            itself is fixed at 28 and is not affected by this argument.
    """
    layer = Image.new("RGBA", base.size, (0, 0, 0, 0))
    ImageDraw.Draw(layer).rounded_rectangle(box, radius=28, fill=SHADOW)
    blurred = layer.filter(ImageFilter.GaussianBlur(radius=radius))
    base.alpha_composite(blurred)
def draw_gradient_background(img: Image.Image, t: float):
    """Paint the animated gradient and a faint grid onto ``img`` in place.

    The gradient runs vertically from ``BG`` (top) to ``BG2`` (bottom). A
    slow sine pulse driven by ``t`` adds a small brightening that grows
    towards the right edge.

    Args:
        img: RGBA frame to paint on.
        t: Time in seconds that drives the pulse.
    """
    y = np.linspace(0, 1, img.height)[:, None]
    x = np.linspace(0, 1, img.width)[None, :]
    pulse = 0.5 + 0.5 * math.sin(t * 0.55)
    channel_boost = (8, 10, 20)  # per-channel strength of the pulse for R, G, B
    arr = np.zeros((img.height, img.width, 3), dtype=np.uint8)
    for channel, (top, bottom, boost) in enumerate(zip(BG, BG2, channel_boost)):
        values = top * (1 - y) + bottom * y + boost * pulse * x
        arr[:, :, channel] = values.clip(0, 255).astype(np.uint8)
    # An (H, W, 3) uint8 array is interpreted as RGB without the ``mode`` hint.
    img.alpha_composite(Image.fromarray(arr).convert("RGBA"))
    # Drawing a translucent colour straight onto an RGBA image overwrites the
    # pixels (alpha included) instead of blending, which turns the grid solid
    # white once the frame is converted to RGB. Compositing a separate overlay
    # gives the intended faint lines.
    img.alpha_composite(_grid_overlay(img.width, img.height))


@functools.lru_cache(maxsize=4)
def _grid_overlay(width: int, height: int) -> Image.Image:
    """Build (once per frame size) the transparent layer holding the grid lines."""
    overlay = Image.new("RGBA", (width, height), (0, 0, 0, 0))
    pen = ImageDraw.Draw(overlay)
    step = max(40, width // 18)
    grid_alpha = 30
    for xx in range(0, width, step):
        pen.line((xx, 0, xx, height), fill=(255, 255, 255, grid_alpha), width=1)
    for yy in range(0, height, step):
        pen.line((0, yy, width, yy), fill=(255, 255, 255, grid_alpha), width=1)
    return overlay
def paste_asset_background(img: Image.Image, asset_path: Optional[Path], t: float):
    """Composite a slowly panning scene image plus a dark overlay onto ``img``.

    Does nothing when ``asset_path`` is falsy or the file does not exist.

    Args:
        img: RGBA frame to paint on (modified in place).
        asset_path: Image file to use as the scene background.
        t: Time in seconds that drives the pan.
    """
    if not asset_path or not asset_path.exists():
        return
    asset = _load_scaled_asset(asset_path, img.width, img.height)
    # The scaled asset is 8% larger than the frame; pan inside that margin.
    pan_x = int((asset.width - img.width) * (0.04 + 0.02 * math.sin(t * 0.3)))
    pan_y = int((asset.height - img.height) * (0.04 + 0.02 * math.cos(t * 0.2)))
    # crop() returns a new image, so the cached asset is never modified.
    view = asset.crop((pan_x, pan_y, pan_x + img.width, pan_y + img.height))
    img.alpha_composite(view)
    # Darken the picture so text drawn on top stays readable.
    img.alpha_composite(Image.new("RGBA", img.size, (5, 8, 14, 150)))


@functools.lru_cache(maxsize=16)
def _load_scaled_asset(asset_path: Path, width: int, height: int) -> Image.Image:
    """Load ``asset_path`` and scale it to cover a ``width`` x ``height`` frame with an 8% margin.

    Cached because this runs for every frame and the decode plus LANCZOS
    resize is by far the most expensive step.
    """
    with Image.open(asset_path) as source:
        asset = source.convert("RGBA")
    scale = max(width / asset.width, height / asset.height) * 1.08
    new_size = (int(asset.width * scale), int(asset.height * scale))
    return asset.resize(new_size, Image.LANCZOS)
def draw_top_label(draw, width, height):
    """Draw the "SQUAREMCP" brand pill near the top-left corner of the frame.

    Args:
        draw: Drawing context for the frame.
        width: Frame width in pixels.
        height: Frame height in pixels.
    """
    label = "SQUAREMCP"
    font = get_font(max(18, width // 42), bold=True)
    left = width * 0.08
    top = height * 0.06
    label_right = draw.textbbox((0, 0), label, font=font)[2]
    pill = (left - 18, top - 12, left + label_right + 18, top + 42)
    rounded_box(draw, pill, fill=(18, 24, 38, 215), outline=(55, 75, 110), radius=18)
    draw.text((left, top), label, font=font, fill=TEXT)
def draw_main_text(draw, width, height, text, progress):
    """Draw the scene headline inside a rounded panel with a slide-up entrance.

    Args:
        draw: Drawing context for the frame.
        width: Frame width in pixels.
        height: Frame height in pixels.
        text: Headline; each ``" | "`` is turned into a line break.
        progress: Scene progress in 0..1. The headline slides up by 40 px
            over the first part of the scene.

    Returns:
        ``(next_y, body_font)``: the y coordinate 30 px below the headline
        text and a regular-weight font sized for body copy.
    """
    line_spacing = 12
    font = get_font(max(34, width // 16), bold=True)
    body_font = get_font(max(22, width // 32), bold=False)
    max_width = int(width * 0.78)
    wrapped = wrap_text(draw, text.replace(" | ", "\n"), font, max_width)
    # Measure with the same spacing used for drawing; measuring with another
    # spacing makes the panel too short or too tall for multi-line headlines.
    left, top, right, bottom = draw.multiline_textbbox((0, 0), wrapped, font=font, spacing=line_spacing)
    tw, th = right - left, bottom - top
    x = int(width * 0.08)
    y = int(height * 0.18 + (1 - ease_in_out(min(progress * 1.6, 1.0))) * 40)
    box = (x - 26, y - 22, x + tw + 28, y + th + 26)
    draw.rounded_rectangle(box, radius=30, fill=(14, 18, 30, 185), outline=(52, 68, 98), width=2)
    draw.multiline_text((x, y), wrapped, font=font, fill=TEXT, spacing=line_spacing)
    return y + th + 30, body_font
def draw_footer_caption(draw, width, height, subtitles: List[Subtitle], current_t: float):
    """Draw the caption that is active at ``current_t``, centred near the bottom.

    The first cue with ``start_s <= current_t < end_s`` is used. Nothing is
    drawn when no cue is active or the active cue has empty text.

    Args:
        draw: Drawing context for the frame.
        width: Frame width in pixels.
        height: Frame height in pixels.
        subtitles: Caption cues to choose from.
        current_t: Time in seconds on the caption timeline.
    """
    line_spacing = 8
    active = next((sub.text for sub in subtitles if sub.start_s <= current_t < sub.end_s), None)
    if not active:
        return
    font = get_font(max(24, width // 28), bold=True)
    max_width = int(width * 0.78)
    active = wrap_text(draw, active, font, max_width)
    # Measure with the same spacing used for drawing so the background box
    # matches the rendered text height.
    left, top, right, bottom = draw.multiline_textbbox((0, 0), active, font=font, spacing=line_spacing)
    tw, th = right - left, bottom - top
    x = (width - tw) // 2
    y = int(height * 0.82)
    pad = 22
    draw.rounded_rectangle((x - pad, y - pad, x + tw + pad, y + th + pad), radius=26, fill=(8, 10, 16, 205), outline=(56, 72, 110), width=2)
    draw.multiline_text((x, y), active, font=font, fill=TEXT, spacing=line_spacing, align="center")
def draw_node(draw, cx, cy, r, label, progress, active=False):
    """Draw a labelled circular node centred on ``(cx, cy)``.

    Args:
        draw: Drawing context for the frame.
        cx: Centre x coordinate.
        cy: Centre y coordinate.
        r: Circle radius; the label font size is derived from it.
        label: Text placed in the middle of the circle.
        progress: Accepted for call-site symmetry; not used by this function.
        active: Use the accent colours instead of the neutral card colours.
    """
    if active:
        fill, outline = ACCENT, (160, 208, 255)
    else:
        fill, outline = CARD_2, STROKE
    draw.ellipse((cx - r, cy - r, cx + r, cy + r), fill=fill, outline=outline, width=4)
    font = get_font(max(22, int(r * 0.33)), bold=True)
    left, top, right, bottom = draw.textbbox((0, 0), label, font=font)
    label_pos = (cx - (right - left) / 2, cy - (bottom - top) / 2)
    draw.text(label_pos, label, font=font, fill=TEXT)
def draw_scene_visual(scene: Scene, draw: ImageDraw.ImageDraw, img: Image.Image, local_t: float, progress: float, width: int, height: int):
    """Draw the illustration that belongs to ``scene.scene``.

    Scenes 1-7 each have a dedicated renderer; any other scene number draws
    nothing.

    Args:
        scene: Scene being rendered.
        draw: Drawing context bound to ``img``.
        img: RGBA frame; needed by renderers that composite shadows.
        local_t: Seconds elapsed since the start of the scene.
        progress: ``local_t`` as a 0..1 fraction of the scene duration.
        width: Frame width in pixels.
        height: Frame height in pixels.
    """
    renderer = _SCENE_RENDERERS.get(scene.scene)
    if renderer is not None:
        renderer(scene, draw, img, local_t, progress, width, height)


def _draw_scene_flow(scene, draw, img, local_t, progress, width, height):
    """Scene 1: Agent -> Gateway -> Tools with an animated connector line."""
    cy = int(height * 0.58)
    x1, x2, x3 = int(width * 0.18), int(width * 0.5), int(width * 0.82)
    draw_node(draw, x1, cy, int(width * 0.07), "Agent", progress, active=True)
    rounded_box(draw, (x2 - 120, cy - 90, x2 + 120, cy + 90), fill=CARD, outline=(90, 116, 168), radius=30, width=3)
    mid_font = get_font(max(28, width // 28), bold=True)
    label = "Gateway"
    bbox = draw.textbbox((0, 0), label, font=mid_font)
    draw.text((x2 - (bbox[2] - bbox[0]) / 2, cy - 15), label, font=mid_font, fill=TEXT)
    draw_node(draw, x3, cy, int(width * 0.07), "Tools", progress, active=False)
    # The connector grows over the first 70% of the scene.
    p = ease_in_out(min(local_t / max(scene.duration * 0.7, 0.001), 1.0))
    # Interpolate from the same x the line is drawn from; a different start
    # makes the line point backwards at the very beginning of the animation.
    line_start = x1 + 78
    stop_x = lerp(line_start, x2 - 124, p)
    draw.line((line_start, cy, stop_x, cy), fill=ACCENT_2, width=10)
    if p > 0.96:
        draw.ellipse((stop_x - 10, cy - 10, stop_x + 10, cy + 10), fill=ACCENT_2)


def _draw_scene_cards(scene, draw, img, local_t, progress, width, height):
    """Scene 2: four system cards that slide up one after another."""
    cards = ["APIs", "Databases", "Workflows", "Ops Tools"]
    positions = [(0.08, 0.56), (0.54, 0.56), (0.08, 0.75), (0.54, 0.75)]
    for idx, (label, pos) in enumerate(zip(cards, positions)):
        delay = idx * 0.12  # stagger each card's entrance
        p = ease_in_out((progress - delay) / 0.65)
        if p <= 0:
            continue
        x = int(width * pos[0])
        y = int(height * pos[1] + (1 - p) * 80)
        w = int(width * 0.32)
        h = int(height * 0.11)
        add_shadow(img, (x, y, x + w, y + h), radius=18)
        rounded_box(draw, (x, y, x + w, y + h), fill=CARD, outline=STROKE, radius=28)
        f = get_font(max(26, width // 30), bold=True)
        draw.text((x + 28, y + 26), label, font=f, fill=TEXT)
        # Fake interface lines
        for line_idx in range(3):
            yy = y + 64 + line_idx * 14
            draw.line((x + 28, yy, x + w - 28 - line_idx * 40, yy), fill=MUTED, width=4)


def _draw_scene_policy(scene, draw, img, local_t, progress, width, height):
    """Scene 3: a "Policy Controls" panel next to an "Audit Trail" panel."""
    left = (int(width * 0.08), int(height * 0.52), int(width * 0.46), int(height * 0.76))
    right = (int(width * 0.52), int(height * 0.52), int(width * 0.92), int(height * 0.76))
    add_shadow(img, left, radius=20)
    add_shadow(img, right, radius=20)
    rounded_box(draw, left, fill=CARD, outline=STROKE, radius=28)
    rounded_box(draw, right, fill=CARD, outline=STROKE, radius=28)
    title_font = get_font(max(26, width // 28), bold=True)
    small = get_font(max(20, width // 36), bold=False)
    draw.text((left[0] + 26, left[1] + 22), "Policy Controls", font=title_font, fill=TEXT)
    for i, label in enumerate(["OAuth required", "Scoped tools", "Write actions blocked"]):
        yy = left[1] + 84 + i * 54
        draw.rounded_rectangle((left[0] + 26, yy, left[0] + 74, yy + 28), radius=14, fill=(36, 56, 86), outline=(98, 132, 188))
        # The third toggle is drawn in the "off" position with a muted knob.
        knob_x = left[0] + 50 + (18 if i != 2 else 0)
        draw.ellipse((knob_x, yy + 4, knob_x + 20, yy + 24), fill=ACCENT_2 if i != 2 else MUTED)
        draw.text((left[0] + 92, yy - 2), label, font=small, fill=TEXT)
    draw.text((right[0] + 26, right[1] + 22), "Audit Trail", font=title_font, fill=TEXT)
    for i, item in enumerate(["tool.call github.list_prs", "auth.ok service_account", "policy.allow read_only"]):
        yy = right[1] + 84 + i * 52
        draw.rounded_rectangle((right[0] + 22, yy, right[2] - 22, yy + 36), radius=16, fill=CARD_2, outline=(60, 78, 116))
        draw.text((right[0] + 36, yy + 6), item, font=small, fill=TEXT)


def _draw_scene_brand(scene, draw, img, local_t, progress, width, height):
    """Scene 4: product name and tagline above a compact three-node diagram."""
    title = "SquareMCP"
    subtitle = "Managed MCP gateway for internal tools"
    title_font = get_font(max(56, width // 10), bold=True)
    subtitle_font = get_font(max(28, width // 28), bold=False)
    bbox = draw.textbbox((0, 0), title, font=title_font)
    tw = bbox[2] - bbox[0]
    draw.text(((width - tw) / 2, height * 0.52), title, font=title_font, fill=TEXT)
    bbox2 = draw.textbbox((0, 0), subtitle, font=subtitle_font)
    sw = bbox2[2] - bbox2[0]
    draw.text(((width - sw) / 2, height * 0.52 + 120), subtitle, font=subtitle_font, fill=MUTED)
    cy = int(height * 0.74)
    x1, x2, x3 = int(width * 0.16), int(width * 0.5), int(width * 0.84)
    draw_node(draw, x1, cy, int(width * 0.05), "Agent", progress, active=True)
    rounded_box(draw, (x2 - 130, cy - 74, x2 + 130, cy + 74), fill=CARD, outline=(90, 116, 168), radius=26, width=3)
    mid_font = get_font(max(24, width // 32), bold=True)
    label = "SquareMCP"
    bbox = draw.textbbox((0, 0), label, font=mid_font)
    draw.text((x2 - (bbox[2] - bbox[0]) / 2, cy - 12), label, font=mid_font, fill=TEXT)
    draw_node(draw, x3, cy, int(width * 0.05), "Tools", progress, active=False)
    draw.line((x1 + 58, cy, x2 - 132, cy), fill=ACCENT_2, width=8)
    draw.line((x2 + 132, cy, x3 - 58, cy), fill=ACCENT, width=8)


def _draw_scene_runtime(scene, draw, img, local_t, progress, width, height):
    """Scene 5: a "Gateway Runtime" window with tools, access and audit-log panels."""
    shell = (int(width * 0.08), int(height * 0.5), int(width * 0.92), int(height * 0.8))
    add_shadow(img, shell, radius=24)
    rounded_box(draw, shell, fill=(15, 21, 34), outline=(64, 85, 126), radius=34)
    title_font = get_font(max(26, width // 28), bold=True)
    small = get_font(max(20, width // 38), bold=False)
    draw.text((shell[0] + 28, shell[1] + 22), "Gateway Runtime", font=title_font, fill=TEXT)
    left_panel = (shell[0] + 28, shell[1] + 72, shell[0] + int(width * 0.28), shell[3] - 28)
    mid_panel = (left_panel[2] + 22, shell[1] + 72, left_panel[2] + 22 + int(width * 0.22), shell[3] - 28)
    right_panel = (mid_panel[2] + 22, shell[1] + 72, shell[2] - 28, shell[3] - 28)
    for box, label in [(left_panel, "Connected Tools"), (mid_panel, "Access"), (right_panel, "Audit Log")]:
        rounded_box(draw, box, fill=CARD, outline=STROKE, radius=24)
        draw.text((box[0] + 18, box[1] + 14), label, font=small, fill=MUTED)
    for i, tool in enumerate(["postgres.query", "github.repos", "zendesk.ticket"]):
        yy = left_panel[1] + 52 + i * 58
        draw.rounded_rectangle((left_panel[0] + 14, yy, left_panel[2] - 14, yy + 42), radius=16, fill=CARD_2, outline=(58, 76, 110))
        draw.text((left_panel[0] + 28, yy + 10), tool, font=small, fill=TEXT)
    for i, item in enumerate(["Auth OK", "Read only", "Tenant scoped"]):
        yy = mid_panel[1] + 56 + i * 70
        draw.rounded_rectangle((mid_panel[0] + 16, yy, mid_panel[2] - 16, yy + 48), radius=18, fill=(25, 37, 56), outline=(74, 96, 138))
        draw.text((mid_panel[0] + 28, yy + 13), item, font=small, fill=ACCENT_2 if i == 0 else TEXT)
    logs = [
        "12:01 auth.ok",
        "12:01 tool.call postgres.query",
        "12:01 policy.allow",
        "12:02 result.ok",
    ]
    for i, log in enumerate(logs):
        yy = right_panel[1] + 48 + i * 44
        draw.text((right_panel[0] + 18, yy), log, font=small, fill=TEXT if i % 2 == 0 else MUTED)
        draw.line((right_panel[0] + 18, yy + 28, right_panel[2] - 18, yy + 28), fill=(50, 66, 96), width=2)


def _draw_scene_audiences(scene, draw, img, local_t, progress, width, height):
    """Scene 6: three audience cards that slide up one after another."""
    titles = ["AI startups", "Copilot teams", "Regulated teams"]
    subs = ["Move fast with guardrails", "Connect real internal systems", "Keep logs and policy boundaries"]
    widths = [0.26, 0.26, 0.26]
    start_x = 0.08
    gap = 0.03
    top = int(height * 0.56)
    title_font = get_font(max(24, width // 34), bold=True)
    body_font = get_font(max(20, width // 42), bold=False)
    x = start_x
    for idx, (title, sub, w_pct) in enumerate(zip(titles, subs, widths)):
        # Advance the column cursor first so a card that is still hidden
        # keeps its slot.
        column_x = x
        x += w_pct + gap
        p = ease_in_out((progress - idx * 0.12) / 0.7)
        if p <= 0:
            continue
        box_w = int(width * w_pct)
        box_h = int(height * 0.18)
        xx = int(width * column_x)
        yy = int(top + (1 - p) * 70)
        add_shadow(img, (xx, yy, xx + box_w, yy + box_h), radius=18)
        rounded_box(draw, (xx, yy, xx + box_w, yy + box_h), fill=CARD, outline=STROKE, radius=26)
        draw.text((xx + 20, yy + 18), title, font=title_font, fill=TEXT)
        wrapped = wrap_text(draw, sub, body_font, box_w - 40)
        draw.multiline_text((xx + 20, yy + 66), wrapped, font=body_font, fill=MUTED, spacing=8)


def _draw_scene_outro(scene, draw, img, local_t, progress, width, height):
    """Scene 7: the site address in a panel with the tagline above it."""
    title = "squaremcp.com"
    font = get_font(max(52, width // 12), bold=True)
    bbox = draw.textbbox((0, 0), title, font=font)
    tw = bbox[2] - bbox[0]
    th = bbox[3] - bbox[1]
    x = (width - tw) / 2
    y = height * 0.60
    draw.rounded_rectangle((x - 30, y - 24, x + tw + 30, y + th + 24), radius=30, fill=(14, 18, 30, 190), outline=(70, 94, 140), width=2)
    draw.text((x, y), title, font=font, fill=TEXT)
    small = get_font(max(24, width // 32), bold=False)
    msg = "Managed MCP gateway for internal tools"
    bbox2 = draw.textbbox((0, 0), msg, font=small)
    draw.text(((width - (bbox2[2] - bbox2[0])) / 2, y - 80), msg, font=small, fill=MUTED)


# Scene number -> renderer. Every renderer takes the same arguments as
# draw_scene_visual, even when it ignores some of them.
_SCENE_RENDERERS = {
    1: _draw_scene_flow,
    2: _draw_scene_cards,
    3: _draw_scene_policy,
    4: _draw_scene_brand,
    5: _draw_scene_runtime,
    6: _draw_scene_audiences,
    7: _draw_scene_outro,
}
def make_scene_clip(scene: Scene, subtitles: List[Subtitle], width: int, height: int, assets_dir: Optional[Path], show_captions: bool) -> VideoClip:
    """Build the procedurally rendered clip for a single scene.

    Args:
        scene: Scene to render; its duration becomes the clip duration.
        subtitles: Caption cues, looked up on the global timeline
            (``scene.start_s`` plus the time inside the clip).
        width: Frame width in pixels.
        height: Frame height in pixels.
        assets_dir: Directory searched for ``scene<N>.png``, or ``None``.
        show_captions: Whether to burn captions into the frames.

    Returns:
        A ``VideoClip`` whose frames are generated on demand.
    """
    asset_path = None
    if assets_dir:
        asset_path = assets_dir / f"scene{scene.scene}.png"

    def frame_fn(t: float):
        # Keep the requested time inside the scene before deriving progress.
        local_t = max(0.0, min(scene.duration, t))
        progress = local_t / max(scene.duration, 0.001)
        global_t = scene.start_s + local_t

        frame = Image.new("RGBA", (width, height), (0, 0, 0, 255))
        draw_gradient_background(frame, global_t)
        paste_asset_background(frame, asset_path, global_t)

        # The pen is created after the background so it draws on the final pixels.
        pen = ImageDraw.Draw(frame)
        draw_top_label(pen, width, height)
        draw_main_text(pen, width, height, scene.on_screen_text, progress)
        draw_scene_visual(scene, pen, frame, local_t, progress, width, height)
        if show_captions:
            draw_footer_caption(pen, width, height, subtitles, global_t)
        return np.array(frame.convert("RGB"))

    return VideoClip(frame_fn, duration=scene.duration)
def attach_audio(clip: VideoClip, voiceover_path: Optional[Path]) -> VideoClip:
    """Return ``clip`` with the voiceover attached, trimmed to the clip length.

    Args:
        clip: Video to add narration to.
        voiceover_path: Audio file to attach; when falsy the clip is returned
            unchanged.

    Returns:
        The original clip, or a copy carrying the audio track.
    """
    if voiceover_path:
        narration = AudioFileClip(str(voiceover_path))
        if narration.duration > clip.duration:
            # Cut narration that would run past the end of the video.
            narration = narration.subclip(0, clip.duration)
        return clip.set_audio(narration)
    return clip
def main() -> None:
    """Render the video described by the command-line options.

    Loads the shot list and captions, builds one clip per scene, joins them,
    attaches the optional voiceover and encodes the result as H.264.

    Raises:
        SystemExit: If the shot list file is missing or contains no scenes.
    """
    args = parse_args()
    # --draft overrides any --width/--height with a fixed 720x1280 frame.
    width, height = (720, 1280) if args.draft else (args.width, args.height)
    shotlist_path = Path(args.shotlist)
    captions_path = Path(args.captions) if args.captions else None
    output_path = Path(args.output)
    assets_dir = Path(args.assets_dir) if args.assets_dir else None
    voiceover_path = Path(args.voiceover) if args.voiceover else None

    if not shotlist_path.is_file():
        raise SystemExit(f"Shot list not found: {shotlist_path}")
    scenes = load_scenes(shotlist_path)
    if not scenes:
        # Concatenating zero clips would otherwise fail with an opaque error.
        raise SystemExit(f"Shot list contains no scenes: {shotlist_path}")

    subtitles = [] if args.no_captions else parse_srt(captions_path)
    clips = [
        make_scene_clip(scene, subtitles, width, height, assets_dir, not args.no_captions)
        for scene in scenes
    ]
    final = concatenate_videoclips(clips, method="compose")
    final = attach_audio(final, voiceover_path)
    try:
        final.write_videofile(
            str(output_path),
            fps=args.fps,
            codec="libx264",
            audio_codec="aac" if voiceover_path else None,
            preset="medium" if not args.draft else "ultrafast",
            threads=4,
        )
    finally:
        # Release the audio reader and any other resources held by the clip.
        final.close()


if __name__ == "__main__":
    main()