chore: ruler files update

Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
This commit is contained in:
2026-05-24 21:03:49 -04:00
parent 97b3ddd653
commit abb472c83d
303 changed files with 46670 additions and 25369 deletions

View File

@@ -0,0 +1,108 @@
#!/usr/bin/env bash
# Mix a BGM track into an MP4 video.
#
# Usage:
#   bash add-music.sh <input.mp4> [--mood=<name>] [--music=<path>] [--out=<path>]
#
# Mood library (in ../assets/, matching bgm-<mood>.mp3):
#   tech            — Apple Silicon / product keynote vibe, minimal synth+piano (default)
#   ad              — upbeat modern, clear build + drop, social-media ad energy
#   educational     — warm, patient, inviting learning tone
#   educational-alt — alternate take of educational
#   tutorial        — lo-fi background, stays out of voiceover's way
#   tutorial-alt    — alternate take of tutorial
#
# Flags (all optional):
#   --mood=<name>   pick a preset from the library (default: tech)
#   --music=<path>  override with your own audio file (wins over --mood)
#   --out=<path>    output path (default: <input-basename>-bgm.mp4)
#
# Legacy positional form still works: bash add-music.sh in.mp4 music.mp3 out.mp4
#
# Behavior:
#   - Music is trimmed to match video duration
#   - 0.3s fade in, 1.0s fade out (avoids hard cuts)
#   - Video stream copied (no re-encode), audio AAC 192k
#   - A leading "~/" in --music / --out is expanded by this script, because the
#     shell does not expand a tilde that sits inside a --flag=value word
#
# Examples:
#   bash add-music.sh my.mp4                                # default: tech mood
#   bash add-music.sh my.mp4 --mood=ad                      # switch mood
#   bash add-music.sh my.mp4 --mood=educational --out=final.mp4
#   bash add-music.sh my.mp4 --music=~/Downloads/song.mp3   # bring your own
#
set -e

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
ASSETS_DIR="$SCRIPT_DIR/../assets"

# Print the mood names found in ASSETS_DIR (bgm-<mood>.mp3), space separated.
# Uses a glob instead of parsing `ls` output; prints nothing if none exist.
list_moods() {
  local track name
  for track in "$ASSETS_DIR"/bgm-*.mp3; do
    [ -e "$track" ] || continue
    name="${track##*/}"
    name="${name#bgm-}"
    printf '%s ' "${name%.mp3}"
  done
}

# Expand a leading "~" or "~/" to $HOME; any other value is printed unchanged.
expand_tilde() {
  case "$1" in
    "~") printf '%s\n' "$HOME" ;;
    "~/"*) printf '%s\n' "$HOME/${1:2}" ;;
    *) printf '%s\n' "$1" ;;
  esac
}

# ── Parse args ───────────────────────────────────────────────────────
INPUT=""
MOOD="tech"
CUSTOM_MUSIC=""
OUTPUT=""
POSITIONAL=()
for arg in "$@"; do
  case "$arg" in
    --mood=*) MOOD="${arg#*=}" ;;
    --music=*) CUSTOM_MUSIC="${arg#*=}" ;;
    --out=*) OUTPUT="${arg#*=}" ;;
    *) POSITIONAL+=("$arg") ;;
  esac
done

# Legacy positional: <input> [music] [output]; explicit flags take precedence.
INPUT="${POSITIONAL[0]}"
if [ -z "$CUSTOM_MUSIC" ] && [ -n "${POSITIONAL[1]}" ]; then
  CUSTOM_MUSIC="${POSITIONAL[1]}"
fi
if [ -z "$OUTPUT" ] && [ -n "${POSITIONAL[2]}" ]; then
  OUTPUT="${POSITIONAL[2]}"
fi
CUSTOM_MUSIC="$(expand_tilde "$CUSTOM_MUSIC")"
OUTPUT="$(expand_tilde "$OUTPUT")"

if [ -z "$INPUT" ] || [ ! -f "$INPUT" ]; then
  echo "Usage: bash add-music.sh <input.mp4> [--mood=<name>] [--music=<path>] [--out=<path>]" >&2
  echo "Moods available: $(list_moods)" >&2
  exit 1
fi

# ── Resolve music source: --music wins, else --mood ─────────────────
if [ -n "$CUSTOM_MUSIC" ]; then
  MUSIC="$CUSTOM_MUSIC"
  SOURCE_LABEL="custom: $MUSIC"
else
  MUSIC="$ASSETS_DIR/bgm-${MOOD}.mp3"
  SOURCE_LABEL="mood: $MOOD"
fi
if [ ! -f "$MUSIC" ]; then
  echo "✗ Music not found: $MUSIC" >&2
  echo " Available moods: $(list_moods)" >&2
  exit 1
fi

# ── Resolve output path ─────────────────────────────────────────────
INPUT_DIR="$(cd "$(dirname "$INPUT")" && pwd)"
INPUT_NAME="$(basename "$INPUT" .mp4)"
if [ -z "$OUTPUT" ]; then
  OUTPUT="$INPUT_DIR/$INPUT_NAME-bgm.mp4"
fi

# ── Measure video duration, compute fade-out start ──────────────────
DURATION=$(ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "$INPUT")
# Reject empty or non-numeric output (ffprobe can print "N/A"): the value is
# spliced into the ffmpeg filter graph below.
case "$DURATION" in
  '' | *[!0-9.]*)
    echo "✗ Could not read video duration" >&2
    exit 1
    ;;
esac
# Fade-out starts 1s before the end, clamped to 0 for clips shorter than 1s.
# The duration is handed to awk as data (-v), never as program text.
FADE_OUT_START=$(awk -v total="$DURATION" 'BEGIN { d = total - 1; if (d < 0) d = 0; print d }')

echo "▸ Mixing BGM into video"
echo " input: $INPUT"
echo " music: $SOURCE_LABEL"
echo " duration: ${DURATION}s"
echo " output: $OUTPUT"

ffmpeg -y -loglevel error \
  -i "$INPUT" \
  -i "$MUSIC" \
  -filter_complex "[1:a]atrim=0:${DURATION},asetpts=PTS-STARTPTS,afade=t=in:st=0:d=0.3,afade=t=out:st=${FADE_OUT_START}:d=1[a]" \
  -map 0:v -map "[a]" \
  -c:v copy -c:a aac -b:a 192k -shortest \
  "$OUTPUT"

SIZE=$(du -h "$OUTPUT" | cut -f1)
echo "✓ Done: $OUTPUT ($SIZE)"

View File

@@ -0,0 +1,83 @@
#!/bin/bash
# Convert MP4 animations to 60fps MP4 and optimized GIF.
#
# Usage:
#   ./convert-formats.sh input.mp4 [gif_width] [--minterpolate]
#
# Produces next to the input:
#   <name>-60fps.mp4  (1920x1080, 60fps, frame-duplicated by default)
#   <name>.gif        (scaled width, 15fps, palette-optimized)
#
# Flags:
#   --minterpolate  Enable motion-compensated interpolation (high quality
#                   but elementary stream has known QuickTime/Safari
#                   compat issues — only use if your player handles it).
#
# Default 60fps mode: simple `fps=60` filter (frame duplication). Wide
# compatibility, plays in QuickTime / Safari / Chrome / VLC. The 60fps
# label is for upload-platform optics; perceived smoothness is identical
# to the source 25fps for most CSS-driven motion.
#
# When to enable --minterpolate: heavy translate/scale motion where you
# want true 60fps interpolation. WARN: macOS QuickTime sometimes refuses
# to open minterpolate output. Test before delivering.
#
# GIF uses two-pass palette:
#   pass 1: palettegen with stats_mode=diff (per-video optimal palette)
#   pass 2: paletteuse with bayer dither + rectangle diff
# This keeps 30s/1080p animations GIF under ~4MB with good color fidelity.
#
# gif_width must be a positive integer (pixels). The intermediate palette
# file is removed on every exit path, including ffmpeg failures.
set -e

INPUT=""
GIF_WIDTH="960"
USE_MINTERPOLATE=0
for arg in "$@"; do
  case "$arg" in
    --minterpolate) USE_MINTERPOLATE=1 ;;
    --*)
      echo "Unknown flag: $arg" >&2
      exit 1
      ;;
    *)
      # First positional is the input; any later positional sets the GIF width.
      if [ -z "$INPUT" ]; then
        INPUT="$arg"
      else
        GIF_WIDTH="$arg"
      fi
      ;;
  esac
done

if [ -z "$INPUT" ]; then
  echo "Usage: $0 input.mp4 [gif_width] [--minterpolate]" >&2
  exit 1
fi
if [ ! -f "$INPUT" ]; then
  echo "Input not found: $INPUT" >&2
  exit 1
fi
# The width is spliced into the ffmpeg filter string, so only digits are accepted.
case "$GIF_WIDTH" in
  '' | *[!0-9]* | 0)
    echo "Invalid gif_width: $GIF_WIDTH (expected a positive integer)" >&2
    exit 1
    ;;
esac

DIR=$(dirname -- "$INPUT")
BASE=$(basename -- "$INPUT" .mp4)
OUT60="$DIR/$BASE-60fps.mp4"
OUTGIF="$DIR/$BASE.gif"
PAL="$DIR/.palette-$BASE.png"

# Remove the palette even when one of the ffmpeg passes aborts the script.
cleanup() { rm -f -- "$PAL"; }
trap cleanup EXIT

if [ "$USE_MINTERPOLATE" = "1" ]; then
  echo "▸ 60fps interpolate (minterpolate, high quality): $OUT60"
  VFILTER="minterpolate=fps=60:mi_mode=mci:mc_mode=aobmc:me_mode=bidir:vsbmc=1"
else
  echo "▸ 60fps frame-duplicate (compat mode): $OUT60"
  VFILTER="fps=60"
fi

# -profile:v high -level 4.0 → broad H.264 compatibility (QuickTime, Safari, mobile)
# -movflags +faststart       → moov atom upfront, streamable / instant-play
ffmpeg -y -loglevel error -i "$INPUT" \
  -vf "$VFILTER" \
  -c:v libx264 -pix_fmt yuv420p -profile:v high -level 4.0 \
  -crf 18 -preset medium -movflags +faststart \
  "$OUT60"
MP4_SIZE=$(du -h "$OUT60" | cut -f1)
echo "$MP4_SIZE"

echo "▸ GIF (${GIF_WIDTH}w, 15fps, palette-optimized): $OUTGIF"
# Pass 1: generate palette tailored to this video
ffmpeg -y -loglevel error -i "$INPUT" \
  -vf "fps=15,scale=${GIF_WIDTH}:-1:flags=lanczos,palettegen=stats_mode=diff" \
  "$PAL"
# Pass 2: apply palette with dithering
ffmpeg -y -loglevel error -i "$INPUT" -i "$PAL" \
  -lavfi "fps=15,scale=${GIF_WIDTH}:-1:flags=lanczos[x];[x][1:v]paletteuse=dither=bayer:bayer_scale=5:diff_mode=rectangle" \
  "$OUTGIF"

GIF_SIZE=$(du -h "$OUTGIF" | cut -f1)
echo "$GIF_SIZE"

View File

@@ -0,0 +1,98 @@
#!/usr/bin/env node
/**
* export_deck_pdf.mjs — 把多文件 slide deck 导出为单个矢量 PDF
*
* 用法:
* node export_deck_pdf.mjs --slides <dir> --out <file.pdf> [--width 1920] [--height 1080]
*
* 特点:
* - 文字保留矢量(可复制、可搜索)
* - 背景/图形 1:1 保真（Playwright 内嵌 Chromium 渲染）
* - 不需要对 HTML 做任何改造
* - 视觉损失 = 0（PDF 就是浏览器打印出来的）
*
* trade-off：
* - PDF 不可再编辑文字（要改回到 HTML 改）
*
* 依赖：playwright、pdf-lib
* npm install playwright pdf-lib
*
* 会按文件名排序（01-xxx.html → 02-xxx.html → ...）
*/
import { chromium } from 'playwright';
import { PDFDocument } from 'pdf-lib';
import fs from 'fs/promises';
import path from 'path';
/**
 * Parse `--key value` pairs from process.argv for the multi-file PDF exporter.
 *
 * Tokens are consumed two at a time: each key has its leading `--` stripped
 * and is assigned the token that follows it. `width` and `height` default to
 * 1920 and 1080 and are returned as integers.
 *
 * Prints the usage line and exits with status 1 when `--slides` or `--out`
 * is missing, or when `--width` / `--height` is not a positive integer
 * (previously a bad value became NaN and only failed later, in the browser).
 *
 * @returns {{slides: string, out: string, width: number, height: number}}
 */
function parseArgs() {
  const usageLine = '用法: node export_deck_pdf.mjs --slides <dir> --out <file.pdf> [--width 1920] [--height 1080]';
  const args = { width: 1920, height: 1080 };
  const a = process.argv.slice(2);
  for (let i = 0; i < a.length; i += 2) {
    const k = a[i].replace(/^--/, '');
    args[k] = a[i + 1];
  }
  if (!args.slides || !args.out) {
    console.error(usageLine);
    process.exit(1);
  }
  for (const dim of ['width', 'height']) {
    // Explicit radix: never let a leading "0x" switch the base.
    const parsed = Number.parseInt(args[dim], 10);
    if (!Number.isInteger(parsed) || parsed <= 0) {
      console.error(`Invalid --${dim}: ${args[dim]} (expected a positive integer)`);
      console.error(usageLine);
      process.exit(1);
    }
    args[dim] = parsed;
  }
  return args;
}
/**
 * Render every `.html` file in the slides directory to a one-slide PDF with
 * Playwright's Chromium, then merge the per-slide PDFs into a single file
 * with pdf-lib. Slides are processed in the order given by Array.prototype.sort()
 * on their file names.
 *
 * Exits with status 1 when the directory contains no `.html` files.
 */
async function main() {
  const { slides, out, width, height } = parseArgs();
  const slidesDir = path.resolve(slides);
  const outFile = path.resolve(out);
  // Function-scope import: pathToFileURL percent-encodes spaces, '#', '?' and
  // non-ASCII characters that a hand-built 'file://' + path string would not.
  const { pathToFileURL } = await import('url');

  const files = (await fs.readdir(slidesDir))
    .filter(f => f.endsWith('.html'))
    .sort();
  if (!files.length) {
    console.error(`No .html files found in ${slidesDir}`);
    process.exit(1);
  }
  console.log(`Found ${files.length} slides in ${slidesDir}`);

  // 1) Render each HTML to its own PDF buffer
  const pageBuffers = [];
  const browser = await chromium.launch();
  try {
    const ctx = await browser.newContext({ viewport: { width, height } });
    for (const f of files) {
      const page = await ctx.newPage();
      const url = pathToFileURL(path.join(slidesDir, f)).href;
      // If waiting for network idle fails, retry with the default wait condition.
      await page.goto(url, { waitUntil: 'networkidle' }).catch(() => page.goto(url));
      await page.waitForTimeout(1200); // web-font paint
      // emulate "screen" so CSS colors/backgrounds render the same as browser
      await page.emulateMedia({ media: 'screen' });
      const buf = await page.pdf({
        width: `${width}px`,
        height: `${height}px`,
        printBackground: true,
        margin: { top: 0, right: 0, bottom: 0, left: 0 },
        preferCSSPageSize: false,
      });
      pageBuffers.push(buf);
      await page.close();
      console.log(` [${pageBuffers.length}/${files.length}] ${f}`);
    }
  } finally {
    // Close the browser even when a slide fails to render.
    await browser.close();
  }

  // 2) Merge into a single PDF
  const merged = await PDFDocument.create();
  for (const buf of pageBuffers) {
    const src = await PDFDocument.load(buf);
    const copied = await merged.copyPages(src, src.getPageIndices());
    copied.forEach(p => merged.addPage(p));
  }
  const bytes = await merged.save();
  await fs.writeFile(outFile, bytes);
  const kb = (bytes.byteLength / 1024).toFixed(0);
  console.log(`\n✓ Wrote ${outFile} (${kb} KB, ${files.length} pages, vector)`);
}

main().catch(e => { console.error(e); process.exit(1); });

View File

@@ -0,0 +1,107 @@
#!/usr/bin/env node
/**
* export_deck_pptx.mjs — 把多文件 slide deck 导出为可编辑 PPTX
*
* 用法:
* node export_deck_pptx.mjs --slides <dir> --out <file.pptx>
*
* 行为:
* - 调用 scripts/html2pptx.js 把 HTML DOM 逐元素翻译成 PowerPoint 原生对象
* - 文字是真文本框，PPT 里直接双击能编辑
* - body 尺寸 960pt × 540pt（LAYOUT_WIDE，13.333″ × 7.5″）
*
* ⚠️ HTML 必须符合 4 条硬约束（见 references/editable-pptx.md）：
* 1. 文字包在 <p>/<h1>-<h6> 里（div 不能直接放文字）
* 2. 不用 CSS 渐变
* 3. <p>/<h*> 不能有 background/border/shadow（放外层 div）
* 4. div 不能 background-image（用 <img>）
*
* 视觉驱动的 HTML 几乎无法 pass —— 必须从写 HTML 的第一行就按约束写。
* 视觉自由度优先的场景（动画、web component、CSS 渐变、复杂 SVG）
* 应改用 export_deck_pdf.mjs / export_deck_stage_pdf.mjs 导出 PDF。
*
* 依赖：npm install playwright pptxgenjs sharp
*
* 按文件名排序（01-xxx.html → 02-xxx.html → ...）。
*/
import pptxgen from 'pptxgenjs';
import fs from 'fs/promises';
import path from 'path';
import { fileURLToPath } from 'url';
// ES modules have no built-in __dirname; derive this script's directory from
// import.meta.url. main() uses it to locate html2pptx.js next to this file.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
/**
 * Read `--key value` pairs from process.argv for the PPTX exporter.
 *
 * Arguments are taken two at a time; the first of each pair has its leading
 * `--` removed and becomes a property whose value is the second. When
 * `slides` or `out` ends up missing, the usage text is printed to stderr and
 * the process exits with status 1.
 *
 * @returns {Object} parsed options (at least `slides` and `out`)
 */
function parseArgs() {
  const cli = process.argv.slice(2);
  const parsed = {};
  let pos = 0;
  while (pos < cli.length) {
    const key = cli[pos].replace(/^--/, '');
    parsed[key] = cli[pos + 1];
    pos += 2;
  }
  const hasRequired = parsed.slides && parsed.out;
  if (!hasRequired) {
    const help = [
      '用法: node export_deck_pptx.mjs --slides <dir> --out <file.pptx>',
      '',
      '⚠️ HTML 必须符合 4 条硬约束(见 references/editable-pptx.md。',
      ' 视觉自由度优先的场景请改用 export_deck_pdf.mjs 导出 PDF。',
    ];
    for (const line of help) console.error(line);
    process.exit(1);
  }
  return parsed;
}
/**
 * Convert every `.html` slide in the slides directory into one editable PPTX.
 *
 * Each file is passed to html2pptx (loaded from html2pptx.js next to this
 * script) together with a shared pptxgenjs presentation. Slides that fail to
 * convert are reported and skipped; the PPTX is still written unless every
 * slide failed, in which case the process exits with status 1.
 */
async function main() {
  const { slides, out } = parseArgs();
  const slidesDir = path.resolve(slides);
  const outFile = path.resolve(out);

  const files = (await fs.readdir(slidesDir))
    .filter(f => f.endsWith('.html'))
    .sort();
  if (!files.length) {
    console.error(`No .html files found in ${slidesDir}`);
    process.exit(1);
  }
  console.log(`Converting ${files.length} slides via html2pptx...`);

  // html2pptx.js is loaded with require(), so build a require() for this ES module.
  const { createRequire } = await import('module');
  const require = createRequire(import.meta.url);
  let html2pptx;
  try {
    html2pptx = require(path.join(__dirname, 'html2pptx.js'));
  } catch (e) {
    console.error(`✗ 加载 html2pptx.js 失败:${e.message}`);
    console.error(` 依赖缺失时请跑npm install playwright pptxgenjs sharp`);
    process.exit(1);
  }

  const pres = new pptxgen();
  pres.layout = 'LAYOUT_WIDE'; // 13.333 × 7.5 inch, matching an HTML body of 960 × 540 pt

  const errors = [];
  for (let i = 0; i < files.length; i++) {
    const f = files[i];
    const fullPath = path.join(slidesDir, f);
    try {
      await html2pptx(fullPath, pres);
      console.log(` [${i + 1}/${files.length}] ${f}`);
    } catch (e) {
      // Keep the file name and the error text visually separate in the log.
      console.error(` [${i + 1}/${files.length}] ${f} ✗ ${e.message}`);
      errors.push({ file: f, error: e.message });
    }
  }

  if (errors.length) {
    console.error(`\n⚠️ ${errors.length} 张 slide 转换失败。常见原因HTML 不符合 4 条硬约束。`);
    console.error(` 详见 references/editable-pptx.md 的「常见错误速查」。`);
    if (errors.length === files.length) {
      console.error(`✗ 全部失败,不生成 PPTX。`);
      process.exit(1);
    }
  }

  await pres.writeFile({ fileName: outFile });
  console.log(`\n✓ Wrote ${outFile} (${files.length - errors.length}/${files.length} slides, 可编辑 PPTX)`);
}

main().catch(e => { console.error(e); process.exit(1); });

View File

@@ -0,0 +1,130 @@
#!/usr/bin/env node
/**
* export_deck_stage_pdf.mjs — 单文件 <deck-stage> 架构专用 PDF 导出
*
* 用法:
* node export_deck_stage_pdf.mjs --html <deck.html> --out <file.pdf> [--width 1920] [--height 1080]
*
* 什么时候用这个脚本?
* - 你的 deck 是**单 HTML 文件**,所有 slide 是 `<section>`,外层用 `<deck-stage>` 包裹
* - 此时 `export_deck_pdf.mjs`(多文件专用)用不上
*
* 为什么不能直接 `page.pdf()`（2026-04-20 踩坑记录）：
* 1. deck-stage 的 shadow CSS `::slotted(section) { display: none }` 让只有 active slide 可见
* 2. print 媒体下外层 `!important` 压不住 shadow DOM 规则
* 3. 结果：PDF 永远只有 1 页（active 那张）
*
* 解决方案:
* 打开 HTML 后,用 page.evaluate 把所有 section 从 deck-stage slot 拔出来,
* 挂到 body 下一个普通 div内联 style 强制 position:relative + 固定尺寸,
* 每个 section 加 page-break-after: always最后一个改 auto 避免尾部空白页。
*
* 依赖playwright
* npm install playwright
*
* 输出特点:
* - 文字保留矢量(可复制、可搜索)
* - 视觉 1:1 保真
* - 字体必须能被 Chromium 加载(本地字体或 Google Fonts
*/
import { chromium } from 'playwright';
import fs from 'fs/promises';
import path from 'path';
/**
 * Parse `--key value` pairs from process.argv for the single-file
 * <deck-stage> PDF exporter.
 *
 * Tokens are consumed two at a time: each key has its leading `--` stripped
 * and is assigned the token that follows it. `width` and `height` default to
 * 1920 and 1080 and are returned as integers.
 *
 * Prints the usage line and exits with status 1 when `--html` or `--out` is
 * missing, or when `--width` / `--height` is not a positive integer
 * (previously a bad value became NaN and was passed on to the browser).
 *
 * @returns {{html: string, out: string, width: number, height: number}}
 */
function parseArgs() {
  const usageLine = '用法: node export_deck_stage_pdf.mjs --html <deck.html> --out <file.pdf> [--width 1920] [--height 1080]';
  const args = { width: 1920, height: 1080 };
  const a = process.argv.slice(2);
  for (let i = 0; i < a.length; i += 2) {
    const k = a[i].replace(/^--/, '');
    args[k] = a[i + 1];
  }
  if (!args.html || !args.out) {
    console.error(usageLine);
    process.exit(1);
  }
  for (const dim of ['width', 'height']) {
    // Explicit radix: never let a leading "0x" switch the base.
    const parsed = Number.parseInt(args[dim], 10);
    if (!Number.isInteger(parsed) || parsed <= 0) {
      console.error(`Invalid --${dim}: ${args[dim]} (expected a positive integer)`);
      console.error(usageLine);
      process.exit(1);
    }
    args[dim] = parsed;
  }
  return args;
}
/**
 * Export a single-file <deck-stage> deck to a multi-page PDF.
 *
 * After loading the HTML, every direct <section> child of <deck-stage> is
 * moved into a plain container appended to <body>, given a fixed size and a
 * page break, and the <deck-stage> element itself is hidden. The page is then
 * printed with Playwright's page.pdf().
 *
 * Exits with status 1 when the HTML file does not exist; rejects (handled by
 * the caller below) when the page has no <deck-stage> or no <section>.
 */
async function main() {
  const { html, out, width, height } = parseArgs();
  const htmlAbs = path.resolve(html);
  const outFile = path.resolve(out);
  // Function-scope import: pathToFileURL percent-encodes spaces, '#', '?' and
  // non-ASCII characters that a hand-built 'file://' + path string would not.
  const { pathToFileURL } = await import('url');

  await fs.access(htmlAbs).catch(() => {
    console.error(`HTML file not found: ${htmlAbs}`);
    process.exit(1);
  });
  console.log(`Rendering ${path.basename(htmlAbs)} → ${path.basename(outFile)}`);

  let sectionCount;
  const browser = await chromium.launch();
  try {
    const ctx = await browser.newContext({ viewport: { width, height } });
    const page = await ctx.newPage();
    await page.goto(pathToFileURL(htmlAbs).href, { waitUntil: 'networkidle' });
    await page.waitForTimeout(2500); // wait for Google Fonts + deck-stage init

    // Core fix: pull the sections out of the shadow-DOM slot and lay them flat.
    sectionCount = await page.evaluate(({ W, H }) => {
      const stage = document.querySelector('deck-stage');
      if (!stage) throw new Error('<deck-stage> not found — 这个脚本只适用于单文件 deck-stage 架构');
      const sections = Array.from(stage.querySelectorAll(':scope > section'));
      if (!sections.length) throw new Error('No <section> found inside <deck-stage>');

      // Inject print styles.
      const style = document.createElement('style');
      style.textContent = `
@page { size: ${W}px ${H}px; margin: 0; }
html, body { margin: 0 !important; padding: 0 !important; background: #fff; }
deck-stage { display: none !important; }
`;
      document.head.appendChild(style);

      // Flatten the sections under <body>.
      const container = document.createElement('div');
      container.id = 'print-container';
      sections.forEach(s => {
        // Inline style takes the highest priority; position:relative keeps
        // absolutely-positioned children constrained to their own section.
        s.style.cssText = `
width: ${W}px !important;
height: ${H}px !important;
display: block !important;
position: relative !important;
overflow: hidden !important;
page-break-after: always !important;
break-after: page !important;
margin: 0 !important;
padding: 0 !important;
`;
        container.appendChild(s);
      });
      // No page break after the last section, to avoid a trailing blank page.
      const last = sections[sections.length - 1];
      last.style.pageBreakAfter = 'auto';
      last.style.breakAfter = 'auto';
      document.body.appendChild(container);
      return sections.length;
    }, { W: width, H: height });

    await page.waitForTimeout(800);
    await page.pdf({
      path: outFile,
      width: `${width}px`,
      height: `${height}px`,
      printBackground: true,
      preferCSSPageSize: true,
    });
  } finally {
    // Close the browser even when page.evaluate or page.pdf throws.
    await browser.close();
  }

  const stat = await fs.stat(outFile);
  const kb = (stat.size / 1024).toFixed(0);
  console.log(`\n✓ Wrote ${outFile} (${kb} KB, ${sectionCount} pages, vector)`);
  console.log(` 验证页数mdimport "${outFile}" && pdfinfo "${outFile}" | grep Pages`);
}

main().catch(e => { console.error(e); process.exit(1); });

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,127 @@
#!/usr/bin/env bash
# mix-voiceover.sh · Mix a voiceover (main vocal track) + optional BGM into an MP4
#
# Usage:
#   bash mix-voiceover.sh <video.mp4> --voiceover=<voice.mp3> [options]
#
# Required:
#   --voiceover=<path>    Path to the voiceover mp3 (main vocal track, produced
#                         by narrate-pipeline.mjs)
#
# Optional:
#   --bgm=<path>          BGM mp3 path (overrides --bgm-mood)
#   --bgm-mood=<name>     Pick a preset BGM from assets/ (educational / tech / tutorial / ...)
#   --bgm-volume=<0-1>    Static BGM volume, default 0.18 (relative to the voice)
#   --no-ducking          Disable sidechain ducking (on by default: the BGM
#                         makes room automatically while the voice is audible)
#   --voice-volume=<0-2>  Voice volume multiplier, default 1.0
#   --out=<path>          Output path, default <input>-voiced.mp4
#
# Behavior:
#   - Video stream is stream-copied (no re-encode)
#   - The voice is always the main track and is required; BGM is optional
#   - Ducking is on by default: BGM is pushed down (about -10dB) while the
#     voice is audible and comes back up when the voice stops
#   - Output length = video length (a shorter voice/BGM ends in silence, a
#     longer one is cut). The voice is padded with silence (apad) so that
#     -shortest stops at the end of the video, not at the end of the voice.
#   - A leading "~/" in --voiceover / --bgm / --out is expanded by this script,
#     because the shell does not expand a tilde inside a --flag=value word
#
# Examples:
#   bash mix-voiceover.sh anim.mp4 --voiceover=narration/voiceover.mp3
#   bash mix-voiceover.sh anim.mp4 --voiceover=v.mp3 --bgm-mood=educational
#   bash mix-voiceover.sh anim.mp4 --voiceover=v.mp3 --bgm=~/Music/song.mp3 --bgm-volume=0.12
#   bash mix-voiceover.sh anim.mp4 --voiceover=v.mp3 --bgm-mood=tech --no-ducking
#
set -e

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
ASSETS_DIR="$SCRIPT_DIR/../assets"

INPUT=""
VOICEOVER=""
BGM=""
BGM_MOOD=""
BGM_VOLUME="0.18"
VOICE_VOLUME="1.0"
DUCKING="1"
OUTPUT=""

# Expand a leading "~" or "~/" to $HOME; any other value is printed unchanged.
expand_tilde() {
  case "$1" in
    "~") printf '%s\n' "$HOME" ;;
    "~/"*) printf '%s\n' "$HOME/${1:2}" ;;
    *) printf '%s\n' "$1" ;;
  esac
}

# Succeed only for a plain non-negative decimal number (digits, at most one dot).
is_number() {
  case "$1" in
    '' | . | *[!0-9.]* | *.*.*) return 1 ;;
    *) return 0 ;;
  esac
}

# Print the preset mood names found in ASSETS_DIR, space separated.
list_moods() {
  local track name
  for track in "$ASSETS_DIR"/bgm-*.mp3; do
    [ -e "$track" ] || continue
    name="${track##*/}"
    name="${name#bgm-}"
    printf '%s ' "${name%.mp3}"
  done
}

for arg in "$@"; do
  case "$arg" in
    --voiceover=*) VOICEOVER="${arg#*=}" ;;
    --bgm=*) BGM="${arg#*=}" ;;
    --bgm-mood=*) BGM_MOOD="${arg#*=}" ;;
    --bgm-volume=*) BGM_VOLUME="${arg#*=}" ;;
    --voice-volume=*) VOICE_VOLUME="${arg#*=}" ;;
    --no-ducking) DUCKING="0" ;;
    --out=*) OUTPUT="${arg#*=}" ;;
    -*)
      echo "未知参数:$arg" >&2
      exit 1
      ;;
    *) INPUT="$arg" ;;
  esac
done

VOICEOVER="$(expand_tilde "$VOICEOVER")"
BGM="$(expand_tilde "$BGM")"
OUTPUT="$(expand_tilde "$OUTPUT")"

if [ -z "$INPUT" ] || [ ! -f "$INPUT" ]; then
  echo "Usage: bash mix-voiceover.sh <video.mp4> --voiceover=<v.mp3> [--bgm=<b.mp3> | --bgm-mood=<name>]" >&2
  exit 1
fi
if [ -z "$VOICEOVER" ] || [ ! -f "$VOICEOVER" ]; then
  echo "✗ 缺 --voiceover=<path>" >&2
  exit 1
fi

# Both volumes are spliced into the ffmpeg filter graph, so accept numbers only.
if ! is_number "$VOICE_VOLUME"; then
  echo "✗ --voice-volume must be a non-negative number: $VOICE_VOLUME" >&2
  exit 1
fi
if ! is_number "$BGM_VOLUME"; then
  echo "✗ --bgm-volume must be a non-negative number: $BGM_VOLUME" >&2
  exit 1
fi

# Resolve the BGM source: an explicit --bgm wins over --bgm-mood.
if [ -z "$BGM" ] && [ -n "$BGM_MOOD" ]; then
  BGM="$ASSETS_DIR/bgm-${BGM_MOOD}.mp3"
fi
if [ -n "$BGM" ] && [ ! -f "$BGM" ]; then
  echo "✗ BGM 文件不存在: $BGM" >&2
  echo " 可用 mood: $(list_moods)" >&2
  exit 1
fi

# Output path
if [ -z "$OUTPUT" ]; then
  base="${INPUT%.*}"
  OUTPUT="${base}-voiced.mp4"
fi

echo "─ mix-voiceover ──────────────"
echo " 视频: $INPUT"
echo " 人声: $VOICEOVER (vol=$VOICE_VOLUME)"
if [ -n "$BGM" ]; then
  echo " BGM: $BGM (vol=$BGM_VOLUME, ducking=$DUCKING)"
else
  echo " BGM: (无)"
fi
echo " 输出: $OUTPUT"
echo "──────────────────────────────"

# ── ffmpeg filter graph ─────────────────────────────────────
# apad extends the voice with silence; -shortest then ends at the video length.
VOICE_CHAIN="[1:a]volume=${VOICE_VOLUME},apad"
INPUTS=(-i "$INPUT" -i "$VOICEOVER")

if [ -z "$BGM" ]; then
  # Voice only
  FILTER="${VOICE_CHAIN}[a]"
elif [ "$DUCKING" = "1" ]; then
  # Voice + BGM + sidechain ducking
  INPUTS+=(-i "$BGM")

  # The closing fade must start 0.5s before the END of the video. The previous
  # graph used st=0, which faded the whole mix to silence after half a second.
  # If the duration cannot be probed, the fade is skipped.
  TAIL_FADE=""
  VIDEO_DURATION=""
  if command -v ffprobe >/dev/null 2>&1; then
    VIDEO_DURATION=$(ffprobe -v error -show_entries format=duration \
      -of default=noprint_wrappers=1:nokey=1 "$INPUT" 2>/dev/null) || VIDEO_DURATION=""
  fi
  if is_number "$VIDEO_DURATION"; then
    FADE_START=$(awk -v total="$VIDEO_DURATION" \
      'BEGIN { s = total - 0.5; if (s < 0) s = 0; printf "%.3f", s }')
    TAIL_FADE=",afade=t=out:st=${FADE_START}:d=0.5:curve=tri"
  else
    echo " (warning: could not read video duration; closing fade skipped)" >&2
  fi

  # A filtergraph label can be consumed only once, so the voice is split into
  # one copy for the mix and one copy that keys the compressor.
  FILTER="${VOICE_CHAIN},asplit=2[voice_mix][voice_key];"
  FILTER+="[2:a]volume=${BGM_VOLUME},aloop=loop=-1:size=2e9[bgm_lo];"
  FILTER+="[bgm_lo][voice_key]sidechaincompress=threshold=0.04:ratio=8:attack=5:release=300:makeup=1[bgm_ducked];"
  FILTER+="[voice_mix][bgm_ducked]amix=inputs=2:duration=first:dropout_transition=0${TAIL_FADE}[a]"
else
  # Voice + BGM, static mix
  INPUTS+=(-i "$BGM")
  FILTER="${VOICE_CHAIN}[voice];"
  FILTER+="[2:a]volume=${BGM_VOLUME},aloop=loop=-1:size=2e9[bgm];"
  FILTER+="[voice][bgm]amix=inputs=2:duration=first:dropout_transition=0[a]"
fi

ffmpeg -y "${INPUTS[@]}" \
  -filter_complex "$FILTER" \
  -map 0:v -map "[a]" \
  -c:v copy -c:a aac -b:a 192k -shortest \
  "$OUTPUT"

echo "✓ 完成:$OUTPUT"

View File

@@ -0,0 +1,315 @@
#!/usr/bin/env node
/**
* narrate-pipeline.mjs · L2 长解说总指挥
*
* 输入：markdown 解说稿（## scene-id 分段，[[cue:id]] 标关键句）
* 输出：voiceover.mp3（拼接好的整段人声）+ timeline.json（每段 start/end + cues 绝对时间）
*
* 用法:
* node scripts/narrate-pipeline.mjs --script demo.md --out-dir _narration_demo
*
* 解说稿格式:
* ---
* title: 什么是 LLM
* voice: S_JSdgdWk22 # 可选,不填走 .env
* speed: 1.0 # 可选
* gap: 0.3 # 段间静音秒数,默认 0.3
* ---
*
* ## intro
* 大家好,我是花叔。今天我们 5 分钟讲清楚 LLM 是什么。
*
* ## what-is
* LLM 全称 Large Language Model[[cue:bigmodel]]它是一个有几千亿参数的神经网络。
* 本质是一个文字接龙的预测器。
*
* 输出文件结构（out-dir 下）：
* audio/
* intro.mp3
* what-is.mp3
* voiceover.mp3 拼接全部 scene 的整段人声
* timeline.json schema 见 references/voiceover-pipeline.md
*
* 依赖：tts-doubao.mjs、ffmpeg、ffprobe
*/
import fs from 'node:fs';
import path from 'node:path';
import { execFileSync, execSync } from 'node:child_process';
import { fileURLToPath } from 'node:url';
// ES modules have no built-in __dirname; derive this script's directory from
// import.meta.url.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Parent directory of this script's directory.
// NOTE(review): not referenced in the portion of the file visible here — confirm it is still needed.
const SKILL_ROOT = path.resolve(__dirname, '..');
// Path of the TTS command-line script (next to this file) that callTTS() runs with `node`.
const TTS_SCRIPT = path.join(__dirname, 'tts-doubao.mjs');
/**
 * Extract the recognised options from a process.argv-style array.
 *
 * Scanning starts at index 2. `--script` and `--out-dir` each take the next
 * token as their value (stored as `script` / `outDir`); `--help` and `-h` set
 * `help` to true. Anything else is ignored.
 *
 * @param {string[]} argv full argument vector, including the two leading entries
 * @returns {{script?: string, outDir?: string, help?: boolean}}
 */
function parseArgs(argv) {
  const parsed = {};
  let idx = 2;
  while (idx < argv.length) {
    switch (argv[idx]) {
      case '--script':
        idx += 1;
        parsed.script = argv[idx];
        break;
      case '--out-dir':
        idx += 1;
        parsed.outDir = argv[idx];
        break;
      case '--help':
      case '-h':
        parsed.help = true;
        break;
      default:
        break;
    }
    idx += 1;
  }
  return parsed;
}
/**
 * Print the command-line help to stderr and terminate with exit status 1.
 */
function usage() {
  const helpLines = [
    'narrate-pipeline.mjs · L2 长解说总指挥',
    '--script <path> 解说稿 .md 文件(必填)',
    '--out-dir <path> 输出目录(必填)',
    '输出:<out-dir>/voiceover.mp3 + <out-dir>/timeline.json',
  ];
  console.error(helpLines.join('\n'));
  process.exit(1);
}
/**
 * Parse the optional frontmatter and the `## scene-id` blocks of a narration
 * script.
 *
 * Frontmatter is a leading block delimited by `---` lines containing
 * `key: value` pairs. Values are returned as strings. A trailing ` # comment`
 * on a value and whole-line `#` comments are ignored, so the annotated
 * example in this file's header parses to clean values.
 *
 * Scenes start at a `## <id>` heading (id made of word characters and `-`)
 * and run up to the next such heading or the end of the text.
 *
 * A leading byte-order mark is dropped and CRLF / CR line endings are
 * normalised to LF first, so scripts saved on Windows are parsed the same way
 * and scene text carries no stray `\r`.
 *
 * @param {string} md raw markdown text
 * @returns {{meta: Object<string, string>, scenes: Array<{id: string, raw: string}>}}
 */
function parseScript(md) {
  const text = md.replace(/^\uFEFF/, '').replace(/\r\n?/g, '\n');
  const meta = {};
  let body = text;

  const fmMatch = text.match(/^---\n([\s\S]*?)\n---(?:\n|$)/);
  if (fmMatch) {
    for (const line of fmMatch[1].split('\n')) {
      if (line.trim().startsWith('#')) continue; // whole-line comment
      const idx = line.indexOf(':');
      if (idx < 0) continue;
      const key = line.slice(0, idx).trim();
      // Cut a trailing " # comment"; '#' without preceding whitespace is kept.
      const val = line.slice(idx + 1).replace(/\s+#.*$/, '').trim();
      meta[key] = val;
    }
    body = text.slice(fmMatch[0].length);
  }

  const scenes = [];
  // `$(?![\r\n])` only succeeds at the true end of the text, not at every line end.
  const re = /^##\s+([\w-]+)\s*\n([\s\S]*?)(?=^##\s+[\w-]+\s*\n|$(?![\r\n]))/gm;
  let m;
  while ((m = re.exec(body)) !== null) {
    scenes.push({ id: m[1], raw: m[2].trim() });
  }
  return { meta, scenes };
}
/**
 * Cut a scene's text at every `[[cue:id]]` marker.
 *
 * Each marker closes a chunk: the trimmed text in front of it becomes
 * `text`, and the marker's id becomes `cueAfter` (the cue sits at the end of
 * that chunk). Text after the last marker becomes a final chunk without
 * `cueAfter`; it is omitted when it is empty. A chunk that has a cue is kept
 * even when its text is empty (marker at the very start, or two markers
 * back to back).
 *
 * Example: "A[[cue:x]]B[[cue:y]]C" =>
 *   [
 *     { text: "A", cueAfter: "x" },
 *     { text: "B", cueAfter: "y" },
 *     { text: "C" }
 *   ]
 *
 * @param {string} text scene text
 * @returns {Array<{text: string, cueAfter?: string}>}
 */
function splitByCues(text) {
  // Splitting on a pattern with one capture group yields
  // [text, id, text, id, ..., trailingText].
  const parts = text.split(/\[\[cue:([\w-]+)\]\]/);
  const trailing = parts.pop().trim();

  const pieces = [];
  for (let k = 0; k < parts.length; k += 2) {
    pieces.push({ text: parts[k].trim(), cueAfter: parts[k + 1] });
  }
  if (trailing.length > 0) {
    pieces.push({ text: trailing });
  }
  return pieces;
}
/**
 * Return the container duration of a media file, in seconds, as reported by
 * `ffprobe -show_entries format=duration`.
 *
 * @param {string} filePath path of the media file
 * @returns {number} duration in seconds
 * @throws {Error} when ffprobe fails, or when its output is not a finite
 *   number (for example "N/A"); previously that case returned NaN, which
 *   would then propagate into every timeline offset computed from it.
 */
function getDuration(filePath) {
  const out = execFileSync('ffprobe', [
    '-v', 'error',
    '-show_entries', 'format=duration',
    '-of', 'default=noprint_wrappers=1:nokey=1',
    filePath,
  ], { encoding: 'utf8' });
  const seconds = parseFloat(out.trim());
  if (!Number.isFinite(seconds)) {
    throw new Error(`ffprobe returned no usable duration for ${filePath}: "${out.trim()}"`);
  }
  return seconds;
}
/**
 * Run the TTS script in a child `node` process and return what it reports.
 *
 * The child receives `--text` and `--out`, plus `--voice` / `--speed` when
 * those options are truthy. Its stderr is passed through to this process;
 * its stdout is expected to be a JSON document, which is parsed and returned.
 *
 * @param {string} text text to synthesise
 * @param {string} outPath where the child should write the audio file
 * @param {{voice?: string, speed?: number}} opts optional voice and speed
 * @returns {*} the parsed JSON printed by the TTS script
 */
function callTTS(text, outPath, opts) {
  const cli = [TTS_SCRIPT, '--text', text, '--out', outPath];
  if (opts.voice) {
    cli.push('--voice', opts.voice);
  }
  if (opts.speed) {
    cli.push('--speed', String(opts.speed));
  }
  const stdout = execFileSync('node', cli, {
    encoding: 'utf8',
    stdio: ['ignore', 'pipe', 'inherit'],
  });
  return JSON.parse(stdout.trim());
}
/**
 * Join audio files with ffmpeg's concat demuxer using stream copy (no
 * re-encode), so the inputs are expected to share the same encoding.
 *
 * A temporary list file `<output>.list` is written next to the output and is
 * removed afterwards, also when ffmpeg fails.
 *
 * ffmpeg is started without a shell (execFileSync with an argument array), so
 * paths containing quotes, `$` or backticks are passed through literally
 * instead of being interpreted by the shell.
 *
 * @param {string[]} inputs files to join, in order
 * @param {string} output path of the joined file
 * @throws {Error} when ffmpeg exits with a non-zero status
 */
function ffmpegConcat(inputs, output) {
  const listFile = output + '.list';
  fs.writeFileSync(
    listFile,
    // Inside the list file a single quote is written as '\''.
    inputs.map((p) => `file '${p.replace(/'/g, "'\\''")}'`).join('\n'),
  );
  try {
    execFileSync(
      'ffmpeg',
      ['-y', '-f', 'concat', '-safe', '0', '-i', listFile, '-c', 'copy', output],
      { stdio: ['ignore', 'pipe', 'pipe'] },
    );
  } finally {
    fs.rmSync(listFile, { force: true });
  }
}
/**
 * Write a silent mono MP3 (24000 Hz, libmp3lame, `-q:a 9`) of the requested
 * length using ffmpeg's `anullsrc` source.
 *
 * ffmpeg is started without a shell (execFileSync with an argument array), so
 * the output path is passed through literally.
 *
 * @param {number} duration length in seconds; must be a positive finite number
 * @param {string} outPath path of the MP3 to create
 * @throws {Error} when `duration` is not a positive finite number, or when
 *   ffmpeg exits with a non-zero status
 */
function makeSilence(duration, outPath) {
  if (!Number.isFinite(duration) || duration <= 0) {
    throw new Error(`makeSilence: duration must be a positive number, got ${duration}`);
  }
  execFileSync(
    'ffmpeg',
    [
      '-y',
      '-f', 'lavfi',
      '-i', 'anullsrc=r=24000:cl=mono',
      '-t', String(duration),
      '-q:a', '9',
      '-acodec', 'libmp3lame',
      outPath,
    ],
    { stdio: ['ignore', 'pipe', 'pipe'] },
  );
}
/**
 * Pipeline entry point: turn a narration script into `voiceover.mp3` plus
 * `timeline.json` inside the output directory.
 *
 * Steps:
 *   1. Parse and validate the script before anything is written: at least one
 *      scene, unique scene ids (the per-scene audio file is named after the
 *      id), every scene has some text to speak, and speed / gap are numbers.
 *   2. For each scene, synthesise every text chunk with callTTS(), record the
 *      chunk and cue offsets, and join the chunks into `audio/<id>.mp3`.
 *   3. Join all scenes, separated by a silence file when gap > 0, into
 *      `voiceover.mp3`, then write the timeline and remove the temp directory.
 *
 * Scene start times advance by the measured length of the encoded gap file
 * rather than the nominal gap, because an MP3 is made of whole frames and the
 * encoded silence is therefore not exactly `gap` seconds long.
 */
async function main() {
  const args = parseArgs(process.argv);
  if (args.help || !args.script || !args.outDir) usage();

  const scriptPath = path.resolve(args.script);
  const outDir = path.resolve(args.outDir);
  const audioDir = path.join(outDir, 'audio');
  const tmpDir = path.join(outDir, '.tmp');

  // Validate the whole script first, so a bad script neither leaves
  // directories behind nor spends TTS calls before failing.
  const md = fs.readFileSync(scriptPath, 'utf8');
  const { meta, scenes } = parseScript(md);
  if (scenes.length === 0) {
    console.error('错:解说稿没有 ## scene 段,至少一段。');
    process.exit(1);
  }

  const seenIds = new Set();
  for (const { id } of scenes) {
    if (seenIds.has(id)) {
      console.error(`Error: duplicate scene id "${id}" (each "## id" heading must be unique).`);
      process.exit(1);
    }
    seenIds.add(id);
  }

  const plans = scenes.map((scene) => ({ scene, chunks: splitByCues(scene.raw) }));
  const silentIds = plans
    .filter((p) => !p.chunks.some((c) => c.text))
    .map((p) => p.scene.id);
  if (silentIds.length) {
    console.error(`Error: scene(s) without any text to speak: ${silentIds.join(', ')}`);
    process.exit(1);
  }

  const voice = meta.voice || undefined;
  const speed = meta.speed ? parseFloat(meta.speed) : 1.0;
  const gap = meta.gap ? parseFloat(meta.gap) : 0.3;
  if (!Number.isFinite(speed) || !Number.isFinite(gap)) {
    console.error(`Error: frontmatter "speed" and "gap" must be numbers (speed=${meta.speed}, gap=${meta.gap}).`);
    process.exit(1);
  }

  fs.mkdirSync(audioDir, { recursive: true });
  fs.mkdirSync(tmpDir, { recursive: true });

  console.error(`[narrate] script=${path.basename(scriptPath)} scenes=${scenes.length} voice=${voice || '(env)'} speed=${speed} gap=${gap}s`);

  // One shared silence file is reused between all scenes.
  const gapFile = path.join(tmpDir, 'gap.mp3');
  let gapDuration = 0;
  if (gap > 0) {
    makeSilence(gap, gapFile);
    const measured = getDuration(gapFile);
    // Fall back to the nominal gap if the measurement is unusable.
    gapDuration = Number.isFinite(measured) ? measured : gap;
  }

  const timeline = {
    title: meta.title || path.basename(scriptPath, '.md'),
    voice: voice || null,
    speed,
    gap,
    totalDuration: 0,
    scenes: [],
  };

  let cursor = 0;
  const sceneAudioFiles = [];
  for (let i = 0; i < plans.length; i++) {
    const { scene, chunks } = plans[i];
    console.error(`[narrate] (${i + 1}/${scenes.length}) scene="${scene.id}"`);

    const chunkFiles = [];
    const cueRecords = [];
    const chunkRecords = []; // measured in-scene start/end of every chunk, used for subtitles
    let sceneInternalCursor = 0;
    for (let j = 0; j < chunks.length; j++) {
      const chunk = chunks[j];
      if (!chunk.text) {
        // Empty chunk (cue directly at the start or after another cue):
        // nothing to synthesise, but the cue position is still recorded.
        if (chunk.cueAfter) {
          cueRecords.push({
            id: chunk.cueAfter,
            offset: sceneInternalCursor,
          });
        }
        continue;
      }
      const chunkPath = path.join(tmpDir, `${scene.id}-${j}.mp3`);
      const result = callTTS(chunk.text, chunkPath, { voice, speed });
      const chunkStart = sceneInternalCursor;
      chunkFiles.push(chunkPath);
      sceneInternalCursor += result.duration;
      chunkRecords.push({
        text: chunk.text,
        start: chunkStart,
        end: sceneInternalCursor,
        duration: result.duration,
      });
      console.error(` chunk ${j}: ${result.duration.toFixed(2)}s · ${chunk.text.length} 字 · ${chunk.text.slice(0, 30)}${chunk.text.length > 30 ? '…' : ''}`);
      if (chunk.cueAfter) {
        cueRecords.push({
          id: chunk.cueAfter,
          offset: sceneInternalCursor,
        });
      }
    }

    // Join the chunks of this scene into one file.
    const sceneAudio = path.join(audioDir, `${scene.id}.mp3`);
    if (chunkFiles.length === 1) {
      fs.copyFileSync(chunkFiles[0], sceneAudio);
    } else {
      ffmpegConcat(chunkFiles, sceneAudio);
    }
    const sceneDuration = getDuration(sceneAudio);

    // Append to the full track: the gap first (except before the first scene), then the scene.
    if (i > 0 && gap > 0) {
      sceneAudioFiles.push(gapFile);
      cursor += gapDuration;
    }
    sceneAudioFiles.push(sceneAudio);

    timeline.scenes.push({
      id: scene.id,
      start: cursor,
      end: cursor + sceneDuration,
      duration: sceneDuration,
      audio: path.relative(outDir, sceneAudio),
      text: scene.raw.replace(/\[\[cue:[\w-]+\]\]/g, ''),
      // chunks: for sentence-by-sentence subtitles. start/end are relative to
      // the scene; absoluteStart/absoluteEnd are relative to the full track.
      chunks: chunkRecords.map((c) => ({
        text: c.text,
        start: c.start,
        end: c.end,
        absoluteStart: cursor + c.start,
        absoluteEnd: cursor + c.end,
      })),
      cues: cueRecords.map((c) => ({
        id: c.id,
        offset: c.offset,
        absoluteTime: cursor + c.offset,
      })),
    });
    cursor += sceneDuration;
  }

  // Join the full track.
  const voiceoverPath = path.join(outDir, 'voiceover.mp3');
  ffmpegConcat(sceneAudioFiles, voiceoverPath);
  timeline.totalDuration = getDuration(voiceoverPath);
  timeline.voiceover = 'voiceover.mp3';

  fs.writeFileSync(
    path.join(outDir, 'timeline.json'),
    JSON.stringify(timeline, null, 2),
  );

  // Remove the temp directory.
  fs.rmSync(tmpDir, { recursive: true, force: true });

  console.error(`\n[narrate] 完成。`);
  console.error(` voiceover: ${voiceoverPath}`);
  console.error(` timeline: ${path.join(outDir, 'timeline.json')}`);
  console.error(` 总时长: ${timeline.totalDuration.toFixed(2)}s (${(timeline.totalDuration / 60).toFixed(2)} min)`);
  console.error(` 段数: ${timeline.scenes.length}`);
  const totalCues = timeline.scenes.reduce((sum, s) => sum + s.cues.length, 0);
  console.error(` cue 数: ${totalCues}`);
}

main().catch((err) => {
  console.error(`narrate-pipeline 失败:${err.message}`);
  console.error(err.stack);
  process.exit(1);
});

View File

@@ -0,0 +1,136 @@
#!/usr/bin/env bash
# render-narration.sh · One-stop pipeline: narrated HTML animation → final MP4 (with voice)
#
# Pipeline:
#   1. render-video.js records a silent MP4 (length taken from timeline.totalDuration)
#   2. mix-voiceover.sh mixes in voiceover.mp3 (BGM optional)
#   3. Writes <basename>-narrated.mp4
#
# Usage:
#   bash render-narration.sh <html> --timeline=<path> [options]
#
# Required:
#   <html>               HTML of the narrated animation (expected to embed
#                        NarrationStage and to self-drive via rAF in recording mode)
#   --timeline=<path>    Path to timeline.json (totalDuration and the
#                        voiceover.mp3 path are read from it)
#
# Optional:
#   --bgm-mood=<name>    BGM preset (educational / tech / tutorial / ...)
#   --bgm=<path>         Custom BGM file
#   --bgm-volume=<0-1>   Static BGM volume, default 0.18
#   --no-ducking         Disable sidechain ducking
#   --keep-silent        Keep the intermediate silent MP4 (useful for debugging)
#   --out=<path>         Output path, default <html-basename>-narrated.mp4
#   --width=<px>         Video width (default 1920)
#   --height=<px>        Video height (default 1080)
#
# Examples:
#   bash render-narration.sh demo.html --timeline=_narration/timeline.json
#   bash render-narration.sh demo.html --timeline=_narration/timeline.json --bgm-mood=educational
#
set -e

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
SKILL_ROOT="$SCRIPT_DIR/.."

HTML=""
TIMELINE=""
BGM_MOOD=""
BGM=""
BGM_VOLUME="0.18"
NO_DUCKING=""
KEEP_SILENT=""
OUT=""
WIDTH="1920"
HEIGHT="1080"

for arg in "$@"; do
  case "$arg" in
    --timeline=*) TIMELINE="${arg#*=}" ;;
    --bgm-mood=*) BGM_MOOD="${arg#*=}" ;;
    --bgm=*) BGM="${arg#*=}" ;;
    --bgm-volume=*) BGM_VOLUME="${arg#*=}" ;;
    --no-ducking) NO_DUCKING="--no-ducking" ;;
    --keep-silent) KEEP_SILENT="1" ;;
    --out=*) OUT="${arg#*=}" ;;
    --width=*) WIDTH="${arg#*=}" ;;
    --height=*) HEIGHT="${arg#*=}" ;;
    -*)
      echo "未知参数:$arg" >&2
      exit 1
      ;;
    *) HTML="$arg" ;;
  esac
done

if [ -z "$HTML" ] || [ ! -f "$HTML" ]; then
  echo "Usage: bash render-narration.sh <html> --timeline=<path> [options]" >&2
  exit 1
fi
if [ -z "$TIMELINE" ] || [ ! -f "$TIMELINE" ]; then
  echo "✗ 缺 --timeline=<path>timeline.json 由 narrate-pipeline.mjs 生成)" >&2
  exit 1
fi

# ── Read totalDuration and the voiceover path from timeline.json ──
# The path is handed to node through the environment rather than pasted into
# the JavaScript source, so a quote or backslash in it cannot break or alter
# the snippet.
TIMELINE_DIR="$(cd "$(dirname "$TIMELINE")" && pwd)"
TOTAL_DURATION=$(TIMELINE_PATH="$TIMELINE" node -e \
  'console.log(JSON.parse(require("fs").readFileSync(process.env.TIMELINE_PATH, "utf8")).totalDuration)')
VOICEOVER_REL=$(TIMELINE_PATH="$TIMELINE" node -e \
  'console.log(JSON.parse(require("fs").readFileSync(process.env.TIMELINE_PATH, "utf8")).voiceover || "voiceover.mp3")')
VOICEOVER="$TIMELINE_DIR/$VOICEOVER_REL"

# A timeline without totalDuration prints "undefined"; stop here instead of
# passing a NaN duration to the recorder.
case "$TOTAL_DURATION" in
  '' | . | *[!0-9.]* | *.*.*)
    echo "✗ timeline.json has no numeric totalDuration (got: $TOTAL_DURATION)" >&2
    exit 1
    ;;
esac
if [ ! -f "$VOICEOVER" ]; then
  echo "✗ voiceover.mp3 不存在: $VOICEOVER" >&2
  exit 1
fi

# Recording length = total duration + 1s safety buffer, rounded up.
RECORD_DURATION=$(node -e "console.log(Math.ceil($TOTAL_DURATION + 1))")

HTML_ABS="$(cd "$(dirname "$HTML")" && pwd)/$(basename "$HTML")"
HTML_DIR="$(dirname "$HTML_ABS")"
HTML_BASE="$(basename "$HTML" .html)"
SILENT_MP4="$HTML_DIR/$HTML_BASE.mp4"
if [ -z "$OUT" ]; then
  OUT="$HTML_DIR/$HTML_BASE-narrated.mp4"
fi

echo "═══ render-narration ═══════════════════"
echo " HTML: $HTML_ABS"
echo " Timeline: $TIMELINE"
echo " Voiceover: $VOICEOVER"
echo " Total dur: ${TOTAL_DURATION}s (录 ${RECORD_DURATION}s)"
echo " 尺寸: ${WIDTH}×${HEIGHT}"
if [ -n "$BGM_MOOD" ]; then
  echo " BGM mood: $BGM_MOOD"
fi
if [ -n "$BGM" ]; then
  echo " BGM: $BGM"
fi
echo " 最终输出: $OUT"
echo "════════════════════════════════════════"

# ── Step 1: record the silent MP4 ──────────────────────
echo ""
echo "▸ Step 1/2 · 录制 HTML 动画 (无声)"
NODE_PATH=$(npm root -g) node "$SCRIPT_DIR/render-video.js" "$HTML_ABS" \
  --duration="$RECORD_DURATION" \
  --width="$WIDTH" \
  --height="$HEIGHT"
if [ ! -f "$SILENT_MP4" ]; then
  echo "✗ 无声 MP4 没生成: $SILENT_MP4" >&2
  exit 1
fi

# ── Step 2: mix in the voice ──────────────────────
echo ""
echo "▸ Step 2/2 · 混入人声"
MIX_ARGS=("$SILENT_MP4" "--voiceover=$VOICEOVER" "--out=$OUT")
if [ -n "$BGM_MOOD" ]; then
  MIX_ARGS+=("--bgm-mood=$BGM_MOOD")
fi
if [ -n "$BGM" ]; then
  MIX_ARGS+=("--bgm=$BGM")
fi
# The volume flag is only forwarded when some BGM was requested.
if [ -n "$BGM_MOOD$BGM" ]; then
  MIX_ARGS+=("--bgm-volume=$BGM_VOLUME")
fi
if [ -n "$NO_DUCKING" ]; then
  MIX_ARGS+=("$NO_DUCKING")
fi
bash "$SCRIPT_DIR/mix-voiceover.sh" "${MIX_ARGS[@]}"

# Remove the intermediate file unless --keep-silent was given.
if [ -z "$KEEP_SILENT" ]; then
  rm -f "$SILENT_MP4"
fi

echo ""
echo "✓ 完成: $OUT"
# Written as an if-statement on purpose: a trailing `[ -n ... ] && echo` is the
# last command of the script and would make a successful run exit with status 1
# whenever --keep-silent is not set.
if [ -n "$KEEP_SILENT" ]; then
  echo " (中间产物保留: $SILENT_MP4)"
fi

View File

@@ -0,0 +1,289 @@
#!/usr/bin/env node
/**
* HTML animation → MP4 via Playwright recordVideo + ffmpeg.
*
* Requires: global playwright (`npm install -g playwright`), ffmpeg on PATH.
*
* Usage:
* NODE_PATH=$(npm root -g) node render-video.js <html-file> \
* [--duration=30] [--width=1920] [--height=1080] \
* [--trim=<seconds>] [--fontwait=1.5] [--readytimeout=8] \
* [--keep-chrome]
*
* Design:
* 1. Warmup context (no record) — caches fonts/assets, closes cleanly
* 2. Record context (fresh, recordVideo ON) — WebM starts writing at
* context creation. Babel-standalone compile + React mount +
* fonts.ready can take 1.5-3s, during which WebM writes black frames.
* We measure this by waiting for window.__ready (set by animations.jsx
* Stage component after first paint), then trim exactly that offset.
* 3. addInitScript injects CSS hiding "chrome" elements (progress bar,
* replay button, masthead, footer, etc.) that are fine for human
* debugging but shouldn't appear in exported video.
*
* Animation-ready signal:
* Set `window.__ready = true` in your HTML after first paint. This tells
* the recorder "animation has started rendering — treat now as t=0".
* If you use animations.jsx, Stage does this automatically. Otherwise
* add: `document.fonts.ready.then(() => requestAnimationFrame(() => { window.__ready = true }));`
* after your first render call.
*
* Without __ready, falls back to --fontwait=1.5s (may leave 1-2s of black
* at the start). Pass --trim=<seconds> to override manually.
*
* Chrome elements hidden by default (all common class names + `.no-record`
* convention). Pass --keep-chrome to disable this and see raw HTML.
*
* Output: next to the HTML file, same basename with .mp4 suffix.
*/
const { chromium } = require('playwright');
const path = require('path');
const fs = require('fs');
const { spawnSync } = require('child_process');
/**
 * Look up a `--name=value` option in process.argv.
 *
 * @param {string} name  Option name without the leading dashes.
 * @param {*} def        Value returned when the option is absent.
 * @returns {*} The text after `--name=` of the first match, otherwise `def`.
 */
function arg(name, def) {
  const prefix = `--${name}=`;
  for (const token of process.argv) {
    if (token.startsWith(prefix)) {
      return token.slice(prefix.length);
    }
  }
  return def;
}
/**
 * Report whether the bare switch `--name` appears in process.argv.
 *
 * @param {string} name  Switch name without the leading dashes.
 * @returns {boolean} true only for an exact `--name` argument.
 */
function hasFlag(name) {
  const flag = `--${name}`;
  return process.argv.some(token => token === flag);
}
// ── CLI configuration ────────────────────────────────────────────────
// The first positional argument is the HTML file; everything else is read
// from `--name=value` options via arg() / hasFlag().
const HTML_FILE = process.argv[2];
if (!HTML_FILE || HTML_FILE.startsWith('--')) {
  console.error('Usage: node render-video.js <html-file>');
  console.error('Example: NODE_PATH=$(npm root -g) node render-video.js my-animation.html');
  process.exit(1);
}

const DURATION = parseFloat(arg('duration', '30'));
// Explicit radix so the sizes are always read as decimal.
const WIDTH = parseInt(arg('width', '1920'), 10);
const HEIGHT = parseInt(arg('height', '1080'), 10);
const TRIM_OVERRIDE = arg('trim', null); // manual override (seconds). If unset, auto-detected.
const FONT_WAIT = parseFloat(arg('fontwait', '1.5')); // fallback when no __ready signal
const READY_TIMEOUT = parseFloat(arg('readytimeout', '8'));
const KEEP_CHROME = hasFlag('keep-chrome');

// Reject unusable numbers up front. A non-numeric value parses to NaN and
// would otherwise only surface after the browser has been launched, or —
// for --trim — after the whole recording has been made.
const NUMERIC_OPTIONS = [
  ['duration', DURATION, v => v > 0],
  ['width', WIDTH, v => v > 0],
  ['height', HEIGHT, v => v > 0],
  ['fontwait', FONT_WAIT, v => v >= 0],
  ['readytimeout', READY_TIMEOUT, v => v >= 0],
];
if (TRIM_OVERRIDE !== null) {
  NUMERIC_OPTIONS.push(['trim', parseFloat(TRIM_OVERRIDE), v => v >= 0]);
}
for (const [label, value, inRange] of NUMERIC_OPTIONS) {
  if (!Number.isFinite(value) || !inRange(value)) {
    console.error(`✗ Invalid --${label} value: ${arg(label, '(default)')}`);
    process.exit(1);
  }
}

// Output paths: the MP4 is written next to the HTML file with the same
// basename; the raw WebM goes into a per-run temp directory beside it.
const HTML_ABS = path.resolve(HTML_FILE);
const BASENAME = path.basename(HTML_FILE, path.extname(HTML_FILE));
const DIR = path.dirname(HTML_ABS);
const TMP_DIR = path.join(DIR, '.video-tmp-' + Date.now() + '-' + process.pid);
const MP4_OUT = path.join(DIR, BASENAME + '.mp4');
// CSS to hide "chrome" elements during recording.
// Covers class-name conventions seen across skill-built animations,
// plus a `.no-record` explicit opt-out class.
// Every selector listed is forced to `display: none !important`. Several of
// them are generic names (`.title`, `.footer`, `.counter`, `.phase`), so page
// content reusing those classes is hidden too; this stylesheet is only
// injected when --keep-chrome is not passed.
const HIDE_CHROME_CSS = `
.no-record,
.progress, .progress-bar,
.counter, .tCur,
.phases, .phase-label, .phase,
.replay, button.replay,
.masthead, .kicker, .title,
.footer,
[data-role="chrome"], [data-record="hidden"] {
display: none !important;
}
`;
// Startup banner: echo the input file, effective size/duration, whether
// chrome hiding is active, and the output path.
console.log(`▸ Rendering: ${HTML_FILE}`);
console.log(` size: ${WIDTH}x${HEIGHT} · duration: ${DURATION}s · hide-chrome: ${!KEEP_CHROME}`);
console.log(` output: ${MP4_OUT}`);
/**
 * Main pipeline: warm-up pass → recorded pass (Playwright recordVideo) →
 * trim + H.264 encode (ffmpeg).
 *
 * Failure handling:
 *   - the browser is closed even when navigation or recording throws;
 *   - TMP_DIR (which holds the raw WebM) is removed on every exit path;
 *   - any error is reported as one "✗ …" line and the process exits with 1.
 */
(async () => {
  // Required locally so this block is self-contained. pathToFileURL()
  // percent-encodes characters such as spaces, '#', '?' and '%', which a
  // plain 'file://' + path concatenation passes through unescaped.
  const { pathToFileURL } = require('url');

  // Validate a manual --trim before spending DURATION seconds recording.
  const manualTrim = TRIM_OVERRIDE !== null ? parseFloat(TRIM_OVERRIDE) : null;
  if (manualTrim !== null && !(Number.isFinite(manualTrim) && manualTrim >= 0)) {
    throw new Error(`Invalid --trim value "${TRIM_OVERRIDE}" (expected seconds >= 0)`);
  }

  let hasReady = false;
  let animationStartSec;

  fs.mkdirSync(TMP_DIR, { recursive: true });
  try {
    const browser = await chromium.launch();
    try {
      const url = pathToFileURL(HTML_ABS).href;

      // ── Phase 1: WARMUP (no recording, caches fonts/assets) ─────────────
      console.log('▸ Warmup (caching fonts)…');
      const warmupCtx = await browser.newContext({
        viewport: { width: WIDTH, height: HEIGHT },
      });
      const warmupPage = await warmupCtx.newPage();
      // 'load' not 'networkidle' — unpkg/Google Fonts can keep connections alive
      // past our 30s budget even after all critical resources are in. __ready
      // flag + FONT_WAIT handle animation-readiness properly.
      await warmupPage.goto(url, { waitUntil: 'load', timeout: 60000 });
      await warmupPage.waitForTimeout(FONT_WAIT * 1000);
      await warmupCtx.close();

      // ── Phase 2: RECORD (fresh context, animation from t=0) ─────────────
      console.log('▸ Recording (clean start)…');
      const recordCtx = await browser.newContext({
        viewport: { width: WIDTH, height: HEIGHT },
        deviceScaleFactor: 1,
        recordVideo: {
          dir: TMP_DIR,
          size: { width: WIDTH, height: HEIGHT },
        },
      });

      // Tell the page it's being recorded — animations.jsx Stage reads this
      // and forces loop=false so the export ends on the final frame instead of
      // capturing the start of the next cycle. Hand-written Stage components
      // should also honor this signal (see animation-pitfalls.md §13).
      await recordCtx.addInitScript(() => { window.__recording = true; });

      // Inject CSS + JS heuristic to hide "chrome" elements.
      // Two layers:
      //   A. CSS selectors for common class-name conventions (cheap)
      //   B. JS heuristic for fixed-position bars containing buttons or time
      //      readouts (catches inline-styled chrome like <Stage> controls)
      // Persists across reloads via addInitScript.
      if (!KEEP_CHROME) {
        await recordCtx.addInitScript(css => {
          const HIDE_MARK = 'data-video-hidden';

          function injectStyle() {
            const style = document.createElement('style');
            style.setAttribute('data-inject', 'render-video-chrome-hide');
            style.textContent = css;
            (document.head || document.documentElement).appendChild(style);
          }

          function hideChromeBars() {
            const vh = window.innerHeight;
            document.querySelectorAll('div, nav, header, footer, section, aside')
              .forEach(el => {
                if (el.hasAttribute(HIDE_MARK)) return;
                if (el.dataset.recordKeep === 'true') return;
                const s = getComputedStyle(el);
                if (s.position !== 'fixed' && s.position !== 'sticky') return;
                const r = el.getBoundingClientRect();
                // Only skinny bars (not full-screen overlays)
                if (r.height > vh * 0.25) return;
                const atBottom = r.bottom >= vh - 30;
                const atTop = r.top <= 30 && r.height < 80;
                if (!atBottom && !atTop) return;
                // Chrome-like: contains button or scrubber/time glyphs
                const txt = el.textContent || '';
                const hasBtn = !!el.querySelector('button, [role="button"]');
                const hasCtrls = /[⏸▶⏮⏭↻↺↩↪]|\d+\.\d+\s*s/.test(txt);
                if (hasBtn || hasCtrls) {
                  el.style.setProperty('display', 'none', 'important');
                  el.setAttribute(HIDE_MARK, '1');
                }
              });
          }

          const start = () => {
            injectStyle();
            hideChromeBars();
            // Re-run as React/Vue commits DOM changes
            const obs = new MutationObserver(hideChromeBars);
            obs.observe(document.body, { childList: true, subtree: true });
            setTimeout(() => obs.disconnect(), 6000);
          };

          if (document.readyState === 'loading') {
            document.addEventListener('DOMContentLoaded', start, { once: true });
          } else {
            start();
          }
        }, HIDE_CHROME_CSS);
      }

      // Record context opens page. The WebM starts writing the moment the
      // context is created — so we track T0 here and measure how many seconds
      // elapse before the animation is actually ready (Babel compile + React
      // mount + fonts.ready). That elapsed time = exact trim offset.
      const T0 = Date.now();
      const page = await recordCtx.newPage();
      await page.goto(url, { waitUntil: 'load', timeout: 60000 });

      // Wait for animation ready signal. Stage component (animations.jsx) sets
      // window.__ready = true on its first rAF after mount + fonts.ready.
      // Fallback: if HTML doesn't set __ready within READY_TIMEOUT, use fontwait.
      hasReady = await page.waitForFunction(
        () => window.__ready === true,
        { timeout: READY_TIMEOUT * 1000 },
      ).then(() => true).catch(() => false);

      if (hasReady) {
        // Second line of defence: actively reset the animation time to zero,
        // for HTML that does not strictly follow the starter tick template
        // (e.g. lastTick seeded from performance.now(), so font-loading time
        // is counted into the first frame's dt).
        // See references/animation-pitfalls.md §12.
        const seekCorrected = await page.evaluate(() => {
          if (typeof window.__seek === 'function') {
            window.__seek(0);
            return true;
          }
          return false;
        });
        if (seekCorrected) {
          // Wait two rAFs so the seek takes effect and the t=0 frame is painted.
          await page.evaluate(() => new Promise(r => requestAnimationFrame(() => requestAnimationFrame(r))));
        }
        animationStartSec = (Date.now() - T0) / 1000;
        console.log(`▸ Ready at ${animationStartSec.toFixed(2)}s (from window.__ready${seekCorrected ? ' + __seek(0) correction' : ''})`);
      } else {
        await page.waitForTimeout(FONT_WAIT * 1000);
        animationStartSec = (Date.now() - T0) / 1000;
        // Fallback offset is unreliable: animation may have started in raf loop
        // already, so trim could land mid-cycle. Add 0.5s safety margin (see
        // animation-pitfalls.md §13). Loud warning so user knows to fix the HTML.
        console.log('');
        console.log(` ⚠️ WARNING: window.__ready signal not detected within ${READY_TIMEOUT}s`);
        console.log(` Recording will use fallback trim of ${animationStartSec.toFixed(2)}s + 0.5s safety margin.`);
        console.log(` This is UNRELIABLE — your video may start mid-animation or skip frames.`);
        console.log('');
        console.log(` FIX: in your HTML's animation tick (or rAF first frame), add:`);
        console.log(` window.__ready = true;`);
        console.log(` animations.jsx-based HTML does this automatically. If you wrote your`);
        console.log(` own Stage, see references/animation-pitfalls.md §12 for the pattern.`);
        console.log('');
      }

      // Now let the animation play out its full duration
      await page.waitForTimeout(DURATION * 1000 + 300);
      await page.close();
      await recordCtx.close();
    } finally {
      // Runs on success and on failure, so no browser process is left behind.
      await browser.close();
    }

    const webmFiles = fs.readdirSync(TMP_DIR).filter(f => f.endsWith('.webm'));
    if (webmFiles.length === 0) {
      throw new Error('No webm produced');
    }
    const webmPath = path.join(TMP_DIR, webmFiles[0]);
    console.log(`▸ WebM: ${(fs.statSync(webmPath).size / 1024 / 1024).toFixed(1)} MB`);

    // Resolve final trim offset:
    //   - manual --trim=X        → use X (explicit user override)
    //   - hasReady               → animationStartSec + 0.05s (Babel-commit nudge)
    //   - fallback (no __ready)  → animationStartSec + 0.5s safety margin (raf
    //                              loop may have started running already; without
    //                              this we'd capture mid-cycle frames)
    const resolvedTrim = manualTrim !== null
      ? manualTrim
      : animationStartSec + (hasReady ? 0.05 : 0.5);
    console.log(`▸ ffmpeg: trim=${resolvedTrim.toFixed(2)}s${manualTrim !== null ? ' (manual)' : ' (auto)'}, encode H.264…`);

    const ffmpeg = spawnSync('ffmpeg', [
      '-y',
      '-ss', String(resolvedTrim),
      '-i', webmPath,
      '-t', String(DURATION),
      '-c:v', 'libx264',
      '-pix_fmt', 'yuv420p',
      '-crf', '18',
      '-preset', 'medium',
      '-movflags', '+faststart',
      MP4_OUT,
    ], { stdio: ['ignore', 'ignore', 'pipe'] });

    // spawnSync reports a failure to start the child (e.g. ffmpeg missing
    // from PATH) through `error`; in that case `status` and `stderr` are null.
    if (ffmpeg.error) {
      throw new Error(`ffmpeg could not be run: ${ffmpeg.error.message}`);
    }
    if (ffmpeg.status !== 0) {
      throw new Error('ffmpeg failed:\n' + String(ffmpeg.stderr || '').slice(-2000));
    }

    const mp4Size = (fs.statSync(MP4_OUT).size / 1024 / 1024).toFixed(1);
    console.log(`✓ Done: ${MP4_OUT} (${mp4Size} MB)`);
  } finally {
    fs.rmSync(TMP_DIR, { recursive: true, force: true });
  }
})().catch(err => {
  console.error('✗ ' + (err && err.message ? err.message : String(err)));
  process.exit(1);
});

View File

@@ -0,0 +1,184 @@
#!/usr/bin/env node
/**
* tts-doubao.mjs · 豆包语音 TTS火山引擎 openspeech
*
* 用法:
* node scripts/tts-doubao.mjs --text "你好" --out demo.mp3
* node scripts/tts-doubao.mjs --text-file script.txt --out out.mp3 --speed 1.0
*
* 输出:
* - mp3 文件写到 --out 路径
* - stdout 打印一行 JSON: {"path":"...","duration":12.34,"bytes":54321}
*
* 依赖Node 18+(自带 fetch/crypto、ffprobe测时长brew install ffmpeg
*
* env自动从 skill 根目录 .env 读取,也可走 process.env 覆盖):
* DOUBAO_TTS_API_KEY 必填
* DOUBAO_TTS_VOICE_ID 必填(音色 id
* DOUBAO_TTS_CLUSTER 默认 volcano_icl
* DOUBAO_TTS_ENDPOINT 默认 https://openspeech.bytedance.com/api/v1/tts
*/
import fs from 'node:fs';
import path from 'node:path';
import { execFileSync } from 'node:child_process';
import { fileURLToPath } from 'node:url';
import { randomUUID } from 'node:crypto';
// Directory of this script and the skill root one level above it.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const SKILL_ROOT = path.resolve(__dirname, '..');

/**
 * Load KEY=VALUE pairs from <SKILL_ROOT>/.env into process.env.
 *
 * Blank lines, `#` comment lines and lines without `=` are skipped. One
 * pair of matching surrounding quotes is removed from the value. Keys that
 * already exist in process.env are left untouched. Does nothing when the
 * file is absent.
 */
function loadEnv() {
  const envPath = path.join(SKILL_ROOT, '.env');
  if (!fs.existsSync(envPath)) return;

  const wrappedIn = (value, quote) => value.startsWith(quote) && value.endsWith(quote);

  fs.readFileSync(envPath, 'utf8')
    .split('\n')
    .map(rawLine => rawLine.trim())
    .filter(entry => entry && !entry.startsWith('#'))
    .forEach(entry => {
      const eq = entry.indexOf('=');
      if (eq < 0) return;
      const key = entry.slice(0, eq).trim();
      const rawValue = entry.slice(eq + 1).trim();
      const value = wrappedIn(rawValue, '"') || wrappedIn(rawValue, "'")
        ? rawValue.slice(1, -1)
        : rawValue;
      if (!(key in process.env)) {
        process.env[key] = value;
      }
    });
}

loadEnv();
/**
 * Parse the command line into an options object.
 *
 * Starts from the defaults `{ speed: '1.0', encoding: 'mp3' }`. Each
 * recognised value option consumes the following argument; `--help` / `-h`
 * sets `help: true`; anything else is ignored.
 *
 * @param {string[]} argv  Full argument vector (parsing starts at index 2).
 * @returns {object} Parsed options.
 */
function parseArgs(argv) {
  const valueOptions = new Map([
    ['--text', 'text'],
    ['--text-file', 'textFile'],
    ['--out', 'out'],
    ['--speed', 'speed'],
    ['--voice', 'voice'],
    ['--encoding', 'encoding'],
  ]);
  const parsed = { speed: '1.0', encoding: 'mp3' };

  let pos = 2;
  while (pos < argv.length) {
    const token = argv[pos];
    if (valueOptions.has(token)) {
      // The option's value is the next argument (undefined if missing).
      pos += 1;
      parsed[valueOptions.get(token)] = argv[pos];
    } else if (token === '--help' || token === '-h') {
      parsed.help = true;
    }
    pos += 1;
  }
  return parsed;
}
/**
 * Print the option summary to stderr and terminate with exit code 1.
 */
function usage() {
  const helpText = `
tts-doubao.mjs · 豆包语音 TTS
--text <str> 要合成的文本
--text-file <path> 从文件读取文本(与 --text 二选一)
--out <path> 输出 mp3 路径(必填)
--speed <float> 语速倍率,默认 1.00.5-2.0
--voice <voice_id> 覆盖 .env 里的音色 id
--encoding <ext> mp3 / wav / pcm默认 mp3
`.trim();
  console.error(helpText);
  process.exit(1);
}
/**
 * Measure a media file's duration with ffprobe.
 *
 * @param {string} filePath  File to probe.
 * @returns {number|null} parseFloat of ffprobe's `format=duration` output,
 *   or null when ffprobe cannot be run or exits with an error.
 */
function getDuration(filePath) {
  const probeArgs = [
    '-v', 'error',
    '-show_entries', 'format=duration',
    '-of', 'default=noprint_wrappers=1:nokey=1',
    filePath,
  ];
  let stdout;
  try {
    stdout = execFileSync('ffprobe', probeArgs, { encoding: 'utf8' });
  } catch {
    return null;
  }
  return parseFloat(stdout.trim());
}
/**
 * Synthesize speech through the Doubao TTS HTTP endpoint.
 *
 * Reads DOUBAO_TTS_API_KEY, DOUBAO_TTS_CLUSTER, DOUBAO_TTS_ENDPOINT and
 * DOUBAO_TTS_VOICE_ID from process.env; `voice` takes precedence over the
 * env voice id.
 *
 * @param {{text: string, voice?: string, speed: string, encoding: string}} opts
 * @returns {Promise<Buffer>} Audio decoded from the base64 `data` field.
 * @throws {Error} On a missing API key / voice id, a non-2xx HTTP status, a
 *   response `code` other than 3000, or a response without `data`.
 */
async function tts({ text, voice, speed, encoding }) {
  const {
    DOUBAO_TTS_API_KEY: apiKey,
    DOUBAO_TTS_CLUSTER: envCluster,
    DOUBAO_TTS_ENDPOINT: envEndpoint,
    DOUBAO_TTS_VOICE_ID: envVoice,
  } = process.env;
  const cluster = envCluster || 'volcano_icl';
  const endpoint = envEndpoint || 'https://openspeech.bytedance.com/api/v1/tts';
  const voiceId = voice || envVoice;

  if (!apiKey) {
    throw new Error('缺 DOUBAO_TTS_API_KEY检查 .env');
  }
  if (!voiceId) {
    throw new Error('缺 DOUBAO_TTS_VOICE_ID检查 .env 或用 --voice 传)');
  }

  const payload = JSON.stringify({
    app: { cluster },
    user: { uid: 'huashu-design' },
    audio: {
      voice_type: voiceId,
      encoding,
      speed_ratio: parseFloat(speed),
    },
    request: {
      reqid: randomUUID(),
      text,
      operation: 'query',
    },
  });

  const response = await fetch(endpoint, {
    method: 'POST',
    headers: {
      'x-api-key': apiKey,
      'Content-Type': 'application/json',
    },
    body: payload,
  });
  if (!response.ok) {
    const errText = await response.text();
    throw new Error(`HTTP ${response.status}: ${errText.slice(0, 500)}`);
  }

  // Standard response shape: { code, message, data: "<base64 audio>", ... };
  // code === 3000 means success.
  const json = await response.json();
  const reportedFailure = json.code !== undefined && json.code !== 3000;
  if (reportedFailure) {
    throw new Error(`API 返回错误 code=${json.code} msg=${json.message || JSON.stringify(json)}`);
  }
  if (!json.data) {
    throw new Error(`API 响应无 data 字段:${JSON.stringify(json).slice(0, 500)}`);
  }
  return Buffer.from(json.data, 'base64');
}
/**
 * CLI entry point: parse arguments, synthesize the audio, write it to
 * --out (creating parent directories), then print one JSON line with the
 * output path, byte count, measured duration and input character count.
 */
async function main() {
  const opts = parseArgs(process.argv);
  if (opts.help) usage();

  // --text wins; the file is only read when no inline text was given.
  const text = !opts.text && opts.textFile
    ? fs.readFileSync(opts.textFile, 'utf8').trim()
    : opts.text;
  if (!text) {
    console.error('错:缺 --text 或 --text-file');
    usage();
  }
  if (!opts.out) {
    console.error('错:缺 --out');
    usage();
  }

  const outPath = path.resolve(opts.out);
  fs.mkdirSync(path.dirname(outPath), { recursive: true });

  const { voice, speed, encoding } = opts;
  const audio = await tts({ text, voice, speed, encoding });
  fs.writeFileSync(outPath, audio);

  const summary = {
    path: outPath,
    bytes: audio.length,
    duration: getDuration(outPath),
    text_chars: text.length,
  };
  console.log(JSON.stringify(summary));
}

// Report any failure on stderr and exit non-zero.
main().catch((err) => {
  console.error(`TTS 失败:${err.message}`);
  process.exit(1);
});

View File

@@ -0,0 +1,154 @@
#!/usr/bin/env python3
"""
verify.py — Playwright封装用于验证claude-design产出的HTML
Usage:
python verify.py path/to/design.html # 基础:打开+截图+抓控制台错误
python verify.py design.html --viewports 1920x1080,375x667 # 多viewport
python verify.py deck.html --slides 10 # 幻灯片逐页截前10张
python verify.py design.html --output ./screenshots/ # 输出目录
python verify.py design.html --show # 非headless打开真实浏览器
依赖:
pip install playwright
playwright install chromium
"""
import argparse
import sys
import os
import time
from pathlib import Path
def parse_viewport(s: str) -> dict:
    """Parse a ``WIDTHxHEIGHT`` spec into a viewport dict.

    The separator is matched case-insensitively and surrounding whitespace
    is ignored, so ``"1440x900"`` and ``" 375X667 "`` are both accepted.

    Args:
        s: Viewport spec such as ``"1920x1080"``.

    Returns:
        ``{'width': <int>, 'height': <int>}``.

    Raises:
        ValueError: If the spec is not two positive integers joined by ``x``.
    """
    width_text, sep, height_text = s.strip().lower().partition('x')
    try:
        if not sep:
            raise ValueError("missing 'x' separator")
        width, height = int(width_text), int(height_text)
    except ValueError:
        raise ValueError(
            f"invalid viewport {s!r}: expected WIDTHxHEIGHT, e.g. 1440x900"
        ) from None
    if width <= 0 or height <= 0:
        raise ValueError(f"invalid viewport {s!r}: width and height must be positive")
    return {'width': width, 'height': height}
def verify_html(html_path, viewports=None, slides=0, output_dir=None, show=False, wait=2000):
    """Open an HTML file in Chromium via Playwright, screenshot it, report errors.

    Args:
        html_path: Path to the HTML file to verify.
        viewports: List of ``{'width': int, 'height': int}`` dicts; defaults
            to a single 1440x900 viewport.
        slides: When > 0, capture that many slides, pressing ArrowRight
            between captures; otherwise capture a viewport screenshot and a
            full-page screenshot.
        output_dir: Directory for screenshots; defaults to ``screenshots/``
            next to the HTML file. Created if missing.
        show: Launch a visible (non-headless) browser and wait for Enter
            before closing each viewport.
        wait: Milliseconds to wait after the page has loaded.

    Returns:
        0 when no uncaught page errors were recorded, otherwise 1. Console
        errors and warnings are reported but do not affect the return value.

    Exits the process with status 1 when Playwright cannot be imported or
    the HTML file does not exist.
    """
    try:
        from playwright.sync_api import sync_playwright
    except ImportError:
        print("ERROR: playwright未安装。")
        print("运行: pip install playwright && playwright install chromium")
        sys.exit(1)

    html_path = Path(html_path).resolve()
    if not html_path.exists():
        print(f"ERROR: 文件不存在: {html_path}")
        sys.exit(1)

    if output_dir is None:
        output_dir = html_path.parent / 'screenshots'
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    file_url = html_path.as_uri()
    stem = html_path.stem
    if viewports is None:
        viewports = [{'width': 1440, 'height': 900}]

    # Both lists accumulate across all viewports.
    console_errors = []
    page_errors = []

    def on_console(msg):
        if msg.type in ("error", "warning"):
            console_errors.append(f"[{msg.type}] {msg.text}")

    def on_page_error(err):
        page_errors.append(str(err))

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=not show)
        try:
            for viewport in viewports:
                # File names carry the viewport size only when several
                # viewports are rendered, so single-viewport names are
                # unchanged. Applied to slides as well, otherwise each
                # viewport would overwrite the previous one's slide images.
                suffix = f"-{viewport['width']}x{viewport['height']}" if len(viewports) > 1 else ""
                context = browser.new_context(viewport=viewport, device_scale_factor=2)
                try:
                    page = context.new_page()
                    page.on("console", on_console)
                    page.on("pageerror", on_page_error)

                    print(f"\n→ 打开 {file_url} @ {viewport['width']}x{viewport['height']}")
                    page.goto(file_url, wait_until='networkidle')
                    page.wait_for_timeout(wait)

                    if slides > 0:
                        for i in range(slides):
                            screenshot_path = output_dir / f"{stem}{suffix}-slide-{str(i + 1).zfill(2)}.png"
                            page.screenshot(path=str(screenshot_path), full_page=False)
                            print(f" ✓ slide {i+1} → {screenshot_path.name}")
                            if i < slides - 1:
                                page.keyboard.press('ArrowRight')
                                page.wait_for_timeout(500)
                    else:
                        screenshot_path = output_dir / f"{stem}{suffix}.png"
                        page.screenshot(path=str(screenshot_path), full_page=False)
                        print(f" ✓ 截图 → {screenshot_path.name}")
                        full_path = output_dir / f"{stem}{suffix}-full.png"
                        page.screenshot(path=str(full_path), full_page=True)
                        print(f" ✓ 完整页 → {full_path.name}")

                    if show:
                        print(" (浏览器窗口保持打开按Enter关闭...)")
                        input()
                finally:
                    # Close the context even when navigation or a screenshot raises.
                    context.close()
        finally:
            browser.close()

    print("\n" + "=" * 50)
    print("验证报告")
    print("=" * 50)

    if page_errors:
        print(f"\n❌ Page Errors ({len(page_errors)}):")
        for e in page_errors:
            print(f" - {e}")
    else:
        print("\n✅ 无JavaScript错误")

    if console_errors:
        print(f"\n⚠️ Console Errors/Warnings ({len(console_errors)}):")
        # Only the first 20 entries are listed; the remainder is summarised.
        for e in console_errors[:20]:
            print(f" - {e}")
        if len(console_errors) > 20:
            print(f" ... 还有{len(console_errors) - 20}")
    else:
        print("✅ Console干净")

    print(f"\n📸 截图保存至: {output_dir}")
    return 0 if not page_errors else 1
def main():
    """Parse the command line and run verify_html().

    Returns:
        The status code returned by verify_html().
    """
    cli = argparse.ArgumentParser(
        description="Verify HTML design outputs with Playwright",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument("html_path", help="HTML file path")

    # (flag, add_argument keyword arguments) for every optional switch.
    optional_switches = (
        ("--viewports", dict(default="1440x900",
                             help="逗号分隔的viewport列表格式 WxH默认 1440x900")),
        ("--slides", dict(type=int, default=0,
                          help="幻灯片模式截取前N张需要HTML支持ArrowRight翻页")),
        ("--output", dict(default=None,
                          help="输出目录默认HTML所在目录的screenshots/")),
        ("--show", dict(action="store_true",
                        help="非headless打开真实浏览器窗口")),
        ("--wait", dict(type=int, default=2000,
                        help="打开页面后等待的毫秒数默认2000")),
    )
    for flag, options in optional_switches:
        cli.add_argument(flag, **options)

    ns = cli.parse_args()
    viewport_list = list(map(parse_viewport, ns.viewports.split(",")))
    return verify_html(
        html_path=ns.html_path,
        viewports=viewport_list,
        slides=ns.slides,
        output_dir=ns.output,
        show=ns.show,
        wait=ns.wait,
    )


if __name__ == "__main__":
    sys.exit(main())