chore: ruler files update

Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-05-24 21:03:49 -04:00
parent 97b3ddd653
commit abb472c83d
303 changed files with 46670 additions and 25369 deletions
--- a/.claude/skills/huashu-design/scripts/narrate-pipeline.mjs
+++ b/.claude/skills/huashu-design/scripts/narrate-pipeline.mjs
@@ -0,0 +1,315 @@
+#!/usr/bin/env node
+/**
+ * narrate-pipeline.mjs · L2 长解说总指挥
+ *
+ * 输入：markdown 解说稿（## scene-id 分段，[[cue:id]] 标关键句）
+ * 输出：voiceover.mp3（拼接好的整段人声）+ timeline.json（每段 start/end + cues 绝对时间）
+ *
+ * 用法：
+ *   node scripts/narrate-pipeline.mjs --script demo.md --out-dir _narration_demo
+ *
+ * 解说稿格式：
+ *   ---
+ *   title: 什么是 LLM
+ *   voice: S_JSdgdWk22   # 可选，不填走 .env
+ *   speed: 1.0           # 可选
+ *   gap: 0.3             # 段间静音秒数，默认 0.3
+ *   ---
+ *
+ *   ## intro
+ *   大家好，我是花叔。今天我们 5 分钟讲清楚 LLM 是什么。
+ *
+ *   ## what-is
+ *   LLM 全称 Large Language Model，[[cue:bigmodel]]它是一个有几千亿参数的神经网络。
+ *   本质是一个文字接龙的预测器。
+ *
+ * 输出文件结构（out-dir 下）：
+ *   audio/
+ *     intro.mp3
+ *     what-is.mp3
+ *   voiceover.mp3       拼接全部 scene 的整段人声
+ *   timeline.json       schema 见 references/voiceover-pipeline.md
+ *
+ * 依赖：tts-doubao.mjs、ffmpeg、ffprobe
+ */
+
+import fs from 'node:fs';
+import path from 'node:path';
+import { execFileSync, execSync } from 'node:child_process';
+import { fileURLToPath } from 'node:url';
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+const SKILL_ROOT = path.resolve(__dirname, '..');
+const TTS_SCRIPT = path.join(__dirname, 'tts-doubao.mjs');
+
+function parseArgs(argv) {
+  const args = {};
+  for (let i = 2; i < argv.length; i++) {
+    const a = argv[i];
+    if (a === '--script') args.script = argv[++i];
+    else if (a === '--out-dir') args.outDir = argv[++i];
+    else if (a === '--help' || a === '-h') args.help = true;
+  }
+  return args;
+}
+
+function usage() {
+  console.error(`
+narrate-pipeline.mjs · L2 长解说总指挥
+
+  --script <path>     解说稿 .md 文件（必填）
+  --out-dir <path>    输出目录（必填）
+
+输出：<out-dir>/voiceover.mp3 + <out-dir>/timeline.json
+`.trim());
+  process.exit(1);
+}
+
+/**
+ * Parse frontmatter + scene blocks from markdown
+ * Returns { meta, scenes: [{ id, raw }] }
+ */
+function parseScript(md) {
+  const meta = {};
+  let body = md;
+  const fmMatch = md.match(/^---\n([\s\S]*?)\n---\n/);
+  if (fmMatch) {
+    for (const line of fmMatch[1].split('\n')) {
+      const idx = line.indexOf(':');
+      if (idx < 0) continue;
+      const key = line.slice(0, idx).trim();
+      const val = line.slice(idx + 1).trim();
+      meta[key] = val;
+    }
+    body = md.slice(fmMatch[0].length);
+  }
+  const scenes = [];
+  const re = /^##\s+([\w-]+)\s*\n([\s\S]*?)(?=^##\s+[\w-]+\s*\n|$(?![\r\n]))/gm;
+  let m;
+  while ((m = re.exec(body)) !== null) {
+    scenes.push({ id: m[1], raw: m[2].trim() });
+  }
+  return { meta, scenes };
+}
+
+/**
+ * Split a scene's text by [[cue:id]] markers into chunks.
+ * Returns: { chunks: [{ text, cueAfter? }] }
+ *   cueAfter is the cue id that follows this chunk (chunk's end = cue position)
+ *
+ * Example: "A[[cue:x]]B[[cue:y]]C" =>
+ *   chunks: [
+ *     { text: "A", cueAfter: "x" },
+ *     { text: "B", cueAfter: "y" },
+ *     { text: "C" }
+ *   ]
+ */
+function splitByCues(text) {
+  const chunks = [];
+  const re = /\[\[cue:([\w-]+)\]\]/g;
+  let lastIdx = 0;
+  let m;
+  while ((m = re.exec(text)) !== null) {
+    const before = text.slice(lastIdx, m.index).trim();
+    chunks.push({ text: before, cueAfter: m[1] });
+    lastIdx = m.index + m[0].length;
+  }
+  const tail = text.slice(lastIdx).trim();
+  chunks.push({ text: tail });
+  // 过滤空文本块（cue 紧贴段首/段尾时）
+  return chunks.filter((c) => c.text.length > 0 || c.cueAfter);
+}
+
+function getDuration(filePath) {
+  const out = execFileSync('ffprobe', [
+    '-v', 'error',
+    '-show_entries', 'format=duration',
+    '-of', 'default=noprint_wrappers=1:nokey=1',
+    filePath,
+  ], { encoding: 'utf8' });
+  return parseFloat(out.trim());
+}
+
+function callTTS(text, outPath, opts) {
+  const args = ['--text', text, '--out', outPath];
+  if (opts.voice) args.push('--voice', opts.voice);
+  if (opts.speed) args.push('--speed', String(opts.speed));
+  const out = execFileSync('node', [TTS_SCRIPT, ...args], {
+    encoding: 'utf8',
+    stdio: ['ignore', 'pipe', 'inherit'],
+  });
+  return JSON.parse(out.trim());
+}
+
+function ffmpegConcat(inputs, output) {
+  // 用 concat demuxer 合并相同编码的 mp3
+  const listFile = output + '.list';
+  fs.writeFileSync(
+    listFile,
+    inputs.map((p) => `file '${p.replace(/'/g, "'\\''")}'`).join('\n'),
+  );
+  execSync(
+    `ffmpeg -y -f concat -safe 0 -i "${listFile}" -c copy "${output}"`,
+    { stdio: ['ignore', 'pipe', 'pipe'] },
+  );
+  fs.unlinkSync(listFile);
+}
+
+function makeSilence(duration, outPath) {
+  execSync(
+    `ffmpeg -y -f lavfi -i anullsrc=r=24000:cl=mono -t ${duration} -q:a 9 -acodec libmp3lame "${outPath}"`,
+    { stdio: ['ignore', 'pipe', 'pipe'] },
+  );
+}
+
+async function main() {
+  const args = parseArgs(process.argv);
+  if (args.help || !args.script || !args.outDir) usage();
+
+  const scriptPath = path.resolve(args.script);
+  const outDir = path.resolve(args.outDir);
+  const audioDir = path.join(outDir, 'audio');
+  const tmpDir = path.join(outDir, '.tmp');
+  fs.mkdirSync(audioDir, { recursive: true });
+  fs.mkdirSync(tmpDir, { recursive: true });
+
+  const md = fs.readFileSync(scriptPath, 'utf8');
+  const { meta, scenes } = parseScript(md);
+  if (scenes.length === 0) {
+    console.error('错：解说稿没有 ## scene 段，至少一段。');
+    process.exit(1);
+  }
+
+  const voice = meta.voice || undefined;
+  const speed = meta.speed ? parseFloat(meta.speed) : 1.0;
+  const gap = meta.gap ? parseFloat(meta.gap) : 0.3;
+
+  console.error(`[narrate] script=${path.basename(scriptPath)} scenes=${scenes.length} voice=${voice || '(env)'} speed=${speed} gap=${gap}s`);
+
+  // 段间静音文件（共用一个）
+  const gapFile = path.join(tmpDir, 'gap.mp3');
+  if (gap > 0) makeSilence(gap, gapFile);
+
+  const timeline = {
+    title: meta.title || path.basename(scriptPath, '.md'),
+    voice: voice || null,
+    speed,
+    gap,
+    totalDuration: 0,
+    scenes: [],
+  };
+
+  let cursor = 0;
+  const sceneAudioFiles = [];
+
+  for (let i = 0; i < scenes.length; i++) {
+    const scene = scenes[i];
+    console.error(`[narrate] (${i + 1}/${scenes.length}) scene="${scene.id}"`);
+
+    const chunks = splitByCues(scene.raw);
+    const chunkFiles = [];
+    const cueRecords = [];
+    const chunkRecords = []; // 每个 chunk 的实测 start/end 段内时间，用于字幕显示
+    let sceneInternalCursor = 0;
+
+    for (let j = 0; j < chunks.length; j++) {
+      const chunk = chunks[j];
+      if (!chunk.text) {
+        // 空文本块（cue 紧贴），跳过 TTS 但仍记录 cue 位置
+        if (chunk.cueAfter) {
+          cueRecords.push({
+            id: chunk.cueAfter,
+            offset: sceneInternalCursor,
+          });
+        }
+        continue;
+      }
+      const chunkPath = path.join(tmpDir, `${scene.id}-${j}.mp3`);
+      const result = callTTS(chunk.text, chunkPath, { voice, speed });
+      const chunkStart = sceneInternalCursor;
+      chunkFiles.push(chunkPath);
+      sceneInternalCursor += result.duration;
+      chunkRecords.push({
+        text: chunk.text,
+        start: chunkStart,
+        end: sceneInternalCursor,
+        duration: result.duration,
+      });
+      console.error(`  chunk ${j}: ${result.duration.toFixed(2)}s · ${chunk.text.length} 字 · ${chunk.text.slice(0, 30)}${chunk.text.length > 30 ? '…' : ''}`);
+      if (chunk.cueAfter) {
+        cueRecords.push({
+          id: chunk.cueAfter,
+          offset: sceneInternalCursor,
+        });
+      }
+    }
+
+    // 合并段内子段
+    const sceneAudio = path.join(audioDir, `${scene.id}.mp3`);
+    if (chunkFiles.length === 1) {
+      fs.copyFileSync(chunkFiles[0], sceneAudio);
+    } else {
+      ffmpegConcat(chunkFiles, sceneAudio);
+    }
+    const sceneDuration = getDuration(sceneAudio);
+
+    // 拼接到总轨：先加 gap（除了第一段），再加 scene
+    if (i > 0 && gap > 0) {
+      sceneAudioFiles.push(gapFile);
+      cursor += gap;
+    }
+    sceneAudioFiles.push(sceneAudio);
+
+    timeline.scenes.push({
+      id: scene.id,
+      start: cursor,
+      end: cursor + sceneDuration,
+      duration: sceneDuration,
+      audio: path.relative(outDir, sceneAudio),
+      text: scene.raw.replace(/\[\[cue:[\w-]+\]\]/g, ''),
+      // chunks: 用于字幕逐句显示。start/end 是段内相对时间，absoluteStart/absoluteEnd 是整轨绝对时间
+      chunks: chunkRecords.map((c) => ({
+        text: c.text,
+        start: c.start,
+        end: c.end,
+        absoluteStart: cursor + c.start,
+        absoluteEnd: cursor + c.end,
+      })),
+      cues: cueRecords.map((c) => ({
+        id: c.id,
+        offset: c.offset,
+        absoluteTime: cursor + c.offset,
+      })),
+    });
+
+    cursor += sceneDuration;
+  }
+
+  // 合并整轨
+  const voiceoverPath = path.join(outDir, 'voiceover.mp3');
+  ffmpegConcat(sceneAudioFiles, voiceoverPath);
+  timeline.totalDuration = getDuration(voiceoverPath);
+  timeline.voiceover = 'voiceover.mp3';
+
+  fs.writeFileSync(
+    path.join(outDir, 'timeline.json'),
+    JSON.stringify(timeline, null, 2),
+  );
+
+  // 清理 tmp
+  fs.rmSync(tmpDir, { recursive: true, force: true });
+
+  console.error(`\n[narrate] 完成。`);
+  console.error(`  voiceover: ${voiceoverPath}`);
+  console.error(`  timeline:  ${path.join(outDir, 'timeline.json')}`);
+  console.error(`  总时长:    ${timeline.totalDuration.toFixed(2)}s (${(timeline.totalDuration / 60).toFixed(2)} min)`);
+  console.error(`  段数:      ${timeline.scenes.length}`);
+  const totalCues = timeline.scenes.reduce((sum, s) => sum + s.cues.length, 0);
+  console.error(`  cue 数:    ${totalCues}`);
+}
+
+main().catch((err) => {
+  console.error(`narrate-pipeline 失败：${err.message}`);
+  console.error(err.stack);
+  process.exit(1);
+});