Files
local-cal/.claude/skills/huashu-design/scripts/narrate-pipeline.mjs
2026-05-24 21:03:49 -04:00

316 lines
9.7 KiB
JavaScript
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env node
/**
* narrate-pipeline.mjs · L2 长解说总指挥
*
* 输入markdown 解说稿(## scene-id 分段,[[cue:id]] 标关键句)
* 输出voiceover.mp3拼接好的整段人声+ timeline.json每段 start/end + cues 绝对时间)
*
* 用法:
* node scripts/narrate-pipeline.mjs --script demo.md --out-dir _narration_demo
*
* 解说稿格式:
* ---
* title: 什么是 LLM
* voice: S_JSdgdWk22 # 可选,不填走 .env
* speed: 1.0 # 可选
* gap: 0.3 # 段间静音秒数,默认 0.3
* ---
*
* ## intro
* 大家好,我是花叔。今天我们 5 分钟讲清楚 LLM 是什么。
*
* ## what-is
* LLM 全称 Large Language Model[[cue:bigmodel]]它是一个有几千亿参数的神经网络。
* 本质是一个文字接龙的预测器。
*
* 输出文件结构out-dir 下):
* audio/
* intro.mp3
* what-is.mp3
* voiceover.mp3 拼接全部 scene 的整段人声
* timeline.json schema 见 references/voiceover-pipeline.md
*
* 依赖tts-doubao.mjs、ffmpeg、ffprobe
*/
import fs from 'node:fs';
import path from 'node:path';
import { execFileSync, execSync } from 'node:child_process';
import { fileURLToPath } from 'node:url';
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const SKILL_ROOT = path.resolve(__dirname, '..');
const TTS_SCRIPT = path.join(__dirname, 'tts-doubao.mjs');
function parseArgs(argv) {
const args = {};
for (let i = 2; i < argv.length; i++) {
const a = argv[i];
if (a === '--script') args.script = argv[++i];
else if (a === '--out-dir') args.outDir = argv[++i];
else if (a === '--help' || a === '-h') args.help = true;
}
return args;
}
function usage() {
console.error(`
narrate-pipeline.mjs · L2 长解说总指挥
--script <path> 解说稿 .md 文件(必填)
--out-dir <path> 输出目录(必填)
输出:<out-dir>/voiceover.mp3 + <out-dir>/timeline.json
`.trim());
process.exit(1);
}
/**
* Parse frontmatter + scene blocks from markdown
* Returns { meta, scenes: [{ id, raw }] }
*/
function parseScript(md) {
const meta = {};
let body = md;
const fmMatch = md.match(/^---\n([\s\S]*?)\n---\n/);
if (fmMatch) {
for (const line of fmMatch[1].split('\n')) {
const idx = line.indexOf(':');
if (idx < 0) continue;
const key = line.slice(0, idx).trim();
const val = line.slice(idx + 1).trim();
meta[key] = val;
}
body = md.slice(fmMatch[0].length);
}
const scenes = [];
const re = /^##\s+([\w-]+)\s*\n([\s\S]*?)(?=^##\s+[\w-]+\s*\n|$(?![\r\n]))/gm;
let m;
while ((m = re.exec(body)) !== null) {
scenes.push({ id: m[1], raw: m[2].trim() });
}
return { meta, scenes };
}
/**
* Split a scene's text by [[cue:id]] markers into chunks.
* Returns: { chunks: [{ text, cueAfter? }] }
* cueAfter is the cue id that follows this chunk (chunk's end = cue position)
*
* Example: "A[[cue:x]]B[[cue:y]]C" =>
* chunks: [
* { text: "A", cueAfter: "x" },
* { text: "B", cueAfter: "y" },
* { text: "C" }
* ]
*/
function splitByCues(text) {
const chunks = [];
const re = /\[\[cue:([\w-]+)\]\]/g;
let lastIdx = 0;
let m;
while ((m = re.exec(text)) !== null) {
const before = text.slice(lastIdx, m.index).trim();
chunks.push({ text: before, cueAfter: m[1] });
lastIdx = m.index + m[0].length;
}
const tail = text.slice(lastIdx).trim();
chunks.push({ text: tail });
// 过滤空文本块cue 紧贴段首/段尾时)
return chunks.filter((c) => c.text.length > 0 || c.cueAfter);
}
function getDuration(filePath) {
const out = execFileSync('ffprobe', [
'-v', 'error',
'-show_entries', 'format=duration',
'-of', 'default=noprint_wrappers=1:nokey=1',
filePath,
], { encoding: 'utf8' });
return parseFloat(out.trim());
}
function callTTS(text, outPath, opts) {
const args = ['--text', text, '--out', outPath];
if (opts.voice) args.push('--voice', opts.voice);
if (opts.speed) args.push('--speed', String(opts.speed));
const out = execFileSync('node', [TTS_SCRIPT, ...args], {
encoding: 'utf8',
stdio: ['ignore', 'pipe', 'inherit'],
});
return JSON.parse(out.trim());
}
function ffmpegConcat(inputs, output) {
// 用 concat demuxer 合并相同编码的 mp3
const listFile = output + '.list';
fs.writeFileSync(
listFile,
inputs.map((p) => `file '${p.replace(/'/g, "'\\''")}'`).join('\n'),
);
execSync(
`ffmpeg -y -f concat -safe 0 -i "${listFile}" -c copy "${output}"`,
{ stdio: ['ignore', 'pipe', 'pipe'] },
);
fs.unlinkSync(listFile);
}
function makeSilence(duration, outPath) {
execSync(
`ffmpeg -y -f lavfi -i anullsrc=r=24000:cl=mono -t ${duration} -q:a 9 -acodec libmp3lame "${outPath}"`,
{ stdio: ['ignore', 'pipe', 'pipe'] },
);
}
async function main() {
const args = parseArgs(process.argv);
if (args.help || !args.script || !args.outDir) usage();
const scriptPath = path.resolve(args.script);
const outDir = path.resolve(args.outDir);
const audioDir = path.join(outDir, 'audio');
const tmpDir = path.join(outDir, '.tmp');
fs.mkdirSync(audioDir, { recursive: true });
fs.mkdirSync(tmpDir, { recursive: true });
const md = fs.readFileSync(scriptPath, 'utf8');
const { meta, scenes } = parseScript(md);
if (scenes.length === 0) {
console.error('错:解说稿没有 ## scene 段,至少一段。');
process.exit(1);
}
const voice = meta.voice || undefined;
const speed = meta.speed ? parseFloat(meta.speed) : 1.0;
const gap = meta.gap ? parseFloat(meta.gap) : 0.3;
console.error(`[narrate] script=${path.basename(scriptPath)} scenes=${scenes.length} voice=${voice || '(env)'} speed=${speed} gap=${gap}s`);
// 段间静音文件(共用一个)
const gapFile = path.join(tmpDir, 'gap.mp3');
if (gap > 0) makeSilence(gap, gapFile);
const timeline = {
title: meta.title || path.basename(scriptPath, '.md'),
voice: voice || null,
speed,
gap,
totalDuration: 0,
scenes: [],
};
let cursor = 0;
const sceneAudioFiles = [];
for (let i = 0; i < scenes.length; i++) {
const scene = scenes[i];
console.error(`[narrate] (${i + 1}/${scenes.length}) scene="${scene.id}"`);
const chunks = splitByCues(scene.raw);
const chunkFiles = [];
const cueRecords = [];
const chunkRecords = []; // 每个 chunk 的实测 start/end 段内时间,用于字幕显示
let sceneInternalCursor = 0;
for (let j = 0; j < chunks.length; j++) {
const chunk = chunks[j];
if (!chunk.text) {
// 空文本块cue 紧贴),跳过 TTS 但仍记录 cue 位置
if (chunk.cueAfter) {
cueRecords.push({
id: chunk.cueAfter,
offset: sceneInternalCursor,
});
}
continue;
}
const chunkPath = path.join(tmpDir, `${scene.id}-${j}.mp3`);
const result = callTTS(chunk.text, chunkPath, { voice, speed });
const chunkStart = sceneInternalCursor;
chunkFiles.push(chunkPath);
sceneInternalCursor += result.duration;
chunkRecords.push({
text: chunk.text,
start: chunkStart,
end: sceneInternalCursor,
duration: result.duration,
});
console.error(` chunk ${j}: ${result.duration.toFixed(2)}s · ${chunk.text.length} 字 · ${chunk.text.slice(0, 30)}${chunk.text.length > 30 ? '…' : ''}`);
if (chunk.cueAfter) {
cueRecords.push({
id: chunk.cueAfter,
offset: sceneInternalCursor,
});
}
}
// 合并段内子段
const sceneAudio = path.join(audioDir, `${scene.id}.mp3`);
if (chunkFiles.length === 1) {
fs.copyFileSync(chunkFiles[0], sceneAudio);
} else {
ffmpegConcat(chunkFiles, sceneAudio);
}
const sceneDuration = getDuration(sceneAudio);
// 拼接到总轨:先加 gap除了第一段再加 scene
if (i > 0 && gap > 0) {
sceneAudioFiles.push(gapFile);
cursor += gap;
}
sceneAudioFiles.push(sceneAudio);
timeline.scenes.push({
id: scene.id,
start: cursor,
end: cursor + sceneDuration,
duration: sceneDuration,
audio: path.relative(outDir, sceneAudio),
text: scene.raw.replace(/\[\[cue:[\w-]+\]\]/g, ''),
// chunks: 用于字幕逐句显示。start/end 是段内相对时间absoluteStart/absoluteEnd 是整轨绝对时间
chunks: chunkRecords.map((c) => ({
text: c.text,
start: c.start,
end: c.end,
absoluteStart: cursor + c.start,
absoluteEnd: cursor + c.end,
})),
cues: cueRecords.map((c) => ({
id: c.id,
offset: c.offset,
absoluteTime: cursor + c.offset,
})),
});
cursor += sceneDuration;
}
// 合并整轨
const voiceoverPath = path.join(outDir, 'voiceover.mp3');
ffmpegConcat(sceneAudioFiles, voiceoverPath);
timeline.totalDuration = getDuration(voiceoverPath);
timeline.voiceover = 'voiceover.mp3';
fs.writeFileSync(
path.join(outDir, 'timeline.json'),
JSON.stringify(timeline, null, 2),
);
// 清理 tmp
fs.rmSync(tmpDir, { recursive: true, force: true });
console.error(`\n[narrate] 完成。`);
console.error(` voiceover: ${voiceoverPath}`);
console.error(` timeline: ${path.join(outDir, 'timeline.json')}`);
console.error(` 总时长: ${timeline.totalDuration.toFixed(2)}s (${(timeline.totalDuration / 60).toFixed(2)} min)`);
console.error(` 段数: ${timeline.scenes.length}`);
const totalCues = timeline.scenes.reduce((sum, s) => sum + s.cues.length, 0);
console.error(` cue 数: ${totalCues}`);
}
main().catch((err) => {
console.error(`narrate-pipeline 失败:${err.message}`);
console.error(err.stack);
process.exit(1);
});