Hermes Agent 83011d13d9 feat: KTV产线v2 — Ali T2I素材生成 + 虚拟素材库 + Seedance 2.0 R2V
- character_designing: 扩展为完整视觉素材设计(角色3视角+道具+服饰+场景)
- character_image_generating: 改用llmage API (wan2.7-image-pro)生成所有素材图片
- 新增asset_uploading: 素材图片上传到虚拟素材库,返回asset://素材号
- storyboard_generating: 用asset://引用素材,标记r2v/t2v模式
- scene_video_generating: 缺省Seedance 2.0,r2v模式+素材引用
- 所有模型统一通过token.opencomputing.cn接入
- adapter.py: 注册18个step handler(含asset_uploading)
2026-06-20 12:18:56 +08:00

202 lines
6.7 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

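"""KTV pipeline adapter: registers step types and handlers with pipeline-service.

This module is an external adapter for pipeline-service. It is responsible for:
1. Registering the KTV-specific step_types (including their metadata).
2. Registering the 18 step handler functions (including asset_uploading).
3. Providing load_ktv_adapter() as a one-call loader.

The host application calls load_pipeline_service() first, then load_ktv_adapter().
"""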
import logging
from pipeline_service import register_handler, register_step_type
from .handlers import (
handle_audio_preparing,
handle_video_preparing,
handle_demucs_separating,
handle_lyric_calibrating,
handle_subtitle_rendering,
handle_subtitle_exporting,
handle_lyric_generating,
handle_lyric_evaluating,
handle_music_generating,
handle_music_polling,
handle_character_designing,
handle_character_image_generating,
handle_asset_uploading,
handle_storyboard_generating,
handle_scene_video_generating,
handle_scene_video_evaluating,
handle_scene_video_concatenating,
handle_ktv_synthesizing,
)
# Module-level logger; its name is the fixed string below, not derived from __name__.
logger = logging.getLogger("pipeline_ktv.adapter")
# Adapter name and version; both are interpolated into the log line written by load_ktv_adapter().
MODULE_NAME = "pipeline_ktv"
MODULE_VERSION = "1.0.0"
# ── Step type metadata ──
# One row per KTV step type, in registration order:
# (step_type, display_name, category, is_interactive, description)
_STEP_TYPE_ROWS = (
    ("audio_preparing", "音频准备", "media", False, "下载/复制音频文件,提取时长"),
    ("video_preparing", "视频准备", "media", False, "下载/复制视频,提取音频轨道"),
    ("demucs_separating", "Demucs人声分离", "media", False, "GPU运行Demucs分离人声和伴奏"),
    ("lyric_calibrating", "歌词时间轴校准", "llm", False, "ASR识别+LLM校准歌词时间戳"),
    ("subtitle_rendering", "字幕渲染", "media", False, "生成ASS卡拉OK字幕文件"),
    ("subtitle_exporting", "字幕导出", "media", False, "导出字幕为独立文件"),
    ("lyric_generating", "歌词生成", "llm", False, "LLM根据主题创作歌词"),
    ("lyric_evaluating", "歌词评估", "llm", False, "LLM评估歌词质量低于阈值则拒绝"),
    ("music_generating", "音乐生成", "media", False, "提交Suno/MiniMax音乐生成任务"),
    ("music_polling", "音乐轮询", "media", False, "轮询音乐生成API直到完成"),
    (
        "character_designing",
        "视觉素材设计",
        "llm",
        False,
        "LLM设计角色(正面/左侧/右侧)、道具、服饰、场景图片prompt",
    ),
    (
        "character_image_generating",
        "素材图片生成",
        "media",
        False,
        "通过llmage T2I API (wan2.7) 生成角色多视角、道具、服饰、场景图片",
    ),
    (
        "asset_uploading",
        "素材上传虚拟库",
        "media",
        False,
        "将生成的素材图片上传到虚拟素材库返回asset://素材号引用",
    ),
    (
        "storyboard_generating",
        "分镜脚本生成",
        "llm",
        False,
        "LLM生成MV分镜脚本引用asset://素材号标记r2v/t2v模式",
    ),
    (
        "scene_video_generating",
        "场景视频生成",
        "media",
        False,
        "通过Seedance 2.0生成场景视频含角色场景用r2v模式+素材引用",
    ),
    ("scene_video_evaluating", "场景视频评估", "media", False, "评估场景视频质量"),
    (
        "scene_video_concatenating",
        "场景视频拼接",
        "media",
        False,
        "ffmpeg拼接场景视频循环匹配音频时长",
    ),
    ("ktv_synthesizing", "KTV最终合成", "media", False, "合成KTV双轨+MTV单轨最终视频"),
)

# Step type name -> metadata dict. Every entry carries the same four keys, in
# the order display_name, category, is_interactive, description.
KTV_STEP_TYPES = {
    step_key: {
        "display_name": label,
        "category": group,
        "is_interactive": interactive,
        "description": summary,
    }
    for step_key, label, group, interactive, summary in _STEP_TYPE_ROWS
}
# ── Handler mapping ──
# (step_type, handler) pairs; the handler callables come from .handlers.
_HANDLER_TABLE = (
    ("audio_preparing", handle_audio_preparing),
    ("video_preparing", handle_video_preparing),
    ("demucs_separating", handle_demucs_separating),
    ("lyric_calibrating", handle_lyric_calibrating),
    ("subtitle_rendering", handle_subtitle_rendering),
    ("subtitle_exporting", handle_subtitle_exporting),
    ("lyric_generating", handle_lyric_generating),
    ("lyric_evaluating", handle_lyric_evaluating),
    ("music_generating", handle_music_generating),
    ("music_polling", handle_music_polling),
    ("character_designing", handle_character_designing),
    ("character_image_generating", handle_character_image_generating),
    ("asset_uploading", handle_asset_uploading),
    ("storyboard_generating", handle_storyboard_generating),
    ("scene_video_generating", handle_scene_video_generating),
    ("scene_video_evaluating", handle_scene_video_evaluating),
    ("scene_video_concatenating", handle_scene_video_concatenating),
    ("ktv_synthesizing", handle_ktv_synthesizing),
)

# Step type name -> handler callable, in the order of the table above.
KTV_HANDLERS = dict(_HANDLER_TABLE)
def register_ktv_step_types():
    """Register the metadata of every KTV step type with pipeline-service.

    Calls ``register_step_type(name, metadata)`` once for each entry of
    ``KTV_STEP_TYPES``, then logs the number of entries.
    """
    for type_name, type_meta in KTV_STEP_TYPES.items():
        register_step_type(type_name, type_meta)

    logger.info(f"Registered {len(KTV_STEP_TYPES)} KTV step types")
def register_ktv_handlers():
    """Register every KTV step handler with pipeline-service.

    Calls ``register_handler(name, handler)`` once for each entry of
    ``KTV_HANDLERS``, then logs the number of entries.
    """
    for type_name, handler in KTV_HANDLERS.items():
        register_handler(type_name, handler)

    logger.info(f"Registered {len(KTV_HANDLERS)} KTV handlers")
def load_ktv_adapter():
    """Load the KTV adapter in one call: step types first, then handlers.

    ``load_pipeline_service()`` must already have been executed before this
    function is called.

    Returns:
        Always ``True``.
    """
    # Order matters here only in that it mirrors the original sequence:
    # step type metadata is registered before the handlers.
    for register in (register_ktv_step_types, register_ktv_handlers):
        register()

    logger.info(f"[{MODULE_NAME}] v{MODULE_VERSION} loaded — KTV pipeline adapter")
    return True