#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""KTV production-line delivery QA service: verify_delivery as an HTTP service.

Readable reconstruction of commit 63a796aa22747d1562ace41968748de448eb5951
(author: yumoqing, "Initial: KTV delivery verification HTTP service
(4 QA checks)").  That commit added two files, both kept in this module:

* ``workers/verify.py`` -- ``run_ffprobe`` and ``VerifyWorker`` (the 4 checks)
* ``ah.py``             -- ``init`` and the ``webapp(init)`` bootstrap

The four checks are:

* QA-1  subtitle timeline sanity (from the optional calibrated JSON file)
* QA-2  ASS karaoke text matches the original lyrics line by line
* QA-3  MTV has exactly one audio track whose duration matches the original
* QA-4  KTV has two audio tracks, accompaniment first and original vocal second
"""
import json
import os
import re
import subprocess
import sys

# Any ASS override block, e.g. {\k50}, {\kf120}, {\K30}, {\an8\fs40}.
# Override blocks are never part of the sung text, so all of them are removed
# before comparing with the lyrics (the first version only removed {\kfN}).
_ASS_OVERRIDE_RE = re.compile(r"\{[^}]*\}")

# Only the first N per-line lyric mismatches are listed individually.
_MAX_REPORTED_MISMATCHES = 5

# A line may start this many seconds before the previous line ends without
# being reported as overlapping.
_OVERLAP_TOLERANCE_S = 0.1

# Single -show_entries specification (sections separated by ':') covering the
# stream fields and the handler_name tag that the audio checks read.
_FFPROBE_STREAM_ENTRIES = (
    "stream=index,codec_type,duration:stream_tags=handler_name"
)


# ---------------------------------------------------------------------------
# workers/verify.py
# ---------------------------------------------------------------------------

def run_ffprobe(filepath, args, timeout=None):
    """Run ``ffprobe -v error <args> <filepath>`` and return its stdout.

    Args:
        filepath: Media file to probe; passed as the last argument.
        args: Extra ffprobe command-line arguments (iterable of str).
        timeout: Optional limit in seconds handed to ``subprocess.run``.
            ``None`` (the default) waits indefinitely, as before.

    Returns:
        ffprobe's standard output with surrounding whitespace stripped.  The
        exit status is not inspected, so a failed probe can return "".

    Raises:
        OSError: If the ffprobe executable cannot be started.
        subprocess.TimeoutExpired: If ``timeout`` is given and exceeded.
    """
    cmd = ["ffprobe", "-v", "error", *args, filepath]
    completed = subprocess.run(
        cmd, capture_output=True, text=True, timeout=timeout
    )
    return completed.stdout.strip()


def _read_text(path):
    """Return the content of a UTF-8 text file, dropping a leading BOM."""
    # "utf-8-sig" reads plain UTF-8 as well, and keeps a BOM from ending up
    # inside the first lyric line where it would cause a false mismatch.
    with open(path, "r", encoding="utf-8-sig") as handle:
        return handle.read()


def _probe_audio_streams(path):
    """Return the audio stream dicts ffprobe reports for ``path``."""
    raw = run_ffprobe(
        path, ["-show_entries", _FFPROBE_STREAM_ENTRIES, "-of", "json"]
    )
    data = json.loads(raw)
    return [
        stream
        for stream in data.get("streams", [])
        if stream.get("codec_type") == "audio"
    ]


def _qa_result(errors, skipped=None):
    """Build one ``qa_results`` entry from a check's error list."""
    result = {"passed": not errors, "errors": errors}
    if skipped is not None:
        result["skipped"] = skipped
    return result


class VerifyWorker:
    """Worker that runs the four delivery QA checks for one task."""

    def __init__(self, longtasks):
        """Keep a reference to the ``longtasks`` object passed in."""
        self.lt = longtasks

    async def run(self, task):
        """Run all QA checks for ``task`` and return a summary dict.

        ``task`` must expose ``payload`` (a dict) and ``task_id``.  Payload
        keys read: ``mtv_path``, ``ktv_path``, ``ass_path``, ``lyrics_path``,
        ``calibrated_path`` and ``original_duration`` (seconds; a missing or
        ``None`` value is treated as 0, which disables duration comparisons).

        Returns:
            On an unusable payload (bad duration, or a given path that does
            not exist): ``{"status": "FAILED", "task_id", "error", ...}``.
            Otherwise: ``{"status": "PASSED"|"FAILED", "task_id",
            "qa_results", "total_errors", "errors"}``.
        """
        # NOTE(review): the checks below do blocking file and subprocess I/O
        # inside a coroutine; if other coroutines share this event loop they
        # wait meanwhile -- confirm that is acceptable for the caller.
        payload = task.payload
        task_id = task.task_id

        mtv_path = payload.get("mtv_path", "")
        ktv_path = payload.get("ktv_path", "")
        ass_path = payload.get("ass_path", "")
        lyrics_path = payload.get("lyrics_path", "")

        raw_duration = payload.get("original_duration")
        try:
            # `or 0` lets an explicit None/"" mean "unknown" instead of
            # raising from float().
            original_duration = float(raw_duration or 0)
        except (TypeError, ValueError):
            return {
                "status": "FAILED",
                "task_id": task_id,
                "error": f"original_duration无效: {raw_duration!r}",
            }

        # Every path that was supplied must exist; empty paths are allowed
        # and make the corresponding check a no-op or a skip below.
        labelled_paths = [
            ("MTV", mtv_path),
            ("KTV", ktv_path),
            ("ASS", ass_path),
            ("歌词", lyrics_path),
        ]
        missing = [
            f"{label}: {p}"
            for label, p in labelled_paths
            if p and not os.path.exists(p)
        ]
        if missing:
            return {
                "status": "FAILED",
                "task_id": task_id,
                "error": "文件不存在",
                "missing": missing,
            }

        all_errors = []
        qa_results = {}

        # QA-1: subtitle timing, only when a calibrated file is available.
        calibrated_path = payload.get("calibrated_path", "")
        if calibrated_path and os.path.exists(calibrated_path):
            qa1_errors = self._check_timeline(calibrated_path)
            qa_results["QA1_timeline"] = _qa_result(qa1_errors)
            all_errors.extend(qa1_errors)
        else:
            qa_results["QA1_timeline"] = _qa_result(
                [], skipped="no calibrated_path"
            )

        # QA-2: subtitle text equals the original lyrics.
        qa2_errors = self._check_subtitles(ass_path, lyrics_path)
        qa_results["QA2_lyrics"] = _qa_result(qa2_errors)
        all_errors.extend(qa2_errors)

        # QA-3: MTV carries the original audio.
        if mtv_path and os.path.exists(mtv_path):
            qa3_errors = self._check_mtv(mtv_path, original_duration)
            qa_results["QA3_mtv"] = _qa_result(qa3_errors)
            all_errors.extend(qa3_errors)
        else:
            qa_results["QA3_mtv"] = _qa_result([], skipped="no MTV file")

        # QA-4: KTV dual-track order.
        if ktv_path and os.path.exists(ktv_path):
            qa4_errors = self._check_ktv(ktv_path, original_duration)
            qa_results["QA4_ktv"] = _qa_result(qa4_errors)
            all_errors.extend(qa4_errors)
        else:
            qa_results["QA4_ktv"] = _qa_result([], skipped="no KTV file")

        # Summary.
        return {
            "status": "FAILED" if all_errors else "PASSED",
            "task_id": task_id,
            "qa_results": qa_results,
            "total_errors": len(all_errors),
            "errors": all_errors,
        }

    def _check_timeline(self, calibrated_path):
        """QA-1: each line must end after it starts and not overlap the previous.

        The JSON file may be a bare list of segments, or a dict holding the
        list under ``"lyrics"`` or ``"segments"``.  Each segment is read via
        ``start``/``end`` keys (default 0).

        Returns:
            A list of error strings; empty when the timeline is clean.
        """
        try:
            data = json.loads(_read_text(calibrated_path))
        except (OSError, ValueError) as exc:
            # ValueError covers both invalid JSON and invalid UTF-8.
            return [f"读取calibrated文件失败: {exc}"]

        # A top-level list has no .get(); use it directly as the segments.
        if isinstance(data, dict):
            lyrics = data.get("lyrics", data.get("segments", data))
        else:
            lyrics = data
        if not isinstance(lyrics, list):
            return ["calibrated数据格式错误,应为list"]

        errors = []
        prev_end = None
        try:
            for line_no, seg in enumerate(lyrics, start=1):
                start = seg.get("start", 0)
                end = seg.get("end", 0)
                if end <= start:
                    errors.append(
                        f"行{line_no}: end({end:.2f})<=start({start:.2f})"
                    )
                if (
                    prev_end is not None
                    and start < prev_end - _OVERLAP_TOLERANCE_S
                ):
                    errors.append(
                        f"行{line_no}: start({start:.2f})"
                        f"与前一行end({prev_end:.2f})重叠"
                    )
                prev_end = end
        except (AttributeError, TypeError, ValueError) as exc:
            # A malformed segment (not a dict, non-numeric times) stops the
            # scan; errors collected so far are kept.
            errors.append(f"读取calibrated文件失败: {exc}")
        return errors

    def _check_subtitles(self, ass_path, lyrics_path):
        """QA-2: compare karaoke text in the ASS file with the lyrics file.

        Karaoke lines are the lines containing both ``Dialogue`` and
        ``Karaoke`` with at least ten comma-separated fields; the tenth field
        is the text.  Override blocks (``{...}``) are removed and spaces are
        ignored when comparing.  Blank lyric lines are skipped.

        Returns:
            A list of error strings; empty when either path is empty or when
            everything matches.
        """
        if not ass_path or not lyrics_path:
            return []
        if not os.path.exists(ass_path):
            return [f"ASS文件不存在: {ass_path}"]
        if not os.path.exists(lyrics_path):
            return [f"原始歌词文件不存在: {lyrics_path}"]

        try:
            lyrics_text = _read_text(lyrics_path)
            ass_text = _read_text(ass_path)
        except (OSError, UnicodeDecodeError) as exc:
            # Report unreadable input as a QA error, like the other checks,
            # instead of letting the exception escape from run().
            return [f"读取字幕或歌词文件失败: {exc}"]

        original_lines = [
            stripped
            for stripped in (raw.strip() for raw in lyrics_text.split("\n"))
            if stripped
        ]

        karaoke_lines = []
        for line in ass_text.split("\n"):
            if "Karaoke" not in line or "Dialogue" not in line:
                continue
            parts = line.split(",", 9)
            if len(parts) < 10:
                continue
            clean = _ASS_OVERRIDE_RE.sub("", parts[9]).strip()
            if clean:
                karaoke_lines.append(clean)

        errors = []
        if len(karaoke_lines) != len(original_lines):
            errors.append(
                f"歌词行数不匹配: ASS={len(karaoke_lines)}行, "
                f"原始={len(original_lines)}行"
            )

        mismatch_count = 0
        # zip() stops at the shorter list; the length difference itself was
        # already reported above.
        pairs = zip(karaoke_lines, original_lines)
        for line_no, (ass_line, lyric_line) in enumerate(pairs, start=1):
            kl = ass_line.replace(" ", "")
            ol = lyric_line.replace(" ", "")
            if kl == ol:
                continue
            mismatch_count += 1
            if mismatch_count <= _MAX_REPORTED_MISMATCHES:
                errors.append(f"行{line_no}: ASS='{kl}' vs 原始='{ol}'")
        if mismatch_count > _MAX_REPORTED_MISMATCHES:
            remaining = mismatch_count - _MAX_REPORTED_MISMATCHES
            errors.append(f"... 还有{remaining}处不匹配")

        return errors

    def _check_mtv(self, path, original_duration):
        """QA-3: MTV must have one audio track with matching durations.

        Duration comparisons run only when ``original_duration`` > 0: the
        first audio stream may differ by at most 2 s, the container duration
        by at most 3 s.

        Returns:
            A list of error strings; empty when the file passes.
        """
        errors = []
        try:
            audio_streams = _probe_audio_streams(path)
            if len(audio_streams) != 1:
                errors.append(f"MTV应有1条音轨,实际{len(audio_streams)}条")
            if audio_streams and original_duration > 0:
                dur = float(audio_streams[0].get("duration", 0))
                if abs(dur - original_duration) > 2:
                    errors.append(
                        f"音频时长{dur:.1f}s与原曲"
                        f"{original_duration:.1f}s差距>2s"
                    )

            fmt = run_ffprobe(
                path, ["-show_entries", "format=duration", "-of", "csv=p=0"]
            )
            if fmt and original_duration > 0:
                video_dur = float(fmt)
                if abs(video_dur - original_duration) > 3:
                    errors.append(
                        f"视频时长{video_dur:.1f}s与原曲"
                        f"{original_duration:.1f}s差距>3s"
                    )
        except Exception as exc:
            # Deliberately broad: any probing/parsing problem becomes a QA
            # error rather than aborting the whole task.
            errors.append(f"ffprobe失败: {exc}")
        return errors

    def _check_ktv(self, path, original_duration):
        """QA-4: KTV must have two audio tracks in the expected order.

        Track 1's ``handler_name`` tag must contain "伴奏" or
        "Accompaniment"; track 2's must contain "原唱" or "Original".  When
        ``original_duration`` > 0 each track may differ from it by at most
        2 s.  If the track count is not 2, only that error is returned.

        Returns:
            A list of error strings; empty when the file passes.
        """
        errors = []
        try:
            audio_streams = _probe_audio_streams(path)
            if len(audio_streams) != 2:
                errors.append(f"KTV应有2条音轨,实际{len(audio_streams)}条")
                return errors

            t1 = audio_streams[0].get("tags", {}).get("handler_name", "")
            if "伴奏" not in t1 and "Accompaniment" not in t1:
                errors.append(f"Track1应为伴奏,实际标签: '{t1}'")
            t2 = audio_streams[1].get("tags", {}).get("handler_name", "")
            if "原唱" not in t2 and "Original" not in t2:
                errors.append(f"Track2应为原唱,实际标签: '{t2}'")

            if original_duration > 0:
                for track_no, stream in enumerate(audio_streams, start=1):
                    dur = float(stream.get("duration", 0))
                    if abs(dur - original_duration) > 2:
                        errors.append(
                            f"音轨{track_no}时长{dur:.1f}s与原曲"
                            f"{original_duration:.1f}s差距>2s"
                        )
        except Exception as exc:
            # Deliberately broad, see _check_mtv.
            errors.append(f"ffprobe失败: {exc}")
        return errors


# ---------------------------------------------------------------------------
# ah.py
# ---------------------------------------------------------------------------

def init(app):
    """Create the server environment and attach the verify_delivery queue.

    Builds a ``ServerEnv``, stores on it a ``LongTasks`` instance configured
    with ``VerifyWorker`` and returns the environment.  The meaning of the
    ``LongTasks`` arguments is defined by that project class, which is not
    visible here.
    """
    # Imported here so this module can be imported (e.g. by tests) without
    # the ahserver / longtasks packages installed.
    from ahserver.serverenv import ServerEnv
    from longtasks import LongTasks

    env = ServerEnv()
    env.longtasks = LongTasks(
        app=app,
        worker_class=VerifyWorker,
        queue_name="verify_delivery",
        worker_cnt=1,
        stuck_seconds=600,
        max_age_hours=24,
    )
    return env


if __name__ == "__main__":
    # Make modules that sit next to this script importable.
    sys.path.insert(0, os.path.dirname(__file__))

    from ahserver.webapp import webapp

    webapp(init)