2026-06-14 16:17:10 +08:00

195 lines
8.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
"""KTV产线交付质检Worker — 4项QA检查"""
import json, os, subprocess, re
def run_ffprobe(filepath, args):
    """Run ``ffprobe`` on *filepath* and return its stripped standard output.

    The command line is ``ffprobe -v error <args...> <filepath>``.  The exit
    status is not inspected and stderr is captured but discarded, so a failed
    probe simply yields whatever (possibly empty) text reached stdout.

    Args:
        filepath: Path of the media file to probe; passed as the last argument.
        args: Iterable of extra ffprobe command-line arguments.

    Returns:
        The captured stdout with leading/trailing whitespace removed.

    Raises:
        FileNotFoundError: If the ``ffprobe`` executable cannot be found.
    """
    cmd = ["ffprobe", "-v", "error", *args, filepath]
    # Decode explicitly as UTF-8 instead of the platform locale: callers match
    # non-ASCII handler_name tags in this output, and a locale such as cp936
    # would otherwise corrupt them or raise UnicodeDecodeError.
    # errors="replace" keeps a stray invalid byte from aborting the whole probe.
    result = subprocess.run(
        cmd,
        capture_output=True,
        encoding="utf-8",
        errors="replace",
    )
    return result.stdout.strip()
class VerifyWorker:
    """Delivery QA worker for the KTV production line.

    ``run_task`` runs four checks and aggregates their error lists:

    * QA1 - subtitle timeline sanity (from the calibrated JSON file)
    * QA2 - ASS karaoke text matches the original lyrics line by line
    * QA3 - the MTV file has a single audio track of the expected duration
    * QA4 - the KTV file has two audio tracks, labelled accompaniment and
      original vocal in that order, of the expected duration
    """

    # Karaoke fill tags such as ``{\kf50}`` that are removed before comparing
    # ASS dialogue text with the plain lyrics.
    _KF_TAG = re.compile(r'\{\\kf\d+\}')

    def __init__(self, longtasks):
        """Keep the ``longtasks`` handle on ``self.lt``."""
        self.lt = longtasks

    @staticmethod
    def _report(errors):
        """Build the result entry for a check that actually ran."""
        return {"passed": len(errors) == 0, "errors": errors}

    @staticmethod
    def _skipped(reason):
        """Build the result entry for a check that was skipped."""
        return {"passed": True, "errors": [], "skipped": reason}

    @staticmethod
    def _read_text(path):
        """Read a text file as UTF-8 (BOM tolerated), else the locale default.

        UTF-8 is tried first so results do not depend on the host locale; the
        fallback keeps files in a legacy local encoding readable as before.
        """
        try:
            with open(path, "r", encoding="utf-8-sig") as f:
                return f.read()
        except UnicodeDecodeError:
            with open(path, "r") as f:
                return f.read()

    @staticmethod
    def _probe_audio_streams(path):
        """Return the audio stream dicts that ffprobe reports for *path*.

        Raises whatever ``run_ffprobe`` / ``json.loads`` raise (for example a
        JSON decode error when ffprobe printed nothing).
        """
        raw = run_ffprobe(path, [
            "-show_entries", "stream=index,codec_type,duration",
            "-show_entries", "stream_tags=handler_name",
            "-of", "json"
        ])
        probed = json.loads(raw)
        return [s for s in probed.get("streams", []) if s.get("codec_type") == "audio"]

    async def run_task(self, payload):
        """Run all QA checks described by *payload* and return a summary dict.

        Payload keys read (all optional): ``mtv_path``, ``ktv_path``,
        ``ass_path``, ``lyrics_path``, ``calibrated_path`` and
        ``original_duration`` (seconds; ``0``/missing/``None`` disables the
        duration comparisons).

        Returns:
            ``{"status": "FAILED", "error": ..., "missing": [...]}`` when a
            supplied path does not exist; otherwise a dict with ``status``
            (``"PASSED"``/``"FAILED"``), ``qa_results``, ``total_errors`` and
            ``errors``.

        NOTE(review): the checks call ffprobe synchronously, so this coroutine
        blocks while they run - confirm that is acceptable for the caller.
        """
        mtv_path = payload.get("mtv_path", "")
        ktv_path = payload.get("ktv_path", "")
        ass_path = payload.get("ass_path", "")
        lyrics_path = payload.get("lyrics_path", "")
        calibrated_path = payload.get("calibrated_path", "")
        # ``or 0`` lets an explicit null/empty duration mean "unknown" instead
        # of raising inside float().
        original_duration = float(payload.get("original_duration") or 0)

        # Fail fast when any path that was supplied does not exist.
        labelled_paths = (
            ("MTV", mtv_path),
            ("KTV", ktv_path),
            ("ASS", ass_path),
            ("歌词", lyrics_path),
        )
        missing = [
            f"{label}: {p}"
            for label, p in labelled_paths
            if p and not os.path.exists(p)
        ]
        if missing:
            return {"status": "FAILED", "error": "文件不存在", "missing": missing}

        all_errors = []
        qa_results = {}

        # QA-1: subtitle timeline, only when a calibrated file is available.
        if calibrated_path and os.path.exists(calibrated_path):
            qa1_errors = self._check_timeline(calibrated_path)
            qa_results["QA1_timeline"] = self._report(qa1_errors)
            all_errors.extend(qa1_errors)
        else:
            qa_results["QA1_timeline"] = self._skipped("no calibrated_path")

        # QA-2: subtitle text vs. original lyrics.  Marked as skipped (like
        # the other checks) when either input is absent, rather than reporting
        # a pass for a comparison that never ran.
        if ass_path and lyrics_path:
            qa2_errors = self._check_subtitles(ass_path, lyrics_path)
            qa_results["QA2_lyrics"] = self._report(qa2_errors)
            all_errors.extend(qa2_errors)
        else:
            qa_results["QA2_lyrics"] = self._skipped("no ASS/lyrics file")

        # QA-3: MTV audio track.
        if mtv_path and os.path.exists(mtv_path):
            qa3_errors = self._check_mtv(mtv_path, original_duration)
            qa_results["QA3_mtv"] = self._report(qa3_errors)
            all_errors.extend(qa3_errors)
        else:
            qa_results["QA3_mtv"] = self._skipped("no MTV file")

        # QA-4: KTV dual audio tracks.
        if ktv_path and os.path.exists(ktv_path):
            qa4_errors = self._check_ktv(ktv_path, original_duration)
            qa_results["QA4_ktv"] = self._report(qa4_errors)
            all_errors.extend(qa4_errors)
        else:
            qa_results["QA4_ktv"] = self._skipped("no KTV file")

        # Summary.
        return {
            "status": "FAILED" if all_errors else "PASSED",
            "qa_results": qa_results,
            "total_errors": len(all_errors),
            "errors": all_errors
        }

    def _check_timeline(self, calibrated_path):
        """QA-1: each segment must end after it starts and not overlap the previous one.

        The JSON file may be a list of segments, or an object holding that
        list under ``"lyrics"`` or ``"segments"``.  A segment may start up to
        0.1 s before the previous segment's end without being reported.

        Returns:
            A list of error strings (empty when the timeline is clean).  Any
            failure to read or interpret the file is reported as one entry.
        """
        errors = []
        try:
            data = json.loads(self._read_text(calibrated_path))
            # Only a JSON object has .get(); a top-level array is the segment
            # list itself.
            if isinstance(data, dict):
                lyrics = data.get("lyrics", data.get("segments", data))
            else:
                lyrics = data
            if not isinstance(lyrics, list):
                return ["calibrated数据格式错误应为list"]

            prev_end = None
            for number, seg in enumerate(lyrics, start=1):
                start = seg.get("start", 0)
                end = seg.get("end", 0)
                if end <= start:
                    errors.append(f"{number}: end({end:.2f})<=start({start:.2f})")
                if prev_end is not None and start < prev_end - 0.1:
                    errors.append(f"{number}: start({start:.2f})与前一行end({prev_end:.2f})重叠")
                prev_end = end
        except Exception as e:
            # Deliberately broad: malformed segments (non-dict entries, null
            # times) are reported as a QA error rather than aborting the task.
            errors.append(f"读取calibrated文件失败: {e}")
        return errors

    def _check_subtitles(self, ass_path, lyrics_path):
        """QA-2: compare karaoke text in the ASS file with the original lyrics.

        Lines of the ASS file containing both ``Dialogue`` and ``Karaoke`` are
        taken as karaoke lines; their text field has ``{\\kfN}`` tags removed.
        Blank lyric lines are ignored and spaces are ignored when comparing.
        At most five individual mismatches are listed, followed by a count of
        the remainder.

        Returns:
            A list of error strings; empty when everything matches or when
            either path is empty.
        """
        if not ass_path or not lyrics_path:
            return []
        if not os.path.exists(ass_path):
            return [f"ASS文件不存在: {ass_path}"]
        if not os.path.exists(lyrics_path):
            return [f"原始歌词文件不存在: {lyrics_path}"]

        # Report unreadable files as a QA error, consistent with the other
        # checks, instead of letting the exception escape run_task.
        try:
            lyrics_text = self._read_text(lyrics_path)
            ass_content = self._read_text(ass_path)
        except (OSError, UnicodeDecodeError) as e:
            return [f"读取字幕/歌词文件失败: {e}"]

        original_lines = [
            stripped
            for stripped in (raw.strip() for raw in lyrics_text.split('\n'))
            if stripped
        ]

        karaoke_lines = []
        for line in ass_content.split('\n'):
            if 'Karaoke' not in line or 'Dialogue' not in line:
                continue
            # The text is the 10th comma-separated field; maxsplit keeps
            # commas inside the lyric text intact.
            parts = line.split(',', 9)
            if len(parts) < 10:
                continue
            clean = self._KF_TAG.sub('', parts[9]).strip()
            if clean:
                karaoke_lines.append(clean)

        errors = []
        if len(karaoke_lines) != len(original_lines):
            errors.append(f"歌词行数不匹配: ASS={len(karaoke_lines)}行, 原始={len(original_lines)}")

        mismatch_count = 0
        for number, (k_text, o_text) in enumerate(zip(karaoke_lines, original_lines), start=1):
            kl = k_text.replace(' ', '')
            ol = o_text.replace(' ', '')
            if kl == ol:
                continue
            mismatch_count += 1
            if mismatch_count <= 5:
                errors.append(f"{number}: ASS='{kl}' vs 原始='{ol}'")
        if mismatch_count > 5:
            errors.append(f"... 还有{mismatch_count-5}处不匹配")
        return errors

    def _check_mtv(self, path, original_duration):
        """QA-3: the MTV file must have one audio track matching the original duration.

        When ``original_duration`` is positive, the first audio stream must be
        within 2 s of it and the container duration within 3 s.

        Returns:
            A list of error strings; any exception while probing is reported
            as a single ``ffprobe失败`` entry appended to what was found so far.
        """
        errors = []
        try:
            audio_streams = self._probe_audio_streams(path)
            if len(audio_streams) != 1:
                errors.append(f"MTV应有1条音轨实际{len(audio_streams)}")
            if audio_streams and original_duration > 0:
                # A stream without a duration field counts as 0 s.
                dur = float(audio_streams[0].get("duration", 0))
                if abs(dur - original_duration) > 2:
                    errors.append(f"音频时长{dur:.1f}s与原曲{original_duration:.1f}s差距>2s")

            fmt = run_ffprobe(path, ["-show_entries", "format=duration", "-of", "csv=p=0"])
            if fmt and original_duration > 0:
                video_dur = float(fmt)
                if abs(video_dur - original_duration) > 3:
                    errors.append(f"视频时长{video_dur:.1f}s与原曲{original_duration:.1f}s差距>3s")
        except Exception as e:
            errors.append(f"ffprobe失败: {e}")
        return errors

    def _check_ktv(self, path, original_duration):
        """QA-4: the KTV file must have two audio tracks: accompaniment, then original vocal.

        Track labels are read from each stream's ``handler_name`` tag.  When
        ``original_duration`` is positive, each track must be within 2 s of it.
        A wrong track count is reported alone, without further checks.

        Returns:
            A list of error strings; any exception while probing is reported
            as a single ``ffprobe失败`` entry appended to what was found so far.
        """
        errors = []
        try:
            audio_streams = self._probe_audio_streams(path)
            if len(audio_streams) != 2:
                errors.append(f"KTV应有2条音轨实际{len(audio_streams)}")
                return errors

            t1 = audio_streams[0].get("tags", {}).get("handler_name", "")
            if "伴奏" not in t1 and "Accompaniment" not in t1:
                errors.append(f"Track1应为伴奏实际标签: '{t1}'")
            t2 = audio_streams[1].get("tags", {}).get("handler_name", "")
            if "原唱" not in t2 and "Original" not in t2:
                errors.append(f"Track2应为原唱实际标签: '{t2}'")

            if original_duration > 0:
                for number, stream in enumerate(audio_streams, start=1):
                    dur = float(stream.get("duration", 0))
                    if abs(dur - original_duration) > 2:
                        errors.append(f"音轨{number}时长{dur:.1f}s与原曲{original_duration:.1f}s差距>2s")
        except Exception as e:
            errors.append(f"ffprobe失败: {e}")
        return errors