# Snapshot: 2025-11-06 15:33:33 +08:00
# 73 lines, 2.1 KiB, Python

from longtasks.longtasks import LongTasks
from faster_whisper import WhisperModel
from appPublic.worker import awaitify
from appPublic.jsonConfig import getConfig
from appPublic.log import debug, exception
from ahserver.filestorage import FileStorage
class MyLongTask(LongTasks):
    """Long-running transcription task backed by faster-whisper.

    Pulls task payloads from Redis (via the LongTasks base class) and runs
    speech-to-text with one WhisperModel instance per worker, so concurrent
    workers never share a model.
    """
    def __init__(self):
        """Read redis_url / worker_cnt / model_path from the app config,
        register under the task name 'fastwhisper', and preload the models."""
        self.config = getConfig()
        redis_url = self.config.redis_url
        taskname = 'fastwhisper'
        worker_cnt = self.config.worker_cnt
        super().__init__(redis_url, taskname, worker_cnt=worker_cnt)
        self.load_models()

    def load_models(self):
        """Instantiate one CUDA float16 WhisperModel per worker.

        Each worker gets a dedicated model (indexed by workerid) so
        process_task never shares a model across workers.
        """
        self.models = []
        for _ in range(self.worker_cnt):
            model = WhisperModel(self.config.model_path, device="cuda", compute_type="float16")
            self.models.append(model)

    async def process_task(self, payload: dict, workerid: int = None):
        """Transcribe the audio file named in *payload* using worker *workerid*'s model.

        payload keys: 'audio_file' (web path, required), and optional
        'beam_size' (default 5), 'temperature' (default 0.0),
        'word_timestamps' (default True).
        Returns the transcription dict from transcribe(), or an error dict
        {'task_status': 'error', 'message': ...} on bad input.
        """
        debug(f'process_task():{payload=}, {workerid=}')
        # Guard against a missing or out-of-range worker id: each id must
        # index into self.models.
        if workerid is None or not (0 <= workerid < len(self.models)):
            return {
                "task_status": "error",
                "message": f"invalid workerid: {workerid}"
            }
        model = self.models[workerid]
        webpath = payload.get('audio_file')
        if webpath is None:
            return {
                "task_status": "error",
                "message": "no audio_file provided"
            }
        # Map the web path to a real filesystem path.
        fs = FileStorage()
        fpath = fs.realPath(webpath)
        # Whisper inference is blocking; run it in a worker thread so the
        # event loop stays responsive.
        f = awaitify(self.transcribe)
        options = {
            'beam_size': payload.get('beam_size', 5),
            'temperature': payload.get('temperature', 0.0),
            'word_timestamps': payload.get('word_timestamps', True)
        }
        return await f(model, fpath, options)

    def transcribe(self, model, fpath, options):
        """Run blocking transcription and shape the result.

        Returns a dict with 'language', 'language_probability', 'content'
        (all segment texts joined by spaces) and 'segments'
        ([start, end, text] triples, extended with a per-word
        [start, end, word] list when word timestamps were requested).
        """
        segments, info = model.transcribe(fpath, **options)
        segments = list(segments)
        debug(f'{segments=}')
        result = {
            'language': info.language,
            'language_probability': info.language_probability,
            'content': ' '.join([s.text for s in segments])
        }
        # BUGFIX: the original tested key *presence*, but process_task always
        # sets the key, so the word-level branch ran even for
        # word_timestamps=False — and segment.words is None in that case,
        # which crashed the comprehension. Test the value instead.
        if options.get('word_timestamps'):
            result['segments'] = [
                [s.start,
                 s.end,
                 s.text,
                 [[w.start, w.end, w.word] for w in s.words]
                 ] for s in segments]
        else:
            result['segments'] = [
                [s.start,
                 s.end,
                 s.text,
                 ] for s in segments]
        return result