This commit is contained in:
yumoqing 2025-11-06 15:33:33 +08:00
parent ee1f718152
commit fed4c53ba6
2 changed files with 32 additions and 13 deletions

View File

@ -67,6 +67,9 @@ bash ./build.sh
```
{
"audio_file": the audio file to transcribe, e.g. a.wav or b.mp3,
"beam_size": optional, default is 5,
"temperature": optional, default is 0.0,
"word_timestamps": optional, default is true
}
```

View File

@ -37,20 +37,36 @@ class MyLongTask(LongTasks):
fs = FileStorage()
fpath = fs.realPath(webpath)
f = awaitify(self.transcribe)
return await f(model, fpath)
options = {
'beam_size': payload.get('beam_size', 5),
'temperature': payload.get('temperature', 0.0),
'word_timestamps': payload.get('word_timestamps', True)
}
return await f(model, fpath, options)
def transcribe(self, model, fpath, options):
    """Transcribe the audio file at ``fpath`` and return a plain-dict result.

    Args:
        model: object exposing ``transcribe(path, **options)`` that returns
            a ``(segments, info)`` pair.
        fpath: path of the audio file handed to ``model.transcribe``.
        options: keyword arguments forwarded unchanged to
            ``model.transcribe`` (the caller passes ``beam_size``,
            ``temperature`` and ``word_timestamps``).

    Returns:
        dict with keys ``language``, ``language_probability``, ``content``
        (all segment texts joined with a single space) and ``segments``.
        Each segment is ``[start, end, text]``; when word timestamps were
        requested a fourth element ``[[start, end, word], ...]`` is added.
    """
    segments, info = model.transcribe(fpath, **options)
    # Materialise once: the segments are logged and then iterated twice.
    segments = list(segments)
    debug(f'{segments=}')

    # Test the option's *value*, not its presence: the caller always sets
    # the key, so a presence check would be true even for
    # word_timestamps=False and the word list would be built anyway.
    with_words = bool(options.get('word_timestamps'))

    rows = []
    for s in segments:
        row = [s.start, s.end, s.text]
        if with_words:
            # NOTE(review): s.words is presumably None when the backend
            # produced no word-level data; fall back to an empty list.
            row.append([[w.start, w.end, w.word] for w in (s.words or [])])
        rows.append(row)

    return {
        'language': info.language,
        'language_probability': info.language_probability,
        'content': ' '.join([s.text for s in segments]),
        'segments': rows,
    }