diff --git a/README.md b/README.md index 4e609b4..032d225 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,9 @@ bash ./build.sh ``` { "audio_file": a.wav or b.mp3 + "beam_size": default is 5 + "temperature": default is 0.0 + "word_timestamps": default is True } ``` diff --git a/fw/fw.py b/fw/fw.py index bd9200a..a8dbb8e 100644 --- a/fw/fw.py +++ b/fw/fw.py @@ -37,20 +37,36 @@ class MyLongTask(LongTasks): fs = FileStorage() fpath = fs.realPath(webpath) f = awaitify(self.transcribe) - return await f(model, fpath) + options = { + 'beam_size': payload.get('beam_size', 5), + 'temperature': payload.get('temperature', 0.0), + 'word_timestamps': payload.get('word_timestamps', True) + } + return await f(model, fpath, options) - def transcribe(self, model, fpath): - segments, info = model.transcribe(fpath, beam_size=5, word_timestamps=True) + def transcribe(self, model, fpath, options): + segments, info = model.transcribe(fpath, **options) segments = list(segments) debug(f'{segments=}') - return { - 'language': info.language, - 'language_probability': info.language_probability, - 'content': ' '.join([s.text for s in segments]), - 'segments': [[s.start, - s.end, - s.text, - [[w.start, w.end, w.word] for w in s.words] - ] for s in segments] - } + if 'word_timestamps' in options.keys(): + return { + 'language': info.language, + 'language_probability': info.language_probability, + 'content': ' '.join([s.text for s in segments]), + 'segments': [[s.start, + s.end, + s.text, + [[w.start, w.end, w.word] for w in s.words] + ] for s in segments] + } + else: + return { + 'language': info.language, + 'language_probability': info.language_probability, + 'content': ' '.join([s.text for s in segments]), + 'segments': [[s.start, + s.end, + s.text, + ] for s in segments] + }