Initial: faster-whisper ASR HTTP service (ahserver+longtasks+Redis)
This commit is contained in:
commit
e18aac6595
6
.gitignore
vendored
Normal file
6
.gitignore
vendored
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
nohup*.out
|
||||||
|
*.egg-info
|
||||||
|
.env
|
||||||
|
py3/
|
||||||
182
README.md
Normal file
182
README.md
Normal file
@ -0,0 +1,182 @@
|
|||||||
|
# ASR Service
|
||||||
|
|
||||||
|
Speech-to-text service powered by [faster-whisper](https://github.com/SYSTRAN/faster-whisper) (CTranslate2 backend). Uses the `large-v3-turbo` model for fast, high-quality transcription with word-level timestamps.
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
Client --> Redis Queue ("asr") --> ASRTasks (LongTasks worker)
|
||||||
|
|
|
||||||
|
v
|
||||||
|
faster-whisper (GPU)
|
||||||
|
|
|
||||||
|
v
|
||||||
|
Result (JSON)
|
||||||
|
```
|
||||||
|
|
||||||
|
- **ahserver**: Web framework serving HTTP on port 9925
|
||||||
|
- **longtasks**: Redis-backed async task queue with worker management
|
||||||
|
- **Redis**: Task queue broker (queue name: `asr`)
|
||||||
|
- **faster-whisper**: ASR engine running on GPU (CUDA, float16)
|
||||||
|
|
||||||
|
The service follows the same ahserver+longtasks pattern as wan22-service and realesrgan-service.
|
||||||
|
|
||||||
|
## Model
|
||||||
|
|
||||||
|
- **Model**: faster-whisper-large-v3-turbo-ct2
|
||||||
|
- **Path**: `/data/ymq/models/deepdml/faster-whisper-large-v3-turbo-ct2`
|
||||||
|
- **Device**: CUDA (float16)
|
||||||
|
- **GPU**: Isolated via `CUDA_VISIBLE_DEVICES` (default GPU 5)
|
||||||
|
|
||||||
|
The model is lazy-loaded on first transcription request and stays in GPU memory for subsequent requests.
|
||||||
|
|
||||||
|
## Deployment
|
||||||
|
|
||||||
|
### Prerequisites
|
||||||
|
|
||||||
|
- Python venv with faster-whisper 1.2.1: `/data/ymq/demucs_venv`
|
||||||
|
- Redis server running on 127.0.0.1:6379
|
||||||
|
- CUDA-capable GPU
|
||||||
|
|
||||||
|
### Start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /data/ymq/asr-service
|
||||||
|
bash start.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
### Stop
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /data/ymq/asr-service
|
||||||
|
bash stop.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
### Health Check
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:9925/health
|
||||||
|
```
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"status": "ok",
|
||||||
|
"service": "asr-service",
|
||||||
|
"model": "faster-whisper-large-v3-turbo-ct2"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## API Usage
|
||||||
|
|
||||||
|
Tasks are submitted via Redis, same pattern as wan22-service.
|
||||||
|
|
||||||
|
### Submit a Transcription Task
|
||||||
|
|
||||||
|
```python
|
||||||
|
import redis
|
||||||
|
import json
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
r = redis.Redis(host='127.0.0.1', port=6379)
|
||||||
|
|
||||||
|
task_id = str(uuid.uuid4())
|
||||||
|
payload = {
|
||||||
|
"task_id": task_id,
|
||||||
|
"task_type": "transcribe",
|
||||||
|
"audio_path": "/path/to/audio.wav",
|
||||||
|
"language": "zh",
|
||||||
|
"word_timestamps": True,
|
||||||
|
"vad_filter": True,
|
||||||
|
"output_path": "/tmp/asr-outputs/result.json"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Push to the Redis queue
|
||||||
|
r.lpush('asr:queue', json.dumps(payload))
|
||||||
|
print(f"Task submitted: {task_id}")
|
||||||
|
```
|
||||||
|
|
||||||
|
### Check Task Status
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Task status is stored in Redis by longtasks
|
||||||
|
status = r.get(f'asr:status:{task_id}')
|
||||||
|
result = r.get(f'asr:result:{task_id}')
|
||||||
|
```
|
||||||
|
|
||||||
|
## Task Payload Format
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|------------------|--------|----------|---------|--------------------------------------|
|
||||||
|
| task_type | string | Yes | - | Must be `"transcribe"` |
|
||||||
|
| audio_path | string | Yes | - | Path to input audio file |
|
||||||
|
| language | string | No | `"zh"` | Language code (zh, en, ja, etc.) |
|
||||||
|
| word_timestamps | bool | No | `True` | Enable word-level timestamps |
|
||||||
|
| vad_filter | bool | No | `True` | Enable voice activity detection |
|
||||||
|
| output_path | string | No | - | If set, save result JSON to this path |
|
||||||
|
|
||||||
|
## Output Format
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"status": "ok",
|
||||||
|
"text": "Full transcription text...",
|
||||||
|
"language": "zh",
|
||||||
|
"language_probability": 0.9876,
|
||||||
|
"duration": 125.340,
|
||||||
|
"segments": [
|
||||||
|
{
|
||||||
|
"text": "Segment text",
|
||||||
|
"start": 0.000,
|
||||||
|
"end": 5.120,
|
||||||
|
"words": [
|
||||||
|
{
|
||||||
|
"word": "你好",
|
||||||
|
"start": 0.000,
|
||||||
|
"end": 0.800,
|
||||||
|
"probability": 0.9523
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"processing_time": 3.45,
|
||||||
|
"audio_path": "/path/to/audio.wav"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
Config file: `conf/config.json`
|
||||||
|
|
||||||
|
| Setting | Value | Description |
|
||||||
|
|-----------------------|------------------------------|--------------------------------|
|
||||||
|
| website.port | 9925 | HTTP listen port |
|
||||||
|
| website.host | 0.0.0.0 | Bind address |
|
||||||
|
| session_redis | 127.0.0.1:6379 db=1 | Session storage |
|
||||||
|
| password_key | ASRService2026Key | Auth key |
|
||||||
|
| filesroot | /tmp/asr-outputs | Output files directory |
|
||||||
|
|
||||||
|
### Environment Variables
|
||||||
|
|
||||||
|
| Variable | Default | Description |
|
||||||
|
|----------------------|---------|---------------------------------------|
|
||||||
|
| ASR_GPU_ID | 5 | GPU device ID (for logging) |
|
||||||
|
| CUDA_VISIBLE_DEVICES | 5 | CUDA device isolation |
|
||||||
|
| PYTHONPATH | . | Python module search path |
|
||||||
|
|
||||||
|
## File Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
asr-service/
|
||||||
|
├── ah.py # Main entry point
|
||||||
|
├── start.sh # Start script
|
||||||
|
├── stop.sh # Stop script
|
||||||
|
├── conf/
|
||||||
|
│ └── config.json # Service configuration
|
||||||
|
├── app/
|
||||||
|
│ └── health.dspy # Health check endpoint
|
||||||
|
├── workers/
|
||||||
|
│ ├── __init__.py
|
||||||
|
│ └── transcribe.py # Transcription worker
|
||||||
|
└── README.md
|
||||||
|
```
|
||||||
43
ah.py
Normal file
43
ah.py
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
import os
|
||||||
|
from ahserver.webapp import webapp
|
||||||
|
from ahserver.serverenv import ServerEnv
|
||||||
|
from ahserver.configuredServer import add_startup
|
||||||
|
from longtasks.longtasks import LongTasks, schedule_once
|
||||||
|
from appPublic.log import debug
|
||||||
|
|
||||||
|
|
||||||
|
class ASRTasks(LongTasks):
    """LongTasks subclass that dispatches queued ASR payloads by ``task_type``."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``LongTasks`` and record the GPU id.

        ``gpu_id`` is read from the ``ASR_GPU_ID`` environment variable
        (default ``5``). This class only stores the value; it does not use
        it to select a device.
        """
        super().__init__(*args, **kwargs)
        self.gpu_id = int(os.environ.get('ASR_GPU_ID', '5'))

    async def process_task(self, payload, workid=None):
        """Run one queued task and return its result.

        Args:
            payload: Task description, either a dict-like object or its JSON
                encoding as ``str``, ``bytes`` or ``bytearray``.
            workid: Accepted for interface compatibility; not used here.

        Returns:
            Whatever ``workers.transcribe.run_transcribe`` returns for a
            ``"transcribe"`` task.

        Raises:
            ValueError: If ``task_type`` is not a supported value (invalid
                JSON text also surfaces as a ``ValueError`` subclass).
        """
        import json

        # Depending on how the queue entry was read from Redis, the payload
        # may arrive as raw bytes rather than str; json.loads accepts both.
        if isinstance(payload, (str, bytes, bytearray)):
            payload = json.loads(payload)
        task_type = payload.get('task_type', '')
        if task_type == 'transcribe':
            # Imported at call time, so workers.transcribe is only loaded
            # once a transcribe task actually arrives.
            from workers.transcribe import run_transcribe
            return await run_transcribe(self, payload)
        raise ValueError(f'Unknown task_type: {task_type}')
|
||||||
|
|
||||||
|
|
||||||
|
async def on_app_built(app):
    """Startup hook: schedule the longtasks worker loop when one is registered.

    ``app`` is accepted because the hook is registered via ``add_startup``;
    it is not used here.
    """
    worker = ServerEnv().longtasks
    if not worker:
        return
    # Start the worker shortly after startup instead of inline in the hook.
    schedule_once(0.1, worker.run)
    debug(f'ASR longtasks worker started, GPU: {worker.gpu_id}')
|
||||||
|
|
||||||
|
|
||||||
|
def init():
    """Create the ASR task worker, publish it on ServerEnv, register the startup hook."""
    server_env = ServerEnv()
    worker_options = dict(worker_cnt=1, stuck_seconds=600, max_age_hours=24)
    # Positional arguments: Redis URL, then the task/queue name.
    server_env.longtasks = ASRTasks(
        'redis://127.0.0.1:6379',
        'asr',
        **worker_options,
    )
    add_startup(on_app_built)
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: hand init() to ahserver's webapp() launcher.
if __name__ == '__main__':
    webapp(init)
|
||||||
5
app/health.dspy
Normal file
5
app/health.dspy
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
{{
|
||||||
|
"status": "ok",
|
||||||
|
"service": "asr-service",
|
||||||
|
"model": "faster-whisper-large-v3-turbo-ct2"
|
||||||
|
}}
|
||||||
1
conf/config.json
Normal file
1
conf/config.json
Normal file
@ -0,0 +1 @@
|
|||||||
|
{"password_key":"ASRService2026Key","databases":{},"session_redis":{"host":"127.0.0.1","port":6379,"db":1},"website":{"paths":[["$[workdir]$/app",""]],"host":"0.0.0.0","port":9925,"coding":"utf-8","indexes":["index.html","index.dspy"],"processors":[[".dspy","dspy"]],"startswiths":[{"leading":"/idfile","registerfunction":"idfile"}]},"hot_reload":false,"filesroot":"/tmp/asr-outputs"}
|
||||||
7
start.sh
Executable file
7
start.sh
Executable file
@ -0,0 +1,7 @@
|
|||||||
|
#!/bin/bash
# Start the asr-service in the background, pinned to a single GPU.
# ASR_GPU_ID may be preset in the environment to pick another GPU (default: 5).
SERVICE_DIR=/data/ymq/asr-service

# Abort instead of launching ah.py from whatever directory we happen to be in.
cd "$SERVICE_DIR" || { echo "cannot cd to $SERVICE_DIR" >&2; exit 1; }

export ASR_GPU_ID="${ASR_GPU_ID:-5}"
# Derive the CUDA mask from ASR_GPU_ID so the GPU id that gets logged and the
# GPU that is actually used cannot drift apart.
export CUDA_VISIBLE_DEVICES="$ASR_GPU_ID"
export PYTHONPATH="$SERVICE_DIR"

nohup /data/ymq/demucs_venv/bin/python ah.py > nohup.out 2>&1 &
echo "asr-service started, PID: $!, GPU: $ASR_GPU_ID"
|
||||||
24
stop.sh
Executable file
24
stop.sh
Executable file
@ -0,0 +1,24 @@
|
|||||||
|
#!/bin/bash
# Stop the asr-service
SERVICE_DIR=$(readlink -f /data/ymq/asr-service)

# Other ahserver-based services on this host may also run as "python ah.py",
# so a bare pgrep match is not necessarily ours. Keep only the match whose
# working directory is the asr-service directory (start.sh cd's there).
PID=""
for candidate in $(pgrep -f "python ah.py"); do
    candidate_cwd=$(readlink -f "/proc/$candidate/cwd" 2>/dev/null)
    if [ -n "$candidate_cwd" ] && [ "$candidate_cwd" = "$SERVICE_DIR" ]; then
        PID=$candidate
        break
    fi
done

if [ -z "$PID" ]; then
    echo "asr-service is not running"
    exit 0
fi

echo "Stopping asr-service (PID: $PID)..."
kill "$PID"

# Wait up to 10 seconds for graceful shutdown
for i in $(seq 1 10); do
    # kill -0 sends no signal; it only tests whether the process still exists.
    if ! kill -0 "$PID" 2>/dev/null; then
        echo "asr-service stopped"
        exit 0
    fi
    sleep 1
done

# Force kill if still running
echo "Force killing asr-service (PID: $PID)..."
kill -9 "$PID"
echo "asr-service killed"
|
||||||
0
workers/__init__.py
Normal file
0
workers/__init__.py
Normal file
144
workers/transcribe.py
Normal file
144
workers/transcribe.py
Normal file
@ -0,0 +1,144 @@
|
|||||||
|
"""
|
||||||
|
ASR Transcription Worker using faster-whisper.
|
||||||
|
|
||||||
|
Lazy-loads the model on first use and keeps it in GPU memory.
|
||||||
|
Processes transcription tasks from the Redis queue.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import asyncio
|
||||||
|
import time
|
||||||
|
from appPublic.log import debug, error
|
||||||
|
|
||||||
|
# Module-level model cache (lazy-loaded, stays in memory)
_model = None
# Lock guarding the first model load; created on demand by _get_lock().
_model_lock = None

# Path handed to WhisperModel as the model to load.
MODEL_PATH = '/data/ymq/models/deepdml/faster-whisper-large-v3-turbo-ct2'
|
||||||
|
|
||||||
|
|
||||||
|
def _get_lock():
    """Return the shared model-loading lock, creating it on the first call."""
    global _model_lock
    lock = _model_lock
    if lock is None:
        lock = _model_lock = asyncio.Lock()
    return lock
|
||||||
|
|
||||||
|
|
||||||
|
async def load_model():
    """Lazy-load the faster-whisper model and cache it at module level.

    Concurrent callers are serialised by an ``asyncio.Lock``, so the model is
    constructed at most once. The lock coordinates coroutines on one event
    loop; it is not a guard against concurrent OS threads.

    The blocking ``WhisperModel`` constructor runs in the default executor so
    that other coroutines on the event loop are not stalled while the model
    loads.

    Returns:
        The cached ``WhisperModel`` instance.
    """
    global _model
    if _model is not None:
        return _model

    async with _get_lock():
        # Re-check: another coroutine may have finished loading while this
        # one was waiting for the lock.
        if _model is not None:
            return _model

        debug(f'Loading faster-whisper model from {MODEL_PATH}...')
        t0 = time.time()

        from faster_whisper import WhisperModel

        def _build():
            # CUDA device 0 — CUDA_VISIBLE_DEVICES already isolates the GPU
            return WhisperModel(
                MODEL_PATH,
                device='cuda',
                device_index=0,
                compute_type='float16',
                num_workers=1,
            )

        loop = asyncio.get_running_loop()
        _model = await loop.run_in_executor(None, _build)

        elapsed = time.time() - t0
        debug(f'faster-whisper model loaded in {elapsed:.1f}s')
        return _model
|
||||||
|
|
||||||
|
|
||||||
|
def _segment_to_dict(seg, word_timestamps):
    """Convert one faster-whisper segment into a JSON-serialisable dict."""
    seg_data = {
        'text': seg.text,
        'start': round(seg.start, 3),
        'end': round(seg.end, 3),
    }
    if word_timestamps and seg.words:
        seg_data['words'] = [
            {
                'word': w.word,
                'start': round(w.start, 3),
                'end': round(w.end, 3),
                'probability': round(w.probability, 4),
            }
            for w in seg.words
        ]
    return seg_data


def _transcribe_sync(model, audio_path, language, word_timestamps, vad_filter):
    """Run a complete transcription synchronously and return (segments, info)."""
    segments_gen, info = model.transcribe(
        audio_path,
        language=language,
        word_timestamps=word_timestamps,
        vad_filter=vad_filter,
    )
    # transcribe() hands back a lazy generator: decoding only happens while it
    # is iterated. Drain it here so the heavy work stays in the calling
    # (worker) thread instead of leaking back to the event loop.
    segments = [_segment_to_dict(seg, word_timestamps) for seg in segments_gen]
    return segments, info


async def run_transcribe(tasks, payload):
    """
    Run transcription on an audio file.

    Args:
        tasks: The task-manager instance that dispatched this job; not used
            by this function.
        payload: Mapping with the fields below.
            audio_path (str): Path to the audio file (required)
            language (str): Language code, default 'zh'
            word_timestamps (bool): Enable word-level timestamps, default True
            vad_filter (bool): Enable VAD filter, default True
            output_path (str): Optional path to save result JSON

    Returns:
        dict with status, text, language, language_probability, duration,
        segments, processing_time and audio_path.

    Raises:
        ValueError: If ``audio_path`` is missing or empty.
        FileNotFoundError: If ``audio_path`` does not exist.
    """
    audio_path = payload.get('audio_path')
    if not audio_path:
        raise ValueError('audio_path is required')

    if not os.path.exists(audio_path):
        raise FileNotFoundError(f'Audio file not found: {audio_path}')

    language = payload.get('language', 'zh')
    word_timestamps = payload.get('word_timestamps', True)
    vad_filter = payload.get('vad_filter', True)
    output_path = payload.get('output_path')

    debug(f'Transcribing: {audio_path} (lang={language}, vad={vad_filter}, words={word_timestamps})')
    t0 = time.time()

    model = await load_model()

    # Run the whole transcription, including consumption of the segment
    # generator, in a worker thread so the event loop is not blocked.
    loop = asyncio.get_running_loop()
    segments, info = await loop.run_in_executor(
        None,
        lambda: _transcribe_sync(
            model, audio_path, language, word_timestamps, vad_filter
        ),
    )

    elapsed = time.time() - t0
    result = {
        'status': 'ok',
        'text': ' '.join(s['text'] for s in segments),
        'language': info.language,
        'language_probability': round(info.language_probability, 4),
        'duration': round(info.duration, 3),
        'segments': segments,
        'processing_time': round(elapsed, 2),
        'audio_path': audio_path,
    }

    debug(f'Transcription done in {elapsed:.1f}s: {len(segments)} segments, '
          f'duration={info.duration:.1f}s, lang={info.language}')

    # Save result if output_path specified
    if output_path:
        # A bare filename has an empty dirname, and os.makedirs('') raises.
        out_dir = os.path.dirname(output_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(result, f, ensure_ascii=False, indent=2)
        debug(f'Result saved to {output_path}')

    return result
|
||||||
Loading…
x
Reference in New Issue
Block a user