From 924b27be117f5e72c1aca1b14850a1df586dc280 Mon Sep 17 00:00:00 2001 From: Hermes Agent Date: Sun, 14 Jun 2026 14:46:26 +0800 Subject: [PATCH] Initial: Demucs vocal separation HTTP service (ahserver+longtasks+Redis) --- .gitignore | 33 +++++++++++ README.md | 131 ++++++++++++++++++++++++++++++++++++++++++++ ah.py | 34 ++++++++++++ app/health.dspy | 1 + conf/config.json | 7 +++ start.sh | 7 +++ stop.sh | 26 +++++++++ workers/__init__.py | 0 workers/separate.py | 87 +++++++++++++++++++++++++++++ 9 files changed, 326 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 ah.py create mode 100644 app/health.dspy create mode 100644 conf/config.json create mode 100755 start.sh create mode 100755 stop.sh create mode 100644 workers/__init__.py create mode 100644 workers/separate.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ba2cad2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,33 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +*.egg-info/ +dist/ +build/ +*.egg + +# Virtual environments +venv/ +.venv/ +env/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo +*~ + +# Logs +nohup.out +*.log + +# OS +.DS_Store +Thumbs.db + +# Temporary files +*.tmp +*.bak diff --git a/README.md b/README.md new file mode 100644 index 0000000..51ce25f --- /dev/null +++ b/README.md @@ -0,0 +1,131 @@ +# demucs-service + +Vocal/accompaniment separation web service using [Demucs](https://github.com/adefossez/demucs) (htdemucs model). + +## Overview + +This service provides an async API for separating audio files into vocals and accompaniment tracks using Meta's Demucs neural network model. It follows the ahserver + longtasks + Redis pattern. 
+ +## Architecture + +- **ahserver**: Async HTTP server framework +- **longtasks**: Background task processing via Redis queues +- **Redis**: Task queue for separation jobs +- **Demucs 4.0.1**: AI-powered source separation model (htdemucs) + +## API + +### Submit Separation Task + +Send a JSON payload to the longtask endpoint (`output_dir` may be omitted): + +```json +{ + "task_type": "separate", + "audio_path": "/path/to/audio.wav", + "output_dir": "/tmp/demucs_custom_output" +} +``` + +**Parameters:** +- `audio_path` (required): Absolute path to the input audio file +- `output_dir` (optional): Output directory. Default: `/tmp/demucs_{task_id}` + +**Response** (`duration` is the wall-clock time of the separation run, in seconds): +```json +{ + "vocals_path": "/tmp/demucs_123/htdemucs/audio/vocals.wav", + "no_vocals_path": "/tmp/demucs_123/htdemucs/audio/no_vocals.wav", + "duration": 12.34, + "output_dir": "/tmp/demucs_123", + "model": "htdemucs" +} +``` + +### Health Check + +``` +GET /app/health.dspy +``` + +Returns: +```json +{"status":"ok","service":"demucs-service","model":"htdemucs"} +``` + +## Configuration + +Config file: `conf/config.json` + +```json +{ + "port": 9083, + "queue": "demucs", + "filesroot": "/tmp/demucs-outputs", + "host": "0.0.0.0", + "debug": false +} +``` + +## Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `DEMUCS_GPU_ID` | `5` | GPU device ID for CUDA | +| `CUDA_VISIBLE_DEVICES` | `5` | CUDA device visibility | +| `PYTHONPATH` | `/data/ymq/demucs-service` | Python module path | + +## Deployment + +### Prerequisites + +- Python venv at `/data/ymq/demucs_venv` with demucs 4.0.1 and torchcodec +- Redis server running on `127.0.0.1:6379` +- GPU with CUDA support + +### Start + +```bash +bash start.sh +``` + +### Stop + +```bash +bash stop.sh +``` + +### Logs + +```bash +tail -f nohup.out +``` + +## Directory Structure + +``` +demucs-service/ +├── ah.py # Main entry point +├── workers/ +│ ├── __init__.py +│ └── separate.py # Separation worker +├── conf/ +│ └── config.json # 
class DemucsTasks(LongTasks):
    """Long-task worker that dispatches queued payloads to demucs jobs.

    The GPU index is read once from the ``DEMUCS_GPU_ID`` environment
    variable (default ``5``) and exposed as ``self.gpu_id`` for the workers.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.gpu_id = int(os.environ.get('DEMUCS_GPU_ID', '5'))

    async def process_task(self, payload, workid=None):
        """Run one queued task and return the worker's result.

        Args:
            payload: Task description, either an already-parsed dict or its
                JSON serialization as ``str``/``bytes``/``bytearray``. Must
                contain ``task_type``; the remaining keys are passed through
                to the worker.
            workid: Not used by this implementation.

        Returns:
            Whatever the selected worker returns (for ``separate``, the
            result of ``workers.separate.run_separate``).

        Raises:
            ValueError: If the payload is not a JSON object or names an
                unknown ``task_type``. ``json.JSONDecodeError`` (a
                ``ValueError`` subclass) is raised for malformed JSON text.
        """
        import json

        # A serialized payload may arrive as bytes as well as str;
        # json.loads accepts both, so decode either form here.
        if isinstance(payload, (str, bytes, bytearray)):
            payload = json.loads(payload)
        if not isinstance(payload, dict):
            raise ValueError(
                f'Task payload must be a JSON object, got {type(payload).__name__}'
            )

        task_type = payload.get('task_type', '')
        if task_type == 'separate':
            # Imported lazily, as in the original, so the worker module is
            # only loaded when a separation task is actually processed.
            from workers.separate import run_separate
            return await run_separate(self, payload)
        raise ValueError(f'Unknown task_type: {task_type}')


async def on_app_built(app):
    """Startup hook: schedule the long-task worker loop once the app exists.

    Does nothing when no ``longtasks`` object has been registered on the
    server environment.
    """
    env = ServerEnv()
    lt = env.longtasks
    if lt:
        schedule_once(0.1, lt.run)
        debug(f'Demucs longtasks worker started, GPU: {lt.gpu_id}')


def init():
    """Register the demucs task worker and its startup hook with the server."""
    env = ServerEnv()
    # NOTE(review): the Redis URL and queue name are fixed here; conf/config.json
    # also carries a "queue" entry — confirm whether the two are meant to be linked.
    env.longtasks = DemucsTasks(
        'redis://127.0.0.1:6379',
        'demucs',
        worker_cnt=1,
        stuck_seconds=600,
        max_age_hours=24,
    )
    add_startup(on_app_built)


if __name__ == '__main__':
    webapp(init)
# Interpreter used to launch demucs; override with the DEMUCS_PYTHON
# environment variable when the virtualenv lives elsewhere.
_DEFAULT_DEMUCS_PYTHON = '/data/ymq/demucs_venv/bin/python'


async def _communicate_or_kill(proc):
    """Wait for *proc* to finish and return ``(stdout, stderr)``.

    If the awaiting task is cancelled while demucs is still running, the
    child is killed and reaped before the cancellation propagates, so an
    abandoned task does not leave a demucs process behind.
    """
    try:
        return await proc.communicate()
    except asyncio.CancelledError:
        if proc.returncode is None:
            try:
                proc.kill()
            except ProcessLookupError:
                pass  # the child exited between the check and the kill
            await proc.wait()
        raise


async def run_separate(task_obj, payload):
    """Run demucs vocal/accompaniment separation in a subprocess.

    Args:
        task_obj: Object exposing ``gpu_id``; its value is exported to the
            child process as ``CUDA_VISIBLE_DEVICES``.
        payload: Dict with the following keys:
            audio_path (str, required): Path to the input audio file.
            output_dir (str, optional): Output directory; defaults to
                ``/tmp/demucs_{task_id}`` when missing, ``None`` or empty.
            task_id (optional): Identifier used in the default output
                directory; defaults to the current Unix time in seconds.

    Returns:
        Dict with ``vocals_path``, ``no_vocals_path``, ``duration``
        (wall-clock seconds spent in the demucs subprocess, rounded to two
        decimals), ``output_dir`` and ``model``.

    Raises:
        ValueError: If ``audio_path`` is missing or is not a string.
        FileNotFoundError: If the input file does not exist, or demucs
            exited successfully but an expected output file is absent.
        RuntimeError: If the demucs process exits with a non-zero status.
    """
    audio_path = payload.get('audio_path')
    if not audio_path:
        raise ValueError('audio_path is required')
    if not isinstance(audio_path, str):
        # Reject early: a non-string would otherwise fail later with an
        # unrelated error (os.path functions treat an int as a file descriptor).
        raise ValueError(
            f'audio_path must be a string, got {type(audio_path).__name__}'
        )

    if not os.path.isfile(audio_path):
        raise FileNotFoundError(f'Audio file not found: {audio_path}')

    # Treat an explicit null the same as an absent key so the default
    # directory never becomes "/tmp/demucs_None".
    task_id = payload.get('task_id')
    if task_id is None:
        task_id = str(int(time.time()))
    output_dir = payload.get('output_dir') or f'/tmp/demucs_{task_id}'

    gpu_id = task_obj.gpu_id
    basename = os.path.splitext(os.path.basename(audio_path))[0]

    # Expected output paths from demucs: {output_dir}/htdemucs/{basename}/
    result_dir = os.path.join(output_dir, 'htdemucs', basename)
    vocals_path = os.path.join(result_dir, 'vocals.wav')
    no_vocals_path = os.path.join(result_dir, 'no_vocals.wav')

    # Build the command and a child environment pinned to the chosen GPU
    env = os.environ.copy()
    env['CUDA_VISIBLE_DEVICES'] = str(gpu_id)

    cmd = [
        os.environ.get('DEMUCS_PYTHON', _DEFAULT_DEMUCS_PYTHON), '-m', 'demucs',
        '--two-stems', 'vocals',
        audio_path,
        '-o', output_dir,
    ]

    debug(f'[demucs] Running separation: audio={audio_path}, output={output_dir}, gpu={gpu_id}')
    debug(f'[demucs] Command: {" ".join(cmd)}')

    start_time = time.time()

    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
        env=env,
    )

    stdout, stderr = await _communicate_or_kill(proc)
    elapsed = time.time() - start_time

    if proc.returncode != 0:
        stderr_text = stderr.decode('utf-8', errors='replace')
        stdout_text = stdout.decode('utf-8', errors='replace')
        error(f'[demucs] Process failed (rc={proc.returncode})')
        # Only the tail is logged/raised: the end of the output is where the
        # failure message is, and the full text can be very long.
        error(f'[demucs] stdout: {stdout_text[-2000:]}')
        error(f'[demucs] stderr: {stderr_text[-2000:]}')
        raise RuntimeError(
            f'Demucs separation failed (rc={proc.returncode}): {stderr_text[-500:]}'
        )

    # Verify output files exist
    if not os.path.isfile(vocals_path):
        raise FileNotFoundError(f'Expected vocals output not found: {vocals_path}')
    if not os.path.isfile(no_vocals_path):
        raise FileNotFoundError(f'Expected no_vocals output not found: {no_vocals_path}')

    vocals_size = os.path.getsize(vocals_path)
    no_vocals_size = os.path.getsize(no_vocals_path)

    debug(f'[demucs] Separation complete in {elapsed:.1f}s')
    debug(f'[demucs] vocals.wav: {vocals_size} bytes, no_vocals.wav: {no_vocals_size} bytes')

    return {
        'vocals_path': vocals_path,
        'no_vocals_path': no_vocals_path,
        'duration': round(elapsed, 2),
        'output_dir': output_dir,
        'model': 'htdemucs',
    }