feat: implement OpenAI-compatible LLM API

- Add /v1/chat/completions endpoint (POST) with streaming support - Add /v1/models endpoint (GET) listing available models - Add /v1/completions endpoint (POST) legacy compatibility - Add llm_api.py module with OpenAI API proxy via aiohttp - Add llm_service_url, llm_api_key, available_models to config model - Update harnessed_agent_config_view CRUD to protect API key field - Register new functions in init.py (harnessed_llm_chat_completions etc.) - Add .gitignore for pycache files Endpoints available under module path: POST /harnessed_agent/v1/chat/completions GET /harnessed_agent/v1/models POST /harnessed_agent/v1/completions
2026-05-07 11:36:35 +08:00 · 2026-05-07 11:36:35 +08:00 · 608413a5d5
commit 608413a5d5
parent 52f88239ed
8 changed files with 536 additions and 11 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,8 @@
+__pycache__/
+*.pyc
+*.pyo
+*.egg-info/
+dist/
+build/
+py3/
+*.egg
--- a/harnessed_agent/init.py
+++ b/harnessed_agent/init.py
@ -20,6 +20,11 @@ from .config_functions import (
    harnessed_get_agent_config,
    harnessed_save_agent_config
 )
+from .llm_api import (
+    harnessed_llm_chat_completions,
+    harnessed_llm_models,
+    harnessed_llm_completions,
+)

 def load_harnessed_agent():
    env = ServerEnv()
@ -43,3 +48,8 @@ def load_harnessed_agent():
    # Configuration management functions
    env.harnessed_get_agent_config = harnessed_get_agent_config
    env.harnessed_save_agent_config = harnessed_save_agent_config
+
+    # OpenAI-compatible LLM API
+    env.harnessed_llm_chat_completions = harnessed_llm_chat_completions
+    env.harnessed_llm_models = harnessed_llm_models
+    env.harnessed_llm_completions = harnessed_llm_completions
--- a/harnessed_agent/llm_api.py
+++ b/harnessed_agent/llm_api.py
@ -0,0 +1,382 @@
+"""
+OpenAI-compatible LLM API for Hermes Agent
+Provides /v1/chat/completions and /v1/models endpoints compatible with OpenAI API spec.
+"""
+import json
+import uuid
+import time
+import asyncio
+from typing import Dict, Any, List, Optional, AsyncGenerator
+from datetime import datetime
+
+try:
+    from aiohttp import ClientSession, ClientTimeout
+    from aiohttp.client_exceptions import ClientError
+    HAS_AIOHTTP = True
+except ImportError:
+    HAS_AIOHTTP = False
+
+try:
+    from ahserver.serverenv import ServerEnv
+    from sqlor.dbpools import DBPools
+    from appPublic.worker import awaitify
+except ImportError:
+    class ServerEnv:
+        pass
+    class DBPools:
+        pass
+    def awaitify(f):
+        return f
+
+
+def _now_iso():
+    return datetime.utcnow().isoformat() + 'Z'
+
+
+def _now_ts():
+    return int(time.time())
+
+
+def _get_default_llm_config(user_id: str = None) -> Dict[str, Any]:
+    """Get LLM service config from harnessed_agent_config table."""
+    try:
+        dbname = ServerEnv().get_module_dbname('harnessed_agent')
+    except Exception:
+        dbname = 'default'
+
+    try:
+        import asyncio
+        async def _fetch():
+            async with DBPools().sqlorContext(dbname) as sor:
+                filters = {}
+                if user_id:
+                    filters['user_id'] = user_id
+                rows = await sor.R('harnessed_agent_config', filters,
+                                   orderby='updated_at DESC', limit=1)
+                if rows:
+                    return rows[0]
+            return None
+
+        try:
+            loop = asyncio.get_event_loop()
+            if loop.is_running():
+                # Already in async context, need to use create_task pattern
+                # but for simplicity in .dspy context we return None and let caller handle
+                return None
+            else:
+                config = loop.run_until_complete(_fetch())
+                return config
+        except RuntimeError:
+            return None
+    except Exception:
+        return None
+
+
+async def _async_get_llm_config(user_id: str = None) -> Dict[str, Any]:
+    """Async version to get LLM service config."""
+    try:
+        env = ServerEnv()
+        dbname = env.get_module_dbname('harnessed_agent')
+    except Exception:
+        dbname = 'default'
+
+    try:
+        async with DBPools().sqlorContext(dbname) as sor:
+            filters = {}
+            if user_id:
+                filters['user_id'] = user_id
+            rows = await sor.R('harnessed_agent_config', filters,
+                               orderby='updated_at DESC', limit=1)
+            if rows:
+                return rows[0]
+    except Exception as e:
+        print(f"Error fetching LLM config: {e}")
+    return None
+
+
+async def _call_llm_api(
+    service_url: str,
+    api_key: str,
+    model: str,
+    messages: List[Dict[str, str]],
+    temperature: float = 0.7,
+    max_tokens: Optional[int] = None,
+    stream: bool = False,
+    top_p: float = 1.0,
+    **kwargs
+) -> Any:
+    """Call an OpenAI-compatible LLM API endpoint."""
+    url = service_url.rstrip('/') + '/chat/completions'
+
+    headers = {
+        'Content-Type': 'application/json',
+        'Authorization': f'Bearer {api_key}',
+    }
+
+    body = {
+        'model': model,
+        'messages': messages,
+        'temperature': temperature,
+        'top_p': top_p,
+        'stream': stream,
+    }
+
+    if max_tokens is not None:
+        body['max_tokens'] = max_tokens
+
+    # Pass through any additional OpenAI-compatible parameters
+    for key in ('stop', 'presence_penalty', 'frequency_penalty', 'tools', 'tool_choice',
+                'response_format', 'seed'):
+        if key in kwargs and kwargs[key] is not None:
+            body[key] = kwargs[key]
+
+    if stream:
+        return await _stream_llm_response(url, headers, body)
+    else:
+        return await _sync_llm_response(url, headers, body)
+
+
+async def _sync_llm_response(url: str, headers: Dict, body: Dict) -> Dict[str, Any]:
+    """Make a non-streaming LLM API call."""
+    async with ClientSession() as session:
+        async with session.post(url, headers=headers, json=body,
+                                timeout=ClientTimeout(total=300)) as resp:
+            if resp.status != 200:
+                error_text = await resp.text()
+                return {
+                    'error': {
+                        'message': f'LLM API error: HTTP {resp.status}',
+                        'type': 'api_error',
+                        'code': resp.status,
+                        'detail': error_text[:500],
+                    }
+                }
+
+            data = await resp.json()
+            return data
+
+
+async def _stream_llm_response(url: str, headers: Dict, body: Dict) -> AsyncGenerator[str, None]:
+    """Make a streaming LLM API call, yielding SSE chunks."""
+    async with ClientSession() as session:
+        async with session.post(url, headers=headers, json=body,
+                                timeout=ClientTimeout(total=600)) as resp:
+            if resp.status != 200:
+                error_text = await resp.text()
+                error_data = {
+                    'error': {
+                        'message': f'LLM API error: HTTP {resp.status}',
+                        'type': 'api_error',
+                        'code': resp.status,
+                        'detail': error_text[:500],
+                    }
+                }
+                yield f"data: {json.dumps(error_data, ensure_ascii=False)}\n\n"
+                yield "data: [DONE]\n\n"
+                return
+
+            async for line in resp.content:
+                line = line.decode('utf-8').strip()
+                if not line:
+                    continue
+                if line.startswith('data: '):
+                    yield line + '\n\n'
+
+
+# ============================================================
+# Public API functions (registered to ServerEnv via init.py)
+# ============================================================
+
+async def harnessed_llm_chat_completions(body: Dict[str, Any]) -> Any:
+    """
+    OpenAI-compatible /v1/chat/completions endpoint.
+    
+    Args:
+        body: dict with keys: model, messages, temperature, max_tokens,
+              stream, top_p, and other OpenAI-compatible params.
+    
+    Returns:
+        If stream=False: dict matching OpenAI chat completion response.
+        If stream=True: aiohttp.StreamResponse with SSE.
+    """
+    from aiohttp import web
+
+    model = body.get('model', 'default')
+    messages = body.get('messages', [])
+    temperature = body.get('temperature', 0.7)
+    max_tokens = body.get('max_tokens', None)
+    stream = body.get('stream', False)
+    top_p = body.get('top_p', 1.0)
+
+    # Get LLM service config
+    config = await _async_get_llm_config()
+    if not config:
+        return {
+            'error': {
+                'message': 'LLM service not configured. Please configure llm_service_url and llm_api_key in agent settings.',
+                'type': 'configuration_error',
+                'code': 503,
+            }
+        }
+
+    service_url = config.get('llm_service_url') or config.get('api_endpoint')
+    api_key = config.get('llm_api_key') or config.get('api_key')
+
+    if not service_url or not api_key:
+        return {
+            'error': {
+                'message': 'LLM service URL or API key not configured. Set llm_service_url and llm_api_key in harnessed_agent_config.',
+                'type': 'configuration_error',
+                'code': 503,
+            }
+        }
+
+    # Use default model from config if request model is 'default' or empty
+    default_model = config.get('default_model', 'qwen3-max')
+    if not model or model == 'default':
+        model = default_model
+
+    # Pass through extra params
+    extra_params = {}
+    for key in ('stop', 'presence_penalty', 'frequency_penalty', 'tools',
+                'tool_choice', 'response_format', 'seed'):
+        if key in body:
+            extra_params[key] = body[key]
+
+    if stream:
+        resp = web.StreamResponse(
+            status=200,
+            reason='OK',
+            headers={
+                'Content-Type': 'text/event-stream',
+                'Cache-Control': 'no-cache',
+                'Connection': 'keep-alive',
+                'X-Accel-Buffering': 'no',
+            }
+        )
+        await resp.prepare(body.get('_request'))
+
+        request_id = f"chatcmpl-{uuid.uuid4().hex[:12]}"
+        created = _now_ts()
+
+        try:
+            async for chunk_line in _stream_llm_response(
+                service_url, {'Content-Type': 'application/json', 'Authorization': f'Bearer {api_key}'},
+                {
+                    'model': model,
+                    'messages': messages,
+                    'temperature': temperature,
+                    'top_p': top_p,
+                    'stream': True,
+                    'max_tokens': max_tokens,
+                    **extra_params,
+                }
+            ):
+                await resp.write(chunk_line.encode('utf-8') if isinstance(chunk_line, str) else chunk_line)
+                await resp.drain()
+        except Exception as e:
+            error_data = {
+                'error': {
+                    'message': f'Streaming error: {str(e)}',
+                    'type': 'server_error',
+                    'code': 500,
+                }
+            }
+            await resp.write(f"data: {json.dumps(error_data)}\n\n".encode('utf-8'))
+            await resp.drain()
+
+        await resp.write(b"data: [DONE]\n\n")
+        await resp.drain()
+        return resp
+
+    else:
+        result = await _call_llm_api(
+            service_url=service_url,
+            api_key=api_key,
+            model=model,
+            messages=messages,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            stream=False,
+            top_p=top_p,
+            **extra_params,
+        )
+        return result
+
+
+async def harnessed_llm_models() -> Dict[str, Any]:
+    """
+    OpenAI-compatible /v1/models endpoint.
+    Returns list of available models from config.
+    """
+    config = await _async_get_llm_config()
+
+    models_list = []
+
+    # Add default model from config
+    default_model = 'qwen3-max'
+    if config:
+        default_model = config.get('default_model', 'qwen3-max')
+
+    # If available_models is configured as JSON string, parse it
+    if config and config.get('available_models'):
+        try:
+            models_str = config['available_models']
+            if isinstance(models_str, str):
+                model_names = json.loads(models_str)
+            else:
+                model_names = models_str
+            for m in model_names:
+                if isinstance(m, str):
+                    models_list.append({
+                        'id': m,
+                        'object': 'model',
+                        'created': _now_ts(),
+                        'owned_by': 'harnessed_agent',
+                    })
+                elif isinstance(m, dict):
+                    models_list.append(m)
+        except Exception:
+            pass
+
+    # Always include the default model if not already listed
+    existing_ids = {m['id'] for m in models_list}
+    if default_model not in existing_ids:
+        models_list.insert(0, {
+            'id': default_model,
+            'object': 'model',
+            'created': _now_ts(),
+            'owned_by': 'harnessed_agent',
+        })
+
+    return {
+        'object': 'list',
+        'data': models_list,
+    }
+
+
+async def harnessed_llm_completions(body: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    OpenAI-compatible /v1/completions endpoint (legacy, non-chat).
+    Converts prompt to messages format internally.
+    """
+    prompt = body.get('prompt', '')
+    model = body.get('model', 'default')
+    temperature = body.get('temperature', 0.7)
+    max_tokens = body.get('max_tokens', None)
+    stream = body.get('stream', False)
+
+    # Convert to chat format
+    messages = [{'role': 'user', 'content': prompt}]
+
+    # Build request body for chat completions
+    chat_body = {
+        'model': model,
+        'messages': messages,
+        'temperature': temperature,
+        'max_tokens': max_tokens,
+        'stream': stream,
+        '_request': body.get('_request'),
+    }
+
+    return await harnessed_llm_chat_completions(chat_body)
--- a/json/harnessed_agent_config_view.json
+++ b/json/harnessed_agent_config_view.json
@ -4,20 +4,22 @@
    "title": "Agent Configuration",
    "params": {
        "logined_userid": "user_id",
-        "confidential_fields": [],
+        "confidential_fields": ["llm_api_key"],
        "browserfields": {
+            "exclouded": ["llm_api_key", "available_models"],
            "alters": {
                "auto_cleanup_enabled": {
                    "uitype": "code",
                    "data": [
-                        {
-                            "value": "1",
-                            "text": "Enabled"
-                        },
-                        {
-                            "value": "0",
-                            "text": "Disabled"
-                        }
+                        {"value": "1", "text": "Enabled"},
+                        {"value": "0", "text": "Disabled"}
+                    ]
+                },
+                "enable_streaming": {
+                    "uitype": "code",
+                    "data": [
+                        {"value": "1", "text": "Enabled"},
+                        {"value": "0", "text": "Disabled"}
                    ]
                }
            }
@ -25,7 +27,8 @@
        "editexclouded": [
            "id",
            "user_id",
-            "created_at"
+            "created_at",
+            "updated_at"
        ],
        "editable": {
            "new_data_url": "{{entire_url('../api/harnessed_agent_config_view_create.dspy')}}",
--- a/models/harnessed_agent_config.json
+++ b/models/harnessed_agent_config.json
@ -105,6 +105,32 @@
            "nullable": "no",
            "default": "1"
        },
+        {
+            "name": "llm_service_url",
+            "title": "LLM service base URL (OpenAI-compatible endpoint)",
+            "type": "str",
+            "length": 255,
+            "nullable": "yes",
+            "default": "https://dashscope.aliyuncs.com/compatible-mode/v1",
+            "comments": "Base URL for the LLM provider API, e.g. https://api.openai.com/v1"
+        },
+        {
+            "name": "llm_api_key",
+            "title": "LLM service API key",
+            "type": "str",
+            "length": 255,
+            "nullable": "yes",
+            "default": "",
+            "comments": "API key for LLM service authentication (Bearer token)"
+        },
+        {
+            "name": "available_models",
+            "title": "Available LLM models (JSON array)",
+            "type": "text",
+            "nullable": "yes",
+            "default": "",
+            "comments": "JSON array of model IDs, e.g. [\"qwen3-max\", \"qwen3-plus\"]"
+        },
        {
            "name": "created_at",
            "title": "Creation timestamp",
--- a/wwwroot/v1/chat/completions.dspy
+++ b/wwwroot/v1/chat/completions.dspy
@ -0,0 +1,46 @@
+"""
+OpenAI-compatible /v1/chat/completions endpoint
+Accepts POST with JSON body matching OpenAI API format.
+"""
+import json
+
+async def main():
+    # Read request body
+    body = {}
+    try:
+        raw_body = await request.read()
+        if raw_body:
+            body = json.loads(raw_body)
+    except Exception as e:
+        result = {
+            'error': {
+                'message': f'Invalid JSON body: {str(e)}',
+                'type': 'invalid_request_error',
+                'code': 400,
+            }
+        }
+        return json.dumps(result, ensure_ascii=False)
+
+    # Pass the request object for streaming support
+    body['_request'] = request
+
+    # Call the LLM API handler
+    result = await harnessed_llm_chat_completions(body)
+
+    # Handle streaming response (StreamResponse)
+    from aiohttp.web_response import StreamResponse
+    if isinstance(result, StreamResponse):
+        return result
+
+    # Handle error response
+    if 'error' in result:
+        status_code = result.get('error', {}).get('code', 500)
+        resp = web.Response(
+            status=status_code,
+            body=json.dumps(result, ensure_ascii=False),
+            content_type='application/json'
+        )
+        return resp
+
+    # Return successful response
+    return json.dumps(result, ensure_ascii=False)
--- a/wwwroot/v1/completions.dspy
+++ b/wwwroot/v1/completions.dspy
@ -0,0 +1,41 @@
+"""
+OpenAI-compatible /v1/completions endpoint (legacy)
+Accepts POST with JSON body matching OpenAI completions API format.
+"""
+import json
+
+async def main():
+    # Read request body
+    body = {}
+    try:
+        raw_body = await request.read()
+        if raw_body:
+            body = json.loads(raw_body)
+    except Exception as e:
+        result = {
+            'error': {
+                'message': f'Invalid JSON body: {str(e)}',
+                'type': 'invalid_request_error',
+                'code': 400,
+            }
+        }
+        return json.dumps(result, ensure_ascii=False)
+
+    body['_request'] = request
+
+    result = await harnessed_llm_completions(body)
+
+    from aiohttp.web_response import StreamResponse
+    if isinstance(result, StreamResponse):
+        return result
+
+    if 'error' in result:
+        status_code = result.get('error', {}).get('code', 500)
+        resp = web.Response(
+            status=status_code,
+            body=json.dumps(result, ensure_ascii=False),
+            content_type='application/json'
+        )
+        return resp
+
+    return json.dumps(result, ensure_ascii=False)
--- a/wwwroot/v1/models.dspy
+++ b/wwwroot/v1/models.dspy
@ -0,0 +1,9 @@
+"""
+OpenAI-compatible /v1/models endpoint
+Returns list of available models.
+"""
+import json
+
+async def main():
+    result = await harnessed_llm_models()
+    return json.dumps(result, ensure_ascii=False)