feat: implement OpenAI-compatible LLM API

- Add /v1/chat/completions endpoint (POST) with streaming support - Add /v1/models endpoint (GET) listing available models - Add /v1/completions endpoint (POST) legacy compatibility - Add llm_api.py module with OpenAI API proxy via aiohttp - Add llm_service_url, llm_api_key, available_models to config model - Update harnessed_agent_config_view CRUD to protect API key field - Register new functions in init.py (harnessed_llm_chat_completions etc.) - Add .gitignore for pycache files Endpoints available under module path: POST /harnessed_agent/v1/chat/completions GET /harnessed_agent/v1/models POST /harnessed_agent/v1/completions
2026-05-07 11:36:35 +08:00 · 2026-05-07 11:36:35 +08:00 · 608413a5d5
commit 608413a5d5
parent 52f88239ed
8 changed files with 536 additions and 11 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,8 @@
 __pycache__/
 *.pyc
 *.pyo
 *.egg-info/
 dist/
 build/
 py3/
 *.egg
--- a/harnessed_agent/init.py
+++ b/harnessed_agent/init.py
@ -20,6 +20,11 @@ from .config_functions import (
    harnessed_get_agent_config,
    harnessed_save_agent_config
 )
 from .llm_api import (
    harnessed_llm_chat_completions,
    harnessed_llm_models,
    harnessed_llm_completions,
 )
 def load_harnessed_agent():
    env = ServerEnv()
@ -43,3 +48,8 @@ def load_harnessed_agent():
    # Configuration management functions
    env.harnessed_get_agent_config = harnessed_get_agent_config
    env.harnessed_save_agent_config = harnessed_save_agent_config
    # OpenAI-compatible LLM API
    env.harnessed_llm_chat_completions = harnessed_llm_chat_completions
    env.harnessed_llm_models = harnessed_llm_models
    env.harnessed_llm_completions = harnessed_llm_completions
--- a/harnessed_agent/llm_api.py
+++ b/harnessed_agent/llm_api.py
@ -0,0 +1,382 @@
 """
 OpenAI-compatible LLM API for Hermes Agent
 Provides /v1/chat/completions, /v1/completions and /v1/models handlers compatible with the OpenAI API spec.
 """
 import json
 import uuid
 import time
 import asyncio
 from typing import Dict, Any, List, Optional, AsyncGenerator
 from datetime import datetime
 try:
    from aiohttp import ClientSession, ClientTimeout
    from aiohttp.client_exceptions import ClientError
    HAS_AIOHTTP = True
 except ImportError:
    HAS_AIOHTTP = False
 try:
    from ahserver.serverenv import ServerEnv
    from sqlor.dbpools import DBPools
    from appPublic.worker import awaitify
 except ImportError:
    class ServerEnv:
        pass
    class DBPools:
        pass
    def awaitify(f):
        return f
 def _now_iso():
    """Return the current UTC time as an ISO-8601 string with a ``Z`` suffix.

    ``datetime.utcnow()`` is deprecated since Python 3.12, so the time is read
    from an aware UTC clock and the tzinfo is dropped before formatting. The
    result keeps the shape the naive call produced, e.g.
    ``2026-05-07T03:36:35.123456Z`` (no ``+00:00`` offset in the string).
    """
    # Local import: the module top only imports ``datetime`` from ``datetime``.
    from datetime import timezone
    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + 'Z'
 def _now_ts():
    """Return the current Unix time truncated to whole seconds."""
    seconds_since_epoch = time.time()
    return int(seconds_since_epoch)
 def _get_default_llm_config(user_id: Optional[str] = None) -> Optional[Dict[str, Any]]:
    """Synchronously fetch the most recently updated LLM config row.

    Meant for callers that are NOT inside a running event loop. From within
    a running loop the coroutine cannot be driven to completion here, so the
    function returns ``None`` and async callers should use
    ``_async_get_llm_config`` instead.

    Args:
        user_id: When truthy, only rows with this ``user_id`` are considered.

    Returns:
        The first ``harnessed_agent_config`` row ordered by
        ``updated_at DESC``, or ``None`` when a loop is already running, no
        row matches, or the lookup fails.
    """
    try:
        dbname = ServerEnv().get_module_dbname('harnessed_agent')
    except Exception:
        # Module database lookup failed: fall back to the default name.
        dbname = 'default'

    async def _fetch():
        async with DBPools().sqlorContext(dbname) as sor:
            filters = {'user_id': user_id} if user_id else {}
            rows = await sor.R('harnessed_agent_config', filters,
                               orderby='updated_at DESC', limit=1)
            return rows[0] if rows else None

    # asyncio.get_event_loop() without a current loop is deprecated and raises
    # on newer Python versions, so detect a running loop explicitly instead.
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        pass  # no loop running in this thread: safe to drive one ourselves
    else:
        return None

    # NOTE(review): the lookup runs on a private, short-lived loop; confirm
    # DBPools does not cache connections bound to the loop that created them.
    loop = asyncio.new_event_loop()
    try:
        return loop.run_until_complete(_fetch())
    except Exception as e:
        # Best-effort helper: report the failure instead of hiding it.
        print(f"Error fetching LLM config: {e}")
        return None
    finally:
        loop.close()
 async def _async_get_llm_config(user_id: Optional[str] = None) -> Optional[Dict[str, Any]]:
    """Fetch the most recently updated LLM config row from the database.

    Args:
        user_id: When truthy, only rows with this ``user_id`` are considered;
            otherwise the lookup is unfiltered.

    Returns:
        The first ``harnessed_agent_config`` row ordered by
        ``updated_at DESC``, or ``None`` when no row exists or the query
        fails (the failure is printed, not raised).
    """
    try:
        dbname = ServerEnv().get_module_dbname('harnessed_agent')
    except Exception:
        # Module database lookup failed: fall back to the default name.
        dbname = 'default'
    filters = {'user_id': user_id} if user_id else {}
    try:
        async with DBPools().sqlorContext(dbname) as sor:
            rows = await sor.R('harnessed_agent_config', filters,
                               orderby='updated_at DESC', limit=1)
            if rows:
                return rows[0]
    except Exception as e:
        print(f"Error fetching LLM config: {e}")
    return None
 async def _call_llm_api(
    service_url: str,
    api_key: str,
    model: str,
    messages: List[Dict[str, str]],
    temperature: float = 0.7,
    max_tokens: Optional[int] = None,
    stream: bool = False,
    top_p: float = 1.0,
    **kwargs
 ) -> Any:
    """Call an OpenAI-compatible ``/chat/completions`` endpoint.

    Args:
        service_url: Provider base URL; a trailing slash is tolerated.
        api_key: Sent as a ``Bearer`` token in the Authorization header.
        model: Model identifier forwarded to the provider.
        messages: Chat messages forwarded unchanged.
        temperature: Sampling temperature.
        max_tokens: Completion token limit; omitted from the request when None.
        stream: When true, request a streamed completion.
        top_p: Nucleus sampling parameter.
        **kwargs: Optional OpenAI parameters; only ``stop``,
            ``presence_penalty``, ``frequency_penalty``, ``tools``,
            ``tool_choice``, ``response_format`` and ``seed`` are forwarded,
            and only when not None.

    Returns:
        For ``stream=False`` the dict produced by ``_sync_llm_response``.
        For ``stream=True`` the async generator created by
        ``_stream_llm_response``; consume it with ``async for``.
    """
    url = service_url.rstrip('/') + '/chat/completions'
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {api_key}',
    }
    body = {
        'model': model,
        'messages': messages,
        'temperature': temperature,
        'top_p': top_p,
        'stream': stream,
    }
    if max_tokens is not None:
        body['max_tokens'] = max_tokens
    # Pass through any additional OpenAI-compatible parameters that were set.
    passthrough = ('stop', 'presence_penalty', 'frequency_penalty', 'tools',
                   'tool_choice', 'response_format', 'seed')
    body.update({key: kwargs[key] for key in passthrough
                 if kwargs.get(key) is not None})
    if stream:
        # _stream_llm_response is an async generator function: calling it
        # returns the generator object, which cannot be awaited (awaiting it
        # raises TypeError). Hand the generator back to the caller instead.
        return _stream_llm_response(url, headers, body)
    return await _sync_llm_response(url, headers, body)
 async def _sync_llm_response(url: str, headers: Dict, body: Dict) -> Dict[str, Any]:
    """Make a non-streaming LLM API call and return the decoded JSON reply.

    Args:
        url: Full endpoint URL to POST to.
        headers: HTTP headers for the request.
        body: JSON-serialisable request payload.

    Returns:
        The provider's JSON response on HTTP 200. Otherwise an
        ``{'error': {...}}`` dict: ``code`` is the upstream HTTP status for
        non-200 replies, 502 when the request fails in transit, times out or
        the reply is not valid JSON, and 503 when aiohttp is unavailable.
    """
    if not HAS_AIOHTTP:
        return {
            'error': {
                'message': 'aiohttp is not installed; cannot reach the LLM service',
                'type': 'configuration_error',
                'code': 503,
            }
        }
    try:
        async with ClientSession() as session:
            async with session.post(url, headers=headers, json=body,
                                    timeout=ClientTimeout(total=300)) as resp:
                if resp.status != 200:
                    error_text = await resp.text()
                    return {
                        'error': {
                            'message': f'LLM API error: HTTP {resp.status}',
                            'type': 'api_error',
                            'code': resp.status,
                            # Truncated so a large upstream error page is not echoed whole.
                            'detail': error_text[:500],
                        }
                    }
                return await resp.json()
    except (ClientError, asyncio.TimeoutError, ValueError) as e:
        # Connection failures, timeouts and undecodable bodies are reported in
        # the same error-dict shape callers already handle, instead of raising.
        return {
            'error': {
                'message': f'LLM API request failed: {e}',
                'type': 'api_error',
                'code': 502,
            }
        }
 async def _stream_llm_response(url: str, headers: Dict, body: Dict) -> AsyncGenerator[str, None]:
    """Stream an LLM API call, yielding Server-Sent-Event strings.

    On HTTP 200 every upstream line that starts with ``data: `` (after
    stripping) is yielded with a trailing blank line; all other lines are
    dropped. On any other status a single ``data: {error...}`` event is
    yielded, followed by ``data: [DONE]``.
    """
    request_timeout = ClientTimeout(total=600)
    async with ClientSession() as session:
        async with session.post(url, headers=headers, json=body,
                                timeout=request_timeout) as resp:
            if resp.status == 200:
                async for raw_line in resp.content:
                    event = raw_line.decode('utf-8').strip()
                    # Empty lines never match the prefix, so they are skipped too.
                    if event.startswith('data: '):
                        yield event + '\n\n'
                return
            # Non-200: report the failure as one SSE error event, then end.
            error_text = await resp.text()
            failure = {
                'error': {
                    'message': f'LLM API error: HTTP {resp.status}',
                    'type': 'api_error',
                    'code': resp.status,
                    'detail': error_text[:500],
                }
            }
            yield f"data: {json.dumps(failure, ensure_ascii=False)}\n\n"
            yield "data: [DONE]\n\n"
 # ============================================================
 # Public API functions (registered to ServerEnv via init.py)
 # ============================================================
 async def harnessed_llm_chat_completions(body: Dict[str, Any]) -> Any:
    """
    OpenAI-compatible /v1/chat/completions endpoint.
    Args:
        body: dict with keys: model, messages, temperature, max_tokens,
              stream, top_p, and other OpenAI-compatible params. For
              streaming, ``_request`` must hold the incoming request object
              the response is prepared against.
    Returns:
        If stream=False: dict matching OpenAI chat completion response, or
        an ``{'error': {...}}`` dict.
        If stream=True: aiohttp.StreamResponse with SSE (already written),
        or an ``{'error': {...}}`` dict when configuration or the request
        object is missing.
    """
    model = body.get('model', 'default')
    messages = body.get('messages', [])
    temperature = body.get('temperature', 0.7)
    max_tokens = body.get('max_tokens', None)
    stream = body.get('stream', False)
    top_p = body.get('top_p', 1.0)
    # Get LLM service config
    config = await _async_get_llm_config()
    if not config:
        return {
            'error': {
                'message': 'LLM service not configured. Please configure llm_service_url and llm_api_key in agent settings.',
                'type': 'configuration_error',
                'code': 503,
            }
        }
    service_url = config.get('llm_service_url') or config.get('api_endpoint')
    api_key = config.get('llm_api_key') or config.get('api_key')
    if not service_url or not api_key:
        return {
            'error': {
                'message': 'LLM service URL or API key not configured. Set llm_service_url and llm_api_key in harnessed_agent_config.',
                'type': 'configuration_error',
                'code': 503,
            }
        }
    # Use default model from config if request model is 'default' or empty.
    # ``or`` also covers a config row whose default_model is NULL/empty.
    default_model = config.get('default_model') or 'qwen3-max'
    if not model or model == 'default':
        model = default_model
    # Pass through extra params
    extra_params = {
        key: body[key]
        for key in ('stop', 'presence_penalty', 'frequency_penalty', 'tools',
                    'tool_choice', 'response_format', 'seed')
        if key in body
    }
    if not stream:
        return await _call_llm_api(
            service_url=service_url,
            api_key=api_key,
            model=model,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            stream=False,
            top_p=top_p,
            **extra_params,
        )

    # ---- streaming path ----
    from aiohttp import web
    request = body.get('_request')
    if request is None:
        # Without the request object the response cannot be prepared.
        return {
            'error': {
                'message': 'Streaming requires the originating request object (_request).',
                'type': 'server_error',
                'code': 500,
            }
        }
    # Same endpoint and payload rules as the non-streaming path: the provider
    # base URL needs the /chat/completions suffix, and unset optional
    # parameters are omitted rather than sent as null.
    url = service_url.rstrip('/') + '/chat/completions'
    upstream_headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {api_key}',
    }
    payload = {
        'model': model,
        'messages': messages,
        'temperature': temperature,
        'top_p': top_p,
        'stream': True,
    }
    if max_tokens is not None:
        payload['max_tokens'] = max_tokens
    payload.update({k: v for k, v in extra_params.items() if v is not None})

    resp = web.StreamResponse(
        status=200,
        reason='OK',
        headers={
            'Content-Type': 'text/event-stream',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'X-Accel-Buffering': 'no',
        }
    )
    await resp.prepare(request)
    done_sent = False  # upstream (or its error path) usually emits [DONE] itself
    try:
        async for chunk_line in _stream_llm_response(url, upstream_headers, payload):
            if chunk_line.strip() == 'data: [DONE]':
                done_sent = True
            await resp.write(chunk_line.encode('utf-8') if isinstance(chunk_line, str) else chunk_line)
            await resp.drain()
    except Exception as e:
        error_data = {
            'error': {
                'message': f'Streaming error: {str(e)}',
                'type': 'server_error',
                'code': 500,
            }
        }
        await resp.write(f"data: {json.dumps(error_data)}\n\n".encode('utf-8'))
        await resp.drain()
    if not done_sent:
        # Terminate the stream exactly once.
        await resp.write(b"data: [DONE]\n\n")
        await resp.drain()
    return resp
 async def harnessed_llm_models() -> Dict[str, Any]:
    """
    OpenAI-compatible /v1/models endpoint.

    Builds the model list from the config row's ``available_models`` value
    (a JSON array string or an already-decoded list). String entries become
    model objects owned by ``harnessed_agent``; dict entries are passed
    through when they carry an ``id``. The configured default model
    (``qwen3-max`` when unset) is inserted first if not already listed.

    Returns:
        ``{'object': 'list', 'data': [...]}``.
    """
    config = await _async_get_llm_config()
    # ``or`` covers both a missing config row and a NULL/empty default_model.
    default_model = (config.get('default_model') if config else None) or 'qwen3-max'
    created = _now_ts()

    def _model_entry(model_id):
        return {
            'id': model_id,
            'object': 'model',
            'created': created,
            'owned_by': 'harnessed_agent',
        }

    models_list = []
    raw_models = config.get('available_models') if config else None
    if raw_models:
        if isinstance(raw_models, str):
            try:
                raw_models = json.loads(raw_models)
            except ValueError as e:
                # Invalid JSON in the config must not break the endpoint,
                # but it should be visible to whoever reads the logs.
                print(f"Ignoring invalid available_models value: {e}")
                raw_models = []
        if not isinstance(raw_models, (list, tuple)):
            # Anything but an array (e.g. a bare string or object) would be
            # iterated element-wise and produce bogus model entries.
            raw_models = []
        for m in raw_models:
            if isinstance(m, str):
                models_list.append(_model_entry(m))
            elif isinstance(m, dict) and m.get('id'):
                # Entries without an id cannot be listed or de-duplicated.
                models_list.append(m)
    # Always include the default model if not already listed
    existing_ids = {m['id'] for m in models_list}
    if default_model not in existing_ids:
        models_list.insert(0, _model_entry(default_model))
    return {
        'object': 'list',
        'data': models_list,
    }
 async def harnessed_llm_completions(body: Dict[str, Any]) -> Any:
    """
    OpenAI-compatible /v1/completions endpoint (legacy, non-chat).

    Wraps ``prompt`` in a single user message and delegates to
    ``harnessed_llm_chat_completions``. Sampling parameters shared by both
    APIs (``top_p``, ``stop``, ``presence_penalty``, ``frequency_penalty``,
    ``seed``) are forwarded when the caller supplied them.

    Returns:
        Whatever ``harnessed_llm_chat_completions`` returns for the converted
        request: a dict, or a stream response when ``stream`` is true.
        NOTE(review): the result is passed through unchanged, so it is not
        converted back to the legacy completion format here.
    """
    # Build request body for chat completions (prompt converted to chat format)
    chat_body = {
        'model': body.get('model', 'default'),
        'messages': [{'role': 'user', 'content': body.get('prompt', '')}],
        'temperature': body.get('temperature', 0.7),
        'max_tokens': body.get('max_tokens', None),
        'stream': body.get('stream', False),
        '_request': body.get('_request'),
    }
    # Forward only what the caller actually sent so the chat handler's own
    # defaults still apply to everything else.
    for key in ('top_p', 'stop', 'presence_penalty', 'frequency_penalty', 'seed'):
        if key in body:
            chat_body[key] = body[key]
    return await harnessed_llm_chat_completions(chat_body)
--- a/json/harnessed_agent_config_view.json
+++ b/json/harnessed_agent_config_view.json
@ -4,20 +4,22 @@
    "title": "Agent Configuration",
    "params": {
        "logined_userid": "user_id",
-        "confidential_fields": [],
+        "confidential_fields": ["llm_api_key"],
        "browserfields": {
            "exclouded": ["llm_api_key", "available_models"],
            "alters": {
                "auto_cleanup_enabled": {
                    "uitype": "code",
                    "data": [
-                        {
+                        {"value": "1", "text": "Enabled"},
-                            "value": "1",
+                        {"value": "0", "text": "Disabled"}
-                            "text": "Enabled"
+                    ]
-                        },
+                },
-                        {
+                "enable_streaming": {
-                            "value": "0",
+                    "uitype": "code",
-                            "text": "Disabled"
+                    "data": [
-                        }
+                        {"value": "1", "text": "Enabled"},
                        {"value": "0", "text": "Disabled"}
                    ]
                }
            }
@ -25,7 +27,8 @@
        "editexclouded": [
            "id",
            "user_id",
-            "created_at"
+            "created_at",
            "updated_at"
        ],
        "editable": {
            "new_data_url": "{{entire_url('../api/harnessed_agent_config_view_create.dspy')}}",
--- a/models/harnessed_agent_config.json
+++ b/models/harnessed_agent_config.json
@ -105,6 +105,32 @@
            "nullable": "no",
            "default": "1"
        },
        {
            "name": "llm_service_url",
            "title": "LLM service base URL (OpenAI-compatible endpoint)",
            "type": "str",
            "length": 255,
            "nullable": "yes",
            "default": "https://dashscope.aliyuncs.com/compatible-mode/v1",
            "comments": "Base URL for the LLM provider API, e.g. https://api.openai.com/v1"
        },
        {
            "name": "llm_api_key",
            "title": "LLM service API key",
            "type": "str",
            "length": 255,
            "nullable": "yes",
            "default": "",
            "comments": "API key for LLM service authentication (Bearer token)"
        },
        {
            "name": "available_models",
            "title": "Available LLM models (JSON array)",
            "type": "text",
            "nullable": "yes",
            "default": "",
            "comments": "JSON array of model IDs, e.g. [\"qwen3-max\", \"qwen3-plus\"]"
        },
        {
            "name": "created_at",
            "title": "Creation timestamp",
--- a/wwwroot/v1/chat/completions.dspy
+++ b/wwwroot/v1/chat/completions.dspy
@ -0,0 +1,46 @@
 """
 OpenAI-compatible /v1/chat/completions endpoint
 Accepts POST with JSON body matching OpenAI API format.
 """
 import json
 async def main():
    """Handle POST /v1/chat/completions.

    Reads the JSON request body, hands it to
    ``harnessed_llm_chat_completions`` together with the request object, and
    maps the result to an HTTP reply: a stream response is returned as-is,
    an ``error`` dict becomes a JSON response with a matching status code,
    and anything else is returned as a JSON string.
    """
    from aiohttp import web
    from aiohttp.web_response import StreamResponse

    def _error_response(status, message, error_type):
        # Error body in the same shape the LLM handler uses.
        payload = {'error': {'message': message, 'type': error_type, 'code': status}}
        return web.Response(
            status=status,
            body=json.dumps(payload, ensure_ascii=False),
            content_type='application/json'
        )

    # Read request body
    body = {}
    try:
        raw_body = await request.read()
        if raw_body:
            body = json.loads(raw_body)
    except Exception as e:
        # Malformed input is answered with a real HTTP 400 status.
        return _error_response(400, f'Invalid JSON body: {str(e)}', 'invalid_request_error')
    if not isinstance(body, dict):
        # A JSON array/string/number cannot carry the request parameters.
        return _error_response(400, 'Request body must be a JSON object', 'invalid_request_error')
    # Pass the request object for streaming support
    body['_request'] = request
    # Call the LLM API handler
    result = await harnessed_llm_chat_completions(body)
    # Handle streaming response (StreamResponse)
    if isinstance(result, StreamResponse):
        return result
    # Handle error response
    if isinstance(result, dict) and 'error' in result:
        error = result['error']
        code = error.get('code') if isinstance(error, dict) else None
        # Only a valid HTTP error status may be used; anything else
        # (e.g. a string error code) falls back to 500.
        status_code = code if isinstance(code, int) and 400 <= code <= 599 else 500
        return web.Response(
            status=status_code,
            body=json.dumps(result, ensure_ascii=False),
            content_type='application/json'
        )
    # Return successful response
    return json.dumps(result, ensure_ascii=False)
--- a/wwwroot/v1/completions.dspy
+++ b/wwwroot/v1/completions.dspy
@ -0,0 +1,41 @@
 """
 OpenAI-compatible /v1/completions endpoint (legacy)
 Accepts POST with JSON body matching OpenAI completions API format.
 """
 import json
 async def main():
    """Handle POST /v1/completions (legacy).

    Reads the JSON request body, hands it to ``harnessed_llm_completions``
    together with the request object, and maps the result to an HTTP reply:
    a stream response is returned as-is, an ``error`` dict becomes a JSON
    response with a matching status code, and anything else is returned as a
    JSON string.
    """
    from aiohttp import web
    from aiohttp.web_response import StreamResponse

    def _error_response(status, message, error_type):
        # Error body in the same shape the LLM handler uses.
        payload = {'error': {'message': message, 'type': error_type, 'code': status}}
        return web.Response(
            status=status,
            body=json.dumps(payload, ensure_ascii=False),
            content_type='application/json'
        )

    # Read request body
    body = {}
    try:
        raw_body = await request.read()
        if raw_body:
            body = json.loads(raw_body)
    except Exception as e:
        # Malformed input is answered with a real HTTP 400 status.
        return _error_response(400, f'Invalid JSON body: {str(e)}', 'invalid_request_error')
    if not isinstance(body, dict):
        # A JSON array/string/number cannot carry the request parameters.
        return _error_response(400, 'Request body must be a JSON object', 'invalid_request_error')
    # Pass the request object for streaming support
    body['_request'] = request
    result = await harnessed_llm_completions(body)
    if isinstance(result, StreamResponse):
        return result
    if isinstance(result, dict) and 'error' in result:
        error = result['error']
        code = error.get('code') if isinstance(error, dict) else None
        # Only a valid HTTP error status may be used; anything else
        # (e.g. a string error code) falls back to 500.
        status_code = code if isinstance(code, int) and 400 <= code <= 599 else 500
        return web.Response(
            status=status_code,
            body=json.dumps(result, ensure_ascii=False),
            content_type='application/json'
        )
    return json.dumps(result, ensure_ascii=False)
--- a/wwwroot/v1/models.dspy
+++ b/wwwroot/v1/models.dspy
@ -0,0 +1,9 @@
 """
 OpenAI-compatible /v1/models endpoint
 Returns list of available models.
 """
 import json
 async def main():
    """Return the model listing as a JSON string, keeping non-ASCII text verbatim."""
    models_payload = await harnessed_llm_models()
    serialized = json.dumps(models_payload, ensure_ascii=False)
    return serialized