diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3235e2e --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +__pycache__/ +*.pyc +*.pyo +*.egg-info/ +dist/ +build/ +py3/ +*.egg diff --git a/harnessed_agent/init.py b/harnessed_agent/init.py index 021faf7..bfe0a5d 100644 --- a/harnessed_agent/init.py +++ b/harnessed_agent/init.py @@ -20,6 +20,11 @@ from .config_functions import ( harnessed_get_agent_config, harnessed_save_agent_config ) +from .llm_api import ( + harnessed_llm_chat_completions, + harnessed_llm_models, + harnessed_llm_completions, +) def load_harnessed_agent(): env = ServerEnv() @@ -42,4 +47,9 @@ def load_harnessed_agent(): # Configuration management functions env.harnessed_get_agent_config = harnessed_get_agent_config - env.harnessed_save_agent_config = harnessed_save_agent_config \ No newline at end of file + env.harnessed_save_agent_config = harnessed_save_agent_config + + # OpenAI-compatible LLM API + env.harnessed_llm_chat_completions = harnessed_llm_chat_completions + env.harnessed_llm_models = harnessed_llm_models + env.harnessed_llm_completions = harnessed_llm_completions \ No newline at end of file diff --git a/harnessed_agent/llm_api.py b/harnessed_agent/llm_api.py new file mode 100644 index 0000000..c69f253 --- /dev/null +++ b/harnessed_agent/llm_api.py @@ -0,0 +1,382 @@ +""" +OpenAI-compatible LLM API for Hermes Agent +Provides /v1/chat/completions and /v1/models endpoints compatible with OpenAI API spec. +""" +import json +import uuid +import time +import asyncio +from typing import Dict, Any, List, Optional, AsyncGenerator +from datetime import datetime + +try: + from aiohttp import ClientSession, ClientTimeout + from aiohttp.client_exceptions import ClientError + HAS_AIOHTTP = True +except ImportError: + HAS_AIOHTTP = False + +try: + from ahserver.serverenv import ServerEnv + from sqlor.dbpools import DBPools + from appPublic.worker import awaitify +except ImportError: + class ServerEnv: + pass + class DBPools: + pass + def awaitify(f): + return f + + +def _now_iso(): + return datetime.utcnow().isoformat() + 'Z' + + +def _now_ts(): + return int(time.time()) + + +def _get_default_llm_config(user_id: str = None) -> Dict[str, Any]: + """Get LLM service config from harnessed_agent_config table.""" + try: + dbname = ServerEnv().get_module_dbname('harnessed_agent') + except Exception: + dbname = 'default' + + try: + import asyncio + async def _fetch(): + async with DBPools().sqlorContext(dbname) as sor: + filters = {} + if user_id: + filters['user_id'] = user_id + rows = await sor.R('harnessed_agent_config', filters, + orderby='updated_at DESC', limit=1) + if rows: + return rows[0] + return None + + try: + loop = asyncio.get_event_loop() + if loop.is_running(): + # Already in async context, need to use create_task pattern + # but for simplicity in .dspy context we return None and let caller handle + return None + else: + config = loop.run_until_complete(_fetch()) + return config + except RuntimeError: + return None + except Exception: + return None + + +async def _async_get_llm_config(user_id: str = None) -> Dict[str, Any]: + """Async version to get LLM service config.""" + try: + env = ServerEnv() + dbname = env.get_module_dbname('harnessed_agent') + except Exception: + dbname = 'default' + + try: + async with DBPools().sqlorContext(dbname) as sor: + filters = {} + if user_id: + filters['user_id'] = user_id + rows = await sor.R('harnessed_agent_config', filters, + orderby='updated_at DESC', limit=1) + if rows: + return rows[0] + except Exception as e: + print(f"Error fetching LLM config: {e}") + return None + + +async def _call_llm_api( + service_url: str, + api_key: str, + model: str, + messages: List[Dict[str, str]], + temperature: float = 0.7, + max_tokens: Optional[int] = None, + stream: bool = False, + top_p: float = 1.0, + **kwargs +) -> Any: + """Call an OpenAI-compatible LLM API endpoint.""" + url = service_url.rstrip('/') + '/chat/completions' + + headers = { + 'Content-Type': 'application/json', + 'Authorization': f'Bearer {api_key}', + } + + body = { + 'model': model, + 'messages': messages, + 'temperature': temperature, + 'top_p': top_p, + 'stream': stream, + } + + if max_tokens is not None: + body['max_tokens'] = max_tokens + + # Pass through any additional OpenAI-compatible parameters + for key in ('stop', 'presence_penalty', 'frequency_penalty', 'tools', 'tool_choice', + 'response_format', 'seed'): + if key in kwargs and kwargs[key] is not None: + body[key] = kwargs[key] + + if stream: + return await _stream_llm_response(url, headers, body) + else: + return await _sync_llm_response(url, headers, body) + + +async def _sync_llm_response(url: str, headers: Dict, body: Dict) -> Dict[str, Any]: + """Make a non-streaming LLM API call.""" + async with ClientSession() as session: + async with session.post(url, headers=headers, json=body, + timeout=ClientTimeout(total=300)) as resp: + if resp.status != 200: + error_text = await resp.text() + return { + 'error': { + 'message': f'LLM API error: HTTP {resp.status}', + 'type': 'api_error', + 'code': resp.status, + 'detail': error_text[:500], + } + } + + data = await resp.json() + return data + + +async def _stream_llm_response(url: str, headers: Dict, body: Dict) -> AsyncGenerator[str, None]: + """Make a streaming LLM API call, yielding SSE chunks.""" + async with ClientSession() as session: + async with session.post(url, headers=headers, json=body, + timeout=ClientTimeout(total=600)) as resp: + if resp.status != 200: + error_text = await resp.text() + error_data = { + 'error': { + 'message': f'LLM API error: HTTP {resp.status}', + 'type': 'api_error', + 'code': resp.status, + 'detail': error_text[:500], + } + } + yield f"data: {json.dumps(error_data, ensure_ascii=False)}\n\n" + yield "data: [DONE]\n\n" + return + + async for line in resp.content: + line = line.decode('utf-8').strip() + if not line: + continue + if line.startswith('data: '): + yield line + '\n\n' + + +# ============================================================ +# Public API functions (registered to ServerEnv via init.py) +# ============================================================ + +async def harnessed_llm_chat_completions(body: Dict[str, Any]) -> Any: + """ + OpenAI-compatible /v1/chat/completions endpoint. + + Args: + body: dict with keys: model, messages, temperature, max_tokens, + stream, top_p, and other OpenAI-compatible params. + + Returns: + If stream=False: dict matching OpenAI chat completion response. + If stream=True: aiohttp.StreamResponse with SSE. + """ + from aiohttp import web + + model = body.get('model', 'default') + messages = body.get('messages', []) + temperature = body.get('temperature', 0.7) + max_tokens = body.get('max_tokens', None) + stream = body.get('stream', False) + top_p = body.get('top_p', 1.0) + + # Get LLM service config + config = await _async_get_llm_config() + if not config: + return { + 'error': { + 'message': 'LLM service not configured. Please configure llm_service_url and llm_api_key in agent settings.', + 'type': 'configuration_error', + 'code': 503, + } + } + + service_url = config.get('llm_service_url') or config.get('api_endpoint') + api_key = config.get('llm_api_key') or config.get('api_key') + + if not service_url or not api_key: + return { + 'error': { + 'message': 'LLM service URL or API key not configured. Set llm_service_url and llm_api_key in harnessed_agent_config.', + 'type': 'configuration_error', + 'code': 503, + } + } + + # Use default model from config if request model is 'default' or empty + default_model = config.get('default_model', 'qwen3-max') + if not model or model == 'default': + model = default_model + + # Pass through extra params + extra_params = {} + for key in ('stop', 'presence_penalty', 'frequency_penalty', 'tools', + 'tool_choice', 'response_format', 'seed'): + if key in body: + extra_params[key] = body[key] + + if stream: + resp = web.StreamResponse( + status=200, + reason='OK', + headers={ + 'Content-Type': 'text/event-stream', + 'Cache-Control': 'no-cache', + 'Connection': 'keep-alive', + 'X-Accel-Buffering': 'no', + } + ) + await resp.prepare(body.get('_request')) + + request_id = f"chatcmpl-{uuid.uuid4().hex[:12]}" + created = _now_ts() + + try: + async for chunk_line in _stream_llm_response( + service_url, {'Content-Type': 'application/json', 'Authorization': f'Bearer {api_key}'}, + { + 'model': model, + 'messages': messages, + 'temperature': temperature, + 'top_p': top_p, + 'stream': True, + 'max_tokens': max_tokens, + **extra_params, + } + ): + await resp.write(chunk_line.encode('utf-8') if isinstance(chunk_line, str) else chunk_line) + await resp.drain() + except Exception as e: + error_data = { + 'error': { + 'message': f'Streaming error: {str(e)}', + 'type': 'server_error', + 'code': 500, + } + } + await resp.write(f"data: {json.dumps(error_data)}\n\n".encode('utf-8')) + await resp.drain() + + await resp.write(b"data: [DONE]\n\n") + await resp.drain() + return resp + + else: + result = await _call_llm_api( + service_url=service_url, + api_key=api_key, + model=model, + messages=messages, + temperature=temperature, + max_tokens=max_tokens, + stream=False, + top_p=top_p, + **extra_params, + ) + return result + + +async def harnessed_llm_models() -> Dict[str, Any]: + """ + OpenAI-compatible /v1/models endpoint. + Returns list of available models from config. + """ + config = await _async_get_llm_config() + + models_list = [] + + # Add default model from config + default_model = 'qwen3-max' + if config: + default_model = config.get('default_model', 'qwen3-max') + + # If available_models is configured as JSON string, parse it + if config and config.get('available_models'): + try: + models_str = config['available_models'] + if isinstance(models_str, str): + model_names = json.loads(models_str) + else: + model_names = models_str + for m in model_names: + if isinstance(m, str): + models_list.append({ + 'id': m, + 'object': 'model', + 'created': _now_ts(), + 'owned_by': 'harnessed_agent', + }) + elif isinstance(m, dict): + models_list.append(m) + except Exception: + pass + + # Always include the default model if not already listed + existing_ids = {m['id'] for m in models_list} + if default_model not in existing_ids: + models_list.insert(0, { + 'id': default_model, + 'object': 'model', + 'created': _now_ts(), + 'owned_by': 'harnessed_agent', + }) + + return { + 'object': 'list', + 'data': models_list, + } + + +async def harnessed_llm_completions(body: Dict[str, Any]) -> Dict[str, Any]: + """ + OpenAI-compatible /v1/completions endpoint (legacy, non-chat). + Converts prompt to messages format internally. + """ + prompt = body.get('prompt', '') + model = body.get('model', 'default') + temperature = body.get('temperature', 0.7) + max_tokens = body.get('max_tokens', None) + stream = body.get('stream', False) + + # Convert to chat format + messages = [{'role': 'user', 'content': prompt}] + + # Build request body for chat completions + chat_body = { + 'model': model, + 'messages': messages, + 'temperature': temperature, + 'max_tokens': max_tokens, + 'stream': stream, + '_request': body.get('_request'), + } + + return await harnessed_llm_chat_completions(chat_body) diff --git a/json/harnessed_agent_config_view.json b/json/harnessed_agent_config_view.json index 81a79af..0ec981f 100644 --- a/json/harnessed_agent_config_view.json +++ b/json/harnessed_agent_config_view.json @@ -4,20 +4,22 @@ "title": "Agent Configuration", "params": { "logined_userid": "user_id", - "confidential_fields": [], + "confidential_fields": ["llm_api_key"], "browserfields": { + "exclouded": ["llm_api_key", "available_models"], "alters": { "auto_cleanup_enabled": { "uitype": "code", "data": [ - { - "value": "1", - "text": "Enabled" - }, - { - "value": "0", - "text": "Disabled" - } + {"value": "1", "text": "Enabled"}, + {"value": "0", "text": "Disabled"} + ] + }, + "enable_streaming": { + "uitype": "code", + "data": [ + {"value": "1", "text": "Enabled"}, + {"value": "0", "text": "Disabled"} ] } } @@ -25,7 +27,8 @@ "editexclouded": [ "id", "user_id", - "created_at" + "created_at", + "updated_at" ], "editable": { "new_data_url": "{{entire_url('../api/harnessed_agent_config_view_create.dspy')}}", diff --git a/models/harnessed_agent_config.json b/models/harnessed_agent_config.json index 9675349..238e0d5 100644 --- a/models/harnessed_agent_config.json +++ b/models/harnessed_agent_config.json @@ -105,6 +105,32 @@ "nullable": "no", "default": "1" }, + { + "name": "llm_service_url", + "title": "LLM service base URL (OpenAI-compatible endpoint)", + "type": "str", + "length": 255, + "nullable": "yes", + "default": "https://dashscope.aliyuncs.com/compatible-mode/v1", + "comments": "Base URL for the LLM provider API, e.g. https://api.openai.com/v1" + }, + { + "name": "llm_api_key", + "title": "LLM service API key", + "type": "str", + "length": 255, + "nullable": "yes", + "default": "", + "comments": "API key for LLM service authentication (Bearer token)" + }, + { + "name": "available_models", + "title": "Available LLM models (JSON array)", + "type": "text", + "nullable": "yes", + "default": "", + "comments": "JSON array of model IDs, e.g. [\"qwen3-max\", \"qwen3-plus\"]" + }, { "name": "created_at", "title": "Creation timestamp", diff --git a/wwwroot/v1/chat/completions.dspy b/wwwroot/v1/chat/completions.dspy new file mode 100644 index 0000000..b1360b2 --- /dev/null +++ b/wwwroot/v1/chat/completions.dspy @@ -0,0 +1,46 @@ +""" +OpenAI-compatible /v1/chat/completions endpoint +Accepts POST with JSON body matching OpenAI API format. +""" +import json + +async def main(): + # Read request body + body = {} + try: + raw_body = await request.read() + if raw_body: + body = json.loads(raw_body) + except Exception as e: + result = { + 'error': { + 'message': f'Invalid JSON body: {str(e)}', + 'type': 'invalid_request_error', + 'code': 400, + } + } + return json.dumps(result, ensure_ascii=False) + + # Pass the request object for streaming support + body['_request'] = request + + # Call the LLM API handler + result = await harnessed_llm_chat_completions(body) + + # Handle streaming response (StreamResponse) + from aiohttp.web_response import StreamResponse + if isinstance(result, StreamResponse): + return result + + # Handle error response + if 'error' in result: + status_code = result.get('error', {}).get('code', 500) + resp = web.Response( + status=status_code, + body=json.dumps(result, ensure_ascii=False), + content_type='application/json' + ) + return resp + + # Return successful response + return json.dumps(result, ensure_ascii=False) diff --git a/wwwroot/v1/completions.dspy b/wwwroot/v1/completions.dspy new file mode 100644 index 0000000..c5e5fc4 --- /dev/null +++ b/wwwroot/v1/completions.dspy @@ -0,0 +1,41 @@ +""" +OpenAI-compatible /v1/completions endpoint (legacy) +Accepts POST with JSON body matching OpenAI completions API format. +""" +import json + +async def main(): + # Read request body + body = {} + try: + raw_body = await request.read() + if raw_body: + body = json.loads(raw_body) + except Exception as e: + result = { + 'error': { + 'message': f'Invalid JSON body: {str(e)}', + 'type': 'invalid_request_error', + 'code': 400, + } + } + return json.dumps(result, ensure_ascii=False) + + body['_request'] = request + + result = await harnessed_llm_completions(body) + + from aiohttp.web_response import StreamResponse + if isinstance(result, StreamResponse): + return result + + if 'error' in result: + status_code = result.get('error', {}).get('code', 500) + resp = web.Response( + status=status_code, + body=json.dumps(result, ensure_ascii=False), + content_type='application/json' + ) + return resp + + return json.dumps(result, ensure_ascii=False) diff --git a/wwwroot/v1/models.dspy b/wwwroot/v1/models.dspy new file mode 100644 index 0000000..b7c178a --- /dev/null +++ b/wwwroot/v1/models.dspy @@ -0,0 +1,9 @@ +""" +OpenAI-compatible /v1/models endpoint +Returns list of available models. +""" +import json + +async def main(): + result = await harnessed_llm_models() + return json.dumps(result, ensure_ascii=False)