feat: implement OpenAI-compatible LLM API
- Add /v1/chat/completions endpoint (POST) with streaming support - Add /v1/models endpoint (GET) listing available models - Add /v1/completions endpoint (POST) legacy compatibility - Add llm_api.py module with OpenAI API proxy via aiohttp - Add llm_service_url, llm_api_key, available_models to config model - Update harnessed_agent_config_view CRUD to protect API key field - Register new functions in init.py (harnessed_llm_chat_completions etc.) - Add .gitignore for pycache files Endpoints available under module path: POST /harnessed_agent/v1/chat/completions GET /harnessed_agent/v1/models POST /harnessed_agent/v1/completions
This commit is contained in:
parent
52f88239ed
commit
608413a5d5
8
.gitignore
vendored
Normal file
8
.gitignore
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
*.egg-info/
|
||||
dist/
|
||||
build/
|
||||
py3/
|
||||
*.egg
|
||||
@ -20,6 +20,11 @@ from .config_functions import (
|
||||
harnessed_get_agent_config,
|
||||
harnessed_save_agent_config
|
||||
)
|
||||
from .llm_api import (
|
||||
harnessed_llm_chat_completions,
|
||||
harnessed_llm_models,
|
||||
harnessed_llm_completions,
|
||||
)
|
||||
|
||||
def load_harnessed_agent():
|
||||
env = ServerEnv()
|
||||
@ -43,3 +48,8 @@ def load_harnessed_agent():
|
||||
# Configuration management functions
|
||||
env.harnessed_get_agent_config = harnessed_get_agent_config
|
||||
env.harnessed_save_agent_config = harnessed_save_agent_config
|
||||
|
||||
# OpenAI-compatible LLM API
|
||||
env.harnessed_llm_chat_completions = harnessed_llm_chat_completions
|
||||
env.harnessed_llm_models = harnessed_llm_models
|
||||
env.harnessed_llm_completions = harnessed_llm_completions
|
||||
382
harnessed_agent/llm_api.py
Normal file
382
harnessed_agent/llm_api.py
Normal file
@ -0,0 +1,382 @@
|
||||
"""
|
||||
OpenAI-compatible LLM API for Hermes Agent
|
||||
Provides /v1/chat/completions and /v1/models endpoints compatible with OpenAI API spec.
|
||||
"""
|
||||
import json
|
||||
import uuid
|
||||
import time
|
||||
import asyncio
|
||||
from typing import Dict, Any, List, Optional, AsyncGenerator
|
||||
from datetime import datetime
|
||||
|
||||
try:
|
||||
from aiohttp import ClientSession, ClientTimeout
|
||||
from aiohttp.client_exceptions import ClientError
|
||||
HAS_AIOHTTP = True
|
||||
except ImportError:
|
||||
HAS_AIOHTTP = False
|
||||
|
||||
try:
|
||||
from ahserver.serverenv import ServerEnv
|
||||
from sqlor.dbpools import DBPools
|
||||
from appPublic.worker import awaitify
|
||||
except ImportError:
|
||||
class ServerEnv:
|
||||
pass
|
||||
class DBPools:
|
||||
pass
|
||||
def awaitify(f):
|
||||
return f
|
||||
|
||||
|
||||
def _now_iso():
|
||||
return datetime.utcnow().isoformat() + 'Z'
|
||||
|
||||
|
||||
def _now_ts():
|
||||
return int(time.time())
|
||||
|
||||
|
||||
def _get_default_llm_config(user_id: str = None) -> Dict[str, Any]:
|
||||
"""Get LLM service config from harnessed_agent_config table."""
|
||||
try:
|
||||
dbname = ServerEnv().get_module_dbname('harnessed_agent')
|
||||
except Exception:
|
||||
dbname = 'default'
|
||||
|
||||
try:
|
||||
import asyncio
|
||||
async def _fetch():
|
||||
async with DBPools().sqlorContext(dbname) as sor:
|
||||
filters = {}
|
||||
if user_id:
|
||||
filters['user_id'] = user_id
|
||||
rows = await sor.R('harnessed_agent_config', filters,
|
||||
orderby='updated_at DESC', limit=1)
|
||||
if rows:
|
||||
return rows[0]
|
||||
return None
|
||||
|
||||
try:
|
||||
loop = asyncio.get_event_loop()
|
||||
if loop.is_running():
|
||||
# Already in async context, need to use create_task pattern
|
||||
# but for simplicity in .dspy context we return None and let caller handle
|
||||
return None
|
||||
else:
|
||||
config = loop.run_until_complete(_fetch())
|
||||
return config
|
||||
except RuntimeError:
|
||||
return None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
async def _async_get_llm_config(user_id: str = None) -> Dict[str, Any]:
|
||||
"""Async version to get LLM service config."""
|
||||
try:
|
||||
env = ServerEnv()
|
||||
dbname = env.get_module_dbname('harnessed_agent')
|
||||
except Exception:
|
||||
dbname = 'default'
|
||||
|
||||
try:
|
||||
async with DBPools().sqlorContext(dbname) as sor:
|
||||
filters = {}
|
||||
if user_id:
|
||||
filters['user_id'] = user_id
|
||||
rows = await sor.R('harnessed_agent_config', filters,
|
||||
orderby='updated_at DESC', limit=1)
|
||||
if rows:
|
||||
return rows[0]
|
||||
except Exception as e:
|
||||
print(f"Error fetching LLM config: {e}")
|
||||
return None
|
||||
|
||||
|
||||
async def _call_llm_api(
|
||||
service_url: str,
|
||||
api_key: str,
|
||||
model: str,
|
||||
messages: List[Dict[str, str]],
|
||||
temperature: float = 0.7,
|
||||
max_tokens: Optional[int] = None,
|
||||
stream: bool = False,
|
||||
top_p: float = 1.0,
|
||||
**kwargs
|
||||
) -> Any:
|
||||
"""Call an OpenAI-compatible LLM API endpoint."""
|
||||
url = service_url.rstrip('/') + '/chat/completions'
|
||||
|
||||
headers = {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': f'Bearer {api_key}',
|
||||
}
|
||||
|
||||
body = {
|
||||
'model': model,
|
||||
'messages': messages,
|
||||
'temperature': temperature,
|
||||
'top_p': top_p,
|
||||
'stream': stream,
|
||||
}
|
||||
|
||||
if max_tokens is not None:
|
||||
body['max_tokens'] = max_tokens
|
||||
|
||||
# Pass through any additional OpenAI-compatible parameters
|
||||
for key in ('stop', 'presence_penalty', 'frequency_penalty', 'tools', 'tool_choice',
|
||||
'response_format', 'seed'):
|
||||
if key in kwargs and kwargs[key] is not None:
|
||||
body[key] = kwargs[key]
|
||||
|
||||
if stream:
|
||||
return await _stream_llm_response(url, headers, body)
|
||||
else:
|
||||
return await _sync_llm_response(url, headers, body)
|
||||
|
||||
|
||||
async def _sync_llm_response(url: str, headers: Dict, body: Dict) -> Dict[str, Any]:
|
||||
"""Make a non-streaming LLM API call."""
|
||||
async with ClientSession() as session:
|
||||
async with session.post(url, headers=headers, json=body,
|
||||
timeout=ClientTimeout(total=300)) as resp:
|
||||
if resp.status != 200:
|
||||
error_text = await resp.text()
|
||||
return {
|
||||
'error': {
|
||||
'message': f'LLM API error: HTTP {resp.status}',
|
||||
'type': 'api_error',
|
||||
'code': resp.status,
|
||||
'detail': error_text[:500],
|
||||
}
|
||||
}
|
||||
|
||||
data = await resp.json()
|
||||
return data
|
||||
|
||||
|
||||
async def _stream_llm_response(url: str, headers: Dict, body: Dict) -> AsyncGenerator[str, None]:
|
||||
"""Make a streaming LLM API call, yielding SSE chunks."""
|
||||
async with ClientSession() as session:
|
||||
async with session.post(url, headers=headers, json=body,
|
||||
timeout=ClientTimeout(total=600)) as resp:
|
||||
if resp.status != 200:
|
||||
error_text = await resp.text()
|
||||
error_data = {
|
||||
'error': {
|
||||
'message': f'LLM API error: HTTP {resp.status}',
|
||||
'type': 'api_error',
|
||||
'code': resp.status,
|
||||
'detail': error_text[:500],
|
||||
}
|
||||
}
|
||||
yield f"data: {json.dumps(error_data, ensure_ascii=False)}\n\n"
|
||||
yield "data: [DONE]\n\n"
|
||||
return
|
||||
|
||||
async for line in resp.content:
|
||||
line = line.decode('utf-8').strip()
|
||||
if not line:
|
||||
continue
|
||||
if line.startswith('data: '):
|
||||
yield line + '\n\n'
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Public API functions (registered to ServerEnv via init.py)
|
||||
# ============================================================
|
||||
|
||||
async def harnessed_llm_chat_completions(body: Dict[str, Any]) -> Any:
|
||||
"""
|
||||
OpenAI-compatible /v1/chat/completions endpoint.
|
||||
|
||||
Args:
|
||||
body: dict with keys: model, messages, temperature, max_tokens,
|
||||
stream, top_p, and other OpenAI-compatible params.
|
||||
|
||||
Returns:
|
||||
If stream=False: dict matching OpenAI chat completion response.
|
||||
If stream=True: aiohttp.StreamResponse with SSE.
|
||||
"""
|
||||
from aiohttp import web
|
||||
|
||||
model = body.get('model', 'default')
|
||||
messages = body.get('messages', [])
|
||||
temperature = body.get('temperature', 0.7)
|
||||
max_tokens = body.get('max_tokens', None)
|
||||
stream = body.get('stream', False)
|
||||
top_p = body.get('top_p', 1.0)
|
||||
|
||||
# Get LLM service config
|
||||
config = await _async_get_llm_config()
|
||||
if not config:
|
||||
return {
|
||||
'error': {
|
||||
'message': 'LLM service not configured. Please configure llm_service_url and llm_api_key in agent settings.',
|
||||
'type': 'configuration_error',
|
||||
'code': 503,
|
||||
}
|
||||
}
|
||||
|
||||
service_url = config.get('llm_service_url') or config.get('api_endpoint')
|
||||
api_key = config.get('llm_api_key') or config.get('api_key')
|
||||
|
||||
if not service_url or not api_key:
|
||||
return {
|
||||
'error': {
|
||||
'message': 'LLM service URL or API key not configured. Set llm_service_url and llm_api_key in harnessed_agent_config.',
|
||||
'type': 'configuration_error',
|
||||
'code': 503,
|
||||
}
|
||||
}
|
||||
|
||||
# Use default model from config if request model is 'default' or empty
|
||||
default_model = config.get('default_model', 'qwen3-max')
|
||||
if not model or model == 'default':
|
||||
model = default_model
|
||||
|
||||
# Pass through extra params
|
||||
extra_params = {}
|
||||
for key in ('stop', 'presence_penalty', 'frequency_penalty', 'tools',
|
||||
'tool_choice', 'response_format', 'seed'):
|
||||
if key in body:
|
||||
extra_params[key] = body[key]
|
||||
|
||||
if stream:
|
||||
resp = web.StreamResponse(
|
||||
status=200,
|
||||
reason='OK',
|
||||
headers={
|
||||
'Content-Type': 'text/event-stream',
|
||||
'Cache-Control': 'no-cache',
|
||||
'Connection': 'keep-alive',
|
||||
'X-Accel-Buffering': 'no',
|
||||
}
|
||||
)
|
||||
await resp.prepare(body.get('_request'))
|
||||
|
||||
request_id = f"chatcmpl-{uuid.uuid4().hex[:12]}"
|
||||
created = _now_ts()
|
||||
|
||||
try:
|
||||
async for chunk_line in _stream_llm_response(
|
||||
service_url, {'Content-Type': 'application/json', 'Authorization': f'Bearer {api_key}'},
|
||||
{
|
||||
'model': model,
|
||||
'messages': messages,
|
||||
'temperature': temperature,
|
||||
'top_p': top_p,
|
||||
'stream': True,
|
||||
'max_tokens': max_tokens,
|
||||
**extra_params,
|
||||
}
|
||||
):
|
||||
await resp.write(chunk_line.encode('utf-8') if isinstance(chunk_line, str) else chunk_line)
|
||||
await resp.drain()
|
||||
except Exception as e:
|
||||
error_data = {
|
||||
'error': {
|
||||
'message': f'Streaming error: {str(e)}',
|
||||
'type': 'server_error',
|
||||
'code': 500,
|
||||
}
|
||||
}
|
||||
await resp.write(f"data: {json.dumps(error_data)}\n\n".encode('utf-8'))
|
||||
await resp.drain()
|
||||
|
||||
await resp.write(b"data: [DONE]\n\n")
|
||||
await resp.drain()
|
||||
return resp
|
||||
|
||||
else:
|
||||
result = await _call_llm_api(
|
||||
service_url=service_url,
|
||||
api_key=api_key,
|
||||
model=model,
|
||||
messages=messages,
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
stream=False,
|
||||
top_p=top_p,
|
||||
**extra_params,
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
async def harnessed_llm_models() -> Dict[str, Any]:
|
||||
"""
|
||||
OpenAI-compatible /v1/models endpoint.
|
||||
Returns list of available models from config.
|
||||
"""
|
||||
config = await _async_get_llm_config()
|
||||
|
||||
models_list = []
|
||||
|
||||
# Add default model from config
|
||||
default_model = 'qwen3-max'
|
||||
if config:
|
||||
default_model = config.get('default_model', 'qwen3-max')
|
||||
|
||||
# If available_models is configured as JSON string, parse it
|
||||
if config and config.get('available_models'):
|
||||
try:
|
||||
models_str = config['available_models']
|
||||
if isinstance(models_str, str):
|
||||
model_names = json.loads(models_str)
|
||||
else:
|
||||
model_names = models_str
|
||||
for m in model_names:
|
||||
if isinstance(m, str):
|
||||
models_list.append({
|
||||
'id': m,
|
||||
'object': 'model',
|
||||
'created': _now_ts(),
|
||||
'owned_by': 'harnessed_agent',
|
||||
})
|
||||
elif isinstance(m, dict):
|
||||
models_list.append(m)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Always include the default model if not already listed
|
||||
existing_ids = {m['id'] for m in models_list}
|
||||
if default_model not in existing_ids:
|
||||
models_list.insert(0, {
|
||||
'id': default_model,
|
||||
'object': 'model',
|
||||
'created': _now_ts(),
|
||||
'owned_by': 'harnessed_agent',
|
||||
})
|
||||
|
||||
return {
|
||||
'object': 'list',
|
||||
'data': models_list,
|
||||
}
|
||||
|
||||
|
||||
async def harnessed_llm_completions(body: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
OpenAI-compatible /v1/completions endpoint (legacy, non-chat).
|
||||
Converts prompt to messages format internally.
|
||||
"""
|
||||
prompt = body.get('prompt', '')
|
||||
model = body.get('model', 'default')
|
||||
temperature = body.get('temperature', 0.7)
|
||||
max_tokens = body.get('max_tokens', None)
|
||||
stream = body.get('stream', False)
|
||||
|
||||
# Convert to chat format
|
||||
messages = [{'role': 'user', 'content': prompt}]
|
||||
|
||||
# Build request body for chat completions
|
||||
chat_body = {
|
||||
'model': model,
|
||||
'messages': messages,
|
||||
'temperature': temperature,
|
||||
'max_tokens': max_tokens,
|
||||
'stream': stream,
|
||||
'_request': body.get('_request'),
|
||||
}
|
||||
|
||||
return await harnessed_llm_chat_completions(chat_body)
|
||||
@ -4,20 +4,22 @@
|
||||
"title": "Agent Configuration",
|
||||
"params": {
|
||||
"logined_userid": "user_id",
|
||||
"confidential_fields": [],
|
||||
"confidential_fields": ["llm_api_key"],
|
||||
"browserfields": {
|
||||
"exclouded": ["llm_api_key", "available_models"],
|
||||
"alters": {
|
||||
"auto_cleanup_enabled": {
|
||||
"uitype": "code",
|
||||
"data": [
|
||||
{
|
||||
"value": "1",
|
||||
"text": "Enabled"
|
||||
},
|
||||
{
|
||||
"value": "0",
|
||||
"text": "Disabled"
|
||||
}
|
||||
{"value": "1", "text": "Enabled"},
|
||||
{"value": "0", "text": "Disabled"}
|
||||
]
|
||||
},
|
||||
"enable_streaming": {
|
||||
"uitype": "code",
|
||||
"data": [
|
||||
{"value": "1", "text": "Enabled"},
|
||||
{"value": "0", "text": "Disabled"}
|
||||
]
|
||||
}
|
||||
}
|
||||
@ -25,7 +27,8 @@
|
||||
"editexclouded": [
|
||||
"id",
|
||||
"user_id",
|
||||
"created_at"
|
||||
"created_at",
|
||||
"updated_at"
|
||||
],
|
||||
"editable": {
|
||||
"new_data_url": "{{entire_url('../api/harnessed_agent_config_view_create.dspy')}}",
|
||||
|
||||
@ -105,6 +105,32 @@
|
||||
"nullable": "no",
|
||||
"default": "1"
|
||||
},
|
||||
{
|
||||
"name": "llm_service_url",
|
||||
"title": "LLM service base URL (OpenAI-compatible endpoint)",
|
||||
"type": "str",
|
||||
"length": 255,
|
||||
"nullable": "yes",
|
||||
"default": "https://dashscope.aliyuncs.com/compatible-mode/v1",
|
||||
"comments": "Base URL for the LLM provider API, e.g. https://api.openai.com/v1"
|
||||
},
|
||||
{
|
||||
"name": "llm_api_key",
|
||||
"title": "LLM service API key",
|
||||
"type": "str",
|
||||
"length": 255,
|
||||
"nullable": "yes",
|
||||
"default": "",
|
||||
"comments": "API key for LLM service authentication (Bearer token)"
|
||||
},
|
||||
{
|
||||
"name": "available_models",
|
||||
"title": "Available LLM models (JSON array)",
|
||||
"type": "text",
|
||||
"nullable": "yes",
|
||||
"default": "",
|
||||
"comments": "JSON array of model IDs, e.g. [\"qwen3-max\", \"qwen3-plus\"]"
|
||||
},
|
||||
{
|
||||
"name": "created_at",
|
||||
"title": "Creation timestamp",
|
||||
|
||||
46
wwwroot/v1/chat/completions.dspy
Normal file
46
wwwroot/v1/chat/completions.dspy
Normal file
@ -0,0 +1,46 @@
|
||||
"""
|
||||
OpenAI-compatible /v1/chat/completions endpoint
|
||||
Accepts POST with JSON body matching OpenAI API format.
|
||||
"""
|
||||
import json
|
||||
|
||||
async def main():
|
||||
# Read request body
|
||||
body = {}
|
||||
try:
|
||||
raw_body = await request.read()
|
||||
if raw_body:
|
||||
body = json.loads(raw_body)
|
||||
except Exception as e:
|
||||
result = {
|
||||
'error': {
|
||||
'message': f'Invalid JSON body: {str(e)}',
|
||||
'type': 'invalid_request_error',
|
||||
'code': 400,
|
||||
}
|
||||
}
|
||||
return json.dumps(result, ensure_ascii=False)
|
||||
|
||||
# Pass the request object for streaming support
|
||||
body['_request'] = request
|
||||
|
||||
# Call the LLM API handler
|
||||
result = await harnessed_llm_chat_completions(body)
|
||||
|
||||
# Handle streaming response (StreamResponse)
|
||||
from aiohttp.web_response import StreamResponse
|
||||
if isinstance(result, StreamResponse):
|
||||
return result
|
||||
|
||||
# Handle error response
|
||||
if 'error' in result:
|
||||
status_code = result.get('error', {}).get('code', 500)
|
||||
resp = web.Response(
|
||||
status=status_code,
|
||||
body=json.dumps(result, ensure_ascii=False),
|
||||
content_type='application/json'
|
||||
)
|
||||
return resp
|
||||
|
||||
# Return successful response
|
||||
return json.dumps(result, ensure_ascii=False)
|
||||
41
wwwroot/v1/completions.dspy
Normal file
41
wwwroot/v1/completions.dspy
Normal file
@ -0,0 +1,41 @@
|
||||
"""
|
||||
OpenAI-compatible /v1/completions endpoint (legacy)
|
||||
Accepts POST with JSON body matching OpenAI completions API format.
|
||||
"""
|
||||
import json
|
||||
|
||||
async def main():
|
||||
# Read request body
|
||||
body = {}
|
||||
try:
|
||||
raw_body = await request.read()
|
||||
if raw_body:
|
||||
body = json.loads(raw_body)
|
||||
except Exception as e:
|
||||
result = {
|
||||
'error': {
|
||||
'message': f'Invalid JSON body: {str(e)}',
|
||||
'type': 'invalid_request_error',
|
||||
'code': 400,
|
||||
}
|
||||
}
|
||||
return json.dumps(result, ensure_ascii=False)
|
||||
|
||||
body['_request'] = request
|
||||
|
||||
result = await harnessed_llm_completions(body)
|
||||
|
||||
from aiohttp.web_response import StreamResponse
|
||||
if isinstance(result, StreamResponse):
|
||||
return result
|
||||
|
||||
if 'error' in result:
|
||||
status_code = result.get('error', {}).get('code', 500)
|
||||
resp = web.Response(
|
||||
status=status_code,
|
||||
body=json.dumps(result, ensure_ascii=False),
|
||||
content_type='application/json'
|
||||
)
|
||||
return resp
|
||||
|
||||
return json.dumps(result, ensure_ascii=False)
|
||||
9
wwwroot/v1/models.dspy
Normal file
9
wwwroot/v1/models.dspy
Normal file
@ -0,0 +1,9 @@
|
||||
"""
|
||||
OpenAI-compatible /v1/models endpoint
|
||||
Returns list of available models.
|
||||
"""
|
||||
import json
|
||||
|
||||
async def main():
|
||||
result = await harnessed_llm_models()
|
||||
return json.dumps(result, ensure_ascii=False)
|
||||
Loading…
x
Reference in New Issue
Block a user