feat: implement OpenAI-compatible LLM API

- Add /v1/chat/completions endpoint (POST) with streaming support
- Add /v1/models endpoint (GET) listing available models
- Add /v1/completions endpoint (POST) legacy compatibility
- Add llm_api.py module with OpenAI API proxy via aiohttp
- Add llm_service_url, llm_api_key, available_models to config model
- Update harnessed_agent_config_view CRUD to protect API key field
- Register new functions in init.py (harnessed_llm_chat_completions etc.)
- Add .gitignore for pycache files

Endpoints available under module path:
  POST /harnessed_agent/v1/chat/completions
  GET  /harnessed_agent/v1/models
  POST /harnessed_agent/v1/completions
This commit is contained in:
yumoqing 2026-05-07 11:36:35 +08:00
parent 52f88239ed
commit 608413a5d5
8 changed files with 536 additions and 11 deletions

8
.gitignore vendored Normal file
View File

@ -0,0 +1,8 @@
__pycache__/
*.pyc
*.pyo
*.egg-info/
dist/
build/
py3/
*.egg

View File

@ -20,6 +20,11 @@ from .config_functions import (
harnessed_get_agent_config,
harnessed_save_agent_config
)
from .llm_api import (
harnessed_llm_chat_completions,
harnessed_llm_models,
harnessed_llm_completions,
)
def load_harnessed_agent():
env = ServerEnv()
@ -43,3 +48,8 @@ def load_harnessed_agent():
# Configuration management functions
env.harnessed_get_agent_config = harnessed_get_agent_config
env.harnessed_save_agent_config = harnessed_save_agent_config
# OpenAI-compatible LLM API
env.harnessed_llm_chat_completions = harnessed_llm_chat_completions
env.harnessed_llm_models = harnessed_llm_models
env.harnessed_llm_completions = harnessed_llm_completions

382
harnessed_agent/llm_api.py Normal file
View File

@ -0,0 +1,382 @@
"""
OpenAI-compatible LLM API for Hermes Agent
Provides /v1/chat/completions and /v1/models endpoints compatible with OpenAI API spec.
"""
import json
import uuid
import time
import asyncio
from typing import Dict, Any, List, Optional, AsyncGenerator
from datetime import datetime
try:
from aiohttp import ClientSession, ClientTimeout
from aiohttp.client_exceptions import ClientError
HAS_AIOHTTP = True
except ImportError:
HAS_AIOHTTP = False
try:
from ahserver.serverenv import ServerEnv
from sqlor.dbpools import DBPools
from appPublic.worker import awaitify
except ImportError:
class ServerEnv:
pass
class DBPools:
pass
def awaitify(f):
return f
def _now_iso():
return datetime.utcnow().isoformat() + 'Z'
def _now_ts():
return int(time.time())
def _get_default_llm_config(user_id: str = None) -> Dict[str, Any]:
"""Get LLM service config from harnessed_agent_config table."""
try:
dbname = ServerEnv().get_module_dbname('harnessed_agent')
except Exception:
dbname = 'default'
try:
import asyncio
async def _fetch():
async with DBPools().sqlorContext(dbname) as sor:
filters = {}
if user_id:
filters['user_id'] = user_id
rows = await sor.R('harnessed_agent_config', filters,
orderby='updated_at DESC', limit=1)
if rows:
return rows[0]
return None
try:
loop = asyncio.get_event_loop()
if loop.is_running():
# Already in async context, need to use create_task pattern
# but for simplicity in .dspy context we return None and let caller handle
return None
else:
config = loop.run_until_complete(_fetch())
return config
except RuntimeError:
return None
except Exception:
return None
async def _async_get_llm_config(user_id: str = None) -> Dict[str, Any]:
"""Async version to get LLM service config."""
try:
env = ServerEnv()
dbname = env.get_module_dbname('harnessed_agent')
except Exception:
dbname = 'default'
try:
async with DBPools().sqlorContext(dbname) as sor:
filters = {}
if user_id:
filters['user_id'] = user_id
rows = await sor.R('harnessed_agent_config', filters,
orderby='updated_at DESC', limit=1)
if rows:
return rows[0]
except Exception as e:
print(f"Error fetching LLM config: {e}")
return None
async def _call_llm_api(
service_url: str,
api_key: str,
model: str,
messages: List[Dict[str, str]],
temperature: float = 0.7,
max_tokens: Optional[int] = None,
stream: bool = False,
top_p: float = 1.0,
**kwargs
) -> Any:
"""Call an OpenAI-compatible LLM API endpoint."""
url = service_url.rstrip('/') + '/chat/completions'
headers = {
'Content-Type': 'application/json',
'Authorization': f'Bearer {api_key}',
}
body = {
'model': model,
'messages': messages,
'temperature': temperature,
'top_p': top_p,
'stream': stream,
}
if max_tokens is not None:
body['max_tokens'] = max_tokens
# Pass through any additional OpenAI-compatible parameters
for key in ('stop', 'presence_penalty', 'frequency_penalty', 'tools', 'tool_choice',
'response_format', 'seed'):
if key in kwargs and kwargs[key] is not None:
body[key] = kwargs[key]
if stream:
return await _stream_llm_response(url, headers, body)
else:
return await _sync_llm_response(url, headers, body)
async def _sync_llm_response(url: str, headers: Dict, body: Dict) -> Dict[str, Any]:
"""Make a non-streaming LLM API call."""
async with ClientSession() as session:
async with session.post(url, headers=headers, json=body,
timeout=ClientTimeout(total=300)) as resp:
if resp.status != 200:
error_text = await resp.text()
return {
'error': {
'message': f'LLM API error: HTTP {resp.status}',
'type': 'api_error',
'code': resp.status,
'detail': error_text[:500],
}
}
data = await resp.json()
return data
async def _stream_llm_response(url: str, headers: Dict, body: Dict) -> AsyncGenerator[str, None]:
"""Make a streaming LLM API call, yielding SSE chunks."""
async with ClientSession() as session:
async with session.post(url, headers=headers, json=body,
timeout=ClientTimeout(total=600)) as resp:
if resp.status != 200:
error_text = await resp.text()
error_data = {
'error': {
'message': f'LLM API error: HTTP {resp.status}',
'type': 'api_error',
'code': resp.status,
'detail': error_text[:500],
}
}
yield f"data: {json.dumps(error_data, ensure_ascii=False)}\n\n"
yield "data: [DONE]\n\n"
return
async for line in resp.content:
line = line.decode('utf-8').strip()
if not line:
continue
if line.startswith('data: '):
yield line + '\n\n'
# ============================================================
# Public API functions (registered to ServerEnv via init.py)
# ============================================================
async def harnessed_llm_chat_completions(body: Dict[str, Any]) -> Any:
"""
OpenAI-compatible /v1/chat/completions endpoint.
Args:
body: dict with keys: model, messages, temperature, max_tokens,
stream, top_p, and other OpenAI-compatible params.
Returns:
If stream=False: dict matching OpenAI chat completion response.
If stream=True: aiohttp.StreamResponse with SSE.
"""
from aiohttp import web
model = body.get('model', 'default')
messages = body.get('messages', [])
temperature = body.get('temperature', 0.7)
max_tokens = body.get('max_tokens', None)
stream = body.get('stream', False)
top_p = body.get('top_p', 1.0)
# Get LLM service config
config = await _async_get_llm_config()
if not config:
return {
'error': {
'message': 'LLM service not configured. Please configure llm_service_url and llm_api_key in agent settings.',
'type': 'configuration_error',
'code': 503,
}
}
service_url = config.get('llm_service_url') or config.get('api_endpoint')
api_key = config.get('llm_api_key') or config.get('api_key')
if not service_url or not api_key:
return {
'error': {
'message': 'LLM service URL or API key not configured. Set llm_service_url and llm_api_key in harnessed_agent_config.',
'type': 'configuration_error',
'code': 503,
}
}
# Use default model from config if request model is 'default' or empty
default_model = config.get('default_model', 'qwen3-max')
if not model or model == 'default':
model = default_model
# Pass through extra params
extra_params = {}
for key in ('stop', 'presence_penalty', 'frequency_penalty', 'tools',
'tool_choice', 'response_format', 'seed'):
if key in body:
extra_params[key] = body[key]
if stream:
resp = web.StreamResponse(
status=200,
reason='OK',
headers={
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'X-Accel-Buffering': 'no',
}
)
await resp.prepare(body.get('_request'))
request_id = f"chatcmpl-{uuid.uuid4().hex[:12]}"
created = _now_ts()
try:
async for chunk_line in _stream_llm_response(
service_url, {'Content-Type': 'application/json', 'Authorization': f'Bearer {api_key}'},
{
'model': model,
'messages': messages,
'temperature': temperature,
'top_p': top_p,
'stream': True,
'max_tokens': max_tokens,
**extra_params,
}
):
await resp.write(chunk_line.encode('utf-8') if isinstance(chunk_line, str) else chunk_line)
await resp.drain()
except Exception as e:
error_data = {
'error': {
'message': f'Streaming error: {str(e)}',
'type': 'server_error',
'code': 500,
}
}
await resp.write(f"data: {json.dumps(error_data)}\n\n".encode('utf-8'))
await resp.drain()
await resp.write(b"data: [DONE]\n\n")
await resp.drain()
return resp
else:
result = await _call_llm_api(
service_url=service_url,
api_key=api_key,
model=model,
messages=messages,
temperature=temperature,
max_tokens=max_tokens,
stream=False,
top_p=top_p,
**extra_params,
)
return result
async def harnessed_llm_models() -> Dict[str, Any]:
"""
OpenAI-compatible /v1/models endpoint.
Returns list of available models from config.
"""
config = await _async_get_llm_config()
models_list = []
# Add default model from config
default_model = 'qwen3-max'
if config:
default_model = config.get('default_model', 'qwen3-max')
# If available_models is configured as JSON string, parse it
if config and config.get('available_models'):
try:
models_str = config['available_models']
if isinstance(models_str, str):
model_names = json.loads(models_str)
else:
model_names = models_str
for m in model_names:
if isinstance(m, str):
models_list.append({
'id': m,
'object': 'model',
'created': _now_ts(),
'owned_by': 'harnessed_agent',
})
elif isinstance(m, dict):
models_list.append(m)
except Exception:
pass
# Always include the default model if not already listed
existing_ids = {m['id'] for m in models_list}
if default_model not in existing_ids:
models_list.insert(0, {
'id': default_model,
'object': 'model',
'created': _now_ts(),
'owned_by': 'harnessed_agent',
})
return {
'object': 'list',
'data': models_list,
}
async def harnessed_llm_completions(body: Dict[str, Any]) -> Dict[str, Any]:
"""
OpenAI-compatible /v1/completions endpoint (legacy, non-chat).
Converts prompt to messages format internally.
"""
prompt = body.get('prompt', '')
model = body.get('model', 'default')
temperature = body.get('temperature', 0.7)
max_tokens = body.get('max_tokens', None)
stream = body.get('stream', False)
# Convert to chat format
messages = [{'role': 'user', 'content': prompt}]
# Build request body for chat completions
chat_body = {
'model': model,
'messages': messages,
'temperature': temperature,
'max_tokens': max_tokens,
'stream': stream,
'_request': body.get('_request'),
}
return await harnessed_llm_chat_completions(chat_body)

View File

@ -4,20 +4,22 @@
"title": "Agent Configuration",
"params": {
"logined_userid": "user_id",
"confidential_fields": [],
"confidential_fields": ["llm_api_key"],
"browserfields": {
"exclouded": ["llm_api_key", "available_models"],
"alters": {
"auto_cleanup_enabled": {
"uitype": "code",
"data": [
{
"value": "1",
"text": "Enabled"
},
{
"value": "0",
"text": "Disabled"
}
{"value": "1", "text": "Enabled"},
{"value": "0", "text": "Disabled"}
]
},
"enable_streaming": {
"uitype": "code",
"data": [
{"value": "1", "text": "Enabled"},
{"value": "0", "text": "Disabled"}
]
}
}
@ -25,7 +27,8 @@
"editexclouded": [
"id",
"user_id",
"created_at"
"created_at",
"updated_at"
],
"editable": {
"new_data_url": "{{entire_url('../api/harnessed_agent_config_view_create.dspy')}}",

View File

@ -105,6 +105,32 @@
"nullable": "no",
"default": "1"
},
{
"name": "llm_service_url",
"title": "LLM service base URL (OpenAI-compatible endpoint)",
"type": "str",
"length": 255,
"nullable": "yes",
"default": "https://dashscope.aliyuncs.com/compatible-mode/v1",
"comments": "Base URL for the LLM provider API, e.g. https://api.openai.com/v1"
},
{
"name": "llm_api_key",
"title": "LLM service API key",
"type": "str",
"length": 255,
"nullable": "yes",
"default": "",
"comments": "API key for LLM service authentication (Bearer token)"
},
{
"name": "available_models",
"title": "Available LLM models (JSON array)",
"type": "text",
"nullable": "yes",
"default": "",
"comments": "JSON array of model IDs, e.g. [\"qwen3-max\", \"qwen3-plus\"]"
},
{
"name": "created_at",
"title": "Creation timestamp",

View File

@ -0,0 +1,46 @@
"""
OpenAI-compatible /v1/chat/completions endpoint
Accepts POST with JSON body matching OpenAI API format.
"""
import json
async def main():
# Read request body
body = {}
try:
raw_body = await request.read()
if raw_body:
body = json.loads(raw_body)
except Exception as e:
result = {
'error': {
'message': f'Invalid JSON body: {str(e)}',
'type': 'invalid_request_error',
'code': 400,
}
}
return json.dumps(result, ensure_ascii=False)
# Pass the request object for streaming support
body['_request'] = request
# Call the LLM API handler
result = await harnessed_llm_chat_completions(body)
# Handle streaming response (StreamResponse)
from aiohttp.web_response import StreamResponse
if isinstance(result, StreamResponse):
return result
# Handle error response
if 'error' in result:
status_code = result.get('error', {}).get('code', 500)
resp = web.Response(
status=status_code,
body=json.dumps(result, ensure_ascii=False),
content_type='application/json'
)
return resp
# Return successful response
return json.dumps(result, ensure_ascii=False)

View File

@ -0,0 +1,41 @@
"""
OpenAI-compatible /v1/completions endpoint (legacy)
Accepts POST with JSON body matching OpenAI completions API format.
"""
import json
async def main():
# Read request body
body = {}
try:
raw_body = await request.read()
if raw_body:
body = json.loads(raw_body)
except Exception as e:
result = {
'error': {
'message': f'Invalid JSON body: {str(e)}',
'type': 'invalid_request_error',
'code': 400,
}
}
return json.dumps(result, ensure_ascii=False)
body['_request'] = request
result = await harnessed_llm_completions(body)
from aiohttp.web_response import StreamResponse
if isinstance(result, StreamResponse):
return result
if 'error' in result:
status_code = result.get('error', {}).get('code', 500)
resp = web.Response(
status=status_code,
body=json.dumps(result, ensure_ascii=False),
content_type='application/json'
)
return resp
return json.dumps(result, ensure_ascii=False)

9
wwwroot/v1/models.dspy Normal file
View File

@ -0,0 +1,9 @@
"""
OpenAI-compatible /v1/models endpoint
Returns list of available models.
"""
import json
async def main():
result = await harnessed_llm_models()
return json.dumps(result, ensure_ascii=False)