feat: implement OpenAI-compatible LLM API
- Add /v1/chat/completions endpoint (POST) with streaming support - Add /v1/models endpoint (GET) listing available models - Add /v1/completions endpoint (POST) legacy compatibility - Add llm_api.py module with OpenAI API proxy via aiohttp - Add llm_service_url, llm_api_key, available_models to config model - Update harnessed_agent_config_view CRUD to protect API key field - Register new functions in init.py (harnessed_llm_chat_completions etc.) - Add .gitignore for pycache files Endpoints available under module path: POST /harnessed_agent/v1/chat/completions GET /harnessed_agent/v1/models POST /harnessed_agent/v1/completions
This commit is contained in:
parent
52f88239ed
commit
608413a5d5
8
.gitignore
vendored
Normal file
8
.gitignore
vendored
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
*.pyo
|
||||||
|
*.egg-info/
|
||||||
|
dist/
|
||||||
|
build/
|
||||||
|
py3/
|
||||||
|
*.egg
|
||||||
@ -20,6 +20,11 @@ from .config_functions import (
|
|||||||
harnessed_get_agent_config,
|
harnessed_get_agent_config,
|
||||||
harnessed_save_agent_config
|
harnessed_save_agent_config
|
||||||
)
|
)
|
||||||
|
from .llm_api import (
|
||||||
|
harnessed_llm_chat_completions,
|
||||||
|
harnessed_llm_models,
|
||||||
|
harnessed_llm_completions,
|
||||||
|
)
|
||||||
|
|
||||||
def load_harnessed_agent():
|
def load_harnessed_agent():
|
||||||
env = ServerEnv()
|
env = ServerEnv()
|
||||||
@ -43,3 +48,8 @@ def load_harnessed_agent():
|
|||||||
# Configuration management functions
|
# Configuration management functions
|
||||||
env.harnessed_get_agent_config = harnessed_get_agent_config
|
env.harnessed_get_agent_config = harnessed_get_agent_config
|
||||||
env.harnessed_save_agent_config = harnessed_save_agent_config
|
env.harnessed_save_agent_config = harnessed_save_agent_config
|
||||||
|
|
||||||
|
# OpenAI-compatible LLM API
|
||||||
|
env.harnessed_llm_chat_completions = harnessed_llm_chat_completions
|
||||||
|
env.harnessed_llm_models = harnessed_llm_models
|
||||||
|
env.harnessed_llm_completions = harnessed_llm_completions
|
||||||
382
harnessed_agent/llm_api.py
Normal file
382
harnessed_agent/llm_api.py
Normal file
@ -0,0 +1,382 @@
|
|||||||
|
"""
|
||||||
|
OpenAI-compatible LLM API for Hermes Agent
|
||||||
|
Provides /v1/chat/completions and /v1/models endpoints compatible with OpenAI API spec.
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
import uuid
|
||||||
|
import time
|
||||||
|
import asyncio
|
||||||
|
from typing import Dict, Any, List, Optional, AsyncGenerator
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
try:
|
||||||
|
from aiohttp import ClientSession, ClientTimeout
|
||||||
|
from aiohttp.client_exceptions import ClientError
|
||||||
|
HAS_AIOHTTP = True
|
||||||
|
except ImportError:
|
||||||
|
HAS_AIOHTTP = False
|
||||||
|
|
||||||
|
try:
|
||||||
|
from ahserver.serverenv import ServerEnv
|
||||||
|
from sqlor.dbpools import DBPools
|
||||||
|
from appPublic.worker import awaitify
|
||||||
|
except ImportError:
|
||||||
|
class ServerEnv:
|
||||||
|
pass
|
||||||
|
class DBPools:
|
||||||
|
pass
|
||||||
|
def awaitify(f):
|
||||||
|
return f
|
||||||
|
|
||||||
|
|
||||||
|
def _now_iso():
|
||||||
|
return datetime.utcnow().isoformat() + 'Z'
|
||||||
|
|
||||||
|
|
||||||
|
def _now_ts():
|
||||||
|
return int(time.time())
|
||||||
|
|
||||||
|
|
||||||
|
def _get_default_llm_config(user_id: str = None) -> Dict[str, Any]:
|
||||||
|
"""Get LLM service config from harnessed_agent_config table."""
|
||||||
|
try:
|
||||||
|
dbname = ServerEnv().get_module_dbname('harnessed_agent')
|
||||||
|
except Exception:
|
||||||
|
dbname = 'default'
|
||||||
|
|
||||||
|
try:
|
||||||
|
import asyncio
|
||||||
|
async def _fetch():
|
||||||
|
async with DBPools().sqlorContext(dbname) as sor:
|
||||||
|
filters = {}
|
||||||
|
if user_id:
|
||||||
|
filters['user_id'] = user_id
|
||||||
|
rows = await sor.R('harnessed_agent_config', filters,
|
||||||
|
orderby='updated_at DESC', limit=1)
|
||||||
|
if rows:
|
||||||
|
return rows[0]
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
if loop.is_running():
|
||||||
|
# Already in async context, need to use create_task pattern
|
||||||
|
# but for simplicity in .dspy context we return None and let caller handle
|
||||||
|
return None
|
||||||
|
else:
|
||||||
|
config = loop.run_until_complete(_fetch())
|
||||||
|
return config
|
||||||
|
except RuntimeError:
|
||||||
|
return None
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
async def _async_get_llm_config(user_id: str = None) -> Dict[str, Any]:
|
||||||
|
"""Async version to get LLM service config."""
|
||||||
|
try:
|
||||||
|
env = ServerEnv()
|
||||||
|
dbname = env.get_module_dbname('harnessed_agent')
|
||||||
|
except Exception:
|
||||||
|
dbname = 'default'
|
||||||
|
|
||||||
|
try:
|
||||||
|
async with DBPools().sqlorContext(dbname) as sor:
|
||||||
|
filters = {}
|
||||||
|
if user_id:
|
||||||
|
filters['user_id'] = user_id
|
||||||
|
rows = await sor.R('harnessed_agent_config', filters,
|
||||||
|
orderby='updated_at DESC', limit=1)
|
||||||
|
if rows:
|
||||||
|
return rows[0]
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error fetching LLM config: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
async def _call_llm_api(
|
||||||
|
service_url: str,
|
||||||
|
api_key: str,
|
||||||
|
model: str,
|
||||||
|
messages: List[Dict[str, str]],
|
||||||
|
temperature: float = 0.7,
|
||||||
|
max_tokens: Optional[int] = None,
|
||||||
|
stream: bool = False,
|
||||||
|
top_p: float = 1.0,
|
||||||
|
**kwargs
|
||||||
|
) -> Any:
|
||||||
|
"""Call an OpenAI-compatible LLM API endpoint."""
|
||||||
|
url = service_url.rstrip('/') + '/chat/completions'
|
||||||
|
|
||||||
|
headers = {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'Authorization': f'Bearer {api_key}',
|
||||||
|
}
|
||||||
|
|
||||||
|
body = {
|
||||||
|
'model': model,
|
||||||
|
'messages': messages,
|
||||||
|
'temperature': temperature,
|
||||||
|
'top_p': top_p,
|
||||||
|
'stream': stream,
|
||||||
|
}
|
||||||
|
|
||||||
|
if max_tokens is not None:
|
||||||
|
body['max_tokens'] = max_tokens
|
||||||
|
|
||||||
|
# Pass through any additional OpenAI-compatible parameters
|
||||||
|
for key in ('stop', 'presence_penalty', 'frequency_penalty', 'tools', 'tool_choice',
|
||||||
|
'response_format', 'seed'):
|
||||||
|
if key in kwargs and kwargs[key] is not None:
|
||||||
|
body[key] = kwargs[key]
|
||||||
|
|
||||||
|
if stream:
|
||||||
|
return await _stream_llm_response(url, headers, body)
|
||||||
|
else:
|
||||||
|
return await _sync_llm_response(url, headers, body)
|
||||||
|
|
||||||
|
|
||||||
|
async def _sync_llm_response(url: str, headers: Dict, body: Dict) -> Dict[str, Any]:
|
||||||
|
"""Make a non-streaming LLM API call."""
|
||||||
|
async with ClientSession() as session:
|
||||||
|
async with session.post(url, headers=headers, json=body,
|
||||||
|
timeout=ClientTimeout(total=300)) as resp:
|
||||||
|
if resp.status != 200:
|
||||||
|
error_text = await resp.text()
|
||||||
|
return {
|
||||||
|
'error': {
|
||||||
|
'message': f'LLM API error: HTTP {resp.status}',
|
||||||
|
'type': 'api_error',
|
||||||
|
'code': resp.status,
|
||||||
|
'detail': error_text[:500],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data = await resp.json()
|
||||||
|
return data
|
||||||
|
|
||||||
|
|
||||||
|
async def _stream_llm_response(url: str, headers: Dict, body: Dict) -> AsyncGenerator[str, None]:
|
||||||
|
"""Make a streaming LLM API call, yielding SSE chunks."""
|
||||||
|
async with ClientSession() as session:
|
||||||
|
async with session.post(url, headers=headers, json=body,
|
||||||
|
timeout=ClientTimeout(total=600)) as resp:
|
||||||
|
if resp.status != 200:
|
||||||
|
error_text = await resp.text()
|
||||||
|
error_data = {
|
||||||
|
'error': {
|
||||||
|
'message': f'LLM API error: HTTP {resp.status}',
|
||||||
|
'type': 'api_error',
|
||||||
|
'code': resp.status,
|
||||||
|
'detail': error_text[:500],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
yield f"data: {json.dumps(error_data, ensure_ascii=False)}\n\n"
|
||||||
|
yield "data: [DONE]\n\n"
|
||||||
|
return
|
||||||
|
|
||||||
|
async for line in resp.content:
|
||||||
|
line = line.decode('utf-8').strip()
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
if line.startswith('data: '):
|
||||||
|
yield line + '\n\n'
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Public API functions (registered to ServerEnv via init.py)
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
async def harnessed_llm_chat_completions(body: Dict[str, Any]) -> Any:
|
||||||
|
"""
|
||||||
|
OpenAI-compatible /v1/chat/completions endpoint.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
body: dict with keys: model, messages, temperature, max_tokens,
|
||||||
|
stream, top_p, and other OpenAI-compatible params.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
If stream=False: dict matching OpenAI chat completion response.
|
||||||
|
If stream=True: aiohttp.StreamResponse with SSE.
|
||||||
|
"""
|
||||||
|
from aiohttp import web
|
||||||
|
|
||||||
|
model = body.get('model', 'default')
|
||||||
|
messages = body.get('messages', [])
|
||||||
|
temperature = body.get('temperature', 0.7)
|
||||||
|
max_tokens = body.get('max_tokens', None)
|
||||||
|
stream = body.get('stream', False)
|
||||||
|
top_p = body.get('top_p', 1.0)
|
||||||
|
|
||||||
|
# Get LLM service config
|
||||||
|
config = await _async_get_llm_config()
|
||||||
|
if not config:
|
||||||
|
return {
|
||||||
|
'error': {
|
||||||
|
'message': 'LLM service not configured. Please configure llm_service_url and llm_api_key in agent settings.',
|
||||||
|
'type': 'configuration_error',
|
||||||
|
'code': 503,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
service_url = config.get('llm_service_url') or config.get('api_endpoint')
|
||||||
|
api_key = config.get('llm_api_key') or config.get('api_key')
|
||||||
|
|
||||||
|
if not service_url or not api_key:
|
||||||
|
return {
|
||||||
|
'error': {
|
||||||
|
'message': 'LLM service URL or API key not configured. Set llm_service_url and llm_api_key in harnessed_agent_config.',
|
||||||
|
'type': 'configuration_error',
|
||||||
|
'code': 503,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Use default model from config if request model is 'default' or empty
|
||||||
|
default_model = config.get('default_model', 'qwen3-max')
|
||||||
|
if not model or model == 'default':
|
||||||
|
model = default_model
|
||||||
|
|
||||||
|
# Pass through extra params
|
||||||
|
extra_params = {}
|
||||||
|
for key in ('stop', 'presence_penalty', 'frequency_penalty', 'tools',
|
||||||
|
'tool_choice', 'response_format', 'seed'):
|
||||||
|
if key in body:
|
||||||
|
extra_params[key] = body[key]
|
||||||
|
|
||||||
|
if stream:
|
||||||
|
resp = web.StreamResponse(
|
||||||
|
status=200,
|
||||||
|
reason='OK',
|
||||||
|
headers={
|
||||||
|
'Content-Type': 'text/event-stream',
|
||||||
|
'Cache-Control': 'no-cache',
|
||||||
|
'Connection': 'keep-alive',
|
||||||
|
'X-Accel-Buffering': 'no',
|
||||||
|
}
|
||||||
|
)
|
||||||
|
await resp.prepare(body.get('_request'))
|
||||||
|
|
||||||
|
request_id = f"chatcmpl-{uuid.uuid4().hex[:12]}"
|
||||||
|
created = _now_ts()
|
||||||
|
|
||||||
|
try:
|
||||||
|
async for chunk_line in _stream_llm_response(
|
||||||
|
service_url, {'Content-Type': 'application/json', 'Authorization': f'Bearer {api_key}'},
|
||||||
|
{
|
||||||
|
'model': model,
|
||||||
|
'messages': messages,
|
||||||
|
'temperature': temperature,
|
||||||
|
'top_p': top_p,
|
||||||
|
'stream': True,
|
||||||
|
'max_tokens': max_tokens,
|
||||||
|
**extra_params,
|
||||||
|
}
|
||||||
|
):
|
||||||
|
await resp.write(chunk_line.encode('utf-8') if isinstance(chunk_line, str) else chunk_line)
|
||||||
|
await resp.drain()
|
||||||
|
except Exception as e:
|
||||||
|
error_data = {
|
||||||
|
'error': {
|
||||||
|
'message': f'Streaming error: {str(e)}',
|
||||||
|
'type': 'server_error',
|
||||||
|
'code': 500,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
await resp.write(f"data: {json.dumps(error_data)}\n\n".encode('utf-8'))
|
||||||
|
await resp.drain()
|
||||||
|
|
||||||
|
await resp.write(b"data: [DONE]\n\n")
|
||||||
|
await resp.drain()
|
||||||
|
return resp
|
||||||
|
|
||||||
|
else:
|
||||||
|
result = await _call_llm_api(
|
||||||
|
service_url=service_url,
|
||||||
|
api_key=api_key,
|
||||||
|
model=model,
|
||||||
|
messages=messages,
|
||||||
|
temperature=temperature,
|
||||||
|
max_tokens=max_tokens,
|
||||||
|
stream=False,
|
||||||
|
top_p=top_p,
|
||||||
|
**extra_params,
|
||||||
|
)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
async def harnessed_llm_models() -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
OpenAI-compatible /v1/models endpoint.
|
||||||
|
Returns list of available models from config.
|
||||||
|
"""
|
||||||
|
config = await _async_get_llm_config()
|
||||||
|
|
||||||
|
models_list = []
|
||||||
|
|
||||||
|
# Add default model from config
|
||||||
|
default_model = 'qwen3-max'
|
||||||
|
if config:
|
||||||
|
default_model = config.get('default_model', 'qwen3-max')
|
||||||
|
|
||||||
|
# If available_models is configured as JSON string, parse it
|
||||||
|
if config and config.get('available_models'):
|
||||||
|
try:
|
||||||
|
models_str = config['available_models']
|
||||||
|
if isinstance(models_str, str):
|
||||||
|
model_names = json.loads(models_str)
|
||||||
|
else:
|
||||||
|
model_names = models_str
|
||||||
|
for m in model_names:
|
||||||
|
if isinstance(m, str):
|
||||||
|
models_list.append({
|
||||||
|
'id': m,
|
||||||
|
'object': 'model',
|
||||||
|
'created': _now_ts(),
|
||||||
|
'owned_by': 'harnessed_agent',
|
||||||
|
})
|
||||||
|
elif isinstance(m, dict):
|
||||||
|
models_list.append(m)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Always include the default model if not already listed
|
||||||
|
existing_ids = {m['id'] for m in models_list}
|
||||||
|
if default_model not in existing_ids:
|
||||||
|
models_list.insert(0, {
|
||||||
|
'id': default_model,
|
||||||
|
'object': 'model',
|
||||||
|
'created': _now_ts(),
|
||||||
|
'owned_by': 'harnessed_agent',
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'object': 'list',
|
||||||
|
'data': models_list,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async def harnessed_llm_completions(body: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
OpenAI-compatible /v1/completions endpoint (legacy, non-chat).
|
||||||
|
Converts prompt to messages format internally.
|
||||||
|
"""
|
||||||
|
prompt = body.get('prompt', '')
|
||||||
|
model = body.get('model', 'default')
|
||||||
|
temperature = body.get('temperature', 0.7)
|
||||||
|
max_tokens = body.get('max_tokens', None)
|
||||||
|
stream = body.get('stream', False)
|
||||||
|
|
||||||
|
# Convert to chat format
|
||||||
|
messages = [{'role': 'user', 'content': prompt}]
|
||||||
|
|
||||||
|
# Build request body for chat completions
|
||||||
|
chat_body = {
|
||||||
|
'model': model,
|
||||||
|
'messages': messages,
|
||||||
|
'temperature': temperature,
|
||||||
|
'max_tokens': max_tokens,
|
||||||
|
'stream': stream,
|
||||||
|
'_request': body.get('_request'),
|
||||||
|
}
|
||||||
|
|
||||||
|
return await harnessed_llm_chat_completions(chat_body)
|
||||||
@ -4,20 +4,22 @@
|
|||||||
"title": "Agent Configuration",
|
"title": "Agent Configuration",
|
||||||
"params": {
|
"params": {
|
||||||
"logined_userid": "user_id",
|
"logined_userid": "user_id",
|
||||||
"confidential_fields": [],
|
"confidential_fields": ["llm_api_key"],
|
||||||
"browserfields": {
|
"browserfields": {
|
||||||
|
"exclouded": ["llm_api_key", "available_models"],
|
||||||
"alters": {
|
"alters": {
|
||||||
"auto_cleanup_enabled": {
|
"auto_cleanup_enabled": {
|
||||||
"uitype": "code",
|
"uitype": "code",
|
||||||
"data": [
|
"data": [
|
||||||
{
|
{"value": "1", "text": "Enabled"},
|
||||||
"value": "1",
|
{"value": "0", "text": "Disabled"}
|
||||||
"text": "Enabled"
|
]
|
||||||
},
|
},
|
||||||
{
|
"enable_streaming": {
|
||||||
"value": "0",
|
"uitype": "code",
|
||||||
"text": "Disabled"
|
"data": [
|
||||||
}
|
{"value": "1", "text": "Enabled"},
|
||||||
|
{"value": "0", "text": "Disabled"}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -25,7 +27,8 @@
|
|||||||
"editexclouded": [
|
"editexclouded": [
|
||||||
"id",
|
"id",
|
||||||
"user_id",
|
"user_id",
|
||||||
"created_at"
|
"created_at",
|
||||||
|
"updated_at"
|
||||||
],
|
],
|
||||||
"editable": {
|
"editable": {
|
||||||
"new_data_url": "{{entire_url('../api/harnessed_agent_config_view_create.dspy')}}",
|
"new_data_url": "{{entire_url('../api/harnessed_agent_config_view_create.dspy')}}",
|
||||||
|
|||||||
@ -105,6 +105,32 @@
|
|||||||
"nullable": "no",
|
"nullable": "no",
|
||||||
"default": "1"
|
"default": "1"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "llm_service_url",
|
||||||
|
"title": "LLM service base URL (OpenAI-compatible endpoint)",
|
||||||
|
"type": "str",
|
||||||
|
"length": 255,
|
||||||
|
"nullable": "yes",
|
||||||
|
"default": "https://dashscope.aliyuncs.com/compatible-mode/v1",
|
||||||
|
"comments": "Base URL for the LLM provider API, e.g. https://api.openai.com/v1"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "llm_api_key",
|
||||||
|
"title": "LLM service API key",
|
||||||
|
"type": "str",
|
||||||
|
"length": 255,
|
||||||
|
"nullable": "yes",
|
||||||
|
"default": "",
|
||||||
|
"comments": "API key for LLM service authentication (Bearer token)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "available_models",
|
||||||
|
"title": "Available LLM models (JSON array)",
|
||||||
|
"type": "text",
|
||||||
|
"nullable": "yes",
|
||||||
|
"default": "",
|
||||||
|
"comments": "JSON array of model IDs, e.g. [\"qwen3-max\", \"qwen3-plus\"]"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "created_at",
|
"name": "created_at",
|
||||||
"title": "Creation timestamp",
|
"title": "Creation timestamp",
|
||||||
|
|||||||
46
wwwroot/v1/chat/completions.dspy
Normal file
46
wwwroot/v1/chat/completions.dspy
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
"""
|
||||||
|
OpenAI-compatible /v1/chat/completions endpoint
|
||||||
|
Accepts POST with JSON body matching OpenAI API format.
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
|
||||||
|
async def main():
|
||||||
|
# Read request body
|
||||||
|
body = {}
|
||||||
|
try:
|
||||||
|
raw_body = await request.read()
|
||||||
|
if raw_body:
|
||||||
|
body = json.loads(raw_body)
|
||||||
|
except Exception as e:
|
||||||
|
result = {
|
||||||
|
'error': {
|
||||||
|
'message': f'Invalid JSON body: {str(e)}',
|
||||||
|
'type': 'invalid_request_error',
|
||||||
|
'code': 400,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return json.dumps(result, ensure_ascii=False)
|
||||||
|
|
||||||
|
# Pass the request object for streaming support
|
||||||
|
body['_request'] = request
|
||||||
|
|
||||||
|
# Call the LLM API handler
|
||||||
|
result = await harnessed_llm_chat_completions(body)
|
||||||
|
|
||||||
|
# Handle streaming response (StreamResponse)
|
||||||
|
from aiohttp.web_response import StreamResponse
|
||||||
|
if isinstance(result, StreamResponse):
|
||||||
|
return result
|
||||||
|
|
||||||
|
# Handle error response
|
||||||
|
if 'error' in result:
|
||||||
|
status_code = result.get('error', {}).get('code', 500)
|
||||||
|
resp = web.Response(
|
||||||
|
status=status_code,
|
||||||
|
body=json.dumps(result, ensure_ascii=False),
|
||||||
|
content_type='application/json'
|
||||||
|
)
|
||||||
|
return resp
|
||||||
|
|
||||||
|
# Return successful response
|
||||||
|
return json.dumps(result, ensure_ascii=False)
|
||||||
41
wwwroot/v1/completions.dspy
Normal file
41
wwwroot/v1/completions.dspy
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
"""
|
||||||
|
OpenAI-compatible /v1/completions endpoint (legacy)
|
||||||
|
Accepts POST with JSON body matching OpenAI completions API format.
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
|
||||||
|
async def main():
|
||||||
|
# Read request body
|
||||||
|
body = {}
|
||||||
|
try:
|
||||||
|
raw_body = await request.read()
|
||||||
|
if raw_body:
|
||||||
|
body = json.loads(raw_body)
|
||||||
|
except Exception as e:
|
||||||
|
result = {
|
||||||
|
'error': {
|
||||||
|
'message': f'Invalid JSON body: {str(e)}',
|
||||||
|
'type': 'invalid_request_error',
|
||||||
|
'code': 400,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return json.dumps(result, ensure_ascii=False)
|
||||||
|
|
||||||
|
body['_request'] = request
|
||||||
|
|
||||||
|
result = await harnessed_llm_completions(body)
|
||||||
|
|
||||||
|
from aiohttp.web_response import StreamResponse
|
||||||
|
if isinstance(result, StreamResponse):
|
||||||
|
return result
|
||||||
|
|
||||||
|
if 'error' in result:
|
||||||
|
status_code = result.get('error', {}).get('code', 500)
|
||||||
|
resp = web.Response(
|
||||||
|
status=status_code,
|
||||||
|
body=json.dumps(result, ensure_ascii=False),
|
||||||
|
content_type='application/json'
|
||||||
|
)
|
||||||
|
return resp
|
||||||
|
|
||||||
|
return json.dumps(result, ensure_ascii=False)
|
||||||
9
wwwroot/v1/models.dspy
Normal file
9
wwwroot/v1/models.dspy
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
"""
|
||||||
|
OpenAI-compatible /v1/models endpoint
|
||||||
|
Returns list of available models.
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
|
||||||
|
async def main():
|
||||||
|
result = await harnessed_llm_models()
|
||||||
|
return json.dumps(result, ensure_ascii=False)
|
||||||
Loading…
x
Reference in New Issue
Block a user