refactor: LLM client for calling supplier LLM APIs (not server)
Replaces wrong-direction llm_api.py (which served OpenAI endpoints) with llm_client.py -- a client that calls supplier LLM APIs. New module: llm_client.py - llm_chat(messages, model, temperature, ...) -> OpenAI response dict - llm_chat_stream(messages, ...) -> async generator of SSE chunks - llm_simple(prompt, system) -> plain text response - llm_list_models() -> list available models from provider - llm_get_config() -> show current config (key masked) - Supports provider presets: openai, dashscope, deepseek, siliconflow - Retry with exponential backoff (3 attempts) - 429 rate limit handling with Retry-After - Structured logging via appPublic.log Model changes (harnessed_agent_config): - Add llm_provider (preset name: dashscope/openai/deepseek/siliconflow) - Add top_p field - llm_service_url defaults to DashScope compatible endpoint Other: - Remove wrong-direction /v1/ endpoints - Fix pyproject.toml deps: only sqlor + bricks_for_python - Update init/data.json seed data with LLM config fields - Update CRUD view with llm_provider dropdown
This commit is contained in:
parent
e4f935de07
commit
4e65ff8fe4
@ -20,10 +20,12 @@ from .config_functions import (
|
|||||||
harnessed_get_agent_config,
|
harnessed_get_agent_config,
|
||||||
harnessed_save_agent_config
|
harnessed_save_agent_config
|
||||||
)
|
)
|
||||||
from .llm_api import (
|
from .llm_client import (
|
||||||
harnessed_llm_chat_completions,
|
llm_chat,
|
||||||
harnessed_llm_models,
|
llm_chat_stream,
|
||||||
harnessed_llm_completions,
|
llm_list_models,
|
||||||
|
llm_simple,
|
||||||
|
llm_get_config,
|
||||||
)
|
)
|
||||||
|
|
||||||
def load_harnessed_agent():
|
def load_harnessed_agent():
|
||||||
@ -49,7 +51,9 @@ def load_harnessed_agent():
|
|||||||
env.harnessed_get_agent_config = harnessed_get_agent_config
|
env.harnessed_get_agent_config = harnessed_get_agent_config
|
||||||
env.harnessed_save_agent_config = harnessed_save_agent_config
|
env.harnessed_save_agent_config = harnessed_save_agent_config
|
||||||
|
|
||||||
# OpenAI-compatible LLM API
|
# LLM client -- calls supplier LLM APIs (OpenAI-compatible)
|
||||||
env.harnessed_llm_chat_completions = harnessed_llm_chat_completions
|
env.llm_chat = llm_chat
|
||||||
env.harnessed_llm_models = harnessed_llm_models
|
env.llm_chat_stream = llm_chat_stream
|
||||||
env.harnessed_llm_completions = harnessed_llm_completions
|
env.llm_list_models = llm_list_models
|
||||||
|
env.llm_simple = llm_simple
|
||||||
|
env.llm_get_config = llm_get_config
|
||||||
@ -1,463 +0,0 @@
|
|||||||
"""
|
|
||||||
OpenAI-compatible LLM API for Hermes Agent
|
|
||||||
Provides /v1/chat/completions and /v1/models endpoints compatible with OpenAI API spec.
|
|
||||||
"""
|
|
||||||
import json
|
|
||||||
import uuid
|
|
||||||
import time
|
|
||||||
import asyncio
|
|
||||||
from typing import Dict, Any, List, Optional, AsyncGenerator
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
try:
|
|
||||||
from aiohttp import ClientSession, ClientTimeout
|
|
||||||
from aiohttp.client_exceptions import ClientError
|
|
||||||
HAS_AIOHTTP = True
|
|
||||||
except ImportError:
|
|
||||||
HAS_AIOHTTP = False
|
|
||||||
|
|
||||||
try:
|
|
||||||
from ahserver.serverenv import ServerEnv
|
|
||||||
from sqlor.dbpools import DBPools
|
|
||||||
from appPublic.worker import awaitify
|
|
||||||
from appPublic.log import info, debug, warning, error, exception
|
|
||||||
except ImportError:
|
|
||||||
class ServerEnv:
|
|
||||||
pass
|
|
||||||
class DBPools:
|
|
||||||
pass
|
|
||||||
def awaitify(f):
|
|
||||||
return f
|
|
||||||
def info(*a, **kw): print(*a)
|
|
||||||
def debug(*a, **kw): pass
|
|
||||||
def warning(*a, **kw): print(*a)
|
|
||||||
def error(*a, **kw): print(*a)
|
|
||||||
def exception(*a, **kw): pass
|
|
||||||
|
|
||||||
|
|
||||||
def _now_iso():
|
|
||||||
return datetime.utcnow().isoformat() + 'Z'
|
|
||||||
|
|
||||||
|
|
||||||
def _now_ts():
|
|
||||||
return int(time.time())
|
|
||||||
|
|
||||||
|
|
||||||
def _get_default_llm_config(user_id: str = None) -> Dict[str, Any]:
|
|
||||||
"""Get LLM service config from harnessed_agent_config table."""
|
|
||||||
try:
|
|
||||||
dbname = ServerEnv().get_module_dbname('harnessed_agent')
|
|
||||||
except Exception:
|
|
||||||
dbname = 'default'
|
|
||||||
|
|
||||||
try:
|
|
||||||
import asyncio
|
|
||||||
async def _fetch():
|
|
||||||
async with DBPools().sqlorContext(dbname) as sor:
|
|
||||||
filters = {}
|
|
||||||
if user_id:
|
|
||||||
filters['user_id'] = user_id
|
|
||||||
rows = await sor.R('harnessed_agent_config', filters,
|
|
||||||
orderby='updated_at DESC', limit=1)
|
|
||||||
if rows:
|
|
||||||
return rows[0]
|
|
||||||
return None
|
|
||||||
|
|
||||||
try:
|
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
if loop.is_running():
|
|
||||||
# Already in async context, need to use create_task pattern
|
|
||||||
# but for simplicity in .dspy context we return None and let caller handle
|
|
||||||
return None
|
|
||||||
else:
|
|
||||||
config = loop.run_until_complete(_fetch())
|
|
||||||
return config
|
|
||||||
except RuntimeError:
|
|
||||||
return None
|
|
||||||
except Exception:
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
async def _async_get_llm_config(user_id: str = None) -> Dict[str, Any]:
|
|
||||||
"""Async version to get LLM service config."""
|
|
||||||
try:
|
|
||||||
env = ServerEnv()
|
|
||||||
dbname = env.get_module_dbname('harnessed_agent')
|
|
||||||
except Exception:
|
|
||||||
dbname = 'default'
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with DBPools().sqlorContext(dbname) as sor:
|
|
||||||
filters = {}
|
|
||||||
if user_id:
|
|
||||||
filters['user_id'] = user_id
|
|
||||||
rows = await sor.R('harnessed_agent_config', filters,
|
|
||||||
orderby='updated_at DESC', limit=1)
|
|
||||||
if rows:
|
|
||||||
return rows[0]
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Error fetching LLM config: {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
async def _call_llm_api(
|
|
||||||
service_url: str,
|
|
||||||
api_key: str,
|
|
||||||
model: str,
|
|
||||||
messages: List[Dict[str, str]],
|
|
||||||
temperature: float = 0.7,
|
|
||||||
max_tokens: Optional[int] = None,
|
|
||||||
stream: bool = False,
|
|
||||||
top_p: float = 1.0,
|
|
||||||
**kwargs
|
|
||||||
) -> Any:
|
|
||||||
"""Call an OpenAI-compatible LLM API endpoint."""
|
|
||||||
url = service_url.rstrip('/') + '/chat/completions'
|
|
||||||
|
|
||||||
headers = {
|
|
||||||
'Content-Type': 'application/json',
|
|
||||||
'Authorization': f'Bearer {api_key}',
|
|
||||||
}
|
|
||||||
|
|
||||||
body = {
|
|
||||||
'model': model,
|
|
||||||
'messages': messages,
|
|
||||||
'temperature': temperature,
|
|
||||||
'top_p': top_p,
|
|
||||||
'stream': stream,
|
|
||||||
}
|
|
||||||
|
|
||||||
if max_tokens is not None:
|
|
||||||
body['max_tokens'] = max_tokens
|
|
||||||
|
|
||||||
# Pass through any additional OpenAI-compatible parameters
|
|
||||||
for key in ('stop', 'presence_penalty', 'frequency_penalty', 'tools', 'tool_choice',
|
|
||||||
'response_format', 'seed'):
|
|
||||||
if key in kwargs and kwargs[key] is not None:
|
|
||||||
body[key] = kwargs[key]
|
|
||||||
|
|
||||||
if stream:
|
|
||||||
return await _stream_llm_response(url, headers, body)
|
|
||||||
else:
|
|
||||||
return await _sync_llm_response(url, headers, body)
|
|
||||||
|
|
||||||
|
|
||||||
async def _sync_llm_response(url: str, headers: Dict, body: Dict) -> Dict[str, Any]:
|
|
||||||
"""Make a non-streaming LLM API call with retry logic."""
|
|
||||||
max_retries = 3
|
|
||||||
base_delay = 2 # seconds
|
|
||||||
|
|
||||||
for attempt in range(max_retries):
|
|
||||||
try:
|
|
||||||
async with ClientSession() as session:
|
|
||||||
async with session.post(url, headers=headers, json=body,
|
|
||||||
timeout=ClientTimeout(total=300)) as resp:
|
|
||||||
if resp.status == 429:
|
|
||||||
# Rate limited - respect Retry-After header
|
|
||||||
retry_after = int(resp.headers.get('Retry-After', base_delay * (2 ** attempt)))
|
|
||||||
if attempt < max_retries - 1:
|
|
||||||
warning(f"LLM API rate limited (429), retrying after {retry_after}s (attempt {attempt+1}/{max_retries})")
|
|
||||||
await asyncio.sleep(retry_after)
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
return {
|
|
||||||
'error': {
|
|
||||||
'message': 'Rate limited by LLM provider. Please retry later.',
|
|
||||||
'type': 'rate_limit_error',
|
|
||||||
'code': 429,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if resp.status == 500 and attempt < max_retries - 1:
|
|
||||||
# Transient server error - retry
|
|
||||||
delay = base_delay * (2 ** attempt)
|
|
||||||
warning(f"LLM API server error (500), retrying after {delay}s (attempt {attempt+1}/{max_retries})")
|
|
||||||
await asyncio.sleep(delay)
|
|
||||||
continue
|
|
||||||
|
|
||||||
if resp.status != 200:
|
|
||||||
error_text = await resp.text()
|
|
||||||
return {
|
|
||||||
'error': {
|
|
||||||
'message': f'LLM API error: HTTP {resp.status}',
|
|
||||||
'type': 'api_error',
|
|
||||||
'code': resp.status,
|
|
||||||
'detail': error_text[:500],
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
data = await resp.json()
|
|
||||||
return data
|
|
||||||
|
|
||||||
except asyncio.TimeoutError:
|
|
||||||
if attempt < max_retries - 1:
|
|
||||||
delay = base_delay * (2 ** attempt)
|
|
||||||
warning(f"LLM API timeout, retrying after {delay}s (attempt {attempt+1}/{max_retries})")
|
|
||||||
await asyncio.sleep(delay)
|
|
||||||
continue
|
|
||||||
return {
|
|
||||||
'error': {
|
|
||||||
'message': 'LLM API request timed out after 300s',
|
|
||||||
'type': 'timeout_error',
|
|
||||||
'code': 504,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
except ClientError as e:
|
|
||||||
if attempt < max_retries - 1:
|
|
||||||
delay = base_delay * (2 ** attempt)
|
|
||||||
warning(f"LLM API connection error: {e}, retrying after {delay}s")
|
|
||||||
await asyncio.sleep(delay)
|
|
||||||
continue
|
|
||||||
return {
|
|
||||||
'error': {
|
|
||||||
'message': f'LLM API connection failed: {str(e)}',
|
|
||||||
'type': 'connection_error',
|
|
||||||
'code': 502,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
|
||||||
'error': {
|
|
||||||
'message': 'LLM API request failed after all retries',
|
|
||||||
'type': 'server_error',
|
|
||||||
'code': 500,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
async def _stream_llm_response(url: str, headers: Dict, body: Dict) -> AsyncGenerator[str, None]:
|
|
||||||
"""Make a streaming LLM API call, yielding SSE chunks."""
|
|
||||||
async with ClientSession() as session:
|
|
||||||
async with session.post(url, headers=headers, json=body,
|
|
||||||
timeout=ClientTimeout(total=600)) as resp:
|
|
||||||
if resp.status != 200:
|
|
||||||
error_text = await resp.text()
|
|
||||||
error_data = {
|
|
||||||
'error': {
|
|
||||||
'message': f'LLM API error: HTTP {resp.status}',
|
|
||||||
'type': 'api_error',
|
|
||||||
'code': resp.status,
|
|
||||||
'detail': error_text[:500],
|
|
||||||
}
|
|
||||||
}
|
|
||||||
yield f"data: {json.dumps(error_data, ensure_ascii=False)}\n\n"
|
|
||||||
yield "data: [DONE]\n\n"
|
|
||||||
return
|
|
||||||
|
|
||||||
async for line in resp.content:
|
|
||||||
line = line.decode('utf-8').strip()
|
|
||||||
if not line:
|
|
||||||
continue
|
|
||||||
if line.startswith('data: '):
|
|
||||||
yield line + '\n\n'
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
|
||||||
# Public API functions (registered to ServerEnv via init.py)
|
|
||||||
# ============================================================
|
|
||||||
|
|
||||||
async def harnessed_llm_chat_completions(body: Dict[str, Any]) -> Any:
|
|
||||||
"""
|
|
||||||
OpenAI-compatible /v1/chat/completions endpoint.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
body: dict with keys: model, messages, temperature, max_tokens,
|
|
||||||
stream, top_p, and other OpenAI-compatible params.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
If stream=False: dict matching OpenAI chat completion response.
|
|
||||||
If stream=True: aiohttp.StreamResponse with SSE.
|
|
||||||
"""
|
|
||||||
from aiohttp import web
|
|
||||||
|
|
||||||
model = body.get('model', 'default')
|
|
||||||
messages = body.get('messages', [])
|
|
||||||
temperature = body.get('temperature', 0.7)
|
|
||||||
max_tokens = body.get('max_tokens', None)
|
|
||||||
stream = body.get('stream', False)
|
|
||||||
top_p = body.get('top_p', 1.0)
|
|
||||||
|
|
||||||
start_time = time.time()
|
|
||||||
|
|
||||||
# Get LLM service config
|
|
||||||
config = await _async_get_llm_config()
|
|
||||||
if not config:
|
|
||||||
error("LLM service not configured: no harnessed_agent_config found")
|
|
||||||
return {
|
|
||||||
'error': {
|
|
||||||
'message': 'LLM service not configured. Please configure llm_service_url and llm_api_key in agent settings.',
|
|
||||||
'type': 'configuration_error',
|
|
||||||
'code': 503,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
service_url = config.get('llm_service_url') or config.get('api_endpoint')
|
|
||||||
api_key = config.get('llm_api_key') or config.get('api_key')
|
|
||||||
|
|
||||||
if not service_url or not api_key:
|
|
||||||
error(f"LLM service misconfigured: service_url={'set' if service_url else 'MISSING'}, api_key={'set' if api_key else 'MISSING'}")
|
|
||||||
return {
|
|
||||||
'error': {
|
|
||||||
'message': 'LLM service URL or API key not configured. Set llm_service_url and llm_api_key in harnessed_agent_config.',
|
|
||||||
'type': 'configuration_error',
|
|
||||||
'code': 503,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Use default model from config if request model is 'default' or empty
|
|
||||||
default_model = config.get('default_model', 'qwen3-max')
|
|
||||||
if not model or model == 'default':
|
|
||||||
model = default_model
|
|
||||||
|
|
||||||
info(f"LLM chat request: model={model}, stream={stream}, messages={len(messages)}, temperature={temperature}")
|
|
||||||
|
|
||||||
# Pass through extra params
|
|
||||||
extra_params = {}
|
|
||||||
for key in ('stop', 'presence_penalty', 'frequency_penalty', 'tools',
|
|
||||||
'tool_choice', 'response_format', 'seed'):
|
|
||||||
if key in body:
|
|
||||||
extra_params[key] = body[key]
|
|
||||||
|
|
||||||
if stream:
|
|
||||||
resp = web.StreamResponse(
|
|
||||||
status=200,
|
|
||||||
reason='OK',
|
|
||||||
headers={
|
|
||||||
'Content-Type': 'text/event-stream',
|
|
||||||
'Cache-Control': 'no-cache',
|
|
||||||
'Connection': 'keep-alive',
|
|
||||||
'X-Accel-Buffering': 'no',
|
|
||||||
}
|
|
||||||
)
|
|
||||||
await resp.prepare(body.get('_request'))
|
|
||||||
|
|
||||||
request_id = f"chatcmpl-{uuid.uuid4().hex[:12]}"
|
|
||||||
created = _now_ts()
|
|
||||||
|
|
||||||
try:
|
|
||||||
async for chunk_line in _stream_llm_response(
|
|
||||||
service_url, {'Content-Type': 'application/json', 'Authorization': f'Bearer {api_key}'},
|
|
||||||
{
|
|
||||||
'model': model,
|
|
||||||
'messages': messages,
|
|
||||||
'temperature': temperature,
|
|
||||||
'top_p': top_p,
|
|
||||||
'stream': True,
|
|
||||||
'max_tokens': max_tokens,
|
|
||||||
**extra_params,
|
|
||||||
}
|
|
||||||
):
|
|
||||||
await resp.write(chunk_line.encode('utf-8') if isinstance(chunk_line, str) else chunk_line)
|
|
||||||
await resp.drain()
|
|
||||||
except Exception as e:
|
|
||||||
error_data = {
|
|
||||||
'error': {
|
|
||||||
'message': f'Streaming error: {str(e)}',
|
|
||||||
'type': 'server_error',
|
|
||||||
'code': 500,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
await resp.write(f"data: {json.dumps(error_data)}\n\n".encode('utf-8'))
|
|
||||||
await resp.drain()
|
|
||||||
|
|
||||||
await resp.write(b"data: [DONE]\n\n")
|
|
||||||
await resp.drain()
|
|
||||||
return resp
|
|
||||||
|
|
||||||
else:
|
|
||||||
result = await _call_llm_api(
|
|
||||||
service_url=service_url,
|
|
||||||
api_key=api_key,
|
|
||||||
model=model,
|
|
||||||
messages=messages,
|
|
||||||
temperature=temperature,
|
|
||||||
max_tokens=max_tokens,
|
|
||||||
stream=False,
|
|
||||||
top_p=top_p,
|
|
||||||
**extra_params,
|
|
||||||
)
|
|
||||||
elapsed = time.time() - start_time
|
|
||||||
if 'error' in result:
|
|
||||||
error(f"LLM chat error: model={model}, elapsed={elapsed:.2f}s, error={result.get('error',{}).get('message','')}")
|
|
||||||
else:
|
|
||||||
usage = result.get('usage', {})
|
|
||||||
info(f"LLM chat done: model={model}, elapsed={elapsed:.2f}s, tokens_in={usage.get('prompt_tokens','?')}, tokens_out={usage.get('completion_tokens','?')}")
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
async def harnessed_llm_models() -> Dict[str, Any]:
|
|
||||||
"""
|
|
||||||
OpenAI-compatible /v1/models endpoint.
|
|
||||||
Returns list of available models from config.
|
|
||||||
"""
|
|
||||||
config = await _async_get_llm_config()
|
|
||||||
|
|
||||||
models_list = []
|
|
||||||
|
|
||||||
# Add default model from config
|
|
||||||
default_model = 'qwen3-max'
|
|
||||||
if config:
|
|
||||||
default_model = config.get('default_model', 'qwen3-max')
|
|
||||||
|
|
||||||
# If available_models is configured as JSON string, parse it
|
|
||||||
if config and config.get('available_models'):
|
|
||||||
try:
|
|
||||||
models_str = config['available_models']
|
|
||||||
if isinstance(models_str, str):
|
|
||||||
model_names = json.loads(models_str)
|
|
||||||
else:
|
|
||||||
model_names = models_str
|
|
||||||
for m in model_names:
|
|
||||||
if isinstance(m, str):
|
|
||||||
models_list.append({
|
|
||||||
'id': m,
|
|
||||||
'object': 'model',
|
|
||||||
'created': _now_ts(),
|
|
||||||
'owned_by': 'harnessed_agent',
|
|
||||||
})
|
|
||||||
elif isinstance(m, dict):
|
|
||||||
models_list.append(m)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Always include the default model if not already listed
|
|
||||||
existing_ids = {m['id'] for m in models_list}
|
|
||||||
if default_model not in existing_ids:
|
|
||||||
models_list.insert(0, {
|
|
||||||
'id': default_model,
|
|
||||||
'object': 'model',
|
|
||||||
'created': _now_ts(),
|
|
||||||
'owned_by': 'harnessed_agent',
|
|
||||||
})
|
|
||||||
|
|
||||||
return {
|
|
||||||
'object': 'list',
|
|
||||||
'data': models_list,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
async def harnessed_llm_completions(body: Dict[str, Any]) -> Dict[str, Any]:
|
|
||||||
"""
|
|
||||||
OpenAI-compatible /v1/completions endpoint (legacy, non-chat).
|
|
||||||
Converts prompt to messages format internally.
|
|
||||||
"""
|
|
||||||
prompt = body.get('prompt', '')
|
|
||||||
model = body.get('model', 'default')
|
|
||||||
temperature = body.get('temperature', 0.7)
|
|
||||||
max_tokens = body.get('max_tokens', None)
|
|
||||||
stream = body.get('stream', False)
|
|
||||||
|
|
||||||
# Convert to chat format
|
|
||||||
messages = [{'role': 'user', 'content': prompt}]
|
|
||||||
|
|
||||||
# Build request body for chat completions
|
|
||||||
chat_body = {
|
|
||||||
'model': model,
|
|
||||||
'messages': messages,
|
|
||||||
'temperature': temperature,
|
|
||||||
'max_tokens': max_tokens,
|
|
||||||
'stream': stream,
|
|
||||||
'_request': body.get('_request'),
|
|
||||||
}
|
|
||||||
|
|
||||||
return await harnessed_llm_chat_completions(chat_body)
|
|
||||||
475
harnessed_agent/llm_client.py
Normal file
475
harnessed_agent/llm_client.py
Normal file
@ -0,0 +1,475 @@
|
|||||||
|
"""
|
||||||
|
LLM Client for Hermes Agent - Calls supplier LLM APIs via OpenAI-compatible interface.
|
||||||
|
|
||||||
|
This module provides a client-side implementation for calling external LLM providers
|
||||||
|
(OpenAI, DashScope, DeepSeek, etc.) through their OpenAI-compatible /v1/chat/completions API.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
# From .dspy files:
|
||||||
|
result = await llm_chat(messages=[{"role": "user", "content": "Hello"}])
|
||||||
|
|
||||||
|
# With explicit config:
|
||||||
|
result = await llm_chat(
|
||||||
|
messages=[{"role": "user", "content": "Hello"}],
|
||||||
|
model="qwen3-max",
|
||||||
|
temperature=0.7,
|
||||||
|
stream=False
|
||||||
|
)
|
||||||
|
|
||||||
|
# Stream mode:
|
||||||
|
async for chunk in llm_chat_stream(messages=[...]):
|
||||||
|
print(chunk.get("delta", ""))
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
import asyncio
|
||||||
|
from typing import Dict, Any, List, Optional, AsyncGenerator
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
try:
|
||||||
|
from aiohttp import ClientSession, ClientTimeout, ClientError
|
||||||
|
except ImportError:
|
||||||
|
ClientError = Exception
|
||||||
|
|
||||||
|
try:
|
||||||
|
from ahserver.serverenv import ServerEnv
|
||||||
|
from sqlor.dbpools import DBPools
|
||||||
|
from appPublic.log import info, debug, warning, error, exception
|
||||||
|
except ImportError:
|
||||||
|
class ServerEnv:
|
||||||
|
pass
|
||||||
|
class DBPools:
|
||||||
|
pass
|
||||||
|
def info(*a, **kw): print(*a)
|
||||||
|
def debug(*a, **kw): pass
|
||||||
|
def warning(*a, **kw): print(*a)
|
||||||
|
def error(*a, **kw): print(*a)
|
||||||
|
def exception(*a, **kw): pass
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# LLM Provider configurations
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
LLM_PROVIDERS = {
|
||||||
|
'openai': {
|
||||||
|
'url': 'https://api.openai.com/v1',
|
||||||
|
'model_default': 'gpt-4o',
|
||||||
|
},
|
||||||
|
'dashscope': {
|
||||||
|
'url': 'https://dashscope.aliyuncs.com/compatible-mode/v1',
|
||||||
|
'model_default': 'qwen-plus',
|
||||||
|
},
|
||||||
|
'deepseek': {
|
||||||
|
'url': 'https://api.deepseek.com/v1',
|
||||||
|
'model_default': 'deepseek-chat',
|
||||||
|
},
|
||||||
|
'siliconflow': {
|
||||||
|
'url': 'https://api.siliconflow.cn/v1',
|
||||||
|
'model_default': 'Qwen/Qwen2.5-72B-Instruct',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Config retrieval
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
async def _get_llm_config() -> Dict[str, Any]:
|
||||||
|
"""Get LLM client configuration from harnessed_agent_config table."""
|
||||||
|
try:
|
||||||
|
env = ServerEnv()
|
||||||
|
dbname = env.get_module_dbname('harnessed_agent')
|
||||||
|
except Exception:
|
||||||
|
dbname = 'default'
|
||||||
|
|
||||||
|
try:
|
||||||
|
async with DBPools().sqlorContext(dbname) as sor:
|
||||||
|
rows = await sor.R('harnessed_agent_config', {},
|
||||||
|
orderby='updated_at DESC', limit=1)
|
||||||
|
if rows:
|
||||||
|
return rows[0]
|
||||||
|
except Exception as e:
|
||||||
|
error(f"Failed to fetch LLM config: {e}")
|
||||||
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_provider(config: Dict[str, Any]) -> Dict[str, str]:
|
||||||
|
"""Resolve base URL and model from config, with provider presets."""
|
||||||
|
provider = (config.get('llm_provider') or '').lower()
|
||||||
|
service_url = config.get('llm_service_url', '')
|
||||||
|
api_key = config.get('llm_api_key', '')
|
||||||
|
model = config.get('default_model', '')
|
||||||
|
|
||||||
|
# If provider name is set, use preset URL
|
||||||
|
if provider and provider in LLM_PROVIDERS:
|
||||||
|
preset = LLM_PROVIDERS[provider]
|
||||||
|
if not service_url:
|
||||||
|
service_url = preset['url']
|
||||||
|
if not model:
|
||||||
|
model = preset['model_default']
|
||||||
|
|
||||||
|
return {
|
||||||
|
'service_url': service_url.rstrip('/'),
|
||||||
|
'api_key': api_key,
|
||||||
|
'model': model,
|
||||||
|
'provider': provider,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Core LLM client
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
async def _post_chat_completions(
|
||||||
|
service_url: str,
|
||||||
|
api_key: str,
|
||||||
|
model: str,
|
||||||
|
messages: List[Dict[str, str]],
|
||||||
|
temperature: float = 0.7,
|
||||||
|
max_tokens: Optional[int] = None,
|
||||||
|
stream: bool = False,
|
||||||
|
top_p: float = 1.0,
|
||||||
|
extra: Optional[Dict] = None,
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Call OpenAI-compatible /v1/chat/completions endpoint.
|
||||||
|
|
||||||
|
Returns dict with OpenAI response format or error dict.
|
||||||
|
"""
|
||||||
|
if not service_url:
|
||||||
|
return {'error': {'message': 'llm_service_url not configured', 'type': 'configuration_error'}}
|
||||||
|
if not api_key:
|
||||||
|
return {'error': {'message': 'llm_api_key not configured', 'type': 'configuration_error'}}
|
||||||
|
|
||||||
|
url = f"{service_url}/chat/completions"
|
||||||
|
headers = {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'Authorization': f'Bearer {api_key}',
|
||||||
|
}
|
||||||
|
|
||||||
|
body = {
|
||||||
|
'model': model,
|
||||||
|
'messages': messages,
|
||||||
|
'temperature': temperature,
|
||||||
|
'top_p': top_p,
|
||||||
|
'stream': stream,
|
||||||
|
}
|
||||||
|
|
||||||
|
if max_tokens is not None:
|
||||||
|
body['max_tokens'] = max_tokens
|
||||||
|
|
||||||
|
if extra:
|
||||||
|
for key in ('stop', 'presence_penalty', 'frequency_penalty', 'tools',
|
||||||
|
'tool_choice', 'response_format', 'seed'):
|
||||||
|
if key in extra and extra[key] is not None:
|
||||||
|
body[key] = extra[key]
|
||||||
|
|
||||||
|
max_retries = 3
|
||||||
|
base_delay = 2
|
||||||
|
|
||||||
|
for attempt in range(max_retries):
|
||||||
|
try:
|
||||||
|
async with ClientSession() as session:
|
||||||
|
async with session.post(url, headers=headers, json=body,
|
||||||
|
timeout=ClientTimeout(total=300)) as resp:
|
||||||
|
|
||||||
|
if resp.status == 429:
|
||||||
|
retry_after = int(resp.headers.get('Retry-After', base_delay * (2 ** attempt)))
|
||||||
|
if attempt < max_retries - 1:
|
||||||
|
warning(f"LLM rate limited (429), retrying in {retry_after}s (attempt {attempt+1}/{max_retries})")
|
||||||
|
await asyncio.sleep(retry_after)
|
||||||
|
continue
|
||||||
|
return {'error': {'message': 'Rate limited by LLM provider', 'type': 'rate_limit_error', 'code': 429}}
|
||||||
|
|
||||||
|
if resp.status == 500 and attempt < max_retries - 1:
|
||||||
|
delay = base_delay * (2 ** attempt)
|
||||||
|
warning(f"LLM server error (500), retrying in {delay}s (attempt {attempt+1}/{max_retries})")
|
||||||
|
await asyncio.sleep(delay)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if resp.status != 200:
|
||||||
|
err_text = await resp.text()
|
||||||
|
return {
|
||||||
|
'error': {
|
||||||
|
'message': f'LLM API error: HTTP {resp.status}',
|
||||||
|
'type': 'api_error',
|
||||||
|
'code': resp.status,
|
||||||
|
'detail': err_text[:500],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return await resp.json()
|
||||||
|
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
if attempt < max_retries - 1:
|
||||||
|
delay = base_delay * (2 ** attempt)
|
||||||
|
warning(f"LLM API timeout, retrying in {delay}s (attempt {attempt+1}/{max_retries})")
|
||||||
|
await asyncio.sleep(delay)
|
||||||
|
continue
|
||||||
|
return {'error': {'message': 'LLM API request timed out', 'type': 'timeout_error', 'code': 504}}
|
||||||
|
|
||||||
|
except ClientError as e:
|
||||||
|
if attempt < max_retries - 1:
|
||||||
|
delay = base_delay * (2 ** attempt)
|
||||||
|
warning(f"LLM connection error: {e}, retrying in {delay}s")
|
||||||
|
await asyncio.sleep(delay)
|
||||||
|
continue
|
||||||
|
return {'error': {'message': f'LLM connection failed: {str(e)}', 'type': 'connection_error', 'code': 502}}
|
||||||
|
|
||||||
|
return {'error': {'message': 'LLM API failed after all retries', 'type': 'server_error', 'code': 500}}
|
||||||
|
|
||||||
|
|
||||||
|
async def _stream_chat_completions(
|
||||||
|
service_url: str,
|
||||||
|
api_key: str,
|
||||||
|
model: str,
|
||||||
|
messages: List[Dict[str, str]],
|
||||||
|
temperature: float = 0.7,
|
||||||
|
max_tokens: Optional[int] = None,
|
||||||
|
top_p: float = 1.0,
|
||||||
|
extra: Optional[Dict] = None,
|
||||||
|
) -> AsyncGenerator[Dict[str, Any], None]:
|
||||||
|
"""
|
||||||
|
Stream LLM response via SSE. Yields parsed chunk dicts.
|
||||||
|
|
||||||
|
Each yielded dict contains:
|
||||||
|
- delta: str (the text chunk)
|
||||||
|
- finish_reason: str or None
|
||||||
|
- raw: dict (the raw SSE chunk data)
|
||||||
|
"""
|
||||||
|
if not service_url:
|
||||||
|
yield {'delta': '', 'finish_reason': 'error', 'raw': {'error': 'llm_service_url not configured'}}
|
||||||
|
return
|
||||||
|
if not api_key:
|
||||||
|
yield {'delta': '', 'finish_reason': 'error', 'raw': {'error': 'llm_api_key not configured'}}
|
||||||
|
return
|
||||||
|
|
||||||
|
url = f"{service_url}/chat/completions"
|
||||||
|
headers = {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'Authorization': f'Bearer {api_key}',
|
||||||
|
}
|
||||||
|
|
||||||
|
body = {
|
||||||
|
'model': model,
|
||||||
|
'messages': messages,
|
||||||
|
'temperature': temperature,
|
||||||
|
'top_p': top_p,
|
||||||
|
'stream': True,
|
||||||
|
}
|
||||||
|
|
||||||
|
if max_tokens is not None:
|
||||||
|
body['max_tokens'] = max_tokens
|
||||||
|
|
||||||
|
if extra:
|
||||||
|
for key in ('stop', 'presence_penalty', 'frequency_penalty', 'tools',
|
||||||
|
'tool_choice', 'response_format', 'seed'):
|
||||||
|
if key in extra and extra[key] is not None:
|
||||||
|
body[key] = extra[key]
|
||||||
|
|
||||||
|
async with ClientSession() as session:
|
||||||
|
async with session.post(url, headers=headers, json=body,
|
||||||
|
timeout=ClientTimeout(total=600)) as resp:
|
||||||
|
if resp.status != 200:
|
||||||
|
err_text = await resp.text()
|
||||||
|
yield {
|
||||||
|
'delta': '',
|
||||||
|
'finish_reason': 'error',
|
||||||
|
'raw': {'error': f'HTTP {resp.status}', 'detail': err_text[:300]},
|
||||||
|
}
|
||||||
|
return
|
||||||
|
|
||||||
|
async for line in resp.content:
|
||||||
|
line = line.decode('utf-8').strip()
|
||||||
|
if not line or not line.startswith('data:'):
|
||||||
|
continue
|
||||||
|
data_str = line[5:].strip()
|
||||||
|
if data_str == '[DONE]':
|
||||||
|
break
|
||||||
|
try:
|
||||||
|
chunk = json.loads(data_str)
|
||||||
|
choices = chunk.get('choices', [])
|
||||||
|
if choices:
|
||||||
|
choice = choices[0]
|
||||||
|
delta = choice.get('delta', {})
|
||||||
|
text = delta.get('content', '') or ''
|
||||||
|
finish_reason = choice.get('finish_reason')
|
||||||
|
yield {
|
||||||
|
'delta': text,
|
||||||
|
'finish_reason': finish_reason,
|
||||||
|
'raw': chunk,
|
||||||
|
}
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
continue
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Public API functions (registered to ServerEnv)
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
async def llm_chat(
|
||||||
|
messages: List[Dict[str, str]],
|
||||||
|
model: str = None,
|
||||||
|
temperature: float = None,
|
||||||
|
max_tokens: int = None,
|
||||||
|
stream: bool = False,
|
||||||
|
top_p: float = None,
|
||||||
|
**extra,
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Call LLM provider and get chat completion.
|
||||||
|
|
||||||
|
This is the primary function for AI calls within harnessed_agent.
|
||||||
|
Reads provider config from harnessed_agent_config table automatically.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
messages: List of {role, content} dicts (OpenAI format)
|
||||||
|
model: Override model name (uses config default_model if not set)
|
||||||
|
temperature: Override temperature (uses config default_temperature if not set)
|
||||||
|
max_tokens: Max response tokens
|
||||||
|
stream: If True, uses streaming
|
||||||
|
top_p: Override top_p
|
||||||
|
**extra: Other OpenAI-compatible params (stop, tools, etc.)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict matching OpenAI chat completion response, or error dict.
|
||||||
|
"""
|
||||||
|
config = await _get_llm_config()
|
||||||
|
provider = _resolve_provider(config)
|
||||||
|
|
||||||
|
resolved_model = model or provider['model']
|
||||||
|
resolved_temp = temperature if temperature is not None else config.get('default_temperature', 0.7)
|
||||||
|
resolved_top_p = top_p if top_p is not None else config.get('top_p', 1.0)
|
||||||
|
|
||||||
|
start_time = time.time()
|
||||||
|
info(f"LLM chat: model={resolved_model}, temp={resolved_temp}, messages={len(messages)}")
|
||||||
|
|
||||||
|
result = await _post_chat_completions(
|
||||||
|
service_url=provider['service_url'],
|
||||||
|
api_key=provider['api_key'],
|
||||||
|
model=resolved_model,
|
||||||
|
messages=messages,
|
||||||
|
temperature=resolved_temp,
|
||||||
|
max_tokens=max_tokens,
|
||||||
|
stream=False,
|
||||||
|
top_p=resolved_top_p,
|
||||||
|
extra=extra if extra else None,
|
||||||
|
)
|
||||||
|
|
||||||
|
elapsed = time.time() - start_time
|
||||||
|
if 'error' in result:
|
||||||
|
error(f"LLM chat error: model={resolved_model}, elapsed={elapsed:.2f}s, error={result['error'].get('message')}")
|
||||||
|
else:
|
||||||
|
usage = result.get('usage', {})
|
||||||
|
info(f"LLM chat done: model={resolved_model}, elapsed={elapsed:.2f}s, prompt_tokens={usage.get('prompt_tokens')}, completion_tokens={usage.get('completion_tokens')}")
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
async def llm_chat_stream(
|
||||||
|
messages: List[Dict[str, str]],
|
||||||
|
model: str = None,
|
||||||
|
temperature: float = None,
|
||||||
|
max_tokens: int = None,
|
||||||
|
top_p: float = None,
|
||||||
|
**extra,
|
||||||
|
) -> AsyncGenerator[Dict[str, Any], None]:
|
||||||
|
"""
|
||||||
|
Stream LLM chat response.
|
||||||
|
|
||||||
|
Yields dicts with {delta: str, finish_reason: str|None, raw: dict}.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
async for chunk in llm_chat_stream(messages=[{"role": "user", "content": "Hello"}]):
|
||||||
|
print(chunk['delta'], end='', flush=True)
|
||||||
|
"""
|
||||||
|
config = await _get_llm_config()
|
||||||
|
provider = _resolve_provider(config)
|
||||||
|
|
||||||
|
resolved_model = model or provider['model']
|
||||||
|
resolved_temp = temperature if temperature is not None else config.get('default_temperature', 0.7)
|
||||||
|
resolved_top_p = top_p if top_p is not None else config.get('top_p', 1.0)
|
||||||
|
|
||||||
|
info(f"LLM chat stream: model={resolved_model}, temp={resolved_temp}, messages={len(messages)}")
|
||||||
|
|
||||||
|
async for chunk in _stream_chat_completions(
|
||||||
|
service_url=provider['service_url'],
|
||||||
|
api_key=provider['api_key'],
|
||||||
|
model=resolved_model,
|
||||||
|
messages=messages,
|
||||||
|
temperature=resolved_temp,
|
||||||
|
max_tokens=max_tokens,
|
||||||
|
top_p=resolved_top_p,
|
||||||
|
extra=extra if extra else None,
|
||||||
|
):
|
||||||
|
yield chunk
|
||||||
|
|
||||||
|
|
||||||
|
async def llm_list_models() -> Dict[str, Any]:
|
||||||
|
"""List models available from the configured LLM provider."""
|
||||||
|
config = await _get_llm_config()
|
||||||
|
provider = _resolve_provider(config)
|
||||||
|
|
||||||
|
if not provider['service_url'] or not provider['api_key']:
|
||||||
|
return {'error': 'LLM not configured'}
|
||||||
|
|
||||||
|
# Try to call /v1/models endpoint
|
||||||
|
url = f"{provider['service_url']}/models"
|
||||||
|
headers = {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'Authorization': f'Bearer {provider["api_key"]}',
|
||||||
|
}
|
||||||
|
|
||||||
|
try:
|
||||||
|
async with ClientSession() as session:
|
||||||
|
async with session.get(url, headers=headers,
|
||||||
|
timeout=ClientTimeout(total=30)) as resp:
|
||||||
|
if resp.status == 200:
|
||||||
|
return await resp.json()
|
||||||
|
else:
|
||||||
|
return {'error': f'HTTP {resp.status}'}
|
||||||
|
except Exception as e:
|
||||||
|
return {'error': str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
async def llm_simple(prompt: str, system: str = None, **kwargs) -> str:
|
||||||
|
"""
|
||||||
|
Simplified LLM call: returns just the response text.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prompt: User message text
|
||||||
|
system: Optional system prompt
|
||||||
|
**kwargs: Passed to llm_chat
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Response content string, or error message.
|
||||||
|
"""
|
||||||
|
messages = []
|
||||||
|
if system:
|
||||||
|
messages.append({'role': 'system', 'content': system})
|
||||||
|
messages.append({'role': 'user', 'content': prompt})
|
||||||
|
|
||||||
|
result = await llm_chat(messages=messages, **kwargs)
|
||||||
|
|
||||||
|
if 'error' in result:
|
||||||
|
return f"Error: {result['error'].get('message', 'Unknown error')}"
|
||||||
|
|
||||||
|
choices = result.get('choices', [])
|
||||||
|
if choices:
|
||||||
|
return choices[0].get('message', {}).get('content', '')
|
||||||
|
return ''
|
||||||
|
|
||||||
|
|
||||||
|
async def llm_get_config() -> Dict[str, Any]:
|
||||||
|
"""Get current LLM client configuration (with api_key masked)."""
|
||||||
|
config = await _get_llm_config()
|
||||||
|
provider = _resolve_provider(config)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'provider': provider['provider'],
|
||||||
|
'service_url': provider['service_url'],
|
||||||
|
'api_key': '***' + provider['api_key'][-4:] if provider['api_key'] else '(not set)',
|
||||||
|
'default_model': provider['model'],
|
||||||
|
'default_temperature': config.get('default_temperature', 0.7),
|
||||||
|
}
|
||||||
@ -9,7 +9,7 @@
|
|||||||
"updated_at": "2026-04-15 21:06:00"
|
"updated_at": "2026-04-15 21:06:00"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "default_memory_notes_1",
|
"id": "default_memory_notes_1",
|
||||||
"user_id": "user_1",
|
"user_id": "user_1",
|
||||||
"target": "memory",
|
"target": "memory",
|
||||||
"content": "Default memory notes for Hermes Agent module - User 1",
|
"content": "Default memory notes for Hermes Agent module - User 1",
|
||||||
@ -25,7 +25,7 @@
|
|||||||
"updated_at": "2026-04-15 21:06:00"
|
"updated_at": "2026-04-15 21:06:00"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "default_memory_notes_2",
|
"id": "default_memory_notes_2",
|
||||||
"user_id": "user_2",
|
"user_id": "user_2",
|
||||||
"target": "memory",
|
"target": "memory",
|
||||||
"content": "Default memory notes for Hermes Agent module - User 2",
|
"content": "Default memory notes for Hermes Agent module - User 2",
|
||||||
@ -70,7 +70,15 @@
|
|||||||
"auto_cleanup_enabled": "1",
|
"auto_cleanup_enabled": "1",
|
||||||
"min_retention_days": 30,
|
"min_retention_days": 30,
|
||||||
"created_at": "2026-04-20 10:48:00",
|
"created_at": "2026-04-20 10:48:00",
|
||||||
"updated_at": "2026-04-20 10:48:00"
|
"updated_at": "2026-04-20 10:48:00",
|
||||||
|
"llm_provider": "dashscope",
|
||||||
|
"llm_service_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
|
||||||
|
"llm_api_key": "",
|
||||||
|
"available_models": "[\"qwen3-max\", \"qwen-plus\", \"qwen-turbo\"]",
|
||||||
|
"default_model": "qwen-plus",
|
||||||
|
"default_temperature": 0.7,
|
||||||
|
"top_p": 1.0,
|
||||||
|
"enable_streaming": "1"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "default_agent_config_user_2",
|
"id": "default_agent_config_user_2",
|
||||||
@ -84,7 +92,15 @@
|
|||||||
"auto_cleanup_enabled": "1",
|
"auto_cleanup_enabled": "1",
|
||||||
"min_retention_days": 30,
|
"min_retention_days": 30,
|
||||||
"created_at": "2026-04-20 10:48:00",
|
"created_at": "2026-04-20 10:48:00",
|
||||||
"updated_at": "2026-04-20 10:48:00"
|
"updated_at": "2026-04-20 10:48:00",
|
||||||
|
"llm_provider": "dashscope",
|
||||||
|
"llm_service_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
|
||||||
|
"llm_api_key": "",
|
||||||
|
"available_models": "[\"qwen3-max\", \"qwen-plus\", \"qwen-turbo\"]",
|
||||||
|
"default_model": "qwen-plus",
|
||||||
|
"default_temperature": 0.7,
|
||||||
|
"top_p": 1.0,
|
||||||
|
"enable_streaming": "1"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@ -21,6 +21,16 @@
|
|||||||
{"value": "1", "text": "Enabled"},
|
{"value": "1", "text": "Enabled"},
|
||||||
{"value": "0", "text": "Disabled"}
|
{"value": "0", "text": "Disabled"}
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
"llm_provider": {
|
||||||
|
"uitype": "code",
|
||||||
|
"data": [
|
||||||
|
{"value": "dashscope", "text": "阿里云 DashScope"},
|
||||||
|
{"value": "openai", "text": "OpenAI"},
|
||||||
|
{"value": "deepseek", "text": "DeepSeek"},
|
||||||
|
{"value": "siliconflow", "text": "SiliconFlow"},
|
||||||
|
{"value": "", "text": "自定义 (Custom URL)"}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|||||||
@ -97,6 +97,15 @@
|
|||||||
"nullable": "no",
|
"nullable": "no",
|
||||||
"default": "0.7"
|
"default": "0.7"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "top_p",
|
||||||
|
"title": "Default top_p for LLM calls",
|
||||||
|
"type": "float",
|
||||||
|
"length": 5,
|
||||||
|
"dec": 2,
|
||||||
|
"nullable": "no",
|
||||||
|
"default": "1.00"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "enable_streaming",
|
"name": "enable_streaming",
|
||||||
"title": "Enable streaming response for LLM calls",
|
"title": "Enable streaming response for LLM calls",
|
||||||
@ -105,6 +114,15 @@
|
|||||||
"nullable": "no",
|
"nullable": "no",
|
||||||
"default": "1"
|
"default": "1"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "llm_provider",
|
||||||
|
"title": "LLM provider preset name",
|
||||||
|
"type": "str",
|
||||||
|
"length": 32,
|
||||||
|
"nullable": "yes",
|
||||||
|
"default": "dashscope",
|
||||||
|
"comments": "Provider preset: openai, dashscope, deepseek, siliconflow, or custom (empty)"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "llm_service_url",
|
"name": "llm_service_url",
|
||||||
"title": "LLM service base URL (OpenAI-compatible endpoint)",
|
"title": "LLM service base URL (OpenAI-compatible endpoint)",
|
||||||
|
|||||||
@ -8,11 +8,8 @@ version = "1.0.0"
|
|||||||
description = "Hermes Agent module - multi-user AI agent with memory, skills, workflows, and remote skill deployment"
|
description = "Hermes Agent module - multi-user AI agent with memory, skills, workflows, and remote skill deployment"
|
||||||
requires-python = ">=3.10"
|
requires-python = ">=3.10"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ahserver",
|
|
||||||
"sqlor",
|
"sqlor",
|
||||||
"apppublic",
|
"bricks_for_python",
|
||||||
"appbase",
|
|
||||||
"rbac",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
|
|||||||
@ -1,46 +0,0 @@
|
|||||||
"""
|
|
||||||
OpenAI-compatible /v1/chat/completions endpoint
|
|
||||||
Accepts POST with JSON body matching OpenAI API format.
|
|
||||||
"""
|
|
||||||
import json
|
|
||||||
|
|
||||||
async def main():
|
|
||||||
# Read request body
|
|
||||||
body = {}
|
|
||||||
try:
|
|
||||||
raw_body = await request.read()
|
|
||||||
if raw_body:
|
|
||||||
body = json.loads(raw_body)
|
|
||||||
except Exception as e:
|
|
||||||
result = {
|
|
||||||
'error': {
|
|
||||||
'message': f'Invalid JSON body: {str(e)}',
|
|
||||||
'type': 'invalid_request_error',
|
|
||||||
'code': 400,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return json.dumps(result, ensure_ascii=False)
|
|
||||||
|
|
||||||
# Pass the request object for streaming support
|
|
||||||
body['_request'] = request
|
|
||||||
|
|
||||||
# Call the LLM API handler
|
|
||||||
result = await harnessed_llm_chat_completions(body)
|
|
||||||
|
|
||||||
# Handle streaming response (StreamResponse)
|
|
||||||
from aiohttp.web_response import StreamResponse
|
|
||||||
if isinstance(result, StreamResponse):
|
|
||||||
return result
|
|
||||||
|
|
||||||
# Handle error response
|
|
||||||
if 'error' in result:
|
|
||||||
status_code = result.get('error', {}).get('code', 500)
|
|
||||||
resp = web.Response(
|
|
||||||
status=status_code,
|
|
||||||
body=json.dumps(result, ensure_ascii=False),
|
|
||||||
content_type='application/json'
|
|
||||||
)
|
|
||||||
return resp
|
|
||||||
|
|
||||||
# Return successful response
|
|
||||||
return json.dumps(result, ensure_ascii=False)
|
|
||||||
@ -1,41 +0,0 @@
|
|||||||
"""
|
|
||||||
OpenAI-compatible /v1/completions endpoint (legacy)
|
|
||||||
Accepts POST with JSON body matching OpenAI completions API format.
|
|
||||||
"""
|
|
||||||
import json
|
|
||||||
|
|
||||||
async def main():
|
|
||||||
# Read request body
|
|
||||||
body = {}
|
|
||||||
try:
|
|
||||||
raw_body = await request.read()
|
|
||||||
if raw_body:
|
|
||||||
body = json.loads(raw_body)
|
|
||||||
except Exception as e:
|
|
||||||
result = {
|
|
||||||
'error': {
|
|
||||||
'message': f'Invalid JSON body: {str(e)}',
|
|
||||||
'type': 'invalid_request_error',
|
|
||||||
'code': 400,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return json.dumps(result, ensure_ascii=False)
|
|
||||||
|
|
||||||
body['_request'] = request
|
|
||||||
|
|
||||||
result = await harnessed_llm_completions(body)
|
|
||||||
|
|
||||||
from aiohttp.web_response import StreamResponse
|
|
||||||
if isinstance(result, StreamResponse):
|
|
||||||
return result
|
|
||||||
|
|
||||||
if 'error' in result:
|
|
||||||
status_code = result.get('error', {}).get('code', 500)
|
|
||||||
resp = web.Response(
|
|
||||||
status=status_code,
|
|
||||||
body=json.dumps(result, ensure_ascii=False),
|
|
||||||
content_type='application/json'
|
|
||||||
)
|
|
||||||
return resp
|
|
||||||
|
|
||||||
return json.dumps(result, ensure_ascii=False)
|
|
||||||
@ -1,9 +0,0 @@
|
|||||||
"""
|
|
||||||
OpenAI-compatible /v1/models endpoint
|
|
||||||
Returns list of available models.
|
|
||||||
"""
|
|
||||||
import json
|
|
||||||
|
|
||||||
async def main():
|
|
||||||
result = await harnessed_llm_models()
|
|
||||||
return json.dumps(result, ensure_ascii=False)
|
|
||||||
Loading…
x
Reference in New Issue
Block a user