llmage/scripts/migrate_llm_api_map.py
yumoqing d2ffd9c6d0 refactor: introduce llm_api_map table and remove uapiset intermediate layer
- New llm_api_map table: extract ability-specific fields (apiname,
  query_apiname, query_period, ppid) from llm table to support
  one-model-multi-ability without redundancy
- Remove uapiset from llmage JOIN chain: upapp.apisetid now directly
  joins uapi.apisetid
- Updated BufferedLLMs.get_llm() to JOIN llm_api_map for
  query_apiname/query_period/ppid fields
- Updated llmcheck.dspy and list_paging_catelog_llms.dspy to remove
  uapiset references
- Added migration script to generate llm_api_map INSERTs from existing
  llm data
2026-05-20 15:13:07 +08:00

166 lines
5.7 KiB
Python

"""
Migration script: Generate llm_api_map records from existing llm table data.
This script reads existing llm records and produces SQL INSERT statements
for the new llm_api_map table. It does NOT directly modify the database.
Usage:
python migrate_llm_api_map.py --input LLM_JSON [--catalog-rel REL_JSON] [--output OUTPUT_FILE] [--dry-run]
The script outputs INSERT SQL statements that can be reviewed and executed manually.
"""
import sys
import json
import argparse
from appPublic.uniqueID import getID
def _sql_quote(value):
    """Return *value* as a single-quoted SQL string literal.

    Embedded single quotes are doubled so that a value such as ``it's``
    can neither break the generated statement nor inject extra SQL.
    ``None`` is rendered as the empty string rather than the text 'None'.

    NOTE(review): backslashes are left untouched; on a server that treats
    backslash as an escape character inside string literals this may need
    additional handling - confirm against the target database.
    """
    text = '' if value is None else str(value)
    return "'" + text.replace("'", "''") + "'"


def _sql_quote_or_null(value):
    """Return a quoted literal for a truthy *value*, otherwise ``NULL``."""
    return _sql_quote(value) if value else 'NULL'


def generate_migration_sql(llm_records, catalog_rel_records=None,
                           id_factory=None):
    """
    Generate INSERT statements for llm_api_map from existing llm data.

    For each llm record that has a truthy ``id``:
    - If catalog_rel_records lists catalogs for that id, one statement is
      produced per distinct (llmid, llmcatelogid) pair.
    - Otherwise the record's own ``llmcatelogid`` is used; when that is
      empty too, a warning is written to stderr and the record is skipped.

    Args:
        llm_records: iterable of dict-like llm rows. The keys read are
            ``id``, ``apiname``, ``query_apiname``, ``query_period``,
            ``ppid`` and (as fallback) ``llmcatelogid``.
        catalog_rel_records: optional iterable of dict-like rows with
            ``llmid`` and ``llmcatelogid`` keys. Rows missing either
            value are ignored.
        id_factory: optional zero-argument callable returning the primary
            key for each generated row. Defaults to ``getID``; supplying
            one makes the output deterministic.

    Returns:
        list of str, one ``INSERT INTO llm_api_map ...;`` statement per
        generated row, in input order.

    Raises:
        ValueError, TypeError: if a non-empty ``query_period`` cannot be
            converted with ``int()``.
    """
    if id_factory is None:
        id_factory = getID

    # Build catalog_rel lookup: llmid -> [llmcatelogid, ...]
    catelog_map = {}
    for rel in catalog_rel_records or ():
        rel_llmid = rel.get('llmid')
        rel_catelogid = rel.get('llmcatelogid')
        if rel_llmid and rel_catelogid:
            catelog_map.setdefault(rel_llmid, []).append(rel_catelogid)

    columns = ('id', 'llmid', 'llmcatelogid', 'apiname',
               'query_apiname', 'query_period', 'ppid')
    cols = ', '.join(columns)

    inserts = []
    # A repeated (llmid, llmcatelogid) pair would produce two rows with the
    # same logical key, so each pair is emitted at most once.
    seen_pairs = set()

    for llm in llm_records:
        llmid = llm.get('id')
        if not llmid:
            continue

        catelog_ids = catelog_map.get(llmid)
        if not catelog_ids:
            # No relation rows for this llm: fall back to the catalog id
            # stored on the llm record itself, if there is one.
            fallback = llm.get('llmcatelogid', '')
            if not fallback:
                print(f"WARNING: llm {llmid} has no catalog_rel entry, skipping",
                      file=sys.stderr)
                continue
            catelog_ids = [fallback]

        # The per-llm column values do not depend on the catalog, so they
        # are rendered once per llm rather than once per generated row.
        query_period = llm.get('query_period', '')
        if query_period is not None and query_period != '':
            # 0 is a legitimate period, so only None / '' map to NULL.
            period_sql = str(int(query_period))
        else:
            period_sql = 'NULL'
        llmid_sql = _sql_quote(llmid)
        apiname_sql = _sql_quote(llm.get('apiname', ''))
        query_apiname_sql = _sql_quote_or_null(llm.get('query_apiname', ''))
        ppid_sql = _sql_quote_or_null(llm.get('ppid', ''))

        for catelogid in catelog_ids:
            pair = (llmid, catelogid)
            if pair in seen_pairs:
                print(f"WARNING: duplicate llm_api_map entry for llm {llmid} "
                      f"and catalog {catelogid}, skipping",
                      file=sys.stderr)
                continue
            seen_pairs.add(pair)

            vals = ', '.join((
                _sql_quote(id_factory()),
                llmid_sql,
                _sql_quote(catelogid),
                apiname_sql,
                query_apiname_sql,
                period_sql,
                ppid_sql,
            ))
            inserts.append(f"INSERT INTO llm_api_map ({cols}) VALUES ({vals});")

    return inserts
def main():
    """Command-line entry point.

    Reads llm records (and, optionally, llm_catalog_rel records) from JSON
    files, builds the llm_api_map INSERT statements and either reports how
    many would be generated (--dry-run) or emits a SQL script to stdout or
    to the file named by --output. Exits with status 1 when --input is
    not supplied.
    """
    cli = argparse.ArgumentParser(
        description='Generate llm_api_map migration SQL from existing llm data')
    cli.add_argument('--input', '-i',
                     help='Input JSON file with llm records (for offline mode)')
    cli.add_argument('--catalog-rel', '-c',
                     help='Input JSON file with llm_catalog_rel records')
    cli.add_argument('--output', '-o', default='-',
                     help='Output file for SQL statements (default: stdout)')
    cli.add_argument('--dry-run', action='store_true',
                     help='Only show count of generated statements')
    args = cli.parse_args()

    # Only offline mode is implemented here: records come from a JSON dump,
    # so a missing --input is a usage error.
    if not args.input:
        for message in (
            "No --input provided. Use --input to provide llm records JSON.",
            "Example: python migrate_llm_api_map.py -i llm_dump.json",
        ):
            print(message, file=sys.stderr)
        sys.exit(1)

    with open(args.input, 'r', encoding='utf-8') as src:
        llm_records = json.load(src)

    catalog_rel_records = None
    if args.catalog_rel:
        with open(args.catalog_rel, 'r', encoding='utf-8') as src:
            catalog_rel_records = json.load(src)

    inserts = generate_migration_sql(llm_records, catalog_rel_records)
    statement_count = len(inserts)

    if args.dry_run:
        print(f"Would generate {statement_count} INSERT statements for llm_api_map")
        return

    # Script preamble: explanatory comments plus the DDL for the new table.
    # The trailing empty entry makes the joined preamble end with a newline
    # so the first INSERT starts on its own line.
    preamble = '\n'.join([
        "-- Migration: Create llm_api_map records from existing llm data",
        "-- Generated by migrate_llm_api_map.py",
        "-- Review these statements before executing!",
        "",
        "-- Step 1: Create the llm_api_map table (if not exists)",
        """CREATE TABLE llm_api_map (
id VARCHAR(21) NOT NULL PRIMARY KEY,
llmid VARCHAR(21) NOT NULL,
llmcatelogid VARCHAR(32) NOT NULL,
apiname VARCHAR(100) NOT NULL,
query_apiname VARCHAR(100),
query_period INT,
ppid VARCHAR(21)
);""",
        "",
        "CREATE INDEX idx_llm_api_llm ON llm_api_map (llmid);",
        "CREATE UNIQUE INDEX idx_llm_api_catelog ON llm_api_map (llmid, llmcatelogid);",
        "",
        "-- Step 2: Insert data",
        "",
    ])
    output_text = preamble + '\n'.join(inserts) + '\n'

    if args.output == '-':
        print(output_text)
        return

    with open(args.output, 'w', encoding='utf-8') as dest:
        dest.write(output_text)
    print(f"Generated {statement_count} INSERT statements -> {args.output}")
if __name__ == '__main__':
main()