# llmage/scripts/migrate_llm_api_map.py

"""
Migration script: Generate llm_api_map records from existing llm table data.

This script reads existing llm records and produces SQL INSERT statements
for the new llm_api_map table. It does NOT directly modify the database.

Usage:
    python migrate_llm_api_map.py --input LLM_JSON [--catalog-rel REL_JSON] [--output OUTPUT_FILE] [--dry-run]

The script outputs INSERT SQL statements that can be reviewed and executed manually.
"""
import sys
import json
import argparse
from appPublic.uniqueID import getID


def generate_migration_sql(llm_records, catalog_rel_records=None):
    """
    Generate INSERT statements for llm_api_map from existing llm data.

    For each llm record with a non-empty ``id``:
    - If ``catalog_rel_records`` lists catalogs for that llm, one statement is
      produced per distinct (llmid, llmcatelogid) pair.
    - Otherwise the llm's own ``llmcatelogid`` field is used (legacy layout).
    - If neither source yields a catalog id, a warning is written to stderr
      and the record is skipped.

    NOTE: llm_catalog_rel has been deprecated. Catalog relationship is now
    maintained directly in llm_api_map table.

    Args:
        llm_records: iterable of dicts read via ``.get`` for the keys ``id``,
            ``apiname``, ``query_apiname``, ``query_period``, ``ppid`` and
            ``llmcatelogid``.
        catalog_rel_records: optional iterable of dicts with ``llmid`` and
            ``llmcatelogid``; entries missing either value are ignored.

    Returns:
        A list of ``INSERT INTO llm_api_map ...;`` strings, in input order.

    Raises:
        ValueError, TypeError: if a non-empty ``query_period`` cannot be
            converted with ``int()``.
    """
    # Build catalog_rel lookup: llmid -> [llmcatelogid, ...]
    # Repeated pairs are dropped (keeping first-seen order) so that no two
    # generated rows share the same (llmid, llmcatelogid).
    catelog_map = {}
    for rel in catalog_rel_records or ():
        rel_llmid = rel.get('llmid')
        rel_catelogid = rel.get('llmcatelogid')
        if not (rel_llmid and rel_catelogid):
            continue
        known = catelog_map.setdefault(rel_llmid, [])
        if rel_catelogid not in known:
            known.append(rel_catelogid)

    columns = ', '.join((
        'id', 'llmid', 'llmcatelogid', 'apiname',
        'query_apiname', 'query_period', 'ppid',
    ))

    inserts = []
    for llm in llm_records:
        llmid = llm.get('id')
        if not llmid:
            continue

        # Prefer explicit relation rows; fall back to the legacy column on
        # the llm record itself.
        catelog_ids = catelog_map.get(llmid)
        if not catelog_ids:
            legacy_catelogid = llm.get('llmcatelogid', '')
            if not legacy_catelogid:
                print(f"WARNING: llm {llmid} has no catalog_rel entry, skipping",
                      file=sys.stderr)
                continue
            catelog_ids = [legacy_catelogid]

        # A JSON null apiname must not become the literal text 'None'.
        apiname = llm.get('apiname')
        if apiname is None:
            apiname = ''
        query_apiname = llm.get('query_apiname', '')
        query_period = llm.get('query_period', '')
        ppid = llm.get('ppid', '')

        # These fragments are identical for every catalog of this llm, so
        # they are rendered once, outside the inner loop.
        llmid_sql = _sql_quote(llmid)
        apiname_sql = _sql_quote(apiname)
        query_apiname_sql = _sql_quote(query_apiname) if query_apiname else 'NULL'
        if query_period is not None and query_period != '':
            query_period_sql = str(int(query_period))
        else:
            query_period_sql = 'NULL'
        ppid_sql = _sql_quote(ppid) if ppid else 'NULL'

        for catelogid in catelog_ids:
            vals = ', '.join((
                _sql_quote(getID()),
                llmid_sql,
                _sql_quote(catelogid),
                apiname_sql,
                query_apiname_sql,
                query_period_sql,
                ppid_sql,
            ))
            inserts.append(f"INSERT INTO llm_api_map ({columns}) VALUES ({vals});")

    return inserts


def _sql_quote(value):
    """Render ``value`` as a single-quoted SQL string literal.

    Embedded single quotes are doubled (standard SQL escaping) so a value
    such as ``o'hara`` cannot terminate the literal early.
    NOTE(review): backslashes are left untouched; if the target server treats
    backslash as an escape character inside string literals, confirm that the
    data contains none before executing the generated script.
    """
    return "'" + str(value).replace("'", "''") + "'"


def main():
    """Command-line entry point: read JSON dumps and emit migration SQL.

    Parses ``--input``, ``--catalog-rel``, ``--output`` and ``--dry-run``,
    loads the JSON record files, calls ``generate_migration_sql`` and writes a
    script consisting of the ``llm_api_map`` DDL followed by the generated
    INSERT statements. With ``--dry-run`` only the statement count is printed.

    Exits with status 1 when ``--input`` is missing or when an input file
    does not contain a JSON array of objects.
    """
    parser = argparse.ArgumentParser(
        description='Generate llm_api_map migration SQL from existing llm data')
    parser.add_argument('--input', '-i',
                        help='Input JSON file with llm records (for offline mode)')
    parser.add_argument('--catalog-rel', '-c',
                        help='Input JSON file with catalog records (deprecated, use llm_api_map instead)')
    parser.add_argument('--output', '-o', default='-',
                        help='Output file for SQL statements (default: stdout)')
    parser.add_argument('--dry-run', action='store_true',
                        help='Only show count of generated statements')
    args = parser.parse_args()

    # Only offline mode is implemented: records come from a JSON dump, so an
    # input file is mandatory.
    if not args.input:
        print("No --input provided. Use --input to provide llm records JSON.",
              file=sys.stderr)
        print("Example: python migrate_llm_api_map.py -i llm_dump.json",
              file=sys.stderr)
        sys.exit(1)

    llm_records = _load_json_records(args.input, 'llm')

    catalog_rel_records = None
    if args.catalog_rel:
        catalog_rel_records = _load_json_records(args.catalog_rel,
                                                 'catalog relation')

    inserts = generate_migration_sql(llm_records, catalog_rel_records)

    if args.dry_run:
        print(f"Would generate {len(inserts)} INSERT statements for llm_api_map")
        return

    # Output
    header_lines = [
        "-- Migration: Create llm_api_map records from existing llm data",
        "-- Generated by migrate_llm_api_map.py",
        "-- Review these statements before executing!",
        "",
        # The DDL below is a plain CREATE TABLE, so the header tells the
        # reviewer what to do when the table is already present.
        "-- Step 1: Create the llm_api_map table (skip this step if it already exists)",
        """CREATE TABLE llm_api_map (
    id VARCHAR(21) NOT NULL PRIMARY KEY,
    llmid VARCHAR(21) NOT NULL,
    llmcatelogid VARCHAR(32) NOT NULL,
    apiname VARCHAR(100) NOT NULL,
    query_apiname VARCHAR(100),
    query_period INT,
    ppid VARCHAR(21)
);""",
        "",
        "CREATE INDEX idx_llm_api_llm ON llm_api_map (llmid);",
        "CREATE UNIQUE INDEX idx_llm_api_catelog ON llm_api_map (llmid, llmcatelogid);",
        "",
        "-- Step 2: Insert data",
        ""
    ]

    # The trailing "" in header_lines makes the header end with a newline, so
    # the first INSERT starts on its own line.
    output_text = '\n'.join(header_lines) + '\n'.join(inserts) + '\n'

    if args.output == '-':
        # output_text already ends with a newline; print() would add another.
        sys.stdout.write(output_text)
    else:
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(output_text)
        print(f"Generated {len(inserts)} INSERT statements -> {args.output}")


def _load_json_records(path, label):
    """Load ``path`` as JSON and return it if it is a list of objects.

    Prints an error to stderr and exits with status 1 for any other shape,
    because the SQL generator calls ``.get`` on every record.
    """
    with open(path, 'r', encoding='utf-8') as f:
        records = json.load(f)
    if not isinstance(records, list) or not all(
            isinstance(record, dict) for record in records):
        print(f"ERROR: {path} must contain a JSON array of {label} objects",
              file=sys.stderr)
        sys.exit(1)
    return records


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()