# pipeline-sdlc/pipeline_sdlc/handlers/deploy.py

"""
Deployment phase handlers: deploy_env_collect, deploy_test, deploy_test_verify,
deploy_production, deploy_production_verify
"""
import json
import logging
import asyncio
import subprocess

logger = logging.getLogger(__name__)


async def handle_deploy_test(tenant_id, task_id, step_name, input_data, config):
    """Deploy to test environment via SSH.

    Connection settings are read from the ``test_env`` mapping in the
    ``deploy_env_collect`` step output. Every deploy step is sent to the
    remote host through ``_ssh_exec``. A failing step is logged and the
    remaining steps still run; the overall outcome is reported in
    ``success``.

    Args:
        tenant_id: Not used by this handler.
        task_id: Not used by this handler.
        step_name: Not used by this handler.
        input_data: Mapping of previous step names to their results; only
            ``input_data["deploy_env_collect"]["output"]["test_env"]`` is read.
        config: Step configuration. A truthy ``repo_url`` enables the
            ``git pull`` step.

    Returns:
        dict with ``env_type``, ``host``, ``deploy_path``, ``commands`` (one
        ``{"command", "output", "exit_code"}`` entry per executed step) and
        ``success`` (True when every step except ``pip install`` exited 0).

    Raises:
        ValueError: If ``test_env`` is missing or lacks host, user or
            deploy_path.
    """
    env_output = input_data.get("deploy_env_collect", {}).get("output", {})
    test_env = env_output.get("test_env", {})
    if not test_env:
        raise ValueError("Test environment not configured. Run deploy_env_collect first.")

    host = test_env.get("host")
    port = test_env.get("port", 22)
    user = test_env.get("user")
    ssh_key = test_env.get("ssh_key_path", "")
    deploy_path = test_env.get("deploy_path")
    python_path = test_env.get("python_path", "python3")
    repo_url = config.get("repo_url", "")

    if not all([host, user, deploy_path]):
        raise ValueError(f"Incomplete test env config: host={host}, user={user}, deploy_path={deploy_path}")

    # Build SSH connection options
    ssh_opts = f"-o StrictHostKeyChecking=no -p {port}"
    if ssh_key:
        ssh_opts += f" -i {ssh_key}"
    ssh_target = f"{user}@{host}"

    steps = [
        # Kept as a standalone first step: it checks that the deploy path exists.
        f"cd {deploy_path}",
        "git pull origin main" if repo_url else "echo 'No repo, skipping git pull'",
        f"{python_path} -m pip install . 2>&1 | tail -5",
        # pipefail makes the pipeline report build.sh's exit status instead
        # of tail's, so a failed build is actually detected.
        'bash -o pipefail -c "bash build.sh 2>&1 | tail -20"',
    ]

    results = []
    all_success = True
    for index, step in enumerate(steps):
        # Each _ssh_exec call is a separate SSH session, so a working
        # directory set by an earlier step is lost; re-enter it every time.
        remote_cmd = step if index == 0 else f"cd {deploy_path} && {step}"
        result = await _ssh_exec(ssh_target, ssh_opts, remote_cmd)
        results.append({"command": remote_cmd, "output": result["output"], "exit_code": result["exit_code"]})
        # pip install failures are tolerated, as before.
        if result["exit_code"] != 0 and "pip install" not in step:
            all_success = False
            logger.warning("Command failed: %s -> exit %s", remote_cmd, result["exit_code"])

    return {
        "env_type": "test",
        "host": host,
        "deploy_path": deploy_path,
        "commands": results,
        "success": all_success,
    }


async def handle_deploy_test_verify(tenant_id, task_id, step_name, input_data, config):
    """Verify test deployment health.

    Probes the deployed application over HTTP with ``_http_check``: the
    ``/health`` endpoint, the ``/index.ui`` page, and up to five generated
    ``.dspy`` files listed in the ``code_auto_fix`` output (falling back to
    ``code_generate`` when that output is empty).

    Args:
        tenant_id: Not used by this handler.
        task_id: Not used by this handler.
        step_name: Not used by this handler.
        input_data: Mapping of previous step names to their results. Reads
            ``deploy_env_collect`` (for ``test_env.host``) and
            ``code_auto_fix`` / ``code_generate`` (for ``files``).
        config: Step configuration; ``test_app_port`` defaults to 9090.

    Returns:
        dict with ``env_type``, ``base_url``, ``total_checks``, ``passed``,
        ``failed``, ``checks`` (the individual check results) and
        ``verified`` (True when no check failed).
    """
    env_output = input_data.get("deploy_env_collect", {}).get("output", {})
    test_env = env_output.get("test_env", {})

    host = test_env.get("host", "localhost")
    port = config.get("test_app_port", 9090)
    base_url = f"http://{host}:{port}"

    checks = [
        # 1. Health endpoint
        await _http_check(f"{base_url}/health", "health_check"),
        # 2. Index page
        await _http_check(f"{base_url}/index.ui", "index_page"),
    ]

    # 3. API endpoints (from generated code output)
    code_output = input_data.get("code_auto_fix", {}).get("output", {})
    if not code_output:
        code_output = input_data.get("code_generate", {}).get("output", {})
    generated_files = code_output.get("files") or []  # tolerate an explicit None
    api_paths = [
        entry.get("path", "")
        for entry in generated_files
        if entry.get("path", "").endswith(".dspy")
    ]

    for path in api_paths[:5]:  # Check first 5 API endpoints
        url_path = path.replace("wwwroot/", "/")
        # A path without the "wwwroot/" prefix would otherwise be glued
        # directly onto the port number and produce an invalid URL.
        if not url_path.startswith("/"):
            url_path = "/" + url_path
        checks.append(await _http_check(f"{base_url}{url_path}", f"api:{path}"))

    passed = sum(1 for c in checks if c.get("status") in ("ok", "reachable"))
    failed = len(checks) - passed

    return {
        "env_type": "test",
        "base_url": base_url,
        "total_checks": len(checks),
        "passed": passed,
        "failed": failed,
        "checks": checks,
        "verified": failed == 0,
    }


async def handle_deploy_production(tenant_id, task_id, step_name, input_data, config):
    """Deploy to production environment via SSH.

    Connection settings are read from the ``production_env`` mapping in the
    ``deploy_env_collect`` step output. Every deploy step is sent to the
    remote host through ``_ssh_exec``. The first failing step aborts the
    deployment, except for ``pip install`` and ``systemctl`` steps, whose
    failures are tolerated.

    Args:
        tenant_id: Not used by this handler.
        task_id: Not used by this handler.
        step_name: Not used by this handler.
        input_data: Mapping of previous step names to their results; only
            ``input_data["deploy_env_collect"]["output"]["production_env"]``
            is read.
        config: Step configuration. ``restart_command`` overrides the default
            ``[sudo] systemctl restart app`` restart step.

    Returns:
        dict with ``env_type``, ``host``, ``deploy_path``, ``commands`` (one
        ``{"command", "output", "exit_code"}`` entry per executed step) and
        ``success`` (always True; failures raise instead).

    Raises:
        ValueError: If ``production_env`` is missing or lacks host, user or
            deploy_path.
        RuntimeError: If a step other than ``pip install`` / ``systemctl``
            exits with a non-zero status.
    """
    env_output = input_data.get("deploy_env_collect", {}).get("output", {})
    prod_env = env_output.get("production_env", {})

    if not prod_env:
        raise ValueError("Production environment not configured. Run deploy_env_collect first.")

    host = prod_env.get("host")
    port = prod_env.get("port", 22)
    user = prod_env.get("user")
    ssh_key = prod_env.get("ssh_key_path", "")
    deploy_path = prod_env.get("deploy_path")
    python_path = prod_env.get("python_path", "python3")

    if not all([host, user, deploy_path]):
        raise ValueError(f"Incomplete prod env: host={host}, user={user}, deploy_path={deploy_path}")

    ssh_opts = f"-o StrictHostKeyChecking=no -p {port}"
    if ssh_key:
        ssh_opts += f" -i {ssh_key}"
    ssh_target = f"{user}@{host}"

    steps = [
        # Kept as a standalone first step: it fails fast when the deploy
        # path does not exist.
        f"cd {deploy_path}",
        "git pull origin main",
        f"{python_path} -m pip install . 2>&1 | tail -5",
        # pipefail makes the pipeline report build.sh's exit status instead
        # of tail's, so a failed build aborts the deployment.
        'bash -o pipefail -c "bash build.sh 2>&1 | tail -20"',
    ]

    # Restart step: configurable, defaults to systemctl (with sudo when enabled)
    sudo_prefix = "sudo " if prod_env.get("sudo_enabled") == "Y" else ""
    steps.append(config.get("restart_command", f"{sudo_prefix}systemctl restart app"))

    results = []
    for index, step in enumerate(steps):
        # Each _ssh_exec call is a separate SSH session, so a working
        # directory set by an earlier step is lost; re-enter it every time.
        remote_cmd = step if index == 0 else f"cd {deploy_path} && {step}"
        result = await _ssh_exec(ssh_target, ssh_opts, remote_cmd)
        results.append({"command": remote_cmd, "output": result["output"], "exit_code": result["exit_code"]})
        tolerated = "pip install" in step or "systemctl" in step
        if result["exit_code"] != 0 and not tolerated:
            raise RuntimeError(f"Production deploy failed at: {remote_cmd} -> exit {result['exit_code']}")

    return {
        "env_type": "production",
        "host": host,
        "deploy_path": deploy_path,
        "commands": results,
        "success": True,
    }


async def handle_deploy_production_verify(tenant_id, task_id, step_name, input_data, config):
    """Run HTTP smoke checks against the production deployment.

    Probes ``/health``, ``/index.ui`` and ``/login.ui`` on the production
    host (taken from the ``deploy_env_collect`` output, default
    ``localhost``) at ``config["production_app_port"]`` (default 80), one
    after another, using ``_http_check``.

    Returns a dict with ``env_type``, ``base_url``, ``total_checks``,
    ``passed``, ``failed``, ``checks`` and ``verified`` (True when nothing
    failed). ``tenant_id``, ``task_id`` and ``step_name`` are not used.
    """
    collected = input_data.get("deploy_env_collect", {}).get("output", {})
    target_host = collected.get("production_env", {}).get("host", "localhost")
    app_port = config.get("production_app_port", 80)
    base_url = f"http://{target_host}:{app_port}"

    # (URL suffix, check label) pairs, probed in this order.
    probes = (
        ("/health", "health_check"),
        ("/index.ui", "index_page"),
        ("/login.ui", "login_page"),  # login page (if exists)
    )

    checks = []
    for suffix, label in probes:
        outcome = await _http_check(f"{base_url}{suffix}", label)
        checks.append(outcome)

    healthy_states = ("ok", "reachable")
    passed = len([entry for entry in checks if entry.get("status") in healthy_states])
    failed = len(checks) - passed

    summary = {"env_type": "production", "base_url": base_url}
    summary["total_checks"] = len(checks)
    summary["passed"] = passed
    summary["failed"] = failed
    summary["checks"] = checks
    summary["verified"] = not failed
    return summary


# --- Helper functions ---

async def _ssh_exec(target, opts, command):
    """Execute command via SSH.

    The local ``ssh`` client is started directly from an argument list, with
    no intermediate local shell. Quotes or shell metacharacters in
    ``command`` are therefore interpreted only by the remote shell.

    Args:
        target: SSH destination, e.g. ``user@host``.
        opts: ssh command-line options as a single string; split into
            arguments with ``shlex.split``.
        command: Command line to run remotely, passed to ssh as one argument.

    Returns:
        dict with ``output`` (stdout, followed by ``"\\nSTDERR: ..."`` when
        stderr is non-empty, truncated to 2000 characters) and ``exit_code``
        (the ssh process return code, or -1 on timeout or when the process
        could not be run).
    """
    import shlex  # local import keeps this helper self-contained

    try:
        argv = ["ssh", *shlex.split(opts), target, command]
        proc = await asyncio.create_subprocess_exec(
            *argv,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
    except Exception as e:
        # Best effort: report launch problems (bad options, ssh missing, ...)
        # through the result instead of raising.
        return {"output": str(e), "exit_code": -1}

    try:
        stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=120)
    except asyncio.TimeoutError:
        # wait_for only cancels communicate(); the ssh child keeps running
        # unless it is killed and reaped explicitly.
        try:
            proc.kill()
        except ProcessLookupError:
            pass  # process exited on its own in the meantime
        await proc.wait()
        return {"output": "SSH command timed out (120s)", "exit_code": -1}
    except Exception as e:
        return {"output": str(e), "exit_code": -1}

    output = stdout.decode("utf-8", errors="replace")
    err_text = stderr.decode("utf-8", errors="replace") if stderr else ""
    if err_text:
        output += f"\nSTDERR: {err_text}"
    return {"output": output[:2000], "exit_code": proc.returncode}


async def _http_check(url, check_name):
    """Issue one GET request and summarise the outcome as a check record.

    The request uses a 10 second total timeout. A response with a status
    below 400 yields ``status == "ok"``; any other response yields
    ``"fail"``. Any exception raised while making the request is reported as
    ``"fail"`` with the first 200 characters of the error text.
    """
    import aiohttp

    identity = {"check": check_name, "url": url}
    try:
        limit = aiohttp.ClientTimeout(total=10)
        async with aiohttp.ClientSession(timeout=limit) as client:
            async with client.get(url) as response:
                code = response.status
                verdict = "fail" if code >= 400 else "ok"
                return {**identity, "status_code": code, "status": verdict}
    except Exception as exc:
        reason = str(exc)[:200]
        return {**identity, "status": "fail", "error": reason}