"""File-based storage for pipeline tasks, artifacts, and versions.""" import json import os import shutil import uuid from datetime import datetime from typing import Any, Dict, List, Optional from .state import ( STATE_PENDING, STATE_RUNNING, STATE_COMPLETED, STATE_FAILED, PIPELINE_SUBMITTED, PIPELINE_RUNNING, build_dependency_map, ) # Base data directory DATA_DIR = os.environ.get("PIPELINE_DATA_DIR", os.path.expanduser("~/pipeline_data")) def _ensure_dir(path: str): os.makedirs(path, exist_ok=True) def _pipeline_dir(pipeline_id: str) -> str: return os.path.join(DATA_DIR, pipeline_id) def _version_dir(pipeline_id: str, version: int) -> str: return os.path.join(_pipeline_dir(pipeline_id), f"v{version}") def _manifest_path(pipeline_id: str) -> str: return os.path.join(_pipeline_dir(pipeline_id), "manifest.json") def _read_json(path: str) -> dict: with open(path, "r", encoding="utf-8") as f: return json.load(f) def _write_json(path: str, data: dict): _ensure_dir(os.path.dirname(path)) with open(path, "w", encoding="utf-8") as f: json.dump(data, f, ensure_ascii=False, indent=2) def generate_pipeline_id() -> str: return f"ktv_{uuid.uuid4().hex[:12]}" def create_pipeline(user_id: str, mode: str, title: str, params: dict) -> dict: """Create a new pipeline task. Returns the manifest.""" pipeline_id = generate_pipeline_id() dep_map = build_dependency_map(mode) now = datetime.now().isoformat() # Build steps steps = {} for name, info in dep_map.items(): steps[name] = { "order": info["order"], "display_name": info["display_name"], "deps": info["deps"], "dependents": info["dependents"], "state": STATE_PENDING, "version": 1, "started_at": None, "completed_at": None, "error": None, } manifest = { "pipeline_id": pipeline_id, "user_id": user_id, "mode": mode, "title": title, "params": params, "created_at": now, "updated_at": now, "current_version": 1, "state": PIPELINE_SUBMITTED, "steps": steps, "versions": { "1": { "created_at": now, "changes": "初始版本", } }, } # Write to disk pdir = _pipeline_dir(pipeline_id) _ensure_dir(pdir) _ensure_dir(_version_dir(pipeline_id, 1)) _write_json(_manifest_path(pipeline_id), manifest) # Store initial params as artifact save_artifact(pipeline_id, 1, "_params", "input", params) # Index by user _add_to_user_index(user_id, pipeline_id) return manifest def get_manifest(pipeline_id: str) -> Optional[dict]: """Read pipeline manifest.""" path = _manifest_path(pipeline_id) if not os.path.exists(path): return None return _read_json(path) def save_manifest(pipeline_id: str, manifest: dict): """Save pipeline manifest.""" manifest["updated_at"] = datetime.now().isoformat() _write_json(_manifest_path(pipeline_id), manifest) def save_artifact(pipeline_id: str, version: int, step: str, io_type: str, data: Any): """Save artifact data for a step. io_type: 'input' or 'output' """ vdir = _version_dir(pipeline_id, version) _ensure_dir(vdir) path = os.path.join(vdir, f"{step}.{io_type}.json") _write_json(path, {"step": step, "version": version, "type": io_type, "data": data, "saved_at": datetime.now().isoformat()}) def get_artifact(pipeline_id: str, version: int, step: str, io_type: str) -> Optional[dict]: """Read artifact data for a step.""" vdir = _version_dir(pipeline_id, version) path = os.path.join(vdir, f"{step}.{io_type}.json") if not os.path.exists(path): # Try previous versions for v in range(version, 0, -1): path = os.path.join(_version_dir(pipeline_id, v), f"{step}.{io_type}.json") if os.path.exists(path): return _read_json(path) return None return _read_json(path) def get_all_artifacts(pipeline_id: str, version: int) -> Dict[str, dict]: """Get all artifacts for a specific version.""" vdir = _version_dir(pipeline_id, version) if not os.path.exists(vdir): return {} artifacts = {} for fname in os.listdir(vdir): if fname.endswith(".json"): fpath = os.path.join(vdir, fname) try: data = _read_json(fpath) key = fname.replace(".json", "") artifacts[key] = data except Exception: pass return artifacts def create_new_version(pipeline_id: str, changes: str) -> int: """Create a new version directory. Returns new version number.""" manifest = get_manifest(pipeline_id) if not manifest: raise ValueError(f"Pipeline not found: {pipeline_id}") new_version = manifest["current_version"] + 1 manifest["current_version"] = new_version manifest["versions"][str(new_version)] = { "created_at": datetime.now().isoformat(), "changes": changes, } # Copy previous version artifacts to new version (hard links) prev_vdir = _version_dir(pipeline_id, new_version - 1) new_vdir = _version_dir(pipeline_id, new_version) _ensure_dir(new_vdir) if os.path.exists(prev_vdir): for fname in os.listdir(prev_vdir): src = os.path.join(prev_vdir, fname) dst = os.path.join(new_vdir, fname) if os.path.isfile(src) and not os.path.exists(dst): try: os.link(src, dst) # hard link except OSError: shutil.copy2(src, dst) save_manifest(pipeline_id, manifest) return new_version def reset_steps(pipeline_id: str, step_names: List[str]): """Reset specified steps to pending state.""" manifest = get_manifest(pipeline_id) if not manifest: return for name in step_names: if name in manifest["steps"]: manifest["steps"][name]["state"] = STATE_PENDING manifest["steps"][name]["error"] = None manifest["steps"][name]["started_at"] = None manifest["steps"][name]["completed_at"] = None save_manifest(pipeline_id, manifest) def update_step_state(pipeline_id: str, step: str, state: str, error: str = None): """Update a step's state.""" manifest = get_manifest(pipeline_id) if not manifest or step not in manifest["steps"]: return now = datetime.now().isoformat() manifest["steps"][step]["state"] = state if state == STATE_RUNNING: manifest["steps"][step]["started_at"] = now elif state in (STATE_COMPLETED, STATE_FAILED): manifest["steps"][step]["completed_at"] = now if error: manifest["steps"][step]["error"] = error # Update pipeline state all_states = [s["state"] for s in manifest["steps"].values()] if all(s == STATE_COMPLETED for s in all_states): manifest["state"] = "completed" elif any(s == STATE_FAILED for s in all_states): manifest["state"] = "failed" elif any(s == STATE_RUNNING for s in all_states): manifest["state"] = PIPELINE_RUNNING save_manifest(pipeline_id, manifest) # === User Index === def _user_index_path(user_id: str) -> str: return os.path.join(DATA_DIR, f"_user_{user_id}.json") def _add_to_user_index(user_id: str, pipeline_id: str): path = _user_index_path(user_id) if os.path.exists(path): data = _read_json(path) else: data = {"user_id": user_id, "pipelines": []} if pipeline_id not in data["pipelines"]: data["pipelines"].append(pipeline_id) _write_json(path, data) def get_user_pipelines(user_id: str) -> List[str]: """Get all pipeline IDs for a user.""" path = _user_index_path(user_id) if not os.path.exists(path): return [] data = _read_json(path) return data.get("pipelines", [])