259 lines
7.9 KiB
Python
259 lines
7.9 KiB
Python
"""File-based storage for pipeline tasks, artifacts, and versions."""
|
|
|
|
import json
|
|
import os
|
|
import shutil
|
|
import uuid
|
|
from datetime import datetime
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from .state import (
|
|
STATE_PENDING, STATE_RUNNING, STATE_COMPLETED, STATE_FAILED,
|
|
PIPELINE_SUBMITTED, PIPELINE_RUNNING,
|
|
build_dependency_map,
|
|
)
|
|
|
|
# Root directory for all stored data: pipeline directories and per-user index
# files are created directly beneath it. The PIPELINE_DATA_DIR environment
# variable overrides the default of ~/pipeline_data.
DATA_DIR = os.getenv("PIPELINE_DATA_DIR", os.path.expanduser("~/pipeline_data"))
|
|
|
|
|
|
def _ensure_dir(path: str):
|
|
os.makedirs(path, exist_ok=True)
|
|
|
|
|
|
def _pipeline_dir(pipeline_id: str) -> str:
    """Return the directory of *pipeline_id*, which sits directly under DATA_DIR."""
    root = DATA_DIR
    return os.path.join(root, pipeline_id)
|
|
|
|
|
|
def _version_dir(pipeline_id: str, version: int) -> str:
    """Return the ``v<version>`` sub-directory inside the pipeline's directory."""
    folder_name = f"v{version}"
    return os.path.join(_pipeline_dir(pipeline_id), folder_name)
|
|
|
|
|
|
def _manifest_path(pipeline_id: str) -> str:
    """Return the path of ``manifest.json`` inside the pipeline's directory."""
    pipeline_root = _pipeline_dir(pipeline_id)
    return os.path.join(pipeline_root, "manifest.json")
|
|
|
|
|
|
def _read_json(path: str) -> dict:
|
|
with open(path, "r", encoding="utf-8") as f:
|
|
return json.load(f)
|
|
|
|
|
|
def _write_json(path: str, data: dict):
|
|
_ensure_dir(os.path.dirname(path))
|
|
with open(path, "w", encoding="utf-8") as f:
|
|
json.dump(data, f, ensure_ascii=False, indent=2)
|
|
|
|
|
|
def generate_pipeline_id() -> str:
    """Return a fresh pipeline ID: ``ktv_`` plus the first 12 hex digits of a random UUID4."""
    hex_digits = uuid.uuid4().hex
    return "ktv_" + hex_digits[:12]
|
|
|
|
|
|
def create_pipeline(user_id: str, mode: str, title: str, params: dict) -> dict:
    """Create a new pipeline task on disk and return its manifest.

    A new pipeline ID is generated, one step record is built for every entry
    of ``build_dependency_map(mode)``, and the resulting manifest is written
    to the pipeline's manifest file. The supplied *params* are additionally
    stored as the ``_params`` input artifact of version 1, and the pipeline
    ID is added to the index file of *user_id*.

    Args:
        user_id: Owner of the pipeline; recorded in the manifest and used
            for the per-user index.
        mode: Passed to ``build_dependency_map`` to obtain the step layout.
        title: Title stored in the manifest.
        params: Parameters stored in the manifest and as the initial artifact.

    Returns:
        The manifest dict that was written to disk.
    """
    pipeline_id = generate_pipeline_id()
    dep_map = build_dependency_map(mode)
    created = datetime.now().isoformat()

    # Every step starts pending at version 1; ordering and dependency
    # information is carried over from the dependency map entry.
    step_records = {
        step_name: {
            "order": spec["order"],
            "display_name": spec["display_name"],
            "deps": spec["deps"],
            "dependents": spec["dependents"],
            "state": STATE_PENDING,
            "version": 1,
            "started_at": None,
            "completed_at": None,
            "error": None,
        }
        for step_name, spec in dep_map.items()
    }

    initial_version = {
        "created_at": created,
        "changes": "初始版本",
    }
    manifest = {
        "pipeline_id": pipeline_id,
        "user_id": user_id,
        "mode": mode,
        "title": title,
        "params": params,
        "created_at": created,
        "updated_at": created,
        "current_version": 1,
        "state": PIPELINE_SUBMITTED,
        "steps": step_records,
        "versions": {"1": initial_version},
    }

    # Lay out the pipeline directory and its first version folder, then
    # persist the manifest.
    _ensure_dir(_pipeline_dir(pipeline_id))
    _ensure_dir(_version_dir(pipeline_id, 1))
    _write_json(_manifest_path(pipeline_id), manifest)

    # Keep the initial parameters as an artifact of version 1.
    save_artifact(pipeline_id, 1, "_params", "input", params)

    # Register the pipeline in the owner's index.
    _add_to_user_index(user_id, pipeline_id)

    return manifest
|
|
|
|
|
|
def get_manifest(pipeline_id: str) -> Optional[dict]:
    """Return the stored manifest of *pipeline_id*, or ``None`` if no manifest file exists."""
    manifest_file = _manifest_path(pipeline_id)
    if os.path.exists(manifest_file):
        return _read_json(manifest_file)
    return None
|
|
|
|
|
|
def save_manifest(pipeline_id: str, manifest: dict):
    """Write *manifest* to the manifest file of *pipeline_id*.

    The ``updated_at`` entry of the passed dict is overwritten in place with
    the current local time (ISO format) before writing.
    """
    stamp = datetime.now().isoformat()
    manifest["updated_at"] = stamp
    target = _manifest_path(pipeline_id)
    _write_json(target, manifest)
|
|
|
|
|
|
def save_artifact(pipeline_id: str, version: int, step: str, io_type: str, data: Any):
    """Store artifact *data* for a step as ``<step>.<io_type>.json`` in the version folder.

    The payload is wrapped in a record that also carries the step name, the
    version, the I/O type and the time of saving.

    Args:
        pipeline_id: Pipeline the artifact belongs to.
        version: Version whose folder receives the file (created if missing).
        step: Step name; becomes the first part of the file name.
        io_type: 'input' or 'output'.
        data: The artifact payload.
    """
    version_folder = _version_dir(pipeline_id, version)
    _ensure_dir(version_folder)
    artifact_file = os.path.join(version_folder, f"{step}.{io_type}.json")
    record = {
        "step": step,
        "version": version,
        "type": io_type,
        "data": data,
        "saved_at": datetime.now().isoformat(),
    }
    _write_json(artifact_file, record)
|
|
|
|
|
|
def get_artifact(pipeline_id: str, version: int, step: str, io_type: str) -> Optional[dict]:
    """Return the stored artifact record of a step, or ``None`` if there is none.

    The file ``<step>.<io_type>.json`` is looked up in the folder of
    *version* first; when it is absent there, earlier versions are probed in
    descending order down to version 1 and the first hit is returned.
    """
    filename = f"{step}.{io_type}.json"
    # Requested version first, then every older version down to 1.
    search_order = [version, *range(version - 1, 0, -1)]
    for candidate in search_order:
        candidate_file = os.path.join(_version_dir(pipeline_id, candidate), filename)
        if os.path.exists(candidate_file):
            return _read_json(candidate_file)
    return None
|
|
|
|
|
|
def get_all_artifacts(pipeline_id: str, version: int) -> Dict[str, dict]:
    """Return every readable artifact stored in the folder of one version.

    Unlike ``get_artifact`` this does not fall back to earlier versions; only
    files physically present in the ``v<version>`` folder are considered.

    Args:
        pipeline_id: Pipeline whose artifacts are listed.
        version: Version folder to scan.

    Returns:
        A dict mapping each file name without its trailing ``.json`` (i.e.
        ``<step>.<io_type>``) to the parsed file content. Empty when the
        version folder does not exist. Files that cannot be read or parsed
        are skipped.
    """
    vdir = _version_dir(pipeline_id, version)
    # isdir (rather than exists) also covers a stray non-directory at this
    # path, which os.listdir would otherwise reject.
    if not os.path.isdir(vdir):
        return {}

    suffix = ".json"
    artifacts = {}
    for fname in os.listdir(vdir):
        if not fname.endswith(suffix):
            continue
        try:
            data = _read_json(os.path.join(vdir, fname))
        except (OSError, ValueError):
            # Best effort: an unreadable or malformed file (JSON and
            # decoding errors are ValueError subclasses) must not hide the
            # remaining artifacts.
            continue
        # Strip only the trailing extension; str.replace would also remove
        # ".json" occurring elsewhere in the name.
        artifacts[fname[: -len(suffix)]] = data
    return artifacts
|
|
|
|
|
|
def create_new_version(pipeline_id: str, changes: str) -> int:
    """Open a new version of a pipeline and return its number.

    The manifest's ``current_version`` is incremented, a ``versions`` entry
    with the creation time and *changes* is added, and the new version folder
    is populated with the regular files of the previous version folder.

    Args:
        pipeline_id: Pipeline to create the version for.
        changes: Description stored with the new version entry.

    Returns:
        The new version number.

    Raises:
        ValueError: If the pipeline has no manifest.
    """
    manifest = get_manifest(pipeline_id)
    if not manifest:
        raise ValueError(f"Pipeline not found: {pipeline_id}")

    previous_version = manifest["current_version"]
    new_version = previous_version + 1
    manifest["current_version"] = new_version
    manifest["versions"][str(new_version)] = {
        "created_at": datetime.now().isoformat(),
        "changes": changes,
    }

    source_dir = _version_dir(pipeline_id, previous_version)
    target_dir = _version_dir(pipeline_id, new_version)
    _ensure_dir(target_dir)

    # Carry the previous version's artifacts over. Hard links are tried
    # first; when linking fails the file is copied instead.
    # NOTE: a hard-linked file shares its content with the previous version,
    # so anything that rewrites such a file in place changes both versions.
    entries = os.listdir(source_dir) if os.path.exists(source_dir) else []
    for entry in entries:
        src = os.path.join(source_dir, entry)
        dst = os.path.join(target_dir, entry)
        # Only regular files are carried over, and existing targets are kept.
        if not os.path.isfile(src) or os.path.exists(dst):
            continue
        try:
            os.link(src, dst)
        except OSError:
            shutil.copy2(src, dst)

    save_manifest(pipeline_id, manifest)
    return new_version
|
|
|
|
|
|
def reset_steps(pipeline_id: str, step_names: List[str]):
    """Put the named steps back into the pending state and save the manifest.

    For each listed step present in the manifest, the state becomes pending
    and the error, start time and completion time are cleared. Names that
    are not steps of the pipeline are ignored; nothing happens when the
    pipeline has no manifest.
    """
    manifest = get_manifest(pipeline_id)
    if not manifest:
        return

    for step_name in step_names:
        if step_name not in manifest["steps"]:
            continue
        manifest["steps"][step_name].update(
            state=STATE_PENDING,
            error=None,
            started_at=None,
            completed_at=None,
        )

    save_manifest(pipeline_id, manifest)
|
|
|
|
|
|
def update_step_state(pipeline_id: str, step: str, state: str, error: str = None):
    """Record a new state for one step and refresh the pipeline-level state.

    Entering the running state stamps ``started_at``; entering the completed
    or failed state stamps ``completed_at``. A truthy *error* is stored on
    the step. Afterwards the pipeline state is derived from all steps: all
    completed -> "completed"; otherwise any failed -> "failed"; otherwise
    any running -> running; in every other case it is left as it was.

    Does nothing when the pipeline has no manifest or *step* is unknown.
    """
    manifest = get_manifest(pipeline_id)
    if not manifest or step not in manifest["steps"]:
        return

    timestamp = datetime.now().isoformat()
    record = manifest["steps"][step]
    record["state"] = state
    if state == STATE_RUNNING:
        record["started_at"] = timestamp
    elif state in (STATE_COMPLETED, STATE_FAILED):
        record["completed_at"] = timestamp
    # NOTE(review): the error is assumed to be stored for any state, not
    # only for completed/failed -- confirm against the intended nesting.
    if error:
        record["error"] = error

    # Derive the pipeline state from the states of all steps.
    step_states = [entry["state"] for entry in manifest["steps"].values()]
    if not any(s != STATE_COMPLETED for s in step_states):
        manifest["state"] = "completed"
    elif STATE_FAILED in step_states:
        manifest["state"] = "failed"
    elif STATE_RUNNING in step_states:
        manifest["state"] = PIPELINE_RUNNING

    save_manifest(pipeline_id, manifest)
|
|
|
|
|
|
# === User Index ===
|
|
|
|
def _user_index_path(user_id: str) -> str:
    """Return the path of the index file ``_user_<user_id>.json`` under DATA_DIR."""
    index_name = f"_user_{user_id}.json"
    return os.path.join(DATA_DIR, index_name)
|
|
|
|
|
|
def _add_to_user_index(user_id: str, pipeline_id: str):
    """Add *pipeline_id* to the index file of *user_id*.

    The existing index is loaded when the file is present; otherwise a new
    one with an empty pipeline list is started. The ID is appended only if
    it is not listed yet, and the index is written back.
    """
    index_file = _user_index_path(user_id)
    index = (
        _read_json(index_file)
        if os.path.exists(index_file)
        else {"user_id": user_id, "pipelines": []}
    )
    known_ids = index["pipelines"]
    if pipeline_id not in known_ids:
        known_ids.append(pipeline_id)
    _write_json(index_file, index)
|
|
|
|
|
|
def get_user_pipelines(user_id: str) -> List[str]:
    """Return the pipeline IDs listed in the user's index file.

    An empty list is returned when the user has no index file or the index
    has no ``pipelines`` entry.
    """
    index_file = _user_index_path(user_id)
    if os.path.exists(index_file):
        return _read_json(index_file).get("pipelines", [])
    return []
|