259 lines
7.9 KiB
Python

"""File-based storage for pipeline tasks, artifacts, and versions."""
import json
import os
import shutil
import uuid
from datetime import datetime
from typing import Any, Dict, List, Optional
from .state import (
STATE_PENDING, STATE_RUNNING, STATE_COMPLETED, STATE_FAILED,
PIPELINE_SUBMITTED, PIPELINE_RUNNING,
build_dependency_map,
)
# Root directory for all pipeline data. Taken from the PIPELINE_DATA_DIR
# environment variable when it is set; otherwise ~/pipeline_data with "~"
# expanded for the current user.
DATA_DIR = os.getenv("PIPELINE_DATA_DIR", os.path.expanduser("~/pipeline_data"))
def _ensure_dir(path: str):
    """Create directory ``path`` together with any missing parents.

    An already existing directory is not an error (``exist_ok=True``).
    """
    os.makedirs(name=path, exist_ok=True)
def _pipeline_dir(pipeline_id: str) -> str:
    """Return the directory directly under ``DATA_DIR`` named after the pipeline id."""
    base_dir = DATA_DIR
    return os.path.join(base_dir, pipeline_id)
def _version_dir(pipeline_id: str, version: int) -> str:
    """Return the ``v<version>`` subdirectory inside the pipeline's directory."""
    folder_name = f"v{version}"
    return os.path.join(_pipeline_dir(pipeline_id), folder_name)
def _manifest_path(pipeline_id: str) -> str:
    """Return the path of ``manifest.json`` inside the pipeline's directory."""
    pipeline_root = _pipeline_dir(pipeline_id)
    return os.path.join(pipeline_root, "manifest.json")
def _read_json(path: str) -> dict:
    """Read the UTF-8 encoded file at ``path`` and return its parsed JSON content."""
    with open(path, "r", encoding="utf-8") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
def _write_json(path: str, data: dict):
    """Serialize ``data`` as indented UTF-8 JSON at ``path``.

    Missing parent directories are created. The JSON is written to a
    temporary sibling file first and then moved over ``path`` with
    ``os.replace``. A failure part-way through (for example a value that is
    not JSON serializable) therefore leaves an existing file untouched
    instead of truncated. Because the destination is replaced rather than
    rewritten in place, other hard links to the old file keep their
    previous content.

    Args:
        path: Destination file path.
        data: JSON-serializable value to store.

    Raises:
        TypeError: If ``data`` contains a value ``json`` cannot serialize.
        OSError: If the directory or file cannot be created or written.
    """
    parent = os.path.dirname(path)
    # A bare filename has an empty dirname, which os.makedirs rejects.
    if parent:
        os.makedirs(parent, exist_ok=True)

    # The temp name deliberately does not end in ".json", so code that scans
    # a directory for "*.json" files never picks up a half-written file.
    tmp_path = f"{path}.{uuid.uuid4().hex}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, path)
    except BaseException:
        # Do not leave the temp file behind; then surface the original error.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise
def generate_pipeline_id() -> str:
    """Return a new pipeline id: ``ktv_`` plus the first 12 hex digits of a random UUID4."""
    random_hex = uuid.uuid4().hex
    return "ktv_" + random_hex[:12]
def create_pipeline(user_id: str, mode: str, title: str, params: dict) -> dict:
    """Create a new pipeline on disk and return its manifest.

    A fresh pipeline id is generated and one step entry is built for every
    item of ``build_dependency_map(mode)``. Each step starts in
    ``STATE_PENDING`` at version 1 with no timestamps and no error. The
    manifest is written to the pipeline directory, ``params`` is stored as
    the ``_params`` input artifact of version 1, and the pipeline id is
    added to the user's index.

    Args:
        user_id: Owner of the pipeline; stored in the manifest and used for
            the user index.
        mode: Passed to ``build_dependency_map`` to obtain the step layout.
        title: Title stored in the manifest.
        params: Parameters stored in the manifest and as the initial artifact.

    Returns:
        The manifest dict that was written.
    """
    initial_version = 1
    pipeline_id = generate_pipeline_id()
    dep_map = build_dependency_map(mode)
    now = datetime.now().isoformat()

    # One manifest entry per step described by the dependency map.
    steps = {
        step_name: {
            "order": step_info["order"],
            "display_name": step_info["display_name"],
            "deps": step_info["deps"],
            "dependents": step_info["dependents"],
            "state": STATE_PENDING,
            "version": initial_version,
            "started_at": None,
            "completed_at": None,
            "error": None,
        }
        for step_name, step_info in dep_map.items()
    }

    version_history = {
        str(initial_version): {
            "created_at": now,
            "changes": "初始版本",
        },
    }
    manifest = {
        "pipeline_id": pipeline_id,
        "user_id": user_id,
        "mode": mode,
        "title": title,
        "params": params,
        "created_at": now,
        "updated_at": now,
        "current_version": initial_version,
        "state": PIPELINE_SUBMITTED,
        "steps": steps,
        "versions": version_history,
    }

    # Persist: directories first, then the manifest itself.
    _ensure_dir(_pipeline_dir(pipeline_id))
    _ensure_dir(_version_dir(pipeline_id, initial_version))
    _write_json(_manifest_path(pipeline_id), manifest)

    # The submitted parameters are kept as the first artifact of version 1.
    save_artifact(pipeline_id, initial_version, "_params", "input", params)

    # Make the pipeline discoverable through the owner's index file.
    _add_to_user_index(user_id, pipeline_id)
    return manifest
def get_manifest(pipeline_id: str) -> Optional[dict]:
    """Load the pipeline's manifest, or return ``None`` if no manifest file exists."""
    manifest_file = _manifest_path(pipeline_id)
    if os.path.exists(manifest_file):
        return _read_json(manifest_file)
    return None
def save_manifest(pipeline_id: str, manifest: dict):
    """Write ``manifest`` to the pipeline's manifest file.

    ``manifest["updated_at"]`` is set to the current local time (ISO format)
    before writing, so the dict passed in is modified in place.
    """
    timestamp = datetime.now().isoformat()
    manifest["updated_at"] = timestamp
    destination = _manifest_path(pipeline_id)
    _write_json(destination, manifest)
def save_artifact(pipeline_id: str, version: int, step: str, io_type: str, data: Any):
    """Store artifact data for one step of one pipeline version.

    The artifact is written to ``<step>.<io_type>.json`` inside the version
    directory (created if missing). ``data`` is wrapped in an envelope
    recording the step, version, type and save time.

    Args:
        pipeline_id: Pipeline the artifact belongs to.
        version: Version number whose directory receives the file.
        step: Step name; becomes part of the file name.
        io_type: ``'input'`` or ``'output'``.
        data: JSON-serializable payload.
    """
    version_root = _version_dir(pipeline_id, version)
    _ensure_dir(version_root)
    artifact_file = os.path.join(version_root, f"{step}.{io_type}.json")
    envelope = {
        "step": step,
        "version": version,
        "type": io_type,
        "data": data,
        "saved_at": datetime.now().isoformat(),
    }
    _write_json(artifact_file, envelope)
def get_artifact(pipeline_id: str, version: int, step: str, io_type: str) -> Optional[dict]:
    """Read the stored artifact envelope for a step.

    The file ``<step>.<io_type>.json`` is looked up in the requested
    version's directory first. If it is not there, versions are probed from
    ``version`` down to 1 and the first file found is returned.

    Returns:
        The parsed artifact file, or ``None`` when no version has it.
    """
    artifact_name = f"{step}.{io_type}.json"

    requested = os.path.join(_version_dir(pipeline_id, version), artifact_name)
    if os.path.exists(requested):
        return _read_json(requested)

    # Fall back to earlier versions, newest first. The range starts at
    # ``version`` itself, so the requested version is probed once more
    # before the older ones.
    for candidate_version in range(version, 0, -1):
        candidate = os.path.join(_version_dir(pipeline_id, candidate_version), artifact_name)
        if os.path.exists(candidate):
            return _read_json(candidate)
    return None
def get_all_artifacts(pipeline_id: str, version: int) -> Dict[str, dict]:
    """Return every readable artifact stored in one version's directory.

    Only that exact version directory is scanned; there is no fallback to
    earlier versions. Files are visited in sorted name order.

    Args:
        pipeline_id: Pipeline to inspect.
        version: Version number whose directory is listed.

    Returns:
        A dict mapping each file name without its trailing ``.json`` (for
        example ``"<step>.<io_type>"``) to the parsed file content. Empty
        when the version directory does not exist. Files that cannot be
        read or parsed are skipped.
    """
    vdir = _version_dir(pipeline_id, version)
    if not os.path.isdir(vdir):
        return {}

    suffix = ".json"
    artifacts = {}
    for fname in sorted(os.listdir(vdir)):
        if not fname.endswith(suffix):
            continue
        try:
            data = _read_json(os.path.join(vdir, fname))
        except (OSError, ValueError):
            # Best effort: an unreadable or corrupt file (JSON and text
            # decoding errors are ValueError subclasses) must not hide the
            # remaining artifacts.
            continue
        # Strip only the trailing extension; str.replace would also remove
        # ".json" occurring inside the step name.
        artifacts[fname[: -len(suffix)]] = data
    return artifacts
def create_new_version(pipeline_id: str, changes: str) -> int:
    """Create the next version of a pipeline and return its number.

    The manifest's ``current_version`` is incremented and an entry
    describing the new version is added under ``versions``. Every regular
    file of the previous version's directory is then copied into the new
    version's directory (files already present there are left alone), and
    the manifest is saved.

    Args:
        pipeline_id: Pipeline to add a version to.
        changes: Description stored with the new version entry.

    Returns:
        The new version number.

    Raises:
        ValueError: If the pipeline has no manifest.
    """
    manifest = get_manifest(pipeline_id)
    if not manifest:
        raise ValueError(f"Pipeline not found: {pipeline_id}")

    new_version = manifest["current_version"] + 1
    manifest["current_version"] = new_version
    manifest["versions"][str(new_version)] = {
        "created_at": datetime.now().isoformat(),
        "changes": changes,
    }

    prev_vdir = _version_dir(pipeline_id, new_version - 1)
    new_vdir = _version_dir(pipeline_id, new_version)
    _ensure_dir(new_vdir)
    if os.path.isdir(prev_vdir):
        for fname in os.listdir(prev_vdir):
            src = os.path.join(prev_vdir, fname)
            dst = os.path.join(new_vdir, fname)
            if os.path.isfile(src) and not os.path.exists(dst):
                # Copy rather than hard-link: a hard link shares one inode
                # between both versions, so any in-place rewrite of the new
                # version's file would silently change the previous
                # version's artifact as well.
                shutil.copy2(src, dst)

    save_manifest(pipeline_id, manifest)
    return new_version
def reset_steps(pipeline_id: str, step_names: List[str]):
    """Put the named steps back into the pending state.

    For each name that exists in the manifest, the state becomes
    ``STATE_PENDING`` and the error and both timestamps are cleared. Unknown
    names are ignored. The manifest is saved afterwards. Nothing happens
    when the pipeline has no manifest.
    """
    manifest = get_manifest(pipeline_id)
    if not manifest:
        return

    for step_name in step_names:
        if step_name not in manifest["steps"]:
            continue
        manifest["steps"][step_name].update(
            state=STATE_PENDING,
            error=None,
            started_at=None,
            completed_at=None,
        )

    save_manifest(pipeline_id, manifest)
def update_step_state(pipeline_id: str, step: str, state: str, error: str = None):
    """Record a new state for one step and refresh the pipeline state.

    Does nothing when the pipeline has no manifest or the step is unknown.

    Step bookkeeping:
        * ``STATE_RUNNING`` stamps ``started_at``.
        * ``STATE_COMPLETED`` / ``STATE_FAILED`` stamp ``completed_at``.
        * A truthy ``error`` is stored; an empty or ``None`` error leaves
          any previously stored error as it is.

    Pipeline state, first match wins:
        * all steps completed -> ``"completed"``
        * any step failed -> ``"failed"``
        * any step running -> ``PIPELINE_RUNNING``
        * otherwise the pipeline state is left unchanged.
    """
    manifest = get_manifest(pipeline_id)
    if not manifest or step not in manifest["steps"]:
        return

    now = datetime.now().isoformat()
    entry = manifest["steps"][step]
    entry["state"] = state
    if state == STATE_RUNNING:
        entry["started_at"] = now
    elif state in (STATE_COMPLETED, STATE_FAILED):
        entry["completed_at"] = now
    if error:
        entry["error"] = error

    # Derive the pipeline-level state from the states of all steps.
    step_states = [info["state"] for info in manifest["steps"].values()]
    if all(current == STATE_COMPLETED for current in step_states):
        manifest["state"] = "completed"
    elif STATE_FAILED in step_states:
        manifest["state"] = "failed"
    elif STATE_RUNNING in step_states:
        manifest["state"] = PIPELINE_RUNNING

    save_manifest(pipeline_id, manifest)
# === User Index ===
def _user_index_path(user_id: str) -> str:
    """Return the path of the user's index file, ``_user_<user_id>.json`` under ``DATA_DIR``."""
    index_name = f"_user_{user_id}.json"
    return os.path.join(DATA_DIR, index_name)
def _add_to_user_index(user_id: str, pipeline_id: str):
    """Record ``pipeline_id`` in the user's index file.

    The index file is created when it does not exist yet. An existing index
    that lacks the ``"pipelines"`` list is repaired instead of raising
    ``KeyError``, matching the tolerant read in ``get_user_pipelines``. The
    file is written only when the id was actually added.

    Args:
        user_id: Owner whose index is updated.
        pipeline_id: Pipeline id to record.
    """
    path = _user_index_path(user_id)
    if os.path.exists(path):
        data = _read_json(path)
    else:
        data = {"user_id": user_id, "pipelines": []}

    pipelines = data.setdefault("pipelines", [])
    if pipeline_id not in pipelines:
        pipelines.append(pipeline_id)
        _write_json(path, data)
def get_user_pipelines(user_id: str) -> List[str]:
    """Return the pipeline ids recorded in the user's index file.

    An empty list is returned when the index file does not exist or has no
    ``"pipelines"`` entry.
    """
    index_file = _user_index_path(user_id)
    if os.path.exists(index_file):
        index = _read_json(index_file)
        return index.get("pipelines", [])
    return []