bugfix
This commit is contained in:
parent
430c57b277
commit
e3d1d04387
77
cluster-config.yaml
Normal file
77
cluster-config.yaml
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
# cluster-config.yaml - 修改为你的实际集群配置
|
||||||
|
|
||||||
|
kubernetes:
|
||||||
|
version: "1.29.3"
|
||||||
|
pod_cidr: "10.244.0.0/16"
|
||||||
|
service_cidr: "10.96.0.0/12"
|
||||||
|
cluster_name: "offline-k8s-cluster"
|
||||||
|
|
||||||
|
k8s_version: "1.29.3"
|
||||||
|
containerd_version: "1.7.13"
|
||||||
|
crictl_version: "1.29.0"
|
||||||
|
|
||||||
|
kubevirt_version: "v1.28.0"
|
||||||
|
cdi_version: "v1.65.0"
|
||||||
|
|
||||||
|
nfs_server: "192.168.16.2"
|
||||||
|
nfs_path: "/d/share/101206"
|
||||||
|
|
||||||
|
|
||||||
|
registry: "registry.local:5000" # 可选:若使用私有 registry
|
||||||
|
|
||||||
|
|
||||||
|
control_plane_ip: "192.168.16.5"
|
||||||
|
|
||||||
|
nodes:
|
||||||
|
control_plane:
|
||||||
|
hostname: "k8s-master"
|
||||||
|
ip: "192.168.16.5"
|
||||||
|
|
||||||
|
join:
|
||||||
|
token: "abcdef.0123456789abcdef"
|
||||||
|
hash: "123456abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234"
|
||||||
|
|
||||||
|
# =====================================================
|
||||||
|
# GPU Operator / NVIDIA 配置
|
||||||
|
# =====================================================
|
||||||
|
gpu:
|
||||||
|
driver_version: "535"
|
||||||
|
cuda_version: "12.4"
|
||||||
|
|
||||||
|
# =====================================================
|
||||||
|
# KubeVirt configuration
|
||||||
|
# =====================================================
|
||||||
|
kubevirt:
|
||||||
|
version: "1.3.0"
|
||||||
|
namespace: "kubevirt"
|
||||||
|
|
||||||
|
# =====================================================
|
||||||
|
# CDI (Containerized Data Importer)
|
||||||
|
# =====================================================
|
||||||
|
cdi:
|
||||||
|
version: "1.58.0"
|
||||||
|
namespace: "cdi"
|
||||||
|
|
||||||
|
# =====================================================
|
||||||
|
# GPU Operator
|
||||||
|
# =====================================================
|
||||||
|
gpu_operator:
|
||||||
|
version: "v23.9.2"
|
||||||
|
driver_version: "535"
|
||||||
|
namespace: "gpu-operator"
|
||||||
|
|
||||||
|
# =====================================================
|
||||||
|
# NFS shared storage for VM disks
|
||||||
|
# =====================================================
|
||||||
|
storage:
|
||||||
|
nfs_server: "192.168.16.2"
|
||||||
|
nfs_path: "/d/share/11157"
|
||||||
|
storage_class_name: "nfs-kubevirt"
|
||||||
|
|
||||||
|
# =====================================================
|
||||||
|
# Offline bundle paths
|
||||||
|
# =====================================================
|
||||||
|
offline_bundle:
|
||||||
|
output_dir: "/opt/k8s-offline"
|
||||||
|
output_file: "k8s-offline.tgz"
|
||||||
|
|
||||||
41
control-plane-install.sh.j2
Normal file
41
control-plane-install.sh.j2
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
#!/bin/bash
# Offline bootstrap of the Kubernetes control plane (Jinja2 template).
#
# Expects k8s-offline.tgz in the current working directory. The archive is
# unpacked under $OFFLINE, the Kubernetes binaries are installed, the bundled
# container images are imported into containerd, and only then is
# `kubeadm init` executed, followed by the CNI, KubeVirt/CDI and NFS-CSI
# manifests shipped in the bundle.
set -e

OFFLINE=/opt/k8s-offline
CACHE="$OFFLINE/offline-cache"

echo "[1] 解压离线包"
mkdir -p "$OFFLINE"
tar xf k8s-offline.tgz -C "$OFFLINE"

echo "[2] 安装 kubeadm/kubelet/kubectl"
install -m755 "$CACHE"/bin/* /usr/local/bin/

# The images must be present in containerd BEFORE `kubeadm init`: init
# pre-pulls the control-plane images and cannot reach a registry on an
# air-gapped host. This matches the order used by the worker scripts.
echo "[3] 加载所有离线镜像"
for img in "$CACHE"/images/*.tar; do
    ctr -n=k8s.io images import "$img"
done

echo "[4] 初始化控制平面"
kubeadm init \
  --kubernetes-version={{ kubernetes.version }} \
  --pod-network-cidr={{ kubernetes.pod_cidr }} \
  --service-cidr={{ kubernetes.service_cidr }} \
  --upload-certs

# Make the admin kubeconfig available to the kubectl calls below.
mkdir -p ~/.kube
cp /etc/kubernetes/admin.conf ~/.kube/config

echo "[5] 部署 CNI(flannel)"
# Prefer a manifest shipped in the bundle so this step works offline; fall
# back to the upstream URL (requires network access) when it is not bundled.
flannel_manifest="$CACHE/manifests/kube-flannel.yml"
if [[ -f "$flannel_manifest" ]]; then
    kubectl apply -f "$flannel_manifest"
else
    echo "WARN: $flannel_manifest not found, fetching flannel from GitHub" >&2
    kubectl apply -f https://raw.githubusercontent.com/flannel-io/flannel/master/Documentation/kube-flannel.yml
fi

echo "[6] 部署 KubeVirt 与 CDI"
kubectl apply -f "$CACHE/manifests/kubevirt-operator.yaml"
kubectl apply -f "$CACHE/manifests/kubevirt-cr.yaml"
kubectl apply -f "$CACHE/manifests/cdi-operator.yaml"
kubectl apply -f "$CACHE/manifests/cdi-cr.yaml"

echo "[7] 部署 NFS-CSI"
kubectl apply -f "$CACHE/manifests/nfs-csi.yaml"

echo "控制平面安装完成。"
|
||||||
|
|
||||||
24
cpu-worker-install.sh.j2
Normal file
24
cpu-worker-install.sh.j2
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
#!/bin/bash
# Offline bootstrap of a CPU-only worker node (Jinja2 template).
#
# Unpacks k8s-offline.tgz from the current directory, installs the bundled
# binaries, imports the bundled images into containerd's k8s.io namespace
# and joins the cluster with the token/hash from cluster-config.yaml.
set -e

OFFLINE=/opt/k8s-offline
bundle_root="${OFFLINE}/offline-cache"

# Extract the offline archive under $OFFLINE.
unpack_bundle() {
    echo "[1] 解压离线包"
    mkdir -p "${OFFLINE}"
    tar xf k8s-offline.tgz -C "${OFFLINE}"
}

# Copy every bundled binary into /usr/local/bin with mode 755.
install_binaries() {
    echo "[2] 安装 kubeadm/kubelet/kubectl"
    install -m755 "${bundle_root}"/bin/* /usr/local/bin/
}

# Import each image tarball into containerd.
import_images() {
    local tarball
    echo "[3] 加载所有离线镜像"
    for tarball in "${bundle_root}"/images/*.tar; do
        ctr -n=k8s.io images import "${tarball}"
    done
}

# Join the cluster through the control-plane API endpoint.
join_cluster() {
    echo "[4] 加入集群"
    kubeadm join {{ nodes.control_plane.ip }}:6443 \
      --token {{ join.token }} \
      --discovery-token-ca-cert-hash sha256:{{ join.hash }}
}

unpack_bundle
install_binaries
import_images
join_cluster

echo "CPU 工作节点已加入集群"
|
||||||
|
|
||||||
50
download_offline_packages.sh.j2
Normal file
50
download_offline_packages.sh.j2
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
#!/bin/bash
# Build the offline bundle k8s-offline.tgz on an internet-connected,
# apt-based host (Jinja2 template, rendered from cluster-config.yaml).
#
# A docker-compatible CLI is required on the download host; podman-docker is
# installed only when no `docker` command is found.
# Manifests are copied from ../manifests relative to the working directory.
#
# Optional environment overrides:
#   ARCH         target architecture for downloaded binaries (default: amd64)
#   CNI_VERSION  containernetworking/plugins release tag (default: v1.4.0)

set -euo pipefail

curdir=$(pwd)
OUT=./k8s-offline.tgz
# Absolute path: the script changes directory more than once, and a relative
# $TMP stops resolving after the first `cd`.
TMP="${curdir}/offline-cache"

ARCH="${ARCH:-amd64}"
K8S_VERSION="{{ kubernetes.version }}"
# cri-tools release tags carry a leading "v"; the config value does not.
CRICTL_VERSION="v{{ crictl_version }}"
CONTAINERD_VERSION="{{ containerd_version }}"
CNI_VERSION="${CNI_VERSION:-v1.4.0}"

if ! command -v docker >/dev/null 2>&1; then
    apt-get install -y podman-docker
fi
mkdir -p "$TMP/bin" "$TMP/manifests" "$TMP/images" "$TMP/deps"

echo "[1] 下载 依赖包"
cd "$TMP/deps"
# apt-get spells the flag --download-only, and the NFS client package on
# apt-based systems is nfs-common (nfs-utils is the RPM-family name).
apt-get install -y --download-only nfs-common rpcbind
# --download-only leaves the .deb files in apt's archive cache rather than in
# the current directory, so copy them into the bundle.
# NOTE(review): this copies every cached .deb, and packages that are already
# installed on this host are not downloaded again — confirm that is acceptable.
find /var/cache/apt/archives -maxdepth 1 -name '*.deb' -exec cp -t "$TMP/deps" {} +

echo "📥 下载 crictl"
curl -fL "https://github.com/kubernetes-sigs/cri-tools/releases/download/${CRICTL_VERSION}/crictl-${CRICTL_VERSION}-linux-${ARCH}.tar.gz" | tar xz -C .
echo "📥 下载 CNI plugins"
curl -fL "https://github.com/containernetworking/plugins/releases/download/${CNI_VERSION}/cni-plugins-linux-${ARCH}-${CNI_VERSION}.tgz" -o cni-plugins.tgz

echo "📥 下载 containerd"
CONTAINERD_URL="https://github.com/containerd/containerd/releases/download/v${CONTAINERD_VERSION}/containerd-${CONTAINERD_VERSION}-linux-${ARCH}.tar.gz"
curl -fL "${CONTAINERD_URL}" -o containerd.tar.gz

echo "[2] 下载 Kubernetes 二进制 {{ kubernetes.version }}"
cd "$TMP/bin"
for tool in kubeadm kubelet kubectl; do
    curl -fLO "https://dl.k8s.io/release/v${K8S_VERSION}/bin/linux/${ARCH}/${tool}"
done
chmod +x kubeadm kubelet kubectl

cd "$curdir"
echo "[3] 下载镜像(kubeadm config images)"
"$TMP/bin/kubeadm" config images list --kubernetes-version "${K8S_VERSION}" > "$TMP/images/images.txt"
# Load the list that was just written; the loop below had nothing to iterate
# over before because the array was never populated.
mapfile -t images < "$TMP/images/images.txt"
for img in "${images[@]}"; do
    [[ -n "$img" ]] || continue
    echo "Pull image: $img"
    docker pull "$img"
    # Replace '/' and ':' so the image reference is a valid file name.
    tar_name=$(printf '%s' "$img" | tr '/:' '_')
    docker save -o "$TMP/images/${tar_name}.tar" "$img"
done

echo "[4] 复制 manifests"
cp -r ../manifests/* "$TMP/manifests/"

echo "[5] 打包离线资源"
tar czf "$OUT" offline-cache

echo "已生成离线包: $OUT"
|
||||||
|
|
||||||
29
gpu-worker-install.sh.j2
Normal file
29
gpu-worker-install.sh.j2
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
#!/bin/bash
# Offline bootstrap of a GPU worker node (Jinja2 template).
#
# Unpacks k8s-offline.tgz from the current directory, runs the NVIDIA driver
# installer script that sits next to this one, installs the bundled binaries,
# imports the bundled images, joins the cluster and applies the GPU Operator
# manifest from the bundle.
set -e

OFFLINE=/opt/k8s-offline
bundle_root="${OFFLINE}/offline-cache"

# Extract the offline archive under $OFFLINE.
mkdir -p "${OFFLINE}"
tar xf k8s-offline.tgz -C "${OFFLINE}"

echo "[1] 安装 nvidia driver(离线)"
bash ./install_nvidia_driver.sh

echo "[2] 安装 kubeadm/kubelet/kubectl"
install -m755 "${bundle_root}"/bin/* /usr/local/bin/

echo "[3] 导入镜像"
for tarball in "${bundle_root}"/images/*.tar; do
    ctr -n=k8s.io images import "${tarball}"
done

echo "[4] 加入集群"
kubeadm join {{ nodes.control_plane.ip }}:6443 \
  --token {{ join.token }} \
  --discovery-token-ca-cert-hash sha256:{{ join.hash }}

echo "[5] 自动部署 GPU Operator"
# NOTE(review): nothing in this script provides a kubeconfig on the worker;
# confirm kubectl can reach the API server from here.
kubectl apply -f "${bundle_root}/manifests/gpu-operator.yaml"

echo "GPU 工作节点初始化完成"
|
||||||
|
|
||||||
13
install_nvidia_driver.sh.j2
Normal file
13
install_nvidia_driver.sh.j2
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
#!/bin/bash
# Install the NVIDIA driver from a local .run installer and load its kernel
# modules (Jinja2 template; the version comes from gpu.driver_version).
# The installer file is looked up in the current working directory.
set -e

installer="NVIDIA-Linux-x86_64-{{ gpu.driver_version }}.run"

echo "安装 NVIDIA 驱动 {{ gpu.driver_version }}(离线方式)"

bash "${installer}" --silent

echo "加载 nvidia 模块"
for kmod in nvidia nvidia_uvm; do
    modprobe "${kmod}"
done

echo "NVIDIA 驱动安装完成"
|
||||||
|
|
||||||
6
manifests/cdi-cr.yaml
Normal file
6
manifests/cdi-cr.yaml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
apiVersion: cdi.kubevirt.io/v1beta1
|
||||||
|
kind: CDI
|
||||||
|
metadata:
|
||||||
|
name: cdi
|
||||||
|
namespace: cdi
|
||||||
|
|
||||||
5
manifests/cdi-operator.yaml
Normal file
5
manifests/cdi-operator.yaml
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: cdi
|
||||||
|
|
||||||
9
manifests/gpu-operator.yaml
Normal file
9
manifests/gpu-operator.yaml
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
# GPU Operator 示例,可替换为最新版本
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: gpu-operator
|
||||||
|
namespace: gpu-operator
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
|
||||||
8
manifests/kubevirt-cr.yaml
Normal file
8
manifests/kubevirt-cr.yaml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
apiVersion: kubevirt.io/v1
|
||||||
|
kind: KubeVirt
|
||||||
|
metadata:
|
||||||
|
namespace: kubevirt
|
||||||
|
name: kubevirt
|
||||||
|
spec:
|
||||||
|
workloadUpdateStrategy: LiveMigrate
|
||||||
|
|
||||||
9
manifests/kubevirt-operator.yaml
Normal file
9
manifests/kubevirt-operator.yaml
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
# 示例(可替换为最新版)
|
||||||
|
apiVersion: operator.kubevirt.io/v1
|
||||||
|
kind: KubeVirt
|
||||||
|
metadata:
|
||||||
|
name: kubevirt
|
||||||
|
namespace: kubevirt
|
||||||
|
spec:
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
|
||||||
7
manifests/nfs-csi.yaml
Normal file
7
manifests/nfs-csi.yaml
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: nfs-csi
|
||||||
|
---
|
||||||
|
# 这里填你需要的 nfs-csi manifest...
|
||||||
|
|
||||||
20
render.sh
Executable file
20
render.sh
Executable file
@ -0,0 +1,20 @@
|
|||||||
|
#!/bin/bash
# Render every *.sh.j2 template with the values from cluster-config.yaml
# (via the jinja2 command-line tool) and mark the results executable.
set -e

# Where the templates live and where the rendered scripts are written.
TEMPLATE_DIR="."
OUT_DIR="rendered"
mkdir -p "${OUT_DIR}"

# Templates are rendered in this fixed order; each NAME.sh.j2 becomes NAME.sh.
for script in \
    download_offline_packages \
    control-plane-install \
    cpu-worker-install \
    gpu-worker-install \
    install_nvidia_driver
do
    jinja2 "${TEMPLATE_DIR}/${script}.sh.j2" cluster-config.yaml > "${OUT_DIR}/${script}.sh"
done

chmod +x "${OUT_DIR}"/*.sh

echo "渲染完成,生成脚本在 ${OUT_DIR} 目录。"
|
||||||
0
rendered/README.md
Normal file
0
rendered/README.md
Normal file
Loading…
x
Reference in New Issue
Block a user