bugfix
This commit is contained in:
parent
430c57b277
commit
e3d1d04387
77
cluster-config.yaml
Normal file
77
cluster-config.yaml
Normal file
@ -0,0 +1,77 @@
|
||||
# cluster-config.yaml - edit these values to match your actual cluster.
# render.sh passes this file to jinja2 for every *.sh.j2 template.

kubernetes:
  version: "1.29.3"
  pod_cidr: "10.244.0.0/16"
  service_cidr: "10.96.0.0/12"
  cluster_name: "offline-k8s-cluster"

# NOTE(review): duplicates kubernetes.version above; keep the two in sync.
k8s_version: "1.29.3"
containerd_version: "1.7.13"
crictl_version: "1.29.0"

# NOTE(review): these differ from kubevirt.version ("1.3.0") and
# cdi.version ("1.58.0") further down - confirm which values are intended.
kubevirt_version: "v1.28.0"
cdi_version: "v1.65.0"

nfs_server: "192.168.16.2"
nfs_path: "/d/share/101206"


registry: "registry.local:5000"   # optional: only when a private registry is used


control_plane_ip: "192.168.16.5"

nodes:
  control_plane:
    hostname: "k8s-master"
    ip: "192.168.16.5"

# Values substituted into the `kubeadm join` command of the worker scripts.
# The token and hash below are placeholders; replace them with real values.
join:
  token: "abcdef.0123456789abcdef"
  hash: "123456abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234"

# =====================================================
# GPU Operator / NVIDIA configuration
# =====================================================
gpu:
  driver_version: "535"
  cuda_version: "12.4"

# =====================================================
# KubeVirt configuration
# =====================================================
kubevirt:
  version: "1.3.0"
  namespace: "kubevirt"

# =====================================================
# CDI (Containerized Data Importer)
# =====================================================
cdi:
  version: "1.58.0"
  namespace: "cdi"

# =====================================================
# GPU Operator
# =====================================================
gpu_operator:
  version: "v23.9.2"
  driver_version: "535"
  namespace: "gpu-operator"

# =====================================================
# NFS shared storage for VM disks
# =====================================================
storage:
  # Fixed: was "i192.168.16.2" (stray leading "i" made the address invalid).
  nfs_server: "192.168.16.2"
  nfs_path: "/d/share/11157"
  storage_class_name: "nfs-kubevirt"

# =====================================================
# Offline bundle paths
# =====================================================
offline_bundle:
  output_dir: "/opt/k8s-offline"
  output_file: "k8s-offline.tgz"
|
||||
|
||||
41
control-plane-install.sh.j2
Normal file
41
control-plane-install.sh.j2
Normal file
@ -0,0 +1,41 @@
|
||||
#!/bin/bash
# control-plane-install.sh (Jinja2 template; values come from cluster-config.yaml)
#
# Bootstraps the Kubernetes control plane from the offline bundle.
# Expects k8s-offline.tgz in the current directory. Steps:
#   1. unpack the bundle into $OFFLINE
#   2. install the bundled binaries into /usr/local/bin
#   3. import every bundled image tarball into containerd (k8s.io namespace)
#   4. kubeadm init, then copy admin.conf to ~/.kube/config
#   5-7. apply the flannel, KubeVirt, CDI and NFS-CSI manifests
#
# Images are imported BEFORE `kubeadm init`: on an offline node kubeadm
# cannot pull the control-plane images, so they must already be present.
set -e

OFFLINE=/opt/k8s-offline
CACHE="$OFFLINE/offline-cache"
# Upstream manifest, used only when the bundle does not ship a local copy.
FLANNEL_URL=https://raw.githubusercontent.com/flannel-io/flannel/master/Documentation/kube-flannel.yml

echo "[1] 解压离线包"
mkdir -p "$OFFLINE"
tar xf k8s-offline.tgz -C "$OFFLINE"

echo "[2] 安装 kubeadm/kubelet/kubectl"
install -m755 "$CACHE"/bin/* /usr/local/bin/

echo "[3] 加载所有离线镜像"
for img in "$CACHE"/images/*.tar; do
  ctr -n=k8s.io images import "$img"
done

echo "[4] 初始化控制平面"
kubeadm init \
  --kubernetes-version={{ kubernetes.version }} \
  --pod-network-cidr={{ kubernetes.pod_cidr }} \
  --service-cidr={{ kubernetes.service_cidr }} \
  --upload-certs

mkdir -p ~/.kube
cp /etc/kubernetes/admin.conf ~/.kube/config

echo "[5] 部署 CNI(flannel)"
# Prefer a bundled kube-flannel.yml so the step works without network access;
# fall back to the upstream URL (the previous behaviour) if it is not bundled.
if [[ -f "$CACHE/manifests/kube-flannel.yml" ]]; then
  kubectl apply -f "$CACHE/manifests/kube-flannel.yml"
else
  echo "WARN: kube-flannel.yml is not in the offline bundle; fetching $FLANNEL_URL" >&2
  kubectl apply -f "$FLANNEL_URL"
fi

echo "[6] 部署 KubeVirt 与 CDI"
kubectl apply -f "$CACHE/manifests/kubevirt-operator.yaml"
kubectl apply -f "$CACHE/manifests/kubevirt-cr.yaml"
kubectl apply -f "$CACHE/manifests/cdi-operator.yaml"
kubectl apply -f "$CACHE/manifests/cdi-cr.yaml"

echo "[7] 部署 NFS-CSI"
kubectl apply -f "$CACHE/manifests/nfs-csi.yaml"

echo "控制平面安装完成。"
|
||||
|
||||
24
cpu-worker-install.sh.j2
Normal file
24
cpu-worker-install.sh.j2
Normal file
@ -0,0 +1,24 @@
|
||||
#!/bin/bash
# cpu-worker-install.sh (Jinja2 template; values come from cluster-config.yaml)
#
# Prepares a CPU worker node from the offline bundle and joins it to the
# cluster. Expects k8s-offline.tgz in the current directory.
set -e

OFFLINE=/opt/k8s-offline
cache_dir="${OFFLINE}/offline-cache"

echo "[1] 解压离线包"
mkdir -p "${OFFLINE}"
tar -xf k8s-offline.tgz -C "${OFFLINE}"

echo "[2] 安装 kubeadm/kubelet/kubectl"
install -m755 "${cache_dir}"/bin/* /usr/local/bin/

echo "[3] 加载所有离线镜像"
for image_tar in "${cache_dir}"/images/*.tar; do
  ctr -n=k8s.io images import "${image_tar}"
done

echo "[4] 加入集群"
# Join flags are collected in an array so the command itself stays on one line.
join_args=(
  --token {{ join.token }}
  --discovery-token-ca-cert-hash sha256:{{ join.hash }}
)
kubeadm join {{ nodes.control_plane.ip }}:6443 "${join_args[@]}"

echo "CPU 工作节点已加入集群"
|
||||
|
||||
50
download_offline_packages.sh.j2
Normal file
50
download_offline_packages.sh.j2
Normal file
@ -0,0 +1,50 @@
|
||||
#!/bin/bash
# download_offline_packages.sh (Jinja2 template; values come from cluster-config.yaml)
#
# Run on an internet-connected, apt-based host. A `docker` CLI is required;
# if none is found the podman-docker shim is installed.
# Produces ./k8s-offline.tgz containing offline-cache/{bin,deps,images,manifests}.
#
# Optional environment overrides:
#   ARCH         target architecture for downloads (default: amd64)
#   CNI_VERSION  containernetworking/plugins release tag (default: v1.4.0)
#
# NOTE(review): only the images reported by `kubeadm config images list` are
# saved; flannel / KubeVirt / CDI / GPU Operator images are not pulled here.
#
# Template caveat: this file is processed by Jinja2, so do not use shell
# syntax that places a hash or percent sign directly after an opening brace.

set -eo pipefail

OUT=./k8s-offline.tgz
TMP=./offline-cache

ARCH="${ARCH:-amd64}"
# cri-tools release tags carry a leading "v"; the config value does not.
CRICTL_VERSION="v{{ crictl_version }}"
CONTAINERD_VERSION="{{ containerd_version }}"
CNI_VERSION="${CNI_VERSION:-v1.4.0}"

# Only install the podman shim when no docker CLI is already available.
if ! command -v docker >/dev/null 2>&1; then
  apt-get install -y podman-docker
fi
mkdir -p "$TMP/bin" "$TMP/manifests" "$TMP/images" "$TMP/deps"

echo "[1] 下载 依赖包"
# Subshell: the directory change must not leak, because TMP is a relative path.
(
  cd "$TMP/deps"
  # `apt-get download` drops the .deb files into the current directory.
  apt-get download nfs-common rpcbind

  echo "📥 下载 crictl"
  curl -fL "https://github.com/kubernetes-sigs/cri-tools/releases/download/${CRICTL_VERSION}/crictl-${CRICTL_VERSION}-linux-${ARCH}.tar.gz" | tar xz -C .

  echo "📥 下载 CNI plugins"
  curl -fL "https://github.com/containernetworking/plugins/releases/download/${CNI_VERSION}/cni-plugins-linux-${ARCH}-${CNI_VERSION}.tgz" -o cni-plugins.tgz

  echo "📥 下载 containerd"
  CONTAINERD_URL="https://github.com/containerd/containerd/releases/download/v${CONTAINERD_VERSION}/containerd-${CONTAINERD_VERSION}-linux-${ARCH}.tar.gz"
  curl -fL "${CONTAINERD_URL}" -o containerd.tar.gz
)

echo "[2] 下载 Kubernetes 二进制 {{ kubernetes.version }}"
for tool in kubeadm kubelet kubectl; do
  curl -fL -o "$TMP/bin/$tool" \
    "https://dl.k8s.io/release/v{{ kubernetes.version }}/bin/linux/${ARCH}/${tool}"
  chmod +x "$TMP/bin/$tool"
done

echo "[3] 下载镜像(kubeadm config images)"
# The downloaded kubeadm is executed on this host, so ARCH must match it.
"$TMP/bin/kubeadm" config images list --kubernetes-version {{ kubernetes.version }} > "$TMP/images/images.txt"
# Load the list that was just written; previously the loop iterated an
# undefined array and therefore never pulled anything.
mapfile -t images < "$TMP/images/images.txt"
for img in "${images[@]}"; do
  [[ -n "$img" ]] || continue
  echo "Pull image: $img"
  docker pull "$img"
  # Tarball name: image reference with "/" and ":" replaced by "_".
  tar_name=${img//\//_}
  tar_name=${tar_name//:/_}
  docker save -o "$TMP/images/${tar_name}.tar" "$img"
done

echo "[4] 复制 manifests"
cp -r ../manifests/* "$TMP/manifests/"

echo "[5] 打包离线资源"
tar czf "$OUT" offline-cache

echo "已生成离线包: $OUT"
|
||||
|
||||
29
gpu-worker-install.sh.j2
Normal file
29
gpu-worker-install.sh.j2
Normal file
@ -0,0 +1,29 @@
|
||||
#!/bin/bash
# gpu-worker-install.sh (Jinja2 template; values come from cluster-config.yaml)
#
# Prepares a GPU worker node from the offline bundle: installs the NVIDIA
# driver via ./install_nvidia_driver.sh, installs the bundled binaries,
# imports the bundled images, joins the cluster and, when this node has a
# kubeconfig, applies the GPU Operator manifest.
# Expects k8s-offline.tgz and install_nvidia_driver.sh in the current directory.
set -e

OFFLINE=/opt/k8s-offline
CACHE="$OFFLINE/offline-cache"

mkdir -p "$OFFLINE"
tar xf k8s-offline.tgz -C "$OFFLINE"

echo "[1] 安装 nvidia driver(离线)"
bash ./install_nvidia_driver.sh

echo "[2] 安装 kubeadm/kubelet/kubectl"
install -m755 "$CACHE"/bin/* /usr/local/bin/

echo "[3] 导入镜像"
for img in "$CACHE"/images/*.tar; do
  ctr -n=k8s.io images import "$img"
done

echo "[4] 加入集群"
kubeadm join {{ nodes.control_plane.ip }}:6443 \
  --token {{ join.token }} \
  --discovery-token-ca-cert-hash sha256:{{ join.hash }}

echo "[5] 自动部署 GPU Operator"
# A freshly joined worker normally has no kubeconfig, in which case kubectl
# would fail and abort the script after a successful join. Apply only when
# credentials are available; otherwise tell the operator where to run it.
if [[ -n "${KUBECONFIG:-}" || -f "$HOME/.kube/config" ]]; then
  kubectl apply -f "$CACHE/manifests/gpu-operator.yaml"
else
  echo "WARN: no kubeconfig found on this node; skipping GPU Operator deployment." >&2
  echo "      Run on the control plane: kubectl apply -f $CACHE/manifests/gpu-operator.yaml" >&2
fi

echo "GPU 工作节点初始化完成"
|
||||
|
||||
13
install_nvidia_driver.sh.j2
Normal file
13
install_nvidia_driver.sh.j2
Normal file
@ -0,0 +1,13 @@
|
||||
#!/bin/bash
# install_nvidia_driver.sh (Jinja2 template; values come from cluster-config.yaml)
#
# Runs the NVIDIA .run installer found in the current directory in silent
# mode, then loads the nvidia and nvidia_uvm kernel modules.
set -e

installer="NVIDIA-Linux-x86_64-{{ gpu.driver_version }}.run"

echo "安装 NVIDIA 驱动 {{ gpu.driver_version }}(离线方式)"

bash "${installer}" --silent

echo "加载 nvidia 模块"
for kmod in nvidia nvidia_uvm; do
  modprobe "${kmod}"
done

echo "NVIDIA 驱动安装完成"
|
||||
|
||||
6
manifests/cdi-cr.yaml
Normal file
6
manifests/cdi-cr.yaml
Normal file
@ -0,0 +1,6 @@
|
||||
# CDI custom resource (cdi.kubevirt.io/v1beta1) named "cdi".
# Applied by control-plane-install.sh.j2 right after cdi-operator.yaml.
apiVersion: cdi.kubevirt.io/v1beta1
|
||||
kind: CDI
|
||||
metadata:
|
||||
name: cdi
|
||||
namespace: cdi
|
||||
|
||||
5
manifests/cdi-operator.yaml
Normal file
5
manifests/cdi-operator.yaml
Normal file
@ -0,0 +1,5 @@
|
||||
# Declares the "cdi" Namespace.
# NOTE(review): despite the file name, no operator resources are defined
# here - presumably a placeholder for the upstream CDI operator manifest; confirm.
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: cdi
|
||||
|
||||
9
manifests/gpu-operator.yaml
Normal file
9
manifests/gpu-operator.yaml
Normal file
@ -0,0 +1,9 @@
|
||||
# GPU Operator example; can be replaced with the latest version.
# NOTE(review): spec only sets `replicas` and nothing in this file creates
# the gpu-operator namespace - presumably a stub to be replaced; confirm.
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: gpu-operator
|
||||
namespace: gpu-operator
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
8
manifests/kubevirt-cr.yaml
Normal file
8
manifests/kubevirt-cr.yaml
Normal file
@ -0,0 +1,8 @@
|
||||
# KubeVirt custom resource "kubevirt" in the "kubevirt" namespace.
# Applied by control-plane-install.sh.j2 right after kubevirt-operator.yaml.
apiVersion: kubevirt.io/v1
kind: KubeVirt
metadata:
  namespace: kubevirt
  name: kubevirt
spec:
  # workloadUpdateStrategy is an object; the update methods are listed under
  # workloadUpdateMethods (a bare scalar here does not match that schema).
  workloadUpdateStrategy:
    workloadUpdateMethods:
      - LiveMigrate
|
||||
|
||||
9
manifests/kubevirt-operator.yaml
Normal file
9
manifests/kubevirt-operator.yaml
Normal file
@ -0,0 +1,9 @@
|
||||
# Example (can be replaced with the latest version).
# NOTE(review): this declares a KubeVirt resource, not operator components,
# and uses a different apiVersion than kubevirt-cr.yaml (kubevirt.io/v1) -
# presumably a placeholder for the upstream operator manifest; confirm.
apiVersion: operator.kubevirt.io/v1
|
||||
kind: KubeVirt
|
||||
metadata:
|
||||
name: kubevirt
|
||||
namespace: kubevirt
|
||||
spec:
|
||||
imagePullPolicy: IfNotPresent
|
||||
|
||||
7
manifests/nfs-csi.yaml
Normal file
7
manifests/nfs-csi.yaml
Normal file
@ -0,0 +1,7 @@
|
||||
# Declares the "nfs-csi" Namespace; the driver resources go in the next document.
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: nfs-csi
|
||||
---
|
||||
# Add the nfs-csi manifests you need here...
|
||||
|
||||
20
render.sh
Executable file
20
render.sh
Executable file
@ -0,0 +1,20 @@
|
||||
#!/bin/bash
# render.sh - renders every *.sh.j2 template with the values from
# cluster-config.yaml (via the jinja2 CLI) into ${OUT_DIR}/ and marks the
# generated scripts executable.
set -e

# Where the templates live and where the rendered scripts are written.
TEMPLATE_DIR="."
OUT_DIR="rendered"
mkdir -p "${OUT_DIR}"

# Render each template; the output name is the template name minus ".j2".
for script in \
  download_offline_packages \
  control-plane-install \
  cpu-worker-install \
  gpu-worker-install \
  install_nvidia_driver
do
  jinja2 "${TEMPLATE_DIR}/${script}.sh.j2" cluster-config.yaml > "${OUT_DIR}/${script}.sh"
done

chmod +x "${OUT_DIR}"/*.sh

echo "渲染完成,生成脚本在 ${OUT_DIR} 目录。"
|
||||
0
rendered/README.md
Normal file
0
rendered/README.md
Normal file
Loading…
x
Reference in New Issue
Block a user