This commit is contained in:
yumoqing 2025-11-21 15:21:45 +08:00
parent 430c57b277
commit e3d1d04387
14 changed files with 298 additions and 0 deletions

77
cluster-config.yaml Normal file
View File

@ -0,0 +1,77 @@
# cluster-config.yaml - edit these values to match your actual cluster.
# render.sh feeds this file to every *.sh.j2 template. The nesting below
# follows the keys those templates dereference: kubernetes.version,
# kubernetes.pod_cidr, kubernetes.service_cidr, nodes.control_plane.ip,
# join.token, join.hash and gpu.driver_version.
kubernetes:
  version: "1.29.3"
  pod_cidr: "10.244.0.0/16"
  service_cidr: "10.96.0.0/12"
  cluster_name: "offline-k8s-cluster"

# Flat component versions.
# NOTE(review): kubevirt_version / cdi_version below disagree with
# kubevirt.version / cdi.version further down, and nfs_path disagrees with
# storage.nfs_path - confirm which values are intended and keep one copy.
k8s_version: "1.29.3"
containerd_version: "1.7.13"
crictl_version: "1.29.0"
kubevirt_version: "v1.28.0"
cdi_version: "v1.65.0"
nfs_server: "192.168.16.2"
nfs_path: "/d/share/101206"
registry: "registry.local:5000"  # optional: only when a private registry is used
control_plane_ip: "192.168.16.5"

nodes:
  control_plane:
    hostname: "k8s-master"
    ip: "192.168.16.5"

# Placeholder join credentials used by the worker install templates;
# replace them with the real token and CA cert hash of your cluster.
join:
  token: "abcdef.0123456789abcdef"
  hash: "123456abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234"

# =====================================================
# GPU Operator / NVIDIA configuration
# =====================================================
gpu:
  driver_version: "535"
  cuda_version: "12.4"

# =====================================================
# KubeVirt configuration
# =====================================================
kubevirt:
  version: "1.3.0"
  namespace: "kubevirt"

# =====================================================
# CDI (Containerized Data Importer)
# =====================================================
cdi:
  version: "1.58.0"
  namespace: "cdi"

# =====================================================
# GPU Operator
# =====================================================
gpu_operator:
  version: "v23.9.2"
  driver_version: "535"
  namespace: "gpu-operator"

# =====================================================
# NFS shared storage for VM disks
# =====================================================
storage:
  # Fixed: the address previously carried a stray leading "i".
  nfs_server: "192.168.16.2"
  nfs_path: "/d/share/11157"
  storage_class_name: "nfs-kubevirt"

# =====================================================
# Offline bundle paths
# =====================================================
offline_bundle:
  output_dir: "/opt/k8s-offline"
  output_file: "k8s-offline.tgz"

View File

@ -0,0 +1,41 @@
#!/bin/bash
# Control-plane bootstrap for the offline cluster.
# This is a Jinja2 template: the kubernetes.* placeholders are filled in from
# cluster-config.yaml when render.sh runs. It expects k8s-offline.tgz in the
# current working directory.
set -euo pipefail

OFFLINE=/opt/k8s-offline
CACHE="$OFFLINE/offline-cache"

echo "[1] 解压离线包"
mkdir -p "$OFFLINE"
tar xf k8s-offline.tgz -C "$OFFLINE"

echo "[2] 安装 kubeadm/kubelet/kubectl"
install -m755 "$CACHE"/bin/* /usr/local/bin/

# Images are imported BEFORE kubeadm init: on an air-gapped host kubeadm
# cannot pull the control-plane images, so they must already be present in
# the k8s.io containerd namespace.
echo "[3] 加载所有离线镜像"
imported=0
for img in "$CACHE"/images/*.tar; do
  # An unmatched glob stays literal; skip it instead of handing it to ctr.
  [[ -e "$img" ]] || continue
  ctr -n=k8s.io images import "$img"
  imported=1
done
if [[ "$imported" -eq 0 ]]; then
  echo "error: no image archives found in $CACHE/images" >&2
  exit 1
fi

echo "[4] 初始化控制平面"
kubeadm init \
  --kubernetes-version="{{ kubernetes.version }}" \
  --pod-network-cidr="{{ kubernetes.pod_cidr }}" \
  --service-cidr="{{ kubernetes.service_cidr }}" \
  --upload-certs

mkdir -p "$HOME/.kube"
cp /etc/kubernetes/admin.conf "$HOME/.kube/config"

echo "[5] 部署 CNIflannel"
# Prefer a manifest shipped inside the offline bundle (place kube-flannel.yml
# in manifests/ before building the bundle). The upstream URL is kept only as
# a fallback and requires internet access.
FLANNEL_MANIFEST="$CACHE/manifests/kube-flannel.yml"
if [[ -f "$FLANNEL_MANIFEST" ]]; then
  kubectl apply -f "$FLANNEL_MANIFEST"
else
  echo "warning: $FLANNEL_MANIFEST not found, falling back to the online manifest" >&2
  kubectl apply -f https://raw.githubusercontent.com/flannel-io/flannel/master/Documentation/kube-flannel.yml
fi

echo "[6] 部署 KubeVirt 与 CDI"
kubectl apply -f "$CACHE/manifests/kubevirt-operator.yaml"
kubectl apply -f "$CACHE/manifests/kubevirt-cr.yaml"
kubectl apply -f "$CACHE/manifests/cdi-operator.yaml"
kubectl apply -f "$CACHE/manifests/cdi-cr.yaml"

echo "[7] 部署 NFS-CSI"
kubectl apply -f "$CACHE/manifests/nfs-csi.yaml"

echo "控制平面安装完成。"

24
cpu-worker-install.sh.j2 Normal file
View File

@ -0,0 +1,24 @@
#!/bin/bash
# CPU worker bootstrap for the offline cluster.
# This is a Jinja2 template: the control-plane address and join credentials
# are filled in from cluster-config.yaml when render.sh runs. It expects
# k8s-offline.tgz in the current working directory.
set -euo pipefail

OFFLINE=/opt/k8s-offline
CACHE="$OFFLINE/offline-cache"

echo "[1] 解压离线包"
mkdir -p "$OFFLINE"
tar xf k8s-offline.tgz -C "$OFFLINE"

echo "[2] 安装 kubeadm/kubelet/kubectl"
install -m755 "$CACHE"/bin/* /usr/local/bin/

echo "[3] 加载所有离线镜像"
imported=0
for img in "$CACHE"/images/*.tar; do
  # An unmatched glob stays literal; skip it instead of handing it to ctr.
  [[ -e "$img" ]] || continue
  ctr -n=k8s.io images import "$img"
  imported=1
done
if [[ "$imported" -eq 0 ]]; then
  echo "error: no image archives found in $CACHE/images" >&2
  exit 1
fi

echo "[4] 加入集群"
kubeadm join "{{ nodes.control_plane.ip }}:6443" \
  --token "{{ join.token }}" \
  --discovery-token-ca-cert-hash "sha256:{{ join.hash }}"

echo "CPU 工作节点已加入集群"

View File

@ -0,0 +1,50 @@
#!/bin/bash
# Build the offline bundle (k8s-offline.tgz) on an internet-connected host.
# This is a Jinja2 template: version placeholders are filled in from
# cluster-config.yaml when render.sh runs.
# A docker-compatible CLI is required on the download host; podman-docker is
# installed when no "docker" command is found.
set -euo pipefail

curdir=$(pwd)
OUT=./k8s-offline.tgz
# Absolute path, so the directory changes below cannot invalidate it.
TMP="$curdir/offline-cache"

# ARCH and CNI_VERSION are not part of cluster-config.yaml, so they default
# here and may be overridden from the environment.
ARCH="${ARCH:-amd64}"
CNI_VERSION="${CNI_VERSION:-v1.4.0}"
# NOTE(review): assumes crictl_version and containerd_version are top-level
# keys of cluster-config.yaml holding bare versions (no leading "v") - confirm.
CRICTL_VERSION="v{{ crictl_version }}"
CONTAINERD_VERSION="{{ containerd_version }}"
K8S_VERSION="{{ kubernetes.version }}"

if ! command -v docker >/dev/null 2>&1; then
  apt-get install -y podman-docker
fi

mkdir -p "$TMP/bin" "$TMP/manifests" "$TMP/images" "$TMP/deps"

echo "[1] 下载 依赖包"
cd "$TMP/deps"
# "apt-get download" writes the .deb files into the current directory.
# It does not fetch dependencies; add further package names here if the
# target nodes lack them. (nfs-utils is the RPM name of nfs-common and does
# not exist in Debian/Ubuntu repositories, so it is not requested.)
apt-get download nfs-common rpcbind

echo "📥 下载 crictl"
curl -fL "https://github.com/kubernetes-sigs/cri-tools/releases/download/${CRICTL_VERSION}/crictl-${CRICTL_VERSION}-linux-${ARCH}.tar.gz" | tar xz -C .

echo "📥 下载 CNI plugins"
curl -fL "https://github.com/containernetworking/plugins/releases/download/${CNI_VERSION}/cni-plugins-linux-${ARCH}-${CNI_VERSION}.tgz" -o cni-plugins.tgz

echo "📥 下载 containerd"
CONTAINERD_URL="https://github.com/containerd/containerd/releases/download/v${CONTAINERD_VERSION}/containerd-${CONTAINERD_VERSION}-linux-${ARCH}.tar.gz"
curl -fL "${CONTAINERD_URL}" -o containerd.tar.gz

echo "[2] 下载 Kubernetes 二进制 {{ kubernetes.version }}"
cd "$TMP/bin"
for tool in kubeadm kubelet kubectl; do
  curl -fLO "https://dl.k8s.io/release/v${K8S_VERSION}/bin/linux/${ARCH}/${tool}"
done
chmod +x kubeadm kubelet kubectl
cd "$curdir"

echo "[3] 下载镜像kubeadm config images"
# The freshly downloaded kubeadm is executed here, so ARCH must match the
# architecture of the download host.
"$TMP/bin/kubeadm" config images list --kubernetes-version "${K8S_VERSION}" > "$TMP/images/images.txt"
if [[ ! -s "$TMP/images/images.txt" ]]; then
  echo "error: kubeadm returned an empty image list" >&2
  exit 1
fi
# Load the list that was just written; one image reference per line.
mapfile -t images < "$TMP/images/images.txt"
for img in "${images[@]}"; do
  [[ -n "$img" ]] || continue
  echo "Pull image: $img"
  docker pull "$img"
  # "/" and ":" are not usable in a flat file name; map both to "_".
  docker save -o "$TMP/images/$(echo "$img" | tr '/:' '__').tar" "$img"
done

echo "[4] 复制 manifests"
cp -r ../manifests/* "$TMP/manifests/"

echo "[5] 打包离线资源"
# Archive with a top-level offline-cache/ directory; the install scripts
# expect that layout after extraction.
tar czf "$OUT" offline-cache

echo "已生成离线包: $OUT"

29
gpu-worker-install.sh.j2 Normal file
View File

@ -0,0 +1,29 @@
#!/bin/bash
# GPU worker bootstrap for the offline cluster.
# This is a Jinja2 template: the control-plane address and join credentials
# are filled in from cluster-config.yaml when render.sh runs. It expects
# k8s-offline.tgz and install_nvidia_driver.sh in the current working
# directory.
set -euo pipefail

OFFLINE=/opt/k8s-offline
CACHE="$OFFLINE/offline-cache"

mkdir -p "$OFFLINE"
tar xf k8s-offline.tgz -C "$OFFLINE"

echo "[1] 安装 nvidia driver离线"
bash ./install_nvidia_driver.sh

echo "[2] 安装 kubeadm/kubelet/kubectl"
install -m755 "$CACHE"/bin/* /usr/local/bin/

echo "[3] 导入镜像"
imported=0
for img in "$CACHE"/images/*.tar; do
  # An unmatched glob stays literal; skip it instead of handing it to ctr.
  [[ -e "$img" ]] || continue
  ctr -n=k8s.io images import "$img"
  imported=1
done
if [[ "$imported" -eq 0 ]]; then
  echo "error: no image archives found in $CACHE/images" >&2
  exit 1
fi

echo "[4] 加入集群"
kubeadm join "{{ nodes.control_plane.ip }}:6443" \
  --token "{{ join.token }}" \
  --discovery-token-ca-cert-hash "sha256:{{ join.hash }}"

echo "[5] 自动部署 GPU Operator"
# NOTE(review): kubectl needs a kubeconfig with cluster access on this node;
# nothing in this script provides one, so this step presumably only works
# when one was placed here beforehand - confirm, or move it to the
# control-plane script.
kubectl apply -f "$CACHE/manifests/gpu-operator.yaml"

echo "GPU 工作节点初始化完成"

View File

@ -0,0 +1,13 @@
#!/bin/bash
# Offline NVIDIA driver installation.
# This is a Jinja2 template: gpu.driver_version comes from cluster-config.yaml.
# The matching NVIDIA-Linux-x86_64-<version>.run installer is looked up in the
# current working directory.
set -e

echo "安装 NVIDIA 驱动 {{ gpu.driver_version }}(离线方式)"
bash NVIDIA-Linux-x86_64-{{ gpu.driver_version }}.run --silent

echo "加载 nvidia 模块"
# Load the core module first, then the unified-memory module.
for kmod in nvidia nvidia_uvm; do
  modprobe "$kmod"
done

echo "NVIDIA 驱动安装完成"

6
manifests/cdi-cr.yaml Normal file
View File

@ -0,0 +1,6 @@
# CDI custom resource named "cdi" (API group cdi.kubevirt.io/v1beta1).
# No spec is set here.
apiVersion: cdi.kubevirt.io/v1beta1
kind: CDI
metadata:
name: cdi
namespace: cdi

View File

@ -0,0 +1,5 @@
# Namespace "cdi"; the CDI custom resource in manifests/cdi-cr.yaml names this namespace.
apiVersion: v1
kind: Namespace
metadata:
name: cdi

View File

@ -0,0 +1,9 @@
# GPU Operator example; replace it with the latest upstream version.
# NOTE(review): this is only a stub - the spec below carries nothing but
# "replicas" (no selector or pod template), so it presumably has to be
# replaced with the real GPU Operator manifest before it can be applied.
apiVersion: apps/v1
kind: Deployment
metadata:
name: gpu-operator
namespace: gpu-operator
spec:
replicas: 1

View File

@ -0,0 +1,8 @@
# KubeVirt custom resource named "kubevirt" in the kubevirt namespace.
apiVersion: kubevirt.io/v1
kind: KubeVirt
metadata:
  namespace: kubevirt
  name: kubevirt
spec:
  # workloadUpdateStrategy is an object, not a scalar: the update methods
  # are listed under workloadUpdateMethods.
  workloadUpdateStrategy:
    workloadUpdateMethods:
      - LiveMigrate

View File

@ -0,0 +1,9 @@
# Example only (replace with the latest upstream version).
# NOTE(review): this stub declares a KubeVirt object under
# operator.kubevirt.io/v1 instead of the operator's own resources; it
# presumably has to be replaced with the released kubevirt-operator.yaml -
# confirm.
apiVersion: operator.kubevirt.io/v1
kind: KubeVirt
metadata:
name: kubevirt
namespace: kubevirt
spec:
imagePullPolicy: IfNotPresent

7
manifests/nfs-csi.yaml Normal file
View File

@ -0,0 +1,7 @@
# Namespace for the NFS CSI driver.
apiVersion: v1
kind: Namespace
metadata:
name: nfs-csi
---
# Put the nfs-csi manifest you need here...

20
render.sh Executable file
View File

@ -0,0 +1,20 @@
#!/bin/bash
# Render every install/download template with the values from
# cluster-config.yaml and mark the generated scripts executable.
set -e

# Where the templates live and where the rendered scripts are written.
TEMPLATE_DIR="."
OUT_DIR="rendered"
mkdir -p "${OUT_DIR}"

# Each entry is a template base name: <name>.sh.j2 is rendered to <name>.sh.
for script_name in \
  download_offline_packages \
  control-plane-install \
  cpu-worker-install \
  gpu-worker-install \
  install_nvidia_driver
do
  jinja2 "${TEMPLATE_DIR}/${script_name}.sh.j2" cluster-config.yaml > "${OUT_DIR}/${script_name}.sh"
done

chmod +x "${OUT_DIR}"/*.sh
echo "渲染完成,生成脚本在 ${OUT_DIR} 目录。"

0
rendered/README.md Normal file
View File