yumoqing 2025-11-26 21:34:17 +08:00
parent 99e98e34c8
commit f6916a9a77
3 changed files with 674 additions and 96 deletions

View File

@ -1,25 +1,20 @@
# cluster-config.yaml - adjust to match your actual cluster configuration
kubernetes:
version: "1.29.3"
version: "1.28.2"
pod_cidr: "10.244.0.0/16"
service_cidr: "10.96.0.0/12"
cluster_name: "offline-k8s-cluster"
k8s_version: "1.29.3"
containerd_version: "1.7.13"
containerd_version: "v1.6.37"
crictl_version: "1.29.0"
nfs_server: "192.168.16.2"
nfs_path: "/d/share/101206"
registry: "registry.local:5000" # 可选:若使用私有 registry
control_plane_ip: "192.168.16.5"
nodes:
control_plane:
hostname: "k8s-master"
@ -29,7 +24,6 @@ join:
token: "abcdef.0123456789abcdef"
hash: "123456abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234"
# =====================================================
# GPU Operator / NVIDIA configuration
# =====================================================
gpu:

View File

@ -14,8 +14,13 @@ NVIDIA_DRIVER_VERSION="535.129.03"
# =========================================
echo ">>> [0/6] 初始化目录..."
mkdir -p $WORKDIR/{bin,debs,images,drivers,charts,manifests,scripts}
PKGS_TO_DOWNLOAD="nfs-common socat conntrack ipset ebtables lvm2 gnupg2 software-properties-common curl ca-certificates apt-transport-https redis-server"
mkdir -p $WORKDIR/{bin,service,debs,images,drivers,charts,manifests,scripts}
echo ">>> [x] Downloading containerd.service..."
cd $WORKDIR/service
sudo curl -L https://raw.githubusercontent.com/containerd/containerd/main/containerd.service -o containerd.service
PKGS_TO_DOWNLOAD="docker.io nfs-common socat conntrack ipset ebtables lvm2 gnupg2 software-properties-common curl ca-certificates apt-transport-https redis-server"
cd $WORKDIR/debs
sudo apt-get update -q
for pkg in $PKGS_TO_DOWNLOAD; do
@ -61,7 +66,8 @@ if [ ! -f "cni-plugins-linux-amd64-${CNI_VERSION}.tgz" ]; then
echo "Downloading CNI Plugins..."
wget -q https://github.com/containernetworking/plugins/releases/download/${CNI_VERSION}/cni-plugins-linux-amd64-${CNI_VERSION}.tgz
fi
# containerd
curl -L --retry 3 https://github.com/containerd/containerd/releases/download/v1.6.37/containerd-1.6.37-linux-amd64.tar.gz -o containerd-1.6.37-linux-amd64.tar.gz
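# Optional integrity check, a sketch assuming the matching .sha256sum file from
# the same release page is fetched alongside the tarball:
# curl -L --retry 3 https://github.com/containerd/containerd/releases/download/v1.6.37/containerd-1.6.37-linux-amd64.tar.gz.sha256sum -o containerd-1.6.37-linux-amd64.tar.gz.sha256sum
# sha256sum -c containerd-1.6.37-linux-amd64.tar.gz.sha256sum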
echo "Binaries ready."
# ================= 2. Container images =================
@ -79,7 +85,7 @@ IMAGES=(
"registry.k8s.io/kube-scheduler:v${K8S_VERSION}"
"registry.k8s.io/kube-proxy:v${K8S_VERSION}"
"registry.k8s.io/pause:3.9"
"registry.k8s.io/etcd:3.5.12-0"
"registry.k8s.io/etcd:3.5.9-0"
"registry.k8s.io/coredns/coredns:v1.10.1"
"docker.io/calico/cni:${CALICO_VERSION}"
"docker.io/calico/node:${CALICO_VERSION}"

View File

@ -1,107 +1,685 @@
#!/bin/bash
source ./common.sh
set -eo pipefail # exit immediately on any error, including unhandled pipeline failures
echo "[INFO] === Initializing the master node ==="
# ==============================================================================
# Configuration
# ==============================================================================
OFFLINE_ASSETS_DIR="/root/k8s-offline-bundle"
cat <<CFG > kubeadm-config.yaml
apiVersion: kubeadm.k8s.io/v1beta3
kind: ClusterConfiguration
kubernetesVersion: v{{ cluster.kubernetes_version }}
controlPlaneEndpoint: "{{ cluster.api_server_ip }}:6443"
networking:
podSubnet: "{{ cluster.pod_cidr }}"
serviceSubnet: "{{ cluster.service_cidr }}"
imageRepository: {{ registry.ip }}:{{ registry.port }}
---
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
cgroupDriver: systemd
CFG
K8S_VERSION="v1.28.2"
CALICO_VERSION="v3.26.1"
KUBEVIRT_VERSION="v1.1.0"
NFS_PROVISIONER_VERSION="v4.0.2" # image tag
NFS_CHART_VERSION="4.0.18" # Helm chart version
# Preflight check
kubeadm init phase preflight --config kubeadm-config.yaml --ignore-preflight-errors=all
LOCAL_REGISTRY_IP="192.168.16.5"
LOCAL_REGISTRY_PORT="5000"
LOCAL_REGISTRY="${LOCAL_REGISTRY_IP}:${LOCAL_REGISTRY_PORT}"
# Actual initialization
# Note: the images were imported manually, so kubeadm does not need to pull them
kubeadm init --config kubeadm-config.yaml --upload-certs | tee kubeadm-init.log
K8S_APISERVER_ADVERTISE_ADDRESS="${LOCAL_REGISTRY_IP}"
POD_CIDR="192.168.0.0/16"
SERVICE_CIDR="10.96.0.0/12"
mkdir -p $HOME/.kube
cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
chown $(id -u):$(id -g) $HOME/.kube/config
echo "[INFO] 部署网络插件 (Calico)..."
kubectl apply -f "$BUNDLE_ROOT/manifests/calico.yaml"
NFS_SERVER="192.168.16.2"
NFS_PATH="/d/share/101206"
NFS_STORAGE_CLASS_NAME="nfs-client"
TEMP_DIR="/tmp/k8s-master-setup"
NAMESPACE="default"
LOCAL_REGISTRY="{{ registry.ip }}:{{ registry.port }}"
echo "[INFO] 5. 导入离线镜像..."
if [ -d "$IMAGES_DIR" ]; then
for tarfile in "$IMAGE_DIR"/*.tar; do
[ -e "$tarfile" ] || continue
LOCAL_REGISTRY_IP="192.168.16.5"
LOCAL_REGISTRY_PORT="5000"
LOCAL_REGISTRY_ADDR="${LOCAL_REGISTRY_IP}:${LOCAL_REGISTRY_PORT}"
CONTAINERD_CONFIG="/etc/containerd/config.toml"
CERTS_D_PATH="/etc/containerd/certs.d"
CALICO_YAML_PATH="$OFFLINE_ASSETS_DIR/manifests/calico.yaml" # verify this path
CALICO_VERSION="v3.26.1"
mkdir -p ${TEMP_DIR}
echo ""
echo ">>> Processing $tarfile"
echo "=================================================="
echo " Kubernetes 控制节点离线安装脚本 "
echo "=================================================="
echo "配置参数:"
echo " K8s 版本: ${K8S_VERSION}"
echo " 本地镜像仓库: ${LOCAL_REGISTRY_ADDR}"
echo " K8s API Server IP: ${K8S_APISERVER_ADVERTISE_ADDRESS}"
echo " Pod CIDR: ${POD_CIDR}"
echo " NFS Server: ${NFS_SERVER}:${NFS_PATH}"
echo "--------------------------------------------------"
# 1) Import the image
ctr -n "$NAMESPACE" images import "$tarfile"
# ==============================================================================
# Common functions (contents of common.sh, inlined here for simplicity)
# ==============================================================================
# 2) Get the most recently imported image (compatible with older ctr versions)
ORIGIN_IMG=$(ctr -n "$NAMESPACE" images ls -q | head -n1)
if [[ -z "$ORIGIN_IMG" ]]; then
echo "❌ Failed to detect original image name, skipping..."
continue
fi
echo "Original image: $ORIGIN_IMG"
log_info() {
echo -e "\e[32m[INFO] $(date +'%Y-%m-%d %H:%M:%S') $1\e[0m"
}
# 3) Derive the local registry image name from the tar file name
# Example file name: docker.io_calico_cni_v3.26.1.tar
BASENAME=$(basename "$tarfile" .tar)
BASENAME=${BASENAME#*_} # strip the registry prefix: calico_cni_v3.26.1
NAME_TAG=${BASENAME}
NAME=${NAME_TAG%_*} # calico_cni
TAG=${NAME_TAG##*_} # v3.26.1
NEW_IMG="${LOCAL_REGISTRY}/${NAME}:${TAG}"
log_error() {
echo -e "\e[31m[ERROR] $(date +'%Y-%m-%d %H:%M:%S') $1\e[0m" >&2
exit 1
}
echo "Retag as: $NEW_IMG"
command_exists() {
command -v "$1" >/dev/null 2>&1
}
# 4) Retag
ctr -n "$NAMESPACE" images tag "$ORIGIN_IMG" "$NEW_IMG"
check_root() {
if [[ $EUID -ne 0 ]]; then
log_error "此脚本必须以 root 用户或使用 sudo 运行。"
fi
}
# 5) Push to the local registry
ctr -n "$NAMESPACE" images push --plain-http "$NEW_IMG"
configure_sysctl() {
log_info "配置系统内核参数..."
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF
sudo modprobe overlay
sudo modprobe br_netfilter
echo "✅ Done: $NEW_IMG"
done
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
EOF
sudo sysctl --system
log_info "系统内核参数配置完成。"
}
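# A quick sanity check (sketch; run it after configure_sysctl has executed):
# for key in net.bridge.bridge-nf-call-iptables net.bridge.bridge-nf-call-ip6tables net.ipv4.ip_forward; do
#     printf '%s = %s\n' "$key" "$(sysctl -n "$key")"
# done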
disable_swap() {
log_info "禁用 Swap 分区..."
if grep -q "swap" /etc/fstab; then
sudo swapoff -a
sudo sed -i '/ swap / s/^\(.*\)$/#\1/g' /etc/fstab
log_info "Swap 分区已禁用并从 fstab 中注释。"
else
log_info "未检测到 Swap 分区或已禁用。"
fi
}
# ==============================================================================
# 0. Pre-checks and environment initialization
# ==============================================================================
check_root
configure_sysctl
disable_swap
log_info "将离线资源目录添加到 PATH。"
export PATH="${OFFLINE_ASSETS_DIR}/bin:$PATH"
echo "export PATH=${OFFLINE_ASSETS_DIR}/bin:\$PATH" | sudo tee /etc/profile.d/offline-k8s.sh
# ==============================================================================
# 1. Install OS dependencies (DEB packages)
# ==============================================================================
log_info "安装操作系统依赖 (DEB 包)..."
cd "${OFFLINE_ASSETS_DIR}/debs"
# Try to install all .deb packages. --force-depends could be used to work around
# dependency-ordering problems in an offline environment. Better options: install
# the packages in dependency order (dpkg -i A.deb B.deb C.deb D.deb, where D
# depends on C, C on B, and B on A), or loop until no errors remain. Since the
# dependency graph can be complex, the safest bet is to attempt everything and
# then run apt install -f (which may not fully resolve offline), or to resolve
# the full dependency closure at download time (see the sketch after this section).
# For now we only have the provided package list, so install it all.
sudo dpkg -i *.deb || true # the first pass may fail on dependency ordering; ignore errors
# Second pass: picks up packages whose dependencies are now satisfied
sudo dpkg -i *.deb || true
# apt install -f cannot work offline; we assume all required dependencies are in the debs directory.
# Any unmet dependencies will show up as errors here.
log_info "Attempted to install all DEB packages. Check the output above for unmet dependencies."
cd - > /dev/null
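# Sketch for the (online) download host, not this offline node: fetch a package
# together with its full dependency closure into the debs directory; the package
# name is illustrative.
# apt-get download $(apt-cache depends --recurse --no-recommends --no-suggests \
#     --no-conflicts --no-breaks --no-replaces --no-enhances nfs-common | grep "^\w" | sort -u)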
# ==============================================================================
# 2. Install Docker (used only for the local image registry)
# ==============================================================================
log_info "安装 Docker daemon (仅用于本地镜像仓库) ..."
# 由于你的DEB包列表中没有docker-ce或docker.io的deb包
# 假设docker已经通过其他方式安装或者这里需要补充下载docker的deb包
# 暂时跳过docker的deb包安装直接检查docker命令是否存在
if ! command_exists docker; then
    # log_error exits, so print the hint first
    log_info "If offline, download the docker-ce (or docker.io) .deb packages and their dependencies into the debs directory and install them."
    log_error "Docker CLI not found. Make sure Docker (or a compatible container engine such as Podman) is installed."
fi
echo "[INFO] 部署本地 Registry 容器..."
mkdir -p /opt/registry-data
ctr images import $IMAGES_DIR/registry_2.tar
ctr container create \
--net-host \
--mount type=bind,src=/opt/registry-data,dst=/var/lib/registry,options=rbind:rw \
docker.io/library/registry:2 \
registry-local
nohup ctr task start registry-local &
# Configure the Docker daemon to trust the local registry (plain HTTP)
log_info "Configuring the Docker daemon to trust the local registry ${LOCAL_REGISTRY_ADDR} (plain HTTP)..."
sudo mkdir -p /etc/docker
cat <<EOF | sudo tee /etc/docker/daemon.json
{
"insecure-registries": ["${LOCAL_REGISTRY_ADDR}"],
"exec-opts": ["native.cgroupdriver=systemd"]
}
EOF
# Setting the cgroup driver to systemd (done in daemon.json above) is recommended
sudo groupadd docker || true
sudo systemctl daemon-reload
sudo systemctl enable docker.socket
sudo systemctl enable docker
sudo systemctl restart docker.socket
sudo systemctl restart docker
sudo systemctl status docker --no-pager || log_error "The Docker daemon failed to start."
log_info "Docker daemon 已配置信任本地仓库并重启。"
echo "[INFO] 部署 Storage & Virtualization..."
# Install Helm
cp "$BUNDLE_ROOT/bin/helm" /usr/local/bin/
# ==============================================================================
# 3. Install the containerd runtime
# ==============================================================================
log_info "安装 Containerd 运行时..."
CONTAINERD_TAR_GZ=$(find "${OFFLINE_ASSETS_DIR}/bin" -name "containerd-*.tar.gz")
if [ -z "$CONTAINERD_TAR_GZ" ]; then
log_error "未找到 Containerd 压缩包。"
fi
# NFS
"$BUNDLE_ROOT/bin/helm" install nfs-subdir-provisioner "$BUNDLE_ROOT/charts/nfs-subdir-external-provisioner" \
--set nfs.server={{ storage.nfs_server }} \
--set nfs.path={{ storage.nfs_path }} \
--set storageClass.defaultClass=true
# Extract the containerd binaries
sudo tar Cxzvf /usr/local "$CONTAINERD_TAR_GZ"
# KubeVirt
kubectl apply -f "$BUNDLE_ROOT/manifests/kubevirt-operator.yaml"
kubectl apply -f "$BUNDLE_ROOT/manifests/kubevirt-cr.yaml"
# Install the containerd systemd unit file (downloaded earlier from GitHub)
cp ${OFFLINE_ASSETS_DIR}/service/containerd.service /etc/systemd/system
sudo systemctl daemon-reload # reload unit definitions
# Multus
kubectl apply -f "$BUNDLE_ROOT/manifests/multus-daemonset.yaml"
# Generate the default configuration
sudo mkdir -p /etc/containerd
sudo containerd config default | sudo tee /etc/containerd/config.toml
echo "=== Configuring containerd registry mirrors using config_path ==="
echo "Local Registry Address: http://${LOCAL_REGISTRY_ADDR}"
# --- 1. Back up the original containerd config.toml ---
if [ -f "$CONTAINERD_CONFIG" ]; then
echo "Backing up original containerd config.toml to ${CONTAINERD_CONFIG}.bak"
sudo cp "$CONTAINERD_CONFIG" "${CONTAINERD_CONFIG}.bak"
else
echo "Warning: ${CONTAINERD_CONFIG} not found. Generating default config."
sudo containerd config default | sudo tee "$CONTAINERD_CONFIG" > /dev/null
fi
# --- 2. Create the required directory structure ---
echo "Creating necessary directory structure under ${CERTS_D_PATH}"
sudo mkdir -p "${CERTS_D_PATH}/${LOCAL_REGISTRY_ADDR}"
sudo mkdir -p "${CERTS_D_PATH}/registry.k8s.io"
# --- 3. Generate the hosts.toml files ---
# hosts.toml for the local registry (plain HTTP, skip_verify)
echo "Creating ${CERTS_D_PATH}/${LOCAL_REGISTRY_ADDR}/hosts.toml"
sudo tee "${CERTS_D_PATH}/${LOCAL_REGISTRY_ADDR}/hosts.toml" > /dev/null <<EOF
server = "http://${LOCAL_REGISTRY_ADDR}"
[host."http://${LOCAL_REGISTRY_ADDR}"]
capabilities = ["pull", "resolve"]
skip_verify = true
EOF
# hosts.toml for registry.k8s.io (mirror to the local registry, fall back to upstream)
echo "Creating ${CERTS_D_PATH}/registry.k8s.io/hosts.toml"
sudo tee "${CERTS_D_PATH}/registry.k8s.io/hosts.toml" > /dev/null <<EOF
server = "https://registry.k8s.io" # This specifies the primary server for this registry
# Prioritize local insecure registry
[host."http://${LOCAL_REGISTRY_ADDR}"]
capabilities = ["pull", "resolve"]
skip_verify = true
# Fallback to official registry
[host."https://registry.k8s.io"]
capabilities = ["pull", "resolve"]
EOF
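# Optional pull test through the new hosts.toml layout (a sketch; it assumes the
# pause image has already been pushed to the local registry):
# sudo ctr --namespace k8s.io images pull --hosts-dir "${CERTS_D_PATH}" "${LOCAL_REGISTRY_ADDR}/pause:3.9"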
# --- 4. Modify /etc/containerd/config.toml ---
echo "Modifying ${CONTAINERD_CONFIG} to use config_path and remove deprecated settings."
# Use sed to replace config_path if present, or insert it otherwise
sudo sed -i "s|sandbox_image = \"registry.k8s.io/pause:3.6\"|sandbox_image = \"${LOCAL_REGISTRY_ADDR}/pause:3.9\"|g" "$CONTAINERD_CONFIG"
if grep -q "config_path =" "$CONTAINERD_CONFIG"; then
sudo sed -i "s|^[[:space:]]*config_path = .*| config_path = \"${CERTS_D_PATH}\"|" "$CONTAINERD_CONFIG"
else
# If absent, add it inside the [plugins."io.containerd.grpc.v1.cri".registry]
# block, right after the block header
sudo sed -i "/\[plugins.\"io.containerd.grpc.v1.cri\".registry\]/a \\\n config_path = \"${CERTS_D_PATH}\"" "$CONTAINERD_CONFIG"
fi
# Remove the old mirrors and configs sections (the source of the deprecation warnings)
# Delete [plugins."io.containerd.grpc.v1.cri".registry.mirrors] and its contents with sed
sudo sed -i '/^\[plugins\."io\.containerd\.grpc\.v1\.cri"\.registry\.mirrors\."registry\.k8s\.io"\]/,/^endpoint = \[/d' "$CONTAINERD_CONFIG"
# Remove [plugins."io.containerd.grpc.v1.cri".registry.configs."192.168.16.5:5000".tls] and its contents
sudo sed -i '/^\[plugins\."io\.containerd\.grpc\.v1\.cri"\.registry\.configs\."192\.168\.16\.5:5000"\.tls\]/,/^insecure_skip_verify = /d' "$CONTAINERD_CONFIG"
# Make sure any remaining empty section headers are removed
sudo sed -i '/^\[plugins\."io\.containerd\.grpc\.v1\.cri"\.registry\.mirrors\]/d' "$CONTAINERD_CONFIG"
sudo sed -i '/^\[plugins\."io\.containerd\.grpc\.v1\.cri"\.registry\.configs\]/d' "$CONTAINERD_CONFIG"
echo "--- 5. Restarting containerd service ---"
sudo systemctl daemon-reload
sudo systemctl restart containerd
sudo systemctl status containerd | head -n 10 # show the first 10 lines of status to check for errors
echo "=== containerd registry configuration with config_path completed. ==="
echo "Please check 'sudo systemctl status containerd' for any errors."
echo "You should no longer see DEPRECATION warnings related to 'mirrors' and 'configs'."
# Configure crictl
log_info "Configuring crictl..."
cat <<EOF | sudo tee /etc/crictl.yaml
runtime-endpoint: unix:///run/containerd/containerd.sock
image-endpoint: unix:///run/containerd/containerd.sock
EOF
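# Quick connectivity check (sketch; containerd must be running):
# sudo crictl info >/dev/null && echo "crictl can reach containerd"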
log_info "crictl 配置完成。"
# ==============================================================================
# 4. Install the CNI plugins
# ==============================================================================
log_info "安装 CNI 插件..."
CNI_PLUGINS_TAR_GZ=$(find "${OFFLINE_ASSETS_DIR}/bin" -name "cni-plugins-*.tgz")
if [ -z "$CNI_PLUGINS_TAR_GZ" ]; then
log_error "未找到 CNI 插件压缩包。"
fi
sudo mkdir -p /opt/cni/bin
sudo tar Cxzvf /opt/cni/bin "$CNI_PLUGINS_TAR_GZ"
log_info "CNI 插件安装完成。"
# ==============================================================================
# 5. Install the Kubernetes binaries (kubelet, kubeadm, kubectl)
# ==============================================================================
log_info "安装 Kubernetes Binaries..."
sudo cp "${OFFLINE_ASSETS_DIR}/bin/kubelet" /usr/local/bin/kubelet
sudo cp "${OFFLINE_ASSETS_DIR}/bin/kubeadm" /usr/local/bin/kubeadm
sudo cp "${OFFLINE_ASSETS_DIR}/bin/kubectl" /usr/local/bin/kubectl
sudo cp "${OFFLINE_ASSETS_DIR}/bin/helm" /usr/local/bin/helm # Helm CLI
sudo chmod +x /usr/local/bin/{kubeadm,kubelet,kubectl,helm}
# Configure the kubelet systemd service (generated from a template)
log_info "Configuring the kubelet systemd service..."
# The kubelet.service.j2 template content is written inline here via a heredoc
cat <<'EOF' | sudo tee /etc/systemd/system/kubelet.service
[Unit]
Description=kubelet: The Kubernetes Node Agent
Documentation=https://kubernetes.io/docs/
After=containerd.service
Wants=containerd.service
[Service]
ExecStart=/usr/local/bin/kubelet
Restart=always
StartLimitInterval=0
RestartSec=10
[Install]
WantedBy=multi-user.target
EOF
sudo mkdir -p /etc/systemd/system/kubelet.service.d
cat <<'EOF' | sudo tee /etc/systemd/system/kubelet.service.d/10-kubeadm.conf
[Service]
Environment="KUBELET_KUBECONFIG_ARGS=--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf"
Environment="KUBELET_CONFIG_ARGS=--config=/var/lib/kubelet/config.yaml"
EnvironmentFile=-/etc/default/kubelet
ExecStart=
ExecStart=/usr/local/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_EXTRA_ARGS
EOF
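# Quick check (sketch): confirm systemd merges the unit with its drop-in
# systemctl cat kubelet | head -n 20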
sudo systemctl daemon-reload
sudo systemctl enable kubelet
# Do not start kubelet yet; it needs configuration before it can start cleanly
log_info "Kubernetes binaries installed; the kubelet service is enabled but not started."
# ==============================================================================
# 6. Start the local image registry (control-plane node 192.168.16.5 only)
# ==============================================================================
log_info "启动本地镜像仓库 ${LOCAL_REGISTRY_ADDR} ..."
# 加载 registry 镜像
cd "${OFFLINE_ASSETS_DIR}/images"
REGISTRY_TAR=$(find . -name "registry_2.tar")
if [ -z "$REGISTRY_TAR" ]; then
log_error "未找到本地镜像仓库 registry:2 的 tar 包。"
fi
sudo docker load -i "$REGISTRY_TAR"
# Start the registry container
sudo docker run -d -p "${LOCAL_REGISTRY_PORT}:5000" --restart=always --name registry registry:2
log_info "本地镜像仓库已在 ${LOCAL_REGISTRY_ADDR} 启动。"
cd - > /dev/null
# ==============================================================================
# 7. Import images and push them to the local registry (the pushes below use ctr; the target is a Docker registry)
# ==============================================================================
log_info "导入并推送到本地镜像仓库 (使用 Docker CLI)..."
IMAGE_TAR_FILES=$(find "${OFFLINE_ASSETS_DIR}/images" -name "*.tar")
echo "### Cleaning up local Docker Registry and containerd storage ###"
# 1. Clean up the local Docker registry (stop and remove the container and its data volumes)
echo " Stopping and removing local Docker Registry container: ${LOCAL_REGISTRY_ADDR}"
# the registry container is assumed to be named 'registry'
sudo docker stop registry || true # stop the container; ignore errors if it does not exist
sudo docker rm -v registry || true # remove the container and its anonymous volumes; ignore errors if absent
# If the registry used a named volume, remove it separately as well,
# e.g.: sudo docker volume rm my-registry-volume || true
echo " Restarting a fresh local Docker Registry container."
# Start a fresh, clean registry container
# the container name is assumed to be 'registry'; adjust if yours differs
sudo docker run -d -p 5000:5000 --restart=always --name registry registry:2
# Wait a moment for the registry to start up fully
sleep 5
echo " Local Docker Registry is ready."
# 2. Remove all existing images from containerd's local storage
echo " Cleaning up existing images from containerd local storage..."
# List every image (with or without the LOCAL_REGISTRY_ADDR prefix) except <none>
# references; the goal is to delete everything left over from previous runs,
# original names and locally retagged names alike. The registry image itself is
# skipped below, just in case.
ctr -n "$NAMESPACE" images ls | awk 'NR>1 {print $1}' | while read -r image_ref; do
# do not delete the registry image itself (in case it was imported too)
if [[ "$image_ref" == "${LOCAL_REGISTRY_ADDR}/registry:2" || "$image_ref" == "docker.io/library/registry:2" ]]; then
echo " Skipping deletion of registry image: $image_ref"
continue
fi
if [[ "$image_ref" == "<none>" ]]; then
continue # skip <none> references; they are usually dangling layers
fi
echo " Deleting containerd image: $image_ref"
ctr -n "$NAMESPACE" images rm "$image_ref" || true # || true 避免因为镜像正在被使用而中断
done
echo "### Finished cleaning up local environment. ###"
IMAGE_DIR=$OFFLINE_ASSETS_DIR/images
echo "=== Importing images from $IMAGE_DIR to local registry $LOCAL_REGISTRY_ADDR ==="
for tarfile in "$IMAGE_DIR"/*.tar; do
[ -e "$tarfile" ] || continue
echo ""
echo ">>> Processing $tarfile"
# 1) Capture the image list before the import
IMAGES_BEFORE=$(mktemp)
# the first column of 'ctr images ls' is the REF (image name); extract it with awk
if ! ctr -n "$NAMESPACE" images ls | awk 'NR>1 {print $1}' | sort > "$IMAGES_BEFORE"; then
echo "❌ Failed to get images list before import."
continue
fi
# Debug:
# echo "Images BEFORE import for $tarfile:"
# cat "$IMAGES_BEFORE"
# 2) Import the image
if ! ctr -n "$NAMESPACE" images import "$tarfile"; then
echo "❌ Failed to import image from $tarfile."
rm -f "$IMAGES_BEFORE" # 清理临时文件
continue
fi
# 3) Capture the image list after the import
IMAGES_AFTER=$(mktemp)
if ! ctr -n "$NAMESPACE" images ls | awk 'NR>1 {print $1}' | sort > "$IMAGES_AFTER"; then
echo "❌ Failed to get images list after import."
rm -f "$IMAGES_BEFORE" # 清理临时文件
continue
fi
# Debug:
# echo "Images AFTER import for $tarfile:"
# cat "$IMAGES_AFTER"
# echo "Raw difference (comm -13):"
# comm -13 "$IMAGES_BEFORE" "$IMAGES_AFTER"
# 4) Identify the newly added image (the original name), excluding anything
# already prefixed with the local registry address and any <none> references.
# A tarfile may contain multiple tags; take only the first match.
ORIGIN_IMG=$(comm -13 "$IMAGES_BEFORE" "$IMAGES_AFTER" | grep -vE "${LOCAL_REGISTRY_ADDR}|<none>" | head -n1)
rm -f "$IMAGES_BEFORE" "$IMAGES_AFTER" # 清理临时文件
if [[ -z "$ORIGIN_IMG" ]]; then
echo "❌ Failed to detect original image name, skipping..."
continue
fi
echo "Original image: $ORIGIN_IMG"
NEW_IMG=""
if [[ "$ORIGIN_IMG" == "registry.k8s.io/"* ]]; then
if [[ "$ORIGIN_IMG" == "registry.k8s.io/coredns/"* ]]; then
NEW_IMG="${LOCAL_REGISTRY_ADDR}/${ORIGIN_IMG#registry.k8s.io/coredns/}"
else
NEW_IMG="${LOCAL_REGISTRY_ADDR}/${ORIGIN_IMG#registry.k8s.io/}"
fi
elif [[ "$ORIGIN_IMG" == "ghcr.io/"* ]]; then
NEW_IMG="${LOCAL_REGISTRY_ADDR}/${ORIGIN_IMG#ghcr.io/}"
elif [[ "$ORIGIN_IMG" == "quay.io/"* ]]; then
NEW_IMG="${LOCAL_REGISTRY_ADDR}/${ORIGIN_IMG#quay.io/}"
elif [[ "$ORIGIN_IMG" == "nvcr.io/"* ]]; then
NEW_IMG="${LOCAL_REGISTRY_ADDR}/${ORIGIN_IMG#nvcr.io/}"
elif [[ "$ORIGIN_IMG" == "docker.io/"* ]]; then
if [[ "$ORIGIN_IMG" == "docker.io/library/"* ]]; then
NEW_IMG="${LOCAL_REGISTRY_ADDR}/${ORIGIN_IMG#docker.io/library/}"
else
NEW_IMG="${LOCAL_REGISTRY_ADDR}/${ORIGIN_IMG#docker.io/}"
fi
else
echo "Warning: Unknown original registry prefix for $ORIGIN_IMG. Directly prepending LOCAL_REGISTRY_ADDR."
NEW_IMG="${LOCAL_REGISTRY_ADDR}/${ORIGIN_IMG}"
fi
echo "Retag as: $NEW_IMG"
# 5) Retag
ctr -n "$NAMESPACE" images tag "$ORIGIN_IMG" "$NEW_IMG"
# 6) Push to the local registry
ctr -n "$NAMESPACE" images push --plain-http "$NEW_IMG"
echo "tarfile=$tarfile ORIGIN_IMG=$ORIGIN_IMG NEW_IMG=$NEW_IMG"
echo "✅ Done: $NEW_IMG"
done
log_info "所有镜像已导入并推送到本地镜像仓库。"
cd - > /dev/null
# ==============================================================================
# 8. Initialize the Kubernetes control plane
# ==============================================================================
log_info "初始化 Kubernetes 控制平面..."
# 生成 kubeadm 配置
cat <<EOF | sudo tee ${TEMP_DIR}/kubeadm-config.yaml
apiVersion: kubeadm.k8s.io/v1beta3
kind: ClusterConfiguration
kubernetesVersion: ${K8S_VERSION}
controlPlaneEndpoint: "${K8S_APISERVER_ADVERTISE_ADDRESS}:6443"
imageRepository: "${LOCAL_REGISTRY_ADDR}" # pull images from the local registry
networking:
podSubnet: ${POD_CIDR}
serviceSubnet: ${SERVICE_CIDR}
---
apiVersion: kubeadm.k8s.io/v1beta3
kind: InitConfiguration
nodeRegistration:
criSocket: "unix:///run/containerd/containerd.sock"
name: $(hostname)
kubeletExtraArgs:
node-ip: "${K8S_APISERVER_ADVERTISE_ADDRESS}"
EOF
log_info "kubeadm-config.yaml 已生成。"
cat ${TEMP_DIR}/kubeadm-config.yaml
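# Optional pre-check (sketch): confirm kubeadm resolves every control-plane
# image from the local registry before running init.
# sudo kubeadm config images list --config=${TEMP_DIR}/kubeadm-config.yaml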
# Run kubeadm init
# --upload-certs: upload certificates to the cluster so additional control-plane nodes can fetch them
# --config: use the generated configuration
# --ignore-preflight-errors=all: ignore all preflight errors; not recommended in
# production, where each preflight error should be investigated and resolved.
# Guard with 'if !' so the failure check works under 'set -e'.
if ! sudo kubeadm init --config=${TEMP_DIR}/kubeadm-config.yaml --upload-certs --ignore-preflight-errors=all; then
    log_error "kubeadm init failed."
fi
log_info "Kubernetes 控制平面初始化完成。"
# 配置 kubectl
log_info "配置 kubectl 访问集群..."
mkdir -p "$HOME/.kube"
sudo cp /etc/kubernetes/admin.conf "$HOME/.kube/config"
sudo chown $(id -u):$(id -g) "$HOME/.kube/config"
export KUBECONFIG=$HOME/.kube/config # make it available in the current session
log_info "kubectl 配置完成。"
log_info "查看集群状态:"
kubectl get nodes
# ==============================================================================
# 9. Install the CNI network plugin (Calico)
# ==============================================================================
log_info "安装 CNI 网络插件 (Calico)..."
CALICO_MANIFEST="${OFFLINE_ASSETS_DIR}/manifests/calico.yaml"
cp "${CALICO_MANIFEST}" "${TEMP_DIR}/calico.yaml"
# Rewrite the image addresses
# Note: the replacements must match how the push loop above retags images; it
# strips the source registry prefix (e.g. docker.io/) before prepending the
# local registry address.
sudo sed -i "s|docker.io/calico/cni:${CALICO_VERSION}|${LOCAL_REGISTRY_ADDR}/calico/cni:${CALICO_VERSION}|g" "${TEMP_DIR}/calico.yaml"
sudo sed -i "s|docker.io/calico/node:${CALICO_VERSION}|${LOCAL_REGISTRY_ADDR}/calico/node:${CALICO_VERSION}|g" "${TEMP_DIR}/calico.yaml"
sudo sed -i "s|docker.io/calico/kube-controllers:${CALICO_VERSION}|${LOCAL_REGISTRY_ADDR}/calico/kube-controllers:${CALICO_VERSION}|g" "${TEMP_DIR}/calico.yaml"
# Set the Pod CIDR
# assumes the commented CALICO_IPV4POOL_CIDR name/value lines appear with this exact spacing in the YAML
sudo sed -i "s|# - name: CALICO_IPV4POOL_CIDR|- name: CALICO_IPV4POOL_CIDR|g" "${TEMP_DIR}/calico.yaml"
sudo sed -i "s|# value: \"192.168.0.0/16\"| value: \"${POD_CIDR}\"|g" "${TEMP_DIR}/calico.yaml"
kubectl apply -f "${TEMP_DIR}/calico.yaml"
log_info "Calico 网络插件安装完成。"
log_info "等待 Calico Pod 启动..."
kubectl wait --for=condition=ready pod -l k8s-app=calico-node -n kube-system --timeout=300s || log_error "Calico Pod 未能在指定时间内启动。"
# ==============================================================================
# 10. Install Multus CNI (for multiple NICs on KubeVirt VMs)
# ==============================================================================
log_info "安装 Multus CNI 插件..."
MULTUS_MANIFEST="${OFFLINE_ASSETS_DIR}/manifests/multus-daemonset.yaml"
cp "${MULTUS_MANIFEST}" "${TEMP_DIR}/multus-daemonset.yaml"
# Rewrite the Multus image address (the push loop strips the ghcr.io/ prefix)
sudo sed -i "s|ghcr.io/k8snetworkplumbingwg/multus-cni:v4.0.2|${LOCAL_REGISTRY_ADDR}/k8snetworkplumbingwg/multus-cni:v4.0.2|g" "${TEMP_DIR}/multus-daemonset.yaml"
kubectl apply -f "${TEMP_DIR}/multus-daemonset.yaml"
log_info "Multus CNI 插件安装完成。"
log_info "等待 Multus Pod 启动..."
kubectl wait --for=condition=ready pod -l name=multus -n kube-system --timeout=300s || log_error "Multus Pod 未能在指定时间内启动。"
# ==============================================================================
# 11. Install KubeVirt
# ==============================================================================
log_info "安装 KubeVirt..."
KUBEVIRT_OPERATOR_MANIFEST="${OFFLINE_ASSETS_DIR}/manifests/kubevirt-operator.yaml"
KUBEVIRT_CR_MANIFEST="${OFFLINE_ASSETS_DIR}/manifests/kubevirt-cr.yaml"
cp "${KUBEVIRT_OPERATOR_MANIFEST}" "${TEMP_DIR}/kubevirt-operator.yaml"
cp "${KUBEVIRT_CR_MANIFEST}" "${TEMP_DIR}/kubevirt-cr.yaml"
# Rewrite the KubeVirt operator image address (the push loop strips the quay.io/ prefix)
sudo sed -i "s|quay.io/kubevirt/virt-operator:${KUBEVIRT_VERSION}|${LOCAL_REGISTRY_ADDR}/kubevirt/virt-operator:${KUBEVIRT_VERSION}|g" "${TEMP_DIR}/kubevirt-operator.yaml"
kubectl apply -f "${TEMP_DIR}/kubevirt-operator.yaml"
log_info "KubeVirt Operator 安装完成。"
log_info "等待 KubeVirt Operator Pod 启动..."
kubectl wait --for=condition=ready pod -l kubevirt.io=virt-operator -n kubevirt --timeout=300s || log_error "KubeVirt Operator Pod 未能在指定时间内启动。"
# Deploy the KubeVirt CR (custom resource)
kubectl apply -f "${TEMP_DIR}/kubevirt-cr.yaml"
log_info "KubeVirt CR 部署完成。"
log_info "等待 KubeVirt 组件 Pod 启动..."
# KubeVirt 有多个组件,简单起见,等待所有带有 kubevirt.io 标签的 Pod
kubectl wait --for=condition=ready pod -l kubevirt.io -n kubevirt --timeout=600s || log_error "KubeVirt 组件 Pod 未能在指定时间内启动。"
log_info "KubeVirt 安装完成。"
# ==============================================================================
# 12. Install the NFS Subdir External Provisioner (dynamic NFS storage)
# ==============================================================================
log_info "安装 NFS Subdir External Provisioner..."
# 12.1 准备 NFS 挂载点
log_info "在控制节点创建 NFS 挂载点..."
NFS_MOUNT_POINT="/mnt/nfs_provisioner"
sudo mkdir -p "${NFS_MOUNT_POINT}"
# Check whether it is already mounted to avoid mounting twice
if ! mountpoint -q "${NFS_MOUNT_POINT}"; then
    # Guard with 'if !' so the failure check works under 'set -e'
    if ! sudo mount -t nfs "${NFS_SERVER}:${NFS_PATH}" "${NFS_MOUNT_POINT}"; then
        log_error "Failed to mount the NFS share ${NFS_SERVER}:${NFS_PATH}; check the NFS server configuration and network connectivity."
    fi
    log_info "NFS share mounted at ${NFS_MOUNT_POINT}."
else
    log_info "NFS share already mounted."
fi
# 12.2 Point the Helm chart at the local image registry
log_info "Preparing the NFS provisioner Helm chart..."
NFS_CHART_FILE="${OFFLINE_ASSETS_DIR}/charts/nfs-subdir-external-provisioner-${NFS_CHART_VERSION}.tgz"
# Extract the chart into the temp directory
cd "${TEMP_DIR}"
tar -xzvf "${NFS_CHART_FILE}"
NFS_CHART_DIR="nfs-subdir-external-provisioner" # directory name after extraction
# Rewrite the image address in values.yaml (the push loop strips the registry.k8s.io/ prefix)
sudo sed -i "s|repository: registry.k8s.io/sig-storage/nfs-subdir-external-provisioner|repository: ${LOCAL_REGISTRY_ADDR}/sig-storage/nfs-subdir-external-provisioner|g" "${NFS_CHART_DIR}/values.yaml"
sudo sed -i "s|tag: .*$|tag: ${NFS_PROVISIONER_VERSION}|" "${NFS_CHART_DIR}/values.yaml" # pin the image tag (NFS_PROVISIONER_VERSION already carries the leading v)
# 12.3 Install the provisioner with Helm
log_info "Installing the NFS provisioner with Helm..."
helm upgrade --install nfs-client-provisioner "${NFS_CHART_DIR}" \
--namespace nfs-client-provisioner --create-namespace \
--set nfs.server="${NFS_SERVER}" \
--set nfs.path="${NFS_PATH}" \
--set storageClass.name="${NFS_STORAGE_CLASS_NAME}" \
--set storageClass.reclaimPolicy=Retain \
    --set storageClass.defaultClass=true # make it the default StorageClass
log_info "NFS Subdir External Provisioner installed."
log_info "Waiting for the NFS provisioner pod to start..."
kubectl wait --for=condition=ready pod -l app=nfs-subdir-external-provisioner -n nfs-client-provisioner --timeout=300s || log_error "The NFS provisioner pod did not become ready in time."
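# Hedged smoke test: a tiny PVC against the new default StorageClass; it should
# reach Bound once the provisioner works. The claim name is illustrative.
# cat <<PVC | kubectl apply -f -
# apiVersion: v1
# kind: PersistentVolumeClaim
# metadata:
#   name: nfs-test-claim
# spec:
#   storageClassName: ${NFS_STORAGE_CLASS_NAME}
#   accessModes: ["ReadWriteMany"]
#   resources:
#     requests:
#       storage: 1Mi
# PVC
# kubectl get pvc nfs-test-claim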
echo "等待10分钟"
sleep 600
echo "安装 CNI 插件"
# 安装 CNI 插件
# 备份原始文件
cp "$CALICO_YAML_PATH" "${CALICO_YAML_PATH}.bak-calico-images"
echo "Replacing Calico image registries in $CALICO_YAML_PATH for version $CALICO_VERSION..."
# Rewrite the calico/node image:
# "image: docker.io/calico/node:v3.26.1" becomes "image: 192.168.16.5:5000/calico/node:v3.26.1"
sudo sed -i "s|image: docker.io/calico/node:${CALICO_VERSION}|image: ${LOCAL_REGISTRY_ADDR}/calico/node:${CALICO_VERSION}|g" "$CALICO_YAML_PATH"
# Rewrite the calico/kube-controllers image
sudo sed -i "s|image: docker.io/calico/kube-controllers:${CALICO_VERSION}|image: ${LOCAL_REGISTRY_ADDR}/calico/kube-controllers:${CALICO_VERSION}|g" "$CALICO_YAML_PATH"
# Rewrite the calico/cni image
sudo sed -i "s|image: docker.io/calico/cni:${CALICO_VERSION}|image: ${LOCAL_REGISTRY_ADDR}/calico/cni:${CALICO_VERSION}|g" "$CALICO_YAML_PATH"
echo "Calico image registry replacement complete."
# Double-check that every image path has been rewritten
echo "Verifying image paths in $CALICO_YAML_PATH:"
grep "image:" "$CALICO_YAML_PATH" | grep "calico"
# Append an IPPool resource to the end of calico.yaml
sudo tee -a "$CALICO_YAML_PATH" <<EOF

---
apiVersion: crd.projectcalico.org/v1
kind: IPPool
metadata:
  name: default-pool-ipv4
spec:
  cidr: $POD_CIDR
  natOutgoing: true
  disabled: false
  ipipMode: Always
EOF
# Apply the CNI plugin
kubectl apply -f "$CALICO_YAML_PATH"
echo "[INFO] 生成 Worker 加入脚本..."
kubeadm token create --print-join-command > ../../output/join_cluster.sh
chmod +x ../../output/join_cluster.sh
echo "Master 部署完成!请检查 kubectl get nodes"