#!/bin/bash
set -eo pipefail # exit immediately on any error, including failures anywhere in a pipeline
# ==============================================================================
# Configuration
# ==============================================================================
OFFLINE_ASSETS_DIR="/root/k8s-offline-bundle"
K8S_VERSION="v1.28.2"
CALICO_VERSION="v3.26.1"
KUBEVIRT_VERSION="v1.1.0"
NFS_PROVISIONER_VERSION="v4.0.2" # image tag
NFS_CHART_VERSION="4.0.18" # Helm chart version
LOCAL_REGISTRY_IP="192.168.16.5"
LOCAL_REGISTRY_PORT="5000"
LOCAL_REGISTRY_ADDR="${LOCAL_REGISTRY_IP}:${LOCAL_REGISTRY_PORT}"
K8S_APISERVER_ADVERTISE_ADDRESS="${LOCAL_REGISTRY_IP}"
POD_CIDR="192.168.0.0/16"
SERVICE_CIDR="10.96.0.0/12"
NFS_SERVER="192.168.16.2"
NFS_PATH="/d/share/101206"
NFS_STORAGE_CLASS_NAME="nfs-client"
TEMP_DIR="/tmp/k8s-master-setup"
NAMESPACE="default"
CONTAINERD_CONFIG="/etc/containerd/config.toml"
CERTS_D_PATH="/etc/containerd/certs.d"
CALICO_YAML_PATH="$OFFLINE_ASSETS_DIR/manifests/calico.yaml" # verify this path exists in the bundle
mkdir -p ${TEMP_DIR}
echo "=================================================="
echo " Kubernetes 控制节点离线安装脚本 "
echo "=================================================="
echo "配置参数:"
echo " K8s 版本: ${K8S_VERSION}"
echo " 本地镜像仓库: ${LOCAL_REGISTRY_ADDR}"
echo " K8s API Server IP: ${K8S_APISERVER_ADVERTISE_ADDRESS}"
echo " Pod CIDR: ${POD_CIDR}"
echo " NFS Server: ${NFS_SERVER}:${NFS_PATH}"
echo "--------------------------------------------------"
# ==============================================================================
# Shared helpers (normally kept in common.sh; inlined here for simplicity)
# ==============================================================================
log_info() {
echo -e "\e[32m[INFO] $(date +'%Y-%m-%d %H:%M:%S') $1\e[0m"
}
log_error() {
echo -e "\e[31m[ERROR] $(date +'%Y-%m-%d %H:%M:%S') $1\e[0m" >&2
exit 1
}
command_exists() {
command -v "$1" >/dev/null 2>&1
}
check_root() {
if [[ $EUID -ne 0 ]]; then
log_error "此脚本必须以 root 用户或使用 sudo 运行。"
fi
}
configure_sysctl() {
log_info "配置系统内核参数..."
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF
sudo modprobe overlay
sudo modprobe br_netfilter
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
EOF
sudo sysctl --system
log_info "系统内核参数配置完成。"
}
disable_swap() {
log_info "禁用 Swap 分区..."
if grep -q "swap" /etc/fstab; then
sudo swapoff -a
sudo sed -i '/ swap / s/^\(.*\)$/#\1/g' /etc/fstab
log_info "Swap 分区已禁用并从 fstab 中注释。"
else
log_info "未检测到 Swap 分区或已禁用。"
fi
}
# ==============================================================================
# 0. Preflight checks and environment initialization
# ==============================================================================
check_root
configure_sysctl
disable_swap
log_info "将离线资源目录添加到 PATH。"
export PATH="${OFFLINE_ASSETS_DIR}/bin:$PATH"
echo "export PATH=${OFFLINE_ASSETS_DIR}/bin:\$PATH" | sudo tee /etc/profile.d/offline-k8s.sh
# ==============================================================================
# 1. Install OS dependencies (.deb packages)
# ==============================================================================
log_info "安装操作系统依赖 (DEB 包)..."
cd "${OFFLINE_ASSETS_DIR}/debs"
# 尝试安装所有deb包。使用 --force-depends 强制安装以应对离线环境下的依赖顺序问题
# 更好的方法是使用 dpkg -i A.deb B.deb C.deb D.deb (D依赖CC依赖BB依赖A)
# 或者循环多次,直到没有错误
# 考虑到依赖可能很复杂,最稳妥的是尝试全部安装,然后 apt install -f (不确定离线是否能完全解决)
# 或者在下载阶段使用 apt-get download --reinstall $(apt-cache depends --recurse --no-recommends --no-suggests --no-conflicts --no-breaks --no-replaces --no-enhances <package> | grep "^\w" | sort -u)
# 但目前我们只有你提供的列表,直接全部安装
sudo dpkg -i *.deb || true # 第一次安装可能因为依赖失败,忽略错误
# 再次尝试,确保所有包尽可能安装
sudo dpkg -i *.deb || true
# 离线环境apt install -f 无法工作这里假设所有必要依赖都在debs目录中
# 如果有未满足的依赖,这里会显示错误
log_info "已尝试安装所有DEB包。请检查上述输出是否有未满足的依赖。"
cd - > /dev/null
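# A retry loop is more robust than a fixed number of passes. A minimal sketch
# (an optional helper, not invoked by the flow above): repeat `dpkg -i` until a
# pass succeeds, then surface anything dpkg still considers broken.
retry_dpkg_install() {
local dir="$1" max_passes="${2:-5}" pass
for ((pass = 1; pass <= max_passes; pass++)); do
if (cd "$dir" && sudo dpkg -i ./*.deb); then
return 0 # everything installed cleanly
fi
log_info "dpkg pass ${pass}/${max_passes} had errors; retrying..."
done
# list packages that are still not fully installed (status != 'ii')
dpkg -l | awk 'NR > 5 && $1 != "ii" {print}' >&2
return 1
}
# Example: retry_dpkg_install "${OFFLINE_ASSETS_DIR}/debs"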
# ==============================================================================
# 2. Install Docker (used only for the local image registry)
# ==============================================================================
log_info "安装 Docker daemon (仅用于本地镜像仓库) ..."
# The provided .deb list contains no docker-ce/docker.io packages, so Docker is
# assumed to be installed by other means. If it is missing in an offline
# environment, place docker-ce and its dependency .debs in the debs directory
# and install them there; here we only verify that the CLI exists.
if ! command_exists docker; then
log_error "Docker CLI not found. Install Docker (or a compatible engine such as Podman) first."
fi
# Configure the Docker daemon to trust the local registry over plain HTTP
log_info "Configuring the Docker daemon to trust local registry ${LOCAL_REGISTRY_ADDR} (plain HTTP)..."
sudo mkdir -p /etc/docker
cat <<EOF | sudo tee /etc/docker/daemon.json
{
"insecure-registries": ["${LOCAL_REGISTRY_ADDR}"],
"exec-opts": ["native.cgroupdriver=systemd"]
}
EOF
# native.cgroupdriver=systemd above matches the cgroup driver kubelet expects
sudo groupadd docker || true
sudo systemctl daemon-reload
sudo systemctl enable docker.socket
sudo systemctl enable docker
sudo systemctl restart docker.socket
sudo systemctl restart docker
sudo systemctl status docker --no-pager || log_error "The Docker daemon failed to start."
log_info "Docker daemon configured to trust the local registry and restarted."
# ==============================================================================
# 3. Install the containerd runtime
# ==============================================================================
log_info "安装 Containerd 运行时..."
CONTAINERD_TAR_GZ=$(find "${OFFLINE_ASSETS_DIR}/bin" -name "containerd-*.tar.gz")
if [ -z "$CONTAINERD_TAR_GZ" ]; then
log_error "未找到 Containerd 压缩包。"
fi
# Unpack the containerd binaries into /usr/local
sudo tar Cxzvf /usr/local "$CONTAINERD_TAR_GZ"
# Install the containerd systemd unit (the bundled copy of the upstream unit file)
cp ${OFFLINE_ASSETS_DIR}/service/containerd.service /etc/systemd/system
sudo systemctl daemon-reload # reload unit definitions
# Generate the default configuration
sudo mkdir -p /etc/containerd
sudo containerd config default | sudo tee /etc/containerd/config.toml
echo "=== Configuring containerd registry mirrors using config_path ==="
echo "Local Registry Address: http://${LOCAL_REGISTRY_ADDR}"
# --- 1. Back up the original containerd config.toml ---
if [ -f "$CONTAINERD_CONFIG" ]; then
echo "Backing up original containerd config.toml to ${CONTAINERD_CONFIG}.bak"
sudo cp "$CONTAINERD_CONFIG" "${CONTAINERD_CONFIG}.bak"
else
echo "Warning: ${CONTAINERD_CONFIG} not found. Generating default config."
sudo containerd config default | sudo tee "$CONTAINERD_CONFIG" > /dev/null
fi
# --- 2. Create the required directory structure ---
echo "Creating necessary directory structure under ${CERTS_D_PATH}"
sudo mkdir -p "${CERTS_D_PATH}/${LOCAL_REGISTRY_ADDR}"
sudo mkdir -p "${CERTS_D_PATH}/registry.k8s.io"
# --- 3. Generate the hosts.toml files ---
# hosts.toml for the local registry (plain HTTP, skip TLS verification)
echo "Creating ${CERTS_D_PATH}/${LOCAL_REGISTRY_ADDR}/hosts.toml"
sudo tee "${CERTS_D_PATH}/${LOCAL_REGISTRY_ADDR}/hosts.toml" > /dev/null <<EOF
server = "http://${LOCAL_REGISTRY_ADDR}"
[host."http://${LOCAL_REGISTRY_ADDR}"]
capabilities = ["pull", "resolve"]
skip_verify = true
EOF
# hosts.toml for registry.k8s.io (prefer the local mirror, fall back to upstream)
echo "Creating ${CERTS_D_PATH}/registry.k8s.io/hosts.toml"
sudo tee "${CERTS_D_PATH}/registry.k8s.io/hosts.toml" > /dev/null <<EOF
server = "https://registry.k8s.io" # This specifies the primary server for this registry
# Prioritize local insecure registry
[host."http://${LOCAL_REGISTRY_ADDR}"]
capabilities = ["pull", "resolve"]
skip_verify = true
# Fallback to official registry
[host."https://registry.k8s.io"]
capabilities = ["pull", "resolve"]
EOF
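# Optional: once containerd has been restarted below, the certs.d resolution
# can be exercised directly with ctr (a sketch; `--hosts-dir` needs a
# reasonably recent ctr, and the image must already be in the local registry):
#   sudo ctr images pull --hosts-dir "${CERTS_D_PATH}" "${LOCAL_REGISTRY_ADDR}/pause:3.9"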
# --- 4. Patch /etc/containerd/config.toml ---
echo "Modifying ${CONTAINERD_CONFIG} to use config_path and remove deprecated settings."
# Point the sandbox image at the local registry, then insert or update config_path
sudo sed -i "s|sandbox_image = \"registry.k8s.io/pause:3.6\"|sandbox_image = \"${LOCAL_REGISTRY_ADDR}/pause:3.9\"|g" "$CONTAINERD_CONFIG"
if grep -q "config_path =" "$CONTAINERD_CONFIG"; then
sudo sed -i "s|^[[:space:]]*config_path = .*| config_path = \"${CERTS_D_PATH}\"|" "$CONTAINERD_CONFIG"
else
# If absent, insert it just below the [plugins."io.containerd.grpc.v1.cri".registry] table header
sudo sed -i "/\[plugins.\"io.containerd.grpc.v1.cri\".registry\]/a \\\n config_path = \"${CERTS_D_PATH}\"" "$CONTAINERD_CONFIG"
fi
# Remove the deprecated mirrors/configs tables (the source of the deprecation warnings)
# Delete the [plugins."io.containerd.grpc.v1.cri".registry.mirrors."registry.k8s.io"] table and its contents
sudo sed -i '/^\[plugins\."io\.containerd\.grpc\.v1\.cri"\.registry\.mirrors\."registry\.k8s\.io"\]/,/^endpoint = \[/d' "$CONTAINERD_CONFIG"
# Delete the [plugins."io.containerd.grpc.v1.cri".registry.configs."192.168.16.5:5000".tls] table and its contents
sudo sed -i '/^\[plugins\."io\.containerd\.grpc\.v1\.cri"\.registry\.configs\."192\.168\.16\.5:5000"\.tls\]/,/^insecure_skip_verify = /d' "$CONTAINERD_CONFIG"
# Remove the now-empty parent table headers
sudo sed -i '/^\[plugins\."io\.containerd\.grpc\.v1\.cri"\.registry\.mirrors\]/d' "$CONTAINERD_CONFIG"
sudo sed -i '/^\[plugins\."io\.containerd\.grpc\.v1\.cri"\.registry\.configs\]/d' "$CONTAINERD_CONFIG"
echo "--- 5. Restarting containerd service ---"
sudo systemctl daemon-reload
sudo systemctl restart containerd
sudo systemctl status containerd --no-pager | head -n 10 # first 10 lines of status, to spot errors
echo "=== containerd registry configuration with config_path completed. ==="
echo "Please check 'sudo systemctl status containerd' for any errors."
echo "You should no longer see DEPRECATION warnings related to 'mirrors' and 'configs'."
# Configure crictl
log_info "Configuring crictl..."
cat <<EOF | sudo tee /etc/crictl.yaml
runtime-endpoint: unix:///run/containerd/containerd.sock
image-endpoint: unix:///run/containerd/containerd.sock
EOF
log_info "crictl 配置完成。"
# ==============================================================================
# 4. Install the CNI plugins
# ==============================================================================
log_info "安装 CNI 插件..."
CNI_PLUGINS_TAR_GZ=$(find "${OFFLINE_ASSETS_DIR}/bin" -name "cni-plugins-*.tgz")
if [ -z "$CNI_PLUGINS_TAR_GZ" ]; then
log_error "未找到 CNI 插件压缩包。"
fi
sudo mkdir -p /opt/cni/bin
sudo tar Cxzvf /opt/cni/bin "$CNI_PLUGINS_TAR_GZ"
log_info "CNI 插件安装完成。"
# ==============================================================================
# 5. Install the Kubernetes binaries (kubelet, kubeadm, kubectl)
# ==============================================================================
log_info "安装 Kubernetes Binaries..."
sudo cp "${OFFLINE_ASSETS_DIR}/bin/kubelet" /usr/local/bin/kubelet
sudo cp "${OFFLINE_ASSETS_DIR}/bin/kubeadm" /usr/local/bin/kubeadm
sudo cp "${OFFLINE_ASSETS_DIR}/bin/kubectl" /usr/local/bin/kubectl
sudo cp "${OFFLINE_ASSETS_DIR}/bin/helm" /usr/local/bin/helm # Helm CLI
sudo chmod +x /usr/local/bin/{kubeadm,kubelet,kubectl,helm}
# Configure the kubelet systemd service
log_info "Configuring the kubelet systemd service..."
# The unit file and the kubeadm drop-in are written inline via heredocs
cat <<'EOF' | sudo tee /etc/systemd/system/kubelet.service
[Unit]
Description=kubelet: The Kubernetes Node Agent
Documentation=https://kubernetes.io/docs/
After=containerd.service
Wants=containerd.service
[Service]
ExecStart=/usr/local/bin/kubelet
Restart=always
StartLimitInterval=0
RestartSec=10
[Install]
WantedBy=multi-user.target
EOF
sudo mkdir -p /etc/systemd/system/kubelet.service.d
cat <<'EOF' | sudo tee /etc/systemd/system/kubelet.service.d/10-kubeadm.conf
[Service]
Environment="KUBELET_KUBECONFIG_ARGS=--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf"
Environment="KUBELET_CONFIG_ARGS=--config=/var/lib/kubelet/config.yaml"
EnvironmentFile=-/etc/default/kubelet
ExecStart=
ExecStart=/usr/local/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_EXTRA_ARGS
EOF
sudo systemctl daemon-reload
sudo systemctl enable kubelet
# Do not start kubelet yet; it cannot start until kubeadm writes its configuration
log_info "Kubernetes binaries installed; kubelet service enabled but not started."
# ==============================================================================
# 6. Start the local image registry (control node only, 192.168.16.5)
# ==============================================================================
log_info "启动本地镜像仓库 ${LOCAL_REGISTRY_ADDR} ..."
# 加载 registry 镜像
cd "${OFFLINE_ASSETS_DIR}/images"
REGISTRY_TAR=$(find . -name "registry_2.tar")
if [ -z "$REGISTRY_TAR" ]; then
log_error "未找到本地镜像仓库 registry:2 的 tar 包。"
fi
sudo docker load -i "$REGISTRY_TAR"
# Start the registry container
sudo docker run -d -p "${LOCAL_REGISTRY_PORT}:5000" --restart=always --name registry registry:2
log_info "本地镜像仓库已在 ${LOCAL_REGISTRY_ADDR} 启动。"
cd - > /dev/null
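# Optional readiness probe (a sketch, not invoked by default; assumes curl is
# available): a Docker Registry answers GET /v2/ with HTTP 200 once it is up.
wait_for_registry() {
local tries="${1:-30}"
until curl -fs "http://${LOCAL_REGISTRY_ADDR}/v2/" >/dev/null; do
((tries--)) || { echo "[WARN] registry at ${LOCAL_REGISTRY_ADDR} is not responding" >&2; return 1; }
sleep 1
done
log_info "Registry at ${LOCAL_REGISTRY_ADDR} is answering /v2/."
}
# wait_for_registry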
# ==============================================================================
# 7. Import image tarballs and push them to the local registry (via ctr)
# ==============================================================================
log_info "导入并推送到本地镜像仓库 (使用 Docker CLI)..."
IMAGE_TAR_FILES=$(find "${OFFLINE_ASSETS_DIR}/images" -name "*.tar")
echo "### Cleaning up local Docker Registry and containerd storage ###"
# 1. Reset the local Docker Registry (stop and remove the container and its data volumes)
echo "  Stopping and removing local Docker Registry container: ${LOCAL_REGISTRY_ADDR}"
# The registry container is assumed to be named 'registry'
sudo docker stop registry || true # ignore the error if the container does not exist
sudo docker rm -v registry || true # also removes its anonymous data volumes
# If the registry used a named volume, remove it separately, e.g.:
#   sudo docker volume rm my-registry-volume || true
echo " Restarting a fresh local Docker Registry container."
# Start a clean registry container (rename here if your container is not called 'registry')
sudo docker run -d -p 5000:5000 --restart=always --name registry registry:2
# Give the registry a moment to come up
sleep 5
echo " Local Docker Registry is ready."
# 2. Remove all images from containerd's local storage
echo "  Cleaning up existing images from containerd local storage..."
# Delete every image left over from earlier runs, both the original references
# and the locally retagged ones, but keep the registry image itself and skip
# dangling <none> references, just in case.
ctr -n "$NAMESPACE" images ls | awk 'NR>1 {print $1}' | while read -r image_ref; do
# never delete the registry image itself (in case it was imported here too)
if [[ "$image_ref" == "${LOCAL_REGISTRY_ADDR}/registry:2" || "$image_ref" == "docker.io/library/registry:2" ]]; then
echo " Skipping deletion of registry image: $image_ref"
continue
fi
if [[ "$image_ref" == "<none>" ]]; then
continue # skip dangling <none> references
fi
echo " Deleting containerd image: $image_ref"
ctr -n "$NAMESPACE" images rm "$image_ref" || true # || true 避免因为镜像正在被使用而中断
done
echo "### Finished cleaning up local environment. ###"
IMAGE_DIR=$OFFLINE_ASSETS_DIR/images
echo "=== Importing images from $IMAGE_DIR to local registry $LOCAL_REGISTRY_ADDR ==="
for tarfile in "$IMAGE_DIR"/*.tar; do
[ -e "$tarfile" ] || continue
echo ""
echo ">>> Processing $tarfile"
# 1. Snapshot the image list before the import
IMAGES_BEFORE=$(mktemp)
# The first column of `ctr images ls` is the REF (image name); extract it with awk
if ! ctr -n "$NAMESPACE" images ls | awk 'NR>1 {print $1}' | sort > "$IMAGES_BEFORE"; then
echo "❌ Failed to get images list before import."
continue
fi
# Debug:
# echo "Images BEFORE import for $tarfile:"
# cat "$IMAGES_BEFORE"
# 2. Import the tarball
if ! ctr -n "$NAMESPACE" images import "$tarfile"; then
echo "❌ Failed to import image from $tarfile."
rm -f "$IMAGES_BEFORE" # clean up the temp file
continue
fi
# 3. Snapshot the image list after the import
IMAGES_AFTER=$(mktemp)
if ! ctr -n "$NAMESPACE" images ls | awk 'NR>1 {print $1}' | sort > "$IMAGES_AFTER"; then
echo "❌ Failed to get images list after import."
rm -f "$IMAGES_BEFORE" # 清理临时文件
continue
fi
# Debug:
# echo "Images AFTER import for $tarfile:"
# cat "$IMAGES_AFTER"
# echo "Raw difference (comm -13):"
# comm -13 "$IMAGES_BEFORE" "$IMAGES_AFTER"
# 4. Find the newly added reference, i.e. the original image name. Exclude
# references already carrying the local registry prefix and <none> entries;
# a tarball may contain several tags, so take the first qualifying one.
ORIGIN_IMG=$(comm -13 "$IMAGES_BEFORE" "$IMAGES_AFTER" | grep -vE "${LOCAL_REGISTRY_ADDR}|<none>" | head -n1)
rm -f "$IMAGES_BEFORE" "$IMAGES_AFTER" # clean up the temp files
if [[ -z "$ORIGIN_IMG" ]]; then
echo "❌ Failed to detect original image name, skipping..."
continue
fi
echo "Original image: $ORIGIN_IMG"
NEW_IMG=""
if [[ "$ORIGIN_IMG" == "registry.k8s.io/"* ]]; then
if [[ "$ORIGIN_IMG" == "registry.k8s.io/coredns/"* ]]; then
NEW_IMG="${LOCAL_REGISTRY_ADDR}/${ORIGIN_IMG#registry.k8s.io/coredns/}"
else
NEW_IMG="${LOCAL_REGISTRY_ADDR}/${ORIGIN_IMG#registry.k8s.io/}"
fi
elif [[ "$ORIGIN_IMG" == "ghcr.io/"* ]]; then
NEW_IMG="${LOCAL_REGISTRY_ADDR}/${ORIGIN_IMG#ghcr.io/}"
elif [[ "$ORIGIN_IMG" == "quay.io/"* ]]; then
NEW_IMG="${LOCAL_REGISTRY_ADDR}/${ORIGIN_IMG#quay.io/}"
elif [[ "$ORIGIN_IMG" == "nvcr.io/"* ]]; then
NEW_IMG="${LOCAL_REGISTRY_ADDR}/${ORIGIN_IMG#nvcr.io/}"
elif [[ "$ORIGIN_IMG" == "docker.io/"* ]]; then
if [[ "$ORIGIN_IMG" == "docker.io/library/"* ]]; then
NEW_IMG="${LOCAL_REGISTRY_ADDR}/${ORIGIN_IMG#docker.io/library/}"
else
NEW_IMG="${LOCAL_REGISTRY_ADDR}/${ORIGIN_IMG#docker.io/}"
fi
else
echo "Warning: Unknown original registry prefix for $ORIGIN_IMG. Directly prepending LOCAL_REGISTRY_ADDR."
NEW_IMG="${LOCAL_REGISTRY_ADDR}/${ORIGIN_IMG}"
fi
echo "Retag as: $NEW_IMG"
# 5. Retag for the local registry
ctr -n "$NAMESPACE" images tag "$ORIGIN_IMG" "$NEW_IMG"
# 6. Push to the local registry over plain HTTP
ctr -n "$NAMESPACE" images push --plain-http "$NEW_IMG"
echo "tarfile=$tarfile ORIGIN_IMG=$ORIGIN_IMG NEW_IMG=$NEW_IMG"
echo "✅ Done: $NEW_IMG"
done
log_info "所有镜像已导入并推送到本地镜像仓库。"
cd - > /dev/null
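# Optional verification (a sketch; assumes curl is available): list the
# repositories that actually landed in the registry.
#   curl -fs "http://${LOCAL_REGISTRY_ADDR}/v2/_catalog?n=200"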
# ==============================================================================
# 8. Initialize the Kubernetes control plane
# ==============================================================================
log_info "初始化 Kubernetes 控制平面..."
# 生成 kubeadm 配置
cat <<EOF | sudo tee ${TEMP_DIR}/kubeadm-config.yaml
apiVersion: kubeadm.k8s.io/v1beta3
kind: ClusterConfiguration
kubernetesVersion: ${K8S_VERSION}
controlPlaneEndpoint: "${K8S_APISERVER_ADVERTISE_ADDRESS}:6443"
imageRepository: "${LOCAL_REGISTRY_ADDR}" # pull control-plane images from the local registry
networking:
podSubnet: ${POD_CIDR}
serviceSubnet: ${SERVICE_CIDR}
---
apiVersion: kubeadm.k8s.io/v1beta3
kind: InitConfiguration
nodeRegistration:
criSocket: "unix:///run/containerd/containerd.sock"
name: $(hostname)
kubeletExtraArgs:
node-ip: "${K8S_APISERVER_ADVERTISE_ADDRESS}"
EOF
log_info "kubeadm-config.yaml 已生成。"
cat ${TEMP_DIR}/kubeadm-config.yaml
# Run kubeadm init
# --upload-certs: upload control-plane certificates so further control-plane nodes can fetch them
# --config: use the generated configuration
# --ignore-preflight-errors=all: skips every preflight check, which is generally
# not recommended for production; preferably fix each preflight error individually.
# Note: with `set -e` a separate `$?` check would never run, so chain the error
# handler directly onto the command.
sudo kubeadm init --config=${TEMP_DIR}/kubeadm-config.yaml --upload-certs --ignore-preflight-errors=all || log_error "kubeadm init failed."
log_info "Kubernetes control plane initialized."
# Configure kubectl
log_info "Configuring kubectl access to the cluster..."
mkdir -p "$HOME/.kube"
sudo cp /etc/kubernetes/admin.conf "$HOME/.kube/config"
sudo chown $(id -u):$(id -g) "$HOME/.kube/config"
export KUBECONFIG=$HOME/.kube/config # make the kubeconfig available in the current session
log_info "kubectl configured."
log_info "Cluster status:"
kubectl get nodes
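# Worker nodes join with the command kubeadm printed above; it can be
# regenerated at any time (bootstrap tokens expire after 24h by default):
#   kubeadm token create --print-join-command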
# ==============================================================================
# 9. Install the CNI network plugin (Calico)
# ==============================================================================
log_info "安装 CNI 网络插件 (Calico)..."
CALICO_MANIFEST="${OFFLINE_ASSETS_DIR}/manifests/calico.yaml"
cp "${CALICO_MANIFEST}" "${TEMP_DIR}/calico.yaml"
# Rewrite the image references to point at the local registry. The push loop in
# step 7 strips the upstream registry prefix (docker.io/), so the local
# references are ${LOCAL_REGISTRY_ADDR}/calico/<image> (matching the rewrite in the final section).
sudo sed -i "s|docker.io/calico/cni:${CALICO_VERSION}|${LOCAL_REGISTRY_ADDR}/calico/cni:${CALICO_VERSION}|g" "${TEMP_DIR}/calico.yaml"
sudo sed -i "s|docker.io/calico/node:${CALICO_VERSION}|${LOCAL_REGISTRY_ADDR}/calico/node:${CALICO_VERSION}|g" "${TEMP_DIR}/calico.yaml"
sudo sed -i "s|docker.io/calico/kube-controllers:${CALICO_VERSION}|${LOCAL_REGISTRY_ADDR}/calico/kube-controllers:${CALICO_VERSION}|g" "${TEMP_DIR}/calico.yaml"
# Set the pod CIDR, assuming CALICO_IPV4POOL_CIDR and its value line are
# consecutive commented-out lines in the manifest
sudo sed -i "s|# - name: CALICO_IPV4POOL_CIDR|- name: CALICO_IPV4POOL_CIDR|g" "${TEMP_DIR}/calico.yaml"
sudo sed -i "s|# value: \"192.168.0.0/16\"| value: \"${POD_CIDR}\"|g" "${TEMP_DIR}/calico.yaml"
kubectl apply -f "${TEMP_DIR}/calico.yaml"
log_info "Calico 网络插件安装完成。"
log_info "等待 Calico Pod 启动..."
kubectl wait --for=condition=ready pod -l k8s-app=calico-node -n kube-system --timeout=300s || log_error "Calico Pod 未能在指定时间内启动。"
# ==============================================================================
# 10. Install Multus CNI (for multi-NIC KubeVirt VMs)
# ==============================================================================
log_info "安装 Multus CNI 插件..."
MULTUS_MANIFEST="${OFFLINE_ASSETS_DIR}/manifests/multus-daemonset.yaml"
cp "${MULTUS_MANIFEST}" "${TEMP_DIR}/multus-daemonset.yaml"
# Rewrite the Multus image to the local registry (the push loop in step 7 strips the ghcr.io/ prefix)
sudo sed -i "s|ghcr.io/k8snetworkplumbingwg/multus-cni:v4.0.2|${LOCAL_REGISTRY_ADDR}/k8snetworkplumbingwg/multus-cni:v4.0.2|g" "${TEMP_DIR}/multus-daemonset.yaml"
kubectl apply -f "${TEMP_DIR}/multus-daemonset.yaml"
log_info "Multus CNI 插件安装完成。"
log_info "等待 Multus Pod 启动..."
kubectl wait --for=condition=ready pod -l name=multus -n kube-system --timeout=300s || log_error "Multus Pod 未能在指定时间内启动。"
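# Multus only takes effect once a NetworkAttachmentDefinition exists. A minimal
# sketch (hypothetical name and host NIC; adjust "master" and the subnet to the
# environment) of a macvlan secondary network a KubeVirt VM could attach to:
# cat <<'NAD' | kubectl apply -f -
# apiVersion: k8s.cni.cncf.io/v1
# kind: NetworkAttachmentDefinition
# metadata:
#   name: macvlan-net
# spec:
#   config: '{
#     "cniVersion": "0.3.1",
#     "type": "macvlan",
#     "master": "eth0",
#     "mode": "bridge",
#     "ipam": { "type": "host-local", "subnet": "192.168.16.0/24" }
#   }'
# NAD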
# ==============================================================================
# 11. Install KubeVirt
# ==============================================================================
log_info "安装 KubeVirt..."
KUBEVIRT_OPERATOR_MANIFEST="${OFFLINE_ASSETS_DIR}/manifests/kubevirt-operator.yaml"
KUBEVIRT_CR_MANIFEST="${OFFLINE_ASSETS_DIR}/manifests/kubevirt-cr.yaml"
cp "${KUBEVIRT_OPERATOR_MANIFEST}" "${TEMP_DIR}/kubevirt-operator.yaml"
cp "${KUBEVIRT_CR_MANIFEST}" "${TEMP_DIR}/kubevirt-cr.yaml"
# Rewrite the virt-operator image to the local registry (the push loop in step 7 strips the quay.io/ prefix)
sudo sed -i "s|quay.io/kubevirt/virt-operator:${KUBEVIRT_VERSION}|${LOCAL_REGISTRY_ADDR}/kubevirt/virt-operator:${KUBEVIRT_VERSION}|g" "${TEMP_DIR}/kubevirt-operator.yaml"
kubectl apply -f "${TEMP_DIR}/kubevirt-operator.yaml"
log_info "KubeVirt Operator 安装完成。"
log_info "等待 KubeVirt Operator Pod 启动..."
kubectl wait --for=condition=ready pod -l kubevirt.io=virt-operator -n kubevirt --timeout=300s || log_error "KubeVirt Operator Pod 未能在指定时间内启动。"
# Deploy the KubeVirt CR (custom resource)
kubectl apply -f "${TEMP_DIR}/kubevirt-cr.yaml"
log_info "KubeVirt CR 部署完成。"
log_info "等待 KubeVirt 组件 Pod 启动..."
# KubeVirt 有多个组件,简单起见,等待所有带有 kubevirt.io 标签的 Pod
kubectl wait --for=condition=ready pod -l kubevirt.io -n kubevirt --timeout=600s || log_error "KubeVirt 组件 Pod 未能在指定时间内启动。"
log_info "KubeVirt 安装完成。"
# ==============================================================================
# 12. Install the NFS Subdir External Provisioner (dynamic NFS storage)
# ==============================================================================
log_info "安装 NFS Subdir External Provisioner..."
# 12.1 准备 NFS 挂载点
log_info "在控制节点创建 NFS 挂载点..."
NFS_MOUNT_POINT="/mnt/nfs_provisioner"
sudo mkdir -p "${NFS_MOUNT_POINT}"
# Skip if already mounted, to avoid a double mount. With `set -e` a separate
# `$?` check after mount would never run, so chain the error handler directly.
if ! mountpoint -q "${NFS_MOUNT_POINT}"; then
sudo mount -t nfs "${NFS_SERVER}:${NFS_PATH}" "${NFS_MOUNT_POINT}" || log_error "Failed to mount NFS share ${NFS_SERVER}:${NFS_PATH}; check the NFS server configuration and network connectivity."
log_info "NFS share mounted at ${NFS_MOUNT_POINT}."
else
log_info "NFS share already mounted."
fi
# 12.2 Point the Helm chart at the local image registry
log_info "Preparing the NFS provisioner Helm chart..."
NFS_CHART_FILE="${OFFLINE_ASSETS_DIR}/charts/nfs-subdir-external-provisioner-${NFS_CHART_VERSION}.tgz"
# Unpack the chart into the temp directory
cd "${TEMP_DIR}"
tar -xzvf "${NFS_CHART_FILE}"
NFS_CHART_DIR="nfs-subdir-external-provisioner" # directory name after extraction
# Rewrite the image repository in values.yaml (the push loop in step 7 strips the registry.k8s.io/ prefix)
sudo sed -i "s|repository: registry.k8s.io/sig-storage/nfs-subdir-external-provisioner|repository: ${LOCAL_REGISTRY_ADDR}/sig-storage/nfs-subdir-external-provisioner|g" "${NFS_CHART_DIR}/values.yaml"
# NFS_PROVISIONER_VERSION already includes the leading "v", so the original
# "v${NFS_PROVISIONER_VERSION}" pattern ("vv4.0.2") could never match; set the tag explicitly instead
sudo sed -i "s|tag: .*|tag: ${NFS_PROVISIONER_VERSION}|" "${NFS_CHART_DIR}/values.yaml"
# 12.3 Install the provisioner with Helm
log_info "Installing the NFS provisioner with Helm..."
helm upgrade --install nfs-client-provisioner "${NFS_CHART_DIR}" \
--namespace nfs-client-provisioner --create-namespace \
--set nfs.server="${NFS_SERVER}" \
--set nfs.path="${NFS_PATH}" \
--set storageClass.name="${NFS_STORAGE_CLASS_NAME}" \
--set storageClass.reclaimPolicy=Retain \
--set storageClass.defaultClass=true # make this the default StorageClass
log_info "NFS Subdir External Provisioner 安装完成。"
log_info "等待 NFS Provisioner Pod 启动..."
kubectl wait --for=condition
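# Optional end-to-end test (a sketch with a hypothetical claim name): a PVC
# against the new StorageClass should reach Bound, proving both the provisioner
# and the NFS export work.
# cat <<'PVC' | kubectl apply -f -
# apiVersion: v1
# kind: PersistentVolumeClaim
# metadata:
#   name: nfs-test-claim
# spec:
#   storageClassName: nfs-client
#   accessModes: ["ReadWriteMany"]
#   resources:
#     requests:
#       storage: 1Mi
# PVC
# kubectl get pvc nfs-test-claim   # STATUS should become Bound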
echo "等待10分钟"
sleep 600
echo "安装 CNI 插件"
# 安装 CNI 插件
# 备份原始文件
cp "$CALICO_YAML_PATH" "${CALICO_YAML_PATH}.bak-calico-images"
echo "Replacing Calico image registries in $CALICO_YAML_PATH for version $CALICO_VERSION..."
# 替换 calico/node 镜像
# 从 "image: docker.io/calico/node:v3.26.1" 替换为 "image: 192.168.16.5:5000/calico/node:v3.26.1"
sudo sed -i "s|image: docker.io/calico/node:${CALICO_VERSION}|image: ${LOCAL_REGISTRY_ADDR}/calico/node:${CALICO_VERSION}|g" "$CALICO_YAML_PATH"
# Rewrite the calico/kube-controllers image
sudo sed -i "s|image: docker.io/calico/kube-controllers:${CALICO_VERSION}|image: ${LOCAL_REGISTRY_ADDR}/calico/kube-controllers:${CALICO_VERSION}|g" "$CALICO_YAML_PATH"
# Rewrite the calico/cni image
sudo sed -i "s|image: docker.io/calico/cni:${CALICO_VERSION}|image: ${LOCAL_REGISTRY_ADDR}/calico/cni:${CALICO_VERSION}|g" "$CALICO_YAML_PATH"
echo "Calico image registry replacement complete."
# Double-check that every calico image path was rewritten
echo "Verifying image paths in $CALICO_YAML_PATH:"
grep "image:" "$CALICO_YAML_PATH" | grep "calico"
# Append an IPPool resource to the end of calico.yaml
cat <<EOF | sudo tee -a "$CALICO_YAML_PATH"

---
apiVersion: crd.projectcalico.org/v1
kind: IPPool
metadata:
  name: default-pool-ipv4
spec:
  cidr: $POD_CIDR
  natOutgoing: true
  disabled: false
  ipipMode: Always
EOF
# Apply the CNI plugin
kubectl apply -f "$CALICO_YAML_PATH"