修补问题
This commit is contained in:
parent
7d32077aa4
commit
d69c5336e6
@ -13,7 +13,7 @@ from . import ssh_utils,k8s_utils_public
|
|||||||
from appPublic.log import debug
|
from appPublic.log import debug
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
def delete_cluster_node(params):
|
async def delete_cluster_node(params):
|
||||||
"""
|
"""
|
||||||
删除集群节点
|
删除集群节点
|
||||||
--namespace 或 -n:指定节点所在的命名空间。不过,节点是集群级别的资源,不隶属于特定的命名空间,所以此参数一般不用于删除节点。
|
--namespace 或 -n:指定节点所在的命名空间。不过,节点是集群级别的资源,不隶属于特定的命名空间,所以此参数一般不用于删除节点。
|
||||||
@ -32,7 +32,7 @@ def delete_cluster_node(params):
|
|||||||
"""
|
"""
|
||||||
return "delete_cluster_node ok"
|
return "delete_cluster_node ok"
|
||||||
|
|
||||||
def node_state_switch(params):
|
async def node_state_switch(params):
|
||||||
"""
|
"""
|
||||||
恢复节点:
|
恢复节点:
|
||||||
kubectl uncordon 命令将节点标记为可调度状态,这样调度器就会重新考虑将新的 Pod 分配到该节点上
|
kubectl uncordon 命令将节点标记为可调度状态,这样调度器就会重新考虑将新的 Pod 分配到该节点上
|
||||||
@ -45,7 +45,7 @@ def node_state_switch(params):
|
|||||||
"""
|
"""
|
||||||
return "node_state_switch ok"
|
return "node_state_switch ok"
|
||||||
|
|
||||||
def yaml_apply_delete(params):
|
async def yaml_apply_delete(params):
|
||||||
"""
|
"""
|
||||||
1. 通过cpcc传递过来的参数进行级联初始化资源实例;
|
1. 通过cpcc传递过来的参数进行级联初始化资源实例;
|
||||||
2. 通过cpcc传递过来的参数进行级联更新资源实例;
|
2. 通过cpcc传递过来的参数进行级联更新资源实例;
|
||||||
@ -60,7 +60,7 @@ def yaml_apply_delete(params):
|
|||||||
elif instance_type == "LinuxOS":
|
elif instance_type == "LinuxOS":
|
||||||
k8s_utils_linuxos_ubuntu.handle_k8s_operations(params)
|
k8s_utils_linuxos_ubuntu.handle_k8s_operations(params)
|
||||||
|
|
||||||
def node_label_opt(params):
|
async def node_label_opt(params):
|
||||||
"""
|
"""
|
||||||
要设置节点 worker-node-1 上的标签 app,可以使用以下命令:
|
要设置节点 worker-node-1 上的标签 app,可以使用以下命令:
|
||||||
kubectl label nodes worker-node-1 app=app,注意标签键和值之间有一个等号 (=),表示设置该标签。
|
kubectl label nodes worker-node-1 app=app,注意标签键和值之间有一个等号 (=),表示设置该标签。
|
||||||
@ -106,7 +106,7 @@ def node_label_opt(params):
|
|||||||
else:
|
else:
|
||||||
raise f"{worker_node} 解绑标签 {label} 失败,请检查集群节点状态或标签是否已绑定?"
|
raise f"{worker_node} 解绑标签 {label} 失败,请检查集群节点状态或标签是否已绑定?"
|
||||||
|
|
||||||
def unset_node_label(params):
|
async def unset_node_label(params):
|
||||||
"""
|
"""
|
||||||
要取消节点 worker-node-1 上的标签 app,可以使用以下命令:
|
要取消节点 worker-node-1 上的标签 app,可以使用以下命令:
|
||||||
kubectl label nodes worker-node-1 app-,注意标签键后面有一个短横线 (-),表示取消该标签。
|
kubectl label nodes worker-node-1 app-,注意标签键后面有一个短横线 (-),表示取消该标签。
|
||||||
@ -121,7 +121,7 @@ def unset_node_label(params):
|
|||||||
label = params.get("label")
|
label = params.get("label")
|
||||||
|
|
||||||
|
|
||||||
def get_cluster_nodes_by_server(params):
|
async def get_cluster_nodes_by_server(params):
|
||||||
host = params.get("host")
|
host = params.get("host")
|
||||||
port = int(params.get("port"))
|
port = int(params.get("port"))
|
||||||
username = params.get("user")
|
username = params.get("user")
|
||||||
@ -137,7 +137,7 @@ def get_cluster_nodes_by_server(params):
|
|||||||
# debug(f'集群 {host=} 所有节点信息如下{results=} => 转换后:\n{parse_k8s_nodes_result=}')
|
# debug(f'集群 {host=} 所有节点信息如下{results=} => 转换后:\n{parse_k8s_nodes_result=}')
|
||||||
return parse_k8s_nodes_result
|
return parse_k8s_nodes_result
|
||||||
|
|
||||||
def get_cluster_pods_by_kubeconfig(params):
|
async def get_cluster_pods_by_kubeconfig(params):
|
||||||
"""
|
"""
|
||||||
通过调用方传递来的kubeconfig信息
|
通过调用方传递来的kubeconfig信息
|
||||||
获取集群中所有资源实例(Pod)信息详情
|
获取集群中所有资源实例(Pod)信息详情
|
||||||
@ -145,7 +145,7 @@ def get_cluster_pods_by_kubeconfig(params):
|
|||||||
kubeconfig = params.get("kubeconfig")
|
kubeconfig = params.get("kubeconfig")
|
||||||
return k8s_utils_public.get_pod_info(kubeconfig)
|
return k8s_utils_public.get_pod_info(kubeconfig)
|
||||||
|
|
||||||
def determine_accommodat_by_kubeconfig(params):
|
async def determine_accommodat_by_kubeconfig(params):
|
||||||
"""
|
"""
|
||||||
通过调用方传递来的kubeconfig信息
|
通过调用方传递来的kubeconfig信息
|
||||||
判断集群中可部署哪些部件组合n
|
判断集群中可部署哪些部件组合n
|
||||||
@ -157,7 +157,7 @@ def determine_accommodat_by_kubeconfig(params):
|
|||||||
# debug(f'=====kubeconfig: {kubeconfig}, resources: {resources}')
|
# debug(f'=====kubeconfig: {kubeconfig}, resources: {resources}')
|
||||||
return k8s_utils_public.determine_accommodat(kubeconfig, resources)
|
return k8s_utils_public.determine_accommodat(kubeconfig, resources)
|
||||||
|
|
||||||
def get_cluster_nodes_by_kubeconfig(params):
|
async def get_cluster_nodes_by_kubeconfig(params):
|
||||||
"""
|
"""
|
||||||
通过调用方传递来的kubeconfig信息
|
通过调用方传递来的kubeconfig信息
|
||||||
获取集群中所有节点信息详情
|
获取集群中所有节点信息详情
|
||||||
@ -165,7 +165,7 @@ def get_cluster_nodes_by_kubeconfig(params):
|
|||||||
kubeconfig = params.get("kubeconfig")
|
kubeconfig = params.get("kubeconfig")
|
||||||
return k8s_utils_public.get_node_info(kubeconfig)
|
return k8s_utils_public.get_node_info(kubeconfig)
|
||||||
|
|
||||||
def get_cluster_pods_by_server(params):
|
async def get_cluster_pods_by_server(params):
|
||||||
host = params.get("host")
|
host = params.get("host")
|
||||||
port = int(params.get("port"))
|
port = int(params.get("port"))
|
||||||
username = params.get("user")
|
username = params.get("user")
|
||||||
@ -182,7 +182,7 @@ def get_cluster_pods_by_server(params):
|
|||||||
# debug(f'集群 {host=} 所有Pod信息如下{results=} => 转换后:\n{parse_k8s_pods_result=}')
|
# debug(f'集群 {host=} 所有Pod信息如下{results=} => 转换后:\n{parse_k8s_pods_result=}')
|
||||||
return parse_k8s_pods_result
|
return parse_k8s_pods_result
|
||||||
|
|
||||||
def new_cluster_install(params):
|
async def new_cluster_install(params):
|
||||||
# 随后填充远程操控k8s主逻辑
|
# 随后填充远程操控k8s主逻辑
|
||||||
"""
|
"""
|
||||||
用于接收cpcc端传递过来的k8s安装指令参数, 进行远程sshx调用操作内网机器进行集群节点的安装
|
用于接收cpcc端传递过来的k8s安装指令参数, 进行远程sshx调用操作内网机器进行集群节点的安装
|
||||||
@ -206,6 +206,7 @@ def new_cluster_install(params):
|
|||||||
"files/storage_class.yaml":"/opt/storage_class.yaml",
|
"files/storage_class.yaml":"/opt/storage_class.yaml",
|
||||||
# "files/nfs-provisioner-deploy.yaml":"/opt/nfs-provisioner-deploy.yaml",
|
# "files/nfs-provisioner-deploy.yaml":"/opt/nfs-provisioner-deploy.yaml",
|
||||||
"files/nfs-rbac.yaml": "/opt/nfs-rbac.yaml",
|
"files/nfs-rbac.yaml": "/opt/nfs-rbac.yaml",
|
||||||
|
"files/config.toml": "/opt/config.toml",
|
||||||
"files/nvidia-device-plugin.yml": "/opt/nvidia-device-plugin.yml",
|
"files/nvidia-device-plugin.yml": "/opt/nvidia-device-plugin.yml",
|
||||||
"files/libnvidia-container-tools_1.17.8-1_amd64.deb": "/opt/libnvidia-container-tools_1.17.8-1_amd64.deb",
|
"files/libnvidia-container-tools_1.17.8-1_amd64.deb": "/opt/libnvidia-container-tools_1.17.8-1_amd64.deb",
|
||||||
"files/libnvidia-container1_1.17.8-1_amd64.deb": "/opt/libnvidia-container1_1.17.8-1_amd64.deb",
|
"files/libnvidia-container1_1.17.8-1_amd64.deb": "/opt/libnvidia-container1_1.17.8-1_amd64.deb",
|
||||||
@ -275,7 +276,7 @@ def new_cluster_install(params):
|
|||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def get_multiple_cluster_pod():
|
async def get_multiple_cluster_pod():
|
||||||
"""
|
"""
|
||||||
获取 kubeconfig 中所有集群的 Pod 信息(JSON 格式)
|
获取 kubeconfig 中所有集群的 Pod 信息(JSON 格式)
|
||||||
|
|
||||||
@ -326,7 +327,7 @@ def get_multiple_cluster_pod():
|
|||||||
return all_clusters_pods
|
return all_clusters_pods
|
||||||
|
|
||||||
|
|
||||||
def get_multiple_cluster():
|
async def get_multiple_cluster():
|
||||||
"""
|
"""
|
||||||
获取所有集群的完整信息,包括用户证书、RBAC状态、服务账号颁发者等。
|
获取所有集群的完整信息,包括用户证书、RBAC状态、服务账号颁发者等。
|
||||||
|
|
||||||
@ -445,7 +446,7 @@ def get_multiple_cluster():
|
|||||||
}, indent=4)
|
}, indent=4)
|
||||||
|
|
||||||
|
|
||||||
def process_kubeconfigs():
|
async def process_kubeconfigs():
|
||||||
"""
|
"""
|
||||||
检测当前目录下的 kubestage 文件夹中的 kubeconfig 格式文件,
|
检测当前目录下的 kubestage 文件夹中的 kubeconfig 格式文件,
|
||||||
计算每个文件的大写 MD5 值,将其改名成对应的 MD5 值,
|
计算每个文件的大写 MD5 值,将其改名成对应的 MD5 值,
|
||||||
|
|||||||
24
app/pcapi.py
24
app/pcapi.py
@ -70,18 +70,18 @@ def init_func():
|
|||||||
# g.delete_ldap_user=delete_ldap_user
|
# g.delete_ldap_user=delete_ldap_user
|
||||||
|
|
||||||
### k8s多集群相关
|
### k8s多集群相关
|
||||||
g.new_cluster_install = awaitify(new_cluster_install)
|
g.new_cluster_install = new_cluster_install
|
||||||
g.get_multiple_cluster = awaitify(get_multiple_cluster)
|
g.get_multiple_cluster = get_multiple_cluster
|
||||||
g.get_multiple_cluster_pod = awaitify(get_multiple_cluster_pod)
|
g.get_multiple_cluster_pod = get_multiple_cluster_pod
|
||||||
g.get_cluster_nodes_by_server = awaitify(get_cluster_nodes_by_server)
|
g.get_cluster_nodes_by_server = get_cluster_nodes_by_server
|
||||||
g.get_cluster_pods_by_server = awaitify(get_cluster_pods_by_server)
|
g.get_cluster_pods_by_server = get_cluster_pods_by_server
|
||||||
g.delete_cluster_node = awaitify(delete_cluster_node)
|
g.delete_cluster_node = delete_cluster_node
|
||||||
g.node_state_switch = awaitify(node_state_switch)
|
g.node_state_switch = node_state_switch
|
||||||
g.yaml_apply_delete = awaitify(yaml_apply_delete)
|
g.yaml_apply_delete = yaml_apply_delete
|
||||||
g.get_cluster_nodes_by_kubeconfig = awaitify(get_cluster_nodes_by_kubeconfig)
|
g.get_cluster_nodes_by_kubeconfig = get_cluster_nodes_by_kubeconfig
|
||||||
g.determine_accommodat_by_kubeconfig = awaitify(determine_accommodat_by_kubeconfig)
|
g.determine_accommodat_by_kubeconfig = determine_accommodat_by_kubeconfig
|
||||||
g.get_cluster_pods_by_kubeconfig = awaitify(get_cluster_pods_by_kubeconfig)
|
g.get_cluster_pods_by_kubeconfig = get_cluster_pods_by_kubeconfig
|
||||||
g.node_label_opt = awaitify(node_label_opt)
|
g.node_label_opt = node_label_opt
|
||||||
|
|
||||||
g.get_storage_json=get_storage_json
|
g.get_storage_json=get_storage_json
|
||||||
g.result_dict={
|
g.result_dict={
|
||||||
|
|||||||
297
files/config.toml
Normal file
297
files/config.toml
Normal file
@ -0,0 +1,297 @@
|
|||||||
|
disabled_plugins = []
|
||||||
|
imports = []
|
||||||
|
oom_score = 0
|
||||||
|
plugin_dir = ""
|
||||||
|
required_plugins = []
|
||||||
|
root = "/var/lib/containerd"
|
||||||
|
state = "/run/containerd"
|
||||||
|
temp = ""
|
||||||
|
version = 2
|
||||||
|
|
||||||
|
[cgroup]
|
||||||
|
path = ""
|
||||||
|
|
||||||
|
[debug]
|
||||||
|
address = ""
|
||||||
|
format = ""
|
||||||
|
gid = 0
|
||||||
|
level = ""
|
||||||
|
uid = 0
|
||||||
|
|
||||||
|
[grpc]
|
||||||
|
address = "/run/containerd/containerd.sock"
|
||||||
|
gid = 0
|
||||||
|
max_recv_message_size = 16777216
|
||||||
|
max_send_message_size = 16777216
|
||||||
|
tcp_address = ""
|
||||||
|
tcp_tls_ca = ""
|
||||||
|
tcp_tls_cert = ""
|
||||||
|
tcp_tls_key = ""
|
||||||
|
uid = 0
|
||||||
|
|
||||||
|
[metrics]
|
||||||
|
address = ""
|
||||||
|
grpc_histogram = false
|
||||||
|
|
||||||
|
[plugins]
|
||||||
|
|
||||||
|
[plugins."io.containerd.gc.v1.scheduler"]
|
||||||
|
deletion_threshold = 0
|
||||||
|
mutation_threshold = 100
|
||||||
|
pause_threshold = 0.02
|
||||||
|
schedule_delay = "0s"
|
||||||
|
startup_delay = "100ms"
|
||||||
|
|
||||||
|
[plugins."io.containerd.grpc.v1.cri"]
|
||||||
|
cdi_spec_dirs = ["/etc/cdi", "/var/run/cdi"]
|
||||||
|
device_ownership_from_security_context = false
|
||||||
|
disable_apparmor = false
|
||||||
|
disable_cgroup = false
|
||||||
|
disable_hugetlb_controller = true
|
||||||
|
disable_proc_mount = false
|
||||||
|
disable_tcp_service = true
|
||||||
|
drain_exec_sync_io_timeout = "0s"
|
||||||
|
enable_cdi = false
|
||||||
|
enable_selinux = false
|
||||||
|
enable_tls_streaming = false
|
||||||
|
enable_unprivileged_icmp = false
|
||||||
|
enable_unprivileged_ports = false
|
||||||
|
ignore_deprecation_warnings = []
|
||||||
|
ignore_image_defined_volumes = false
|
||||||
|
image_pull_progress_timeout = "5m0s"
|
||||||
|
image_pull_with_sync_fs = false
|
||||||
|
max_concurrent_downloads = 3
|
||||||
|
max_container_log_line_size = 16384
|
||||||
|
netns_mounts_under_state_dir = false
|
||||||
|
restrict_oom_score_adj = false
|
||||||
|
sandbox_image = "registry.aliyuncs.com/google_containers/pause:3.9"
|
||||||
|
selinux_category_range = 1024
|
||||||
|
stats_collect_period = 10
|
||||||
|
stream_idle_timeout = "4h0m0s"
|
||||||
|
stream_server_address = "127.0.0.1"
|
||||||
|
stream_server_port = "0"
|
||||||
|
systemd_cgroup = false
|
||||||
|
tolerate_missing_hugetlb_controller = true
|
||||||
|
unset_seccomp_profile = ""
|
||||||
|
|
||||||
|
[plugins."io.containerd.grpc.v1.cri".cni]
|
||||||
|
bin_dir = "/opt/cni/bin"
|
||||||
|
conf_dir = "/etc/cni/net.d"
|
||||||
|
conf_template = ""
|
||||||
|
ip_pref = ""
|
||||||
|
max_conf_num = 1
|
||||||
|
setup_serially = false
|
||||||
|
|
||||||
|
[plugins."io.containerd.grpc.v1.cri".containerd]
|
||||||
|
default_runtime_name = "nvidia"
|
||||||
|
disable_snapshot_annotations = true
|
||||||
|
discard_unpacked_layers = false
|
||||||
|
ignore_blockio_not_enabled_errors = false
|
||||||
|
ignore_rdt_not_enabled_errors = false
|
||||||
|
no_pivot = false
|
||||||
|
snapshotter = "overlayfs"
|
||||||
|
|
||||||
|
[plugins."io.containerd.grpc.v1.cri".containerd.default_runtime]
|
||||||
|
base_runtime_spec = ""
|
||||||
|
cni_conf_dir = ""
|
||||||
|
cni_max_conf_num = 0
|
||||||
|
container_annotations = []
|
||||||
|
pod_annotations = []
|
||||||
|
privileged_without_host_devices = false
|
||||||
|
privileged_without_host_devices_all_devices_allowed = false
|
||||||
|
runtime_engine = ""
|
||||||
|
runtime_path = ""
|
||||||
|
runtime_root = ""
|
||||||
|
runtime_type = ""
|
||||||
|
sandbox_mode = ""
|
||||||
|
snapshotter = ""
|
||||||
|
|
||||||
|
[plugins."io.containerd.grpc.v1.cri".containerd.default_runtime.options]
|
||||||
|
|
||||||
|
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
|
||||||
|
|
||||||
|
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
|
||||||
|
runtime_type = "io.containerd.runc.v2"
|
||||||
|
privileged_without_host_devices = false
|
||||||
|
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
|
||||||
|
BinaryName = "/usr/bin/nvidia-container-runtime"
|
||||||
|
|
||||||
|
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
|
||||||
|
base_runtime_spec = ""
|
||||||
|
cni_conf_dir = ""
|
||||||
|
cni_max_conf_num = 0
|
||||||
|
container_annotations = []
|
||||||
|
pod_annotations = []
|
||||||
|
privileged_without_host_devices = false
|
||||||
|
privileged_without_host_devices_all_devices_allowed = false
|
||||||
|
runtime_engine = ""
|
||||||
|
runtime_path = ""
|
||||||
|
runtime_root = ""
|
||||||
|
runtime_type = "io.containerd.runc.v2"
|
||||||
|
sandbox_mode = "podsandbox"
|
||||||
|
snapshotter = ""
|
||||||
|
|
||||||
|
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
|
||||||
|
BinaryName = ""
|
||||||
|
CriuImagePath = ""
|
||||||
|
CriuPath = ""
|
||||||
|
CriuWorkPath = ""
|
||||||
|
IoGid = 0
|
||||||
|
IoUid = 0
|
||||||
|
NoNewKeyring = false
|
||||||
|
NoPivotRoot = false
|
||||||
|
Root = ""
|
||||||
|
ShimCgroup = ""
|
||||||
|
SystemdCgroup = true
|
||||||
|
|
||||||
|
[plugins."io.containerd.grpc.v1.cri".containerd.untrusted_workload_runtime]
|
||||||
|
base_runtime_spec = ""
|
||||||
|
cni_conf_dir = ""
|
||||||
|
cni_max_conf_num = 0
|
||||||
|
container_annotations = []
|
||||||
|
pod_annotations = []
|
||||||
|
privileged_without_host_devices = false
|
||||||
|
privileged_without_host_devices_all_devices_allowed = false
|
||||||
|
runtime_engine = ""
|
||||||
|
runtime_path = ""
|
||||||
|
runtime_root = ""
|
||||||
|
runtime_type = ""
|
||||||
|
sandbox_mode = ""
|
||||||
|
snapshotter = ""
|
||||||
|
|
||||||
|
[plugins."io.containerd.grpc.v1.cri".containerd.untrusted_workload_runtime.options]
|
||||||
|
|
||||||
|
[plugins."io.containerd.grpc.v1.cri".image_decryption]
|
||||||
|
key_model = "node"
|
||||||
|
|
||||||
|
[plugins."io.containerd.grpc.v1.cri".registry]
|
||||||
|
config_path = ""
|
||||||
|
|
||||||
|
[plugins."io.containerd.grpc.v1.cri".registry.auths]
|
||||||
|
|
||||||
|
[plugins."io.containerd.grpc.v1.cri".registry.configs]
|
||||||
|
|
||||||
|
[plugins."io.containerd.grpc.v1.cri".registry.headers]
|
||||||
|
|
||||||
|
[plugins."io.containerd.grpc.v1.cri".registry.mirrors]
|
||||||
|
|
||||||
|
[plugins."io.containerd.grpc.v1.cri".x509_key_pair_streaming]
|
||||||
|
tls_cert_file = ""
|
||||||
|
tls_key_file = ""
|
||||||
|
|
||||||
|
[plugins."io.containerd.internal.v1.opt"]
|
||||||
|
path = "/opt/containerd"
|
||||||
|
|
||||||
|
[plugins."io.containerd.internal.v1.restart"]
|
||||||
|
interval = "10s"
|
||||||
|
|
||||||
|
[plugins."io.containerd.internal.v1.tracing"]
|
||||||
|
|
||||||
|
[plugins."io.containerd.metadata.v1.bolt"]
|
||||||
|
content_sharing_policy = "shared"
|
||||||
|
|
||||||
|
[plugins."io.containerd.monitor.v1.cgroups"]
|
||||||
|
no_prometheus = false
|
||||||
|
|
||||||
|
[plugins."io.containerd.nri.v1.nri"]
|
||||||
|
disable = true
|
||||||
|
disable_connections = false
|
||||||
|
plugin_config_path = "/etc/nri/conf.d"
|
||||||
|
plugin_path = "/opt/nri/plugins"
|
||||||
|
plugin_registration_timeout = "5s"
|
||||||
|
plugin_request_timeout = "2s"
|
||||||
|
socket_path = "/var/run/nri/nri.sock"
|
||||||
|
|
||||||
|
[plugins."io.containerd.runtime.v1.linux"]
|
||||||
|
no_shim = false
|
||||||
|
runtime = "runc"
|
||||||
|
runtime_root = ""
|
||||||
|
shim = "containerd-shim"
|
||||||
|
shim_debug = false
|
||||||
|
|
||||||
|
[plugins."io.containerd.runtime.v2.task"]
|
||||||
|
platforms = ["linux/amd64"]
|
||||||
|
sched_core = false
|
||||||
|
|
||||||
|
[plugins."io.containerd.service.v1.diff-service"]
|
||||||
|
default = ["walking"]
|
||||||
|
|
||||||
|
[plugins."io.containerd.service.v1.tasks-service"]
|
||||||
|
blockio_config_file = ""
|
||||||
|
rdt_config_file = ""
|
||||||
|
|
||||||
|
[plugins."io.containerd.snapshotter.v1.aufs"]
|
||||||
|
root_path = ""
|
||||||
|
|
||||||
|
[plugins."io.containerd.snapshotter.v1.blockfile"]
|
||||||
|
fs_type = ""
|
||||||
|
mount_options = []
|
||||||
|
root_path = ""
|
||||||
|
scratch_file = ""
|
||||||
|
|
||||||
|
[plugins."io.containerd.snapshotter.v1.btrfs"]
|
||||||
|
root_path = ""
|
||||||
|
|
||||||
|
[plugins."io.containerd.snapshotter.v1.devmapper"]
|
||||||
|
async_remove = false
|
||||||
|
base_image_size = ""
|
||||||
|
discard_blocks = false
|
||||||
|
fs_options = ""
|
||||||
|
fs_type = ""
|
||||||
|
pool_name = ""
|
||||||
|
root_path = ""
|
||||||
|
|
||||||
|
[plugins."io.containerd.snapshotter.v1.native"]
|
||||||
|
root_path = ""
|
||||||
|
|
||||||
|
[plugins."io.containerd.snapshotter.v1.overlayfs"]
|
||||||
|
mount_options = []
|
||||||
|
root_path = ""
|
||||||
|
sync_remove = false
|
||||||
|
upperdir_label = false
|
||||||
|
|
||||||
|
[plugins."io.containerd.snapshotter.v1.zfs"]
|
||||||
|
root_path = ""
|
||||||
|
|
||||||
|
[plugins."io.containerd.tracing.processor.v1.otlp"]
|
||||||
|
|
||||||
|
[plugins."io.containerd.transfer.v1.local"]
|
||||||
|
config_path = ""
|
||||||
|
max_concurrent_downloads = 3
|
||||||
|
max_concurrent_uploaded_layers = 3
|
||||||
|
|
||||||
|
[[plugins."io.containerd.transfer.v1.local".unpack_config]]
|
||||||
|
differ = ""
|
||||||
|
platform = "linux/amd64"
|
||||||
|
snapshotter = "overlayfs"
|
||||||
|
|
||||||
|
[proxy_plugins]
|
||||||
|
|
||||||
|
[stream_processors]
|
||||||
|
|
||||||
|
[stream_processors."io.containerd.ocicrypt.decoder.v1.tar"]
|
||||||
|
accepts = ["application/vnd.oci.image.layer.v1.tar+encrypted"]
|
||||||
|
args = ["--decryption-keys-path", "/etc/containerd/ocicrypt/keys"]
|
||||||
|
env = ["OCICRYPT_KEYPROVIDER_CONFIG=/etc/containerd/ocicrypt/ocicrypt_keyprovider.conf"]
|
||||||
|
path = "ctd-decoder"
|
||||||
|
returns = "application/vnd.oci.image.layer.v1.tar"
|
||||||
|
|
||||||
|
[stream_processors."io.containerd.ocicrypt.decoder.v1.tar.gzip"]
|
||||||
|
accepts = ["application/vnd.oci.image.layer.v1.tar+gzip+encrypted"]
|
||||||
|
args = ["--decryption-keys-path", "/etc/containerd/ocicrypt/keys"]
|
||||||
|
env = ["OCICRYPT_KEYPROVIDER_CONFIG=/etc/containerd/ocicrypt/ocicrypt_keyprovider.conf"]
|
||||||
|
path = "ctd-decoder"
|
||||||
|
returns = "application/vnd.oci.image.layer.v1.tar+gzip"
|
||||||
|
|
||||||
|
[timeouts]
|
||||||
|
"io.containerd.timeout.bolt.open" = "0s"
|
||||||
|
"io.containerd.timeout.metrics.shimstats" = "2s"
|
||||||
|
"io.containerd.timeout.shim.cleanup" = "5s"
|
||||||
|
"io.containerd.timeout.shim.load" = "5s"
|
||||||
|
"io.containerd.timeout.shim.shutdown" = "3s"
|
||||||
|
"io.containerd.timeout.task.state" = "2s"
|
||||||
|
|
||||||
|
[ttrpc]
|
||||||
|
address = ""
|
||||||
|
gid = 0
|
||||||
|
uid = 0
|
||||||
@ -315,36 +315,45 @@ if lspci | grep -i nvidia > /dev/null 2>&1; then
|
|||||||
log_info "检测到NVIDIA GPU,开始配置nvidia-container-runtime..."
|
log_info "检测到NVIDIA GPU,开始配置nvidia-container-runtime..."
|
||||||
|
|
||||||
# 检查 .deb 文件是否存在
|
# 检查 .deb 文件是否存在
|
||||||
if [ ! "$(ls /opt/*.deb 2>/dev/null | wc -l)" -ge 1 ]; then
|
DEB_FILES=(/opt/*_amd64.deb)
|
||||||
|
if [ ! -e "${DEB_FILES[0]}" ]; then
|
||||||
log_error "/opt/ 下没有 .deb 文件"
|
log_error "/opt/ 下没有 .deb 文件"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# 安装 .deb 包
|
# 安装 .deb 包
|
||||||
for deb in /opt/*_amd64.deb; do
|
for deb in "${DEB_FILES[@]}"; do
|
||||||
dpkg -i "$deb" || log_error "安装 $deb 失败"
|
dpkg -i "$deb" || {
|
||||||
|
log_error "安装 $deb 失败"
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
done
|
done
|
||||||
|
|
||||||
# 配置 containerd
|
# 配置 containerd
|
||||||
CONTAINERD_CONFIG="/etc/containerd/config.toml"
|
CONTAINERD_CONFIG="/etc/containerd/config.toml"
|
||||||
log_info "正在更新 $CONTAINERD_CONFIG 配置..."
|
log_info "正在更新 $CONTAINERD_CONFIG 配置..."
|
||||||
|
|
||||||
# 1. 添加 nvidia 运行时配置(插入到 runtimes 块内部)
|
# 1. 添加 nvidia 运行时配置到 runtimes 块内部
|
||||||
if ! grep -qF '[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]' "$CONTAINERD_CONFIG"; then
|
# 添加 nvidia runtime 配置到 runtimes 块下
|
||||||
# 在 runtimes 块下插入 nvidia 配置(保持格式缩进)
|
NVIDIA_SECTION='plugins\."io\.containerd\.grpc\.v1\.cri"\.containerd\.runtimes\.nvidia'
|
||||||
sed -i '/\[plugins."io.containerd.grpc.v1.cri".containerd.runtimes\]/a \
|
# if ! grep -qF "[${NVIDIA_SECTION}]" "$CONTAINERD_CONFIG"; then
|
||||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
|
# sudo sed -i '/^
|
||||||
privileged_without_host_devices = false
|
# $$
|
||||||
runtime_type = "io.containerd.runc.v2"
|
# plugins\."io\.containerd\.grpc\.v1\.cri"\.containerd\.runtimes
|
||||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
|
# $$
|
||||||
BinaryName = "/usr/bin/nvidia-container-runtime"
|
# $/a \
|
||||||
' "$CONTAINERD_CONFIG"
|
# [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]\n\
|
||||||
fi
|
# privileged_without_host_devices = false\n\
|
||||||
|
# runtime_type = "io.containerd.runc.v2"\n\
|
||||||
|
# [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]\n\
|
||||||
|
# BinaryName = "/usr/bin/nvidia-container-runtime"' /etc/containerd/config.toml
|
||||||
|
# fi
|
||||||
|
|
||||||
# 2. 修改默认运行时为 nvidia(正确匹配配置项)
|
# # 2. 修改默认运行时为 nvidia
|
||||||
if ! grep -qF 'default_runtime_name = "nvidia"' "$CONTAINERD_CONFIG"; then
|
# if ! grep -qF 'default_runtime_name = "nvidia"' "$CONTAINERD_CONFIG"; then
|
||||||
sed -i '/default_runtime_name = "runc"/s/"runc"/"nvidia"/' "$CONTAINERD_CONFIG"
|
# sudo sed -i 's/default_runtime_name = "runc"/default_runtime_name = "nvidia"/' "$CONTAINERD_CONFIG"
|
||||||
fi
|
# fi
|
||||||
|
cp -v /opt/config.toml /etc/containerd/config.toml || log_error "直接复制containerd配置文件失败"
|
||||||
|
|
||||||
# 3. 重启 containerd 并检查状态
|
# 3. 重启 containerd 并检查状态
|
||||||
log_info "重启 containerd 服务..."
|
log_info "重启 containerd 服务..."
|
||||||
@ -359,6 +368,7 @@ if lspci | grep -i nvidia > /dev/null 2>&1; then
|
|||||||
log_info "配置 CUDA 环境变量..."
|
log_info "配置 CUDA 环境变量..."
|
||||||
grep -qxF 'export PATH=/usr/local/cuda/bin:$PATH' ~/.bashrc || echo 'export PATH=/usr/local/cuda/bin:$PATH' >> ~/.bashrc
|
grep -qxF 'export PATH=/usr/local/cuda/bin:$PATH' ~/.bashrc || echo 'export PATH=/usr/local/cuda/bin:$PATH' >> ~/.bashrc
|
||||||
grep -qxF 'export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH' ~/.bashrc || echo 'export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH' >> ~/.bashrc
|
grep -qxF 'export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH' ~/.bashrc || echo 'export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH' >> ~/.bashrc
|
||||||
|
|
||||||
# 应用环境变量(非交互式shell提示手动执行)
|
# 应用环境变量(非交互式shell提示手动执行)
|
||||||
if [[ "$-" == *i* ]]; then
|
if [[ "$-" == *i* ]]; then
|
||||||
source ~/.bashrc
|
source ~/.bashrc
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user