async def get_available_resources(ns={}): # 创建 API 实例 v1 = client.CoreV1Api() # 获取所有节点 nodes = v1.list_node() # 获取所有 Pod pods = v1.list_pod_for_all_namespaces() # 存储节点资源信息 node_resources = {} total_allocatable = { 'cpu': 0, 'memory': 0, 'gpu': 0, 'storage': 0 } total_used = { 'cpu': 0, 'memory': 0, 'gpu': 0, 'storage': 0 } for node in nodes.items: name = node.metadata.name allocatable = node.status.allocatable node_resources[name] = { 'cpu_allocatable': int(allocatable.get('cpu', '0').rstrip('m')) / 1000 if 'm' in allocatable.get('cpu', '0') else int(allocatable.get('cpu', '0')), 'memory_allocatable': int(allocatable.get('memory', '0').rstrip('Ki')) / 1024 / 1024, 'gpu_allocatable': int(allocatable.get('nvidia.com/gpu', '0')), 'storage_allocatable': int(allocatable.get('ephemeral-storage', '0').rstrip('Ki')) / 1024 / 1024 } # 累加总可分配资源 total_allocatable['cpu'] += node_resources[name]['cpu_allocatable'] total_allocatable['memory'] += node_resources[name]['memory_allocatable'] total_allocatable['gpu'] += node_resources[name]['gpu_allocatable'] total_allocatable['storage'] += node_resources[name]['storage_allocatable'] # 初始化已分配 node_resources[name].update({ 'cpu_used': 0, 'memory_used': 0, 'gpu_used': 0, 'storage_used': 0 }) # 遍历所有 Pod,统计每个节点的已分配资源 for pod in pods.items: if pod.spec.node_name: # 确保 Pod 已被调度到节点 node_name = pod.spec.node_name for container in pod.spec.containers: reque = container.resources.requests or {} node_resources[node_name]['cpu_used'] += float(reque.get('cpu', '0').rstrip('m')) / 1000 if 'm' in reque.get('cpu', '0') else float(reque.get('cpu', '0')) node_resources[node_name]['memory_used'] += int(reque.get('memory', '0').rstrip('Mi')) if 'Mi' in reque.get('memory', '0') else int(reque.get('memory', '0').rstrip('Gi')) * 1024 node_resources[node_name]['gpu_used'] += int(reque.get('nvidia.com/gpu', '0')) node_resources[node_name]['storage_used'] += int(reque.get('ephemeral-storage', '0').rstrip('Mi')) if 'Mi' in reque.get('ephemeral-storage', '0') else 0 # 计算总已使用资源 for node_name, resources in node_resources.items(): total_used['cpu'] += resources['cpu_used'] total_used['memory'] += resources['memory_used'] total_used['gpu'] += resources['gpu_used'] total_used['storage'] += resources['storage_used'] # 计算节点的资源占用情况 print(f"Node: {node_name}") print(f" CPU Remaining: {resources['cpu_allocatable'] - resources['cpu_used']} cores") print(f" Memory Remaining: {resources['memory_allocatable'] - resources['memory_used']} Mi") print(f" GPU Remaining: {resources['gpu_allocatable'] - resources['gpu_used']} GPUs") print(f" Storage Remaining: {resources['storage_allocatable'] - resources['storage_used']} Mi") print() # 计算总剩余资源和使用百分比 total_remaining = {key: total_allocatable[key] - total_used[key] for key in total_allocatable} usage_percentage = {key: (total_used[key] / total_allocatable[key] * 100 if total_allocatable[key] > 0 else 0) for key in total_allocatable} # 输出总资源和使用情况 print("Cluster Resource Summary:") print(f" Total Allocatable CPU: {total_allocatable['cpu']} cores") print(f" Total Allocatable Memory: {total_allocatable['memory']} Mi") print(f" Total Allocatable GPU: {total_allocatable['gpu']} GPUs") print(f" Total Allocatable Storage: {total_allocatable['storage']} Mi") print() print(f" CPU Usage Percentage: {usage_percentage['cpu']:.2f}%") print(f" Memory Usage Percentage: {usage_percentage['memory']:.2f}%") print(f" GPU Usage Percentage: {usage_percentage['gpu']:.2f}%") print(f" Storage Usage Percentage: {usage_percentage['storage']:.2f}%") print() # 返回数据 return { "total_allocatable": total_allocatable, "total_used": total_used, "total_remaining": total_remaining, "usage_percentage": usage_percentage } ret = await get_available_resources(params_kw) return ret