103 lines
4.5 KiB
Plaintext
103 lines
4.5 KiB
Plaintext
# Kubernetes quantity suffixes as (suffix, multiplier, divisor).  Fractional
# suffixes are expressed with an integer divisor so "3920m" evaluates as
# 3920 / 1000 instead of multiplying by an inexact 0.001.
_QUANTITY_SUFFIXES = (
    ('Ki', 2 ** 10, 1),
    ('Mi', 2 ** 20, 1),
    ('Gi', 2 ** 30, 1),
    ('Ti', 2 ** 40, 1),
    ('Pi', 2 ** 50, 1),
    ('Ei', 2 ** 60, 1),
    ('n', 1, 10 ** 9),
    ('u', 1, 10 ** 6),
    ('m', 1, 10 ** 3),
    ('k', 10 ** 3, 1),
    ('M', 10 ** 6, 1),
    ('G', 10 ** 9, 1),
    ('T', 10 ** 12, 1),
    ('P', 10 ** 15, 1),
    ('E', 10 ** 18, 1),
)

_BYTES_PER_MIB = 2 ** 20

# Pods in these phases no longer hold their requested resources.
_TERMINAL_POD_PHASES = ('Succeeded', 'Failed')


def _parse_quantity(value):
    """Convert a Kubernetes quantity string (e.g. '500m', '16Gi') to a float in base units."""
    if value is None:
        return 0.0
    text = str(value).strip()
    if not text:
        return 0.0
    for suffix, multiplier, divisor in _QUANTITY_SUFFIXES:
        if text.endswith(suffix):
            return float(text[:-len(suffix)]) * multiplier / divisor
    # No suffix: plain number or exponent notation such as "1e3".
    return float(text)


def _quantity_to_mib(value):
    """Convert a Kubernetes byte quantity string to mebibytes."""
    return _parse_quantity(value) / _BYTES_PER_MIB


def _resource_amounts(quantities):
    """Return (cpu cores, memory MiB, gpu count, ephemeral storage MiB) from a quantity mapping."""
    quantities = quantities or {}
    return (
        _parse_quantity(quantities.get('cpu')),
        _quantity_to_mib(quantities.get('memory')),
        int(_parse_quantity(quantities.get('nvidia.com/gpu'))),
        _quantity_to_mib(quantities.get('ephemeral-storage')),
    )


async def get_available_resources(ns=None):
    """Summarise allocatable versus requested resources across the cluster.

    Lists every node and every pod through ``client.CoreV1Api()``, adds up the
    container resource *requests* of the pods scheduled on each node, prints a
    per-node and a cluster-wide report, and returns the cluster totals.

    Units: CPU in cores, memory and ephemeral storage in MiB, GPU as the
    ``nvidia.com/gpu`` count.  Allocatable and requested amounts are parsed
    with the same quantity parser so they are directly comparable.

    Args:
        ns: Not used by this function; accepted so callers can keep passing
            their parameter dict.  Defaults to ``None`` rather than a shared
            mutable ``{}``.

    Returns:
        dict with the keys ``total_allocatable``, ``total_used``,
        ``total_remaining`` and ``usage_percentage``; each maps
        ``cpu`` / ``memory`` / ``gpu`` / ``storage`` to a number.

    Raises:
        ValueError: if a quantity string reported by the API cannot be parsed.
    """
    # Create the API instance.
    # NOTE(review): these list calls are made without ``await`` inside a
    # coroutine, so they appear to be blocking calls - confirm this is
    # acceptable for the hosting event loop.
    v1 = client.CoreV1Api()
    nodes = v1.list_node()
    pods = v1.list_pod_for_all_namespaces()

    # Per-node allocatable amounts, with request counters initialised to zero.
    node_resources = {}
    for node in nodes.items:
        cpu, memory, gpu, storage = _resource_amounts(node.status.allocatable)
        node_resources[node.metadata.name] = {
            'cpu_allocatable': cpu,
            'memory_allocatable': memory,
            'gpu_allocatable': gpu,
            'storage_allocatable': storage,
            'cpu_used': 0,
            'memory_used': 0,
            'gpu_used': 0,
            'storage_used': 0,
        }

    # Walk all pods and accumulate their container requests on their node.
    for pod in pods.items:
        # Skips pods that are not scheduled yet as well as pods bound to a
        # node that was not part of the node listing.
        usage = node_resources.get(pod.spec.node_name)
        if usage is None:
            continue
        if getattr(pod.status, 'phase', None) in _TERMINAL_POD_PHASES:
            continue
        for container in pod.spec.containers:
            resources = container.resources
            requests = resources.requests if resources else None
            cpu, memory, gpu, storage = _resource_amounts(requests)
            usage['cpu_used'] += cpu
            usage['memory_used'] += memory
            usage['gpu_used'] += gpu
            usage['storage_used'] += storage

    total_allocatable = {'cpu': 0, 'memory': 0, 'gpu': 0, 'storage': 0}
    total_used = {'cpu': 0, 'memory': 0, 'gpu': 0, 'storage': 0}

    # Accumulate cluster totals and report what is left on each node.
    for node_name, resources in node_resources.items():
        for key in total_allocatable:
            total_allocatable[key] += resources[key + '_allocatable']
            total_used[key] += resources[key + '_used']

        print(f"Node: {node_name}")
        print(f" CPU Remaining: {resources['cpu_allocatable'] - resources['cpu_used']} cores")
        print(f" Memory Remaining: {resources['memory_allocatable'] - resources['memory_used']} Mi")
        print(f" GPU Remaining: {resources['gpu_allocatable'] - resources['gpu_used']} GPUs")
        print(f" Storage Remaining: {resources['storage_allocatable'] - resources['storage_used']} Mi")
        print()

    # Cluster-wide remaining amounts and usage percentages.
    total_remaining = {
        key: total_allocatable[key] - total_used[key]
        for key in total_allocatable
    }
    usage_percentage = {
        key: (total_used[key] / total_allocatable[key] * 100
              if total_allocatable[key] > 0 else 0)
        for key in total_allocatable
    }

    # Print the cluster summary.
    print("Cluster Resource Summary:")
    print(f" Total Allocatable CPU: {total_allocatable['cpu']} cores")
    print(f" Total Allocatable Memory: {total_allocatable['memory']} Mi")
    print(f" Total Allocatable GPU: {total_allocatable['gpu']} GPUs")
    print(f" Total Allocatable Storage: {total_allocatable['storage']} Mi")
    print()
    print(f" CPU Usage Percentage: {usage_percentage['cpu']:.2f}%")
    print(f" Memory Usage Percentage: {usage_percentage['memory']:.2f}%")
    print(f" GPU Usage Percentage: {usage_percentage['gpu']:.2f}%")
    print(f" Storage Usage Percentage: {usage_percentage['storage']:.2f}%")
    print()

    return {
        "total_allocatable": total_allocatable,
        "total_used": total_used,
        "total_remaining": total_remaining,
        "usage_percentage": usage_percentage,
    }
|
||
|
||
ret = await get_available_resources(params_kw)
|
||
return ret
|
||
|