pcapi/wwwroot/api/v1/get_available_resources.dspy
2025-07-16 14:46:24 +08:00

103 lines
4.5 KiB
Plaintext

async def get_available_resources(ns={}):
# 创建 API 实例
v1 = client.CoreV1Api()
# 获取所有节点
nodes = v1.list_node()
# 获取所有 Pod
pods = v1.list_pod_for_all_namespaces()
# 存储节点资源信息
node_resources = {}
total_allocatable = {
'cpu': 0,
'memory': 0,
'gpu': 0,
'storage': 0
}
total_used = {
'cpu': 0,
'memory': 0,
'gpu': 0,
'storage': 0
}
for node in nodes.items:
name = node.metadata.name
allocatable = node.status.allocatable
node_resources[name] = {
'cpu_allocatable': int(allocatable.get('cpu', '0').rstrip('m')) / 1000 if 'm' in allocatable.get('cpu', '0') else int(allocatable.get('cpu', '0')),
'memory_allocatable': int(allocatable.get('memory', '0').rstrip('Ki')) / 1024 / 1024,
'gpu_allocatable': int(allocatable.get('nvidia.com/gpu', '0')),
'storage_allocatable': int(allocatable.get('ephemeral-storage', '0').rstrip('Ki')) / 1024 / 1024
}
# 累加总可分配资源
total_allocatable['cpu'] += node_resources[name]['cpu_allocatable']
total_allocatable['memory'] += node_resources[name]['memory_allocatable']
total_allocatable['gpu'] += node_resources[name]['gpu_allocatable']
total_allocatable['storage'] += node_resources[name]['storage_allocatable']
# 初始化已分配
node_resources[name].update({
'cpu_used': 0,
'memory_used': 0,
'gpu_used': 0,
'storage_used': 0
})
# 遍历所有 Pod,统计每个节点的已分配资源
for pod in pods.items:
if pod.spec.node_name: # 确保 Pod 已被调度到节点
node_name = pod.spec.node_name
for container in pod.spec.containers:
reque = container.resources.requests or {}
node_resources[node_name]['cpu_used'] += float(reque.get('cpu', '0').rstrip('m')) / 1000 if 'm' in reque.get('cpu', '0') else float(reque.get('cpu', '0'))
node_resources[node_name]['memory_used'] += int(reque.get('memory', '0').rstrip('Mi')) if 'Mi' in reque.get('memory', '0') else int(reque.get('memory', '0').rstrip('Gi')) * 1024
node_resources[node_name]['gpu_used'] += int(reque.get('nvidia.com/gpu', '0'))
node_resources[node_name]['storage_used'] += int(reque.get('ephemeral-storage', '0').rstrip('Mi')) if 'Mi' in reque.get('ephemeral-storage', '0') else 0
# 计算总已使用资源
for node_name, resources in node_resources.items():
total_used['cpu'] += resources['cpu_used']
total_used['memory'] += resources['memory_used']
total_used['gpu'] += resources['gpu_used']
total_used['storage'] += resources['storage_used']
# 计算节点的资源占用情况
print(f"Node: {node_name}")
print(f" CPU Remaining: {resources['cpu_allocatable'] - resources['cpu_used']} cores")
print(f" Memory Remaining: {resources['memory_allocatable'] - resources['memory_used']} Mi")
print(f" GPU Remaining: {resources['gpu_allocatable'] - resources['gpu_used']} GPUs")
print(f" Storage Remaining: {resources['storage_allocatable'] - resources['storage_used']} Mi")
print()
# 计算总剩余资源和使用百分比
total_remaining = {key: total_allocatable[key] - total_used[key] for key in total_allocatable}
usage_percentage = {key: (total_used[key] / total_allocatable[key] * 100 if total_allocatable[key] > 0 else 0) for key in total_allocatable}
# 输出总资源和使用情况
print("Cluster Resource Summary:")
print(f" Total Allocatable CPU: {total_allocatable['cpu']} cores")
print(f" Total Allocatable Memory: {total_allocatable['memory']} Mi")
print(f" Total Allocatable GPU: {total_allocatable['gpu']} GPUs")
print(f" Total Allocatable Storage: {total_allocatable['storage']} Mi")
print()
print(f" CPU Usage Percentage: {usage_percentage['cpu']:.2f}%")
print(f" Memory Usage Percentage: {usage_percentage['memory']:.2f}%")
print(f" GPU Usage Percentage: {usage_percentage['gpu']:.2f}%")
print(f" Storage Usage Percentage: {usage_percentage['storage']:.2f}%")
print()
# 返回数据
return {
"total_allocatable": total_allocatable,
"total_used": total_used,
"total_remaining": total_remaining,
"usage_percentage": usage_percentage
}
# Script entry point: the .dspy framework wraps this file in an async
# context and supplies `params_kw` (the request parameters), which is why a
# top-level `await`/`return` is valid here. The JSON-serializable summary
# dict becomes the API response.
ret = await get_available_resources(params_kw)
return ret