在运行脚本获取 GPU 时钟频率之前，需要先确认 nvidia-smi 能够成功地获取到 GPU 信息：
# nvidia-smi
Tue Sep 10 14:57:24 2024
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 555.58.02 Driver Version: 555.58.02 CUDA Version: 12.5 |
|-----------------------------------------+------------------------+----------------------+
| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|=========================================+========================+======================|
| 0 NVIDIA GeForce RTX 3060 Off | 00000000:01:00.0 Off | N/A |
| 53% 70C P0 168W / 170W | 311MiB / 12288MiB | 100% Default |
| | | N/A |
+-----------------------------------------+------------------------+----------------------+
| 1 NVIDIA GeForce RTX 3060 Off | 00000000:02:00.0 Off | N/A |
| 57% 73C P0 169W / 170W | 311MiB / 12288MiB | 100% Default |
| | | N/A |
+-----------------------------------------+------------------------+----------------------+
| 2 NVIDIA GeForce RTX 3060 Off | 00000000:03:00.0 Off | N/A |
| 59% 74C P0 166W / 170W | 311MiB / 12288MiB | 100% Default |
| | | N/A |
+-----------------------------------------+------------------------+----------------------+
| 3 NVIDIA GeForce RTX 3060 Off | 00000000:81:00.0 Off | N/A |
| 56% 72C P0 167W / 170W | 311MiB / 12288MiB | 100% Default |
| | | N/A |
+-----------------------------------------+------------------------+----------------------+
| 4 NVIDIA GeForce RTX 3060 Off | 00000000:82:00.0 Off | N/A |
| 58% 73C P0 166W / 170W | 311MiB / 12288MiB | 100% Default |
| | | N/A |
+-----------------------------------------+------------------------+----------------------+
| 5 NVIDIA GeForce RTX 3060 Off | 00000000:83:00.0 Off | N/A |
| 60% 74C P0 168W / 170W | 311MiB / 12288MiB | 100% Default |
| | | N/A |
+-----------------------------------------+------------------------+----------------------+
+-----------------------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=========================================================================================|
| 0 N/A N/A 1970 C /root/AiModel/Distributed_training 302MiB |
| 1 N/A N/A 1970 C /root/AiModel/Distributed_training 302MiB |
| 2 N/A N/A 1970 C /root/AiModel/Distributed_training 302MiB |
| 3 N/A N/A 1970 C /root/AiModel/Distributed_training 302MiB |
| 4 N/A N/A 1970 C /root/AiModel/Distributed_training 302MiB |
| 5 N/A N/A 1970 C /root/AiModel/Distributed_training 302MiB |
+-----------------------------------------------------------------------------------------+
gpu.sh
#!/bin/bash
#
# Print the current and maximum clock frequencies of every attached NVIDIA GPU.
#
# Usage:    gpu.sh
# Requires: nvidia-smi on PATH (and a driver it can talk to).
# Output:   for each GPU, a "GPU <index>:" line followed by the graphics, SM,
#           memory and video clocks as "<current> / <max> MHz".
# Exit:     0 on success; non-zero if nvidia-smi is missing, the GPU count
#           cannot be determined, or any per-GPU query fails.

#######################################
# Extract a single clock value from `nvidia-smi -a` text read on stdin.
#
# Sections are matched by their exact header text, so asking for "Clocks"
# does not also pick up "Max Clocks", "Applications Clocks" or
# "Clocks Event Reasons" (a substring match on " Clocks" hits all of them).
#
# Arguments:
#   $1 - exact section header, e.g. "Clocks" or "Max Clocks"
#   $2 - exact field name inside that section, e.g. "Graphics"
# Outputs:
#   The field value with a trailing "MHz" unit removed (e.g. "2100").
#   Non-numeric values such as "N/A" are printed as-is; nothing is printed
#   when the section or field is absent.
#######################################
extract_clock() {
  awk -v section="$1" -v field="$2" '
    {
      line = $0
      gsub(/^[ \t]+|[ \t\r]+$/, "", line)
    }
    line == "" { next }
    # A line without ":" is a section header; remember whether it is ours.
    index(line, ":") == 0 { in_section = (line == section); next }
    in_section {
      sep = index(line, ":")
      key = substr(line, 1, sep - 1)
      gsub(/[ \t]+$/, "", key)
      if (key == field) {
        value = substr(line, sep + 1)
        gsub(/^[ \t]+|[ \t]*MHz$/, "", value)
        print value
        exit
      }
    }
  '
}

#######################################
# Query every attached GPU and print its clock frequencies.
# Outputs:
#   Report on stdout, diagnostics on stderr.
# Returns:
#   0 on success, 1 if nvidia-smi is unusable or any GPU query failed.
#######################################
main() {
  if ! command -v nvidia-smi >/dev/null 2>&1; then
    printf 'gpu.sh: nvidia-smi not found in PATH\n' >&2
    return 1
  fi

  # Number of GPUs, taken from the "Attached GPUs : N" line.
  local gpu_count
  gpu_count=$(nvidia-smi -a | awk '/Attached GPUs/ { print $NF; exit }')
  if [[ ! "$gpu_count" =~ ^[0-9]+$ ]]; then
    printf 'gpu.sh: could not determine the GPU count from nvidia-smi\n' >&2
    return 1
  fi

  local i detail status=0
  local max_g curr_g max_sm curr_sm max_mem curr_mem max_video curr_video

  # Walk over all GPUs by index.
  for (( i = 0; i < gpu_count; i++ )); do
    echo "GPU $i:"

    # Keep the per-GPU report in memory: no predictable file name in /tmp
    # and nothing to clean up if the script is interrupted.
    if ! detail=$(nvidia-smi -a -i "$i"); then
      printf 'gpu.sh: nvidia-smi query failed for GPU %s\n' "$i" >&2
      status=1
      continue
    fi

    # Current ("Clocks") and maximum ("Max Clocks") frequency per domain.
    curr_g=$(extract_clock "Clocks" "Graphics" <<<"$detail")
    max_g=$(extract_clock "Max Clocks" "Graphics" <<<"$detail")
    curr_sm=$(extract_clock "Clocks" "SM" <<<"$detail")
    max_sm=$(extract_clock "Max Clocks" "SM" <<<"$detail")
    curr_mem=$(extract_clock "Clocks" "Memory" <<<"$detail")
    max_mem=$(extract_clock "Max Clocks" "Memory" <<<"$detail")
    curr_video=$(extract_clock "Clocks" "Video" <<<"$detail")
    max_video=$(extract_clock "Max Clocks" "Video" <<<"$detail")

    # Print the GPU information.
    printf '图形频率: %s / %s MHz\n' "$curr_g" "$max_g"
    printf 'SM频率: %s / %s MHz\n' "$curr_sm" "$max_sm"
    printf '显存频率: %s / %s MHz\n' "$curr_mem" "$max_mem"
    printf '视频频率: %s / %s MHz\n' "$curr_video" "$max_video"
  done

  return "$status"
}

main "$@"