写一个批量获取主机CPU、GPU温度的Python脚本

写一个批量获取主机CPU、GPU温度的Python脚本

微信搜索 zze_coding 或扫描 👉 二维码关注我的微信公众号获取更多资源推送:

因公司最近的需求所以就有了这个脚本,附上内容如下:

注:使用前需确保运行该脚本的主机已安装了 fabric 和 gevent 库。

#!/usr/bin/python2
# -*- coding: utf-8 -*-
# gevent's monkey patching has to happen before the other libraries are
# imported, so that the modules they pull in at import time are already the
# cooperative versions.
from gevent import monkey

monkey.patch_all()

import gevent
from fabric import Connection
from invoke import Responder


class CPUTemp:
    """Read CPU temperatures from a remote host through ``connection.sudo``.

    The commands used (``dpkg``, ``apt``, ``sensors``, ``sensors-detect``,
    ``modprobe``) are all executed on the remote side.
    """

    def __init__(self, connection):
        """Keep the connection and the host name it points at.

        :param connection: object exposing ``host`` and a
            ``sudo(command, **kwargs)`` method whose result has ``stdout``
            and ``ok`` attributes.
        """
        self.connection = connection
        self.host = self.connection.host

    def load_kernel_module(self):
        """Load the IPMI kernel modules when no ``/dev/ipmi0*`` node exists.

        :return: True when a device node is already present or ``modprobe``
            succeeded, False when ``modprobe`` failed.
        """
        count_output = self.connection.sudo('ls /dev/ipmi0* | wc -l', hide=True).stdout
        # stdout is text such as "0\n"; it must be converted before being
        # compared with a number, otherwise the condition is never true.
        if int(count_output.strip()) == 0:
            # -a is needed to load several modules at once; without it the
            # second name is taken as a parameter of the first module.
            # warn=True asks for a failed result object instead of an
            # exception, so the caller gets False back.
            return self.connection.sudo(
                'modprobe -a ipmi_devintf ipmi_si', hide=True, warn=True).ok

        return True

    def install_tools(self):
        """Install ``lm-sensors`` and ``ipmitool`` when they are missing.

        :return: True once the check (and the install, if needed) has run.
        """
        installed_result = self.connection.sudo(
            "dpkg -l | egrep '(lm-sensors|ipmitool)' | wc -l", hide=True)
        # Fewer than two matching lines means at least one package is absent.
        if int(installed_result.stdout) < 2:
            self.connection.sudo('apt install lm-sensors ipmitool -y', hide=True)

        return True

    def sensors_detect(self):
        """Run ``sensors-detect`` and answer its prompts automatically.

        :return: the ``ok`` flag of the command result.
        """
        # Every yes/no style question is answered with "yes"; the
        # "press ENTER" prompt gets a bare newline.
        watchers = [
            Responder(
                pattern=r'\(YES/no\):|\(yes/NO\)',
                response='yes\n'),
            Responder(
                pattern=r'Just press ENTER to continue: ',
                response='\n'),
            Responder(
                pattern=r'\(YES/no/selectively\): ',
                response='yes\n'),
            Responder(
                pattern=r'\(yes/NO/selectively\):',
                response='yes\n'),
        ]

        res = self.connection.sudo('sensors-detect', pty=True, watchers=watchers,
                                   hide=True)

        return res.ok

    def get_sensors(self):
        """Collect the CPU package temperatures reported by ``sensors``.

        :return: list of floats, one per number found in the command output;
            empty when the command did not succeed or printed no numbers.
        """
        import re

        ret = self.connection.sudo(
            "sudo sensors | egrep 'Physical|Package' | awk -F' ' '{print $4}'", hide=True)
        if not ret.ok:
            return []
        output = ret.stdout
        return [float(item) for item in re.findall(r"\d+\.?\d*", output)]

    @property
    def max_temp(self):
        """Highest CPU temperature currently reported by the host.

        :return: the largest reading as a float, or 0 when the preparation
            steps fail or no reading is available.
        """
        if not (self.load_kernel_module() and self.install_tools()):
            return 0

        temp_list = self.get_sensors()
        # No readings yet: let sensors-detect probe the hardware, then retry.
        if not temp_list and self.sensors_detect():
            temp_list = self.get_sensors()

        # The list can still be empty after detection; max() of an empty
        # list would raise, so fall back to 0.
        return max(temp_list) if temp_list else 0


class GPUTemp:
    """Read GPU temperatures from a remote host via ``nvidia-smi``."""

    def __init__(self, connection):
        """Keep the connection and the host name it points at.

        :param connection: object exposing ``host`` and a
            ``sudo(command, **kwargs)`` method whose result has ``stdout``.
        """
        self.connection = connection
        self.host = self.connection.host

    @property
    def max_temp(self):
        """Highest GPU temperature currently reported by the host.

        :return: the largest reading as an integer, or 0 when ``nvidia-smi``
            is not found or prints no numeric reading.
        """
        found = self.connection.sudo('which nvidia-smi | wc -l', hide=True).stdout
        if not int(found):
            return 0

        ret = self.connection.sudo(
            "nvidia-smi -q -d TEMPERATURE | grep 'GPU Current Temp' | awk -F':|C' '{print $3}'", hide=True)

        readings = []
        for line in ret.stdout.splitlines():
            try:
                readings.append(int(line))
            except ValueError:
                # Blank lines or non-numeric values (e.g. "N/A") are
                # skipped rather than aborting the whole query.
                continue

        return max(readings) if readings else 0


def print_temp(host, port, user, pwd):
    """Print the highest CPU and GPU temperature of one host.

    :param host: host name or IP address
    :param port: SSH port
    :param user: login user name
    :param pwd: password handed to the connection as ``connect_kwargs``
    :return: None
    """
    # Using the connection as a context manager closes the SSH session once
    # both readings are printed, or when one of the queries raises.
    with Connection(host=host, port=port, user=user, connect_kwargs={
        "password": pwd,
    }) as c:
        temp_cpu_obj = CPUTemp(c)
        temp_graphic_obj = GPUTemp(c)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('{} 最高 CPU 温度: {}'.format(temp_cpu_obj.host, temp_cpu_obj.max_temp))
        print('{} 最高 GPU 温度: {}'.format(temp_graphic_obj.host, temp_graphic_obj.max_temp))


if __name__ == '__main__':
    # Hosts to query; all of them share the same SSH port and credentials.
    host_list = (
        '192.168.0.11',
        '192.168.0.12',
    )
    port, user, pwd = 22, 'zze', '123'

    # One greenlet per host so the hosts are queried concurrently.
    jobs = []
    for host in host_list:
        jobs.append(gevent.spawn(print_temp, host, port, user, pwd))

    # Wait at most 20 seconds for the whole batch.
    gevent.joinall(jobs, timeout=20)

Copyright: 采用 知识共享署名4.0 国际许可协议进行许可

Links: https://www.zze.xyz/archives/python-cpu-gpu-temp.html

Buy me a cup of coffee ☕.