#!/usr/bin/perl -w
#
# Wildcard plugin to monitor sensor by using ipmitool sensor program.
#
# Contributed by Jun Futagawa
# This script is based on sensors_ plugin.
#
# Usage:
#       ln -s /usr/share/munin/plugins/ipmitool_sensor_ /etc/munin/plugins/ipmitool_sensor_fan
#       ln -s /usr/share/munin/plugins/ipmitool_sensor_ /etc/munin/plugins/ipmitool_sensor_temp
#       ln -s /usr/share/munin/plugins/ipmitool_sensor_ /etc/munin/plugins/ipmitool_sensor_volt
#
# Requirements:
#       - OpenIPMI tool (ipmitool command)
#
# Note:
#       - Sensor names are read from the output of the ipmitool sensor program.
#
# Add the following to your /etc/munin/plugin-conf.d/munin-node:
#
#       [ipmitool_sensor*]
#       user root
#       timeout 20
#
# If you want to use "ipmitool sdr", add the following:
# Note: When you use this, the threshold provided by the sensor board is not used.
#
#       [ipmitool_sensor*]
#       user root
#       timeout 20
#       env.ipmitool_options sdr
#
# Parameters supported:
#
#       config
#       autoconf
#       suggest
#
# Configurable variables
#
#       ipmitool            - ipmitool command (default: ipmitool)
#       ipmitool_options    - ipmitool command options (default: sensor)
#                             sdr: you can use 'sdr' instead of sensor.
#       cache_file          - cache file
#                             (default: /var/lib/munin/plugin-state/plugin-ipmitool_sensor.cache)
#       cache_expires       - cache expires (default: 275)
#
#       fan_type_regex      - Regular expression for unit of fan (default: RPM)
#       temp_type_regex     - Regular expression for unit of temp (default: degrees C)
#       volt_type_regex     - Regular expression for unit of volt (default: (Volts|Watts|Amps))
#
#       fan_warn_percent    - Percentage over minimum for warning (default: 5)
#       fan_lower_critical  - Preferred lower critical value for fan
#       fan_upper_critical  - Preferred upper critical value for fan
#       temp_lower_critical - Preferred lower critical value for temp
#       temp_lower_warning  - Preferred lower warning value for temp
#       temp_upper_warning  - Preferred upper warning value for temp
#       temp_upper_critical - Preferred upper critical value for temp
#       volt_warn_percent   - Percentage over minimum/under maximum for warning
#                             Narrow the voltage bracket by this. (default: 20)
#
# $Log$
# Revision 1.6  2011/02/07 12:50:00  jfut
# Bug fix: Check temp_upper_warning and temp_upper_critical was not working again.
#
# Revision 1.5  2011/01/28 00:39:00  jfut
# Bug fix: Check temp_upper_warning and temp_upper_critical was not working.
#
# Revision 1.4  2009/02/08 23:51:00  jfut
# Support "ipmitool sdr".
# Add Watts and Amp as voltage unit.
# Add fan_type_regex/temp_type_regex/volt_type_regex as option of sensor type.
#
# Revision 1.3  2008/11/11 13:55:00  jfut
# Add infinity value check for HP ProLiant DL160.
# Add preferred value option for fan and temp.
#
# Revision 1.2  2008/10/28 19:21:22  jfut
# Add file check.
#
# Revision 1.1  2008/10/27 18:52:31  jfut
# Add cache mechanism.
#
# Revision 1.0  2008/10/27 14:25:12  jfut
# Initial release.
#
# Magic markers:
#%# family=manual
#%# capabilities=autoconf suggest

use strict;
use warnings;

# Force the C locale so that the text printed by ipmitool stays parseable
# (untranslated messages, '.' as the decimal separator).
$ENV{'LANG'}   = "C";
$ENV{'LC_ALL'} = "C";

use File::Basename qw(dirname);

########################################
# Settings read from the plugin environment.

# Program to run. It is used as the program name of a pipe open and as the
# only argument of system() in the autoconf check.
my $IPMITOOL = $ENV{'ipmitool'} || 'ipmitool';

# Arguments for ipmitool. split ' ' (unlike split /\s+/) does not produce an
# empty leading argument when the value starts with whitespace.
my @IPMITOOL_OPTS = exists $ENV{'ipmitool_options'}
    ? split(' ', $ENV{'ipmitool_options'})
    : ('sensor');

my $CACHE_DIR  = "/var/lib/munin/plugin-state";
my $CACHE_FILE = $ENV{'cache_file'} || "$CACHE_DIR/plugin-ipmitool_sensor.cache";

# Maximum cache age in seconds; -1 means "never expires". An explicit 0 is
# honoured (it used to be replaced by the default), anything that is not an
# integer falls back to the default of 275.
my $CACHE_EXPIRES = (defined $ENV{'cache_expires'} && $ENV{'cache_expires'} =~ /^-?\d+$/)
    ? $ENV{'cache_expires'}
    : 275;

# One entry per supported wildcard suffix (ipmitool_sensor_<suffix>):
#   regex           - matches the unit text of the sensor lines of this type
#   title/vtitle    - graph_title / graph_vtitle printed by "config"
#   print_threshold - sub that prints the .warning/.critical lines
#   graph_args      - graph_args printed by "config"
my %config = (
    fan => {
        regex           => exists $ENV{'fan_type_regex'} ? qr/$ENV{'fan_type_regex'}/im : qr/RPM/im,
        title           => 'IPMITool Sensor: Fans',
        vtitle          => 'RPM',
        print_threshold => \&fan_threshold,
        graph_args      => '--base 1000 -l 0'
    },
    temp => {
        regex           => exists $ENV{'temp_type_regex'} ? qr/$ENV{'temp_type_regex'}/im : qr/degrees C/im,
        title           => 'IPMITool Sensor: Temperatures',
        vtitle          => 'Celsius',
        print_threshold => \&temp_threshold,
        graph_args      => '--base 1000 -l 0'
    },
    volt => {
        regex           => exists $ENV{'volt_type_regex'} ? qr/$ENV{'volt_type_regex'}/im : qr/(Volts|Watts|Amps)/im,
        title           => 'IPMITool Sensor: Voltages',
        vtitle          => '_AUTO_DETECT_FAILED_',
        print_threshold => \&volt_threshold,
        graph_args      => '--base 1000'
    },
);

my $mode = defined $ARGV[0] ? $ARGV[0] : '';

########################################
# autoconf: report whether the ipmitool program can be run at all.

if ($mode eq 'autoconf') {
    # Hide the usage message ipmitool prints when it is run without a command.
    close(STDERR);
    my $ret = system($IPMITOOL);
    open(STDERR, '>&', \*STDOUT);

    # 0 and 256 are wait statuses for exit codes 0 and 1; both mean the
    # program was found and executed.
    if ($ret == 0 || $ret == 256) {
        print "yes\n";
    } else {
        print "no (program $IPMITOOL not found)\n";
    }
    # The answer is the yes/no line; the Munin plugin guide asks autoconf to
    # exit 0 in both cases.
    exit 0;
}

########################################
# suggest: list the sensor types that appear in the sensor data.

if ($mode eq 'suggest') {
    my $text = get_sensor_data();
    # The lines still carry their newline, so they are joined without a
    # separator (the old code inserted a literal backslash-n between them).
    my $alltext = join('', @{$text});
    foreach my $type (sort keys %config) {
        print $type, "\n" if $alltext =~ $config{$type}->{regex};
    }
    exit;
}

########################################
# config / fetch: the sensor type comes from the name the plugin is run as.

my ($func) = $0 =~ /ipmitool_sensor_(.+)$/;
unless (defined $func && exists $config{$func}) {
    # Without this check an unknown suffix ended up using an empty pattern
    # and an undefined threshold sub.
    print STDERR "$0: unknown sensor type; expected ipmitool_sensor_{",
        join(',', sort keys %config), "}\n";
    exit 2;
}

my $regex  = $config{$func}->{regex};
my $text   = get_sensor_data();
my $sensor = 1;

if ($mode eq 'config') {
    # detect the unit of volt from the first matching line
    if ($func eq 'volt') {
        foreach my $line (@{$text}) {
            next unless $line =~ $regex;
            my (undef, undef, $unit) = get_sensor_items($line, $regex);
            $config{$func}->{vtitle} = $unit;
            last;
        }
    }

    # print header
    print "graph_title $config{$func}->{title}\n";
    print "graph_vtitle $config{$func}->{vtitle}\n";
    print "graph_args $config{$func}->{graph_args}\n";
    print "graph_category sensors\n";

    # print data; sensors are numbered in the order they are listed, counting
    # only those with a usable reading, exactly as in the fetch loop below.
    foreach my $line (@{$text}) {
        next unless $line =~ $regex;
        my ($label, $value, $unit, $lcr, $lnc, $unc, $ucr) = get_sensor_items($line, $regex);
        next unless is_valid_value($value);

        print "$func$sensor.label $label\n";
        $config{$func}->{print_threshold}->($func.$sensor, $lcr, $lnc, $unc, $ucr);
        print "$func$sensor.graph no\n" if exists $ENV{"ignore_$func$sensor"};
        $sensor++;
    }
    exit 0;
}

foreach my $line (@{$text}) {
    next unless $line =~ $regex;
    my ($label, $value, $unit, $lcr, $lnc, $unc, $ucr) = get_sensor_items($line, $regex);
    # for debug
    # print "$func$sensor.value [$label] [$value] [$lcr] [$lnc] [$unc] [$ucr]\n";
    next unless is_valid_value($value);

    print "$func$sensor.value $value\n";
    $sensor++;
}

########################################
# Subroutines

# Return the sensor listing as a reference to an array of lines (each line
# keeps its trailing newline). The array is empty when nothing could be read.
#
# The cache file is used when it is a plain file that is fresh enough (or when
# cache_expires is -1) and not empty; otherwise ipmitool is run and its output
# is stored in the cache file for the next caller.
#
# Dies when the cache file cannot be opened or written, or when ipmitool
# cannot be started.
sub get_sensor_data {
    my @lines;

    if (my @st = stat($CACHE_FILE)) {
        # $st[9] is the modification time.
        if (-f _ && ($CACHE_EXPIRES == -1 || time - $st[9] <= $CACHE_EXPIRES)) {
            open(my $in, '<', $CACHE_FILE)
                or die "Could not open \"$CACHE_FILE\" for reading\n";
            @lines = <$in>;
            close($in);
        }
    }
    return \@lines if @lines;

    # The list form of a pipe open forks and execs for us and returns false
    # when either step fails, so a missing ipmitool can no longer leave a
    # stray child running the rest of this script.
    open(my $exe, '-|', $IPMITOOL, @IPMITOOL_OPTS)
        or die "Could not run \"$IPMITOOL\": $!\n";
    @lines = <$exe>;
    # The exit status is ignored, as before: whatever output was produced is
    # still used.
    close($exe);

    # Do not cache empty output: it would hide the failure until the cache
    # expires. The writability check looks at the directory the cache file
    # actually lives in, which matters when cache_file is overridden.
    if (@lines && -w dirname($CACHE_FILE)) {
        # Write to a temporary file and rename it into place so that another
        # plugin instance never reads a half-written cache.
        my $tmp = "$CACHE_FILE.$$";
        open(my $out, '>', $tmp)
            or die "Could not open \"$CACHE_FILE\" for writing\n";
        print {$out} @lines;
        close($out)
            or die "Could not write \"$CACHE_FILE\": $!\n";
        rename($tmp, $CACHE_FILE)
            or die "Could not replace \"$CACHE_FILE\": $!\n";
    }

    return \@lines;
}

# Split one line of ipmitool output into its fields.
#
#   $line  - a '|' separated line of "ipmitool sensor" (10 columns) or
#            "ipmitool sdr" (3 columns) output
#   $regex - the unit regex of the sensor type, used to separate value and
#            unit in sdr output
#
# Returns ($label, $value, $unit, $lcr, $lnc, $unc, $ucr); fields that are
# missing from the line come back as 'na'.
sub get_sensor_items {
    my ($line, $regex) = @_;
    my @items = split(/\s*\|\s*/, $line);
    my ($label, $value, $unit, $lcr, $lnc, $unc, $ucr)
        = map { trim($_) } @items[0, 1, 2, 5, 6, 7, 8];

    if ($#items == 9) {
        # ipmitool sensor: every field is in its own column already
    } elsif ($#items == 2) {
        # ipmitool sdr: the second column holds "<value> <unit>"
        if ($value =~ $regex) {
            my $before  = substr($value, 0, $-[0]);
            my $matched = substr($value, $-[0], $+[0] - $-[0]);
            # The first capture group is the unit when the regex has one (as
            # the default volt regex does); otherwise use the matched text
            # instead of ending up with an undefined unit.
            my $found = defined $1 ? $1 : $matched;
            $value = trim($before);
            $unit  = trim($found);
        }
    }

    # some boards show data in incorrect order.
    # - HP ProLiant ML110 G5
    #   CPU FAN          | 1434.309   | RPM        | ok    | 5537.099  | 4960.317  | 4859.086  | na        | 937.383   | na
    #   SYSTEM FAN       | 1506.932   | RPM        | ok    | 5952.381  | 5668.934  | 5411.255  | na        | 937.383   | na
    # - HP ProLiant DL160
    #   FAN1 ROTOR1      | 7680.492   | RPM        | ok    | na        | inf       | na        | na        | 1000.400  | na
    if ((is_valid_value($lcr) && is_valid_value($ucr) && $lcr > $ucr) || $lcr eq 'inf') {
        ($lcr, $lnc, $unc, $ucr) = ($ucr, $unc, $lnc, $lcr);
    }
    if ((is_valid_value($lnc) && is_valid_value($unc) && $lnc > $unc) || $lnc eq 'inf') {
        ($lcr, $lnc, $unc, $ucr) = ($ucr, $unc, $lnc, $lcr);
    }

    return ($label, $value, $unit, $lcr, $lnc, $unc, $ucr);
}

# Choose the limit to report for one threshold:
#   1. the environment variable $env_key, when it is set;
#   2. the value reported by the sensor, when it is usable;
#   3. '' (no limit) when the sensor reports 'inf';
#   4. $fallback otherwise.
sub _pick_threshold {
    my ($env_key, $sensor_value, $fallback) = @_;
    return $ENV{$env_key} if exists $ENV{$env_key};
    return $sensor_value  if is_valid_value($sensor_value);
    return ''             if $sensor_value eq 'inf';
    return $fallback;
}

# Print the "<name>.warning" and "<name>.critical" lines, or nothing at all
# when none of the four limits is set.
sub _print_thresholds {
    my ($name, $lcr, $lnc, $unc, $ucr) = @_;
    return unless ($lcr ne '' || $lnc ne '' || $unc ne '' || $ucr ne '');
    # print, not printf: the values are data and must not be read as a format.
    print "$name.warning $lnc:$unc\n";
    print "$name.critical $lcr:$ucr\n";
}

# Print the thresholds of a fan sensor.
#
# Critical limits: fan_lower_critical / fan_upper_critical, else the sensor
# value, else 50 / 6000. Warning limits: the sensor value, else the critical
# limit moved inwards by fan_warn_percent (default 5) percent.
sub fan_threshold {
    my ($name, $lcr, $lnc, $unc, $ucr) = @_;
    my $warn_percent = exists $ENV{fan_warn_percent} ? $ENV{fan_warn_percent} : 5;

    # lcr: lower critical
    $lcr = _pick_threshold('fan_lower_critical', $lcr, '50');

    # lnc: lower warning
    if (!is_valid_value($lnc)) {
        $lnc = ($lnc eq 'inf' || $lcr eq '') ? '' : $lcr * (100 + $warn_percent) / 100;
    }

    # ucr: upper critical
    $ucr = _pick_threshold('fan_upper_critical', $ucr, '6000');

    # unc: upper warning
    if (!is_valid_value($unc)) {
        $unc = ($unc eq 'inf' || $ucr eq '') ? '' : $ucr * (100 - $warn_percent) / 100;
    }

    _print_thresholds($name, $lcr, $lnc, $unc, $ucr);
}

# Print the thresholds of a temperature sensor.
#
# Each limit is taken from its temp_* environment variable, else from the
# sensor, else from the defaults 5 / 10 / 65 / 70 (lower critical, lower
# warning, upper warning, upper critical).
sub temp_threshold {
    my ($name, $lcr, $lnc, $unc, $ucr) = @_;

    $lcr = _pick_threshold('temp_lower_critical', $lcr, 5);
    $lnc = _pick_threshold('temp_lower_warning',  $lnc, 10);
    $unc = _pick_threshold('temp_upper_warning',  $unc, '65');
    $ucr = _pick_threshold('temp_upper_critical', $ucr, '70');

    _print_thresholds($name, $lcr, $lnc, $unc, $ucr);
}

# Print the thresholds of a voltage (or power/current) sensor.
#
# Critical limits come from the sensor only. A missing warning limit is the
# critical limit moved inwards by volt_warn_percent (default 20) percent.
sub volt_threshold {
    my ($name, $lcr, $lnc, $unc, $ucr) = @_;
    my $warn_percent = exists $ENV{volt_warn_percent} ? $ENV{volt_warn_percent} : 20;

    $lcr = '' unless is_valid_value($lcr);
    if (!is_valid_value($lnc)) {
        $lnc = ($lcr eq '') ? '' : $lcr * (100 + $warn_percent) / 100;
    }

    $ucr = '' unless is_valid_value($ucr);
    if (!is_valid_value($unc)) {
        $unc = ($ucr eq '') ? '' : $ucr * (100 - $warn_percent) / 100;
    }

    _print_thresholds($name, $lcr, $lnc, $unc, $ucr);
}

# Return $value without leading and trailing whitespace, or 'na' when $value
# is undefined (a column that is missing from the line).
sub trim {
    my $value = shift;
    return 'na' unless defined $value;
    $value =~ s/^\s+//;
    $value =~ s/\s+$//;
    return $value;
}

# Return 1 when $value is a usable reading, 0 when it is undefined, empty,
# 'na' or 'inf'.
sub is_valid_value {
    my $value = shift;
    return 0 unless defined $value;
    return ($value eq 'na' || $value eq 'inf' || $value eq '') ? 0 : 1;
}

########################################

=head1 How to test

  cache_file=ipmitool_sensor_ cache_expires=-1 ./ipmitool_sensor_volt
  cache_file=ipmitool_sensor_ cache_expires=-1 ./ipmitool_sensor_volt config
  cache_file=ipmitool_sensor_ cache_expires=-1 ./ipmitool_sensor_volt suggest
  cache_file=ipmitool_sensor_ cache_expires=-1 ./ipmitool_sensor_volt autoconf
  fan_warn_percent=50 fan_lower_critical=100 fan_upper_critical=1000 cache_file=ipmitool_sensor_ \
    cache_expires=-1 ./ipmitool_sensor_fan config
  temp_lower_warning=1 temp_lower_critical=2 temp_upper_critical=71 temp_upper_warning=72 \
    cache_file=ipmitool_sensor_ cache_expires=-1 ./ipmitool_sensor_temp config
  volt_warn_percent=50 \
    cache_file=ipmitool_sensor_ cache_expires=-1 ./ipmitool_sensor_volt config

=head1 Test Data

  unr Upper Non-Recoverable
  ucr Upper Critical
  unc Upper Non-Critical
  lnc Lower Non-Critical
  lcr Lower Critical
  lnr Lower Non-Recoverable

=head2 ipmitool sensor

# HP ProLiant ML110 G5

CPU FAN | 1434.309 | RPM | ok | 5537.099 | 4960.317 | 4859.086 | na | 937.383 | na
SYSTEM FAN | 1497.454 | RPM | ok | 5952.381 | 5668.934 | 5411.255 | na | 937.383 | na
System 12V | 12.152 | Volts | ok | na | na | na | na | na | na
System 5V | 5.078 | Volts | ok | na | na | na | na | na | na
System 3.3V | 3.271 | Volts | ok | na | na | na | na | na | na
CPU0 Vcore | 1.127 | Volts | ok | na | na | na | na | na | na
System 1.25V | 1.254 | Volts | ok | na | na | na | na | na | na
System 1.8V | 1.842 | Volts | ok | na | na | na | na | na | na
System 1.2V | 1.107 | Volts | ok | na | na | na | na | na | na
CPU0 Diode | na | degrees C | na | na | 20.000 | 25.000 | 85.000 | 90.000 | 95.000
CPU0 Dmn 0 Temp | 24.500 | degrees C | ok | na | 0.000 | 0.000 | 97.000 | 100.000 | 100.500
CPU0 Dmn 1 Temp | 29.000 | degrees C | ok | na | 0.000 | 0.000 | 97.000 | 100.000 | 100.500

# HP ProLiant DL160

FAN1 ROTOR1 | 7680.492 | RPM | ok | na | inf | na | na | 1000.400 | na

# HP ProLiant DL360 G5

Fan Block 1 | 34.888 | unspecified | nc | na | na | 75.264 | na | na | na
Fan Block 2 | 29.792 | unspecified | nc | na | na | 75.264 | na | na | na
Fan Block 3 | 37.240 | unspecified | nc | na | na | 75.264 | na | na | na
Fan Blocks | 0.000 | unspecified | nc | na | na | 0.000 | na | na | na
Temp 1 | 40.000 | degrees C | ok | na | na | -64.000 | na | na | na
Temp 2 | 21.000 | degrees C | ok | na | na | -64.000 | na | na | na
Temp 3 | 30.000 | degrees C | ok | na | na | -64.000 | na | na | na
Temp 4 | 30.000 | degrees C | ok | na | na | -64.000 | na | na | na
Temp 5 | 28.000 | degrees C | ok | na | na | -64.000 | na | na | na
Temp 6 | na | degrees C | na | na | na | 32.000 | na | na | na
Temp 7 | na | degrees C | na | na | na | 32.000 | na | na | na
Power Meter | 214.000 | Watts | cr | na | na | 384.000 | na | na | na
Power Meter 2 | 220.000 | watts | cr | na | na | 384.000 | na | na | na

=head2 ipmitool sdr

# HP ProLiant ML110 G5

CPU FAN | 1434.31 RPM | ok
SYSTEM FAN | 1497.45 RPM | ok
System 12V | 12.10 Volts | ok
System 5V | 5.08 Volts | ok
System 3.3V | 3.27 Volts | ok
CPU0 Vcore | 1.14 Volts | ok
System 1.25V | 1.25 Volts | ok
System 1.8V | 1.84 Volts | ok
System 1.2V | 1.11 Volts | ok
CPU0 Diode | disabled | ns
CPU0 Dmn 0 Temp | 23.50 degrees C | ok
CPU0 Dmn 1 Temp | 29 degrees C | ok

# HP ProLiant DL360 G5

Fan Block 1 | 34.89 unspecifi | nc
Fan Block 2 | 29.79 unspecifi | nc
Fan Block 3 | 37.24 unspecifi | nc
Fan Blocks | 0 unspecified | nc
Temp 1 | 41 degrees C | ok
Temp 2 | 19 degrees C | ok
Temp 3 | 30 degrees C | ok
Temp 4 | 30 degrees C | ok
Temp 5 | 26 degrees C | ok
Temp 6 | disabled | ns
Temp 7 | disabled | ns
Power Meter | 208 Watts | cr
Power Meter 2 | 210 watts | cr

=cut

# vim:syntax=perl