#!/bin/sh
# cache error trigger. This shell script is executed by mcelog in daemon mode
# when a CPU reports excessive corrected cache errors. This could be an
# indication of future uncorrected errors.
#
# environment:
# MESSAGE        Human readable error message
# CPU            Linux CPU number that triggered the error
# LEVEL          Cache level affected by error
# TYPE           Cache type affected by error (Data,Instruction,Generic)
# AFFECTED_CPUS  List of CPUs sharing the affected cache
# SOCKETID       Socket ID of affected CPU
#
# note: will run as mcelog configured user
# this can be changed in mcelog.conf
#
# exit status: 0 if every listed CPU was offlined, 1 if any entry was
# invalid or could not be offlined.
#
# offline the CPUs sharing the affected cache
#
EXIT=0

# AFFECTED_CPUS is a whitespace separated list, so it is deliberately left
# unquoted here to get one loop iteration per CPU number.
for i in $AFFECTED_CPUS ; do
	# Only plain decimal numbers are accepted. Formatting an arbitrary word
	# with printf %d would yield 0 for garbage (and octal for a leading
	# zero), which would offline the wrong CPU.
	case "$i" in
	'' | *[!0-9]*)
		logger -s -p daemon.warn -t mcelog "Ignoring invalid CPU id '$i' in AFFECTED_CPUS"
		EXIT=1
		continue
		;;
	esac

	logger -s -p daemon.crit -t mcelog "Offlining CPU $i due to cache error threshold"
	F="/sys/devices/system/cpu/cpu${i}/online"
	echo 0 > "$F"
	# Read the state back: the write may be rejected (for example when the
	# CPU cannot be hot-unplugged or the file does not exist).
	if [ "$(cat "$F")" != "0" ] ; then
		logger -s -p daemon.warn -t mcelog "Offlining CPU $i failed"
		EXIT=1
	fi
done

# Optional site-specific hook. It is sourced, so it runs in this shell and
# can see (and change) EXIT.
[ -x ./cache-error-trigger.local ] && . ./cache-error-trigger.local

exit $EXIT