diff --git a/conftest/README b/conftest/README new file mode 100644 index 0000000..2c4fdfe --- /dev/null +++ b/conftest/README @@ -0,0 +1,9 @@ +To test: + - copy crashin and fakemail to /R/bin + - copy crashmail.conf, testcrash.conf and testcrash2.conf to /etc/supervisor/conf.d + +Monitor the log files in /L/: + - You'll get an 'ignoring hostname: testcrash crashed at ...' message every 10s in /L/crashmail.log (testcrash) + + - You'll get 2 'unexpected exit, mailing' messages about every 30s in /L/crashmail.log (notifycrash:testcrash2, notifycrash:testcrash3) + and the corresponding mail messages in /L/fakemail.log \ No newline at end of file diff --git a/conftest/crashin b/conftest/crashin new file mode 100755 index 0000000..dbd31ad --- /dev/null +++ b/conftest/crashin @@ -0,0 +1,7 @@ +#!/bin/bash +# test program: non 0 exit code after timeout +timeout="$1" +echo $(date) "Crashing in $timeout seconds" +sleep "$timeout" +echo $(date) "CRASHING NOW" +exit 123 diff --git a/conftest/crashmail.conf b/conftest/crashmail.conf new file mode 100644 index 0000000..dc6bdb8 --- /dev/null +++ b/conftest/crashmail.conf @@ -0,0 +1,10 @@ +# config to use the fakemail mail sender +# and monitor only the notifycrash group (testcrash2 and testcrash3) +[eventlistener:crashmail] +command = + /usr/local/bin/crashmail + -p notifycrash:* + -o hostname -m notify-on-crash@domain.com + -s '/R/bin/fakemail -t -i -f crash-notifier@domain.com' +events=PROCESS_STATE_EXITED +stderr_logfile=/L/crashmail.log \ No newline at end of file diff --git a/conftest/fakemail b/conftest/fakemail new file mode 100755 index 0000000..e60f0fd --- /dev/null +++ b/conftest/fakemail @@ -0,0 +1,6 @@ +#!/bin/bash +# don't send any mail: only write the args and stdin to the logfile +logfile=/L/fakemail.log +echo $(date) "$*" >> "$logfile" +cat >> "$logfile" +echo >> "$logfile" \ No newline at end of file diff --git a/conftest/testcrash.conf b/conftest/testcrash.conf new file mode 100644 index 0000000..fb8fdbc --- /dev/null +++ b/conftest/testcrash.conf @@ -0,0 +1,9 @@ +# test program: 
crashes every 10 seconds +[program:testcrash] +command=/R/bin/crashin 10 + +autostart=true +autorestart=true + +stdout_logfile=/L/testcrash.log +redirect_stderr=true diff --git a/conftest/testcrash2.conf b/conftest/testcrash2.conf new file mode 100644 index 0000000..951bcd0 --- /dev/null +++ b/conftest/testcrash2.conf @@ -0,0 +1,21 @@ +[group:notifycrash] +programs=testcrash2,testcrash3 + +# test program: crashes every 30 seconds +[program:testcrash2] +command=/R/bin/crashin 30 + +autostart=true +autorestart=true + +stdout_logfile=/L/testcrash2.log +redirect_stderr=true + +[program:testcrash3] +command=/R/bin/crashin 33 + +autostart=true +autorestart=true + +stdout_logfile=/L/testcrash3.log +redirect_stderr=true diff --git a/superlance/crashmail.py b/superlance/crashmail.py old mode 100644 new mode 100755 index eacac31..a240311 --- a/superlance/crashmail.py +++ b/superlance/crashmail.py @@ -37,7 +37,7 @@ -p -- specify a supervisor process_name. Send mail when this process transitions to the EXITED state unexpectedly. If this process is - part of a group, it can be specified using the + part of a group, it must be specified using the 'process_name:group_name' syntax. 
-a -- Send mail when any child of the supervisord transitions @@ -66,6 +66,7 @@ """ import os +import re import sys from supervisor import childutils @@ -124,10 +125,25 @@ def runforever(self, test=False): if self.optionalheader: subject = self.optionalheader + ':' + subject - self.stderr.write('unexpected exit, mailing\n') - self.stderr.flush() - self.mail(self.email, subject, msg) + ident = pheaders['processname'] + if pheaders['groupname'] != ident: + ident = pheaders['groupname'] + ":" + ident + + if self.any or \ + not self.programs or \ + any(prog.match(ident) for prog in self.programs): + + self.stderr.write('\nunexpected exit, mailing\n') + self.stderr.flush() + + self.mail(self.email, subject, msg) + + else: + + self.stderr.write('\nignoring %s\n' % subject) + self.stderr.flush() + childutils.listener.ok(self.stdout) if test: @@ -140,7 +156,7 @@ def mail(self, email, subject, msg): body += msg with os.popen(self.sendmail, 'w') as m: m.write(body) - self.stderr.write('Mailed:\n\n%s' % body) + self.stderr.write('Mailed:\n\n%s\n' % body) self.mailed = body @@ -167,13 +183,19 @@ def main(argv=sys.argv): email = None optionalheader = None + progGroupRE = re.compile(r"(\w+):\*") + for option, value in opts: if option in ('-h', '--help'): usage() if option in ('-p', '--program'): - programs.append(value) + pg = progGroupRE.match(value) + if pg: + programs.append(re.compile(pg.group(1)+":.*")) + else: + programs.append(re.compile(re.escape(value))) if option in ('-a', '--any'): any = True diff --git a/superlance/tests/crashmail_test.py b/superlance/tests/crashmail_test.py index c7a9667..3b9da62 100644 --- a/superlance/tests/crashmail_test.py +++ b/superlance/tests/crashmail_test.py @@ -1,5 +1,6 @@ import unittest from superlance.compat import StringIO +import re class CrashMailTests(unittest.TestCase): def _getTargetClass(self): @@ -29,7 +30,7 @@ def _makeOnePopulated(self, programs, any, response=None): return prog def 
test_runforever_not_process_state_exited(self): - programs = {'foo':0, 'bar':0, 'baz_01':0 } + programs = [] any = None prog = self._makeOnePopulated(programs, any) prog.stdin.write('eventname:PROCESS_STATE len:0\n') @@ -38,7 +39,7 @@ def test_runforever_not_process_state_exited(self): self.assertEqual(prog.stderr.getvalue(), 'non-exited event\n') def test_runforever_expected_exit(self): - programs = ['foo'] + programs = [re.compile('foo')] any = None prog = self._makeOnePopulated(programs, any) payload=('expected:1 processname:foo groupname:bar ' @@ -51,7 +52,7 @@ def test_runforever_expected_exit(self): self.assertEqual(prog.stderr.getvalue(), 'expected exit\n') def test_runforever_unexpected_exit(self): - programs = ['foo'] + programs = [re.compile('bar:foo')] any = None prog = self._makeOnePopulated(programs, any) payload=('expected:0 processname:foo groupname:bar ' @@ -63,14 +64,15 @@ def test_runforever_unexpected_exit(self): prog.runforever(test=True) output = prog.stderr.getvalue() lines = output.split('\n') - self.assertEqual(lines[0], 'unexpected exit, mailing') - self.assertEqual(lines[1], 'Mailed:') - self.assertEqual(lines[2], '') - self.assertEqual(lines[3], 'To: chrism@plope.com') - self.assertTrue('Subject: [foo]: foo crashed at' in lines[4]) - self.assertEqual(lines[5], '') + + self.assertEqual(lines[1], 'unexpected exit, mailing') + self.assertEqual(lines[2], 'Mailed:') + self.assertEqual(lines[3], '') + self.assertEqual(lines[4], 'To: chrism@plope.com') + self.assertTrue('Subject: [foo]: foo crashed at' in lines[5]) + self.assertEqual(lines[6], '') self.assertTrue( - 'Process foo in group bar exited unexpectedly' in lines[6]) + 'Process foo in group bar exited unexpectedly' in lines[7]) import os f = open(os.path.join(self.tempdir, 'email.log'), 'r') mail = f.read() @@ -78,5 +80,37 @@ def test_runforever_unexpected_exit(self): self.assertTrue( 'Process foo in group bar exited unexpectedly' in mail) + def 
test_runforever_unexpected_exit_group(self): + programs = [re.compile('bar:*')] + any = None + prog = self._makeOnePopulated(programs, any) + payload=('expected:0 processname:foo groupname:bar ' + 'from_state:RUNNING pid:1') + prog.stdin.write( + 'eventname:PROCESS_STATE_EXITED len:%s\n' % len(payload)) + prog.stdin.write(payload) + prog.stdin.seek(0) + prog.runforever(test=True) + output = prog.stderr.getvalue() + lines = output.split('\n') + + self.assertEqual(lines[1], 'unexpected exit, mailing') + + def test_runforever_unexpected_exit_ignored(self): + programs = [re.compile('notfoo')] + any = None + prog = self._makeOnePopulated(programs, any) + payload=('expected:0 processname:foo groupname:bar ' + 'from_state:RUNNING pid:1') + prog.stdin.write( + 'eventname:PROCESS_STATE_EXITED len:%s\n' % len(payload)) + prog.stdin.write(payload) + prog.stdin.seek(0) + prog.runforever(test=True) + output = prog.stderr.getvalue() + lines = output.split('\n') + + self.assertTrue('ignoring [foo]: foo crashed ' in lines[1]) + if __name__ == '__main__': unittest.main()