forked from tchernicum/bcapps
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbc-daemon-checker.pl
executable file
·147 lines (112 loc) · 4.02 KB
/
bc-daemon-checker.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#!/bin/perl
# Checks that required daemons are alive and keeps other programs from
# running too long (eg, a long 'curl' run can hang whatever program is
# waiting for it to end; timed-run would be another solution).
# Runs from cron, so this script itself doesn't need to be in @must below.
# Options:
#   --nomail: don't send email

# bclib.pl supplies the helper functions used throughout this script
push @INC, "/usr/local/lib";
require "bclib.pl";

# A single ps snapshot drives everything below; each line carries the
# pid, elapsed time, rss, vsz and the full command line.
# NOTE(review): "age=30" is handed to cache_command as-is; presumably a
# cache lifetime in seconds -- confirm in bclib.pl
($out) = cache_command(
    "ps -www -ax -eo 'pid etime rss vsz args'",
    "age=30"
);

# the first line of ps output is the column header, so it is discarded
(undef, @procs) = split(/\n/, $out);
# TODO: turn these lists into an external file or something

# Processes that MUST always be running (this list can probably be
# trimmed down).
# NOTE: some names carry a full path and some don't, because of the way
# these procs start
# sshd dropped for dropbear 17 Dec 2012
@must = qw(
    init
    syslogd
    klogd
    /usr/sbin/dropbear
    ntpd
    /usr/libexec/mysqld
    /usr/libexec/postfix/master
    qmgr
    crond
    /usr/sbin/atd
    /sbin/mingetty
    /usr/local/bin/bc-voronoi-temperature.pl
    /usr/local/bin/bc-delaunay-temperature.pl
    /usr/local/bin/bc-metar-db.pl
    /usr/local/bin/bc-gocomics-comments.pl
    /usr/sbin/lighttpd
    /usr/local/bin/php-cgi
    teenydns
    pickup
    /usr/bin/fail2ban-server
);

# Processes that MAY run forever but aren't required to (choices like
# /sbin/udevd are weird, but whether hotplug works isn't a concern here).
# "sshd:" stands for one specific login: the main daemon must always
# run, while the per-client daemon may come and go as it pleases
@may = qw(
    SCREEN
    screen
    -csh
    sh
    /bin/sh
    /sbin/udevd
    /usr/libexec/gam_server
    sshd:
    -bin/tcsh
    /usr/sbin/yum-updatesd
    /sites/TEST/bc-slow-cgi.pl
);

# Processes that get killed once they have run for more than 5m
# <h>Right now, it's just you, curly!</h>
@kill = qw(curl);

# Hash versions of the three lists, used for by-name lookups below.
# NOTE(review): list2hash comes from bclib.pl; presumably it maps each
# list element to a true value -- confirm there
%must = list2hash(@must);
%may  = list2hash(@may);
%kill = list2hash(@kill);
# Converts a ps "etime" value into a number of seconds.
# Accepted forms are "dd-hh:mm:ss", "hh:mm:ss" and "mm:ss".
# Returns undef (empty list in list context) for anything else,
# including an undefined argument.
sub _etime_to_seconds {
    my ($etime) = @_;
    return unless defined $etime;
    if ($etime =~ /^(\d+)\-(\d{2}):(\d{2}):(\d{2})$/) {
        return $1*86400 + $2*3600 + $3*60 + $4;
    }
    if ($etime =~ /^(\d{2}):(\d{2}):(\d{2})$/) {
        return $1*3600 + $2*60 + $3;
    }
    if ($etime =~ /^(\d{2}):(\d{2})$/) {
        return $1*60 + $2;
    }
    return;
}

# Walk the ps snapshot one process at a time. Each process is either
# skipped (bracketed name, defunct, on the "may" list, or running 300s
# or less), recorded in %isproc (on the "must" list), killed (on the
# "kill" list and running over 300s), or reported in @err.
for $i (@procs) {
    # cleanup proc line and split into fields; only the first three
    # words of the command line are kept
    $i=trim($i);
    $i=~s/\s+/ /isg;
    ($pid, $time, $rss, $vsz, $proc, $proc2, $proc3) = split(/\s+/,$i);

    # ignore [bracketed] processes (TODO: why? presumably these are
    # kernel threads -- confirm)
    if ($proc=~/^\[.*\]$/) {next;}

    # for perl/xargs/python/ruby, the next non-option arg tells what the
    # process really is
    if ($proc=~m%/perl$%||$proc eq "xargs"||$proc=~m%/python$%||$proc=~m%(^|/)ruby$%) {
        # TODO: this is imperfect (only a single leading option is skipped)
        if ($proc2=~/^\-/) {
            $proc=$proc3;
        } else {
            $proc=$proc2;
        }
    }

    # really ugly HACK: (for "perl -w") [can't even do -* because of -tcsh]
    if ($proc=~/^\-w$/) {$proc=$proc3;}

    # can't do much w/ defunct procs
    if ($i=~/<defunct>/) {next;}

    # if this program is permitted to run forever, but not required, stop here
    # TODO: add check if process is on two lists?
    if ($may{$proc}) {next;}

    # if this process must run, record it and continue
    if ($must{$proc}) {
        $isproc{$proc}=1;
        next;
    }

    # how long has program been running?
    $sec = _etime_to_seconds($time);
    unless (defined $sec) {
        warnlocal("Can't convert $time into seconds");
        next;
    }

    # any process permitted to run up to 5m
    # TODO: specific limits for procs where 5m is wrong
    if ($sec<=300) {next;}

    # if I'm allowed to kill this process, do so now
    if ($kill{$proc}) {
        system("kill $pid");
        next;
    }

    # process is running long, and is neither permitted nor required to
    # run forever, but I'm not allowed to kill it.... so whine
    push(@err, "$proc ($pid): running > 300s, but no perm to kill");
}
# Returns, in sorted order, every key of %$required that has no true
# entry in %$seen.
sub _missing_daemons {
    my ($required, $seen) = @_;
    return grep { !$seen->{$_} } sort keys %$required;
}

# confirm all "must" processes are in fact running
# The lookup hash is %must (lowercase). Perl identifiers are
# case-sensitive, so iterating %MUST would walk a different, always-empty
# hash and a missing daemon would never be reported.
for $i (_missing_daemons(\%must, \%isproc)) {
    push(@err, "$i: not running, but is required");
}
# Mail the collected problems (if there are any), one per line
# TODO: message should be host specific if run on multiple hosts
if (@err) {
    $body = join("\n", @err);
    # the mail is skipped when the nomail option is set
    # NOTE(review): %globopts is not filled in this file; presumably
    # bclib.pl populates it from the command line -- confirm
    sendmail(
        "erf\@barrycarter.info", "error\@barrycarter.info",
        "Errors exist!", $body
    ) unless $globopts{nomail};
}