File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -358,6 +358,8 @@ services:
358358 volumes :
359359 # Needed to inspect other docker containers
360360 - /var/run/docker.sock:/var/run/docker.sock
361+ # Needed to inspect fail2ban since it runs on the host
362+ - /var/run/fail2ban:/var/run/fail2ban
361363 logging :
362364 options :
363365 max-size : " 512m"
Original file line number Diff line number Diff line change @@ -24,6 +24,8 @@ RUN apt-get -qq update && apt-get install -y \
2424 libffi-dev \
2525 curl \
2626 screen \
27+ # fail2ban used for monitoring nginx logs and banning abusive IPs
28+ fail2ban \
2729# Editors (for our convenience)
2830 vim \
2931 emacs \
Original file line number Diff line number Diff line change 1+ from scripts .monitoring .utils import bash_run
2+
3+
def get_fail2ban_counts(jail: str) -> tuple[int, int]:
    """
    Return (currently_failed, currently_banned) counts for the given fail2ban jail.

    Runs ``fail2ban-client status <jail>`` through ``bash_run`` and parses the
    "Currently failed:" and "Currently banned:" lines of the captured output.
    A count whose line is missing from the output is reported as 0.

    :param jail: Name of the fail2ban jail to query, e.g. "nginx-429".
    :return: Tuple of (currently_failed, currently_banned).
    :raises ValueError: If a matched line does not end in an integer.
    """
    # Imported locally so this function brings its own dependency into scope.
    import shlex

    # The command string contains a pipe, so it is interpreted by a shell.
    # Quote the jail name so whitespace or shell metacharacters in it cannot
    # alter the command. Plain names such as "nginx-429" are left unchanged.
    result = bash_run(
        f"fail2ban-client status {shlex.quote(jail)} | grep 'Currently'",
        capture_output=True,
    )
    failed = 0
    banned = 0
    for line in result.stdout.splitlines():
        line = line.strip()
        # The count is whatever follows the last colon on the matched line.
        if "Currently failed:" in line:
            failed = int(line.split(":")[-1].strip())
        elif "Currently banned:" in line:
            banned = int(line.split(":")[-1].strip())
    return failed, banned
Original file line number Diff line number Diff line change 1010
1111import httpx
1212
13+ from scripts .monitoring .fail2ban_monitor import get_fail2ban_counts
1314from scripts .monitoring .solr_updater_monitor import get_solr_updater_lag_event
1415from scripts .monitoring .utils import (
1516 GraphiteEvent ,
@@ -172,6 +173,29 @@ async def monitor_empty_homepage():
172173 ).submit (GRAPHITE_URL )
173174
174175
@limit_server(["ol-www0"], scheduler)
@scheduler.scheduled_job('interval', seconds=60)
def monitor_fail2ban():
    """
    Submit the fail2ban nginx-429 jail stats to Graphite.

    Reads the currently-failed and currently-banned counts for the
    "nginx-429" jail and submits them as two Graphite events that share one
    timestamp. Scheduled as an interval job with a 60 second period.
    """
    num_failed, num_banned = get_fail2ban_counts("nginx-429")
    # Take the timestamp once so both metrics land on the same data point.
    now = int(time.time())
    metrics = (
        ("stats.ol.fail2ban.nginx-429.failed", num_failed),
        ("stats.ol.fail2ban.nginx-429.banned", num_banned),
    )
    events = [
        GraphiteEvent(path=metric_path, value=float(count), timestamp=now)
        for metric_path, count in metrics
    ]
    GraphiteEvent.submit_many(events, GRAPHITE_URL)
197+
198+
175199@limit_server (["ol-home0" ], scheduler )
176200@scheduler .scheduled_job ('interval' , seconds = 60 )
177201async def monitor_solr_updater_lag ():
Original file line number Diff line number Diff line change 1+ from unittest .mock import MagicMock , patch
2+
3+ from scripts .monitoring .fail2ban_monitor import get_fail2ban_counts
4+
# Sample `fail2ban-client status nginx-429` output, used as the stubbed stdout
# of bash_run when testing get_fail2ban_counts.
FAKE_FAIL2BAN_OUTPUT = """
Status for the jail: nginx-429
|- Filter
| |- Currently failed: 193
| |- Total failed: 56440304
| `- File list: /1/var/log/nginx/error.log
`- Actions
|- Currently banned: 141
|- Total banned: 661976
`- Banned IP list: 08.07.04.02 04.06.02.09
"""
16+
17+
def test_get_fail2ban_counts():
    """get_fail2ban_counts extracts both 'Currently' counts from the status output."""
    # Stand-in for the object bash_run returns; only .stdout is read.
    fake_completed = MagicMock(stdout=FAKE_FAIL2BAN_OUTPUT)

    with patch(
        "scripts.monitoring.fail2ban_monitor.bash_run",
        return_value=fake_completed,
    ):
        failed_count, banned_count = get_fail2ban_counts("nginx-429")

    assert (failed_count, banned_count) == (193, 141)
You can’t perform that action at this time.
0 commit comments