Skip to content

Commit 65a5aab

Browse files
authored
Add fail2ban monitor (#12251)
* Add fail2ban monitor
1 parent 5881e3e commit 65a5aab

5 files changed

Lines changed: 74 additions & 0 deletions

File tree

compose.production.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,8 @@ services:
358358
volumes:
359359
# Needed to inspect other docker containers
360360
- /var/run/docker.sock:/var/run/docker.sock
361+
# Needed to inspect fail2ban since it runs on the host
362+
- /var/run/fail2ban:/var/run/fail2ban
361363
logging:
362364
options:
363365
max-size: "512m"

docker/Dockerfile.olbase

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ RUN apt-get -qq update && apt-get install -y \
2424
libffi-dev \
2525
curl \
2626
screen \
27+
# fail2ban used for monitoring nginx logs and banning abusive IPs
28+
fail2ban \
2729
# Editors (for our convenience)
2830
vim \
2931
emacs \
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from scripts.monitoring.utils import bash_run
2+
3+
4+
def get_fail2ban_counts(jail: str) -> tuple[int, int]:
    """
    Return the (currently_failed, currently_banned) counts for a fail2ban jail.

    Runs ``fail2ban-client status <jail>`` via ``bash_run``, keeps only the
    lines containing "Currently", and parses the integer after the last colon
    on the "Currently failed:" and "Currently banned:" lines.

    :param jail: Name of the fail2ban jail to query (e.g. "nginx-429").
    :return: ``(failed, banned)``. A count stays 0 when its line does not
        appear in the command output.
    :raises ValueError: If the text after the colon on a matched line is not
        an integer.
    """
    # Imported locally so this function carries its own dependency.
    import shlex

    # The jail name is interpolated into a command line that is interpreted by
    # a shell (note the pipe), so quote it: an unquoted value containing spaces
    # or shell metacharacters would otherwise be executed/expanded by the shell
    # instead of being passed to fail2ban-client as a single argument.
    # Plain names such as "nginx-429" are left unchanged by shlex.quote.
    result = bash_run(
        f"fail2ban-client status {shlex.quote(jail)} | grep 'Currently'",
        capture_output=True,
    )

    failed = 0
    banned = 0
    for line in result.stdout.splitlines():
        line = line.strip()
        # Lines look like "|- Currently failed: 193"; the count is whatever
        # follows the final colon.
        if "Currently failed:" in line:
            failed = int(line.split(":")[-1].strip())
        elif "Currently banned:" in line:
            banned = int(line.split(":")[-1].strip())
    return failed, banned

scripts/monitoring/monitor.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import httpx
1212

13+
from scripts.monitoring.fail2ban_monitor import get_fail2ban_counts
1314
from scripts.monitoring.solr_updater_monitor import get_solr_updater_lag_event
1415
from scripts.monitoring.utils import (
1516
GraphiteEvent,
@@ -172,6 +173,29 @@ async def monitor_empty_homepage():
172173
).submit(GRAPHITE_URL)
173174

174175

176+
@limit_server(["ol-www0"], scheduler)
@scheduler.scheduled_job('interval', seconds=60)
def monitor_fail2ban():
    """
    Submit the nginx-429 fail2ban jail counts to Graphite.

    Reads the currently-failed and currently-banned counts for the
    "nginx-429" jail and submits them as two events sharing one timestamp.
    """
    currently_failed, currently_banned = get_fail2ban_counts("nginx-429")
    # Take a single timestamp so both data points line up on the same second.
    now = int(time.time())

    failed_event = GraphiteEvent(
        path="stats.ol.fail2ban.nginx-429.failed",
        value=float(currently_failed),
        timestamp=now,
    )
    banned_event = GraphiteEvent(
        path="stats.ol.fail2ban.nginx-429.banned",
        value=float(currently_banned),
        timestamp=now,
    )
    GraphiteEvent.submit_many([failed_event, banned_event], GRAPHITE_URL)
197+
198+
175199
@limit_server(["ol-home0"], scheduler)
176200
@scheduler.scheduled_job('interval', seconds=60)
177201
async def monitor_solr_updater_lag():
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from unittest.mock import MagicMock, patch
2+
3+
from scripts.monitoring.fail2ban_monitor import get_fail2ban_counts
4+
5+
FAKE_FAIL2BAN_OUTPUT = """
6+
Status for the jail: nginx-429
7+
|- Filter
8+
| |- Currently failed: 193
9+
| |- Total failed: 56440304
10+
| `- File list: /1/var/log/nginx/error.log
11+
`- Actions
12+
|- Currently banned: 141
13+
|- Total banned: 661976
14+
`- Banned IP list: 08.07.04.02 04.06.02.09
15+
"""
16+
17+
18+
def test_get_fail2ban_counts():
    """Check that both "Currently ..." counts are parsed from sample output."""
    # Stand-in for the object returned by bash_run; only .stdout is read.
    fake_completed = MagicMock(stdout=FAKE_FAIL2BAN_OUTPUT)

    with patch(
        "scripts.monitoring.fail2ban_monitor.bash_run",
        return_value=fake_completed,
    ):
        counts = get_fail2ban_counts("nginx-429")

    # (failed, banned) as they appear in FAKE_FAIL2BAN_OUTPUT.
    assert counts == (193, 141)

0 commit comments

Comments
 (0)