Skip to content

Commit 6bdc03d

Browse files
committed
Added watchdog for Windows
This change adds an external watchdog for windows. When enabled, any cf-agent processes found running longer than 5 minutes are terminated. Ticket: ENT-5538 Changelog: Title
1 parent 83c4e0c commit 6bdc03d

4 files changed

Lines changed: 149 additions & 2 deletions

File tree

cfe_internal/core/watchdog/README.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,20 @@ If there is less than 500MB of free space, the watchdog will clean up old archiv
5050
- Introduced check for too many concurrent cf-agent processes (3.15.0)
5151
- Introduced check for integrity issues identified by cf-check (3.15.0)
5252

53+
## Windows Watchdog
54+
55+
The Windows watchdog is implemented as a PowerShell script rendered from a mustache template.
56+
57+
When **enabled** the policy ensures that the watchdog script is scheduled for execution via the windows task scheduler.
58+
59+
When **disabled** the policy ensures that there is no scheduled task named `CFEngine-watchdog`.
60+
61+
The watchdog logs to `$(sys.workdir)/watchdog.log` (`C:\Program Files\Cfengine\watchdog.log`). Note, this log file is **not** automatically rotated or purged.
62+
63+
**History:**
64+
65+
- Initially introduced with check to terminate any cf-agent processes that have been running for longer than 5 minutes. (3.17.0)
66+
5367
### Symptoms of pathology
5468

5569
The following conditions are included in the watchdog checks:
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# CFEngine watchdog for Windows.
# Terminates cf-agent processes that have been running for longer than the
# threshold below and logs what was observed before and after doing so.
# The log file path is filled in when the mustache template is rendered.
$LOGFILE="{{{logfile}}}"

# A cf-agent process older than this many minutes counts as long running.
$long_running_cf_agent_threshold_min = 5
# Remediation is triggered once this many long running agents are found.
$long_running_cf_agent_count_threshold = 1

# Columns logged for every cf-agent process before and after remediation.
# NOTE(review): CommandLine may not be populated by Get-Process on every
# PowerShell version - confirm on the oldest supported host.
$cf_agent_columns = "Name", "CommandLine", "StartTime", @{label="Elapsed Minutes";expression={[System.Math]::Round(((Get-Date)-$_.StartTime).totalminutes)}}

# Take a single snapshot of the long running agents so that the count that
# is logged and the set of processes that is terminated cannot disagree.
$long_running_cutoff = (Get-Date).AddMinutes(-$long_running_cf_agent_threshold_min)
$long_running_cf_agents = @(Get-Process cf-agent -ErrorAction SilentlyContinue | Where-Object { $_.StartTime -lt $long_running_cutoff })
$long_running_cf_agent_count = $long_running_cf_agents.Count

If ($long_running_cf_agent_count -ge $long_running_cf_agent_count_threshold) {
    $DATESTAMP=Get-Date -Format "yyyy-MM-dd HH:mm"

    Write-Output "${DATESTAMP}: Count of long running cf-agent ($long_running_cf_agent_count) has met the threshold ($long_running_cf_agent_count_threshold) of long running agents, remediation triggered." | Tee-Object -FilePath "$LOGFILE" -Append
    Write-Output "${DATESTAMP}: Before remediation" | Tee-Object -FilePath "$LOGFILE" -Append
    # Log every cf-agent process (not only the long running ones) for context.
    Get-Process cf-agent -ErrorAction SilentlyContinue | Format-Table -ErrorAction SilentlyContinue -Property $cf_agent_columns | Tee-Object -FilePath "$LOGFILE" -Append

    # Terminate exactly the processes that were counted above.
    $long_running_cf_agents | Stop-Process -Force
    # Give the system a bit of time to kill all the processes
    Start-Sleep -Seconds 1

    $DATESTAMP=Get-Date -Format "yyyy-MM-dd HH:mm"
    $running_cf_agent_count = @(Get-Process cf-agent -ErrorAction SilentlyContinue).Count
    Write-Output "${DATESTAMP}: Observed $running_cf_agent_count cf-agent processes after remediation" | Tee-Object -FilePath "$LOGFILE" -Append
    Get-Process cf-agent -ErrorAction SilentlyContinue | Format-Table -ErrorAction SilentlyContinue -Property $cf_agent_columns | Tee-Object -FilePath "$LOGFILE" -Append
}

cfe_internal/core/watchdog/watchdog.cf

Lines changed: 94 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ bundle agent cfe_internal_core_watchdog(state)
66
"description"
77
string => "Configure external watchdog processes (like cron, or monit) to
88
make sure that cf-execd is always running";
9+
vars:
10+
11+
"_logfile" string => "$(sys.workdir)/watchdog.log";
912

1013
classes:
1114
"invalid_state"
@@ -23,21 +26,110 @@ bundle agent cfe_internal_core_watchdog(state)
2326
"use_cfe_internal_core_watchdog_aix"
2427
expression => "!use_cfe_internal_core_watchdog_cron_d.aix";
2528

29+
"use_cfe_internal_core_watchdog_windows"
30+
expression => "windows";
31+
2632
methods:
2733
use_cfe_internal_core_watchdog_cron_d::
2834
"any" usebundle => cfe_internal_core_watchdog_cron_d( $(state) );
2935

3036
use_cfe_internal_core_watchdog_aix::
3137
"any" usebundle => cfe_internal_core_watchdog_aix( $(state) );
3238

39+
use_cfe_internal_core_watchdog_windows::
40+
"any" usebundle => cfe_internal_core_watchdog_windows( $(state) );
41+
3342
reports:
3443
DEBUG|DEBUG_cfe_internal_core_watchdog::
3544
"DEBUG $(this.bundle): Watchdog '$(state)'";
3645
"DEBUG $(this.bundle): Invalid state '$(state)' only enabled|disabled allowed"
3746
ifvarclass => "invalid_state";
3847

39-
!(use_cfe_internal_core_watchdog_cron_d|aix)::
40-
"WARNING $(this.bundle): Currently only supports /etc/cron.d on systems that have pgrep in the the stdlib paths bundle and AIX hosts.";
48+
!(use_cfe_internal_core_watchdog_cron_d|use_cfe_internal_core_watchdog_aix|use_cfe_internal_core_watchdog_windows)::
49+
"WARNING $(this.bundle): Currently only supports /etc/cron.d on systems that have pgrep in the the stdlib paths bundle, AIX and Windows hosts.";
50+
}
51+
52+
bundle agent cfe_internal_core_watchdog_windows(state)
# @brief Manage the scheduled task that runs the CFEngine watchdog on windows
# @param state enabled|disabled
#
# - enabled: the watchdog script is registered with the task scheduler as
#   "CFEngine-watchdog" (created or re-created when the existing definition
#   does not match what this bundle expects)
# - disabled: any scheduled task named "CFEngine-watchdog" is deleted
#
# Any other value is treated as "invalid" and no scheduled task is changed.
{
  vars:
    windows::
      # Normalise the parameter: anything but enabled|disabled becomes "invalid"
      "_requested_state"
        string => ifelse( regcmp( "enabled|disabled", $(state) ), "$(state)", "invalid");

      # Name of the scheduled task and how often (in minutes) it runs
      "_taskname" string => "CFEngine-watchdog";
      "_taskfreq" string => "1";

      # The rendered watchdog script and the interpreter invocation used for it
      "_taskscript" string => "$(sys.bindir)$(const.dirsep)watchdog.ps1";
      "_taskrun" string => "PowerShell";
      "_taskrun_args" string => "-NoProfile -ExecutionPolicy bypass -File";
      # NOTE(review): the original author left an open question here about
      # also passing -NonInteractive.

      # Log file path handed to the script template (defined by the parent bundle)
      "_logfile" string => "$(cfe_internal_core_watchdog._logfile)";

      # Command that (re)creates the task; /f overwrites an existing definition
      "_cmd_task_schedule"
        string => `$(sys.winsysdir)$(const.dirsep)schtasks.exe /create /tn "$(_taskname)" /tr "$(_taskrun) $(_taskrun_args) '$(_taskscript)'" /ru "System" /sc minute /mo $(_taskfreq) /rl highest /f`;

      # The task is queried as XML since that output is the least affected by
      # localization.
      # NOTE(review): schtasks is resolved via PATH here and in the delete
      # and absence checks below, but via $(sys.winsysdir) when creating the
      # task - consider unifying.
      "_cmd_task_query"
        string => `schtasks /QUERY /TN "$(_taskname)" /XML 2> $(const.dollar)null`;

      "_cmd_task_query_result"
        string => execresult( $(_cmd_task_query), powershell);

      # Pattern the XML task definition has to match to be considered correct:
      # interval, command and arguments in that order. _taskscript is wrapped
      # in \Q \E because it contains backslashes that must be taken literally.
      "_scheduled_task_regex"
        string => concat(".*Interval.PT$(_taskfreq)M..Interval",
                         ".*Command.$(_taskrun)..Command",
                         ".*Arguments.$(_taskrun_args) .\Q$(_taskscript)\E...Arguments",
                         ".*");

  classes:
    windows::
      # One of _requested_state_enabled|_requested_state_disabled|_requested_state_invalid
      "_requested_state_$(_requested_state)";

    _requested_state_enabled::
      # The existing task definition already matches the expected one
      "_watchdog_present_correct"
        expression => regcmp( $(_scheduled_task_regex), $(_cmd_task_query_result) );

    _requested_state_disabled::
      # Querying the task fails, so there is no task with that name
      "_watchdog_absent_correct"
        expression => not( returnszero( 'schtasks /QUERY /TN "$(_taskname)" 2> $(const.dollar)null', powershell ));

  files:
      # Render the watchdog script from its template. This promise has no
      # class guard, so it does not depend on the requested state.
      "$(_taskscript)"
        create => "true",
        template_method => "mustache",
        edit_template => "$(this.promise_dirname)/templates/watchdog-windows.ps1.mustache",
        template_data => parsejson( '{"logfile": "$(_logfile)" }' );

  commands:

    _requested_state_disabled.!_watchdog_absent_correct::
      # Remove the task when it should be absent but still exists
      `schtasks /DELETE /TN "$(_taskname)" /F`
        action => immediate,
        contain => powershell,
        classes => results( "bundle", "win_watchdog_script");

    _requested_state_enabled.!_watchdog_present_correct::
      # Create or overwrite the task when it is missing or differs
      `$(_cmd_task_schedule)`
        action => immediate,
        contain => in_shell,
        classes => results( "bundle", "win_watchdog_script");

  reports:
    verbose_mode::
      "CFEngine-watchdog desired state '$(_requested_state)'";

      "CFEngine-watchdog scheduled task state '$(_requested_state)' correct"
        if => "_watchdog_present_correct|_watchdog_absent_correct";

    verbose_mode.(!_watchdog_present_correct._requested_state_enabled)::
      "CFEngine-watchdog scheduled task state incorrect";
      `Should: $(_cmd_task_schedule)`;

    (inform_mode|verbose_mode).win_watchdog_script_repaired::
      "CFEngine-watchdog scheduled task repaired";
}
42134

43135
bundle agent cfe_internal_core_watchdog_aix(state)

lib/commands.cf

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,25 @@ bundle agent daemonize(command)
2727
## contain
2828
##-------------------------------------------------------
2929

30+
body contain powershell
# @brief Execute the promised command through powershell (windows only)
#
# **Example:**
#
# ```cf3
# commands:
#   windows::
#     'schtasks /DELETE /TN "$(_taskname)" /F'
#       contain => powershell;
# ```
#
# **History:**
#
# * Introduced in 3.17.0
{
      useshell => "powershell";
}
48+
3049
body contain silent
3150
# @brief suppress command output
3251
{

0 commit comments

Comments
 (0)