|
# Place every bundle defined below (until the next `body file control`)
# in the inventory_smartctl namespace.
body file control { namespace => "inventory_smartctl"; }
| 5 | + |
bundle agent main
# @brief Inventory SMART drive health, temperature, and wear data via smartctl JSON
#
# Requires smartmontools >= 7.0 (for JSON output support).
# Runs on Linux only; on other platforms nothing is inventoried (a notice is
# reported in verbose mode only).
#
# How it works:
#  1. Drives are enumerated with `smartctl --scan`.
#  2. For each drive, `smartctl -j -a <drive>` output is cached as a JSON file
#     under $(sys.statedir). The cache is rewritten when the file is missing or
#     its mtime is older than _cache_ttl seconds.
#  3. Each cache file is handed to the `parse` bundle; its return values are
#     collected here into the inventory lists.
#
# When smartctl is not executable, "SMART drive health" is set to the single
# string SMARTCTL_MISSING.
#
# Attributes exposed in Mission Portal:
# @inventory SMART drive health - Per-drive PASSED/FAILED
# @inventory SMART drive model - Drive model per device
# @inventory SMART drive temperatures (C) - Current temperature in Celsius
# @inventory SMART drive power-on hours - Cumulative runtime in hours
# @inventory SMART NVMe available spare - Remaining spare blocks (%), NVMe only
# @inventory SMART NVMe percentage used - Drive life consumed (%), NVMe only
# @inventory SMART NVMe media errors - Uncorrectable media errors, NVMe only
# @inventory SMART failed drives - Only present on hosts with a failing drive
{
  vars:
    linux::
      "_smartctl" string => ifelse(
        fileexists("/usr/sbin/smartctl"), "/usr/sbin/smartctl",
        fileexists("/sbin/smartctl"), "/sbin/smartctl",
        "/usr/sbin/smartctl" # default fallback
      );
      "_sdir" string => "$(sys.statedir)";
      "_cache_ttl" string => "3600"; # 1 hour

      # Cut-off timestamp (epoch seconds): a cache file whose mtime is below
      # this value is older than _cache_ttl. Computed once here because
      # function calls cannot be embedded in "$(...)" string interpolation.
      "_stale_before"
        string => eval("$(sys.systime) - $(_cache_ttl)", "math", "infix");

      # Enumerate drives - extract first field from each line of smartctl --scan
      "_scan_lines"
        slist => splitstring(
          execresult("$(_smartctl) --scan 2>/dev/null", "useshell"),
          "\n", 32);

      "_drives"
        slist => maplist(regex_replace("$(this)", "^(\S+).*", "\1", ""), "_scan_lines");

      # Per-drive identifier that is safe in class/variable names, and the
      # path of that drive's JSON cache file.
      "_id[${_drives}]" string => canonify("${_drives}");
      "_cache[${_drives}]" string => "$(_sdir)/inventory_smartctl_${_id[${_drives}]}.json";

  classes:
    linux::
      "_have_smartctl" expression => isexecutable("$(_smartctl)");

      # Cache file is missing - needs refresh
      "_cache_missing_${_id[${_drives}]}"
        not => fileexists("${_cache[${_drives}]}");

      # Cache file is stale - needs refresh.
      # mtime < (now - ttl) is the same test as (now - mtime) > ttl; filestat()
      # is passed as a nested function argument so it is actually evaluated.
      "_cache_stale_${_id[${_drives}]}"
        expression => islessthan(
          filestat("${_cache[${_drives}]}", "mtime"),
          "$(_stale_before)"),
        if => fileexists("${_cache[${_drives}]}");

      # Refresh if missing or stale
      "_refresh_${_id[${_drives}]}"
        or => {
          "_cache_missing_${_id[${_drives}]}",
          "_cache_stale_${_id[${_drives}]}"
        };

  files:
    linux._have_smartctl::
      # (Re)write the per-drive cache with the full smartctl JSON report.
      "${_cache[${_drives}]}"
        content => execresult("$(_smartctl) -j -a ${_drives}", "noshell", "stdout"),
        if => "_refresh_${_id[${_drives}]}";

  methods:
    linux._have_smartctl::
      # Call parsing bundle for each drive (only when cache exists)
      "parse_${_id[${_drives}]}"
        usebundle => parse("${_drives}", "${_cache[${_drives}]}"),
        useresult => "_d_${_id[${_drives}]}",
        if => fileexists("${_cache[${_drives}]}");

  vars:
    linux._have_smartctl::
      # Collect results from sub-bundles into formatted entries.
      # Each entry is only defined when the parse bundle returned that metric.
      "_health_entries[${_drives}]"
        string => "${_drives}: ${_d_${_id[${_drives}]}[health]}",
        if => isvariable("_d_${_id[${_drives}]}[health]");

      "_model_entries[${_drives}]"
        string => "${_drives}: ${_d_${_id[${_drives}]}[model]}",
        if => isvariable("_d_${_id[${_drives}]}[model]");

      "_temp_entries[${_drives}]"
        string => "${_drives}: ${_d_${_id[${_drives}]}[temp]} C",
        if => isvariable("_d_${_id[${_drives}]}[temp]");

      "_hours_entries[${_drives}]"
        string => "${_drives}: ${_d_${_id[${_drives}]}[hours]} h",
        if => isvariable("_d_${_id[${_drives}]}[hours]");

      "_nvme_spare_entries[${_drives}]"
        string => "${_drives}: ${_d_${_id[${_drives}]}[nvme_spare]}%",
        if => isvariable("_d_${_id[${_drives}]}[nvme_spare]");

      "_nvme_pct_used_entries[${_drives}]"
        string => "${_drives}: ${_d_${_id[${_drives}]}[nvme_pct_used]}%",
        if => isvariable("_d_${_id[${_drives}]}[nvme_pct_used]");

      "_nvme_media_errors_entries[${_drives}]"
        string => "${_drives}: ${_d_${_id[${_drives}]}[nvme_media_errors]}",
        if => isvariable("_d_${_id[${_drives}]}[nvme_media_errors]");

      # Only drives whose health is FAILED get an entry here.
      "_failed_entries[${_drives}]"
        string => "${_drives}",
        if => strcmp("${_d_${_id[${_drives}]}[health]}", "FAILED");

      # Inventory attributes (visible in Mission Portal)
      "drive_health"
        slist => getvalues(_health_entries),
        meta => { "inventory", "attribute_name=SMART drive health" };

      "drive_model"
        slist => getvalues(_model_entries),
        meta => { "inventory", "attribute_name=SMART drive model" };

      "drive_temperatures"
        slist => getvalues(_temp_entries),
        meta => { "inventory", "attribute_name=SMART drive temperatures (C)" };

      "drive_power_on_hours"
        slist => getvalues(_hours_entries),
        meta => { "inventory", "attribute_name=SMART drive power-on hours" };

      "nvme_available_spare"
        slist => getvalues(_nvme_spare_entries),
        meta => { "inventory", "attribute_name=SMART NVMe available spare" };

      "nvme_percentage_used"
        slist => getvalues(_nvme_pct_used_entries),
        meta => { "inventory", "attribute_name=SMART NVMe percentage used" };

      "nvme_media_errors"
        slist => getvalues(_nvme_media_errors_entries),
        meta => { "inventory", "attribute_name=SMART NVMe media errors" };

      "failed_drives"
        slist => getvalues(_failed_entries),
        meta => { "inventory", "attribute_name=SMART failed drives" };

    linux.!_have_smartctl::
      # Make the missing tool visible in inventory instead of reporting nothing.
      "drive_health"
        string => "SMARTCTL_MISSING",
        meta => { "inventory", "attribute_name=SMART drive health" };

  reports:
    linux._have_smartctl.verbose_mode::
      "inventory_smartctl: monitoring ${_drives}";
      "inventory_smartctl: ${_drives} health=${_d_${_id[${_drives}]}[health]}"
        if => isvariable("_d_${_id[${_drives}]}[health]");

    !linux.verbose_mode::
      "$(this.promise_filename): inventory_smartctl is Linux-only.";
}
| 164 | + |
bundle agent parse(drive, cache_file)
# @brief Parse smartctl JSON and return key metrics via bundle_return_value_index
#
# @param drive Device the report belongs to (not referenced inside this bundle)
# @param cache_file Path of the JSON file to read with readjson()
#
# Returned indices: health, model, temp, hours, nvme_spare, nvme_pct_used,
# nvme_media_errors. An index is returned only when the corresponding key is
# present in the JSON, so callers can test for it with isvariable().
{
  vars:
      "_json" data => readjson("$(cache_file)");

      # Extract metrics directly from JSON; each one stays undefined when its
      # key is absent from the report.
      "_health"
        string => ifelse(strcmp("${_json[smart_status][passed]}", "true"), "PASSED", "FAILED"),
        if => isvariable("_json[smart_status][passed]");

      "_model"
        string => "${_json[model_name]}",
        if => isvariable("_json[model_name]");

      "_temp"
        string => "${_json[temperature][current]}",
        if => isvariable("_json[temperature][current]");

      "_hours"
        string => "${_json[power_on_time][hours]}",
        if => isvariable("_json[power_on_time][hours]");

      "_nvme_spare"
        string => "${_json[nvme_smart_health_information_log][available_spare]}",
        if => isvariable("_json[nvme_smart_health_information_log][available_spare]");

      "_nvme_pct_used"
        string => "${_json[nvme_smart_health_information_log][percentage_used]}",
        if => isvariable("_json[nvme_smart_health_information_log][percentage_used]");

      "_nvme_media_errors"
        string => "${_json[nvme_smart_health_information_log][media_errors]}",
        if => isvariable("_json[nvme_smart_health_information_log][media_errors]");

  reports:
      # Guard every return value: without the guard, a metric that was never
      # defined would be handed back as the unexpanded text "$(_name)" and the
      # caller would treat it as a real value.
      "$(_health)"
        bundle_return_value_index => "health",
        if => isvariable("_health");

      "$(_model)"
        bundle_return_value_index => "model",
        if => isvariable("_model");

      "$(_temp)"
        bundle_return_value_index => "temp",
        if => isvariable("_temp");

      "$(_hours)"
        bundle_return_value_index => "hours",
        if => isvariable("_hours");

      "$(_nvme_spare)"
        bundle_return_value_index => "nvme_spare",
        if => isvariable("_nvme_spare");

      "$(_nvme_pct_used)"
        bundle_return_value_index => "nvme_pct_used",
        if => isvariable("_nvme_pct_used");

      "$(_nvme_media_errors)"
        bundle_return_value_index => "nvme_media_errors",
        if => isvariable("_nvme_media_errors");
}
| 209 | + |
# Switch back to the default namespace for the definitions that follow.
body file control
{
      namespace => "default";
}
| 211 | + |
bundle agent __main__
# @brief Entry bundle for this file: activates the main bundle of the
# inventory_smartctl namespace.
{
  methods:
      # The promiser itself names the bundle to run (namespace:bundle).
      "inventory_smartctl:main";
}
0 commit comments