Skip to content

Commit 00b62f7

Browse files
Merge pull request #182 from amd/alex_regex_plugin
RegexSearchPlugin
2 parents 3d29a0b + 023d2b7 commit 00b62f7

15 files changed

Lines changed: 662 additions & 17 deletions

File tree

nodescraper/base/regexanalyzer.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,24 @@
3535
from nodescraper.models.event import Event
3636

3737

38+
def _coerce_event_priority_from_dict(value: Union[str, int, EventPriority]) -> EventPriority:
    """Turn a string, integer level, or already-coerced value into an ``EventPriority`` member.

    Args:
        value: One of:
            * an ``EventPriority`` member, returned unchanged;
            * an integer level, looked up by value;
            * a string, either a member name (case-insensitive, surrounding
              whitespace ignored) or a decimal level written as text such as ``"3"``.

    Returns:
        The matching ``EventPriority`` member.

    Raises:
        TypeError: If ``value`` is not an ``EventPriority``, ``int`` or ``str``.
        ValueError: If a numeric level does not correspond to any member.
        KeyError: If a string name does not correspond to any member.
    """
    if isinstance(value, EventPriority):
        return value
    if isinstance(value, int):
        return EventPriority(value)
    if isinstance(value, str):
        text = value.strip()
        # Values that arrive as text (e.g. from command-line or JSON input) may
        # carry the numeric level as a string; route those through the by-value
        # lookup instead of failing the by-name lookup.
        if text.isdecimal():
            return EventPriority(int(text))
        return EventPriority[text.upper()]
    raise TypeError(f"Invalid event_priority: {value!r}")
54+
55+
3856
class ErrorRegex(BaseModel):
3957
regex: re.Pattern
4058
message: str
@@ -135,13 +153,13 @@ def _convert_and_extend_error_regex(
135153
if isinstance(item, ErrorRegex):
136154
converted_regex.append(item)
137155
elif isinstance(item, dict):
138-
# Convert dict to ErrorRegex
139-
item["regex"] = re.compile(item["regex"])
140-
if "event_category" in item:
141-
item["event_category"] = EventCategory(item["event_category"])
142-
if "event_priority" in item:
143-
item["event_priority"] = EventPriority(item["event_priority"])
144-
converted_regex.append(ErrorRegex(**item))
156+
d = dict(item)
157+
d["regex"] = re.compile(d["regex"])
158+
if "event_category" in d:
159+
d["event_category"] = EventCategory(d["event_category"])
160+
if "event_priority" in d:
161+
d["event_priority"] = _coerce_event_priority_from_dict(d["event_priority"])
162+
converted_regex.append(ErrorRegex(**d))
145163

146164
return converted_regex + list(base_regex)
147165

nodescraper/cli/dynamicparserbuilder.py

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,15 +43,40 @@
4343
from nodescraper.typeutils import TypeUtils
4444

4545

46+
def _help_from_annotated(anno: object) -> str:
47+
"""Pull CLI help from ``Annotated[T, metadata...]`` (string or ``Field(description=...)``)."""
48+
if anno is None or get_origin(anno) is not Annotated:
49+
return ""
50+
for meta in get_args(anno)[1:]:
51+
if isinstance(meta, str):
52+
return meta
53+
desc = getattr(meta, "description", None)
54+
if isinstance(desc, str) and desc.strip():
55+
return desc
56+
return ""
57+
58+
4659
def _get_run_arg_help(plugin_class: Type[PluginInterface], arg: str) -> str:
4760
"""Get help text for a run() parameter from typing.Annotated metadata on the parameter."""
4861
try:
49-
hints = get_type_hints(plugin_class.run, include_extras=True)
50-
anno = hints.get(arg)
51-
if anno is not None and get_origin(anno) is Annotated:
52-
args = get_args(anno)
53-
if len(args) >= 2 and isinstance(args[1], str):
54-
return args[1]
62+
run_obj = None
63+
for cls in plugin_class.__mro__:
64+
if "run" in cls.__dict__:
65+
run_obj = cls.__dict__["run"]
66+
break
67+
if run_obj is None:
68+
run_obj = plugin_class.run
69+
run_fn = run_obj
70+
if isinstance(run_obj, staticmethod):
71+
run_fn = run_obj.__func__
72+
elif isinstance(run_obj, classmethod):
73+
run_fn = run_obj.__func__
74+
raw = getattr(run_fn, "__annotations__", {}).get(arg)
75+
text = _help_from_annotated(raw)
76+
if text:
77+
return text
78+
hints = get_type_hints(run_fn, include_extras=True)
79+
return _help_from_annotated(hints.get(arg))
5580
except Exception:
5681
pass
5782
return ""
@@ -167,12 +192,22 @@ def add_argument(
167192

168193
if list in type_class_map:
169194
type_class = type_class_map[list]
195+
inner = type_class.inner_type
196+
if inner is dict or get_origin(inner) is dict:
197+
elt_type = dict_arg
198+
metavar = META_VAR_MAP[dict]
199+
elif inner is not None:
200+
elt_type = inner
201+
metavar = META_VAR_MAP.get(inner, "STRING")
202+
else:
203+
elt_type = str
204+
metavar = "STRING"
170205
self.parser.add_argument(
171206
f"--{arg_name}",
172207
nargs="*",
173-
type=type_class.inner_type if type_class.inner_type else str,
208+
type=elt_type,
174209
required=required,
175-
metavar=META_VAR_MAP.get(type_class.inner_type, "STRING"),
210+
metavar=metavar,
176211
**add_kw,
177212
)
178213
elif bool in type_class_map:

nodescraper/cli/helper.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,8 @@ def process_args(
417417
else:
418418
cur_plugin = None
419419
for arg in plugin_args:
420-
if not arg.startswith("-") and "," in arg:
420+
# Only split on commas before a plugin context is set (e.g. "P1,P2").
421+
if not arg.startswith("-") and "," in arg and cur_plugin is None:
421422
for potential_plugin in arg.split(","):
422423
potential_plugin = potential_plugin.strip()
423424
if potential_plugin in plugin_names:

nodescraper/interfaces/dataplugin.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
from pathlib import Path
3030
from typing import Annotated, Any, Generic, Optional, Type, Union
3131

32+
from pydantic import Field
33+
3234
from nodescraper.enums import EventPriority, ExecutionStatus, SystemInteractionLevel
3335
from nodescraper.generictypes import TAnalyzeArg, TCollectArg, TDataModel
3436
from nodescraper.interfaces.dataanalyzertask import DataAnalyzer
@@ -313,7 +315,12 @@ def run(
313315
preserve_connection: bool = False,
314316
data: Annotated[
315317
Optional[Union[str, dict, TDataModel]],
316-
"Path to pre-collected data (file or directory). Load this data instead of collecting; use with --collection False to run only the analyzer.",
318+
Field(
319+
description=(
320+
"Path to pre-collected data"
321+
"; use with --collection False to run the analyzer only."
322+
),
323+
),
317324
] = None,
318325
collection_args: Optional[Union[TCollectArg, dict]] = None,
319326
analysis_args: Optional[Union[TAnalyzeArg, dict]] = None,
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
###############################################################################
2+
#
3+
# MIT License
4+
#
5+
# Copyright (c) 2026 Advanced Micro Devices, Inc.
6+
#
7+
# Permission is hereby granted, free of charge, to any person obtaining a copy
8+
# of this software and associated documentation files (the "Software"), to deal
9+
# in the Software without restriction, including without limitation the rights
10+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
# copies of the Software, and to permit persons to whom the Software is
12+
# furnished to do so, subject to the following conditions:
13+
#
14+
# The above copyright notice and this permission notice shall be included in all
15+
# copies or substantial portions of the Software.
16+
#
17+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23+
# SOFTWARE.
24+
#
25+
###############################################################################
26+
from .regex_search_plugin import RegexSearchPlugin
27+
28+
__all__ = ["RegexSearchPlugin"]
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
###############################################################################
2+
#
3+
# MIT License
4+
#
5+
# Copyright (c) 2026 Advanced Micro Devices, Inc.
6+
#
7+
# Permission is hereby granted, free of charge, to any person obtaining a copy
8+
# of this software and associated documentation files (the "Software"), to deal
9+
# in the Software without restriction, including without limitation the rights
10+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
# copies of the Software, and to permit persons to whom the Software is
12+
# furnished to do so, subject to the following conditions:
13+
#
14+
# The above copyright notice and this permission notice shall be included in all
15+
# copies or substantial portions of the Software.
16+
#
17+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23+
# SOFTWARE.
24+
#
25+
###############################################################################
26+
from typing import Any, Optional
27+
28+
from pydantic import Field
29+
30+
from nodescraper.models import AnalyzerArgs
31+
32+
33+
class RegexSearchAnalyzerArgs(AnalyzerArgs):
    """Analyzer arguments for the regex search plugin.

    Carries the user-supplied pattern list plus two tuning values that are
    forwarded to the regex check (collapse interval and timestamp count).
    """

    # Each entry is a plain dict; it is converted to an ErrorRegex by the analyzer.
    error_regex: Optional[list[dict[str, Any]]] = Field(
        description=(
            "Regex patterns to search for; each dict may include regex (str), "
            "message, event_category, event_priority "
            "(same as Dmesg analyzer error_regex). "
        ),
        default=None,
    )

    # Window, in seconds, used when collapsing repeated events.
    interval_to_collapse_event: int = Field(
        description="Seconds within which repeated events are collapsed into one.",
        default=60,
    )

    # Upper bound on timestamps reported for a single event.
    num_timestamps: int = Field(
        description="Number of timestamps to include per event in output.",
        default=3,
    )
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
###############################################################################
2+
#
3+
# MIT License
4+
#
5+
# Copyright (c) 2026 Advanced Micro Devices, Inc.
6+
#
7+
# Permission is hereby granted, free of charge, to any person obtaining a copy
8+
# of this software and associated documentation files (the "Software"), to deal
9+
# in the Software without restriction, including without limitation the rights
10+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
# copies of the Software, and to permit persons to whom the Software is
12+
# furnished to do so, subject to the following conditions:
13+
#
14+
# The above copyright notice and this permission notice shall be included in all
15+
# copies or substantial portions of the Software.
16+
#
17+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23+
# SOFTWARE.
24+
#
25+
###############################################################################
26+
import os
27+
from typing import Optional, Union
28+
29+
from nodescraper.base.regexanalyzer import ErrorRegex, RegexAnalyzer, RegexEvent
30+
from nodescraper.enums import ExecutionStatus
31+
from nodescraper.models import TaskResult
32+
33+
from .analyzer_args import RegexSearchAnalyzerArgs
34+
from .regex_search_data import RegexSearchData
35+
36+
37+
class RegexSearchAnalyzer(RegexAnalyzer[RegexSearchData, RegexSearchAnalyzerArgs]):
    """Run user-provided regexes against text loaded from --data (file or directory)."""

    DATA_MODEL = RegexSearchData

    # No built-in patterns: every rule is supplied through the analyzer args.
    ERROR_REGEX: list[ErrorRegex] = []

    def _build_regex_event(
        self, regex_obj: ErrorRegex, match: Union[str, list[str]], source: str
    ) -> RegexEvent:
        """Build the default match event and append the source path to its description.

        Args:
            regex_obj: Metadata for the rule that produced the match.
            match: Substring or grouped capture text from the pattern.
            source: Origin label passed to the regex check. The placeholder
                ``"regex_search"`` (and any empty value) is not treated as a path.

        Returns:
            The event produced by the parent class; its description is replaced
            with ``"<message> [file: <source>]"`` when ``source`` is a real label.
        """
        event = super()._build_regex_event(regex_obj, match, source)
        if source and source != "regex_search":
            event.description = f"{regex_obj.message} [file: {source}]"
        return event

    @staticmethod
    def _iter_search_targets(data: RegexSearchData):
        """Yield ``(content, source)`` pairs to scan, one per file when files are present."""
        if data.files:
            # The fallback branch below already treats data_root as possibly
            # unset; apply the same tolerance here so os.path.join never
            # receives None.
            root = data.data_root or ""
            for rel_path in sorted(data.files.keys()):
                yield data.files[rel_path], os.path.normpath(os.path.join(root, rel_path))
        else:
            yield data.content, data.data_root or "regex_search"

    def analyze_data(
        self,
        data: RegexSearchData,
        args: Optional[RegexSearchAnalyzerArgs] = None,
    ) -> TaskResult:
        """Scan loaded inputs with the given patterns, or mark the task not run without patterns.

        Args:
            data: Loaded text; ``files`` maps relative paths to file content,
                ``content`` is used when ``files`` is empty, and ``data_root``
                is the base used to build each file's source path.
            args: Pattern list and timing knobs; ``None`` or an empty
                ``error_regex`` skips the analysis.

        Returns:
            ``self.result`` with the match events appended, or with status
            ``NOT_RAN`` and an explanatory message when no patterns were given.
        """
        if args is None or not args.error_regex:
            self.result.status = ExecutionStatus.NOT_RAN
            self.result.message = "Analysis args need to be provided for the analyzer to run"
            return self.result

        # Convert the user dicts to ErrorRegex objects; there is no base list to extend.
        final_regex = self._convert_and_extend_error_regex(args.error_regex, [])

        for content, source in self._iter_search_targets(data):
            self.result.events += self.check_all_regexes(
                content=content,
                source=source,
                error_regex=final_regex,
                num_timestamps=args.num_timestamps,
                interval_to_collapse_event=args.interval_to_collapse_event,
            )
        return self.result

0 commit comments

Comments
 (0)