Skip to content

Commit 6163b47

Browse files
committed
xml parsing
1 parent 41f8f69 commit 6163b47

3 files changed

Lines changed: 41 additions & 31 deletions

File tree

src/agentlab/analyze/error_analysis/pipeline.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,15 +69,15 @@ def save_analysis(self, exp_result: ExpResult, error_analysis: dict, exists_ok=T
6969

7070
from agentlab.llm.llm_configs import CHAT_MODEL_ARGS_DICT
7171

72-
llm = CHAT_MODEL_ARGS_DICT["azure/gpt-4o-mini-2024-07-18"].make_model()
72+
llm = CHAT_MODEL_ARGS_DICT["azure/gpt-4o-2024-08-06"].make_model()
7373

7474
step_summarizer = ChangeSummarizer(llm, lambda x: x)
7575
episode_summarizer = EpisodeSummarizer()
7676

7777
pipeline = ErrorAnalysisPipeline(
7878
exp_dir=exp_dir,
7979
filter=filter,
80-
episode_summarizer=EpisodeErrorSummarizer(ChangeSummarizer(llm, HTML_FORMATTER), llm),
80+
episode_summarizer=EpisodeErrorSummarizer(ChangeSummarizer(llm, AXTREE_FORMATTER), llm),
8181
)
8282

8383
pipeline.run_analysis()

src/agentlab/analyze/error_analysis/summarizer.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
ERROR_CLASSIFICATION_PROMPT,
88
)
99
from agentlab.analyze.inspect_results import summarize
10-
from agentlab.llm.llm_utils import json_parser
10+
from agentlab.llm.llm_utils import json_parser, parse_html_tags
1111

1212

1313
def _diff(past_obs, current_obs):
@@ -39,7 +39,7 @@ def summarize(self, obs: StepInfo, next_obs: StepInfo, past_summaries: list[str]
3939
if self.use_diff:
4040
next_obs_message = _diff(obs_message, next_obs_message)
4141

42-
return self.llm(
42+
return self.parse(self.llm(
4343
self.make_prompt(
4444
obs_message,
4545
action,
@@ -48,7 +48,7 @@ def summarize(self, obs: StepInfo, next_obs: StepInfo, past_summaries: list[str]
4848
goal,
4949
obs.obs.get("plan", "No plan available"),
5050
)
51-
)
51+
)['content'])
5252

5353
def make_prompt(
5454
self, past_obs_message, action, current_obs_message, past_summaries, goal, plan
@@ -63,6 +63,10 @@ def make_prompt(
6363
action=action,
6464
)
6565

66+
def parse(self, raw_output: str) -> dict:
67+
parsed_result = parse_html_tags(raw_output, keys=["changeSummary", "actionAssessment", "explanation", "suggestion"])[0]
68+
return parsed_result
69+
6670

6771
@dataclass
6872
class EpisodeAnalysis:
@@ -83,13 +87,13 @@ def make_prompt(self, exp_results: ExpResult, summaries: list[str]): ...
8387
def __call__(self, exp_results: ExpResult) -> EpisodeAnalysis:
8488
"""Run Change Summarizer for every step in the episode or extract a pre-computed one."""
8589

86-
if exp_results.steps_info[-1].reward == 1:
87-
return {"analysis": "Success", "summaries": {}}
90+
# if exp_results.steps_info[-1].reward == 1:
91+
# return {"analysis": "Success", "summaries": {}}
8892

8993
summaries = self.make_change_summaries(exp_results)
9094
prompt = self.make_prompt(exp_results, summaries)
9195
raw_analysis = self.llm(prompt)["content"]
92-
analysis = self.parser(raw_analysis)
96+
analysis = self.parse(raw_analysis)
9397
return {
9498
"analysis": analysis,
9599
"summaries": {i: self.parser(a) for i, a in enumerate(summaries)},
@@ -102,10 +106,13 @@ def make_change_summaries(self, exp_result: ExpResult) -> list[str]:
102106
# TODO:(thibault) make some checks or w/e
103107
for step, next_step in zip(exp_result.steps_info[:-1], exp_result.steps_info[1:]):
104108
summaries.append(
105-
self.change_summarizer.summarize(step, next_step, summaries)["content"]
109+
self.change_summarizer.summarize(step, next_step, summaries)
106110
)
107111
return summaries
108112

113+
def parse(self, raw_output: str) -> dict:
114+
parsed_result = parse_html_tags(raw_output, keys=["explanation", "success", "errorCategory"])[0]
115+
return parsed_result
109116

110117
@dataclass
111118
class EpisodeErrorSummarizer(EpisodeSummarizer):
@@ -116,7 +123,13 @@ def make_prompt(self, exp_results: ExpResult, summaries: list[str]):
116123
"""TODO: Implement the prompt."""
117124
goal = exp_results.steps_info[0].obs["goal"]
118125

119-
txt_summaries = "\n".join(summaries)
126+
def format_summary(summary):
127+
res = ''
128+
for key, value in summary.items():
129+
res += f"{key}: {value}\n"
130+
return res
131+
132+
txt_summaries = "\n".join([format_summary(summary) for summary in summaries])
120133

121134
thoughts = [step.agent_info.think for step in exp_results.steps_info[:-1]]
122135
actions = [step.action for step in exp_results.steps_info[:-1]]

src/agentlab/analyze/error_analysis/summarizer_prompts.py

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -21,20 +21,19 @@
2121
OUTPUT FORMAT (per step):
2222
Return your analysis as a set of XML-style tags, for example:
2323
24-
{{
25-
"changeSummary": "A new search results panel appeared on the right side.",
26-
"actionAssessment": "Correct",
27-
"explanation": "Clicking 'Search' was appropriate to display the results."
28-
}}
24+
<changeSummary>A new search results panel appeared on the right side.</changeSummary>
25+
<actionAssessment>Correct</actionAssessment>
26+
<explanation>Clicking 'Search' was appropriate to display the results.</explanation>
2927
3028
Or for an incorrect action:
3129
32-
{{
33-
"changeSummary": "The page reloaded but the date fields were reset to defaults.",
34-
"actionAssessment": "Incorrect",
35-
"explanation": "The agent should have fixed the date format first instead of re-clicking 'Show report'.",
36-
"suggestion": "Correct the date format or check for error messages."
37-
}}
30+
<changeSummary>The page reloaded but the date fields were reset to defaults.</changeSummary>
31+
<actionAssessment>Incorrect</actionAssessment>
32+
<explanation>The agent should have fixed the date format first instead of re-clicking 'Show report'.</explanation>
33+
<suggestion>Correct the date format or check for error messages.</suggestion>
34+
35+
36+
Please use single quotes '' to quote elements from the page, so as not to create parsing issues.
3837
3938
Please follow this structure at every step. Keep your responses concise and clear. Below are the details.
4039
@@ -139,19 +138,17 @@
139138
3. Provide a brief explanation justifying your classification, referencing specific steps if helpful.
140139
141140
Output format example for an unsuccessful interaction:
142-
{{
143-
"explanation": "The agent opened the wrong GitLab page and never recovered...",
144-
"success": False,
145-
"errorCategory": ["Navigation & Planning"],
146-
}}
141+
142+
<explanation>The agent opened the wrong GitLab page and never recovered...</explanation>
143+
<success>False</success>
144+
<errorCategory>["Navigation & Planning"]</errorCategory>
147145
148146
Output format example for a successful interaction:
149-
{{
150-
"explanation": "The agent opened the correct GitLab page and ...",
151-
"success": True,
152-
"errorCategory": [],
153-
}}
154147
148+
<explanation>The agent opened the correct GitLab page and ...</explanation>
149+
<success>True</success>
150+
<errorCategory>[]</errorCategory>
151+
155152
Please follow this structure in your response. Keep your responses concise and clear.
156153
157154
Below are the details for the interaction.

0 commit comments

Comments
 (0)