Skip to content

Commit b11263c

Browse files
committed
refactor: update document processing to use structured results
- Introduced `DocumentProcessingResult` interface for better structure in document handling.
- Updated `generateSummary` function to return a summary instead of a description.
- Refactored `processDocument`, `processPdfDocument`, and `processTextDocument` methods to utilize the new structured result format.
- Enhanced logging and error handling to improve clarity and maintainability.
1 parent 3597267 commit b11263c

1 file changed

Lines changed: 83 additions & 46 deletions

File tree

src/messageManager.ts

Lines changed: 83 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -27,17 +27,31 @@ import {
2727
import { convertToTelegramButtons, convertMarkdownToTelegram } from "./utils";
2828
import fs from "fs";
2929

30+
/**
31+
* Interface for structured document processing results.
32+
*/
33+
interface DocumentProcessingResult {
34+
title: string;
35+
summary: string;
36+
fullText: string;
37+
formattedDescription: string;
38+
fileName: string;
39+
mimeType: string | undefined;
40+
fileSize: number | undefined;
41+
error?: string;
42+
}
43+
3044
/**
3145
* Generates a summary for the provided text using a specified model.
3246
*
3347
* @param {IAgentRuntime} runtime - The runtime environment for the agent.
3448
* @param {string} text - The text to generate a summary for.
35-
* @returns {Promise<{ title: string; description: string }>} An object containing the generated title and description.
49+
* @returns {Promise<{ title: string; summary: string }>} An object containing the generated title and summary.
3650
*/
3751
async function generateSummary(
3852
runtime: IAgentRuntime,
3953
text: string
40-
): Promise<{ title: string; description: string }> {
54+
): Promise<{ title: string; summary: string }> {
4155
// keep the text within the limit: trim it to the 100000 cap passed to trimTokens (presumably tokens, not characters)
4256
text = await trimTokens(text, 100000, runtime);
4357

@@ -64,13 +78,13 @@ async function generateSummary(
6478
if (parsedResponse?.title && parsedResponse?.summary) {
6579
return {
6680
title: parsedResponse.title,
67-
description: parsedResponse.summary,
81+
summary: parsedResponse.summary,
6882
};
6983
}
7084

7185
return {
7286
title: "",
73-
description: "",
87+
summary: "",
7488
};
7589
}
7690

@@ -179,7 +193,7 @@ export class MessageManager {
179193
*/
180194
async processDocument(
181195
message: Message,
182-
): Promise<{ description: string } | null> {
196+
): Promise<DocumentProcessingResult | null> {
183197
try {
184198
if (!("document" in message) || !message.document) {
185199
return null;
@@ -199,7 +213,13 @@ export class MessageManager {
199213

200214
// Generic fallback for unsupported types
201215
return {
202-
description: `[Document: ${document.file_name}\nType: ${document.mime_type}\nSize: ${document.file_size} bytes]`
216+
title: `Document: ${document.file_name || 'Unknown Document'}`,
217+
summary: `Type: ${document.mime_type || 'unknown'}\nSize: ${document.file_size || 0} bytes`,
218+
fullText: "",
219+
formattedDescription: `[Document: ${document.file_name || 'Unknown Document'}\nType: ${document.mime_type || 'unknown'}\nSize: ${document.file_size || 0} bytes]`,
220+
fileName: document.file_name || 'Unknown Document',
221+
mimeType: document.mime_type,
222+
fileSize: document.file_size,
203223
};
204224

205225
} catch (error) {
@@ -211,7 +231,7 @@ export class MessageManager {
211231
/**
212232
* Get the appropriate document processor based on MIME type.
213233
*/
214-
private getDocumentProcessor(mimeType?: string): ((document: Document, url: string) => Promise<{ description: string }>) | null {
234+
private getDocumentProcessor(mimeType?: string): ((document: Document, url: string) => Promise<DocumentProcessingResult>) | null {
215235
if (!mimeType) return null;
216236

217237
const processors = {
@@ -233,13 +253,19 @@ export class MessageManager {
233253
/**
234254
* Process PDF documents by converting them to text.
235255
*/
236-
private async processPdfDocument(document: Document, documentUrl: string): Promise<{ description: string }> {
256+
private async processPdfDocument(document: Document, documentUrl: string): Promise<DocumentProcessingResult> {
237257
try {
238258
const pdfService = this.runtime.getService(ServiceType.PDF) as any;
239259
if (!pdfService) {
240260
logger.warn("PDF service not available, using fallback");
241261
return {
242-
description: `[PDF Document: ${document.file_name}\nSize: ${document.file_size} bytes\nUnable to extract text content]`
262+
title: `PDF Document: ${document.file_name || 'Unknown Document'}`,
263+
summary: `Size: ${document.file_size || 0} bytes\nUnable to extract text content`,
264+
fullText: "",
265+
formattedDescription: `[PDF Document: ${document.file_name || 'Unknown Document'}\nSize: ${document.file_size || 0} bytes\nUnable to extract text content]`,
266+
fileName: document.file_name || 'Unknown Document',
267+
mimeType: document.mime_type,
268+
fileSize: document.file_size,
243269
};
244270
}
245271

@@ -252,27 +278,39 @@ export class MessageManager {
252278
const text = await pdfService.convertPdfToText(Buffer.from(pdfBuffer));
253279

254280
// Use generateSummary for context extraction
255-
const { title, description } = await generateSummary(this.runtime, text);
281+
const { title, summary } = await generateSummary(this.runtime, text);
256282

257283
logger.info(`PDF processed successfully: ${text.length} characters extracted`);
258284
return {
259-
description: title
260-
? `[PDF Document: ${document.file_name}\nTitle: ${title}\nSummary: ${description}\n\nFull Content:\n${text}\n--- END DOCUMENT]`
261-
: `[PDF Document: ${document.file_name}\nContent: ${text.substring(0, 500)}... [Document truncated]`
285+
title: title,
286+
summary: summary,
287+
fullText: text,
288+
formattedDescription: title
289+
? `[PDF Document: ${document.file_name || 'Unknown Document'}\nTitle: ${title}\nSummary: ${summary}\n\nFull Content:\n${text}\n--- END DOCUMENT]`
290+
: `[PDF Document: ${document.file_name || 'Unknown Document'}\nContent: ${text.substring(0, 500)}... [Document truncated]`,
291+
fileName: document.file_name || 'Unknown Document',
292+
mimeType: document.mime_type,
293+
fileSize: document.file_size,
262294
};
263295

264296
} catch (error) {
265297
logger.error("Error processing PDF document:", error);
266298
return {
267-
description: `[PDF Document: ${document.file_name}\nSize: ${document.file_size} bytes\nError: Unable to extract text content]`
299+
title: `PDF Document: ${document.file_name || 'Unknown Document'}`,
300+
summary: `Size: ${document.file_size || 0} bytes\nError: Unable to extract text content`,
301+
fullText: "",
302+
formattedDescription: `[PDF Document: ${document.file_name || 'Unknown Document'}\nSize: ${document.file_size || 0} bytes\nError: Unable to extract text content]`,
303+
fileName: document.file_name || 'Unknown Document',
304+
mimeType: document.mime_type,
305+
fileSize: document.file_size,
268306
};
269307
}
270308
}
271309

272310
/**
273311
* Process text documents by fetching their content.
274312
*/
275-
private async processTextDocument(document: Document, documentUrl: string): Promise<{ description: string }> {
313+
private async processTextDocument(document: Document, documentUrl: string): Promise<DocumentProcessingResult> {
276314
try {
277315
const response = await fetch(documentUrl);
278316
if (!response.ok) {
@@ -282,19 +320,31 @@ export class MessageManager {
282320
const text = await response.text();
283321

284322
// Use generateSummary for context extraction
285-
const { title, description } = await generateSummary(this.runtime, text);
323+
const { title, summary } = await generateSummary(this.runtime, text);
286324

287325
logger.info(`Text document processed successfully: ${text.length} characters extracted`);
288326
return {
289-
description: title
290-
? `[Text Document: ${document.file_name}\nTitle: ${title}\nSummary: ${description}\n\nFull Content:\n${text}\n--- END DOCUMENT]`
291-
: `[Text Document: ${document.file_name}\nContent: ${text.substring(0, 500)}... [Document truncated]`
327+
title: title,
328+
summary: summary,
329+
fullText: text,
330+
formattedDescription: title
331+
? `[Text Document: ${document.file_name || 'Unknown Document'}\nTitle: ${title}\nSummary: ${summary}\n\nFull Content:\n${text}\n--- END DOCUMENT]`
332+
: `[Text Document: ${document.file_name || 'Unknown Document'}\nContent: ${text.substring(0, 500)}... [Document truncated]`,
333+
fileName: document.file_name || 'Unknown Document',
334+
mimeType: document.mime_type,
335+
fileSize: document.file_size,
292336
};
293337

294338
} catch (error) {
295339
logger.error("Error processing text document:", error);
296340
return {
297-
description: `[Text Document: ${document.file_name}\nSize: ${document.file_size} bytes\nError: Unable to read content]`
341+
title: `Text Document: ${document.file_name || 'Unknown Document'}`,
342+
summary: `Size: ${document.file_size || 0} bytes\nError: Unable to read content`,
343+
fullText: "",
344+
formattedDescription: `[Text Document: ${document.file_name || 'Unknown Document'}\nSize: ${document.file_size || 0} bytes\nError: Unable to read content]`,
345+
fileName: document.file_name || 'Unknown Document',
346+
mimeType: document.mime_type,
347+
fileSize: document.file_size,
298348
};
299349
}
300350
}
@@ -330,23 +380,10 @@ export class MessageManager {
330380
try {
331381
const fileLink = await this.bot.telegram.getFileLink(document.file_id);
332382

333-
// Extract title and description from documentInfo
334-
const titleMatch = documentInfo.description.match(/Title: ([^\n]+)/);
335-
const summaryMatch = documentInfo.description.match(/Summary: ([^\n]+)/);
336-
const title = titleMatch ? titleMatch[1] : `Document: ${document.file_name}`;
337-
const summary = summaryMatch ? summaryMatch[1] : documentInfo.description;
338-
339-
// Get the full text content using the existing processor
340-
let fullText = "";
341-
const processor = this.getDocumentProcessor(document.mime_type);
342-
if (processor) {
343-
const result = await processor(document, fileLink.toString());
344-
// Extract full text from the result
345-
const fullTextMatch = result.description.match(/Full Content:\n([\s\S]*?)\n--- END DOCUMENT/);
346-
if (fullTextMatch) {
347-
fullText = fullTextMatch[1];
348-
}
349-
}
383+
// Use structured data directly instead of regex parsing
384+
const title = documentInfo.title;
385+
const summary = documentInfo.summary;
386+
const fullText = documentInfo.fullText;
350387

351388
// Add document content to processedContent so agent can access it
352389
if (fullText) {
@@ -362,28 +399,28 @@ export class MessageManager {
362399
description: summary,
363400
text: fullText || summary, // Use full text if available, fallback to summary
364401
});
365-
logger.info(`Document processed successfully: ${document.file_name}`);
402+
logger.info(`Document processed successfully: ${documentInfo.fileName}`);
366403
} catch (error) {
367-
logger.error(`Error processing document ${document.file_name}:`, error);
404+
logger.error(`Error processing document ${documentInfo.fileName}:`, error);
368405
// Add a fallback attachment even if processing failed
369406
attachments.push({
370407
id: document.file_id,
371408
url: "",
372-
title: `Document: ${document.file_name}`,
409+
title: `Document: ${documentInfo.fileName}`,
373410
source: "Document",
374-
description: `Document processing failed: ${document.file_name}`,
375-
text: `Document: ${document.file_name}\nSize: ${document.file_size} bytes\nType: ${document.mime_type}`,
411+
description: `Document processing failed: ${documentInfo.fileName}`,
412+
text: `Document: ${documentInfo.fileName}\nSize: ${documentInfo.fileSize || 0} bytes\nType: ${documentInfo.mimeType || 'unknown'}`,
376413
});
377414
}
378415
} else {
379416
// Add a basic attachment even if documentInfo is null
380417
attachments.push({
381418
id: document.file_id,
382419
url: "",
383-
title: `Document: ${document.file_name}`,
420+
title: `Document: ${document.file_name || 'Unknown Document'}`,
384421
source: "Document",
385-
description: `Document: ${document.file_name}`,
386-
text: `Document: ${document.file_name}\nSize: ${document.file_size} bytes\nType: ${document.mime_type}`,
422+
description: `Document: ${document.file_name || 'Unknown Document'}`,
423+
text: `Document: ${document.file_name || 'Unknown Document'}\nSize: ${document.file_size || 0} bytes\nType: ${document.mime_type || 'unknown'}`,
387424
});
388425
}
389426
}
@@ -604,7 +641,7 @@ export class MessageManager {
604641
// Type guard to ensure message exists
605642
if (!ctx.message || !ctx.from) return;
606643

607-
const message = ctx.message;
644+
const message = ctx.message as Message.TextMessage;
608645

609646
try {
610647
// Convert IDs to UUIDs

0 commit comments

Comments
 (0)