Skip to content

Commit 3597267

Browse files
committed
feat: enhance message processing with document handling and summary generation
- Introduced `generateSummary` function to create concise summaries for text. - Added `processDocument` method to handle various document types, including PDFs and text files. - Implemented centralized document processing based on MIME type. - Updated `processMessage` to integrate document processing and summary extraction. - Enhanced logging for document processing and error handling.
1 parent edaa711 commit 3597267

1 file changed

Lines changed: 309 additions & 24 deletions

File tree

src/messageManager.ts

Lines changed: 309 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7,22 +7,73 @@ import {
77
type Media,
88
type Memory,
99
ModelType,
10+
ServiceType,
1011
type UUID,
1112
createUniqueUuid,
1213
logger,
14+
trimTokens,
15+
parseJSONObjectFromText,
1316
} from "@elizaos/core";
14-
import type { Chat, Message, ReactionType, Update } from "@telegraf/types";
17+
import type { Chat, Message, ReactionType, Update, Document } from "@telegraf/types";
1518
import type { Context, NarrowedContext, Telegraf } from "telegraf";
1619
import { Markup } from "telegraf";
1720
import {
18-
TelegramContent,
21+
type TelegramContent,
1922
TelegramEventTypes,
2023
type TelegramMessageReceivedPayload,
2124
type TelegramMessageSentPayload,
2225
type TelegramReactionReceivedPayload,
2326
} from "./types";
2427
import { convertToTelegramButtons, convertMarkdownToTelegram } from "./utils";
2528
import fs from "fs";
29+
30+
/**
 * Asks the small text model for a title and summary of the given text.
 *
 * The text is passed through `trimTokens` with a limit of 100000 and embedded in a
 * prompt that requests a JSON object with `title` and `summary` keys. The model's
 * `summary` value is returned as `description`.
 *
 * @param {IAgentRuntime} runtime - The runtime used for trimming and for the model call.
 * @param {string} text - The text to generate a summary for.
 * @returns {Promise<{ title: string; description: string }>} The generated title and
 * description, or two empty strings when the input is blank or the model reply does not
 * contain non-empty string `title` and `summary` fields.
 */
async function generateSummary(
  runtime: IAgentRuntime,
  text: string
): Promise<{ title: string; description: string }> {
  const emptySummary = { title: "", description: "" };

  // Nothing to summarise: skip the model call instead of paying for a made-up title.
  if (text.trim() === "") {
    return emptySummary;
  }

  // Bound the prompt size. NOTE(review): the limit is presumably a token count, not
  // characters — confirm against the trimTokens implementation.
  text = await trimTokens(text, 100000, runtime);

  const prompt = `Please generate a concise summary for the following text:

Text: """
${text}
"""

Respond with a JSON object in the following format:
\`\`\`json
{
"title": "Generated Title",
"summary": "Generated summary and/or description of the text"
}
\`\`\``;

  const response = await runtime.useModel(ModelType.TEXT_SMALL, {
    prompt,
  });

  const parsedResponse = parseJSONObjectFromText(response);
  const title = parsedResponse?.title;
  const summary = parsedResponse?.summary;

  // The reply is model-generated JSON, so only accept the fields when they really are
  // non-empty strings; anything else would violate the declared return type.
  if (
    typeof title === "string" &&
    typeof summary === "string" &&
    title !== "" &&
    summary !== ""
  ) {
    return { title, description: summary };
  }

  return emptySummary;
}
76+
2677
/**
2778
* Enum representing different types of media.
2879
* @enum { string }
@@ -92,7 +143,8 @@ export class MessageManager {
92143
imageUrl = fileLink.toString();
93144
} else if (
94145
"document" in message &&
95-
message.document?.mime_type?.startsWith("image/")
146+
message.document?.mime_type?.startsWith("image/") &&
147+
!message.document?.mime_type?.startsWith("application/pdf")
96148
) {
97149
const fileLink = await this.bot.telegram.getFileLink(
98150
message.document.file_id,
@@ -114,6 +166,250 @@ export class MessageManager {
114166
return null;
115167
}
116168

169+
/**
 * Produces a textual description for the document attached to a Telegram message.
 *
 * The document's file link is resolved, then the MIME type is used to pick a
 * processor via `getDocumentProcessor`. When no processor matches, a generic
 * description listing the file name, MIME type and size is returned instead.
 *
 * @param {Message} message - The Telegram message that may contain a document.
 * @returns {Promise<{ description: string } | null>} The document description, or null
 * when the message has no document or an error occurred (the error is logged).
 */
async processDocument(
  message: Message,
): Promise<{ description: string } | null> {
  try {
    const attached = "document" in message ? message.document : undefined;
    if (!attached) {
      return null;
    }

    const documentUrl = (await this.bot.telegram.getFileLink(attached.file_id)).toString();

    logger.info(`Processing document: ${attached.file_name} (${attached.mime_type}, ${attached.file_size} bytes)`);

    // Dispatch on MIME type; unsupported types get a metadata-only description.
    const handler = this.getDocumentProcessor(attached.mime_type);
    if (!handler) {
      return {
        description: `[Document: ${attached.file_name}\nType: ${attached.mime_type}\nSize: ${attached.file_size} bytes]`
      };
    }

    return await handler(attached, documentUrl);
  } catch (error) {
    logger.error("Error processing document:", error);
    return null;
  }
}
210+
211+
/**
 * Selects the document processor for a MIME type.
 *
 * Matching is by prefix, in the order listed below, so a value carrying extra
 * parameters after the type (anything that starts with a listed type) still matches.
 *
 * @param {string} [mimeType] - The MIME type reported for the document.
 * @returns A processor bound to this instance, or null when the MIME type is missing
 * or not one of the supported types.
 */
private getDocumentProcessor(mimeType?: string): ((document: Document, url: string) => Promise<{ description: string }>) | null {
  if (!mimeType) {
    return null;
  }

  type Handler = (document: Document, url: string) => Promise<{ description: string }>;

  // Ordered (prefix, handler) pairs; the first prefix that matches wins.
  const table: Array<[string, Handler]> = [
    ["application/pdf", this.processPdfDocument.bind(this)],
    ["text/plain", this.processTextDocument.bind(this)],
    ["text/csv", this.processTextDocument.bind(this)],
    ["application/json", this.processTextDocument.bind(this)],
  ];

  const hit = table.find(([prefix]) => mimeType.startsWith(prefix));
  return hit ? hit[1] : null;
}
232+
233+
/**
 * Downloads a PDF, converts it to text with the runtime's PDF service and wraps the
 * result in a description block.
 *
 * When `generateSummary` yields a title, the description contains the title, summary
 * and the full text terminated by `--- END DOCUMENT]`. Otherwise it contains a preview
 * of at most 500 characters, marked as truncated only when text was actually cut.
 * Failures never throw: they are logged and reported through a placeholder description.
 *
 * @param {Document} document - Telegram document metadata (file name and size are used in the output).
 * @param {string} documentUrl - URL the PDF is fetched from.
 * @returns {Promise<{ description: string }>} The description block for the PDF.
 */
private async processPdfDocument(document: Document, documentUrl: string): Promise<{ description: string }> {
  try {
    // NOTE(review): cast to any because no typed PDF service interface is visible here;
    // the method is checked at runtime below instead.
    const pdfService = this.runtime.getService(ServiceType.PDF) as any;
    if (!pdfService || typeof pdfService.convertPdfToText !== "function") {
      logger.warn("PDF service not available, using fallback");
      return {
        description: `[PDF Document: ${document.file_name}\nSize: ${document.file_size} bytes\nUnable to extract text content]`
      };
    }

    const response = await fetch(documentUrl);
    if (!response.ok) {
      throw new Error(`Failed to fetch PDF: ${response.status}`);
    }

    const pdfBuffer = await response.arrayBuffer();
    const text: string = await pdfService.convertPdfToText(Buffer.from(pdfBuffer));

    // Use generateSummary for context extraction
    const { title, description } = await generateSummary(this.runtime, text);

    logger.info(`PDF processed successfully: ${text.length} characters extracted`);

    if (title) {
      return {
        description: `[PDF Document: ${document.file_name}\nTitle: ${title}\nSummary: ${description}\n\nFull Content:\n${text}\n--- END DOCUMENT]`
      };
    }

    // No usable summary: fall back to a short preview. Only claim truncation when the
    // text was really cut, and close the opening bracket so the block is well-formed.
    const previewLimit = 500;
    const preview = text.length > previewLimit
      ? `${text.substring(0, previewLimit)}... [Document truncated]`
      : text;
    return {
      description: `[PDF Document: ${document.file_name}\nContent: ${preview}]`
    };
  } catch (error) {
    logger.error("Error processing PDF document:", error);
    return {
      description: `[PDF Document: ${document.file_name}\nSize: ${document.file_size} bytes\nError: Unable to extract text content]`
    };
  }
}
271+
272+
/**
 * Downloads a text-based document and wraps its content in a description block.
 *
 * When `generateSummary` yields a title, the description contains the title, summary
 * and the full text terminated by `--- END DOCUMENT]`. Otherwise it contains a preview
 * of at most 500 characters, marked as truncated only when text was actually cut.
 * Failures never throw: they are logged and reported through a placeholder description.
 *
 * @param {Document} document - Telegram document metadata (file name and size are used in the output).
 * @param {string} documentUrl - URL the document is fetched from.
 * @returns {Promise<{ description: string }>} The description block for the document.
 */
private async processTextDocument(document: Document, documentUrl: string): Promise<{ description: string }> {
  try {
    const response = await fetch(documentUrl);
    if (!response.ok) {
      throw new Error(`Failed to fetch text document: ${response.status}`);
    }

    const text = await response.text();

    // Use generateSummary for context extraction
    const { title, description } = await generateSummary(this.runtime, text);

    logger.info(`Text document processed successfully: ${text.length} characters extracted`);

    if (title) {
      return {
        description: `[Text Document: ${document.file_name}\nTitle: ${title}\nSummary: ${description}\n\nFull Content:\n${text}\n--- END DOCUMENT]`
      };
    }

    // No usable summary: fall back to a short preview. Only claim truncation when the
    // text was really cut, and close the opening bracket so the block is well-formed.
    const previewLimit = 500;
    const preview = text.length > previewLimit
      ? `${text.substring(0, previewLimit)}... [Document truncated]`
      : text;
    return {
      description: `[Text Document: ${document.file_name}\nContent: ${preview}]`
    };
  } catch (error) {
    logger.error("Error processing text document:", error);
    return {
      description: `[Text Document: ${document.file_name}\nSize: ${document.file_size} bytes\nError: Unable to read content]`
    };
  }
}
301+
302+
/**
 * Builds the text content and media attachments for an incoming Telegram message.
 *
 * The message text (or, failing that, its caption) seeds the content. An attached
 * document is processed once through `processDocument`; its title, summary and full
 * text are parsed out of the returned description, the full text is appended to the
 * content, and an attachment is recorded. Photos are described through `processImage`
 * and recorded as image attachments.
 *
 * @param {Message} message The message to process
 * @returns {Promise<{ processedContent: string; attachments: Media[] }>} Processed content and media attachments
 */
async processMessage(
  message: Message
): Promise<{ processedContent: string; attachments: Media[] }> {
  let processedContent = "";
  const attachments: Media[] = [];

  // Get message text
  if ("text" in message && message.text) {
    processedContent = message.text;
  } else if ("caption" in message && message.caption) {
    processedContent = message.caption as string;
  }

  // Process documents
  if ("document" in message && message.document) {
    const document = message.document;
    const documentInfo = await this.processDocument(message);

    if (documentInfo) {
      try {
        const fileLink = await this.bot.telegram.getFileLink(document.file_id);
        const { description } = documentInfo;

        // Extract title and summary from the description block
        const titleMatch = description.match(/Title: ([^\n]+)/);
        const summaryMatch = description.match(/Summary: ([^\n]+)/);
        const title = titleMatch ? titleMatch[1] : `Document: ${document.file_name}`;
        const summary = summaryMatch ? summaryMatch[1] : description;

        // Take the full text from the description already produced above instead of
        // running the processor again: a second run would download and summarise the
        // document twice and could disagree with the first result. The capture is
        // greedy and anchored to the closing marker so that a document whose own text
        // contains a "--- END DOCUMENT" line is not cut short.
        const fullTextMatch = description.match(/Full Content:\n([\s\S]*)\n--- END DOCUMENT\]$/);
        const fullText = fullTextMatch ? fullTextMatch[1] : "";

        // Add document content to processedContent so agent can access it
        if (fullText) {
          const documentContent = `\n\n--- DOCUMENT CONTENT ---\nTitle: ${title}\nSummary: ${summary}\n\nFull Content:\n${fullText}\n--- END DOCUMENT ---\n\n`;
          processedContent += documentContent;
        }

        attachments.push({
          id: document.file_id,
          url: fileLink.toString(),
          title: title,
          source: document.mime_type?.startsWith("application/pdf") ? "PDF" : "Document",
          description: summary,
          text: fullText || summary, // Use full text if available, fallback to summary
        });
        logger.info(`Document processed successfully: ${document.file_name}`);
      } catch (error) {
        logger.error(`Error processing document ${document.file_name}:`, error);
        // Add a fallback attachment even if processing failed
        attachments.push({
          id: document.file_id,
          url: "",
          title: `Document: ${document.file_name}`,
          source: "Document",
          description: `Document processing failed: ${document.file_name}`,
          text: `Document: ${document.file_name}\nSize: ${document.file_size} bytes\nType: ${document.mime_type}`,
        });
      }
    } else {
      // Add a basic attachment even if documentInfo is null
      attachments.push({
        id: document.file_id,
        url: "",
        title: `Document: ${document.file_name}`,
        source: "Document",
        description: `Document: ${document.file_name}`,
        text: `Document: ${document.file_name}\nSize: ${document.file_size} bytes\nType: ${document.mime_type}`,
      });
    }
  }

  // Process images
  if ("photo" in message && message.photo?.length > 0) {
    const imageInfo = await this.processImage(message);
    if (imageInfo) {
      // The last entry of the photo array is the one attached.
      const photo = message.photo[message.photo.length - 1];
      const fileLink = await this.bot.telegram.getFileLink(photo.file_id);
      attachments.push({
        id: photo.file_id,
        url: fileLink.toString(),
        title: "Image Attachment",
        source: "Image",
        description: imageInfo.description,
        text: imageInfo.description,
      });
    }
  }

  // Logged once, after all attachment work, so the reported count is the final one.
  logger.info(`Message processed - Content: ${processedContent ? 'yes' : 'no'}, Attachments: ${attachments.length}`);

  return { processedContent, attachments };
}
412+
117413
// Send long messages in chunks
118414
/**
119415
* Sends a message in chunks, handling attachments and splitting the message if necessary
@@ -308,7 +604,7 @@ export class MessageManager {
308604
// Type guard to ensure message exists
309605
if (!ctx.message || !ctx.from) return;
310606

311-
const message = ctx.message as Message.TextMessage;
607+
const message = ctx.message;
312608

313609
try {
314610
// Convert IDs to UUIDs
@@ -339,23 +635,13 @@ export class MessageManager {
339635
message?.message_id?.toString(),
340636
);
341637

342-
// Handle images
343-
const imageInfo = await this.processImage(message);
638+
// Process message content and attachments like Discord
639+
const { processedContent, attachments } = await this.processMessage(message);
344640

345-
// Get message text - use type guards for safety
346-
let messageText = "";
347-
if ("text" in message && message.text) {
348-
messageText = message.text;
349-
} else if ("caption" in message && message.caption) {
350-
messageText = message.caption as string;
641+
if (!processedContent && attachments.length === 0) {
642+
return;
351643
}
352644

353-
// Combine text and image description
354-
const fullText = imageInfo
355-
? `${messageText} ${imageInfo.description}`
356-
: messageText;
357-
if (!fullText) return;
358-
359645
// Get chat type and determine channel type
360646
const chat = message.chat as Chat;
361647
const channelType = getChannelType(chat);
@@ -382,17 +668,16 @@ export class MessageManager {
382668
agentId: this.runtime.agentId,
383669
roomId,
384670
content: {
385-
text: fullText,
386-
// attachments?
671+
text: processedContent || " ",
672+
attachments: attachments, // Remove the conditional check to match Discord's approach
387673
source: "telegram",
388-
// url?
389674
channelType: channelType,
390675
inReplyTo:
391676
"reply_to_message" in message && message.reply_to_message
392677
? createUniqueUuid(
393-
this.runtime,
394-
message.reply_to_message.message_id.toString(),
395-
)
678+
this.runtime,
679+
message.reply_to_message.message_id.toString(),
680+
)
396681
: undefined,
397682
},
398683
metadata: {

0 commit comments

Comments
 (0)