@@ -27,17 +27,31 @@ import {
2727import { convertToTelegramButtons , convertMarkdownToTelegram } from "./utils" ;
2828import fs from "fs" ;
2929
30+ /**
31+ * Interface for structured document processing results.
32+ */
33+ interface DocumentProcessingResult {
34+ title : string ; // Title from generateSummary (may be ""), or a "<kind>: <file name>" label on fallback/error paths.
35+ summary : string ; // Summary from generateSummary, or a type/size/error note on fallback/error paths.
36+ fullText : string ; // Extracted document text; "" when no text was extracted (fallback/error paths).
37+ formattedDescription : string ; // Bracketed single-string rendering, i.e. the format previously returned as `description`.
38+ fileName : string ; // document.file_name, or 'Unknown Document' when the name is missing.
39+ mimeType : string | undefined ; // document.mime_type passed through unchanged; may be undefined.
40+ fileSize : number | undefined ; // document.file_size passed through unchanged (rendered as "bytes" in the text fields); may be undefined.
41+ error ?: string ; // Optional error text. NOTE(review): no producer visible in this diff sets it — confirm intended use.
42+ }
43+
3044/**
3145 * Generates a summary for the provided text using a specified model.
3246 *
3347 * @param {IAgentRuntime } runtime - The runtime environment for the agent.
3448 * @param {string } text - The text to generate a summary for.
35- * @returns {Promise<{ title: string; description : string }> } An object containing the generated title and description.
49+ * @returns {Promise<{ title: string; summary : string }> } An object containing the generated title and summary.
3650 */
3751async function generateSummary (
3852 runtime : IAgentRuntime ,
3953 text : string
40- ) : Promise < { title : string ; description : string } > {
54+ ) : Promise < { title : string ; summary : string } > {
4155 // make sure text is under 128k characters
4256 text = await trimTokens ( text , 100000 , runtime ) ;
4357
@@ -64,13 +78,13 @@ async function generateSummary(
6478 if ( parsedResponse ?. title && parsedResponse ?. summary ) {
6579 return {
6680 title : parsedResponse . title ,
67- description : parsedResponse . summary ,
81+ summary : parsedResponse . summary ,
6882 } ;
6983 }
7084
7185 return {
7286 title : "" ,
73- description : "" ,
87+ summary : "" ,
7488 } ;
7589}
7690
@@ -179,7 +193,7 @@ export class MessageManager {
179193 */
180194 async processDocument (
181195 message : Message ,
182- ) : Promise < { description : string } | null > {
196+ ) : Promise < DocumentProcessingResult | null > {
183197 try {
184198 if ( ! ( "document" in message ) || ! message . document ) {
185199 return null ;
@@ -199,7 +213,13 @@ export class MessageManager {
199213
200214 // Generic fallback for unsupported types
201215 return {
202- description : `[Document: ${ document . file_name } \nType: ${ document . mime_type } \nSize: ${ document . file_size } bytes]`
216+ title : `Document: ${ document . file_name || 'Unknown Document' } ` ,
217+ summary : `Type: ${ document . mime_type || 'unknown' } \nSize: ${ document . file_size || 0 } bytes` ,
218+ fullText : "" ,
219+ formattedDescription : `[Document: ${ document . file_name || 'Unknown Document' } \nType: ${ document . mime_type || 'unknown' } \nSize: ${ document . file_size || 0 } bytes]` ,
220+ fileName : document . file_name || 'Unknown Document' ,
221+ mimeType : document . mime_type ,
222+ fileSize : document . file_size ,
203223 } ;
204224
205225 } catch ( error ) {
@@ -211,7 +231,7 @@ export class MessageManager {
211231 /**
212232 * Get the appropriate document processor based on MIME type.
213233 */
214- private getDocumentProcessor ( mimeType ?: string ) : ( ( document : Document , url : string ) => Promise < { description : string } > ) | null {
234+ private getDocumentProcessor ( mimeType ?: string ) : ( ( document : Document , url : string ) => Promise < DocumentProcessingResult > ) | null {
215235 if ( ! mimeType ) return null ;
216236
217237 const processors = {
@@ -233,13 +253,19 @@ export class MessageManager {
233253 /**
234254 * Process PDF documents by converting them to text.
235255 */
236- private async processPdfDocument ( document : Document , documentUrl : string ) : Promise < { description : string } > {
256+ private async processPdfDocument ( document : Document , documentUrl : string ) : Promise < DocumentProcessingResult > {
237257 try {
238258 const pdfService = this . runtime . getService ( ServiceType . PDF ) as any ;
239259 if ( ! pdfService ) {
240260 logger . warn ( "PDF service not available, using fallback" ) ;
241261 return {
242- description : `[PDF Document: ${ document . file_name } \nSize: ${ document . file_size } bytes\nUnable to extract text content]`
262+ title : `PDF Document: ${ document . file_name || 'Unknown Document' } ` ,
263+ summary : `Size: ${ document . file_size || 0 } bytes\nUnable to extract text content` ,
264+ fullText : "" ,
265+ formattedDescription : `[PDF Document: ${ document . file_name || 'Unknown Document' } \nSize: ${ document . file_size || 0 } bytes\nUnable to extract text content]` ,
266+ fileName : document . file_name || 'Unknown Document' ,
267+ mimeType : document . mime_type ,
268+ fileSize : document . file_size ,
243269 } ;
244270 }
245271
@@ -252,27 +278,39 @@ export class MessageManager {
252278 const text = await pdfService . convertPdfToText ( Buffer . from ( pdfBuffer ) ) ;
253279
254280 // Use generateSummary for context extraction
255- const { title, description } = await generateSummary ( this . runtime , text ) ;
281+ const { title, summary } = await generateSummary ( this . runtime , text ) ;
256282
257283 logger . info ( `PDF processed successfully: ${ text . length } characters extracted` ) ;
258284 return {
259- description : title
260- ? `[PDF Document: ${ document . file_name } \nTitle: ${ title } \nSummary: ${ description } \n\nFull Content:\n${ text } \n--- END DOCUMENT]`
261- : `[PDF Document: ${ document . file_name } \nContent: ${ text . substring ( 0 , 500 ) } ... [Document truncated]`
285+ title : title ,
286+ summary : summary ,
287+ fullText : text ,
288+ formattedDescription : title
289+ ? `[PDF Document: ${ document . file_name || 'Unknown Document' } \nTitle: ${ title } \nSummary: ${ summary } \n\nFull Content:\n${ text } \n--- END DOCUMENT]`
290+ : `[PDF Document: ${ document . file_name || 'Unknown Document' } \nContent: ${ text . substring ( 0 , 500 ) } ... [Document truncated]` ,
291+ fileName : document . file_name || 'Unknown Document' ,
292+ mimeType : document . mime_type ,
293+ fileSize : document . file_size ,
262294 } ;
263295
264296 } catch ( error ) {
265297 logger . error ( "Error processing PDF document:" , error ) ;
266298 return {
267- description : `[PDF Document: ${ document . file_name } \nSize: ${ document . file_size } bytes\nError: Unable to extract text content]`
299+ title : `PDF Document: ${ document . file_name || 'Unknown Document' } ` ,
300+ summary : `Size: ${ document . file_size || 0 } bytes\nError: Unable to extract text content` ,
301+ fullText : "" ,
302+ formattedDescription : `[PDF Document: ${ document . file_name || 'Unknown Document' } \nSize: ${ document . file_size || 0 } bytes\nError: Unable to extract text content]` ,
303+ fileName : document . file_name || 'Unknown Document' ,
304+ mimeType : document . mime_type ,
305+ fileSize : document . file_size ,
268306 } ;
269307 }
270308 }
271309
272310 /**
273311 * Process text documents by fetching their content.
274312 */
275- private async processTextDocument ( document : Document , documentUrl : string ) : Promise < { description : string } > {
313+ private async processTextDocument ( document : Document , documentUrl : string ) : Promise < DocumentProcessingResult > {
276314 try {
277315 const response = await fetch ( documentUrl ) ;
278316 if ( ! response . ok ) {
@@ -282,19 +320,31 @@ export class MessageManager {
282320 const text = await response . text ( ) ;
283321
284322 // Use generateSummary for context extraction
285- const { title, description } = await generateSummary ( this . runtime , text ) ;
323+ const { title, summary } = await generateSummary ( this . runtime , text ) ;
286324
287325 logger . info ( `Text document processed successfully: ${ text . length } characters extracted` ) ;
288326 return {
289- description : title
290- ? `[Text Document: ${ document . file_name } \nTitle: ${ title } \nSummary: ${ description } \n\nFull Content:\n${ text } \n--- END DOCUMENT]`
291- : `[Text Document: ${ document . file_name } \nContent: ${ text . substring ( 0 , 500 ) } ... [Document truncated]`
327+ title : title ,
328+ summary : summary ,
329+ fullText : text ,
330+ formattedDescription : title
331+ ? `[Text Document: ${ document . file_name || 'Unknown Document' } \nTitle: ${ title } \nSummary: ${ summary } \n\nFull Content:\n${ text } \n--- END DOCUMENT]`
332+ : `[Text Document: ${ document . file_name || 'Unknown Document' } \nContent: ${ text . substring ( 0 , 500 ) } ... [Document truncated]` ,
333+ fileName : document . file_name || 'Unknown Document' ,
334+ mimeType : document . mime_type ,
335+ fileSize : document . file_size ,
292336 } ;
293337
294338 } catch ( error ) {
295339 logger . error ( "Error processing text document:" , error ) ;
296340 return {
297- description : `[Text Document: ${ document . file_name } \nSize: ${ document . file_size } bytes\nError: Unable to read content]`
341+ title : `Text Document: ${ document . file_name || 'Unknown Document' } ` ,
342+ summary : `Size: ${ document . file_size || 0 } bytes\nError: Unable to read content` ,
343+ fullText : "" ,
344+ formattedDescription : `[Text Document: ${ document . file_name || 'Unknown Document' } \nSize: ${ document . file_size || 0 } bytes\nError: Unable to read content]` ,
345+ fileName : document . file_name || 'Unknown Document' ,
346+ mimeType : document . mime_type ,
347+ fileSize : document . file_size ,
298348 } ;
299349 }
300350 }
@@ -330,23 +380,10 @@ export class MessageManager {
330380 try {
331381 const fileLink = await this . bot . telegram . getFileLink ( document . file_id ) ;
332382
333- // Extract title and description from documentInfo
334- const titleMatch = documentInfo . description . match ( / T i t l e : ( [ ^ \n ] + ) / ) ;
335- const summaryMatch = documentInfo . description . match ( / S u m m a r y : ( [ ^ \n ] + ) / ) ;
336- const title = titleMatch ? titleMatch [ 1 ] : `Document: ${ document . file_name } ` ;
337- const summary = summaryMatch ? summaryMatch [ 1 ] : documentInfo . description ;
338-
339- // Get the full text content using the existing processor
340- let fullText = "" ;
341- const processor = this . getDocumentProcessor ( document . mime_type ) ;
342- if ( processor ) {
343- const result = await processor ( document , fileLink . toString ( ) ) ;
344- // Extract full text from the result
345- const fullTextMatch = result . description . match ( / F u l l C o n t e n t : \n ( [ \s \S ] * ?) \n - - - E N D D O C U M E N T / ) ;
346- if ( fullTextMatch ) {
347- fullText = fullTextMatch [ 1 ] ;
348- }
349- }
383+ // Use structured data directly instead of regex parsing
384+ const title = documentInfo . title ;
385+ const summary = documentInfo . summary ;
386+ const fullText = documentInfo . fullText ;
350387
351388 // Add document content to processedContent so agent can access it
352389 if ( fullText ) {
@@ -362,28 +399,28 @@ export class MessageManager {
362399 description : summary ,
363400 text : fullText || summary , // Use full text if available, fallback to summary
364401 } ) ;
365- logger . info ( `Document processed successfully: ${ document . file_name } ` ) ;
402+ logger . info ( `Document processed successfully: ${ documentInfo . fileName } ` ) ;
366403 } catch ( error ) {
367- logger . error ( `Error processing document ${ document . file_name } :` , error ) ;
404+ logger . error ( `Error processing document ${ documentInfo . fileName } :` , error ) ;
368405 // Add a fallback attachment even if processing failed
369406 attachments . push ( {
370407 id : document . file_id ,
371408 url : "" ,
372- title : `Document: ${ document . file_name } ` ,
409+ title : `Document: ${ documentInfo . fileName } ` ,
373410 source : "Document" ,
374- description : `Document processing failed: ${ document . file_name } ` ,
375- text : `Document: ${ document . file_name } \nSize: ${ document . file_size } bytes\nType: ${ document . mime_type } ` ,
411+ description : `Document processing failed: ${ documentInfo . fileName } ` ,
412+ text : `Document: ${ documentInfo . fileName } \nSize: ${ documentInfo . fileSize || 0 } bytes\nType: ${ documentInfo . mimeType || 'unknown' } ` ,
376413 } ) ;
377414 }
378415 } else {
379416 // Add a basic attachment even if documentInfo is null
380417 attachments . push ( {
381418 id : document . file_id ,
382419 url : "" ,
383- title : `Document: ${ document . file_name } ` ,
420+ title : `Document: ${ document . file_name || 'Unknown Document' } ` ,
384421 source : "Document" ,
385- description : `Document: ${ document . file_name } ` ,
386- text : `Document: ${ document . file_name } \nSize: ${ document . file_size } bytes\nType: ${ document . mime_type } ` ,
422+ description : `Document: ${ document . file_name || 'Unknown Document' } ` ,
423+ text : `Document: ${ document . file_name || 'Unknown Document' } \nSize: ${ document . file_size || 0 } bytes\nType: ${ document . mime_type || 'unknown' } ` ,
387424 } ) ;
388425 }
389426 }
@@ -604,7 +641,7 @@ export class MessageManager {
604641 // Type guard to ensure message exists
605642 if ( ! ctx . message || ! ctx . from ) return ;
606643
607- const message = ctx . message ;
644+ const message = ctx . message as Message . TextMessage ;
608645
609646 try {
610647 // Convert IDs to UUIDs
0 commit comments