Skip to content

Commit 1d0a4c5

Browse files
authored
🪨 feat: AWS Bedrock Document Uploads (danny-avila#11912)
* feat: add AWS Bedrock upload to provider support
* chore: address Copilot comments
* feat: add shared Bedrock document format types and MIME mapping
  Bedrock Converse API accepts 9 document formats (PDF plus 8 others). Add BedrockDocumentFormat union type, MIME-to-format mapping, and helpers in data-provider so both client and backend can reference them.
* refactor: generalize Bedrock PDF validation to support all document types
  Rename validateBedrockPdf to validateBedrockDocument with MIME-aware logic: 4.5MB hard limit applies to all types, PDF header check only runs for application/pdf. Adds test coverage for non-PDF documents.
* feat: support all Bedrock document formats in encoding pipeline
  Widen file type gates to accept csv, doc, docx, xls, xlsx, html, txt, md for Bedrock. Uses shared MIME-to-format map instead of hardcoded 'pdf'. Other providers' PDF-only paths remain unchanged.
* feat: expand Bedrock file upload UI to accept all document types
  Add 'image_document_extended' upload type for Bedrock with accept filters for all 9 supported formats. Update drag-and-drop validation to use isBedrockDocumentType helper.
* fix: route Bedrock document types through provider pipeline
1 parent b349f2f commit 1d0a4c5

10 files changed

Lines changed: 527 additions & 46 deletions

File tree

api/app/clients/BaseClient.js

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ const {
2020
isAgentsEndpoint,
2121
isEphemeralAgentId,
2222
supportsBalanceCheck,
23+
isBedrockDocumentType,
2324
} = require('librechat-data-provider');
2425
const {
2526
updateMessage,
@@ -1300,6 +1301,9 @@ class BaseClient {
13001301

13011302
const allFiles = [];
13021303

1304+
const provider = this.options.agent?.provider ?? this.options.endpoint;
1305+
const isBedrock = provider === EModelEndpoint.bedrock;
1306+
13031307
for (const file of attachments) {
13041308
/** @type {FileSources} */
13051309
const source = file.source ?? FileSources.local;
@@ -1317,6 +1321,9 @@ class BaseClient {
13171321
} else if (file.type === 'application/pdf') {
13181322
categorizedAttachments.documents.push(file);
13191323
allFiles.push(file);
1324+
} else if (isBedrock && isBedrockDocumentType(file.type)) {
1325+
categorizedAttachments.documents.push(file);
1326+
allFiles.push(file);
13201327
} else if (file.type.startsWith('video/')) {
13211328
categorizedAttachments.videos.push(file);
13221329
allFiles.push(file);

client/src/components/Chat/Input/Files/AttachFileMenu.tsx

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,21 @@ import {
88
FileImageIcon,
99
TerminalSquareIcon,
1010
} from 'lucide-react';
11-
import {
12-
Providers,
13-
EToolResources,
14-
EModelEndpoint,
15-
defaultAgentCapabilities,
16-
isDocumentSupportedProvider,
17-
} from 'librechat-data-provider';
1811
import {
1912
FileUpload,
2013
TooltipAnchor,
2114
DropdownPopup,
2215
AttachmentIcon,
2316
SharePointIcon,
2417
} from '@librechat/client';
18+
import {
19+
Providers,
20+
EToolResources,
21+
EModelEndpoint,
22+
defaultAgentCapabilities,
23+
bedrockDocumentExtensions,
24+
isDocumentSupportedProvider,
25+
} from 'librechat-data-provider';
2526
import type { EndpointFileConfig } from 'librechat-data-provider';
2627
import {
2728
useAgentToolPermissions,
@@ -37,7 +38,12 @@ import { ephemeralAgentByConvoId } from '~/store';
3738
import { MenuItemProps } from '~/common';
3839
import { cn } from '~/utils';
3940

40-
type FileUploadType = 'image' | 'document' | 'image_document' | 'image_document_video_audio';
41+
type FileUploadType =
42+
| 'image'
43+
| 'document'
44+
| 'image_document'
45+
| 'image_document_extended'
46+
| 'image_document_video_audio';
4147

4248
interface AttachFileMenuProps {
4349
agentId?: string | null;
@@ -99,6 +105,8 @@ const AttachFileMenu = ({
99105
inputRef.current.accept = '.pdf,application/pdf';
100106
} else if (fileType === 'image_document') {
101107
inputRef.current.accept = 'image/*,.heif,.heic,.pdf,application/pdf';
108+
} else if (fileType === 'image_document_extended') {
109+
inputRef.current.accept = `image/*,.heif,.heic,${bedrockDocumentExtensions}`;
102110
} else if (fileType === 'image_document_video_audio') {
103111
inputRef.current.accept = 'image/*,.heif,.heic,.pdf,application/pdf,video/*,audio/*';
104112
} else {
@@ -134,6 +142,11 @@ const AttachFileMenu = ({
134142
let fileType: Exclude<FileUploadType, 'image' | 'document'> = 'image_document';
135143
if (currentProvider === Providers.GOOGLE || currentProvider === Providers.OPENROUTER) {
136144
fileType = 'image_document_video_audio';
145+
} else if (
146+
currentProvider === Providers.BEDROCK ||
147+
endpointType === EModelEndpoint.bedrock
148+
) {
149+
fileType = 'image_document_extended';
137150
}
138151
onAction(fileType);
139152
},

client/src/components/Chat/Input/Files/DragDropModal.tsx

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,22 @@
11
import React, { useMemo } from 'react';
22
import { useRecoilValue } from 'recoil';
33
import { OGDialog, OGDialogTemplate } from '@librechat/client';
4+
import {
5+
ImageUpIcon,
6+
FileSearch,
7+
FileType2Icon,
8+
FileImageIcon,
9+
TerminalSquareIcon,
10+
} from 'lucide-react';
411
import {
512
Providers,
613
inferMimeType,
714
EToolResources,
815
EModelEndpoint,
16+
isBedrockDocumentType,
917
defaultAgentCapabilities,
1018
isDocumentSupportedProvider,
1119
} from 'librechat-data-provider';
12-
import {
13-
ImageUpIcon,
14-
FileSearch,
15-
FileType2Icon,
16-
FileImageIcon,
17-
TerminalSquareIcon,
18-
} from 'lucide-react';
1920
import {
2021
useAgentToolPermissions,
2122
useAgentCapabilities,
@@ -77,20 +78,26 @@ const DragDropModal = ({ onOptionSelect, setShowModal, files, isVisible }: DragD
7778
) {
7879
const supportsImageDocVideoAudio =
7980
currentProvider === EModelEndpoint.google || currentProvider === Providers.OPENROUTER;
80-
const validFileTypes = supportsImageDocVideoAudio
81-
? files.every((file) => {
82-
const type = getFileType(file);
83-
return (
84-
type?.startsWith('image/') ||
85-
type?.startsWith('video/') ||
86-
type?.startsWith('audio/') ||
87-
type === 'application/pdf'
88-
);
89-
})
90-
: files.every((file) => {
91-
const type = getFileType(file);
92-
return type?.startsWith('image/') || type === 'application/pdf';
93-
});
81+
const isBedrock =
82+
currentProvider === Providers.BEDROCK || endpointType === EModelEndpoint.bedrock;
83+
84+
const isValidProviderFile = (file: File): boolean => {
85+
const type = getFileType(file);
86+
if (supportsImageDocVideoAudio) {
87+
return (
88+
type?.startsWith('image/') ||
89+
type?.startsWith('video/') ||
90+
type?.startsWith('audio/') ||
91+
type === 'application/pdf'
92+
);
93+
}
94+
if (isBedrock) {
95+
return type?.startsWith('image/') || isBedrockDocumentType(type);
96+
}
97+
return type?.startsWith('image/') || type === 'application/pdf';
98+
};
99+
100+
const validFileTypes = files.every(isValidProviderFile);
94101

95102
_options.push({
96103
label: localize('com_ui_upload_provider'),

packages/api/src/files/encode/document.spec.ts

Lines changed: 184 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import { encodeAndFormatDocuments } from './document';
77
/** Mock the validation module */
88
jest.mock('~/files/validation', () => ({
99
validatePdf: jest.fn(),
10+
validateBedrockDocument: jest.fn(),
1011
}));
1112

1213
/** Mock the utils module */
@@ -15,11 +16,14 @@ jest.mock('./utils', () => ({
1516
getConfiguredFileSizeLimit: jest.fn(),
1617
}));
1718

18-
import { validatePdf } from '~/files/validation';
19+
import { validatePdf, validateBedrockDocument } from '~/files/validation';
1920
import { getFileStream, getConfiguredFileSizeLimit } from './utils';
2021
import { Types } from 'mongoose';
2122

2223
const mockedValidatePdf = validatePdf as jest.MockedFunction<typeof validatePdf>;
24+
const mockedValidateBedrockDocument = validateBedrockDocument as jest.MockedFunction<
25+
typeof validateBedrockDocument
26+
>;
2327
const mockedGetFileStream = getFileStream as jest.MockedFunction<typeof getFileStream>;
2428
const mockedGetConfiguredFileSizeLimit = getConfiguredFileSizeLimit as jest.MockedFunction<
2529
typeof getConfiguredFileSizeLimit
@@ -84,6 +88,26 @@ describe('encodeAndFormatDocuments - fileConfig integration', () => {
8488
updatedAt: new Date(),
8589
}) as unknown as IMongoFile;
8690

91+
const createMockDocFile = (
92+
sizeInMB: number,
93+
mimeType: string,
94+
filename: string,
95+
): IMongoFile =>
96+
({
97+
_id: new Types.ObjectId(),
98+
user: new Types.ObjectId(),
99+
file_id: new Types.ObjectId().toString(),
100+
filename,
101+
type: mimeType,
102+
bytes: Math.floor(sizeInMB * 1024 * 1024),
103+
object: 'file',
104+
usage: 0,
105+
source: 'test',
106+
filepath: `/test/path/${filename}`,
107+
createdAt: new Date(),
108+
updatedAt: new Date(),
109+
}) as unknown as IMongoFile;
110+
87111
describe('Configuration extraction and validation', () => {
88112
it('should pass configured file size limit to validatePdf for OpenAI', async () => {
89113
const configuredLimit = mbToBytes(15);
@@ -500,6 +524,165 @@ describe('encodeAndFormatDocuments - fileConfig integration', () => {
500524
});
501525
});
502526

527+
it('should format Bedrock document with valid PDF', async () => {
528+
const req = createMockRequest() as ServerRequest;
529+
const file = createMockFile(3);
530+
531+
const mockContent = Buffer.from('test-pdf-content').toString('base64');
532+
mockedGetFileStream.mockResolvedValue({
533+
file,
534+
content: mockContent,
535+
metadata: file,
536+
});
537+
538+
mockedValidateBedrockDocument.mockResolvedValue({ isValid: true });
539+
540+
const result = await encodeAndFormatDocuments(
541+
req,
542+
[file],
543+
{ provider: Providers.BEDROCK },
544+
mockStrategyFunctions,
545+
);
546+
547+
expect(result.documents).toHaveLength(1);
548+
expect(result.documents[0]).toMatchObject({
549+
type: 'document',
550+
document: {
551+
name: 'test_pdf',
552+
format: 'pdf',
553+
source: {
554+
bytes: expect.any(Buffer),
555+
},
556+
},
557+
});
558+
});
559+
560+
it('should format Bedrock CSV document', async () => {
561+
const req = createMockRequest() as ServerRequest;
562+
const file = createMockDocFile(1, 'text/csv', 'data.csv');
563+
564+
const mockContent = Buffer.from('col1,col2\nval1,val2').toString('base64');
565+
mockedGetFileStream.mockResolvedValue({
566+
file,
567+
content: mockContent,
568+
metadata: file,
569+
});
570+
571+
mockedValidateBedrockDocument.mockResolvedValue({ isValid: true });
572+
573+
const result = await encodeAndFormatDocuments(
574+
req,
575+
[file],
576+
{ provider: Providers.BEDROCK },
577+
mockStrategyFunctions,
578+
);
579+
580+
expect(result.documents).toHaveLength(1);
581+
expect(result.documents[0]).toMatchObject({
582+
type: 'document',
583+
document: {
584+
name: 'data_csv',
585+
format: 'csv',
586+
source: {
587+
bytes: expect.any(Buffer),
588+
},
589+
},
590+
});
591+
});
592+
593+
it('should format Bedrock DOCX document', async () => {
594+
const req = createMockRequest() as ServerRequest;
595+
const mimeType = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
596+
const file = createMockDocFile(2, mimeType, 'report.docx');
597+
598+
const mockContent = Buffer.from('docx-binary-content').toString('base64');
599+
mockedGetFileStream.mockResolvedValue({
600+
file,
601+
content: mockContent,
602+
metadata: file,
603+
});
604+
605+
mockedValidateBedrockDocument.mockResolvedValue({ isValid: true });
606+
607+
const result = await encodeAndFormatDocuments(
608+
req,
609+
[file],
610+
{ provider: Providers.BEDROCK },
611+
mockStrategyFunctions,
612+
);
613+
614+
expect(result.documents).toHaveLength(1);
615+
expect(result.documents[0]).toMatchObject({
616+
type: 'document',
617+
document: {
618+
name: 'report_docx',
619+
format: 'docx',
620+
source: {
621+
bytes: expect.any(Buffer),
622+
},
623+
},
624+
});
625+
});
626+
627+
it('should format Bedrock plain text document', async () => {
628+
const req = createMockRequest() as ServerRequest;
629+
const file = createMockDocFile(0.5, 'text/plain', 'notes.txt');
630+
631+
const mockContent = Buffer.from('plain text content').toString('base64');
632+
mockedGetFileStream.mockResolvedValue({
633+
file,
634+
content: mockContent,
635+
metadata: file,
636+
});
637+
638+
mockedValidateBedrockDocument.mockResolvedValue({ isValid: true });
639+
640+
const result = await encodeAndFormatDocuments(
641+
req,
642+
[file],
643+
{ provider: Providers.BEDROCK },
644+
mockStrategyFunctions,
645+
);
646+
647+
expect(result.documents).toHaveLength(1);
648+
expect(result.documents[0]).toMatchObject({
649+
type: 'document',
650+
document: {
651+
name: 'notes_txt',
652+
format: 'txt',
653+
source: {
654+
bytes: expect.any(Buffer),
655+
},
656+
},
657+
});
658+
});
659+
660+
it('should reject Bedrock document when validation fails', async () => {
661+
const req = createMockRequest() as ServerRequest;
662+
const file = createMockDocFile(5, 'text/csv', 'big.csv');
663+
664+
const mockContent = Buffer.from('large-csv-content').toString('base64');
665+
mockedGetFileStream.mockResolvedValue({
666+
file,
667+
content: mockContent,
668+
metadata: file,
669+
});
670+
671+
mockedValidateBedrockDocument.mockResolvedValue({
672+
isValid: false,
673+
error: 'File size (5.0MB) exceeds the 4.5MB limit for Bedrock',
674+
});
675+
676+
await expect(
677+
encodeAndFormatDocuments(
678+
req,
679+
[file],
680+
{ provider: Providers.BEDROCK },
681+
mockStrategyFunctions,
682+
),
683+
).rejects.toThrow('Document validation failed');
684+
});
685+
503686
it('should format OpenAI document with responses API', async () => {
504687
const req = createMockRequest(15) as ServerRequest;
505688
const file = createMockFile(10);

0 commit comments

Comments
 (0)