88from bisheng .knowledge .domain .schemas .knowledge_rag_schema import Metadata
99from bisheng .knowledge .rag .base_file_pipeline import BaseFilePipeline
1010from bisheng .knowledge .rag .pipeline .transformer .abstract import AbstractTransformer
11+ from bisheng .knowledge .rag .pipeline .transformer .extra_file import ExtraFileTransformer
1112from bisheng .knowledge .rag .pipeline .transformer .splitter import SplitterTransformer
1213from bisheng .user .domain .models .user import UserDao
14+ from bisheng .utils import generate_uuid
1315
1416
1517class PreviewFilePipeline (BaseFilePipeline ):
@@ -31,6 +33,7 @@ def __init__(
3133 invoke_user_id : int ,
3234 local_file_path : str ,
3335 file_name : str ,
36+ knowledge_id : int ,
3437 file_rule : FileProcessBase = None ,
3538 ** kwargs ,
3639 ):
@@ -41,6 +44,7 @@ def __init__(
4144 ** kwargs ,
4245 )
4346 self .local_file_path = local_file_path
47+ self .knowledge_id = knowledge_id
4448
4549 @cached_property
4650 def file_metadata (self ) -> Dict :
@@ -64,6 +68,13 @@ def _init_abstract_transformers(self) -> List[BaseDocumentTransformer]:
6468
6569 def _init_common_transformers (self ) -> List [BaseDocumentTransformer ]:
6670 transformers = self ._init_abstract_transformers ()
71+ transformers .append (ExtraFileTransformer (
72+ loader = self .loader ,
73+ document_id = generate_uuid (),
74+ knowledge_id = self .knowledge_id ,
75+ knowledge_file = None ,
76+ retain_images = self .file_split_rule .retain_images == 1
77+ ))
6778 transformers .append (
6879 SplitterTransformer (
6980 separator = self .file_split_rule .separator ,
0 commit comments