1- import { PassThrough , type Readable } from 'node:stream'
2- import { mergeStreamsFromUrls , type MergeFormat } from '@bitofsky/merge-streams'
31import type {
42 AuthInfo ,
3+ ExternalLinkInfo ,
54 StatementResult ,
65 FetchStreamOptions ,
76 StatementManifest ,
87} from '../types.js'
8+ import { PassThrough , Readable } from 'node:stream'
9+ import { mergeStreamsFromUrls , type MergeFormat } from '@bitofsky/merge-streams'
910import { getChunk } from '../databricks-api.js'
10- import { AbortError } from '../errors.js'
11- import { validateSucceededResult } from '../util.js'
11+ import { AbortError , DatabricksSqlError } from '../errors.js'
12+ import { pipeUrlToOutput , validateSucceededResult } from '../util.js'
1213
1314/**
1415 * Create a readable stream from statement result.
@@ -20,31 +21,32 @@ export function fetchStream(
2021 auth : AuthInfo ,
2122 options : FetchStreamOptions = { }
2223) : Readable {
23- const { signal } = options
24+ const { signal, forceMerge } = options
2425 const manifest = validateSucceededResult ( statementResult )
2526 const format = manifest . format as MergeFormat
2627
28+ if ( statementResult . result ?. data_array ) {
29+ throw new DatabricksSqlError (
30+ 'fetchStream only supports EXTERNAL_LINKS results' ,
31+ 'UNSUPPORTED_FORMAT' ,
32+ statementResult . statement_id
33+ )
34+ }
35+
2736 // Create PassThrough as output (readable by consumer)
2837 const output = new PassThrough ( )
2938
3039 // Handle AbortSignal
3140 if ( signal ) {
32- const onAbort = ( ) => {
33- output . destroy ( new AbortError ( 'Stream aborted' ) )
34- }
41+ const onAbort = ( ) => output . destroy ( new AbortError ( 'Stream aborted' ) )
3542 signal . addEventListener ( 'abort' , onAbort , { once : true } )
36- output . once ( 'close' , ( ) => {
37- signal . removeEventListener ( 'abort' , onAbort )
38- } )
43+ output . once ( 'close' , ( ) => signal . removeEventListener ( 'abort' , onAbort ) )
3944 }
4045
4146 // Start async merge process
4247 // Errors are forwarded to the stream consumer via destroy.
43- mergeChunksToStream ( statementResult , auth , manifest , format , output , signal ) . catch (
44- ( err ) => {
45- output . destroy ( err as Error )
46- }
47- )
48+ mergeChunksToStream ( statementResult , auth , manifest , format , output , signal , forceMerge )
49+ . catch ( ( err ) => output . destroy ( err as Error ) )
4850
4951 return output
5052}
@@ -58,29 +60,59 @@ async function mergeChunksToStream(
5860 manifest : StatementManifest ,
5961 format : MergeFormat ,
6062 output : PassThrough ,
61- signal ?: AbortSignal
63+ signal ?: AbortSignal ,
64+ forceMerge ?: boolean
6265) : Promise < void > {
63- const result = statementResult . result
64-
65- // Collect all external link URLs
66- let urls = result ?. external_links ?. map ( ( link ) => link . external_link ) ?? [ ]
67-
68- // If no URLs in initial result, fetch from chunks
69- if ( urls . length === 0 && manifest . total_chunk_count > 0 ) {
70- for ( let i = 0 ; i < manifest . total_chunk_count ; i ++ ) {
71- if ( signal ?. aborted ) throw new AbortError ( 'Aborted while collecting URLs' )
72-
73- // Chunk metadata contains external link URLs when results are chunked.
74- const chunkData = await getChunk ( auth , statementResult . statement_id , i , signal )
75- const chunkUrls = chunkData . external_links ?. map ( ( link ) => link . external_link ) ?? [ ]
76- urls . push ( ...chunkUrls )
77- }
78- }
66+ const urls = await collectExternalUrls ( statementResult , auth , manifest , signal )
7967
8068 // No external links - close the stream
8169 if ( urls . length === 0 )
8270 return void output . end ( )
8371
72+ // Single URL - pipe directly to output unless forcing merge
73+ if ( urls . length === 1 && ! forceMerge )
74+ // Avoid merge-streams overhead for a single URL unless forced.
75+ return pipeUrlToOutput ( urls [ 0 ] ! , output , signal )
76+
8477 // Merge all URLs using merge-streams
85- await mergeStreamsFromUrls ( format , signal ? { urls, output, signal } : { urls, output } )
78+ return mergeStreamsFromUrls ( format , signal ? { urls, output, signal } : { urls, output } )
79+ }
80+
/**
 * Gather the external link URLs that back a statement result.
 *
 * Links already present on `statementResult.result.external_links` are
 * returned as-is. Otherwise, when the manifest reports a non-zero
 * `total_chunk_count`, each chunk index from 0 up to that count is fetched
 * with `getChunk`, one after another, and the links of every chunk are
 * appended in index order.
 *
 * @param statementResult - Statement result providing inline links and the statement id.
 * @param auth - Credentials forwarded to `getChunk`.
 * @param manifest - Manifest whose `total_chunk_count` bounds the chunk loop.
 * @param signal - Optional abort signal, checked before every chunk fetch and
 *   also forwarded to `getChunk`.
 * @returns The collected URLs; empty when there are neither inline links nor chunks.
 * @throws AbortError when `signal` is already aborted before a chunk fetch.
 */
async function collectExternalUrls(
  statementResult: StatementResult,
  auth: AuthInfo,
  manifest: StatementManifest,
  signal?: AbortSignal
): Promise<string[]> {
  const inlineUrls = extractExternalLinks(statementResult.result?.external_links)
  if (inlineUrls.length !== 0) {
    return inlineUrls
  }

  if (!manifest.total_chunk_count) {
    return []
  }

  const collected: string[] = []
  let chunkIndex = 0
  while (chunkIndex < manifest.total_chunk_count) {
    if (signal?.aborted) {
      throw new AbortError('Aborted while collecting URLs')
    }

    // Chunk metadata contains external link URLs when results are chunked.
    const chunk = await getChunk(auth, statementResult.statement_id, chunkIndex, signal)
    for (const url of extractExternalLinks(chunk.external_links)) {
      collected.push(url)
    }
    chunkIndex += 1
  }

  return collected
}
106+
/**
 * Pull the `external_link` value out of each link entry, keeping only the
 * values that pass `isNonEmptyString`.
 *
 * @param externalLinks - Link entries to read; may be omitted.
 * @returns The accepted link values in their original order, or an empty
 *   array when `externalLinks` is missing.
 */
function extractExternalLinks(externalLinks?: ExternalLinkInfo[]): string[] {
  if (externalLinks) {
    return externalLinks.reduce<string[]>((accepted, link) => {
      const candidate = link.external_link
      if (isNonEmptyString(candidate)) {
        accepted.push(candidate)
      }
      return accepted
    }, [])
  }

  return []
}
115+
/**
 * Type guard: true only for primitive strings containing at least one character.
 *
 * @param value - Any value to inspect.
 * @returns `true` when `value` is a primitive string other than `''`.
 */
function isNonEmptyString(value: unknown): value is string {
  if (typeof value !== 'string') {
    return false
  }
  return value !== ''
}
0 commit comments