@@ -282,88 +282,84 @@ async def astream(self) -> str:
282282 0 if self ._underlying_value is None else len (str (self ._underlying_value ))
283283 ) # type: ignore
284284
285- exception_to_raise = None
286- try :
287- # Type of the chunk depends on the backend.
288- chunks : list [Any | None ] = []
289- while True :
290- try :
291- item = self ._async_queue .get_nowait ()
292- chunks .append (item )
293- except asyncio .QueueEmpty :
294- # We've exhausted the current items in the queue.
295- break
296-
297- # Make sure we always get the minimum chunk size.
298- while len (chunks ) <= self ._chunk_size :
299- if len (chunks ) > 0 :
300- if chunks [- 1 ] is None or isinstance (chunks [- 1 ], Exception ):
301- break # Hit sentinel value or an error.
302- # We could switch to relying on the `done` / `finish_reason` field of chunks,
303- # but that forces us to know about the chunk type here. Prefer sentinel values
304- # for now.
305-
306- item = await self ._async_queue .get ()
285+ # Type of the chunk depends on the backend.
286+ chunks : list [Any | None ] = []
287+ while True :
288+ try :
289+ item = self ._async_queue .get_nowait ()
307290 chunks .append (item )
308-
309- # Process the sentinel value if it's there.
310- if chunks [- 1 ] is None :
311- chunks .pop () # Remove the sentinel value.
312- do_set_computed = True
313-
314- # Shouldn't be needed, but cancel the Tasks this ModelOutputThunk relied on.
315- if self ._generate is not None :
316- self ._generate .cancel ()
317- if self ._generate_extra is not None :
318- # Covers an hf edge case. The task is done generating anything useful but isn't `done` yet.
319- await self ._generate_extra
320- self ._generate_extra .cancel ()
321-
322- # If ModelOutputThunks get too bulky, we can do additional cleanup here
323- # and set fields to None.
324-
325- elif isinstance (chunks [- 1 ], Exception ):
326- # Mark as computed so post_process runs in finally block
327- self ._computed = True
328- # Store exception to re-raise after cleanup
329- exception_to_raise = chunks [- 1 ]
330-
331- for chunk in chunks :
332- assert self ._process is not None
333- await self ._process (self , chunk )
334-
335- if do_set_computed :
336- assert self ._underlying_value is not None
337- self ._computed = True
338- finally :
339- # Always call post_process if computed, even on exception
340- # This ensures telemetry spans are properly closed
341- if self ._computed :
342- assert self ._post_process is not None
343- await self ._post_process (self )
344-
345- # Only parse if no exception occurred
346- if exception_to_raise is None :
347- match self ._action :
348- case Component ():
349- self .parsed_repr = self ._action ._parse (self )
350- case CBlock ():
351- assert self .value is not None , (
352- "value must be non-None since this thunk is computed"
353- )
354- self .parsed_repr = self .value # type: ignore
355- case _:
356- raise ValueError (
357- "attempted to astream from a model output thunk with no ._action set"
358- )
359- assert self .parsed_repr is not None , (
360- "enforce constraint that a computed ModelOutputThunk has a non-None parsed_repr"
291+ except asyncio .QueueEmpty :
292+ # We've exhausted the current items in the queue.
293+ break
294+
295+ # Make sure we always get the minimum chunk size.
296+ while len (chunks ) <= self ._chunk_size :
297+ if len (chunks ) > 0 :
298+ if chunks [- 1 ] is None or isinstance (chunks [- 1 ], Exception ):
299+ break # Hit sentinel value or an error.
300+ # We could switch to relying on the `done` / `finish_reason` field of chunks,
301+ # but that forces us to know about the chunk type here. Prefer sentinel values
302+ # for now.
303+
304+ item = await self ._async_queue .get ()
305+ chunks .append (item )
306+
307+ # Process the sentinel value if it's there.
308+ if chunks [- 1 ] is None :
309+ chunks .pop () # Remove the sentinel value.
310+ do_set_computed = True
311+
312+ # Shouldn't be needed, but cancel the Tasks this ModelOutputThunk relied on.
313+ if self ._generate is not None :
314+ self ._generate .cancel ()
315+ if self ._generate_extra is not None :
316+ # Covers an hf edge case. The task is done generating anything useful but isn't `done` yet.
317+ await self ._generate_extra
318+ self ._generate_extra .cancel ()
319+
320+ # If ModelOutputThunks get too bulky, we can do additional cleanup here
321+ # and set fields to None.
322+
323+ elif isinstance (chunks [- 1 ], Exception ):
324+ # Close any open telemetry span before propagating the error.
325+ # We can't call full post_process here (it assumes success invariants),
326+ # but we must not leak the span.
327+ span = self ._meta .get ("_telemetry_span" )
328+ if span is not None :
329+ from ..telemetry import end_backend_span , set_span_error
330+
331+ set_span_error (span , chunks [- 1 ])
332+ end_backend_span (span )
333+ del self ._meta ["_telemetry_span" ]
334+ raise chunks [- 1 ]
335+
336+ for chunk in chunks :
337+ assert self ._process is not None
338+ await self ._process (self , chunk )
339+
340+ if do_set_computed :
341+ assert self ._underlying_value is not None
342+ self ._computed = True
343+
344+ assert self ._post_process is not None
345+ await self ._post_process (self )
346+
347+ match self ._action :
348+ case Component ():
349+ self .parsed_repr = self ._action ._parse (self )
350+ case CBlock ():
351+ assert self .value is not None , (
352+ "value must be non-None since this thunk is computed"
361353 )
362- return self ._underlying_value # type: ignore
363-
364- # Re-raise exception after cleanup if one occurred
365- if exception_to_raise is not None :
366- raise exception_to_raise
354+ self .parsed_repr = self .value # type: ignore
355+ case _:
356+ raise ValueError (
357+ "attempted to astream from a model output thunk with no ._action set"
358+ )
359+ assert self .parsed_repr is not None , (
360+ "enforce constraint that a computed ModelOutputThunk has a non-None parsed_repr"
361+ )
362+ return self ._underlying_value # type: ignore
367363
368364 return (
369365 self ._underlying_value
0 commit comments