@@ -792,9 +792,10 @@ const RANGE_FETCH_CONCURRENCY: usize = 10;
792792/// metadata prefetch hint: 512 KiB.
793793const METADATA_SIZE_HINT : usize = 512 * 1024 ;
794794/// Minimum range size for splitting: 4 MiB.
795- /// Ranges smaller than this will not be split further to avoid
796- /// excessive small IO requests whose per-request overhead dominates.
797- const MIN_SPLIT_SIZE : u64 = 4 * 1024 * 1024 ;
795+ /// The block size used for split alignment and as the minimum split
796+ /// granularity. Ranges smaller than this will not be split further to
797+ /// avoid excessive small IO requests whose per-request overhead dominates.
798+ const IO_BLOCK_SIZE : u64 = 4 * 1024 * 1024 ;
798799
799800impl ArrowFileReader {
800801 fn new ( file_size : u64 , r : Box < dyn FileRead > ) -> Self {
@@ -995,7 +996,7 @@ fn merge_byte_ranges(ranges: &[Range<u64>], coalesce: u64) -> Vec<Range<u64>> {
995996/// Split merged ranges into fixed-size batches to utilize concurrency.
996997/// Each merged range is divided into chunks of `expected_size`,
997998/// with the last chunk taking whatever remains.
998- /// Ranges smaller than `2 * MIN_SPLIT_SIZE ` are kept as-is to
999+ /// Ranges smaller than `2 * IO_BLOCK_SIZE ` are kept as-is to
9991000/// avoid excessive small IO requests.
10001001fn split_ranges_for_concurrency ( merged : Vec < Range < u64 > > , concurrency : usize ) -> Vec < Range < u64 > > {
10011002 if merged. is_empty ( ) || concurrency <= 1 {
@@ -1006,21 +1007,21 @@ fn split_ranges_for_concurrency(merged: Vec<Range<u64>>, concurrency: usize) ->
10061007
10071008 for range in & merged {
10081009 let length = range. end - range. start ;
1009- let raw_size = MIN_SPLIT_SIZE . max ( length / concurrency as u64 + 1 ) ;
1010- // Round up to the nearest multiple of MIN_SPLIT_SIZE (4 MB) so that
1010+ let raw_size = IO_BLOCK_SIZE . max ( length. div_ceil ( concurrency as u64 ) ) ;
1011+ // Round up to the nearest multiple of IO_BLOCK_SIZE (4 MiB) so that
10111012 // every split boundary is 4 MiB-aligned relative to the range start.
1012- let expected_size = raw_size. div_ceil ( MIN_SPLIT_SIZE ) * MIN_SPLIT_SIZE ;
1013- let min_tail_size = expected_size. max ( MIN_SPLIT_SIZE * 2 ) ;
1013+ let expected_size = raw_size. div_ceil ( IO_BLOCK_SIZE ) * IO_BLOCK_SIZE ;
1014+ let min_tail_size = expected_size. max ( IO_BLOCK_SIZE * 2 ) ;
10141015
10151016 let mut offset = range. start ;
10161017 let end = range. end ;
10171018
10181019 // Align the first split boundary: if `offset` is not 4 MiB-aligned,
10191020 // emit a short head chunk so that all subsequent chunks start on a
10201021 // 4 MiB boundary.
1021- let misalign = offset % MIN_SPLIT_SIZE ;
1022+ let misalign = offset % IO_BLOCK_SIZE ;
10221023 if misalign != 0 {
1023- let first_end = ( offset - misalign + MIN_SPLIT_SIZE ) . min ( end) ;
1024+ let first_end = ( offset - misalign + IO_BLOCK_SIZE ) . min ( end) ;
10241025 result. push ( offset..first_end) ;
10251026 offset = first_end;
10261027 }
0 commit comments