@@ -55,13 +55,6 @@ static inline bool xfs_buf_is_uncached(struct xfs_buf *bp)
 	return bp->b_rhash_key == XFS_BUF_DADDR_NULL;
 }
 
-static inline int
-xfs_buf_vmap_len(
-	struct xfs_buf	*bp)
-{
-	return (bp->b_page_count * PAGE_SIZE);
-}
-
 /*
  * When we mark a buffer stale, we remove the buffer from the LRU and clear the
  * b_lru_ref count so that the buffer is freed immediately when the buffer
@@ -190,29 +183,6 @@ _xfs_buf_alloc(
 	return 0;
 }
 
-static void
-xfs_buf_free_pages(
-	struct xfs_buf	*bp)
-{
-	uint		i;
-
-	ASSERT(bp->b_flags & _XBF_PAGES);
-
-	if (is_vmalloc_addr(bp->b_addr))
-		vm_unmap_ram(bp->b_addr, bp->b_page_count);
-
-	for (i = 0; i < bp->b_page_count; i++) {
-		if (bp->b_pages[i])
-			folio_put(page_folio(bp->b_pages[i]));
-	}
-	mm_account_reclaimed_pages(howmany(BBTOB(bp->b_length), PAGE_SIZE));
-
-	if (bp->b_pages != bp->b_page_array)
-		kfree(bp->b_pages);
-	bp->b_pages = NULL;
-	bp->b_flags &= ~_XBF_PAGES;
-}
-
 static void
 xfs_buf_free_callback(
 	struct callback_head	*cb)
@@ -227,16 +197,23 @@ static void
 xfs_buf_free(
 	struct xfs_buf	*bp)
 {
+	unsigned int	size = BBTOB(bp->b_length);
+
 	trace_xfs_buf_free(bp, _RET_IP_);
 
 	ASSERT(list_empty(&bp->b_lru));
 
+	if (!xfs_buftarg_is_mem(bp->b_target) && size >= PAGE_SIZE)
+		mm_account_reclaimed_pages(howmany(size, PAGE_SIZE));
+
 	if (xfs_buftarg_is_mem(bp->b_target))
 		xmbuf_unmap_page(bp);
-	else if (bp->b_flags & _XBF_PAGES)
-		xfs_buf_free_pages(bp);
+	else if (is_vmalloc_addr(bp->b_addr))
+		vfree(bp->b_addr);
 	else if (bp->b_flags & _XBF_KMEM)
 		kfree(bp->b_addr);
+	else
+		folio_put(virt_to_folio(bp->b_addr));
 
 	call_rcu(&bp->b_rcu, xfs_buf_free_callback);
 }
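
With the page array gone, the free path classifies the buffer's backing store from the mapping itself: a vmalloc address tests true under is_vmalloc_addr(), slab memory is still tagged with _XBF_KMEM, and the only remaining possibility is a single high-order folio. A minimal sketch of that classification, with a hypothetical helper name (this illustrates the dispatch above, it is not code from the patch):

```c
#include <linux/mm.h>		/* is_vmalloc_addr(), virt_to_folio(), folio_put() */
#include <linux/slab.h>		/* kfree() */
#include <linux/vmalloc.h>	/* vfree() */

/* Hypothetical helper mirroring the xfs_buf_free() dispatch above. */
static void example_free_backing(void *addr, bool is_kmem)
{
	if (is_vmalloc_addr(addr))
		vfree(addr);			/* vmalloc: discontiguous pages */
	else if (is_kmem)
		kfree(addr);			/* slab: sub-page allocation */
	else
		folio_put(virt_to_folio(addr));	/* single high-order folio */
}
```

The _XBF_KMEM flag has to survive because a slab pointer and a folio address are both direct-mapped and cannot be told apart by address alone.
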
@@ -264,9 +241,6 @@ xfs_buf_alloc_kmem(
 		bp->b_addr = NULL;
 		return -ENOMEM;
 	}
-	bp->b_pages = bp->b_page_array;
-	bp->b_pages[0] = kmem_to_page(bp->b_addr);
-	bp->b_page_count = 1;
 	bp->b_flags |= _XBF_KMEM;
 	return 0;
 }
@@ -287,9 +261,9 @@ xfs_buf_alloc_kmem(
  * by the rest of the code - the buffer memory spans a single contiguous memory
  * region that we don't have to map and unmap to access the data directly.
  *
- * The third type of buffer is the multi-page buffer. These are always made
- * up of single pages so that they can be fed to vmap_ram() to return a
- * contiguous memory region we can access the data through.
+ * The third type of buffer is the vmalloc()d buffer. This provides the buffer
+ * with the required contiguous memory region but backed by discontiguous
+ * physical pages.
  */
 static int
 xfs_buf_alloc_backing_mem(
@@ -299,7 +273,6 @@ xfs_buf_alloc_backing_mem(
 	size_t		size = BBTOB(bp->b_length);
 	gfp_t		gfp_mask = GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOWARN;
 	struct folio	*folio;
-	long		filled = 0;
 
 	if (xfs_buftarg_is_mem(bp->b_target))
 		return xmbuf_map_page(bp);
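
The rewritten comment above describes a three-tier policy: slab for sub-page buffers, one physically contiguous high-order folio when the allocator can provide it, and vmalloc as the fallback that only guarantees virtual contiguity. A hedged sketch of that decision ladder follows; the helper name is hypothetical, and the real logic (including its gfp-flag tweaks and statistics) is split across xfs_buf_alloc_kmem() and the folio/vmalloc paths of xfs_buf_alloc_backing_mem():

```c
#include <linux/gfp.h>		/* folio_alloc() */
#include <linux/mm.h>		/* folio_address(), get_order(), PAGE_SIZE */
#include <linux/slab.h>		/* kmalloc() */
#include <linux/vmalloc.h>	/* __vmalloc() */

/* Hypothetical: choose one of the three backing types by buffer size. */
static void *example_alloc_backing(size_t size, gfp_t gfp_mask)
{
	struct folio	*folio;

	/* Type 1: sub-page buffers come from the slab allocator. */
	if (size < PAGE_SIZE)
		return kmalloc(size, gfp_mask);

	/* Type 2: try a single physically contiguous high-order folio. */
	folio = folio_alloc(gfp_mask, get_order(size));
	if (folio)
		return folio_address(folio);

	/* Type 3: vmalloc gives virtual contiguity over scattered pages. */
	return __vmalloc(size, gfp_mask);
}
```
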
@@ -351,98 +324,18 @@ xfs_buf_alloc_backing_mem(
 		goto fallback;
 	}
 	bp->b_addr = folio_address(folio);
-	bp->b_page_array[0] = &folio->page;
-	bp->b_pages = bp->b_page_array;
-	bp->b_page_count = 1;
-	bp->b_flags |= _XBF_PAGES;
 	return 0;
 
 fallback:
-	/* Fall back to allocating an array of single page folios. */
-	bp->b_page_count = DIV_ROUND_UP(size, PAGE_SIZE);
-	if (bp->b_page_count <= XB_PAGES) {
-		bp->b_pages = bp->b_page_array;
-	} else {
-		bp->b_pages = kzalloc(sizeof(struct page *) * bp->b_page_count,
-				gfp_mask);
-		if (!bp->b_pages)
-			return -ENOMEM;
-	}
-	bp->b_flags |= _XBF_PAGES;
-
-	/*
-	 * Bulk filling of pages can take multiple calls. Not filling the entire
-	 * array is not an allocation failure, so don't back off if we get at
-	 * least one extra page.
-	 */
 	for (;;) {
-		long	last = filled;
-
-		filled = alloc_pages_bulk(gfp_mask, bp->b_page_count,
-				bp->b_pages);
-		if (filled == bp->b_page_count) {
-			XFS_STATS_INC(bp->b_mount, xb_page_found);
+		bp->b_addr = __vmalloc(size, gfp_mask);
+		if (bp->b_addr)
 			break;
-		}
-
-		if (filled != last)
-			continue;
-
-		if (flags & XBF_READ_AHEAD) {
-			xfs_buf_free_pages(bp);
+		if (flags & XBF_READ_AHEAD)
 			return -ENOMEM;
-		}
-
 		XFS_STATS_INC(bp->b_mount, xb_page_retries);
 		memalloc_retry_wait(gfp_mask);
 	}
-	return 0;
-}
-
-/*
- * Map buffer into kernel address-space if necessary.
- */
-STATIC int
-_xfs_buf_map_pages(
-	struct xfs_buf	*bp,
-	xfs_buf_flags_t	flags)
-{
-	ASSERT(bp->b_flags & _XBF_PAGES);
-	if (bp->b_page_count == 1) {
-		/* A single page buffer is always mappable */
-		bp->b_addr = page_address(bp->b_pages[0]);
-	} else {
-		int retried = 0;
-		unsigned nofs_flag;
-
-		/*
-		 * vm_map_ram() will allocate auxiliary structures (e.g.
-		 * pagetables) with GFP_KERNEL, yet we often under a scoped nofs
-		 * context here. Mixing GFP_KERNEL with GFP_NOFS allocations
-		 * from the same call site that can be run from both above and
-		 * below memory reclaim causes lockdep false positives. Hence we
-		 * always need to force this allocation to nofs context because
-		 * we can't pass __GFP_NOLOCKDEP down to auxillary structures to
-		 * prevent false positive lockdep reports.
-		 *
-		 * XXX(dgc): I think dquot reclaim is the only place we can get
-		 * to this function from memory reclaim context now. If we fix
-		 * that like we've fixed inode reclaim to avoid writeback from
-		 * reclaim, this nofs wrapping can go away.
-		 */
-		nofs_flag = memalloc_nofs_save();
-		do {
-			bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
-					-1);
-			if (bp->b_addr)
-				break;
-			vm_unmap_aliases();
-		} while (retried++ <= 1);
-		memalloc_nofs_restore(nofs_flag);
-
-		if (!bp->b_addr)
-			return -ENOMEM;
-	}
 
 	return 0;
 }
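
The fallback keeps the allocator's old forward-progress contract while swapping the mechanism: readahead is optional I/O and fails fast with -ENOMEM, while every other caller waits via memalloc_retry_wait() and retries __vmalloc() indefinitely, with __GFP_NOWARN suppressing allocation-failure splats along the way. The loop pattern in isolation (a sketch with a hypothetical name, not the patch itself):

```c
#include <linux/sched/mm.h>	/* memalloc_retry_wait() */
#include <linux/vmalloc.h>	/* __vmalloc() */

/*
 * Illustrative retry loop: block until vmalloc succeeds, unless the
 * caller marked the I/O optional (the XBF_READ_AHEAD case above).
 */
static void *example_vmalloc_retry(size_t size, gfp_t gfp_mask, bool optional)
{
	for (;;) {
		void *p = __vmalloc(size, gfp_mask);

		if (p)
			return p;
		if (optional)
			return NULL;	/* caller turns this into -ENOMEM */
		memalloc_retry_wait(gfp_mask);	/* throttle, then try again */
	}
}
```
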
@@ -562,7 +455,7 @@ xfs_buf_find_lock(
 			return -ENOENT;
 		}
 		ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
-		bp->b_flags &= _XBF_KMEM | _XBF_PAGES;
+		bp->b_flags &= _XBF_KMEM;
 		bp->b_ops = NULL;
 	}
 	return 0;
@@ -748,18 +641,6 @@ xfs_buf_get_map(
 		xfs_perag_put(pag);
 	}
 
-	/* We do not hold a perag reference anymore. */
-	if (!bp->b_addr) {
-		error = _xfs_buf_map_pages(bp, flags);
-		if (unlikely(error)) {
-			xfs_warn_ratelimited(btp->bt_mount,
-				"%s: failed to map %u pages", __func__,
-				bp->b_page_count);
-			xfs_buf_relse(bp);
-			return error;
-		}
-	}
-
 	/*
 	 * Clear b_error if this is a lookup from a caller that doesn't expect
 	 * valid data to be found in the buffer.
@@ -1002,14 +883,6 @@ xfs_buf_get_uncached(
 	if (error)
 		goto fail_free_buf;
 
-	if (!bp->b_addr)
-		error = _xfs_buf_map_pages(bp, 0);
-	if (unlikely(error)) {
-		xfs_warn(target->bt_mount,
-			"%s: failed to map pages", __func__);
-		goto fail_free_buf;
-	}
-
 	trace_xfs_buf_get_uncached(bp, _RET_IP_);
 	*bpp = bp;
 	return 0;
@@ -1343,7 +1216,7 @@ __xfs_buf_ioend(
 	if (bp->b_flags & XBF_READ) {
 		if (!bp->b_error && is_vmalloc_addr(bp->b_addr))
 			invalidate_kernel_vmap_range(bp->b_addr,
-					xfs_buf_vmap_len(bp));
+					roundup(BBTOB(bp->b_length), PAGE_SIZE));
 		if (!bp->b_error && bp->b_ops)
 			bp->b_ops->verify_read(bp);
 		if (!bp->b_error)
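
The replacement for xfs_buf_vmap_len() is an open-coded roundup(): a vmalloc mapping is an alias of pages the device also reaches through the kernel direct map, so on architectures with virtually indexed caches the alias must be invalidated after read I/O lands (and flushed before write I/O is submitted, as in xfs_buf_submit_bio() below), always in whole pages because vmalloc maps page-sized units. A sketch of the pairing, with illustrative wrapper names:

```c
#include <linux/highmem.h>	/* flush/invalidate_kernel_vmap_range() */
#include <linux/kernel.h>	/* roundup() */
#include <linux/mm.h>		/* is_vmalloc_addr(), PAGE_SIZE */

/* Illustrative: cache alias maintenance around I/O to a vmalloc buffer. */
static void example_before_write(void *addr, size_t len)
{
	if (is_vmalloc_addr(addr))	/* push CPU writes out to the pages */
		flush_kernel_vmap_range(addr, roundup(len, PAGE_SIZE));
}

static void example_after_read(void *addr, size_t len)
{
	if (is_vmalloc_addr(addr))	/* drop stale lines over DMA'd data */
		invalidate_kernel_vmap_range(addr, roundup(len, PAGE_SIZE));
}
```
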
@@ -1504,29 +1377,48 @@ static void
 xfs_buf_submit_bio(
 	struct xfs_buf	*bp)
 {
-	unsigned int	size = BBTOB(bp->b_length);
-	unsigned int	map = 0, p;
+	unsigned int	map = 0;
 	struct blk_plug	plug;
 	struct bio	*bio;
 
-	bio = bio_alloc(bp->b_target->bt_bdev, bp->b_page_count,
-			xfs_buf_bio_op(bp), GFP_NOIO);
-	bio->bi_private = bp;
-	bio->bi_end_io = xfs_buf_bio_end_io;
+	if (is_vmalloc_addr(bp->b_addr)) {
+		unsigned int	size = BBTOB(bp->b_length);
+		unsigned int	alloc_size = roundup(size, PAGE_SIZE);
+		void		*data = bp->b_addr;
 
-	if (bp->b_page_count == 1) {
-		__bio_add_page(bio, virt_to_page(bp->b_addr), size,
-				offset_in_page(bp->b_addr));
-	} else {
-		for (p = 0; p < bp->b_page_count; p++)
-			__bio_add_page(bio, bp->b_pages[p], PAGE_SIZE, 0);
-		bio->bi_iter.bi_size = size;	/* limit to the actual size used */
+		bio = bio_alloc(bp->b_target->bt_bdev, alloc_size >> PAGE_SHIFT,
+				xfs_buf_bio_op(bp), GFP_NOIO);
+
+		do {
+			unsigned int	len = min(size, PAGE_SIZE);
 
-		if (is_vmalloc_addr(bp->b_addr))
-			flush_kernel_vmap_range(bp->b_addr,
-					xfs_buf_vmap_len(bp));
+			ASSERT(offset_in_page(data) == 0);
+			__bio_add_page(bio, vmalloc_to_page(data), len, 0);
+			data += len;
+			size -= len;
+		} while (size);
+
+		flush_kernel_vmap_range(bp->b_addr, alloc_size);
+	} else {
+		/*
+		 * Single folio or slab allocation. Must be contiguous and thus
+		 * only a single bvec is needed.
+		 *
+		 * This uses the page based bio add helper for now as that is
+		 * the lowest common denominator between folios and slab
+		 * allocations. To be replaced with a better block layer
+		 * helper soon (hopefully).
+		 */
+		bio = bio_alloc(bp->b_target->bt_bdev, 1, xfs_buf_bio_op(bp),
+				GFP_NOIO);
+		__bio_add_page(bio, virt_to_page(bp->b_addr),
+				BBTOB(bp->b_length),
+				offset_in_page(bp->b_addr));
 	}
 
+	bio->bi_private = bp;
+	bio->bi_end_io = xfs_buf_bio_end_io;
+
 	/*
 	 * If there is more than one map segment, split out a new bio for each
 	 * map except of the last one. The last map is handled by the
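
The new submission path splits on the same address test as the free path: a vmalloc region is only virtually contiguous, so it must be added to the bio one page at a time through vmalloc_to_page(), while a folio or slab buffer is physically contiguous and fits in a single bvec. The per-page walk, condensed into a sketch that assumes, as the patch does, a page-aligned vmalloc address and a bio allocated with enough bvecs (the in-code comment above already anticipates a generic block-layer helper replacing this open-coded loop):

```c
#include <linux/bio.h>		/* __bio_add_page() */
#include <linux/minmax.h>	/* min_t() */
#include <linux/vmalloc.h>	/* vmalloc_to_page() */

/* Illustrative: attach a page-aligned vmalloc range to a prepared bio. */
static void example_bio_add_vmalloc(struct bio *bio, void *data,
		unsigned int size)
{
	while (size) {
		unsigned int len = min_t(unsigned int, size, PAGE_SIZE);

		/* Translate each virtual page to its backing struct page. */
		__bio_add_page(bio, vmalloc_to_page(data), len, 0);
		data += len;
		size -= len;
	}
}
```
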