@@ -424,6 +424,7 @@ static struct xfs_open_zone *
424424xfs_init_open_zone (
425425 struct xfs_rtgroup * rtg ,
426426 xfs_rgblock_t write_pointer ,
427+ enum rw_hint write_hint ,
427428 bool is_gc )
428429{
429430 struct xfs_open_zone * oz ;
@@ -434,6 +435,7 @@ xfs_init_open_zone(
434435 oz -> oz_rtg = rtg ;
435436 oz -> oz_write_pointer = write_pointer ;
436437 oz -> oz_written = write_pointer ;
438+ oz -> oz_write_hint = write_hint ;
437439 oz -> oz_is_gc = is_gc ;
438440
439441 /*
@@ -453,6 +455,7 @@ xfs_init_open_zone(
453455struct xfs_open_zone *
454456xfs_open_zone (
455457 struct xfs_mount * mp ,
458+ enum rw_hint write_hint ,
456459 bool is_gc )
457460{
458461 struct xfs_zone_info * zi = mp -> m_zone_info ;
@@ -465,12 +468,13 @@ xfs_open_zone(
465468 return NULL ;
466469
467470 set_current_state (TASK_RUNNING );
468- return xfs_init_open_zone (to_rtg (xg ), 0 , is_gc );
471+ return xfs_init_open_zone (to_rtg (xg ), 0 , write_hint , is_gc );
469472}
470473
471474static struct xfs_open_zone *
472475xfs_try_open_zone (
473- struct xfs_mount * mp )
476+ struct xfs_mount * mp ,
477+ enum rw_hint write_hint )
474478{
475479 struct xfs_zone_info * zi = mp -> m_zone_info ;
476480 struct xfs_open_zone * oz ;
@@ -487,7 +491,7 @@ xfs_try_open_zone(
487491 */
488492 zi -> zi_nr_open_zones ++ ;
489493 spin_unlock (& zi -> zi_open_zones_lock );
490- oz = xfs_open_zone (mp , false);
494+ oz = xfs_open_zone (mp , write_hint , false);
491495 spin_lock (& zi -> zi_open_zones_lock );
492496 if (!oz ) {
493497 zi -> zi_nr_open_zones -- ;
@@ -510,16 +514,78 @@ xfs_try_open_zone(
510514 return oz ;
511515}
512516
517+ /*
518+ * For data with a short, medium or "none" lifetime hint, try to colocate it
519+ * in an already open zone with a matching temperature.
520+ */
521+ static bool
522+ xfs_colocate_eagerly (
523+ enum rw_hint file_hint )
524+ {
525+ switch (file_hint ) {
526+ case WRITE_LIFE_MEDIUM :
527+ case WRITE_LIFE_SHORT :
528+ case WRITE_LIFE_NONE :
529+ return true;
530+ default :
531+ /* every other hint value is not colocated eagerly */
532+ return false;
533+ }
534+ }
534+
535+ static bool
536+ xfs_good_hint_match (
537+ struct xfs_open_zone * oz ,
538+ enum rw_hint file_hint )
539+ { /* Return true if data hinted as file_hint may share zone oz, judged by oz->oz_write_hint. */
540+ switch (oz -> oz_write_hint ) {
541+ case WRITE_LIFE_LONG :
542+ case WRITE_LIFE_EXTREME :
543+ /* long and extreme zones accept either long or extreme data */
544+ if (file_hint == WRITE_LIFE_LONG ||
545+ file_hint == WRITE_LIFE_EXTREME )
546+ return true;
547+ break ;
548+ case WRITE_LIFE_MEDIUM :
549+ /* medium zones only accept medium data */
550+ if (file_hint == WRITE_LIFE_MEDIUM )
551+ return true;
552+ break ;
553+ case WRITE_LIFE_SHORT :
554+ case WRITE_LIFE_NONE :
555+ case WRITE_LIFE_NOT_SET :
556+ /*
   * short, none and not-set zones accept any hint that compares
   * <= WRITE_LIFE_SHORT.
   * NOTE(review): this relies on the numeric ordering of enum rw_hint,
   * which is not visible here - confirm.
   */
557+ if (file_hint <= WRITE_LIFE_SHORT )
558+ return true;
559+ break ;
560+ }
561+ return false; /* no acceptable zone/file hint pairing */
562+ }
563+
513564static bool
514565xfs_try_use_zone (
515566 struct xfs_zone_info * zi ,
516- struct xfs_open_zone * oz )
567+ enum rw_hint file_hint ,
568+ struct xfs_open_zone * oz ,
569+ bool lowspace )
517570{
518571 if (oz -> oz_write_pointer == rtg_blocks (oz -> oz_rtg ))
519572 return false;
573+ if (!lowspace && !xfs_good_hint_match (oz , file_hint ))
574+ return false;
520575 if (!atomic_inc_not_zero (& oz -> oz_ref ))
521576 return false;
522577
578+ /*
579+ * If we have a hint set for the data, use that for the zone even if
580+ * some data was written already without any hint set, but don't change
581+ * the temperature after that as that would make little sense without
582+ * tracking per-temperature class written block counts, which is
583+ * probably overkill anyway.
584+ */
585+ if (file_hint != WRITE_LIFE_NOT_SET &&
586+ oz -> oz_write_hint == WRITE_LIFE_NOT_SET )
587+ oz -> oz_write_hint = file_hint ;
588+
523589 /*
524590 * If we couldn't match by inode or life time we just pick the first
525591 * zone with enough space above. For that we want the least busy zone
@@ -534,14 +600,16 @@ xfs_try_use_zone(
534600
535601static struct xfs_open_zone *
536602xfs_select_open_zone_lru (
537- struct xfs_zone_info * zi )
603+ struct xfs_zone_info * zi ,
604+ enum rw_hint file_hint ,
605+ bool lowspace )
538606{
539607 struct xfs_open_zone * oz ;
540608
541609 lockdep_assert_held (& zi -> zi_open_zones_lock );
542610
543611 list_for_each_entry (oz , & zi -> zi_open_zones , oz_entry )
544- if (xfs_try_use_zone (zi , oz ))
612+ if (xfs_try_use_zone (zi , file_hint , oz , lowspace ))
545613 return oz ;
546614
547615 cond_resched_lock (& zi -> zi_open_zones_lock );
@@ -550,20 +618,28 @@ xfs_select_open_zone_lru(
550618
551619static struct xfs_open_zone *
552620xfs_select_open_zone_mru (
553- struct xfs_zone_info * zi )
621+ struct xfs_zone_info * zi ,
622+ enum rw_hint file_hint )
554623{
555624 struct xfs_open_zone * oz ;
556625
557626 lockdep_assert_held (& zi -> zi_open_zones_lock );
558627
559628 list_for_each_entry_reverse (oz , & zi -> zi_open_zones , oz_entry )
560- if (xfs_try_use_zone (zi , oz ))
629+ if (xfs_try_use_zone (zi , file_hint , oz , false ))
561630 return oz ;
562631
563632 cond_resched_lock (& zi -> zi_open_zones_lock );
564633 return NULL ;
565634}
566635
636+ static inline enum rw_hint xfs_inode_write_hint (struct xfs_inode * ip )
637+ { /* Pick the write hint that zone selection should use for this inode. */
638+ if (xfs_has_nolifetime (ip -> i_mount ))
639+ return WRITE_LIFE_NOT_SET ; /* NOTE(review): presumably lifetime hints are disabled for this mount - confirm */
640+ return VFS_I (ip )-> i_write_hint ; /* otherwise use the hint stored on the VFS inode */
641+ }
642+
567643/*
568644 * Try to pack inodes that are written back after they were closed tight instead
569645 * of trying to open new zones for them or spread them to the least recently
@@ -587,6 +663,7 @@ static inline bool xfs_zoned_pack_tight(struct xfs_inode *ip)
587663static struct xfs_open_zone *
588664xfs_select_zone_nowait (
589665 struct xfs_mount * mp ,
666+ enum rw_hint write_hint ,
590667 bool pack_tight )
591668{
592669 struct xfs_zone_info * zi = mp -> m_zone_info ;
@@ -595,20 +672,38 @@ xfs_select_zone_nowait(
595672 if (xfs_is_shutdown (mp ))
596673 return NULL ;
597674
675+ /*
676+ * Try to fill up open zones with matching temperature if available. It
677+ * is better to try to co-locate data when this is favorable, so we can
678+ * activate empty zones when it is statistically better to separate
679+ * data.
680+ */
598681 spin_lock (& zi -> zi_open_zones_lock );
599- if (pack_tight )
600- oz = xfs_select_open_zone_mru (zi );
682+ if (xfs_colocate_eagerly (write_hint ))
683+ oz = xfs_select_open_zone_lru (zi , write_hint , false);
684+ else if (pack_tight )
685+ oz = xfs_select_open_zone_mru (zi , write_hint );
601686 if (oz )
602687 goto out_unlock ;
603688
604689 /*
605690 * See if we can open a new zone and use that.
606691 */
607- oz = xfs_try_open_zone (mp );
692+ oz = xfs_try_open_zone (mp , write_hint );
608693 if (oz )
609694 goto out_unlock ;
610695
611- oz = xfs_select_open_zone_lru (zi );
696+ /*
697+ * Try to colocate cold data with other cold data if we failed to open a
698+ * new zone for it.
699+ */
700+ if (write_hint != WRITE_LIFE_NOT_SET &&
701+ !xfs_colocate_eagerly (write_hint ))
702+ oz = xfs_select_open_zone_lru (zi , write_hint , false);
703+ if (!oz )
704+ oz = xfs_select_open_zone_lru (zi , WRITE_LIFE_NOT_SET , false);
705+ if (!oz )
706+ oz = xfs_select_open_zone_lru (zi , WRITE_LIFE_NOT_SET , true);
612707out_unlock :
613708 spin_unlock (& zi -> zi_open_zones_lock );
614709 return oz ;
@@ -617,19 +712,20 @@ xfs_select_zone_nowait(
617712static struct xfs_open_zone *
618713xfs_select_zone (
619714 struct xfs_mount * mp ,
715+ enum rw_hint write_hint ,
620716 bool pack_tight )
621717{
622718 struct xfs_zone_info * zi = mp -> m_zone_info ;
623719 DEFINE_WAIT (wait );
624720 struct xfs_open_zone * oz ;
625721
626- oz = xfs_select_zone_nowait (mp , pack_tight );
722+ oz = xfs_select_zone_nowait (mp , write_hint , pack_tight );
627723 if (oz )
628724 return oz ;
629725
630726 for (;;) {
631727 prepare_to_wait (& zi -> zi_zone_wait , & wait , TASK_UNINTERRUPTIBLE );
632- oz = xfs_select_zone_nowait (mp , pack_tight );
728+ oz = xfs_select_zone_nowait (mp , write_hint , pack_tight );
633729 if (oz )
634730 break ;
635731 schedule ();
@@ -707,6 +803,7 @@ xfs_zone_alloc_and_submit(
707803{
708804 struct xfs_inode * ip = XFS_I (ioend -> io_inode );
709805 struct xfs_mount * mp = ip -> i_mount ;
806+ enum rw_hint write_hint = xfs_inode_write_hint (ip );
710807 bool pack_tight = xfs_zoned_pack_tight (ip );
711808 unsigned int alloc_len ;
712809 struct iomap_ioend * split ;
@@ -724,7 +821,7 @@ xfs_zone_alloc_and_submit(
724821 * oz = xfs_last_used_zone (ioend );
725822 if (!* oz ) {
726823select_zone :
727- * oz = xfs_select_zone (mp , pack_tight );
824+ * oz = xfs_select_zone (mp , write_hint , pack_tight );
728825 if (!* oz )
729826 goto out_error ;
730827 }
@@ -862,7 +959,8 @@ xfs_init_zone(
862959 struct xfs_open_zone * oz ;
863960
864961 atomic_inc (& rtg_group (rtg )-> xg_active_ref );
865- oz = xfs_init_open_zone (rtg , write_pointer , false);
962+ oz = xfs_init_open_zone (rtg , write_pointer , WRITE_LIFE_NOT_SET ,
963+ false);
866964 list_add_tail (& oz -> oz_entry , & zi -> zi_open_zones );
867965 zi -> zi_nr_open_zones ++ ;
868966
0 commit comments