Commit b410484
Merge tag 'memblock-v6.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock

Pull mm-init update from Mike Rapoport:

 "Simplify deferred initialization of struct pages

  Refactor and simplify deferred initialization of the memory map.
  Besides the negative diffstat, it gives a 3ms (55ms vs 58ms) reduction
  in the initialization of deferred pages on a single-node system with
  64GiB of RAM"

* tag 'memblock-v6.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock:
  memblock: drop for_each_free_mem_pfn_range_in_zone_from()
  mm/mm_init: drop deferred_init_maxorder()
  mm/mm_init: deferred_init_memmap: use a job per zone
  mm/mm_init: use deferred_init_memmap_chunk() in deferred_grow_zone()
2 parents c4c8bca + e68f150

4 files changed: 65 additions & 219 deletions


.clang-format

Lines changed: 0 additions & 1 deletion
@@ -294,7 +294,6 @@ ForEachMacros:
   - 'for_each_fib6_node_rt_rcu'
   - 'for_each_fib6_walker_rt'
   - 'for_each_file_lock'
-  - 'for_each_free_mem_pfn_range_in_zone_from'
   - 'for_each_free_mem_range'
   - 'for_each_free_mem_range_reverse'
   - 'for_each_func_rsrc'

include/linux/memblock.h

Lines changed: 0 additions & 22 deletions
@@ -324,28 +324,6 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
 	for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \
 	     i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid))
 
-#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
-void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
-				  unsigned long *out_spfn,
-				  unsigned long *out_epfn);
-
-/**
- * for_each_free_mem_pfn_range_in_zone_from - iterate through zone specific
- * free memblock areas from a given point
- * @i: u64 used as loop variable
- * @zone: zone in which all of the memory blocks reside
- * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
- * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
- *
- * Walks over free (memory && !reserved) areas of memblock in a specific
- * zone, continuing from current position. Available as soon as memblock is
- * initialized.
- */
-#define for_each_free_mem_pfn_range_in_zone_from(i, zone, p_start, p_end) \
-	for (; i != U64_MAX; \
-	     __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end))
-
-#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 
 /**
  * for_each_free_mem_range - iterate through free memblock areas

mm/memblock.c

Lines changed: 0 additions & 64 deletions
@@ -1445,70 +1445,6 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
 	return 0;
 }
 
-#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
-/**
- * __next_mem_pfn_range_in_zone - iterator for for_each_*_range_in_zone()
- *
- * @idx: pointer to u64 loop variable
- * @zone: zone in which all of the memory blocks reside
- * @out_spfn: ptr to ulong for start pfn of the range, can be %NULL
- * @out_epfn: ptr to ulong for end pfn of the range, can be %NULL
- *
- * This function is meant to be a zone/pfn specific wrapper for the
- * for_each_mem_range type iterators. Specifically they are used in the
- * deferred memory init routines and as such we were duplicating much of
- * this logic throughout the code. So instead of having it in multiple
- * locations it seemed like it would make more sense to centralize this to
- * one new iterator that does everything they need.
- */
-void __init_memblock
-__next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
-			     unsigned long *out_spfn, unsigned long *out_epfn)
-{
-	int zone_nid = zone_to_nid(zone);
-	phys_addr_t spa, epa;
-
-	__next_mem_range(idx, zone_nid, MEMBLOCK_NONE,
-			 &memblock.memory, &memblock.reserved,
-			 &spa, &epa, NULL);
-
-	while (*idx != U64_MAX) {
-		unsigned long epfn = PFN_DOWN(epa);
-		unsigned long spfn = PFN_UP(spa);
-
-		/*
-		 * Verify the end is at least past the start of the zone and
-		 * that we have at least one PFN to initialize.
-		 */
-		if (zone->zone_start_pfn < epfn && spfn < epfn) {
-			/* if we went too far just stop searching */
-			if (zone_end_pfn(zone) <= spfn) {
-				*idx = U64_MAX;
-				break;
-			}
-
-			if (out_spfn)
-				*out_spfn = max(zone->zone_start_pfn, spfn);
-			if (out_epfn)
-				*out_epfn = min(zone_end_pfn(zone), epfn);
-
-			return;
-		}
-
-		__next_mem_range(idx, zone_nid, MEMBLOCK_NONE,
-				 &memblock.memory, &memblock.reserved,
-				 &spa, &epa, NULL);
-	}
-
-	/* signal end of iteration */
-	if (out_spfn)
-		*out_spfn = ULONG_MAX;
-	if (out_epfn)
-		*out_epfn = 0;
-}
-
-#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
-
 /**
  * memblock_alloc_range_nid - allocate boot memory block
  * @size: size of memory block to be allocated in bytes
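The dropped __next_mem_pfn_range_in_zone() folded two things into one stateful iterator: walking free memblock ranges and clamping them to a PFN window. After this series the remaining user does the clamping inline around plain for_each_free_mem_range(). The following is a minimal userspace model of just that clamping step; the page size, the toy ranges, and the window are illustrative assumptions, not kernel values:

/* Model of the PFN clamping the new caller does after
 * for_each_free_mem_range(): keep only whole pages of a physical
 * range and intersect them with the PFN window of interest. */
#include <stdio.h>

#define PAGE_SHIFT  12
#define PFN_UP(x)   (((x) + (1UL << PAGE_SHIFT) - 1) >> PAGE_SHIFT)
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)

int main(void)
{
	/* toy "free memblock ranges", physical addresses in bytes */
	unsigned long ranges[][2] = {
		{ 0x1000,   0x5800 },	/* end not page aligned */
		{ 0x100200, 0x200000 },	/* start not page aligned */
	};
	unsigned long win_spfn = 2, win_epfn = 300;	/* window to init */

	for (unsigned int i = 0; i < 2; i++) {
		/* only whole pages inside the range are usable */
		unsigned long spfn = PFN_UP(ranges[i][0]);
		unsigned long epfn = PFN_DOWN(ranges[i][1]);

		/* intersect with the window of interest */
		if (spfn < win_spfn)
			spfn = win_spfn;
		if (epfn > win_epfn)
			epfn = win_epfn;
		if (spfn < epfn)
			printf("init PFNs [%lu, %lu)\n", spfn, epfn);
	}
	return 0;
}

With these values it prints init PFNs [2, 5) and [257, 300); the same PFN_UP/PFN_DOWN plus max()/min() pattern appears verbatim in deferred_init_memmap_chunk() below.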

mm/mm_init.c

Lines changed: 65 additions & 132 deletions
@@ -2045,112 +2045,63 @@ static unsigned long __init deferred_init_pages(struct zone *zone,
 }
 
 /*
- * This function is meant to pre-load the iterator for the zone init from
- * a given point.
- * Specifically it walks through the ranges starting with initial index
- * passed to it until we are caught up to the first_init_pfn value and
- * exits there. If we never encounter the value we return false indicating
- * there are no valid ranges left.
- */
-static bool __init
-deferred_init_mem_pfn_range_in_zone(u64 *i, struct zone *zone,
-				    unsigned long *spfn, unsigned long *epfn,
-				    unsigned long first_init_pfn)
-{
-	u64 j = *i;
-
-	if (j == 0)
-		__next_mem_pfn_range_in_zone(&j, zone, spfn, epfn);
-
-	/*
-	 * Start out by walking through the ranges in this zone that have
-	 * already been initialized. We don't need to do anything with them
-	 * so we just need to flush them out of the system.
-	 */
-	for_each_free_mem_pfn_range_in_zone_from(j, zone, spfn, epfn) {
-		if (*epfn <= first_init_pfn)
-			continue;
-		if (*spfn < first_init_pfn)
-			*spfn = first_init_pfn;
-		*i = j;
-		return true;
-	}
-
-	return false;
-}
-
-/*
- * Initialize and free pages. We do it in two loops: first we initialize
- * struct page, then free to buddy allocator, because while we are
- * freeing pages we can access pages that are ahead (computing buddy
- * page in __free_one_page()).
+ * Initialize and free pages.
+ *
+ * At this point reserved pages and struct pages that correspond to holes in
+ * memblock.memory are already initialized so every free range has a valid
+ * memory map around it.
+ * This ensures that access of pages that are ahead of the range being
+ * initialized (computing buddy page in __free_one_page()) always reads a valid
+ * struct page.
  *
- * In order to try and keep some memory in the cache we have the loop
- * broken along max page order boundaries. This way we will not cause
- * any issues with the buddy page computation.
+ * In order to try and improve CPU cache locality we have the loop broken along
+ * max page order boundaries.
 */
 static unsigned long __init
-deferred_init_maxorder(u64 *i, struct zone *zone, unsigned long *start_pfn,
-		       unsigned long *end_pfn)
+deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
+			   struct zone *zone)
 {
-	unsigned long mo_pfn = ALIGN(*start_pfn + 1, MAX_ORDER_NR_PAGES);
-	unsigned long spfn = *start_pfn, epfn = *end_pfn;
+	int nid = zone_to_nid(zone);
 	unsigned long nr_pages = 0;
-	u64 j = *i;
-
-	/* First we loop through and initialize the page values */
-	for_each_free_mem_pfn_range_in_zone_from(j, zone, start_pfn, end_pfn) {
-		unsigned long t;
-
-		if (mo_pfn <= *start_pfn)
-			break;
+	phys_addr_t start, end;
+	u64 i = 0;
 
-		t = min(mo_pfn, *end_pfn);
-		nr_pages += deferred_init_pages(zone, *start_pfn, t);
+	for_each_free_mem_range(i, nid, 0, &start, &end, NULL) {
+		unsigned long spfn = PFN_UP(start);
+		unsigned long epfn = PFN_DOWN(end);
 
-		if (mo_pfn < *end_pfn) {
-			*start_pfn = mo_pfn;
+		if (spfn >= end_pfn)
 			break;
-		}
-	}
 
-	/* Reset values and now loop through freeing pages as needed */
-	swap(j, *i);
+		spfn = max(spfn, start_pfn);
+		epfn = min(epfn, end_pfn);
 
-	for_each_free_mem_pfn_range_in_zone_from(j, zone, &spfn, &epfn) {
-		unsigned long t;
+		while (spfn < epfn) {
+			unsigned long mo_pfn = ALIGN(spfn + 1, MAX_ORDER_NR_PAGES);
+			unsigned long chunk_end = min(mo_pfn, epfn);
 
-		if (mo_pfn <= spfn)
-			break;
+			nr_pages += deferred_init_pages(zone, spfn, chunk_end);
+			deferred_free_pages(spfn, chunk_end - spfn);
 
-		t = min(mo_pfn, epfn);
-		deferred_free_pages(spfn, t - spfn);
+			spfn = chunk_end;
 
-		if (mo_pfn <= epfn)
-			break;
+			if (irqs_disabled())
+				touch_nmi_watchdog();
+			else
+				cond_resched();
+		}
 	}
 
 	return nr_pages;
 }
 
 static void __init
-deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
-			   void *arg)
+deferred_init_memmap_job(unsigned long start_pfn, unsigned long end_pfn,
+			 void *arg)
 {
-	unsigned long spfn, epfn;
 	struct zone *zone = arg;
-	u64 i = 0;
-
-	deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, start_pfn);
 
-	/*
-	 * Initialize and free pages in MAX_PAGE_ORDER sized increments so that
-	 * we can avoid introducing any issues with the buddy allocator.
-	 */
-	while (spfn < end_pfn) {
-		deferred_init_maxorder(&i, zone, &spfn, &epfn);
-		cond_resched();
-	}
+	deferred_init_memmap_chunk(start_pfn, end_pfn, zone);
 }
 
 static unsigned int __init
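For readers tracing the new inner loop: ALIGN(spfn + 1, MAX_ORDER_NR_PAGES) picks the next max-order boundary strictly above spfn, so each deferred_init_pages()/deferred_free_pages() pair stays within one max-order block and the loop makes progress even when spfn is already aligned. A standalone sketch of just that arithmetic; the MAX_ORDER_NR_PAGES value assumes MAX_PAGE_ORDER == 10 and the PFNs are made up:

/* Sketch of the max-order chunking in deferred_init_memmap_chunk(). */
#include <stdio.h>

#define MAX_ORDER_NR_PAGES 1024UL	/* assumes MAX_PAGE_ORDER == 10 */
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long spfn = 1000, epfn = 5000;	/* clamped free range */

	while (spfn < epfn) {
		/* next max-order boundary strictly above spfn */
		unsigned long mo_pfn = ALIGN(spfn + 1, MAX_ORDER_NR_PAGES);
		unsigned long chunk_end = mo_pfn < epfn ? mo_pfn : epfn;

		printf("init+free PFNs [%lu, %lu)\n", spfn, chunk_end);
		spfn = chunk_end;
	}
	return 0;
}

With these values the chunks come out as [1000, 1024), [1024, 2048), [2048, 3072), [3072, 4096) and [4096, 5000): every boundary after the first is max-order aligned, matching the comment above about breaking the loop along max page order boundaries.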
@@ -2164,12 +2115,10 @@ static int __init deferred_init_memmap(void *data)
 {
 	pg_data_t *pgdat = data;
 	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
-	unsigned long spfn = 0, epfn = 0;
-	unsigned long first_init_pfn, flags;
+	int max_threads = deferred_page_init_max_threads(cpumask);
+	unsigned long first_init_pfn, last_pfn, flags;
 	unsigned long start = jiffies;
 	struct zone *zone;
-	int max_threads;
-	u64 i = 0;
 
 	/* Bind memory initialisation thread to a local node if possible */
 	if (!cpumask_empty(cpumask))
@@ -2197,24 +2146,20 @@ static int __init deferred_init_memmap(void *data)
 
 	/* Only the highest zone is deferred */
 	zone = pgdat->node_zones + pgdat->nr_zones - 1;
+	last_pfn = SECTION_ALIGN_UP(zone_end_pfn(zone));
 
-	max_threads = deferred_page_init_max_threads(cpumask);
+	struct padata_mt_job job = {
+		.thread_fn   = deferred_init_memmap_job,
+		.fn_arg      = zone,
+		.start       = first_init_pfn,
+		.size        = last_pfn - first_init_pfn,
+		.align       = PAGES_PER_SECTION,
+		.min_chunk   = PAGES_PER_SECTION,
+		.max_threads = max_threads,
+		.numa_aware  = false,
+	};
 
-	while (deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, first_init_pfn)) {
-		first_init_pfn = ALIGN(epfn, PAGES_PER_SECTION);
-		struct padata_mt_job job = {
-			.thread_fn   = deferred_init_memmap_chunk,
-			.fn_arg      = zone,
-			.start       = spfn,
-			.size        = first_init_pfn - spfn,
-			.align       = PAGES_PER_SECTION,
-			.min_chunk   = PAGES_PER_SECTION,
-			.max_threads = max_threads,
-			.numa_aware  = false,
-		};
-
-		padata_do_multithreaded(&job);
-	}
+	padata_do_multithreaded(&job);
 
 	/* Sanity check that the next zone really is unpopulated */
 	WARN_ON(pgdat->nr_zones < MAX_NR_ZONES && populated_zone(++zone));
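With the whole zone expressed as a single padata_mt_job, splitting the work across threads becomes padata's problem instead of deferred_init_memmap()'s. Conceptually, padata_do_multithreaded() carves [start, start + size) into chunks of at least min_chunk units on align boundaries and hands each chunk to thread_fn. The single-threaded userspace model below only illustrates that alignment contract; the struct, the carving loop, and the PAGES_PER_SECTION value are simplified assumptions, not padata's real splitting heuristic:

/* Single-threaded model of carving one zone-wide job into
 * section-aligned chunks; simplified stand-in for padata. */
#include <stdio.h>

#define PAGES_PER_SECTION 32768UL	/* assumes x86_64: 128 MiB sections */
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

struct mt_job {
	void (*thread_fn)(unsigned long start, unsigned long end, void *arg);
	void *fn_arg;
	unsigned long start, size, align, min_chunk;
};

static void init_chunk(unsigned long spfn, unsigned long epfn, void *arg)
{
	printf("worker chunk: PFNs [%lu, %lu)\n", spfn, epfn);
}

int main(void)
{
	struct mt_job job = {
		.thread_fn = init_chunk,
		.start     = 4096,			/* first_init_pfn */
		.size      = 3 * PAGES_PER_SECTION,
		.align     = PAGES_PER_SECTION,
		.min_chunk = PAGES_PER_SECTION,
	};
	unsigned long pos = job.start, end = job.start + job.size;

	while (pos < end) {
		unsigned long next = ALIGN(pos + job.min_chunk, job.align);

		if (next > end)
			next = end;
		job.thread_fn(pos, next, job.fn_arg);
		pos = next;
	}
	return 0;
}

Because .align and .min_chunk are both PAGES_PER_SECTION, chunk boundaries fall on section boundaries (given a section-aligned start), so no two workers ever touch the same section of the memory map.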
@@ -2239,12 +2184,11 @@
  */
 bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
 {
-	unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION);
+	unsigned long nr_pages_needed = SECTION_ALIGN_UP(1 << order);
 	pg_data_t *pgdat = zone->zone_pgdat;
 	unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
 	unsigned long spfn, epfn, flags;
 	unsigned long nr_pages = 0;
-	u64 i = 0;
 
 	/* Only the last zone may have deferred pages */
 	if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat))
@@ -2261,37 +2205,26 @@ bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
 		return true;
 	}
 
-	/* If the zone is empty somebody else may have cleared out the zone */
-	if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
-						 first_deferred_pfn)) {
-		pgdat->first_deferred_pfn = ULONG_MAX;
-		pgdat_resize_unlock(pgdat, &flags);
-		/* Retry only once. */
-		return first_deferred_pfn != ULONG_MAX;
+	/*
+	 * Initialize at least nr_pages_needed in section chunks.
+	 * If a section has less free memory than nr_pages_needed, the next
+	 * section will also be initialized.
+	 * Note that it still does not guarantee that allocation of order can
+	 * be satisfied if the sections are fragmented because of memblock
+	 * allocations.
+	 */
+	for (spfn = first_deferred_pfn, epfn = SECTION_ALIGN_UP(spfn + 1);
+	     nr_pages < nr_pages_needed && spfn < zone_end_pfn(zone);
+	     spfn = epfn, epfn += PAGES_PER_SECTION) {
+		nr_pages += deferred_init_memmap_chunk(spfn, epfn, zone);
 	}
 
 	/*
-	 * Initialize and free pages in MAX_PAGE_ORDER sized increments so
-	 * that we can avoid introducing any issues with the buddy
-	 * allocator.
+	 * There were no pages to initialize and free which means the zone's
+	 * memory map is completely initialized.
 	 */
-	while (spfn < epfn) {
-		/* update our first deferred PFN for this section */
-		first_deferred_pfn = spfn;
-
-		nr_pages += deferred_init_maxorder(&i, zone, &spfn, &epfn);
-		touch_nmi_watchdog();
-
-		/* We should only stop along section boundaries */
-		if ((first_deferred_pfn ^ spfn) < PAGES_PER_SECTION)
-			continue;
-
-		/* If our quota has been met we can stop here */
-		if (nr_pages >= nr_pages_needed)
-			break;
-	}
+	pgdat->first_deferred_pfn = nr_pages ? spfn : ULONG_MAX;
 
-	pgdat->first_deferred_pfn = spfn;
 	pgdat_resize_unlock(pgdat, &flags);
 
 	return nr_pages > 0;
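The rewritten deferred_grow_zone() now walks whole sections from first_deferred_pfn until it has initialized at least nr_pages_needed pages. SECTION_ALIGN_UP(x) is the section-sized spelling of ALIGN(x, PAGES_PER_SECTION), so the nr_pages_needed change in the previous hunk is purely cosmetic. A small sketch of the section stepping; the PAGES_PER_SECTION value assumes x86_64 and the PFNs are made up:

/* Sketch of the section stepping in the new deferred_grow_zone(). */
#include <stdio.h>

#define PAGES_PER_SECTION 32768UL	/* assumes x86_64 */
#define SECTION_ALIGN_UP(pfn) \
	(((pfn) + PAGES_PER_SECTION - 1) & ~(PAGES_PER_SECTION - 1))

int main(void)
{
	unsigned int order = 10;			/* want 1024 pages */
	unsigned long nr_pages_needed = SECTION_ALIGN_UP(1UL << order);
	unsigned long first_deferred_pfn = 32768, zone_end = 262144;
	unsigned long nr_pages = 0, spfn, epfn;

	for (spfn = first_deferred_pfn, epfn = SECTION_ALIGN_UP(spfn + 1);
	     nr_pages < nr_pages_needed && spfn < zone_end;
	     spfn = epfn, epfn += PAGES_PER_SECTION) {
		/* pretend the whole section was free and got initialized */
		nr_pages += epfn - spfn;
		printf("grew zone by PFNs [%lu, %lu)\n", spfn, epfn);
	}
	return 0;
}

Here one fully-free section already satisfies the order-10 request; had deferred_init_memmap_chunk() found less free memory in the section (holes, memblock allocations), the loop would continue into the next section, as the comment in the hunk above notes.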
