aboutsummaryrefslogtreecommitdiffstats
path: root/mm/internal.h
diff options
context:
space:
mode:
author David Hildenbrand <[email protected]> 2025-07-02 10:49:25 +0000
committer Andrew Morton <[email protected]> 2025-07-20 01:59:45 +0000
commit dd80cfd4878bafc74f2a386c51b5398a12ffeb8c (patch)
tree 264c51f6a381d57727638decc8ef04e0a1649a53 /mm/internal.h
parent mm: smaller folio_pte_batch() improvements (diff)
download kernel-dd80cfd4878bafc74f2a386c51b5398a12ffeb8c.tar.gz
kernel-dd80cfd4878bafc74f2a386c51b5398a12ffeb8c.zip
mm: split folio_pte_batch() into folio_pte_batch() and folio_pte_batch_flags()
Many users (including upcoming ones) don't really need the flags etc., and can live with the possible overhead of a function call. So let's provide a basic, non-inlined folio_pte_batch(), to avoid code bloat while still providing a variant that optimizes out all flag checks at runtime. folio_pte_batch_flags() will get inlined into folio_pte_batch(), optimizing out any conditionals that depend on input flags. folio_pte_batch() will behave like folio_pte_batch_flags() when no flags are specified. It's okay to add new users of folio_pte_batch_flags(), but using folio_pte_batch() if applicable is preferred. So, before this change, folio_pte_batch() was inlined into the C file and optimized by propagating constants within the resulting object file. With this change, we now also have a folio_pte_batch() that is optimized by propagating all constants. But instead of having one instance per object file, we have a single shared one. In zap_present_ptes(), where we care about performance, the compiler already seems to generate a call to a common inlined folio_pte_batch() variant, shared with fork() code. So calling the new non-inlined variant should not make a difference. While at it, drop the "addr" parameter that is unused.
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: David Hildenbrand <[email protected]>
Suggested-by: Andrew Morton <[email protected]>
Link: https://lore.kernel.org/linux-mm/[email protected]/
Reviewed-by: Oscar Salvador <[email protected]>
Reviewed-by: Zi Yan <[email protected]>
Reviewed-by: Dev Jain <[email protected]>
Cc: Alistair Popple <[email protected]>
Cc: Byungchul Park <[email protected]>
Cc: Gregory Price <[email protected]>
Cc: "Huang, Ying" <[email protected]>
Cc: Jann Horn <[email protected]>
Cc: Joshua Hahn <[email protected]>
Cc: Lance Yang <[email protected]>
Cc: Liam Howlett <[email protected]>
Cc: Lorenzo Stoakes <[email protected]>
Cc: Mathew Brost <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Mike Rapoport <[email protected]>
Cc: Rakie Kim <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Suren Baghdasaryan <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Diffstat (limited to 'mm/internal.h')
-rw-r--r-- mm/internal.h 11
1 files changed, 8 insertions, 3 deletions
diff --git a/mm/internal.h b/mm/internal.h
index 40ee7200e510..c7d18f608c3f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -218,9 +218,8 @@ static inline pte_t __pte_batch_clear_ignored(pte_t pte, fpb_t flags)
}
/**
- * folio_pte_batch - detect a PTE batch for a large folio
+ * folio_pte_batch_flags - detect a PTE batch for a large folio
* @folio: The large folio to detect a PTE batch for.
- * @addr: The user virtual address the first page is mapped at.
* @ptep: Page table pointer for the first entry.
* @pte: Page table entry for the first page.
* @max_nr: The maximum number of table entries to consider.
@@ -243,9 +242,12 @@ static inline pte_t __pte_batch_clear_ignored(pte_t pte, fpb_t flags)
* must be limited by the caller so scanning cannot exceed a single VMA and
* a single page table.
*
+ * This function will be inlined to optimize based on the input parameters;
+ * consider using folio_pte_batch() instead if applicable.
+ *
* Return: the number of table entries in the batch.
*/
-static inline unsigned int folio_pte_batch(struct folio *folio, unsigned long addr,
+static inline unsigned int folio_pte_batch_flags(struct folio *folio,
pte_t *ptep, pte_t pte, unsigned int max_nr, fpb_t flags,
bool *any_writable, bool *any_young, bool *any_dirty)
{
@@ -293,6 +295,9 @@ static inline unsigned int folio_pte_batch(struct folio *folio, unsigned long ad
return min(nr, max_nr);
}
+unsigned int folio_pte_batch(struct folio *folio, pte_t *ptep, pte_t pte,
+ unsigned int max_nr);
+
/**
* pte_move_swp_offset - Move the swap entry offset field of a swap pte
* forward or backward by delta