aboutsummaryrefslogtreecommitdiffstats
path: root/mm/util.c
diff options
context:
space:
mode:
author: David Hildenbrand <[email protected]> 2025-07-02 10:49:25 +0000
committer: Andrew Morton <[email protected]> 2025-07-20 01:59:45 +0000
commit: dd80cfd4878bafc74f2a386c51b5398a12ffeb8c (patch)
tree: 264c51f6a381d57727638decc8ef04e0a1649a53 /mm/util.c
parent: mm: smaller folio_pte_batch() improvements (diff)
download: kernel-dd80cfd4878bafc74f2a386c51b5398a12ffeb8c.tar.gz
kernel-dd80cfd4878bafc74f2a386c51b5398a12ffeb8c.zip
mm: split folio_pte_batch() into folio_pte_batch() and folio_pte_batch_flags()
Many users (including upcoming ones) don't really need the flags etc., and can live with the possible overhead of a function call. So let's provide a basic, non-inlined folio_pte_batch(), to avoid code bloat while still providing a variant that optimizes out all flag checks at runtime. folio_pte_batch_flags() will get inlined into folio_pte_batch(), optimizing out any conditionals that depend on input flags. folio_pte_batch() will behave like folio_pte_batch_flags() when no flags are specified. It's okay to add new users of folio_pte_batch_flags(), but using folio_pte_batch() if applicable is preferred. So, before this change, folio_pte_batch() was inlined into the C file and optimized by propagating constants within the resulting object file. With this change, we now also have a folio_pte_batch() that is optimized by propagating all constants. But instead of having one instance per object file, we have a single shared one. In zap_present_ptes(), where we care about performance, the compiler already seems to generate a call to a common inlined folio_pte_batch() variant, shared with fork() code. So calling the new non-inlined variant should not make a difference. While at it, drop the "addr" parameter that is unused.
Link: https://lkml.kernel.org/r/[email protected] Signed-off-by: David Hildenbrand <[email protected]> Suggested-by: Andrew Morton <[email protected]> Link: https://lore.kernel.org/linux-mm/[email protected]/ Reviewed-by: Oscar Salvador <[email protected]> Reviewed-by: Zi Yan <[email protected]> Reviewed-by: Dev Jain <[email protected]> Cc: Alistair Popple <[email protected]> Cc: Byungchul Park <[email protected]> Cc: Gregory Price <[email protected]> Cc: "Huang, Ying" <[email protected]> Cc: Jann Horn <[email protected]> Cc: Joshua Hahn <[email protected]> Cc: Lance Yang <[email protected]> Cc: Liam Howlett <[email protected]> Cc: Lorenzo Stoakes <[email protected]> Cc: Mathew Brost <[email protected]> Cc: Michal Hocko <[email protected]> Cc: Mike Rapoport <[email protected]> Cc: Rakie Kim <[email protected]> Cc: Rik van Riel <[email protected]> Cc: Suren Baghdasaryan <[email protected]> Cc: Vlastimil Babka <[email protected]> Signed-off-by: Andrew Morton <[email protected]>
Diffstat (limited to 'mm/util.c')
-rw-r--r-- mm/util.c | 29
1 files changed, 29 insertions, 0 deletions
diff --git a/mm/util.c b/mm/util.c
index 20bbfe4ce1b8..f134cefc9062 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1171,3 +1171,32 @@ int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma)
return 0;
}
EXPORT_SYMBOL(compat_vma_mmap_prepare);
+
+#ifdef CONFIG_MMU
+/**
+ * folio_pte_batch - detect a PTE batch for a large folio
+ * @folio: The large folio to detect a PTE batch for.
+ * @ptep: Page table pointer for the first entry.
+ * @pte: Page table entry for the first page.
+ * @max_nr: The maximum number of table entries to consider.
+ *
+ * This is a simplified variant of folio_pte_batch_flags() with no flags set.
+ *
+ * Detect a PTE batch: consecutive (present) PTEs that map consecutive
+ * pages of the same large folio in a single VMA and a single page table.
+ *
+ * All PTEs inside a PTE batch have the same PTE bits set, excluding the PFN,
+ * the accessed bit, writable bit, dirty bit and soft-dirty bit.
+ *
+ * ptep must map any page of the folio. max_nr must be at least one and
+ * must be limited by the caller so scanning cannot exceed a single VMA and
+ * a single page table.
+ *
+ * Return: the number of table entries in the batch.
+ */
+unsigned int folio_pte_batch(struct folio *folio, pte_t *ptep, pte_t pte,
+ unsigned int max_nr)
+{
+ return folio_pte_batch_flags(folio, ptep, pte, max_nr, 0, NULL, NULL, NULL);
+}
+#endif /* CONFIG_MMU */