aboutsummaryrefslogtreecommitdiffstats
path: root/lib/xarray.c
diff options
context:
space:
mode:
authorZi Yan <[email protected]>2025-03-14 22:21:12 +0000
committerAndrew Morton <[email protected]>2025-03-18 05:07:01 +0000
commit200a89c159a7a416115e6e309183c82183bf98aa (patch)
tree2b15bfa130230fb7b7c6d300dd7076c9f9b0d608 /lib/xarray.c
parentselftests/mm: add tests for folio_split(), buddy allocator like split (diff)
downloadkernel-200a89c159a7a416115e6e309183c82183bf98aa.tar.gz
kernel-200a89c159a7a416115e6e309183c82183bf98aa.zip
mm/filemap: use xas_try_split() in __filemap_add_folio()
Patch series "Minimize xa_node allocation during xarry split", v3. When splitting a multi-index entry in XArray from order-n to order-m, existing xas_split_alloc()+xas_split() approach requires 2^(n % XA_CHUNK_SHIFT) xa_node allocations. But its callers, __filemap_add_folio() and shmem_split_large_entry(), use at most 1 xa_node. To minimize xa_node allocation and remove the limitation of no split from order-12 (or above) to order-0 (or anything between 0 and 5)[1], xas_try_split() was added[2], which allocates (n / XA_CHUNK_SHIFT - m / XA_CHUNK_SHIFT) xa_node. It is used for non-uniform folio split, but can be used by __filemap_add_folio() and shmem_split_large_entry(). xas_split_alloc() and xas_split() split an order-9 to order-0: --------------------------------- | | | | | | | | | | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | | | | | | | | | | --------------------------------- | | | | ------- --- --- ------- | | ... | | V V V V ----------- ----------- ----------- ----------- | xa_node | | xa_node | ... | xa_node | | xa_node | ----------- ----------- ----------- ----------- xas_try_split() splits an order-9 to order-0: --------------------------------- | | | | | | | | | | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | | | | | | | | | | --------------------------------- | | V ----------- | xa_node | ----------- xas_try_split() is designed to be called iteratively with n = m + 1. xas_try_split_mini_order() is added to minmize the number of calls to xas_try_split() by telling the caller the next minimal order to split to instead of n - 1. Splitting order-n to order-m when m= l * XA_CHUNK_SHIFT does not require xa_node allocation and requires 1 xa_node when n=l * XA_CHUNK_SHIFT and m = n - 1, so it is OK to use xas_try_split() with n > m + 1 when no new xa_node is needed. xfstests quick group test passed on xfs and tmpfs. [1] https://lore.kernel.org/linux-mm/[email protected]/ [2] https://lore.kernel.org/linux-mm/[email protected]/ This patch (of 2): During __filemap_add_folio(), a shadow entry is covering n slots and a folio covers m slots with m < n is to be added. Instead of splitting all n slots, only the m slots covered by the folio need to be split and the remaining n-m shadow entries can be retained with orders ranging from m to n-1. This method only requires (n/XA_CHUNK_SHIFT) - (m/XA_CHUNK_SHIFT) new xa_nodes instead of (n % XA_CHUNK_SHIFT) * ((n/XA_CHUNK_SHIFT) - (m/XA_CHUNK_SHIFT)) new xa_nodes, compared to the original xas_split_alloc() + xas_split() one. For example, to insert an order-0 folio when an order-9 shadow entry is present (assuming XA_CHUNK_SHIFT is 6), 1 xa_node is needed instead of 8. xas_try_split_min_order() is introduced to reduce the number of calls to xas_try_split() during split. Link: https://lkml.kernel.org/r/[email protected] Link: https://lkml.kernel.org/r/[email protected] Signed-off-by: Zi Yan <[email protected]> Cc: Baolin Wang <[email protected]> Cc: Hugh Dickins <[email protected]> Cc: Kairui Song <[email protected]> Cc: Miaohe Lin <[email protected]> Cc: Mattew Wilcox <[email protected]> Cc: David Hildenbrand <[email protected]> Cc: John Hubbard <[email protected]> Cc: Kefeng Wang <[email protected]> Cc: Kirill A. Shuemov <[email protected]> Cc: Ryan Roberts <[email protected]> Cc: Yang Shi <[email protected]> Cc: Yu Zhao <[email protected]> Signed-off-by: Andrew Morton <[email protected]>
Diffstat (limited to 'lib/xarray.c')
-rw-r--r--lib/xarray.c25
1 files changed, 25 insertions, 0 deletions
diff --git a/lib/xarray.c b/lib/xarray.c
index 3bae48558e21..9644b18af18d 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1135,6 +1135,28 @@ void xas_split(struct xa_state *xas, void *entry, unsigned int order)
EXPORT_SYMBOL_GPL(xas_split);
/**
+ * xas_try_split_min_order() - Minimal split order xas_try_split() can accept
+ * @order: Current entry order.
+ *
+ * xas_try_split() can split a multi-index entry to smaller than @order - 1 if
+ * no new xa_node is needed. This function provides the minimal order
+ * xas_try_split() supports.
+ *
+ * Return: the minimal order xas_try_split() supports
+ *
+ * Context: Any context.
+ *
+ */
+unsigned int xas_try_split_min_order(unsigned int order)
+{
+ if (order % XA_CHUNK_SHIFT == 0)
+ return order == 0 ? 0 : order - 1;
+
+ return order - (order % XA_CHUNK_SHIFT);
+}
+EXPORT_SYMBOL_GPL(xas_try_split_min_order);
+
+/**
* xas_try_split() - Try to split a multi-index entry.
* @xas: XArray operation state.
* @entry: New entry to store in the array.
@@ -1145,6 +1167,9 @@ EXPORT_SYMBOL_GPL(xas_split);
* needed, the function will use GFP_NOWAIT to get one if xas->xa_alloc is
* NULL. If more new xa_node are needed, the function gives EINVAL error.
*
+ * NOTE: use xas_try_split_min_order() to get next split order instead of
+ * @order - 1 if you want to minmize xas_try_split() calls.
+ *
* Context: Any context. The caller should hold the xa_lock.
*/
void xas_try_split(struct xa_state *xas, void *entry, unsigned int order)