aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHuang Ying <[email protected]>2023-10-16 05:29:55 +0000
committerAndrew Morton <[email protected]>2023-10-25 23:47:10 +0000
commit94a3bfe4073cd88b05f7fb201ea7bf9dfa2cf5d5 (patch)
treef854d0bb36c9e9aa90d8c6c069ccb5244cada582
parentmm, pcp: avoid to drain PCP when process exit (diff)
downloadkernel-94a3bfe4073cd88b05f7fb201ea7bf9dfa2cf5d5.tar.gz
kernel-94a3bfe4073cd88b05f7fb201ea7bf9dfa2cf5d5.zip
cacheinfo: calculate size of per-CPU data cache slice
This can be used to estimate the size of the data cache slice that can be used by one CPU under ideal circumstances. Both DATA caches and UNIFIED caches are used in calculation. So, the users need to consider the impact of the code cache usage. Because the cache inclusive/non-inclusive information isn't available now, we just use the size of the per-CPU slice of LLC to make the result more predictable across architectures. This may be improved when more cache information is available in the future. A brute-force algorithm to iterate all online CPUs is used to avoid to allocate an extra cpumask, especially in offline callback. Link: https://lkml.kernel.org/r/[email protected] Signed-off-by: "Huang, Ying" <[email protected]> Acked-by: Mel Gorman <[email protected]> Cc: Sudeep Holla <[email protected]> Cc: Vlastimil Babka <[email protected]> Cc: David Hildenbrand <[email protected]> Cc: Johannes Weiner <[email protected]> Cc: Dave Hansen <[email protected]> Cc: Michal Hocko <[email protected]> Cc: Pavel Tatashin <[email protected]> Cc: Matthew Wilcox <[email protected]> Cc: Christoph Lameter <[email protected]> Cc: Arjan van de Ven <[email protected]> Signed-off-by: Andrew Morton <[email protected]>
-rw-r--r--drivers/base/cacheinfo.c49
-rw-r--r--include/linux/cacheinfo.h1
2 files changed, 49 insertions, 1 deletions
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index cbae8be1fe52..585c66fce9d9 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -898,6 +898,48 @@ err:
return rc;
}
+/*
+ * Calculate the size of the per-CPU data cache slice. This can be
+ * used to estimate the size of the data cache slice that can be used
+ * by one CPU under ideal circumstances. UNIFIED caches are counted
+ * in addition to DATA caches. So, please consider code cache usage
+ * when use the result.
+ *
+ * Because the cache inclusive/non-inclusive information isn't
+ * available, we just use the size of the per-CPU slice of LLC to make
+ * the result more predictable across architectures.
+ */
+static void update_per_cpu_data_slice_size_cpu(unsigned int cpu)
+{
+ struct cpu_cacheinfo *ci;
+ struct cacheinfo *llc;
+ unsigned int nr_shared;
+
+ if (!last_level_cache_is_valid(cpu))
+ return;
+
+ ci = ci_cacheinfo(cpu);
+ llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1);
+
+ if (llc->type != CACHE_TYPE_DATA && llc->type != CACHE_TYPE_UNIFIED)
+ return;
+
+ nr_shared = cpumask_weight(&llc->shared_cpu_map);
+ if (nr_shared)
+ ci->per_cpu_data_slice_size = llc->size / nr_shared;
+}
+
+static void update_per_cpu_data_slice_size(bool cpu_online, unsigned int cpu)
+{
+ unsigned int icpu;
+
+ for_each_online_cpu(icpu) {
+ if (!cpu_online && icpu == cpu)
+ continue;
+ update_per_cpu_data_slice_size_cpu(icpu);
+ }
+}
+
static int cacheinfo_cpu_online(unsigned int cpu)
{
int rc = detect_cache_attributes(cpu);
@@ -906,7 +948,11 @@ static int cacheinfo_cpu_online(unsigned int cpu)
return rc;
rc = cache_add_dev(cpu);
if (rc)
- free_cache_attributes(cpu);
+ goto err;
+ update_per_cpu_data_slice_size(true, cpu);
+ return 0;
+err:
+ free_cache_attributes(cpu);
return rc;
}
@@ -916,6 +962,7 @@ static int cacheinfo_cpu_pre_down(unsigned int cpu)
cpu_cache_sysfs_exit(cpu);
free_cache_attributes(cpu);
+ update_per_cpu_data_slice_size(false, cpu);
return 0;
}
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index a5cfd44fab45..d504eb4b49ab 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -73,6 +73,7 @@ struct cacheinfo {
struct cpu_cacheinfo {
struct cacheinfo *info_list;
+ unsigned int per_cpu_data_slice_size;
unsigned int num_levels;
unsigned int num_leaves;
bool cpu_map_populated;