author		Linus Torvalds <[email protected]>	2025-06-02 23:00:26 +0000
committer	Linus Torvalds <[email protected]>	2025-06-02 23:00:26 +0000
commit		fd1f8473503e5bf897bd3e8efe3545c0352954e6
tree		bd9f699a23c0093dd55be8cac76d4329837654d0 /mm/memcontrol.c
parent		Merge tag 'gfs2-for-6.16-fix' of git://git.kernel.org/pub/scm/linux/kernel/gi...
parent		mm/khugepaged: clean up refcount check using folio_expected_ref_count()
Merge tag 'mm-stable-2025-06-01-14-06' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull more MM updates from Andrew Morton:
- "zram: support algorithm-specific parameters" from Sergey Senozhatsky
adds infrastructure for passing algorithm-specific parameters into
zram. A single parameter `winbits' is implemented at this time.
- "memcg: nmi-safe kmem charging" from Shakeel Butt makes memcg
charging nmi-safe, which is required by BFP, which can operate in NMI
context.
- "Some random fixes and cleanup to shmem" from Kemeng Shi implements
small fixes and cleanups in the shmem code.
- "Skip mm selftests instead when kernel features are not present" from
Zi Yan fixes some issues in the MM selftest code.
- "mm/damon: build-enable essential DAMON components by default" from
SeongJae Park reworks DAMON Kconfig to make it easier to enable
CONFIG_DAMON.
- "sched/numa: add statistics of numa balance task migration" from Libo
Chen adds more info into sysfs and procfs files to improve visibility
into the NUMA balancer's task migration activity.
- "selftests/mm: cow and gup_longterm cleanups" from Mark Brown
provides various updates to some of the MM selftests to make them
play better with the overall containing framework.
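The nmi-safe charging series is easiest to see in the mm/memcontrol.c diff at the bottom of this page: take the precise accounting path in normal context, defer into a plain atomic in NMI context, and fold the deferred value back in at flush time. Below is a minimal userspace analogue of that pattern, not kernel code; the names (`in_nmi_ctx`, `charge`, `flush_deferred`) are illustrative stand-ins.

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static long precise_stat;               /* stands in for the rstat-tracked value */
static atomic_long deferred_stat;       /* stands in for memcg->kmem_stat */
static _Thread_local bool in_nmi_ctx;   /* stands in for in_nmi() */

/* charge path: precise accounting normally, atomic deferral in "NMI" */
static void charge(long nr)
{
	if (!in_nmi_ctx)
		precise_stat += nr;                    /* mod_memcg_state()-like path */
	else
		atomic_fetch_add(&deferred_stat, nr);  /* atomic_add()-like path */
}

/* flush path: drain the deferred atomic exactly once, as the kernel
 * does with atomic_xchg() in flush_nmi_stats() */
static void flush_deferred(void)
{
	long pending = atomic_exchange(&deferred_stat, 0);

	precise_stat += pending;
}

int main(void)
{
	charge(4);              /* normal context */
	in_nmi_ctx = true;
	charge(2);              /* "NMI" context: deferred */
	in_nmi_ctx = false;
	flush_deferred();
	printf("stat=%ld\n", precise_stat);  /* prints stat=6 */
	return 0;
}
```

The exchange-based drain is what keeps the fallback lossless: no update is dropped, it is merely folded in later than usual.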
* tag 'mm-stable-2025-06-01-14-06' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (43 commits)
mm/khugepaged: clean up refcount check using folio_expected_ref_count()
selftests/mm: fix test result reporting in gup_longterm
selftests/mm: report unique test names for each cow test
selftests/mm: add helper for logging test start and results
selftests/mm: use standard ksft_finished() in cow and gup_longterm
selftests/damon/_damon_sysfs: skip testcases if CONFIG_DAMON_SYSFS is disabled
sched/numa: add statistics of numa balance task
sched/numa: fix task swap by skipping kernel threads
tools/testing: check correct variable in open_procmap()
tools/testing/vma: add missing function stub
mm/gup: update comment explaining why gup_fast() disables IRQs
selftests/mm: two fixes for the pfnmap test
mm/khugepaged: fix race with folio split/free using temporary reference
mm: add CONFIG_PAGE_BLOCK_ORDER to select page block order
mmu_notifiers: remove leftover stub macros
selftests/mm: deduplicate test names in madv_populate
kcov: rust: add flags for KCOV with Rust
mm: rust: make CONFIG_MMU ifdefs more narrow
mmu_gather: move tlb flush for VM_PFNMAP/VM_MIXEDMAP vmas into free_pgtables()
mm/damon/Kconfig: enable CONFIG_DAMON by default
...
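For the sched/numa series, the new task-migration events surface as vm events (the diff below adds NUMA_TASK_MIGRATE and NUMA_TASK_SWAP to memcg_vm_event_stat, so they also appear in memory.stat). A quick way to watch them system-wide is to scan /proc/vmstat; the exported strings are assumed here to share a numa_task_ prefix (e.g. numa_task_migrated), so check the series for the exact spellings.

```c
/* Tiny reader for the new NUMA-balance task events. The enum names in
 * the diff below are NUMA_TASK_MIGRATE/NUMA_TASK_SWAP; the exported
 * strings are assumed, not confirmed, to be numa_task_* in /proc/vmstat. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/vmstat", "r");

	if (!f) {
		perror("/proc/vmstat");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		/* match any of the numa_task_* counters */
		if (!strncmp(line, "numa_task_", 10))
			fputs(line, stdout);
	}
	fclose(f);
	return 0;
}
```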
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--	mm/memcontrol.c	127
1 file changed, 115 insertions(+), 12 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b90aa3075950..902da8a9c643 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -474,6 +474,8 @@ static const unsigned int memcg_vm_event_stat[] = {
 	NUMA_PAGE_MIGRATE,
 	NUMA_PTE_UPDATES,
 	NUMA_HINT_FAULTS,
+	NUMA_TASK_MIGRATE,
+	NUMA_TASK_SWAP,
 #endif
 };
 
@@ -531,7 +533,7 @@ struct memcg_vmstats {
 	unsigned long		events_pending[NR_MEMCG_EVENTS];
 
 	/* Stats updates since the last flush */
-	atomic64_t		stats_updates;
+	atomic_t		stats_updates;
 };
 
 /*
@@ -557,7 +559,7 @@ static u64 flush_last_time;
 
 static bool memcg_vmstats_needs_flush(struct memcg_vmstats *vmstats)
 {
-	return atomic64_read(&vmstats->stats_updates) >
+	return atomic_read(&vmstats->stats_updates) >
 		MEMCG_CHARGE_BATCH * num_online_cpus();
 }
 
@@ -571,7 +573,9 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val,
 	if (!val)
 		return;
 
-	css_rstat_updated(&memcg->css, cpu);
+	/* TODO: add to cgroup update tree once it is nmi-safe. */
+	if (!in_nmi())
+		css_rstat_updated(&memcg->css, cpu);
 	statc_pcpu = memcg->vmstats_percpu;
 	for (; statc_pcpu; statc_pcpu = statc->parent_pcpu) {
 		statc = this_cpu_ptr(statc_pcpu);
@@ -589,7 +593,7 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val,
 			continue;
 
 		stats_updates = this_cpu_xchg(statc_pcpu->stats_updates, 0);
-		atomic64_add(stats_updates, &statc->vmstats->stats_updates);
+		atomic_add(stats_updates, &statc->vmstats->stats_updates);
 	}
 }
 
@@ -597,7 +601,7 @@ static void __mem_cgroup_flush_stats(struct mem_cgroup *memcg, bool force)
 {
 	bool needs_flush = memcg_vmstats_needs_flush(memcg->vmstats);
 
-	trace_memcg_flush_stats(memcg, atomic64_read(&memcg->vmstats->stats_updates),
+	trace_memcg_flush_stats(memcg, atomic_read(&memcg->vmstats->stats_updates),
 		force, needs_flush);
 
 	if (!force && !needs_flush)
@@ -2513,17 +2517,47 @@ static void commit_charge(struct folio *folio, struct mem_cgroup *memcg)
 	folio->memcg_data = (unsigned long)memcg;
 }
 
+#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
+static inline void account_slab_nmi_safe(struct mem_cgroup *memcg,
+					 struct pglist_data *pgdat,
+					 enum node_stat_item idx, int nr)
+{
+	struct lruvec *lruvec;
+
+	if (likely(!in_nmi())) {
+		lruvec = mem_cgroup_lruvec(memcg, pgdat);
+		mod_memcg_lruvec_state(lruvec, idx, nr);
+	} else {
+		struct mem_cgroup_per_node *pn = memcg->nodeinfo[pgdat->node_id];
+
+		/* TODO: add to cgroup update tree once it is nmi-safe. */
+		if (idx == NR_SLAB_RECLAIMABLE_B)
+			atomic_add(nr, &pn->slab_reclaimable);
+		else
+			atomic_add(nr, &pn->slab_unreclaimable);
+	}
+}
+#else
+static inline void account_slab_nmi_safe(struct mem_cgroup *memcg,
+					 struct pglist_data *pgdat,
+					 enum node_stat_item idx, int nr)
+{
+	struct lruvec *lruvec;
+
+	lruvec = mem_cgroup_lruvec(memcg, pgdat);
+	mod_memcg_lruvec_state(lruvec, idx, nr);
+}
+#endif
+
 static inline void mod_objcg_mlstate(struct obj_cgroup *objcg,
 				     struct pglist_data *pgdat,
 				     enum node_stat_item idx, int nr)
 {
 	struct mem_cgroup *memcg;
-	struct lruvec *lruvec;
 
 	rcu_read_lock();
 	memcg = obj_cgroup_memcg(objcg);
-	lruvec = mem_cgroup_lruvec(memcg, pgdat);
-	mod_memcg_lruvec_state(lruvec, idx, nr);
+	account_slab_nmi_safe(memcg, pgdat, idx, nr);
 	rcu_read_unlock();
 }
 
@@ -2648,6 +2682,9 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
 	struct mem_cgroup *memcg;
 	struct obj_cgroup *objcg;
 
+	if (IS_ENABLED(CONFIG_MEMCG_NMI_UNSAFE) && in_nmi())
+		return NULL;
+
 	if (in_task()) {
 		memcg = current->active_memcg;
 		if (unlikely(memcg))
@@ -2710,6 +2747,23 @@ struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
 	return objcg;
 }
 
+#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
+static inline void account_kmem_nmi_safe(struct mem_cgroup *memcg, int val)
+{
+	if (likely(!in_nmi())) {
+		mod_memcg_state(memcg, MEMCG_KMEM, val);
+	} else {
+		/* TODO: add to cgroup update tree once it is nmi-safe. */
+		atomic_add(val, &memcg->kmem_stat);
+	}
+}
+#else
+static inline void account_kmem_nmi_safe(struct mem_cgroup *memcg, int val)
+{
+	mod_memcg_state(memcg, MEMCG_KMEM, val);
+}
+#endif
+
 /*
  * obj_cgroup_uncharge_pages: uncharge a number of kernel pages from a objcg
  * @objcg: object cgroup to uncharge
@@ -2722,7 +2776,7 @@ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg,
 
 	memcg = get_mem_cgroup_from_objcg(objcg);
 
-	mod_memcg_state(memcg, MEMCG_KMEM, -nr_pages);
+	account_kmem_nmi_safe(memcg, -nr_pages);
 	memcg1_account_kmem(memcg, -nr_pages);
 	if (!mem_cgroup_is_root(memcg))
 		refill_stock(memcg, nr_pages);
@@ -2750,7 +2804,7 @@ static int obj_cgroup_charge_pages(struct obj_cgroup *objcg, gfp_t gfp,
 	if (ret)
 		goto out;
 
-	mod_memcg_state(memcg, MEMCG_KMEM, nr_pages);
+	account_kmem_nmi_safe(memcg, nr_pages);
 	memcg1_account_kmem(memcg, nr_pages);
 out:
 	css_put(&memcg->css);
@@ -3961,6 +4015,53 @@ static void mem_cgroup_stat_aggregate(struct aggregate_control *ac)
 	}
 }
 
+#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
+static void flush_nmi_stats(struct mem_cgroup *memcg, struct mem_cgroup *parent,
+			    int cpu)
+{
+	int nid;
+
+	if (atomic_read(&memcg->kmem_stat)) {
+		int kmem = atomic_xchg(&memcg->kmem_stat, 0);
+		int index = memcg_stats_index(MEMCG_KMEM);
+
+		memcg->vmstats->state[index] += kmem;
+		if (parent)
+			parent->vmstats->state_pending[index] += kmem;
+	}
+
+	for_each_node_state(nid, N_MEMORY) {
+		struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
+		struct lruvec_stats *lstats = pn->lruvec_stats;
+		struct lruvec_stats *plstats = NULL;
+
+		if (parent)
+			plstats = parent->nodeinfo[nid]->lruvec_stats;
+
+		if (atomic_read(&pn->slab_reclaimable)) {
+			int slab = atomic_xchg(&pn->slab_reclaimable, 0);
+			int index = memcg_stats_index(NR_SLAB_RECLAIMABLE_B);
+
+			lstats->state[index] += slab;
+			if (plstats)
+				plstats->state_pending[index] += slab;
+		}
+		if (atomic_read(&pn->slab_unreclaimable)) {
+			int slab = atomic_xchg(&pn->slab_unreclaimable, 0);
+			int index = memcg_stats_index(NR_SLAB_UNRECLAIMABLE_B);
+
+			lstats->state[index] += slab;
+			if (plstats)
+				plstats->state_pending[index] += slab;
+		}
+	}
+}
+#else
+static void flush_nmi_stats(struct mem_cgroup *memcg, struct mem_cgroup *parent,
+			    int cpu)
+{}
+#endif
+
 static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
@@ -3969,6 +4070,8 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 	struct aggregate_control ac;
 	int nid;
 
+	flush_nmi_stats(memcg, parent, cpu);
+
 	statc = per_cpu_ptr(memcg->vmstats_percpu, cpu);
 
 	ac = (struct aggregate_control) {
@@ -4018,8 +4121,8 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 	}
 	WRITE_ONCE(statc->stats_updates, 0);
 	/* We are in a per-cpu loop here, only do the atomic write once */
-	if (atomic64_read(&memcg->vmstats->stats_updates))
-		atomic64_set(&memcg->vmstats->stats_updates, 0);
+	if (atomic_read(&memcg->vmstats->stats_updates))
+		atomic_set(&memcg->vmstats->stats_updates, 0);
 }
 
 static void mem_cgroup_fork(struct task_struct *task)
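The flush side of the nmi-safe series (flush_nmi_stats() above) drains each deferred atomic exactly once with an exchange and credits the result both to the cgroup's own state and to the parent's pending state for hierarchical aggregation. Below is a minimal userspace sketch of that drain step; `struct group` is a hypothetical stand-in for struct mem_cgroup, not the kernel's type.

```c
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

/* hypothetical stand-in for struct mem_cgroup with one stat slot */
struct group {
	long state;              /* vmstats->state[index] analogue */
	long state_pending;      /* vmstats->state_pending[index] analogue */
	atomic_long deferred;    /* memcg->kmem_stat analogue */
	struct group *parent;
};

static void flush_deferred(struct group *g)
{
	/* the read-then-exchange pairing mirrors the kernel's
	 * atomic_read()/atomic_xchg(): skip the exchange when empty */
	if (atomic_load(&g->deferred)) {
		long val = atomic_exchange(&g->deferred, 0);

		g->state += val;                          /* own state */
		if (g->parent)
			g->parent->state_pending += val;  /* propagate up */
	}
}

int main(void)
{
	struct group root = { .parent = NULL };
	struct group child = { .parent = &root };

	atomic_fetch_add(&child.deferred, 3);   /* deferred from "NMI" */
	flush_deferred(&child);
	printf("child.state=%ld root.state_pending=%ld\n",
	       child.state, root.state_pending);  /* 3 and 3 */
	return 0;
}
```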
