 fs/proc/internal.h        |   5
 fs/proc/task_mmu.c        | 141
 include/linux/mmap_lock.h |  11
 mm/madvise.c              |   3
 mm/mmap_lock.c            |  93
 5 files changed, 244 insertions(+), 9 deletions(-)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 3d48ffe72583..7c235451c5ea 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -384,6 +384,11 @@ struct proc_maps_private {
 	struct task_struct *task;
 	struct mm_struct *mm;
 	struct vma_iterator iter;
+	loff_t last_pos;
+#ifdef CONFIG_PER_VMA_LOCK
+	bool mmap_locked;
+	struct vm_area_struct *locked_vma;
+#endif
 #ifdef CONFIG_NUMA
 	struct mempolicy *task_mempolicy;
 #endif
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 90237df1ed33..3d6d8a9f13fc 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -130,13 +130,132 @@ static void release_task_mempolicy(struct proc_maps_private *priv)
 }
 #endif
 
-static struct vm_area_struct *proc_get_vma(struct proc_maps_private *priv,
-						loff_t *ppos)
+#ifdef CONFIG_PER_VMA_LOCK
+
+static void unlock_vma(struct proc_maps_private *priv)
+{
+	if (priv->locked_vma) {
+		vma_end_read(priv->locked_vma);
+		priv->locked_vma = NULL;
+	}
+}
+
+static const struct seq_operations proc_pid_maps_op;
+
+static inline bool lock_vma_range(struct seq_file *m,
+				  struct proc_maps_private *priv)
+{
+	/*
+	 * smaps and numa_maps perform a page table walk and therefore
+	 * require mmap_lock, but maps can be read by locking just the vma
+	 * and walking the vma tree under rcu read protection.
+	 */
+	if (m->op != &proc_pid_maps_op) {
+		if (mmap_read_lock_killable(priv->mm))
+			return false;
+
+		priv->mmap_locked = true;
+	} else {
+		rcu_read_lock();
+		priv->locked_vma = NULL;
+		priv->mmap_locked = false;
+	}
+
+	return true;
+}
+
+static inline void unlock_vma_range(struct proc_maps_private *priv)
+{
+	if (priv->mmap_locked) {
+		mmap_read_unlock(priv->mm);
+	} else {
+		unlock_vma(priv);
+		rcu_read_unlock();
+	}
+}
+
+static struct vm_area_struct *get_next_vma(struct proc_maps_private *priv,
+					   loff_t last_pos)
+{
+	struct vm_area_struct *vma;
+
+	if (priv->mmap_locked)
+		return vma_next(&priv->iter);
+
+	unlock_vma(priv);
+	vma = lock_next_vma(priv->mm, &priv->iter, last_pos);
+	if (!IS_ERR_OR_NULL(vma))
+		priv->locked_vma = vma;
+
+	return vma;
+}
+
+static inline bool fallback_to_mmap_lock(struct proc_maps_private *priv,
+					 loff_t pos)
 {
-	struct vm_area_struct *vma = vma_next(&priv->iter);
+	if (priv->mmap_locked)
+		return false;
+
+	rcu_read_unlock();
+	mmap_read_lock(priv->mm);
+	/* Reinitialize the iterator after taking mmap_lock */
+	vma_iter_set(&priv->iter, pos);
+	priv->mmap_locked = true;
+	return true;
+}
+
+#else /* CONFIG_PER_VMA_LOCK */
+
+static inline bool lock_vma_range(struct seq_file *m,
+				  struct proc_maps_private *priv)
+{
+	return mmap_read_lock_killable(priv->mm) == 0;
+}
+
+static inline void unlock_vma_range(struct proc_maps_private *priv)
+{
+	mmap_read_unlock(priv->mm);
+}
+
+static struct vm_area_struct *get_next_vma(struct proc_maps_private *priv,
+					   loff_t last_pos)
+{
+	return vma_next(&priv->iter);
+}
+
+static inline bool fallback_to_mmap_lock(struct proc_maps_private *priv,
+					 loff_t pos)
+{
+	return false;
+}
+
+#endif /* CONFIG_PER_VMA_LOCK */
+
+static struct vm_area_struct *proc_get_vma(struct seq_file *m, loff_t *ppos)
+{
+	struct proc_maps_private *priv = m->private;
+	struct vm_area_struct *vma;
+
+retry:
+	vma = get_next_vma(priv, *ppos);
+	/* EINTR or EAGAIN is possible */
+	if (IS_ERR(vma)) {
+		if (PTR_ERR(vma) == -EAGAIN && fallback_to_mmap_lock(priv, *ppos))
+			goto retry;
+
+		return vma;
+	}
+
+	/* Store the previous position to be able to restart if needed */
+	priv->last_pos = *ppos;
 	if (vma) {
-		*ppos = vma->vm_start;
+		/*
+		 * Track the end of the reported vma to ensure the position
+		 * changes even if the previous vma was merged with the next
+		 * vma and we found the extended vma with the same vm_start.
+		 */
+		*ppos = vma->vm_end;
 	} else {
 		*ppos = SENTINEL_VMA_GATE;
 		vma = get_gate_vma(priv->mm);
@@ -166,19 +285,25 @@ static void *m_start(struct seq_file *m, loff_t *ppos)
 		return NULL;
 	}
 
-	if (mmap_read_lock_killable(mm)) {
+	if (!lock_vma_range(m, priv)) {
 		mmput(mm);
 		put_task_struct(priv->task);
 		priv->task = NULL;
 		return ERR_PTR(-EINTR);
 	}
 
+	/*
+	 * Reset the current position if last_addr was set before
+	 * and it's not a sentinel.
+	 */
+	if (last_addr > 0)
+		*ppos = last_addr = priv->last_pos;
 	vma_iter_init(&priv->iter, mm, (unsigned long)last_addr);
 	hold_task_mempolicy(priv);
 	if (last_addr == SENTINEL_VMA_GATE)
 		return get_gate_vma(mm);
 
-	return proc_get_vma(priv, ppos);
+	return proc_get_vma(m, ppos);
 }
 
 static void *m_next(struct seq_file *m, void *v, loff_t *ppos)
@@ -187,7 +312,7 @@ static void *m_next(struct seq_file *m, void *v, loff_t *ppos)
 		*ppos = SENTINEL_VMA_END;
 		return NULL;
 	}
-	return proc_get_vma(m->private, ppos);
+	return proc_get_vma(m, ppos);
 }
 
 static void m_stop(struct seq_file *m, void *v)
@@ -199,7 +324,7 @@ static void m_stop(struct seq_file *m, void *v)
 		return;
 
 	release_task_mempolicy(priv);
-	mmap_read_unlock(mm);
+	unlock_vma_range(priv);
 	mmput(mm);
 	put_task_struct(priv->task);
 	priv->task = NULL;
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index 5da384bd0a26..1f4f44951abe 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -309,6 +309,17 @@ void vma_mark_detached(struct vm_area_struct *vma);
 struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
 					  unsigned long address);
 
+/*
+ * Locks the next vma pointed to by the iterator. Confirms that the locked
+ * vma has not been modified and retries under mmap_lock protection if a
+ * modification was detected. Should be called from an RCU read section.
+ * Returns either a valid locked VMA, NULL if there are no more VMAs, or
+ * -EINTR if the process was interrupted.
+ */
+struct vm_area_struct *lock_next_vma(struct mm_struct *mm,
+				     struct vma_iterator *iter,
+				     unsigned long address);
+
 #else /* CONFIG_PER_VMA_LOCK */
 
 static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
diff --git a/mm/madvise.c b/mm/madvise.c
index 2bf80989d5b6..bb80fc5ea08f 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -108,7 +108,8 @@ void anon_vma_name_free(struct kref *kref)
 
 struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma)
 {
-	mmap_assert_locked(vma->vm_mm);
+	if (!rwsem_is_locked(&vma->vm_mm->mmap_lock))
+		vma_assert_locked(vma);
 
 	return vma->anon_name;
 }
diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c
index 5f725cc67334..729fb7d0dd59 100644
--- a/mm/mmap_lock.c
+++ b/mm/mmap_lock.c
@@ -178,6 +178,99 @@ inval:
 	count_vm_vma_lock_event(VMA_LOCK_ABORT);
 	return NULL;
 }
+
+static struct vm_area_struct *lock_next_vma_under_mmap_lock(struct mm_struct *mm,
+							     struct vma_iterator *vmi,
+							     unsigned long from_addr)
+{
+	struct vm_area_struct *vma;
+	int ret;
+
+	ret = mmap_read_lock_killable(mm);
+	if (ret)
+		return ERR_PTR(ret);
+
+	/* Lookup the vma at the last position again under mmap_read_lock */
+	vma_iter_set(vmi, from_addr);
+	vma = vma_next(vmi);
+	if (vma) {
+		/* Very unlikely vma->vm_refcnt overflow case */
+		if (unlikely(!vma_start_read_locked(vma)))
+			vma = ERR_PTR(-EAGAIN);
+	}
+
+	mmap_read_unlock(mm);
+
+	return vma;
+}
+
+struct vm_area_struct *lock_next_vma(struct mm_struct *mm,
+				     struct vma_iterator *vmi,
+				     unsigned long from_addr)
+{
+	struct vm_area_struct *vma;
+	unsigned int mm_wr_seq;
+	bool mmap_unlocked;
+
+	RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "no rcu read lock held");
+retry:
+	/* Start mmap_lock speculation in case we need to verify the vma later */
+	mmap_unlocked = mmap_lock_speculate_try_begin(mm, &mm_wr_seq);
+	vma = vma_next(vmi);
+	if (!vma)
+		return NULL;
+
+	vma = vma_start_read(mm, vma);
+	if (IS_ERR_OR_NULL(vma)) {
+		/*
+		 * Retry immediately if the vma gets detached from under us.
+		 * An infinite loop should not happen because the vma we find
+		 * would have to be constantly knocked out from under us.
+		 */
+		if (PTR_ERR(vma) == -EAGAIN) {
+			/* reset to search from the last address */
+			vma_iter_set(vmi, from_addr);
+			goto retry;
+		}
+
+		goto fallback;
+	}
+
+	/*
+	 * Verify that the vma we locked belongs to the same address space
+	 * and is not behind the last search position.
+	 */
+	if (unlikely(vma->vm_mm != mm || from_addr >= vma->vm_end))
+		goto fallback_unlock;
+
+	/*
+	 * vma can be ahead of the last search position but we need to verify
+	 * that it was not shrunk after we found it and that another vma has
+	 * not been installed ahead of it. Otherwise we might observe a gap
+	 * that should not be there.
+	 */
+	if (from_addr < vma->vm_start) {
+		/* Verify only if the address space might have changed since vma lookup. */
+		if (!mmap_unlocked || mmap_lock_speculate_retry(mm, mm_wr_seq)) {
+			vma_iter_set(vmi, from_addr);
+			if (vma != vma_next(vmi))
+				goto fallback_unlock;
+		}
+	}
+
+	return vma;
+
+fallback_unlock:
+	vma_end_read(vma);
+fallback:
+	rcu_read_unlock();
+	vma = lock_next_vma_under_mmap_lock(mm, vmi, from_addr);
+	rcu_read_lock();
+	/* Reinitialize the iterator after re-entering the rcu read section */
+	vma_iter_set(vmi, IS_ERR_OR_NULL(vma) ? from_addr : vma->vm_end);
+
+	return vma;
+}
 #endif /* CONFIG_PER_VMA_LOCK */
 
 #ifdef CONFIG_LOCK_MM_AND_FIND_VMA

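The patch changes only how the kernel serves these reads; the /proc/<pid>/maps output format is untouched, so existing readers are unaffected. As a quick smoke test for the new lock-free path, here is a minimal user-space sketch that re-reads a task's maps file in a loop (the iteration count and buffer size are arbitrary choices for the sketch, not anything mandated by the patch):

#include <stdio.h>
#include <stdlib.h>

/*
 * Re-read /proc/self/maps end to end. With the patch applied, each pass
 * walks the VMA tree under RCU plus per-VMA locks instead of holding
 * mmap_lock for the whole walk, so concurrent mmap()/munmap() calls in
 * other threads are no longer blocked by the reader.
 */
int main(void)
{
	char line[1024];

	for (int i = 0; i < 100; i++) {
		FILE *f = fopen("/proc/self/maps", "r");

		if (!f) {
			perror("fopen");
			return EXIT_FAILURE;
		}
		while (fgets(line, sizeof(line), f))
			;	/* consume one VMA entry per line */
		fclose(f);
	}
	return EXIT_SUCCESS;
}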
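lock_next_vma() above follows a speculate-then-validate scheme: sample the mm write sequence count before the VMA lookup (mmap_lock_speculate_try_begin()), then re-validate the lookup only when a writer may have run in the meantime (mmap_lock_speculate_retry()). The stand-alone C sketch below illustrates just that pattern with C11 atomics; the write_seq counter, shared_value, and both helper names are invented for the illustration and are not kernel APIs:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for mm->mm_lock_seq and the VMA tree (invented for the sketch). */
static _Atomic unsigned int write_seq;	/* odd while a "writer" is active */
static _Atomic int shared_value;	/* the data a reader looks up */

/* Analogous to mmap_lock_speculate_try_begin(): true if no writer is active. */
static bool speculate_try_begin(unsigned int *seq)
{
	*seq = atomic_load_explicit(&write_seq, memory_order_acquire);
	return (*seq & 1) == 0;
}

/* Analogous to mmap_lock_speculate_retry(): true if a writer ran since *seq. */
static bool speculate_retry(unsigned int seq)
{
	atomic_thread_fence(memory_order_acquire);
	return atomic_load_explicit(&write_seq, memory_order_relaxed) != seq;
}

int main(void)
{
	unsigned int seq;
	bool speculated = speculate_try_begin(&seq);
	int v = atomic_load(&shared_value);	/* the "vma_next()" lookup */

	/*
	 * Mirrors the check in lock_next_vma(): skip revalidation only when
	 * the speculation window provably saw no concurrent writer.
	 */
	if (!speculated || speculate_retry(seq))
		printf("writer may have run; re-check under the lock\n");
	else
		printf("lookup of %d is known to be consistent\n", v);
	return 0;
}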