diff options
Diffstat (limited to 'mm/filemap.c')
| -rw-r--r-- | mm/filemap.c | 107 |
1 files changed, 105 insertions, 2 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 33b60d448fca..b8ed647416e9 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -47,6 +47,7 @@ #include <linux/splice.h> #include <linux/rcupdate_wait.h> #include <linux/sched/mm.h> +#include <linux/fsnotify.h> #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include "internal.h" @@ -1523,7 +1524,7 @@ void folio_end_read(struct folio *folio, bool success) /* Must be in bottom byte for x86 to work */ BUILD_BUG_ON(PG_uptodate > 7); VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); - VM_BUG_ON_FOLIO(folio_test_uptodate(folio), folio); + VM_BUG_ON_FOLIO(success && folio_test_uptodate(folio), folio); if (likely(success)) mask |= 1 << PG_uptodate; @@ -2996,7 +2997,7 @@ static inline loff_t folio_seek_hole_data(struct xa_state *xas, if (ops->is_partially_uptodate(folio, offset, bsz) == seek_data) break; - start = (start + bsz) & ~(bsz - 1); + start = (start + bsz) & ~((u64)bsz - 1); offset += bsz; } while (offset < folio_size(folio)); unlock: @@ -3141,6 +3142,14 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) unsigned long vm_flags = vmf->vma->vm_flags; unsigned int mmap_miss; + /* + * If we have pre-content watches we need to disable readahead to make + * sure that we don't populate our mapping with 0 filled pages that we + * never emitted an event for. + */ + if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) + return fpin; + #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* Use the readahead code, even if readahead is disabled */ if ((vm_flags & VM_HUGEPAGE) && HPAGE_PMD_ORDER <= MAX_PAGECACHE_ORDER) { @@ -3209,6 +3218,10 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf, struct file *fpin = NULL; unsigned int mmap_miss; + /* See comment in do_sync_mmap_readahead. */ + if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) + return fpin; + /* If we don't want any read-ahead, don't bother */ if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages) return fpin; @@ -3268,6 +3281,48 @@ static vm_fault_t filemap_fault_recheck_pte_none(struct vm_fault *vmf) } /** + * filemap_fsnotify_fault - maybe emit a pre-content event. + * @vmf: struct vm_fault containing details of the fault. + * + * If we have a pre-content watch on this file we will emit an event for this + * range. If we return anything the fault caller should return immediately, we + * will return VM_FAULT_RETRY if we had to emit an event, which will trigger the + * fault again and then the fault handler will run the second time through. + * + * Return: a bitwise-OR of %VM_FAULT_ codes, 0 if nothing happened. + */ +vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf) +{ + struct file *fpin = NULL; + int mask = (vmf->flags & FAULT_FLAG_WRITE) ? MAY_WRITE : MAY_ACCESS; + loff_t pos = vmf->pgoff >> PAGE_SHIFT; + size_t count = PAGE_SIZE; + int err; + + /* + * We already did this and now we're retrying with everything locked, + * don't emit the event and continue. + */ + if (vmf->flags & FAULT_FLAG_TRIED) + return 0; + + /* No watches, we're done. */ + if (likely(!FMODE_FSNOTIFY_HSM(vmf->vma->vm_file->f_mode))) + return 0; + + fpin = maybe_unlock_mmap_for_io(vmf, fpin); + if (!fpin) + return VM_FAULT_SIGBUS; + + err = fsnotify_file_area_perm(fpin, mask, &pos, count); + fput(fpin); + if (err) + return VM_FAULT_SIGBUS; + return VM_FAULT_RETRY; +} +EXPORT_SYMBOL_GPL(filemap_fsnotify_fault); + +/** * filemap_fault - read in file data for page fault handling * @vmf: struct vm_fault containing details of the fault * @@ -3371,6 +3426,37 @@ retry_find: */ if (unlikely(!folio_test_uptodate(folio))) { /* + * If this is a precontent file we have can now emit an event to + * try and populate the folio. + */ + if (!(vmf->flags & FAULT_FLAG_TRIED) && + unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) { + loff_t pos = folio_pos(folio); + size_t count = folio_size(folio); + + /* We're NOWAIT, we have to retry. */ + if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) { + folio_unlock(folio); + goto out_retry; + } + + if (mapping_locked) + filemap_invalidate_unlock_shared(mapping); + mapping_locked = false; + + folio_unlock(folio); + fpin = maybe_unlock_mmap_for_io(vmf, fpin); + if (!fpin) + goto out_retry; + + error = fsnotify_file_area_perm(fpin, MAY_ACCESS, &pos, + count); + if (error) + ret = VM_FAULT_SIGBUS; + goto out_retry; + } + + /* * If the invalidate lock is not held, the folio was in cache * and uptodate and now it is not. Strange but possible since we * didn't hold the page lock all the time. Let's drop @@ -4376,6 +4462,20 @@ resched: } /* + * See mincore: reveal pagecache information only for files + * that the calling process has write access to, or could (if + * tried) open for writing. + */ +static inline bool can_do_cachestat(struct file *f) +{ + if (f->f_mode & FMODE_WRITE) + return true; + if (inode_owner_or_capable(file_mnt_idmap(f), file_inode(f))) + return true; + return file_permission(f, MAY_WRITE) == 0; +} + +/* * The cachestat(2) system call. * * cachestat() returns the page cache statistics of a file in the @@ -4430,6 +4530,9 @@ SYSCALL_DEFINE4(cachestat, unsigned int, fd, if (is_file_hugepages(fd_file(f))) return -EOPNOTSUPP; + if (!can_do_cachestat(fd_file(f))) + return -EPERM; + if (flags != 0) return -EINVAL; |
