From d919b33dafb3e222d23671b2bb06d119aede625f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 6 Apr 2020 20:09:01 -0700 Subject: proc: faster open/read/close with "permanent" files Now that "struct proc_ops" exist we can start putting there stuff which could not fly with VFS "struct file_operations"... Most of fs/proc/inode.c file is dedicated to make open/read/.../close reliable in the event of disappearing /proc entries which usually happens if module is getting removed. Files like /proc/cpuinfo which never disappear simply do not need such protection. Save 2 atomic ops, 1 allocation, 1 free per open/read/close sequence for such "permanent" files. Enable "permanent" flag for /proc/cpuinfo /proc/kmsg /proc/modules /proc/slabinfo /proc/stat /proc/sysvipc/* /proc/swaps More will come once I figure out foolproof way to prevent out module authors from marking their stuff "permanent" for performance reasons when it is not. This should help with scalability: benchmark is "read /proc/cpuinfo R times by N threads scattered over the system". N R t, s (before) t, s (after) ----------------------------------------------------- 64 4096 1.582458 1.530502 -3.2% 256 4096 6.371926 6.125168 -3.9% 1024 4096 25.64888 24.47528 -4.6% Benchmark source: #include #include #include #include #include #include #include #include const int NR_CPUS = sysconf(_SC_NPROCESSORS_ONLN); int N; const char *filename; int R; int xxx = 0; int glue(int n) { cpu_set_t m; CPU_ZERO(&m); CPU_SET(n, &m); return sched_setaffinity(0, sizeof(cpu_set_t), &m); } void f(int n) { glue(n % NR_CPUS); while (*(volatile int *)&xxx == 0) { } for (int i = 0; i < R; i++) { int fd = open(filename, O_RDONLY); char buf[4096]; ssize_t rv = read(fd, buf, sizeof(buf)); asm volatile ("" :: "g" (rv)); close(fd); } } int main(int argc, char *argv[]) { if (argc < 4) { std::cerr << "usage: " << argv[0] << ' ' << "N /proc/filename R "; return 1; } N = atoi(argv[1]); filename = argv[2]; R = atoi(argv[3]); for (int i = 0; i < NR_CPUS; i++) { if (glue(i) == 0) break; } std::vector T; T.reserve(N); for (int i = 0; i < N; i++) { T.emplace_back(f, i); } auto t0 = std::chrono::system_clock::now(); { *(volatile int *)&xxx = 1; for (auto& t: T) { t.join(); } } auto t1 = std::chrono::system_clock::now(); std::chrono::duration dt = t1 - t0; std::cout << dt.count() << ' '; return 0; } P.S.: Explicit randomization marker is added because adding non-function pointer will silently disable structure layout randomization. [akpm@linux-foundation.org: coding style fixes] Reported-by: kbuild test robot Reported-by: Dan Carpenter Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Cc: Al Viro Cc: Joe Perches Link: http://lkml.kernel.org/r/20200222201539.GA22576@avx2 Signed-off-by: Linus Torvalds --- fs/proc/generic.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) (limited to 'fs/proc/generic.c') diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 3faed94e4b65..4ed6dabdf6ff 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -531,6 +531,12 @@ struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode, return p; } +static inline void pde_set_flags(struct proc_dir_entry *pde) +{ + if (pde->proc_ops->proc_flags & PROC_ENTRY_PERMANENT) + pde->flags |= PROC_ENTRY_PERMANENT; +} + struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct proc_ops *proc_ops, void *data) @@ -541,6 +547,7 @@ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, if (!p) return NULL; p->proc_ops = proc_ops; + pde_set_flags(p); return proc_register(parent, p); } EXPORT_SYMBOL(proc_create_data); @@ -572,6 +579,7 @@ static int proc_seq_release(struct inode *inode, struct file *file) } static const struct proc_ops proc_seq_ops = { + /* not permanent -- can call into arbitrary seq_operations */ .proc_open = proc_seq_open, .proc_read = seq_read, .proc_lseek = seq_lseek, @@ -602,6 +610,7 @@ static int proc_single_open(struct inode *inode, struct file *file) } static const struct proc_ops proc_single_ops = { + /* not permanent -- can call into arbitrary ->single_show */ .proc_open = proc_single_open, .proc_read = seq_read, .proc_lseek = seq_lseek, @@ -662,9 +671,13 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) de = pde_subdir_find(parent, fn, len); if (de) { - rb_erase(&de->subdir_node, &parent->subdir); - if (S_ISDIR(de->mode)) { - parent->nlink--; + if (unlikely(pde_is_permanent(de))) { + WARN(1, "removing permanent /proc entry '%s'", de->name); + de = NULL; + } else { + rb_erase(&de->subdir_node, &parent->subdir); + if (S_ISDIR(de->mode)) + parent->nlink--; } } write_unlock(&proc_subdir_lock); @@ -700,12 +713,24 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) write_unlock(&proc_subdir_lock); return -ENOENT; } + if (unlikely(pde_is_permanent(root))) { + write_unlock(&proc_subdir_lock); + WARN(1, "removing permanent /proc entry '%s/%s'", + root->parent->name, root->name); + return -EINVAL; + } rb_erase(&root->subdir_node, &parent->subdir); de = root; while (1) { next = pde_subdir_first(de); if (next) { + if (unlikely(pde_is_permanent(root))) { + write_unlock(&proc_subdir_lock); + WARN(1, "removing permanent /proc entry '%s/%s'", + next->parent->name, next->name); + return -EINVAL; + } rb_erase(&next->subdir_node, &de->subdir); de = next; continue; -- cgit