aboutsummaryrefslogtreecommitdiffstats
path: root/tools/testing
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing')
-rw-r--r--tools/testing/cxl/test/mem.c21
-rw-r--r--tools/testing/selftests/Makefile9
-rw-r--r--tools/testing/selftests/arm64/abi/Makefile2
-rw-r--r--tools/testing/selftests/arm64/abi/hwcap.c16
-rw-r--r--tools/testing/selftests/arm64/abi/tpidr2.c140
-rw-r--r--tools/testing/selftests/arm64/fp/fp-ptrace.c77
-rw-r--r--tools/testing/selftests/arm64/fp/sve-ptrace.c12
-rw-r--r--tools/testing/selftests/arm64/gcs/basic-gcs.c63
-rw-r--r--tools/testing/selftests/arm64/mte/check_buffer_fill.c12
-rw-r--r--tools/testing/selftests/arm64/mte/check_child_memory.c8
-rw-r--r--tools/testing/selftests/arm64/mte/check_hugetlb_options.c10
-rw-r--r--tools/testing/selftests/arm64/mte/check_ksm_options.c6
-rw-r--r--tools/testing/selftests/arm64/mte/check_mmap_options.c890
-rw-r--r--tools/testing/selftests/arm64/mte/check_prctl.c29
-rw-r--r--tools/testing/selftests/arm64/mte/check_tags_inclusion.c10
-rw-r--r--tools/testing/selftests/arm64/mte/check_user_mem.c4
-rw-r--r--tools/testing/selftests/arm64/mte/mte_common_util.c84
-rw-r--r--tools/testing/selftests/arm64/mte/mte_common_util.h9
-rw-r--r--tools/testing/selftests/arm64/mte/mte_def.h8
-rw-r--r--tools/testing/selftests/bpf/config3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c458
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c91
-rw-r--r--tools/testing/selftests/bpf/progs/sock_iter_batch.c36
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_ktls.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ctx.c25
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c56
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.h1
-rw-r--r--tools/testing/selftests/breakpoints/step_after_suspend_test.c41
-rw-r--r--tools/testing/selftests/coredump/Makefile2
-rw-r--r--tools/testing/selftests/coredump/config3
-rw-r--r--tools/testing/selftests/coredump/stackdump_test.c1697
-rwxr-xr-xtools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh4
-rw-r--r--tools/testing/selftests/drivers/net/Makefile3
-rw-r--r--tools/testing/selftests/drivers/net/hw/Makefile1
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py465
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devmem.py5
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/iou-zcrx.py98
-rw-r--r--tools/testing/selftests/drivers/net/hw/lib/py/__init__.py17
-rw-r--r--tools/testing/selftests/drivers/net/hw/ncdevmem.c9
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_api.py476
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_input_xfrm.py8
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/tso.py101
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/__init__.py14
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/env.py2
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/load.py25
-rw-r--r--tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh165
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/napi_id.py4
-rw-r--r--tools/testing/selftests/drivers/net/napi_id_helper.c35
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_basic.sh55
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_cmdline.sh52
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_sysdata.sh30
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink.sh55
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh23
-rwxr-xr-xtools/testing/selftests/drivers/net/netpoll_basic.py396
-rwxr-xr-xtools/testing/selftests/drivers/net/ping.py2
-rwxr-xr-xtools/testing/selftests/drivers/net/stats.py45
-rwxr-xr-xtools/testing/selftests/drivers/net/xdp.py658
-rw-r--r--tools/testing/selftests/filesystems/.gitignore1
-rw-r--r--tools/testing/selftests/filesystems/Makefile2
-rw-r--r--tools/testing/selftests/filesystems/kernfs_test.c38
-rw-r--r--tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc28
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc2
-rw-r--r--tools/testing/selftests/futex/functional/futex_priv_hash.c113
-rw-r--r--tools/testing/selftests/futex/include/futex2test.h8
-rw-r--r--tools/testing/selftests/futex/include/futextest.h11
-rw-r--r--tools/testing/selftests/hid/config.common1
-rw-r--r--tools/testing/selftests/ipc/msgque.c47
-rw-r--r--tools/testing/selftests/kexec/Makefile2
-rw-r--r--tools/testing/selftests/kvm/arm64/debug-exceptions.c4
-rw-r--r--tools/testing/selftests/landlock/audit.h7
-rw-r--r--tools/testing/selftests/landlock/audit_test.c1
-rw-r--r--tools/testing/selftests/landlock/fs_test.c40
-rw-r--r--tools/testing/selftests/lkdtm/config2
-rw-r--r--tools/testing/selftests/mm/split_huge_page_test.c3
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile5
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile2
-rw-r--r--tools/testing/selftests/net/af_unix/scm_inq.c125
-rw-r--r--tools/testing/selftests/net/af_unix/scm_pidfd.c217
-rw-r--r--tools/testing/selftests/net/bench/Makefile7
-rw-r--r--tools/testing/selftests/net/bench/page_pool/Makefile17
-rw-r--r--tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c267
-rw-r--r--tools/testing/selftests/net/bench/page_pool/time_bench.c394
-rw-r--r--tools/testing/selftests/net/bench/page_pool/time_bench.h238
-rwxr-xr-xtools/testing/selftests/net/bench/test_bench_page_pool.sh32
-rwxr-xr-xtools/testing/selftests/net/broadcast_pmtu.sh47
-rw-r--r--tools/testing/selftests/net/config11
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile1
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh69
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multicast.sh35
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower.sh52
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh771
-rwxr-xr-xtools/testing/selftests/net/ipv6_force_forwarding.sh105
-rw-r--r--tools/testing/selftests/net/lib.sh35
-rw-r--r--tools/testing/selftests/net/lib/py/__init__.py2
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py7
-rw-r--r--tools/testing/selftests/net/lib/py/utils.py39
-rw-r--r--tools/testing/selftests/net/lib/py/ynl.py5
-rw-r--r--tools/testing/selftests/net/lib/xdp_native.bpf.c621
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile3
-rw-r--r--tools/testing/selftests/net/mptcp/config2
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh5
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh5
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh5
-rw-r--r--tools/testing/selftests/net/msg_zerocopy.c24
-rwxr-xr-xtools/testing/selftests/net/msg_zerocopy.sh84
-rwxr-xr-xtools/testing/selftests/net/netdev-l2addr.sh59
-rw-r--r--tools/testing/selftests/net/netfilter/config7
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_clash.sh45
-rwxr-xr-xtools/testing/selftests/net/netfilter/ipvs.sh4
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_interface_stress.sh5
-rw-r--r--tools/testing/selftests/net/nettest.c12
-rwxr-xr-xtools/testing/selftests/net/nl_netdev.py127
-rwxr-xr-xtools/testing/selftests/net/packetdrill/ksft_runner.sh4
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt45
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt27
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt44
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt33
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh92
-rwxr-xr-xtools/testing/selftests/net/rtnetlink_notification.sh112
-rwxr-xr-xtools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh2
-rwxr-xr-xtools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh50
-rwxr-xr-xtools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh2
-rwxr-xr-xtools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh2
-rw-r--r--tools/testing/selftests/net/tcp_ao/seq-ext.c2
-rwxr-xr-xtools/testing/selftests/net/test_neigh.sh366
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_vnifiltering.sh9
-rwxr-xr-xtools/testing/selftests/net/vrf_route_leaking.sh4
-rw-r--r--tools/testing/selftests/nolibc/Makefile343
-rw-r--r--tools/testing/selftests/nolibc/Makefile.include10
-rw-r--r--tools/testing/selftests/nolibc/Makefile.nolibc383
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test.c53
-rwxr-xr-xtools/testing/selftests/nolibc/run-tests.sh14
-rw-r--r--tools/testing/selftests/pidfd/.gitignore2
-rw-r--r--tools/testing/selftests/pidfd/Makefile5
-rw-r--r--tools/testing/selftests/pidfd/pidfd.h15
-rw-r--r--tools/testing/selftests/pidfd/pidfd_file_handle_test.c60
-rw-r--r--tools/testing/selftests/pidfd/pidfd_setattr_test.c69
-rw-r--r--tools/testing/selftests/pidfd/pidfd_xattr_test.c132
-rw-r--r--tools/testing/selftests/ptp/testptp.c11
-rw-r--r--tools/testing/selftests/ptrace/peeksiginfo.c2
-rw-r--r--tools/testing/selftests/sched_ext/exit.c8
-rw-r--r--tools/testing/selftests/syscall_user_dispatch/sud_test.c140
-rwxr-xr-xtools/testing/selftests/sysctl/sysctl.sh2
-rw-r--r--tools/testing/selftests/tc-testing/config2
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json5
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json254
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json81
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json36
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.sh6
-rw-r--r--tools/testing/selftests/ublk/fault_inject.c15
-rw-r--r--tools/testing/selftests/ublk/file_backed.c32
-rw-r--r--tools/testing/selftests/ublk/kublk.c140
-rw-r--r--tools/testing/selftests/ublk/kublk.h135
-rw-r--r--tools/testing/selftests/ublk/null.c32
-rw-r--r--tools/testing/selftests/ublk/stripe.c33
-rw-r--r--tools/testing/selftests/ublk/utils.h70
-rw-r--r--tools/testing/selftests/vDSO/Makefile2
-rw-r--r--tools/testing/selftests/vDSO/vdso_config.h2
l---------[-rw-r--r--]tools/testing/selftests/vDSO/vdso_standalone_test_x86.c59
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_chacha.c3
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_clock_getres.c1
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_correctness.c2
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_getrandom.c10
-rw-r--r--tools/testing/selftests/vsock/.gitignore2
-rw-r--r--tools/testing/selftests/vsock/Makefile17
-rw-r--r--tools/testing/selftests/vsock/config111
-rw-r--r--tools/testing/selftests/vsock/settings1
-rwxr-xr-xtools/testing/selftests/vsock/vmtest.sh487
-rw-r--r--tools/testing/selftests/wireguard/qemu/kernel.config4
-rw-r--r--tools/testing/vma/vma_internal.h34
-rw-r--r--tools/testing/vsock/Makefile1
-rw-r--r--tools/testing/vsock/util.c112
-rw-r--r--tools/testing/vsock/util.h35
-rw-r--r--tools/testing/vsock/vsock_test.c353
180 files changed, 12810 insertions, 1699 deletions
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index 0f1d91f57ba3..d533481672b7 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -1828,27 +1828,10 @@ static ssize_t fw_buf_checksum_show(struct device *dev,
{
struct cxl_mockmem_data *mdata = dev_get_drvdata(dev);
u8 hash[SHA256_DIGEST_SIZE];
- unsigned char *hstr, *hptr;
- struct sha256_state sctx;
- ssize_t written = 0;
- int i;
-
- sha256_init(&sctx);
- sha256_update(&sctx, mdata->fw, mdata->fw_size);
- sha256_final(&sctx, hash);
-
- hstr = kzalloc((SHA256_DIGEST_SIZE * 2) + 1, GFP_KERNEL);
- if (!hstr)
- return -ENOMEM;
-
- hptr = hstr;
- for (i = 0; i < SHA256_DIGEST_SIZE; i++)
- hptr += sprintf(hptr, "%02x", hash[i]);
- written = sysfs_emit(buf, "%s\n", hstr);
+ sha256(mdata->fw, mdata->fw_size, hash);
- kfree(hstr);
- return written;
+ return sysfs_emit(buf, "%*phN\n", SHA256_DIGEST_SIZE, hash);
}
static DEVICE_ATTR_RO(fw_buf_checksum);
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 339b31e6a6b5..030da61dbff3 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -77,6 +77,7 @@ TARGETS += net/ovpn
TARGETS += net/packetdrill
TARGETS += net/rds
TARGETS += net/tcp_ao
+TARGETS += nolibc
TARGETS += nsfs
TARGETS += pci_endpoint
TARGETS += pcie_bwctrl
@@ -293,6 +294,14 @@ ifdef INSTALL_PATH
$(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET COLLECTION=$$TARGET \
-C $$TARGET emit_tests >> $(TEST_LIST); \
done;
+ @VERSION=$$(git describe HEAD 2>/dev/null); \
+ if [ -n "$$VERSION" ]; then \
+ echo "$$VERSION" > $(INSTALL_PATH)/VERSION; \
+ printf "Version saved to $(INSTALL_PATH)/VERSION\n"; \
+ else \
+ printf "Unable to get version from git describe\n"; \
+ fi
+ @echo "**Kselftest Installation is complete: $(INSTALL_PATH)**"
else
$(error Error: set INSTALL_PATH to use install)
endif
diff --git a/tools/testing/selftests/arm64/abi/Makefile b/tools/testing/selftests/arm64/abi/Makefile
index a6d30c620908..483488f8c2ad 100644
--- a/tools/testing/selftests/arm64/abi/Makefile
+++ b/tools/testing/selftests/arm64/abi/Makefile
@@ -12,4 +12,4 @@ $(OUTPUT)/syscall-abi: syscall-abi.c syscall-abi-asm.S
$(OUTPUT)/tpidr2: tpidr2.c
$(CC) -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
-static -include ../../../../include/nolibc/nolibc.h \
- -ffreestanding -Wall $^ -o $@ -lgcc
+ -I../.. -ffreestanding -Wall $^ -o $@ -lgcc
diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index 35f521e5f41c..002ec38a8bbb 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -21,6 +21,10 @@
#define TESTS_PER_HWCAP 3
+#ifndef AT_HWCAP3
+#define AT_HWCAP3 29
+#endif
+
/*
* Function expected to generate exception when the feature is not
* supported and return when it is supported. If the specific exception
@@ -1098,6 +1102,18 @@ static const struct hwcap_data {
.sigill_fn = hbc_sigill,
.sigill_reliable = true,
},
+ {
+ .name = "MTE_FAR",
+ .at_hwcap = AT_HWCAP3,
+ .hwcap_bit = HWCAP3_MTE_FAR,
+ .cpuinfo = "mtefar",
+ },
+ {
+ .name = "MTE_STOREONLY",
+ .at_hwcap = AT_HWCAP3,
+ .hwcap_bit = HWCAP3_MTE_STORE_ONLY,
+ .cpuinfo = "mtestoreonly",
+ },
};
typedef void (*sighandler_fn)(int, siginfo_t *, void *);
diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c
index eb19dcc37a75..f58a9f89b952 100644
--- a/tools/testing/selftests/arm64/abi/tpidr2.c
+++ b/tools/testing/selftests/arm64/abi/tpidr2.c
@@ -3,31 +3,12 @@
#include <linux/sched.h>
#include <linux/wait.h>
+#include "kselftest.h"
+
#define SYS_TPIDR2 "S3_3_C13_C0_5"
#define EXPECTED_TESTS 5
-static void putstr(const char *str)
-{
- write(1, str, strlen(str));
-}
-
-static void putnum(unsigned int num)
-{
- char c;
-
- if (num / 10)
- putnum(num / 10);
-
- c = '0' + (num % 10);
- write(1, &c, 1);
-}
-
-static int tests_run;
-static int tests_passed;
-static int tests_failed;
-static int tests_skipped;
-
static void set_tpidr2(uint64_t val)
{
asm volatile (
@@ -50,20 +31,6 @@ static uint64_t get_tpidr2(void)
return val;
}
-static void print_summary(void)
-{
- if (tests_passed + tests_failed + tests_skipped != EXPECTED_TESTS)
- putstr("# UNEXPECTED TEST COUNT: ");
-
- putstr("# Totals: pass:");
- putnum(tests_passed);
- putstr(" fail:");
- putnum(tests_failed);
- putstr(" xfail:0 xpass:0 skip:");
- putnum(tests_skipped);
- putstr(" error:0\n");
-}
-
/* Processes should start with TPIDR2 == 0 */
static int default_value(void)
{
@@ -105,9 +72,8 @@ static int write_fork_read(void)
if (newpid == 0) {
/* In child */
if (get_tpidr2() != oldpid) {
- putstr("# TPIDR2 changed in child: ");
- putnum(get_tpidr2());
- putstr("\n");
+ ksft_print_msg("TPIDR2 changed in child: %llx\n",
+ get_tpidr2());
exit(0);
}
@@ -115,14 +81,12 @@ static int write_fork_read(void)
if (get_tpidr2() == getpid()) {
exit(1);
} else {
- putstr("# Failed to set TPIDR2 in child\n");
+ ksft_print_msg("Failed to set TPIDR2 in child\n");
exit(0);
}
}
if (newpid < 0) {
- putstr("# fork() failed: -");
- putnum(-newpid);
- putstr("\n");
+ ksft_print_msg("fork() failed: %d\n", newpid);
return 0;
}
@@ -132,23 +96,22 @@ static int write_fork_read(void)
if (waiting < 0) {
if (errno == EINTR)
continue;
- putstr("# waitpid() failed: ");
- putnum(errno);
- putstr("\n");
+ ksft_print_msg("waitpid() failed: %d\n", errno);
return 0;
}
if (waiting != newpid) {
- putstr("# waitpid() returned wrong PID\n");
+ ksft_print_msg("waitpid() returned wrong PID: %d != %d\n",
+ waiting, newpid);
return 0;
}
if (!WIFEXITED(status)) {
- putstr("# child did not exit\n");
+ ksft_print_msg("child did not exit\n");
return 0;
}
if (getpid() != get_tpidr2()) {
- putstr("# TPIDR2 corrupted in parent\n");
+ ksft_print_msg("TPIDR2 corrupted in parent\n");
return 0;
}
@@ -188,35 +151,32 @@ static int write_clone_read(void)
stack = malloc(__STACK_SIZE);
if (!stack) {
- putstr("# malloc() failed\n");
+ ksft_print_msg("malloc() failed\n");
return 0;
}
ret = sys_clone(CLONE_VM, (unsigned long)stack + __STACK_SIZE,
&parent_tid, 0, &child_tid);
if (ret == -1) {
- putstr("# clone() failed\n");
- putnum(errno);
- putstr("\n");
+ ksft_print_msg("clone() failed: %d\n", errno);
return 0;
}
if (ret == 0) {
/* In child */
if (get_tpidr2() != 0) {
- putstr("# TPIDR2 non-zero in child: ");
- putnum(get_tpidr2());
- putstr("\n");
+ ksft_print_msg("TPIDR2 non-zero in child: %llx\n",
+ get_tpidr2());
exit(0);
}
if (gettid() == 0)
- putstr("# Child TID==0\n");
+ ksft_print_msg("Child TID==0\n");
set_tpidr2(gettid());
if (get_tpidr2() == gettid()) {
exit(1);
} else {
- putstr("# Failed to set TPIDR2 in child\n");
+ ksft_print_msg("Failed to set TPIDR2 in child\n");
exit(0);
}
}
@@ -227,25 +187,22 @@ static int write_clone_read(void)
if (waiting < 0) {
if (errno == EINTR)
continue;
- putstr("# wait4() failed: ");
- putnum(errno);
- putstr("\n");
+ ksft_print_msg("wait4() failed: %d\n", errno);
return 0;
}
if (waiting != ret) {
- putstr("# wait4() returned wrong PID ");
- putnum(waiting);
- putstr("\n");
+ ksft_print_msg("wait4() returned wrong PID %d\n",
+ waiting);
return 0;
}
if (!WIFEXITED(status)) {
- putstr("# child did not exit\n");
+ ksft_print_msg("child did not exit\n");
return 0;
}
if (parent != get_tpidr2()) {
- putstr("# TPIDR2 corrupted in parent\n");
+ ksft_print_msg("TPIDR2 corrupted in parent\n");
return 0;
}
@@ -253,35 +210,14 @@ static int write_clone_read(void)
}
}
-#define run_test(name) \
- if (name()) { \
- tests_passed++; \
- } else { \
- tests_failed++; \
- putstr("not "); \
- } \
- putstr("ok "); \
- putnum(++tests_run); \
- putstr(" " #name "\n");
-
-#define skip_test(name) \
- tests_skipped++; \
- putstr("ok "); \
- putnum(++tests_run); \
- putstr(" # SKIP " #name "\n");
-
int main(int argc, char **argv)
{
int ret;
- putstr("TAP version 13\n");
- putstr("1..");
- putnum(EXPECTED_TESTS);
- putstr("\n");
+ ksft_print_header();
+ ksft_set_plan(5);
- putstr("# PID: ");
- putnum(getpid());
- putstr("\n");
+ ksft_print_msg("PID: %d\n", getpid());
/*
* This test is run with nolibc which doesn't support hwcap and
@@ -290,23 +226,21 @@ int main(int argc, char **argv)
*/
ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0);
if (ret >= 0) {
- run_test(default_value);
- run_test(write_read);
- run_test(write_sleep_read);
- run_test(write_fork_read);
- run_test(write_clone_read);
+ ksft_test_result(default_value(), "default_value\n");
+ ksft_test_result(write_read, "write_read\n");
+ ksft_test_result(write_sleep_read, "write_sleep_read\n");
+ ksft_test_result(write_fork_read, "write_fork_read\n");
+ ksft_test_result(write_clone_read, "write_clone_read\n");
} else {
- putstr("# SME support not present\n");
+ ksft_print_msg("SME support not present\n");
- skip_test(default_value);
- skip_test(write_read);
- skip_test(write_sleep_read);
- skip_test(write_fork_read);
- skip_test(write_clone_read);
+ ksft_test_result_skip("default_value\n");
+ ksft_test_result_skip("write_read\n");
+ ksft_test_result_skip("write_sleep_read\n");
+ ksft_test_result_skip("write_fork_read\n");
+ ksft_test_result_skip("write_clone_read\n");
}
- print_summary();
-
- return 0;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c
index 191c47ca0ed8..124bc883365e 100644
--- a/tools/testing/selftests/arm64/fp/fp-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c
@@ -1061,11 +1061,31 @@ static bool sve_write_supported(struct test_config *config)
if (config->sme_vl_in != config->sme_vl_expected) {
return false;
}
+
+ if (!sve_supported())
+ return false;
}
return true;
}
+static bool sve_write_fpsimd_supported(struct test_config *config)
+{
+ if (!sve_supported())
+ return false;
+
+ if ((config->svcr_in & SVCR_ZA) != (config->svcr_expected & SVCR_ZA))
+ return false;
+
+ if (config->svcr_expected & SVCR_SM)
+ return false;
+
+ if (config->sme_vl_in != config->sme_vl_expected)
+ return false;
+
+ return true;
+}
+
static void fpsimd_write_expected(struct test_config *config)
{
int vl;
@@ -1134,6 +1154,9 @@ static void sve_write_expected(struct test_config *config)
int vl = vl_expected(config);
int sme_vq = __sve_vq_from_vl(config->sme_vl_expected);
+ if (!vl)
+ return;
+
fill_random(z_expected, __SVE_ZREGS_SIZE(__sve_vq_from_vl(vl)));
fill_random(p_expected, __SVE_PREGS_SIZE(__sve_vq_from_vl(vl)));
@@ -1152,7 +1175,7 @@ static void sve_write_expected(struct test_config *config)
}
}
-static void sve_write(pid_t child, struct test_config *config)
+static void sve_write_sve(pid_t child, struct test_config *config)
{
struct user_sve_header *sve;
struct iovec iov;
@@ -1161,6 +1184,9 @@ static void sve_write(pid_t child, struct test_config *config)
vl = vl_expected(config);
vq = __sve_vq_from_vl(vl);
+ if (!vl)
+ return;
+
iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE);
iov.iov_base = malloc(iov.iov_len);
if (!iov.iov_base) {
@@ -1195,6 +1221,45 @@ static void sve_write(pid_t child, struct test_config *config)
free(iov.iov_base);
}
+static void sve_write_fpsimd(pid_t child, struct test_config *config)
+{
+ struct user_sve_header *sve;
+ struct user_fpsimd_state *fpsimd;
+ struct iovec iov;
+ int ret, vl, vq;
+
+ vl = vl_expected(config);
+ vq = __sve_vq_from_vl(vl);
+
+ if (!vl)
+ return;
+
+ iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq,
+ SVE_PT_REGS_FPSIMD);
+ iov.iov_base = malloc(iov.iov_len);
+ if (!iov.iov_base) {
+ ksft_print_msg("Failed allocating %lu byte SVE write buffer\n",
+ iov.iov_len);
+ return;
+ }
+ memset(iov.iov_base, 0, iov.iov_len);
+
+ sve = iov.iov_base;
+ sve->size = iov.iov_len;
+ sve->flags = SVE_PT_REGS_FPSIMD;
+ sve->vl = vl;
+
+ fpsimd = iov.iov_base + SVE_PT_REGS_OFFSET;
+ memcpy(&fpsimd->vregs, v_expected, sizeof(v_expected));
+
+ ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_SVE, &iov);
+ if (ret != 0)
+ ksft_print_msg("Failed to write SVE: %s (%d)\n",
+ strerror(errno), errno);
+
+ free(iov.iov_base);
+}
+
static bool za_write_supported(struct test_config *config)
{
if ((config->svcr_in & SVCR_SM) != (config->svcr_expected & SVCR_SM))
@@ -1386,7 +1451,13 @@ static struct test_definition sve_test_defs[] = {
.name = "SVE write",
.supported = sve_write_supported,
.set_expected_values = sve_write_expected,
- .modify_values = sve_write,
+ .modify_values = sve_write_sve,
+ },
+ {
+ .name = "SVE write FPSIMD format",
+ .supported = sve_write_fpsimd_supported,
+ .set_expected_values = fpsimd_write_expected,
+ .modify_values = sve_write_fpsimd,
},
};
@@ -1607,7 +1678,7 @@ int main(void)
* Run the test set if there is no SVE or SME, with those we
* have to pick a VL for each run.
*/
- if (!sve_supported()) {
+ if (!sve_supported() && !sme_supported()) {
test_config.sve_vl_in = 0;
test_config.sve_vl_expected = 0;
test_config.sme_vl_in = 0;
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
index 577b6e05e860..b22303778fb0 100644
--- a/tools/testing/selftests/arm64/fp/sve-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -170,7 +170,7 @@ static void ptrace_set_get_inherit(pid_t child, const struct vec_type *type)
memset(&sve, 0, sizeof(sve));
sve.size = sizeof(sve);
sve.vl = sve_vl_from_vq(SVE_VQ_MIN);
- sve.flags = SVE_PT_VL_INHERIT;
+ sve.flags = SVE_PT_VL_INHERIT | SVE_PT_REGS_SVE;
ret = set_sve(child, type, &sve);
if (ret != 0) {
ksft_test_result_fail("Failed to set %s SVE_PT_VL_INHERIT\n",
@@ -235,6 +235,7 @@ static void ptrace_set_get_vl(pid_t child, const struct vec_type *type,
/* Set the VL by doing a set with no register payload */
memset(&sve, 0, sizeof(sve));
sve.size = sizeof(sve);
+ sve.flags = SVE_PT_REGS_SVE;
sve.vl = vl;
ret = set_sve(child, type, &sve);
if (ret != 0) {
@@ -253,7 +254,7 @@ static void ptrace_set_get_vl(pid_t child, const struct vec_type *type,
return;
}
- ksft_test_result(new_sve->vl = prctl_vl, "Set %s VL %u\n",
+ ksft_test_result(new_sve->vl == prctl_vl, "Set %s VL %u\n",
type->name, vl);
free(new_sve);
@@ -301,8 +302,10 @@ static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type)
p[j] = j;
}
+ /* This should only succeed for SVE */
ret = set_sve(child, type, sve);
- ksft_test_result(ret == 0, "%s FPSIMD set via SVE: %d\n",
+ ksft_test_result((type->regset == NT_ARM_SVE) == (ret == 0),
+ "%s FPSIMD set via SVE: %d\n",
type->name, ret);
if (ret)
goto out;
@@ -750,9 +753,6 @@ int main(void)
ksft_print_header();
ksft_set_plan(EXPECTED_TESTS);
- if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
- ksft_exit_skip("SVE not available\n");
-
child = fork();
if (!child)
return do_child();
diff --git a/tools/testing/selftests/arm64/gcs/basic-gcs.c b/tools/testing/selftests/arm64/gcs/basic-gcs.c
index 3fb9742342a3..54f9c888249d 100644
--- a/tools/testing/selftests/arm64/gcs/basic-gcs.c
+++ b/tools/testing/selftests/arm64/gcs/basic-gcs.c
@@ -298,6 +298,68 @@ out:
return pass;
}
+/* A vfork()ed process can run and exit */
+static bool test_vfork(void)
+{
+ unsigned long child_mode;
+ int ret, status;
+ pid_t pid;
+ bool pass = true;
+
+ pid = vfork();
+ if (pid == -1) {
+ ksft_print_msg("vfork() failed: %d\n", errno);
+ pass = false;
+ goto out;
+ }
+ if (pid == 0) {
+ /*
+ * In child, make sure we can call a function, read
+ * the GCS pointer and status and then exit.
+ */
+ valid_gcs_function();
+ get_gcspr();
+
+ ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS,
+ &child_mode, 0, 0, 0);
+ if (ret == 0 && !(child_mode & PR_SHADOW_STACK_ENABLE)) {
+ ksft_print_msg("GCS not enabled in child\n");
+ ret = EXIT_FAILURE;
+ }
+
+ _exit(ret);
+ }
+
+ /*
+ * In parent, check we can still do function calls then check
+ * on the child.
+ */
+ valid_gcs_function();
+
+ ksft_print_msg("Waiting for child %d\n", pid);
+
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ ksft_print_msg("Failed to wait for child: %d\n",
+ errno);
+ return false;
+ }
+
+ if (!WIFEXITED(status)) {
+ ksft_print_msg("Child exited due to signal %d\n",
+ WTERMSIG(status));
+ pass = false;
+ } else if (WEXITSTATUS(status)) {
+ ksft_print_msg("Child exited with status %d\n",
+ WEXITSTATUS(status));
+ pass = false;
+ }
+
+out:
+
+ return pass;
+}
+
typedef bool (*gcs_test)(void);
static struct {
@@ -314,6 +376,7 @@ static struct {
{ "enable_invalid", enable_invalid, true },
{ "map_guarded_stack", map_guarded_stack },
{ "fork", test_fork },
+ { "vfork", test_vfork },
};
int main(void)
diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
index 2ee7f114d7fa..ff4e07503349 100644
--- a/tools/testing/selftests/arm64/mte/check_buffer_fill.c
+++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
@@ -31,7 +31,7 @@ static int check_buffer_by_byte(int mem_type, int mode)
int i, j, item;
bool err;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
item = ARRAY_SIZE(sizes);
for (i = 0; i < item; i++) {
@@ -68,7 +68,7 @@ static int check_buffer_underflow_by_byte(int mem_type, int mode,
bool err;
char *und_ptr = NULL;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
item = ARRAY_SIZE(sizes);
for (i = 0; i < item; i++) {
ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0,
@@ -164,7 +164,7 @@ static int check_buffer_overflow_by_byte(int mem_type, int mode,
size_t tagged_size, overflow_size;
char *over_ptr = NULL;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
item = ARRAY_SIZE(sizes);
for (i = 0; i < item; i++) {
ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0,
@@ -337,7 +337,7 @@ static int check_buffer_by_block(int mem_type, int mode)
{
int i, item, result = KSFT_PASS;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
item = ARRAY_SIZE(sizes);
cur_mte_cxt.fault_valid = false;
for (i = 0; i < item; i++) {
@@ -368,7 +368,7 @@ static int check_memory_initial_tags(int mem_type, int mode, int mapping)
int run, fd;
int total = ARRAY_SIZE(sizes);
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
for (run = 0; run < total; run++) {
/* check initial tags for anonymous mmap */
ptr = (char *)mte_allocate_memory(sizes[run], mem_type, mapping, false);
@@ -415,7 +415,7 @@ int main(int argc, char *argv[])
return err;
/* Register SIGSEGV handler */
- mte_register_signal(SIGSEGV, mte_default_handler);
+ mte_register_signal(SIGSEGV, mte_default_handler, false);
/* Set test plan */
ksft_set_plan(20);
diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c
index 7597fc632cad..5e97ee792e4d 100644
--- a/tools/testing/selftests/arm64/mte/check_child_memory.c
+++ b/tools/testing/selftests/arm64/mte/check_child_memory.c
@@ -88,7 +88,7 @@ static int check_child_memory_mapping(int mem_type, int mode, int mapping)
int item = ARRAY_SIZE(sizes);
item = ARRAY_SIZE(sizes);
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
for (run = 0; run < item; run++) {
ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping,
UNDERFLOW, OVERFLOW);
@@ -109,7 +109,7 @@ static int check_child_file_mapping(int mem_type, int mode, int mapping)
int run, fd, map_size, result = KSFT_PASS;
int total = ARRAY_SIZE(sizes);
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
for (run = 0; run < total; run++) {
fd = create_temp_file();
if (fd == -1)
@@ -160,8 +160,8 @@ int main(int argc, char *argv[])
return err;
/* Register SIGSEGV handler */
- mte_register_signal(SIGSEGV, mte_default_handler);
- mte_register_signal(SIGBUS, mte_default_handler);
+ mte_register_signal(SIGSEGV, mte_default_handler, false);
+ mte_register_signal(SIGBUS, mte_default_handler, false);
/* Set test plan */
ksft_set_plan(12);
diff --git a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c
index 3bfcd3848432..aad1234c7e0f 100644
--- a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c
+++ b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c
@@ -151,7 +151,7 @@ static int check_hugetlb_memory_mapping(int mem_type, int mode, int mapping, int
map_size = default_huge_page_size();
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false);
if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS)
return KSFT_FAIL;
@@ -180,7 +180,7 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping)
unsigned long map_size;
prot_flag = PROT_READ | PROT_WRITE;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
map_size = default_huge_page_size();
map_ptr = (char *)mte_allocate_memory_tag_range(map_size, mem_type, mapping,
0, 0);
@@ -210,7 +210,7 @@ static int check_child_hugetlb_memory_mapping(int mem_type, int mode, int mappin
map_size = default_huge_page_size();
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
ptr = (char *)mte_allocate_memory_tag_range(map_size, mem_type, mapping,
0, 0);
if (check_allocated_memory_range(ptr, map_size, mem_type,
@@ -235,8 +235,8 @@ int main(int argc, char *argv[])
return err;
/* Register signal handlers */
- mte_register_signal(SIGBUS, mte_default_handler);
- mte_register_signal(SIGSEGV, mte_default_handler);
+ mte_register_signal(SIGBUS, mte_default_handler, false);
+ mte_register_signal(SIGSEGV, mte_default_handler, false);
allocate_hugetlb();
diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c
index 88c74bc46d4f..0cf5faef1724 100644
--- a/tools/testing/selftests/arm64/mte/check_ksm_options.c
+++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c
@@ -106,7 +106,7 @@ static int check_madvise_options(int mem_type, int mode, int mapping)
return err;
}
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
ptr = mte_allocate_memory(TEST_UNIT * page_sz, mem_type, mapping, true);
if (check_allocated_memory(ptr, TEST_UNIT * page_sz, mem_type, false) != KSFT_PASS)
return KSFT_FAIL;
@@ -141,8 +141,8 @@ int main(int argc, char *argv[])
return KSFT_FAIL;
}
/* Register signal handlers */
- mte_register_signal(SIGBUS, mte_default_handler);
- mte_register_signal(SIGSEGV, mte_default_handler);
+ mte_register_signal(SIGBUS, mte_default_handler, false);
+ mte_register_signal(SIGSEGV, mte_default_handler, false);
/* Set test plan */
ksft_set_plan(4);
diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c
index 17694caaff53..c100af3012cb 100644
--- a/tools/testing/selftests/arm64/mte/check_mmap_options.c
+++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c
@@ -3,6 +3,7 @@
#define _GNU_SOURCE
+#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
@@ -23,6 +24,35 @@
#define OVERFLOW MT_GRANULE_SIZE
#define TAG_CHECK_ON 0
#define TAG_CHECK_OFF 1
+#define ATAG_CHECK_ON 1
+#define ATAG_CHECK_OFF 0
+
+#define TEST_NAME_MAX 256
+
+enum mte_mem_check_type {
+ CHECK_ANON_MEM = 0,
+ CHECK_FILE_MEM = 1,
+ CHECK_CLEAR_PROT_MTE = 2,
+};
+
+enum mte_tag_op_type {
+ TAG_OP_ALL = 0,
+ TAG_OP_STONLY = 1,
+};
+
+struct check_mmap_testcase {
+ int check_type;
+ int mem_type;
+ int mte_sync;
+ int mapping;
+ int tag_check;
+ int atag_check;
+ int tag_op;
+ bool enable_tco;
+};
+
+#define TAG_OP_ALL 0
+#define TAG_OP_STONLY 1
static size_t page_size;
static int sizes[] = {
@@ -30,8 +60,17 @@ static int sizes[] = {
/* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0
};
-static int check_mte_memory(char *ptr, int size, int mode, int tag_check)
+static int check_mte_memory(char *ptr, int size, int mode,
+ int tag_check,int atag_check, int tag_op)
{
+ char buf[MT_GRANULE_SIZE];
+
+ if (!mtefar_support && atag_check == ATAG_CHECK_ON)
+ return KSFT_SKIP;
+
+ if (atag_check == ATAG_CHECK_ON)
+ ptr = mte_insert_atag(ptr);
+
mte_initialize_current_context(mode, (uintptr_t)ptr, size);
memset(ptr, '1', size);
mte_wait_after_trig();
@@ -54,16 +93,34 @@ static int check_mte_memory(char *ptr, int size, int mode, int tag_check)
if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF)
return KSFT_FAIL;
+ if (tag_op == TAG_OP_STONLY) {
+ mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW);
+ memcpy(buf, ptr - UNDERFLOW, MT_GRANULE_SIZE);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == true)
+ return KSFT_FAIL;
+
+ mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW);
+ memcpy(buf, ptr + size, MT_GRANULE_SIZE);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == true)
+ return KSFT_FAIL;
+ }
+
return KSFT_PASS;
}
-static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, int tag_check)
+static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping,
+ int tag_check, int atag_check, int tag_op)
{
char *ptr, *map_ptr;
int run, result, map_size;
int item = ARRAY_SIZE(sizes);
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ if (tag_op == TAG_OP_STONLY && !mtestonly_support)
+ return KSFT_SKIP;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, tag_op);
for (run = 0; run < item; run++) {
map_size = sizes[run] + OVERFLOW + UNDERFLOW;
map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false);
@@ -79,23 +136,27 @@ static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, i
munmap((void *)map_ptr, map_size);
return KSFT_FAIL;
}
- result = check_mte_memory(ptr, sizes[run], mode, tag_check);
+ result = check_mte_memory(ptr, sizes[run], mode, tag_check, atag_check, tag_op);
mte_clear_tags((void *)ptr, sizes[run]);
mte_free_memory((void *)map_ptr, map_size, mem_type, false);
- if (result == KSFT_FAIL)
- return KSFT_FAIL;
+ if (result != KSFT_PASS)
+ return result;
}
return KSFT_PASS;
}
-static int check_file_memory_mapping(int mem_type, int mode, int mapping, int tag_check)
+static int check_file_memory_mapping(int mem_type, int mode, int mapping,
+ int tag_check, int atag_check, int tag_op)
{
char *ptr, *map_ptr;
int run, fd, map_size;
int total = ARRAY_SIZE(sizes);
int result = KSFT_PASS;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ if (tag_op == TAG_OP_STONLY && !mtestonly_support)
+ return KSFT_SKIP;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, tag_op);
for (run = 0; run < total; run++) {
fd = create_temp_file();
if (fd == -1)
@@ -117,24 +178,24 @@ static int check_file_memory_mapping(int mem_type, int mode, int mapping, int ta
close(fd);
return KSFT_FAIL;
}
- result = check_mte_memory(ptr, sizes[run], mode, tag_check);
+ result = check_mte_memory(ptr, sizes[run], mode, tag_check, atag_check, tag_op);
mte_clear_tags((void *)ptr, sizes[run]);
munmap((void *)map_ptr, map_size);
close(fd);
- if (result == KSFT_FAIL)
- break;
+ if (result != KSFT_PASS)
+ return result;
}
- return result;
+ return KSFT_PASS;
}
-static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping)
+static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping, int atag_check)
{
char *ptr, *map_ptr;
int run, prot_flag, result, fd, map_size;
int total = ARRAY_SIZE(sizes);
prot_flag = PROT_READ | PROT_WRITE;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
for (run = 0; run < total; run++) {
map_size = sizes[run] + OVERFLOW + UNDERFLOW;
ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping,
@@ -150,10 +211,10 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping)
ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n");
return KSFT_FAIL;
}
- result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON);
+ result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON, atag_check, TAG_OP_ALL);
mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
if (result != KSFT_PASS)
- return KSFT_FAIL;
+ return result;
fd = create_temp_file();
if (fd == -1)
@@ -174,19 +235,715 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping)
close(fd);
return KSFT_FAIL;
}
- result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON);
+ result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON, atag_check, TAG_OP_ALL);
mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
close(fd);
if (result != KSFT_PASS)
- return KSFT_FAIL;
+ return result;
}
return KSFT_PASS;
}
+const char *format_test_name(struct check_mmap_testcase *tc)
+{
+ static char test_name[TEST_NAME_MAX];
+ const char *check_type_str;
+ const char *mem_type_str;
+ const char *sync_str;
+ const char *mapping_str;
+ const char *tag_check_str;
+ const char *atag_check_str;
+ const char *tag_op_str;
+
+ switch (tc->check_type) {
+ case CHECK_ANON_MEM:
+ check_type_str = "anonymous memory";
+ break;
+ case CHECK_FILE_MEM:
+ check_type_str = "file memory";
+ break;
+ case CHECK_CLEAR_PROT_MTE:
+ check_type_str = "clear PROT_MTE flags";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (tc->mem_type) {
+ case USE_MMAP:
+ mem_type_str = "mmap";
+ break;
+ case USE_MPROTECT:
+ mem_type_str = "mmap/mprotect";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (tc->mte_sync) {
+ case MTE_NONE_ERR:
+ sync_str = "no error";
+ break;
+ case MTE_SYNC_ERR:
+ sync_str = "sync error";
+ break;
+ case MTE_ASYNC_ERR:
+ sync_str = "async error";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (tc->mapping) {
+ case MAP_SHARED:
+ mapping_str = "shared";
+ break;
+ case MAP_PRIVATE:
+ mapping_str = "private";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (tc->tag_check) {
+ case TAG_CHECK_ON:
+ tag_check_str = "tag check on";
+ break;
+ case TAG_CHECK_OFF:
+ tag_check_str = "tag check off";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (tc->atag_check) {
+ case ATAG_CHECK_ON:
+ atag_check_str = "with address tag [63:60]";
+ break;
+ case ATAG_CHECK_OFF:
+ atag_check_str = "without address tag [63:60]";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ snprintf(test_name, sizeof(test_name),
+ "Check %s with %s mapping, %s mode, %s memory and %s (%s)\n",
+ check_type_str, mapping_str, sync_str, mem_type_str,
+ tag_check_str, atag_check_str);
+
+ switch (tc->tag_op) {
+ case TAG_OP_ALL:
+ tag_op_str = "";
+ break;
+ case TAG_OP_STONLY:
+ tag_op_str = " / store-only";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ snprintf(test_name, TEST_NAME_MAX,
+ "Check %s with %s mapping, %s mode, %s memory and %s (%s%s)\n",
+ check_type_str, mapping_str, sync_str, mem_type_str,
+ tag_check_str, atag_check_str, tag_op_str);
+
+ return test_name;
+}
+
int main(int argc, char *argv[])
{
- int err;
+ int err, i;
int item = ARRAY_SIZE(sizes);
+ struct check_mmap_testcase test_cases[]= {
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_OFF,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = true,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_OFF,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = true,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_NONE_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_OFF,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_NONE_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_OFF,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_CLEAR_PROT_MTE,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_CLEAR_PROT_MTE,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_CLEAR_PROT_MTE,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_CLEAR_PROT_MTE,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ };
err = mte_default_setup();
if (err)
@@ -200,64 +957,51 @@ int main(int argc, char *argv[])
sizes[item - 2] = page_size;
sizes[item - 1] = page_size + 1;
- /* Register signal handlers */
- mte_register_signal(SIGBUS, mte_default_handler);
- mte_register_signal(SIGSEGV, mte_default_handler);
-
/* Set test plan */
- ksft_set_plan(22);
-
- mte_enable_pstate_tco();
-
- evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
- "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check off\n");
- evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
- "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check off\n");
+ ksft_set_plan(ARRAY_SIZE(test_cases));
- mte_disable_pstate_tco();
- evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
- "Check anonymous memory with private mapping, no error mode, mmap memory and tag check off\n");
- evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
- "Check file memory with private mapping, no error mode, mmap/mprotect memory and tag check off\n");
+ for (i = 0 ; i < ARRAY_SIZE(test_cases); i++) {
+ /* Register signal handlers */
+ mte_register_signal(SIGBUS, mte_default_handler,
+ test_cases[i].atag_check == ATAG_CHECK_ON);
+ mte_register_signal(SIGSEGV, mte_default_handler,
+ test_cases[i].atag_check == ATAG_CHECK_ON);
- evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check anonymous memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check anonymous memory with shared mapping, sync error mode, mmap memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check anonymous memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check anonymous memory with private mapping, async error mode, mmap memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check anonymous memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check anonymous memory with shared mapping, async error mode, mmap memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check anonymous memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n");
+ if (test_cases[i].enable_tco)
+ mte_enable_pstate_tco();
+ else
+ mte_disable_pstate_tco();
- evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check file memory with private mapping, sync error mode, mmap memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check file memory with shared mapping, sync error mode, mmap memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check file memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check file memory with private mapping, async error mode, mmap memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check file memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check file memory with shared mapping, async error mode, mmap memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check file memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n");
-
- evaluate_test(check_clear_prot_mte_flag(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
- "Check clear PROT_MTE flags with private mapping, sync error mode and mmap memory\n");
- evaluate_test(check_clear_prot_mte_flag(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
- "Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n");
+ switch (test_cases[i].check_type) {
+ case CHECK_ANON_MEM:
+ evaluate_test(check_anonymous_memory_mapping(test_cases[i].mem_type,
+ test_cases[i].mte_sync,
+ test_cases[i].mapping,
+ test_cases[i].tag_check,
+ test_cases[i].atag_check,
+ test_cases[i].tag_op),
+ format_test_name(&test_cases[i]));
+ break;
+ case CHECK_FILE_MEM:
+ evaluate_test(check_file_memory_mapping(test_cases[i].mem_type,
+ test_cases[i].mte_sync,
+ test_cases[i].mapping,
+ test_cases[i].tag_check,
+ test_cases[i].atag_check,
+ test_cases[i].tag_op),
+ format_test_name(&test_cases[i]));
+ break;
+ case CHECK_CLEAR_PROT_MTE:
+ evaluate_test(check_clear_prot_mte_flag(test_cases[i].mem_type,
+ test_cases[i].mte_sync,
+ test_cases[i].mapping,
+ test_cases[i].atag_check),
+ format_test_name(&test_cases[i]));
+ break;
+ default:
+ exit(KSFT_FAIL);
+ }
+ }
mte_restore_setup();
ksft_print_cnts();
diff --git a/tools/testing/selftests/arm64/mte/check_prctl.c b/tools/testing/selftests/arm64/mte/check_prctl.c
index 4c89e9538ca0..f7f320defa7b 100644
--- a/tools/testing/selftests/arm64/mte/check_prctl.c
+++ b/tools/testing/selftests/arm64/mte/check_prctl.c
@@ -12,6 +12,10 @@
#include "kselftest.h"
+#ifndef AT_HWCAP3
+#define AT_HWCAP3 29
+#endif
+
static int set_tagged_addr_ctrl(int val)
{
int ret;
@@ -60,7 +64,7 @@ void check_basic_read(void)
/*
* Attempt to set a specified combination of modes.
*/
-void set_mode_test(const char *name, int hwcap2, int mask)
+void set_mode_test(const char *name, int hwcap2, int hwcap3, int mask)
{
int ret;
@@ -69,6 +73,11 @@ void set_mode_test(const char *name, int hwcap2, int mask)
return;
}
+ if ((getauxval(AT_HWCAP3) & hwcap3) != hwcap3) {
+ ksft_test_result_skip("%s\n", name);
+ return;
+ }
+
ret = set_tagged_addr_ctrl(mask);
if (ret < 0) {
ksft_test_result_fail("%s\n", name);
@@ -81,7 +90,7 @@ void set_mode_test(const char *name, int hwcap2, int mask)
return;
}
- if ((ret & PR_MTE_TCF_MASK) == mask) {
+ if ((ret & (PR_MTE_TCF_MASK | PR_MTE_STORE_ONLY)) == mask) {
ksft_test_result_pass("%s\n", name);
} else {
ksft_print_msg("Got %x, expected %x\n",
@@ -93,12 +102,16 @@ void set_mode_test(const char *name, int hwcap2, int mask)
struct mte_mode {
int mask;
int hwcap2;
+ int hwcap3;
const char *name;
} mte_modes[] = {
- { PR_MTE_TCF_NONE, 0, "NONE" },
- { PR_MTE_TCF_SYNC, HWCAP2_MTE, "SYNC" },
- { PR_MTE_TCF_ASYNC, HWCAP2_MTE, "ASYNC" },
- { PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC, HWCAP2_MTE, "SYNC+ASYNC" },
+ { PR_MTE_TCF_NONE, 0, 0, "NONE" },
+ { PR_MTE_TCF_SYNC, HWCAP2_MTE, 0, "SYNC" },
+ { PR_MTE_TCF_ASYNC, HWCAP2_MTE, 0, "ASYNC" },
+ { PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC, HWCAP2_MTE, 0, "SYNC+ASYNC" },
+ { PR_MTE_TCF_SYNC | PR_MTE_STORE_ONLY, HWCAP2_MTE, HWCAP3_MTE_STORE_ONLY, "SYNC+STONLY" },
+ { PR_MTE_TCF_ASYNC | PR_MTE_STORE_ONLY, HWCAP2_MTE, HWCAP3_MTE_STORE_ONLY, "ASYNC+STONLY" },
+ { PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC | PR_MTE_STORE_ONLY, HWCAP2_MTE, HWCAP3_MTE_STORE_ONLY, "SYNC+ASYNC+STONLY" },
};
int main(void)
@@ -106,11 +119,11 @@ int main(void)
int i;
ksft_print_header();
- ksft_set_plan(5);
+ ksft_set_plan(ARRAY_SIZE(mte_modes));
check_basic_read();
for (i = 0; i < ARRAY_SIZE(mte_modes); i++)
- set_mode_test(mte_modes[i].name, mte_modes[i].hwcap2,
+ set_mode_test(mte_modes[i].name, mte_modes[i].hwcap2, mte_modes[i].hwcap3,
mte_modes[i].mask);
ksft_print_cnts();
diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
index a3d1e23fe02a..4b764f2a8185 100644
--- a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
+++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
@@ -57,7 +57,7 @@ static int check_single_included_tags(int mem_type, int mode)
return KSFT_FAIL;
for (tag = 0; (tag < MT_TAG_COUNT) && (result == KSFT_PASS); tag++) {
- ret = mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag));
+ ret = mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag), false);
if (ret != 0)
result = KSFT_FAIL;
/* Try to catch a excluded tag by a number of tries. */
@@ -91,7 +91,7 @@ static int check_multiple_included_tags(int mem_type, int mode)
for (tag = 0; (tag < MT_TAG_COUNT - 1) && (result == KSFT_PASS); tag++) {
excl_mask |= 1 << tag;
- mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask));
+ mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask), false);
/* Try to catch a excluded tag by a number of tries. */
for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
ptr = mte_insert_tags(ptr, BUFFER_SIZE);
@@ -120,7 +120,7 @@ static int check_all_included_tags(int mem_type, int mode)
mem_type, false) != KSFT_PASS)
return KSFT_FAIL;
- ret = mte_switch_mode(mode, MT_INCLUDE_TAG_MASK);
+ ret = mte_switch_mode(mode, MT_INCLUDE_TAG_MASK, false);
if (ret != 0)
return KSFT_FAIL;
/* Try to catch a excluded tag by a number of tries. */
@@ -145,7 +145,7 @@ static int check_none_included_tags(int mem_type, int mode)
if (check_allocated_memory(ptr, BUFFER_SIZE, mem_type, false) != KSFT_PASS)
return KSFT_FAIL;
- ret = mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK);
+ ret = mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK, false);
if (ret != 0)
return KSFT_FAIL;
/* Try to catch a excluded tag by a number of tries. */
@@ -180,7 +180,7 @@ int main(int argc, char *argv[])
return err;
/* Register SIGSEGV handler */
- mte_register_signal(SIGSEGV, mte_default_handler);
+ mte_register_signal(SIGSEGV, mte_default_handler, false);
/* Set test plan */
ksft_set_plan(4);
diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
index f4ae5f87a3b7..fb7936c4e097 100644
--- a/tools/testing/selftests/arm64/mte/check_user_mem.c
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -44,7 +44,7 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping,
err = KSFT_PASS;
len = 2 * page_sz;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
fd = create_temp_file();
if (fd == -1)
return KSFT_FAIL;
@@ -211,7 +211,7 @@ int main(int argc, char *argv[])
return err;
/* Register signal handlers */
- mte_register_signal(SIGSEGV, mte_default_handler);
+ mte_register_signal(SIGSEGV, mte_default_handler, false);
/* Set test plan */
ksft_set_plan(64);
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c
index a1dc2fe5285b..397e57dd946a 100644
--- a/tools/testing/selftests/arm64/mte/mte_common_util.c
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.c
@@ -6,6 +6,7 @@
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
+#include <time.h>
#include <unistd.h>
#include <linux/auxvec.h>
@@ -19,20 +20,40 @@
#include "mte_common_util.h"
#include "mte_def.h"
+#ifndef SA_EXPOSE_TAGBITS
+#define SA_EXPOSE_TAGBITS 0x00000800
+#endif
+
#define INIT_BUFFER_SIZE 256
struct mte_fault_cxt cur_mte_cxt;
+bool mtefar_support;
+bool mtestonly_support;
static unsigned int mte_cur_mode;
static unsigned int mte_cur_pstate_tco;
+static bool mte_cur_stonly;
void mte_default_handler(int signum, siginfo_t *si, void *uc)
{
+ struct sigaction sa;
unsigned long addr = (unsigned long)si->si_addr;
+ unsigned char si_tag, si_atag;
+
+ sigaction(signum, NULL, &sa);
+
+ if (sa.sa_flags & SA_EXPOSE_TAGBITS) {
+ si_tag = MT_FETCH_TAG(addr);
+ si_atag = MT_FETCH_ATAG(addr);
+ addr = MT_CLEAR_TAGS(addr);
+ } else {
+ si_tag = 0;
+ si_atag = 0;
+ }
if (signum == SIGSEGV) {
#ifdef DEBUG
- ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx\n",
- ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code);
+ ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx, si_tag=%x, si_atag=%x\n",
+ ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code, si_tag, si_atag);
#endif
if (si->si_code == SEGV_MTEAERR) {
if (cur_mte_cxt.trig_si_code == si->si_code)
@@ -45,13 +66,18 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc)
}
/* Compare the context for precise error */
else if (si->si_code == SEGV_MTESERR) {
+ if ((!mtefar_support && si_atag) || (si_atag != MT_FETCH_ATAG(cur_mte_cxt.trig_addr))) {
+ ksft_print_msg("Invalid MTE synchronous exception caught for address tag! si_tag=%x, si_atag: %x\n", si_tag, si_atag);
+ exit(KSFT_FAIL);
+ }
+
if (cur_mte_cxt.trig_si_code == si->si_code &&
((cur_mte_cxt.trig_range >= 0 &&
- addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
- addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
+ addr >= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) &&
+ addr <= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
(cur_mte_cxt.trig_range < 0 &&
- addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
- addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) {
+ addr <= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) &&
+ addr >= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) {
cur_mte_cxt.fault_valid = true;
/* Adjust the pc by 4 */
((ucontext_t *)uc)->uc_mcontext.pc += 4;
@@ -67,11 +93,11 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc)
ksft_print_msg("INFO: SIGBUS signal at pc=%llx, fault addr=%lx, si_code=%x\n",
((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code);
if ((cur_mte_cxt.trig_range >= 0 &&
- addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
- addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
+ addr >= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) &&
+ addr <= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
(cur_mte_cxt.trig_range < 0 &&
- addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
- addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) {
+ addr <= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) &&
+ addr >= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) {
cur_mte_cxt.fault_valid = true;
/* Adjust the pc by 4 */
((ucontext_t *)uc)->uc_mcontext.pc += 4;
@@ -79,12 +105,17 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc)
}
}
-void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *))
+void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *),
+ bool export_tags)
{
struct sigaction sa;
sa.sa_sigaction = handler;
sa.sa_flags = SA_SIGINFO;
+
+ if (export_tags && signal == SIGSEGV)
+ sa.sa_flags |= SA_EXPOSE_TAGBITS;
+
sigemptyset(&sa.sa_mask);
sigaction(signal, &sa, NULL);
}
@@ -120,6 +151,19 @@ void mte_clear_tags(void *ptr, size_t size)
mte_clear_tag_address_range(ptr, size);
}
+void *mte_insert_atag(void *ptr)
+{
+ unsigned char atag;
+
+ atag = mtefar_support ? (random() % MT_ATAG_MASK) + 1 : 0;
+ return (void *)MT_SET_ATAG((unsigned long)ptr, atag);
+}
+
+void *mte_clear_atag(void *ptr)
+{
+ return (void *)MT_CLEAR_ATAG((unsigned long)ptr);
+}
+
static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping,
size_t range_before, size_t range_after,
bool tags, int fd)
@@ -272,7 +316,7 @@ void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range)
cur_mte_cxt.trig_si_code = 0;
}
-int mte_switch_mode(int mte_option, unsigned long incl_mask)
+int mte_switch_mode(int mte_option, unsigned long incl_mask, bool stonly)
{
unsigned long en = 0;
@@ -304,6 +348,9 @@ int mte_switch_mode(int mte_option, unsigned long incl_mask)
break;
}
+ if (mtestonly_support && stonly)
+ en |= PR_MTE_STORE_ONLY;
+
en |= (incl_mask << PR_MTE_TAG_SHIFT);
/* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. */
if (prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) != 0) {
@@ -316,12 +363,21 @@ int mte_switch_mode(int mte_option, unsigned long incl_mask)
int mte_default_setup(void)
{
unsigned long hwcaps2 = getauxval(AT_HWCAP2);
+ unsigned long hwcaps3 = getauxval(AT_HWCAP3);
unsigned long en = 0;
int ret;
+ /* To generate random address tag */
+ srandom(time(NULL));
+
if (!(hwcaps2 & HWCAP2_MTE))
ksft_exit_skip("MTE features unavailable\n");
+ mtefar_support = !!(hwcaps3 & HWCAP3_MTE_FAR);
+
+ if (hwcaps3 & HWCAP3_MTE_STORE_ONLY)
+ mtestonly_support = true;
+
/* Get current mte mode */
ret = prctl(PR_GET_TAGGED_ADDR_CTRL, en, 0, 0, 0);
if (ret < 0) {
@@ -335,6 +391,8 @@ int mte_default_setup(void)
else if (ret & PR_MTE_TCF_NONE)
mte_cur_mode = MTE_NONE_ERR;
+ mte_cur_stonly = (ret & PR_MTE_STORE_ONLY) ? true : false;
+
mte_cur_pstate_tco = mte_get_pstate_tco();
/* Disable PSTATE.TCO */
mte_disable_pstate_tco();
@@ -343,7 +401,7 @@ int mte_default_setup(void)
void mte_restore_setup(void)
{
- mte_switch_mode(mte_cur_mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mte_cur_mode, MTE_ALLOW_NON_ZERO_TAG, mte_cur_stonly);
if (mte_cur_pstate_tco == MT_PSTATE_TCO_EN)
mte_enable_pstate_tco();
else if (mte_cur_pstate_tco == MT_PSTATE_TCO_DIS)
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h
index a0017a303beb..250d671329a5 100644
--- a/tools/testing/selftests/arm64/mte/mte_common_util.h
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.h
@@ -37,10 +37,13 @@ struct mte_fault_cxt {
};
extern struct mte_fault_cxt cur_mte_cxt;
+extern bool mtefar_support;
+extern bool mtestonly_support;
/* MTE utility functions */
void mte_default_handler(int signum, siginfo_t *si, void *uc);
-void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *));
+void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *),
+ bool export_tags);
void mte_wait_after_trig(void);
void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags);
void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping,
@@ -54,9 +57,11 @@ void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type,
size_t range_before, size_t range_after);
void *mte_insert_tags(void *ptr, size_t size);
void mte_clear_tags(void *ptr, size_t size);
+void *mte_insert_atag(void *ptr);
+void *mte_clear_atag(void *ptr);
int mte_default_setup(void);
void mte_restore_setup(void);
-int mte_switch_mode(int mte_option, unsigned long incl_mask);
+int mte_switch_mode(int mte_option, unsigned long incl_mask, bool stonly);
void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range);
/* Common utility functions */
diff --git a/tools/testing/selftests/arm64/mte/mte_def.h b/tools/testing/selftests/arm64/mte/mte_def.h
index 9b188254b61a..6ad22f07c9b8 100644
--- a/tools/testing/selftests/arm64/mte/mte_def.h
+++ b/tools/testing/selftests/arm64/mte/mte_def.h
@@ -42,6 +42,8 @@
#define MT_TAG_COUNT 16
#define MT_INCLUDE_TAG_MASK 0xFFFF
#define MT_EXCLUDE_TAG_MASK 0x0
+#define MT_ATAG_SHIFT 60
+#define MT_ATAG_MASK 0xFUL
#define MT_ALIGN_GRANULE (MT_GRANULE_SIZE - 1)
#define MT_CLEAR_TAG(x) ((x) & ~(MT_TAG_MASK << MT_TAG_SHIFT))
@@ -49,6 +51,12 @@
#define MT_FETCH_TAG(x) ((x >> MT_TAG_SHIFT) & (MT_TAG_MASK))
#define MT_ALIGN_UP(x) ((x + MT_ALIGN_GRANULE) & ~(MT_ALIGN_GRANULE))
+#define MT_CLEAR_ATAG(x) ((x) & ~(MT_TAG_MASK << MT_ATAG_SHIFT))
+#define MT_SET_ATAG(x, y) ((x) | (((y) & MT_ATAG_MASK) << MT_ATAG_SHIFT))
+#define MT_FETCH_ATAG(x) ((x >> MT_ATAG_SHIFT) & (MT_ATAG_MASK))
+
+#define MT_CLEAR_TAGS(x) (MT_CLEAR_ATAG(MT_CLEAR_TAG(x)))
+
#define MT_PSTATE_TCO_SHIFT 25
#define MT_PSTATE_TCO_MASK ~(0x1 << MT_PSTATE_TCO_SHIFT)
#define MT_PSTATE_TCO_EN 1
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 7247833fe623..8916ab814a3e 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -97,6 +97,9 @@ CONFIG_NF_TABLES_NETDEV=y
CONFIG_NF_TABLES_IPV4=y
CONFIG_NF_TABLES_IPV6=y
CONFIG_NETFILTER_INGRESS=y
+CONFIG_IP_NF_IPTABLES_LEGACY=y
+CONFIG_IP6_NF_IPTABLES_LEGACY=y
+CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NF_FLOW_TABLE=y
CONFIG_NF_FLOW_TABLE_INET=y
CONFIG_NETFILTER_NETLINK=y
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
index a4517bee34d5..27781df8f2fb 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
@@ -1,11 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2024 Meta
+#include <poll.h>
#include <test_progs.h>
#include "network_helpers.h"
#include "sock_iter_batch.skel.h"
#define TEST_NS "sock_iter_batch_netns"
+#define TEST_CHILD_NS "sock_iter_batch_child_netns"
static const int init_batch_size = 16;
static const int nr_soreuse = 4;
@@ -118,6 +120,45 @@ done:
return nth_sock_idx;
}
+static void destroy(int fd)
+{
+ struct sock_iter_batch *skel = NULL;
+ __u64 cookie = socket_cookie(fd);
+ struct bpf_link *link = NULL;
+ int iter_fd = -1;
+ int nread;
+ __u64 out;
+
+ skel = sock_iter_batch__open();
+ if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open"))
+ goto done;
+
+ skel->rodata->destroy_cookie = cookie;
+
+ if (!ASSERT_OK(sock_iter_batch__load(skel), "sock_iter_batch__load"))
+ goto done;
+
+ link = bpf_program__attach_iter(skel->progs.iter_tcp_destroy, NULL);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter"))
+ goto done;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
+ goto done;
+
+ /* Delete matching socket. */
+ nread = read(iter_fd, &out, sizeof(out));
+ ASSERT_GE(nread, 0, "nread");
+ if (nread)
+ ASSERT_EQ(out, cookie, "cookie matches");
+done:
+ if (iter_fd >= 0)
+ close(iter_fd);
+ bpf_link__destroy(link);
+ sock_iter_batch__destroy(skel);
+ close(fd);
+}
+
static int get_seen_count(int fd, struct sock_count counts[], int n)
{
__u64 cookie = socket_cookie(fd);
@@ -152,8 +193,71 @@ static void check_n_were_seen_once(int *fds, int fds_len, int n,
ASSERT_EQ(seen_once, n, "seen_once");
}
+static int accept_from_one(struct pollfd *server_poll_fds,
+ int server_poll_fds_len)
+{
+ static const int poll_timeout_ms = 5000; /* 5s */
+ int ret;
+ int i;
+
+ ret = poll(server_poll_fds, server_poll_fds_len, poll_timeout_ms);
+ if (!ASSERT_EQ(ret, 1, "poll"))
+ return -1;
+
+ for (i = 0; i < server_poll_fds_len; i++)
+ if (server_poll_fds[i].revents & POLLIN)
+ return accept(server_poll_fds[i].fd, NULL, NULL);
+
+ return -1;
+}
+
+static int *connect_to_server(int family, int sock_type, const char *addr,
+ __u16 port, int nr_connects, int *server_fds,
+ int server_fds_len)
+{
+ struct pollfd *server_poll_fds = NULL;
+ int *established_socks = NULL;
+ int i;
+
+ server_poll_fds = calloc(server_fds_len, sizeof(*server_poll_fds));
+ if (!ASSERT_OK_PTR(server_poll_fds, "server_poll_fds"))
+ return NULL;
+
+ for (i = 0; i < server_fds_len; i++) {
+ server_poll_fds[i].fd = server_fds[i];
+ server_poll_fds[i].events = POLLIN;
+ }
+
+ i = 0;
+
+ established_socks = malloc(sizeof(*established_socks) * nr_connects*2);
+ if (!ASSERT_OK_PTR(established_socks, "established_socks"))
+ goto error;
+
+ while (nr_connects--) {
+ established_socks[i] = connect_to_addr_str(family, sock_type,
+ addr, port, NULL);
+ if (!ASSERT_OK_FD(established_socks[i], "connect_to_addr_str"))
+ goto error;
+ i++;
+ established_socks[i] = accept_from_one(server_poll_fds,
+ server_fds_len);
+ if (!ASSERT_OK_FD(established_socks[i], "accept_from_one"))
+ goto error;
+ i++;
+ }
+
+ free(server_poll_fds);
+ return established_socks;
+error:
+ free_fds(established_socks, i);
+ free(server_poll_fds);
+ return NULL;
+}
+
static void remove_seen(int family, int sock_type, const char *addr, __u16 port,
- int *socks, int socks_len, struct sock_count *counts,
+ int *socks, int socks_len, int *established_socks,
+ int established_socks_len, struct sock_count *counts,
int counts_len, struct bpf_link *link, int iter_fd)
{
int close_idx;
@@ -182,8 +286,46 @@ static void remove_seen(int family, int sock_type, const char *addr, __u16 port,
counts_len);
}
+static void remove_seen_established(int family, int sock_type, const char *addr,
+ __u16 port, int *listen_socks,
+ int listen_socks_len, int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int close_idx;
+
+ /* Iterate through all listening sockets. */
+ read_n(iter_fd, listen_socks_len, counts, counts_len);
+
+ /* Make sure we saw all listening sockets exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+
+ /* Leave one established socket. */
+ read_n(iter_fd, established_socks_len - 1, counts, counts_len);
+
+ /* Close a socket we've already seen to remove it from the bucket. */
+ close_idx = get_nth_socket(established_socks, established_socks_len,
+ link, listen_socks_len + 1);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+ destroy(established_socks[close_idx]);
+ established_socks[close_idx] = -1;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure the last socket wasn't skipped and that there were no
+ * repeats.
+ */
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len - 1, counts, counts_len);
+}
+
static void remove_unseen(int family, int sock_type, const char *addr,
__u16 port, int *socks, int socks_len,
+ int *established_socks, int established_socks_len,
struct sock_count *counts, int counts_len,
struct bpf_link *link, int iter_fd)
{
@@ -214,8 +356,54 @@ static void remove_unseen(int family, int sock_type, const char *addr,
counts_len);
}
+static void remove_unseen_established(int family, int sock_type,
+ const char *addr, __u16 port,
+ int *listen_socks, int listen_socks_len,
+ int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int close_idx;
+
+ /* Iterate through all listening sockets. */
+ read_n(iter_fd, listen_socks_len, counts, counts_len);
+
+ /* Make sure we saw all listening sockets exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+
+ /* Iterate through the first established socket. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Make sure we saw one established socks. */
+ check_n_were_seen_once(established_socks, established_socks_len, 1,
+ counts, counts_len);
+
+ /* Close what would be the next socket in the bucket to exercise the
+ * condition where we need to skip past the first cookie we remembered.
+ */
+ close_idx = get_nth_socket(established_socks, established_socks_len,
+ link, listen_socks_len + 1);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+
+ destroy(established_socks[close_idx]);
+ established_socks[close_idx] = -1;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure the remaining sockets were seen exactly once and that we
+ * didn't repeat the socket that was already seen.
+ */
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len - 1, counts, counts_len);
+}
+
static void remove_all(int family, int sock_type, const char *addr,
__u16 port, int *socks, int socks_len,
+ int *established_socks, int established_socks_len,
struct sock_count *counts, int counts_len,
struct bpf_link *link, int iter_fd)
{
@@ -242,8 +430,57 @@ static void remove_all(int family, int sock_type, const char *addr,
ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n");
}
+static void remove_all_established(int family, int sock_type, const char *addr,
+ __u16 port, int *listen_socks,
+ int listen_socks_len, int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int *close_idx = NULL;
+ int i;
+
+ /* Iterate through all listening sockets. */
+ read_n(iter_fd, listen_socks_len, counts, counts_len);
+
+ /* Make sure we saw all listening sockets exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+
+ /* Iterate through the first established socket. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Make sure we saw one established socks. */
+ check_n_were_seen_once(established_socks, established_socks_len, 1,
+ counts, counts_len);
+
+ /* Close all remaining sockets to exhaust the list of saved cookies and
+ * exit without putting any sockets into the batch on the next read.
+ */
+ close_idx = malloc(sizeof(int) * (established_socks_len - 1));
+ if (!ASSERT_OK_PTR(close_idx, "close_idx malloc"))
+ return;
+ for (i = 0; i < established_socks_len - 1; i++) {
+ close_idx[i] = get_nth_socket(established_socks,
+ established_socks_len, link,
+ listen_socks_len + i);
+ if (!ASSERT_GE(close_idx[i], 0, "close_idx"))
+ return;
+ }
+
+ for (i = 0; i < established_socks_len - 1; i++) {
+ destroy(established_socks[close_idx[i]]);
+ established_socks[close_idx[i]] = -1;
+ }
+
+ /* Make sure there are no more sockets returned */
+ ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n");
+ free(close_idx);
+}
+
static void add_some(int family, int sock_type, const char *addr, __u16 port,
- int *socks, int socks_len, struct sock_count *counts,
+ int *socks, int socks_len, int *established_socks,
+ int established_socks_len, struct sock_count *counts,
int counts_len, struct bpf_link *link, int iter_fd)
{
int *new_socks = NULL;
@@ -271,8 +508,52 @@ done:
free_fds(new_socks, socks_len);
}
+static void add_some_established(int family, int sock_type, const char *addr,
+ __u16 port, int *listen_socks,
+ int listen_socks_len, int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts,
+ int counts_len, struct bpf_link *link,
+ int iter_fd)
+{
+ int *new_socks = NULL;
+
+ /* Iterate through all listening sockets. */
+ read_n(iter_fd, listen_socks_len, counts, counts_len);
+
+ /* Make sure we saw all listening sockets exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+
+ /* Iterate through the first established_socks_len - 1 sockets. */
+ read_n(iter_fd, established_socks_len - 1, counts, counts_len);
+
+ /* Make sure we saw established_socks_len - 1 sockets exactly once. */
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len - 1, counts, counts_len);
+
+ /* Double the number of established sockets in the bucket. */
+ new_socks = connect_to_server(family, sock_type, addr, port,
+ established_socks_len / 2, listen_socks,
+ listen_socks_len);
+ if (!ASSERT_OK_PTR(new_socks, "connect_to_server"))
+ goto done;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure each of the original sockets was seen exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len, counts, counts_len);
+done:
+ free_fds(new_socks, established_socks_len);
+}
+
static void force_realloc(int family, int sock_type, const char *addr,
__u16 port, int *socks, int socks_len,
+ int *established_socks, int established_socks_len,
struct sock_count *counts, int counts_len,
struct bpf_link *link, int iter_fd)
{
@@ -299,11 +580,32 @@ done:
free_fds(new_socks, socks_len);
}
+static void force_realloc_established(int family, int sock_type,
+ const char *addr, __u16 port,
+ int *listen_socks, int listen_socks_len,
+ int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ /* Iterate through all sockets to trigger a realloc. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure each socket was seen exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len, counts, counts_len);
+}
+
struct test_case {
void (*test)(int family, int sock_type, const char *addr, __u16 port,
- int *socks, int socks_len, struct sock_count *counts,
+ int *socks, int socks_len, int *established_socks,
+ int established_socks_len, struct sock_count *counts,
int counts_len, struct bpf_link *link, int iter_fd);
const char *description;
+ int ehash_buckets;
+ int connections;
int init_socks;
int max_socks;
int sock_type;
@@ -358,18 +660,140 @@ static struct test_case resume_tests[] = {
.family = AF_INET6,
.test = force_realloc,
},
+ {
+ .description = "tcp: resume after removing a seen socket (listening)",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_seen,
+ },
+ {
+ .description = "tcp: resume after removing one unseen socket (listening)",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_unseen,
+ },
+ {
+ .description = "tcp: resume after removing all unseen sockets (listening)",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_all,
+ },
+ {
+ .description = "tcp: resume after adding a few sockets (listening)",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_STREAM,
+ /* Use AF_INET so that new sockets are added to the head of the
+ * bucket's list.
+ */
+ .family = AF_INET,
+ .test = add_some,
+ },
+ {
+ .description = "tcp: force a realloc to occur (listening)",
+ .init_socks = init_batch_size,
+ .max_socks = init_batch_size * 2,
+ .sock_type = SOCK_STREAM,
+ /* Use AF_INET6 so that new sockets are added to the tail of the
+ * bucket's list, needing to be added to the next batch to force
+ * a realloc.
+ */
+ .family = AF_INET6,
+ .test = force_realloc,
+ },
+ {
+ .description = "tcp: resume after removing a seen socket (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ .connections = nr_soreuse,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse * 3,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_seen_established,
+ },
+ {
+ .description = "tcp: resume after removing one unseen socket (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ .connections = nr_soreuse,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse * 3,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_unseen_established,
+ },
+ {
+ .description = "tcp: resume after removing all unseen sockets (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ .connections = nr_soreuse,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse * 3,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_all_established,
+ },
+ {
+ .description = "tcp: resume after adding a few sockets (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ .connections = nr_soreuse,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse * 3,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = add_some_established,
+ },
+ {
+ .description = "tcp: force a realloc to occur (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ /* Bucket size will need to double when going from listening to
+ * established sockets.
+ */
+ .connections = init_batch_size,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse + (init_batch_size * 2),
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = force_realloc_established,
+ },
};
static void do_resume_test(struct test_case *tc)
{
struct sock_iter_batch *skel = NULL;
+ struct sock_count *counts = NULL;
static const __u16 port = 10001;
+ struct nstoken *nstoken = NULL;
struct bpf_link *link = NULL;
- struct sock_count *counts;
+ int *established_fds = NULL;
int err, iter_fd = -1;
const char *addr;
int *fds = NULL;
- int local_port;
+
+ if (tc->ehash_buckets) {
+ SYS_NOFAIL("ip netns del " TEST_CHILD_NS);
+ SYS(done, "sysctl -wq net.ipv4.tcp_child_ehash_entries=%d",
+ tc->ehash_buckets);
+ SYS(done, "ip netns add %s", TEST_CHILD_NS);
+ SYS(done, "ip -net %s link set dev lo up", TEST_CHILD_NS);
+ nstoken = open_netns(TEST_CHILD_NS);
+ if (!ASSERT_OK_PTR(nstoken, "open_child_netns"))
+ goto done;
+ }
counts = calloc(tc->max_socks, sizeof(*counts));
if (!ASSERT_OK_PTR(counts, "counts"))
@@ -384,11 +808,18 @@ static void do_resume_test(struct test_case *tc)
tc->init_socks);
if (!ASSERT_OK_PTR(fds, "start_reuseport_server"))
goto done;
- local_port = get_socket_local_port(*fds);
- if (!ASSERT_GE(local_port, 0, "get_socket_local_port"))
- goto done;
- skel->rodata->ports[0] = ntohs(local_port);
+ if (tc->connections) {
+ established_fds = connect_to_server(tc->family, tc->sock_type,
+ addr, port,
+ tc->connections, fds,
+ tc->init_socks);
+ if (!ASSERT_OK_PTR(established_fds, "connect_to_server"))
+ goto done;
+ }
+ skel->rodata->ports[0] = 0;
+ skel->rodata->ports[1] = 0;
skel->rodata->sf = tc->family;
+ skel->rodata->ss = 0;
err = sock_iter_batch__load(skel);
if (!ASSERT_OK(err, "sock_iter_batch__load"))
@@ -406,10 +837,15 @@ static void do_resume_test(struct test_case *tc)
goto done;
tc->test(tc->family, tc->sock_type, addr, port, fds, tc->init_socks,
- counts, tc->max_socks, link, iter_fd);
+ established_fds, tc->connections*2, counts, tc->max_socks,
+ link, iter_fd);
done:
+ close_netns(nstoken);
+ SYS_NOFAIL("ip netns del " TEST_CHILD_NS);
+ SYS_NOFAIL("sysctl -w net.ipv4.tcp_child_ehash_entries=0");
free(counts);
free_fds(fds, tc->init_socks);
+ free_fds(established_fds, tc->connections*2);
if (iter_fd >= 0)
close(iter_fd);
bpf_link__destroy(link);
@@ -454,6 +890,8 @@ static void do_test(int sock_type, bool onebyone)
skel->rodata->ports[i] = ntohs(local_port);
}
skel->rodata->sf = AF_INET6;
+ if (sock_type == SOCK_STREAM)
+ skel->rodata->ss = TCP_LISTEN;
err = sock_iter_batch__load(skel);
if (!ASSERT_OK(err, "sock_iter_batch__load"))
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
index b6c471da5c28..b87e7f39e15a 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
@@ -314,6 +314,95 @@ out:
test_sockmap_ktls__destroy(skel);
}
+static void test_sockmap_ktls_tx_pop(int family, int sotype)
+{
+ char msg[37] = "0123456789abcdefghijklmnopqrstuvwxyz\0";
+ int c = 0, p = 0, one = 1, sent, recvd;
+ struct test_sockmap_ktls *skel;
+ int prog_fd, map_fd;
+ char rcv[50] = {0};
+ int err;
+ int i, m, r;
+
+ skel = test_sockmap_ktls__open_and_load();
+ if (!ASSERT_TRUE(skel, "open ktls skel"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (!ASSERT_OK(err, "create_pair()"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.prog_sk_policy);
+ map_fd = bpf_map__fd(skel->maps.sock_map);
+
+ err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach sk msg"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c)"))
+ goto out;
+
+ err = init_ktls_pairs(c, p);
+ if (!ASSERT_OK(err, "init_ktls_pairs(c, p)"))
+ goto out;
+
+ struct {
+ int pop_start;
+ int pop_len;
+ } pop_policy[] = {
+ /* trim the start */
+ {0, 2},
+ {0, 10},
+ {1, 2},
+ {1, 10},
+ /* trim the end */
+ {35, 2},
+ /* New entries should be added before this line */
+ {-1, -1},
+ };
+
+ i = 0;
+ while (pop_policy[i].pop_start >= 0) {
+ skel->bss->pop_start = pop_policy[i].pop_start;
+ skel->bss->pop_end = pop_policy[i].pop_len;
+
+ sent = send(c, msg, sizeof(msg), 0);
+ if (!ASSERT_EQ(sent, sizeof(msg), "send(msg)"))
+ goto out;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(recvd, sizeof(msg) - pop_policy[i].pop_len, "pop len mismatch"))
+ goto out;
+
+ /* verify the data
+ * msg: 0123456789a bcdefghij klmnopqrstuvwxyz
+ * | |
+ * popped data
+ */
+ for (m = 0, r = 0; m < sizeof(msg);) {
+ /* skip checking the data that has been popped */
+ if (m >= pop_policy[i].pop_start &&
+ m <= pop_policy[i].pop_start + pop_policy[i].pop_len - 1) {
+ m++;
+ continue;
+ }
+
+ if (!ASSERT_EQ(msg[m], rcv[r], "data mismatch"))
+ goto out;
+ m++;
+ r++;
+ }
+ i++;
+ }
+out:
+ if (c)
+ close(c);
+ if (p)
+ close(p);
+ test_sockmap_ktls__destroy(skel);
+}
+
static void run_tests(int family, enum bpf_map_type map_type)
{
int map;
@@ -338,6 +427,8 @@ static void run_ktls_test(int family, int sotype)
test_sockmap_ktls_tx_cork(family, sotype, true);
if (test__start_subtest("tls tx egress with no buf"))
test_sockmap_ktls_tx_no_buf(family, sotype, true);
+ if (test__start_subtest("tls tx with pop"))
+ test_sockmap_ktls_tx_pop(family, sotype);
}
void test_sockmap_ktls(void)
diff --git a/tools/testing/selftests/bpf/progs/sock_iter_batch.c b/tools/testing/selftests/bpf/progs/sock_iter_batch.c
index 8f483337e103..77966ded5467 100644
--- a/tools/testing/selftests/bpf/progs/sock_iter_batch.c
+++ b/tools/testing/selftests/bpf/progs/sock_iter_batch.c
@@ -23,6 +23,7 @@ static bool ipv4_addr_loopback(__be32 a)
}
volatile const unsigned int sf;
+volatile const unsigned int ss;
volatile const __u16 ports[2];
unsigned int bucket[2];
@@ -42,16 +43,18 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx)
sock_cookie = bpf_get_socket_cookie(sk);
sk = bpf_core_cast(sk, struct sock);
if (sk->sk_family != sf ||
- sk->sk_state != TCP_LISTEN ||
- sk->sk_family == AF_INET6 ?
+ (ss && sk->sk_state != ss) ||
+ (sk->sk_family == AF_INET6 ?
!ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) :
- !ipv4_addr_loopback(sk->sk_rcv_saddr))
+ !ipv4_addr_loopback(sk->sk_rcv_saddr)))
return 0;
if (sk->sk_num == ports[0])
idx = 0;
else if (sk->sk_num == ports[1])
idx = 1;
+ else if (!ports[0] && !ports[1])
+ idx = 0;
else
return 0;
@@ -67,6 +70,27 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx)
return 0;
}
+volatile const __u64 destroy_cookie;
+
+SEC("iter/tcp")
+int iter_tcp_destroy(struct bpf_iter__tcp *ctx)
+{
+ struct sock_common *sk_common = (struct sock_common *)ctx->sk_common;
+ __u64 sock_cookie;
+
+ if (!sk_common)
+ return 0;
+
+ sock_cookie = bpf_get_socket_cookie(sk_common);
+ if (sock_cookie != destroy_cookie)
+ return 0;
+
+ bpf_sock_destroy(sk_common);
+ bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie));
+
+ return 0;
+}
+
#define udp_sk(ptr) container_of(ptr, struct udp_sock, inet.sk)
SEC("iter/udp")
@@ -83,15 +107,17 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx)
sock_cookie = bpf_get_socket_cookie(sk);
sk = bpf_core_cast(sk, struct sock);
if (sk->sk_family != sf ||
- sk->sk_family == AF_INET6 ?
+ (sk->sk_family == AF_INET6 ?
!ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) :
- !ipv4_addr_loopback(sk->sk_rcv_saddr))
+ !ipv4_addr_loopback(sk->sk_rcv_saddr)))
return 0;
if (sk->sk_num == ports[0])
idx = 0;
else if (sk->sk_num == ports[1])
idx = 1;
+ else if (!ports[0] && !ports[1])
+ idx = 0;
else
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c
index 8bdb9987c0c7..83df4919c224 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c
@@ -7,6 +7,8 @@ int cork_byte;
int push_start;
int push_end;
int apply_bytes;
+int pop_start;
+int pop_end;
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
@@ -22,6 +24,8 @@ int prog_sk_policy(struct sk_msg_md *msg)
bpf_msg_cork_bytes(msg, cork_byte);
if (push_start > 0 && push_end > 0)
bpf_msg_push_data(msg, push_start, push_end, 0);
+ if (pop_start >= 0 && pop_end > 0)
+ bpf_msg_pop_data(msg, pop_start, pop_end, 0);
return SK_PASS;
}
diff --git a/tools/testing/selftests/bpf/progs/verifier_ctx.c b/tools/testing/selftests/bpf/progs/verifier_ctx.c
index a83809a1dbbf..0450840c92d9 100644
--- a/tools/testing/selftests/bpf/progs/verifier_ctx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_ctx.c
@@ -218,4 +218,29 @@ __naked void null_check_8_null_bind(void)
: __clobber_all);
}
+#define narrow_load(type, ctx, field) \
+ SEC(type) \
+ __description("narrow load on field " #field " of " #ctx) \
+ __failure __msg("invalid bpf_context access") \
+ __naked void invalid_narrow_load##ctx##field(void) \
+ { \
+ asm volatile (" \
+ r1 = *(u32 *)(r1 + %[off]); \
+ r0 = 0; \
+ exit;" \
+ : \
+ : __imm_const(off, offsetof(struct ctx, field) + 4) \
+ : __clobber_all); \
+ }
+
+narrow_load("cgroup/getsockopt", bpf_sockopt, sk);
+narrow_load("cgroup/getsockopt", bpf_sockopt, optval);
+narrow_load("cgroup/getsockopt", bpf_sockopt, optval_end);
+narrow_load("tc", __sk_buff, sk);
+narrow_load("cgroup/bind4", bpf_sock_addr, sk);
+narrow_load("sockops", bpf_sock_ops, sk);
+narrow_load("sockops", bpf_sock_ops, skb_data);
+narrow_load("sockops", bpf_sock_ops, skb_data_end);
+narrow_load("sockops", bpf_sock_ops, skb_hwtstamp);
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 0ced4026ee44..a29de0713f19 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -109,6 +109,8 @@
#include <network_helpers.h>
+#define MAX_TX_BUDGET_DEFAULT 32
+
static bool opt_verbose;
static bool opt_print_tests;
static enum test_mode opt_mode = TEST_MODE_ALL;
@@ -1091,11 +1093,45 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
return true;
}
+static u32 load_value(u32 *counter)
+{
+ return __atomic_load_n(counter, __ATOMIC_ACQUIRE);
+}
+
+static bool kick_tx_with_check(struct xsk_socket_info *xsk, int *ret)
+{
+ u32 max_budget = MAX_TX_BUDGET_DEFAULT;
+ u32 cons, ready_to_send;
+ int delta;
+
+ cons = load_value(xsk->tx.consumer);
+ ready_to_send = load_value(xsk->tx.producer) - cons;
+ *ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+
+ delta = load_value(xsk->tx.consumer) - cons;
+ /* By default, xsk should consume exact @max_budget descs at one
+ * send in this case where hitting the max budget limit in while
+ * loop is triggered in __xsk_generic_xmit(). Please make sure that
+ * the number of descs to be sent is larger than @max_budget, or
+ * else the tx.consumer will be updated in xskq_cons_peek_desc()
+ * in time which hides the issue we try to verify.
+ */
+ if (ready_to_send > max_budget && delta != max_budget)
+ return false;
+
+ return true;
+}
+
static int kick_tx(struct xsk_socket_info *xsk)
{
int ret;
- ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+ if (xsk->check_consumer) {
+ if (!kick_tx_with_check(xsk, &ret))
+ return TEST_FAILURE;
+ } else {
+ ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+ }
if (ret >= 0)
return TEST_PASS;
if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) {
@@ -2613,6 +2649,23 @@ static int testapp_adjust_tail_grow_mb(struct test_spec *test)
XSK_UMEM__LARGE_FRAME_SIZE * 2);
}
+static int testapp_tx_queue_consumer(struct test_spec *test)
+{
+ int nr_packets;
+
+ if (test->mode == TEST_MODE_ZC) {
+ ksft_test_result_skip("Can not run TX_QUEUE_CONSUMER test for ZC mode\n");
+ return TEST_SKIP;
+ }
+
+ nr_packets = MAX_TX_BUDGET_DEFAULT + 1;
+ pkt_stream_replace(test, nr_packets, MIN_PKT_SIZE);
+ test->ifobj_tx->xsk->batch_size = nr_packets;
+ test->ifobj_tx->xsk->check_consumer = true;
+
+ return testapp_validate_traffic(test);
+}
+
static void run_pkt_test(struct test_spec *test)
{
int ret;
@@ -2723,6 +2776,7 @@ static const struct test_spec tests[] = {
{.name = "XDP_ADJUST_TAIL_SHRINK_MULTI_BUFF", .test_func = testapp_adjust_tail_shrink_mb},
{.name = "XDP_ADJUST_TAIL_GROW", .test_func = testapp_adjust_tail_grow},
{.name = "XDP_ADJUST_TAIL_GROW_MULTI_BUFF", .test_func = testapp_adjust_tail_grow_mb},
+ {.name = "TX_QUEUE_CONSUMER", .test_func = testapp_tx_queue_consumer},
};
static void print_tests(void)
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index 67fc44b2813b..4df3a5d329ac 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -95,6 +95,7 @@ struct xsk_socket_info {
u32 batch_size;
u8 dst_mac[ETH_ALEN];
u8 src_mac[ETH_ALEN];
+ bool check_consumer;
};
struct pkt {
diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
index 8d275f03e977..8d233ac95696 100644
--- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c
+++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
@@ -127,22 +127,42 @@ int run_test(int cpu)
return KSFT_PASS;
}
+/*
+ * Reads the suspend success count from sysfs.
+ * Returns the count on success or exits on failure.
+ */
+static int get_suspend_success_count_or_fail(void)
+{
+ FILE *fp;
+ int val;
+
+ fp = fopen("/sys/power/suspend_stats/success", "r");
+ if (!fp)
+ ksft_exit_fail_msg(
+ "Failed to open suspend_stats/success: %s\n",
+ strerror(errno));
+
+ if (fscanf(fp, "%d", &val) != 1) {
+ fclose(fp);
+ ksft_exit_fail_msg(
+ "Failed to read suspend success count\n");
+ }
+
+ fclose(fp);
+ return val;
+}
+
void suspend(void)
{
- int power_state_fd;
int timerfd;
int err;
+ int count_before;
+ int count_after;
struct itimerspec spec = {};
if (getuid() != 0)
ksft_exit_skip("Please run the test as root - Exiting.\n");
- power_state_fd = open("/sys/power/state", O_RDWR);
- if (power_state_fd < 0)
- ksft_exit_fail_msg(
- "open(\"/sys/power/state\") failed %s)\n",
- strerror(errno));
-
timerfd = timerfd_create(CLOCK_BOOTTIME_ALARM, 0);
if (timerfd < 0)
ksft_exit_fail_msg("timerfd_create() failed\n");
@@ -152,14 +172,15 @@ void suspend(void)
if (err < 0)
ksft_exit_fail_msg("timerfd_settime() failed\n");
+ count_before = get_suspend_success_count_or_fail();
+
system("(echo mem > /sys/power/state) 2> /dev/null");
- timerfd_gettime(timerfd, &spec);
- if (spec.it_value.tv_sec != 0 || spec.it_value.tv_nsec != 0)
+ count_after = get_suspend_success_count_or_fail();
+ if (count_after <= count_before)
ksft_exit_fail_msg("Failed to enter Suspend state\n");
close(timerfd);
- close(power_state_fd);
}
int main(int argc, char **argv)
diff --git a/tools/testing/selftests/coredump/Makefile b/tools/testing/selftests/coredump/Makefile
index ed210037b29d..77b3665c73c7 100644
--- a/tools/testing/selftests/coredump/Makefile
+++ b/tools/testing/selftests/coredump/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-CFLAGS = $(KHDR_INCLUDES)
+CFLAGS += -Wall -O0 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
TEST_GEN_PROGS := stackdump_test
TEST_FILES := stackdump
diff --git a/tools/testing/selftests/coredump/config b/tools/testing/selftests/coredump/config
new file mode 100644
index 000000000000..a05ef112b4f9
--- /dev/null
+++ b/tools/testing/selftests/coredump/config
@@ -0,0 +1,3 @@
+CONFIG_COREDUMP=y
+CONFIG_NET=y
+CONFIG_UNIX=y
diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c
index 68f8e479ac36..5a5a7a5f7e1d 100644
--- a/tools/testing/selftests/coredump/stackdump_test.c
+++ b/tools/testing/selftests/coredump/stackdump_test.c
@@ -1,12 +1,18 @@
// SPDX-License-Identifier: GPL-2.0
+#include <assert.h>
#include <fcntl.h>
#include <inttypes.h>
#include <libgen.h>
+#include <limits.h>
+#include <linux/coredump.h>
+#include <linux/fs.h>
#include <linux/limits.h>
#include <pthread.h>
#include <string.h>
#include <sys/mount.h>
+#include <poll.h>
+#include <sys/epoll.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/socket.h>
@@ -14,16 +20,23 @@
#include <unistd.h>
#include "../kselftest_harness.h"
+#include "../filesystems/wrappers.h"
#include "../pidfd/pidfd.h"
#define STACKDUMP_FILE "stack_values"
#define STACKDUMP_SCRIPT "stackdump"
#define NUM_THREAD_SPAWN 128
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
static void *do_nothing(void *)
{
while (1)
pause();
+
+ return NULL;
}
static void crashing_child(void)
@@ -42,16 +55,32 @@ FIXTURE(coredump)
{
char original_core_pattern[256];
pid_t pid_coredump_server;
+ int fd_tmpfs_detached;
};
+static int create_detached_tmpfs(void)
+{
+ int fd_context, fd_tmpfs;
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ if (fd_context < 0)
+ return -1;
+
+ if (sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0)
+ return -1;
+
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ close(fd_context);
+ return fd_tmpfs;
+}
+
FIXTURE_SETUP(coredump)
{
- char buf[PATH_MAX];
FILE *file;
- char *dir;
int ret;
self->pid_coredump_server = -ESRCH;
+ self->fd_tmpfs_detached = -1;
file = fopen("/proc/sys/kernel/core_pattern", "r");
ASSERT_NE(NULL, file);
@@ -60,6 +89,8 @@ FIXTURE_SETUP(coredump)
ASSERT_LT(ret, sizeof(self->original_core_pattern));
self->original_core_pattern[ret] = '\0';
+ self->fd_tmpfs_detached = create_detached_tmpfs();
+ ASSERT_GE(self->fd_tmpfs_detached, 0);
ret = fclose(file);
ASSERT_EQ(0, ret);
@@ -98,6 +129,15 @@ FIXTURE_TEARDOWN(coredump)
goto fail;
}
+ if (self->fd_tmpfs_detached >= 0) {
+ ret = close(self->fd_tmpfs_detached);
+ if (ret < 0) {
+ reason = "Unable to close detached tmpfs";
+ goto fail;
+ }
+ self->fd_tmpfs_detached = -1;
+ }
+
return;
fail:
/* This should never happen */
@@ -106,11 +146,10 @@ fail:
TEST_F_TIMEOUT(coredump, stackdump, 120)
{
- struct sigaction action = {};
unsigned long long stack;
char *test_dir, *line;
size_t line_length;
- char buf[PATH_MAX];
+ char buf[PAGE_SIZE];
int ret, i, status;
FILE *file;
pid_t pid;
@@ -169,153 +208,166 @@ TEST_F_TIMEOUT(coredump, stackdump, 120)
fclose(file);
}
+static int create_and_listen_unix_socket(const char *path)
+{
+ struct sockaddr_un addr = {
+ .sun_family = AF_UNIX,
+ };
+ assert(strlen(path) < sizeof(addr.sun_path) - 1);
+ strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
+ size_t addr_len =
+ offsetof(struct sockaddr_un, sun_path) + strlen(path) + 1;
+ int fd, ret;
+
+ fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ if (fd < 0)
+ goto out;
+
+ ret = bind(fd, (const struct sockaddr *)&addr, addr_len);
+ if (ret < 0)
+ goto out;
+
+ ret = listen(fd, 128);
+ if (ret < 0)
+ goto out;
+
+ return fd;
+
+out:
+ if (fd >= 0)
+ close(fd);
+ return -1;
+}
+
+static bool set_core_pattern(const char *pattern)
+{
+ int fd;
+ ssize_t ret;
+
+ fd = open("/proc/sys/kernel/core_pattern", O_WRONLY | O_CLOEXEC);
+ if (fd < 0)
+ return false;
+
+ ret = write(fd, pattern, strlen(pattern));
+ close(fd);
+ if (ret < 0)
+ return false;
+
+ fprintf(stderr, "Set core_pattern to '%s' | %zu == %zu\n", pattern, ret, strlen(pattern));
+ return ret == strlen(pattern);
+}
+
+static int get_peer_pidfd(int fd)
+{
+ int fd_peer_pidfd;
+ socklen_t fd_peer_pidfd_len = sizeof(fd_peer_pidfd);
+ int ret = getsockopt(fd, SOL_SOCKET, SO_PEERPIDFD, &fd_peer_pidfd,
+ &fd_peer_pidfd_len);
+ if (ret < 0) {
+ fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n");
+ return -1;
+ }
+ return fd_peer_pidfd;
+}
+
+static bool get_pidfd_info(int fd_peer_pidfd, struct pidfd_info *info)
+{
+ memset(info, 0, sizeof(*info));
+ info->mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
+ return ioctl(fd_peer_pidfd, PIDFD_GET_INFO, info) == 0;
+}
+
+static void
+wait_and_check_coredump_server(pid_t pid_coredump_server,
+ struct __test_metadata *const _metadata,
+ FIXTURE_DATA(coredump)* self)
+{
+ int status;
+ waitpid(pid_coredump_server, &status, 0);
+ self->pid_coredump_server = -ESRCH;
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+}
+
TEST_F(coredump, socket)
{
- int fd, pidfd, ret, status;
- FILE *file;
+ int pidfd, ret, status;
pid_t pid, pid_coredump_server;
struct stat st;
- char core_file[PATH_MAX];
struct pidfd_info info = {};
int ipc_sockets[2];
char c;
- const struct sockaddr_un coredump_sk = {
- .sun_family = AF_UNIX,
- .sun_path = "/tmp/coredump.socket",
- };
- size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) +
- sizeof("/tmp/coredump.socket");
+
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
ASSERT_EQ(ret, 0);
- file = fopen("/proc/sys/kernel/core_pattern", "w");
- ASSERT_NE(file, NULL);
-
- ret = fprintf(file, "@/tmp/coredump.socket");
- ASSERT_EQ(ret, strlen("@/tmp/coredump.socket"));
- ASSERT_EQ(fclose(file), 0);
-
pid_coredump_server = fork();
ASSERT_GE(pid_coredump_server, 0);
if (pid_coredump_server == 0) {
- int fd_server, fd_coredump, fd_peer_pidfd, fd_core_file;
- socklen_t fd_peer_pidfd_len;
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+ int exit_code = EXIT_FAILURE;
close(ipc_sockets[0]);
- fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
if (fd_server < 0)
- _exit(EXIT_FAILURE);
-
- ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
- if (ret < 0) {
- fprintf(stderr, "Failed to bind coredump socket\n");
- close(fd_server);
- close(ipc_sockets[1]);
- _exit(EXIT_FAILURE);
- }
-
- ret = listen(fd_server, 1);
- if (ret < 0) {
- fprintf(stderr, "Failed to listen on coredump socket\n");
- close(fd_server);
- close(ipc_sockets[1]);
- _exit(EXIT_FAILURE);
- }
+ goto out;
- if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
- close(fd_server);
- close(ipc_sockets[1]);
- _exit(EXIT_FAILURE);
- }
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
close(ipc_sockets[1]);
fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
- if (fd_coredump < 0) {
- fprintf(stderr, "Failed to accept coredump socket connection\n");
- close(fd_server);
- _exit(EXIT_FAILURE);
- }
+ if (fd_coredump < 0)
+ goto out;
- fd_peer_pidfd_len = sizeof(fd_peer_pidfd);
- ret = getsockopt(fd_coredump, SOL_SOCKET, SO_PEERPIDFD,
- &fd_peer_pidfd, &fd_peer_pidfd_len);
- if (ret < 0) {
- fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n");
- close(fd_coredump);
- close(fd_server);
- _exit(EXIT_FAILURE);
- }
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
- memset(&info, 0, sizeof(info));
- info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
- ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, &info);
- if (ret < 0) {
- fprintf(stderr, "Failed to retrieve pidfd info from peer pidfd for coredump socket connection\n");
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- _exit(EXIT_FAILURE);
- }
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
- if (!(info.mask & PIDFD_INFO_COREDUMP)) {
- fprintf(stderr, "Missing coredump information from coredumping task\n");
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- _exit(EXIT_FAILURE);
- }
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
- if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
- fprintf(stderr, "Received connection from non-coredumping task\n");
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- _exit(EXIT_FAILURE);
- }
+ if (!(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
fd_core_file = creat("/tmp/coredump.file", 0644);
- if (fd_core_file < 0) {
- fprintf(stderr, "Failed to create coredump file\n");
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- _exit(EXIT_FAILURE);
- }
+ if (fd_core_file < 0)
+ goto out;
for (;;) {
char buffer[4096];
ssize_t bytes_read, bytes_write;
bytes_read = read(fd_coredump, buffer, sizeof(buffer));
- if (bytes_read < 0) {
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- close(fd_core_file);
- _exit(EXIT_FAILURE);
- }
+ if (bytes_read < 0)
+ goto out;
if (bytes_read == 0)
break;
bytes_write = write(fd_core_file, buffer, bytes_read);
- if (bytes_read != bytes_write) {
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- close(fd_core_file);
- _exit(EXIT_FAILURE);
- }
+ if (bytes_read != bytes_write)
+ goto out;
}
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- close(fd_core_file);
- _exit(EXIT_SUCCESS);
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
}
self->pid_coredump_server = pid_coredump_server;
@@ -335,48 +387,27 @@ TEST_F(coredump, socket)
ASSERT_TRUE(WIFSIGNALED(status));
ASSERT_TRUE(WCOREDUMP(status));
- info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
- ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0);
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
- waitpid(pid_coredump_server, &status, 0);
- self->pid_coredump_server = -ESRCH;
- ASSERT_TRUE(WIFEXITED(status));
- ASSERT_EQ(WEXITSTATUS(status), 0);
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
ASSERT_EQ(stat("/tmp/coredump.file", &st), 0);
ASSERT_GT(st.st_size, 0);
- /*
- * We should somehow validate the produced core file.
- * For now just allow for visual inspection
- */
system("file /tmp/coredump.file");
}
TEST_F(coredump, socket_detect_userspace_client)
{
- int fd, pidfd, ret, status;
- FILE *file;
+ int pidfd, ret, status;
pid_t pid, pid_coredump_server;
struct stat st;
- char core_file[PATH_MAX];
struct pidfd_info info = {};
int ipc_sockets[2];
char c;
- const struct sockaddr_un coredump_sk = {
- .sun_family = AF_UNIX,
- .sun_path = "/tmp/coredump.socket",
- };
- size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) +
- sizeof("/tmp/coredump.socket");
-
- file = fopen("/proc/sys/kernel/core_pattern", "w");
- ASSERT_NE(file, NULL);
- ret = fprintf(file, "@/tmp/coredump.socket");
- ASSERT_EQ(ret, strlen("@/tmp/coredump.socket"));
- ASSERT_EQ(fclose(file), 0);
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
ASSERT_EQ(ret, 0);
@@ -384,93 +415,49 @@ TEST_F(coredump, socket_detect_userspace_client)
pid_coredump_server = fork();
ASSERT_GE(pid_coredump_server, 0);
if (pid_coredump_server == 0) {
- int fd_server, fd_coredump, fd_peer_pidfd, fd_core_file;
- socklen_t fd_peer_pidfd_len;
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
close(ipc_sockets[0]);
- fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
if (fd_server < 0)
- _exit(EXIT_FAILURE);
-
- ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
- if (ret < 0) {
- fprintf(stderr, "Failed to bind coredump socket\n");
- close(fd_server);
- close(ipc_sockets[1]);
- _exit(EXIT_FAILURE);
- }
+ goto out;
- ret = listen(fd_server, 1);
- if (ret < 0) {
- fprintf(stderr, "Failed to listen on coredump socket\n");
- close(fd_server);
- close(ipc_sockets[1]);
- _exit(EXIT_FAILURE);
- }
-
- if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
- close(fd_server);
- close(ipc_sockets[1]);
- _exit(EXIT_FAILURE);
- }
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
close(ipc_sockets[1]);
fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
- if (fd_coredump < 0) {
- fprintf(stderr, "Failed to accept coredump socket connection\n");
- close(fd_server);
- _exit(EXIT_FAILURE);
- }
-
- fd_peer_pidfd_len = sizeof(fd_peer_pidfd);
- ret = getsockopt(fd_coredump, SOL_SOCKET, SO_PEERPIDFD,
- &fd_peer_pidfd, &fd_peer_pidfd_len);
- if (ret < 0) {
- fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n");
- close(fd_coredump);
- close(fd_server);
- _exit(EXIT_FAILURE);
- }
+ if (fd_coredump < 0)
+ goto out;
- memset(&info, 0, sizeof(info));
- info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
- ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, &info);
- if (ret < 0) {
- fprintf(stderr, "Failed to retrieve pidfd info from peer pidfd for coredump socket connection\n");
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- _exit(EXIT_FAILURE);
- }
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
- if (!(info.mask & PIDFD_INFO_COREDUMP)) {
- fprintf(stderr, "Missing coredump information from coredumping task\n");
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- _exit(EXIT_FAILURE);
- }
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
- if (info.coredump_mask & PIDFD_COREDUMPED) {
- fprintf(stderr, "Received unexpected connection from coredumping task\n");
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- _exit(EXIT_FAILURE);
- }
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
- ret = read(fd_coredump, &c, 1);
+ if (info.coredump_mask & PIDFD_COREDUMPED)
+ goto out;
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- close(fd_core_file);
+ if (read(fd_coredump, &c, 1) < 1)
+ goto out;
- if (ret < 1)
- _exit(EXIT_FAILURE);
- _exit(EXIT_SUCCESS);
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
}
self->pid_coredump_server = pid_coredump_server;
@@ -483,17 +470,22 @@ TEST_F(coredump, socket_detect_userspace_client)
if (pid == 0) {
int fd_socket;
ssize_t ret;
+ const struct sockaddr_un coredump_sk = {
+ .sun_family = AF_UNIX,
+ .sun_path = "/tmp/coredump.socket",
+ };
+ size_t coredump_sk_len =
+ offsetof(struct sockaddr_un, sun_path) +
+ sizeof("/tmp/coredump.socket");
fd_socket = socket(AF_UNIX, SOCK_STREAM, 0);
if (fd_socket < 0)
_exit(EXIT_FAILURE);
-
ret = connect(fd_socket, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
if (ret < 0)
_exit(EXIT_FAILURE);
- (void *)write(fd_socket, &(char){ 0 }, 1);
close(fd_socket);
_exit(EXIT_SUCCESS);
}
@@ -505,15 +497,11 @@ TEST_F(coredump, socket_detect_userspace_client)
ASSERT_TRUE(WIFEXITED(status));
ASSERT_EQ(WEXITSTATUS(status), 0);
- info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
- ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0);
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
ASSERT_EQ((info.coredump_mask & PIDFD_COREDUMPED), 0);
- waitpid(pid_coredump_server, &status, 0);
- self->pid_coredump_server = -ESRCH;
- ASSERT_TRUE(WIFEXITED(status));
- ASSERT_EQ(WEXITSTATUS(status), 0);
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
ASSERT_NE(stat("/tmp/coredump.file", &st), 0);
ASSERT_EQ(errno, ENOENT);
@@ -521,17 +509,10 @@ TEST_F(coredump, socket_detect_userspace_client)
TEST_F(coredump, socket_enoent)
{
- int pidfd, ret, status;
- FILE *file;
+ int pidfd, status;
pid_t pid;
- char core_file[PATH_MAX];
- file = fopen("/proc/sys/kernel/core_pattern", "w");
- ASSERT_NE(file, NULL);
-
- ret = fprintf(file, "@/tmp/coredump.socket");
- ASSERT_EQ(ret, strlen("@/tmp/coredump.socket"));
- ASSERT_EQ(fclose(file), 0);
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
pid = fork();
ASSERT_GE(pid, 0);
@@ -549,7 +530,6 @@ TEST_F(coredump, socket_enoent)
TEST_F(coredump, socket_no_listener)
{
int pidfd, ret, status;
- FILE *file;
pid_t pid, pid_coredump_server;
int ipc_sockets[2];
char c;
@@ -560,45 +540,616 @@ TEST_F(coredump, socket_no_listener)
size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) +
sizeof("/tmp/coredump.socket");
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+
ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
ASSERT_EQ(ret, 0);
- file = fopen("/proc/sys/kernel/core_pattern", "w");
- ASSERT_NE(file, NULL);
-
- ret = fprintf(file, "@/tmp/coredump.socket");
- ASSERT_EQ(ret, strlen("@/tmp/coredump.socket"));
- ASSERT_EQ(fclose(file), 0);
-
pid_coredump_server = fork();
ASSERT_GE(pid_coredump_server, 0);
if (pid_coredump_server == 0) {
- int fd_server;
- socklen_t fd_peer_pidfd_len;
+ int fd_server = -1;
+ int exit_code = EXIT_FAILURE;
close(ipc_sockets[0]);
fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
if (fd_server < 0)
- _exit(EXIT_FAILURE);
+ goto out;
ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
- if (ret < 0) {
- fprintf(stderr, "Failed to bind coredump socket\n");
+ if (ret < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_server >= 0)
close(fd_server);
- close(ipc_sockets[1]);
- _exit(EXIT_FAILURE);
+ close(ipc_sockets[1]);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+static ssize_t recv_marker(int fd)
+{
+ enum coredump_mark mark = COREDUMP_MARK_REQACK;
+ ssize_t ret;
+
+ ret = recv(fd, &mark, sizeof(mark), MSG_WAITALL);
+ if (ret != sizeof(mark))
+ return -1;
+
+ switch (mark) {
+ case COREDUMP_MARK_REQACK:
+ fprintf(stderr, "Received marker: ReqAck\n");
+ return COREDUMP_MARK_REQACK;
+ case COREDUMP_MARK_MINSIZE:
+ fprintf(stderr, "Received marker: MinSize\n");
+ return COREDUMP_MARK_MINSIZE;
+ case COREDUMP_MARK_MAXSIZE:
+ fprintf(stderr, "Received marker: MaxSize\n");
+ return COREDUMP_MARK_MAXSIZE;
+ case COREDUMP_MARK_UNSUPPORTED:
+ fprintf(stderr, "Received marker: Unsupported\n");
+ return COREDUMP_MARK_UNSUPPORTED;
+ case COREDUMP_MARK_CONFLICTING:
+ fprintf(stderr, "Received marker: Conflicting\n");
+ return COREDUMP_MARK_CONFLICTING;
+ default:
+ fprintf(stderr, "Received unknown marker: %u\n", mark);
+ break;
+ }
+ return -1;
+}
+
+static bool read_marker(int fd, enum coredump_mark mark)
+{
+ ssize_t ret;
+
+ ret = recv_marker(fd);
+ if (ret < 0)
+ return false;
+ return ret == mark;
+}
+
+static bool read_coredump_req(int fd, struct coredump_req *req)
+{
+ ssize_t ret;
+ size_t field_size, user_size, ack_size, kernel_size, remaining_size;
+
+ memset(req, 0, sizeof(*req));
+ field_size = sizeof(req->size);
+
+ /* Peek the size of the coredump request. */
+ ret = recv(fd, req, field_size, MSG_PEEK | MSG_WAITALL);
+ if (ret != field_size)
+ return false;
+ kernel_size = req->size;
+
+ if (kernel_size < COREDUMP_ACK_SIZE_VER0)
+ return false;
+ if (kernel_size >= PAGE_SIZE)
+ return false;
+
+ /* Use the minimum of user and kernel size to read the full request. */
+ user_size = sizeof(struct coredump_req);
+ ack_size = user_size < kernel_size ? user_size : kernel_size;
+ ret = recv(fd, req, ack_size, MSG_WAITALL);
+ if (ret != ack_size)
+ return false;
+
+ fprintf(stderr, "Read coredump request with size %u and mask 0x%llx\n",
+ req->size, (unsigned long long)req->mask);
+
+ if (user_size > kernel_size)
+ remaining_size = user_size - kernel_size;
+ else
+ remaining_size = kernel_size - user_size;
+
+ if (PAGE_SIZE <= remaining_size)
+ return false;
+
+ /*
+ * Discard any additional data if the kernel's request was larger than
+ * what we knew about or cared about.
+ */
+ if (remaining_size) {
+ char buffer[PAGE_SIZE];
+
+ ret = recv(fd, buffer, sizeof(buffer), MSG_WAITALL);
+ if (ret != remaining_size)
+ return false;
+ fprintf(stderr, "Discarded %zu bytes of data after coredump request\n", remaining_size);
+ }
+
+ return true;
+}
+
+static bool send_coredump_ack(int fd, const struct coredump_req *req,
+ __u64 mask, size_t size_ack)
+{
+ ssize_t ret;
+ /*
+ * Wrap struct coredump_ack in a larger struct so we can
+ * simulate sending to much data to the kernel.
+ */
+ struct large_ack_for_size_testing {
+ struct coredump_ack ack;
+ char buffer[PAGE_SIZE];
+ } large_ack = {};
+
+ if (!size_ack)
+ size_ack = sizeof(struct coredump_ack) < req->size_ack ?
+ sizeof(struct coredump_ack) :
+ req->size_ack;
+ large_ack.ack.mask = mask;
+ large_ack.ack.size = size_ack;
+ ret = send(fd, &large_ack, size_ack, MSG_NOSIGNAL);
+ if (ret != size_ack)
+ return false;
+
+ fprintf(stderr, "Sent coredump ack with size %zu and mask 0x%llx\n",
+ size_ack, (unsigned long long)mask);
+ return true;
+}
+
+static bool check_coredump_req(const struct coredump_req *req, size_t min_size,
+ __u64 required_mask)
+{
+ if (req->size < min_size)
+ return false;
+ if ((req->mask & required_mask) != required_mask)
+ return false;
+ if (req->mask & ~required_mask)
+ return false;
+ return true;
+}
+
+TEST_F(coredump, socket_request_kernel)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct stat st;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_core_file = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0)
+ goto out;
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
+
+ fd_core_file = creat("/tmp/coredump.file", 0644);
+ if (fd_core_file < 0)
+ goto out;
+
+ if (!read_coredump_req(fd_coredump, &req))
+ goto out;
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT))
+ goto out;
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_KERNEL | COREDUMP_WAIT, 0))
+ goto out;
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK))
+ goto out;
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read, bytes_write;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0)
+ goto out;
+
+ if (bytes_read == 0)
+ break;
+
+ bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_read != bytes_write)
+ goto out;
}
- if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
close(fd_server);
- close(ipc_sockets[1]);
- _exit(EXIT_FAILURE);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_TRUE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+
+ ASSERT_EQ(stat("/tmp/coredump.file", &st), 0);
+ ASSERT_GT(st.st_size, 0);
+ system("file /tmp/coredump.file");
+}
+
+TEST_F(coredump, socket_request_userspace)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0)
+ goto out;
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
+
+ if (!read_coredump_req(fd_coredump, &req))
+ goto out;
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT))
+ goto out;
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_USERSPACE | COREDUMP_WAIT, 0))
+ goto out;
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK))
+ goto out;
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read > 0)
+ goto out;
+
+ if (bytes_read < 0)
+ goto out;
+
+ if (bytes_read == 0)
+ break;
+ }
+
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_TRUE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_reject)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0)
+ goto out;
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
+
+ if (!read_coredump_req(fd_coredump, &req))
+ goto out;
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT))
+ goto out;
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_REJECT | COREDUMP_WAIT, 0))
+ goto out;
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK))
+ goto out;
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read > 0)
+ goto out;
+
+ if (bytes_read < 0)
+ goto out;
+
+ if (bytes_read == 0)
+ break;
}
- close(fd_server);
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_invalid_flag_combination)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+
close(ipc_sockets[1]);
- _exit(EXIT_SUCCESS);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0)
+ goto out;
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
+
+ if (!read_coredump_req(fd_coredump, &req))
+ goto out;
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT))
+ goto out;
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_KERNEL | COREDUMP_REJECT | COREDUMP_WAIT, 0))
+ goto out;
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_CONFLICTING))
+ goto out;
+
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
}
self->pid_coredump_server = pid_coredump_server;
@@ -618,10 +1169,662 @@ TEST_F(coredump, socket_no_listener)
ASSERT_TRUE(WIFSIGNALED(status));
ASSERT_FALSE(WCOREDUMP(status));
- waitpid(pid_coredump_server, &status, 0);
- self->pid_coredump_server = -ESRCH;
- ASSERT_TRUE(WIFEXITED(status));
- ASSERT_EQ(WEXITSTATUS(status), 0);
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_unknown_flag)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0)
+ goto out;
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
+
+ if (!read_coredump_req(fd_coredump, &req))
+ goto out;
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT))
+ goto out;
+
+ if (!send_coredump_ack(fd_coredump, &req, (1ULL << 63), 0))
+ goto out;
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_UNSUPPORTED))
+ goto out;
+
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_invalid_size_small)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0)
+ goto out;
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
+
+ if (!read_coredump_req(fd_coredump, &req))
+ goto out;
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT))
+ goto out;
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_REJECT | COREDUMP_WAIT,
+ COREDUMP_ACK_SIZE_VER0 / 2))
+ goto out;
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_MINSIZE))
+ goto out;
+
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_invalid_size_large)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0)
+ goto out;
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
+
+ if (!read_coredump_req(fd_coredump, &req))
+ goto out;
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT))
+ goto out;
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_REJECT | COREDUMP_WAIT,
+ COREDUMP_ACK_SIZE_VER0 + PAGE_SIZE))
+ goto out;
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_MAXSIZE))
+ goto out;
+
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+static int open_coredump_tmpfile(int fd_tmpfs_detached)
+{
+ return openat(fd_tmpfs_detached, ".", O_TMPFILE | O_RDWR | O_EXCL, 0600);
+}
+
+#define NUM_CRASHING_COREDUMPS 5
+
+TEST_F_TIMEOUT(coredump, socket_multiple_crashing_coredumps, 500)
+{
+ int pidfd[NUM_CRASHING_COREDUMPS], status[NUM_CRASHING_COREDUMPS];
+ pid_t pid[NUM_CRASHING_COREDUMPS], pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+ int exit_code = EXIT_FAILURE;
+ struct coredump_req req = {};
+
+ close(ipc_sockets[0]);
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "Failed to create and listen on unix socket\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "Failed to notify parent via ipc socket\n");
+ goto out;
+ }
+ close(ipc_sockets[1]);
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "get_peer_pidfd failed for fd %d: %m\n", fd_coredump);
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "get_pidfd_info failed for fd %d\n", fd_peer_pidfd);
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "pidfd info missing PIDFD_INFO_COREDUMP for fd %d\n", fd_peer_pidfd);
+ goto out;
+ }
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "pidfd info missing PIDFD_COREDUMPED for fd %d\n", fd_peer_pidfd);
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "read_coredump_req failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "check_coredump_req failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_KERNEL | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "send_coredump_ack failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+ fprintf(stderr, "read_marker failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
+ if (fd_core_file < 0) {
+ fprintf(stderr, "%m - open_coredump_tmpfile failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read, bytes_write;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0) {
+ fprintf(stderr, "read failed for fd %d: %m\n", fd_coredump);
+ goto out;
+ }
+
+ if (bytes_read == 0)
+ break;
+
+ bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_read != bytes_write) {
+ fprintf(stderr, "write failed for fd %d: %m\n", fd_core_file);
+ goto out;
+ }
+ }
+
+ close(fd_core_file);
+ close(fd_peer_pidfd);
+ close(fd_coredump);
+ fd_peer_pidfd = -1;
+ fd_coredump = -1;
+ }
+
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ pid[i] = fork();
+ ASSERT_GE(pid[i], 0);
+ if (pid[i] == 0)
+ crashing_child();
+ pidfd[i] = sys_pidfd_open(pid[i], 0);
+ ASSERT_GE(pidfd[i], 0);
+ }
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ waitpid(pid[i], &status[i], 0);
+ ASSERT_TRUE(WIFSIGNALED(status[i]));
+ ASSERT_TRUE(WCOREDUMP(status[i]));
+ }
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
+ ASSERT_EQ(ioctl(pidfd[i], PIDFD_GET_INFO, &info), 0);
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+ }
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+#define MAX_EVENTS 128
+
+static void process_coredump_worker(int fd_coredump, int fd_peer_pidfd, int fd_core_file)
+{
+ int epfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ epfd = epoll_create1(0);
+ if (epfd < 0)
+ goto out;
+
+ struct epoll_event ev;
+ ev.events = EPOLLIN | EPOLLRDHUP | EPOLLET;
+ ev.data.fd = fd_coredump;
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd_coredump, &ev) < 0)
+ goto out;
+
+ for (;;) {
+ struct epoll_event events[1];
+ int n = epoll_wait(epfd, events, 1, -1);
+ if (n < 0)
+ break;
+
+ if (events[0].events & (EPOLLIN | EPOLLRDHUP)) {
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ break;
+ goto out;
+ }
+ if (bytes_read == 0)
+ goto done;
+ ssize_t bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_write != bytes_read)
+ goto out;
+ }
+ }
+ }
+
+done:
+ exit_code = EXIT_SUCCESS;
+out:
+ if (epfd >= 0)
+ close(epfd);
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ _exit(exit_code);
+}
+
+TEST_F_TIMEOUT(coredump, socket_multiple_crashing_coredumps_epoll_workers, 500)
+{
+ int pidfd[NUM_CRASHING_COREDUMPS], status[NUM_CRASHING_COREDUMPS];
+ pid_t pid[NUM_CRASHING_COREDUMPS], pid_coredump_server, worker_pids[NUM_CRASHING_COREDUMPS];
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1, exit_code = EXIT_FAILURE, n_conns = 0;
+ fd_server = -1;
+ exit_code = EXIT_FAILURE;
+ n_conns = 0;
+ close(ipc_sockets[0]);
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+ close(ipc_sockets[1]);
+
+ while (n_conns < NUM_CRASHING_COREDUMPS) {
+ int fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+ struct coredump_req req = {};
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ continue;
+ goto out;
+ }
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
+ if (!(info.mask & PIDFD_INFO_COREDUMP) || !(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
+ if (!read_coredump_req(fd_coredump, &req))
+ goto out;
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT))
+ goto out;
+ if (!send_coredump_ack(fd_coredump, &req, COREDUMP_KERNEL | COREDUMP_WAIT, 0))
+ goto out;
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK))
+ goto out;
+ fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
+ if (fd_core_file < 0)
+ goto out;
+ pid_t worker = fork();
+ if (worker == 0) {
+ close(fd_server);
+ process_coredump_worker(fd_coredump, fd_peer_pidfd, fd_core_file);
+ }
+ worker_pids[n_conns] = worker;
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ n_conns++;
+ }
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_server >= 0)
+ close(fd_server);
+
+ // Reap all worker processes
+ for (int i = 0; i < n_conns; i++) {
+ int wstatus;
+ if (waitpid(worker_pids[i], &wstatus, 0) < 0) {
+ fprintf(stderr, "Failed to wait for worker %d: %m\n", worker_pids[i]);
+ } else if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != EXIT_SUCCESS) {
+ fprintf(stderr, "Worker %d exited with error code %d\n", worker_pids[i], WEXITSTATUS(wstatus));
+ exit_code = EXIT_FAILURE;
+ }
+ }
+
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ pid[i] = fork();
+ ASSERT_GE(pid[i], 0);
+ if (pid[i] == 0)
+ crashing_child();
+ pidfd[i] = sys_pidfd_open(pid[i], 0);
+ ASSERT_GE(pidfd[i], 0);
+ }
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ ASSERT_GE(waitpid(pid[i], &status[i], 0), 0);
+ ASSERT_TRUE(WIFSIGNALED(status[i]));
+ ASSERT_TRUE(WCOREDUMP(status[i]));
+ }
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
+ ASSERT_EQ(ioctl(pidfd[i], PIDFD_GET_INFO, &info), 0);
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+ }
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_invalid_paths)
+{
+ ASSERT_FALSE(set_core_pattern("@ /tmp/coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@/tmp/../coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@../coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@/tmp/coredump.socket/.."));
+ ASSERT_FALSE(set_core_pattern("@.."));
+
+ ASSERT_FALSE(set_core_pattern("@@ /tmp/coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@@/tmp/../coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@@../coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@@/tmp/coredump.socket/.."));
+ ASSERT_FALSE(set_core_pattern("@@.."));
+
+ ASSERT_FALSE(set_core_pattern("@@@/tmp/coredump.socket"));
}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
index d5dc7e0dc726..6232a46ca6e1 100755
--- a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
+++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
@@ -67,7 +67,7 @@ hotpluggable_cpus()
done
}
-hotplaggable_offline_cpus()
+hotpluggable_offline_cpus()
{
hotpluggable_cpus 0
}
@@ -151,7 +151,7 @@ offline_cpu_expect_fail()
online_all_hot_pluggable_cpus()
{
- for cpu in `hotplaggable_offline_cpus`; do
+ for cpu in `hotpluggable_offline_cpus`; do
online_cpu_expect_success $cpu
done
}
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
index be780bcb73a3..3556f3563e08 100644
--- a/tools/testing/selftests/drivers/net/Makefile
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -12,14 +12,17 @@ TEST_GEN_FILES := \
TEST_PROGS := \
napi_id.py \
netcons_basic.sh \
+ netcons_cmdline.sh \
netcons_fragmented_msg.sh \
netcons_overflow.sh \
netcons_sysdata.sh \
+ netpoll_basic.py \
ping.py \
queues.py \
stats.py \
shaper.py \
hds.py \
+ xdp.py \
# end of TEST_PROGS
include ../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile
index df2c047ffa90..fdc97355588c 100644
--- a/tools/testing/selftests/drivers/net/hw/Makefile
+++ b/tools/testing/selftests/drivers/net/hw/Makefile
@@ -16,6 +16,7 @@ TEST_PROGS = \
irq.py \
loopback.sh \
pp_alloc_fail.py \
+ rss_api.py \
rss_ctx.py \
rss_input_xfrm.py \
tso.py \
diff --git a/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py
new file mode 100755
index 000000000000..ead6784d1910
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py
@@ -0,0 +1,465 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Devlink Rate TC Bandwidth Test Suite
+===================================
+
+This test suite verifies the functionality of devlink-rate traffic class (TC)
+bandwidth distribution in a virtualized environment. The tests validate that
+bandwidth can be properly allocated between different traffic classes and
+that TC mapping works as expected.
+
+Test Environment:
+----------------
+- Creates 1 VF
+- Establishes a bridge connecting the VF representor and the uplink representor
+- Sets up 2 VLAN interfaces on the VF with different VLAN IDs (101, 102)
+- Configures different traffic classes (TC3 and TC4) for each VLAN
+
+Test Cases:
+----------
+1. test_no_tc_mapping_bandwidth:
+ - Verifies that without TC mapping, bandwidth is NOT distributed according to
+ the configured 80/20 split between TC4 and TC3
+ - This test should fail if bandwidth matches the 80/20 split without TC
+ mapping
+ - Expected: Bandwidth should NOT be distributed as 80/20
+
+2. test_tc_mapping_bandwidth:
+ - Configures TC mapping using mqprio qdisc
+ - Verifies that with TC mapping, bandwidth IS distributed according to the
+ configured 80/20 split between TC3 and TC4
+ - Expected: Bandwidth should be distributed as 80/20
+
+Bandwidth Distribution:
+----------------------
+- TC3 (VLAN 101): Configured for 80% of total bandwidth
+- TC4 (VLAN 102): Configured for 20% of total bandwidth
+- Total bandwidth: 1Gbps
+- Tolerance: +-12%
+
+Hardware-Specific Behavior (mlx5):
+--------------------------
+mlx5 hardware enforces traffic class separation by ensuring that each transmit
+queue (SQ) is associated with a single TC. If a packet is sent on a queue that
+doesn't match the expected TC (based on DSCP or VLAN priority and hypervisor-set
+mapping), the hardware moves the queue to the correct TC scheduler to preserve
+traffic isolation.
+
+This behavior means that even without explicit TC-to-queue mapping, bandwidth
+enforcement may still appear to work—because the hardware dynamically adjusts
+the scheduling context. However, this can lead to performance issues in high
+rates and HOL blocking if traffic from different TCs is mixed on the same queue.
+"""
+
+import json
+import os
+import subprocess
+import threading
+import time
+
+from lib.py import ksft_pr, ksft_run, ksft_exit
+from lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
+from lib.py import NetDrvEpEnv, DevlinkFamily
+from lib.py import NlError
+from lib.py import cmd, defer, ethtool, ip
+
+
+class BandwidthValidator:
+ """
+ Validates bandwidth totals and per-TC shares against expected values
+ with a tolerance.
+ """
+
+ def __init__(self):
+ self.tolerance_percent = 12
+ self.expected_total_gbps = 1.0
+ self.total_min_expected = self.min_expected(self.expected_total_gbps)
+ self.total_max_expected = self.max_expected(self.expected_total_gbps)
+ self.tc_expected_percent = {
+ 3: 20.0,
+ 4: 80.0,
+ }
+
+ def min_expected(self, value):
+ """Calculates the minimum acceptable value based on tolerance."""
+ return value - (value * self.tolerance_percent / 100)
+
+ def max_expected(self, value):
+ """Calculates the maximum acceptable value based on tolerance."""
+ return value + (value * self.tolerance_percent / 100)
+
+ def bound(self, expected, value):
+ """Returns True if value is within expected tolerance."""
+ return self.min_expected(expected) <= value <= self.max_expected(expected)
+
+ def tc_bandwidth_bound(self, value, tc_ix):
+ """
+ Returns True if the given bandwidth value is within tolerance
+ for the TC's expected bandwidth.
+ """
+ expected = self.tc_expected_percent[tc_ix]
+ return self.bound(expected, value)
+
+
+def setup_vf(cfg, set_tc_mapping=True):
+ """
+ Sets up a VF on the given network interface.
+
+ Enables SR-IOV and switchdev mode, brings the VF interface up,
+ and optionally configures TC mapping using mqprio.
+ """
+ try:
+ cmd(f"devlink dev eswitch set pci/{cfg.pci} mode switchdev")
+ defer(cmd, f"devlink dev eswitch set pci/{cfg.pci} mode legacy")
+ except Exception as exc:
+ raise KsftSkipEx(f"Failed to enable switchdev mode on {cfg.pci}") from exc
+ try:
+ cmd(f"echo 1 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs")
+ defer(cmd, f"echo 0 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs")
+ except Exception as exc:
+ raise KsftSkipEx(f"Failed to enable SR-IOV on {cfg.ifname}") from exc
+
+ time.sleep(2)
+ vf_ifc = (os.listdir(
+ f"/sys/class/net/{cfg.ifname}/device/virtfn0/net") or [None])[0]
+ if vf_ifc:
+ ip(f"link set dev {vf_ifc} up")
+ else:
+ raise KsftSkipEx("VF interface not found")
+ if set_tc_mapping:
+ cmd(f"tc qdisc add dev {vf_ifc} root handle 5 mqprio mode dcb hw 1 num_tc 8")
+
+ return vf_ifc
+
+
+def setup_vlans_on_vf(vf_ifc):
+ """
+ Sets up two VLAN interfaces on the given VF, each mapped to a different TC.
+ """
+ vlan_configs = [
+ {"vlan_id": 101, "tc": 3, "ip": "198.51.100.2"},
+ {"vlan_id": 102, "tc": 4, "ip": "198.51.100.10"},
+ ]
+
+ for config in vlan_configs:
+ vlan_dev = f"{vf_ifc}.{config['vlan_id']}"
+ ip(f"link add link {vf_ifc} name {vlan_dev} type vlan id {config['vlan_id']}")
+ ip(f"addr add {config['ip']}/29 dev {vlan_dev}")
+ ip(f"link set dev {vlan_dev} up")
+ ip(f"link set dev {vlan_dev} type vlan egress-qos-map 0:{config['tc']}")
+ ksft_pr(f"Created VLAN {vlan_dev} on {vf_ifc} with tc {config['tc']} and IP {config['ip']}")
+
+
+def get_vf_info(cfg):
+ """
+ Finds the VF representor interface and devlink port index
+ for the given PCI device used in the test environment.
+ """
+ cfg.vf_representor = None
+ cfg.vf_port_index = None
+ out = subprocess.check_output(["devlink", "-j", "port", "show"], encoding="utf-8")
+ ports = json.loads(out)["port"]
+
+ for port_name, props in ports.items():
+ netdev = props.get("netdev")
+
+ if (port_name.startswith(f"pci/{cfg.pci}/") and
+ props.get("vfnum") == 0):
+ cfg.vf_representor = netdev
+ cfg.vf_port_index = int(port_name.split("/")[-1])
+ break
+
+
+def setup_bridge(cfg):
+ """
+ Creates and configures a Linux bridge, with both the uplink
+ and VF representor interfaces attached to it.
+ """
+ bridge_name = f"br_{os.getpid()}"
+ ip(f"link add name {bridge_name} type bridge")
+ defer(cmd, f"ip link del name {bridge_name} type bridge")
+
+ ip(f"link set dev {cfg.ifname} master {bridge_name}")
+
+ rep_name = cfg.vf_representor
+ if rep_name:
+ ip(f"link set dev {rep_name} master {bridge_name}")
+ ip(f"link set dev {rep_name} up")
+ ksft_pr(f"Set representor {rep_name} up and added to bridge")
+ else:
+ raise KsftSkipEx("Could not find representor for the VF")
+
+ ip(f"link set dev {bridge_name} up")
+
+
+def setup_devlink_rate(cfg):
+ """
+ Configures devlink rate tx_max and traffic class bandwidth for the VF.
+ """
+ port_index = cfg.vf_port_index
+ if port_index is None:
+ raise KsftSkipEx("Could not find VF port index")
+ try:
+ cfg.devnl.rate_set({
+ "bus-name": "pci",
+ "dev-name": cfg.pci,
+ "port-index": port_index,
+ "rate-tx-max": 125000000,
+ "rate-tc-bws": [
+ {"index": 0, "bw": 0},
+ {"index": 1, "bw": 0},
+ {"index": 2, "bw": 0},
+ {"index": 3, "bw": 20},
+ {"index": 4, "bw": 80},
+ {"index": 5, "bw": 0},
+ {"index": 6, "bw": 0},
+ {"index": 7, "bw": 0},
+ ]
+ })
+ except NlError as exc:
+ if exc.error == 95: # EOPNOTSUPP
+ raise KsftSkipEx("devlink rate configuration is not supported on the VF") from exc
+ raise KsftFailEx(f"rate_set failed on VF port {port_index}") from exc
+
+
+def setup_remote_server(cfg):
+ """
+ Sets up VLAN interfaces and starts iperf3 servers on the remote side.
+ """
+ remote_dev = cfg.remote_ifname
+ vlan_ids = [101, 102]
+ remote_ips = ["198.51.100.1", "198.51.100.9"]
+
+ for vlan_id, ip_addr in zip(vlan_ids, remote_ips):
+ vlan_dev = f"{remote_dev}.{vlan_id}"
+ cmd(f"ip link add link {remote_dev} name {vlan_dev} "
+ f"type vlan id {vlan_id}", host=cfg.remote)
+ cmd(f"ip addr add {ip_addr}/29 dev {vlan_dev}", host=cfg.remote)
+ cmd(f"ip link set dev {vlan_dev} up", host=cfg.remote)
+ cmd(f"iperf3 -s -1 -B {ip_addr}",background=True, host=cfg.remote)
+ defer(cmd, f"ip link del {vlan_dev}", host=cfg.remote)
+
+
+def setup_test_environment(cfg, set_tc_mapping=True):
+ """
+ Sets up the complete test environment including VF creation, VLANs,
+ bridge configuration, devlink rate setup, and the remote server.
+ """
+ vf_ifc = setup_vf(cfg, set_tc_mapping)
+ ksft_pr(f"Created VF interface: {vf_ifc}")
+
+ setup_vlans_on_vf(vf_ifc)
+
+ get_vf_info(cfg)
+ setup_bridge(cfg)
+
+ setup_devlink_rate(cfg)
+ setup_remote_server(cfg)
+ time.sleep(2)
+
+
+def run_iperf_client(server_ip, local_ip, barrier, min_expected_gbps=0.1):
+ """
+ Runs a single iperf3 client instance, binding to the given local IP.
+ Waits on a barrier to synchronize with other threads.
+ """
+ try:
+ barrier.wait(timeout=10)
+ except Exception as exc:
+ raise KsftFailEx("iperf3 barrier wait timed") from exc
+
+ iperf_cmd = ["iperf3", "-c", server_ip, "-B", local_ip, "-J"]
+ result = subprocess.run(iperf_cmd, capture_output=True, text=True,
+ check=True)
+
+ try:
+ output = json.loads(result.stdout)
+ bits_per_second = output["end"]["sum_received"]["bits_per_second"]
+ gbps = bits_per_second / 1e9
+ if gbps < min_expected_gbps:
+ ksft_pr(
+ f"iperf3 bandwidth too low: {gbps:.2f} Gbps "
+ f"(expected ≥ {min_expected_gbps} Gbps)"
+ )
+ return None
+ return gbps
+ except json.JSONDecodeError as exc:
+ ksft_pr(f"Failed to parse iperf3 JSON output: {exc}")
+ return None
+
+
+def run_bandwidth_test():
+ """
+ Launches iperf3 client threads for each VLAN/TC pair and collects results.
+ """
+ def _run_iperf_client_thread(server_ip, local_ip, results, barrier, tc_ix):
+ results[tc_ix] = run_iperf_client(server_ip, local_ip, barrier)
+
+ vf_vlan_data = [
+ # (local_ip, remote_ip, TC)
+ ("198.51.100.2", "198.51.100.1", 3),
+ ("198.51.100.10", "198.51.100.9", 4),
+ ]
+
+ results = {}
+ threads = []
+ start_barrier = threading.Barrier(len(vf_vlan_data))
+
+ for local_ip, remote_ip, tc_ix in vf_vlan_data:
+ thread = threading.Thread(
+ target=_run_iperf_client_thread,
+ args=(remote_ip, local_ip, results, start_barrier, tc_ix)
+ )
+ thread.start()
+ threads.append(thread)
+
+ for thread in threads:
+ thread.join()
+
+ for tc_ix, tc_bw in results.items():
+ if tc_bw is None:
+ raise KsftFailEx("iperf3 client failed; cannot evaluate bandwidth")
+
+ return results
+
+def calculate_bandwidth_percentages(results):
+ """
+ Calculates the percentage of total bandwidth received by TC3 and TC4.
+ """
+ if 3 not in results or 4 not in results:
+ raise KsftFailEx(f"Missing expected TC results in {results}")
+
+ tc3_bw = results[3]
+ tc4_bw = results[4]
+ total_bw = tc3_bw + tc4_bw
+ tc3_percentage = (tc3_bw / total_bw) * 100
+ tc4_percentage = (tc4_bw / total_bw) * 100
+
+ return {
+ 'tc3_bw': tc3_bw,
+ 'tc4_bw': tc4_bw,
+ 'tc3_percentage': tc3_percentage,
+ 'tc4_percentage': tc4_percentage,
+ 'total_bw': total_bw
+ }
+
+
+def print_bandwidth_results(bw_data, test_name):
+ """
+ Prints bandwidth measurements and TC usage summary for a given test.
+ """
+ ksft_pr(f"Bandwidth check results {test_name}:")
+ ksft_pr(f"TC 3: {bw_data['tc3_bw']:.2f} Gbits/sec")
+ ksft_pr(f"TC 4: {bw_data['tc4_bw']:.2f} Gbits/sec")
+ ksft_pr(f"Total bandwidth: {bw_data['total_bw']:.2f} Gbits/sec")
+ ksft_pr(f"TC 3 percentage: {bw_data['tc3_percentage']:.1f}%")
+ ksft_pr(f"TC 4 percentage: {bw_data['tc4_percentage']:.1f}%")
+
+
+def verify_total_bandwidth(bw_data, validator):
+ """
+ Ensures the total measured bandwidth falls within the acceptable tolerance.
+ """
+ total = bw_data['total_bw']
+
+ if validator.bound(validator.expected_total_gbps, total):
+ return
+
+ if total < validator.total_min_expected:
+ raise KsftSkipEx(
+ f"Total bandwidth {total:.2f} Gbps < minimum "
+ f"{validator.total_min_expected:.2f} Gbps; "
+ f"parent tx_max ({validator.expected_total_gbps:.1f} G) "
+ f"not reached, cannot validate share"
+ )
+
+ raise KsftFailEx(
+ f"Total bandwidth {total:.2f} Gbps exceeds allowed ceiling "
+ f"{validator.total_max_expected:.2f} Gbps "
+ f"(VF tx_max set to {validator.expected_total_gbps:.1f} G)"
+ )
+
+
+def check_bandwidth_distribution(bw_data, validator):
+ """
+ Checks whether the measured TC3 and TC4 bandwidth percentages
+ fall within their expected tolerance ranges.
+
+ Returns:
+ bool: True if both TC3 and TC4 percentages are within bounds.
+ """
+ tc3_valid = validator.tc_bandwidth_bound(bw_data['tc3_percentage'], 3)
+ tc4_valid = validator.tc_bandwidth_bound(bw_data['tc4_percentage'], 4)
+
+ return tc3_valid and tc4_valid
+
+
+def run_bandwidth_distribution_test(cfg, set_tc_mapping):
+ """
+ Runs parallel iperf3 tests for both TCs and collects results.
+ """
+ setup_test_environment(cfg, set_tc_mapping)
+ bandwidths = run_bandwidth_test()
+ bw_data = calculate_bandwidth_percentages(bandwidths)
+ test_name = "with TC mapping" if set_tc_mapping else "without TC mapping"
+ print_bandwidth_results(bw_data, test_name)
+
+ verify_total_bandwidth(bw_data, cfg.bw_validator)
+
+ return check_bandwidth_distribution(bw_data, cfg.bw_validator)
+
+
+def test_no_tc_mapping_bandwidth(cfg):
+ """
+ Verifies that bandwidth is not split 80/20 without traffic class mapping.
+ """
+ pass_bw_msg = "Bandwidth is NOT distributed as 80/20 without TC mapping"
+ fail_bw_msg = "Bandwidth matched 80/20 split without TC mapping"
+ is_mlx5 = "driver: mlx5" in ethtool(f"-i {cfg.ifname}").stdout
+
+ if run_bandwidth_distribution_test(cfg, set_tc_mapping=False):
+ if is_mlx5:
+ raise KsftXfailEx(fail_bw_msg)
+ raise KsftFailEx(fail_bw_msg)
+ if is_mlx5:
+ raise KsftFailEx("mlx5 behavior changed:" + pass_bw_msg)
+ ksft_pr(pass_bw_msg)
+
+
+def test_tc_mapping_bandwidth(cfg):
+ """
+ Verifies that bandwidth is correctly split 80/20 between TC3 and TC4
+ when traffic class mapping is set.
+ """
+ if run_bandwidth_distribution_test(cfg, set_tc_mapping=True):
+ ksft_pr("Bandwidth is distributed as 80/20 with TC mapping")
+ else:
+ raise KsftFailEx("Bandwidth did not match 80/20 split with TC mapping")
+
+
+def main() -> None:
+ """
+ Main entry point for running the test cases.
+ """
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ cfg.devnl = DevlinkFamily()
+
+ cfg.pci = os.path.basename(
+ os.path.realpath(f"/sys/class/net/{cfg.ifname}/device")
+ )
+ if not cfg.pci:
+ raise KsftSkipEx("Could not get PCI address of the interface")
+ cfg.require_cmd("iperf3", local=True, remote=True)
+
+ cfg.bw_validator = BandwidthValidator()
+
+ cases = [test_no_tc_mapping_bandwidth, test_tc_mapping_bandwidth]
+
+ ksft_run(cases=cases, args=(cfg,))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py
index 7947650210a0..baa2f24240ba 100755
--- a/tools/testing/selftests/drivers/net/hw/devmem.py
+++ b/tools/testing/selftests/drivers/net/hw/devmem.py
@@ -51,15 +51,14 @@ def check_tx(cfg) -> None:
@ksft_disruptive
def check_tx_chunks(cfg) -> None:
- cfg.require_ipver("6")
require_devmem(cfg)
port = rand_port()
- listen_cmd = f"socat -U - TCP6-LISTEN:{port}"
+ listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}"
with bkg(listen_cmd, exit_wait=True) as socat:
wait_port_listen(port)
- cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port} -z 3", host=cfg.remote, shell=True)
+ cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr} -p {port} -z 3", host=cfg.remote, shell=True)
ksft_eq(socat.stdout.strip(), "hello\nworld")
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
index 9c03fd777f3d..712c806508b5 100755
--- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
@@ -3,37 +3,37 @@
import re
from os import path
-from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_run, ksft_exit, KsftSkipEx
from lib.py import NetDrvEpEnv
from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen
def _get_current_settings(cfg):
- output = ethtool(f"-g {cfg.ifname}", json=True, host=cfg.remote)[0]
+ output = ethtool(f"-g {cfg.ifname}", json=True)[0]
return (output['rx'], output['hds-thresh'])
def _get_combined_channels(cfg):
- output = ethtool(f"-l {cfg.ifname}", host=cfg.remote).stdout
+ output = ethtool(f"-l {cfg.ifname}").stdout
values = re.findall(r'Combined:\s+(\d+)', output)
return int(values[1])
def _create_rss_ctx(cfg, chan):
- output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1", host=cfg.remote).stdout
+ output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1").stdout
values = re.search(r'New RSS context is (\d+)', output).group(1)
ctx_id = int(values)
- return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}", host=cfg.remote))
+ return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}"))
def _set_flow_rule(cfg, port, chan):
- output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}", host=cfg.remote).stdout
+ output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}").stdout
values = re.search(r'ID (\d+)', output).group(1)
return int(values)
def _set_flow_rule_rss(cfg, port, ctx_id):
- output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}", host=cfg.remote).stdout
+ output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}").stdout
values = re.search(r'ID (\d+)', output).group(1)
return int(values)
@@ -47,26 +47,26 @@ def test_zcrx(cfg) -> None:
(rx_ring, hds_thresh) = _get_current_settings(cfg)
port = rand_port()
- ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} tcp-data-split on")
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")
- ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} hds-thresh 0")
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")
- ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} rx 64")
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
- ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
- defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote)
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
+ defer(ethtool, f"-X {cfg.ifname} default")
flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
- defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
- rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
- tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 12840"
- with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
- wait_port_listen(port, proto="tcp", host=cfg.remote)
- cmd(tx_cmd)
+ rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
+ tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840"
+ with bkg(rx_cmd, exit_wait=True):
+ wait_port_listen(port, proto="tcp")
+ cmd(tx_cmd, host=cfg.remote)
def test_zcrx_oneshot(cfg) -> None:
@@ -78,26 +78,26 @@ def test_zcrx_oneshot(cfg) -> None:
(rx_ring, hds_thresh) = _get_current_settings(cfg)
port = rand_port()
- ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} tcp-data-split on")
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")
- ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} hds-thresh 0")
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")
- ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} rx 64")
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
- ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
- defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote)
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
+ defer(ethtool, f"-X {cfg.ifname} default")
flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
- defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
- rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4"
- tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 4096 -z 16384"
- with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
- wait_port_listen(port, proto="tcp", host=cfg.remote)
- cmd(tx_cmd)
+ rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4"
+ tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 4096 -z 16384"
+ with bkg(rx_cmd, exit_wait=True):
+ wait_port_listen(port, proto="tcp")
+ cmd(tx_cmd, host=cfg.remote)
def test_zcrx_rss(cfg) -> None:
@@ -109,27 +109,27 @@ def test_zcrx_rss(cfg) -> None:
(rx_ring, hds_thresh) = _get_current_settings(cfg)
port = rand_port()
- ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} tcp-data-split on")
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")
- ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} hds-thresh 0")
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")
- ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} rx 64")
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
- ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
- defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote)
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
+ defer(ethtool, f"-X {cfg.ifname} default")
(ctx_id, delete_ctx) = _create_rss_ctx(cfg, combined_chans - 1)
flow_rule_id = _set_flow_rule_rss(cfg, port, ctx_id)
- defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
- rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
- tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 12840"
- with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
- wait_port_listen(port, proto="tcp", host=cfg.remote)
- cmd(tx_cmd)
+ rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
+ tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840"
+ with bkg(rx_cmd, exit_wait=True):
+ wait_port_listen(port, proto="tcp")
+ cmd(tx_cmd, host=cfg.remote)
def main() -> None:
diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
index b582885786f5..1462a339a74b 100644
--- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
+++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
@@ -7,8 +7,25 @@ KSFT_DIR = (Path(__file__).parent / "../../../../..").resolve()
try:
sys.path.append(KSFT_DIR.as_posix())
+
from net.lib.py import *
from drivers.net.lib.py import *
+
+ # Import one by one to avoid pylint false positives
+ from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \
+ NlError, RtnlFamily, DevlinkFamily
+ from net.lib.py import CmdExitFailure
+ from net.lib.py import bkg, cmd, defer, ethtool, fd_read_timeout, ip, \
+ rand_port, tool, wait_port_listen
+ from net.lib.py import fd_read_timeout
+ from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
+ from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \
+ ksft_setup
+ from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \
+ ksft_ne, ksft_not_in, ksft_raises, ksft_true
+ from net.lib.py import NetNSEnter
+ from drivers.net.lib.py import GenerateTraffic
+ from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv
except ModuleNotFoundError as e:
ksft_pr("Failed importing `net` library from kernel sources")
ksft_pr(str(e))
diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
index 02e4d3d7ded2..72f828021f83 100644
--- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c
+++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
@@ -526,12 +526,10 @@ static struct netdev_queue_id *create_queues(void)
struct netdev_queue_id *queues;
size_t i = 0;
- queues = calloc(num_queues, sizeof(*queues));
+ queues = netdev_queue_id_alloc(num_queues);
for (i = 0; i < num_queues; i++) {
- queues[i]._present.type = 1;
- queues[i]._present.id = 1;
- queues[i].type = NETDEV_QUEUE_TYPE_RX;
- queues[i].id = start_queue + i;
+ netdev_queue_id_set_type(&queues[i], NETDEV_QUEUE_TYPE_RX);
+ netdev_queue_id_set_id(&queues[i], start_queue + i);
}
return queues;
@@ -852,7 +850,6 @@ static int do_client(struct memory_buffer *mem)
ssize_t line_size = 0;
struct cmsghdr *cmsg;
char *line = NULL;
- unsigned long mid;
size_t len = 0;
int socket_fd;
__u32 ddmabuf;
diff --git a/tools/testing/selftests/drivers/net/hw/rss_api.py b/tools/testing/selftests/drivers/net/hw/rss_api.py
new file mode 100755
index 000000000000..19847f3d4a00
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/rss_api.py
@@ -0,0 +1,476 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+API level tests for RSS (mostly Netlink vs IOCTL).
+"""
+
+import errno
+import glob
+import random
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_is, ksft_ne, ksft_raises
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import defer, ethtool, CmdExitFailure
+from lib.py import EthtoolFamily, NlError
+from lib.py import NetDrvEnv
+
+
+def _require_2qs(cfg):
+ qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*"))
+ if qcnt < 2:
+ raise KsftSkipEx(f"Local has only {qcnt} queues")
+ return qcnt
+
+
+def _ethtool_create(cfg, act, opts):
+ output = ethtool(f"{act} {cfg.ifname} {opts}").stdout
+ # Output will be something like: "New RSS context is 1" or
+ # "Added rule with ID 7", we want the integer from the end
+ return int(output.split()[-1])
+
+
+def _ethtool_get_cfg(cfg, fl_type, to_nl=False):
+ descr = ethtool(f"-n {cfg.ifname} rx-flow-hash {fl_type}").stdout
+
+ if to_nl:
+ converter = {
+ "IP SA": "ip-src",
+ "IP DA": "ip-dst",
+ "L4 bytes 0 & 1 [TCP/UDP src port]": "l4-b-0-1",
+ "L4 bytes 2 & 3 [TCP/UDP dst port]": "l4-b-2-3",
+ }
+
+ ret = set()
+ else:
+ converter = {
+ "IP SA": "s",
+ "IP DA": "d",
+ "L3 proto": "t",
+ "L4 bytes 0 & 1 [TCP/UDP src port]": "f",
+ "L4 bytes 2 & 3 [TCP/UDP dst port]": "n",
+ }
+
+ ret = ""
+
+ for line in descr.split("\n")[1:-2]:
+ # if this raises we probably need to add more keys to converter above
+ if to_nl:
+ ret.add(converter[line])
+ else:
+ ret += converter[line]
+ return ret
+
+
+def test_rxfh_nl_set_fail(cfg):
+ """
+ Test error path of Netlink SET.
+ """
+ _require_2qs(cfg)
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ with ksft_raises(NlError):
+ ethnl.rss_set({"header": {"dev-name": "lo"},
+ "indir": None})
+
+ with ksft_raises(NlError):
+ ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "indir": [100000]})
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ ksft_is(ntf, None)
+
+
+def test_rxfh_nl_set_indir(cfg):
+ """
+ Test setting indirection table via Netlink.
+ """
+ qcnt = _require_2qs(cfg)
+
+ # Test some SETs with a value
+ reset = defer(cfg.ethnl.rss_set,
+ {"header": {"dev-index": cfg.ifindex}, "indir": None})
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "indir": [1]})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(set(rss.get("indir", [-1])), {1})
+
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "indir": [0, 1]})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(set(rss.get("indir", [-1])), {0, 1})
+
+ # Make sure we can't set the queue count below max queue used
+ with ksft_raises(CmdExitFailure):
+ ethtool(f"-L {cfg.ifname} combined 0 rx 1")
+ with ksft_raises(CmdExitFailure):
+ ethtool(f"-L {cfg.ifname} combined 1 rx 0")
+
+ # Test reset back to default
+ reset.exec()
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(set(rss.get("indir", [-1])), set(range(qcnt)))
+
+
+def test_rxfh_nl_set_indir_ctx(cfg):
+ """
+ Test setting indirection table for a custom context via Netlink.
+ """
+ _require_2qs(cfg)
+
+ # Get setting for ctx 0, we'll make sure they don't get clobbered
+ dflt = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+
+ # Create context
+ ctx_id = _ethtool_create(cfg, "-X", "context new")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "context": ctx_id, "indir": [1]})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex},
+ "context": ctx_id})
+ ksft_eq(set(rss.get("indir", [-1])), {1})
+
+ ctx0 = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(ctx0, dflt)
+
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "context": ctx_id, "indir": [0, 1]})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex},
+ "context": ctx_id})
+ ksft_eq(set(rss.get("indir", [-1])), {0, 1})
+
+ ctx0 = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(ctx0, dflt)
+
+ # Make sure we can't set the queue count below max queue used
+ with ksft_raises(CmdExitFailure):
+ ethtool(f"-L {cfg.ifname} combined 0 rx 1")
+ with ksft_raises(CmdExitFailure):
+ ethtool(f"-L {cfg.ifname} combined 1 rx 0")
+
+
+def test_rxfh_indir_ntf(cfg):
+ """
+ Check that Netlink notifications are generated when RSS indirection
+ table was modified.
+ """
+ _require_2qs(cfg)
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ ethtool(f"--disable-netlink -X {cfg.ifname} weight 0 1")
+ reset = defer(ethtool, f"-X {cfg.ifname} default")
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_eq(set(ntf["msg"]["indir"]), {1})
+
+ reset.exec()
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received after reset")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_is(ntf["msg"].get("context"), None)
+ ksft_ne(set(ntf["msg"]["indir"]), {1})
+
+
+def test_rxfh_indir_ctx_ntf(cfg):
+ """
+ Check that Netlink notifications are generated when RSS indirection
+ table was modified on an additional RSS context.
+ """
+ _require_2qs(cfg)
+
+ ctx_id = _ethtool_create(cfg, "-X", "context new")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ ethtool(f"--disable-netlink -X {cfg.ifname} context {ctx_id} weight 0 1")
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_eq(ntf["msg"].get("context"), ctx_id)
+ ksft_eq(set(ntf["msg"]["indir"]), {1})
+
+
+def test_rxfh_nl_set_key(cfg):
+ """
+ Test setting hashing key via Netlink.
+ """
+
+ dflt = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ defer(cfg.ethnl.rss_set,
+ {"header": {"dev-index": cfg.ifindex},
+ "hkey": dflt["hkey"], "indir": None})
+
+ # Empty key should error out
+ with ksft_raises(NlError) as cm:
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "hkey": None})
+ ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.hkey')
+
+ # Set key to random
+ mod = random.randbytes(len(dflt["hkey"]))
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "hkey": mod})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(rss.get("hkey", [-1]), mod)
+
+ # Set key to random and indir tbl to something at once
+ mod = random.randbytes(len(dflt["hkey"]))
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "indir": [0, 1], "hkey": mod})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(rss.get("hkey", [-1]), mod)
+ ksft_eq(set(rss.get("indir", [-1])), {0, 1})
+
+
+def test_rxfh_fields(cfg):
+ """
+ Test reading Rx Flow Hash over Netlink.
+ """
+
+ flow_types = ["tcp4", "tcp6", "udp4", "udp6"]
+ ethnl = EthtoolFamily()
+
+ cfg_nl = ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ for fl_type in flow_types:
+ one = _ethtool_get_cfg(cfg, fl_type, to_nl=True)
+ ksft_eq(one, cfg_nl["flow-hash"][fl_type],
+ comment="Config for " + fl_type)
+
+
+def test_rxfh_fields_set(cfg):
+ """ Test configuring Rx Flow Hash over Netlink. """
+
+ flow_types = ["tcp4", "tcp6", "udp4", "udp6"]
+
+ # Collect current settings
+ cfg_old = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ # symmetric hashing is config-order-sensitive make sure we leave
+ # symmetric mode, or make the flow-hash sym-compatible first
+ changes = [{"flow-hash": cfg_old["flow-hash"],},
+ {"input-xfrm": cfg_old.get("input-xfrm", {}),}]
+ if cfg_old.get("input-xfrm"):
+ changes = list(reversed(changes))
+ for old in changes:
+ defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},} | old)
+
+ # symmetric hashing prevents some of the configs below
+ if cfg_old.get("input-xfrm"):
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "input-xfrm": {}})
+
+ for fl_type in flow_types:
+ cur = _ethtool_get_cfg(cfg, fl_type)
+ if cur == "sdfn":
+ change_nl = {"ip-src", "ip-dst"}
+ change_ic = "sd"
+ else:
+ change_nl = {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"}
+ change_ic = "sdfn"
+
+ cfg.ethnl.rss_set({
+ "header": {"dev-index": cfg.ifindex},
+ "flow-hash": {fl_type: change_nl}
+ })
+ reset = defer(ethtool, f"--disable-netlink -N {cfg.ifname} "
+ f"rx-flow-hash {fl_type} {cur}")
+
+ cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(change_nl, cfg_nl["flow-hash"][fl_type],
+ comment=f"Config for {fl_type} over Netlink")
+ cfg_ic = _ethtool_get_cfg(cfg, fl_type)
+ ksft_eq(change_ic, cfg_ic,
+ comment=f"Config for {fl_type} over IOCTL")
+
+ reset.exec()
+ cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(cfg_old["flow-hash"][fl_type], cfg_nl["flow-hash"][fl_type],
+ comment=f"Un-config for {fl_type} over Netlink")
+ cfg_ic = _ethtool_get_cfg(cfg, fl_type)
+ ksft_eq(cur, cfg_ic, comment=f"Un-config for {fl_type} over IOCTL")
+
+ # Try to set multiple at once, the defer was already installed at the start
+ change = {"ip-src"}
+ if change == cfg_old["flow-hash"]["tcp4"]:
+ change = {"ip-dst"}
+ cfg.ethnl.rss_set({
+ "header": {"dev-index": cfg.ifindex},
+ "flow-hash": {x: change for x in flow_types}
+ })
+
+ cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ for fl_type in flow_types:
+ ksft_eq(change, cfg_nl["flow-hash"][fl_type],
+ comment=f"multi-config for {fl_type} over Netlink")
+
+
+def test_rxfh_fields_set_xfrm(cfg):
+ """ Test changing Rx Flow Hash vs xfrm_input at once. """
+
+ def set_rss(cfg, xfrm, fh):
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "input-xfrm": xfrm, "flow-hash": fh})
+
+ # Install the reset handler
+ cfg_old = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ # symmetric hashing is config-order-sensitive make sure we leave
+ # symmetric mode, or make the flow-hash sym-compatible first
+ changes = [{"flow-hash": cfg_old["flow-hash"],},
+ {"input-xfrm": cfg_old.get("input-xfrm", {}),}]
+ if cfg_old.get("input-xfrm"):
+ changes = list(reversed(changes))
+ for old in changes:
+ defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},} | old)
+
+ # Make sure we start with input-xfrm off, and tcp4 config non-sym
+ set_rss(cfg, {}, {})
+ set_rss(cfg, {}, {"tcp4": {"ip-src"}})
+
+ # Setting sym and fixing tcp4 config not expected to pass right now
+ with ksft_raises(NlError):
+ set_rss(cfg, {"sym-xor"}, {"tcp4": {"ip-src", "ip-dst"}})
+ # One at a time should work, hopefully
+ set_rss(cfg, 0, {"tcp4": {"ip-src", "ip-dst"}})
+ no_support = False
+ try:
+ set_rss(cfg, {"sym-xor"}, {})
+ except NlError:
+ try:
+ set_rss(cfg, {"sym-or-xor"}, {})
+ except NlError:
+ no_support = True
+ if no_support:
+ raise KsftSkipEx("no input-xfrm supported")
+ # Disabling two at once should not work either without kernel changes
+ with ksft_raises(NlError):
+ set_rss(cfg, {}, {"tcp4": {"ip-src"}})
+
+
+def test_rxfh_fields_ntf(cfg):
+ """ Test Rx Flow Hash notifications. """
+
+ cur = _ethtool_get_cfg(cfg, "tcp4")
+ if cur == "sdfn":
+ change = {"ip-src", "ip-dst"}
+ else:
+ change = {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"}
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ ethnl.rss_set({
+ "header": {"dev-index": cfg.ifindex},
+ "flow-hash": {"tcp4": change}
+ })
+ reset = defer(ethtool,
+ f"--disable-netlink -N {cfg.ifname} rx-flow-hash tcp4 {cur}")
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received after IOCTL change")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_eq(ntf["msg"]["flow-hash"]["tcp4"], change)
+ ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None)
+
+ reset.exec()
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received after Netlink change")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_ne(ntf["msg"]["flow-hash"]["tcp4"], change)
+ ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None)
+
+
+def test_rss_ctx_add(cfg):
+ """ Test creating an additional RSS context via Netlink """
+
+ _require_2qs(cfg)
+
+ # Test basic creation
+ ctx = cfg.ethnl.rss_create_act({"header": {"dev-index": cfg.ifindex}})
+ d = defer(ethtool, f"-X {cfg.ifname} context {ctx.get('context')} delete")
+ ksft_ne(ctx.get("context", 0), 0)
+ ksft_ne(set(ctx.get("indir", [0])), {0},
+ comment="Driver should init the indirection table")
+
+ # Try requesting the ID we just got allocated
+ with ksft_raises(NlError) as cm:
+ ctx = cfg.ethnl.rss_create_act({
+ "header": {"dev-index": cfg.ifindex},
+ "context": ctx.get("context"),
+ })
+ ethtool(f"-X {cfg.ifname} context {ctx.get('context')} delete")
+ d.exec()
+ ksft_eq(cm.exception.nl_msg.error, -errno.EBUSY)
+
+ # Test creating with a specified RSS table, and context ID
+ ctx_id = ctx.get("context")
+ ctx = cfg.ethnl.rss_create_act({
+ "header": {"dev-index": cfg.ifindex},
+ "context": ctx_id,
+ "indir": [1],
+ })
+ ethtool(f"-X {cfg.ifname} context {ctx.get('context')} delete")
+ ksft_eq(ctx.get("context"), ctx_id)
+ ksft_eq(set(ctx.get("indir", [0])), {1})
+
+
+def test_rss_ctx_ntf(cfg):
+ """ Test notifications for creating additional RSS contexts """
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ # Create / delete via Netlink
+ ctx = cfg.ethnl.rss_create_act({"header": {"dev-index": cfg.ifindex}})
+ cfg.ethnl.rss_delete_act({
+ "header": {"dev-index": cfg.ifindex},
+ "context": ctx["context"],
+ })
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("[NL] No notification after context creation")
+ ksft_eq(ntf["name"], "rss-create-ntf")
+ ksft_eq(ctx, ntf["msg"])
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("[NL] No notification after context deletion")
+ ksft_eq(ntf["name"], "rss-delete-ntf")
+
+ # Create / deleve via IOCTL
+ ctx_id = _ethtool_create(cfg, "--disable-netlink -X", "context new")
+ ethtool(f"--disable-netlink -X {cfg.ifname} context {ctx_id} delete")
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("[IOCTL] No notification after context creation")
+ ksft_eq(ntf["name"], "rss-create-ntf")
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("[IOCTL] No notification after context deletion")
+ ksft_eq(ntf["name"], "rss-delete-ntf")
+
+
+def main() -> None:
+ """ Ksft boiler plate main """
+
+ with NetDrvEnv(__file__, nsim_test=False) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
index 648ff50bc1c3..72880e388478 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
@@ -32,16 +32,16 @@ def test_rss_input_xfrm(cfg, ipver):
if multiprocessing.cpu_count() < 2:
raise KsftSkipEx("Need at least two CPUs to test symmetric RSS hash")
- cfg.require_cmd("socat", remote=True)
+ cfg.require_cmd("socat", local=False, remote=True)
if not hasattr(socket, "SO_INCOMING_CPU"):
raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11")
- input_xfrm = cfg.ethnl.rss_get(
- {'header': {'dev-name': cfg.ifname}}).get('input-xfrm')
+ rss = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}})
+ input_xfrm = set(filter(lambda x: 'sym' in x, rss.get('input-xfrm', {})))
# Check for symmetric xor/or-xor
- if not input_xfrm or (input_xfrm != 1 and input_xfrm != 2):
+ if not input_xfrm:
raise KsftSkipEx("Symmetric RSS hash not requested")
cpus = set()
diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py
index 3370827409aa..c13dd5efa27a 100755
--- a/tools/testing/selftests/drivers/net/hw/tso.py
+++ b/tools/testing/selftests/drivers/net/hw/tso.py
@@ -34,7 +34,7 @@ def tcp_sock_get_retrans(sock):
def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso):
- cfg.require_cmd("socat", remote=True)
+ cfg.require_cmd("socat", local=False, remote=True)
port = rand_port()
listen_cmd = f"socat -{ipver} -t 2 -u TCP-LISTEN:{port},reuseport /dev/null,ignoreeof"
@@ -102,7 +102,7 @@ def build_tunnel(cfg, outer_ipver, tun_info):
remote_addr = cfg.remote_addr_v[outer_ipver]
tun_type = tun_info[0]
- tun_arg = tun_info[2]
+ tun_arg = tun_info[1]
ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {local_addr} remote {remote_addr} dev {cfg.ifname}")
defer(ip, f"link del {tun_type}-ksft")
ip(f"link set dev {tun_type}-ksft up")
@@ -119,15 +119,30 @@ def build_tunnel(cfg, outer_ipver, tun_info):
return remote_v4, remote_v6
+def restore_wanted_features(cfg):
+ features_cmd = ""
+ for feature in cfg.hw_features:
+ setting = "on" if feature in cfg.wanted_features else "off"
+ features_cmd += f" {feature} {setting}"
+ try:
+ ethtool(f"-K {cfg.ifname} {features_cmd}")
+ except Exception as e:
+ ksft_pr(f"WARNING: failure restoring wanted features: {e}")
+
+
def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None):
"""Construct specific tests from the common template."""
def f(cfg):
cfg.require_ipver(outer_ipver)
+ defer(restore_wanted_features, cfg)
if not cfg.have_stat_super_count and \
not cfg.have_stat_wire_count:
raise KsftSkipEx(f"Device does not support LSO queue stats")
+ if feature not in cfg.hw_features:
+ raise KsftSkipEx(f"Device does not support {feature}")
+
ipver = outer_ipver
if tun:
remote_v4, remote_v6 = build_tunnel(cfg, ipver, tun)
@@ -136,36 +151,21 @@ def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None):
remote_v4 = cfg.remote_addr_v["4"]
remote_v6 = cfg.remote_addr_v["6"]
- tun_partial = tun and tun[1]
- # Tunnel which can silently fall back to gso-partial
- has_gso_partial = tun and 'tx-gso-partial' in cfg.features
-
- # For TSO4 via partial we need mangleid
- if ipver == "4" and feature in cfg.partial_features:
- ksft_pr("Testing with mangleid enabled")
- if 'tx-tcp-mangleid-segmentation' not in cfg.features:
- ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on")
- defer(ethtool, f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off")
-
# First test without the feature enabled.
ethtool(f"-K {cfg.ifname} {feature} off")
- if has_gso_partial:
- ethtool(f"-K {cfg.ifname} tx-gso-partial off")
run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=False)
- # Now test with the feature enabled.
- # For compatible tunnels only - just GSO partial, not specific feature.
- if has_gso_partial:
+ ethtool(f"-K {cfg.ifname} tx-gso-partial off")
+ ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off")
+ if feature in cfg.partial_features:
ethtool(f"-K {cfg.ifname} tx-gso-partial on")
- run_one_stream(cfg, ipver, remote_v4, remote_v6,
- should_lso=tun_partial)
+ if ipver == "4":
+ ksft_pr("Testing with mangleid enabled")
+ ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on")
# Full feature enabled.
- if feature in cfg.features:
- ethtool(f"-K {cfg.ifname} {feature} on")
- run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True)
- else:
- raise KsftXfailEx(f"Device does not support {feature}")
+ ethtool(f"-K {cfg.ifname} {feature} on")
+ run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True)
f.__name__ = name + ((outer_ipver + "_") if tun else "") + "ipv" + inner_ipver
return f
@@ -176,23 +176,39 @@ def query_nic_features(cfg) -> None:
cfg.have_stat_super_count = False
cfg.have_stat_wire_count = False
- cfg.features = set()
features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
- for f in features["active"]["bits"]["bit"]:
- cfg.features.add(f["name"])
+
+ cfg.wanted_features = set()
+ for f in features["wanted"]["bits"]["bit"]:
+ cfg.wanted_features.add(f["name"])
+
+ cfg.hw_features = set()
+ hw_all_features_cmd = ""
+ for f in features["hw"]["bits"]["bit"]:
+ if f.get("value", False):
+ feature = f["name"]
+ cfg.hw_features.add(feature)
+ hw_all_features_cmd += f" {feature} on"
+ try:
+ ethtool(f"-K {cfg.ifname} {hw_all_features_cmd}")
+ except Exception as e:
+ ksft_pr(f"WARNING: failure enabling all hw features: {e}")
+ ksft_pr("partial gso feature detection may be impacted")
# Check which features are supported via GSO partial
cfg.partial_features = set()
- if 'tx-gso-partial' in cfg.features:
+ if 'tx-gso-partial' in cfg.hw_features:
ethtool(f"-K {cfg.ifname} tx-gso-partial off")
no_partial = set()
features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
for f in features["active"]["bits"]["bit"]:
no_partial.add(f["name"])
- cfg.partial_features = cfg.features - no_partial
+ cfg.partial_features = cfg.hw_features - no_partial
ethtool(f"-K {cfg.ifname} tx-gso-partial on")
+ restore_wanted_features(cfg)
+
stats = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)
if stats:
if 'tx-hw-gso-packets' in stats[0]:
@@ -211,13 +227,14 @@ def main() -> None:
query_nic_features(cfg)
test_info = (
- # name, v4/v6 ethtool_feature tun:(type, partial, args)
- ("", "4", "tx-tcp-segmentation", None),
- ("", "6", "tx-tcp6-segmentation", None),
- ("vxlan", "", "tx-udp_tnl-segmentation", ("vxlan", True, "id 100 dstport 4789 noudpcsum")),
- ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", False, "id 100 dstport 4789 udpcsum")),
- ("gre", "4", "tx-gre-segmentation", ("gre", False, "")),
- ("gre", "6", "tx-gre-segmentation", ("ip6gre", False, "")),
+ # name, v4/v6 ethtool_feature tun:(type, args, inner ip versions)
+ ("", "4", "tx-tcp-segmentation", None),
+ ("", "6", "tx-tcp6-segmentation", None),
+ ("vxlan", "4", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 noudpcsum", ("4", "6"))),
+ ("vxlan", "6", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 udp6zerocsumtx udp6zerocsumrx", ("4", "6"))),
+ ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", "id 100 dstport 4789 udpcsum", ("4", "6"))),
+ ("gre", "4", "tx-gre-segmentation", ("gre", "", ("4", "6"))),
+ ("gre", "6", "tx-gre-segmentation", ("ip6gre","", ("4", "6"))),
)
cases = []
@@ -227,11 +244,13 @@ def main() -> None:
if info[1] and outer_ipver != info[1]:
continue
- cases.append(test_builder(info[0], cfg, outer_ipver, info[2],
- tun=info[3], inner_ipver="4"))
if info[3]:
- cases.append(test_builder(info[0], cfg, outer_ipver, info[2],
- tun=info[3], inner_ipver="6"))
+ cases += [
+ test_builder(info[0], cfg, outer_ipver, info[2], info[3], inner_ipver)
+ for inner_ipver in info[3][2]
+ ]
+ else:
+ cases.append(test_builder(info[0], cfg, outer_ipver, info[2], None, outer_ipver))
ksft_run(cases=cases, args=(cfg, ))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py
index 401e70f7f136..8711c67ad658 100644
--- a/tools/testing/selftests/drivers/net/lib/py/__init__.py
+++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py
@@ -7,7 +7,21 @@ KSFT_DIR = (Path(__file__).parent / "../../../..").resolve()
try:
sys.path.append(KSFT_DIR.as_posix())
+
from net.lib.py import *
+
+ # Import one by one to avoid pylint false positives
+ from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \
+ NlError, RtnlFamily, DevlinkFamily
+ from net.lib.py import CmdExitFailure
+ from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \
+ fd_read_timeout, ip, rand_port, tool, wait_port_listen
+ from net.lib.py import fd_read_timeout
+ from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
+ from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \
+ ksft_setup
+ from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \
+ ksft_ne, ksft_not_in, ksft_raises, ksft_true
except ModuleNotFoundError as e:
ksft_pr("Failed importing `net` library from kernel sources")
ksft_pr(str(e))
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py
index 3bccddf8cbc5..1b8bd648048f 100644
--- a/tools/testing/selftests/drivers/net/lib/py/env.py
+++ b/tools/testing/selftests/drivers/net/lib/py/env.py
@@ -259,7 +259,7 @@ class NetDrvEpEnv(NetDrvEnvBase):
if not self._require_cmd(comm, "local"):
raise KsftSkipEx("Test requires command: " + comm)
if remote:
- if not self._require_cmd(comm, "remote"):
+ if not self._require_cmd(comm, "remote", host=self.remote):
raise KsftSkipEx("Test requires (remote) command: " + comm)
def wait_hw_stats_settle(self):
diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py
index d9c10613ae67..c4e808407cc4 100644
--- a/tools/testing/selftests/drivers/net/lib/py/load.py
+++ b/tools/testing/selftests/drivers/net/lib/py/load.py
@@ -1,21 +1,21 @@
# SPDX-License-Identifier: GPL-2.0
+import re
import time
from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen
class GenerateTraffic:
def __init__(self, env, port=None):
- env.require_cmd("iperf3", remote=True)
+ env.require_cmd("iperf3", local=True, remote=True)
self.env = env
- if port is None:
- port = rand_port()
- self._iperf_server = cmd(f"iperf3 -s -1 -p {port}", background=True)
- wait_port_listen(port)
+ self.port = rand_port() if port is None else port
+ self._iperf_server = cmd(f"iperf3 -s -1 -p {self.port}", background=True)
+ wait_port_listen(self.port)
time.sleep(0.1)
- self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {port} -t 86400",
+ self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {self.port} -t 86400",
background=True, host=env.remote)
# Wait for traffic to ramp up
@@ -56,3 +56,16 @@ class GenerateTraffic:
ksft_pr(">> Server:")
ksft_pr(self._iperf_server.stdout)
ksft_pr(self._iperf_server.stderr)
+ self._wait_client_stopped()
+
+ def _wait_client_stopped(self, sleep=0.005, timeout=5):
+ end = time.monotonic() + timeout
+
+ live_port_pattern = re.compile(fr":{self.port:04X} 0[^6] ")
+
+ while time.monotonic() < end:
+ data = cmd("cat /proc/net/tcp*", host=self.env.remote).stdout
+ if not live_port_pattern.search(data):
+ return
+ time.sleep(sleep)
+ raise Exception(f"Waiting for client to stop timed out after {timeout}s")
diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
index 29b01b8e2215..b6071e80ebbb 100644
--- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
+++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
@@ -11,9 +11,11 @@ set -euo pipefail
LIBDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
SRCIF="" # to be populated later
-SRCIP=192.0.2.1
+SRCIP4="192.0.2.1"
+SRCIP6="fc00::1"
DSTIF="" # to be populated later
-DSTIP=192.0.2.2
+DSTIP4="192.0.2.2"
+DSTIP6="fc00::2"
PORT="6666"
MSG="netconsole selftest"
@@ -80,7 +82,23 @@ function configure_ip() {
ip link set "${SRCIF}" up
}
+function select_ipv4_or_ipv6()
+{
+ local VERSION=${1}
+
+ if [[ "$VERSION" == "ipv6" ]]
+ then
+ DSTIP="${DSTIP6}"
+ SRCIP="${SRCIP6}"
+ else
+ DSTIP="${DSTIP4}"
+ SRCIP="${SRCIP4}"
+ fi
+}
+
function set_network() {
+ local IP_VERSION=${1:-"ipv4"}
+
# setup_ns function is coming from lib.sh
setup_ns NAMESPACE
@@ -91,10 +109,13 @@ function set_network() {
# Link both interfaces back to back
link_ifaces
+ select_ipv4_or_ipv6 "${IP_VERSION}"
configure_ip
}
function create_dynamic_target() {
+ local FORMAT=${1:-"extended"}
+
DSTMAC=$(ip netns exec "${NAMESPACE}" \
ip link show "${DSTIF}" | awk '/ether/ {print $2}')
@@ -106,7 +127,33 @@ function create_dynamic_target() {
echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac
echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name
+ if [ "${FORMAT}" == "basic" ]
+ then
+ # Basic target does not support release
+ echo 0 > "${NETCONS_PATH}"/release
+ echo 0 > "${NETCONS_PATH}"/extended
+ elif [ "${FORMAT}" == "extended" ]
+ then
+ echo 1 > "${NETCONS_PATH}"/extended
+ fi
+
echo 1 > "${NETCONS_PATH}"/enabled
+
+ # This will make sure that the kernel was able to
+ # load the netconsole driver configuration. The console message
+ # gets more organized/sequential as well.
+ sleep 1
+}
+
+# Generate the command line argument for netconsole following:
+# netconsole=[+][src-port]@[src-ip]/[<dev>],[tgt-port]@<tgt-ip>/[tgt-macaddr]
+function create_cmdline_str() {
+ DSTMAC=$(ip netns exec "${NAMESPACE}" \
+ ip link show "${DSTIF}" | awk '/ether/ {print $2}')
+ SRCPORT="1514"
+ TGTPORT="6666"
+
+ echo "netconsole=\"+${SRCPORT}@${SRCIP}/${SRCIF},${TGTPORT}@${DSTIP}/${DSTMAC}\""
}
# Do not append the release to the header of the message
@@ -116,16 +163,9 @@ function disable_release_append() {
echo 1 > "${NETCONS_PATH}"/enabled
}
-function cleanup() {
+function do_cleanup() {
local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device"
- # delete netconsole dynamic reconfiguration
- echo 0 > "${NETCONS_PATH}"/enabled
- # Remove all the keys that got created during the selftest
- find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete
- # Remove the configfs entry
- rmdir "${NETCONS_PATH}"
-
# Delete netdevsim devices
echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_DEL"
echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_DEL"
@@ -137,6 +177,17 @@ function cleanup() {
echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk
}
+function cleanup() {
+ # delete netconsole dynamic reconfiguration
+ echo 0 > "${NETCONS_PATH}"/enabled
+ # Remove all the keys that got created during the selftest
+ find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete
+ # Remove the configfs entry
+ rmdir "${NETCONS_PATH}"
+
+ do_cleanup
+}
+
function set_user_data() {
if [[ ! -d "${NETCONS_PATH}""/userdata" ]]
then
@@ -152,18 +203,24 @@ function set_user_data() {
function listen_port_and_save_to() {
local OUTPUT=${1}
+ local IPVERSION=${2:-"ipv4"}
+
+ if [ "${IPVERSION}" == "ipv4" ]
+ then
+ SOCAT_MODE="UDP-LISTEN"
+ else
+ SOCAT_MODE="UDP6-LISTEN"
+ fi
+
# Just wait for 2 seconds
timeout 2 ip netns exec "${NAMESPACE}" \
- socat UDP-LISTEN:"${PORT}",fork "${OUTPUT}"
+ socat "${SOCAT_MODE}":"${PORT}",fork "${OUTPUT}"
}
-function validate_result() {
+# Only validate that the message arrived properly
+function validate_msg() {
local TMPFILENAME="$1"
- # TMPFILENAME will contain something like:
- # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM
- # key=value
-
# Check if the file exists
if [ ! -f "$TMPFILENAME" ]; then
echo "FAIL: File was not generated." >&2
@@ -175,17 +232,32 @@ function validate_result() {
cat "${TMPFILENAME}" >&2
exit "${ksft_fail}"
fi
+}
- if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then
- echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2
- cat "${TMPFILENAME}" >&2
- exit "${ksft_fail}"
+# Validate the message and userdata
+function validate_result() {
+ local TMPFILENAME="$1"
+
+ # TMPFILENAME will contain something like:
+ # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM
+ # key=value
+
+ validate_msg "${TMPFILENAME}"
+
+ # userdata is not supported on basic format target,
+ # thus, do not validate it.
+ if [ "${FORMAT}" != "basic" ];
+ then
+ if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then
+ echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2
+ cat "${TMPFILENAME}" >&2
+ exit "${ksft_fail}"
+ fi
fi
# Delete the file once it is validated, otherwise keep it
# for debugging purposes
rm "${TMPFILENAME}"
- exit "${ksft_pass}"
}
function check_for_dependencies() {
@@ -209,6 +281,11 @@ function check_for_dependencies() {
exit "${ksft_skip}"
fi
+ if [ ! -f /proc/net/if_inet6 ]; then
+ echo "SKIP: IPv6 not configured. Check if CONFIG_IPV6 is enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
if [ ! -f "${NSIM_DEV_SYS_NEW}" ]; then
echo "SKIP: file ${NSIM_DEV_SYS_NEW} does not exist. Check if CONFIG_NETDEVSIM is enabled" >&2
exit "${ksft_skip}"
@@ -224,8 +301,15 @@ function check_for_dependencies() {
exit "${ksft_skip}"
fi
- if ip addr list | grep -E "inet.*(${SRCIP}|${DSTIP})" 2> /dev/null; then
- echo "SKIP: IPs already in use. Skipping it" >&2
+ REGEXP4="inet.*(${SRCIP4}|${DSTIP4})"
+ REGEXP6="inet.*(${SRCIP6}|${DSTIP6})"
+ if ip addr list | grep -E "${REGEXP4}" 2> /dev/null; then
+ echo "SKIP: IPv4s already in use. Skipping it" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ip addr list | grep -E "${REGEXP6}" 2> /dev/null; then
+ echo "SKIP: IPv6s already in use. Skipping it" >&2
exit "${ksft_skip}"
fi
}
@@ -239,10 +323,41 @@ function check_for_taskset() {
# This is necessary if running multiple tests in a row
function pkill_socat() {
- PROCESS_NAME="socat UDP-LISTEN:6666,fork ${OUTPUT_FILE}"
+ PROCESS_NAME4="socat UDP-LISTEN:6666,fork ${OUTPUT_FILE}"
+ PROCESS_NAME6="socat UDP6-LISTEN:6666,fork ${OUTPUT_FILE}"
# socat runs under timeout(1), kill it if it is still alive
# do not fail if socat doesn't exist anymore
set +e
- pkill -f "${PROCESS_NAME}"
+ pkill -f "${PROCESS_NAME4}"
+ pkill -f "${PROCESS_NAME6}"
set -e
}
+
+# Check if netconsole was compiled as a module, otherwise exit
+function check_netconsole_module() {
+ if modinfo netconsole | grep filename: | grep -q builtin
+ then
+ echo "SKIP: netconsole should be compiled as a module" >&2
+ exit "${ksft_skip}"
+ fi
+}
+
+# A wrapper to translate protocol version to udp version
+function wait_for_port() {
+ local NAMESPACE=${1}
+ local PORT=${2}
+ IP_VERSION=${3}
+
+ if [ "${IP_VERSION}" == "ipv6" ]
+ then
+ PROTOCOL="udp6"
+ else
+ PROTOCOL="udp"
+ fi
+
+ wait_local_port_listen "${NAMESPACE}" "${PORT}" "${PROTOCOL}"
+ # even after the port is open, let's wait 1 second before writing
+ # otherwise the packet could be missed, and the test will fail. Happens
+ # more frequently on IPv6
+ sleep 1
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
index 899b6892603f..d7505b933aef 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
@@ -51,7 +51,7 @@ for current_test in ${TESTS:-$ALL_TESTS}; do
fi
${current_test}_setup_prepare
- setup_wait $num_netifs
+ setup_wait_n $num_netifs
# Update target in case occupancy of a certain resource changed
# following the test setup.
target=$(${current_test}_get_target "$should_fail")
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
index 482ebb744eba..7b98cdd0580d 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
@@ -55,7 +55,7 @@ for current_test in ${TESTS:-$ALL_TESTS}; do
continue
fi
${current_test}_setup_prepare
- setup_wait $num_netifs
+ setup_wait_n $num_netifs
# Update target in case occupancy of a certain resource
# changed following the test setup.
target=$(${current_test}_get_target "$should_fail")
diff --git a/tools/testing/selftests/drivers/net/napi_id.py b/tools/testing/selftests/drivers/net/napi_id.py
index 356bac46ba04..d05eddcad539 100755
--- a/tools/testing/selftests/drivers/net/napi_id.py
+++ b/tools/testing/selftests/drivers/net/napi_id.py
@@ -7,10 +7,10 @@ from lib.py import bkg, cmd, rand_port, NetNSEnter
def test_napi_id(cfg) -> None:
port = rand_port()
- listen_cmd = f"{cfg.test_dir}/napi_id_helper {cfg.addr_v['4']} {port}"
+ listen_cmd = f"{cfg.test_dir}/napi_id_helper {cfg.addr} {port}"
with bkg(listen_cmd, ksft_wait=3) as server:
- cmd(f"echo a | socat - TCP:{cfg.addr_v['4']}:{port}", host=cfg.remote, shell=True)
+ cmd(f"echo a | socat - TCP:{cfg.baddr}:{port}", host=cfg.remote, shell=True)
ksft_eq(0, server.ret)
diff --git a/tools/testing/selftests/drivers/net/napi_id_helper.c b/tools/testing/selftests/drivers/net/napi_id_helper.c
index eecd610c2109..7f49ca6c8637 100644
--- a/tools/testing/selftests/drivers/net/napi_id_helper.c
+++ b/tools/testing/selftests/drivers/net/napi_id_helper.c
@@ -7,41 +7,58 @@
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
+#include <netdb.h>
#include "../../net/lib/ksft.h"
int main(int argc, char *argv[])
{
- struct sockaddr_in address;
+ struct sockaddr_storage address;
+ struct addrinfo *result;
+ struct addrinfo hints;
unsigned int napi_id;
- unsigned int port;
+ socklen_t addr_len;
socklen_t optlen;
char buf[1024];
int opt = 1;
+ int family;
int server;
int client;
int ret;
- server = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = AF_UNSPEC;
+ hints.ai_socktype = SOCK_STREAM;
+ hints.ai_flags = AI_PASSIVE;
+
+ ret = getaddrinfo(argv[1], argv[2], &hints, &result);
+ if (ret != 0) {
+ fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(ret));
+ return 1;
+ }
+
+ family = result->ai_family;
+ addr_len = result->ai_addrlen;
+
+ server = socket(family, SOCK_STREAM, IPPROTO_TCP);
if (server < 0) {
perror("socket creation failed");
+ freeaddrinfo(result);
if (errno == EAFNOSUPPORT)
return -1;
return 1;
}
- port = atoi(argv[2]);
-
if (setsockopt(server, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt))) {
perror("setsockopt");
+ freeaddrinfo(result);
return 1;
}
- address.sin_family = AF_INET;
- inet_pton(AF_INET, argv[1], &address.sin_addr);
- address.sin_port = htons(port);
+ memcpy(&address, result->ai_addr, result->ai_addrlen);
+ freeaddrinfo(result);
- if (bind(server, (struct sockaddr *)&address, sizeof(address)) < 0) {
+ if (bind(server, (struct sockaddr *)&address, addr_len) < 0) {
perror("bind failed");
return 1;
}
diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh
index fe765da498e8..a3446b569976 100755
--- a/tools/testing/selftests/drivers/net/netcons_basic.sh
+++ b/tools/testing/selftests/drivers/net/netcons_basic.sh
@@ -32,21 +32,42 @@ check_for_dependencies
echo "6 5" > /proc/sys/kernel/printk
# Remove the namespace, interfaces and netconsole target on exit
trap cleanup EXIT
-# Create one namespace and two interfaces
-set_network
-# Create a dynamic target for netconsole
-create_dynamic_target
-# Set userdata "key" with the "value" value
-set_user_data
-# Listed for netconsole port inside the namespace and destination interface
-listen_port_and_save_to "${OUTPUT_FILE}" &
-# Wait for socat to start and listen to the port.
-wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
-# Send the message
-echo "${MSG}: ${TARGET}" > /dev/kmsg
-# Wait until socat saves the file to disk
-busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
-# Make sure the message was received in the dst part
-# and exit
-validate_result "${OUTPUT_FILE}"
+# Run the test twice, with different format modes
+for FORMAT in "basic" "extended"
+do
+ for IP_VERSION in "ipv6" "ipv4"
+ do
+ echo "Running with target mode: ${FORMAT} (${IP_VERSION})"
+ # Create one namespace and two interfaces
+ set_network "${IP_VERSION}"
+ # Create a dynamic target for netconsole
+ create_dynamic_target "${FORMAT}"
+ # Only set userdata for extended format
+ if [ "$FORMAT" == "extended" ]
+ then
+ # Set userdata "key" with the "value" value
+ set_user_data
+ fi
+ # Listed for netconsole port inside the namespace and
+ # destination interface
+ listen_port_and_save_to "${OUTPUT_FILE}" "${IP_VERSION}" &
+ # Wait for socat to start and listen to the port.
+ wait_for_port "${NAMESPACE}" "${PORT}" "${IP_VERSION}"
+ # Send the message
+ echo "${MSG}: ${TARGET}" > /dev/kmsg
+ # Wait until socat saves the file to disk
+ busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+
+ # Make sure the message was received in the dst part
+ # and exit
+ validate_result "${OUTPUT_FILE}" "${FORMAT}"
+ # kill socat in case it is still running
+ pkill_socat
+ cleanup
+ echo "${FORMAT} : ${IP_VERSION} : Test passed" >&2
+ done
+done
+
+trap - EXIT
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/netcons_cmdline.sh b/tools/testing/selftests/drivers/net/netcons_cmdline.sh
new file mode 100755
index 000000000000..ad2fb8b1c463
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_cmdline.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This is a selftest to test cmdline arguments on netconsole.
+# It exercises loading of netconsole from cmdline instead of the dynamic
+# reconfiguration. This includes parsing the long netconsole= line and all the
+# flow through init_netconsole().
+#
+# Author: Breno Leitao <[email protected]>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+
+check_netconsole_module
+
+modprobe netdevsim 2> /dev/null || true
+rmmod netconsole 2> /dev/null || true
+
+# The content of kmsg will be save to the following file
+OUTPUT_FILE="/tmp/${TARGET}"
+
+# Check for basic system dependency and exit if not found
+# check_for_dependencies
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace and network interfaces
+trap do_cleanup EXIT
+# Create one namespace and two interfaces
+set_network
+# Create the command line for netconsole, with the configuration from the
+# function above
+CMDLINE="$(create_cmdline_str)"
+
+# Load the module, with the cmdline set
+modprobe netconsole "${CMDLINE}"
+
+# Listed for netconsole port inside the namespace and destination interface
+listen_port_and_save_to "${OUTPUT_FILE}" &
+# Wait for socat to start and listen to the port.
+wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+# Send the message
+echo "${MSG}: ${TARGET}" > /dev/kmsg
+# Wait until socat saves the file to disk
+busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+# Make sure the message was received in the dst part
+# and exit
+validate_msg "${OUTPUT_FILE}"
+
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/netcons_sysdata.sh b/tools/testing/selftests/drivers/net/netcons_sysdata.sh
index a737e377bf08..baf69031089e 100755
--- a/tools/testing/selftests/drivers/net/netcons_sysdata.sh
+++ b/tools/testing/selftests/drivers/net/netcons_sysdata.sh
@@ -53,6 +53,17 @@ function set_release() {
echo 1 > "${NETCONS_PATH}/userdata/release_enabled"
}
+# Enable the msgid to be appended to sysdata
+function set_msgid() {
+ if [[ ! -f "${NETCONS_PATH}/userdata/msgid_enabled" ]]
+ then
+ echo "Not able to enable msgid sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/msgid_enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
+ echo 1 > "${NETCONS_PATH}/userdata/msgid_enabled"
+}
+
# Disable the sysdata cpu_nr feature
function unset_cpu_nr() {
echo 0 > "${NETCONS_PATH}/userdata/cpu_nr_enabled"
@@ -67,6 +78,10 @@ function unset_release() {
echo 0 > "${NETCONS_PATH}/userdata/release_enabled"
}
+function unset_msgid() {
+ echo 0 > "${NETCONS_PATH}/userdata/msgid_enabled"
+}
+
# Test if MSG contains sysdata
function validate_sysdata() {
# OUTPUT_FILE will contain something like:
@@ -74,6 +89,7 @@ function validate_sysdata() {
# userdatakey=userdatavalue
# cpu=X
# taskname=<taskname>
+ # msgid=<id>
# Echo is what this test uses to create the message. See runtest()
# function
@@ -104,6 +120,12 @@ function validate_sysdata() {
exit "${ksft_fail}"
fi
+ if ! grep -q "msgid=[0-9]\+$" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'msgid=<id>' not found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
rm "${OUTPUT_FILE}"
pkill_socat
}
@@ -155,6 +177,12 @@ function validate_no_sysdata() {
exit "${ksft_fail}"
fi
+ if grep -q "msgid=" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'msgid= found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
rm "${OUTPUT_FILE}"
}
@@ -206,6 +234,7 @@ set_cpu_nr
# Enable taskname to be appended to sysdata
set_taskname
set_release
+set_msgid
runtest
# Make sure the message was received in the dst part
# and exit
@@ -235,6 +264,7 @@ MSG="Test #3 from CPU${CPU}"
unset_cpu_nr
unset_taskname
unset_release
+unset_msgid
runtest
# At this time, cpu= shouldn't be present in the msg
validate_no_sysdata
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index b5ea2526f23c..030762b203d7 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -40,6 +40,8 @@ fw_flash_test()
return
fi
+ echo "10"> $DEBUGFS_DIR/fw_update_flash_chunk_time_ms
+
devlink dev flash $DL_HANDLE file $DUMMYFILE
check_err $? "Failed to flash with status updates on"
@@ -608,6 +610,46 @@ rate_attr_parent_check()
check_err $? "Unexpected parent attr value $api_value != $parent"
}
+rate_attr_tc_bw_check()
+{
+ local handle=$1
+ local tc_bw=$2
+ local debug_file=$3
+
+ local tc_bw_str=""
+ for bw in $tc_bw; do
+ local tc=${bw%%:*}
+ local value=${bw##*:}
+ tc_bw_str="$tc_bw_str $tc:$value"
+ done
+ tc_bw_str=${tc_bw_str# }
+
+ rate_attr_set "$handle" tc-bw "$tc_bw_str"
+ check_err $? "Failed to set tc-bw values"
+
+ for bw in $tc_bw; do
+ local tc=${bw%%:*}
+ local value=${bw##*:}
+ local debug_value
+ debug_value=$(cat "$debug_file"/tc"${tc}"_bw)
+ check_err $? "Failed to read tc-bw value from debugfs for tc$tc"
+ [ "$debug_value" == "$value" ]
+ check_err $? "Unexpected tc-bw debug value for tc$tc: $debug_value != $value"
+ done
+
+ for bw in $tc_bw; do
+ local tc=${bw%%:*}
+ local expected_value=${bw##*:}
+ local api_value
+ api_value=$(rate_attr_get "$handle" tc_"$tc")
+ if [ "$api_value" = "null" ]; then
+ api_value=0
+ fi
+ [ "$api_value" == "$expected_value" ]
+ check_err $? "Unexpected tc-bw value for tc$tc: $api_value != $expected_value"
+ done
+}
+
rate_node_add()
{
local handle=$1
@@ -649,6 +691,13 @@ rate_test()
rate=$(($rate+100))
done
+ local tc_bw="0:0 1:40 2:0 3:0 4:0 5:0 6:60 7:0"
+ for r_obj in $leafs
+ do
+ rate_attr_tc_bw_check "$r_obj" "$tc_bw" \
+ "$DEBUGFS_DIR"/ports/"${r_obj##*/}"
+ done
+
local node1_name='group1'
local node1="$DL_HANDLE/$node1_name"
rate_node_add "$node1"
@@ -666,6 +715,12 @@ rate_test()
rate_attr_tx_rate_check $node1 tx_max $node_tx_max \
$DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_max
+
+ local tc_bw="0:20 1:0 2:0 3:0 4:0 5:20 6:60 7:0"
+ rate_attr_tc_bw_check $node1 "$tc_bw" \
+ "$DEBUGFS_DIR"/rate_nodes/"${node1##*/}"
+
+
rate_node_del "$node1"
check_err $? "Failed to delete node $node1"
local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w`
diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
index 92c2f0376c08..4c859ecdad94 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
@@ -266,7 +266,6 @@ for port in 0 1; do
echo $NSIM_ID > /sys/bus/netdevsim/new_device
else
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
echo 1 > $NSIM_DEV_SYS/new_port
fi
NSIM_NETDEV=`get_netdev_name old_netdevs`
@@ -350,23 +349,11 @@ old_netdevs=$(ls /sys/class/net)
port=0
echo $NSIM_ID > /sys/bus/netdevsim/new_device
echo 0 > $NSIM_DEV_SYS/del_port
-echo 1000 > $NSIM_DEV_DFS/udp_ports_sleep
echo 0 > $NSIM_DEV_SYS/new_port
NSIM_NETDEV=`get_netdev_name old_netdevs`
msg="create VxLANs"
-exp0=( 0 0 0 0 ) # sleep is longer than out wait
-new_vxlan vxlan0 10000 $NSIM_NETDEV
-
-modprobe -r vxlan
-modprobe -r udp_tunnel
-
-msg="remove tunnels"
-exp0=( 0 0 0 0 )
-check_tables
-
-msg="create VxLANs"
-exp0=( 0 0 0 0 ) # sleep is longer than out wait
+exp0=( `mke 10000 1` 0 0 0 )
new_vxlan vxlan0 10000 $NSIM_NETDEV
exp0=( 0 0 0 0 )
@@ -428,7 +415,6 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -486,7 +472,6 @@ echo 1 > $NSIM_DEV_DFS/udp_ports_sync_all
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -543,7 +528,6 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -573,7 +557,6 @@ echo 1 > $NSIM_DEV_DFS/udp_ports_ipv4_only
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -634,7 +617,6 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -690,7 +672,6 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -750,7 +731,6 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -809,7 +789,6 @@ echo $NSIM_ID > /sys/bus/netdevsim/new_device
echo 0 > $NSIM_DEV_SYS/del_port
echo 0 > $NSIM_DEV_DFS/udp_ports_open_only
-echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
echo 1 > $NSIM_DEV_DFS/udp_ports_shared
old_netdevs=$(ls /sys/class/net)
diff --git a/tools/testing/selftests/drivers/net/netpoll_basic.py b/tools/testing/selftests/drivers/net/netpoll_basic.py
new file mode 100755
index 000000000000..408bd54d6779
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netpoll_basic.py
@@ -0,0 +1,396 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Author: Breno Leitao <[email protected]>
+"""
+ This test aims to evaluate the netpoll polling mechanism (as in
+ netpoll_poll_dev()). It presents a complex scenario where the network
+ attempts to send a packet but fails, prompting it to poll the NIC from within
+ the netpoll TX side.
+
+ This has been a crucial path in netpoll that was previously untested. Jakub
+ suggested using a single RX/TX queue, pushing traffic to the NIC, and then
+ sending netpoll messages (via netconsole) to trigger the poll.
+
+ In parallel, bpftrace is used to detect if netpoll_poll_dev() was called. If
+ so, the test passes, otherwise it will be skipped. This test is very dependent on
+ the driver and environment, given we are trying to trigger a tricky scenario.
+"""
+
+import errno
+import logging
+import os
+import random
+import string
+import threading
+import time
+from typing import Optional
+
+from lib.py import (
+ bpftrace,
+ CmdExitFailure,
+ defer,
+ ethtool,
+ GenerateTraffic,
+ ksft_exit,
+ ksft_pr,
+ ksft_run,
+ KsftFailEx,
+ KsftSkipEx,
+ NetDrvEpEnv,
+ KsftXfailEx,
+)
+
+# Configure logging
+logging.basicConfig(
+ level=logging.INFO,
+ format="%(asctime)s - %(levelname)s - %(message)s",
+)
+
+NETCONSOLE_CONFIGFS_PATH: str = "/sys/kernel/config/netconsole"
+NETCONS_REMOTE_PORT: int = 6666
+NETCONS_LOCAL_PORT: int = 1514
+
+# Max number of netcons messages to send. Each iteration will setup
+# netconsole and send MAX_WRITES messages
+ITERATIONS: int = 20
+# Number of writes to /dev/kmsg per iteration
+MAX_WRITES: int = 40
+# MAPS contains the information coming from bpftrace it will have only one
+# key: "hits", which tells the number of times netpoll_poll_dev() was called
+MAPS: dict[str, int] = {}
+# Thread to run bpftrace in parallel
+BPF_THREAD: Optional[threading.Thread] = None
+# Time bpftrace will be running in parallel.
+BPFTRACE_TIMEOUT: int = 10
+
+
+def ethtool_get_ringsize(interface_name: str) -> tuple[int, int]:
+ """
+ Read the ringsize using ethtool. This will be used to restore it after the test
+ """
+ try:
+ ethtool_result = ethtool(f"-g {interface_name}", json=True)[0]
+ rxs = ethtool_result["rx"]
+ txs = ethtool_result["tx"]
+ except (KeyError, IndexError) as exception:
+ raise KsftSkipEx(
+ f"Failed to read RX/TX ringsize: {exception}. Not going to mess with them."
+ ) from exception
+
+ return rxs, txs
+
+
+def ethtool_set_ringsize(interface_name: str, ring_size: tuple[int, int]) -> bool:
+ """Try to the number of RX and TX ringsize."""
+ rxs = ring_size[0]
+ txs = ring_size[1]
+
+ logging.debug("Setting ring size to %d/%d", rxs, txs)
+ try:
+ ethtool(f"-G {interface_name} rx {rxs} tx {txs}")
+ except CmdExitFailure:
+ # This might fail on real device, retry with a higher value,
+ # worst case, keep it as it is.
+ return False
+
+ return True
+
+
+def ethtool_get_queues_cnt(interface_name: str) -> tuple[int, int, int]:
+ """Read the number of RX, TX and combined queues using ethtool"""
+
+ try:
+ ethtool_result = ethtool(f"-l {interface_name}", json=True)[0]
+ rxq = ethtool_result.get("rx", -1)
+ txq = ethtool_result.get("tx", -1)
+ combined = ethtool_result.get("combined", -1)
+
+ except IndexError as exception:
+ raise KsftSkipEx(
+ f"Failed to read queues numbers: {exception}. Not going to mess with them."
+ ) from exception
+
+ return rxq, txq, combined
+
+
+def ethtool_set_queues_cnt(interface_name: str, queues: tuple[int, int, int]) -> None:
+ """Set the number of RX, TX and combined queues using ethtool"""
+ rxq, txq, combined = queues
+
+ cmdline = f"-L {interface_name}"
+
+ if rxq != -1:
+ cmdline += f" rx {rxq}"
+ if txq != -1:
+ cmdline += f" tx {txq}"
+ if combined != -1:
+ cmdline += f" combined {combined}"
+
+ logging.debug("calling: ethtool %s", cmdline)
+
+ try:
+ ethtool(cmdline)
+ except CmdExitFailure as exception:
+ raise KsftSkipEx(
+ f"Failed to configure RX/TX queues: {exception}. Ethtool not available?"
+ ) from exception
+
+
+def netcons_generate_random_target_name() -> str:
+ """Generate a random target name starting with 'netcons'"""
+ random_suffix = "".join(random.choices(string.ascii_lowercase + string.digits, k=8))
+ return f"netcons_{random_suffix}"
+
+
+def netcons_create_target(
+ config_data: dict[str, str],
+ target_name: str,
+) -> None:
+ """Create a netconsole dynamic target against the interfaces"""
+ logging.debug("Using netconsole name: %s", target_name)
+ try:
+ os.makedirs(f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}", exist_ok=True)
+ logging.debug(
+ "Created target directory: %s/%s", NETCONSOLE_CONFIGFS_PATH, target_name
+ )
+ except OSError as exception:
+ if exception.errno != errno.EEXIST:
+ raise KsftFailEx(
+ f"Failed to create netconsole target directory: {exception}"
+ ) from exception
+
+ try:
+ for key, value in config_data.items():
+ path = f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{key}"
+ logging.debug("Writing %s to %s", key, path)
+ with open(path, "w", encoding="utf-8") as file:
+ # Always convert to string to write to file
+ file.write(str(value))
+
+ # Read all configuration values for debugging purposes
+ for debug_key in config_data.keys():
+ with open(
+ f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{debug_key}",
+ "r",
+ encoding="utf-8",
+ ) as file:
+ content = file.read()
+ logging.debug(
+ "%s/%s/%s : %s",
+ NETCONSOLE_CONFIGFS_PATH,
+ target_name,
+ debug_key,
+ content.strip(),
+ )
+
+ except Exception as exception:
+ raise KsftFailEx(
+ f"Failed to configure netconsole target: {exception}"
+ ) from exception
+
+
+def netcons_configure_target(
+ cfg: NetDrvEpEnv, interface_name: str, target_name: str
+) -> None:
+ """Configure netconsole on the interface with the given target name"""
+ config_data = {
+ "extended": "1",
+ "dev_name": interface_name,
+ "local_port": NETCONS_LOCAL_PORT,
+ "remote_port": NETCONS_REMOTE_PORT,
+ "local_ip": cfg.addr,
+ "remote_ip": cfg.remote_addr,
+ "remote_mac": "00:00:00:00:00:00", # Not important for this test
+ "enabled": "1",
+ }
+
+ netcons_create_target(config_data, target_name)
+ logging.debug(
+ "Created netconsole target: %s on interface %s", target_name, interface_name
+ )
+
+
+def netcons_delete_target(name: str) -> None:
+ """Delete a netconsole dynamic target"""
+ target_path = f"{NETCONSOLE_CONFIGFS_PATH}/{name}"
+ try:
+ if os.path.exists(target_path):
+ os.rmdir(target_path)
+ except OSError as exception:
+ raise KsftFailEx(
+ f"Failed to delete netconsole target: {exception}"
+ ) from exception
+
+
+def netcons_load_module() -> None:
+ """Try to load the netconsole module"""
+ os.system("modprobe netconsole")
+
+
+def bpftrace_call() -> None:
+ """Call bpftrace to find how many times netpoll_poll_dev() is called.
+ Output is saved in the global variable `maps`"""
+
+ # This is going to update the global variable, that will be seen by the
+ # main function
+ global MAPS # pylint: disable=W0603
+
+ # This will be passed to bpftrace as in bpftrace -e "expr"
+ expr = "kprobe:netpoll_poll_dev { @hits = count(); }"
+
+ MAPS = bpftrace(expr, timeout=BPFTRACE_TIMEOUT, json=True)
+ logging.debug("BPFtrace output: %s", MAPS)
+
+
+def bpftrace_start():
+ """Start a thread to call `call_bpf` in a parallel thread"""
+ global BPF_THREAD # pylint: disable=W0603
+
+ BPF_THREAD = threading.Thread(target=bpftrace_call)
+ BPF_THREAD.start()
+ if not BPF_THREAD.is_alive():
+ raise KsftSkipEx("BPFtrace thread is not alive. Skipping test")
+
+
+def bpftrace_stop() -> None:
+ """Stop the bpftrace thread"""
+ if BPF_THREAD:
+ BPF_THREAD.join()
+
+
+def bpftrace_any_hit(join: bool) -> bool:
+ """Check if netpoll_poll_dev() was called by checking the global variable `maps`"""
+ if not BPF_THREAD:
+ raise KsftFailEx("BPFtrace didn't start")
+
+ if BPF_THREAD.is_alive():
+ if join:
+ # Wait for bpftrace to finish
+ BPF_THREAD.join()
+ else:
+ # bpftrace is still running, so, we will not check the result yet
+ return False
+
+ logging.debug("MAPS coming from bpftrace = %s", MAPS)
+ if "hits" not in MAPS.keys():
+ raise KsftFailEx(f"bpftrace failed to run!?: {MAPS}")
+
+ logging.debug("Got a total of %d hits", MAPS["hits"])
+ return MAPS["hits"] > 0
+
+
+def do_netpoll_flush_monitored(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None:
+ """Print messages to the console, trying to trigger a netpoll poll"""
+ # Start bpftrace in parallel, so, it is watching
+ # netpoll_poll_dev() while we are sending netconsole messages
+ bpftrace_start()
+ defer(bpftrace_stop)
+
+ do_netpoll_flush(cfg, ifname, target_name)
+
+ if bpftrace_any_hit(join=True):
+ ksft_pr("netpoll_poll_dev() was called. Success")
+ return
+
+ raise KsftXfailEx("netpoll_poll_dev() was not called during the test...")
+
+
+def do_netpoll_flush(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None:
+ """Print messages to the console, trying to trigger a netpoll poll"""
+ netcons_configure_target(cfg, ifname, target_name)
+ retry = 0
+
+ for i in range(int(ITERATIONS)):
+ if not BPF_THREAD.is_alive() or bpftrace_any_hit(join=False):
+ # bpftrace is done, stop sending messages
+ break
+
+ msg = f"netcons test #{i}"
+ with open("/dev/kmsg", "w", encoding="utf-8") as kmsg:
+ for j in range(MAX_WRITES):
+ try:
+ kmsg.write(f"{msg}-{j}\n")
+ except OSError as exception:
+ # in some cases, kmsg can be busy, so, we will retry
+ time.sleep(1)
+ retry += 1
+ if retry < 5:
+ logging.info("Failed to write to kmsg. Retrying")
+ # Just retry a few times
+ continue
+ raise KsftFailEx(
+ f"Failed to write to kmsg: {exception}"
+ ) from exception
+
+ netcons_delete_target(target_name)
+ netcons_configure_target(cfg, ifname, target_name)
+ # If we sleep here, we will have a better chance of triggering
+ # This number is based on a few tests I ran while developing this test
+ time.sleep(0.4)
+
+
+def configure_network(ifname: str) -> None:
+ """Configure ring size and queue numbers"""
+
+ # Set defined queues to 1 to force congestion
+ prev_queues = ethtool_get_queues_cnt(ifname)
+ logging.debug("RX/TX/combined queues: %s", prev_queues)
+ # Only set the queues to 1 if they exists in the device. I.e, they are > 0
+ ethtool_set_queues_cnt(ifname, tuple(1 if x > 0 else x for x in prev_queues))
+ defer(ethtool_set_queues_cnt, ifname, prev_queues)
+
+ # Try to set the ring size to some low value.
+ # Do not fail if the hardware do not accepted desired values
+ prev_ring_size = ethtool_get_ringsize(ifname)
+ for size in [(1, 1), (128, 128), (256, 256)]:
+ if ethtool_set_ringsize(ifname, size):
+ # hardware accepted the desired ringsize
+ logging.debug("Set RX/TX ringsize to: %s from %s", size, prev_ring_size)
+ break
+ defer(ethtool_set_ringsize, ifname, prev_ring_size)
+
+
+def test_netpoll(cfg: NetDrvEpEnv) -> None:
+ """
+ Test netpoll by sending traffic to the interface and then sending
+ netconsole messages to trigger a poll
+ """
+
+ ifname = cfg.ifname
+ configure_network(ifname)
+ target_name = netcons_generate_random_target_name()
+ traffic = None
+
+ try:
+ traffic = GenerateTraffic(cfg)
+ do_netpoll_flush_monitored(cfg, ifname, target_name)
+ finally:
+ if traffic:
+ traffic.stop()
+
+ # Revert RX/TX queues
+ netcons_delete_target(target_name)
+
+
+def test_check_dependencies() -> None:
+ """Check if the dependencies are met"""
+ if not os.path.exists(NETCONSOLE_CONFIGFS_PATH):
+ raise KsftSkipEx(
+ f"Directory {NETCONSOLE_CONFIGFS_PATH} does not exist. CONFIG_NETCONSOLE_DYNAMIC might not be set." # pylint: disable=C0301
+ )
+
+
+def main() -> None:
+ """Main function to run the test"""
+ netcons_load_module()
+ test_check_dependencies()
+ with NetDrvEpEnv(__file__) as cfg:
+ ksft_run(
+ [test_netpoll],
+ args=(cfg,),
+ )
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py
index e0f114612c1a..da3623c5e8a9 100755
--- a/tools/testing/selftests/drivers/net/ping.py
+++ b/tools/testing/selftests/drivers/net/ping.py
@@ -30,7 +30,7 @@ def _test_v6(cfg) -> None:
cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["6"], host=cfg.remote)
def _test_tcp(cfg) -> None:
- cfg.require_cmd("socat", remote=True)
+ cfg.require_cmd("socat", local=False, remote=True)
port = rand_port()
listen_cmd = f"socat -{cfg.addr_ipver} -t 2 -u TCP-LISTEN:{port},reuseport STDOUT"
diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py
index efcc1e10575b..c2bb5d3f1ca1 100755
--- a/tools/testing/selftests/drivers/net/stats.py
+++ b/tools/testing/selftests/drivers/net/stats.py
@@ -1,12 +1,16 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
+"""
+Tests related to standard netdevice statistics.
+"""
+
import errno
import subprocess
import time
from lib.py import ksft_run, ksft_exit, ksft_pr
from lib.py import ksft_ge, ksft_eq, ksft_is, ksft_in, ksft_lt, ksft_true, ksft_raises
-from lib.py import KsftSkipEx, KsftXfailEx
+from lib.py import KsftSkipEx, KsftFailEx
from lib.py import ksft_disruptive
from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError
from lib.py import NetDrvEnv
@@ -18,13 +22,16 @@ rtnl = RtnlFamily()
def check_pause(cfg) -> None:
- global ethnl
+ """
+ Check that drivers which support Pause config also report standard
+ pause stats.
+ """
try:
ethnl.pause_get({"header": {"dev-index": cfg.ifindex}})
except NlError as e:
if e.error == errno.EOPNOTSUPP:
- raise KsftXfailEx("pause not supported by the device")
+ raise KsftSkipEx("pause not supported by the device") from e
raise
data = ethnl.pause_get({"header": {"dev-index": cfg.ifindex,
@@ -33,13 +40,16 @@ def check_pause(cfg) -> None:
def check_fec(cfg) -> None:
- global ethnl
+ """
+ Check that drivers which support FEC config also report standard
+ FEC stats.
+ """
try:
ethnl.fec_get({"header": {"dev-index": cfg.ifindex}})
except NlError as e:
if e.error == errno.EOPNOTSUPP:
- raise KsftXfailEx("FEC not supported by the device")
+ raise KsftSkipEx("FEC not supported by the device") from e
raise
data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex,
@@ -48,15 +58,17 @@ def check_fec(cfg) -> None:
def pkt_byte_sum(cfg) -> None:
- global netfam, rtnl
+ """
+ Check that qstat and interface stats match in value.
+ """
def get_qstat(test):
- global netfam
stats = netfam.qstats_get({}, dump=True)
if stats:
for qs in stats:
if qs["ifindex"]== test.ifindex:
return qs
+ return None
qstat = get_qstat(cfg)
if qstat is None:
@@ -77,15 +89,14 @@ def pkt_byte_sum(cfg) -> None:
for _ in range(10):
rtstat = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
if stat_cmp(rtstat, qstat) < 0:
- raise Exception("RTNL stats are lower, fetched later")
+ raise KsftFailEx("RTNL stats are lower, fetched later")
qstat = get_qstat(cfg)
if stat_cmp(rtstat, qstat) > 0:
- raise Exception("Qstats are lower, fetched later")
+ raise KsftFailEx("Qstats are lower, fetched later")
def qstat_by_ifindex(cfg) -> None:
- global netfam
- global rtnl
+ """ Qstats Netlink API tests - querying by ifindex. """
# Construct a map ifindex -> [dump, by-index, dump]
ifindexes = {}
@@ -93,7 +104,7 @@ def qstat_by_ifindex(cfg) -> None:
for entry in stats:
ifindexes[entry['ifindex']] = [entry, None, None]
- for ifindex in ifindexes.keys():
+ for ifindex in ifindexes:
entry = netfam.qstats_get({"ifindex": ifindex}, dump=True)
ksft_eq(len(entry), 1)
ifindexes[entry[0]['ifindex']][1] = entry[0]
@@ -145,7 +156,7 @@ def qstat_by_ifindex(cfg) -> None:
# Try to get stats for lowest unused ifindex but not 0
devs = rtnl.getlink({}, dump=True)
- all_ifindexes = set([dev["ifi-index"] for dev in devs])
+ all_ifindexes = set(dev["ifi-index"] for dev in devs)
lowest = 2
while lowest in all_ifindexes:
lowest += 1
@@ -158,18 +169,20 @@ def qstat_by_ifindex(cfg) -> None:
@ksft_disruptive
def check_down(cfg) -> None:
+ """ Test statistics (interface and qstat) are not impacted by ifdown """
+
try:
qstat = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
except NlError as e:
if e.error == errno.EOPNOTSUPP:
- raise KsftSkipEx("qstats not supported by the device")
+ raise KsftSkipEx("qstats not supported by the device") from e
raise
ip(f"link set dev {cfg.dev['ifname']} down")
defer(ip, f"link set dev {cfg.dev['ifname']} up")
qstat2 = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
- for k, v in qstat.items():
+ for k in qstat:
ksft_ge(qstat2[k], qstat[k], comment=f"{k} went backwards on device down")
# exercise per-queue API to make sure that "device down" state
@@ -263,6 +276,8 @@ def procfs_downup_hammer(cfg) -> None:
def main() -> None:
+ """ Ksft boiler plate main """
+
with NetDrvEnv(__file__, queue_count=100) as cfg:
ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex,
check_down, procfs_hammer, procfs_downup_hammer],
diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py
new file mode 100755
index 000000000000..1dd8bf3bf6c9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/xdp.py
@@ -0,0 +1,658 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+This file contains tests to verify native XDP support in network drivers.
+The tests utilize the BPF program `xdp_native.bpf.o` from the `selftests.net.lib`
+directory, with each test focusing on a specific aspect of XDP functionality.
+"""
+import random
+import string
+from dataclasses import dataclass
+from enum import Enum
+
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ne, ksft_pr
+from lib.py import KsftFailEx, NetDrvEpEnv, EthtoolFamily, NlError
+from lib.py import bkg, cmd, rand_port, wait_port_listen
+from lib.py import ip, bpftool, defer
+
+
+class TestConfig(Enum):
+ """Enum for XDP configuration options."""
+ MODE = 0 # Configures the BPF program for a specific test
+ PORT = 1 # Port configuration to communicate with the remote host
+ ADJST_OFFSET = 2 # Tail/Head adjustment offset for extension/shrinking
+ ADJST_TAG = 3 # Adjustment tag to annotate the start and end of extension
+
+
+class XDPAction(Enum):
+ """Enum for XDP actions."""
+ PASS = 0 # Pass the packet up to the stack
+ DROP = 1 # Drop the packet
+ TX = 2 # Route the packet to the remote host
+ TAIL_ADJST = 3 # Adjust the tail of the packet
+ HEAD_ADJST = 4 # Adjust the head of the packet
+
+
+class XDPStats(Enum):
+ """Enum for XDP statistics."""
+ RX = 0 # Count of valid packets received for testing
+ PASS = 1 # Count of packets passed up to the stack
+ DROP = 2 # Count of packets dropped
+ TX = 3 # Count of incoming packets routed to the remote host
+ ABORT = 4 # Count of packets that were aborted
+
+
+@dataclass
+class BPFProgInfo:
+ """Data class to store information about a BPF program."""
+ name: str # Name of the BPF program
+ file: str # BPF program object file
+ xdp_sec: str = "xdp" # XDP section name (e.g., "xdp" or "xdp.frags")
+ mtu: int = 1500 # Maximum Transmission Unit, default is 1500
+
+
+def _exchg_udp(cfg, port, test_string):
+ """
+ Exchanges UDP packets between a local and remote host using the socat tool.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ port: Port number to use for the UDP communication.
+ test_string: String that the remote host will send.
+
+ Returns:
+ The string received by the test host.
+ """
+ cfg.require_cmd("socat", remote=True)
+
+ rx_udp_cmd = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT"
+ tx_udp_cmd = f"echo -n {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}"
+
+ with bkg(rx_udp_cmd, exit_wait=True) as nc:
+ wait_port_listen(port, proto="udp")
+ cmd(tx_udp_cmd, host=cfg.remote, shell=True)
+
+ return nc.stdout.strip()
+
+
+def _test_udp(cfg, port, size=256):
+ """
+ Tests UDP packet exchange between a local and remote host.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ port: Port number to use for the UDP communication.
+ size: The length of the test string to be exchanged, default is 256 characters.
+
+ Returns:
+ bool: True if the received string matches the sent string, False otherwise.
+ """
+ test_str = "".join(random.choice(string.ascii_lowercase) for _ in range(size))
+ recvd_str = _exchg_udp(cfg, port, test_str)
+
+ return recvd_str == test_str
+
+
+def _load_xdp_prog(cfg, bpf_info):
+ """
+ Loads an XDP program onto a network interface.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ bpf_info: BPFProgInfo object containing information about the BPF program.
+
+ Returns:
+ dict: A dictionary containing the XDP program ID, name, and associated map IDs.
+ """
+ abs_path = cfg.net_lib_dir / bpf_info.file
+ prog_info = {}
+
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu {bpf_info.mtu}", shell=True, host=cfg.remote)
+ defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote)
+
+ cmd(
+ f"ip link set dev {cfg.ifname} mtu {bpf_info.mtu} xdp obj {abs_path} sec {bpf_info.xdp_sec}",
+ shell=True
+ )
+ defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off")
+
+ xdp_info = ip(f"-d link show dev {cfg.ifname}", json=True)[0]
+ prog_info["id"] = xdp_info["xdp"]["prog"]["id"]
+ prog_info["name"] = xdp_info["xdp"]["prog"]["name"]
+ prog_id = prog_info["id"]
+
+ map_ids = bpftool(f"prog show id {prog_id}", json=True)["map_ids"]
+ prog_info["maps"] = {}
+ for map_id in map_ids:
+ name = bpftool(f"map show id {map_id}", json=True)["name"]
+ prog_info["maps"][name] = map_id
+
+ return prog_info
+
+
+def format_hex_bytes(value):
+ """
+ Helper function that converts an integer into a formatted hexadecimal byte string.
+
+ Args:
+ value: An integer representing the number to be converted.
+
+ Returns:
+ A string representing hexadecimal equivalent of value, with bytes separated by spaces.
+ """
+ hex_str = value.to_bytes(4, byteorder='little', signed=True)
+ return ' '.join(f'{byte:02x}' for byte in hex_str)
+
+
+def _set_xdp_map(map_name, key, value):
+ """
+ Updates an XDP map with a given key-value pair using bpftool.
+
+ Args:
+ map_name: The name of the XDP map to update.
+ key: The key to update in the map, formatted as a hexadecimal string.
+ value: The value to associate with the key, formatted as a hexadecimal string.
+ """
+ key_formatted = format_hex_bytes(key)
+ value_formatted = format_hex_bytes(value)
+ bpftool(
+ f"map update name {map_name} key hex {key_formatted} value hex {value_formatted}"
+ )
+
+
+def _get_stats(xdp_map_id):
+ """
+ Retrieves and formats statistics from an XDP map.
+
+ Args:
+ xdp_map_id: The ID of the XDP map from which to retrieve statistics.
+
+ Returns:
+ A dictionary containing formatted packet statistics for various XDP actions.
+ The keys are based on the XDPStats Enum values.
+
+ Raises:
+ KsftFailEx: If the stats retrieval fails.
+ """
+ stats_dump = bpftool(f"map dump id {xdp_map_id}", json=True)
+ if not stats_dump:
+ raise KsftFailEx(f"Failed to get stats for map {xdp_map_id}")
+
+ stats_formatted = {}
+ for key in range(0, 5):
+ val = stats_dump[key]["formatted"]["value"]
+ if stats_dump[key]["formatted"]["key"] == XDPStats.RX.value:
+ stats_formatted[XDPStats.RX.value] = val
+ elif stats_dump[key]["formatted"]["key"] == XDPStats.PASS.value:
+ stats_formatted[XDPStats.PASS.value] = val
+ elif stats_dump[key]["formatted"]["key"] == XDPStats.DROP.value:
+ stats_formatted[XDPStats.DROP.value] = val
+ elif stats_dump[key]["formatted"]["key"] == XDPStats.TX.value:
+ stats_formatted[XDPStats.TX.value] = val
+ elif stats_dump[key]["formatted"]["key"] == XDPStats.ABORT.value:
+ stats_formatted[XDPStats.ABORT.value] = val
+
+ return stats_formatted
+
+
+def _test_pass(cfg, bpf_info, msg_sz):
+ """
+ Tests the XDP_PASS action by exchanging UDP packets.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ bpf_info: BPFProgInfo object containing information about the BPF program.
+ msg_sz: Size of the test message to send.
+ """
+
+ prog_info = _load_xdp_prog(cfg, bpf_info)
+ port = rand_port()
+
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.PASS.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ ksft_eq(_test_udp(cfg, port, msg_sz), True, "UDP packet exchange failed")
+ stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+
+ ksft_ne(stats[XDPStats.RX.value], 0, "RX stats should not be zero")
+ ksft_eq(stats[XDPStats.RX.value], stats[XDPStats.PASS.value], "RX and PASS stats mismatch")
+
+
+def test_xdp_native_pass_sb(cfg):
+ """
+ Tests the XDP_PASS action for single buffer case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500)
+
+ _test_pass(cfg, bpf_info, 256)
+
+
+def test_xdp_native_pass_mb(cfg):
+ """
+ Tests the XDP_PASS action for a multi-buff size.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000)
+
+ _test_pass(cfg, bpf_info, 8000)
+
+
+def _test_drop(cfg, bpf_info, msg_sz):
+ """
+ Tests the XDP_DROP action by exchanging UDP packets.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ bpf_info: BPFProgInfo object containing information about the BPF program.
+ msg_sz: Size of the test message to send.
+ """
+
+ prog_info = _load_xdp_prog(cfg, bpf_info)
+ port = rand_port()
+
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.DROP.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ ksft_eq(_test_udp(cfg, port, msg_sz), False, "UDP packet exchange should fail")
+ stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+
+ ksft_ne(stats[XDPStats.RX.value], 0, "RX stats should be zero")
+ ksft_eq(stats[XDPStats.RX.value], stats[XDPStats.DROP.value], "RX and DROP stats mismatch")
+
+
+def test_xdp_native_drop_sb(cfg):
+ """
+ Tests the XDP_DROP action for a signle-buff case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500)
+
+ _test_drop(cfg, bpf_info, 256)
+
+
+def test_xdp_native_drop_mb(cfg):
+ """
+ Tests the XDP_DROP action for a multi-buff case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000)
+
+ _test_drop(cfg, bpf_info, 8000)
+
+
+def test_xdp_native_tx_mb(cfg):
+ """
+ Tests the XDP_TX action for a multi-buff case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ cfg.require_cmd("socat", remote=True)
+
+ bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000)
+ prog_info = _load_xdp_prog(cfg, bpf_info)
+ port = rand_port()
+
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TX.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(8000))
+ rx_udp = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT"
+ tx_udp = f"echo {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}"
+
+ with bkg(rx_udp, host=cfg.remote, exit_wait=True) as rnc:
+ wait_port_listen(port, proto="udp", host=cfg.remote)
+ cmd(tx_udp, host=cfg.remote, shell=True)
+
+ stats = _get_stats(prog_info['maps']['map_xdp_stats'])
+
+ ksft_eq(rnc.stdout.strip(), test_string, "UDP packet exchange failed")
+ ksft_eq(stats[XDPStats.TX.value], 1, "TX stats mismatch")
+
+
+def _validate_res(res, offset_lst, pkt_sz_lst):
+ """
+ Validates the result of a test.
+
+ Args:
+ res: The result of the test, which should be a dictionary with a "status" key.
+
+ Raises:
+ KsftFailEx: If the test fails to pass any combination of offset and packet size.
+ """
+ if "status" not in res:
+ raise KsftFailEx("Missing 'status' key in result dictionary")
+
+ # Validate that not a single case was successful
+ if res["status"] == "fail":
+ if res["offset"] == offset_lst[0] and res["pkt_sz"] == pkt_sz_lst[0]:
+ raise KsftFailEx(f"{res['reason']}")
+
+ # Get the previous offset and packet size to report the successful run
+ tmp_idx = offset_lst.index(res["offset"])
+ prev_offset = offset_lst[tmp_idx - 1]
+ if tmp_idx == 0:
+ tmp_idx = pkt_sz_lst.index(res["pkt_sz"])
+ prev_pkt_sz = pkt_sz_lst[tmp_idx - 1]
+ else:
+ prev_pkt_sz = res["pkt_sz"]
+
+ # Use these values for error reporting
+ ksft_pr(
+ f"Failed run: pkt_sz {res['pkt_sz']}, offset {res['offset']}. "
+ f"Last successful run: pkt_sz {prev_pkt_sz}, offset {prev_offset}. "
+ f"Reason: {res['reason']}"
+ )
+
+
+def _check_for_failures(recvd_str, stats):
+ """
+ Checks for common failures while adjusting headroom or tailroom.
+
+ Args:
+ recvd_str: The string received from the remote host after sending a test string.
+ stats: A dictionary containing formatted packet statistics for various XDP actions.
+
+ Returns:
+ str: A string describing the failure reason if a failure is detected, otherwise None.
+ """
+
+ # Any adjustment failure result in an abort hence, we track this counter
+ if stats[XDPStats.ABORT.value] != 0:
+ return "Adjustment failed"
+
+ # Since we are using aggregate stats for a single test across all offsets and packet sizes
+ # we can't use RX stats only to track data exchange failure without taking a previous
+ # snapshot. An easier way is to simply check for non-zero length of received string.
+ if len(recvd_str) == 0:
+ return "Data exchange failed"
+
+ # Check for RX and PASS stats mismatch. Ideally, they should be equal for a successful run
+ if stats[XDPStats.RX.value] != stats[XDPStats.PASS.value]:
+ return "RX stats mismatch"
+
+ return None
+
+
+def _test_xdp_native_tail_adjst(cfg, pkt_sz_lst, offset_lst):
+ """
+ Tests the XDP tail adjustment functionality.
+
+ This function loads the appropriate XDP program based on the provided
+ program name and configures the XDP map for tail adjustment. It then
+ validates the tail adjustment by sending and receiving UDP packets
+ with specified packet sizes and offsets.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ prog: Name of the XDP program to load.
+ pkt_sz_lst: List of packet sizes to test.
+ offset_lst: List of offsets to validate support for tail adjustment.
+
+ Returns:
+ dict: A dictionary with test status and failure details if applicable.
+ """
+ port = rand_port()
+ bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000)
+
+ prog_info = _load_xdp_prog(cfg, bpf_info)
+
+ # Configure the XDP map for tail adjustment
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TAIL_ADJST.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ for offset in offset_lst:
+ tag = format(random.randint(65, 90), "02x")
+
+ _set_xdp_map("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset)
+ if offset > 0:
+ _set_xdp_map("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16))
+
+ for pkt_sz in pkt_sz_lst:
+ test_str = "".join(random.choice(string.ascii_lowercase) for _ in range(pkt_sz))
+ recvd_str = _exchg_udp(cfg, port, test_str)
+ stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+
+ failure = _check_for_failures(recvd_str, stats)
+ if failure is not None:
+ return {
+ "status": "fail",
+ "reason": failure,
+ "offset": offset,
+ "pkt_sz": pkt_sz,
+ }
+
+ # Validate data content based on offset direction
+ expected_data = None
+ if offset > 0:
+ expected_data = test_str + (offset * chr(int(tag, 16)))
+ else:
+ expected_data = test_str[0:pkt_sz + offset]
+
+ if recvd_str != expected_data:
+ return {
+ "status": "fail",
+ "reason": "Data mismatch",
+ "offset": offset,
+ "pkt_sz": pkt_sz,
+ }
+
+ return {"status": "pass"}
+
+
+def test_xdp_native_adjst_tail_grow_data(cfg):
+ """
+ Tests the XDP tail adjustment by growing packet data.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ pkt_sz_lst = [512, 1024, 2048]
+ offset_lst = [1, 16, 32, 64, 128, 256]
+ res = _test_xdp_native_tail_adjst(
+ cfg,
+ pkt_sz_lst,
+ offset_lst,
+ )
+
+ _validate_res(res, offset_lst, pkt_sz_lst)
+
+
+def test_xdp_native_adjst_tail_shrnk_data(cfg):
+ """
+ Tests the XDP tail adjustment by shrinking packet data.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ pkt_sz_lst = [512, 1024, 2048]
+ offset_lst = [-16, -32, -64, -128, -256]
+ res = _test_xdp_native_tail_adjst(
+ cfg,
+ pkt_sz_lst,
+ offset_lst,
+ )
+
+ _validate_res(res, offset_lst, pkt_sz_lst)
+
+
+def get_hds_thresh(cfg):
+ """
+ Retrieves the header data split (HDS) threshold for a network interface.
+
+ Args:
+ cfg: Configuration object containing network settings.
+
+ Returns:
+ The HDS threshold value. If the threshold is not supported or an error occurs,
+ a default value of 1500 is returned.
+ """
+ netnl = cfg.netnl
+ hds_thresh = 1500
+
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ if 'hds-thresh' not in rings:
+ ksft_pr(f'hds-thresh not supported. Using default: {hds_thresh}')
+ return hds_thresh
+ hds_thresh = rings['hds-thresh']
+ except NlError as e:
+ ksft_pr(f"Failed to get rings: {e}. Using default: {hds_thresh}")
+
+ return hds_thresh
+
+
+def _test_xdp_native_head_adjst(cfg, prog, pkt_sz_lst, offset_lst):
+ """
+ Tests the XDP head adjustment action for a multi-buffer case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ netnl: Network namespace or link object (not used in this function).
+
+ This function sets up the packet size and offset lists, then performs
+ the head adjustment test by sending and receiving UDP packets.
+ """
+ cfg.require_cmd("socat", remote=True)
+
+ prog_info = _load_xdp_prog(cfg, BPFProgInfo(prog, "xdp_native.bpf.o", "xdp.frags", 9000))
+ port = rand_port()
+
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.HEAD_ADJST.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ hds_thresh = get_hds_thresh(cfg)
+ for offset in offset_lst:
+ for pkt_sz in pkt_sz_lst:
+ # The "head" buffer must contain at least the Ethernet header
+ # after we eat into it. We send large-enough packets, but if HDS
+ # is enabled head will only contain headers. Don't try to eat
+ # more than 28 bytes (UDPv4 + eth hdr left: (14 + 20 + 8) - 14)
+ l2_cut_off = 28 if cfg.addr_ipver == 4 else 48
+ if pkt_sz > hds_thresh and offset > l2_cut_off:
+ ksft_pr(
+ f"Failed run: pkt_sz ({pkt_sz}) > HDS threshold ({hds_thresh}) and "
+ f"offset {offset} > {l2_cut_off}"
+ )
+ return {"status": "pass"}
+
+ test_str = ''.join(random.choice(string.ascii_lowercase) for _ in range(pkt_sz))
+ tag = format(random.randint(65, 90), '02x')
+
+ _set_xdp_map("map_xdp_setup",
+ TestConfig.ADJST_OFFSET.value,
+ offset)
+ _set_xdp_map("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16))
+ _set_xdp_map("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset)
+
+ recvd_str = _exchg_udp(cfg, port, test_str)
+
+ # Check for failures around adjustment and data exchange
+ failure = _check_for_failures(recvd_str, _get_stats(prog_info['maps']['map_xdp_stats']))
+ if failure is not None:
+ return {
+ "status": "fail",
+ "reason": failure,
+ "offset": offset,
+ "pkt_sz": pkt_sz
+ }
+
+ # Validate data content based on offset direction
+ expected_data = None
+ if offset < 0:
+ expected_data = chr(int(tag, 16)) * (0 - offset) + test_str
+ else:
+ expected_data = test_str[offset:]
+
+ if recvd_str != expected_data:
+ return {
+ "status": "fail",
+ "reason": "Data mismatch",
+ "offset": offset,
+ "pkt_sz": pkt_sz
+ }
+
+ return {"status": "pass"}
+
+
+def test_xdp_native_adjst_head_grow_data(cfg):
+ """
+ Tests the XDP headroom growth support.
+
+ Args:
+ cfg: Configuration object containing network settings.
+
+ This function sets up the packet size and offset lists, then calls the
+ _test_xdp_native_head_adjst_mb function to perform the actual test. The
+ test is passed if the headroom is successfully extended for given packet
+ sizes and offsets.
+ """
+ pkt_sz_lst = [512, 1024, 2048]
+
+ # Negative values result in headroom shrinking, resulting in growing of payload
+ offset_lst = [-16, -32, -64, -128, -256]
+ res = _test_xdp_native_head_adjst(cfg, "xdp_prog_frags", pkt_sz_lst, offset_lst)
+
+ _validate_res(res, offset_lst, pkt_sz_lst)
+
+
+def test_xdp_native_adjst_head_shrnk_data(cfg):
+ """
+ Tests the XDP headroom shrinking support.
+
+ Args:
+ cfg: Configuration object containing network settings.
+
+ This function sets up the packet size and offset lists, then calls the
+ _test_xdp_native_head_adjst_mb function to perform the actual test. The
+ test is passed if the headroom is successfully shrunk for given packet
+ sizes and offsets.
+ """
+ pkt_sz_lst = [512, 1024, 2048]
+
+ # Positive values result in headroom growing, resulting in shrinking of payload
+ offset_lst = [16, 32, 64, 128, 256]
+ res = _test_xdp_native_head_adjst(cfg, "xdp_prog_frags", pkt_sz_lst, offset_lst)
+
+ _validate_res(res, offset_lst, pkt_sz_lst)
+
+
+def main():
+ """
+ Main function to execute the XDP tests.
+
+ This function runs a series of tests to validate the XDP support for
+ both the single and multi-buffer. It uses the NetDrvEpEnv context
+ manager to manage the network driver environment and the ksft_run
+ function to execute the tests.
+ """
+ with NetDrvEpEnv(__file__) as cfg:
+ cfg.netnl = EthtoolFamily()
+ ksft_run(
+ [
+ test_xdp_native_pass_sb,
+ test_xdp_native_pass_mb,
+ test_xdp_native_drop_sb,
+ test_xdp_native_drop_mb,
+ test_xdp_native_tx_mb,
+ test_xdp_native_adjst_tail_grow_data,
+ test_xdp_native_adjst_tail_shrnk_data,
+ test_xdp_native_adjst_head_grow_data,
+ test_xdp_native_adjst_head_shrnk_data,
+ ],
+ args=(cfg,))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore
index 7afa58e2bb20..fcbdb1297e24 100644
--- a/tools/testing/selftests/filesystems/.gitignore
+++ b/tools/testing/selftests/filesystems/.gitignore
@@ -3,3 +3,4 @@ dnotify_test
devpts_pts
file_stressor
anon_inode_test
+kernfs_test
diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile
index b02326193fee..73d4650af1a5 100644
--- a/tools/testing/selftests/filesystems/Makefile
+++ b/tools/testing/selftests/filesystems/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += $(KHDR_INCLUDES)
-TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test
+TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test kernfs_test
TEST_GEN_PROGS_EXTENDED := dnotify_test
include ../lib.mk
diff --git a/tools/testing/selftests/filesystems/kernfs_test.c b/tools/testing/selftests/filesystems/kernfs_test.c
new file mode 100644
index 000000000000..16538b3b318e
--- /dev/null
+++ b/tools/testing/selftests/filesystems/kernfs_test.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/xattr.h>
+
+#include "../kselftest_harness.h"
+#include "wrappers.h"
+
+TEST(kernfs_listxattr)
+{
+ int fd;
+
+ /* Read-only file that can never have any extended attributes set. */
+ fd = open("/sys/kernel/warn_count", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(fd, 0);
+ ASSERT_EQ(flistxattr(fd, NULL, 0), 0);
+ EXPECT_EQ(close(fd), 0);
+}
+
+TEST(kernfs_getxattr)
+{
+ int fd;
+ char buf[1];
+
+ /* Read-only file that can never have any extended attributes set. */
+ fd = open("/sys/kernel/warn_count", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(fd, 0);
+ ASSERT_LT(fgetxattr(fd, "user.foo", buf, sizeof(buf)), 0);
+ ASSERT_EQ(errno, ENODATA);
+ EXPECT_EQ(close(fd), 0);
+}
+
+TEST_HARNESS_MAIN
+
diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
index b7c8f29c09a9..65916bb55dfb 100644
--- a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
+++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
@@ -14,11 +14,35 @@ fail() { #msg
exit_fail
}
+# As reading trace can last forever, simply look for 3 different
+# events then exit out of reading the file. If there's not 3 different
+# events, then the test has failed.
+check_unique() {
+ cat trace | grep -v '^#' | awk '
+ BEGIN { cnt = 0; }
+ {
+ for (i = 0; i < cnt; i++) {
+ if (event[i] == $5) {
+ break;
+ }
+ }
+ if (i == cnt) {
+ event[cnt++] = $5;
+ if (cnt > 2) {
+ exit;
+ }
+ }
+ }
+ END {
+ printf "%d", cnt;
+ }'
+}
+
echo 'sched:*' > set_event
yield
-count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+count=`check_unique`
if [ $count -lt 3 ]; then
fail "at least fork, exec and exit events should be recorded"
fi
@@ -29,7 +53,7 @@ echo 1 > events/sched/enable
yield
-count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+count=`check_unique`
if [ $count -lt 3 ]; then
fail "at least fork, exec and exit events should be recorded"
fi
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
index 4b994b6df5ac..ed81eaf2afd6 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
@@ -29,7 +29,7 @@ ftrace_filter_check 'schedule*' '^schedule.*$'
ftrace_filter_check '*pin*lock' '.*pin.*lock$'
# filter by start*mid*
-ftrace_filter_check 'mutex*try*' '^mutex.*try.*'
+ftrace_filter_check 'mutex*unl*' '^mutex.*unl.*'
# Advanced full-glob matching feature is recently supported.
# Skip the tests if we are sure the kernel does not support it.
diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c
index 24a92dc94eb8..aea001ac4946 100644
--- a/tools/testing/selftests/futex/functional/futex_priv_hash.c
+++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c
@@ -26,14 +26,12 @@ static int counter;
#ifndef PR_FUTEX_HASH
#define PR_FUTEX_HASH 78
# define PR_FUTEX_HASH_SET_SLOTS 1
-# define FH_FLAG_IMMUTABLE (1ULL << 0)
# define PR_FUTEX_HASH_GET_SLOTS 2
-# define PR_FUTEX_HASH_GET_IMMUTABLE 3
#endif
-static int futex_hash_slots_set(unsigned int slots, int flags)
+static int futex_hash_slots_set(unsigned int slots)
{
- return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, slots, flags);
+ return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, slots, 0);
}
static int futex_hash_slots_get(void)
@@ -41,16 +39,11 @@ static int futex_hash_slots_get(void)
return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS);
}
-static int futex_hash_immutable_get(void)
-{
- return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_IMMUTABLE);
-}
-
static void futex_hash_slots_set_verify(int slots)
{
int ret;
- ret = futex_hash_slots_set(slots, 0);
+ ret = futex_hash_slots_set(slots);
if (ret != 0) {
ksft_test_result_fail("Failed to set slots to %d: %m\n", slots);
ksft_finished();
@@ -64,13 +57,13 @@ static void futex_hash_slots_set_verify(int slots)
ksft_test_result_pass("SET and GET slots %d passed\n", slots);
}
-static void futex_hash_slots_set_must_fail(int slots, int flags)
+static void futex_hash_slots_set_must_fail(int slots)
{
int ret;
- ret = futex_hash_slots_set(slots, flags);
- ksft_test_result(ret < 0, "futex_hash_slots_set(%d, %d)\n",
- slots, flags);
+ ret = futex_hash_slots_set(slots);
+ ksft_test_result(ret < 0, "futex_hash_slots_set(%d)\n",
+ slots);
}
static void *thread_return_fn(void *arg)
@@ -111,6 +104,30 @@ static void join_max_threads(void)
}
}
+#define SEC_IN_NSEC 1000000000
+#define MSEC_IN_NSEC 1000000
+
+static void futex_dummy_op(void)
+{
+ pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+ struct timespec timeout;
+ int ret;
+
+ pthread_mutex_lock(&lock);
+ clock_gettime(CLOCK_REALTIME, &timeout);
+ timeout.tv_nsec += 100 * MSEC_IN_NSEC;
+ if (timeout.tv_nsec >= SEC_IN_NSEC) {
+ timeout.tv_nsec -= SEC_IN_NSEC;
+ timeout.tv_sec++;
+ }
+ ret = pthread_mutex_timedlock(&lock, &timeout);
+ if (ret == 0)
+ ksft_exit_fail_msg("Successfully locked an already locked mutex.\n");
+
+ if (ret != ETIMEDOUT)
+ ksft_exit_fail_msg("pthread_mutex_timedlock() did not timeout: %d.\n", ret);
+}
+
static void usage(char *prog)
{
printf("Usage: %s\n", prog);
@@ -128,18 +145,14 @@ int main(int argc, char *argv[])
{
int futex_slots1, futex_slotsn, online_cpus;
pthread_mutexattr_t mutex_attr_pi;
- int use_global_hash = 0;
- int ret;
+ int ret, retry = 20;
int c;
- while ((c = getopt(argc, argv, "cghv:")) != -1) {
+ while ((c = getopt(argc, argv, "chv:")) != -1) {
switch (c) {
case 'c':
log_color(1);
break;
- case 'g':
- use_global_hash = 1;
- break;
case 'h':
usage(basename(argv[0]));
exit(0);
@@ -154,7 +167,7 @@ int main(int argc, char *argv[])
}
ksft_print_header();
- ksft_set_plan(22);
+ ksft_set_plan(21);
ret = pthread_mutexattr_init(&mutex_attr_pi);
ret |= pthread_mutexattr_setprotocol(&mutex_attr_pi, PTHREAD_PRIO_INHERIT);
@@ -167,10 +180,6 @@ int main(int argc, char *argv[])
if (ret != 0)
ksft_exit_fail_msg("futex_hash_slots_get() failed: %d, %m\n", ret);
- ret = futex_hash_immutable_get();
- if (ret != 0)
- ksft_exit_fail_msg("futex_hash_immutable_get() failed: %d, %m\n", ret);
-
ksft_test_result_pass("Basic get slots and immutable status.\n");
ret = pthread_create(&threads[0], NULL, thread_return_fn, NULL);
if (ret != 0)
@@ -208,8 +217,24 @@ int main(int argc, char *argv[])
*/
ksft_print_msg("Online CPUs: %d\n", online_cpus);
if (online_cpus > 16) {
+retry_getslots:
futex_slotsn = futex_hash_slots_get();
if (futex_slotsn < 0 || futex_slots1 == futex_slotsn) {
+ retry--;
+ /*
+ * Auto scaling on thread creation can be slightly delayed
+ * because it waits for a RCU grace period twice. The new
+ * private hash is assigned upon the first futex operation
+ * after grace period.
+ * To cover all this for testing purposes the function
+ * below will acquire a lock and acquire it again with a
+ * 100ms timeout which must timeout. This ensures we
+ * sleep for 100ms and issue a futex operation.
+ */
+ if (retry > 0) {
+ futex_dummy_op();
+ goto retry_getslots;
+ }
ksft_print_msg("Expected increase of hash buckets but got: %d -> %d\n",
futex_slots1, futex_slotsn);
ksft_exit_fail_msg(test_msg_auto_inc);
@@ -227,7 +252,7 @@ int main(int argc, char *argv[])
futex_hash_slots_set_verify(32);
futex_hash_slots_set_verify(16);
- ret = futex_hash_slots_set(15, 0);
+ ret = futex_hash_slots_set(15);
ksft_test_result(ret < 0, "Use 15 slots\n");
futex_hash_slots_set_verify(2);
@@ -245,28 +270,23 @@ int main(int argc, char *argv[])
ksft_test_result(ret == 2, "No more auto-resize after manaul setting, got %d\n",
ret);
- futex_hash_slots_set_must_fail(1 << 29, 0);
+ futex_hash_slots_set_must_fail(1 << 29);
+ futex_hash_slots_set_verify(4);
/*
- * Once the private hash has been made immutable or global hash has been requested,
- * then this requested can not be undone.
+ * Once the global hash has been requested, then this requested can not
+ * be undone.
*/
- if (use_global_hash) {
- ret = futex_hash_slots_set(0, 0);
- ksft_test_result(ret == 0, "Global hash request\n");
- } else {
- ret = futex_hash_slots_set(4, FH_FLAG_IMMUTABLE);
- ksft_test_result(ret == 0, "Immutable resize to 4\n");
- }
+ ret = futex_hash_slots_set(0);
+ ksft_test_result(ret == 0, "Global hash request\n");
if (ret != 0)
goto out;
- futex_hash_slots_set_must_fail(4, 0);
- futex_hash_slots_set_must_fail(4, FH_FLAG_IMMUTABLE);
- futex_hash_slots_set_must_fail(8, 0);
- futex_hash_slots_set_must_fail(8, FH_FLAG_IMMUTABLE);
- futex_hash_slots_set_must_fail(0, FH_FLAG_IMMUTABLE);
- futex_hash_slots_set_must_fail(6, FH_FLAG_IMMUTABLE);
+ futex_hash_slots_set_must_fail(4);
+ futex_hash_slots_set_must_fail(8);
+ futex_hash_slots_set_must_fail(8);
+ futex_hash_slots_set_must_fail(0);
+ futex_hash_slots_set_must_fail(6);
ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS);
if (ret != 0) {
@@ -277,14 +297,7 @@ int main(int argc, char *argv[])
join_max_threads();
ret = futex_hash_slots_get();
- if (use_global_hash) {
- ksft_test_result(ret == 0, "Continue to use global hash\n");
- } else {
- ksft_test_result(ret == 4, "Continue to use the 4 hash buckets\n");
- }
-
- ret = futex_hash_immutable_get();
- ksft_test_result(ret == 1, "Hash reports to be immutable\n");
+ ksft_test_result(ret == 0, "Continue to use global hash\n");
out:
ksft_finished();
diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h
index ea79662405bc..1f625b39948a 100644
--- a/tools/testing/selftests/futex/include/futex2test.h
+++ b/tools/testing/selftests/futex/include/futex2test.h
@@ -4,6 +4,7 @@
*
* Copyright 2021 Collabora Ltd.
*/
+#include <linux/time_types.h>
#include <stdint.h>
#define u64_to_ptr(x) ((void *)(uintptr_t)(x))
@@ -65,7 +66,12 @@ struct futex32_numa {
static inline int futex_waitv(volatile struct futex_waitv *waiters, unsigned long nr_waiters,
unsigned long flags, struct timespec *timo, clockid_t clockid)
{
- return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, timo, clockid);
+ struct __kernel_timespec ts = {
+ .tv_sec = timo->tv_sec,
+ .tv_nsec = timo->tv_nsec,
+ };
+
+ return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, &ts, clockid);
}
/*
diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h
index ddbcfc9b7bac..7a5fd1d5355e 100644
--- a/tools/testing/selftests/futex/include/futextest.h
+++ b/tools/testing/selftests/futex/include/futextest.h
@@ -47,6 +47,17 @@ typedef volatile u_int32_t futex_t;
FUTEX_PRIVATE_FLAG)
#endif
+/*
+ * SYS_futex is expected from system C library, in glibc some 32-bit
+ * architectures (e.g. RV32) are using 64-bit time_t, therefore it doesn't have
+ * SYS_futex defined but just SYS_futex_time64. Define SYS_futex as
+ * SYS_futex_time64 in this situation to ensure the compilation and the
+ * compatibility.
+ */
+#if !defined(SYS_futex) && defined(SYS_futex_time64)
+#define SYS_futex SYS_futex_time64
+#endif
+
/**
* futex() - SYS_futex syscall wrapper
* @uaddr: address of first futex
diff --git a/tools/testing/selftests/hid/config.common b/tools/testing/selftests/hid/config.common
index b1f40857307d..38c51158adf8 100644
--- a/tools/testing/selftests/hid/config.common
+++ b/tools/testing/selftests/hid/config.common
@@ -135,6 +135,7 @@ CONFIG_NET_EMATCH=y
CONFIG_NETFILTER_NETLINK_LOG=y
CONFIG_NETFILTER_NETLINK_QUEUE=y
CONFIG_NETFILTER_XTABLES=y
+CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=y
CONFIG_NETFILTER_XT_MATCH_BPF=y
CONFIG_NETFILTER_XT_MATCH_COMMENT=y
diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c
index e9dbb84c100a..5e36aeeb9901 100644
--- a/tools/testing/selftests/ipc/msgque.c
+++ b/tools/testing/selftests/ipc/msgque.c
@@ -39,26 +39,26 @@ int restore_queue(struct msgque_data *msgque)
fd = open("/proc/sys/kernel/msg_next_id", O_WRONLY);
if (fd == -1) {
- printf("Failed to open /proc/sys/kernel/msg_next_id\n");
+ ksft_test_result_fail("Failed to open /proc/sys/kernel/msg_next_id\n");
return -errno;
}
sprintf(buf, "%d", msgque->msq_id);
ret = write(fd, buf, strlen(buf));
if (ret != strlen(buf)) {
- printf("Failed to write to /proc/sys/kernel/msg_next_id\n");
+ ksft_test_result_fail("Failed to write to /proc/sys/kernel/msg_next_id\n");
return -errno;
}
id = msgget(msgque->key, msgque->mode | IPC_CREAT | IPC_EXCL);
if (id == -1) {
- printf("Failed to create queue\n");
+ ksft_test_result_fail("Failed to create queue\n");
return -errno;
}
if (id != msgque->msq_id) {
- printf("Restored queue has wrong id (%d instead of %d)\n",
- id, msgque->msq_id);
+ ksft_test_result_fail("Restored queue has wrong id (%d instead of %d)\n"
+ , id, msgque->msq_id);
ret = -EFAULT;
goto destroy;
}
@@ -66,7 +66,7 @@ int restore_queue(struct msgque_data *msgque)
for (i = 0; i < msgque->qnum; i++) {
if (msgsnd(msgque->msq_id, &msgque->messages[i].mtype,
msgque->messages[i].msize, IPC_NOWAIT) != 0) {
- printf("msgsnd failed (%m)\n");
+ ksft_test_result_fail("msgsnd failed (%m)\n");
ret = -errno;
goto destroy;
}
@@ -90,23 +90,22 @@ int check_and_destroy_queue(struct msgque_data *msgque)
if (ret < 0) {
if (errno == ENOMSG)
break;
- printf("Failed to read IPC message: %m\n");
+ ksft_test_result_fail("Failed to read IPC message: %m\n");
ret = -errno;
goto err;
}
if (ret != msgque->messages[cnt].msize) {
- printf("Wrong message size: %d (expected %d)\n", ret,
- msgque->messages[cnt].msize);
+ ksft_test_result_fail("Wrong message size: %d (expected %d)\n", ret, msgque->messages[cnt].msize);
ret = -EINVAL;
goto err;
}
if (message.mtype != msgque->messages[cnt].mtype) {
- printf("Wrong message type\n");
+ ksft_test_result_fail("Wrong message type\n");
ret = -EINVAL;
goto err;
}
if (memcmp(message.mtext, msgque->messages[cnt].mtext, ret)) {
- printf("Wrong message content\n");
+ ksft_test_result_fail("Wrong message content\n");
ret = -EINVAL;
goto err;
}
@@ -114,7 +113,7 @@ int check_and_destroy_queue(struct msgque_data *msgque)
}
if (cnt != msgque->qnum) {
- printf("Wrong message number\n");
+ ksft_test_result_fail("Wrong message number\n");
ret = -EINVAL;
goto err;
}
@@ -139,7 +138,7 @@ int dump_queue(struct msgque_data *msgque)
if (ret < 0) {
if (errno == EINVAL)
continue;
- printf("Failed to get stats for IPC queue with id %d\n",
+ ksft_test_result_fail("Failed to get stats for IPC queue with id %d\n",
kern_id);
return -errno;
}
@@ -150,7 +149,7 @@ int dump_queue(struct msgque_data *msgque)
msgque->messages = malloc(sizeof(struct msg1) * ds.msg_qnum);
if (msgque->messages == NULL) {
- printf("Failed to get stats for IPC queue\n");
+ ksft_test_result_fail("Failed to get stats for IPC queue\n");
return -ENOMEM;
}
@@ -162,7 +161,7 @@ int dump_queue(struct msgque_data *msgque)
ret = msgrcv(msgque->msq_id, &msgque->messages[i].mtype,
MAX_MSG_SIZE, i, IPC_NOWAIT | MSG_COPY);
if (ret < 0) {
- printf("Failed to copy IPC message: %m (%d)\n", errno);
+ ksft_test_result_fail("Failed to copy IPC message: %m (%d)\n", errno);
return -errno;
}
msgque->messages[i].msize = ret;
@@ -178,7 +177,7 @@ int fill_msgque(struct msgque_data *msgque)
memcpy(msgbuf.mtext, TEST_STRING, sizeof(TEST_STRING));
if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(TEST_STRING),
IPC_NOWAIT) != 0) {
- printf("First message send failed (%m)\n");
+ ksft_test_result_fail("First message send failed (%m)\n");
return -errno;
}
@@ -186,7 +185,7 @@ int fill_msgque(struct msgque_data *msgque)
memcpy(msgbuf.mtext, ANOTHER_TEST_STRING, sizeof(ANOTHER_TEST_STRING));
if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(ANOTHER_TEST_STRING),
IPC_NOWAIT) != 0) {
- printf("Second message send failed (%m)\n");
+ ksft_test_result_fail("Second message send failed (%m)\n");
return -errno;
}
return 0;
@@ -202,44 +201,44 @@ int main(int argc, char **argv)
msgque.key = ftok(argv[0], 822155650);
if (msgque.key == -1) {
- printf("Can't make key: %d\n", -errno);
+ ksft_test_result_fail("Can't make key: %d\n", -errno);
ksft_exit_fail();
}
msgque.msq_id = msgget(msgque.key, IPC_CREAT | IPC_EXCL | 0666);
if (msgque.msq_id == -1) {
err = -errno;
- printf("Can't create queue: %d\n", err);
+ ksft_test_result_fail("Can't create queue: %d\n", err);
goto err_out;
}
err = fill_msgque(&msgque);
if (err) {
- printf("Failed to fill queue: %d\n", err);
+ ksft_test_result_fail("Failed to fill queue: %d\n", err);
goto err_destroy;
}
err = dump_queue(&msgque);
if (err) {
- printf("Failed to dump queue: %d\n", err);
+ ksft_test_result_fail("Failed to dump queue: %d\n", err);
goto err_destroy;
}
err = check_and_destroy_queue(&msgque);
if (err) {
- printf("Failed to check and destroy queue: %d\n", err);
+ ksft_test_result_fail("Failed to check and destroy queue: %d\n", err);
goto err_out;
}
err = restore_queue(&msgque);
if (err) {
- printf("Failed to restore queue: %d\n", err);
+ ksft_test_result_fail("Failed to restore queue: %d\n", err);
goto err_destroy;
}
err = check_and_destroy_queue(&msgque);
if (err) {
- printf("Failed to test queue: %d\n", err);
+ ksft_test_result_fail("Failed to test queue: %d\n", err);
goto err_out;
}
ksft_exit_pass();
diff --git a/tools/testing/selftests/kexec/Makefile b/tools/testing/selftests/kexec/Makefile
index e3000ccb9a5d..874cfdd3b75b 100644
--- a/tools/testing/selftests/kexec/Makefile
+++ b/tools/testing/selftests/kexec/Makefile
@@ -12,7 +12,7 @@ include ../../../scripts/Makefile.arch
ifeq ($(IS_64_BIT)$(ARCH_PROCESSED),1x86)
TEST_PROGS += test_kexec_jump.sh
-test_kexec_jump.sh: $(OUTPUT)/test_kexec_jump
+TEST_GEN_PROGS := test_kexec_jump
endif
include ../lib.mk
diff --git a/tools/testing/selftests/kvm/arm64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c
index c7fb55c9135b..e34963956fbc 100644
--- a/tools/testing/selftests/kvm/arm64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/arm64/debug-exceptions.c
@@ -140,7 +140,7 @@ static void enable_os_lock(void)
static void enable_monitor_debug_exceptions(void)
{
- uint32_t mdscr;
+ uint64_t mdscr;
asm volatile("msr daifclr, #8");
@@ -223,7 +223,7 @@ void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr,
static void install_ss(void)
{
- uint32_t mdscr;
+ uint64_t mdscr;
asm volatile("msr daifclr, #8");
diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h
index 18a6014920b5..b16986aa6442 100644
--- a/tools/testing/selftests/landlock/audit.h
+++ b/tools/testing/selftests/landlock/audit.h
@@ -403,11 +403,12 @@ static int audit_init_filter_exe(struct audit_filter *filter, const char *path)
/* It is assume that there is not already filtering rules. */
filter->record_type = AUDIT_EXE;
if (!path) {
- filter->exe_len = readlink("/proc/self/exe", filter->exe,
- sizeof(filter->exe) - 1);
- if (filter->exe_len < 0)
+ int ret = readlink("/proc/self/exe", filter->exe,
+ sizeof(filter->exe) - 1);
+ if (ret < 0)
return -errno;
+ filter->exe_len = ret;
return 0;
}
diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c
index cfc571afd0eb..46d02d49835a 100644
--- a/tools/testing/selftests/landlock/audit_test.c
+++ b/tools/testing/selftests/landlock/audit_test.c
@@ -7,6 +7,7 @@
#define _GNU_SOURCE
#include <errno.h>
+#include <fcntl.h>
#include <limits.h>
#include <linux/landlock.h>
#include <pthread.h>
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index 73729382d40f..fa0f18ec62c4 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -1832,6 +1832,46 @@ TEST_F_FORK(layout1, release_inodes)
ASSERT_EQ(ENOENT, test_open(dir_s3d3, O_RDONLY));
}
+/*
+ * This test checks that a rule on a directory used as a mount point does not
+ * grant access to the mount covering it. It is a generalization of the bind
+ * mount case in layout3_fs.hostfs.release_inodes that tests hidden mount points.
+ */
+TEST_F_FORK(layout1, covered_rule)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s3d2,
+ .access = LANDLOCK_ACCESS_FS_READ_DIR,
+ },
+ {},
+ };
+ int ruleset_fd;
+
+ /* Unmount to simplify FIXTURE_TEARDOWN. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, umount(dir_s3d2));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ /* Creates a ruleset with the future hidden directory. */
+ ruleset_fd =
+ create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_DIR, layer1);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Covers with a new mount point. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, mount_opt(&mnt_tmp, dir_s3d2));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks that access to the new mount point is denied. */
+ ASSERT_EQ(EACCES, test_open(dir_s3d2, O_RDONLY));
+}
+
enum relative_access {
REL_OPEN,
REL_CHDIR,
diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config
index 7afe05e8c4d7..bd09fdaf53e0 100644
--- a/tools/testing/selftests/lkdtm/config
+++ b/tools/testing/selftests/lkdtm/config
@@ -2,7 +2,7 @@ CONFIG_LKDTM=y
CONFIG_DEBUG_LIST=y
CONFIG_SLAB_FREELIST_HARDENED=y
CONFIG_FORTIFY_SOURCE=y
-CONFIG_GCC_PLUGIN_STACKLEAK=y
+CONFIG_KSTACK_ERASE=y
CONFIG_HARDENED_USERCOPY=y
CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y
CONFIG_INIT_ON_FREE_DEFAULT_ON=y
diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
index aa7400ed0e99..f0d9c035641d 100644
--- a/tools/testing/selftests/mm/split_huge_page_test.c
+++ b/tools/testing/selftests/mm/split_huge_page_test.c
@@ -31,6 +31,7 @@ uint64_t pmd_pagesize;
#define INPUT_MAX 80
#define PID_FMT "%d,0x%lx,0x%lx,%d"
+#define PID_FMT_OFFSET "%d,0x%lx,0x%lx,%d,%d"
#define PATH_FMT "%s,0x%lx,0x%lx,%d"
#define PFN_MASK ((1UL<<55)-1)
@@ -483,7 +484,7 @@ void split_thp_in_pagecache_to_order_at(size_t fd_size, const char *fs_loc,
write_debugfs(PID_FMT, getpid(), (uint64_t)addr,
(uint64_t)addr + fd_size, order);
else
- write_debugfs(PID_FMT, getpid(), (uint64_t)addr,
+ write_debugfs(PID_FMT_OFFSET, getpid(), (uint64_t)addr,
(uint64_t)addr + fd_size, order, offset);
for (i = 0; i < fd_size; i++)
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index c6dd2a335cf4..47c293c2962f 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -34,6 +34,7 @@ reuseport_bpf_numa
reuseport_dualstack
rxtimestamp
sctp_hello
+scm_inq
scm_pidfd
scm_rights
sk_bind_sendto_listen
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 332f387615d7..b31a71f2b372 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -41,6 +41,7 @@ TEST_PROGS += netns-name.sh
TEST_PROGS += link_netns.py
TEST_PROGS += nl_netdev.py
TEST_PROGS += rtnetlink.py
+TEST_PROGS += rtnetlink_notification.sh
TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
@@ -62,6 +63,7 @@ TEST_PROGS += ip_local_port_range.sh
TEST_PROGS += rps_default_mask.sh
TEST_PROGS += big_tcp.sh
TEST_PROGS += netns-sysctl.sh
+TEST_PROGS += netdev-l2addr.sh
TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh xfrm_policy_add_speed.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
@@ -99,6 +101,7 @@ TEST_PROGS += test_vxlan_mdb.sh
TEST_PROGS += test_bridge_neigh_suppress.sh
TEST_PROGS += test_vxlan_nolocalbypass.sh
TEST_PROGS += test_bridge_backup_port.sh
+TEST_PROGS += test_neigh.sh
TEST_PROGS += fdb_flush.sh fdb_notify.sh
TEST_PROGS += fq_band_pktlimit.sh
TEST_PROGS += vlan_hw_filter.sh
@@ -112,6 +115,8 @@ TEST_PROGS += skf_net_off.sh
TEST_GEN_FILES += skf_net_off
TEST_GEN_FILES += tfo
TEST_PROGS += tfo_passive.sh
+TEST_PROGS += broadcast_pmtu.sh
+TEST_PROGS += ipv6_force_forwarding.sh
# YNL files, must be before "include ..lib.mk"
YNL_GEN_FILES := busy_poller netlink-dumps
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index 50584479540b..a4b61c6d0290 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,4 +1,4 @@
CFLAGS += $(KHDR_INCLUDES)
-TEST_GEN_PROGS := diag_uid msg_oob scm_pidfd scm_rights unix_connect
+TEST_GEN_PROGS := diag_uid msg_oob scm_inq scm_pidfd scm_rights unix_connect
include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/scm_inq.c b/tools/testing/selftests/net/af_unix/scm_inq.c
new file mode 100644
index 000000000000..9d22561e7b8f
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/scm_inq.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2025 Google LLC */
+
+#include <linux/sockios.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "../../kselftest_harness.h"
+
+#define NR_CHUNKS 100
+#define MSG_LEN 256
+
+struct scm_inq {
+ struct cmsghdr cmsghdr;
+ int inq;
+};
+
+FIXTURE(scm_inq)
+{
+ int fd[2];
+};
+
+FIXTURE_VARIANT(scm_inq)
+{
+ int type;
+};
+
+FIXTURE_VARIANT_ADD(scm_inq, stream)
+{
+ .type = SOCK_STREAM,
+};
+
+FIXTURE_VARIANT_ADD(scm_inq, dgram)
+{
+ .type = SOCK_DGRAM,
+};
+
+FIXTURE_VARIANT_ADD(scm_inq, seqpacket)
+{
+ .type = SOCK_SEQPACKET,
+};
+
+FIXTURE_SETUP(scm_inq)
+{
+ int err;
+
+ err = socketpair(AF_UNIX, variant->type | SOCK_NONBLOCK, 0, self->fd);
+ ASSERT_EQ(0, err);
+}
+
+FIXTURE_TEARDOWN(scm_inq)
+{
+ close(self->fd[0]);
+ close(self->fd[1]);
+}
+
+static void send_chunks(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_inq) *self)
+{
+ char buf[MSG_LEN] = {};
+ int i, ret;
+
+ for (i = 0; i < NR_CHUNKS; i++) {
+ ret = send(self->fd[0], buf, sizeof(buf), 0);
+ ASSERT_EQ(sizeof(buf), ret);
+ }
+}
+
+static void recv_chunks(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_inq) *self)
+{
+ struct msghdr msg = {};
+ struct iovec iov = {};
+ struct scm_inq cmsg;
+ char buf[MSG_LEN];
+ int i, ret;
+ int inq;
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = &cmsg;
+ msg.msg_controllen = CMSG_SPACE(sizeof(cmsg.inq));
+
+ iov.iov_base = buf;
+ iov.iov_len = sizeof(buf);
+
+ for (i = 0; i < NR_CHUNKS; i++) {
+ memset(buf, 0, sizeof(buf));
+ memset(&cmsg, 0, sizeof(cmsg));
+
+ ret = recvmsg(self->fd[1], &msg, 0);
+ ASSERT_EQ(MSG_LEN, ret);
+ ASSERT_NE(NULL, CMSG_FIRSTHDR(&msg));
+ ASSERT_EQ(CMSG_LEN(sizeof(cmsg.inq)), cmsg.cmsghdr.cmsg_len);
+ ASSERT_EQ(SOL_SOCKET, cmsg.cmsghdr.cmsg_level);
+ ASSERT_EQ(SCM_INQ, cmsg.cmsghdr.cmsg_type);
+
+ ret = ioctl(self->fd[1], SIOCINQ, &inq);
+ ASSERT_EQ(0, ret);
+ ASSERT_EQ(cmsg.inq, inq);
+ }
+}
+
+TEST_F(scm_inq, basic)
+{
+ int err, inq;
+
+ err = setsockopt(self->fd[1], SOL_SOCKET, SO_INQ, &(int){1}, sizeof(int));
+ if (variant->type != SOCK_STREAM) {
+ ASSERT_EQ(-ENOPROTOOPT, -errno);
+ return;
+ }
+
+ ASSERT_EQ(0, err);
+
+ err = ioctl(self->fd[1], SIOCINQ, &inq);
+ ASSERT_EQ(0, err);
+ ASSERT_EQ(0, inq);
+
+ send_chunks(_metadata, self);
+ recv_chunks(_metadata, self);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/scm_pidfd.c b/tools/testing/selftests/net/af_unix/scm_pidfd.c
index 7e534594167e..37e034874034 100644
--- a/tools/testing/selftests/net/af_unix/scm_pidfd.c
+++ b/tools/testing/selftests/net/af_unix/scm_pidfd.c
@@ -15,6 +15,7 @@
#include <sys/types.h>
#include <sys/wait.h>
+#include "../../pidfd/pidfd.h"
#include "../../kselftest_harness.h"
#define clean_errno() (errno == 0 ? "None" : strerror(errno))
@@ -26,6 +27,8 @@
#define SCM_PIDFD 0x04
#endif
+#define CHILD_EXIT_CODE_OK 123
+
static void child_die()
{
exit(1);
@@ -126,16 +129,65 @@ out:
return result;
}
+struct cmsg_data {
+ struct ucred *ucred;
+ int *pidfd;
+};
+
+static int parse_cmsg(struct msghdr *msg, struct cmsg_data *res)
+{
+ struct cmsghdr *cmsg;
+ int data = 0;
+
+ if (msg->msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
+ log_err("recvmsg: truncated");
+ return 1;
+ }
+
+ for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
+ cmsg = CMSG_NXTHDR(msg, cmsg)) {
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_PIDFD) {
+ if (cmsg->cmsg_len < sizeof(*res->pidfd)) {
+ log_err("CMSG parse: SCM_PIDFD wrong len");
+ return 1;
+ }
+
+ res->pidfd = (void *)CMSG_DATA(cmsg);
+ }
+
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_CREDENTIALS) {
+ if (cmsg->cmsg_len < sizeof(*res->ucred)) {
+ log_err("CMSG parse: SCM_CREDENTIALS wrong len");
+ return 1;
+ }
+
+ res->ucred = (void *)CMSG_DATA(cmsg);
+ }
+ }
+
+ if (!res->pidfd) {
+ log_err("CMSG parse: SCM_PIDFD not found");
+ return 1;
+ }
+
+ if (!res->ucred) {
+ log_err("CMSG parse: SCM_CREDENTIALS not found");
+ return 1;
+ }
+
+ return 0;
+}
+
static int cmsg_check(int fd)
{
struct msghdr msg = { 0 };
- struct cmsghdr *cmsg;
+ struct cmsg_data res;
struct iovec iov;
- struct ucred *ucred = NULL;
int data = 0;
char control[CMSG_SPACE(sizeof(struct ucred)) +
CMSG_SPACE(sizeof(int))] = { 0 };
- int *pidfd = NULL;
pid_t parent_pid;
int err;
@@ -158,53 +210,99 @@ static int cmsg_check(int fd)
return 1;
}
- for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
- cmsg = CMSG_NXTHDR(&msg, cmsg)) {
- if (cmsg->cmsg_level == SOL_SOCKET &&
- cmsg->cmsg_type == SCM_PIDFD) {
- if (cmsg->cmsg_len < sizeof(*pidfd)) {
- log_err("CMSG parse: SCM_PIDFD wrong len");
- return 1;
- }
+ /* send(pfd, "x", sizeof(char), 0) */
+ if (data != 'x') {
+ log_err("recvmsg: data corruption");
+ return 1;
+ }
- pidfd = (void *)CMSG_DATA(cmsg);
- }
+ if (parse_cmsg(&msg, &res)) {
+ log_err("CMSG parse: parse_cmsg() failed");
+ return 1;
+ }
- if (cmsg->cmsg_level == SOL_SOCKET &&
- cmsg->cmsg_type == SCM_CREDENTIALS) {
- if (cmsg->cmsg_len < sizeof(*ucred)) {
- log_err("CMSG parse: SCM_CREDENTIALS wrong len");
- return 1;
- }
+ /* pidfd from SCM_PIDFD should point to the parent process PID */
+ parent_pid =
+ get_pid_from_fdinfo_file(*res.pidfd, "Pid:", sizeof("Pid:") - 1);
+ if (parent_pid != getppid()) {
+ log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid());
+ close(*res.pidfd);
+ return 1;
+ }
- ucred = (void *)CMSG_DATA(cmsg);
- }
+ close(*res.pidfd);
+ return 0;
+}
+
+static int cmsg_check_dead(int fd, int expected_pid)
+{
+ int err;
+ struct msghdr msg = { 0 };
+ struct cmsg_data res;
+ struct iovec iov;
+ int data = 0;
+ char control[CMSG_SPACE(sizeof(struct ucred)) +
+ CMSG_SPACE(sizeof(int))] = { 0 };
+ pid_t client_pid;
+ struct pidfd_info info = {
+ .mask = PIDFD_INFO_EXIT,
+ };
+
+ iov.iov_base = &data;
+ iov.iov_len = sizeof(data);
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ err = recvmsg(fd, &msg, 0);
+ if (err < 0) {
+ log_err("recvmsg");
+ return 1;
}
- /* send(pfd, "x", sizeof(char), 0) */
- if (data != 'x') {
+ if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
+ log_err("recvmsg: truncated");
+ return 1;
+ }
+
+ /* send(cfd, "y", sizeof(char), 0) */
+ if (data != 'y') {
log_err("recvmsg: data corruption");
return 1;
}
- if (!pidfd) {
- log_err("CMSG parse: SCM_PIDFD not found");
+ if (parse_cmsg(&msg, &res)) {
+ log_err("CMSG parse: parse_cmsg() failed");
return 1;
}
- if (!ucred) {
- log_err("CMSG parse: SCM_CREDENTIALS not found");
+ /*
+ * pidfd from SCM_PIDFD should point to the client_pid.
+ * Let's read exit information and check if it's what
+ * we expect to see.
+ */
+ if (ioctl(*res.pidfd, PIDFD_GET_INFO, &info)) {
+ log_err("%s: ioctl(PIDFD_GET_INFO) failed", __func__);
+ close(*res.pidfd);
return 1;
}
- /* pidfd from SCM_PIDFD should point to the parent process PID */
- parent_pid =
- get_pid_from_fdinfo_file(*pidfd, "Pid:", sizeof("Pid:") - 1);
- if (parent_pid != getppid()) {
- log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid());
+ if (!(info.mask & PIDFD_INFO_EXIT)) {
+ log_err("%s: No exit information from ioctl(PIDFD_GET_INFO)", __func__);
+ close(*res.pidfd);
return 1;
}
+ err = WIFEXITED(info.exit_code) ? WEXITSTATUS(info.exit_code) : 1;
+ if (err != CHILD_EXIT_CODE_OK) {
+ log_err("%s: wrong exit_code %d != %d", __func__, err, CHILD_EXIT_CODE_OK);
+ close(*res.pidfd);
+ return 1;
+ }
+
+ close(*res.pidfd);
return 0;
}
@@ -291,6 +389,24 @@ static void fill_sockaddr(struct sock_addr *addr, bool abstract)
memcpy(sun_path_buf, addr->sock_name, strlen(addr->sock_name));
}
+static int sk_enable_cred_pass(int sk)
+{
+ int on = 0;
+
+ on = 1;
+ if (setsockopt(sk, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) {
+ log_err("Failed to set SO_PASSCRED");
+ return 1;
+ }
+
+ if (setsockopt(sk, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) {
+ log_err("Failed to set SO_PASSPIDFD");
+ return 1;
+ }
+
+ return 0;
+}
+
static void client(FIXTURE_DATA(scm_pidfd) *self,
const FIXTURE_VARIANT(scm_pidfd) *variant)
{
@@ -299,7 +415,6 @@ static void client(FIXTURE_DATA(scm_pidfd) *self,
struct ucred peer_cred;
int peer_pidfd;
pid_t peer_pid;
- int on = 0;
cfd = socket(AF_UNIX, variant->type, 0);
if (cfd < 0) {
@@ -322,14 +437,8 @@ static void client(FIXTURE_DATA(scm_pidfd) *self,
child_die();
}
- on = 1;
- if (setsockopt(cfd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) {
- log_err("Failed to set SO_PASSCRED");
- child_die();
- }
-
- if (setsockopt(cfd, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) {
- log_err("Failed to set SO_PASSPIDFD");
+ if (sk_enable_cred_pass(cfd)) {
+ log_err("sk_enable_cred_pass() failed");
child_die();
}
@@ -340,6 +449,12 @@ static void client(FIXTURE_DATA(scm_pidfd) *self,
child_die();
}
+ /* send something to the parent so it can receive SCM_PIDFD too and validate it */
+ if (send(cfd, "y", sizeof(char), 0) == -1) {
+ log_err("Failed to send(cfd, \"y\", sizeof(char), 0)");
+ child_die();
+ }
+
/* skip further for SOCK_DGRAM as it's not applicable */
if (variant->type == SOCK_DGRAM)
return;
@@ -398,7 +513,13 @@ TEST_F(scm_pidfd, test)
close(self->server);
close(self->startup_pipe[0]);
client(self, variant);
- exit(0);
+
+ /*
+ * It's a bit unusual, but in case of success we return non-zero
+ * exit code (CHILD_EXIT_CODE_OK) and then we expect to read it
+ * from ioctl(PIDFD_GET_INFO) in cmsg_check_dead().
+ */
+ exit(CHILD_EXIT_CODE_OK);
}
close(self->startup_pipe[1]);
@@ -421,9 +542,17 @@ TEST_F(scm_pidfd, test)
ASSERT_NE(-1, err);
}
- close(pfd);
waitpid(self->client_pid, &child_status, 0);
- ASSERT_EQ(0, WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1);
+ /* see comment before exit(CHILD_EXIT_CODE_OK) */
+ ASSERT_EQ(CHILD_EXIT_CODE_OK, WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1);
+
+ err = sk_enable_cred_pass(pfd);
+ ASSERT_EQ(0, err);
+
+ err = cmsg_check_dead(pfd, self->client_pid);
+ ASSERT_EQ(0, err);
+
+ close(pfd);
}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/bench/Makefile b/tools/testing/selftests/net/bench/Makefile
new file mode 100644
index 000000000000..2546c45e42f7
--- /dev/null
+++ b/tools/testing/selftests/net/bench/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_GEN_MODS_DIR := page_pool
+
+TEST_PROGS += test_bench_page_pool.sh
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/bench/page_pool/Makefile b/tools/testing/selftests/net/bench/page_pool/Makefile
new file mode 100644
index 000000000000..0549a16ba275
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/Makefile
@@ -0,0 +1,17 @@
+BENCH_PAGE_POOL_SIMPLE_TEST_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+ifeq ($(V),1)
+Q =
+else
+Q = @
+endif
+
+obj-m += bench_page_pool.o
+bench_page_pool-y += bench_page_pool_simple.o time_bench.o
+
+all:
+ +$(Q)make -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) modules
+
+clean:
+ +$(Q)make -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) clean
diff --git a/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c
new file mode 100644
index 000000000000..cb6468adbda4
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Benchmark module for page_pool.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/interrupt.h>
+#include <linux/limits.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <net/page_pool/helpers.h>
+
+#include "time_bench.h"
+
+static int verbose = 1;
+#define MY_POOL_SIZE 1024
+
+/* Makes tests selectable. Useful for perf-record to analyze a single test.
+ * Hint: Bash shells support writing binary number like: $((2#101010)
+ *
+ * # modprobe bench_page_pool_simple run_flags=$((2#100))
+ */
+static unsigned long run_flags = 0xFFFFFFFF;
+module_param(run_flags, ulong, 0);
+MODULE_PARM_DESC(run_flags, "Limit which bench test that runs");
+
+/* Count the bit number from the enum */
+enum benchmark_bit {
+ bit_run_bench_baseline,
+ bit_run_bench_no_softirq01,
+ bit_run_bench_no_softirq02,
+ bit_run_bench_no_softirq03,
+};
+
+#define bit(b) (1 << (b))
+#define enabled(b) ((run_flags & (bit(b))))
+
+/* notice time_bench is limited to U32_MAX nr loops */
+static unsigned long loops = 10000000;
+module_param(loops, ulong, 0);
+MODULE_PARM_DESC(loops, "Specify loops bench will run");
+
+/* Timing at the nanosec level, we need to know the overhead
+ * introduced by the for loop itself
+ */
+static int time_bench_for_loop(struct time_bench_record *rec, void *data)
+{
+ uint64_t loops_cnt = 0;
+ int i;
+
+ time_bench_start(rec);
+ /** Loop to measure **/
+ for (i = 0; i < rec->loops; i++) {
+ loops_cnt++;
+ barrier(); /* avoid compiler to optimize this loop */
+ }
+ time_bench_stop(rec, loops_cnt);
+ return loops_cnt;
+}
+
+static int time_bench_atomic_inc(struct time_bench_record *rec, void *data)
+{
+ uint64_t loops_cnt = 0;
+ atomic_t cnt;
+ int i;
+
+ atomic_set(&cnt, 0);
+
+ time_bench_start(rec);
+ /** Loop to measure **/
+ for (i = 0; i < rec->loops; i++) {
+ atomic_inc(&cnt);
+ barrier(); /* avoid compiler to optimize this loop */
+ }
+ loops_cnt = atomic_read(&cnt);
+ time_bench_stop(rec, loops_cnt);
+ return loops_cnt;
+}
+
+/* The ptr_ping in page_pool uses a spinlock. We need to know the minimum
+ * overhead of taking+releasing a spinlock, to know the cycles that can be saved
+ * by e.g. amortizing this via bulking.
+ */
+static int time_bench_lock(struct time_bench_record *rec, void *data)
+{
+ uint64_t loops_cnt = 0;
+ spinlock_t lock;
+ int i;
+
+ spin_lock_init(&lock);
+
+ time_bench_start(rec);
+ /** Loop to measure **/
+ for (i = 0; i < rec->loops; i++) {
+ spin_lock(&lock);
+ loops_cnt++;
+ barrier(); /* avoid compiler to optimize this loop */
+ spin_unlock(&lock);
+ }
+ time_bench_stop(rec, loops_cnt);
+ return loops_cnt;
+}
+
+/* Helper for filling some page's into ptr_ring */
+static void pp_fill_ptr_ring(struct page_pool *pp, int elems)
+{
+ /* GFP_ATOMIC needed when under run softirq */
+ gfp_t gfp_mask = GFP_ATOMIC;
+ struct page **array;
+ int i;
+
+ array = kcalloc(elems, sizeof(struct page *), gfp_mask);
+
+ for (i = 0; i < elems; i++)
+ array[i] = page_pool_alloc_pages(pp, gfp_mask);
+ for (i = 0; i < elems; i++)
+ page_pool_put_page(pp, array[i], -1, false);
+
+ kfree(array);
+}
+
+enum test_type { type_fast_path, type_ptr_ring, type_page_allocator };
+
+/* Depends on compile optimizing this function */
+static int time_bench_page_pool(struct time_bench_record *rec, void *data,
+ enum test_type type, const char *func)
+{
+ uint64_t loops_cnt = 0;
+ gfp_t gfp_mask = GFP_ATOMIC; /* GFP_ATOMIC is not really needed */
+ int i, err;
+
+ struct page_pool *pp;
+ struct page *page;
+
+ struct page_pool_params pp_params = {
+ .order = 0,
+ .flags = 0,
+ .pool_size = MY_POOL_SIZE,
+ .nid = NUMA_NO_NODE,
+ .dev = NULL, /* Only use for DMA mapping */
+ .dma_dir = DMA_BIDIRECTIONAL,
+ };
+
+ pp = page_pool_create(&pp_params);
+ if (IS_ERR(pp)) {
+ err = PTR_ERR(pp);
+ pr_warn("%s: Error(%d) creating page_pool\n", func, err);
+ goto out;
+ }
+ pp_fill_ptr_ring(pp, 64);
+
+ if (in_serving_softirq())
+ pr_warn("%s(): in_serving_softirq fast-path\n", func);
+ else
+ pr_warn("%s(): Cannot use page_pool fast-path\n", func);
+
+ time_bench_start(rec);
+ /** Loop to measure **/
+ for (i = 0; i < rec->loops; i++) {
+ /* Common fast-path alloc that depend on in_serving_softirq() */
+ page = page_pool_alloc_pages(pp, gfp_mask);
+ if (!page)
+ break;
+ loops_cnt++;
+ barrier(); /* avoid compiler to optimize this loop */
+
+ /* The benchmarks purpose it to test different return paths.
+ * Compiler should inline optimize other function calls out
+ */
+ if (type == type_fast_path) {
+ /* Fast-path recycling e.g. XDP_DROP use-case */
+ page_pool_recycle_direct(pp, page);
+
+ } else if (type == type_ptr_ring) {
+ /* Normal return path */
+ page_pool_put_page(pp, page, -1, false);
+
+ } else if (type == type_page_allocator) {
+ /* Test if not pages are recycled, but instead
+ * returned back into systems page allocator
+ */
+ get_page(page); /* cause no-recycling */
+ page_pool_put_page(pp, page, -1, false);
+ put_page(page);
+ } else {
+ BUILD_BUG();
+ }
+ }
+ time_bench_stop(rec, loops_cnt);
+out:
+ page_pool_destroy(pp);
+ return loops_cnt;
+}
+
+static int time_bench_page_pool01_fast_path(struct time_bench_record *rec,
+ void *data)
+{
+ return time_bench_page_pool(rec, data, type_fast_path, __func__);
+}
+
+static int time_bench_page_pool02_ptr_ring(struct time_bench_record *rec,
+ void *data)
+{
+ return time_bench_page_pool(rec, data, type_ptr_ring, __func__);
+}
+
+static int time_bench_page_pool03_slow(struct time_bench_record *rec,
+ void *data)
+{
+ return time_bench_page_pool(rec, data, type_page_allocator, __func__);
+}
+
+static int run_benchmark_tests(void)
+{
+ uint32_t nr_loops = loops;
+
+ /* Baseline tests */
+ if (enabled(bit_run_bench_baseline)) {
+ time_bench_loop(nr_loops * 10, 0, "for_loop", NULL,
+ time_bench_for_loop);
+ time_bench_loop(nr_loops * 10, 0, "atomic_inc", NULL,
+ time_bench_atomic_inc);
+ time_bench_loop(nr_loops, 0, "lock", NULL, time_bench_lock);
+ }
+
+ /* This test cannot activate correct code path, due to no-softirq ctx */
+ if (enabled(bit_run_bench_no_softirq01))
+ time_bench_loop(nr_loops, 0, "no-softirq-page_pool01", NULL,
+ time_bench_page_pool01_fast_path);
+ if (enabled(bit_run_bench_no_softirq02))
+ time_bench_loop(nr_loops, 0, "no-softirq-page_pool02", NULL,
+ time_bench_page_pool02_ptr_ring);
+ if (enabled(bit_run_bench_no_softirq03))
+ time_bench_loop(nr_loops, 0, "no-softirq-page_pool03", NULL,
+ time_bench_page_pool03_slow);
+
+ return 0;
+}
+
+static int __init bench_page_pool_simple_module_init(void)
+{
+ if (verbose)
+ pr_info("Loaded\n");
+
+ if (loops > U32_MAX) {
+ pr_err("Module param loops(%lu) exceeded U32_MAX(%u)\n", loops,
+ U32_MAX);
+ return -ECHRNG;
+ }
+
+ run_benchmark_tests();
+
+ return 0;
+}
+module_init(bench_page_pool_simple_module_init);
+
+static void __exit bench_page_pool_simple_module_exit(void)
+{
+ if (verbose)
+ pr_info("Unloaded\n");
+}
+module_exit(bench_page_pool_simple_module_exit);
+
+MODULE_DESCRIPTION("Benchmark of page_pool simple cases");
+MODULE_AUTHOR("Jesper Dangaard Brouer <[email protected]>");
+MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.c b/tools/testing/selftests/net/bench/page_pool/time_bench.c
new file mode 100644
index 000000000000..073bb36ec5f2
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/time_bench.c
@@ -0,0 +1,394 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Benchmarking code execution time inside the kernel
+ *
+ * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/time.h>
+
+#include <linux/perf_event.h> /* perf_event_create_kernel_counter() */
+
+/* For concurrency testing */
+#include <linux/completion.h>
+#include <linux/sched.h>
+#include <linux/workqueue.h>
+#include <linux/kthread.h>
+
+#include "time_bench.h"
+
+static int verbose = 1;
+
+/** TSC (Time-Stamp Counter) based **
+ * See: linux/time_bench.h
+ * tsc_start_clock() and tsc_stop_clock()
+ */
+
+/** Wall-clock based **
+ */
+
+/** PMU (Performance Monitor Unit) based **
+ */
+#define PERF_FORMAT \
+ (PERF_FORMAT_GROUP | PERF_FORMAT_ID | PERF_FORMAT_TOTAL_TIME_ENABLED | \
+ PERF_FORMAT_TOTAL_TIME_RUNNING)
+
+struct raw_perf_event {
+ uint64_t config; /* event */
+ uint64_t config1; /* umask */
+ struct perf_event *save;
+ char *desc;
+};
+
+/* if HT is enable a maximum of 4 events (5 if one is instructions
+ * retired can be specified, if HT is disabled a maximum of 8 (9 if
+ * one is instructions retired) can be specified.
+ *
+ * From Table 19-1. Architectural Performance Events
+ * Architectures Software Developer’s Manual Volume 3: System Programming
+ * Guide
+ */
+struct raw_perf_event perf_events[] = {
+ { 0x3c, 0x00, NULL, "Unhalted CPU Cycles" },
+ { 0xc0, 0x00, NULL, "Instruction Retired" }
+};
+
+#define NUM_EVTS (ARRAY_SIZE(perf_events))
+
+/* WARNING: PMU config is currently broken!
+ */
+bool time_bench_PMU_config(bool enable)
+{
+ int i;
+ struct perf_event_attr perf_conf;
+ struct perf_event *perf_event;
+ int cpu;
+
+ preempt_disable();
+ cpu = smp_processor_id();
+ pr_info("DEBUG: cpu:%d\n", cpu);
+ preempt_enable();
+
+ memset(&perf_conf, 0, sizeof(struct perf_event_attr));
+ perf_conf.type = PERF_TYPE_RAW;
+ perf_conf.size = sizeof(struct perf_event_attr);
+ perf_conf.read_format = PERF_FORMAT;
+ perf_conf.pinned = 1;
+ perf_conf.exclude_user = 1; /* No userspace events */
+ perf_conf.exclude_kernel = 0; /* Only kernel events */
+
+ for (i = 0; i < NUM_EVTS; i++) {
+ perf_conf.disabled = enable;
+ //perf_conf.disabled = (i == 0) ? 1 : 0;
+ perf_conf.config = perf_events[i].config;
+ perf_conf.config1 = perf_events[i].config1;
+ if (verbose)
+ pr_info("%s() enable PMU counter: %s\n",
+ __func__, perf_events[i].desc);
+ perf_event = perf_event_create_kernel_counter(&perf_conf, cpu,
+ NULL /* task */,
+ NULL /* overflow_handler*/,
+ NULL /* context */);
+ if (perf_event) {
+ perf_events[i].save = perf_event;
+ pr_info("%s():DEBUG perf_event success\n", __func__);
+
+ perf_event_enable(perf_event);
+ } else {
+ pr_info("%s():DEBUG perf_event is NULL\n", __func__);
+ }
+ }
+
+ return true;
+}
+
+/** Generic functions **
+ */
+
+/* Calculate stats, store results in record */
+bool time_bench_calc_stats(struct time_bench_record *rec)
+{
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+ uint64_t ns_per_call_tmp_rem = 0;
+ uint32_t ns_per_call_remainder = 0;
+ uint64_t pmc_ipc_tmp_rem = 0;
+ uint32_t pmc_ipc_remainder = 0;
+ uint32_t pmc_ipc_div = 0;
+ uint32_t invoked_cnt_precision = 0;
+ uint32_t invoked_cnt = 0; /* 32-bit due to div_u64_rem() */
+
+ if (rec->flags & TIME_BENCH_LOOP) {
+ if (rec->invoked_cnt < 1000) {
+ pr_err("ERR: need more(>1000) loops(%llu) for timing\n",
+ rec->invoked_cnt);
+ return false;
+ }
+ if (rec->invoked_cnt > ((1ULL << 32) - 1)) {
+ /* div_u64_rem() can only support div with 32bit*/
+ pr_err("ERR: Invoke cnt(%llu) too big overflow 32bit\n",
+ rec->invoked_cnt);
+ return false;
+ }
+ invoked_cnt = (uint32_t)rec->invoked_cnt;
+ }
+
+ /* TSC (Time-Stamp Counter) records */
+ if (rec->flags & TIME_BENCH_TSC) {
+ rec->tsc_interval = rec->tsc_stop - rec->tsc_start;
+ if (rec->tsc_interval == 0) {
+ pr_err("ABORT: timing took ZERO TSC time\n");
+ return false;
+ }
+ /* Calculate stats */
+ if (rec->flags & TIME_BENCH_LOOP)
+ rec->tsc_cycles = rec->tsc_interval / invoked_cnt;
+ else
+ rec->tsc_cycles = rec->tsc_interval;
+ }
+
+ /* Wall-clock time calc */
+ if (rec->flags & TIME_BENCH_WALLCLOCK) {
+ rec->time_start = rec->ts_start.tv_nsec +
+ (NANOSEC_PER_SEC * rec->ts_start.tv_sec);
+ rec->time_stop = rec->ts_stop.tv_nsec +
+ (NANOSEC_PER_SEC * rec->ts_stop.tv_sec);
+ rec->time_interval = rec->time_stop - rec->time_start;
+ if (rec->time_interval == 0) {
+ pr_err("ABORT: timing took ZERO wallclock time\n");
+ return false;
+ }
+ /* Calculate stats */
+ /*** Division in kernel it tricky ***/
+ /* Orig: time_sec = (time_interval / NANOSEC_PER_SEC); */
+ /* remainder only correct because NANOSEC_PER_SEC is 10^9 */
+ rec->time_sec = div_u64_rem(rec->time_interval, NANOSEC_PER_SEC,
+ &rec->time_sec_remainder);
+ //TODO: use existing struct timespec records instead of div?
+
+ if (rec->flags & TIME_BENCH_LOOP) {
+ /*** Division in kernel it tricky ***/
+ /* Orig: ns = ((double)time_interval / invoked_cnt); */
+ /* First get quotient */
+ rec->ns_per_call_quotient =
+ div_u64_rem(rec->time_interval, invoked_cnt,
+ &ns_per_call_remainder);
+ /* Now get decimals .xxx precision (incorrect roundup)*/
+ ns_per_call_tmp_rem = ns_per_call_remainder;
+ invoked_cnt_precision = invoked_cnt / 1000;
+ if (invoked_cnt_precision > 0) {
+ rec->ns_per_call_decimal =
+ div_u64_rem(ns_per_call_tmp_rem,
+ invoked_cnt_precision,
+ &ns_per_call_remainder);
+ }
+ }
+ }
+
+ /* Performance Monitor Unit (PMU) counters */
+ if (rec->flags & TIME_BENCH_PMU) {
+ //FIXME: Overflow handling???
+ rec->pmc_inst = rec->pmc_inst_stop - rec->pmc_inst_start;
+ rec->pmc_clk = rec->pmc_clk_stop - rec->pmc_clk_start;
+
+ /* Calc Instruction Per Cycle (IPC) */
+ /* First get quotient */
+ rec->pmc_ipc_quotient = div_u64_rem(rec->pmc_inst, rec->pmc_clk,
+ &pmc_ipc_remainder);
+ /* Now get decimals .xxx precision (incorrect roundup)*/
+ pmc_ipc_tmp_rem = pmc_ipc_remainder;
+ pmc_ipc_div = rec->pmc_clk / 1000;
+ if (pmc_ipc_div > 0) {
+ rec->pmc_ipc_decimal = div_u64_rem(pmc_ipc_tmp_rem,
+ pmc_ipc_div,
+ &pmc_ipc_remainder);
+ }
+ }
+
+ return true;
+}
+
+/* Generic function for invoking a loop function and calculating
+ * execution time stats. The function being called/timed is assumed
+ * to perform a tight loop, and update the timing record struct.
+ */
+bool time_bench_loop(uint32_t loops, int step, char *txt, void *data,
+ int (*func)(struct time_bench_record *record, void *data))
+{
+ struct time_bench_record rec;
+
+ /* Setup record */
+ memset(&rec, 0, sizeof(rec)); /* zero func might not update all */
+ rec.version_abi = 1;
+ rec.loops = loops;
+ rec.step = step;
+ rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC | TIME_BENCH_WALLCLOCK);
+
+ /*** Loop function being timed ***/
+ if (!func(&rec, data)) {
+ pr_err("ABORT: function being timed failed\n");
+ return false;
+ }
+
+ if (rec.invoked_cnt < loops)
+ pr_warn("WARNING: Invoke count(%llu) smaller than loops(%d)\n",
+ rec.invoked_cnt, loops);
+
+ /* Calculate stats */
+ time_bench_calc_stats(&rec);
+
+ pr_info("Type:%s Per elem: %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n",
+ txt, rec.tsc_cycles, rec.ns_per_call_quotient,
+ rec.ns_per_call_decimal, rec.step, rec.time_sec,
+ rec.time_sec_remainder, rec.time_interval, rec.invoked_cnt,
+ rec.tsc_interval);
+ if (rec.flags & TIME_BENCH_PMU)
+ pr_info("Type:%s PMU inst/clock%llu/%llu = %llu.%03llu IPC (inst per cycle)\n",
+ txt, rec.pmc_inst, rec.pmc_clk, rec.pmc_ipc_quotient,
+ rec.pmc_ipc_decimal);
+ return true;
+}
+
+/* Function getting invoked by kthread */
+static int invoke_test_on_cpu_func(void *private)
+{
+ struct time_bench_cpu *cpu = private;
+ struct time_bench_sync *sync = cpu->sync;
+ cpumask_t newmask = CPU_MASK_NONE;
+ void *data = cpu->data;
+
+ /* Restrict CPU */
+ cpumask_set_cpu(cpu->rec.cpu, &newmask);
+ set_cpus_allowed_ptr(current, &newmask);
+
+ /* Synchronize start of concurrency test */
+ atomic_inc(&sync->nr_tests_running);
+ wait_for_completion(&sync->start_event);
+
+ /* Start benchmark function */
+ if (!cpu->bench_func(&cpu->rec, data)) {
+ pr_err("ERROR: function being timed failed on CPU:%d(%d)\n",
+ cpu->rec.cpu, smp_processor_id());
+ } else {
+ if (verbose)
+ pr_info("SUCCESS: ran on CPU:%d(%d)\n", cpu->rec.cpu,
+ smp_processor_id());
+ }
+ cpu->did_bench_run = true;
+
+ /* End test */
+ atomic_dec(&sync->nr_tests_running);
+ /* Wait for kthread_stop() telling us to stop */
+ while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ }
+ __set_current_state(TASK_RUNNING);
+ return 0;
+}
+
+void time_bench_print_stats_cpumask(const char *desc,
+ struct time_bench_cpu *cpu_tasks,
+ const struct cpumask *mask)
+{
+ uint64_t average = 0;
+ int cpu;
+ int step = 0;
+ struct sum {
+ uint64_t tsc_cycles;
+ int records;
+ } sum = { 0 };
+
+ /* Get stats */
+ for_each_cpu(cpu, mask) {
+ struct time_bench_cpu *c = &cpu_tasks[cpu];
+ struct time_bench_record *rec = &c->rec;
+
+ /* Calculate stats */
+ time_bench_calc_stats(rec);
+
+ pr_info("Type:%s CPU(%d) %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n",
+ desc, cpu, rec->tsc_cycles, rec->ns_per_call_quotient,
+ rec->ns_per_call_decimal, rec->step, rec->time_sec,
+ rec->time_sec_remainder, rec->time_interval,
+ rec->invoked_cnt, rec->tsc_interval);
+
+ /* Collect average */
+ sum.records++;
+ sum.tsc_cycles += rec->tsc_cycles;
+ step = rec->step;
+ }
+
+ if (sum.records) /* avoid div-by-zero */
+ average = sum.tsc_cycles / sum.records;
+ pr_info("Sum Type:%s Average: %llu cycles(tsc) CPUs:%d step:%d\n", desc,
+ average, sum.records, step);
+}
+
+void time_bench_run_concurrent(uint32_t loops, int step, void *data,
+ const struct cpumask *mask, /* Support masking outsome CPUs*/
+ struct time_bench_sync *sync,
+ struct time_bench_cpu *cpu_tasks,
+ int (*func)(struct time_bench_record *record, void *data))
+{
+ int cpu, running = 0;
+
+ if (verbose) // DEBUG
+ pr_warn("%s() Started on CPU:%d\n", __func__,
+ smp_processor_id());
+
+ /* Reset sync conditions */
+ atomic_set(&sync->nr_tests_running, 0);
+ init_completion(&sync->start_event);
+
+ /* Spawn off jobs on all CPUs */
+ for_each_cpu(cpu, mask) {
+ struct time_bench_cpu *c = &cpu_tasks[cpu];
+
+ running++;
+ c->sync = sync; /* Send sync variable along */
+ c->data = data; /* Send opaque along */
+
+ /* Init benchmark record */
+ memset(&c->rec, 0, sizeof(struct time_bench_record));
+ c->rec.version_abi = 1;
+ c->rec.loops = loops;
+ c->rec.step = step;
+ c->rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC |
+ TIME_BENCH_WALLCLOCK);
+ c->rec.cpu = cpu;
+ c->bench_func = func;
+ c->task = kthread_run(invoke_test_on_cpu_func, c,
+ "time_bench%d", cpu);
+ if (IS_ERR(c->task)) {
+ pr_err("%s(): Failed to start test func\n", __func__);
+ return; /* Argh, what about cleanup?! */
+ }
+ }
+
+ /* Wait until all processes are running */
+ while (atomic_read(&sync->nr_tests_running) < running) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(10);
+ }
+ /* Kick off all CPU concurrently on completion event */
+ complete_all(&sync->start_event);
+
+ /* Wait for CPUs to finish */
+ while (atomic_read(&sync->nr_tests_running)) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(10);
+ }
+
+ /* Stop the kthreads */
+ for_each_cpu(cpu, mask) {
+ struct time_bench_cpu *c = &cpu_tasks[cpu];
+
+ kthread_stop(c->task);
+ }
+
+ if (verbose) // DEBUG - happens often, finish on another CPU
+ pr_warn("%s() Finished on CPU:%d\n", __func__,
+ smp_processor_id());
+}
diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.h b/tools/testing/selftests/net/bench/page_pool/time_bench.h
new file mode 100644
index 000000000000..e113fcf341dc
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/time_bench.h
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Benchmarking code execution time inside the kernel
+ *
+ * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer
+ * for licensing details see kernel-base/COPYING
+ */
+#ifndef _LINUX_TIME_BENCH_H
+#define _LINUX_TIME_BENCH_H
+
+/* Main structure used for recording a benchmark run */
+struct time_bench_record {
+ uint32_t version_abi;
+ uint32_t loops; /* Requested loop invocations */
+ uint32_t step; /* option for e.g. bulk invocations */
+
+ uint32_t flags; /* Measurements types enabled */
+#define TIME_BENCH_LOOP BIT(0)
+#define TIME_BENCH_TSC BIT(1)
+#define TIME_BENCH_WALLCLOCK BIT(2)
+#define TIME_BENCH_PMU BIT(3)
+
+ uint32_t cpu; /* Used when embedded in time_bench_cpu */
+
+ /* Records */
+ uint64_t invoked_cnt; /* Returned actual invocations */
+ uint64_t tsc_start;
+ uint64_t tsc_stop;
+ struct timespec64 ts_start;
+ struct timespec64 ts_stop;
+ /* PMU counters for instruction and cycles
+ * instructions counter including pipelined instructions
+ */
+ uint64_t pmc_inst_start;
+ uint64_t pmc_inst_stop;
+ /* CPU unhalted clock counter */
+ uint64_t pmc_clk_start;
+ uint64_t pmc_clk_stop;
+
+ /* Result records */
+ uint64_t tsc_interval;
+ uint64_t time_start, time_stop, time_interval; /* in nanosec */
+ uint64_t pmc_inst, pmc_clk;
+
+ /* Derived result records */
+ uint64_t tsc_cycles; // +decimal?
+ uint64_t ns_per_call_quotient, ns_per_call_decimal;
+ uint64_t time_sec;
+ uint32_t time_sec_remainder;
+ uint64_t pmc_ipc_quotient, pmc_ipc_decimal; /* inst per cycle */
+};
+
+/* For synchronizing parallel CPUs to run concurrently */
+struct time_bench_sync {
+ atomic_t nr_tests_running;
+ struct completion start_event;
+};
+
+/* Keep track of CPUs executing our bench function.
+ *
+ * Embed a time_bench_record for storing info per cpu
+ */
+struct time_bench_cpu {
+ struct time_bench_record rec;
+ struct time_bench_sync *sync; /* back ptr */
+ struct task_struct *task;
+ /* "data" opaque could have been placed in time_bench_sync,
+ * but to avoid any false sharing, place it per CPU
+ */
+ void *data;
+ /* Support masking outsome CPUs, mark if it ran */
+ bool did_bench_run;
+ /* int cpu; // note CPU stored in time_bench_record */
+ int (*bench_func)(struct time_bench_record *record, void *data);
+};
+
+/*
+ * Below TSC assembler code is not compatible with other archs, and
+ * can also fail on guests if cpu-flags are not correct.
+ *
+ * The way TSC reading is used, many iterations, does not require as
+ * high accuracy as described below (in Intel Doc #324264).
+ *
+ * Considering changing to use get_cycles() (#include <asm/timex.h>).
+ */
+
+/** TSC (Time-Stamp Counter) based **
+ * Recommend reading, to understand details of reading TSC accurately:
+ * Intel Doc #324264, "How to Benchmark Code Execution Times on Intel"
+ *
+ * Consider getting exclusive ownership of CPU by using:
+ * unsigned long flags;
+ * preempt_disable();
+ * raw_local_irq_save(flags);
+ * _your_code_
+ * raw_local_irq_restore(flags);
+ * preempt_enable();
+ *
+ * Clobbered registers: "%rax", "%rbx", "%rcx", "%rdx"
+ * RDTSC only change "%rax" and "%rdx" but
+ * CPUID clears the high 32-bits of all (rax/rbx/rcx/rdx)
+ */
+static __always_inline uint64_t tsc_start_clock(void)
+{
+ /* See: Intel Doc #324264 */
+ unsigned int hi, lo;
+
+ asm volatile("CPUID\n\t"
+ "RDTSC\n\t"
+ "mov %%edx, %0\n\t"
+ "mov %%eax, %1\n\t"
+ : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx");
+ //FIXME: on 32bit use clobbered %eax + %edx
+ return ((uint64_t)lo) | (((uint64_t)hi) << 32);
+}
+
+static __always_inline uint64_t tsc_stop_clock(void)
+{
+ /* See: Intel Doc #324264 */
+ unsigned int hi, lo;
+
+ asm volatile("RDTSCP\n\t"
+ "mov %%edx, %0\n\t"
+ "mov %%eax, %1\n\t"
+ "CPUID\n\t"
+ : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx");
+ return ((uint64_t)lo) | (((uint64_t)hi) << 32);
+}
+
+/** Wall-clock based **
+ *
+ * use: getnstimeofday()
+ * getnstimeofday(&rec->ts_start);
+ * getnstimeofday(&rec->ts_stop);
+ *
+ * API changed see: Documentation/core-api/timekeeping.rst
+ * https://www.kernel.org/doc/html/latest/core-api/timekeeping.html#c.getnstimeofday
+ *
+ * We should instead use: ktime_get_real_ts64() is a direct
+ * replacement, but consider using monotonic time (ktime_get_ts64())
+ * and/or a ktime_t based interface (ktime_get()/ktime_get_real()).
+ */
+
+/** PMU (Performance Monitor Unit) based **
+ *
+ * Needed for calculating: Instructions Per Cycle (IPC)
+ * - The IPC number tell how efficient the CPU pipelining were
+ */
+//lookup: perf_event_create_kernel_counter()
+
+bool time_bench_PMU_config(bool enable);
+
+/* Raw reading via rdpmc() using fixed counters
+ *
+ * From: https://github.com/andikleen/simple-pmu
+ */
+enum {
+ FIXED_SELECT = (1U << 30), /* == 0x40000000 */
+ FIXED_INST_RETIRED_ANY = 0,
+ FIXED_CPU_CLK_UNHALTED_CORE = 1,
+ FIXED_CPU_CLK_UNHALTED_REF = 2,
+};
+
+static __always_inline unsigned int long long p_rdpmc(unsigned int in)
+{
+ unsigned int d, a;
+
+ asm volatile("rdpmc" : "=d"(d), "=a"(a) : "c"(in) : "memory");
+ return ((unsigned long long)d << 32) | a;
+}
+
+/* These PMU counter needs to be enabled, but I don't have the
+ * configure code implemented. My current hack is running:
+ * sudo perf stat -e cycles:k -e instructions:k insmod lib/ring_queue_test.ko
+ */
+/* Reading all pipelined instruction */
+static __always_inline unsigned long long pmc_inst(void)
+{
+ return p_rdpmc(FIXED_SELECT | FIXED_INST_RETIRED_ANY);
+}
+
+/* Reading CPU clock cycles */
+static __always_inline unsigned long long pmc_clk(void)
+{
+ return p_rdpmc(FIXED_SELECT | FIXED_CPU_CLK_UNHALTED_CORE);
+}
+
+/* Raw reading via MSR rdmsr() is likely wrong
+ * FIXME: How can I know which raw MSR registers are conf for what?
+ */
+#define MSR_IA32_PCM0 0x400000C1 /* PERFCTR0 */
+#define MSR_IA32_PCM1 0x400000C2 /* PERFCTR1 */
+#define MSR_IA32_PCM2 0x400000C3
+static inline uint64_t msr_inst(unsigned long long *msr_result)
+{
+ return rdmsrq_safe(MSR_IA32_PCM0, msr_result);
+}
+
+/** Generic functions **
+ */
+bool time_bench_loop(uint32_t loops, int step, char *txt, void *data,
+ int (*func)(struct time_bench_record *rec, void *data));
+bool time_bench_calc_stats(struct time_bench_record *rec);
+
+void time_bench_run_concurrent(uint32_t loops, int step, void *data,
+ const struct cpumask *mask, /* Support masking outsome CPUs*/
+ struct time_bench_sync *sync, struct time_bench_cpu *cpu_tasks,
+ int (*func)(struct time_bench_record *record, void *data));
+void time_bench_print_stats_cpumask(const char *desc,
+ struct time_bench_cpu *cpu_tasks,
+ const struct cpumask *mask);
+
+//FIXME: use rec->flags to select measurement, should be MACRO
+static __always_inline void time_bench_start(struct time_bench_record *rec)
+{
+ //getnstimeofday(&rec->ts_start);
+ ktime_get_real_ts64(&rec->ts_start);
+ if (rec->flags & TIME_BENCH_PMU) {
+ rec->pmc_inst_start = pmc_inst();
+ rec->pmc_clk_start = pmc_clk();
+ }
+ rec->tsc_start = tsc_start_clock();
+}
+
+static __always_inline void time_bench_stop(struct time_bench_record *rec,
+ uint64_t invoked_cnt)
+{
+ rec->tsc_stop = tsc_stop_clock();
+ if (rec->flags & TIME_BENCH_PMU) {
+ rec->pmc_inst_stop = pmc_inst();
+ rec->pmc_clk_stop = pmc_clk();
+ }
+ //getnstimeofday(&rec->ts_stop);
+ ktime_get_real_ts64(&rec->ts_stop);
+ rec->invoked_cnt = invoked_cnt;
+}
+
+#endif /* _LINUX_TIME_BENCH_H */
diff --git a/tools/testing/selftests/net/bench/test_bench_page_pool.sh b/tools/testing/selftests/net/bench/test_bench_page_pool.sh
new file mode 100755
index 000000000000..7b8b18cfedce
--- /dev/null
+++ b/tools/testing/selftests/net/bench/test_bench_page_pool.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+
+set -e
+
+DRIVER="./page_pool/bench_page_pool.ko"
+result=""
+
+function run_test()
+{
+ rmmod "bench_page_pool.ko" || true
+ insmod $DRIVER > /dev/null 2>&1
+ result=$(dmesg | tail -10)
+ echo "$result"
+
+ echo
+ echo "Fast path results:"
+ echo "${result}" | grep -o -E "no-softirq-page_pool01 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns"
+
+ echo
+ echo "ptr_ring results:"
+ echo "${result}" | grep -o -E "no-softirq-page_pool02 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns"
+
+ echo
+ echo "slow path results:"
+ echo "${result}" | grep -o -E "no-softirq-page_pool03 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns"
+}
+
+run_test
+
+exit 0
diff --git a/tools/testing/selftests/net/broadcast_pmtu.sh b/tools/testing/selftests/net/broadcast_pmtu.sh
new file mode 100755
index 000000000000..726eb5d25839
--- /dev/null
+++ b/tools/testing/selftests/net/broadcast_pmtu.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Ensures broadcast route MTU is respected
+
+CLIENT_NS=$(mktemp -u client-XXXXXXXX)
+CLIENT_IP4="192.168.0.1/24"
+CLIENT_BROADCAST_ADDRESS="192.168.0.255"
+
+SERVER_NS=$(mktemp -u server-XXXXXXXX)
+SERVER_IP4="192.168.0.2/24"
+
+setup() {
+ ip netns add "${CLIENT_NS}"
+ ip netns add "${SERVER_NS}"
+
+ ip -net "${SERVER_NS}" link add link1 type veth peer name link0 netns "${CLIENT_NS}"
+
+ ip -net "${CLIENT_NS}" link set link0 up
+ ip -net "${CLIENT_NS}" link set link0 mtu 9000
+ ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}" dev link0
+
+ ip -net "${SERVER_NS}" link set link1 up
+ ip -net "${SERVER_NS}" link set link1 mtu 1500
+ ip -net "${SERVER_NS}" addr add "${SERVER_IP4}" dev link1
+
+ read -r -a CLIENT_BROADCAST_ENTRY <<< "$(ip -net "${CLIENT_NS}" route show table local type broadcast)"
+ ip -net "${CLIENT_NS}" route del "${CLIENT_BROADCAST_ENTRY[@]}"
+ ip -net "${CLIENT_NS}" route add "${CLIENT_BROADCAST_ENTRY[@]}" mtu 1500
+
+ ip net exec "${SERVER_NS}" sysctl -wq net.ipv4.icmp_echo_ignore_broadcasts=0
+}
+
+cleanup() {
+ ip -net "${SERVER_NS}" link del link1
+ ip netns del "${CLIENT_NS}"
+ ip netns del "${SERVER_NS}"
+}
+
+trap cleanup EXIT
+
+setup &&
+ echo "Testing for broadcast route MTU" &&
+ ip net exec "${CLIENT_NS}" ping -f -M want -q -c 1 -s 8000 -w 1 -b "${CLIENT_BROADCAST_ADDRESS}" > /dev/null 2>&1
+
+exit $?
+
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 3cfef5153823..c24417d0047b 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -30,16 +30,25 @@ CONFIG_NET_FOU=y
CONFIG_NET_FOU_IP_TUNNELS=y
CONFIG_NETFILTER=y
CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NF_CONNTRACK=m
CONFIG_IPV6_MROUTE=y
CONFIG_IPV6_SIT=y
CONFIG_NF_NAT=m
CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_IPTABLES_LEGACY=m
CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_IPTABLES_LEGACY=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_RAW=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_NAT=m
CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IP_NF_TARGET_TTL=m
CONFIG_IPV6_GRE=m
CONFIG_IPV6_SEG6_LWTUNNEL=y
@@ -57,6 +66,8 @@ CONFIG_NF_TABLES_IPV6=y
CONFIG_NF_TABLES_IPV4=y
CONFIG_NFT_NAT=m
CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_TARGET_HL=m
+CONFIG_NETFILTER_XT_NAT=m
CONFIG_NET_ACT_CSUM=m
CONFIG_NET_ACT_CT=m
CONFIG_NET_ACT_GACT=m
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 00bde7b6f39e..d7bb2e80e88c 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -102,6 +102,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
vxlan_bridge_1d_port_8472.sh \
vxlan_bridge_1d.sh \
vxlan_bridge_1q_ipv6.sh \
+ vxlan_bridge_1q_mc_ul.sh \
vxlan_bridge_1q_port_8472_ipv6.sh \
vxlan_bridge_1q_port_8472.sh \
vxlan_bridge_1q.sh \
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 508f3c700d71..890b3374dacd 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -37,6 +37,7 @@ declare -A NETIFS=(
: "${TEAMD:=teamd}"
: "${MCD:=smcrouted}"
: "${MC_CLI:=smcroutectl}"
+: "${MCD_TABLE_NAME:=selftests}"
# Constants for netdevice bring-up:
# Default time in seconds to wait for an interface to come up before giving up
@@ -141,6 +142,20 @@ check_tc_version()
fi
}
+check_tc_erspan_support()
+{
+ local dev=$1; shift
+
+ tc filter add dev $dev ingress pref 1 handle 1 flower \
+ erspan_opts 1:0:0:0 &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing erspan support"
+ return $ksft_skip
+ fi
+ tc filter del dev $dev ingress pref 1 handle 1 flower \
+ erspan_opts 1:0:0:0 &> /dev/null
+}
+
# Old versions of tc don't understand "mpls_uc"
check_tc_mpls_support()
{
@@ -525,9 +540,9 @@ setup_wait_dev_with_timeout()
return 1
}
-setup_wait()
+setup_wait_n()
{
- local num_netifs=${1:-$NUM_NETIFS}
+ local num_netifs=$1; shift
local i
for ((i = 1; i <= num_netifs; ++i)); do
@@ -538,6 +553,11 @@ setup_wait()
sleep $WAIT_TIME
}
+setup_wait()
+{
+ setup_wait_n "$NUM_NETIFS"
+}
+
wait_for_dev()
{
local dev=$1; shift
@@ -1757,6 +1777,51 @@ mc_send()
msend -g $groups -I $if_name -c 1 > /dev/null 2>&1
}
+adf_mcd_start()
+{
+ local ifs=("$@")
+
+ local table_name="$MCD_TABLE_NAME"
+ local smcroutedir
+ local pid
+ local if
+ local i
+
+ check_command "$MCD" || return 1
+ check_command "$MC_CLI" || return 1
+
+ smcroutedir=$(mktemp -d)
+ defer rm -rf "$smcroutedir"
+
+ for ((i = 1; i <= NUM_NETIFS; ++i)); do
+ echo "phyint ${NETIFS[p$i]} enable" >> \
+ "$smcroutedir/$table_name.conf"
+ done
+
+ for if in "${ifs[@]}"; do
+ if ! ip_link_has_flag "$if" MULTICAST; then
+ ip link set dev "$if" multicast on
+ defer ip link set dev "$if" multicast off
+ fi
+
+ echo "phyint $if enable" >> \
+ "$smcroutedir/$table_name.conf"
+ done
+
+ "$MCD" -N -I "$table_name" -f "$smcroutedir/$table_name.conf" \
+ -P "$smcroutedir/$table_name.pid"
+ busywait "$BUSYWAIT_TIMEOUT" test -e "$smcroutedir/$table_name.pid"
+ pid=$(cat "$smcroutedir/$table_name.pid")
+ defer kill_process "$pid"
+}
+
+mc_cli()
+{
+ local table_name="$MCD_TABLE_NAME"
+
+ "$MC_CLI" -I "$table_name" "$@"
+}
+
start_ip_monitor()
{
local mtype=$1; shift
diff --git a/tools/testing/selftests/net/forwarding/router_multicast.sh b/tools/testing/selftests/net/forwarding/router_multicast.sh
index 5a58b1ec8aef..83e52abdbc2e 100755
--- a/tools/testing/selftests/net/forwarding/router_multicast.sh
+++ b/tools/testing/selftests/net/forwarding/router_multicast.sh
@@ -33,10 +33,6 @@ NUM_NETIFS=6
source lib.sh
source tc_common.sh
-require_command $MCD
-require_command $MC_CLI
-table_name=selftests
-
h1_create()
{
simple_if_init $h1 198.51.100.2/28 2001:db8:1::2/64
@@ -149,25 +145,6 @@ router_destroy()
ip link set dev $rp1 down
}
-start_mcd()
-{
- SMCROUTEDIR="$(mktemp -d)"
-
- for ((i = 1; i <= $NUM_NETIFS; ++i)); do
- echo "phyint ${NETIFS[p$i]} enable" >> \
- $SMCROUTEDIR/$table_name.conf
- done
-
- $MCD -N -I $table_name -f $SMCROUTEDIR/$table_name.conf \
- -P $SMCROUTEDIR/$table_name.pid
-}
-
-kill_mcd()
-{
- pkill $MCD
- rm -rf $SMCROUTEDIR
-}
-
setup_prepare()
{
h1=${NETIFS[p1]}
@@ -179,7 +156,7 @@ setup_prepare()
rp3=${NETIFS[p5]}
h3=${NETIFS[p6]}
- start_mcd
+ adf_mcd_start || exit "$EXIT_STATUS"
vrf_prepare
@@ -206,7 +183,7 @@ cleanup()
vrf_cleanup
- kill_mcd
+ defer_scopes_cleanup
}
create_mcast_sg()
@@ -214,9 +191,9 @@ create_mcast_sg()
local if_name=$1; shift
local s_addr=$1; shift
local mcast=$1; shift
- local dest_ifs=${@}
+ local dest_ifs=("${@}")
- $MC_CLI -I $table_name add $if_name $s_addr $mcast $dest_ifs
+ mc_cli add "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}"
}
delete_mcast_sg()
@@ -224,9 +201,9 @@ delete_mcast_sg()
local if_name=$1; shift
local s_addr=$1; shift
local mcast=$1; shift
- local dest_ifs=${@}
+ local dest_ifs=("${@}")
- $MC_CLI -I $table_name remove $if_name $s_addr $mcast $dest_ifs
+ mc_cli remove "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}"
}
mcast_v4()
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
index b1daad19b01e..b58909a93112 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -6,7 +6,7 @@ ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \
match_ip_tos_test match_indev_test match_ip_ttl_test
match_mpls_label_test \
match_mpls_tc_test match_mpls_bos_test match_mpls_ttl_test \
- match_mpls_lse_test"
+ match_mpls_lse_test match_erspan_opts_test"
NUM_NETIFS=2
source tc_common.sh
source lib.sh
@@ -676,6 +676,56 @@ match_mpls_lse_test()
log_test "mpls lse match ($tcflags)"
}
+match_erspan_opts_test()
+{
+ RET=0
+
+ check_tc_erspan_support $h2 || return 0
+
+ # h1 erspan setup
+ tunnel_create erspan1 erspan 192.0.2.1 192.0.2.2 dev $h1 seq key 1001 \
+ tos C ttl 64 erspan_ver 1 erspan 6789 # ERSPAN Type II
+ tunnel_create erspan2 erspan 192.0.2.1 192.0.2.2 dev $h1 seq key 1002 \
+ tos C ttl 64 erspan_ver 2 erspan_dir egress erspan_hwid 63 \
+ # ERSPAN Type III
+ ip link set dev erspan1 master v$h1
+ ip link set dev erspan2 master v$h1
+ # h2 erspan setup
+ ip link add ep-ex type erspan ttl 64 external # To collect tunnel info
+ ip link set ep-ex up
+ ip link set dev ep-ex master v$h2
+ tc qdisc add dev ep-ex clsact
+
+ # ERSPAN Type II [decap direction]
+ tc filter add dev ep-ex ingress protocol ip handle 101 flower \
+ $tcflags enc_src_ip 192.0.2.1 enc_dst_ip 192.0.2.2 \
+ enc_key_id 1001 erspan_opts 1:6789:0:0 \
+ action drop
+ # ERSPAN Type III [decap direction]
+ tc filter add dev ep-ex ingress protocol ip handle 102 flower \
+ $tcflags enc_src_ip 192.0.2.1 enc_dst_ip 192.0.2.2 \
+ enc_key_id 1002 erspan_opts 2:0:1:63 action drop
+
+ ep1mac=$(mac_get erspan1)
+ $MZ erspan1 -c 1 -p 64 -a $ep1mac -b $h2mac -t ip -q
+ tc_check_packets "dev ep-ex ingress" 101 1
+ check_err $? "ERSPAN Type II"
+
+ ep2mac=$(mac_get erspan2)
+ $MZ erspan2 -c 1 -p 64 -a $ep1mac -b $h2mac -t ip -q
+ tc_check_packets "dev ep-ex ingress" 102 1
+ check_err $? "ERSPAN Type III"
+
+ # h2 erspan cleanup
+ tc qdisc del dev ep-ex clsact
+ tunnel_destroy ep-ex
+ # h1 erspan cleanup
+ tunnel_destroy erspan2 # ERSPAN Type III
+ tunnel_destroy erspan1 # ERSPAN Type II
+
+ log_test "erspan_opts match ($tcflags)"
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh
new file mode 100755
index 000000000000..462db0b603e7
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh
@@ -0,0 +1,771 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------------------------+
+# | + $h1.10 + $h1.20 |
+# | | 192.0.2.1/28 | 2001:db8:1::1/64 |
+# | \________ ________/ |
+# | \ / |
+# | + $h1 H1 (vrf) |
+# +-----------|-----------------------------+
+# |
+# +-----------|----------------------------------------------------------------+
+# | +---------|--------------------------------------+ SWITCH (main vrf) |
+# | | + $swp1 BR1 (802.1q) | |
+# | | vid 10 20 | |
+# | | | |
+# | | + vx10 (vxlan) + vx20 (vxlan) | + lo10 (dummy) |
+# | | local 192.0.2.100 local 2001:db8:4::1 | 192.0.2.100/28 |
+# | | group 233.252.0.1 group ff0e::1:2:3 | 2001:db8:4::1/64 |
+# | | id 1000 id 2000 | |
+# | | vid 10 pvid untagged vid 20 pvid untagged | |
+# | +------------------------------------------------+ |
+# | |
+# | + $swp2 $swp3 + |
+# | | 192.0.2.33/28 192.0.2.65/28 | |
+# | | 2001:db8:2::1/64 2001:db8:3::1/64 | |
+# | | | |
+# +---|--------------------------------------------------------------------|---+
+# | |
+# +---|--------------------------------+ +--------------------------------|---+
+# | | H2 (vrf) | | H3 (vrf) | |
+# | +-|----------------------------+ | | +-----------------------------|-+ |
+# | | + $h2 BR2 (802.1d) | | | | BR3 (802.1d) $h3 + | |
+# | | | | | | | |
+# | | + v1$h2 (veth) | | | | v1$h3 (veth) + | |
+# | +-|----------------------------+ | | +-----------------------------|-+ |
+# | | | | | |
+# +---|--------------------------------+ +--------------------------------|---+
+# | |
+# +---|--------------------------------+ +--------------------------------|---+
+# | + v2$h2 (veth) NS2 (netns) | | NS3 (netns) v2$h3 (veth) + |
+# | 192.0.2.34/28 | | 192.0.2.66/28 |
+# | 2001:db8:2::2/64 | | 2001:db8:3::2/64 |
+# | | | |
+# | +--------------------------------+ | | +--------------------------------+ |
+# | | BR1 (802.1q) | | | | BR1 (802.1q) | |
+# | | + vx10 (vxlan) | | | | + vx10 (vxlan) | |
+# | | local 192.0.2.34 | | | | local 192.0.2.50 | |
+# | | group 233.252.0.1 dev v2$h2 | | | | group 233.252.0.1 dev v2$h3 | |
+# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | |
+# | | vid 10 pvid untagged | | | | vid 10 pvid untagged | |
+# | | | | | | | |
+# | | + vx20 (vxlan) | | | | + vx20 (vxlan) | |
+# | | local 2001:db8:2::2 | | | | local 2001:db8:3::2 | |
+# | | group ff0e::1:2:3 dev v2$h2 | | | | group ff0e::1:2:3 dev v2$h3 | |
+# | | id 2000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | |
+# | | vid 20 pvid untagged | | | | vid 20 pvid untagged | |
+# | | | | | | | |
+# | | + w1 (veth) | | | | + w1 (veth) | |
+# | | | vid 10 20 | | | | | vid 10 20 | |
+# | +--|-----------------------------+ | | +--|-----------------------------+ |
+# | | | | | |
+# | +--|-----------------------------+ | | +--|-----------------------------+ |
+# | | + w2 (veth) VW2 (vrf) | | | | + w2 (veth) VW2 (vrf) | |
+# | | |\ | | | | |\ | |
+# | | | + w2.10 | | | | | + w2.10 | |
+# | | | 192.0.2.3/28 | | | | | 192.0.2.4/28 | |
+# | | | | | | | | | |
+# | | + w2.20 | | | | + w2.20 | |
+# | | 2001:db8:1::3/64 | | | | 2001:db8:1::4/64 | |
+# | +--------------------------------+ | | +--------------------------------+ |
+# +------------------------------------+ +------------------------------------+
+#
+#shellcheck disable=SC2317 # SC doesn't see our uses of functions.
+
+: "${VXPORT:=4789}"
+export VXPORT
+
+: "${GROUP4:=233.252.0.1}"
+export GROUP4
+
+: "${GROUP6:=ff0e::1:2:3}"
+export GROUP6
+
+: "${IPMR:=lo10}"
+
+ALL_TESTS="
+ ipv4_nomcroute
+ ipv4_mcroute
+ ipv4_mcroute_changelink
+ ipv4_mcroute_starg
+ ipv4_mcroute_noroute
+ ipv4_mcroute_fdb
+ ipv4_mcroute_fdb_oif0
+ ipv4_mcroute_fdb_oif0_sep
+
+ ipv6_nomcroute
+ ipv6_mcroute
+ ipv6_mcroute_changelink
+ ipv6_mcroute_starg
+ ipv6_mcroute_noroute
+ ipv6_mcroute_fdb
+ ipv6_mcroute_fdb_oif0
+
+ ipv4_nomcroute_rx
+ ipv4_mcroute_rx
+ ipv4_mcroute_starg_rx
+ ipv4_mcroute_fdb_oif0_sep_rx
+ ipv4_mcroute_fdb_sep_rx
+
+ ipv6_nomcroute_rx
+ ipv6_mcroute_rx
+ ipv6_mcroute_starg_rx
+ ipv6_mcroute_fdb_sep_rx
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+ simple_if_init "$h1"
+ defer simple_if_fini "$h1"
+
+ ip_link_add "$h1.10" master "v$h1" link "$h1" type vlan id 10
+ ip_link_set_up "$h1.10"
+ ip_addr_add "$h1.10" 192.0.2.1/28
+
+ ip_link_add "$h1.20" master "v$h1" link "$h1" type vlan id 20
+ ip_link_set_up "$h1.20"
+ ip_addr_add "$h1.20" 2001:db8:1::1/64
+}
+
+install_capture()
+{
+ local dev=$1; shift
+
+ tc qdisc add dev "$dev" clsact
+ defer tc qdisc del dev "$dev" clsact
+
+ tc filter add dev "$dev" ingress proto ip pref 104 \
+ flower skip_hw ip_proto udp dst_port "$VXPORT" \
+ action pass
+ defer tc filter del dev "$dev" ingress proto ip pref 104
+
+ tc filter add dev "$dev" ingress proto ipv6 pref 106 \
+ flower skip_hw ip_proto udp dst_port "$VXPORT" \
+ action pass
+ defer tc filter del dev "$dev" ingress proto ipv6 pref 106
+}
+
+h2_create()
+{
+ # $h2
+ ip_link_set_up "$h2"
+
+ # H2
+ vrf_create "v$h2"
+ defer vrf_destroy "v$h2"
+
+ ip_link_set_up "v$h2"
+
+ # br2
+ ip_link_add br2 type bridge vlan_filtering 0 mcast_snooping 0
+ ip_link_set_master br2 "v$h2"
+ ip_link_set_up br2
+
+ # $h2
+ ip_link_set_master "$h2" br2
+ install_capture "$h2"
+
+ # v1$h2
+ ip_link_set_up "v1$h2"
+ ip_link_set_master "v1$h2" br2
+}
+
+h3_create()
+{
+ # $h3
+ ip_link_set_up "$h3"
+
+ # H3
+ vrf_create "v$h3"
+ defer vrf_destroy "v$h3"
+
+ ip_link_set_up "v$h3"
+
+ # br3
+ ip_link_add br3 type bridge vlan_filtering 0 mcast_snooping 0
+ ip_link_set_master br3 "v$h3"
+ ip_link_set_up br3
+
+ # $h3
+ ip_link_set_master "$h3" br3
+ install_capture "$h3"
+
+ # v1$h3
+ ip_link_set_up "v1$h3"
+ ip_link_set_master "v1$h3" br3
+}
+
+switch_create()
+{
+ local swp1_mac
+
+ # br1
+ swp1_mac=$(mac_get "$swp1")
+ ip_link_add br1 type bridge vlan_filtering 1 \
+ vlan_default_pvid 0 mcast_snooping 0
+ ip_link_set_addr br1 "$swp1_mac"
+ ip_link_set_up br1
+
+ # A dummy to force the IPv6 OIF=0 test to install a suitable MC route on
+ # $IPMR to be deterministic. Also used for the IPv6 RX!=TX ping test.
+ ip_link_add "X$IPMR" up type dummy
+
+ # IPMR
+ ip_link_add "$IPMR" up type dummy
+ ip_addr_add "$IPMR" 192.0.2.100/28
+ ip_addr_add "$IPMR" 2001:db8:4::1/64
+
+ # $swp1
+ ip_link_set_up "$swp1"
+ ip_link_set_master "$swp1" br1
+ bridge_vlan_add vid 10 dev "$swp1"
+ bridge_vlan_add vid 20 dev "$swp1"
+
+ # $swp2
+ ip_link_set_up "$swp2"
+ ip_addr_add "$swp2" 192.0.2.33/28
+ ip_addr_add "$swp2" 2001:db8:2::1/64
+
+ # $swp3
+ ip_link_set_up "$swp3"
+ ip_addr_add "$swp3" 192.0.2.65/28
+ ip_addr_add "$swp3" 2001:db8:3::1/64
+}
+
+vx_create()
+{
+ local name=$1; shift
+ local vid=$1; shift
+
+ ip_link_add "$name" up type vxlan dstport "$VXPORT" \
+ nolearning noudpcsum tos inherit ttl 16 \
+ "$@"
+ ip_link_set_master "$name" br1
+ bridge_vlan_add vid "$vid" dev "$name" pvid untagged
+}
+export -f vx_create
+
+vx_wait()
+{
+ # Wait for all the ARP, IGMP etc. noise to settle down so that the
+ # tunnel is clear for measurements.
+ sleep 10
+}
+
+vx10_create()
+{
+ vx_create vx10 10 id 1000 "$@"
+}
+export -f vx10_create
+
+vx20_create()
+{
+ vx_create vx20 20 id 2000 "$@"
+}
+export -f vx20_create
+
+vx10_create_wait()
+{
+ vx10_create "$@"
+ vx_wait
+}
+
+vx20_create_wait()
+{
+ vx20_create "$@"
+ vx_wait
+}
+
+ns_init_common()
+{
+ local ns=$1; shift
+ local if_in=$1; shift
+ local ipv4_in=$1; shift
+ local ipv6_in=$1; shift
+ local ipv4_host=$1; shift
+ local ipv6_host=$1; shift
+
+ # v2$h2 / v2$h3
+ ip_link_set_up "$if_in"
+ ip_addr_add "$if_in" "$ipv4_in"
+ ip_addr_add "$if_in" "$ipv6_in"
+
+ # br1
+ ip_link_add br1 type bridge vlan_filtering 1 \
+ vlan_default_pvid 0 mcast_snooping 0
+ ip_link_set_up br1
+
+ # vx10, vx20
+ vx10_create local "${ipv4_in%/*}" group "$GROUP4" dev "$if_in"
+ vx20_create local "${ipv6_in%/*}" group "$GROUP6" dev "$if_in"
+
+ # w1
+ ip_link_add w1 type veth peer name w2
+ ip_link_set_master w1 br1
+ ip_link_set_up w1
+ bridge_vlan_add vid 10 dev w1
+ bridge_vlan_add vid 20 dev w1
+
+ # w2
+ simple_if_init w2
+ defer simple_if_fini w2
+
+ # w2.10
+ ip_link_add w2.10 master vw2 link w2 type vlan id 10
+ ip_link_set_up w2.10
+ ip_addr_add w2.10 "$ipv4_host"
+
+ # w2.20
+ ip_link_add w2.20 master vw2 link w2 type vlan id 20
+ ip_link_set_up w2.20
+ ip_addr_add w2.20 "$ipv6_host"
+}
+export -f ns_init_common
+
+ns2_create()
+{
+ # NS2
+ ip netns add ns2
+ defer ip netns del ns2
+
+ # v2$h2
+ ip link set dev "v2$h2" netns ns2
+ defer ip -n ns2 link set dev "v2$h2" netns 1
+
+ in_ns ns2 \
+ ns_init_common ns2 "v2$h2" \
+ 192.0.2.34/28 2001:db8:2::2/64 \
+ 192.0.2.3/28 2001:db8:1::3/64
+}
+
+ns3_create()
+{
+ # NS3
+ ip netns add ns3
+ defer ip netns del ns3
+
+ # v2$h3
+ ip link set dev "v2$h3" netns ns3
+ defer ip -n ns3 link set dev "v2$h3" netns 1
+
+ ip -n ns3 link set dev "v2$h3" up
+
+ in_ns ns3 \
+ ns_init_common ns3 "v2$h3" \
+ 192.0.2.66/28 2001:db8:3::2/64 \
+ 192.0.2.4/28 2001:db8:1::4/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ defer vrf_cleanup
+
+ forwarding_enable
+ defer forwarding_restore
+
+ ip_link_add "v1$h2" type veth peer name "v2$h2"
+ ip_link_add "v1$h3" type veth peer name "v2$h3"
+
+ h1_create
+ h2_create
+ h3_create
+ switch_create
+ ns2_create
+ ns3_create
+}
+
+adf_install_broken_sg()
+{
+ adf_mcd_start "$IPMR" || exit "$EXIT_STATUS"
+
+ mc_cli add "$swp2" 192.0.2.100 "$GROUP4" "$swp1" "$swp3"
+ defer mc_cli remove "$swp2" 192.0.2.100 "$GROUP4" "$swp1" "$swp3"
+
+ mc_cli add "$swp2" 2001:db8:4::1 "$GROUP6" "$swp1" "$swp3"
+ defer mc_cli remove "$swp2" 2001:db8:4::1 "$GROUP6" "$swp1" "$swp3"
+}
+
+adf_install_rx()
+{
+ mc_cli add "$swp2" 0.0.0.0 "$GROUP4" "$IPMR"
+ defer mc_cli remove "$swp2" 0.0.0.0 "$GROUP4" lo10
+
+ mc_cli add "$swp3" 0.0.0.0 "$GROUP4" "$IPMR"
+ defer mc_cli remove "$swp3" 0.0.0.0 "$GROUP4" lo10
+
+ mc_cli add "$swp2" :: "$GROUP6" "$IPMR"
+ defer mc_cli remove "$swp2" :: "$GROUP6" lo10
+
+ mc_cli add "$swp3" :: "$GROUP6" "$IPMR"
+ defer mc_cli remove "$swp3" :: "$GROUP6" lo10
+}
+
+adf_install_sg()
+{
+ adf_mcd_start "$IPMR" || exit "$EXIT_STATUS"
+
+ mc_cli add "$IPMR" 192.0.2.100 "$GROUP4" "$swp2" "$swp3"
+ defer mc_cli remove "$IPMR" 192.0.2.33 "$GROUP4" "$swp2" "$swp3"
+
+ mc_cli add "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3"
+ defer mc_cli remove "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3"
+
+ adf_install_rx
+}
+
+adf_install_sg_sep()
+{
+ adf_mcd_start lo || exit "$EXIT_STATUS"
+
+ mc_cli add lo 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+ defer mc_cli remove lo 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+
+ mc_cli add lo 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+ defer mc_cli remove lo 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+}
+
+adf_install_sg_sep_rx()
+{
+ local lo=$1; shift
+
+ adf_mcd_start "$IPMR" "$lo" || exit "$EXIT_STATUS"
+
+ mc_cli add "$lo" 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+ defer mc_cli remove "$lo" 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+
+ mc_cli add "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+ defer mc_cli remove "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+
+ adf_install_rx
+}
+
+adf_install_starg()
+{
+ adf_mcd_start "$IPMR" || exit "$EXIT_STATUS"
+
+ mc_cli add "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3"
+ defer mc_cli remove "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3"
+
+ mc_cli add "$IPMR" :: "$GROUP6" "$swp2" "$swp3"
+ defer mc_cli remove "$IPMR" :: "$GROUP6" "$swp2" "$swp3"
+
+ adf_install_rx
+}
+
+do_packets_v4()
+{
+ local mac
+
+ mac=$(mac_get "$h2")
+ "$MZ" "$h1" -Q 10 -c 10 -d 100msec -p 64 -a own -b "$mac" \
+ -A 192.0.2.1 -B 192.0.2.2 -t udp sp=1234,dp=2345 -q
+}
+
+do_packets_v6()
+{
+ local mac
+
+ mac=$(mac_get "$h2")
+ "$MZ" -6 "$h1" -Q 20 -c 10 -d 100msec -p 64 -a own -b "$mac" \
+ -A 2001:db8:1::1 -B 2001:db8:1::2 -t udp sp=1234,dp=2345 -q
+}
+
+do_test()
+{
+ local ipv=$1; shift
+ local expect_h2=$1; shift
+ local expect_h3=$1; shift
+ local what=$1; shift
+
+ local pref=$((100 + ipv))
+ local t0_h2
+ local t0_h3
+ local t1_h2
+ local t1_h3
+ local d_h2
+ local d_h3
+
+ RET=0
+
+ t0_h2=$(tc_rule_stats_get "$h2" "$pref" ingress)
+ t0_h3=$(tc_rule_stats_get "$h3" "$pref" ingress)
+
+ "do_packets_v$ipv"
+ sleep 1
+
+ t1_h2=$(tc_rule_stats_get "$h2" "$pref" ingress)
+ t1_h3=$(tc_rule_stats_get "$h3" "$pref" ingress)
+
+ d_h2=$((t1_h2 - t0_h2))
+ d_h3=$((t1_h3 - t0_h3))
+
+ ((d_h2 == expect_h2))
+ check_err $? "Expected $expect_h2 packets on H2, got $d_h2"
+
+ ((d_h3 == expect_h3))
+ check_err $? "Expected $expect_h3 packets on H3, got $d_h3"
+
+ log_test "VXLAN MC flood $what"
+}
+
+ipv4_do_test_rx()
+{
+ local h3_should_fail=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ping_do "$h1.10" 192.0.2.3
+ check_err $? "H2 should respond"
+
+ ping_do "$h1.10" 192.0.2.4
+ check_err_fail "$h3_should_fail" $? "H3 responds"
+
+ log_test "VXLAN MC flood $what"
+}
+
+ipv6_do_test_rx()
+{
+ local h3_should_fail=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ping6_do "$h1.20" 2001:db8:1::3
+ check_err $? "H2 should respond"
+
+ ping6_do "$h1.20" 2001:db8:1::4
+ check_err_fail "$h3_should_fail" $? "H3 responds"
+
+ log_test "VXLAN MC flood $what"
+}
+
+ipv4_nomcroute()
+{
+ # Install a misleading (S,G) rule to attempt to trick the system into
+ # pushing the packets elsewhere.
+ adf_install_broken_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$swp2"
+ do_test 4 10 0 "IPv4 nomcroute"
+}
+
+ipv6_nomcroute()
+{
+ # Like for IPv4, install a misleading (S,G).
+ adf_install_broken_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$swp2"
+ do_test 6 10 0 "IPv6 nomcroute"
+}
+
+ipv4_nomcroute_rx()
+{
+ vx10_create local 192.0.2.100 group "$GROUP4" dev "$swp2"
+ ipv4_do_test_rx 1 "IPv4 nomcroute ping"
+}
+
+ipv6_nomcroute_rx()
+{
+ vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$swp2"
+ ipv6_do_test_rx 1 "IPv6 nomcroute ping"
+}
+
+ipv4_mcroute()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ do_test 4 10 10 "IPv4 mcroute"
+}
+
+ipv6_mcroute()
+{
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ do_test 6 10 10 "IPv6 mcroute"
+}
+
+ipv4_mcroute_rx()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ ipv4_do_test_rx 0 "IPv4 mcroute ping"
+}
+
+ipv6_mcroute_rx()
+{
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ ipv6_do_test_rx 0 "IPv6 mcroute ping"
+}
+
+ipv4_mcroute_changelink()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR"
+ ip link set dev vx10 type vxlan mcroute
+ sleep 1
+ do_test 4 10 10 "IPv4 mcroute changelink"
+}
+
+ipv6_mcroute_changelink()
+{
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ ip link set dev vx20 type vxlan mcroute
+ sleep 1
+ do_test 6 10 10 "IPv6 mcroute changelink"
+}
+
+ipv4_mcroute_starg()
+{
+ adf_install_starg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ do_test 4 10 10 "IPv4 mcroute (*,G)"
+}
+
+ipv6_mcroute_starg()
+{
+ adf_install_starg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ do_test 6 10 10 "IPv6 mcroute (*,G)"
+}
+
+ipv4_mcroute_starg_rx()
+{
+ adf_install_starg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ ipv4_do_test_rx 0 "IPv4 mcroute (*,G) ping"
+}
+
+ipv6_mcroute_starg_rx()
+{
+ adf_install_starg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ ipv6_do_test_rx 0 "IPv6 mcroute (*,G) ping"
+}
+
+ipv4_mcroute_noroute()
+{
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ do_test 4 0 0 "IPv4 mcroute, no route"
+}
+
+ipv6_mcroute_noroute()
+{
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ do_test 6 0 0 "IPv6 mcroute, no route"
+}
+
+ipv4_mcroute_fdb()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 dev "$IPMR" mcroute
+ bridge fdb add dev vx10 \
+ 00:00:00:00:00:00 self static dst "$GROUP4" via "$IPMR"
+ do_test 4 10 10 "IPv4 mcroute FDB"
+}
+
+ipv6_mcroute_fdb()
+{
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 dev "$IPMR" mcroute
+ bridge -6 fdb add dev vx20 \
+ 00:00:00:00:00:00 self static dst "$GROUP6" via "$IPMR"
+ do_test 6 10 10 "IPv6 mcroute FDB"
+}
+
+# Use FDB to configure VXLAN in a way where oif=0 for purposes of FIB lookup.
+ipv4_mcroute_fdb_oif0()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ bridge fdb del dev vx10 00:00:00:00:00:00
+ bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
+ do_test 4 10 10 "IPv4 mcroute oif=0"
+}
+
+ipv6_mcroute_fdb_oif0()
+{
+ # The IPv6 tunnel lookup does not fall back to selection by source
+ # address. Instead it just does a FIB match, and that would find one of
+ # the several ff00::/8 multicast routes -- each device has one. In order
+ # to reliably force the $IPMR device, add a /128 route for the
+ # destination group address.
+ ip -6 route add table local multicast "$GROUP6/128" dev "$IPMR"
+ defer ip -6 route del table local multicast "$GROUP6/128" dev "$IPMR"
+
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ bridge -6 fdb del dev vx20 00:00:00:00:00:00
+ bridge -6 fdb add dev vx20 00:00:00:00:00:00 self static dst "$GROUP6"
+ do_test 6 10 10 "IPv6 mcroute oif=0"
+}
+
+# In oif=0 test as above, have FIB lookup resolve to loopback instead of IPMR.
+# This doesn't work with IPv6 -- a MC route on lo would be marked as RTF_REJECT.
+ipv4_mcroute_fdb_oif0_sep()
+{
+ adf_install_sg_sep
+
+ ip_addr_add lo 192.0.2.120/28
+ vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+ bridge fdb del dev vx10 00:00:00:00:00:00
+ bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
+ do_test 4 10 10 "IPv4 mcroute TX!=RX oif=0"
+}
+
+ipv4_mcroute_fdb_oif0_sep_rx()
+{
+ adf_install_sg_sep_rx lo
+
+ ip_addr_add lo 192.0.2.120/28
+ vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+ bridge fdb del dev vx10 00:00:00:00:00:00
+ bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
+ ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX oif=0 ping"
+}
+
+ipv4_mcroute_fdb_sep_rx()
+{
+ adf_install_sg_sep_rx lo
+
+ ip_addr_add lo 192.0.2.120/28
+ vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+ bridge fdb del dev vx10 00:00:00:00:00:00
+ bridge fdb add \
+ dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" via lo
+ ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX ping"
+}
+
+ipv6_mcroute_fdb_sep_rx()
+{
+ adf_install_sg_sep_rx "X$IPMR"
+
+ ip_addr_add "X$IPMR" 2001:db8:5::1/64
+ vx20_create_wait local 2001:db8:5::1 group "$GROUP6" dev "$IPMR" mcroute
+ bridge -6 fdb del dev vx20 00:00:00:00:00:00
+ bridge -6 fdb add dev vx20 00:00:00:00:00:00 \
+ self static dst "$GROUP6" via "X$IPMR"
+ ipv6_do_test_rx 0 "IPv6 mcroute TX!=RX ping"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/net/ipv6_force_forwarding.sh b/tools/testing/selftests/net/ipv6_force_forwarding.sh
new file mode 100755
index 000000000000..bf0243366caa
--- /dev/null
+++ b/tools/testing/selftests/net/ipv6_force_forwarding.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test IPv6 force_forwarding interface property
+#
+# This test verifies that the force_forwarding property works correctly:
+# - When global forwarding is disabled, packets are not forwarded normally
+# - When force_forwarding is enabled on an interface, packets are forwarded
+# regardless of the global forwarding setting
+
+source lib.sh
+
+cleanup() {
+ cleanup_ns $ns1 $ns2 $ns3
+}
+
+trap cleanup EXIT
+
+setup_test() {
+ # Create three namespaces: sender, router, receiver
+ setup_ns ns1 ns2 ns3
+
+ # Create veth pairs: ns1 <-> ns2 <-> ns3
+ ip link add name veth12 type veth peer name veth21
+ ip link add name veth23 type veth peer name veth32
+
+ # Move interfaces to namespaces
+ ip link set veth12 netns $ns1
+ ip link set veth21 netns $ns2
+ ip link set veth23 netns $ns2
+ ip link set veth32 netns $ns3
+
+ # Configure interfaces
+ ip -n $ns1 addr add 2001:db8:1::1/64 dev veth12 nodad
+ ip -n $ns2 addr add 2001:db8:1::2/64 dev veth21 nodad
+ ip -n $ns2 addr add 2001:db8:2::1/64 dev veth23 nodad
+ ip -n $ns3 addr add 2001:db8:2::2/64 dev veth32 nodad
+
+ # Bring up interfaces
+ ip -n $ns1 link set veth12 up
+ ip -n $ns2 link set veth21 up
+ ip -n $ns2 link set veth23 up
+ ip -n $ns3 link set veth32 up
+
+ # Add routes
+ ip -n $ns1 route add 2001:db8:2::/64 via 2001:db8:1::2
+ ip -n $ns3 route add 2001:db8:1::/64 via 2001:db8:2::1
+
+ # Disable global forwarding
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.all.forwarding=0
+}
+
+test_force_forwarding() {
+ local ret=0
+
+ echo "TEST: force_forwarding functionality"
+
+ # Check if force_forwarding sysctl exists
+ if ! ip netns exec $ns2 test -f /proc/sys/net/ipv6/conf/veth21/force_forwarding; then
+ echo "SKIP: force_forwarding not available"
+ return $ksft_skip
+ fi
+
+ # Test 1: Without force_forwarding, ping should fail
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth21.force_forwarding=0
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth23.force_forwarding=0
+
+ if ip netns exec $ns1 ping -6 -c 1 -W 2 2001:db8:2::2 &>/dev/null; then
+ echo "FAIL: ping succeeded when forwarding disabled"
+ ret=1
+ else
+ echo "PASS: forwarding disabled correctly"
+ fi
+
+ # Test 2: With force_forwarding enabled, ping should succeed
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth21.force_forwarding=1
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth23.force_forwarding=1
+
+ if ip netns exec $ns1 ping -6 -c 1 -W 2 2001:db8:2::2 &>/dev/null; then
+ echo "PASS: force_forwarding enabled forwarding"
+ else
+ echo "FAIL: ping failed with force_forwarding enabled"
+ ret=1
+ fi
+
+ return $ret
+}
+
+echo "IPv6 force_forwarding test"
+echo "=========================="
+
+setup_test
+test_force_forwarding
+ret=$?
+
+if [ $ret -eq 0 ]; then
+ echo "OK"
+ exit 0
+elif [ $ret -eq $ksft_skip ]; then
+ echo "SKIP"
+ exit $ksft_skip
+else
+ echo "FAIL"
+ exit 1
+fi
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index 86a216e9aca8..c7add0dc4c60 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -240,6 +240,29 @@ create_netdevsim() {
echo nsim$id
}
+create_netdevsim_port() {
+ local nsim_id="$1"
+ local ns="$2"
+ local port_id="$3"
+ local perm_addr="$4"
+ local orig_dev
+ local new_dev
+ local nsim_path
+
+ nsim_path="/sys/bus/netdevsim/devices/netdevsim$nsim_id"
+
+ echo "$port_id $perm_addr" | ip netns exec "$ns" tee "$nsim_path"/new_port > /dev/null || return 1
+
+ orig_dev=$(ip netns exec "$ns" find "$nsim_path"/net/ -maxdepth 1 -name 'e*' | tail -n 1)
+ orig_dev=$(basename "$orig_dev")
+ new_dev="nsim${nsim_id}p$port_id"
+
+ ip -netns "$ns" link set dev "$orig_dev" name "$new_dev"
+ ip -netns "$ns" link set dev "$new_dev" up
+
+ echo "$new_dev"
+}
+
# Remove netdevsim with given id.
cleanup_netdevsim() {
local id="$1"
@@ -547,13 +570,19 @@ ip_link_set_addr()
defer ip link set dev "$name" address "$old_addr"
}
-ip_link_is_up()
+ip_link_has_flag()
{
local name=$1; shift
+ local flag=$1; shift
local state=$(ip -j link show "$name" |
- jq -r '(.[].flags[] | select(. == "UP")) // "DOWN"')
- [[ $state == "UP" ]]
+ jq --arg flag "$flag" 'any(.[].flags.[]; . == $flag)')
+ [[ $state == true ]]
+}
+
+ip_link_is_up()
+{
+ ip_link_has_flag "$1" UP
}
ip_link_set_up()
diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py
index 8697bd27dc30..02be28dcc089 100644
--- a/tools/testing/selftests/net/lib/py/__init__.py
+++ b/tools/testing/selftests/net/lib/py/__init__.py
@@ -6,4 +6,4 @@ from .netns import NetNS, NetNSEnter
from .nsim import *
from .utils import *
from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily
-from .ynl import NetshaperFamily
+from .ynl import NetshaperFamily, DevlinkFamily
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
index 61287c203b6e..8e35ed12ed9e 100644
--- a/tools/testing/selftests/net/lib/py/ksft.py
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -32,6 +32,7 @@ class KsftTerminate(KeyboardInterrupt):
def ksft_pr(*objs, **kwargs):
+ kwargs["flush"] = True
print("#", *objs, **kwargs)
@@ -139,7 +140,7 @@ def ktap_result(ok, cnt=1, case="", comment=""):
res += "." + str(case.__name__)
if comment:
res += " # " + comment
- print(res)
+ print(res, flush=True)
def ksft_flush_defer():
@@ -227,8 +228,8 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0}
- print("TAP version 13")
- print("1.." + str(len(cases)))
+ print("TAP version 13", flush=True)
+ print("1.." + str(len(cases)), flush=True)
global KSFT_RESULT
cnt = 0
diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py
index 34470d65d871..f395c90fb0f1 100644
--- a/tools/testing/selftests/net/lib/py/utils.py
+++ b/tools/testing/selftests/net/lib/py/utils.py
@@ -175,6 +175,10 @@ def tool(name, args, json=None, ns=None, host=None):
return cmd_obj
+def bpftool(args, json=None, ns=None, host=None):
+ return tool('bpftool', args, json=json, ns=ns, host=host)
+
+
def ip(args, json=None, ns=None, host=None):
if ns:
args = f'-netns {ns} ' + args
@@ -185,6 +189,41 @@ def ethtool(args, json=None, ns=None, host=None):
return tool('ethtool', args, json=json, ns=ns, host=host)
+def bpftrace(expr, json=None, ns=None, host=None, timeout=None):
+ """
+ Run bpftrace and return map data (if json=True).
+ The output of bpftrace is inconvenient, so the helper converts
+ to a dict indexed by map name, e.g.:
+ {
+ "@": { ... },
+ "@map2": { ... },
+ }
+ """
+ cmd_arr = ['bpftrace']
+ # Throw in --quiet if json, otherwise the output has two objects
+ if json:
+ cmd_arr += ['-f', 'json', '-q']
+ if timeout:
+ expr += ' interval:s:' + str(timeout) + ' { exit(); }'
+ cmd_arr += ['-e', expr]
+ cmd_obj = cmd(cmd_arr, ns=ns, host=host, shell=False)
+ if json:
+ # bpftrace prints objects as lines
+ ret = {}
+ for l in cmd_obj.stdout.split('\n'):
+ if not l.strip():
+ continue
+ one = _json.loads(l)
+ if one.get('type') != 'map':
+ continue
+ for k, v in one["data"].items():
+ if k.startswith('@'):
+ k = k.lstrip('@')
+ ret[k] = v
+ return ret
+ return cmd_obj
+
+
def rand_port(type=socket.SOCK_STREAM):
"""
Get a random unprivileged port.
diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py
index 6329ae805abf..2b3a61ea3bfa 100644
--- a/tools/testing/selftests/net/lib/py/ynl.py
+++ b/tools/testing/selftests/net/lib/py/ynl.py
@@ -56,3 +56,8 @@ class NetshaperFamily(YnlFamily):
def __init__(self, recv_size=0):
super().__init__((SPEC_PATH / Path('net_shaper.yaml')).as_posix(),
schema='', recv_size=recv_size)
+
+class DevlinkFamily(YnlFamily):
+ def __init__(self, recv_size=0):
+ super().__init__((SPEC_PATH / Path('devlink.yaml')).as_posix(),
+ schema='', recv_size=recv_size)
diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c
new file mode 100644
index 000000000000..521ba38f2ddd
--- /dev/null
+++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c
@@ -0,0 +1,621 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+#define MAX_ADJST_OFFSET 256
+#define MAX_PAYLOAD_LEN 5000
+#define MAX_HDR_LEN 64
+
+enum {
+ XDP_MODE = 0,
+ XDP_PORT = 1,
+ XDP_ADJST_OFFSET = 2,
+ XDP_ADJST_TAG = 3,
+} xdp_map_setup_keys;
+
+enum {
+ XDP_MODE_PASS = 0,
+ XDP_MODE_DROP = 1,
+ XDP_MODE_TX = 2,
+ XDP_MODE_TAIL_ADJST = 3,
+ XDP_MODE_HEAD_ADJST = 4,
+} xdp_map_modes;
+
+enum {
+ STATS_RX = 0,
+ STATS_PASS = 1,
+ STATS_DROP = 2,
+ STATS_TX = 3,
+ STATS_ABORT = 4,
+} xdp_stats;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 5);
+ __type(key, __u32);
+ __type(value, __s32);
+} map_xdp_setup SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 5);
+ __type(key, __u32);
+ __type(value, __u64);
+} map_xdp_stats SEC(".maps");
+
+static __u32 min(__u32 a, __u32 b)
+{
+ return a < b ? a : b;
+}
+
+static void record_stats(struct xdp_md *ctx, __u32 stat_type)
+{
+ __u64 *count;
+
+ count = bpf_map_lookup_elem(&map_xdp_stats, &stat_type);
+
+ if (count)
+ __sync_fetch_and_add(count, 1);
+}
+
+static struct udphdr *filter_udphdr(struct xdp_md *ctx, __u16 port)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct udphdr *udph = NULL;
+ struct ethhdr *eth = data;
+
+ if (data + sizeof(*eth) > data_end)
+ return NULL;
+
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *iph = data + sizeof(*eth);
+
+ if (iph + 1 > (struct iphdr *)data_end ||
+ iph->protocol != IPPROTO_UDP)
+ return NULL;
+
+ udph = (void *)eth + sizeof(*iph) + sizeof(*eth);
+ } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ipv6h = data + sizeof(*eth);
+
+ if (ipv6h + 1 > (struct ipv6hdr *)data_end ||
+ ipv6h->nexthdr != IPPROTO_UDP)
+ return NULL;
+
+ udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth);
+ } else {
+ return NULL;
+ }
+
+ if (udph + 1 > (struct udphdr *)data_end)
+ return NULL;
+
+ if (udph->dest != bpf_htons(port))
+ return NULL;
+
+ record_stats(ctx, STATS_RX);
+
+ return udph;
+}
+
+static int xdp_mode_pass(struct xdp_md *ctx, __u16 port)
+{
+ struct udphdr *udph = NULL;
+
+ udph = filter_udphdr(ctx, port);
+ if (!udph)
+ return XDP_PASS;
+
+ record_stats(ctx, STATS_PASS);
+
+ return XDP_PASS;
+}
+
+static int xdp_mode_drop_handler(struct xdp_md *ctx, __u16 port)
+{
+ struct udphdr *udph = NULL;
+
+ udph = filter_udphdr(ctx, port);
+ if (!udph)
+ return XDP_PASS;
+
+ record_stats(ctx, STATS_DROP);
+
+ return XDP_DROP;
+}
+
+static void swap_machdr(void *data)
+{
+ struct ethhdr *eth = data;
+ __u8 tmp_mac[ETH_ALEN];
+
+ __builtin_memcpy(tmp_mac, eth->h_source, ETH_ALEN);
+ __builtin_memcpy(eth->h_source, eth->h_dest, ETH_ALEN);
+ __builtin_memcpy(eth->h_dest, tmp_mac, ETH_ALEN);
+}
+
+static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct udphdr *udph = NULL;
+ struct ethhdr *eth = data;
+
+ if (data + sizeof(*eth) > data_end)
+ return XDP_PASS;
+
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *iph = data + sizeof(*eth);
+ __be32 tmp_ip = iph->saddr;
+
+ if (iph + 1 > (struct iphdr *)data_end ||
+ iph->protocol != IPPROTO_UDP)
+ return XDP_PASS;
+
+ udph = data + sizeof(*iph) + sizeof(*eth);
+
+ if (udph + 1 > (struct udphdr *)data_end)
+ return XDP_PASS;
+ if (udph->dest != bpf_htons(port))
+ return XDP_PASS;
+
+ record_stats(ctx, STATS_RX);
+ swap_machdr((void *)eth);
+
+ iph->saddr = iph->daddr;
+ iph->daddr = tmp_ip;
+
+ record_stats(ctx, STATS_TX);
+
+ return XDP_TX;
+
+ } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ipv6h = data + sizeof(*eth);
+ struct in6_addr tmp_ipv6;
+
+ if (ipv6h + 1 > (struct ipv6hdr *)data_end ||
+ ipv6h->nexthdr != IPPROTO_UDP)
+ return XDP_PASS;
+
+ udph = data + sizeof(*ipv6h) + sizeof(*eth);
+
+ if (udph + 1 > (struct udphdr *)data_end)
+ return XDP_PASS;
+ if (udph->dest != bpf_htons(port))
+ return XDP_PASS;
+
+ record_stats(ctx, STATS_RX);
+ swap_machdr((void *)eth);
+
+ __builtin_memcpy(&tmp_ipv6, &ipv6h->saddr, sizeof(tmp_ipv6));
+ __builtin_memcpy(&ipv6h->saddr, &ipv6h->daddr,
+ sizeof(tmp_ipv6));
+ __builtin_memcpy(&ipv6h->daddr, &tmp_ipv6, sizeof(tmp_ipv6));
+
+ record_stats(ctx, STATS_TX);
+
+ return XDP_TX;
+ }
+
+ return XDP_PASS;
+}
+
+static void *update_pkt(struct xdp_md *ctx, __s16 offset, __u32 *udp_csum)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct udphdr *udph = NULL;
+ struct ethhdr *eth = data;
+ __u32 len, len_new;
+
+ if (data + sizeof(*eth) > data_end)
+ return NULL;
+
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *iph = data + sizeof(*eth);
+ __u16 total_len;
+
+ if (iph + 1 > (struct iphdr *)data_end)
+ return NULL;
+
+ iph->tot_len = bpf_htons(bpf_ntohs(iph->tot_len) + offset);
+
+ udph = (void *)eth + sizeof(*iph) + sizeof(*eth);
+ if (!udph || udph + 1 > (struct udphdr *)data_end)
+ return NULL;
+
+ len_new = bpf_htons(bpf_ntohs(udph->len) + offset);
+ } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ipv6h = data + sizeof(*eth);
+ __u16 payload_len;
+
+ if (ipv6h + 1 > (struct ipv6hdr *)data_end)
+ return NULL;
+
+ udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth);
+ if (!udph || udph + 1 > (struct udphdr *)data_end)
+ return NULL;
+
+ *udp_csum = ~((__u32)udph->check);
+
+ len = ipv6h->payload_len;
+ len_new = bpf_htons(bpf_ntohs(len) + offset);
+ ipv6h->payload_len = len_new;
+
+ *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new,
+ sizeof(len_new), *udp_csum);
+
+ len = udph->len;
+ len_new = bpf_htons(bpf_ntohs(udph->len) + offset);
+ *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new,
+ sizeof(len_new), *udp_csum);
+ } else {
+ return NULL;
+ }
+
+ udph->len = len_new;
+
+ return udph;
+}
+
+static __u16 csum_fold_helper(__u32 csum)
+{
+ return ~((csum & 0xffff) + (csum >> 16)) ? : 0xffff;
+}
+
+static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset,
+ __u32 hdr_len)
+{
+ char tmp_buff[MAX_ADJST_OFFSET];
+ __u32 buff_pos, udp_csum = 0;
+ struct udphdr *udph = NULL;
+ __u32 buff_len;
+
+ udph = update_pkt(ctx, 0 - offset, &udp_csum);
+ if (!udph)
+ return -1;
+
+ buff_len = bpf_xdp_get_buff_len(ctx);
+
+ offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+ offset & 0xff;
+ if (offset == 0)
+ return -1;
+
+ /* Make sure we have enough data to avoid eating the header */
+ if (buff_len - offset < hdr_len)
+ return -1;
+
+ buff_pos = buff_len - offset;
+ if (bpf_xdp_load_bytes(ctx, buff_pos, tmp_buff, offset) < 0)
+ return -1;
+
+ udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum);
+ udph->check = (__u16)csum_fold_helper(udp_csum);
+
+ if (bpf_xdp_adjust_tail(ctx, 0 - offset) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int xdp_adjst_tail_grow_data(struct xdp_md *ctx, __u16 offset)
+{
+ char tmp_buff[MAX_ADJST_OFFSET];
+ __u32 buff_pos, udp_csum = 0;
+ __u32 buff_len, hdr_len, key;
+ struct udphdr *udph;
+ __s32 *val;
+ __u8 tag;
+
+ /* Proceed to update the packet headers before attempting to adjuste
+ * the tail. Once the tail is adjusted we lose access to the offset
+ * amount of data at the end of the packet which is crucial to update
+ * the checksum.
+ * Since any failure beyond this would abort the packet, we should
+ * not worry about passing a packet up the stack with wrong headers
+ */
+ udph = update_pkt(ctx, offset, &udp_csum);
+ if (!udph)
+ return -1;
+
+ key = XDP_ADJST_TAG;
+ val = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!val)
+ return -1;
+
+ tag = (__u8)(*val);
+
+ for (int i = 0; i < MAX_ADJST_OFFSET; i++)
+ __builtin_memcpy(&tmp_buff[i], &tag, 1);
+
+ offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+ offset & 0xff;
+ if (offset == 0)
+ return -1;
+
+ udp_csum = bpf_csum_diff(0, 0, (__be32 *)tmp_buff, offset, udp_csum);
+ udph->check = (__u16)csum_fold_helper(udp_csum);
+
+ buff_len = bpf_xdp_get_buff_len(ctx);
+
+ if (bpf_xdp_adjust_tail(ctx, offset) < 0) {
+ bpf_printk("Failed to adjust tail\n");
+ return -1;
+ }
+
+ if (bpf_xdp_store_bytes(ctx, buff_len, tmp_buff, offset) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int xdp_adjst_tail(struct xdp_md *ctx, __u16 port)
+{
+ void *data = (void *)(long)ctx->data;
+ struct udphdr *udph = NULL;
+ __s32 *adjust_offset, *val;
+ __u32 key, hdr_len;
+ void *offset_ptr;
+ __u8 tag;
+ int ret;
+
+ udph = filter_udphdr(ctx, port);
+ if (!udph)
+ return XDP_PASS;
+
+ hdr_len = (void *)udph - data + sizeof(struct udphdr);
+ key = XDP_ADJST_OFFSET;
+ adjust_offset = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!adjust_offset)
+ return XDP_PASS;
+
+ if (*adjust_offset < 0)
+ ret = xdp_adjst_tail_shrnk_data(ctx,
+ (__u16)(0 - *adjust_offset),
+ hdr_len);
+ else
+ ret = xdp_adjst_tail_grow_data(ctx, (__u16)(*adjust_offset));
+ if (ret)
+ goto abort_pkt;
+
+ record_stats(ctx, STATS_PASS);
+ return XDP_PASS;
+
+abort_pkt:
+ record_stats(ctx, STATS_ABORT);
+ return XDP_ABORTED;
+}
+
+static int xdp_adjst_head_shrnk_data(struct xdp_md *ctx, __u64 hdr_len,
+ __u32 offset)
+{
+ char tmp_buff[MAX_ADJST_OFFSET];
+ struct udphdr *udph;
+ void *offset_ptr;
+ __u32 udp_csum = 0;
+
+ /* Update the length information in the IP and UDP headers before
+ * adjusting the headroom. This simplifies accessing the relevant
+ * fields in the IP and UDP headers for fragmented packets. Any
+ * failure beyond this point will result in the packet being aborted,
+ * so we don't need to worry about incorrect length information for
+ * passed packets.
+ */
+ udph = update_pkt(ctx, (__s16)(0 - offset), &udp_csum);
+ if (!udph)
+ return -1;
+
+ offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+ offset & 0xff;
+ if (offset == 0)
+ return -1;
+
+ if (bpf_xdp_load_bytes(ctx, hdr_len, tmp_buff, offset) < 0)
+ return -1;
+
+ udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum);
+
+ udph->check = (__u16)csum_fold_helper(udp_csum);
+
+ if (bpf_xdp_load_bytes(ctx, 0, tmp_buff, MAX_ADJST_OFFSET) < 0)
+ return -1;
+
+ if (bpf_xdp_adjust_head(ctx, offset) < 0)
+ return -1;
+
+ if (offset > MAX_ADJST_OFFSET)
+ return -1;
+
+ if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0)
+ return -1;
+
+ /* Added here to handle clang complain about negative value */
+ hdr_len = hdr_len & 0xff;
+
+ if (hdr_len == 0)
+ return -1;
+
+ if (bpf_xdp_store_bytes(ctx, 0, tmp_buff, hdr_len) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int xdp_adjst_head_grow_data(struct xdp_md *ctx, __u64 hdr_len,
+ __u32 offset)
+{
+ char hdr_buff[MAX_HDR_LEN];
+ char data_buff[MAX_ADJST_OFFSET];
+ void *offset_ptr;
+ __s32 *val;
+ __u32 key;
+ __u8 tag;
+ __u32 udp_csum = 0;
+ struct udphdr *udph;
+
+ udph = update_pkt(ctx, (__s16)(offset), &udp_csum);
+ if (!udph)
+ return -1;
+
+ key = XDP_ADJST_TAG;
+ val = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!val)
+ return -1;
+
+ tag = (__u8)(*val);
+ for (int i = 0; i < MAX_ADJST_OFFSET; i++)
+ __builtin_memcpy(&data_buff[i], &tag, 1);
+
+ offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+ offset & 0xff;
+ if (offset == 0)
+ return -1;
+
+ udp_csum = bpf_csum_diff(0, 0, (__be32 *)data_buff, offset, udp_csum);
+ udph->check = (__u16)csum_fold_helper(udp_csum);
+
+ if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0)
+ return -1;
+
+ /* Added here to handle clang complain about negative value */
+ hdr_len = hdr_len & 0xff;
+
+ if (hdr_len == 0)
+ return -1;
+
+ if (bpf_xdp_load_bytes(ctx, 0, hdr_buff, hdr_len) < 0)
+ return -1;
+
+ if (offset > MAX_ADJST_OFFSET)
+ return -1;
+
+ if (bpf_xdp_adjust_head(ctx, 0 - offset) < 0)
+ return -1;
+
+ if (bpf_xdp_store_bytes(ctx, 0, hdr_buff, hdr_len) < 0)
+ return -1;
+
+ if (bpf_xdp_store_bytes(ctx, hdr_len, data_buff, offset) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int xdp_head_adjst(struct xdp_md *ctx, __u16 port)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct udphdr *udph_ptr = NULL;
+ __u32 key, size, hdr_len;
+ __s32 *val;
+ int res;
+
+ /* Filter packets based on UDP port */
+ udph_ptr = filter_udphdr(ctx, port);
+ if (!udph_ptr)
+ return XDP_PASS;
+
+ hdr_len = (void *)udph_ptr - data + sizeof(struct udphdr);
+
+ key = XDP_ADJST_OFFSET;
+ val = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!val)
+ return XDP_PASS;
+
+ switch (*val) {
+ case -16:
+ case 16:
+ size = 16;
+ break;
+ case -32:
+ case 32:
+ size = 32;
+ break;
+ case -64:
+ case 64:
+ size = 64;
+ break;
+ case -128:
+ case 128:
+ size = 128;
+ break;
+ case -256:
+ case 256:
+ size = 256;
+ break;
+ default:
+ bpf_printk("Invalid adjustment offset: %d\n", *val);
+ goto abort;
+ }
+
+ if (*val < 0)
+ res = xdp_adjst_head_grow_data(ctx, hdr_len, size);
+ else
+ res = xdp_adjst_head_shrnk_data(ctx, hdr_len, size);
+
+ if (res)
+ goto abort;
+
+ record_stats(ctx, STATS_PASS);
+ return XDP_PASS;
+
+abort:
+ record_stats(ctx, STATS_ABORT);
+ return XDP_ABORTED;
+}
+
+static int xdp_prog_common(struct xdp_md *ctx)
+{
+ __u32 key, *port;
+ __s32 *mode;
+
+ key = XDP_MODE;
+ mode = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!mode)
+ return XDP_PASS;
+
+ key = XDP_PORT;
+ port = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!port)
+ return XDP_PASS;
+
+ switch (*mode) {
+ case XDP_MODE_PASS:
+ return xdp_mode_pass(ctx, (__u16)(*port));
+ case XDP_MODE_DROP:
+ return xdp_mode_drop_handler(ctx, (__u16)(*port));
+ case XDP_MODE_TX:
+ return xdp_mode_tx_handler(ctx, (__u16)(*port));
+ case XDP_MODE_TAIL_ADJST:
+ return xdp_adjst_tail(ctx, (__u16)(*port));
+ case XDP_MODE_HEAD_ADJST:
+ return xdp_head_adjst(ctx, (__u16)(*port));
+ }
+
+ /* Default action is to simple pass */
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int xdp_prog(struct xdp_md *ctx)
+{
+ return xdp_prog_common(ctx);
+}
+
+SEC("xdp.frags")
+int xdp_prog_frags(struct xdp_md *ctx)
+{
+ return xdp_prog_common(ctx);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index e47788bfa671..4c7e51336ab2 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -4,7 +4,8 @@ top_srcdir = ../../../../..
CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
-TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
+TEST_PROGS := mptcp_connect.sh mptcp_connect_mmap.sh mptcp_connect_sendfile.sh \
+ mptcp_connect_checksum.sh pm_netlink.sh mptcp_join.sh diag.sh \
simult_flows.sh mptcp_sockopt.sh userspace_pm.sh
TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq mptcp_diag
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index 4f80014cae49..968d440c03fe 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -13,6 +13,7 @@ CONFIG_NETFILTER_NETLINK=m
CONFIG_NF_TABLES=m
CONFIG_NFT_COMPAT=m
CONFIG_NETFILTER_XTABLES=m
+CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NETFILTER_XT_MATCH_BPF=m
CONFIG_NETFILTER_XT_MATCH_LENGTH=m
CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
@@ -25,6 +26,7 @@ CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IP6_NF_FILTER=m
CONFIG_NET_ACT_CSUM=m
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh
new file mode 100755
index 000000000000..ce93ec2f107f
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \
+ "$(dirname "${0}")/mptcp_connect.sh" -C "${@}"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh
new file mode 100755
index 000000000000..5dd30f9394af
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \
+ "$(dirname "${0}")/mptcp_connect.sh" -m mmap "${@}"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh
new file mode 100755
index 000000000000..1d16fb1cc9bb
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \
+ "$(dirname "${0}")/mptcp_connect.sh" -m sendfile "${@}"
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index 7ea5fb28c93d..1d5d3c4e7e87 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -77,6 +77,7 @@
static int cfg_cork;
static bool cfg_cork_mixed;
static int cfg_cpu = -1; /* default: pin to last cpu */
+static int cfg_expect_zerocopy = -1;
static int cfg_family = PF_UNSPEC;
static int cfg_ifindex = 1;
static int cfg_payload_len;
@@ -92,9 +93,9 @@ static socklen_t cfg_alen;
static struct sockaddr_storage cfg_dst_addr;
static struct sockaddr_storage cfg_src_addr;
+static int exitcode;
static char payload[IP_MAXPACKET];
static long packets, bytes, completions, expected_completions;
-static int zerocopied = -1;
static uint32_t next_completion;
static uint32_t sends_since_notify;
@@ -444,11 +445,13 @@ static bool do_recv_completion(int fd, int domain)
next_completion = hi + 1;
zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
- if (zerocopied == -1)
- zerocopied = zerocopy;
- else if (zerocopied != zerocopy) {
- fprintf(stderr, "serr: inconsistent\n");
- zerocopied = zerocopy;
+ if (cfg_expect_zerocopy != -1 &&
+ cfg_expect_zerocopy != zerocopy) {
+ fprintf(stderr, "serr: ee_code: %u != expected %u\n",
+ zerocopy, cfg_expect_zerocopy);
+ exitcode = 1;
+ /* suppress repeated messages */
+ cfg_expect_zerocopy = zerocopy;
}
if (cfg_verbose >= 2)
@@ -571,7 +574,7 @@ static void do_tx(int domain, int type, int protocol)
fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n",
packets, bytes >> 20, completions,
- zerocopied == 1 ? 'y' : 'n');
+ cfg_zerocopy && cfg_expect_zerocopy == 1 ? 'y' : 'n');
}
static int do_setup_rx(int domain, int type, int protocol)
@@ -715,7 +718,7 @@ static void parse_opts(int argc, char **argv)
cfg_payload_len = max_payload_len;
- while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) {
+ while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vzZ:")) != -1) {
switch (c) {
case '4':
if (cfg_family != PF_UNSPEC)
@@ -770,6 +773,9 @@ static void parse_opts(int argc, char **argv)
case 'z':
cfg_zerocopy = true;
break;
+ case 'Z':
+ cfg_expect_zerocopy = !!atoi(optarg);
+ break;
}
}
@@ -817,5 +823,5 @@ int main(int argc, char **argv)
else
error(1, 0, "unknown cfg_test %s", cfg_test);
- return 0;
+ return exitcode;
}
diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh
index 89c22f5320e0..28178a38a4e7 100755
--- a/tools/testing/selftests/net/msg_zerocopy.sh
+++ b/tools/testing/selftests/net/msg_zerocopy.sh
@@ -6,6 +6,7 @@
set -e
readonly DEV="veth0"
+readonly DUMMY_DEV="dummy0"
readonly DEV_MTU=65535
readonly BIN="./msg_zerocopy"
@@ -14,21 +15,25 @@ readonly NSPREFIX="ns-${RAND}"
readonly NS1="${NSPREFIX}1"
readonly NS2="${NSPREFIX}2"
-readonly SADDR4='192.168.1.1'
-readonly DADDR4='192.168.1.2'
-readonly SADDR6='fd::1'
-readonly DADDR6='fd::2'
+readonly LPREFIX4='192.168.1'
+readonly RPREFIX4='192.168.2'
+readonly LPREFIX6='fd'
+readonly RPREFIX6='fc'
+
readonly path_sysctl_mem="net.core.optmem_max"
# No arguments: automated test
if [[ "$#" -eq "0" ]]; then
- $0 4 tcp -t 1
- $0 6 tcp -t 1
- $0 4 udp -t 1
- $0 6 udp -t 1
- echo "OK. All tests passed"
- exit 0
+ ret=0
+
+ $0 4 tcp -t 1 || ret=1
+ $0 6 tcp -t 1 || ret=1
+ $0 4 udp -t 1 || ret=1
+ $0 6 udp -t 1 || ret=1
+
+ [[ "$ret" == "0" ]] && echo "OK. All tests passed"
+ exit $ret
fi
# Argument parsing
@@ -45,11 +50,18 @@ readonly EXTRA_ARGS="$@"
# Argument parsing: configure addresses
if [[ "${IP}" == "4" ]]; then
- readonly SADDR="${SADDR4}"
- readonly DADDR="${DADDR4}"
+ readonly SADDR="${LPREFIX4}.1"
+ readonly DADDR="${LPREFIX4}.2"
+ readonly DUMMY_ADDR="${RPREFIX4}.1"
+ readonly DADDR_TXONLY="${RPREFIX4}.2"
+ readonly MASK="24"
elif [[ "${IP}" == "6" ]]; then
- readonly SADDR="${SADDR6}"
- readonly DADDR="${DADDR6}"
+ readonly SADDR="${LPREFIX6}::1"
+ readonly DADDR="${LPREFIX6}::2"
+ readonly DUMMY_ADDR="${RPREFIX6}::1"
+ readonly DADDR_TXONLY="${RPREFIX6}::2"
+ readonly MASK="64"
+ readonly NODAD="nodad"
else
echo "Invalid IP version ${IP}"
exit 1
@@ -89,33 +101,61 @@ ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000"
ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \
peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}"
+ip link add "${DUMMY_DEV}" mtu "${DEV_MTU}" netns "${NS2}" type dummy
+
# Bring the devices up
ip -netns "${NS1}" link set "${DEV}" up
ip -netns "${NS2}" link set "${DEV}" up
+ip -netns "${NS2}" link set "${DUMMY_DEV}" up
# Set fixed MAC addresses on the devices
ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02
ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06
# Add fixed IP addresses to the devices
-ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}"
-ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}"
-ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad
-ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad
+ip -netns "${NS1}" addr add "${SADDR}/${MASK}" dev "${DEV}" ${NODAD}
+ip -netns "${NS2}" addr add "${DADDR}/${MASK}" dev "${DEV}" ${NODAD}
+ip -netns "${NS2}" addr add "${DUMMY_ADDR}/${MASK}" dev "${DUMMY_DEV}" ${NODAD}
+
+ip -netns "${NS1}" route add default via "${DADDR}" dev "${DEV}"
+ip -netns "${NS2}" route add default via "${DADDR_TXONLY}" dev "${DUMMY_DEV}"
+
+ip netns exec "${NS2}" sysctl -wq net.ipv4.ip_forward=1
+ip netns exec "${NS2}" sysctl -wq net.ipv6.conf.all.forwarding=1
# Optionally disable sg or csum offload to test edge cases
# ip netns exec "${NS1}" ethtool -K "${DEV}" sg off
+ret=0
+
do_test() {
local readonly ARGS="$1"
- echo "ipv${IP} ${TXMODE} ${ARGS}"
- ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" &
+ # tx-rx test
+ # packets queued to a local socket are copied,
+ # sender notification has SO_EE_CODE_ZEROCOPY_COPIED.
+
+ echo -e "\nipv${IP} ${TXMODE} ${ARGS} tx-rx\n"
+ ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 \
+ -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" &
sleep 0.2
- ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}"
+ ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 \
+ -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}" -Z 0 || ret=1
wait
+
+ # next test is unconnected tx to dummy0, cannot exercise with tcp
+ [[ "${TXMODE}" == "tcp" ]] && return
+
+ # tx-only test: send out dummy0
+ # packets leaving the host are not copied,
+ # sender notification does not have SO_EE_CODE_ZEROCOPY_COPIED.
+
+ echo -e "\nipv${IP} ${TXMODE} ${ARGS} tx-only\n"
+ ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 \
+ -S "${SADDR}" -D "${DADDR_TXONLY}" ${ARGS} "${TXMODE}" -Z 1 || ret=1
}
do_test "${EXTRA_ARGS}"
do_test "-z ${EXTRA_ARGS}"
-echo ok
+
+[[ "$ret" == "0" ]] && echo "OK"
diff --git a/tools/testing/selftests/net/netdev-l2addr.sh b/tools/testing/selftests/net/netdev-l2addr.sh
new file mode 100755
index 000000000000..18509da293e5
--- /dev/null
+++ b/tools/testing/selftests/net/netdev-l2addr.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+set -o pipefail
+
+NSIM_ADDR=2025
+TEST_ADDR="d0:be:d0:be:d0:00"
+
+RET_CODE=0
+
+cleanup() {
+ cleanup_netdevsim "$NSIM_ADDR"
+ cleanup_ns "$NS"
+}
+
+trap cleanup EXIT
+
+fail() {
+ echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2
+ RET_CODE=1
+}
+
+get_addr()
+{
+ local type="$1"
+ local dev="$2"
+ local ns="$3"
+
+ ip -j -n "$ns" link show dev "$dev" | jq -er ".[0].$type"
+}
+
+setup_ns NS
+
+nsim=$(create_netdevsim $NSIM_ADDR "$NS")
+
+get_addr address "$nsim" "$NS" >/dev/null || fail "Couldn't get ether addr"
+get_addr broadcast "$nsim" "$NS" >/dev/null || fail "Couldn't get brd addr"
+get_addr permaddr "$nsim" "$NS" >/dev/null && fail "Found perm_addr without setting it"
+
+ip -n "$NS" link set dev "$nsim" address "$TEST_ADDR"
+ip -n "$NS" link set dev "$nsim" brd "$TEST_ADDR"
+
+[[ "$(get_addr address "$nsim" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't set ether addr"
+[[ "$(get_addr broadcast "$nsim" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't set brd addr"
+
+if create_netdevsim_port "$NSIM_ADDR" "$NS" 2 "FF:FF:FF:FF:FF:FF" 2>/dev/null; then
+ fail "Created netdevsim with broadcast permaddr"
+fi
+
+nsim_port=$(create_netdevsim_port "$NSIM_ADDR" "$NS" 2 "$TEST_ADDR")
+
+get_addr address "$nsim_port" "$NS" >/dev/null || fail "Couldn't get ether addr"
+get_addr broadcast "$nsim_port" "$NS" >/dev/null || fail "Couldn't get brd addr"
+[[ "$(get_addr permaddr "$nsim_port" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't get permaddr"
+
+cleanup_netdevsim "$NSIM_ADDR" "$NS"
+
+exit $RET_CODE
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
index 363646f4fefe..79d5b33966ba 100644
--- a/tools/testing/selftests/net/netfilter/config
+++ b/tools/testing/selftests/net/netfilter/config
@@ -1,6 +1,8 @@
CONFIG_AUDIT=y
CONFIG_BPF_SYSCALL=y
CONFIG_BRIDGE=m
+CONFIG_NETFILTER_XTABLES_LEGACY=y
+CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m
CONFIG_BRIDGE_EBT_BROUTE=m
CONFIG_BRIDGE_EBT_IP=m
CONFIG_BRIDGE_EBT_REDIRECT=m
@@ -14,7 +16,10 @@ CONFIG_INET_ESP=m
CONFIG_IP_NF_MATCH_RPFILTER=m
CONFIG_IP6_NF_MATCH_RPFILTER=m
CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_IPTABLES_LEGACY=m
CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_IPTABLES_LEGACY=m
+CONFIG_IP_NF_NAT=m
CONFIG_IP_NF_FILTER=m
CONFIG_IP6_NF_FILTER=m
CONFIG_IP_NF_RAW=m
@@ -92,4 +97,4 @@ CONFIG_XFRM_STATISTICS=y
CONFIG_NET_PKTGEN=m
CONFIG_TUN=m
CONFIG_INET_DIAG=m
-CONFIG_SCTP_DIAG=m
+CONFIG_INET_SCTP_DIAG=m
diff --git a/tools/testing/selftests/net/netfilter/conntrack_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_clash.sh
index 3712c1b9b38b..606a43a60f73 100755
--- a/tools/testing/selftests/net/netfilter/conntrack_clash.sh
+++ b/tools/testing/selftests/net/netfilter/conntrack_clash.sh
@@ -93,32 +93,28 @@ ping_test()
run_one_clash_test()
{
local ns="$1"
- local daddr="$2"
- local dport="$3"
+ local ctns="$2"
+ local daddr="$3"
+ local dport="$4"
local entries
local cre
if ! ip netns exec "$ns" ./udpclash $daddr $dport;then
- echo "FAIL: did not receive expected number of replies for $daddr:$dport"
- ret=1
- return 1
+ echo "INFO: did not receive expected number of replies for $daddr:$dport"
+ ip netns exec "$ctns" conntrack -S
+ # don't fail: check if clash resolution triggered after all.
fi
- entries=$(conntrack -S | wc -l)
- cre=$(conntrack -S | grep -v "clash_resolve=0" | wc -l)
+ entries=$(ip netns exec "$ctns" conntrack -S | wc -l)
+ cre=$(ip netns exec "$ctns" conntrack -S | grep "clash_resolve=0" | wc -l)
- if [ "$cre" -ne "$entries" ] ;then
+ if [ "$cre" -ne "$entries" ];then
clash_resolution_active=1
return 0
fi
- # 1 cpu -> parallel insertion impossible
- if [ "$entries" -eq 1 ]; then
- return 0
- fi
-
- # not a failure: clash resolution logic did not trigger, but all replies
- # were received. With right timing, xmit completed sequentially and
+ # not a failure: clash resolution logic did not trigger.
+ # With right timing, xmit completed sequentially and
# no parallel insertion occurs.
return $ksft_skip
}
@@ -126,20 +122,23 @@ run_one_clash_test()
run_clash_test()
{
local ns="$1"
- local daddr="$2"
- local dport="$3"
+ local ctns="$2"
+ local daddr="$3"
+ local dport="$4"
+ local softerr=0
for i in $(seq 1 10);do
- run_one_clash_test "$ns" "$daddr" "$dport"
+ run_one_clash_test "$ns" "$ctns" "$daddr" "$dport"
local rv=$?
if [ $rv -eq 0 ];then
echo "PASS: clash resolution test for $daddr:$dport on attempt $i"
return 0
- elif [ $rv -eq 1 ];then
- echo "FAIL: clash resolution test for $daddr:$dport on attempt $i"
- return 1
+ elif [ $rv -eq $ksft_skip ]; then
+ softerr=1
fi
done
+
+ [ $softerr -eq 1 ] && echo "SKIP: clash resolution for $daddr:$dport did not trigger"
}
ip link add veth0 netns "$nsclient1" type veth peer name veth0 netns "$nsrouter"
@@ -161,11 +160,11 @@ spawn_servers "$nsclient2"
# exercise clash resolution with nat:
# nsrouter is supposed to dnat to 10.0.2.1:900{0,1,2,3}.
-run_clash_test "$nsclient1" 10.0.1.99 "$dport"
+run_clash_test "$nsclient1" "$nsrouter" 10.0.1.99 "$dport"
# exercise clash resolution without nat.
load_simple_ruleset "$nsclient2"
-run_clash_test "$nsclient2" 127.0.0.1 9001
+run_clash_test "$nsclient2" "$nsclient2" 127.0.0.1 9001
if [ $clash_resolution_active -eq 0 ];then
[ "$ret" -eq 0 ] && ret=$ksft_skip
diff --git a/tools/testing/selftests/net/netfilter/ipvs.sh b/tools/testing/selftests/net/netfilter/ipvs.sh
index 6af2ea3ad6b8..9c9d5b38ab71 100755
--- a/tools/testing/selftests/net/netfilter/ipvs.sh
+++ b/tools/testing/selftests/net/netfilter/ipvs.sh
@@ -151,7 +151,7 @@ test_nat() {
test_tun() {
ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0
- ip netns exec "${ns1}" modprobe -q ipip
+ modprobe -q ipip
ip netns exec "${ns1}" ip link set tunl0 up
ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=0
ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.all.send_redirects=0
@@ -160,10 +160,10 @@ test_tun() {
ip netns exec "${ns1}" ipvsadm -a -i -t "${vip_v4}:${port}" -r ${rip_v4}:${port}
ip netns exec "${ns1}" ip addr add ${vip_v4}/32 dev lo:1
- ip netns exec "${ns2}" modprobe -q ipip
ip netns exec "${ns2}" ip link set tunl0 up
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
test_service
diff --git a/tools/testing/selftests/net/netfilter/nft_interface_stress.sh b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh
index 5ff7be9daeee..c0fffaa6dbd9 100755
--- a/tools/testing/selftests/net/netfilter/nft_interface_stress.sh
+++ b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh
@@ -10,6 +10,8 @@ source lib.sh
checktool "nft --version" "run test without nft tool"
checktool "iperf3 --version" "run test without iperf3 tool"
+read kernel_tainted < /proc/sys/kernel/tainted
+
# how many seconds to torture the kernel?
# default to 80% of max run time but don't exceed 48s
TEST_RUNTIME=$((${kselftest_timeout:-60} * 8 / 10))
@@ -135,7 +137,8 @@ else
wait
fi
-[[ $(</proc/sys/kernel/tainted) -eq 0 ]] || {
+
+[[ $kernel_tainted -eq 0 && $(</proc/sys/kernel/tainted) -ne 0 ]] && {
echo "FAIL: Kernel is tainted!"
exit $ksft_fail
}
diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c
index cd8a58097448..1f5227f3d64d 100644
--- a/tools/testing/selftests/net/nettest.c
+++ b/tools/testing/selftests/net/nettest.c
@@ -385,7 +385,7 @@ static int get_bind_to_device(int sd, char *name, size_t len)
name[0] = '\0';
rc = getsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, &optlen);
if (rc < 0)
- log_err_errno("setsockopt(SO_BINDTODEVICE)");
+ log_err_errno("getsockopt(SO_BINDTODEVICE)");
return rc;
}
@@ -535,7 +535,7 @@ static int set_freebind(int sd, int version)
break;
case AF_INET6:
if (setsockopt(sd, SOL_IPV6, IPV6_FREEBIND, &one, sizeof(one))) {
- log_err_errno("setsockopt(IPV6_FREEBIND");
+ log_err_errno("setsockopt(IPV6_FREEBIND)");
rc = -1;
}
break;
@@ -812,7 +812,7 @@ static int convert_addr(struct sock_args *args, const char *_str,
sep++;
if (str_to_uint(sep, 1, pfx_len_max,
&args->prefix_len) != 0) {
- fprintf(stderr, "Invalid port\n");
+ fprintf(stderr, "Invalid prefix length\n");
return 1;
}
} else {
@@ -1272,7 +1272,7 @@ static int msg_loop(int client, int sd, void *addr, socklen_t alen,
}
}
- nfds = interactive ? MAX(fileno(stdin), sd) + 1 : sd + 1;
+ nfds = interactive ? MAX(fileno(stdin), sd) + 1 : sd + 1;
while (1) {
FD_ZERO(&rfds);
FD_SET(sd, &rfds);
@@ -1492,7 +1492,7 @@ static int lsock_init(struct sock_args *args)
sd = socket(args->version, args->type, args->protocol);
if (sd < 0) {
log_err_errno("Error opening socket");
- return -1;
+ return -1;
}
if (set_reuseaddr(sd) != 0)
@@ -1912,7 +1912,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args)
* waiting to be told when to continue
*/
if (read(fd, &buf, sizeof(buf)) <= 0) {
- log_err_errno("Failed to read IPC status from status");
+ log_err_errno("Failed to read IPC status from pipe");
return 1;
}
if (!buf) {
diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py
index beaee5e4e2aa..5c66421ab8aa 100755
--- a/tools/testing/selftests/net/nl_netdev.py
+++ b/tools/testing/selftests/net/nl_netdev.py
@@ -2,8 +2,9 @@
# SPDX-License-Identifier: GPL-2.0
import time
+from os import system
from lib.py import ksft_run, ksft_exit, ksft_pr
-from lib.py import ksft_eq, ksft_ge, ksft_busy_wait
+from lib.py import ksft_eq, ksft_ge, ksft_ne, ksft_busy_wait
from lib.py import NetdevFamily, NetdevSimDev, ip
@@ -34,6 +35,128 @@ def napi_list_check(nf) -> None:
ksft_eq(len(napis), 100,
comment=f"queue count after reset queue {q} mode {i}")
+def napi_set_threaded(nf) -> None:
+ """
+ Test that verifies various cases of napi threaded
+ set and unset at napi and device level.
+ """
+ with NetdevSimDev(queue_count=2) as nsimdev:
+ nsim = nsimdev.nsims[0]
+
+ ip(f"link set dev {nsim.ifname} up")
+
+ napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+ ksft_eq(len(napis), 2)
+
+ napi0_id = napis[0]['id']
+ napi1_id = napis[1]['id']
+
+ # set napi threaded and verify
+ nf.napi_set({'id': napi0_id, 'threaded': "enabled"})
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+
+ # check it is not set for napi1
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
+
+ ip(f"link set dev {nsim.ifname} down")
+ ip(f"link set dev {nsim.ifname} up")
+
+ # verify if napi threaded is still set
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+
+ # check it is still not set for napi1
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
+
+ # unset napi threaded and verify
+ nf.napi_set({'id': napi0_id, 'threaded': "disabled"})
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "disabled")
+ ksft_eq(napi0.get('pid'), None)
+
+ # set threaded at device level
+ system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is set for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "enabled")
+ ksft_ne(napi1.get('pid'), None)
+
+ # unset threaded at device level
+ system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is unset for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "disabled")
+ ksft_eq(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
+
+ # set napi threaded for napi0
+ nf.napi_set({'id': napi0_id, 'threaded': 1})
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+
+ # unset threaded at device level
+ system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is unset for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "disabled")
+ ksft_eq(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
+
+def dev_set_threaded(nf) -> None:
+ """
+ Test that verifies various cases of napi threaded
+ set and unset at device level using sysfs.
+ """
+ with NetdevSimDev(queue_count=2) as nsimdev:
+ nsim = nsimdev.nsims[0]
+
+ ip(f"link set dev {nsim.ifname} up")
+
+ napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+ ksft_eq(len(napis), 2)
+
+ napi0_id = napis[0]['id']
+ napi1_id = napis[1]['id']
+
+ # set threaded
+ system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is set for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "enabled")
+ ksft_ne(napi1.get('pid'), None)
+
+ # unset threaded
+ system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is unset for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "disabled")
+ ksft_eq(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
def nsim_rxq_reset_down(nf) -> None:
"""
@@ -122,7 +245,7 @@ def page_pool_check(nf) -> None:
def main() -> None:
nf = NetdevFamily()
ksft_run([empty_check, lo_check, page_pool_check, napi_list_check,
- nsim_rxq_reset_down],
+ dev_set_threaded, napi_set_threaded, nsim_rxq_reset_down],
args=(nf, ))
ksft_exit()
diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
index ef8b25a606d8..c5b01e1bd4c7 100755
--- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh
+++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
@@ -39,11 +39,15 @@ if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then
# xfail tests that are known flaky with dbg config, not fixable.
# still run them for coverage (and expect 100% pass without dbg).
declare -ar xfail_list=(
+ "tcp_blocking_blocking-connect.pkt"
+ "tcp_blocking_blocking-read.pkt"
"tcp_eor_no-coalesce-retrans.pkt"
"tcp_fast_recovery_prr-ss.*.pkt"
+ "tcp_sack_sack-route-refresh-ip-tos.pkt"
"tcp_slow_start_slow-start-after-win-update.pkt"
"tcp_timestamping.*.pkt"
"tcp_user_timeout_user-timeout-probe.pkt"
+ "tcp_zerocopy_cl.*.pkt"
"tcp_zerocopy_epoll_.*.pkt"
"tcp_tcp_info_tcp-info-.*-limited.pkt"
)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt
index 914eabab367a..657e42ca65b5 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Test for blocking read.
+
--tolerance_usecs=10000
+--mss=1000
`./defaults.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt b/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt
new file mode 100644
index 000000000000..c790d0af635e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test various DSACK (RFC 2883) behaviors.
+
+--mss=1000
+
+`./defaults.sh`
+
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 1024
+ +0 accept(3, ..., ...) = 4
+
+// First SACK range.
+ +0 < P. 1001:2001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 1 <nop, nop, sack 1001:2001>
+
+// Check SACK coalescing (contiguous sequence).
+ +0 < P. 2001:3001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 1001:3001>
+
+// Check we have two SACK ranges for non contiguous sequences.
+ +0 < P. 4001:5001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 4001:5001 1001:3001>
+
+// Three ranges.
+ +0 < P. 7001:8001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 7001:8001 4001:5001 1001:3001>
+
+// DSACK (1001:3001) + SACK (6001:7001)
+ +0 < P. 1:6001(6000) ack 1 win 1024
+ +0 > . 1:1(0) ack 6001 <nop,nop,sack 1001:3001 7001:8001>
+
+// DSACK (7001:8001)
+ +0 < P. 6001:8001(2000) ack 1 win 1024
+ +0 > . 1:1(0) ack 8001 <nop,nop,sack 7001:8001>
+
+// DSACK for an older segment.
+ +0 < P. 1:1001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 8001 <nop,nop,sack 1:1001>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt
index df49c67645ac..e13f0eee9795 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt
@@ -1,5 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Test TCP_INQ and TCP_CM_INQ on the client side.
+
+--mss=1000
+
`./defaults.sh
`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt
index 04a5e2590c62..14dd5f813d50 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt
@@ -1,5 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Test TCP_INQ and TCP_CM_INQ on the server side.
+
+--mss=1000
+
`./defaults.sh
`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt
new file mode 100644
index 000000000000..7e6bc5fb0c8d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"`
+
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 < . 2001:11001(9000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:11001>
+
+// check that ooo packet properly updates tcpi_rcv_mss
+ +0 %{ assert tcpi_rcv_mss == 1000, tcpi_rcv_mss }%
+
+ +0 < . 11001:21001(10000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:21001>
+
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt
new file mode 100644
index 000000000000..3848b419e68c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh`
+
+ 0 `nstat -n`
+
+// Establish a connection.
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [10000], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 0>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 < P. 1:4001(4000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001 win 5000
+
+// packet in sequence : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
+ +0 < P. 4001:54001(50000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001 win 5000
+
+// ooo packet. : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
+ +1 < P. 5001:55001(50000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001 win 5000
+
+// SKB_DROP_REASON_TCP_INVALID_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
+ +0 < P. 70001:80001(10000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001 win 5000
+
+ +0 read(4, ..., 100000) = 4000
+
+// If queue is empty, accept a packet even if its end_seq is above wup + rcv_wnd
+ +0 < P. 4001:54001(50000) ack 1 win 257
+ +0 > . 1:1(0) ack 54001 win 0
+
+// Check LINUX_MIB_BEYOND_WINDOW has been incremented 3 times.
++0 `nstat | grep TcpExtBeyondWindow | grep -q " 3 "`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt
new file mode 100644
index 000000000000..f575c0ff89da
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh`
+
+ 0 `nstat -n`
+
+// Establish a connection.
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [20000], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 win 18980 <mss 1460,nop,wscale 0>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 < P. 1:20001(20000) ack 1 win 257
+ +.04 > . 1:1(0) ack 20001 win 18000
+
+ +0 setsockopt(4, SOL_SOCKET, SO_RCVBUF, [12000], 4) = 0
+ +0 < P. 20001:80001(60000) ack 1 win 257
+ +0 > . 1:1(0) ack 20001 win 18000
+
+ +0 read(4, ..., 20000) = 20000
+// A too big packet is accepted if the receive queue is empty
+ +0 < P. 20001:80001(60000) ack 1 win 257
+ +0 > . 1:1(0) ack 80001 win 0
+
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 2e8243a65b50..d6c00efeb664 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -21,6 +21,7 @@ ALL_TESTS="
kci_test_vrf
kci_test_encap
kci_test_macsec
+ kci_test_macsec_vlan
kci_test_ipsec
kci_test_ipsec_offload
kci_test_fdb_get
@@ -30,6 +31,7 @@ ALL_TESTS="
kci_test_address_proto
kci_test_enslave_bonding
kci_test_mngtmpaddr
+ kci_test_operstate
"
devdummy="test-dummy0"
@@ -291,6 +293,17 @@ kci_test_route_get()
end_test "PASS: route get"
}
+check_addr_not_exist()
+{
+ dev=$1
+ addr=$2
+ if ip addr show dev $dev | grep -q $addr; then
+ return 1
+ else
+ return 0
+ fi
+}
+
kci_test_addrlft()
{
for i in $(seq 10 100) ;do
@@ -298,9 +311,8 @@ kci_test_addrlft()
run_cmd ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1))
done
- sleep 5
- run_cmd_grep_fail "10.23.11." ip addr show dev "$devdummy"
- if [ $? -eq 0 ]; then
+ slowwait 5 check_addr_not_exist "$devdummy" "10.23.11."
+ if [ $? -eq 1 ]; then
check_err 1
end_test "FAIL: preferred_lft addresses remaining"
return
@@ -561,6 +573,41 @@ kci_test_macsec()
end_test "PASS: macsec"
}
+# Test __dev_set_rx_mode call from dev_uc_add under addr_list_lock spinlock.
+# Make sure __dev_set_promiscuity is not grabbing (sleeping) netdev instance
+# lock.
+# https://lore.kernel.org/netdev/[email protected]/
+kci_test_macsec_vlan()
+{
+ msname="test_macsec1"
+ vlanname="test_vlan1"
+ local ret=0
+ run_cmd_grep "^Usage: ip macsec" ip macsec help
+ if [ $? -ne 0 ]; then
+ end_test "SKIP: macsec: iproute2 too old"
+ return $ksft_skip
+ fi
+ run_cmd ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on
+ if [ $ret -ne 0 ];then
+ end_test "FAIL: can't add macsec interface, skipping test"
+ return 1
+ fi
+
+ run_cmd ip link set dev "$msname" up
+ ip link add link "$msname" name "$vlanname" type vlan id 1
+ ip link set dev "$vlanname" address 00:11:22:33:44:88
+ ip link set dev "$vlanname" up
+ run_cmd ip link del dev "$vlanname"
+ run_cmd ip link del dev "$msname"
+
+ if [ $ret -ne 0 ];then
+ end_test "FAIL: macsec_vlan"
+ return 1
+ fi
+
+ end_test "PASS: macsec_vlan"
+}
+
#-------------------------------------------------------------------
# Example commands
# ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \
@@ -673,6 +720,11 @@ kci_test_ipsec_offload()
sysfsf=$sysfsd/ipsec
sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/
probed=false
+ esp4_offload_probed_default=false
+
+ if lsmod | grep -q esp4_offload; then
+ esp4_offload_probed_default=true
+ fi
if ! mount | grep -q debugfs; then
mount -t debugfs none /sys/kernel/debug/ &> /dev/null
@@ -766,6 +818,7 @@ EOF
fi
# clean up any leftovers
+ ! "$esp4_offload_probed_default" && lsmod | grep -q esp4_offload && rmmod esp4_offload
echo 0 > /sys/bus/netdevsim/del_device
$probed && rmmod netdevsim
@@ -1334,6 +1387,39 @@ kci_test_mngtmpaddr()
return $ret
}
+kci_test_operstate()
+{
+ local ret=0
+
+ # Check that it is possible to set operational state during device
+ # creation and that it is preserved when the administrative state of
+ # the device is toggled.
+ run_cmd ip link add name vx0 up state up type vxlan id 10010 dstport 4789
+ run_cmd_grep "state UP" ip link show dev vx0
+ run_cmd ip link set dev vx0 down
+ run_cmd_grep "state DOWN" ip link show dev vx0
+ run_cmd ip link set dev vx0 up
+ run_cmd_grep "state UP" ip link show dev vx0
+
+ run_cmd ip link del dev vx0
+
+ # Check that it is possible to set the operational state of the device
+ # after creation.
+ run_cmd ip link add name vx0 up type vxlan id 10010 dstport 4789
+ run_cmd_grep "state UNKNOWN" ip link show dev vx0
+ run_cmd ip link set dev vx0 state up
+ run_cmd_grep "state UP" ip link show dev vx0
+
+ run_cmd ip link del dev vx0
+
+ if [ "$ret" -ne 0 ]; then
+ end_test "FAIL: operstate"
+ return 1
+ fi
+
+ end_test "PASS: operstate"
+}
+
kci_test_rtnl()
{
local current_test
diff --git a/tools/testing/selftests/net/rtnetlink_notification.sh b/tools/testing/selftests/net/rtnetlink_notification.sh
new file mode 100755
index 000000000000..3f9780232bd6
--- /dev/null
+++ b/tools/testing/selftests/net/rtnetlink_notification.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking rtnetlink notification callpaths, and get as much
+# coverage as possible.
+#
+# set -e
+
+ALL_TESTS="
+ kci_test_mcast_addr_notification
+ kci_test_anycast_addr_notification
+"
+
+source lib.sh
+test_dev="test-dummy1"
+
+kci_test_mcast_addr_notification()
+{
+ RET=0
+ local tmpfile
+ local monitor_pid
+ local match_result
+
+ tmpfile=$(mktemp)
+ defer rm "$tmpfile"
+
+ ip monitor maddr > $tmpfile &
+ monitor_pid=$!
+ defer kill_process "$monitor_pid"
+
+ sleep 1
+
+ if [ ! -e "/proc/$monitor_pid" ]; then
+ RET=$ksft_skip
+ log_test "mcast addr notification: iproute2 too old"
+ return $RET
+ fi
+
+ ip link add name "$test_dev" type dummy
+ check_err $? "failed to add dummy interface"
+ ip link set "$test_dev" up
+ check_err $? "failed to set dummy interface up"
+ ip link del dev "$test_dev"
+ check_err $? "Failed to delete dummy interface"
+ sleep 1
+
+ # There should be 4 line matches as follows.
+ # 13: test-dummy1    inet6 mcast ff02::1 scope global 
+ # 13: test-dummy1    inet mcast 224.0.0.1 scope global 
+ # Deleted 13: test-dummy1    inet mcast 224.0.0.1 scope global 
+ # Deleted 13: test-dummy1    inet6 mcast ff02::1 scope global 
+ match_result=$(grep -cE "$test_dev.*(224.0.0.1|ff02::1)" "$tmpfile")
+ if [ "$match_result" -ne 4 ]; then
+ RET=$ksft_fail
+ fi
+ log_test "mcast addr notification: Expected 4 matches, got $match_result"
+ return $RET
+}
+
+kci_test_anycast_addr_notification()
+{
+ RET=0
+ local tmpfile
+ local monitor_pid
+ local match_result
+
+ tmpfile=$(mktemp)
+ defer rm "$tmpfile"
+
+ ip monitor acaddress > "$tmpfile" &
+ monitor_pid=$!
+ defer kill_process "$monitor_pid"
+ sleep 1
+
+ if [ ! -e "/proc/$monitor_pid" ]; then
+ RET=$ksft_skip
+ log_test "anycast addr notification: iproute2 too old"
+ return "$RET"
+ fi
+
+ ip link add name "$test_dev" type dummy
+ check_err $? "failed to add dummy interface"
+ ip link set "$test_dev" up
+ check_err $? "failed to set dummy interface up"
+ sysctl -qw net.ipv6.conf."$test_dev".forwarding=1
+ ip link del dev "$test_dev"
+ check_err $? "Failed to delete dummy interface"
+ sleep 1
+
+ # There should be 2 line matches as follows.
+ # 9: dummy2 inet6 any fe80:: scope global
+ # Deleted 9: dummy2 inet6 any fe80:: scope global
+ match_result=$(grep -cE "$test_dev.*(fe80::)" "$tmpfile")
+ if [ "$match_result" -ne 2 ]; then
+ RET=$ksft_fail
+ fi
+ log_test "anycast addr notification: Expected 2 matches, got $match_result"
+ return "$RET"
+}
+
+#check for needed privileges
+if [ "$(id -u)" -ne 0 ];then
+ RET=$ksft_skip
+ log_test "need root privileges"
+ exit $RET
+fi
+
+require_command ip
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
index ba730655a7bf..4bc135e5c22c 100755
--- a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
@@ -594,7 +594,7 @@ setup_rt_local_sids()
dev "${DUMMY_DEVNAME}"
# all SIDs for VPNs start with a common locator. Routes and SRv6
- # Endpoint behavior instaces are grouped together in the 'localsid'
+ # Endpoint behavior instances are grouped together in the 'localsid'
# table.
ip -netns "${nsname}" -6 rule \
add to "${VPN_LOCATOR_SERVICE}::/16" \
diff --git a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
index 4b86040c58c6..34b781a2ae74 100755
--- a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
@@ -72,6 +72,9 @@
# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in
# the selftest network.
#
+# In addition, every router interface connecting rt-x to rt-y is assigned an
+# IPv6 link-local address fe80::x:y/64.
+#
# Local SID/C-SID table
# =====================
#
@@ -521,6 +524,9 @@ setup_rt_networking()
ip -netns "${nsname}" addr \
add "${net_prefix}::${rt}/64" dev "${devname}" nodad
+ ip -netns "${nsname}" addr \
+ add "fe80::${rt}:${neigh}/64" dev "${devname}" nodad
+
ip -netns "${nsname}" link set "${devname}" up
done
@@ -609,6 +615,27 @@ set_end_x_nextcsid()
nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}"
}
+set_end_x_ll_nextcsid()
+{
+ local rt="$1"
+ local adj="$2"
+
+ eval nsname=\${$(get_rtname "${rt}")}
+ lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")"
+ nh6_ll_addr="fe80::${adj}:${rt}"
+ oifname="veth-rt-${rt}-${adj}"
+
+ # enabled NEXT-C-SID SRv6 End.X behavior via an IPv6 link-local nexthop
+ # address (note that "dev" is the dummy dum0 device chosen for the sake
+ # of simplicity).
+ ip -netns "${nsname}" -6 route \
+ replace "${lcnode_func_prefix}" \
+ table "${LOCALSID_TABLE_ID}" \
+ encap seg6local action End.X nh6 "${nh6_ll_addr}" \
+ oif "${oifname}" flavors next-csid lblen "${LCBLOCK_BLEN}" \
+ nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}"
+}
+
set_underlay_sids_reachability()
{
local rt="$1"
@@ -654,7 +681,7 @@ setup_rt_local_sids()
set_underlay_sids_reachability "${rt}" "${rt_neighs}"
# all SIDs for VPNs start with a common locator. Routes and SRv6
- # Endpoint behavior instaces are grouped together in the 'localsid'
+ # Endpoint behavior instances are grouped together in the 'localsid'
# table.
ip -netns "${nsname}" -6 rule \
add to "${VPN_LOCATOR_SERVICE}::/16" \
@@ -1016,6 +1043,27 @@ host_vpn_tests()
check_and_log_hs_ipv4_connectivity 1 2
check_and_log_hs_ipv4_connectivity 2 1
+
+ # Setup the adjacencies in the SRv6 aware routers using IPv6 link-local
+ # addresses.
+ # - rt-3 SRv6 End.X adjacency with rt-4
+ # - rt-4 SRv6 End.X adjacency with rt-1
+ set_end_x_ll_nextcsid 3 4
+ set_end_x_ll_nextcsid 4 1
+
+ log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv6), link-local"
+
+ check_and_log_hs_ipv6_connectivity 1 2
+ check_and_log_hs_ipv6_connectivity 2 1
+
+ log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4), link-local"
+
+ check_and_log_hs_ipv4_connectivity 1 2
+ check_and_log_hs_ipv4_connectivity 2 1
+
+ # Restore the previous adjacencies.
+ set_end_x_nextcsid 3 4
+ set_end_x_nextcsid 4 1
}
__nextcsid_end_x_behavior_test()
diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
index 3efce1718c5f..6a68c7eff1dc 100755
--- a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
@@ -395,7 +395,7 @@ setup_rt_local_sids()
dev "${VRF_DEVNAME}"
# all SIDs for VPNs start with a common locator. Routes and SRv6
- # Endpoint behavior instaces are grouped together in the 'localsid'
+ # Endpoint behavior instances are grouped together in the 'localsid'
# table.
ip -netns "${nsname}" -6 rule \
add to "${VPN_LOCATOR_SERVICE}::/16" \
diff --git a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
index cabc70538ffe..0979b5316fdf 100755
--- a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
@@ -343,7 +343,7 @@ setup_rt_local_sids()
encap seg6local action End dev "${DUMMY_DEVNAME}"
# all SIDs for VPNs start with a common locator. Routes and SRv6
- # Endpoint behaviors instaces are grouped together in the 'localsid'
+ # Endpoint behaviors instances are grouped together in the 'localsid'
# table.
ip -netns "${nsname}" -6 rule add \
to "${VPN_LOCATOR_SERVICE}::/16" \
diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c
index f00245263b20..6478da6a71c3 100644
--- a/tools/testing/selftests/net/tcp_ao/seq-ext.c
+++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Check that after SEQ number wrap-around:
* 1. SEQ-extension has upper bytes set
- * 2. TCP conneciton is alive and no TCPAOBad segments
+ * 2. TCP connection is alive and no TCPAOBad segments
* In order to test (2), the test doesn't just adjust seq number for a queue
* on a connected socket, but migrates it to another sk+port number, so
* that there won't be any delayed packets that will fail to verify
diff --git a/tools/testing/selftests/net/test_neigh.sh b/tools/testing/selftests/net/test_neigh.sh
new file mode 100755
index 000000000000..388056472b5b
--- /dev/null
+++ b/tools/testing/selftests/net/test_neigh.sh
@@ -0,0 +1,366 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+TESTS="
+ extern_valid_ipv4
+ extern_valid_ipv6
+"
+VERBOSE=0
+
+################################################################################
+# Utilities
+
+run_cmd()
+{
+ local cmd="$1"
+ local out
+ local stderr="2>/dev/null"
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ stderr=
+ fi
+
+ out=$(eval "$cmd" "$stderr")
+ rc=$?
+ if [ "$VERBOSE" -eq 1 ] && [ -n "$out" ]; then
+ echo " $out"
+ fi
+
+ return $rc
+}
+
+################################################################################
+# Setup
+
+setup()
+{
+ set -e
+
+ setup_ns ns1 ns2
+
+ ip -n "$ns1" link add veth0 type veth peer name veth1 netns "$ns2"
+ ip -n "$ns1" link set dev veth0 up
+ ip -n "$ns2" link set dev veth1 up
+
+ ip -n "$ns1" address add 192.0.2.1/24 dev veth0
+ ip -n "$ns1" address add 2001:db8:1::1/64 dev veth0 nodad
+ ip -n "$ns2" address add 192.0.2.2/24 dev veth1
+ ip -n "$ns2" address add 2001:db8:1::2/64 dev veth1 nodad
+
+ ip netns exec "$ns1" sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+ ip netns exec "$ns2" sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+
+ sleep 5
+
+ set +e
+}
+
+exit_cleanup_all()
+{
+ cleanup_all_ns
+ exit "${EXIT_STATUS}"
+}
+
+################################################################################
+# Tests
+
+extern_valid_common()
+{
+ local af_str=$1; shift
+ local ip_addr=$1; shift
+ local tbl_name=$1; shift
+ local subnet=$1; shift
+ local mac
+
+ mac=$(ip -n "$ns2" -j link show dev veth1 | jq -r '.[]["address"]')
+
+ RET=0
+
+ # Check that simple addition works.
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "No \"extern_valid\" flag after addition"
+
+ log_test "$af_str \"extern_valid\" flag: Add entry"
+
+ RET=0
+
+ # Check that an entry cannot be added with "extern_valid" flag and an
+ # invalid state.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr nud none dev veth0 extern_valid"
+ check_fail $? "Managed to add an entry with \"extern_valid\" flag and an invalid state"
+
+ log_test "$af_str \"extern_valid\" flag: Add with an invalid state"
+
+ RET=0
+
+ # Check that entry cannot be added with both "extern_valid" flag and
+ # "use" / "managed" flag.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid use"
+ check_fail $? "Managed to add an entry with \"extern_valid\" flag and \"use\" flag"
+
+ log_test "$af_str \"extern_valid\" flag: Add with \"use\" flag"
+
+ RET=0
+
+ # Check that "extern_valid" flag can be toggled using replace.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Did not manage to set \"extern_valid\" flag with replace"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_fail $? "Did not manage to clear \"extern_valid\" flag with replace"
+
+ log_test "$af_str \"extern_valid\" flag: Replace entry"
+
+ RET=0
+
+ # Check that an existing "extern_valid" entry can be marked as
+ # "managed".
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid managed"
+ check_err $? "Did not manage to add \"managed\" flag to an existing \"extern_valid\" entry"
+
+ log_test "$af_str \"extern_valid\" flag: Replace entry with \"managed\" flag"
+
+ RET=0
+
+ # Check that entry cannot be replaced with "extern_valid" flag and an
+ # invalid state.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr nud none dev veth0 extern_valid"
+ check_fail $? "Managed to replace an entry with \"extern_valid\" flag and an invalid state"
+
+ log_test "$af_str \"extern_valid\" flag: Replace with an invalid state"
+
+ RET=0
+
+ # Check that an "extern_valid" entry is flushed when the interface is
+ # put administratively down.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 link set dev veth0 down"
+ run_cmd "ip -n $ns1 link set dev veth0 up"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0"
+ check_fail $? "\"extern_valid\" entry not flushed upon interface down"
+
+ log_test "$af_str \"extern_valid\" flag: Interface down"
+
+ RET=0
+
+ # Check that an "extern_valid" entry is not flushed when the interface
+ # loses its carrier.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns2 link set dev veth1 down"
+ run_cmd "ip -n $ns2 link set dev veth1 up"
+ run_cmd "sleep 2"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0"
+ check_err $? "\"extern_valid\" entry flushed upon carrier down"
+
+ log_test "$af_str \"extern_valid\" flag: Carrier down"
+
+ RET=0
+
+ # Check that when entry transitions to "reachable" state it maintains
+ # the "extern_valid" flag. Wait "delay_probe" seconds for ARP request /
+ # NS to be sent.
+ local delay_probe
+
+ delay_probe=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["delay_probe"]')
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid use"
+ run_cmd "sleep $((delay_probe / 1000 + 2))"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"REACHABLE\""
+ check_err $? "Entry did not transition to \"reachable\" state"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Entry did not maintain \"extern_valid\" flag after transition to \"reachable\" state"
+
+ log_test "$af_str \"extern_valid\" flag: Transition to \"reachable\" state"
+
+ RET=0
+
+ # Drop all packets, trigger resolution and check that entry goes back
+ # to "stale" state instead of "failed".
+ local mcast_reprobes
+ local retrans_time
+ local ucast_probes
+ local app_probes
+ local probes
+ local delay
+
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "tc -n $ns2 qdisc add dev veth1 clsact"
+ run_cmd "tc -n $ns2 filter add dev veth1 ingress proto all matchall action drop"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid use"
+ retrans_time=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["retrans"]')
+ ucast_probes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["ucast_probes"]')
+ app_probes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["app_probes"]')
+ mcast_reprobes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["mcast_reprobes"]')
+ delay=$((delay_probe + (ucast_probes + app_probes + mcast_reprobes) * retrans_time))
+ run_cmd "sleep $((delay / 1000 + 2))"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"STALE\""
+ check_err $? "Entry did not return to \"stale\" state"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Entry did not maintain \"extern_valid\" flag after returning to \"stale\" state"
+ probes=$(ip -n "$ns1" -j -s neigh get "$ip_addr" dev veth0 | jq '.[]["probes"]')
+ if [[ $probes -eq 0 ]]; then
+ check_err 1 "No probes were sent"
+ fi
+
+ log_test "$af_str \"extern_valid\" flag: Transition back to \"stale\" state"
+
+ run_cmd "tc -n $ns2 qdisc del dev veth1 clsact"
+
+ RET=0
+
+ # Forced garbage collection runs whenever the number of entries is
+ # larger than "thresh3" and deletes stale entries that have not been
+ # updated in the last 5 seconds.
+ #
+ # Check that an "extern_valid" entry survives a forced garbage
+ # collection. Add an entry, wait 5 seconds and add more entries than
+ # "thresh3" so that forced garbage collection will run.
+ #
+ # Note that the garbage collection thresholds are global resources and
+ # that changes in the initial namespace affect all the namespaces.
+ local forced_gc_runs_t0
+ local forced_gc_runs_t1
+ local orig_thresh1
+ local orig_thresh2
+ local orig_thresh3
+
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ orig_thresh1=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["thresh1"]')
+ orig_thresh2=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh2")) | .["thresh2"]')
+ orig_thresh3=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh3")) | .["thresh3"]')
+ run_cmd "ip ntable change name $tbl_name thresh3 10 thresh2 9 thresh1 8"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh add ${subnet}3 lladdr $mac nud stale dev veth0"
+ run_cmd "sleep 5"
+ forced_gc_runs_t0=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("forced_gc_runs")) | .["forced_gc_runs"]')
+ for i in {1..20}; do
+ run_cmd "ip -n $ns1 neigh add ${subnet}$((i + 4)) nud none dev veth0"
+ done
+ forced_gc_runs_t1=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("forced_gc_runs")) | .["forced_gc_runs"]')
+ if [[ $forced_gc_runs_t1 -eq $forced_gc_runs_t0 ]]; then
+ check_err 1 "Forced garbage collection did not run"
+ fi
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Entry with \"extern_valid\" flag did not survive forced garbage collection"
+ run_cmd "ip -n $ns1 neigh get ${subnet}3 dev veth0"
+ check_fail $? "Entry without \"extern_valid\" flag survived forced garbage collection"
+
+ log_test "$af_str \"extern_valid\" flag: Forced garbage collection"
+
+ run_cmd "ip ntable change name $tbl_name thresh3 $orig_thresh3 thresh2 $orig_thresh2 thresh1 $orig_thresh1"
+
+ RET=0
+
+ # Periodic garbage collection runs every "base_reachable"/2 seconds and
+ # if the number of entries is larger than "thresh1", then it deletes
+ # stale entries that have not been used in the last "gc_stale" seconds.
+ #
+ # Check that an "extern_valid" entry survives a periodic garbage
+ # collection. Add an "extern_valid" entry, add more than "thresh1"
+ # regular entries, wait "base_reachable" (longer than "gc_stale")
+ # seconds and check that the "extern_valid" entry was not deleted.
+ #
+ # Note that the garbage collection thresholds and "base_reachable" are
+ # global resources and that changes in the initial namespace affect all
+ # the namespaces.
+ local periodic_gc_runs_t0
+ local periodic_gc_runs_t1
+ local orig_base_reachable
+ local orig_gc_stale
+
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ orig_thresh1=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["thresh1"]')
+ orig_base_reachable=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["base_reachable"]')
+ run_cmd "ip ntable change name $tbl_name thresh1 10 base_reachable 10000"
+ orig_gc_stale=$(ip -n "$ns1" -j ntable show name "$tbl_name" dev veth0 | jq '.[]["gc_stale"]')
+ run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale 5000"
+ # Wait orig_base_reachable/2 for the new interval to take effect.
+ run_cmd "sleep $(((orig_base_reachable / 1000) / 2 + 2))"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh add ${subnet}3 lladdr $mac nud stale dev veth0"
+ for i in {1..20}; do
+ run_cmd "ip -n $ns1 neigh add ${subnet}$((i + 4)) nud none dev veth0"
+ done
+ periodic_gc_runs_t0=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("periodic_gc_runs")) | .["periodic_gc_runs"]')
+ run_cmd "sleep 10"
+ periodic_gc_runs_t1=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("periodic_gc_runs")) | .["periodic_gc_runs"]')
+ [[ $periodic_gc_runs_t1 -ne $periodic_gc_runs_t0 ]]
+ check_err $? "Periodic garbage collection did not run"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Entry with \"extern_valid\" flag did not survive periodic garbage collection"
+ run_cmd "ip -n $ns1 neigh get ${subnet}3 dev veth0"
+ check_fail $? "Entry without \"extern_valid\" flag survived periodic garbage collection"
+
+ log_test "$af_str \"extern_valid\" flag: Periodic garbage collection"
+
+ run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale $orig_gc_stale"
+ run_cmd "ip ntable change name $tbl_name thresh1 $orig_thresh1 base_reachable $orig_base_reachable"
+}
+
+extern_valid_ipv4()
+{
+ extern_valid_common "IPv4" 192.0.2.2 "arp_cache" 192.0.2.
+}
+
+extern_valid_ipv6()
+{
+ extern_valid_common "IPv6" 2001:db8:1::2 "ndisc_cache" 2001:db8:1::
+}
+
+################################################################################
+# Usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+ -p Pause on fail
+ -v Verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# Main
+
+while getopts ":t:pvh" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=$((VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+require_command jq
+
+if ! ip neigh help 2>&1 | grep -q "extern_valid"; then
+ echo "SKIP: iproute2 ip too old, missing \"extern_valid\" support"
+ exit "$ksft_skip"
+fi
+
+trap exit_cleanup_all EXIT
+
+for t in $TESTS
+do
+ setup; $t; cleanup_all_ns;
+done
diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
index 6127a78ee988..8deacc565afa 100755
--- a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
+++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
@@ -146,18 +146,17 @@ run_cmd()
}
check_hv_connectivity() {
- ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null
- sleep 1
- ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null
+ slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null
+ slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null
return $?
}
check_vm_connectivity() {
- run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12"
+ slowwait 5 run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12"
log_test $? 0 "VM connectivity over $1 (ipv4 default rdst)"
- run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22"
+ slowwait 5 run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22"
log_test $? 0 "VM connectivity over $1 (ipv6 default rdst)"
}
diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh
index e9c2f71da207..ce34cb2e6e0b 100755
--- a/tools/testing/selftests/net/vrf_route_leaking.sh
+++ b/tools/testing/selftests/net/vrf_route_leaking.sh
@@ -275,7 +275,7 @@ setup_sym()
# Wait for ip config to settle
- sleep 2
+ slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
}
setup_asym()
@@ -370,7 +370,7 @@ setup_asym()
ip -netns $r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad
# Wait for ip config to settle
- sleep 2
+ slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
}
check_connectivity()
diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index 94176ffe4646..40f5c2908dda 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -1,341 +1,26 @@
# SPDX-License-Identifier: GPL-2.0
-# Makefile for nolibc tests
-# we're in ".../tools/testing/selftests/nolibc"
-ifeq ($(srctree),)
-srctree := $(patsubst %/tools/testing/selftests/,%,$(dir $(CURDIR)))
-endif
-
-include $(srctree)/tools/scripts/utilities.mak
-# We need this for the "__cc-option" macro.
-include $(srctree)/scripts/Makefile.compiler
-
-ifneq ($(O),)
-ifneq ($(call is-absolute,$(O)),y)
-$(error Only absolute O= parameters are supported)
-endif
-objtree := $(O)
-else
-objtree ?= $(srctree)
-endif
-
-ifeq ($(ARCH),)
-include $(srctree)/scripts/subarch.include
-ARCH = $(SUBARCH)
-endif
-
-cc-option = $(call __cc-option, $(CC),$(CLANG_CROSS_FLAGS),$(1),$(2))
-
-# XARCH extends the kernel's ARCH with a few variants of the same
-# architecture that only differ by the configuration, the toolchain
-# and the Qemu program used. It is copied as-is into ARCH except for
-# a few specific values which are mapped like this:
-#
-# XARCH | ARCH | config
-# -------------|-----------|-------------------------
-# ppc | powerpc | 32 bits
-# ppc64 | powerpc | 64 bits big endian
-# ppc64le | powerpc | 64 bits little endian
-#
-# It is recommended to only use XARCH, though it does not harm if
-# ARCH is already set. For simplicity, ARCH is sufficient for all
-# architectures where both are equal.
-
-# configure default variants for target kernel supported architectures
-XARCH_powerpc = ppc
-XARCH_mips = mips32le
-XARCH_riscv = riscv64
-XARCH = $(or $(XARCH_$(ARCH)),$(ARCH))
-
-# map from user input variants to their kernel supported architectures
-ARCH_armthumb = arm
-ARCH_ppc = powerpc
-ARCH_ppc64 = powerpc
-ARCH_ppc64le = powerpc
-ARCH_mips32le = mips
-ARCH_mips32be = mips
-ARCH_riscv32 = riscv
-ARCH_riscv64 = riscv
-ARCH_s390x = s390
-ARCH_sparc32 = sparc
-ARCH_sparc64 = sparc
-ARCH := $(or $(ARCH_$(XARCH)),$(XARCH))
-# kernel image names by architecture
-IMAGE_i386 = arch/x86/boot/bzImage
-IMAGE_x86_64 = arch/x86/boot/bzImage
-IMAGE_x86 = arch/x86/boot/bzImage
-IMAGE_arm64 = arch/arm64/boot/Image
-IMAGE_arm = arch/arm/boot/zImage
-IMAGE_armthumb = arch/arm/boot/zImage
-IMAGE_mips32le = vmlinuz
-IMAGE_mips32be = vmlinuz
-IMAGE_ppc = vmlinux
-IMAGE_ppc64 = vmlinux
-IMAGE_ppc64le = arch/powerpc/boot/zImage
-IMAGE_riscv = arch/riscv/boot/Image
-IMAGE_riscv32 = arch/riscv/boot/Image
-IMAGE_riscv64 = arch/riscv/boot/Image
-IMAGE_s390x = arch/s390/boot/bzImage
-IMAGE_s390 = arch/s390/boot/bzImage
-IMAGE_loongarch = arch/loongarch/boot/vmlinuz.efi
-IMAGE_sparc32 = arch/sparc/boot/image
-IMAGE_sparc64 = arch/sparc/boot/image
-IMAGE_m68k = vmlinux
-IMAGE = $(objtree)/$(IMAGE_$(XARCH))
-IMAGE_NAME = $(notdir $(IMAGE))
+TEST_GEN_PROGS := nolibc-test
-# default kernel configurations that appear to be usable
-DEFCONFIG_i386 = defconfig
-DEFCONFIG_x86_64 = defconfig
-DEFCONFIG_x86 = defconfig
-DEFCONFIG_arm64 = defconfig
-DEFCONFIG_arm = multi_v7_defconfig
-DEFCONFIG_armthumb = multi_v7_defconfig
-DEFCONFIG_mips32le = malta_defconfig
-DEFCONFIG_mips32be = malta_defconfig generic/eb.config
-DEFCONFIG_ppc = pmac32_defconfig
-DEFCONFIG_ppc64 = powernv_be_defconfig
-DEFCONFIG_ppc64le = powernv_defconfig
-DEFCONFIG_riscv = defconfig
-DEFCONFIG_riscv32 = rv32_defconfig
-DEFCONFIG_riscv64 = defconfig
-DEFCONFIG_s390x = defconfig
-DEFCONFIG_s390 = defconfig compat.config
-DEFCONFIG_loongarch = defconfig
-DEFCONFIG_sparc32 = sparc32_defconfig
-DEFCONFIG_sparc64 = sparc64_defconfig
-DEFCONFIG_m68k = virt_defconfig
-DEFCONFIG = $(DEFCONFIG_$(XARCH))
+include ../lib.mk
+include $(top_srcdir)/scripts/Makefile.compiler
-EXTRACONFIG_m68k = -e CONFIG_BLK_DEV_INITRD
-EXTRACONFIG = $(EXTRACONFIG_$(XARCH))
-EXTRACONFIG_arm = -e CONFIG_NAMESPACES
-EXTRACONFIG_armthumb = -e CONFIG_NAMESPACES
-
-# optional tests to run (default = all)
-TEST =
-
-# QEMU_ARCH: arch names used by qemu
-QEMU_ARCH_i386 = i386
-QEMU_ARCH_x86_64 = x86_64
-QEMU_ARCH_x86 = x86_64
-QEMU_ARCH_arm64 = aarch64
-QEMU_ARCH_arm = arm
-QEMU_ARCH_armthumb = arm
-QEMU_ARCH_mips32le = mipsel # works with malta_defconfig
-QEMU_ARCH_mips32be = mips
-QEMU_ARCH_ppc = ppc
-QEMU_ARCH_ppc64 = ppc64
-QEMU_ARCH_ppc64le = ppc64
-QEMU_ARCH_riscv = riscv64
-QEMU_ARCH_riscv32 = riscv32
-QEMU_ARCH_riscv64 = riscv64
-QEMU_ARCH_s390x = s390x
-QEMU_ARCH_s390 = s390x
-QEMU_ARCH_loongarch = loongarch64
-QEMU_ARCH_sparc32 = sparc
-QEMU_ARCH_sparc64 = sparc64
-QEMU_ARCH_m68k = m68k
-QEMU_ARCH = $(QEMU_ARCH_$(XARCH))
-
-QEMU_ARCH_USER_ppc64le = ppc64le
-QEMU_ARCH_USER = $(or $(QEMU_ARCH_USER_$(XARCH)),$(QEMU_ARCH_$(XARCH)))
-
-QEMU_BIOS_DIR = /usr/share/edk2/
-QEMU_BIOS_loongarch = $(QEMU_BIOS_DIR)/loongarch64/OVMF_CODE.fd
-
-ifneq ($(QEMU_BIOS_$(XARCH)),)
-QEMU_ARGS_BIOS = -bios $(QEMU_BIOS_$(XARCH))
-endif
-
-# QEMU_ARGS : some arch-specific args to pass to qemu
-QEMU_ARGS_i386 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_x86_64 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_x86 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_arm64 = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_arm = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_armthumb = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_mips32le = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_mips32be = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_ppc = -M g3beige -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_ppc64 = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_ppc64le = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_riscv = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_riscv32 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_riscv64 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_s390x = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_s390 = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_sparc32 = -M SS-5 -m 256M -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_sparc64 = -M sun4u -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_m68k = -M virt -append "console=ttyGF0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS = -m 1G $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA)
-
-# OUTPUT is only set when run from the main makefile, otherwise
-# it defaults to this nolibc directory.
-OUTPUT ?= $(CURDIR)/
-
-ifeq ($(V),1)
-Q=
-else
-Q=@
-endif
+cc-option = $(call __cc-option, $(CC),,$(1),$(2))
-CFLAGS_i386 = $(call cc-option,-m32)
-CFLAGS_arm = -marm
-CFLAGS_armthumb = -mthumb -march=armv6t2
-CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
-CFLAGS_ppc64 = -m64 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
-CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2)
-CFLAGS_s390x = -m64
-CFLAGS_s390 = -m31
-CFLAGS_mips32le = -EL -mabi=32 -fPIC
-CFLAGS_mips32be = -EB -mabi=32
-CFLAGS_sparc32 = $(call cc-option,-m32)
-ifeq ($(origin XARCH),command line)
-CFLAGS_XARCH = $(CFLAGS_$(XARCH))
-endif
-CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all))
-CFLAGS_SANITIZER ?= $(call cc-option,-fsanitize=undefined -fsanitize-trap=all)
-CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \
- $(call cc-option,-fno-stack-protector) $(call cc-option,-Wmissing-prototypes) \
- $(CFLAGS_XARCH) $(CFLAGS_STACKPROTECTOR) $(CFLAGS_SANITIZER) $(CFLAGS_EXTRA)
-LDFLAGS :=
+include Makefile.include
-LIBGCC := -lgcc
+CFLAGS = -nostdlib -nostdinc -static \
+ -isystem $(top_srcdir)/tools/include/nolibc -isystem $(top_srcdir)/usr/include \
+ $(CFLAGS_NOLIBC_TEST)
-ifneq ($(LLVM),)
-# Not needed for clang
-LIBGCC :=
+ifeq ($(LLVM),)
+LDLIBS := -lgcc
endif
-# Modify CFLAGS based on LLVM=
-include $(srctree)/tools/scripts/Makefile.include
-
-# GCC uses "s390", clang "systemz"
-CLANG_CROSS_FLAGS := $(subst --target=s390-linux,--target=systemz-linux,$(CLANG_CROSS_FLAGS))
-
-REPORT ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++; print;} /\[SKIPPED\][\r]*$$/{s++} \
- END{ printf("\n%3d test(s): %3d passed, %3d skipped, %3d failed => status: ", p+s+f, p, s, f); \
- if (f || !p) printf("failure\n"); else if (s) printf("warning\n"); else printf("success\n");; \
- printf("\nSee all results in %s\n", ARGV[1]); }'
+$(OUTPUT)/nolibc-test: nolibc-test.c nolibc-test-linkage.c | headers
help:
- @echo "Supported targets under selftests/nolibc:"
- @echo " all call the \"run\" target below"
- @echo " help this help"
- @echo " sysroot create the nolibc sysroot here (uses \$$ARCH)"
- @echo " nolibc-test build the executable (uses \$$CC and \$$CROSS_COMPILE)"
- @echo " libc-test build an executable using the compiler's default libc instead"
- @echo " run-user runs the executable under QEMU (uses \$$XARCH, \$$TEST)"
- @echo " initramfs.cpio prepare the initramfs archive with nolibc-test"
- @echo " initramfs prepare the initramfs tree with nolibc-test"
- @echo " defconfig create a fresh new default config (uses \$$XARCH)"
- @echo " kernel (re)build the kernel (uses \$$XARCH)"
- @echo " kernel-standalone (re)build the kernel with the initramfs (uses \$$XARCH)"
- @echo " run runs the kernel in QEMU after building it (uses \$$XARCH, \$$TEST)"
- @echo " rerun runs a previously prebuilt kernel in QEMU (uses \$$XARCH, \$$TEST)"
- @echo " clean clean the sysroot, initramfs, build and output files"
- @echo ""
- @echo "The output file is \"run.out\". Test ranges may be passed using \$$TEST."
- @echo ""
- @echo "Currently using the following variables:"
- @echo " ARCH = $(ARCH)"
- @echo " XARCH = $(XARCH)"
- @echo " CROSS_COMPILE = $(CROSS_COMPILE)"
- @echo " CC = $(CC)"
- @echo " OUTPUT = $(OUTPUT)"
- @echo " TEST = $(TEST)"
- @echo " QEMU_ARCH = $(if $(QEMU_ARCH),$(QEMU_ARCH),UNKNOWN_ARCH) [determined from \$$XARCH]"
- @echo " IMAGE_NAME = $(if $(IMAGE_NAME),$(IMAGE_NAME),UNKNOWN_ARCH) [determined from \$$XARCH]"
- @echo ""
-
-all: run
-
-sysroot: sysroot/$(ARCH)/include
-
-sysroot/$(ARCH)/include:
- $(Q)rm -rf sysroot/$(ARCH) sysroot/sysroot
- $(QUIET_MKDIR)mkdir -p sysroot
- $(Q)$(MAKE) -C $(srctree) outputmakefile
- $(Q)$(MAKE) -C $(srctree)/tools/include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone headers_check
- $(Q)mv sysroot/sysroot sysroot/$(ARCH)
-
-ifneq ($(NOLIBC_SYSROOT),0)
-nolibc-test: nolibc-test.c nolibc-test-linkage.c sysroot/$(ARCH)/include
- $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \
- -nostdlib -nostdinc -static -Isysroot/$(ARCH)/include nolibc-test.c nolibc-test-linkage.c $(LIBGCC)
-else
-nolibc-test: nolibc-test.c nolibc-test-linkage.c
- $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \
- -nostdlib -static -include $(srctree)/tools/include/nolibc/nolibc.h nolibc-test.c nolibc-test-linkage.c $(LIBGCC)
-endif
-
-libc-test: nolibc-test.c nolibc-test-linkage.c
- $(QUIET_CC)$(HOSTCC) -o $@ nolibc-test.c nolibc-test-linkage.c
-
-# local libc-test
-run-libc-test: libc-test
- $(Q)./libc-test > "$(CURDIR)/run.out" || :
- $(Q)$(REPORT) $(CURDIR)/run.out
-
-# local nolibc-test
-run-nolibc-test: nolibc-test
- $(Q)./nolibc-test > "$(CURDIR)/run.out" || :
- $(Q)$(REPORT) $(CURDIR)/run.out
-
-# qemu user-land test
-run-user: nolibc-test
- $(Q)qemu-$(QEMU_ARCH_USER) ./nolibc-test > "$(CURDIR)/run.out" || :
- $(Q)$(REPORT) $(CURDIR)/run.out
-
-initramfs.cpio: kernel nolibc-test
- $(QUIET_GEN)echo 'file /init nolibc-test 755 0 0' | $(objtree)/usr/gen_init_cpio - > initramfs.cpio
-
-initramfs: nolibc-test
- $(QUIET_MKDIR)mkdir -p initramfs
- $(call QUIET_INSTALL, initramfs/init)
- $(Q)cp nolibc-test initramfs/init
-
-defconfig:
- $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(DEFCONFIG)
- $(Q)if [ -n "$(EXTRACONFIG)" ]; then \
- $(srctree)/scripts/config --file $(objtree)/.config $(EXTRACONFIG); \
- $(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) olddefconfig < /dev/null; \
- fi
-
-kernel: | defconfig
- $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) < /dev/null
-
-kernel-standalone: initramfs | defconfig
- $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs < /dev/null
-
-# run the tests after building the kernel
-run: kernel initramfs.cpio
- $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out"
- $(Q)$(REPORT) $(CURDIR)/run.out
-
-# re-run the tests from an existing kernel
-rerun:
- $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out"
- $(Q)$(REPORT) $(CURDIR)/run.out
-
-# report with existing test log
-report:
- $(Q)$(REPORT) $(CURDIR)/run.out
-
-clean:
- $(call QUIET_CLEAN, sysroot)
- $(Q)rm -rf sysroot
- $(call QUIET_CLEAN, nolibc-test)
- $(Q)rm -f nolibc-test
- $(call QUIET_CLEAN, libc-test)
- $(Q)rm -f libc-test
- $(call QUIET_CLEAN, initramfs.cpio)
- $(Q)rm -rf initramfs.cpio
- $(call QUIET_CLEAN, initramfs)
- $(Q)rm -rf initramfs
- $(call QUIET_CLEAN, run.out)
- $(Q)rm -rf run.out
+ @echo "For the custom nolibc testsuite use '$(MAKE) -f Makefile.nolibc'; available targets:"
+ @$(MAKE) -f Makefile.nolibc help
-.PHONY: sysroot/$(ARCH)/include
+.PHONY: help
diff --git a/tools/testing/selftests/nolibc/Makefile.include b/tools/testing/selftests/nolibc/Makefile.include
new file mode 100644
index 000000000000..66287fafbbe0
--- /dev/null
+++ b/tools/testing/selftests/nolibc/Makefile.include
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+
+__CFLAGS_STACKPROTECTOR = $(call cc-option,-fstack-protector-all) $(call cc-option,-mstack-protector-guard=global)
+_CFLAGS_STACKPROTECTOR ?= $(call try-run, \
+ echo 'void foo(void) {}' | $(CC) -x c - -o - -S $(CLANG_CROSS_FLAGS) $(__CFLAGS_STACKPROTECTOR) | grep -q __stack_chk_guard, \
+ $(__CFLAGS_STACKPROTECTOR))
+_CFLAGS_SANITIZER ?= $(call cc-option,-fsanitize=undefined -fsanitize-trap=all)
+CFLAGS_NOLIBC_TEST ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \
+ $(call cc-option,-fno-stack-protector) $(call cc-option,-Wmissing-prototypes) \
+ $(_CFLAGS_STACKPROTECTOR) $(_CFLAGS_SANITIZER)
diff --git a/tools/testing/selftests/nolibc/Makefile.nolibc b/tools/testing/selftests/nolibc/Makefile.nolibc
new file mode 100644
index 000000000000..0fb759ba992e
--- /dev/null
+++ b/tools/testing/selftests/nolibc/Makefile.nolibc
@@ -0,0 +1,383 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for nolibc tests
+# we're in ".../tools/testing/selftests/nolibc"
+ifeq ($(srctree),)
+srctree := $(patsubst %/tools/testing/selftests/,%,$(dir $(CURDIR)))
+endif
+
+include $(srctree)/tools/scripts/utilities.mak
+# We need this for the "__cc-option" macro.
+include $(srctree)/scripts/Makefile.compiler
+
+ifneq ($(O),)
+ifneq ($(call is-absolute,$(O)),y)
+$(error Only absolute O= parameters are supported)
+endif
+objtree := $(O)
+else
+objtree ?= $(srctree)
+endif
+
+ifeq ($(ARCH),)
+include $(srctree)/scripts/subarch.include
+ARCH = $(SUBARCH)
+endif
+
+cc-option = $(call __cc-option, $(CC),$(CLANG_CROSS_FLAGS),$(1),$(2))
+
+# XARCH extends the kernel's ARCH with a few variants of the same
+# architecture that only differ by the configuration, the toolchain
+# and the Qemu program used. It is copied as-is into ARCH except for
+# a few specific values which are mapped like this:
+#
+# XARCH | ARCH | config
+# -------------|-----------|-------------------------
+# ppc | powerpc | 32 bits
+# ppc64 | powerpc | 64 bits big endian
+# ppc64le | powerpc | 64 bits little endian
+#
+# It is recommended to only use XARCH, though it does not harm if
+# ARCH is already set. For simplicity, ARCH is sufficient for all
+# architectures where both are equal.
+
+# configure default variants for target kernel supported architectures
+XARCH_powerpc = ppc
+XARCH_mips = mips32le
+XARCH_riscv = riscv64
+XARCH = $(or $(XARCH_$(ARCH)),$(ARCH))
+
+# map from user input variants to their kernel supported architectures
+ARCH_x32 = x86
+ARCH_armthumb = arm
+ARCH_ppc = powerpc
+ARCH_ppc64 = powerpc
+ARCH_ppc64le = powerpc
+ARCH_mips32le = mips
+ARCH_mips32be = mips
+ARCH_mipsn32le = mips
+ARCH_mipsn32be = mips
+ARCH_mips64le = mips
+ARCH_mips64be = mips
+ARCH_riscv32 = riscv
+ARCH_riscv64 = riscv
+ARCH_s390x = s390
+ARCH_sparc32 = sparc
+ARCH_sparc64 = sparc
+ARCH_sh4 = sh
+ARCH := $(or $(ARCH_$(XARCH)),$(XARCH))
+
+# kernel image names by architecture
+IMAGE_i386 = arch/x86/boot/bzImage
+IMAGE_x86_64 = arch/x86/boot/bzImage
+IMAGE_x32 = arch/x86/boot/bzImage
+IMAGE_x86 = arch/x86/boot/bzImage
+IMAGE_arm64 = arch/arm64/boot/Image
+IMAGE_arm = arch/arm/boot/zImage
+IMAGE_armthumb = arch/arm/boot/zImage
+IMAGE_mips32le = vmlinuz
+IMAGE_mips32be = vmlinuz
+IMAGE_mipsn32le = vmlinuz
+IMAGE_mipsn32be = vmlinuz
+IMAGE_mips64le = vmlinuz
+IMAGE_mips64be = vmlinuz
+IMAGE_ppc = vmlinux
+IMAGE_ppc64 = vmlinux
+IMAGE_ppc64le = arch/powerpc/boot/zImage
+IMAGE_riscv = arch/riscv/boot/Image
+IMAGE_riscv32 = arch/riscv/boot/Image
+IMAGE_riscv64 = arch/riscv/boot/Image
+IMAGE_s390x = arch/s390/boot/bzImage
+IMAGE_s390 = arch/s390/boot/bzImage
+IMAGE_loongarch = arch/loongarch/boot/vmlinuz.efi
+IMAGE_sparc32 = arch/sparc/boot/image
+IMAGE_sparc64 = arch/sparc/boot/image
+IMAGE_m68k = vmlinux
+IMAGE_sh4 = arch/sh/boot/zImage
+IMAGE = $(objtree)/$(IMAGE_$(XARCH))
+IMAGE_NAME = $(notdir $(IMAGE))
+
+# default kernel configurations that appear to be usable
+DEFCONFIG_i386 = defconfig
+DEFCONFIG_x86_64 = defconfig
+DEFCONFIG_x32 = defconfig
+DEFCONFIG_x86 = defconfig
+DEFCONFIG_arm64 = defconfig
+DEFCONFIG_arm = multi_v7_defconfig
+DEFCONFIG_armthumb = multi_v7_defconfig
+DEFCONFIG_mips32le = malta_defconfig
+DEFCONFIG_mips32be = malta_defconfig generic/eb.config
+DEFCONFIG_mipsn32le = malta_defconfig generic/64r2.config
+DEFCONFIG_mipsn32be = malta_defconfig generic/64r6.config generic/eb.config
+DEFCONFIG_mips64le = malta_defconfig generic/64r6.config
+DEFCONFIG_mips64be = malta_defconfig generic/64r2.config generic/eb.config
+DEFCONFIG_ppc = pmac32_defconfig
+DEFCONFIG_ppc64 = powernv_be_defconfig
+DEFCONFIG_ppc64le = powernv_defconfig
+DEFCONFIG_riscv = defconfig
+DEFCONFIG_riscv32 = rv32_defconfig
+DEFCONFIG_riscv64 = defconfig
+DEFCONFIG_s390x = defconfig
+DEFCONFIG_s390 = defconfig compat.config
+DEFCONFIG_loongarch = defconfig
+DEFCONFIG_sparc32 = sparc32_defconfig
+DEFCONFIG_sparc64 = sparc64_defconfig
+DEFCONFIG_m68k = virt_defconfig
+DEFCONFIG_sh4 = rts7751r2dplus_defconfig
+DEFCONFIG = $(DEFCONFIG_$(XARCH))
+
+EXTRACONFIG_x32 = -e CONFIG_X86_X32_ABI
+EXTRACONFIG_arm = -e CONFIG_NAMESPACES
+EXTRACONFIG_armthumb = -e CONFIG_NAMESPACES
+EXTRACONFIG_m68k = -e CONFIG_BLK_DEV_INITRD
+EXTRACONFIG_sh4 = -e CONFIG_BLK_DEV_INITRD -e CONFIG_CMDLINE_FROM_BOOTLOADER
+EXTRACONFIG = $(EXTRACONFIG_$(XARCH))
+
+# optional tests to run (default = all)
+TEST =
+
+# QEMU_ARCH: arch names used by qemu
+QEMU_ARCH_i386 = i386
+QEMU_ARCH_x86_64 = x86_64
+QEMU_ARCH_x32 = x86_64
+QEMU_ARCH_x86 = x86_64
+QEMU_ARCH_arm64 = aarch64
+QEMU_ARCH_arm = arm
+QEMU_ARCH_armthumb = arm
+QEMU_ARCH_mips32le = mipsel # works with malta_defconfig
+QEMU_ARCH_mips32be = mips
+QEMU_ARCH_mipsn32le = mips64el
+QEMU_ARCH_mipsn32be = mips64
+QEMU_ARCH_mips64le = mips64el
+QEMU_ARCH_mips64be = mips64
+QEMU_ARCH_ppc = ppc
+QEMU_ARCH_ppc64 = ppc64
+QEMU_ARCH_ppc64le = ppc64
+QEMU_ARCH_riscv = riscv64
+QEMU_ARCH_riscv32 = riscv32
+QEMU_ARCH_riscv64 = riscv64
+QEMU_ARCH_s390x = s390x
+QEMU_ARCH_s390 = s390x
+QEMU_ARCH_loongarch = loongarch64
+QEMU_ARCH_sparc32 = sparc
+QEMU_ARCH_sparc64 = sparc64
+QEMU_ARCH_m68k = m68k
+QEMU_ARCH_sh4 = sh4
+QEMU_ARCH = $(QEMU_ARCH_$(XARCH))
+
+QEMU_ARCH_USER_ppc64le = ppc64le
+QEMU_ARCH_USER_mipsn32le = mipsn32el
+QEMU_ARCH_USER_mipsn32be = mipsn32
+QEMU_ARCH_USER = $(or $(QEMU_ARCH_USER_$(XARCH)),$(QEMU_ARCH_$(XARCH)))
+
+QEMU_BIOS_DIR = /usr/share/edk2/
+QEMU_BIOS_loongarch = $(QEMU_BIOS_DIR)/loongarch64/OVMF_CODE.fd
+
+ifneq ($(QEMU_BIOS_$(XARCH)),)
+QEMU_ARGS_BIOS = -bios $(QEMU_BIOS_$(XARCH))
+endif
+
+# QEMU_ARGS : some arch-specific args to pass to qemu
+QEMU_ARGS_i386 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_x86_64 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_x32 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_x86 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_arm64 = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_arm = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_armthumb = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_mips32le = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_mips32be = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_mipsn32le = -M malta -cpu 5KEc -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_mipsn32be = -M malta -cpu I6400 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_mips64le = -M malta -cpu I6400 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_mips64be = -M malta -cpu 5KEc -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_ppc = -M g3beige -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_ppc64 = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_ppc64le = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_riscv = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_riscv32 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_riscv64 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_s390x = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_s390 = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_sparc32 = -M SS-5 -m 256M -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_sparc64 = -M sun4u -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_m68k = -M virt -append "console=ttyGF0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_sh4 = -M r2d -serial file:/dev/stdout -append "console=ttySC1,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS = -m 1G $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA)
+
+# OUTPUT is only set when run from the main makefile, otherwise
+# it defaults to this nolibc directory.
+OUTPUT ?= $(CURDIR)/
+
+ifeq ($(V),1)
+Q=
+else
+Q=@
+endif
+
+CFLAGS_i386 = $(call cc-option,-m32)
+CFLAGS_x32 = -mx32
+CFLAGS_arm = -marm
+CFLAGS_armthumb = -mthumb -march=armv6t2
+CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
+CFLAGS_ppc64 = -m64 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
+CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2)
+CFLAGS_s390x = -m64
+CFLAGS_s390 = -m31
+CFLAGS_mips32le = -EL -mabi=32 -fPIC
+CFLAGS_mips32be = -EB -mabi=32
+CFLAGS_mipsn32le = -EL -mabi=n32 -fPIC -march=mips64r2
+CFLAGS_mipsn32be = -EB -mabi=n32 -march=mips64r6
+CFLAGS_mips64le = -EL -mabi=64 -march=mips64r6
+CFLAGS_mips64be = -EB -mabi=64 -march=mips64r2
+CFLAGS_sparc32 = $(call cc-option,-m32)
+CFLAGS_sh4 = -ml -m4
+ifeq ($(origin XARCH),command line)
+CFLAGS_XARCH = $(CFLAGS_$(XARCH))
+endif
+
+include Makefile.include
+
+CFLAGS ?= $(CFLAGS_NOLIBC_TEST) $(CFLAGS_XARCH) $(CFLAGS_EXTRA)
+LDFLAGS :=
+
+LIBGCC := -lgcc
+
+ifeq ($(ARCH),x86)
+# Not needed on x86, probably not present for x32
+LIBGCC :=
+endif
+
+ifneq ($(LLVM),)
+# Not needed for clang
+LIBGCC :=
+endif
+
+# Modify CFLAGS based on LLVM=
+include $(srctree)/tools/scripts/Makefile.include
+
+REPORT ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++; print;} /\[SKIPPED\][\r]*$$/{s++} \
+ /^Total number of errors:/{done++} \
+ END{ printf("\n%3d test(s): %3d passed, %3d skipped, %3d failed => status: ", p+s+f, p, s, f); \
+ if (f || !p || !done) printf("failure\n"); else if (s) printf("warning\n"); else printf("success\n");; \
+ printf("\nSee all results in %s\n", ARGV[1]); }'
+
+help:
+ @echo "Supported targets under selftests/nolibc:"
+ @echo " all call the \"run\" target below"
+ @echo " help this help"
+ @echo " sysroot create the nolibc sysroot here (uses \$$ARCH)"
+ @echo " nolibc-test build the executable (uses \$$CC and \$$CROSS_COMPILE)"
+ @echo " libc-test build an executable using the compiler's default libc instead"
+ @echo " run-user runs the executable under QEMU (uses \$$XARCH, \$$TEST)"
+ @echo " initramfs.cpio prepare the initramfs archive with nolibc-test"
+ @echo " initramfs prepare the initramfs tree with nolibc-test"
+ @echo " defconfig create a fresh new default config (uses \$$XARCH)"
+ @echo " kernel (re)build the kernel (uses \$$XARCH)"
+ @echo " kernel-standalone (re)build the kernel with the initramfs (uses \$$XARCH)"
+ @echo " run runs the kernel in QEMU after building it (uses \$$XARCH, \$$TEST)"
+ @echo " rerun runs a previously prebuilt kernel in QEMU (uses \$$XARCH, \$$TEST)"
+ @echo " clean clean the sysroot, initramfs, build and output files"
+ @echo ""
+ @echo "The output file is \"run.out\". Test ranges may be passed using \$$TEST."
+ @echo ""
+ @echo "Currently using the following variables:"
+ @echo " ARCH = $(ARCH)"
+ @echo " XARCH = $(XARCH)"
+ @echo " CROSS_COMPILE = $(CROSS_COMPILE)"
+ @echo " CC = $(CC)"
+ @echo " OUTPUT = $(OUTPUT)"
+ @echo " TEST = $(TEST)"
+ @echo " QEMU_ARCH = $(if $(QEMU_ARCH),$(QEMU_ARCH),UNKNOWN_ARCH) [determined from \$$XARCH]"
+ @echo " IMAGE_NAME = $(if $(IMAGE_NAME),$(IMAGE_NAME),UNKNOWN_ARCH) [determined from \$$XARCH]"
+ @echo ""
+
+all: run
+
+sysroot: sysroot/$(ARCH)/include
+
+sysroot/$(ARCH)/include:
+ $(Q)rm -rf sysroot/$(ARCH) sysroot/sysroot
+ $(QUIET_MKDIR)mkdir -p sysroot
+ $(Q)$(MAKE) -C $(srctree) outputmakefile
+ $(Q)$(MAKE) -C $(srctree)/tools/include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone headers_check
+ $(Q)mv sysroot/sysroot sysroot/$(ARCH)
+
+ifneq ($(NOLIBC_SYSROOT),0)
+nolibc-test: nolibc-test.c nolibc-test-linkage.c sysroot/$(ARCH)/include
+ $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \
+ -nostdlib -nostdinc -static -Isysroot/$(ARCH)/include nolibc-test.c nolibc-test-linkage.c $(LIBGCC)
+else
+nolibc-test: nolibc-test.c nolibc-test-linkage.c
+ $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \
+ -nostdlib -static -include $(srctree)/tools/include/nolibc/nolibc.h nolibc-test.c nolibc-test-linkage.c $(LIBGCC)
+endif
+
+libc-test: nolibc-test.c nolibc-test-linkage.c
+ $(QUIET_CC)$(HOSTCC) -o $@ nolibc-test.c nolibc-test-linkage.c
+
+# local libc-test
+run-libc-test: libc-test
+ $(Q)./libc-test > "$(CURDIR)/run.out" || :
+ $(Q)$(REPORT) $(CURDIR)/run.out
+
+# local nolibc-test
+run-nolibc-test: nolibc-test
+ $(Q)./nolibc-test > "$(CURDIR)/run.out" || :
+ $(Q)$(REPORT) $(CURDIR)/run.out
+
+# qemu user-land test
+run-user: nolibc-test
+ $(Q)qemu-$(QEMU_ARCH_USER) ./nolibc-test > "$(CURDIR)/run.out" || :
+ $(Q)$(REPORT) $(CURDIR)/run.out
+
+initramfs.cpio: kernel nolibc-test
+ $(QUIET_GEN)echo 'file /init nolibc-test 755 0 0' | $(objtree)/usr/gen_init_cpio - > initramfs.cpio
+
+initramfs: nolibc-test
+ $(QUIET_MKDIR)mkdir -p initramfs
+ $(call QUIET_INSTALL, initramfs/init)
+ $(Q)cp nolibc-test initramfs/init
+
+defconfig:
+ $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(DEFCONFIG)
+ $(Q)if [ -n "$(EXTRACONFIG)" ]; then \
+ $(srctree)/scripts/config --file $(objtree)/.config $(EXTRACONFIG); \
+ $(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) olddefconfig < /dev/null; \
+ fi
+
+kernel:
+ $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) < /dev/null
+
+kernel-standalone: initramfs
+ $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs < /dev/null
+
+# run the tests after building the kernel
+run: kernel initramfs.cpio
+ $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(IMAGE)" -initrd initramfs.cpio -serial file:/dev/stdout $(QEMU_ARGS) > "$(CURDIR)/run.out"
+ $(Q)$(REPORT) $(CURDIR)/run.out
+
+# re-run the tests from an existing kernel
+rerun:
+ $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(IMAGE)" -initrd initramfs.cpio -serial file:/dev/stdout $(QEMU_ARGS) > "$(CURDIR)/run.out"
+ $(Q)$(REPORT) $(CURDIR)/run.out
+
+# report with existing test log
+report:
+ $(Q)$(REPORT) $(CURDIR)/run.out
+
+clean:
+ $(call QUIET_CLEAN, sysroot)
+ $(Q)rm -rf sysroot
+ $(call QUIET_CLEAN, nolibc-test)
+ $(Q)rm -f nolibc-test
+ $(call QUIET_CLEAN, libc-test)
+ $(Q)rm -f libc-test
+ $(call QUIET_CLEAN, initramfs.cpio)
+ $(Q)rm -rf initramfs.cpio
+ $(call QUIET_CLEAN, initramfs)
+ $(Q)rm -rf initramfs
+ $(call QUIET_CLEAN, run.out)
+ $(Q)rm -rf run.out
+
+.PHONY: sysroot/$(ARCH)/include
diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index dbe13000fb1a..a297ee0d6d07 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -877,7 +877,12 @@ int test_file_stream(void)
return 0;
}
-int test_fork(void)
+enum fork_type {
+ FORK_STANDARD,
+ FORK_VFORK,
+};
+
+int test_fork(enum fork_type type)
{
int status;
pid_t pid;
@@ -886,14 +891,23 @@ int test_fork(void)
fflush(stdout);
fflush(stderr);
- pid = fork();
+ switch (type) {
+ case FORK_STANDARD:
+ pid = fork();
+ break;
+ case FORK_VFORK:
+ pid = vfork();
+ break;
+ default:
+ return 1;
+ }
switch (pid) {
case -1:
return 1;
case 0:
- exit(123);
+ _exit(123);
default:
pid = waitpid(pid, &status, 0);
@@ -1330,7 +1344,7 @@ int run_syscall(int min, int max)
CASE_TEST(dup3_m1); tmp = dup3(-1, 100, 0); EXPECT_SYSER(1, tmp, -1, EBADF); if (tmp != -1) close(tmp); break;
CASE_TEST(execve_root); EXPECT_SYSER(1, execve("/", (char*[]){ [0] = "/", [1] = NULL }, NULL), -1, EACCES); break;
CASE_TEST(file_stream); EXPECT_SYSZR(1, test_file_stream()); break;
- CASE_TEST(fork); EXPECT_SYSZR(1, test_fork()); break;
+ CASE_TEST(fork); EXPECT_SYSZR(1, test_fork(FORK_STANDARD)); break;
CASE_TEST(getdents64_root); EXPECT_SYSNE(1, test_getdents64("/"), -1); break;
CASE_TEST(getdents64_null); EXPECT_SYSER(1, test_getdents64("/dev/null"), -1, ENOTDIR); break;
CASE_TEST(directories); EXPECT_SYSZR(proc, test_dirent()); break;
@@ -1349,6 +1363,7 @@ int run_syscall(int min, int max)
CASE_TEST(mmap_bad); EXPECT_PTRER(1, mmap(NULL, 0, PROT_READ, MAP_PRIVATE, 0, 0), MAP_FAILED, EINVAL); break;
CASE_TEST(munmap_bad); EXPECT_SYSER(1, munmap(NULL, 0), -1, EINVAL); break;
CASE_TEST(mmap_munmap_good); EXPECT_SYSZR(1, test_mmap_munmap()); break;
+ CASE_TEST(nanosleep); ts.tv_nsec = -1; EXPECT_SYSER(1, nanosleep(&ts, NULL), -1, EINVAL); break;
CASE_TEST(open_tty); EXPECT_SYSNE(1, tmp = open("/dev/null", O_RDONLY), -1); if (tmp != -1) close(tmp); break;
CASE_TEST(open_blah); EXPECT_SYSER(1, tmp = open("/proc/self/blah", O_RDONLY), -1, ENOENT); if (tmp != -1) close(tmp); break;
CASE_TEST(openat_dir); EXPECT_SYSZR(1, test_openat()); break;
@@ -1374,6 +1389,7 @@ int run_syscall(int min, int max)
CASE_TEST(uname_fault); EXPECT_SYSER(1, uname(NULL), -1, EFAULT); break;
CASE_TEST(unlink_root); EXPECT_SYSER(1, unlink("/"), -1, EISDIR); break;
CASE_TEST(unlink_blah); EXPECT_SYSER(1, unlink("/proc/self/blah"), -1, ENOENT); break;
+ CASE_TEST(vfork); EXPECT_SYSZR(1, test_fork(FORK_VFORK)); break;
CASE_TEST(wait_child); EXPECT_SYSER(1, wait(&tmp), -1, ECHILD); break;
CASE_TEST(waitpid_min); EXPECT_SYSER(1, waitpid(INT_MIN, &tmp, WNOHANG), -1, ESRCH); break;
CASE_TEST(waitpid_child); EXPECT_SYSER(1, waitpid(getpid(), &tmp, WNOHANG), -1, ECHILD); break;
@@ -1413,7 +1429,7 @@ int run_stdlib(int min, int max)
* Add some more chars after the \0, to test functions that overwrite the buffer set
* the \0 at the exact right position.
*/
- char buf[10] = "test123456";
+ char buf[11] = "test123456";
buf[4] = '\0';
@@ -1646,6 +1662,28 @@ int test_strerror(void)
return 0;
}
+static int test_printf_error(void)
+{
+ int fd, ret, saved_errno;
+
+ fd = open("/dev/full", O_RDWR);
+ if (fd == -1)
+ return 1;
+
+ errno = 0;
+ ret = dprintf(fd, "foo");
+ saved_errno = errno;
+ close(fd);
+
+ if (ret != -1)
+ return 2;
+
+ if (saved_errno != ENOSPC)
+ return 3;
+
+ return 0;
+}
+
static int run_printf(int min, int max)
{
int test;
@@ -1675,6 +1713,7 @@ static int run_printf(int min, int max)
CASE_TEST(width_trunc); EXPECT_VFPRINTF(25, " ", "%25d", 1); break;
CASE_TEST(scanf); EXPECT_ZR(1, test_scanf()); break;
CASE_TEST(strerror); EXPECT_ZR(1, test_strerror()); break;
+ CASE_TEST(printf_error); EXPECT_ZR(1, test_printf_error()); break;
case __LINE__:
return ret; /* must be last */
/* note: do not set any defaults so as to permit holes above */
@@ -1762,12 +1801,14 @@ int prepare(void)
if (stat("/dev/.", &stat_buf) == 0 || mkdir("/dev", 0755) == 0) {
if (stat("/dev/console", &stat_buf) != 0 ||
stat("/dev/null", &stat_buf) != 0 ||
- stat("/dev/zero", &stat_buf) != 0) {
+ stat("/dev/zero", &stat_buf) != 0 ||
+ stat("/dev/full", &stat_buf) != 0) {
/* try devtmpfs first, otherwise fall back to manual creation */
if (mount("/dev", "/dev", "devtmpfs", 0, 0) != 0) {
mknod("/dev/console", 0600 | S_IFCHR, makedev(5, 1));
mknod("/dev/null", 0666 | S_IFCHR, makedev(1, 3));
mknod("/dev/zero", 0666 | S_IFCHR, makedev(1, 5));
+ mknod("/dev/full", 0666 | S_IFCHR, makedev(1, 7));
}
}
}
diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh
index 8277599e6441..e8af1fb505cf 100755
--- a/tools/testing/selftests/nolibc/run-tests.sh
+++ b/tools/testing/selftests/nolibc/run-tests.sh
@@ -18,15 +18,16 @@ test_mode=system
werror=1
llvm=
all_archs=(
- i386 x86_64
+ i386 x86_64 x32
arm64 arm armthumb
- mips32le mips32be
+ mips32le mips32be mipsn32le mipsn32be mips64le mips64be
ppc ppc64 ppc64le
riscv32 riscv64
s390x s390
loongarch
sparc32 sparc64
m68k
+ sh4
)
archs="${all_archs[@]}"
@@ -114,6 +115,7 @@ crosstool_arch() {
mips*) echo mips;;
s390*) echo s390;;
sparc*) echo sparc64;;
+ x32*) echo x86_64;;
*) echo "$1";;
esac
}
@@ -169,7 +171,7 @@ test_arch() {
if [ "$werror" -ne 0 ]; then
CFLAGS_EXTRA="$CFLAGS_EXTRA -Werror"
fi
- MAKE=(make -j"${nproc}" XARCH="${arch}" CROSS_COMPILE="${cross_compile}" LLVM="${llvm}" O="${build_dir}")
+ MAKE=(make -f Makefile.nolibc -j"${nproc}" XARCH="${arch}" CROSS_COMPILE="${cross_compile}" LLVM="${llvm}" O="${build_dir}")
case "$test_mode" in
'system')
@@ -187,7 +189,11 @@ test_arch() {
echo "Unsupported configuration"
return
fi
- if [ "$arch" = "m68k" ] && [ "$llvm" = "1" ]; then
+ if [ "$arch" = "m68k" -o "$arch" = "sh4" ] && [ "$llvm" = "1" ]; then
+ echo "Unsupported configuration"
+ return
+ fi
+ if [ "$arch" = "x32" ] && [ "$test_mode" = "user" ]; then
echo "Unsupported configuration"
return
fi
diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore
index 0406a065deb4..144e7ff65d6a 100644
--- a/tools/testing/selftests/pidfd/.gitignore
+++ b/tools/testing/selftests/pidfd/.gitignore
@@ -10,3 +10,5 @@ pidfd_file_handle_test
pidfd_bind_mount
pidfd_info_test
pidfd_exec_helper
+pidfd_xattr_test
+pidfd_setattr_test
diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile
index fcbefc0d77f6..764a8f9ecefa 100644
--- a/tools/testing/selftests/pidfd/Makefile
+++ b/tools/testing/selftests/pidfd/Makefile
@@ -1,9 +1,10 @@
# SPDX-License-Identifier: GPL-2.0-only
-CFLAGS += -g $(KHDR_INCLUDES) -pthread -Wall
+CFLAGS += -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) -pthread -Wall
TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \
pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test \
- pidfd_file_handle_test pidfd_bind_mount pidfd_info_test
+ pidfd_file_handle_test pidfd_bind_mount pidfd_info_test \
+ pidfd_xattr_test pidfd_setattr_test
TEST_GEN_PROGS_EXTENDED := pidfd_exec_helper
diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h
index efd74063126e..f87993def738 100644
--- a/tools/testing/selftests/pidfd/pidfd.h
+++ b/tools/testing/selftests/pidfd/pidfd.h
@@ -16,9 +16,22 @@
#include <sys/types.h>
#include <sys/wait.h>
+/*
+ * Remove the userspace definitions of the following preprocessor symbols
+ * to avoid duplicate-definition warnings from the subsequent in-kernel
+ * definitions.
+ */
+#undef SCHED_NORMAL
+#undef SCHED_FLAG_KEEP_ALL
+#undef SCHED_FLAG_UTIL_CLAMP
+
#include "../kselftest.h"
#include "../clone3/clone3_selftests.h"
+#ifndef FD_PIDFS_ROOT
+#define FD_PIDFS_ROOT -10002
+#endif
+
#ifndef P_PIDFD
#define P_PIDFD 3
#endif
@@ -56,7 +69,7 @@
#endif
#ifndef PIDFD_SELF_THREAD_GROUP
-#define PIDFD_SELF_THREAD_GROUP -20000 /* Current thread group leader. */
+#define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */
#endif
#ifndef PIDFD_SELF
diff --git a/tools/testing/selftests/pidfd/pidfd_file_handle_test.c b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c
index 439b9c6c0457..6bd2e9c9565b 100644
--- a/tools/testing/selftests/pidfd/pidfd_file_handle_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c
@@ -500,4 +500,64 @@ TEST_F(file_handle, valid_name_to_handle_at_flags)
ASSERT_EQ(close(pidfd), 0);
}
+/*
+ * That we decode a file handle without having to pass a pidfd.
+ */
+TEST_F(file_handle, decode_purely_based_on_file_handle)
+{
+ int mnt_id;
+ struct file_handle *fh;
+ int pidfd = -EBADF;
+ struct stat st1, st2;
+
+ fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(fh, NULL);
+ memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ);
+ fh->handle_bytes = MAX_HANDLE_SZ;
+
+ ASSERT_EQ(name_to_handle_at(self->child_pidfd1, "", fh, &mnt_id, AT_EMPTY_PATH), 0);
+
+ ASSERT_EQ(fstat(self->child_pidfd1, &st1), 0);
+
+ pidfd = open_by_handle_at(FD_PIDFS_ROOT, fh, 0);
+ ASSERT_GE(pidfd, 0);
+
+ ASSERT_EQ(fstat(pidfd, &st2), 0);
+ ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
+
+ ASSERT_EQ(close(pidfd), 0);
+
+ pidfd = open_by_handle_at(FD_PIDFS_ROOT, fh, O_CLOEXEC);
+ ASSERT_GE(pidfd, 0);
+
+ ASSERT_EQ(fstat(pidfd, &st2), 0);
+ ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
+
+ ASSERT_EQ(close(pidfd), 0);
+
+ pidfd = open_by_handle_at(FD_PIDFS_ROOT, fh, O_NONBLOCK);
+ ASSERT_GE(pidfd, 0);
+
+ ASSERT_EQ(fstat(pidfd, &st2), 0);
+ ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
+
+ ASSERT_EQ(close(pidfd), 0);
+
+ pidfd = open_by_handle_at(self->pidfd, fh, 0);
+ ASSERT_GE(pidfd, 0);
+
+ ASSERT_EQ(fstat(pidfd, &st2), 0);
+ ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
+
+ ASSERT_EQ(close(pidfd), 0);
+
+ pidfd = open_by_handle_at(-EBADF, fh, 0);
+ ASSERT_LT(pidfd, 0);
+
+ pidfd = open_by_handle_at(AT_FDCWD, fh, 0);
+ ASSERT_LT(pidfd, 0);
+
+ free(fh);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/pidfd/pidfd_setattr_test.c b/tools/testing/selftests/pidfd/pidfd_setattr_test.c
new file mode 100644
index 000000000000..d7de05edc4b3
--- /dev/null
+++ b/tools/testing/selftests/pidfd/pidfd_setattr_test.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/types.h>
+#include <poll.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <linux/kcmp.h>
+#include <sys/stat.h>
+#include <sys/xattr.h>
+
+#include "pidfd.h"
+#include "../kselftest_harness.h"
+
+FIXTURE(pidfs_setattr)
+{
+ pid_t child_pid;
+ int child_pidfd;
+};
+
+FIXTURE_SETUP(pidfs_setattr)
+{
+ self->child_pid = create_child(&self->child_pidfd, CLONE_NEWUSER | CLONE_NEWPID);
+ EXPECT_GE(self->child_pid, 0);
+
+ if (self->child_pid == 0)
+ _exit(EXIT_SUCCESS);
+}
+
+FIXTURE_TEARDOWN(pidfs_setattr)
+{
+ sys_waitid(P_PID, self->child_pid, NULL, WEXITED);
+ EXPECT_EQ(close(self->child_pidfd), 0);
+}
+
+TEST_F(pidfs_setattr, no_chown)
+{
+ ASSERT_LT(fchown(self->child_pidfd, 1234, 5678), 0);
+ ASSERT_EQ(errno, EOPNOTSUPP);
+}
+
+TEST_F(pidfs_setattr, no_chmod)
+{
+ ASSERT_LT(fchmod(self->child_pidfd, 0777), 0);
+ ASSERT_EQ(errno, EOPNOTSUPP);
+}
+
+TEST_F(pidfs_setattr, no_exec)
+{
+ char *const argv[] = { NULL };
+ char *const envp[] = { NULL };
+
+ ASSERT_LT(execveat(self->child_pidfd, "", argv, envp, AT_EMPTY_PATH), 0);
+ ASSERT_EQ(errno, EACCES);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/pidfd/pidfd_xattr_test.c b/tools/testing/selftests/pidfd/pidfd_xattr_test.c
new file mode 100644
index 000000000000..5cf7bb0e4bf2
--- /dev/null
+++ b/tools/testing/selftests/pidfd/pidfd_xattr_test.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/types.h>
+#include <poll.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <linux/kcmp.h>
+#include <sys/stat.h>
+#include <sys/xattr.h>
+
+#include "pidfd.h"
+#include "../kselftest_harness.h"
+
+FIXTURE(pidfs_xattr)
+{
+ pid_t child_pid;
+ int child_pidfd;
+};
+
+FIXTURE_SETUP(pidfs_xattr)
+{
+ self->child_pid = create_child(&self->child_pidfd, CLONE_NEWUSER | CLONE_NEWPID);
+ EXPECT_GE(self->child_pid, 0);
+
+ if (self->child_pid == 0)
+ _exit(EXIT_SUCCESS);
+}
+
+FIXTURE_TEARDOWN(pidfs_xattr)
+{
+ sys_waitid(P_PID, self->child_pid, NULL, WEXITED);
+}
+
+TEST_F(pidfs_xattr, set_get_list_xattr_multiple)
+{
+ int ret, i;
+ char xattr_name[32];
+ char xattr_value[32];
+ char buf[32];
+ const int num_xattrs = 10;
+ char list[PATH_MAX] = {};
+
+ for (i = 0; i < num_xattrs; i++) {
+ snprintf(xattr_name, sizeof(xattr_name), "trusted.testattr%d", i);
+ snprintf(xattr_value, sizeof(xattr_value), "testvalue%d", i);
+ ret = fsetxattr(self->child_pidfd, xattr_name, xattr_value, strlen(xattr_value), 0);
+ ASSERT_EQ(ret, 0);
+ }
+
+ for (i = 0; i < num_xattrs; i++) {
+ snprintf(xattr_name, sizeof(xattr_name), "trusted.testattr%d", i);
+ snprintf(xattr_value, sizeof(xattr_value), "testvalue%d", i);
+ memset(buf, 0, sizeof(buf));
+ ret = fgetxattr(self->child_pidfd, xattr_name, buf, sizeof(buf));
+ ASSERT_EQ(ret, strlen(xattr_value));
+ ASSERT_EQ(strcmp(buf, xattr_value), 0);
+ }
+
+ ret = flistxattr(self->child_pidfd, list, sizeof(list));
+ ASSERT_GT(ret, 0);
+ for (i = 0; i < num_xattrs; i++) {
+ snprintf(xattr_name, sizeof(xattr_name), "trusted.testattr%d", i);
+ bool found = false;
+ for (char *it = list; it < list + ret; it += strlen(it) + 1) {
+ if (strcmp(it, xattr_name))
+ continue;
+ found = true;
+ break;
+ }
+ ASSERT_TRUE(found);
+ }
+
+ for (i = 0; i < num_xattrs; i++) {
+ snprintf(xattr_name, sizeof(xattr_name), "trusted.testattr%d", i);
+ ret = fremovexattr(self->child_pidfd, xattr_name);
+ ASSERT_EQ(ret, 0);
+
+ ret = fgetxattr(self->child_pidfd, xattr_name, buf, sizeof(buf));
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENODATA);
+ }
+}
+
+TEST_F(pidfs_xattr, set_get_list_xattr_persistent)
+{
+ int ret;
+ char buf[32];
+ char list[PATH_MAX] = {};
+
+ ret = fsetxattr(self->child_pidfd, "trusted.persistent", "persistent value", strlen("persistent value"), 0);
+ ASSERT_EQ(ret, 0);
+
+ memset(buf, 0, sizeof(buf));
+ ret = fgetxattr(self->child_pidfd, "trusted.persistent", buf, sizeof(buf));
+ ASSERT_EQ(ret, strlen("persistent value"));
+ ASSERT_EQ(strcmp(buf, "persistent value"), 0);
+
+ ret = flistxattr(self->child_pidfd, list, sizeof(list));
+ ASSERT_GT(ret, 0);
+ ASSERT_EQ(strcmp(list, "trusted.persistent"), 0)
+
+ ASSERT_EQ(close(self->child_pidfd), 0);
+ self->child_pidfd = -EBADF;
+ sleep(2);
+
+ self->child_pidfd = sys_pidfd_open(self->child_pid, 0);
+ ASSERT_GE(self->child_pidfd, 0);
+
+ memset(buf, 0, sizeof(buf));
+ ret = fgetxattr(self->child_pidfd, "trusted.persistent", buf, sizeof(buf));
+ ASSERT_EQ(ret, strlen("persistent value"));
+ ASSERT_EQ(strcmp(buf, "persistent value"), 0);
+
+ ret = flistxattr(self->child_pidfd, list, sizeof(list));
+ ASSERT_GT(ret, 0);
+ ASSERT_EQ(strcmp(list, "trusted.persistent"), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
index edc08a4433fd..ed1e2886ba3c 100644
--- a/tools/testing/selftests/ptp/testptp.c
+++ b/tools/testing/selftests/ptp/testptp.c
@@ -120,6 +120,7 @@ static void usage(char *progname)
" -c query the ptp clock's capabilities\n"
" -d name device to open\n"
" -e val read 'val' external time stamp events\n"
+ " -E val enable rising (1), falling (2), or both (3) edges\n"
" -f val adjust the ptp clock frequency by 'val' ppb\n"
" -F chan Enable single channel mask and keep device open for debugfs verification.\n"
" -g get the ptp clock time\n"
@@ -178,6 +179,7 @@ int main(int argc, char *argv[])
int adjphase = 0;
int capabilities = 0;
int extts = 0;
+ int edge = 0;
int flagtest = 0;
int gettime = 0;
int index = 0;
@@ -202,7 +204,7 @@ int main(int argc, char *argv[])
progname = strrchr(argv[0], '/');
progname = progname ? 1+progname : argv[0];
- while (EOF != (c = getopt(argc, argv, "cd:e:f:F:ghH:i:k:lL:n:o:p:P:rsSt:T:w:x:Xy:z"))) {
+ while (EOF != (c = getopt(argc, argv, "cd:e:E:f:F:ghH:i:k:lL:n:o:p:P:rsSt:T:w:x:Xy:z"))) {
switch (c) {
case 'c':
capabilities = 1;
@@ -213,6 +215,11 @@ int main(int argc, char *argv[])
case 'e':
extts = atoi(optarg);
break;
+ case 'E':
+ edge = atoi(optarg);
+ edge = (edge & 1 ? PTP_RISING_EDGE : 0) |
+ (edge & 2 ? PTP_FALLING_EDGE : 0);
+ break;
case 'f':
adjfreq = atoi(optarg);
break;
@@ -444,7 +451,7 @@ int main(int argc, char *argv[])
if (!readonly) {
memset(&extts_request, 0, sizeof(extts_request));
extts_request.index = index;
- extts_request.flags = PTP_ENABLE_FEATURE;
+ extts_request.flags = PTP_ENABLE_FEATURE | edge;
if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) {
perror("PTP_EXTTS_REQUEST");
extts = 0;
diff --git a/tools/testing/selftests/ptrace/peeksiginfo.c b/tools/testing/selftests/ptrace/peeksiginfo.c
index a6884f66dc01..2f345d11e4b8 100644
--- a/tools/testing/selftests/ptrace/peeksiginfo.c
+++ b/tools/testing/selftests/ptrace/peeksiginfo.c
@@ -199,7 +199,7 @@ int main(int argc, char *argv[])
/*
* Dump signal from the process-wide queue.
- * The number of signals is not multible to the buffer size
+ * The number of signals is not multiple to the buffer size
*/
if (check_direct_path(child, 1, 3))
goto out;
diff --git a/tools/testing/selftests/sched_ext/exit.c b/tools/testing/selftests/sched_ext/exit.c
index 9451782689de..ee25824b1cbe 100644
--- a/tools/testing/selftests/sched_ext/exit.c
+++ b/tools/testing/selftests/sched_ext/exit.c
@@ -22,6 +22,14 @@ static enum scx_test_status run(void *ctx)
struct bpf_link *link;
char buf[16];
+ /*
+ * On single-CPU systems, ops.select_cpu() is never
+ * invoked, so skip this test to avoid getting stuck
+ * indefinitely.
+ */
+ if (tc == EXIT_SELECT_CPU && libbpf_num_possible_cpus() == 1)
+ continue;
+
skel = exit__open();
SCX_ENUM_INIT(skel);
skel->rodata->exit_point = tc;
diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_test.c b/tools/testing/selftests/syscall_user_dispatch/sud_test.c
index d975a6767329..2eb2c06303f2 100644
--- a/tools/testing/selftests/syscall_user_dispatch/sud_test.c
+++ b/tools/testing/selftests/syscall_user_dispatch/sud_test.c
@@ -10,6 +10,8 @@
#include <sys/sysinfo.h>
#include <sys/syscall.h>
#include <signal.h>
+#include <stdbool.h>
+#include <stdlib.h>
#include <asm/unistd.h>
#include "../kselftest_harness.h"
@@ -17,11 +19,15 @@
#ifndef PR_SET_SYSCALL_USER_DISPATCH
# define PR_SET_SYSCALL_USER_DISPATCH 59
# define PR_SYS_DISPATCH_OFF 0
-# define PR_SYS_DISPATCH_ON 1
# define SYSCALL_DISPATCH_FILTER_ALLOW 0
# define SYSCALL_DISPATCH_FILTER_BLOCK 1
#endif
+#ifndef PR_SYS_DISPATCH_EXCLUSIVE_ON
+# define PR_SYS_DISPATCH_EXCLUSIVE_ON 1
+# define PR_SYS_DISPATCH_INCLUSIVE_ON 2
+#endif
+
#ifndef SYS_USER_DISPATCH
# define SYS_USER_DISPATCH 2
#endif
@@ -65,7 +71,7 @@ TEST_SIGNAL(dispatch_trigger_sigsys, SIGSYS)
ret = sysinfo(&info);
ASSERT_EQ(0, ret);
- ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &sel);
+ ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, 0, &sel);
ASSERT_EQ(0, ret) {
TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
}
@@ -79,6 +85,21 @@ TEST_SIGNAL(dispatch_trigger_sigsys, SIGSYS)
}
}
+static void prctl_valid(struct __test_metadata *_metadata,
+ unsigned long op, unsigned long off,
+ unsigned long size, void *sel)
+{
+ EXPECT_EQ(0, prctl(PR_SET_SYSCALL_USER_DISPATCH, op, off, size, sel));
+}
+
+static void prctl_invalid(struct __test_metadata *_metadata,
+ unsigned long op, unsigned long off,
+ unsigned long size, void *sel, int err)
+{
+ EXPECT_EQ(-1, prctl(PR_SET_SYSCALL_USER_DISPATCH, op, off, size, sel));
+ EXPECT_EQ(err, errno);
+}
+
TEST(bad_prctl_param)
{
char sel = SYSCALL_DISPATCH_FILTER_ALLOW;
@@ -86,57 +107,54 @@ TEST(bad_prctl_param)
/* Invalid op */
op = -1;
- prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0, 0, &sel);
- ASSERT_EQ(EINVAL, errno);
+ prctl_invalid(_metadata, op, 0, 0, &sel, EINVAL);
/* PR_SYS_DISPATCH_OFF */
op = PR_SYS_DISPATCH_OFF;
/* offset != 0 */
- prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x1, 0x0, 0);
- EXPECT_EQ(EINVAL, errno);
+ prctl_invalid(_metadata, op, 0x1, 0x0, 0, EINVAL);
/* len != 0 */
- prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0xff, 0);
- EXPECT_EQ(EINVAL, errno);
+ prctl_invalid(_metadata, op, 0x0, 0xff, 0, EINVAL);
/* sel != NULL */
- prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x0, &sel);
- EXPECT_EQ(EINVAL, errno);
+ prctl_invalid(_metadata, op, 0x0, 0x0, &sel, EINVAL);
/* Valid parameter */
- errno = 0;
- prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x0, 0x0);
- EXPECT_EQ(0, errno);
+ prctl_valid(_metadata, op, 0x0, 0x0, 0x0);
- /* PR_SYS_DISPATCH_ON */
- op = PR_SYS_DISPATCH_ON;
+ /* PR_SYS_DISPATCH_EXCLUSIVE_ON */
+ op = PR_SYS_DISPATCH_EXCLUSIVE_ON;
/* Dispatcher region is bad (offset > 0 && len == 0) */
- prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x1, 0x0, &sel);
- EXPECT_EQ(EINVAL, errno);
- prctl(PR_SET_SYSCALL_USER_DISPATCH, op, -1L, 0x0, &sel);
- EXPECT_EQ(EINVAL, errno);
+ prctl_invalid(_metadata, op, 0x1, 0x0, &sel, EINVAL);
+ prctl_invalid(_metadata, op, -1L, 0x0, &sel, EINVAL);
/* Invalid selector */
- prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x1, (void *) -1);
- ASSERT_EQ(EFAULT, errno);
+ prctl_invalid(_metadata, op, 0x0, 0x1, (void *) -1, EFAULT);
/*
* Dispatcher range overflows unsigned long
*/
- prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 1, -1L, &sel);
- ASSERT_EQ(EINVAL, errno) {
- TH_LOG("Should reject bad syscall range");
- }
+ prctl_invalid(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, 1, -1L, &sel, EINVAL);
/*
* Allowed range overflows usigned long
*/
- prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, -1L, 0x1, &sel);
- ASSERT_EQ(EINVAL, errno) {
- TH_LOG("Should reject bad syscall range");
- }
+ prctl_invalid(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, -1L, 0x1, &sel, EINVAL);
+
+ /* 0 len should fail for PR_SYS_DISPATCH_INCLUSIVE_ON */
+ prctl_invalid(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, 1, 0, 0, EINVAL);
+
+ /* Range wrap-around should fail */
+ prctl_invalid(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, -1L, 2, 0, EINVAL);
+
+ /* Normal range shouldn't fail */
+ prctl_valid(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, 2, 3, 0);
+
+ /* Invalid selector */
+ prctl_invalid(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, 2, 3, (void *) -1, EFAULT);
}
/*
@@ -147,11 +165,13 @@ char glob_sel;
int nr_syscalls_emulated;
int si_code;
int si_errno;
+unsigned long syscall_addr;
static void handle_sigsys(int sig, siginfo_t *info, void *ucontext)
{
si_code = info->si_code;
si_errno = info->si_errno;
+ syscall_addr = (unsigned long)info->si_call_addr;
if (info->si_syscall == MAGIC_SYSCALL_1)
nr_syscalls_emulated++;
@@ -174,31 +194,34 @@ static void handle_sigsys(int sig, siginfo_t *info, void *ucontext)
#endif
}
-TEST(dispatch_and_return)
+int setup_sigsys_handler(void)
{
- long ret;
struct sigaction act;
sigset_t mask;
- glob_sel = 0;
- nr_syscalls_emulated = 0;
- si_code = 0;
- si_errno = 0;
-
memset(&act, 0, sizeof(act));
sigemptyset(&mask);
-
act.sa_sigaction = handle_sigsys;
act.sa_flags = SA_SIGINFO;
act.sa_mask = mask;
+ return sigaction(SIGSYS, &act, NULL);
+}
- ret = sigaction(SIGSYS, &act, NULL);
- ASSERT_EQ(0, ret);
+TEST(dispatch_and_return)
+{
+ long ret;
+
+ glob_sel = 0;
+ nr_syscalls_emulated = 0;
+ si_code = 0;
+ si_errno = 0;
+
+ ASSERT_EQ(0, setup_sigsys_handler());
/* Make sure selector is good prior to prctl. */
SYSCALL_DISPATCH_OFF(glob_sel);
- ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &glob_sel);
+ ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, 0, &glob_sel);
ASSERT_EQ(0, ret) {
TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
}
@@ -254,7 +277,7 @@ TEST_SIGNAL(bad_selector, SIGSYS)
/* Make sure selector is good prior to prctl. */
SYSCALL_DISPATCH_OFF(glob_sel);
- ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &glob_sel);
+ ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, 0, &glob_sel);
ASSERT_EQ(0, ret) {
TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
}
@@ -278,7 +301,7 @@ TEST(disable_dispatch)
struct sysinfo info;
char sel = 0;
- ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &sel);
+ ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, 0, &sel);
ASSERT_EQ(0, ret) {
TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
}
@@ -310,7 +333,7 @@ TEST(direct_dispatch_range)
* Instead of calculating libc addresses; allow the entire
* memory map and lock the selector.
*/
- ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, -1L, &sel);
+ ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, -1L, &sel);
ASSERT_EQ(0, ret) {
TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
}
@@ -323,4 +346,35 @@ TEST(direct_dispatch_range)
}
}
+static void test_range(struct __test_metadata *_metadata,
+ unsigned long op, unsigned long off,
+ unsigned long size, bool dispatch)
+{
+ nr_syscalls_emulated = 0;
+ SYSCALL_DISPATCH_OFF(glob_sel);
+ EXPECT_EQ(0, prctl(PR_SET_SYSCALL_USER_DISPATCH, op, off, size, &glob_sel));
+ SYSCALL_DISPATCH_ON(glob_sel);
+ if (dispatch) {
+ EXPECT_EQ(syscall(MAGIC_SYSCALL_1), MAGIC_SYSCALL_1);
+ EXPECT_EQ(nr_syscalls_emulated, 1);
+ } else {
+ EXPECT_EQ(syscall(MAGIC_SYSCALL_1), -1);
+ EXPECT_EQ(nr_syscalls_emulated, 0);
+ }
+}
+
+TEST(dispatch_range)
+{
+ ASSERT_EQ(0, setup_sigsys_handler());
+ test_range(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, 0, true);
+ test_range(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, syscall_addr, 1, false);
+ test_range(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, syscall_addr-100, 200, false);
+ test_range(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, syscall_addr+1, 100, true);
+ test_range(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, syscall_addr-100, 100, true);
+ test_range(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, syscall_addr, 1, true);
+ test_range(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, syscall_addr-1, 1, false);
+ test_range(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, syscall_addr+1, 1, false);
+ SYSCALL_DISPATCH_OFF(glob_sel);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
index a10350c8a46e..b2d8bd9026a7 100755
--- a/tools/testing/selftests/sysctl/sysctl.sh
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1
# Copyright (C) 2017 Luis R. Rodriguez <[email protected]>
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index db176fe7d0c3..c20aa16b1d63 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -21,6 +21,7 @@ CONFIG_NF_NAT=m
CONFIG_NETFILTER_XT_TARGET_LOG=m
CONFIG_NET_SCHED=y
+CONFIG_IP_SET=m
#
# Queueing/Scheduling
@@ -30,6 +31,7 @@ CONFIG_NET_SCH_CBS=m
CONFIG_NET_SCH_CHOKE=m
CONFIG_NET_SCH_CODEL=m
CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_DUALPI2=m
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_FQ=m
CONFIG_NET_SCH_FQ_CODEL=m
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index c6db7fa94f55..23a61e5b99d0 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -504,7 +504,6 @@
"$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%",
"$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip dst 10.10.10.1/32 flowid 1:1",
"$TC class add dev $DUMMY parent 1:0 classid 1:2 hfsc ls m2 10Mbit",
- "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 netem duplicate 100%",
"$TC filter add dev $DUMMY parent 1:0 protocol ip prio 2 u32 match ip dst 10.10.10.2/32 flowid 1:2",
"ping -c 1 10.10.10.1 -I$DUMMY > /dev/null || true",
"$TC filter del dev $DUMMY parent 1:0 protocol ip prio 1",
@@ -517,8 +516,8 @@
{
"kind": "hfsc",
"handle": "1:",
- "bytes": 392,
- "packets": 4
+ "bytes": 294,
+ "packets": 3
}
],
"matchCount": "1",
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json
new file mode 100644
index 000000000000..cd1f2ee8f354
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json
@@ -0,0 +1,254 @@
+[
+ {
+ "id": "a4c7",
+ "name": "Create DualPI2 with default setting",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* step_thresh 1ms min_qlen_step 0p coupling_factor 2 drop_on_overload drop_dequeue classic_protection 10% l4s_ect split_gso",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "1ea4",
+ "name": "Create DualPI2 with memlimit",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 memlimit 20000000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* memlimit 20000000B",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "2130",
+ "name": "Create DualPI2 with typical_rtt and max_rtt",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 typical_rtt 20ms max_rtt 200ms",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* target 20ms tupdate 20ms alpha 0.042969 beta 1.496094",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "90c1",
+ "name": "Create DualPI2 with max_rtt",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 max_rtt 300ms",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* target 50ms tupdate 50ms alpha 0.050781 beta 0.996094",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "7b3c",
+ "name": "Create DualPI2 with any_ect option",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 any_ect",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* any_ect",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "49a3",
+ "name": "Create DualPI2 with overflow option",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 overflow",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* overflow",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "d0a1",
+ "name": "Create DualPI2 with drop_enqueue option",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 drop_enqueue",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* drop_enqueue",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "f051",
+ "name": "Create DualPI2 with no_split_gso option",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 no_split_gso",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* no_split_gso",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "456b",
+ "name": "Create DualPI2 with packet step_thresh",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 step_thresh 3p",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* step_thresh 3p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "610c",
+ "name": "Create DualPI2 with packet min_qlen_step",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 min_qlen_step 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* min_qlen_step 1p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "b4fa",
+ "name": "Create DualPI2 with packet coupling_factor",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 coupling_factor 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* coupling_factor 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "37f1",
+ "name": "Create DualPI2 with packet classic_protection",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 classic_protection 0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* classic_protection 0%",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json
index 3c4444961488..718d2df2aafa 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json
@@ -336,5 +336,86 @@
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
]
+ },
+ {
+ "id": "d34d",
+ "name": "NETEM test qdisc duplication restriction in qdisc tree in netem_change root",
+ "category": ["qdisc", "netem"],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY root handle 1: netem limit 1",
+ "$TC qdisc add dev $DUMMY parent 1: handle 2: netem limit 1"
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: netem duplicate 50%",
+ "expExitCode": "2",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem",
+ "matchCount": "2",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root"
+ ]
+ },
+ {
+ "id": "b33f",
+ "name": "NETEM test qdisc duplication restriction in qdisc tree in netem_change non-root",
+ "category": ["qdisc", "netem"],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY root handle 1: netem limit 1",
+ "$TC qdisc add dev $DUMMY parent 1: handle 2: netem limit 1"
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 2: netem duplicate 50%",
+ "expExitCode": "2",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem",
+ "matchCount": "2",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root"
+ ]
+ },
+ {
+ "id": "cafe",
+ "name": "NETEM test qdisc duplication restriction in qdisc tree",
+ "category": ["qdisc", "netem"],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY root handle 1: netem limit 1 duplicate 100%"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY parent 1: handle 2: netem duplicate 100%",
+ "expExitCode": "2",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root"
+ ]
+ },
+ {
+ "id": "1337",
+ "name": "NETEM test qdisc duplication restriction in qdisc tree across branches",
+ "category": ["qdisc", "netem"],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY parent root handle 1:0 hfsc",
+ "$TC class add dev $DUMMY parent 1:0 classid 1:1 hfsc rt m2 10Mbit",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem",
+ "$TC class add dev $DUMMY parent 1:0 classid 1:2 hfsc rt m2 10Mbit"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 netem duplicate 100%",
+ "expExitCode": "2",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
index 28c6ce6da7db..531a2f6e4900 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
@@ -264,5 +264,41 @@
"matchPattern": "sfq",
"matchCount": "0",
"teardown": []
+ },
+ {
+ "id": "cdc1",
+ "name": "Check that a negative perturb timer is rejected",
+ "category": [
+ "qdisc",
+ "sfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq perturb -10",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "sfq",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "a9f0",
+ "name": "Check that a too big perturb timer is rejected",
+ "category": [
+ "qdisc",
+ "sfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq perturb 1000000000",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "sfq",
+ "matchCount": "0",
+ "teardown": []
}
]
diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh
index 589b18ed758a..dae19687912d 100755
--- a/tools/testing/selftests/tc-testing/tdc.sh
+++ b/tools/testing/selftests/tc-testing/tdc.sh
@@ -4,8 +4,7 @@
# If a module is required and was not compiled
# the test that requires it will fail anyways
try_modprobe() {
- modprobe -q -R "$1"
- if [ $? -ne 0 ]; then
+ if ! modprobe -q -R "$1"; then
echo "Module $1 not found... skipping."
else
modprobe "$1"
@@ -67,4 +66,5 @@ try_modprobe sch_hfsc
try_modprobe sch_hhf
try_modprobe sch_htb
try_modprobe sch_teql
-./tdc.py -J`nproc`
+try_modprobe sch_dualpi2
+./tdc.py -J"$(nproc)"
diff --git a/tools/testing/selftests/ublk/fault_inject.c b/tools/testing/selftests/ublk/fault_inject.c
index 6e60f7d97125..b227bd78b252 100644
--- a/tools/testing/selftests/ublk/fault_inject.c
+++ b/tools/testing/selftests/ublk/fault_inject.c
@@ -38,7 +38,8 @@ static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx,
return 0;
}
-static int ublk_fault_inject_queue_io(struct ublk_queue *q, int tag)
+static int ublk_fault_inject_queue_io(struct ublk_thread *t,
+ struct ublk_queue *q, int tag)
{
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
struct io_uring_sqe *sqe;
@@ -46,25 +47,27 @@ static int ublk_fault_inject_queue_io(struct ublk_queue *q, int tag)
.tv_nsec = (long long)q->dev->private_data,
};
- ublk_io_alloc_sqes(ublk_get_io(q, tag), &sqe, 1);
+ ublk_io_alloc_sqes(t, &sqe, 1);
io_uring_prep_timeout(sqe, &ts, 1, 0);
sqe->user_data = build_user_data(tag, ublksrv_get_op(iod), 0, q->q_id, 1);
- ublk_queued_tgt_io(q, tag, 1);
+ ublk_queued_tgt_io(t, q, tag, 1);
return 0;
}
-static void ublk_fault_inject_tgt_io_done(struct ublk_queue *q, int tag,
+static void ublk_fault_inject_tgt_io_done(struct ublk_thread *t,
+ struct ublk_queue *q,
const struct io_uring_cqe *cqe)
{
+ unsigned tag = user_data_to_tag(cqe->user_data);
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
if (cqe->res != -ETIME)
ublk_err("%s: unexpected cqe res %d\n", __func__, cqe->res);
- if (ublk_completed_tgt_io(q, tag))
- ublk_complete_io(q, tag, iod->nr_sectors << 9);
+ if (ublk_completed_tgt_io(t, q, tag))
+ ublk_complete_io(t, q, tag, iod->nr_sectors << 9);
else
ublk_err("%s: io not complete after 1 cqe\n", __func__);
}
diff --git a/tools/testing/selftests/ublk/file_backed.c b/tools/testing/selftests/ublk/file_backed.c
index cfa59b631693..2d93ac860bd5 100644
--- a/tools/testing/selftests/ublk/file_backed.c
+++ b/tools/testing/selftests/ublk/file_backed.c
@@ -13,12 +13,13 @@ static enum io_uring_op ublk_to_uring_op(const struct ublksrv_io_desc *iod, int
assert(0);
}
-static int loop_queue_flush_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
+static int loop_queue_flush_io(struct ublk_thread *t, struct ublk_queue *q,
+ const struct ublksrv_io_desc *iod, int tag)
{
unsigned ublk_op = ublksrv_get_op(iod);
struct io_uring_sqe *sqe[1];
- ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 1);
+ ublk_io_alloc_sqes(t, sqe, 1);
io_uring_prep_fsync(sqe[0], 1 /*fds[1]*/, IORING_FSYNC_DATASYNC);
io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE);
/* bit63 marks us as tgt io */
@@ -26,7 +27,8 @@ static int loop_queue_flush_io(struct ublk_queue *q, const struct ublksrv_io_des
return 1;
}
-static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
+static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
+ const struct ublksrv_io_desc *iod, int tag)
{
unsigned ublk_op = ublksrv_get_op(iod);
unsigned zc = ublk_queue_use_zc(q);
@@ -36,7 +38,7 @@ static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_de
void *addr = (zc | auto_zc) ? NULL : (void *)iod->addr;
if (!zc || auto_zc) {
- ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 1);
+ ublk_io_alloc_sqes(t, sqe, 1);
if (!sqe[0])
return -ENOMEM;
@@ -52,7 +54,7 @@ static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_de
return 1;
}
- ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 3);
+ ublk_io_alloc_sqes(t, sqe, 3);
io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
@@ -72,7 +74,7 @@ static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_de
return 2;
}
-static int loop_queue_tgt_io(struct ublk_queue *q, int tag)
+static int loop_queue_tgt_io(struct ublk_thread *t, struct ublk_queue *q, int tag)
{
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
unsigned ublk_op = ublksrv_get_op(iod);
@@ -80,7 +82,7 @@ static int loop_queue_tgt_io(struct ublk_queue *q, int tag)
switch (ublk_op) {
case UBLK_IO_OP_FLUSH:
- ret = loop_queue_flush_io(q, iod, tag);
+ ret = loop_queue_flush_io(t, q, iod, tag);
break;
case UBLK_IO_OP_WRITE_ZEROES:
case UBLK_IO_OP_DISCARD:
@@ -88,7 +90,7 @@ static int loop_queue_tgt_io(struct ublk_queue *q, int tag)
break;
case UBLK_IO_OP_READ:
case UBLK_IO_OP_WRITE:
- ret = loop_queue_tgt_rw_io(q, iod, tag);
+ ret = loop_queue_tgt_rw_io(t, q, iod, tag);
break;
default:
ret = -EINVAL;
@@ -100,17 +102,19 @@ static int loop_queue_tgt_io(struct ublk_queue *q, int tag)
return ret;
}
-static int ublk_loop_queue_io(struct ublk_queue *q, int tag)
+static int ublk_loop_queue_io(struct ublk_thread *t, struct ublk_queue *q,
+ int tag)
{
- int queued = loop_queue_tgt_io(q, tag);
+ int queued = loop_queue_tgt_io(t, q, tag);
- ublk_queued_tgt_io(q, tag, queued);
+ ublk_queued_tgt_io(t, q, tag, queued);
return 0;
}
-static void ublk_loop_io_done(struct ublk_queue *q, int tag,
+static void ublk_loop_io_done(struct ublk_thread *t, struct ublk_queue *q,
const struct io_uring_cqe *cqe)
{
+ unsigned tag = user_data_to_tag(cqe->user_data);
unsigned op = user_data_to_op(cqe->user_data);
struct ublk_io *io = ublk_get_io(q, tag);
@@ -126,8 +130,8 @@ static void ublk_loop_io_done(struct ublk_queue *q, int tag,
if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF))
io->tgt_ios += 1;
- if (ublk_completed_tgt_io(q, tag))
- ublk_complete_io(q, tag, io->result);
+ if (ublk_completed_tgt_io(t, q, tag))
+ ublk_complete_io(t, q, tag, io->result);
}
static int ublk_loop_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c
index e2d2042810d4..95188065b2e9 100644
--- a/tools/testing/selftests/ublk/kublk.c
+++ b/tools/testing/selftests/ublk/kublk.c
@@ -441,17 +441,10 @@ static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags)
unsigned long off;
q->tgt_ops = dev->tgt.ops;
- q->state = 0;
+ q->flags = 0;
q->q_depth = depth;
-
- if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) {
- q->state |= UBLKSRV_NO_BUF;
- if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY)
- q->state |= UBLKSRV_ZC;
- if (dev->dev_info.flags & UBLK_F_AUTO_BUF_REG)
- q->state |= UBLKSRV_AUTO_BUF_REG;
- }
- q->state |= extra_flags;
+ q->flags = dev->dev_info.flags;
+ q->flags |= extra_flags;
cmd_buf_size = ublk_queue_cmd_buf_sz(q);
off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz();
@@ -466,10 +459,10 @@ static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags)
io_buf_size = dev->dev_info.max_io_buf_bytes;
for (i = 0; i < q->q_depth; i++) {
q->ios[i].buf_addr = NULL;
- q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE;
+ q->ios[i].flags = UBLKS_IO_NEED_FETCH_RQ | UBLKS_IO_FREE;
q->ios[i].tag = i;
- if (q->state & UBLKSRV_NO_BUF)
+ if (ublk_queue_no_buf(q))
continue;
if (posix_memalign((void **)&q->ios[i].buf_addr,
@@ -583,15 +576,14 @@ static void ublk_set_auto_buf_reg(const struct ublk_queue *q,
else
buf.index = q->ios[tag].buf_index;
- if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK)
+ if (ublk_queue_auto_zc_fallback(q))
buf.flags = UBLK_AUTO_BUF_REG_FALLBACK;
sqe->addr = ublk_auto_buf_reg_to_sqe_addr(&buf);
}
-int ublk_queue_io_cmd(struct ublk_io *io)
+int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io)
{
- struct ublk_thread *t = io->t;
struct ublk_queue *q = ublk_io_to_queue(io);
struct ublksrv_io_cmd *cmd;
struct io_uring_sqe *sqe[1];
@@ -599,7 +591,7 @@ int ublk_queue_io_cmd(struct ublk_io *io)
__u64 user_data;
/* only freed io can be issued */
- if (!(io->flags & UBLKSRV_IO_FREE))
+ if (!(io->flags & UBLKS_IO_FREE))
return 0;
/*
@@ -607,20 +599,20 @@ int ublk_queue_io_cmd(struct ublk_io *io)
* getting data
*/
if (!(io->flags &
- (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_NEED_GET_DATA)))
+ (UBLKS_IO_NEED_FETCH_RQ | UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_NEED_GET_DATA)))
return 0;
- if (io->flags & UBLKSRV_NEED_GET_DATA)
+ if (io->flags & UBLKS_IO_NEED_GET_DATA)
cmd_op = UBLK_U_IO_NEED_GET_DATA;
- else if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP)
+ else if (io->flags & UBLKS_IO_NEED_COMMIT_RQ_COMP)
cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ;
- else if (io->flags & UBLKSRV_NEED_FETCH_RQ)
+ else if (io->flags & UBLKS_IO_NEED_FETCH_RQ)
cmd_op = UBLK_U_IO_FETCH_REQ;
if (io_uring_sq_space_left(&t->ring) < 1)
io_uring_submit(&t->ring);
- ublk_io_alloc_sqes(io, sqe, 1);
+ ublk_io_alloc_sqes(t, sqe, 1);
if (!sqe[0]) {
ublk_err("%s: run out of sqe. thread %u, tag %d\n",
__func__, t->idx, io->tag);
@@ -640,12 +632,12 @@ int ublk_queue_io_cmd(struct ublk_io *io)
sqe[0]->rw_flags = 0;
cmd->tag = io->tag;
cmd->q_id = q->q_id;
- if (!(q->state & UBLKSRV_NO_BUF))
+ if (!ublk_queue_no_buf(q))
cmd->addr = (__u64) (uintptr_t) io->buf_addr;
else
cmd->addr = 0;
- if (q->state & UBLKSRV_AUTO_BUF_REG)
+ if (ublk_queue_use_auto_zc(q))
ublk_set_auto_buf_reg(q, sqe[0], io->tag);
user_data = build_user_data(io->tag, _IOC_NR(cmd_op), 0, q->q_id, 0);
@@ -657,7 +649,7 @@ int ublk_queue_io_cmd(struct ublk_io *io)
ublk_dbg(UBLK_DBG_IO_CMD, "%s: (thread %u qid %d tag %u cmd_op %u) iof %x stopping %d\n",
__func__, t->idx, q->q_id, io->tag, cmd_op,
- io->flags, !!(t->state & UBLKSRV_THREAD_STOPPING));
+ io->flags, !!(t->state & UBLKS_T_STOPPING));
return 1;
}
@@ -685,9 +677,8 @@ static void ublk_submit_fetch_commands(struct ublk_thread *t)
int tag = i % dinfo->queue_depth;
q = &t->dev->q[q_id];
io = &q->ios[tag];
- io->t = t;
io->buf_index = j++;
- ublk_queue_io_cmd(io);
+ ublk_queue_io_cmd(t, io);
}
} else {
/*
@@ -697,9 +688,8 @@ static void ublk_submit_fetch_commands(struct ublk_thread *t)
struct ublk_queue *q = &t->dev->q[t->idx];
for (i = 0; i < q->q_depth; i++) {
io = &q->ios[i];
- io->t = t;
io->buf_index = i;
- ublk_queue_io_cmd(io);
+ ublk_queue_io_cmd(t, io);
}
}
}
@@ -711,14 +701,13 @@ static int ublk_thread_is_idle(struct ublk_thread *t)
static int ublk_thread_is_done(struct ublk_thread *t)
{
- return (t->state & UBLKSRV_THREAD_STOPPING) && ublk_thread_is_idle(t);
+ return (t->state & UBLKS_T_STOPPING) && ublk_thread_is_idle(t);
}
-static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q,
- struct io_uring_cqe *cqe)
+static inline void ublksrv_handle_tgt_cqe(struct ublk_thread *t,
+ struct ublk_queue *q,
+ struct io_uring_cqe *cqe)
{
- unsigned tag = user_data_to_tag(cqe->user_data);
-
if (cqe->res < 0 && cqe->res != -EAGAIN)
ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n",
__func__, cqe->res, q->q_id,
@@ -726,7 +715,41 @@ static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q,
user_data_to_op(cqe->user_data));
if (q->tgt_ops->tgt_io_done)
- q->tgt_ops->tgt_io_done(q, tag, cqe);
+ q->tgt_ops->tgt_io_done(t, q, cqe);
+}
+
+static void ublk_handle_uring_cmd(struct ublk_thread *t,
+ struct ublk_queue *q,
+ const struct io_uring_cqe *cqe)
+{
+ int fetch = (cqe->res != UBLK_IO_RES_ABORT) &&
+ !(t->state & UBLKS_T_STOPPING);
+ unsigned tag = user_data_to_tag(cqe->user_data);
+ struct ublk_io *io = &q->ios[tag];
+
+ if (!fetch) {
+ t->state |= UBLKS_T_STOPPING;
+ io->flags &= ~UBLKS_IO_NEED_FETCH_RQ;
+ }
+
+ if (cqe->res == UBLK_IO_RES_OK) {
+ assert(tag < q->q_depth);
+ if (q->tgt_ops->queue_io)
+ q->tgt_ops->queue_io(t, q, tag);
+ } else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) {
+ io->flags |= UBLKS_IO_NEED_GET_DATA | UBLKS_IO_FREE;
+ ublk_queue_io_cmd(t, io);
+ } else {
+ /*
+ * COMMIT_REQ will be completed immediately since no fetching
+ * piggyback is required.
+ *
+ * Marking IO_FREE only, then this io won't be issued since
+ * we only issue io with (UBLKS_IO_FREE | UBLKSRV_NEED_*)
+ *
+ * */
+ io->flags = UBLKS_IO_FREE;
+ }
}
static void ublk_handle_cqe(struct ublk_thread *t,
@@ -735,54 +758,27 @@ static void ublk_handle_cqe(struct ublk_thread *t,
struct ublk_dev *dev = t->dev;
unsigned q_id = user_data_to_q_id(cqe->user_data);
struct ublk_queue *q = &dev->q[q_id];
- unsigned tag = user_data_to_tag(cqe->user_data);
unsigned cmd_op = user_data_to_op(cqe->user_data);
- int fetch = (cqe->res != UBLK_IO_RES_ABORT) &&
- !(t->state & UBLKSRV_THREAD_STOPPING);
- struct ublk_io *io;
if (cqe->res < 0 && cqe->res != -ENODEV)
ublk_err("%s: res %d userdata %llx queue state %x\n", __func__,
- cqe->res, cqe->user_data, q->state);
+ cqe->res, cqe->user_data, q->flags);
ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d/%d) stopping %d\n",
- __func__, cqe->res, q->q_id, tag, cmd_op,
- is_target_io(cqe->user_data),
+ __func__, cqe->res, q->q_id, user_data_to_tag(cqe->user_data),
+ cmd_op, is_target_io(cqe->user_data),
user_data_to_tgt_data(cqe->user_data),
- (t->state & UBLKSRV_THREAD_STOPPING));
+ (t->state & UBLKS_T_STOPPING));
/* Don't retrieve io in case of target io */
if (is_target_io(cqe->user_data)) {
- ublksrv_handle_tgt_cqe(q, cqe);
+ ublksrv_handle_tgt_cqe(t, q, cqe);
return;
}
- io = &q->ios[tag];
t->cmd_inflight--;
- if (!fetch) {
- t->state |= UBLKSRV_THREAD_STOPPING;
- io->flags &= ~UBLKSRV_NEED_FETCH_RQ;
- }
-
- if (cqe->res == UBLK_IO_RES_OK) {
- assert(tag < q->q_depth);
- if (q->tgt_ops->queue_io)
- q->tgt_ops->queue_io(q, tag);
- } else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) {
- io->flags |= UBLKSRV_NEED_GET_DATA | UBLKSRV_IO_FREE;
- ublk_queue_io_cmd(io);
- } else {
- /*
- * COMMIT_REQ will be completed immediately since no fetching
- * piggyback is required.
- *
- * Marking IO_FREE only, then this io won't be issued since
- * we only issue io with (UBLKSRV_IO_FREE | UBLKSRV_NEED_*)
- *
- * */
- io->flags = UBLKSRV_IO_FREE;
- }
+ ublk_handle_uring_cmd(t, q, cqe);
}
static int ublk_reap_events_uring(struct ublk_thread *t)
@@ -808,7 +804,7 @@ static int ublk_process_io(struct ublk_thread *t)
t->dev->dev_info.dev_id,
t->idx, io_uring_sq_ready(&t->ring),
t->cmd_inflight,
- (t->state & UBLKSRV_THREAD_STOPPING));
+ (t->state & UBLKS_T_STOPPING));
if (ublk_thread_is_done(t))
return -ENODEV;
@@ -817,8 +813,8 @@ static int ublk_process_io(struct ublk_thread *t)
reapped = ublk_reap_events_uring(t);
ublk_dbg(UBLK_DBG_THREAD, "submit result %d, reapped %d stop %d idle %d\n",
- ret, reapped, (t->state & UBLKSRV_THREAD_STOPPING),
- (t->state & UBLKSRV_THREAD_IDLE));
+ ret, reapped, (t->state & UBLKS_T_STOPPING),
+ (t->state & UBLKS_T_IDLE));
return reapped;
}
@@ -915,7 +911,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
{
const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;
struct ublk_thread_info *tinfo;
- unsigned extra_flags = 0;
+ unsigned long long extra_flags = 0;
cpu_set_t *affinity_buf;
void *thread_ret;
sem_t ready;
@@ -937,7 +933,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
return ret;
if (ctx->auto_zc_fallback)
- extra_flags = UBLKSRV_AUTO_BUF_REG_FALLBACK;
+ extra_flags = UBLKS_Q_AUTO_BUF_REG_FALLBACK;
for (i = 0; i < dinfo->nr_hw_queues; i++) {
dev->q[i].dev = dev;
diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h
index 6be601536b3d..219233f8a053 100644
--- a/tools/testing/selftests/ublk/kublk.h
+++ b/tools/testing/selftests/ublk/kublk.h
@@ -29,13 +29,9 @@
#include "ublk_dep.h"
#include <linux/ublk_cmd.h>
-#define __maybe_unused __attribute__((unused))
-#define MAX_BACK_FILES 4
-#ifndef min
-#define min(a, b) ((a) < (b) ? (a) : (b))
-#endif
+#include "utils.h"
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
+#define MAX_BACK_FILES 4
/****************** part 1: libublk ********************/
@@ -45,9 +41,6 @@
#define UBLK_CTRL_RING_DEPTH 32
#define ERROR_EVTFD_DEVID -2
-/* queue idle timeout */
-#define UBLKSRV_IO_IDLE_SECS 20
-
#define UBLK_IO_MAX_BYTES (1 << 20)
#define UBLK_MAX_QUEUES_SHIFT 5
#define UBLK_MAX_QUEUES (1 << UBLK_MAX_QUEUES_SHIFT)
@@ -55,13 +48,6 @@
#define UBLK_MAX_THREADS (1 << UBLK_MAX_THREADS_SHIFT)
#define UBLK_QUEUE_DEPTH 1024
-#define UBLK_DBG_DEV (1U << 0)
-#define UBLK_DBG_THREAD (1U << 1)
-#define UBLK_DBG_IO_CMD (1U << 2)
-#define UBLK_DBG_IO (1U << 3)
-#define UBLK_DBG_CTRL_CMD (1U << 4)
-#define UBLK_LOG (1U << 5)
-
struct ublk_dev;
struct ublk_queue;
struct ublk_thread;
@@ -121,11 +107,11 @@ struct ublk_ctrl_cmd_data {
struct ublk_io {
char *buf_addr;
-#define UBLKSRV_NEED_FETCH_RQ (1UL << 0)
-#define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1)
-#define UBLKSRV_IO_FREE (1UL << 2)
-#define UBLKSRV_NEED_GET_DATA (1UL << 3)
-#define UBLKSRV_NEED_REG_BUF (1UL << 4)
+#define UBLKS_IO_NEED_FETCH_RQ (1UL << 0)
+#define UBLKS_IO_NEED_COMMIT_RQ_COMP (1UL << 1)
+#define UBLKS_IO_FREE (1UL << 2)
+#define UBLKS_IO_NEED_GET_DATA (1UL << 3)
+#define UBLKS_IO_NEED_REG_BUF (1UL << 4)
unsigned short flags;
unsigned short refs; /* used by target code only */
@@ -136,7 +122,6 @@ struct ublk_io {
unsigned short buf_index;
unsigned short tgt_ios;
void *private_data;
- struct ublk_thread *t;
};
struct ublk_tgt_ops {
@@ -144,9 +129,9 @@ struct ublk_tgt_ops {
int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *);
void (*deinit_tgt)(struct ublk_dev *);
- int (*queue_io)(struct ublk_queue *, int tag);
- void (*tgt_io_done)(struct ublk_queue *,
- int tag, const struct io_uring_cqe *);
+ int (*queue_io)(struct ublk_thread *, struct ublk_queue *, int tag);
+ void (*tgt_io_done)(struct ublk_thread *, struct ublk_queue *,
+ const struct io_uring_cqe *);
/*
* Target specific command line handling
@@ -179,12 +164,10 @@ struct ublk_queue {
const struct ublk_tgt_ops *tgt_ops;
struct ublksrv_io_desc *io_cmd_buf;
+/* borrow one bit of ublk uapi flags, which may never be used */
+#define UBLKS_Q_AUTO_BUF_REG_FALLBACK (1ULL << 63)
+ __u64 flags;
struct ublk_io ios[UBLK_QUEUE_DEPTH];
-#define UBLKSRV_NO_BUF (1U << 2)
-#define UBLKSRV_ZC (1U << 3)
-#define UBLKSRV_AUTO_BUF_REG (1U << 4)
-#define UBLKSRV_AUTO_BUF_REG_FALLBACK (1U << 5)
- unsigned state;
};
struct ublk_thread {
@@ -196,8 +179,8 @@ struct ublk_thread {
pthread_t thread;
unsigned idx;
-#define UBLKSRV_THREAD_STOPPING (1U << 0)
-#define UBLKSRV_THREAD_IDLE (1U << 1)
+#define UBLKS_T_STOPPING (1U << 0)
+#define UBLKS_T_IDLE (1U << 1)
unsigned state;
};
@@ -217,22 +200,7 @@ struct ublk_dev {
void *private_data;
};
-#ifndef offsetof
-#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
-#endif
-
-#ifndef container_of
-#define container_of(ptr, type, member) ({ \
- unsigned long __mptr = (unsigned long)(ptr); \
- ((type *)(__mptr - offsetof(type, member))); })
-#endif
-
-#define round_up(val, rnd) \
- (((val) + ((rnd) - 1)) & ~((rnd) - 1))
-
-
-extern unsigned int ublk_dbg_mask;
-extern int ublk_queue_io_cmd(struct ublk_io *io);
+extern int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io);
static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod)
@@ -281,43 +249,15 @@ static inline unsigned short ublk_cmd_op_nr(unsigned int op)
return _IOC_NR(op);
}
-static inline void ublk_err(const char *fmt, ...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- vfprintf(stderr, fmt, ap);
-}
-
-static inline void ublk_log(const char *fmt, ...)
-{
- if (ublk_dbg_mask & UBLK_LOG) {
- va_list ap;
-
- va_start(ap, fmt);
- vfprintf(stdout, fmt, ap);
- }
-}
-
-static inline void ublk_dbg(int level, const char *fmt, ...)
-{
- if (level & ublk_dbg_mask) {
- va_list ap;
-
- va_start(ap, fmt);
- vfprintf(stdout, fmt, ap);
- }
-}
-
static inline struct ublk_queue *ublk_io_to_queue(const struct ublk_io *io)
{
return container_of(io, struct ublk_queue, ios[io->tag]);
}
-static inline int ublk_io_alloc_sqes(struct ublk_io *io,
+static inline int ublk_io_alloc_sqes(struct ublk_thread *t,
struct io_uring_sqe *sqes[], int nr_sqes)
{
- struct io_uring *ring = &io->t->ring;
+ struct io_uring *ring = &t->ring;
unsigned left = io_uring_sq_space_left(ring);
int i;
@@ -380,7 +320,7 @@ static inline int ublk_get_io_res(const struct ublk_queue *q, unsigned tag)
static inline void ublk_mark_io_done(struct ublk_io *io, int res)
{
- io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE);
+ io->flags |= (UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_FREE);
io->result = res;
}
@@ -402,45 +342,58 @@ static inline struct ublk_io *ublk_get_io(struct ublk_queue *q, unsigned tag)
return &q->ios[tag];
}
-static inline int ublk_complete_io(struct ublk_queue *q, unsigned tag, int res)
+static inline int ublk_complete_io(struct ublk_thread *t, struct ublk_queue *q,
+ unsigned tag, int res)
{
struct ublk_io *io = &q->ios[tag];
ublk_mark_io_done(io, res);
- return ublk_queue_io_cmd(io);
+ return ublk_queue_io_cmd(t, io);
}
-static inline void ublk_queued_tgt_io(struct ublk_queue *q, unsigned tag, int queued)
+static inline void ublk_queued_tgt_io(struct ublk_thread *t, struct ublk_queue *q,
+ unsigned tag, int queued)
{
if (queued < 0)
- ublk_complete_io(q, tag, queued);
+ ublk_complete_io(t, q, tag, queued);
else {
struct ublk_io *io = ublk_get_io(q, tag);
- io->t->io_inflight += queued;
+ t->io_inflight += queued;
io->tgt_ios = queued;
io->result = 0;
}
}
-static inline int ublk_completed_tgt_io(struct ublk_queue *q, unsigned tag)
+static inline int ublk_completed_tgt_io(struct ublk_thread *t,
+ struct ublk_queue *q, unsigned tag)
{
struct ublk_io *io = ublk_get_io(q, tag);
- io->t->io_inflight--;
+ t->io_inflight--;
return --io->tgt_ios == 0;
}
static inline int ublk_queue_use_zc(const struct ublk_queue *q)
{
- return q->state & UBLKSRV_ZC;
+ return q->flags & UBLK_F_SUPPORT_ZERO_COPY;
}
static inline int ublk_queue_use_auto_zc(const struct ublk_queue *q)
{
- return q->state & UBLKSRV_AUTO_BUF_REG;
+ return q->flags & UBLK_F_AUTO_BUF_REG;
+}
+
+static inline int ublk_queue_auto_zc_fallback(const struct ublk_queue *q)
+{
+ return q->flags & UBLKS_Q_AUTO_BUF_REG_FALLBACK;
+}
+
+static inline int ublk_queue_no_buf(const struct ublk_queue *q)
+{
+ return ublk_queue_use_zc(q) || ublk_queue_use_auto_zc(q);
}
extern const struct ublk_tgt_ops null_tgt_ops;
@@ -451,10 +404,4 @@ extern const struct ublk_tgt_ops fault_inject_tgt_ops;
void backing_file_tgt_deinit(struct ublk_dev *dev);
int backing_file_tgt_init(struct ublk_dev *dev);
-static inline unsigned int ilog2(unsigned int x)
-{
- if (x == 0)
- return 0;
- return (sizeof(x) * 8 - 1) - __builtin_clz(x);
-}
#endif
diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c
index afe0b99d77ee..f0e0003a4860 100644
--- a/tools/testing/selftests/ublk/null.c
+++ b/tools/testing/selftests/ublk/null.c
@@ -55,12 +55,13 @@ static void __setup_nop_io(int tag, const struct ublksrv_io_desc *iod,
sqe->user_data = build_user_data(tag, ublk_op, 0, q_id, 1);
}
-static int null_queue_zc_io(struct ublk_queue *q, int tag)
+static int null_queue_zc_io(struct ublk_thread *t, struct ublk_queue *q,
+ int tag)
{
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
struct io_uring_sqe *sqe[3];
- ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 3);
+ ublk_io_alloc_sqes(t, sqe, 3);
io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
sqe[0]->user_data = build_user_data(tag,
@@ -77,19 +78,21 @@ static int null_queue_zc_io(struct ublk_queue *q, int tag)
return 2;
}
-static int null_queue_auto_zc_io(struct ublk_queue *q, int tag)
+static int null_queue_auto_zc_io(struct ublk_thread *t, struct ublk_queue *q,
+ int tag)
{
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
struct io_uring_sqe *sqe[1];
- ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 1);
+ ublk_io_alloc_sqes(t, sqe, 1);
__setup_nop_io(tag, iod, sqe[0], q->q_id);
return 1;
}
-static void ublk_null_io_done(struct ublk_queue *q, int tag,
- const struct io_uring_cqe *cqe)
+static void ublk_null_io_done(struct ublk_thread *t, struct ublk_queue *q,
+ const struct io_uring_cqe *cqe)
{
+ unsigned tag = user_data_to_tag(cqe->user_data);
unsigned op = user_data_to_op(cqe->user_data);
struct ublk_io *io = ublk_get_io(q, tag);
@@ -105,11 +108,12 @@ static void ublk_null_io_done(struct ublk_queue *q, int tag,
if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF))
io->tgt_ios += 1;
- if (ublk_completed_tgt_io(q, tag))
- ublk_complete_io(q, tag, io->result);
+ if (ublk_completed_tgt_io(t, q, tag))
+ ublk_complete_io(t, q, tag, io->result);
}
-static int ublk_null_queue_io(struct ublk_queue *q, int tag)
+static int ublk_null_queue_io(struct ublk_thread *t, struct ublk_queue *q,
+ int tag)
{
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
unsigned auto_zc = ublk_queue_use_auto_zc(q);
@@ -117,14 +121,14 @@ static int ublk_null_queue_io(struct ublk_queue *q, int tag)
int queued;
if (auto_zc && !ublk_io_auto_zc_fallback(iod))
- queued = null_queue_auto_zc_io(q, tag);
+ queued = null_queue_auto_zc_io(t, q, tag);
else if (zc)
- queued = null_queue_zc_io(q, tag);
+ queued = null_queue_zc_io(t, q, tag);
else {
- ublk_complete_io(q, tag, iod->nr_sectors << 9);
+ ublk_complete_io(t, q, tag, iod->nr_sectors << 9);
return 0;
}
- ublk_queued_tgt_io(q, tag, queued);
+ ublk_queued_tgt_io(t, q, tag, queued);
return 0;
}
@@ -134,7 +138,7 @@ static int ublk_null_queue_io(struct ublk_queue *q, int tag)
*/
static unsigned short ublk_null_buf_index(const struct ublk_queue *q, int tag)
{
- if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK)
+ if (ublk_queue_auto_zc_fallback(q))
return (unsigned short)-1;
return q->ios[tag].buf_index;
}
diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c
index 37d50bbf5f5e..1fb9b7cc281b 100644
--- a/tools/testing/selftests/ublk/stripe.c
+++ b/tools/testing/selftests/ublk/stripe.c
@@ -123,7 +123,8 @@ static inline enum io_uring_op stripe_to_uring_op(
assert(0);
}
-static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
+static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
+ const struct ublksrv_io_desc *iod, int tag)
{
const struct stripe_conf *conf = get_chunk_shift(q);
unsigned auto_zc = (ublk_queue_use_auto_zc(q) != 0);
@@ -138,7 +139,7 @@ static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_
io->private_data = s;
calculate_stripe_array(conf, iod, s, base);
- ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, s->nr + extra);
+ ublk_io_alloc_sqes(t, sqe, s->nr + extra);
if (zc) {
io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, io->buf_index);
@@ -176,13 +177,14 @@ static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_
return s->nr + zc;
}
-static int handle_flush(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
+static int handle_flush(struct ublk_thread *t, struct ublk_queue *q,
+ const struct ublksrv_io_desc *iod, int tag)
{
const struct stripe_conf *conf = get_chunk_shift(q);
struct io_uring_sqe *sqe[NR_STRIPE];
int i;
- ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, conf->nr_files);
+ ublk_io_alloc_sqes(t, sqe, conf->nr_files);
for (i = 0; i < conf->nr_files; i++) {
io_uring_prep_fsync(sqe[i], i + 1, IORING_FSYNC_DATASYNC);
io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
@@ -191,7 +193,8 @@ static int handle_flush(struct ublk_queue *q, const struct ublksrv_io_desc *iod,
return conf->nr_files;
}
-static int stripe_queue_tgt_io(struct ublk_queue *q, int tag)
+static int stripe_queue_tgt_io(struct ublk_thread *t, struct ublk_queue *q,
+ int tag)
{
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
unsigned ublk_op = ublksrv_get_op(iod);
@@ -199,7 +202,7 @@ static int stripe_queue_tgt_io(struct ublk_queue *q, int tag)
switch (ublk_op) {
case UBLK_IO_OP_FLUSH:
- ret = handle_flush(q, iod, tag);
+ ret = handle_flush(t, q, iod, tag);
break;
case UBLK_IO_OP_WRITE_ZEROES:
case UBLK_IO_OP_DISCARD:
@@ -207,7 +210,7 @@ static int stripe_queue_tgt_io(struct ublk_queue *q, int tag)
break;
case UBLK_IO_OP_READ:
case UBLK_IO_OP_WRITE:
- ret = stripe_queue_tgt_rw_io(q, iod, tag);
+ ret = stripe_queue_tgt_rw_io(t, q, iod, tag);
break;
default:
ret = -EINVAL;
@@ -218,17 +221,19 @@ static int stripe_queue_tgt_io(struct ublk_queue *q, int tag)
return ret;
}
-static int ublk_stripe_queue_io(struct ublk_queue *q, int tag)
+static int ublk_stripe_queue_io(struct ublk_thread *t, struct ublk_queue *q,
+ int tag)
{
- int queued = stripe_queue_tgt_io(q, tag);
+ int queued = stripe_queue_tgt_io(t, q, tag);
- ublk_queued_tgt_io(q, tag, queued);
+ ublk_queued_tgt_io(t, q, tag, queued);
return 0;
}
-static void ublk_stripe_io_done(struct ublk_queue *q, int tag,
- const struct io_uring_cqe *cqe)
+static void ublk_stripe_io_done(struct ublk_thread *t, struct ublk_queue *q,
+ const struct io_uring_cqe *cqe)
{
+ unsigned tag = user_data_to_tag(cqe->user_data);
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
unsigned op = user_data_to_op(cqe->user_data);
struct ublk_io *io = ublk_get_io(q, tag);
@@ -257,13 +262,13 @@ static void ublk_stripe_io_done(struct ublk_queue *q, int tag,
}
}
- if (ublk_completed_tgt_io(q, tag)) {
+ if (ublk_completed_tgt_io(t, q, tag)) {
int res = io->result;
if (!res)
res = iod->nr_sectors << 9;
- ublk_complete_io(q, tag, res);
+ ublk_complete_io(t, q, tag, res);
free_stripe_array(io->private_data);
io->private_data = NULL;
diff --git a/tools/testing/selftests/ublk/utils.h b/tools/testing/selftests/ublk/utils.h
new file mode 100644
index 000000000000..36545d1567f1
--- /dev/null
+++ b/tools/testing/selftests/ublk/utils.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef KUBLK_UTILS_H
+#define KUBLK_UTILS_H
+
+#define __maybe_unused __attribute__((unused))
+
+#ifndef min
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
+
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
+#endif
+
+#ifndef container_of
+#define container_of(ptr, type, member) ({ \
+ unsigned long __mptr = (unsigned long)(ptr); \
+ ((type *)(__mptr - offsetof(type, member))); })
+#endif
+
+#define round_up(val, rnd) \
+ (((val) + ((rnd) - 1)) & ~((rnd) - 1))
+
+static inline unsigned int ilog2(unsigned int x)
+{
+ if (x == 0)
+ return 0;
+ return (sizeof(x) * 8 - 1) - __builtin_clz(x);
+}
+
+#define UBLK_DBG_DEV (1U << 0)
+#define UBLK_DBG_THREAD (1U << 1)
+#define UBLK_DBG_IO_CMD (1U << 2)
+#define UBLK_DBG_IO (1U << 3)
+#define UBLK_DBG_CTRL_CMD (1U << 4)
+#define UBLK_LOG (1U << 5)
+
+extern unsigned int ublk_dbg_mask;
+
+static inline void ublk_err(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+}
+
+static inline void ublk_log(const char *fmt, ...)
+{
+ if (ublk_dbg_mask & UBLK_LOG) {
+ va_list ap;
+
+ va_start(ap, fmt);
+ vfprintf(stdout, fmt, ap);
+ }
+}
+
+static inline void ublk_dbg(int level, const char *fmt, ...)
+{
+ if (level & ublk_dbg_mask) {
+ va_list ap;
+
+ va_start(ap, fmt);
+ vfprintf(stdout, fmt, ap);
+ }
+}
+
+#endif
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index 12a0614b9fd4..918a2caa070e 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -12,7 +12,7 @@ TEST_GEN_PROGS += vdso_test_correctness
TEST_GEN_PROGS += vdso_test_getrandom
TEST_GEN_PROGS += vdso_test_chacha
-CFLAGS := -std=gnu99 -O2
+CFLAGS := -std=gnu99 -O2 -Wall -Wstrict-prototypes
ifeq ($(CONFIG_X86_32),y)
LDLIBS += -lgcc_s
diff --git a/tools/testing/selftests/vDSO/vdso_config.h b/tools/testing/selftests/vDSO/vdso_config.h
index 722260f97561..5fdd0f362337 100644
--- a/tools/testing/selftests/vDSO/vdso_config.h
+++ b/tools/testing/selftests/vDSO/vdso_config.h
@@ -58,6 +58,7 @@
#define VDSO_NAMES 1
#endif
+__attribute__((unused))
static const char *versions[7] = {
"LINUX_2.6",
"LINUX_2.6.15",
@@ -68,6 +69,7 @@ static const char *versions[7] = {
"LINUX_5.10"
};
+__attribute__((unused))
static const char *names[2][7] = {
{
"__kernel_gettimeofday",
diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
index 9ce795b806f0..4d3d96f1e440 100644..120000
--- a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
+++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
@@ -1,58 +1 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * vdso_test_gettimeofday.c: Sample code to test parse_vdso.c and
- * vDSO gettimeofday()
- * Copyright (c) 2014 Andy Lutomirski
- *
- * Compile with:
- * gcc -std=gnu99 vdso_test_gettimeofday.c parse_vdso_gettimeofday.c
- *
- * Tested on x86, 32-bit and 64-bit. It may work on other architectures, too.
- */
-
-#include <stdio.h>
-#ifndef NOLIBC
-#include <sys/auxv.h>
-#include <sys/time.h>
-#endif
-
-#include "../kselftest.h"
-#include "parse_vdso.h"
-#include "vdso_config.h"
-#include "vdso_call.h"
-
-int main(int argc, char **argv)
-{
- const char *version = versions[VDSO_VERSION];
- const char **name = (const char **)&names[VDSO_NAMES];
-
- unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
- if (!sysinfo_ehdr) {
- printf("AT_SYSINFO_EHDR is not present!\n");
- return KSFT_SKIP;
- }
-
- vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR));
-
- /* Find gettimeofday. */
- typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
- gtod_t gtod = (gtod_t)vdso_sym(version, name[0]);
-
- if (!gtod) {
- printf("Could not find %s\n", name[0]);
- return KSFT_SKIP;
- }
-
- struct timeval tv;
- long ret = VDSO_CALL(gtod, 2, &tv, 0);
-
- if (ret == 0) {
- printf("The time is %lld.%06lld\n",
- (long long)tv.tv_sec, (long long)tv.tv_usec);
- } else {
- printf("%s failed\n", name[0]);
- return KSFT_FAIL;
- }
-
- return 0;
-}
+vdso_test_gettimeofday.c \ No newline at end of file
diff --git a/tools/testing/selftests/vDSO/vdso_test_chacha.c b/tools/testing/selftests/vDSO/vdso_test_chacha.c
index 8757f738b0b1..0aad682b12c8 100644
--- a/tools/testing/selftests/vDSO/vdso_test_chacha.c
+++ b/tools/testing/selftests/vDSO/vdso_test_chacha.c
@@ -76,7 +76,8 @@ static void reference_chacha20_blocks(uint8_t *dst_bytes, const uint32_t *key, u
void __weak __arch_chacha20_blocks_nostack(uint8_t *dst_bytes, const uint32_t *key, uint32_t *counter, size_t nblocks)
{
- ksft_exit_skip("Not implemented on architecture\n");
+ ksft_test_result_skip("Not implemented on architecture\n");
+ ksft_finished();
}
int main(int argc, char *argv[])
diff --git a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c
index 38d46a8bf7cb..b5d5f59f725a 100644
--- a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c
+++ b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c
@@ -13,7 +13,6 @@
#define _GNU_SOURCE
#include <elf.h>
-#include <err.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
diff --git a/tools/testing/selftests/vDSO/vdso_test_correctness.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c
index 5fb97ad67eea..da651cf53c6c 100644
--- a/tools/testing/selftests/vDSO/vdso_test_correctness.c
+++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c
@@ -108,7 +108,7 @@ static void *vsyscall_getcpu(void)
}
-static void fill_function_pointers()
+static void fill_function_pointers(void)
{
void *vdso = dlopen("linux-vdso.so.1",
RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
diff --git a/tools/testing/selftests/vDSO/vdso_test_getrandom.c b/tools/testing/selftests/vDSO/vdso_test_getrandom.c
index 95057f7567db..dd1132508a0d 100644
--- a/tools/testing/selftests/vDSO/vdso_test_getrandom.c
+++ b/tools/testing/selftests/vDSO/vdso_test_getrandom.c
@@ -21,7 +21,6 @@
#include <sys/wait.h>
#include <sys/types.h>
#include <linux/random.h>
-#include <linux/compiler.h>
#include <linux/ptrace.h>
#include "../kselftest.h"
@@ -101,6 +100,7 @@ out:
return state;
}
+__attribute__((unused)) /* Example for libc implementors */
static void vgetrandom_put_state(void *state)
{
if (!state)
@@ -242,6 +242,7 @@ static void kselftest(void)
pid_t child;
ksft_print_header();
+ vgetrandom_init();
ksft_set_plan(2);
for (size_t i = 0; i < 1000; ++i) {
@@ -265,7 +266,7 @@ static void kselftest(void)
}
for (;;) {
struct ptrace_syscall_info info = { 0 };
- int status, ret;
+ int status;
ksft_assert(waitpid(child, &status, 0) >= 0);
if (WIFEXITED(status)) {
ksft_assert(WEXITSTATUS(status) == 0);
@@ -295,8 +296,6 @@ static void usage(const char *argv0)
int main(int argc, char *argv[])
{
- vgetrandom_init();
-
if (argc == 1) {
kselftest();
return 0;
@@ -306,6 +305,9 @@ int main(int argc, char *argv[])
usage(argv[0]);
return 1;
}
+
+ vgetrandom_init();
+
if (!strcmp(argv[1], "bench-single"))
bench_single();
else if (!strcmp(argv[1], "bench-multi"))
diff --git a/tools/testing/selftests/vsock/.gitignore b/tools/testing/selftests/vsock/.gitignore
new file mode 100644
index 000000000000..9c5bf379480f
--- /dev/null
+++ b/tools/testing/selftests/vsock/.gitignore
@@ -0,0 +1,2 @@
+vmtest.log
+vsock_test
diff --git a/tools/testing/selftests/vsock/Makefile b/tools/testing/selftests/vsock/Makefile
new file mode 100644
index 000000000000..c407c0afd938
--- /dev/null
+++ b/tools/testing/selftests/vsock/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CURDIR := $(abspath .)
+TOOLSDIR := $(abspath ../../..)
+VSOCK_TEST_DIR := $(TOOLSDIR)/testing/vsock
+VSOCK_TEST_SRCS := $(wildcard $(VSOCK_TEST_DIR)/*.c $(VSOCK_TEST_DIR)/*.h)
+
+$(OUTPUT)/vsock_test: $(VSOCK_TEST_DIR)/vsock_test
+ install -m 755 $< $@
+
+$(VSOCK_TEST_DIR)/vsock_test: $(VSOCK_TEST_SRCS)
+ $(MAKE) -C $(VSOCK_TEST_DIR) vsock_test
+TEST_PROGS += vmtest.sh
+TEST_GEN_FILES := vsock_test
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/vsock/config b/tools/testing/selftests/vsock/config
new file mode 100644
index 000000000000..5f0a4f17dfc9
--- /dev/null
+++ b/tools/testing/selftests/vsock/config
@@ -0,0 +1,111 @@
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_BPF=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_BPF_JIT=y
+CONFIG_HAVE_EBPF_JIT=y
+CONFIG_BPF_EVENTS=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_HAVE_DYNAMIC_FTRACE=y
+CONFIG_DYNAMIC_FTRACE=y
+CONFIG_HAVE_KPROBES=y
+CONFIG_KPROBES=y
+CONFIG_KPROBE_EVENTS=y
+CONFIG_ARCH_SUPPORTS_UPROBES=y
+CONFIG_UPROBES=y
+CONFIG_UPROBE_EVENTS=y
+CONFIG_DEBUG_FS=y
+CONFIG_FW_CFG_SYSFS=y
+CONFIG_FW_CFG_SYSFS_CMDLINE=y
+CONFIG_DRM=y
+CONFIG_DRM_VIRTIO_GPU=y
+CONFIG_DRM_VIRTIO_GPU_KMS=y
+CONFIG_DRM_BOCHS=y
+CONFIG_VIRTIO_IOMMU=y
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_SND_SEQUENCER=y
+CONFIG_SND_PCI=y
+CONFIG_SND_INTEL8X0=y
+CONFIG_SND_HDA_CODEC_REALTEK=y
+CONFIG_SECURITYFS=y
+CONFIG_CGROUP_BPF=y
+CONFIG_SQUASHFS=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_SQUASHFS_ZSTD=y
+CONFIG_FUSE_FS=y
+CONFIG_VIRTIO_FS=y
+CONFIG_SERIO=y
+CONFIG_PCI=y
+CONFIG_INPUT=y
+CONFIG_INPUT_KEYBOARD=y
+CONFIG_KEYBOARD_ATKBD=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_X86_VERBOSE_BOOTUP=y
+CONFIG_VGA_CONSOLE=y
+CONFIG_FB=y
+CONFIG_FB_VESA=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_HCTOSYS=y
+CONFIG_RTC_DRV_CMOS=y
+CONFIG_HYPERVISOR_GUEST=y
+CONFIG_PARAVIRT=y
+CONFIG_KVM_GUEST=y
+CONFIG_KVM=y
+CONFIG_KVM_INTEL=y
+CONFIG_KVM_AMD=y
+CONFIG_VSOCKETS=y
+CONFIG_VSOCKETS_DIAG=y
+CONFIG_VSOCKETS_LOOPBACK=y
+CONFIG_VMWARE_VMCI_VSOCKETS=y
+CONFIG_VIRTIO_VSOCKETS=y
+CONFIG_VIRTIO_VSOCKETS_COMMON=y
+CONFIG_HYPERV_VSOCKETS=y
+CONFIG_VMWARE_VMCI=y
+CONFIG_VHOST_VSOCK=y
+CONFIG_HYPERV=y
+CONFIG_UEVENT_HELPER=n
+CONFIG_VIRTIO=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_NET=y
+CONFIG_NET_CORE=y
+CONFIG_NETDEVICES=y
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_INET=y
+CONFIG_NET_9P=y
+CONFIG_NET_9P_VIRTIO=y
+CONFIG_9P_FS=y
+CONFIG_VIRTIO_NET=y
+CONFIG_CMDLINE_OVERRIDE=n
+CONFIG_BINFMT_SCRIPT=y
+CONFIG_SHMEM=y
+CONFIG_TMPFS=y
+CONFIG_UNIX=y
+CONFIG_MODULE_SIG_FORCE=n
+CONFIG_DEVTMPFS=y
+CONFIG_TTY=y
+CONFIG_VT=y
+CONFIG_UNIX98_PTYS=y
+CONFIG_EARLY_PRINTK=y
+CONFIG_INOTIFY_USER=y
+CONFIG_BLOCK=y
+CONFIG_SCSI_LOWLEVEL=y
+CONFIG_SCSI=y
+CONFIG_SCSI_VIRTIO=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_CORE=y
+CONFIG_I6300ESB_WDT=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_OVERLAY_FS=y
+CONFIG_DAX=y
+CONFIG_DAX_DRIVER=y
+CONFIG_FS_DAX=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_ZONE_DEVICE=y
diff --git a/tools/testing/selftests/vsock/settings b/tools/testing/selftests/vsock/settings
new file mode 100644
index 000000000000..694d70710ff0
--- /dev/null
+++ b/tools/testing/selftests/vsock/settings
@@ -0,0 +1 @@
+timeout=300
diff --git a/tools/testing/selftests/vsock/vmtest.sh b/tools/testing/selftests/vsock/vmtest.sh
new file mode 100755
index 000000000000..edacebfc1632
--- /dev/null
+++ b/tools/testing/selftests/vsock/vmtest.sh
@@ -0,0 +1,487 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2025 Meta Platforms, Inc. and affiliates
+#
+# Dependencies:
+# * virtme-ng
+# * busybox-static (used by virtme-ng)
+# * qemu (used by virtme-ng)
+
+readonly SCRIPT_DIR="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
+readonly KERNEL_CHECKOUT=$(realpath "${SCRIPT_DIR}"/../../../../)
+
+source "${SCRIPT_DIR}"/../kselftest/ktap_helpers.sh
+
+readonly VSOCK_TEST="${SCRIPT_DIR}"/vsock_test
+readonly TEST_GUEST_PORT=51000
+readonly TEST_HOST_PORT=50000
+readonly TEST_HOST_PORT_LISTENER=50001
+readonly SSH_GUEST_PORT=22
+readonly SSH_HOST_PORT=2222
+readonly VSOCK_CID=1234
+readonly WAIT_PERIOD=3
+readonly WAIT_PERIOD_MAX=60
+readonly WAIT_TOTAL=$(( WAIT_PERIOD * WAIT_PERIOD_MAX ))
+readonly QEMU_PIDFILE=$(mktemp /tmp/qemu_vsock_vmtest_XXXX.pid)
+
+# virtme-ng offers a netdev for ssh when using "--ssh", but we also need a
+# control port forwarded for vsock_test. Because virtme-ng doesn't support
+# adding an additional port to forward to the device created from "--ssh" and
+# virtme-init mistakenly sets identical IPs to the ssh device and additional
+# devices, we instead opt out of using --ssh, add the device manually, and also
+# add the kernel cmdline options that virtme-init uses to setup the interface.
+readonly QEMU_TEST_PORT_FWD="hostfwd=tcp::${TEST_HOST_PORT}-:${TEST_GUEST_PORT}"
+readonly QEMU_SSH_PORT_FWD="hostfwd=tcp::${SSH_HOST_PORT}-:${SSH_GUEST_PORT}"
+readonly QEMU_OPTS="\
+ -netdev user,id=n0,${QEMU_TEST_PORT_FWD},${QEMU_SSH_PORT_FWD} \
+ -device virtio-net-pci,netdev=n0 \
+ -device vhost-vsock-pci,guest-cid=${VSOCK_CID} \
+ --pidfile ${QEMU_PIDFILE} \
+"
+readonly KERNEL_CMDLINE="\
+ virtme.dhcp net.ifnames=0 biosdevname=0 \
+ virtme.ssh virtme_ssh_channel=tcp virtme_ssh_user=$USER \
+"
+readonly LOG=$(mktemp /tmp/vsock_vmtest_XXXX.log)
+readonly TEST_NAMES=(vm_server_host_client vm_client_host_server vm_loopback)
+readonly TEST_DESCS=(
+ "Run vsock_test in server mode on the VM and in client mode on the host."
+ "Run vsock_test in client mode on the VM and in server mode on the host."
+ "Run vsock_test using the loopback transport in the VM."
+)
+
+VERBOSE=0
+
+usage() {
+ local name
+ local desc
+ local i
+
+ echo
+ echo "$0 [OPTIONS] [TEST]..."
+ echo "If no TEST argument is given, all tests will be run."
+ echo
+ echo "Options"
+ echo " -b: build the kernel from the current source tree and use it for guest VMs"
+ echo " -q: set the path to or name of qemu binary"
+ echo " -v: verbose output"
+ echo
+ echo "Available tests"
+
+ for ((i = 0; i < ${#TEST_NAMES[@]}; i++)); do
+ name=${TEST_NAMES[${i}]}
+ desc=${TEST_DESCS[${i}]}
+ printf "\t%-35s%-35s\n" "${name}" "${desc}"
+ done
+ echo
+
+ exit 1
+}
+
+die() {
+ echo "$*" >&2
+ exit "${KSFT_FAIL}"
+}
+
+vm_ssh() {
+ ssh -q -o UserKnownHostsFile=/dev/null -p ${SSH_HOST_PORT} localhost "$@"
+ return $?
+}
+
+cleanup() {
+ if [[ -s "${QEMU_PIDFILE}" ]]; then
+ pkill -SIGTERM -F "${QEMU_PIDFILE}" > /dev/null 2>&1
+ fi
+
+ # If failure occurred during or before qemu start up, then we need
+ # to clean this up ourselves.
+ if [[ -e "${QEMU_PIDFILE}" ]]; then
+ rm "${QEMU_PIDFILE}"
+ fi
+}
+
+check_args() {
+ local found
+
+ for arg in "$@"; do
+ found=0
+ for name in "${TEST_NAMES[@]}"; do
+ if [[ "${name}" = "${arg}" ]]; then
+ found=1
+ break
+ fi
+ done
+
+ if [[ "${found}" -eq 0 ]]; then
+ echo "${arg} is not an available test" >&2
+ usage
+ fi
+ done
+
+ for arg in "$@"; do
+ if ! command -v > /dev/null "test_${arg}"; then
+ echo "Test ${arg} not found" >&2
+ usage
+ fi
+ done
+}
+
+check_deps() {
+ for dep in vng ${QEMU} busybox pkill ssh; do
+ if [[ ! -x $(command -v "${dep}") ]]; then
+ echo -e "skip: dependency ${dep} not found!\n"
+ exit "${KSFT_SKIP}"
+ fi
+ done
+
+ if [[ ! -x $(command -v "${VSOCK_TEST}") ]]; then
+ printf "skip: %s not found!" "${VSOCK_TEST}"
+ printf " Please build the kselftest vsock target.\n"
+ exit "${KSFT_SKIP}"
+ fi
+}
+
+check_vng() {
+ local tested_versions
+ local version
+ local ok
+
+ tested_versions=("1.33" "1.36")
+ version="$(vng --version)"
+
+ ok=0
+ for tv in "${tested_versions[@]}"; do
+ if [[ "${version}" == *"${tv}"* ]]; then
+ ok=1
+ break
+ fi
+ done
+
+ if [[ ! "${ok}" -eq 1 ]]; then
+ printf "warning: vng version '%s' has not been tested and may " "${version}" >&2
+ printf "not function properly.\n\tThe following versions have been tested: " >&2
+ echo "${tested_versions[@]}" >&2
+ fi
+}
+
+handle_build() {
+ if [[ ! "${BUILD}" -eq 1 ]]; then
+ return
+ fi
+
+ if [[ ! -d "${KERNEL_CHECKOUT}" ]]; then
+ echo "-b requires vmtest.sh called from the kernel source tree" >&2
+ exit 1
+ fi
+
+ pushd "${KERNEL_CHECKOUT}" &>/dev/null
+
+ if ! vng --kconfig --config "${SCRIPT_DIR}"/config; then
+ die "failed to generate .config for kernel source tree (${KERNEL_CHECKOUT})"
+ fi
+
+ if ! make -j$(nproc); then
+ die "failed to build kernel from source tree (${KERNEL_CHECKOUT})"
+ fi
+
+ popd &>/dev/null
+}
+
+vm_start() {
+ local logfile=/dev/null
+ local verbose_opt=""
+ local kernel_opt=""
+ local qemu
+
+ qemu=$(command -v "${QEMU}")
+
+ if [[ "${VERBOSE}" -eq 1 ]]; then
+ verbose_opt="--verbose"
+ logfile=/dev/stdout
+ fi
+
+ if [[ "${BUILD}" -eq 1 ]]; then
+ kernel_opt="${KERNEL_CHECKOUT}"
+ fi
+
+ vng \
+ --run \
+ ${kernel_opt} \
+ ${verbose_opt} \
+ --qemu-opts="${QEMU_OPTS}" \
+ --qemu="${qemu}" \
+ --user root \
+ --append "${KERNEL_CMDLINE}" \
+ --rw &> ${logfile} &
+
+ if ! timeout ${WAIT_TOTAL} \
+ bash -c 'while [[ ! -s '"${QEMU_PIDFILE}"' ]]; do sleep 1; done; exit 0'; then
+ die "failed to boot VM"
+ fi
+}
+
+vm_wait_for_ssh() {
+ local i
+
+ i=0
+ while true; do
+ if [[ ${i} -gt ${WAIT_PERIOD_MAX} ]]; then
+ die "Timed out waiting for guest ssh"
+ fi
+ if vm_ssh -- true; then
+ break
+ fi
+ i=$(( i + 1 ))
+ sleep ${WAIT_PERIOD}
+ done
+}
+
+# derived from selftests/net/net_helper.sh
+wait_for_listener()
+{
+ local port=$1
+ local interval=$2
+ local max_intervals=$3
+ local protocol=tcp
+ local pattern
+ local i
+
+ pattern=":$(printf "%04X" "${port}") "
+
+ # for tcp protocol additionally check the socket state
+ [ "${protocol}" = "tcp" ] && pattern="${pattern}0A"
+ for i in $(seq "${max_intervals}"); do
+ if awk '{print $2" "$4}' /proc/net/"${protocol}"* | \
+ grep -q "${pattern}"; then
+ break
+ fi
+ sleep "${interval}"
+ done
+}
+
+vm_wait_for_listener() {
+ local port=$1
+
+ vm_ssh <<EOF
+$(declare -f wait_for_listener)
+wait_for_listener ${port} ${WAIT_PERIOD} ${WAIT_PERIOD_MAX}
+EOF
+}
+
+host_wait_for_listener() {
+ wait_for_listener "${TEST_HOST_PORT_LISTENER}" "${WAIT_PERIOD}" "${WAIT_PERIOD_MAX}"
+}
+
+__log_stdin() {
+ cat | awk '{ printf "%s:\t%s\n","'"${prefix}"'", $0 }'
+}
+
+__log_args() {
+ echo "$*" | awk '{ printf "%s:\t%s\n","'"${prefix}"'", $0 }'
+}
+
+log() {
+ local prefix="$1"
+
+ shift
+ local redirect=
+ if [[ ${VERBOSE} -eq 0 ]]; then
+ redirect=/dev/null
+ else
+ redirect=/dev/stdout
+ fi
+
+ if [[ "$#" -eq 0 ]]; then
+ __log_stdin | tee -a "${LOG}" > ${redirect}
+ else
+ __log_args "$@" | tee -a "${LOG}" > ${redirect}
+ fi
+}
+
+log_setup() {
+ log "setup" "$@"
+}
+
+log_host() {
+ local testname=$1
+
+ shift
+ log "test:${testname}:host" "$@"
+}
+
+log_guest() {
+ local testname=$1
+
+ shift
+ log "test:${testname}:guest" "$@"
+}
+
+test_vm_server_host_client() {
+ local testname="${FUNCNAME[0]#test_}"
+
+ vm_ssh -- "${VSOCK_TEST}" \
+ --mode=server \
+ --control-port="${TEST_GUEST_PORT}" \
+ --peer-cid=2 \
+ 2>&1 | log_guest "${testname}" &
+
+ vm_wait_for_listener "${TEST_GUEST_PORT}"
+
+ ${VSOCK_TEST} \
+ --mode=client \
+ --control-host=127.0.0.1 \
+ --peer-cid="${VSOCK_CID}" \
+ --control-port="${TEST_HOST_PORT}" 2>&1 | log_host "${testname}"
+
+ return $?
+}
+
+test_vm_client_host_server() {
+ local testname="${FUNCNAME[0]#test_}"
+
+ ${VSOCK_TEST} \
+ --mode "server" \
+ --control-port "${TEST_HOST_PORT_LISTENER}" \
+ --peer-cid "${VSOCK_CID}" 2>&1 | log_host "${testname}" &
+
+ host_wait_for_listener
+
+ vm_ssh -- "${VSOCK_TEST}" \
+ --mode=client \
+ --control-host=10.0.2.2 \
+ --peer-cid=2 \
+ --control-port="${TEST_HOST_PORT_LISTENER}" 2>&1 | log_guest "${testname}"
+
+ return $?
+}
+
+test_vm_loopback() {
+ local testname="${FUNCNAME[0]#test_}"
+ local port=60000 # non-forwarded local port
+
+ vm_ssh -- "${VSOCK_TEST}" \
+ --mode=server \
+ --control-port="${port}" \
+ --peer-cid=1 2>&1 | log_guest "${testname}" &
+
+ vm_wait_for_listener "${port}"
+
+ vm_ssh -- "${VSOCK_TEST}" \
+ --mode=client \
+ --control-host="127.0.0.1" \
+ --control-port="${port}" \
+ --peer-cid=1 2>&1 | log_guest "${testname}"
+
+ return $?
+}
+
+run_test() {
+ local host_oops_cnt_before
+ local host_warn_cnt_before
+ local vm_oops_cnt_before
+ local vm_warn_cnt_before
+ local host_oops_cnt_after
+ local host_warn_cnt_after
+ local vm_oops_cnt_after
+ local vm_warn_cnt_after
+ local name
+ local rc
+
+ host_oops_cnt_before=$(dmesg | grep -c -i 'Oops')
+ host_warn_cnt_before=$(dmesg --level=warn | wc -l)
+ vm_oops_cnt_before=$(vm_ssh -- dmesg | grep -c -i 'Oops')
+ vm_warn_cnt_before=$(vm_ssh -- dmesg --level=warn | wc -l)
+
+ name=$(echo "${1}" | awk '{ print $1 }')
+ eval test_"${name}"
+ rc=$?
+
+ host_oops_cnt_after=$(dmesg | grep -i 'Oops' | wc -l)
+ if [[ ${host_oops_cnt_after} -gt ${host_oops_cnt_before} ]]; then
+ echo "FAIL: kernel oops detected on host" | log_host "${name}"
+ rc=$KSFT_FAIL
+ fi
+
+ host_warn_cnt_after=$(dmesg --level=warn | wc -l)
+ if [[ ${host_warn_cnt_after} -gt ${host_warn_cnt_before} ]]; then
+ echo "FAIL: kernel warning detected on host" | log_host "${name}"
+ rc=$KSFT_FAIL
+ fi
+
+ vm_oops_cnt_after=$(vm_ssh -- dmesg | grep -i 'Oops' | wc -l)
+ if [[ ${vm_oops_cnt_after} -gt ${vm_oops_cnt_before} ]]; then
+ echo "FAIL: kernel oops detected on vm" | log_host "${name}"
+ rc=$KSFT_FAIL
+ fi
+
+ vm_warn_cnt_after=$(vm_ssh -- dmesg --level=warn | wc -l)
+ if [[ ${vm_warn_cnt_after} -gt ${vm_warn_cnt_before} ]]; then
+ echo "FAIL: kernel warning detected on vm" | log_host "${name}"
+ rc=$KSFT_FAIL
+ fi
+
+ return "${rc}"
+}
+
+QEMU="qemu-system-$(uname -m)"
+
+while getopts :hvsq:b o
+do
+ case $o in
+ v) VERBOSE=1;;
+ b) BUILD=1;;
+ q) QEMU=$OPTARG;;
+ h|*) usage;;
+ esac
+done
+shift $((OPTIND-1))
+
+trap cleanup EXIT
+
+if [[ ${#} -eq 0 ]]; then
+ ARGS=("${TEST_NAMES[@]}")
+else
+ ARGS=("$@")
+fi
+
+check_args "${ARGS[@]}"
+check_deps
+check_vng
+handle_build
+
+echo "1..${#ARGS[@]}"
+
+log_setup "Booting up VM"
+vm_start
+vm_wait_for_ssh
+log_setup "VM booted up"
+
+cnt_pass=0
+cnt_fail=0
+cnt_skip=0
+cnt_total=0
+for arg in "${ARGS[@]}"; do
+ run_test "${arg}"
+ rc=$?
+ if [[ ${rc} -eq $KSFT_PASS ]]; then
+ cnt_pass=$(( cnt_pass + 1 ))
+ echo "ok ${cnt_total} ${arg}"
+ elif [[ ${rc} -eq $KSFT_SKIP ]]; then
+ cnt_skip=$(( cnt_skip + 1 ))
+ echo "ok ${cnt_total} ${arg} # SKIP"
+ elif [[ ${rc} -eq $KSFT_FAIL ]]; then
+ cnt_fail=$(( cnt_fail + 1 ))
+ echo "not ok ${cnt_total} ${arg} # exit=$rc"
+ fi
+ cnt_total=$(( cnt_total + 1 ))
+done
+
+echo "SUMMARY: PASS=${cnt_pass} SKIP=${cnt_skip} FAIL=${cnt_fail}"
+echo "Log: ${LOG}"
+
+if [ $((cnt_pass + cnt_skip)) -eq ${cnt_total} ]; then
+ exit "$KSFT_PASS"
+else
+ exit "$KSFT_FAIL"
+fi
diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
index f314d3789f17..0a5381717e9f 100644
--- a/tools/testing/selftests/wireguard/qemu/kernel.config
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
@@ -16,9 +16,13 @@ CONFIG_NETFILTER_ADVANCED=y
CONFIG_NF_CONNTRACK=y
CONFIG_NF_NAT=y
CONFIG_NETFILTER_XTABLES=y
+CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NETFILTER_XT_NAT=y
CONFIG_NETFILTER_XT_MATCH_LENGTH=y
CONFIG_NETFILTER_XT_MARK=y
+CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_NF_FILTER=y
CONFIG_IP_NF_MANGLE=y
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index 14718ca23a05..816e7e057585 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -1442,8 +1442,29 @@ static inline void free_anon_vma_name(struct vm_area_struct *vma)
(void)vma;
}
+/* Declared in vma.h. */
+static inline void set_vma_from_desc(struct vm_area_struct *vma,
+ struct vm_area_desc *desc);
+
+static inline struct vm_area_desc *vma_to_desc(struct vm_area_struct *vma,
+ struct vm_area_desc *desc);
+
+static int compat_vma_mmap_prepare(struct file *file,
+ struct vm_area_struct *vma)
+{
+ struct vm_area_desc desc;
+ int err;
+
+ err = file->f_op->mmap_prepare(vma_to_desc(vma, &desc));
+ if (err)
+ return err;
+ set_vma_from_desc(vma, &desc);
+
+ return 0;
+}
+
/* Did the driver provide valid mmap hook configuration? */
-static inline bool file_has_valid_mmap_hooks(struct file *file)
+static inline bool can_mmap_file(struct file *file)
{
bool has_mmap = file->f_op->mmap;
bool has_mmap_prepare = file->f_op->mmap_prepare;
@@ -1451,22 +1472,21 @@ static inline bool file_has_valid_mmap_hooks(struct file *file)
/* Hooks are mutually exclusive. */
if (WARN_ON_ONCE(has_mmap && has_mmap_prepare))
return false;
- if (WARN_ON_ONCE(!has_mmap && !has_mmap_prepare))
+ if (!has_mmap && !has_mmap_prepare)
return false;
return true;
}
-static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
+static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma)
{
- if (WARN_ON_ONCE(file->f_op->mmap_prepare))
- return -EINVAL;
+ if (file->f_op->mmap_prepare)
+ return compat_vma_mmap_prepare(file, vma);
return file->f_op->mmap(file, vma);
}
-static inline int __call_mmap_prepare(struct file *file,
- struct vm_area_desc *desc)
+static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc)
{
return file->f_op->mmap_prepare(desc);
}
diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile
index 6e0b4e95e230..88211fd132d2 100644
--- a/tools/testing/vsock/Makefile
+++ b/tools/testing/vsock/Makefile
@@ -5,6 +5,7 @@ vsock_test: vsock_test.o vsock_test_zerocopy.o timeout.o control.o util.o msg_ze
vsock_diag_test: vsock_diag_test.o timeout.o control.o util.o
vsock_perf: vsock_perf.o msg_zerocopy_common.o
+vsock_test: LDLIBS = -lpthread
vsock_uring_test: LDLIBS = -luring
vsock_uring_test: control.o util.o vsock_uring_test.o timeout.o msg_zerocopy_common.o
diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index 0c7e9cbcbc85..7b861a8e997a 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -7,6 +7,7 @@
* Author: Stefan Hajnoczi <[email protected]>
*/
+#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdint.h>
@@ -16,6 +17,7 @@
#include <unistd.h>
#include <assert.h>
#include <sys/epoll.h>
+#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/sockios.h>
@@ -23,6 +25,9 @@
#include "control.h"
#include "util.h"
+#define KALLSYMS_PATH "/proc/kallsyms"
+#define KALLSYMS_LINE_LEN 512
+
/* Install signal handlers */
void init_signals(void)
{
@@ -97,39 +102,52 @@ void vsock_wait_remote_close(int fd)
close(epollfd);
}
-/* Wait until transport reports no data left to be sent.
- * Return false if transport does not implement the unsent_bytes() callback.
+/* Wait until ioctl gives an expected int value.
+ * Return false if the op is not supported.
*/
-bool vsock_wait_sent(int fd)
+bool vsock_ioctl_int(int fd, unsigned long op, int expected)
{
- int ret, sock_bytes_unsent;
+ int actual, ret;
+ char name[32];
+
+ snprintf(name, sizeof(name), "ioctl(%lu)", op);
timeout_begin(TIMEOUT);
do {
- ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent);
+ ret = ioctl(fd, op, &actual);
if (ret < 0) {
- if (errno == EOPNOTSUPP)
+ if (errno == EOPNOTSUPP || errno == ENOTTY)
break;
- perror("ioctl(SIOCOUTQ)");
+ perror(name);
exit(EXIT_FAILURE);
}
- timeout_check("SIOCOUTQ");
- } while (sock_bytes_unsent != 0);
+ timeout_check(name);
+ } while (actual != expected);
timeout_end();
- return !ret;
+ return ret >= 0;
}
-/* Create socket <type>, bind to <cid, port> and return the file descriptor. */
-int vsock_bind(unsigned int cid, unsigned int port, int type)
+/* Wait until transport reports no data left to be sent.
+ * Return false if transport does not implement the unsent_bytes() callback.
+ */
+bool vsock_wait_sent(int fd)
+{
+ return vsock_ioctl_int(fd, SIOCOUTQ, 0);
+}
+
+/* Create socket <type>, bind to <cid, port>.
+ * Return the file descriptor, or -1 on error.
+ */
+int vsock_bind_try(unsigned int cid, unsigned int port, int type)
{
struct sockaddr_vm sa = {
.svm_family = AF_VSOCK,
.svm_cid = cid,
.svm_port = port,
};
- int fd;
+ int fd, saved_errno;
fd = socket(AF_VSOCK, type, 0);
if (fd < 0) {
@@ -138,6 +156,22 @@ int vsock_bind(unsigned int cid, unsigned int port, int type)
}
if (bind(fd, (struct sockaddr *)&sa, sizeof(sa))) {
+ saved_errno = errno;
+ close(fd);
+ errno = saved_errno;
+ fd = -1;
+ }
+
+ return fd;
+}
+
+/* Create socket <type>, bind to <cid, port> and return the file descriptor. */
+int vsock_bind(unsigned int cid, unsigned int port, int type)
+{
+ int fd;
+
+ fd = vsock_bind_try(cid, port, type);
+ if (fd < 0) {
perror("bind");
exit(EXIT_FAILURE);
}
@@ -836,3 +870,55 @@ void enable_so_linger(int fd, int timeout)
exit(EXIT_FAILURE);
}
}
+
+static int __get_transports(void)
+{
+ char buf[KALLSYMS_LINE_LEN];
+ const char *ksym;
+ int ret = 0;
+ FILE *f;
+
+ f = fopen(KALLSYMS_PATH, "r");
+ if (!f) {
+ perror("Can't open " KALLSYMS_PATH);
+ exit(EXIT_FAILURE);
+ }
+
+ while (fgets(buf, sizeof(buf), f)) {
+ char *match;
+ int i;
+
+ assert(buf[strlen(buf) - 1] == '\n');
+
+ for (i = 0; i < TRANSPORT_NUM; ++i) {
+ if (ret & BIT(i))
+ continue;
+
+ /* Match should be followed by '\t' or '\n'.
+ * See kallsyms.c:s_show().
+ */
+ ksym = transport_ksyms[i];
+ match = strstr(buf, ksym);
+ if (match && isspace(match[strlen(ksym)])) {
+ ret |= BIT(i);
+ break;
+ }
+ }
+ }
+
+ fclose(f);
+ return ret;
+}
+
+/* Return integer with TRANSPORT_* bit set for every (known) registered vsock
+ * transport.
+ */
+int get_transports(void)
+{
+ static int tr = -1;
+
+ if (tr == -1)
+ tr = __get_transports();
+
+ return tr;
+}
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
index 5e2db67072d5..142c02a6834a 100644
--- a/tools/testing/vsock/util.h
+++ b/tools/testing/vsock/util.h
@@ -3,8 +3,40 @@
#define UTIL_H
#include <sys/socket.h>
+#include <linux/bitops.h>
+#include <linux/kernel.h>
#include <linux/vm_sockets.h>
+/* All known vsock transports, see callers of vsock_core_register() */
+#define KNOWN_TRANSPORTS(x) \
+ x(LOOPBACK, "loopback") \
+ x(VIRTIO, "virtio") \
+ x(VHOST, "vhost") \
+ x(VMCI, "vmci") \
+ x(HYPERV, "hvs")
+
+enum transport {
+ TRANSPORT_COUNTER_BASE = __COUNTER__ + 1,
+ #define x(name, symbol) \
+ TRANSPORT_##name = BIT(__COUNTER__ - TRANSPORT_COUNTER_BASE),
+ KNOWN_TRANSPORTS(x)
+ TRANSPORT_NUM = __COUNTER__ - TRANSPORT_COUNTER_BASE,
+ #undef x
+};
+
+static const char * const transport_ksyms[] = {
+ #define x(name, symbol) "d " symbol "_transport",
+ KNOWN_TRANSPORTS(x)
+ #undef x
+};
+
+static_assert(ARRAY_SIZE(transport_ksyms) == TRANSPORT_NUM);
+static_assert(BITS_PER_TYPE(int) >= TRANSPORT_NUM);
+
+#define TRANSPORTS_G2H (TRANSPORT_VIRTIO | TRANSPORT_VMCI | TRANSPORT_HYPERV)
+#define TRANSPORTS_H2G (TRANSPORT_VHOST | TRANSPORT_VMCI)
+#define TRANSPORTS_LOCAL (TRANSPORT_LOOPBACK)
+
/* Tests can either run as the client or the server */
enum test_mode {
TEST_MODE_UNSET,
@@ -44,6 +76,7 @@ int vsock_connect(unsigned int cid, unsigned int port, int type);
int vsock_accept(unsigned int cid, unsigned int port,
struct sockaddr_vm *clientaddrp, int type);
int vsock_stream_connect(unsigned int cid, unsigned int port);
+int vsock_bind_try(unsigned int cid, unsigned int port, int type);
int vsock_bind(unsigned int cid, unsigned int port, int type);
int vsock_bind_connect(unsigned int cid, unsigned int port,
unsigned int bind_port, int type);
@@ -54,6 +87,7 @@ int vsock_stream_listen(unsigned int cid, unsigned int port);
int vsock_seqpacket_accept(unsigned int cid, unsigned int port,
struct sockaddr_vm *clientaddrp);
void vsock_wait_remote_close(int fd);
+bool vsock_ioctl_int(int fd, unsigned long op, int expected);
bool vsock_wait_sent(int fd);
void send_buf(int fd, const void *buf, size_t len, int flags,
ssize_t expected_ret);
@@ -81,4 +115,5 @@ void setsockopt_timeval_check(int fd, int level, int optname,
struct timeval val, char const *errmsg);
void enable_so_zerocopy_check(int fd);
void enable_so_linger(int fd, int timeout);
+int get_transports(void);
#endif /* UTIL_H */
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index f669baaa0dca..d4517386e551 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -22,6 +22,9 @@
#include <signal.h>
#include <sys/ioctl.h>
#include <linux/time64.h>
+#include <pthread.h>
+#include <fcntl.h>
+#include <linux/sockios.h>
#include "vsock_test_zerocopy.h"
#include "timeout.h"
@@ -1305,6 +1308,54 @@ static void test_unsent_bytes_client(const struct test_opts *opts, int type)
close(fd);
}
+static void test_unread_bytes_server(const struct test_opts *opts, int type)
+{
+ unsigned char buf[MSG_BUF_IOCTL_LEN];
+ int client_fd;
+
+ client_fd = vsock_accept(VMADDR_CID_ANY, opts->peer_port, NULL, type);
+ if (client_fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+
+ for (int i = 0; i < sizeof(buf); i++)
+ buf[i] = rand() & 0xFF;
+
+ send_buf(client_fd, buf, sizeof(buf), 0, sizeof(buf));
+ control_writeln("SENT");
+
+ close(client_fd);
+}
+
+static void test_unread_bytes_client(const struct test_opts *opts, int type)
+{
+ unsigned char buf[MSG_BUF_IOCTL_LEN];
+ int fd;
+
+ fd = vsock_connect(opts->peer_cid, opts->peer_port, type);
+ if (fd < 0) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ control_expectln("SENT");
+ /* The data has arrived but has not been read. The expected is
+ * MSG_BUF_IOCTL_LEN.
+ */
+ if (!vsock_ioctl_int(fd, SIOCINQ, MSG_BUF_IOCTL_LEN)) {
+ fprintf(stderr, "Test skipped, SIOCINQ not supported.\n");
+ goto out;
+ }
+
+ recv_buf(fd, buf, sizeof(buf), 0, sizeof(buf));
+ /* All data has been consumed, so the expected is 0. */
+ vsock_ioctl_int(fd, SIOCINQ, 0);
+
+out:
+ close(fd);
+}
+
static void test_stream_unsent_bytes_client(const struct test_opts *opts)
{
test_unsent_bytes_client(opts, SOCK_STREAM);
@@ -1325,6 +1376,26 @@ static void test_seqpacket_unsent_bytes_server(const struct test_opts *opts)
test_unsent_bytes_server(opts, SOCK_SEQPACKET);
}
+static void test_stream_unread_bytes_client(const struct test_opts *opts)
+{
+ test_unread_bytes_client(opts, SOCK_STREAM);
+}
+
+static void test_stream_unread_bytes_server(const struct test_opts *opts)
+{
+ test_unread_bytes_server(opts, SOCK_STREAM);
+}
+
+static void test_seqpacket_unread_bytes_client(const struct test_opts *opts)
+{
+ test_unread_bytes_client(opts, SOCK_SEQPACKET);
+}
+
+static void test_seqpacket_unread_bytes_server(const struct test_opts *opts)
+{
+ test_unread_bytes_server(opts, SOCK_SEQPACKET);
+}
+
#define RCVLOWAT_CREDIT_UPD_BUF_SIZE (1024 * 128)
/* This define is the same as in 'include/linux/virtio_vsock.h':
* it is used to decide when to send credit update message during
@@ -1718,16 +1789,27 @@ static void test_stream_msgzcopy_leak_zcskb_server(const struct test_opts *opts)
#define MAX_PORT_RETRIES 24 /* net/vmw_vsock/af_vsock.c */
-/* Test attempts to trigger a transport release for an unbound socket. This can
- * lead to a reference count mishandling.
- */
-static void test_stream_transport_uaf_client(const struct test_opts *opts)
+static bool test_stream_transport_uaf(int cid)
{
int sockets[MAX_PORT_RETRIES];
struct sockaddr_vm addr;
- int fd, i, alen;
+ socklen_t alen;
+ int fd, i, c;
+ bool ret;
- fd = vsock_bind(VMADDR_CID_ANY, VMADDR_PORT_ANY, SOCK_STREAM);
+ /* Probe for a transport by attempting a local CID bind. Unavailable
+ * transport (or more specifically: an unsupported transport/CID
+ * combination) results in EADDRNOTAVAIL, other errnos are fatal.
+ */
+ fd = vsock_bind_try(cid, VMADDR_PORT_ANY, SOCK_STREAM);
+ if (fd < 0) {
+ if (errno != EADDRNOTAVAIL) {
+ perror("Unexpected bind() errno");
+ exit(EXIT_FAILURE);
+ }
+
+ return false;
+ }
alen = sizeof(addr);
if (getsockname(fd, (struct sockaddr *)&addr, &alen)) {
@@ -1735,38 +1817,83 @@ static void test_stream_transport_uaf_client(const struct test_opts *opts)
exit(EXIT_FAILURE);
}
+ /* Drain the autobind pool; see __vsock_bind_connectible(). */
for (i = 0; i < MAX_PORT_RETRIES; ++i)
- sockets[i] = vsock_bind(VMADDR_CID_ANY, ++addr.svm_port,
- SOCK_STREAM);
+ sockets[i] = vsock_bind(cid, ++addr.svm_port, SOCK_STREAM);
close(fd);
- fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+
+ /* Setting SOCK_NONBLOCK makes connect() return soon after
+ * (re-)assigning the transport. We are not connecting to anything
+ * anyway, so there is no point entering the main loop in
+ * vsock_connect(); waiting for timeout, checking for signals, etc.
+ */
+ fd = socket(AF_VSOCK, SOCK_STREAM | SOCK_NONBLOCK, 0);
if (fd < 0) {
perror("socket");
exit(EXIT_FAILURE);
}
- if (!vsock_connect_fd(fd, addr.svm_cid, addr.svm_port)) {
- perror("Unexpected connect() #1 success");
+ /* Assign transport, while failing to autobind. Autobind pool was
+ * drained, so EADDRNOTAVAIL coming from __vsock_bind_connectible() is
+ * expected.
+ *
+ * One exception is ENODEV which is thrown by vsock_assign_transport(),
+ * i.e. before vsock_auto_bind(), when the only transport loaded is
+ * vhost.
+ */
+ if (!connect(fd, (struct sockaddr *)&addr, alen)) {
+ fprintf(stderr, "Unexpected connect() success\n");
exit(EXIT_FAILURE);
}
-
- /* Vulnerable system may crash now. */
- if (!vsock_connect_fd(fd, VMADDR_CID_HOST, VMADDR_PORT_ANY)) {
- perror("Unexpected connect() #2 success");
+ if (errno == ENODEV && cid == VMADDR_CID_HOST) {
+ ret = false;
+ goto cleanup;
+ }
+ if (errno != EADDRNOTAVAIL) {
+ perror("Unexpected connect() errno");
exit(EXIT_FAILURE);
}
+ /* Reassign transport, triggering old transport release and
+ * (potentially) unbinding of an unbound socket.
+ *
+ * Vulnerable system may crash now.
+ */
+ for (c = VMADDR_CID_HYPERVISOR; c <= VMADDR_CID_HOST + 1; ++c) {
+ if (c != cid) {
+ addr.svm_cid = c;
+ (void)connect(fd, (struct sockaddr *)&addr, alen);
+ }
+ }
+
+ ret = true;
+cleanup:
close(fd);
while (i--)
close(sockets[i]);
- control_writeln("DONE");
+ return ret;
}
-static void test_stream_transport_uaf_server(const struct test_opts *opts)
+/* Test attempts to trigger a transport release for an unbound socket. This can
+ * lead to a reference count mishandling.
+ */
+static void test_stream_transport_uaf_client(const struct test_opts *opts)
{
- control_expectln("DONE");
+ bool tested = false;
+ int cid, tr;
+
+ for (cid = VMADDR_CID_HYPERVISOR; cid <= VMADDR_CID_HOST + 1; ++cid)
+ tested |= test_stream_transport_uaf(cid);
+
+ tr = get_transports();
+ if (!tr)
+ fprintf(stderr, "No transports detected\n");
+ else if (tr == TRANSPORT_VIRTIO)
+ fprintf(stderr, "Setup unsupported: sole virtio transport\n");
+ else if (!tested)
+ fprintf(stderr, "No transports tested\n");
}
static void test_stream_connect_retry_client(const struct test_opts *opts)
@@ -1811,6 +1938,180 @@ static void test_stream_connect_retry_server(const struct test_opts *opts)
close(fd);
}
+#define TRANSPORT_CHANGE_TIMEOUT 2 /* seconds */
+
+static void *test_stream_transport_change_thread(void *vargp)
+{
+ pid_t *pid = (pid_t *)vargp;
+ int ret;
+
+ ret = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
+ if (ret) {
+ fprintf(stderr, "pthread_setcanceltype: %d\n", ret);
+ exit(EXIT_FAILURE);
+ }
+
+ while (true) {
+ if (kill(*pid, SIGUSR1) < 0) {
+ perror("kill");
+ exit(EXIT_FAILURE);
+ }
+ }
+ return NULL;
+}
+
+static void test_transport_change_signal_handler(int signal)
+{
+ /* We need a custom handler for SIGUSR1 as the default one terminates the process. */
+}
+
+static void test_stream_transport_change_client(const struct test_opts *opts)
+{
+ __sighandler_t old_handler;
+ pid_t pid = getpid();
+ pthread_t thread_id;
+ time_t tout;
+ int ret, tr;
+
+ tr = get_transports();
+
+ /* Print a warning if there is a G2H transport loaded.
+ * This is on a best effort basis because VMCI can be either G2H and H2G, and there is
+ * no easy way to understand it.
+ * The bug we are testing only appears when G2H transports are not loaded.
+ * This is because `vsock_assign_transport`, when using CID 0, assigns a G2H transport
+ * to vsk->transport. If none is available it is set to NULL, causing the null-ptr-deref.
+ */
+ if (tr & TRANSPORTS_G2H)
+ fprintf(stderr, "G2H Transport detected. This test will not fail.\n");
+
+ old_handler = signal(SIGUSR1, test_transport_change_signal_handler);
+ if (old_handler == SIG_ERR) {
+ perror("signal");
+ exit(EXIT_FAILURE);
+ }
+
+ ret = pthread_create(&thread_id, NULL, test_stream_transport_change_thread, &pid);
+ if (ret) {
+ fprintf(stderr, "pthread_create: %d\n", ret);
+ exit(EXIT_FAILURE);
+ }
+
+ control_expectln("LISTENING");
+
+ tout = current_nsec() + TRANSPORT_CHANGE_TIMEOUT * NSEC_PER_SEC;
+ do {
+ struct sockaddr_vm sa = {
+ .svm_family = AF_VSOCK,
+ .svm_cid = opts->peer_cid,
+ .svm_port = opts->peer_port,
+ };
+ bool send_control = false;
+ int s;
+
+ s = socket(AF_VSOCK, SOCK_STREAM, 0);
+ if (s < 0) {
+ perror("socket");
+ exit(EXIT_FAILURE);
+ }
+
+ ret = connect(s, (struct sockaddr *)&sa, sizeof(sa));
+ /* The connect can fail due to signals coming from the thread,
+ * or because the receiver connection queue is full.
+ * Ignoring also the latter case because there is no way
+ * of synchronizing client's connect and server's accept when
+ * connect(s) are constantly being interrupted by signals.
+ */
+ if (ret == -1 && (errno != EINTR && errno != ECONNRESET)) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Notify the server if the connect() is successful or the
+ * receiver connection queue is full, so it will do accept()
+ * to drain it.
+ */
+ if (!ret || errno == ECONNRESET)
+ send_control = true;
+
+ /* Set CID to 0 cause a transport change. */
+ sa.svm_cid = 0;
+
+ /* There is a case where this will not fail:
+ * if the previous connect() is interrupted while the
+ * connection request is already sent, this second
+ * connect() will wait for the response.
+ */
+ ret = connect(s, (struct sockaddr *)&sa, sizeof(sa));
+ if (!ret || errno == ECONNRESET)
+ send_control = true;
+
+ close(s);
+
+ if (send_control)
+ control_writeulong(CONTROL_CONTINUE);
+
+ } while (current_nsec() < tout);
+
+ control_writeulong(CONTROL_DONE);
+
+ ret = pthread_cancel(thread_id);
+ if (ret) {
+ fprintf(stderr, "pthread_cancel: %d\n", ret);
+ exit(EXIT_FAILURE);
+ }
+
+ ret = pthread_join(thread_id, NULL);
+ if (ret) {
+ fprintf(stderr, "pthread_join: %d\n", ret);
+ exit(EXIT_FAILURE);
+ }
+
+ if (signal(SIGUSR1, old_handler) == SIG_ERR) {
+ perror("signal");
+ exit(EXIT_FAILURE);
+ }
+}
+
+static void test_stream_transport_change_server(const struct test_opts *opts)
+{
+ int s = vsock_stream_listen(VMADDR_CID_ANY, opts->peer_port);
+
+ /* Set the socket to be nonblocking because connects that have been interrupted
+ * (EINTR) can fill the receiver's accept queue anyway, leading to connect failure.
+ * As of today (6.15) in such situation there is no way to understand, from the
+ * client side, if the connection has been queued in the server or not.
+ */
+ if (fcntl(s, F_SETFL, fcntl(s, F_GETFL, 0) | O_NONBLOCK) < 0) {
+ perror("fcntl");
+ exit(EXIT_FAILURE);
+ }
+ control_writeln("LISTENING");
+
+ while (control_readulong() == CONTROL_CONTINUE) {
+ /* Must accept the connection, otherwise the `listen`
+ * queue will fill up and new connections will fail.
+ * There can be more than one queued connection,
+ * clear them all.
+ */
+ while (true) {
+ int client = accept(s, NULL, NULL);
+
+ if (client < 0) {
+ if (errno == EAGAIN)
+ break;
+
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+
+ close(client);
+ }
+ }
+
+ close(s);
+}
+
static void test_stream_linger_client(const struct test_opts *opts)
{
int fd;
@@ -2034,7 +2335,6 @@ static struct test_case test_cases[] = {
{
.name = "SOCK_STREAM transport release use-after-free",
.run_client = test_stream_transport_uaf_client,
- .run_server = test_stream_transport_uaf_server,
},
{
.name = "SOCK_STREAM retry failed connect()",
@@ -2051,6 +2351,21 @@ static struct test_case test_cases[] = {
.run_client = test_stream_nolinger_client,
.run_server = test_stream_nolinger_server,
},
+ {
+ .name = "SOCK_STREAM transport change null-ptr-deref",
+ .run_client = test_stream_transport_change_client,
+ .run_server = test_stream_transport_change_server,
+ },
+ {
+ .name = "SOCK_STREAM ioctl(SIOCINQ) functionality",
+ .run_client = test_stream_unread_bytes_client,
+ .run_server = test_stream_unread_bytes_server,
+ },
+ {
+ .name = "SOCK_SEQPACKET ioctl(SIOCINQ) functionality",
+ .run_client = test_seqpacket_unread_bytes_client,
+ .run_server = test_seqpacket_unread_bytes_server,
+ },
{},
};