diff options
Diffstat (limited to 'drivers/perf/raspberrypi_axi_monitor.c')
| -rw-r--r-- | drivers/perf/raspberrypi_axi_monitor.c | 826 |
1 files changed, 826 insertions, 0 deletions
diff --git a/drivers/perf/raspberrypi_axi_monitor.c b/drivers/perf/raspberrypi_axi_monitor.c new file mode 100644 index 000000000000..b5ad12a2a9ab --- /dev/null +++ b/drivers/perf/raspberrypi_axi_monitor.c @@ -0,0 +1,826 @@ +/* + * raspberrypi_axi_monitor.c + * + * Author: [email protected] + * + * Raspberry Pi AXI performance counters. + * + * Copyright (C) 2017 Raspberry Pi Trading Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/debugfs.h> +#include <linux/devcoredump.h> +#include <linux/device.h> +#include <linux/kthread.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/mutex.h> +#include <linux/of.h> +#include <linux/platform_device.h> + +#include <soc/bcm2835/raspberrypi-firmware.h> + +#define NUM_MONITORS 2 +#define NUM_BUS_WATCHERS_PER_MONITOR 3 + +#define SYSTEM_MONITOR 0 +#define VPU_MONITOR 1 + +#define MAX_BUSES 16 +#define DEFAULT_SAMPLE_TIME 100 + +#define NUM_BUS_WATCHER_RESULTS 11 + +struct bus_watcher_data { + union { + u32 results[NUM_BUS_WATCHER_RESULTS]; + struct { + u32 atrans; + u32 atwait; + u32 amax; + u32 wtrans; + u32 wtwait; + u32 wmax; + u32 rtrans; + u32 rtwait; + u32 rmax; + u32 rpend; + u32 ratrans; + }; + }; +}; + + +struct rpi_axiperf { + struct platform_device *dev; + struct dentry *root_folder; + + struct task_struct *monitor_thread; + struct mutex lock; + + struct rpi_firmware *firmware; + + /* Sample time spent on for each bus */ + int sample_time; + + /* chip specific bus config */ + const struct bwconfig_config *config; + + /* Now storage for the per monitor settings and the resulting + * performance figures + */ + struct { + /* Bit field of buses we want to monitor */ + int bus_enabled; + /* Bit field of buses to filter by */ + int bus_filter; + /* The current buses being monitored on this monitor */ + int current_bus[NUM_BUS_WATCHERS_PER_MONITOR]; + /* The last bus monitored on this monitor */ + int last_monitored; + + /* Set true if this mailbox must use the mailbox interface + * rather than access registers directly. + */ + int use_mailbox_interface; + + /* Current result values */ + struct bus_watcher_data results[MAX_BUSES]; + + struct dentry *debugfs_entry; + void __iomem *base_address; + + } monitor[NUM_MONITORS]; + +}; + +static struct rpi_axiperf *state; + +/* Two monitors, System and VPU, each with the following register sets. + * Each monitor can only monitor one bus at a time, so we time share them, + * giving each bus 100ms (default, settable via debugfs) of time on its + * associated monitor + * Record results from the three Bus watchers per monitor and push to the sysfs + */ + +/* general registers */ +const int GEN_CTRL; + +const int GEN_CTL_ENABLE_BIT = BIT(0); +const int GEN_CTL_RESET_BIT = BIT(1); +const int GEN_CTL_WATCH_BIT = BIT(2); + +/* Bus watcher registers */ +const int BW_PITCH = 0x40; + +const int BW0_CTRL = 0x40; +const int BW1_CTRL = 0x80; +const int BW2_CTRL = 0xc0; + +const int BW_ATRANS_OFFSET = 0x04; +const int BW_ATWAIT_OFFSET = 0x08; +const int BW_AMAX_OFFSET = 0x0c; +const int BW_WTRANS_OFFSET = 0x10; +const int BW_WTWAIT_OFFSET = 0x14; +const int BW_WMAX_OFFSET = 0x18; +const int BW_RTRANS_OFFSET = 0x1c; +const int BW_RTWAIT_OFFSET = 0x20; +const int BW_RMAX_OFFSET = 0x24; + +const int BW_CTRL_RESET_BIT = BIT(31); +const int BW_CTRL_ENABLE_BIT = BIT(30); +const int BW_CTRL_ENABLE_ID_FILTER_BIT = BIT(29); +const int BW_CTRL_LIMIT_HALT_BIT = BIT(28); + +const int BW_CTRL_SOURCE_SHIFT = 8; +const int BW_CTRL_SOURCE_MASK = GENMASK(12, 8); // 5 bits +const int BW_CTRL_BUS_WATCH_SHIFT; +const int BW_CTRL_BUS_WATCH_MASK = GENMASK(5, 0); // 6 bits +const int BW_CTRL_BUS_FILTER_SHIFT = 8; + +static const char *bus_filter_strings[] = { + "", + "CORE0_V", + "ICACHE0", + "DCACHE0", + "CORE1_V", + "ICACHE1", + "DCACHE1", + "L2_MAIN", + "HOST_PORT", + "HOST_PORT2", + "HVS", + "ISP", + "VIDEO_DCT", + "VIDEO_SD2AXI", + "CAM0", + "CAM1", + "DMA0", + "DMA1", + "DMA2_VPU", + "JPEG", + "VIDEO_CME", + "TRANSPOSER", + "VIDEO_FME", + "CCP2TX", + "USB", + "V3D0", + "V3D1", + "V3D2", + "AVE", + "DEBUG", + "CPU", + "M30" +}; + +static const char * const bus_filter_strings_2711[] = { + "AIO", + "CORE0_V", + "ICACHE0", + "DCACHE0", + "CORE1_V", + "ICACHE1", + "DCACHE1", + "L2_MAIN", + "ARGON", + "PCIE", + "HVS", + "ISP", + "VIDEO_DCT", + "VIDEO_SD2AXI", + "CAM0", + "CAM1", + "DMA0", + "DMA1", + "DMA2", + "JPEG", + "VIDEO_CME", + "TRANSPOSER", + "VIDEO_FME", + "GIGE", + "USB", + "V3D0", + "V3D1", + "V3D2", + "GISB_AXI", + "DEBUG", + "ARM", + "EMMCSTB", +}; + +static const char * const bus_filter_strings_2712[] = { + "", + "VPU_UC0", + "VPU_IC0", + "VPU_DC0", + "VPU_UC1", + "VPU_IC1", + "VPU_DC1", + "VPU_L2", + "DMA2", + "VPU_DEBUG", + "ARM", + "DMA0", + "DMA1", + "RAAGA", + "BBSI", + "PCIE0", + "PCIE1", + "PCIE2", + "UMR", + "SAGE", + "HVDP", + "BSP", + "HVS", + "HVS_WMK", + "MOP0", + "MOP1", + "MBVN", + "DSI", + "XPT", + "EMMC0", + "GENET", + "USB", + "ARGON", + "UNICAM", + "PISP", + "PISPFE", + "JPEG", + "EMMC1", + "EMMC2", + "TRC", + "BSTM0", + "BSTM1", + "BSTM0_SEC", + "BSTM1_SEC", + "AIO", + "MAP", + "SYS_DMA", + "MMUCACHE0", + "MMUCACHE1", + "MPUCACHE0", + "MPUCACHE1", +}; + +static const char *system_bus_string[] = { + "DMA_L2", + "TRANS", + "JPEG", + "SYSTEM_UC", + "DMA_UC", + "SYSTEM_L2", + "CCP2TX", + "MPHI_RX", + "MPHI_TX", + "HVS", + "H264", + "ISP", + "V3D", + "PERIPHERAL", + "CPU_UC", + "CPU_L2" +}; + +static const char * const system_bus_string_2711[] = { + "DMA_L2", + "TRANS", + "JPEG", + "VPU_UC", + "DMA_UC", + "SYSTEM_L2", + "HVS", + "ARGON", + "H264", + "PERIPHERAL", + "ARM_UC", + "ARM_L2", +}; + +static const char * const system_bus_string_2712[] = { + "VPU_UC", + "DISPLAY_TOP", + "V3D", + "ARM", + "XPT", + "BSTM_TOP", + "PCIE_01", + "ARGON_TOP", + "ARB3", + "SRC", + "HVDP", + "PER", + "SYSTEM_L2", +}; + +static const char *vpu_bus_string[] = { + "VPU1_D_L2", + "VPU0_D_L2", + "VPU1_I_L2", + "VPU0_I_L2", + "SYSTEM_L2", + "L2_FLUSH", + "DMA_L2", + "VPU1_D_UC", + "VPU0_D_UC", + "VPU1_I_UC", + "VPU0_I_UC", + "SYSTEM_UC", + "L2_OUT", + "DMA_UC", + "SDRAM", + "L2_IN" +}; + +static const char * const vpu_bus_string_2711[] = { + "VPU1_D_L2", + "VPU0_D_L2", + "VPU1_I_L2", + "VPU0_I_L2", + "SYSTEM_L2", + "DMA_L2", + "VPU1_D_UC", + "VPU0_D_UC", + "VPU1_I_UC", + "VPU0_I_UC", + "VPU_UC", + "L2_OUT", + "DMA_UC", + "L2_IN" +}; + +static const char * const vpu_bus_string_2712[] = { + "VPU1_D_L2", + "VPU0_D_L2", + "VPU1_I_L2", + "VPU0_I_L2", + "SYSTEM_L2", + "DMA_L2", + "VPU1_D_UC", + "VPU0_D_UC", + "VPU1_I_UC", + "VPU0_I_UC", + "VPU_UC", + "L2_OUT", + "DMA_UC", + "L2_IN" +}; + +struct bwconfig_config { + const char * const *bus_filter_strings; + const int num_bus_filters; + const char * const *system_bus_string; + const int num_system_buses; + const char * const *vpu_bus_string; + const int num_vpu_buses; +}; + +static const struct bwconfig_config config_2835 = { + bus_filter_strings, ARRAY_SIZE(bus_filter_strings), + system_bus_string, ARRAY_SIZE(system_bus_string), + vpu_bus_string, ARRAY_SIZE(vpu_bus_string), +}; + +static const struct bwconfig_config config_2711 = { + bus_filter_strings_2711, ARRAY_SIZE(bus_filter_strings_2711), + system_bus_string_2711, ARRAY_SIZE(system_bus_string_2711), + vpu_bus_string_2711, ARRAY_SIZE(vpu_bus_string_2711), +}; + +static const struct bwconfig_config config_2712 = { + bus_filter_strings_2712, ARRAY_SIZE(bus_filter_strings_2712), + system_bus_string_2712, ARRAY_SIZE(system_bus_string_2712), + vpu_bus_string_2712, ARRAY_SIZE(vpu_bus_string_2712), +}; + +static const char *monitor_name[] = { + "System", + "VPU" +}; + +static inline void write_reg(int monitor, int reg, u32 value) +{ + writel(value, state->monitor[monitor].base_address + reg); +} + +static inline u32 read_reg(int monitor, u32 reg) +{ + return readl(state->monitor[monitor].base_address + reg); +} + +static void read_bus_watcher(int monitor, int watcher, u32 *results) +{ + if (state->monitor[monitor].use_mailbox_interface) { + /* We have NUM_BUS_WATCHER_RESULTS results, plus the overheads + * of start address and length + */ + u32 tmp[NUM_BUS_WATCHER_RESULTS+2]; + int err; + + tmp[0] = (u32)(uintptr_t)(state->monitor[monitor].base_address + watcher + + BW_ATRANS_OFFSET); + tmp[1] = NUM_BUS_WATCHER_RESULTS; + + err = rpi_firmware_property(state->firmware, + RPI_FIRMWARE_GET_PERIPH_REG, + tmp, sizeof(tmp)); + + if (err < 0 || tmp[1] != NUM_BUS_WATCHER_RESULTS) + dev_err_once(&state->dev->dev, + "Failed to read bus watcher"); + else + memcpy(results, &tmp[2], + NUM_BUS_WATCHER_RESULTS * sizeof(u32)); + } else { + int i; + void __iomem *addr = state->monitor[monitor].base_address + + watcher + BW_ATRANS_OFFSET; + for (i = 0; i < NUM_BUS_WATCHER_RESULTS; i++, addr += 4) + *results++ = readl(addr); + } +} + +static void set_monitor_control(int monitor, u32 set) +{ + if (state->monitor[monitor].use_mailbox_interface) { + u32 tmp[3] = {(u32)(uintptr_t)(state->monitor[monitor].base_address + + GEN_CTRL), 1, set}; + int err = rpi_firmware_property(state->firmware, + RPI_FIRMWARE_SET_PERIPH_REG, + tmp, sizeof(tmp)); + + if (err < 0 || tmp[1] != 1) + dev_err_once(&state->dev->dev, + "Failed to set monitor control"); + } else + write_reg(monitor, GEN_CTRL, set); +} + +static void set_bus_watcher_control(int monitor, int watcher, u32 set) +{ + if (state->monitor[monitor].use_mailbox_interface) { + u32 tmp[3] = {(u32)(uintptr_t)(state->monitor[monitor].base_address + + watcher), 1, set}; + int err = rpi_firmware_property(state->firmware, + RPI_FIRMWARE_SET_PERIPH_REG, + tmp, sizeof(tmp)); + if (err < 0 || tmp[1] != 1) + dev_err_once(&state->dev->dev, + "Failed to set bus watcher control"); + } else + write_reg(monitor, watcher, set); +} + +static void monitor(struct rpi_axiperf *state) +{ + int monitor, num_buses[NUM_MONITORS]; + + mutex_lock(&state->lock); + + for (monitor = 0; monitor < NUM_MONITORS; monitor++) { + typeof(state->monitor[0]) *mon = &(state->monitor[monitor]); + + /* Anything enabled? */ + if (mon->bus_enabled == 0) { + /* No, disable all monitoring for this monitor */ + set_monitor_control(monitor, GEN_CTL_RESET_BIT); + } else { + int i; + + /* Find out how many busses we want to monitor, and + * spread our 3 actual monitors over them + */ + num_buses[monitor] = hweight32(mon->bus_enabled); + num_buses[monitor] = min(num_buses[monitor], + NUM_BUS_WATCHERS_PER_MONITOR); + + for (i = 0; i < num_buses[monitor]; i++) { + int bus_control; + + do { + mon->last_monitored++; + mon->last_monitored &= 0xf; + } while ((mon->bus_enabled & + (1 << mon->last_monitored)) == 0); + + mon->current_bus[i] = mon->last_monitored; + + /* Reset the counters */ + set_bus_watcher_control(monitor, + BW0_CTRL + + i*BW_PITCH, + BW_CTRL_RESET_BIT); + + bus_control = BW_CTRL_ENABLE_BIT | + mon->current_bus[i]; + + if (mon->bus_filter) { + bus_control |= + BW_CTRL_ENABLE_ID_FILTER_BIT; + bus_control |= + ((mon->bus_filter & 0x1f) + << BW_CTRL_BUS_FILTER_SHIFT); + } + + // Start capture + set_bus_watcher_control(monitor, + BW0_CTRL + i*BW_PITCH, + bus_control); + } + } + + /* start monitoring */ + set_monitor_control(monitor, GEN_CTL_ENABLE_BIT | GEN_CTL_WATCH_BIT); + } + + mutex_unlock(&state->lock); + + msleep(state->sample_time); + + /* Now read the results */ + + mutex_lock(&state->lock); + for (monitor = 0; monitor < NUM_MONITORS; monitor++) { + typeof(state->monitor[0]) *mon = &(state->monitor[monitor]); + + /* Anything enabled? */ + if (mon->bus_enabled == 0) { + /* No, disable all monitoring for this monitor */ + set_monitor_control(monitor, 0); + } else { + int i; + + for (i = 0; i < num_buses[monitor]; i++) { + int bus = mon->current_bus[i]; + + read_bus_watcher(monitor, + BW0_CTRL + i*BW_PITCH, + (u32 *)&mon->results[bus].results); + } + } + } + mutex_unlock(&state->lock); +} + +static int monitor_thread(void *data) +{ + struct rpi_axiperf *state = data; + + while (1) { + monitor(state); + + if (kthread_should_stop()) + return 0; + } + return 0; +} + +static ssize_t myreader(struct file *fp, char __user *user_buffer, + size_t count, loff_t *position) +{ +#define INIT_BUFF_SIZE 2048 + + int i; + int idx = (int)(uintptr_t)(fp->private_data); + int num_buses, cnt; + char *string_buffer; + int buff_size = INIT_BUFF_SIZE; + char *p; + typeof(state->monitor[0]) *mon = &(state->monitor[idx]); + const struct bwconfig_config *config = state->config; + + if (idx < 0 || idx > NUM_MONITORS) + idx = 0; + + num_buses = idx == SYSTEM_MONITOR ? config->num_system_buses : config->num_vpu_buses; + + string_buffer = kmalloc(buff_size, GFP_KERNEL); + + if (!string_buffer) { + dev_err(&state->dev->dev, + "Failed temporary string allocation\n"); + return 0; + } + + p = string_buffer; + + mutex_lock(&state->lock); + + if (mon->bus_filter) { + int filt = min(mon->bus_filter & 0x1f, config->num_bus_filters); + + cnt = snprintf(p, buff_size, + "\nMonitoring transactions from %s only\n", + config->bus_filter_strings[filt]); + p += cnt; + buff_size -= cnt; + } + + cnt = snprintf(p, buff_size, " Bus | Atrans Atwait AMax Wtrans Wtwait WMax Rtrans Rtwait RMax RPend RAtrans\n" + "===========================================================================================================================\n"); + + if (cnt >= buff_size) + goto done; + + p += cnt; + buff_size -= cnt; + +#define M(x) ((x) >= 1000000000 ? (x)/1000000 : (x) >= 1000 ? (x)/1000 : (x)) +#define N(x) ((x) >= 1000000000 ? 'M' : (x) >= 1000 ? 'K' : ' ') + + for (i = 0; i < num_buses; i++) { + if (mon->bus_enabled & (1 << i)) { + typeof(mon->results[0]) *res = &(mon->results[i]); + + cnt = snprintf(p, buff_size, + "%11s | %8u%c %8u%c %8u%c %8u%c %8u%c %8u%c %8u%c %8u%c %8u%c %8u%c %8u%c\n", + idx == SYSTEM_MONITOR ? + config->system_bus_string[i] : + config->vpu_bus_string[i], + M(res->atrans), N(res->atrans), + M(res->atwait), N(res->atwait), + M(res->amax), N(res->amax), + M(res->wtrans), N(res->wtrans), + M(res->wtwait), N(res->wtwait), + M(res->wmax), N(res->wmax), + M(res->rtrans), N(res->rtrans), + M(res->rtwait), N(res->rtwait), + M(res->rmax), N(res->rmax), + M(res->rpend), N(res->rpend), + M(res->ratrans), N(res->ratrans) + ); + if (cnt >= buff_size) + goto done; + + p += cnt; + buff_size -= cnt; + } + } + + mutex_unlock(&state->lock); + +done: + + /* did the last string entry exceeed our buffer size? ie out of string + * buffer space. Null terminate, use what we have. + */ + if (cnt >= buff_size) { + buff_size = 0; + string_buffer[INIT_BUFF_SIZE] = 0; + } + + cnt = simple_read_from_buffer(user_buffer, count, position, + string_buffer, + INIT_BUFF_SIZE - buff_size); + + kfree(string_buffer); + + return cnt; +} + +static ssize_t mywriter(struct file *fp, const char __user *user_buffer, + size_t count, loff_t *position) +{ + int idx = (int)(uintptr_t)(fp->private_data); + + if (idx < 0 || idx > NUM_MONITORS) + idx = 0; + + /* At the moment, this does nothing, but in the future it could be + * used to reset counters etc + */ + return count; +} + +static const struct file_operations fops_debug = { + .read = myreader, + .write = mywriter, + .open = simple_open +}; + +static int rpi_axiperf_probe(struct platform_device *pdev) +{ + int ret = 0, i; + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + struct device_node *fw_node; + + state = kzalloc(sizeof(struct rpi_axiperf), GFP_KERNEL); + if (!state) + return -ENOMEM; + + state->config = of_device_get_match_data(dev); + if (!state->config) + return -EINVAL; + + /* Get the firmware handle for future rpi-firmware-xxx calls */ + fw_node = of_parse_phandle(np, "firmware", 0); + if (!fw_node) { + dev_err(dev, "Missing firmware node\n"); + return -ENOENT; + } + + state->firmware = rpi_firmware_get(fw_node); + if (!state->firmware) + return -EPROBE_DEFER; + + /* Special case for the VPU monitor, we must use the mailbox interface + * as it is not accessible from the ARM address space. + */ + state->monitor[VPU_MONITOR].use_mailbox_interface = 1; + state->monitor[SYSTEM_MONITOR].use_mailbox_interface = 0; + + for (i = 0; i < NUM_MONITORS; i++) { + if (state->monitor[i].use_mailbox_interface) { + of_property_read_u32_index(np, "reg", i*2, + (u32 *)(&state->monitor[i].base_address)); + } else { + struct resource *resource = + platform_get_resource(pdev, IORESOURCE_MEM, i); + + state->monitor[i].base_address = + devm_ioremap_resource(&pdev->dev, resource); + } + + if (IS_ERR(state->monitor[i].base_address)) + return PTR_ERR(state->monitor[i].base_address); + + /* Enable all buses by default */ + state->monitor[i].bus_enabled = 0xffff; + } + + state->dev = pdev; + platform_set_drvdata(pdev, state); + + state->sample_time = DEFAULT_SAMPLE_TIME; + + /* Set up all the debugfs stuff */ + state->root_folder = debugfs_create_dir(KBUILD_MODNAME, NULL); + + for (i = 0; i < NUM_MONITORS; i++) { + state->monitor[i].debugfs_entry = + debugfs_create_dir(monitor_name[i], state->root_folder); + if (IS_ERR(state->monitor[i].debugfs_entry)) + state->monitor[i].debugfs_entry = NULL; + + debugfs_create_file("data", 0444, + state->monitor[i].debugfs_entry, + (void *)(uintptr_t)i, &fops_debug); + debugfs_create_u32("enable", 0644, + state->monitor[i].debugfs_entry, + &state->monitor[i].bus_enabled); + debugfs_create_u32("filter", 0644, + state->monitor[i].debugfs_entry, + &state->monitor[i].bus_filter); + debugfs_create_u32("sample_time", 0644, + state->monitor[i].debugfs_entry, + &state->sample_time); + } + + mutex_init(&state->lock); + + state->monitor_thread = kthread_run(monitor_thread, state, + "rpi-axiperfmon"); + + return ret; + +} + +static void rpi_axiperf_remove(struct platform_device *dev) +{ + kthread_stop(state->monitor_thread); + + debugfs_remove_recursive(state->root_folder); + state->root_folder = NULL; +} + +static const struct of_device_id rpi_axiperf_match[] = { + { .compatible = "brcm,bcm2835-axiperf", + .data = &config_2835 }, + { .compatible = "brcm,bcm2711-axiperf", + .data = &config_2711 }, + { .compatible = "brcm,bcm2712-axiperf", + .data = &config_2712 }, + {}, +}; +MODULE_DEVICE_TABLE(of, rpi_axiperf_match); + +static struct platform_driver rpi_axiperf_driver = { + .probe = rpi_axiperf_probe, + .remove = rpi_axiperf_remove, + .driver = { + .name = "rpi-bcm2835-axiperf", + .of_match_table = of_match_ptr(rpi_axiperf_match), + }, +}; + +module_platform_driver(rpi_axiperf_driver); + +/* Module information */ +MODULE_AUTHOR("James Hughes <[email protected]>"); +MODULE_DESCRIPTION("RPI AXI Performance monitor driver"); +MODULE_LICENSE("GPL"); + |
