diff options
Diffstat (limited to 'tools')
206 files changed, 18331 insertions, 1599 deletions
diff --git a/tools/build/Documentation/Build.txt b/tools/build/Documentation/Build.txt index 00ad2d608727..aa5e092c4352 100644 --- a/tools/build/Documentation/Build.txt +++ b/tools/build/Documentation/Build.txt @@ -66,6 +66,7 @@ To follow the above example, the user provides following 'Build' files: ex/Build: ex-y += a.o ex-y += b.o + ex-y += b.o # duplicates in the lists are allowed libex-y += c.o libex-y += d.o diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index faca2bf6a430..0c5f485521d6 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -57,11 +57,13 @@ quiet_cmd_cc_i_c = CPP $@ quiet_cmd_cc_s_c = AS $@ cmd_cc_s_c = $(CC) $(c_flags) -S -o $@ $< +quiet_cmd_gen = GEN $@ + # Link agregate command # If there's nothing to link, create empty $@ object. quiet_cmd_ld_multi = LD $@ cmd_ld_multi = $(if $(strip $(obj-y)),\ - $(LD) -r -o $@ $(obj-y),rm -f $@; $(AR) rcs $@) + $(LD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(AR) rcs $@) # Build rules $(OUTPUT)%.o: %.c FORCE diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 463ed8f2a267..74ca42093d70 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -33,7 +33,8 @@ FILES= \ test-compile-32.bin \ test-compile-x32.bin \ test-zlib.bin \ - test-lzma.bin + test-lzma.bin \ + test-bpf.bin CC := $(CROSS_COMPILE)gcc -MD PKG_CONFIG := $(CROSS_COMPILE)pkg-config @@ -69,8 +70,13 @@ test-libelf.bin: test-glibc.bin: $(BUILD) +DWARFLIBS := -ldw +ifeq ($(findstring -static,${LDFLAGS}),-static) +DWARFLIBS += -lelf -lebl -lz -llzma -lbz2 +endif + test-dwarf.bin: - $(BUILD) -ldw + $(BUILD) $(DWARFLIBS) test-libelf-mmap.bin: $(BUILD) -lelf @@ -156,6 +162,9 @@ test-zlib.bin: test-lzma.bin: $(BUILD) -llzma +test-bpf.bin: + $(BUILD) + -include *.d ############################### diff --git a/tools/build/feature/test-bpf.c b/tools/build/feature/test-bpf.c new file mode 100644 index 000000000000..062bac811af9 --- /dev/null +++ b/tools/build/feature/test-bpf.c @@ -0,0 +1,18 @@ +#include <linux/bpf.h> + +int main(void) +{ + union bpf_attr attr; + + attr.prog_type = BPF_PROG_TYPE_KPROBE; + attr.insn_cnt = 0; + attr.insns = 0; + attr.license = 0; + attr.log_buf = 0; + attr.log_size = 0; + attr.log_level = 0; + attr.kern_version = 0; + + attr = attr; + return 0; +} diff --git a/tools/build/feature/test-glibc.c b/tools/build/feature/test-glibc.c index b0820345cd98..9367f7586676 100644 --- a/tools/build/feature/test-glibc.c +++ b/tools/build/feature/test-glibc.c @@ -1,8 +1,19 @@ +#include <stdlib.h> + +#if !defined(__UCLIBC__) #include <gnu/libc-version.h> +#else +#define XSTR(s) STR(s) +#define STR(s) #s +#endif int main(void) { +#if !defined(__UCLIBC__) const char *version = gnu_get_libc_version(); +#else + const char *version = XSTR(__GLIBC__) "." XSTR(__GLIBC_MINOR__); +#endif return (long)version; } diff --git a/tools/build/tests/ex/Build b/tools/build/tests/ex/Build index 70d876237c57..429c7d452101 100644 --- a/tools/build/tests/ex/Build +++ b/tools/build/tests/ex/Build @@ -1,6 +1,7 @@ ex-y += ex.o ex-y += a.o ex-y += b.o +ex-y += b.o ex-y += empty/ ex-y += empty2/ diff --git a/tools/hv/lsvmbus b/tools/hv/lsvmbus new file mode 100644 index 000000000000..162a3784d80e --- /dev/null +++ b/tools/hv/lsvmbus @@ -0,0 +1,101 @@ +#!/usr/bin/env python + +import os +from optparse import OptionParser + +parser = OptionParser() +parser.add_option("-v", "--verbose", dest="verbose", + help="print verbose messages. Try -vv, -vvv for \ + more verbose messages", action="count") + +(options, args) = parser.parse_args() + +verbose = 0 +if options.verbose is not None: + verbose = options.verbose + +vmbus_sys_path = '/sys/bus/vmbus/devices' +if not os.path.isdir(vmbus_sys_path): + print "%s doesn't exist: exiting..." % vmbus_sys_path + exit(-1) + +vmbus_dev_dict = { + '{0e0b6031-5213-4934-818b-38d90ced39db}' : '[Operating system shutdown]', + '{9527e630-d0ae-497b-adce-e80ab0175caf}' : '[Time Synchronization]', + '{57164f39-9115-4e78-ab55-382f3bd5422d}' : '[Heartbeat]', + '{a9a0f4e7-5a45-4d96-b827-8a841e8c03e6}' : '[Data Exchange]', + '{35fa2e29-ea23-4236-96ae-3a6ebacba440}' : '[Backup (volume checkpoint)]', + '{34d14be3-dee4-41c8-9ae7-6b174977c192}' : '[Guest services]', + '{525074dc-8985-46e2-8057-a307dc18a502}' : '[Dynamic Memory]', + '{cfa8b69e-5b4a-4cc0-b98b-8ba1a1f3f95a}' : 'Synthetic mouse', + '{f912ad6d-2b17-48ea-bd65-f927a61c7684}' : 'Synthetic keyboard', + '{da0a7802-e377-4aac-8e77-0558eb1073f8}' : 'Synthetic framebuffer adapter', + '{f8615163-df3e-46c5-913f-f2d2f965ed0e}' : 'Synthetic network adapter', + '{32412632-86cb-44a2-9b5c-50d1417354f5}' : 'Synthetic IDE Controller', + '{ba6163d9-04a1-4d29-b605-72e2ffb1dc7f}' : 'Synthetic SCSI Controller', + '{2f9bcc4a-0069-4af3-b76b-6fd0be528cda}' : 'Synthetic fiber channel adapter', + '{8c2eaf3d-32a7-4b09-ab99-bd1f1c86b501}' : 'Synthetic RDMA adapter', + '{276aacf4-ac15-426c-98dd-7521ad3f01fe}' : '[Reserved system device]', + '{f8e65716-3cb3-4a06-9a60-1889c5cccab5}' : '[Reserved system device]', + '{3375baf4-9e15-4b30-b765-67acb10d607b}' : '[Reserved system device]', +} + +def get_vmbus_dev_attr(dev_name, attr): + try: + f = open('%s/%s/%s' % (vmbus_sys_path, dev_name, attr), 'r') + lines = f.readlines() + f.close() + except IOError: + lines = [] + + return lines + +class VMBus_Dev: + pass + + +vmbus_dev_list = [] + +for f in os.listdir(vmbus_sys_path): + vmbus_id = get_vmbus_dev_attr(f, 'id')[0].strip() + class_id = get_vmbus_dev_attr(f, 'class_id')[0].strip() + device_id = get_vmbus_dev_attr(f, 'device_id')[0].strip() + dev_desc = vmbus_dev_dict.get(class_id, 'Unknown') + + chn_vp_mapping = get_vmbus_dev_attr(f, 'channel_vp_mapping') + chn_vp_mapping = [c.strip() for c in chn_vp_mapping] + chn_vp_mapping = sorted(chn_vp_mapping, + key = lambda c : int(c.split(':')[0])) + + chn_vp_mapping = ['\tRel_ID=%s, target_cpu=%s' % + (c.split(':')[0], c.split(':')[1]) + for c in chn_vp_mapping] + d = VMBus_Dev() + d.sysfs_path = '%s/%s' % (vmbus_sys_path, f) + d.vmbus_id = vmbus_id + d.class_id = class_id + d.device_id = device_id + d.dev_desc = dev_desc + d.chn_vp_mapping = '\n'.join(chn_vp_mapping) + if d.chn_vp_mapping: + d.chn_vp_mapping += '\n' + + vmbus_dev_list.append(d) + + +vmbus_dev_list = sorted(vmbus_dev_list, key = lambda d : int(d.vmbus_id)) + +format0 = '%2s: %s' +format1 = '%2s: Class_ID = %s - %s\n%s' +format2 = '%2s: Class_ID = %s - %s\n\tDevice_ID = %s\n\tSysfs path: %s\n%s' + +for d in vmbus_dev_list: + if verbose == 0: + print ('VMBUS ID ' + format0) % (d.vmbus_id, d.dev_desc) + elif verbose == 1: + print ('VMBUS ID ' + format1) % \ + (d.vmbus_id, d.class_id, d.dev_desc, d.chn_vp_mapping) + else: + print ('VMBUS ID ' + format2) % \ + (d.vmbus_id, d.class_id, d.dev_desc, \ + d.device_id, d.sysfs_path, d.chn_vp_mapping) diff --git a/tools/iio/generic_buffer.c b/tools/iio/generic_buffer.c index 4eebb6616e5c..9f7b85bf6ada 100644 --- a/tools/iio/generic_buffer.c +++ b/tools/iio/generic_buffer.c @@ -51,14 +51,33 @@ int size_from_channelarray(struct iio_channel_info *channels, int num_channels) if (bytes % channels[i].bytes == 0) channels[i].location = bytes; else - channels[i].location = bytes - bytes%channels[i].bytes - + channels[i].bytes; + channels[i].location = bytes - bytes % channels[i].bytes + + channels[i].bytes; + bytes = channels[i].location + channels[i].bytes; i++; } + return bytes; } +void print1byte(uint8_t input, struct iio_channel_info *info) +{ + /* + * Shift before conversion to avoid sign extension + * of left aligned data + */ + input >>= info->shift; + input &= info->mask; + if (info->is_signed) { + int8_t val = (int8_t)(input << (8 - info->bits_used)) >> + (8 - info->bits_used); + printf("%05f ", ((float)val + info->offset) * info->scale); + } else { + printf("%05f ", ((float)input + info->offset) * info->scale); + } +} + void print2byte(uint16_t input, struct iio_channel_info *info) { /* First swap if incorrect endian */ @@ -136,9 +155,9 @@ void print8byte(uint64_t input, struct iio_channel_info *info) /** * process_scan() - print out the values in SI units * @data: pointer to the start of the scan - * @channels: information about the channels. Note - * size_from_channelarray must have been called first to fill the - * location offsets. + * @channels: information about the channels. + * Note: size_from_channelarray must have been called first + * to fill the location offsets. * @num_channels: number of channels **/ void process_scan(char *data, @@ -150,6 +169,10 @@ void process_scan(char *data, for (k = 0; k < num_channels; k++) switch (channels[k].bytes) { /* only a few cases implemented so far */ + case 1: + print1byte(*(uint8_t *)(data + channels[k].location), + &channels[k]); + break; case 2: print2byte(*(uint16_t *)(data + channels[k].location), &channels[k]); @@ -170,15 +193,15 @@ void process_scan(char *data, void print_usage(void) { - printf("Usage: generic_buffer [options]...\n" - "Capture, convert and output data from IIO device buffer\n" - " -c <n> Do n conversions\n" - " -e Disable wait for event (new data)\n" - " -g Use trigger-less mode\n" - " -l <n> Set buffer length to n samples\n" - " -n <name> Set device name (mandatory)\n" - " -t <name> Set trigger name\n" - " -w <n> Set delay between reads in us (event-less mode)\n"); + fprintf(stderr, "Usage: generic_buffer [options]...\n" + "Capture, convert and output data from IIO device buffer\n" + " -c <n> Do n conversions\n" + " -e Disable wait for event (new data)\n" + " -g Use trigger-less mode\n" + " -l <n> Set buffer length to n samples\n" + " -n <name> Set device name (mandatory)\n" + " -t <name> Set trigger name\n" + " -w <n> Set delay between reads in us (event-less mode)\n"); } int main(int argc, char **argv) @@ -213,6 +236,7 @@ int main(int argc, char **argv) num_loops = strtoul(optarg, &dummy, 10); if (errno) return -errno; + break; case 'e': noevents = 1; @@ -225,6 +249,7 @@ int main(int argc, char **argv) buf_len = strtoul(optarg, &dummy, 10); if (errno) return -errno; + break; case 'n': device_name = optarg; @@ -245,8 +270,8 @@ int main(int argc, char **argv) } } - if (device_name == NULL) { - printf("Device name not set\n"); + if (!device_name) { + fprintf(stderr, "Device name not set\n"); print_usage(); return -1; } @@ -254,9 +279,10 @@ int main(int argc, char **argv) /* Find the device requested */ dev_num = find_type_by_name(device_name, "iio:device"); if (dev_num < 0) { - printf("Failed to find the %s\n", device_name); + fprintf(stderr, "Failed to find the %s\n", device_name); return dev_num; } + printf("iio device number being used is %d\n", dev_num); ret = asprintf(&dev_dir_name, "%siio:device%d", iio_dir, dev_num); @@ -264,7 +290,7 @@ int main(int argc, char **argv) return -ENOMEM; if (!notrigger) { - if (trigger_name == NULL) { + if (!trigger_name) { /* * Build the trigger name. If it is device associated * its name is <device_name>_dev[n] where n matches @@ -281,13 +307,16 @@ int main(int argc, char **argv) /* Verify the trigger exists */ trig_num = find_type_by_name(trigger_name, "trigger"); if (trig_num < 0) { - printf("Failed to find the trigger %s\n", trigger_name); + fprintf(stderr, "Failed to find the trigger %s\n", + trigger_name); ret = trig_num; goto error_free_triggername; } + printf("iio trigger number being used is %d\n", trig_num); - } else + } else { printf("trigger-less mode selected\n"); + } /* * Parse the files in scan_elements to identify what channels are @@ -295,8 +324,8 @@ int main(int argc, char **argv) */ ret = build_channel_array(dev_dir_name, &channels, &num_channels); if (ret) { - printf("Problem reading scan element information\n"); - printf("diag %s\n", dev_dir_name); + fprintf(stderr, "Problem reading scan element information\n" + "diag %s\n", dev_dir_name); goto error_free_triggername; } @@ -314,13 +343,16 @@ int main(int argc, char **argv) if (!notrigger) { printf("%s %s\n", dev_dir_name, trigger_name); - /* Set the device trigger to be the data ready trigger found - * above */ + /* + * Set the device trigger to be the data ready trigger found + * above + */ ret = write_sysfs_string_and_verify("trigger/current_trigger", dev_dir_name, trigger_name); if (ret < 0) { - printf("Failed to write current_trigger file\n"); + fprintf(stderr, + "Failed to write current_trigger file\n"); goto error_free_buf_dir_name; } } @@ -332,10 +364,14 @@ int main(int argc, char **argv) /* Enable the buffer */ ret = write_sysfs_int("enable", buf_dir_name, 1); - if (ret < 0) + if (ret < 0) { + fprintf(stderr, + "Failed to enable buffer: %s\n", strerror(-ret)); goto error_free_buf_dir_name; + } + scan_size = size_from_channelarray(channels, num_channels); - data = malloc(scan_size*buf_len); + data = malloc(scan_size * buf_len); if (!data) { ret = -ENOMEM; goto error_free_buf_dir_name; @@ -349,13 +385,12 @@ int main(int argc, char **argv) /* Attempt to open non blocking the access dev */ fp = open(buffer_access, O_RDONLY | O_NONBLOCK); - if (fp == -1) { /* If it isn't there make the node */ + if (fp == -1) { /* TODO: If it isn't there make the node */ ret = -errno; - printf("Failed to open %s\n", buffer_access); + fprintf(stderr, "Failed to open %s\n", buffer_access); goto error_free_buffer_access; } - /* Wait for events 10 times */ for (j = 0; j < num_loops; j++) { if (!noevents) { struct pollfd pfd = { @@ -372,25 +407,22 @@ int main(int argc, char **argv) } toread = buf_len; - } else { usleep(timedelay); toread = 64; } - read_size = read(fp, - data, - toread*scan_size); + read_size = read(fp, data, toread * scan_size); if (read_size < 0) { if (errno == EAGAIN) { - printf("nothing available\n"); + fprintf(stderr, "nothing available\n"); continue; - } else + } else { break; + } } - for (i = 0; i < read_size/scan_size; i++) - process_scan(data + scan_size*i, - channels, + for (i = 0; i < read_size / scan_size; i++) + process_scan(data + scan_size * i, channels, num_channels); } @@ -404,11 +436,13 @@ int main(int argc, char **argv) ret = write_sysfs_string("trigger/current_trigger", dev_dir_name, "NULL"); if (ret < 0) - printf("Failed to write to %s\n", dev_dir_name); + fprintf(stderr, "Failed to write to %s\n", + dev_dir_name); error_close_buffer_access: if (close(fp) == -1) perror("Failed to close buffer"); + error_free_buffer_access: free(buffer_access); error_free_data: @@ -424,6 +458,7 @@ error_free_channels: error_free_triggername: if (datardytrigger) free(trigger_name); + error_free_dev_dir_name: free(dev_dir_name); diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c index 016760e769c0..cd3fd41b481d 100644 --- a/tools/iio/iio_event_monitor.c +++ b/tools/iio/iio_event_monitor.c @@ -13,7 +13,6 @@ * * Usage: * iio_event_monitor <device_name> - * */ #include <unistd.h> @@ -51,6 +50,9 @@ static const char * const iio_chan_type_name_spec[] = { [IIO_HUMIDITYRELATIVE] = "humidityrelative", [IIO_ACTIVITY] = "activity", [IIO_STEPS] = "steps", + [IIO_ENERGY] = "energy", + [IIO_DISTANCE] = "distance", + [IIO_VELOCITY] = "velocity", }; static const char * const iio_ev_type_text[] = { @@ -99,6 +101,7 @@ static const char * const iio_modifier_names[] = { [IIO_MOD_JOGGING] = "jogging", [IIO_MOD_WALKING] = "walking", [IIO_MOD_STILL] = "still", + [IIO_MOD_ROOT_SUM_SQUARED_X_Y_Z] = "sqrt(x^2+y^2+z^2)", }; static bool event_is_known(struct iio_event_data *event) @@ -130,6 +133,9 @@ static bool event_is_known(struct iio_event_data *event) case IIO_HUMIDITYRELATIVE: case IIO_ACTIVITY: case IIO_STEPS: + case IIO_ENERGY: + case IIO_DISTANCE: + case IIO_VELOCITY: break; default: return false; @@ -167,6 +173,7 @@ static bool event_is_known(struct iio_event_data *event) case IIO_MOD_JOGGING: case IIO_MOD_WALKING: case IIO_MOD_STILL: + case IIO_MOD_ROOT_SUM_SQUARED_X_Y_Z: break; default: return false; @@ -208,8 +215,9 @@ static void print_event(struct iio_event_data *event) bool diff = IIO_EVENT_CODE_EXTRACT_DIFF(event->id); if (!event_is_known(event)) { - printf("Unknown event: time: %lld, id: %llx\n", - event->timestamp, event->id); + fprintf(stderr, "Unknown event: time: %lld, id: %llx\n", + event->timestamp, event->id); + return; } @@ -229,6 +237,7 @@ static void print_event(struct iio_event_data *event) if (dir != IIO_EV_DIR_NONE) printf(", direction: %s", iio_ev_dir_text[dir]); + printf("\n"); } @@ -242,7 +251,7 @@ int main(int argc, char **argv) int fd, event_fd; if (argc <= 1) { - printf("Usage: %s <device_name>\n", argv[0]); + fprintf(stderr, "Usage: %s <device_name>\n", argv[0]); return -1; } @@ -251,14 +260,15 @@ int main(int argc, char **argv) dev_num = find_type_by_name(device_name, "iio:device"); if (dev_num >= 0) { printf("Found IIO device with name %s with device number %d\n", - device_name, dev_num); + device_name, dev_num); ret = asprintf(&chrdev_name, "/dev/iio:device%d", dev_num); - if (ret < 0) { + if (ret < 0) return -ENOMEM; - } } else { - /* If we can't find a IIO device by name assume device_name is a - IIO chrdev */ + /* + * If we can't find an IIO device by name assume device_name is + * an IIO chrdev + */ chrdev_name = strdup(device_name); if (!chrdev_name) return -ENOMEM; @@ -267,14 +277,14 @@ int main(int argc, char **argv) fd = open(chrdev_name, 0); if (fd == -1) { ret = -errno; - fprintf(stdout, "Failed to open %s\n", chrdev_name); + fprintf(stderr, "Failed to open %s\n", chrdev_name); goto error_free_chrdev_name; } ret = ioctl(fd, IIO_GET_EVENT_FD_IOCTL, &event_fd); if (ret == -1 || event_fd == -1) { ret = -errno; - fprintf(stdout, "Failed to retrieve event fd\n"); + fprintf(stderr, "Failed to retrieve event fd\n"); if (close(fd) == -1) perror("Failed to close character device file"); @@ -290,7 +300,7 @@ int main(int argc, char **argv) ret = read(event_fd, &event, sizeof(event)); if (ret == -1) { if (errno == EAGAIN) { - printf("nothing available\n"); + fprintf(stderr, "nothing available\n"); continue; } else { ret = -errno; @@ -299,6 +309,12 @@ int main(int argc, char **argv) } } + if (ret != sizeof(event)) { + fprintf(stderr, "Reading event failed!\n"); + ret = -EIO; + break; + } + print_event(&event); } diff --git a/tools/iio/iio_utils.c b/tools/iio/iio_utils.c index ec9ab7f9ae4c..5eb6793f3972 100644 --- a/tools/iio/iio_utils.c +++ b/tools/iio/iio_utils.c @@ -6,9 +6,6 @@ * under the terms of the GNU General Public License version 2 as published by * the Free Software Foundation. */ -#ifndef _IIO_UTILS_H -#define _IIO_UTILS_H - #include <string.h> #include <stdlib.h> #include <stdio.h> @@ -32,15 +29,14 @@ static char * const iio_direction[] = { * * Returns 0 on success, or a negative error code if string extraction failed. **/ -int iioutils_break_up_name(const char *full_name, - char **generic_name) +int iioutils_break_up_name(const char *full_name, char **generic_name) { char *current; char *w, *r; char *working, *prefix = ""; int i, ret; - for (i = 0; i < sizeof(iio_direction) / sizeof(iio_direction[0]); i++) + for (i = 0; i < ARRAY_SIZE(iio_direction); i++) if (!strncmp(full_name, iio_direction[i], strlen(iio_direction[i]))) { prefix = iio_direction[i]; @@ -65,6 +61,7 @@ int iioutils_break_up_name(const char *full_name, *w = *r; w++; } + r++; } *w = '\0'; @@ -88,15 +85,10 @@ int iioutils_break_up_name(const char *full_name, * * Returns a value >= 0 on success, otherwise a negative error code. **/ -int iioutils_get_type(unsigned *is_signed, - unsigned *bytes, - unsigned *bits_used, - unsigned *shift, - uint64_t *mask, - unsigned *be, - const char *device_dir, - const char *name, - const char *generic_name) +int iioutils_get_type(unsigned *is_signed, unsigned *bytes, unsigned *bits_used, + unsigned *shift, uint64_t *mask, unsigned *be, + const char *device_dir, const char *name, + const char *generic_name) { FILE *sysfsfp; int ret; @@ -122,12 +114,13 @@ int iioutils_get_type(unsigned *is_signed, } dp = opendir(scan_el_dir); - if (dp == NULL) { + if (!dp) { ret = -errno; goto error_free_builtname_generic; } + ret = -ENOENT; - while (ent = readdir(dp), ent != NULL) + while (ent = readdir(dp), ent) /* * Do we allow devices to override a generic name with * a specific one? @@ -140,10 +133,12 @@ int iioutils_get_type(unsigned *is_signed, ret = -ENOMEM; goto error_closedir; } + sysfsfp = fopen(filename, "r"); - if (sysfsfp == NULL) { + if (!sysfsfp) { ret = -errno; - printf("failed to open %s\n", filename); + fprintf(stderr, "failed to open %s\n", + filename); goto error_free_filename; } @@ -155,31 +150,36 @@ int iioutils_get_type(unsigned *is_signed, &padint, shift); if (ret < 0) { ret = -errno; - printf("failed to pass scan type description\n"); + fprintf(stderr, + "failed to pass scan type description\n"); goto error_close_sysfsfp; } else if (ret != 5) { ret = -EIO; - printf("scan type description didn't match\n"); + fprintf(stderr, + "scan type description didn't match\n"); goto error_close_sysfsfp; } + *be = (endianchar == 'b'); *bytes = padint / 8; if (*bits_used == 64) *mask = ~0; else - *mask = (1 << *bits_used) - 1; + *mask = (1ULL << *bits_used) - 1; + *is_signed = (signchar == 's'); if (fclose(sysfsfp)) { ret = -errno; - printf("Failed to close %s\n", filename); + fprintf(stderr, "Failed to close %s\n", + filename); goto error_free_filename; } sysfsfp = 0; free(filename); - filename = 0; } + error_close_sysfsfp: if (sysfsfp) if (fclose(sysfsfp)) @@ -188,6 +188,7 @@ error_close_sysfsfp: error_free_filename: if (filename) free(filename); + error_closedir: if (closedir(dp) == -1) perror("iioutils_get_type(): Failed to close directory"); @@ -212,11 +213,9 @@ error_free_scan_el_dir: * * Returns a value >= 0 on success, otherwise a negative error code. **/ -int iioutils_get_param_float(float *output, - const char *param_name, - const char *device_dir, - const char *name, - const char *generic_name) +int iioutils_get_param_float(float *output, const char *param_name, + const char *device_dir, const char *name, + const char *generic_name) { FILE *sysfsfp; int ret; @@ -235,13 +234,15 @@ int iioutils_get_param_float(float *output, ret = -ENOMEM; goto error_free_builtname; } + dp = opendir(device_dir); - if (dp == NULL) { + if (!dp) { ret = -errno; goto error_free_builtname_generic; } + ret = -ENOENT; - while (ent = readdir(dp), ent != NULL) + while (ent = readdir(dp), ent) if ((strcmp(builtname, ent->d_name) == 0) || (strcmp(builtname_generic, ent->d_name) == 0)) { ret = asprintf(&filename, @@ -250,11 +251,13 @@ int iioutils_get_param_float(float *output, ret = -ENOMEM; goto error_closedir; } + sysfsfp = fopen(filename, "r"); if (!sysfsfp) { ret = -errno; goto error_free_filename; } + errno = 0; if (fscanf(sysfsfp, "%f", output) != 1) ret = errno ? -errno : -ENODATA; @@ -264,6 +267,7 @@ int iioutils_get_param_float(float *output, error_free_filename: if (filename) free(filename); + error_closedir: if (closedir(dp) == -1) perror("iioutils_get_param_float(): Failed to close directory"); @@ -282,19 +286,17 @@ error_free_builtname: * @cnt: the amount of array elements **/ -void bsort_channel_array_by_index(struct iio_channel_info **ci_array, - int cnt) +void bsort_channel_array_by_index(struct iio_channel_info *ci_array, int cnt) { - struct iio_channel_info temp; int x, y; for (x = 0; x < cnt; x++) for (y = 0; y < (cnt - 1); y++) - if ((*ci_array)[y].index > (*ci_array)[y+1].index) { - temp = (*ci_array)[y + 1]; - (*ci_array)[y + 1] = (*ci_array)[y]; - (*ci_array)[y] = temp; + if (ci_array[y].index > ci_array[y + 1].index) { + temp = ci_array[y + 1]; + ci_array[y + 1] = ci_array[y]; + ci_array[y] = temp; } } @@ -307,8 +309,7 @@ void bsort_channel_array_by_index(struct iio_channel_info **ci_array, * Returns 0 on success, otherwise a negative error code. **/ int build_channel_array(const char *device_dir, - struct iio_channel_info **ci_array, - int *counter) + struct iio_channel_info **ci_array, int *counter) { DIR *dp; FILE *sysfsfp; @@ -325,11 +326,12 @@ int build_channel_array(const char *device_dir, return -ENOMEM; dp = opendir(scan_el_dir); - if (dp == NULL) { + if (!dp) { ret = -errno; goto error_free_name; } - while (ent = readdir(dp), ent != NULL) + + while (ent = readdir(dp), ent) if (strcmp(ent->d_name + strlen(ent->d_name) - strlen("_en"), "_en") == 0) { ret = asprintf(&filename, @@ -338,12 +340,14 @@ int build_channel_array(const char *device_dir, ret = -ENOMEM; goto error_close_dir; } + sysfsfp = fopen(filename, "r"); - if (sysfsfp == NULL) { + if (!sysfsfp) { ret = -errno; free(filename); goto error_close_dir; } + errno = 0; if (fscanf(sysfsfp, "%i", &ret) != 1) { ret = errno ? -errno : -ENODATA; @@ -353,9 +357,9 @@ int build_channel_array(const char *device_dir, free(filename); goto error_close_dir; } - if (ret == 1) (*counter)++; + if (fclose(sysfsfp)) { ret = -errno; free(filename); @@ -364,13 +368,15 @@ int build_channel_array(const char *device_dir, free(filename); } + *ci_array = malloc(sizeof(**ci_array) * (*counter)); - if (*ci_array == NULL) { + if (!*ci_array) { ret = -ENOMEM; goto error_close_dir; } + seekdir(dp, 0); - while (ent = readdir(dp), ent != NULL) { + while (ent = readdir(dp), ent) { if (strcmp(ent->d_name + strlen(ent->d_name) - strlen("_en"), "_en") == 0) { int current_enabled = 0; @@ -384,13 +390,15 @@ int build_channel_array(const char *device_dir, count--; goto error_cleanup_array; } + sysfsfp = fopen(filename, "r"); - if (sysfsfp == NULL) { + if (!sysfsfp) { ret = -errno; free(filename); count--; goto error_cleanup_array; } + errno = 0; if (fscanf(sysfsfp, "%i", ¤t_enabled) != 1) { ret = errno ? -errno : -ENODATA; @@ -417,12 +425,13 @@ int build_channel_array(const char *device_dir, current->name = strndup(ent->d_name, strlen(ent->d_name) - strlen("_en")); - if (current->name == NULL) { + if (!current->name) { free(filename); ret = -ENOMEM; count--; goto error_cleanup_array; } + /* Get the generic and specific name elements */ ret = iioutils_break_up_name(current->name, ¤t->generic_name); @@ -432,6 +441,7 @@ int build_channel_array(const char *device_dir, count--; goto error_cleanup_array; } + ret = asprintf(&filename, "%s/%s_index", scan_el_dir, @@ -441,10 +451,12 @@ int build_channel_array(const char *device_dir, ret = -ENOMEM; goto error_cleanup_array; } + sysfsfp = fopen(filename, "r"); - if (sysfsfp == NULL) { + if (!sysfsfp) { ret = -errno; - printf("failed to open %s\n", filename); + fprintf(stderr, "failed to open %s\n", + filename); free(filename); goto error_cleanup_array; } @@ -472,15 +484,17 @@ int build_channel_array(const char *device_dir, device_dir, current->name, current->generic_name); - if (ret < 0) + if ((ret < 0) && (ret != -ENOENT)) goto error_cleanup_array; + ret = iioutils_get_param_float(¤t->offset, "offset", device_dir, current->name, current->generic_name); - if (ret < 0) + if ((ret < 0) && (ret != -ENOENT)) goto error_cleanup_array; + ret = iioutils_get_type(¤t->is_signed, ¤t->bytes, ¤t->bits_used, @@ -502,7 +516,7 @@ int build_channel_array(const char *device_dir, free(scan_el_dir); /* reorder so that the array is in index order */ - bsort_channel_array_by_index(ci_array, *counter); + bsort_channel_array_by_index(*ci_array, *counter); return 0; @@ -512,6 +526,8 @@ error_cleanup_array: free((*ci_array)[i].generic_name); } free(*ci_array); + *ci_array = NULL; + *counter = 0; error_close_dir: if (dp) if (closedir(dp) == -1) @@ -523,7 +539,7 @@ error_free_name: return ret; } -int calc_digits(int num) +static int calc_digits(int num) { int count = 0; @@ -549,44 +565,43 @@ int find_type_by_name(const char *name, const char *type) const struct dirent *ent; int number, numstrlen, ret; - FILE *nameFile; + FILE *namefp; DIR *dp; char thisname[IIO_MAX_NAME_LENGTH]; char *filename; dp = opendir(iio_dir); - if (dp == NULL) { - printf("No industrialio devices available\n"); + if (!dp) { + fprintf(stderr, "No industrialio devices available\n"); return -ENODEV; } - while (ent = readdir(dp), ent != NULL) { + while (ent = readdir(dp), ent) { if (strcmp(ent->d_name, ".") != 0 && - strcmp(ent->d_name, "..") != 0 && - strlen(ent->d_name) > strlen(type) && - strncmp(ent->d_name, type, strlen(type)) == 0) { + strcmp(ent->d_name, "..") != 0 && + strlen(ent->d_name) > strlen(type) && + strncmp(ent->d_name, type, strlen(type)) == 0) { errno = 0; ret = sscanf(ent->d_name + strlen(type), "%d", &number); if (ret < 0) { ret = -errno; - printf("failed to read element number\n"); + fprintf(stderr, + "failed to read element number\n"); goto error_close_dir; } else if (ret != 1) { ret = -EIO; - printf("failed to match element number\n"); + fprintf(stderr, + "failed to match element number\n"); goto error_close_dir; } numstrlen = calc_digits(number); /* verify the next character is not a colon */ if (strncmp(ent->d_name + strlen(type) + numstrlen, - ":", - 1) != 0) { - filename = malloc(strlen(iio_dir) - + strlen(type) - + numstrlen - + 6); - if (filename == NULL) { + ":", 1) != 0) { + filename = malloc(strlen(iio_dir) + strlen(type) + + numstrlen + 6); + if (!filename) { ret = -ENOMEM; goto error_close_dir; } @@ -598,19 +613,20 @@ int find_type_by_name(const char *name, const char *type) goto error_close_dir; } - nameFile = fopen(filename, "r"); - if (!nameFile) { + namefp = fopen(filename, "r"); + if (!namefp) { free(filename); continue; } + free(filename); errno = 0; - if (fscanf(nameFile, "%s", thisname) != 1) { + if (fscanf(namefp, "%s", thisname) != 1) { ret = errno ? -errno : -ENODATA; goto error_close_dir; } - if (fclose(nameFile)) { + if (fclose(namefp)) { ret = -errno; goto error_close_dir; } @@ -618,6 +634,7 @@ int find_type_by_name(const char *name, const char *type) if (strcmp(name, thisname) == 0) { if (closedir(dp) == -1) return -errno; + return number; } } @@ -631,6 +648,7 @@ int find_type_by_name(const char *name, const char *type) error_close_dir: if (closedir(dp) == -1) perror("find_type_by_name(): Failed to close directory"); + return ret; } @@ -642,18 +660,20 @@ static int _write_sysfs_int(const char *filename, const char *basedir, int val, int test; char *temp = malloc(strlen(basedir) + strlen(filename) + 2); - if (temp == NULL) + if (!temp) return -ENOMEM; + ret = sprintf(temp, "%s/%s", basedir, filename); if (ret < 0) goto error_free; sysfsfp = fopen(temp, "w"); - if (sysfsfp == NULL) { + if (!sysfsfp) { ret = -errno; - printf("failed to open %s\n", temp); + fprintf(stderr, "failed to open %s\n", temp); goto error_free; } + ret = fprintf(sysfsfp, "%d", val); if (ret < 0) { if (fclose(sysfsfp)) @@ -669,11 +689,12 @@ static int _write_sysfs_int(const char *filename, const char *basedir, int val, if (verify) { sysfsfp = fopen(temp, "r"); - if (sysfsfp == NULL) { + if (!sysfsfp) { ret = -errno; - printf("failed to open %s\n", temp); + fprintf(stderr, "failed to open %s\n", temp); goto error_free; } + if (fscanf(sysfsfp, "%d", &test) != 1) { ret = errno ? -errno : -ENODATA; if (fclose(sysfsfp)) @@ -688,13 +709,13 @@ static int _write_sysfs_int(const char *filename, const char *basedir, int val, } if (test != val) { - printf("Possible failure in int write %d to %s%s\n", - val, - basedir, - filename); + fprintf(stderr, + "Possible failure in int write %d to %s/%s\n", + val, basedir, filename); ret = -1; } } + error_free: free(temp); return ret; @@ -735,20 +756,22 @@ static int _write_sysfs_string(const char *filename, const char *basedir, FILE *sysfsfp; char *temp = malloc(strlen(basedir) + strlen(filename) + 2); - if (temp == NULL) { - printf("Memory allocation failed\n"); + if (!temp) { + fprintf(stderr, "Memory allocation failed\n"); return -ENOMEM; } + ret = sprintf(temp, "%s/%s", basedir, filename); if (ret < 0) goto error_free; sysfsfp = fopen(temp, "w"); - if (sysfsfp == NULL) { + if (!sysfsfp) { ret = -errno; - printf("Could not open %s\n", temp); + fprintf(stderr, "Could not open %s\n", temp); goto error_free; } + ret = fprintf(sysfsfp, "%s", val); if (ret < 0) { if (fclose(sysfsfp)) @@ -764,11 +787,12 @@ static int _write_sysfs_string(const char *filename, const char *basedir, if (verify) { sysfsfp = fopen(temp, "r"); - if (sysfsfp == NULL) { + if (!sysfsfp) { ret = -errno; - printf("could not open file to verify\n"); + fprintf(stderr, "Could not open file to verify\n"); goto error_free; } + if (fscanf(sysfsfp, "%s", temp) != 1) { ret = errno ? -errno : -ENODATA; if (fclose(sysfsfp)) @@ -783,16 +807,14 @@ static int _write_sysfs_string(const char *filename, const char *basedir, } if (strcmp(temp, val) != 0) { - printf("Possible failure in string write of %s " - "Should be %s " - "written to %s\%s\n", - temp, - val, - basedir, - filename); + fprintf(stderr, + "Possible failure in string write of %s " + "Should be %s written to %s/%s\n", temp, val, + basedir, filename); ret = -1; } } + error_free: free(temp); @@ -841,19 +863,21 @@ int read_sysfs_posint(const char *filename, const char *basedir) FILE *sysfsfp; char *temp = malloc(strlen(basedir) + strlen(filename) + 2); - if (temp == NULL) { - printf("Memory allocation failed"); + if (!temp) { + fprintf(stderr, "Memory allocation failed"); return -ENOMEM; } + ret = sprintf(temp, "%s/%s", basedir, filename); if (ret < 0) goto error_free; sysfsfp = fopen(temp, "r"); - if (sysfsfp == NULL) { + if (!sysfsfp) { ret = -errno; goto error_free; } + errno = 0; if (fscanf(sysfsfp, "%d\n", &ret) != 1) { ret = errno ? -errno : -ENODATA; @@ -868,6 +892,7 @@ int read_sysfs_posint(const char *filename, const char *basedir) error_free: free(temp); + return ret; } @@ -885,19 +910,21 @@ int read_sysfs_float(const char *filename, const char *basedir, float *val) FILE *sysfsfp; char *temp = malloc(strlen(basedir) + strlen(filename) + 2); - if (temp == NULL) { - printf("Memory allocation failed"); + if (!temp) { + fprintf(stderr, "Memory allocation failed"); return -ENOMEM; } + ret = sprintf(temp, "%s/%s", basedir, filename); if (ret < 0) goto error_free; sysfsfp = fopen(temp, "r"); - if (sysfsfp == NULL) { + if (!sysfsfp) { ret = -errno; goto error_free; } + errno = 0; if (fscanf(sysfsfp, "%f\n", val) != 1) { ret = errno ? -errno : -ENODATA; @@ -912,6 +939,7 @@ int read_sysfs_float(const char *filename, const char *basedir, float *val) error_free: free(temp); + return ret; } @@ -929,19 +957,21 @@ int read_sysfs_string(const char *filename, const char *basedir, char *str) FILE *sysfsfp; char *temp = malloc(strlen(basedir) + strlen(filename) + 2); - if (temp == NULL) { - printf("Memory allocation failed"); + if (!temp) { + fprintf(stderr, "Memory allocation failed"); return -ENOMEM; } + ret = sprintf(temp, "%s/%s", basedir, filename); if (ret < 0) goto error_free; sysfsfp = fopen(temp, "r"); - if (sysfsfp == NULL) { + if (!sysfsfp) { ret = -errno; goto error_free; } + errno = 0; if (fscanf(sysfsfp, "%s\n", str) != 1) { ret = errno ? -errno : -ENODATA; @@ -956,7 +986,6 @@ int read_sysfs_string(const char *filename, const char *basedir, char *str) error_free: free(temp); + return ret; } - -#endif /* _IIO_UTILS_H */ diff --git a/tools/iio/iio_utils.h b/tools/iio/iio_utils.h index 379eed9deaea..e3503bfe538b 100644 --- a/tools/iio/iio_utils.h +++ b/tools/iio/iio_utils.h @@ -18,6 +18,8 @@ #define FORMAT_SCAN_ELEMENTS_DIR "%s/scan_elements" #define FORMAT_TYPE_FILE "%s_type" +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) + extern const char *iio_dir; /** @@ -51,17 +53,16 @@ struct iio_channel_info { }; int iioutils_break_up_name(const char *full_name, char **generic_name); -int iioutils_get_type(unsigned *is_signed, unsigned *bytes, - unsigned *bits_used, unsigned *shift, - uint64_t *mask, unsigned *be, - const char *device_dir, const char *name, - const char *generic_name); +int iioutils_get_type(unsigned *is_signed, unsigned *bytes, unsigned *bits_used, + unsigned *shift, uint64_t *mask, unsigned *be, + const char *device_dir, const char *name, + const char *generic_name); int iioutils_get_param_float(float *output, const char *param_name, - const char *device_dir, const char *name, - const char *generic_name); -void bsort_channel_array_by_index(struct iio_channel_info **ci_array, int cnt); + const char *device_dir, const char *name, + const char *generic_name); +void bsort_channel_array_by_index(struct iio_channel_info *ci_array, int cnt); int build_channel_array(const char *device_dir, - struct iio_channel_info **ci_array, int *counter); + struct iio_channel_info **ci_array, int *counter); int find_type_by_name(const char *name, const char *type); int write_sysfs_int(const char *filename, const char *basedir, int val); int write_sysfs_int_and_verify(const char *filename, const char *basedir, diff --git a/tools/iio/lsiio.c b/tools/iio/lsiio.c index b59ee1733924..3d650e668252 100644 --- a/tools/iio/lsiio.c +++ b/tools/iio/lsiio.c @@ -20,7 +20,6 @@ #include <sys/dir.h> #include "iio_utils.h" - static enum verbosity { VERBLEVEL_DEFAULT, /* 0 gives lspci behaviour */ VERBLEVEL_SENSORS, /* 1 lists sensors */ @@ -29,17 +28,16 @@ static enum verbosity { const char *type_device = "iio:device"; const char *type_trigger = "trigger"; - static inline int check_prefix(const char *str, const char *prefix) { return strlen(str) > strlen(prefix) && - strncmp(str, prefix, strlen(prefix)) == 0; + strncmp(str, prefix, strlen(prefix)) == 0; } static inline int check_postfix(const char *str, const char *postfix) { return strlen(str) > strlen(postfix) && - strcmp(str + strlen(str) - strlen(postfix), postfix) == 0; + strcmp(str + strlen(str) - strlen(postfix), postfix) == 0; } static int dump_channels(const char *dev_dir_name) @@ -48,13 +46,13 @@ static int dump_channels(const char *dev_dir_name) const struct dirent *ent; dp = opendir(dev_dir_name); - if (dp == NULL) + if (!dp) return -errno; - while (ent = readdir(dp), ent != NULL) + + while (ent = readdir(dp), ent) if (check_prefix(ent->d_name, "in_") && - check_postfix(ent->d_name, "_raw")) { + check_postfix(ent->d_name, "_raw")) printf(" %-10s\n", ent->d_name); - } return (closedir(dp) == -1) ? -errno : 0; } @@ -63,20 +61,22 @@ static int dump_one_device(const char *dev_dir_name) { char name[IIO_MAX_NAME_LENGTH]; int dev_idx; - int retval; + int ret; - retval = sscanf(dev_dir_name + strlen(iio_dir) + strlen(type_device), - "%i", &dev_idx); - if (retval != 1) + ret = sscanf(dev_dir_name + strlen(iio_dir) + strlen(type_device), "%i", + &dev_idx); + if (ret != 1) return -EINVAL; - retval = read_sysfs_string("name", dev_dir_name, name); - if (retval) - return retval; + + ret = read_sysfs_string("name", dev_dir_name, name); + if (ret < 0) + return ret; printf("Device %03d: %s\n", dev_idx, name); if (verblevel >= VERBLEVEL_SENSORS) return dump_channels(dev_dir_name); + return 0; } @@ -84,17 +84,19 @@ static int dump_one_trigger(const char *dev_dir_name) { char name[IIO_MAX_NAME_LENGTH]; int dev_idx; - int retval; + int ret; - retval = sscanf(dev_dir_name + strlen(iio_dir) + strlen(type_trigger), - "%i", &dev_idx); - if (retval != 1) + ret = sscanf(dev_dir_name + strlen(iio_dir) + strlen(type_trigger), + "%i", &dev_idx); + if (ret != 1) return -EINVAL; - retval = read_sysfs_string("name", dev_dir_name, name); - if (retval) - return retval; + + ret = read_sysfs_string("name", dev_dir_name, name); + if (ret < 0) + return ret; printf("Trigger %03d: %s\n", dev_idx, name); + return 0; } @@ -105,12 +107,12 @@ static int dump_devices(void) DIR *dp; dp = opendir(iio_dir); - if (dp == NULL) { - printf("No industrial I/O devices available\n"); + if (!dp) { + fprintf(stderr, "No industrial I/O devices available\n"); return -ENODEV; } - while (ent = readdir(dp), ent != NULL) { + while (ent = readdir(dp), ent) { if (check_prefix(ent->d_name, type_device)) { char *dev_dir_name; @@ -132,7 +134,7 @@ static int dump_devices(void) } } rewinddir(dp); - while (ent = readdir(dp), ent != NULL) { + while (ent = readdir(dp), ent) { if (check_prefix(ent->d_name, type_trigger)) { char *dev_dir_name; @@ -151,6 +153,7 @@ static int dump_devices(void) free(dev_dir_name); } } + return (closedir(dp) == -1) ? -errno : 0; error_close_dir: diff --git a/tools/include/linux/export.h b/tools/include/linux/export.h new file mode 100644 index 000000000000..d07e586b9ba0 --- /dev/null +++ b/tools/include/linux/export.h @@ -0,0 +1,10 @@ +#ifndef _TOOLS_LINUX_EXPORT_H_ +#define _TOOLS_LINUX_EXPORT_H_ + +#define EXPORT_SYMBOL(sym) +#define EXPORT_SYMBOL_GPL(sym) +#define EXPORT_SYMBOL_GPL_FUTURE(sym) +#define EXPORT_UNUSED_SYMBOL(sym) +#define EXPORT_UNUSED_SYMBOL_GPL(sym) + +#endif diff --git a/tools/lib/api/fs/debugfs.c b/tools/lib/api/fs/debugfs.c index 8305b3e9d48e..eb7cf4d18f8a 100644 --- a/tools/lib/api/fs/debugfs.c +++ b/tools/lib/api/fs/debugfs.c @@ -12,6 +12,7 @@ #include <linux/kernel.h> #include "debugfs.h" +#include "tracefs.h" #ifndef DEBUGFS_DEFAULT_PATH #define DEBUGFS_DEFAULT_PATH "/sys/kernel/debug" @@ -94,11 +95,21 @@ int debugfs__strerror_open(int err, char *buf, size_t size, const char *filename "Hint:\tIs the debugfs filesystem mounted?\n" "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'"); break; - case EACCES: + case EACCES: { + const char *mountpoint = debugfs_mountpoint; + + if (!access(debugfs_mountpoint, R_OK) && strncmp(filename, "tracing/", 8) == 0) { + const char *tracefs_mntpoint = tracefs_find_mountpoint(); + + if (tracefs_mntpoint) + mountpoint = tracefs_mntpoint; + } + snprintf(buf, size, "Error:\tNo permissions to read %s/%s\n" "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n", - debugfs_mountpoint, filename, debugfs_mountpoint); + debugfs_mountpoint, filename, mountpoint); + } break; default: snprintf(buf, size, "%s", strerror_r(err, sbuf, sizeof(sbuf))); diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore new file mode 100644 index 000000000000..812aeedaea38 --- /dev/null +++ b/tools/lib/bpf/.gitignore @@ -0,0 +1,2 @@ +libbpf_version.h +FEATURE-DUMP diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build new file mode 100644 index 000000000000..d8749756352d --- /dev/null +++ b/tools/lib/bpf/Build @@ -0,0 +1 @@ +libbpf-y := libbpf.o bpf.o diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile new file mode 100644 index 000000000000..f68d23a0b487 --- /dev/null +++ b/tools/lib/bpf/Makefile @@ -0,0 +1,195 @@ +# Most of this file is copied from tools/lib/traceevent/Makefile + +BPF_VERSION = 0 +BPF_PATCHLEVEL = 0 +BPF_EXTRAVERSION = 1 + +MAKEFLAGS += --no-print-directory + + +# Makefiles suck: This macro sets a default value of $(2) for the +# variable named by $(1), unless the variable has been set by +# environment or command line. This is necessary for CC and AR +# because make sets default values, so the simpler ?= approach +# won't work as expected. +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. +$(call allow-override,CC,$(CROSS_COMPILE)gcc) +$(call allow-override,AR,$(CROSS_COMPILE)ar) + +INSTALL = install + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later. +DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + +LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1) +ifeq ($(LP64), 1) + libdir_relative = lib64 +else + libdir_relative = lib +endif + +prefix ?= /usr/local +libdir = $(prefix)/$(libdir_relative) +man_dir = $(prefix)/share/man +man_dir_SQ = '$(subst ','\'',$(man_dir))' + +export man_dir man_dir_SQ INSTALL +export DESTDIR DESTDIR_SQ + +include ../../scripts/Makefile.include + +# copy a bit from Linux kbuild + +ifeq ("$(origin V)", "command line") + VERBOSE = $(V) +endif +ifndef VERBOSE + VERBOSE = 0 +endif + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif + +FEATURE_DISPLAY = libelf libelf-getphdrnum libelf-mmap bpf +FEATURE_TESTS = libelf bpf + +INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/arch/$(ARCH)/include/uapi -I$(srctree)/include/uapi +FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES) + +include $(srctree)/tools/build/Makefile.feature + +export prefix libdir src obj + +# Shell quotes +libdir_SQ = $(subst ','\'',$(libdir)) +libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) +plugin_dir_SQ = $(subst ','\'',$(plugin_dir)) + +LIB_FILE = libbpf.a libbpf.so + +VERSION = $(BPF_VERSION) +PATCHLEVEL = $(BPF_PATCHLEVEL) +EXTRAVERSION = $(BPF_EXTRAVERSION) + +OBJ = $@ +N = + +LIBBPF_VERSION = $(BPF_VERSION).$(BPF_PATCHLEVEL).$(BPF_EXTRAVERSION) + +# Set compile option CFLAGS +ifdef EXTRA_CFLAGS + CFLAGS := $(EXTRA_CFLAGS) +else + CFLAGS := -g -Wall +endif + +ifeq ($(feature-libelf-mmap), 1) + override CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT +endif + +ifeq ($(feature-libelf-getphdrnum), 1) + override CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT +endif + +# Append required CFLAGS +override CFLAGS += $(EXTRA_WARNINGS) +override CFLAGS += -Werror -Wall +override CFLAGS += -fPIC +override CFLAGS += $(INCLUDES) + +ifeq ($(VERBOSE),1) + Q = +else + Q = @ +endif + +# Disable command line variables (CFLAGS) overide from top +# level Makefile (perf), otherwise build Makefile will get +# the same command line setup. +MAKEOVERRIDES= + +export srctree OUTPUT CC LD CFLAGS V +build := -f $(srctree)/tools/build/Makefile.build dir=. obj + +BPF_IN := $(OUTPUT)libbpf-in.o +LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) + +CMD_TARGETS = $(LIB_FILE) + +TARGETS = $(CMD_TARGETS) + +all: $(VERSION_FILES) all_cmd + +all_cmd: $(CMD_TARGETS) + +$(BPF_IN): force elfdep bpfdep + $(Q)$(MAKE) $(build)=libbpf + +$(OUTPUT)libbpf.so: $(BPF_IN) + $(QUIET_LINK)$(CC) --shared $^ -o $@ + +$(OUTPUT)libbpf.a: $(BPF_IN) + $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ + +define update_dir + (echo $1 > $@.tmp; \ + if [ -r $@ ] && cmp -s $@ $@.tmp; then \ + rm -f $@.tmp; \ + else \ + echo ' UPDATE $@'; \ + mv -f $@.tmp $@; \ + fi); +endef + +define do_install + if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ + fi; \ + $(INSTALL) $1 '$(DESTDIR_SQ)$2' +endef + +install_lib: all_cmd + $(call QUIET_INSTALL, $(LIB_FILE)) \ + $(call do_install,$(LIB_FILE),$(libdir_SQ)) + +install: install_lib + +### Cleaning rules + +config-clean: + $(call QUIET_CLEAN, config) + $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null + +clean: + $(call QUIET_CLEAN, libbpf) $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d \ + $(RM) LIBBPF-CFLAGS + $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP + + + +PHONY += force elfdep bpfdep +force: + +elfdep: + @if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit -1 ; fi + +bpfdep: + @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit -1 ; fi + +# Declare the contents of the .PHONY variable as phony. We keep that +# information in a variable so we can use it in if_changed and friends. +.PHONY: $(PHONY) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c new file mode 100644 index 000000000000..a6331050ab79 --- /dev/null +++ b/tools/lib/bpf/bpf.c @@ -0,0 +1,85 @@ +/* + * common eBPF ELF operations. + * + * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> + * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> + * Copyright (C) 2015 Huawei Inc. + */ + +#include <stdlib.h> +#include <memory.h> +#include <unistd.h> +#include <asm/unistd.h> +#include <linux/bpf.h> +#include "bpf.h" + +/* + * When building perf, unistd.h is override. Define __NR_bpf is + * required to be defined. + */ +#ifndef __NR_bpf +# if defined(__i386__) +# define __NR_bpf 357 +# elif defined(__x86_64__) +# define __NR_bpf 321 +# elif defined(__aarch64__) +# define __NR_bpf 280 +# else +# error __NR_bpf not defined. libbpf does not support your arch. +# endif +#endif + +static __u64 ptr_to_u64(void *ptr) +{ + return (__u64) (unsigned long) ptr; +} + +static int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, + unsigned int size) +{ + return syscall(__NR_bpf, cmd, attr, size); +} + +int bpf_create_map(enum bpf_map_type map_type, int key_size, + int value_size, int max_entries) +{ + union bpf_attr attr; + + memset(&attr, '\0', sizeof(attr)); + + attr.map_type = map_type; + attr.key_size = key_size; + attr.value_size = value_size; + attr.max_entries = max_entries; + + return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); +} + +int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns, + size_t insns_cnt, char *license, + u32 kern_version, char *log_buf, size_t log_buf_sz) +{ + int fd; + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.prog_type = type; + attr.insn_cnt = (__u32)insns_cnt; + attr.insns = ptr_to_u64(insns); + attr.license = ptr_to_u64(license); + attr.log_buf = ptr_to_u64(NULL); + attr.log_size = 0; + attr.log_level = 0; + attr.kern_version = kern_version; + + fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + if (fd >= 0 || !log_buf || !log_buf_sz) + return fd; + + /* Try again with log */ + attr.log_buf = ptr_to_u64(log_buf); + attr.log_size = log_buf_sz; + attr.log_level = 1; + log_buf[0] = 0; + return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); +} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h new file mode 100644 index 000000000000..854b7361b784 --- /dev/null +++ b/tools/lib/bpf/bpf.h @@ -0,0 +1,23 @@ +/* + * common eBPF ELF operations. + * + * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> + * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> + * Copyright (C) 2015 Huawei Inc. + */ +#ifndef __BPF_BPF_H +#define __BPF_BPF_H + +#include <linux/bpf.h> + +int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, + int max_entries); + +/* Recommend log buffer size */ +#define BPF_LOG_BUF_SIZE 65536 +int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns, + size_t insns_cnt, char *license, + u32 kern_version, char *log_buf, + size_t log_buf_sz); + +#endif diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c new file mode 100644 index 000000000000..4fa4bc4505f5 --- /dev/null +++ b/tools/lib/bpf/libbpf.c @@ -0,0 +1,1037 @@ +/* + * Common eBPF ELF object loading operations. + * + * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> + * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> + * Copyright (C) 2015 Huawei Inc. + */ + +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> +#include <inttypes.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include <errno.h> +#include <asm/unistd.h> +#include <linux/kernel.h> +#include <linux/bpf.h> +#include <linux/list.h> +#include <libelf.h> +#include <gelf.h> + +#include "libbpf.h" +#include "bpf.h" + +#define __printf(a, b) __attribute__((format(printf, a, b))) + +__printf(1, 2) +static int __base_pr(const char *format, ...) +{ + va_list args; + int err; + + va_start(args, format); + err = vfprintf(stderr, format, args); + va_end(args); + return err; +} + +static __printf(1, 2) libbpf_print_fn_t __pr_warning = __base_pr; +static __printf(1, 2) libbpf_print_fn_t __pr_info = __base_pr; +static __printf(1, 2) libbpf_print_fn_t __pr_debug; + +#define __pr(func, fmt, ...) \ +do { \ + if ((func)) \ + (func)("libbpf: " fmt, ##__VA_ARGS__); \ +} while (0) + +#define pr_warning(fmt, ...) __pr(__pr_warning, fmt, ##__VA_ARGS__) +#define pr_info(fmt, ...) __pr(__pr_info, fmt, ##__VA_ARGS__) +#define pr_debug(fmt, ...) __pr(__pr_debug, fmt, ##__VA_ARGS__) + +void libbpf_set_print(libbpf_print_fn_t warn, + libbpf_print_fn_t info, + libbpf_print_fn_t debug) +{ + __pr_warning = warn; + __pr_info = info; + __pr_debug = debug; +} + +/* Copied from tools/perf/util/util.h */ +#ifndef zfree +# define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) +#endif + +#ifndef zclose +# define zclose(fd) ({ \ + int ___err = 0; \ + if ((fd) >= 0) \ + ___err = close((fd)); \ + fd = -1; \ + ___err; }) +#endif + +#ifdef HAVE_LIBELF_MMAP_SUPPORT +# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ_MMAP +#else +# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ +#endif + +/* + * bpf_prog should be a better name but it has been used in + * linux/filter.h. + */ +struct bpf_program { + /* Index in elf obj file, for relocation use. */ + int idx; + char *section_name; + struct bpf_insn *insns; + size_t insns_cnt; + + struct { + int insn_idx; + int map_idx; + } *reloc_desc; + int nr_reloc; + + int fd; + + struct bpf_object *obj; + void *priv; + bpf_program_clear_priv_t clear_priv; +}; + +static LIST_HEAD(bpf_objects_list); + +struct bpf_object { + char license[64]; + u32 kern_version; + void *maps_buf; + size_t maps_buf_sz; + + struct bpf_program *programs; + size_t nr_programs; + int *map_fds; + /* + * This field is required because maps_buf will be freed and + * maps_buf_sz will be set to 0 after loaded. + */ + size_t nr_map_fds; + bool loaded; + + /* + * Information when doing elf related work. Only valid if fd + * is valid. + */ + struct { + int fd; + void *obj_buf; + size_t obj_buf_sz; + Elf *elf; + GElf_Ehdr ehdr; + Elf_Data *symbols; + struct { + GElf_Shdr shdr; + Elf_Data *data; + } *reloc; + int nr_reloc; + } efile; + /* + * All loaded bpf_object is linked in a list, which is + * hidden to caller. bpf_objects__<func> handlers deal with + * all objects. + */ + struct list_head list; + char path[]; +}; +#define obj_elf_valid(o) ((o)->efile.elf) + +static void bpf_program__unload(struct bpf_program *prog) +{ + if (!prog) + return; + + zclose(prog->fd); +} + +static void bpf_program__exit(struct bpf_program *prog) +{ + if (!prog) + return; + + if (prog->clear_priv) + prog->clear_priv(prog, prog->priv); + + prog->priv = NULL; + prog->clear_priv = NULL; + + bpf_program__unload(prog); + zfree(&prog->section_name); + zfree(&prog->insns); + zfree(&prog->reloc_desc); + + prog->nr_reloc = 0; + prog->insns_cnt = 0; + prog->idx = -1; +} + +static int +bpf_program__init(void *data, size_t size, char *name, int idx, + struct bpf_program *prog) +{ + if (size < sizeof(struct bpf_insn)) { + pr_warning("corrupted section '%s'\n", name); + return -EINVAL; + } + + bzero(prog, sizeof(*prog)); + + prog->section_name = strdup(name); + if (!prog->section_name) { + pr_warning("failed to alloc name for prog %s\n", + name); + goto errout; + } + + prog->insns = malloc(size); + if (!prog->insns) { + pr_warning("failed to alloc insns for %s\n", name); + goto errout; + } + prog->insns_cnt = size / sizeof(struct bpf_insn); + memcpy(prog->insns, data, + prog->insns_cnt * sizeof(struct bpf_insn)); + prog->idx = idx; + prog->fd = -1; + + return 0; +errout: + bpf_program__exit(prog); + return -ENOMEM; +} + +static int +bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, + char *name, int idx) +{ + struct bpf_program prog, *progs; + int nr_progs, err; + + err = bpf_program__init(data, size, name, idx, &prog); + if (err) + return err; + + progs = obj->programs; + nr_progs = obj->nr_programs; + + progs = realloc(progs, sizeof(progs[0]) * (nr_progs + 1)); + if (!progs) { + /* + * In this case the original obj->programs + * is still valid, so don't need special treat for + * bpf_close_object(). + */ + pr_warning("failed to alloc a new program '%s'\n", + name); + bpf_program__exit(&prog); + return -ENOMEM; + } + + pr_debug("found program %s\n", prog.section_name); + obj->programs = progs; + obj->nr_programs = nr_progs + 1; + prog.obj = obj; + progs[nr_progs] = prog; + return 0; +} + +static struct bpf_object *bpf_object__new(const char *path, + void *obj_buf, + size_t obj_buf_sz) +{ + struct bpf_object *obj; + + obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1); + if (!obj) { + pr_warning("alloc memory failed for %s\n", path); + return NULL; + } + + strcpy(obj->path, path); + obj->efile.fd = -1; + + /* + * Caller of this function should also calls + * bpf_object__elf_finish() after data collection to return + * obj_buf to user. If not, we should duplicate the buffer to + * avoid user freeing them before elf finish. + */ + obj->efile.obj_buf = obj_buf; + obj->efile.obj_buf_sz = obj_buf_sz; + + obj->loaded = false; + + INIT_LIST_HEAD(&obj->list); + list_add(&obj->list, &bpf_objects_list); + return obj; +} + +static void bpf_object__elf_finish(struct bpf_object *obj) +{ + if (!obj_elf_valid(obj)) + return; + + if (obj->efile.elf) { + elf_end(obj->efile.elf); + obj->efile.elf = NULL; + } + obj->efile.symbols = NULL; + + zfree(&obj->efile.reloc); + obj->efile.nr_reloc = 0; + zclose(obj->efile.fd); + obj->efile.obj_buf = NULL; + obj->efile.obj_buf_sz = 0; +} + +static int bpf_object__elf_init(struct bpf_object *obj) +{ + int err = 0; + GElf_Ehdr *ep; + + if (obj_elf_valid(obj)) { + pr_warning("elf init: internal error\n"); + return -EEXIST; + } + + if (obj->efile.obj_buf_sz > 0) { + /* + * obj_buf should have been validated by + * bpf_object__open_buffer(). + */ + obj->efile.elf = elf_memory(obj->efile.obj_buf, + obj->efile.obj_buf_sz); + } else { + obj->efile.fd = open(obj->path, O_RDONLY); + if (obj->efile.fd < 0) { + pr_warning("failed to open %s: %s\n", obj->path, + strerror(errno)); + return -errno; + } + + obj->efile.elf = elf_begin(obj->efile.fd, + LIBBPF_ELF_C_READ_MMAP, + NULL); + } + + if (!obj->efile.elf) { + pr_warning("failed to open %s as ELF file\n", + obj->path); + err = -EINVAL; + goto errout; + } + + if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) { + pr_warning("failed to get EHDR from %s\n", + obj->path); + err = -EINVAL; + goto errout; + } + ep = &obj->efile.ehdr; + + if ((ep->e_type != ET_REL) || (ep->e_machine != 0)) { + pr_warning("%s is not an eBPF object file\n", + obj->path); + err = -EINVAL; + goto errout; + } + + return 0; +errout: + bpf_object__elf_finish(obj); + return err; +} + +static int +bpf_object__check_endianness(struct bpf_object *obj) +{ + static unsigned int const endian = 1; + + switch (obj->efile.ehdr.e_ident[EI_DATA]) { + case ELFDATA2LSB: + /* We are big endian, BPF obj is little endian. */ + if (*(unsigned char const *)&endian != 1) + goto mismatch; + break; + + case ELFDATA2MSB: + /* We are little endian, BPF obj is big endian. */ + if (*(unsigned char const *)&endian != 0) + goto mismatch; + break; + default: + return -EINVAL; + } + + return 0; + +mismatch: + pr_warning("Error: endianness mismatch.\n"); + return -EINVAL; +} + +static int +bpf_object__init_license(struct bpf_object *obj, + void *data, size_t size) +{ + memcpy(obj->license, data, + min(size, sizeof(obj->license) - 1)); + pr_debug("license of %s is %s\n", obj->path, obj->license); + return 0; +} + +static int +bpf_object__init_kversion(struct bpf_object *obj, + void *data, size_t size) +{ + u32 kver; + + if (size != sizeof(kver)) { + pr_warning("invalid kver section in %s\n", obj->path); + return -EINVAL; + } + memcpy(&kver, data, sizeof(kver)); + obj->kern_version = kver; + pr_debug("kernel version of %s is %x\n", obj->path, + obj->kern_version); + return 0; +} + +static int +bpf_object__init_maps(struct bpf_object *obj, void *data, + size_t size) +{ + if (size == 0) { + pr_debug("%s doesn't need map definition\n", + obj->path); + return 0; + } + + obj->maps_buf = malloc(size); + if (!obj->maps_buf) { + pr_warning("malloc maps failed: %s\n", obj->path); + return -ENOMEM; + } + + obj->maps_buf_sz = size; + memcpy(obj->maps_buf, data, size); + pr_debug("maps in %s: %ld bytes\n", obj->path, (long)size); + return 0; +} + +static int bpf_object__elf_collect(struct bpf_object *obj) +{ + Elf *elf = obj->efile.elf; + GElf_Ehdr *ep = &obj->efile.ehdr; + Elf_Scn *scn = NULL; + int idx = 0, err = 0; + + /* Elf is corrupted/truncated, avoid calling elf_strptr. */ + if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) { + pr_warning("failed to get e_shstrndx from %s\n", + obj->path); + return -EINVAL; + } + + while ((scn = elf_nextscn(elf, scn)) != NULL) { + char *name; + GElf_Shdr sh; + Elf_Data *data; + + idx++; + if (gelf_getshdr(scn, &sh) != &sh) { + pr_warning("failed to get section header from %s\n", + obj->path); + err = -EINVAL; + goto out; + } + + name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name); + if (!name) { + pr_warning("failed to get section name from %s\n", + obj->path); + err = -EINVAL; + goto out; + } + + data = elf_getdata(scn, 0); + if (!data) { + pr_warning("failed to get section data from %s(%s)\n", + name, obj->path); + err = -EINVAL; + goto out; + } + pr_debug("section %s, size %ld, link %d, flags %lx, type=%d\n", + name, (unsigned long)data->d_size, + (int)sh.sh_link, (unsigned long)sh.sh_flags, + (int)sh.sh_type); + + if (strcmp(name, "license") == 0) + err = bpf_object__init_license(obj, + data->d_buf, + data->d_size); + else if (strcmp(name, "version") == 0) + err = bpf_object__init_kversion(obj, + data->d_buf, + data->d_size); + else if (strcmp(name, "maps") == 0) + err = bpf_object__init_maps(obj, data->d_buf, + data->d_size); + else if (sh.sh_type == SHT_SYMTAB) { + if (obj->efile.symbols) { + pr_warning("bpf: multiple SYMTAB in %s\n", + obj->path); + err = -EEXIST; + } else + obj->efile.symbols = data; + } else if ((sh.sh_type == SHT_PROGBITS) && + (sh.sh_flags & SHF_EXECINSTR) && + (data->d_size > 0)) { + err = bpf_object__add_program(obj, data->d_buf, + data->d_size, name, idx); + if (err) { + char errmsg[128]; + strerror_r(-err, errmsg, sizeof(errmsg)); + pr_warning("failed to alloc program %s (%s): %s", + name, obj->path, errmsg); + } + } else if (sh.sh_type == SHT_REL) { + void *reloc = obj->efile.reloc; + int nr_reloc = obj->efile.nr_reloc + 1; + + reloc = realloc(reloc, + sizeof(*obj->efile.reloc) * nr_reloc); + if (!reloc) { + pr_warning("realloc failed\n"); + err = -ENOMEM; + } else { + int n = nr_reloc - 1; + + obj->efile.reloc = reloc; + obj->efile.nr_reloc = nr_reloc; + + obj->efile.reloc[n].shdr = sh; + obj->efile.reloc[n].data = data; + } + } + if (err) + goto out; + } +out: + return err; +} + +static struct bpf_program * +bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx) +{ + struct bpf_program *prog; + size_t i; + + for (i = 0; i < obj->nr_programs; i++) { + prog = &obj->programs[i]; + if (prog->idx == idx) + return prog; + } + return NULL; +} + +static int +bpf_program__collect_reloc(struct bpf_program *prog, + size_t nr_maps, GElf_Shdr *shdr, + Elf_Data *data, Elf_Data *symbols) +{ + int i, nrels; + + pr_debug("collecting relocating info for: '%s'\n", + prog->section_name); + nrels = shdr->sh_size / shdr->sh_entsize; + + prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels); + if (!prog->reloc_desc) { + pr_warning("failed to alloc memory in relocation\n"); + return -ENOMEM; + } + prog->nr_reloc = nrels; + + for (i = 0; i < nrels; i++) { + GElf_Sym sym; + GElf_Rel rel; + unsigned int insn_idx; + struct bpf_insn *insns = prog->insns; + size_t map_idx; + + if (!gelf_getrel(data, i, &rel)) { + pr_warning("relocation: failed to get %d reloc\n", i); + return -EINVAL; + } + + insn_idx = rel.r_offset / sizeof(struct bpf_insn); + pr_debug("relocation: insn_idx=%u\n", insn_idx); + + if (!gelf_getsym(symbols, + GELF_R_SYM(rel.r_info), + &sym)) { + pr_warning("relocation: symbol %"PRIx64" not found\n", + GELF_R_SYM(rel.r_info)); + return -EINVAL; + } + + if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) { + pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n", + insn_idx, insns[insn_idx].code); + return -EINVAL; + } + + map_idx = sym.st_value / sizeof(struct bpf_map_def); + if (map_idx >= nr_maps) { + pr_warning("bpf relocation: map_idx %d large than %d\n", + (int)map_idx, (int)nr_maps - 1); + return -EINVAL; + } + + prog->reloc_desc[i].insn_idx = insn_idx; + prog->reloc_desc[i].map_idx = map_idx; + } + return 0; +} + +static int +bpf_object__create_maps(struct bpf_object *obj) +{ + unsigned int i; + size_t nr_maps; + int *pfd; + + nr_maps = obj->maps_buf_sz / sizeof(struct bpf_map_def); + if (!obj->maps_buf || !nr_maps) { + pr_debug("don't need create maps for %s\n", + obj->path); + return 0; + } + + obj->map_fds = malloc(sizeof(int) * nr_maps); + if (!obj->map_fds) { + pr_warning("realloc perf_bpf_map_fds failed\n"); + return -ENOMEM; + } + obj->nr_map_fds = nr_maps; + + /* fill all fd with -1 */ + memset(obj->map_fds, -1, sizeof(int) * nr_maps); + + pfd = obj->map_fds; + for (i = 0; i < nr_maps; i++) { + struct bpf_map_def def; + + def = *(struct bpf_map_def *)(obj->maps_buf + + i * sizeof(struct bpf_map_def)); + + *pfd = bpf_create_map(def.type, + def.key_size, + def.value_size, + def.max_entries); + if (*pfd < 0) { + size_t j; + int err = *pfd; + + pr_warning("failed to create map: %s\n", + strerror(errno)); + for (j = 0; j < i; j++) + zclose(obj->map_fds[j]); + obj->nr_map_fds = 0; + zfree(&obj->map_fds); + return err; + } + pr_debug("create map: fd=%d\n", *pfd); + pfd++; + } + + zfree(&obj->maps_buf); + obj->maps_buf_sz = 0; + return 0; +} + +static int +bpf_program__relocate(struct bpf_program *prog, int *map_fds) +{ + int i; + + if (!prog || !prog->reloc_desc) + return 0; + + for (i = 0; i < prog->nr_reloc; i++) { + int insn_idx, map_idx; + struct bpf_insn *insns = prog->insns; + + insn_idx = prog->reloc_desc[i].insn_idx; + map_idx = prog->reloc_desc[i].map_idx; + + if (insn_idx >= (int)prog->insns_cnt) { + pr_warning("relocation out of range: '%s'\n", + prog->section_name); + return -ERANGE; + } + insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; + insns[insn_idx].imm = map_fds[map_idx]; + } + + zfree(&prog->reloc_desc); + prog->nr_reloc = 0; + return 0; +} + + +static int +bpf_object__relocate(struct bpf_object *obj) +{ + struct bpf_program *prog; + size_t i; + int err; + + for (i = 0; i < obj->nr_programs; i++) { + prog = &obj->programs[i]; + + err = bpf_program__relocate(prog, obj->map_fds); + if (err) { + pr_warning("failed to relocate '%s'\n", + prog->section_name); + return err; + } + } + return 0; +} + +static int bpf_object__collect_reloc(struct bpf_object *obj) +{ + int i, err; + + if (!obj_elf_valid(obj)) { + pr_warning("Internal error: elf object is closed\n"); + return -EINVAL; + } + + for (i = 0; i < obj->efile.nr_reloc; i++) { + GElf_Shdr *shdr = &obj->efile.reloc[i].shdr; + Elf_Data *data = obj->efile.reloc[i].data; + int idx = shdr->sh_info; + struct bpf_program *prog; + size_t nr_maps = obj->maps_buf_sz / + sizeof(struct bpf_map_def); + + if (shdr->sh_type != SHT_REL) { + pr_warning("internal error at %d\n", __LINE__); + return -EINVAL; + } + + prog = bpf_object__find_prog_by_idx(obj, idx); + if (!prog) { + pr_warning("relocation failed: no %d section\n", + idx); + return -ENOENT; + } + + err = bpf_program__collect_reloc(prog, nr_maps, + shdr, data, + obj->efile.symbols); + if (err) + return -EINVAL; + } + return 0; +} + +static int +load_program(struct bpf_insn *insns, int insns_cnt, + char *license, u32 kern_version, int *pfd) +{ + int ret; + char *log_buf; + + if (!insns || !insns_cnt) + return -EINVAL; + + log_buf = malloc(BPF_LOG_BUF_SIZE); + if (!log_buf) + pr_warning("Alloc log buffer for bpf loader error, continue without log\n"); + + ret = bpf_load_program(BPF_PROG_TYPE_KPROBE, insns, + insns_cnt, license, kern_version, + log_buf, BPF_LOG_BUF_SIZE); + + if (ret >= 0) { + *pfd = ret; + ret = 0; + goto out; + } + + ret = -EINVAL; + pr_warning("load bpf program failed: %s\n", strerror(errno)); + + if (log_buf) { + pr_warning("-- BEGIN DUMP LOG ---\n"); + pr_warning("\n%s\n", log_buf); + pr_warning("-- END LOG --\n"); + } + +out: + free(log_buf); + return ret; +} + +static int +bpf_program__load(struct bpf_program *prog, + char *license, u32 kern_version) +{ + int err, fd; + + err = load_program(prog->insns, prog->insns_cnt, + license, kern_version, &fd); + if (!err) + prog->fd = fd; + + if (err) + pr_warning("failed to load program '%s'\n", + prog->section_name); + zfree(&prog->insns); + prog->insns_cnt = 0; + return err; +} + +static int +bpf_object__load_progs(struct bpf_object *obj) +{ + size_t i; + int err; + + for (i = 0; i < obj->nr_programs; i++) { + err = bpf_program__load(&obj->programs[i], + obj->license, + obj->kern_version); + if (err) + return err; + } + return 0; +} + +static int bpf_object__validate(struct bpf_object *obj) +{ + if (obj->kern_version == 0) { + pr_warning("%s doesn't provide kernel version\n", + obj->path); + return -EINVAL; + } + return 0; +} + +static struct bpf_object * +__bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz) +{ + struct bpf_object *obj; + + if (elf_version(EV_CURRENT) == EV_NONE) { + pr_warning("failed to init libelf for %s\n", path); + return NULL; + } + + obj = bpf_object__new(path, obj_buf, obj_buf_sz); + if (!obj) + return NULL; + + if (bpf_object__elf_init(obj)) + goto out; + if (bpf_object__check_endianness(obj)) + goto out; + if (bpf_object__elf_collect(obj)) + goto out; + if (bpf_object__collect_reloc(obj)) + goto out; + if (bpf_object__validate(obj)) + goto out; + + bpf_object__elf_finish(obj); + return obj; +out: + bpf_object__close(obj); + return NULL; +} + +struct bpf_object *bpf_object__open(const char *path) +{ + /* param validation */ + if (!path) + return NULL; + + pr_debug("loading %s\n", path); + + return __bpf_object__open(path, NULL, 0); +} + +struct bpf_object *bpf_object__open_buffer(void *obj_buf, + size_t obj_buf_sz) +{ + /* param validation */ + if (!obj_buf || obj_buf_sz <= 0) + return NULL; + + pr_debug("loading object from buffer\n"); + + return __bpf_object__open("[buffer]", obj_buf, obj_buf_sz); +} + +int bpf_object__unload(struct bpf_object *obj) +{ + size_t i; + + if (!obj) + return -EINVAL; + + for (i = 0; i < obj->nr_map_fds; i++) + zclose(obj->map_fds[i]); + zfree(&obj->map_fds); + obj->nr_map_fds = 0; + + for (i = 0; i < obj->nr_programs; i++) + bpf_program__unload(&obj->programs[i]); + + return 0; +} + +int bpf_object__load(struct bpf_object *obj) +{ + if (!obj) + return -EINVAL; + + if (obj->loaded) { + pr_warning("object should not be loaded twice\n"); + return -EINVAL; + } + + obj->loaded = true; + if (bpf_object__create_maps(obj)) + goto out; + if (bpf_object__relocate(obj)) + goto out; + if (bpf_object__load_progs(obj)) + goto out; + + return 0; +out: + bpf_object__unload(obj); + pr_warning("failed to load object '%s'\n", obj->path); + return -EINVAL; +} + +void bpf_object__close(struct bpf_object *obj) +{ + size_t i; + + if (!obj) + return; + + bpf_object__elf_finish(obj); + bpf_object__unload(obj); + + zfree(&obj->maps_buf); + + if (obj->programs && obj->nr_programs) { + for (i = 0; i < obj->nr_programs; i++) + bpf_program__exit(&obj->programs[i]); + } + zfree(&obj->programs); + + list_del(&obj->list); + free(obj); +} + +struct bpf_object * +bpf_object__next(struct bpf_object *prev) +{ + struct bpf_object *next; + + if (!prev) + next = list_first_entry(&bpf_objects_list, + struct bpf_object, + list); + else + next = list_next_entry(prev, list); + + /* Empty list is noticed here so don't need checking on entry. */ + if (&next->list == &bpf_objects_list) + return NULL; + + return next; +} + +struct bpf_program * +bpf_program__next(struct bpf_program *prev, struct bpf_object *obj) +{ + size_t idx; + + if (!obj->programs) + return NULL; + /* First handler */ + if (prev == NULL) + return &obj->programs[0]; + + if (prev->obj != obj) { + pr_warning("error: program handler doesn't match object\n"); + return NULL; + } + + idx = (prev - obj->programs) + 1; + if (idx >= obj->nr_programs) + return NULL; + return &obj->programs[idx]; +} + +int bpf_program__set_private(struct bpf_program *prog, + void *priv, + bpf_program_clear_priv_t clear_priv) +{ + if (prog->priv && prog->clear_priv) + prog->clear_priv(prog, prog->priv); + + prog->priv = priv; + prog->clear_priv = clear_priv; + return 0; +} + +int bpf_program__get_private(struct bpf_program *prog, void **ppriv) +{ + *ppriv = prog->priv; + return 0; +} + +const char *bpf_program__title(struct bpf_program *prog, bool dup) +{ + const char *title; + + title = prog->section_name; + if (dup) { + title = strdup(title); + if (!title) { + pr_warning("failed to strdup program title\n"); + return NULL; + } + } + + return title; +} + +int bpf_program__fd(struct bpf_program *prog) +{ + return prog->fd; +} diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h new file mode 100644 index 000000000000..ea8adc206b62 --- /dev/null +++ b/tools/lib/bpf/libbpf.h @@ -0,0 +1,81 @@ +/* + * Common eBPF ELF object loading operations. + * + * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> + * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> + * Copyright (C) 2015 Huawei Inc. + */ +#ifndef __BPF_LIBBPF_H +#define __BPF_LIBBPF_H + +#include <stdio.h> +#include <stdbool.h> + +/* + * In include/linux/compiler-gcc.h, __printf is defined. However + * it should be better if libbpf.h doesn't depend on Linux header file. + * So instead of __printf, here we use gcc attribute directly. + */ +typedef int (*libbpf_print_fn_t)(const char *, ...) + __attribute__((format(printf, 1, 2))); + +void libbpf_set_print(libbpf_print_fn_t warn, + libbpf_print_fn_t info, + libbpf_print_fn_t debug); + +/* Hide internal to user */ +struct bpf_object; + +struct bpf_object *bpf_object__open(const char *path); +struct bpf_object *bpf_object__open_buffer(void *obj_buf, + size_t obj_buf_sz); +void bpf_object__close(struct bpf_object *object); + +/* Load/unload object into/from kernel */ +int bpf_object__load(struct bpf_object *obj); +int bpf_object__unload(struct bpf_object *obj); + +struct bpf_object *bpf_object__next(struct bpf_object *prev); +#define bpf_object__for_each_safe(pos, tmp) \ + for ((pos) = bpf_object__next(NULL), \ + (tmp) = bpf_object__next(pos); \ + (pos) != NULL; \ + (pos) = (tmp), (tmp) = bpf_object__next(tmp)) + +/* Accessors of bpf_program. */ +struct bpf_program; +struct bpf_program *bpf_program__next(struct bpf_program *prog, + struct bpf_object *obj); + +#define bpf_object__for_each_program(pos, obj) \ + for ((pos) = bpf_program__next(NULL, (obj)); \ + (pos) != NULL; \ + (pos) = bpf_program__next((pos), (obj))) + +typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, + void *); + +int bpf_program__set_private(struct bpf_program *prog, void *priv, + bpf_program_clear_priv_t clear_priv); + +int bpf_program__get_private(struct bpf_program *prog, + void **ppriv); + +const char *bpf_program__title(struct bpf_program *prog, bool dup); + +int bpf_program__fd(struct bpf_program *prog); + +/* + * We don't need __attribute__((packed)) now since it is + * unnecessary for 'bpf_map_def' because they are all aligned. + * In addition, using it will trigger -Wpacked warning message, + * and will be treated as an error due to -Werror. + */ +struct bpf_map_def { + unsigned int type; + unsigned int key_size; + unsigned int value_size; + unsigned int max_entries; +}; + +#endif diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c index 0b0112c80f22..21cdf869a01b 100644 --- a/tools/lib/lockdep/preload.c +++ b/tools/lib/lockdep/preload.c @@ -5,7 +5,7 @@ #include <stdlib.h> #include <sysexits.h> #include "include/liblockdep/mutex.h" -#include "../../../include/linux/rbtree.h" +#include "../../include/linux/rbtree.h" /** * struct lock_lookup - liblockdep's view of a single unique lock diff --git a/tools/lib/lockdep/uinclude/linux/kernel.h b/tools/lib/lockdep/uinclude/linux/kernel.h index cd2cc59a5da7..276c7a8b2ed1 100644 --- a/tools/lib/lockdep/uinclude/linux/kernel.h +++ b/tools/lib/lockdep/uinclude/linux/kernel.h @@ -23,7 +23,7 @@ #define WARN_ON(x) (x) #define WARN_ON_ONCE(x) (x) #define likely(x) (x) -#define WARN(x, y, z) (x) +#define WARN(x, y...) (x) #define uninitialized_var(x) x #define __init #define noinline diff --git a/tools/lib/lockdep/uinclude/linux/rbtree.h b/tools/lib/lockdep/uinclude/linux/rbtree.h deleted file mode 100644 index 965901db4862..000000000000 --- a/tools/lib/lockdep/uinclude/linux/rbtree.h +++ /dev/null @@ -1 +0,0 @@ -#include "../../../include/linux/rbtree.h" diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index cc25f059ab3d..4d885934b919 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -418,7 +418,7 @@ static int func_map_init(struct pevent *pevent) } static struct func_map * -find_func(struct pevent *pevent, unsigned long long addr) +__find_func(struct pevent *pevent, unsigned long long addr) { struct func_map *func; struct func_map key; @@ -434,6 +434,71 @@ find_func(struct pevent *pevent, unsigned long long addr) return func; } +struct func_resolver { + pevent_func_resolver_t *func; + void *priv; + struct func_map map; +}; + +/** + * pevent_set_function_resolver - set an alternative function resolver + * @pevent: handle for the pevent + * @resolver: function to be used + * @priv: resolver function private state. + * + * Some tools may have already a way to resolve kernel functions, allow them to + * keep using it instead of duplicating all the entries inside + * pevent->funclist. + */ +int pevent_set_function_resolver(struct pevent *pevent, + pevent_func_resolver_t *func, void *priv) +{ + struct func_resolver *resolver = malloc(sizeof(*resolver)); + + if (resolver == NULL) + return -1; + + resolver->func = func; + resolver->priv = priv; + + free(pevent->func_resolver); + pevent->func_resolver = resolver; + + return 0; +} + +/** + * pevent_reset_function_resolver - reset alternative function resolver + * @pevent: handle for the pevent + * + * Stop using whatever alternative resolver was set, use the default + * one instead. + */ +void pevent_reset_function_resolver(struct pevent *pevent) +{ + free(pevent->func_resolver); + pevent->func_resolver = NULL; +} + +static struct func_map * +find_func(struct pevent *pevent, unsigned long long addr) +{ + struct func_map *map; + + if (!pevent->func_resolver) + return __find_func(pevent, addr); + + map = &pevent->func_resolver->map; + map->mod = NULL; + map->addr = addr; + map->func = pevent->func_resolver->func(pevent->func_resolver->priv, + &map->addr, &map->mod); + if (map->func == NULL) + return NULL; + + return map; +} + /** * pevent_find_function - find a function by a given address * @pevent: handle for the pevent @@ -1680,6 +1745,9 @@ process_cond(struct event_format *event, struct print_arg *top, char **tok) type = process_arg(event, left, &token); again: + if (type == EVENT_ERROR) + goto out_free; + /* Handle other operations in the arguments */ if (type == EVENT_OP && strcmp(token, ":") != 0) { type = process_op(event, left, &token); @@ -1939,6 +2007,12 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) goto out_warn_free; type = process_arg_token(event, right, tok, type); + if (type == EVENT_ERROR) { + free_arg(right); + /* token was freed in process_arg_token() via *tok */ + token = NULL; + goto out_free; + } if (right->type == PRINT_OP && get_op_prio(arg->op.op) < get_op_prio(right->op.op)) { @@ -4754,6 +4828,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event case 'z': case 'Z': case '0' ... '9': + case '-': goto cont_process; case 'p': if (pevent->long_size == 4) @@ -6564,6 +6639,7 @@ void pevent_free(struct pevent *pevent) free(pevent->trace_clock); free(pevent->events); free(pevent->sort_events); + free(pevent->func_resolver); free(pevent); } diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 063b1971eb35..204befb05a17 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -453,6 +453,10 @@ struct cmdline_list; struct func_map; struct func_list; struct event_handler; +struct func_resolver; + +typedef char *(pevent_func_resolver_t)(void *priv, + unsigned long long *addrp, char **modp); struct pevent { int ref_count; @@ -481,6 +485,7 @@ struct pevent { int cmdline_count; struct func_map *func_map; + struct func_resolver *func_resolver; struct func_list *funclist; unsigned int func_count; @@ -611,6 +616,9 @@ enum trace_flag_type { TRACE_FLAG_SOFTIRQ = 0x10, }; +int pevent_set_function_resolver(struct pevent *pevent, + pevent_func_resolver_t *func, void *priv); +void pevent_reset_function_resolver(struct pevent *pevent); int pevent_register_comm(struct pevent *pevent, const char *comm, int pid); int pevent_register_trace_clock(struct pevent *pevent, const char *trace_clock); int pevent_register_function(struct pevent *pevent, char *name, diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 09db62ba5786..3d1bb802dbf4 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -29,3 +29,4 @@ config.mak.autogen *.pyc *.pyo .config-detected +util/intel-pt-decoder/inat-tables.c diff --git a/tools/perf/Build b/tools/perf/Build index b77370ef7005..72237455b400 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -35,6 +35,7 @@ paths += -DPERF_MAN_PATH="BUILD_STR($(mandir_SQ))" CFLAGS_builtin-help.o += $(paths) CFLAGS_builtin-timechart.o += $(paths) CFLAGS_perf.o += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" -include $(OUTPUT)PERF-VERSION-FILE +CFLAGS_builtin-trace.o += -DSTRACE_GROUPS_DIR="BUILD_STR($(STRACE_GROUPS_DIR_SQ))" libperf-y += util/ libperf-y += arch/ diff --git a/tools/perf/Documentation/intel-bts.txt b/tools/perf/Documentation/intel-bts.txt new file mode 100644 index 000000000000..8bdc93bd7fdb --- /dev/null +++ b/tools/perf/Documentation/intel-bts.txt @@ -0,0 +1,86 @@ +Intel Branch Trace Store +======================== + +Overview +======== + +Intel BTS could be regarded as a predecessor to Intel PT and has some +similarities because it can also identify every branch a program takes. A +notable difference is that Intel BTS has no timing information and as a +consequence the present implementation is limited to per-thread recording. + +While decoding Intel BTS does not require walking the object code, the object +code is still needed to pair up calls and returns correctly, consequently much +of the Intel PT documentation applies also to Intel BTS. Refer to the Intel PT +documentation and consider that the PMU 'intel_bts' can usually be used in +place of 'intel_pt' in the examples provided, with the proviso that per-thread +recording must also be stipulated i.e. the --per-thread option for +'perf record'. + + +perf record +=========== + +new event +--------- + +The Intel BTS kernel driver creates a new PMU for Intel BTS. The perf record +option is: + + -e intel_bts// + +Currently Intel BTS is limited to per-thread tracing so the --per-thread option +is also needed. + + +snapshot option +--------------- + +The snapshot option is the same as Intel PT (refer Intel PT documentation). + + +auxtrace mmap size option +----------------------- + +The mmap size option is the same as Intel PT (refer Intel PT documentation). + + +perf script +=========== + +By default, perf script will decode trace data found in the perf.data file. +This can be further controlled by option --itrace. The --itrace option is +the same as Intel PT (refer Intel PT documentation) except that neither +"instructions" events nor "transactions" events (and consequently call +chains) are supported. + +To disable trace decoding entirely, use the option --no-itrace. + + +dump option +----------- + +perf script has an option (-D) to "dump" the events i.e. display the binary +data. + +When -D is used, Intel BTS packets are displayed. + +To disable the display of Intel BTS packets, combine the -D option with +--no-itrace. + + +perf report +=========== + +By default, perf report will decode trace data found in the perf.data file. +This can be further controlled by new option --itrace exactly the same as +perf script. + + +perf inject +=========== + +perf inject also accepts the --itrace option in which case tracing data is +removed and replaced with the synthesized events. e.g. + + perf inject --itrace -i perf.data -o perf.data.new diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt new file mode 100644 index 000000000000..4a0501d7a3b4 --- /dev/null +++ b/tools/perf/Documentation/intel-pt.txt @@ -0,0 +1,766 @@ +Intel Processor Trace +===================== + +Overview +======== + +Intel Processor Trace (Intel PT) is an extension of Intel Architecture that +collects information about software execution such as control flow, execution +modes and timings and formats it into highly compressed binary packets. +Technical details are documented in the Intel 64 and IA-32 Architectures +Software Developer Manuals, Chapter 36 Intel Processor Trace. + +Intel PT is first supported in Intel Core M and 5th generation Intel Core +processors that are based on the Intel micro-architecture code name Broadwell. + +Trace data is collected by 'perf record' and stored within the perf.data file. +See below for options to 'perf record'. + +Trace data must be 'decoded' which involves walking the object code and matching +the trace data packets. For example a TNT packet only tells whether a +conditional branch was taken or not taken, so to make use of that packet the +decoder must know precisely which instruction was being executed. + +Decoding is done on-the-fly. The decoder outputs samples in the same format as +samples output by perf hardware events, for example as though the "instructions" +or "branches" events had been recorded. Presently 3 tools support this: +'perf script', 'perf report' and 'perf inject'. See below for more information +on using those tools. + +The main distinguishing feature of Intel PT is that the decoder can determine +the exact flow of software execution. Intel PT can be used to understand why +and how did software get to a certain point, or behave a certain way. The +software does not have to be recompiled, so Intel PT works with debug or release +builds, however the executed images are needed - which makes use in JIT-compiled +environments, or with self-modified code, a challenge. Also symbols need to be +provided to make sense of addresses. + +A limitation of Intel PT is that it produces huge amounts of trace data +(hundreds of megabytes per second per core) which takes a long time to decode, +for example two or three orders of magnitude longer than it took to collect. +Another limitation is the performance impact of tracing, something that will +vary depending on the use-case and architecture. + + +Quickstart +========== + +It is important to start small. That is because it is easy to capture vastly +more data than can possibly be processed. + +The simplest thing to do with Intel PT is userspace profiling of small programs. +Data is captured with 'perf record' e.g. to trace 'ls' userspace-only: + + perf record -e intel_pt//u ls + +And profiled with 'perf report' e.g. + + perf report + +To also trace kernel space presents a problem, namely kernel self-modifying +code. A fairly good kernel image is available in /proc/kcore but to get an +accurate image a copy of /proc/kcore needs to be made under the same conditions +as the data capture. A script perf-with-kcore can do that, but beware that the +script makes use of 'sudo' to copy /proc/kcore. If you have perf installed +locally from the source tree you can do: + + ~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls + +which will create a directory named 'pt_ls' and put the perf.data file and +copies of /proc/kcore, /proc/kallsyms and /proc/modules into it. Then to use +'perf report' becomes: + + ~/libexec/perf-core/perf-with-kcore report pt_ls + +Because samples are synthesized after-the-fact, the sampling period can be +selected for reporting. e.g. sample every microsecond + + ~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge + +See the sections below for more information about the --itrace option. + +Beware the smaller the period, the more samples that are produced, and the +longer it takes to process them. + +Also note that the coarseness of Intel PT timing information will start to +distort the statistical value of the sampling as the sampling period becomes +smaller. + +To represent software control flow, "branches" samples are produced. By default +a branch sample is synthesized for every single branch. To get an idea what +data is available you can use the 'perf script' tool with no parameters, which +will list all the samples. + + perf record -e intel_pt//u ls + perf script + +An interesting field that is not printed by default is 'flags' which can be +displayed as follows: + + perf script -Fcomm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr,symoff,flags + +The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, +system, asynchronous, interrupt, transaction abort, trace begin, trace end, and +in transaction, respectively. + +While it is possible to create scripts to analyze the data, an alternative +approach is available to export the data to a postgresql database. Refer to +script export-to-postgresql.py for more details, and to script +call-graph-from-postgresql.py for an example of using the database. + +As mentioned above, it is easy to capture too much data. One way to limit the +data captured is to use 'snapshot' mode which is explained further below. +Refer to 'new snapshot option' and 'Intel PT modes of operation' further below. + +Another problem that will be experienced is decoder errors. They can be caused +by inability to access the executed image, self-modified or JIT-ed code, or the +inability to match side-band information (such as context switches and mmaps) +which results in the decoder not knowing what code was executed. + +There is also the problem of perf not being able to copy the data fast enough, +resulting in data lost because the buffer was full. See 'Buffer handling' below +for more details. + + +perf record +=========== + +new event +--------- + +The Intel PT kernel driver creates a new PMU for Intel PT. PMU events are +selected by providing the PMU name followed by the "config" separated by slashes. +An enhancement has been made to allow default "config" e.g. the option + + -e intel_pt// + +will use a default config value. Currently that is the same as + + -e intel_pt/tsc,noretcomp=0/ + +which is the same as + + -e intel_pt/tsc=1,noretcomp=0/ + +Note there are now new config terms - see section 'config terms' further below. + +The config terms are listed in /sys/devices/intel_pt/format. They are bit +fields within the config member of the struct perf_event_attr which is +passed to the kernel by the perf_event_open system call. They correspond to bit +fields in the IA32_RTIT_CTL MSR. Here is a list of them and their definitions: + + $ grep -H . /sys/bus/event_source/devices/intel_pt/format/* + /sys/bus/event_source/devices/intel_pt/format/cyc:config:1 + /sys/bus/event_source/devices/intel_pt/format/cyc_thresh:config:19-22 + /sys/bus/event_source/devices/intel_pt/format/mtc:config:9 + /sys/bus/event_source/devices/intel_pt/format/mtc_period:config:14-17 + /sys/bus/event_source/devices/intel_pt/format/noretcomp:config:11 + /sys/bus/event_source/devices/intel_pt/format/psb_period:config:24-27 + /sys/bus/event_source/devices/intel_pt/format/tsc:config:10 + +Note that the default config must be overridden for each term i.e. + + -e intel_pt/noretcomp=0/ + +is the same as: + + -e intel_pt/tsc=1,noretcomp=0/ + +So, to disable TSC packets use: + + -e intel_pt/tsc=0/ + +It is also possible to specify the config value explicitly: + + -e intel_pt/config=0x400/ + +Note that, as with all events, the event is suffixed with event modifiers: + + u userspace + k kernel + h hypervisor + G guest + H host + p precise ip + +'h', 'G' and 'H' are for virtualization which is not supported by Intel PT. +'p' is also not relevant to Intel PT. So only options 'u' and 'k' are +meaningful for Intel PT. + +perf_event_attr is displayed if the -vv option is used e.g. + + ------------------------------------------------------------ + perf_event_attr: + type 6 + size 112 + config 0x400 + { sample_period, sample_freq } 1 + sample_type IP|TID|TIME|CPU|IDENTIFIER + read_format ID + disabled 1 + inherit 1 + exclude_kernel 1 + exclude_hv 1 + enable_on_exec 1 + sample_id_all 1 + ------------------------------------------------------------ + sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 + ------------------------------------------------------------ + + +config terms +------------ + +The June 2015 version of Intel 64 and IA-32 Architectures Software Developer +Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features. +Some of the features are reflect in new config terms. All the config terms are +described below. + +tsc Always supported. Produces TSC timestamp packets to provide + timing information. In some cases it is possible to decode + without timing information, for example a per-thread context + that does not overlap executable memory maps. + + The default config selects tsc (i.e. tsc=1). + +noretcomp Always supported. Disables "return compression" so a TIP packet + is produced when a function returns. Causes more packets to be + produced but might make decoding more reliable. + + The default config does not select noretcomp (i.e. noretcomp=0). + +psb_period Allows the frequency of PSB packets to be specified. + + The PSB packet is a synchronization packet that provides a + starting point for decoding or recovery from errors. + + Support for psb_period is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/psb_cyc + + which contains "1" if the feature is supported and "0" + otherwise. + + Valid values are given by: + + /sys/bus/event_source/devices/intel_pt/caps/psb_periods + + which contains a hexadecimal value, the bits of which represent + valid values e.g. bit 2 set means value 2 is valid. + + The psb_period value is converted to the approximate number of + trace bytes between PSB packets as: + + 2 ^ (value + 11) + + e.g. value 3 means 16KiB bytes between PSBs + + If an invalid value is entered, the error message + will give a list of valid values e.g. + + $ perf record -e intel_pt/psb_period=15/u uname + Invalid psb_period for intel_pt. Valid values are: 0-5 + + If MTC packets are selected, the default config selects a value + of 3 (i.e. psb_period=3) or the nearest lower value that is + supported (0 is always supported). Otherwise the default is 0. + + If decoding is expected to be reliable and the buffer is large + then a large PSB period can be used. + + Because a TSC packet is produced with PSB, the PSB period can + also affect the granularity to timing information in the absence + of MTC or CYC. + +mtc Produces MTC timing packets. + + MTC packets provide finer grain timestamp information than TSC + packets. MTC packets record time using the hardware crystal + clock (CTC) which is related to TSC packets using a TMA packet. + + Support for this feature is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/mtc + + which contains "1" if the feature is supported and + "0" otherwise. + + The frequency of MTC packets can also be specified - see + mtc_period below. + +mtc_period Specifies how frequently MTC packets are produced - see mtc + above for how to determine if MTC packets are supported. + + Valid values are given by: + + /sys/bus/event_source/devices/intel_pt/caps/mtc_periods + + which contains a hexadecimal value, the bits of which represent + valid values e.g. bit 2 set means value 2 is valid. + + The mtc_period value is converted to the MTC frequency as: + + CTC-frequency / (2 ^ value) + + e.g. value 3 means one eighth of CTC-frequency + + Where CTC is the hardware crystal clock, the frequency of which + can be related to TSC via values provided in cpuid leaf 0x15. + + If an invalid value is entered, the error message + will give a list of valid values e.g. + + $ perf record -e intel_pt/mtc_period=15/u uname + Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9 + + The default value is 3 or the nearest lower value + that is supported (0 is always supported). + +cyc Produces CYC timing packets. + + CYC packets provide even finer grain timestamp information than + MTC and TSC packets. A CYC packet contains the number of CPU + cycles since the last CYC packet. Unlike MTC and TSC packets, + CYC packets are only sent when another packet is also sent. + + Support for this feature is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/psb_cyc + + which contains "1" if the feature is supported and + "0" otherwise. + + The number of CYC packets produced can be reduced by specifying + a threshold - see cyc_thresh below. + +cyc_thresh Specifies how frequently CYC packets are produced - see cyc + above for how to determine if CYC packets are supported. + + Valid cyc_thresh values are given by: + + /sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds + + which contains a hexadecimal value, the bits of which represent + valid values e.g. bit 2 set means value 2 is valid. + + The cyc_thresh value represents the minimum number of CPU cycles + that must have passed before a CYC packet can be sent. The + number of CPU cycles is: + + 2 ^ (value - 1) + + e.g. value 4 means 8 CPU cycles must pass before a CYC packet + can be sent. Note a CYC packet is still only sent when another + packet is sent, not at, e.g. every 8 CPU cycles. + + If an invalid value is entered, the error message + will give a list of valid values e.g. + + $ perf record -e intel_pt/cyc,cyc_thresh=15/u uname + Invalid cyc_thresh for intel_pt. Valid values are: 0-12 + + CYC packets are not requested by default. + +no_force_psb This is a driver option and is not in the IA32_RTIT_CTL MSR. + + It stops the driver resetting the byte count to zero whenever + enabling the trace (for example on context switches) which in + turn results in no PSB being forced. However some processors + will produce a PSB anyway. + + In any case, there is still a PSB when the trace is enabled for + the first time. + + no_force_psb can be used to slightly decrease the trace size but + may make it harder for the decoder to recover from errors. + + no_force_psb is not selected by default. + + +new snapshot option +------------------- + +The difference between full trace and snapshot from the kernel's perspective is +that in full trace we don't overwrite trace data that the user hasn't collected +yet (and indicated that by advancing aux_tail), whereas in snapshot mode we let +the trace run and overwrite older data in the buffer so that whenever something +interesting happens, we can stop it and grab a snapshot of what was going on +around that interesting moment. + +To select snapshot mode a new option has been added: + + -S + +Optionally it can be followed by the snapshot size e.g. + + -S0x100000 + +The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size +nor snapshot size is specified, then the default is 4MiB for privileged users +(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users. +If an unprivileged user does not specify mmap pages, the mmap pages will be +reduced as described in the 'new auxtrace mmap size option' section below. + +The snapshot size is displayed if the option -vv is used e.g. + + Intel PT snapshot size: %zu + + +new auxtrace mmap size option +--------------------------- + +Intel PT buffer size is specified by an addition to the -m option e.g. + + -m,16 + +selects a buffer size of 16 pages i.e. 64KiB. + +Note that the existing functionality of -m is unchanged. The auxtrace mmap size +is specified by the optional addition of a comma and the value. + +The default auxtrace mmap size for Intel PT is 4MiB/page_size for privileged users +(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users. +If an unprivileged user does not specify mmap pages, the mmap pages will be +reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the +user is likely to get an error as they exceed their mlock limit (Max locked +memory as shown in /proc/self/limits). Note that perf does not count the first +512KiB (actually /proc/sys/kernel/perf_event_mlock_kb minus 1 page) per cpu +against the mlock limit so an unprivileged user is allowed 512KiB per cpu plus +their mlock limit (which defaults to 64KiB but is not multiplied by the number +of cpus). + +In full-trace mode, powers of two are allowed for buffer size, with a minimum +size of 2 pages. In snapshot mode, it is the same but the minimum size is +1 page. + +The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g. + + mmap length 528384 + auxtrace mmap length 4198400 + + +Intel PT modes of operation +--------------------------- + +Intel PT can be used in 2 modes: + full-trace mode + snapshot mode + +Full-trace mode traces continuously e.g. + + perf record -e intel_pt//u uname + +Snapshot mode captures the available data when a signal is sent e.g. + + perf record -v -e intel_pt//u -S ./loopy 1000000000 & + [1] 11435 + kill -USR2 11435 + Recording AUX area tracing snapshot + +Note that the signal sent is SIGUSR2. +Note that "Recording AUX area tracing snapshot" is displayed because the -v +option is used. + +The 2 modes cannot be used together. + + +Buffer handling +--------------- + +There may be buffer limitations (i.e. single ToPa entry) which means that actual +buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER). In order to +provide other sizes, and in particular an arbitrarily large size, multiple +buffers are logically concatenated. However an interrupt must be used to switch +between buffers. That has two potential problems: + a) the interrupt may not be handled in time so that the current buffer + becomes full and some trace data is lost. + b) the interrupts may slow the system and affect the performance + results. + +If trace data is lost, the driver sets 'truncated' in the PERF_RECORD_AUX event +which the tools report as an error. + +In full-trace mode, the driver waits for data to be copied out before allowing +the (logical) buffer to wrap-around. If data is not copied out quickly enough, +again 'truncated' is set in the PERF_RECORD_AUX event. If the driver has to +wait, the intel_pt event gets disabled. Because it is difficult to know when +that happens, perf tools always re-enable the intel_pt event after copying out +data. + + +Intel PT and build ids +---------------------- + +By default "perf record" post-processes the event stream to find all build ids +for executables for all addresses sampled. Deliberately, Intel PT is not +decoded for that purpose (it would take too long). Instead the build ids for +all executables encountered (due to mmap, comm or task events) are included +in the perf.data file. + +To see buildids included in the perf.data file use the command: + + perf buildid-list + +If the perf.data file contains Intel PT data, that is the same as: + + perf buildid-list --with-hits + + +Snapshot mode and event disabling +--------------------------------- + +In order to make a snapshot, the intel_pt event is disabled using an IOCTL, +namely PERF_EVENT_IOC_DISABLE. However doing that can also disable the +collection of side-band information. In order to prevent that, a dummy +software event has been introduced that permits tracking events (like mmaps) to +continue to be recorded while intel_pt is disabled. That is important to ensure +there is complete side-band information to allow the decoding of subsequent +snapshots. + +A test has been created for that. To find the test: + + perf test list + ... + 23: Test using a dummy software event to keep tracking + +To run the test: + + perf test 23 + 23: Test using a dummy software event to keep tracking : Ok + + +perf record modes (nothing new here) +------------------------------------ + +perf record essentially operates in one of three modes: + per thread + per cpu + workload only + +"per thread" mode is selected by -t or by --per-thread (with -p or -u or just a +workload). +"per cpu" is selected by -C or -a. +"workload only" mode is selected by not using the other options but providing a +command to run (i.e. the workload). + +In per-thread mode an exact list of threads is traced. There is no inheritance. +Each thread has its own event buffer. + +In per-cpu mode all processes (or processes from the selected cgroup i.e. -G +option, or processes selected with -p or -u) are traced. Each cpu has its own +buffer. Inheritance is allowed. + +In workload-only mode, the workload is traced but with per-cpu buffers. +Inheritance is allowed. Note that you can now trace a workload in per-thread +mode by using the --per-thread option. + + +Privileged vs non-privileged users +---------------------------------- + +Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users +have memory limits imposed upon them. That affects what buffer sizes they can +have as outlined above. + +Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are +not permitted to use tracepoints which means there is insufficient side-band +information to decode Intel PT in per-cpu mode, and potentially workload-only +mode too if the workload creates new processes. + +Note also, that to use tracepoints, read-access to debugfs is required. So if +debugfs is not mounted or the user does not have read-access, it will again not +be possible to decode Intel PT in per-cpu mode. + + +sched_switch tracepoint +----------------------- + +The sched_switch tracepoint is used to provide side-band data for Intel PT +decoding. sched_switch events are automatically added. e.g. the second event +shown below + + $ perf record -vv -e intel_pt//u uname + ------------------------------------------------------------ + perf_event_attr: + type 6 + size 112 + config 0x400 + { sample_period, sample_freq } 1 + sample_type IP|TID|TIME|CPU|IDENTIFIER + read_format ID + disabled 1 + inherit 1 + exclude_kernel 1 + exclude_hv 1 + enable_on_exec 1 + sample_id_all 1 + ------------------------------------------------------------ + sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 + ------------------------------------------------------------ + perf_event_attr: + type 2 + size 112 + config 0x108 + { sample_period, sample_freq } 1 + sample_type IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER + read_format ID + inherit 1 + sample_id_all 1 + exclude_guest 1 + ------------------------------------------------------------ + sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8 + sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8 + sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8 + sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8 + ------------------------------------------------------------ + perf_event_attr: + type 1 + size 112 + config 0x9 + { sample_period, sample_freq } 1 + sample_type IP|TID|TIME|IDENTIFIER + read_format ID + disabled 1 + inherit 1 + exclude_kernel 1 + exclude_hv 1 + mmap 1 + comm 1 + enable_on_exec 1 + task 1 + sample_id_all 1 + mmap2 1 + comm_exec 1 + ------------------------------------------------------------ + sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 + mmap size 528384B + AUX area mmap length 4194304 + perf event ring buffer mmapped per cpu + Synthesizing auxtrace information + Linux + [ perf record: Woken up 1 times to write data ] + [ perf record: Captured and wrote 0.042 MB perf.data ] + +Note, the sched_switch event is only added if the user is permitted to use it +and only in per-cpu mode. + +Note also, the sched_switch event is only added if TSC packets are requested. +That is because, in the absence of timing information, the sched_switch events +cannot be matched against the Intel PT trace. + + +perf script +=========== + +By default, perf script will decode trace data found in the perf.data file. +This can be further controlled by new option --itrace. + + +New --itrace option +------------------- + +Having no option is the same as + + --itrace + +which, in turn, is the same as + + --itrace=ibxe + +The letters are: + + i synthesize "instructions" events + b synthesize "branches" events + x synthesize "transactions" events + c synthesize branches events (calls only) + r synthesize branches events (returns only) + e synthesize tracing error events + d create a debug log + g synthesize a call chain (use with i or x) + +"Instructions" events look like they were recorded by "perf record -e +instructions". + +"Branches" events look like they were recorded by "perf record -e branches". "c" +and "r" can be combined to get calls and returns. + +"Transactions" events correspond to the start or end of transactions. The +'flags' field can be used in perf script to determine whether the event is a +tranasaction start, commit or abort. + +Error events are new. They show where the decoder lost the trace. Error events +are quite important. Users must know if what they are seeing is a complete +picture or not. + +The "d" option will cause the creation of a file "intel_pt.log" containing all +decoded packets and instructions. Note that this option slows down the decoder +and that the resulting file may be very large. + +In addition, the period of the "instructions" event can be specified. e.g. + + --itrace=i10us + +sets the period to 10us i.e. one instruction sample is synthesized for each 10 +microseconds of trace. Alternatives to "us" are "ms" (milliseconds), +"ns" (nanoseconds), "t" (TSC ticks) or "i" (instructions). + +"ms", "us" and "ns" are converted to TSC ticks. + +The timing information included with Intel PT does not give the time of every +instruction. Consequently, for the purpose of sampling, the decoder estimates +the time since the last timing packet based on 1 tick per instruction. The time +on the sample is *not* adjusted and reflects the last known value of TSC. + +For Intel PT, the default period is 100us. + +Also the call chain size (default 16, max. 1024) for instructions or +transactions events can be specified. e.g. + + --itrace=ig32 + --itrace=xg32 + +To disable trace decoding entirely, use the option --no-itrace. + + +dump option +----------- + +perf script has an option (-D) to "dump" the events i.e. display the binary +data. + +When -D is used, Intel PT packets are displayed. The packet decoder does not +pay attention to PSB packets, but just decodes the bytes - so the packets seen +by the actual decoder may not be identical in places where the data is corrupt. +One example of that would be when the buffer-switching interrupt has been too +slow, and the buffer has been filled completely. In that case, the last packet +in the buffer might be truncated and immediately followed by a PSB as the trace +continues in the next buffer. + +To disable the display of Intel PT packets, combine the -D option with +--no-itrace. + + +perf report +=========== + +By default, perf report will decode trace data found in the perf.data file. +This can be further controlled by new option --itrace exactly the same as +perf script, with the exception that the default is --itrace=igxe. + + +perf inject +=========== + +perf inject also accepts the --itrace option in which case tracing data is +removed and replaced with the synthesized events. e.g. + + perf inject --itrace -i perf.data -o perf.data.new diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt new file mode 100644 index 000000000000..2ff946677e3b --- /dev/null +++ b/tools/perf/Documentation/itrace.txt @@ -0,0 +1,22 @@ + i synthesize instructions events + b synthesize branches events + c synthesize branches events (calls only) + r synthesize branches events (returns only) + x synthesize transactions events + e synthesize error events + d create a debug log + g synthesize a call chain (use with i or x) + + The default is all events i.e. the same as --itrace=ibxe + + In addition, the period (default 100000) for instructions events + can be specified in units of: + + i instructions + t ticks + ms milliseconds + us microseconds + ns nanoseconds (default) + + Also the call chain size (default 16, max. 1024) for instructions or + transactions events can be specified. diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt index bf3d0644bf10..ab632d9fbd7d 100644 --- a/tools/perf/Documentation/perf-bench.txt +++ b/tools/perf/Documentation/perf-bench.txt @@ -216,6 +216,10 @@ Suite for evaluating parallel wake calls. *requeue*:: Suite for evaluating requeue calls. +*lock-pi*:: +Suite for evaluating futex lock_pi calls. + + SEE ALSO -------- linkperf:perf[1] diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt index b876ae312699..0c721c3e37e1 100644 --- a/tools/perf/Documentation/perf-inject.txt +++ b/tools/perf/Documentation/perf-inject.txt @@ -48,28 +48,7 @@ OPTIONS Decode Instruction Tracing data, replacing it with synthesized events. Options are: - i synthesize instructions events - b synthesize branches events - c synthesize branches events (calls only) - r synthesize branches events (returns only) - x synthesize transactions events - e synthesize error events - d create a debug log - g synthesize a call chain (use with i or x) - - The default is all events i.e. the same as --itrace=ibxe - - In addition, the period (default 100000) for instructions events - can be specified in units of: - - i instructions - t ticks - ms milliseconds - us microseconds - ns nanoseconds (default) - - Also the call chain size (default 16, max. 1024) for instructions or - transactions events can be specified. +include::itrace.txt[] SEE ALSO -------- diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 9b9d9d086680..347a27322ed8 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -45,6 +45,21 @@ OPTIONS param1 and param2 are defined as formats for the PMU in: /sys/bus/event_sources/devices/<pmu>/format/* + There are also some params which are not defined in .../<pmu>/format/*. + These params can be used to overload default config values per event. + Here is a list of the params. + - 'period': Set event sampling period + - 'freq': Set event sampling frequency + - 'time': Disable/enable time stamping. Acceptable values are 1 for + enabling time stamping. 0 for disabling time stamping. + The default is 1. + - 'call-graph': Disable/enable callgraph. Acceptable str are "fp" for + FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode and + "no" for disable callgraph. + - 'stack-size': user stack size for dwarf mode + Note: If user explicitly sets options which conflict with the params, + the value set by the params will be overridden. + - a hardware breakpoint event in the form of '\mem:addr[/len][:access]' where addr is the address in memory you want to break in. Access is the memory access type (read, write, execute) it can @@ -61,7 +76,16 @@ OPTIONS "perf report" to view group events together. --filter=<filter>:: - Event filter. + Event filter. This option should follow a event selector (-e) which + selects tracepoint event(s). Multiple '--filter' options are combined + using '&&'. + +--exclude-perf:: + Don't record events issued by perf itself. This option should follow + a event selector (-e) which selects tracepoint event(s). It adds a + filter expression 'common_pid != $PERFPID' to filters. If other + '--filter' exists, the new filter expression will be combined with + them by '&&'. -a:: --all-cpus:: @@ -276,6 +300,10 @@ When processing pre-existing threads /proc/XXX/mmap, it may take a long time, because the file may be huge. A time out is needed in such cases. This option sets the time out limit. The default value is 500 ms. +--switch-events:: +Record context switch events i.e. events of type PERF_RECORD_SWITCH or +PERF_RECORD_SWITCH_CPU_WIDE. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index c33b69f3374f..9c7981bfddad 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -81,6 +81,8 @@ OPTIONS - cpu: cpu number the task ran at the time of sample - srcline: filename and line number executed at the time of sample. The DWARF debugging info must be provided. + - srcfile: file name of the source file of the same. Requires dwarf + information. - weight: Event specific weight, e.g. memory latency or transaction abort cost. This is the global weight. - local_weight: Local weight version of the weight above. @@ -109,6 +111,7 @@ OPTIONS - mispredict: "N" for predicted branch, "Y" for mispredicted branch - in_tx: branch in TSX transaction - abort: TSX transaction abort. + - cycles: Cycles in basic block And default sort keys are changed to comm, dso_from, symbol_from, dso_to and symbol_to, see '--branch-stack'. @@ -328,31 +331,23 @@ OPTIONS --itrace:: Options for decoding instruction tracing data. The options are: - i synthesize instructions events - b synthesize branches events - c synthesize branches events (calls only) - r synthesize branches events (returns only) - x synthesize transactions events - e synthesize error events - d create a debug log - g synthesize a call chain (use with i or x) - - The default is all events i.e. the same as --itrace=ibxe - - In addition, the period (default 100000) for instructions events - can be specified in units of: - - i instructions - t ticks - ms milliseconds - us microseconds - ns nanoseconds (default) - - Also the call chain size (default 16, max. 1024) for instructions or - transactions events can be specified. +include::itrace.txt[] To disable decoding entirely, use --no-itrace. +--full-source-path:: + Show the full path for source files for srcline output. + +--show-ref-call-graph:: + When multiple events are sampled, it may not be needed to collect + callgraphs for all of them. The sample sites are usually nearby, + and it's enough to collect the callgraphs on a reference event. + So user can use "call-graph=no" event modifier to disable callgraph + for other events to reduce the overhead. + However, perf report cannot show callgraphs for the event which + disable the callgraph. + This option extends the perf report to show reference callgraphs, + which collected by reference event, in no callgraph event. include::callchain-overhead-calculation.txt[] diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index c82df572fac2..614b2c7b0293 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -222,6 +222,17 @@ OPTIONS --show-mmap-events Display mmap related events (e.g. MMAP, MMAP2). +--show-switch-events + Display context switch events i.e. events of type PERF_RECORD_SWITCH or + PERF_RECORD_SWITCH_CPU_WIDE. + +--demangle:: + Demangle symbol names to human readable form. It's enabled by default, + disable with --no-demangle. + +--demangle-kernel:: + Demangle kernel symbol names to human readable form (for C++ kernels). + --header Show perf.data header. @@ -231,31 +242,13 @@ OPTIONS --itrace:: Options for decoding instruction tracing data. The options are: - i synthesize instructions events - b synthesize branches events - c synthesize branches events (calls only) - r synthesize branches events (returns only) - x synthesize transactions events - e synthesize error events - d create a debug log - g synthesize a call chain (use with i or x) - - The default is all events i.e. the same as --itrace=ibxe - - In addition, the period (default 100000) for instructions events - can be specified in units of: - - i instructions - t ticks - ms milliseconds - us microseconds - ns nanoseconds (default) - - Also the call chain size (default 16, max. 1024) for instructions or - transactions events can be specified. +include::itrace.txt[] To disable decoding entirely, use --no-itrace. +--full-source-path:: + Show the full path for source files for srcline output. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 776aec4d0927..f6a23eb294e7 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -208,6 +208,27 @@ Default is to monitor all CPUS. This option sets the time out limit. The default value is 500 ms. +-b:: +--branch-any:: + Enable taken branch stack sampling. Any type of taken branch may be sampled. + This is a shortcut for --branch-filter any. See --branch-filter for more infos. + +-j:: +--branch-filter:: + Enable taken branch stack sampling. Each sample captures a series of consecutive + taken branches. The number of branches captured with each sample depends on the + underlying hardware, the type of branches of interest, and the executed code. + It is possible to select the types of branches captured by enabling filters. + For a full list of modifiers please see the perf record manpage. + + The option requires at least one branch type among any, any_call, any_ret, ind_call, cond. + The privilege levels may be omitted, in which case, the privilege levels of the associated + event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege + levels are subject to permissions. When sampling on multiple events, branch stack sampling + is enabled for all the sampling events. The sampled branch type is the same for all events. + The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k + Note that this feature may not be available on all processors. + INTERACTIVE PROMPTING KEYS -------------------------- diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index d01a0aad5a01..af009bd6e6b7 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -18,6 +18,7 @@ tools/arch/x86/include/asm/atomic.h tools/arch/x86/include/asm/rmwcc.h tools/lib/traceevent tools/lib/api +tools/lib/bpf tools/lib/hweight.c tools/lib/rbtree.c tools/lib/symbol/kallsyms.c @@ -40,7 +41,6 @@ tools/include/asm-generic/bitops.h tools/include/linux/atomic.h tools/include/linux/bitops.h tools/include/linux/compiler.h -tools/include/linux/export.h tools/include/linux/hash.h tools/include/linux/kernel.h tools/include/linux/list.h diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index bba34636b733..d9863cb96f59 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -76,6 +76,12 @@ include config/utilities.mak # # Define NO_AUXTRACE if you do not want AUX area tracing support +# As per kernel Makefile, avoid funny character set dependencies +unexport LC_ALL +LC_COLLATE=C +LC_NUMERIC=C +export LC_COLLATE LC_NUMERIC + ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(shell pwd))) srctree := $(patsubst %/,%,$(dir $(srctree))) @@ -135,6 +141,7 @@ INSTALL = install FLEX = flex BISON = bison STRIP = strip +AWK = awk LIB_DIR = $(srctree)/tools/lib/api/ TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/ @@ -289,7 +296,7 @@ strip: $(PROGRAMS) $(OUTPUT)perf PERF_IN := $(OUTPUT)perf-in.o -export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX +export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX AWK build := -f $(srctree)/tools/build/Makefile.build dir=. obj $(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE @@ -507,6 +514,11 @@ endif $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' $(call QUIET_INSTALL, perf-with-kcore) \ $(INSTALL) $(OUTPUT)perf-with-kcore -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' +ifndef NO_LIBAUDIT + $(call QUIET_INSTALL, strace/groups) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(STRACE_GROUPS_INSTDIR_SQ)'; \ + $(INSTALL) trace/strace/groups/* -t '$(DESTDIR_SQ)$(STRACE_GROUPS_INSTDIR_SQ)' +endif ifndef NO_LIBPERL $(call QUIET_INSTALL, perl-scripts) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \ @@ -560,7 +572,8 @@ clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean config-clean $(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 - $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* + $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \ + $(OUTPUT)util/intel-pt-decoder/inat-tables.c $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean $(python-clean) diff --git a/tools/perf/arch/alpha/Build b/tools/perf/arch/alpha/Build new file mode 100644 index 000000000000..1bb8bf6d7fd4 --- /dev/null +++ b/tools/perf/arch/alpha/Build @@ -0,0 +1 @@ +# empty diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c index b7bb42c44694..b00dfd92ea73 100644 --- a/tools/perf/arch/common.c +++ b/tools/perf/arch/common.c @@ -128,7 +128,7 @@ static const char *normalize_arch(char *arch) return arch; } -static int perf_session_env__lookup_binutils_path(struct perf_session_env *env, +static int perf_session_env__lookup_binutils_path(struct perf_env *env, const char *name, const char **path) { @@ -206,7 +206,7 @@ out_error: return -1; } -int perf_session_env__lookup_objdump(struct perf_session_env *env) +int perf_session_env__lookup_objdump(struct perf_env *env) { /* * For live mode, env->arch will be NULL and we can use diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h index ede246eda9be..20176df69fc8 100644 --- a/tools/perf/arch/common.h +++ b/tools/perf/arch/common.h @@ -5,6 +5,6 @@ extern const char *objdump_path; -int perf_session_env__lookup_objdump(struct perf_session_env *env); +int perf_session_env__lookup_objdump(struct perf_env *env); #endif /* ARCH_PERF_COMMON_H */ diff --git a/tools/perf/arch/mips/Build b/tools/perf/arch/mips/Build new file mode 100644 index 000000000000..1bb8bf6d7fd4 --- /dev/null +++ b/tools/perf/arch/mips/Build @@ -0,0 +1 @@ +# empty diff --git a/tools/perf/arch/parisc/Build b/tools/perf/arch/parisc/Build new file mode 100644 index 000000000000..1bb8bf6d7fd4 --- /dev/null +++ b/tools/perf/arch/parisc/Build @@ -0,0 +1 @@ +# empty diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index cfbccc4e3187..2c55e1b336c5 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -1,8 +1,13 @@ libperf-y += header.o libperf-y += tsc.o +libperf-y += pmu.o libperf-y += kvm-stat.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o + +libperf-$(CONFIG_AUXTRACE) += auxtrace.o +libperf-$(CONFIG_AUXTRACE) += intel-pt.o +libperf-$(CONFIG_AUXTRACE) += intel-bts.o diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c new file mode 100644 index 000000000000..7a7805583e3f --- /dev/null +++ b/tools/perf/arch/x86/util/auxtrace.c @@ -0,0 +1,83 @@ +/* + * auxtrace.c: AUX area tracing support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <stdbool.h> + +#include "../../util/header.h" +#include "../../util/debug.h" +#include "../../util/pmu.h" +#include "../../util/auxtrace.h" +#include "../../util/intel-pt.h" +#include "../../util/intel-bts.h" +#include "../../util/evlist.h" + +static +struct auxtrace_record *auxtrace_record__init_intel(struct perf_evlist *evlist, + int *err) +{ + struct perf_pmu *intel_pt_pmu; + struct perf_pmu *intel_bts_pmu; + struct perf_evsel *evsel; + bool found_pt = false; + bool found_bts = false; + + intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME); + intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME); + + if (evlist) { + evlist__for_each(evlist, evsel) { + if (intel_pt_pmu && + evsel->attr.type == intel_pt_pmu->type) + found_pt = true; + if (intel_bts_pmu && + evsel->attr.type == intel_bts_pmu->type) + found_bts = true; + } + } + + if (found_pt && found_bts) { + pr_err("intel_pt and intel_bts may not be used together\n"); + *err = -EINVAL; + return NULL; + } + + if (found_pt) + return intel_pt_recording_init(err); + + if (found_bts) + return intel_bts_recording_init(err); + + return NULL; +} + +struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist, + int *err) +{ + char buffer[64]; + int ret; + + *err = 0; + + ret = get_cpuid(buffer, sizeof(buffer)); + if (ret) { + *err = ret; + return NULL; + } + + if (!strncmp(buffer, "GenuineIntel,", 13)) + return auxtrace_record__init_intel(evlist, err); + + return NULL; +} diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c new file mode 100644 index 000000000000..9b94ce520917 --- /dev/null +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -0,0 +1,458 @@ +/* + * intel-bts.c: Intel Processor Trace support + * Copyright (c) 2013-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/log2.h> + +#include "../../util/cpumap.h" +#include "../../util/evsel.h" +#include "../../util/evlist.h" +#include "../../util/session.h" +#include "../../util/util.h" +#include "../../util/pmu.h" +#include "../../util/debug.h" +#include "../../util/tsc.h" +#include "../../util/auxtrace.h" +#include "../../util/intel-bts.h" + +#define KiB(x) ((x) * 1024) +#define MiB(x) ((x) * 1024 * 1024) +#define KiB_MASK(x) (KiB(x) - 1) +#define MiB_MASK(x) (MiB(x) - 1) + +#define INTEL_BTS_DFLT_SAMPLE_SIZE KiB(4) + +#define INTEL_BTS_MAX_SAMPLE_SIZE KiB(60) + +struct intel_bts_snapshot_ref { + void *ref_buf; + size_t ref_offset; + bool wrapped; +}; + +struct intel_bts_recording { + struct auxtrace_record itr; + struct perf_pmu *intel_bts_pmu; + struct perf_evlist *evlist; + bool snapshot_mode; + size_t snapshot_size; + int snapshot_ref_cnt; + struct intel_bts_snapshot_ref *snapshot_refs; +}; + +struct branch { + u64 from; + u64 to; + u64 misc; +}; + +static size_t intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused) +{ + return INTEL_BTS_AUXTRACE_PRIV_SIZE; +} + +static int intel_bts_info_fill(struct auxtrace_record *itr, + struct perf_session *session, + struct auxtrace_info_event *auxtrace_info, + size_t priv_size) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu; + struct perf_event_mmap_page *pc; + struct perf_tsc_conversion tc = { .time_mult = 0, }; + bool cap_user_time_zero = false; + int err; + + if (priv_size != INTEL_BTS_AUXTRACE_PRIV_SIZE) + return -EINVAL; + + if (!session->evlist->nr_mmaps) + return -EINVAL; + + pc = session->evlist->mmap[0].base; + if (pc) { + err = perf_read_tsc_conversion(pc, &tc); + if (err) { + if (err != -EOPNOTSUPP) + return err; + } else { + cap_user_time_zero = tc.time_mult != 0; + } + if (!cap_user_time_zero) + ui__warning("Intel BTS: TSC not available\n"); + } + + auxtrace_info->type = PERF_AUXTRACE_INTEL_BTS; + auxtrace_info->priv[INTEL_BTS_PMU_TYPE] = intel_bts_pmu->type; + auxtrace_info->priv[INTEL_BTS_TIME_SHIFT] = tc.time_shift; + auxtrace_info->priv[INTEL_BTS_TIME_MULT] = tc.time_mult; + auxtrace_info->priv[INTEL_BTS_TIME_ZERO] = tc.time_zero; + auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO] = cap_user_time_zero; + auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE] = btsr->snapshot_mode; + + return 0; +} + +static int intel_bts_recording_options(struct auxtrace_record *itr, + struct perf_evlist *evlist, + struct record_opts *opts) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu; + struct perf_evsel *evsel, *intel_bts_evsel = NULL; + const struct cpu_map *cpus = evlist->cpus; + bool privileged = geteuid() == 0 || perf_event_paranoid() < 0; + + btsr->evlist = evlist; + btsr->snapshot_mode = opts->auxtrace_snapshot_mode; + + evlist__for_each(evlist, evsel) { + if (evsel->attr.type == intel_bts_pmu->type) { + if (intel_bts_evsel) { + pr_err("There may be only one " INTEL_BTS_PMU_NAME " event\n"); + return -EINVAL; + } + evsel->attr.freq = 0; + evsel->attr.sample_period = 1; + intel_bts_evsel = evsel; + opts->full_auxtrace = true; + } + } + + if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) { + pr_err("Snapshot mode (-S option) requires " INTEL_BTS_PMU_NAME " PMU event (-e " INTEL_BTS_PMU_NAME ")\n"); + return -EINVAL; + } + + if (!opts->full_auxtrace) + return 0; + + if (opts->full_auxtrace && !cpu_map__empty(cpus)) { + pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n"); + return -EINVAL; + } + + /* Set default sizes for snapshot mode */ + if (opts->auxtrace_snapshot_mode) { + if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } else if (!opts->auxtrace_mmap_pages && !privileged && + opts->mmap_pages == UINT_MAX) { + opts->mmap_pages = KiB(256) / page_size; + } + if (!opts->auxtrace_snapshot_size) + opts->auxtrace_snapshot_size = + opts->auxtrace_mmap_pages * (size_t)page_size; + if (!opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_snapshot_size; + + sz = round_up(sz, page_size) / page_size; + opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); + } + if (opts->auxtrace_snapshot_size > + opts->auxtrace_mmap_pages * (size_t)page_size) { + pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n", + opts->auxtrace_snapshot_size, + opts->auxtrace_mmap_pages * (size_t)page_size); + return -EINVAL; + } + if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) { + pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n"); + return -EINVAL; + } + pr_debug2("Intel BTS snapshot size: %zu\n", + opts->auxtrace_snapshot_size); + } + + /* Set default sizes for full trace mode */ + if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } + + /* Validate auxtrace_mmap_pages */ + if (opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; + size_t min_sz; + + if (opts->auxtrace_snapshot_mode) + min_sz = KiB(4); + else + min_sz = KiB(8); + + if (sz < min_sz || !is_power_of_2(sz)) { + pr_err("Invalid mmap size for Intel BTS: must be at least %zuKiB and a power of 2\n", + min_sz / 1024); + return -EINVAL; + } + } + + if (intel_bts_evsel) { + /* + * To obtain the auxtrace buffer file descriptor, the auxtrace event + * must come first. + */ + perf_evlist__to_front(evlist, intel_bts_evsel); + /* + * In the case of per-cpu mmaps, we need the CPU on the + * AUX event. + */ + if (!cpu_map__empty(cpus)) + perf_evsel__set_sample_bit(intel_bts_evsel, CPU); + } + + /* Add dummy event to keep tracking */ + if (opts->full_auxtrace) { + struct perf_evsel *tracking_evsel; + int err; + + err = parse_events(evlist, "dummy:u", NULL); + if (err) + return err; + + tracking_evsel = perf_evlist__last(evlist); + + perf_evlist__set_tracking_event(evlist, tracking_evsel); + + tracking_evsel->attr.freq = 0; + tracking_evsel->attr.sample_period = 1; + } + + return 0; +} + +static int intel_bts_parse_snapshot_options(struct auxtrace_record *itr, + struct record_opts *opts, + const char *str) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + unsigned long long snapshot_size = 0; + char *endptr; + + if (str) { + snapshot_size = strtoull(str, &endptr, 0); + if (*endptr || snapshot_size > SIZE_MAX) + return -1; + } + + opts->auxtrace_snapshot_mode = true; + opts->auxtrace_snapshot_size = snapshot_size; + + btsr->snapshot_size = snapshot_size; + + return 0; +} + +static u64 intel_bts_reference(struct auxtrace_record *itr __maybe_unused) +{ + return rdtsc(); +} + +static int intel_bts_alloc_snapshot_refs(struct intel_bts_recording *btsr, + int idx) +{ + const size_t sz = sizeof(struct intel_bts_snapshot_ref); + int cnt = btsr->snapshot_ref_cnt, new_cnt = cnt * 2; + struct intel_bts_snapshot_ref *refs; + + if (!new_cnt) + new_cnt = 16; + + while (new_cnt <= idx) + new_cnt *= 2; + + refs = calloc(new_cnt, sz); + if (!refs) + return -ENOMEM; + + memcpy(refs, btsr->snapshot_refs, cnt * sz); + + btsr->snapshot_refs = refs; + btsr->snapshot_ref_cnt = new_cnt; + + return 0; +} + +static void intel_bts_free_snapshot_refs(struct intel_bts_recording *btsr) +{ + int i; + + for (i = 0; i < btsr->snapshot_ref_cnt; i++) + zfree(&btsr->snapshot_refs[i].ref_buf); + zfree(&btsr->snapshot_refs); +} + +static void intel_bts_recording_free(struct auxtrace_record *itr) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + + intel_bts_free_snapshot_refs(btsr); + free(btsr); +} + +static int intel_bts_snapshot_start(struct auxtrace_record *itr) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each(btsr->evlist, evsel) { + if (evsel->attr.type == btsr->intel_bts_pmu->type) + return perf_evlist__disable_event(btsr->evlist, evsel); + } + return -EINVAL; +} + +static int intel_bts_snapshot_finish(struct auxtrace_record *itr) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each(btsr->evlist, evsel) { + if (evsel->attr.type == btsr->intel_bts_pmu->type) + return perf_evlist__enable_event(btsr->evlist, evsel); + } + return -EINVAL; +} + +static bool intel_bts_first_wrap(u64 *data, size_t buf_size) +{ + int i, a, b; + + b = buf_size >> 3; + a = b - 512; + if (a < 0) + a = 0; + + for (i = a; i < b; i++) { + if (data[i]) + return true; + } + + return false; +} + +static int intel_bts_find_snapshot(struct auxtrace_record *itr, int idx, + struct auxtrace_mmap *mm, unsigned char *data, + u64 *head, u64 *old) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + bool wrapped; + int err; + + pr_debug3("%s: mmap index %d old head %zu new head %zu\n", + __func__, idx, (size_t)*old, (size_t)*head); + + if (idx >= btsr->snapshot_ref_cnt) { + err = intel_bts_alloc_snapshot_refs(btsr, idx); + if (err) + goto out_err; + } + + wrapped = btsr->snapshot_refs[idx].wrapped; + if (!wrapped && intel_bts_first_wrap((u64 *)data, mm->len)) { + btsr->snapshot_refs[idx].wrapped = true; + wrapped = true; + } + + /* + * In full trace mode 'head' continually increases. However in snapshot + * mode 'head' is an offset within the buffer. Here 'old' and 'head' + * are adjusted to match the full trace case which expects that 'old' is + * always less than 'head'. + */ + if (wrapped) { + *old = *head; + *head += mm->len; + } else { + if (mm->mask) + *old &= mm->mask; + else + *old %= mm->len; + if (*old > *head) + *head += mm->len; + } + + pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n", + __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head); + + return 0; + +out_err: + pr_err("%s: failed, error %d\n", __func__, err); + return err; +} + +static int intel_bts_read_finish(struct auxtrace_record *itr, int idx) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each(btsr->evlist, evsel) { + if (evsel->attr.type == btsr->intel_bts_pmu->type) + return perf_evlist__enable_event_idx(btsr->evlist, + evsel, idx); + } + return -EINVAL; +} + +struct auxtrace_record *intel_bts_recording_init(int *err) +{ + struct perf_pmu *intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME); + struct intel_bts_recording *btsr; + + if (!intel_bts_pmu) + return NULL; + + btsr = zalloc(sizeof(struct intel_bts_recording)); + if (!btsr) { + *err = -ENOMEM; + return NULL; + } + + btsr->intel_bts_pmu = intel_bts_pmu; + btsr->itr.recording_options = intel_bts_recording_options; + btsr->itr.info_priv_size = intel_bts_info_priv_size; + btsr->itr.info_fill = intel_bts_info_fill; + btsr->itr.free = intel_bts_recording_free; + btsr->itr.snapshot_start = intel_bts_snapshot_start; + btsr->itr.snapshot_finish = intel_bts_snapshot_finish; + btsr->itr.find_snapshot = intel_bts_find_snapshot; + btsr->itr.parse_snapshot_options = intel_bts_parse_snapshot_options; + btsr->itr.reference = intel_bts_reference; + btsr->itr.read_finish = intel_bts_read_finish; + btsr->itr.alignment = sizeof(struct branch); + return &btsr->itr; +} diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c new file mode 100644 index 000000000000..2ca10d796c0b --- /dev/null +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -0,0 +1,1007 @@ +/* + * intel_pt.c: Intel Processor Trace support + * Copyright (c) 2013-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <stdbool.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/log2.h> +#include <cpuid.h> + +#include "../../perf.h" +#include "../../util/session.h" +#include "../../util/event.h" +#include "../../util/evlist.h" +#include "../../util/evsel.h" +#include "../../util/cpumap.h" +#include "../../util/parse-options.h" +#include "../../util/parse-events.h" +#include "../../util/pmu.h" +#include "../../util/debug.h" +#include "../../util/auxtrace.h" +#include "../../util/tsc.h" +#include "../../util/intel-pt.h" + +#define KiB(x) ((x) * 1024) +#define MiB(x) ((x) * 1024 * 1024) +#define KiB_MASK(x) (KiB(x) - 1) +#define MiB_MASK(x) (MiB(x) - 1) + +#define INTEL_PT_DEFAULT_SAMPLE_SIZE KiB(4) + +#define INTEL_PT_MAX_SAMPLE_SIZE KiB(60) + +#define INTEL_PT_PSB_PERIOD_NEAR 256 + +struct intel_pt_snapshot_ref { + void *ref_buf; + size_t ref_offset; + bool wrapped; +}; + +struct intel_pt_recording { + struct auxtrace_record itr; + struct perf_pmu *intel_pt_pmu; + int have_sched_switch; + struct perf_evlist *evlist; + bool snapshot_mode; + bool snapshot_init_done; + size_t snapshot_size; + size_t snapshot_ref_buf_size; + int snapshot_ref_cnt; + struct intel_pt_snapshot_ref *snapshot_refs; +}; + +static int intel_pt_parse_terms_with_default(struct list_head *formats, + const char *str, + u64 *config) +{ + struct list_head *terms; + struct perf_event_attr attr = { .size = 0, }; + int err; + + terms = malloc(sizeof(struct list_head)); + if (!terms) + return -ENOMEM; + + INIT_LIST_HEAD(terms); + + err = parse_events_terms(terms, str); + if (err) + goto out_free; + + attr.config = *config; + err = perf_pmu__config_terms(formats, &attr, terms, true, NULL); + if (err) + goto out_free; + + *config = attr.config; +out_free: + parse_events__free_terms(terms); + return err; +} + +static int intel_pt_parse_terms(struct list_head *formats, const char *str, + u64 *config) +{ + *config = 0; + return intel_pt_parse_terms_with_default(formats, str, config); +} + +static u64 intel_pt_masked_bits(u64 mask, u64 bits) +{ + const u64 top_bit = 1ULL << 63; + u64 res = 0; + int i; + + for (i = 0; i < 64; i++) { + if (mask & top_bit) { + res <<= 1; + if (bits & top_bit) + res |= 1; + } + mask <<= 1; + bits <<= 1; + } + + return res; +} + +static int intel_pt_read_config(struct perf_pmu *intel_pt_pmu, const char *str, + struct perf_evlist *evlist, u64 *res) +{ + struct perf_evsel *evsel; + u64 mask; + + *res = 0; + + mask = perf_pmu__format_bits(&intel_pt_pmu->format, str); + if (!mask) + return -EINVAL; + + evlist__for_each(evlist, evsel) { + if (evsel->attr.type == intel_pt_pmu->type) { + *res = intel_pt_masked_bits(mask, evsel->attr.config); + return 0; + } + } + + return -EINVAL; +} + +static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu, + struct perf_evlist *evlist) +{ + u64 val; + int err, topa_multiple_entries; + size_t psb_period; + + if (perf_pmu__scan_file(intel_pt_pmu, "caps/topa_multiple_entries", + "%d", &topa_multiple_entries) != 1) + topa_multiple_entries = 0; + + /* + * Use caps/topa_multiple_entries to indicate early hardware that had + * extra frequent PSBs. + */ + if (!topa_multiple_entries) { + psb_period = 256; + goto out; + } + + err = intel_pt_read_config(intel_pt_pmu, "psb_period", evlist, &val); + if (err) + val = 0; + + psb_period = 1 << (val + 11); +out: + pr_debug2("%s psb_period %zu\n", intel_pt_pmu->name, psb_period); + return psb_period; +} + +static int intel_pt_pick_bit(int bits, int target) +{ + int pos, pick = -1; + + for (pos = 0; bits; bits >>= 1, pos++) { + if (bits & 1) { + if (pos <= target || pick < 0) + pick = pos; + if (pos >= target) + break; + } + } + + return pick; +} + +static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu) +{ + char buf[256]; + int mtc, mtc_periods = 0, mtc_period; + int psb_cyc, psb_periods, psb_period; + int pos = 0; + u64 config; + + pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc"); + + if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc", "%d", + &mtc) != 1) + mtc = 1; + + if (mtc) { + if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc_periods", "%x", + &mtc_periods) != 1) + mtc_periods = 0; + if (mtc_periods) { + mtc_period = intel_pt_pick_bit(mtc_periods, 3); + pos += scnprintf(buf + pos, sizeof(buf) - pos, + ",mtc,mtc_period=%d", mtc_period); + } + } + + if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_cyc", "%d", + &psb_cyc) != 1) + psb_cyc = 1; + + if (psb_cyc && mtc_periods) { + if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_periods", "%x", + &psb_periods) != 1) + psb_periods = 0; + if (psb_periods) { + psb_period = intel_pt_pick_bit(psb_periods, 3); + pos += scnprintf(buf + pos, sizeof(buf) - pos, + ",psb_period=%d", psb_period); + } + } + + pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf); + + intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config); + + return config; +} + +static int intel_pt_parse_snapshot_options(struct auxtrace_record *itr, + struct record_opts *opts, + const char *str) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + unsigned long long snapshot_size = 0; + char *endptr; + + if (str) { + snapshot_size = strtoull(str, &endptr, 0); + if (*endptr || snapshot_size > SIZE_MAX) + return -1; + } + + opts->auxtrace_snapshot_mode = true; + opts->auxtrace_snapshot_size = snapshot_size; + + ptr->snapshot_size = snapshot_size; + + return 0; +} + +struct perf_event_attr * +intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu) +{ + struct perf_event_attr *attr; + + attr = zalloc(sizeof(struct perf_event_attr)); + if (!attr) + return NULL; + + attr->config = intel_pt_default_config(intel_pt_pmu); + + intel_pt_pmu->selectable = true; + + return attr; +} + +static size_t intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused) +{ + return INTEL_PT_AUXTRACE_PRIV_SIZE; +} + +static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d) +{ + unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; + + __get_cpuid(0x15, &eax, &ebx, &ecx, &edx); + *n = ebx; + *d = eax; +} + +static int intel_pt_info_fill(struct auxtrace_record *itr, + struct perf_session *session, + struct auxtrace_info_event *auxtrace_info, + size_t priv_size) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu; + struct perf_event_mmap_page *pc; + struct perf_tsc_conversion tc = { .time_mult = 0, }; + bool cap_user_time_zero = false, per_cpu_mmaps; + u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit; + u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d; + int err; + + if (priv_size != INTEL_PT_AUXTRACE_PRIV_SIZE) + return -EINVAL; + + intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit); + intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp", + &noretcomp_bit); + intel_pt_parse_terms(&intel_pt_pmu->format, "mtc", &mtc_bit); + mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format, + "mtc_period"); + intel_pt_parse_terms(&intel_pt_pmu->format, "cyc", &cyc_bit); + + intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d); + + if (!session->evlist->nr_mmaps) + return -EINVAL; + + pc = session->evlist->mmap[0].base; + if (pc) { + err = perf_read_tsc_conversion(pc, &tc); + if (err) { + if (err != -EOPNOTSUPP) + return err; + } else { + cap_user_time_zero = tc.time_mult != 0; + } + if (!cap_user_time_zero) + ui__warning("Intel Processor Trace: TSC not available\n"); + } + + per_cpu_mmaps = !cpu_map__empty(session->evlist->cpus); + + auxtrace_info->type = PERF_AUXTRACE_INTEL_PT; + auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type; + auxtrace_info->priv[INTEL_PT_TIME_SHIFT] = tc.time_shift; + auxtrace_info->priv[INTEL_PT_TIME_MULT] = tc.time_mult; + auxtrace_info->priv[INTEL_PT_TIME_ZERO] = tc.time_zero; + auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO] = cap_user_time_zero; + auxtrace_info->priv[INTEL_PT_TSC_BIT] = tsc_bit; + auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT] = noretcomp_bit; + auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH] = ptr->have_sched_switch; + auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE] = ptr->snapshot_mode; + auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS] = per_cpu_mmaps; + auxtrace_info->priv[INTEL_PT_MTC_BIT] = mtc_bit; + auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS] = mtc_freq_bits; + auxtrace_info->priv[INTEL_PT_TSC_CTC_N] = tsc_ctc_ratio_n; + auxtrace_info->priv[INTEL_PT_TSC_CTC_D] = tsc_ctc_ratio_d; + auxtrace_info->priv[INTEL_PT_CYC_BIT] = cyc_bit; + + return 0; +} + +static int intel_pt_track_switches(struct perf_evlist *evlist) +{ + const char *sched_switch = "sched:sched_switch"; + struct perf_evsel *evsel; + int err; + + if (!perf_evlist__can_select_event(evlist, sched_switch)) + return -EPERM; + + err = parse_events(evlist, sched_switch, NULL); + if (err) { + pr_debug2("%s: failed to parse %s, error %d\n", + __func__, sched_switch, err); + return err; + } + + evsel = perf_evlist__last(evlist); + + perf_evsel__set_sample_bit(evsel, CPU); + perf_evsel__set_sample_bit(evsel, TIME); + + evsel->system_wide = true; + evsel->no_aux_samples = true; + evsel->immediate = true; + + return 0; +} + +static void intel_pt_valid_str(char *str, size_t len, u64 valid) +{ + unsigned int val, last = 0, state = 1; + int p = 0; + + str[0] = '\0'; + + for (val = 0; val <= 64; val++, valid >>= 1) { + if (valid & 1) { + last = val; + switch (state) { + case 0: + p += scnprintf(str + p, len - p, ","); + /* Fall through */ + case 1: + p += scnprintf(str + p, len - p, "%u", val); + state = 2; + break; + case 2: + state = 3; + break; + case 3: + state = 4; + break; + default: + break; + } + } else { + switch (state) { + case 3: + p += scnprintf(str + p, len - p, ",%u", last); + state = 0; + break; + case 4: + p += scnprintf(str + p, len - p, "-%u", last); + state = 0; + break; + default: + break; + } + if (state != 1) + state = 0; + } + } +} + +static int intel_pt_val_config_term(struct perf_pmu *intel_pt_pmu, + const char *caps, const char *name, + const char *supported, u64 config) +{ + char valid_str[256]; + unsigned int shift; + unsigned long long valid; + u64 bits; + int ok; + + if (perf_pmu__scan_file(intel_pt_pmu, caps, "%llx", &valid) != 1) + valid = 0; + + if (supported && + perf_pmu__scan_file(intel_pt_pmu, supported, "%d", &ok) == 1 && !ok) + valid = 0; + + valid |= 1; + + bits = perf_pmu__format_bits(&intel_pt_pmu->format, name); + + config &= bits; + + for (shift = 0; bits && !(bits & 1); shift++) + bits >>= 1; + + config >>= shift; + + if (config > 63) + goto out_err; + + if (valid & (1 << config)) + return 0; +out_err: + intel_pt_valid_str(valid_str, sizeof(valid_str), valid); + pr_err("Invalid %s for %s. Valid values are: %s\n", + name, INTEL_PT_PMU_NAME, valid_str); + return -EINVAL; +} + +static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu, + struct perf_evsel *evsel) +{ + int err; + + if (!evsel) + return 0; + + err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds", + "cyc_thresh", "caps/psb_cyc", + evsel->attr.config); + if (err) + return err; + + err = intel_pt_val_config_term(intel_pt_pmu, "caps/mtc_periods", + "mtc_period", "caps/mtc", + evsel->attr.config); + if (err) + return err; + + return intel_pt_val_config_term(intel_pt_pmu, "caps/psb_periods", + "psb_period", "caps/psb_cyc", + evsel->attr.config); +} + +static int intel_pt_recording_options(struct auxtrace_record *itr, + struct perf_evlist *evlist, + struct record_opts *opts) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu; + bool have_timing_info; + struct perf_evsel *evsel, *intel_pt_evsel = NULL; + const struct cpu_map *cpus = evlist->cpus; + bool privileged = geteuid() == 0 || perf_event_paranoid() < 0; + u64 tsc_bit; + int err; + + ptr->evlist = evlist; + ptr->snapshot_mode = opts->auxtrace_snapshot_mode; + + evlist__for_each(evlist, evsel) { + if (evsel->attr.type == intel_pt_pmu->type) { + if (intel_pt_evsel) { + pr_err("There may be only one " INTEL_PT_PMU_NAME " event\n"); + return -EINVAL; + } + evsel->attr.freq = 0; + evsel->attr.sample_period = 1; + intel_pt_evsel = evsel; + opts->full_auxtrace = true; + } + } + + if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) { + pr_err("Snapshot mode (-S option) requires " INTEL_PT_PMU_NAME " PMU event (-e " INTEL_PT_PMU_NAME ")\n"); + return -EINVAL; + } + + if (opts->use_clockid) { + pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n"); + return -EINVAL; + } + + if (!opts->full_auxtrace) + return 0; + + err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel); + if (err) + return err; + + /* Set default sizes for snapshot mode */ + if (opts->auxtrace_snapshot_mode) { + size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist); + + if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } else if (!opts->auxtrace_mmap_pages && !privileged && + opts->mmap_pages == UINT_MAX) { + opts->mmap_pages = KiB(256) / page_size; + } + if (!opts->auxtrace_snapshot_size) + opts->auxtrace_snapshot_size = + opts->auxtrace_mmap_pages * (size_t)page_size; + if (!opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_snapshot_size; + + sz = round_up(sz, page_size) / page_size; + opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); + } + if (opts->auxtrace_snapshot_size > + opts->auxtrace_mmap_pages * (size_t)page_size) { + pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n", + opts->auxtrace_snapshot_size, + opts->auxtrace_mmap_pages * (size_t)page_size); + return -EINVAL; + } + if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) { + pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n"); + return -EINVAL; + } + pr_debug2("Intel PT snapshot size: %zu\n", + opts->auxtrace_snapshot_size); + if (psb_period && + opts->auxtrace_snapshot_size <= psb_period + + INTEL_PT_PSB_PERIOD_NEAR) + ui__warning("Intel PT snapshot size (%zu) may be too small for PSB period (%zu)\n", + opts->auxtrace_snapshot_size, psb_period); + } + + /* Set default sizes for full trace mode */ + if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } + + /* Validate auxtrace_mmap_pages */ + if (opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; + size_t min_sz; + + if (opts->auxtrace_snapshot_mode) + min_sz = KiB(4); + else + min_sz = KiB(8); + + if (sz < min_sz || !is_power_of_2(sz)) { + pr_err("Invalid mmap size for Intel Processor Trace: must be at least %zuKiB and a power of 2\n", + min_sz / 1024); + return -EINVAL; + } + } + + intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit); + + if (opts->full_auxtrace && (intel_pt_evsel->attr.config & tsc_bit)) + have_timing_info = true; + else + have_timing_info = false; + + /* + * Per-cpu recording needs sched_switch events to distinguish different + * threads. + */ + if (have_timing_info && !cpu_map__empty(cpus)) { + err = intel_pt_track_switches(evlist); + if (err == -EPERM) + pr_debug2("Unable to select sched:sched_switch\n"); + else if (err) + return err; + else + ptr->have_sched_switch = 1; + } + + if (intel_pt_evsel) { + /* + * To obtain the auxtrace buffer file descriptor, the auxtrace + * event must come first. + */ + perf_evlist__to_front(evlist, intel_pt_evsel); + /* + * In the case of per-cpu mmaps, we need the CPU on the + * AUX event. + */ + if (!cpu_map__empty(cpus)) + perf_evsel__set_sample_bit(intel_pt_evsel, CPU); + } + + /* Add dummy event to keep tracking */ + if (opts->full_auxtrace) { + struct perf_evsel *tracking_evsel; + + err = parse_events(evlist, "dummy:u", NULL); + if (err) + return err; + + tracking_evsel = perf_evlist__last(evlist); + + perf_evlist__set_tracking_event(evlist, tracking_evsel); + + tracking_evsel->attr.freq = 0; + tracking_evsel->attr.sample_period = 1; + + /* In per-cpu case, always need the time of mmap events etc */ + if (!cpu_map__empty(cpus)) + perf_evsel__set_sample_bit(tracking_evsel, TIME); + } + + /* + * Warn the user when we do not have enough information to decode i.e. + * per-cpu with no sched_switch (except workload-only). + */ + if (!ptr->have_sched_switch && !cpu_map__empty(cpus) && + !target__none(&opts->target)) + ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n"); + + return 0; +} + +static int intel_pt_snapshot_start(struct auxtrace_record *itr) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each(ptr->evlist, evsel) { + if (evsel->attr.type == ptr->intel_pt_pmu->type) + return perf_evlist__disable_event(ptr->evlist, evsel); + } + return -EINVAL; +} + +static int intel_pt_snapshot_finish(struct auxtrace_record *itr) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each(ptr->evlist, evsel) { + if (evsel->attr.type == ptr->intel_pt_pmu->type) + return perf_evlist__enable_event(ptr->evlist, evsel); + } + return -EINVAL; +} + +static int intel_pt_alloc_snapshot_refs(struct intel_pt_recording *ptr, int idx) +{ + const size_t sz = sizeof(struct intel_pt_snapshot_ref); + int cnt = ptr->snapshot_ref_cnt, new_cnt = cnt * 2; + struct intel_pt_snapshot_ref *refs; + + if (!new_cnt) + new_cnt = 16; + + while (new_cnt <= idx) + new_cnt *= 2; + + refs = calloc(new_cnt, sz); + if (!refs) + return -ENOMEM; + + memcpy(refs, ptr->snapshot_refs, cnt * sz); + + ptr->snapshot_refs = refs; + ptr->snapshot_ref_cnt = new_cnt; + + return 0; +} + +static void intel_pt_free_snapshot_refs(struct intel_pt_recording *ptr) +{ + int i; + + for (i = 0; i < ptr->snapshot_ref_cnt; i++) + zfree(&ptr->snapshot_refs[i].ref_buf); + zfree(&ptr->snapshot_refs); +} + +static void intel_pt_recording_free(struct auxtrace_record *itr) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + + intel_pt_free_snapshot_refs(ptr); + free(ptr); +} + +static int intel_pt_alloc_snapshot_ref(struct intel_pt_recording *ptr, int idx, + size_t snapshot_buf_size) +{ + size_t ref_buf_size = ptr->snapshot_ref_buf_size; + void *ref_buf; + + ref_buf = zalloc(ref_buf_size); + if (!ref_buf) + return -ENOMEM; + + ptr->snapshot_refs[idx].ref_buf = ref_buf; + ptr->snapshot_refs[idx].ref_offset = snapshot_buf_size - ref_buf_size; + + return 0; +} + +static size_t intel_pt_snapshot_ref_buf_size(struct intel_pt_recording *ptr, + size_t snapshot_buf_size) +{ + const size_t max_size = 256 * 1024; + size_t buf_size = 0, psb_period; + + if (ptr->snapshot_size <= 64 * 1024) + return 0; + + psb_period = intel_pt_psb_period(ptr->intel_pt_pmu, ptr->evlist); + if (psb_period) + buf_size = psb_period * 2; + + if (!buf_size || buf_size > max_size) + buf_size = max_size; + + if (buf_size >= snapshot_buf_size) + return 0; + + if (buf_size >= ptr->snapshot_size / 2) + return 0; + + return buf_size; +} + +static int intel_pt_snapshot_init(struct intel_pt_recording *ptr, + size_t snapshot_buf_size) +{ + if (ptr->snapshot_init_done) + return 0; + + ptr->snapshot_init_done = true; + + ptr->snapshot_ref_buf_size = intel_pt_snapshot_ref_buf_size(ptr, + snapshot_buf_size); + + return 0; +} + +/** + * intel_pt_compare_buffers - compare bytes in a buffer to a circular buffer. + * @buf1: first buffer + * @compare_size: number of bytes to compare + * @buf2: second buffer (a circular buffer) + * @offs2: offset in second buffer + * @buf2_size: size of second buffer + * + * The comparison allows for the possibility that the bytes to compare in the + * circular buffer are not contiguous. It is assumed that @compare_size <= + * @buf2_size. This function returns %false if the bytes are identical, %true + * otherwise. + */ +static bool intel_pt_compare_buffers(void *buf1, size_t compare_size, + void *buf2, size_t offs2, size_t buf2_size) +{ + size_t end2 = offs2 + compare_size, part_size; + + if (end2 <= buf2_size) + return memcmp(buf1, buf2 + offs2, compare_size); + + part_size = end2 - buf2_size; + if (memcmp(buf1, buf2 + offs2, part_size)) + return true; + + compare_size -= part_size; + + return memcmp(buf1 + part_size, buf2, compare_size); +} + +static bool intel_pt_compare_ref(void *ref_buf, size_t ref_offset, + size_t ref_size, size_t buf_size, + void *data, size_t head) +{ + size_t ref_end = ref_offset + ref_size; + + if (ref_end > buf_size) { + if (head > ref_offset || head < ref_end - buf_size) + return true; + } else if (head > ref_offset && head < ref_end) { + return true; + } + + return intel_pt_compare_buffers(ref_buf, ref_size, data, ref_offset, + buf_size); +} + +static void intel_pt_copy_ref(void *ref_buf, size_t ref_size, size_t buf_size, + void *data, size_t head) +{ + if (head >= ref_size) { + memcpy(ref_buf, data + head - ref_size, ref_size); + } else { + memcpy(ref_buf, data, head); + ref_size -= head; + memcpy(ref_buf + head, data + buf_size - ref_size, ref_size); + } +} + +static bool intel_pt_wrapped(struct intel_pt_recording *ptr, int idx, + struct auxtrace_mmap *mm, unsigned char *data, + u64 head) +{ + struct intel_pt_snapshot_ref *ref = &ptr->snapshot_refs[idx]; + bool wrapped; + + wrapped = intel_pt_compare_ref(ref->ref_buf, ref->ref_offset, + ptr->snapshot_ref_buf_size, mm->len, + data, head); + + intel_pt_copy_ref(ref->ref_buf, ptr->snapshot_ref_buf_size, mm->len, + data, head); + + return wrapped; +} + +static bool intel_pt_first_wrap(u64 *data, size_t buf_size) +{ + int i, a, b; + + b = buf_size >> 3; + a = b - 512; + if (a < 0) + a = 0; + + for (i = a; i < b; i++) { + if (data[i]) + return true; + } + + return false; +} + +static int intel_pt_find_snapshot(struct auxtrace_record *itr, int idx, + struct auxtrace_mmap *mm, unsigned char *data, + u64 *head, u64 *old) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + bool wrapped; + int err; + + pr_debug3("%s: mmap index %d old head %zu new head %zu\n", + __func__, idx, (size_t)*old, (size_t)*head); + + err = intel_pt_snapshot_init(ptr, mm->len); + if (err) + goto out_err; + + if (idx >= ptr->snapshot_ref_cnt) { + err = intel_pt_alloc_snapshot_refs(ptr, idx); + if (err) + goto out_err; + } + + if (ptr->snapshot_ref_buf_size) { + if (!ptr->snapshot_refs[idx].ref_buf) { + err = intel_pt_alloc_snapshot_ref(ptr, idx, mm->len); + if (err) + goto out_err; + } + wrapped = intel_pt_wrapped(ptr, idx, mm, data, *head); + } else { + wrapped = ptr->snapshot_refs[idx].wrapped; + if (!wrapped && intel_pt_first_wrap((u64 *)data, mm->len)) { + ptr->snapshot_refs[idx].wrapped = true; + wrapped = true; + } + } + + /* + * In full trace mode 'head' continually increases. However in snapshot + * mode 'head' is an offset within the buffer. Here 'old' and 'head' + * are adjusted to match the full trace case which expects that 'old' is + * always less than 'head'. + */ + if (wrapped) { + *old = *head; + *head += mm->len; + } else { + if (mm->mask) + *old &= mm->mask; + else + *old %= mm->len; + if (*old > *head) + *head += mm->len; + } + + pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n", + __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head); + + return 0; + +out_err: + pr_err("%s: failed, error %d\n", __func__, err); + return err; +} + +static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused) +{ + return rdtsc(); +} + +static int intel_pt_read_finish(struct auxtrace_record *itr, int idx) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each(ptr->evlist, evsel) { + if (evsel->attr.type == ptr->intel_pt_pmu->type) + return perf_evlist__enable_event_idx(ptr->evlist, evsel, + idx); + } + return -EINVAL; +} + +struct auxtrace_record *intel_pt_recording_init(int *err) +{ + struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME); + struct intel_pt_recording *ptr; + + if (!intel_pt_pmu) + return NULL; + + ptr = zalloc(sizeof(struct intel_pt_recording)); + if (!ptr) { + *err = -ENOMEM; + return NULL; + } + + ptr->intel_pt_pmu = intel_pt_pmu; + ptr->itr.recording_options = intel_pt_recording_options; + ptr->itr.info_priv_size = intel_pt_info_priv_size; + ptr->itr.info_fill = intel_pt_info_fill; + ptr->itr.free = intel_pt_recording_free; + ptr->itr.snapshot_start = intel_pt_snapshot_start; + ptr->itr.snapshot_finish = intel_pt_snapshot_finish; + ptr->itr.find_snapshot = intel_pt_find_snapshot; + ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options; + ptr->itr.reference = intel_pt_reference; + ptr->itr.read_finish = intel_pt_read_finish; + return &ptr->itr; +} diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c new file mode 100644 index 000000000000..79fe07158d00 --- /dev/null +++ b/tools/perf/arch/x86/util/pmu.c @@ -0,0 +1,18 @@ +#include <string.h> + +#include <linux/perf_event.h> + +#include "../../util/intel-pt.h" +#include "../../util/intel-bts.h" +#include "../../util/pmu.h" + +struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) +{ +#ifdef HAVE_AUXTRACE_SUPPORT + if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) + return intel_pt_pmu_default_config(pmu); + if (!strcmp(pmu->name, INTEL_BTS_PMU_NAME)) + pmu->selectable = true; +#endif + return NULL; +} diff --git a/tools/perf/arch/xtensa/Build b/tools/perf/arch/xtensa/Build new file mode 100644 index 000000000000..54afe4a467e7 --- /dev/null +++ b/tools/perf/arch/xtensa/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/xtensa/Makefile b/tools/perf/arch/xtensa/Makefile new file mode 100644 index 000000000000..7fbca175099e --- /dev/null +++ b/tools/perf/arch/xtensa/Makefile @@ -0,0 +1,3 @@ +ifndef NO_DWARF +PERF_HAVE_DWARF_REGS := 1 +endif diff --git a/tools/perf/arch/xtensa/util/Build b/tools/perf/arch/xtensa/util/Build new file mode 100644 index 000000000000..954e287bbb89 --- /dev/null +++ b/tools/perf/arch/xtensa/util/Build @@ -0,0 +1 @@ +libperf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/xtensa/util/dwarf-regs.c b/tools/perf/arch/xtensa/util/dwarf-regs.c new file mode 100644 index 000000000000..4dba76bfb4ce --- /dev/null +++ b/tools/perf/arch/xtensa/util/dwarf-regs.c @@ -0,0 +1,25 @@ +/* + * Mapping of DWARF debug register numbers into register names. + * + * Copyright (c) 2015 Cadence Design Systems Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <stddef.h> +#include <dwarf-regs.h> + +#define XTENSA_MAX_REGS 16 + +const char *xtensa_regs_table[XTENSA_MAX_REGS] = { + "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", + "a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15", +}; + +const char *get_arch_regstr(unsigned int n) +{ + return n < XTENSA_MAX_REGS ? xtensa_regs_table[n] : NULL; +} diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build index c3ab760e06b4..573e28896038 100644 --- a/tools/perf/bench/Build +++ b/tools/perf/bench/Build @@ -5,6 +5,7 @@ perf-y += futex-hash.o perf-y += futex-wake.o perf-y += futex-wake-parallel.o perf-y += futex-requeue.o +perf-y += futex-lock-pi.o perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 70b2f718cc21..a50df86f2b9b 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -36,6 +36,8 @@ extern int bench_futex_wake(int argc, const char **argv, const char *prefix); extern int bench_futex_wake_parallel(int argc, const char **argv, const char *prefix); extern int bench_futex_requeue(int argc, const char **argv, const char *prefix); +/* pi futexes */ +extern int bench_futex_lock_pi(int argc, const char **argv, const char *prefix); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c new file mode 100644 index 000000000000..bc6a16adbca8 --- /dev/null +++ b/tools/perf/bench/futex-lock-pi.c @@ -0,0 +1,219 @@ +/* + * Copyright (C) 2015 Davidlohr Bueso. + */ + +#include "../perf.h" +#include "../util/util.h" +#include "../util/stat.h" +#include "../util/parse-options.h" +#include "../util/header.h" +#include "bench.h" +#include "futex.h" + +#include <err.h> +#include <stdlib.h> +#include <sys/time.h> +#include <pthread.h> + +struct worker { + int tid; + u_int32_t *futex; + pthread_t thread; + unsigned long ops; +}; + +static u_int32_t global_futex = 0; +static struct worker *worker; +static unsigned int nsecs = 10; +static bool silent = false, multi = false; +static bool done = false, fshared = false; +static unsigned int ncpus, nthreads = 0; +static int futex_flag = 0; +struct timeval start, end, runtime; +static pthread_mutex_t thread_lock; +static unsigned int threads_starting; +static struct stats throughput_stats; +static pthread_cond_t thread_parent, thread_worker; + +static const struct option options[] = { + OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), + OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"), + OPT_BOOLEAN( 'M', "multi", &multi, "Use multiple futexes"), + OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), + OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"), + OPT_END() +}; + +static const char * const bench_futex_lock_pi_usage[] = { + "perf bench futex requeue <options>", + NULL +}; + +static void print_summary(void) +{ + unsigned long avg = avg_stats(&throughput_stats); + double stddev = stddev_stats(&throughput_stats); + + printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", + !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg), + (int) runtime.tv_sec); +} + +static void toggle_done(int sig __maybe_unused, + siginfo_t *info __maybe_unused, + void *uc __maybe_unused) +{ + /* inform all threads that we're done for the day */ + done = true; + gettimeofday(&end, NULL); + timersub(&end, &start, &runtime); +} + +static void *workerfn(void *arg) +{ + struct worker *w = (struct worker *) arg; + + pthread_mutex_lock(&thread_lock); + threads_starting--; + if (!threads_starting) + pthread_cond_signal(&thread_parent); + pthread_cond_wait(&thread_worker, &thread_lock); + pthread_mutex_unlock(&thread_lock); + + do { + int ret; + again: + ret = futex_lock_pi(w->futex, NULL, 0, futex_flag); + + if (ret) { /* handle lock acquisition */ + if (!silent) + warn("thread %d: Could not lock pi-lock for %p (%d)", + w->tid, w->futex, ret); + if (done) + break; + + goto again; + } + + usleep(1); + ret = futex_unlock_pi(w->futex, futex_flag); + if (ret && !silent) + warn("thread %d: Could not unlock pi-lock for %p (%d)", + w->tid, w->futex, ret); + w->ops++; /* account for thread's share of work */ + } while (!done); + + return NULL; +} + +static void create_threads(struct worker *w, pthread_attr_t thread_attr) +{ + cpu_set_t cpu; + unsigned int i; + + threads_starting = nthreads; + + for (i = 0; i < nthreads; i++) { + worker[i].tid = i; + + if (multi) { + worker[i].futex = calloc(1, sizeof(u_int32_t)); + if (!worker[i].futex) + err(EXIT_FAILURE, "calloc"); + } else + worker[i].futex = &global_futex; + + CPU_ZERO(&cpu); + CPU_SET(i % ncpus, &cpu); + + if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu)) + err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + + if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i])) + err(EXIT_FAILURE, "pthread_create"); + } +} + +int bench_futex_lock_pi(int argc, const char **argv, + const char *prefix __maybe_unused) +{ + int ret = 0; + unsigned int i; + struct sigaction act; + pthread_attr_t thread_attr; + + argc = parse_options(argc, argv, options, bench_futex_lock_pi_usage, 0); + if (argc) + goto err; + + ncpus = sysconf(_SC_NPROCESSORS_ONLN); + + sigfillset(&act.sa_mask); + act.sa_sigaction = toggle_done; + sigaction(SIGINT, &act, NULL); + + if (!nthreads) + nthreads = ncpus; + + worker = calloc(nthreads, sizeof(*worker)); + if (!worker) + err(EXIT_FAILURE, "calloc"); + + if (!fshared) + futex_flag = FUTEX_PRIVATE_FLAG; + + printf("Run summary [PID %d]: %d threads doing pi lock/unlock pairing for %d secs.\n\n", + getpid(), nthreads, nsecs); + + init_stats(&throughput_stats); + pthread_mutex_init(&thread_lock, NULL); + pthread_cond_init(&thread_parent, NULL); + pthread_cond_init(&thread_worker, NULL); + + threads_starting = nthreads; + pthread_attr_init(&thread_attr); + gettimeofday(&start, NULL); + + create_threads(worker, thread_attr); + pthread_attr_destroy(&thread_attr); + + pthread_mutex_lock(&thread_lock); + while (threads_starting) + pthread_cond_wait(&thread_parent, &thread_lock); + pthread_cond_broadcast(&thread_worker); + pthread_mutex_unlock(&thread_lock); + + sleep(nsecs); + toggle_done(0, NULL, NULL); + + for (i = 0; i < nthreads; i++) { + ret = pthread_join(worker[i].thread, NULL); + if (ret) + err(EXIT_FAILURE, "pthread_join"); + } + + /* cleanup & report results */ + pthread_cond_destroy(&thread_parent); + pthread_cond_destroy(&thread_worker); + pthread_mutex_destroy(&thread_lock); + + for (i = 0; i < nthreads; i++) { + unsigned long t = worker[i].ops/runtime.tv_sec; + + update_stats(&throughput_stats, t); + if (!silent) + printf("[thread %3d] futex: %p [ %ld ops/sec ]\n", + worker[i].tid, worker[i].futex, t); + + if (multi) + free(worker[i].futex); + } + + print_summary(); + + free(worker); + return ret; +err: + usage_with_options(bench_futex_lock_pi_usage, options); + exit(EXIT_FAILURE); +} diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index 7ed22ff1e1ac..d44de9f44281 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -56,6 +56,26 @@ futex_wake(u_int32_t *uaddr, int nr_wake, int opflags) } /** + * futex_lock_pi() - block on uaddr as a PI mutex + * @detect: whether (1) or not (0) to perform deadlock detection + */ +static inline int +futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int detect, + int opflags) +{ + return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags); +} + +/** + * futex_unlock_pi() - release uaddr as a PI mutex, waking the top waiter + */ +static inline int +futex_unlock_pi(u_int32_t *uaddr, int opflags) +{ + return futex(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags); +} + +/** * futex_cmp_requeue() - requeue tasks from uaddr to uaddr2 * @nr_wake: wake up to this many tasks * @nr_requeue: requeue up to this many tasks diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 2c1bec39c30e..8edc205ff9a7 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -67,6 +67,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, rb_erase(&al->sym->rb_node, &al->map->dso->symbols[al->map->type]); symbol__delete(al->sym); + dso__reset_find_symbol_cache(al->map->dso); } return 0; } @@ -187,6 +188,7 @@ find_next: * symbol, free he->ms.sym->src to signal we already * processed this symbol. */ + zfree(¬es->src->cycles_hist); zfree(¬es->src); } } @@ -238,6 +240,8 @@ static int __cmd_annotate(struct perf_annotate *ann) if (nr_samples > 0) { total_nr_samples += nr_samples; hists__collapse_resort(hists, NULL); + /* Don't sort callchain */ + perf_evsel__reset_sample_bit(pos, CALLCHAIN); hists__output_resort(hists, NULL); if (symbol_conf.event_group && diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index b5314e452ec7..f67934d46d40 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -60,6 +60,8 @@ static struct bench futex_benchmarks[] = { { "wake", "Benchmark for futex wake calls", bench_futex_wake }, { "wake-parallel", "Benchmark for parallel futex wake calls", bench_futex_wake_parallel }, { "requeue", "Benchmark for futex requeue calls", bench_futex_requeue }, + /* pi-futexes */ + { "lock-pi", "Benchmark for futex lock_pi calls", bench_futex_lock_pi }, { "all", "Test all futex benchmarks", NULL }, { NULL, NULL, NULL } }; diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index d47a0cdc71c9..7b8450cd33c2 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -25,8 +25,6 @@ static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid) { char root_dir[PATH_MAX]; - char notes[PATH_MAX]; - u8 build_id[BUILD_ID_SIZE]; char *p; strlcpy(root_dir, proc_dir, sizeof(root_dir)); @@ -35,15 +33,7 @@ static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid) if (!p) return -1; *p = '\0'; - - scnprintf(notes, sizeof(notes), "%s/sys/kernel/notes", root_dir); - - if (sysfs__read_build_id(notes, build_id, sizeof(build_id))) - return -1; - - build_id__sprintf(build_id, sizeof(build_id), sbuildid); - - return 0; + return sysfs__sprintf_build_id(root_dir, sbuildid); } static int build_id_cache__kcore_dir(char *dir, size_t sz) @@ -127,7 +117,7 @@ static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir, static int build_id_cache__add_kcore(const char *filename, bool force) { - char dir[32], sbuildid[BUILD_ID_SIZE * 2 + 1]; + char dir[32], sbuildid[SBUILD_ID_SIZE]; char from_dir[PATH_MAX], to_dir[PATH_MAX]; char *p; @@ -138,7 +128,7 @@ static int build_id_cache__add_kcore(const char *filename, bool force) return -1; *p = '\0'; - if (build_id_cache__kcore_buildid(from_dir, sbuildid)) + if (build_id_cache__kcore_buildid(from_dir, sbuildid) < 0) return -1; scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s", @@ -184,7 +174,7 @@ static int build_id_cache__add_kcore(const char *filename, bool force) static int build_id_cache__add_file(const char *filename) { - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + char sbuild_id[SBUILD_ID_SIZE]; u8 build_id[BUILD_ID_SIZE]; int err; @@ -204,7 +194,7 @@ static int build_id_cache__add_file(const char *filename) static int build_id_cache__remove_file(const char *filename) { u8 build_id[BUILD_ID_SIZE]; - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + char sbuild_id[SBUILD_ID_SIZE]; int err; @@ -276,7 +266,7 @@ static int build_id_cache__fprintf_missing(struct perf_session *session, FILE *f static int build_id_cache__update_file(const char *filename) { u8 build_id[BUILD_ID_SIZE]; - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + char sbuild_id[SBUILD_ID_SIZE]; int err = 0; @@ -363,7 +353,7 @@ int cmd_buildid_cache(int argc, const char **argv, setup_pager(); if (add_name_list_str) { - list = strlist__new(true, add_name_list_str); + list = strlist__new(add_name_list_str, NULL); if (list) { strlist__for_each(pos, list) if (build_id_cache__add_file(pos->s)) { @@ -381,7 +371,7 @@ int cmd_buildid_cache(int argc, const char **argv, } if (remove_name_list_str) { - list = strlist__new(true, remove_name_list_str); + list = strlist__new(remove_name_list_str, NULL); if (list) { strlist__for_each(pos, list) if (build_id_cache__remove_file(pos->s)) { @@ -399,7 +389,7 @@ int cmd_buildid_cache(int argc, const char **argv, } if (purge_name_list_str) { - list = strlist__new(true, purge_name_list_str); + list = strlist__new(purge_name_list_str, NULL); if (list) { strlist__for_each(pos, list) if (build_id_cache__purge_path(pos->s)) { @@ -420,7 +410,7 @@ int cmd_buildid_cache(int argc, const char **argv, ret = build_id_cache__fprintf_missing(session, stdout); if (update_name_list_str) { - list = strlist__new(true, update_name_list_str); + list = strlist__new(update_name_list_str, NULL); if (list) { strlist__for_each(pos, list) if (build_id_cache__update_file(pos->s)) { diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 9fe93c8d4fcf..918b4de29de4 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -19,29 +19,25 @@ static int sysfs__fprintf_build_id(FILE *fp) { - u8 kallsyms_build_id[BUILD_ID_SIZE]; - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + char sbuild_id[SBUILD_ID_SIZE]; + int ret; - if (sysfs__read_build_id("/sys/kernel/notes", kallsyms_build_id, - sizeof(kallsyms_build_id)) != 0) - return -1; + ret = sysfs__sprintf_build_id("/", sbuild_id); + if (ret != sizeof(sbuild_id)) + return ret < 0 ? ret : -EINVAL; - build_id__sprintf(kallsyms_build_id, sizeof(kallsyms_build_id), - sbuild_id); - fprintf(fp, "%s\n", sbuild_id); - return 0; + return fprintf(fp, "%s\n", sbuild_id); } static int filename__fprintf_build_id(const char *name, FILE *fp) { - u8 build_id[BUILD_ID_SIZE]; - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + char sbuild_id[SBUILD_ID_SIZE]; + int ret; - if (filename__read_build_id(name, build_id, - sizeof(build_id)) != sizeof(build_id)) - return 0; + ret = filename__sprintf_build_id(name, sbuild_id); + if (ret != sizeof(sbuild_id)) + return ret < 0 ? ret : -EINVAL; - build_id__sprintf(build_id, sizeof(build_id), sbuild_id); return fprintf(fp, "%s\n", sbuild_id); } @@ -63,7 +59,7 @@ static int perf_session__list_build_ids(bool force, bool with_hits) /* * See if this is an ELF file first: */ - if (filename__fprintf_build_id(input_name, stdout)) + if (filename__fprintf_build_id(input_name, stdout) > 0) goto out; session = perf_session__new(&file, false, &build_id__mark_dso_hit_ops); diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index daaa7dca9c3b..0b180a885ba3 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -722,6 +722,9 @@ static void data_process(void) if (verbose || data__files_cnt > 2) data__fprintf(); + /* Don't sort callchain for perf diff */ + perf_evsel__reset_sample_bit(evsel_base, CALLCHAIN); + hists__process(hists_base); } } diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 01b06492bd6a..f62c49b35be0 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -561,6 +561,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) .lost = perf_event__repipe, .aux = perf_event__repipe, .itrace_start = perf_event__repipe, + .context_switch = perf_event__repipe, .read = perf_event__repipe_sample, .throttle = perf_event__repipe, .unthrottle = perf_event__repipe, diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 1272559fa22d..b81cec33b4b2 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -297,8 +297,7 @@ static void cleanup_params(void) clear_perf_probe_event(params.events + i); line_range__clear(¶ms.line_range); free(params.target); - if (params.filter) - strfilter__delete(params.filter); + strfilter__delete(params.filter); memset(¶ms, 0, sizeof(params)); } diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index de165a1b9240..a660022f2c92 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -521,6 +521,15 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) goto out_child; } + /* + * Normally perf_session__new would do this, but it doesn't have the + * evlist. + */ + if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) { + pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); + rec->tool.ordered_events = false; + } + if (!rec->evlist->nr_groups) perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); @@ -762,12 +771,14 @@ static void callchain_debug(void) callchain_param.dump_size); } -int record_parse_callchain_opt(const struct option *opt __maybe_unused, +int record_parse_callchain_opt(const struct option *opt, const char *arg, int unset) { int ret; + struct record_opts *record = (struct record_opts *)opt->value; + record->callgraph_set = true; callchain_param.enabled = !unset; /* --no-call-graph */ @@ -777,17 +788,20 @@ int record_parse_callchain_opt(const struct option *opt __maybe_unused, return 0; } - ret = parse_callchain_record_opt(arg); + ret = parse_callchain_record_opt(arg, &callchain_param); if (!ret) callchain_debug(); return ret; } -int record_callchain_opt(const struct option *opt __maybe_unused, +int record_callchain_opt(const struct option *opt, const char *arg __maybe_unused, int unset __maybe_unused) { + struct record_opts *record = (struct record_opts *)opt->value; + + record->callgraph_set = true; callchain_param.enabled = true; if (callchain_param.record_mode == CALLCHAIN_NONE) @@ -965,9 +979,11 @@ static struct record record = { .tool = { .sample = process_sample_event, .fork = perf_event__process_fork, + .exit = perf_event__process_exit, .comm = perf_event__process_comm, .mmap = perf_event__process_mmap, .mmap2 = perf_event__process_mmap2, + .ordered_events = true, }, }; @@ -992,6 +1008,9 @@ struct option __record_options[] = { parse_events_option), OPT_CALLBACK(0, "filter", &record.evlist, "filter", "event filter", parse_filter), + OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, + NULL, "don't record events from perf itself", + exclude_perf), OPT_STRING('p', "pid", &record.opts.target.pid, "pid", "record events on existing process id"), OPT_STRING('t', "tid", &record.opts.target.tid, "tid", @@ -1030,7 +1049,9 @@ struct option __record_options[] = { OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, "per thread counts"), OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), - OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Record the sample timestamps"), + OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, + &record.opts.sample_time_set, + "Record the sample timestamps"), OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"), OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, "don't sample"), @@ -1070,6 +1091,8 @@ struct option __record_options[] = { "opts", "AUX area tracing Snapshot Mode", ""), OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout, "per thread proc mmap processing timeout in ms"), + OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events, + "Record context switch events"), OPT_END() }; @@ -1097,6 +1120,11 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) " system-wide mode\n"); usage_with_options(record_usage, record_options); } + if (rec->opts.record_switch_events && + !perf_can_record_switch_events()) { + ui__error("kernel does not support recording context switch events (--switch-events option)\n"); + usage_with_options(record_usage, record_options); + } if (!rec->itr) { rec->itr = auxtrace_record__init(rec->evlist, &err); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 95a47719aec3..62b285e32aa5 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -53,6 +53,7 @@ struct report { bool mem_mode; bool header; bool header_only; + bool nonany_branch_mode; int max_stack; struct perf_read_values show_threads_values; const char *pretty_printing_style; @@ -102,6 +103,9 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter, if (!ui__has_annotation()) return 0; + hist__account_cycles(iter->sample->branch_stack, al, iter->sample, + rep->nonany_branch_mode); + if (sort__mode == SORT_MODE__BRANCH) { bi = he->branch_info; err = addr_map_symbol__inc_samples(&bi->from, evsel->idx); @@ -258,6 +262,12 @@ static int report__setup_sample_type(struct report *rep) else callchain_param.record_mode = CALLCHAIN_FP; } + + /* ??? handle more cases than just ANY? */ + if (!(perf_evlist__combined_branch_type(session->evlist) & + PERF_SAMPLE_BRANCH_ANY)) + rep->nonany_branch_mode = true; + return 0; } @@ -306,6 +316,11 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report if (evname != NULL) ret += fprintf(fp, " of event '%s'", evname); + if (symbol_conf.show_ref_callgraph && + strstr(evname, "call-graph=no")) { + ret += fprintf(fp, ", show reference callgraph"); + } + if (rep->mem_mode) { ret += fprintf(fp, "\n# Total weight : %" PRIu64, nr_events); ret += fprintf(fp, "\n# Sort order : %s", sort_order ? : default_mem_sort_order); @@ -728,6 +743,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", "Instruction Tracing options", itrace_parse_synth_opts), + OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename, + "Show full source file name path for source lines"), + OPT_BOOLEAN(0, "show-ref-call-graph", &symbol_conf.show_ref_callgraph, + "Show callgraph from reference event"), OPT_END() }; struct perf_data_file file = { diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 24809787369f..4430340292c0 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -623,6 +623,7 @@ struct perf_script { struct perf_session *session; bool show_task_events; bool show_mmap_events; + bool show_switch_events; }; static int process_attr(struct perf_tool *tool, union perf_event *event, @@ -661,7 +662,7 @@ static int process_comm_event(struct perf_tool *tool, struct thread *thread; struct perf_script *script = container_of(tool, struct perf_script, tool); struct perf_session *session = script->session; - struct perf_evsel *evsel = perf_evlist__first(session->evlist); + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); int ret = -1; thread = machine__findnew_thread(machine, event->comm.pid, event->comm.tid); @@ -695,7 +696,7 @@ static int process_fork_event(struct perf_tool *tool, struct thread *thread; struct perf_script *script = container_of(tool, struct perf_script, tool); struct perf_session *session = script->session; - struct perf_evsel *evsel = perf_evlist__first(session->evlist); + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); if (perf_event__process_fork(tool, event, sample, machine) < 0) return -1; @@ -727,7 +728,7 @@ static int process_exit_event(struct perf_tool *tool, struct thread *thread; struct perf_script *script = container_of(tool, struct perf_script, tool); struct perf_session *session = script->session; - struct perf_evsel *evsel = perf_evlist__first(session->evlist); + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid); if (thread == NULL) { @@ -759,7 +760,7 @@ static int process_mmap_event(struct perf_tool *tool, struct thread *thread; struct perf_script *script = container_of(tool, struct perf_script, tool); struct perf_session *session = script->session; - struct perf_evsel *evsel = perf_evlist__first(session->evlist); + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); if (perf_event__process_mmap(tool, event, sample, machine) < 0) return -1; @@ -790,7 +791,7 @@ static int process_mmap2_event(struct perf_tool *tool, struct thread *thread; struct perf_script *script = container_of(tool, struct perf_script, tool); struct perf_session *session = script->session; - struct perf_evsel *evsel = perf_evlist__first(session->evlist); + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); if (perf_event__process_mmap2(tool, event, sample, machine) < 0) return -1; @@ -813,6 +814,32 @@ static int process_mmap2_event(struct perf_tool *tool, return 0; } +static int process_switch_event(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct thread *thread; + struct perf_script *script = container_of(tool, struct perf_script, tool); + struct perf_session *session = script->session; + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); + + if (perf_event__process_switch(tool, event, sample, machine) < 0) + return -1; + + thread = machine__findnew_thread(machine, sample->pid, + sample->tid); + if (thread == NULL) { + pr_debug("problem processing SWITCH event, skipping it.\n"); + return -1; + } + + print_sample_start(sample, thread, evsel); + perf_event__fprintf(event, stdout); + thread__put(thread); + return 0; +} + static void sig_handler(int sig __maybe_unused) { session_done = 1; @@ -834,6 +861,8 @@ static int __cmd_script(struct perf_script *script) script->tool.mmap = process_mmap_event; script->tool.mmap2 = process_mmap2_event; } + if (script->show_switch_events) + script->tool.context_switch = process_switch_event; ret = perf_session__process_events(script->session); @@ -1532,6 +1561,22 @@ static int have_cmd(int argc, const char **argv) return 0; } +static void script__setup_sample_type(struct perf_script *script) +{ + struct perf_session *session = script->session; + u64 sample_type = perf_evlist__combined_sample_type(session->evlist); + + if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { + if ((sample_type & PERF_SAMPLE_REGS_USER) && + (sample_type & PERF_SAMPLE_STACK_USER)) + callchain_param.record_mode = CALLCHAIN_DWARF; + else if (sample_type & PERF_SAMPLE_BRANCH_STACK) + callchain_param.record_mode = CALLCHAIN_LBR; + else + callchain_param.record_mode = CALLCHAIN_FP; + } +} + int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) { bool show_full_info = false; @@ -1618,10 +1663,19 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "Show the fork/comm/exit events"), OPT_BOOLEAN('\0', "show-mmap-events", &script.show_mmap_events, "Show the mmap events"), + OPT_BOOLEAN('\0', "show-switch-events", &script.show_switch_events, + "Show context switch events (if recorded)"), OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", "Instruction Tracing options", itrace_parse_synth_opts), + OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename, + "Show full source file name path for source lines"), + OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle, + "Enable symbol demangling"), + OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel, + "Enable kernel symbol demangling"), + OPT_END() }; const char * const script_subcommands[] = { "record", "report", NULL }; @@ -1816,6 +1870,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) goto out_delete; script.session = session; + script__setup_sample_type(&script); session->itrace_synth_opts = &itrace_synth_opts; @@ -1830,6 +1885,14 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) else symbol_conf.use_callchain = false; + if (session->tevent.pevent && + pevent_set_function_resolver(session->tevent.pevent, + machine__resolve_kernel_addr, + &session->machines.host) < 0) { + pr_err("%s: failed to set libtraceevent function resolver\n", __func__); + return -1; + } + if (generate_script_lang) { struct stat perf_stat; int input; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d99d850e1444..d46dbb1bc65d 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -58,6 +58,7 @@ #include "util/cpumap.h" #include "util/thread.h" #include "util/thread_map.h" +#include "util/counts.h" #include <stdlib.h> #include <sys/prctl.h> @@ -101,8 +102,6 @@ static struct target target = { static int run_count = 1; static bool no_inherit = false; -static bool scale = true; -static enum aggr_mode aggr_mode = AGGR_GLOBAL; static volatile pid_t child_pid = -1; static bool null_run = false; static int detailed_run = 0; @@ -112,11 +111,9 @@ static int big_num_opt = -1; static const char *csv_sep = NULL; static bool csv_output = false; static bool group = false; -static FILE *output = NULL; static const char *pre_cmd = NULL; static const char *post_cmd = NULL; static bool sync_run = false; -static unsigned int interval = 0; static unsigned int initial_delay = 0; static unsigned int unit_width = 4; /* strlen("unit") */ static bool forever = false; @@ -126,6 +123,11 @@ static int (*aggr_get_id)(struct cpu_map *m, int cpu); static volatile int done = 0; +static struct perf_stat_config stat_config = { + .aggr_mode = AGGR_GLOBAL, + .scale = true, +}; + static inline void diff_timespec(struct timespec *r, struct timespec *a, struct timespec *b) { @@ -148,7 +150,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel) { struct perf_event_attr *attr = &evsel->attr; - if (scale) + if (stat_config.scale) attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; @@ -178,142 +180,6 @@ static inline int nsec_counter(struct perf_evsel *evsel) return 0; } -static void zero_per_pkg(struct perf_evsel *counter) -{ - if (counter->per_pkg_mask) - memset(counter->per_pkg_mask, 0, MAX_NR_CPUS); -} - -static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) -{ - unsigned long *mask = counter->per_pkg_mask; - struct cpu_map *cpus = perf_evsel__cpus(counter); - int s; - - *skip = false; - - if (!counter->per_pkg) - return 0; - - if (cpu_map__empty(cpus)) - return 0; - - if (!mask) { - mask = zalloc(MAX_NR_CPUS); - if (!mask) - return -ENOMEM; - - counter->per_pkg_mask = mask; - } - - s = cpu_map__get_socket(cpus, cpu); - if (s < 0) - return -1; - - *skip = test_and_set_bit(s, mask) == 1; - return 0; -} - -static int -process_counter_values(struct perf_evsel *evsel, int cpu, int thread, - struct perf_counts_values *count) -{ - struct perf_counts_values *aggr = &evsel->counts->aggr; - static struct perf_counts_values zero; - bool skip = false; - - if (check_per_pkg(evsel, cpu, &skip)) { - pr_err("failed to read per-pkg counter\n"); - return -1; - } - - if (skip) - count = &zero; - - switch (aggr_mode) { - case AGGR_THREAD: - case AGGR_CORE: - case AGGR_SOCKET: - case AGGR_NONE: - if (!evsel->snapshot) - perf_evsel__compute_deltas(evsel, cpu, thread, count); - perf_counts_values__scale(count, scale, NULL); - if (aggr_mode == AGGR_NONE) - perf_stat__update_shadow_stats(evsel, count->values, cpu); - break; - case AGGR_GLOBAL: - aggr->val += count->val; - if (scale) { - aggr->ena += count->ena; - aggr->run += count->run; - } - default: - break; - } - - return 0; -} - -static int process_counter_maps(struct perf_evsel *counter) -{ - int nthreads = thread_map__nr(counter->threads); - int ncpus = perf_evsel__nr_cpus(counter); - int cpu, thread; - - if (counter->system_wide) - nthreads = 1; - - for (thread = 0; thread < nthreads; thread++) { - for (cpu = 0; cpu < ncpus; cpu++) { - if (process_counter_values(counter, cpu, thread, - perf_counts(counter->counts, cpu, thread))) - return -1; - } - } - - return 0; -} - -static int process_counter(struct perf_evsel *counter) -{ - struct perf_counts_values *aggr = &counter->counts->aggr; - struct perf_stat *ps = counter->priv; - u64 *count = counter->counts->aggr.values; - int i, ret; - - aggr->val = aggr->ena = aggr->run = 0; - init_stats(ps->res_stats); - - if (counter->per_pkg) - zero_per_pkg(counter); - - ret = process_counter_maps(counter); - if (ret) - return ret; - - if (aggr_mode != AGGR_GLOBAL) - return 0; - - if (!counter->snapshot) - perf_evsel__compute_deltas(counter, -1, -1, aggr); - perf_counts_values__scale(aggr, scale, &counter->counts->scaled); - - for (i = 0; i < 3; i++) - update_stats(&ps->res_stats[i], count[i]); - - if (verbose) { - fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", - perf_evsel__name(counter), count[0], count[1], count[2]); - } - - /* - * Save the full runtime - to allow normalization during printout: - */ - perf_stat__update_shadow_stats(counter, count, 0); - - return 0; -} - /* * Read out the results of a single counter: * do not aggregate counts across CPUs in system-wide mode @@ -351,7 +217,7 @@ static void read_counters(bool close_counters) if (read_counter(counter)) pr_warning("failed to read counter %s\n", counter->name); - if (process_counter(counter)) + if (perf_stat_process_counter(&stat_config, counter)) pr_warning("failed to process counter %s\n", counter->name); if (close_counters) { @@ -402,6 +268,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf static int __run_perf_stat(int argc, const char **argv) { + int interval = stat_config.interval; char msg[512]; unsigned long long t0, t1; struct perf_evsel *counter; @@ -545,13 +412,13 @@ static int run_perf_stat(int argc, const char **argv) static void print_running(u64 run, u64 ena) { if (csv_output) { - fprintf(output, "%s%" PRIu64 "%s%.2f", + fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f", csv_sep, run, csv_sep, ena ? 100.0 * run / ena : 100.0); } else if (run != ena) { - fprintf(output, " (%.2f%%)", 100.0 * run / ena); + fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena); } } @@ -560,9 +427,9 @@ static void print_noise_pct(double total, double avg) double pct = rel_stddev_stats(total, avg); if (csv_output) - fprintf(output, "%s%.2f%%", csv_sep, pct); + fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct); else if (pct) - fprintf(output, " ( +-%6.2f%% )", pct); + fprintf(stat_config.output, " ( +-%6.2f%% )", pct); } static void print_noise(struct perf_evsel *evsel, double avg) @@ -578,9 +445,9 @@ static void print_noise(struct perf_evsel *evsel, double avg) static void aggr_printout(struct perf_evsel *evsel, int id, int nr) { - switch (aggr_mode) { + switch (stat_config.aggr_mode) { case AGGR_CORE: - fprintf(output, "S%d-C%*d%s%*d%s", + fprintf(stat_config.output, "S%d-C%*d%s%*d%s", cpu_map__id_to_socket(id), csv_output ? 0 : -8, cpu_map__id_to_cpu(id), @@ -590,7 +457,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) csv_sep); break; case AGGR_SOCKET: - fprintf(output, "S%*d%s%*d%s", + fprintf(stat_config.output, "S%*d%s%*d%s", csv_output ? 0 : -5, id, csv_sep, @@ -599,12 +466,12 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) csv_sep); break; case AGGR_NONE: - fprintf(output, "CPU%*d%s", + fprintf(stat_config.output, "CPU%*d%s", csv_output ? 0 : -4, perf_evsel__cpus(evsel)->map[id], csv_sep); break; case AGGR_THREAD: - fprintf(output, "%*s-%*d%s", + fprintf(stat_config.output, "%*s-%*d%s", csv_output ? 0 : 16, thread_map__comm(evsel->threads, id), csv_output ? 0 : -8, @@ -619,6 +486,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) { + FILE *output = stat_config.output; double msecs = avg / 1e6; const char *fmt_v, *fmt_n; char name[25]; @@ -643,7 +511,7 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) if (evsel->cgrp) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); - if (csv_output || interval) + if (csv_output || stat_config.interval) return; if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) @@ -655,6 +523,7 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) { + FILE *output = stat_config.output; double sc = evsel->scale; const char *fmt; int cpu = cpu_map__id_to_cpu(id); @@ -670,7 +539,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) aggr_printout(evsel, id, nr); - if (aggr_mode == AGGR_GLOBAL) + if (stat_config.aggr_mode == AGGR_GLOBAL) cpu = 0; fprintf(output, fmt, avg, csv_sep); @@ -685,16 +554,18 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) if (evsel->cgrp) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); - if (csv_output || interval) + if (csv_output || stat_config.interval) return; - perf_stat__print_shadow_stats(output, evsel, avg, cpu, aggr_mode); + perf_stat__print_shadow_stats(output, evsel, avg, cpu, + stat_config.aggr_mode); } static void print_aggr(char *prefix) { + FILE *output = stat_config.output; struct perf_evsel *counter; - int cpu, cpu2, s, s2, id, nr; + int cpu, s, s2, id, nr; double uval; u64 ena, run, val; @@ -707,8 +578,7 @@ static void print_aggr(char *prefix) val = ena = run = 0; nr = 0; for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { - cpu2 = perf_evsel__cpus(counter)->map[cpu]; - s2 = aggr_get_id(evsel_list->cpus, cpu2); + s2 = aggr_get_id(perf_evsel__cpus(counter), cpu); if (s2 != id) continue; val += perf_counts(counter->counts, cpu, 0)->val; @@ -761,6 +631,7 @@ static void print_aggr(char *prefix) static void print_aggr_thread(struct perf_evsel *counter, char *prefix) { + FILE *output = stat_config.output; int nthreads = thread_map__nr(counter->threads); int ncpus = cpu_map__nr(counter->cpus); int cpu, thread; @@ -799,6 +670,7 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) */ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) { + FILE *output = stat_config.output; struct perf_stat *ps = counter->priv; double avg = avg_stats(&ps->res_stats[0]); int scaled = counter->counts->scaled; @@ -850,6 +722,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) */ static void print_counter(struct perf_evsel *counter, char *prefix) { + FILE *output = stat_config.output; u64 ena, run, val; double uval; int cpu; @@ -904,12 +777,13 @@ static void print_counter(struct perf_evsel *counter, char *prefix) static void print_interval(char *prefix, struct timespec *ts) { + FILE *output = stat_config.output; static int num_print_interval; sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); if (num_print_interval == 0 && !csv_output) { - switch (aggr_mode) { + switch (stat_config.aggr_mode) { case AGGR_SOCKET: fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); break; @@ -934,6 +808,7 @@ static void print_interval(char *prefix, struct timespec *ts) static void print_header(int argc, const char **argv) { + FILE *output = stat_config.output; int i; fflush(stdout); @@ -963,6 +838,8 @@ static void print_header(int argc, const char **argv) static void print_footer(void) { + FILE *output = stat_config.output; + if (!null_run) fprintf(output, "\n"); fprintf(output, " %17.9f seconds time elapsed", @@ -977,6 +854,7 @@ static void print_footer(void) static void print_counters(struct timespec *ts, int argc, const char **argv) { + int interval = stat_config.interval; struct perf_evsel *counter; char buf[64], *prefix = NULL; @@ -985,7 +863,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) else print_header(argc, argv); - switch (aggr_mode) { + switch (stat_config.aggr_mode) { case AGGR_CORE: case AGGR_SOCKET: print_aggr(prefix); @@ -1009,14 +887,14 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) if (!interval && !csv_output) print_footer(); - fflush(output); + fflush(stat_config.output); } static volatile int signr = -1; static void skip_signal(int signo) { - if ((child_pid == -1) || interval) + if ((child_pid == -1) || stat_config.interval) done = 1; signr = signo; @@ -1064,7 +942,7 @@ static int stat__set_big_num(const struct option *opt __maybe_unused, static int perf_stat_init_aggr_mode(void) { - switch (aggr_mode) { + switch (stat_config.aggr_mode) { case AGGR_SOCKET: if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { perror("cannot build socket map"); @@ -1270,7 +1148,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) "system-wide collection from all CPUs"), OPT_BOOLEAN('g', "group", &group, "put the counters into a counter group"), - OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"), + OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_INTEGER('r', "repeat", &run_count, @@ -1286,7 +1164,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) stat__set_big_num), OPT_STRING('C', "cpu", &target.cpu_list, "cpu", "list of cpus to monitor in system-wide"), - OPT_SET_UINT('A', "no-aggr", &aggr_mode, + OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, "disable CPU count aggregation", AGGR_NONE), OPT_STRING('x', "field-separator", &csv_sep, "separator", "print counts with custom separator"), @@ -1300,13 +1178,13 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) "command to run prior to the measured command"), OPT_STRING(0, "post", &post_cmd, "command", "command to run after to the measured command"), - OPT_UINTEGER('I', "interval-print", &interval, + OPT_UINTEGER('I', "interval-print", &stat_config.interval, "print counts at regular interval in ms (>= 100)"), - OPT_SET_UINT(0, "per-socket", &aggr_mode, + OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, "aggregate counts per processor socket", AGGR_SOCKET), - OPT_SET_UINT(0, "per-core", &aggr_mode, + OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, "aggregate counts per physical processor core", AGGR_CORE), - OPT_SET_UINT(0, "per-thread", &aggr_mode, + OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, "aggregate counts per thread", AGGR_THREAD), OPT_UINTEGER('D', "delay", &initial_delay, "ms to wait before starting measurement after program start"), @@ -1318,6 +1196,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) }; int status = -EINVAL, run_idx; const char *mode; + FILE *output = stderr; + unsigned int interval; setlocale(LC_ALL, ""); @@ -1328,7 +1208,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options(argc, argv, options, stat_usage, PARSE_OPT_STOP_AT_NON_OPTION); - output = stderr; + interval = stat_config.interval; + if (output_name && strcmp(output_name, "-")) output = NULL; @@ -1365,6 +1246,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) } } + stat_config.output = output; + if (csv_sep) { csv_output = true; if (!strcmp(csv_sep, "\\t")) @@ -1399,7 +1282,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) run_count = 1; } - if ((aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { + if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { fprintf(stderr, "The --per-thread option is only available " "when monitoring via -p -t options.\n"); parse_options_usage(NULL, options, "p", 1); @@ -1411,7 +1294,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) * no_aggr, cgroup are for system-wide only * --per-thread is aggregated per thread, we dont mix it with cpu mode */ - if (((aggr_mode != AGGR_GLOBAL && aggr_mode != AGGR_THREAD) || nr_cgroups) && + if (((stat_config.aggr_mode != AGGR_GLOBAL && + stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) && !target__has_cpu(&target)) { fprintf(stderr, "both cgroup and no-aggregation " "modes only available in system-wide mode\n"); @@ -1444,7 +1328,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) * Initialize thread_map with comm names, * so we could print it out on output. */ - if (aggr_mode == AGGR_THREAD) + if (stat_config.aggr_mode == AGGR_THREAD) thread_map__read_comms(evsel_list->threads); if (interval && interval < 100) { diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index ecf319728f25..8c465c83aabf 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -40,6 +40,7 @@ #include "util/xyarray.h" #include "util/sort.h" #include "util/intlist.h" +#include "util/parse-branch-options.h" #include "arch/common.h" #include "util/debug.h" @@ -601,8 +602,8 @@ static void display_sig(int sig __maybe_unused) static void display_setup_sig(void) { - signal(SIGSEGV, display_sig); - signal(SIGFPE, display_sig); + signal(SIGSEGV, sighandler_dump_stack); + signal(SIGFPE, sighandler_dump_stack); signal(SIGINT, display_sig); signal(SIGQUIT, display_sig); signal(SIGTERM, display_sig); @@ -695,6 +696,8 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter, perf_top__record_precise_ip(top, he, evsel->idx, ip); } + hist__account_cycles(iter->sample->branch_stack, al, iter->sample, + !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY)); return 0; } @@ -1171,6 +1174,12 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) "don't try to adjust column width, use these fixed values"), OPT_UINTEGER(0, "proc-map-timeout", &opts->proc_map_timeout, "per thread proc mmap processing timeout in ms"), + OPT_CALLBACK_NOOPT('b', "branch-any", &opts->branch_stack, + "branch any", "sample any taken branches", + parse_branch_stack), + OPT_CALLBACK('j', "branch-filter", &opts->branch_stack, + "branch filter mask", "branch stack filter modes", + parse_branch_stack), OPT_END() }; const char * const top_usage[] = { diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 39ad4d0ca884..4e3abba03062 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1,8 +1,27 @@ +/* + * builtin-trace.c + * + * Builtin 'trace' command: + * + * Display a continuously updated trace of any workload, CPU, specific PID, + * system wide, etc. Default format is loosely strace like, but any other + * event may be specified using --event. + * + * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> + * + * Initially based on the 'trace' prototype by Thomas Gleixner: + * + * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'") + * + * Released under the GPL v2. (and only v2, not any later version) + */ + #include <traceevent/event-parse.h> #include "builtin.h" #include "util/color.h" #include "util/debug.h" #include "util/evlist.h" +#include "util/exec_cmd.h" #include "util/machine.h" #include "util/session.h" #include "util/thread.h" @@ -26,6 +45,7 @@ #ifndef MADV_HWPOISON # define MADV_HWPOISON 100 + #endif #ifndef MADV_MERGEABLE @@ -247,42 +267,6 @@ out_delete: ({ struct syscall_tp *fields = evsel->priv; \ fields->name.pointer(&fields->name, sample); }) -static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist, - void *sys_enter_handler, - void *sys_exit_handler) -{ - int ret = -1; - struct perf_evsel *sys_enter, *sys_exit; - - sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler); - if (sys_enter == NULL) - goto out; - - if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args)) - goto out_delete_sys_enter; - - sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler); - if (sys_exit == NULL) - goto out_delete_sys_enter; - - if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret)) - goto out_delete_sys_exit; - - perf_evlist__add(evlist, sys_enter); - perf_evlist__add(evlist, sys_exit); - - ret = 0; -out: - return ret; - -out_delete_sys_exit: - perf_evsel__delete_priv(sys_exit); -out_delete_sys_enter: - perf_evsel__delete_priv(sys_enter); - goto out; -} - - struct syscall_arg { unsigned long val; struct thread *thread; @@ -604,6 +588,15 @@ static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1); static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", }; static DEFINE_STRARRAY(itimers); +static const char *keyctl_options[] = { + "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN", + "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ", + "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT", + "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT", + "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT", +}; +static DEFINE_STRARRAY(keyctl_options); + static const char *whences[] = { "SET", "CUR", "END", #ifdef SEEK_DATA "DATA", @@ -634,7 +627,8 @@ static DEFINE_STRARRAY(sighow); static const char *clockid[] = { "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID", - "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", + "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME", + "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI" }; static DEFINE_STRARRAY(clockid); @@ -779,6 +773,11 @@ static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size, #define SCA_ACCMODE syscall_arg__scnprintf_access_mode +static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, + struct syscall_arg *arg); + +#define SCA_FILENAME syscall_arg__scnprintf_filename + static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, struct syscall_arg *arg) { @@ -1006,14 +1005,23 @@ static struct syscall_fmt { bool hexret; } syscall_fmts[] = { { .name = "access", .errmsg = true, - .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, }, + .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ + [1] = SCA_ACCMODE, /* mode */ }, }, { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, { .name = "brk", .hexret = true, .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, + { .name = "chdir", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, + { .name = "chmod", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, + { .name = "chroot", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), }, { .name = "close", .errmsg = true, .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, { .name = "connect", .errmsg = true, }, + { .name = "creat", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, { .name = "dup", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "dup2", .errmsg = true, @@ -1024,7 +1032,8 @@ static struct syscall_fmt { { .name = "eventfd2", .errmsg = true, .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, }, { .name = "faccessat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ + [1] = SCA_FILENAME, /* filename */ }, }, { .name = "fadvise64", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fallocate", .errmsg = true, @@ -1034,11 +1043,13 @@ static struct syscall_fmt { { .name = "fchmod", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fchmodat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ + [1] = SCA_FILENAME, /* filename */ }, }, { .name = "fchown", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fchownat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ + [1] = SCA_FILENAME, /* filename */ }, }, { .name = "fcntl", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ [1] = SCA_STRARRAY, /* cmd */ }, @@ -1053,7 +1064,8 @@ static struct syscall_fmt { { .name = "fstat", .errmsg = true, .alias = "newfstat", .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fstatat", .errmsg = true, .alias = "newfstatat", - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ + [1] = SCA_FILENAME, /* filename */ }, }, { .name = "fstatfs", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fsync", .errmsg = true, @@ -1063,13 +1075,18 @@ static struct syscall_fmt { { .name = "futex", .errmsg = true, .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, }, { .name = "futimesat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ + [1] = SCA_FILENAME, /* filename */ }, }, { .name = "getdents", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "getdents64", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), }, { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, + { .name = "getxattr", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "inotify_add_watch", .errmsg = true, + .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, }, { .name = "ioctl", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ #if defined(__i386__) || defined(__x86_64__) @@ -1082,22 +1099,44 @@ static struct syscall_fmt { #else [2] = SCA_HEX, /* arg */ }, }, #endif + { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), }, { .name = "kill", .errmsg = true, .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, + { .name = "lchown", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, + { .name = "lgetxattr", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, { .name = "linkat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + { .name = "listxattr", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "llistxattr", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "lremovexattr", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, { .name = "lseek", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ [2] = SCA_STRARRAY, /* whence */ }, .arg_parm = { [2] = &strarray__whences, /* whence */ }, }, - { .name = "lstat", .errmsg = true, .alias = "newlstat", }, + { .name = "lsetxattr", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "lstat", .errmsg = true, .alias = "newlstat", + .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, + { .name = "lsxattr", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, { .name = "madvise", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* start */ [2] = SCA_MADV_BHV, /* behavior */ }, }, + { .name = "mkdir", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, { .name = "mkdirat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ + [1] = SCA_FILENAME, /* pathname */ }, }, + { .name = "mknod", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, { .name = "mknodat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ + [1] = SCA_FILENAME, /* filename */ }, }, { .name = "mlock", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, { .name = "mlockall", .errmsg = true, @@ -1110,6 +1149,8 @@ static struct syscall_fmt { { .name = "mprotect", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* start */ [2] = SCA_MMAP_PROT, /* prot */ }, }, + { .name = "mq_unlink", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, }, { .name = "mremap", .hexret = true, .arg_scnprintf = { [0] = SCA_HEX, /* addr */ [3] = SCA_MREMAP_FLAGS, /* flags */ @@ -1121,14 +1162,17 @@ static struct syscall_fmt { { .name = "name_to_handle_at", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "newfstatat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ + [1] = SCA_FILENAME, /* filename */ }, }, { .name = "open", .errmsg = true, - .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ + [1] = SCA_OPEN_FLAGS, /* flags */ }, }, { .name = "open_by_handle_at", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ [2] = SCA_OPEN_FLAGS, /* flags */ }, }, { .name = "openat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ + [1] = SCA_FILENAME, /* filename */ [2] = SCA_OPEN_FLAGS, /* flags */ }, }, { .name = "perf_event_open", .errmsg = true, .arg_scnprintf = { [1] = SCA_INT, /* pid */ @@ -1150,18 +1194,28 @@ static struct syscall_fmt { .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "read", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "readlink", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, }, { .name = "readlinkat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ + [1] = SCA_FILENAME, /* pathname */ }, }, { .name = "readv", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "recvfrom", .errmsg = true, - .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ + [3] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "recvmmsg", .errmsg = true, - .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ + [3] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "recvmsg", .errmsg = true, - .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ + [2] = SCA_MSG_FLAGS, /* flags */ }, }, + { .name = "removexattr", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, { .name = "renameat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + { .name = "rmdir", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, { .name = "rt_sigaction", .errmsg = true, .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, }, { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), }, @@ -1171,13 +1225,18 @@ static struct syscall_fmt { .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, { .name = "select", .errmsg = true, .timeout = true, }, { .name = "sendmmsg", .errmsg = true, - .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ + [3] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "sendmsg", .errmsg = true, - .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ + [2] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "sendto", .errmsg = true, - .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ + [3] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), }, { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, + { .name = "setxattr", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, { .name = "shutdown", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "socket", .errmsg = true, @@ -1188,18 +1247,35 @@ static struct syscall_fmt { .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ [1] = SCA_SK_TYPE, /* type */ }, .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, - { .name = "stat", .errmsg = true, .alias = "newstat", }, + { .name = "stat", .errmsg = true, .alias = "newstat", + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "statfs", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "swapoff", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, }, + { .name = "swapon", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, }, { .name = "symlinkat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "tgkill", .errmsg = true, .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, { .name = "tkill", .errmsg = true, .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, + { .name = "truncate", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, }, { .name = "uname", .errmsg = true, .alias = "newuname", }, { .name = "unlinkat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ + [1] = SCA_FILENAME, /* pathname */ }, }, + { .name = "utime", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, { .name = "utimensat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ + [1] = SCA_FILENAME, /* filename */ }, }, + { .name = "utimes", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, + { .name = "vmsplice", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "write", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "writev", .errmsg = true, @@ -1223,7 +1299,6 @@ struct syscall { int nr_args; struct format_field *args; const char *name; - bool filtered; bool is_exit; struct syscall_fmt *fmt; size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); @@ -1244,6 +1319,11 @@ static size_t fprintf_duration(unsigned long t, FILE *fp) return printed + fprintf(fp, "): "); } +/** + * filename.ptr: The filename char pointer that will be vfs_getname'd + * filename.entry_str_pos: Where to insert the string translated from + * filename.ptr by the vfs_getname tracepoint/kprobe. + */ struct thread_trace { u64 entry_time; u64 exit_time; @@ -1252,6 +1332,13 @@ struct thread_trace { unsigned long pfmaj, pfmin; char *entry_str; double runtime_ms; + struct { + unsigned long ptr; + short int entry_str_pos; + bool pending_open; + unsigned int namelen; + char *name; + } filename; struct { int max; char **table; @@ -1298,6 +1385,8 @@ fail: #define TRACE_PFMAJ (1 << 0) #define TRACE_PFMIN (1 << 1) +static const size_t trace__entry_str_size = 2048; + struct trace { struct perf_tool tool; struct { @@ -1307,6 +1396,10 @@ struct trace { struct { int max; struct syscall *table; + struct { + struct perf_evsel *sys_enter, + *sys_exit; + } events; } syscalls; struct record_opts opts; struct perf_evlist *evlist; @@ -1316,7 +1409,10 @@ struct trace { FILE *output; unsigned long nr_events; struct strlist *ev_qualifier; - const char *last_vfs_getname; + struct { + size_t nr; + int *entries; + } ev_qualifier_ids; struct intlist *tid_list; struct intlist *pid_list; struct { @@ -1340,6 +1436,7 @@ struct trace { bool show_tool_stats; bool trace_syscalls; bool force; + bool vfs_getname; int trace_pgfaults; }; @@ -1443,6 +1540,27 @@ static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, return printed; } +static void thread__set_filename_pos(struct thread *thread, const char *bf, + unsigned long ptr) +{ + struct thread_trace *ttrace = thread__priv(thread); + + ttrace->filename.ptr = ptr; + ttrace->filename.entry_str_pos = bf - ttrace->entry_str; +} + +static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, + struct syscall_arg *arg) +{ + unsigned long ptr = arg->val; + + if (!arg->trace->vfs_getname) + return scnprintf(bf, size, "%#x", ptr); + + thread__set_filename_pos(arg->thread, bf, ptr); + return 0; +} + static bool trace__filter_duration(struct trace *trace, double t) { return t < (trace->duration_filter * NSEC_PER_MSEC); @@ -1517,6 +1635,9 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) if (trace->host == NULL) return -ENOMEM; + if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0) + return -errno; + err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, evlist->threads, trace__tool_process, false, trace->opts.proc_map_timeout); @@ -1578,19 +1699,6 @@ static int trace__read_syscall_info(struct trace *trace, int id) sc = trace->syscalls.table + id; sc->name = name; - if (trace->ev_qualifier) { - bool in = strlist__find(trace->ev_qualifier, name) != NULL; - - if (!(in ^ trace->not_ev_qualifier)) { - sc->filtered = true; - /* - * No need to do read tracepoint information since this will be - * filtered out. - */ - return 0; - } - } - sc->fmt = syscall_fmt__find(sc->name); snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); @@ -1619,13 +1727,27 @@ static int trace__read_syscall_info(struct trace *trace, int id) static int trace__validate_ev_qualifier(struct trace *trace) { - int err = 0; + int err = 0, i; struct str_node *pos; + trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier); + trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr * + sizeof(trace->ev_qualifier_ids.entries[0])); + + if (trace->ev_qualifier_ids.entries == NULL) { + fputs("Error:\tNot enough memory for allocating events qualifier ids\n", + trace->output); + err = -EINVAL; + goto out; + } + + i = 0; + strlist__for_each(pos, trace->ev_qualifier) { const char *sc = pos->s; + int id = audit_name_to_syscall(sc, trace->audit.machine); - if (audit_name_to_syscall(sc, trace->audit.machine) < 0) { + if (id < 0) { if (err == 0) { fputs("Error:\tInvalid syscall ", trace->output); err = -EINVAL; @@ -1635,13 +1757,17 @@ static int trace__validate_ev_qualifier(struct trace *trace) fputs(sc, trace->output); } + + trace->ev_qualifier_ids.entries[i++] = id; } if (err < 0) { fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'" "\nHint:\tand: 'man syscalls'\n", trace->output); + zfree(&trace->ev_qualifier_ids.entries); + trace->ev_qualifier_ids.nr = 0; } - +out: return err; } @@ -1833,9 +1959,6 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, if (sc == NULL) return -1; - if (sc->filtered) - return 0; - thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); ttrace = thread__trace(thread, trace->output); if (ttrace == NULL) @@ -1844,7 +1967,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, args = perf_evsel__sc_tp_ptr(evsel, args, sample); if (ttrace->entry_str == NULL) { - ttrace->entry_str = malloc(1024); + ttrace->entry_str = malloc(trace__entry_str_size); if (!ttrace->entry_str) goto out_put; } @@ -1854,9 +1977,9 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, ttrace->entry_time = sample->time; msg = ttrace->entry_str; - printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name); + printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name); - printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, + printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed, args, trace, thread); if (sc->is_exit) { @@ -1864,8 +1987,11 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output); fprintf(trace->output, "%-70s\n", ttrace->entry_str); } - } else + } else { ttrace->entry_pending = true; + /* See trace__vfs_getname & trace__sys_exit */ + ttrace->filename.pending_open = false; + } if (trace->current != thread) { thread__put(trace->current); @@ -1891,9 +2017,6 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, if (sc == NULL) return -1; - if (sc->filtered) - return 0; - thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); ttrace = thread__trace(thread, trace->output); if (ttrace == NULL) @@ -1904,9 +2027,9 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, ret = perf_evsel__sc_tp_uint(evsel, ret, sample); - if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) { - trace__set_fd_pathname(thread, ret, trace->last_vfs_getname); - trace->last_vfs_getname = NULL; + if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) { + trace__set_fd_pathname(thread, ret, ttrace->filename.name); + ttrace->filename.pending_open = false; ++trace->stats.vfs_getname; } @@ -1961,7 +2084,56 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) { - trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname"); + struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); + struct thread_trace *ttrace; + size_t filename_len, entry_str_len, to_move; + ssize_t remaining_space; + char *pos; + const char *filename = perf_evsel__rawptr(evsel, sample, "pathname"); + + if (!thread) + goto out; + + ttrace = thread__priv(thread); + if (!ttrace) + goto out; + + filename_len = strlen(filename); + + if (ttrace->filename.namelen < filename_len) { + char *f = realloc(ttrace->filename.name, filename_len + 1); + + if (f == NULL) + goto out; + + ttrace->filename.namelen = filename_len; + ttrace->filename.name = f; + } + + strcpy(ttrace->filename.name, filename); + ttrace->filename.pending_open = true; + + if (!ttrace->filename.ptr) + goto out; + + entry_str_len = strlen(ttrace->entry_str); + remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */ + if (remaining_space <= 0) + goto out; + + if (filename_len > (size_t)remaining_space) { + filename += filename_len - remaining_space; + filename_len = remaining_space; + } + + to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */ + pos = ttrace->entry_str + ttrace->filename.entry_str_pos; + memmove(pos + filename_len, pos, to_move); + memcpy(pos, filename, filename_len); + + ttrace->filename.ptr = 0; + ttrace->filename.entry_str_pos = 0; +out: return 0; } @@ -2214,19 +2386,20 @@ static int trace__record(struct trace *trace, int argc, const char **argv) static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); -static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist) +static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist) { struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname"); if (evsel == NULL) - return; + return false; if (perf_evsel__field(evsel, "pathname") == NULL) { perf_evsel__delete(evsel); - return; + return false; } evsel->handler = trace__vfs_getname; perf_evlist__add(evlist, evsel); + return true; } static int perf_evlist__add_pgfault(struct perf_evlist *evlist, @@ -2283,9 +2456,68 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st } } +static int trace__add_syscall_newtp(struct trace *trace) +{ + int ret = -1; + struct perf_evlist *evlist = trace->evlist; + struct perf_evsel *sys_enter, *sys_exit; + + sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter); + if (sys_enter == NULL) + goto out; + + if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args)) + goto out_delete_sys_enter; + + sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit); + if (sys_exit == NULL) + goto out_delete_sys_enter; + + if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret)) + goto out_delete_sys_exit; + + perf_evlist__add(evlist, sys_enter); + perf_evlist__add(evlist, sys_exit); + + trace->syscalls.events.sys_enter = sys_enter; + trace->syscalls.events.sys_exit = sys_exit; + + ret = 0; +out: + return ret; + +out_delete_sys_exit: + perf_evsel__delete_priv(sys_exit); +out_delete_sys_enter: + perf_evsel__delete_priv(sys_enter); + goto out; +} + +static int trace__set_ev_qualifier_filter(struct trace *trace) +{ + int err = -1; + char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier, + trace->ev_qualifier_ids.nr, + trace->ev_qualifier_ids.entries); + + if (filter == NULL) + goto out_enomem; + + if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter)) + err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter); + + free(filter); +out: + return err; +out_enomem: + errno = ENOMEM; + goto out; +} + static int trace__run(struct trace *trace, int argc, const char **argv) { struct perf_evlist *evlist = trace->evlist; + struct perf_evsel *evsel; int err = -1, i; unsigned long before; const bool forks = argc > 0; @@ -2293,13 +2525,11 @@ static int trace__run(struct trace *trace, int argc, const char **argv) trace->live = true; - if (trace->trace_syscalls && - perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, - trace__sys_exit)) + if (trace->trace_syscalls && trace__add_syscall_newtp(trace)) goto out_error_raw_syscalls; if (trace->trace_syscalls) - perf_evlist__add_vfs_getname(evlist); + trace->vfs_getname = perf_evlist__add_vfs_getname(evlist); if ((trace->trace_pgfaults & TRACE_PFMAJ) && perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) { @@ -2356,11 +2586,22 @@ static int trace__run(struct trace *trace, int argc, const char **argv) else if (thread_map__pid(evlist->threads, 0) == -1) err = perf_evlist__set_filter_pid(evlist, getpid()); - if (err < 0) { - printf("err=%d,%s\n", -err, strerror(-err)); - exit(1); + if (err < 0) + goto out_error_mem; + + if (trace->ev_qualifier_ids.nr > 0) { + err = trace__set_ev_qualifier_filter(trace); + if (err < 0) + goto out_errno; + + pr_debug("event qualifier tracepoint filter: %s\n", + trace->syscalls.events.sys_exit->filter); } + err = perf_evlist__apply_filters(evlist, &evsel); + if (err < 0) + goto out_error_apply_filters; + err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false); if (err < 0) goto out_error_mmap; @@ -2462,10 +2703,21 @@ out_error_open: out_error: fprintf(trace->output, "%s\n", errbuf); goto out_delete_evlist; + +out_error_apply_filters: + fprintf(trace->output, + "Failed to set filter \"%s\" on event %s with %d (%s)\n", + evsel->filter, perf_evsel__name(evsel), errno, + strerror_r(errno, errbuf, sizeof(errbuf))); + goto out_delete_evlist; } out_error_mem: fprintf(trace->output, "Not enough memory to run!\n"); goto out_delete_evlist; + +out_errno: + fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno)); + goto out_delete_evlist; } static int trace__replay(struct trace *trace) @@ -2586,9 +2838,9 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, printed += fprintf(fp, "\n"); - printed += fprintf(fp, " syscall calls min avg max stddev\n"); - printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n"); - printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n"); + printed += fprintf(fp, " syscall calls total min avg max stddev\n"); + printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); + printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n"); /* each int_node is a syscall */ while (inode) { @@ -2605,8 +2857,8 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, sc = &trace->syscalls.table[inode->i]; printed += fprintf(fp, " %-15s", sc->name); - printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f", - n, min, avg); + printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f", + n, avg * n, min, avg); printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); } @@ -2778,7 +3030,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) .mmap_pages = UINT_MAX, .proc_map_timeout = 500, }, - .output = stdout, + .output = stderr, .show_comm = true, .trace_syscalls = true, }; @@ -2879,11 +3131,14 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) if (ev_qualifier_str != NULL) { const char *s = ev_qualifier_str; + struct strlist_config slist_config = { + .dirname = system_path(STRACE_GROUPS_DIR), + }; trace.not_ev_qualifier = *s == '!'; if (trace.not_ev_qualifier) ++s; - trace.ev_qualifier = strlist__new(true, s); + trace.ev_qualifier = strlist__new(s, &slist_config); if (trace.ev_qualifier == NULL) { fputs("Not enough memory to parse event qualifier", trace.output); diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 094ddaee104c..827557fc7511 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -11,7 +11,7 @@ ifneq ($(obj-perf),) obj-perf := $(abspath $(obj-perf))/ endif -$(shell echo -n > $(OUTPUT).config-detected) +$(shell printf "" > $(OUTPUT).config-detected) detected = $(shell echo "$(1)=y" >> $(OUTPUT).config-detected) detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected) @@ -297,7 +297,11 @@ ifndef NO_LIBELF else CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) LDFLAGS += $(LIBDW_LDFLAGS) - EXTLIBS += -ldw + DWARFLIBS := -ldw + ifeq ($(findstring -static,${LDFLAGS}),-static) + DWARFLIBS += -lelf -lebl -lz -llzma -lbz2 + endif + EXTLIBS += ${DWARFLIBS} $(call detected,CONFIG_DWARF) endif # PERF_HAVE_DWARF_REGS endif # NO_DWARF @@ -638,12 +642,13 @@ ifndef DESTDIR prefix ?= $(HOME) endif bindir_relative = bin -bindir = $(prefix)/$(bindir_relative) +bindir = $(abspath $(prefix)/$(bindir_relative)) mandir = share/man infodir = share/info perfexecdir = libexec/perf-core sharedir = $(prefix)/share template_dir = share/perf-core/templates +STRACE_GROUPS_DIR = share/perf-core/strace/groups htmldir = share/doc/perf-doc ifeq ($(prefix),/usr) sysconfdir = /etc @@ -663,6 +668,7 @@ libdir = $(prefix)/$(lib) # Shell quote (do not use $(call) to accommodate ancient setups); ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG)) +STRACE_GROUPS_DIR_SQ = $(subst ','\'',$(STRACE_GROUPS_DIR)) DESTDIR_SQ = $(subst ','\'',$(DESTDIR)) bindir_SQ = $(subst ','\'',$(bindir)) mandir_SQ = $(subst ','\'',$(mandir)) @@ -676,10 +682,13 @@ libdir_SQ = $(subst ','\'',$(libdir)) ifneq ($(filter /%,$(firstword $(perfexecdir))),) perfexec_instdir = $(perfexecdir) +STRACE_GROUPS_INSTDIR = $(STRACE_GROUPS_DIR) else perfexec_instdir = $(prefix)/$(perfexecdir) +STRACE_GROUPS_INSTDIR = $(prefix)/$(STRACE_GROUPS_DIR) endif perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir)) +STRACE_GROUPS_INSTDIR_SQ = $(subst ','\'',$(STRACE_GROUPS_INSTDIR)) # If we install to $(HOME) we keep the traceevent default: # $(HOME)/.traceevent/plugins @@ -713,6 +722,7 @@ $(call detected_var,htmldir_SQ) $(call detected_var,infodir_SQ) $(call detected_var,mandir_SQ) $(call detected_var,ETC_PERFCONFIG_SQ) +$(call detected_var,STRACE_GROUPS_DIR_SQ) $(call detected_var,prefix_SQ) $(call detected_var,perfexecdir_SQ) $(call detected_var,LIBDIR) diff --git a/tools/perf/perf-with-kcore.sh b/tools/perf/perf-with-kcore.sh index c7ff90a90e4e..7e47a7cbc195 100644 --- a/tools/perf/perf-with-kcore.sh +++ b/tools/perf/perf-with-kcore.sh @@ -50,7 +50,7 @@ copy_kcore() fi rm -f perf.data.junk - ("$PERF" record -o perf.data.junk $PERF_OPTIONS -- sleep 60) >/dev/null 2>/dev/null & + ("$PERF" record -o perf.data.junk "${PERF_OPTIONS[@]}" -- sleep 60) >/dev/null 2>/dev/null & PERF_PID=$! # Need to make sure that perf has started @@ -160,18 +160,18 @@ record() echo "*** WARNING *** /proc/sys/kernel/kptr_restrict prevents access to kernel addresses" >&2 fi - if echo "$PERF_OPTIONS" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then + if echo "${PERF_OPTIONS[@]}" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then echo "*** WARNING *** system-wide tracing without root access will not be able to read all necessary information from /proc" >&2 fi - if echo "$PERF_OPTIONS" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then + if echo "${PERF_OPTIONS[@]}" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then if [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt -1 ] ; then echo "*** WARNING *** /proc/sys/kernel/perf_event_paranoid restricts buffer size and tracepoint (sched_switch) use" >&2 fi - if echo "$PERF_OPTIONS" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then + if echo "${PERF_OPTIONS[@]}" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then true - elif echo "$PERF_OPTIONS" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then + elif echo "${PERF_OPTIONS[@]}" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then true elif [ ! -r /sys/kernel/debug -o ! -x /sys/kernel/debug ] ; then echo "*** WARNING *** /sys/kernel/debug permissions prevent tracepoint (sched_switch) use" >&2 @@ -193,8 +193,8 @@ record() mkdir "$PERF_DATA_DIR" - echo "$PERF record -o $PERF_DATA_DIR/perf.data $PERF_OPTIONS -- $*" - "$PERF" record -o "$PERF_DATA_DIR/perf.data" $PERF_OPTIONS -- $* || true + echo "$PERF record -o $PERF_DATA_DIR/perf.data ${PERF_OPTIONS[@]} -- $@" + "$PERF" record -o "$PERF_DATA_DIR/perf.data" "${PERF_OPTIONS[@]}" -- "$@" || true if rmdir "$PERF_DATA_DIR" > /dev/null 2>/dev/null ; then exit 1 @@ -209,8 +209,8 @@ subcommand() { find_perf check_buildid_cache_permissions - echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $*" - "$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" $* + echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $@" + "$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" "$@" } if [ "$1" = "fix_buildid_cache_permissions" ] ; then @@ -234,7 +234,7 @@ fi case "$PERF_SUB_COMMAND" in "record") while [ "$1" != "--" ] ; do - PERF_OPTIONS+="$1 " + PERF_OPTIONS+=("$1") shift || break done if [ "$1" != "--" ] ; then @@ -242,16 +242,16 @@ case "$PERF_SUB_COMMAND" in usage fi shift - record $* + record "$@" ;; "script") - subcommand $* + subcommand "$@" ;; "report") - subcommand $* + subcommand "$@" ;; "inject") - subcommand $* + subcommand "$@" ;; *) usage diff --git a/tools/perf/perf.c b/tools/perf/perf.c index b857fcbd00cf..07dbff5c0e60 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -231,7 +231,7 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) (*argc)--; } else if (!prefixcmp(cmd, CMD_DEBUGFS_DIR)) { perf_debugfs_set_path(cmd + strlen(CMD_DEBUGFS_DIR)); - fprintf(stderr, "dir: %s\n", debugfs_mountpoint); + fprintf(stderr, "dir: %s\n", tracing_path); if (envchanged) *envchanged = 1; } else if (!strcmp(cmd, "--list-cmds")) { diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 4a5827fff799..cccb4cf575d3 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -51,11 +51,14 @@ struct record_opts { bool sample_address; bool sample_weight; bool sample_time; + bool sample_time_set; + bool callgraph_set; bool period; bool sample_intr_regs; bool running_time; bool full_auxtrace; bool auxtrace_snapshot_mode; + bool record_switch_events; unsigned int freq; unsigned int mmap_pages; unsigned int auxtrace_mmap_pages; diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py index 2225162ee1fc..b9d508336ae6 100755 --- a/tools/perf/python/twatch.py +++ b/tools/perf/python/twatch.py @@ -18,10 +18,20 @@ import perf def main(): cpus = perf.cpu_map() threads = perf.thread_map() - evsel = perf.evsel(task = 1, comm = 1, mmap = 0, + evsel = perf.evsel(type = perf.TYPE_SOFTWARE, + config = perf.COUNT_SW_DUMMY, + task = 1, comm = 1, mmap = 0, freq = 0, wakeup_events = 1, watermark = 1, sample_id_all = 1, sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU) + + """What we want are just the PERF_RECORD_ lifetime events for threads, + using the default, PERF_TYPE_HARDWARE + PERF_COUNT_HW_CYCLES & freq=1 + (the default), makes perf reenable irq_vectors:local_timer_entry, when + disabling nohz, not good for some use cases where all we want is to get + threads comes and goes... So use (perf.TYPE_SOFTWARE, perf_COUNT_SW_DUMMY, + freq=0) instead.""" + evsel.open(cpus = cpus, threads = threads); evlist = perf.evlist(cpus, threads) evlist.add(evsel) diff --git a/tools/perf/scripts/python/bin/compaction-times-record b/tools/perf/scripts/python/bin/compaction-times-record new file mode 100644 index 000000000000..6edcd40e14e8 --- /dev/null +++ b/tools/perf/scripts/python/bin/compaction-times-record @@ -0,0 +1,2 @@ +#!/bin/bash +perf record -e compaction:mm_compaction_begin -e compaction:mm_compaction_end -e compaction:mm_compaction_migratepages -e compaction:mm_compaction_isolate_migratepages -e compaction:mm_compaction_isolate_freepages $@ diff --git a/tools/perf/scripts/python/bin/compaction-times-report b/tools/perf/scripts/python/bin/compaction-times-report new file mode 100644 index 000000000000..3dc13897cfde --- /dev/null +++ b/tools/perf/scripts/python/bin/compaction-times-report @@ -0,0 +1,4 @@ +#!/bin/bash +#description: display time taken by mm compaction +#args: [-h] [-u] [-p|-pv] [-t | [-m] [-fs] [-ms]] [pid|pid-range|comm-regex] +perf script -s "$PERF_EXEC_PATH"/scripts/python/compaction-times.py $@ diff --git a/tools/perf/scripts/python/call-graph-from-postgresql.py b/tools/perf/scripts/python/call-graph-from-postgresql.py new file mode 100644 index 000000000000..e78fdc2a5a9d --- /dev/null +++ b/tools/perf/scripts/python/call-graph-from-postgresql.py @@ -0,0 +1,327 @@ +#!/usr/bin/python2 +# call-graph-from-postgresql.py: create call-graph from postgresql database +# Copyright (c) 2014, Intel Corporation. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms and conditions of the GNU General Public License, +# version 2, as published by the Free Software Foundation. +# +# This program is distributed in the hope it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. + +# To use this script you will need to have exported data using the +# export-to-postgresql.py script. Refer to that script for details. +# +# Following on from the example in the export-to-postgresql.py script, a +# call-graph can be displayed for the pt_example database like this: +# +# python tools/perf/scripts/python/call-graph-from-postgresql.py pt_example +# +# Note this script supports connecting to remote databases by setting hostname, +# port, username, password, and dbname e.g. +# +# python tools/perf/scripts/python/call-graph-from-postgresql.py "hostname=myhost username=myuser password=mypassword dbname=pt_example" +# +# The result is a GUI window with a tree representing a context-sensitive +# call-graph. Expanding a couple of levels of the tree and adjusting column +# widths to suit will display something like: +# +# Call Graph: pt_example +# Call Path Object Count Time(ns) Time(%) Branch Count Branch Count(%) +# v- ls +# v- 2638:2638 +# v- _start ld-2.19.so 1 10074071 100.0 211135 100.0 +# |- unknown unknown 1 13198 0.1 1 0.0 +# >- _dl_start ld-2.19.so 1 1400980 13.9 19637 9.3 +# >- _d_linit_internal ld-2.19.so 1 448152 4.4 11094 5.3 +# v-__libc_start_main@plt ls 1 8211741 81.5 180397 85.4 +# >- _dl_fixup ld-2.19.so 1 7607 0.1 108 0.1 +# >- __cxa_atexit libc-2.19.so 1 11737 0.1 10 0.0 +# >- __libc_csu_init ls 1 10354 0.1 10 0.0 +# |- _setjmp libc-2.19.so 1 0 0.0 4 0.0 +# v- main ls 1 8182043 99.6 180254 99.9 +# +# Points to note: +# The top level is a command name (comm) +# The next level is a thread (pid:tid) +# Subsequent levels are functions +# 'Count' is the number of calls +# 'Time' is the elapsed time until the function returns +# Percentages are relative to the level above +# 'Branch Count' is the total number of branches for that function and all +# functions that it calls + +import sys +from PySide.QtCore import * +from PySide.QtGui import * +from PySide.QtSql import * +from decimal import * + +class TreeItem(): + + def __init__(self, db, row, parent_item): + self.db = db + self.row = row + self.parent_item = parent_item + self.query_done = False; + self.child_count = 0 + self.child_items = [] + self.data = ["", "", "", "", "", "", ""] + self.comm_id = 0 + self.thread_id = 0 + self.call_path_id = 1 + self.branch_count = 0 + self.time = 0 + if not parent_item: + self.setUpRoot() + + def setUpRoot(self): + self.query_done = True + query = QSqlQuery(self.db) + ret = query.exec_('SELECT id, comm FROM comms') + if not ret: + raise Exception("Query failed: " + query.lastError().text()) + while query.next(): + if not query.value(0): + continue + child_item = TreeItem(self.db, self.child_count, self) + self.child_items.append(child_item) + self.child_count += 1 + child_item.setUpLevel1(query.value(0), query.value(1)) + + def setUpLevel1(self, comm_id, comm): + self.query_done = True; + self.comm_id = comm_id + self.data[0] = comm + self.child_items = [] + self.child_count = 0 + query = QSqlQuery(self.db) + ret = query.exec_('SELECT thread_id, ( SELECT pid FROM threads WHERE id = thread_id ), ( SELECT tid FROM threads WHERE id = thread_id ) FROM comm_threads WHERE comm_id = ' + str(comm_id)) + if not ret: + raise Exception("Query failed: " + query.lastError().text()) + while query.next(): + child_item = TreeItem(self.db, self.child_count, self) + self.child_items.append(child_item) + self.child_count += 1 + child_item.setUpLevel2(comm_id, query.value(0), query.value(1), query.value(2)) + + def setUpLevel2(self, comm_id, thread_id, pid, tid): + self.comm_id = comm_id + self.thread_id = thread_id + self.data[0] = str(pid) + ":" + str(tid) + + def getChildItem(self, row): + return self.child_items[row] + + def getParentItem(self): + return self.parent_item + + def getRow(self): + return self.row + + def timePercent(self, b): + if not self.time: + return "0.0" + x = (b * Decimal(100)) / self.time + return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP)) + + def branchPercent(self, b): + if not self.branch_count: + return "0.0" + x = (b * Decimal(100)) / self.branch_count + return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP)) + + def addChild(self, call_path_id, name, dso, count, time, branch_count): + child_item = TreeItem(self.db, self.child_count, self) + child_item.comm_id = self.comm_id + child_item.thread_id = self.thread_id + child_item.call_path_id = call_path_id + child_item.branch_count = branch_count + child_item.time = time + child_item.data[0] = name + if dso == "[kernel.kallsyms]": + dso = "[kernel]" + child_item.data[1] = dso + child_item.data[2] = str(count) + child_item.data[3] = str(time) + child_item.data[4] = self.timePercent(time) + child_item.data[5] = str(branch_count) + child_item.data[6] = self.branchPercent(branch_count) + self.child_items.append(child_item) + self.child_count += 1 + + def selectCalls(self): + self.query_done = True; + query = QSqlQuery(self.db) + ret = query.exec_('SELECT id, call_path_id, branch_count, call_time, return_time, ' + '( SELECT name FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ), ' + '( SELECT short_name FROM dsos WHERE id = ( SELECT dso_id FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ) ), ' + '( SELECT ip FROM call_paths where id = call_path_id ) ' + 'FROM calls WHERE parent_call_path_id = ' + str(self.call_path_id) + ' AND comm_id = ' + str(self.comm_id) + ' AND thread_id = ' + str(self.thread_id) + + 'ORDER BY call_path_id') + if not ret: + raise Exception("Query failed: " + query.lastError().text()) + last_call_path_id = 0 + name = "" + dso = "" + count = 0 + branch_count = 0 + total_branch_count = 0 + time = 0 + total_time = 0 + while query.next(): + if query.value(1) == last_call_path_id: + count += 1 + branch_count += query.value(2) + time += query.value(4) - query.value(3) + else: + if count: + self.addChild(last_call_path_id, name, dso, count, time, branch_count) + last_call_path_id = query.value(1) + name = query.value(5) + dso = query.value(6) + count = 1 + total_branch_count += branch_count + total_time += time + branch_count = query.value(2) + time = query.value(4) - query.value(3) + if count: + self.addChild(last_call_path_id, name, dso, count, time, branch_count) + total_branch_count += branch_count + total_time += time + # Top level does not have time or branch count, so fix that here + if total_branch_count > self.branch_count: + self.branch_count = total_branch_count + if self.branch_count: + for child_item in self.child_items: + child_item.data[6] = self.branchPercent(child_item.branch_count) + if total_time > self.time: + self.time = total_time + if self.time: + for child_item in self.child_items: + child_item.data[4] = self.timePercent(child_item.time) + + def childCount(self): + if not self.query_done: + self.selectCalls() + return self.child_count + + def columnCount(self): + return 7 + + def columnHeader(self, column): + headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "] + return headers[column] + + def getData(self, column): + return self.data[column] + +class TreeModel(QAbstractItemModel): + + def __init__(self, db, parent=None): + super(TreeModel, self).__init__(parent) + self.db = db + self.root = TreeItem(db, 0, None) + + def columnCount(self, parent): + return self.root.columnCount() + + def rowCount(self, parent): + if parent.isValid(): + parent_item = parent.internalPointer() + else: + parent_item = self.root + return parent_item.childCount() + + def headerData(self, section, orientation, role): + if role == Qt.TextAlignmentRole: + if section > 1: + return Qt.AlignRight + if role != Qt.DisplayRole: + return None + if orientation != Qt.Horizontal: + return None + return self.root.columnHeader(section) + + def parent(self, child): + child_item = child.internalPointer() + if child_item is self.root: + return QModelIndex() + parent_item = child_item.getParentItem() + return self.createIndex(parent_item.getRow(), 0, parent_item) + + def index(self, row, column, parent): + if parent.isValid(): + parent_item = parent.internalPointer() + else: + parent_item = self.root + child_item = parent_item.getChildItem(row) + return self.createIndex(row, column, child_item) + + def data(self, index, role): + if role == Qt.TextAlignmentRole: + if index.column() > 1: + return Qt.AlignRight + if role != Qt.DisplayRole: + return None + index_item = index.internalPointer() + return index_item.getData(index.column()) + +class MainWindow(QMainWindow): + + def __init__(self, db, dbname, parent=None): + super(MainWindow, self).__init__(parent) + + self.setObjectName("MainWindow") + self.setWindowTitle("Call Graph: " + dbname) + self.move(100, 100) + self.resize(800, 600) + style = self.style() + icon = style.standardIcon(QStyle.SP_MessageBoxInformation) + self.setWindowIcon(icon); + + self.model = TreeModel(db) + + self.view = QTreeView() + self.view.setModel(self.model) + + self.setCentralWidget(self.view) + +if __name__ == '__main__': + if (len(sys.argv) < 2): + print >> sys.stderr, "Usage is: call-graph-from-postgresql.py <database name>" + raise Exception("Too few arguments") + + dbname = sys.argv[1] + + db = QSqlDatabase.addDatabase('QPSQL') + + opts = dbname.split() + for opt in opts: + if '=' in opt: + opt = opt.split('=') + if opt[0] == 'hostname': + db.setHostName(opt[1]) + elif opt[0] == 'port': + db.setPort(int(opt[1])) + elif opt[0] == 'username': + db.setUserName(opt[1]) + elif opt[0] == 'password': + db.setPassword(opt[1]) + elif opt[0] == 'dbname': + dbname = opt[1] + else: + dbname = opt + + db.setDatabaseName(dbname) + if not db.open(): + raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text()) + + app = QApplication(sys.argv) + window = MainWindow(db, dbname) + window.show() + err = app.exec_() + db.close() + sys.exit(err) diff --git a/tools/perf/scripts/python/compaction-times.py b/tools/perf/scripts/python/compaction-times.py new file mode 100644 index 000000000000..239cb0568ec3 --- /dev/null +++ b/tools/perf/scripts/python/compaction-times.py @@ -0,0 +1,311 @@ +# report time spent in compaction +# Licensed under the terms of the GNU GPL License version 2 + +# testing: +# 'echo 1 > /proc/sys/vm/compact_memory' to force compaction of all zones + +import os +import sys +import re + +import signal +signal.signal(signal.SIGPIPE, signal.SIG_DFL) + +usage = "usage: perf script report compaction-times.py -- [-h] [-u] [-p|-pv] [-t | [-m] [-fs] [-ms]] [pid|pid-range|comm-regex]\n" + +class popt: + DISP_DFL = 0 + DISP_PROC = 1 + DISP_PROC_VERBOSE=2 + +class topt: + DISP_TIME = 0 + DISP_MIG = 1 + DISP_ISOLFREE = 2 + DISP_ISOLMIG = 4 + DISP_ALL = 7 + +class comm_filter: + def __init__(self, re): + self.re = re + + def filter(self, pid, comm): + m = self.re.search(comm) + return m == None or m.group() == "" + +class pid_filter: + def __init__(self, low, high): + self.low = (0 if low == "" else int(low)) + self.high = (0 if high == "" else int(high)) + + def filter(self, pid, comm): + return not (pid >= self.low and (self.high == 0 or pid <= self.high)) + +def set_type(t): + global opt_disp + opt_disp = (t if opt_disp == topt.DISP_ALL else opt_disp|t) + +def ns(sec, nsec): + return (sec * 1000000000) + nsec + +def time(ns): + return "%dns" % ns if opt_ns else "%dus" % (round(ns, -3) / 1000) + +class pair: + def __init__(self, aval, bval, alabel = None, blabel = None): + self.alabel = alabel + self.blabel = blabel + self.aval = aval + self.bval = bval + + def __add__(self, rhs): + self.aval += rhs.aval + self.bval += rhs.bval + return self + + def __str__(self): + return "%s=%d %s=%d" % (self.alabel, self.aval, self.blabel, self.bval) + +class cnode: + def __init__(self, ns): + self.ns = ns + self.migrated = pair(0, 0, "moved", "failed") + self.fscan = pair(0,0, "scanned", "isolated") + self.mscan = pair(0,0, "scanned", "isolated") + + def __add__(self, rhs): + self.ns += rhs.ns + self.migrated += rhs.migrated + self.fscan += rhs.fscan + self.mscan += rhs.mscan + return self + + def __str__(self): + prev = 0 + s = "%s " % time(self.ns) + if (opt_disp & topt.DISP_MIG): + s += "migration: %s" % self.migrated + prev = 1 + if (opt_disp & topt.DISP_ISOLFREE): + s += "%sfree_scanner: %s" % (" " if prev else "", self.fscan) + prev = 1 + if (opt_disp & topt.DISP_ISOLMIG): + s += "%smigration_scanner: %s" % (" " if prev else "", self.mscan) + return s + + def complete(self, secs, nsecs): + self.ns = ns(secs, nsecs) - self.ns + + def increment(self, migrated, fscan, mscan): + if (migrated != None): + self.migrated += migrated + if (fscan != None): + self.fscan += fscan + if (mscan != None): + self.mscan += mscan + + +class chead: + heads = {} + val = cnode(0); + fobj = None + + @classmethod + def add_filter(cls, filter): + cls.fobj = filter + + @classmethod + def create_pending(cls, pid, comm, start_secs, start_nsecs): + filtered = 0 + try: + head = cls.heads[pid] + filtered = head.is_filtered() + except KeyError: + if cls.fobj != None: + filtered = cls.fobj.filter(pid, comm) + head = cls.heads[pid] = chead(comm, pid, filtered) + + if not filtered: + head.mark_pending(start_secs, start_nsecs) + + @classmethod + def increment_pending(cls, pid, migrated, fscan, mscan): + head = cls.heads[pid] + if not head.is_filtered(): + if head.is_pending(): + head.do_increment(migrated, fscan, mscan) + else: + sys.stderr.write("missing start compaction event for pid %d\n" % pid) + + @classmethod + def complete_pending(cls, pid, secs, nsecs): + head = cls.heads[pid] + if not head.is_filtered(): + if head.is_pending(): + head.make_complete(secs, nsecs) + else: + sys.stderr.write("missing start compaction event for pid %d\n" % pid) + + @classmethod + def gen(cls): + if opt_proc != popt.DISP_DFL: + for i in cls.heads: + yield cls.heads[i] + + @classmethod + def str(cls): + return cls.val + + def __init__(self, comm, pid, filtered): + self.comm = comm + self.pid = pid + self.val = cnode(0) + self.pending = None + self.filtered = filtered + self.list = [] + + def __add__(self, rhs): + self.ns += rhs.ns + self.val += rhs.val + return self + + def mark_pending(self, secs, nsecs): + self.pending = cnode(ns(secs, nsecs)) + + def do_increment(self, migrated, fscan, mscan): + self.pending.increment(migrated, fscan, mscan) + + def make_complete(self, secs, nsecs): + self.pending.complete(secs, nsecs) + chead.val += self.pending + + if opt_proc != popt.DISP_DFL: + self.val += self.pending + + if opt_proc == popt.DISP_PROC_VERBOSE: + self.list.append(self.pending) + self.pending = None + + def enumerate(self): + if opt_proc == popt.DISP_PROC_VERBOSE and not self.is_filtered(): + for i, pelem in enumerate(self.list): + sys.stdout.write("%d[%s].%d: %s\n" % (self.pid, self.comm, i+1, pelem)) + + def is_pending(self): + return self.pending != None + + def is_filtered(self): + return self.filtered + + def display(self): + if not self.is_filtered(): + sys.stdout.write("%d[%s]: %s\n" % (self.pid, self.comm, self.val)) + + +def trace_end(): + sys.stdout.write("total: %s\n" % chead.str()) + for i in chead.gen(): + i.display(), + i.enumerate() + +def compaction__mm_compaction_migratepages(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + common_callchain, nr_migrated, nr_failed): + + chead.increment_pending(common_pid, + pair(nr_migrated, nr_failed), None, None) + +def compaction__mm_compaction_isolate_freepages(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + common_callchain, start_pfn, end_pfn, nr_scanned, nr_taken): + + chead.increment_pending(common_pid, + None, pair(nr_scanned, nr_taken), None) + +def compaction__mm_compaction_isolate_migratepages(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + common_callchain, start_pfn, end_pfn, nr_scanned, nr_taken): + + chead.increment_pending(common_pid, + None, None, pair(nr_scanned, nr_taken)) + +def compaction__mm_compaction_end(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + common_callchain, zone_start, migrate_start, free_start, zone_end, + sync, status): + + chead.complete_pending(common_pid, common_secs, common_nsecs) + +def compaction__mm_compaction_begin(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + common_callchain, zone_start, migrate_start, free_start, zone_end, + sync): + + chead.create_pending(common_pid, common_comm, common_secs, common_nsecs) + +def pr_help(): + global usage + + sys.stdout.write(usage) + sys.stdout.write("\n") + sys.stdout.write("-h display this help\n") + sys.stdout.write("-p display by process\n") + sys.stdout.write("-pv display by process (verbose)\n") + sys.stdout.write("-t display stall times only\n") + sys.stdout.write("-m display stats for migration\n") + sys.stdout.write("-fs display stats for free scanner\n") + sys.stdout.write("-ms display stats for migration scanner\n") + sys.stdout.write("-u display results in microseconds (default nanoseconds)\n") + + +comm_re = None +pid_re = None +pid_regex = "^(\d*)-(\d*)$|^(\d*)$" + +opt_proc = popt.DISP_DFL +opt_disp = topt.DISP_ALL + +opt_ns = True + +argc = len(sys.argv) - 1 +if argc >= 1: + pid_re = re.compile(pid_regex) + + for i, opt in enumerate(sys.argv[1:]): + if opt[0] == "-": + if opt == "-h": + pr_help() + exit(0); + elif opt == "-p": + opt_proc = popt.DISP_PROC + elif opt == "-pv": + opt_proc = popt.DISP_PROC_VERBOSE + elif opt == '-u': + opt_ns = False + elif opt == "-t": + set_type(topt.DISP_TIME) + elif opt == "-m": + set_type(topt.DISP_MIG) + elif opt == "-fs": + set_type(topt.DISP_ISOLFREE) + elif opt == "-ms": + set_type(topt.DISP_ISOLMIG) + else: + sys.exit(usage) + + elif i == argc - 1: + m = pid_re.search(opt) + if m != None and m.group() != "": + if m.group(3) != None: + f = pid_filter(m.group(3), m.group(3)) + else: + f = pid_filter(m.group(1), m.group(2)) + else: + try: + comm_re=re.compile(opt) + except: + sys.stderr.write("invalid regex '%s'" % opt) + sys.exit(usage) + f = comm_filter(comm_re) + + chead.add_filter(f) diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 4cdafd880074..84a32037a80f 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -15,6 +15,53 @@ import sys import struct import datetime +# To use this script you will need to have installed package python-pyside which +# provides LGPL-licensed Python bindings for Qt. You will also need the package +# libqt4-sql-psql for Qt postgresql support. +# +# The script assumes postgresql is running on the local machine and that the +# user has postgresql permissions to create databases. Examples of installing +# postgresql and adding such a user are: +# +# fedora: +# +# $ sudo yum install postgresql postgresql-server python-pyside qt-postgresql +# $ sudo su - postgres -c initdb +# $ sudo service postgresql start +# $ sudo su - postgres +# $ createuser <your user id here> +# Shall the new role be a superuser? (y/n) y +# +# ubuntu: +# +# $ sudo apt-get install postgresql +# $ sudo su - postgres +# $ createuser <your user id here> +# Shall the new role be a superuser? (y/n) y +# +# An example of using this script with Intel PT: +# +# $ perf record -e intel_pt//u ls +# $ perf script -s ~/libexec/perf-core/scripts/python/export-to-postgresql.py pt_example branches calls +# 2015-05-29 12:49:23.464364 Creating database... +# 2015-05-29 12:49:26.281717 Writing to intermediate files... +# 2015-05-29 12:49:27.190383 Copying to database... +# 2015-05-29 12:49:28.140451 Removing intermediate files... +# 2015-05-29 12:49:28.147451 Adding primary keys +# 2015-05-29 12:49:28.655683 Adding foreign keys +# 2015-05-29 12:49:29.365350 Done +# +# To browse the database, psql can be used e.g. +# +# $ psql pt_example +# pt_example=# select * from samples_view where id < 100; +# pt_example=# \d+ +# pt_example=# \d+ samples_view +# pt_example=# \q +# +# An example of using the database is provided by the script +# call-graph-from-postgresql.py. Refer to that script for details. + from PySide.QtSql import * # Need to access PostgreSQL C library directly to use COPY FROM STDIN diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index d20d6e6ab65b..c1518bdd0f1b 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -32,6 +32,7 @@ perf-y += sample-parsing.o perf-y += parse-no-sample-id-all.o perf-y += kmod-path.o perf-y += thread-map.o +perf-y += llvm.o perf-$(CONFIG_X86) += perf-time-to-tsc.o diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index c1dde733c3a6..136cd934be66 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -175,6 +175,10 @@ static struct test { .func = test__thread_map, }, { + .desc = "Test LLVM searching and compiling", + .func = test__llvm, + }, + { .func = NULL, }, }; diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 7d82c8be5e36..7ed737019de7 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -279,6 +279,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine) symbol_conf.use_callchain = false; symbol_conf.cumulate_callchain = false; + perf_evsel__reset_sample_bit(evsel, CALLCHAIN); setup_sorting(); callchain_register_param(&callchain_param); @@ -425,6 +426,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine) symbol_conf.use_callchain = true; symbol_conf.cumulate_callchain = false; + perf_evsel__set_sample_bit(evsel, CALLCHAIN); setup_sorting(); callchain_register_param(&callchain_param); @@ -482,6 +484,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) symbol_conf.use_callchain = false; symbol_conf.cumulate_callchain = true; + perf_evsel__reset_sample_bit(evsel, CALLCHAIN); setup_sorting(); callchain_register_param(&callchain_param); @@ -665,6 +668,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) symbol_conf.use_callchain = true; symbol_conf.cumulate_callchain = true; + perf_evsel__set_sample_bit(evsel, CALLCHAIN); setup_sorting(); callchain_register_param(&callchain_param); diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c new file mode 100644 index 000000000000..a337356fd979 --- /dev/null +++ b/tools/perf/tests/llvm.c @@ -0,0 +1,98 @@ +#include <stdio.h> +#include <bpf/libbpf.h> +#include <util/llvm-utils.h> +#include <util/cache.h> +#include "tests.h" +#include "debug.h" + +static int perf_config_cb(const char *var, const char *val, + void *arg __maybe_unused) +{ + return perf_default_config(var, val, arg); +} + +/* + * Randomly give it a "version" section since we don't really load it + * into kernel + */ +static const char test_bpf_prog[] = + "__attribute__((section(\"do_fork\"), used)) " + "int fork(void *ctx) {return 0;} " + "char _license[] __attribute__((section(\"license\"), used)) = \"GPL\";" + "int _version __attribute__((section(\"version\"), used)) = 0x40100;"; + +#ifdef HAVE_LIBBPF_SUPPORT +static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz) +{ + struct bpf_object *obj; + + obj = bpf_object__open_buffer(obj_buf, obj_buf_sz); + if (!obj) + return -1; + bpf_object__close(obj); + return 0; +} +#else +static int test__bpf_parsing(void *obj_buf __maybe_unused, + size_t obj_buf_sz __maybe_unused) +{ + fprintf(stderr, " (skip bpf parsing)"); + return 0; +} +#endif + +int test__llvm(void) +{ + char *tmpl_new, *clang_opt_new; + void *obj_buf; + size_t obj_buf_sz; + int err, old_verbose; + + perf_config(perf_config_cb, NULL); + + /* + * Skip this test if user's .perfconfig doesn't set [llvm] section + * and clang is not found in $PATH, and this is not perf test -v + */ + if (verbose == 0 && !llvm_param.user_set_param && llvm__search_clang()) { + fprintf(stderr, " (no clang, try 'perf test -v LLVM')"); + return TEST_SKIP; + } + + old_verbose = verbose; + /* + * llvm is verbosity when error. Suppress all error output if + * not 'perf test -v'. + */ + if (verbose == 0) + verbose = -1; + + if (!llvm_param.clang_bpf_cmd_template) + return -1; + + if (!llvm_param.clang_opt) + llvm_param.clang_opt = strdup(""); + + err = asprintf(&tmpl_new, "echo '%s' | %s", test_bpf_prog, + llvm_param.clang_bpf_cmd_template); + if (err < 0) + return -1; + err = asprintf(&clang_opt_new, "-xc %s", llvm_param.clang_opt); + if (err < 0) + return -1; + + llvm_param.clang_bpf_cmd_template = tmpl_new; + llvm_param.clang_opt = clang_opt_new; + err = llvm__compile_bpf("-", &obj_buf, &obj_buf_sz); + + verbose = old_verbose; + if (err) { + if (!verbose) + fprintf(stderr, " (use -v to see error message)"); + return -1; + } + + err = test__bpf_parsing(obj_buf, obj_buf_sz); + free(obj_buf); + return err; +} diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 729112f4cfaa..ba31c4bd441d 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -58,7 +58,8 @@ make_install_man := install-man make_install_html := install-html make_install_info := install-info make_install_pdf := install-pdf -make_install_prefix := install prefix=/tmp/krava +make_install_prefix := install prefix=/tmp/krava +make_install_prefix_slash := install prefix=/tmp/krava/ make_static := LDFLAGS=-static # all the NO_* variable combined @@ -101,6 +102,7 @@ run += make_util_pmu_bison_o run += make_install run += make_install_bin run += make_install_prefix +run += make_install_prefix_slash # FIXME 'install-*' commented out till they're fixed # run += make_install_doc # run += make_install_man @@ -175,11 +177,14 @@ test_make_install_O := $(call test_dest_files,$(installed_files_all)) test_make_install_bin := $(call test_dest_files,$(installed_files_bin)) test_make_install_bin_O := $(call test_dest_files,$(installed_files_bin)) -# We prefix all installed files for make_install_prefix +# We prefix all installed files for make_install_prefix(_slash) # with '/tmp/krava' to match installed/prefix-ed files. installed_files_all_prefix := $(addprefix /tmp/krava/,$(installed_files_all)) -test_make_install_prefix := $(call test_dest_files,$(installed_files_all_prefix)) -test_make_install_prefix_O := $(call test_dest_files,$(installed_files_all_prefix)) +test_make_install_prefix := $(call test_dest_files,$(installed_files_all_prefix)) +test_make_install_prefix_O := $(call test_dest_files,$(installed_files_all_prefix)) + +test_make_install_prefix_slash := $(test_make_install_prefix) +test_make_install_prefix_slash_O := $(test_make_install_prefix_O) # FIXME nothing gets installed test_make_install_man := test -f $$TMP_DEST/share/man/man1/perf.1 diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index d76963f7ad3d..9b6b2b6324a1 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -82,8 +82,12 @@ static int test__checkevent_symbolic_name_config(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + /* + * The period value gets configured within perf_evlist__config, + * while this test executes only parse events method. + */ TEST_ASSERT_VAL("wrong period", - 100000 == evsel->attr.sample_period); + 0 == evsel->attr.sample_period); TEST_ASSERT_VAL("wrong config1", 0 == evsel->attr.config1); TEST_ASSERT_VAL("wrong config2", @@ -406,7 +410,11 @@ static int test__checkevent_pmu(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong config", 10 == evsel->attr.config); TEST_ASSERT_VAL("wrong config1", 1 == evsel->attr.config1); TEST_ASSERT_VAL("wrong config2", 3 == evsel->attr.config2); - TEST_ASSERT_VAL("wrong period", 1000 == evsel->attr.sample_period); + /* + * The period value gets configured within perf_evlist__config, + * while this test executes only parse events method. + */ + TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period); return 0; } @@ -471,6 +479,39 @@ static int test__checkevent_pmu_name(struct perf_evlist *evlist) return 0; } +static int test__checkevent_pmu_partial_time_callgraph(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + /* cpu/config=1,call-graph=fp,time,period=100000/ */ + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config); + /* + * The period, time and callgraph value gets configured + * within perf_evlist__config, + * while this test executes only parse events method. + */ + TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period); + TEST_ASSERT_VAL("wrong callgraph", !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type)); + TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type)); + + /* cpu/config=2,call-graph=no,time=0,period=2000/ */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", 2 == evsel->attr.config); + /* + * The period, time and callgraph value gets configured + * within perf_evlist__config, + * while this test executes only parse events method. + */ + TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period); + TEST_ASSERT_VAL("wrong callgraph", !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type)); + TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type)); + + return 0; +} + static int test__checkevent_pmu_events(struct perf_evlist *evlist) { struct perf_evsel *evsel = perf_evlist__first(evlist); @@ -1547,6 +1588,11 @@ static struct evlist_test test__events_pmu[] = { .check = test__checkevent_pmu_name, .id = 1, }, + { + .name = "cpu/config=1,call-graph=fp,time,period=100000/,cpu/config=2,call-graph=no,time=0,period=2000/", + .check = test__checkevent_pmu_partial_time_callgraph, + .id = 2, + }, }; struct terms_test { diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index ebb47d96bc0b..bf113a247987 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -62,6 +62,7 @@ int test__fdarray__filter(void); int test__fdarray__add(void); int test__kmod_path__parse(void); int test__thread_map(void); +int test__llvm(void); #if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__) #ifdef HAVE_DWARF_UNWIND_SUPPORT diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index 5acf000939ea..138a0e3431fa 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -20,6 +20,8 @@ int test__thread_map(void) TEST_ASSERT_VAL("wrong comm", thread_map__comm(map, 0) && !strcmp(thread_map__comm(map, 0), "perf")); + TEST_ASSERT_VAL("wrong refcnt", + atomic_read(&map->refcnt) == 1); thread_map__put(map); /* test dummy pid */ @@ -33,6 +35,8 @@ int test__thread_map(void) TEST_ASSERT_VAL("wrong comm", thread_map__comm(map, 0) && !strcmp(thread_map__comm(map, 0), "dummy")); + TEST_ASSERT_VAL("wrong refcnt", + atomic_read(&map->refcnt) == 1); thread_map__put(map); return 0; } diff --git a/tools/perf/trace/strace/groups/file b/tools/perf/trace/strace/groups/file new file mode 100644 index 000000000000..62378a899d79 --- /dev/null +++ b/tools/perf/trace/strace/groups/file @@ -0,0 +1,18 @@ +access +chmod +creat +execve +faccessat +getcwd +lstat +mkdir +open +openat +quotactl +readlink +rename +rmdir +stat +statfs +symlink +unlink diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index 6680fa5cb9dd..c6c7e5189214 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -46,6 +46,21 @@ void ui_browser__gotorc(struct ui_browser *browser, int y, int x) SLsmg_gotorc(browser->y + y, browser->x + x); } +void ui_browser__write_nstring(struct ui_browser *browser __maybe_unused, const char *msg, + unsigned int width) +{ + slsmg_write_nstring(msg, width); +} + +void ui_browser__printf(struct ui_browser *browser __maybe_unused, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + slsmg_vprintf(fmt, args); + va_end(args); +} + static struct list_head * ui_browser__list_head_filter_entries(struct ui_browser *browser, struct list_head *pos) @@ -234,7 +249,7 @@ void __ui_browser__show_title(struct ui_browser *browser, const char *title) { SLsmg_gotorc(0, 0); ui_browser__set_color(browser, HE_COLORSET_ROOT); - slsmg_write_nstring(title, browser->width + 1); + ui_browser__write_nstring(browser, title, browser->width + 1); } void ui_browser__show_title(struct ui_browser *browser, const char *title) diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h index 92ae72113965..f3cef564de02 100644 --- a/tools/perf/ui/browser.h +++ b/tools/perf/ui/browser.h @@ -37,6 +37,9 @@ void ui_browser__refresh_dimensions(struct ui_browser *browser); void ui_browser__reset_index(struct ui_browser *browser); void ui_browser__gotorc(struct ui_browser *browser, int y, int x); +void ui_browser__write_nstring(struct ui_browser *browser, const char *msg, + unsigned int width); +void ui_browser__printf(struct ui_browser *browser, const char *fmt, ...); void ui_browser__write_graph(struct ui_browser *browser, int graph); void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column, u64 start, u64 end); @@ -58,8 +61,8 @@ int ui_browser__help_window(struct ui_browser *browser, const char *text); bool ui_browser__dialog_yesno(struct ui_browser *browser, const char *text); int ui_browser__input_window(const char *title, const char *text, char *input, const char *exit_msg, int delay_sec); -struct perf_session_env; -int tui__header_window(struct perf_session_env *env); +struct perf_env; +int tui__header_window(struct perf_env *env); void ui_browser__argv_seek(struct ui_browser *browser, off_t offset, int whence); unsigned int ui_browser__argv_refresh(struct ui_browser *browser); diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 5995a8bd7c69..29739b347599 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -1,7 +1,6 @@ #include "../../util/util.h" #include "../browser.h" #include "../helpline.h" -#include "../libslang.h" #include "../ui.h" #include "../util.h" #include "../../util/annotate.h" @@ -16,6 +15,9 @@ struct disasm_line_samples { u64 nr; }; +#define IPC_WIDTH 6 +#define CYCLES_WIDTH 6 + struct browser_disasm_line { struct rb_node rb_node; u32 idx; @@ -53,6 +55,7 @@ struct annotate_browser { int max_jump_sources; int nr_jumps; bool searching_backwards; + bool have_cycles; u8 addr_width; u8 jumps_width; u8 target_width; @@ -96,6 +99,15 @@ static int annotate_browser__set_jumps_percent_color(struct annotate_browser *br return ui_browser__set_color(&browser->b, color); } +static int annotate_browser__pcnt_width(struct annotate_browser *ab) +{ + int w = 7 * ab->nr_events; + + if (ab->have_cycles) + w += IPC_WIDTH + CYCLES_WIDTH; + return w; +} + static void annotate_browser__write(struct ui_browser *browser, void *entry, int row) { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); @@ -106,7 +118,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int (!current_entry || (browser->use_navkeypressed && !browser->navkeypressed))); int width = browser->width, printed; - int i, pcnt_width = 7 * ab->nr_events; + int i, pcnt_width = annotate_browser__pcnt_width(ab); double percent_max = 0.0; char bf[256]; @@ -116,19 +128,36 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int } if (dl->offset != -1 && percent_max != 0.0) { - for (i = 0; i < ab->nr_events; i++) { - ui_browser__set_percent_color(browser, - bdl->samples[i].percent, - current_entry); - if (annotate_browser__opts.show_total_period) - slsmg_printf("%6" PRIu64 " ", - bdl->samples[i].nr); - else - slsmg_printf("%6.2f ", bdl->samples[i].percent); + if (percent_max != 0.0) { + for (i = 0; i < ab->nr_events; i++) { + ui_browser__set_percent_color(browser, + bdl->samples[i].percent, + current_entry); + if (annotate_browser__opts.show_total_period) { + ui_browser__printf(browser, "%6" PRIu64 " ", + bdl->samples[i].nr); + } else { + ui_browser__printf(browser, "%6.2f ", + bdl->samples[i].percent); + } + } + } else { + ui_browser__write_nstring(browser, " ", 7 * ab->nr_events); } } else { ui_browser__set_percent_color(browser, 0, current_entry); - slsmg_write_nstring(" ", pcnt_width); + ui_browser__write_nstring(browser, " ", 7 * ab->nr_events); + } + if (ab->have_cycles) { + if (dl->ipc) + ui_browser__printf(browser, "%*.2f ", IPC_WIDTH - 1, dl->ipc); + else + ui_browser__write_nstring(browser, " ", IPC_WIDTH); + if (dl->cycles) + ui_browser__printf(browser, "%*" PRIu64 " ", + CYCLES_WIDTH - 1, dl->cycles); + else + ui_browser__write_nstring(browser, " ", CYCLES_WIDTH); } SLsmg_write_char(' '); @@ -138,7 +167,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int width += 1; if (!*dl->line) - slsmg_write_nstring(" ", width - pcnt_width); + ui_browser__write_nstring(browser, " ", width - pcnt_width); else if (dl->offset == -1) { if (dl->line_nr && annotate_browser__opts.show_linenr) printed = scnprintf(bf, sizeof(bf), "%-*d ", @@ -146,8 +175,8 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int else printed = scnprintf(bf, sizeof(bf), "%*s ", ab->addr_width, " "); - slsmg_write_nstring(bf, printed); - slsmg_write_nstring(dl->line, width - printed - pcnt_width + 1); + ui_browser__write_nstring(browser, bf, printed); + ui_browser__write_nstring(browser, dl->line, width - printed - pcnt_width + 1); } else { u64 addr = dl->offset; int color = -1; @@ -166,7 +195,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int bdl->jump_sources); prev = annotate_browser__set_jumps_percent_color(ab, bdl->jump_sources, current_entry); - slsmg_write_nstring(bf, printed); + ui_browser__write_nstring(browser, bf, printed); ui_browser__set_color(browser, prev); } @@ -180,7 +209,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int if (change_color) color = ui_browser__set_color(browser, HE_COLORSET_ADDR); - slsmg_write_nstring(bf, printed); + ui_browser__write_nstring(browser, bf, printed); if (change_color) ui_browser__set_color(browser, color); if (dl->ins && dl->ins->ops->scnprintf) { @@ -194,11 +223,11 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int ui_browser__write_graph(browser, SLSMG_RARROW_CHAR); SLsmg_write_char(' '); } else { - slsmg_write_nstring(" ", 2); + ui_browser__write_nstring(browser, " ", 2); } } else { if (strcmp(dl->name, "retq")) { - slsmg_write_nstring(" ", 2); + ui_browser__write_nstring(browser, " ", 2); } else { ui_browser__write_graph(browser, SLSMG_LARROW_CHAR); SLsmg_write_char(' '); @@ -206,7 +235,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int } disasm_line__scnprintf(dl, bf, sizeof(bf), !annotate_browser__opts.use_offset); - slsmg_write_nstring(bf, width - pcnt_width - 3 - printed); + ui_browser__write_nstring(browser, bf, width - pcnt_width - 3 - printed); } if (current_entry) @@ -231,7 +260,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) unsigned int from, to; struct map_symbol *ms = ab->b.priv; struct symbol *sym = ms->sym; - u8 pcnt_width = 7; + u8 pcnt_width = annotate_browser__pcnt_width(ab); /* PLT symbols contain external offsets */ if (strstr(sym->name, "@plt")) @@ -255,8 +284,6 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) to = (u64)btarget->idx; } - pcnt_width *= ab->nr_events; - ui_browser__set_color(browser, HE_COLORSET_CODE); __ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width, from, to); @@ -266,9 +293,7 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser) { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); int ret = ui_browser__list_head_refresh(browser); - int pcnt_width; - - pcnt_width = 7 * ab->nr_events; + int pcnt_width = annotate_browser__pcnt_width(ab); if (annotate_browser__opts.jump_arrows) annotate_browser__draw_current_jump(browser); @@ -390,7 +415,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, max_percent = bpos->samples[i].percent; } - if (max_percent < 0.01) { + if (max_percent < 0.01 && pos->ipc == 0) { RB_CLEAR_NODE(&bpos->rb_node); continue; } @@ -869,6 +894,75 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, return map_symbol__tui_annotate(&he->ms, evsel, hbt); } + +static unsigned count_insn(struct annotate_browser *browser, u64 start, u64 end) +{ + unsigned n_insn = 0; + u64 offset; + + for (offset = start; offset <= end; offset++) { + if (browser->offsets[offset]) + n_insn++; + } + return n_insn; +} + +static void count_and_fill(struct annotate_browser *browser, u64 start, u64 end, + struct cyc_hist *ch) +{ + unsigned n_insn; + u64 offset; + + n_insn = count_insn(browser, start, end); + if (n_insn && ch->num && ch->cycles) { + float ipc = n_insn / ((double)ch->cycles / (double)ch->num); + + /* Hide data when there are too many overlaps. */ + if (ch->reset >= 0x7fff || ch->reset >= ch->num / 2) + return; + + for (offset = start; offset <= end; offset++) { + struct disasm_line *dl = browser->offsets[offset]; + + if (dl) + dl->ipc = ipc; + } + } +} + +/* + * This should probably be in util/annotate.c to share with the tty + * annotate, but right now we need the per byte offsets arrays, + * which are only here. + */ +static void annotate__compute_ipc(struct annotate_browser *browser, size_t size, + struct symbol *sym) +{ + u64 offset; + struct annotation *notes = symbol__annotation(sym); + + if (!notes->src || !notes->src->cycles_hist) + return; + + pthread_mutex_lock(¬es->lock); + for (offset = 0; offset < size; ++offset) { + struct cyc_hist *ch; + + ch = ¬es->src->cycles_hist[offset]; + if (ch && ch->cycles) { + struct disasm_line *dl; + + if (ch->have_start) + count_and_fill(browser, ch->start, offset, ch); + dl = browser->offsets[offset]; + if (dl && ch->num_aggr) + dl->cycles = ch->cycles_aggr / ch->num_aggr; + browser->have_cycles = true; + } + } + pthread_mutex_unlock(¬es->lock); +} + static void annotate_browser__mark_jump_targets(struct annotate_browser *browser, size_t size) { @@ -991,6 +1085,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, } annotate_browser__mark_jump_targets(&browser, size); + annotate__compute_ipc(&browser, size, sym); browser.addr_width = browser.target_width = browser.min_addr_width = hex_width(size); browser.max_addr_width = hex_width(sym->end); diff --git a/tools/perf/ui/browsers/header.c b/tools/perf/ui/browsers/header.c index e8278c558d4a..edbeaaf31ace 100644 --- a/tools/perf/ui/browsers/header.c +++ b/tools/perf/ui/browsers/header.c @@ -25,7 +25,7 @@ static void ui_browser__argv_write(struct ui_browser *browser, ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED : HE_COLORSET_NORMAL); - slsmg_write_nstring(str, browser->width); + ui_browser__write_nstring(browser, str, browser->width); } static int list_menu__run(struct ui_browser *menu) @@ -91,7 +91,7 @@ static int ui__list_menu(int argc, char * const argv[]) return list_menu__run(&menu); } -int tui__header_window(struct perf_session_env *env) +int tui__header_window(struct perf_env *env) { int i, argc = 0; char **argv; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index fa67613976a8..cf86f2d3a5e7 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1,5 +1,4 @@ #include <stdio.h> -#include "../libslang.h" #include <stdlib.h> #include <string.h> #include <linux/rbtree.h> @@ -27,7 +26,7 @@ struct hist_browser { struct map_symbol *selection; struct hist_browser_timer *hbt; struct pstack *pstack; - struct perf_session_env *env; + struct perf_env *env; int print_seq; bool show_dso; bool show_headers; @@ -540,10 +539,10 @@ static void hist_browser__show_callchain_entry(struct hist_browser *browser, ui_browser__set_color(&browser->b, color); hist_browser__gotorc(browser, row, 0); - slsmg_write_nstring(" ", offset); - slsmg_printf("%c", folded_sign); + ui_browser__write_nstring(&browser->b, " ", offset); + ui_browser__printf(&browser->b, "%c", folded_sign); ui_browser__write_graph(&browser->b, show_annotated ? SLSMG_RARROW_CHAR : ' '); - slsmg_write_nstring(str, width); + ui_browser__write_nstring(&browser->b, str, width); } static void hist_browser__fprintf_callchain_entry(struct hist_browser *b __maybe_unused, @@ -680,7 +679,7 @@ static int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...) ui_browser__set_percent_color(arg->b, percent, arg->current_entry); ret = scnprintf(hpp->buf, hpp->size, fmt, len, percent); - slsmg_printf("%s", hpp->buf); + ui_browser__printf(arg->b, "%s", hpp->buf); advance_hpp(hpp, ret); return ret; @@ -713,10 +712,11 @@ hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt, \ struct hist_entry *he) \ { \ if (!symbol_conf.cumulate_callchain) { \ + struct hpp_arg *arg = hpp->ptr; \ int len = fmt->user_len ?: fmt->len; \ int ret = scnprintf(hpp->buf, hpp->size, \ "%*s", len, "N/A"); \ - slsmg_printf("%s", hpp->buf); \ + ui_browser__printf(arg->b, "%s", hpp->buf); \ \ return ret; \ } \ @@ -801,12 +801,12 @@ static int hist_browser__show_entry(struct hist_browser *browser, if (first) { if (symbol_conf.use_callchain) { - slsmg_printf("%c ", folded_sign); + ui_browser__printf(&browser->b, "%c ", folded_sign); width -= 2; } first = false; } else { - slsmg_printf(" "); + ui_browser__printf(&browser->b, " "); width -= 2; } @@ -814,7 +814,7 @@ static int hist_browser__show_entry(struct hist_browser *browser, width -= fmt->color(fmt, &hpp, entry); } else { width -= fmt->entry(fmt, &hpp, entry); - slsmg_printf("%s", s); + ui_browser__printf(&browser->b, "%s", s); } } @@ -822,7 +822,7 @@ static int hist_browser__show_entry(struct hist_browser *browser, if (!browser->b.navkeypressed) width += 1; - slsmg_write_nstring("", width); + ui_browser__write_nstring(&browser->b, "", width); ++row; ++printed; @@ -899,7 +899,7 @@ static void hist_browser__show_headers(struct hist_browser *browser) hists__scnprintf_headers(headers, sizeof(headers), browser->hists); ui_browser__gotorc(&browser->b, 0, 0); ui_browser__set_color(&browser->b, HE_COLORSET_ROOT); - slsmg_write_nstring(headers, browser->b.width + 1); + ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1); } static void ui_browser__hists_init_top(struct ui_browser *browser) @@ -1214,7 +1214,7 @@ static int hist_browser__dump(struct hist_browser *browser) static struct hist_browser *hist_browser__new(struct hists *hists, struct hist_browser_timer *hbt, - struct perf_session_env *env) + struct perf_env *env) { struct hist_browser *browser = zalloc(sizeof(*browser)); @@ -1267,6 +1267,8 @@ static int hists__browser_title(struct hists *hists, const char *ev_name = perf_evsel__name(evsel); char buf[512]; size_t buflen = sizeof(buf); + char ref[30] = " show reference callgraph, "; + bool enable_ref = false; if (symbol_conf.filter_relative) { nr_samples = hists->stats.nr_non_filtered_samples; @@ -1292,10 +1294,13 @@ static int hists__browser_title(struct hists *hists, } } + if (symbol_conf.show_ref_callgraph && + strstr(ev_name, "call-graph=no")) + enable_ref = true; nr_samples = convert_unit(nr_samples, &unit); printed = scnprintf(bf, size, - "Samples: %lu%c of event '%s', Event count (approx.): %" PRIu64, - nr_samples, unit, ev_name, nr_events); + "Samples: %lu%c of event '%s',%sEvent count (approx.): %" PRIu64, + nr_samples, unit, ev_name, enable_ref ? ref : " ", nr_events); if (hists->uid_filter_str) @@ -1690,7 +1695,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, bool left_exits, struct hist_browser_timer *hbt, float min_pcnt, - struct perf_session_env *env) + struct perf_env *env) { struct hists *hists = evsel__hists(evsel); struct hist_browser *browser = hist_browser__new(hists, hbt, env); @@ -1868,6 +1873,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, case K_RIGHT: /* menu */ break; + case K_ESC: case K_LEFT: { const void *top; @@ -1877,6 +1883,12 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, */ if (left_exits) goto out_free_stack; + + if (key == K_ESC && + ui_browser__dialog_yesno(&browser->b, + "Do you really want to exit?")) + goto out_free_stack; + continue; } top = pstack__peek(browser->pstack); @@ -1892,12 +1904,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, do_zoom_thread(browser, actions); continue; } - case K_ESC: - if (!left_exits && - !ui_browser__dialog_yesno(&browser->b, - "Do you really want to exit?")) - continue; - /* Fall thru */ case 'q': case CTRL('c'): goto out_free_stack; @@ -2010,7 +2016,7 @@ struct perf_evsel_menu { struct perf_evsel *selection; bool lost_events, lost_events_warned; float min_pcnt; - struct perf_session_env *env; + struct perf_env *env; }; static void perf_evsel_menu__write(struct ui_browser *browser, @@ -2044,7 +2050,7 @@ static void perf_evsel_menu__write(struct ui_browser *browser, nr_events = convert_unit(nr_events, &unit); printed = scnprintf(bf, sizeof(bf), "%lu%c%s%s", nr_events, unit, unit == ' ' ? "" : " ", ev_name); - slsmg_printf("%s", bf); + ui_browser__printf(browser, "%s", bf); nr_events = hists->stats.nr_events[PERF_RECORD_LOST]; if (nr_events != 0) { @@ -2057,7 +2063,7 @@ static void perf_evsel_menu__write(struct ui_browser *browser, warn = bf; } - slsmg_write_nstring(warn, browser->width - printed); + ui_browser__write_nstring(browser, warn, browser->width - printed); if (current_entry) menu->selection = evsel; @@ -2120,15 +2126,11 @@ browse_hists: else pos = perf_evsel__prev(pos); goto browse_hists; - case K_ESC: - if (!ui_browser__dialog_yesno(&menu->b, - "Do you really want to exit?")) - continue; - /* Fall thru */ case K_SWITCH_INPUT_DATA: case 'q': case CTRL('c'): goto out; + case K_ESC: default: continue; } @@ -2167,7 +2169,7 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist, int nr_entries, const char *help, struct hist_browser_timer *hbt, float min_pcnt, - struct perf_session_env *env) + struct perf_env *env) { struct perf_evsel *pos; struct perf_evsel_menu menu = { @@ -2200,7 +2202,7 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist, int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, struct hist_browser_timer *hbt, float min_pcnt, - struct perf_session_env *env) + struct perf_env *env) { int nr_entries = evlist->nr_entries; diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c index b11639f33682..8c154c7d4669 100644 --- a/tools/perf/ui/browsers/map.c +++ b/tools/perf/ui/browsers/map.c @@ -1,4 +1,3 @@ -#include "../libslang.h" #include <elf.h> #include <inttypes.h> #include <sys/ttydefaults.h> @@ -26,13 +25,13 @@ static void map_browser__write(struct ui_browser *browser, void *nd, int row) int width; ui_browser__set_percent_color(browser, 0, current_entry); - slsmg_printf("%*" PRIx64 " %*" PRIx64 " %c ", - mb->addrlen, sym->start, mb->addrlen, sym->end, - sym->binding == STB_GLOBAL ? 'g' : - sym->binding == STB_LOCAL ? 'l' : 'w'); + ui_browser__printf(browser, "%*" PRIx64 " %*" PRIx64 " %c ", + mb->addrlen, sym->start, mb->addrlen, sym->end, + sym->binding == STB_GLOBAL ? 'g' : + sym->binding == STB_LOCAL ? 'l' : 'w'); width = browser->width - ((mb->addrlen * 2) + 4); if (width > 0) - slsmg_write_nstring(sym->name, width); + ui_browser__write_nstring(browser, sym->name, width); } /* FIXME uber-kludgy, see comment on cmd_report... */ diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c index 402d2bd30b09..e13b48d705ef 100644 --- a/tools/perf/ui/browsers/scripts.c +++ b/tools/perf/ui/browsers/scripts.c @@ -81,7 +81,7 @@ static void script_browser__write(struct ui_browser *browser, ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED : HE_COLORSET_NORMAL); - slsmg_write_nstring(sline->line, browser->width); + ui_browser__write_nstring(browser, sline->line, browser->width); } static int script_browser__run(struct perf_script_browser *browser) diff --git a/tools/perf/ui/libslang.h b/tools/perf/ui/libslang.h index 4d54b6450f5b..db816695ad97 100644 --- a/tools/perf/ui/libslang.h +++ b/tools/perf/ui/libslang.h @@ -14,12 +14,15 @@ #if SLANG_VERSION < 20104 #define slsmg_printf(msg, args...) \ SLsmg_printf((char *)(msg), ##args) +#define slsmg_vprintf(msg, vargs) \ + SLsmg_vprintf((char *)(msg), vargs) #define slsmg_write_nstring(msg, len) \ SLsmg_write_nstring((char *)(msg), len) #define sltt_set_color(obj, name, fg, bg) \ SLtt_set_color(obj,(char *)(name), (char *)(fg), (char *)(bg)) #else #define slsmg_printf SLsmg_printf +#define slsmg_vprintf SLsmg_vprintf #define slsmg_write_nstring SLsmg_write_nstring #define sltt_set_color SLtt_set_color #endif diff --git a/tools/perf/ui/tui/progress.c b/tools/perf/ui/tui/progress.c index c61d14b101e0..c4b99008e2c9 100644 --- a/tools/perf/ui/tui/progress.c +++ b/tools/perf/ui/tui/progress.c @@ -33,9 +33,26 @@ static void tui_progress__update(struct ui_progress *p) pthread_mutex_unlock(&ui__lock); } +static void tui_progress__finish(void) +{ + int y; + + if (use_browser <= 0) + return; + + ui__refresh_dimensions(false); + pthread_mutex_lock(&ui__lock); + y = SLtt_Screen_Rows / 2 - 2; + SLsmg_set_color(0); + SLsmg_fill_region(y, 0, 3, SLtt_Screen_Cols, ' '); + SLsmg_refresh(); + pthread_mutex_unlock(&ui__lock); +} + static struct ui_progress_ops tui_progress__ops = { - .update = tui_progress__update, + .update = tui_progress__update, + .finish = tui_progress__finish, }; void tui_progress__init(void) diff --git a/tools/perf/ui/tui/util.c b/tools/perf/ui/tui/util.c index bf890f72fe80..d96ad7c8325d 100644 --- a/tools/perf/ui/tui/util.c +++ b/tools/perf/ui/tui/util.c @@ -21,7 +21,7 @@ static void ui_browser__argv_write(struct ui_browser *browser, ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED : HE_COLORSET_NORMAL); - slsmg_write_nstring(*arg, browser->width); + ui_browser__write_nstring(browser, *arg, browser->width); } static int popup_menu__run(struct ui_browser *menu) diff --git a/tools/perf/util/Build b/tools/perf/util/Build index d2d318c59b37..e912856cc4e5 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -14,6 +14,7 @@ libperf-y += find_next_bit.o libperf-y += help.o libperf-y += kallsyms.o libperf-y += levenshtein.o +libperf-y += llvm-utils.o libperf-y += parse-options.o libperf-y += parse-events.o libperf-y += path.o @@ -67,6 +68,7 @@ libperf-y += target.o libperf-y += rblist.o libperf-y += intlist.o libperf-y += vdso.o +libperf-y += counts.o libperf-y += stat.o libperf-y += stat-shadow.o libperf-y += record.o @@ -76,9 +78,13 @@ libperf-$(CONFIG_X86) += tsc.o libperf-y += cloexec.o libperf-y += thread-stack.o libperf-$(CONFIG_AUXTRACE) += auxtrace.o +libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ +libperf-$(CONFIG_AUXTRACE) += intel-pt.o +libperf-$(CONFIG_AUXTRACE) += intel-bts.o libperf-y += parse-branch-options.o libperf-$(CONFIG_LIBELF) += symbol-elf.o +libperf-$(CONFIG_LIBELF) += probe-file.o libperf-$(CONFIG_LIBELF) += probe-event.o ifndef CONFIG_LIBELF diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 03b7bc70eb66..d1eece70b84d 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -473,17 +473,73 @@ int symbol__alloc_hist(struct symbol *sym) return 0; } +/* The cycles histogram is lazily allocated. */ +static int symbol__alloc_hist_cycles(struct symbol *sym) +{ + struct annotation *notes = symbol__annotation(sym); + const size_t size = symbol__size(sym); + + notes->src->cycles_hist = calloc(size, sizeof(struct cyc_hist)); + if (notes->src->cycles_hist == NULL) + return -1; + return 0; +} + void symbol__annotate_zero_histograms(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); pthread_mutex_lock(¬es->lock); - if (notes->src != NULL) + if (notes->src != NULL) { memset(notes->src->histograms, 0, notes->src->nr_histograms * notes->src->sizeof_sym_hist); + if (notes->src->cycles_hist) + memset(notes->src->cycles_hist, 0, + symbol__size(sym) * sizeof(struct cyc_hist)); + } pthread_mutex_unlock(¬es->lock); } +static int __symbol__account_cycles(struct annotation *notes, + u64 start, + unsigned offset, unsigned cycles, + unsigned have_start) +{ + struct cyc_hist *ch; + + ch = notes->src->cycles_hist; + /* + * For now we can only account one basic block per + * final jump. But multiple could be overlapping. + * Always account the longest one. So when + * a shorter one has been already seen throw it away. + * + * We separately always account the full cycles. + */ + ch[offset].num_aggr++; + ch[offset].cycles_aggr += cycles; + + if (!have_start && ch[offset].have_start) + return 0; + if (ch[offset].num) { + if (have_start && (!ch[offset].have_start || + ch[offset].start > start)) { + ch[offset].have_start = 0; + ch[offset].cycles = 0; + ch[offset].num = 0; + if (ch[offset].reset < 0xffff) + ch[offset].reset++; + } else if (have_start && + ch[offset].start < start) + return 0; + } + ch[offset].have_start = have_start; + ch[offset].start = start; + ch[offset].cycles += cycles; + ch[offset].num++; + return 0; +} + static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, struct annotation *notes, int evidx, u64 addr) { @@ -506,7 +562,7 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, return 0; } -static struct annotation *symbol__get_annotation(struct symbol *sym) +static struct annotation *symbol__get_annotation(struct symbol *sym, bool cycles) { struct annotation *notes = symbol__annotation(sym); @@ -514,6 +570,10 @@ static struct annotation *symbol__get_annotation(struct symbol *sym) if (symbol__alloc_hist(sym) < 0) return NULL; } + if (!notes->src->cycles_hist && cycles) { + if (symbol__alloc_hist_cycles(sym) < 0) + return NULL; + } return notes; } @@ -524,12 +584,73 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map, if (sym == NULL) return 0; - notes = symbol__get_annotation(sym); + notes = symbol__get_annotation(sym, false); if (notes == NULL) return -ENOMEM; return __symbol__inc_addr_samples(sym, map, notes, evidx, addr); } +static int symbol__account_cycles(u64 addr, u64 start, + struct symbol *sym, unsigned cycles) +{ + struct annotation *notes; + unsigned offset; + + if (sym == NULL) + return 0; + notes = symbol__get_annotation(sym, true); + if (notes == NULL) + return -ENOMEM; + if (addr < sym->start || addr >= sym->end) + return -ERANGE; + + if (start) { + if (start < sym->start || start >= sym->end) + return -ERANGE; + if (start >= addr) + start = 0; + } + offset = addr - sym->start; + return __symbol__account_cycles(notes, + start ? start - sym->start : 0, + offset, cycles, + !!start); +} + +int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, + struct addr_map_symbol *start, + unsigned cycles) +{ + u64 saddr = 0; + int err; + + if (!cycles) + return 0; + + /* + * Only set start when IPC can be computed. We can only + * compute it when the basic block is completely in a single + * function. + * Special case the case when the jump is elsewhere, but + * it starts on the function start. + */ + if (start && + (start->sym == ams->sym || + (ams->sym && + start->addr == ams->sym->start + ams->map->start))) + saddr = start->al_addr; + if (saddr == 0) + pr_debug2("BB with bad start: addr %"PRIx64" start %"PRIx64" sym %"PRIx64" saddr %"PRIx64"\n", + ams->addr, + start ? start->addr : 0, + ams->sym ? ams->sym->start + ams->map->start : 0, + saddr); + err = symbol__account_cycles(ams->al_addr, saddr, ams->sym, cycles); + if (err) + pr_debug2("account_cycles failed %d\n", err); + return err; +} + int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx) { return symbol__inc_addr_samples(ams->sym, ams->map, evidx, ams->al_addr); @@ -1005,6 +1126,7 @@ fallback: dso->annotate_warned = 1; pr_err("Can't annotate %s:\n\n" "No vmlinux file%s\nwas found in the path.\n\n" + "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n" "Please use:\n\n" " perf buildid-cache -vu vmlinux\n\n" "or:\n\n" diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 7e78e6c27078..e9996092a093 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -59,6 +59,8 @@ struct disasm_line { char *name; struct ins *ins; int line_nr; + float ipc; + u64 cycles; struct ins_operands ops; }; @@ -79,6 +81,17 @@ struct sym_hist { u64 addr[0]; }; +struct cyc_hist { + u64 start; + u64 cycles; + u64 cycles_aggr; + u32 num; + u32 num_aggr; + u8 have_start; + /* 1 byte padding */ + u16 reset; +}; + struct source_line_samples { double percent; double percent_sum; @@ -97,6 +110,7 @@ struct source_line { * @histogram: Array of addr hit histograms per event being monitored * @lines: If 'print_lines' is specified, per source code line percentages * @source: source parsed from a disassembler like objdump -dS + * @cyc_hist: Average cycles per basic block * * lines is allocated, percentages calculated and all sorted by percentage * when the annotation is about to be presented, so the percentages are for @@ -109,6 +123,7 @@ struct annotated_source { struct source_line *lines; int nr_histograms; int sizeof_sym_hist; + struct cyc_hist *cycles_hist; struct sym_hist histograms[0]; }; @@ -130,6 +145,10 @@ static inline struct annotation *symbol__annotation(struct symbol *sym) int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx); +int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, + struct addr_map_symbol *start, + unsigned cycles); + int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr); int symbol__alloc_hist(struct symbol *sym); diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 83d9dd96fe08..a980e7c50ee0 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -47,6 +47,9 @@ #include "debug.h" #include "parse-options.h" +#include "intel-pt.h" +#include "intel-bts.h" + int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, struct auxtrace_mmap_params *mp, void *userpg, int fd) @@ -876,7 +879,7 @@ static bool auxtrace__dont_decode(struct perf_session *session) int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_session *session __maybe_unused) + struct perf_session *session) { enum auxtrace_type type = event->auxtrace_info.type; @@ -884,6 +887,10 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, fprintf(stdout, " type: %u\n", type); switch (type) { + case PERF_AUXTRACE_INTEL_PT: + return intel_pt_process_auxtrace_info(event, session); + case PERF_AUXTRACE_INTEL_BTS: + return intel_bts_process_auxtrace_info(event, session); case PERF_AUXTRACE_UNKNOWN: default: return -EINVAL; @@ -942,6 +949,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, struct itrace_synth_opts *synth_opts = opt->value; const char *p; char *endptr; + bool period_type_set = false; synth_opts->set = true; @@ -970,10 +978,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, case 'i': synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS; + period_type_set = true; break; case 't': synth_opts->period_type = PERF_ITRACE_PERIOD_TICKS; + period_type_set = true; break; case 'm': synth_opts->period *= 1000; @@ -986,6 +996,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, goto out_err; synth_opts->period_type = PERF_ITRACE_PERIOD_NANOSECS; + period_type_set = true; break; case '\0': goto out; @@ -1039,7 +1050,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, } out: if (synth_opts->instructions) { - if (!synth_opts->period_type) + if (!period_type_set) synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; if (!synth_opts->period) diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 471aecbc4d68..bf72b77a588a 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -39,6 +39,8 @@ struct events_stats; enum auxtrace_type { PERF_AUXTRACE_UNKNOWN, + PERF_AUXTRACE_INTEL_PT, + PERF_AUXTRACE_INTEL_BTS, }; enum itrace_period_type { diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 1f6fc2323ef9..d909459fb54c 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -93,6 +93,38 @@ int build_id__sprintf(const u8 *build_id, int len, char *bf) return raw - build_id; } +int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id) +{ + char notes[PATH_MAX]; + u8 build_id[BUILD_ID_SIZE]; + int ret; + + if (!root_dir) + root_dir = ""; + + scnprintf(notes, sizeof(notes), "%s/sys/kernel/notes", root_dir); + + ret = sysfs__read_build_id(notes, build_id, sizeof(build_id)); + if (ret < 0) + return ret; + + return build_id__sprintf(build_id, sizeof(build_id), sbuild_id); +} + +int filename__sprintf_build_id(const char *pathname, char *sbuild_id) +{ + u8 build_id[BUILD_ID_SIZE]; + int ret; + + ret = filename__read_build_id(pathname, build_id, sizeof(build_id)); + if (ret < 0) + return ret; + else if (ret != sizeof(build_id)) + return -EINVAL; + + return build_id__sprintf(build_id, sizeof(build_id), sbuild_id); +} + /* asnprintf consolidates asprintf and snprintf */ static int asnprintf(char **strp, size_t size, const char *fmt, ...) { @@ -124,7 +156,7 @@ static char *build_id__filename(const char *sbuild_id, char *bf, size_t size) char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) { - char build_id_hex[BUILD_ID_SIZE * 2 + 1]; + char build_id_hex[SBUILD_ID_SIZE]; if (!dso->has_build_id) return NULL; @@ -291,7 +323,7 @@ int build_id_cache__list_build_ids(const char *pathname, struct dirent *d; int ret = 0; - list = strlist__new(true, NULL); + list = strlist__new(NULL, NULL); dir_name = build_id_cache__dirname_from_path(pathname, false, false); if (!list || !dir_name) { ret = -ENOMEM; @@ -384,7 +416,7 @@ static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size, const char *name, bool is_kallsyms, bool is_vdso) { - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + char sbuild_id[SBUILD_ID_SIZE]; build_id__sprintf(build_id, build_id_size, sbuild_id); diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 85011222cc14..27a14a8a945b 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -1,7 +1,8 @@ #ifndef PERF_BUILD_ID_H_ #define PERF_BUILD_ID_H_ 1 -#define BUILD_ID_SIZE 20 +#define BUILD_ID_SIZE 20 +#define SBUILD_ID_SIZE (BUILD_ID_SIZE * 2 + 1) #include "tool.h" #include "strlist.h" @@ -11,6 +12,9 @@ extern struct perf_tool build_id__mark_dso_hit_ops; struct dso; int build_id__sprintf(const u8 *build_id, int len, char *bf); +int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id); +int filename__sprintf_build_id(const char *pathname, char *sbuild_id); + char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size); int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event, diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 9f643ee77001..773fe13ce627 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -25,96 +25,9 @@ __thread struct callchain_cursor callchain_cursor; -#ifdef HAVE_DWARF_UNWIND_SUPPORT -static int get_stack_size(const char *str, unsigned long *_size) -{ - char *endptr; - unsigned long size; - unsigned long max_size = round_down(USHRT_MAX, sizeof(u64)); - - size = strtoul(str, &endptr, 0); - - do { - if (*endptr) - break; - - size = round_up(size, sizeof(u64)); - if (!size || size > max_size) - break; - - *_size = size; - return 0; - - } while (0); - - pr_err("callchain: Incorrect stack dump size (max %ld): %s\n", - max_size, str); - return -1; -} -#endif /* HAVE_DWARF_UNWIND_SUPPORT */ - -int parse_callchain_record_opt(const char *arg) +int parse_callchain_record_opt(const char *arg, struct callchain_param *param) { - char *tok, *name, *saveptr = NULL; - char *buf; - int ret = -1; - - /* We need buffer that we know we can write to. */ - buf = malloc(strlen(arg) + 1); - if (!buf) - return -ENOMEM; - - strcpy(buf, arg); - - tok = strtok_r((char *)buf, ",", &saveptr); - name = tok ? : (char *)buf; - - do { - /* Framepointer style */ - if (!strncmp(name, "fp", sizeof("fp"))) { - if (!strtok_r(NULL, ",", &saveptr)) { - callchain_param.record_mode = CALLCHAIN_FP; - ret = 0; - } else - pr_err("callchain: No more arguments " - "needed for --call-graph fp\n"); - break; - -#ifdef HAVE_DWARF_UNWIND_SUPPORT - /* Dwarf style */ - } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) { - const unsigned long default_stack_dump_size = 8192; - - ret = 0; - callchain_param.record_mode = CALLCHAIN_DWARF; - callchain_param.dump_size = default_stack_dump_size; - - tok = strtok_r(NULL, ",", &saveptr); - if (tok) { - unsigned long size = 0; - - ret = get_stack_size(tok, &size); - callchain_param.dump_size = size; - } -#endif /* HAVE_DWARF_UNWIND_SUPPORT */ - } else if (!strncmp(name, "lbr", sizeof("lbr"))) { - if (!strtok_r(NULL, ",", &saveptr)) { - callchain_param.record_mode = CALLCHAIN_LBR; - ret = 0; - } else - pr_err("callchain: No more arguments " - "needed for --call-graph lbr\n"); - break; - } else { - pr_err("callchain: Unknown --call-graph option " - "value: %s\n", arg); - break; - } - - } while (0); - - free(buf); - return ret; + return parse_callchain_record(arg, param); } static int parse_callchain_mode(const char *value) @@ -219,7 +132,7 @@ int perf_callchain_config(const char *var, const char *value) var += sizeof("call-graph.") - 1; if (!strcmp(var, "record-mode")) - return parse_callchain_record_opt(value); + return parse_callchain_record_opt(value, &callchain_param); #ifdef HAVE_DWARF_UNWIND_SUPPORT if (!strcmp(var, "dump-size")) { unsigned long size = 0; diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 679c2c6d8ade..acee2b3cd801 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -177,7 +177,8 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node * bool hide_unresolved); extern const char record_callchain_help[]; -int parse_callchain_record_opt(const char *arg); +extern int parse_callchain_record(const char *arg, struct callchain_param *param); +int parse_callchain_record_opt(const char *arg, struct callchain_param *param); int parse_callchain_report_opt(const char *arg); int perf_callchain_config(const char *var, const char *value); diff --git a/tools/perf/util/cloexec.h b/tools/perf/util/cloexec.h index 68888c29b04a..3bee6773ddb0 100644 --- a/tools/perf/util/cloexec.h +++ b/tools/perf/util/cloexec.h @@ -4,7 +4,7 @@ unsigned long perf_event_open_cloexec_flag(void); #ifdef __GLIBC_PREREQ -#if !__GLIBC_PREREQ(2, 6) +#if !__GLIBC_PREREQ(2, 6) && !defined(__UCLIBC__) extern int sched_getcpu(void) __THROW; #endif #endif diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index 55355b3d4f85..9b9565416f90 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -67,8 +67,9 @@ static int __color_vsnprintf(char *bf, size_t size, const char *color, return r; } +/* Colors are not included in return value */ static int __color_vfprintf(FILE *fp, const char *color, const char *fmt, - va_list args, const char *trail) + va_list args) { int r = 0; @@ -83,12 +84,10 @@ static int __color_vfprintf(FILE *fp, const char *color, const char *fmt, } if (perf_use_color_default && *color) - r += fprintf(fp, "%s", color); + fprintf(fp, "%s", color); r += vfprintf(fp, fmt, args); if (perf_use_color_default && *color) - r += fprintf(fp, "%s", PERF_COLOR_RESET); - if (trail) - r += fprintf(fp, "%s", trail); + fprintf(fp, "%s", PERF_COLOR_RESET); return r; } @@ -100,7 +99,7 @@ int color_vsnprintf(char *bf, size_t size, const char *color, int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args) { - return __color_vfprintf(fp, color, fmt, args, NULL); + return __color_vfprintf(fp, color, fmt, args); } int color_snprintf(char *bf, size_t size, const char *color, @@ -126,16 +125,6 @@ int color_fprintf(FILE *fp, const char *color, const char *fmt, ...) return r; } -int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...) -{ - va_list args; - int r; - va_start(args, fmt); - r = __color_vfprintf(fp, color, fmt, args, "\n"); - va_end(args); - return r; -} - /* * This function splits the buffer by newlines and colors the lines individually. * diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index 38146f922c54..a93997f16dec 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h @@ -35,7 +35,6 @@ int color_vsnprintf(char *bf, size_t size, const char *color, int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args); int color_fprintf(FILE *fp, const char *color, const char *fmt, ...); int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...); -int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...); int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf); int value_color_snprintf(char *bf, size_t size, const char *fmt, double value); int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...); diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index e18f653cd7db..2e452ac1353d 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -12,6 +12,7 @@ #include "cache.h" #include "exec_cmd.h" #include "util/hist.h" /* perf_hist_config */ +#include "util/llvm-utils.h" /* perf_llvm_config */ #define MAXNAME (256) @@ -408,6 +409,9 @@ int perf_default_config(const char *var, const char *value, if (!prefixcmp(var, "call-graph.")) return perf_callchain_config(var, value); + if (!prefixcmp(var, "llvm.")) + return perf_llvm_config(var, value); + /* Add other config variables here. */ return 0; } diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c new file mode 100644 index 000000000000..e3fde313deb2 --- /dev/null +++ b/tools/perf/util/counts.c @@ -0,0 +1,52 @@ +#include <stdlib.h> +#include "evsel.h" +#include "counts.h" + +struct perf_counts *perf_counts__new(int ncpus, int nthreads) +{ + struct perf_counts *counts = zalloc(sizeof(*counts)); + + if (counts) { + struct xyarray *values; + + values = xyarray__new(ncpus, nthreads, sizeof(struct perf_counts_values)); + if (!values) { + free(counts); + return NULL; + } + + counts->values = values; + } + + return counts; +} + +void perf_counts__delete(struct perf_counts *counts) +{ + if (counts) { + xyarray__delete(counts->values); + free(counts); + } +} + +static void perf_counts__reset(struct perf_counts *counts) +{ + xyarray__reset(counts->values); +} + +void perf_evsel__reset_counts(struct perf_evsel *evsel) +{ + perf_counts__reset(evsel->counts); +} + +int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads) +{ + evsel->counts = perf_counts__new(ncpus, nthreads); + return evsel->counts != NULL ? 0 : -ENOMEM; +} + +void perf_evsel__free_counts(struct perf_evsel *evsel) +{ + perf_counts__delete(evsel->counts); + evsel->counts = NULL; +} diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h new file mode 100644 index 000000000000..34d8baaf558a --- /dev/null +++ b/tools/perf/util/counts.h @@ -0,0 +1,37 @@ +#ifndef __PERF_COUNTS_H +#define __PERF_COUNTS_H + +#include "xyarray.h" + +struct perf_counts_values { + union { + struct { + u64 val; + u64 ena; + u64 run; + }; + u64 values[3]; + }; +}; + +struct perf_counts { + s8 scaled; + struct perf_counts_values aggr; + struct xyarray *values; +}; + + +static inline struct perf_counts_values* +perf_counts(struct perf_counts *counts, int cpu, int thread) +{ + return xyarray__entry(counts->values, cpu, thread); +} + +struct perf_counts *perf_counts__new(int ncpus, int nthreads); +void perf_counts__delete(struct perf_counts *counts); + +void perf_evsel__reset_counts(struct perf_evsel *evsel); +int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads); +void perf_evsel__free_counts(struct perf_evsel *evsel); + +#endif /* __PERF_COUNTS_H */ diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 2da5581ec74d..86d9c7302598 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -36,6 +36,11 @@ static int _eprintf(int level, int var, const char *fmt, va_list args) return ret; } +int veprintf(int level, int var, const char *fmt, va_list args) +{ + return _eprintf(level, var, fmt, args); +} + int eprintf(int level, int var, const char *fmt, ...) { va_list args; diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index caac2fdc6105..8b9a088c32ab 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -50,6 +50,7 @@ void pr_stat(const char *fmt, ...); int eprintf(int level, int var, const char *fmt, ...) __attribute__((format(printf, 3, 4))); int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__((format(printf, 4, 5))); +int veprintf(int level, int var, const char *fmt, va_list args); int perf_debug_option(const char *str); diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 2fe98bb0e95b..fc8db9c764ac 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -137,6 +137,10 @@ struct dso { struct rb_node rb_node; /* rbtree node sorted by long name */ struct rb_root symbols[MAP__NR_TYPES]; struct rb_root symbol_names[MAP__NR_TYPES]; + struct { + u64 addr; + struct symbol *symbol; + } last_find_result[MAP__NR_TYPES]; void *a2l; char *symsrc_filename; unsigned int a2l_fails; @@ -320,6 +324,8 @@ struct dso *__dsos__findnew(struct dsos *dsos, const char *name); struct dso *dsos__findnew(struct dsos *dsos, const char *name); bool __dsos__read_build_ids(struct list_head *head, bool with_hits); +void dso__reset_find_symbol_cache(struct dso *dso); + size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, bool (skip)(struct dso *dso, int parm), int parm); size_t __dsos__fprintf(struct list_head *head, FILE *fp); diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 57f3ef41c2bc..a509aa8433a1 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -734,15 +734,18 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) Dwarf_Lines *lines; Dwarf_Line *line; Dwarf_Addr addr; - const char *fname; + const char *fname, *decf = NULL; int lineno, ret = 0; + int decl = 0, inl; Dwarf_Die die_mem, *cu_die; size_t nlines, i; /* Get the CU die */ - if (dwarf_tag(rt_die) != DW_TAG_compile_unit) + if (dwarf_tag(rt_die) != DW_TAG_compile_unit) { cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL); - else + dwarf_decl_line(rt_die, &decl); + decf = dwarf_decl_file(rt_die); + } else cu_die = rt_die; if (!cu_die) { pr_debug2("Failed to get CU from given DIE.\n"); @@ -767,15 +770,21 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) continue; } /* Filter lines based on address */ - if (rt_die != cu_die) + if (rt_die != cu_die) { /* * Address filtering * The line is included in given function, and * no inline block includes it. */ - if (!dwarf_haspc(rt_die, addr) || - die_find_inlinefunc(rt_die, addr, &die_mem)) + if (!dwarf_haspc(rt_die, addr)) continue; + if (die_find_inlinefunc(rt_die, addr, &die_mem)) { + dwarf_decl_line(&die_mem, &inl); + if (inl != decl || + decf != dwarf_decl_file(&die_mem)) + continue; + } + } /* Get source line */ fname = dwarf_linesrc(line, NULL, NULL); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 67a977e5d0ab..7ff61274ed57 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -26,6 +26,8 @@ static const char *perf_event__names[] = { [PERF_RECORD_AUX] = "AUX", [PERF_RECORD_ITRACE_START] = "ITRACE_START", [PERF_RECORD_LOST_SAMPLES] = "LOST_SAMPLES", + [PERF_RECORD_SWITCH] = "SWITCH", + [PERF_RECORD_SWITCH_CPU_WIDE] = "SWITCH_CPU_WIDE", [PERF_RECORD_HEADER_ATTR] = "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", @@ -749,6 +751,14 @@ int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused, return machine__process_lost_samples_event(machine, event, sample); } +int perf_event__process_switch(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine) +{ + return machine__process_switch_event(machine, event); +} + size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) { return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n", @@ -827,6 +837,20 @@ size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp) event->itrace_start.pid, event->itrace_start.tid); } +size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp) +{ + bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; + const char *in_out = out ? "OUT" : "IN "; + + if (event->header.type == PERF_RECORD_SWITCH) + return fprintf(fp, " %s\n", in_out); + + return fprintf(fp, " %s %s pid/tid: %5u/%-5u\n", + in_out, out ? "next" : "prev", + event->context_switch.next_prev_pid, + event->context_switch.next_prev_tid); +} + size_t perf_event__fprintf(union perf_event *event, FILE *fp) { size_t ret = fprintf(fp, "PERF_RECORD_%s", @@ -852,6 +876,10 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp) case PERF_RECORD_ITRACE_START: ret += perf_event__fprintf_itrace_start(event, fp); break; + case PERF_RECORD_SWITCH: + case PERF_RECORD_SWITCH_CPU_WIDE: + ret += perf_event__fprintf_switch(event, fp); + break; default: ret += fprintf(fp, "\n"); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index c53f36384b64..f729df5e25e6 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -134,7 +134,8 @@ struct branch_flags { u64 predicted:1; u64 in_tx:1; u64 abort:1; - u64 reserved:60; + u64 cycles:16; + u64 reserved:44; }; struct branch_entry { @@ -348,6 +349,12 @@ struct itrace_start_event { u32 pid, tid; }; +struct context_switch_event { + struct perf_event_header header; + u32 next_prev_pid; + u32 next_prev_tid; +}; + union perf_event { struct perf_event_header header; struct mmap_event mmap; @@ -369,6 +376,7 @@ union perf_event { struct auxtrace_error_event auxtrace_error; struct aux_event aux; struct itrace_start_event itrace_start; + struct context_switch_event context_switch; }; void perf_event__print_totals(void); @@ -418,6 +426,10 @@ int perf_event__process_itrace_start(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); +int perf_event__process_switch(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); int perf_event__process_mmap(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -480,6 +492,7 @@ size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp); size_t perf_event__fprintf_task(union perf_event *event, FILE *fp); size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp); size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp); size_t perf_event__fprintf(union perf_event *event, FILE *fp); u64 kallsyms__get_function_start(const char *kallsyms_filename, diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 6cfdee68e763..8d00039d6a20 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -98,6 +98,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist) evlist__for_each_safe(evlist, n, pos) { list_del_init(&pos->node); + pos->evlist = NULL; perf_evsel__delete(pos); } @@ -125,6 +126,7 @@ void perf_evlist__delete(struct perf_evlist *evlist) void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) { + entry->evlist = evlist; list_add_tail(&entry->node, &evlist->entries); entry->idx = evlist->nr_entries; entry->tracking = !entry->idx; @@ -573,7 +575,7 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) { struct perf_sample_id *sid; - if (evlist->nr_entries == 1) + if (evlist->nr_entries == 1 || !id) return perf_evlist__first(evlist); sid = perf_evlist__id2sid(evlist, id); @@ -1102,7 +1104,7 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, } static int perf_evlist__propagate_maps(struct perf_evlist *evlist, - struct target *target) + bool has_user_cpus) { struct perf_evsel *evsel; @@ -1111,15 +1113,16 @@ static int perf_evlist__propagate_maps(struct perf_evlist *evlist, * We already have cpus for evsel (via PMU sysfs) so * keep it, if there's no target cpu list defined. */ - if (evsel->cpus && target->cpu_list) + if (evsel->cpus && has_user_cpus) cpu_map__put(evsel->cpus); - if (!evsel->cpus || target->cpu_list) + if (!evsel->cpus || has_user_cpus) evsel->cpus = cpu_map__get(evlist->cpus); evsel->threads = thread_map__get(evlist->threads); - if (!evsel->cpus || !evsel->threads) + if ((evlist->cpus && !evsel->cpus) || + (evlist->threads && !evsel->threads)) return -ENOMEM; } @@ -1142,7 +1145,7 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) if (evlist->cpus == NULL) goto out_delete_threads; - return perf_evlist__propagate_maps(evlist, target); + return perf_evlist__propagate_maps(evlist, !!target->cpu_list); out_delete_threads: thread_map__put(evlist->threads); @@ -1150,6 +1153,23 @@ out_delete_threads: return -1; } +int perf_evlist__set_maps(struct perf_evlist *evlist, + struct cpu_map *cpus, + struct thread_map *threads) +{ + if (evlist->cpus) + cpu_map__put(evlist->cpus); + + evlist->cpus = cpus; + + if (evlist->threads) + thread_map__put(evlist->threads); + + evlist->threads = threads; + + return perf_evlist__propagate_maps(evlist, false); +} + int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel) { struct perf_evsel *evsel; @@ -1161,7 +1181,7 @@ int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **e if (evsel->filter == NULL) continue; - err = perf_evsel__set_filter(evsel, ncpus, nthreads, evsel->filter); + err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter); if (err) { *err_evsel = evsel; break; @@ -1175,11 +1195,9 @@ int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter) { struct perf_evsel *evsel; int err = 0; - const int ncpus = cpu_map__nr(evlist->cpus), - nthreads = thread_map__nr(evlist->threads); evlist__for_each(evlist, evsel) { - err = perf_evsel__set_filter(evsel, ncpus, nthreads, filter); + err = perf_evsel__set_filter(evsel, filter); if (err) break; } @@ -1257,6 +1275,16 @@ u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist) return __perf_evlist__combined_sample_type(evlist); } +u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + u64 branch_type = 0; + + evlist__for_each(evlist, evsel) + branch_type |= evsel->attr.branch_sample_type; + return branch_type; +} + bool perf_evlist__valid_read_format(struct perf_evlist *evlist) { struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 037633c1da9d..b39a6198f4ac 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -56,6 +56,7 @@ struct perf_evlist { struct cpu_map *cpus; struct perf_evsel *selected; struct events_stats stats; + struct perf_env *env; }; struct perf_evsel_str_handler { @@ -114,6 +115,8 @@ void perf_evlist__close(struct perf_evlist *evlist); void perf_evlist__set_id_pos(struct perf_evlist *evlist); bool perf_can_sample_identifier(void); +bool perf_can_record_switch_events(void); +bool perf_can_record_cpu_wide(void); void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts); int record_opts__config(struct record_opts *opts); @@ -152,14 +155,9 @@ int perf_evlist__enable_event_idx(struct perf_evlist *evlist, void perf_evlist__set_selected(struct perf_evlist *evlist, struct perf_evsel *evsel); -static inline void perf_evlist__set_maps(struct perf_evlist *evlist, - struct cpu_map *cpus, - struct thread_map *threads) -{ - evlist->cpus = cpus; - evlist->threads = threads; -} - +int perf_evlist__set_maps(struct perf_evlist *evlist, + struct cpu_map *cpus, + struct thread_map *threads); int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target); int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel); @@ -169,6 +167,7 @@ void perf_evlist__set_leader(struct perf_evlist *evlist); u64 perf_evlist__read_format(struct perf_evlist *evlist); u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist); u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist); +u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist); bool perf_evlist__sample_id_all(struct perf_evlist *evlist); u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 2936b3080722..bac25f41a751 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -206,10 +206,13 @@ void perf_evsel__init(struct perf_evsel *evsel, evsel->leader = evsel; evsel->unit = ""; evsel->scale = 1.0; + evsel->evlist = NULL; INIT_LIST_HEAD(&evsel->node); + INIT_LIST_HEAD(&evsel->config_terms); perf_evsel__object.init(evsel); evsel->sample_size = __perf_evsel__sample_size(attr->sample_type); perf_evsel__calc_id_pos(evsel); + evsel->cmdline_group_boundary = false; } struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) @@ -543,14 +546,15 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size) static void perf_evsel__config_callgraph(struct perf_evsel *evsel, - struct record_opts *opts) + struct record_opts *opts, + struct callchain_param *param) { bool function = perf_evsel__is_function_event(evsel); struct perf_event_attr *attr = &evsel->attr; perf_evsel__set_sample_bit(evsel, CALLCHAIN); - if (callchain_param.record_mode == CALLCHAIN_LBR) { + if (param->record_mode == CALLCHAIN_LBR) { if (!opts->branch_stack) { if (attr->exclude_user) { pr_warning("LBR callstack option is only available " @@ -566,12 +570,12 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel, "Falling back to framepointers.\n"); } - if (callchain_param.record_mode == CALLCHAIN_DWARF) { + if (param->record_mode == CALLCHAIN_DWARF) { if (!function) { perf_evsel__set_sample_bit(evsel, REGS_USER); perf_evsel__set_sample_bit(evsel, STACK_USER); attr->sample_regs_user = PERF_REGS_MASK; - attr->sample_stack_user = callchain_param.dump_size; + attr->sample_stack_user = param->dump_size; attr->exclude_callchain_user = 1; } else { pr_info("Cannot use DWARF unwind for function trace event," @@ -585,6 +589,97 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel, } } +static void +perf_evsel__reset_callgraph(struct perf_evsel *evsel, + struct callchain_param *param) +{ + struct perf_event_attr *attr = &evsel->attr; + + perf_evsel__reset_sample_bit(evsel, CALLCHAIN); + if (param->record_mode == CALLCHAIN_LBR) { + perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); + attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER | + PERF_SAMPLE_BRANCH_CALL_STACK); + } + if (param->record_mode == CALLCHAIN_DWARF) { + perf_evsel__reset_sample_bit(evsel, REGS_USER); + perf_evsel__reset_sample_bit(evsel, STACK_USER); + } +} + +static void apply_config_terms(struct perf_evsel *evsel, + struct record_opts *opts) +{ + struct perf_evsel_config_term *term; + struct list_head *config_terms = &evsel->config_terms; + struct perf_event_attr *attr = &evsel->attr; + struct callchain_param param; + u32 dump_size = 0; + char *callgraph_buf = NULL; + + /* callgraph default */ + param.record_mode = callchain_param.record_mode; + + list_for_each_entry(term, config_terms, list) { + switch (term->type) { + case PERF_EVSEL__CONFIG_TERM_PERIOD: + attr->sample_period = term->val.period; + attr->freq = 0; + break; + case PERF_EVSEL__CONFIG_TERM_FREQ: + attr->sample_freq = term->val.freq; + attr->freq = 1; + break; + case PERF_EVSEL__CONFIG_TERM_TIME: + if (term->val.time) + perf_evsel__set_sample_bit(evsel, TIME); + else + perf_evsel__reset_sample_bit(evsel, TIME); + break; + case PERF_EVSEL__CONFIG_TERM_CALLGRAPH: + callgraph_buf = term->val.callgraph; + break; + case PERF_EVSEL__CONFIG_TERM_STACK_USER: + dump_size = term->val.stack_user; + break; + default: + break; + } + } + + /* User explicitly set per-event callgraph, clear the old setting and reset. */ + if ((callgraph_buf != NULL) || (dump_size > 0)) { + + /* parse callgraph parameters */ + if (callgraph_buf != NULL) { + if (!strcmp(callgraph_buf, "no")) { + param.enabled = false; + param.record_mode = CALLCHAIN_NONE; + } else { + param.enabled = true; + if (parse_callchain_record(callgraph_buf, ¶m)) { + pr_err("per-event callgraph setting for %s failed. " + "Apply callgraph global setting for it\n", + evsel->name); + return; + } + } + } + if (dump_size > 0) { + dump_size = round_up(dump_size, sizeof(u64)); + param.dump_size = dump_size; + } + + /* If global callgraph set, clear it */ + if (callchain_param.enabled) + perf_evsel__reset_callgraph(evsel, &callchain_param); + + /* set perf-event callgraph */ + if (param.enabled) + perf_evsel__config_callgraph(evsel, opts, ¶m); + } +} + /* * The enable_on_exec/disabled value strategy: * @@ -689,7 +784,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) evsel->attr.exclude_callchain_user = 1; if (callchain_param.enabled && !evsel->no_aux_samples) - perf_evsel__config_callgraph(evsel, opts); + perf_evsel__config_callgraph(evsel, opts, &callchain_param); if (opts->sample_intr_regs) { attr->sample_regs_intr = PERF_REGS_MASK; @@ -707,7 +802,8 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) */ if (opts->sample_time && (!perf_missing_features.sample_id_all && - (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu))) + (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu || + opts->sample_time_set))) perf_evsel__set_sample_bit(evsel, TIME); if (opts->raw_samples && !evsel->no_aux_samples) { @@ -736,6 +832,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) attr->mmap2 = track && !perf_missing_features.mmap2; attr->comm = track; + if (opts->record_switch_events) + attr->context_switch = track; + if (opts->sample_transaction) perf_evsel__set_sample_bit(evsel, TRANSACTION); @@ -772,6 +871,12 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) attr->use_clockid = 1; attr->clockid = opts->clockid; } + + /* + * Apply event specific term settings, + * it overloads any global configuration. + */ + apply_config_terms(evsel, opts); } static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) @@ -815,14 +920,44 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int ncpus, int nthrea return 0; } -int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads, - const char *filter) +int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads, + const char *filter) { return perf_evsel__run_ioctl(evsel, ncpus, nthreads, PERF_EVENT_IOC_SET_FILTER, (void *)filter); } +int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter) +{ + char *new_filter = strdup(filter); + + if (new_filter != NULL) { + free(evsel->filter); + evsel->filter = new_filter; + return 0; + } + + return -1; +} + +int perf_evsel__append_filter(struct perf_evsel *evsel, + const char *op, const char *filter) +{ + char *new_filter; + + if (evsel->filter == NULL) + return perf_evsel__set_filter(evsel, filter); + + if (asprintf(&new_filter,"(%s) %s (%s)", evsel->filter, op, filter) > 0) { + free(evsel->filter); + evsel->filter = new_filter; + return 0; + } + + return -1; +} + int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads) { return perf_evsel__run_ioctl(evsel, ncpus, nthreads, @@ -865,6 +1000,16 @@ static void perf_evsel__free_id(struct perf_evsel *evsel) zfree(&evsel->id); } +static void perf_evsel__free_config_terms(struct perf_evsel *evsel) +{ + struct perf_evsel_config_term *term, *h; + + list_for_each_entry_safe(term, h, &evsel->config_terms, list) { + list_del(&term->list); + free(term); + } +} + void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads) { int cpu, thread; @@ -882,8 +1027,10 @@ void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads) void perf_evsel__exit(struct perf_evsel *evsel) { assert(list_empty(&evsel->node)); + assert(evsel->evlist == NULL); perf_evsel__free_fd(evsel); perf_evsel__free_id(evsel); + perf_evsel__free_config_terms(evsel); close_cgroup(evsel->cgrp); cpu_map__put(evsel->cpus); thread_map__put(evsel->threads); @@ -1095,6 +1242,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(mmap2, p_unsigned); PRINT_ATTRf(comm_exec, p_unsigned); PRINT_ATTRf(use_clockid, p_unsigned); + PRINT_ATTRf(context_switch, p_unsigned); PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); PRINT_ATTRf(bp_type, p_unsigned); @@ -2075,8 +2223,13 @@ int perf_evsel__fprintf(struct perf_evsel *evsel, printed += perf_event_attr__fprintf(fp, &evsel->attr, __print_attr__fprintf, &first); } else if (details->freq) { - printed += comma_fprintf(fp, &first, " sample_freq=%" PRIu64, - (u64)evsel->attr.sample_freq); + const char *term = "sample_freq"; + + if (!evsel->attr.freq) + term = "sample_period"; + + printed += comma_fprintf(fp, &first, " %s=%" PRIu64, + term, (u64)evsel->attr.sample_freq); } out: fputc('\n', fp); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 4a7ed5656cf0..298e6bbca200 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -9,7 +9,7 @@ #include "xyarray.h" #include "symbol.h" #include "cpumap.h" -#include "stat.h" +#include "counts.h" struct perf_evsel; @@ -31,8 +31,38 @@ struct perf_sample_id { struct cgroup_sel; +/* + * The 'struct perf_evsel_config_term' is used to pass event + * specific configuration data to perf_evsel__config routine. + * It is allocated within event parsing and attached to + * perf_evsel::config_terms list head. +*/ +enum { + PERF_EVSEL__CONFIG_TERM_PERIOD, + PERF_EVSEL__CONFIG_TERM_FREQ, + PERF_EVSEL__CONFIG_TERM_TIME, + PERF_EVSEL__CONFIG_TERM_CALLGRAPH, + PERF_EVSEL__CONFIG_TERM_STACK_USER, + PERF_EVSEL__CONFIG_TERM_MAX, +}; + +struct perf_evsel_config_term { + struct list_head list; + int type; + union { + u64 period; + u64 freq; + bool time; + char *callgraph; + u64 stack_user; + } val; +}; + /** struct perf_evsel - event selector * + * @evlist - evlist this evsel is in, if it is in one. + * @node - To insert it into evlist->entries or in other list_heads, say in + * the event parsing routines. * @name - Can be set to retain the original event name passed by the user, * so that when showing results in tools such as 'perf stat', we * show the name used, not some alias. @@ -46,6 +76,7 @@ struct cgroup_sel; */ struct perf_evsel { struct list_head node; + struct perf_evlist *evlist; struct perf_event_attr attr; char *filter; struct xyarray *fd; @@ -86,6 +117,8 @@ struct perf_evsel { unsigned long *per_pkg_mask; struct perf_evsel *leader; char *group_name; + bool cmdline_group_boundary; + struct list_head config_terms; }; union u64_swap { @@ -182,8 +215,11 @@ void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel, void perf_evsel__set_sample_id(struct perf_evsel *evsel, bool use_sample_identifier); -int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads, - const char *filter); +int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter); +int perf_evsel__append_filter(struct perf_evsel *evsel, + const char *op, const char *filter); +int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads, + const char *filter); int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads); int perf_evsel__open_per_cpu(struct perf_evsel *evsel, diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 03ace57a800c..41814547da15 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -923,17 +923,13 @@ static void print_cmdline(struct perf_header *ph, int fd __maybe_unused, FILE *fp) { int nr, i; - char *str; nr = ph->env.nr_cmdline; - str = ph->env.cmdline; fprintf(fp, "# cmdline : "); - for (i = 0; i < nr; i++) { - fprintf(fp, "%s ", str); - str += strlen(str) + 1; - } + for (i = 0; i < nr; i++) + fprintf(fp, "%s ", ph->env.cmdline_argv[i]); fputc('\n', fp); } @@ -1541,14 +1537,13 @@ process_event_desc(struct perf_file_section *section __maybe_unused, return 0; } -static int process_cmdline(struct perf_file_section *section __maybe_unused, +static int process_cmdline(struct perf_file_section *section, struct perf_header *ph, int fd, void *data __maybe_unused) { ssize_t ret; - char *str; - u32 nr, i; - struct strbuf sb; + char *str, *cmdline = NULL, **argv = NULL; + u32 nr, i, len = 0; ret = readn(fd, &nr, sizeof(nr)); if (ret != sizeof(nr)) @@ -1558,22 +1553,32 @@ static int process_cmdline(struct perf_file_section *section __maybe_unused, nr = bswap_32(nr); ph->env.nr_cmdline = nr; - strbuf_init(&sb, 128); + + cmdline = zalloc(section->size + nr + 1); + if (!cmdline) + return -1; + + argv = zalloc(sizeof(char *) * (nr + 1)); + if (!argv) + goto error; for (i = 0; i < nr; i++) { str = do_read_string(fd, ph); if (!str) goto error; - /* include a NULL character at the end */ - strbuf_add(&sb, str, strlen(str) + 1); + argv[i] = cmdline + len; + memcpy(argv[i], str, strlen(str) + 1); + len += strlen(str) + 1; free(str); } - ph->env.cmdline = strbuf_detach(&sb, NULL); + ph->env.cmdline = cmdline; + ph->env.cmdline_argv = (const char **) argv; return 0; error: - strbuf_release(&sb); + free(argv); + free(cmdline); return -1; } @@ -2509,6 +2514,7 @@ int perf_session__read_header(struct perf_session *session) if (session->evlist == NULL) return -ENOMEM; + session->evlist->env = &header->env; if (perf_data_file__is_pipe(file)) return perf_header__read_pipe(session); diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index d4d57962c591..396e4965f0c9 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -66,7 +66,7 @@ struct perf_header; int perf_file_header__read(struct perf_file_header *header, struct perf_header *ph, int fd); -struct perf_session_env { +struct perf_env { char *hostname; char *os_release; char *version; @@ -84,6 +84,7 @@ struct perf_session_env { int nr_pmu_mappings; int nr_groups; char *cmdline; + const char **cmdline_argv; char *sibling_cores; char *sibling_threads; char *numa_nodes; @@ -97,7 +98,7 @@ struct perf_header { u64 data_size; u64 feat_offset; DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); - struct perf_session_env env; + struct perf_env env; }; struct perf_evlist; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6f28d53d4e46..08b6cd945f1e 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -151,6 +151,12 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12); hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12); + if (h->srcline) + hists__new_col_len(hists, HISTC_SRCLINE, strlen(h->srcline)); + + if (h->srcfile) + hists__new_col_len(hists, HISTC_SRCFILE, strlen(h->srcfile)); + if (h->transaction) hists__new_col_len(hists, HISTC_TRANSACTION, hist_entry__transaction_len()); @@ -618,7 +624,8 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a * and not events sampled. Thus we use a pseudo period of 1. */ he = __hists__add_entry(hists, al, iter->parent, &bi[i], NULL, - 1, 1, 0, true); + 1, bi->flags.cycles ? bi->flags.cycles : 1, + 0, true); if (he == NULL) return -ENOMEM; @@ -760,6 +767,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, struct hist_entry **he_cache = iter->priv; struct hist_entry *he; struct hist_entry he_tmp = { + .hists = evsel__hists(evsel), .cpu = al->cpu, .thread = al->thread, .comm = thread__comm(al->thread), @@ -944,6 +952,8 @@ void hist_entry__delete(struct hist_entry *he) zfree(&he->stat_acc); free_srcline(he->srcline); + if (he->srcfile && he->srcfile[0]) + free(he->srcfile); free_callchain(he->callchain); free(he); } @@ -1099,13 +1109,14 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h) static void __hists__insert_output_entry(struct rb_root *entries, struct hist_entry *he, - u64 min_callchain_hits) + u64 min_callchain_hits, + bool use_callchain) { struct rb_node **p = &entries->rb_node; struct rb_node *parent = NULL; struct hist_entry *iter; - if (symbol_conf.use_callchain) + if (use_callchain) callchain_param.sort(&he->sorted_chain, he->callchain, min_callchain_hits, &callchain_param); @@ -1129,6 +1140,13 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog) struct rb_node *next; struct hist_entry *n; u64 min_callchain_hits; + struct perf_evsel *evsel = hists_to_evsel(hists); + bool use_callchain; + + if (evsel && !symbol_conf.show_ref_callgraph) + use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN; + else + use_callchain = symbol_conf.use_callchain; min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100); @@ -1147,7 +1165,7 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog) n = rb_entry(next, struct hist_entry, rb_node_in); next = rb_next(&n->rb_node_in); - __hists__insert_output_entry(&hists->entries, n, min_callchain_hits); + __hists__insert_output_entry(&hists->entries, n, min_callchain_hits, use_callchain); hists__inc_stats(hists, n); if (!n->filtered) @@ -1414,6 +1432,39 @@ int hists__link(struct hists *leader, struct hists *other) return 0; } +void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, + struct perf_sample *sample, bool nonany_branch_mode) +{ + struct branch_info *bi; + + /* If we have branch cycles always annotate them. */ + if (bs && bs->nr && bs->entries[0].flags.cycles) { + int i; + + bi = sample__resolve_bstack(sample, al); + if (bi) { + struct addr_map_symbol *prev = NULL; + + /* + * Ignore errors, still want to process the + * other entries. + * + * For non standard branch modes always + * force no IPC (prev == NULL) + * + * Note that perf stores branches reversed from + * program order! + */ + for (i = bs->nr - 1; i >= 0; i--) { + addr_map_symbol__account_cycles(&bi[i].from, + nonany_branch_mode ? NULL : prev, + bi[i].flags.cycles); + prev = &bi[i].to; + } + free(bi); + } + } +} size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp) { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 5ed8d9c22981..de6d58e7f0d5 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -30,6 +30,7 @@ enum hist_column { HISTC_PARENT, HISTC_CPU, HISTC_SRCLINE, + HISTC_SRCFILE, HISTC_MISPREDICT, HISTC_IN_TX, HISTC_ABORT, @@ -47,6 +48,7 @@ enum hist_column { HISTC_MEM_SNOOP, HISTC_MEM_DCACHELINE, HISTC_TRANSACTION, + HISTC_CYCLES, HISTC_NR_COLS, /* Last entry */ }; @@ -311,7 +313,7 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, struct hist_browser_timer *hbt, float min_pcnt, - struct perf_session_env *env); + struct perf_env *env); int script_browse(const char *script_opt); #else static inline @@ -319,7 +321,7 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused, const char *help __maybe_unused, struct hist_browser_timer *hbt __maybe_unused, float min_pcnt __maybe_unused, - struct perf_session_env *env __maybe_unused) + struct perf_env *env __maybe_unused) { return 0; } @@ -349,6 +351,9 @@ static inline int script_browse(const char *script_opt __maybe_unused) unsigned int hists__sort_list_width(struct hists *hists); +void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, + struct perf_sample *sample, bool nonany_branch_mode); + struct option; int parse_filter_percentage(const struct option *opt __maybe_unused, const char *arg, int unset __maybe_unused); diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c new file mode 100644 index 000000000000..ea768625ab5b --- /dev/null +++ b/tools/perf/util/intel-bts.c @@ -0,0 +1,933 @@ +/* + * intel-bts.c: Intel Processor Trace support + * Copyright (c) 2013-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <endian.h> +#include <byteswap.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/log2.h> + +#include "cpumap.h" +#include "color.h" +#include "evsel.h" +#include "evlist.h" +#include "machine.h" +#include "session.h" +#include "util.h" +#include "thread.h" +#include "thread-stack.h" +#include "debug.h" +#include "tsc.h" +#include "auxtrace.h" +#include "intel-pt-decoder/intel-pt-insn-decoder.h" +#include "intel-bts.h" + +#define MAX_TIMESTAMP (~0ULL) + +#define INTEL_BTS_ERR_NOINSN 5 +#define INTEL_BTS_ERR_LOST 9 + +#if __BYTE_ORDER == __BIG_ENDIAN +#define le64_to_cpu bswap_64 +#else +#define le64_to_cpu +#endif + +struct intel_bts { + struct auxtrace auxtrace; + struct auxtrace_queues queues; + struct auxtrace_heap heap; + u32 auxtrace_type; + struct perf_session *session; + struct machine *machine; + bool sampling_mode; + bool snapshot_mode; + bool data_queued; + u32 pmu_type; + struct perf_tsc_conversion tc; + bool cap_user_time_zero; + struct itrace_synth_opts synth_opts; + bool sample_branches; + u32 branches_filter; + u64 branches_sample_type; + u64 branches_id; + size_t branches_event_size; + bool synth_needs_swap; +}; + +struct intel_bts_queue { + struct intel_bts *bts; + unsigned int queue_nr; + struct auxtrace_buffer *buffer; + bool on_heap; + bool done; + pid_t pid; + pid_t tid; + int cpu; + u64 time; + struct intel_pt_insn intel_pt_insn; + u32 sample_flags; +}; + +struct branch { + u64 from; + u64 to; + u64 misc; +}; + +static void intel_bts_dump(struct intel_bts *bts __maybe_unused, + unsigned char *buf, size_t len) +{ + struct branch *branch; + size_t i, pos = 0, br_sz = sizeof(struct branch), sz; + const char *color = PERF_COLOR_BLUE; + + color_fprintf(stdout, color, + ". ... Intel BTS data: size %zu bytes\n", + len); + + while (len) { + if (len >= br_sz) + sz = br_sz; + else + sz = len; + printf("."); + color_fprintf(stdout, color, " %08x: ", pos); + for (i = 0; i < sz; i++) + color_fprintf(stdout, color, " %02x", buf[i]); + for (; i < br_sz; i++) + color_fprintf(stdout, color, " "); + if (len >= br_sz) { + branch = (struct branch *)buf; + color_fprintf(stdout, color, " %"PRIx64" -> %"PRIx64" %s\n", + le64_to_cpu(branch->from), + le64_to_cpu(branch->to), + le64_to_cpu(branch->misc) & 0x10 ? + "pred" : "miss"); + } else { + color_fprintf(stdout, color, " Bad record!\n"); + } + pos += sz; + buf += sz; + len -= sz; + } +} + +static void intel_bts_dump_event(struct intel_bts *bts, unsigned char *buf, + size_t len) +{ + printf(".\n"); + intel_bts_dump(bts, buf, len); +} + +static int intel_bts_lost(struct intel_bts *bts, struct perf_sample *sample) +{ + union perf_event event; + int err; + + auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, + INTEL_BTS_ERR_LOST, sample->cpu, sample->pid, + sample->tid, 0, "Lost trace data"); + + err = perf_session__deliver_synth_event(bts->session, &event, NULL); + if (err) + pr_err("Intel BTS: failed to deliver error event, error %d\n", + err); + + return err; +} + +static struct intel_bts_queue *intel_bts_alloc_queue(struct intel_bts *bts, + unsigned int queue_nr) +{ + struct intel_bts_queue *btsq; + + btsq = zalloc(sizeof(struct intel_bts_queue)); + if (!btsq) + return NULL; + + btsq->bts = bts; + btsq->queue_nr = queue_nr; + btsq->pid = -1; + btsq->tid = -1; + btsq->cpu = -1; + + return btsq; +} + +static int intel_bts_setup_queue(struct intel_bts *bts, + struct auxtrace_queue *queue, + unsigned int queue_nr) +{ + struct intel_bts_queue *btsq = queue->priv; + + if (list_empty(&queue->head)) + return 0; + + if (!btsq) { + btsq = intel_bts_alloc_queue(bts, queue_nr); + if (!btsq) + return -ENOMEM; + queue->priv = btsq; + + if (queue->cpu != -1) + btsq->cpu = queue->cpu; + btsq->tid = queue->tid; + } + + if (bts->sampling_mode) + return 0; + + if (!btsq->on_heap && !btsq->buffer) { + int ret; + + btsq->buffer = auxtrace_buffer__next(queue, NULL); + if (!btsq->buffer) + return 0; + + ret = auxtrace_heap__add(&bts->heap, queue_nr, + btsq->buffer->reference); + if (ret) + return ret; + btsq->on_heap = true; + } + + return 0; +} + +static int intel_bts_setup_queues(struct intel_bts *bts) +{ + unsigned int i; + int ret; + + for (i = 0; i < bts->queues.nr_queues; i++) { + ret = intel_bts_setup_queue(bts, &bts->queues.queue_array[i], + i); + if (ret) + return ret; + } + return 0; +} + +static inline int intel_bts_update_queues(struct intel_bts *bts) +{ + if (bts->queues.new_data) { + bts->queues.new_data = false; + return intel_bts_setup_queues(bts); + } + return 0; +} + +static unsigned char *intel_bts_find_overlap(unsigned char *buf_a, size_t len_a, + unsigned char *buf_b, size_t len_b) +{ + size_t offs, len; + + if (len_a > len_b) + offs = len_a - len_b; + else + offs = 0; + + for (; offs < len_a; offs += sizeof(struct branch)) { + len = len_a - offs; + if (!memcmp(buf_a + offs, buf_b, len)) + return buf_b + len; + } + + return buf_b; +} + +static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue, + struct auxtrace_buffer *b) +{ + struct auxtrace_buffer *a; + void *start; + + if (b->list.prev == &queue->head) + return 0; + a = list_entry(b->list.prev, struct auxtrace_buffer, list); + start = intel_bts_find_overlap(a->data, a->size, b->data, b->size); + if (!start) + return -EINVAL; + b->use_size = b->data + b->size - start; + b->use_data = start; + return 0; +} + +static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, + struct branch *branch) +{ + int ret; + struct intel_bts *bts = btsq->bts; + union perf_event event; + struct perf_sample sample = { .ip = 0, }; + + event.sample.header.type = PERF_RECORD_SAMPLE; + event.sample.header.misc = PERF_RECORD_MISC_USER; + event.sample.header.size = sizeof(struct perf_event_header); + + sample.ip = le64_to_cpu(branch->from); + sample.pid = btsq->pid; + sample.tid = btsq->tid; + sample.addr = le64_to_cpu(branch->to); + sample.id = btsq->bts->branches_id; + sample.stream_id = btsq->bts->branches_id; + sample.period = 1; + sample.cpu = btsq->cpu; + sample.flags = btsq->sample_flags; + sample.insn_len = btsq->intel_pt_insn.length; + + if (bts->synth_opts.inject) { + event.sample.header.size = bts->branches_event_size; + ret = perf_event__synthesize_sample(&event, + bts->branches_sample_type, + 0, &sample, + bts->synth_needs_swap); + if (ret) + return ret; + } + + ret = perf_session__deliver_synth_event(bts->session, &event, &sample); + if (ret) + pr_err("Intel BTS: failed to deliver branch event, error %d\n", + ret); + + return ret; +} + +static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip) +{ + struct machine *machine = btsq->bts->machine; + struct thread *thread; + struct addr_location al; + unsigned char buf[1024]; + size_t bufsz; + ssize_t len; + int x86_64; + uint8_t cpumode; + int err = -1; + + bufsz = intel_pt_insn_max_size(); + + if (machine__kernel_ip(machine, ip)) + cpumode = PERF_RECORD_MISC_KERNEL; + else + cpumode = PERF_RECORD_MISC_USER; + + thread = machine__find_thread(machine, -1, btsq->tid); + if (!thread) + return -1; + + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al); + if (!al.map || !al.map->dso) + goto out_put; + + len = dso__data_read_addr(al.map->dso, al.map, machine, ip, buf, bufsz); + if (len <= 0) + goto out_put; + + /* Load maps to ensure dso->is_64_bit has been updated */ + map__load(al.map, machine->symbol_filter); + + x86_64 = al.map->dso->is_64_bit; + + if (intel_pt_get_insn(buf, len, x86_64, &btsq->intel_pt_insn)) + goto out_put; + + err = 0; +out_put: + thread__put(thread); + return err; +} + +static int intel_bts_synth_error(struct intel_bts *bts, int cpu, pid_t pid, + pid_t tid, u64 ip) +{ + union perf_event event; + int err; + + auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, + INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip, + "Failed to get instruction"); + + err = perf_session__deliver_synth_event(bts->session, &event, NULL); + if (err) + pr_err("Intel BTS: failed to deliver error event, error %d\n", + err); + + return err; +} + +static int intel_bts_get_branch_type(struct intel_bts_queue *btsq, + struct branch *branch) +{ + int err; + + if (!branch->from) { + if (branch->to) + btsq->sample_flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_TRACE_BEGIN; + else + btsq->sample_flags = 0; + btsq->intel_pt_insn.length = 0; + } else if (!branch->to) { + btsq->sample_flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_TRACE_END; + btsq->intel_pt_insn.length = 0; + } else { + err = intel_bts_get_next_insn(btsq, branch->from); + if (err) { + btsq->sample_flags = 0; + btsq->intel_pt_insn.length = 0; + if (!btsq->bts->synth_opts.errors) + return 0; + err = intel_bts_synth_error(btsq->bts, btsq->cpu, + btsq->pid, btsq->tid, + branch->from); + return err; + } + btsq->sample_flags = intel_pt_insn_type(btsq->intel_pt_insn.op); + /* Check for an async branch into the kernel */ + if (!machine__kernel_ip(btsq->bts->machine, branch->from) && + machine__kernel_ip(btsq->bts->machine, branch->to) && + btsq->sample_flags != (PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_CALL | + PERF_IP_FLAG_SYSCALLRET)) + btsq->sample_flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_CALL | + PERF_IP_FLAG_ASYNC | + PERF_IP_FLAG_INTERRUPT; + } + + return 0; +} + +static int intel_bts_process_buffer(struct intel_bts_queue *btsq, + struct auxtrace_buffer *buffer) +{ + struct branch *branch; + size_t sz, bsz = sizeof(struct branch); + u32 filter = btsq->bts->branches_filter; + int err = 0; + + if (buffer->use_data) { + sz = buffer->use_size; + branch = buffer->use_data; + } else { + sz = buffer->size; + branch = buffer->data; + } + + if (!btsq->bts->sample_branches) + return 0; + + for (; sz > bsz; branch += 1, sz -= bsz) { + if (!branch->from && !branch->to) + continue; + intel_bts_get_branch_type(btsq, branch); + if (filter && !(filter & btsq->sample_flags)) + continue; + err = intel_bts_synth_branch_sample(btsq, branch); + if (err) + break; + } + return err; +} + +static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp) +{ + struct auxtrace_buffer *buffer = btsq->buffer, *old_buffer = buffer; + struct auxtrace_queue *queue; + struct thread *thread; + int err; + + if (btsq->done) + return 1; + + if (btsq->pid == -1) { + thread = machine__find_thread(btsq->bts->machine, -1, + btsq->tid); + if (thread) + btsq->pid = thread->pid_; + } else { + thread = machine__findnew_thread(btsq->bts->machine, btsq->pid, + btsq->tid); + } + + queue = &btsq->bts->queues.queue_array[btsq->queue_nr]; + + if (!buffer) + buffer = auxtrace_buffer__next(queue, NULL); + + if (!buffer) { + if (!btsq->bts->sampling_mode) + btsq->done = 1; + err = 1; + goto out_put; + } + + /* Currently there is no support for split buffers */ + if (buffer->consecutive) { + err = -EINVAL; + goto out_put; + } + + if (!buffer->data) { + int fd = perf_data_file__fd(btsq->bts->session->file); + + buffer->data = auxtrace_buffer__get_data(buffer, fd); + if (!buffer->data) { + err = -ENOMEM; + goto out_put; + } + } + + if (btsq->bts->snapshot_mode && !buffer->consecutive && + intel_bts_do_fix_overlap(queue, buffer)) { + err = -ENOMEM; + goto out_put; + } + + if (!btsq->bts->synth_opts.callchain && thread && + (!old_buffer || btsq->bts->sampling_mode || + (btsq->bts->snapshot_mode && !buffer->consecutive))) + thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1); + + err = intel_bts_process_buffer(btsq, buffer); + + auxtrace_buffer__drop_data(buffer); + + btsq->buffer = auxtrace_buffer__next(queue, buffer); + if (btsq->buffer) { + if (timestamp) + *timestamp = btsq->buffer->reference; + } else { + if (!btsq->bts->sampling_mode) + btsq->done = 1; + } +out_put: + thread__put(thread); + return err; +} + +static int intel_bts_flush_queue(struct intel_bts_queue *btsq) +{ + u64 ts = 0; + int ret; + + while (1) { + ret = intel_bts_process_queue(btsq, &ts); + if (ret < 0) + return ret; + if (ret) + break; + } + return 0; +} + +static int intel_bts_process_tid_exit(struct intel_bts *bts, pid_t tid) +{ + struct auxtrace_queues *queues = &bts->queues; + unsigned int i; + + for (i = 0; i < queues->nr_queues; i++) { + struct auxtrace_queue *queue = &bts->queues.queue_array[i]; + struct intel_bts_queue *btsq = queue->priv; + + if (btsq && btsq->tid == tid) + return intel_bts_flush_queue(btsq); + } + return 0; +} + +static int intel_bts_process_queues(struct intel_bts *bts, u64 timestamp) +{ + while (1) { + unsigned int queue_nr; + struct auxtrace_queue *queue; + struct intel_bts_queue *btsq; + u64 ts = 0; + int ret; + + if (!bts->heap.heap_cnt) + return 0; + + if (bts->heap.heap_array[0].ordinal > timestamp) + return 0; + + queue_nr = bts->heap.heap_array[0].queue_nr; + queue = &bts->queues.queue_array[queue_nr]; + btsq = queue->priv; + + auxtrace_heap__pop(&bts->heap); + + ret = intel_bts_process_queue(btsq, &ts); + if (ret < 0) { + auxtrace_heap__add(&bts->heap, queue_nr, ts); + return ret; + } + + if (!ret) { + ret = auxtrace_heap__add(&bts->heap, queue_nr, ts); + if (ret < 0) + return ret; + } else { + btsq->on_heap = false; + } + } + + return 0; +} + +static int intel_bts_process_event(struct perf_session *session, + union perf_event *event, + struct perf_sample *sample, + struct perf_tool *tool) +{ + struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, + auxtrace); + u64 timestamp; + int err; + + if (dump_trace) + return 0; + + if (!tool->ordered_events) { + pr_err("Intel BTS requires ordered events\n"); + return -EINVAL; + } + + if (sample->time && sample->time != (u64)-1) + timestamp = perf_time_to_tsc(sample->time, &bts->tc); + else + timestamp = 0; + + err = intel_bts_update_queues(bts); + if (err) + return err; + + err = intel_bts_process_queues(bts, timestamp); + if (err) + return err; + if (event->header.type == PERF_RECORD_EXIT) { + err = intel_bts_process_tid_exit(bts, event->comm.tid); + if (err) + return err; + } + + if (event->header.type == PERF_RECORD_AUX && + (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) && + bts->synth_opts.errors) + err = intel_bts_lost(bts, sample); + + return err; +} + +static int intel_bts_process_auxtrace_event(struct perf_session *session, + union perf_event *event, + struct perf_tool *tool __maybe_unused) +{ + struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, + auxtrace); + + if (bts->sampling_mode) + return 0; + + if (!bts->data_queued) { + struct auxtrace_buffer *buffer; + off_t data_offset; + int fd = perf_data_file__fd(session->file); + int err; + + if (perf_data_file__is_pipe(session->file)) { + data_offset = 0; + } else { + data_offset = lseek(fd, 0, SEEK_CUR); + if (data_offset == -1) + return -errno; + } + + err = auxtrace_queues__add_event(&bts->queues, session, event, + data_offset, &buffer); + if (err) + return err; + + /* Dump here now we have copied a piped trace out of the pipe */ + if (dump_trace) { + if (auxtrace_buffer__get_data(buffer, fd)) { + intel_bts_dump_event(bts, buffer->data, + buffer->size); + auxtrace_buffer__put_data(buffer); + } + } + } + + return 0; +} + +static int intel_bts_flush(struct perf_session *session __maybe_unused, + struct perf_tool *tool __maybe_unused) +{ + struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, + auxtrace); + int ret; + + if (dump_trace || bts->sampling_mode) + return 0; + + if (!tool->ordered_events) + return -EINVAL; + + ret = intel_bts_update_queues(bts); + if (ret < 0) + return ret; + + return intel_bts_process_queues(bts, MAX_TIMESTAMP); +} + +static void intel_bts_free_queue(void *priv) +{ + struct intel_bts_queue *btsq = priv; + + if (!btsq) + return; + free(btsq); +} + +static void intel_bts_free_events(struct perf_session *session) +{ + struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, + auxtrace); + struct auxtrace_queues *queues = &bts->queues; + unsigned int i; + + for (i = 0; i < queues->nr_queues; i++) { + intel_bts_free_queue(queues->queue_array[i].priv); + queues->queue_array[i].priv = NULL; + } + auxtrace_queues__free(queues); +} + +static void intel_bts_free(struct perf_session *session) +{ + struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, + auxtrace); + + auxtrace_heap__free(&bts->heap); + intel_bts_free_events(session); + session->auxtrace = NULL; + free(bts); +} + +struct intel_bts_synth { + struct perf_tool dummy_tool; + struct perf_session *session; +}; + +static int intel_bts_event_synth(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + struct intel_bts_synth *intel_bts_synth = + container_of(tool, struct intel_bts_synth, dummy_tool); + + return perf_session__deliver_synth_event(intel_bts_synth->session, + event, NULL); +} + +static int intel_bts_synth_event(struct perf_session *session, + struct perf_event_attr *attr, u64 id) +{ + struct intel_bts_synth intel_bts_synth; + + memset(&intel_bts_synth, 0, sizeof(struct intel_bts_synth)); + intel_bts_synth.session = session; + + return perf_event__synthesize_attr(&intel_bts_synth.dummy_tool, attr, 1, + &id, intel_bts_event_synth); +} + +static int intel_bts_synth_events(struct intel_bts *bts, + struct perf_session *session) +{ + struct perf_evlist *evlist = session->evlist; + struct perf_evsel *evsel; + struct perf_event_attr attr; + bool found = false; + u64 id; + int err; + + evlist__for_each(evlist, evsel) { + if (evsel->attr.type == bts->pmu_type && evsel->ids) { + found = true; + break; + } + } + + if (!found) { + pr_debug("There are no selected events with Intel BTS data\n"); + return 0; + } + + memset(&attr, 0, sizeof(struct perf_event_attr)); + attr.size = sizeof(struct perf_event_attr); + attr.type = PERF_TYPE_HARDWARE; + attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK; + attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | + PERF_SAMPLE_PERIOD; + attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; + attr.sample_type &= ~(u64)PERF_SAMPLE_CPU; + attr.exclude_user = evsel->attr.exclude_user; + attr.exclude_kernel = evsel->attr.exclude_kernel; + attr.exclude_hv = evsel->attr.exclude_hv; + attr.exclude_host = evsel->attr.exclude_host; + attr.exclude_guest = evsel->attr.exclude_guest; + attr.sample_id_all = evsel->attr.sample_id_all; + attr.read_format = evsel->attr.read_format; + + id = evsel->id[0] + 1000000000; + if (!id) + id = 1; + + if (bts->synth_opts.branches) { + attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; + attr.sample_period = 1; + attr.sample_type |= PERF_SAMPLE_ADDR; + pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n", + id, (u64)attr.sample_type); + err = intel_bts_synth_event(session, &attr, id); + if (err) { + pr_err("%s: failed to synthesize 'branches' event type\n", + __func__); + return err; + } + bts->sample_branches = true; + bts->branches_sample_type = attr.sample_type; + bts->branches_id = id; + /* + * We only use sample types from PERF_SAMPLE_MASK so we can use + * __perf_evsel__sample_size() here. + */ + bts->branches_event_size = sizeof(struct sample_event) + + __perf_evsel__sample_size(attr.sample_type); + } + + bts->synth_needs_swap = evsel->needs_swap; + + return 0; +} + +static const char * const intel_bts_info_fmts[] = { + [INTEL_BTS_PMU_TYPE] = " PMU Type %"PRId64"\n", + [INTEL_BTS_TIME_SHIFT] = " Time Shift %"PRIu64"\n", + [INTEL_BTS_TIME_MULT] = " Time Muliplier %"PRIu64"\n", + [INTEL_BTS_TIME_ZERO] = " Time Zero %"PRIu64"\n", + [INTEL_BTS_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", + [INTEL_BTS_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", +}; + +static void intel_bts_print_info(u64 *arr, int start, int finish) +{ + int i; + + if (!dump_trace) + return; + + for (i = start; i <= finish; i++) + fprintf(stdout, intel_bts_info_fmts[i], arr[i]); +} + +u64 intel_bts_auxtrace_info_priv[INTEL_BTS_AUXTRACE_PRIV_SIZE]; + +int intel_bts_process_auxtrace_info(union perf_event *event, + struct perf_session *session) +{ + struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; + size_t min_sz = sizeof(u64) * INTEL_BTS_SNAPSHOT_MODE; + struct intel_bts *bts; + int err; + + if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) + + min_sz) + return -EINVAL; + + bts = zalloc(sizeof(struct intel_bts)); + if (!bts) + return -ENOMEM; + + err = auxtrace_queues__init(&bts->queues); + if (err) + goto err_free; + + bts->session = session; + bts->machine = &session->machines.host; /* No kvm support */ + bts->auxtrace_type = auxtrace_info->type; + bts->pmu_type = auxtrace_info->priv[INTEL_BTS_PMU_TYPE]; + bts->tc.time_shift = auxtrace_info->priv[INTEL_BTS_TIME_SHIFT]; + bts->tc.time_mult = auxtrace_info->priv[INTEL_BTS_TIME_MULT]; + bts->tc.time_zero = auxtrace_info->priv[INTEL_BTS_TIME_ZERO]; + bts->cap_user_time_zero = + auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO]; + bts->snapshot_mode = auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE]; + + bts->sampling_mode = false; + + bts->auxtrace.process_event = intel_bts_process_event; + bts->auxtrace.process_auxtrace_event = intel_bts_process_auxtrace_event; + bts->auxtrace.flush_events = intel_bts_flush; + bts->auxtrace.free_events = intel_bts_free_events; + bts->auxtrace.free = intel_bts_free; + session->auxtrace = &bts->auxtrace; + + intel_bts_print_info(&auxtrace_info->priv[0], INTEL_BTS_PMU_TYPE, + INTEL_BTS_SNAPSHOT_MODE); + + if (dump_trace) + return 0; + + if (session->itrace_synth_opts && session->itrace_synth_opts->set) + bts->synth_opts = *session->itrace_synth_opts; + else + itrace_synth_opts__set_default(&bts->synth_opts); + + if (bts->synth_opts.calls) + bts->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | + PERF_IP_FLAG_TRACE_END; + if (bts->synth_opts.returns) + bts->branches_filter |= PERF_IP_FLAG_RETURN | + PERF_IP_FLAG_TRACE_BEGIN; + + err = intel_bts_synth_events(bts, session); + if (err) + goto err_free_queues; + + err = auxtrace_queues__process_index(&bts->queues, session); + if (err) + goto err_free_queues; + + if (bts->queues.populated) + bts->data_queued = true; + + return 0; + +err_free_queues: + auxtrace_queues__free(&bts->queues); + session->auxtrace = NULL; +err_free: + free(bts); + return err; +} diff --git a/tools/perf/util/intel-bts.h b/tools/perf/util/intel-bts.h new file mode 100644 index 000000000000..ca65e21b3e83 --- /dev/null +++ b/tools/perf/util/intel-bts.h @@ -0,0 +1,43 @@ +/* + * intel-bts.h: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef INCLUDE__PERF_INTEL_BTS_H__ +#define INCLUDE__PERF_INTEL_BTS_H__ + +#define INTEL_BTS_PMU_NAME "intel_bts" + +enum { + INTEL_BTS_PMU_TYPE, + INTEL_BTS_TIME_SHIFT, + INTEL_BTS_TIME_MULT, + INTEL_BTS_TIME_ZERO, + INTEL_BTS_CAP_USER_TIME_ZERO, + INTEL_BTS_SNAPSHOT_MODE, + INTEL_BTS_AUXTRACE_PRIV_MAX, +}; + +#define INTEL_BTS_AUXTRACE_PRIV_SIZE (INTEL_BTS_AUXTRACE_PRIV_MAX * sizeof(u64)) + +struct auxtrace_record; +struct perf_tool; +union perf_event; +struct perf_session; + +struct auxtrace_record *intel_bts_recording_init(int *err); + +int intel_bts_process_auxtrace_info(union perf_event *event, + struct perf_session *session); + +#endif diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build new file mode 100644 index 000000000000..240730d682c1 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/Build @@ -0,0 +1,11 @@ +libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o + +inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk +inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt + +$(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) + @$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@ + +$(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c + +CFLAGS_intel-pt-insn-decoder.o += -I$(OUTPUT)util/intel-pt-decoder -Wno-override-init diff --git a/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk new file mode 100644 index 000000000000..517567347aac --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk @@ -0,0 +1,386 @@ +#!/bin/awk -f +# gen-insn-attr-x86.awk: Instruction attribute table generator +# Written by Masami Hiramatsu <mhiramat@redhat.com> +# +# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c + +# Awk implementation sanity check +function check_awk_implement() { + if (sprintf("%x", 0) != "0") + return "Your awk has a printf-format problem." + return "" +} + +# Clear working vars +function clear_vars() { + delete table + delete lptable2 + delete lptable1 + delete lptable3 + eid = -1 # escape id + gid = -1 # group id + aid = -1 # AVX id + tname = "" +} + +BEGIN { + # Implementation error checking + awkchecked = check_awk_implement() + if (awkchecked != "") { + print "Error: " awkchecked > "/dev/stderr" + print "Please try to use gawk." > "/dev/stderr" + exit 1 + } + + # Setup generating tables + print "/* x86 opcode map generated from x86-opcode-map.txt */" + print "/* Do not change this code. */\n" + ggid = 1 + geid = 1 + gaid = 0 + delete etable + delete gtable + delete atable + + opnd_expr = "^[A-Za-z/]" + ext_expr = "^\\(" + sep_expr = "^\\|$" + group_expr = "^Grp[0-9A-Za-z]+" + + imm_expr = "^[IJAOL][a-z]" + imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" + imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)" + imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)" + imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)" + imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" + imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" + imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" + imm_flag["Ob"] = "INAT_MOFFSET" + imm_flag["Ov"] = "INAT_MOFFSET" + imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + + modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])" + force64_expr = "\\([df]64\\)" + rex_expr = "^REX(\\.[XRWB]+)*" + fpu_expr = "^ESC" # TODO + + lprefix1_expr = "\\((66|!F3)\\)" + lprefix2_expr = "\\(F3\\)" + lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)" + lprefix_expr = "\\((66|F2|F3)\\)" + max_lprefix = 4 + + # All opcodes starting with lower-case 'v' or with (v1) superscript + # accepts VEX prefix + vexok_opcode_expr = "^v.*" + vexok_expr = "\\(v1\\)" + # All opcodes with (v) superscript supports *only* VEX prefix + vexonly_expr = "\\(v\\)" + + prefix_expr = "\\(Prefix\\)" + prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" + prefix_num["REPNE"] = "INAT_PFX_REPNE" + prefix_num["REP/REPE"] = "INAT_PFX_REPE" + prefix_num["XACQUIRE"] = "INAT_PFX_REPNE" + prefix_num["XRELEASE"] = "INAT_PFX_REPE" + prefix_num["LOCK"] = "INAT_PFX_LOCK" + prefix_num["SEG=CS"] = "INAT_PFX_CS" + prefix_num["SEG=DS"] = "INAT_PFX_DS" + prefix_num["SEG=ES"] = "INAT_PFX_ES" + prefix_num["SEG=FS"] = "INAT_PFX_FS" + prefix_num["SEG=GS"] = "INAT_PFX_GS" + prefix_num["SEG=SS"] = "INAT_PFX_SS" + prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" + prefix_num["VEX+1byte"] = "INAT_PFX_VEX2" + prefix_num["VEX+2byte"] = "INAT_PFX_VEX3" + + clear_vars() +} + +function semantic_error(msg) { + print "Semantic error at " NR ": " msg > "/dev/stderr" + exit 1 +} + +function debug(msg) { + print "DEBUG: " msg +} + +function array_size(arr, i,c) { + c = 0 + for (i in arr) + c++ + return c +} + +/^Table:/ { + print "/* " $0 " */" + if (tname != "") + semantic_error("Hit Table: before EndTable:."); +} + +/^Referrer:/ { + if (NF != 1) { + # escape opcode table + ref = "" + for (i = 2; i <= NF; i++) + ref = ref $i + eid = escape[ref] + tname = sprintf("inat_escape_table_%d", eid) + } +} + +/^AVXcode:/ { + if (NF != 1) { + # AVX/escape opcode table + aid = $2 + if (gaid <= aid) + gaid = aid + 1 + if (tname == "") # AVX only opcode table + tname = sprintf("inat_avx_table_%d", $2) + } + if (aid == -1 && eid == -1) # primary opcode table + tname = "inat_primary_table" +} + +/^GrpTable:/ { + print "/* " $0 " */" + if (!($2 in group)) + semantic_error("No group: " $2 ) + gid = group[$2] + tname = "inat_group_table_" gid +} + +function print_table(tbl,name,fmt,n) +{ + print "const insn_attr_t " name " = {" + for (i = 0; i < n; i++) { + id = sprintf(fmt, i) + if (tbl[id]) + print " [" id "] = " tbl[id] "," + } + print "};" +} + +/^EndTable/ { + if (gid != -1) { + # print group tables + if (array_size(table) != 0) { + print_table(table, tname "[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,0] = tname + } + if (array_size(lptable1) != 0) { + print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,1] = tname "_1" + } + if (array_size(lptable2) != 0) { + print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,2] = tname "_2" + } + if (array_size(lptable3) != 0) { + print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,3] = tname "_3" + } + } else { + # print primary/escaped tables + if (array_size(table) != 0) { + print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,0] = tname + if (aid >= 0) + atable[aid,0] = tname + } + if (array_size(lptable1) != 0) { + print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,1] = tname "_1" + if (aid >= 0) + atable[aid,1] = tname "_1" + } + if (array_size(lptable2) != 0) { + print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,2] = tname "_2" + if (aid >= 0) + atable[aid,2] = tname "_2" + } + if (array_size(lptable3) != 0) { + print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,3] = tname "_3" + if (aid >= 0) + atable[aid,3] = tname "_3" + } + } + print "" + clear_vars() +} + +function add_flags(old,new) { + if (old && new) + return old " | " new + else if (old) + return old + else + return new +} + +# convert operands to flags. +function convert_operands(count,opnd, i,j,imm,mod) +{ + imm = null + mod = null + for (j = 1; j <= count; j++) { + i = opnd[j] + if (match(i, imm_expr) == 1) { + if (!imm_flag[i]) + semantic_error("Unknown imm opnd: " i) + if (imm) { + if (i != "Ib") + semantic_error("Second IMM error") + imm = add_flags(imm, "INAT_SCNDIMM") + } else + imm = imm_flag[i] + } else if (match(i, modrm_expr)) + mod = "INAT_MODRM" + } + return add_flags(imm, mod) +} + +/^[0-9a-f]+\:/ { + if (NR == 1) + next + # get index + idx = "0x" substr($1, 1, index($1,":") - 1) + if (idx in table) + semantic_error("Redefine " idx " in " tname) + + # check if escaped opcode + if ("escape" == $2) { + if ($3 != "#") + semantic_error("No escaped name") + ref = "" + for (i = 4; i <= NF; i++) + ref = ref $i + if (ref in escape) + semantic_error("Redefine escape (" ref ")") + escape[ref] = geid + geid++ + table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")" + next + } + + variant = null + # converts + i = 2 + while (i <= NF) { + opcode = $(i++) + delete opnds + ext = null + flags = null + opnd = null + # parse one opcode + if (match($i, opnd_expr)) { + opnd = $i + count = split($(i++), opnds, ",") + flags = convert_operands(count, opnds) + } + if (match($i, ext_expr)) + ext = $(i++) + if (match($i, sep_expr)) + i++ + else if (i < NF) + semantic_error($i " is not a separator") + + # check if group opcode + if (match(opcode, group_expr)) { + if (!(opcode in group)) { + group[opcode] = ggid + ggid++ + } + flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")") + } + # check force(or default) 64bit + if (match(ext, force64_expr)) + flags = add_flags(flags, "INAT_FORCE64") + + # check REX prefix + if (match(opcode, rex_expr)) + flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)") + + # check coprocessor escape : TODO + if (match(opcode, fpu_expr)) + flags = add_flags(flags, "INAT_MODRM") + + # check VEX codes + if (match(ext, vexonly_expr)) + flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") + else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr)) + flags = add_flags(flags, "INAT_VEXOK") + + # check prefixes + if (match(ext, prefix_expr)) { + if (!prefix_num[opcode]) + semantic_error("Unknown prefix: " opcode) + flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")") + } + if (length(flags) == 0) + continue + # check if last prefix + if (match(ext, lprefix1_expr)) { + lptable1[idx] = add_flags(lptable1[idx],flags) + variant = "INAT_VARIANT" + } + if (match(ext, lprefix2_expr)) { + lptable2[idx] = add_flags(lptable2[idx],flags) + variant = "INAT_VARIANT" + } + if (match(ext, lprefix3_expr)) { + lptable3[idx] = add_flags(lptable3[idx],flags) + variant = "INAT_VARIANT" + } + if (!match(ext, lprefix_expr)){ + table[idx] = add_flags(table[idx],flags) + } + } + if (variant) + table[idx] = add_flags(table[idx],variant) +} + +END { + if (awkchecked != "") + exit 1 + # print escape opcode map's array + print "/* Escape opcode map array */" + print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < geid; i++) + for (j = 0; j < max_lprefix; j++) + if (etable[i,j]) + print " ["i"]["j"] = "etable[i,j]"," + print "};\n" + # print group opcode map's array + print "/* Group opcode map array */" + print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < ggid; i++) + for (j = 0; j < max_lprefix; j++) + if (gtable[i,j]) + print " ["i"]["j"] = "gtable[i,j]"," + print "};\n" + # print AVX opcode map's array + print "/* AVX opcode map array */" + print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < gaid; i++) + for (j = 0; j < max_lprefix; j++) + if (atable[i,j]) + print " ["i"]["j"] = "atable[i,j]"," + print "};" +} diff --git a/tools/perf/util/intel-pt-decoder/inat.c b/tools/perf/util/intel-pt-decoder/inat.c new file mode 100644 index 000000000000..906d94aa0a24 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/inat.c @@ -0,0 +1,96 @@ +/* + * x86 instruction attribute tables + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#include "insn.h" + +/* Attribute tables are generated from opcode map */ +#include "inat-tables.c" + +/* Attribute search APIs */ +insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) +{ + return inat_primary_table[opcode]; +} + +int inat_get_last_prefix_id(insn_byte_t last_pfx) +{ + insn_attr_t lpfx_attr; + + lpfx_attr = inat_get_opcode_attribute(last_pfx); + return inat_last_prefix_id(lpfx_attr); +} + +insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id, + insn_attr_t esc_attr) +{ + const insn_attr_t *table; + int n; + + n = inat_escape_id(esc_attr); + + table = inat_escape_tables[n][0]; + if (!table) + return 0; + if (inat_has_variant(table[opcode]) && lpfx_id) { + table = inat_escape_tables[n][lpfx_id]; + if (!table) + return 0; + } + return table[opcode]; +} + +insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id, + insn_attr_t grp_attr) +{ + const insn_attr_t *table; + int n; + + n = inat_group_id(grp_attr); + + table = inat_group_tables[n][0]; + if (!table) + return inat_group_common_attribute(grp_attr); + if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) { + table = inat_group_tables[n][lpfx_id]; + if (!table) + return inat_group_common_attribute(grp_attr); + } + return table[X86_MODRM_REG(modrm)] | + inat_group_common_attribute(grp_attr); +} + +insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, + insn_byte_t vex_p) +{ + const insn_attr_t *table; + if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) + return 0; + /* At first, this checks the master table */ + table = inat_avx_tables[vex_m][0]; + if (!table) + return 0; + if (!inat_is_group(table[opcode]) && vex_p) { + /* If this is not a group, get attribute directly */ + table = inat_avx_tables[vex_m][vex_p]; + if (!table) + return 0; + } + return table[opcode]; +} diff --git a/tools/perf/util/intel-pt-decoder/inat.h b/tools/perf/util/intel-pt-decoder/inat.h new file mode 100644 index 000000000000..611645e903a8 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/inat.h @@ -0,0 +1,221 @@ +#ifndef _ASM_X86_INAT_H +#define _ASM_X86_INAT_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#include "inat_types.h" + +/* + * Internal bits. Don't use bitmasks directly, because these bits are + * unstable. You should use checking functions. + */ + +#define INAT_OPCODE_TABLE_SIZE 256 +#define INAT_GROUP_TABLE_SIZE 8 + +/* Legacy last prefixes */ +#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ +#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */ +#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */ +/* Other Legacy prefixes */ +#define INAT_PFX_LOCK 4 /* 0xF0 */ +#define INAT_PFX_CS 5 /* 0x2E */ +#define INAT_PFX_DS 6 /* 0x3E */ +#define INAT_PFX_ES 7 /* 0x26 */ +#define INAT_PFX_FS 8 /* 0x64 */ +#define INAT_PFX_GS 9 /* 0x65 */ +#define INAT_PFX_SS 10 /* 0x36 */ +#define INAT_PFX_ADDRSZ 11 /* 0x67 */ +/* x86-64 REX prefix */ +#define INAT_PFX_REX 12 /* 0x4X */ +/* AVX VEX prefixes */ +#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ +#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ + +#define INAT_LSTPFX_MAX 3 +#define INAT_LGCPFX_MAX 11 + +/* Immediate size */ +#define INAT_IMM_BYTE 1 +#define INAT_IMM_WORD 2 +#define INAT_IMM_DWORD 3 +#define INAT_IMM_QWORD 4 +#define INAT_IMM_PTR 5 +#define INAT_IMM_VWORD32 6 +#define INAT_IMM_VWORD 7 + +/* Legacy prefix */ +#define INAT_PFX_OFFS 0 +#define INAT_PFX_BITS 4 +#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) +#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) +/* Escape opcodes */ +#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS) +#define INAT_ESC_BITS 2 +#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1) +#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS) +/* Group opcodes (1-16) */ +#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS) +#define INAT_GRP_BITS 5 +#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1) +#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS) +/* Immediates */ +#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS) +#define INAT_IMM_BITS 3 +#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) +/* Flags */ +#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) +#define INAT_MODRM (1 << (INAT_FLAG_OFFS)) +#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1)) +#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) +#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) +#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) +#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) +#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) +/* Attribute making macros for attribute tables */ +#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) +#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) +#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) +#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) + +/* Attribute search APIs */ +extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); +extern int inat_get_last_prefix_id(insn_byte_t last_pfx); +extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, + int lpfx_id, + insn_attr_t esc_attr); +extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, + int lpfx_id, + insn_attr_t esc_attr); +extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, + insn_byte_t vex_m, + insn_byte_t vex_pp); + +/* Attribute checking functions */ +static inline int inat_is_legacy_prefix(insn_attr_t attr) +{ + attr &= INAT_PFX_MASK; + return attr && attr <= INAT_LGCPFX_MAX; +} + +static inline int inat_is_address_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ; +} + +static inline int inat_is_operand_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; +} + +static inline int inat_is_rex_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_REX; +} + +static inline int inat_last_prefix_id(insn_attr_t attr) +{ + if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) + return 0; + else + return attr & INAT_PFX_MASK; +} + +static inline int inat_is_vex_prefix(insn_attr_t attr) +{ + attr &= INAT_PFX_MASK; + return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3; +} + +static inline int inat_is_vex3_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3; +} + +static inline int inat_is_escape(insn_attr_t attr) +{ + return attr & INAT_ESC_MASK; +} + +static inline int inat_escape_id(insn_attr_t attr) +{ + return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS; +} + +static inline int inat_is_group(insn_attr_t attr) +{ + return attr & INAT_GRP_MASK; +} + +static inline int inat_group_id(insn_attr_t attr) +{ + return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS; +} + +static inline int inat_group_common_attribute(insn_attr_t attr) +{ + return attr & ~INAT_GRP_MASK; +} + +static inline int inat_has_immediate(insn_attr_t attr) +{ + return attr & INAT_IMM_MASK; +} + +static inline int inat_immediate_size(insn_attr_t attr) +{ + return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; +} + +static inline int inat_has_modrm(insn_attr_t attr) +{ + return attr & INAT_MODRM; +} + +static inline int inat_is_force64(insn_attr_t attr) +{ + return attr & INAT_FORCE64; +} + +static inline int inat_has_second_immediate(insn_attr_t attr) +{ + return attr & INAT_SCNDIMM; +} + +static inline int inat_has_moffset(insn_attr_t attr) +{ + return attr & INAT_MOFFSET; +} + +static inline int inat_has_variant(insn_attr_t attr) +{ + return attr & INAT_VARIANT; +} + +static inline int inat_accept_vex(insn_attr_t attr) +{ + return attr & INAT_VEXOK; +} + +static inline int inat_must_vex(insn_attr_t attr) +{ + return attr & INAT_VEXONLY; +} +#endif diff --git a/tools/perf/util/intel-pt-decoder/inat_types.h b/tools/perf/util/intel-pt-decoder/inat_types.h new file mode 100644 index 000000000000..cb3c20ce39cf --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/inat_types.h @@ -0,0 +1,29 @@ +#ifndef _ASM_X86_INAT_TYPES_H +#define _ASM_X86_INAT_TYPES_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +/* Instruction attributes */ +typedef unsigned int insn_attr_t; +typedef unsigned char insn_byte_t; +typedef signed int insn_value_t; + +#endif diff --git a/tools/perf/util/intel-pt-decoder/insn.c b/tools/perf/util/intel-pt-decoder/insn.c new file mode 100644 index 000000000000..47314a64399c --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/insn.c @@ -0,0 +1,594 @@ +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2004, 2009 + */ + +#ifdef __KERNEL__ +#include <linux/string.h> +#else +#include <string.h> +#endif +#include "inat.h" +#include "insn.h" + +/* Verify next sizeof(t) bytes can be on the same instruction */ +#define validate_next(t, insn, n) \ + ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) + +#define __get_next(t, insn) \ + ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) + +#define __peek_nbyte_next(t, insn, n) \ + ({ t r = *(t*)((insn)->next_byte + n); r; }) + +#define get_next(t, insn) \ + ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) + +#define peek_nbyte_next(t, insn, n) \ + ({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); }) + +#define peek_next(t, insn) peek_nbyte_next(t, insn, 0) + +/** + * insn_init() - initialize struct insn + * @insn: &struct insn to be initialized + * @kaddr: address (in kernel memory) of instruction (or copy thereof) + * @x86_64: !0 for 64-bit kernel or 64-bit app + */ +void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) +{ + /* + * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid + * even if the input buffer is long enough to hold them. + */ + if (buf_len > MAX_INSN_SIZE) + buf_len = MAX_INSN_SIZE; + + memset(insn, 0, sizeof(*insn)); + insn->kaddr = kaddr; + insn->end_kaddr = kaddr + buf_len; + insn->next_byte = kaddr; + insn->x86_64 = x86_64 ? 1 : 0; + insn->opnd_bytes = 4; + if (x86_64) + insn->addr_bytes = 8; + else + insn->addr_bytes = 4; +} + +/** + * insn_get_prefixes - scan x86 instruction prefix bytes + * @insn: &struct insn containing instruction + * + * Populates the @insn->prefixes bitmap, and updates @insn->next_byte + * to point to the (first) opcode. No effect if @insn->prefixes.got + * is already set. + */ +void insn_get_prefixes(struct insn *insn) +{ + struct insn_field *prefixes = &insn->prefixes; + insn_attr_t attr; + insn_byte_t b, lb; + int i, nb; + + if (prefixes->got) + return; + + nb = 0; + lb = 0; + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + while (inat_is_legacy_prefix(attr)) { + /* Skip if same prefix */ + for (i = 0; i < nb; i++) + if (prefixes->bytes[i] == b) + goto found; + if (nb == 4) + /* Invalid instruction */ + break; + prefixes->bytes[nb++] = b; + if (inat_is_address_size_prefix(attr)) { + /* address size switches 2/4 or 4/8 */ + if (insn->x86_64) + insn->addr_bytes ^= 12; + else + insn->addr_bytes ^= 6; + } else if (inat_is_operand_size_prefix(attr)) { + /* oprand size switches 2/4 */ + insn->opnd_bytes ^= 6; + } +found: + prefixes->nbytes++; + insn->next_byte++; + lb = b; + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + } + /* Set the last prefix */ + if (lb && lb != insn->prefixes.bytes[3]) { + if (unlikely(insn->prefixes.bytes[3])) { + /* Swap the last prefix */ + b = insn->prefixes.bytes[3]; + for (i = 0; i < nb; i++) + if (prefixes->bytes[i] == lb) + prefixes->bytes[i] = b; + } + insn->prefixes.bytes[3] = lb; + } + + /* Decode REX prefix */ + if (insn->x86_64) { + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + if (inat_is_rex_prefix(attr)) { + insn->rex_prefix.value = b; + insn->rex_prefix.nbytes = 1; + insn->next_byte++; + if (X86_REX_W(b)) + /* REX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } + } + insn->rex_prefix.got = 1; + + /* Decode VEX prefix */ + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + if (inat_is_vex_prefix(attr)) { + insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); + if (!insn->x86_64) { + /* + * In 32-bits mode, if the [7:6] bits (mod bits of + * ModRM) on the second byte are not 11b, it is + * LDS or LES. + */ + if (X86_MODRM_MOD(b2) != 3) + goto vex_end; + } + insn->vex_prefix.bytes[0] = b; + insn->vex_prefix.bytes[1] = b2; + if (inat_is_vex3_prefix(attr)) { + b2 = peek_nbyte_next(insn_byte_t, insn, 2); + insn->vex_prefix.bytes[2] = b2; + insn->vex_prefix.nbytes = 3; + insn->next_byte += 3; + if (insn->x86_64 && X86_VEX_W(b2)) + /* VEX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } else { + /* + * For VEX2, fake VEX3-like byte#2. + * Makes it easier to decode vex.W, vex.vvvv, + * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0. + */ + insn->vex_prefix.bytes[2] = b2 & 0x7f; + insn->vex_prefix.nbytes = 2; + insn->next_byte += 2; + } + } +vex_end: + insn->vex_prefix.got = 1; + + prefixes->got = 1; + +err_out: + return; +} + +/** + * insn_get_opcode - collect opcode(s) + * @insn: &struct insn containing instruction + * + * Populates @insn->opcode, updates @insn->next_byte to point past the + * opcode byte(s), and set @insn->attr (except for groups). + * If necessary, first collects any preceding (prefix) bytes. + * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got + * is already 1. + */ +void insn_get_opcode(struct insn *insn) +{ + struct insn_field *opcode = &insn->opcode; + insn_byte_t op; + int pfx_id; + if (opcode->got) + return; + if (!insn->prefixes.got) + insn_get_prefixes(insn); + + /* Get first opcode */ + op = get_next(insn_byte_t, insn); + opcode->bytes[0] = op; + opcode->nbytes = 1; + + /* Check if there is VEX prefix or not */ + if (insn_is_avx(insn)) { + insn_byte_t m, p; + m = insn_vex_m_bits(insn); + p = insn_vex_p_bits(insn); + insn->attr = inat_get_avx_attribute(op, m, p); + if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr)) + insn->attr = 0; /* This instruction is bad */ + goto end; /* VEX has only 1 byte for opcode */ + } + + insn->attr = inat_get_opcode_attribute(op); + while (inat_is_escape(insn->attr)) { + /* Get escaped opcode */ + op = get_next(insn_byte_t, insn); + opcode->bytes[opcode->nbytes++] = op; + pfx_id = insn_last_prefix_id(insn); + insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); + } + if (inat_must_vex(insn->attr)) + insn->attr = 0; /* This instruction is bad */ +end: + opcode->got = 1; + +err_out: + return; +} + +/** + * insn_get_modrm - collect ModRM byte, if any + * @insn: &struct insn containing instruction + * + * Populates @insn->modrm and updates @insn->next_byte to point past the + * ModRM byte, if any. If necessary, first collects the preceding bytes + * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. + */ +void insn_get_modrm(struct insn *insn) +{ + struct insn_field *modrm = &insn->modrm; + insn_byte_t pfx_id, mod; + if (modrm->got) + return; + if (!insn->opcode.got) + insn_get_opcode(insn); + + if (inat_has_modrm(insn->attr)) { + mod = get_next(insn_byte_t, insn); + modrm->value = mod; + modrm->nbytes = 1; + if (inat_is_group(insn->attr)) { + pfx_id = insn_last_prefix_id(insn); + insn->attr = inat_get_group_attribute(mod, pfx_id, + insn->attr); + if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) + insn->attr = 0; /* This is bad */ + } + } + + if (insn->x86_64 && inat_is_force64(insn->attr)) + insn->opnd_bytes = 8; + modrm->got = 1; + +err_out: + return; +} + + +/** + * insn_rip_relative() - Does instruction use RIP-relative addressing mode? + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. No effect if @insn->x86_64 is 0. + */ +int insn_rip_relative(struct insn *insn) +{ + struct insn_field *modrm = &insn->modrm; + + if (!insn->x86_64) + return 0; + if (!modrm->got) + insn_get_modrm(insn); + /* + * For rip-relative instructions, the mod field (top 2 bits) + * is zero and the r/m field (bottom 3 bits) is 0x5. + */ + return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); +} + +/** + * insn_get_sib() - Get the SIB byte of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. + */ +void insn_get_sib(struct insn *insn) +{ + insn_byte_t modrm; + + if (insn->sib.got) + return; + if (!insn->modrm.got) + insn_get_modrm(insn); + if (insn->modrm.nbytes) { + modrm = (insn_byte_t)insn->modrm.value; + if (insn->addr_bytes != 2 && + X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { + insn->sib.value = get_next(insn_byte_t, insn); + insn->sib.nbytes = 1; + } + } + insn->sib.got = 1; + +err_out: + return; +} + + +/** + * insn_get_displacement() - Get the displacement of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * SIB byte. + * Displacement value is sign-expanded. + */ +void insn_get_displacement(struct insn *insn) +{ + insn_byte_t mod, rm, base; + + if (insn->displacement.got) + return; + if (!insn->sib.got) + insn_get_sib(insn); + if (insn->modrm.nbytes) { + /* + * Interpreting the modrm byte: + * mod = 00 - no displacement fields (exceptions below) + * mod = 01 - 1-byte displacement field + * mod = 10 - displacement field is 4 bytes, or 2 bytes if + * address size = 2 (0x67 prefix in 32-bit mode) + * mod = 11 - no memory operand + * + * If address size = 2... + * mod = 00, r/m = 110 - displacement field is 2 bytes + * + * If address size != 2... + * mod != 11, r/m = 100 - SIB byte exists + * mod = 00, SIB base = 101 - displacement field is 4 bytes + * mod = 00, r/m = 101 - rip-relative addressing, displacement + * field is 4 bytes + */ + mod = X86_MODRM_MOD(insn->modrm.value); + rm = X86_MODRM_RM(insn->modrm.value); + base = X86_SIB_BASE(insn->sib.value); + if (mod == 3) + goto out; + if (mod == 1) { + insn->displacement.value = get_next(char, insn); + insn->displacement.nbytes = 1; + } else if (insn->addr_bytes == 2) { + if ((mod == 0 && rm == 6) || mod == 2) { + insn->displacement.value = + get_next(short, insn); + insn->displacement.nbytes = 2; + } + } else { + if ((mod == 0 && rm == 5) || mod == 2 || + (mod == 0 && base == 5)) { + insn->displacement.value = get_next(int, insn); + insn->displacement.nbytes = 4; + } + } + } +out: + insn->displacement.got = 1; + +err_out: + return; +} + +/* Decode moffset16/32/64. Return 0 if failed */ +static int __get_moffset(struct insn *insn) +{ + switch (insn->addr_bytes) { + case 2: + insn->moffset1.value = get_next(short, insn); + insn->moffset1.nbytes = 2; + break; + case 4: + insn->moffset1.value = get_next(int, insn); + insn->moffset1.nbytes = 4; + break; + case 8: + insn->moffset1.value = get_next(int, insn); + insn->moffset1.nbytes = 4; + insn->moffset2.value = get_next(int, insn); + insn->moffset2.nbytes = 4; + break; + default: /* opnd_bytes must be modified manually */ + goto err_out; + } + insn->moffset1.got = insn->moffset2.got = 1; + + return 1; + +err_out: + return 0; +} + +/* Decode imm v32(Iz). Return 0 if failed */ +static int __get_immv32(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate.value = get_next(short, insn); + insn->immediate.nbytes = 2; + break; + case 4: + case 8: + insn->immediate.value = get_next(int, insn); + insn->immediate.nbytes = 4; + break; + default: /* opnd_bytes must be modified manually */ + goto err_out; + } + + return 1; + +err_out: + return 0; +} + +/* Decode imm v64(Iv/Ov), Return 0 if failed */ +static int __get_immv(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate1.value = get_next(short, insn); + insn->immediate1.nbytes = 2; + break; + case 4: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + break; + case 8: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + insn->immediate2.value = get_next(int, insn); + insn->immediate2.nbytes = 4; + break; + default: /* opnd_bytes must be modified manually */ + goto err_out; + } + insn->immediate1.got = insn->immediate2.got = 1; + + return 1; +err_out: + return 0; +} + +/* Decode ptr16:16/32(Ap) */ +static int __get_immptr(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate1.value = get_next(short, insn); + insn->immediate1.nbytes = 2; + break; + case 4: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + break; + case 8: + /* ptr16:64 is not exist (no segment) */ + return 0; + default: /* opnd_bytes must be modified manually */ + goto err_out; + } + insn->immediate2.value = get_next(unsigned short, insn); + insn->immediate2.nbytes = 2; + insn->immediate1.got = insn->immediate2.got = 1; + + return 1; +err_out: + return 0; +} + +/** + * insn_get_immediate() - Get the immediates of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * displacement bytes. + * Basically, most of immediates are sign-expanded. Unsigned-value can be + * get by bit masking with ((1 << (nbytes * 8)) - 1) + */ +void insn_get_immediate(struct insn *insn) +{ + if (insn->immediate.got) + return; + if (!insn->displacement.got) + insn_get_displacement(insn); + + if (inat_has_moffset(insn->attr)) { + if (!__get_moffset(insn)) + goto err_out; + goto done; + } + + if (!inat_has_immediate(insn->attr)) + /* no immediates */ + goto done; + + switch (inat_immediate_size(insn->attr)) { + case INAT_IMM_BYTE: + insn->immediate.value = get_next(char, insn); + insn->immediate.nbytes = 1; + break; + case INAT_IMM_WORD: + insn->immediate.value = get_next(short, insn); + insn->immediate.nbytes = 2; + break; + case INAT_IMM_DWORD: + insn->immediate.value = get_next(int, insn); + insn->immediate.nbytes = 4; + break; + case INAT_IMM_QWORD: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + insn->immediate2.value = get_next(int, insn); + insn->immediate2.nbytes = 4; + break; + case INAT_IMM_PTR: + if (!__get_immptr(insn)) + goto err_out; + break; + case INAT_IMM_VWORD32: + if (!__get_immv32(insn)) + goto err_out; + break; + case INAT_IMM_VWORD: + if (!__get_immv(insn)) + goto err_out; + break; + default: + /* Here, insn must have an immediate, but failed */ + goto err_out; + } + if (inat_has_second_immediate(insn->attr)) { + insn->immediate2.value = get_next(char, insn); + insn->immediate2.nbytes = 1; + } +done: + insn->immediate.got = 1; + +err_out: + return; +} + +/** + * insn_get_length() - Get the length of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * immediates bytes. + */ +void insn_get_length(struct insn *insn) +{ + if (insn->length) + return; + if (!insn->immediate.got) + insn_get_immediate(insn); + insn->length = (unsigned char)((unsigned long)insn->next_byte + - (unsigned long)insn->kaddr); +} diff --git a/tools/perf/util/intel-pt-decoder/insn.h b/tools/perf/util/intel-pt-decoder/insn.h new file mode 100644 index 000000000000..dd12da0f4593 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/insn.h @@ -0,0 +1,201 @@ +#ifndef _ASM_X86_INSN_H +#define _ASM_X86_INSN_H +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2009 + */ + +/* insn_attr_t is defined in inat.h */ +#include "inat.h" + +struct insn_field { + union { + insn_value_t value; + insn_byte_t bytes[4]; + }; + /* !0 if we've run insn_get_xxx() for this field */ + unsigned char got; + unsigned char nbytes; +}; + +struct insn { + struct insn_field prefixes; /* + * Prefixes + * prefixes.bytes[3]: last prefix + */ + struct insn_field rex_prefix; /* REX prefix */ + struct insn_field vex_prefix; /* VEX prefix */ + struct insn_field opcode; /* + * opcode.bytes[0]: opcode1 + * opcode.bytes[1]: opcode2 + * opcode.bytes[2]: opcode3 + */ + struct insn_field modrm; + struct insn_field sib; + struct insn_field displacement; + union { + struct insn_field immediate; + struct insn_field moffset1; /* for 64bit MOV */ + struct insn_field immediate1; /* for 64bit imm or off16/32 */ + }; + union { + struct insn_field moffset2; /* for 64bit MOV */ + struct insn_field immediate2; /* for 64bit imm or seg16 */ + }; + + insn_attr_t attr; + unsigned char opnd_bytes; + unsigned char addr_bytes; + unsigned char length; + unsigned char x86_64; + + const insn_byte_t *kaddr; /* kernel address of insn to analyze */ + const insn_byte_t *end_kaddr; /* kernel address of last insn in buffer */ + const insn_byte_t *next_byte; +}; + +#define MAX_INSN_SIZE 15 + +#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) +#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) +#define X86_MODRM_RM(modrm) ((modrm) & 0x07) + +#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6) +#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) +#define X86_SIB_BASE(sib) ((sib) & 0x07) + +#define X86_REX_W(rex) ((rex) & 8) +#define X86_REX_R(rex) ((rex) & 4) +#define X86_REX_X(rex) ((rex) & 2) +#define X86_REX_B(rex) ((rex) & 1) + +/* VEX bit flags */ +#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ +#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */ +#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */ +#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ +#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ +/* VEX bit fields */ +#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ +#define X86_VEX2_M 1 /* VEX2.M always 1 */ +#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ + +extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64); +extern void insn_get_prefixes(struct insn *insn); +extern void insn_get_opcode(struct insn *insn); +extern void insn_get_modrm(struct insn *insn); +extern void insn_get_sib(struct insn *insn); +extern void insn_get_displacement(struct insn *insn); +extern void insn_get_immediate(struct insn *insn); +extern void insn_get_length(struct insn *insn); + +/* Attribute will be determined after getting ModRM (for opcode groups) */ +static inline void insn_get_attribute(struct insn *insn) +{ + insn_get_modrm(insn); +} + +/* Instruction uses RIP-relative addressing */ +extern int insn_rip_relative(struct insn *insn); + +/* Init insn for kernel text */ +static inline void kernel_insn_init(struct insn *insn, + const void *kaddr, int buf_len) +{ +#ifdef CONFIG_X86_64 + insn_init(insn, kaddr, buf_len, 1); +#else /* CONFIG_X86_32 */ + insn_init(insn, kaddr, buf_len, 0); +#endif +} + +static inline int insn_is_avx(struct insn *insn) +{ + if (!insn->prefixes.got) + insn_get_prefixes(insn); + return (insn->vex_prefix.value != 0); +} + +/* Ensure this instruction is decoded completely */ +static inline int insn_complete(struct insn *insn) +{ + return insn->opcode.got && insn->modrm.got && insn->sib.got && + insn->displacement.got && insn->immediate.got; +} + +static inline insn_byte_t insn_vex_m_bits(struct insn *insn) +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX2_M; + else + return X86_VEX3_M(insn->vex_prefix.bytes[1]); +} + +static inline insn_byte_t insn_vex_p_bits(struct insn *insn) +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX_P(insn->vex_prefix.bytes[1]); + else + return X86_VEX_P(insn->vex_prefix.bytes[2]); +} + +/* Get the last prefix id from last prefix or VEX prefix */ +static inline int insn_last_prefix_id(struct insn *insn) +{ + if (insn_is_avx(insn)) + return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */ + + if (insn->prefixes.bytes[3]) + return inat_get_last_prefix_id(insn->prefixes.bytes[3]); + + return 0; +} + +/* Offset of each field from kaddr */ +static inline int insn_offset_rex_prefix(struct insn *insn) +{ + return insn->prefixes.nbytes; +} +static inline int insn_offset_vex_prefix(struct insn *insn) +{ + return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; +} +static inline int insn_offset_opcode(struct insn *insn) +{ + return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes; +} +static inline int insn_offset_modrm(struct insn *insn) +{ + return insn_offset_opcode(insn) + insn->opcode.nbytes; +} +static inline int insn_offset_sib(struct insn *insn) +{ + return insn_offset_modrm(insn) + insn->modrm.nbytes; +} +static inline int insn_offset_displacement(struct insn *insn) +{ + return insn_offset_sib(insn) + insn->sib.nbytes; +} +static inline int insn_offset_immediate(struct insn *insn) +{ + return insn_offset_displacement(insn) + insn->displacement.nbytes; +} + +#endif /* _ASM_X86_INSN_H */ diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c new file mode 100644 index 000000000000..22ba50224319 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -0,0 +1,2345 @@ +/* + * intel_pt_decoder.c: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> +#include <errno.h> +#include <stdint.h> +#include <inttypes.h> + +#include "../cache.h" +#include "../util.h" + +#include "intel-pt-insn-decoder.h" +#include "intel-pt-pkt-decoder.h" +#include "intel-pt-decoder.h" +#include "intel-pt-log.h" + +#define INTEL_PT_BLK_SIZE 1024 + +#define BIT63 (((uint64_t)1 << 63)) + +#define INTEL_PT_RETURN 1 + +/* Maximum number of loops with no packets consumed i.e. stuck in a loop */ +#define INTEL_PT_MAX_LOOPS 10000 + +struct intel_pt_blk { + struct intel_pt_blk *prev; + uint64_t ip[INTEL_PT_BLK_SIZE]; +}; + +struct intel_pt_stack { + struct intel_pt_blk *blk; + struct intel_pt_blk *spare; + int pos; +}; + +enum intel_pt_pkt_state { + INTEL_PT_STATE_NO_PSB, + INTEL_PT_STATE_NO_IP, + INTEL_PT_STATE_ERR_RESYNC, + INTEL_PT_STATE_IN_SYNC, + INTEL_PT_STATE_TNT, + INTEL_PT_STATE_TIP, + INTEL_PT_STATE_TIP_PGD, + INTEL_PT_STATE_FUP, + INTEL_PT_STATE_FUP_NO_TIP, +}; + +#ifdef INTEL_PT_STRICT +#define INTEL_PT_STATE_ERR1 INTEL_PT_STATE_NO_PSB +#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_PSB +#define INTEL_PT_STATE_ERR3 INTEL_PT_STATE_NO_PSB +#define INTEL_PT_STATE_ERR4 INTEL_PT_STATE_NO_PSB +#else +#define INTEL_PT_STATE_ERR1 (decoder->pkt_state) +#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_IP +#define INTEL_PT_STATE_ERR3 INTEL_PT_STATE_ERR_RESYNC +#define INTEL_PT_STATE_ERR4 INTEL_PT_STATE_IN_SYNC +#endif + +struct intel_pt_decoder { + int (*get_trace)(struct intel_pt_buffer *buffer, void *data); + int (*walk_insn)(struct intel_pt_insn *intel_pt_insn, + uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, + uint64_t max_insn_cnt, void *data); + void *data; + struct intel_pt_state state; + const unsigned char *buf; + size_t len; + bool return_compression; + bool mtc_insn; + bool pge; + bool have_tma; + bool have_cyc; + uint64_t pos; + uint64_t last_ip; + uint64_t ip; + uint64_t cr3; + uint64_t timestamp; + uint64_t tsc_timestamp; + uint64_t ref_timestamp; + uint64_t ret_addr; + uint64_t ctc_timestamp; + uint64_t ctc_delta; + uint64_t cycle_cnt; + uint64_t cyc_ref_timestamp; + uint32_t last_mtc; + uint32_t tsc_ctc_ratio_n; + uint32_t tsc_ctc_ratio_d; + uint32_t tsc_ctc_mult; + uint32_t tsc_slip; + uint32_t ctc_rem_mask; + int mtc_shift; + struct intel_pt_stack stack; + enum intel_pt_pkt_state pkt_state; + struct intel_pt_pkt packet; + struct intel_pt_pkt tnt; + int pkt_step; + int pkt_len; + int last_packet_type; + unsigned int cbr; + unsigned int max_non_turbo_ratio; + double max_non_turbo_ratio_fp; + double cbr_cyc_to_tsc; + double calc_cyc_to_tsc; + bool have_calc_cyc_to_tsc; + int exec_mode; + unsigned int insn_bytes; + uint64_t sign_bit; + uint64_t sign_bits; + uint64_t period; + enum intel_pt_period_type period_type; + uint64_t tot_insn_cnt; + uint64_t period_insn_cnt; + uint64_t period_mask; + uint64_t period_ticks; + uint64_t last_masked_timestamp; + bool continuous_period; + bool overflow; + bool set_fup_tx_flags; + unsigned int fup_tx_flags; + unsigned int tx_flags; + uint64_t timestamp_insn_cnt; + uint64_t stuck_ip; + int no_progress; + int stuck_ip_prd; + int stuck_ip_cnt; + const unsigned char *next_buf; + size_t next_len; + unsigned char temp_buf[INTEL_PT_PKT_MAX_SZ]; +}; + +static uint64_t intel_pt_lower_power_of_2(uint64_t x) +{ + int i; + + for (i = 0; x != 1; i++) + x >>= 1; + + return x << i; +} + +static void intel_pt_setup_period(struct intel_pt_decoder *decoder) +{ + if (decoder->period_type == INTEL_PT_PERIOD_TICKS) { + uint64_t period; + + period = intel_pt_lower_power_of_2(decoder->period); + decoder->period_mask = ~(period - 1); + decoder->period_ticks = period; + } +} + +static uint64_t multdiv(uint64_t t, uint32_t n, uint32_t d) +{ + if (!d) + return 0; + return (t / d) * n + ((t % d) * n) / d; +} + +struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) +{ + struct intel_pt_decoder *decoder; + + if (!params->get_trace || !params->walk_insn) + return NULL; + + decoder = zalloc(sizeof(struct intel_pt_decoder)); + if (!decoder) + return NULL; + + decoder->get_trace = params->get_trace; + decoder->walk_insn = params->walk_insn; + decoder->data = params->data; + decoder->return_compression = params->return_compression; + + decoder->sign_bit = (uint64_t)1 << 47; + decoder->sign_bits = ~(((uint64_t)1 << 48) - 1); + + decoder->period = params->period; + decoder->period_type = params->period_type; + + decoder->max_non_turbo_ratio = params->max_non_turbo_ratio; + decoder->max_non_turbo_ratio_fp = params->max_non_turbo_ratio; + + intel_pt_setup_period(decoder); + + decoder->mtc_shift = params->mtc_period; + decoder->ctc_rem_mask = (1 << decoder->mtc_shift) - 1; + + decoder->tsc_ctc_ratio_n = params->tsc_ctc_ratio_n; + decoder->tsc_ctc_ratio_d = params->tsc_ctc_ratio_d; + + if (!decoder->tsc_ctc_ratio_n) + decoder->tsc_ctc_ratio_d = 0; + + if (decoder->tsc_ctc_ratio_d) { + if (!(decoder->tsc_ctc_ratio_n % decoder->tsc_ctc_ratio_d)) + decoder->tsc_ctc_mult = decoder->tsc_ctc_ratio_n / + decoder->tsc_ctc_ratio_d; + + /* + * Allow for timestamps appearing to backwards because a TSC + * packet has slipped past a MTC packet, so allow 2 MTC ticks + * or ... + */ + decoder->tsc_slip = multdiv(2 << decoder->mtc_shift, + decoder->tsc_ctc_ratio_n, + decoder->tsc_ctc_ratio_d); + } + /* ... or 0x100 paranoia */ + if (decoder->tsc_slip < 0x100) + decoder->tsc_slip = 0x100; + + intel_pt_log("timestamp: mtc_shift %u\n", decoder->mtc_shift); + intel_pt_log("timestamp: tsc_ctc_ratio_n %u\n", decoder->tsc_ctc_ratio_n); + intel_pt_log("timestamp: tsc_ctc_ratio_d %u\n", decoder->tsc_ctc_ratio_d); + intel_pt_log("timestamp: tsc_ctc_mult %u\n", decoder->tsc_ctc_mult); + intel_pt_log("timestamp: tsc_slip %#x\n", decoder->tsc_slip); + + return decoder; +} + +static void intel_pt_pop_blk(struct intel_pt_stack *stack) +{ + struct intel_pt_blk *blk = stack->blk; + + stack->blk = blk->prev; + if (!stack->spare) + stack->spare = blk; + else + free(blk); +} + +static uint64_t intel_pt_pop(struct intel_pt_stack *stack) +{ + if (!stack->pos) { + if (!stack->blk) + return 0; + intel_pt_pop_blk(stack); + if (!stack->blk) + return 0; + stack->pos = INTEL_PT_BLK_SIZE; + } + return stack->blk->ip[--stack->pos]; +} + +static int intel_pt_alloc_blk(struct intel_pt_stack *stack) +{ + struct intel_pt_blk *blk; + + if (stack->spare) { + blk = stack->spare; + stack->spare = NULL; + } else { + blk = malloc(sizeof(struct intel_pt_blk)); + if (!blk) + return -ENOMEM; + } + + blk->prev = stack->blk; + stack->blk = blk; + stack->pos = 0; + return 0; +} + +static int intel_pt_push(struct intel_pt_stack *stack, uint64_t ip) +{ + int err; + + if (!stack->blk || stack->pos == INTEL_PT_BLK_SIZE) { + err = intel_pt_alloc_blk(stack); + if (err) + return err; + } + + stack->blk->ip[stack->pos++] = ip; + return 0; +} + +static void intel_pt_clear_stack(struct intel_pt_stack *stack) +{ + while (stack->blk) + intel_pt_pop_blk(stack); + stack->pos = 0; +} + +static void intel_pt_free_stack(struct intel_pt_stack *stack) +{ + intel_pt_clear_stack(stack); + zfree(&stack->blk); + zfree(&stack->spare); +} + +void intel_pt_decoder_free(struct intel_pt_decoder *decoder) +{ + intel_pt_free_stack(&decoder->stack); + free(decoder); +} + +static int intel_pt_ext_err(int code) +{ + switch (code) { + case -ENOMEM: + return INTEL_PT_ERR_NOMEM; + case -ENOSYS: + return INTEL_PT_ERR_INTERN; + case -EBADMSG: + return INTEL_PT_ERR_BADPKT; + case -ENODATA: + return INTEL_PT_ERR_NODATA; + case -EILSEQ: + return INTEL_PT_ERR_NOINSN; + case -ENOENT: + return INTEL_PT_ERR_MISMAT; + case -EOVERFLOW: + return INTEL_PT_ERR_OVR; + case -ENOSPC: + return INTEL_PT_ERR_LOST; + case -ELOOP: + return INTEL_PT_ERR_NELOOP; + default: + return INTEL_PT_ERR_UNK; + } +} + +static const char *intel_pt_err_msgs[] = { + [INTEL_PT_ERR_NOMEM] = "Memory allocation failed", + [INTEL_PT_ERR_INTERN] = "Internal error", + [INTEL_PT_ERR_BADPKT] = "Bad packet", + [INTEL_PT_ERR_NODATA] = "No more data", + [INTEL_PT_ERR_NOINSN] = "Failed to get instruction", + [INTEL_PT_ERR_MISMAT] = "Trace doesn't match instruction", + [INTEL_PT_ERR_OVR] = "Overflow packet", + [INTEL_PT_ERR_LOST] = "Lost trace data", + [INTEL_PT_ERR_UNK] = "Unknown error!", + [INTEL_PT_ERR_NELOOP] = "Never-ending loop", +}; + +int intel_pt__strerror(int code, char *buf, size_t buflen) +{ + if (code < 1 || code > INTEL_PT_ERR_MAX) + code = INTEL_PT_ERR_UNK; + strlcpy(buf, intel_pt_err_msgs[code], buflen); + return 0; +} + +static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder, + const struct intel_pt_pkt *packet, + uint64_t last_ip) +{ + uint64_t ip; + + switch (packet->count) { + case 2: + ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) | + packet->payload; + break; + case 4: + ip = (last_ip & (uint64_t)0xffffffff00000000ULL) | + packet->payload; + break; + case 6: + ip = packet->payload; + break; + default: + return 0; + } + + if (ip & decoder->sign_bit) + return ip | decoder->sign_bits; + + return ip; +} + +static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder) +{ + decoder->last_ip = intel_pt_calc_ip(decoder, &decoder->packet, + decoder->last_ip); +} + +static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder) +{ + intel_pt_set_last_ip(decoder); + decoder->ip = decoder->last_ip; +} + +static void intel_pt_decoder_log_packet(struct intel_pt_decoder *decoder) +{ + intel_pt_log_packet(&decoder->packet, decoder->pkt_len, decoder->pos, + decoder->buf); +} + +static int intel_pt_bug(struct intel_pt_decoder *decoder) +{ + intel_pt_log("ERROR: Internal error\n"); + decoder->pkt_state = INTEL_PT_STATE_NO_PSB; + return -ENOSYS; +} + +static inline void intel_pt_clear_tx_flags(struct intel_pt_decoder *decoder) +{ + decoder->tx_flags = 0; +} + +static inline void intel_pt_update_in_tx(struct intel_pt_decoder *decoder) +{ + decoder->tx_flags = decoder->packet.payload & INTEL_PT_IN_TX; +} + +static int intel_pt_bad_packet(struct intel_pt_decoder *decoder) +{ + intel_pt_clear_tx_flags(decoder); + decoder->have_tma = false; + decoder->pkt_len = 1; + decoder->pkt_step = 1; + intel_pt_decoder_log_packet(decoder); + if (decoder->pkt_state != INTEL_PT_STATE_NO_PSB) { + intel_pt_log("ERROR: Bad packet\n"); + decoder->pkt_state = INTEL_PT_STATE_ERR1; + } + return -EBADMSG; +} + +static int intel_pt_get_data(struct intel_pt_decoder *decoder) +{ + struct intel_pt_buffer buffer = { .buf = 0, }; + int ret; + + decoder->pkt_step = 0; + + intel_pt_log("Getting more data\n"); + ret = decoder->get_trace(&buffer, decoder->data); + if (ret) + return ret; + decoder->buf = buffer.buf; + decoder->len = buffer.len; + if (!decoder->len) { + intel_pt_log("No more data\n"); + return -ENODATA; + } + if (!buffer.consecutive) { + decoder->ip = 0; + decoder->pkt_state = INTEL_PT_STATE_NO_PSB; + decoder->ref_timestamp = buffer.ref_timestamp; + decoder->timestamp = 0; + decoder->have_tma = false; + decoder->state.trace_nr = buffer.trace_nr; + intel_pt_log("Reference timestamp 0x%" PRIx64 "\n", + decoder->ref_timestamp); + return -ENOLINK; + } + + return 0; +} + +static int intel_pt_get_next_data(struct intel_pt_decoder *decoder) +{ + if (!decoder->next_buf) + return intel_pt_get_data(decoder); + + decoder->buf = decoder->next_buf; + decoder->len = decoder->next_len; + decoder->next_buf = 0; + decoder->next_len = 0; + return 0; +} + +static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder) +{ + unsigned char *buf = decoder->temp_buf; + size_t old_len, len, n; + int ret; + + old_len = decoder->len; + len = decoder->len; + memcpy(buf, decoder->buf, len); + + ret = intel_pt_get_data(decoder); + if (ret) { + decoder->pos += old_len; + return ret < 0 ? ret : -EINVAL; + } + + n = INTEL_PT_PKT_MAX_SZ - len; + if (n > decoder->len) + n = decoder->len; + memcpy(buf + len, decoder->buf, n); + len += n; + + ret = intel_pt_get_packet(buf, len, &decoder->packet); + if (ret < (int)old_len) { + decoder->next_buf = decoder->buf; + decoder->next_len = decoder->len; + decoder->buf = buf; + decoder->len = old_len; + return intel_pt_bad_packet(decoder); + } + + decoder->next_buf = decoder->buf + (ret - old_len); + decoder->next_len = decoder->len - (ret - old_len); + + decoder->buf = buf; + decoder->len = ret; + + return ret; +} + +struct intel_pt_pkt_info { + struct intel_pt_decoder *decoder; + struct intel_pt_pkt packet; + uint64_t pos; + int pkt_len; + int last_packet_type; + void *data; +}; + +typedef int (*intel_pt_pkt_cb_t)(struct intel_pt_pkt_info *pkt_info); + +/* Lookahead packets in current buffer */ +static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder, + intel_pt_pkt_cb_t cb, void *data) +{ + struct intel_pt_pkt_info pkt_info; + const unsigned char *buf = decoder->buf; + size_t len = decoder->len; + int ret; + + pkt_info.decoder = decoder; + pkt_info.pos = decoder->pos; + pkt_info.pkt_len = decoder->pkt_step; + pkt_info.last_packet_type = decoder->last_packet_type; + pkt_info.data = data; + + while (1) { + do { + pkt_info.pos += pkt_info.pkt_len; + buf += pkt_info.pkt_len; + len -= pkt_info.pkt_len; + + if (!len) + return INTEL_PT_NEED_MORE_BYTES; + + ret = intel_pt_get_packet(buf, len, &pkt_info.packet); + if (!ret) + return INTEL_PT_NEED_MORE_BYTES; + if (ret < 0) + return ret; + + pkt_info.pkt_len = ret; + } while (pkt_info.packet.type == INTEL_PT_PAD); + + ret = cb(&pkt_info); + if (ret) + return 0; + + pkt_info.last_packet_type = pkt_info.packet.type; + } +} + +struct intel_pt_calc_cyc_to_tsc_info { + uint64_t cycle_cnt; + unsigned int cbr; + uint32_t last_mtc; + uint64_t ctc_timestamp; + uint64_t ctc_delta; + uint64_t tsc_timestamp; + uint64_t timestamp; + bool have_tma; + bool from_mtc; + double cbr_cyc_to_tsc; +}; + +static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info) +{ + struct intel_pt_decoder *decoder = pkt_info->decoder; + struct intel_pt_calc_cyc_to_tsc_info *data = pkt_info->data; + uint64_t timestamp; + double cyc_to_tsc; + unsigned int cbr; + uint32_t mtc, mtc_delta, ctc, fc, ctc_rem; + + switch (pkt_info->packet.type) { + case INTEL_PT_TNT: + case INTEL_PT_TIP_PGE: + case INTEL_PT_TIP: + case INTEL_PT_FUP: + case INTEL_PT_PSB: + case INTEL_PT_PIP: + case INTEL_PT_MODE_EXEC: + case INTEL_PT_MODE_TSX: + case INTEL_PT_PSBEND: + case INTEL_PT_PAD: + case INTEL_PT_VMCS: + case INTEL_PT_MNT: + return 0; + + case INTEL_PT_MTC: + if (!data->have_tma) + return 0; + + mtc = pkt_info->packet.payload; + if (mtc > data->last_mtc) + mtc_delta = mtc - data->last_mtc; + else + mtc_delta = mtc + 256 - data->last_mtc; + data->ctc_delta += mtc_delta << decoder->mtc_shift; + data->last_mtc = mtc; + + if (decoder->tsc_ctc_mult) { + timestamp = data->ctc_timestamp + + data->ctc_delta * decoder->tsc_ctc_mult; + } else { + timestamp = data->ctc_timestamp + + multdiv(data->ctc_delta, + decoder->tsc_ctc_ratio_n, + decoder->tsc_ctc_ratio_d); + } + + if (timestamp < data->timestamp) + return 1; + + if (pkt_info->last_packet_type != INTEL_PT_CYC) { + data->timestamp = timestamp; + return 0; + } + + break; + + case INTEL_PT_TSC: + timestamp = pkt_info->packet.payload | + (data->timestamp & (0xffULL << 56)); + if (data->from_mtc && timestamp < data->timestamp && + data->timestamp - timestamp < decoder->tsc_slip) + return 1; + while (timestamp < data->timestamp) + timestamp += (1ULL << 56); + if (pkt_info->last_packet_type != INTEL_PT_CYC) { + if (data->from_mtc) + return 1; + data->tsc_timestamp = timestamp; + data->timestamp = timestamp; + return 0; + } + break; + + case INTEL_PT_TMA: + if (data->from_mtc) + return 1; + + if (!decoder->tsc_ctc_ratio_d) + return 0; + + ctc = pkt_info->packet.payload; + fc = pkt_info->packet.count; + ctc_rem = ctc & decoder->ctc_rem_mask; + + data->last_mtc = (ctc >> decoder->mtc_shift) & 0xff; + + data->ctc_timestamp = data->tsc_timestamp - fc; + if (decoder->tsc_ctc_mult) { + data->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult; + } else { + data->ctc_timestamp -= + multdiv(ctc_rem, decoder->tsc_ctc_ratio_n, + decoder->tsc_ctc_ratio_d); + } + + data->ctc_delta = 0; + data->have_tma = true; + + return 0; + + case INTEL_PT_CYC: + data->cycle_cnt += pkt_info->packet.payload; + return 0; + + case INTEL_PT_CBR: + cbr = pkt_info->packet.payload; + if (data->cbr && data->cbr != cbr) + return 1; + data->cbr = cbr; + data->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr; + return 0; + + case INTEL_PT_TIP_PGD: + case INTEL_PT_TRACESTOP: + case INTEL_PT_OVF: + case INTEL_PT_BAD: /* Does not happen */ + default: + return 1; + } + + if (!data->cbr && decoder->cbr) { + data->cbr = decoder->cbr; + data->cbr_cyc_to_tsc = decoder->cbr_cyc_to_tsc; + } + + if (!data->cycle_cnt) + return 1; + + cyc_to_tsc = (double)(timestamp - decoder->timestamp) / data->cycle_cnt; + + if (data->cbr && cyc_to_tsc > data->cbr_cyc_to_tsc && + cyc_to_tsc / data->cbr_cyc_to_tsc > 1.25) { + intel_pt_log("Timestamp: calculated %g TSC ticks per cycle too big (c.f. CBR-based value %g), pos " x64_fmt "\n", + cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos); + return 1; + } + + decoder->calc_cyc_to_tsc = cyc_to_tsc; + decoder->have_calc_cyc_to_tsc = true; + + if (data->cbr) { + intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. CBR-based value %g, pos " x64_fmt "\n", + cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos); + } else { + intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. unknown CBR-based value, pos " x64_fmt "\n", + cyc_to_tsc, pkt_info->pos); + } + + return 1; +} + +static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder, + bool from_mtc) +{ + struct intel_pt_calc_cyc_to_tsc_info data = { + .cycle_cnt = 0, + .cbr = 0, + .last_mtc = decoder->last_mtc, + .ctc_timestamp = decoder->ctc_timestamp, + .ctc_delta = decoder->ctc_delta, + .tsc_timestamp = decoder->tsc_timestamp, + .timestamp = decoder->timestamp, + .have_tma = decoder->have_tma, + .from_mtc = from_mtc, + .cbr_cyc_to_tsc = 0, + }; + + intel_pt_pkt_lookahead(decoder, intel_pt_calc_cyc_cb, &data); +} + +static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder) +{ + int ret; + + decoder->last_packet_type = decoder->packet.type; + + do { + decoder->pos += decoder->pkt_step; + decoder->buf += decoder->pkt_step; + decoder->len -= decoder->pkt_step; + + if (!decoder->len) { + ret = intel_pt_get_next_data(decoder); + if (ret) + return ret; + } + + ret = intel_pt_get_packet(decoder->buf, decoder->len, + &decoder->packet); + if (ret == INTEL_PT_NEED_MORE_BYTES && + decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) { + ret = intel_pt_get_split_packet(decoder); + if (ret < 0) + return ret; + } + if (ret <= 0) + return intel_pt_bad_packet(decoder); + + decoder->pkt_len = ret; + decoder->pkt_step = ret; + intel_pt_decoder_log_packet(decoder); + } while (decoder->packet.type == INTEL_PT_PAD); + + return 0; +} + +static uint64_t intel_pt_next_period(struct intel_pt_decoder *decoder) +{ + uint64_t timestamp, masked_timestamp; + + timestamp = decoder->timestamp + decoder->timestamp_insn_cnt; + masked_timestamp = timestamp & decoder->period_mask; + if (decoder->continuous_period) { + if (masked_timestamp != decoder->last_masked_timestamp) + return 1; + } else { + timestamp += 1; + masked_timestamp = timestamp & decoder->period_mask; + if (masked_timestamp != decoder->last_masked_timestamp) { + decoder->last_masked_timestamp = masked_timestamp; + decoder->continuous_period = true; + } + } + return decoder->period_ticks - (timestamp - masked_timestamp); +} + +static uint64_t intel_pt_next_sample(struct intel_pt_decoder *decoder) +{ + switch (decoder->period_type) { + case INTEL_PT_PERIOD_INSTRUCTIONS: + return decoder->period - decoder->period_insn_cnt; + case INTEL_PT_PERIOD_TICKS: + return intel_pt_next_period(decoder); + case INTEL_PT_PERIOD_NONE: + case INTEL_PT_PERIOD_MTC: + default: + return 0; + } +} + +static void intel_pt_sample_insn(struct intel_pt_decoder *decoder) +{ + uint64_t timestamp, masked_timestamp; + + switch (decoder->period_type) { + case INTEL_PT_PERIOD_INSTRUCTIONS: + decoder->period_insn_cnt = 0; + break; + case INTEL_PT_PERIOD_TICKS: + timestamp = decoder->timestamp + decoder->timestamp_insn_cnt; + masked_timestamp = timestamp & decoder->period_mask; + decoder->last_masked_timestamp = masked_timestamp; + break; + case INTEL_PT_PERIOD_NONE: + case INTEL_PT_PERIOD_MTC: + default: + break; + } + + decoder->state.type |= INTEL_PT_INSTRUCTION; +} + +static int intel_pt_walk_insn(struct intel_pt_decoder *decoder, + struct intel_pt_insn *intel_pt_insn, uint64_t ip) +{ + uint64_t max_insn_cnt, insn_cnt = 0; + int err; + + if (!decoder->mtc_insn) + decoder->mtc_insn = true; + + max_insn_cnt = intel_pt_next_sample(decoder); + + err = decoder->walk_insn(intel_pt_insn, &insn_cnt, &decoder->ip, ip, + max_insn_cnt, decoder->data); + + decoder->tot_insn_cnt += insn_cnt; + decoder->timestamp_insn_cnt += insn_cnt; + decoder->period_insn_cnt += insn_cnt; + + if (err) { + decoder->no_progress = 0; + decoder->pkt_state = INTEL_PT_STATE_ERR2; + intel_pt_log_at("ERROR: Failed to get instruction", + decoder->ip); + if (err == -ENOENT) + return -ENOLINK; + return -EILSEQ; + } + + if (ip && decoder->ip == ip) { + err = -EAGAIN; + goto out; + } + + if (max_insn_cnt && insn_cnt >= max_insn_cnt) + intel_pt_sample_insn(decoder); + + if (intel_pt_insn->branch == INTEL_PT_BR_NO_BRANCH) { + decoder->state.type = INTEL_PT_INSTRUCTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->ip += intel_pt_insn->length; + err = INTEL_PT_RETURN; + goto out; + } + + if (intel_pt_insn->op == INTEL_PT_OP_CALL) { + /* Zero-length calls are excluded */ + if (intel_pt_insn->branch != INTEL_PT_BR_UNCONDITIONAL || + intel_pt_insn->rel) { + err = intel_pt_push(&decoder->stack, decoder->ip + + intel_pt_insn->length); + if (err) + goto out; + } + } else if (intel_pt_insn->op == INTEL_PT_OP_RET) { + decoder->ret_addr = intel_pt_pop(&decoder->stack); + } + + if (intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL) { + int cnt = decoder->no_progress++; + + decoder->state.from_ip = decoder->ip; + decoder->ip += intel_pt_insn->length + + intel_pt_insn->rel; + decoder->state.to_ip = decoder->ip; + err = INTEL_PT_RETURN; + + /* + * Check for being stuck in a loop. This can happen if a + * decoder error results in the decoder erroneously setting the + * ip to an address that is itself in an infinite loop that + * consumes no packets. When that happens, there must be an + * unconditional branch. + */ + if (cnt) { + if (cnt == 1) { + decoder->stuck_ip = decoder->state.to_ip; + decoder->stuck_ip_prd = 1; + decoder->stuck_ip_cnt = 1; + } else if (cnt > INTEL_PT_MAX_LOOPS || + decoder->state.to_ip == decoder->stuck_ip) { + intel_pt_log_at("ERROR: Never-ending loop", + decoder->state.to_ip); + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; + err = -ELOOP; + goto out; + } else if (!--decoder->stuck_ip_cnt) { + decoder->stuck_ip_prd += 1; + decoder->stuck_ip_cnt = decoder->stuck_ip_prd; + decoder->stuck_ip = decoder->state.to_ip; + } + } + goto out_no_progress; + } +out: + decoder->no_progress = 0; +out_no_progress: + decoder->state.insn_op = intel_pt_insn->op; + decoder->state.insn_len = intel_pt_insn->length; + + if (decoder->tx_flags & INTEL_PT_IN_TX) + decoder->state.flags |= INTEL_PT_IN_TX; + + return err; +} + +static int intel_pt_walk_fup(struct intel_pt_decoder *decoder) +{ + struct intel_pt_insn intel_pt_insn; + uint64_t ip; + int err; + + ip = decoder->last_ip; + + while (1) { + err = intel_pt_walk_insn(decoder, &intel_pt_insn, ip); + if (err == INTEL_PT_RETURN) + return 0; + if (err == -EAGAIN) { + if (decoder->set_fup_tx_flags) { + decoder->set_fup_tx_flags = false; + decoder->tx_flags = decoder->fup_tx_flags; + decoder->state.type = INTEL_PT_TRANSACTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.flags = decoder->fup_tx_flags; + return 0; + } + return err; + } + decoder->set_fup_tx_flags = false; + if (err) + return err; + + if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) { + intel_pt_log_at("ERROR: Unexpected indirect branch", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; + return -ENOENT; + } + + if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) { + intel_pt_log_at("ERROR: Unexpected conditional branch", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; + return -ENOENT; + } + + intel_pt_bug(decoder); + } +} + +static int intel_pt_walk_tip(struct intel_pt_decoder *decoder) +{ + struct intel_pt_insn intel_pt_insn; + int err; + + err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0); + if (err == INTEL_PT_RETURN) + return 0; + if (err) + return err; + + if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) { + if (decoder->pkt_state == INTEL_PT_STATE_TIP_PGD) { + decoder->pge = false; + decoder->continuous_period = false; + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + if (decoder->packet.count != 0) + decoder->ip = decoder->last_ip; + } else { + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->state.from_ip = decoder->ip; + if (decoder->packet.count == 0) { + decoder->state.to_ip = 0; + } else { + decoder->state.to_ip = decoder->last_ip; + decoder->ip = decoder->last_ip; + } + } + return 0; + } + + if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) { + intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; + return -ENOENT; + } + + return intel_pt_bug(decoder); +} + +static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder) +{ + struct intel_pt_insn intel_pt_insn; + int err; + + while (1) { + err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0); + if (err == INTEL_PT_RETURN) + return 0; + if (err) + return err; + + if (intel_pt_insn.op == INTEL_PT_OP_RET) { + if (!decoder->return_compression) { + intel_pt_log_at("ERROR: RET when expecting conditional branch", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR3; + return -ENOENT; + } + if (!decoder->ret_addr) { + intel_pt_log_at("ERROR: Bad RET compression (stack empty)", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR3; + return -ENOENT; + } + if (!(decoder->tnt.payload & BIT63)) { + intel_pt_log_at("ERROR: Bad RET compression (TNT=N)", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR3; + return -ENOENT; + } + decoder->tnt.count -= 1; + if (!decoder->tnt.count) + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->tnt.payload <<= 1; + decoder->state.from_ip = decoder->ip; + decoder->ip = decoder->ret_addr; + decoder->state.to_ip = decoder->ip; + return 0; + } + + if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) { + /* Handle deferred TIPs */ + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + if (decoder->packet.type != INTEL_PT_TIP || + decoder->packet.count == 0) { + intel_pt_log_at("ERROR: Missing deferred TIP for indirect branch", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR3; + decoder->pkt_step = 0; + return -ENOENT; + } + intel_pt_set_last_ip(decoder); + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = decoder->last_ip; + decoder->ip = decoder->last_ip; + return 0; + } + + if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) { + decoder->tnt.count -= 1; + if (!decoder->tnt.count) + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + if (decoder->tnt.payload & BIT63) { + decoder->tnt.payload <<= 1; + decoder->state.from_ip = decoder->ip; + decoder->ip += intel_pt_insn.length + + intel_pt_insn.rel; + decoder->state.to_ip = decoder->ip; + return 0; + } + /* Instruction sample for a non-taken branch */ + if (decoder->state.type & INTEL_PT_INSTRUCTION) { + decoder->tnt.payload <<= 1; + decoder->state.type = INTEL_PT_INSTRUCTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->ip += intel_pt_insn.length; + return 0; + } + decoder->ip += intel_pt_insn.length; + if (!decoder->tnt.count) + return -EAGAIN; + decoder->tnt.payload <<= 1; + continue; + } + + return intel_pt_bug(decoder); + } +} + +static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip) +{ + unsigned int fup_tx_flags; + int err; + + fup_tx_flags = decoder->packet.payload & + (INTEL_PT_IN_TX | INTEL_PT_ABORT_TX); + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + if (decoder->packet.type == INTEL_PT_FUP) { + decoder->fup_tx_flags = fup_tx_flags; + decoder->set_fup_tx_flags = true; + if (!(decoder->fup_tx_flags & INTEL_PT_ABORT_TX)) + *no_tip = true; + } else { + intel_pt_log_at("ERROR: Missing FUP after MODE.TSX", + decoder->pos); + intel_pt_update_in_tx(decoder); + } + return 0; +} + +static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder) +{ + uint64_t timestamp; + + decoder->have_tma = false; + + if (decoder->ref_timestamp) { + timestamp = decoder->packet.payload | + (decoder->ref_timestamp & (0xffULL << 56)); + if (timestamp < decoder->ref_timestamp) { + if (decoder->ref_timestamp - timestamp > (1ULL << 55)) + timestamp += (1ULL << 56); + } else { + if (timestamp - decoder->ref_timestamp > (1ULL << 55)) + timestamp -= (1ULL << 56); + } + decoder->tsc_timestamp = timestamp; + decoder->timestamp = timestamp; + decoder->ref_timestamp = 0; + decoder->timestamp_insn_cnt = 0; + } else if (decoder->timestamp) { + timestamp = decoder->packet.payload | + (decoder->timestamp & (0xffULL << 56)); + decoder->tsc_timestamp = timestamp; + if (timestamp < decoder->timestamp && + decoder->timestamp - timestamp < decoder->tsc_slip) { + intel_pt_log_to("Suppressing backwards timestamp", + timestamp); + timestamp = decoder->timestamp; + } + while (timestamp < decoder->timestamp) { + intel_pt_log_to("Wraparound timestamp", timestamp); + timestamp += (1ULL << 56); + decoder->tsc_timestamp = timestamp; + } + decoder->timestamp = timestamp; + decoder->timestamp_insn_cnt = 0; + } + + if (decoder->last_packet_type == INTEL_PT_CYC) { + decoder->cyc_ref_timestamp = decoder->timestamp; + decoder->cycle_cnt = 0; + decoder->have_calc_cyc_to_tsc = false; + intel_pt_calc_cyc_to_tsc(decoder, false); + } + + intel_pt_log_to("Setting timestamp", decoder->timestamp); +} + +static int intel_pt_overflow(struct intel_pt_decoder *decoder) +{ + intel_pt_log("ERROR: Buffer overflow\n"); + intel_pt_clear_tx_flags(decoder); + decoder->have_tma = false; + decoder->cbr = 0; + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; + decoder->overflow = true; + return -EOVERFLOW; +} + +static void intel_pt_calc_tma(struct intel_pt_decoder *decoder) +{ + uint32_t ctc = decoder->packet.payload; + uint32_t fc = decoder->packet.count; + uint32_t ctc_rem = ctc & decoder->ctc_rem_mask; + + if (!decoder->tsc_ctc_ratio_d) + return; + + decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff; + decoder->ctc_timestamp = decoder->tsc_timestamp - fc; + if (decoder->tsc_ctc_mult) { + decoder->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult; + } else { + decoder->ctc_timestamp -= multdiv(ctc_rem, + decoder->tsc_ctc_ratio_n, + decoder->tsc_ctc_ratio_d); + } + decoder->ctc_delta = 0; + decoder->have_tma = true; + intel_pt_log("CTC timestamp " x64_fmt " last MTC %#x CTC rem %#x\n", + decoder->ctc_timestamp, decoder->last_mtc, ctc_rem); +} + +static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder) +{ + uint64_t timestamp; + uint32_t mtc, mtc_delta; + + if (!decoder->have_tma) + return; + + mtc = decoder->packet.payload; + + if (mtc > decoder->last_mtc) + mtc_delta = mtc - decoder->last_mtc; + else + mtc_delta = mtc + 256 - decoder->last_mtc; + + decoder->ctc_delta += mtc_delta << decoder->mtc_shift; + + if (decoder->tsc_ctc_mult) { + timestamp = decoder->ctc_timestamp + + decoder->ctc_delta * decoder->tsc_ctc_mult; + } else { + timestamp = decoder->ctc_timestamp + + multdiv(decoder->ctc_delta, + decoder->tsc_ctc_ratio_n, + decoder->tsc_ctc_ratio_d); + } + + if (timestamp < decoder->timestamp) + intel_pt_log("Suppressing MTC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n", + timestamp, decoder->timestamp); + else + decoder->timestamp = timestamp; + + decoder->timestamp_insn_cnt = 0; + decoder->last_mtc = mtc; + + if (decoder->last_packet_type == INTEL_PT_CYC) { + decoder->cyc_ref_timestamp = decoder->timestamp; + decoder->cycle_cnt = 0; + decoder->have_calc_cyc_to_tsc = false; + intel_pt_calc_cyc_to_tsc(decoder, true); + } +} + +static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder) +{ + unsigned int cbr = decoder->packet.payload; + + if (decoder->cbr == cbr) + return; + + decoder->cbr = cbr; + decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr; +} + +static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder) +{ + uint64_t timestamp = decoder->cyc_ref_timestamp; + + decoder->have_cyc = true; + + decoder->cycle_cnt += decoder->packet.payload; + + if (!decoder->cyc_ref_timestamp) + return; + + if (decoder->have_calc_cyc_to_tsc) + timestamp += decoder->cycle_cnt * decoder->calc_cyc_to_tsc; + else if (decoder->cbr) + timestamp += decoder->cycle_cnt * decoder->cbr_cyc_to_tsc; + else + return; + + if (timestamp < decoder->timestamp) + intel_pt_log("Suppressing CYC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n", + timestamp, decoder->timestamp); + else + decoder->timestamp = timestamp; +} + +/* Walk PSB+ packets when already in sync. */ +static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) +{ + int err; + + while (1) { + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + + switch (decoder->packet.type) { + case INTEL_PT_PSBEND: + return 0; + + case INTEL_PT_TIP_PGD: + case INTEL_PT_TIP_PGE: + case INTEL_PT_TIP: + case INTEL_PT_TNT: + case INTEL_PT_TRACESTOP: + case INTEL_PT_BAD: + case INTEL_PT_PSB: + decoder->have_tma = false; + intel_pt_log("ERROR: Unexpected packet\n"); + return -EAGAIN; + + case INTEL_PT_OVF: + return intel_pt_overflow(decoder); + + case INTEL_PT_TSC: + intel_pt_calc_tsc_timestamp(decoder); + break; + + case INTEL_PT_TMA: + intel_pt_calc_tma(decoder); + break; + + case INTEL_PT_CBR: + intel_pt_calc_cbr(decoder); + break; + + case INTEL_PT_MODE_EXEC: + decoder->exec_mode = decoder->packet.payload; + break; + + case INTEL_PT_PIP: + decoder->cr3 = decoder->packet.payload & (BIT63 - 1); + break; + + case INTEL_PT_FUP: + decoder->pge = true; + intel_pt_set_last_ip(decoder); + break; + + case INTEL_PT_MODE_TSX: + intel_pt_update_in_tx(decoder); + break; + + case INTEL_PT_MTC: + intel_pt_calc_mtc_timestamp(decoder); + if (decoder->period_type == INTEL_PT_PERIOD_MTC) + decoder->state.type |= INTEL_PT_INSTRUCTION; + break; + + case INTEL_PT_CYC: + case INTEL_PT_VMCS: + case INTEL_PT_MNT: + case INTEL_PT_PAD: + default: + break; + } + } +} + +static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) +{ + int err; + + if (decoder->tx_flags & INTEL_PT_ABORT_TX) { + decoder->tx_flags = 0; + decoder->state.flags &= ~INTEL_PT_IN_TX; + decoder->state.flags |= INTEL_PT_ABORT_TX; + } else { + decoder->state.flags |= INTEL_PT_ASYNC; + } + + while (1) { + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + + switch (decoder->packet.type) { + case INTEL_PT_TNT: + case INTEL_PT_FUP: + case INTEL_PT_TRACESTOP: + case INTEL_PT_PSB: + case INTEL_PT_TSC: + case INTEL_PT_TMA: + case INTEL_PT_CBR: + case INTEL_PT_MODE_TSX: + case INTEL_PT_BAD: + case INTEL_PT_PSBEND: + intel_pt_log("ERROR: Missing TIP after FUP\n"); + decoder->pkt_state = INTEL_PT_STATE_ERR3; + return -ENOENT; + + case INTEL_PT_OVF: + return intel_pt_overflow(decoder); + + case INTEL_PT_TIP_PGD: + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + if (decoder->packet.count != 0) { + intel_pt_set_ip(decoder); + intel_pt_log("Omitting PGD ip " x64_fmt "\n", + decoder->ip); + } + decoder->pge = false; + decoder->continuous_period = false; + return 0; + + case INTEL_PT_TIP_PGE: + decoder->pge = true; + intel_pt_log("Omitting PGE ip " x64_fmt "\n", + decoder->ip); + decoder->state.from_ip = 0; + if (decoder->packet.count == 0) { + decoder->state.to_ip = 0; + } else { + intel_pt_set_ip(decoder); + decoder->state.to_ip = decoder->ip; + } + return 0; + + case INTEL_PT_TIP: + decoder->state.from_ip = decoder->ip; + if (decoder->packet.count == 0) { + decoder->state.to_ip = 0; + } else { + intel_pt_set_ip(decoder); + decoder->state.to_ip = decoder->ip; + } + return 0; + + case INTEL_PT_PIP: + decoder->cr3 = decoder->packet.payload & (BIT63 - 1); + break; + + case INTEL_PT_MTC: + intel_pt_calc_mtc_timestamp(decoder); + if (decoder->period_type == INTEL_PT_PERIOD_MTC) + decoder->state.type |= INTEL_PT_INSTRUCTION; + break; + + case INTEL_PT_CYC: + intel_pt_calc_cyc_timestamp(decoder); + break; + + case INTEL_PT_MODE_EXEC: + decoder->exec_mode = decoder->packet.payload; + break; + + case INTEL_PT_VMCS: + case INTEL_PT_MNT: + case INTEL_PT_PAD: + break; + + default: + return intel_pt_bug(decoder); + } + } +} + +static int intel_pt_walk_trace(struct intel_pt_decoder *decoder) +{ + bool no_tip = false; + int err; + + while (1) { + err = intel_pt_get_next_packet(decoder); + if (err) + return err; +next: + switch (decoder->packet.type) { + case INTEL_PT_TNT: + if (!decoder->packet.count) + break; + decoder->tnt = decoder->packet; + decoder->pkt_state = INTEL_PT_STATE_TNT; + err = intel_pt_walk_tnt(decoder); + if (err == -EAGAIN) + break; + return err; + + case INTEL_PT_TIP_PGD: + if (decoder->packet.count != 0) + intel_pt_set_last_ip(decoder); + decoder->pkt_state = INTEL_PT_STATE_TIP_PGD; + return intel_pt_walk_tip(decoder); + + case INTEL_PT_TIP_PGE: { + decoder->pge = true; + if (decoder->packet.count == 0) { + intel_pt_log_at("Skipping zero TIP.PGE", + decoder->pos); + break; + } + intel_pt_set_ip(decoder); + decoder->state.from_ip = 0; + decoder->state.to_ip = decoder->ip; + return 0; + } + + case INTEL_PT_OVF: + return intel_pt_overflow(decoder); + + case INTEL_PT_TIP: + if (decoder->packet.count != 0) + intel_pt_set_last_ip(decoder); + decoder->pkt_state = INTEL_PT_STATE_TIP; + return intel_pt_walk_tip(decoder); + + case INTEL_PT_FUP: + if (decoder->packet.count == 0) { + intel_pt_log_at("Skipping zero FUP", + decoder->pos); + no_tip = false; + break; + } + intel_pt_set_last_ip(decoder); + err = intel_pt_walk_fup(decoder); + if (err != -EAGAIN) { + if (err) + return err; + if (no_tip) + decoder->pkt_state = + INTEL_PT_STATE_FUP_NO_TIP; + else + decoder->pkt_state = INTEL_PT_STATE_FUP; + return 0; + } + if (no_tip) { + no_tip = false; + break; + } + return intel_pt_walk_fup_tip(decoder); + + case INTEL_PT_TRACESTOP: + decoder->pge = false; + decoder->continuous_period = false; + intel_pt_clear_tx_flags(decoder); + decoder->have_tma = false; + break; + + case INTEL_PT_PSB: + intel_pt_clear_stack(&decoder->stack); + err = intel_pt_walk_psbend(decoder); + if (err == -EAGAIN) + goto next; + if (err) + return err; + break; + + case INTEL_PT_PIP: + decoder->cr3 = decoder->packet.payload & (BIT63 - 1); + break; + + case INTEL_PT_MTC: + intel_pt_calc_mtc_timestamp(decoder); + if (decoder->period_type != INTEL_PT_PERIOD_MTC) + break; + /* + * Ensure that there has been an instruction since the + * last MTC. + */ + if (!decoder->mtc_insn) + break; + decoder->mtc_insn = false; + /* Ensure that there is a timestamp */ + if (!decoder->timestamp) + break; + decoder->state.type = INTEL_PT_INSTRUCTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->mtc_insn = false; + return 0; + + case INTEL_PT_TSC: + intel_pt_calc_tsc_timestamp(decoder); + break; + + case INTEL_PT_TMA: + intel_pt_calc_tma(decoder); + break; + + case INTEL_PT_CYC: + intel_pt_calc_cyc_timestamp(decoder); + break; + + case INTEL_PT_CBR: + intel_pt_calc_cbr(decoder); + break; + + case INTEL_PT_MODE_EXEC: + decoder->exec_mode = decoder->packet.payload; + break; + + case INTEL_PT_MODE_TSX: + /* MODE_TSX need not be followed by FUP */ + if (!decoder->pge) { + intel_pt_update_in_tx(decoder); + break; + } + err = intel_pt_mode_tsx(decoder, &no_tip); + if (err) + return err; + goto next; + + case INTEL_PT_BAD: /* Does not happen */ + return intel_pt_bug(decoder); + + case INTEL_PT_PSBEND: + case INTEL_PT_VMCS: + case INTEL_PT_MNT: + case INTEL_PT_PAD: + break; + + default: + return intel_pt_bug(decoder); + } + } +} + +/* Walk PSB+ packets to get in sync. */ +static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) +{ + int err; + + while (1) { + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + + switch (decoder->packet.type) { + case INTEL_PT_TIP_PGD: + decoder->continuous_period = false; + case INTEL_PT_TIP_PGE: + case INTEL_PT_TIP: + intel_pt_log("ERROR: Unexpected packet\n"); + return -ENOENT; + + case INTEL_PT_FUP: + decoder->pge = true; + if (decoder->last_ip || decoder->packet.count == 6 || + decoder->packet.count == 0) { + uint64_t current_ip = decoder->ip; + + intel_pt_set_ip(decoder); + if (current_ip) + intel_pt_log_to("Setting IP", + decoder->ip); + } + break; + + case INTEL_PT_MTC: + intel_pt_calc_mtc_timestamp(decoder); + break; + + case INTEL_PT_TSC: + intel_pt_calc_tsc_timestamp(decoder); + break; + + case INTEL_PT_TMA: + intel_pt_calc_tma(decoder); + break; + + case INTEL_PT_CYC: + intel_pt_calc_cyc_timestamp(decoder); + break; + + case INTEL_PT_CBR: + intel_pt_calc_cbr(decoder); + break; + + case INTEL_PT_PIP: + decoder->cr3 = decoder->packet.payload & (BIT63 - 1); + break; + + case INTEL_PT_MODE_EXEC: + decoder->exec_mode = decoder->packet.payload; + break; + + case INTEL_PT_MODE_TSX: + intel_pt_update_in_tx(decoder); + break; + + case INTEL_PT_TRACESTOP: + decoder->pge = false; + decoder->continuous_period = false; + intel_pt_clear_tx_flags(decoder); + case INTEL_PT_TNT: + decoder->have_tma = false; + intel_pt_log("ERROR: Unexpected packet\n"); + if (decoder->ip) + decoder->pkt_state = INTEL_PT_STATE_ERR4; + else + decoder->pkt_state = INTEL_PT_STATE_ERR3; + return -ENOENT; + + case INTEL_PT_BAD: /* Does not happen */ + return intel_pt_bug(decoder); + + case INTEL_PT_OVF: + return intel_pt_overflow(decoder); + + case INTEL_PT_PSBEND: + return 0; + + case INTEL_PT_PSB: + case INTEL_PT_VMCS: + case INTEL_PT_MNT: + case INTEL_PT_PAD: + default: + break; + } + } +} + +static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) +{ + int err; + + while (1) { + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + + switch (decoder->packet.type) { + case INTEL_PT_TIP_PGD: + decoder->continuous_period = false; + case INTEL_PT_TIP_PGE: + case INTEL_PT_TIP: + decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD; + if (decoder->last_ip || decoder->packet.count == 6 || + decoder->packet.count == 0) + intel_pt_set_ip(decoder); + if (decoder->ip) + return 0; + break; + + case INTEL_PT_FUP: + if (decoder->overflow) { + if (decoder->last_ip || + decoder->packet.count == 6 || + decoder->packet.count == 0) + intel_pt_set_ip(decoder); + if (decoder->ip) + return 0; + } + if (decoder->packet.count) + intel_pt_set_last_ip(decoder); + break; + + case INTEL_PT_MTC: + intel_pt_calc_mtc_timestamp(decoder); + break; + + case INTEL_PT_TSC: + intel_pt_calc_tsc_timestamp(decoder); + break; + + case INTEL_PT_TMA: + intel_pt_calc_tma(decoder); + break; + + case INTEL_PT_CYC: + intel_pt_calc_cyc_timestamp(decoder); + break; + + case INTEL_PT_CBR: + intel_pt_calc_cbr(decoder); + break; + + case INTEL_PT_PIP: + decoder->cr3 = decoder->packet.payload & (BIT63 - 1); + break; + + case INTEL_PT_MODE_EXEC: + decoder->exec_mode = decoder->packet.payload; + break; + + case INTEL_PT_MODE_TSX: + intel_pt_update_in_tx(decoder); + break; + + case INTEL_PT_OVF: + return intel_pt_overflow(decoder); + + case INTEL_PT_BAD: /* Does not happen */ + return intel_pt_bug(decoder); + + case INTEL_PT_TRACESTOP: + decoder->pge = false; + decoder->continuous_period = false; + intel_pt_clear_tx_flags(decoder); + decoder->have_tma = false; + break; + + case INTEL_PT_PSB: + err = intel_pt_walk_psb(decoder); + if (err) + return err; + if (decoder->ip) { + /* Do not have a sample */ + decoder->state.type = 0; + return 0; + } + break; + + case INTEL_PT_TNT: + case INTEL_PT_PSBEND: + case INTEL_PT_VMCS: + case INTEL_PT_MNT: + case INTEL_PT_PAD: + default: + break; + } + } +} + +static int intel_pt_sync_ip(struct intel_pt_decoder *decoder) +{ + int err; + + intel_pt_log("Scanning for full IP\n"); + err = intel_pt_walk_to_ip(decoder); + if (err) + return err; + + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->overflow = false; + + decoder->state.from_ip = 0; + decoder->state.to_ip = decoder->ip; + intel_pt_log_to("Setting IP", decoder->ip); + + return 0; +} + +static int intel_pt_part_psb(struct intel_pt_decoder *decoder) +{ + const unsigned char *end = decoder->buf + decoder->len; + size_t i; + + for (i = INTEL_PT_PSB_LEN - 1; i; i--) { + if (i > decoder->len) + continue; + if (!memcmp(end - i, INTEL_PT_PSB_STR, i)) + return i; + } + return 0; +} + +static int intel_pt_rest_psb(struct intel_pt_decoder *decoder, int part_psb) +{ + size_t rest_psb = INTEL_PT_PSB_LEN - part_psb; + const char *psb = INTEL_PT_PSB_STR; + + if (rest_psb > decoder->len || + memcmp(decoder->buf, psb + part_psb, rest_psb)) + return 0; + + return rest_psb; +} + +static int intel_pt_get_split_psb(struct intel_pt_decoder *decoder, + int part_psb) +{ + int rest_psb, ret; + + decoder->pos += decoder->len; + decoder->len = 0; + + ret = intel_pt_get_next_data(decoder); + if (ret) + return ret; + + rest_psb = intel_pt_rest_psb(decoder, part_psb); + if (!rest_psb) + return 0; + + decoder->pos -= part_psb; + decoder->next_buf = decoder->buf + rest_psb; + decoder->next_len = decoder->len - rest_psb; + memcpy(decoder->temp_buf, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN); + decoder->buf = decoder->temp_buf; + decoder->len = INTEL_PT_PSB_LEN; + + return 0; +} + +static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder) +{ + unsigned char *next; + int ret; + + intel_pt_log("Scanning for PSB\n"); + while (1) { + if (!decoder->len) { + ret = intel_pt_get_next_data(decoder); + if (ret) + return ret; + } + + next = memmem(decoder->buf, decoder->len, INTEL_PT_PSB_STR, + INTEL_PT_PSB_LEN); + if (!next) { + int part_psb; + + part_psb = intel_pt_part_psb(decoder); + if (part_psb) { + ret = intel_pt_get_split_psb(decoder, part_psb); + if (ret) + return ret; + } else { + decoder->pos += decoder->len; + decoder->len = 0; + } + continue; + } + + decoder->pkt_step = next - decoder->buf; + return intel_pt_get_next_packet(decoder); + } +} + +static int intel_pt_sync(struct intel_pt_decoder *decoder) +{ + int err; + + decoder->pge = false; + decoder->continuous_period = false; + decoder->last_ip = 0; + decoder->ip = 0; + intel_pt_clear_stack(&decoder->stack); + + err = intel_pt_scan_for_psb(decoder); + if (err) + return err; + + decoder->pkt_state = INTEL_PT_STATE_NO_IP; + + err = intel_pt_walk_psb(decoder); + if (err) + return err; + + if (decoder->ip) { + decoder->state.type = 0; /* Do not have a sample */ + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + } else { + return intel_pt_sync_ip(decoder); + } + + return 0; +} + +static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder) +{ + uint64_t est = decoder->timestamp_insn_cnt << 1; + + if (!decoder->cbr || !decoder->max_non_turbo_ratio) + goto out; + + est *= decoder->max_non_turbo_ratio; + est /= decoder->cbr; +out: + return decoder->timestamp + est; +} + +const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) +{ + int err; + + do { + decoder->state.type = INTEL_PT_BRANCH; + decoder->state.flags = 0; + + switch (decoder->pkt_state) { + case INTEL_PT_STATE_NO_PSB: + err = intel_pt_sync(decoder); + break; + case INTEL_PT_STATE_NO_IP: + decoder->last_ip = 0; + /* Fall through */ + case INTEL_PT_STATE_ERR_RESYNC: + err = intel_pt_sync_ip(decoder); + break; + case INTEL_PT_STATE_IN_SYNC: + err = intel_pt_walk_trace(decoder); + break; + case INTEL_PT_STATE_TNT: + err = intel_pt_walk_tnt(decoder); + if (err == -EAGAIN) + err = intel_pt_walk_trace(decoder); + break; + case INTEL_PT_STATE_TIP: + case INTEL_PT_STATE_TIP_PGD: + err = intel_pt_walk_tip(decoder); + break; + case INTEL_PT_STATE_FUP: + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + err = intel_pt_walk_fup(decoder); + if (err == -EAGAIN) + err = intel_pt_walk_fup_tip(decoder); + else if (!err) + decoder->pkt_state = INTEL_PT_STATE_FUP; + break; + case INTEL_PT_STATE_FUP_NO_TIP: + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + err = intel_pt_walk_fup(decoder); + if (err == -EAGAIN) + err = intel_pt_walk_trace(decoder); + break; + default: + err = intel_pt_bug(decoder); + break; + } + } while (err == -ENOLINK); + + decoder->state.err = err ? intel_pt_ext_err(err) : 0; + decoder->state.timestamp = decoder->timestamp; + decoder->state.est_timestamp = intel_pt_est_timestamp(decoder); + decoder->state.cr3 = decoder->cr3; + decoder->state.tot_insn_cnt = decoder->tot_insn_cnt; + + if (err) + decoder->state.from_ip = decoder->ip; + + return &decoder->state; +} + +static bool intel_pt_at_psb(unsigned char *buf, size_t len) +{ + if (len < INTEL_PT_PSB_LEN) + return false; + return memmem(buf, INTEL_PT_PSB_LEN, INTEL_PT_PSB_STR, + INTEL_PT_PSB_LEN); +} + +/** + * intel_pt_next_psb - move buffer pointer to the start of the next PSB packet. + * @buf: pointer to buffer pointer + * @len: size of buffer + * + * Updates the buffer pointer to point to the start of the next PSB packet if + * there is one, otherwise the buffer pointer is unchanged. If @buf is updated, + * @len is adjusted accordingly. + * + * Return: %true if a PSB packet is found, %false otherwise. + */ +static bool intel_pt_next_psb(unsigned char **buf, size_t *len) +{ + unsigned char *next; + + next = memmem(*buf, *len, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN); + if (next) { + *len -= next - *buf; + *buf = next; + return true; + } + return false; +} + +/** + * intel_pt_step_psb - move buffer pointer to the start of the following PSB + * packet. + * @buf: pointer to buffer pointer + * @len: size of buffer + * + * Updates the buffer pointer to point to the start of the following PSB packet + * (skipping the PSB at @buf itself) if there is one, otherwise the buffer + * pointer is unchanged. If @buf is updated, @len is adjusted accordingly. + * + * Return: %true if a PSB packet is found, %false otherwise. + */ +static bool intel_pt_step_psb(unsigned char **buf, size_t *len) +{ + unsigned char *next; + + if (!*len) + return false; + + next = memmem(*buf + 1, *len - 1, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN); + if (next) { + *len -= next - *buf; + *buf = next; + return true; + } + return false; +} + +/** + * intel_pt_last_psb - find the last PSB packet in a buffer. + * @buf: buffer + * @len: size of buffer + * + * This function finds the last PSB in a buffer. + * + * Return: A pointer to the last PSB in @buf if found, %NULL otherwise. + */ +static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len) +{ + const char *n = INTEL_PT_PSB_STR; + unsigned char *p; + size_t k; + + if (len < INTEL_PT_PSB_LEN) + return NULL; + + k = len - INTEL_PT_PSB_LEN + 1; + while (1) { + p = memrchr(buf, n[0], k); + if (!p) + return NULL; + if (!memcmp(p + 1, n + 1, INTEL_PT_PSB_LEN - 1)) + return p; + k = p - buf; + if (!k) + return NULL; + } +} + +/** + * intel_pt_next_tsc - find and return next TSC. + * @buf: buffer + * @len: size of buffer + * @tsc: TSC value returned + * + * Find a TSC packet in @buf and return the TSC value. This function assumes + * that @buf starts at a PSB and that PSB+ will contain TSC and so stops if a + * PSBEND packet is found. + * + * Return: %true if TSC is found, false otherwise. + */ +static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc) +{ + struct intel_pt_pkt packet; + int ret; + + while (len) { + ret = intel_pt_get_packet(buf, len, &packet); + if (ret <= 0) + return false; + if (packet.type == INTEL_PT_TSC) { + *tsc = packet.payload; + return true; + } + if (packet.type == INTEL_PT_PSBEND) + return false; + buf += ret; + len -= ret; + } + return false; +} + +/** + * intel_pt_tsc_cmp - compare 7-byte TSCs. + * @tsc1: first TSC to compare + * @tsc2: second TSC to compare + * + * This function compares 7-byte TSC values allowing for the possibility that + * TSC wrapped around. Generally it is not possible to know if TSC has wrapped + * around so for that purpose this function assumes the absolute difference is + * less than half the maximum difference. + * + * Return: %-1 if @tsc1 is before @tsc2, %0 if @tsc1 == @tsc2, %1 if @tsc1 is + * after @tsc2. + */ +static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2) +{ + const uint64_t halfway = (1ULL << 55); + + if (tsc1 == tsc2) + return 0; + + if (tsc1 < tsc2) { + if (tsc2 - tsc1 < halfway) + return -1; + else + return 1; + } else { + if (tsc1 - tsc2 < halfway) + return 1; + else + return -1; + } +} + +/** + * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data + * using TSC. + * @buf_a: first buffer + * @len_a: size of first buffer + * @buf_b: second buffer + * @len_b: size of second buffer + * + * If the trace contains TSC we can look at the last TSC of @buf_a and the + * first TSC of @buf_b in order to determine if the buffers overlap, and then + * walk forward in @buf_b until a later TSC is found. A precondition is that + * @buf_a and @buf_b are positioned at a PSB. + * + * Return: A pointer into @buf_b from where non-overlapped data starts, or + * @buf_b + @len_b if there is no non-overlapped data. + */ +static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a, + size_t len_a, + unsigned char *buf_b, + size_t len_b) +{ + uint64_t tsc_a, tsc_b; + unsigned char *p; + size_t len; + + p = intel_pt_last_psb(buf_a, len_a); + if (!p) + return buf_b; /* No PSB in buf_a => no overlap */ + + len = len_a - (p - buf_a); + if (!intel_pt_next_tsc(p, len, &tsc_a)) { + /* The last PSB+ in buf_a is incomplete, so go back one more */ + len_a -= len; + p = intel_pt_last_psb(buf_a, len_a); + if (!p) + return buf_b; /* No full PSB+ => assume no overlap */ + len = len_a - (p - buf_a); + if (!intel_pt_next_tsc(p, len, &tsc_a)) + return buf_b; /* No TSC in buf_a => assume no overlap */ + } + + while (1) { + /* Ignore PSB+ with no TSC */ + if (intel_pt_next_tsc(buf_b, len_b, &tsc_b) && + intel_pt_tsc_cmp(tsc_a, tsc_b) < 0) + return buf_b; /* tsc_a < tsc_b => no overlap */ + + if (!intel_pt_step_psb(&buf_b, &len_b)) + return buf_b + len_b; /* No PSB in buf_b => no data */ + } +} + +/** + * intel_pt_find_overlap - determine start of non-overlapped trace data. + * @buf_a: first buffer + * @len_a: size of first buffer + * @buf_b: second buffer + * @len_b: size of second buffer + * @have_tsc: can use TSC packets to detect overlap + * + * When trace samples or snapshots are recorded there is the possibility that + * the data overlaps. Note that, for the purposes of decoding, data is only + * useful if it begins with a PSB packet. + * + * Return: A pointer into @buf_b from where non-overlapped data starts, or + * @buf_b + @len_b if there is no non-overlapped data. + */ +unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, + unsigned char *buf_b, size_t len_b, + bool have_tsc) +{ + unsigned char *found; + + /* Buffer 'b' must start at PSB so throw away everything before that */ + if (!intel_pt_next_psb(&buf_b, &len_b)) + return buf_b + len_b; /* No PSB */ + + if (!intel_pt_next_psb(&buf_a, &len_a)) + return buf_b; /* No overlap */ + + if (have_tsc) { + found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b); + if (found) + return found; + } + + /* + * Buffer 'b' cannot end within buffer 'a' so, for comparison purposes, + * we can ignore the first part of buffer 'a'. + */ + while (len_b < len_a) { + if (!intel_pt_step_psb(&buf_a, &len_a)) + return buf_b; /* No overlap */ + } + + /* Now len_b >= len_a */ + if (len_b > len_a) { + /* The leftover buffer 'b' must start at a PSB */ + while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) { + if (!intel_pt_step_psb(&buf_a, &len_a)) + return buf_b; /* No overlap */ + } + } + + while (1) { + /* Potential overlap so check the bytes */ + found = memmem(buf_a, len_a, buf_b, len_a); + if (found) + return buf_b + len_a; + + /* Try again at next PSB in buffer 'a' */ + if (!intel_pt_step_psb(&buf_a, &len_a)) + return buf_b; /* No overlap */ + + /* The leftover buffer 'b' must start at a PSB */ + while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) { + if (!intel_pt_step_psb(&buf_a, &len_a)) + return buf_b; /* No overlap */ + } + } +} diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h new file mode 100644 index 000000000000..02c38fec1c37 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -0,0 +1,109 @@ +/* + * intel_pt_decoder.h: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef INCLUDE__INTEL_PT_DECODER_H__ +#define INCLUDE__INTEL_PT_DECODER_H__ + +#include <stdint.h> +#include <stddef.h> +#include <stdbool.h> + +#include "intel-pt-insn-decoder.h" + +#define INTEL_PT_IN_TX (1 << 0) +#define INTEL_PT_ABORT_TX (1 << 1) +#define INTEL_PT_ASYNC (1 << 2) + +enum intel_pt_sample_type { + INTEL_PT_BRANCH = 1 << 0, + INTEL_PT_INSTRUCTION = 1 << 1, + INTEL_PT_TRANSACTION = 1 << 2, +}; + +enum intel_pt_period_type { + INTEL_PT_PERIOD_NONE, + INTEL_PT_PERIOD_INSTRUCTIONS, + INTEL_PT_PERIOD_TICKS, + INTEL_PT_PERIOD_MTC, +}; + +enum { + INTEL_PT_ERR_NOMEM = 1, + INTEL_PT_ERR_INTERN, + INTEL_PT_ERR_BADPKT, + INTEL_PT_ERR_NODATA, + INTEL_PT_ERR_NOINSN, + INTEL_PT_ERR_MISMAT, + INTEL_PT_ERR_OVR, + INTEL_PT_ERR_LOST, + INTEL_PT_ERR_UNK, + INTEL_PT_ERR_NELOOP, + INTEL_PT_ERR_MAX, +}; + +struct intel_pt_state { + enum intel_pt_sample_type type; + int err; + uint64_t from_ip; + uint64_t to_ip; + uint64_t cr3; + uint64_t tot_insn_cnt; + uint64_t timestamp; + uint64_t est_timestamp; + uint64_t trace_nr; + uint32_t flags; + enum intel_pt_insn_op insn_op; + int insn_len; +}; + +struct intel_pt_insn; + +struct intel_pt_buffer { + const unsigned char *buf; + size_t len; + bool consecutive; + uint64_t ref_timestamp; + uint64_t trace_nr; +}; + +struct intel_pt_params { + int (*get_trace)(struct intel_pt_buffer *buffer, void *data); + int (*walk_insn)(struct intel_pt_insn *intel_pt_insn, + uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, + uint64_t max_insn_cnt, void *data); + void *data; + bool return_compression; + uint64_t period; + enum intel_pt_period_type period_type; + unsigned max_non_turbo_ratio; + unsigned int mtc_period; + uint32_t tsc_ctc_ratio_n; + uint32_t tsc_ctc_ratio_d; +}; + +struct intel_pt_decoder; + +struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params); +void intel_pt_decoder_free(struct intel_pt_decoder *decoder); + +const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder); + +unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, + unsigned char *buf_b, size_t len_b, + bool have_tsc); + +int intel_pt__strerror(int code, char *buf, size_t buflen); + +#endif diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c new file mode 100644 index 000000000000..9e4eb8fcd559 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -0,0 +1,246 @@ +/* + * intel_pt_insn_decoder.c: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <stdio.h> +#include <string.h> +#include <endian.h> +#include <byteswap.h> + +#include "event.h" + +#include "insn.h" + +#include "inat.c" +#include "insn.c" + +#include "intel-pt-insn-decoder.h" + +/* Based on branch_type() from perf_event_intel_lbr.c */ +static void intel_pt_insn_decoder(struct insn *insn, + struct intel_pt_insn *intel_pt_insn) +{ + enum intel_pt_insn_op op = INTEL_PT_OP_OTHER; + enum intel_pt_insn_branch branch = INTEL_PT_BR_NO_BRANCH; + int ext; + + if (insn_is_avx(insn)) { + intel_pt_insn->op = INTEL_PT_OP_OTHER; + intel_pt_insn->branch = INTEL_PT_BR_NO_BRANCH; + intel_pt_insn->length = insn->length; + return; + } + + switch (insn->opcode.bytes[0]) { + case 0xf: + switch (insn->opcode.bytes[1]) { + case 0x05: /* syscall */ + case 0x34: /* sysenter */ + op = INTEL_PT_OP_SYSCALL; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0x07: /* sysret */ + case 0x35: /* sysexit */ + op = INTEL_PT_OP_SYSRET; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0x80 ... 0x8f: /* jcc */ + op = INTEL_PT_OP_JCC; + branch = INTEL_PT_BR_CONDITIONAL; + break; + default: + break; + } + break; + case 0x70 ... 0x7f: /* jcc */ + op = INTEL_PT_OP_JCC; + branch = INTEL_PT_BR_CONDITIONAL; + break; + case 0xc2: /* near ret */ + case 0xc3: /* near ret */ + case 0xca: /* far ret */ + case 0xcb: /* far ret */ + op = INTEL_PT_OP_RET; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0xcf: /* iret */ + op = INTEL_PT_OP_IRET; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0xcc ... 0xce: /* int */ + op = INTEL_PT_OP_INT; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0xe8: /* call near rel */ + op = INTEL_PT_OP_CALL; + branch = INTEL_PT_BR_UNCONDITIONAL; + break; + case 0x9a: /* call far absolute */ + op = INTEL_PT_OP_CALL; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0xe0 ... 0xe2: /* loop */ + op = INTEL_PT_OP_LOOP; + branch = INTEL_PT_BR_CONDITIONAL; + break; + case 0xe3: /* jcc */ + op = INTEL_PT_OP_JCC; + branch = INTEL_PT_BR_CONDITIONAL; + break; + case 0xe9: /* jmp */ + case 0xeb: /* jmp */ + op = INTEL_PT_OP_JMP; + branch = INTEL_PT_BR_UNCONDITIONAL; + break; + case 0xea: /* far jmp */ + op = INTEL_PT_OP_JMP; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0xff: /* call near absolute, call far absolute ind */ + ext = (insn->modrm.bytes[0] >> 3) & 0x7; + switch (ext) { + case 2: /* near ind call */ + case 3: /* far ind call */ + op = INTEL_PT_OP_CALL; + branch = INTEL_PT_BR_INDIRECT; + break; + case 4: + case 5: + op = INTEL_PT_OP_JMP; + branch = INTEL_PT_BR_INDIRECT; + break; + default: + break; + } + break; + default: + break; + } + + intel_pt_insn->op = op; + intel_pt_insn->branch = branch; + intel_pt_insn->length = insn->length; + + if (branch == INTEL_PT_BR_CONDITIONAL || + branch == INTEL_PT_BR_UNCONDITIONAL) { +#if __BYTE_ORDER == __BIG_ENDIAN + switch (insn->immediate.nbytes) { + case 1: + intel_pt_insn->rel = insn->immediate.value; + break; + case 2: + intel_pt_insn->rel = + bswap_16((short)insn->immediate.value); + break; + case 4: + intel_pt_insn->rel = bswap_32(insn->immediate.value); + break; + } +#else + intel_pt_insn->rel = insn->immediate.value; +#endif + } +} + +int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64, + struct intel_pt_insn *intel_pt_insn) +{ + struct insn insn; + + insn_init(&insn, buf, len, x86_64); + insn_get_length(&insn); + if (!insn_complete(&insn) || insn.length > len) + return -1; + intel_pt_insn_decoder(&insn, intel_pt_insn); + if (insn.length < INTEL_PT_INSN_DBG_BUF_SZ) + memcpy(intel_pt_insn->buf, buf, insn.length); + else + memcpy(intel_pt_insn->buf, buf, INTEL_PT_INSN_DBG_BUF_SZ); + return 0; +} + +const char *branch_name[] = { + [INTEL_PT_OP_OTHER] = "Other", + [INTEL_PT_OP_CALL] = "Call", + [INTEL_PT_OP_RET] = "Ret", + [INTEL_PT_OP_JCC] = "Jcc", + [INTEL_PT_OP_JMP] = "Jmp", + [INTEL_PT_OP_LOOP] = "Loop", + [INTEL_PT_OP_IRET] = "IRet", + [INTEL_PT_OP_INT] = "Int", + [INTEL_PT_OP_SYSCALL] = "Syscall", + [INTEL_PT_OP_SYSRET] = "Sysret", +}; + +const char *intel_pt_insn_name(enum intel_pt_insn_op op) +{ + return branch_name[op]; +} + +int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf, + size_t buf_len) +{ + switch (intel_pt_insn->branch) { + case INTEL_PT_BR_CONDITIONAL: + case INTEL_PT_BR_UNCONDITIONAL: + return snprintf(buf, buf_len, "%s %s%d", + intel_pt_insn_name(intel_pt_insn->op), + intel_pt_insn->rel > 0 ? "+" : "", + intel_pt_insn->rel); + case INTEL_PT_BR_NO_BRANCH: + case INTEL_PT_BR_INDIRECT: + return snprintf(buf, buf_len, "%s", + intel_pt_insn_name(intel_pt_insn->op)); + default: + break; + } + return 0; +} + +size_t intel_pt_insn_max_size(void) +{ + return MAX_INSN_SIZE; +} + +int intel_pt_insn_type(enum intel_pt_insn_op op) +{ + switch (op) { + case INTEL_PT_OP_OTHER: + return 0; + case INTEL_PT_OP_CALL: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL; + case INTEL_PT_OP_RET: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN; + case INTEL_PT_OP_JCC: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL; + case INTEL_PT_OP_JMP: + return PERF_IP_FLAG_BRANCH; + case INTEL_PT_OP_LOOP: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL; + case INTEL_PT_OP_IRET: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | + PERF_IP_FLAG_INTERRUPT; + case INTEL_PT_OP_INT: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | + PERF_IP_FLAG_INTERRUPT; + case INTEL_PT_OP_SYSCALL: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | + PERF_IP_FLAG_SYSCALLRET; + case INTEL_PT_OP_SYSRET: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | + PERF_IP_FLAG_SYSCALLRET; + default: + return 0; + } +} diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h new file mode 100644 index 000000000000..b0adbf37323e --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h @@ -0,0 +1,65 @@ +/* + * intel_pt_insn_decoder.h: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef INCLUDE__INTEL_PT_INSN_DECODER_H__ +#define INCLUDE__INTEL_PT_INSN_DECODER_H__ + +#include <stddef.h> +#include <stdint.h> + +#define INTEL_PT_INSN_DESC_MAX 32 +#define INTEL_PT_INSN_DBG_BUF_SZ 16 + +enum intel_pt_insn_op { + INTEL_PT_OP_OTHER, + INTEL_PT_OP_CALL, + INTEL_PT_OP_RET, + INTEL_PT_OP_JCC, + INTEL_PT_OP_JMP, + INTEL_PT_OP_LOOP, + INTEL_PT_OP_IRET, + INTEL_PT_OP_INT, + INTEL_PT_OP_SYSCALL, + INTEL_PT_OP_SYSRET, +}; + +enum intel_pt_insn_branch { + INTEL_PT_BR_NO_BRANCH, + INTEL_PT_BR_INDIRECT, + INTEL_PT_BR_CONDITIONAL, + INTEL_PT_BR_UN |