aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-02-20 01:49:41 (GMT)
committerLinus Torvalds <torvalds@linux-foundation.org>2013-02-20 01:49:41 (GMT)
commit8f55cea410dbc56114bb71a3742032070c8108d0 (patch)
tree59605f0ee961274b22f91add33f5c32459471a83
parentb7133a9a103655cda254987a3c0975fd9d8c443f (diff)
parente259514eef764a5286873618e34c560ecb6cff13 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf changes from Ingo Molnar: "There are lots of improvements, the biggest changes are: Main kernel side changes: - Improve uprobes performance by adding 'pre-filtering' support, by Oleg Nesterov. - Make some POWER7 events available in sysfs, equivalent to what was done on x86, from Sukadev Bhattiprolu. - tracing updates by Steve Rostedt - mostly misc fixes and smaller improvements. - Use perf/event tracing to report PCI Express advanced errors, by Tony Luck. - Enable northbridge performance counters on AMD family 15h, by Jacob Shin. - This tracing commit: tracing: Remove the extra 4 bytes of padding in events changes the ABI. All involved parties (PowerTop in particular) seem to agree that it's safe to do now with the introduction of libtraceevent, but the devil is in the details ... Main tooling side changes: - Add 'event group view', from Namyung Kim: To use it, 'perf record' should group events when recording. And then perf report parses the saved group relation from file header and prints them together if --group option is provided. You can use the 'perf evlist' command to see event group information: $ perf record -e '{ref-cycles,cycles}' noploop 1 [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.385 MB perf.data (~16807 samples) ] $ perf evlist --group {ref-cycles,cycles} With this example, default perf report will show you each event separately. You can use --group option to enable event group view: $ perf report --group ... # group: {ref-cycles,cycles} # ======== # Samples: 7K of event 'anon group { ref-cycles, cycles }' # Event count (approx.): 6876107743 # # Overhead Command Shared Object Symbol # ................ ....... ................. .......................... 99.84% 99.76% noploop noploop [.] main 0.07% 0.00% noploop ld-2.15.so [.] strcmp 0.03% 0.00% noploop [kernel.kallsyms] [k] timerqueue_del 0.03% 0.03% noploop [kernel.kallsyms] [k] sched_clock_cpu 0.02% 0.00% noploop [kernel.kallsyms] [k] account_user_time 0.01% 0.00% noploop [kernel.kallsyms] [k] __alloc_pages_nodemask 0.00% 0.00% noploop [kernel.kallsyms] [k] native_write_msr_safe 0.00% 0.11% noploop [kernel.kallsyms] [k] _raw_spin_lock 0.00% 0.06% noploop [kernel.kallsyms] [k] find_get_page 0.00% 0.02% noploop [kernel.kallsyms] [k] rcu_check_callbacks 0.00% 0.02% noploop [kernel.kallsyms] [k] __current_kernel_time As you can see the Overhead column now contains both of ref-cycles and cycles and header line shows group information also - 'anon group { ref-cycles, cycles }'. The output is sorted by period of group leader first. - Initial GTK+ annotate browser, from Namhyung Kim. - Add option for runtime switching perf data file in perf report, just press 's' and a menu with the valid files found in the current directory will be presented, from Feng Tang. - Add support to display whole group data for raw columns, from Jiri Olsa. - Add per processor socket count aggregation in perf stat, from Stephane Eranian. - Add interval printing in 'perf stat', from Stephane Eranian. - 'perf test' improvements - Add support for wildcards in tracepoint system name, from Jiri Olsa. - Add anonymous huge page recognition, from Joshua Zhu. - perf build-id cache now can show DSOs present in a perf.data file that are not in the cache, to integrate with build-id servers being put in place by organizations such as Fedora. - perf top now shares more of the evsel config/creation routines with 'record', paving the way for further integration like 'top' snapshots, etc. - perf top now supports DWARF callchains. - Fix mmap limitations on 32-bit, fix from David Miller. - 'perf bench numa mem' NUMA performance measurement suite - ... and lots of fixes, performance improvements, cleanups and other improvements I failed to list - see the shortlog and git log for details." * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (270 commits) perf/x86/amd: Enable northbridge performance counters on AMD family 15h perf/hwbp: Fix cleanup in case of kzalloc failure perf tools: Fix build with bison 2.3 and older. perf tools: Limit unwind support to x86 archs perf annotate: Make it to be able to skip unannotatable symbols perf gtk/annotate: Fail early if it can't annotate perf gtk/annotate: Show source lines with gray color perf gtk/annotate: Support multiple event annotation perf ui/gtk: Implement basic GTK2 annotation browser perf annotate: Fix warning message on a missing vmlinux perf buildid-cache: Add --update option uprobes/perf: Avoid uprobe_apply() whenever possible uprobes/perf: Teach trace_uprobe/perf code to use UPROBE_HANDLER_REMOVE uprobes/perf: Teach trace_uprobe/perf code to pre-filter uprobes/perf: Teach trace_uprobe/perf code to track the active perf_event's uprobes: Introduce uprobe_apply() perf: Introduce hw_perf_event->tp_target and ->tp_list uprobes/perf: Always increment trace_uprobe->nhit uprobes/tracing: Kill uprobe_trace_consumer, embed uprobe_consumer into trace_uprobe uprobes/tracing: Introduce is_trace_uprobe_enabled() ...
-rw-r--r--Documentation/ABI/testing/sysfs-bus-event_source-devices-events62
-rw-r--r--Documentation/trace/ftrace.txt83
-rw-r--r--arch/Kconfig12
-rw-r--r--arch/powerpc/include/asm/perf_event_server.h26
-rw-r--r--arch/powerpc/perf/core-book3s.c12
-rw-r--r--arch/powerpc/perf/power7-pmu.c80
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/ftrace.h1
-rw-r--r--arch/x86/include/asm/perf_event.h13
-rw-r--r--arch/x86/include/uapi/asm/msr-index.h2
-rw-r--r--arch/x86/kernel/Makefile3
-rw-r--r--arch/x86/kernel/cpu/perf_event.c15
-rw-r--r--arch/x86/kernel/cpu/perf_event.h25
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c322
-rw-r--r--arch/x86/kernel/kprobes/Makefile7
-rw-r--r--arch/x86/kernel/kprobes/common.h (renamed from arch/x86/kernel/kprobes-common.h)11
-rw-r--r--arch/x86/kernel/kprobes/core.c (renamed from arch/x86/kernel/kprobes.c)76
-rw-r--r--arch/x86/kernel/kprobes/ftrace.c93
-rw-r--r--arch/x86/kernel/kprobes/opt.c (renamed from arch/x86/kernel/kprobes-opt.c)2
-rw-r--r--arch/x86/kernel/uprobes.c4
-rw-r--r--drivers/acpi/apei/cper.c19
-rw-r--r--drivers/pci/pcie/aer/aerdrv_errprint.c63
-rw-r--r--include/linux/aer.h4
-rw-r--r--include/linux/ftrace.h6
-rw-r--r--include/linux/ftrace_event.h6
-rw-r--r--include/linux/hardirq.h4
-rw-r--r--include/linux/kprobes.h12
-rw-r--r--include/linux/perf_event.h20
-rw-r--r--include/linux/profile.h13
-rw-r--r--include/linux/ring_buffer.h1
-rw-r--r--include/linux/uprobes.h23
-rw-r--r--include/trace/events/ras.h77
-rw-r--r--include/uapi/linux/perf_event.h3
-rw-r--r--kernel/events/core.c5
-rw-r--r--kernel/events/hw_breakpoint.c2
-rw-r--r--kernel/events/uprobes.c466
-rw-r--r--kernel/kprobes.c8
-rw-r--r--kernel/profile.c24
-rw-r--r--kernel/ptrace.c6
-rw-r--r--kernel/trace/Kconfig18
-rw-r--r--kernel/trace/blktrace.c2
-rw-r--r--kernel/trace/ftrace.c88
-rw-r--r--kernel/trace/ring_buffer.c108
-rw-r--r--kernel/trace/trace.c252
-rw-r--r--kernel/trace/trace.h134
-rw-r--r--kernel/trace/trace_clock.c4
-rw-r--r--kernel/trace/trace_events.c1
-rw-r--r--kernel/trace/trace_functions.c61
-rw-r--r--kernel/trace/trace_functions_graph.c68
-rw-r--r--kernel/trace/trace_probe.h1
-rw-r--r--kernel/trace/trace_selftest.c21
-rw-r--r--kernel/trace/trace_syscalls.c18
-rw-r--r--kernel/trace/trace_uprobe.c217
-rw-r--r--samples/Kconfig6
-rw-r--r--samples/Makefile2
-rw-r--r--samples/tracepoints/Makefile6
-rw-r--r--samples/tracepoints/tp-samples-trace.h11
-rw-r--r--samples/tracepoints/tracepoint-probe-sample.c57
-rw-r--r--samples/tracepoints/tracepoint-probe-sample2.c44
-rw-r--r--samples/tracepoints/tracepoint-sample.c57
-rw-r--r--tools/Makefile2
-rw-r--r--tools/lib/traceevent/event-parse.c49
-rw-r--r--tools/lib/traceevent/event-parse.h3
-rw-r--r--tools/lib/traceevent/event-utils.h3
-rw-r--r--tools/lib/traceevent/parse-filter.c3
-rw-r--r--tools/lib/traceevent/parse-utils.c19
-rw-r--r--tools/lib/traceevent/trace-seq.c3
-rw-r--r--tools/perf/Documentation/Makefile4
-rw-r--r--tools/perf/Documentation/perf-annotate.txt7
-rw-r--r--tools/perf/Documentation/perf-buildid-cache.txt7
-rw-r--r--tools/perf/Documentation/perf-diff.txt4
-rw-r--r--tools/perf/Documentation/perf-evlist.txt4
-rw-r--r--tools/perf/Documentation/perf-report.txt41
-rw-r--r--tools/perf/Documentation/perf-script-python.txt2
-rw-r--r--tools/perf/Documentation/perf-stat.txt11
-rw-r--r--tools/perf/Documentation/perf-test.txt4
-rw-r--r--tools/perf/Documentation/perf-top.txt2
-rw-r--r--tools/perf/Makefile104
-rw-r--r--tools/perf/arch/common.c1
-rw-r--r--tools/perf/bench/bench.h1
-rw-r--r--tools/perf/bench/numa.c1731
-rw-r--r--tools/perf/builtin-annotate.c30
-rw-r--r--tools/perf/builtin-bench.c19
-rw-r--r--tools/perf/builtin-buildid-cache.c96
-rw-r--r--tools/perf/builtin-buildid-list.c21
-rw-r--r--tools/perf/builtin-diff.c205
-rw-r--r--tools/perf/builtin-evlist.c88
-rw-r--r--tools/perf/builtin-kmem.c16
-rw-r--r--tools/perf/builtin-kvm.c3
-rw-r--r--tools/perf/builtin-record.c168
-rw-r--r--tools/perf/builtin-report.c93
-rw-r--r--tools/perf/builtin-sched.c6
-rw-r--r--tools/perf/builtin-script.c17
-rw-r--r--tools/perf/builtin-stat.c328
-rw-r--r--tools/perf/builtin-top.c372
-rw-r--r--tools/perf/builtin-trace.c2
-rw-r--r--tools/perf/config/feature-tests.mak11
-rw-r--r--tools/perf/config/utilities.mak6
-rw-r--r--tools/perf/perf.c32
-rw-r--r--tools/perf/perf.h32
-rw-r--r--tools/perf/scripts/perl/bin/workqueue-stats-record2
-rw-r--r--tools/perf/scripts/perl/bin/workqueue-stats-report3
-rw-r--r--tools/perf/scripts/perl/rwtop.pl6
-rw-r--r--tools/perf/scripts/perl/workqueue-stats.pl129
-rw-r--r--tools/perf/tests/attr.c9
-rw-r--r--tools/perf/tests/attr.py27
-rw-r--r--tools/perf/tests/attr/base-record2
-rw-r--r--tools/perf/tests/attr/test-record-group2
-rw-r--r--tools/perf/tests/attr/test-record-group14
-rw-r--r--tools/perf/tests/builtin-test.c40
-rw-r--r--tools/perf/tests/evsel-roundtrip-name.c4
-rw-r--r--tools/perf/tests/hists_link.c500
-rw-r--r--tools/perf/tests/mmap-basic.c40
-rw-r--r--tools/perf/tests/open-syscall-all-cpus.c19
-rw-r--r--tools/perf/tests/open-syscall.c17
-rw-r--r--tools/perf/tests/parse-events.c324
-rw-r--r--tools/perf/tests/perf-record.c20
-rw-r--r--tools/perf/tests/pmu.c11
-rw-r--r--tools/perf/tests/python-use.c23
-rw-r--r--tools/perf/tests/tests.h11
-rw-r--r--tools/perf/tests/util.c30
-rw-r--r--tools/perf/tests/vmlinux-kallsyms.c7
-rw-r--r--tools/perf/ui/browser.c6
-rw-r--r--tools/perf/ui/browsers/annotate.c33
-rw-r--r--tools/perf/ui/browsers/hists.c341
-rw-r--r--tools/perf/ui/gtk/annotate.c229
-rw-r--r--tools/perf/ui/gtk/browser.c235
-rw-r--r--tools/perf/ui/gtk/gtk.h10
-rw-r--r--tools/perf/ui/gtk/helpline.c23
-rw-r--r--tools/perf/ui/gtk/hists.c312
-rw-r--r--tools/perf/ui/helpline.c12
-rw-r--r--tools/perf/ui/helpline.h22
-rw-r--r--tools/perf/ui/hist.c481
-rw-r--r--tools/perf/ui/keysyms.h1
-rw-r--r--tools/perf/ui/setup.c3
-rw-r--r--tools/perf/ui/stdio/hist.c25
-rw-r--r--tools/perf/ui/tui/helpline.c29
-rw-r--r--tools/perf/ui/util.c1
-rwxr-xr-xtools/perf/util/PERF-VERSION-GEN4
-rw-r--r--tools/perf/util/annotate.c2
-rw-r--r--tools/perf/util/annotate.h24
-rw-r--r--tools/perf/util/callchain.c2
-rw-r--r--tools/perf/util/callchain.h5
-rw-r--r--tools/perf/util/cpumap.c54
-rw-r--r--tools/perf/util/cpumap.h9
-rw-r--r--tools/perf/util/debug.c28
-rw-r--r--tools/perf/util/debug.h34
-rw-r--r--tools/perf/util/dso.c6
-rw-r--r--tools/perf/util/dso.h2
-rw-r--r--tools/perf/util/event.c4
-rw-r--r--tools/perf/util/evlist.c31
-rw-r--r--tools/perf/util/evlist.h34
-rw-r--r--tools/perf/util/evsel.c370
-rw-r--r--tools/perf/util/evsel.h50
-rw-r--r--tools/perf/util/header.c266
-rw-r--r--tools/perf/util/header.h2
-rw-r--r--tools/perf/util/hist.c142
-rw-r--r--tools/perf/util/hist.h26
-rw-r--r--tools/perf/util/include/linux/bitops.h1
-rw-r--r--tools/perf/util/intlist.c36
-rw-r--r--tools/perf/util/intlist.h2
-rw-r--r--tools/perf/util/machine.c784
-rw-r--r--tools/perf/util/machine.h41
-rw-r--r--tools/perf/util/map.c121
-rw-r--r--tools/perf/util/map.h24
-rw-r--r--tools/perf/util/parse-events.c96
-rw-r--r--tools/perf/util/parse-events.h22
-rw-r--r--tools/perf/util/parse-events.y75
-rw-r--r--tools/perf/util/pmu.c46
-rw-r--r--tools/perf/util/pmu.h15
-rw-r--r--tools/perf/util/pmu.y1
-rw-r--r--tools/perf/util/probe-finder.c10
-rw-r--r--tools/perf/util/python-ext-sources1
-rw-r--r--tools/perf/util/python.c9
-rw-r--r--tools/perf/util/scripting-engines/trace-event-perl.c1
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c1
-rw-r--r--tools/perf/util/session.c325
-rw-r--r--tools/perf/util/session.h35
-rw-r--r--tools/perf/util/sort.c245
-rw-r--r--tools/perf/util/sort.h15
-rw-r--r--tools/perf/util/string.c18
-rw-r--r--tools/perf/util/strlist.c54
-rw-r--r--tools/perf/util/strlist.h42
-rw-r--r--tools/perf/util/symbol-elf.c14
-rw-r--r--tools/perf/util/symbol-minimal.c1
-rw-r--r--tools/perf/util/symbol.c536
-rw-r--r--tools/perf/util/symbol.h9
-rw-r--r--tools/perf/util/sysfs.c2
-rw-r--r--tools/perf/util/thread.c20
-rw-r--r--tools/perf/util/thread.h1
-rw-r--r--tools/perf/util/top.c22
-rw-r--r--tools/perf/util/top.h10
-rw-r--r--tools/perf/util/util.c24
-rw-r--r--tools/perf/util/util.h4
195 files changed, 8995 insertions, 4097 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
new file mode 100644
index 0000000..0adeb52
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
@@ -0,0 +1,62 @@
+What: /sys/devices/cpu/events/
+ /sys/devices/cpu/events/branch-misses
+ /sys/devices/cpu/events/cache-references
+ /sys/devices/cpu/events/cache-misses
+ /sys/devices/cpu/events/stalled-cycles-frontend
+ /sys/devices/cpu/events/branch-instructions
+ /sys/devices/cpu/events/stalled-cycles-backend
+ /sys/devices/cpu/events/instructions
+ /sys/devices/cpu/events/cpu-cycles
+
+Date: 2013/01/08
+
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+
+Description: Generic performance monitoring events
+
+ A collection of performance monitoring events that may be
+ supported by many/most CPUs. These events can be monitored
+ using the 'perf(1)' tool.
+
+ The contents of each file would look like:
+
+ event=0xNNNN
+
+ where 'N' is a hex digit and the number '0xNNNN' shows the
+ "raw code" for the perf event identified by the file's
+ "basename".
+
+
+What: /sys/devices/cpu/events/PM_LD_MISS_L1
+ /sys/devices/cpu/events/PM_LD_REF_L1
+ /sys/devices/cpu/events/PM_CYC
+ /sys/devices/cpu/events/PM_BRU_FIN
+ /sys/devices/cpu/events/PM_GCT_NOSLOT_CYC
+ /sys/devices/cpu/events/PM_BRU_MPRED
+ /sys/devices/cpu/events/PM_INST_CMPL
+ /sys/devices/cpu/events/PM_CMPLU_STALL
+
+Date: 2013/01/08
+
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+ Linux Powerpc mailing list <linuxppc-dev@ozlabs.org>
+
+Description: POWER-systems specific performance monitoring events
+
+ A collection of performance monitoring events that may be
+ supported by the POWER CPU. These events can be monitored
+ using the 'perf(1)' tool.
+
+ These events may not be supported by other CPUs.
+
+ The contents of each file would look like:
+
+ event=0xNNNN
+
+ where 'N' is a hex digit and the number '0xNNNN' shows the
+ "raw code" for the perf event identified by the file's
+ "basename".
+
+ Further, multiple terms like 'event=0xNNNN' can be specified
+ and separated with comma. All available terms are defined in
+ the /sys/bus/event_source/devices/<dev>/format file.
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 6f51fed..53d6a3c 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -1842,6 +1842,89 @@ an error.
# cat buffer_size_kb
85
+Snapshot
+--------
+CONFIG_TRACER_SNAPSHOT makes a generic snapshot feature
+available to all non latency tracers. (Latency tracers which
+record max latency, such as "irqsoff" or "wakeup", can't use
+this feature, since those are already using the snapshot
+mechanism internally.)
+
+Snapshot preserves a current trace buffer at a particular point
+in time without stopping tracing. Ftrace swaps the current
+buffer with a spare buffer, and tracing continues in the new
+current (=previous spare) buffer.
+
+The following debugfs files in "tracing" are related to this
+feature:
+
+ snapshot:
+
+ This is used to take a snapshot and to read the output
+ of the snapshot. Echo 1 into this file to allocate a
+ spare buffer and to take a snapshot (swap), then read
+ the snapshot from this file in the same format as
+ "trace" (described above in the section "The File
+ System"). Both reads snapshot and tracing are executable
+ in parallel. When the spare buffer is allocated, echoing
+ 0 frees it, and echoing else (positive) values clear the
+ snapshot contents.
+ More details are shown in the table below.
+
+ status\input | 0 | 1 | else |
+ --------------+------------+------------+------------+
+ not allocated |(do nothing)| alloc+swap | EINVAL |
+ --------------+------------+------------+------------+
+ allocated | free | swap | clear |
+ --------------+------------+------------+------------+
+
+Here is an example of using the snapshot feature.
+
+ # echo 1 > events/sched/enable
+ # echo 1 > snapshot
+ # cat snapshot
+# tracer: nop
+#
+# entries-in-buffer/entries-written: 71/71 #P:8
+#
+# _-----=> irqs-off
+# / _----=> need-resched
+# | / _---=> hardirq/softirq
+# || / _--=> preempt-depth
+# ||| / delay
+# TASK-PID CPU# |||| TIMESTAMP FUNCTION
+# | | | |||| | |
+ <idle>-0 [005] d... 2440.603828: sched_switch: prev_comm=swapper/5 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=snapshot-test-2 next_pid=2242 next_prio=120
+ sleep-2242 [005] d... 2440.603846: sched_switch: prev_comm=snapshot-test-2 prev_pid=2242 prev_prio=120 prev_state=R ==> next_comm=kworker/5:1 next_pid=60 next_prio=120
+[...]
+ <idle>-0 [002] d... 2440.707230: sched_switch: prev_comm=swapper/2 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=snapshot-test-2 next_pid=2229 next_prio=120
+
+ # cat trace
+# tracer: nop
+#
+# entries-in-buffer/entries-written: 77/77 #P:8
+#
+# _-----=> irqs-off
+# / _----=> need-resched
+# | / _---=> hardirq/softirq
+# || / _--=> preempt-depth
+# ||| / delay
+# TASK-PID CPU# |||| TIMESTAMP FUNCTION
+# | | | |||| | |
+ <idle>-0 [007] d... 2440.707395: sched_switch: prev_comm=swapper/7 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=snapshot-test-2 next_pid=2243 next_prio=120
+ snapshot-test-2-2229 [002] d... 2440.707438: sched_switch: prev_comm=snapshot-test-2 prev_pid=2229 prev_prio=120 prev_state=S ==> next_comm=swapper/2 next_pid=0 next_prio=120
+[...]
+
+
+If you try to use this snapshot feature when current tracer is
+one of the latency tracers, you will get the following results.
+
+ # echo wakeup > current_tracer
+ # echo 1 > snapshot
+bash: echo: write error: Device or resource busy
+ # cat snapshot
+cat: snapshot: Device or resource busy
+
-----------
More details can be found in the source code, in the
diff --git a/arch/Kconfig b/arch/Kconfig
index 7f8f281..97fb7d0 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -76,6 +76,15 @@ config OPTPROBES
depends on KPROBES && HAVE_OPTPROBES
depends on !PREEMPT
+config KPROBES_ON_FTRACE
+ def_bool y
+ depends on KPROBES && HAVE_KPROBES_ON_FTRACE
+ depends on DYNAMIC_FTRACE_WITH_REGS
+ help
+ If function tracer is enabled and the arch supports full
+ passing of pt_regs to function tracing, then kprobes can
+ optimize on top of function tracing.
+
config UPROBES
bool "Transparent user-space probes (EXPERIMENTAL)"
depends on UPROBE_EVENT && PERF_EVENTS
@@ -158,6 +167,9 @@ config HAVE_KRETPROBES
config HAVE_OPTPROBES
bool
+config HAVE_KPROBES_ON_FTRACE
+ bool
+
config HAVE_NMI_WATCHDOG
bool
#
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 9710be3..136bba6 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -11,6 +11,7 @@
#include <linux/types.h>
#include <asm/hw_irq.h>
+#include <linux/device.h>
#define MAX_HWEVENTS 8
#define MAX_EVENT_ALTERNATIVES 8
@@ -35,6 +36,7 @@ struct power_pmu {
void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]);
int (*limited_pmc_event)(u64 event_id);
u32 flags;
+ const struct attribute_group **attr_groups;
int n_generic;
int *generic_events;
int (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
@@ -109,3 +111,27 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
* If an event_id is not subject to the constraint expressed by a particular
* field, then it will have 0 in both the mask and value for that field.
*/
+
+extern ssize_t power_events_sysfs_show(struct device *dev,
+ struct device_attribute *attr, char *page);
+
+/*
+ * EVENT_VAR() is same as PMU_EVENT_VAR with a suffix.
+ *
+ * Having a suffix allows us to have aliases in sysfs - eg: the generic
+ * event 'cpu-cycles' can have two entries in sysfs: 'cpu-cycles' and
+ * 'PM_CYC' where the latter is the name by which the event is known in
+ * POWER CPU specification.
+ */
+#define EVENT_VAR(_id, _suffix) event_attr_##_id##_suffix
+#define EVENT_PTR(_id, _suffix) &EVENT_VAR(_id, _suffix).attr.attr
+
+#define EVENT_ATTR(_name, _id, _suffix) \
+ PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), PME_PM_##_id, \
+ power_events_sysfs_show)
+
+#define GENERIC_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _g)
+#define GENERIC_EVENT_PTR(_id) EVENT_PTR(_id, _g)
+
+#define POWER_EVENT_ATTR(_name, _id) EVENT_ATTR(PM_##_name, _id, _p)
+#define POWER_EVENT_PTR(_id) EVENT_PTR(_id, _p)
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index aa2465e..fa476d5 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1305,6 +1305,16 @@ static int power_pmu_event_idx(struct perf_event *event)
return event->hw.idx;
}
+ssize_t power_events_sysfs_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+
+ return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
struct pmu power_pmu = {
.pmu_enable = power_pmu_enable,
.pmu_disable = power_pmu_disable,
@@ -1537,6 +1547,8 @@ int __cpuinit register_power_pmu(struct power_pmu *pmu)
pr_info("%s performance monitor hardware support registered\n",
pmu->name);
+ power_pmu.attr_groups = ppmu->attr_groups;
+
#ifdef MSR_HV
/*
* Use FCHV to ignore kernel events if MSR.HV is set.
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
index 2ee01e3..b554879 100644
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -51,6 +51,18 @@
#define MMCR1_PMCSEL_MSK 0xff
/*
+ * Power7 event codes.
+ */
+#define PME_PM_CYC 0x1e
+#define PME_PM_GCT_NOSLOT_CYC 0x100f8
+#define PME_PM_CMPLU_STALL 0x4000a
+#define PME_PM_INST_CMPL 0x2
+#define PME_PM_LD_REF_L1 0xc880
+#define PME_PM_LD_MISS_L1 0x400f0
+#define PME_PM_BRU_FIN 0x10068
+#define PME_PM_BRU_MPRED 0x400f6
+
+/*
* Layout of constraint bits:
* 6666555555555544444444443333333333222222222211111111110000000000
* 3210987654321098765432109876543210987654321098765432109876543210
@@ -307,14 +319,14 @@ static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[])
}
static int power7_generic_events[] = {
- [PERF_COUNT_HW_CPU_CYCLES] = 0x1e,
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x100f8, /* GCT_NOSLOT_CYC */
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x4000a, /* CMPLU_STALL */
- [PERF_COUNT_HW_INSTRUCTIONS] = 2,
- [PERF_COUNT_HW_CACHE_REFERENCES] = 0xc880, /* LD_REF_L1_LSU*/
- [PERF_COUNT_HW_CACHE_MISSES] = 0x400f0, /* LD_MISS_L1 */
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x10068, /* BRU_FIN */
- [PERF_COUNT_HW_BRANCH_MISSES] = 0x400f6, /* BR_MPRED */
+ [PERF_COUNT_HW_CPU_CYCLES] = PME_PM_CYC,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PME_PM_GCT_NOSLOT_CYC,
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PME_PM_CMPLU_STALL,
+ [PERF_COUNT_HW_INSTRUCTIONS] = PME_PM_INST_CMPL,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = PME_PM_LD_REF_L1,
+ [PERF_COUNT_HW_CACHE_MISSES] = PME_PM_LD_MISS_L1,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PME_PM_BRU_FIN,
+ [PERF_COUNT_HW_BRANCH_MISSES] = PME_PM_BRU_MPRED,
};
#define C(x) PERF_COUNT_HW_CACHE_##x
@@ -362,6 +374,57 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
},
};
+
+GENERIC_EVENT_ATTR(cpu-cycles, CYC);
+GENERIC_EVENT_ATTR(stalled-cycles-frontend, GCT_NOSLOT_CYC);
+GENERIC_EVENT_ATTR(stalled-cycles-backend, CMPLU_STALL);
+GENERIC_EVENT_ATTR(instructions, INST_CMPL);
+GENERIC_EVENT_ATTR(cache-references, LD_REF_L1);
+GENERIC_EVENT_ATTR(cache-misses, LD_MISS_L1);
+GENERIC_EVENT_ATTR(branch-instructions, BRU_FIN);
+GENERIC_EVENT_ATTR(branch-misses, BRU_MPRED);
+
+POWER_EVENT_ATTR(CYC, CYC);
+POWER_EVENT_ATTR(GCT_NOSLOT_CYC, GCT_NOSLOT_CYC);
+POWER_EVENT_ATTR(CMPLU_STALL, CMPLU_STALL);
+POWER_EVENT_ATTR(INST_CMPL, INST_CMPL);
+POWER_EVENT_ATTR(LD_REF_L1, LD_REF_L1);
+POWER_EVENT_ATTR(LD_MISS_L1, LD_MISS_L1);
+POWER_EVENT_ATTR(BRU_FIN, BRU_FIN)
+POWER_EVENT_ATTR(BRU_MPRED, BRU_MPRED);
+
+static struct attribute *power7_events_attr[] = {
+ GENERIC_EVENT_PTR(CYC),
+ GENERIC_EVENT_PTR(GCT_NOSLOT_CYC),
+ GENERIC_EVENT_PTR(CMPLU_STALL),
+ GENERIC_EVENT_PTR(INST_CMPL),
+ GENERIC_EVENT_PTR(LD_REF_L1),
+ GENERIC_EVENT_PTR(LD_MISS_L1),
+ GENERIC_EVENT_PTR(BRU_FIN),
+ GENERIC_EVENT_PTR(BRU_MPRED),
+
+ POWER_EVENT_PTR(CYC),
+ POWER_EVENT_PTR(GCT_NOSLOT_CYC),
+ POWER_EVENT_PTR(CMPLU_STALL),
+ POWER_EVENT_PTR(INST_CMPL),
+ POWER_EVENT_PTR(LD_REF_L1),
+ POWER_EVENT_PTR(LD_MISS_L1),
+ POWER_EVENT_PTR(BRU_FIN),
+ POWER_EVENT_PTR(BRU_MPRED),
+ NULL
+};
+
+
+static struct attribute_group power7_pmu_events_group = {
+ .name = "events",
+ .attrs = power7_events_attr,
+};
+
+static const struct attribute_group *power7_pmu_attr_groups[] = {
+ &power7_pmu_events_group,
+ NULL,
+};
+
static struct power_pmu power7_pmu = {
.name = "POWER7",
.n_counter = 6,
@@ -373,6 +436,7 @@ static struct power_pmu power7_pmu = {
.get_alternatives = power7_get_alternatives,
.disable_pmc = power7_disable_pmc,
.flags = PPMU_ALT_SIPR,
+ .attr_groups = power7_pmu_attr_groups,
.n_generic = ARRAY_SIZE(power7_generic_events),
.generic_events = power7_generic_events,
.cache_events = &power7_cache_events,
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 36b05ed..a257049 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -39,10 +39,12 @@ config X86
select HAVE_DMA_CONTIGUOUS if !SWIOTLB
select HAVE_KRETPROBES
select HAVE_OPTPROBES
+ select HAVE_KPROBES_ON_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FENTRY if X86_64
select HAVE_C_RECORDMCOUNT
select HAVE_DYNAMIC_FTRACE
+ select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_GRAPH_FP_TEST
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 2d9075e..93fe929 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -167,6 +167,7 @@
#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */
+#define X86_FEATURE_PERFCTR_NB (6*32+24) /* NB performance counter extensions */
/*
* Auxiliary flags: Linux defined - For features scattered in various
@@ -309,6 +310,7 @@ extern const char * const x86_power_flags[32];
#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
+#define cpu_has_perfctr_nb boot_cpu_has(X86_FEATURE_PERFCTR_NB)
#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8)
#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16)
#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU)
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 9a25b52..86cb51e 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -44,7 +44,6 @@
#ifdef CONFIG_DYNAMIC_FTRACE
#define ARCH_SUPPORTS_FTRACE_OPS 1
-#define ARCH_SUPPORTS_FTRACE_SAVE_REGS
#endif
#ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 4fabcdf..57cb634 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -29,8 +29,13 @@
#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23)
#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL
-#define AMD_PERFMON_EVENTSEL_GUESTONLY (1ULL << 40)
-#define AMD_PERFMON_EVENTSEL_HOSTONLY (1ULL << 41)
+#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36)
+#define AMD64_EVENTSEL_GUESTONLY (1ULL << 40)
+#define AMD64_EVENTSEL_HOSTONLY (1ULL << 41)
+
+#define AMD64_EVENTSEL_INT_CORE_SEL_SHIFT 37
+#define AMD64_EVENTSEL_INT_CORE_SEL_MASK \
+ (0xFULL << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT)
#define AMD64_EVENTSEL_EVENT \
(ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))
@@ -46,8 +51,12 @@
#define AMD64_RAW_EVENT_MASK \
(X86_RAW_EVENT_MASK | \
AMD64_EVENTSEL_EVENT)
+#define AMD64_RAW_EVENT_MASK_NB \
+ (AMD64_EVENTSEL_EVENT | \
+ ARCH_PERFMON_EVENTSEL_UMASK)
#define AMD64_NUM_COUNTERS 4
#define AMD64_NUM_COUNTERS_CORE 6
+#define AMD64_NUM_COUNTERS_NB 4
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 433a59f..075a402 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -194,6 +194,8 @@
/* Fam 15h MSRs */
#define MSR_F15H_PERF_CTL 0xc0010200
#define MSR_F15H_PERF_CTR 0xc0010201
+#define MSR_F15H_NB_PERF_CTL 0xc0010240
+#define MSR_F15H_NB_PERF_CTR 0xc0010241
/* Fam 10h MSRs */
#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 34e923a..ac3b3d0 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -65,8 +65,7 @@ obj-$(CONFIG_X86_TSC) += trace_clock.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
-obj-$(CONFIG_KPROBES) += kprobes.o
-obj-$(CONFIG_OPTPROBES) += kprobes-opt.o
+obj-y += kprobes/
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
obj-$(CONFIG_KGDB) += kgdb.o
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6774c17..bf0f01a 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -829,7 +829,7 @@ static inline void x86_assign_hw_event(struct perf_event *event,
} else {
hwc->config_base = x86_pmu_config_addr(hwc->idx);
hwc->event_base = x86_pmu_event_addr(hwc->idx);
- hwc->event_base_rdpmc = hwc->idx;
+ hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx);
}
}
@@ -1310,11 +1310,6 @@ static struct attribute_group x86_pmu_format_group = {
.attrs = NULL,
};
-struct perf_pmu_events_attr {
- struct device_attribute attr;
- u64 id;
-};
-
/*
* Remove all undefined events (x86_pmu.event_map(id) == 0)
* out of events_attr attributes.
@@ -1348,11 +1343,9 @@ static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *at
#define EVENT_VAR(_id) event_attr_##_id
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
-#define EVENT_ATTR(_name, _id) \
-static struct perf_pmu_events_attr EVENT_VAR(_id) = { \
- .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
- .id = PERF_COUNT_HW_##_id, \
-};
+#define EVENT_ATTR(_name, _id) \
+ PMU_EVENT_ATTR(_name, EVENT_VAR(_id), PERF_COUNT_HW_##_id, \
+ events_sysfs_show)
EVENT_ATTR(cpu-cycles, CPU_CYCLES );
EVENT_ATTR(instructions, INSTRUCTIONS );
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 115c1ea..7f5c75c 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -325,6 +325,8 @@ struct x86_pmu {
int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
unsigned eventsel;
unsigned perfctr;
+ int (*addr_offset)(int index, bool eventsel);
+ int (*rdpmc_index)(int index);
u64 (*event_map)(int);
int max_events;
int num_counters;
@@ -446,28 +448,21 @@ extern u64 __read_mostly hw_cache_extra_regs
u64 x86_perf_event_update(struct perf_event *event);
-static inline int x86_pmu_addr_offset(int index)
+static inline unsigned int x86_pmu_config_addr(int index)
{
- int offset;
-
- /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
- alternative_io(ASM_NOP2,
- "shll $1, %%eax",
- X86_FEATURE_PERFCTR_CORE,
- "=a" (offset),
- "a" (index));
-
- return offset;
+ return x86_pmu.eventsel + (x86_pmu.addr_offset ?
+ x86_pmu.addr_offset(index, true) : index);
}
-static inline unsigned int x86_pmu_config_addr(int index)
+static inline unsigned int x86_pmu_event_addr(int index)
{
- return x86_pmu.eventsel + x86_pmu_addr_offset(index);
+ return x86_pmu.perfctr + (x86_pmu.addr_offset ?
+ x86_pmu.addr_offset(index, false) : index);
}
-static inline unsigned int x86_pmu_event_addr(int index)
+static inline int x86_pmu_rdpmc_index(int index)
{
- return x86_pmu.perfctr + x86_pmu_addr_offset(index);
+ return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
}
int x86_setup_perfctr(struct perf_event *event);
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index c93bc4e..dfdab42 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -132,21 +132,102 @@ static u64 amd_pmu_event_map(int hw_event)
return amd_perfmon_event_map[hw_event];
}
-static int amd_pmu_hw_config(struct perf_event *event)
+static struct event_constraint *amd_nb_event_constraint;
+
+/*
+ * Previously calculated offsets
+ */
+static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
+static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;
+static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly;
+
+/*
+ * Legacy CPUs:
+ * 4 counters starting at 0xc0010000 each offset by 1
+ *
+ * CPUs with core performance counter extensions:
+ * 6 counters starting at 0xc0010200 each offset by 2
+ *
+ * CPUs with north bridge performance counter extensions:
+ * 4 additional counters starting at 0xc0010240 each offset by 2
+ * (indexed right above either one of the above core counters)
+ */
+static inline int amd_pmu_addr_offset(int index, bool eventsel)
{
- int ret;
+ int offset, first, base;
- /* pass precise event sampling to ibs: */
- if (event->attr.precise_ip && get_ibs_caps())
- return -ENOENT;
+ if (!index)
+ return index;
+
+ if (eventsel)
+ offset = event_offsets[index];
+ else
+ offset = count_offsets[index];
+
+ if (offset)
+ return offset;
+
+ if (amd_nb_event_constraint &&
+ test_bit(index, amd_nb_event_constraint->idxmsk)) {
+ /*
+ * calculate the offset of NB counters with respect to
+ * base eventsel or perfctr
+ */
+
+ first = find_first_bit(amd_nb_event_constraint->idxmsk,
+ X86_PMC_IDX_MAX);
+
+ if (eventsel)
+ base = MSR_F15H_NB_PERF_CTL - x86_pmu.eventsel;
+ else
+ base = MSR_F15H_NB_PERF_CTR - x86_pmu.perfctr;
+
+ offset = base + ((index - first) << 1);
+ } else if (!cpu_has_perfctr_core)
+ offset = index;
+ else
+ offset = index << 1;
+
+ if (eventsel)
+ event_offsets[index] = offset;
+ else
+ count_offsets[index] = offset;
+
+ return offset;
+}
+
+static inline int amd_pmu_rdpmc_index(int index)
+{
+ int ret, first;
+
+ if (!index)
+ return index;
+
+ ret = rdpmc_indexes[index];
- ret = x86_pmu_hw_config(event);
if (ret)
return ret;
- if (has_branch_stack(event))
- return -EOPNOTSUPP;
+ if (amd_nb_event_constraint &&
+ test_bit(index, amd_nb_event_constraint->idxmsk)) {
+ /*
+ * according to the mnual, ECX value of the NB counters is
+ * the index of the NB counter (0, 1, 2 or 3) plus 6
+ */
+
+ first = find_first_bit(amd_nb_event_constraint->idxmsk,
+ X86_PMC_IDX_MAX);
+ ret = index - first + 6;
+ } else
+ ret = index;
+
+ rdpmc_indexes[index] = ret;
+
+ return ret;
+}
+static int amd_core_hw_config(struct perf_event *event)
+{
if (event->attr.exclude_host && event->attr.exclude_guest)
/*
* When HO == GO == 1 the hardware treats that as GO == HO == 0
@@ -156,14 +237,37 @@ static int amd_pmu_hw_config(struct perf_event *event)
event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
ARCH_PERFMON_EVENTSEL_OS);
else if (event->attr.exclude_host)
- event->hw.config |= AMD_PERFMON_EVENTSEL_GUESTONLY;
+ event->hw.config |= AMD64_EVENTSEL_GUESTONLY;
else if (event->attr.exclude_guest)
- event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY;
+ event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
+
+ return 0;
+}
+
+/*
+ * NB counters do not support the following event select bits:
+ * Host/Guest only
+ * Counter mask
+ * Invert counter mask
+ * Edge detect
+ * OS/User mode
+ */
+static int amd_nb_hw_config(struct perf_event *event)
+{
+ /* for NB, we only allow system wide counting mode */
+ if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+ return -EINVAL;
+
+ if (event->attr.exclude_user || event->attr.exclude_kernel ||
+ event->attr.exclude_host || event->attr.exclude_guest)
+ return -EINVAL;
- if (event->attr.type != PERF_TYPE_RAW)
- return 0;
+ event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
+ ARCH_PERFMON_EVENTSEL_OS);
- event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
+ if (event->hw.config & ~(AMD64_RAW_EVENT_MASK_NB |
+ ARCH_PERFMON_EVENTSEL_INT))
+ return -EINVAL;
return 0;
}
@@ -181,6 +285,11 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc)
return (hwc->config & 0xe0) == 0xe0;
}
+static inline int amd_is_perfctr_nb_event(struct hw_perf_event *hwc)
+{
+ return amd_nb_event_constraint && amd_is_nb_event(hwc);
+}
+
static inline int amd_has_nb(struct cpu_hw_events *cpuc)
{
struct amd_nb *nb = cpuc->amd_nb;
@@ -188,20 +297,37 @@ static inline int amd_has_nb(struct cpu_hw_events *cpuc)
return nb && nb->nb_id != -1;
}
-static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
- struct perf_event *event)
+static int amd_pmu_hw_config(struct perf_event *event)
+{
+ int ret;
+
+ /* pass precise event sampling to ibs: */
+ if (event->attr.precise_ip && get_ibs_caps())
+ return -ENOENT;
+
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
+ ret = x86_pmu_hw_config(event);
+ if (ret)
+ return ret;
+
+ if (event->attr.type == PERF_TYPE_RAW)
+ event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
+
+ if (amd_is_perfctr_nb_event(&event->hw))
+ return amd_nb_hw_config(event);
+
+ return amd_core_hw_config(event);
+}
+
+static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
{
- struct hw_perf_event *hwc = &event->hw;
struct amd_nb *nb = cpuc->amd_nb;
int i;
/*
- * only care about NB events
- */
- if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
- return;
-
- /*
* need to scan whole list because event may not have
* been assigned during scheduling
*
@@ -215,6 +341,19 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
}
}
+static void amd_nb_interrupt_hw_config(struct hw_perf_event *hwc)
+{
+ int core_id = cpu_data(smp_processor_id()).cpu_core_id;
+
+ /* deliver interrupts only to this core */
+ if (hwc->config & ARCH_PERFMON_EVENTSEL_INT) {
+ hwc->config |= AMD64_EVENTSEL_INT_CORE_ENABLE;
+ hwc->config &= ~AMD64_EVENTSEL_INT_CORE_SEL_MASK;
+ hwc->config |= (u64)(core_id) <<
+ AMD64_EVENTSEL_INT_CORE_SEL_SHIFT;
+ }
+}
+
/*
* AMD64 NorthBridge events need special treatment because
* counter access needs to be synchronized across all cores
@@ -247,24 +386,24 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
*
* Given that resources are allocated (cmpxchg), they must be
* eventually freed for others to use. This is accomplished by
- * calling amd_put_event_constraints().
+ * calling __amd_put_nb_event_constraints()
*
* Non NB events are not impacted by this restriction.
*/
static struct event_constraint *
-amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
+ struct event_constraint *c)
{
struct hw_perf_event *hwc = &event->hw;
struct amd_nb *nb = cpuc->amd_nb;
- struct perf_event *old = NULL;
- int max = x86_pmu.num_counters;
- int i, j, k = -1;
+ struct perf_event *old;
+ int idx, new = -1;
- /*
- * if not NB event or no NB, then no constraints
- */
- if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
- return &unconstrained;
+ if (!c)
+ c = &unconstrained;
+
+ if (cpuc->is_fake)
+ return c;
/*
* detect if already present, if so reuse
@@ -276,48 +415,36 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
* because of successive calls to x86_schedule_events() from
* hw_perf_group_sched_in() without hw_perf_enable()
*/
- for (i = 0; i < max; i++) {
- /*
- * keep track of first free slot
- */
- if (k == -1 && !nb->owners[i])
- k = i;
+ for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
+ if (new == -1 || hwc->idx == idx)
+ /* assign free slot, prefer hwc->idx */
+ old = cmpxchg(nb->owners + idx, NULL, event);
+ else if (nb->owners[idx] == event)
+ /* event already present */
+ old = event;
+ else
+ continue;
+
+ if (old && old != event)
+ continue;
+
+ /* reassign to this slot */
+ if (new != -1)
+ cmpxchg(nb->owners + new, event, NULL);
+ new = idx;
/* already present, reuse */
- if (nb->owners[i] == event)
- goto done;
- }
- /*
- * not present, so grab a new slot
- * starting either at:
- */
- if (hwc->idx != -1) {
- /* previous assignment */
- i = hwc->idx;
- } else if (k != -1) {
- /* start from free slot found */
- i = k;
- } else {
- /*
- * event not found, no slot found in
- * first pass, try again from the
- * beginning
- */
- i = 0;
- }
- j = i;
- do {
- old = cmpxchg(nb->owners+i, NULL, event);
- if (!old)
+ if (old == event)
break;
- if (++i == max)
- i = 0;
- } while (i != j);
-done:
- if (!old)
- return &nb->event_constraints[i];
-
- return &emptyconstraint;
+ }
+
+ if (new == -1)
+ return &emptyconstraint;
+
+ if (amd_is_perfctr_nb_event(hwc))
+ amd_nb_interrupt_hw_config(hwc);
+
+ return &nb->event_constraints[new];
}
static struct amd_nb *amd_alloc_nb(int cpu)
@@ -364,7 +491,7 @@ static void amd_pmu_cpu_starting(int cpu)
struct amd_nb *nb;
int i, nb_id;
- cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;
+ cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
if (boot_cpu_data.x86_max_cores < 2)
return;
@@ -407,6 +534,26 @@ static void amd_pmu_cpu_dead(int cpu)
}
}
+static struct event_constraint *
+amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+ /*
+ * if not NB event or no NB, then no constraints
+ */
+ if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
+ return &unconstrained;
+
+ return __amd_get_nb_event_constraints(cpuc, event,
+ amd_nb_event_constraint);
+}
+
+static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
+ __amd_put_nb_event_constraints(cpuc, event);
+}
+
PMU_FORMAT_ATTR(event, "config:0-7,32-35");
PMU_FORMAT_ATTR(umask, "config:8-15" );
PMU_FORMAT_ATTR(edge, "config:18" );
@@ -496,6 +643,9 @@ static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09,
static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
+static struct event_constraint amd_NBPMC96 = EVENT_CONSTRAINT(0, 0x3C0, 0);
+static struct event_constraint amd_NBPMC74 = EVENT_CONSTRAINT(0, 0xF0, 0);
+
static struct event_constraint *
amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
{
@@ -561,8 +711,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev
return &amd_f15_PMC20;
}
case AMD_EVENT_NB:
- /* not yet implemented */
- return &emptyconstraint;
+ return __amd_get_nb_event_constraints(cpuc, event,
+ amd_nb_event_constraint);
default:
return &emptyconstraint;
}
@@ -587,6 +737,8 @@ static __initconst const struct x86_pmu amd_pmu = {
.schedule_events = x86_schedule_events,
.eventsel = MSR_K7_EVNTSEL0,
.perfctr = MSR_K7_PERFCTR0,
+ .addr_offset = amd_pmu_addr_offset,
+ .rdpmc_index = amd_pmu_rdpmc_index,
.event_map = amd_pmu_event_map,
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
.num_counters = AMD64_NUM_COUNTERS,
@@ -608,7 +760,7 @@ static __initconst const struct x86_pmu amd_pmu = {
static int setup_event_constraints(void)
{
- if (boot_cpu_data.x86 >= 0x15)
+ if (boot_cpu_data.x86 == 0x15)
x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
return 0;
}
@@ -638,6 +790,23 @@ static int setup_perfctr_core(void)
return 0;
}
+static int setup_perfctr_nb(void)
+{
+ if (!cpu_has_perfctr_nb)
+ return -ENODEV;
+
+ x86_pmu.num_counters += AMD64_NUM_COUNTERS_NB;
+
+ if (cpu_has_perfctr_core)
+ amd_nb_event_constraint = &amd_NBPMC96;
+ else
+ amd_nb_event_constraint = &amd_NBPMC74;
+
+ printk(KERN_INFO "perf: AMD northbridge performance counters detected\n");
+
+ return 0;
+}
+
__init int amd_pmu_init(void)
{
/* Performance-monitoring supported from K7 and later: */
@@ -648,6 +817,7 @@ __init int amd_pmu_init(void)
setup_event_constraints();
setup_perfctr_core();
+ setup_perfctr_nb();
/* Events are common for all AMDs */
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
@@ -678,7 +848,7 @@ void amd_pmu_disable_virt(void)
* SVM is disabled the Guest-only bits still gets set and the counter
* will not count anything.
*/
- cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;
+ cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
/* Reload all events */
x86_pmu_disable_all();
diff --git a/arch/x86/kernel/kprobes/Makefile b/arch/x86/kernel/kprobes/Makefile
new file mode 100644
index 0000000..0d33169
--- /dev/null
+++ b/arch/x86/kernel/kprobes/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for kernel probes
+#
+
+obj-$(CONFIG_KPROBES) += core.o
+obj-$(CONFIG_OPTPROBES) += opt.o
+obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o
diff --git a/arch/x86/kernel/kprobes-common.h b/arch/x86/kernel/kprobes/common.h
index 3230b68..2e9d4b5 100644
--- a/arch/x86/kernel/kprobes-common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -99,4 +99,15 @@ static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsig
return addr;
}
#endif
+
+#ifdef CONFIG_KPROBES_ON_FTRACE
+extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb);
+#else
+static inline int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ return 0;
+}
+#endif
#endif
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes/core.c
index 57916c0..e124554 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -58,7 +58,7 @@
#include <asm/insn.h>
#include <asm/debugreg.h>
-#include "kprobes-common.h"
+#include "common.h"
void jprobe_return_end(void);
@@ -78,7 +78,7 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
* Groups, and some special opcodes can not boost.
* This is non-const and volatile to keep gcc from statically
* optimizing it out, as variable_test_bit makes gcc think only
- * *(unsigned long*) is used.
+ * *(unsigned long*) is used.
*/
static volatile u32 twobyte_is_boostable[256 / 32] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
@@ -117,7 +117,7 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
struct __arch_relative_insn {
u8 op;
s32 raddr;
- } __attribute__((packed)) *insn;
+ } __packed *insn;
insn = (struct __arch_relative_insn *)from;
insn->raddr = (s32)((long)(to) - ((long)(from) + 5));
@@ -541,23 +541,6 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb
return 1;
}
-#ifdef KPROBES_CAN_USE_FTRACE
-static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb)
-{
- /*
- * Emulate singlestep (and also recover regs->ip)
- * as if there is a 5byte nop
- */
- regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
- if (unlikely(p->post_handler)) {
- kcb->kprobe_status = KPROBE_HIT_SSDONE;
- p->post_handler(p, regs, 0);
- }
- __this_cpu_write(current_kprobe, NULL);
-}
-#endif
-
/*
* Interrupts are disabled on entry as trap3 is an interrupt gate and they
* remain disabled throughout this function.
@@ -616,13 +599,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
} else if (kprobe_running()) {
p = __this_cpu_read(current_kprobe);
if (p->break_handler && p->break_handler(p, regs)) {
-#ifdef KPROBES_CAN_USE_FTRACE
- if (kprobe_ftrace(p)) {
- skip_singlestep(p, regs, kcb);
- return 1;
- }
-#endif
- setup_singlestep(p, regs, kcb, 0);
+ if (!skip_singlestep(p, regs, kcb))
+ setup_singlestep(p, regs, kcb, 0);
return 1;
}
} /* else: not a kprobe fault; let the kernel handle it */
@@ -1075,50 +1053,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
return 0;
}
-#ifdef KPROBES_CAN_USE_FTRACE
-/* Ftrace callback handler for kprobes */
-void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *ops, struct pt_regs *regs)
-{
- struct kprobe *p;
- struct kprobe_ctlblk *kcb;
- unsigned long flags;
-
- /* Disable irq for emulating a breakpoint and avoiding preempt */
- local_irq_save(flags);
-
- p = get_kprobe((kprobe_opcode_t *)ip);
- if (unlikely(!p) || kprobe_disabled(p))
- goto end;
-
- kcb = get_kprobe_ctlblk();
- if (kprobe_running()) {
- kprobes_inc_nmissed_count(p);
- } else {
- /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
- regs->ip = ip + sizeof(kprobe_opcode_t);
-
- __this_cpu_write(current_kprobe, p);
- kcb->kprobe_status = KPROBE_HIT_ACTIVE;
- if (!p->pre_handler || !p->pre_handler(p, regs))
- skip_singlestep(p, regs, kcb);
- /*
- * If pre_handler returns !0, it sets regs->ip and
- * resets current kprobe.
- */
- }
-end:
- local_irq_restore(flags);
-}
-
-int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
-{
- p->ainsn.insn = NULL;
- p->ainsn.boostable = -1;
- return 0;
-}
-#endif
-
int __init arch_init_kprobes(void)
{
return arch_init_optprobes();
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
new file mode 100644
index 0000000..23ef5c5
--- /dev/null
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -0,0 +1,93 @@
+/*
+ * Dynamic Ftrace based Kprobes Optimization
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) Hitachi Ltd., 2012
+ */
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/hardirq.h>
+#include <linux/preempt.h>
+#include <linux/ftrace.h>
+
+#include "common.h"
+
+static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ /*
+ * Emulate singlestep (and also recover regs->ip)
+ * as if there is a 5byte nop
+ */
+ regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
+ if (unlikely(p->post_handler)) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ p->post_handler(p, regs, 0);
+ }
+ __this_cpu_write(current_kprobe, NULL);
+ return 1;
+}
+
+int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ if (kprobe_ftrace(p))
+ return __skip_singlestep(p, regs, kcb);
+ else
+ return 0;
+}
+
+/* Ftrace callback handler for kprobes */
+void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *ops, struct pt_regs *regs)
+{
+ struct kprobe *p;
+ struct kprobe_ctlblk *kcb;
+ unsigned long flags;
+
+ /* Disable irq for emulating a breakpoint and avoiding preempt */
+ local_irq_save(flags);
+
+ p = get_kprobe((kprobe_opcode_t *)ip);
+ if (unlikely(!p) || kprobe_disabled(p))
+ goto end;
+
+ kcb = get_kprobe_ctlblk();
+ if (kprobe_running()) {
+ kprobes_inc_nmissed_count(p);
+ } else {
+ /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
+ regs->ip = ip + sizeof(kprobe_opcode_t);
+
+ __this_cpu_write(current_kprobe, p);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ if (!p->pre_handler || !p->pre_handler(p, regs))
+ __skip_singlestep(p, regs, kcb);
+ /*
+ * If pre_handler returns !0, it sets regs->ip and
+ * resets current kprobe.
+ */
+ }
+end:
+ local_irq_restore(flags);
+}
+
+int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+ p->ainsn.insn = NULL;
+ p->ainsn.boostable = -1;
+ return 0;
+}
diff --git a/arch/x86/kernel/kprobes-opt.c b/arch/x86/kernel/kprobes/opt.c
index c5e410e..76dc6f0 100644
--- a/arch/x86/kernel/kprobes-opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -37,7 +37,7 @@
#include <asm/insn.h>
#include <asm/debugreg.h>
-#include "kprobes-common.h"
+#include "common.h"
unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index c71025b..0ba4cfb 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -680,8 +680,10 @@ static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
if (auprobe->insn[i] == 0x66)
continue;
- if (auprobe->insn[i] == 0x90)
+ if (auprobe->insn[i] == 0x90) {
+ regs->ip += i + 1;
return true;
+ }
break;
}
diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c
index e6defd8..1e5d8a4 100644
--- a/drivers/acpi/apei/cper.c
+++ b/drivers/acpi/apei/cper.c
@@ -29,6 +29,7 @@
#include <linux/time.h>
#include <linux/cper.h>
#include <linux/acpi.h>
+#include <linux/pci.h>
#include <linux/aer.h>
/*
@@ -249,6 +250,10 @@ static const char *cper_pcie_port_type_strs[] = {
static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
const struct acpi_hest_generic_data *gdata)
{
+#ifdef CONFIG_ACPI_APEI_PCIEAER
+ struct pci_dev *dev;
+#endif
+
if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
@@ -281,10 +286,18 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
"%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
pfx, pcie->bridge.secondary_status, pcie->bridge.control);
#ifdef CONFIG_ACPI_APEI_PCIEAER
- if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) {
- struct aer_capability_regs *aer_regs = (void *)pcie->aer_info;
- cper_print_aer(pfx, gdata->error_severity, aer_regs);
+ dev = pci_get_domain_bus_and_slot(pcie->device_id.segment,
+ pcie->device_id.bus, pcie->device_id.function);
+ if (!dev) {
+ pr_err("PCI AER Cannot get PCI device %04x:%02x:%02x.%d\n",
+ pcie->device_id.segment, pcie->device_id.bus,
+ pcie->device_id.slot, pcie->device_id.function);
+ return;
}
+ if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO)
+ cper_print_aer(pfx, dev, gdata->error_severity,
+ (struct aer_capability_regs *) pcie->aer_info);
+ pci_dev_put(dev);
#endif
}
diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c
index 3ea5173..5ab1425 100644
--- a/drivers/pci/pcie/aer/aerdrv_errprint.c
+++ b/drivers/pci/pcie/aer/aerdrv_errprint.c
@@ -23,6 +23,9 @@
#include "aerdrv.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/ras.h>
+
#define AER_AGENT_RECEIVER 0
#define AER_AGENT_REQUESTER 1
#define AER_AGENT_COMPLETER 2
@@ -121,12 +124,11 @@ static const char *aer_agent_string[] = {
"Transmitter ID"
};
-static void __aer_print_error(const char *prefix,
+static void __aer_print_error(struct pci_dev *dev,
struct aer_err_info *info)
{
int i, status;
const char *errmsg = NULL;
-
status = (info->status & ~info->mask);
for (i = 0; i < 32; i++) {
@@ -141,26 +143,22 @@ static void __aer_print_error(const char *prefix,
aer_uncorrectable_error_string[i] : NULL;
if (errmsg)
- printk("%s"" [%2d] %-22s%s\n", prefix, i, errmsg,
+ dev_err(&dev->dev, " [%2d] %-22s%s\n", i, errmsg,
info->first_error == i ? " (First)" : "");
else
- printk("%s"" [%2d] Unknown Error Bit%s\n", prefix, i,
- info->first_error == i ? " (First)" : "");
+ dev_err(&dev->dev, " [%2d] Unknown Error Bit%s\n",
+ i, info->first_error == i ? " (First)" : "");
}
}
void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
{
int id = ((dev->bus->number << 8) | dev->devfn);
- char prefix[44];
-
- snprintf(prefix, sizeof(prefix), "%s%s %s: ",
- (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR,
- dev_driver_string(&dev->dev), dev_name(&dev->dev));
if (info->status == 0) {
- printk("%s""PCIe Bus Error: severity=%s, type=Unaccessible, "
- "id=%04x(Unregistered Agent ID)\n", prefix,
+ dev_err(&dev->dev,
+ "PCIe Bus Error: severity=%s, type=Unaccessible, "
+ "id=%04x(Unregistered Agent ID)\n",
aer_error_severity_string[info->severity], id);
} else {
int layer, agent;
@@ -168,22 +166,24 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
layer = AER_GET_LAYER_ERROR(info->severity, info->status);
agent = AER_GET_AGENT(info->severity, info->status);
- printk("%s""PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n",
- prefix, aer_error_severity_string[info->severity],
+ dev_err(&dev->dev,
+ "PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n",
+ aer_error_severity_string[info->severity],
aer_error_layer[layer], id, aer_agent_string[agent]);
- printk("%s"" device [%04x:%04x] error status/mask=%08x/%08x\n",
- prefix, dev->vendor, dev->device,
+ dev_err(&dev->dev,
+ " device [%04x:%04x] error status/mask=%08x/%08x\n",
+ dev->vendor, dev->device,
info->status, info->mask);
- __aer_print_error(prefix, info);
+ __aer_print_error(dev, info);
if (info->tlp_header_valid) {
unsigned char *tlp = (unsigned char *) &info->tlp;
- printk("%s"" TLP Header:"
+ dev_err(&dev->dev, " TLP Header:"
" %02x%02x%02x%02x %02x%02x%02x%02x"
" %02x%02x%02x%02x %02x%02x%02x%02x\n",
- prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
+ *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
*(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
*(tlp + 11), *(tlp + 10), *(tlp + 9),
*(tlp + 8), *(tlp + 15), *(tlp + 14),
@@ -192,8 +192,11 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
}
if (info->id && info->error_dev_num > 1 && info->id == id)
- printk("%s"" Error of this Agent(%04x) is reported first\n",
- prefix, id);
+ dev_err(&dev->dev,
+ " Error of this Agent(%04x) is reported first\n",
+ id);
+ trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
+ info->severity);
}
void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
@@ -217,7 +220,7 @@ int cper_severity_to_aer(int cper_severity)
}
EXPORT_SYMBOL_GPL(cper_severity_to_aer);
-void cper_print_aer(const char *prefix, int cper_severity,
+void cper_print_aer(const char *prefix, struct pci_dev *dev, int cper_severity,
struct aer_capability_regs *aer)
{
int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0;
@@ -239,25 +242,27 @@ void cper_print_aer(const char *prefix, int cper_severity,
}
layer = AER_GET_LAYER_ERROR(aer_severity, status);
agent = AER_GET_AGENT(aer_severity, status);
- printk("%s""aer_status: 0x%08x, aer_mask: 0x%08x\n",
- prefix, status, mask);
+ dev_err(&dev->dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n",
+ status, mask);
cper_print_bits(prefix, status, status_strs, status_strs_size);
- printk("%s""aer_layer=%s, aer_agent=%s\n", prefix,
+ dev_err(&dev->dev, "aer_layer=%s, aer_agent=%s\n",
aer_error_layer[layer], aer_agent_string[agent]);
if (aer_severity != AER_CORRECTABLE)
- printk("%s""aer_uncor_severity: 0x%08x\n",
- prefix, aer->uncor_severity);
+ dev_err(&dev->dev, "aer_uncor_severity: 0x%08x\n",
+ aer->uncor_severity);
if (tlp_header_valid) {
const unsigned char *tlp;
tlp = (const unsigned char *)&aer->header_log;
- printk("%s""aer_tlp_header:"
+ dev_err(&dev->dev, "aer_tlp_header:"
" %02x%02x%02x%02x %02x%02x%02x%02x"
" %02x%02x%02x%02x %02x%02x%02x%02x\n",
- prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
+ *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
*(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
*(tlp + 11), *(tlp + 10), *(tlp + 9),
*(tlp + 8), *(tlp + 15), *(tlp + 14),
*(tlp + 13), *(tlp + 12));
}
+ trace_aer_event(dev_name(&dev->dev), (status & ~mask),
+ aer_severity);
}
#endif
diff --git a/include/linux/aer.h b/include/linux/aer.h
index 544abdb..ec10e1b 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -49,8 +49,8 @@ static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
}
#endif
-extern void cper_print_aer(const char *prefix, int cper_severity,
- struct aer_capability_regs *aer);
+extern void cper_print_aer(const char *prefix, struct pci_dev *dev,
+ int cper_severity, struct aer_capability_regs *aer);
extern int cper_severity_to_aer(int cper_severity);
extern void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
int severity);
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 92691d8..e5ca8ef 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -74,7 +74,7 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
* SAVE_REGS - The ftrace_ops wants regs saved at each function called
* and passed to the callback. If this flag is set, but the
* architecture does not support passing regs
- * (ARCH_SUPPORTS_FTRACE_SAVE_REGS is not defined), then the
+ * (CONFIG_DYNAMIC_FTRACE_WITH_REGS is not defined), then the
* ftrace_ops will fail to register, unless the next flag
* is set.
* SAVE_REGS_IF_SUPPORTED - This is the same as SAVE_REGS, but if the
@@ -418,7 +418,7 @@ void ftrace_modify_all_code(int command);
#endif
#ifndef FTRACE_REGS_ADDR
-#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
# define FTRACE_REGS_ADDR ((unsigned long)ftrace_regs_caller)
#else
# define FTRACE_REGS_ADDR FTRACE_ADDR
@@ -480,7 +480,7 @@ extern int ftrace_make_nop(struct module *mod,
*/
extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
-#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
/**
* ftrace_modify_call - convert from one addr to another (no nop)
* @rec: the mcount call site record
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index a3d4895..13a54d0 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -49,7 +49,6 @@ struct trace_entry {
unsigned char flags;
unsigned char preempt_count;
int pid;
- int padding;
};
#define FTRACE_MAX_EVENT \
@@ -84,6 +83,9 @@ struct trace_iterator {
long idx;
cpumask_var_t started;
+
+ /* it's true when current open file is snapshot */
+ bool snapshot;
};
enum trace_iter_flags {
@@ -272,7 +274,7 @@ extern int trace_define_field(struct ftrace_event_call *call, const char *type,
extern int trace_add_event_call(struct ftrace_event_call *call);
extern void trace_remove_event_call(struct ftrace_event_call *call);
-#define is_signed_type(type) (((type)(-1)) < 0)
+#define is_signed_type(type) (((type)(-1)) < (type)0)
int trace_set_clr_event(const char *system, const char *event, int set);
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 624ef3f..57bfdce 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -180,10 +180,10 @@ extern void irq_exit(void);
#define nmi_enter() \
do { \
+ lockdep_off(); \
ftrace_nmi_enter(); \
BUG_ON(in_nmi()); \
add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \
- lockdep_off(); \
rcu_nmi_enter(); \
trace_hardirq_enter(); \
} while (0)
@@ -192,10 +192,10 @@ extern void irq_exit(void);
do { \
trace_hardirq_exit(); \
rcu_nmi_exit(); \
- lockdep_on(); \
BUG_ON(!in_nmi()); \
sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \
ftrace_nmi_exit(); \
+ lockdep_on(); \
} while (0)
#endif /* LINUX_HARDIRQ_H */
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 23755ba..4b6ef4d 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -49,16 +49,6 @@
#define KPROBE_REENTER 0x00000004
#define KPROBE_HIT_SSDONE 0x00000008
-/*
- * If function tracer is enabled and the arch supports full
- * passing of pt_regs to function tracing, then kprobes can
- * optimize on top of function tracing.
- */
-#if defined(CONFIG_FUNCTION_TRACER) && defined(ARCH_SUPPORTS_FTRACE_SAVE_REGS) \
- && defined(ARCH_SUPPORTS_KPROBES_ON_FTRACE)
-# define KPROBES_CAN_USE_FTRACE
-#endif
-
/* Attach to insert probes on any functions which should be ignored*/
#define __kprobes __attribute__((__section__(".kprobes.text")))
@@ -316,7 +306,7 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table,
#endif
#endif /* CONFIG_OPTPROBES */
-#ifdef KPROBES_CAN_USE_FTRACE
+#ifdef CONFIG_KPROBES_ON_FTRACE
extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct pt_regs *regs);
extern int arch_prepare_kprobe_ftrace(struct kprobe *p);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 6bfb2faa..e47ee46 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -135,16 +135,21 @@ struct hw_perf_event {
struct { /* software */
struct hrtimer hrtimer;
};
+ struct { /* tracepoint */
+ struct task_struct *tp_target;
+ /* for tp_event->class */
+ struct list_head tp_list;
+ };
#ifdef CONFIG_HAVE_HW_BREAKPOINT
struct { /* breakpoint */
- struct arch_hw_breakpoint info;
- struct list_head bp_list;
/*
* Crufty hack to avoid the chicken and egg
* problem hw_breakpoint has with context
* creation and event initalization.
*/
struct task_struct *bp_target;
+ struct arch_hw_breakpoint info;
+ struct list_head bp_list;
};
#endif
};
@@ -817,6 +822,17 @@ do { \
} while (0)
+struct perf_pmu_events_attr {
+ struct device_attribute attr;
+ u64 id;
+};
+
+#define PMU_EVENT_ATTR(_name, _var, _id, _show) \
+static struct perf_pmu_events_attr _var = { \
+ .attr = __ATTR(_name, 0444, _show, NULL), \
+ .id = _id, \
+};
+
#define PMU_FORMAT_ATTR(_name, _format) \
static ssize_t \
_name##_show(struct device *dev, \
diff --git a/include/linux/profile.h b/include/linux/profile.h
index a0fc322..2112390 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -82,9 +82,6 @@ int task_handoff_unregister(struct notifier_block * n);
int profile_event_register(enum profile_type, struct notifier_block * n);
int profile_event_unregister(enum profile_type, struct notifier_block * n);
-int register_timer_hook(int (*hook)(struct pt_regs *));
-void unregister_timer_hook(int (*hook)(struct pt_regs *));
-
struct pt_regs;
#else
@@ -135,16 +132,6 @@ static inline int profile_event_unregister(enum profile_type t, struct notifier_
#define profile_handoff_task(a) (0)
#define profile_munmap(a) do { } while (0)
-static inline int register_timer_hook(int (*hook)(struct pt_regs *))
-{
- return -ENOSYS;
-}
-
-static inline void unregister_timer_hook(int (*hook)(struct pt_regs *))
-{
- return;
-}
-
#endif /* CONFIG_PROFILING */
#endif /* _LINUX_PROFILE_H */
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 519777e..1342e69 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -167,6 +167,7 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu);
unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu);
unsigned long ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu);
+unsigned long ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu);
u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu);
void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 4f628a6..02b83db 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -35,13 +35,20 @@ struct inode;
# include <asm/uprobes.h>
#endif
+#define UPROBE_HANDLER_REMOVE 1
+#define UPROBE_HANDLER_MASK 1
+
+enum uprobe_filter_ctx {
+ UPROBE_FILTER_REGISTER,
+ UPROBE_FILTER_UNREGISTER,
+ UPROBE_FILTER_MMAP,
+};
+
struct uprobe_consumer {
int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
- /*
- * filter is optional; If a filter exists, handler is run
- * if and only if filter returns true.
- */
- bool (*filter)(struct uprobe_consumer *self, struct task_struct *task);
+ bool (*filter)(struct uprobe_consumer *self,
+ enum uprobe_filter_ctx ctx,
+ struct mm_struct *mm);
struct uprobe_consumer *next;
};
@@ -94,6 +101,7 @@ extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsign
extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
+extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool);
extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
extern int uprobe_mmap(struct vm_area_struct *vma);
extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end);
@@ -117,6 +125,11 @@ uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
{
return -ENOSYS;
}
+static inline int
+uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool add)
+{
+ return -ENOSYS;
+}
static inline void
uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
{
diff --git a/include/trace/events/ras.h b/include/trace/events/ras.h
new file mode 100644
index 0000000..88b8783
--- /dev/null
+++ b/include/trace/events/ras.h
@@ -0,0 +1,77 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ras
+
+#if !defined(_TRACE_AER_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_AER_H
+
+#include <linux/tracepoint.h>
+#include <linux/edac.h>
+
+
+/*
+ * PCIe AER Trace event
+ *
+ * These events are generated when hardware detects a corrected or
+ * uncorrected event on a PCIe device. The event report has
+ * the following structure:
+ *
+ * char * dev_name - The name of the slot where the device resides
+ * ([domain:]bus:device.function).
+ * u32 status - Either the correctable or uncorrectable register
+ * indicating what error or errors have been seen
+ * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED
+ */
+
+#define aer_correctable_errors \
+ {BIT(0), "Receiver Error"}, \
+ {BIT(6), "Bad TLP"}, \
+ {BIT(7), "Bad DLLP"}, \
+ {BIT(8), "RELAY_NUM Rollover"}, \
+ {BIT(12), "Replay Timer Timeout"}, \
+ {BIT(13), "Advisory Non-Fatal"}
+
+#define aer_uncorrectable_errors \
+ {BIT(4), "Data Link Protocol"}, \
+ {BIT(12), "Poisoned TLP"}, \
+ {BIT(13), "Flow Control Protocol"}, \
+ {BIT(14), "Completion Timeout"}, \
+ {BIT(15), "Completer Abort"}, \
+ {BIT(16), "Unexpected Completion"}, \
+ {BIT(17), "Receiver Overflow"}, \
+ {BIT(18), "Malformed TLP"}, \
+ {BIT(19), "ECRC"}, \
+ {BIT(20), "Unsupported Request"}
+
+TRACE_EVENT(aer_event,
+ TP_PROTO(const char *dev_name,
+ const u32 status,
+ const u8 severity),
+
+ TP_ARGS(dev_name, status, severity),
+
+ TP_STRUCT__entry(
+ __string( dev_name, dev_name )
+ __field( u32, status )
+ __field( u8, severity )
+ ),
+
+ TP_fast_assign(
+ __assign_str(dev_name, dev_name);
+ __entry->status = status;
+ __entry->severity = severity;
+ ),
+
+ TP_printk("%s PCIe Bus Error: severity=%s, %s\n",
+ __get_str(dev_name),
+ __entry->severity == HW_EVENT_ERR_CORRECTED ? "Corrected" :
+ __entry->severity == HW_EVENT_ERR_FATAL ?
+ "Fatal" : "Uncorrected",
+ __entry->severity == HW_EVENT_ERR_CORRECTED ?
+ __print_flags(__entry->status, "|", aer_correctable_errors) :
+ __print_flags(__entry->status, "|", aer_uncorrectable_errors))
+);
+
+#endif /* _TRACE_AER_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 4f63c05..9fa9c62 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -579,7 +579,8 @@ enum perf_event_type {
* { u32 size;
* char data[size];}&& PERF_SAMPLE_RAW
*
- * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
+ * { u64 nr;
+ * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
*
* { u64 abi; # enum perf_sample_regs_abi
* u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 7b6646a..5c75791 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6171,11 +6171,14 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
if (task) {
event->attach_state = PERF_ATTACH_TASK;
+
+ if (attr->type == PERF_TYPE_TRACEPOINT)
+ event->hw.tp_target = task;
#ifdef CONFIG_HAVE_HW_BREAKPOINT
/*
* hw_breakpoint is a bit difficult here..
*/
- if (attr->type == PERF_TYPE_BREAKPOINT)
+ else if (attr->type == PERF_TYPE_BREAKPOINT)
event->hw.bp_target = task;
#endif
}
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index fe8a916..a64f8ae 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -676,7 +676,7 @@ int __init init_hw_breakpoint(void)
err_alloc:
for_each_possible_cpu(err_cpu) {
for (i = 0; i < TYPE_MAX; i++)
- kfree(per_cpu(nr_task_bp_pinned[i], cpu));
+ kfree(per_cpu(nr_task_bp_pinned[i], err_cpu));
if (err_cpu == cpu)
break;
}
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index dea7acf..a567c8c 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -27,6 +27,7 @@
#include <linux/pagemap.h> /* read_mapping_page */
#include <linux/slab.h>
#include <linux/sched.h>
+#include <linux/export.h>
#include <linux/rmap.h> /* anon_vma_prepare */
#include <linux/mmu_notifier.h> /* set_pte_at_notify */
#include <linux/swap.h> /* try_to_free_swap */
@@ -41,58 +42,31 @@
#define MAX_UPROBE_XOL_SLOTS UINSNS_PER_PAGE
static struct rb_root uprobes_tree = RB_ROOT;
-
-static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */
-
-#define UPROBES_HASH_SZ 13
-
/*
- * We need separate register/unregister and mmap/munmap lock hashes because
- * of mmap_sem nesting.
- *
- * uprobe_register() needs to install probes on (potentially) all processes
- * and thus needs to acquire multiple mmap_sems (consequtively, not
- * concurrently), whereas uprobe_mmap() is called while holding mmap_sem
- * for the particular process doing the mmap.
- *
- * uprobe_register()->register_for_each_vma() needs to drop/acquire mmap_sem
- * because of lock order against i_mmap_mutex. This means there's a hole in
- * the register vma iteration where a mmap() can happen.
- *
- * Thus uprobe_register() can race with uprobe_mmap() and we can try and
- * install a probe where one is already installed.
+ * allows us to skip the uprobe_mmap if there are no uprobe events active
+ * at this time. Probably a fine grained per inode count is better?
*/
+#define no_uprobe_events() RB_EMPTY_ROOT(&uprobes_tree)
-/* serialize (un)register */
-static struct mutex uprobes_mutex[UPROBES_HASH_SZ];
-
-#define uprobes_hash(v) (&uprobes_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
+static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */
+#define UPROBES_HASH_SZ 13
/* serialize uprobe->pending_list */
static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
#define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
static struct percpu_rw_semaphore dup_mmap_sem;
-/*
- * uprobe_events allows us to skip the uprobe_mmap if there are no uprobe
- * events active at this time. Probably a fine grained per inode count is
- * better?
- */
-static atomic_t uprobe_events = ATOMIC_INIT(0);
-
/* Have a copy of original instruction */
#define UPROBE_COPY_INSN 0
-/* Dont run handlers when first register/ last unregister in progress*/
-#define UPROBE_RUN_HANDLER 1
/* Can skip singlestep */
-#define UPROBE_SKIP_SSTEP 2
+#define UPROBE_SKIP_SSTEP 1
struct uprobe {
struct rb_node rb_node; /* node in the rb tree */
atomic_t ref;
+ struct rw_semaphore register_rwsem;
struct rw_semaphore consumer_rwsem;
- struct mutex copy_mutex; /* TODO: kill me and UPROBE_COPY_INSN */
struct list_head pending_list;
struct uprobe_consumer *consumers;
struct inode *inode; /* Also hold a ref to inode */
@@ -430,9 +404,6 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe)
u = __insert_uprobe(uprobe);
spin_unlock(&uprobes_treelock);
- /* For now assume that the instruction need not be single-stepped */
- __set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
-
return u;
}
@@ -452,8 +423,10 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
uprobe->inode = igrab(inode);
uprobe->offset = offset;
+ init_rwsem(&uprobe->register_rwsem);
init_rwsem(&uprobe->consumer_rwsem);
- mutex_init(&uprobe->copy_mutex);
+ /* For now assume that the instruction need not be single-stepped */
+ __set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
/* add to uprobes_tree, sorted on inode:offset */
cur_uprobe = insert_uprobe(uprobe);
@@ -463,38 +436,17 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
kfree(uprobe);
uprobe = cur_uprobe;
iput(inode);
- } else {
- atomic_inc(&uprobe_events);
}
return uprobe;
}
-static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
-{
- struct uprobe_consumer *uc;
-
- if (!test_bit(UPROBE_RUN_HANDLER, &uprobe->flags))
- return;
-
- down_read(&uprobe->consumer_rwsem);
- for (uc = uprobe->consumers; uc; uc = uc->next) {
- if (!uc->filter || uc->filter(uc, current))
- uc->handler(uc, regs);
- }
- up_read(&uprobe->consumer_rwsem);
-}
-
-/* Returns the previous consumer */
-static struct uprobe_consumer *
-consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
+static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
{
down_write(&uprobe->consumer_rwsem);
uc->next = uprobe->consumers;
uprobe->consumers = uc;
up_write(&uprobe->consumer_rwsem);
-
- return uc->next;
}
/*
@@ -588,7 +540,8 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
if (test_bit(UPROBE_COPY_INSN, &uprobe->flags))
return ret;
- mutex_lock(&uprobe->copy_mutex);
+ /* TODO: move this into _register, until then we abuse this sem. */
+ down_write(&uprobe->consumer_rwsem);
if (test_bit(UPROBE_COPY_INSN, &uprobe->flags))
goto out;
@@ -612,7 +565,30 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
set_bit(UPROBE_COPY_INSN, &uprobe->flags);
out:
- mutex_unlock(&uprobe->copy_mutex);
+ up_write(&uprobe->consumer_rwsem);
+
+ return ret;
+}
+
+static inline bool consumer_filter(struct uprobe_consumer *uc,
+ enum uprobe_filter_ctx ctx, struct mm_struct *mm)
+{
+ return !uc->filter || uc->filter(uc, ctx, mm);
+}
+
+static bool filter_chain(struct uprobe *uprobe,
+ enum uprobe_filter_ctx ctx, struct mm_struct *mm)
+{
+ struct uprobe_consumer *uc;
+ bool ret = false;
+
+ down_read(&uprobe->consumer_rwsem);
+ for (uc = uprobe->consumers; uc; uc = uc->next) {
+ ret = consumer_filter(uc, ctx, mm);
+ if (ret)
+ break;
+ }
+ up_read(&uprobe->consumer_rwsem);
return ret;
}
@@ -624,16 +600,6 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
bool first_uprobe;
int ret;
- /*
- * If probe is being deleted, unregister thread could be done with
- * the vma-rmap-walk through. Adding a probe now can be fatal since
- * nobody will be able to cleanup. Also we could be from fork or
- * mremap path, where the probe might have already been inserted.
- * Hence behave as if probe already existed.
- */
- if (!uprobe->consumers)
- return 0;
-
ret = prepare_uprobe(uprobe, vma->vm_file, mm, vaddr);
if (ret)
return ret;
@@ -658,14 +624,14 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
static int
remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr)
{
- /* can happen if uprobe_register() fails */
- if (!test_bit(MMF_HAS_UPROBES, &mm->flags))
- return 0;
-
set_bit(MMF_RECALC_UPROBES, &mm->flags);
return set_orig_insn(&uprobe->arch, mm, vaddr);
}
+static inline bool uprobe_is_active(struct uprobe *uprobe)
+{
+ return !RB_EMPTY_NODE(&uprobe->rb_node);
+}
/*
* There could be threads that have already hit the breakpoint. They
* will recheck the current insn and restart if find_uprobe() fails.
@@ -673,12 +639,15 @@ remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vad
*/
static void delete_uprobe(struct uprobe *uprobe)
{
+ if (WARN_ON(!uprobe_is_active(uprobe)))
+ return;
+
spin_lock(&uprobes_treelock);
rb_erase(&uprobe->rb_node, &uprobes_tree);
spin_unlock(&uprobes_treelock);
+ RB_CLEAR_NODE(&uprobe->rb_node); /* for uprobe_is_active() */
iput(uprobe->inode);
put_uprobe(uprobe);
- atomic_dec(&uprobe_events);
}
struct map_info {
@@ -764,8 +733,10 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
return curr;
}
-static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
+static int
+register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new)
{
+ bool is_register = !!new;
struct map_info *info;
int err = 0;
@@ -794,10 +765,16 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
vaddr_to_offset(vma, info->vaddr) != uprobe->offset)
goto unlock;
- if (is_register)
- err = install_breakpoint(uprobe, mm, vma, info->vaddr);
- else
- err |= remove_breakpoint(uprobe, mm, info->vaddr);
+ if (is_register) {
+ /* consult only the "caller", new consumer. */
+ if (consumer_filter(new,
+ UPROBE_FILTER_REGISTER, mm))
+ err = install_breakpoint(uprobe, mm, vma, info->vaddr);
+ } else if (test_bit(MMF_HAS_UPROBES, &mm->flags)) {
+ if (!filter_chain(uprobe,
+ UPROBE_FILTER_UNREGISTER, mm))
+ err |= remove_breakpoint(uprobe, mm, info->vaddr);
+ }
unlock:
up_write(&mm->mmap_sem);
@@ -810,17 +787,23 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
return err;
}
-static int __uprobe_register(struct uprobe *uprobe)
+static int __uprobe_register(struct uprobe *uprobe, struct uprobe_consumer *uc)
{
- return register_for_each_vma(uprobe, true);
+ consumer_add(uprobe, uc);
+ return register_for_each_vma(uprobe, uc);
}
-static void __uprobe_unregister(struct uprobe *uprobe)
+static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc)
{
- if (!register_for_each_vma(uprobe, false))
- delete_uprobe(uprobe);
+ int err;
+
+ if (!consumer_del(uprobe, uc)) /* WARN? */
+ return;
+ err = register_for_each_vma(uprobe, NULL);
/* TODO : cant unregister? schedule a worker thread */
+ if (!uprobe->consumers && !err)
+ delete_uprobe(uprobe);
}
/*
@@ -845,31 +828,59 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
struct uprobe *uprobe;
int ret;
- if (!inode || !uc || uc->next)
- return -EINVAL;
-
+ /* Racy, just to catch the obvious mistakes */
if (offset > i_size_read(inode))
return -EINVAL;
- ret = 0;
- mutex_lock(uprobes_hash(inode));
+ retry:
uprobe = alloc_uprobe(inode, offset);
-
- if (!uprobe) {
- ret = -ENOMEM;
- } else if (!consumer_add(uprobe, uc)) {
- ret = __uprobe_register(uprobe);
- if (ret) {
- uprobe->consumers = NULL;
- __uprobe_unregister(uprobe);
- } else {
- set_bit(UPROBE_RUN_HANDLER, &uprobe->flags);
- }
+ if (!uprobe)
+ return -ENOMEM;
+ /*
+ * We can race with uprobe_unregister()->delete_uprobe().
+ * Check uprobe_is_active() and retry if it is false.
+ */
+ down_write(&uprobe->register_rwsem);
+ ret = -EAGAIN;
+ if (likely(uprobe_is_active(uprobe))) {
+ ret = __uprobe_register(uprobe, uc);
+ if (ret)
+ __uprobe_unregister(uprobe, uc);
}
+ up_write(&uprobe->register_rwsem);
+ put_uprobe(uprobe);
- mutex_unlock(uprobes_hash(inode));
- if (uprobe)
- put_uprobe(uprobe);
+ if (unlikely(ret == -EAGAIN))
+ goto retry;
+ return ret;
+}
+EXPORT_SYMBOL_GPL(uprobe_register);
+
+/*
+ * uprobe_apply - unregister a already registered probe.
+ * @inode: the file in which the probe has to be removed.
+ * @offset: offset from the start of the file.
+ * @uc: consumer which wants to add more or remove some breakpoints
+ * @add: add or remove the breakpoints
+ */
+int uprobe_apply(struct inode *inode, loff_t offset,
+ struct uprobe_consumer *uc, bool add)
+{
+ struct uprobe *uprobe;
+ struct uprobe_consumer *con;
+ int ret = -ENOENT;
+
+ uprobe = find_uprobe(inode, offset);
+ if (!uprobe)
+ return ret;
+
+ down_write(&uprobe->register_rwsem);
+ for (con = uprobe->consumers; con && con != uc ; con = con->next)
+ ;
+ if (con)
+ ret = register_for_each_vma(uprobe, add ? uc : NULL);
+ up_write(&uprobe->register_rwsem);
+ put_uprobe(uprobe);
return ret;
}
@@ -884,25 +895,42 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume
{
struct uprobe *uprobe;
- if (!inode || !uc)
- return;
-
uprobe = find_uprobe(inode, offset);
if (!uprobe)
return;
- mutex_lock(uprobes_hash(inode));
+ down_write(&uprobe->register_rwsem);
+ __uprobe_unregister(uprobe, uc);
+ up_write(&uprobe->register_rwsem);
+ put_uprobe(uprobe);
+}
+EXPORT_SYMBOL_GPL(uprobe_unregister);
- if (consumer_del(uprobe, uc)) {
- if (!uprobe->consumers) {
- __uprobe_unregister(uprobe);
- clear_bit(UPROBE_RUN_HANDLER, &uprobe->flags);
- }
+static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm)
+{
+ struct vm_area_struct *vma;
+ int err = 0;
+
+ down_read(&mm->mmap_sem);
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ unsigned long vaddr;
+ loff_t offset;
+
+ if (!valid_vma(vma, false) ||
+ vma->vm_file->f_mapping->host != uprobe->inode)
+ continue;
+
+ offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
+ if (uprobe->offset < offset ||
+ uprobe->offset >= offset + vma->vm_end - vma->vm_start)
+ continue;
+
+ vaddr = offset_to_vaddr(vma, uprobe->offset);
+ err |= remove_breakpoint(uprobe, mm, vaddr);
}
+ up_read(&mm->mmap_sem);
- mutex_unlock(uprobes_hash(inode));
- if (uprobe)
- put_uprobe(uprobe);
+ return err;
}
static struct rb_node *
@@ -979,7 +1007,7 @@ int uprobe_mmap(struct vm_area_struct *vma)
struct uprobe *uprobe, *u;
struct inode *inode;
- if (!atomic_read(&uprobe_events) || !valid_vma(vma, true))
+ if (no_uprobe_events() || !valid_vma(vma, true))
return 0;
inode = vma->vm_file->f_mapping->host;
@@ -988,9 +1016,14 @@ int uprobe_mmap(struct vm_area_struct *vma)
mutex_lock(uprobes_mmap_hash(inode));
build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list);
-
+ /*
+ * We can race with uprobe_unregister(), this uprobe can be already
+ * removed. But in this case filter_chain() must return false, all
+ * consumers have gone away.
+ */
list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
- if (!fatal_signal_pending(current)) {
+ if (!fatal_signal_pending(current) &&
+ filter_chain(uprobe, UPROBE_FILTER_MMAP, vma->vm_mm)) {
unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset);
install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
}
@@ -1025,7 +1058,7 @@ vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long e
*/
void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
- if (!atomic_read(&uprobe_events) || !valid_vma(vma, false))
+ if (no_uprobe_events() || !valid_vma(vma, false))
return;
if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */
@@ -1042,22 +1075,14 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
/* Slot allocation for XOL */
static int xol_add_vma(struct xol_area *area)
{
- struct mm_struct *mm;
- int ret;
-
- area->page = alloc_page(GFP_HIGHUSER);
- if (!area->page)
- return -ENOMEM;
-
- ret = -EALREADY;
- mm = current->mm;
+ struct mm_struct *mm = current->mm;
+ int ret = -EALREADY;
down_write(&mm->mmap_sem);
if (mm->uprobes_state.xol_area)
goto fail;
ret = -ENOMEM;
-
/* Try to map as high as possible, this is only a hint. */
area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0);
if (area->vaddr & ~PAGE_MASK) {
@@ -1073,54 +1098,53 @@ static int xol_add_vma(struct xol_area *area)
smp_wmb(); /* pairs with get_xol_area() */
mm->uprobes_state.xol_area = area;
ret = 0;
-
-fail:
+ fail:
up_write(&mm->mmap_sem);
- if (ret)
- __free_page(area->page);
return ret;
}
-static struct xol_area *get_xol_area(struct mm_struct *mm)
-{
- struct xol_area *area;
-
- area = mm->uprobes_state.xol_area;
- smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */
-
- return area;
-}
-
/*
- * xol_alloc_area - Allocate process's xol_area.
- * This area will be used for storing instructions for execution out of
- * line.
+ * get_xol_area - Allocate process's xol_area if necessary.
+ * This area will be used for storing instructions for execution out of line.
*
* Returns the allocated area or NULL.
*/
-static struct xol_area *xol_alloc_area(void)
+static struct xol_area *get_xol_area(void)
{
+ struct mm_struct *mm = current->mm;
struct xol_area *area;
+ area = mm->uprobes_state.xol_area;
+ if (area)
+ goto ret;
+
area = kzalloc(sizeof(*area), GFP_KERNEL);
if (unlikely(!area))
- return NULL;
+ goto out;
area->bitmap = kzalloc(BITS_TO_LONGS(UINSNS_PER_PAGE) * sizeof(long), GFP_KERNEL);
-
if (!area->bitmap)
- goto fail;
+ goto free_area;
+
+ area->page = alloc_page(GFP_HIGHUSER);
+ if (!area->page)
+ goto free_bitmap;
init_waitqueue_head(&area->wq);
if (!xol_add_vma(area))
return area;
-fail:
+ __free_page(area->page);
+ free_bitmap:
kfree(area->bitmap);
+ free_area:
kfree(area);
-
- return get_xol_area(current->mm);
+ out:
+ area = mm->uprobes_state.xol_area;
+ ret:
+ smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */
+ return area;
}
/*
@@ -1186,33 +1210,26 @@ static unsigned long xol_take_insn_slot(struct xol_area *area)
}
/*
- * xol_get_insn_slot - If was not allocated a slot, then
- * allocate a slot.
+ * xol_get_insn_slot - allocate a slot for xol.
* Returns the allocated slot address or 0.
*/
-static unsigned long xol_get_insn_slot(struct uprobe *uprobe, unsigned long slot_addr)
+static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
{
struct xol_area *area;
unsigned long offset;
+ unsigned long xol_vaddr;
void *vaddr;
- area = get_xol_area(current->mm);
- if (!area) {
- area = xol_alloc_area();
- if (!area)
- return 0;
- }
- current->utask->xol_vaddr = xol_take_insn_slot(area);
+ area = get_xol_area();
+ if (!area)
+ return 0;
- /*
- * Initialize the slot if xol_vaddr points to valid
- * instruction slot.
- */
- if (unlikely(!current->utask->xol_vaddr))
+ xol_vaddr = xol_take_insn_slot(area);
+ if (unlikely(!xol_vaddr))
return 0;
- current->utask->vaddr = slot_addr;
- offset = current->utask->xol_vaddr & ~PAGE_MASK;
+ /* Initialize the slot */
+ offset = xol_vaddr & ~PAGE_MASK;
vaddr = kmap_atomic(area->page);
memcpy(vaddr + offset, uprobe->arch.insn, MAX_UINSN_BYTES);
kunmap_atomic(vaddr);
@@ -1222,7 +1239,7 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe, unsigned long slot
*/
flush_dcache_page(area->page);
- return current->utask->xol_vaddr;
+ return xol_vaddr;
}
/*
@@ -1240,8 +1257,7 @@ static void xol_free_insn_slot(struct task_struct *tsk)
return;
slot_addr = tsk->utask->xol_vaddr;
-
- if (unlikely(!slot_addr || IS_ERR_VALUE(slot_addr)))
+ if (unlikely(!slot_addr))
return;
area = tsk->mm->uprobes_state.xol_area;
@@ -1303,33 +1319,48 @@ void uprobe_copy_process(struct task_struct *t)
}
/*
- * Allocate a uprobe_task object for the task.
- * Called when the thread hits a breakpoint for the first time.
+ * Allocate a uprobe_task object for the task if if necessary.
+ * Called when the thread hits a breakpoint.
*
* Returns:
* - pointer to new uprobe_task on success
* - NULL otherwise
*/
-static struct uprobe_task *add_utask(void)
+static struct uprobe_task *get_utask(void)
{
- struct uprobe_task *utask;
-
- utask = kzalloc(sizeof *utask, GFP_KERNEL);
- if (unlikely(!utask))
- return NULL;
-
- current->utask = utask;
- return utask;
+ if (!current->utask)
+ current->utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL);
+ return current->utask;
}
/* Prepare to single-step probed instruction out of line. */
static int
-pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long vaddr)
+pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr)
{
- if (xol_get_insn_slot(uprobe, vaddr) && !arch_uprobe_pre_xol(&uprobe->arch, regs))
- return 0;
+ struct uprobe_task *utask;
+ unsigned long xol_vaddr;
+ int err;
+
+ utask = get_utask();
+ if (!utask)
+ return -ENOMEM;
+
+ xol_vaddr = xol_get_insn_slot(uprobe);
+ if (!xol_vaddr)
+ return -ENOMEM;
+
+ utask->xol_vaddr = xol_vaddr;
+ utask->vaddr = bp_vaddr;
+
+ err = arch_uprobe_pre_xol(&uprobe->arch, regs);
+ if (unlikely(err)) {
+ xol_free_insn_slot(current);
+ return err;
+ }
- return -EFAULT;
+ utask->active_uprobe = uprobe;
+ utask->state = UTASK_SSTEP;
+ return 0;
}
/*
@@ -1391,6 +1422,7 @@ static void mmf_recalc_uprobes(struct mm_struct *mm)
* This is not strictly accurate, we can race with
* uprobe_unregister() and see the already removed
* uprobe if delete_uprobe() was not yet called.
+ * Or this uprobe can be filtered out.
*/
if (vma_has_uprobes(vma, vma->vm_start, vma->vm_end))
return;
@@ -1452,13 +1484,33 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
return uprobe;
}
+static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
+{
+ struct uprobe_consumer *uc;
+ int remove = UPROBE_HANDLER_REMOVE;
+
+ down_read(&uprobe->register_rwsem);
+ for (uc = uprobe->consumers; uc; uc = uc->next) {
+ int rc = uc->handler(uc, regs);
+
+ WARN(rc & ~UPROBE_HANDLER_MASK,
+ "bad rc=0x%x from %pf()\n", rc, uc->handler);
+ remove &= rc;
+ }
+
+ if (remove && uprobe->consumers) {
+ WARN_ON(!uprobe_is_active(uprobe));
+ unapply_uprobe(uprobe, current->mm);
+ }
+ up_read(&uprobe->register_rwsem);
+}
+
/*
* Run handler and ask thread to singlestep.
* Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
*/
static void handle_swbp(struct pt_regs *regs)
{
- struct uprobe_task *utask;
struct uprobe *uprobe;
unsigned long bp_vaddr;
int uninitialized_var(is_swbp);
@@ -1483,6 +1535,10 @@ static void handle_swbp(struct pt_regs *regs)
}
return;
}
+
+ /* change it in advance for ->handler() and restart */
+ instruction_pointer_set(regs, bp_vaddr);
+
/*
* TODO: move copy_insn/etc into _register and remove this hack.
* After we hit the bp, _unregister + _register can install the
@@ -1490,32 +1546,16 @@ static void handle_swbp(struct pt_regs *regs)
*/
smp_rmb(); /* pairs with wmb() in install_breakpoint() */
if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags)))
- goto restart;
-
- utask = current->utask;
- if (!utask) {
- utask = add_utask();
- /* Cannot allocate; re-execute the instruction. */
- if (!utask)
- goto restart;
- }
+ goto out;
handler_chain(uprobe, regs);
if (can_skip_sstep(uprobe, regs))
goto out;
- if (!pre_ssout(uprobe, regs, bp_vaddr)) {
- utask->active_uprobe = uprobe;
- utask->state = UTASK_SSTEP;
+ if (!pre_ssout(uprobe, regs, bp_vaddr))
return;
- }
-restart:
- /*
- * cannot singlestep; cannot skip instruction;
- * re-execute the instruction.
- */
- instruction_pointer_set(regs, bp_vaddr);
+ /* can_skip_sstep() succeeded, or restart if can't singlestep */
out:
put_uprobe(uprobe);
}
@@ -1609,10 +1649,8 @@ static int __init init_uprobes(void)
{
int i;
- for (i = 0; i < UPROBES_HASH_SZ; i++) {
- mutex_init(&uprobes_mutex[i]);
+ for (i = 0; i < UPROBES_HASH_SZ; i++)
mutex_init(&uprobes_mmap_mutex[i]);
- }
if (percpu_init_rwsem(&dup_mmap_sem))
return -ENOMEM;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 098f396..f423c3e 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -919,7 +919,7 @@ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
}
#endif /* CONFIG_OPTPROBES */
-#ifdef KPROBES_CAN_USE_FTRACE
+#ifdef CONFIG_KPROBES_ON_FTRACE
static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
.func = kprobe_ftrace_handler,
.flags = FTRACE_OPS_FL_SAVE_REGS,
@@ -964,7 +964,7 @@ static void __kprobes disarm_kprobe_ftrace(struct kprobe *p)
(unsigned long)p->addr, 1, 0);
WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret);
}
-#else /* !KPROBES_CAN_USE_FTRACE */
+#else /* !CONFIG_KPROBES_ON_FTRACE */
#define prepare_kprobe(p) arch_prepare_kprobe(p)
#define arm_kprobe_ftrace(p) do {} while (0)
#define disarm_kprobe_ftrace(p) do {} while (0)
@@ -1414,12 +1414,12 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p,
*/
ftrace_addr = ftrace_location((unsigned long)p->addr);
if (ftrace_addr) {
-#ifdef KPROBES_CAN_USE_FTRACE
+#ifdef CONFIG_KPROBES_ON_FTRACE
/* Given address is not on the instruction boundary */
if ((unsigned long)p->addr != ftrace_addr)
return -EILSEQ;
p->flags |= KPROBE_FLAG_FTRACE;
-#else /* !KPROBES_CAN_USE_FTRACE */
+#else /* !CONFIG_KPROBES_ON_FTRACE */
return -EINVAL;
#endif
}
diff --git a/kernel/profile.c b/kernel/profile.c
index 1f39181..dc3384e 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -37,9 +37,6 @@ struct profile_hit {
#define NR_PROFILE_HIT (PAGE_SIZE/sizeof(struct profile_hit))
#define NR_PROFILE_GRP (NR_PROFILE_HIT/PROFILE_GRPSZ)
-/* Oprofile timer tick hook */
-static int (*timer_hook)(struct pt_regs *) __read_mostly;
-
static atomic_t *prof_buffer;
static unsigned long prof_len, prof_shift;
@@ -208,25 +205,6 @@ int profile_event_unregister(enum profile_type type, struct notifier_block *n)
}
EXPORT_SYMBOL_GPL(profile_event_unregister);
-int register_timer_hook(int (*hook)(struct pt_regs *))
-{
- if (timer_hook)
- return -EBUSY;
- timer_hook = hook;
- return 0;
-}
-EXPORT_SYMBOL_GPL(register_timer_hook);
-
-void unregister_timer_hook(int (*hook)(struct pt_regs *))
-{
- WARN_ON(hook != timer_hook);
- timer_hook = NULL;
- /* make sure all CPUs see the NULL hook */
- synchronize_sched(); /* Allow ongoing interrupts to complete. */
-}
-EXPORT_SYMBOL_GPL(unregister_timer_hook);
-
-
#ifdef CONFIG_SMP
/*
* Each cpu has a pair of open-addressed hashtables for pending
@@ -436,8 +414,6 @@ void profile_tick(int type)
{
struct pt_regs *regs = get_irq_regs();
- if (type == CPU_PROFILING && timer_hook)
- timer_hook(regs);
if (!user_mode(regs) && prof_cpu_mask != NULL &&
cpumask_test_cpu(smp_processor_id(), prof_cpu_mask))
profile_hit(type, (void *)profile_pc(regs));
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 6cbeaae..acbd284 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -712,6 +712,12 @@ static int ptrace_regset(struct task_struct *task, int req, unsigned int type,
kiov->iov_len, kiov->iov_base);
}
+/*
+ * This is declared in linux/regset.h and defined in machine-dependent
+ * code. We put the export here, near the primary machine-neutral use,
+ * to ensure no machine forgets it.
+ */
+EXPORT_SYMBOL_GPL(task_user_regset_view);
#endif
int ptrace_request(struct task_struct *child, long request,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 5d89335..3656756 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -39,6 +39,9 @@ config HAVE_DYNAMIC_FTRACE
help
See Documentation/trace/ftrace-design.txt
+config HAVE_DYNAMIC_FTRACE_WITH_REGS
+ bool
+
config HAVE_FTRACE_MCOUNT_RECORD
bool
help
@@ -250,6 +253,16 @@ config FTRACE_SYSCALLS
help
Basic tracer to catch the syscall entry and exit events.
+config TRACER_SNAPSHOT
+ bool "Create a snapshot trace buffer"
+ select TRACER_MAX_TRACE
+ help
+ Allow tracing users to take snapshot of the current buffer using the
+ ftrace interface, e.g.:
+
+ echo 1 > /sys/kernel/debug/tracing/snapshot
+ cat snapshot
+
config TRACE_BRANCH_PROFILING
bool
select GENERIC_TRACER
@@ -434,6 +447,11 @@ config DYNAMIC_FTRACE
were made. If so, it runs stop_machine (stops all CPUS)
and modifies the code to jump over the call to ftrace.
+config DYNAMIC_FTRACE_WITH_REGS
+ def_bool y
+ depends on DYNAMIC_FTRACE
+ depends on HAVE_DYNAMIC_FTRACE_WITH_REGS
+
config FUNCTION_PROFILER
bool "Kernel function profiler"
depends on FUNCTION_TRACER
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index c0bd030..71259e2 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -147,7 +147,7 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
return;
local_irq_save(flags);
- buf = per_cpu_ptr(bt->msg_data, smp_processor_id());
+ buf = this_cpu_ptr(bt->msg_data);
va_start(args, fmt);
n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
va_end(args);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 41473b4..ce8c3d6 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -111,6 +111,26 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
#endif
+/*
+ * Traverse the ftrace_global_list, invoking all entries. The reason that we
+ * can use rcu_dereference_raw() is that elements removed from this list
+ * are simply leaked, so there is no need to interact with a grace-period
+ * mechanism. The rcu_dereference_raw() calls are needed to handle
+ * concurrent insertions into the ftrace_global_list.
+ *
+ * Silly Alpha and silly pointer-speculation compiler optimizations!
+ */
+#define do_for_each_ftrace_op(op, list) \
+ op = rcu_dereference_raw(list); \
+ do
+
+/*
+ * Optimized for just a single item in the list (as that is the normal case).
+ */
+#define while_for_each_ftrace_op(op) \
+ while (likely(op = rcu_dereference_raw((op)->next)) && \
+ unlikely((op) != &ftrace_list_end))
+
/**
* ftrace_nr_registered_ops - return number of ops registered
*
@@ -132,29 +152,21 @@ int ftrace_nr_registered_ops(void)
return cnt;
}
-/*
- * Traverse the ftrace_global_list, invoking all entries. The reason that we
- * can use rcu_dereference_raw() is that elements removed from this list
- * are simply leaked, so there is no need to interact with a grace-period
- * mechanism. The rcu_dereference_raw() calls are needed to handle
- * concurrent insertions into the ftrace_global_list.
- *
- * Silly Alpha and silly pointer-speculation compiler optimizations!
- */
static void
ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *regs)
{
- if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT)))
+ int bit;
+
+ bit = trace_test_and_set_recursion(TRACE_GLOBAL_START, TRACE_GLOBAL_MAX);
+ if (bit < 0)
return;
- trace_recursion_set(TRACE_GLOBAL_BIT);
- op = rcu_dereference_raw(ftrace_global_list); /*see above*/
- while (op != &ftrace_list_end) {
+ do_for_each_ftrace_op(op, ftrace_global_list) {
op->func(ip, parent_ip, op, regs);
- op = rcu_dereference_raw(op->next); /*see above*/
- };
- trace_recursion_clear(TRACE_GLOBAL_BIT);
+ } while_for_each_ftrace_op(op);
+
+ trace_clear_recursion(bit);
}
static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
@@ -221,10 +233,24 @@ static void update_global_ops(void)
* registered callers.
*/
if (ftrace_global_list == &ftrace_list_end ||
- ftrace_global_list->next == &ftrace_list_end)
+ ftrace_global_list->next == &ftrace_list_end) {
func = ftrace_global_list->func;
- else
+ /*
+ * As we are calling the function directly.
+ * If it does not have recursion protection,
+ * the function_trace_op needs to be updated
+ * accordingly.
+ */
+ if (ftrace_global_list->flags & FTRACE_OPS_FL_RECURSION_SAFE)
+ global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;
+ else
+ global_ops.flags &= ~FTRACE_OPS_FL_RECURSION_SAFE;
+ } else {
func = ftrace_global_list_func;
+ /* The list has its own recursion protection. */
+ global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;
+ }
+
/* If we filter on pids, update to use the pid function */
if (!list_empty(&ftrace_pids)) {
@@ -337,7 +363,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
return -EINVAL;
-#ifndef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
/*
* If the ftrace_ops specifies SAVE_REGS, then it only can be used
* if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set.
@@ -4090,14 +4116,11 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
*/
preempt_disable_notrace();
trace_recursion_set(TRACE_CONTROL_BIT);
- op = rcu_dereference_raw(ftrace_control_list);
- while (op != &ftrace_list_end) {
+ do_for_each_ftrace_op(op, ftrace_control_list) {
if (!ftrace_function_local_disabled(op) &&
ftrace_ops_test(op, ip))
op->func(ip, parent_ip, op, regs);
-
- op = rcu_dereference_raw(op->next);
- };
+ } while_for_each_ftrace_op(op);
trace_recursion_clear(TRACE_CONTROL_BIT);
preempt_enable_notrace();
}
@@ -4112,27 +4135,26 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ignored, struct pt_regs *regs)
{
struct ftrace_ops *op;
+ int bit;
if (function_trace_stop)
return;
- if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT)))
+ bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
+ if (bit < 0)
return;
- trace_recursion_set(TRACE_INTERNAL_BIT);
/*
* Some of the ops may be dynamically allocated,
* they must be freed after a synchronize_sched().
*/
preempt_disable_notrace();
- op = rcu_dereference_raw(ftrace_ops_list);
- while (op != &ftrace_list_end) {
+ do_for_each_ftrace_op(op, ftrace_ops_list) {
if (ftrace_ops_test(op, ip))
op->func(ip, parent_ip, op, regs);
- op = rcu_dereference_raw(op->next);
- };
+ } while_for_each_ftrace_op(op);
preempt_enable_notrace();
- trace_recursion_clear(TRACE_INTERNAL_BIT);
+ trace_clear_recursion(bit);
}
/*
@@ -4143,8 +4165,8 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
* Archs are to support both the regs and ftrace_ops at the same time.
* If they support ftrace_ops, it is assumed they support regs.
* If call backs want to use regs, they must either check for regs
- * being NULL, or ARCH_SUPPORTS_FTRACE_SAVE_REGS.
- * Note, ARCH_SUPPORT_SAVE_REGS expects a full regs to be saved.
+ * being NULL, or CONFIG_DYNAMIC_FTRACE_WITH_REGS.
+ * Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved.
* An architecture can pass partial regs with ftrace_ops and still
* set the ARCH_SUPPORT_FTARCE_OPS.
*/
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index ce8514fe..7244acd 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3,8 +3,10 @@
*
* Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
*/
+#include <linux/ftrace_event.h>
#include <linux/ring_buffer.h>
#include <linux/trace_clock.h>
+#include <linux/trace_seq.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
@@ -21,7 +23,6 @@
#include <linux/fs.h>
#include <asm/local.h>
-#include "trace.h"
static void update_pages_handler(struct work_struct *work);
@@ -2432,41 +2433,76 @@ rb_reserve_next_event(struct ring_buffer *buffer,
#ifdef CONFIG_TRACING
-#define TRACE_RECURSIVE_DEPTH 16
+/*
+ * The lock and unlock are done within a preempt disable section.
+ * The current_context per_cpu variable can only be modified
+ * by the current task between lock and unlock. But it can
+ * be modified more than once via an interrupt. To pass this
+ * information from the lock to the unlock without having to
+ * access the 'in_interrupt()' functions again (which do show
+ * a bit of overhead in something as critical as function tracing,
+ * we use a bitmask trick.
+ *
+ * bit 0 = NMI context
+ * bit 1 = IRQ context
+ * bit 2 = SoftIRQ context
+ * bit 3 = normal context.
+ *
+ * This works because this is the order of contexts that can
+ * preempt other contexts. A SoftIRQ never preempts an IRQ
+ * context.
+ *
+ * When the context is determined, the corresponding bit is
+ * checked and set (if it was set, then a recursion of that context
+ * happened).
+ *
+ * On unlock, we need to clear this bit. To do so, just subtract
+ * 1 from the current_context and AND it to itself.
+ *
+ * (binary)
+ * 101 - 1 = 100
+ * 101 & 100 = 100 (clearing bit zero)
+ *
+ * 1010 - 1 = 1001
+ * 1010 & 1001 = 1000 (clearing bit 1)
+ *
+ * The least significant bit can be cleared this way, and it
+ * just so happens that it is the same bit corresponding to
+ * the current context.
+ */
+static DEFINE_PER_CPU(unsigned int, current_context);
-/* Keep this code out of the fast path cache */
-static noinline void trace_recursive_fail(void)
+static __always_inline int trace_recursive_lock(void)
{
- /* Disable all tracing before we do anything else */
- tracing_off_permanent();
-
- printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
- "HC[%lu]:SC[%lu]:NMI[%lu]\n",
- trace_recursion_buffer(),
- hardirq_count() >> HARDIRQ_SHIFT,
- softirq_count() >> SOFTIRQ_SHIFT,
- in_nmi());
-
- WARN_ON_ONCE(1);
-}
+ unsigned int val = this_cpu_read(current_context);
+ int bit;
-static inline int trace_recursive_lock(void)
-{
- trace_recursion_inc();
+ if (in_interrupt()) {
+ if (in_nmi())
+ bit = 0;
+ else if (in_irq())
+ bit = 1;
+ else
+ bit = 2;
+ } else
+ bit = 3;
- if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH))
- return 0;
+ if (unlikely(val & (1 << bit)))
+ return 1;
- trace_recursive_fail();
+ val |= (1 << bit);
+ this_cpu_write(current_context, val);
- return -1;
+ return 0;
}
-static inline void trace_recursive_unlock(void)
+static __always_inline void trace_recursive_unlock(void)
{
- WARN_ON_ONCE(!trace_recursion_buffer());
+ unsigned int val = this_cpu_read(current_context);
- trace_recursion_dec();
+ val--;
+ val &= this_cpu_read(current_context);
+ this_cpu_write(current_context, val);
}
#else
@@ -3067,6 +3103,24 @@ ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu)
EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu);
/**
+ * ring_buffer_read_events_cpu - get the number of events successfully read
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of events read
+ */
+unsigned long
+ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+
+ if (!cpumask_test_cpu(cpu, buffer->cpumask))
+ return 0;
+
+ cpu_buffer = buffer->buffers[cpu];
+ return cpu_buffer->read;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu);
+
+/**
* ring_buffer_entries - get the number of entries in a buffer
* @buffer: The ring buffer
*
@@ -3425,7 +3479,7 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
/* check for end of page padding */
if ((iter->head >= rb_page_size(iter->head_page)) &&
(iter->head_page != cpu_buffer->commit_page))
- rb_advance_iter(iter);
+ rb_inc_iter(iter);
}
static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3c13e46..5d520b7 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -249,7 +249,7 @@ static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
static struct tracer *trace_types __read_mostly;
/* current_trace points to the tracer that is currently active */
-static struct tracer *current_trace __read_mostly;
+static struct tracer *current_trace __read_mostly = &nop_trace;
/*
* trace_types_lock is used to protect the trace_types list.
@@ -709,10 +709,13 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
return;
WARN_ON_ONCE(!irqs_disabled());
- if (!current_trace->use_max_tr) {
- WARN_ON_ONCE(1);
+
+ if (!current_trace->allocated_snapshot) {
+ /* Only the nop tracer should hit this when disabling */
+ WARN_ON_ONCE(current_trace != &nop_trace);
return;
}
+
arch_spin_lock(&ftrace_max_lock);
tr->buffer = max_tr.buffer;
@@ -739,10 +742,8 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
return;
WARN_ON_ONCE(!irqs_disabled());
- if (!current_trace->use_max_tr) {
- WARN_ON_ONCE(1);
+ if (WARN_ON_ONCE(!current_trace->allocated_snapshot))
return;
- }
arch_spin_lock(&ftrace_max_lock);
@@ -862,10 +863,13 @@ int register_tracer(struct tracer *type)
current_trace = type;
- /* If we expanded the buffers, make sure the max is expanded too */
- if (ring_buffer_expanded && type->use_max_tr)
- ring_buffer_resize(max_tr.buffer, trace_buf_size,
- RING_BUFFER_ALL_CPUS);
+ if (type->use_max_tr) {
+ /* If we expanded the buffers, make sure the max is expanded too */
+ if (ring_buffer_expanded)
+ ring_buffer_resize(max_tr.buffer, trace_buf_size,
+ RING_BUFFER_ALL_CPUS);
+ type->allocated_snapshot = true;
+ }
/* the test is responsible for initializing and enabling */
pr_info("Testing tracer %s: ", type->name);
@@ -881,10 +885,14 @@ int register_tracer(struct tracer *type)
/* Only reset on passing, to avoid touching corrupted buffers */
tracing_reset_online_cpus(tr);
- /* Shrink the max buffer again */
- if (ring_buffer_expanded && type->use_max_tr)
- ring_buffer_resize(max_tr.buffer, 1,
- RING_BUFFER_ALL_CPUS);
+ if (type->use_max_tr) {
+ type->allocated_snapshot = false;
+
+ /* Shrink the max buffer again */
+ if (ring_buffer_expanded)
+ ring_buffer_resize(max_tr.buffer, 1,
+ RING_BUFFER_ALL_CPUS);
+ }
printk(KERN_CONT "PASSED\n");
}
@@ -922,6 +930,9 @@ void tracing_reset(struct trace_array *tr, int cpu)
{
struct ring_buffer *buffer = tr->buffer;
+ if (!buffer)
+ return;
+
ring_buffer_record_disable(buffer);
/* Make sure all commits have finished */
@@ -936,6 +947,9 @@ void tracing_reset_online_cpus(struct trace_array *tr)
struct ring_buffer *buffer = tr->buffer;
int cpu;
+ if (!buffer)
+ return;
+
ring_buffer_record_disable(buffer);
/* Make sure all commits have finished */
@@ -1167,7 +1181,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
entry->preempt_count = pc & 0xff;
entry->pid = (tsk) ? tsk->pid : 0;
- entry->padding = 0;
entry->flags =
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -1335,7 +1348,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
*/
preempt_disable_notrace();
- use_stack = ++__get_cpu_var(ftrace_stack_reserve);
+ use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
/*
* We don't need any atomic variables, just a barrier.
* If an interrupt comes in, we don't care, because it would
@@ -1389,7 +1402,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
out:
/* Again, don't let gcc optimize things here */
barrier();
- __get_cpu_var(ftrace_stack_reserve)--;
+ __this_cpu_dec(ftrace_stack_reserve);
preempt_enable_notrace();
}
@@ -1517,7 +1530,6 @@ static struct trace_buffer_struct *trace_percpu_nmi_buffer;
static char *get_trace_buf(void)
{
struct trace_buffer_struct *percpu_buffer;
- struct trace_buffer_struct *buffer;
/*
* If we have allocated per cpu buffers, then we do not
@@ -1535,9 +1547,7 @@ static char *get_trace_buf(void)
if (!percpu_buffer)
return NULL;
- buffer = per_cpu_ptr(percpu_buffer, smp_processor_id());
-
- return buffer->buffer;
+ return this_cpu_ptr(&percpu_buffer->buffer[0]);
}
static int alloc_percpu_trace_buffer(void)
@@ -1942,21 +1952,27 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu)
static void *s_start(struct seq_file *m, loff_t *pos)
{
struct trace_iterator *iter = m->private;
- static struct tracer *old_tracer;
int cpu_file = iter->cpu_file;
void *p = NULL;
loff_t l = 0;
int cpu;
- /* copy the tracer to avoid using a global lock all around */
+ /*
+ * copy the tracer to avoid using a global lock all around.
+ * iter->trace is a copy of current_trace, the pointer to the
+ * name may be used instead of a strcmp(), as iter->trace->name
+ * will point to the same string as current_trace->name.
+ */
mutex_lock(&trace_types_lock);
- if (unlikely(old_tracer != current_trace && current_trace)) {
- old_tracer = current_trace;
+ if (unlikely(current_trace && iter->trace->name != current_trace->name))
*iter->trace = *current_trace;
- }
mutex_unlock(&trace_types_lock);
- atomic_inc(&trace_record_cmdline_disabled);
+ if (iter->snapshot && iter->trace->use_max_tr)
+ return ERR_PTR(-EBUSY);
+
+ if (!iter->snapshot)
+ atomic_inc(&trace_record_cmdline_disabled);
if (*pos != iter->pos) {
iter->ent = NULL;
@@ -1995,7 +2011,11 @@ static void s_stop(struct seq_file *m, void *p)
{
struct trace_iterator *iter = m->private;
- atomic_dec(&trace_record_cmdline_disabled);
+ if (iter->snapshot && iter->trace->use_max_tr)
+ return;
+
+ if (!iter->snapshot)
+ atomic_dec(&trace_record_cmdline_disabled);
trace_access_unlock(iter->cpu_file);
trace_event_read_unlock();
}
@@ -2080,8 +2100,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
unsigned long total;
const char *name = "preemption";
- if (type)
- name = type->name;
+ name = type->name;
get_total_entries(tr, &total, &entries);
@@ -2430,7 +2449,7 @@ static const struct seq_operations tracer_seq_ops = {
};
static struct trace_iterator *
-__tracing_open(struct inode *inode, struct file *file)
+__tracing_open(struct inode *inode, struct file *file, bool snapshot)
{
long cpu_file = (long) inode->i_private;
struct trace_iterator *iter;
@@ -2457,16 +2476,16 @@ __tracing_open(struct inode *inode, struct file *file)
if (!iter->trace)
goto fail;
- if (current_trace)
- *iter->trace = *current_trace;
+ *iter->trace = *current_trace;
if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
goto fail;
- if (current_trace && current_trace->print_max)
+ if (current_trace->print_max || snapshot)
iter->tr = &max_tr;
else
iter->tr = &global_trace;
+ iter->snapshot = snapshot;
iter->pos = -1;
mutex_init(&iter->mutex);
iter->cpu_file = cpu_file;
@@ -2483,8 +2502,9 @@ __tracing_open(struct inode *inode, struct file *file)
if (trace_clocks[trace_clock_id].in_ns)
iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
- /* stop the trace while dumping */
- tracing_stop();
+ /* stop the trace while dumping if we are not opening "snapshot" */
+ if (!iter->snapshot)
+ tracing_stop();
if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
for_each_tracing_cpu(cpu) {
@@ -2547,8 +2567,9 @@ static int tracing_release(struct inode *inode, struct file *file)
if (iter->trace && iter->trace->close)
iter->trace->close(iter);
- /* reenable tracing if it was previously enabled */
- tracing_start();
+ if (!iter->snapshot)
+ /* reenable tracing if it was previously enabled */
+ tracing_start();
mutex_unlock(&trace_types_lock);
mutex_destroy(&iter->mutex);
@@ -2576,7 +2597,7 @@ static int tracing_open(struct inode *inode, struct file *file)
}
if (file->f_mode & FMODE_READ) {
- iter = __tracing_open(inode, file);
+ iter = __tracing_open(inode, file, false);
if (IS_ERR(iter))
ret = PTR_ERR(iter);
else if (trace_flags & TRACE_ITER_LATENCY_FMT)
@@ -3014,10 +3035,7 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf,
int r;
mutex_lock(&trace_types_lock);
- if (current_trace)
- r = sprintf(buf, "%s\n", current_trace->name);
- else
- r = sprintf(buf, "\n");
+ r = sprintf(buf, "%s\n", current_trace->name);
mutex_unlock(&trace_types_lock);
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
@@ -3183,6 +3201,7 @@ static int tracing_set_tracer(const char *buf)
static struct trace_option_dentry *topts;
struct trace_array *tr = &global_trace;
struct tracer *t;
+ bool had_max_tr;
int ret = 0;
mutex_lock(&trace_types_lock);
@@ -3207,9 +3226,21 @@ static int tracing_set_tracer(const char *buf)
goto out;
trace_branch_disable();
- if (current_trace && current_trace->reset)
+ if (current_trace->reset)
current_trace->reset(tr);
- if (current_trace && current_trace->use_max_tr) {
+
+ had_max_tr = current_trace->allocated_snapshot;
+ current_trace = &nop_trace;
+
+ if (had_max_tr && !t->use_max_tr) {
+ /*
+ * We need to make sure that the update_max_tr sees that
+ * current_trace changed to nop_trace to keep it from
+ * swapping the buffers after we resize it.
+ * The update_max_tr is called from interrupts disabled
+ * so a synchronized_sched() is sufficient.
+ */
+ synchronize_sched();
/*
* We don't free the ring buffer. instead, resize it because
* The max_tr ring buffer has some state (e.g. ring->clock) and
@@ -3217,18 +3248,19 @@ static int tracing_set_tracer(const char *buf)
*/
ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS);
set_buffer_entries(&max_tr, 1);
+ tracing_reset_online_cpus(&max_tr);
+ current_trace->allocated_snapshot = false;
}
destroy_trace_option_files(topts);
- current_trace = &nop_trace;
-
topts = create_trace_option_files(t);
- if (t->use_max_tr) {
+ if (t->use_max_tr && !had_max_tr) {
/* we need to make per cpu buffer sizes equivalent */
ret = resize_buffer_duplicate_size(&max_tr, &global_trace,
RING_BUFFER_ALL_CPUS);
if (ret < 0)
goto out;
+ t->allocated_snapshot = true;
}
if (t->init) {
@@ -3336,8 +3368,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
ret = -ENOMEM;
goto fail;
}
- if (current_trace)
- *iter->trace = *current_trace;
+ *iter->trace = *current_trace;
if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
ret = -ENOMEM;
@@ -3477,7 +3508,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
struct trace_iterator *iter = filp->private_data;
- static struct tracer *old_tracer;
ssize_t sret;
/* return any leftover data */
@@ -3489,10 +3519,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
/* copy the tracer to avoid using a global lock all around */
mutex_lock(&trace_types_lock);
- if (unlikely(old_tracer != current_trace && current_trace)) {
- old_tracer = current_trace;
+ if (unlikely(iter->trace->name != current_trace->name))
*iter->trace = *current_trace;
- }
mutex_unlock(&trace_types_lock);
/*
@@ -3648,7 +3676,6 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
.ops = &tracing_pipe_buf_ops,
.spd_release = tracing_spd_release_pipe,
};
- static struct tracer *old_tracer;
ssize_t ret;
size_t rem;
unsigned int i;
@@ -3658,10 +3685,8 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
/* copy the tracer to avoid using a global lock all around */
mutex_lock(&trace_types_lock);
- if (unlikely(old_tracer != current_trace && current_trace)) {
- old_tracer = current_trace;
+ if (unlikely(iter->trace->name != current_trace->name))
*iter->trace = *current_trace;
- }
mutex_unlock(&trace_types_lock);
mutex_lock(&iter->mutex);
@@ -4037,8 +4062,7 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
* Reset the buffer so that it doesn't have incomparable timestamps.
*/
tracing_reset_online_cpus(&global_trace);
- if (max_tr.buffer)
- tracing_reset_online_cpus(&max_tr);
+ tracing_reset_online_cpus(&max_tr);
mutex_unlock(&trace_types_lock);
@@ -4054,6 +4078,87 @@ static int tracing_clock_open(struct inode *inode, struct file *file)
return single_open(file, tracing_clock_show, NULL);
}
+#ifdef CONFIG_TRACER_SNAPSHOT
+static int tracing_snapshot_open(struct inode *inode, struct file *file)
+{
+ struct trace_iterator *iter;
+ int ret = 0;
+
+ if (file->f_mode & FMODE_READ) {
+ iter = __tracing_open(inode, file, true);
+ if (IS_ERR(iter))
+ ret = PTR_ERR(iter);
+ }
+ return ret;
+}
+
+static ssize_t
+tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
+ loff_t *ppos)
+{
+ unsigned long val;
+ int ret;
+
+ ret = tracing_update_buffers();
+ if (ret < 0)
+ return ret;
+
+ ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+ if (ret)
+ return ret;
+
+ mutex_lock(&trace_types_lock);
+
+ if (current_trace->use_max_tr) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ switch (val) {
+ case 0:
+ if (current_trace->allocated_snapshot) {
+ /* free spare buffer */
+ ring_buffer_resize(max_tr.buffer, 1,
+ RING_BUFFER_ALL_CPUS);
+ set_buffer_entries(&max_tr, 1);
+ tracing_reset_online_cpus(&max_tr);
+ current_trace->allocated_snapshot = false;
+ }
+ break;
+ case 1:
+ if (!current_trace->allocated_snapshot) {
+ /* allocate spare buffer */
+ ret = resize_buffer_duplicate_size(&max_tr,
+ &global_trace, RING_BUFFER_ALL_CPUS);
+ if (ret < 0)
+ break;
+ current_trace->allocated_snapshot = true;
+ }
+
+ local_irq_disable();
+ /* Now, we're going to swap */
+ update_max_tr(&global_trace, current, smp_processor_id());
+ local_irq_enable();
+ break;
+ default:
+ if (current_trace->allocated_snapshot)
+ tracing_reset_online_cpus(&max_tr);
+ else
+ ret = -EINVAL;
+ break;
+ }
+
+ if (ret >= 0) {
+ *ppos += cnt;
+ ret = cnt;
+ }
+out:
+ mutex_unlock(&trace_types_lock);
+ return ret;
+}
+#endif /* CONFIG_TRACER_SNAPSHOT */
+
+
static const struct file_operations tracing_max_lat_fops = {
.open = tracing_open_generic,
.read = tracing_max_lat_read,
@@ -4110,6 +4215,16 @@ static const struct file_operations trace_clock_fops = {
.write = tracing_clock_write,
};
+#ifdef CONFIG_TRACER_SNAPSHOT
+static const struct file_operations snapshot_fops = {
+ .open = tracing_snapshot_open,
+ .read = seq_read,
+ .write = tracing_snapshot_write,
+ .llseek = tracing_seek,
+ .release = tracing_release,
+};
+#endif /* CONFIG_TRACER_SNAPSHOT */
+
struct ftrace_buffer_info {
struct trace_array *tr;
void *spare;
@@ -4414,6 +4529,9 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu);
trace_seq_printf(s, "dropped events: %ld\n", cnt);
+ cnt = ring_buffer_read_events_cpu(tr->buffer, cpu);
+ trace_seq_printf(s, "read events: %ld\n", cnt);
+
count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
kfree(s);
@@ -4490,7 +4608,7 @@ struct dentry *tracing_init_dentry(void)
static struct dentry *d_percpu;
-struct dentry *tracing_dentry_percpu(void)
+static struct dentry *tracing_dentry_percpu(void)
{
static int once;
struct dentry *d_tracer;
@@ -4906,6 +5024,11 @@ static __init int tracer_init_debugfs(void)
&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
#endif
+#ifdef CONFIG_TRACER_SNAPSHOT
+ trace_create_file("snapshot", 0644, d_tracer,
+ (void *) TRACE_PIPE_ALL_CPU, &snapshot_fops);
+#endif
+
create_trace_options_dir();
for_each_tracing_cpu(cpu)
@@ -5014,6 +5137,7 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
if (disable_tracing)
ftrace_kill();
+ /* Simulate the iterator */
trace_init_global_iter(&iter);
for_each_tracing_cpu(cpu) {
@@ -5025,10 +5149,6 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
/* don't look at user memory in panic mode */
trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
- /* Simulate the iterator */
- iter.tr = &global_trace;
- iter.trace = current_trace;
-
switch (oops_dump_mode) {
case DUMP_ALL:
iter.cpu_file = TRACE_PIPE_ALL_CPU;
@@ -5173,7 +5293,7 @@ __init static int tracer_alloc_buffers(void)
init_irq_work(&trace_work_wakeup, trace_wake_up);
register_tracer(&nop_trace);
- current_trace = &nop_trace;
+
/* All seems OK, enable tracing */
tracing_disabled = 0;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index c75d798..57d7e53 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -287,20 +287,62 @@ struct tracer {
struct tracer_flags *flags;
bool print_max;
bool use_max_tr;
+ bool allocated_snapshot;
};
/* Only current can touch trace_recursion */
-#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
-#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
-/* Ring buffer has the 10 LSB bits to count */
-#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff)
-
-/* for function tracing recursion */
-#define TRACE_INTERNAL_BIT (1<<11)
-#define TRACE_GLOBAL_BIT (1<<12)
-#define TRACE_CONTROL_BIT (1<<13)
+/*
+ * For function tracing recursion:
+ * The order of these bits are important.
+ *
+ * When function tracing occurs, the following steps are made:
+ * If arch does not support a ftrace feature:
+ * call internal function (uses INTERNAL bits) which calls...
+ * If callback is registered to the "global" list, the list
+ * function is called and recursion checks the GLOBAL bits.
+ * then this function calls...
+ * The function callback, which can use the FTRACE bits to
+ * check for recursion.
+ *
+ * Now if the arch does not suppport a feature, and it calls
+ * the global list function which calls the ftrace callback
+ * all three of these steps will do a recursion protection.
+ * There's no reason to do one if the previous caller already
+ * did. The recursion that we are protecting against will
+ * go through the same steps again.
+ *
+ * To prevent the multiple recursion checks, if a recursion
+ * bit is set that is higher than the MAX bit of the current
+ * check, then we know that the check was made by the previous
+ * caller, and we can skip the current check.
+ */
+enum {
+ TRACE_BUFFER_BIT,
+ TRACE_BUFFER_NMI_BIT,
+ TRACE_BUFFER_IRQ_BIT,
+ TRACE_BUFFER_SIRQ_BIT,
+
+ /* Start of function recursion bits */
+ TRACE_FTRACE_BIT,
+ TRACE_FTRACE_NMI_BIT,
+ TRACE_FTRACE_IRQ_BIT,
+ TRACE_FTRACE_SIRQ_BIT,
+
+ /* GLOBAL_BITs must be greater than FTRACE_BITs */
+ TRACE_GLOBAL_BIT,
+ TRACE_GLOBAL_NMI_BIT,
+ TRACE_GLOBAL_IRQ_BIT,
+ TRACE_GLOBAL_SIRQ_BIT,
+
+ /* INTERNAL_BITs must be greater than GLOBAL_BITs */
+ TRACE_INTERNAL_BIT,
+ TRACE_INTERNAL_NMI_BIT,
+ TRACE_INTERNAL_IRQ_BIT,
+ TRACE_INTERNAL_SIRQ_BIT,
+
+ TRACE_CONTROL_BIT,
/*
* Abuse of the trace_recursion.
@@ -309,11 +351,77 @@ struct tracer {
* was called in irq context but we have irq tracing off. Since this
* can only be modified by current, we can reuse trace_recursion.
*/
-#define TRACE_IRQ_BIT (1<<13)
+ TRACE_IRQ_BIT,
+};
+
+#define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0)
+#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(1<<(bit)); } while (0)
+#define trace_recursion_test(bit) ((current)->trace_recursion & (1<<(bit)))
+
+#define TRACE_CONTEXT_BITS 4
+
+#define TRACE_FTRACE_START TRACE_FTRACE_BIT
+#define TRACE_FTRACE_MAX ((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1)
+
+#define TRACE_GLOBAL_START TRACE_GLOBAL_BIT
+#define TRACE_GLOBAL_MAX ((1 << (TRACE_GLOBAL_START + TRACE_CONTEXT_BITS)) - 1)
+
+#define TRACE_LIST_START TRACE_INTERNAL_BIT
+#define TRACE_LIST_MAX ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
+
+#define TRACE_CONTEXT_MASK TRACE_LIST_MAX
+
+static __always_inline int trace_get_context_bit(void)
+{
+ int bit;
-#define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0)
-#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0)
-#define trace_recursion_test(bit) ((current)->trace_recursion & (bit))
+ if (in_interrupt()) {
+ if (in_nmi())
+ bit = 0;
+
+ else if (in_irq())
+ bit = 1;
+ else
+ bit = 2;
+ } else
+ bit = 3;
+
+ return bit;
+}
+
+static __always_inline int trace_test_and_set_recursion(int start, int max)
+{
+ unsigned int val = current->trace_recursion;
+ int bit;
+
+ /* A previous recursion check was made */
+ if ((val & TRACE_CONTEXT_MASK) > max)
+ return 0;
+
+ bit = trace_get_context_bit() + start;
+ if (unlikely(val & (1 << bit)))
+ return -1;
+
+ val |= 1 << bit;
+ current->trace_recursion = val;
+ barrier();
+
+ return bit;
+}
+
+static __always_inline void trace_clear_recursion(int bit)
+{
+ unsigned int val = current->trace_recursion;
+
+ if (!bit)
+ return;
+
+ bit = 1 << bit;
+ val &= ~bit;
+
+ barrier();
+ current->trace_recursion = val;
+}
#define TRACE_PIPE_ALL_CPU -1
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 1bbb1b2..aa8f5f4 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -21,8 +21,6 @@
#include <linux/ktime.h>
#include <linux/trace_clock.h>
-#include "trace.h"
-
/*
* trace_clock_local(): the simplest and least coherent tracing clock.
*
@@ -87,7 +85,7 @@ u64 notrace trace_clock_global(void)
local_irq_save(flags);
this_cpu = raw_smp_processor_id();
- now = cpu_clock(this_cpu);
+ now = sched_clock_cpu(this_cpu);
/*
* If in an NMI context then dont risk lockups and return the
* cpu_clock() time:
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 880073d..57e9b28 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -116,7 +116,6 @@ static int trace_define_common_fields(void)
__common_field(unsigned char, flags);
__common_field(unsigned char, preempt_count);
__common_field(int, pid);
- __common_field(int, padding);
return ret;
}
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 8e3ad80..6011525 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -47,34 +47,6 @@ static void function_trace_start(struct trace_array *tr)
tracing_reset_online_cpus(tr);
}
-static void
-function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *pt_regs)
-{
- struct trace_array *tr = func_trace;
- struct trace_array_cpu *data;
- unsigned long flags;
- long disabled;
- int cpu;
- int pc;
-
- if (unlikely(!ftrace_function_enabled))
- return;
-
- pc = preempt_count();
- preempt_disable_notrace();
- local_save_flags(flags);
- cpu = raw_smp_processor_id();
- data = tr->data[cpu];
- disabled = atomic_inc_return(&data->disabled);
-
- if (likely(disabled == 1))
- trace_function(tr, ip, parent_ip, flags, pc);
-
- atomic_dec(&data->disabled);
- preempt_enable_notrace();
-}
-
/* Our option */
enum {
TRACE_FUNC_OPT_STACK = 0x1,
@@ -85,34 +57,34 @@ static struct tracer_flags func_flags;
static void
function_trace_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *pt_regs)
-
{
struct trace_array *tr = func_trace;
struct trace_array_cpu *data;
unsigned long flags;
- long disabled;
+ int bit;
int cpu;
int pc;
if (unlikely(!ftrace_function_enabled))
return;
- /*
- * Need to use raw, since this must be called before the
- * recursive protection is performed.
- */
- local_irq_save(flags);
- cpu = raw_smp_processor_id();
- data = tr->data[cpu];
- disabled = atomic_inc_return(&data->disabled);
+ pc = preempt_count();
+ preempt_disable_notrace();
- if (likely(disabled == 1)) {
- pc = preempt_count();
+ bit = trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX);
+ if (bit < 0)
+ goto out;
+
+ cpu = smp_processor_id();
+ data = tr->data[cpu];
+ if (!atomic_read(&data->disabled)) {
+ local_save_flags(flags);
trace_function(tr, ip, parent_ip, flags, pc);
}
+ trace_clear_recursion(bit);
- atomic_dec(&data->disabled);
- local_irq_restore(flags);
+ out:
+ preempt_enable_notrace();
}
static void
@@ -185,11 +157,6 @@ static void tracing_start_function_trace(void)
{
ftrace_function_enabled = 0;
- if (trace_flags & TRACE_ITER_PREEMPTONLY)
- trace_ops.func = function_trace_call_preempt_only;
- else
- trace_ops.func = function_trace_call;
-
if (func_flags.val & TRACE_FUNC_OPT_STACK)
register_ftrace_function(&trace_stack_ops);
else
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 4edb4b7..39ada66 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -47,6 +47,8 @@ struct fgraph_data {
#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
#define TRACE_GRAPH_PRINT_IRQS 0x40
+static unsigned int max_depth;
+
static struct tracer_opt trace_opts[] = {
/* Display overruns? (for self-debug purpose) */
{ TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
@@ -189,10 +191,16 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
ftrace_pop_return_trace(&trace, &ret, frame_pointer);
trace.rettime = trace_clock_local();
- ftrace_graph_return(&trace);
barrier();
current->curr_ret_stack--;
+ /*
+ * The trace should run after decrementing the ret counter
+ * in case an interrupt were to come in. We don't want to
+ * lose the interrupt if max_depth is set.
+ */
+ ftrace_graph_return(&trace);
+
if (unlikely(!ret)) {
ftrace_graph_stop();
WARN_ON(1);
@@ -250,8 +258,9 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
return 0;
/* trace it when it is-nested-in or is a function enabled. */
- if (!(trace->depth || ftrace_graph_addr(trace->func)) ||
- ftrace_graph_ignore_irqs())
+ if ((!(trace->depth || ftrace_graph_addr(trace->func)) ||
+ ftrace_graph_ignore_irqs()) ||
+ (max_depth && trace->depth >= max_depth))
return 0;
local_irq_save(flags);
@@ -1457,6 +1466,59 @@ static struct tracer graph_trace __read_mostly = {
#endif
};
+
+static ssize_t
+graph_depth_write(struct file *filp, const char __user *ubuf, size_t cnt,
+ loff_t *ppos)
+{
+ unsigned long val;
+ int ret;
+
+ ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+ if (ret)
+ return ret;
+
+ max_depth = val;
+
+ *ppos += cnt;
+
+ return cnt;
+}
+
+static ssize_t
+graph_depth_read(struct file *filp, char __user *ubuf, size_t cnt,
+ loff_t *ppos)
+{
+ char buf[15]; /* More than enough to hold UINT_MAX + "\n"*/
+ int n;
+
+ n = sprintf(buf, "%d\n", max_depth);
+
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
+}
+
+static const struct file_operations graph_depth_fops = {
+ .open = tracing_open_generic,
+ .write = graph_depth_write,
+ .read = graph_depth_read,
+ .llseek = generic_file_llseek,
+};
+
+static __init int init_graph_debugfs(void)
+{
+ struct dentry *d_tracer;
+
+ d_tracer = tracing_init_dentry();
+ if (!d_tracer)
+ return 0;
+
+ trace_create_file("max_graph_depth", 0644, d_tracer,
+ NULL, &graph_depth_fops);
+
+ return 0;
+}
+fs_initcall(init_graph_debugfs);
+
static __init int init_graph_trace(void)
{
max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1);
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 9337086..5c7e09d 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -66,7 +66,6 @@
#define TP_FLAG_TRACE 1
#define TP_FLAG_PROFILE 2
#define TP_FLAG_REGISTERED 4
-#define TP_FLAG_UPROBE 8
/* data_rloc: data relative location, compatible with u32 */
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 4762316..51c819c 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -415,7 +415,8 @@ static void trace_selftest_test_recursion_func(unsigned long ip,
* The ftrace infrastructure should provide the recursion
* protection. If not, this will crash the kernel!
*/
- trace_selftest_recursion_cnt++;
+ if (trace_selftest_recursion_cnt++ > 10)
+ return;
DYN_FTRACE_TEST_NAME();
}
@@ -452,7 +453,6 @@ trace_selftest_function_recursion(void)
char *func_name;
int len;
int ret;
- int cnt;
/* The previous test PASSED */
pr_cont("PASSED\n");
@@ -510,19 +510,10 @@ trace_selftest_function_recursion(void)
unregister_ftrace_function(&test_recsafe_probe);
- /*
- * If arch supports all ftrace features, and no other task
- * was on the list, we should be fine.
- */
- if (!ftrace_nr_registered_ops() && !FTRACE_FORCE_LIST_FUNC)
- cnt = 2; /* Should have recursed */
- else
- cnt = 1;
-
ret = -1;
- if (trace_selftest_recursion_cnt != cnt) {
- pr_cont("*callback not called expected %d times (%d)* ",
- cnt, trace_selftest_recursion_cnt);
+ if (trace_selftest_recursion_cnt != 2) {
+ pr_cont("*callback not called expected 2 times (%d)* ",
+ trace_selftest_recursion_cnt);
goto out;
}
@@ -568,7 +559,7 @@ trace_selftest_function_regs(void)
int ret;
int supported = 0;
-#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
supported = 1;
#endif
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 7609dd6..5329e13e 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -77,7 +77,7 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr)
return syscalls_metadata[nr];
}
-enum print_line_t
+static enum print_line_t
print_syscall_enter(struct trace_iterator *iter, int flags,
struct trace_event *event)
{
@@ -130,7 +130,7 @@ end:
return TRACE_TYPE_HANDLED;
}
-enum print_line_t
+static enum print_line_t
print_syscall_exit(struct trace_iterator *iter, int flags,
struct trace_event *event)
{
@@ -270,7 +270,7 @@ static int syscall_exit_define_fields(struct ftrace_event_call *call)
return ret;
}
-void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
+static void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
{
struct syscall_trace_enter *entry;
struct syscall_metadata *sys_data;
@@ -305,7 +305,7 @@ void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}
-void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
+static void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
{
struct syscall_trace_exit *entry;
struct syscall_metadata *sys_data;
@@ -337,7 +337,7 @@ void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}
-int reg_event_syscall_enter(struct ftrace_event_call *call)
+static int reg_event_syscall_enter(struct ftrace_event_call *call)
{
int ret = 0;
int num;
@@ -356,7 +356,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
return ret;
}
-void unreg_event_syscall_enter(struct ftrace_event_call *call)
+static void unreg_event_syscall_enter(struct ftrace_event_call *call)
{
int num;
@@ -371,7 +371,7 @@ void unreg_event_syscall_enter(struct ftrace_event_call *call)
mutex_unlock(&syscall_trace_lock);
}
-int reg_event_syscall_exit(struct ftrace_event_call *call)
+static int reg_event_syscall_exit(struct ftrace_event_call *call)
{
int ret = 0;
int num;
@@ -390,7 +390,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
return ret;
}
-void unreg_event_syscall_exit(struct ftrace_event_call *call)
+static void unreg_event_syscall_exit(struct ftrace_event_call *call)
{
int num;
@@ -459,7 +459,7 @@ unsigned long __init __weak arch_syscall_addr(int nr)
return (unsigned long)sys_call_table[nr];
}
-int __init init_ftrace_syscalls(void)
+static int __init init_ftrace_syscalls(void)
{
struct syscall_metadata *meta;
unsigned long addr;
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index c86e6d4..8dad2a9 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -28,20 +28,21 @@
#define UPROBE_EVENT_SYSTEM "uprobes"
+struct trace_uprobe_filter {
+ rwlock_t rwlock;
+ int nr_systemwide;
+ struct list_head perf_events;
+};
+
/*
* uprobe event core functions
*/
-struct trace_uprobe;
-struct uprobe_trace_consumer {
- struct uprobe_consumer cons;
- struct trace_uprobe *tu;
-};
-
struct trace_uprobe {
struct list_head list;
struct ftrace_event_class class;
struct ftrace_event_call call;
- struct uprobe_trace_consumer *consumer;
+ struct trace_uprobe_filter filter;
+ struct uprobe_consumer consumer;
struct inode *inode;
char *filename;
unsigned long offset;
@@ -64,6 +65,18 @@ static LIST_HEAD(uprobe_list);
static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
+static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter)
+{
+ rwlock_init(&filter->rwlock);
+ filter->nr_systemwide = 0;
+ INIT_LIST_HEAD(&filter->perf_events);
+}
+
+static inline bool uprobe_filter_is_empty(struct trace_uprobe_filter *filter)
+{
+ return !filter->nr_systemwide && list_empty(&filter->perf_events);
+}
+
/*
* Allocate new trace_uprobe and initialize it (including uprobes).
*/
@@ -92,6 +105,8 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs)
goto error;
INIT_LIST_HEAD(&tu->list);
+ tu->consumer.handler = uprobe_dispatcher;
+ init_trace_uprobe_filter(&tu->filter);
return tu;
error:
@@ -253,12 +268,18 @@ static int create_trace_uprobe(int argc, char **argv)
if (ret)
goto fail_address_parse;
+ inode = igrab(path.dentry->d_inode);
+ path_put(&path);
+
+ if (!inode || !S_ISREG(inode->i_mode)) {
+ ret = -EINVAL;
+ goto fail_address_parse;
+ }
+
ret = kstrtoul(arg, 0, &offset);
if (ret)
goto fail_address_parse;
- inode = igrab(path.dentry->d_inode);
-
argc -= 2;
argv += 2;
@@ -356,7 +377,7 @@ fail_address_parse:
if (inode)
iput(inode);
- pr_info("Failed to parse address.\n");
+ pr_info("Failed to parse address or file.\n");
return ret;
}
@@ -465,7 +486,7 @@ static const struct file_operations uprobe_profile_ops = {
};
/* uprobe handler */
-static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
+static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
{
struct uprobe_trace_entry_head *entry;
struct ring_buffer_event *event;
@@ -475,8 +496,6 @@ static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
unsigned long irq_flags;
struct ftrace_event_call *call = &tu->call;
- tu->nhit++;
-
local_save_flags(irq_flags);
pc = preempt_count();
@@ -485,16 +504,18 @@ static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
size, irq_flags, pc);
if (!event)
- return;
+ return 0;
entry = ring_buffer_event_data(event);
- entry->ip = uprobe_get_swbp_addr(task_pt_regs(current));
+ entry->ip = instruction_pointer(task_pt_regs(current));
data = (u8 *)&entry[1];
for (i = 0; i < tu->nr_args; i++)
call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
if (!filter_current_check_discard(buffer, call, entry, event))
trace_buffer_unlock_commit(buffer, event, irq_flags, pc);
+
+ return 0;
}
/* Event entry printers */
@@ -533,42 +554,43 @@ partial:
return TRACE_TYPE_PARTIAL_LINE;
}
-static int probe_event_enable(struct trace_uprobe *tu, int flag)
+static inline bool is_trace_uprobe_enabled(struct trace_uprobe *tu)
{
- struct uprobe_trace_consumer *utc;
- int ret = 0;
+ return tu->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE);
+}
- if (!tu->inode || tu->consumer)
- return -EINTR;
+typedef bool (*filter_func_t)(struct uprobe_consumer *self,
+ enum uprobe_filter_ctx ctx,
+ struct mm_struct *mm);
- utc = kzalloc(sizeof(struct uprobe_trace_consumer), GFP_KERNEL);
- if (!utc)
+static int
+probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter)
+{
+ int ret = 0;
+
+ if (is_trace_uprobe_enabled(tu))
return -EINTR;
- utc->cons.handler = uprobe_dispatcher;
- utc->cons.filter = NULL;
- ret = uprobe_register(tu->inode, tu->offset, &utc->cons);
- if (ret) {
- kfree(utc);
- return ret;
- }
+ WARN_ON(!uprobe_filter_is_empty(&tu->filter));
tu->flags |= flag;
- utc->tu = tu;
- tu->consumer = utc;
+ tu->consumer.filter = filter;
+ ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
+ if (ret)
+ tu->flags &= ~flag;
- return 0;
+ return ret;
}
static void probe_event_disable(struct trace_uprobe *tu, int flag)
{
- if (!tu->inode || !tu->consumer)
+ if (!is_trace_uprobe_enabled(tu))
return;
- uprobe_unregister(tu->inode, tu->offset, &tu->consumer->cons);
+ WARN_ON(!uprobe_filter_is_empty(&tu->filter));
+
+ uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
tu->flags &= ~flag;
- kfree(tu->consumer);
- tu->consumer = NULL;
}
static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
@@ -642,8 +664,96 @@ static int set_print_fmt(struct trace_uprobe *tu)
}
#ifdef CONFIG_PERF_EVENTS
+static bool
+__uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm)
+{
+ struct perf_event *event;
+
+ if (filter->nr_systemwide)
+ return true;
+
+ list_for_each_entry(event, &filter->perf_events, hw.tp_list) {
+ if (event->hw.tp_target->mm == mm)
+ return true;
+ }
+
+ return false;
+}
+
+static inline bool
+uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
+{
+ return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);
+}
+
+static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
+{
+ bool done;
+
+ write_lock(&tu->filter.rwlock);
+ if (event->hw.tp_target) {
+ /*
+ * event->parent != NULL means copy_process(), we can avoid
+ * uprobe_apply(). current->mm must be probed and we can rely
+ * on dup_mmap() which preserves the already installed bp's.
+ *
+ * attr.enable_on_exec means that exec/mmap will install the
+ * breakpoints we need.
+ */
+ done = tu->filter.nr_systemwide ||
+ event->parent || event->attr.enable_on_exec ||
+ uprobe_filter_event(tu, event);
+ list_add(&event->hw.tp_list, &tu->filter.perf_events);
+ } else {
+ done = tu->filter.nr_systemwide;
+ tu->filter.nr_systemwide++;
+ }
+ write_unlock(&tu->filter.rwlock);
+
+ if (!done)
+ uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
+
+ return 0;
+}
+
+static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
+{
+ bool done;
+
+ write_lock(&tu->filter.rwlock);
+ if (event->hw.tp_target) {
+ list_del(&event->hw.tp_list);
+ done = tu->filter.nr_systemwide ||
+ (event->hw.tp_target->flags & PF_EXITING) ||
+ uprobe_filter_event(tu, event);
+ } else {
+ tu->filter.nr_systemwide--;
+ done = tu->filter.nr_systemwide;
+ }
+ write_unlock(&tu->filter.rwlock);
+
+ if (!done)
+ uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
+
+ return 0;
+}
+
+static bool uprobe_perf_filter(struct uprobe_consumer *uc,
+ enum uprobe_filter_ctx ctx, struct mm_struct *mm)
+{
+ struct trace_uprobe *tu;
+ int ret;
+
+ tu = container_of(uc, struct trace_uprobe, consumer);
+ read_lock(&tu->filter.rwlock);
+ ret = __uprobe_perf_filter(&tu->filter, mm);
+ read_unlock(&tu->filter.rwlock);
+
+ return ret;
+}
+
/* uprobe profile handler */
-static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
+static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
{
struct ftrace_event_call *call = &tu->call;
struct uprobe_trace_entry_head *entry;
@@ -652,11 +762,14 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
int size, __size, i;
int rctx;
+ if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
+ return UPROBE_HANDLER_REMOVE;
+
__size = sizeof(*entry) + tu->size;
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
- return;
+ return 0;
preempt_disable();
@@ -664,7 +777,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
if (!entry)
goto out;
- entry->ip = uprobe_get_swbp_addr(task_pt_regs(current));
+ entry->ip = instruction_pointer(task_pt_regs(current));
data = (u8 *)&entry[1];
for (i = 0; i < tu->nr_args; i++)
call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
@@ -674,6 +787,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
out:
preempt_enable();
+ return 0;
}
#endif /* CONFIG_PERF_EVENTS */
@@ -684,7 +798,7 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,
switch (type) {
case TRACE_REG_REGISTER:
- return probe_event_enable(tu, TP_FLAG_TRACE);
+ return probe_event_enable(tu, TP_FLAG_TRACE, NULL);
case TRACE_REG_UNREGISTER:
probe_event_disable(tu, TP_FLAG_TRACE);
@@ -692,11 +806,18 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,
#ifdef CONFIG_PERF_EVENTS
case TRACE_REG_PERF_REGISTER:
- return probe_event_enable(tu, TP_FLAG_PROFILE);
+ return probe_event_enable(tu, TP_FLAG_PROFILE, uprobe_perf_filter);
case TRACE_REG_PERF_UNREGISTER:
probe_event_disable(tu, TP_FLAG_PROFILE);
return 0;
+
+ case TRACE_REG_PERF_OPEN:
+ return uprobe_perf_open(tu, data);
+
+ case TRACE_REG_PERF_CLOSE:
+ return uprobe_perf_close(tu, data);
+
#endif
default:
return 0;
@@ -706,22 +827,20 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,
static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
{
- struct uprobe_trace_consumer *utc;
struct trace_uprobe *tu;
+ int ret = 0;
- utc = container_of(con, struct uprobe_trace_consumer, cons);
- tu = utc->tu;
- if (!tu || tu->consumer != utc)
- return 0;
+ tu = container_of(con, struct trace_uprobe, consumer);
+ tu->nhit++;
if (tu->flags & TP_FLAG_TRACE)
- uprobe_trace_func(tu, regs);
+ ret |= uprobe_trace_func(tu, regs);
#ifdef CONFIG_PERF_EVENTS
if (tu->flags & TP_FLAG_PROFILE)
- uprobe_perf_func(tu, regs);
+ ret |= uprobe_perf_func(tu, regs);
#endif
- return 0;
+ return ret;
}
static struct trace_event_functions uprobe_funcs = {
diff --git a/samples/Kconfig b/samples/Kconfig
index 7b6792a..6181c2c 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -5,12 +5,6 @@ menuconfig SAMPLES
if SAMPLES
-config SAMPLE_TRACEPOINTS
- tristate "Build tracepoints examples -- loadable modules only"
- depends on TRACEPOINTS && m
- help
- This build tracepoints example modules.
-
config SAMPLE_TRACE_EVENTS
tristate "Build trace_events examples -- loadable modules only"
depends on EVENT_TRACING && m
diff --git a/samples/Makefile b/samples/Makefile
index 5ef08bb..1a60c62 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,4 +1,4 @@
# Makefile for Linux samples code
-obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ \
+obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ \
hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/
diff --git a/samples/tracepoints/Makefile b/samples/tracepoints/Makefile
deleted file mode 100644
index 36479ad..0000000
--- a/samples/tracepoints/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-# builds the tracepoint example kernel modules;
-# then to use one (as root): insmod <module_name.ko>
-
-obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-sample.o
-obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-probe-sample.o
-obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-probe-sample2.o
diff --git a/samples/tracepoints/tp-samples-trace.h b/samples/tracepoints/tp-samples-trace.h
deleted file mode 100644
index 4d46be9..0000000
--- a/samples/tracepoints/tp-samples-trace.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef _TP_SAMPLES_TRACE_H
-#define _TP_SAMPLES_TRACE_H
-
-#include <linux/proc_fs.h> /* for struct inode and struct file */
-#include <linux/tracepoint.h>
-
-DECLARE_TRACE(subsys_event,
- TP_PROTO(struct inode *inode, struct file *file),
- TP_ARGS(inode, file));
-DECLARE_TRACE_NOARGS(subsys_eventb);
-#endif
diff --git a/samples/tracepoints/tracepoint-probe-sample.c b/samples/tracepoints/tracepoint-probe-sample.c
deleted file mode 100644
index 744c0b9..0000000
--- a/samples/tracepoints/tracepoint-probe-sample.c
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * tracepoint-probe-sample.c
- *
- * sample tracepoint probes.
- */
-
-#include <linux/module.h>
-#include <linux/file.h>
-#include <linux/dcache.h>
-#include "tp-samples-trace.h"
-
-/*
- * Here the caller only guarantees locking for struct file and struct inode.
- * Locking must therefore be done in the probe to use the dentry.
- */
-static void probe_subsys_event(void *ignore,
- struct inode *inode, struct file *file)
-{
- path_get(&file->f_path);
- dget(file->f_path.dentry);
- printk(KERN_INFO "Event is encountered with filename %s\n",
- file->f_path.dentry->d_name.name);
- dput(file->f_path.dentry);
- path_put(&file->f_path);
-}
-
-static void probe_subsys_eventb(void *ignore)
-{
- printk(KERN_INFO "Event B is encountered\n");
-}
-
-static int __init tp_sample_trace_init(void)
-{
- int ret;
-
- ret = register_trace_subsys_event(probe_subsys_event, NULL);
- WARN_ON(ret);
- ret = register_trace_subsys_eventb(probe_subsys_eventb, NULL);
- WARN_ON(ret);
-
- return 0;
-}
-
-module_init(tp_sample_trace_init);
-
-static void __exit tp_sample_trace_exit(void)
-{
- unregister_trace_subsys_eventb(probe_subsys_eventb, NULL);
- unregister_trace_subsys_event(probe_subsys_event, NULL);
- tracepoint_synchronize_unregister();
-}
-
-module_exit(tp_sample_trace_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Mathieu Desnoyers");
-MODULE_DESCRIPTION("Tracepoint Probes Samples");
diff --git a/samples/tracepoints/tracepoint-probe-sample2.c b/samples/tracepoints/tracepoint-probe-sample2.c
deleted file mode 100644
index 9fcf990..0000000
--- a/samples/tracepoints/tracepoint-probe-sample2.c
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * tracepoint-probe-sample2.c
- *
- * 2nd sample tracepoint probes.
- */
-
-#include <linux/module.h>
-#include <linux/fs.h>
-#include "tp-samples-trace.h"
-
-/*
- * Here the caller only guarantees locking for struct file and struct inode.
- * Locking must therefore be done in the probe to use the dentry.
- */
-static void probe_subsys_event(void *ignore,
- struct inode *inode, struct file *file)
-{
- printk(KERN_INFO "Event is encountered with inode number %lu\n",
- inode->i_ino);
-}
-
-static int __init tp_sample_trace_init(void)
-{
- int ret;
-
- ret = register_trace_subsys_event(probe_subsys_event, NULL);
- WARN_ON(ret);
-
- return 0;
-}
-
-module_init(tp_sample_trace_init);
-
-static void __exit tp_sample_trace_exit(void)
-{
- unregister_trace_subsys_event(probe_subsys_event, NULL);
- tracepoint_synchronize_unregister();
-}
-
-module_exit(tp_sample_trace_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Mathieu Desnoyers");
-MODULE_DESCRIPTION("Tracepoint Probes Samples");
diff --git a/samples/tracepoints/tracepoint-sample.c b/samples/tracepoints/tracepoint-sample.c
deleted file mode 100644
index f4d89e0..0000000
--- a/samples/tracepoints/tracepoint-sample.c
+++ /dev/null
@@ -1,57 +0,0 @@
-/* tracepoint-sample.c
- *
- * Executes a tracepoint when /proc/tracepoint-sample is opened.
- *
- * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
- *
- * This file is released under the GPLv2.
- * See the file COPYING for more details.
- */
-
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/proc_fs.h>
-#include "tp-samples-trace.h"
-
-DEFINE_TRACE(subsys_event);
-DEFINE_TRACE(subsys_eventb);
-
-struct proc_dir_entry *pentry_sample;
-
-static int my_open(struct inode *inode, struct file *file)
-{
- int i;
-
- trace_subsys_event(inode, file);
- for (i = 0; i < 10; i++)
- trace_subsys_eventb();
- return -EPERM;
-}
-
-static const struct file_operations mark_ops = {
- .open = my_open,
- .llseek = noop_llseek,
-};
-
-static int __init sample_init(void)
-{
- printk(KERN_ALERT "sample init\n");
- pentry_sample = proc_create("tracepoint-sample", 0444, NULL,
- &mark_ops);
- if (!pentry_sample)
- return -EPERM;
- return 0;
-}
-
-static void __exit sample_exit(void)
-{
- printk(KERN_ALERT "sample exit\n");
- remove_proc_entry("tracepoint-sample", NULL);
-}
-
-module_init(sample_init)
-module_exit(sample_exit)
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Mathieu Desnoyers");
-MODULE_DESCRIPTION("Tracepoint sample");
diff --git a/tools/Makefile b/tools/Makefile
index 1f9a529..798fa0e 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -15,7 +15,7 @@ help:
@echo ' x86_energy_perf_policy - Intel energy policy tool'
@echo ''
@echo 'You can do:'
- @echo ' $$ make -C tools/<tool>_install'
+ @echo ' $$ make -C tools/ <tool>_install'
@echo ''
@echo ' from the kernel command line to build and install one of'
@echo ' the tools above'
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 5a824e3..82b0606 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -13,8 +13,7 @@
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
*
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
@@ -1224,6 +1223,34 @@ static int field_is_long(struct format_field *field)
return 0;
}
+static unsigned int type_size(const char *name)
+{
+ /* This covers all FIELD_IS_STRING types. */
+ static struct {
+ const char *type;
+ unsigned int size;
+ } table[] = {
+ { "u8", 1 },
+ { "u16", 2 },
+ { "u32", 4 },
+ { "u64", 8 },
+ { "s8", 1 },
+ { "s16", 2 },
+ { "s32", 4 },
+ { "s64", 8 },
+ { "char", 1 },
+ { },
+ };
+ int i;
+
+ for (i = 0; table[i].type; i++) {
+ if (!strcmp(table[i].type, name))
+ return table[i].size;
+ }
+
+ return 0;
+}
+
static int event_read_fields(struct event_format *event, struct format_field **fields)
{
struct format_field *field = NULL;
@@ -1233,6 +1260,8 @@ static int event_read_fields(struct event_format *event, struct format_field **f
int count = 0;
do {
+ unsigned int size_dynamic = 0;
+
type = read_token(&token);
if (type == EVENT_NEWLINE) {
free_token(token);
@@ -1391,6 +1420,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f
field->type = new_type;
strcat(field->type, " ");
strcat(field->type, field->name);
+ size_dynamic = type_size(field->name);
free_token(field->name);
strcat(field->type, brackets);
field->name = token;
@@ -1463,7 +1493,8 @@ static int event_read_fields(struct event_format *event, struct format_field **f
if (read_expect_type(EVENT_ITEM, &token))
goto fail;
- /* add signed type */
+ if (strtoul(token, NULL, 0))
+ field->flags |= FIELD_IS_SIGNED;
free_token(token);
if (read_expected(EVENT_OP, ";") < 0)
@@ -1478,10 +1509,14 @@ static int event_read_fields(struct event_format *event, struct format_field **f
if (field->flags & FIELD_IS_ARRAY) {
if (field->arraylen)
field->elementsize = field->size / field->arraylen;
+ else if (field->flags & FIELD_IS_DYNAMIC)
+ field->elementsize = size_dynamic;
else if (field->flags & FIELD_IS_STRING)
field->elementsize = 1;
- else
- field->elementsize = event->pevent->long_size;
+ else if (field->flags & FIELD_IS_LONG)
+ field->elementsize = event->pevent ?
+ event->pevent->long_size :
+ sizeof(long);
} else
field->elementsize = field->size;
@@ -1785,6 +1820,8 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok)
strcmp(token, "/") == 0 ||
strcmp(token, "<") == 0 ||
strcmp(token, ">") == 0 ||
+ strcmp(token, "<=") == 0 ||
+ strcmp(token, ">=") == 0 ||
strcmp(token, "==") == 0 ||
strcmp(token, "!=") == 0) {
@@ -2481,7 +2518,7 @@ process_dynamic_array(struct event_format *event, struct print_arg *arg, char **
free_token(token);
arg = alloc_arg();
- if (!field) {
+ if (!arg) {
do_warning("%s: not enough memory!", __func__);
*tok = NULL;
return EVENT_ERROR;
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index 24a4bba..7be7e89 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -13,8 +13,7 @@
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
*
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
diff --git a/tools/lib/traceevent/event-utils.h b/tools/lib/traceevent/event-utils.h
index bc07500..e76c9ac 100644
--- a/tools/lib/traceevent/event-utils.h
+++ b/tools/lib/traceevent/event-utils.h
@@ -13,8 +13,7 @@
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
*
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c
index 5ea4326..2500e75 100644
--- a/tools/lib/traceevent/parse-filter.c
+++ b/tools/lib/traceevent/parse-filter.c
@@ -13,8 +13,7 @@
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
*
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
diff --git a/tools/lib/traceevent/parse-utils.c b/tools/lib/traceevent/parse-utils.c
index f023a13..bba701c 100644
--- a/tools/lib/traceevent/parse-utils.c
+++ b/tools/lib/traceevent/parse-utils.c
@@ -1,3 +1,22 @@
+/*
+ * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
diff --git a/tools/lib/traceevent/trace-seq.c b/tools/lib/traceevent/trace-seq.c
index b1ccc92..a57db80 100644
--- a/tools/lib/traceevent/trace-seq.c
+++ b/tools/lib/traceevent/trace-seq.c
@@ -13,8 +13,7 @@
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
*
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index ef6d22e..eb30044 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -222,10 +222,14 @@ install-pdf: pdf
#install-html: html
# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir)
+ifneq ($(MAKECMDGOALS),clean)
+ifneq ($(MAKECMDGOALS),tags)
$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
$(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) $(OUTPUT)PERF-VERSION-FILE
-include $(OUTPUT)PERF-VERSION-FILE
+endif
+endif
#
# Determine "include::" file references in asciidoc files.
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index c8ffd9f..5ad07ef4 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -61,11 +61,13 @@ OPTIONS
--stdio:: Use the stdio interface.
---tui:: Use the TUI interface Use of --tui requires a tty, if one is not
+--tui:: Use the TUI interface. Use of --tui requires a tty, if one is not
present, as when piping to other commands, the stdio interface is
used. This interfaces starts by centering on the line with more
samples, TAB/UNTAB cycles through the lines with more samples.
+--gtk:: Use the GTK interface.
+
-C::
--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
be provided as a comma-separated list with no space: 0,1. Ranges of
@@ -88,6 +90,9 @@ OPTIONS
--objdump=<path>::
Path to objdump binary.
+--skip-missing::
+ Skip symbols that cannot be annotated.
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt
index c105770..e9a8349 100644
--- a/tools/perf/Documentation/perf-buildid-cache.txt
+++ b/tools/perf/Documentation/perf-buildid-cache.txt
@@ -24,6 +24,13 @@ OPTIONS
-r::
--remove=::
Remove specified file from the cache.
+-M::
+--missing=::
+ List missing build ids in the cache for the specified file.
+-u::
+--update::
+ Update specified file of the cache. It can be used to update kallsyms
+ kernel dso to vmlinux in order to support annotation.
-v::
--verbose::
Be more verbose.
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index 194f37d..5b3123d 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -22,10 +22,6 @@ specified perf.data files.
OPTIONS
-------
--M::
---displacement::
- Show position displacement relative to baseline.
-
-D::
--dump-raw-trace::
Dump raw trace in ASCII.
diff --git a/tools/perf/Documentation/perf-evlist.txt b/tools/perf/Documentation/perf-evlist.txt
index 1521734..1ceb370 100644
--- a/tools/perf/Documentation/perf-evlist.txt
+++ b/tools/perf/Documentation/perf-evlist.txt
@@ -28,6 +28,10 @@ OPTIONS
--verbose=::
Show all fields.
+-g::
+--group::
+ Show event group information.
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-list[1],
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index f4d91be..02284a0 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -57,11 +57,44 @@ OPTIONS
-s::
--sort=::
- Sort by key(s): pid, comm, dso, symbol, parent, srcline.
+ Sort histogram entries by given key(s) - multiple keys can be specified
+ in CSV format. Following sort keys are available:
+ pid, comm, dso, symbol, parent, cpu, srcline.
+
+ Each key has following meaning:
+
+ - comm: command (name) of the task which can be read via /proc/<pid>/comm
+ - pid: command and tid of the task
+ - dso: name of library or module executed at the time of sample
+ - symbol: name of function executed at the time of sample
+ - parent: name of function matched to the parent regex filter. Unmatched
+ entries are displayed as "[other]".
+ - cpu: cpu number the task ran at the time of sample
+ - srcline: filename and line number executed at the time of sample. The
+ DWARF debuggin info must be provided.
+
+ By default, comm, dso and symbol keys are used.
+ (i.e. --sort comm,dso,symbol)
+
+ If --branch-stack option is used, following sort keys are also
+ available:
+ dso_from, dso_to, symbol_from, symbol_to, mispredict.
+
+ - dso_from: name of library or module branched from
+ - dso_to: name of library or module branched to
+ - symbol_from: name of function branched from
+ - symbol_to: name of function branched to
+ - mispredict: "N" for predicted branch, "Y" for mispredicted branch
+
+ And default sort keys are changed to comm, dso_from, symbol_from, dso_to
+ and symbol_to, see '--branch-stack'.
-p::
--parent=<regex>::
- regex filter to identify parent, see: '--sort parent'
+ A regex filter to identify parent. The parent is a caller of this
+ function and searched through the callchain, thus it requires callchain
+ information recorded. The pattern is in the exteneded regex format and
+ defaults to "\^sys_|^do_page_fault", see '--sort parent'.
-x::
--exclude-other::
@@ -74,7 +107,6 @@ OPTIONS
-t::
--field-separator=::
-
Use a special separator character and don't pad with spaces, replacing
all occurrences of this separator in symbol names (and other output)
with a '.' character, that thus it's the only non valid separator.
@@ -171,6 +203,9 @@ OPTIONS
--objdump=<path>::
Path to objdump binary.
+--group::
+ Show event group information together.
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-annotate[1]
diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt
index a4027f2..9f1f054 100644
--- a/tools/perf/Documentation/perf-script-python.txt
+++ b/tools/perf/Documentation/perf-script-python.txt
@@ -336,7 +336,6 @@ scripts listed by the 'perf script -l' command e.g.:
----
root@tropicana:~# perf script -l
List of available trace scripts:
- workqueue-stats workqueue stats (ins/exe/create/destroy)
wakeup-latency system-wide min/max/avg wakeup latency
rw-by-file <comm> r/w activity for a program, by file
rw-by-pid system-wide r/w activity
@@ -402,7 +401,6 @@ should show a new entry for your script:
----
root@tropicana:~# perf script -l
List of available trace scripts:
- workqueue-stats workqueue stats (ins/exe/create/destroy)
wakeup-latency system-wide min/max/avg wakeup latency
rw-by-file <comm> r/w activity for a program, by file
rw-by-pid system-wide r/w activity
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index cf0c310..faf4f4f 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -114,6 +114,17 @@ with it. --append may be used here. Examples:
perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- make -s -j64 O=defconfig-build/ bzImage
+-I msecs::
+--interval-print msecs::
+ Print count deltas every N milliseconds (minimum: 100ms)
+ example: perf stat -I 1000 -e cycles -a sleep 5
+
+--aggr-socket::
+Aggregate counts per processor socket for system-wide mode measurements. This
+is a useful mode to detect imbalance between sockets. To enable this mode,
+use --aggr-socket in addition to -a. (system-wide). The output includes the
+socket number and the number of online processors on that socket. This is
+useful to gauge the amount of aggregation.
EXAMPLES
--------
diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt
index b24ac40..d1d3e5121 100644
--- a/tools/perf/Documentation/perf-test.txt
+++ b/tools/perf/Documentation/perf-test.txt
@@ -23,6 +23,10 @@ from 'perf test list'.
OPTIONS
-------
+-s::
+--skip::
+ Tests to skip (comma separater numeric list).
+
-v::
--verbose::
Be more verbose.
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 5b80d84..a414bc9 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -60,7 +60,7 @@ Default is to monitor all CPUS.
-i::
--inherit::
- Child tasks inherit counters, only makes sens with -p option.
+ Child tasks do not inherit counters.
-k <path>::
--vmlinux=<path>::
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 8ab05e54..a2108ca 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -47,10 +47,11 @@ include config/utilities.mak
# backtrace post unwind.
#
# Define NO_BACKTRACE if you do not want stack backtrace debug feature
+#
+# Define NO_LIBNUMA if you do not want numa perf benchmark
$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
@$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
--include $(OUTPUT)PERF-VERSION-FILE
uname_M := $(shell uname -m 2>/dev/null || echo not)
@@ -148,13 +149,25 @@ RM = rm -f
MKDIR = mkdir
FIND = find
INSTALL = install
+FLEX = flex
+BISON= bison
# sparse is architecture-neutral, which means that we need to tell it
# explicitly what architecture to check for. Fix this up for yours..
SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
+ifneq ($(MAKECMDGOALS),clean)
+ifneq ($(MAKECMDGOALS),tags)
-include config/feature-tests.mak
+ifeq ($(call get-executable,$(FLEX)),)
+ dummy := $(error Error: $(FLEX) is missing on this system, please install it)
+endif
+
+ifeq ($(call get-executable,$(BISON)),)
+ dummy := $(error Error: $(BISON) is missing on this system, please install it)
+endif
+
ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -fstack-protector-all,-fstack-protector-all),y)
CFLAGS := $(CFLAGS) -fstack-protector-all
endif
@@ -206,6 +219,8 @@ ifeq ($(call try-cc,$(SOURCE_BIONIC),$(CFLAGS),bionic),y)
EXTLIBS := $(filter-out -lpthread,$(EXTLIBS))
BASIC_CFLAGS += -I.
endif
+endif # MAKECMDGOALS != tags
+endif # MAKECMDGOALS != clean
# Guard against environment variables
BUILTIN_OBJS =
@@ -230,11 +245,19 @@ endif
LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
TE_LIB := -L$(TE_PATH) -ltraceevent
+export LIBTRACEEVENT
+
+# python extension build directories
+PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/
+PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/
+PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/
+export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
+
+python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so
+
PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py
-export LIBTRACEEVENT
-
$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
$(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \
--quiet build_ext; \
@@ -269,20 +292,17 @@ endif
export PERL_PATH
-FLEX = flex
-BISON= bison
-
$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
$(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) -t util/parse-events.l > $(OUTPUT)util/parse-events-flex.c
$(OUTPUT)util/parse-events-bison.c: util/parse-events.y
- $(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c
+ $(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c -p parse_events_
$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
$(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/pmu-flex.h -t util/pmu.l > $(OUTPUT)util/pmu-flex.c
$(OUTPUT)util/pmu-bison.c: util/pmu.y
- $(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c
+ $(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_
$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
@@ -378,8 +398,11 @@ LIB_H += util/rblist.h
LIB_H += util/intlist.h
LIB_H += util/perf_regs.h
LIB_H += util/unwind.h
-LIB_H += ui/helpline.h
LIB_H += util/vdso.h
+LIB_H += ui/helpline.h
+LIB_H += ui/progress.h
+LIB_H += ui/util.h
+LIB_H += ui/ui.h
LIB_OBJS += $(OUTPUT)util/abspath.o
LIB_OBJS += $(OUTPUT)util/alias.o
@@ -453,6 +476,7 @@ LIB_OBJS += $(OUTPUT)util/stat.o
LIB_OBJS += $(OUTPUT)ui/setup.o
LIB_OBJS += $(OUTPUT)ui/helpline.o
LIB_OBJS += $(OUTPUT)ui/progress.o
+LIB_OBJS += $(OUTPUT)ui/util.o
LIB_OBJS += $(OUTPUT)ui/hist.o
LIB_OBJS += $(OUTPUT)ui/stdio/hist.o
@@ -471,7 +495,8 @@ LIB_OBJS += $(OUTPUT)tests/rdpmc.o
LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o
LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o
LIB_OBJS += $(OUTPUT)tests/pmu.o
-LIB_OBJS += $(OUTPUT)tests/util.o
+LIB_OBJS += $(OUTPUT)tests/hists_link.o
+LIB_OBJS += $(OUTPUT)tests/python-use.o
BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
@@ -510,14 +535,13 @@ PERFLIBS = $(LIB_FILE) $(LIBTRACEEVENT)
#
# Platform specific tweaks
#
+ifneq ($(MAKECMDGOALS),clean)
+ifneq ($(MAKECMDGOALS),tags)
# We choose to avoid "if .. else if .. else .. endif endif"
# because maintaining the nesting to match is a pain. If
# we had "elif" things would have been much nicer...
--include config.mak.autogen
--include config.mak
-
ifdef NO_LIBELF
NO_DWARF := 1
NO_DEMANGLE := 1
@@ -557,6 +581,11 @@ else
endif # SOURCE_LIBELF
endif # NO_LIBELF
+# There's only x86 (both 32 and 64) support for CFI unwind so far
+ifneq ($(ARCH),x86)
+ NO_LIBUNWIND := 1
+endif
+
ifndef NO_LIBUNWIND
# for linking with debug library, run like:
# make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/
@@ -646,7 +675,6 @@ ifndef NO_NEWT
LIB_OBJS += $(OUTPUT)ui/browsers/hists.o
LIB_OBJS += $(OUTPUT)ui/browsers/map.o
LIB_OBJS += $(OUTPUT)ui/browsers/scripts.o
- LIB_OBJS += $(OUTPUT)ui/util.o
LIB_OBJS += $(OUTPUT)ui/tui/setup.o
LIB_OBJS += $(OUTPUT)ui/tui/util.o
LIB_OBJS += $(OUTPUT)ui/tui/helpline.o
@@ -655,9 +683,6 @@ ifndef NO_NEWT
LIB_H += ui/browsers/map.h
LIB_H += ui/keysyms.h
LIB_H += ui/libslang.h
- LIB_H += ui/progress.h
- LIB_H += ui/util.h
- LIB_H += ui/ui.h
endif
endif
@@ -673,14 +698,12 @@ ifndef NO_GTK2
BASIC_CFLAGS += $(shell pkg-config --cflags gtk+-2.0 2>/dev/null)
EXTLIBS += $(shell pkg-config --libs gtk+-2.0 2>/dev/null)
LIB_OBJS += $(OUTPUT)ui/gtk/browser.o
+ LIB_OBJS += $(OUTPUT)ui/gtk/hists.o
LIB_OBJS += $(OUTPUT)ui/gtk/setup.o
LIB_OBJS += $(OUTPUT)ui/gtk/util.o
LIB_OBJS += $(OUTPUT)ui/gtk/helpline.o
LIB_OBJS += $(OUTPUT)ui/gtk/progress.o
- # Make sure that it'd be included only once.
- ifeq ($(findstring -DNEWT_SUPPORT,$(BASIC_CFLAGS)),)
- LIB_OBJS += $(OUTPUT)ui/util.o
- endif
+ LIB_OBJS += $(OUTPUT)ui/gtk/annotate.o
endif
endif
@@ -707,7 +730,7 @@ disable-python = $(eval $(disable-python_code))
define disable-python_code
BASIC_CFLAGS += -DNO_LIBPYTHON
$(if $(1),$(warning No $(1) was found))
- $(warning Python support won't be built)
+ $(warning Python support will not be built)
endef
override PYTHON := \
@@ -715,19 +738,10 @@ override PYTHON := \
ifndef PYTHON
$(call disable-python,python interpreter)
- python-clean :=
else
PYTHON_WORD := $(call shell-wordify,$(PYTHON))
- # python extension build directories
- PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/
- PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/
- PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/
- export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
-
- python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so
-
ifdef NO_LIBPYTHON
$(call disable-python)
else
@@ -839,10 +853,24 @@ ifndef NO_BACKTRACE
endif
endif
+ifndef NO_LIBNUMA
+ FLAGS_LIBNUMA = $(ALL_CFLAGS) $(ALL_LDFLAGS) -lnuma
+ ifneq ($(call try-cc,$(SOURCE_LIBNUMA),$(FLAGS_LIBNUMA),libnuma),y)
+ msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numa-libs-devel or libnuma-dev);
+ else
+ BASIC_CFLAGS += -DLIBNUMA_SUPPORT
+ BUILTIN_OBJS += $(OUTPUT)bench/numa.o
+ EXTLIBS += -lnuma
+ endif
+endif
+
ifdef ASCIIDOC8
export ASCIIDOC8
endif
+endif # MAKECMDGOALS != tags
+endif # MAKECMDGOALS != clean
+
# Shell quote (do not use $(call) to accommodate ancient setups);
ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG))
@@ -884,7 +912,7 @@ strip: $(PROGRAMS) $(OUTPUT)perf
$(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf
$(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -DPERF_VERSION='"$(PERF_VERSION)"' \
+ $(QUIET_CC)$(CC) -include $(OUTPUT)PERF-VERSION-FILE \
'-DPERF_HTML_PATH="$(htmldir_SQ)"' \
$(ALL_CFLAGS) -c $(filter %.c,$^) -o $@
@@ -948,7 +976,13 @@ $(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS
$(OUTPUT)tests/attr.o: tests/attr.c $(OUTPUT)PERF-CFLAGS
$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \
- '-DBINDIR="$(bindir_SQ)"' \
+ '-DBINDIR="$(bindir_SQ)"' -DPYTHON='"$(PYTHON_WORD)"' \
+ $<
+
+$(OUTPUT)tests/python-use.o: tests/python-use.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \
+ -DPYTHONPATH='"$(OUTPUT)python"' \
+ -DPYTHON='"$(PYTHON_WORD)"' \
$<
$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS
@@ -1099,7 +1133,7 @@ perfexec_instdir = $(prefix)/$(perfexecdir)
endif
perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
-install: all try-install-man
+install-bin: all
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
$(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
@@ -1120,6 +1154,8 @@ install: all try-install-man
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
$(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
+install: install-bin try-install-man
+
install-python_ext:
$(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)'
diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
index 3e975cb..aacef07 100644
--- a/tools/perf/arch/common.c
+++ b/tools/perf/arch/common.c
@@ -155,6 +155,7 @@ static int perf_session_env__lookup_binutils_path(struct perf_session_env *env,
if (lookup_path(buf))
goto out;
free(buf);
+ buf = NULL;
}
if (!strcmp(arch, "arm"))
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 8f89998..a5223e6 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -1,6 +1,7 @@
#ifndef BENCH_H
#define BENCH_H
+extern int bench_numa(int argc, const char **argv, const char *prefix);
extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
extern int bench_mem_memcpy(int argc, const char **argv,
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
new file mode 100644
index 0000000..30d1c322
--- /dev/null
+++ b/tools/perf/bench/numa.c
@@ -0,0 +1,1731 @@
+/*
+ * numa.c
+ *
+ * numa: Simulate NUMA-sensitive workload and measure their NUMA performance
+ */
+
+#include "../perf.h"
+#include "../builtin.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+
+#include "bench.h"
+
+#include <errno.h>
+#include <sched.h>
+#include <stdio.h>
+#include <assert.h>
+#include <malloc.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+
+#include <numa.h>
+#include <numaif.h>
+
+/*
+ * Regular printout to the terminal, supressed if -q is specified:
+ */
+#define tprintf(x...) do { if (g && g->p.show_details >= 0) printf(x); } while (0)
+
+/*
+ * Debug printf:
+ */
+#define dprintf(x...) do { if (g && g->p.show_details >= 1) printf(x); } while (0)
+
+struct thread_data {
+ int curr_cpu;
+ cpu_set_t bind_cpumask;
+ int bind_node;
+ u8 *process_data;
+ int process_nr;
+ int thread_nr;
+ int task_nr;
+ unsigned int loops_done;
+ u64 val;
+ u64 runtime_ns;
+ pthread_mutex_t *process_lock;
+};
+
+/* Parameters set by options: */
+
+struct params {
+ /* Startup synchronization: */
+ bool serialize_startup;
+
+ /* Task hierarchy: */
+ int nr_proc;
+ int nr_threads;
+
+ /* Working set sizes: */
+ const char *mb_global_str;
+ const char *mb_proc_str;
+ const char *mb_proc_locked_str;
+ const char *mb_thread_str;
+
+ double mb_global;
+ double mb_proc;
+ double mb_proc_locked;
+ double mb_thread;
+
+ /* Access patterns to the working set: */
+ bool data_reads;
+ bool data_writes;
+ bool data_backwards;
+ bool data_zero_memset;
+ bool data_rand_walk;
+ u32 nr_loops;
+ u32 nr_secs;
+ u32 sleep_usecs;
+
+ /* Working set initialization: */
+ bool init_zero;
+ bool init_random;
+ bool init_cpu0;
+
+ /* Misc options: */
+ int show_details;
+ int run_all;
+ int thp;
+
+ long bytes_global;
+ long bytes_process;
+ long bytes_process_locked;
+ long bytes_thread;
+
+ int nr_tasks;
+ bool show_quiet;
+
+ bool show_convergence;
+ bool measure_convergence;
+
+ int perturb_secs;
+ int nr_cpus;
+ int nr_nodes;
+
+ /* Affinity options -C and -N: */
+ char *cpu_list_str;
+ char *node_list_str;
+};
+
+
+/* Global, read-writable area, accessible to all processes and threads: */
+
+struct global_info {
+ u8 *data;
+
+ pthread_mutex_t startup_mutex;
+ int nr_tasks_started;
+
+ pthread_mutex_t startup_done_mutex;
+
+ pthread_mutex_t start_work_mutex;
+ int nr_tasks_working;
+
+ pthread_mutex_t stop_work_mutex;
+ u64 bytes_done;
+
+ struct thread_data *threads;
+
+ /* Convergence latency measurement: */
+ bool all_converged;
+ bool stop_work;
+
+ int print_once;
+
+ struct params p;
+};
+
+static struct global_info *g = NULL;
+
+static int parse_cpus_opt(const struct option *opt, const char *arg, int unset);
+static int parse_nodes_opt(const struct option *opt, const char *arg, int unset);
+
+struct params p0;
+
+static const struct option options[] = {
+ OPT_INTEGER('p', "nr_proc" , &p0.nr_proc, "number of processes"),
+ OPT_INTEGER('t', "nr_threads" , &p0.nr_threads, "number of threads per process"),
+
+ OPT_STRING('G', "mb_global" , &p0.mb_global_str, "MB", "global memory (MBs)"),
+ OPT_STRING('P', "mb_proc" , &p0.mb_proc_str, "MB", "process memory (MBs)"),
+ OPT_STRING('L', "mb_proc_locked", &p0.mb_proc_locked_str,"MB", "process serialized/locked memory access (MBs), <= process_memory"),
+ OPT_STRING('T', "mb_thread" , &p0.mb_thread_str, "MB", "thread memory (MBs)"),
+
+ OPT_UINTEGER('l', "nr_loops" , &p0.nr_loops, "max number of loops to run"),
+ OPT_UINTEGER('s', "nr_secs" , &p0.nr_secs, "max number of seconds to run"),
+ OPT_UINTEGER('u', "usleep" , &p0.sleep_usecs, "usecs to sleep per loop iteration"),
+
+ OPT_BOOLEAN('R', "data_reads" , &p0.data_reads, "access the data via writes (can be mixed with -W)"),
+ OPT_BOOLEAN('W', "data_writes" , &p0.data_writes, "access the data via writes (can be mixed with -R)"),
+ OPT_BOOLEAN('B', "data_backwards", &p0.data_backwards, "access the data backwards as well"),
+ OPT_BOOLEAN('Z', "data_zero_memset", &p0.data_zero_memset,"access the data via glibc bzero only"),
+ OPT_BOOLEAN('r', "data_rand_walk", &p0.data_rand_walk, "access the data with random (32bit LFSR) walk"),
+
+
+ OPT_BOOLEAN('z', "init_zero" , &p0.init_zero, "bzero the initial allocations"),
+ OPT_BOOLEAN('I', "init_random" , &p0.init_random, "randomize the contents of the initial allocations"),
+ OPT_BOOLEAN('0', "init_cpu0" , &p0.init_cpu0, "do the initial allocations on CPU#0"),
+ OPT_INTEGER('x', "perturb_secs", &p0.perturb_secs, "perturb thread 0/0 every X secs, to test convergence stability"),
+
+ OPT_INCR ('d', "show_details" , &p0.show_details, "Show details"),
+ OPT_INCR ('a', "all" , &p0.run_all, "Run all tests in the suite"),
+ OPT_INTEGER('H', "thp" , &p0.thp, "MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"),
+ OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details"),
+ OPT_BOOLEAN('m', "measure_convergence", &p0.measure_convergence, "measure convergence latency"),
+ OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "bzero the initial allocations"),
+ OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"),
+
+ /* Special option string parsing callbacks: */
+ OPT_CALLBACK('C', "cpus", NULL, "cpu[,cpu2,...cpuN]",
+ "bind the first N tasks to these specific cpus (the rest is unbound)",
+ parse_cpus_opt),
+ OPT_CALLBACK('M', "memnodes", NULL, "node[,node2,...nodeN]",
+ "bind the first N tasks to these specific memory nodes (the rest is unbound)",
+ parse_nodes_opt),
+ OPT_END()
+};
+
+static const char * const bench_numa_usage[] = {
+ "perf bench numa <options>",
+ NULL
+};
+
+static const char * const numa_usage[] = {
+ "perf bench numa mem [<options>]",
+ NULL
+};
+
+static cpu_set_t bind_to_cpu(int target_cpu)
+{
+ cpu_set_t orig_mask, mask;
+ int ret;
+
+ ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
+ BUG_ON(ret);
+
+ CPU_ZERO(&mask);
+
+ if (target_cpu == -1) {
+ int cpu;
+
+ for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
+ CPU_SET(cpu, &mask);
+ } else {
+ BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus);
+ CPU_SET(target_cpu, &mask);
+ }
+
+ ret = sched_setaffinity(0, sizeof(mask), &mask);
+ BUG_ON(ret);
+
+ return orig_mask;
+}
+
+static cpu_set_t bind_to_node(int target_node)
+{
+ int cpus_per_node = g->p.nr_cpus/g->p.nr_nodes;
+ cpu_set_t orig_mask, mask;
+ int cpu;
+ int ret;
+
+ BUG_ON(cpus_per_node*g->p.nr_nodes != g->p.nr_cpus);
+ BUG_ON(!cpus_per_node);
+
+ ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
+ BUG_ON(ret);
+
+ CPU_ZERO(&mask);
+
+ if (target_node == -1) {
+ for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
+ CPU_SET(cpu, &mask);
+ } else {
+ int cpu_start = (target_node + 0) * cpus_per_node;
+ int cpu_stop = (target_node + 1) * cpus_per_node;
+
+ BUG_ON(cpu_stop > g->p.nr_cpus);
+
+ for (cpu = cpu_start; cpu < cpu_stop; cpu++)
+ CPU_SET(cpu, &mask);
+ }
+
+ ret = sched_setaffinity(0, sizeof(mask), &mask);
+ BUG_ON(ret);
+
+ return orig_mask;
+}
+
+static void bind_to_cpumask(cpu_set_t mask)
+{
+ int ret;
+
+ ret = sched_setaffinity(0, sizeof(mask), &mask);
+ BUG_ON(ret);
+}
+
+static void mempol_restore(void)
+{
+ int ret;
+
+ ret = set_mempolicy(MPOL_DEFAULT, NULL, g->p.nr_nodes-1);
+
+ BUG_ON(ret);
+}
+
+static void bind_to_memnode(int node)
+{
+ unsigned long nodemask;
+ int ret;
+
+ if (node == -1)
+ return;
+
+ BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask));
+ nodemask = 1L << node;
+
+ ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8);
+ dprintf("binding to node %d, mask: %016lx => %d\n", node, nodemask, ret);
+
+ BUG_ON(ret);
+}
+
+#define HPSIZE (2*1024*1024)
+
+#define set_taskname(fmt...) \
+do { \
+ char name[20]; \
+ \
+ snprintf(name, 20, fmt); \
+ prctl(PR_SET_NAME, name); \
+} while (0)
+
+static u8 *alloc_data(ssize_t bytes0, int map_flags,
+ int init_zero, int init_cpu0, int thp, int init_random)
+{
+ cpu_set_t orig_mask;
+ ssize_t bytes;
+ u8 *buf;
+ int ret;
+
+ if (!bytes0)
+ return NULL;
+
+ /* Allocate and initialize all memory on CPU#0: */
+ if (init_cpu0) {
+ orig_mask = bind_to_node(0);
+ bind_to_memnode(0);
+ }
+
+ bytes = bytes0 + HPSIZE;
+
+ buf = (void *)mmap(0, bytes, PROT_READ|PROT_WRITE, MAP_ANON|map_flags, -1, 0);
+ BUG_ON(buf == (void *)-1);
+
+ if (map_flags == MAP_PRIVATE) {
+ if (thp > 0) {
+ ret = madvise(buf, bytes, MADV_HUGEPAGE);
+ if (ret && !g->print_once) {
+ g->print_once = 1;
+ printf("WARNING: Could not enable THP - do: 'echo madvise > /sys/kernel/mm/transparent_hugepage/enabled'\n");
+ }
+ }
+ if (thp < 0) {
+ ret = madvise(buf, bytes, MADV_NOHUGEPAGE);
+ if (ret && !g->print_once) {
+ g->print_once = 1;
+ printf("WARNING: Could not disable THP: run a CONFIG_TRANSPARENT_HUGEPAGE kernel?\n");
+ }
+ }
+ }
+
+ if (init_zero) {
+ bzero(buf, bytes);
+ } else {
+ /* Initialize random contents, different in each word: */
+ if (init_random) {
+ u64 *wbuf = (void *)buf;
+ long off = rand();
+ long i;
+
+ for (i = 0; i < bytes/8; i++)
+ wbuf[i] = i + off;
+ }
+ }
+
+ /* Align to 2MB boundary: */
+ buf = (void *)(((unsigned long)buf + HPSIZE-1) & ~(HPSIZE-1));
+
+ /* Restore affinity: */
+ if (init_cpu0) {
+ bind_to_cpumask(orig_mask);
+ mempol_restore();
+ }
+
+ return buf;
+}
+
+static void free_data(void *data, ssize_t bytes)
+{
+ int ret;
+
+ if (!data)
+ return;
+
+ ret = munmap(data, bytes);
+ BUG_ON(ret);
+}
+
+/*
+ * Create a shared memory buffer that can be shared between processes, zeroed:
+ */
+static void * zalloc_shared_data(ssize_t bytes)
+{
+ return alloc_data(bytes, MAP_SHARED, 1, g->p.init_cpu0, g->p.thp, g->p.init_random);
+}
+
+/*
+ * Create a shared memory buffer that can be shared between processes:
+ */
+static void * setup_shared_data(ssize_t bytes)
+{
+ return alloc_data(bytes, MAP_SHARED, 0, g->p.init_cpu0, g->p.thp, g->p.init_random);
+}
+
+/*
+ * Allocate process-local memory - this will either be shared between
+ * threads of this process, or only be accessed by this thread:
+ */
+static void * setup_private_data(ssize_t bytes)
+{
+ return alloc_data(bytes, MAP_PRIVATE, 0, g->p.init_cpu0, g->p.thp, g->p.init_random);
+}
+
+/*
+ * Return a process-shared (global) mutex:
+ */
+static void init_global_mutex(pthread_mutex_t *mutex)
+{
+ pthread_mutexattr_t attr;
+
+ pthread_mutexattr_init(&attr);
+ pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
+ pthread_mutex_init(mutex, &attr);
+}
+
+static int parse_cpu_list(const char *arg)
+{
+ p0.cpu_list_str = strdup(arg);
+
+ dprintf("got CPU list: {%s}\n", p0.cpu_list_str);
+
+ return 0;
+}
+
+static void parse_setup_cpu_list(void)
+{
+ struct thread_data *td;
+ char *str0, *str;
+ int t;
+
+ if (!g->p.cpu_list_str)
+ return;
+
+ dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
+
+ str0 = str = strdup(g->p.cpu_list_str);
+ t = 0;
+
+ BUG_ON(!str);
+
+ tprintf("# binding tasks to CPUs:\n");
+ tprintf("# ");
+
+ while (true) {
+ int bind_cpu, bind_cpu_0, bind_cpu_1;
+ char *tok, *tok_end, *tok_step, *tok_len, *tok_mul;
+ int bind_len;
+ int step;
+ int mul;
+
+ tok = strsep(&str, ",");
+ if (!tok)
+ break;
+
+ tok_end = strstr(tok, "-");
+
+ dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end);
+ if (!tok_end) {
+ /* Single CPU specified: */
+ bind_cpu_0 = bind_cpu_1 = atol(tok);
+ } else {
+ /* CPU range specified (for example: "5-11"): */
+ bind_cpu_0 = atol(tok);
+ bind_cpu_1 = atol(tok_end + 1);
+ }
+
+ step = 1;
+ tok_step = strstr(tok, "#");
+ if (tok_step) {
+ step = atol(tok_step + 1);
+ BUG_ON(step <= 0 || step >= g->p.nr_cpus);
+ }
+
+ /*
+ * Mask length.
+ * Eg: "--cpus 8_4-16#4" means: '--cpus 8_4,12_4,16_4',
+ * where the _4 means the next 4 CPUs are allowed.
+ */
+ bind_len = 1;
+ tok_len = strstr(tok, "_");
+ if (tok_len) {
+ bind_len = atol(tok_len + 1);
+ BUG_ON(bind_len <= 0 || bind_len > g->p.nr_cpus);
+ }
+
+ /* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */
+ mul = 1;
+ tok_mul = strstr(tok, "x");
+ if (tok_mul) {
+ mul = atol(tok_mul + 1);
+ BUG_ON(mul <= 0);
+ }
+
+ dprintf("CPUs: %d_%d-%d#%dx%d\n", bind_cpu_0, bind_len, bind_cpu_1, step, mul);
+
+ BUG_ON(bind_cpu_0 < 0 || bind_cpu_0 >= g->p.nr_cpus);
+ BUG_ON(bind_cpu_1 < 0 || bind_cpu_1 >= g->p.nr_cpus);
+ BUG_ON(bind_cpu_0 > bind_cpu_1);
+
+ for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) {
+ int i;
+
+ for (i = 0; i < mul; i++) {
+ int cpu;
+
+ if (t >= g->p.nr_tasks) {
+ printf("\n# NOTE: ignoring bind CPUs starting at CPU#%d\n #", bind_cpu);
+ goto out;
+ }
+ td = g->threads + t;
+
+ if (t)
+ tprintf(",");
+ if (bind_len > 1) {
+ tprintf("%2d/%d", bind_cpu, bind_len);
+ } else {
+ tprintf("%2d", bind_cpu);
+ }
+
+ CPU_ZERO(&td->bind_cpumask);
+ for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) {
+ BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus);
+ CPU_SET(cpu, &td->bind_cpumask);
+ }
+ t++;
+ }
+ }
+ }
+out:
+
+ tprintf("\n");
+
+ if (t < g->p.nr_tasks)
+ printf("# NOTE: %d tasks bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
+
+ free(str0);
+}
+
+static int parse_cpus_opt(const struct option *opt __maybe_unused,
+ const char *arg, int unset __maybe_unused)
+{
+ if (!arg)
+ return -1;
+
+ return parse_cpu_list(arg);
+}
+
+static int parse_node_list(const char *arg)
+{
+ p0.node_list_str = strdup(arg);
+
+ dprintf("got NODE list: {%s}\n", p0.node_list_str);
+
+ return 0;
+}
+
+static void parse_setup_node_list(void)
+{
+ struct thread_data *td;
+ char *str0, *str;
+ int t;
+
+ if (!g->p.node_list_str)
+ return;
+
+ dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
+
+ str0 = str = strdup(g->p.node_list_str);
+ t = 0;
+
+ BUG_ON(!str);
+
+ tprintf("# binding tasks to NODEs:\n");
+ tprintf("# ");
+
+ while (true) {
+ int bind_node, bind_node_0, bind_node_1;
+ char *tok, *tok_end, *tok_step, *tok_mul;
+ int step;
+ int mul;
+
+ tok = strsep(&str, ",");
+ if (!tok)
+ break;
+
+ tok_end = strstr(tok, "-");
+
+ dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end);
+ if (!tok_end) {
+ /* Single NODE specified: */
+ bind_node_0 = bind_node_1 = atol(tok);
+ } else {
+ /* NODE range specified (for example: "5-11"): */
+ bind_node_0 = atol(tok);
+ bind_node_1 = atol(tok_end + 1);
+ }
+
+ step = 1;
+ tok_step = strstr(tok, "#");
+ if (tok_step) {
+ step = atol(tok_step + 1);
+ BUG_ON(step <= 0 || step >= g->p.nr_nodes);
+ }
+
+ /* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */
+ mul = 1;
+ tok_mul = strstr(tok, "x");
+ if (tok_mul) {
+ mul = atol(tok_mul + 1);
+ BUG_ON(mul <= 0);
+ }
+
+ dprintf("NODEs: %d-%d #%d\n", bind_node_0, bind_node_1, step);
+
+ BUG_ON(bind_node_0 < 0 || bind_node_0 >= g->p.nr_nodes);
+ BUG_ON(bind_node_1 < 0 || bind_node_1 >= g->p.nr_nodes);
+ BUG_ON(bind_node_0 > bind_node_1);
+
+ for (bind_node = bind_node_0; bind_node <= bind_node_1; bind_node += step) {
+ int i;
+
+ for (i = 0; i < mul; i++) {
+ if (t >= g->p.nr_tasks) {
+ printf("\n# NOTE: ignoring bind NODEs starting at NODE#%d\n", bind_node);
+ goto out;
+ }
+ td = g->threads + t;
+
+ if (!t)
+ tprintf(" %2d", bind_node);
+ else
+ tprintf(",%2d", bind_node);
+
+ td->bind_node = bind_node;
+ t++;
+ }
+ }
+ }
+out:
+
+ tprintf("\n");
+
+ if (t < g->p.nr_tasks)
+ printf("# NOTE: %d tasks mem-bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
+
+ free(str0);
+}
+
+static int parse_nodes_opt(const struct option *opt __maybe_unused,
+ const char *arg, int unset __maybe_unused)
+{
+ if (!arg)
+ return -1;
+
+ return parse_node_list(arg);
+
+ return 0;
+}
+
+#define BIT(x) (1ul << x)
+
+static inline uint32_t lfsr_32(uint32_t lfsr)
+{
+ const uint32_t taps = BIT(1) | BIT(5) | BIT(6) | BIT(31);
+ return (lfsr>>1) ^ ((0x0u - (lfsr & 0x1u)) & taps);
+}
+
+/*
+ * Make sure there's real data dependency to RAM (when read
+ * accesses are enabled), so the compiler, the CPU and the
+ * kernel (KSM, zero page, etc.) cannot optimize away RAM
+ * accesses:
+ */
+static inline u64 access_data(u64 *data __attribute__((unused)), u64 val)
+{
+ if (g->p.data_reads)
+ val += *data;
+ if (g->p.data_writes)
+ *data = val + 1;
+ return val;
+}
+
+/*
+ * The worker process does two types of work, a forwards going
+ * loop and a backwards going loop.
+ *
+ * We do this so that on multiprocessor systems we do not create
+ * a 'train' of processing, with highly synchronized processes,
+ * skewing the whole benchmark.
+ */
+static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val)
+{
+ long words = bytes/sizeof(u64);
+ u64 *data = (void *)__data;
+ long chunk_0, chunk_1;
+ u64 *d0, *d, *d1;
+ long off;
+ long i;
+
+ BUG_ON(!data && words);
+ BUG_ON(data && !words);
+
+ if (!data)
+ return val;
+
+ /* Very simple memset() work variant: */
+ if (g->p.data_zero_memset && !g->p.data_rand_walk) {
+ bzero(data, bytes);
+ return val;
+ }
+
+ /* Spread out by PID/TID nr and by loop nr: */
+ chunk_0 = words/nr_max;
+ chunk_1 = words/g->p.nr_loops;
+ off = nr*chunk_0 + loop*chunk_1;
+
+ while (off >= words)
+ off -= words;
+
+ if (g->p.data_rand_walk) {
+ u32 lfsr = nr + loop + val;
+ int j;
+
+ for (i = 0; i < words/1024; i++) {
+ long start, end;
+
+ lfsr = lfsr_32(lfsr);
+
+ start = lfsr % words;
+ end = min(start + 1024, words-1);
+
+ if (g->p.data_zero_memset) {
+ bzero(data + start, (end-start) * sizeof(u64));
+ } else {
+ for (j = start; j < end; j++)
+ val = access_data(data + j, val);
+ }
+ }
+ } else if (!g->p.data_backwards || (nr + loop) & 1) {
+
+ d0 = data + off;
+ d = data + off + 1;
+ d1 = data + words;
+
+ /* Process data forwards: */
+ for (;;) {
+ if (unlikely(d >= d1))
+ d = data;
+ if (unlikely(d == d0))
+ break;
+
+ val = access_data(d, val);
+
+ d++;
+ }
+ } else {
+ /* Process data backwards: */
+
+ d0 = data + off;
+ d = data + off - 1;
+ d1 = data + words;
+
+ /* Process data forwards: */
+ for (;;) {
+ if (unlikely(d < data))
+ d = data + words-1;
+ if (unlikely(d == d0))
+ break;
+
+ val = access_data(d, val);
+
+ d--;
+ }
+ }
+
+ return val;
+}
+
+static void update_curr_cpu(int task_nr, unsigned long bytes_worked)
+{
+ unsigned int cpu;
+
+ cpu = sched_getcpu();
+
+ g->threads[task_nr].curr_cpu = cpu;
+ prctl(0, bytes_worked);
+}
+
+#define MAX_NR_NODES 64
+
+/*
+ * Count the number of nodes a process's threads
+ * are spread out on.
+ *
+ * A count of 1 means that the process is compressed
+ * to a single node. A count of g->p.nr_nodes means it's
+ * spread out on the whole system.
+ */
+static int count_process_nodes(int process_nr)
+{
+ char node_present[MAX_NR_NODES] = { 0, };
+ int nodes;
+ int n, t;
+
+ for (t = 0; t < g->p.nr_threads; t++) {
+ struct thread_data *td;
+ int task_nr;
+ int node;
+
+ task_nr = process_nr*g->p.nr_threads + t;
+ td = g->threads + task_nr;
+
+ node = numa_node_of_cpu(td->curr_cpu);
+ node_present[node] = 1;
+ }
+
+ nodes = 0;
+
+ for (n = 0; n < MAX_NR_NODES; n++)
+ nodes += node_present[n];
+
+ return nodes;
+}
+
+/*
+ * Count the number of distinct process-threads a node contains.
+ *
+ * A count of 1 means that the node contains only a single
+ * process. If all nodes on the system contain at most one
+ * process then we are well-converged.
+ */
+static int count_node_processes(int node)
+{
+ int processes = 0;
+ int t, p;
+
+ for (p = 0; p < g->p.nr_proc; p++) {
+ for (t = 0; t < g->p.nr_threads; t++) {
+ struct thread_data *td;
+ int task_nr;
+ int n;
+
+ task_nr = p*g->p.nr_threads + t;
+ td = g->threads + task_nr;
+
+ n = numa_node_of_cpu(td->curr_cpu);
+ if (n == node) {
+ processes++;
+ break;
+ }
+ }
+ }
+
+ return processes;
+}
+
+static void calc_convergence_compression(int *strong)
+{
+ unsigned int nodes_min, nodes_max;
+ int p;
+
+ nodes_min = -1;
+ nodes_max = 0;
+
+ for (p = 0; p < g->p.nr_proc; p++) {
+ unsigned int nodes = count_process_nodes(p);
+
+ nodes_min = min(nodes, nodes_min);
+ nodes_max = max(nodes, nodes_max);
+ }
+
+ /* Strong convergence: all threads compress on a single node: */
+ if (nodes_min == 1 && nodes_max == 1) {
+ *strong = 1;
+ } else {
+ *strong = 0;
+ tprintf(" {%d-%d}", nodes_min, nodes_max);
+ }
+}
+
+static void calc_convergence(double runtime_ns_max, double *convergence)
+{
+ unsigned int loops_done_min, loops_done_max;
+ int process_groups;
+ int nodes[MAX_NR_NODES];
+ int distance;
+ int nr_min;
+ int nr_max;
+ int strong;
+ int sum;
+ int nr;
+ int node;
+ int cpu;
+ int t;
+
+ if (!g->p.show_convergence && !g->p.measure_convergence)
+ return;
+
+ for (node = 0; node < g->p.nr_nodes; node++)
+ nodes[node] = 0;
+
+ loops_done_min = -1;
+ loops_done_max = 0;
+
+ for (t = 0; t < g->p.nr_tasks; t++) {
+ struct thread_data *td = g->threads + t;
+ unsigned int loops_done;
+
+ cpu = td->curr_cpu;
+
+ /* Not all threads have written it yet: */
+ if (cpu < 0)
+ continue;
+
+ node = numa_node_of_cpu(cpu);
+
+ nodes[node]++;
+
+ loops_done = td->loops_done;
+ loops_done_min = min(loops_done, loops_done_min);
+ loops_done_max = max(loops_done, loops_done_max);
+ }
+
+ nr_max = 0;
+ nr_min = g->p.nr_tasks;
+ sum = 0;
+
+ for (node = 0; node < g->p.nr_nodes; node++) {
+ nr = nodes[node];
+ nr_min = min(nr, nr_min);
+ nr_max = max(nr, nr_max);
+ sum += nr;
+ }
+ BUG_ON(nr_min > nr_max);
+
+ BUG_ON(sum > g->p.nr_tasks);
+
+ if (0 && (sum < g->p.nr_tasks))
+ return;
+
+ /*
+ * Count the number of distinct process groups present
+ * on nodes - when we are converged this will decrease
+ * to g->p.nr_proc:
+ */
+ process_groups = 0;
+
+ for (node = 0; node < g->p.nr_nodes; node++) {
+ int processes = count_node_processes(node);
+
+ nr = nodes[node];
+ tprintf(" %2d/%-2d", nr, processes);
+
+ process_groups += processes;
+ }
+
+ distance = nr_max - nr_min;
+
+ tprintf(" [%2d/%-2d]", distance, process_groups);
+
+ tprintf(" l:%3d-%-3d (%3d)",
+ loops_done_min, loops_done_max, loops_done_max-loops_done_min);
+
+ if (loops_done_min && loops_done_max) {
+ double skew = 1.0 - (double)loops_done_min/loops_done_max;
+
+ tprintf(" [%4.1f%%]", skew * 100.0);
+ }
+
+ calc_convergence_compression(&strong);
+
+ if (strong && process_groups == g->p.nr_proc) {
+ if (!*convergence) {
+ *convergence = runtime_ns_max;
+ tprintf(" (%6.1fs converged)\n", *convergence/1e9);
+ if (g->p.measure_convergence) {
+ g->all_converged = true;
+ g->stop_work = true;
+ }
+ }
+ } else {
+ if (*convergence) {
+ tprintf(" (%6.1fs de-converged)", runtime_ns_max/1e9);
+ *convergence = 0;
+ }
+ tprintf("\n");
+ }
+}
+
+static void show_summary(double runtime_ns_max, int l, double *convergence)
+{
+ tprintf("\r # %5.1f%% [%.1f mins]",
+ (double)(l+1)/g->p.nr_loops*100.0, runtime_ns_max/1e9 / 60.0);
+
+ calc_convergence(runtime_ns_max, convergence);
+
+ if (g->p.show_details >= 0)
+ fflush(stdout);
+}
+
+static void *worker_thread(void *__tdata)
+{
+ struct thread_data *td = __tdata;
+ struct timeval start0, start, stop, diff;
+ int process_nr = td->process_nr;
+ int thread_nr = td->thread_nr;
+ unsigned long last_perturbance;
+ int task_nr = td->task_nr;
+ int details = g->p.show_details;
+ int first_task, last_task;
+ double convergence = 0;
+ u64 val = td->val;
+ double runtime_ns_max;
+ u8 *global_data;
+ u8 *process_data;
+ u8 *thread_data;
+ u64 bytes_done;
+ long work_done;
+ u32 l;
+
+ bind_to_cpumask(td->bind_cpumask);
+ bind_to_memnode(td->bind_node);
+
+ set_taskname("thread %d/%d", process_nr, thread_nr);
+
+ global_data = g->data;
+ process_data = td->process_data;
+ thread_data = setup_private_data(g->p.bytes_thread);
+
+ bytes_done = 0;
+
+ last_task = 0;
+ if (process_nr == g->p.nr_proc-1 && thread_nr == g->p.nr_threads-1)
+ last_task = 1;
+
+ first_task = 0;
+ if (process_nr == 0 && thread_nr == 0)
+ first_task = 1;
+
+ if (details >= 2) {
+ printf("# thread %2d / %2d global mem: %p, process mem: %p, thread mem: %p\n",
+ process_nr, thread_nr, global_data, process_data, thread_data);
+ }
+
+ if (g->p.serialize_startup) {
+ pthread_mutex_lock(&g->startup_mutex);
+ g->nr_tasks_started++;
+ pthread_mutex_unlock(&g->startup_mutex);
+
+ /* Here we will wait for the main process to start us all at once: */
+ pthread_mutex_lock(&g->start_work_mutex);
+ g->nr_tasks_working++;
+
+ /* Last one wake the main process: */
+ if (g->nr_tasks_working == g->p.nr_tasks)
+ pthread_mutex_unlock(&g->startup_done_mutex);
+
+ pthread_mutex_unlock(&g->start_work_mutex);
+ }
+
+ gettimeofday(&start0, NULL);
+
+ start = stop = start0;
+ last_perturbance = start.tv_sec;
+
+ for (l = 0; l < g->p.nr_loops; l++) {
+ start = stop;
+
+ if (g->stop_work)
+ break;
+
+ val += do_work(global_data, g->p.bytes_global, process_nr, g->p.nr_proc, l, val);
+ val += do_work(process_data, g->p.bytes_process, thread_nr, g->p.nr_threads, l, val);
+ val += do_work(thread_data, g->p.bytes_thread, 0, 1, l, val);
+
+ if (g->p.sleep_usecs) {
+ pthread_mutex_lock(td->process_lock);
+ usleep(g->p.sleep_usecs);
+ pthread_mutex_unlock(td->process_lock);
+ }
+ /*
+ * Amount of work to be done under a process-global lock:
+ */
+ if (g->p.bytes_process_locked) {
+ pthread_mutex_lock(td->process_lock);
+ val += do_work(process_data, g->p.bytes_process_locked, thread_nr, g->p.nr_threads, l, val);
+ pthread_mutex_unlock(td->process_lock);
+ }
+
+ work_done = g->p.bytes_global + g->p.bytes_process +
+ g->p.bytes_process_locked + g->p.bytes_thread;
+
+ update_curr_cpu(task_nr, work_done);
+ bytes_done += work_done;
+
+ if (details < 0 && !g->p.perturb_secs && !g->p.measure_convergence && !g->p.nr_secs)
+ continue;
+
+ td->loops_done = l;
+
+ gettimeofday(&stop, NULL);
+
+ /* Check whether our max runtime timed out: */
+ if (g->p.nr_secs) {
+ timersub(&stop, &start0, &diff);
+ if (diff.tv_sec >= g->p.nr_secs) {
+ g->stop_work = true;
+ break;
+ }
+ }
+
+ /* Update the summary at most once per second: */
+ if (start.tv_sec == stop.tv_sec)
+ continue;
+
+ /*
+ * Perturb the first task's equilibrium every g->p.perturb_secs seconds,
+ * by migrating to CPU#0:
+ */
+ if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) {
+ cpu_set_t orig_mask;
+ int target_cpu;
+ int this_cpu;
+
+ last_perturbance = stop.tv_sec;
+
+ /*
+ * Depending on where we are running, move into
+ * the other half of the system, to create some
+ * real disturbance:
+ */
+ this_cpu = g->threads[task_nr].curr_cpu;
+ if (this_cpu < g->p.nr_cpus/2)
+ target_cpu = g->p.nr_cpus-1;
+ else
+ target_cpu = 0;
+
+ orig_mask = bind_to_cpu(target_cpu);
+
+ /* Here we are running on the target CPU already */
+ if (details >= 1)
+ printf(" (injecting perturbalance, moved to CPU#%d)\n", target_cpu);
+
+ bind_to_cpumask(orig_mask);
+ }
+
+ if (details >= 3) {
+ timersub(&stop, &start, &diff);
+ runtime_ns_max = diff.tv_sec * 1000000000;
+ runtime_ns_max += diff.tv_usec * 1000;
+
+ if (details >= 0) {
+ printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016lx]\n",
+ process_nr, thread_nr, runtime_ns_max / bytes_done, val);
+ }
+ fflush(stdout);
+ }
+ if (!last_task)
+ continue;
+
+ timersub(&stop, &start0, &diff);
+ runtime_ns_max = diff.tv_sec * 1000000000ULL;
+ runtime_ns_max += diff.tv_usec * 1000ULL;
+
+ show_summary(runtime_ns_max, l, &convergence);
+ }
+
+ gettimeofday(&stop, NULL);
+ timersub(&stop, &start0, &diff);
+ td->runtime_ns = diff.tv_sec * 1000000000ULL;
+ td->runtime_ns += diff.tv_usec * 1000ULL;
+
+ free_data(thread_data, g->p.bytes_thread);
+
+ pthread_mutex_lock(&g->stop_work_mutex);
+ g->bytes_done += bytes_done;
+ pthread_mutex_unlock(&g->stop_work_mutex);
+
+ return NULL;
+}
+
+/*
+ * A worker process starts a couple of threads:
+ */
+static void worker_process(int process_nr)
+{
+ pthread_mutex_t process_lock;
+ struct thread_data *td;
+ pthread_t *pthreads;
+ u8 *process_data;
+ int task_nr;
+ int ret;
+ int t;
+
+ pthread_mutex_init(&process_lock, NULL);
+ set_taskname("process %d", process_nr);
+
+ /*
+ * Pick up the memory policy and the CPU binding of our first thread,
+ * so that we initialize memory accordingly:
+ */
+ task_nr = process_nr*g->p.nr_threads;
+ td = g->threads + task_nr;
+
+ bind_to_memnode(td->bind_node);
+ bind_to_cpumask(td->bind_cpumask);
+
+ pthreads = zalloc(g->p.nr_threads * sizeof(pthread_t));
+ process_data = setup_private_data(g->p.bytes_process);
+
+ if (g->p.show_details >= 3) {
+ printf(" # process %2d global mem: %p, process mem: %p\n",
+ process_nr, g->data, process_data);
+ }
+
+ for (t = 0; t < g->p.nr_threads; t++) {
+ task_nr = process_nr*g->p.nr_threads + t;
+ td = g->threads + task_nr;
+
+ td->process_data = process_data;
+ td->process_nr = process_nr;
+ td->thread_nr = t;
+ td->task_nr = task_nr;
+ td->val = rand();
+ td->curr_cpu = -1;
+ td->process_lock = &process_lock;
+
+ ret = pthread_create(pthreads + t, NULL, worker_thread, td);
+ BUG_ON(ret);
+ }
+
+ for (t = 0; t < g->p.nr_threads; t++) {
+ ret = pthread_join(pthreads[t], NULL);
+ BUG_ON(ret);
+ }
+
+ free_data(process_data, g->p.bytes_process);
+ free(pthreads);
+}
+
+static void print_summary(void)
+{
+ if (g->p.show_details < 0)
+ return;
+
+ printf("\n ###\n");
+ printf(" # %d %s will execute (on %d nodes, %d CPUs):\n",
+ g->p.nr_tasks, g->p.nr_tasks == 1 ? "task" : "tasks", g->p.nr_nodes, g->p.nr_cpus);
+ printf(" # %5dx %5ldMB global shared mem operations\n",
+ g->p.nr_loops, g->p.bytes_global/1024/1024);
+ printf(" # %5dx %5ldMB process shared mem operations\n",
+ g->p.nr_loops, g->p.bytes_process/1024/1024);
+ printf(" # %5dx %5ldMB thread local mem operations\n",
+ g->p.nr_loops, g->p.bytes_thread/1024/1024);
+
+ printf(" ###\n");
+
+ printf("\n ###\n"); fflush(stdout);
+}
+
+static void init_thread_data(void)
+{
+ ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
+ int t;
+
+ g->threads = zalloc_shared_data(size);
+
+ for (t = 0; t < g->p.nr_tasks; t++) {
+ struct thread_data *td = g->threads + t;
+ int cpu;
+
+ /* Allow all nodes by default: */
+ td->bind_node = -1;
+
+ /* Allow all CPUs by default: */
+ CPU_ZERO(&td->bind_cpumask);
+ for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
+ CPU_SET(cpu, &td->bind_cpumask);
+ }
+}
+
+static void deinit_thread_data(void)
+{
+ ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
+
+ free_data(g->threads, size);
+}
+
+static int init(void)
+{
+ g = (void *)alloc_data(sizeof(*g), MAP_SHARED, 1, 0, 0 /* THP */, 0);
+
+ /* Copy over options: */
+ g->p = p0;
+
+ g->p.nr_cpus = numa_num_configured_cpus();
+
+ g->p.nr_nodes = numa_max_node() + 1;
+
+ /* char array in count_process_nodes(): */
+ BUG_ON(g->p.nr_nodes > MAX_NR_NODES || g->p.nr_nodes < 0);
+
+ if (g->p.show_quiet && !g->p.show_details)
+ g->p.show_details = -1;
+
+ /* Some memory should be specified: */
+ if (!g->p.mb_global_str && !g->p.mb_proc_str && !g->p.mb_thread_str)
+ return -1;
+
+ if (g->p.mb_global_str) {
+ g->p.mb_global = atof(g->p.mb_global_str);
+ BUG_ON(g->p.mb_global < 0);
+ }
+
+ if (g->p.mb_proc_str) {
+ g->p.mb_proc = atof(g->p.mb_proc_str);
+ BUG_ON(g->p.mb_proc < 0);
+ }
+
+ if (g->p.mb_proc_locked_str) {
+ g->p.mb_proc_locked = atof(g->p.mb_proc_locked_str);
+ BUG_ON(g->p.mb_proc_locked < 0);
+ BUG_ON(g->p.mb_proc_locked > g->p.mb_proc);
+ }
+
+ if (g->p.mb_thread_str) {
+ g->p.mb_thread = atof(g->p.mb_thread_str);
+ BUG_ON(g->p.mb_thread < 0);
+ }
+
+ BUG_ON(g->p.nr_threads <= 0);
+ BUG_ON(g->p.nr_proc <= 0);
+
+ g->p.nr_tasks = g->p.nr_proc*g->p.nr_threads;
+
+ g->p.bytes_global = g->p.mb_global *1024L*1024L;
+ g->p.bytes_process = g->p.mb_proc *1024L*1024L;
+ g->p.bytes_process_locked = g->p.mb_proc_locked *1024L*1024L;
+ g->p.bytes_thread = g->p.mb_thread *1024L*1024L;
+
+ g->data = setup_shared_data(g->p.bytes_global);
+
+ /* Startup serialization: */
+ init_global_mutex(&g->start_work_mutex);
+ init_global_mutex(&g->startup_mutex);
+ init_global_mutex(&g->startup_done_mutex);
+ init_global_mutex(&g->stop_work_mutex);
+
+ init_thread_data();
+
+ tprintf("#\n");
+ parse_setup_cpu_list();
+ parse_setup_node_list();
+ tprintf("#\n");
+
+ print_summary();
+
+ return 0;
+}
+
+static void deinit(void)
+{
+ free_data(g->data, g->p.bytes_global);
+ g->data = NULL;
+
+ deinit_thread_data();
+
+ free_data(g, sizeof(*g));
+ g = NULL;
+}
+
+/*
+ * Print a short or long result, depending on the verbosity setting:
+ */
+static void print_res(const char *name, double val,
+ const char *txt_unit, const char *txt_short, const char *txt_long)
+{
+ if (!name)
+ name = "main,";
+
+ if (g->p.show_quiet)
+ printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short);
+ else
+ printf(" %14.3f %s\n", val, txt_long);
+}
+
+static int __bench_numa(const char *name)
+{
+ struct timeval start, stop, diff;
+ u64 runtime_ns_min, runtime_ns_sum;
+ pid_t *pids, pid, wpid;
+ double delta_runtime;
+ double runtime_avg;
+ double runtime_sec_max;
+ double runtime_sec_min;
+ int wait_stat;
+ double bytes;
+ int i, t;
+
+ if (init())
+ return -1;
+
+ pids = zalloc(g->p.nr_proc * sizeof(*pids));
+ pid = -1;
+
+ /* All threads try to acquire it, this way we can wait for them to start up: */
+ pthread_mutex_lock(&g->start_work_mutex);
+
+ if (g->p.serialize_startup) {
+ tprintf(" #\n");
+ tprintf(" # Startup synchronization: ..."); fflush(stdout);
+ }
+
+ gettimeofday(&start, NULL);
+
+ for (i = 0; i < g->p.nr_proc; i++) {
+ pid = fork();
+ dprintf(" # process %2d: PID %d\n", i, pid);
+
+ BUG_ON(pid < 0);
+ if (!pid) {
+ /* Child process: */
+ worker_process(i);
+
+ exit(0);
+ }
+ pids[i] = pid;
+
+ }
+ /* Wait for all the threads to start up: */
+ while (g->nr_tasks_started != g->p.nr_tasks)
+ usleep(1000);
+
+ BUG_ON(g->nr_tasks_started != g->p.nr_tasks);
+
+ if (g->p.serialize_startup) {
+ double startup_sec;
+
+ pthread_mutex_lock(&g->startup_done_mutex);
+
+ /* This will start all threads: */
+ pthread_mutex_unlock(&g->start_work_mutex);
+
+ /* This mutex is locked - the last started thread will wake us: */
+ pthread_mutex_lock(&g->startup_done_mutex);
+
+ gettimeofday(&stop, NULL);
+
+ timersub(&stop, &start, &diff);
+
+ startup_sec = diff.tv_sec * 1000000000.0;
+ startup_sec += diff.tv_usec * 1000.0;
+ startup_sec /= 1e9;
+
+ tprintf(" threads initialized in %.6f seconds.\n", startup_sec);
+ tprintf(" #\n");
+
+ start = stop;
+ pthread_mutex_unlock(&g->startup_done_mutex);
+ } else {
+ gettimeofday(&start, NULL);
+ }
+
+ /* Parent process: */
+
+
+ for (i = 0; i < g->p.nr_proc; i++) {
+ wpid = waitpid(pids[i], &wait_stat, 0);
+ BUG_ON(wpid < 0);
+ BUG_ON(!WIFEXITED(wait_stat));
+
+ }
+
+ runtime_ns_sum = 0;
+ runtime_ns_min = -1LL;
+
+ for (t = 0; t < g->p.nr_tasks; t++) {
+ u64 thread_runtime_ns = g->threads[t].runtime_ns;
+
+ runtime_ns_sum += thread_runtime_ns;
+ runtime_ns_min = min(thread_runtime_ns, runtime_ns_min);
+ }
+
+ gettimeofday(&stop, NULL);
+ timersub(&stop, &start, &diff);
+
+ BUG_ON(bench_format != BENCH_FORMAT_DEFAULT);
+
+ tprintf("\n ###\n");
+ tprintf("\n");
+
+ runtime_sec_max = diff.tv_sec * 1000000000.0;
+ runtime_sec_max += diff.tv_usec * 1000.0;
+ runtime_sec_max /= 1e9;
+
+ runtime_sec_min = runtime_ns_min/1e9;
+
+ bytes = g->bytes_done;
+ runtime_avg = (double)runtime_ns_sum / g->p.nr_tasks / 1e9;
+
+ if (g->p.measure_convergence) {
+ print_res(name, runtime_sec_max,
+ "secs,", "NUMA-convergence-latency", "secs latency to NUMA-converge");
+ }
+
+ print_res(name, runtime_sec_max,
+ "secs,", "runtime-max/thread", "secs slowest (max) thread-runtime");
+
+ print_res(name, runtime_sec_min,
+ "secs,", "runtime-min/thread", "secs fastest (min) thread-runtime");
+
+ print_res(name, runtime_avg,
+ "secs,", "runtime-avg/thread", "secs average thread-runtime");
+
+ delta_runtime = (runtime_sec_max - runtime_sec_min)/2.0;
+ print_res(name, delta_runtime / runtime_sec_max * 100.0,
+ "%,", "spread-runtime/thread", "% difference between max/avg runtime");
+
+ print_res(name, bytes / g->p.nr_tasks / 1e9,
+ "GB,", "data/thread", "GB data processed, per thread");
+
+ print_res(name, bytes / 1e9,
+ "GB,", "data-total", "GB data processed, total");
+
+ print_res(name, runtime_sec_max * 1e9 / (bytes / g->p.nr_tasks),
+ "nsecs,", "runtime/byte/thread","nsecs/byte/thread runtime");
+
+ print_res(name, bytes / g->p.nr_tasks / 1e9 / runtime_sec_max,
+ "GB/sec,", "thread-speed", "GB/sec/thread speed");
+
+ print_res(name, bytes / runtime_sec_max / 1e9,
+ "GB/sec,", "total-speed", "GB/sec total speed");
+
+ free(pids);
+
+ deinit();
+
+ return 0;
+}
+
+#define MAX_ARGS 50
+
+static int command_size(const char **argv)
+{
+ int size = 0;
+
+ while (*argv) {
+ size++;
+ argv++;
+ }
+
+ BUG_ON(size >= MAX_ARGS);
+
+ return size;
+}
+
+static void init_params(struct params *p, const char *name, int argc, const char **argv)
+{
+ int i;
+
+ printf("\n # Running %s \"perf bench numa", name);
+
+ for (i = 0; i < argc; i++)
+ printf(" %s", argv[i]);
+
+ printf("\"\n");
+
+ memset(p, 0, sizeof(*p));
+
+ /* Initialize nonzero defaults: */
+
+ p->serialize_startup = 1;
+ p->data_reads = true;
+ p->data_writes = true;
+ p->data_backwards = true;
+ p->data_rand_walk = true;
+ p->nr_loops = -1;
+ p->init_random = true;
+}
+
+static int run_bench_numa(const char *name, const char **argv)
+{
+ int argc = command_size(argv);
+
+ init_params(&p0, name, argc, argv);
+ argc = parse_options(argc, argv, options, bench_numa_usage, 0);
+ if (argc)
+ goto err;
+
+ if (__bench_numa(name))
+ goto err;
+
+ return 0;
+
+err:
+ usage_with_options(numa_usage, options);
+ return -1;
+}
+
+#define OPT_BW_RAM "-s", "20", "-zZq", "--thp", " 1", "--no-data_rand_walk"
+#define OPT_BW_RAM_NOTHP OPT_BW_RAM, "--thp", "-1"
+
+#define OPT_CONV "-s", "100", "-zZ0qcm", "--thp", " 1"
+#define OPT_CONV_NOTHP OPT_CONV, "--thp", "-1"
+
+#define OPT_BW "-s", "20", "-zZ0q", "--thp", " 1"
+#define OPT_BW_NOTHP OPT_BW, "--thp", "-1"
+
+/*
+ * The built-in test-suite executed by "perf bench numa -a".
+ *
+ * (A minimum of 4 nodes and 16 GB of RAM is recommended.)
+ */
+static const char *tests[][MAX_ARGS] = {
+ /* Basic single-stream NUMA bandwidth measurements: */
+ { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024",
+ "-C" , "0", "-M", "0", OPT_BW_RAM },
+ { "RAM-bw-local-NOTHP,",
+ "mem", "-p", "1", "-t", "1", "-P", "1024",
+ "-C" , "0", "-M", "0", OPT_BW_RAM_NOTHP },
+ { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024",
+ "-C" , "0", "-M", "1", OPT_BW_RAM },
+
+ /* 2-stream NUMA bandwidth measurements: */
+ { "RAM-bw-local-2x,", "mem", "-p", "2", "-t", "1", "-P", "1024",
+ "-C", "0,2", "-M", "0x2", OPT_BW_RAM },
+ { "RAM-bw-remote-2x,", "mem", "-p", "2", "-t", "1", "-P", "1024",
+ "-C", "0,2", "-M", "1x2", OPT_BW_RAM },
+
+ /* Cross-stream NUMA bandwidth measurement: */
+ { "RAM-bw-cross,", "mem", "-p", "2", "-t", "1", "-P", "1024",
+ "-C", "0,8", "-M", "1,0", OPT_BW_RAM },
+
+ /* Convergence latency measurements: */
+ { " 1x3-convergence,", "mem", "-p", "1", "-t", "3", "-P", "512", OPT_CONV },
+ { " 1x4-convergence,", "mem", "-p", "1", "-t", "4", "-P", "512", OPT_CONV },
+ { " 1x6-convergence,", "mem", "-p", "1", "-t", "6", "-P", "1020", OPT_CONV },
+ { " 2x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV },
+ { " 3x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV },
+ { " 4x4-convergence,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV },
+ { " 4x4-convergence-NOTHP,",
+ "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV_NOTHP },
+ { " 4x6-convergence,", "mem", "-p", "4", "-t", "6", "-P", "1020", OPT_CONV },
+ { " 4x8-convergence,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_CONV },
+ { " 8x4-convergence,", "mem", "-p", "8", "-t", "4", "-P", "512", OPT_CONV },
+ { " 8x4-convergence-NOTHP,",
+ "mem", "-p", "8", "-t", "4", "-P", "512", OPT_CONV_NOTHP },
+ { " 3x1-convergence,", "mem", "-p", "3", "-t", "1", "-P", "512", OPT_CONV },
+ { " 4x1-convergence,", "mem", "-p", "4", "-t", "1", "-P", "512", OPT_CONV },
+ { " 8x1-convergence,", "mem", "-p", "8", "-t", "1", "-P", "512", OPT_CONV },
+ { "16x1-convergence,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_CONV },
+ { "32x1-convergence,", "mem", "-p", "32", "-t", "1", "-P", "128", OPT_CONV },
+
+ /* Various NUMA process/thread layout bandwidth measurements: */
+ { " 2x1-bw-process,", "mem", "-p", "2", "-t", "1", "-P", "1024", OPT_BW },
+ { " 3x1-bw-process,", "mem", "-p", "3", "-t", "1", "-P", "1024", OPT_BW },
+ { " 4x1-bw-process,", "mem", "-p", "4", "-t", "1", "-P", "1024", OPT_BW },
+ { " 8x1-bw-process,", "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW },
+ { " 8x1-bw-process-NOTHP,",
+ "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW_NOTHP },
+ { "16x1-bw-process,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_BW },
+
+ { " 4x1-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW },
+ { " 8x1-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW },
+ { "16x1-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW },
+ { "32x1-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW },
+
+ { " 2x3-bw-thread,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW },
+ { " 4x4-bw-thread,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW },
+ { " 4x6-bw-thread,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW },
+ { " 4x8-bw-thread,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW },
+ { " 4x8-bw-thread-NOTHP,",
+ "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW_NOTHP },
+ { " 3x3-bw-thread,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW },
+ { " 5x5-bw-thread,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW },
+
+ { "2x16-bw-thread,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW },
+ { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW },
+
+ { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW },
+ { "numa02-bw-NOTHP,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW_NOTHP },
+ { "numa01-bw-thread,", "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW },
+ { "numa01-bw-thread-NOTHP,",
+ "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW_NOTHP },
+};
+
+static int bench_all(void)
+{
+ int nr = ARRAY_SIZE(tests);
+ int ret;
+ int i;
+
+ ret = system("echo ' #'; echo ' # Running test on: '$(uname -a); echo ' #'");
+ BUG_ON(ret < 0);
+
+ for (i = 0; i < nr; i++) {
+ if (run_bench_numa(tests[i][0], tests[i] + 1))
+ return -1;
+ }
+
+ printf("\n");
+
+ return 0;
+}
+
+int bench_numa(int argc, const char **argv, const char *prefix __maybe_unused)
+{
+ init_params(&p0, "main,", argc, argv);
+ argc = parse_options(argc, argv, options, bench_numa_usage, 0);
+ if (argc)
+ goto err;
+
+ if (p0.run_all)
+ return bench_all();
+
+ if (__bench_numa(NULL))
+ goto err;
+
+ return 0;
+
+err:
+ usage_with_options(numa_usage, options);
+ return -1;
+}
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index dc870cf..2e6961e 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -34,9 +34,10 @@
struct perf_annotate {
struct perf_tool tool;
- bool force, use_tui, use_stdio;
+ bool force, use_tui, use_stdio, use_gtk;
bool full_paths;
bool print_line;
+ bool skip_missing;
const char *sym_hist_filter;
const char *cpu_list;
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
@@ -138,9 +139,22 @@ find_next:
continue;
}
- if (use_browser > 0) {
+ if (use_browser == 2) {
+ int ret;
+
+ ret = hist_entry__gtk_annotate(he, evidx, NULL);
+ if (!ret || !ann->skip_missing)
+ return;
+
+ /* skip missing symbols */
+ nd = rb_next(nd);
+ } else if (use_browser == 1) {
key = hist_entry__tui_annotate(he, evidx, NULL);
switch (key) {
+ case -1:
+ if (!ann->skip_missing)
+ return;
+ /* fall through */
case K_RIGHT:
next = rb_next(nd);
break;
@@ -224,6 +238,10 @@ static int __cmd_annotate(struct perf_annotate *ann)
ui__error("The %s file has no samples!\n", session->filename);
goto out_delete;
}
+
+ if (use_browser == 2)
+ perf_gtk__show_annotations();
+
out_delete:
/*
* Speed up the exit process, for large files this can
@@ -270,6 +288,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
"be more verbose (show symbol address, etc)"),
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
"dump raw trace in ASCII"),
+ OPT_BOOLEAN(0, "gtk", &annotate.use_gtk, "Use the GTK interface"),
OPT_BOOLEAN(0, "tui", &annotate.use_tui, "Use the TUI interface"),
OPT_BOOLEAN(0, "stdio", &annotate.use_stdio, "Use the stdio interface"),
OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
@@ -280,6 +299,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
"print matching source lines (may be slow)"),
OPT_BOOLEAN('P', "full-paths", &annotate.full_paths,
"Don't shorten the displayed pathnames"),
+ OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing,
+ "Skip symbols that cannot be annotated"),
OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"),
OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
"Look for files with symbols relative to this directory"),
@@ -300,6 +321,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
use_browser = 0;
else if (annotate.use_tui)
use_browser = 1;
+ else if (annotate.use_gtk)
+ use_browser = 2;
setup_browser(true);
@@ -309,7 +332,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
if (symbol__init() < 0)
return -1;
- setup_sorting(annotate_usage, options);
+ if (setup_sorting() < 0)
+ usage_with_options(annotate_usage, options);
if (argc) {
/*
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index cae9a5f..77298bf 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -35,6 +35,18 @@ struct bench_suite {
/* sentinel: easy for help */
#define suite_all { "all", "Test all benchmark suites", NULL }
+#ifdef LIBNUMA_SUPPORT
+static struct bench_suite numa_suites[] = {
+ { "mem",
+ "Benchmark for NUMA workloads",
+ bench_numa },
+ suite_all,
+ { NULL,
+ NULL,
+ NULL }
+};
+#endif
+
static struct bench_suite sched_suites[] = {
{ "messaging",
"Benchmark for scheduler and IPC mechanisms",
@@ -68,6 +80,11 @@ struct bench_subsys {
};
static struct bench_subsys subsystems[] = {
+#ifdef LIBNUMA_SUPPORT
+ { "numa",
+ "NUMA scheduling and MM behavior",
+ numa_suites },
+#endif
{ "sched",
"scheduler and IPC mechanism",
sched_suites },
@@ -159,6 +176,7 @@ static void all_suite(struct bench_subsys *subsys) /* FROM HERE */
printf("# Running %s/%s benchmark...\n",
subsys->name,
suites[i].name);
+ fflush(stdout);
argv[1] = suites[i].name;
suites[i].fn(1, argv, NULL);
@@ -225,6 +243,7 @@ int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused)
printf("# Running %s/%s benchmark...\n",
subsystems[i].name,
subsystems[i].suites[j].name);
+ fflush(stdout);
status = subsystems[i].suites[j].fn(argc - 1,
argv + 1, prefix);
goto end;
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index fae8b25..c96c8fa 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -14,6 +14,7 @@
#include "util/parse-options.h"
#include "util/strlist.h"
#include "util/build-id.h"
+#include "util/session.h"
#include "util/symbol.h"
static int build_id_cache__add_file(const char *filename, const char *debugdir)
@@ -58,19 +59,89 @@ static int build_id_cache__remove_file(const char *filename,
return err;
}
+static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused)
+{
+ char filename[PATH_MAX];
+ u8 build_id[BUILD_ID_SIZE];
+
+ if (dso__build_id_filename(dso, filename, sizeof(filename)) &&
+ filename__read_build_id(filename, build_id,
+ sizeof(build_id)) != sizeof(build_id)) {
+ if (errno == ENOENT)
+ return false;
+
+ pr_warning("Problems with %s file, consider removing it from the cache\n",
+ filename);
+ } else if (memcmp(dso->build_id, build_id, sizeof(dso->build_id))) {
+ pr_warning("Problems with %s file, consider removing it from the cache\n",
+ filename);
+ }
+
+ return true;
+}
+
+static int build_id_cache__fprintf_missing(const char *filename, bool force, FILE *fp)
+{
+ struct perf_session *session = perf_session__new(filename, O_RDONLY,
+ force, false, NULL);
+ if (session == NULL)
+ return -1;
+
+ perf_session__fprintf_dsos_buildid(session, fp, dso__missing_buildid_cache, 0);
+ perf_session__delete(session);
+
+ return 0;
+}
+
+static int build_id_cache__update_file(const char *filename,
+ const char *debugdir)
+{
+ u8 build_id[BUILD_ID_SIZE];
+ char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+
+ int err;
+
+ if (filename__read_build_id(filename, &build_id, sizeof(build_id)) < 0) {
+ pr_debug("Couldn't read a build-id in %s\n", filename);
+ return -1;
+ }
+
+ build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+ err = build_id_cache__remove_s(sbuild_id, debugdir);
+ if (!err) {
+ err = build_id_cache__add_s(sbuild_id, debugdir, filename,
+ false, false);
+ }
+ if (verbose)
+ pr_info("Updating %s %s: %s\n", sbuild_id, filename,
+ err ? "FAIL" : "Ok");
+
+ return err;
+}
+
int cmd_buildid_cache(int argc, const char **argv,
const char *prefix __maybe_unused)
{
struct strlist *list;
struct str_node *pos;
+ int ret = 0;
+ bool force = false;
char debugdir[PATH_MAX];
char const *add_name_list_str = NULL,
- *remove_name_list_str = NULL;
+ *remove_name_list_str = NULL,
+ *missing_filename = NULL,
+ *update_name_list_str = NULL;
+
const struct option buildid_cache_options[] = {
OPT_STRING('a', "add", &add_name_list_str,
"file list", "file(s) to add"),
OPT_STRING('r', "remove", &remove_name_list_str, "file list",
"file(s) to remove"),
+ OPT_STRING('M', "missing", &missing_filename, "file",
+ "to find missing build ids in the cache"),
+ OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
+ OPT_STRING('u', "update", &update_name_list_str, "file list",
+ "file(s) to update"),
OPT_INCR('v', "verbose", &verbose, "be more verbose"),
OPT_END()
};
@@ -125,5 +196,26 @@ int cmd_buildid_cache(int argc, const char **argv,
}
}
- return 0;
+ if (missing_filename)
+ ret = build_id_cache__fprintf_missing(missing_filename, force, stdout);
+
+ if (update_name_list_str) {
+ list = strlist__new(true, update_name_list_str);
+ if (list) {
+ strlist__for_each(pos, list)
+ if (build_id_cache__update_file(pos->s, debugdir)) {
+ if (errno == ENOENT) {
+ pr_debug("%s wasn't in the cache\n",
+ pos->s);
+ continue;
+ }
+ pr_warning("Couldn't update %s: %s\n",
+ pos->s, strerror(errno));
+ }
+
+ strlist__delete(list);
+ }
+ }
+
+ return ret;
}
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index a82d99f..e74366a 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -44,23 +44,26 @@ static int filename__fprintf_build_id(const char *name, FILE *fp)
return fprintf(fp, "%s\n", sbuild_id);
}
+static bool dso__skip_buildid(struct dso *dso, int with_hits)
+{
+ return with_hits && !dso->hit;
+}
+
static int perf_session__list_build_ids(bool force, bool with_hits)
{
struct perf_session *session;
symbol__elf_init();
-
- session = perf_session__new(input_name, O_RDONLY, force, false,
- &build_id__mark_dso_hit_ops);
- if (session == NULL)
- return -1;
-
/*
* See if this is an ELF file first:
*/
- if (filename__fprintf_build_id(session->filename, stdout))
+ if (filename__fprintf_build_id(input_name, stdout))
goto out;
+ session = perf_session__new(input_name, O_RDONLY, force, false,
+ &build_id__mark_dso_hit_ops);
+ if (session == NULL)
+ return -1;
/*
* in pipe-mode, the only way to get the buildids is to parse
* the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID
@@ -68,9 +71,9 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
if (with_hits || session->fd_pipe)
perf_session__process_events(session, &build_id__mark_dso_hit_ops);
- perf_session__fprintf_dsos_buildid(session, stdout, with_hits);
-out:
+ perf_session__fprintf_dsos_buildid(session, stdout, dso__skip_buildid, with_hits);
perf_session__delete(session);
+out:
return 0;
}
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 93b852f..d207a97 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -23,7 +23,6 @@ static char const *input_old = "perf.data.old",
*input_new = "perf.data";
static char diff__default_sort_order[] = "dso,symbol";
static bool force;
-static bool show_displacement;
static bool show_period;
static bool show_formula;
static bool show_baseline_only;
@@ -146,58 +145,47 @@ static int setup_compute(const struct option *opt, const char *str,
return -EINVAL;
}
-static double get_period_percent(struct hist_entry *he, u64 period)
+double perf_diff__period_percent(struct hist_entry *he, u64 period)
{
u64 total = he->hists->stats.total_period;
return (period * 100.0) / total;
}
-double perf_diff__compute_delta(struct hist_entry *he)
+double perf_diff__compute_delta(struct hist_entry *he, struct hist_entry *pair)
{
- struct hist_entry *pair = hist_entry__next_pair(he);
- double new_percent = get_period_percent(he, he->stat.period);
- double old_percent = pair ? get_period_percent(pair, pair->stat.period) : 0.0;
+ double new_percent = perf_diff__period_percent(he, he->stat.period);
+ double old_percent = perf_diff__period_percent(pair, pair->stat.period);
he->diff.period_ratio_delta = new_percent - old_percent;
he->diff.computed = true;
return he->diff.period_ratio_delta;
}
-double perf_diff__compute_ratio(struct hist_entry *he)
+double perf_diff__compute_ratio(struct hist_entry *he, struct hist_entry *pair)
{
- struct hist_entry *pair = hist_entry__next_pair(he);
double new_period = he->stat.period;
- double old_period = pair ? pair->stat.period : 0;
+ double old_period = pair->stat.period;
he->diff.computed = true;
- he->diff.period_ratio = pair ? (new_period / old_period) : 0;
+ he->diff.period_ratio = new_period / old_period;
return he->diff.period_ratio;
}
-s64 perf_diff__compute_wdiff(struct hist_entry *he)
+s64 perf_diff__compute_wdiff(struct hist_entry *he, struct hist_entry *pair)
{
- struct hist_entry *pair = hist_entry__next_pair(he);
u64 new_period = he->stat.period;
- u64 old_period = pair ? pair->stat.period : 0;
+ u64 old_period = pair->stat.period;
he->diff.computed = true;
-
- if (!pair)
- he->diff.wdiff = 0;
- else
- he->diff.wdiff = new_period * compute_wdiff_w2 -
- old_period * compute_wdiff_w1;
+ he->diff.wdiff = new_period * compute_wdiff_w2 -
+ old_period * compute_wdiff_w1;
return he->diff.wdiff;
}
-static int formula_delta(struct hist_entry *he, char *buf, size_t size)
+static int formula_delta(struct hist_entry *he, struct hist_entry *pair,
+ char *buf, size_t size)
{
- struct hist_entry *pair = hist_entry__next_pair(he);
-
- if (!pair)
- return -1;
-
return scnprintf(buf, size,
"(%" PRIu64 " * 100 / %" PRIu64 ") - "
"(%" PRIu64 " * 100 / %" PRIu64 ")",
@@ -205,41 +193,36 @@ static int formula_delta(struct hist_entry *he, char *buf, size_t size)
pair->stat.period, pair->hists->stats.total_period);
}
-static int formula_ratio(struct hist_entry *he, char *buf, size_t size)
+static int formula_ratio(struct hist_entry *he, struct hist_entry *pair,
+ char *buf, size_t size)
{
- struct hist_entry *pair = hist_entry__next_pair(he);
double new_period = he->stat.period;
- double old_period = pair ? pair->stat.period : 0;
-
- if (!pair)
- return -1;
+ double old_period = pair->stat.period;
return scnprintf(buf, size, "%.0F / %.0F", new_period, old_period);
}
-static int formula_wdiff(struct hist_entry *he, char *buf, size_t size)
+static int formula_wdiff(struct hist_entry *he, struct hist_entry *pair,
+ char *buf, size_t size)
{
- struct hist_entry *pair = hist_entry__next_pair(he);
u64 new_period = he->stat.period;
- u64 old_period = pair ? pair->stat.period : 0;
-
- if (!pair)
- return -1;
+ u64 old_period = pair->stat.period;
return scnprintf(buf, size,
"(%" PRIu64 " * " "%" PRId64 ") - (%" PRIu64 " * " "%" PRId64 ")",
new_period, compute_wdiff_w2, old_period, compute_wdiff_w1);
}
-int perf_diff__formula(char *buf, size_t size, struct hist_entry *he)
+int perf_diff__formula(struct hist_entry *he, struct hist_entry *pair,
+ char *buf, size_t size)
{
switch (compute) {
case COMPUTE_DELTA:
- return formula_delta(he, buf, size);
+ return formula_delta(he, pair, buf, size);
case COMPUTE_RATIO:
- return formula_ratio(he, buf, size);
+ return formula_ratio(he, pair, buf, size);
case COMPUTE_WEIGHTED_DIFF:
- return formula_wdiff(he, buf, size);
+ return formula_wdiff(he, pair, buf, size);
default:
BUG_ON(1);
}
@@ -292,48 +275,6 @@ static struct perf_tool tool = {
.ordering_requires_timestamps = true,
};
-static void insert_hist_entry_by_name(struct rb_root *root,
- struct hist_entry *he)
-{
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent = NULL;
- struct hist_entry *iter;
-
- while (*p != NULL) {
- parent = *p;
- iter = rb_entry(parent, struct hist_entry, rb_node);
- if (hist_entry__cmp(he, iter) < 0)
- p = &(*p)->rb_left;
- else
- p = &(*p)->rb_right;
- }
-
- rb_link_node(&he->rb_node, parent, p);
- rb_insert_color(&he->rb_node, root);
-}
-
-static void hists__name_resort(struct hists *self, bool sort)
-{
- unsigned long position = 1;
- struct rb_root tmp = RB_ROOT;
- struct rb_node *next = rb_first(&self->entries);
-
- while (next != NULL) {
- struct hist_entry *n = rb_entry(next, struct hist_entry, rb_node);
-
- next = rb_next(&n->rb_node);
- n->position = position++;
-
- if (sort) {
- rb_erase(&n->rb_node, &self->entries);
- insert_hist_entry_by_name(&tmp, n);
- }
- }
-
- if (sort)
- self->entries = tmp;
-}
-
static struct perf_evsel *evsel_match(struct perf_evsel *evsel,
struct perf_evlist *evlist)
{
@@ -346,34 +287,34 @@ static struct perf_evsel *evsel_match(struct perf_evsel *evsel,
return NULL;
}
-static void perf_evlist__resort_hists(struct perf_evlist *evlist, bool name)
+static void perf_evlist__collapse_resort(struct perf_evlist *evlist)
{
struct perf_evsel *evsel;
list_for_each_entry(evsel, &evlist->entries, node) {
struct hists *hists = &evsel->hists;
- hists__output_resort(hists);
-
- /*
- * The hists__name_resort only sets possition
- * if name is false.
- */
- if (name || ((!name) && show_displacement))
- hists__name_resort(hists, name);
+ hists__collapse_resort(hists);
}
}
static void hists__baseline_only(struct hists *hists)
{
- struct rb_node *next = rb_first(&hists->entries);
+ struct rb_root *root;
+ struct rb_node *next;
+ if (sort__need_collapse)
+ root = &hists->entries_collapsed;
+ else
+ root = hists->entries_in;
+
+ next = rb_first(root);
while (next != NULL) {
- struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node);
+ struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node_in);
- next = rb_next(&he->rb_node);
+ next = rb_next(&he->rb_node_in);
if (!hist_entry__next_pair(he)) {
- rb_erase(&he->rb_node, &hists->entries);
+ rb_erase(&he->rb_node_in, root);
hist_entry__free(he);
}
}
@@ -385,18 +326,21 @@ static void hists__precompute(struct hists *hists)
while (next != NULL) {
struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node);
+ struct hist_entry *pair = hist_entry__next_pair(he);
next = rb_next(&he->rb_node);
+ if (!pair)
+ continue;
switch (compute) {
case COMPUTE_DELTA:
- perf_diff__compute_delta(he);
+ perf_diff__compute_delta(he, pair);
break;
case COMPUTE_RATIO:
- perf_diff__compute_ratio(he);
+ perf_diff__compute_ratio(he, pair);
break;
case COMPUTE_WEIGHTED_DIFF:
- perf_diff__compute_wdiff(he);
+ perf_diff__compute_wdiff(he, pair);
break;
default:
BUG_ON(1);
@@ -470,19 +414,30 @@ static void insert_hist_entry_by_compute(struct rb_root *root,
static void hists__compute_resort(struct hists *hists)
{
- struct rb_root tmp = RB_ROOT;
- struct rb_node *next = rb_first(&hists->entries);
+ struct rb_root *root;
+ struct rb_node *next;
+
+ if (sort__need_collapse)
+ root = &hists->entries_collapsed;
+ else
+ root = hists->entries_in;
+
+ hists->entries = RB_ROOT;
+ next = rb_first(root);
+
+ hists->nr_entries = 0;
+ hists->stats.total_period = 0;
+ hists__reset_col_len(hists);
while (next != NULL) {
- struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node);
+ struct hist_entry *he;
- next = rb_next(&he->rb_node);
+ he = rb_entry(next, struct hist_entry, rb_node_in);
+ next = rb_next(&he->rb_node_in);
- rb_erase(&he->rb_node, &hists->entries);
- insert_hist_entry_by_compute(&tmp, he, compute);
+ insert_hist_entry_by_compute(&hists->entries, he, compute);
+ hists__inc_nr_entries(hists, he);
}
-
- hists->entries = tmp;
}
static void hists__process(struct hists *old, struct hists *new)
@@ -497,6 +452,8 @@ static void hists__process(struct hists *old, struct hists *new)
if (sort_compute) {
hists__precompute(new);
hists__compute_resort(new);
+ } else {
+ hists__output_resort(new);
}
hists__fprintf(new, true, 0, 0, stdout);
@@ -528,8 +485,8 @@ static int __cmd_diff(void)
evlist_old = older->evlist;
evlist_new = newer->evlist;
- perf_evlist__resort_hists(evlist_old, true);
- perf_evlist__resort_hists(evlist_new, false);
+ perf_evlist__collapse_resort(evlist_old);
+ perf_evlist__collapse_resort(evlist_new);
list_for_each_entry(evsel, &evlist_new->entries, node) {
struct perf_evsel *evsel_old;
@@ -562,8 +519,6 @@ static const char * const diff_usage[] = {
static const struct option options[] = {
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show symbol address, etc)"),
- OPT_BOOLEAN('M', "displacement", &show_displacement,
- "Show position displacement relative to baseline"),
OPT_BOOLEAN('b', "baseline-only", &show_baseline_only,
"Show only items with match in baseline"),
OPT_CALLBACK('c', "compute", &compute,
@@ -597,40 +552,32 @@ static const struct option options[] = {
static void ui_init(void)
{
- perf_hpp__init();
-
- /* No overhead column. */
- perf_hpp__column_enable(PERF_HPP__OVERHEAD, false);
-
/*
- * Display baseline/delta/ratio/displacement/
+ * Display baseline/delta/ratio
* formula/periods columns.
*/
- perf_hpp__column_enable(PERF_HPP__BASELINE, true);
+ perf_hpp__column_enable(PERF_HPP__BASELINE);
switch (compute) {
case COMPUTE_DELTA:
- perf_hpp__column_enable(PERF_HPP__DELTA, true);
+ perf_hpp__column_enable(PERF_HPP__DELTA);
break;
case COMPUTE_RATIO:
- perf_hpp__column_enable(PERF_HPP__RATIO, true);
+ perf_hpp__column_enable(PERF_HPP__RATIO);
break;
case COMPUTE_WEIGHTED_DIFF:
- perf_hpp__column_enable(PERF_HPP__WEIGHTED_DIFF, true);
+ perf_hpp__column_enable(PERF_HPP__WEIGHTED_DIFF);
break;
default:
BUG_ON(1);
};
- if (show_displacement)
- perf_hpp__column_enable(PERF_HPP__DISPL, true);
-
if (show_formula)
- perf_hpp__column_enable(PERF_HPP__FORMULA, true);
+ perf_hpp__column_enable(PERF_HPP__FORMULA);
if (show_period) {
- perf_hpp__column_enable(PERF_HPP__PERIOD, true);
- perf_hpp__column_enable(PERF_HPP__PERIOD_BASELINE, true);
+ perf_hpp__column_enable(PERF_HPP__PERIOD);
+ perf_hpp__column_enable(PERF_HPP__PERIOD_BASELINE);
}
}
@@ -658,7 +605,9 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
ui_init();
- setup_sorting(diff_usage, options);
+ if (setup_sorting() < 0)
+ usage_with_options(diff_usage, options);
+
setup_pager();
sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", NULL);
diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c
index c20f1dc..05bd9df 100644
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -15,39 +15,6 @@
#include "util/parse-options.h"
#include "util/session.h"
-struct perf_attr_details {
- bool freq;
- bool verbose;
-};
-
-static int comma_printf(bool *first, const char *fmt, ...)
-{
- va_list args;
- int ret = 0;
-
- if (!*first) {
- ret += printf(",");
- } else {
- ret += printf(":");
- *first = false;
- }
-
- va_start(args, fmt);
- ret += vprintf(fmt, args);
- va_end(args);
- return ret;
-}
-
-static int __if_print(bool *first, const char *field, u64 value)
-{
- if (value == 0)
- return 0;
-
- return comma_printf(first, " %s: %" PRIu64, field, value);
-}
-
-#define if_print(field) __if_print(&first, #field, pos->attr.field)
-
static int __cmd_evlist(const char *file_name, struct perf_attr_details *details)
{
struct perf_session *session;
@@ -57,52 +24,8 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details
if (session == NULL)
return -ENOMEM;
- list_for_each_entry(pos, &session->evlist->entries, node) {
- bool first = true;
-
- printf("%s", perf_evsel__name(pos));
-
- if (details->verbose || details->freq) {
- comma_printf(&first, " sample_freq=%" PRIu64,
- (u64)pos->attr.sample_freq);
- }
-
- if (details->verbose) {
- if_print(type);
- if_print(config);
- if_print(config1);
- if_print(config2);
- if_print(size);
- if_print(sample_type);
- if_print(read_format);
- if_print(disabled);
- if_print(inherit);
- if_print(pinned);
- if_print(exclusive);
- if_print(exclude_user);
- if_print(exclude_kernel);
- if_print(exclude_hv);
- if_print(exclude_idle);
- if_print(mmap);
- if_print(comm);
- if_print(freq);
- if_print(inherit_stat);
- if_print(enable_on_exec);
- if_print(task);
- if_print(watermark);
- if_print(precise_ip);
- if_print(mmap_data);
- if_print(sample_id_all);
- if_print(exclude_host);
- if_print(exclude_guest);
- if_print(__reserved_1);
- if_print(wakeup_events);
- if_print(bp_type);
- if_print(branch_sample_type);
- }
-
- putchar('\n');
- }
+ list_for_each_entry(pos, &session->evlist->entries, node)
+ perf_evsel__fprintf(pos, details, stdout);
perf_session__delete(session);
return 0;
@@ -116,6 +39,8 @@ int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN('F', "freq", &details.freq, "Show the sample frequency"),
OPT_BOOLEAN('v', "verbose", &details.verbose,
"Show all event attr details"),
+ OPT_BOOLEAN('g', "group", &details.event_group,
+ "Show event group information"),
OPT_END()
};
const char * const evlist_usage[] = {
@@ -127,5 +52,10 @@ int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused)
if (argc)
usage_with_options(evlist_usage, options);
+ if (details.event_group && (details.verbose || details.freq)) {
+ pr_err("--group option is not compatible with other options\n");
+ usage_with_options(evlist_usage, options);
+ }
+
return __cmd_evlist(input_name, &details);
}
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 0b4b796..46878da 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -17,6 +17,7 @@
#include "util/debug.h"
#include <linux/rbtree.h>
+#include <linux/string.h>
struct alloc_stat;
typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);
@@ -340,7 +341,7 @@ static void __print_result(struct rb_root *root, struct perf_session *session,
int n_lines, int is_caller)
{
struct rb_node *next;
- struct machine *machine;
+ struct machine *machine = &session->machines.host;
printf("%.102s\n", graph_dotted_line);
printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr");
@@ -349,11 +350,6 @@ static void __print_result(struct rb_root *root, struct perf_session *session,
next = rb_first(root);
- machine = perf_session__find_host_machine(session);
- if (!machine) {
- pr_err("__print_result: couldn't find kernel information\n");
- return;
- }
while (next && n_lines--) {
struct alloc_stat *data = rb_entry(next, struct alloc_stat,
node);
@@ -614,8 +610,7 @@ static struct sort_dimension *avail_sorts[] = {
&pingpong_sort_dimension,
};
-#define NUM_AVAIL_SORTS \
- (int)(sizeof(avail_sorts) / sizeof(struct sort_dimension *))
+#define NUM_AVAIL_SORTS ((int)ARRAY_SIZE(avail_sorts))
static int sort_dimension__add(const char *tok, struct list_head *list)
{
@@ -624,12 +619,11 @@ static int sort_dimension__add(const char *tok, struct list_head *list)
for (i = 0; i < NUM_AVAIL_SORTS; i++) {
if (!strcmp(avail_sorts[i]->name, tok)) {
- sort = malloc(sizeof(*sort));
+ sort = memdup(avail_sorts[i], sizeof(*avail_sorts[i]));
if (!sort) {
- pr_err("%s: malloc failed\n", __func__);
+ pr_err("%s: memdup failed\n", __func__);
return -1;
}
- memcpy(sort, avail_sorts[i], sizeof(*sort));
list_add_tail(&sort->list, list);
return 0;
}
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index ca3f80e..37a769d 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -973,8 +973,7 @@ __cmd_buildid_list(const char *file_name, int argc, const char **argv)
int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused)
{
- const char *file_name;
-
+ const char *file_name = NULL;
const struct option kvm_options[] = {
OPT_STRING('i', "input", &file_name, "file",
"Input file name"),
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index f3151d3..774c907 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -224,130 +224,28 @@ static bool perf_evlist__equal(struct perf_evlist *evlist,
static int perf_record__open(struct perf_record *rec)
{
+ char msg[512];
struct perf_evsel *pos;
struct perf_evlist *evlist = rec->evlist;
struct perf_session *session = rec->session;
struct perf_record_opts *opts = &rec->opts;
int rc = 0;
- /*
- * Set the evsel leader links before we configure attributes,
- * since some might depend on this info.
- */
- if (opts->group)
- perf_evlist__set_leader(evlist);
-
- perf_evlist__config_attrs(evlist, opts);
+ perf_evlist__config(evlist, opts);
list_for_each_entry(pos, &evlist->entries, node) {
- struct perf_event_attr *attr = &pos->attr;
- /*
- * Check if parse_single_tracepoint_event has already asked for
- * PERF_SAMPLE_TIME.
- *
- * XXX this is kludgy but short term fix for problems introduced by
- * eac23d1c that broke 'perf script' by having different sample_types
- * when using multiple tracepoint events when we use a perf binary
- * that tries to use sample_id_all on an older kernel.
- *
- * We need to move counter creation to perf_session, support
- * different sample_types, etc.
- */
- bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
-
-fallback_missing_features:
- if (opts->exclude_guest_missing)
- attr->exclude_guest = attr->exclude_host = 0;
-retry_sample_id:
- attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
try_again:
if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
- int err = errno;
-
- if (err == EPERM || err == EACCES) {
- ui__error_paranoid();
- rc = -err;
- goto out;
- } else if (err == ENODEV && opts->target.cpu_list) {
- pr_err("No such device - did you specify"
- " an out-of-range profile CPU?\n");
- rc = -err;
- goto out;
- } else if (err == EINVAL) {
- if (!opts->exclude_guest_missing &&
- (attr->exclude_guest || attr->exclude_host)) {
- pr_debug("Old kernel, cannot exclude "
- "guest or host samples.\n");
- opts->exclude_guest_missing = true;
- goto fallback_missing_features;
- } else if (!opts->sample_id_all_missing) {
- /*
- * Old kernel, no attr->sample_id_type_all field
- */
- opts->sample_id_all_missing = true;
- if (!opts->sample_time && !opts->raw_samples && !time_needed)
- attr->sample_type &= ~PERF_SAMPLE_TIME;
-
- goto retry_sample_id;
- }
- }
-
- /*
- * If it's cycles then fall back to hrtimer
- * based cpu-clock-tick sw counter, which
- * is always available even if no PMU support.
- *
- * PPC returns ENXIO until 2.6.37 (behavior changed
- * with commit b0a873e).
- */
- if ((err == ENOENT || err == ENXIO)
- && attr->type == PERF_TYPE_HARDWARE
- && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
-
+ if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
if (verbose)
- ui__warning("The cycles event is not supported, "
- "trying to fall back to cpu-clock-ticks\n");
- attr->type = PERF_TYPE_SOFTWARE;
- attr->config = PERF_COUNT_SW_CPU_CLOCK;
- if (pos->name) {
- free(pos->name);
- pos->name = NULL;
- }
+ ui__warning("%s\n", msg);
goto try_again;
}
- if (err == ENOENT) {
- ui__error("The %s event is not supported.\n",
- perf_evsel__name(pos));
- rc = -err;
- goto out;
- } else if ((err == EOPNOTSUPP) && (attr->precise_ip)) {
- ui__error("\'precise\' request may not be supported. "
- "Try removing 'p' modifier\n");
- rc = -err;
- goto out;
- }
-
- printf("\n");
- error("sys_perf_event_open() syscall returned with %d "
- "(%s) for event %s. /bin/dmesg may provide "
- "additional information.\n",
- err, strerror(err), perf_evsel__name(pos));
-
-#if defined(__i386__) || defined(__x86_64__)
- if (attr->type == PERF_TYPE_HARDWARE &&
- err == EOPNOTSUPP) {
- pr_err("No hardware sampling interrupt available."
- " No APIC? If so then you can boot the kernel"
- " with the \"lapic\" boot parameter to"
- " force-enable it.\n");
- rc = -err;
- goto out;
- }
-#endif
-
- pr_err("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
- rc = -err;
+ rc = -errno;
+ perf_evsel__open_strerror(pos, &opts->target,
+ errno, msg, sizeof(msg));
+ ui__error("%s\n", msg);
goto out;
}
}
@@ -430,10 +328,6 @@ static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
int err;
struct perf_tool *tool = data;
-
- if (machine__is_host(machine))
- return;
-
/*
*As for guest kernel when processing subcommand record&report,
*we arrange module mmap prior to guest kernel mmap and trigger
@@ -592,6 +486,9 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
goto out_delete_session;
}
+ if (!evsel_list->nr_groups)
+ perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
+
/*
* perf_session__delete(session) will be called at perf_record__exit()
*/
@@ -618,12 +515,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
- machine = perf_session__find_host_machine(session);
- if (!machine) {
- pr_err("Couldn't find native kernel information.\n");
- err = -1;
- goto out_delete_session;
- }
+ machine = &session->machines.host;
if (opts->pipe_output) {
err = perf_event__synthesize_attrs(tool, session,
@@ -676,9 +568,10 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
"Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
"Check /proc/modules permission or run as root.\n");
- if (perf_guest)
- perf_session__process_machines(session, tool,
- perf_event__synthesize_guest_os);
+ if (perf_guest) {
+ machines__process_guests(&session->machines,
+ perf_event__synthesize_guest_os, tool);
+ }
if (!opts->target.system_wide)
err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
@@ -875,11 +768,10 @@ static int get_stack_size(char *str, unsigned long *_size)
}
#endif /* LIBUNWIND_SUPPORT */
-static int
-parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg,
- int unset)
+int record_parse_callchain_opt(const struct option *opt,
+ const char *arg, int unset)
{
- struct perf_record *rec = (struct perf_record *)opt->value;
+ struct perf_record_opts *opts = opt->value;
char *tok, *name, *saveptr = NULL;
char *buf;
int ret = -1;
@@ -905,7 +797,7 @@ parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg,
/* Framepointer style */
if (!strncmp(name, "fp", sizeof("fp"))) {
if (!strtok_r(NULL, ",", &saveptr)) {
- rec->opts.call_graph = CALLCHAIN_FP;
+ opts->call_graph = CALLCHAIN_FP;
ret = 0;
} else
pr_err("callchain: No more arguments "
@@ -918,20 +810,20 @@ parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg,
const unsigned long default_stack_dump_size = 8192;
ret = 0;
- rec->opts.call_graph = CALLCHAIN_DWARF;
- rec->opts.stack_dump_size = default_stack_dump_size;
+ opts->call_graph = CALLCHAIN_DWARF;
+ opts->stack_dump_size = default_stack_dump_size;
tok = strtok_r(NULL, ",", &saveptr);
if (tok) {
unsigned long size = 0;
ret = get_stack_size(tok, &size);
- rec->opts.stack_dump_size = size;
+ opts->stack_dump_size = size;
}
if (!ret)
pr_debug("callchain: stack dump size %d\n",
- rec->opts.stack_dump_size);
+ opts->stack_dump_size);
#endif /* LIBUNWIND_SUPPORT */
} else {
pr_err("callchain: Unknown -g option "
@@ -944,7 +836,7 @@ parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg,
free(buf);
if (!ret)
- pr_debug("callchain: type %d\n", rec->opts.call_graph);
+ pr_debug("callchain: type %d\n", opts->call_graph);
return ret;
}
@@ -982,9 +874,9 @@ static struct perf_record record = {
#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "
#ifdef LIBUNWIND_SUPPORT
-static const char callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
+const char record_callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
#else
-static const char callchain_help[] = CALLCHAIN_HELP "[fp]";
+const char record_callchain_help[] = CALLCHAIN_HELP "[fp]";
#endif
/*
@@ -1028,9 +920,9 @@ const struct option record_options[] = {
"number of mmap data pages"),
OPT_BOOLEAN(0, "group", &record.opts.group,
"put the counters into a counter group"),
- OPT_CALLBACK_DEFAULT('g', "call-graph", &record, "mode[,dump_size]",
- callchain_help, &parse_callchain_opt,
- "fp"),
+ OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts,
+ "mode[,dump_size]", record_callchain_help,
+ &record_parse_callchain_opt, "fp"),
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index fc25100..96b5a7f 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -8,6 +8,7 @@
#include "builtin.h"
#include "util/util.h"
+#include "util/cache.h"
#include "util/annotate.h"
#include "util/color.h"
@@ -54,6 +55,16 @@ struct perf_report {
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
};
+static int perf_report_config(const char *var, const char *value, void *cb)
+{
+ if (!strcmp(var, "report.group")) {
+ symbol_conf.event_group = perf_config_bool(var, value);
+ return 0;
+ }
+
+ return perf_default_config(var, value, cb);
+}
+
static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
struct addr_location *al,
struct perf_sample *sample,
@@ -299,6 +310,21 @@ static size_t hists__fprintf_nr_sample_events(struct hists *self,
char unit;
unsigned long nr_samples = self->stats.nr_events[PERF_RECORD_SAMPLE];
u64 nr_events = self->stats.total_period;
+ struct perf_evsel *evsel = hists_to_evsel(self);
+ char buf[512];
+ size_t size = sizeof(buf);
+
+ if (symbol_conf.event_group && evsel->nr_members > 1) {
+ struct perf_evsel *pos;
+
+ perf_evsel__group_desc(evsel, buf, size);
+ evname = buf;
+
+ for_each_group_member(pos, evsel) {
+ nr_samples += pos->hists.stats.nr_events[PERF_RECORD_SAMPLE];
+ nr_events += pos->hists.stats.total_period;
+ }
+ }
nr_samples = convert_unit(nr_samples, &unit);
ret = fprintf(fp, "# Samples: %lu%c", nr_samples, unit);
@@ -319,6 +345,10 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
struct hists *hists = &pos->hists;
const char *evname = perf_evsel__name(pos);
+ if (symbol_conf.event_group &&
+ !perf_evsel__is_group_leader(pos))
+ continue;
+
hists__fprintf_nr_sample_events(hists, evname, stdout);
hists__fprintf(hists, true, 0, 0, stdout);
fprintf(stdout, "\n\n");
@@ -372,7 +402,7 @@ static int __cmd_report(struct perf_report *rep)
if (ret)
goto out_delete;
- kernel_map = session->host_machine.vmlinux_maps[MAP__FUNCTION];
+ kernel_map = session->machines.host.vmlinux_maps[MAP__FUNCTION];
kernel_kmap = map__kmap(kernel_map);
if (kernel_map == NULL ||
(kernel_map->dso->hit &&
@@ -416,8 +446,16 @@ static int __cmd_report(struct perf_report *rep)
hists->symbol_filter_str = rep->symbol_filter_str;
hists__collapse_resort(hists);
- hists__output_resort(hists);
nr_samples += hists->stats.nr_events[PERF_RECORD_SAMPLE];
+
+ /* Non-group events are considered as leader */
+ if (symbol_conf.event_group &&
+ !perf_evsel__is_group_leader(pos)) {
+ struct hists *leader_hists = &pos->leader->hists;
+
+ hists__match(leader_hists, hists);
+ hists__link(leader_hists, hists);
+ }
}
if (nr_samples == 0) {
@@ -425,11 +463,22 @@ static int __cmd_report(struct perf_report *rep)
goto out_delete;
}
+ list_for_each_entry(pos, &session->evlist->entries, node)
+ hists__output_resort(&pos->hists);
+
if (use_browser > 0) {
if (use_browser == 1) {
- perf_evlist__tui_browse_hists(session->evlist, help,
- NULL,
- &session->header.env);
+ ret = perf_evlist__tui_browse_hists(session->evlist,
+ help,
+ NULL,
+ &session->header.env);
+ /*
+ * Usually "ret" is the last pressed key, and we only
+ * care if the key notifies us to switch data file.
+ */
+ if (ret != K_SWITCH_INPUT_DATA)
+ ret = 0;
+
} else if (use_browser == 2) {
perf_evlist__gtk_browse_hists(session->evlist, help,
NULL);
@@ -595,8 +644,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN(0, "stdio", &report.use_stdio,
"Use the stdio interface"),
OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
- "sort by key(s): pid, comm, dso, symbol, parent, dso_to,"
- " dso_from, symbol_to, symbol_from, mispredict"),
+ "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline,"
+ " dso_to, dso_from, symbol_to, symbol_from, mispredict"),
OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
"Show sample percentage for different cpu modes"),
OPT_STRING('p', "parent", &parent_pattern, "regex",
@@ -638,6 +687,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
"Specify disassembler style (e.g. -M intel for intel syntax)"),
OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
"Show a column with the sum of periods"),
+ OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
+ "Show event group information together"),
OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "",
"use branch records for histogram filling", parse_branch_mode),
OPT_STRING(0, "objdump", &objdump_path, "path",
@@ -645,6 +696,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_END()
};
+ perf_config(perf_report_config, NULL);
+
argc = parse_options(argc, argv, options, report_usage, 0);
if (report.use_stdio)
@@ -663,6 +716,16 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
else
input_name = "perf.data";
}
+
+ if (strcmp(input_name, "-") != 0)
+ setup_browser(true);
+ else {
+ use_browser = 0;
+ perf_hpp__column_enable(PERF_HPP__OVERHEAD);
+ perf_hpp__init();
+ }
+
+repeat:
session = perf_session__new(input_name, O_RDONLY,
report.force, false, &report.tool);
if (session == NULL)
@@ -688,14 +751,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
}
- if (strcmp(input_name, "-") != 0)
- setup_browser(true);
- else {
- use_browser = 0;
- perf_hpp__init();
- }
-
- setup_sorting(report_usage, options);
+ if (setup_sorting() < 0)
+ usage_with_options(report_usage, options);
/*
* Only in the newt browser we are doing integrated annotation,
@@ -763,6 +820,12 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
}
ret = __cmd_report(&report);
+ if (ret == K_SWITCH_INPUT_DATA) {
+ perf_session__delete(session);
+ goto repeat;
+ } else
+ ret = 0;
+
error:
perf_session__delete(session);
return ret;
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index cc28b85..1382294 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1475,9 +1475,9 @@ static int perf_sched__read_events(struct perf_sched *sched, bool destroy,
goto out_delete;
}
- sched->nr_events = session->hists.stats.nr_events[0];
- sched->nr_lost_events = session->hists.stats.total_lost;
- sched->nr_lost_chunks = session->hists.stats.nr_events[PERF_RECORD_LOST];
+ sched->nr_events = session->stats.nr_events[0];
+ sched->nr_lost_events = session->stats.total_lost;
+ sched->nr_lost_chunks = session->stats.nr_events[PERF_RECORD_LOST];
}
if (destroy)
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index b363e7b..92d4658 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -692,7 +692,7 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
const char *arg, int unset __maybe_unused)
{
char *tok;
- int i, imax = sizeof(all_output_options) / sizeof(struct output_option);
+ int i, imax = ARRAY_SIZE(all_output_options);
int j;
int rc = 0;
char *str = strdup(arg);
@@ -909,18 +909,6 @@ static const char *ends_with(const char *str, const char *suffix)
return NULL;
}
-static char *ltrim(char *str)
-{
- int len = strlen(str);
-
- while (len && isspace(*str)) {
- len--;
- str++;
- }
-
- return str;
-}
-
static int read_script_info(struct script_desc *desc, const char *filename)
{
char line[BUFSIZ], *p;
@@ -1487,7 +1475,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
return -1;
}
- perf_session__fprintf_info(session, stdout, show_full_info);
+ if (!script_name && !generate_script_lang)
+ perf_session__fprintf_info(session, stdout, show_full_info);
if (!no_callchain)
symbol_conf.use_callchain = true;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index c247fac..9984876 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -65,6 +65,11 @@
#define CNTR_NOT_SUPPORTED "<not supported>"
#define CNTR_NOT_COUNTED "<not counted>"
+static void print_stat(int argc, const char **argv);
+static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
+static void print_counter(struct perf_evsel *counter, char *prefix);
+static void print_aggr_socket(char *prefix);
+
static struct perf_evlist *evsel_list;
static struct perf_target target = {
@@ -75,6 +80,7 @@ static int run_count = 1;
static bool no_inherit = false;
static bool scale = true;
static bool no_aggr = false;
+static bool aggr_socket = false;
static pid_t child_pid = -1;
static bool null_run = false;
static int detailed_run = 0;
@@ -87,6 +93,9 @@ static FILE *output = NULL;
static const char *pre_cmd = NULL;
static const char *post_cmd = NULL;
static bool sync_run = false;
+static unsigned int interval = 0;
+static struct timespec ref_time;
+static struct cpu_map *sock_map;
static volatile int done = 0;
@@ -94,6 +103,28 @@ struct perf_stat {
struct stats res_stats[3];
};
+static inline void diff_timespec(struct timespec *r, struct timespec *a,
+ struct timespec *b)
+{
+ r->tv_sec = a->tv_sec - b->tv_sec;
+ if (a->tv_nsec < b->tv_nsec) {
+ r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
+ r->tv_sec--;
+ } else {
+ r->tv_nsec = a->tv_nsec - b->tv_nsec ;
+ }
+}
+
+static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
+{
+ return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus;
+}
+
+static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
+{
+ return perf_evsel__cpus(evsel)->nr;
+}
+
static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
{
evsel->priv = zalloc(sizeof(struct perf_stat));
@@ -106,14 +137,27 @@ static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
evsel->priv = NULL;
}
-static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
+static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
{
- return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus;
+ void *addr;
+ size_t sz;
+
+ sz = sizeof(*evsel->counts) +
+ (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values));
+
+ addr = zalloc(sz);
+ if (!addr)
+ return -ENOMEM;
+
+ evsel->prev_raw_counts = addr;
+
+ return 0;
}
-static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
+static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
{
- return perf_evsel__cpus(evsel)->nr;
+ free(evsel->prev_raw_counts);
+ evsel->prev_raw_counts = NULL;
}
static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
@@ -132,8 +176,6 @@ static struct stats walltime_nsecs_stats;
static int create_perf_stat_counter(struct perf_evsel *evsel)
{
struct perf_event_attr *attr = &evsel->attr;
- bool exclude_guest_missing = false;
- int ret;
if (scale)
attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
@@ -141,38 +183,16 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
attr->inherit = !no_inherit;
-retry:
- if (exclude_guest_missing)
- evsel->attr.exclude_guest = evsel->attr.exclude_host = 0;
-
- if (perf_target__has_cpu(&target)) {
- ret = perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
- if (ret)
- goto check_ret;
- return 0;
- }
+ if (perf_target__has_cpu(&target))
+ return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
if (!perf_target__has_task(&target) &&
- !perf_evsel__is_group_member(evsel)) {
+ perf_evsel__is_group_leader(evsel)) {
attr->disabled = 1;
attr->enable_on_exec = 1;
}
- ret = perf_evsel__open_per_thread(evsel, evsel_list->threads);
- if (!ret)
- return 0;
- /* fall through */
-check_ret:
- if (ret && errno == EINVAL) {
- if (!exclude_guest_missing &&
- (evsel->attr.exclude_guest || evsel->attr.exclude_host)) {
- pr_debug("Old kernel, cannot exclude "
- "guest or host samples.\n");
- exclude_guest_missing = true;
- goto retry;
- }
- }
- return ret;
+ return perf_evsel__open_per_thread(evsel, evsel_list->threads);
}
/*
@@ -269,15 +289,79 @@ static int read_counter(struct perf_evsel *counter)
return 0;
}
+static void print_interval(void)
+{
+ static int num_print_interval;
+ struct perf_evsel *counter;
+ struct perf_stat *ps;
+ struct timespec ts, rs;
+ char prefix[64];
+
+ if (no_aggr) {
+ list_for_each_entry(counter, &evsel_list->entries, node) {
+ ps = counter->priv;
+ memset(ps->res_stats, 0, sizeof(ps->res_stats));
+ read_counter(counter);
+ }
+ } else {
+ list_for_each_entry(counter, &evsel_list->entries, node) {
+ ps = counter->priv;
+ memset(ps->res_stats, 0, sizeof(ps->res_stats));
+ read_counter_aggr(counter);
+ }
+ }
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ diff_timespec(&rs, &ts, &ref_time);
+ sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);
+
+ if (num_print_interval == 0 && !csv_output) {
+ if (aggr_socket)
+ fprintf(output, "# time socket cpus counts events\n");
+ else if (no_aggr)
+ fprintf(output, "# time CPU counts events\n");
+ else
+ fprintf(output, "# time counts events\n");
+ }
+
+ if (++num_print_interval == 25)
+ num_print_interval = 0;
+
+ if (aggr_socket)
+ print_aggr_socket(prefix);
+ else if (no_aggr) {
+ list_for_each_entry(counter, &evsel_list->entries, node)
+ print_counter(counter, prefix);
+ } else {
+ list_for_each_entry(counter, &evsel_list->entries, node)
+ print_counter_aggr(counter, prefix);
+ }
+}
+
static int __run_perf_stat(int argc __maybe_unused, const char **argv)
{
+ char msg[512];
unsigned long long t0, t1;
struct perf_evsel *counter;
+ struct timespec ts;
int status = 0;
int child_ready_pipe[2], go_pipe[2];
const bool forks = (argc > 0);
char buf;
+ if (interval) {
+ ts.tv_sec = interval / 1000;
+ ts.tv_nsec = (interval % 1000) * 1000000;
+ } else {
+ ts.tv_sec = 1;
+ ts.tv_nsec = 0;
+ }
+
+ if (aggr_socket
+ && cpu_map__build_socket_map(evsel_list->cpus, &sock_map)) {
+ perror("cannot build socket map");
+ return -1;
+ }
+
if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
perror("failed to create pipes");
return -1;
@@ -348,20 +432,13 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv)
continue;
}
- if (errno == EPERM || errno == EACCES) {
- error("You may not have permission to collect %sstats.\n"
- "\t Consider tweaking"
- " /proc/sys/kernel/perf_event_paranoid or running as root.",
- target.system_wide ? "system-wide " : "");
- } else {
- error("open_counter returned with %d (%s). "
- "/bin/dmesg may provide additional information.\n",
- errno, strerror(errno));
- }
+ perf_evsel__open_strerror(counter, &target,
+ errno, msg, sizeof(msg));
+ ui__error("%s\n", msg);
+
if (child_pid != -1)
kill(child_pid, SIGTERM);
- pr_err("Not all events could be opened.\n");
return -1;
}
counter->supported = true;
@@ -377,14 +454,25 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv)
* Enable counters and exec the command:
*/
t0 = rdclock();
+ clock_gettime(CLOCK_MONOTONIC, &ref_time);
if (forks) {
close(go_pipe[1]);
+ if (interval) {
+ while (!waitpid(child_pid, &status, WNOHANG)) {
+ nanosleep(&ts, NULL);
+ print_interval();
+ }
+ }
wait(&status);
if (WIFSIGNALED(status))
psignal(WTERMSIG(status), argv[0]);
} else {
- while(!done) sleep(1);
+ while (!done) {
+ nanosleep(&ts, NULL);
+ if (interval)
+ print_interval();
+ }
}
t1 = rdclock();
@@ -454,13 +542,21 @@ static void print_noise(struct perf_evsel *evsel, double avg)
print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
}
-static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
+static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
{
double msecs = avg / 1e6;
char cpustr[16] = { '\0', };
const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s";
- if (no_aggr)
+ if (aggr_socket)
+ sprintf(cpustr, "S%*d%s%*d%s",
+ csv_output ? 0 : -5,
+ cpu,
+ csv_sep,
+ csv_output ? 0 : 4,
+ nr,
+ csv_sep);
+ else if (no_aggr)
sprintf(cpustr, "CPU%*d%s",
csv_output ? 0 : -4,
perf_evsel__cpus(evsel)->map[cpu], csv_sep);
@@ -470,7 +566,7 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
if (evsel->cgrp)
fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
- if (csv_output)
+ if (csv_output || interval)
return;
if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
@@ -659,7 +755,7 @@ static void print_ll_cache_misses(int cpu,
fprintf(output, " of all LL-cache hits ");
}
-static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
+static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
{
double total, ratio = 0.0;
char cpustr[16] = { '\0', };
@@ -672,7 +768,15 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
else
fmt = "%s%18.0f%s%-25s";
- if (no_aggr)
+ if (aggr_socket)
+ sprintf(cpustr, "S%*d%s%*d%s",
+ csv_output ? 0 : -5,
+ cpu,
+ csv_sep,
+ csv_output ? 0 : 4,
+ nr,
+ csv_sep);
+ else if (no_aggr)
sprintf(cpustr, "CPU%*d%s",
csv_output ? 0 : -4,
perf_evsel__cpus(evsel)->map[cpu], csv_sep);
@@ -684,12 +788,11 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
if (evsel->cgrp)
fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
- if (csv_output)
+ if (csv_output || interval)
return;
if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
total = avg_stats(&runtime_cycles_stats[cpu]);
-
if (total)
ratio = avg / total;
@@ -779,16 +882,83 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
}
}
+static void print_aggr_socket(char *prefix)
+{
+ struct perf_evsel *counter;
+ u64 ena, run, val;
+ int cpu, s, s2, sock, nr;
+
+ if (!sock_map)
+ return;
+
+ for (s = 0; s < sock_map->nr; s++) {
+ sock = cpu_map__socket(sock_map, s);
+ list_for_each_entry(counter, &evsel_list->entries, node) {
+ val = ena = run = 0;
+ nr = 0;
+ for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+ s2 = cpu_map__get_socket(evsel_list->cpus, cpu);
+ if (s2 != sock)
+ continue;
+ val += counter->counts->cpu[cpu].val;
+ ena += counter->counts->cpu[cpu].ena;
+ run += counter->counts->cpu[cpu].run;
+ nr++;
+ }
+ if (prefix)
+ fprintf(output, "%s", prefix);
+
+ if (run == 0 || ena == 0) {
+ fprintf(output, "S%*d%s%*d%s%*s%s%*s",
+ csv_output ? 0 : -5,
+ s,
+ csv_sep,
+ csv_output ? 0 : 4,
+ nr,
+ csv_sep,
+ csv_output ? 0 : 18,
+ counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
+ csv_sep,
+ csv_output ? 0 : -24,
+ perf_evsel__name(counter));
+ if (counter->cgrp)
+ fprintf(output, "%s%s",
+ csv_sep, counter->cgrp->name);
+
+ fputc('\n', output);
+ continue;
+ }
+
+ if (nsec_counter(counter))
+ nsec_printout(sock, nr, counter, val);
+ else
+ abs_printout(sock, nr, counter, val);
+
+ if (!csv_output) {
+ print_noise(counter, 1.0);
+
+ if (run != ena)
+ fprintf(output, " (%.2f%%)",
+ 100.0 * run / ena);
+ }
+ fputc('\n', output);
+ }
+ }
+}
+
/*
* Print out the results of a single counter:
* aggregated counts in system-wide mode
*/
-static void print_counter_aggr(struct perf_evsel *counter)
+static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
{
struct perf_stat *ps = counter->priv;
double avg = avg_stats(&ps->res_stats[0]);
int scaled = counter->counts->scaled;
+ if (prefix)
+ fprintf(output, "%s", prefix);
+
if (scaled == -1) {
fprintf(output, "%*s%s%*s",
csv_output ? 0 : 18,
@@ -805,9 +975,9 @@ static void print_counter_aggr(struct perf_evsel *counter)
}
if (nsec_counter(counter))
- nsec_printout(-1, counter, avg);
+ nsec_printout(-1, 0, counter, avg);
else
- abs_printout(-1, counter, avg);
+ abs_printout(-1, 0, counter, avg);
print_noise(counter, avg);
@@ -831,7 +1001,7 @@ static void print_counter_aggr(struct perf_evsel *counter)
* Print out the results of a single counter:
* does not use aggregated count in system-wide
*/
-static void print_counter(struct perf_evsel *counter)
+static void print_counter(struct perf_evsel *counter, char *prefix)
{
u64 ena, run, val;
int cpu;
@@ -840,6 +1010,10 @@ static void print_counter(struct perf_evsel *counter)
val = counter->counts->cpu[cpu].val;
ena = counter->counts->cpu[cpu].ena;
run = counter->counts->cpu[cpu].run;
+
+ if (prefix)
+ fprintf(output, "%s", prefix);
+
if (run == 0 || ena == 0) {
fprintf(output, "CPU%*d%s%*s%s%*s",
csv_output ? 0 : -4,
@@ -859,9 +1033,9 @@ static void print_counter(struct perf_evsel *counter)
}
if (nsec_counter(counter))
- nsec_printout(cpu, counter, val);
+ nsec_printout(cpu, 0, counter, val);
else
- abs_printout(cpu, counter, val);
+ abs_printout(cpu, 0, counter, val);
if (!csv_output) {
print_noise(counter, 1.0);
@@ -899,12 +1073,14 @@ static void print_stat(int argc, const char **argv)
fprintf(output, ":\n\n");
}
- if (no_aggr) {
+ if (aggr_socket)
+ print_aggr_socket(NULL);
+ else if (no_aggr) {
list_for_each_entry(counter, &evsel_list->entries, node)
- print_counter(counter);
+ print_counter(counter, NULL);
} else {
list_for_each_entry(counter, &evsel_list->entries, node)
- print_counter_aggr(counter);
+ print_counter_aggr(counter, NULL);
}
if (!csv_output) {
@@ -925,7 +1101,7 @@ static volatile int signr = -1;
static void skip_signal(int signo)
{
- if(child_pid == -1)
+ if ((child_pid == -1) || interval)
done = 1;
signr = signo;
@@ -1145,6 +1321,9 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
"command to run prior to the measured command"),
OPT_STRING(0, "post", &post_cmd, "command",
"command to run after to the measured command"),
+ OPT_UINTEGER('I', "interval-print", &interval,
+ "print counts at regular interval in ms (>= 100)"),
+ OPT_BOOLEAN(0, "aggr-socket", &aggr_socket, "aggregate counts per processor socket"),
OPT_END()
};
const char * const stat_usage[] = {
@@ -1231,6 +1410,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
usage_with_options(stat_usage, options);
}
+ if (aggr_socket) {
+ if (!perf_target__has_cpu(&target)) {
+ fprintf(stderr, "--aggr-socket only available in system-wide mode (-a)\n");
+ usage_with_options(stat_usage, options);
+ }
+ no_aggr = true;
+ }
+
if (add_default_attributes())
goto out;
@@ -1245,12 +1432,23 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
usage_with_options(stat_usage, options);
return -1;
}
+ if (interval && interval < 100) {
+ pr_err("print interval must be >= 100ms\n");
+ usage_with_options(stat_usage, options);
+ return -1;
+ }
list_for_each_entry(pos, &evsel_list->entries, node) {
if (perf_evsel__alloc_stat_priv(pos) < 0 ||
perf_evsel__alloc_counts(pos, perf_evsel__nr_cpus(pos)) < 0)
goto out_free_fd;
}
+ if (interval) {
+ list_for_each_entry(pos, &evsel_list->entries, node) {
+ if (perf_evsel__alloc_prev_raw_counts(pos) < 0)
+ goto out_free_fd;
+ }
+ }
/*
* We dont want to block the signals - that would cause
@@ -1260,6 +1458,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
*/
atexit(sig_atexit);
signal(SIGINT, skip_signal);
+ signal(SIGCHLD, skip_signal);
signal(SIGALRM, skip_signal);
signal(SIGABRT, skip_signal);
@@ -1272,11 +1471,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
status = run_perf_stat(argc, argv);
}
- if (status != -1)
+ if (status != -1 && !interval)
print_stat(argc, argv);
out_free_fd:
- list_for_each_entry(pos, &evsel_list->entries, node)
+ list_for_each_entry(pos, &evsel_list->entries, node) {
perf_evsel__free_stat_priv(pos);
+ perf_evsel__free_counts(pos);
+ perf_evsel__free_prev_raw_counts(pos);
+ }
perf_evlist__delete_maps(evsel_list);
out:
perf_evlist__delete(evsel_list);
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index c9ff395..72f6eb7 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -68,27 +68,7 @@
#include <linux/unistd.h>
#include <linux/types.h>
-void get_term_dimensions(struct winsize *ws)
-{
- char *s = getenv("LINES");
-
- if (s != NULL) {
- ws->ws_row = atoi(s);
- s = getenv("COLUMNS");
- if (s != NULL) {
- ws->ws_col = atoi(s);
- if (ws->ws_row && ws->ws_col)
- return;
- }
- }
-#ifdef TIOCGWINSZ
- if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
- ws->ws_row && ws->ws_col)
- return;
-#endif
- ws->ws_row = 25;
- ws->ws_col = 80;
-}
+static volatile int done;
static void perf_top__update_print_entries(struct perf_top *top)
{
@@ -453,8 +433,10 @@ static int perf_top__key_mapped(struct perf_top *top, int c)
return 0;
}
-static void perf_top__handle_keypress(struct perf_top *top, int c)
+static bool perf_top__handle_keypress(struct perf_top *top, int c)
{
+ bool ret = true;
+
if (!perf_top__key_mapped(top, c)) {
struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
struct termios tc, save;
@@ -475,7 +457,7 @@ static void perf_top__handle_keypress(struct perf_top *top, int c)
tcsetattr(0, TCSAFLUSH, &save);
if (!perf_top__key_mapped(top, c))
- return;
+ return ret;
}
switch (c) {
@@ -537,7 +519,8 @@ static void perf_top__handle_keypress(struct perf_top *top, int c)
printf("exiting.\n");
if (top->dump_symtab)
perf_session__fprintf_dsos(top->session, stderr);
- exit(0);
+ ret = false;
+ break;
case 's':
perf_top__prompt_symbol(top, "Enter details symbol");
break;
@@ -560,6 +543,8 @@ static void perf_top__handle_keypress(struct perf_top *top, int c)
default:
break;
}
+
+ return ret;
}
static void perf_top__sort_new_samples(void *arg)
@@ -596,13 +581,12 @@ static void *display_thread_tui(void *arg)
* via --uid.
*/
list_for_each_entry(pos, &top->evlist->entries, node)
- pos->hists.uid_filter_str = top->target.uid_str;
+ pos->hists.uid_filter_str = top->record_opts.target.uid_str;
perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
&top->session->header.env);
- exit_browser(0);
- exit(0);
+ done = 1;
return NULL;
}
@@ -626,7 +610,7 @@ repeat:
/* trash return*/
getc(stdin);
- while (1) {
+ while (!done) {
perf_top__print_sym_table(top);
/*
* Either timeout expired or we got an EINTR due to SIGWINCH,
@@ -640,15 +624,14 @@ repeat:
continue;
/* Fall trhu */
default:
- goto process_hotkey;
+ c = getc(stdin);
+ tcsetattr(0, TCSAFLUSH, &save);
+
+ if (perf_top__handle_keypress(top, c))
+ goto repeat;
+ done = 1;
}
}
-process_hotkey:
- c = getc(stdin);
- tcsetattr(0, TCSAFLUSH, &save);
-
- perf_top__handle_keypress(top, c);
- goto repeat;
return NULL;
}
@@ -716,7 +699,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
static struct intlist *seen;
if (!seen)
- seen = intlist__new();
+ seen = intlist__new(NULL);
if (!intlist__has_entry(seen, event->ip.pid)) {
pr_err("Can't find guest [%d]'s kernel information\n",
@@ -727,8 +710,8 @@ static void perf_event__process_sample(struct perf_tool *tool,
}
if (!machine) {
- pr_err("%u unprocessable samples recorded.",
- top->session->hists.stats.nr_unprocessable_samples++);
+ pr_err("%u unprocessable samples recorded.\r",
+ top->session->stats.nr_unprocessable_samples++);
return;
}
@@ -847,13 +830,13 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
++top->us_samples;
if (top->hide_user_symbols)
continue;
- machine = perf_session__find_host_machine(session);
+ machine = &session->machines.host;
break;
case PERF_RECORD_MISC_KERNEL:
++top->kernel_samples;
if (top->hide_kernel_symbols)
continue;
- machine = perf_session__find_host_machine(session);
+ machine = &session->machines.host;
break;
case PERF_RECORD_MISC_GUEST_KERNEL:
++top->guest_kernel_samples;
@@ -878,7 +861,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
hists__inc_nr_events(&evsel->hists, event->header.type);
machine__process_event(machine, event);
} else
- ++session->hists.stats.nr_unknown_events;
+ ++session->stats.nr_unknown_events;
}
}
@@ -890,123 +873,42 @@ static void perf_top__mmap_read(struct perf_top *top)
perf_top__mmap_read_idx(top, i);
}
-static void perf_top__start_counters(struct perf_top *top)
+static int perf_top__start_counters(struct perf_top *top)
{
+ char msg[512];
struct perf_evsel *counter;
struct perf_evlist *evlist = top->evlist;
+ struct perf_record_opts *opts = &top->record_opts;
- if (top->group)
- perf_evlist__set_leader(evlist);
+ perf_evlist__config(evlist, opts);
list_for_each_entry(counter, &evlist->entries, node) {
- struct perf_event_attr *attr = &counter->attr;
-
- attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
-
- if (top->freq) {
- attr->sample_type |= PERF_SAMPLE_PERIOD;
- attr->freq = 1;
- attr->sample_freq = top->freq;
- }
-
- if (evlist->nr_entries > 1) {
- attr->sample_type |= PERF_SAMPLE_ID;
- attr->read_format |= PERF_FORMAT_ID;
- }
-
- if (perf_target__has_cpu(&top->target))
- attr->sample_type |= PERF_SAMPLE_CPU;
-
- if (symbol_conf.use_callchain)
- attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
-
- attr->mmap = 1;
- attr->comm = 1;
- attr->inherit = top->inherit;
-fallback_missing_features:
- if (top->exclude_guest_missing)
- attr->exclude_guest = attr->exclude_host = 0;
-retry_sample_id:
- attr->sample_id_all = top->sample_id_all_missing ? 0 : 1;
try_again:
if (perf_evsel__open(counter, top->evlist->cpus,
top->evlist->threads) < 0) {
- int err = errno;
-
- if (err == EPERM || err == EACCES) {
- ui__error_paranoid();
- goto out_err;
- } else if (err == EINVAL) {
- if (!top->exclude_guest_missing &&
- (attr->exclude_guest || attr->exclude_host)) {
- pr_debug("Old kernel, cannot exclude "
- "guest or host samples.\n");
- top->exclude_guest_missing = true;
- goto fallback_missing_features;
- } else if (!top->sample_id_all_missing) {
- /*
- * Old kernel, no attr->sample_id_type_all field
- */
- top->sample_id_all_missing = true;
- goto retry_sample_id;
- }
- }
- /*
- * If it's cycles then fall back to hrtimer
- * based cpu-clock-tick sw counter, which
- * is always available even if no PMU support:
- */
- if ((err == ENOENT || err == ENXIO) &&
- (attr->type == PERF_TYPE_HARDWARE) &&
- (attr->config == PERF_COUNT_HW_CPU_CYCLES)) {
-
+ if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
if (verbose)
- ui__warning("Cycles event not supported,\n"
- "trying to fall back to cpu-clock-ticks\n");
-
- attr->type = PERF_TYPE_SOFTWARE;
- attr->config = PERF_COUNT_SW_CPU_CLOCK;
- if (counter->name) {
- free(counter->name);
- counter->name = NULL;
- }
+ ui__warning("%s\n", msg);
goto try_again;
}
- if (err == ENOENT) {
- ui__error("The %s event is not supported.\n",
- perf_evsel__name(counter));
- goto out_err;
- } else if (err == EMFILE) {
- ui__error("Too many events are opened.\n"
- "Try again after reducing the number of events\n");
- goto out_err;
- } else if ((err == EOPNOTSUPP) && (attr->precise_ip)) {
- ui__error("\'precise\' request may not be supported. "
- "Try removing 'p' modifier\n");
- goto out_err;
- }
-
- ui__error("The sys_perf_event_open() syscall "
- "returned with %d (%s). /bin/dmesg "
- "may provide additional information.\n"
- "No CONFIG_PERF_EVENTS=y kernel support "
- "configured?\n", err, strerror(err));
+ perf_evsel__open_strerror(counter, &opts->target,
+ errno, msg, sizeof(msg));
+ ui__error("%s\n", msg);
goto out_err;
}
}
- if (perf_evlist__mmap(evlist, top->mmap_pages, false) < 0) {
+ if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
ui__error("Failed to mmap with %d (%s)\n",
errno, strerror(errno));
goto out_err;
}
- return;
+ return 0;
out_err:
- exit_browser(0);
- exit(0);
+ return -1;
}
static int perf_top__setup_sample_type(struct perf_top *top)
@@ -1016,7 +918,7 @@ static int perf_top__setup_sample_type(struct perf_top *top)
ui__error("Selected -g but \"sym\" not present in --sort/-s.");
return -EINVAL;
}
- } else if (!top->dont_use_callchains && callchain_param.mode != CHAIN_NONE) {
+ } else if (callchain_param.mode != CHAIN_NONE) {
if (callchain_register_param(&callchain_param) < 0) {
ui__error("Can't register callchain params.\n");
return -EINVAL;
@@ -1028,6 +930,7 @@ static int perf_top__setup_sample_type(struct perf_top *top)
static int __cmd_top(struct perf_top *top)
{
+ struct perf_record_opts *opts = &top->record_opts;
pthread_t thread;
int ret;
/*
@@ -1042,26 +945,42 @@ static int __cmd_top(struct perf_top *top)
if (ret)
goto out_delete;
- if (perf_target__has_task(&top->target))
+ if (perf_target__has_task(&opts->target))
perf_event__synthesize_thread_map(&top->tool, top->evlist->threads,
perf_event__process,
- &top->session->host_machine);
+ &top->session->machines.host);
else
perf_event__synthesize_threads(&top->tool, perf_event__process,
- &top->session->host_machine);
- perf_top__start_counters(top);
+ &top->session->machines.host);
+
+ ret = perf_top__start_counters(top);
+ if (ret)
+ goto out_delete;
+
top->session->evlist = top->evlist;
perf_session__set_id_hdr_size(top->session);
+ /*
+ * When perf is starting the traced process, all the events (apart from
+ * group members) have enable_on_exec=1 set, so don't spoil it by
+ * prematurely enabling them.
+ *
+ * XXX 'top' still doesn't start workloads like record, trace, but should,
+ * so leave the check here.
+ */
+ if (!perf_target__none(&opts->target))
+ perf_evlist__enable(top->evlist);
+
/* Wait for a minimal set of events before starting the snapshot */
poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
perf_top__mmap_read(top);
+ ret = -1;
if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
display_thread), top)) {
ui__error("Could not create display thread.\n");
- exit(-1);
+ goto out_delete;
}
if (top->realtime_prio) {
@@ -1070,11 +989,11 @@ static int __cmd_top(struct perf_top *top)
param.sched_priority = top->realtime_prio;
if (sched_setscheduler(0, SCHED_FIFO, &param)) {
ui__error("Could not set realtime priority.\n");
- exit(-1);
+ goto out_delete;
}
}
- while (1) {
+ while (!done) {
u64 hits = top->samples;
perf_top__mmap_read(top);
@@ -1083,126 +1002,67 @@ static int __cmd_top(struct perf_top *top)
ret = poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
}
+ ret = 0;
out_delete:
perf_session__delete(top->session);
top->session = NULL;
- return 0;
+ return ret;
}
static int
parse_callchain_opt(const struct option *opt, const char *arg, int unset)
{
- struct perf_top *top = (struct perf_top *)opt->value;
- char *tok, *tok2;
- char *endptr;
-
/*
* --no-call-graph
*/
- if (unset) {
- top->dont_use_callchains = true;
+ if (unset)
return 0;
- }
symbol_conf.use_callchain = true;
- if (!arg)
- return 0;
-
- tok = strtok((char *)arg, ",");
- if (!tok)
- return -1;
-
- /* get the output mode */
- if (!strncmp(tok, "graph", strlen(arg)))
- callchain_param.mode = CHAIN_GRAPH_ABS;
-
- else if (!strncmp(tok, "flat", strlen(arg)))
- callchain_param.mode = CHAIN_FLAT;
-
- else if (!strncmp(tok, "fractal", strlen(arg)))
- callchain_param.mode = CHAIN_GRAPH_REL;
-
- else if (!strncmp(tok, "none", strlen(arg))) {
- callchain_param.mode = CHAIN_NONE;
- symbol_conf.use_callchain = false;
-
- return 0;
- } else
- return -1;
-
- /* get the min percentage */
- tok = strtok(NULL, ",");
- if (!tok)
- goto setup;
-
- callchain_param.min_percent = strtod(tok, &endptr);
- if (tok == endptr)
- return -1;
-
- /* get the print limit */
- tok2 = strtok(NULL, ",");
- if (!tok2)
- goto setup;
-
- if (tok2[0] != 'c') {
- callchain_param.print_limit = strtod(tok2, &endptr);
- tok2 = strtok(NULL, ",");
- if (!tok2)
- goto setup;
- }
-
- /* get the call chain order */
- if (!strcmp(tok2, "caller"))
- callchain_param.order = ORDER_CALLER;
- else if (!strcmp(tok2, "callee"))
- callchain_param.order = ORDER_CALLEE;
- else
- return -1;
-setup:
- if (callchain_register_param(&callchain_param) < 0) {
- fprintf(stderr, "Can't register callchain params\n");
- return -1;
- }
- return 0;
+ return record_parse_callchain_opt(opt, arg, unset);
}
int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
{
- struct perf_evsel *pos;
int status;
char errbuf[BUFSIZ];
struct perf_top top = {
.count_filter = 5,
.delay_secs = 2,
- .freq = 4000, /* 4 KHz */
- .mmap_pages = 128,
- .sym_pcnt_filter = 5,
- .target = {
- .uses_mmap = true,
+ .record_opts = {
+ .mmap_pages = UINT_MAX,
+ .user_freq = UINT_MAX,
+ .user_interval = ULLONG_MAX,
+ .freq = 4000, /* 4 KHz */
+ .target = {
+ .uses_mmap = true,
+ },
},
+ .sym_pcnt_filter = 5,
};
- char callchain_default_opt[] = "fractal,0.5,callee";
+ struct perf_record_opts *opts = &top.record_opts;
+ struct perf_target *target = &opts->target;
const struct option options[] = {
OPT_CALLBACK('e', "event", &top.evlist, "event",
"event selector. use 'perf list' to list available events",
parse_events_option),
- OPT_INTEGER('c', "count", &top.default_interval,
- "event period to sample"),
- OPT_STRING('p', "pid", &top.target.pid, "pid",
+ OPT_U64('c', "count", &opts->user_interval, "event period to sample"),
+ OPT_STRING('p', "pid", &target->pid, "pid",
"profile events on existing process id"),
- OPT_STRING('t', "tid", &top.target.tid, "tid",
+ OPT_STRING('t', "tid", &target->tid, "tid",
"profile events on existing thread id"),
- OPT_BOOLEAN('a', "all-cpus", &top.target.system_wide,
+ OPT_BOOLEAN('a', "all-cpus", &target->system_wide,
"system-wide collection from all CPUs"),
- OPT_STRING('C', "cpu", &top.target.cpu_list, "cpu",
+ OPT_STRING('C', "cpu", &target->cpu_list, "cpu",
"list of cpus to monitor"),
OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
"file", "vmlinux pathname"),
OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
"hide kernel symbols"),
- OPT_UINTEGER('m', "mmap-pages", &top.mmap_pages, "number of mmap data pages"),
+ OPT_UINTEGER('m', "mmap-pages", &opts->mmap_pages,
+ "number of mmap data pages"),
OPT_INTEGER('r', "realtime", &top.realtime_prio,
"collect data with this RT SCHED_FIFO priority"),
OPT_INTEGER('d', "delay", &top.delay_secs,
@@ -1211,16 +1071,14 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
"dump the symbol table used for profiling"),
OPT_INTEGER('f', "count-filter", &top.count_filter,
"only display functions with more events than this"),
- OPT_BOOLEAN('g', "group", &top.group,
+ OPT_BOOLEAN('g', "group", &opts->group,
"put the counters into a counter group"),
- OPT_BOOLEAN('i', "inherit", &top.inherit,
- "child tasks inherit counters"),
+ OPT_BOOLEAN('i', "no-inherit", &opts->no_inherit,
+ "child tasks do not inherit counters"),
OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name",
"symbol to annotate"),
- OPT_BOOLEAN('z', "zero", &top.zero,
- "zero history across updates"),
- OPT_INTEGER('F', "freq", &top.freq,
- "profile at this frequency"),
+ OPT_BOOLEAN('z', "zero", &top.zero, "zero history across updates"),
+ OPT_UINTEGER('F', "freq", &opts->user_freq, "profile at this frequency"),
OPT_INTEGER('E', "entries", &top.print_entries,
"display this many functions"),
OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
@@ -1233,10 +1091,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
"sort by key(s): pid, comm, dso, symbol, parent"),
OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
"Show a column with the number of samples"),
- OPT_CALLBACK_DEFAULT('G', "call-graph", &top, "output_type,min_percent, call_order",
- "Display callchains using output_type (graph, flat, fractal, or none), min percent threshold and callchain order. "
- "Default: fractal,0.5,callee", &parse_callchain_opt,
- callchain_default_opt),
+ OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts,
+ "mode[,dump_size]", record_callchain_help,
+ &parse_callchain_opt, "fp"),
OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
"Show a column with the sum of periods"),
OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
@@ -1251,7 +1108,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
"Display raw encoding of assembly instructions (default)"),
OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
"Specify disassembler style (e.g. -M intel for intel syntax)"),
- OPT_STRING('u', "uid", &top.target.uid_str, "user", "user to profile"),
+ OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"),
OPT_END()
};
const char * const top_usage[] = {
@@ -1272,7 +1129,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
if (sort_order == default_sort_order)
sort_order = "dso,symbol";
- setup_sorting(top_usage, options);
+ if (setup_sorting() < 0)
+ usage_with_options(top_usage, options);
if (top.use_stdio)
use_browser = 0;
@@ -1281,33 +1139,33 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
setup_browser(false);
- status = perf_target__validate(&top.target);
+ status = perf_target__validate(target);
if (status) {
- perf_target__strerror(&top.target, status, errbuf, BUFSIZ);
+ perf_target__strerror(target, status, errbuf, BUFSIZ);
ui__warning("%s", errbuf);
}
- status = perf_target__parse_uid(&top.target);
+ status = perf_target__parse_uid(target);
if (status) {
int saved_errno = errno;
- perf_target__strerror(&top.target, status, errbuf, BUFSIZ);
+ perf_target__strerror(target, status, errbuf, BUFSIZ);
ui__error("%s", errbuf);
status = -saved_errno;
goto out_delete_evlist;
}
- if (perf_target__none(&top.target))
- top.target.system_wide = true;
+ if (perf_target__none(target))
+ target->system_wide = true;
- if (perf_evlist__create_maps(top.evlist, &top.target) < 0)
+ if (perf_evlist__create_maps(top.evlist, target) < 0)
usage_with_options(top_usage, options);
if (!top.evlist->nr_entries &&
perf_evlist__add_default(top.evlist) < 0) {
ui__error("Not enough memory for event selector list\n");
- return -ENOMEM;
+ goto out_delete_maps;
}
symbol_conf.nr_events = top.evlist->nr_entries;
@@ -1315,24 +1173,22 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
if (top.delay_secs < 1)
top.delay_secs = 1;
+ if (opts->user_interval != ULLONG_MAX)
+ opts->default_interval = opts->user_interval;
+ if (opts->user_freq != UINT_MAX)
+ opts->freq = opts->user_freq;
+
/*
* User specified count overrides default frequency.
*/
- if (top.default_interval)
- top.freq = 0;
- else if (top.freq) {
- top.default_interval = top.freq;
+ if (opts->default_interval)
+ opts->freq = 0;
+ else if (opts->freq) {
+ opts->default_interval = opts->freq;
} else {
ui__error("frequency and count are zero, aborting\n");
- exit(EXIT_FAILURE);
- }
-
- list_for_each_entry(pos, &top.evlist->entries, node) {
- /*
- * Fill in the ones not specifically initialized via -c:
- */
- if (!pos->attr.sample_period)
- pos->attr.sample_period = top.default_interval;
+ status = -EINVAL;
+ goto out_delete_maps;
}
top.sym_evsel = perf_evlist__first(top.evlist);
@@ -1365,6 +1221,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
status = __cmd_top(&top);
+out_delete_maps:
+ perf_evlist__delete_maps(top.evlist);
out_delete_evlist:
perf_evlist__delete(top.evlist);
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 7932ffa..d222d7f 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -455,7 +455,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
goto out_delete_evlist;
}
- perf_evlist__config_attrs(evlist, &trace->opts);
+ perf_evlist__config(evlist, &trace->opts);
signal(SIGCHLD, sig_handler);
signal(SIGINT, sig_handler);
diff --git a/tools/perf/config/feature-tests.mak b/tools/perf/config/feature-tests.mak
index f5ac774..b4eabb4 100644
--- a/tools/perf/config/feature-tests.mak
+++ b/tools/perf/config/feature-tests.mak
@@ -225,3 +225,14 @@ int main(void)
return on_exit(NULL, NULL);
}
endef
+
+define SOURCE_LIBNUMA
+#include <numa.h>
+#include <numaif.h>
+
+int main(void)
+{
+ numa_available();
+ return 0;
+}
+endef \ No newline at end of file
diff --git a/tools/perf/config/utilities.mak b/tools/perf/config/utilities.mak
index e541312..8ef3bd3 100644
--- a/tools/perf/config/utilities.mak
+++ b/tools/perf/config/utilities.mak
@@ -13,7 +13,7 @@ newline := $(newline)
# what should replace a newline when escaping
# newlines; the default is a bizarre string.
#
-nl-escape = $(or $(1),m822df3020w6a44id34bt574ctac44eb9f4n)
+nl-escape = $(if $(1),$(1),m822df3020w6a44id34bt574ctac44eb9f4n)
# escape-nl
#
@@ -173,9 +173,9 @@ _ge-abspath = $(if $(is-executable),$(1))
# Usage: absolute-executable-path-or-empty = $(call get-executable-or-default,variable,default)
#
define get-executable-or-default
-$(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2)))
+$(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2),$(1)))
endef
-_ge_attempt = $(or $(get-executable),$(_gea_warn),$(call _gea_err,$(2)))
+_ge_attempt = $(if $(get-executable),$(get-executable),$(_gea_warn)$(call _gea_err,$(2)))
_gea_warn = $(warning The path '$(1)' is not executable.)
_gea_err = $(if $(1),$(error Please set '$(1)' appropriately))
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 0f661fb..095b882 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -328,14 +328,23 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
if (S_ISFIFO(st.st_mode) || S_ISSOCK(st.st_mode))
return 0;
+ status = 1;
/* Check for ENOSPC and EIO errors.. */
- if (fflush(stdout))
- die("write failure on standard output: %s", strerror(errno));
- if (ferror(stdout))
- die("unknown write failure on standard output");
- if (fclose(stdout))
- die("close failed on standard output: %s", strerror(errno));
- return 0;
+ if (fflush(stdout)) {
+ fprintf(stderr, "write failure on standard output: %s", strerror(errno));
+ goto out;
+ }
+ if (ferror(stdout)) {
+ fprintf(stderr, "unknown write failure on standard output");
+ goto out;
+ }
+ if (fclose(stdout)) {
+ fprintf(stderr, "close failed on standard output: %s", strerror(errno));
+ goto out;
+ }
+ status = 0;
+out:
+ return status;
}
static void handle_internal_command(int argc, const char **argv)
@@ -467,7 +476,8 @@ int main(int argc, const char **argv)
cmd += 5;
argv[0] = cmd;
handle_internal_command(argc, argv);
- die("cannot handle %s internally", cmd);
+ fprintf(stderr, "cannot handle %s internally", cmd);
+ goto out;
}
/* Look for flags.. */
@@ -485,7 +495,7 @@ int main(int argc, const char **argv)
printf("\n usage: %s\n\n", perf_usage_string);
list_common_cmds_help();
printf("\n %s\n\n", perf_more_info_string);
- exit(1);
+ goto out;
}
cmd = argv[0];
@@ -517,7 +527,7 @@ int main(int argc, const char **argv)
fprintf(stderr, "Expansion of alias '%s' failed; "
"'%s' is not a perf-command\n",
cmd, argv[0]);
- exit(1);
+ goto out;
}
if (!done_help) {
cmd = argv[0] = help_unknown_cmd(cmd);
@@ -528,6 +538,6 @@ int main(int argc, const char **argv)
fprintf(stderr, "Failed to run command '%s': %s\n",
cmd, strerror(errno));
-
+out:
return 1;
}
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 2c340e7..c2206c8 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -1,10 +1,6 @@
#ifndef _PERF_PERF_H
#define _PERF_PERF_H
-struct winsize;
-
-void get_term_dimensions(struct winsize *ws);
-
#include <asm/unistd.h>
#if defined(__i386__)
@@ -107,32 +103,6 @@ void get_term_dimensions(struct winsize *ws);
#include "util/types.h"
#include <stdbool.h>
-struct perf_mmap {
- void *base;
- int mask;
- unsigned int prev;
-};
-
-static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm)
-{
- struct perf_event_mmap_page *pc = mm->base;
- int head = pc->data_head;
- rmb();
- return head;
-}
-
-static inline void perf_mmap__write_tail(struct perf_mmap *md,
- unsigned long tail)
-{
- struct perf_event_mmap_page *pc = md->base;
-
- /*
- * ensure all reads are done before we write the tail out.
- */
- /* mb(); */
- pc->data_tail = tail;
-}
-
/*
* prctl(PR_TASK_PERF_EVENTS_DISABLE) will (cheaply) disable all
* counters in the current task.
@@ -237,8 +207,6 @@ struct perf_record_opts {
bool raw_samples;
bool sample_address;
bool sample_time;
- bool sample_id_all_missing;
- bool exclude_guest_missing;
bool period;
unsigned int freq;
unsigned int mmap_pages;
diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-record b/tools/perf/scripts/perl/bin/workqueue-stats-record
deleted file mode 100644
index 8edda90..0000000
--- a/tools/perf/scripts/perl/bin/workqueue-stats-record
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-perf record -e workqueue:workqueue_creation -e workqueue:workqueue_destruction -e workqueue:workqueue_execution -e workqueue:workqueue_insertion $@
diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-report b/tools/perf/scripts/perl/bin/workqueue-stats-report
deleted file mode 100644
index 6d91411..0000000
--- a/tools/perf/scripts/perl/bin/workqueue-stats-report
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/bash
-# description: workqueue stats (ins/exe/create/destroy)
-perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/workqueue-stats.pl
diff --git a/tools/perf/scripts/perl/rwtop.pl b/tools/perf/scripts/perl/rwtop.pl
index 4bb3ecd..8b20787 100644
--- a/tools/perf/scripts/perl/rwtop.pl
+++ b/tools/perf/scripts/perl/rwtop.pl
@@ -17,6 +17,7 @@ use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
use lib "./Perf-Trace-Util/lib";
use Perf::Trace::Core;
use Perf::Trace::Util;
+use POSIX qw/SIGALRM SA_RESTART/;
my $default_interval = 3;
my $nlines = 20;
@@ -90,7 +91,10 @@ sub syscalls::sys_enter_write
sub trace_begin
{
- $SIG{ALRM} = \&set_print_pending;
+ my $sa = POSIX::SigAction->new(\&set_print_pending);
+ $sa->flags(SA_RESTART);
+ $sa->safe(1);
+ POSIX::sigaction(SIGALRM, $sa) or die "Can't set SIGALRM handler: $!\n";
alarm 1;
}
diff --git a/tools/perf/scripts/perl/workqueue-stats.pl b/tools/perf/scripts/perl/workqueue-stats.pl
deleted file mode 100644
index a8eaff5..0000000
--- a/tools/perf/scripts/perl/workqueue-stats.pl
+++ /dev/null
@@ -1,129 +0,0 @@
-#!/usr/bin/perl -w
-# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
-# Licensed under the terms of the GNU GPL License version 2
-
-# Displays workqueue stats
-#
-# Usage:
-#
-# perf record -c 1 -f -a -R -e workqueue:workqueue_creation -e
-# workqueue:workqueue_destruction -e workqueue:workqueue_execution
-# -e workqueue:workqueue_insertion
-#
-# perf script -p -s tools/perf/scripts/perl/workqueue-stats.pl
-
-use 5.010000;
-use strict;
-use warnings;
-
-use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
-use lib "./Perf-Trace-Util/lib";
-use Perf::Trace::Core;
-use Perf::Trace::Util;
-
-my @cpus;
-
-sub workqueue::workqueue_destruction
-{
- my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
- $thread_comm, $thread_pid) = @_;
-
- $cpus[$common_cpu]{$thread_pid}{destroyed}++;
- $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm;
-}
-
-sub workqueue::workqueue_creation
-{
- my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
- $thread_comm, $thread_pid, $cpu) = @_;
-
- $cpus[$common_cpu]{$thread_pid}{created}++;
- $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm;
-}
-
-sub workqueue::workqueue_execution
-{
- my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
- $thread_comm, $thread_pid, $func) = @_;
-
- $cpus[$common_cpu]{$thread_pid}{executed}++;
- $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm;
-}
-
-sub workqueue::workqueue_insertion
-{
- my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
- $thread_comm, $thread_pid, $func) = @_;
-
- $cpus[$common_cpu]{$thread_pid}{inserted}++;
- $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm;
-}
-
-sub trace_end
-{
- print "workqueue work stats:\n\n";
- my $cpu = 0;
- printf("%3s %6s %6s\t%-20s\n", "cpu", "ins", "exec", "name");
- printf("%3s %6s %6s\t%-20s\n", "---", "---", "----", "----");
- foreach my $pidhash (@cpus) {
- while ((my $pid, my $wqhash) = each %$pidhash) {
- my $ins = $$wqhash{'inserted'} || 0;
- my $exe = $$wqhash{'executed'} || 0;
- my $comm = $$wqhash{'comm'} || "";
- if ($ins || $exe) {
- printf("%3u %6u %6u\t%-20s\n", $cpu, $ins, $exe, $comm);
- }
- }
- $cpu++;
- }
-
- $cpu = 0;
- print "\nworkqueue lifecycle stats:\n\n";
- printf("%3s %6s %6s\t%-20s\n", "cpu", "created", "destroyed", "name");
- printf("%3s %6s %6s\t%-20s\n", "---", "-------", "---------", "----");
- foreach my $pidhash (@cpus) {
- while ((my $pid, my $wqhash) = each %$pidhash) {
- my $created = $$wqhash{'created'} || 0;
- my $destroyed = $$wqhash{'destroyed'} || 0;
- my $comm = $$wqhash{'comm'} || "";
- if ($created || $destroyed) {
- printf("%3u %6u %6u\t%-20s\n", $cpu, $created, $destroyed,
- $comm);
- }
- }
- $cpu++;
- }
-
- print_unhandled();
-}
-
-my %unhandled;
-
-sub print_unhandled
-{
- if ((scalar keys %unhandled) == 0) {
- return;
- }
-
- print "\nunhandled events:\n\n";
-
- printf("%-40s %10s\n", "event", "count");
- printf("%-40s %10s\n", "----------------------------------------",
- "-----------");
-
- foreach my $event_name (keys %unhandled) {
- printf("%-40s %10d\n", $event_name, $unhandled{$event_name});
- }
-}
-
-sub trace_unhandled
-{
- my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm) = @_;
-
- $unhandled{$event_name}++;
-}
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index 25638a9..bdcceb8 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -19,6 +19,11 @@
* permissions. All the event text files are stored there.
*/
+/*
+ * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select
+ * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu.
+ */
+#define __SANE_USERSPACE_TYPES__
#include <stdlib.h>
#include <stdio.h>
#include <inttypes.h>
@@ -33,8 +38,6 @@
extern int verbose;
-bool test_attr__enabled;
-
static char *dir;
void test_attr__init(void)
@@ -146,7 +149,7 @@ static int run_dir(const char *d, const char *perf)
{
char cmd[3*PATH_MAX];
- snprintf(cmd, 3*PATH_MAX, "python %s/attr.py -d %s/attr/ -p %s %s",
+ snprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %s",
d, d, perf, verbose ? "-v" : "");
return system(cmd);
diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py
index e702b82..2f629ca 100644
--- a/tools/perf/tests/attr.py
+++ b/tools/perf/tests/attr.py
@@ -68,7 +68,7 @@ class Event(dict):
self[key] = val
def __init__(self, name, data, base):
- log.info(" Event %s" % name);
+ log.debug(" Event %s" % name);
self.name = name;
self.group = ''
self.add(base)
@@ -97,6 +97,14 @@ class Event(dict):
return False
return True
+ def diff(self, other):
+ for t in Event.terms:
+ if not self.has_key(t) or not other.has_key(t):
+ continue
+ if not self.compare_data(self[t], other[t]):
+ log.warning("expected %s=%s, got %s" % (t, self[t], other[t]))
+
+
# Test file description needs to have following sections:
# [config]
# - just single instance in file
@@ -113,7 +121,7 @@ class Test(object):
parser = ConfigParser.SafeConfigParser()
parser.read(path)
- log.warning("running '%s'" % path)
+ log.debug("running '%s'" % path)
self.path = path
self.test_dir = options.test_dir
@@ -128,7 +136,7 @@ class Test(object):
self.expect = {}
self.result = {}
- log.info(" loading expected events");
+ log.debug(" loading expected events");
self.load_events(path, self.expect)
def is_event(self, name):
@@ -164,7 +172,7 @@ class Test(object):
self.perf, self.command, tempdir, self.args)
ret = os.WEXITSTATUS(os.system(cmd))
- log.info(" running '%s' ret %d " % (cmd, ret))
+ log.warning(" running '%s' ret %d " % (cmd, ret))
if ret != int(self.ret):
raise Unsup(self)
@@ -172,7 +180,7 @@ class Test(object):
def compare(self, expect, result):
match = {}
- log.info(" compare");
+ log.debug(" compare");
# For each expected event find all matching
# events in result. Fail if there's not any.
@@ -187,10 +195,11 @@ class Test(object):
else:
log.debug(" ->FAIL");
- log.info(" match: [%s] matches %s" % (exp_name, str(exp_list)))
+ log.debug(" match: [%s] matches %s" % (exp_name, str(exp_list)))
# we did not any matching event - fail
if (not exp_list):
+ exp_event.diff(res_event)
raise Fail(self, 'match failure');
match[exp_name] = exp_list
@@ -208,10 +217,10 @@ class Test(object):
if res_group not in match[group]:
raise Fail(self, 'group failure')
- log.info(" group: [%s] matches group leader %s" %
+ log.debug(" group: [%s] matches group leader %s" %
(exp_name, str(match[group])))
- log.info(" matched")
+ log.debug(" matched")
def resolve_groups(self, events):
for name, event in events.items():
@@ -233,7 +242,7 @@ class Test(object):
self.run_cmd(tempdir);
# load events expectation for the test
- log.info(" loading result events");
+ log.debug(" loading result events");
for f in glob.glob(tempdir + '/event*'):
self.load_events(f, self.result);
diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record
index f1485d8..5bc3880 100644
--- a/tools/perf/tests/attr/base-record
+++ b/tools/perf/tests/attr/base-record
@@ -7,7 +7,7 @@ size=96
config=0
sample_period=4000
sample_type=263
-read_format=7
+read_format=0
disabled=1
inherit=1
pinned=0
diff --git a/tools/perf/tests/attr/test-record-group b/tools/perf/tests/attr/test-record-group
index a6599e9..57739ca 100644
--- a/tools/perf/tests/attr/test-record-group
+++ b/tools/perf/tests/attr/test-record-group
@@ -6,12 +6,14 @@ args = --group -e cycles,instructions kill >/dev/null 2>&1
fd=1
group_fd=-1
sample_type=327
+read_format=4
[event-2:base-record]
fd=2
group_fd=1
config=1
sample_type=327
+read_format=4
mmap=0
comm=0
enable_on_exec=0
diff --git a/tools/perf/tests/attr/test-record-group1 b/tools/perf/tests/attr/test-record-group1
index 5a8359d..c5548d0 100644
--- a/tools/perf/tests/attr/test-record-group1
+++ b/tools/perf/tests/attr/test-record-group1
@@ -1,11 +1,12 @@
[config]
command = record
-args = -e '{cycles,instructions}' kill >/tmp/krava 2>&1
+args = -e '{cycles,instructions}' kill >/dev/null 2>&1
[event-1:base-record]
fd=1
group_fd=-1
sample_type=327
+read_format=4
[event-2:base-record]
fd=2
@@ -13,6 +14,7 @@ group_fd=1
type=0
config=1
sample_type=327
+read_format=4
mmap=0
comm=0
enable_on_exec=0
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 186f675..acb98e0 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -4,6 +4,7 @@
* Builtin regression testing command: ever growing number of sanity tests
*/
#include "builtin.h"
+#include "intlist.h"
#include "tests.h"
#include "debug.h"
#include "color.h"
@@ -69,6 +70,14 @@ static struct test {
.func = test__attr,
},
{
+ .desc = "Test matching and linking mutliple hists",
+ .func = test__hists_link,
+ },
+ {
+ .desc = "Try 'use perf' in python, checking link problems",
+ .func = test__python_use,
+ },
+ {
.func = NULL,
},
};
@@ -97,7 +106,7 @@ static bool perf_test__matches(int curr, int argc, const char *argv[])
return false;
}
-static int __cmd_test(int argc, const char *argv[])
+static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
{
int i = 0;
int width = 0;
@@ -118,13 +127,28 @@ static int __cmd_test(int argc, const char *argv[])
continue;
pr_info("%2d: %-*s:", i, width, tests[curr].desc);
+
+ if (intlist__find(skiplist, i)) {
+ color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n");
+ continue;
+ }
+
pr_debug("\n--- start ---\n");
err = tests[curr].func();
pr_debug("---- end ----\n%s:", tests[curr].desc);
- if (err)
- color_fprintf(stderr, PERF_COLOR_RED, " FAILED!\n");
- else
+
+ switch (err) {
+ case TEST_OK:
pr_info(" Ok\n");
+ break;
+ case TEST_SKIP:
+ color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n");
+ break;
+ case TEST_FAIL:
+ default:
+ color_fprintf(stderr, PERF_COLOR_RED, " FAILED!\n");
+ break;
+ }
}
return 0;
@@ -152,11 +176,14 @@ int cmd_test(int argc, const char **argv, const char *prefix __maybe_unused)
"perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]",
NULL,
};
+ const char *skip = NULL;
const struct option test_options[] = {
+ OPT_STRING('s', "skip", &skip, "tests", "tests to skip"),
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show symbol address, etc)"),
OPT_END()
};
+ struct intlist *skiplist = NULL;
argc = parse_options(argc, argv, test_options, test_usage, 0);
if (argc >= 1 && !strcmp(argv[0], "list"))
@@ -169,5 +196,8 @@ int cmd_test(int argc, const char **argv, const char *prefix __maybe_unused)
if (symbol__init() < 0)
return -1;
- return __cmd_test(argc, argv);
+ if (skip != NULL)
+ skiplist = intlist__new(skip);
+
+ return __cmd_test(argc, argv, skiplist);
}
diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c
index e61fc82..0fd99a9 100644
--- a/tools/perf/tests/evsel-roundtrip-name.c
+++ b/tools/perf/tests/evsel-roundtrip-name.c
@@ -22,7 +22,7 @@ static int perf_evsel__roundtrip_cache_name_test(void)
for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
__perf_evsel__hw_cache_type_op_res_name(type, op, i,
name, sizeof(name));
- err = parse_events(evlist, name, 0);
+ err = parse_events(evlist, name);
if (err)
ret = err;
}
@@ -70,7 +70,7 @@ static int __perf_evsel__name_array_test(const char *names[], int nr_names)
return -ENOMEM;
for (i = 0; i < nr_names; ++i) {
- err = parse_events(evlist, names[i], 0);
+ err = parse_events(evlist, names[i]);
if (err) {
pr_debug("failed to parse event '%s', err %d\n",
names[i], err);
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
new file mode 100644
index 0000000..1be64a6
--- /dev/null
+++ b/tools/perf/tests/hists_link.c
@@ -0,0 +1,500 @@
+#include "perf.h"
+#include "tests.h"
+#include "debug.h"
+#include "symbol.h"
+#include "sort.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "machine.h"
+#include "thread.h"
+#include "parse-events.h"
+
+static struct {
+ u32 pid;
+ const char *comm;
+} fake_threads[] = {
+ { 100, "perf" },
+ { 200, "perf" },
+ { 300, "bash" },
+};
+
+static struct {
+ u32 pid;
+ u64 start;
+ const char *filename;
+} fake_mmap_info[] = {
+ { 100, 0x40000, "perf" },
+ { 100, 0x50000, "libc" },
+ { 100, 0xf0000, "[kernel]" },
+ { 200, 0x40000, "perf" },
+ { 200, 0x50000, "libc" },
+ { 200, 0xf0000, "[kernel]" },
+ { 300, 0x40000, "bash" },
+ { 300, 0x50000, "libc" },
+ { 300, 0xf0000, "[kernel]" },
+};
+
+struct fake_sym {
+ u64 start;
+ u64 length;
+ const char *name;
+};
+
+static struct fake_sym perf_syms[] = {
+ { 700, 100, "main" },
+ { 800, 100, "run_command" },
+ { 900, 100, "cmd_record" },
+};
+
+static struct fake_sym bash_syms[] = {
+ { 700, 100, "main" },
+ { 800, 100, "xmalloc" },
+ { 900, 100, "xfree" },
+};
+
+static struct fake_sym libc_syms[] = {
+ { 700, 100, "malloc" },
+ { 800, 100, "free" },
+ { 900, 100, "realloc" },
+};
+
+static struct fake_sym kernel_syms[] = {
+ { 700, 100, "schedule" },
+ { 800, 100, "page_fault" },
+ { 900, 100, "sys_perf_event_open" },
+};
+
+static struct {
+ const char *dso_name;
+ struct fake_sym *syms;
+ size_t nr_syms;
+} fake_symbols[] = {
+ { "perf", perf_syms, ARRAY_SIZE(perf_syms) },
+ { "bash", bash_syms, ARRAY_SIZE(bash_syms) },
+ { "libc", libc_syms, ARRAY_SIZE(libc_syms) },
+ { "[kernel]", kernel_syms, ARRAY_SIZE(kernel_syms) },
+};
+
+static struct machine *setup_fake_machine(struct machines *machines)
+{
+ struct machine *machine = machines__find(machines, HOST_KERNEL_ID);
+ size_t i;
+
+ if (machine == NULL) {
+ pr_debug("Not enough memory for machine setup\n");
+ return NULL;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(fake_threads); i++) {
+ struct thread *thread;
+
+ thread = machine__findnew_thread(machine, fake_threads[i].pid);
+ if (thread == NULL)
+ goto out;
+
+ thread__set_comm(thread, fake_threads[i].comm);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) {
+ union perf_event fake_mmap_event = {
+ .mmap = {
+ .header = { .misc = PERF_RECORD_MISC_USER, },
+ .pid = fake_mmap_info[i].pid,
+ .start = fake_mmap_info[i].start,
+ .len = 0x1000ULL,
+ .pgoff = 0ULL,
+ },
+ };
+
+ strcpy(fake_mmap_event.mmap.filename,
+ fake_mmap_info[i].filename);
+
+ machine__process_mmap_event(machine, &fake_mmap_event);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(fake_symbols); i++) {
+ size_t k;
+ struct dso *dso;
+
+ dso = __dsos__findnew(&machine->user_dsos,
+ fake_symbols[i].dso_name);
+ if (dso == NULL)
+ goto out;
+
+ /* emulate dso__load() */
+ dso__set_loaded(dso, MAP__FUNCTION);
+
+ for (k = 0; k < fake_symbols[i].nr_syms; k++) {
+ struct symbol *sym;
+ struct fake_sym *fsym = &fake_symbols[i].syms[k];
+
+ sym = symbol__new(fsym->start, fsym->length,
+ STB_GLOBAL, fsym->name);
+ if (sym == NULL)
+ goto out;
+
+ symbols__insert(&dso->symbols[MAP__FUNCTION], sym);
+ }
+ }
+
+ return machine;
+
+out:
+ pr_debug("Not enough memory for machine setup\n");
+ machine__delete_threads(machine);
+ machine__delete(machine);
+ return NULL;
+}
+
+struct sample {
+ u32 pid;
+ u64 ip;
+ struct thread *thread;
+ struct map *map;
+ struct symbol *sym;
+};
+
+static struct sample fake_common_samples[] = {
+ /* perf [kernel] schedule() */
+ { .pid = 100, .ip = 0xf0000 + 700, },
+ /* perf [perf] main() */
+ { .pid = 200, .ip = 0x40000 + 700, },
+ /* perf [perf] cmd_record() */
+ { .pid = 200, .ip = 0x40000 + 900, },
+ /* bash [bash] xmalloc() */
+ { .pid = 300, .ip = 0x40000 + 800, },
+ /* bash [libc] malloc() */
+ { .pid = 300, .ip = 0x50000 + 700, },
+};
+
+static struct sample fake_samples[][5] = {
+ {
+ /* perf [perf] run_command() */
+ { .pid = 100, .ip = 0x40000 + 800, },
+ /* perf [libc] malloc() */
+ { .pid = 100, .ip = 0x50000 + 700, },
+ /* perf [kernel] page_fault() */
+ { .pid = 100, .ip = 0xf0000 + 800, },
+ /* perf [kernel] sys_perf_event_open() */
+ { .pid = 200, .ip = 0xf0000 + 900, },
+ /* bash [libc] free() */
+ { .pid = 300, .ip = 0x50000 + 800, },
+ },
+ {
+ /* perf [libc] free() */
+ { .pid = 200, .ip = 0x50000 + 800, },
+ /* bash [libc] malloc() */
+ { .pid = 300, .ip = 0x50000 + 700, }, /* will be merged */
+ /* bash [bash] xfee() */
+ { .pid = 300, .ip = 0x40000 + 900, },
+ /* bash [libc] realloc() */
+ { .pid = 300, .ip = 0x50000 + 900, },
+ /* bash [kernel] page_fault() */
+ { .pid = 300, .ip = 0xf0000 + 800, },
+ },
+};
+
+static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
+{
+ struct perf_evsel *evsel;
+ struct addr_location al;
+ struct hist_entry *he;
+ struct perf_sample sample = { .cpu = 0, };
+ size_t i = 0, k;
+
+ /*
+ * each evsel will have 10 samples - 5 common and 5 distinct.
+ * However the second evsel also has a collapsed entry for
+ * "bash [libc] malloc" so total 9 entries will be in the tree.
+ */
+ list_for_each_entry(evsel, &evlist->entries, node) {
+ for (k = 0; k < ARRAY_SIZE(fake_common_samples); k++) {
+ const union perf_event event = {
+ .ip = {
+ .header = {
+ .misc = PERF_RECORD_MISC_USER,
+ },
+ .pid = fake_common_samples[k].pid,
+ .ip = fake_common_samples[k].ip,
+ },
+ };
+
+ if (perf_event__preprocess_sample(&event, machine, &al,
+ &sample, 0) < 0)
+ goto out;
+
+ he = __hists__add_entry(&evsel->hists, &al, NULL, 1);
+ if (he == NULL)
+ goto out;
+
+ fake_common_samples[k].thread = al.thread;
+ fake_common_samples[k].map = al.map;
+ fake_common_samples[k].sym = al.sym;
+ }
+
+ for (k = 0; k < ARRAY_SIZE(fake_samples[i]); k++) {
+ const union perf_event event = {
+ .ip = {
+ .header = {
+ .misc = PERF_RECORD_MISC_USER,
+ },
+ .pid = fake_samples[i][k].pid,
+ .ip = fake_samples[i][k].ip,
+ },
+ };
+
+ if (perf_event__preprocess_sample(&event, machine, &al,
+ &sample, 0) < 0)
+ goto out;
+
+ he = __hists__add_entry(&evsel->hists, &al, NULL, 1);
+ if (he == NULL)
+ goto out;
+
+ fake_samples[i][k].thread = al.thread;
+ fake_samples[i][k].map = al.map;
+ fake_samples[i][k].sym = al.sym;
+ }
+ i++;
+ }
+
+ return 0;
+
+out:
+ pr_debug("Not enough memory for adding a hist entry\n");
+ return -1;
+}
+
+static int find_sample(struct sample *samples, size_t nr_samples,
+ struct thread *t, struct map *m, struct symbol *s)
+{
+ while (nr_samples--) {
+ if (samples->thread == t && samples->map == m &&
+ samples->sym == s)
+ return 1;
+ samples++;
+ }
+ return 0;
+}
+
+static int __validate_match(struct hists *hists)
+{
+ size_t count = 0;
+ struct rb_root *root;
+ struct rb_node *node;
+
+ /*
+ * Only entries from fake_common_samples should have a pair.
+ */
+ if (sort__need_collapse)
+ root = &hists->entries_collapsed;
+ else
+ root = hists->entries_in;
+
+ node = rb_first(root);
+ while (node) {
+ struct hist_entry *he;
+
+ he = rb_entry(node, struct hist_entry, rb_node_in);
+
+ if (hist_entry__has_pairs(he)) {
+ if (find_sample(fake_common_samples,
+ ARRAY_SIZE(fake_common_samples),
+ he->thread, he->ms.map, he->ms.sym)) {
+ count++;
+ } else {
+ pr_debug("Can't find the matched entry\n");
+ return -1;
+ }
+ }
+
+ node = rb_next(node);
+ }
+
+ if (count != ARRAY_SIZE(fake_common_samples)) {
+ pr_debug("Invalid count for matched entries: %zd of %zd\n",
+ count, ARRAY_SIZE(fake_common_samples));
+ return -1;
+ }
+
+ return 0;
+}
+
+static int validate_match(struct hists *leader, struct hists *other)
+{
+ return __validate_match(leader) || __validate_match(other);
+}
+
+static int __validate_link(struct hists *hists, int idx)
+{
+ size_t count = 0;
+ size_t count_pair = 0;
+ size_t count_dummy = 0;
+ struct rb_root *root;
+ struct rb_node *node;
+
+ /*
+ * Leader hists (idx = 0) will have dummy entries from other,
+ * and some entries will have no pair. However every entry
+ * in other hists should have (dummy) pair.
+ */
+ if (sort__need_collapse)
+ root = &hists->entries_collapsed;
+ else
+ root = hists->entries_in;
+
+ node = rb_first(root);
+ while (node) {
+ struct hist_entry *he;
+
+ he = rb_entry(node, struct hist_entry, rb_node_in);
+
+ if (hist_entry__has_pairs(he)) {
+ if (!find_sample(fake_common_samples,
+ ARRAY_SIZE(fake_common_samples),
+ he->thread, he->ms.map, he->ms.sym) &&
+ !find_sample(fake_samples[idx],
+ ARRAY_SIZE(fake_samples[idx]),
+ he->thread, he->ms.map, he->ms.sym)) {
+ count_dummy++;
+ }
+ count_pair++;
+ } else if (idx) {
+ pr_debug("A entry from the other hists should have pair\n");
+ return -1;
+ }
+
+ count++;
+ node = rb_next(node);
+ }
+
+ /*
+ * Note that we have a entry collapsed in the other (idx = 1) hists.
+ */
+ if (idx == 0) {
+ if (count_dummy != ARRAY_SIZE(fake_samples[1]) - 1) {
+ pr_debug("Invalid count of dummy entries: %zd of %zd\n",
+ count_dummy, ARRAY_SIZE(fake_samples[1]) - 1);
+ return -1;
+ }
+ if (count != count_pair + ARRAY_SIZE(fake_samples[0])) {
+ pr_debug("Invalid count of total leader entries: %zd of %zd\n",
+ count, count_pair + ARRAY_SIZE(fake_samples[0]));
+ return -1;
+ }
+ } else {
+ if (count != count_pair) {
+ pr_debug("Invalid count of total other entries: %zd of %zd\n",
+ count, count_pair);
+ return -1;
+ }
+ if (count_dummy > 0) {
+ pr_debug("Other hists should not have dummy entries: %zd\n",
+ count_dummy);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int validate_link(struct hists *leader, struct hists *other)
+{
+ return __validate_link(leader, 0) || __validate_link(other, 1);
+}
+
+static void print_hists(struct hists *hists)
+{
+ int i = 0;
+ struct rb_root *root;
+ struct rb_node *node;
+
+ if (sort__need_collapse)
+ root = &hists->entries_collapsed;
+ else
+ root = hists->entries_in;
+
+ pr_info("----- %s --------\n", __func__);
+ node = rb_first(root);
+ while (node) {
+ struct hist_entry *he;
+
+ he = rb_entry(node, struct hist_entry, rb_node_in);
+
+ pr_info("%2d: entry: %-8s [%-8s] %20s: period = %"PRIu64"\n",
+ i, he->thread->comm, he->ms.map->dso->short_name,
+ he->ms.sym->name, he->stat.period);
+
+ i++;
+ node = rb_next(node);
+ }
+}
+
+int test__hists_link(void)
+{
+ int err = -1;
+ struct machines machines;
+ struct machine *machine = NULL;
+ struct perf_evsel *evsel, *first;
+ struct perf_evlist *evlist = perf_evlist__new(NULL, NULL);
+
+ if (evlist == NULL)
+ return -ENOMEM;
+
+ err = parse_events(evlist, "cpu-clock");
+ if (err)
+ goto out;
+ err = parse_events(evlist, "task-clock");
+ if (err)
+ goto out;
+
+ /* default sort order (comm,dso,sym) will be used */
+ if (setup_sorting() < 0)
+ goto out;
+
+ machines__init(&machines);
+
+ /* setup threads/dso/map/symbols also */
+ machine = setup_fake_machine(&machines);
+ if (!machine)
+ goto out;
+
+ if (verbose > 1)
+ machine__fprintf(machine, stderr);
+
+ /* process sample events */
+ err = add_hist_entries(evlist, machine);
+ if (err < 0)
+ goto out;
+
+ list_for_each_entry(evsel, &evlist->entries, node) {
+ hists__collapse_resort(&evsel->hists);
+
+ if (verbose > 2)
+ print_hists(&evsel->hists);
+ }
+
+ first = perf_evlist__first(evlist);
+ evsel = perf_evlist__last(evlist);
+
+ /* match common entries */
+ hists__match(&first->hists, &evsel->hists);
+ err = validate_match(&first->hists, &evsel->hists);
+ if (err)
+ goto out;
+
+ /* link common and/or dummy entries */
+ hists__link(&first->hists, &evsel->hists);
+ err = validate_link(&first->hists, &evsel->hists);
+ if (err)
+ goto out;
+
+ err = 0;
+
+out:
+ /* tear down everything */
+ perf_evlist__delete(evlist);
+ machines__exit(&machines);
+
+ return err;
+}
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index e174681..cdd5075 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -22,36 +22,16 @@ int test__basic_mmap(void)
struct thread_map *threads;
struct cpu_map *cpus;
struct perf_evlist *evlist;
- struct perf_event_attr attr = {
- .type = PERF_TYPE_TRACEPOINT,
- .read_format = PERF_FORMAT_ID,
- .sample_type = PERF_SAMPLE_ID,
- .watermark = 0,
- };
cpu_set_t cpu_set;
const char *syscall_names[] = { "getsid", "getppid", "getpgrp",
"getpgid", };
pid_t (*syscalls[])(void) = { (void *)getsid, getppid, getpgrp,
(void*)getpgid };
#define nsyscalls ARRAY_SIZE(syscall_names)
- int ids[nsyscalls];
unsigned int nr_events[nsyscalls],
expected_nr_events[nsyscalls], i, j;
struct perf_evsel *evsels[nsyscalls], *evsel;
- for (i = 0; i < nsyscalls; ++i) {
- char name[64];
-
- snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]);
- ids[i] = trace_event__id(name);
- if (ids[i] < 0) {
- pr_debug("Is debugfs mounted on /sys/kernel/debug?\n");
- return -1;
- }
- nr_events[i] = 0;
- expected_nr_events[i] = random() % 257;
- }
-
threads = thread_map__new(-1, getpid(), UINT_MAX);
if (threads == NULL) {
pr_debug("thread_map__new\n");
@@ -79,18 +59,19 @@ int test__basic_mmap(void)
goto out_free_cpus;
}
- /* anonymous union fields, can't be initialized above */
- attr.wakeup_events = 1;
- attr.sample_period = 1;
-
for (i = 0; i < nsyscalls; ++i) {
- attr.config = ids[i];
- evsels[i] = perf_evsel__new(&attr, i);
+ char name[64];
+
+ snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]);
+ evsels[i] = perf_evsel__newtp("syscalls", name, i);
if (evsels[i] == NULL) {
pr_debug("perf_evsel__new\n");
goto out_free_evlist;
}
+ evsels[i]->attr.wakeup_events = 1;
+ perf_evsel__set_sample_id(evsels[i]);
+
perf_evlist__add(evlist, evsels[i]);
if (perf_evsel__open(evsels[i], cpus, threads) < 0) {
@@ -99,6 +80,9 @@ int test__basic_mmap(void)
strerror(errno));
goto out_close_fd;
}
+
+ nr_events[i] = 0;
+ expected_nr_events[i] = 1 + rand() % 127;
}
if (perf_evlist__mmap(evlist, 128, true) < 0) {
@@ -128,6 +112,7 @@ int test__basic_mmap(void)
goto out_munmap;
}
+ err = -1;
evsel = perf_evlist__id2evsel(evlist, sample.id);
if (evsel == NULL) {
pr_debug("event with id %" PRIu64
@@ -137,16 +122,17 @@ int test__basic_mmap(void)
nr_events[evsel->idx]++;
}
+ err = 0;
list_for_each_entry(evsel, &evlist->entries, node) {
if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) {
pr_debug("expected %d %s events, got %d\n",
expected_nr_events[evsel->idx],
perf_evsel__name(evsel), nr_events[evsel->idx]);
+ err = -1;
goto out_munmap;
}
}
- err = 0;
out_munmap:
perf_evlist__munmap(evlist);
out_close_fd:
diff --git a/tools/perf/tests/open-syscall-all-cpus.c b/tools/perf/tests/open-syscall-all-cpus.c
index 31072ab..b0657a9 100644
--- a/tools/perf/tests/open-syscall-all-cpus.c
+++ b/tools/perf/tests/open-syscall-all-cpus.c
@@ -7,20 +7,12 @@
int test__open_syscall_event_on_all_cpus(void)
{
int err = -1, fd, cpu;
- struct thread_map *threads;
struct cpu_map *cpus;
struct perf_evsel *evsel;
- struct perf_event_attr attr;
unsigned int nr_open_calls = 111, i;
cpu_set_t cpu_set;
- int id = trace_event__id("sys_enter_open");
+ struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX);
- if (id < 0) {
- pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
- return -1;
- }
-
- threads = thread_map__new(-1, getpid(), UINT_MAX);
if (threads == NULL) {
pr_debug("thread_map__new\n");
return -1;
@@ -32,15 +24,11 @@ int test__open_syscall_event_on_all_cpus(void)
goto out_thread_map_delete;
}
-
CPU_ZERO(&cpu_set);
- memset(&attr, 0, sizeof(attr));
- attr.type = PERF_TYPE_TRACEPOINT;
- attr.config = id;
- evsel = perf_evsel__new(&attr, 0);
+ evsel = perf_evsel__newtp("syscalls", "sys_enter_open", 0);
if (evsel == NULL) {
- pr_debug("perf_evsel__new\n");
+ pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
goto out_thread_map_delete;
}
@@ -110,6 +98,7 @@ int test__open_syscall_event_on_all_cpus(void)
}
}
+ perf_evsel__free_counts(evsel);
out_close_fd:
perf_evsel__close_fd(evsel, 1, threads->nr);
out_evsel_delete:
diff --git a/tools/perf/tests/open-syscall.c b/tools/perf/tests/open-syscall.c
index 98be8b5..befc067 100644
--- a/tools/perf/tests/open-syscall.c
+++ b/tools/perf/tests/open-syscall.c
@@ -6,29 +6,18 @@
int test__open_syscall_event(void)
{
int err = -1, fd;
- struct thread_map *threads;
struct perf_evsel *evsel;
- struct perf_event_attr attr;
unsigned int nr_open_calls = 111, i;
- int id = trace_event__id("sys_enter_open");
+ struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX);
- if (id < 0) {
- pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
- return -1;
- }
-
- threads = thread_map__new(-1, getpid(), UINT_MAX);
if (threads == NULL) {
pr_debug("thread_map__new\n");
return -1;
}
- memset(&attr, 0, sizeof(attr));
- attr.type = PERF_TYPE_TRACEPOINT;
- attr.config = id;
- evsel = perf_evsel__new(&attr, 0);
+ evsel = perf_evsel__newtp("syscalls", "sys_enter_open", 0);
if (evsel == NULL) {
- pr_debug("perf_evsel__new\n");
+ pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
goto out_thread_map_delete;
}
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 32ee478..c5636f3 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -3,6 +3,7 @@
#include "evsel.h"
#include "evlist.h"
#include "sysfs.h"
+#include "debugfs.h"
#include "tests.h"
#include <linux/hw_breakpoint.h>
@@ -22,6 +23,7 @@ static int test__checkevent_tracepoint(struct perf_evlist *evlist)
struct perf_evsel *evsel = perf_evlist__first(evlist);
TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong number of groups", 0 == evlist->nr_groups);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type);
TEST_ASSERT_VAL("wrong sample_type",
PERF_TP_SAMPLE_TYPE == evsel->attr.sample_type);
@@ -34,6 +36,7 @@ static int test__checkevent_tracepoint_multi(struct perf_evlist *evlist)
struct perf_evsel *evsel;
TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries > 1);
+ TEST_ASSERT_VAL("wrong number of groups", 0 == evlist->nr_groups);
list_for_each_entry(evsel, &evlist->entries, node) {
TEST_ASSERT_VAL("wrong type",
@@ -463,10 +466,10 @@ static int test__checkevent_pmu_events(struct perf_evlist *evlist)
static int test__checkterms_simple(struct list_head *terms)
{
- struct parse_events__term *term;
+ struct parse_events_term *term;
/* config=10 */
- term = list_entry(terms->next, struct parse_events__term, list);
+ term = list_entry(terms->next, struct parse_events_term, list);
TEST_ASSERT_VAL("wrong type term",
term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG);
TEST_ASSERT_VAL("wrong type val",
@@ -475,7 +478,7 @@ static int test__checkterms_simple(struct list_head *terms)
TEST_ASSERT_VAL("wrong config", !term->config);
/* config1 */
- term = list_entry(term->list.next, struct parse_events__term, list);
+ term = list_entry(term->list.next, struct parse_events_term, list);
TEST_ASSERT_VAL("wrong type term",
term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG1);
TEST_ASSERT_VAL("wrong type val",
@@ -484,7 +487,7 @@ static int test__checkterms_simple(struct list_head *terms)
TEST_ASSERT_VAL("wrong config", !term->config);
/* config2=3 */
- term = list_entry(term->list.next, struct parse_events__term, list);
+ term = list_entry(term->list.next, struct parse_events_term, list);
TEST_ASSERT_VAL("wrong type term",
term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG2);
TEST_ASSERT_VAL("wrong type val",
@@ -493,7 +496,7 @@ static int test__checkterms_simple(struct list_head *terms)
TEST_ASSERT_VAL("wrong config", !term->config);
/* umask=1*/
- term = list_entry(term->list.next, struct parse_events__term, list);
+ term = list_entry(term->list.next, struct parse_events_term, list);
TEST_ASSERT_VAL("wrong type term",
term->type_term == PARSE_EVENTS__TERM_TYPE_USER);
TEST_ASSERT_VAL("wrong type val",
@@ -509,6 +512,7 @@ static int test__group1(struct perf_evlist *evlist)
struct perf_evsel *evsel, *leader;
TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
/* instructions:k */
evsel = leader = perf_evlist__first(evlist);
@@ -521,7 +525,9 @@ static int test__group1(struct perf_evlist *evlist)
TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
/* cycles:upp */
evsel = perf_evsel__next(evsel);
@@ -536,6 +542,7 @@ static int test__group1(struct perf_evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 2);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
return 0;
}
@@ -545,6 +552,7 @@ static int test__group2(struct perf_evlist *evlist)
struct perf_evsel *evsel, *leader;
TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
/* faults + :ku modifier */
evsel = leader = perf_evlist__first(evlist);
@@ -557,7 +565,9 @@ static int test__group2(struct perf_evlist *evlist)
TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
/* cache-references + :u modifier */
evsel = perf_evsel__next(evsel);
@@ -567,10 +577,11 @@ static int test__group2(struct perf_evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
/* cycles:k */
evsel = perf_evsel__next(evsel);
@@ -583,7 +594,7 @@ static int test__group2(struct perf_evlist *evlist)
TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
return 0;
}
@@ -593,6 +604,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
struct perf_evsel *evsel, *leader;
TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups);
/* group1 syscalls:sys_enter_open:H */
evsel = leader = perf_evlist__first(evlist);
@@ -606,9 +618,11 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong group name",
!strcmp(leader->group_name, "group1"));
+ TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
/* group1 cycles:kppp */
evsel = perf_evsel__next(evsel);
@@ -624,6 +638,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 3);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
/* group2 cycles + G modifier */
evsel = leader = perf_evsel__next(evsel);
@@ -636,9 +651,11 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong group name",
!strcmp(leader->group_name, "group2"));
+ TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
/* group2 1:3 + G modifier */
evsel = perf_evsel__next(evsel);
@@ -651,6 +668,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
/* instructions:u */
evsel = perf_evsel__next(evsel);
@@ -663,7 +681,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
return 0;
}
@@ -673,6 +691,7 @@ static int test__group4(struct perf_evlist *evlist __maybe_unused)
struct perf_evsel *evsel, *leader;
TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
/* cycles:u + p */
evsel = leader = perf_evlist__first(evlist);
@@ -687,7 +706,9 @@ static int test__group4(struct perf_evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 1);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
/* instructions:kp + p */
evsel = perf_evsel__next(evsel);
@@ -702,6 +723,7 @@ static int test__group4(struct perf_evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 2);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
return 0;
}
@@ -711,6 +733,7 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused)
struct perf_evsel *evsel, *leader;
TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups);
/* cycles + G */
evsel = leader = perf_evlist__first(evlist);
@@ -724,7 +747,9 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
/* instructions + G */
evsel = perf_evsel__next(evsel);
@@ -738,6 +763,7 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
/* cycles:G */
evsel = leader = perf_evsel__next(evsel);
@@ -751,7 +777,9 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
/* instructions:G */
evsel = perf_evsel__next(evsel);
@@ -765,6 +793,7 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
/* cycles */
evsel = perf_evsel__next(evsel);
@@ -777,18 +806,235 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+
+ return 0;
+}
+
+static int test__group_gh1(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel, *leader;
+
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
+
+ /* cycles + :H group modifier */
+ evsel = leader = perf_evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+
+ /* cache-misses:G + :H group modifier */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+
+ return 0;
+}
+
+static int test__group_gh2(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel, *leader;
+
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
+
+ /* cycles + :G group modifier */
+ evsel = leader = perf_evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+
+ /* cache-misses:H + :G group modifier */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+
+ return 0;
+}
+
+static int test__group_gh3(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel, *leader;
+
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
+
+ /* cycles:G + :u group modifier */
+ evsel = leader = perf_evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+
+ /* cache-misses:H + :u group modifier */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+
+ return 0;
+}
+
+static int test__group_gh4(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel, *leader;
+
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
+
+ /* cycles:G + :uG group modifier */
+ evsel = leader = perf_evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+
+ /* cache-misses:H + :uG group modifier */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
return 0;
}
-struct test__event_st {
+static int count_tracepoints(void)
+{
+ char events_path[PATH_MAX];
+ struct dirent *events_ent;
+ DIR *events_dir;
+ int cnt = 0;
+
+ scnprintf(events_path, PATH_MAX, "%s/tracing/events",
+ debugfs_find_mountpoint());
+
+ events_dir = opendir(events_path);
+
+ TEST_ASSERT_VAL("Can't open events dir", events_dir);
+
+ while ((events_ent = readdir(events_dir))) {
+ char sys_path[PATH_MAX];
+ struct dirent *sys_ent;
+ DIR *sys_dir;
+
+ if (!strcmp(events_ent->d_name, ".")
+ || !strcmp(events_ent->d_name, "..")
+ || !strcmp(events_ent->d_name, "enable")
+ || !strcmp(events_ent->d_name, "header_event")
+ || !strcmp(events_ent->d_name, "header_page"))
+ continue;
+
+ scnprintf(sys_path, PATH_MAX, "%s/%s",
+ events_path, events_ent->d_name);
+
+ sys_dir = opendir(sys_path);
+ TEST_ASSERT_VAL("Can't open sys dir", sys_dir);
+
+ while ((sys_ent = readdir(sys_dir))) {
+ if (!strcmp(sys_ent->d_name, ".")
+ || !strcmp(sys_ent->d_name, "..")
+ || !strcmp(sys_ent->d_name, "enable")
+ || !strcmp(sys_ent->d_name, "filter"))
+ continue;
+
+ cnt++;
+ }
+
+ closedir(sys_dir);
+ }
+
+ closedir(events_dir);
+ return cnt;
+}
+
+static int test__all_tracepoints(struct perf_evlist *evlist)
+{
+ TEST_ASSERT_VAL("wrong events count",
+ count_tracepoints() == evlist->nr_entries);
+
+ return test__checkevent_tracepoint_multi(evlist);
+}
+
+struct evlist_test {
const char *name;
__u32 type;
int (*check)(struct perf_evlist *evlist);
};
-static struct test__event_st test__events[] = {
+static struct evlist_test test__events[] = {
[0] = {
.name = "syscalls:sys_enter_open",
.check = test__checkevent_tracepoint,
@@ -921,9 +1167,29 @@ static struct test__event_st test__events[] = {
.name = "{cycles,instructions}:G,{cycles:G,instructions:G},cycles",
.check = test__group5,
},
+ [33] = {
+ .name = "*:*",
+ .check = test__all_tracepoints,
+ },
+ [34] = {
+ .name = "{cycles,cache-misses:G}:H",
+ .check = test__group_gh1,
+ },
+ [35] = {
+ .name = "{cycles,cache-misses:H}:G",
+ .check = test__group_gh2,
+ },
+ [36] = {
+ .name = "{cycles:G,cache-misses:H}:u",
+ .check = test__group_gh3,
+ },
+ [37] = {
+ .name = "{cycles:G,cache-misses:H}:uG",
+ .check = test__group_gh4,
+ },
};
-static struct test__event_st test__events_pmu[] = {
+static struct evlist_test test__events_pmu[] = {
[0] = {
.name = "cpu/config=10,config1,config2=3,period=1000/u",
.check = test__checkevent_pmu,
@@ -934,20 +1200,20 @@ static struct test__event_st test__events_pmu[] = {
},
};
-struct test__term {
+struct terms_test {
const char *str;
__u32 type;
int (*check)(struct list_head *terms);
};
-static struct test__term test__terms[] = {
+static struct terms_test test__terms[] = {
[0] = {
.str = "config=10,config1,config2=3,umask=1",
.check = test__checkterms_simple,
},
};
-static int test_event(struct test__event_st *e)
+static int test_event(struct evlist_test *e)
{
struct perf_evlist *evlist;
int ret;
@@ -956,7 +1222,7 @@ static int test_event(struct test__event_st *e)
if (evlist == NULL)
return -ENOMEM;
- ret = parse_events(evlist, e->name, 0);
+ ret = parse_events(evlist, e->name);
if (ret) {
pr_debug("failed to parse event '%s', err %d\n",
e->name, ret);
@@ -969,13 +1235,13 @@ static int test_event(struct test__event_st *e)
return ret;
}
-static int test_events(struct test__event_st *events, unsigned cnt)
+static int test_events(struct evlist_test *events, unsigned cnt)
{
int ret1, ret2 = 0;
unsigned i;
for (i = 0; i < cnt; i++) {
- struct test__event_st *e = &events[i];
+ struct evlist_test *e = &events[i];
pr_debug("running test %d '%s'\n", i, e->name);
ret1 = test_event(e);
@@ -986,7 +1252,7 @@ static int test_events(struct test__event_st *events, unsigned cnt)
return ret2;
}
-static int test_term(struct test__term *t)
+static int test_term(struct terms_test *t)
{
struct list_head *terms;
int ret;
@@ -1010,13 +1276,13 @@ static int test_term(struct test__term *t)
return ret;
}
-static int test_terms(struct test__term *terms, unsigned cnt)
+static int test_terms(struct terms_test *terms, unsigned cnt)
{
int ret = 0;
unsigned i;
for (i = 0; i < cnt; i++) {
- struct test__term *t = &terms[i];
+ struct terms_test *t = &terms[i];
pr_debug("running test %d '%s'\n", i, t->str);
ret = test_term(t);
@@ -1067,7 +1333,7 @@ static int test_pmu_events(void)
while (!ret && (ent = readdir(dir))) {
#define MAX_NAME 100
- struct test__event_st e;
+ struct evlist_test e;
char name[MAX_NAME];
if (!strcmp(ent->d_name, ".") ||
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 70e0d44..1e8e512 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -96,22 +96,22 @@ int test__PERF_RECORD(void)
err = perf_evlist__prepare_workload(evlist, &opts, argv);
if (err < 0) {
pr_debug("Couldn't run the workload!\n");
- goto out_delete_evlist;
+ goto out_delete_maps;
}
/*
* Config the evsels, setting attr->comm on the first one, etc.
*/
evsel = perf_evlist__first(evlist);
- evsel->attr.sample_type |= PERF_SAMPLE_CPU;
- evsel->attr.sample_type |= PERF_SAMPLE_TID;
- evsel->attr.sample_type |= PERF_SAMPLE_TIME;
- perf_evlist__config_attrs(evlist, &opts);
+ perf_evsel__set_sample_bit(evsel, CPU);
+ perf_evsel__set_sample_bit(evsel, TID);
+ perf_evsel__set_sample_bit(evsel, TIME);
+ perf_evlist__config(evlist, &opts);
err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask);
if (err < 0) {
pr_debug("sched__get_first_possible_cpu: %s\n", strerror(errno));
- goto out_delete_evlist;
+ goto out_delete_maps;
}
cpu = err;
@@ -121,7 +121,7 @@ int test__PERF_RECORD(void)
*/
if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, &cpu_mask) < 0) {
pr_debug("sched_setaffinity: %s\n", strerror(errno));
- goto out_delete_evlist;
+ goto out_delete_maps;
}
/*
@@ -131,7 +131,7 @@ int test__PERF_RECORD(void)
err = perf_evlist__open(evlist);
if (err < 0) {
pr_debug("perf_evlist__open: %s\n", strerror(errno));
- goto out_delete_evlist;
+ goto out_delete_maps;
}
/*
@@ -142,7 +142,7 @@ int test__PERF_RECORD(void)
err = perf_evlist__mmap(evlist, opts.mmap_pages, false);
if (err < 0) {
pr_debug("perf_evlist__mmap: %s\n", strerror(errno));
- goto out_delete_evlist;
+ goto out_delete_maps;
}
/*
@@ -305,6 +305,8 @@ found_exit:
}
out_err:
perf_evlist__munmap(evlist);
+out_delete_maps:
+ perf_evlist__delete_maps(evlist);
out_delete_evlist:
perf_evlist__delete(evlist);
out:
diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c
index a5f3798..12b322f 100644
--- a/tools/perf/tests/pmu.c
+++ b/tools/perf/tests/pmu.c
@@ -19,10 +19,8 @@ static struct test_format {
{ "krava23", "config2:28-29,38\n", },
};
-#define TEST_FORMATS_CNT (sizeof(test_formats) / sizeof(struct test_format))
-
/* Simulated users input. */
-static struct parse_events__term test_terms[] = {
+static struct parse_events_term test_terms[] = {
{
.config = (char *) "krava01",
.val.num = 15,
@@ -78,7 +76,6 @@ static struct parse_events__term test_terms[] = {
.type_term = PARSE_EVENTS__TERM_TYPE_USER,
},
};
-#define TERMS_CNT (sizeof(test_terms) / sizeof(struct parse_events__term))
/*
* Prepare format directory data, exported by kernel
@@ -93,7 +90,7 @@ static char *test_format_dir_get(void)
if (!mkdtemp(dir))
return NULL;
- for (i = 0; i < TEST_FORMATS_CNT; i++) {
+ for (i = 0; i < ARRAY_SIZE(test_formats); i++) {
static char name[PATH_MAX];
struct test_format *format = &test_formats[i];
FILE *file;
@@ -130,14 +127,12 @@ static struct list_head *test_terms_list(void)
static LIST_HEAD(terms);
unsigned int i;
- for (i = 0; i < TERMS_CNT; i++)
+ for (i = 0; i < ARRAY_SIZE(test_terms); i++)
list_add_tail(&test_terms[i].list, &terms);
return &terms;
}
-#undef TERMS_CNT
-
int test__pmu(void)
{
char *format = test_format_dir_get();
diff --git a/tools/perf/tests/python-use.c b/tools/perf/tests/python-use.c
new file mode 100644
index 0000000..7760277
--- /dev/null
+++ b/tools/perf/tests/python-use.c
@@ -0,0 +1,23 @@
+/*
+ * Just test if we can load the python binding.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "tests.h"
+
+extern int verbose;
+
+int test__python_use(void)
+{
+ char *cmd;
+ int ret;
+
+ if (asprintf(&cmd, "echo \"import sys ; sys.path.append('%s'); import perf\" | %s %s",
+ PYTHONPATH, PYTHON, verbose ? "" : "2> /dev/null") < 0)
+ return -1;
+
+ ret = system(cmd) ? -1 : 0;
+ free(cmd);
+ return ret;
+}
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index fc121ed..5de0be1 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -1,6 +1,12 @@
#ifndef TESTS_H
#define TESTS_H
+enum {
+ TEST_OK = 0,
+ TEST_FAIL = -1,
+ TEST_SKIP = -2,
+};
+
/* Tests */
int test__vmlinux_matches_kallsyms(void);
int test__open_syscall_event(void);
@@ -15,8 +21,7 @@ int test__pmu(void);
int test__attr(void);
int test__dso_data(void);
int test__parse_events(void);
-
-/* Util */
-int trace_event__id(const char *evname);
+int test__hists_link(void);
+int test__python_use(void);
#endif /* TESTS_H */
diff --git a/tools/perf/tests/util.c b/tools/perf/tests/util.c
deleted file mode 100644
index 748f2e8..0000000
--- a/tools/perf/tests/util.c
+++ /dev/null
@@ -1,30 +0,0 @@
-#include <stdio.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include "tests.h"
-#include "debugfs.h"
-
-int trace_event__id(const char *evname)
-{
- char *filename;
- int err = -1, fd;
-
- if (asprintf(&filename,
- "%s/syscalls/%s/id",
- tracing_events_path, evname) < 0)
- return -1;
-
- fd = open(filename, O_RDONLY);
- if (fd >= 0) {
- char id[16];
- if (read(fd, id, sizeof(id)) > 0)
- err = atoi(id);
- close(fd);
- }
-
- free(filename);
- return err;
-}
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index 0d1cdbe..7b4c4d2 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -44,7 +44,7 @@ int test__vmlinux_matches_kallsyms(void)
*/
if (machine__create_kernel_maps(&kallsyms) < 0) {
pr_debug("machine__create_kernel_maps ");
- return -1;
+ goto out;
}
/*
@@ -101,7 +101,8 @@ int test__vmlinux_matches_kallsyms(void)
*/
if (machine__load_vmlinux_path(&vmlinux, type,
vmlinux_matches_kallsyms_filter) <= 0) {
- pr_debug("machine__load_vmlinux_path ");
+ pr_debug("Couldn't find a vmlinux that matches the kernel running on this machine, skipping test\n");
+ err = TEST_SKIP;
goto out;
}
@@ -226,5 +227,7 @@ detour:
map__fprintf(pos, stderr);
}
out:
+ machine__exit(&kallsyms);
+ machine__exit(&vmlinux);
return err;
}
diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index 4aeb7d5..809ea463 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -273,6 +273,8 @@ void ui_browser__hide(struct ui_browser *browser __maybe_unused)
{
pthread_mutex_lock(&ui__lock);
ui_helpline__pop();
+ free(browser->helpline);
+ browser->helpline = NULL;
pthread_mutex_unlock(&ui__lock);
}
@@ -471,7 +473,7 @@ unsigned int ui_browser__list_head_refresh(struct ui_browser *browser)
return row;
}
-static struct ui_browser__colorset {
+static struct ui_browser_colorset {
const char *name, *fg, *bg;
int colorset;
} ui_browser__colorsets[] = {
@@ -706,7 +708,7 @@ void ui_browser__init(void)
perf_config(ui_browser__color_config, NULL);
while (ui_browser__colorsets[i].name) {
- struct ui_browser__colorset *c = &ui_browser__colorsets[i++];
+ struct ui_browser_colorset *c = &ui_browser__colorsets[i++];
sltt_set_color(c->colorset, c->name, c->fg, c->bg);
}
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 5dab3ca..7dca155 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -182,6 +182,16 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
ab->selection = dl;
}
+static bool disasm_line__is_valid_jump(struct disasm_line *dl, struct symbol *sym)
+{
+ if (!dl || !dl->ins || !ins__is_jump(dl->ins)
+ || !disasm_line__has_offset(dl)
+ || dl->ops.target.offset >= symbol__size(sym))
+ return false;
+
+ return true;
+}
+
static void annotate_browser__draw_current_jump(struct ui_browser *browser)
{
struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
@@ -195,8 +205,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
if (strstr(sym->name, "@plt"))
return;
- if (!cursor || !cursor->ins || !ins__is_jump(cursor->ins) ||
- !disasm_line__has_offset(cursor))
+ if (!disasm_line__is_valid_jump(cursor, sym))
return;
target = ab->offsets[cursor->ops.target.offset];
@@ -788,17 +797,9 @@ static void annotate_browser__mark_jump_targets(struct annotate_browser *browser
struct disasm_line *dl = browser->offsets[offset], *dlt;
struct browser_disasm_line *bdlt;
- if (!dl || !dl->ins || !ins__is_jump(dl->ins) ||
- !disasm_line__has_offset(dl))
+ if (!disasm_line__is_valid_jump(dl, sym))
continue;
- if (dl->ops.target.offset >= size) {
- ui__error("jump to after symbol!\n"
- "size: %zx, jump target: %" PRIx64,
- size, dl->ops.target.offset);
- continue;
- }
-
dlt = browser->offsets[dl->ops.target.offset];
/*
* FIXME: Oops, no jump target? Buggy disassembler? Or do we
@@ -921,11 +922,11 @@ out_free_offsets:
#define ANNOTATE_CFG(n) \
{ .name = #n, .value = &annotate_browser__opts.n, }
-
+
/*
* Keep the entries sorted, they are bsearch'ed
*/
-static struct annotate__config {
+static struct annotate_config {
const char *name;
bool *value;
} annotate__configs[] = {
@@ -939,7 +940,7 @@ static struct annotate__config {
static int annotate_config__cmp(const void *name, const void *cfgp)
{
- const struct annotate__config *cfg = cfgp;
+ const struct annotate_config *cfg = cfgp;
return strcmp(name, cfg->name);
}
@@ -947,7 +948,7 @@ static int annotate_config__cmp(const void *name, const void *cfgp)
static int annotate__config(const char *var, const char *value,
void *data __maybe_unused)
{
- struct annotate__config *cfg;
+ struct annotate_config *cfg;
const char *name;
if (prefixcmp(var, "annotate.") != 0)
@@ -955,7 +956,7 @@ static int annotate__config(const char *var, const char *value,
name = var + 9;
cfg = bsearch(name, annotate__configs, ARRAY_SIZE(annotate__configs),
- sizeof(struct annotate__config), annotate_config__cmp);
+ sizeof(struct annotate_config), annotate_config__cmp);
if (cfg == NULL)
return -1;
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index ccc4bd1..aa22704 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -567,26 +567,128 @@ static int hist_browser__show_callchain(struct hist_browser *browser,
return row - first_row;
}
-#define HPP__COLOR_FN(_name, _field) \
-static int hist_browser__hpp_color_ ## _name(struct perf_hpp *hpp, \
- struct hist_entry *he) \
+struct hpp_arg {
+ struct ui_browser *b;
+ char folded_sign;
+ bool current_entry;
+};
+
+static int __hpp__color_callchain(struct hpp_arg *arg)
+{
+ if (!symbol_conf.use_callchain)
+ return 0;
+
+ slsmg_printf("%c ", arg->folded_sign);
+ return 2;
+}
+
+static int __hpp__color_fmt(struct perf_hpp *hpp, struct hist_entry *he,
+ u64 (*get_field)(struct hist_entry *),
+ int (*callchain_cb)(struct hpp_arg *))
+{
+ int ret = 0;
+ double percent = 0.0;
+ struct hists *hists = he->hists;
+ struct hpp_arg *arg = hpp->ptr;
+
+ if (hists->stats.total_period)
+ percent = 100.0 * get_field(he) / hists->stats.total_period;
+
+ ui_browser__set_percent_color(arg->b, percent, arg->current_entry);
+
+ if (callchain_cb)
+ ret += callchain_cb(arg);
+
+ ret += scnprintf(hpp->buf, hpp->size, "%6.2f%%", percent);
+ slsmg_printf("%s", hpp->buf);
+
+ if (symbol_conf.event_group) {
+ int prev_idx, idx_delta;
+ struct perf_evsel *evsel = hists_to_evsel(hists);
+ struct hist_entry *pair;
+ int nr_members = evsel->nr_members;
+
+ if (nr_members <= 1)
+ goto out;
+
+ prev_idx = perf_evsel__group_idx(evsel);
+
+ list_for_each_entry(pair, &he->pairs.head, pairs.node) {
+ u64 period = get_field(pair);
+ u64 total = pair->hists->stats.total_period;
+
+ if (!total)
+ continue;
+
+ evsel = hists_to_evsel(pair->hists);
+ idx_delta = perf_evsel__group_idx(evsel) - prev_idx - 1;
+
+ while (idx_delta--) {
+ /*
+ * zero-fill group members in the middle which
+ * have no sample
+ */
+ ui_browser__set_percent_color(arg->b, 0.0,
+ arg->current_entry);
+ ret += scnprintf(hpp->buf, hpp->size,
+ " %6.2f%%", 0.0);
+ slsmg_printf("%s", hpp->buf);
+ }
+
+ percent = 100.0 * period / total;
+ ui_browser__set_percent_color(arg->b, percent,
+ arg->current_entry);
+ ret += scnprintf(hpp->buf, hpp->size,
+ " %6.2f%%", percent);
+ slsmg_printf("%s", hpp->buf);
+
+ prev_idx = perf_evsel__group_idx(evsel);
+ }
+
+ idx_delta = nr_members - prev_idx - 1;
+
+ while (idx_delta--) {
+ /*
+ * zero-fill group members at last which have no sample
+ */
+ ui_browser__set_percent_color(arg->b, 0.0,
+ arg->current_entry);
+ ret += scnprintf(hpp->buf, hpp->size,
+ " %6.2f%%", 0.0);
+ slsmg_printf("%s", hpp->buf);
+ }
+ }
+out:
+ if (!arg->current_entry || !arg->b->navkeypressed)
+ ui_browser__set_color(arg->b, HE_COLORSET_NORMAL);
+
+ return ret;
+}
+
+#define __HPP_COLOR_PERCENT_FN(_type, _field, _cb) \
+static u64 __hpp_get_##_field(struct hist_entry *he) \
+{ \
+ return he->stat._field; \
+} \
+ \
+static int hist_browser__hpp_color_##_type(struct perf_hpp *hpp, \
+ struct hist_entry *he) \
{ \
- struct hists *hists = he->hists; \
- double percent = 100.0 * he->stat._field / hists->stats.total_period; \
- *(double *)hpp->ptr = percent; \
- return scnprintf(hpp->buf, hpp->size, "%6.2f%%", percent); \
+ return __hpp__color_fmt(hpp, he, __hpp_get_##_field, _cb); \
}
-HPP__COLOR_FN(overhead, period)
-HPP__COLOR_FN(overhead_sys, period_sys)
-HPP__COLOR_FN(overhead_us, period_us)
-HPP__COLOR_FN(overhead_guest_sys, period_guest_sys)
-HPP__COLOR_FN(overhead_guest_us, period_guest_us)
+__HPP_COLOR_PERCENT_FN(overhead, period, __hpp__color_callchain)
+__HPP_COLOR_PERCENT_FN(overhead_sys, period_sys, NULL)
+__HPP_COLOR_PERCENT_FN(overhead_us, period_us, NULL)
+__HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys, NULL)
+__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us, NULL)
-#undef HPP__COLOR_FN
+#undef __HPP_COLOR_PERCENT_FN
void hist_browser__init_hpp(void)
{
+ perf_hpp__column_enable(PERF_HPP__OVERHEAD);
+
perf_hpp__init();
perf_hpp__format[PERF_HPP__OVERHEAD].color =
@@ -606,13 +708,13 @@ static int hist_browser__show_entry(struct hist_browser *browser,
unsigned short row)
{
char s[256];
- double percent;
- int i, printed = 0;
+ int printed = 0;
int width = browser->b.width;
char folded_sign = ' ';
bool current_entry = ui_browser__is_current_entry(&browser->b, row);
off_t row_offset = entry->row_offset;
bool first = true;
+ struct perf_hpp_fmt *fmt;
if (current_entry) {
browser->he_selection = entry;
@@ -625,41 +727,30 @@ static int hist_browser__show_entry(struct hist_browser *browser,
}
if (row_offset == 0) {
+ struct hpp_arg arg = {
+ .b = &browser->b,
+ .folded_sign = folded_sign,
+ .current_entry = current_entry,
+ };
struct perf_hpp hpp = {
.buf = s,
.size = sizeof(s),
+ .ptr = &arg,
};
ui_browser__gotorc(&browser->b, row, 0);
- for (i = 0; i < PERF_HPP__MAX_INDEX; i++) {
- if (!perf_hpp__format[i].cond)
- continue;
-
+ perf_hpp__for_each_format(fmt) {
if (!first) {
slsmg_printf(" ");
width -= 2;
}
first = false;
- if (perf_hpp__format[i].color) {
- hpp.ptr = &percent;
- /* It will set percent for us. See HPP__COLOR_FN above. */
- width -= perf_hpp__format[i].color(&hpp, entry);
-
- ui_browser__set_percent_color(&browser->b, percent, current_entry);
-
- if (i == PERF_HPP__OVERHEAD && symbol_conf.use_callchain) {
- slsmg_printf("%c ", folded_sign);
- width -= 2;
- }
-
- slsmg_printf("%s", s);
-
- if (!current_entry || !browser->b.navkeypressed)
- ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL);
+ if (fmt->color) {
+ width -= fmt->color(&hpp, entry);
} else {
- width -= perf_hpp__format[i].entry(&hpp, entry);
+ width -= fmt->entry(&hpp, entry);
slsmg_printf("%s", s);
}
}
@@ -1098,6 +1189,21 @@ static int hists__browser_title(struct hists *hists, char *bf, size_t size,
const struct thread *thread = hists->thread_filter;
unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
u64 nr_events = hists->stats.total_period;
+ struct perf_evsel *evsel = hists_to_evsel(hists);
+ char buf[512];
+ size_t buflen = sizeof(buf);
+
+ if (symbol_conf.event_group && evsel->nr_members > 1) {
+ struct perf_evsel *pos;
+
+ perf_evsel__group_desc(evsel, buf, buflen);
+ ev_name = buf;
+
+ for_each_group_member(pos, evsel) {
+ nr_samples += pos->hists.stats.nr_events[PERF_RECORD_SAMPLE];
+ nr_events += pos->hists.stats.total_period;
+ }
+ }
nr_samples = convert_unit(nr_samples, &unit);
printed = scnprintf(bf, size,
@@ -1135,6 +1241,96 @@ static inline bool is_report_browser(void *timer)
return timer == NULL;
}
+/*
+ * Only runtime switching of perf data file will make "input_name" point
+ * to a malloced buffer. So add "is_input_name_malloced" flag to decide
+ * whether we need to call free() for current "input_name" during the switch.
+ */
+static bool is_input_name_malloced = false;
+
+static int switch_data_file(void)
+{
+ char *pwd, *options[32], *abs_path[32], *tmp;
+ DIR *pwd_dir;
+ int nr_options = 0, choice = -1, ret = -1;
+ struct dirent *dent;
+
+ pwd = getenv("PWD");
+ if (!pwd)
+ return ret;
+
+ pwd_dir = opendir(pwd);
+ if (!pwd_dir)
+ return ret;
+
+ memset(options, 0, sizeof(options));
+ memset(options, 0, sizeof(abs_path));
+
+ while ((dent = readdir(pwd_dir))) {
+ char path[PATH_MAX];
+ u64 magic;
+ char *name = dent->d_name;
+ FILE *file;
+
+ if (!(dent->d_type == DT_REG))
+ continue;
+
+ snprintf(path, sizeof(path), "%s/%s", pwd, name);
+
+ file = fopen(path, "r");
+ if (!file)
+ continue;
+
+ if (fread(&magic, 1, 8, file) < 8)
+ goto close_file_and_continue;
+
+ if (is_perf_magic(magic)) {
+ options[nr_options] = strdup(name);
+ if (!options[nr_options])
+ goto close_file_and_continue;
+
+ abs_path[nr_options] = strdup(path);
+ if (!abs_path[nr_options]) {
+ free(options[nr_options]);
+ ui__warning("Can't search all data files due to memory shortage.\n");
+ fclose(file);
+ break;
+ }
+
+ nr_options++;
+ }
+
+close_file_and_continue:
+ fclose(file);
+ if (nr_options >= 32) {
+ ui__warning("Too many perf data files in PWD!\n"
+ "Only the first 32 files will be listed.\n");
+ break;
+ }
+ }
+ closedir(pwd_dir);
+
+ if (nr_options) {
+ choice = ui__popup_menu(nr_options, options);
+ if (choice < nr_options && choice >= 0) {
+ tmp = strdup(abs_path[choice]);
+ if (tmp) {
+ if (is_input_name_malloced)
+ free((void *)input_name);
+ input_name = tmp;
+ is_input_name_malloced = true;
+ ret = 0;
+ } else
+ ui__warning("Data switch failed due to memory shortage!\n");
+ }
+ }
+
+ free_popup_options(options, nr_options);
+ free_popup_options(abs_path, nr_options);
+ return ret;
+}
+
+
static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
const char *helpline, const char *ev_name,
bool left_exits,
@@ -1169,7 +1365,8 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
int choice = 0,
annotate = -2, zoom_dso = -2, zoom_thread = -2,
annotate_f = -2, annotate_t = -2, browse_map = -2;
- int scripts_comm = -2, scripts_symbol = -2, scripts_all = -2;
+ int scripts_comm = -2, scripts_symbol = -2,
+ scripts_all = -2, switch_data = -2;
nr_options = 0;
@@ -1226,6 +1423,10 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
if (is_report_browser(hbt))
goto do_scripts;
continue;
+ case 's':
+ if (is_report_browser(hbt))
+ goto do_data_switch;
+ continue;
case K_F1:
case 'h':
case '?':
@@ -1245,6 +1446,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
"d Zoom into current DSO\n"
"t Zoom into current Thread\n"
"r Run available scripts('perf report' only)\n"
+ "s Switch to another data file in PWD ('perf report' only)\n"
"P Print histograms to perf.hist.N\n"
"V Verbose (DSO names in callchains, etc)\n"
"/ Filter symbol by name");
@@ -1352,6 +1554,9 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
if (asprintf(&options[nr_options], "Run scripts for all samples") > 0)
scripts_all = nr_options++;
+ if (is_report_browser(hbt) && asprintf(&options[nr_options],
+ "Switch to another data file in PWD") > 0)
+ switch_data = nr_options++;
add_exit_option:
options[nr_options++] = (char *)"Exit";
retry_popup_menu:
@@ -1462,6 +1667,16 @@ do_scripts:
script_browse(script_opt);
}
+ /* Switch to another data file */
+ else if (choice == switch_data) {
+do_data_switch:
+ if (!switch_data_file()) {
+ key = K_SWITCH_INPUT_DATA;
+ break;
+ } else
+ ui__warning("Won't switch the data files due to\n"
+ "no valid data file get selected!\n");
+ }
}
out_free_stack:
pstack__delete(fstack);
@@ -1494,6 +1709,16 @@ static void perf_evsel_menu__write(struct ui_browser *browser,
ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED :
HE_COLORSET_NORMAL);
+ if (symbol_conf.event_group && evsel->nr_members > 1) {
+ struct perf_evsel *pos;
+
+ ev_name = perf_evsel__group_name(evsel);
+
+ for_each_group_member(pos, evsel) {
+ nr_events += pos->hists.stats.nr_events[PERF_RECORD_SAMPLE];
+ }
+ }
+
nr_events = convert_unit(nr_events, &unit);
printed = scnprintf(bf, sizeof(bf), "%lu%c%s%s", nr_events,
unit, unit == ' ' ? "" : " ", ev_name);
@@ -1578,6 +1803,7 @@ browse_hists:
"Do you really want to exit?"))
continue;
/* Fall thru */
+ case K_SWITCH_INPUT_DATA:
case 'q':
case CTRL('c'):
goto out;
@@ -1604,8 +1830,19 @@ out:
return key;
}
+static bool filter_group_entries(struct ui_browser *self __maybe_unused,
+ void *entry)
+{
+ struct perf_evsel *evsel = list_entry(entry, struct perf_evsel, node);
+
+ if (symbol_conf.event_group && !perf_evsel__is_group_leader(evsel))
+ return true;
+
+ return false;
+}
+
static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
- const char *help,
+ int nr_entries, const char *help,
struct hist_browser_timer *hbt,
struct perf_session_env *env)
{
@@ -1616,7 +1853,8 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
.refresh = ui_browser__list_head_refresh,
.seek = ui_browser__list_head_seek,
.write = perf_evsel_menu__write,
- .nr_entries = evlist->nr_entries,
+ .filter = filter_group_entries,
+ .nr_entries = nr_entries,
.priv = evlist,
},
.env = env,
@@ -1632,20 +1870,37 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
menu.b.width = line_len;
}
- return perf_evsel_menu__run(&menu, evlist->nr_entries, help, hbt);
+ return perf_evsel_menu__run(&menu, nr_entries, help, hbt);
}
int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
struct hist_browser_timer *hbt,
struct perf_session_env *env)
{
- if (evlist->nr_entries == 1) {
+ int nr_entries = evlist->nr_entries;
+
+single_entry:
+ if (nr_entries == 1) {
struct perf_evsel *first = list_entry(evlist->entries.next,
struct perf_evsel, node);
const char *ev_name = perf_evsel__name(first);
- return perf_evsel__hists_browse(first, evlist->nr_entries, help,
+
+ return perf_evsel__hists_browse(first, nr_entries, help,
ev_name, false, hbt, env);
}
- return __perf_evlist__tui_browse_hists(evlist, help, hbt, env);
+ if (symbol_conf.event_group) {
+ struct perf_evsel *pos;
+
+ nr_entries = 0;
+ list_for_each_entry(pos, &evlist->entries, node)
+ if (perf_evsel__is_group_leader(pos))
+ nr_entries++;
+
+ if (nr_entries == 1)
+ goto single_entry;
+ }
+
+ return __perf_evlist__tui_browse_hists(evlist, nr_entries, help,
+ hbt, env);
}
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
new file mode 100644
index 0000000..7d8dc58
--- /dev/null
+++ b/tools/perf/ui/gtk/annotate.c
@@ -0,0 +1,229 @@
+#include "gtk.h"
+#include "util/debug.h"
+#include "util/annotate.h"
+#include "ui/helpline.h"
+
+
+enum {
+ ANN_COL__PERCENT,
+ ANN_COL__OFFSET,
+ ANN_COL__LINE,
+
+ MAX_ANN_COLS
+};
+
+static const char *const col_names[] = {
+ "Overhead",
+ "Offset",
+ "Line"
+};
+
+static int perf_gtk__get_percent(char *buf, size_t size, struct symbol *sym,
+ struct disasm_line *dl, int evidx)
+{
+ struct sym_hist *symhist;
+ double percent = 0.0;
+ const char *markup;
+ int ret = 0;
+
+ strcpy(buf, "");
+
+ if (dl->offset == (s64) -1)
+ return 0;
+
+ symhist = annotation__histogram(symbol__annotation(sym), evidx);
+ if (!symhist->addr[dl->offset])
+ return 0;
+
+ percent = 100.0 * symhist->addr[dl->offset] / symhist->sum;
+
+ markup = perf_gtk__get_percent_color(percent);
+ if (markup)
+ ret += scnprintf(buf, size, "%s", markup);
+ ret += scnprintf(buf + ret, size - ret, "%6.2f%%", percent);
+ if (markup)
+ ret += scnprintf(buf + ret, size - ret, "</span>");
+
+ return ret;
+}
+
+static int perf_gtk__get_offset(char *buf, size_t size, struct symbol *sym,
+ struct map *map, struct disasm_line *dl)
+{
+ u64 start = map__rip_2objdump(map, sym->start);
+
+ strcpy(buf, "");
+
+ if (dl->offset == (s64) -1)
+ return 0;
+
+ return scnprintf(buf, size, "%"PRIx64, start + dl->offset);
+}
+
+static int perf_gtk__get_line(char *buf, size_t size, struct disasm_line *dl)
+{
+ int ret = 0;
+ char *line = g_markup_escape_text(dl->line, -1);
+ const char *markup = "<span fgcolor='gray'>";
+
+ strcpy(buf, "");
+
+ if (!line)
+ return 0;
+
+ if (dl->offset != (s64) -1)
+ markup = NULL;
+
+ if (markup)
+ ret += scnprintf(buf, size, "%s", markup);
+ ret += scnprintf(buf + ret, size - ret, "%s", line);
+ if (markup)
+ ret += scnprintf(buf + ret, size - ret, "</span>");
+
+ g_free(line);
+ return ret;
+}
+
+static int perf_gtk__annotate_symbol(GtkWidget *window, struct symbol *sym,
+ struct map *map, int evidx,
+ struct hist_browser_timer *hbt __maybe_unused)
+{
+ struct disasm_line *pos, *n;
+ struct annotation *notes;
+ GType col_types[MAX_ANN_COLS];
+ GtkCellRenderer *renderer;
+ GtkListStore *store;
+ GtkWidget *view;
+ int i;
+ char s[512];
+
+ notes = symbol__annotation(sym);
+
+ for (i = 0; i < MAX_ANN_COLS; i++) {
+ col_types[i] = G_TYPE_STRING;
+ }
+ store = gtk_list_store_newv(MAX_ANN_COLS, col_types);
+
+ view = gtk_tree_view_new();
+ renderer = gtk_cell_renderer_text_new();
+
+ for (i = 0; i < MAX_ANN_COLS; i++) {
+ gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
+ -1, col_names[i], renderer, "markup",
+ i, NULL);
+ }
+
+ gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store));
+ g_object_unref(GTK_TREE_MODEL(store));
+
+ list_for_each_entry(pos, &notes->src->source, node) {
+ GtkTreeIter iter;
+
+ gtk_list_store_append(store, &iter);
+
+ if (perf_gtk__get_percent(s, sizeof(s), sym, pos, evidx))
+ gtk_list_store_set(store, &iter, ANN_COL__PERCENT, s, -1);
+ if (perf_gtk__get_offset(s, sizeof(s), sym, map, pos))
+ gtk_list_store_set(store, &iter, ANN_COL__OFFSET, s, -1);
+ if (perf_gtk__get_line(s, sizeof(s), pos))
+ gtk_list_store_set(store, &iter, ANN_COL__LINE, s, -1);
+ }
+
+ gtk_container_add(GTK_CONTAINER(window), view);
+
+ list_for_each_entry_safe(pos, n, &notes->src->source, node) {
+ list_del(&pos->node);
+ disasm_line__free(pos);
+ }
+
+ return 0;
+}
+
+int symbol__gtk_annotate(struct symbol *sym, struct map *map, int evidx,
+ struct hist_browser_timer *hbt)
+{
+ GtkWidget *window;
+ GtkWidget *notebook;
+ GtkWidget *scrolled_window;
+ GtkWidget *tab_label;
+
+ if (map->dso->annotate_warned)
+ return -1;
+
+ if (symbol__annotate(sym, map, 0) < 0) {
+ ui__error("%s", ui_helpline__current);
+ return -1;
+ }
+
+ if (perf_gtk__is_active_context(pgctx)) {
+ window = pgctx->main_window;
+ notebook = pgctx->notebook;
+ } else {
+ GtkWidget *vbox;
+ GtkWidget *infobar;
+ GtkWidget *statbar;
+
+ signal(SIGSEGV, perf_gtk__signal);
+ signal(SIGFPE, perf_gtk__signal);
+ signal(SIGINT, perf_gtk__signal);
+ signal(SIGQUIT, perf_gtk__signal);
+ signal(SIGTERM, perf_gtk__signal);
+
+ window = gtk_window_new(GTK_WINDOW_TOPLEVEL);
+ gtk_window_set_title(GTK_WINDOW(window), "perf annotate");
+
+ g_signal_connect(window, "delete_event", gtk_main_quit, NULL);
+
+ pgctx = perf_gtk__activate_context(window);
+ if (!pgctx)
+ return -1;
+
+ vbox = gtk_vbox_new(FALSE, 0);
+ notebook = gtk_notebook_new();
+ pgctx->notebook = notebook;
+
+ gtk_box_pack_start(GTK_BOX(vbox), notebook, TRUE, TRUE, 0);
+
+ infobar = perf_gtk__setup_info_bar();
+ if (infobar) {
+ gtk_box_pack_start(GTK_BOX(vbox), infobar,
+ FALSE, FALSE, 0);
+ }
+
+ statbar = perf_gtk__setup_statusbar();
+ gtk_box_pack_start(GTK_BOX(vbox), statbar, FALSE, FALSE, 0);
+
+ gtk_container_add(GTK_CONTAINER(window), vbox);
+ }
+
+ scrolled_window = gtk_scrolled_window_new(NULL, NULL);
+ tab_label = gtk_label_new(sym->name);
+
+ gtk_scrolled_window_set_policy(GTK_SCROLLED_WINDOW(scrolled_window),
+ GTK_POLICY_AUTOMATIC,
+ GTK_POLICY_AUTOMATIC);
+
+ gtk_notebook_append_page(GTK_NOTEBOOK(notebook), scrolled_window,
+ tab_label);
+
+ perf_gtk__annotate_symbol(scrolled_window, sym, map, evidx, hbt);
+ return 0;
+}
+
+void perf_gtk__show_annotations(void)
+{
+ GtkWidget *window;
+
+ if (!perf_gtk__is_active_context(pgctx))
+ return;
+
+ window = pgctx->main_window;
+ gtk_widget_show_all(window);
+
+ perf_gtk__resize_window(window);
+ gtk_window_set_position(GTK_WINDOW(window), GTK_WIN_POS_CENTER);
+
+ gtk_main();
+
+ perf_gtk__deactivate_context(&pgctx);
+}
diff --git a/tools/perf/ui/gtk/browser.c b/tools/perf/ui/gtk/browser.c
index 253b621..c95012c 100644
--- a/tools/perf/ui/gtk/browser.c
+++ b/tools/perf/ui/gtk/browser.c
@@ -8,15 +8,13 @@
#include <signal.h>
-#define MAX_COLUMNS 32
-
-static void perf_gtk__signal(int sig)
+void perf_gtk__signal(int sig)
{
perf_gtk__exit(false);
psignal(sig, "perf");
}
-static void perf_gtk__resize_window(GtkWidget *window)
+void perf_gtk__resize_window(GtkWidget *window)
{
GdkRectangle rect;
GdkScreen *screen;
@@ -36,7 +34,7 @@ static void perf_gtk__resize_window(GtkWidget *window)
gtk_window_resize(GTK_WINDOW(window), width, height);
}
-static const char *perf_gtk__get_percent_color(double percent)
+const char *perf_gtk__get_percent_color(double percent)
{
if (percent >= MIN_RED)
return "<span fgcolor='red'>";
@@ -45,155 +43,8 @@ static const char *perf_gtk__get_percent_color(double percent)
return NULL;
}
-#define HPP__COLOR_FN(_name, _field) \
-static int perf_gtk__hpp_color_ ## _name(struct perf_hpp *hpp, \
- struct hist_entry *he) \
-{ \
- struct hists *hists = he->hists; \
- double percent = 100.0 * he->stat._field / hists->stats.total_period; \
- const char *markup; \
- int ret = 0; \
- \
- markup = perf_gtk__get_percent_color(percent); \
- if (markup) \
- ret += scnprintf(hpp->buf, hpp->size, "%s", markup); \
- ret += scnprintf(hpp->buf + ret, hpp->size - ret, "%6.2f%%", percent); \
- if (markup) \
- ret += scnprintf(hpp->buf + ret, hpp->size - ret, "</span>"); \
- \
- return ret; \
-}
-
-HPP__COLOR_FN(overhead, period)
-HPP__COLOR_FN(overhead_sys, period_sys)
-HPP__COLOR_FN(overhead_us, period_us)
-HPP__COLOR_FN(overhead_guest_sys, period_guest_sys)
-HPP__COLOR_FN(overhead_guest_us, period_guest_us)
-
-#undef HPP__COLOR_FN
-
-void perf_gtk__init_hpp(void)
-{
- perf_hpp__init();
-
- perf_hpp__format[PERF_HPP__OVERHEAD].color =
- perf_gtk__hpp_color_overhead;
- perf_hpp__format[PERF_HPP__OVERHEAD_SYS].color =
- perf_gtk__hpp_color_overhead_sys;
- perf_hpp__format[PERF_HPP__OVERHEAD_US].color =
- perf_gtk__hpp_color_overhead_us;
- perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_SYS].color =
- perf_gtk__hpp_color_overhead_guest_sys;
- perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
- perf_gtk__hpp_color_overhead_guest_us;
-}
-
-static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists)
-{
- GType col_types[MAX_COLUMNS];
- GtkCellRenderer *renderer;
- struct sort_entry *se;
- GtkListStore *store;
- struct rb_node *nd;
- GtkWidget *view;
- int i, col_idx;
- int nr_cols;
- char s[512];
-
- struct perf_hpp hpp = {
- .buf = s,
- .size = sizeof(s),
- };
-
- nr_cols = 0;
-
- for (i = 0; i < PERF_HPP__MAX_INDEX; i++) {
- if (!perf_hpp__format[i].cond)
- continue;
-
- col_types[nr_cols++] = G_TYPE_STRING;
- }
-
- list_for_each_entry(se, &hist_entry__sort_list, list) {
- if (se->elide)
- continue;
-
- col_types[nr_cols++] = G_TYPE_STRING;
- }
-
- store = gtk_list_store_newv(nr_cols, col_types);
-
- view = gtk_tree_view_new();
-
- renderer = gtk_cell_renderer_text_new();
-
- col_idx = 0;
-
- for (i = 0; i < PERF_HPP__MAX_INDEX; i++) {
- if (!perf_hpp__format[i].cond)
- continue;
-
- perf_hpp__format[i].header(&hpp);
-
- gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
- -1, s,
- renderer, "markup",
- col_idx++, NULL);
- }
-
- list_for_each_entry(se, &hist_entry__sort_list, list) {
- if (se->elide)
- continue;
-
- gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
- -1, se->se_header,
- renderer, "text",
- col_idx++, NULL);
- }
-
- gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store));
-
- g_object_unref(GTK_TREE_MODEL(store));
-
- for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
- struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
- GtkTreeIter iter;
-
- if (h->filtered)
- continue;
-
- gtk_list_store_append(store, &iter);
-
- col_idx = 0;
-
- for (i = 0; i < PERF_HPP__MAX_INDEX; i++) {
- if (!perf_hpp__format[i].cond)
- continue;
-
- if (perf_hpp__format[i].color)
- perf_hpp__format[i].color(&hpp, h);
- else
- perf_hpp__format[i].entry(&hpp, h);
-
- gtk_list_store_set(store, &iter, col_idx++, s, -1);
- }
-
- list_for_each_entry(se, &hist_entry__sort_list, list) {
- if (se->elide)
- continue;
-
- se->se_snprintf(h, s, ARRAY_SIZE(s),
- hists__col_len(hists, se->se_width_idx));
-
- gtk_list_store_set(store, &iter, col_idx++, s, -1);
- }
- }
-
- gtk_container_add(GTK_CONTAINER(window), view);
-}
-
#ifdef HAVE_GTK_INFO_BAR
-static GtkWidget *perf_gtk__setup_info_bar(void)
+GtkWidget *perf_gtk__setup_info_bar(void)
{
GtkWidget *info_bar;
GtkWidget *label;
@@ -220,7 +71,7 @@ static GtkWidget *perf_gtk__setup_info_bar(void)
}
#endif
-static GtkWidget *perf_gtk__setup_statusbar(void)
+GtkWidget *perf_gtk__setup_statusbar(void)
{
GtkWidget *stbar;
unsigned ctxid;
@@ -234,79 +85,3 @@ static GtkWidget *perf_gtk__setup_statusbar(void)
return stbar;
}
-
-int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
- const char *help,
- struct hist_browser_timer *hbt __maybe_unused)
-{
- struct perf_evsel *pos;
- GtkWidget *vbox;
- GtkWidget *notebook;
- GtkWidget *info_bar;
- GtkWidget *statbar;
- GtkWidget *window;
-
- signal(SIGSEGV, perf_gtk__signal);
- signal(SIGFPE, perf_gtk__signal);
- signal(SIGINT, perf_gtk__signal);
- signal(SIGQUIT, perf_gtk__signal);
- signal(SIGTERM, perf_gtk__signal);
-
- window = gtk_window_new(GTK_WINDOW_TOPLEVEL);
-
- gtk_window_set_title(GTK_WINDOW(window), "perf report");
-
- g_signal_connect(window, "delete_event", gtk_main_quit, NULL);
-
- pgctx = perf_gtk__activate_context(window);
- if (!pgctx)
- return -1;
-
- vbox = gtk_vbox_new(FALSE, 0);
-
- notebook = gtk_notebook_new();
-
- list_for_each_entry(pos, &evlist->entries, node) {
- struct hists *hists = &pos->hists;
- const char *evname = perf_evsel__name(pos);
- GtkWidget *scrolled_window;
- GtkWidget *tab_label;
-
- scrolled_window = gtk_scrolled_window_new(NULL, NULL);
-
- gtk_scrolled_window_set_policy(GTK_SCROLLED_WINDOW(scrolled_window),
- GTK_POLICY_AUTOMATIC,
- GTK_POLICY_AUTOMATIC);
-
- perf_gtk__show_hists(scrolled_window, hists);
-
- tab_label = gtk_label_new(evname);
-
- gtk_notebook_append_page(GTK_NOTEBOOK(notebook), scrolled_window, tab_label);
- }
-
- gtk_box_pack_start(GTK_BOX(vbox), notebook, TRUE, TRUE, 0);
-
- info_bar = perf_gtk__setup_info_bar();
- if (info_bar)
- gtk_box_pack_start(GTK_BOX(vbox), info_bar, FALSE, FALSE, 0);
-
- statbar = perf_gtk__setup_statusbar();
- gtk_box_pack_start(GTK_BOX(vbox), statbar, FALSE, FALSE, 0);
-
- gtk_container_add(GTK_CONTAINER(window), vbox);
-
- gtk_widget_show_all(window);
-
- perf_gtk__resize_window(window);
-
- gtk_window_set_position(GTK_WINDOW(window), GTK_WIN_POS_CENTER);
-
- ui_helpline__push(help);
-
- gtk_main();
-
- perf_gtk__deactivate_context(&pgctx);
-
- return 0;
-}
diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h
index 856320e..3d96785 100644
--- a/tools/perf/ui/gtk/gtk.h
+++ b/tools/perf/ui/gtk/gtk.h
@@ -10,6 +10,7 @@
struct perf_gtk_context {
GtkWidget *main_window;
+ GtkWidget *notebook;
#ifdef HAVE_GTK_INFO_BAR
GtkWidget *info_bar;
@@ -33,7 +34,14 @@ void perf_gtk__init_helpline(void);
void perf_gtk__init_progress(void);
void perf_gtk__init_hpp(void);
-#ifndef HAVE_GTK_INFO_BAR
+void perf_gtk__signal(int sig);
+void perf_gtk__resize_window(GtkWidget *window);
+const char *perf_gtk__get_percent_color(double percent);
+GtkWidget *perf_gtk__setup_statusbar(void);
+
+#ifdef HAVE_GTK_INFO_BAR
+GtkWidget *perf_gtk__setup_info_bar(void);
+#else
static inline GtkWidget *perf_gtk__setup_info_bar(void)
{
return NULL;
diff --git a/tools/perf/ui/gtk/helpline.c b/tools/perf/ui/gtk/helpline.c
index 5db4432..3388cbd 100644
--- a/tools/perf/ui/gtk/helpline.c
+++ b/tools/perf/ui/gtk/helpline.c
@@ -24,17 +24,7 @@ static void gtk_helpline_push(const char *msg)
pgctx->statbar_ctx_id, msg);
}
-static struct ui_helpline gtk_helpline_fns = {
- .pop = gtk_helpline_pop,
- .push = gtk_helpline_push,
-};
-
-void perf_gtk__init_helpline(void)
-{
- helpline_fns = &gtk_helpline_fns;
-}
-
-int perf_gtk__show_helpline(const char *fmt, va_list ap)
+static int gtk_helpline_show(const char *fmt, va_list ap)
{
int ret;
char *ptr;
@@ -54,3 +44,14 @@ int perf_gtk__show_helpline(const char *fmt, va_list ap)
return ret;
}
+
+static struct ui_helpline gtk_helpline_fns = {
+ .pop = gtk_helpline_pop,
+ .push = gtk_helpline_push,
+ .show = gtk_helpline_show,
+};
+
+void perf_gtk__init_helpline(void)
+{
+ helpline_fns = &gtk_helpline_fns;
+}
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
new file mode 100644
index 0000000..1e764a8
--- /dev/null
+++ b/tools/perf/ui/gtk/hists.c
@@ -0,0 +1,312 @@
+#include "../evlist.h"
+#include "../cache.h"
+#include "../evsel.h"
+#include "../sort.h"
+#include "../hist.h"
+#include "../helpline.h"
+#include "gtk.h"
+
+#define MAX_COLUMNS 32
+
+static int __percent_color_snprintf(char *buf, size_t size, double percent)
+{
+ int ret = 0;
+ const char *markup;
+
+ markup = perf_gtk__get_percent_color(percent);
+ if (markup)
+ ret += scnprintf(buf, size, markup);
+
+ ret += scnprintf(buf + ret, size - ret, " %6.2f%%", percent);
+
+ if (markup)
+ ret += scnprintf(buf + ret, size - ret, "</span>");
+
+ return ret;
+}
+
+
+static int __hpp__color_fmt(struct perf_hpp *hpp, struct hist_entry *he,
+ u64 (*get_field)(struct hist_entry *))
+{
+ int ret;
+ double percent = 0.0;
+ struct hists *hists = he->hists;
+
+ if (hists->stats.total_period)
+ percent = 100.0 * get_field(he) / hists->stats.total_period;
+
+ ret = __percent_color_snprintf(hpp->buf, hpp->size, percent);
+
+ if (symbol_conf.event_group) {
+ int prev_idx, idx_delta;
+ struct perf_evsel *evsel = hists_to_evsel(hists);
+ struct hist_entry *pair;
+ int nr_members = evsel->nr_members;
+
+ if (nr_members <= 1)
+ return ret;
+
+ prev_idx = perf_evsel__group_idx(evsel);
+
+ list_for_each_entry(pair, &he->pairs.head, pairs.node) {
+ u64 period = get_field(pair);
+ u64 total = pair->hists->stats.total_period;
+
+ evsel = hists_to_evsel(pair->hists);
+ idx_delta = perf_evsel__group_idx(evsel) - prev_idx - 1;
+
+ while (idx_delta--) {
+ /*
+ * zero-fill group members in the middle which
+ * have no sample
+ */
+ ret += __percent_color_snprintf(hpp->buf + ret,
+ hpp->size - ret,
+ 0.0);
+ }
+
+ percent = 100.0 * period / total;
+ ret += __percent_color_snprintf(hpp->buf + ret,
+ hpp->size - ret,
+ percent);
+
+ prev_idx = perf_evsel__group_idx(evsel);
+ }
+
+ idx_delta = nr_members - prev_idx - 1;
+
+ while (idx_delta--) {
+ /*
+ * zero-fill group members at last which have no sample
+ */
+ ret += __percent_color_snprintf(hpp->buf + ret,
+ hpp->size - ret,
+ 0.0);
+ }
+ }
+ return ret;
+}
+
+#define __HPP_COLOR_PERCENT_FN(_type, _field) \
+static u64 he_get_##_field(struct hist_entry *he) \
+{ \
+ return he->stat._field; \
+} \
+ \
+static int perf_gtk__hpp_color_##_type(struct perf_hpp *hpp, \
+ struct hist_entry *he) \
+{ \
+ return __hpp__color_fmt(hpp, he, he_get_##_field); \
+}
+
+__HPP_COLOR_PERCENT_FN(overhead, period)
+__HPP_COLOR_PERCENT_FN(overhead_sys, period_sys)
+__HPP_COLOR_PERCENT_FN(overhead_us, period_us)
+__HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys)
+__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us)
+
+#undef __HPP_COLOR_PERCENT_FN
+
+
+void perf_gtk__init_hpp(void)
+{
+ perf_hpp__column_enable(PERF_HPP__OVERHEAD);
+
+ perf_hpp__init();
+
+ perf_hpp__format[PERF_HPP__OVERHEAD].color =
+ perf_gtk__hpp_color_overhead;
+ perf_hpp__format[PERF_HPP__OVERHEAD_SYS].color =
+ perf_gtk__hpp_color_overhead_sys;
+ perf_hpp__format[PERF_HPP__OVERHEAD_US].color =
+ perf_gtk__hpp_color_overhead_us;
+ perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_SYS].color =
+ perf_gtk__hpp_color_overhead_guest_sys;
+ perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
+ perf_gtk__hpp_color_overhead_guest_us;
+}
+
+static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists)
+{
+ struct perf_hpp_fmt *fmt;
+ GType col_types[MAX_COLUMNS];
+ GtkCellRenderer *renderer;
+ struct sort_entry *se;
+ GtkListStore *store;
+ struct rb_node *nd;
+ GtkWidget *view;
+ int col_idx;
+ int nr_cols;
+ char s[512];
+
+ struct perf_hpp hpp = {
+ .buf = s,
+ .size = sizeof(s),
+ .ptr = hists_to_evsel(hists),
+ };
+
+ nr_cols = 0;
+
+ perf_hpp__for_each_format(fmt)
+ col_types[nr_cols++] = G_TYPE_STRING;
+
+ list_for_each_entry(se, &hist_entry__sort_list, list) {
+ if (se->elide)
+ continue;
+
+ col_types[nr_cols++] = G_TYPE_STRING;
+ }
+
+ store = gtk_list_store_newv(nr_cols, col_types);
+
+ view = gtk_tree_view_new();
+
+ renderer = gtk_cell_renderer_text_new();
+
+ col_idx = 0;
+
+ perf_hpp__for_each_format(fmt) {
+ fmt->header(&hpp);
+
+ gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
+ -1, ltrim(s),
+ renderer, "markup",
+ col_idx++, NULL);
+ }
+
+ list_for_each_entry(se, &hist_entry__sort_list, list) {
+ if (se->elide)
+ continue;
+
+ gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
+ -1, se->se_header,
+ renderer, "text",
+ col_idx++, NULL);
+ }
+
+ gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store));
+
+ g_object_unref(GTK_TREE_MODEL(store));
+
+ for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
+ struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+ GtkTreeIter iter;
+
+ if (h->filtered)
+ continue;
+
+ gtk_list_store_append(store, &iter);
+
+ col_idx = 0;
+
+ perf_hpp__for_each_format(fmt) {
+ if (fmt->color)
+ fmt->color(&hpp, h);
+ else
+ fmt->entry(&hpp, h);
+
+ gtk_list_store_set(store, &iter, col_idx++, s, -1);
+ }
+
+ list_for_each_entry(se, &hist_entry__sort_list, list) {
+ if (se->elide)
+ continue;
+
+ se->se_snprintf(h, s, ARRAY_SIZE(s),
+ hists__col_len(hists, se->se_width_idx));
+
+ gtk_list_store_set(store, &iter, col_idx++, s, -1);
+ }
+ }
+
+ gtk_container_add(GTK_CONTAINER(window), view);
+}
+
+int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
+ const char *help,
+ struct hist_browser_timer *hbt __maybe_unused)
+{
+ struct perf_evsel *pos;
+ GtkWidget *vbox;
+ GtkWidget *notebook;
+ GtkWidget *info_bar;
+ GtkWidget *statbar;
+ GtkWidget *window;
+
+ signal(SIGSEGV, perf_gtk__signal);
+ signal(SIGFPE, perf_gtk__signal);
+ signal(SIGINT, perf_gtk__signal);
+ signal(SIGQUIT, perf_gtk__signal);
+ signal(SIGTERM, perf_gtk__signal);
+
+ window = gtk_window_new(GTK_WINDOW_TOPLEVEL);
+
+ gtk_window_set_title(GTK_WINDOW(window), "perf report");
+
+ g_signal_connect(window, "delete_event", gtk_main_quit, NULL);
+
+ pgctx = perf_gtk__activate_context(window);
+ if (!pgctx)
+ return -1;
+
+ vbox = gtk_vbox_new(FALSE, 0);
+
+ notebook = gtk_notebook_new();
+
+ gtk_box_pack_start(GTK_BOX(vbox), notebook, TRUE, TRUE, 0);
+
+ info_bar = perf_gtk__setup_info_bar();
+ if (info_bar)
+ gtk_box_pack_start(GTK_BOX(vbox), info_bar, FALSE, FALSE, 0);
+
+ statbar = perf_gtk__setup_statusbar();
+ gtk_box_pack_start(GTK_BOX(vbox), statbar, FALSE, FALSE, 0);
+
+ gtk_container_add(GTK_CONTAINER(window), vbox);
+
+ list_for_each_entry(pos, &evlist->entries, node) {
+ struct hists *hists = &pos->hists;
+ const char *evname = perf_evsel__name(pos);
+ GtkWidget *scrolled_window;
+ GtkWidget *tab_label;
+ char buf[512];
+ size_t size = sizeof(buf);
+
+ if (symbol_conf.event_group) {
+ if (!perf_evsel__is_group_leader(pos))
+ continue;
+
+ if (pos->nr_members > 1) {
+ perf_evsel__group_desc(pos, buf, size);
+ evname = buf;
+ }
+ }
+
+ scrolled_window = gtk_scrolled_window_new(NULL, NULL);
+
+ gtk_scrolled_window_set_policy(GTK_SCROLLED_WINDOW(scrolled_window),
+ GTK_POLICY_AUTOMATIC,
+ GTK_POLICY_AUTOMATIC);
+
+ perf_gtk__show_hists(scrolled_window, hists);
+
+ tab_label = gtk_label_new(evname);
+
+ gtk_notebook_append_page(GTK_NOTEBOOK(notebook), scrolled_window, tab_label);
+ }
+
+ gtk_widget_show_all(window);
+
+ perf_gtk__resize_window(window);
+
+ gtk_window_set_position(GTK_WINDOW(window), GTK_WIN_POS_CENTER);
+
+ ui_helpline__push(help);
+
+ gtk_main();
+
+ perf_gtk__deactivate_context(&pgctx);
+
+ return 0;
+}
diff --git a/tools/perf/ui/helpline.c b/tools/perf/ui/helpline.c
index a49bcf3..700fb3c 100644
--- a/tools/perf/ui/helpline.c
+++ b/tools/perf/ui/helpline.c
@@ -16,9 +16,16 @@ static void nop_helpline__push(const char *msg __maybe_unused)
{
}
+static int nop_helpline__show(const char *fmt __maybe_unused,
+ va_list ap __maybe_unused)
+{
+ return 0;
+}
+
static struct ui_helpline default_helpline_fns = {
.pop = nop_helpline__pop,
.push = nop_helpline__push,
+ .show = nop_helpline__show,
};
struct ui_helpline *helpline_fns = &default_helpline_fns;
@@ -59,3 +66,8 @@ void ui_helpline__puts(const char *msg)
ui_helpline__pop();
ui_helpline__push(msg);
}
+
+int ui_helpline__vshow(const char *fmt, va_list ap)
+{
+ return helpline_fns->show(fmt, ap);
+}
diff --git a/tools/perf/ui/helpline.h b/tools/perf/ui/helpline.h
index baa28a4..46181f4 100644
--- a/tools/perf/ui/helpline.h
+++ b/tools/perf/ui/helpline.h
@@ -9,6 +9,7 @@
struct ui_helpline {
void (*pop)(void);
void (*push)(const char *msg);
+ int (*show)(const char *fmt, va_list ap);
};
extern struct ui_helpline *helpline_fns;
@@ -20,28 +21,9 @@ void ui_helpline__push(const char *msg);
void ui_helpline__vpush(const char *fmt, va_list ap);
void ui_helpline__fpush(const char *fmt, ...);
void ui_helpline__puts(const char *msg);
+int ui_helpline__vshow(const char *fmt, va_list ap);
extern char ui_helpline__current[512];
-
-#ifdef NEWT_SUPPORT
extern char ui_helpline__last_msg[];
-int ui_helpline__show_help(const char *format, va_list ap);
-#else
-static inline int ui_helpline__show_help(const char *format __maybe_unused,
- va_list ap __maybe_unused)
-{
- return 0;
-}
-#endif /* NEWT_SUPPORT */
-
-#ifdef GTK2_SUPPORT
-int perf_gtk__show_helpline(const char *format, va_list ap);
-#else
-static inline int perf_gtk__show_helpline(const char *format __maybe_unused,
- va_list ap __maybe_unused)
-{
- return 0;
-}
-#endif /* GTK2_SUPPORT */
#endif /* _PERF_UI_HELPLINE_H_ */
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index aa84130..d671e63 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -3,151 +3,163 @@
#include "../util/hist.h"
#include "../util/util.h"
#include "../util/sort.h"
-
+#include "../util/evsel.h"
/* hist period print (hpp) functions */
-static int hpp__header_overhead(struct perf_hpp *hpp)
-{
- return scnprintf(hpp->buf, hpp->size, "Overhead");
-}
-
-static int hpp__width_overhead(struct perf_hpp *hpp __maybe_unused)
-{
- return 8;
-}
-
-static int hpp__color_overhead(struct perf_hpp *hpp, struct hist_entry *he)
-{
- struct hists *hists = he->hists;
- double percent = 100.0 * he->stat.period / hists->stats.total_period;
- return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%%", percent);
-}
+typedef int (*hpp_snprint_fn)(char *buf, size_t size, const char *fmt, ...);
-static int hpp__entry_overhead(struct perf_hpp *hpp, struct hist_entry *he)
+static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
+ u64 (*get_field)(struct hist_entry *),
+ const char *fmt, hpp_snprint_fn print_fn,
+ bool fmt_percent)
{
+ int ret;
struct hists *hists = he->hists;
- double percent = 100.0 * he->stat.period / hists->stats.total_period;
- const char *fmt = symbol_conf.field_sep ? "%.2f" : " %6.2f%%";
-
- return scnprintf(hpp->buf, hpp->size, fmt, percent);
-}
-static int hpp__header_overhead_sys(struct perf_hpp *hpp)
-{
- const char *fmt = symbol_conf.field_sep ? "%s" : "%7s";
-
- return scnprintf(hpp->buf, hpp->size, fmt, "sys");
-}
+ if (fmt_percent) {
+ double percent = 0.0;
-static int hpp__width_overhead_sys(struct perf_hpp *hpp __maybe_unused)
-{
- return 7;
-}
+ if (hists->stats.total_period)
+ percent = 100.0 * get_field(he) /
+ hists->stats.total_period;
-static int hpp__color_overhead_sys(struct perf_hpp *hpp, struct hist_entry *he)
-{
- struct hists *hists = he->hists;
- double percent = 100.0 * he->stat.period_sys / hists->stats.total_period;
+ ret = print_fn(hpp->buf, hpp->size, fmt, percent);
+ } else
+ ret = print_fn(hpp->buf, hpp->size, fmt, get_field(he));
- return percent_color_snprintf(hpp->buf, hpp->size, "%6.2f%%", percent);
-}
+ if (symbol_conf.event_group) {
+ int prev_idx, idx_delta;
+ struct perf_evsel *evsel = hists_to_evsel(hists);
+ struct hist_entry *pair;
+ int nr_members = evsel->nr_members;
-static int hpp__entry_overhead_sys(struct perf_hpp *hpp, struct hist_entry *he)
-{
- struct hists *hists = he->hists;
- double percent = 100.0 * he->stat.period_sys / hists->stats.total_period;
- const char *fmt = symbol_conf.field_sep ? "%.2f" : "%6.2f%%";
+ if (nr_members <= 1)
+ return ret;
- return scnprintf(hpp->buf, hpp->size, fmt, percent);
-}
+ prev_idx = perf_evsel__group_idx(evsel);
-static int hpp__header_overhead_us(struct perf_hpp *hpp)
-{
- const char *fmt = symbol_conf.field_sep ? "%s" : "%7s";
+ list_for_each_entry(pair, &he->pairs.head, pairs.node) {
+ u64 period = get_field(pair);
+ u64 total = pair->hists->stats.total_period;
- return scnprintf(hpp->buf, hpp->size, fmt, "user");
-}
+ if (!total)
+ continue;
-static int hpp__width_overhead_us(struct perf_hpp *hpp __maybe_unused)
-{
- return 7;
-}
+ evsel = hists_to_evsel(pair->hists);
+ idx_delta = perf_evsel__group_idx(evsel) - prev_idx - 1;
-static int hpp__color_overhead_us(struct perf_hpp *hpp, struct hist_entry *he)
-{
- struct hists *hists = he->hists;
- double percent = 100.0 * he->stat.period_us / hists->stats.total_period;
+ while (idx_delta--) {
+ /*
+ * zero-fill group members in the middle which
+ * have no sample
+ */
+ ret += print_fn(hpp->buf + ret, hpp->size - ret,
+ fmt, 0);
+ }
- return percent_color_snprintf(hpp->buf, hpp->size, "%6.2f%%", percent);
-}
+ if (fmt_percent)
+ ret += print_fn(hpp->buf + ret, hpp->size - ret,
+ fmt, 100.0 * period / total);
+ else
+ ret += print_fn(hpp->buf + ret, hpp->size - ret,
+ fmt, period);
-static int hpp__entry_overhead_us(struct perf_hpp *hpp, struct hist_entry *he)
-{
- struct hists *hists = he->hists;
- double percent = 100.0 * he->stat.period_us / hists->stats.total_period;
- const char *fmt = symbol_conf.field_sep ? "%.2f" : "%6.2f%%";
-
- return scnprintf(hpp->buf, hpp->size, fmt, percent);
-}
-
-static int hpp__header_overhead_guest_sys(struct perf_hpp *hpp)
-{
- return scnprintf(hpp->buf, hpp->size, "guest sys");
-}
-
-static int hpp__width_overhead_guest_sys(struct perf_hpp *hpp __maybe_unused)
-{
- return 9;
-}
-
-static int hpp__color_overhead_guest_sys(struct perf_hpp *hpp,
- struct hist_entry *he)
-{
- struct hists *hists = he->hists;
- double percent = 100.0 * he->stat.period_guest_sys / hists->stats.total_period;
-
- return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%% ", percent);
-}
-
-static int hpp__entry_overhead_guest_sys(struct perf_hpp *hpp,
- struct hist_entry *he)
-{
- struct hists *hists = he->hists;
- double percent = 100.0 * he->stat.period_guest_sys / hists->stats.total_period;
- const char *fmt = symbol_conf.field_sep ? "%.2f" : " %6.2f%% ";
-
- return scnprintf(hpp->buf, hpp->size, fmt, percent);
-}
-
-static int hpp__header_overhead_guest_us(struct perf_hpp *hpp)
-{
- return scnprintf(hpp->buf, hpp->size, "guest usr");
-}
+ prev_idx = perf_evsel__group_idx(evsel);
+ }
-static int hpp__width_overhead_guest_us(struct perf_hpp *hpp __maybe_unused)
-{
- return 9;
-}
+ idx_delta = nr_members - prev_idx - 1;
-static int hpp__color_overhead_guest_us(struct perf_hpp *hpp,
- struct hist_entry *he)
-{
- struct hists *hists = he->hists;
- double percent = 100.0 * he->stat.period_guest_us / hists->stats.total_period;
-
- return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%% ", percent);
+ while (idx_delta--) {
+ /*
+ * zero-fill group members at last which have no sample
+ */
+ ret += print_fn(hpp->buf + ret, hpp->size - ret,
+ fmt, 0);
+ }
+ }
+ return ret;
}
-static int hpp__entry_overhead_guest_us(struct perf_hpp *hpp,
- struct hist_entry *he)
-{
- struct hists *hists = he->hists;
- double percent = 100.0 * he->stat.period_guest_us / hists->stats.total_period;
- const char *fmt = symbol_conf.field_sep ? "%.2f" : " %6.2f%% ";
+#define __HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \
+static int hpp__header_##_type(struct perf_hpp *hpp) \
+{ \
+ int len = _min_width; \
+ \
+ if (symbol_conf.event_group) { \
+ struct perf_evsel *evsel = hpp->ptr; \
+ \
+ len = max(len, evsel->nr_members * _unit_width); \
+ } \
+ return scnprintf(hpp->buf, hpp->size, "%*s", len, _str); \
+}
+
+#define __HPP_WIDTH_FN(_type, _min_width, _unit_width) \
+static int hpp__width_##_type(struct perf_hpp *hpp __maybe_unused) \
+{ \
+ int len = _min_width; \
+ \
+ if (symbol_conf.event_group) { \
+ struct perf_evsel *evsel = hpp->ptr; \
+ \
+ len = max(len, evsel->nr_members * _unit_width); \
+ } \
+ return len; \
+}
+
+#define __HPP_COLOR_PERCENT_FN(_type, _field) \
+static u64 he_get_##_field(struct hist_entry *he) \
+{ \
+ return he->stat._field; \
+} \
+ \
+static int hpp__color_##_type(struct perf_hpp *hpp, struct hist_entry *he) \
+{ \
+ return __hpp__fmt(hpp, he, he_get_##_field, " %6.2f%%", \
+ (hpp_snprint_fn)percent_color_snprintf, true); \
+}
+
+#define __HPP_ENTRY_PERCENT_FN(_type, _field) \
+static int hpp__entry_##_type(struct perf_hpp *hpp, struct hist_entry *he) \
+{ \
+ const char *fmt = symbol_conf.field_sep ? " %.2f" : " %6.2f%%"; \
+ return __hpp__fmt(hpp, he, he_get_##_field, fmt, \
+ scnprintf, true); \
+}
+
+#define __HPP_ENTRY_RAW_FN(_type, _field) \
+static u64 he_get_raw_##_field(struct hist_entry *he) \
+{ \
+ return he->stat._field; \
+} \
+ \
+static int hpp__entry_##_type(struct perf_hpp *hpp, struct hist_entry *he) \
+{ \
+ const char *fmt = symbol_conf.field_sep ? " %"PRIu64 : " %11"PRIu64; \
+ return __hpp__fmt(hpp, he, he_get_raw_##_field, fmt, scnprintf, false); \
+}
+
+#define HPP_PERCENT_FNS(_type, _str, _field, _min_width, _unit_width) \
+__HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \
+__HPP_WIDTH_FN(_type, _min_width, _unit_width) \
+__HPP_COLOR_PERCENT_FN(_type, _field) \
+__HPP_ENTRY_PERCENT_FN(_type, _field)
+
+#define HPP_RAW_FNS(_type, _str, _field, _min_width, _unit_width) \
+__HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \
+__HPP_WIDTH_FN(_type, _min_width, _unit_width) \
+__HPP_ENTRY_RAW_FN(_type, _field)
+
+
+HPP_PERCENT_FNS(overhead, "Overhead", period, 8, 8)
+HPP_PERCENT_FNS(overhead_sys, "sys", period_sys, 8, 8)
+HPP_PERCENT_FNS(overhead_us, "usr", period_us, 8, 8)
+HPP_PERCENT_FNS(overhead_guest_sys, "guest sys", period_guest_sys, 9, 8)
+HPP_PERCENT_FNS(overhead_guest_us, "guest usr", period_guest_us, 9, 8)
+
+HPP_RAW_FNS(samples, "Samples", nr_events, 12, 12)
+HPP_RAW_FNS(period, "Period", period, 12, 12)
- return scnprintf(hpp->buf, hpp->size, fmt, percent);
-}
static int hpp__header_baseline(struct perf_hpp *hpp)
{
@@ -179,7 +191,7 @@ static int hpp__color_baseline(struct perf_hpp *hpp, struct hist_entry *he)
{
double percent = baseline_percent(he);
- if (hist_entry__has_pairs(he))
+ if (hist_entry__has_pairs(he) || symbol_conf.field_sep)
return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%%", percent);
else
return scnprintf(hpp->buf, hpp->size, " ");
@@ -196,44 +208,6 @@ static int hpp__entry_baseline(struct perf_hpp *hpp, struct hist_entry *he)
return scnprintf(hpp->buf, hpp->size, " ");
}
-static int hpp__header_samples(struct perf_hpp *hpp)
-{
- const char *fmt = symbol_conf.field_sep ? "%s" : "%11s";
-
- return scnprintf(hpp->buf, hpp->size, fmt, "Samples");
-}
-
-static int hpp__width_samples(struct perf_hpp *hpp __maybe_unused)
-{
- return 11;
-}
-
-static int hpp__entry_samples(struct perf_hpp *hpp, struct hist_entry *he)
-{
- const char *fmt = symbol_conf.field_sep ? "%" PRIu64 : "%11" PRIu64;
-
- return scnprintf(hpp->buf, hpp->size, fmt, he->stat.nr_events);
-}
-
-static int hpp__header_period(struct perf_hpp *hpp)
-{
- const char *fmt = symbol_conf.field_sep ? "%s" : "%12s";
-
- return scnprintf(hpp->buf, hpp->size, fmt, "Period");
-}
-
-static int hpp__width_period(struct perf_hpp *hpp __maybe_unused)
-{
- return 12;
-}
-
-static int hpp__entry_period(struct perf_hpp *hpp, struct hist_entry *he)
-{
- const char *fmt = symbol_conf.field_sep ? "%" PRIu64 : "%12" PRIu64;
-
- return scnprintf(hpp->buf, hpp->size, fmt, he->stat.period);
-}
-
static int hpp__header_period_baseline(struct perf_hpp *hpp)
{
const char *fmt = symbol_conf.field_sep ? "%s" : "%12s";
@@ -254,6 +228,7 @@ static int hpp__entry_period_baseline(struct perf_hpp *hpp, struct hist_entry *h
return scnprintf(hpp->buf, hpp->size, fmt, period);
}
+
static int hpp__header_delta(struct perf_hpp *hpp)
{
const char *fmt = symbol_conf.field_sep ? "%s" : "%7s";
@@ -268,14 +243,18 @@ static int hpp__width_delta(struct perf_hpp *hpp __maybe_unused)
static int hpp__entry_delta(struct perf_hpp *hpp, struct hist_entry *he)
{
+ struct hist_entry *pair = hist_entry__next_pair(he);
const char *fmt = symbol_conf.field_sep ? "%s" : "%7.7s";
char buf[32] = " ";
- double diff;
+ double diff = 0.0;
- if (he->diff.computed)
- diff = he->diff.period_ratio_delta;
- else
- diff = perf_diff__compute_delta(he);
+ if (pair) {
+ if (he->diff.computed)
+ diff = he->diff.period_ratio_delta;
+ else
+ diff = perf_diff__compute_delta(he, pair);
+ } else
+ diff = perf_diff__period_percent(he, he->stat.period);
if (fabs(diff) >= 0.01)
scnprintf(buf, sizeof(buf), "%+4.2F%%", diff);
@@ -297,14 +276,17 @@ static int hpp__width_ratio(struct perf_hpp *hpp __maybe_unused)
static int hpp__entry_ratio(struct perf_hpp *hpp, struct hist_entry *he)
{
+ struct hist_entry *pair = hist_entry__next_pair(he);
const char *fmt = symbol_conf.field_sep ? "%s" : "%14s";
char buf[32] = " ";
- double ratio;
+ double ratio = 0.0;
- if (he->diff.computed)
- ratio = he->diff.period_ratio;
- else
- ratio = perf_diff__compute_ratio(he);
+ if (pair) {
+ if (he->diff.computed)
+ ratio = he->diff.period_ratio;
+ else
+ ratio = perf_diff__compute_ratio(he, pair);
+ }
if (ratio > 0.0)
scnprintf(buf, sizeof(buf), "%+14.6F", ratio);
@@ -326,14 +308,17 @@ static int hpp__width_wdiff(struct perf_hpp *hpp __maybe_unused)
static int hpp__entry_wdiff(struct perf_hpp *hpp, struct hist_entry *he)
{
+ struct hist_entry *pair = hist_entry__next_pair(he);
const char *fmt = symbol_conf.field_sep ? "%s" : "%14s";
char buf[32] = " ";
- s64 wdiff;
+ s64 wdiff = 0;
- if (he->diff.computed)
- wdiff = he->diff.wdiff;
- else
- wdiff = perf_diff__compute_wdiff(he);
+ if (pair) {
+ if (he->diff.computed)
+ wdiff = he->diff.wdiff;
+ else
+ wdiff = perf_diff__compute_wdiff(he, pair);
+ }
if (wdiff != 0)
scnprintf(buf, sizeof(buf), "%14ld", wdiff);
@@ -341,30 +326,6 @@ static int hpp__entry_wdiff(struct perf_hpp *hpp, struct hist_entry *he)
return scnprintf(hpp->buf, hpp->size, fmt, buf);
}
-static int hpp__header_displ(struct perf_hpp *hpp)
-{
- return scnprintf(hpp->buf, hpp->size, "Displ.");
-}
-
-static int hpp__width_displ(struct perf_hpp *hpp __maybe_unused)
-{
- return 6;
-}
-
-static int hpp__entry_displ(struct perf_hpp *hpp,
- struct hist_entry *he)
-{
- struct hist_entry *pair = hist_entry__next_pair(he);
- long displacement = pair ? pair->position - he->position : 0;
- const char *fmt = symbol_conf.field_sep ? "%s" : "%6.6s";
- char buf[32] = " ";
-
- if (displacement)
- scnprintf(buf, sizeof(buf), "%+4ld", displacement);
-
- return scnprintf(hpp->buf, hpp->size, fmt, buf);
-}
-
static int hpp__header_formula(struct perf_hpp *hpp)
{
const char *fmt = symbol_conf.field_sep ? "%s" : "%70s";
@@ -379,67 +340,91 @@ static int hpp__width_formula(struct perf_hpp *hpp __maybe_unused)
static int hpp__entry_formula(struct perf_hpp *hpp, struct hist_entry *he)
{
+ struct hist_entry *pair = hist_entry__next_pair(he);
const char *fmt = symbol_conf.field_sep ? "%s" : "%-70s";
char buf[96] = " ";
- perf_diff__formula(buf, sizeof(buf), he);
+ if (pair)
+ perf_diff__formula(he, pair, buf, sizeof(buf));
+
return scnprintf(hpp->buf, hpp->size, fmt, buf);
}
-#define HPP__COLOR_PRINT_FNS(_name) \
- .header = hpp__header_ ## _name, \
- .width = hpp__width_ ## _name, \
- .color = hpp__color_ ## _name, \
- .entry = hpp__entry_ ## _name
+#define HPP__COLOR_PRINT_FNS(_name) \
+ { \
+ .header = hpp__header_ ## _name, \
+ .width = hpp__width_ ## _name, \
+ .color = hpp__color_ ## _name, \
+ .entry = hpp__entry_ ## _name \
+ }
-#define HPP__PRINT_FNS(_name) \
- .header = hpp__header_ ## _name, \
- .width = hpp__width_ ## _name, \
- .entry = hpp__entry_ ## _name
+#define HPP__PRINT_FNS(_name) \
+ { \
+ .header = hpp__header_ ## _name, \
+ .width = hpp__width_ ## _name, \
+ .entry = hpp__entry_ ## _name \
+ }
struct perf_hpp_fmt perf_hpp__format[] = {
- { .cond = false, HPP__COLOR_PRINT_FNS(baseline) },
- { .cond = true, HPP__COLOR_PRINT_FNS(overhead) },
- { .cond = false, HPP__COLOR_PRINT_FNS(overhead_sys) },
- { .cond = false, HPP__COLOR_PRINT_FNS(overhead_us) },
- { .cond = false, HPP__COLOR_PRINT_FNS(overhead_guest_sys) },
- { .cond = false, HPP__COLOR_PRINT_FNS(overhead_guest_us) },
- { .cond = false, HPP__PRINT_FNS(samples) },
- { .cond = false, HPP__PRINT_FNS(period) },
- { .cond = false, HPP__PRINT_FNS(period_baseline) },
- { .cond = false, HPP__PRINT_FNS(delta) },
- { .cond = false, HPP__PRINT_FNS(ratio) },
- { .cond = false, HPP__PRINT_FNS(wdiff) },
- { .cond = false, HPP__PRINT_FNS(displ) },
- { .cond = false, HPP__PRINT_FNS(formula) }
+ HPP__COLOR_PRINT_FNS(baseline),
+ HPP__COLOR_PRINT_FNS(overhead),
+ HPP__COLOR_PRINT_FNS(overhead_sys),
+ HPP__COLOR_PRINT_FNS(overhead_us),
+ HPP__COLOR_PRINT_FNS(overhead_guest_sys),
+ HPP__COLOR_PRINT_FNS(overhead_guest_us),
+ HPP__PRINT_FNS(samples),
+ HPP__PRINT_FNS(period),
+ HPP__PRINT_FNS(period_baseline),
+ HPP__PRINT_FNS(delta),
+ HPP__PRINT_FNS(ratio),
+ HPP__PRINT_FNS(wdiff),
+ HPP__PRINT_FNS(formula)
};
+LIST_HEAD(perf_hpp__list);
+
+
#undef HPP__COLOR_PRINT_FNS
#undef HPP__PRINT_FNS
+#undef HPP_PERCENT_FNS
+#undef HPP_RAW_FNS
+
+#undef __HPP_HEADER_FN
+#undef __HPP_WIDTH_FN
+#undef __HPP_COLOR_PERCENT_FN
+#undef __HPP_ENTRY_PERCENT_FN
+#undef __HPP_ENTRY_RAW_FN
+
+
void perf_hpp__init(void)
{
if (symbol_conf.show_cpu_utilization) {
- perf_hpp__format[PERF_HPP__OVERHEAD_SYS].cond = true;
- perf_hpp__format[PERF_HPP__OVERHEAD_US].cond = true;
+ perf_hpp__column_enable(PERF_HPP__OVERHEAD_SYS);
+ perf_hpp__column_enable(PERF_HPP__OVERHEAD_US);
if (perf_guest) {
- perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_SYS].cond = true;
- perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].cond = true;
+ perf_hpp__column_enable(PERF_HPP__OVERHEAD_GUEST_SYS);
+ perf_hpp__column_enable(PERF_HPP__OVERHEAD_GUEST_US);
}
}
if (symbol_conf.show_nr_samples)
- perf_hpp__format[PERF_HPP__SAMPLES].cond = true;
+ perf_hpp__column_enable(PERF_HPP__SAMPLES);
if (symbol_conf.show_total_period)
- perf_hpp__format[PERF_HPP__PERIOD].cond = true;
+ perf_hpp__column_enable(PERF_HPP__PERIOD);
+}
+
+void perf_hpp__column_register(struct perf_hpp_fmt *format)
+{
+ list_add_tail(&format->list, &perf_hpp__list);
}
-void perf_hpp__column_enable(unsigned col, bool enable)
+void perf_hpp__column_enable(unsigned col)
{
BUG_ON(col >= PERF_HPP__MAX_INDEX);
- perf_hpp__format[col].cond = enable;
+ perf_hpp__column_register(&perf_hpp__format[col]);
}
static inline void advance_hpp(struct perf_hpp *hpp, int inc)
@@ -452,27 +437,29 @@ int hist_entry__period_snprintf(struct perf_hpp *hpp, struct hist_entry *he,
bool color)
{
const char *sep = symbol_conf.field_sep;
+ struct perf_hpp_fmt *fmt;
char *start = hpp->buf;
- int i, ret;
+ int ret;
bool first = true;
if (symbol_conf.exclude_other && !he->parent)
return 0;
- for (i = 0; i < PERF_HPP__MAX_INDEX; i++) {
- if (!perf_hpp__format[i].cond)
- continue;
-
+ perf_hpp__for_each_format(fmt) {
+ /*
+ * If there's no field_sep, we still need
+ * to display initial ' '.
+ */
if (!sep || !first) {
ret = scnprintf(hpp->buf, hpp->size, "%s", sep ?: " ");
advance_hpp(hpp, ret);
+ } else
first = false;
- }
- if (color && perf_hpp__format[i].color)
- ret = perf_hpp__format[i].color(hpp, he);
+ if (color && fmt->color)
+ ret = fmt->color(hpp, he);
else
- ret = perf_hpp__format[i].entry(hpp, he);
+ ret = fmt->entry(hpp, he);
advance_hpp(hpp, ret);
}
@@ -504,16 +491,18 @@ int hist_entry__sort_snprintf(struct hist_entry *he, char *s, size_t size,
*/
unsigned int hists__sort_list_width(struct hists *hists)
{
+ struct perf_hpp_fmt *fmt;
struct sort_entry *se;
- int i, ret = 0;
+ int i = 0, ret = 0;
+ struct perf_hpp dummy_hpp = {
+ .ptr = hists_to_evsel(hists),
+ };
- for (i = 0; i < PERF_HPP__MAX_INDEX; i++) {
- if (!perf_hpp__format[i].cond)
- continue;
+ perf_hpp__for_each_format(fmt) {
if (i)
ret += 2;
- ret += perf_hpp__format[i].width(NULL);
+ ret += fmt->width(&dummy_hpp);
}
list_for_each_entry(se, &hist_entry__sort_list, list)
diff --git a/tools/perf/ui/keysyms.h b/tools/perf/ui/keysyms.h
index 809eca5..65092d5 100644
--- a/tools/perf/ui/keysyms.h
+++ b/tools/perf/ui/keysyms.h
@@ -23,5 +23,6 @@
#define K_TIMER -1
#define K_ERROR -2
#define K_RESIZE -3
+#define K_SWITCH_INPUT_DATA -4
#endif /* _PERF_KEYSYMS_H_ */
diff --git a/tools/perf/ui/setup.c b/tools/perf/ui/setup.c
index ebb4cc1..ae6a789 100644
--- a/tools/perf/ui/setup.c
+++ b/tools/perf/ui/setup.c
@@ -8,7 +8,7 @@ pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER;
void setup_browser(bool fallback_to_pager)
{
- if (!isatty(1) || dump_trace)
+ if (use_browser < 2 && (!isatty(1) || dump_trace))
use_browser = 0;
/* default to TUI */
@@ -30,6 +30,7 @@ void setup_browser(bool fallback_to_pager)
if (fallback_to_pager)
setup_pager();
+ perf_hpp__column_enable(PERF_HPP__OVERHEAD);
perf_hpp__init();
break;
}
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index f0ee204..ff1f60c 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -3,6 +3,7 @@
#include "../../util/util.h"
#include "../../util/hist.h"
#include "../../util/sort.h"
+#include "../../util/evsel.h"
static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin)
@@ -335,17 +336,19 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size,
size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
int max_cols, FILE *fp)
{
+ struct perf_hpp_fmt *fmt;
struct sort_entry *se;
struct rb_node *nd;
size_t ret = 0;
unsigned int width;
const char *sep = symbol_conf.field_sep;
const char *col_width = symbol_conf.col_width_list_str;
- int idx, nr_rows = 0;
+ int nr_rows = 0;
char bf[96];
struct perf_hpp dummy_hpp = {
.buf = bf,
.size = sizeof(bf),
+ .ptr = hists_to_evsel(hists),
};
bool first = true;
@@ -355,16 +358,14 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
goto print_entries;
fprintf(fp, "# ");
- for (idx = 0; idx < PERF_HPP__MAX_INDEX; idx++) {
- if (!perf_hpp__format[idx].cond)
- continue;
+ perf_hpp__for_each_format(fmt) {
if (!first)
fprintf(fp, "%s", sep ?: " ");
else
first = false;
- perf_hpp__format[idx].header(&dummy_hpp);
+ fmt->header(&dummy_hpp);
fprintf(fp, "%s", bf);
}
@@ -400,18 +401,16 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
first = true;
fprintf(fp, "# ");
- for (idx = 0; idx < PERF_HPP__MAX_INDEX; idx++) {
- unsigned int i;
- if (!perf_hpp__format[idx].cond)
- continue;
+ perf_hpp__for_each_format(fmt) {
+ unsigned int i;
if (!first)
fprintf(fp, "%s", sep ?: " ");
else
first = false;
- width = perf_hpp__format[idx].width(&dummy_hpp);
+ width = fmt->width(&dummy_hpp);
for (i = 0; i < width; i++)
fprintf(fp, ".");
}
@@ -462,7 +461,7 @@ out:
return ret;
}
-size_t hists__fprintf_nr_events(struct hists *hists, FILE *fp)
+size_t events_stats__fprintf(struct events_stats *stats, FILE *fp)
{
int i;
size_t ret = 0;
@@ -470,7 +469,7 @@ size_t hists__fprintf_nr_events(struct hists *hists, FILE *fp)
for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) {
const char *name;
- if (hists->stats.nr_events[i] == 0)
+ if (stats->nr_events[i] == 0)
continue;
name = perf_event__name(i);
@@ -478,7 +477,7 @@ size_t hists__fprintf_nr_events(struct hists *hists, FILE *fp)
continue;
ret += fprintf(fp, "%16s events: %10d\n", name,
- hists->stats.nr_events[i]);
+ stats->nr_events[i]);
}
return ret;
diff --git a/tools/perf/ui/tui/helpline.c b/tools/perf/ui/tui/helpline.c
index 2884d2f..1c8b9af 100644
--- a/tools/perf/ui/tui/helpline.c
+++ b/tools/perf/ui/tui/helpline.c
@@ -8,6 +8,8 @@
#include "../ui.h"
#include "../libslang.h"
+char ui_helpline__last_msg[1024];
+
static void tui_helpline__pop(void)
{
}
@@ -23,20 +25,7 @@ static void tui_helpline__push(const char *msg)
strncpy(ui_helpline__current, msg, sz)[sz - 1] = '\0';
}
-struct ui_helpline tui_helpline_fns = {
- .pop = tui_helpline__pop,
- .push = tui_helpline__push,
-};
-
-void ui_helpline__init(void)
-{
- helpline_fns = &tui_helpline_fns;
- ui_helpline__puts(" ");
-}
-
-char ui_helpline__last_msg[1024];
-
-int ui_helpline__show_help(const char *format, va_list ap)
+static int tui_helpline__show(const char *format, va_list ap)
{
int ret;
static int backlog;
@@ -55,3 +44,15 @@ int ui_helpline__show_help(const char *format, va_list ap)
return ret;
}
+
+struct ui_helpline tui_helpline_fns = {
+ .pop = tui_helpline__pop,
+ .push = tui_helpline__push,
+ .show = tui_helpline__show,
+};
+
+void ui_helpline__init(void)
+{
+ helpline_fns = &tui_helpline_fns;
+ ui_helpline__puts(" ");
+}
diff --git a/tools/perf/ui/util.c b/tools/perf/ui/util.c
index 4f98977..e3e0a96 100644
--- a/tools/perf/ui/util.c
+++ b/tools/perf/ui/util.c
@@ -52,7 +52,6 @@ int ui__warning(const char *format, ...)
return ret;
}
-
/**
* perf_error__register - Register error logging functions
* @eops: The pointer to error logging function struct
diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN
index 6aa34e5..055fef3 100755
--- a/tools/perf/util/PERF-VERSION-GEN
+++ b/tools/perf/util/PERF-VERSION-GEN
@@ -26,13 +26,13 @@ VN=$(expr "$VN" : v*'\(.*\)')
if test -r $GVF
then
- VC=$(sed -e 's/^PERF_VERSION = //' <$GVF)
+ VC=$(sed -e 's/^#define PERF_VERSION "\(.*\)"/\1/' <$GVF)
else
VC=unset
fi
test "$VN" = "$VC" || {
echo >&2 "PERF_VERSION = $VN"
- echo "PERF_VERSION = $VN" >$GVF
+ echo "#define PERF_VERSION \"$VN\"" >$GVF
}
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 07aaeea..d33fe93 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -809,7 +809,7 @@ fallback:
pr_err("Can't annotate %s:\n\n"
"No vmlinux file%s\nwas found in the path.\n\n"
"Please use:\n\n"
- " perf buildid-cache -av vmlinux\n\n"
+ " perf buildid-cache -vu vmlinux\n\n"
"or:\n\n"
" --vmlinux vmlinux\n",
sym->name, build_id_msg ?: "");
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 8eec943..c422440 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -6,6 +6,7 @@
#include "types.h"
#include "symbol.h"
#include "hist.h"
+#include "sort.h"
#include <linux/list.h>
#include <linux/rbtree.h>
#include <pthread.h>
@@ -154,6 +155,29 @@ static inline int symbol__tui_annotate(struct symbol *sym __maybe_unused,
}
#endif
+#ifdef GTK2_SUPPORT
+int symbol__gtk_annotate(struct symbol *sym, struct map *map, int evidx,
+ struct hist_browser_timer *hbt);
+
+static inline int hist_entry__gtk_annotate(struct hist_entry *he, int evidx,
+ struct hist_browser_timer *hbt)
+{
+ return symbol__gtk_annotate(he->ms.sym, he->ms.map, evidx, hbt);
+}
+
+void perf_gtk__show_annotations(void);
+#else
+static inline int hist_entry__gtk_annotate(struct hist_entry *he __maybe_unused,
+ int evidx __maybe_unused,
+ struct hist_browser_timer *hbt
+ __maybe_unused)
+{
+ return 0;
+}
+
+static inline void perf_gtk__show_annotations(void) {}
+#endif
+
extern const char *disassembler_style;
#endif /* __PERF_ANNOTATE_H */
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index d3b3f5d..42b6a63 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -444,7 +444,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
struct callchain_cursor_node *node = *cursor->last;
if (!node) {
- node = calloc(sizeof(*node), 1);
+ node = calloc(1, sizeof(*node));
if (!node)
return -ENOMEM;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index eb34057..3ee9f67 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -143,4 +143,9 @@ static inline void callchain_cursor_advance(struct callchain_cursor *cursor)
cursor->curr = cursor->curr->next;
cursor->pos++;
}
+
+struct option;
+
+int record_parse_callchain_opt(const struct option *opt, const char *arg, int unset);
+extern const char record_callchain_help[];
#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 2b32ffa..f817046 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -1,4 +1,5 @@
#include "util.h"
+#include "sysfs.h"
#include "../perf.h"
#include "cpumap.h"
#include <assert.h>
@@ -201,3 +202,56 @@ void cpu_map__delete(struct cpu_map *map)
{
free(map);
}
+
+int cpu_map__get_socket(struct cpu_map *map, int idx)
+{
+ FILE *fp;
+ const char *mnt;
+ char path[PATH_MAX];
+ int cpu, ret;
+
+ if (idx > map->nr)
+ return -1;
+
+ cpu = map->map[idx];
+
+ mnt = sysfs_find_mountpoint();
+ if (!mnt)
+ return -1;
+
+ sprintf(path,
+ "%s/devices/system/cpu/cpu%d/topology/physical_package_id",
+ mnt, cpu);
+
+ fp = fopen(path, "r");
+ if (!fp)
+ return -1;
+ ret = fscanf(fp, "%d", &cpu);
+ fclose(fp);
+ return ret == 1 ? cpu : -1;
+}
+
+int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp)
+{
+ struct cpu_map *sock;
+ int nr = cpus->nr;
+ int cpu, s1, s2;
+
+ sock = calloc(1, sizeof(*sock) + nr * sizeof(int));
+ if (!sock)
+ return -1;
+
+ for (cpu = 0; cpu < nr; cpu++) {
+ s1 = cpu_map__get_socket(cpus, cpu);
+ for (s2 = 0; s2 < sock->nr; s2++) {
+ if (s1 == sock->map[s2])
+ break;
+ }
+ if (s2 == sock->nr) {
+ sock->map[sock->nr] = s1;
+ sock->nr++;
+ }
+ }
+ *sockp = sock;
+ return 0;
+}
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 2f68a3b..161b007 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -14,6 +14,15 @@ struct cpu_map *cpu_map__dummy_new(void);
void cpu_map__delete(struct cpu_map *map);
struct cpu_map *cpu_map__read(FILE *file);
size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
+int cpu_map__get_socket(struct cpu_map *map, int idx);
+int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp);
+
+static inline int cpu_map__socket(struct cpu_map *sock, int s)
+{
+ if (!sock || s > sock->nr || s < 0)
+ return 0;
+ return sock->map[s];
+}
static inline int cpu_map__nr(const struct cpu_map *map)
{
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 03f830b..399e74c 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -23,10 +23,8 @@ int eprintf(int level, const char *fmt, ...)
if (verbose >= level) {
va_start(args, fmt);
- if (use_browser == 1)
- ret = ui_helpline__show_help(fmt, args);
- else if (use_browser == 2)
- ret = perf_gtk__show_helpline(fmt, args);
+ if (use_browser >= 1)
+ ui_helpline__vshow(fmt, args);
else
ret = vfprintf(stderr, fmt, args);
va_end(args);
@@ -49,28 +47,6 @@ int dump_printf(const char *fmt, ...)
return ret;
}
-#if !defined(NEWT_SUPPORT) && !defined(GTK2_SUPPORT)
-int ui__warning(const char *format, ...)
-{
- va_list args;
-
- va_start(args, format);
- vfprintf(stderr, format, args);
- va_end(args);
- return 0;
-}
-#endif
-
-int ui__error_paranoid(void)
-{
- return ui__error("Permission error - are you root?\n"
- "Consider tweaking /proc/sys/kernel/perf_event_paranoid:\n"
- " -1 - Not paranoid at all\n"
- " 0 - Disallow raw tracepoint access for unpriv\n"
- " 1 - Disallow cpu events for unpriv\n"
- " 2 - Disallow kernel profiling for unpriv\n");
-}
-
void trace_event(union perf_event *event)
{
unsigned char *raw_event = (void *)event;
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index 83e8d23..efbd988 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -5,6 +5,8 @@
#include <stdbool.h>
#include "event.h"
#include "../ui/helpline.h"
+#include "../ui/progress.h"
+#include "../ui/util.h"
extern int verbose;
extern bool quiet, dump_trace;
@@ -12,39 +14,7 @@ extern bool quiet, dump_trace;
int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
void trace_event(union perf_event *event);
-struct ui_progress;
-struct perf_error_ops;
-
-#if defined(NEWT_SUPPORT) || defined(GTK2_SUPPORT)
-
-#include "../ui/progress.h"
int ui__error(const char *format, ...) __attribute__((format(printf, 1, 2)));
-#include "../ui/util.h"
-
-#else
-
-static inline void ui_progress__update(u64 curr __maybe_unused,
- u64 total __maybe_unused,
- const char *title __maybe_unused) {}
-static inline void ui_progress__finish(void) {}
-
-#define ui__error(format, arg...) ui__warning(format, ##arg)
-
-static inline int
-perf_error__register(struct perf_error_ops *eops __maybe_unused)
-{
- return 0;
-}
-
-static inline int
-perf_error__unregister(struct perf_error_ops *eops __maybe_unused)
-{
- return 0;
-}
-
-#endif /* NEWT_SUPPORT || GTK2_SUPPORT */
-
int ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2)));
-int ui__error_paranoid(void);
#endif /* __PERF_DEBUG_H */
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index d6d9a46..6f7d5a9 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -539,13 +539,13 @@ struct dso *__dsos__findnew(struct list_head *head, const char *name)
}
size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
- bool with_hits)
+ bool (skip)(struct dso *dso, int parm), int parm)
{
struct dso *pos;
size_t ret = 0;
list_for_each_entry(pos, head, node) {
- if (with_hits && !pos->hit)
+ if (skip && skip(pos, parm))
continue;
ret += dso__fprintf_buildid(pos, fp);
ret += fprintf(fp, " %s\n", pos->long_name);
@@ -583,7 +583,7 @@ size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp)
if (dso->short_name != dso->long_name)
ret += fprintf(fp, "%s, ", dso->long_name);
ret += fprintf(fp, "%s, %sloaded, ", map_type__name[type],
- dso->loaded ? "" : "NOT ");
+ dso__loaded(dso, type) ? "" : "NOT ");
ret += dso__fprintf_buildid(dso, fp);
ret += fprintf(fp, ")\n");
for (nd = rb_first(&dso->symbols[type]); nd; nd = rb_next(nd)) {
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index e032769..450199a 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -138,7 +138,7 @@ struct dso *__dsos__findnew(struct list_head *head, const char *name);
bool __dsos__read_build_ids(struct list_head *head, bool with_hits);
size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
- bool with_hits);
+ bool (skip)(struct dso *dso, int parm), int parm);
size_t __dsos__fprintf(struct list_head *head, FILE *fp);
size_t dso__fprintf_buildid(struct dso *dso, FILE *fp);
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 3cf2c3e..5cd13d7 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -476,8 +476,10 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
}
}
- if (kallsyms__parse(filename, &args, find_symbol_cb) <= 0)
+ if (kallsyms__parse(filename, &args, find_symbol_cb) <= 0) {
+ free(event);
return -ENOENT;
+ }
map = machine->vmlinux_maps[MAP__FUNCTION];
size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 7052934..bc4ad79 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -49,10 +49,16 @@ struct perf_evlist *perf_evlist__new(struct cpu_map *cpus,
return evlist;
}
-void perf_evlist__config_attrs(struct perf_evlist *evlist,
- struct perf_record_opts *opts)
+void perf_evlist__config(struct perf_evlist *evlist,
+ struct perf_record_opts *opts)
{
struct perf_evsel *evsel;
+ /*
+ * Set the evsel leader links before we configure attributes,
+ * since some might depend on this info.
+ */
+ if (opts->group)
+ perf_evlist__set_leader(evlist);
if (evlist->cpus->map[0] < 0)
opts->no_inherit = true;
@@ -61,7 +67,7 @@ void perf_evlist__config_attrs(struct perf_evlist *evlist,
perf_evsel__config(evsel, opts);
if (evlist->nr_entries > 1)
- evsel->attr.sample_type |= PERF_SAMPLE_ID;
+ perf_evsel__set_sample_id(evsel);
}
}
@@ -111,18 +117,21 @@ void __perf_evlist__set_leader(struct list_head *list)
struct perf_evsel *evsel, *leader;
leader = list_entry(list->next, struct perf_evsel, node);
- leader->leader = NULL;
+ evsel = list_entry(list->prev, struct perf_evsel, node);
+
+ leader->nr_members = evsel->idx - leader->idx + 1;
list_for_each_entry(evsel, list, node) {
- if (evsel != leader)
- evsel->leader = leader;
+ evsel->leader = leader;
}
}
void perf_evlist__set_leader(struct perf_evlist *evlist)
{
- if (evlist->nr_entries)
+ if (evlist->nr_entries) {
+ evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
__perf_evlist__set_leader(&evlist->entries);
+ }
}
int perf_evlist__add_default(struct perf_evlist *evlist)
@@ -222,7 +231,7 @@ void perf_evlist__disable(struct perf_evlist *evlist)
for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
list_for_each_entry(pos, &evlist->entries, node) {
- if (perf_evsel__is_group_member(pos))
+ if (!perf_evsel__is_group_leader(pos))
continue;
for (thread = 0; thread < evlist->threads->nr; thread++)
ioctl(FD(pos, cpu, thread),
@@ -238,7 +247,7 @@ void perf_evlist__enable(struct perf_evlist *evlist)
for (cpu = 0; cpu < cpu_map__nr(evlist->cpus); cpu++) {
list_for_each_entry(pos, &evlist->entries, node) {
- if (perf_evsel__is_group_member(pos))
+ if (!perf_evsel__is_group_leader(pos))
continue;
for (thread = 0; thread < evlist->threads->nr; thread++)
ioctl(FD(pos, cpu, thread),
@@ -366,7 +375,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
if ((old & md->mask) + size != ((old + size) & md->mask)) {
unsigned int offset = old;
unsigned int len = min(sizeof(*event), size), cpy;
- void *dst = &evlist->event_copy;
+ void *dst = &md->event_copy;
do {
cpy = min(md->mask + 1 - (offset & md->mask), len);
@@ -376,7 +385,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
len -= cpy;
} while (len);
- event = &evlist->event_copy;
+ event = &md->event_copy;
}
old += size;
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 56003f7..2dd07bd 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -17,10 +17,18 @@ struct perf_record_opts;
#define PERF_EVLIST__HLIST_BITS 8
#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
+struct perf_mmap {
+ void *base;
+ int mask;
+ unsigned int prev;
+ union perf_event event_copy;
+};
+
struct perf_evlist {
struct list_head entries;
struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
int nr_entries;
+ int nr_groups;
int nr_fds;
int nr_mmaps;
int mmap_len;
@@ -29,7 +37,6 @@ struct perf_evlist {
pid_t pid;
} workload;
bool overwrite;
- union perf_event event_copy;
struct perf_mmap *mmap;
struct pollfd *pollfd;
struct thread_map *threads;
@@ -76,8 +83,8 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *self, int idx);
int perf_evlist__open(struct perf_evlist *evlist);
-void perf_evlist__config_attrs(struct perf_evlist *evlist,
- struct perf_record_opts *opts);
+void perf_evlist__config(struct perf_evlist *evlist,
+ struct perf_record_opts *opts);
int perf_evlist__prepare_workload(struct perf_evlist *evlist,
struct perf_record_opts *opts,
@@ -135,4 +142,25 @@ static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist)
}
size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp);
+
+static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm)
+{
+ struct perf_event_mmap_page *pc = mm->base;
+ int head = pc->data_head;
+ rmb();
+ return head;
+}
+
+static inline void perf_mmap__write_tail(struct perf_mmap *md,
+ unsigned long tail)
+{
+ struct perf_event_mmap_page *pc = md->base;
+
+ /*
+ * ensure all reads are done before we write the tail out.
+ */
+ /* mb(); */
+ pc->data_tail = tail;
+}
+
#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 1b16dd1..9c82f98f 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -22,6 +22,11 @@
#include <linux/perf_event.h>
#include "perf_regs.h"
+static struct {
+ bool sample_id_all;
+ bool exclude_guest;
+} perf_missing_features;
+
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
static int __perf_evsel__sample_size(u64 sample_type)
@@ -50,11 +55,36 @@ void hists__init(struct hists *hists)
pthread_mutex_init(&hists->lock, NULL);
}
+void __perf_evsel__set_sample_bit(struct perf_evsel *evsel,
+ enum perf_event_sample_format bit)
+{
+ if (!(evsel->attr.sample_type & bit)) {
+ evsel->attr.sample_type |= bit;
+ evsel->sample_size += sizeof(u64);
+ }
+}
+
+void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel,
+ enum perf_event_sample_format bit)
+{
+ if (evsel->attr.sample_type & bit) {
+ evsel->attr.sample_type &= ~bit;
+ evsel->sample_size -= sizeof(u64);
+ }
+}
+
+void perf_evsel__set_sample_id(struct perf_evsel *evsel)
+{
+ perf_evsel__set_sample_bit(evsel, ID);
+ evsel->attr.read_format |= PERF_FORMAT_ID;
+}
+
void perf_evsel__init(struct perf_evsel *evsel,
struct perf_event_attr *attr, int idx)
{
evsel->idx = idx;
evsel->attr = *attr;
+ evsel->leader = evsel;
INIT_LIST_HEAD(&evsel->node);
hists__init(&evsel->hists);
evsel->sample_size = __perf_evsel__sample_size(attr->sample_type);
@@ -404,6 +434,31 @@ const char *perf_evsel__name(struct perf_evsel *evsel)
return evsel->name ?: "unknown";
}
+const char *perf_evsel__group_name(struct perf_evsel *evsel)
+{
+ return evsel->group_name ?: "anon group";
+}
+
+int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size)
+{
+ int ret;
+ struct perf_evsel *pos;
+ const char *group_name = perf_evsel__group_name(evsel);
+
+ ret = scnprintf(buf, size, "%s", group_name);
+
+ ret += scnprintf(buf + ret, size - ret, " { %s",
+ perf_evsel__name(evsel));
+
+ for_each_group_member(pos, evsel)
+ ret += scnprintf(buf + ret, size - ret, ", %s",
+ perf_evsel__name(pos));
+
+ ret += scnprintf(buf + ret, size - ret, " }");
+
+ return ret;
+}
+
/*
* The enable_on_exec/disabled value strategy:
*
@@ -438,13 +493,11 @@ void perf_evsel__config(struct perf_evsel *evsel,
struct perf_event_attr *attr = &evsel->attr;
int track = !evsel->idx; /* only the first counter needs these */
- attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
+ attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1;
attr->inherit = !opts->no_inherit;
- attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
- PERF_FORMAT_TOTAL_TIME_RUNNING |
- PERF_FORMAT_ID;
- attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
+ perf_evsel__set_sample_bit(evsel, IP);
+ perf_evsel__set_sample_bit(evsel, TID);
/*
* We default some events to a 1 default interval. But keep
@@ -453,7 +506,7 @@ void perf_evsel__config(struct perf_evsel *evsel,
if (!attr->sample_period || (opts->user_freq != UINT_MAX &&
opts->user_interval != ULLONG_MAX)) {
if (opts->freq) {
- attr->sample_type |= PERF_SAMPLE_PERIOD;
+ perf_evsel__set_sample_bit(evsel, PERIOD);
attr->freq = 1;
attr->sample_freq = opts->freq;
} else {
@@ -468,16 +521,16 @@ void perf_evsel__config(struct perf_evsel *evsel,
attr->inherit_stat = 1;
if (opts->sample_address) {
- attr->sample_type |= PERF_SAMPLE_ADDR;
+ perf_evsel__set_sample_bit(evsel, ADDR);
attr->mmap_data = track;
}
if (opts->call_graph) {
- attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
+ perf_evsel__set_sample_bit(evsel, CALLCHAIN);
if (opts->call_graph == CALLCHAIN_DWARF) {
- attr->sample_type |= PERF_SAMPLE_REGS_USER |
- PERF_SAMPLE_STACK_USER;
+ perf_evsel__set_sample_bit(evsel, REGS_USER);
+ perf_evsel__set_sample_bit(evsel, STACK_USER);
attr->sample_regs_user = PERF_REGS_MASK;
attr->sample_stack_user = opts->stack_dump_size;
attr->exclude_callchain_user = 1;
@@ -485,20 +538,20 @@ void perf_evsel__config(struct perf_evsel *evsel,
}
if (perf_target__has_cpu(&opts->target))
- attr->sample_type |= PERF_SAMPLE_CPU;
+ perf_evsel__set_sample_bit(evsel, CPU);
if (opts->period)
- attr->sample_type |= PERF_SAMPLE_PERIOD;
+ perf_evsel__set_sample_bit(evsel, PERIOD);
- if (!opts->sample_id_all_missing &&
+ if (!perf_missing_features.sample_id_all &&
(opts->sample_time || !opts->no_inherit ||
perf_target__has_cpu(&opts->target)))
- attr->sample_type |= PERF_SAMPLE_TIME;
+ perf_evsel__set_sample_bit(evsel, TIME);
if (opts->raw_samples) {
- attr->sample_type |= PERF_SAMPLE_TIME;
- attr->sample_type |= PERF_SAMPLE_RAW;
- attr->sample_type |= PERF_SAMPLE_CPU;
+ perf_evsel__set_sample_bit(evsel, TIME);
+ perf_evsel__set_sample_bit(evsel, RAW);
+ perf_evsel__set_sample_bit(evsel, CPU);
}
if (opts->no_delay) {
@@ -506,7 +559,7 @@ void perf_evsel__config(struct perf_evsel *evsel,
attr->wakeup_events = 1;
}
if (opts->branch_stack) {
- attr->sample_type |= PERF_SAMPLE_BRANCH_STACK;
+ perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
attr->branch_sample_type = opts->branch_stack;
}
@@ -519,14 +572,14 @@ void perf_evsel__config(struct perf_evsel *evsel,
* Disabling only independent events or group leaders,
* keeping group members enabled.
*/
- if (!perf_evsel__is_group_member(evsel))
+ if (perf_evsel__is_group_leader(evsel))
attr->disabled = 1;
/*
* Setting enable_on_exec for independent events and
* group leaders for traced executed by perf.
*/
- if (perf_target__none(&opts->target) && !perf_evsel__is_group_member(evsel))
+ if (perf_target__none(&opts->target) && perf_evsel__is_group_leader(evsel))
attr->enable_on_exec = 1;
}
@@ -612,6 +665,11 @@ void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
}
}
+void perf_evsel__free_counts(struct perf_evsel *evsel)
+{
+ free(evsel->counts);
+}
+
void perf_evsel__exit(struct perf_evsel *evsel)
{
assert(list_empty(&evsel->node));
@@ -631,6 +689,28 @@ void perf_evsel__delete(struct perf_evsel *evsel)
free(evsel);
}
+static inline void compute_deltas(struct perf_evsel *evsel,
+ int cpu,
+ struct perf_counts_values *count)
+{
+ struct perf_counts_values tmp;
+
+ if (!evsel->prev_raw_counts)
+ return;
+
+ if (cpu == -1) {
+ tmp = evsel->prev_raw_counts->aggr;
+ evsel->prev_raw_counts->aggr = *count;
+ } else {
+ tmp = evsel->prev_raw_counts->cpu[cpu];
+ evsel->prev_raw_counts->cpu[cpu] = *count;
+ }
+
+ count->val = count->val - tmp.val;
+ count->ena = count->ena - tmp.ena;
+ count->run = count->run - tmp.run;
+}
+
int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
int cpu, int thread, bool scale)
{
@@ -646,6 +726,8 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
return -errno;
+ compute_deltas(evsel, cpu, &count);
+
if (scale) {
if (count.run == 0)
count.val = 0;
@@ -684,6 +766,8 @@ int __perf_evsel__read(struct perf_evsel *evsel,
}
}
+ compute_deltas(evsel, -1, aggr);
+
evsel->counts->scaled = 0;
if (scale) {
if (aggr->run == 0) {
@@ -707,7 +791,7 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread)
struct perf_evsel *leader = evsel->leader;
int fd;
- if (!perf_evsel__is_group_member(evsel))
+ if (perf_evsel__is_group_leader(evsel))
return -1;
/*
@@ -738,6 +822,13 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
pid = evsel->cgrp->fd;
}
+fallback_missing_features:
+ if (perf_missing_features.exclude_guest)
+ evsel->attr.exclude_guest = evsel->attr.exclude_host = 0;
+retry_sample_id:
+ if (perf_missing_features.sample_id_all)
+ evsel->attr.sample_id_all = 0;
+
for (cpu = 0; cpu < cpus->nr; cpu++) {
for (thread = 0; thread < threads->nr; thread++) {
@@ -754,13 +845,26 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
group_fd, flags);
if (FD(evsel, cpu, thread) < 0) {
err = -errno;
- goto out_close;
+ goto try_fallback;
}
}
}
return 0;
+try_fallback:
+ if (err != -EINVAL || cpu > 0 || thread > 0)
+ goto out_close;
+
+ if (!perf_missing_features.exclude_guest &&
+ (evsel->attr.exclude_guest || evsel->attr.exclude_host)) {
+ perf_missing_features.exclude_guest = true;
+ goto fallback_missing_features;
+ } else if (!perf_missing_features.sample_id_all) {
+ perf_missing_features.sample_id_all = true;
+ goto retry_sample_id;
+ }
+
out_close:
do {
while (--thread >= 0) {
@@ -1205,3 +1309,225 @@ u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
return 0;
}
+
+static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...)
+{
+ va_list args;
+ int ret = 0;
+
+ if (!*first) {
+ ret += fprintf(fp, ",");
+ } else {
+ ret += fprintf(fp, ":");
+ *first = false;
+ }
+
+ va_start(args, fmt);
+ ret += vfprintf(fp, fmt, args);
+ va_end(args);
+ return ret;
+}
+
+static int __if_fprintf(FILE *fp, bool *first, const char *field, u64 value)
+{
+ if (value == 0)
+ return 0;
+
+ return comma_fprintf(fp, first, " %s: %" PRIu64, field, value);
+}
+
+#define if_print(field) printed += __if_fprintf(fp, &first, #field, evsel->attr.field)
+
+struct bit_names {
+ int bit;
+ const char *name;
+};
+
+static int bits__fprintf(FILE *fp, const char *field, u64 value,
+ struct bit_names *bits, bool *first)
+{
+ int i = 0, printed = comma_fprintf(fp, first, " %s: ", field);
+ bool first_bit = true;
+
+ do {
+ if (value & bits[i].bit) {
+ printed += fprintf(fp, "%s%s", first_bit ? "" : "|", bits[i].name);
+ first_bit = false;
+ }
+ } while (bits[++i].name != NULL);
+
+ return printed;
+}
+
+static int sample_type__fprintf(FILE *fp, bool *first, u64 value)
+{
+#define bit_name(n) { PERF_SAMPLE_##n, #n }
+ struct bit_names bits[] = {
+ bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR),
+ bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU),
+ bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
+ bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
+ { .name = NULL, }
+ };
+#undef bit_name
+ return bits__fprintf(fp, "sample_type", value, bits, first);
+}
+
+static int read_format__fprintf(FILE *fp, bool *first, u64 value)
+{
+#define bit_name(n) { PERF_FORMAT_##n, #n }
+ struct bit_names bits[] = {
+ bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING),
+ bit_name(ID), bit_name(GROUP),
+ { .name = NULL, }
+ };
+#undef bit_name
+ return bits__fprintf(fp, "read_format", value, bits, first);
+}
+
+int perf_evsel__fprintf(struct perf_evsel *evsel,
+ struct perf_attr_details *details, FILE *fp)
+{
+ bool first = true;
+ int printed = 0;
+
+ if (details->event_group) {
+ struct perf_evsel *pos;
+
+ if (!perf_evsel__is_group_leader(evsel))
+ return 0;
+
+ if (evsel->nr_members > 1)
+ printed += fprintf(fp, "%s{", evsel->group_name ?: "");
+
+ printed += fprintf(fp, "%s", perf_evsel__name(evsel));
+ for_each_group_member(pos, evsel)
+ printed += fprintf(fp, ",%s", perf_evsel__name(pos));
+
+ if (evsel->nr_members > 1)
+ printed += fprintf(fp, "}");
+ goto out;
+ }
+
+ printed += fprintf(fp, "%s", perf_evsel__name(evsel));
+
+ if (details->verbose || details->freq) {
+ printed += comma_fprintf(fp, &first, " sample_freq=%" PRIu64,
+ (u64)evsel->attr.sample_freq);
+ }
+
+ if (details->verbose) {
+ if_print(type);
+ if_print(config);
+ if_print(config1);
+ if_print(config2);
+ if_print(size);
+ printed += sample_type__fprintf(fp, &first, evsel->attr.sample_type);
+ if (evsel->attr.read_format)
+ printed += read_format__fprintf(fp, &first, evsel->attr.read_format);
+ if_print(disabled);
+ if_print(inherit);
+ if_print(pinned);
+ if_print(exclusive);
+ if_print(exclude_user);
+ if_print(exclude_kernel);
+ if_print(exclude_hv);
+ if_print(exclude_idle);
+ if_print(mmap);
+ if_print(comm);
+ if_print(freq);
+ if_print(inherit_stat);
+ if_print(enable_on_exec);
+ if_print(task);
+ if_print(watermark);
+ if_print(precise_ip);
+ if_print(mmap_data);
+ if_print(sample_id_all);
+ if_print(exclude_host);
+ if_print(exclude_guest);
+ if_print(__reserved_1);
+ if_print(wakeup_events);
+ if_print(bp_type);
+ if_print(branch_sample_type);
+ }
+out:
+ fputc('\n', fp);
+ return ++printed;
+}
+
+bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
+ char *msg, size_t msgsize)
+{
+ if ((err == ENOENT || err == ENXIO) &&
+ evsel->attr.type == PERF_TYPE_HARDWARE &&
+ evsel->attr.config == PERF_COUNT_HW_CPU_CYCLES) {
+ /*
+ * If it's cycles then fall back to hrtimer based
+ * cpu-clock-tick sw counter, which is always available even if
+ * no PMU support.
+ *
+ * PPC returns ENXIO until 2.6.37 (behavior changed with commit
+ * b0a873e).
+ */
+ scnprintf(msg, msgsize, "%s",
+"The cycles event is not supported, trying to fall back to cpu-clock-ticks");
+
+ evsel->attr.type = PERF_TYPE_SOFTWARE;
+ evsel->attr.config = PERF_COUNT_SW_CPU_CLOCK;
+
+ free(evsel->name);
+ evsel->name = NULL;
+ return true;
+ }
+
+ return false;
+}
+
+int perf_evsel__open_strerror(struct perf_evsel *evsel,
+ struct perf_target *target,
+ int err, char *msg, size_t size)
+{
+ switch (err) {
+ case EPERM:
+ case EACCES:
+ return scnprintf(msg, size, "%s",
+ "You may not have permission to collect %sstats.\n"
+ "Consider tweaking /proc/sys/kernel/perf_event_paranoid:\n"
+ " -1 - Not paranoid at all\n"
+ " 0 - Disallow raw tracepoint access for unpriv\n"
+ " 1 - Disallow cpu events for unpriv\n"
+ " 2 - Disallow kernel profiling for unpriv",
+ target->system_wide ? "system-wide " : "");
+ case ENOENT:
+ return scnprintf(msg, size, "The %s event is not supported.",
+ perf_evsel__name(evsel));
+ case EMFILE:
+ return scnprintf(msg, size, "%s",
+ "Too many events are opened.\n"
+ "Try again after reducing the number of events.");
+ case ENODEV:
+ if (target->cpu_list)
+ return scnprintf(msg, size, "%s",
+ "No such device - did you specify an out-of-range profile CPU?\n");
+ break;
+ case EOPNOTSUPP:
+ if (evsel->attr.precise_ip)
+ return scnprintf(msg, size, "%s",
+ "\'precise\' request may not be supported. Try removing 'p' modifier.");
+#if defined(__i386__) || defined(__x86_64__)
+ if (evsel->attr.type == PERF_TYPE_HARDWARE)
+ return scnprintf(msg, size, "%s",
+ "No hardware sampling interrupt available.\n"
+ "No APIC? If so then you can boot the kernel with the \"lapic\" boot parameter to force-enable it.");
+#endif
+ break;
+ default:
+ break;
+ }
+
+ return scnprintf(msg, size,
+ "The sys_perf_event_open() syscall returned with %d (%s) for event (%s). \n"
+ "/bin/dmesg may provide additional information.\n"
+ "No CONFIG_PERF_EVENTS=y kernel support configured?\n",
+ err, strerror(err), perf_evsel__name(evsel));
+}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 3d2b801..52021c3 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -53,6 +53,7 @@ struct perf_evsel {
struct xyarray *sample_id;
u64 *id;
struct perf_counts *counts;
+ struct perf_counts *prev_raw_counts;
int idx;
u32 ids;
struct hists hists;
@@ -73,10 +74,13 @@ struct perf_evsel {
bool needs_swap;
/* parse modifier helper */
int exclude_GH;
+ int nr_members;
struct perf_evsel *leader;
char *group_name;
};
+#define hists_to_evsel(h) container_of(h, struct perf_evsel, hists)
+
struct cpu_map;
struct thread_map;
struct perf_evlist;
@@ -110,14 +114,30 @@ extern const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX];
int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
char *bf, size_t size);
const char *perf_evsel__name(struct perf_evsel *evsel);
+const char *perf_evsel__group_name(struct perf_evsel *evsel);
+int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size);
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
void perf_evsel__free_fd(struct perf_evsel *evsel);
void perf_evsel__free_id(struct perf_evsel *evsel);
+void perf_evsel__free_counts(struct perf_evsel *evsel);
void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
+void __perf_evsel__set_sample_bit(struct perf_evsel *evsel,
+ enum perf_event_sample_format bit);
+void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel,
+ enum perf_event_sample_format bit);
+
+#define perf_evsel__set_sample_bit(evsel, bit) \
+ __perf_evsel__set_sample_bit(evsel, PERF_SAMPLE_##bit)
+
+#define perf_evsel__reset_sample_bit(evsel, bit) \
+ __perf_evsel__reset_sample_bit(evsel, PERF_SAMPLE_##bit)
+
+void perf_evsel__set_sample_id(struct perf_evsel *evsel);
+
int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
const char *filter);
@@ -226,8 +246,34 @@ static inline struct perf_evsel *perf_evsel__next(struct perf_evsel *evsel)
return list_entry(evsel->node.next, struct perf_evsel, node);
}
-static inline bool perf_evsel__is_group_member(const struct perf_evsel *evsel)
+static inline bool perf_evsel__is_group_leader(const struct perf_evsel *evsel)
+{
+ return evsel->leader == evsel;
+}
+
+struct perf_attr_details {
+ bool freq;
+ bool verbose;
+ bool event_group;
+};
+
+int perf_evsel__fprintf(struct perf_evsel *evsel,
+ struct perf_attr_details *details, FILE *fp);
+
+bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
+ char *msg, size_t msgsize);
+int perf_evsel__open_strerror(struct perf_evsel *evsel,
+ struct perf_target *target,
+ int err, char *msg, size_t size);
+
+static inline int perf_evsel__group_idx(struct perf_evsel *evsel)
{
- return evsel->leader != NULL;
+ return evsel->idx - evsel->leader->idx;
}
+
+#define for_each_group_member(_evsel, _leader) \
+for ((_evsel) = list_entry((_leader)->node.next, struct perf_evsel, node); \
+ (_evsel) && (_evsel)->leader == (_leader); \
+ (_evsel) = list_entry((_evsel)->node.next, struct perf_evsel, node))
+
#endif /* __PERF_EVSEL_H */
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index b7da463..f4bfd79 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -148,7 +148,7 @@ static char *do_read_string(int fd, struct perf_header *ph)
u32 len;
char *buf;
- sz = read(fd, &len, sizeof(len));
+ sz = readn(fd, &len, sizeof(len));
if (sz < (ssize_t)sizeof(len))
return NULL;
@@ -159,7 +159,7 @@ static char *do_read_string(int fd, struct perf_header *ph)
if (!buf)
return NULL;
- ret = read(fd, buf, len);
+ ret = readn(fd, buf, len);
if (ret == (ssize_t)len) {
/*
* strings are padded by zeroes
@@ -287,12 +287,12 @@ static int dsos__write_buildid_table(struct perf_header *header, int fd)
struct perf_session *session = container_of(header,
struct perf_session, header);
struct rb_node *nd;
- int err = machine__write_buildid_table(&session->host_machine, fd);
+ int err = machine__write_buildid_table(&session->machines.host, fd);
if (err)
return err;
- for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) {
+ for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) {
struct machine *pos = rb_entry(nd, struct machine, rb_node);
err = machine__write_buildid_table(pos, fd);
if (err)
@@ -313,7 +313,8 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
if (is_kallsyms) {
if (symbol_conf.kptr_restrict) {
pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n");
- return 0;
+ err = 0;
+ goto out_free;
}
realname = (char *) name;
} else
@@ -448,9 +449,9 @@ static int perf_session__cache_build_ids(struct perf_session *session)
if (mkdir(debugdir, 0755) != 0 && errno != EEXIST)
return -1;
- ret = machine__cache_build_ids(&session->host_machine, debugdir);
+ ret = machine__cache_build_ids(&session->machines.host, debugdir);
- for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) {
+ for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) {
struct machine *pos = rb_entry(nd, struct machine, rb_node);
ret |= machine__cache_build_ids(pos, debugdir);
}
@@ -467,9 +468,9 @@ static bool machine__read_build_ids(struct machine *machine, bool with_hits)
static bool perf_session__read_build_ids(struct perf_session *session, bool with_hits)
{
struct rb_node *nd;
- bool ret = machine__read_build_ids(&session->host_machine, with_hits);
+ bool ret = machine__read_build_ids(&session->machines.host, with_hits);
- for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) {
+ for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) {
struct machine *pos = rb_entry(nd, struct machine, rb_node);
ret |= machine__read_build_ids(pos, with_hits);
}
@@ -954,6 +955,7 @@ static int write_topo_node(int fd, int node)
}
fclose(fp);
+ fp = NULL;
ret = do_write(fd, &mem_total, sizeof(u64));
if (ret)
@@ -980,7 +982,8 @@ static int write_topo_node(int fd, int node)
ret = do_write_string(fd, buf);
done:
free(buf);
- fclose(fp);
+ if (fp)
+ fclose(fp);
return ret;
}
@@ -1051,16 +1054,25 @@ static int write_pmu_mappings(int fd, struct perf_header *h __maybe_unused,
struct perf_pmu *pmu = NULL;
off_t offset = lseek(fd, 0, SEEK_CUR);
__u32 pmu_num = 0;
+ int ret;
/* write real pmu_num later */
- do_write(fd, &pmu_num, sizeof(pmu_num));
+ ret = do_write(fd, &pmu_num, sizeof(pmu_num));
+ if (ret < 0)
+ return ret;
while ((pmu = perf_pmu__scan(pmu))) {
if (!pmu->name)
continue;
pmu_num++;
- do_write(fd, &pmu->type, sizeof(pmu->type));
- do_write_string(fd, pmu->name);
+
+ ret = do_write(fd, &pmu->type, sizeof(pmu->type));
+ if (ret < 0)
+ return ret;
+
+ ret = do_write_string(fd, pmu->name);
+ if (ret < 0)
+ return ret;
}
if (pwrite(fd, &pmu_num, sizeof(pmu_num), offset) != sizeof(pmu_num)) {
@@ -1073,6 +1085,52 @@ static int write_pmu_mappings(int fd, struct perf_header *h __maybe_unused,
}
/*
+ * File format:
+ *
+ * struct group_descs {
+ * u32 nr_groups;
+ * struct group_desc {
+ * char name[];
+ * u32 leader_idx;
+ * u32 nr_members;
+ * }[nr_groups];
+ * };
+ */
+static int write_group_desc(int fd, struct perf_header *h __maybe_unused,
+ struct perf_evlist *evlist)
+{
+ u32 nr_groups = evlist->nr_groups;
+ struct perf_evsel *evsel;
+ int ret;
+
+ ret = do_write(fd, &nr_groups, sizeof(nr_groups));
+ if (ret < 0)
+ return ret;
+
+ list_for_each_entry(evsel, &evlist->entries, node) {
+ if (perf_evsel__is_group_leader(evsel) &&
+ evsel->nr_members > 1) {
+ const char *name = evsel->group_name ?: "{anon_group}";
+ u32 leader_idx = evsel->idx;
+ u32 nr_members = evsel->nr_members;
+
+ ret = do_write_string(fd, name);
+ if (ret < 0)
+ return ret;
+
+ ret = do_write(fd, &leader_idx, sizeof(leader_idx));
+ if (ret < 0)
+ return ret;
+
+ ret = do_write(fd, &nr_members, sizeof(nr_members));
+ if (ret < 0)
+ return ret;
+ }
+ }
+ return 0;
+}
+
+/*
* default get_cpuid(): nothing gets recorded
* actual implementation must be in arch/$(ARCH)/util/header.c
*/
@@ -1209,14 +1267,14 @@ read_event_desc(struct perf_header *ph, int fd)
size_t msz;
/* number of events */
- ret = read(fd, &nre, sizeof(nre));
+ ret = readn(fd, &nre, sizeof(nre));
if (ret != (ssize_t)sizeof(nre))
goto error;
if (ph->needs_swap)
nre = bswap_32(nre);
- ret = read(fd, &sz, sizeof(sz));
+ ret = readn(fd, &sz, sizeof(sz));
if (ret != (ssize_t)sizeof(sz))
goto error;
@@ -1244,7 +1302,7 @@ read_event_desc(struct perf_header *ph, int fd)
* must read entire on-file attr struct to
* sync up with layout.
*/
- ret = read(fd, buf, sz);
+ ret = readn(fd, buf, sz);
if (ret != (ssize_t)sz)
goto error;
@@ -1253,7 +1311,7 @@ read_event_desc(struct perf_header *ph, int fd)
memcpy(&evsel->attr, buf, msz);
- ret = read(fd, &nr, sizeof(nr));
+ ret = readn(fd, &nr, sizeof(nr));
if (ret != (ssize_t)sizeof(nr))
goto error;
@@ -1274,7 +1332,7 @@ read_event_desc(struct perf_header *ph, int fd)
evsel->id = id;
for (j = 0 ; j < nr; j++) {
- ret = read(fd, id, sizeof(*id));
+ ret = readn(fd, id, sizeof(*id));
if (ret != (ssize_t)sizeof(*id))
goto error;
if (ph->needs_swap)
@@ -1435,6 +1493,31 @@ error:
fprintf(fp, "# pmu mappings: unable to read\n");
}
+static void print_group_desc(struct perf_header *ph, int fd __maybe_unused,
+ FILE *fp)
+{
+ struct perf_session *session;
+ struct perf_evsel *evsel;
+ u32 nr = 0;
+
+ session = container_of(ph, struct perf_session, header);
+
+ list_for_each_entry(evsel, &session->evlist->entries, node) {
+ if (perf_evsel__is_group_leader(evsel) &&
+ evsel->nr_members > 1) {
+ fprintf(fp, "# group: %s{%s", evsel->group_name ?: "",
+ perf_evsel__name(evsel));
+
+ nr = evsel->nr_members - 1;
+ } else if (nr) {
+ fprintf(fp, ",%s", perf_evsel__name(evsel));
+
+ if (--nr == 0)
+ fprintf(fp, "}\n");
+ }
+ }
+}
+
static int __event_process_build_id(struct build_id_event *bev,
char *filename,
struct perf_session *session)
@@ -1506,14 +1589,14 @@ static int perf_header__read_build_ids_abi_quirk(struct perf_header *header,
while (offset < limit) {
ssize_t len;
- if (read(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev))
+ if (readn(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev))
return -1;
if (header->needs_swap)
perf_event_header__bswap(&old_bev.header);
len = old_bev.header.size - sizeof(old_bev);
- if (read(input, filename, len) != len)
+ if (readn(input, filename, len) != len)
return -1;
bev.header = old_bev.header;
@@ -1548,14 +1631,14 @@ static int perf_header__read_build_ids(struct perf_header *header,
while (offset < limit) {
ssize_t len;
- if (read(input, &bev, sizeof(bev)) != sizeof(bev))
+ if (readn(input, &bev, sizeof(bev)) != sizeof(bev))
goto out;
if (header->needs_swap)
perf_event_header__bswap(&bev.header);
len = bev.header.size - sizeof(bev);
- if (read(input, filename, len) != len)
+ if (readn(input, filename, len) != len)
goto out;
/*
* The a1645ce1 changeset:
@@ -1641,7 +1724,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused,
size_t ret;
u32 nr;
- ret = read(fd, &nr, sizeof(nr));
+ ret = readn(fd, &nr, sizeof(nr));
if (ret != sizeof(nr))
return -1;
@@ -1650,7 +1733,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused,
ph->env.nr_cpus_online = nr;
- ret = read(fd, &nr, sizeof(nr));
+ ret = readn(fd, &nr, sizeof(nr));
if (ret != sizeof(nr))
return -1;
@@ -1684,7 +1767,7 @@ static int process_total_mem(struct perf_file_section *section __maybe_unused,
uint64_t mem;
size_t ret;
- ret = read(fd, &mem, sizeof(mem));
+ ret = readn(fd, &mem, sizeof(mem));
if (ret != sizeof(mem))
return -1;
@@ -1756,7 +1839,7 @@ static int process_cmdline(struct perf_file_section *section __maybe_unused,
u32 nr, i;
struct strbuf sb;
- ret = read(fd, &nr, sizeof(nr));
+ ret = readn(fd, &nr, sizeof(nr));
if (ret != sizeof(nr))
return -1;
@@ -1792,7 +1875,7 @@ static int process_cpu_topology(struct perf_file_section *section __maybe_unused
char *str;
struct strbuf sb;
- ret = read(fd, &nr, sizeof(nr));
+ ret = readn(fd, &nr, sizeof(nr));
if (ret != sizeof(nr))
return -1;
@@ -1813,7 +1896,7 @@ static int process_cpu_topology(struct perf_file_section *section __maybe_unused
}
ph->env.sibling_cores = strbuf_detach(&sb, NULL);
- ret = read(fd, &nr, sizeof(nr));
+ ret = readn(fd, &nr, sizeof(nr));
if (ret != sizeof(nr))
return -1;
@@ -1850,7 +1933,7 @@ static int process_numa_topology(struct perf_file_section *section __maybe_unuse
struct strbuf sb;
/* nr nodes */
- ret = read(fd, &nr, sizeof(nr));
+ ret = readn(fd, &nr, sizeof(nr));
if (ret != sizeof(nr))
goto error;
@@ -1862,15 +1945,15 @@ static int process_numa_topology(struct perf_file_section *section __maybe_unuse
for (i = 0; i < nr; i++) {
/* node number */
- ret = read(fd, &node, sizeof(node));
+ ret = readn(fd, &node, sizeof(node));
if (ret != sizeof(node))
goto error;
- ret = read(fd, &mem_total, sizeof(u64));
+ ret = readn(fd, &mem_total, sizeof(u64));
if (ret != sizeof(u64))
goto error;
- ret = read(fd, &mem_free, sizeof(u64));
+ ret = readn(fd, &mem_free, sizeof(u64));
if (ret != sizeof(u64))
goto error;
@@ -1909,7 +1992,7 @@ static int process_pmu_mappings(struct perf_file_section *section __maybe_unused
u32 type;
struct strbuf sb;
- ret = read(fd, &pmu_num, sizeof(pmu_num));
+ ret = readn(fd, &pmu_num, sizeof(pmu_num));
if (ret != sizeof(pmu_num))
return -1;
@@ -1925,7 +2008,7 @@ static int process_pmu_mappings(struct perf_file_section *section __maybe_unused
strbuf_init(&sb, 128);
while (pmu_num) {
- if (read(fd, &type, sizeof(type)) != sizeof(type))
+ if (readn(fd, &type, sizeof(type)) != sizeof(type))
goto error;
if (ph->needs_swap)
type = bswap_32(type);
@@ -1949,6 +2032,98 @@ error:
return -1;
}
+static int process_group_desc(struct perf_file_section *section __maybe_unused,
+ struct perf_header *ph, int fd,
+ void *data __maybe_unused)
+{
+ size_t ret = -1;
+ u32 i, nr, nr_groups;
+ struct perf_session *session;
+ struct perf_evsel *evsel, *leader = NULL;
+ struct group_desc {
+ char *name;
+ u32 leader_idx;
+ u32 nr_members;
+ } *desc;
+
+ if (readn(fd, &nr_groups, sizeof(nr_groups)) != sizeof(nr_groups))
+ return -1;
+
+ if (ph->needs_swap)
+ nr_groups = bswap_32(nr_groups);
+
+ ph->env.nr_groups = nr_groups;
+ if (!nr_groups) {
+ pr_debug("group desc not available\n");
+ return 0;
+ }
+
+ desc = calloc(nr_groups, sizeof(*desc));
+ if (!desc)
+ return -1;
+
+ for (i = 0; i < nr_groups; i++) {
+ desc[i].name = do_read_string(fd, ph);
+ if (!desc[i].name)
+ goto out_free;
+
+ if (readn(fd, &desc[i].leader_idx, sizeof(u32)) != sizeof(u32))
+ goto out_free;
+
+ if (readn(fd, &desc[i].nr_members, sizeof(u32)) != sizeof(u32))
+ goto out_free;
+
+ if (ph->needs_swap) {
+ desc[i].leader_idx = bswap_32(desc[i].leader_idx);
+ desc[i].nr_members = bswap_32(desc[i].nr_members);
+ }
+ }
+
+ /*
+ * Rebuild group relationship based on the group_desc
+ */
+ session = container_of(ph, struct perf_session, header);
+ session->evlist->nr_groups = nr_groups;
+
+ i = nr = 0;
+ list_for_each_entry(evsel, &session->evlist->entries, node) {
+ if (evsel->idx == (int) desc[i].leader_idx) {
+ evsel->leader = evsel;
+ /* {anon_group} is a dummy name */
+ if (strcmp(desc[i].name, "{anon_group}"))
+ evsel->group_name = desc[i].name;
+ evsel->nr_members = desc[i].nr_members;
+
+ if (i >= nr_groups || nr > 0) {
+ pr_debug("invalid group desc\n");
+ goto out_free;
+ }
+
+ leader = evsel;
+ nr = evsel->nr_members - 1;
+ i++;
+ } else if (nr) {
+ /* This is a group member */
+ evsel->leader = leader;
+
+ nr--;
+ }
+ }
+
+ if (i != nr_groups || nr != 0) {
+ pr_debug("invalid group desc\n");
+ goto out_free;
+ }
+
+ ret = 0;
+out_free:
+ while ((int) --i >= 0)
+ free(desc[i].name);
+ free(desc);
+
+ return ret;
+}
+
struct feature_ops {
int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist);
void (*print)(struct perf_header *h, int fd, FILE *fp);
@@ -1988,6 +2163,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology),
FEAT_OPA(HEADER_BRANCH_STACK, branch_stack),
FEAT_OPP(HEADER_PMU_MAPPINGS, pmu_mappings),
+ FEAT_OPP(HEADER_GROUP_DESC, group_desc),
};
struct header_print_data {
@@ -2077,7 +2253,7 @@ static int perf_header__adds_write(struct perf_header *header,
if (!nr_sections)
return 0;
- feat_sec = p = calloc(sizeof(*feat_sec), nr_sections);
+ feat_sec = p = calloc(nr_sections, sizeof(*feat_sec));
if (feat_sec == NULL)
return -ENOMEM;
@@ -2249,7 +2425,7 @@ int perf_header__process_sections(struct perf_header *header, int fd,
if (!nr_sections)
return 0;
- feat_sec = sec = calloc(sizeof(*feat_sec), nr_sections);
+ feat_sec = sec = calloc(nr_sections, sizeof(*feat_sec));
if (!feat_sec)
return -1;
@@ -2912,16 +3088,22 @@ int perf_event__process_tracing_data(union perf_event *event,
session->repipe);
padding = PERF_ALIGN(size_read, sizeof(u64)) - size_read;
- if (read(session->fd, buf, padding) < 0)
- die("reading input file");
+ if (readn(session->fd, buf, padding) < 0) {
+ pr_err("%s: reading input file", __func__);
+ return -1;
+ }
if (session->repipe) {
int retw = write(STDOUT_FILENO, buf, padding);
- if (retw <= 0 || retw != padding)
- die("repiping tracing data padding");
+ if (retw <= 0 || retw != padding) {
+ pr_err("%s: repiping tracing data padding", __func__);
+ return -1;
+ }
}
- if (size_read + padding != size)
- die("tracing data size mismatch");
+ if (size_read + padding != size) {
+ pr_err("%s: tracing data size mismatch", __func__);
+ return -1;
+ }
perf_evlist__prepare_tracepoint_events(session->evlist,
session->pevent);
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 20f0344..c9fc55c 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -29,6 +29,7 @@ enum {
HEADER_NUMA_TOPOLOGY,
HEADER_BRANCH_STACK,
HEADER_PMU_MAPPINGS,
+ HEADER_GROUP_DESC,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
@@ -79,6 +80,7 @@ struct perf_session_env {
char *numa_nodes;
int nr_pmu_mappings;
char *pmu_mappings;
+ int nr_groups;
};
struct perf_header {
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index cb17e2a..f855941 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -4,6 +4,7 @@
#include "hist.h"
#include "session.h"
#include "sort.h"
+#include "evsel.h"
#include <math.h>
static bool hists__filter_entry_by_dso(struct hists *hists,
@@ -82,6 +83,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
hists__new_col_len(hists, HISTC_DSO, len);
}
+ if (h->parent)
+ hists__new_col_len(hists, HISTC_PARENT, h->parent->namelen);
+
if (h->branch_info) {
int symlen;
/*
@@ -242,6 +246,14 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template)
if (he->ms.map)
he->ms.map->referenced = true;
+
+ if (he->branch_info) {
+ if (he->branch_info->from.map)
+ he->branch_info->from.map->referenced = true;
+ if (he->branch_info->to.map)
+ he->branch_info->to.map->referenced = true;
+ }
+
if (symbol_conf.use_callchain)
callchain_init(he->callchain);
@@ -251,7 +263,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template)
return he;
}
-static void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h)
+void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h)
{
if (!h->filtered) {
hists__calc_col_len(hists, h);
@@ -285,7 +297,13 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
parent = *p;
he = rb_entry(parent, struct hist_entry, rb_node_in);
- cmp = hist_entry__cmp(entry, he);
+ /*
+ * Make sure that it receives arguments in a same order as
+ * hist_entry__collapse() so that we can use an appropriate
+ * function when searching an entry regardless which sort
+ * keys were used.
+ */
+ cmp = hist_entry__cmp(he, entry);
if (!cmp) {
he_stat__add_period(&he->stat, period);
@@ -523,6 +541,62 @@ void hists__collapse_resort_threaded(struct hists *hists)
* reverse the map, sort on period.
*/
+static int period_cmp(u64 period_a, u64 period_b)
+{
+ if (period_a > period_b)
+ return 1;
+ if (period_a < period_b)
+ return -1;
+ return 0;
+}
+
+static int hist_entry__sort_on_period(struct hist_entry *a,
+ struct hist_entry *b)
+{
+ int ret;
+ int i, nr_members;
+ struct perf_evsel *evsel;
+ struct hist_entry *pair;
+ u64 *periods_a, *periods_b;
+
+ ret = period_cmp(a->stat.period, b->stat.period);
+ if (ret || !symbol_conf.event_group)
+ return ret;
+
+ evsel = hists_to_evsel(a->hists);
+ nr_members = evsel->nr_members;
+ if (nr_members <= 1)
+ return ret;
+
+ periods_a = zalloc(sizeof(periods_a) * nr_members);
+ periods_b = zalloc(sizeof(periods_b) * nr_members);
+
+ if (!periods_a || !periods_b)
+ goto out;
+
+ list_for_each_entry(pair, &a->pairs.head, pairs.node) {
+ evsel = hists_to_evsel(pair->hists);
+ periods_a[perf_evsel__group_idx(evsel)] = pair->stat.period;
+ }
+
+ list_for_each_entry(pair, &b->pairs.head, pairs.node) {
+ evsel = hists_to_evsel(pair->hists);
+ periods_b[perf_evsel__group_idx(evsel)] = pair->stat.period;
+ }
+
+ for (i = 1; i < nr_members; i++) {
+ ret = period_cmp(periods_a[i], periods_b[i]);
+ if (ret)
+ break;
+ }
+
+out:
+ free(periods_a);
+ free(periods_b);
+
+ return ret;
+}
+
static void __hists__insert_output_entry(struct rb_root *entries,
struct hist_entry *he,
u64 min_callchain_hits)
@@ -539,7 +613,7 @@ static void __hists__insert_output_entry(struct rb_root *entries,
parent = *p;
iter = rb_entry(parent, struct hist_entry, rb_node);
- if (he->stat.period > iter->stat.period)
+ if (hist_entry__sort_on_period(he, iter) > 0)
p = &(*p)->rb_left;
else
p = &(*p)->rb_right;
@@ -711,25 +785,38 @@ int hist_entry__annotate(struct hist_entry *he, size_t privsize)
return symbol__annotate(he->ms.sym, he->ms.map, privsize);
}
+void events_stats__inc(struct events_stats *stats, u32 type)
+{
+ ++stats->nr_events[0];
+ ++stats->nr_events[type];
+}
+
void hists__inc_nr_events(struct hists *hists, u32 type)
{
- ++hists->stats.nr_events[0];
- ++hists->stats.nr_events[type];
+ events_stats__inc(&hists->stats, type);
}
static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
struct hist_entry *pair)
{
- struct rb_node **p = &hists->entries.rb_node;
+ struct rb_root *root;
+ struct rb_node **p;
struct rb_node *parent = NULL;
struct hist_entry *he;
int cmp;
+ if (sort__need_collapse)
+ root = &hists->entries_collapsed;
+ else
+ root = hists->entries_in;
+
+ p = &root->rb_node;
+
while (*p != NULL) {
parent = *p;
- he = rb_entry(parent, struct hist_entry, rb_node);
+ he = rb_entry(parent, struct hist_entry, rb_node_in);
- cmp = hist_entry__cmp(pair, he);
+ cmp = hist_entry__collapse(he, pair);
if (!cmp)
goto out;
@@ -744,8 +831,8 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
if (he) {
memset(&he->stat, 0, sizeof(he->stat));
he->hists = hists;
- rb_link_node(&he->rb_node, parent, p);
- rb_insert_color(&he->rb_node, &hists->entries);
+ rb_link_node(&he->rb_node_in, parent, p);
+ rb_insert_color(&he->rb_node_in, root);
hists__inc_nr_entries(hists, he);
}
out:
@@ -755,11 +842,16 @@ out:
static struct hist_entry *hists__find_entry(struct hists *hists,
struct hist_entry *he)
{
- struct rb_node *n = hists->entries.rb_node;
+ struct rb_node *n;
+
+ if (sort__need_collapse)
+ n = hists->entries_collapsed.rb_node;
+ else
+ n = hists->entries_in->rb_node;
while (n) {
- struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node);
- int64_t cmp = hist_entry__cmp(he, iter);
+ struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node_in);
+ int64_t cmp = hist_entry__collapse(iter, he);
if (cmp < 0)
n = n->rb_left;
@@ -777,15 +869,21 @@ static struct hist_entry *hists__find_entry(struct hists *hists,
*/
void hists__match(struct hists *leader, struct hists *other)
{
+ struct rb_root *root;
struct rb_node *nd;
struct hist_entry *pos, *pair;
- for (nd = rb_first(&leader->entries); nd; nd = rb_next(nd)) {
- pos = rb_entry(nd, struct hist_entry, rb_node);
+ if (sort__need_collapse)
+ root = &leader->entries_collapsed;
+ else
+ root = leader->entries_in;
+
+ for (nd = rb_first(root); nd; nd = rb_next(nd)) {
+ pos = rb_entry(nd, struct hist_entry, rb_node_in);
pair = hists__find_entry(other, pos);
if (pair)
- hist__entry_add_pair(pos, pair);
+ hist_entry__add_pair(pair, pos);
}
}
@@ -796,17 +894,23 @@ void hists__match(struct hists *leader, struct hists *other)
*/
int hists__link(struct hists *leader, struct hists *other)
{
+ struct rb_root *root;
struct rb_node *nd;
struct hist_entry *pos, *pair;
- for (nd = rb_first(&other->entries); nd; nd = rb_next(nd)) {
- pos = rb_entry(nd, struct hist_entry, rb_node);
+ if (sort__need_collapse)
+ root = &other->entries_collapsed;
+ else
+ root = other->entries_in;
+
+ for (nd = rb_first(root); nd; nd = rb_next(nd)) {
+ pos = rb_entry(nd, struct hist_entry, rb_node_in);
if (!hist_entry__has_pairs(pos)) {
pair = hists__add_dummy_entry(leader, pos);
if (pair == NULL)
return -1;
- hist__entry_add_pair(pair, pos);
+ hist_entry__add_pair(pos, pair);
}
}
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 8b091a5..3862468 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -96,8 +96,10 @@ void hists__decay_entries_threaded(struct hists *hists, bool zap_user,
bool zap_kernel);
void hists__output_recalc_col_len(struct hists *hists, int max_rows);
+void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h);
void hists__inc_nr_events(struct hists *self, u32 type);
-size_t hists__fprintf_nr_events(struct hists *self, FILE *fp);
+void events_stats__inc(struct events_stats *stats, u32 type);
+size_t events_stats__fprintf(struct events_stats *stats, FILE *fp);
size_t hists__fprintf(struct hists *self, bool show_header, int max_rows,
int max_cols, FILE *fp);
@@ -126,13 +128,19 @@ struct perf_hpp {
};
struct perf_hpp_fmt {
- bool cond;
int (*header)(struct perf_hpp *hpp);
int (*width)(struct perf_hpp *hpp);
int (*color)(struct perf_hpp *hpp, struct hist_entry *he);
int (*entry)(struct perf_hpp *hpp, struct hist_entry *he);
+
+ struct list_head list;
};
+extern struct list_head perf_hpp__list;
+
+#define perf_hpp__for_each_format(format) \
+ list_for_each_entry(format, &perf_hpp__list, list)
+
extern struct perf_hpp_fmt perf_hpp__format[];
enum {
@@ -148,14 +156,14 @@ enum {
PERF_HPP__DELTA,
PERF_HPP__RATIO,
PERF_HPP__WEIGHTED_DIFF,
- PERF_HPP__DISPL,
PERF_HPP__FORMULA,
PERF_HPP__MAX_INDEX
};
void perf_hpp__init(void);
-void perf_hpp__column_enable(unsigned col, bool enable);
+void perf_hpp__column_register(struct perf_hpp_fmt *format);
+void perf_hpp__column_enable(unsigned col);
int hist_entry__period_snprintf(struct perf_hpp *hpp, struct hist_entry *he,
bool color);
@@ -219,8 +227,10 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist __maybe_unused,
unsigned int hists__sort_list_width(struct hists *self);
-double perf_diff__compute_delta(struct hist_entry *he);
-double perf_diff__compute_ratio(struct hist_entry *he);
-s64 perf_diff__compute_wdiff(struct hist_entry *he);
-int perf_diff__formula(char *buf, size_t size, struct hist_entry *he);
+double perf_diff__compute_delta(struct hist_entry *he, struct hist_entry *pair);
+double perf_diff__compute_ratio(struct hist_entry *he, struct hist_entry *pair);
+s64 perf_diff__compute_wdiff(struct hist_entry *he, struct hist_entry *pair);
+int perf_diff__formula(struct hist_entry *he, struct hist_entry *pair,
+ char *buf, size_t size);
+double perf_diff__period_percent(struct hist_entry *he, u64 period);
#endif /* __PERF_HIST_H */
diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h
index a55d8cf0..45cf10a 100644
--- a/tools/perf/util/include/linux/bitops.h
+++ b/tools/perf/util/include/linux/bitops.h
@@ -14,6 +14,7 @@
#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64))
#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32))
+#define BITS_TO_BYTES(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE)
#define for_each_set_bit(bit, addr, size) \
for ((bit) = find_first_bit((addr), (size)); \
diff --git a/tools/perf/util/intlist.c b/tools/perf/util/intlist.c
index 9d07400..11a8d86 100644
--- a/tools/perf/util/intlist.c
+++ b/tools/perf/util/intlist.c
@@ -59,16 +59,40 @@ void intlist__remove(struct intlist *ilist, struct int_node *node)
struct int_node *intlist__find(struct intlist *ilist, int i)
{
- struct int_node *node = NULL;
- struct rb_node *rb_node = rblist__find(&ilist->rblist, (void *)((long)i));
+ struct int_node *node;
+ struct rb_node *rb_node;
+ if (ilist == NULL)
+ return NULL;
+
+ node = NULL;
+ rb_node = rblist__find(&ilist->rblist, (void *)((long)i));
if (rb_node)
node = container_of(rb_node, struct int_node, rb_node);
return node;
}
-struct intlist *intlist__new(void)
+static int intlist__parse_list(struct intlist *ilist, const char *s)
+{
+ char *sep;
+ int err;
+
+ do {
+ long value = strtol(s, &sep, 10);
+ err = -EINVAL;
+ if (*sep != ',' && *sep != '\0')
+ break;
+ err = intlist__add(ilist, value);
+ if (err)
+ break;
+ s = sep + 1;
+ } while (*sep != '\0');
+
+ return err;
+}
+
+struct intlist *intlist__new(const char *slist)
{
struct intlist *ilist = malloc(sizeof(*ilist));
@@ -77,9 +101,15 @@ struct intlist *intlist__new(void)
ilist->rblist.node_cmp = intlist__node_cmp;
ilist->rblist.node_new = intlist__node_new;
ilist->rblist.node_delete = intlist__node_delete;
+
+ if (slist && intlist__parse_list(ilist, slist))
+ goto out_delete;
}
return ilist;
+out_delete:
+ intlist__delete(ilist);
+ return NULL;
}
void intlist__delete(struct intlist *ilist)
diff --git a/tools/perf/util/intlist.h b/tools/perf/util/intlist.h
index 6d63ab9..62351da 100644
--- a/tools/perf/util/intlist.h
+++ b/tools/perf/util/intlist.h
@@ -15,7 +15,7 @@ struct intlist {
struct rblist rblist;
};
-struct intlist *intlist__new(void);
+struct intlist *intlist__new(const char *slist);
void intlist__delete(struct intlist *ilist);
void intlist__remove(struct intlist *ilist, struct int_node *in);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 1f09d05..efdb38e 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1,10 +1,15 @@
+#include "callchain.h"
#include "debug.h"
#include "event.h"
+#include "evsel.h"
+#include "hist.h"
#include "machine.h"
#include "map.h"
+#include "sort.h"
#include "strlist.h"
#include "thread.h"
#include <stdbool.h>
+#include "unwind.h"
int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
{
@@ -48,6 +53,29 @@ static void dsos__delete(struct list_head *dsos)
}
}
+void machine__delete_dead_threads(struct machine *machine)
+{
+ struct thread *n, *t;
+
+ list_for_each_entry_safe(t, n, &machine->dead_threads, node) {
+ list_del(&t->node);
+ thread__delete(t);
+ }
+}
+
+void machine__delete_threads(struct machine *machine)
+{
+ struct rb_node *nd = rb_first(&machine->threads);
+
+ while (nd) {
+ struct thread *t = rb_entry(nd, struct thread, rb_node);
+
+ rb_erase(&t->rb_node, &machine->threads);
+ nd = rb_next(nd);
+ thread__delete(t);
+ }
+}
+
void machine__exit(struct machine *machine)
{
map_groups__exit(&machine->kmaps);
@@ -63,10 +91,22 @@ void machine__delete(struct machine *machine)
free(machine);
}
-struct machine *machines__add(struct rb_root *machines, pid_t pid,
+void machines__init(struct machines *machines)
+{
+ machine__init(&machines->host, "", HOST_KERNEL_ID);
+ machines->guests = RB_ROOT;
+}
+
+void machines__exit(struct machines *machines)
+{
+ machine__exit(&machines->host);
+ /* XXX exit guest */
+}
+
+struct machine *machines__add(struct machines *machines, pid_t pid,
const char *root_dir)
{
- struct rb_node **p = &machines->rb_node;
+ struct rb_node **p = &machines->guests.rb_node;
struct rb_node *parent = NULL;
struct machine *pos, *machine = malloc(sizeof(*machine));
@@ -88,18 +128,21 @@ struct machine *machines__add(struct rb_root *machines, pid_t pid,
}
rb_link_node(&machine->rb_node, parent, p);
- rb_insert_color(&machine->rb_node, machines);
+ rb_insert_color(&machine->rb_node, &machines->guests);
return machine;
}
-struct machine *machines__find(struct rb_root *machines, pid_t pid)
+struct machine *machines__find(struct machines *machines, pid_t pid)
{
- struct rb_node **p = &machines->rb_node;
+ struct rb_node **p = &machines->guests.rb_node;
struct rb_node *parent = NULL;
struct machine *machine;
struct machine *default_machine = NULL;
+ if (pid == HOST_KERNEL_ID)
+ return &machines->host;
+
while (*p != NULL) {
parent = *p;
machine = rb_entry(parent, struct machine, rb_node);
@@ -116,7 +159,7 @@ struct machine *machines__find(struct rb_root *machines, pid_t pid)
return default_machine;
}
-struct machine *machines__findnew(struct rb_root *machines, pid_t pid)
+struct machine *machines__findnew(struct machines *machines, pid_t pid)
{
char path[PATH_MAX];
const char *root_dir = "";
@@ -150,12 +193,12 @@ out:
return machine;
}
-void machines__process(struct rb_root *machines,
- machine__process_t process, void *data)
+void machines__process_guests(struct machines *machines,
+ machine__process_t process, void *data)
{
struct rb_node *nd;
- for (nd = rb_first(machines); nd; nd = rb_next(nd)) {
+ for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
struct machine *pos = rb_entry(nd, struct machine, rb_node);
process(pos, data);
}
@@ -175,12 +218,14 @@ char *machine__mmap_name(struct machine *machine, char *bf, size_t size)
return bf;
}
-void machines__set_id_hdr_size(struct rb_root *machines, u16 id_hdr_size)
+void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size)
{
struct rb_node *node;
struct machine *machine;
- for (node = rb_first(machines); node; node = rb_next(node)) {
+ machines->host.id_hdr_size = id_hdr_size;
+
+ for (node = rb_first(&machines->guests); node; node = rb_next(node)) {
machine = rb_entry(node, struct machine, rb_node);
machine->id_hdr_size = id_hdr_size;
}
@@ -264,6 +309,537 @@ int machine__process_lost_event(struct machine *machine __maybe_unused,
return 0;
}
+struct map *machine__new_module(struct machine *machine, u64 start,
+ const char *filename)
+{
+ struct map *map;
+ struct dso *dso = __dsos__findnew(&machine->kernel_dsos, filename);
+
+ if (dso == NULL)
+ return NULL;
+
+ map = map__new2(start, dso, MAP__FUNCTION);
+ if (map == NULL)
+ return NULL;
+
+ if (machine__is_host(machine))
+ dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE;
+ else
+ dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE;
+ map_groups__insert(&machine->kmaps, map);
+ return map;
+}
+
+size_t machines__fprintf_dsos(struct machines *machines, FILE *fp)
+{
+ struct rb_node *nd;
+ size_t ret = __dsos__fprintf(&machines->host.kernel_dsos, fp) +
+ __dsos__fprintf(&machines->host.user_dsos, fp);
+
+ for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
+ struct machine *pos = rb_entry(nd, struct machine, rb_node);
+ ret += __dsos__fprintf(&pos->kernel_dsos, fp);
+ ret += __dsos__fprintf(&pos->user_dsos, fp);
+ }
+
+ return ret;
+}
+
+size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp,
+ bool (skip)(struct dso *dso, int parm), int parm)
+{
+ return __dsos__fprintf_buildid(&machine->kernel_dsos, fp, skip, parm) +
+ __dsos__fprintf_buildid(&machine->user_dsos, fp, skip, parm);
+}
+
+size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
+ bool (skip)(struct dso *dso, int parm), int parm)
+{
+ struct rb_node *nd;
+ size_t ret = machine__fprintf_dsos_buildid(&machines->host, fp, skip, parm);
+
+ for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
+ struct machine *pos = rb_entry(nd, struct machine, rb_node);
+ ret += machine__fprintf_dsos_buildid(pos, fp, skip, parm);
+ }
+ return ret;
+}
+
+size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp)
+{
+ int i;
+ size_t printed = 0;
+ struct dso *kdso = machine->vmlinux_maps[MAP__FUNCTION]->dso;
+
+ if (kdso->has_build_id) {
+ char filename[PATH_MAX];
+ if (dso__build_id_filename(kdso, filename, sizeof(filename)))
+ printed += fprintf(fp, "[0] %s\n", filename);
+ }
+
+ for (i = 0; i < vmlinux_path__nr_entries; ++i)
+ printed += fprintf(fp, "[%d] %s\n",
+ i + kdso->has_build_id, vmlinux_path[i]);
+
+ return printed;
+}
+
+size_t machine__fprintf(struct machine *machine, FILE *fp)
+{
+ size_t ret = 0;
+ struct rb_node *nd;
+
+ for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
+ struct thread *pos = rb_entry(nd, struct thread, rb_node);
+
+ ret += thread__fprintf(pos, fp);
+ }
+
+ return ret;
+}
+
+static struct dso *machine__get_kernel(struct machine *machine)
+{
+ const char *vmlinux_name = NULL;
+ struct dso *kernel;
+
+ if (machine__is_host(machine)) {
+ vmlinux_name = symbol_conf.vmlinux_name;
+ if (!vmlinux_name)
+ vmlinux_name = "[kernel.kallsyms]";
+
+ kernel = dso__kernel_findnew(machine, vmlinux_name,
+ "[kernel]",
+ DSO_TYPE_KERNEL);
+ } else {
+ char bf[PATH_MAX];
+
+ if (machine__is_default_guest(machine))
+ vmlinux_name = symbol_conf.default_guest_vmlinux_name;
+ if (!vmlinux_name)
+ vmlinux_name = machine__mmap_name(machine, bf,
+ sizeof(bf));
+
+ kernel = dso__kernel_findnew(machine, vmlinux_name,
+ "[guest.kernel]",
+ DSO_TYPE_GUEST_KERNEL);
+ }
+
+ if (kernel != NULL && (!kernel->has_build_id))
+ dso__read_running_kernel_build_id(kernel, machine);
+
+ return kernel;
+}
+
+struct process_args {
+ u64 start;
+};
+
+static int symbol__in_kernel(void *arg, const char *name,
+ char type __maybe_unused, u64 start)
+{
+ struct process_args *args = arg;
+
+ if (strchr(name, '['))
+ return 0;
+
+ args->start = start;
+ return 1;
+}
+
+/* Figure out the start address of kernel map from /proc/kallsyms */
+static u64 machine__get_kernel_start_addr(struct machine *machine)
+{
+ const char *filename;
+ char path[PATH_MAX];
+ struct process_args args;
+
+ if (machine__is_host(machine)) {
+ filename = "/proc/kallsyms";
+ } else {
+ if (machine__is_default_guest(machine))
+ filename = (char *)symbol_conf.default_guest_kallsyms;
+ else {
+ sprintf(path, "%s/proc/kallsyms", machine->root_dir);
+ filename = path;
+ }
+ }
+
+ if (symbol__restricted_filename(filename, "/proc/kallsyms"))
+ return 0;
+
+ if (kallsyms__parse(filename, &args, symbol__in_kernel) <= 0)
+ return 0;
+
+ return args.start;
+}
+
+int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
+{
+ enum map_type type;
+ u64 start = machine__get_kernel_start_addr(machine);
+
+ for (type = 0; type < MAP__NR_TYPES; ++type) {
+ struct kmap *kmap;
+
+ machine->vmlinux_maps[type] = map__new2(start, kernel, type);
+ if (machine->vmlinux_maps[type] == NULL)
+ return -1;
+
+ machine->vmlinux_maps[type]->map_ip =
+ machine->vmlinux_maps[type]->unmap_ip =
+ identity__map_ip;
+ kmap = map__kmap(machine->vmlinux_maps[type]);
+ kmap->kmaps = &machine->kmaps;
+ map_groups__insert(&machine->kmaps,
+ machine->vmlinux_maps[type]);
+ }
+
+ return 0;
+}
+
+void machine__destroy_kernel_maps(struct machine *machine)
+{
+ enum map_type type;
+
+ for (type = 0; type < MAP__NR_TYPES; ++type) {
+ struct kmap *kmap;
+
+ if (machine->vmlinux_maps[type] == NULL)
+ continue;
+
+ kmap = map__kmap(machine->vmlinux_maps[type]);
+ map_groups__remove(&machine->kmaps,
+ machine->vmlinux_maps[type]);
+ if (kmap->ref_reloc_sym) {
+ /*
+ * ref_reloc_sym is shared among all maps, so free just
+ * on one of them.
+ */
+ if (type == MAP__FUNCTION) {
+ free((char *)kmap->ref_reloc_sym->name);
+ kmap->ref_reloc_sym->name = NULL;
+ free(kmap->ref_reloc_sym);
+ }
+ kmap->ref_reloc_sym = NULL;
+ }
+
+ map__delete(machine->vmlinux_maps[type]);
+ machine->vmlinux_maps[type] = NULL;
+ }
+}
+
+int machines__create_guest_kernel_maps(struct machines *machines)
+{
+ int ret = 0;
+ struct dirent **namelist = NULL;
+ int i, items = 0;
+ char path[PATH_MAX];
+ pid_t pid;
+ char *endp;
+
+ if (symbol_conf.default_guest_vmlinux_name ||
+ symbol_conf.default_guest_modules ||
+ symbol_conf.default_guest_kallsyms) {
+ machines__create_kernel_maps(machines, DEFAULT_GUEST_KERNEL_ID);
+ }
+
+ if (symbol_conf.guestmount) {
+ items = scandir(symbol_conf.guestmount, &namelist, NULL, NULL);
+ if (items <= 0)
+ return -ENOENT;
+ for (i = 0; i < items; i++) {
+ if (!isdigit(namelist[i]->d_name[0])) {
+ /* Filter out . and .. */
+ continue;
+ }
+ pid = (pid_t)strtol(namelist[i]->d_name, &endp, 10);
+ if ((*endp != '\0') ||
+ (endp == namelist[i]->d_name) ||
+ (errno == ERANGE)) {
+ pr_debug("invalid directory (%s). Skipping.\n",
+ namelist[i]->d_name);
+ continue;
+ }
+ sprintf(path, "%s/%s/proc/kallsyms",
+ symbol_conf.guestmount,
+ namelist[i]->d_name);
+ ret = access(path, R_OK);
+ if (ret) {
+ pr_debug("Can't access file %s\n", path);
+ goto failure;
+ }
+ machines__create_kernel_maps(machines, pid);
+ }
+failure:
+ free(namelist);
+ }
+
+ return ret;
+}
+
+void machines__destroy_kernel_maps(struct machines *machines)
+{
+ struct rb_node *next = rb_first(&machines->guests);
+
+ machine__destroy_kernel_maps(&machines->host);
+
+ while (next) {
+ struct machine *pos = rb_entry(next, struct machine, rb_node);
+
+ next = rb_next(&pos->rb_node);
+ rb_erase(&pos->rb_node, &machines->guests);
+ machine__delete(pos);
+ }
+}
+
+int machines__create_kernel_maps(struct machines *machines, pid_t pid)
+{
+ struct machine *machine = machines__findnew(machines, pid);
+
+ if (machine == NULL)
+ return -1;
+
+ return machine__create_kernel_maps(machine);
+}
+
+int machine__load_kallsyms(struct machine *machine, const char *filename,
+ enum map_type type, symbol_filter_t filter)
+{
+ struct map *map = machine->vmlinux_maps[type];
+ int ret = dso__load_kallsyms(map->dso, filename, map, filter);
+
+ if (ret > 0) {
+ dso__set_loaded(map->dso, type);
+ /*
+ * Since /proc/kallsyms will have multiple sessions for the
+ * kernel, with modules between them, fixup the end of all
+ * sections.
+ */
+ __map_groups__fixup_end(&machine->kmaps, type);
+ }
+
+ return ret;
+}
+
+int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
+ symbol_filter_t filter)
+{
+ struct map *map = machine->vmlinux_maps[type];
+ int ret = dso__load_vmlinux_path(map->dso, map, filter);
+
+ if (ret > 0) {
+ dso__set_loaded(map->dso, type);
+ map__reloc_vmlinux(map);
+ }
+
+ return ret;
+}
+
+static void map_groups__fixup_end(struct map_groups *mg)
+{
+ int i;
+ for (i = 0; i < MAP__NR_TYPES; ++i)
+ __map_groups__fixup_end(mg, i);
+}
+
+static char *get_kernel_version(const char *root_dir)
+{
+ char version[PATH_MAX];
+ FILE *file;
+ char *name, *tmp;
+ const char *prefix = "Linux version ";
+
+ sprintf(version, "%s/proc/version", root_dir);
+ file = fopen(version, "r");
+ if (!file)
+ return NULL;
+
+ version[0] = '\0';
+ tmp = fgets(version, sizeof(version), file);
+ fclose(file);
+
+ name = strstr(version, prefix);
+ if (!name)
+ return NULL;
+ name += strlen(prefix);
+ tmp = strchr(name, ' ');
+ if (tmp)
+ *tmp = '\0';
+
+ return strdup(name);
+}
+
+static int map_groups__set_modules_path_dir(struct map_groups *mg,
+ const char *dir_name)
+{
+ struct dirent *dent;
+ DIR *dir = opendir(dir_name);
+ int ret = 0;
+
+ if (!dir) {
+ pr_debug("%s: cannot open %s dir\n", __func__, dir_name);
+ return -1;
+ }
+
+ while ((dent = readdir(dir)) != NULL) {
+ char path[PATH_MAX];
+ struct stat st;
+
+ /*sshfs might return bad dent->d_type, so we have to stat*/
+ snprintf(path, sizeof(path), "%s/%s", dir_name, dent->d_name);
+ if (stat(path, &st))
+ continue;
+
+ if (S_ISDIR(st.st_mode)) {
+ if (!strcmp(dent->d_name, ".") ||
+ !strcmp(dent->d_name, ".."))
+ continue;
+
+ ret = map_groups__set_modules_path_dir(mg, path);
+ if (ret < 0)
+ goto out;
+ } else {
+ char *dot = strrchr(dent->d_name, '.'),
+ dso_name[PATH_MAX];
+ struct map *map;
+ char *long_name;
+
+ if (dot == NULL || strcmp(dot, ".ko"))
+ continue;
+ snprintf(dso_name, sizeof(dso_name), "[%.*s]",
+ (int)(dot - dent->d_name), dent->d_name);
+
+ strxfrchar(dso_name, '-', '_');
+ map = map_groups__find_by_name(mg, MAP__FUNCTION,
+ dso_name);
+ if (map == NULL)
+ continue;
+
+ long_name = strdup(path);
+ if (long_name == NULL) {
+ ret = -1;
+ goto out;
+ }
+ dso__set_long_name(map->dso, long_name);
+ map->dso->lname_alloc = 1;
+ dso__kernel_module_get_build_id(map->dso, "");
+ }
+ }
+
+out:
+ closedir(dir);
+ return ret;
+}
+
+static int machine__set_modules_path(struct machine *machine)
+{
+ char *version;
+ char modules_path[PATH_MAX];
+
+ version = get_kernel_version(machine->root_dir);
+ if (!version)
+ return -1;
+
+ snprintf(modules_path, sizeof(modules_path), "%s/lib/modules/%s/kernel",
+ machine->root_dir, version);
+ free(version);
+
+ return map_groups__set_modules_path_dir(&machine->kmaps, modules_path);
+}
+
+static int machine__create_modules(struct machine *machine)
+{
+ char *line = NULL;
+ size_t n;
+ FILE *file;
+ struct map *map;
+ const char *modules;
+ char path[PATH_MAX];
+
+ if (machine__is_default_guest(machine))
+ modules = symbol_conf.default_guest_modules;
+ else {
+ sprintf(path, "%s/proc/modules", machine->root_dir);
+ modules = path;
+ }
+
+ if (symbol__restricted_filename(path, "/proc/modules"))
+ return -1;
+
+ file = fopen(modules, "r");
+ if (file == NULL)
+ return -1;
+
+ while (!feof(file)) {
+ char name[PATH_MAX];
+ u64 start;
+ char *sep;
+ int line_len;
+
+ line_len = getline(&line, &n, file);
+ if (line_len < 0)
+ break;
+
+ if (!line)
+ goto out_failure;
+
+ line[--line_len] = '\0'; /* \n */
+
+ sep = strrchr(line, 'x');
+ if (sep == NULL)
+ continue;
+
+ hex2u64(sep + 1, &start);
+
+ sep = strchr(line, ' ');
+ if (sep == NULL)
+ continue;
+
+ *sep = '\0';
+
+ snprintf(name, sizeof(name), "[%s]", line);
+ map = machine__new_module(machine, start, name);
+ if (map == NULL)
+ goto out_delete_line;
+ dso__kernel_module_get_build_id(map->dso, machine->root_dir);
+ }
+
+ free(line);
+ fclose(file);
+
+ return machine__set_modules_path(machine);
+
+out_delete_line:
+ free(line);
+out_failure:
+ return -1;
+}
+
+int machine__create_kernel_maps(struct machine *machine)
+{
+ struct dso *kernel = machine__get_kernel(machine);
+
+ if (kernel == NULL ||
+ __machine__create_kernel_maps(machine, kernel) < 0)
+ return -1;
+
+ if (symbol_conf.use_modules && machine__create_modules(machine) < 0) {
+ if (machine__is_host(machine))
+ pr_debug("Problems creating module maps, "
+ "continuing anyway...\n");
+ else
+ pr_debug("Problems creating module maps for guest %d, "
+ "continuing anyway...\n", machine->pid);
+ }
+
+ /*
+ * Now that we have all the maps created, just set the ->end of them:
+ */
+ map_groups__fixup_end(&machine->kmaps);
+ return 0;
+}
+
static void machine__set_kernel_mmap_len(struct machine *machine,
union perf_event *event)
{
@@ -462,3 +1038,189 @@ int machine__process_event(struct machine *machine, union perf_event *event)
return ret;
}
+
+void machine__remove_thread(struct machine *machine, struct thread *th)
+{
+ machine->last_match = NULL;
+ rb_erase(&th->rb_node, &machine->threads);
+ /*
+ * We may have references to this thread, for instance in some hist_entry
+ * instances, so just move them to a separate list.
+ */
+ list_add_tail(&th->node, &machine->dead_threads);
+}
+
+static bool symbol__match_parent_regex(struct symbol *sym)
+{
+ if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
+ return 1;
+
+ return 0;
+}
+
+static const u8 cpumodes[] = {
+ PERF_RECORD_MISC_USER,
+ PERF_RECORD_MISC_KERNEL,
+ PERF_RECORD_MISC_GUEST_USER,
+ PERF_RECORD_MISC_GUEST_KERNEL
+};
+#define NCPUMODES (sizeof(cpumodes)/sizeof(u8))
+
+static void ip__resolve_ams(struct machine *machine, struct thread *thread,
+ struct addr_map_symbol *ams,
+ u64 ip)
+{
+ struct addr_location al;
+ size_t i;
+ u8 m;
+
+ memset(&al, 0, sizeof(al));
+
+ for (i = 0; i < NCPUMODES; i++) {
+ m = cpumodes[i];
+ /*
+ * We cannot use the header.misc hint to determine whether a
+ * branch stack address is user, kernel, guest, hypervisor.
+ * Branches may straddle the kernel/user/hypervisor boundaries.
+ * Thus, we have to try consecutively until we find a match
+ * or else, the symbol is unknown
+ */
+ thread__find_addr_location(thread, machine, m, MAP__FUNCTION,
+ ip, &al, NULL);
+ if (al.sym)
+ goto found;
+ }
+found:
+ ams->addr = ip;
+ ams->al_addr = al.addr;
+ ams->sym = al.sym;
+ ams->map = al.map;
+}
+
+struct branch_info *machine__resolve_bstack(struct machine *machine,
+ struct thread *thr,
+ struct branch_stack *bs)
+{
+ struct branch_info *bi;
+ unsigned int i;
+
+ bi = calloc(bs->nr, sizeof(struct branch_info));
+ if (!bi)
+ return NULL;
+
+ for (i = 0; i < bs->nr; i++) {
+ ip__resolve_ams(machine, thr, &bi[i].to, bs->entries[i].to);
+ ip__resolve_ams(machine, thr, &bi[i].from, bs->entries[i].from);
+ bi[i].flags = bs->entries[i].flags;
+ }
+ return bi;
+}
+
+static int machine__resolve_callchain_sample(struct machine *machine,
+ struct thread *thread,
+ struct ip_callchain *chain,
+ struct symbol **parent)
+
+{
+ u8 cpumode = PERF_RECORD_MISC_USER;
+ unsigned int i;
+ int err;
+
+ callchain_cursor_reset(&callchain_cursor);
+
+ if (chain->nr > PERF_MAX_STACK_DEPTH) {
+ pr_warning("corrupted callchain. skipping...\n");
+ return 0;
+ }
+
+ for (i = 0; i < chain->nr; i++) {
+ u64 ip;
+ struct addr_location al;
+
+ if (callchain_param.order == ORDER_CALLEE)
+ ip = chain->ips[i];
+ else
+ ip = chain->ips[chain->nr - i - 1];