diff options
Diffstat (limited to 'kernel/tools/perf/Documentation')
40 files changed, 4942 insertions, 0 deletions
diff --git a/kernel/tools/perf/Documentation/Build.txt b/kernel/tools/perf/Documentation/Build.txt new file mode 100644 index 000000000..f6fc6507b --- /dev/null +++ b/kernel/tools/perf/Documentation/Build.txt @@ -0,0 +1,49 @@ + +1) perf build +============= +The perf build process consists of several separated building blocks, +which are linked together to form the perf binary: + - libperf library (static) + - perf builtin commands + - traceevent library (static) + - GTK ui library + +Several makefiles govern the perf build: + + - Makefile + top level Makefile working as a wrapper that calls the main + Makefile.perf with a -j option to do parallel builds. + + - Makefile.perf + main makefile that triggers build of all perf objects including + installation and documentation processing. + + - tools/build/Makefile.build + main makefile of the build framework + + - tools/build/Build.include + build framework generic definitions + + - Build makefiles + makefiles that defines build objects + +Please refer to tools/build/Documentation/Build.txt for more +information about build framework. + + +2) perf build +============= +The Makefile.perf triggers the build framework for build objects: + perf, libperf, gtk + +resulting in following objects: + $ ls *-in.o + gtk-in.o libperf-in.o perf-in.o + +Those objects are then used in final linking: + libperf-gtk.so <- gtk-in.o libperf-in.o + perf <- perf-in.o libperf-in.o + + +NOTE this description is omitting other libraries involved, only + focusing on build framework outcomes diff --git a/kernel/tools/perf/Documentation/Makefile b/kernel/tools/perf/Documentation/Makefile new file mode 100644 index 000000000..3ba1c0b09 --- /dev/null +++ b/kernel/tools/perf/Documentation/Makefile @@ -0,0 +1,343 @@ +include ../../scripts/Makefile.include +include ../config/utilities.mak + +MAN1_TXT= \ + $(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \ + $(wildcard perf-*.txt)) \ + perf.txt +MAN5_TXT= +MAN7_TXT= + +MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT) +_MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT)) +_MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT)) + +MAN_XML=$(addprefix $(OUTPUT),$(_MAN_XML)) +MAN_HTML=$(addprefix $(OUTPUT),$(_MAN_HTML)) + +ARTICLES = +# with their own formatting rules. +SP_ARTICLES = +API_DOCS = $(patsubst %.txt,%,$(filter-out technical/api-index-skel.txt technical/api-index.txt, $(wildcard technical/api-*.txt))) +SP_ARTICLES += $(API_DOCS) +SP_ARTICLES += technical/api-index + +_DOC_HTML = $(_MAN_HTML) +_DOC_HTML+=$(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES)) +DOC_HTML=$(addprefix $(OUTPUT),$(_DOC_HTML)) + +_DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT)) +_DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT)) +_DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT)) + +DOC_MAN1=$(addprefix $(OUTPUT),$(_DOC_MAN1)) +DOC_MAN5=$(addprefix $(OUTPUT),$(_DOC_MAN5)) +DOC_MAN7=$(addprefix $(OUTPUT),$(_DOC_MAN7)) + +# Make the path relative to DESTDIR, not prefix +ifndef DESTDIR +prefix?=$(HOME) +endif +bindir?=$(prefix)/bin +htmldir?=$(prefix)/share/doc/perf-doc +pdfdir?=$(prefix)/share/doc/perf-doc +mandir?=$(prefix)/share/man +man1dir=$(mandir)/man1 +man5dir=$(mandir)/man5 +man7dir=$(mandir)/man7 + +ASCIIDOC=asciidoc +ASCIIDOC_EXTRA = --unsafe +MANPAGE_XSL = manpage-normal.xsl +XMLTO_EXTRA = +INSTALL?=install +RM ?= rm -f +DOC_REF = origin/man +HTML_REF = origin/html + +infodir?=$(prefix)/share/info +MAKEINFO=makeinfo +INSTALL_INFO=install-info +DOCBOOK2X_TEXI=docbook2x-texi +DBLATEX=dblatex +XMLTO=xmlto +ifndef PERL_PATH + PERL_PATH = /usr/bin/perl +endif + +-include ../config.mak.autogen +-include ../config.mak + +_tmp_tool_path := $(call get-executable,$(ASCIIDOC)) +ifeq ($(_tmp_tool_path),) + missing_tools = $(ASCIIDOC) +endif + +_tmp_tool_path := $(call get-executable,$(XMLTO)) +ifeq ($(_tmp_tool_path),) + missing_tools += $(XMLTO) +endif + +# +# For asciidoc ... +# -7.1.2, no extra settings are needed. +# 8.0-, set ASCIIDOC8. +# + +# +# For docbook-xsl ... +# -1.68.1, set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0) +# 1.69.0, no extra settings are needed? +# 1.69.1-1.71.0, set DOCBOOK_SUPPRESS_SP? +# 1.71.1, no extra settings are needed? +# 1.72.0, set DOCBOOK_XSL_172. +# 1.73.0-, set ASCIIDOC_NO_ROFF +# + +# +# If you had been using DOCBOOK_XSL_172 in an attempt to get rid +# of 'the ".ft C" problem' in your generated manpages, and you +# instead ended up with weird characters around callouts, try +# using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8). +# + +ifdef ASCIIDOC8 +ASCIIDOC_EXTRA += -a asciidoc7compatible +endif +ifdef DOCBOOK_XSL_172 +ASCIIDOC_EXTRA += -a perf-asciidoc-no-roff +MANPAGE_XSL = manpage-1.72.xsl +else + ifdef ASCIIDOC_NO_ROFF + # docbook-xsl after 1.72 needs the regular XSL, but will not + # pass-thru raw roff codes from asciidoc.conf, so turn them off. + ASCIIDOC_EXTRA += -a perf-asciidoc-no-roff + endif +endif +ifdef MAN_BOLD_LITERAL +XMLTO_EXTRA += -m manpage-bold-literal.xsl +endif +ifdef DOCBOOK_SUPPRESS_SP +XMLTO_EXTRA += -m manpage-suppress-sp.xsl +endif + +SHELL_PATH ?= $(SHELL) +# Shell quote; +SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH)) + +# +# Please note that there is a minor bug in asciidoc. +# The version after 6.0.3 _will_ include the patch found here: +# http://marc.theaimsgroup.com/?l=perf&m=111558757202243&w=2 +# +# Until that version is released you may have to apply the patch +# yourself - yes, all 6 characters of it! +# + +QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir +QUIET_SUBDIR1 = + +ifneq ($(findstring $(MAKEFLAGS),w),w) +PRINT_DIR = --no-print-directory +else # "make -w" +NO_SUBDIR = : +endif + +ifneq ($(findstring $(MAKEFLAGS),s),s) +ifneq ($(V),1) + QUIET_ASCIIDOC = @echo ' ASCIIDOC '$@; + QUIET_XMLTO = @echo ' XMLTO '$@; + QUIET_DB2TEXI = @echo ' DB2TEXI '$@; + QUIET_MAKEINFO = @echo ' MAKEINFO '$@; + QUIET_DBLATEX = @echo ' DBLATEX '$@; + QUIET_XSLTPROC = @echo ' XSLTPROC '$@; + QUIET_GEN = @echo ' GEN '$@; + QUIET_STDERR = 2> /dev/null + QUIET_SUBDIR0 = +@subdir= + QUIET_SUBDIR1 = ;$(NO_SUBDIR) \ + echo ' SUBDIR ' $$subdir; \ + $(MAKE) $(PRINT_DIR) -C $$subdir + export V +endif +endif + +all: html man + +html: $(DOC_HTML) + +$(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7): asciidoc.conf + +man: man1 man5 man7 +man1: $(DOC_MAN1) +man5: $(DOC_MAN5) +man7: $(DOC_MAN7) + +info: $(OUTPUT)perf.info $(OUTPUT)perfman.info + +pdf: $(OUTPUT)user-manual.pdf + +install: install-man + +check-man-tools: +ifdef missing_tools + $(error "You need to install $(missing_tools) for man pages") +endif + +do-install-man: man + $(call QUIET_INSTALL, Documentation-man) \ + $(INSTALL) -d -m 755 $(DESTDIR)$(man1dir); \ +# $(INSTALL) -d -m 755 $(DESTDIR)$(man5dir); \ +# $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir); \ + $(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(man1dir); \ +# $(INSTALL) -m 644 $(DOC_MAN5) $(DESTDIR)$(man5dir); \ +# $(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir) + +install-man: check-man-tools man + +ifdef missing_tools + DO_INSTALL_MAN = $(warning Please install $(missing_tools) to have the man pages installed) +else + DO_INSTALL_MAN = do-install-man +endif + +try-install-man: $(DO_INSTALL_MAN) + +install-info: info + $(call QUIET_INSTALL, Documentation-info) \ + $(INSTALL) -d -m 755 $(DESTDIR)$(infodir); \ + $(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir); \ + if test -r $(DESTDIR)$(infodir)/dir; then \ + $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\ + $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\ + else \ + echo "No directory found in $(DESTDIR)$(infodir)" >&2 ; \ + fi + +install-pdf: pdf + $(call QUIET_INSTALL, Documentation-pdf) \ + $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir); \ + $(INSTALL) -m 644 $(OUTPUT)user-manual.pdf $(DESTDIR)$(pdfdir) + +#install-html: html +# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir) + + +# +# Determine "include::" file references in asciidoc files. +# +$(OUTPUT)doc.dep : $(wildcard *.txt) build-docdep.perl + $(QUIET_GEN)$(RM) $@+ $@ && \ + $(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \ + mv $@+ $@ + +-include $(OUPTUT)doc.dep + +_cmds_txt = cmds-ancillaryinterrogators.txt \ + cmds-ancillarymanipulators.txt \ + cmds-mainporcelain.txt \ + cmds-plumbinginterrogators.txt \ + cmds-plumbingmanipulators.txt \ + cmds-synchingrepositories.txt \ + cmds-synchelpers.txt \ + cmds-purehelpers.txt \ + cmds-foreignscminterface.txt +cmds_txt=$(addprefix $(OUTPUT),$(_cmds_txt)) + +$(cmds_txt): $(OUTPUT)cmd-list.made + +$(OUTPUT)cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT) + $(QUIET_GEN)$(RM) $@ && \ + $(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \ + date >$@ + +CLEAN_FILES = \ + $(MAN_XML) $(addsuffix +,$(MAN_XML)) \ + $(MAN_HTML) $(addsuffix +,$(MAN_HTML)) \ + $(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7) \ + $(OUTPUT)*.texi $(OUTPUT)*.texi+ $(OUTPUT)*.texi++ \ + $(OUTPUT)perf.info $(OUTPUT)perfman.info \ + $(OUTPUT)howto-index.txt $(OUTPUT)howto/*.html $(OUTPUT)doc.dep \ + $(OUTPUT)technical/api-*.html $(OUTPUT)technical/api-index.txt \ + $(cmds_txt) $(OUTPUT)*.made +clean: + $(call QUIET_CLEAN, Documentation) $(RM) $(CLEAN_FILES) + +$(MAN_HTML): $(OUTPUT)%.html : %.txt + $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ + $(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \ + $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ + mv $@+ $@ + +$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml + $(QUIET_XMLTO)$(RM) $@ && \ + $(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< + +$(OUTPUT)%.xml : %.txt + $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ + $(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \ + $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ + mv $@+ $@ + +XSLT = docbook.xsl +XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css + +$(OUTPUT)user-manual.html: $(OUTPUT)user-manual.xml + $(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $< + +$(OUTPUT)perf.info: $(OUTPUT)user-manual.texi + $(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ $(OUTPUT)user-manual.texi + +$(OUTPUT)user-manual.texi: $(OUTPUT)user-manual.xml + $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ + $(DOCBOOK2X_TEXI) $(OUTPUT)user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \ + $(PERL_PATH) fix-texi.perl <$@++ >$@+ && \ + rm $@++ && \ + mv $@+ $@ + +$(OUTPUT)user-manual.pdf: $(OUTPUT)user-manual.xml + $(QUIET_DBLATEX)$(RM) $@+ $@ && \ + $(DBLATEX) -o $@+ -p /etc/asciidoc/dblatex/asciidoc-dblatex.xsl -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \ + mv $@+ $@ + +$(OUTPUT)perfman.texi: $(MAN_XML) cat-texi.perl + $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ + ($(foreach xml,$(MAN_XML),$(DOCBOOK2X_TEXI) --encoding=UTF-8 \ + --to-stdout $(xml) &&) true) > $@++ && \ + $(PERL_PATH) cat-texi.perl $@ <$@++ >$@+ && \ + rm $@++ && \ + mv $@+ $@ + +$(OUTPUT)perfman.info: $(OUTPUT)perfman.texi + $(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi + +$(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml + $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ + $(DOCBOOK2X_TEXI) --to-stdout $*.xml >$@+ && \ + mv $@+ $@ + +howto-index.txt: howto-index.sh $(wildcard howto/*.txt) + $(QUIET_GEN)$(RM) $@+ $@ && \ + '$(SHELL_PATH_SQ)' ./howto-index.sh $(wildcard howto/*.txt) >$@+ && \ + mv $@+ $@ + +$(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt + $(QUIET_ASCIIDOC)$(ASCIIDOC) -b xhtml11 $*.txt + +WEBDOC_DEST = /pub/software/tools/perf/docs + +$(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt + $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ + sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b xhtml11 - >$@+ && \ + mv $@+ $@ + +# UNIMPLEMENTED +#install-webdoc : html +# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(WEBDOC_DEST) + +# quick-install: quick-install-man + +# quick-install-man: +# '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(DOC_REF) $(DESTDIR)$(mandir) + +#quick-install-html: +# '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir) diff --git a/kernel/tools/perf/Documentation/android.txt b/kernel/tools/perf/Documentation/android.txt new file mode 100644 index 000000000..8484c3a04 --- /dev/null +++ b/kernel/tools/perf/Documentation/android.txt @@ -0,0 +1,78 @@ +How to compile perf for Android +========================================= + +I. Set the Android NDK environment +------------------------------------------------ + +(a). Use the Android NDK +------------------------------------------------ +1. You need to download and install the Android Native Development Kit (NDK). +Set the NDK variable to point to the path where you installed the NDK: + export NDK=/path/to/android-ndk + +2. Set cross-compiling environment variables for NDK toolchain and sysroot. +For arm: + export NDK_TOOLCHAIN=${NDK}/toolchains/arm-linux-androideabi-4.6/prebuilt/linux-x86/bin/arm-linux-androideabi- + export NDK_SYSROOT=${NDK}/platforms/android-9/arch-arm +For x86: + export NDK_TOOLCHAIN=${NDK}/toolchains/x86-4.6/prebuilt/linux-x86/bin/i686-linux-android- + export NDK_SYSROOT=${NDK}/platforms/android-9/arch-x86 + +This method is not working for Android NDK versions up to Revision 8b. +perf uses some bionic enhancements that are not included in these NDK versions. +You can use method (b) described below instead. + +(b). Use the Android source tree +----------------------------------------------- +1. Download the master branch of the Android source tree. +Set the environment for the target you want using: + source build/envsetup.sh + lunch + +2. Build your own NDK sysroot to contain latest bionic changes and set the +NDK sysroot environment variable. + cd ${ANDROID_BUILD_TOP}/ndk +For arm: + ./build/tools/build-ndk-sysroot.sh --abi=arm + export NDK_SYSROOT=${ANDROID_BUILD_TOP}/ndk/build/platforms/android-3/arch-arm +For x86: + ./build/tools/build-ndk-sysroot.sh --abi=x86 + export NDK_SYSROOT=${ANDROID_BUILD_TOP}/ndk/build/platforms/android-3/arch-x86 + +3. Set the NDK toolchain environment variable. +For arm: + export NDK_TOOLCHAIN=${ANDROID_TOOLCHAIN}/arm-linux-androideabi- +For x86: + export NDK_TOOLCHAIN=${ANDROID_TOOLCHAIN}/i686-linux-android- + +II. Compile perf for Android +------------------------------------------------ +You need to run make with the NDK toolchain and sysroot defined above: +For arm: + make ARCH=arm CROSS_COMPILE=${NDK_TOOLCHAIN} CFLAGS="--sysroot=${NDK_SYSROOT}" +For x86: + make ARCH=x86 CROSS_COMPILE=${NDK_TOOLCHAIN} CFLAGS="--sysroot=${NDK_SYSROOT}" + +III. Install perf +----------------------------------------------- +You need to connect to your Android device/emulator using adb. +Install perf using: + adb push perf /data/perf + +If you also want to use perf-archive you need busybox tools for Android. +For installing perf-archive, you first need to replace #!/bin/bash with #!/system/bin/sh: + sed 's/#!\/bin\/bash/#!\/system\/bin\/sh/g' perf-archive >> /tmp/perf-archive + chmod +x /tmp/perf-archive + adb push /tmp/perf-archive /data/perf-archive + +IV. Environment settings for running perf +------------------------------------------------ +Some perf features need environment variables to run properly. +You need to set these before running perf on the target: + adb shell + # PERF_PAGER=cat + +IV. Run perf +------------------------------------------------ +Run perf on your device/emulator to which you previously connected using adb: + # ./data/perf diff --git a/kernel/tools/perf/Documentation/asciidoc.conf b/kernel/tools/perf/Documentation/asciidoc.conf new file mode 100644 index 000000000..356b23a40 --- /dev/null +++ b/kernel/tools/perf/Documentation/asciidoc.conf @@ -0,0 +1,91 @@ +## linkperf: macro +# +# Usage: linkperf:command[manpage-section] +# +# Note, {0} is the manpage section, while {target} is the command. +# +# Show PERF link as: <command>(<section>); if section is defined, else just show +# the command. + +[macros] +(?su)[\\]?(?P<name>linkperf):(?P<target>\S*?)\[(?P<attrlist>.*?)\]= + +[attributes] +asterisk=* +plus=+ +caret=^ +startsb=[ +endsb=] +tilde=~ + +ifdef::backend-docbook[] +[linkperf-inlinemacro] +{0%{target}} +{0#<citerefentry>} +{0#<refentrytitle>{target}</refentrytitle><manvolnum>{0}</manvolnum>} +{0#</citerefentry>} +endif::backend-docbook[] + +ifdef::backend-docbook[] +ifndef::perf-asciidoc-no-roff[] +# "unbreak" docbook-xsl v1.68 for manpages. v1.69 works with or without this. +# v1.72 breaks with this because it replaces dots not in roff requests. +[listingblock] +<example><title>{title}</title> +<literallayout> +ifdef::doctype-manpage[] + .ft C +endif::doctype-manpage[] +| +ifdef::doctype-manpage[] + .ft +endif::doctype-manpage[] +</literallayout> +{title#}</example> +endif::perf-asciidoc-no-roff[] + +ifdef::perf-asciidoc-no-roff[] +ifdef::doctype-manpage[] +# The following two small workarounds insert a simple paragraph after screen +[listingblock] +<example><title>{title}</title> +<literallayout> +| +</literallayout><simpara></simpara> +{title#}</example> + +[verseblock] +<formalpara{id? id="{id}"}><title>{title}</title><para> +{title%}<literallayout{id? id="{id}"}> +{title#}<literallayout> +| +</literallayout> +{title#}</para></formalpara> +{title%}<simpara></simpara> +endif::doctype-manpage[] +endif::perf-asciidoc-no-roff[] +endif::backend-docbook[] + +ifdef::doctype-manpage[] +ifdef::backend-docbook[] +[header] +template::[header-declarations] +<refentry> +<refmeta> +<refentrytitle>{mantitle}</refentrytitle> +<manvolnum>{manvolnum}</manvolnum> +<refmiscinfo class="source">perf</refmiscinfo> +<refmiscinfo class="version">{perf_version}</refmiscinfo> +<refmiscinfo class="manual">perf Manual</refmiscinfo> +</refmeta> +<refnamediv> + <refname>{manname}</refname> + <refpurpose>{manpurpose}</refpurpose> +</refnamediv> +endif::backend-docbook[] +endif::doctype-manpage[] + +ifdef::backend-xhtml11[] +[linkperf-inlinemacro] +<a href="{target}.html">{target}{0?({0})}</a> +endif::backend-xhtml11[] diff --git a/kernel/tools/perf/Documentation/examples.txt b/kernel/tools/perf/Documentation/examples.txt new file mode 100644 index 000000000..a4e392156 --- /dev/null +++ b/kernel/tools/perf/Documentation/examples.txt @@ -0,0 +1,225 @@ + + ------------------------------ + ****** perf by examples ****** + ------------------------------ + +[ From an e-mail by Ingo Molnar, http://lkml.org/lkml/2009/8/4/346 ] + + +First, discovery/enumeration of available counters can be done via +'perf list': + +titan:~> perf list + [...] + kmem:kmalloc [Tracepoint event] + kmem:kmem_cache_alloc [Tracepoint event] + kmem:kmalloc_node [Tracepoint event] + kmem:kmem_cache_alloc_node [Tracepoint event] + kmem:kfree [Tracepoint event] + kmem:kmem_cache_free [Tracepoint event] + kmem:mm_page_free [Tracepoint event] + kmem:mm_page_free_batched [Tracepoint event] + kmem:mm_page_alloc [Tracepoint event] + kmem:mm_page_alloc_zone_locked [Tracepoint event] + kmem:mm_page_pcpu_drain [Tracepoint event] + kmem:mm_page_alloc_extfrag [Tracepoint event] + +Then any (or all) of the above event sources can be activated and +measured. For example the page alloc/free properties of a 'hackbench +run' are: + + titan:~> perf stat -e kmem:mm_page_pcpu_drain -e kmem:mm_page_alloc + -e kmem:mm_page_free_batched -e kmem:mm_page_free ./hackbench 10 + Time: 0.575 + + Performance counter stats for './hackbench 10': + + 13857 kmem:mm_page_pcpu_drain + 27576 kmem:mm_page_alloc + 6025 kmem:mm_page_free_batched + 20934 kmem:mm_page_free + + 0.613972165 seconds time elapsed + +You can observe the statistical properties as well, by using the +'repeat the workload N times' feature of perf stat: + + titan:~> perf stat --repeat 5 -e kmem:mm_page_pcpu_drain -e + kmem:mm_page_alloc -e kmem:mm_page_free_batched -e + kmem:mm_page_free ./hackbench 10 + Time: 0.627 + Time: 0.644 + Time: 0.564 + Time: 0.559 + Time: 0.626 + + Performance counter stats for './hackbench 10' (5 runs): + + 12920 kmem:mm_page_pcpu_drain ( +- 3.359% ) + 25035 kmem:mm_page_alloc ( +- 3.783% ) + 6104 kmem:mm_page_free_batched ( +- 0.934% ) + 18376 kmem:mm_page_free ( +- 4.941% ) + + 0.643954516 seconds time elapsed ( +- 2.363% ) + +Furthermore, these tracepoints can be used to sample the workload as +well. For example the page allocations done by a 'git gc' can be +captured the following way: + + titan:~/git> perf record -e kmem:mm_page_alloc -c 1 ./git gc + Counting objects: 1148, done. + Delta compression using up to 2 threads. + Compressing objects: 100% (450/450), done. + Writing objects: 100% (1148/1148), done. + Total 1148 (delta 690), reused 1148 (delta 690) + [ perf record: Captured and wrote 0.267 MB perf.data (~11679 samples) ] + +To check which functions generated page allocations: + + titan:~/git> perf report + # Samples: 10646 + # + # Overhead Command Shared Object + # ........ ............... .......................... + # + 23.57% git-repack /lib64/libc-2.5.so + 21.81% git /lib64/libc-2.5.so + 14.59% git ./git + 11.79% git-repack ./git + 7.12% git /lib64/ld-2.5.so + 3.16% git-repack /lib64/libpthread-2.5.so + 2.09% git-repack /bin/bash + 1.97% rm /lib64/libc-2.5.so + 1.39% mv /lib64/ld-2.5.so + 1.37% mv /lib64/libc-2.5.so + 1.12% git-repack /lib64/ld-2.5.so + 0.95% rm /lib64/ld-2.5.so + 0.90% git-update-serv /lib64/libc-2.5.so + 0.73% git-update-serv /lib64/ld-2.5.so + 0.68% perf /lib64/libpthread-2.5.so + 0.64% git-repack /usr/lib64/libz.so.1.2.3 + +Or to see it on a more finegrained level: + +titan:~/git> perf report --sort comm,dso,symbol +# Samples: 10646 +# +# Overhead Command Shared Object Symbol +# ........ ............... .......................... ...... +# + 9.35% git-repack ./git [.] insert_obj_hash + 9.12% git ./git [.] insert_obj_hash + 7.31% git /lib64/libc-2.5.so [.] memcpy + 6.34% git-repack /lib64/libc-2.5.so [.] _int_malloc + 6.24% git-repack /lib64/libc-2.5.so [.] memcpy + 5.82% git-repack /lib64/libc-2.5.so [.] __GI___fork + 5.47% git /lib64/libc-2.5.so [.] _int_malloc + 2.99% git /lib64/libc-2.5.so [.] memset + +Furthermore, call-graph sampling can be done too, of page +allocations - to see precisely what kind of page allocations there +are: + + titan:~/git> perf record -g -e kmem:mm_page_alloc -c 1 ./git gc + Counting objects: 1148, done. + Delta compression using up to 2 threads. + Compressing objects: 100% (450/450), done. + Writing objects: 100% (1148/1148), done. + Total 1148 (delta 690), reused 1148 (delta 690) + [ perf record: Captured and wrote 0.963 MB perf.data (~42069 samples) ] + + titan:~/git> perf report -g + # Samples: 10686 + # + # Overhead Command Shared Object + # ........ ............... .......................... + # + 23.25% git-repack /lib64/libc-2.5.so + | + |--50.00%-- _int_free + | + |--37.50%-- __GI___fork + | make_child + | + |--12.50%-- ptmalloc_unlock_all2 + | make_child + | + --6.25%-- __GI_strcpy + 21.61% git /lib64/libc-2.5.so + | + |--30.00%-- __GI_read + | | + | --83.33%-- git_config_from_file + | git_config + | | + [...] + +Or you can observe the whole system's page allocations for 10 +seconds: + +titan:~/git> perf stat -a -e kmem:mm_page_pcpu_drain -e +kmem:mm_page_alloc -e kmem:mm_page_free_batched -e +kmem:mm_page_free sleep 10 + + Performance counter stats for 'sleep 10': + + 171585 kmem:mm_page_pcpu_drain + 322114 kmem:mm_page_alloc + 73623 kmem:mm_page_free_batched + 254115 kmem:mm_page_free + + 10.000591410 seconds time elapsed + +Or observe how fluctuating the page allocations are, via statistical +analysis done over ten 1-second intervals: + + titan:~/git> perf stat --repeat 10 -a -e kmem:mm_page_pcpu_drain -e + kmem:mm_page_alloc -e kmem:mm_page_free_batched -e + kmem:mm_page_free sleep 1 + + Performance counter stats for 'sleep 1' (10 runs): + + 17254 kmem:mm_page_pcpu_drain ( +- 3.709% ) + 34394 kmem:mm_page_alloc ( +- 4.617% ) + 7509 kmem:mm_page_free_batched ( +- 4.820% ) + 25653 kmem:mm_page_free ( +- 3.672% ) + + 1.058135029 seconds time elapsed ( +- 3.089% ) + +Or you can annotate the recorded 'git gc' run on a per symbol basis +and check which instructions/source-code generated page allocations: + + titan:~/git> perf annotate __GI___fork + ------------------------------------------------ + Percent | Source code & Disassembly of libc-2.5.so + ------------------------------------------------ + : + : + : Disassembly of section .plt: + : Disassembly of section .text: + : + : 00000031a2e95560 <__fork>: + [...] + 0.00 : 31a2e95602: b8 38 00 00 00 mov $0x38,%eax + 0.00 : 31a2e95607: 0f 05 syscall + 83.42 : 31a2e95609: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax + 0.00 : 31a2e9560f: 0f 87 4d 01 00 00 ja 31a2e95762 <__fork+0x202> + 0.00 : 31a2e95615: 85 c0 test %eax,%eax + +( this shows that 83.42% of __GI___fork's page allocations come from + the 0x38 system call it performs. ) + +etc. etc. - a lot more is possible. I could list a dozen of +other different usecases straight away - neither of which is +possible via /proc/vmstat. + +/proc/vmstat is not in the same league really, in terms of +expressive power of system analysis and performance +analysis. + +All that the above results needed were those new tracepoints +in include/tracing/events/kmem.h. + + Ingo + + diff --git a/kernel/tools/perf/Documentation/jit-interface.txt b/kernel/tools/perf/Documentation/jit-interface.txt new file mode 100644 index 000000000..a8656f564 --- /dev/null +++ b/kernel/tools/perf/Documentation/jit-interface.txt @@ -0,0 +1,15 @@ +perf supports a simple JIT interface to resolve symbols for dynamic code generated +by a JIT. + +The JIT has to write a /tmp/perf-%d.map (%d = pid of process) file + +This is a text file. + +Each line has the following format, fields separated with spaces: + +START SIZE symbolname + +START and SIZE are hex numbers without 0x. +symbolname is the rest of the line, so it could contain special characters. + +The ownership of the file has to match the process. diff --git a/kernel/tools/perf/Documentation/manpage-1.72.xsl b/kernel/tools/perf/Documentation/manpage-1.72.xsl new file mode 100644 index 000000000..b4d315cb8 --- /dev/null +++ b/kernel/tools/perf/Documentation/manpage-1.72.xsl @@ -0,0 +1,14 @@ +<!-- manpage-1.72.xsl: + special settings for manpages rendered from asciidoc+docbook + handles peculiarities in docbook-xsl 1.72.0 --> +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + version="1.0"> + +<xsl:import href="manpage-base.xsl"/> + +<!-- these are the special values for the roff control characters + needed for docbook-xsl 1.72.0 --> +<xsl:param name="git.docbook.backslash">▓</xsl:param> +<xsl:param name="git.docbook.dot" >⌂</xsl:param> + +</xsl:stylesheet> diff --git a/kernel/tools/perf/Documentation/manpage-base.xsl b/kernel/tools/perf/Documentation/manpage-base.xsl new file mode 100644 index 000000000..a264fa616 --- /dev/null +++ b/kernel/tools/perf/Documentation/manpage-base.xsl @@ -0,0 +1,35 @@ +<!-- manpage-base.xsl: + special formatting for manpages rendered from asciidoc+docbook --> +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + version="1.0"> + +<!-- these params silence some output from xmlto --> +<xsl:param name="man.output.quietly" select="1"/> +<xsl:param name="refentry.meta.get.quietly" select="1"/> + +<!-- convert asciidoc callouts to man page format; + git.docbook.backslash and git.docbook.dot params + must be supplied by another XSL file or other means --> +<xsl:template match="co"> + <xsl:value-of select="concat( + $git.docbook.backslash,'fB(', + substring-after(@id,'-'),')', + $git.docbook.backslash,'fR')"/> +</xsl:template> +<xsl:template match="calloutlist"> + <xsl:value-of select="$git.docbook.dot"/> + <xsl:text>sp </xsl:text> + <xsl:apply-templates/> + <xsl:text> </xsl:text> +</xsl:template> +<xsl:template match="callout"> + <xsl:value-of select="concat( + $git.docbook.backslash,'fB', + substring-after(@arearefs,'-'), + '. ',$git.docbook.backslash,'fR')"/> + <xsl:apply-templates/> + <xsl:value-of select="$git.docbook.dot"/> + <xsl:text>br </xsl:text> +</xsl:template> + +</xsl:stylesheet> diff --git a/kernel/tools/perf/Documentation/manpage-bold-literal.xsl b/kernel/tools/perf/Documentation/manpage-bold-literal.xsl new file mode 100644 index 000000000..608eb5df6 --- /dev/null +++ b/kernel/tools/perf/Documentation/manpage-bold-literal.xsl @@ -0,0 +1,17 @@ +<!-- manpage-bold-literal.xsl: + special formatting for manpages rendered from asciidoc+docbook --> +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + version="1.0"> + +<!-- render literal text as bold (instead of plain or monospace); + this makes literal text easier to distinguish in manpages + viewed on a tty --> +<xsl:template match="literal"> + <xsl:value-of select="$git.docbook.backslash"/> + <xsl:text>fB</xsl:text> + <xsl:apply-templates/> + <xsl:value-of select="$git.docbook.backslash"/> + <xsl:text>fR</xsl:text> +</xsl:template> + +</xsl:stylesheet> diff --git a/kernel/tools/perf/Documentation/manpage-normal.xsl b/kernel/tools/perf/Documentation/manpage-normal.xsl new file mode 100644 index 000000000..a48f5b11f --- /dev/null +++ b/kernel/tools/perf/Documentation/manpage-normal.xsl @@ -0,0 +1,13 @@ +<!-- manpage-normal.xsl: + special settings for manpages rendered from asciidoc+docbook + handles anything we want to keep away from docbook-xsl 1.72.0 --> +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + version="1.0"> + +<xsl:import href="manpage-base.xsl"/> + +<!-- these are the normal values for the roff control characters --> +<xsl:param name="git.docbook.backslash">\</xsl:param> +<xsl:param name="git.docbook.dot" >.</xsl:param> + +</xsl:stylesheet> diff --git a/kernel/tools/perf/Documentation/manpage-suppress-sp.xsl b/kernel/tools/perf/Documentation/manpage-suppress-sp.xsl new file mode 100644 index 000000000..a63c7632a --- /dev/null +++ b/kernel/tools/perf/Documentation/manpage-suppress-sp.xsl @@ -0,0 +1,21 @@ +<!-- manpage-suppress-sp.xsl: + special settings for manpages rendered from asciidoc+docbook + handles erroneous, inline .sp in manpage output of some + versions of docbook-xsl --> +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + version="1.0"> + +<!-- attempt to work around spurious .sp at the tail of the line + that some versions of docbook stylesheets seem to add --> +<xsl:template match="simpara"> + <xsl:variable name="content"> + <xsl:apply-templates/> + </xsl:variable> + <xsl:value-of select="normalize-space($content)"/> + <xsl:if test="not(ancestor::authorblurb) and + not(ancestor::personblurb)"> + <xsl:text> </xsl:text> + </xsl:if> +</xsl:template> + +</xsl:stylesheet> diff --git a/kernel/tools/perf/Documentation/perf-annotate.txt b/kernel/tools/perf/Documentation/perf-annotate.txt new file mode 100644 index 000000000..e9cd39a92 --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-annotate.txt @@ -0,0 +1,101 @@ +perf-annotate(1) +================ + +NAME +---- +perf-annotate - Read perf.data (created by perf record) and display annotated code + +SYNOPSIS +-------- +[verse] +'perf annotate' [-i <file> | --input=file] [symbol_name] + +DESCRIPTION +----------- +This command reads the input file and displays an annotated version of the +code. If the object file has debug symbols then the source code will be +displayed alongside assembly code. + +If there is no debug info in the object, then annotated assembly is displayed. + +OPTIONS +------- +-i:: +--input=:: + Input file name. (default: perf.data unless stdin is a fifo) + +-d:: +--dsos=<dso[,dso...]>:: + Only consider symbols in these dsos. +-s:: +--symbol=<symbol>:: + Symbol to annotate. + +-f:: +--force:: + Don't complain, do it. + +-v:: +--verbose:: + Be more verbose. (Show symbol address, etc) + +-D:: +--dump-raw-trace:: + Dump raw trace in ASCII. + +-k:: +--vmlinux=<file>:: + vmlinux pathname. + +-m:: +--modules:: + Load module symbols. WARNING: use only with -k and LIVE kernel. + +-l:: +--print-line:: + Print matching source lines (may be slow). + +-P:: +--full-paths:: + Don't shorten the displayed pathnames. + +--stdio:: Use the stdio interface. + +--tui:: Use the TUI interface. Use of --tui requires a tty, if one is not + present, as when piping to other commands, the stdio interface is + used. This interfaces starts by centering on the line with more + samples, TAB/UNTAB cycles through the lines with more samples. + +--gtk:: Use the GTK interface. + +-C:: +--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can + be provided as a comma-separated list with no space: 0,1. Ranges of + CPUs are specified with -: 0-2. Default is to report samples on all + CPUs. + +--asm-raw:: + Show raw instruction encoding of assembly instructions. + +--source:: + Interleave source code with assembly code. Enabled by default, + disable with --no-source. + +--symfs=<directory>:: + Look for files with symbols relative to this directory. + +-M:: +--disassembler-style=:: Set disassembler style for objdump. + +--objdump=<path>:: + Path to objdump binary. + +--skip-missing:: + Skip symbols that cannot be annotated. + +--group:: + Show event group information together + +SEE ALSO +-------- +linkperf:perf-record[1], linkperf:perf-report[1] diff --git a/kernel/tools/perf/Documentation/perf-archive.txt b/kernel/tools/perf/Documentation/perf-archive.txt new file mode 100644 index 000000000..ac6ecbb3e --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-archive.txt @@ -0,0 +1,22 @@ +perf-archive(1) +=============== + +NAME +---- +perf-archive - Create archive with object files with build-ids found in perf.data file + +SYNOPSIS +-------- +[verse] +'perf archive' [file] + +DESCRIPTION +----------- +This command runs perf-buildid-list --with-hits, and collects the files with the +buildids found so that analysis of perf.data contents can be possible on another +machine. + + +SEE ALSO +-------- +linkperf:perf-record[1], linkperf:perf-buildid-list[1], linkperf:perf-report[1] diff --git a/kernel/tools/perf/Documentation/perf-bench.txt b/kernel/tools/perf/Documentation/perf-bench.txt new file mode 100644 index 000000000..f6480cbf3 --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-bench.txt @@ -0,0 +1,218 @@ +perf-bench(1) +============= + +NAME +---- +perf-bench - General framework for benchmark suites + +SYNOPSIS +-------- +[verse] +'perf bench' [<common options>] <subsystem> <suite> [<options>] + +DESCRIPTION +----------- +This 'perf bench' command is a general framework for benchmark suites. + +COMMON OPTIONS +-------------- +-r:: +--repeat=:: +Specify amount of times to repeat the run (default 10). + +-f:: +--format=:: +Specify format style. +Current available format styles are: + +'default':: +Default style. This is mainly for human reading. +--------------------- +% perf bench sched pipe # with no style specified +(executing 1000000 pipe operations between two tasks) + Total time:5.855 sec + 5.855061 usecs/op + 170792 ops/sec +--------------------- + +'simple':: +This simple style is friendly for automated +processing by scripts. +--------------------- +% perf bench --format=simple sched pipe # specified simple +5.988 +--------------------- + +SUBSYSTEM +--------- + +'sched':: + Scheduler and IPC mechanisms. + +'mem':: + Memory access performance. + +'numa':: + NUMA scheduling and MM benchmarks. + +'futex':: + Futex stressing benchmarks. + +'all':: + All benchmark subsystems. + +SUITES FOR 'sched' +~~~~~~~~~~~~~~~~~~ +*messaging*:: +Suite for evaluating performance of scheduler and IPC mechanisms. +Based on hackbench by Rusty Russell. + +Options of *messaging* +^^^^^^^^^^^^^^^^^^^^^^ +-p:: +--pipe:: +Use pipe() instead of socketpair() + +-t:: +--thread:: +Be multi thread instead of multi process + +-g:: +--group=:: +Specify number of groups + +-l:: +--loop=:: +Specify number of loops + +Example of *messaging* +^^^^^^^^^^^^^^^^^^^^^^ + +--------------------- +% perf bench sched messaging # run with default +options (20 sender and receiver processes per group) +(10 groups == 400 processes run) + + Total time:0.308 sec + +% perf bench sched messaging -t -g 20 # be multi-thread, with 20 groups +(20 sender and receiver threads per group) +(20 groups == 800 threads run) + + Total time:0.582 sec +--------------------- + +*pipe*:: +Suite for pipe() system call. +Based on pipe-test-1m.c by Ingo Molnar. + +Options of *pipe* +^^^^^^^^^^^^^^^^^ +-l:: +--loop=:: +Specify number of loops. + +Example of *pipe* +^^^^^^^^^^^^^^^^^ + +--------------------- +% perf bench sched pipe +(executing 1000000 pipe operations between two tasks) + + Total time:8.091 sec + 8.091833 usecs/op + 123581 ops/sec + +% perf bench sched pipe -l 1000 # loop 1000 +(executing 1000 pipe operations between two tasks) + + Total time:0.016 sec + 16.948000 usecs/op + 59004 ops/sec +--------------------- + +SUITES FOR 'mem' +~~~~~~~~~~~~~~~~ +*memcpy*:: +Suite for evaluating performance of simple memory copy in various ways. + +Options of *memcpy* +^^^^^^^^^^^^^^^^^^^ +-l:: +--length:: +Specify length of memory to copy (default: 1MB). +Available units are B, KB, MB, GB and TB (case insensitive). + +-r:: +--routine:: +Specify routine to copy (default: default). +Available routines are depend on the architecture. +On x86-64, x86-64-unrolled, x86-64-movsq and x86-64-movsb are supported. + +-i:: +--iterations:: +Repeat memcpy invocation this number of times. + +-c:: +--cycle:: +Use perf's cpu-cycles event instead of gettimeofday syscall. + +-o:: +--only-prefault:: +Show only the result with page faults before memcpy. + +-n:: +--no-prefault:: +Show only the result without page faults before memcpy. + +*memset*:: +Suite for evaluating performance of simple memory set in various ways. + +Options of *memset* +^^^^^^^^^^^^^^^^^^^ +-l:: +--length:: +Specify length of memory to set (default: 1MB). +Available units are B, KB, MB, GB and TB (case insensitive). + +-r:: +--routine:: +Specify routine to set (default: default). +Available routines are depend on the architecture. +On x86-64, x86-64-unrolled, x86-64-stosq and x86-64-stosb are supported. + +-i:: +--iterations:: +Repeat memset invocation this number of times. + +-c:: +--cycle:: +Use perf's cpu-cycles event instead of gettimeofday syscall. + +-o:: +--only-prefault:: +Show only the result with page faults before memset. + +-n:: +--no-prefault:: +Show only the result without page faults before memset. + +SUITES FOR 'numa' +~~~~~~~~~~~~~~~~~ +*mem*:: +Suite for evaluating NUMA workloads. + +SUITES FOR 'futex' +~~~~~~~~~~~~~~~~~~ +*hash*:: +Suite for evaluating hash tables. + +*wake*:: +Suite for evaluating wake calls. + +*requeue*:: +Suite for evaluating requeue calls. + +SEE ALSO +-------- +linkperf:perf[1] diff --git a/kernel/tools/perf/Documentation/perf-buildid-cache.txt b/kernel/tools/perf/Documentation/perf-buildid-cache.txt new file mode 100644 index 000000000..dd07b55f5 --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-buildid-cache.txt @@ -0,0 +1,63 @@ +perf-buildid-cache(1) +===================== + +NAME +---- +perf-buildid-cache - Manage build-id cache. + +SYNOPSIS +-------- +[verse] +'perf buildid-cache <options>' + +DESCRIPTION +----------- +This command manages the build-id cache. It can add, remove, update and purge +files to/from the cache. In the future it should as well set upper limits for +the space used by the cache, etc. + +OPTIONS +------- +-a:: +--add=:: + Add specified file to the cache. +-k:: +--kcore:: + Add specified kcore file to the cache. For the current host that is + /proc/kcore which requires root permissions to read. Be aware that + running 'perf buildid-cache' as root may update root's build-id cache + not the user's. Use the -v option to see where the file is created. + Note that the copied file contains only code sections not the whole core + image. Note also that files "kallsyms" and "modules" must also be in the + same directory and are also copied. All 3 files are created with read + permissions for root only. kcore will not be added if there is already a + kcore in the cache (with the same build-id) that has the same modules at + the same addresses. Use the -v option to see if a copy of kcore is + actually made. +-r:: +--remove=:: + Remove a cached binary which has same build-id of specified file + from the cache. +-p:: +--purge=:: + Purge all cached binaries including older caches which have specified + path from the cache. +-M:: +--missing=:: + List missing build ids in the cache for the specified file. +-u:: +--update=:: + Update specified file of the cache. Note that this doesn't remove + older entires since those may be still needed for annotating old + (or remote) perf.data. Only if there is already a cache which has + exactly same build-id, that is replaced by new one. It can be used + to update kallsyms and kernel dso to vmlinux in order to support + annotation. + +-v:: +--verbose:: + Be more verbose. + +SEE ALSO +-------- +linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-buildid-list[1] diff --git a/kernel/tools/perf/Documentation/perf-buildid-list.txt b/kernel/tools/perf/Documentation/perf-buildid-list.txt new file mode 100644 index 000000000..25c52efcc --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-buildid-list.txt @@ -0,0 +1,43 @@ +perf-buildid-list(1) +==================== + +NAME +---- +perf-buildid-list - List the buildids in a perf.data file + +SYNOPSIS +-------- +[verse] +'perf buildid-list <options>' + +DESCRIPTION +----------- +This command displays the buildids found in a perf.data file, so that other +tools can be used to fetch packages with matching symbol tables for use by +perf report. + +It can also be used to show the build id of the running kernel or in an ELF +file using -i/--input. + +OPTIONS +------- +-H:: +--with-hits:: + Show only DSOs with hits. +-i:: +--input=:: + Input file name. (default: perf.data unless stdin is a fifo) +-f:: +--force:: + Don't do ownership validation. +-k:: +--kernel:: + Show running kernel build id. +-v:: +--verbose:: + Be more verbose. + +SEE ALSO +-------- +linkperf:perf-record[1], linkperf:perf-top[1], +linkperf:perf-report[1] diff --git a/kernel/tools/perf/Documentation/perf-data.txt b/kernel/tools/perf/Documentation/perf-data.txt new file mode 100644 index 000000000..be8fa1a0a --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-data.txt @@ -0,0 +1,40 @@ +perf-data(1) +============== + +NAME +---- +perf-data - Data file related processing + +SYNOPSIS +-------- +[verse] +'perf data' [<common options>] <command> [<options>]", + +DESCRIPTION +----------- +Data file related processing. + +COMMANDS +-------- +convert:: + Converts perf data file into another format (only CTF [1] format is support by now). + It's possible to set data-convert debug variable to get debug messages from conversion, + like: + perf --debug data-convert data convert ... + +OPTIONS for 'convert' +--------------------- +--to-ctf:: + Triggers the CTF conversion, specify the path of CTF data directory. + +-i:: + Specify input perf data file path. + +-v:: +--verbose:: + Be more verbose (show counter open errors, etc). + +SEE ALSO +-------- +linkperf:perf[1] +[1] Common Trace Format - http://www.efficios.com/ctf diff --git a/kernel/tools/perf/Documentation/perf-diff.txt b/kernel/tools/perf/Documentation/perf-diff.txt new file mode 100644 index 000000000..d1deb5738 --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-diff.txt @@ -0,0 +1,214 @@ +perf-diff(1) +============ + +NAME +---- +perf-diff - Read perf.data files and display the differential profile + +SYNOPSIS +-------- +[verse] +'perf diff' [baseline file] [data file1] [[data file2] ... ] + +DESCRIPTION +----------- +This command displays the performance difference amongst two or more perf.data +files captured via perf record. + +If no parameters are passed it will assume perf.data.old and perf.data. + +The differential profile is displayed only for events matching both +specified perf.data files. + +If no parameters are passed the samples will be sorted by dso and symbol. +As the perf.data files could come from different binaries, the symbols addresses +could vary. So perf diff is based on the comparison of the files and +symbols name. + +OPTIONS +------- +-D:: +--dump-raw-trace:: + Dump raw trace in ASCII. + +--kallsyms=<file>:: + kallsyms pathname + +-m:: +--modules:: + Load module symbols. WARNING: use only with -k and LIVE kernel + +-d:: +--dsos=:: + Only consider symbols in these dsos. CSV that understands + file://filename entries. This option will affect the percentage + of the Baseline/Delta column. See --percentage for more info. + +-C:: +--comms=:: + Only consider symbols in these comms. CSV that understands + file://filename entries. This option will affect the percentage + of the Baseline/Delta column. See --percentage for more info. + +-S:: +--symbols=:: + Only consider these symbols. CSV that understands + file://filename entries. This option will affect the percentage + of the Baseline/Delta column. See --percentage for more info. + +-s:: +--sort=:: + Sort by key(s): pid, comm, dso, symbol, cpu, parent, srcline. + Please see description of --sort in the perf-report man page. + +-t:: +--field-separator=:: + + Use a special separator character and don't pad with spaces, replacing + all occurrences of this separator in symbol names (and other output) + with a '.' character, that thus it's the only non valid separator. + +-v:: +--verbose:: + Be verbose, for instance, show the raw counts in addition to the + diff. + +-f:: +--force:: + Don't complain, do it. + +--symfs=<directory>:: + Look for files with symbols relative to this directory. + +-b:: +--baseline-only:: + Show only items with match in baseline. + +-c:: +--compute:: + Differential computation selection - delta,ratio,wdiff (default is delta). + See COMPARISON METHODS section for more info. + +-p:: +--period:: + Show period values for both compared hist entries. + +-F:: +--formula:: + Show formula for given computation. + +-o:: +--order:: + Specify compute sorting column number. + +--percentage:: + Determine how to display the overhead percentage of filtered entries. + Filters can be applied by --comms, --dsos and/or --symbols options. + + "relative" means it's relative to filtered entries only so that the + sum of shown entries will be always 100%. "absolute" means it retains + the original value before and after the filter is applied. + +COMPARISON +---------- +The comparison is governed by the baseline file. The baseline perf.data +file is iterated for samples. All other perf.data files specified on +the command line are searched for the baseline sample pair. If the pair +is found, specified computation is made and result is displayed. + +All samples from non-baseline perf.data files, that do not match any +baseline entry, are displayed with empty space within baseline column +and possible computation results (delta) in their related column. + +Example files samples: +- file A with samples f1, f2, f3, f4, f6 +- file B with samples f2, f4, f5 +- file C with samples f1, f2, f5 + +Example output: + x - computation takes place for pair + b - baseline sample percentage + +- perf diff A B C + + baseline/A compute/B compute/C samples + --------------------------------------- + b x f1 + b x x f2 + b f3 + b x f4 + b f6 + x x f5 + +- perf diff B A C + + baseline/B compute/A compute/C samples + --------------------------------------- + b x x f2 + b x f4 + b x f5 + x x f1 + x f3 + x f6 + +- perf diff C B A + + baseline/C compute/B compute/A samples + --------------------------------------- + b x f1 + b x x f2 + b x f5 + x f3 + x x f4 + x f6 + +COMPARISON METHODS +------------------ +delta +~~~~~ +If specified the 'Delta' column is displayed with value 'd' computed as: + + d = A->period_percent - B->period_percent + +with: + - A/B being matching hist entry from data/baseline file specified + (or perf.data/perf.data.old) respectively. + + - period_percent being the % of the hist entry period value within + single data file + + - with filtering by -C, -d and/or -S, period_percent might be changed + relative to how entries are filtered. Use --percentage=absolute to + prevent such fluctuation. + +ratio +~~~~~ +If specified the 'Ratio' column is displayed with value 'r' computed as: + + r = A->period / B->period + +with: + - A/B being matching hist entry from data/baseline file specified + (or perf.data/perf.data.old) respectively. + + - period being the hist entry period value + +wdiff:WEIGHT-B,WEIGHT-A +~~~~~~~~~~~~~~~~~~~~~~~ +If specified the 'Weighted diff' column is displayed with value 'd' computed as: + + d = B->period * WEIGHT-A - A->period * WEIGHT-B + + - A/B being matching hist entry from data/baseline file specified + (or perf.data/perf.data.old) respectively. + + - period being the hist entry period value + + - WEIGHT-A/WEIGHT-B being user supplied weights in the the '-c' option + behind ':' separator like '-c wdiff:1,2'. + - WEIGHT-A being the weight of the data file + - WEIGHT-B being the weight of the baseline data file + +SEE ALSO +-------- +linkperf:perf-record[1], linkperf:perf-report[1] diff --git a/kernel/tools/perf/Documentation/perf-evlist.txt b/kernel/tools/perf/Documentation/perf-evlist.txt new file mode 100644 index 000000000..1ceb3700f --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-evlist.txt @@ -0,0 +1,38 @@ +perf-evlist(1) +============== + +NAME +---- +perf-evlist - List the event names in a perf.data file + +SYNOPSIS +-------- +[verse] +'perf evlist <options>' + +DESCRIPTION +----------- +This command displays the names of events sampled in a perf.data file. + +OPTIONS +------- +-i:: +--input=:: + Input file name. (default: perf.data unless stdin is a fifo) + +-F:: +--freq=:: + Show just the sample frequency used for each event. + +-v:: +--verbose=:: + Show all fields. + +-g:: +--group:: + Show event group information. + +SEE ALSO +-------- +linkperf:perf-record[1], linkperf:perf-list[1], +linkperf:perf-report[1] diff --git a/kernel/tools/perf/Documentation/perf-help.txt b/kernel/tools/perf/Documentation/perf-help.txt new file mode 100644 index 000000000..514391818 --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-help.txt @@ -0,0 +1,38 @@ +perf-help(1) +============ + +NAME +---- +perf-help - display help information about perf + +SYNOPSIS +-------- +'perf help' [-a|--all] [COMMAND] + +DESCRIPTION +----------- + +With no options and no COMMAND given, the synopsis of the 'perf' +command and a list of the most commonly used perf commands are printed +on the standard output. + +If the option '--all' or '-a' is given, then all available commands are +printed on the standard output. + +If a perf command is named, a manual page for that command is brought +up. The 'man' program is used by default for this purpose, but this +can be overridden by other options or configuration variables. + +Note that `perf --help ...` is identical to `perf help ...` because the +former is internally converted into the latter. + +OPTIONS +------- +-a:: +--all:: + Prints all the available commands on the standard output. This + option supersedes any other option. + +PERF +---- +Part of the linkperf:perf[1] suite diff --git a/kernel/tools/perf/Documentation/perf-inject.txt b/kernel/tools/perf/Documentation/perf-inject.txt new file mode 100644 index 000000000..dc7442cf3 --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-inject.txt @@ -0,0 +1,49 @@ +perf-inject(1) +============== + +NAME +---- +perf-inject - Filter to augment the events stream with additional information + +SYNOPSIS +-------- +[verse] +'perf inject <options>' + +DESCRIPTION +----------- +perf-inject reads a perf-record event stream and repipes it to stdout. At any +point the processing code can inject other events into the event stream - in +this case build-ids (-b option) are read and injected as needed into the event +stream. + +Build-ids are just the first user of perf-inject - potentially anything that +needs userspace processing to augment the events stream with additional +information could make use of this facility. + +OPTIONS +------- +-b:: +--build-ids=:: + Inject build-ids into the output stream +-v:: +--verbose:: + Be more verbose. +-i:: +--input=:: + Input file name. (default: stdin) +-o:: +--output=:: + Output file name. (default: stdout) +-s:: +--sched-stat:: + Merge sched_stat and sched_switch for getting events where and how long + tasks slept. sched_switch contains a callchain where a task slept and + sched_stat contains a timeslice how long a task slept. + +--kallsyms=<file>:: + kallsyms pathname + +SEE ALSO +-------- +linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1] diff --git a/kernel/tools/perf/Documentation/perf-kmem.txt b/kernel/tools/perf/Documentation/perf-kmem.txt new file mode 100644 index 000000000..23219c65c --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-kmem.txt @@ -0,0 +1,57 @@ +perf-kmem(1) +============ + +NAME +---- +perf-kmem - Tool to trace/measure kernel memory properties + +SYNOPSIS +-------- +[verse] +'perf kmem' {record|stat} [<options>] + +DESCRIPTION +----------- +There are two variants of perf kmem: + + 'perf kmem record <command>' to record the kmem events + of an arbitrary workload. + + 'perf kmem stat' to report kernel memory statistics. + +OPTIONS +------- +-i <file>:: +--input=<file>:: + Select the input file (default: perf.data unless stdin is a fifo) + +-v:: +--verbose:: + Be more verbose. (show symbol address, etc) + +--caller:: + Show per-callsite statistics + +--alloc:: + Show per-allocation statistics + +-s <key[,key2...]>:: +--sort=<key[,key2...]>:: + Sort the output (default: frag,hit,bytes) + +-l <num>:: +--line=<num>:: + Print n lines only + +--raw-ip:: + Print raw ip instead of symbol + +--slab:: + Analyze SLAB allocator events. + +--page:: + Analyze page allocator events + +SEE ALSO +-------- +linkperf:perf-record[1] diff --git a/kernel/tools/perf/Documentation/perf-kvm.txt b/kernel/tools/perf/Documentation/perf-kvm.txt new file mode 100644 index 000000000..6252e7760 --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-kvm.txt @@ -0,0 +1,158 @@ +perf-kvm(1) +=========== + +NAME +---- +perf-kvm - Tool to trace/measure kvm guest os + +SYNOPSIS +-------- +[verse] +'perf kvm' [--host] [--guest] [--guestmount=<path> + [--guestkallsyms=<path> --guestmodules=<path> | --guestvmlinux=<path>]] + {top|record|report|diff|buildid-list} [<options>] +'perf kvm' [--host] [--guest] [--guestkallsyms=<path> --guestmodules=<path> + | --guestvmlinux=<path>] {top|record|report|diff|buildid-list|stat} [<options>] +'perf kvm stat [record|report|live] [<options>] + +DESCRIPTION +----------- +There are a couple of variants of perf kvm: + + 'perf kvm [options] top <command>' to generates and displays + a performance counter profile of guest os in realtime + of an arbitrary workload. + + 'perf kvm record <command>' to record the performance counter profile + of an arbitrary workload and save it into a perf data file. We set the + default behavior of perf kvm as --guest, so if neither --host nor --guest + is input, the perf data file name is perf.data.guest. If --host is input, + the perf data file name is perf.data.kvm. If you want to record data into + perf.data.host, please input --host --no-guest. The behaviors are shown as + following: + Default('') -> perf.data.guest + --host -> perf.data.kvm + --guest -> perf.data.guest + --host --guest -> perf.data.kvm + --host --no-guest -> perf.data.host + + 'perf kvm report' to display the performance counter profile information + recorded via perf kvm record. + + 'perf kvm diff' to displays the performance difference amongst two perf.data + files captured via perf record. + + 'perf kvm buildid-list' to display the buildids found in a perf data file, + so that other tools can be used to fetch packages with matching symbol tables + for use by perf report. As buildid is read from /sys/kernel/notes in os, then + if you want to list the buildid for guest, please make sure your perf data file + was captured with --guestmount in perf kvm record. + + 'perf kvm stat <command>' to run a command and gather performance counter + statistics. + Especially, perf 'kvm stat record/report' generates a statistical analysis + of KVM events. Currently, vmexit, mmio (x86 only) and ioport (x86 only) + events are supported. 'perf kvm stat record <command>' records kvm events + and the events between start and end <command>. + And this command produces a file which contains tracing results of kvm + events. + + 'perf kvm stat report' reports statistical data which includes events + handled time, samples, and so on. + + 'perf kvm stat live' reports statistical data in a live mode (similar to + record + report but with statistical data updated live at a given display + rate). + +OPTIONS +------- +-i:: +--input=<path>:: + Input file name. +-o:: +--output=<path>:: + Output file name. +--host:: + Collect host side performance profile. +--guest:: + Collect guest side performance profile. +--guestmount=<path>:: + Guest os root file system mount directory. Users mounts guest os + root directories under <path> by a specific filesystem access method, + typically, sshfs. For example, start 2 guest os. The one's pid is 8888 + and the other's is 9999. + #mkdir ~/guestmount; cd ~/guestmount + #sshfs -o allow_other,direct_io -p 5551 localhost:/ 8888/ + #sshfs -o allow_other,direct_io -p 5552 localhost:/ 9999/ + #perf kvm --host --guest --guestmount=~/guestmount top +--guestkallsyms=<path>:: + Guest os /proc/kallsyms file copy. 'perf' kvm' reads it to get guest + kernel symbols. Users copy it out from guest os. +--guestmodules=<path>:: + Guest os /proc/modules file copy. 'perf' kvm' reads it to get guest + kernel module information. Users copy it out from guest os. +--guestvmlinux=<path>:: + Guest os kernel vmlinux. +-v:: +--verbose:: + Be more verbose (show counter open errors, etc). + +STAT REPORT OPTIONS +------------------- +--vcpu=<value>:: + analyze events which occur on this vcpu. (default: all vcpus) + +--event=<value>:: + event to be analyzed. Possible values: vmexit, mmio (x86 only), + ioport (x86 only). (default: vmexit) +-k:: +--key=<value>:: + Sorting key. Possible values: sample (default, sort by samples + number), time (sort by average time). +-p:: +--pid=:: + Analyze events only for given process ID(s) (comma separated list). + +STAT LIVE OPTIONS +----------------- +-d:: +--display:: + Time in seconds between display updates + +-m:: +--mmap-pages=:: + Number of mmap data pages (must be a power of two) or size + specification with appended unit character - B/K/M/G. The + size is rounded up to have nearest pages power of two value. + +-a:: +--all-cpus:: + System-wide collection from all CPUs. + +-p:: +--pid=:: + Analyze events only for given process ID(s) (comma separated list). + +--vcpu=<value>:: + analyze events which occur on this vcpu. (default: all vcpus) + + +--event=<value>:: + event to be analyzed. Possible values: vmexit, + mmio (x86 only), ioport (x86 only). + (default: vmexit) + +-k:: +--key=<value>:: + Sorting key. Possible values: sample (default, sort by samples + number), time (sort by average time). + +--duration=<value>:: + Show events other than HLT (x86 only) or Wait state (s390 only) + that take longer than duration usecs. + +SEE ALSO +-------- +linkperf:perf-top[1], linkperf:perf-record[1], linkperf:perf-report[1], +linkperf:perf-diff[1], linkperf:perf-buildid-list[1], +linkperf:perf-stat[1] diff --git a/kernel/tools/perf/Documentation/perf-list.txt b/kernel/tools/perf/Documentation/perf-list.txt new file mode 100644 index 000000000..bada8933f --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-list.txt @@ -0,0 +1,142 @@ +perf-list(1) +============ + +NAME +---- +perf-list - List all symbolic event types + +SYNOPSIS +-------- +[verse] +'perf list' [hw|sw|cache|tracepoint|pmu|event_glob] + +DESCRIPTION +----------- +This command displays the symbolic event types which can be selected in the +various perf commands with the -e option. + +[[EVENT_MODIFIERS]] +EVENT MODIFIERS +--------------- + +Events can optionally have a modifier by appending a colon and one or +more modifiers. Modifiers allow the user to restrict the events to be +counted. The following modifiers exist: + + u - user-space counting + k - kernel counting + h - hypervisor counting + I - non idle counting + G - guest counting (in KVM guests) + H - host counting (not in KVM guests) + p - precise level + S - read sample value (PERF_SAMPLE_READ) + D - pin the event to the PMU + +The 'p' modifier can be used for specifying how precise the instruction +address should be. The 'p' modifier can be specified multiple times: + + 0 - SAMPLE_IP can have arbitrary skid + 1 - SAMPLE_IP must have constant skid + 2 - SAMPLE_IP requested to have 0 skid + 3 - SAMPLE_IP must have 0 skid + +For Intel systems precise event sampling is implemented with PEBS +which supports up to precise-level 2. + +On AMD systems it is implemented using IBS (up to precise-level 2). +The precise modifier works with event types 0x76 (cpu-cycles, CPU +clocks not halted) and 0xC1 (micro-ops retired). Both events map to +IBS execution sampling (IBS op) with the IBS Op Counter Control bit +(IbsOpCntCtl) set respectively (see AMD64 Architecture Programmer’s +Manual Volume 2: System Programming, 13.3 Instruction-Based +Sampling). Examples to use IBS: + + perf record -a -e cpu-cycles:p ... # use ibs op counting cycles + perf record -a -e r076:p ... # same as -e cpu-cycles:p + perf record -a -e r0C1:p ... # use ibs op counting micro-ops + +RAW HARDWARE EVENT DESCRIPTOR +----------------------------- +Even when an event is not available in a symbolic form within perf right now, +it can be encoded in a per processor specific way. + +For instance For x86 CPUs NNN represents the raw register encoding with the +layout of IA32_PERFEVTSELx MSRs (see [Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide] Figure 30-1 Layout +of IA32_PERFEVTSELx MSRs) or AMD's PerfEvtSeln (see [AMD64 Architecture Programmer’s Manual Volume 2: System Programming], Page 344, +Figure 13-7 Performance Event-Select Register (PerfEvtSeln)). + +Note: Only the following bit fields can be set in x86 counter +registers: event, umask, edge, inv, cmask. Esp. guest/host only and +OS/user mode flags must be setup using <<EVENT_MODIFIERS, EVENT +MODIFIERS>>. + +Example: + +If the Intel docs for a QM720 Core i7 describe an event as: + + Event Umask Event Mask + Num. Value Mnemonic Description Comment + + A8H 01H LSD.UOPS Counts the number of micro-ops Use cmask=1 and + delivered by loop stream detector invert to count + cycles + +raw encoding of 0x1A8 can be used: + + perf stat -e r1a8 -a sleep 1 + perf record -e r1a8 ... + +You should refer to the processor specific documentation for getting these +details. Some of them are referenced in the SEE ALSO section below. + +PARAMETERIZED EVENTS +-------------------- + +Some pmu events listed by 'perf-list' will be displayed with '?' in them. For +example: + + hv_gpci/dtbp_ptitc,phys_processor_idx=?/ + +This means that when provided as an event, a value for '?' must +also be supplied. For example: + + perf stat -C 0 -e 'hv_gpci/dtbp_ptitc,phys_processor_idx=0x2/' ... + +OPTIONS +------- + +Without options all known events will be listed. + +To limit the list use: + +. 'hw' or 'hardware' to list hardware events such as cache-misses, etc. + +. 'sw' or 'software' to list software events such as context switches, etc. + +. 'cache' or 'hwcache' to list hardware cache events such as L1-dcache-loads, etc. + +. 'tracepoint' to list all tracepoint events, alternatively use + 'subsys_glob:event_glob' to filter by tracepoint subsystems such as sched, + block, etc. + +. 'pmu' to print the kernel supplied PMU events. + +. If none of the above is matched, it will apply the supplied glob to all + events, printing the ones that match. + +One or more types can be used at the same time, listing the events for the +types specified. + +Support raw format: + +. '--raw-dump', shows the raw-dump of all the events. +. '--raw-dump [hw|sw|cache|tracepoint|pmu|event_glob]', shows the raw-dump of + a certain kind of events. + +SEE ALSO +-------- +linkperf:perf-stat[1], linkperf:perf-top[1], +linkperf:perf-record[1], +http://www.intel.com/Assets/PDF/manual/253669.pdf[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide], +http://support.amd.com/us/Processor_TechDocs/24593_APM_v2.pdf[AMD64 Architecture Programmer’s Manual Volume 2: System Programming] diff --git a/kernel/tools/perf/Documentation/perf-lock.txt b/kernel/tools/perf/Documentation/perf-lock.txt new file mode 100644 index 000000000..ab25be28c --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-lock.txt @@ -0,0 +1,66 @@ +perf-lock(1) +============ + +NAME +---- +perf-lock - Analyze lock events + +SYNOPSIS +-------- +[verse] +'perf lock' {record|report|script|info} + +DESCRIPTION +----------- +You can analyze various lock behaviours +and statistics with this 'perf lock' command. + + 'perf lock record <command>' records lock events + between start and end <command>. And this command + produces the file "perf.data" which contains tracing + results of lock events. + + 'perf lock report' reports statistical data. + + 'perf lock script' shows raw lock events. + + 'perf lock info' shows metadata like threads or addresses + of lock instances. + +COMMON OPTIONS +-------------- + +-i:: +--input=<file>:: + Input file name. (default: perf.data unless stdin is a fifo) + +-v:: +--verbose:: + Be more verbose (show symbol address, etc). + +-D:: +--dump-raw-trace:: + Dump raw trace in ASCII. + +REPORT OPTIONS +-------------- + +-k:: +--key=<value>:: + Sorting key. Possible values: acquired (default), contended, + avg_wait, wait_total, wait_max, wait_min. + +INFO OPTIONS +------------ + +-t:: +--threads:: + dump thread list in perf.data + +-m:: +--map:: + dump map of lock instances (address:name table) + +SEE ALSO +-------- +linkperf:perf[1] diff --git a/kernel/tools/perf/Documentation/perf-mem.txt b/kernel/tools/perf/Documentation/perf-mem.txt new file mode 100644 index 000000000..43310d866 --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-mem.txt @@ -0,0 +1,53 @@ +perf-mem(1) +=========== + +NAME +---- +perf-mem - Profile memory accesses + +SYNOPSIS +-------- +[verse] +'perf mem' [<options>] (record [<command>] | report) + +DESCRIPTION +----------- +"perf mem record" runs a command and gathers memory operation data +from it, into perf.data. Perf record options are accepted and are passed through. + +"perf mem report" displays the result. It invokes perf report with the +right set of options to display a memory access profile. By default, loads +and stores are sampled. Use the -t option to limit to loads or stores. + +Note that on Intel systems the memory latency reported is the use-latency, +not the pure load (or store latency). Use latency includes any pipeline +queueing delays in addition to the memory subsystem latency. + +OPTIONS +------- +<command>...:: + Any command you can specify in a shell. + +-t:: +--type=:: + Select the memory operation type: load or store (default: load,store) + +-D:: +--dump-raw-samples=:: + Dump the raw decoded samples on the screen in a format that is easy to parse with + one sample per line. + +-x:: +--field-separator:: + Specify the field separator used when dump raw samples (-D option). By default, + The separator is the space character. + +-C:: +--cpu-list:: + Restrict dump of raw samples to those provided via this option. Note that the same + option can be passed in record mode. It will be interpreted the same way as perf + record. + +SEE ALSO +-------- +linkperf:perf-record[1], linkperf:perf-report[1] diff --git a/kernel/tools/perf/Documentation/perf-probe.txt b/kernel/tools/perf/Documentation/perf-probe.txt new file mode 100644 index 000000000..239609c09 --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-probe.txt @@ -0,0 +1,222 @@ +perf-probe(1) +============= + +NAME +---- +perf-probe - Define new dynamic tracepoints + +SYNOPSIS +-------- +[verse] +'perf probe' [options] --add='PROBE' [...] +or +'perf probe' [options] PROBE +or +'perf probe' [options] --del='[GROUP:]EVENT' [...] +or +'perf probe' --list +or +'perf probe' [options] --line='LINE' +or +'perf probe' [options] --vars='PROBEPOINT' + +DESCRIPTION +----------- +This command defines dynamic tracepoint events, by symbol and registers +without debuginfo, or by C expressions (C line numbers, C function names, +and C local variables) with debuginfo. + + +OPTIONS +------- +-k:: +--vmlinux=PATH:: + Specify vmlinux path which has debuginfo (Dwarf binary). + +-m:: +--module=MODNAME|PATH:: + Specify module name in which perf-probe searches probe points + or lines. If a path of module file is passed, perf-probe + treat it as an offline module (this means you can add a probe on + a module which has not been loaded yet). + +-s:: +--source=PATH:: + Specify path to kernel source. + +-v:: +--verbose:: + Be more verbose (show parsed arguments, etc). + Can not use with -q. + +-q:: +--quiet:: + Be quiet (do not show any messages including errors). + Can not use with -v. + +-a:: +--add=:: + Define a probe event (see PROBE SYNTAX for detail). + +-d:: +--del=:: + Delete probe events. This accepts glob wildcards('*', '?') and character + classes(e.g. [a-z], [!A-Z]). + +-l:: +--list:: + List up current probe events. + +-L:: +--line=:: + Show source code lines which can be probed. This needs an argument + which specifies a range of the source code. (see LINE SYNTAX for detail) + +-V:: +--vars=:: + Show available local variables at given probe point. The argument + syntax is same as PROBE SYNTAX, but NO ARGs. + +--externs:: + (Only for --vars) Show external defined variables in addition to local + variables. + +-F:: +--funcs:: + Show available functions in given module or kernel. With -x/--exec, + can also list functions in a user space executable / shared library. + +--filter=FILTER:: + (Only for --vars and --funcs) Set filter. FILTER is a combination of glob + pattern, see FILTER PATTERN for detail. + Default FILTER is "!__k???tab_* & !__crc_*" for --vars, and "!_*" + for --funcs. + If several filters are specified, only the last filter is used. + +-f:: +--force:: + Forcibly add events with existing name. + +-n:: +--dry-run:: + Dry run. With this option, --add and --del doesn't execute actual + adding and removal operations. + +--max-probes=NUM:: + Set the maximum number of probe points for an event. Default is 128. + +-x:: +--exec=PATH:: + Specify path to the executable or shared library file for user + space tracing. Can also be used with --funcs option. + +--demangle:: + Demangle application symbols. --no-demangle is also available + for disabling demangling. + +--demangle-kernel:: + Demangle kernel symbols. --no-demangle-kernel is also available + for disabling kernel demangling. + +In absence of -m/-x options, perf probe checks if the first argument after +the options is an absolute path name. If its an absolute path, perf probe +uses it as a target module/target user space binary to probe. + +PROBE SYNTAX +------------ +Probe points are defined by following syntax. + + 1) Define event based on function name + [EVENT=]FUNC[@SRC][:RLN|+OFFS|%return|;PTN] [ARG ...] + + 2) Define event based on source file with line number + [EVENT=]SRC:ALN [ARG ...] + + 3) Define event based on source file with lazy pattern + [EVENT=]SRC;PTN [ARG ...] + + +'EVENT' specifies the name of new event, if omitted, it will be set the name of the probed function. Currently, event group name is set as 'probe'. +'FUNC' specifies a probed function name, and it may have one of the following options; '+OFFS' is the offset from function entry address in bytes, ':RLN' is the relative-line number from function entry line, and '%return' means that it probes function return. And ';PTN' means lazy matching pattern (see LAZY MATCHING). Note that ';PTN' must be the end of the probe point definition. In addition, '@SRC' specifies a source file which has that function. +It is also possible to specify a probe point by the source line number or lazy matching by using 'SRC:ALN' or 'SRC;PTN' syntax, where 'SRC' is the source file path, ':ALN' is the line number and ';PTN' is the lazy matching pattern. +'ARG' specifies the arguments of this probe point, (see PROBE ARGUMENT). + +PROBE ARGUMENT +-------------- +Each probe argument follows below syntax. + + [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE] + +'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.) +'$vars' special argument is also available for NAME, it is expanded to the local variables which can access at given probe point. +'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type. + +On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid. + +LINE SYNTAX +----------- +Line range is described by following syntax. + + "FUNC[@SRC][:RLN[+NUM|-RLN2]]|SRC[:ALN[+NUM|-ALN2]]" + +FUNC specifies the function name of showing lines. 'RLN' is the start line +number from function entry line, and 'RLN2' is the end line number. As same as +probe syntax, 'SRC' means the source file path, 'ALN' is start line number, +and 'ALN2' is end line number in the file. It is also possible to specify how +many lines to show by using 'NUM'. Moreover, 'FUNC@SRC' combination is good +for searching a specific function when several functions share same name. +So, "source.c:100-120" shows lines between 100th to l20th in source.c file. And "func:10+20" shows 20 lines from 10th line of func function. + +LAZY MATCHING +------------- + The lazy line matching is similar to glob matching but ignoring spaces in both of pattern and target. So this accepts wildcards('*', '?') and character classes(e.g. [a-z], [!A-Z]). + +e.g. + 'a=*' can matches 'a=b', 'a = b', 'a == b' and so on. + +This provides some sort of flexibility and robustness to probe point definitions against minor code changes. For example, actual 10th line of schedule() can be moved easily by modifying schedule(), but the same line matching 'rq=cpu_rq*' may still exist in the function.) + +FILTER PATTERN +-------------- + The filter pattern is a glob matching pattern(s) to filter variables. + In addition, you can use "!" for specifying filter-out rule. You also can give several rules combined with "&" or "|", and fold those rules as one rule by using "(" ")". + +e.g. + With --filter "foo* | bar*", perf probe -V shows variables which start with "foo" or "bar". + With --filter "!foo* & *bar", perf probe -V shows variables which don't start with "foo" and end with "bar", like "fizzbar". But "foobar" is filtered out. + +EXAMPLES +-------- +Display which lines in schedule() can be probed: + + ./perf probe --line schedule + +Add a probe on schedule() function 12th line with recording cpu local variable: + + ./perf probe schedule:12 cpu + or + ./perf probe --add='schedule:12 cpu' + + this will add one or more probes which has the name start with "schedule". + + Add probes on lines in schedule() function which calls update_rq_clock(). + + ./perf probe 'schedule;update_rq_clock*' + or + ./perf probe --add='schedule;update_rq_clock*' + +Delete all probes on schedule(). + + ./perf probe --del='schedule*' + +Add probes at zfree() function on /bin/zsh + + ./perf probe -x /bin/zsh zfree or ./perf probe /bin/zsh zfree + +Add probes at malloc() function on libc + + ./perf probe -x /lib/libc.so.6 malloc or ./perf probe /lib/libc.so.6 malloc + +SEE ALSO +-------- +linkperf:perf-trace[1], linkperf:perf-record[1] diff --git a/kernel/tools/perf/Documentation/perf-record.txt b/kernel/tools/perf/Documentation/perf-record.txt new file mode 100644 index 000000000..4847a793d --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-record.txt @@ -0,0 +1,262 @@ +perf-record(1) +============== + +NAME +---- +perf-record - Run a command and record its profile into perf.data + +SYNOPSIS +-------- +[verse] +'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] <command> +'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] -- <command> [<options>] + +DESCRIPTION +----------- +This command runs a command and gathers a performance counter profile +from it, into perf.data - without displaying anything. + +This file can then be inspected later on, using 'perf report'. + + +OPTIONS +------- +<command>...:: + Any command you can specify in a shell. + +-e:: +--event=:: + Select the PMU event. Selection can be: + + - a symbolic event name (use 'perf list' to list all events) + + - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a + hexadecimal event descriptor. + + - a symbolically formed PMU event like 'pmu/param1=0x3,param2/' where + 'param1', 'param2', etc are defined as formats for the PMU in + /sys/bus/event_sources/devices/<pmu>/format/*. + + - a symbolically formed event like 'pmu/config=M,config1=N,config3=K/' + + where M, N, K are numbers (in decimal, hex, octal format). Acceptable + values for each of 'config', 'config1' and 'config2' are defined by + corresponding entries in /sys/bus/event_sources/devices/<pmu>/format/* + param1 and param2 are defined as formats for the PMU in: + /sys/bus/event_sources/devices/<pmu>/format/* + + - a hardware breakpoint event in the form of '\mem:addr[/len][:access]' + where addr is the address in memory you want to break in. + Access is the memory access type (read, write, execute) it can + be passed as follows: '\mem:addr[:[r][w][x]]'. len is the range, + number of bytes from specified addr, which the breakpoint will cover. + If you want to profile read-write accesses in 0x1000, just set + 'mem:0x1000:rw'. + If you want to profile write accesses in [0x1000~1008), just set + 'mem:0x1000/8:w'. + + - a group of events surrounded by a pair of brace ("{event1,event2,...}"). + Each event is separated by commas and the group should be quoted to + prevent the shell interpretation. You also need to use --group on + "perf report" to view group events together. + +--filter=<filter>:: + Event filter. + +-a:: +--all-cpus:: + System-wide collection from all CPUs. + +-p:: +--pid=:: + Record events on existing process ID (comma separated list). + +-t:: +--tid=:: + Record events on existing thread ID (comma separated list). + This option also disables inheritance by default. Enable it by adding + --inherit. + +-u:: +--uid=:: + Record events in threads owned by uid. Name or number. + +-r:: +--realtime=:: + Collect data with this RT SCHED_FIFO priority. + +--no-buffering:: + Collect data without buffering. + +-c:: +--count=:: + Event period to sample. + +-o:: +--output=:: + Output file name. + +-i:: +--no-inherit:: + Child tasks do not inherit counters. +-F:: +--freq=:: + Profile at this frequency. + +-m:: +--mmap-pages=:: + Number of mmap data pages (must be a power of two) or size + specification with appended unit character - B/K/M/G. The + size is rounded up to have nearest pages power of two value. + +--group:: + Put all events in a single event group. This precedes the --event + option and remains only for backward compatibility. See --event. + +-g:: + Enables call-graph (stack chain/backtrace) recording. + +--call-graph:: + Setup and enable call-graph (stack chain/backtrace) recording, + implies -g. + + Allows specifying "fp" (frame pointer) or "dwarf" + (DWARF's CFI - Call Frame Information) or "lbr" + (Hardware Last Branch Record facility) as the method to collect + the information used to show the call graphs. + + In some systems, where binaries are build with gcc + --fomit-frame-pointer, using the "fp" method will produce bogus + call graphs, using "dwarf", if available (perf tools linked to + the libunwind library) should be used instead. + Using the "lbr" method doesn't require any compiler options. It + will produce call graphs from the hardware LBR registers. The + main limition is that it is only available on new Intel + platforms, such as Haswell. It can only get user call chain. It + doesn't work with branch stack sampling at the same time. + +-q:: +--quiet:: + Don't print any message, useful for scripting. + +-v:: +--verbose:: + Be more verbose (show counter open errors, etc). + +-s:: +--stat:: + Per thread counts. + +-d:: +--data:: + Sample addresses. + +-T:: +--timestamp:: + Sample timestamps. Use it with 'perf report -D' to see the timestamps, + for instance. + +-n:: +--no-samples:: + Don't sample. + +-R:: +--raw-samples:: +Collect raw sample records from all opened counters (default for tracepoint counters). + +-C:: +--cpu:: +Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a +comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. +In per-thread mode with inheritance mode on (default), samples are captured only when +the thread executes on the designated CPUs. Default is to monitor all CPUs. + +-N:: +--no-buildid-cache:: +Do not update the buildid cache. This saves some overhead in situations +where the information in the perf.data file (which includes buildids) +is sufficient. + +-G name,...:: +--cgroup name,...:: +monitor only in the container (cgroup) called "name". This option is available only +in per-cpu mode. The cgroup filesystem must be mounted. All threads belonging to +container "name" are monitored when they run on the monitored CPUs. Multiple cgroups +can be provided. Each cgroup is applied to the corresponding event, i.e., first cgroup +to first event, second cgroup to second event and so on. It is possible to provide +an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must have +corresponding events, i.e., they always refer to events defined earlier on the command +line. + +-b:: +--branch-any:: +Enable taken branch stack sampling. Any type of taken branch may be sampled. +This is a shortcut for --branch-filter any. See --branch-filter for more infos. + +-j:: +--branch-filter:: +Enable taken branch stack sampling. Each sample captures a series of consecutive +taken branches. The number of branches captured with each sample depends on the +underlying hardware, the type of branches of interest, and the executed code. +It is possible to select the types of branches captured by enabling filters. The +following filters are defined: + + - any: any type of branches + - any_call: any function call or system call + - any_ret: any function return or system call return + - ind_call: any indirect branch + - u: only when the branch target is at the user level + - k: only when the branch target is in the kernel + - hv: only when the target is at the hypervisor level + - in_tx: only when the target is in a hardware transaction + - no_tx: only when the target is not in a hardware transaction + - abort_tx: only when the target is a hardware transaction abort + - cond: conditional branches + ++ +The option requires at least one branch type among any, any_call, any_ret, ind_call, cond. +The privilege levels may be omitted, in which case, the privilege levels of the associated +event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege +levels are subject to permissions. When sampling on multiple events, branch stack sampling +is enabled for all the sampling events. The sampled branch type is the same for all events. +The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k +Note that this feature may not be available on all processors. + +--weight:: +Enable weightened sampling. An additional weight is recorded per sample and can be +displayed with the weight and local_weight sort keys. This currently works for TSX +abort events and some memory events in precise mode on modern Intel CPUs. + +--transaction:: +Record transaction flags for transaction related events. + +--per-thread:: +Use per-thread mmaps. By default per-cpu mmaps are created. This option +overrides that and uses per-thread mmaps. A side-effect of that is that +inheritance is automatically disabled. --per-thread is ignored with a warning +if combined with -a or -C options. + +-D:: +--delay=:: +After starting the program, wait msecs before measuring. This is useful to +filter out the startup phase of the program, which is often very different. + +-I:: +--intr-regs:: +Capture machine state (registers) at interrupt, i.e., on counter overflows for +each sample. List of captured registers depends on the architecture. This option +is off by default. + +--running-time:: +Record running and enabled time for read events (:S) + +-k:: +--clockid:: +Sets the clock id to use for the various time fields in the perf_event_type +records. See clock_gettime(). In particular CLOCK_MONOTONIC and +CLOCK_MONOTONIC_RAW are supported, some events might also allow +CLOCK_BOOTTIME, CLOCK_REALTIME and CLOCK_TAI. + +SEE ALSO +-------- +linkperf:perf-stat[1], linkperf:perf-list[1] diff --git a/kernel/tools/perf/Documentation/perf-report.txt b/kernel/tools/perf/Documentation/perf-report.txt new file mode 100644 index 000000000..4879cf638 --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-report.txt @@ -0,0 +1,328 @@ +perf-report(1) +============== + +NAME +---- +perf-report - Read perf.data (created by perf record) and display the profile + +SYNOPSIS +-------- +[verse] +'perf report' [-i <file> | --input=file] + +DESCRIPTION +----------- +This command displays the performance counter profile information recorded +via perf record. + +OPTIONS +------- +-i:: +--input=:: + Input file name. (default: perf.data unless stdin is a fifo) + +-v:: +--verbose:: + Be more verbose. (show symbol address, etc) + +-n:: +--show-nr-samples:: + Show the number of samples for each symbol + +--showcpuutilization:: + Show sample percentage for different cpu modes. + +-T:: +--threads:: + Show per-thread event counters +-c:: +--comms=:: + Only consider symbols in these comms. CSV that understands + file://filename entries. This option will affect the percentage of + the overhead column. See --percentage for more info. +--pid=:: + Only show events for given process ID (comma separated list). + +--tid=:: + Only show events for given thread ID (comma separated list). +-d:: +--dsos=:: + Only consider symbols in these dsos. CSV that understands + file://filename entries. This option will affect the percentage of + the overhead column. See --percentage for more info. +-S:: +--symbols=:: + Only consider these symbols. CSV that understands + file://filename entries. This option will affect the percentage of + the overhead column. See --percentage for more info. + +--symbol-filter=:: + Only show symbols that match (partially) with this filter. + +-U:: +--hide-unresolved:: + Only display entries resolved to a symbol. + +-s:: +--sort=:: + Sort histogram entries by given key(s) - multiple keys can be specified + in CSV format. Following sort keys are available: + pid, comm, dso, symbol, parent, cpu, srcline, weight, local_weight. + + Each key has following meaning: + + - comm: command (name) of the task which can be read via /proc/<pid>/comm + - pid: command and tid of the task + - dso: name of library or module executed at the time of sample + - symbol: name of function executed at the time of sample + - parent: name of function matched to the parent regex filter. Unmatched + entries are displayed as "[other]". + - cpu: cpu number the task ran at the time of sample + - srcline: filename and line number executed at the time of sample. The + DWARF debugging info must be provided. + - weight: Event specific weight, e.g. memory latency or transaction + abort cost. This is the global weight. + - local_weight: Local weight version of the weight above. + - transaction: Transaction abort flags. + - overhead: Overhead percentage of sample + - overhead_sys: Overhead percentage of sample running in system mode + - overhead_us: Overhead percentage of sample running in user mode + - overhead_guest_sys: Overhead percentage of sample running in system mode + on guest machine + - overhead_guest_us: Overhead percentage of sample running in user mode on + guest machine + - sample: Number of sample + - period: Raw number of event count of sample + + By default, comm, dso and symbol keys are used. + (i.e. --sort comm,dso,symbol) + + If --branch-stack option is used, following sort keys are also + available: + dso_from, dso_to, symbol_from, symbol_to, mispredict. + + - dso_from: name of library or module branched from + - dso_to: name of library or module branched to + - symbol_from: name of function branched from + - symbol_to: name of function branched to + - mispredict: "N" for predicted branch, "Y" for mispredicted branch + - in_tx: branch in TSX transaction + - abort: TSX transaction abort. + + And default sort keys are changed to comm, dso_from, symbol_from, dso_to + and symbol_to, see '--branch-stack'. + +-F:: +--fields=:: + Specify output field - multiple keys can be specified in CSV format. + Following fields are available: + overhead, overhead_sys, overhead_us, overhead_children, sample and period. + Also it can contain any sort key(s). + + By default, every sort keys not specified in -F will be appended + automatically. + + If --mem-mode option is used, following sort keys are also available + (incompatible with --branch-stack): + symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline. + + - symbol_daddr: name of data symbol being executed on at the time of sample + - dso_daddr: name of library or module containing the data being executed + on at the time of sample + - locked: whether the bus was locked at the time of sample + - tlb: type of tlb access for the data at the time of sample + - mem: type of memory access for the data at the time of sample + - snoop: type of snoop (if any) for the data at the time of sample + - dcacheline: the cacheline the data address is on at the time of sample + + And default sort keys are changed to local_weight, mem, sym, dso, + symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'. + +-p:: +--parent=<regex>:: + A regex filter to identify parent. The parent is a caller of this + function and searched through the callchain, thus it requires callchain + information recorded. The pattern is in the exteneded regex format and + defaults to "\^sys_|^do_page_fault", see '--sort parent'. + +-x:: +--exclude-other:: + Only display entries with parent-match. + +-w:: +--column-widths=<width[,width...]>:: + Force each column width to the provided list, for large terminal + readability. 0 means no limit (default behavior). + +-t:: +--field-separator=:: + Use a special separator character and don't pad with spaces, replacing + all occurrences of this separator in symbol names (and other output) + with a '.' character, that thus it's the only non valid separator. + +-D:: +--dump-raw-trace:: + Dump raw trace in ASCII. + +-g [type,min[,limit],order[,key][,branch]]:: +--call-graph:: + Display call chains using type, min percent threshold, optional print + limit and order. + type can be either: + - flat: single column, linear exposure of call chains. + - graph: use a graph tree, displaying absolute overhead rates. + - fractal: like graph, but displays relative rates. Each branch of + the tree is considered as a new profiled object. + + + order can be either: + - callee: callee based call graph. + - caller: inverted caller based call graph. + + key can be: + - function: compare on functions + - address: compare on individual code addresses + + branch can be: + - branch: include last branch information in callgraph + when available. Usually more convenient to use --branch-history + for this. + + Default: fractal,0.5,callee,function. + +--children:: + Accumulate callchain of children to parent entry so that then can + show up in the output. The output will have a new "Children" column + and will be sorted on the data. It requires callchains are recorded. + +--max-stack:: + Set the stack depth limit when parsing the callchain, anything + beyond the specified depth will be ignored. This is a trade-off + between information loss and faster processing especially for + workloads that can have a very long callchain stack. + + Default: 127 + +-G:: +--inverted:: + alias for inverted caller based call graph. + +--ignore-callees=<regex>:: + Ignore callees of the function(s) matching the given regex. + This has the effect of collecting the callers of each such + function into one place in the call-graph tree. + +--pretty=<key>:: + Pretty printing style. key: normal, raw + +--stdio:: Use the stdio interface. + +--tui:: Use the TUI interface, that is integrated with annotate and allows + zooming into DSOs or threads, among other features. Use of --tui + requires a tty, if one is not present, as when piping to other + commands, the stdio interface is used. + +--gtk:: Use the GTK2 interface. + +-k:: +--vmlinux=<file>:: + vmlinux pathname + +--kallsyms=<file>:: + kallsyms pathname + +-m:: +--modules:: + Load module symbols. WARNING: This should only be used with -k and + a LIVE kernel. + +-f:: +--force:: + Don't complain, do it. + +--symfs=<directory>:: + Look for files with symbols relative to this directory. + +-C:: +--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can + be provided as a comma-separated list with no space: 0,1. Ranges of + CPUs are specified with -: 0-2. Default is to report samples on all + CPUs. + +-M:: +--disassembler-style=:: Set disassembler style for objdump. + +--source:: + Interleave source code with assembly code. Enabled by default, + disable with --no-source. + +--asm-raw:: + Show raw instruction encoding of assembly instructions. + +--show-total-period:: Show a column with the sum of periods. + +-I:: +--show-info:: + Display extended information about the perf.data file. This adds + information which may be very large and thus may clutter the display. + It currently includes: cpu and numa topology of the host system. + +-b:: +--branch-stack:: + Use the addresses of sampled taken branches instead of the instruction + address to build the histograms. To generate meaningful output, the + perf.data file must have been obtained using perf record -b or + perf record --branch-filter xxx where xxx is a branch filter option. + perf report is able to auto-detect whether a perf.data file contains + branch stacks and it will automatically switch to the branch view mode, + unless --no-branch-stack is used. + +--branch-history:: + Add the addresses of sampled taken branches to the callstack. + This allows to examine the path the program took to each sample. + The data collection must have used -b (or -j) and -g. + +--objdump=<path>:: + Path to objdump binary. + +--group:: + Show event group information together. + +--demangle:: + Demangle symbol names to human readable form. It's enabled by default, + disable with --no-demangle. + +--demangle-kernel:: + Demangle kernel symbol names to human readable form (for C++ kernels). + +--mem-mode:: + Use the data addresses of samples in addition to instruction addresses + to build the histograms. To generate meaningful output, the perf.data + file must have been obtained using perf record -d -W and using a + special event -e cpu/mem-loads/ or -e cpu/mem-stores/. See + 'perf mem' for simpler access. + +--percent-limit:: + Do not show entries which have an overhead under that percent. + (Default: 0). + +--percentage:: + Determine how to display the overhead percentage of filtered entries. + Filters can be applied by --comms, --dsos and/or --symbols options and + Zoom operations on the TUI (thread, dso, etc). + + "relative" means it's relative to filtered entries only so that the + sum of shown entries will be always 100%. "absolute" means it retains + the original value before and after the filter is applied. + +--header:: + Show header information in the perf.data file. This includes + various information like hostname, OS and perf version, cpu/mem + info, perf command line, event list and so on. Currently only + --stdio output supports this feature. + +--header-only:: + Show only perf.data header (forces --stdio). + +SEE ALSO +-------- +linkperf:perf-stat[1], linkperf:perf-annotate[1] diff --git a/kernel/tools/perf/Documentation/perf-sched.txt b/kernel/tools/perf/Documentation/perf-sched.txt new file mode 100644 index 000000000..8ff4df956 --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-sched.txt @@ -0,0 +1,55 @@ +perf-sched(1) +============== + +NAME +---- +perf-sched - Tool to trace/measure scheduler properties (latencies) + +SYNOPSIS +-------- +[verse] +'perf sched' {record|latency|map|replay|script} + +DESCRIPTION +----------- +There are five variants of perf sched: + + 'perf sched record <command>' to record the scheduling events + of an arbitrary workload. + + 'perf sched latency' to report the per task scheduling latencies + and other scheduling properties of the workload. + + 'perf sched script' to see a detailed trace of the workload that + was recorded (aliased to 'perf script' for now). + + 'perf sched replay' to simulate the workload that was recorded + via perf sched record. (this is done by starting up mockup threads + that mimic the workload based on the events in the trace. These + threads can then replay the timings (CPU runtime and sleep patterns) + of the workload as it occurred when it was recorded - and can repeat + it a number of times, measuring its performance.) + + 'perf sched map' to print a textual context-switching outline of + workload captured via perf sched record. Columns stand for + individual CPUs, and the two-letter shortcuts stand for tasks that + are running on a CPU. A '*' denotes the CPU that had the event, and + a dot signals an idle CPU. + +OPTIONS +------- +-i:: +--input=<file>:: + Input file name. (default: perf.data unless stdin is a fifo) + +-v:: +--verbose:: + Be more verbose. (show symbol address, etc) + +-D:: +--dump-raw-trace=:: + Display verbose dump of the sched data. + +SEE ALSO +-------- +linkperf:perf-record[1] diff --git a/kernel/tools/perf/Documentation/perf-script-perl.txt b/kernel/tools/perf/Documentation/perf-script-perl.txt new file mode 100644 index 000000000..dfbb506d2 --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-script-perl.txt @@ -0,0 +1,216 @@ +perf-script-perl(1) +================== + +NAME +---- +perf-script-perl - Process trace data with a Perl script + +SYNOPSIS +-------- +[verse] +'perf script' [-s [Perl]:script[.pl] ] + +DESCRIPTION +----------- + +This perf script option is used to process perf script data using perf's +built-in Perl interpreter. It reads and processes the input file and +displays the results of the trace analysis implemented in the given +Perl script, if any. + +STARTER SCRIPTS +--------------- + +You can avoid reading the rest of this document by running 'perf script +-g perl' in the same directory as an existing perf.data trace file. +That will generate a starter script containing a handler for each of +the event types in the trace file; it simply prints every available +field for each event in the trace file. + +You can also look at the existing scripts in +~/libexec/perf-core/scripts/perl for typical examples showing how to +do basic things like aggregate event data, print results, etc. Also, +the check-perf-script.pl script, while not interesting for its results, +attempts to exercise all of the main scripting features. + +EVENT HANDLERS +-------------- + +When perf script is invoked using a trace script, a user-defined +'handler function' is called for each event in the trace. If there's +no handler function defined for a given event type, the event is +ignored (or passed to a 'trace_handled' function, see below) and the +next event is processed. + +Most of the event's field values are passed as arguments to the +handler function; some of the less common ones aren't - those are +available as calls back into the perf executable (see below). + +As an example, the following perf record command can be used to record +all sched_wakeup events in the system: + + # perf record -a -e sched:sched_wakeup + +Traces meant to be processed using a script should be recorded with +the above option: -a to enable system-wide collection. + +The format file for the sched_wakep event defines the following fields +(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format): + +---- + format: + field:unsigned short common_type; + field:unsigned char common_flags; + field:unsigned char common_preempt_count; + field:int common_pid; + + field:char comm[TASK_COMM_LEN]; + field:pid_t pid; + field:int prio; + field:int success; + field:int target_cpu; +---- + +The handler function for this event would be defined as: + +---- +sub sched::sched_wakeup +{ + my ($event_name, $context, $common_cpu, $common_secs, + $common_nsecs, $common_pid, $common_comm, + $comm, $pid, $prio, $success, $target_cpu) = @_; +} +---- + +The handler function takes the form subsystem::event_name. + +The $common_* arguments in the handler's argument list are the set of +arguments passed to all event handlers; some of the fields correspond +to the common_* fields in the format file, but some are synthesized, +and some of the common_* fields aren't common enough to to be passed +to every event as arguments but are available as library functions. + +Here's a brief description of each of the invariant event args: + + $event_name the name of the event as text + $context an opaque 'cookie' used in calls back into perf + $common_cpu the cpu the event occurred on + $common_secs the secs portion of the event timestamp + $common_nsecs the nsecs portion of the event timestamp + $common_pid the pid of the current task + $common_comm the name of the current process + +All of the remaining fields in the event's format file have +counterparts as handler function arguments of the same name, as can be +seen in the example above. + +The above provides the basics needed to directly access every field of +every event in a trace, which covers 90% of what you need to know to +write a useful trace script. The sections below cover the rest. + +SCRIPT LAYOUT +------------- + +Every perf script Perl script should start by setting up a Perl module +search path and 'use'ing a few support modules (see module +descriptions below): + +---- + use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib"; + use lib "./Perf-Trace-Util/lib"; + use Perf::Trace::Core; + use Perf::Trace::Context; + use Perf::Trace::Util; +---- + +The rest of the script can contain handler functions and support +functions in any order. + +Aside from the event handler functions discussed above, every script +can implement a set of optional functions: + +*trace_begin*, if defined, is called before any event is processed and +gives scripts a chance to do setup tasks: + +---- + sub trace_begin + { + } +---- + +*trace_end*, if defined, is called after all events have been + processed and gives scripts a chance to do end-of-script tasks, such + as display results: + +---- +sub trace_end +{ +} +---- + +*trace_unhandled*, if defined, is called after for any event that + doesn't have a handler explicitly defined for it. The standard set + of common arguments are passed into it: + +---- +sub trace_unhandled +{ + my ($event_name, $context, $common_cpu, $common_secs, + $common_nsecs, $common_pid, $common_comm) = @_; +} +---- + +The remaining sections provide descriptions of each of the available +built-in perf script Perl modules and their associated functions. + +AVAILABLE MODULES AND FUNCTIONS +------------------------------- + +The following sections describe the functions and variables available +via the various Perf::Trace::* Perl modules. To use the functions and +variables from the given module, add the corresponding 'use +Perf::Trace::XXX' line to your perf script script. + +Perf::Trace::Core Module +~~~~~~~~~~~~~~~~~~~~~~~~ + +These functions provide some essential functions to user scripts. + +The *flag_str* and *symbol_str* functions provide human-readable +strings for flag and symbolic fields. These correspond to the strings +and values parsed from the 'print fmt' fields of the event format +files: + + flag_str($event_name, $field_name, $field_value) - returns the string representation corresponding to $field_value for the flag field $field_name of event $event_name + symbol_str($event_name, $field_name, $field_value) - returns the string representation corresponding to $field_value for the symbolic field $field_name of event $event_name + +Perf::Trace::Context Module +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some of the 'common' fields in the event format file aren't all that +common, but need to be made accessible to user scripts nonetheless. + +Perf::Trace::Context defines a set of functions that can be used to +access this data in the context of the current event. Each of these +functions expects a $context variable, which is the same as the +$context variable passed into every event handler as the second +argument. + + common_pc($context) - returns common_preempt count for the current event + common_flags($context) - returns common_flags for the current event + common_lock_depth($context) - returns common_lock_depth for the current event + +Perf::Trace::Util Module +~~~~~~~~~~~~~~~~~~~~~~~~ + +Various utility functions for use with perf script: + + nsecs($secs, $nsecs) - returns total nsecs given secs/nsecs pair + nsecs_secs($nsecs) - returns whole secs portion given nsecs + nsecs_nsecs($nsecs) - returns nsecs remainder given nsecs + nsecs_str($nsecs) - returns printable string in the form secs.nsecs + avg($total, $n) - returns average given a sum and a total number of values + +SEE ALSO +-------- +linkperf:perf-script[1] diff --git a/kernel/tools/perf/Documentation/perf-script-python.txt b/kernel/tools/perf/Documentation/perf-script-python.txt new file mode 100644 index 000000000..54acba221 --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-script-python.txt @@ -0,0 +1,620 @@ +perf-script-python(1) +==================== + +NAME +---- +perf-script-python - Process trace data with a Python script + +SYNOPSIS +-------- +[verse] +'perf script' [-s [Python]:script[.py] ] + +DESCRIPTION +----------- + +This perf script option is used to process perf script data using perf's +built-in Python interpreter. It reads and processes the input file and +displays the results of the trace analysis implemented in the given +Python script, if any. + +A QUICK EXAMPLE +--------------- + +This section shows the process, start to finish, of creating a working +Python script that aggregates and extracts useful information from a +raw perf script stream. You can avoid reading the rest of this +document if an example is enough for you; the rest of the document +provides more details on each step and lists the library functions +available to script writers. + +This example actually details the steps that were used to create the +'syscall-counts' script you see when you list the available perf script +scripts via 'perf script -l'. As such, this script also shows how to +integrate your script into the list of general-purpose 'perf script' +scripts listed by that command. + +The syscall-counts script is a simple script, but demonstrates all the +basic ideas necessary to create a useful script. Here's an example +of its output (syscall names are not yet supported, they will appear +as numbers): + +---- +syscall events: + +event count +---------------------------------------- ----------- +sys_write 455067 +sys_getdents 4072 +sys_close 3037 +sys_swapoff 1769 +sys_read 923 +sys_sched_setparam 826 +sys_open 331 +sys_newfstat 326 +sys_mmap 217 +sys_munmap 216 +sys_futex 141 +sys_select 102 +sys_poll 84 +sys_setitimer 12 +sys_writev 8 +15 8 +sys_lseek 7 +sys_rt_sigprocmask 6 +sys_wait4 3 +sys_ioctl 3 +sys_set_robust_list 1 +sys_exit 1 +56 1 +sys_access 1 +---- + +Basically our task is to keep a per-syscall tally that gets updated +every time a system call occurs in the system. Our script will do +that, but first we need to record the data that will be processed by +that script. Theoretically, there are a couple of ways we could do +that: + +- we could enable every event under the tracing/events/syscalls + directory, but this is over 600 syscalls, well beyond the number + allowable by perf. These individual syscall events will however be + useful if we want to later use the guidance we get from the + general-purpose scripts to drill down and get more detail about + individual syscalls of interest. + +- we can enable the sys_enter and/or sys_exit syscalls found under + tracing/events/raw_syscalls. These are called for all syscalls; the + 'id' field can be used to distinguish between individual syscall + numbers. + +For this script, we only need to know that a syscall was entered; we +don't care how it exited, so we'll use 'perf record' to record only +the sys_enter events: + +---- +# perf record -a -e raw_syscalls:sys_enter + +^C[ perf record: Woken up 1 times to write data ] +[ perf record: Captured and wrote 56.545 MB perf.data (~2470503 samples) ] +---- + +The options basically say to collect data for every syscall event +system-wide and multiplex the per-cpu output into a single stream. +That single stream will be recorded in a file in the current directory +called perf.data. + +Once we have a perf.data file containing our data, we can use the -g +'perf script' option to generate a Python script that will contain a +callback handler for each event type found in the perf.data trace +stream (for more details, see the STARTER SCRIPTS section). + +---- +# perf script -g python +generated Python script: perf-script.py + +The output file created also in the current directory is named +perf-script.py. Here's the file in its entirety: + +# perf script event handlers, generated by perf script -g python +# Licensed under the terms of the GNU GPL License version 2 + +# The common_* event handler fields are the most useful fields common to +# all events. They don't necessarily correspond to the 'common_*' fields +# in the format files. Those fields not available as handler params can +# be retrieved using Python functions of the form common_*(context). +# See the perf-script-python Documentation for the list of available functions. + +import os +import sys + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +from perf_trace_context import * +from Core import * + +def trace_begin(): + print "in trace_begin" + +def trace_end(): + print "in trace_end" + +def raw_syscalls__sys_enter(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + id, args): + print_header(event_name, common_cpu, common_secs, common_nsecs, + common_pid, common_comm) + + print "id=%d, args=%s\n" % \ + (id, args), + +def trace_unhandled(event_name, context, common_cpu, common_secs, common_nsecs, + common_pid, common_comm): + print_header(event_name, common_cpu, common_secs, common_nsecs, + common_pid, common_comm) + +def print_header(event_name, cpu, secs, nsecs, pid, comm): + print "%-20s %5u %05u.%09u %8u %-20s " % \ + (event_name, cpu, secs, nsecs, pid, comm), +---- + +At the top is a comment block followed by some import statements and a +path append which every perf script script should include. + +Following that are a couple generated functions, trace_begin() and +trace_end(), which are called at the beginning and the end of the +script respectively (for more details, see the SCRIPT_LAYOUT section +below). + +Following those are the 'event handler' functions generated one for +every event in the 'perf record' output. The handler functions take +the form subsystem__event_name, and contain named parameters, one for +each field in the event; in this case, there's only one event, +raw_syscalls__sys_enter(). (see the EVENT HANDLERS section below for +more info on event handlers). + +The final couple of functions are, like the begin and end functions, +generated for every script. The first, trace_unhandled(), is called +every time the script finds an event in the perf.data file that +doesn't correspond to any event handler in the script. This could +mean either that the record step recorded event types that it wasn't +really interested in, or the script was run against a trace file that +doesn't correspond to the script. + +The script generated by -g option simply prints a line for each +event found in the trace stream i.e. it basically just dumps the event +and its parameter values to stdout. The print_header() function is +simply a utility function used for that purpose. Let's rename the +script and run it to see the default output: + +---- +# mv perf-script.py syscall-counts.py +# perf script -s syscall-counts.py + +raw_syscalls__sys_enter 1 00840.847582083 7506 perf id=1, args= +raw_syscalls__sys_enter 1 00840.847595764 7506 perf id=1, args= +raw_syscalls__sys_enter 1 00840.847620860 7506 perf id=1, args= +raw_syscalls__sys_enter 1 00840.847710478 6533 npviewer.bin id=78, args= +raw_syscalls__sys_enter 1 00840.847719204 6533 npviewer.bin id=142, args= +raw_syscalls__sys_enter 1 00840.847755445 6533 npviewer.bin id=3, args= +raw_syscalls__sys_enter 1 00840.847775601 6533 npviewer.bin id=3, args= +raw_syscalls__sys_enter 1 00840.847781820 6533 npviewer.bin id=3, args= +. +. +. +---- + +Of course, for this script, we're not interested in printing every +trace event, but rather aggregating it in a useful way. So we'll get +rid of everything to do with printing as well as the trace_begin() and +trace_unhandled() functions, which we won't be using. That leaves us +with this minimalistic skeleton: + +---- +import os +import sys + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +from perf_trace_context import * +from Core import * + +def trace_end(): + print "in trace_end" + +def raw_syscalls__sys_enter(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + id, args): +---- + +In trace_end(), we'll simply print the results, but first we need to +generate some results to print. To do that we need to have our +sys_enter() handler do the necessary tallying until all events have +been counted. A hash table indexed by syscall id is a good way to +store that information; every time the sys_enter() handler is called, +we simply increment a count associated with that hash entry indexed by +that syscall id: + +---- + syscalls = autodict() + + try: + syscalls[id] += 1 + except TypeError: + syscalls[id] = 1 +---- + +The syscalls 'autodict' object is a special kind of Python dictionary +(implemented in Core.py) that implements Perl's 'autovivifying' hashes +in Python i.e. with autovivifying hashes, you can assign nested hash +values without having to go to the trouble of creating intermediate +levels if they don't exist e.g syscalls[comm][pid][id] = 1 will create +the intermediate hash levels and finally assign the value 1 to the +hash entry for 'id' (because the value being assigned isn't a hash +object itself, the initial value is assigned in the TypeError +exception. Well, there may be a better way to do this in Python but +that's what works for now). + +Putting that code into the raw_syscalls__sys_enter() handler, we +effectively end up with a single-level dictionary keyed on syscall id +and having the counts we've tallied as values. + +The print_syscall_totals() function iterates over the entries in the +dictionary and displays a line for each entry containing the syscall +name (the dictionary keys contain the syscall ids, which are passed to +the Util function syscall_name(), which translates the raw syscall +numbers to the corresponding syscall name strings). The output is +displayed after all the events in the trace have been processed, by +calling the print_syscall_totals() function from the trace_end() +handler called at the end of script processing. + +The final script producing the output shown above is shown in its +entirety below (syscall_name() helper is not yet available, you can +only deal with id's for now): + +---- +import os +import sys + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +from perf_trace_context import * +from Core import * +from Util import * + +syscalls = autodict() + +def trace_end(): + print_syscall_totals() + +def raw_syscalls__sys_enter(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + id, args): + try: + syscalls[id] += 1 + except TypeError: + syscalls[id] = 1 + +def print_syscall_totals(): + if for_comm is not None: + print "\nsyscall events for %s:\n\n" % (for_comm), + else: + print "\nsyscall events:\n\n", + + print "%-40s %10s\n" % ("event", "count"), + print "%-40s %10s\n" % ("----------------------------------------", \ + "-----------"), + + for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \ + reverse = True): + print "%-40s %10d\n" % (syscall_name(id), val), +---- + +The script can be run just as before: + + # perf script -s syscall-counts.py + +So those are the essential steps in writing and running a script. The +process can be generalized to any tracepoint or set of tracepoints +you're interested in - basically find the tracepoint(s) you're +interested in by looking at the list of available events shown by +'perf list' and/or look in /sys/kernel/debug/tracing events for +detailed event and field info, record the corresponding trace data +using 'perf record', passing it the list of interesting events, +generate a skeleton script using 'perf script -g python' and modify the +code to aggregate and display it for your particular needs. + +After you've done that you may end up with a general-purpose script +that you want to keep around and have available for future use. By +writing a couple of very simple shell scripts and putting them in the +right place, you can have your script listed alongside the other +scripts listed by the 'perf script -l' command e.g.: + +---- +root@tropicana:~# perf script -l +List of available trace scripts: + wakeup-latency system-wide min/max/avg wakeup latency + rw-by-file <comm> r/w activity for a program, by file + rw-by-pid system-wide r/w activity +---- + +A nice side effect of doing this is that you also then capture the +probably lengthy 'perf record' command needed to record the events for +the script. + +To have the script appear as a 'built-in' script, you write two simple +scripts, one for recording and one for 'reporting'. + +The 'record' script is a shell script with the same base name as your +script, but with -record appended. The shell script should be put +into the perf/scripts/python/bin directory in the kernel source tree. +In that script, you write the 'perf record' command-line needed for +your script: + +---- +# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-record + +#!/bin/bash +perf record -a -e raw_syscalls:sys_enter +---- + +The 'report' script is also a shell script with the same base name as +your script, but with -report appended. It should also be located in +the perf/scripts/python/bin directory. In that script, you write the +'perf script -s' command-line needed for running your script: + +---- +# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-report + +#!/bin/bash +# description: system-wide syscall counts +perf script -s ~/libexec/perf-core/scripts/python/syscall-counts.py +---- + +Note that the location of the Python script given in the shell script +is in the libexec/perf-core/scripts/python directory - this is where +the script will be copied by 'make install' when you install perf. +For the installation to install your script there, your script needs +to be located in the perf/scripts/python directory in the kernel +source tree: + +---- +# ls -al kernel-source/tools/perf/scripts/python + +root@tropicana:/home/trz/src/tip# ls -al tools/perf/scripts/python +total 32 +drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 . +drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 .. +drwxr-xr-x 2 trz trz 4096 2010-01-26 22:29 bin +-rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-script.py +drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 Perf-Trace-Util +-rw-r--r-- 1 trz trz 1462 2010-01-26 22:30 syscall-counts.py +---- + +Once you've done that (don't forget to do a new 'make install', +otherwise your script won't show up at run-time), 'perf script -l' +should show a new entry for your script: + +---- +root@tropicana:~# perf script -l +List of available trace scripts: + wakeup-latency system-wide min/max/avg wakeup latency + rw-by-file <comm> r/w activity for a program, by file + rw-by-pid system-wide r/w activity + syscall-counts system-wide syscall counts +---- + +You can now perform the record step via 'perf script record': + + # perf script record syscall-counts + +and display the output using 'perf script report': + + # perf script report syscall-counts + +STARTER SCRIPTS +--------------- + +You can quickly get started writing a script for a particular set of +trace data by generating a skeleton script using 'perf script -g +python' in the same directory as an existing perf.data trace file. +That will generate a starter script containing a handler for each of +the event types in the trace file; it simply prints every available +field for each event in the trace file. + +You can also look at the existing scripts in +~/libexec/perf-core/scripts/python for typical examples showing how to +do basic things like aggregate event data, print results, etc. Also, +the check-perf-script.py script, while not interesting for its results, +attempts to exercise all of the main scripting features. + +EVENT HANDLERS +-------------- + +When perf script is invoked using a trace script, a user-defined +'handler function' is called for each event in the trace. If there's +no handler function defined for a given event type, the event is +ignored (or passed to a 'trace_handled' function, see below) and the +next event is processed. + +Most of the event's field values are passed as arguments to the +handler function; some of the less common ones aren't - those are +available as calls back into the perf executable (see below). + +As an example, the following perf record command can be used to record +all sched_wakeup events in the system: + + # perf record -a -e sched:sched_wakeup + +Traces meant to be processed using a script should be recorded with +the above option: -a to enable system-wide collection. + +The format file for the sched_wakep event defines the following fields +(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format): + +---- + format: + field:unsigned short common_type; + field:unsigned char common_flags; + field:unsigned char common_preempt_count; + field:int common_pid; + + field:char comm[TASK_COMM_LEN]; + field:pid_t pid; + field:int prio; + field:int success; + field:int target_cpu; +---- + +The handler function for this event would be defined as: + +---- +def sched__sched_wakeup(event_name, context, common_cpu, common_secs, + common_nsecs, common_pid, common_comm, + comm, pid, prio, success, target_cpu): + pass +---- + +The handler function takes the form subsystem__event_name. + +The common_* arguments in the handler's argument list are the set of +arguments passed to all event handlers; some of the fields correspond +to the common_* fields in the format file, but some are synthesized, +and some of the common_* fields aren't common enough to to be passed +to every event as arguments but are available as library functions. + +Here's a brief description of each of the invariant event args: + + event_name the name of the event as text + context an opaque 'cookie' used in calls back into perf + common_cpu the cpu the event occurred on + common_secs the secs portion of the event timestamp + common_nsecs the nsecs portion of the event timestamp + common_pid the pid of the current task + common_comm the name of the current process + +All of the remaining fields in the event's format file have +counterparts as handler function arguments of the same name, as can be +seen in the example above. + +The above provides the basics needed to directly access every field of +every event in a trace, which covers 90% of what you need to know to +write a useful trace script. The sections below cover the rest. + +SCRIPT LAYOUT +------------- + +Every perf script Python script should start by setting up a Python +module search path and 'import'ing a few support modules (see module +descriptions below): + +---- + import os + import sys + + sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + + from perf_trace_context import * + from Core import * +---- + +The rest of the script can contain handler functions and support +functions in any order. + +Aside from the event handler functions discussed above, every script +can implement a set of optional functions: + +*trace_begin*, if defined, is called before any event is processed and +gives scripts a chance to do setup tasks: + +---- +def trace_begin: + pass +---- + +*trace_end*, if defined, is called after all events have been + processed and gives scripts a chance to do end-of-script tasks, such + as display results: + +---- +def trace_end: + pass +---- + +*trace_unhandled*, if defined, is called after for any event that + doesn't have a handler explicitly defined for it. The standard set + of common arguments are passed into it: + +---- +def trace_unhandled(event_name, context, common_cpu, common_secs, + common_nsecs, common_pid, common_comm): + pass +---- + +The remaining sections provide descriptions of each of the available +built-in perf script Python modules and their associated functions. + +AVAILABLE MODULES AND FUNCTIONS +------------------------------- + +The following sections describe the functions and variables available +via the various perf script Python modules. To use the functions and +variables from the given module, add the corresponding 'from XXXX +import' line to your perf script script. + +Core.py Module +~~~~~~~~~~~~~~ + +These functions provide some essential functions to user scripts. + +The *flag_str* and *symbol_str* functions provide human-readable +strings for flag and symbolic fields. These correspond to the strings +and values parsed from the 'print fmt' fields of the event format +files: + + flag_str(event_name, field_name, field_value) - returns the string representation corresponding to field_value for the flag field field_name of event event_name + symbol_str(event_name, field_name, field_value) - returns the string representation corresponding to field_value for the symbolic field field_name of event event_name + +The *autodict* function returns a special kind of Python +dictionary that implements Perl's 'autovivifying' hashes in Python +i.e. with autovivifying hashes, you can assign nested hash values +without having to go to the trouble of creating intermediate levels if +they don't exist. + + autodict() - returns an autovivifying dictionary instance + + +perf_trace_context Module +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some of the 'common' fields in the event format file aren't all that +common, but need to be made accessible to user scripts nonetheless. + +perf_trace_context defines a set of functions that can be used to +access this data in the context of the current event. Each of these +functions expects a context variable, which is the same as the +context variable passed into every event handler as the second +argument. + + common_pc(context) - returns common_preempt count for the current event + common_flags(context) - returns common_flags for the current event + common_lock_depth(context) - returns common_lock_depth for the current event + +Util.py Module +~~~~~~~~~~~~~~ + +Various utility functions for use with perf script: + + nsecs(secs, nsecs) - returns total nsecs given secs/nsecs pair + nsecs_secs(nsecs) - returns whole secs portion given nsecs + nsecs_nsecs(nsecs) - returns nsecs remainder given nsecs + nsecs_str(nsecs) - returns printable string in the form secs.nsecs + avg(total, n) - returns average given a sum and a total number of values + +SEE ALSO +-------- +linkperf:perf-script[1] diff --git a/kernel/tools/perf/Documentation/perf-script.txt b/kernel/tools/perf/Documentation/perf-script.txt new file mode 100644 index 000000000..79445750f --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-script.txt @@ -0,0 +1,227 @@ +perf-script(1) +============= + +NAME +---- +perf-script - Read perf.data (created by perf record) and display trace output + +SYNOPSIS +-------- +[verse] +'perf script' [<options>] +'perf script' [<options>] record <script> [<record-options>] <command> +'perf script' [<options>] report <script> [script-args] +'perf script' [<options>] <script> <required-script-args> [<record-options>] <command> +'perf script' [<options>] <top-script> [script-args] + +DESCRIPTION +----------- +This command reads the input file and displays the trace recorded. + +There are several variants of perf script: + + 'perf script' to see a detailed trace of the workload that was + recorded. + + You can also run a set of pre-canned scripts that aggregate and + summarize the raw trace data in various ways (the list of scripts is + available via 'perf script -l'). The following variants allow you to + record and run those scripts: + + 'perf script record <script> <command>' to record the events required + for 'perf script report'. <script> is the name displayed in the + output of 'perf script --list' i.e. the actual script name minus any + language extension. If <command> is not specified, the events are + recorded using the -a (system-wide) 'perf record' option. + + 'perf script report <script> [args]' to run and display the results + of <script>. <script> is the name displayed in the output of 'perf + trace --list' i.e. the actual script name minus any language + extension. The perf.data output from a previous run of 'perf script + record <script>' is used and should be present for this command to + succeed. [args] refers to the (mainly optional) args expected by + the script. + + 'perf script <script> <required-script-args> <command>' to both + record the events required for <script> and to run the <script> + using 'live-mode' i.e. without writing anything to disk. <script> + is the name displayed in the output of 'perf script --list' i.e. the + actual script name minus any language extension. If <command> is + not specified, the events are recorded using the -a (system-wide) + 'perf record' option. If <script> has any required args, they + should be specified before <command>. This mode doesn't allow for + optional script args to be specified; if optional script args are + desired, they can be specified using separate 'perf script record' + and 'perf script report' commands, with the stdout of the record step + piped to the stdin of the report script, using the '-o -' and '-i -' + options of the corresponding commands. + + 'perf script <top-script>' to both record the events required for + <top-script> and to run the <top-script> using 'live-mode' + i.e. without writing anything to disk. <top-script> is the name + displayed in the output of 'perf script --list' i.e. the actual + script name minus any language extension; a <top-script> is defined + as any script name ending with the string 'top'. + + [<record-options>] can be passed to the record steps of 'perf script + record' and 'live-mode' variants; this isn't possible however for + <top-script> 'live-mode' or 'perf script report' variants. + + See the 'SEE ALSO' section for links to language-specific + information on how to write and run your own trace scripts. + +OPTIONS +------- +<command>...:: + Any command you can specify in a shell. + +-D:: +--dump-raw-script=:: + Display verbose dump of the trace data. + +-L:: +--Latency=:: + Show latency attributes (irqs/preemption disabled, etc). + +-l:: +--list=:: + Display a list of available trace scripts. + +-s ['lang']:: +--script=:: + Process trace data with the given script ([lang]:script[.ext]). + If the string 'lang' is specified in place of a script name, a + list of supported languages will be displayed instead. + +-g:: +--gen-script=:: + Generate perf-script.[ext] starter script for given language, + using current perf.data. + +-a:: + Force system-wide collection. Scripts run without a <command> + normally use -a by default, while scripts run with a <command> + normally don't - this option allows the latter to be run in + system-wide mode. + +-i:: +--input=:: + Input file name. (default: perf.data unless stdin is a fifo) + +-d:: +--debug-mode:: + Do various checks like samples ordering and lost events. + +-f:: +--fields:: + Comma separated list of fields to print. Options are: + comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline, period. + Field list can be prepended with the type, trace, sw or hw, + to indicate to which event type the field list applies. + e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace + + perf script -f <fields> + + is equivalent to: + + perf script -f trace:<fields> -f sw:<fields> -f hw:<fields> + + i.e., the specified fields apply to all event types if the type string + is not given. + + The arguments are processed in the order received. A later usage can + reset a prior request. e.g.: + + -f trace: -f comm,tid,time,ip,sym + + The first -f suppresses trace events (field list is ""), but then the + second invocation sets the fields to comm,tid,time,ip,sym. In this case a + warning is given to the user: + + "Overriding previous field request for all events." + + Alternatively, consider the order: + + -f comm,tid,time,ip,sym -f trace: + + The first -f sets the fields for all events and the second -f + suppresses trace events. The user is given a warning message about + the override, and the result of the above is that only S/W and H/W + events are displayed with the given fields. + + For the 'wildcard' option if a user selected field is invalid for an + event type, a message is displayed to the user that the option is + ignored for that type. For example: + + $ perf script -f comm,tid,trace + 'trace' not valid for hardware events. Ignoring. + 'trace' not valid for software events. Ignoring. + + Alternatively, if the type is given an invalid field is specified it + is an error. For example: + + perf script -v -f sw:comm,tid,trace + 'trace' not valid for software events. + + At this point usage is displayed, and perf-script exits. + + Finally, a user may not set fields to none for all event types. + i.e., -f "" is not allowed. + +-k:: +--vmlinux=<file>:: + vmlinux pathname + +--kallsyms=<file>:: + kallsyms pathname + +--symfs=<directory>:: + Look for files with symbols relative to this directory. + +-G:: +--hide-call-graph:: + When printing symbols do not display call chain. + +-C:: +--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can + be provided as a comma-separated list with no space: 0,1. Ranges of + CPUs are specified with -: 0-2. Default is to report samples on all + CPUs. + +-c:: +--comms=:: + Only display events for these comms. CSV that understands + file://filename entries. + +--pid=:: + Only show events for given process ID (comma separated list). + +--tid=:: + Only show events for given thread ID (comma separated list). + +-I:: +--show-info:: + Display extended information about the perf.data file. This adds + information which may be very large and thus may clutter the display. + It currently includes: cpu and numa topology of the host system. + It can only be used with the perf script report mode. + +--show-kernel-path:: + Try to resolve the path of [kernel.kallsyms] + +--show-task-events + Display task related events (e.g. FORK, COMM, EXIT). + +--show-mmap-events + Display mmap related events (e.g. MMAP, MMAP2). + +--header + Show perf.data header. + +--header-only + Show only perf.data header. + +SEE ALSO +-------- +linkperf:perf-record[1], linkperf:perf-script-perl[1], +linkperf:perf-script-python[1] diff --git a/kernel/tools/perf/Documentation/perf-stat.txt b/kernel/tools/perf/Documentation/perf-stat.txt new file mode 100644 index 000000000..04e150d83 --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-stat.txt @@ -0,0 +1,177 @@ +perf-stat(1) +============ + +NAME +---- +perf-stat - Run a command and gather performance counter statistics + +SYNOPSIS +-------- +[verse] +'perf stat' [-e <EVENT> | --event=EVENT] [-a] <command> +'perf stat' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>] + +DESCRIPTION +----------- +This command runs a command and gathers performance counter statistics +from it. + + +OPTIONS +------- +<command>...:: + Any command you can specify in a shell. + + +-e:: +--event=:: + Select the PMU event. Selection can be: + + - a symbolic event name (use 'perf list' to list all events) + + - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a + hexadecimal event descriptor. + + - a symbolically formed event like 'pmu/param1=0x3,param2/' where + param1 and param2 are defined as formats for the PMU in + /sys/bus/event_sources/devices/<pmu>/format/* + + - a symbolically formed event like 'pmu/config=M,config1=N,config2=K/' + where M, N, K are numbers (in decimal, hex, octal format). + Acceptable values for each of 'config', 'config1' and 'config2' + parameters are defined by corresponding entries in + /sys/bus/event_sources/devices/<pmu>/format/* + +-i:: +--no-inherit:: + child tasks do not inherit counters +-p:: +--pid=<pid>:: + stat events on existing process id (comma separated list) + +-t:: +--tid=<tid>:: + stat events on existing thread id (comma separated list) + + +-a:: +--all-cpus:: + system-wide collection from all CPUs + +-c:: +--scale:: + scale/normalize counter values + +-r:: +--repeat=<n>:: + repeat command and print average + stddev (max: 100). 0 means forever. + +-B:: +--big-num:: + print large numbers with thousands' separators according to locale + +-C:: +--cpu=:: +Count only on the list of CPUs provided. Multiple CPUs can be provided as a +comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. +In per-thread mode, this option is ignored. The -a option is still necessary +to activate system-wide monitoring. Default is to count on all CPUs. + +-A:: +--no-aggr:: +Do not aggregate counts across all monitored CPUs in system-wide mode (-a). +This option is only valid in system-wide mode. + +-n:: +--null:: + null run - don't start any counters + +-v:: +--verbose:: + be more verbose (show counter open errors, etc) + +-x SEP:: +--field-separator SEP:: +print counts using a CSV-style output to make it easy to import directly into +spreadsheets. Columns are separated by the string specified in SEP. + +-G name:: +--cgroup name:: +monitor only in the container (cgroup) called "name". This option is available only +in per-cpu mode. The cgroup filesystem must be mounted. All threads belonging to +container "name" are monitored when they run on the monitored CPUs. Multiple cgroups +can be provided. Each cgroup is applied to the corresponding event, i.e., first cgroup +to first event, second cgroup to second event and so on. It is possible to provide +an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must have +corresponding events, i.e., they always refer to events defined earlier on the command +line. + +-o file:: +--output file:: +Print the output into the designated file. + +--append:: +Append to the output file designated with the -o option. Ignored if -o is not specified. + +--log-fd:: + +Log output to fd, instead of stderr. Complementary to --output, and mutually exclusive +with it. --append may be used here. Examples: + 3>results perf stat --log-fd 3 -- $cmd + 3>>results perf stat --log-fd 3 --append -- $cmd + +--pre:: +--post:: + Pre and post measurement hooks, e.g.: + +perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- make -s -j64 O=defconfig-build/ bzImage + +-I msecs:: +--interval-print msecs:: + Print count deltas every N milliseconds (minimum: 100ms) + example: perf stat -I 1000 -e cycles -a sleep 5 + +--per-socket:: +Aggregate counts per processor socket for system-wide mode measurements. This +is a useful mode to detect imbalance between sockets. To enable this mode, +use --per-socket in addition to -a. (system-wide). The output includes the +socket number and the number of online processors on that socket. This is +useful to gauge the amount of aggregation. + +--per-core:: +Aggregate counts per physical processor for system-wide mode measurements. This +is a useful mode to detect imbalance between physical cores. To enable this mode, +use --per-core in addition to -a. (system-wide). The output includes the +core number and the number of online logical processors on that physical processor. + +-D msecs:: +--delay msecs:: +After starting the program, wait msecs before measuring. This is useful to +filter out the startup phase of the program, which is often very different. + +-T:: +--transaction:: + +Print statistics of transactional execution if supported. + +EXAMPLES +-------- + +$ perf stat -- make -j + + Performance counter stats for 'make -j': + + 8117.370256 task clock ticks # 11.281 CPU utilization factor + 678 context switches # 0.000 M/sec + 133 CPU migrations # 0.000 M/sec + 235724 pagefaults # 0.029 M/sec + 24821162526 CPU cycles # 3057.784 M/sec + 18687303457 instructions # 2302.138 M/sec + 172158895 cache references # 21.209 M/sec + 27075259 cache misses # 3.335 M/sec + + Wall-clock time elapsed: 719.554352 msecs + +SEE ALSO +-------- +linkperf:perf-top[1], linkperf:perf-list[1] diff --git a/kernel/tools/perf/Documentation/perf-test.txt b/kernel/tools/perf/Documentation/perf-test.txt new file mode 100644 index 000000000..31a5c3ea7 --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-test.txt @@ -0,0 +1,32 @@ +perf-test(1) +============ + +NAME +---- +perf-test - Runs sanity tests. + +SYNOPSIS +-------- +[verse] +'perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]' + +DESCRIPTION +----------- +This command does assorted sanity tests, initially through linked routines but +also will look for a directory with more tests in the form of scripts. + +To get a list of available tests use 'perf test list', specifying a test name +fragment will show all tests that have it. + +To run just specific tests, inform test name fragments or the numbers obtained +from 'perf test list'. + +OPTIONS +------- +-s:: +--skip:: + Tests to skip (comma separated numeric list). + +-v:: +--verbose:: + Be more verbose. diff --git a/kernel/tools/perf/Documentation/perf-timechart.txt b/kernel/tools/perf/Documentation/perf-timechart.txt new file mode 100644 index 000000000..df98d1c82 --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-timechart.txt @@ -0,0 +1,126 @@ +perf-timechart(1) +================= + +NAME +---- +perf-timechart - Tool to visualize total system behavior during a workload + +SYNOPSIS +-------- +[verse] +'perf timechart' [<timechart options>] {record} [<record options>] + +DESCRIPTION +----------- +There are two variants of perf timechart: + + 'perf timechart record <command>' to record the system level events + of an arbitrary workload. By default timechart records only scheduler + and CPU events (task switches, running times, CPU power states, etc), + but it's possible to record IO (disk, network) activity using -I argument. + + 'perf timechart' to turn a trace into a Scalable Vector Graphics file, + that can be viewed with popular SVG viewers such as 'Inkscape'. Depending + on the events in the perf.data file, timechart will contain scheduler/cpu + events or IO events. + + In IO mode, every bar has two charts: upper and lower. + Upper bar shows incoming events (disk reads, ingress network packets). + Lower bar shows outgoing events (disk writes, egress network packets). + There are also poll bars which show how much time application spent + in poll/epoll/select syscalls. + +TIMECHART OPTIONS +----------------- +-o:: +--output=:: + Select the output file (default: output.svg) +-i:: +--input=:: + Select the input file (default: perf.data unless stdin is a fifo) +-w:: +--width=:: + Select the width of the SVG file (default: 1000) +-P:: +--power-only:: + Only output the CPU power section of the diagram +-T:: +--tasks-only:: + Don't output processor state transitions +-p:: +--process:: + Select the processes to display, by name or PID + +--symfs=<directory>:: + Look for files with symbols relative to this directory. +-n:: +--proc-num:: + Print task info for at least given number of tasks. +-t:: +--topology:: + Sort CPUs according to topology. +--highlight=<duration_nsecs|task_name>:: + Highlight tasks (using different color) that run more than given + duration or tasks with given name. If number is given it's interpreted + as number of nanoseconds. If non-numeric string is given it's + interpreted as task name. +--io-skip-eagain:: + Don't draw EAGAIN IO events. +--io-min-time=<nsecs>:: + Draw small events as if they lasted min-time. Useful when you need + to see very small and fast IO. It's possible to specify ms or us + suffix to specify time in milliseconds or microseconds. + Default value is 1ms. +--io-merge-dist=<nsecs>:: + Merge events that are merge-dist nanoseconds apart. + Reduces number of figures on the SVG and makes it more render-friendly. + It's possible to specify ms or us suffix to specify time in + milliseconds or microseconds. + Default value is 1us. + +RECORD OPTIONS +-------------- +-P:: +--power-only:: + Record only power-related events +-T:: +--tasks-only:: + Record only tasks-related events +-I:: +--io-only:: + Record only io-related events +-g:: +--callchain:: + Do call-graph (stack chain/backtrace) recording + +EXAMPLES +-------- + +$ perf timechart record git pull + + [ perf record: Woken up 13 times to write data ] + [ perf record: Captured and wrote 4.253 MB perf.data (~185801 samples) ] + +$ perf timechart + + Written 10.2 seconds of trace to output.svg. + +Record system-wide timechart: + + $ perf timechart record + + then generate timechart and highlight 'gcc' tasks: + + $ perf timechart --highlight gcc + +Record system-wide IO events: + + $ perf timechart record -I + + then generate timechart: + + $ perf timechart + +SEE ALSO +-------- +linkperf:perf-record[1] diff --git a/kernel/tools/perf/Documentation/perf-top.txt b/kernel/tools/perf/Documentation/perf-top.txt new file mode 100644 index 000000000..3265b1070 --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-top.txt @@ -0,0 +1,240 @@ +perf-top(1) +=========== + +NAME +---- +perf-top - System profiling tool. + +SYNOPSIS +-------- +[verse] +'perf top' [-e <EVENT> | --event=EVENT] [<options>] + +DESCRIPTION +----------- +This command generates and displays a performance counter profile in real time. + + +OPTIONS +------- +-a:: +--all-cpus:: + System-wide collection. (default) + +-c <count>:: +--count=<count>:: + Event period to sample. + +-C <cpu-list>:: +--cpu=<cpu>:: +Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a +comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. +Default is to monitor all CPUS. + +-d <seconds>:: +--delay=<seconds>:: + Number of seconds to delay between refreshes. + +-e <event>:: +--event=<event>:: + Select the PMU event. Selection can be a symbolic event name + (use 'perf list' to list all events) or a raw PMU + event (eventsel+umask) in the form of rNNN where NNN is a + hexadecimal event descriptor. + +-E <entries>:: +--entries=<entries>:: + Display this many functions. + +-f <count>:: +--count-filter=<count>:: + Only display functions with more events than this. + +--group:: + Put the counters into a counter group. + +-F <freq>:: +--freq=<freq>:: + Profile at this frequency. + +-i:: +--inherit:: + Child tasks do not inherit counters. + +-k <path>:: +--vmlinux=<path>:: + Path to vmlinux. Required for annotation functionality. + +-m <pages>:: +--mmap-pages=<pages>:: + Number of mmap data pages (must be a power of two) or size + specification with appended unit character - B/K/M/G. The + size is rounded up to have nearest pages power of two value. + +-p <pid>:: +--pid=<pid>:: + Profile events on existing Process ID (comma separated list). + +-t <tid>:: +--tid=<tid>:: + Profile events on existing thread ID (comma separated list). + +-u:: +--uid=:: + Record events in threads owned by uid. Name or number. + +-r <priority>:: +--realtime=<priority>:: + Collect data with this RT SCHED_FIFO priority. + +--sym-annotate=<symbol>:: + Annotate this symbol. + +-K:: +--hide_kernel_symbols:: + Hide kernel symbols. + +-U:: +--hide_user_symbols:: + Hide user symbols. + +--demangle-kernel:: + Demangle kernel symbols. + +-D:: +--dump-symtab:: + Dump the symbol table used for profiling. + +-v:: +--verbose:: + Be more verbose (show counter open errors, etc). + +-z:: +--zero:: + Zero history across display updates. + +-s:: +--sort:: + Sort by key(s): pid, comm, dso, symbol, parent, srcline, weight, + local_weight, abort, in_tx, transaction, overhead, sample, period. + Please see description of --sort in the perf-report man page. + +--fields=:: + Specify output field - multiple keys can be specified in CSV format. + Following fields are available: + overhead, overhead_sys, overhead_us, overhead_children, sample and period. + Also it can contain any sort key(s). + + By default, every sort keys not specified in --field will be appended + automatically. + +-n:: +--show-nr-samples:: + Show a column with the number of samples. + +--show-total-period:: + Show a column with the sum of periods. + +--dsos:: + Only consider symbols in these dsos. This option will affect the + percentage of the overhead column. See --percentage for more info. + +--comms:: + Only consider symbols in these comms. This option will affect the + percentage of the overhead column. See --percentage for more info. + +--symbols:: + Only consider these symbols. This option will affect the + percentage of the overhead column. See --percentage for more info. + +-M:: +--disassembler-style=:: Set disassembler style for objdump. + +--source:: + Interleave source code with assembly code. Enabled by default, + disable with --no-source. + +--asm-raw:: + Show raw instruction encoding of assembly instructions. + +-g:: + Enables call-graph (stack chain/backtrace) recording. + +--call-graph:: + Setup and enable call-graph (stack chain/backtrace) recording, + implies -g. + +--children:: + Accumulate callchain of children to parent entry so that then can + show up in the output. The output will have a new "Children" column + and will be sorted on the data. It requires -g/--call-graph option + enabled. + +--max-stack:: + Set the stack depth limit when parsing the callchain, anything + beyond the specified depth will be ignored. This is a trade-off + between information loss and faster processing especially for + workloads that can have a very long callchain stack. + + Default: 127 + +--ignore-callees=<regex>:: + Ignore callees of the function(s) matching the given regex. + This has the effect of collecting the callers of each such + function into one place in the call-graph tree. + +--percent-limit:: + Do not show entries which have an overhead under that percent. + (Default: 0). + +--percentage:: + Determine how to display the overhead percentage of filtered entries. + Filters can be applied by --comms, --dsos and/or --symbols options and + Zoom operations on the TUI (thread, dso, etc). + + "relative" means it's relative to filtered entries only so that the + sum of shown entries will be always 100%. "absolute" means it retains + the original value before and after the filter is applied. + +-w:: +--column-widths=<width[,width...]>:: + Force each column width to the provided list, for large terminal + readability. 0 means no limit (default behavior). + + +INTERACTIVE PROMPTING KEYS +-------------------------- + +[d]:: + Display refresh delay. + +[e]:: + Number of entries to display. + +[E]:: + Event to display when multiple counters are active. + +[f]:: + Profile display filter (>= hit count). + +[F]:: + Annotation display filter (>= % of total). + +[s]:: + Annotate symbol. + +[S]:: + Stop annotation, return to full profile display. + +[z]:: + Toggle event count zeroing across display updates. + +[qQ]:: + Quit. + +Pressing any unmapped key displays a menu, and prompts for input. + + +SEE ALSO +-------- +linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-report[1] diff --git a/kernel/tools/perf/Documentation/perf-trace.txt b/kernel/tools/perf/Documentation/perf-trace.txt new file mode 100644 index 000000000..ba03fd5d1 --- /dev/null +++ b/kernel/tools/perf/Documentation/perf-trace.txt @@ -0,0 +1,164 @@ +perf-trace(1) +============= + +NAME +---- +perf-trace - strace inspired tool + +SYNOPSIS +-------- +[verse] +'perf trace' +'perf trace record' + +DESCRIPTION +----------- +This command will show the events associated with the target, initially +syscalls, but other system events like pagefaults, task lifetime events, +scheduling events, etc. + +This is a live mode tool in addition to working with perf.data files like +the other perf tools. Files can be generated using the 'perf record' command +but the session needs to include the raw_syscalls events (-e 'raw_syscalls:*'). +Alternatively, 'perf trace record' can be used as a shortcut to +automatically include the raw_syscalls events when writing events to a file. + +The following options apply to perf trace; options to perf trace record are +found in the perf record man page. + +OPTIONS +------- + +-a:: +--all-cpus:: + System-wide collection from all CPUs. + +-e:: +--expr:: + List of events to show, currently only syscall names. + Prefixing with ! shows all syscalls but the ones specified. You may + need to escape it. + +-o:: +--output=:: + Output file name. + +-p:: +--pid=:: + Record events on existing process ID (comma separated list). + +-t:: +--tid=:: + Record events on existing thread ID (comma separated list). + +-u:: +--uid=:: + Record events in threads owned by uid. Name or number. + +--filter-pids=:: + Filter out events for these pids and for 'trace' itself (comma separated list). + +-v:: +--verbose=:: + Verbosity level. + +-i:: +--no-inherit:: + Child tasks do not inherit counters. + +-m:: +--mmap-pages=:: + Number of mmap data pages (must be a power of two) or size + specification with appended unit character - B/K/M/G. The + size is rounded up to have nearest pages power of two value. + +-C:: +--cpu:: +Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a +comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. +In per-thread mode with inheritance mode on (default), Events are captured only when +the thread executes on the designated CPUs. Default is to monitor all CPUs. + +--duration: + Show only events that had a duration greater than N.M ms. + +--sched: + Accrue thread runtime and provide a summary at the end of the session. + +-i +--input + Process events from a given perf data file. + +-T +--time + Print full timestamp rather time relative to first sample. + +--comm:: + Show process COMM right beside its ID, on by default, disable with --no-comm. + +-s:: +--summary:: + Show only a summary of syscalls by thread with min, max, and average times + (in msec) and relative stddev. + +-S:: +--with-summary:: + Show all syscalls followed by a summary by thread with min, max, and + average times (in msec) and relative stddev. + +--tool_stats:: + Show tool stats such as number of times fd->pathname was discovered thru + hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc. + +-F=[all|min|maj]:: +--pf=[all|min|maj]:: + Trace pagefaults. Optionally, you can specify whether you want minor, + major or all pagefaults. Default value is maj. + +--syscalls:: + Trace system calls. This options is enabled by default. + +--event:: + Trace other events, see 'perf list' for a complete list. + +PAGEFAULTS +---------- + +When tracing pagefaults, the format of the trace is as follows: + +<min|maj>fault [<ip.symbol>+<ip.offset>] => <addr.dso@addr.offset> (<map type><addr level>). + +- min/maj indicates whether fault event is minor or major; +- ip.symbol shows symbol for instruction pointer (the code that generated the + fault); if no debug symbols available, perf trace will print raw IP; +- addr.dso shows DSO for the faulted address; +- map type is either 'd' for non-executable maps or 'x' for executable maps; +- addr level is either 'k' for kernel dso or '.' for user dso. + +For symbols resolution you may need to install debugging symbols. + +Please be aware that duration is currently always 0 and doesn't reflect actual +time it took for fault to be handled! + +When --verbose specified, perf trace tries to print all available information +for both IP and fault address in the form of dso@symbol+offset. + +EXAMPLES +-------- + +Trace only major pagefaults: + + $ perf trace --no-syscalls -F + +Trace syscalls, major and minor pagefaults: + + $ perf trace -F all + + 1416.547 ( 0.000 ms): python/20235 majfault [CRYPTO_push_info_+0x0] => /lib/x86_64-linux-gnu/libcrypto.so.1.0.0@0x61be0 (x.) + + As you can see, there was major pagefault in python process, from + CRYPTO_push_info_ routine which faulted somewhere in libcrypto.so. + +SEE ALSO +-------- +linkperf:perf-record[1], linkperf:perf-script[1] diff --git a/kernel/tools/perf/Documentation/perf.txt b/kernel/tools/perf/Documentation/perf.txt new file mode 100644 index 000000000..2b1317763 --- /dev/null +++ b/kernel/tools/perf/Documentation/perf.txt @@ -0,0 +1,41 @@ +perf(1) +======= + +NAME +---- +perf - Performance analysis tools for Linux + +SYNOPSIS +-------- +[verse] +'perf' [--version] [--help] [OPTIONS] COMMAND [ARGS] + +OPTIONS +------- +--debug:: + Setup debug variable (see list below) in value + range (0, 10). Use like: + --debug verbose # sets verbose = 1 + --debug verbose=2 # sets verbose = 2 + + List of debug variables allowed to set: + verbose - general debug messages + ordered-events - ordered events object debug messages + data-convert - data convert command debug messages + +--buildid-dir:: + Setup buildid cache directory. It has higher priority than + buildid.dir config file option. + +DESCRIPTION +----------- +Performance counters for Linux are a new kernel-based subsystem +that provide a framework for all things performance analysis. It +covers hardware level (CPU/PMU, Performance Monitoring Unit) features +and software features (software counters, tracepoints) as well. + +SEE ALSO +-------- +linkperf:perf-stat[1], linkperf:perf-top[1], +linkperf:perf-record[1], linkperf:perf-report[1], +linkperf:perf-list[1] diff --git a/kernel/tools/perf/Documentation/perfconfig.example b/kernel/tools/perf/Documentation/perfconfig.example new file mode 100644 index 000000000..767ea2436 --- /dev/null +++ b/kernel/tools/perf/Documentation/perfconfig.example @@ -0,0 +1,29 @@ +[colors] + + # These were the old defaults + top = red, lightgray + medium = green, lightgray + normal = black, lightgray + selected = lightgray, magenta + code = blue, lightgray + addr = magenta, lightgray + +[tui] + + # Defaults if linked with libslang + report = on + annotate = on + top = on + +[buildid] + + # Default, disable using /dev/null + dir = /root/.debug + +[annotate] + + # Defaults + hide_src_code = false + use_offset = true + jump_arrows = true + show_nr_jumps = false |