From 83c84e78c9f8edac83884f295baf32022e99f28a Mon Sep 17 00:00:00 2001 From: Brendan Gregg Date: Mon, 8 Sep 2014 15:00:58 -0700 Subject: [PATCH] improve error path, and minor adjustments. --- examples/tcpretrans_example.txt | 18 +++++ man/man8/tcpretrans.8 | 14 ++-- net/tcpretrans | 122 ++++++++++++++++++++------------ 3 files changed, 101 insertions(+), 53 deletions(-) diff --git a/examples/tcpretrans_example.txt b/examples/tcpretrans_example.txt index f91f51a..c849357 100644 --- a/examples/tcpretrans_example.txt +++ b/examples/tcpretrans_example.txt @@ -65,6 +65,24 @@ This looks like a fast retransmit (inclusion of tcp_fastretrans_alert(), and being based on receiving an ACK, rather than a timer). +The -l option will include TCP tail loss probe events (TLP; see +http://lwn.net/Articles/542642/). Eg: + +# ./tcpretrans -l +TIME PID LADDR:LPORT -- RADDR:RPORT STATE +21:56:06 0 10.100.155.200:22 R> 10.10.237.72:18554 LAST_ACK +21:56:08 0 10.100.155.200:22 R> 10.10.237.72:18554 LAST_ACK +21:56:10 16452 10.100.155.200:22 R> 10.10.237.72:18554 LAST_ACK +21:56:10 0 10.100.155.200:22 L> 10.10.237.72:46408 LAST_ACK +21:56:10 0 10.100.155.200:22 R> 10.10.237.72:46408 LAST_ACK +21:56:12 0 10.100.155.200:22 R> 10.10.237.72:46408 LAST_ACK +21:56:13 0 10.100.155.200:22 R> 10.10.237.72:46408 LAST_ACK +^C +Ending tracing... + +Look for "L>" in the type column ("--") for TLP events. + + Use -h to print the USAGE message: # ./tcpretrans -h diff --git a/man/man8/tcpretrans.8 b/man/man8/tcpretrans.8 index 9a8efd8..677d25e 100644 --- a/man/man8/tcpretrans.8 +++ b/man/man8/tcpretrans.8 @@ -19,7 +19,12 @@ modifications to work on different kernels and platforms. This would be better written using a tracer such as SystemTap, and will likely be rewritten in the future when certain tracing features are added to the Linux kernel. -Currently only IPv4 is supported. +When \-l is used, this also uses dynamic tracing of tcp_send_loss_probe() and +a register. + +Currently only IPv4 is supported, on x86_64. If you try this on a different +architecture, you'll likely need to adjust the register locations (search +for %di). Since this uses ftrace, only the root user can use this tool. .SH REQUIREMENTS @@ -34,7 +39,7 @@ Print usage message. \-s Include kernel stack traces. .TP -\-p +\-l Include TCP tail loss probes. .SH EXAMPLES .TP @@ -56,7 +61,7 @@ LPORT Local port. .TP \-\- -Packet type: "R>" for retransmit. +Packet type: "R>" for retransmit, and "L>" for tail loss probe. .TP RADDR Remote address. @@ -66,9 +71,6 @@ Remote port. .TP STATE TCP session state. -.TP -TLP -Tail loss probe: "Y/N". .SH OVERHEAD The CPU overhead is relative to the rate of TCP retransmits, and is designed to be low as this does not examine every packet. Once per second the diff --git a/net/tcpretrans b/net/tcpretrans index f0a64ae..1d9d553 100755 --- a/net/tcpretrans +++ b/net/tcpretrans @@ -8,10 +8,11 @@ # sent by the kernel on timeouts). To keep overhead low, only # tcp_retransmit_skb() calls are traced (this does not trace every packet). # -# USAGE: ./tcpretrans [-hsp] +# USAGE: ./tcpretrans [-hls] # -# REQUIREMENTS: FTRACE and KPROBE CONFIG, tcp_retransmit_skb() kernel function. -# You may have these already have these on recent kernels. And Perl. +# REQUIREMENTS: FTRACE and KPROBE CONFIG, tcp_retransmit_skb() kernel function, +# and tcp_send_loss_probe() when -l is used. You may have these already have +# these on recent kernels. And Perl. # # This was written as a proof of concept for ftrace, for older Linux systems, # and without kernel debuginfo. It uses dynamic tracing of tcp_retransmit_skb(), @@ -21,7 +22,12 @@ # written using a tracer such as SystemTap, and will likely be rewritten in the # future when certain tracing features are added to the Linux kernel. # -# Currently only IPv4 is supported. +# When -l is used, this also uses dynamic tracing of tcp_send_loss_probe() and +# a register. +# +# Currently only IPv4 is supported, on x86_64. If you try this on a different +# architecture, you'll likely need to adjust the register locations (search +# for %di). # # OVERHEAD: The CPU overhead is relative to the rate of TCP retransmits, and is # designed to be low as this does not examine every packet. Once per second the @@ -70,25 +76,34 @@ $| = 1; my ($help, $stacks, $tlp); GetOptions("help|h" => \$help, "stacks|s" => \$stacks, - "tlp|p" => \$tlp) + "tlp|l" => \$tlp) or usage(); usage() if $help; sub usage { - print STDERR "USAGE: tcpretrans [-hsp]\n"; - print STDERR " -h # help message\n"; - print STDERR " -s # print stack traces\n"; - print STDERR " -p # trace TCP tail loss probes\n"; + print STDERR "USAGE: tcpretrans [-hls]\n"; + print STDERR " -h # help message\n"; + print STDERR " -l # trace TCP tail loss probes\n"; + print STDERR " -s # print stack traces\n"; print STDERR " eg,\n"; - print STDERR " tcpretrans # trace TCP retransmits\n"; + print STDERR " tcpretrans # trace TCP retransmits\n"; exit; } +# delete lock and die sub ldie { unlink $flock; die @_; } +# end tracing (silently) and die +sub edie { + print STDERR "@_\n"; + close STDOUT; + close STDERR; + cleanup(); +} + sub writeto { my ($string, $file) = @_; open FILE, ">$file" or return 0; @@ -103,6 +118,23 @@ sub appendto { close FILE or return 0; } +# kprobe functions +sub create_kprobe { + my ($kname, $kval) = @_; + appendto "p:$kname $kval", "kprobe_events" or return 0; +} + +sub enable_kprobe { + my ($kname) = @_; + writeto "1", "events/kprobes/$kname/enable" or return 0; +} + +sub remove_kprobe { + my ($kname) = @_; + writeto "0", "events/kprobes/$kname/enable" or return 0; + appendto "-:$kname", "kprobe_events" or return 0; +} + # tcp socket cache my %tcp; sub cache_tcp { @@ -142,27 +174,6 @@ sub inet_h2a { return join(".", @addr); } -sub create_kprobe { - my ($kname, $kval) = @_; - appendto "p:$kname $kval", "kprobe_events" - or ldie "ERROR: creating kprobe for $kname."; -} - -sub enable_kprobe { - my ($kname) = @_; - unless (writeto "1", "events/kprobes/$kname/enable") { - appendto "-:$kname", "kprobe_events"; - ldie "ERROR: enabling kprobe."; - } -} - -sub remove_kprobe { - my ($kname) = @_; - writeto "0", "events/kprobes/$kname/enable" - or ldie "ERROR: disabling kprobe $kname"; - appendto "-:$kname", "kprobe_events"; -} - ### check permissions chdir "$tracing" or die "ERROR: accessing tracing. Root? Kernel has FTRACE?" . "\ndebugfs mounted? (mount -t debugfs debugfs /sys/kernel/debug)"; @@ -174,20 +185,32 @@ if (-e $flock) { } writeto "$$", $flock or die "ERROR: unable to write $flock."; -### setup and begin tracing +# +# Setup and begin tracing. +# Use of ldie() and edie() ensures that if an error is encountered, the +# kernel is not left in a partially configured state. +# writeto "nop", "current_tracer" or ldie "ERROR: disabling current_tracer."; -my ($kname, $kname_tlp) = ("tcpretrans_tcp_retransmit_skb", "tcpretrans_tcp_send_loss_probe"); -create_kprobe $kname, "tcp_retransmit_skb sk=%di"; -create_kprobe $kname_tlp, "tcp_send_loss_probe sk=%di" if $tlp; +my $kname_rtr = "tcpretrans_tcp_retransmit_skb"; +my $kname_tlp = "tcpretrans_tcp_send_loss_probe"; +create_kprobe $kname_rtr, "tcp_retransmit_skb sk=%di" or + ldie "ERROR: creating kprobe for tcp_retransmit_skb().";; +if ($tlp) { + create_kprobe $kname_tlp, "tcp_send_loss_probe sk=%di" or + edie "ERROR: creating kprobe for tcp_send_loss_probe(). " . + "Older kernel version?"; +} if ($stacks) { writeto "1", "options/stacktrace" or print STDERR "WARNING: " . - "unable to enable stacktraces."; + "unable to enable stacktraces.\n"; +} +enable_kprobe $kname_rtr or edie "ERROR: enabling $kname_rtr probe."; +if ($tlp) { + enable_kprobe $kname_tlp or edie "ERROR: enabling $kname_tlp probe."; } -enable_kprobe $kname; -enable_kprobe $kname_tlp if $tlp; map_tcp_states(); -printf "%-8s %-6s %-20s -- %-20s %-12s %s\n", "TIME", "PID", - "LADDR:LPORT", "RADDR:RPORT", "STATE", "TLP"; +printf "%-8s %-6s %-20s -- %-20s %-12s\n", "TIME", "PID", "LADDR:LPORT", + "RADDR:RPORT", "STATE"; # # Read and print event data. This loop waits one second then reads the buffered @@ -200,14 +223,14 @@ while (1) { sleep $interval; # buffer trace data - open TPIPE, "trace" or ldie "ERROR: opening trace_pipe."; + open TPIPE, "trace" or edie "ERROR: opening trace_pipe."; my @trace = (); while () { next if /^#/; push @trace, $_; } close TPIPE; - writeto "0", "trace" or ldie "ERROR: clearing trace"; + writeto "0", "trace" or edie "ERROR: clearing trace"; # cache /proc/net/tcp state if (scalar @trace) { @@ -246,8 +269,9 @@ while (1) { } my $now = strftime "%H:%M:%S", localtime; - printf "%-8s %-6s %-20s R> %-20s %-12s %s\n", $now, $pid, - "$laddr:$lport", "$raddr:$rport", $state, $rest =~ /$kname_tlp/ ? "Y" : "N"; + printf "%-8s %-6s %-20s %s> %-20s %-12s\n", $now, $pid, + "$laddr:$lport", $rest =~ /$kname_tlp/ ? "L" : "R", + "$raddr:$rport", $state, } } @@ -259,10 +283,14 @@ sub cleanup { close TPIPE; if ($stacks) { writeto "0", "options/stacktrace" or print STDERR "WARNING: " . - "unable to disable stacktraces."; + "unable to disable stacktraces.\n"; + } + remove_kprobe $kname_rtr + or print STDERR "ERROR: removing kprobe $kname_rtr\n"; + if ($tlp) { + remove_kprobe $kname_tlp + or print STDERR "ERROR: removing kprobe $kname_tlp\n"; } - remove_kprobe $kname; - remove_kprobe $kname_tlp if $tlp; writeto "", "trace"; unlink $flock; exit;