aboutsummaryrefslogtreecommitdiffstats
path: root/scripts
diff options
context:
space:
mode:
authorMatt Rude <[email protected]>2011-06-25 22:43:51 -0500
committerMatt Rude <[email protected]>2011-06-25 22:43:51 -0500
commitf3a6a7f119c16d07e6cdb233fc9fe9bff9ad1f8c (patch)
tree9874500e39e285009f1fbbd34204d87548d64a3c /scripts
parent57ca488b31dce17a1bc26547dc983abfb0e7b969 (diff)
downloadtime.mattrude.com-f3a6a7f119c16d07e6cdb233fc9fe9bff9ad1f8c.tar.gz
time.mattrude.com-f3a6a7f119c16d07e6cdb233fc9fe9bff9ad1f8c.tar.bz2
time.mattrude.com-f3a6a7f119c16d07e6cdb233fc9fe9bff9ad1f8c.zip
Adding NTP Clients status
Diffstat (limited to 'scripts')
-rwxr-xr-xscripts/ntp_clients_stats601
-rwxr-xr-xscripts/ntpclientsd477
2 files changed, 1078 insertions, 0 deletions
diff --git a/scripts/ntp_clients_stats b/scripts/ntp_clients_stats
new file mode 100755
index 0000000..1fe3b77
--- /dev/null
+++ b/scripts/ntp_clients_stats
@@ -0,0 +1,601 @@
+#!/usr/bin/perl -w
+
+# This script is public domain, there is no copyright on it.
+# - Wayne Schlitt
+
+use strict;
+
+use Getopt::Long;
+use POSIX qw(strftime);
+use Fcntl ':flock';
+
+my $HELP = 0;
+my $STARTFILE = "/var/log/ntpstats/ntp_stats.dump";
+
+my $result = GetOptions('help' => \$HELP,
+ 'startfile=s' => \$STARTFILE,
+ );
+
+if ($HELP || !$result) {
+ print "Usage: ntp_clients_stats [options]\n";
+ print "\n";
+ print " -help Help on the options.\n";
+ print "\n";
+ print " -startfile=/path/dumpfile File to read initial state\n";
+
+ exit(0);
+}
+
+# pre-allocate the hashes, just to make things a little faster
+
+my (%count, %rate, %first_tstamp, %last_tstamp, %last_printed, %is_active, %is_abusive);
+
+my $dump_magic = "ntp_stats";
+my $dump_version = 1;
+
+my $dump_created = 0;
+my $dump_written = 0;
+my $dump_age = 0;
+
+my $total_count = 0;
+my $active_count = 0;
+my $cur_rate = 0;
+my $est_cur_rate = 0;
+my $lterm_rate = 0;
+my $est_lterm_rate = 0;
+my $cur_freq = 0;
+my $not_counted = 0;
+my $num_clients = 0;
+my $num_active = 0;
+my $num_abusive = 0;
+my $last_cleaned = 0;
+my $tdiff;
+
+my ($ip, $key, $value, $sum, $rank, $max_dist);
+my $stars = "*" x 80;
+
+
+#
+# This sub tries to determine if the client is active or not. Since
+# NTP is a stateless and connectionless (UDP based) protocol, we can
+# never know for sure.
+#
+# Any client that has polled recently is assumed to be active,
+# although this probably overcounts the active clients because many
+# clients, such as those from ntpdate, only give a short burst of
+# traffic.
+#
+# A client that polls once per 2^14 sec (they exist) can have a very low
+# count. If they use iburst, they could have a count of only 4 or so.
+# if they don't use iburst, they could have a count of 1. Hence, any
+# connection with a small count *may* be active.
+#
+# By testing this function against live data, I can see that it both
+# sometimes counts clients as inactive when they are active, and counts
+# clients as still being active for quite a while after they become inactive.
+# Overall, it seems to do a pretty good job and the overcounts/undercounts
+# are not too common and pretty much cancel each other out. I suspect that
+# there is a slight bias toward overcounting.
+#
+# Heuristically decide whether a client is still polling.
+#
+# Argument: the client key (IP) to look up in %count, %rate and %last_tstamp.
+# Returns:  1 if the client is presumed active, 0 otherwise.
+#
+# The rules are tried in order and the first one that matches wins.
+sub calc_is_active {
+    my ($client) = @_;
+    my $polls    = $count{$client};
+    my $interval = $rate{$client};
+    my $idle     = $dump_written - $last_tstamp{$client};
+
+    # Enough polls to show a pattern (or a long poll interval), and the
+    # client has been silent for less than five of its own intervals.
+    return 1 if ( ($polls > 5 || $interval > 256) && $idle / 5. < $interval );
+
+    # Possibly an iburst client: the initial rate is misleadingly small,
+    # but the entry is not yet old enough to toss.
+    return 1 if ( $polls > 1 && $interval < 4 && $idle < 1024 + 60 );
+
+    # Clients of unknown kind that nevertheless show up in live data.
+    return 1 if ( $polls > 1 && $interval > 64 && $idle < 300 );
+
+    # Anything that polled within the last minute is assumed active.
+    return 1 if ( $idle < 60 );
+
+    # Otherwise assume the client has stopped polling.
+    return 0;
+}
+
+# quickly read in the dump data
+
+my ($magic, $ver);
+my $t0 = time();
+
+# Open the dump and read its header line, retrying until the header is
+# complete and the dump is not in the window where it may be rewritten.
+# When the loop exits via "last", DUMP is still open, still share-locked
+# and positioned just after the header, ready for the rows to be slurped.
+while ( 1 ) {
+    # Check the deadline before every attempt.  Previously this test came
+    # after the half-written check below, so a dump that stayed empty or
+    # truncated made the loop retry forever.
+    die "Timed out trying to get valid dump data from $STARTFILE" if ( time() - $t0 >= 15 );
+
+    open(DUMP, "<", $STARTFILE ) or die "Could not open startfile: $STARTFILE: $!";
+    flock(DUMP,LOCK_SH);
+
+    # An empty file yields undef; treat it like a half-written header
+    # instead of handing undef to split().
+    my $header = <DUMP>;
+    $header = "" if ( !defined( $header ) );
+    ($magic, $ver, $total_count, $dump_created, $dump_written, $cur_rate, $lterm_rate) = split(' ', $header);
+
+    # make sure we don't process a half-written dump
+    if ( !defined($magic) || $magic eq "" || !defined($dump_written) ) {
+        flock(DUMP,LOCK_UN);
+        close(DUMP);
+        select(undef, undef, undef, .5);
+        next;
+    }
+
+    die "$STARTFILE is not a dump file created by ntp_clients" if ( !defined( $magic ) || $magic ne $dump_magic );
+    die "Incorrect dump file version: $ver" if ( !defined( $ver ) || ($ver ne "1" && $ver ne "2") );
+
+    # Accept the dump if it is comfortably fresh (0-59s old) or clearly not
+    # being refreshed right now (timestamp in the future, or older than 70s).
+    $tdiff = time() - $dump_written;
+    last if ( ($tdiff > 0 && $tdiff < 59)
+              || $tdiff < -1 || $tdiff > 70 );
+    flock(DUMP,LOCK_UN);
+    close(DUMP);
+
+    # Here $tdiff is in [-1,0] or [59,70].  For the old-dump case poll every
+    # half second; the threshold is 59 (not 60) because "2 - $tdiff" would be
+    # negative for 59 <= $tdiff < 60, turning the wait into a busy loop.
+    if ( $tdiff >= 59 ) {
+        select(undef, undef, undef, .5);
+    } else {
+        # dump was written this very second: wait 2-3s so it is > 0s old
+        select(undef, undef, undef, 2 - $tdiff);
+    }
+}
+
+# slurp the per-client rows, then release the lock
+my @dump = <DUMP>;
+flock(DUMP,LOCK_UN);
+close(DUMP);
+
+
+# process the data
+
+# The rate fields of a version 1 header are not used; -1 makes the report
+# fall back to the estimated rates.
+if ( $ver eq "1" ) {
+    $cur_rate   = -1;
+    $lterm_rate = -1;
+}
+
+# Pre-size every per-client hash from the number of rows in the dump.
+foreach my $table ( \%count, \%rate, \%first_tstamp, \%last_tstamp,
+                    \%last_printed, \%is_active, \%is_abusive ) {
+    keys( %$table ) = $#dump;
+}
+
+# Estimated long-term rate: average seconds between requests over the life
+# of the dump, or a large placeholder when there are too few requests.
+$dump_age = $dump_written - $dump_created;
+$est_lterm_rate = ( $total_count > 2 ) ? $dump_age / ($total_count - 1) : 99999;
+
+# Each row holds: client key, request count, rate, and the first/last
+# request times stored as offsets that are subtracted from $dump_written.
+foreach my $row ( @dump ) {
+    my ($client, $r_count, $r_rate, $r_first_age, $r_last_age) = split( ' ', $row );
+    $count{$client}        = $r_count;
+    $rate{$client}         = $r_rate;
+    $first_tstamp{$client} = $dump_written - $r_first_age;
+    $last_tstamp{$client}  = $dump_written - $r_last_age;
+    $last_printed{$client} = 0;
+}
+
+# Classify every client (active / abusive) and accumulate the totals used
+# by the summary report.
+while (($ip, $value) = each %last_tstamp) {
+
+    $num_clients++;
+
+    my $active = calc_is_active($ip);
+    $is_active{$ip} = $active;
+    if ( $active ) {
+        $active_count += $count{$ip};
+        $num_active++;
+    }
+
+    # Estimate the current request frequency from per-client rates.  This
+    # estimate tends to lag the info from ver 2 dumps, and can be off by
+    # quite a bit, especially when there isn't much data yet.
+    if ( !($rate{$ip} > 0 && $count{$ip} > 5) ) {
+        $not_counted++;
+    } else {
+        my $period = 15*60;
+
+        # clamp both ends of the client's lifetime to the last $period seconds
+        my $t1 = ( $dump_written - $first_tstamp{$ip} > $period )
+                     ? $dump_written - $period
+                     : $first_tstamp{$ip};
+        my $t0 = ( $dump_written - $last_tstamp{$ip} > $period )
+                     ? $dump_written - $period
+                     : $last_tstamp{$ip};
+
+        # fraction of the window the client was present, divided by its rate
+        $cur_freq += (($t0 - $t1)/$period) / $rate{$ip};
+    }
+
+    # Abusive: at least 100 requests, averaging no more than 30s apart over
+    # the client's whole lifetime, and still active.
+    my $abusive = ( $count{$ip} >= 100
+                    && ($last_tstamp{$ip} - $first_tstamp{$ip}) / ($count{$ip} - 1) <= 30
+                    && $is_active{$ip}
+                  ) ? 1 : 0;
+    $num_abusive++ if ( $abusive );
+    $is_abusive{$ip} = $abusive;
+}
+
+
+# assume clients that couldn't be counted (new?) are like the rest
+#$cur_freq += $not_counted * $lterm_freq / $num_clients;
+# fudge for clients that couldn't be counted (ntpdate? new?)
+$cur_freq += $not_counted / 2048;
+
+# A dump with no client rows leaves $cur_freq at 0; inverting it would die
+# with "Illegal division by zero".  Use 0 ("no estimate") instead, which
+# the report skips because it only prints rates that are > 0.
+$est_cur_rate = ( $cur_freq > 0 ) ? 1/$cur_freq : 0;
+
+# young dumps: the per-client estimate is not trustworthy yet
+$est_cur_rate = $est_lterm_rate if ( $dump_age < 6*1024 + 60 );
+
+
+# print the report
+
+printf( "Estimated active ntp pool clients: %6d\n", $num_active);
+printf( "Estimated abusive ntp pool clients: %6d\n", $num_abusive);
+printf( "Estimated inactive ntp pool clients: %6d\n", $num_clients - $num_active);
+printf( "Total ntp pool clients being tracked: %6d\n", $num_clients);
+printf( "Note: NTP is a stateless and connectionless (UDP based) protocol, so\n" );
+printf( " exact numbers can't be determined.\n\n" );
+
+if ( $dump_age <= 2048 ) {
+ # need at least two polls from the 1024 before we have good data
+ printf( "** Warning: this dump file is very new and many results will be misleading. **\n" );
+} elsif ( $dump_age <= 4096 ) {
+ # this is barely long enough to detect most abusive clients (30s * 100req)
+ # and most of the graphs are still pretty useless.
+ printf( "Note: this dump file is too new to give very accurate results.\n" );
+}
+
+printf( "%9d ntp requests, in total, have been seen since %s\n",
+ $total_count, strftime( "%D %T", localtime( $dump_created ) ) );
+printf( "%9d (%4.1f%%) are from clients that are still active.\n",
+ $active_count, 100 * ($active_count / $total_count) ) if ($total_count > 0 );
+
+# Print the request rate and the inbound bandwidth it implies.
+#
+# Arguments: $hdr  - label prefixed to each line
+#            $rate - seconds between requests (must be non-zero)
+# Bandwidth figures assume 76 bytes per request and a 30 day month.
+sub print_rates {
+    my ($hdr, $rate) = @_;
+
+    my $req_per_sec  = 1/$rate;
+    my $kbytes_sec   = 76./(1024*$rate);
+    my $kbits_sec    = 76*8./(1024*$rate);
+    my $gbytes_month = 30*24*60*60*76./(1024*1024*1024*$rate);
+    my $gbits_month  = 30*24*60*60*76*8./(1024*1024*1024*$rate);
+
+    printf( "%s request rate: %.3f seconds between requests (%.2f req/sec)\n",
+            $hdr, $rate, $req_per_sec );
+    printf( "%s bandwidth in: %6.3f KBytes/s %7.3f Kbits/s\n",
+            $hdr, $kbytes_sec, $kbits_sec );
+    printf( "%s bandwidth in: %6.3f GB/month %7.3f Gb/month\n",
+            $hdr, $gbytes_month, $gbits_month );
+}
+
+if ( $dump_age > 12*60*60 ) {
+ if ( $lterm_rate > 0 ) {
+ print_rates( "Long term", $lterm_rate );
+ } elsif ( $est_lterm_rate > 0 ) {
+ print_rates( "Est Long term", $est_lterm_rate );
+ }
+}
+
+if ( $cur_rate > 0 ) {
+ print_rates( "Current", $cur_rate );
+} elsif ( $est_cur_rate > 0 ) {
+ print_rates( "Est current", $est_cur_rate );
+}
+
+printf( "(NTP packets are usually 76 bytes, UDP overhead included, in each direction.)\n" );
+
+print "\n";
+printf( "The dump file was written %d seconds ago, at %s\n",
+ $tdiff, strftime( "%D %T", localtime( $dump_written ) ) );
+
+exit if ($total_count < 1 );
+
+
+print "\n";
+print "Subnets with many clients:\n";
+my (%class_c, %class_c_rate, %class_c_count, %class_c_abusive);
+keys( %class_c ) = $num_clients;
+foreach $ip (keys( %last_tstamp )) {
+ my $key = $ip;
+ next if ( ! $is_active{$ip} );
+ $key =~ s/^([0-9.]*)\.[0-9][0-9]*$/$1/;
+ $class_c{$key}++;
+ $class_c_rate{$key} += 1./$rate{$ip} if ($rate{$ip} > 0);
+ $class_c_count{$key} += $count{$ip} if ($count{$ip} > 0);
+ $class_c_abusive{$key} += $is_abusive{$ip};
+}
+printf "# of Subnet Total Aggregate Abusive\n";
+printf " IPs Count Rate Clients\n";
+
+foreach $key (sort { $class_c{$b} <=> $class_c{$a} } keys %class_c) {
+ last if ( $class_c{$key} < 4 );
+ printf "%4d %-13s", $class_c{$key}, $key . ".x";
+ if ( defined( $class_c_count{$key} ) ) {
+ printf " %8d", $class_c_count{$key};
+ }
+ if ( defined( $class_c_rate{$key} ) ) {
+ printf " %9.3f", 1./$class_c_rate{$key};
+ }
+ if ( defined( $class_c_abusive{$key} ) ) {
+ printf " %7d", $class_c_abusive{$key};
+ }
+ printf "\n";
+}
+
+
+print "\n";
+print "Clients with rapid updates (min requests of 100):\n";
+my @bad_clients;
+while (($ip, $value) = each %last_tstamp) {
+ my $is_rapid = $count{$ip} >= 100 && $rate{$ip} <= 60;
+
+# printf( "%-15s count: %6d delta: %10.3f rate: %7.2f active: %d\n",
+# $ip, $count{$ip}, ($last_tstamp{$ip} - $first_tstamp{$ip}),
+# $rate{$ip}, $is_active{$ip} ) if ($is_abusive{$ip} && !$is_rapid );
+
+ next if (!$is_rapid && !$is_abusive{$ip} );
+
+ push @bad_clients, $ip
+}
+print "Rank First Seen Client IP Requests Rate Usage Cumulative\n";
+$sum = 0;
+$rank = 0;
+foreach $ip (sort { $count{$b} <=> $count{$a} } @bad_clients) {
+ $rank++;
+ if ( $is_active{$ip} ) {
+ $sum += $count{$ip};
+
+ printf( "%3d %s %-15s %7d %8.2f %5.2f%% %5.2f%% *",
+ $rank, strftime( "%D %T", localtime( $first_tstamp{$ip} ) ),
+ $ip, $count{$ip}, $rate{$ip},
+ 100 * $count{$ip} / $active_count, 100 * $sum / $active_count
+ );
+ } else {
+ printf( "%3d %s %-15s %7d %8.2f (%5.2f%%)",
+ $rank, strftime( "%D %T", localtime( $first_tstamp{$ip} ) ),
+ $ip, $count{$ip}, $rate{$ip},
+ 100 * $count{$ip} / ($active_count + $count{$ip} )
+ );
+ }
+ print " !" if ($is_abusive{$ip});
+ print "\n";
+ last if ( $rank >= 100 );
+}
+print "* = \"active\" = probably will send another request.\n";
+print "! = \"abusive\" = min requests of 100 and an average rate of less than 30s\n";
+print " between requests over the life of the entire connection.\n";
+print "Percentages are based on all packets from *active* clients. If the client\n";
+print "is not active, its percentage is what it would be if it were active.\n";
+
+
+print "\n\n";
+print "Clients with very long updates (min requests of 10):\n";
+my @great_clients;
+while (($ip, $value) = each %last_tstamp) {
+ next if ( $count{$ip} < 10 || $rate{$ip} < 3000 );
+
+ push @great_clients, $ip
+}
+print "Rank First Seen Client IP Requests Rate Usage Cumulative\n";
+$sum = 0;
+$rank = 0;
+foreach $ip (sort { $rate{$b} <=> $rate{$a} } @great_clients) {
+ $rank++;
+
+ if ( $is_active{$ip} ) {
+ $sum += $count{$ip};
+
+ printf( "%3d %s %-15s %7d %8.2f %5.2f%% %5.2f%% *",
+ $rank, strftime( "%D %T", localtime( $first_tstamp{$ip} ) ),
+ $ip, $count{$ip}, $rate{$ip},
+ 100 * $count{$ip} / $active_count, 100 * $sum / $active_count
+ );
+ } else {
+ printf( "%3d %s %-15s %7d %8.2f (%5.2f%%)",
+ $rank, strftime( "%D %T", localtime( $first_tstamp{$ip} ) ),
+ $ip, $count{$ip}, $rate{$ip},
+ 100 * $count{$ip} / ($active_count + $count{$ip} )
+ );
+ }
+ print "\n";
+ last if ( $rank >= 10 );
+}
+
+my @client_dist;
+my ($log_idx, $idx);
+my $base = log( sqrt(2) );
+
+
+# Histogram of clients by request count, using logarithmic bins that are a
+# factor of sqrt(2) wide.  Counts of 1 or less go into bin 0.
+@client_dist = ();
+print "\n";
+print "Clients distribution by count:\n";
+foreach my $client ( keys %last_tstamp ) {
+    $log_idx = ( $count{$client} <= 1 ) ? 0 : log( $count{$client} )/$base;
+    $idx = int( $log_idx + 0.5 );
+    $client_dist[$idx]++;
+}
+
+# the fullest bin sets the scale of the bar graph
+$max_dist = 0;
+foreach my $tally ( @client_dist ) {
+    next if ( !defined $tally );
+    $max_dist = $tally if ( $max_dist < $tally );
+}
+print " bin ( bin range ) count\n";
+
+foreach my $bin ( 0 .. $#client_dist ) {
+    $client_dist[$bin] = 0 if ( ! defined( $client_dist[$bin] ) );
+    my $tally    = $client_dist[$bin];
+    my $center   = 2**($bin/2);
+    my $high     = 2**($bin/2 + .25);
+    my $low      = $bin ? 2**($bin/2 - .25) : 0;
+    my $num_star = 45 * ($tally / $max_dist);
+
+    # Counts are integers, so a bin whose bounds truncate to the same
+    # integer cannot hold anything; skip it (and complain if it does).
+    if ( int( $low ) == int( $high ) ) {
+        printf( "Error: value in zero range bin!: %d %7.1f - %7.1f\n", $bin, $low, $high ) if ( $tally != 0 );
+        next;
+    }
+
+    # show the range as the integers it actually covers
+    $low++;
+    $center = $high if ( int( $low ) == int( $high ) );
+    printf( "%7d (%7d - %7d) %5d %.*s\n",
+            $center, $low, $high, $tally, $num_star, $stars );
+}
+
+
+# The five histograms below were five copies of the same binning and
+# printing code; they now share these two helpers.  Output is unchanged.
+
+# Map a value onto a logarithmic bin that is a factor of sqrt(2) wide
+# ($base is log(sqrt(2))).  Values of 1 or less all land in bin 0.
+sub log_bin_index {
+    my ($val) = @_;
+    return 0 if ( $val <= 1 );
+    return int( log( $val )/$base + 0.5 );
+}
+
+# Print the column header followed by one row per bin: bin center, bin
+# range, tally, and a bar of up to 45 stars scaled to the fullest bin.
+# Takes the list of per-bin tallies; undefined entries count as 0.
+# An empty list prints only the header.
+sub print_log_histogram {
+    my @dist = @_;
+
+    my $peak = 0;
+    foreach my $tally ( @dist ) {
+        $peak = $tally if ( defined $tally && $peak < $tally );
+    }
+    print " bin ( bin range ) count\n";
+
+    foreach my $bin ( 0 .. $#dist ) {
+        my $tally    = defined( $dist[$bin] ) ? $dist[$bin] : 0;
+        my $low      = $bin ? 2**($bin/2 - .25) : 0;
+        my $high     = 2**($bin/2 + .25);
+        my $center   = 2**($bin/2);
+        my $num_star = 45 * ($tally / $peak);
+        printf( "%7.1f (%7.1f - %7.1f) %5d %.*s\n",
+                $center, $low, $high, $tally, $num_star, $stars );
+    }
+}
+
+
+@client_dist = ();
+print "\n";
+print "Clients distribution by rate (min requests of 10): \n";
+foreach my $client ( keys %last_tstamp ) {
+    next if ( $count{$client} < 10 );
+    $client_dist[ log_bin_index( $rate{$client} ) ]++;
+}
+print_log_histogram( @client_dist );
+
+
+@client_dist = ();
+print "\n";
+print "Clients distribution by amount of time client was active (in hours):\n";
+foreach my $client ( keys %last_tstamp ) {
+    my $age = ($last_tstamp{$client} - $first_tstamp{$client})/(60*60);
+    $client_dist[ log_bin_index( $age ) ]++;
+}
+print_log_histogram( @client_dist );
+
+
+@client_dist = ();
+print "\n";
+print "Clients distribution by time since first request (in hours):\n";
+foreach my $client ( keys %last_tstamp ) {
+    my $age = ($dump_written - $first_tstamp{$client})/(60*60);
+    $client_dist[ log_bin_index( $age ) ]++;
+}
+print_log_histogram( @client_dist );
+
+
+@client_dist = ();
+print "\n";
+print "Active clients distribution by time since last request (in secs):\n";
+foreach my $client ( keys %last_tstamp ) {
+    next if ( ! $is_active{$client} );
+    my $age = ($dump_written - $last_tstamp{$client});
+    $client_dist[ log_bin_index( $age ) ]++;
+}
+print_log_histogram( @client_dist );
+
+
+@client_dist = ();
+print "\n";
+print "Inactive clients distribution by time since last request (in secs):\n";
+foreach my $client ( keys %last_tstamp ) {
+    next if ( $is_active{$client} );
+    my $age = ($dump_written - $last_tstamp{$client});
+    my $bin = log_bin_index( $age );
+
+    # list clients that were declared inactive after only a short silence
+    printf( "%-15s inactive %5d %7.2f > %7.2f\n", $client, $count{$client}, $age / 10.,$rate{$client} ) if ( $bin < 10 );
+    $client_dist[$bin]++;
+}
+print_log_histogram( @client_dist );
diff --git a/scripts/ntpclientsd b/scripts/ntpclientsd
new file mode 100755
index 0000000..17dd991
--- /dev/null
+++ b/scripts/ntpclientsd
@@ -0,0 +1,477 @@
+#!/usr/bin/perl -w
+
+# This script is public domain, there is no copyright on it.
+# - Wayne Schlitt
+
+
+use strict;
+
+use Getopt::Long;
+use POSIX qw(strftime);
+use Fcntl ':flock';
+
+#my $TCPDUMP_PROG = "/tmp/tcpdump-3.7.2/tcpdump";
+#my $TCPDUMP_PROG = "/tmp/tcpdump-3.9.1/tcpdump";
+my $TCPDUMP_PROG = "tcpdump";
+
+
+my $HELP = 0;
+my $DUMPFILE = "";
+my $STARTFILE = "";
+my $VERBOSE = 1;
+my $INTERFACE = "";
+my $TCPDUMP_VER = undef;
+
+my $result = GetOptions('help' => \$HELP,
+ 'dumpfile=s' => \$DUMPFILE,
+ 'startfile=s' => \$STARTFILE,
+ 'interface=s' => \$INTERFACE,
+ 'verbose+' => \$VERBOSE,
+ 'tcpdump=f' => \$TCPDUMP_VER,
+ 'quiet' => sub { $VERBOSE = 0 },
+ );
+
+# Print the usage text and stop when -help was given or the options could
+# not be parsed.
+if ($HELP || !$result) {
+    print "Usage: ntp_clients [options]\n";
+    print "\n";
+    print " -help Help on the options.\n";
+    print "\n";
+    print " -dumpfile=/path/dumpfile File to read/write internal state\n";
+    print " -startfile=/path/dumpfile File to read initial state\n";
+    print " -interface=eth# Lan interface to monitor\n";
+    print " -verbose increase amount of information printed\n";
+    print " by default, track only bad clients\n";
+    print " -v will give you all clients\n";
+    print " -v -v will give you all requests\n";
+    # this line lacked its newline, which ran it into the -quiet line
+    print " -tcpdump=version set format of NTP trace output\n";
+    print " -quiet don't print any info\n";
+
+    exit(0);
+}
+
+$| = 1;
+
+# pre-allocate the hashes, just to make things a little faster
+
+my (%count, %rate, %first_tstamp, %last_tstamp, %last_printed);
+keys( %count ) = 1024;
+keys( %rate ) = 1024;
+keys( %first_tstamp ) = 1024;
+keys( %last_tstamp ) = 1024;
+keys( %last_printed ) = 1024;
+
+my $dump_magic = "ntp_stats";
+my $dump_version = 2;
+my $dump_created = time();
+my $dump_written = 0;
+my $dump_age = 0;
+
+my $total_count = 0;
+my $base_rate = 99999;
+my $cur_period = 15*60;
+my $cur_rate = -1;
+my $cur_factor1 = 1;
+my $cur_factor2 = 0;
+my $lterm_period = 15*24*60*60;
+my $lterm_rate = -1;
+my $lterm_factor1 = 1;
+my $lterm_factor2 = 0;
+my $prev_tstamp = undef;
+my $delta = undef;
+my $num_clients = 0;
+my $last_cleaned = 0;
+my $last_mday = (localtime(time))[3];
+
+my ($tstamp, $ip_str, $ip, $tofrom, $ip2, $ver, $s_or_c, $s_or_c2, $s_or_c3 );
+
+$tstamp = $prev_tstamp = time();
+
+
+# With no explicit start file, resume from the dump file (if one was given).
+$STARTFILE = $DUMPFILE if ( !$STARTFILE && $DUMPFILE );
+
+# Restore the saved state.  If the start file cannot be opened, nothing is
+# loaded and no message is printed; the initial values set above stay in effect.
+if ( $STARTFILE && open(DUMP, "<", $STARTFILE ) ) {
+ flock(DUMP,LOCK_SH);
+
+ # Header line: magic, version, total request count, creation time,
+ # time written, current rate, long term rate.
+ # These lexicals shadow the file-level $ver inside this block.
+ my ($magic, $ver);
+ ($magic, $ver, $total_count, $dump_created, $dump_written, $cur_rate, $lterm_rate) = split(' ', <DUMP>);
+ die "$STARTFILE was not created by ntp_stats" if ( !defined( $magic ) || $magic ne $dump_magic );
+ die "Incorrect dump file version: $ver" if ( !defined( $ver ) || ($ver ne "1" && $ver ne "2") );
+ if (time() - $dump_written > 2*60*60 ) {
+ # If the dumpfile is too old, it will really screw up the stats
+ # and it will take hours for them to drift back to what they should be.
+ # Might as well just start over.
+ print "Warning: start file too old to be used.\n";
+ $total_count = 0;
+ $base_rate = 99999;
+ $cur_rate = -1;
+ $lterm_rate = -1;
+ $dump_created = time();
+ $dump_written = 0;
+ $dump_age = 0;
+ } else {
+ # the rate fields of a version 1 header are not used
+ $cur_rate = -1 if ($ver eq "1");
+ $lterm_rate = -1 if ($ver eq "1");
+ $dump_age = $dump_written - $dump_created;
+ # base rate = average seconds between requests over the dump's life
+ if ( $total_count > 2 ) {
+ $base_rate = $dump_age / ($total_count - 1);
+ } else {
+ $base_rate = 99999;
+ }
+ # pull in saved rates that are more than 10x slower than their reference
+ $lterm_rate = $base_rate if ( $lterm_rate > $base_rate * 10 );
+ $cur_rate = $lterm_rate if ( $cur_rate > $lterm_rate * 10 );
+
+ # One row per client.  The first/last request times are stored as
+ # offsets subtracted from $dump_written to recover absolute timestamps.
+ # The dump writer below emits five columns, so $r_last_printed is
+ # normally undef; it is not used and last_printed restarts at 0.
+ while(<DUMP>) {
+ chomp;
+ my ($key, $r_count, $r_rate, $r_first_tstamp, $r_last_tstamp, $r_last_printed) = split;
+ $count{$key} = $r_count;
+ $rate{$key} = $r_rate;
+ $first_tstamp{$key} = $dump_written - $r_first_tstamp;
+ $last_tstamp{$key} = $dump_written - $r_last_tstamp;
+ $last_printed{$key} = 0;
+ $num_clients++;
+ }
+ }
+ flock(DUMP,LOCK_UN);
+ close(DUMP);
+}
+
+
+my $tcpdump_major = -1;
+my $tcpdump_minor = -1;
+my $tcpdump_patch = -1;
+if ( ! defined( $TCPDUMP_VER ) ) {
+ open(PROG, "$TCPDUMP_PROG -V 2>&1 |") or die "Can't run tcpdump: $!";
+ $TCPDUMP_VER = <PROG>;
+ close(PROG);
+ die "Could not determine tcpdump version" if ( !defined( $TCPDUMP_VER ) );
+ $TCPDUMP_VER =~ s/^tcpdump version ([.0-9]*).*\n$/$1/;
+
+ # Apparently, the 3.8.2 release is screwed up. Straight from
+ # tcpdump.org, the tarball for v3.8.2 has stuff that is marked as being
+ # in the "v3.9 branch", while the v3.8.3 release has the older stuff,
+ # from the "v3.8 branch". To make matters worse, it appears that at
+ # least the RedHat Fedora-Core 3 release of tcpdump v3.8.2 claims to
+ # be just version "3.8".
+
+ if ( $TCPDUMP_VER eq "3.8.2" || $TCPDUMP_VER eq "3.8" ) {
+ $TCPDUMP_VER = "3.9";
+ }
+}
+
+
+($tcpdump_major = $TCPDUMP_VER) =~ s/^([0-9][0-9]*).*/$1/;
+($tcpdump_minor = $TCPDUMP_VER) =~ s/^[0-9][0-9]*\.([0-9][0-9]*).*/$1/;
+($tcpdump_patch = $TCPDUMP_VER) =~ s/^[0-9][0-9]*\.[0-9][0-9]*\.([0-9][0-9]*.*)/$1/;
+$tcpdump_minor = 0 if ($tcpdump_minor eq $TCPDUMP_VER);
+$tcpdump_patch = 0 if ($tcpdump_patch eq $TCPDUMP_VER);
+print "Warning: untested tcpdump version: $TCPDUMP_VER\n"
+ if ( $tcpdump_major != 3 || $tcpdump_minor < 4 || $tcpdump_minor > 9 );
+
+
+print "tcpdump version: $TCPDUMP_VER $tcpdump_major $tcpdump_minor $tcpdump_patch\n" if ($VERBOSE > 2);
+
+# get the data
+my $iface = "";
+$iface = "-i $INTERFACE" if ( $INTERFACE ne "" );
+open(PROG, "$TCPDUMP_PROG -n -tt -p $iface port 123 2>/dev/null |") or die "Can't run tcpdump: $!";
+
+# process the data
+
+if ( $VERBOSE == 1 && (time() - $dump_created < 10*60 || $total_count <= 25 * $num_clients) ) {
+ print "Collecting data... May take up to 100 minutes to display bad clients.\n\n";
+}
+
+if ( $VERBOSE ) {
+ printf " Time Total Num Client Client Delta Rate\n";
+ printf " Requests Clients IP Requests (sec) (sec)\n";
+}
+
+while(<PROG>) {
+
+ if ( $tcpdump_major > 3 || ($tcpdump_major == 3 && $tcpdump_minor >= 9 ) ) {
+ ($tstamp, $ip_str, $ip, $tofrom, $ip2, $ver, $s_or_c, $s_or_c2, $s_or_c3 ) = split;
+# print("$_");
+# printf( "tstamp: %s\n", $tstamp );
+# printf( "ip_literal: %s\n", $ip_str );
+# printf( "ip: %s\n", $ip );
+# printf( "tofrom: %s\n", $tofrom );
+# printf( "ip2: %s\n", $ip2 );
+# printf( "ver: %s\n", $ver );
+# printf( "s_or_c: %s\n", $s_or_c );
+
+ # I'm getting a few packets with incorrect NTP versions
+ if ( $ver !~ "NTPv" ) {
+ print("wrong protocol: $_" ) if ( $VERBOSE > 1 );
+ next;
+ }
+
+ if ( $ver !~ "NTPv[1-4]" ) {
+ print("unsupported NTP version: $_" ) if ( $VERBOSE > 1 );
+ next;
+ }
+
+ # sanity check the rest of the data
+
+ if ($s_or_c eq "+1s" || $s_or_c eq "-1s" ) {
+ $s_or_c = $s_or_c2;
+ $s_or_c2 = $s_or_c3;
+ $s_or_c3 = undef;
+ }
+
+ if ( !defined($s_or_c) ) {
+ print( "The server/client value is missing from the tcpdump\n" );
+ print( "output: $_\n" );
+ next;
+ }
+
+ $s_or_c =~ s/,$//;
+ $s_or_c = lc($s_or_c);
+ if ( $s_or_c eq "symmetric" && defined( $s_or_c2 ) ) {
+ $s_or_c2 = lc($s_or_c2);
+ $s_or_c = "sym_act" if ( $s_or_c2 eq "active," );
+ $s_or_c = "sym_pas" if ( $s_or_c2 eq "passive," );
+ $s_or_c2 = $s_or_c3;
+ $s_or_c3 = undef;
+ }
+
+ $s_or_c = "res1" if ( $s_or_c eq "reserved" );
+ $s_or_c = "unspec" if ( $s_or_c eq "unspecified" );
+ $s_or_c = "bcast" if ( $s_or_c eq "broadcast" );
+
+
+ if ( defined($s_or_c2) && $s_or_c2 eq "length"
+ && defined($s_or_c3) && $s_or_c3 ne "48" ) {
+ print("Warning: wrong length NTP packet: $_" ) if ( $VERBOSE > 2 );
+ }
+
+ } elsif ( $tcpdump_major == 3 && $tcpdump_minor == 8 ) {
+ ($tstamp, $ip_str, $ip, $tofrom, $ip2, $ver, $s_or_c, $s_or_c2 ) = split;
+# print("$_");
+# printf( "tstamp: %s\n", $tstamp );
+# printf( "ip_literal: %s\n", $ip_str );
+# printf( "ip: %s\n", $ip );
+# printf( "tofrom: %s\n", $tofrom );
+# printf( "ip2: %s\n", $ip2 );
+# printf( "ver: %s\n", $ver );
+# printf( "s_or_c: %s\n", $s_or_c );
+
+ # I'm getting a few packets with incorrect NTP versions
+ if ( $ver !~ "NTPv" ) {
+ print("wrong protocol: $_" ) if ( $VERBOSE > 1 );
+ next;
+ }
+
+ if ( $ver !~ "NTPv[1-4]" ) {
+ print("unsupported NTP version: $_" ) if ( $VERBOSE > 1 );
+ next;
+ }
+
+ if ( $ver =~ "^\\[len=[0-9][0-9]*]NTPv[1-4]\$" ) {
+ print("Warning: wrong length NTP packet: $_" ) if ( $VERBOSE > 2 );
+ }
+
+ # sanity check the rest of the data
+
+ if ($s_or_c eq "+1s" || $s_or_c eq "-1s" ) {
+ $s_or_c = $s_or_c2;
+ $s_or_c2 = undef;
+ }
+
+ if ( !defined($s_or_c) ) {
+ print( "The server/client value is missing from the tcpdump\n" );
+ print( "output: $_\n" );
+ next;
+ }
+
+ $s_or_c =~ s/,$//;
+ } else {
+ ($tstamp, $ip, $tofrom, $ip2, $ver, $s_or_c, $s_or_c2, $s_or_c3 ) = split;
+# print("$_");
+# printf( "tstamp: %s\n", $tstamp );
+# printf( "ip: %s\n", $ip );
+# printf( "tofrom: %s\n", $tofrom );
+# printf( "ip2: %s\n", $ip2 );
+# printf( "ver: %s\n", $ver );
+# printf( "s_or_c: %s\n", $s_or_c );
+
+ if ( $ver =~ "^\\[len=[0-9][0-9]*]" && $s_or_c =~ "^v[1-4]\$" ) {
+ print("Warning: wrong length NTP packet: $_" ) if ( $VERBOSE > 2 );
+ $ver = $s_or_c;
+ $s_or_c = $s_or_c2;
+ $s_or_c2 = $s_or_c3;
+ $s_or_c3 = undef;
+ }
+
+ # I'm getting a few packets with incorrect NTP versions
+ if ( $ver !~ "v" ) {
+ print("wrong protocol: $_" ) if ( $VERBOSE > 1 );
+ next;
+ }
+
+ if ( $ver !~ "v[1-4]" ) {
+ print("unsupported NTP version: $_" ) if ( $VERBOSE > 1 );
+ next;
+ }
+
+ # sanity check the rest of the data
+
+ if ($s_or_c eq "+1s" || $s_or_c eq "-1s" ) {
+ $s_or_c = $s_or_c2;
+ $s_or_c2 = $s_or_c3;
+ $s_or_c3 = undef;
+ }
+
+ if ( !defined($s_or_c) ) {
+ print( "The server/client value is missing from the tcpdump\n" );
+ print( "output: $_\n" );
+ next;
+ }
+ }
+
+ if ( $VERBOSE > 1
+ && $s_or_c ne "server" && $s_or_c ne "client"
+ && $s_or_c ne "sym_pas" && $s_or_c ne "sym_act"
+ && $s_or_c ne "res1" && $s_or_c ne "res2"
+ && $s_or_c ne "unspec" && $s_or_c ne "bcast"
+ ) {
+ printf( "Invalid server/client value: %s\n", $s_or_c );
+ printf( "tcpdump line: %s\n", $_ );
+ }
+
+ # select only packets being sent, not coming to
+ next if ( $s_or_c ne "client" && $s_or_c ne "sym_act" );
+
+
+
+ # clean up the variables, removing port numbers, punctuation, etc.
+ $ip =~ s/\.[0-9a-z_-]+$//;
+# $ip2 =~ s/\.[0-9a-z_-]+:$//;
+
+
+ # calculate stats
+ if ( defined($first_tstamp{$ip}) ) {
+ $count{$ip}++;
+ $delta = $tstamp - $last_tstamp{$ip};
+ if ( $count{$ip} < 2 ) {
+ $rate{$ip} = -1; # workaround old bug, stored in the dump
+ } elsif ( $count{$ip} < 10 ) {
+ $rate{$ip} = ($tstamp - $first_tstamp{$ip}) / ($count{$ip} - 1);
+ } else {
+ # Calculate weighted average. Recent deltas count for more.
+ # The most recent counts for 5% of the average, the 10th oldest
+ # counts for 2.9%, the 50th counts for 0.38%, the 100th for 0.03%
+ # This tries to allow for quick detection of clients changing their
+ # polling rate while ignoring the effects dropped packets.
+ $rate{$ip} = $rate{$ip} * 0.95 + $delta * 0.05;
+ }
+ $last_tstamp{$ip} = $tstamp;
+
+ } else {
+ $num_clients++;
+ $count{$ip} = 1;
+ $delta = -1;
+ $rate{$ip} = -1;
+ $first_tstamp{$ip} = $last_tstamp{$ip} = $tstamp;
+ $last_printed{$ip} = 0;
+ }
+
+ $total_count++;
+ $lterm_rate = $lterm_rate * $lterm_factor1
+ + ($tstamp - $prev_tstamp) * $lterm_factor2;
+ $cur_rate = $cur_rate * $cur_factor1
+ + ($tstamp - $prev_tstamp) * $cur_factor2;
+ $prev_tstamp = $tstamp;
+
+ # remove old entries, once a minute
+ if ( $tstamp - $last_cleaned > 60 ) {
+# print "cleaning...\n";
+ $last_cleaned = $tstamp;
+ $num_clients = 0;
+
+ # recalculate rate factors. They don't change much, but they
+ # do gradually drift as more clients join the pool.
+ if ( $total_count > 2 ) {
+ $base_rate = ($tstamp - $dump_created) / ($total_count - 1);
+ if ( $base_rate <= $lterm_period ) {
+ # the most recent 15days gives about 60% of the value of lterm_rate
+ $lterm_factor2 = $base_rate / $lterm_period;
+ $lterm_factor1 = 1 - $lterm_factor2;
+ }
+ if ( $lterm_rate <= $cur_period ) {
+ # the most recent 15min gives about 60% of the value of cur_rate
+ $cur_factor2 = $lterm_rate / $cur_period;
+ $cur_factor1 = 1 - $cur_factor2;
+ }
+ }
+ $dump_age = $tstamp - $dump_created;
+ if ( $dump_age < $lterm_period || $lterm_rate > $dump_age
+ || $lterm_rate <= $base_rate * .1 || $lterm_rate > $base_rate * 10 ) {
+ $lterm_rate = $base_rate;
+ }
+ if ( $dump_age < $cur_period || $lterm_rate > $dump_age
+ || $cur_rate <= $lterm_rate * .1 || $cur_rate > $lterm_rate * 10 ) {
+ $cur_rate = $lterm_rate;
+ }
+
+
+ # Rewrite the dump file and expire stale clients in a single pass.
+ #
+ # The file is opened in append mode and truncated only after the
+ # exclusive lock is held.  Opening with ">" emptied the file before
+ # the lock was taken, so a reader could find an empty or partial
+ # dump.  A failed open is now reported once and the dump is skipped
+ # for this pass, instead of printing to a closed handle.
+ my $dump_open = 0;
+ if ( $DUMPFILE ) {
+     if ( open(DUMP, ">>", $DUMPFILE ) ) {
+         flock(DUMP,LOCK_EX);
+         truncate(DUMP, 0);
+         $dump_open = 1;
+         # header: magic, version, total requests, created, written,
+         # current rate, long term rate
+         printf( DUMP "%s %d %d %d %.3f %g %g\n",
+                 $dump_magic, $dump_version, $total_count,
+                 $dump_created, $tstamp, $cur_rate, $lterm_rate );
+     } else {
+         warn "Could not write dumpfile $DUMPFILE: $!\n";
+     }
+ }
+
+ while (my ($key, $value) = each %last_tstamp) {
+
+     if ( $tstamp - $value > 2**14 + 60 ) { # I've seen clients with 14 poll
+         # silent for longer than the slowest poll interval: forget it
+         delete $count{$key};
+         delete $rate{$key};
+         delete $first_tstamp{$key};
+         delete $last_tstamp{$key};
+         delete $last_printed{$key};
+     } else {
+         $num_clients++;
+         # times are stored as offsets from the time of writing
+         printf( DUMP "%-15s %8d %9.3f %12.3f %9.3f\n",
+                 $key, $count{$key}, $rate{$key},
+                 $tstamp - $first_tstamp{$key}, $tstamp - $last_tstamp{$key} )
+             if ( $dump_open );
+     }
+ }
+ if ( $dump_open ) {
+     flock(DUMP,LOCK_UN);
+     close(DUMP);
+ }
+ }
+
+ next if ( $VERBOSE == 0 );
+
+ # keep bad clients from filling the log
+ if ( $VERBOSE == 1 ) {
+ my $lprt = $tstamp - $last_printed{$ip};
+ next if ( $lprt < 60*60 );
+ next if ( $count{$ip} < 100 || $rate{$ip} > 60 );
+ }
+ elsif ( $VERBOSE == 2 ) {
+ my $age = $tstamp - $first_tstamp{$ip};
+ my $lprt = $tstamp - $last_printed{$ip};
+ next if ( ($age > $lprt * 3 || $lprt < 2) && $lprt < 500 );
+ }
+
+ # print out the results
+ my @ltime = localtime( int($tstamp) );
+ if ( $last_mday != $ltime[3] ) {
+ printf( " --- Mark: %s ---\n", strftime("%D %T",@ltime) );
+ $last_mday = $ltime[3];
+ }
+ my $tstr = strftime("%T", @ltime);
+ $last_printed{$ip} = $tstamp;
+ if ( $count{$ip} == 1 ) {
+ printf( "%s %9d %6d %-15s %8d\n",
+ $tstr, $total_count, $num_clients, $ip, $count{$ip} );
+ } else {
+ printf( "%s %9d %6d %-15s %8d %+8.1f %8.2f\n",
+ $tstr, $total_count, $num_clients, $ip, $count{$ip},
+ $delta, $rate{$ip} );
+ }
+}