#!/usr/bin/perl -w

# This script is public domain, there is no copyright on it.
#   - Wayne Schlitt
#
# Reads a dump file (by default /var/log/ntpstats/ntp_stats.dump, see
# -startfile) and prints a report about the NTP clients recorded in it:
# estimated active/abusive/inactive counts, request rates and bandwidth,
# busy subnets, the fastest and slowest pollers, and several histograms.

use strict;

use Getopt::Long;
use POSIX qw(strftime);
use Fcntl ':flock';

my $HELP = 0;
my $STARTFILE = "/var/log/ntpstats/ntp_stats.dump";

my $result = GetOptions('help'        => \$HELP,
                        'startfile=s' => \$STARTFILE,
                        );

if ($HELP || !$result) {
    print "Usage: ntp_clients_stats [options]\n";
    print "\n";
    print " -help Help on the options.\n";
    print "\n";
    print " -startfile=/path/dumpfile File to read initial state\n";
    exit(0);
}

# Per-client state, keyed by client IP.  %first_tstamp and %last_tstamp
# hold absolute times (the dump stores them relative to $dump_written).
my (%count, %rate, %first_tstamp, %last_tstamp, %is_active, %is_abusive);

my $dump_magic = "ntp_stats";
my $dump_created = 0;
my $dump_written = 0;
my $dump_age = 0;
my $total_count = 0;
my $active_count = 0;
my $cur_rate = 0;
my $est_cur_rate = 0;
my $lterm_rate = 0;
my $est_lterm_rate = 0;
my $cur_freq = 0;
my $not_counted = 0;
my $num_clients = 0;
my $num_active = 0;
my $num_abusive = 0;
my $tdiff;

my $stars = "*" x 80;

# The histograms use logarithmic bins that grow by sqrt(2) per bin.
my $base = log( sqrt(2) );


#
# This sub tries to determine if the client is active or not.  Since
# NTP is a stateless and connectionless (UDP based) protocol, we can
# never know for sure.
#
# Any client that has polled recently is assumed to be active,
# although this probably overcounts the active clients because many
# clients, such as those from ntpdate, only give a short burst of
# traffic.
#
# A client that polls once per 2^14 sec (they exist) can have a very low
# count.  If they use iburst, they could have a count of only 4 or so.
# If they don't use iburst, they could have a count of 1.  Hence, any
# connection with a small count *may* be active.
#
# By testing this function against live data, I can see that it both
# sometimes counts clients as inactive when they are active, and counts
# clients as still being active for quite a while after they become
# inactive.  Overall, it seems to do a pretty good job and the
# overcounts/undercounts are not too common and pretty much cancel each
# other out.  I suspect that there is a slight bias toward overcounting.
#
# Takes the client IP (a key of %count/%rate/%last_tstamp) and returns
# 1 if the client is considered active, 0 otherwise.
#
sub calc_is_active
{
    my $ip = $_[0];

    my $age = $dump_written - $last_tstamp{$ip};

    if ( ($count{$ip} > 5 || $rate{$ip} > 256) && $age / 5. < $rate{$ip} ) {
        # client has polled enough to show a pattern and has missed fewer
        # than 4 poll intervals.  (Or, is rapidly increasing the poll)
        return 1;
    } elsif ( $count{$ip} > 1 && $rate{$ip} < 4 && $age < 1024 + 60 ) {
        # client may well have used iburst, which will give misleading
        # initial rate values, but it isn't old enough to toss yet.
        return 1;
    } elsif ( $count{$ip} > 1 && $rate{$ip} > 64 && $age < 300 ) {
        # I'm not sure what kind of clients these are, but they show up.
        return 1;
    } elsif ( $age < 60 ) {
        # client just polled, assume it is active.
        return 1;
    }

    # assume client has stopped polling.
    return 0;
}


#
# Print the request rate and the inbound bandwidth that corresponds to it.
#   $hdr  - label printed at the start of each line
#   $rate - seconds between requests; callers only pass values > 0
#
sub print_rates
{
    my ($hdr, $rate) = @_;

    printf( "%s request rate: %.3f seconds between requests (%.2f req/sec)\n",
            $hdr, $rate, 1/$rate );
    printf( "%s bandwidth in: %6.3f KBytes/s %7.3f Kbits/s\n",
            $hdr, 76./(1024*$rate), 76*8./(1024*$rate) );
    printf( "%s bandwidth in: %6.3f GB/month %7.3f Gb/month\n",
            $hdr,
            30*24*60*60*76./(1024*1024*1024*$rate),
            30*24*60*60*76*8./(1024*1024*1024*$rate) );
}


#
# Map a value onto its histogram bin.  Values of 1 or less (including
# negative ones) all land in bin 0; above that the bin is the rounded
# logarithm of the value in base sqrt(2).
#
sub bin_index
{
    my ($value) = @_;

    return 0 if ( $value <= 1 );
    return int( log( $value )/$base + 0.5 );
}


#
# Print one histogram.
#   $dist         - array ref, bin index => number of clients (holes allowed)
#   $integer_bins - true to print whole-number bin ranges and to skip bins
#                   that cannot contain a whole number (used for counts)
#
sub print_distribution
{
    my ($dist, $integer_bins) = @_;

    my $max_dist = 0;
    foreach my $n (@$dist) {
        $max_dist = $n if ( defined $n && $max_dist < $n );
    }

    print " bin ( bin range ) count\n";

    # An empty @$dist never enters this loop, so $max_dist is > 0 below.
    foreach my $idx (0 .. $#$dist) {
        my $n = defined( $dist->[$idx] ) ? $dist->[$idx] : 0;

        my $low = $idx ? 2**($idx/2 - .25) : 0;
        my $high = 2**($idx/2 + .25);
        my $center = 2**($idx/2);
        my $num_star = 45 * ($n / $max_dist);

        if ( !$integer_bins ) {
            printf( "%7.1f (%7.1f - %7.1f) %5d %.*s\n",
                    $center, $low, $high, $n, $num_star, $stars );
            next;
        }

        if ( int( $low ) == int( $high ) ) {
            printf( "Error: value in zero range bin!: %d %7.1f - %7.1f\n",
                    $idx, $low, $high ) if ( $n != 0 );
            next;
        }

        $low++;
        $center = $high if ( int( $low ) == int( $high ) );

        printf( "%7d (%7d - %7d) %5d %.*s\n",
                $center, $low, $high, $n, $num_star, $stars );
    }
}


#
# Print a ranked table of clients.
#   $clients      - array ref of client IPs, already sorted
#   $limit        - maximum number of rows to print
#   $mark_abusive - true to append " !" to rows of abusive clients
#
sub print_ranked_clients
{
    my ($clients, $limit, $mark_abusive) = @_;

    print "Rank First Seen Client IP Requests Rate Usage Cumulative\n";

    my $sum = 0;
    my $rank = 0;
    foreach my $ip (@$clients) {
        $rank++;

        my $first_seen = strftime( "%D %T", localtime( $first_tstamp{$ip} ) );

        if ( $is_active{$ip} ) {
            # $active_count includes this client's count, so it is > 0 here.
            $sum += $count{$ip};
            printf( "%3d %s %-15s %7d %8.2f %5.2f%% %5.2f%% *",
                    $rank, $first_seen, $ip, $count{$ip}, $rate{$ip},
                    100 * $count{$ip} / $active_count,
                    100 * $sum / $active_count );
        } else {
            printf( "%3d %s %-15s %7d %8.2f (%5.2f%%)",
                    $rank, $first_seen, $ip, $count{$ip}, $rate{$ip},
                    100 * $count{$ip} / ($active_count + $count{$ip} ) );
        }
        print " !" if ( $mark_abusive && $is_abusive{$ip} );
        print "\n";

        last if ( $rank >= $limit );
    }
}


# quickly read in the dump data

my ($magic, $ver);
my $read_started = time();
my $dump_fh;

while ( 1 ) {
    open( $dump_fh, "<", $STARTFILE )
        or die "Could not open startfile: $STARTFILE: $!";
    # Locking is best effort: the result is deliberately not checked.
    flock( $dump_fh, LOCK_SH );

    my $header = <$dump_fh>;
    $header = "" if ( !defined( $header ) );

    ($magic, $ver, $total_count, $dump_created, $dump_written,
     $cur_rate, $lterm_rate) = split( ' ', $header );

    # make sure we don't process a half-written dump (does this still
    # happen??)
    if ( !defined($magic) || $magic eq "" || !defined($dump_written) ) {
        flock( $dump_fh, LOCK_UN );
        close( $dump_fh );

        # Give up here too, otherwise a permanently truncated file would
        # keep us retrying forever.
        die "Timed out trying to get valid dump data from $STARTFILE"
            if ( time() - $read_started >= 15 );

        select( undef, undef, undef, .5 );
        next;
    }

    die "Timed out trying to get valid dump data from $STARTFILE"
        if ( time() - $read_started >= 15 );

    die "$STARTFILE is not a dump file created by ntp_clients"
        if ( !defined( $magic ) || $magic ne $dump_magic );

    die "Incorrect dump file version: $ver"
        if ( !defined( $ver ) || ($ver ne "1" && $ver ne "2") );

    # Only accept the dump when its age is outside the -1..0 and 59..70
    # second windows; inside them we wait and re-read.
    # NOTE(review): presumably those windows are when the writer may be
    # rewriting the file -- confirm against the dump writer.
    $tdiff = time() - $dump_written;
    last if ( ($tdiff > 0 && $tdiff < 59) || $tdiff < -1 || $tdiff > 70 );

    flock( $dump_fh, LOCK_UN );
    close( $dump_fh );

    if ( $tdiff >= 59 ) {
        # 59 is included so that "2 - $tdiff" below can never be a
        # negative timeout.
        select( undef, undef, undef, .5 );
    } else {
        select( undef, undef, undef, 2 - $tdiff );
    }
}

# The handle is still open and locked here; read the per-client lines.
my @dump = <$dump_fh>;

flock( $dump_fh, LOCK_UN );
close( $dump_fh );


# process the data

# Version 1 dumps do not carry the rates; -1 means "not available".
$cur_rate   = -1 if ( $ver eq "1" || !defined( $cur_rate ) );
$lterm_rate = -1 if ( $ver eq "1" || !defined( $lterm_rate ) );

# pre-allocate the hashes, just to make things a little faster
if ( @dump ) {
    foreach my $hash (\%count, \%rate, \%first_tstamp, \%last_tstamp,
                      \%is_active, \%is_abusive) {
        keys( %$hash ) = scalar( @dump );
    }
}

$dump_age = $dump_written - $dump_created;
if ( $total_count > 2 ) {
    $est_lterm_rate = $dump_age / ($total_count - 1);
} else {
    $est_lterm_rate = 99999;
}

foreach my $line ( @dump ) {
    my ($key, $r_count, $r_rate, $r_first_tstamp, $r_last_tstamp)
        = split( ' ', $line );

    # skip blank or truncated lines instead of storing undefined values
    next if ( !defined( $r_last_tstamp ) );

    $count{$key} = $r_count;
    $rate{$key} = $r_rate;
    $first_tstamp{$key} = $dump_written - $r_first_tstamp;
    $last_tstamp{$key} = $dump_written - $r_last_tstamp;
}

foreach my $ip (keys %last_tstamp) {
    $is_active{$ip} = calc_is_active($ip);

    if ( $is_active{$ip} ) {
        $num_active++;
        $active_count += $count{$ip};
    }

    # If we have a version 1 dump file, try estimating the current bandwidth.
    # This estimate tends to lag the info from ver 2 dumps, and can be
    # off by quite a bit, especially when there isn't much data yet.
    if ( $rate{$ip} > 0 && $count{$ip} > 5 ) {
        my $period = 15*60;

        # Clamp the client's first and last request times to the most
        # recent $period seconds of the dump.
        my $window_start = $first_tstamp{$ip};
        if ( $dump_written - $first_tstamp{$ip} > $period ) {
            $window_start = $dump_written - $period;
        }
        my $window_end = $last_tstamp{$ip};
        if ( $dump_written - $last_tstamp{$ip} > $period ) {
            $window_end = $dump_written - $period;
        }

        $cur_freq += (($window_end - $window_start)/$period) / $rate{$ip};
    } else {
        $not_counted++;
    }

    $num_clients++;

    # The division is safe: it is only reached when the count is >= 100.
    if ( $count{$ip} < 100
         || ($last_tstamp{$ip} - $first_tstamp{$ip}) / ($count{$ip} - 1) > 30
         || !$is_active{$ip} ) {
        $is_abusive{$ip} = 0;
    } else {
        $num_abusive++;
        $is_abusive{$ip} = 1;
    }
}

# assume clients that couldn't be counted (new?) are like the rest
#$cur_freq += $not_counted * $lterm_freq / $num_clients;

# fudge for clients that couldn't be counted (ntpdate? new?)
$cur_freq += $not_counted / 2048;

# $cur_freq is 0 when there is nothing to estimate from; leave the
# estimate at 0 ("not available") rather than dividing by zero.
$est_cur_rate = $cur_freq > 0 ? 1/$cur_freq : 0;
$est_cur_rate = $est_lterm_rate if ( $dump_age < 6*1024 + 60 );


# print the report

printf( "Estimated active ntp pool clients: %6d\n", $num_active );
printf( "Estimated abusive ntp pool clients: %6d\n", $num_abusive );
printf( "Estimated inactive ntp pool clients: %6d\n",
        $num_clients - $num_active );
printf( "Total ntp pool clients being tracked: %6d\n", $num_clients );
print "Note: NTP is a stateless and connectionless (UDP based) protocol, so\n";
print " exact numbers can't be determined.\n\n";

if ( $dump_age <= 2048 ) {
    # need at least two polls from the 1024 before we have good data
    print "** Warning: this dump file is very new and many results will be misleading. **\n";
} elsif ( $dump_age <= 4096 ) {
    # this is barely long enough to detect most abusive clients
    # (30s * 100req) and most of the graphs are still pretty useless.
    print "Note: this dump file is too new to give very accurate results.\n";
}

printf( "%9d ntp requests, in total, have been seen since %s\n",
        $total_count,
        strftime( "%D %T", localtime( $dump_created ) ) );
printf( "%9d (%4.1f%%) are from clients that are still active.\n",
        $active_count, 100 * ($active_count / $total_count) )
    if ( $total_count > 0 );

if ( $dump_age > 12*60*60 ) {
    if ( $lterm_rate > 0 ) {
        print_rates( "Long term", $lterm_rate );
    } elsif ( $est_lterm_rate > 0 ) {
        print_rates( "Est Long term", $est_lterm_rate );
    }
}

if ( $cur_rate > 0 ) {
    print_rates( "Current", $cur_rate );
} elsif ( $est_cur_rate > 0 ) {
    print_rates( "Est current", $est_cur_rate );
}

print "(NTP packets are usually 76 bytes, UDP overhead included, in each direction.)\n";

print "\n";
printf( "The dump file was written %d seconds ago, at %s\n",
        $tdiff, strftime( "%D %T", localtime( $dump_written ) ) );

exit if ( $total_count < 1 );


print "\n";
print "Subnets with many clients:\n";

# Active clients grouped by their address with the last octet removed.
my (%class_c, %class_c_rate, %class_c_count, %class_c_abusive);
keys( %class_c ) = $num_clients if ( $num_clients > 0 );

foreach my $ip (keys( %last_tstamp )) {
    next if ( ! $is_active{$ip} );

    my $subnet = $ip;
    $subnet =~ s/^([0-9.]*)\.[0-9][0-9]*$/$1/;

    $class_c{$subnet}++;
    $class_c_rate{$subnet} += 1./$rate{$ip} if ( $rate{$ip} > 0 );
    $class_c_count{$subnet} += $count{$ip} if ( $count{$ip} > 0 );
    $class_c_abusive{$subnet} += $is_abusive{$ip};
}

print "# of Subnet Total Aggregate Abusive\n";
print " IPs Count Rate Clients\n";
foreach my $subnet (sort { $class_c{$b} <=> $class_c{$a} } keys %class_c) {
    # sorted by size, so everything after the first small subnet is small
    last if ( $class_c{$subnet} < 4 );

    printf "%4d %-13s", $class_c{$subnet}, $subnet . ".x";
    if ( defined( $class_c_count{$subnet} ) ) {
        printf " %8d", $class_c_count{$subnet};
    }
    if ( defined( $class_c_rate{$subnet} ) ) {
        printf " %9.3f", 1./$class_c_rate{$subnet};
    }
    if ( defined( $class_c_abusive{$subnet} ) ) {
        printf " %7d", $class_c_abusive{$subnet};
    }
    print "\n";
}


print "\n";
print "Clients with rapid updates (min requests of 100):\n";

my @bad_clients;
foreach my $ip (keys %last_tstamp) {
    my $is_rapid = $count{$ip} >= 100 && $rate{$ip} <= 60;

    next if ( !$is_rapid && !$is_abusive{$ip} );
    push @bad_clients, $ip;
}

print_ranked_clients( [ sort { $count{$b} <=> $count{$a} } @bad_clients ],
                      100, 1 );

print "* = \"active\" = probably will send another request.\n";
print "! = \"abusive\" = min requests of 100 and an average rate of less than 30s\n";
print " between requests over the life of the entire connection.\n";
print "Percentages are based on all packets from *active* clients. If the client\n";
print "is not active, its percentage is what it would be if it were active.\n";


print "\n\n";
print "Clients with very long updates (min requests of 10):\n";

my @great_clients;
foreach my $ip (keys %last_tstamp) {
    next if ( $count{$ip} < 10 || $rate{$ip} < 3000 );
    push @great_clients, $ip;
}

print_ranked_clients( [ sort { $rate{$b} <=> $rate{$a} } @great_clients ],
                      10, 0 );


my @client_dist;

print "\n";
print "Clients distribution by count:\n";

@client_dist = ();
foreach my $ip (keys %last_tstamp) {
    $client_dist[ bin_index( $count{$ip} ) ]++;
}
print_distribution( \@client_dist, 1 );


print "\n";
print "Clients distribution by rate (min requests of 10): \n";

@client_dist = ();
foreach my $ip (keys %last_tstamp) {
    next if ( $count{$ip} < 10 );
    $client_dist[ bin_index( $rate{$ip} ) ]++;
}
print_distribution( \@client_dist, 0 );


print "\n";
print "Clients distribution by amount of time client was active (in hours):\n";

@client_dist = ();
foreach my $ip (keys %last_tstamp) {
    my $age = ($last_tstamp{$ip} - $first_tstamp{$ip})/(60*60);
    $client_dist[ bin_index( $age ) ]++;
}
print_distribution( \@client_dist, 0 );


print "\n";
print "Clients distribution by time since first request (in hours):\n";

@client_dist = ();
foreach my $ip (keys %last_tstamp) {
    my $age = ($dump_written - $first_tstamp{$ip})/(60*60);
    $client_dist[ bin_index( $age ) ]++;
}
print_distribution( \@client_dist, 0 );


print "\n";
print "Active clients distribution by time since last request (in secs):\n";

@client_dist = ();
foreach my $ip (keys %last_tstamp) {
    next if ( ! $is_active{$ip} );
    my $age = ($dump_written - $last_tstamp{$ip});
    $client_dist[ bin_index( $age ) ]++;
}
print_distribution( \@client_dist, 0 );


print "\n";
print "Inactive clients distribution by time since last request (in secs):\n";

@client_dist = ();
foreach my $ip (keys %last_tstamp) {
    next if ( $is_active{$ip} );
    my $age = ($dump_written - $last_tstamp{$ip});
    my $idx = bin_index( $age );

    # Lists every inactive client that falls in one of the first ten bins.
    # NOTE(review): this looks like leftover diagnostic output; it is kept
    # because it is part of the current report.
    printf( "%-15s inactive %5d %7.2f > %7.2f\n",
            $ip, $count{$ip},
            ($dump_written - $last_tstamp{$ip}) / 10.,$rate{$ip} )
        if ( $idx < 10 );

    $client_dist[$idx]++;
}
print_distribution( \@client_dist, 0 );