aboutsummaryrefslogtreecommitdiffstats
path: root/scripts/do-ntp-client-update
blob: 100bc6e610b96584440001a80b9a4f1bc2ae8c8a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
#!/usr/bin/perl -w

# This script is public domain, there is no copyright on it.
# - Wayne Schlitt

use strict;

use Getopt::Long;
use POSIX qw(strftime);
use Fcntl ':flock';

# Command-line options and their defaults.
#   -help                  print the usage text and exit
#   -startfile=/path/file  dump file to read instead of the default below
my $HELP = 0;
my $STARTFILE = "/var/log/ntpstats/ntp_stats.dump";

# GetOptions returns false when the command line could not be parsed.
my $result = GetOptions('help'        => \$HELP,
                        'startfile=s' => \$STARTFILE,
                       );

if ($HELP  || !$result) {
  print "Usage: ntp_clients_stats [options]\n";
  print "\n";
  print "      -help	Help on the options.\n";
  print "\n";
  print "      -startfile=/path/dumpfile File to read initial state\n";

  # An explicit -help is a successful run; a command line that failed to
  # parse is not, so let the caller see a non-zero exit status for it.
  exit($result ? 0 : 1);
}

# Per-client tables, keyed by the first field of each dump record.  They are
# only declared here; they get pre-sized (just to make things a little
# faster) once the dump has been read and its record count is known.
#   %count, %rate                 - second and third fields of the record
#   %first_tstamp, %last_tstamp   - $dump_written minus the fourth/fifth field
#   %last_printed                 - set to 0 for every record when loading
#   %is_active, %is_abusive       - 0/1 flags computed per client

my (%count, %rate, %first_tstamp, %last_tstamp, %last_printed, %is_active, %is_abusive);

# The first header field of the dump must equal $dump_magic.
# NOTE(review): $dump_version is not referenced by the version check, which
# compares against the literals "1" and "2".
my $dump_magic   = "ntp_stats";
my $dump_version = 1;

# Header timestamps read from the dump, and their difference
# ($dump_age = $dump_written - $dump_created).
my $dump_created = 0;
my $dump_written = 0;
my $dump_age     = 0;

# Header values, running totals and derived estimates.
my $total_count  = 0;    # read from the dump header
my $active_count = 0;    # sum of %count over clients judged active
my $cur_rate     = 0;    # read from the dump header; forced to -1 for version 1
my $est_cur_rate = 0;    # 1/$cur_freq, or $est_lterm_rate for a young dump
my $lterm_rate   = 0;    # read from the dump header; forced to -1 for version 1
my $est_lterm_rate = 0;  # $dump_age / ($total_count - 1), or 99999
my $cur_freq     = 0;    # summed per-client frequency estimate
my $not_counted  = 0;    # clients left out of the $cur_freq sum
my $num_clients  = 0;    # number of client records processed
my $num_active   = 0;    # clients for which calc_is_active() returned 1
my $num_abusive  = 0;    # clients flagged in %is_abusive
my $last_cleaned = 0;
my $tdiff;               # time() - $dump_written, set while reading the dump

# Shared loop/scratch variables.
my ($ip, $key, $value, $sum, $rank, $max_dist);
my $stars = "*" x 80;


#
# This sub tries to determine if the client is active or not.  Since
# NTP is a stateless and connectionless (UDP based) protocol, we can
# never know for sure.
#
# Any client that has polled recently is assumed to be active,
# although this probably overcounts the active clients because many
# clients, such as those using ntpdate, only produce a short burst of
# traffic.
#
# A client that polls once per 2^14 sec (they exist) can have a very low
# count.  If they use iburst, they could have a count of only 4 or so.
# if they don't use iburst, they could have a count of 1.  Hence, any
# connection with a small count *may* be active.
#
# By testing this function against live data, I can see that it both
# sometimes counts clients as inactive when they are active, and counts
# clients as still being active for quite a while after they become inactive.
# Overall, it seems to do a pretty good job and the overcounts/undercounts
# are not too common and pretty much cancel each other out.  I suspect that
# there is a slight bias toward overcounting.
#
sub calc_is_active {
  # Argument: the client key to look up in %count, %rate and %last_tstamp.
  # Returns 1 when the client is judged to still be polling, 0 otherwise.
  my ($client) = @_;

  # Time between the client's last recorded poll and the moment the dump
  # was written.
  my $idle     = $dump_written - $last_tstamp{$client};
  my $polls    = $count{$client};
  # NOTE(review): %rate appears to hold the client's poll interval (larger
  # means slower polling) -- confirm against the dump writer.
  my $interval = $rate{$client};

  # Enough polls to show a pattern (or a large interval value), and the
  # silence so far is shorter than five times that interval.
  return 1 if ( ($polls > 5  ||  $interval > 256)  &&  $idle / 5. < $interval );

  # Possibly an iburst client: the initial rate value is misleading, but the
  # record is not old enough to toss yet.
  return 1 if ( $polls > 1  &&  $interval < 4  &&  $idle < 1024 + 60 );

  # Not clear what kind of clients these are, but they do show up.
  return 1 if ( $polls > 1  &&  $interval > 64 &&  $idle < 300 );

  # The client polled within the last minute; assume it is active.
  return 1 if ( $idle < 60 );

  # Otherwise assume the client has stopped polling.
  return 0;
}

# quickly read in the dump data
#
# The file is opened under a shared lock and its header line is parsed into
# $magic, $ver, $total_count, $dump_created, $dump_written, $cur_rate and
# $lterm_rate.  The loop retries until the header is complete and the dump's
# age is acceptable, then leaves the handle open (and locked) so the client
# records that follow the header can be slurped into @dump.
#
# Dies if the file cannot be opened, if no acceptable header was obtained
# within 15 seconds, or if the magic string / version is not recognised.

my ($magic, $ver);
my $t0 = time();
while ( 1 ) {
  open(DUMP, "<", $STARTFILE ) or die "Could not open startfile: $STARTFILE: $!";
  flock(DUMP,LOCK_SH);

  # An empty file yields undef here; treat it as an empty header so that it
  # is handled as "half-written" below instead of handing undef to split.
  my $header = <DUMP>;
  $header = "" if ( !defined($header) );

  ($magic, $ver, $total_count, $dump_created, $dump_written, $cur_rate, $lterm_rate) = split(' ', $header);

  # Enforce the deadline on every pass, including the half-written case
  # below; otherwise a file that never becomes valid would be retried forever.
  die "Timed out trying to get valid dump data from $STARTFILE" if ( time() - $t0 >= 15 );

  # make sure we don't process a half-written dump  (does this still happen??)
  if ( !defined($magic) || $magic eq "" || !defined($dump_written) ) {
    flock(DUMP,LOCK_UN);
    close(DUMP);
    select(undef, undef, undef, .5);
    next;
  }

  die "$STARTFILE is not a dump file created by ntp_clients" if ( !defined( $magic ) || $magic ne $dump_magic );
  die "Incorrect dump file version: $ver" if ( !defined( $ver ) || ($ver ne "1" && $ver ne "2") );

  # Accept the dump when it is between 1 and 58 seconds old, or when its
  # timestamp is more than 70 seconds old or more than a second in the
  # future.  Anything else is retried after a short sleep.
  # NOTE(review): presumably the writer refreshes the file about once a
  # minute and the retried cases are the ones close to a rewrite -- confirm.
  $tdiff = time() - $dump_written;
  last if ( ($tdiff > 0  &&  $tdiff < 59)
	    ||  $tdiff < -1  ||  $tdiff > 70 );
  flock(DUMP,LOCK_UN);
  close(DUMP);

  if ( $tdiff >= 59 ) {
    # The dump is about a minute old: poll twice a second for a newer one.
    # (59 is included here because 2 - 59 would be a negative timeout,
    # which does not sleep at all and turns the retry into a busy loop.)
    select(undef, undef, undef, .5);
  } else {
    # $tdiff is -1 or 0 here: wait until the dump is about two seconds old.
    select(undef, undef, undef, 2 - $tdiff);
  }
}

# The header was accepted; the rest of the file is one record per client.
my @dump = <DUMP>;
flock(DUMP,LOCK_UN);
close(DUMP);


# process the data

# For a version 1 dump, mark both header rates as unavailable.
# NOTE(review): presumably version 1 headers do not carry these two
# fields -- confirm against the dump writer.
if ( $ver eq "1" ) {
  $cur_rate   = -1;
  $lterm_rate = -1;
}

# Pre-size every per-client table from the number of records read.
foreach my $table ( \%count, \%rate, \%first_tstamp, \%last_tstamp,
                    \%last_printed, \%is_active, \%is_abusive ) {
  keys( %$table ) = $#dump;
}

# Long-term estimate: the time span covered by the dump divided by the
# number of intervals between counted requests.  With too few requests, fall
# back to a large sentinel value.
$dump_age = $dump_written - $dump_created;
$est_lterm_rate = ( $total_count > 2 )
                ? $dump_age / ($total_count - 1)
                : 99999;

# Load the client records.  Each record is whitespace separated:
#   key  count  rate  first_tstamp  last_tstamp
# The two timestamp fields are stored in the dump as offsets back from
# $dump_written; they are converted to absolute times here.
foreach my $line ( @dump ) {
  my ($key, $r_count, $r_rate, $r_first_tstamp, $r_last_tstamp) = split(' ', $line);

  # A blank or truncated record has no fifth field.  Skip it rather than
  # creating a bogus client under an undefined key and doing arithmetic on
  # undefined values.
  if ( !defined($r_last_tstamp) ) {
    warn "Skipping malformed dump record: $line";
    next;
  }

  $count{$key} = $r_count;
  $rate{$key} = $r_rate;
  $first_tstamp{$key} = $dump_written - $r_first_tstamp;
  $last_tstamp{$key} = $dump_written - $r_last_tstamp;
  $last_printed{$key} = 0;
}

# Visit every client once: decide whether it is active, add its share to
# the current-frequency estimate, and flag it as abusive where appropriate.
foreach my $client ( keys %last_tstamp ) {

  my $active = calc_is_active($client);
  $is_active{$client} = $active;
  if ( $active ) {
    $num_active++;
    $active_count += $count{$client};
  }

  # If we have a version 1 dump file, try estimating the current bandwidth
  # this estimate tends to lag the info from ver 2 dumps, and can be
  # off by quite a bit, especially when there isn't much data yet.
  if ($rate{$client} > 0  &&  $count{$client} > 5 ) {
    # Only the last 15 minutes before the dump was written are considered;
    # timestamps older than that are clamped to the start of the window.
    my $period       = 15*60;
    my $window_start = $dump_written - $period;

    my $t1 = ( $dump_written - $first_tstamp{$client} > $period )
           ? $window_start
           : $first_tstamp{$client};

    my $t0 = ( $dump_written - $last_tstamp{$client} > $period )
           ? $window_start
           : $last_tstamp{$client};

    # Fraction of the window the client was seen in, divided by its rate.
    $cur_freq += (($t0 - $t1)/$period) / $rate{$client};

  } else {
    $not_counted++;
  }

  $num_clients++;

  # A client is left alone when it has fewer than 100 requests, when the
  # average gap between its requests is above 30, or when it is not active.
  # The count test comes first, so the division never sees a zero divisor.
  my $tolerated = ( $count{$client} < 100
                    || ($last_tstamp{$client} - $first_tstamp{$client}) / ($count{$client} - 1) > 30
                    || !$is_active{$client} );

  if ( $tolerated ) {
    $is_abusive{$client} = 0;
  } else {
    $num_abusive++;
    $is_abusive{$client} = 1;
  }
}


# assume clients that couldn't be counted (new?) are like the rest
#$cur_freq += $not_counted * $lterm_freq / $num_clients;
# fudge for clients that couldn't be counted (ntpdate? new?): each of them
# adds 1/2048 to the summed frequency.
$cur_freq += $not_counted / 2048;

# $cur_freq stays at zero when the dump holds no clients, or when every
# client's contribution is zero and none were left uncounted.  Use the same
# large sentinel as the long-term estimate instead of dying on a division
# by zero before the report is printed.
$est_cur_rate = ( $cur_freq > 0 ) ? 1/$cur_freq : 99999;

# For a dump younger than 6*1024 + 60 seconds, use the long-term estimate
# instead.
$est_cur_rate = $est_lterm_rate if ( $dump_age < 6*1024 + 60 );


# print the report
#
# The only output is "<active>:<abusive>": the two client counts as
# integers, separated by a colon, with no trailing newline.

printf("%d:%d", $num_active, $num_abusive);
# Human-readable report lines, currently disabled:
#printf( "Estimated active ntp pool clients:    %6d\n", $num_active);
#printf( "Estimated abusive ntp pool clients:   %6d\n", $num_abusive);
#printf( "Estimated inactive ntp pool clients:  %6d\n", $num_clients - $num_active);
#printf( "Total ntp pool clients being tracked: %6d\n", $num_clients);