#!/usr/bin/perl -w 

# by Erik Braun, license CC0

use strict;
use English;
use File::Basename;
use Getopt::Long;
use IPC::Open2; # open2
use File::Fetch;
use IPC::Cmd qw/ can_run /;
use List::MoreUtils qw(uniq);

use Data::Dumper;

use v5.10; # say, when
use experimental 'smartmatch'; # ~~

my $VERSION = '0.03';

my $mirmon_url = 'rsync://comedy.dante.de/MirMon/mirmon.list';
my $sites_url = 'http://mirrors.ctan.org/CTAN.sites';
my $states_url = 'rsync://comedy.dante.de/MirMon/mirmon.state';

my $verbose = 0;
my $max = 20;
my $protocols = 'all';
my $ping_command = 'fping -ae';

main( @ARGV ) unless caller();

sub main {
	Getopt::Long::Configure( 'bundling' );

	GetOptions(
		'mirmon_url|u=s' => \$mirmon_url,
		'states_url|s=s' => \$states_url,
		'max|m=i'        => \$max,
		'protocols|p=s'  => \$protocols,
		'verbose|v'      => \$verbose,
		'help|h'         => sub { usage(); exit },
		'version|V'      => sub { version(); exit },
	);

	my @protocols = set_protocols($protocols);

	my ($mirmon_file, $sites_file, $states_file) = 
			fetch_files($mirmon_url, $sites_url, $states_url);

 	my $mirrors_ref = get_mirrors($mirmon_file); 	

 	set_urls($mirrors_ref, $sites_file);

 	get_times($mirrors_ref);

 	set_states($mirrors_ref, $states_file);
 	
 	print_sorted($mirrors_ref, \@protocols);

	# print Dumper($mirrors_ref);

	exit 0;
}

sub fetch_files {
	my $mirmon_url = shift;
	my $sites_url = shift;
	my $states_url = shift;

	my ($mirmon_file, $sites_file, $states_file);

	my $ff = File::Fetch->new(uri => "$mirmon_url");	
	my $where = $ff->fetch(to => \$mirmon_file) or warn $ff->error;
	die "couldn't load $mirmon_url\n" if not defined $where;

	my $ff2 = File::Fetch->new(uri => "$sites_url");
	$where = $ff2->fetch(to => \$sites_file) or warn $ff2->error;
	die "couldn't load $sites_url\n" if not defined $where;

	my $ff3 = File::Fetch->new(uri => "$states_url");
	$where = $ff3->fetch(to => \$states_file) or warn $ff3->error;
	die "couldn't load $states_url\n" if not defined $where;

	check_prerequisites($ff, $ff2, $ff3);

	return ($mirmon_file, $sites_file, $states_file);
}

sub set_states {
	my $mirrors_ref=shift;	
	my $in = shift;
	my %mirrors=%$mirrors_ref; # Beware! %mirrors is a local copy, its contents not!

	for ( split /\n/, $in) {
		s/^\s+|\s+$//g; 
		if (m!^(\w+://(.*?)/.*?) (\d{10}) (\w+) !) {
			$mirrors{$2}{'age'}=$3;
			$mirrors{$2}{'status'}=$4;			
		} else {
			warn "unknown line in $states_url: $_\n";
		}
	}			

	return;	
}

sub set_urls {
	my $mirrors_ref=shift;	
	my $in = shift;

	for ( split /\n/, $in) {
		s/^\s+|\s+$//g; 
		next unless /URL:/;
		# reject lines with unusal characters
		next unless m![\w.:/-]!;
		if (m!URL: (\w+://(.*?)/.*)!) {
			my $hostname = replace_alias($2);
			$mirrors_ref->{$hostname}{'time'}=-1;
			push @{ $mirrors_ref->{$hostname}{'urls'} }, $1;
		} else {
			warn "unknown line in $sites_url: $_\n";
		}
	}			

	return;
}

sub print_sorted {
	my $mirrors_ref=shift;
	my $protocols_ref=shift;
	my %mirrors=%$mirrors_ref;	
	my @protocols=@$protocols_ref;

	my @hosts;
	my $now = time();

	foreach my $mirror (keys %mirrors) {
		unless ($mirrors{$mirror}{'time'} < 0) {
			push @hosts, $mirror;
		} else {
			# TODO check if these hosts are accessible via the appropriate protocols
			say "either down or blocked ping, removed from list: $mirror" if $verbose;		
		}
	}

	my @sorted_hosts = sort { $mirrors{$a}->{'time'} <=> $mirrors{$b}->{'time'} } @hosts;
	
	say "msec\tStatus\tAge\tMirror";
	my $i=1;

	foreach (@sorted_hosts) {
		my @output = qw//;
		if (defined $mirrors{$_}{'url'}) {

			for my $url (@{$mirrors{$_}{'urls'}}) {
			 	for (@protocols) {
			 		# say "\t$url" if $url =~ m!$_://!;
			 		push @output, $url if $url =~ m!$_://!;
		 		}
			}

			if (@output) {
				my $age = int(($now - $mirrors{$_}{'age'})/3600+0.4);
				print "$mirrors{$_}{'time'}";
				print "\t$mirrors{$_}{'status'}";
				print "\t${age}h";
			}

			# URL from mirmon
			# say "\t$mirrors{$_}{'url'}"; 
		}	else {
			say "Warning: host $_ not in $mirmon_url, but in $sites_url";
		}
		# URLs from CTAN.sites - official and more comprehensive
		say "Warning: URL $mirrors{$_}{'url'} not in $sites_url, but in $mirmon_url" if not defined $mirrors{$_}{'urls'};
		
		my $firstline=1;
		for (@output) {
			print "\t\t" unless $firstline;
			say "\t$_";
			$firstline=0;
		}
		
		next unless @output;
			
		print "\n";
		last if ($i++ == $max);
	}

	return;
}

sub get_times {
	my $mirrors_ref=shift;
	my %mirrors=%$mirrors_ref;

	my @hosts=keys(%mirrors);
	
	print  "probing " . scalar @hosts . " hosts: " if $verbose;

	if ($ping_command =~ /^fping/) {
	 	open2(\*PINGOUT, \*PINGIN, "$ping_command") or die "Can't start $ping_command: $!";
	
		say PINGIN for @hosts;

 		close PINGIN;

 		while (<PINGOUT>) {
			if (/(.*?) \((\d+\.?\d*) ms\)/) {
				say "$2: $1" if $verbose;
				$mirrors_ref->{$1}{'time'} = $2;
			} else {
				print "unknown result from $ping_command: $_";
			}
		}
		close PINGOUT;
	} else {
		# Fallback inetutils-ping / iputils-ping
		foreach my $host (@hosts) {
		 	open2(\*PINGOUT, \*PINGIN, "$ping_command $host") or die "Can't start $ping_command: $!";
		 	while (<PINGOUT>) {		 	
		 		next unless /^64/;	
				if (/time=(\d+\.?\d*) ms/) {
					say "$host: $1" if $verbose;
					$mirrors_ref->{$host}{'time'} = $1;
				} else {
					print "unknown result from $ping_command: $_";
				}
			}
			close PINGOUT;
		 	close PINGIN;
		}
	}

	return; 
}

sub get_mirrors {
	my $in = shift;
	my %mirrors;

	for ( split /\n/, $in) {
		next if /^#/ or /^Root/;
		# reject lines with unusal characters
		next unless m![\w.:/-]!;
		if (m!(\w+://(.*?)/.*)!) {
			$mirrors{$2}{'url'}="$1";
			$mirrors{$2}{'time'}=-1;
		} else {
			warn "unknown line in $mirmon_url: $_\n";
		}
	}

	return \%mirrors;
}

sub set_protocols {
	my $protocols = shift;

	my @in = uniq split/,/, $protocols;

	my @supported = qw/ rsync ftp http https /;
	my @protocols;

	if('all' ~~ @in ) {
		say "It's not useful to use »all« together with other protocols."
			if scalar @in > 1;
		
		return @supported;
	}

	for (@in) {
		if ($_ ~~ @supported) {
			push @protocols, $_;
		} else {
			say "Unknown protocol »$_«";
		}
	}

	return @supported unless @protocols;

	return @protocols;
}

sub replace_alias {
	my $hostname = shift;

	return ('ftp.uni-erlangen.de') if $hostname eq 'ftp.rrze.uni-erlangen.de';
	return ('ctan.cs.uu.nl')  if $hostname eq 'archive.cs.uu.nl';
	return ('ctan.cs.uu.nl')  if $hostname eq 'rsync.cs.uu.nl';
	return ('piotrkosoft.net')  if $hostname eq 'ftp.piotrkosoft.net';
	return ('muug.ca')  if $hostname eq 'ftp.muug.ca';
	return ('ctan.math.utah.edu')  if $hostname eq 'tug.ctan.org';
	return ('vesta.informatik.rwth-aachen.de')  if $hostname eq 'sunsite.informatik.rwth-aachen.de';

	return $hostname;
}

sub check_prerequisites {
	my $ff = shift;
	my $ff2 = shift;
	my $ff3 = shift;
    
	say "Mirmon URL: $mirmon_url\nCTAN sites: $sites_url" if $verbose;
	
	if ($File::Fetch::VERSION < 0.56 and 
		($ff->scheme eq 'https' or $ff2->scheme eq 'https')) {
	    warn <<"END"
WARNING: File::Fetch < 0.56 has a bug in connection with https
You may:
   Change the scheme in the URL to »http« (and leave the work to the webserver)
OR add the line »https  => [ qw|lwp wget curl| ],« to %METHODS in File/Fetch.pm
OR simply update your Perl installation.

END
	}

	if ($ff->scheme eq 'rsync' or $ff2->scheme eq 'rsync') {
		can_run('rsync') or warn <<"END"
»rsync« is not in your PATH. Fetching the list of mirrors may fail.
	
END
	}

	if ($ping_command =~ /^fping/ and not can_run('fping')) {
		$ping_command = 'ping -c 1 -W 1';
		warn <<"END"
»fping« is not in your PATH. On Debian/Ubuntu type: apt install fping
	
Falling back to the MUCH slower installed command »ping« (option -v is 
recommended). This may fail, since there are different implementations.
In case of strange behaviour try to set your locale to POSIX or en_US.

END
	}

	return;
}

sub version {
    my $progname = basename $0;
    
    say "$progname\t$VERSION";

    return;
}


sub usage {
    my $progname = basename $0;

    print <<"END"
This script returns a list of CTAN mirrors sorted by ping time.

Usage: $progname [options]

OPTIONS:
 -m, --max NR     maximum number of listed mirrors (default: $max),
                  set it to »0« for the entire list
 -p  --protocol   (all, rsync, ftp, http, https)  (default: $protocols)
 -v  --verbose    detailed output, can help with debugging
 -h, --help       this help
 -V, --version    print version
END

# Let's hope that rsync://comedy.dante.de/MirMon/ doesn't change
# -u, --url URL    mirror URL in mirmon format
#                  (default: $mirmon_url)
# -s, --states URL states file URL
#                  (default: $states_url)

}

exit;
