From 71656f4680e9c9e435dcfcb4e1aa044035c93f54 Mon Sep 17 00:00:00 2001
From: stdweird
Date: Fri, 18 Sep 2015 09:14:58 +0200
Subject: [PATCH] Download: add retrieve method

---
 src/main/perl/Download.pm     | 104 +++++++++++++++++++++++++++++++++-
 src/main/perl/Download/URL.pm |  27 ++++++++-
 src/test/perl/download-url.t  |  21 ++++++-
 src/test/perl/download.t      | 101 ++++++++++++++++++++++++++++++++-
 4 files changed, 245 insertions(+), 8 deletions(-)

diff --git a/src/main/perl/Download.pm b/src/main/perl/Download.pm
index ec97b1cb..cd8193b3 100644
--- a/src/main/perl/Download.pm
+++ b/src/main/perl/Download.pm
@@ -19,6 +19,8 @@ our @EXPORT_OK = qw(set_url_defaults);
 
 # TODO: dependencies on curl and kinit
 
+Readonly my $MAX_RETRIES => 1000;
+
 Readonly::Hash my %DOWNLOAD_METHODS => {
     http => [qw(lwp curl)], # try https, if not, try http
     https => [qw(lwp curl)], # only https
@@ -141,15 +143,111 @@ No errors are logged.
 
 =cut
 
-# TODO: e.g.mkdir, check write rights, tempdir, in case there's no intermediate file...
+# TODO: e.g. mkdir, check write rights, tempdir, in case there's no intermediate file...
 
 sub prepare_destination
 {
     my ($self, $destination) = @_;
 
-    my ($newdest);
+    return $destination;
+}
+
+=pod
+
+=item download
+
+Download the data from the url(s) to the destination.
+In case a retrieval fails, the next url is tried. If there are no more urls to try,
+it will reiterate over the original list of urls, at most C<retries> times per url,
+with a C<retry_wait> interval before each retry.
+
+Returns SUCCESS on success, undef in case of failure (and sets the C<fail> attribute).
+No errors are logged.
+
+=cut
+
+sub download
+{
+    my ($self) = @_;
+
+    # in case prepare_destination failed; the fail attribute is already set
+    return if (!defined($self->{destination}));
+
+    # in case parse_urls failed; the fail attribute is already set
+    return if (!defined($self->{urls}));
+
+    my %tried; # per-url retry counter
+    my $tries = 0; # total tries
+
+    # a shallow copy, so we can push/shift without changing the original list of urls
+    my @urls = @{$self->{urls}};
+
+    while (@urls) {
+        my $url = shift @urls;
+
+        my $txt = $url->{_string};
+        my $id = $url->{_id};
+
+        $self->verbose("download url $txt (id $id) attempt ",
+                       ($tried{$id} || 0) + 1,
+                       " total attempts $tries.");
+
+        # TODO: should we really wait if we try another url?
+        #       only wait on actual retry of same url(s)?
+        # the first attempt of the first url does not get a retry wait
+        my $wait = $url->{retry_wait};
+        if($tries && $wait) {
+            $self->debug(1, "sleep retry_wait $wait url $txt id $id");
+            sleep($wait);
+        };
+
+        # loop over methods and auths
+        # TODO: no waits here?
+        foreach my $method (@{$url->{method}}) {
+            foreach my $auth (@{$url->{auth}}) {
+                return SUCCESS if($self->retrieve($url, $method, $auth));
+                # TODO: warn or verbose the failures?
+            }
+        }
+
+        # tried this url
+        $tried{$id}++;
+
+        # if retries is not defined, try forever,
+        # but everything is limited by MAX_RETRIES (to avoid infinite loops)
+        if ((! defined($url->{retries})) || $tried{$id} < $url->{retries}) {
+            if ($tried{$id} >= $MAX_RETRIES) {
+                $self->warn("MAX_RETRIES $MAX_RETRIES reached for url $txt (id $id).");
+            } else {
+                push(@urls, $url);
+            }
+        } else {
+            $self->verbose("Not retrying url $txt (id $id) anymore");
+        }
+        $tries++;
+    }
+
+    return $self->fail("download failed: no more urls to try (total attempts $tries).");
+}
+
+=pod
+
+=item retrieve
+
+Retrieve a single C<$url> using method C<$method> and authentication C<$auth>.
+(The C<method> and C<auth> attributes of the url itself are ignored.)
+
+Returns SUCCESS on success, undef in case of failure (and sets the C<fail> attribute).
+No errors are logged.
+
+=cut
 
-    return $newdest;
+sub retrieve
+{
+    my ($self, $url, $method, $auth) = @_;
+
+    # TODO: actual retrieval; this is a stub that always reports success
+    return SUCCESS;
 }
 
 =pod
diff --git a/src/main/perl/Download/URL.pm b/src/main/perl/Download/URL.pm
index 49cb52c7..71398a41 100644
--- a/src/main/perl/Download/URL.pm
+++ b/src/main/perl/Download/URL.pm
@@ -51,6 +51,7 @@ Readonly::Hash my %URL_DEFAULTS => {
         port => undef,
         reverse => undef, # reverse proxy (default is false, i.e. forward)
     },
+
 };
 
 # create deep copy of defaults as private copy
@@ -88,6 +89,9 @@ sub _is_valid_url
 
     # 2 levels
     foreach my $k (sort keys %$url) {
+        # ignore keys starting with _
+        next if ($k =~ m/^_/);
+
         return if (!exists($URL_DEFAULTS{$k}));
         if (ref($url->{$k}) eq 'HASH') {
             return if (grep {! exists($URL_DEFAULTS{$k}->{$_})} keys %{$url->{$k}});
@@ -97,6 +101,23 @@ sub _is_valid_url
     return SUCCESS;
 }
 
+# Given a (valid) $url hashref, return its string representation
+sub _to_string
+{
+    my $url = shift;
+
+    my $txt = join('+',
+                   @{$url->{auth} || []},
+                   @{$url->{method} || []},
+                   $url->{proto},
+        );
+    $txt .= "://";
+    $txt .= $url->{server} || '';
+    $txt .= $url->{filename} || '';
+
+    return $txt;
+}
+
 # Merge 2 url hashrefs, the first url is updated in place.
 # First checks if both urls are valid urls with the _is_valid_url check.
 # If 3rd option update is set, existing key/value are overwritten,
@@ -223,7 +244,7 @@ Timeout in seconds for initial request which checks for changes/existence.
 
 =item retries
 
-The number retries (default 3).
+The number of retries (default 3). If undef, retry forever.
 
 =item retry_wait
 
@@ -310,6 +331,7 @@ sub parse_urls
     my ($self, $urls) = @_;
 
     my @newurls;
+
     foreach my $url (@$urls) {
         my $ref = ref($url);
         if($ref eq '') {
@@ -331,6 +353,9 @@ sub parse_urls
             return $self->fail("Unable to merge url with url defaults.");
         };
 
+        $url->{_string} = _to_string($url);
+        $url->{_id} = scalar(@newurls);
+
         push(@newurls, $url);
     }
 
diff --git a/src/test/perl/download-url.t b/src/test/perl/download-url.t
index 0d587bd9..5a0fe558 100644
--- a/src/test/perl/download-url.t
+++ b/src/test/perl/download-url.t
@@ -30,6 +30,10 @@ ok(CAF::Download::URL::_is_valid_url({server => 'myserver'}),
 ok(CAF::Download::URL::_is_valid_url({krb5 => {realm => 'VALUE'}, server => 'myserver'}),
    "valid 2nd level");
 
+# ignore keys starting with _
+ok(CAF::Download::URL::_is_valid_url({krb5 => {realm => 'VALUE'}, server => 'myserver', '_something' => 1}),
+   "valid 2nd level, ignore _keys");
+
 =item _merge_url
 
 =cut
@@ -89,6 +93,19 @@ $validurl2b->{server} = 'myserver';
 
 is_deeply($validurl2, $validurl2b, "url2 merged without update");
 
+=item _to_string
+
+=cut
+
+is(CAF::Download::URL::_to_string({
+       server => 'server',
+       filename => '/location',
+       proto => 'proto',
+       auth => [qw(a b)],
+       method => [qw(m n)],
+   }), "a+b+m+n+proto://server/location",
+   "generate correct string representation of url");
+
 =item set_url_defaults
 
 =cut
@@ -206,9 +223,9 @@ $d->{fail} = undef;
 
 # 2 valid urls, one string, one hashref
 my $current_defaults = set_url_defaults();
-my $url1 = {server => 'server1', filename => '/location1', proto => 'https'};
+my $url1 = {server => 'server1', filename => '/location1', proto => 'https', '_string' => 'https://server1/location1', '_id' => 0};
 my $url2_orig = {server => 'server2', filename => '/location2', proto => 'http'};
-my $url2 = {};
+my $url2 = {'_string' => 'http://server2/location2', '_id' => 1};
 ok(CAF::Download::URL::_merge_url($url2, $url2_orig, 1), "Made a copy of url2_orig");
 ok(CAF::Download::URL::_merge_url($url1, $current_defaults, 0), "merged url1 with current defaults");
 ok(CAF::Download::URL::_merge_url($url2, $current_defaults, 0), "merged url2 with current defaults");
 
diff --git a/src/test/perl/download.t b/src/test/perl/download.t
index 6c2111b4..c00e9a20 100644
--- a/src/test/perl/download.t
+++ b/src/test/perl/download.t
@@ -1,11 +1,21 @@
 use strict;
 use warnings;
+
+our $sleep = 0;
+BEGIN {
+    *CORE::GLOBAL::sleep = sub { $sleep += shift; };
+}
+
 use Test::More;
 use Test::Quattor;
 use CAF::Download qw(set_url_defaults);
+use Test::Quattor::Object;
 use Test::MockModule;
 use Cwd;
 
+my $obj = Test::Quattor::Object->new();
+my $mock = Test::MockModule->new('CAF::Download');
+
 =pod
 
 =head1 SYNOPSIS
@@ -19,12 +29,12 @@ Test all methods for C<CAF::Download>
 
 =cut
 
-my $d = CAF::Download->new("/tmp/dest", ["http://localhost"]);
+my $d = CAF::Download->new("/tmp/dest", ["http://localhost"], log => $obj);
 isa_ok($d, 'CAF::Download', 'is a CAF::Download instance');
 is($d->{setup}, 1, "default setup is 1");
 is($d->{cleanup}, 1, "default cleanup is 1");
 
-$d = CAF::Download->new("/tmp/dest", ["http://localhost"], setup => 0, cleanup => 0);
+$d = CAF::Download->new("/tmp/dest", ["http://localhost"], setup => 0, cleanup => 0, log => $obj);
 isa_ok($d, 'CAF::Download', 'is a CAF::Download instance');
 is($d->{setup}, 0, "setup disabled / set to 0");
 is($d->{cleanup}, 0, "cleanup disabled / set to 0");
@@ -34,6 +44,93 @@ is($d->{cleanup}, 0, "cleanup disabled / set to 0");
 
 =cut
 
 # TODO? what do we support?
+# not much, just return the input
+is_deeply($d->prepare_destination({x => 1}),
+          {x => 1}, "prepare destination returns argument");
+
+=item download
+
+=cut
+
+# test return undef with empty urls and destination
+my $uniq_fail = 'yyz';
+$d->{fail} = $uniq_fail;
+
+$d->{destination} = undef;
+$d->{urls} = [{}];
+ok(defined($d->{urls}), 'urls attribute defined for this test');
+ok(! defined($d->download()), 'download with undefined destination returns undef');
+is($d->{fail}, $uniq_fail, 'download with undefined destination does not modify fail attribute');
+
+$d->{destination} = '/a/file';
+$d->{urls} = undef;
+ok(defined($d->{destination}), 'destination attribute defined for this test');
+ok(! defined($d->download()), 'download with undefined urls returns undef');
+is($d->{fail}, $uniq_fail, 'download with undefined urls does not modify fail attribute');
+
+# return undef with all failures
+$d->{urls} = [];
+ok(! defined($d->download()), 'download with empty urls returns undef (no more urls to try)');
+is($d->{fail}, 'download failed: no more urls to try (total attempts 0).',
+   'no more urls to try fail message');
+
+# test loops and MAX_RETRIES
+my $retrievals = [];
+my $success = 3; # success after this number of retrievals
+$mock->mock('retrieve', sub {
+    my ($self, $url, $method, $auth) = @_;
+    push(@$retrievals, [$url->{_id}, $method, $auth]);
+    return scalar(@$retrievals) == $success;
+});
+
+# simple test
+$d->{urls} = [
+    {auth => ['a'], method => ['m'], retry_wait => 30, retries => 5, _string => 'u1', _id => 0},
+];
+
+$retrievals = [];
+$sleep = 0;
+ok($d->download(), 'download successful');
+is_deeply($retrievals, [
+    [qw(0 m a)],
+    [qw(0 m a)],
+    [qw(0 m a)],
+    ], "tried urls as expected");
+is($sleep, 2*30, 'slept 2*30 seconds due to retry_wait');
+
+$d->{urls} = [
+    {auth => ['a'], method => ['m'], retry_wait => 30, retries => 5, _string => 'u1', _id => 0},
+    {auth => ['b', 'c'], method => ['n', 'p'], retry_wait => 20, retries => 25, _string => 'u2', _id => 1},
+    {auth => ['d'], method => ['o'], _string => 'u3', _id => 2},
+];
+
+$success = -1; # fail all the way
+$retrievals = [];
+$sleep = 0;
+ok(! defined($d->download()), 'download failed');
+# total sleep:
+my $max_retries = 1000;
+# 1st url: 5 attempts at 30s, minus 1 for the very first attempt (no wait)
+# 2nd url: 25 attempts at 20s (the inner method/auth loops do not sleep)
+# 3rd url: no sleep (no retry_wait)
+is($sleep, (5-1)*30 + 25*20, "sleep due to retry_wait");
+# total retrievals:
+# 1st url: 5 attempts
+# 2nd url: 25 attempts times 2 methods times 2 auths
+# 3rd url: no retries limit, so max_retries attempts
+is(scalar(@$retrievals), 5 + 25*2*2 + $max_retries, 'expected number of retrievals');
+my @first_retrievals = @{$retrievals}[0..11];
+is_deeply(\@first_retrievals, [
+    [qw(0 m a)],
+    [qw(1 n b)], [qw(1 n c)], [qw(1 p b)], [qw(1 p c)],
+    [qw(2 o d)],
+    [qw(0 m a)],
+    [qw(1 n b)], [qw(1 n c)], [qw(1 p b)], [qw(1 p c)],
+    [qw(2 o d)],
+    ], "first 12 tried urls (i.e. 2 full iterations)");
+
+# original list of urls is not modified
+is(scalar(@{$d->{urls}}), 3, 'original list of urls unmodified');
 
 =pod
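
---

A minimal usage sketch of the retry behaviour added by this patch, based only
on the API exercised in the code and tests above; the destination path and
url values here are illustrative, not taken from the patch:

    use CAF::Download;

    # one url as a string, one as a hashref with explicit retry settings
    my $d = CAF::Download->new("/tmp/dest", [
        'https://server1/some/path',
        {
            server => 'server2',
            filename => '/some/path',
            proto => 'https',
            retries => 5,      # requeue this url at most 5 times
            retry_wait => 30,  # sleep 30s before each retry of this url
        },
    ]);

    # download() walks the urls in order; a failed url is requeued until its
    # retries limit (or MAX_RETRIES when retries is undef), and retrieve() is
    # called once per method/auth combination on each attempt.
    if ($d->download()) {
        print "download ok\n";
    } else {
        print "download failed: $d->{fail}\n";
    }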