From 71656f4680e9c9e435dcfcb4e1aa044035c93f54 Mon Sep 17 00:00:00 2001
From: stdweird
Date: Fri, 18 Sep 2015 09:14:58 +0200
Subject: [PATCH] Download: add retrieve method

---
 src/main/perl/Download.pm     | 104 +++++++++++++++++++++++++++++++++-
 src/main/perl/Download/URL.pm |  27 ++++++++-
 src/test/perl/download-url.t  |  21 ++++++-
 src/test/perl/download.t      | 101 ++++++++++++++++++++++++++++++++-
 4 files changed, 245 insertions(+), 8 deletions(-)

diff --git a/src/main/perl/Download.pm b/src/main/perl/Download.pm
index ec97b1cb..cd8193b3 100644
--- a/src/main/perl/Download.pm
+++ b/src/main/perl/Download.pm
@@ -19,6 +19,8 @@ our @EXPORT_OK = qw(set_url_defaults);
 
 # TODO: dependencies on curl and kinit
 
+Readonly my $MAX_RETRIES => 1000;
+
 Readonly::Hash my %DOWNLOAD_METHODS => {
     http => [qw(lwp curl)], # try https, if not, try http
     https => [qw(lwp curl)], # only https
@@ -141,15 +143,111 @@ No errors are logged.
 
 =cut
 
-# TODO: e.g.mkdir, check write rights, tempdir, in case there's no intermediate file...
+# TODO: e.g. mkdir, check write rights, tempdir, in case there's no intermediate file...
 
 sub prepare_destination
 {
     my ($self, $destination) = @_;
 
-    my ($newdest);
+    return $destination;
+}
+
+=pod
+
+=item download
+
+Download the data from the url(s) to the destination.
+In case a retrieval fails, the next url is tried. If there are no more urls to try,
+it will reiterate over the original list of urls, at most C<retries> times per url,
+with a C<retry_wait> interval before each retry.
+
+Returns SUCCESS on success, undef in case of failure (and sets the C<fail> attribute).
+No errors are logged.
+
+=cut
+
+sub download
+{
+    my ($self) = @_;
+
+    # in case prepare_destination failed; the fail attribute is already set
+    return if (!defined($self->{destination}));
+
+    # in case parse_urls failed; the fail attribute is already set
+    return if (!defined($self->{urls}));
+
+    my %tried; # per-url retry counter
+    my $tries = 0; # total tries
+
+    # a shallow copy, so we can push/shift without changing the original list of urls
+    my @urls = @{$self->{urls}};
+
+    while (@urls) {
+        my $url = shift @urls;
+
+        my $txt = $url->{_string};
+        my $id = $url->{_id};
+
+        $self->verbose("download url $txt (id $id) attempt ",
+                       ($tried{$id} || 0) + 1,
+                       " total attempts $tries.");
+
+        # TODO: should we really wait if we try another url?
+        #       only wait on actual retry of same url(s)?
+        # the first attempt of the first url does not get a retry wait
+        my $wait = $url->{retry_wait};
+        if($tries && $wait) {
+            $self->debug(1, "sleep retry_wait $wait url $txt id $id");
+            sleep($wait);
+        };
+
+        # loop over methods and auths
+        # TODO: no waits here?
+        foreach my $method (@{$url->{method}}) {
+            foreach my $auth (@{$url->{auth}}) {
+                return SUCCESS if($self->retrieve($url, $method, $auth));
+                # TODO: warn or verbose the failures?
+            }
+        }
+
+        # tried this url
+        $tried{$id}++;
+
+        # if retries is not defined, try forever,
+        # but everything is limited by MAX_RETRIES (to avoid infinite loops)
+        if ((! defined($url->{retries})) || $tried{$id} < $url->{retries}) {
+            if ($tried{$id} >= $MAX_RETRIES) {
+                $self->warn("MAX_RETRIES $MAX_RETRIES reached for url $txt (id $id).");
+            } else {
+                push(@urls, $url);
+            }
+        } else {
+            $self->verbose("Not retrying url $txt (id $id) anymore");
+        }
+        $tries++;
+    }
+
+    return $self->fail("download failed: no more urls to try (total attempts $tries).");
+}
+
+=pod
+
+=item retrieve
+
+Retrieve a single C<$url> using method C<$method> and authentication C<$auth>.
+(The C<method> and C<auth> attributes of the url itself are ignored.)
+
+Returns SUCCESS on success, undef in case of failure (and sets the C<fail> attribute).
+No errors are logged.
+
+=cut
 
-    return $newdest;
+sub retrieve
+{
+    my ($self, $url, $method, $auth) = @_;
+
+    # TODO: actual retrieval; this is a stub that always reports success
+    return SUCCESS;
 }
 
 =pod
diff --git a/src/main/perl/Download/URL.pm b/src/main/perl/Download/URL.pm
index 49cb52c7..71398a41 100644
--- a/src/main/perl/Download/URL.pm
+++ b/src/main/perl/Download/URL.pm
@@ -51,6 +51,7 @@ Readonly::Hash my %URL_DEFAULTS => {
         port => undef,
         reverse => undef, # reverse proxy (default is false, i.e. forward)
     },
+
 };
 
 # create deep copy of defaults as private copy
@@ -88,6 +89,9 @@ sub _is_valid_url
 
     # 2 levels
     foreach my $k (sort keys %$url) {
+        # ignore keys starting with _
+        next if ($k =~ m/^_/);
+
         return if (!exists($URL_DEFAULTS{$k}));
         if (ref($url->{$k}) eq 'HASH') {
             return if (grep {! exists($URL_DEFAULTS{$k}->{$_})} keys %{$url->{$k}});
@@ -97,6 +101,23 @@ sub _is_valid_url
     return SUCCESS;
 }
 
+# Given a (valid) $url hashref, return its string representation
+sub _to_string
+{
+    my $url = shift;
+
+    my $txt = join('+',
+                   @{$url->{auth} || []},
+                   @{$url->{method} || []},
+                   $url->{proto},
+        );
+    $txt .= "://";
+    $txt .= $url->{server} || '';
+    $txt .= $url->{filename} || '';
+
+    return $txt;
+}
+
 # Merge 2 url hashrefs, the first url is updated in place.
 # First checks if both urls are valid urls with the _is_valid_url check.
 # If 3rd option update is set, existing key/value are overwritten,
@@ -223,7 +244,7 @@ Timeout in seconds for initial request which checks for changes/existence.
 
 =item retries
 
-The number retries (default 3).
+The number of retries (default 3). If undef, retry forever.
 
 =item retry_wait
 
@@ -310,6 +331,7 @@ sub parse_urls
     my ($self, $urls) = @_;
 
     my @newurls;
+
     foreach my $url (@$urls) {
         my $ref = ref($url);
         if($ref eq '') {
@@ -331,6 +353,9 @@ sub parse_urls
             return $self->fail("Unable to merge url with url defaults.");
         };
 
+        $url->{_string} = _to_string($url);
+        $url->{_id} = scalar(@newurls);
+
         push(@newurls, $url);
     }
 
diff --git a/src/test/perl/download-url.t b/src/test/perl/download-url.t
index 0d587bd9..5a0fe558 100644
--- a/src/test/perl/download-url.t
+++ b/src/test/perl/download-url.t
@@ -30,6 +30,10 @@ ok(CAF::Download::URL::_is_valid_url({server => 'myserver'}),
 ok(CAF::Download::URL::_is_valid_url({krb5 => {realm => 'VALUE'}, server => 'myserver'}),
    "valid 2nd level");
 
+# ignore keys starting with _
+ok(CAF::Download::URL::_is_valid_url({krb5 => {realm => 'VALUE'}, server => 'myserver', '_something' => 1}),
+   "valid 2nd level, ignore _keys");
+
 =item _merge_url
 
 =cut
@@ -89,6 +93,19 @@ $validurl2b->{server} = 'myserver';
 
 is_deeply($validurl2, $validurl2b, "url2 merged without update");
 
+=item _to_string
+
+=cut
+
+is(CAF::Download::URL::_to_string({
+       server => 'server',
+       filename => '/location',
+       proto => 'proto',
+       auth => [qw(a b)],
+       method => [qw(m n)],
+   }), "a+b+m+n+proto://server/location",
+   "generate correct string representation of url");
+
 =item set_url_defaults
 
 =cut
@@ -206,9 +223,9 @@ $d->{fail} = undef;
 
 # 2 valid urls, one string, one hashref
 my $current_defaults = set_url_defaults();
-my $url1 = {server => 'server1', filename => '/location1', proto => 'https'};
+my $url1 = {server => 'server1', filename => '/location1', proto => 'https', '_string' => 'https://server1/location1', '_id' => 0};
 my $url2_orig = {server => 'server2', filename => '/location2', proto => 'http'};
-my $url2 = {};
+my $url2 = {'_string' => 'http://server2/location2', '_id' => 1};
 ok(CAF::Download::URL::_merge_url($url2, $url2_orig, 1), "Made a copy of url2_orig");
 ok(CAF::Download::URL::_merge_url($url1, $current_defaults, 0), "merged url1 with current defaults");
 ok(CAF::Download::URL::_merge_url($url2, $current_defaults, 0), "merged url2 with current defaults");
 
diff --git a/src/test/perl/download.t b/src/test/perl/download.t
index 6c2111b4..c00e9a20 100644
--- a/src/test/perl/download.t
+++ b/src/test/perl/download.t
@@ -1,11 +1,21 @@
 use strict;
 use warnings;
+
+our $sleep = 0;
+BEGIN {
+    *CORE::GLOBAL::sleep = sub { $sleep += shift; };
+}
+
 use Test::More;
 use Test::Quattor;
 use CAF::Download qw(set_url_defaults);
+use Test::Quattor::Object;
 use Test::MockModule;
 use Cwd;
 
+my $obj = Test::Quattor::Object->new();
+my $mock = Test::MockModule->new('CAF::Download');
+
 =pod
 
 =head1 SYNOPSIS
@@ -19,12 +29,12 @@ Test all methods for C<CAF::Download>
 
 =cut
 
-my $d = CAF::Download->new("/tmp/dest", ["http://localhost"]);
+my $d = CAF::Download->new("/tmp/dest", ["http://localhost"], log => $obj);
 isa_ok($d, 'CAF::Download', 'is a CAF::Download instance');
 is($d->{setup}, 1, "default setup is 1");
 is($d->{cleanup}, 1, "default cleanup is 1");
 
-$d = CAF::Download->new("/tmp/dest", ["http://localhost"], setup => 0, cleanup => 0);
+$d = CAF::Download->new("/tmp/dest", ["http://localhost"], setup => 0, cleanup => 0, log => $obj);
 isa_ok($d, 'CAF::Download', 'is a CAF::Download instance');
 is($d->{setup}, 0, "setup disabled / set to 0");
 is($d->{cleanup}, 0, "cleanup disabled / set to 0");
@@ -34,6 +44,93 @@ is($d->{cleanup}, 0, "cleanup disabled / set to 0");
 
 =cut
 
 # TODO? what do we support?
+# not much, just return the input
+is_deeply($d->prepare_destination({x => 1}),
+          {x => 1}, "prepare destination returns argument");
+
+=item download
+
+=cut
+
+# test return undef with empty urls and destination
+my $uniq_fail = 'yyz';
+$d->{fail} = $uniq_fail;
+
+$d->{destination} = undef;
+$d->{urls} = [{}];
+ok(defined($d->{urls}), 'urls attribute defined for this test');
+ok(! defined($d->download()), 'download with undefined destination returns undef');
+is($d->{fail}, $uniq_fail, 'download with undefined destination does not modify fail attribute');
+
+$d->{destination} = '/a/file';
+$d->{urls} = undef;
+ok(defined($d->{destination}), 'destination attribute defined for this test');
+ok(! defined($d->download()), 'download with undefined urls returns undef');
+is($d->{fail}, $uniq_fail, 'download with undefined urls does not modify fail attribute');
+
+# return undef with all failures
+$d->{urls} = [];
+ok(! defined($d->download()), 'download with empty urls returns undef (no more urls to try)');
+is($d->{fail}, 'download failed: no more urls to try (total attempts 0).',
+   'no more urls to try fail message');
+
+# test loops and MAX_RETRIES
+my $retrievals = [];
+my $success = 3; # success after this number of retrievals
+$mock->mock('retrieve', sub {
+    my ($self, $url, $method, $auth) = @_;
+    push(@$retrievals, [$url->{_id}, $method, $auth]);
+    return scalar(@$retrievals) == $success;
+});
+
+# simple test
+$d->{urls} = [
+    {auth => ['a'], method => ['m'], retry_wait => 30, retries => 5, _string => 'u1', _id => 0},
+];
+
+$retrievals = [];
+$sleep = 0;
+ok($d->download(), 'download successful');
+is_deeply($retrievals, [
+    [qw(0 m a)],
+    [qw(0 m a)],
+    [qw(0 m a)],
+    ], "tried urls as expected");
+is($sleep, 2*30, 'slept 2*30 seconds due to retry_wait');
+
+$d->{urls} = [
+    {auth => ['a'], method => ['m'], retry_wait => 30, retries => 5, _string => 'u1', _id => 0},
+    {auth => ['b', 'c'], method => ['n', 'p'], retry_wait => 20, retries => 25, _string => 'u2', _id => 1},
+    {auth => ['d'], method => ['o'], _string => 'u3', _id => 2},
+];
+
+$success = -1; # fail all the way
+$retrievals = [];
+$sleep = 0;
+ok(! defined($d->download()), 'download failed');
+# total sleep:
+my $max_retries = 1000;
+# 1st url: 5 attempts at 30s, minus 1 for the very first attempt (no wait)
+# 2nd url: 25 attempts at 20s (the inner method/auth loops do not sleep)
+# 3rd url: no sleep (no retry_wait)
+is($sleep, (5-1)*30 + 25*20, "sleep due to retry_wait");
+# total retrievals:
+# 1st url: 5 attempts
+# 2nd url: 25 attempts times 2 methods times 2 auths
+# 3rd url: no retries limit, so max_retries attempts
+is(scalar(@$retrievals), 5 + 25*2*2 + $max_retries, 'expected number of retrievals');
+my @first_retrievals = @{$retrievals}[0..11];
+is_deeply(\@first_retrievals, [
+    [qw(0 m a)],
+    [qw(1 n b)], [qw(1 n c)], [qw(1 p b)], [qw(1 p c)],
+    [qw(2 o d)],
+    [qw(0 m a)],
+    [qw(1 n b)], [qw(1 n c)], [qw(1 p b)], [qw(1 p c)],
+    [qw(2 o d)],
+    ], "first 12 tried urls (i.e. 2 full iterations)");
+
+# original list of urls is not modified
+is(scalar(@{$d->{urls}}), 3, 'original list of urls unmodified');
 
 =pod
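
---

A minimal usage sketch of the retry behaviour added by this patch, based only
on the API exercised in the code and tests above; the destination path and
url values here are illustrative, not taken from the patch:

    use CAF::Download;

    # one url as a string, one as a hashref with explicit retry settings
    my $d = CAF::Download->new("/tmp/dest", [
        'https://server1/some/path',
        {
            server => 'server2',
            filename => '/some/path',
            proto => 'https',
            retries => 5,      # requeue this url at most 5 times
            retry_wait => 30,  # sleep 30s before each retry of this url
        },
    ]);

    # download() walks the urls in order; a failed url is requeued until its
    # retries limit (or MAX_RETRIES when retries is undef), and retrieve() is
    # called once per method/auth combination on each attempt.
    if ($d->download()) {
        print "download ok\n";
    } else {
        print "download failed: $d->{fail}\n";
    }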