Skip to content

Commit

Permalink
Download: add retrieve method
Browse files Browse the repository at this point in the history
  • Loading branch information
stdweird committed Dec 12, 2015
1 parent 3a25691 commit 71656f4
Show file tree
Hide file tree
Showing 4 changed files with 245 additions and 8 deletions.
104 changes: 101 additions & 3 deletions src/main/perl/Download.pm
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ our @EXPORT_OK = qw(set_url_defaults);

# TODO: dependencies on curl and kinit

Readonly my $MAX_RETRIES => 1000;

Readonly::Hash my %DOWNLOAD_METHODS => {
http => [qw(lwp curl)], # try https, if not, try http
https => [qw(lwp curl)], # only https
Expand Down Expand Up @@ -141,15 +143,111 @@ No errors are logged.
=cut

# TODO: e.g.mkdir, check write rights, tempdir, in case there's no intermediate file...
# TODO: e.g. mkdir, check write rights, tempdir, in case there's no intermediate file...

# Prepare the destination for download.
#
# Arguments:
#   $destination - the destination to prepare.
#
# Currently a placeholder: no preparation (mkdir, permission checks,
# tempdir handling, ...) is performed yet — see the TODO above.
# Returns the (unmodified) destination.
sub prepare_destination
{
    my ($self, $destination) = @_;

    # Removed the unused $newdest leftover; the destination is
    # returned as-is until actual preparation is implemented.
    return $destination;
}

=pod
=item download
Download the data from the url(s) to the destination.
In case a retrieval fails, the next url is tried. If there are no more urls to try,
it will iterate over the original list of urls again, at most C<retries> times per url,
with a C<retry_wait> wait interval before each retry.
Returns SUCCESS on success, undef in case of failure (and sets the C<fail> attribute).
No errors are logged.
=cut

# Download the data from the url(s) to the destination.
#
# Iterates over the list of urls (set by parse_urls); on failure of one
# url, the next one is tried. When the list is exhausted, urls that have
# not yet reached their C<retries> limit are re-queued, with a
# C<retry_wait> sleep before each retry. Total attempts per url are
# capped by $MAX_RETRIES to avoid infinite loops.
#
# Returns SUCCESS on success, undef on failure (sets the C<fail>
# attribute via fail()). No errors are logged.
sub download
{
    my ($self) = @_;

    # prepare_destination failed earlier; fail attribute is already set
    return if (!defined($self->{destination}));

    # parse_urls failed earlier; fail attribute is already set
    return if (!defined($self->{urls}));

    my %tried;      # per-url retry counter, keyed on url _id
    my $tries = 0;  # total attempts over all urls

    # a shallow copy, so we can push/shift without changing the original list of urls
    my @urls = @{$self->{urls}};

    while (@urls) {
        my $url = shift @urls;

        my $txt = $url->{_string};
        my $id = $url->{_id};

        $self->verbose("download url $txt (id $id) attempt ",
                       ($tried{$id} || 0) + 1,
                       " total attempts $tries.");

        # TODO: should we really wait if we try another url?
        #       only wait on actual retry of same url(s)?
        # first attempt of first url does not get a retry wait
        my $wait = $url->{retry_wait};
        if ($tries && $wait) {
            $self->debug(1, "sleep retry_wait $wait url $txt id $id");
            sleep($wait);
        };

        # loop over all method/auth combinations for this url
        # TODO: no waits here?
        foreach my $method (@{$url->{method}}) {
            foreach my $auth (@{$url->{auth}}) {
                return SUCCESS if ($self->retrieve($url, $method, $auth));
                # TODO: warn or verbose the failures?
            }
        }

        # this url has now been tried (once more)
        $tried{$id}++;

        # if retries is not defined, try forever;
        # everything is limited by MAX_RETRIES (to avoid infinite loops)
        if ((! defined($url->{retries})) || $tried{$id} < $url->{retries}) {
            if ($tried{$id} >= $MAX_RETRIES) {
                # fixed typo in message: "(ud $id)" -> "(id $id)",
                # consistent with the other log messages in this sub
                $self->warn("MAX_RETRIES $MAX_RETRIES reached for url $txt (id $id).");
            } else {
                push(@urls, $url);
            }
        } else {
            $self->verbose("Not retrying url $txt (id $id) anymore");
        }
        $tries++;
    }

    return $self->fail("download failed: no more urls to try (total attempts $tries).");
}

=pod
=item retrieve
Retrieve a single C<$url> using method C<$method> and authentication C<$auth>.
(The C<method> and C<auth> attributes of the url are ignored).
Returns SUCCESS on success, undef in case of failure (and sets the C<fail> attribute).
No errors are logged.
=cut

return $newdest;
# Retrieve a single $url using method $method and authentication $auth.
# NOTE(review): stub implementation — no actual transfer is performed yet;
# it unconditionally reports success. download() treats any true return
# value from this method as a completed transfer.
sub retrieve
{
my ($self, $url, $method, $auth) = @_;

# TODO: implement the actual retrieval for the given method/auth
return SUCCESS;
}

=pod
Expand Down
27 changes: 26 additions & 1 deletion src/main/perl/Download/URL.pm
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ Readonly::Hash my %URL_DEFAULTS => {
port => undef,
reverse => undef, # reverse proxy (default is false, i.e. forward)
},

};

# create deep copy of defaults as private copy
Expand Down Expand Up @@ -88,6 +89,9 @@ sub _is_valid_url

# 2 levels
foreach my $k (sort keys %$url) {
# ignore keys starting with _
next if ($k =~ m/^_/);

return if (!exists($URL_DEFAULTS{$k}));
if (ref($url->{$k}) eq 'HASH') {
return if (grep {! exists($URL_DEFAULTS{$k}->{$_})} keys %{$url->{$k}});
Expand All @@ -97,6 +101,23 @@ sub _is_valid_url
return SUCCESS;
}

# Given a (valid) $url hashref, return its string representation:
#   auth1+auth2+method1+method2+proto://server/filename
# The auth and method lists are optional; server and filename default
# to the empty string when absent.
sub _to_string
{
    my $url = shift;

    my $txt = join('+',
                   @{$url->{auth} || []},
                   @{$url->{method} || []},
                   $url->{proto},
        );
    $txt .= "://";
    $txt .= $url->{server} || '';
    # was a garbled "$url->(unknown)"; the filename completes the url
    # (cf. the _to_string unit test expecting "...://server/location")
    $txt .= $url->{filename} || '';

    return $txt;
}

# Merge 2 url hashrefs, the first url is updated in place.
# First checks if both urls are valid urls with the _is_valid_url check.
# If 3rd option update is set, existing key/value are overwritten,
Expand Down Expand Up @@ -223,7 +244,7 @@ Timeout in seconds for initial request which checks for changes/existence.
=item retries
The number retries (default 3).
The number of retries (default 3). If undef, retry forever.
=item retry_wait
Expand Down Expand Up @@ -310,6 +331,7 @@ sub parse_urls
my ($self, $urls) = @_;

my @newurls;

foreach my $url (@$urls) {
my $ref = ref($url);
if($ref eq '') {
Expand All @@ -331,6 +353,9 @@ sub parse_urls
return $self->fail("Unable to merge url with url defaults.");
};

$url->{_string} = _to_string($url);
$url->{_id} = scalar(@newurls);

push(@newurls, $url);
}

Expand Down
21 changes: 19 additions & 2 deletions src/test/perl/download-url.t
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ ok(CAF::Download::URL::_is_valid_url({server => 'myserver'}),
ok(CAF::Download::URL::_is_valid_url({krb5 => {realm => 'VALUE'}, server => 'myserver'}),
"valid 2nd level");

# ignore keys with _
ok(CAF::Download::URL::_is_valid_url({krb5 => {realm => 'VALUE'}, server => 'myserver', '_something' => 1}),
"valid 2nd level, ignore _keys");

=item _merge_url
=cut
Expand Down Expand Up @@ -89,6 +93,19 @@ $validurl2b->{server} = 'myserver';

is_deeply($validurl2, $validurl2b, "url2 merged without update");

=item _to_string
=cut

is(CAF::Download::URL::_to_string({
server => 'server',
filename => '/location',
proto => 'proto',
auth => [qw(a b)],
method => [qw(m n)],
}), "a+b+m+n+proto://server/location",
"generate correct string representation of url");

=item set_url_defaults
=cut
Expand Down Expand Up @@ -206,9 +223,9 @@ $d->{fail} = undef;

# 2 valid urls, one string, one hashref
my $current_defaults = set_url_defaults();
my $url1 = {server => 'server1', filename => '/location1', proto => 'https'};
my $url1 = {server => 'server1', filename => '/location1', proto => 'https', '_string' => 'https://server1/location1', '_id' => 0};
my $url2_orig = {server => 'server2', filename => '/location2', proto => 'http'};
my $url2 = {};
my $url2 = {'_string' => 'http://server2/location2', '_id' => 1};
ok(CAF::Download::URL::_merge_url($url2, $url2_orig, 1), "Made a copy of url2_orig");
ok(CAF::Download::URL::_merge_url($url1, $current_defaults, 0), "merged url1 with current defaults");
ok(CAF::Download::URL::_merge_url($url2, $current_defaults, 0), "merged url2 with current defaults");
Expand Down
101 changes: 99 additions & 2 deletions src/test/perl/download.t
Original file line number Diff line number Diff line change
@@ -1,11 +1,21 @@
use strict;
use warnings;

our $sleep = 0;
BEGIN {
*CORE::GLOBAL::sleep = sub { $sleep += shift; };
}

use Test::More;
use Test::Quattor;
use CAF::Download qw(set_url_defaults);
use Test::Quattor::Object;
use Test::MockModule;
use Cwd;

my $obj = Test::Quattor::Object->new();
my $mock = Test::MockModule->new('CAF::Download');

=pod
=head1 SYNOPSIS
Expand All @@ -19,12 +29,12 @@ Test all methods for C<CAF::Download>
=cut


my $d = CAF::Download->new("/tmp/dest", ["http://localhost"]);
my $d = CAF::Download->new("/tmp/dest", ["http://localhost"], log => $obj);
isa_ok($d, 'CAF::Download', 'is a CAF::Download instance');
is($d->{setup}, 1, "default setup is 1");
is($d->{cleanup}, 1, "default cleanup is 1");

$d = CAF::Download->new("/tmp/dest", ["http://localhost"], setup => 0, cleanup => 0);
$d = CAF::Download->new("/tmp/dest", ["http://localhost"], setup => 0, cleanup => 0, log => $obj);
isa_ok($d, 'CAF::Download', 'is a CAF::Download instance');
is($d->{setup}, 0, "setup disabled / set to 0");
is($d->{cleanup}, 0, "cleanup disabled / set to 0");
Expand All @@ -34,6 +44,93 @@ is($d->{cleanup}, 0, "cleanup disabled / set to 0");
=cut

# TODO? what do we support?
# not much, just return the input
is_deeply($d->prepare_destination({x => 1}),
{x => 1}, "prepare destination returns argument");

=item download
=cut

# test return undef with empty urls and destination
my $uniq_fail = 'yyz';
$d->{fail} = $uniq_fail;

$d->{destination} = undef;
$d->{urls} = [{}];
ok(defined($d->{urls}), 'urls attribute defined for this test');
ok(! defined($d->download()), 'download with undefined destination returns undef');
is($d->{fail}, $uniq_fail, 'download with undefined destination does not modify fail attribute');

$d->{destination} = '/a/file';
$d->{urls} = undef;
ok(defined($d->{destination}), 'destination attribute defined for this test');
ok(! defined($d->download()), 'download with undefined urls returns undef');
is($d->{fail}, $uniq_fail, 'download with undefined urls does not modify fail attribute');

# return undef with all failures
$d->{urls} = [];
ok(! defined($d->download()), 'download with empty urls returns undef (no more urls to try)');
is($d->{fail}, 'download failed: no more urls to try (total attempts 0).',
'no more urls to try fail message');

# test loops and MAX_RETRIES
my $retrievals = [];
my $success = 3; # success after this number of retrievals
$mock->mock('retrieve', sub {
my ($self, $url, $method, $auth) = @_;
push(@$retrievals, [$url->{_id}, $method, $auth]);
return scalar(@$retrievals) == $success;
});

# simple test
$d->{urls} = [
{auth => ['a'], method => ['m'], retry_wait => 30, retries => 5, _string => 'u1', _id => 0},
];

$retrievals = [];
$sleep = 0;
ok($d->download(), 'download succesful');
is_deeply($retrievals, [
[qw(0 m a)],
[qw(0 m a)],
[qw(0 m a)],
], "tried downloaded urls");
is($sleep, 2*30, 'slept 2*30 seconds due to retry_wait');

$d->{urls} = [
{auth => ['a'], method => ['m'], retry_wait => 30, retries => 5, _string => 'u1', _id => 0},
{auth => ['b', 'c'], method => ['n', 'p'], retry_wait => 20, retries => 25, _string => 'u2', _id => 1},
{auth => ['d'], method => ['o'], _string => 'u3', _id => 2},
];

$success = -1; # fail all the way
$retrievals = [];
$sleep = 0;
ok(! defined($d->download()), 'download failed');
# sleep in total
my $max_retries = 1000;
# 1st url, 5 times minus 1 for very first attempt
# 2nd: 25*20, no sleep due to multiple auth/method
# 3rd: no sleep due to no retry_wait
is($sleep, (5-1)*30 + 25*20, "sleep due to retry_wait");
# total retrievals
# 1st 5 times
# 2nd: 25 times 2 method times 2 auth
# 3rd: no limit, so max_retries
is(scalar(@$retrievals), 5 + 25*2*2 + $max_retries, 'expected number of retrievals');
my @first_retrievals = @{$retrievals}[0..11];
is_deeply(\@first_retrievals, [
[qw(0 m a)],
[qw(1 n b)], [qw(1 n c)], [qw(1 p b)], [qw(1 p c)],
[qw(2 o d)],
[qw(0 m a)],
[qw(1 n b)], [qw(1 n c)], [qw(1 p b)], [qw(1 p c)],
[qw(2 o d)],
], "first 12 (i.e. 2 iterations) tried downloaded urls");

# original list of urls is not modified
is(scalar(@{$d->{urls}}), 3 , 'original list of urls unmodified');

=pod
Expand Down

0 comments on commit 71656f4

Please sign in to comment.