-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathimport-gcis
executable file
·128 lines (111 loc) · 3.82 KB
/
import-gcis
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env perl
use Mojo::UserAgent;
use File::Temp;
use Sys::Hostname qw/hostname/;
use Smart::Comments;
use Time::Duration qw/duration/;
use v5.14;
use lib '.';
use Virtuoso;
# Raise the Mojolicious message size limit to 50 MiB (52428800 bytes);
# presumably some of the fetched listings exceed the default limit.
$ENV{MOJO_MAX_MESSAGE_SIZE} = 52428800;

# Database credentials are read from a Perl file that must evaluate to a
# hash reference. The config file looks like this :
#   { user => 'x', pass => 'y' }
#
my $config = "$ENV{HOME}/etc/isql.conf";
my $conf = require $config;
my $pw = $conf->{pass};
my $user = $conf->{user};

# Choose the GCIS server to import from based on the name of the machine
# this script runs on; refuse to run on any host that is not recognised.
my $hostname = hostname();
my $url_base =
      $hostname =~ /gcis-dev-rdf/  ? q[https://data-dev.globalchange.gov]
    : $hostname =~ /gcis-test-rdf/ ? q[https://data-stage.globalchange.gov]
    : $hostname =~ /gcis-prod-rdf/ ? q[https://data.globalchange.gov]
    : die "no host for $hostname"
    ;

# Shared HTTP client (follows up to 3 redirects) and the Virtuoso handle.
# Everything is loaded into a temporary graph first; main() renames it to
# the public graph once the import has finished.
my $ua = Mojo::UserAgent->new()->max_redirects(3);
my $v = Virtuoso->new(
    user => $user,
    pass => $pw,
    graph_uri => q[https://tmp.data.globalchange.gov]
);

# Call with parentheses rather than the legacy "&main;" form, which would
# implicitly hand the caller's @_ to main().
main();
# info MESSAGE
#
# Print MESSAGE to standard output, prefixed with the current local time
# right-aligned in a 30 character column, and record the same message in
# the Virtuoso logger at info level.
#
# The ($) prototype is kept on purpose: call sites invoke this sub without
# parentheses ("info $resource if ...").
sub info($) {
    my ($message) = @_;
    my $timestamp = localtime;    # scalar context: human readable date string
    printf "%30s: %s\n", $timestamp, $message;
    $v->logger->info($message);
}
# debug MESSAGE
#
# Record MESSAGE in the Virtuoso logger only (nothing is printed to the
# terminal). Note that the message is written at info level, not at a
# separate debug level.
sub debug {
    my ($message) = @_;
    return $v->logger->info($message);
}
# get_to_tmp PATH [, QUERY]
#
# Download the Turtle representation of PATH ("$url_base$path.ttl") into a
# temporary file.
#
#   PATH  - resource path on the GCIS server; required, dies with "no path"
#           when missing or false.
#   QUERY - optional hash ref of query parameters (defaults to none).
#
# Returns the File::Temp object holding the downloaded content; the object
# stringifies to the file name. On a connection or HTTP error a warning is
# emitted and undef is returned.
sub get_to_tmp {
    my $path = shift or die "no path";
    my $query = shift || {};

    my $url = Mojo::URL->new($url_base)->path($path . '.ttl');
    $url->query(%$query);

    my $tx = $ua->get($url);

    # Use error/res instead of the older "success" accessor (deprecated in
    # later Mojolicious releases). Depending on the Mojolicious version the
    # error is either a plain message or a hash ref with "message" and
    # (for HTTP errors) "code"; format both so the warning stays readable
    # instead of printing "HASH(0x...)".
    if (my $err = $tx->error) {
        my $reason = ref $err eq 'HASH'
            ? join(' ', grep { defined } @{$err}{qw/code message/})
            : $err;
        warn "Error getting $url : $reason";
        return undef;
    }
    my $res = $tx->res;

    # The temporary file is only used for its name: the response body is
    # moved over it. The file is cleaned up by File::Temp once the returned
    # object goes out of scope.
    my $tmp = File::Temp->new;
    $res->content->asset->move_to("$tmp") or die $!;
    return $tmp;
}
# get_json PATH [, QUERY]
#
# Fetch "$url_base$path.json" and return the decoded JSON data.
#
#   PATH  - resource path on the GCIS server.
#   QUERY - optional hash ref of query parameters (defaults to none).
#
# On a connection or HTTP error a warning is emitted and undef is returned.
# Dies when the response arrived but its body does not decode to a true
# JSON value.
sub get_json {
    my $path = shift;
    my $query = shift || {};

    my $url = Mojo::URL->new($url_base)->path($path . '.json');
    $url->query(%$query);

    my $tx = $ua->get($url);

    # Use error/res instead of the older "success" accessor (deprecated in
    # later Mojolicious releases). Depending on the Mojolicious version the
    # error is either a plain message or a hash ref with "message" and
    # (for HTTP errors) "code"; format both so the warning stays readable
    # instead of printing "HASH(0x...)".
    if (my $err = $tx->error) {
        my $reason = ref $err eq 'HASH'
            ? join(' ', grep { defined } @{$err}{qw/code message/})
            : $err;
        warn "Error getting $url : $reason";
        return undef;
    }
    my $res = $tx->res;

    my $json = $res->json or die $url.' : '.$res->message.' : '.$res->body;
    return $json;
}
# main
#
# Run a complete import of GCIS into Virtuoso:
#
#   1. Drop the working graphs.
#   2. For every top-level resource type, list all instances and load the
#      Turtle of each one.
#   3. Load the instrument instances of every platform.
#   4. Load every report together with its figures, tables, findings,
#      chapters and references.
#   5. Replace the public graph with the freshly filled temporary graph.
#   6. Print a summary and remove the log file.
#
# A listing or download that fails has already been reported with a warning
# by get_json / get_to_tmp; such items are skipped here so that a single
# failed request neither aborts the whole import nor is counted as a
# loaded file.
sub main {
    my $start = time;
    my $count = 0;

    info "getting from $url_base";
    $v->drop_graph;
    $v->drop_graph('https://tmp.data.globalchange.gov');

    # Each resource type is listed exactly once ("activity" used to appear
    # twice, which imported every activity a second time and inflated the
    # counter).
    for my $resource (qw/lexicon article journal book webpage region person organization dataset activity platform instrument model project scenario image role_type generic/) {
        info $resource;
        my $instances = get_json("/$resource", { all => 1 }) or next;
        for my $instance (@$instances) { ### importing-->[%] done
            debug $instance->{uri};
            my $ttl = get_to_tmp("$instance->{uri}", { show_all => 1 }) or next;
            $v->load_ttl_file($ttl);
            $count++;
        }
    }

    # Instrument instances.
    my $platforms = get_json("/platform", { all => 1 }) || [];
    for my $platform (@$platforms) {
        my $instances = get_json("/platform/$platform->{identifier}/instrument") or next;
        for my $instance (@$instances) {
            my $ttl = get_to_tmp($instance->{uri}, { show_all => 1 }) or next;
            $v->load_ttl_file($ttl);
            $count++;
        }
    }

    # Reports and the resources nested below each report.
    my $reports = get_json("/report", { all => 1 }) || [];
    for my $report (@$reports) {
        my $id = $report->{identifier};
        my $report_ttl = get_to_tmp("/report/$id", { show_all => 1 }) or next;

        # A report that could not be loaded is skipped together with all
        # of its nested resources.
        $v->load_ttl_file($report_ttl) or next;
        $count++;
        info "report $id";

        for my $resource (qw/figure table finding chapter reference/) {
            my $items = get_json("/report/$id/$resource", { all => 1 }) or next;
            info $resource if @$items;
            for my $item (@$items) { ### importing--->[%] done
                debug $item->{uri};
                my $ttl = get_to_tmp("$item->{uri}", { show_all => 1 }) or next;
                $v->load_ttl_file($ttl);
                $count++;
            }
        }
    }

    # Swap the freshly loaded temporary graph in as the public graph.
    info "Renaming";
    $v->drop_graph('https://data.globalchange.gov');
    $v->rename_graph_to('https://data.globalchange.gov');

    say "time elapsed : ".duration(time - $start);
    say "ttl files loaded : ".$count;
    say "removing logfile ".$v->logger->path;
    sleep 3;
    unlink $v->logger->path;
}