Merge pull request #31 from unt-libraries/add-token-support

Add token support
unt-libraries · Jul 23, 2019 · 20b2991 · 20b2991
2 parents 1fe354c + 4d6ca9e
commit 20b2991
Show file tree

Hide file tree

Showing 3 changed files with 53 additions and 20 deletions.
diff --git a/README.md b/README.md
@@ -24,7 +24,7 @@ That gives you usage instructions:
 
 ```
 usage: wasapi-client [-h] [-b BASE_URI] [-d DESTINATION] [-l LOG] [-n] [-v]
-                     [--profile PROFILE | -u USER]
+                     [--profile PROFILE | -u USER | -t TOKEN]
                      [-c | -m | -p PROCESSES | -s | -r]
                      [--collection COLLECTION [COLLECTION ...]]
                      [--filename FILENAME] [--crawl CRAWL]
@@ -57,6 +57,8 @@ optional arguments:
   -v, --verbose         log verbosely; -v is INFO, -vv is DEBUG
   --profile PROFILE     profile to use for API authentication
   -u USER, --user USER  username for API authentication
+  -t TOKEN, --token TOKEN
+                        token for API authentication
   -c, --count           print number of files for download and exit
   -m, --manifest        generate checksum files only and exit
   -p PROCESSES, --processes PROCESSES
@@ -109,12 +111,26 @@ Order of precedence is command line, environment, config file.
 
 The following command downloads the WARC files available from a crawl
 with `crawl id` 256119 and logs program output to a file named
-`out.log`. Downloads are carried out by one process.
+`out.log`. The program will prompt the user to enter the password for
+user `myusername`. Downloads are carried out by one process.
+
+```
+ $ wasapi-client -u myusername --crawl 256119 --log /tmp/out.log -p 1
+```
+
+The following command downloads similarly, but user credentials are
+supplied by a configuration file.
 
 ```
  $ wasapi-client --profile unt --crawl 256119 --log out.log -p 1
 ```
 
+You may supply an API token instead of user credentials.
+
+```
+ $ wasapi-client --token thisistheAPItokenIwasgiven --crawl 256119 --log out.log -p 1
+```
+
 The following command downloads the WARC files available from crawls
 that occurred in the specified time range. Verbose logging is being
 written to a file named out.log. Downloads are happening via four

diff --git a/tests/test_wasapi_client.py b/tests/test_wasapi_client.py
@@ -98,8 +98,10 @@ def __init__(self):
 class Test_make_session:
     def test_make_session_auth(self):
         auth = ('user', 'pass')
-        session = wc.make_session(auth)
+        headers = {'Authorization': 'Token lalala'}
+        session = wc.make_session(auth, headers)
         assert session.auth == auth
+        assert 'Authorization' in session.headers
 
     def test_make_session_no_auth(self):
         session = wc.make_session(None)

diff --git a/wasapi_client.py b/wasapi_client.py
@@ -73,13 +73,14 @@ class WASAPIManifestError(Exception):
     pass
 
 
-def make_session(auth=None):
+def make_session(auth=None, headers={}):
     """Make a session that will store our auth.
 
     `auth` is a tuple of the form (user, password)
     """
     session = requests.Session()
     session.auth = auth
+    session.headers.update(headers)
     return session
 
 
@@ -100,17 +101,17 @@ def get_webdata(webdata_uri, session):
         sys.exit('Non-JSON response from {}:\n{}'.format(webdata_uri, err))
 
 
-def get_files_count(webdata_uri, auth=None):
+def get_files_count(webdata_uri, auth=None, headers={}):
     """Return total number of downloadable files."""
-    session = make_session(auth)
+    session = make_session(auth, headers)
     webdata = get_webdata(webdata_uri, session)
     session.close()
     return webdata.get('count', None)
 
 
-def get_files_size(page_uri, auth=None):
+def get_files_size(page_uri, auth=None, headers={}):
     """Return total size (bytes) of downloadable files."""
-    session = make_session(auth)
+    session = make_session(auth, headers)
     total = 0
     count = 0
     webdata = None
@@ -145,7 +146,8 @@ class Downloads:
     each available hash algorithm.
     """
 
-    def __init__(self, page_uri, auth=None, download=True, destination=''):
+    def __init__(self, page_uri, auth=None, download=True, destination='',
+                 headers={}):
         self.page_uri = page_uri
         self.auth = auth
         self.download = download
@@ -154,11 +156,12 @@ def __init__(self, page_uri, auth=None, download=True, destination=''):
         self.checksums = defaultdict(list)
         self.urls = []
         self.destination = '' if destination == '.' else destination
+        self.headers = headers
         self.populate_downloads()
 
     def populate_downloads(self):
         """Repeat webdata requests to gather downloadable file info."""
-        session = make_session(self.auth)
+        session = make_session(self.auth, self.headers)
         current_uri = self.page_uri
         while current_uri:
             webdata = get_webdata(current_uri, session)
@@ -333,11 +336,11 @@ class Downloader(multiprocessing.Process):
     """Worker for downloading web files with a persistent session."""
 
     def __init__(self, get_q, result_q, log_q, log_level=logging.ERROR,
-                 auth=None, destination='.', *args, **kwargs):
+                 auth=None, destination='.', headers={}, *args, **kwargs):
         super(Downloader, self).__init__(*args, **kwargs)
         self.get_q = get_q
         self.result_q = result_q
-        self.session = make_session(auth)
+        self.session = make_session(auth, headers)
         self.destination = destination
         configure_worker_logging(log_q, log_level)
 
@@ -430,6 +433,10 @@ def _parse_args(args=sys.argv[1:]):
                             '--user',
                             dest='user',
                             help='username for API authentication')
+    auth_group.add_argument('-t',
+                            '--token',
+                            dest='token',
+                            help='token for API authentication')
 
     out_group = parser.add_mutually_exclusive_group()
     out_group.add_argument('-c',
@@ -569,39 +576,46 @@ def stop_listener_logging():
         query = ''
     webdata_uri = '{}{}'.format(args.base_uri, query)
 
-    # Generate authentication tuple for the API calls.
-    auth = get_credentials(args.user, args.profile)
+    # Set up authentication.
+    auth = None
+    headers = {}
+    if args.token:
+        # Set the HTTP Authentication header.
+        headers['Authorization'] = 'Token {}'.format(args.token)
+    else:
+        # Generate authentication tuple for the API calls.
+        auth = get_credentials(args.user, args.profile)
 
     # If user wants the size, don't download files.
     if args.size:
-        count, size = get_files_size(webdata_uri, auth)
+        count, size = get_files_size(webdata_uri, auth, headers)
         print('Number of Files: ', count)
         print('Size of Files: ', convert_bytes(size))
         sys.exit()
 
     # If user wants a count, don't download files.
     if args.count:
-        print('Number of Files: ', get_files_count(webdata_uri, auth))
+        print('Number of Files: ', get_files_count(webdata_uri, auth, headers))
         sys.exit()
 
     # Process webdata requests to generate checksum files.
     if args.manifest:
         downloads = Downloads(webdata_uri, auth, download=False,
-                              destination=args.destination)
+                              destination=args.destination, headers=headers)
         downloads.generate_manifests()
         sys.exit()
 
     # Print the URLs for files that can be downloaded; don't download them.
     if args.urls:
         downloads = Downloads(webdata_uri, auth, download=False,
-                              destination=args.destination)
+                              destination=args.destination, headers=headers)
         for url in downloads.urls:
             print(url)
         sys.exit()
 
     # Process webdata requests to fill webdata file queue.
     downloads = Downloads(webdata_uri, auth, download=True,
-                          destination=args.destination)
+                          destination=args.destination, headers=headers)
 
     # Write manifest file(s).
     if not args.skip_manifest:
@@ -617,7 +631,8 @@ def stop_listener_logging():
     except NotImplementedError:
         num_processes = args.processes
     for _ in range(num_processes):
-        dp = Downloader(get_q, result_q, log_q, log_level, auth, args.destination)
+        dp = Downloader(get_q, result_q, log_q, log_level, auth,
+                        args.destination, headers=headers)
         dp.start()
         download_processes.append(dp)
     for dp in download_processes: