From 66b08dd37ab32fe8f18e50add27af0dd92116e99 Mon Sep 17 00:00:00 2001 From: Emilio Reyes Date: Tue, 6 Apr 2021 20:06:37 -0700 Subject: [PATCH] Fix paging Signed-off-by: Emilio Reyes --- README.md | 5 +++-- build.py | 2 +- src/main/python/github3api/githubapi.py | 27 ++++++++++--------------- src/unittest/python/test_githubapi.py | 17 ++++------------ 4 files changed, 19 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index 1c21820..6c6b487 100644 --- a/README.md +++ b/README.md @@ -66,8 +66,9 @@ for repo in client.get('/user/repos', _get='all', _attributes=['full_name']): `GET page` directive - Yield a page from endpoint ```python -for repo in client.get('/user/repos', _get='page'): - print(repo['full_name']) +for page in client.get('/user/repos', _get='page'): + for repo in page: + print(repo['full_name']) ``` ### Projects using `github3api` ### diff --git a/build.py b/build.py index cb06863..17e154f 100644 --- a/build.py +++ b/build.py @@ -30,7 +30,7 @@ authors = [Author('Emilio Reyes', 'emilio.reyes@intel.com')] summary = 'An advanced REST client for the GitHub API' url = 'https://github.com/soda480/github3api' -version = '0.0.8' +version = '0.0.9' default_task = [ 'clean', 'analyze', diff --git a/src/main/python/github3api/githubapi.py b/src/main/python/github3api/githubapi.py index 83237d1..6a1aa16 100644 --- a/src/main/python/github3api/githubapi.py +++ b/src/main/python/github3api/githubapi.py @@ -55,19 +55,15 @@ def get_headers(self, **kwargs): headers['Accept'] = f'application/vnd.github.{self.version}+json' return headers - def _get_next_endpoint(self, link_header): - """ return next endpoint from link header + def _get_next_endpoint(self, url): + """ return next endpoint """ - if not link_header: + if not url: logger.debug('link header is empty') return - regex = fr".*/.*?)>; rel=\"next\".*" - match = re.match(regex, link_header) - if match: - endpoint = match.group('endpoint') - logger.debug(f'found next endpoint in link header: {endpoint}') - return endpoint - logger.debug('next endpoints not found in link header') + endpoint = url.replace(f'https://{self.hostname}', '') + logger.debug(f'next endpoint is: {endpoint}') + return endpoint def _get_all(self, endpoint, **kwargs): """ return all pages from endpoint @@ -75,7 +71,7 @@ def _get_all(self, endpoint, **kwargs): logger.debug(f'get items from: {endpoint}') items = [] while True: - link_header = None + url = None response = super(GitHubAPI, self).get(endpoint, raw_response=True, **kwargs) if response: data = response.json() @@ -83,9 +79,9 @@ def _get_all(self, endpoint, **kwargs): items.extend(response.json()) else: items.append(data) - link_header = response.headers.get('Link') + url = response.links.get('next', {}).get('url') - endpoint = self._get_next_endpoint(link_header) + endpoint = self._get_next_endpoint(url) if not endpoint: logger.debug('no more pages to retrieve') break @@ -97,9 +93,8 @@ def _get_page(self, endpoint, **kwargs): """ while True: response = super(GitHubAPI, self).get(endpoint, raw_response=True, **kwargs) - for page in response.json(): - yield page - endpoint = self._get_next_endpoint(response.headers.get('Link')) + yield response.json() + endpoint = self._get_next_endpoint(response.links.get('next', {}).get('url')) if not endpoint: logger.debug('no more pages') break diff --git a/src/unittest/python/test_githubapi.py b/src/unittest/python/test_githubapi.py index 05158e3..290be4f 100644 --- a/src/unittest/python/test_githubapi.py +++ b/src/unittest/python/test_githubapi.py @@ -176,17 +176,11 @@ def test__get_next_endpoint_Should_ReturnNone_When_NoLinkHeader(self, *patches): def test__get_next_endpoint_Should_ReturnExpected_When_CalledWithNextEndpoint(self, *patches): client = GitHubAPI(bearer_token='bearer-token') - link_header = '; rel="prev", ; rel="next", ; rel="last", ; rel="first"' + link_header = 'https://api.github.com/organizations/27781926/repos?page=4' result = client._get_next_endpoint(link_header) expected_result = '/organizations/27781926/repos?page=4' self.assertEqual(result, expected_result) - def test__get_next_endpoint_Should_ReturnNone_When_NoNextEndpoint(self, *patches): - client = GitHubAPI(bearer_token='bearer-token') - link_header = '; rel="prev", ; rel="first"' - result = client._get_next_endpoint(link_header) - self.assertIsNone(result) - @patch('github3api.GitHubAPI._get_next_endpoint') @patch('github3api.githubapi.RESTclient.get') def test__get_all_Should_ReturnExpected_When_GetReturnsList(self, get_patch, get_next_endpoint_patch, *patches): @@ -264,10 +258,8 @@ def test__get_page_Should_ReturnExpected_When_Called(self, get_patch, get_next_e ] client = GitHubAPI(bearer_token='bearer-token') result = client._get_page('endpoint') - self.assertEqual(next(result), 'page1') - self.assertEqual(next(result), 'page2') - self.assertEqual(next(result), 'page3') - self.assertEqual(next(result), 'page4') + self.assertEqual(next(result), ['page1', 'page2']) + self.assertEqual(next(result), ['page3', 'page4']) with self.assertRaises(StopIteration): next(result) @@ -287,8 +279,7 @@ def test__get_page_Should_ReturnExpected_When_NoEndpoint(self, get_patch, get_ne ] client = GitHubAPI(bearer_token='bearer-token') result = client._get_page('endpoint') - self.assertEqual(next(result), 'page1') - self.assertEqual(next(result), 'page2') + self.assertEqual(next(result), ['page1', 'page2']) with self.assertRaises(StopIteration): next(result)