From a1f3f8d07826866b788ff08eb280b51d8e2a7e47 Mon Sep 17 00:00:00 2001 From: Ulincsys Date: Sat, 12 Aug 2023 17:24:45 -0500 Subject: [PATCH 01/88] intermediate work on graphical install Signed-off-by: Ulincsys --- Makefile | 4 + augur/api/view/init.py | 5 +- augur/api/view/server/__init__.py | 2 - augur/static/css/first_time.css | 64 +++- augur/templates/first-time-key.j2 | 41 +++ augur/templates/first-time.j2 | 295 +++++++++++------- augur/templates/json/db.json.j2 | 28 ++ .../install/bootstrap}/Environment.py | 2 +- .../install/bootstrap}/ServerThread.py | 2 +- scripts/install/bootstrap/__init__.py | 2 + scripts/install/install.sh | 9 + scripts/install/wizard.py | 223 +++++++++++++ scripts/install/workers.sh | 53 ++-- 13 files changed, 583 insertions(+), 147 deletions(-) create mode 100644 augur/templates/first-time-key.j2 create mode 100644 augur/templates/json/db.json.j2 rename {augur/api/view/server => scripts/install/bootstrap}/Environment.py (96%) rename {augur/api/view/server => scripts/install/bootstrap}/ServerThread.py (96%) create mode 100644 scripts/install/bootstrap/__init__.py create mode 100644 scripts/install/wizard.py diff --git a/Makefile b/Makefile index 26cac178b5..f67aac4676 100644 --- a/Makefile +++ b/Makefile @@ -3,6 +3,7 @@ default: @ echo "Installation Commands:" @ echo " install Installs Augur's full stack for production" + @ echo " wizard Install Augur and launch the graphical setup wizard" @ echo " clean Removes potentially troublesome compiled files" @ echo " rebuild Removes build/compiled files & binaries and reinstalls the project" @ echo @@ -34,6 +35,9 @@ default: install: @ ./scripts/install/install.sh dev +wizard: + @ ./scripts/install/install.sh graphical + install-spdx: @ ./scripts/install/install-spdx.sh diff --git a/augur/api/view/init.py b/augur/api/view/init.py index 210dc60e08..383dcc0599 100644 --- a/augur/api/view/init.py +++ b/augur/api/view/init.py @@ -1,12 +1,9 @@ from pathlib import Path -from .server import Environment from augur.application.logs import AugurLogger import logging, secrets, yaml -env = Environment() - # load configuration files and initialize globals -configFile = Path(env.setdefault("CONFIG_LOCATION", "config.yml")) +configFile = Path("config.yml") version = {"major": 0, "minor": 0.1, "series": "Alpha"} diff --git a/augur/api/view/server/__init__.py b/augur/api/view/server/__init__.py index 287457c4fd..222f8c6f48 100644 --- a/augur/api/view/server/__init__.py +++ b/augur/api/view/server/__init__.py @@ -1,4 +1,2 @@ -from .Environment import Environment from .User import User -from .ServerThread import ServerThread from .LoginException import LoginException diff --git a/augur/static/css/first_time.css b/augur/static/css/first_time.css index 12f8ae9f54..f2d4602399 100644 --- a/augur/static/css/first_time.css +++ b/augur/static/css/first_time.css @@ -1,50 +1,102 @@ +:root { + --color-bg: #1A233A; + --color-bg-light: #272E48; + --color-bg-contrast: #646683; + --color-fg: white; + --color-fg-dark: #b0bdd6; + --color-fg-contrast: black; + --color-accent: #6f42c1; + --color-accent-dark: #6134b3; + --color-notice: #00ddff; + --color-notice-contrast: #006979; +} + body{ margin-top:20px; - color: #bcd0f7; - background: #1A233A; + background-color: var(--color-bg); + color: var(--color-fg); } + h1 { font-size: 2rem; } + .sidebar .sidebar-top { margin: 0 0 1rem 0; padding-bottom: 1rem; text-align: center; } + .sidebar .sidebar-top .brand-logo { margin: 0 0 1rem 0; } + .sidebar .sidebar-top .brand-logo img { height: 90px; -webkit-border-radius: 100px; -moz-border-radius: 100px; border-radius: 100px; } + .sidebar .about { margin: 1rem 0 0 0; font-size: 0.8rem; text-align: center; } + +.subtitle { + color: var(--color-fg-dark); + margin-bottom: .5rem; + margin-left: 15px; +} + +.no-margin-bottom { + margin-bottom: 0; +} + .card { - background: #272E48; + background: var(--color-bg-light); -webkit-border-radius: 5px; -moz-border-radius: 5px; border-radius: 5px; border: 0; margin-bottom: 1rem; } + .form-control { border: 1px solid #596280; -webkit-border-radius: 2px; -moz-border-radius: 2px; border-radius: 2px; font-size: .825rem; - background: #1A233A; - color: #bcd0f7; + background: var(--color-bg-light); + color: var(--color-fg); +} + +.input-textbox { + color: var(--color-fg); + background-color: var(--color-bg); + border-color: var(--color-accent-dark); } + +.input-textbox::placeholder { + color: var(--color-fg-dark); +} + +.input-textbox:focus { + color: var(--color-fg); + background-color: var(--color-bg); + border-color: var(--color-accent-dark); +} + +.input-textbox:focus::placeholder { + color: var(--color-fg-dark); +} + .modal-content { - color: black; + color: var(--color-fg-contrast); } + .editor-container { height: 300px !important; } diff --git a/augur/templates/first-time-key.j2 b/augur/templates/first-time-key.j2 new file mode 100644 index 0000000000..372410855f --- /dev/null +++ b/augur/templates/first-time-key.j2 @@ -0,0 +1,41 @@ + + + + + + + + + + + + + + + + +
+
+

Enter setup key

+
+ +
+ +
+
+ \ No newline at end of file diff --git a/augur/templates/first-time.j2 b/augur/templates/first-time.j2 index c8eb284da8..180bd92183 100644 --- a/augur/templates/first-time.j2 +++ b/augur/templates/first-time.j2 @@ -1,18 +1,23 @@ {# https://www.bootdey.com/snippets/view/dark-profile-settings #} + - + + - +
{# Start sidebar #} @@ -29,14 +34,17 @@

First Time Setup

Take a moment to create or update the configuration for your instance.


-

Default values are shown. When you are done updating, click the restart button to save the settings and bring the primary server up.

+

Default values are shown. When you are done updating, click the restart button to + save the settings and bring the primary server up.

+

Double-click an empty input field to automatically populate it with the placeholder + value

+ class="btn btn-primary">Check connection
+
+
+
Configuration
+
+
+

Once you've verified and/or applied your database connection settings, click here to continue to the configuration page.

+
+
+ Continue +
+
{#
Gunicorn Settings
@@ -112,7 +124,6 @@
#} - @@ -187,60 +198,63 @@ versions_db_btn = document.getElementById("dbversions_button"); versions_db_btn.addEventListener("click", versions_db); - {# - function submit_form(event) { - // Stop the form from submitting so we can execute a request instead - event.preventDefault(); + + function submit_form(event) { + // Stop the form from submitting so we can execute a request instead + event.preventDefault(); - // Convert the form into a dictionary + // Convert the form into a dictionary - /* For some reason this didn't work here, so I did it another way - const formData = new FormData(event.target); - const formProperties = Object.fromEntries(formData); - */ + /* For some reason this didn't work here, so I did it another way + const formData = new FormData(event.target); + const formProperties = Object.fromEntries(formData); + */ - var elements = event.target.querySelectorAll('input'); - var config = new Object(); + var elements = event.target.querySelectorAll('input'); + var config = new Object(); - elements.forEach((element) => { - if (element.value != "") { - config[element.id] = element.value; - } - }); + elements.forEach((element) => { + if (element.value != "") { + config[element.id] = element.value; + } + }); - // Submit the gunicorn configuration to the server - var xhr = new XMLHttpRequest(); - xhr.open("POST", "{{ url_for('update_gunicorn') }}", true); - xhr.setRequestHeader('Content-Type', 'text/plain'); - xhr.send(editor.getValue()); + {# // Submit the gunicorn configuration to the server + var xhr = new XMLHttpRequest(); + xhr.open("POST", "{{ url_for('update_gunicorn') }}", true); + xhr.setRequestHeader('Content-Type', 'text/plain'); + xhr.send(editor.getValue()); #} - // Submit the form dictionary to the server - xhr = new XMLHttpRequest(); - xhr.open("POST", "{{ url_for('update_config') }}", true); - xhr.setRequestHeader('Content-Type', 'application/json'); - xhr.send(JSON.stringify(config)); + // Submit the form dictionary to the server + xhr = new XMLHttpRequest(); + xhr.open("POST", "{{ url_for('update_db') }}", true); + xhr.setRequestHeader('Content-Type', 'application/json'); + xhr.send(JSON.stringify(config)); - // Check the response - xhr.onreadystatechange = function () { - if (xhr.readyState == XMLHttpRequest.DONE) { - if (xhr.status != 200) { - if (xhr.status) { - // The first time setup server returned an error - displayError(xhr.responseText); - } else { - // The first time setup server did not respond - displayError("The first time setup server is unreachable"); - } + // Check the response + xhr.onreadystatechange = function () { + if (xhr.readyState == XMLHttpRequest.DONE) { + if (xhr.status != 200) { + if (xhr.status) { + // The first time setup server returned an error + displayError(xhr.responseText); } else { - // Display a loading indicator while starting up the production server - loadingModal.show(); - waitToReload(xhr.responseText); + // The first time setup server did not respond + displayError("The first time setup server is unreachable"); } + } else { + // Display a loading indicator while starting up the production server + {# window.location.replace(window.location); #} + {# window.location.reload; #} + loadingModal.show(); + window.location.href = window.location.href; + {# waitToReload(xhr.responseText); #} } } } + } - #} + async function waitToReload(new_url) { // Shut down the temp server and wait for primary to initialize fetch("{{ url_for('shutdown') }}").then(async () => { @@ -278,8 +292,8 @@ } // Have the settings-form call submit_form when submitted - // const loginForm = document.getElementById("settings-form"); - // loginForm.addEventListener("submit", submit_form); + const loginForm = document.getElementById("settings-form"); + loginForm.addEventListener("submit", submit_form); diff --git a/augur/templates/json/essential_config.json.j2 b/augur/templates/json/essential_config.json.j2 new file mode 100644 index 0000000000..8275e572d6 --- /dev/null +++ b/augur/templates/json/essential_config.json.j2 @@ -0,0 +1,33 @@ +{ "title": "Essential Config", + "subtitle": "Please ensure the accuracy of these config items", + "settings": [ + { "id": "AUGUR_GITHUB_API_KEY", + "display_name": "GitHub API Key", + "value": "{{ conf.Keys.github_api_key or "" if conf.Keys else "" }}", + "description": "Your GitHub API Key for use with Augur" + },{ "id": "AUGUR_GITHUB_USERNAME", + "display_name": "GitHub Username", + "value": "{{ gh_name or "" }}", + "description": "The username for the provided API Key" + },{ "id": "AUGUR_GITLAB_API_KEY", + "display_name": "GitLab API Key", + "value": "{{ conf.Keys.gitlab_api_key or "" if conf.Keys else "" }}", + "description": "Your GitLab API Key for use with Augur" + },{ + "id": "AUGUR_GITLAB_USERNAME", + "display_name": "GitLab Username", + "value": "{{ gl_name or "" }}", + "description": "The username for the provided API Key" + },{ + "id": "AUGUR_FACADE_REPO_DIRECTORY", + "display_name": "Facade Directory", + "value": "{{ conf.Facade.repo_directory or "" if conf.Facade else "" }}", + "description": "The directory for use with Facade" + },{ + "id": "RABBITMQ_CONN_STRING", + "display_name": "RabbitMQ Connection String", + "value": "{{ conf.RabbitMQ.connection_string or "" if conf.RabbitMQ else "" }}", + "description": "The connection string to use for RabbitMQ" + } + ] +} \ No newline at end of file diff --git a/scripts/install/config.sh b/scripts/install/config.sh index 5358b911a0..6673accfde 100755 --- a/scripts/install/config.sh +++ b/scripts/install/config.sh @@ -180,7 +180,7 @@ function create_config(){ #Create and cache credentials for github and gitlab touch $facade_repo_directory/.git-credentials - echo "https://$github_username:$github_api_key@github.com" >> $facade_repo_directory/.git-credentials + echo "https://$github_username:$github_api_key@github.com" > $facade_repo_directory/.git-credentials echo "https://$gitlab_username:$gitlab_api_key@gitlab.com" >> $facade_repo_directory/.git-credentials git config --global credential.helper "store --file $facade_repo_directory/.git-credentials" diff --git a/scripts/install/wizard.py b/scripts/install/wizard.py index ac893131b6..c1b89396fe 100644 --- a/scripts/install/wizard.py +++ b/scripts/install/wizard.py @@ -7,13 +7,15 @@ from functools import wraps from pathlib import Path -import threading, json, subprocess +import threading, json, subprocess, re top = Path.cwd() template_dir = top / "augur/templates/" static_dir = top / "augur/static/" dbfile = top / "db.config.json" +config_script = top / "scripts/install/config.sh" + def requires_key(func): global app @wraps(func) @@ -26,6 +28,11 @@ def wrapper(*args, **kwargs): return wrapper +def get_db_config() -> dict[str, dict[str, str]]: + out = subprocess.Popen("augur config get_all_json".split(), text=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) + result = out.communicate()[0] + return json.loads(result) + def render_section(template, **kwargs): global app with app.app_context(): @@ -86,7 +93,46 @@ def root(): return render_template("first-time-key.j2") session["key"] = key - return render_template("first-time.j2", sections = sections, version = __version__, gunicorn_placeholder = "") + return render_template("first-time.j2", sections = sections, version = __version__) + + @app.route("/config") + @requires_key + def config(): + sections = [] + config = get_db_config() + facade_dir = "" + + for section_name, section_dict in config.items(): + temp_section = {"title": section_name, "settings": []} + for setting_name, value in section_dict.items(): + temp_section["settings"].append({ + "id": f"{section_name}.{setting_name}", + "display_name": setting_name.replace("_", " ").title(), + "value": value, + "description": "" + }) + + if section_name == "Facade" and setting_name == "repo_directory": + facade_dir = value + + sections.append(temp_section) + + credentials = {} + if facade_dir: + try: + credential_file = Path(facade_dir) / ".git-credentials" + for line in credential_file.read_text().splitlines(): + match = re.match("https://(.*?):(.*?)@(.*?)\\.\\w+", line) + groups = match.groups() + if groups[2] not in credentials: + credentials[groups[2]] = groups[0] + except: + credentials.clear() + + gh_name, gl_name = credentials.get("github"), credentials.get("gitlab") + essential_config = json.loads(render_template("json/essential_config.json.j2", conf=config, gh_name=gh_name, gl_name=gl_name)) + + return render_template("first-time-config.j2", essential_config=essential_config, sections = sections, version = __version__) @app.route("/db/test") @requires_key @@ -113,41 +159,44 @@ def versions_db(): result = out.communicate()[0] return result or "Error" - @app.route("/db/config/load") + @app.route("/db/config/load", methods=["POST"]) @requires_key - def config_db(): - # try: - dbstring = env["AUGUR_DB"] or get_db_string(dbconf) - conn = create_engine(dbstring) - meta = MetaData() - meta.reflect(bind=conn, schema="augur_operations") - config: Table = meta.tables["augur_operations.config"] - session = Session(conn) - result = session.query(config).all() - desired = [] - for row in result: - if row[2] in ["github_api_key", "gitlab_api_key", "repo_directory", "connection_string"]: - print(row) - desired.append(row) + def update_config_db(): + data = request.get_json() - session.close() - return "Success!" + result = subprocess.Popen("augur db create-schema".split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE) + result.wait() + + for key, value in data.items(): + env[key] = value + result = subprocess.Popen(f"{config_script}", stdout=subprocess.PIPE, stderr=subprocess.PIPE) + try: + result.wait(10) + except: + return "Timeout reached waiting for database update to complete", 500 + + return "https://www.google.com" + + @app.route("/db/config/download") + @requires_key + def get_config_db(): + out = subprocess.Popen("augur config get_all_json".split(), text=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) + result = out.communicate()[0] + return Response(result, + mimetype='application/json', + headers={'Content-Disposition':'attachment;filename=config.json'}) - @app.route("/db/update") + @app.route("/db/update", methods=["GET", "POST"]) @requires_key def update_db(): if dbstring := request.args.get("dbstring"): env["AUGUR_DB"] = dbstring else: - dbconf = { - "user": request.args.get("user"), - "password": request.args.get("password"), - "host": request.args.get("host"), - "port": request.args.get("port"), - "database_name": request.args.get("database_name") - } - + data = request.get_json() + dbconf.update(data) json.dump(dbconf, dbfile.open("w"), indent=4) + sections.clear() + sections.append(render_section("db.json", dbconf=dbconf, subtitle="Updated config")) return redirect(url_for("root")) @@ -155,6 +204,8 @@ def update_db(): @requires_key def shutdown(): # Notify the primary thread that the temp server is going down + global do_continue + do_continue = request.args.get("continue") update_complete.acquire() update_complete.notify_all() update_complete.release() @@ -193,9 +244,13 @@ def shutdown(): print("If you're hosting Augur locally, you can open this link to access the interface:") print(f"http://127.0.0.1:{port}?key={setup_key}") - while first_time(setup_key, port): - global app - del app + first_time(setup_key, port) + + print("First time setup exiting") + + global do_continue + if do_continue: + augur = subprocess.Popen("nohup augur backend start --disable-collection".split()) # if not settings: # # First time setup was aborted, so just quit From 84ed9e8391096d27675a1b4630aef5cc85c40641 Mon Sep 17 00:00:00 2001 From: Ulincsys Date: Tue, 15 Aug 2023 06:21:11 -0500 Subject: [PATCH 03/88] fix not waiting for process completion Signed-off-by: Ulincsys --- augur/templates/first-time-config.j2 | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/augur/templates/first-time-config.j2 b/augur/templates/first-time-config.j2 index 1e7f51c547..06feaefa55 100644 --- a/augur/templates/first-time-config.j2 +++ b/augur/templates/first-time-config.j2 @@ -199,6 +199,8 @@ xhr.setRequestHeader('Content-Type', 'application/json'); xhr.send(JSON.stringify(config)); + loadingModal.show(); + var url; if(event.submitter.id == "Exit") { url = "{{ url_for('shutdown') }}"; @@ -206,11 +208,26 @@ url = "{{ url_for('shutdown', continue = 'True') }}"; } - fetch(url).then(async () => { - modal = document.getElementById("modal-title"); - modal.innerHTML = "Finished" - displayError("The first time setup server has been shut down.") - }) + xhr.onreadystatechange = function () { + if (xhr.readyState == XMLHttpRequest.DONE) { + if (xhr.status != 200) { + if (xhr.status) { + // The first time setup server returned an error + displayError(xhr.responseText); + } else { + // The first time setup server did not respond + displayError("The first time setup server is unreachable"); + } + } else { + // Display a loading indicator while starting up the production server + fetch(url).then(async () => { + modal = document.getElementById("modal-title"); + modal.innerHTML = "Finished" + displayError("The first time setup server has been shut down.") + }) + } + } + } } From 6ca9cb7c82d256ddee6d1b019716df0ac1e9386c Mon Sep 17 00:00:00 2001 From: Ulincsys Date: Tue, 15 Aug 2023 09:45:24 -0500 Subject: [PATCH 04/88] touch-ups Signed-off-by: Ulincsys --- scripts/install/wizard.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/scripts/install/wizard.py b/scripts/install/wizard.py index c1b89396fe..9ae3fb0cdd 100644 --- a/scripts/install/wizard.py +++ b/scripts/install/wizard.py @@ -221,9 +221,7 @@ def shutdown(): update_complete.wait() except KeyboardInterrupt as e: # Shutdown gracefully on interrupt and abort relaunch - global autorestart - if autorestart: - return True + print("Shutting down on keyboard interrupt") except Exception as e: # On an unexpected exception, reraise after shutting down raise e @@ -233,11 +231,8 @@ def shutdown(): if __name__ == "__main__": import sys - autorestart = False - if len(sys.argv) > 1: - autorestart = True port = input("Enter the port to use for the configuration interface [8075]: ") or "8075" - setup_key = "5" #token_hex() + setup_key = token_hex() print("-" * 40) print("The configuration interface is starting up") print("You'll need the following key to unlock the interface:", setup_key) From b17c1a070207b581c1893ab1a7134d2de553b539 Mon Sep 17 00:00:00 2001 From: sgoggins Date: Wed, 16 Aug 2023 16:41:49 -0500 Subject: [PATCH 05/88] commented out unused ML import, and kept the versions open on several others that self resolve based on an end users version of Python3 now. Signed-off-by: sgoggins --- augur/tasks/data_analysis/message_insights/setup.py | 8 ++++---- setup.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/augur/tasks/data_analysis/message_insights/setup.py b/augur/tasks/data_analysis/message_insights/setup.py index 311eb9b6f9..f1ac484fc1 100644 --- a/augur/tasks/data_analysis/message_insights/setup.py +++ b/augur/tasks/data_analysis/message_insights/setup.py @@ -36,10 +36,10 @@ def read(filename): 'nltk==3.6.6', 'pandas==1.3.5', 'emoji==1.2.0', - 'Keras<2.9.0rc0', - 'Keras-Preprocessing==1.1.2', - 'tensorflow==2.8.0', - 'h5py~=3.6.0', + 'Keras', #<2.9.0rc0', + 'Keras-Preprocessing', #==1.1.2', + 'tensorflow', #==2.8.0', + #'h5py~=3.6.0', 'scikit-image==0.19.1', 'joblib==1.0.1', 'xgboost', diff --git a/setup.py b/setup.py index 9177110f17..63d3c1341e 100644 --- a/setup.py +++ b/setup.py @@ -62,7 +62,7 @@ "partd >= 0.3.10", # 1.3.0 "distributed >= 2021.03.0", # 2022.8.1 "nltk==3.6.6", # 3.7 - "h5py~=3.6.0", # 3.7 + #"h5py~=3.6.0", # 3.7 "scipy==1.7.3", # 1.9.0 "blinker==1.4", # 1.5 "protobuf<3.22", # 4.21.5 From 310cfb349c5ece8c1a2ab077421da4d870fdd835 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Wed, 16 Aug 2023 17:05:27 -0500 Subject: [PATCH 06/88] wizard-docker-image.yml --- .github/workflows/docker-image.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 .github/workflows/docker-image.yml diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml new file mode 100644 index 0000000000..d657a63b2d --- /dev/null +++ b/.github/workflows/docker-image.yml @@ -0,0 +1,18 @@ +name: Docker Image CI + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +jobs: + + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Build the Docker image + run: docker build . --file Dockerfile --tag my-image-name:$(date +%s) From de90e81c6dbc064dfd1d5e94db8b160d50a26821 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Wed, 16 Aug 2023 17:10:36 -0500 Subject: [PATCH 07/88] Update build_docker.yml --- .github/workflows/build_docker.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build_docker.yml b/.github/workflows/build_docker.yml index 0cf2441838..2888c065ca 100644 --- a/.github/workflows/build_docker.yml +++ b/.github/workflows/build_docker.yml @@ -2,10 +2,10 @@ name: Build Docker images on: push: branches: - - main + - startup-wizard pull_request: branches: - - main + - startup-wizard release: types: - published @@ -21,7 +21,7 @@ jobs: name: Build image runs-on: ubuntu-latest steps: - - name: Checkout main + - name: Checkout startup-wizard uses: actions/checkout@v2 - name: Run the build run: | From bccc9a4ece107b9d71b7d59e41a23017de37436b Mon Sep 17 00:00:00 2001 From: Ulincsys Date: Tue, 22 Aug 2023 09:57:41 -0500 Subject: [PATCH 08/88] - Add configurable build target with `AUGUR_TARGET` - Add ability to pass flags to `augur backend start` in compose - Add entrypoint for `AUGUR_TARGET=graphical` - Fix metadata import in compose environment - Improve Docker compatibility with startup wizard Signed-off-by: Ulincsys --- docker-compose.yml | 5 ++-- docker/backend/graphical | 46 +++++++++++++++++++++++++++++++++++++ docker/backend/graphical.sh | 14 +++++++++++ scripts/install/wizard.py | 36 ++++++++++++++++++++++++----- 4 files changed, 93 insertions(+), 8 deletions(-) create mode 100644 docker/backend/graphical create mode 100644 docker/backend/graphical.sh diff --git a/docker-compose.yml b/docker-compose.yml index bc8186914a..3ac6b61d98 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -21,7 +21,7 @@ services: image: augur-new:latest build: context: . - dockerfile: ./docker/backend/Dockerfile + dockerfile: ./docker/backend/${AUGUR_TARGET:-Dockerfile} volumes: - facade:/augur/facade restart: unless-stopped @@ -32,11 +32,12 @@ services: environment: - "AUGUR_DB=postgresql+psycopg2://${AUGUR_DB_USER:-augur}:${AUGUR_DB_PASSWORD:-augur}@augur-db:5432/augur" - "AUGUR_DB_SCHEMA_BUILD=1" + - "AUGUR_FLAGS=$AUGUR_FLAGS" - "AUGUR_GITHUB_API_KEY=${AUGUR_GITHUB_API_KEY}" - "AUGUR_GITLAB_API_KEY=${AUGUR_GITLAB_API_KEY}" - "AUGUR_GITHUB_USERNAME=${AUGUR_GITHUB_USERNAME}" - "AUGUR_GITLAB_USERNAME=${AUGUR_GITLAB_USERNAME}" - - REDIS_CONN_STRING=redis://redis:6379 + - "REDIS_CONN_STRING=redis://redis:6379" depends_on: - augur-db - redis diff --git a/docker/backend/graphical b/docker/backend/graphical new file mode 100644 index 0000000000..a1c6b95d39 --- /dev/null +++ b/docker/backend/graphical @@ -0,0 +1,46 @@ +#SPDX-License-Identifier: MIT +FROM python:3.9-slim-bullseye + +LABEL maintainer="outdoors@acm.org" +LABEL version="0.51.1" + +ENV DEBIAN_FRONTEND=noninteractive + +RUN set -x \ + && apt-get update \ + && apt-get -y install --no-install-recommends \ + git \ + bash \ + curl \ + gcc \ + python3-pip \ + wget \ + postgresql-client \ + && rm -rf /var/lib/apt/lists/* + +EXPOSE 5000 + +WORKDIR /augur +COPY ./README.md . +COPY ./alembic.ini . +COPY ./augur/ augur/ +COPY ./metadata.py . +COPY ./setup.py . +COPY ./scripts/ scripts/ + +#COPY ./docker/backend/docker.config.json . +RUN python3 -m venv /opt/venv + +RUN set -x \ + && /opt/venv/bin/pip install . + +RUN ./scripts/docker/install-workers-deps.sh + +RUN ./scripts/docker/install-go.sh +# RUN ./scripts/install/workers.sh + +RUN mkdir -p repos/ logs/ /augur/facade/ + +COPY ./docker/backend/graphical.sh / +RUN chmod +x /graphical.sh +ENTRYPOINT /graphical.sh diff --git a/docker/backend/graphical.sh b/docker/backend/graphical.sh new file mode 100644 index 0000000000..0cc123acb8 --- /dev/null +++ b/docker/backend/graphical.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SPDX-License-Identifier: MIT +set -e + +source /opt/venv/bin/activate + +export AUGUR_FACADE_REPO_DIRECTORY=/augur/facade/ +export AUGUR_DOCKER_DEPLOY="1" + +echo "Running with $AUGUR_FLAGS" + +# Run the graphical startup wizard at port 5000 +# -u option specifies unbuffered output +python -u ./scripts/install/wizard.py 5000 diff --git a/scripts/install/wizard.py b/scripts/install/wizard.py index 9ae3fb0cdd..8dcd91bcf2 100644 --- a/scripts/install/wizard.py +++ b/scripts/install/wizard.py @@ -2,13 +2,16 @@ from sqlalchemy import MetaData, Table, create_engine from bootstrap import ServerThread, Environment from sqlalchemy.orm import Session -from metadata import __version__ from secrets import token_hex from functools import wraps from pathlib import Path -import threading, json, subprocess, re +import threading, json, subprocess, re, importlib.util, sys + +env = Environment() +# Assuming the script is run from the root project directory +# (IE: where the makefile is located) top = Path.cwd() template_dir = top / "augur/templates/" static_dir = top / "augur/static/" @@ -16,6 +19,17 @@ config_script = top / "scripts/install/config.sh" +print(top) + +if "metadata" not in sys.modules: + # Docker build changes module hierarchy for some reason + spec = importlib.util.spec_from_file_location("metadata", top / "metadata.py") + module = importlib.util.module_from_spec(spec) + sys.modules["metadata"] = module + spec.loader.exec_module(module) + +from metadata import __version__ + def requires_key(func): global app @wraps(func) @@ -45,7 +59,6 @@ def first_time(setup_key, port = 5000): """ Run first time setup for this instance. """ - env = Environment() global app app = Flask(__name__, static_folder=static_dir, template_folder=template_dir) app.secret_key = setup_key @@ -131,6 +144,10 @@ def config(): gh_name, gl_name = credentials.get("github"), credentials.get("gitlab") essential_config = json.loads(render_template("json/essential_config.json.j2", conf=config, gh_name=gh_name, gl_name=gl_name)) + + for setting in essential_config["settings"]: + if env[setting["id"]]: + setting["value"] = env[setting["id"]] return render_template("first-time-config.j2", essential_config=essential_config, sections = sections, version = __version__) @@ -230,8 +247,11 @@ def shutdown(): update_complete.release() if __name__ == "__main__": - import sys - port = input("Enter the port to use for the configuration interface [8075]: ") or "8075" + if len(sys.argv) > 1: + port = sys.argv[1] + else: + port = input("Enter the port to use for the configuration interface [8075]: ") or "8075" + setup_key = token_hex() print("-" * 40) print("The configuration interface is starting up") @@ -245,7 +265,11 @@ def shutdown(): global do_continue if do_continue: - augur = subprocess.Popen("nohup augur backend start --disable-collection".split()) + if env["AUGUR_DOCKER_DEPLOY"]: + augur = subprocess.Popen(f"augur backend start {env['AUGUR_FLAGS'] or ''}".split()) + augur.wait() + else: + subprocess.Popen(f"nohup augur backend start {env['AUGUR_FLAGS'] or ''}".split()) # if not settings: # # First time setup was aborted, so just quit From 416a904b3952005d144ddc2e2a3abd1db8518270 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 22:02:58 +0000 Subject: [PATCH 09/88] Bump dnspython from 2.2.1 to 2.6.1 Bumps [dnspython](https://github.com/rthalley/dnspython) from 2.2.1 to 2.6.1. - [Release notes](https://github.com/rthalley/dnspython/releases) - [Changelog](https://github.com/rthalley/dnspython/blob/main/doc/whatsnew.rst) - [Commits](https://github.com/rthalley/dnspython/compare/v2.2.1...v2.6.1) --- updated-dependencies: - dependency-name: dnspython dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 279496bf82..3400477310 100644 --- a/setup.py +++ b/setup.py @@ -82,7 +82,7 @@ "flower==2.0.1", "tornado==6.3.3", # added because it sometimes errors when tornado is not 6.1 even though nothing we install depends on it "pylint==2.15.5", - "dnspython==2.2.1", + "dnspython==2.6.1", 'Werkzeug~=2.0.0', "pylint==2.15.5", "mdpdf==0.0.18", From 68f7d0692a1a25e4681fc2b4306439c607d929a8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 22:03:04 +0000 Subject: [PATCH 10/88] Bump eventlet from 0.33.3 to 0.35.2 Bumps [eventlet](https://github.com/eventlet/eventlet) from 0.33.3 to 0.35.2. - [Changelog](https://github.com/eventlet/eventlet/blob/master/NEWS) - [Commits](https://github.com/eventlet/eventlet/compare/v0.33.3...v0.35.2) --- updated-dependencies: - dependency-name: eventlet dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 279496bf82..c9ea9a914b 100644 --- a/setup.py +++ b/setup.py @@ -78,7 +78,7 @@ "XlsxWriter==1.3.7", # 3.0.3 "celery==5.2.7", # 5.2.7 "httpx==0.23.0", # 0.23.0 - "eventlet==0.33.3", + "eventlet==0.35.2", "flower==2.0.1", "tornado==6.3.3", # added because it sometimes errors when tornado is not 6.1 even though nothing we install depends on it "pylint==2.15.5", From ab84ce735bfaae44d412d47ce6a13fed55f3dfd9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 17 Apr 2024 01:03:09 +0000 Subject: [PATCH 11/88] Bump gunicorn from 20.1.0 to 22.0.0 Bumps [gunicorn](https://github.com/benoitc/gunicorn) from 20.1.0 to 22.0.0. - [Release notes](https://github.com/benoitc/gunicorn/releases) - [Commits](https://github.com/benoitc/gunicorn/compare/20.1.0...22.0.0) --- updated-dependencies: - dependency-name: gunicorn dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 279496bf82..28a2d5af6b 100644 --- a/setup.py +++ b/setup.py @@ -54,7 +54,7 @@ "psycopg2-binary==2.9.3", #2.9.3 what is pscopg-binary 3.0.16 "click==8.0.3", # 8.1.3 "psutil==5.8.0", # 5.9.1 - "gunicorn==20.1.0", # 20.1.0 + "gunicorn==22.0.0", # 20.1.0 "six==1.15.0", # 1.16.0 "bokeh==2.0.2", # 2.4.3 "selenium==3.141.0",# 4.4.3 From fc2e3391e4ac395bf70bf00ff061eb703f8e8cff Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 14 May 2024 00:28:21 +0000 Subject: [PATCH 12/88] Bump flask-cors in /augur/tasks/data_analysis/clustering_worker Bumps [flask-cors](https://github.com/corydolphin/flask-cors) from 3.0.10 to 4.0.1. - [Release notes](https://github.com/corydolphin/flask-cors/releases) - [Changelog](https://github.com/corydolphin/flask-cors/blob/main/CHANGELOG.md) - [Commits](https://github.com/corydolphin/flask-cors/compare/3.0.10...4.0.1) --- updated-dependencies: - dependency-name: flask-cors dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- augur/tasks/data_analysis/clustering_worker/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/data_analysis/clustering_worker/setup.py b/augur/tasks/data_analysis/clustering_worker/setup.py index 78fb0b4b50..fc5d0283d5 100644 --- a/augur/tasks/data_analysis/clustering_worker/setup.py +++ b/augur/tasks/data_analysis/clustering_worker/setup.py @@ -22,7 +22,7 @@ def read(filename): packages=find_packages(), install_requires=[ 'Flask==2.0.2', - 'Flask-Cors==3.0.10', + 'Flask-Cors==4.0.1', 'Flask-Login==0.5.0', 'Flask-WTF==1.0.0', 'requests==2.28.0', From abd026be8898f7869c46b0eda3ef227edf08d2f0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 14 May 2024 00:28:22 +0000 Subject: [PATCH 13/88] Bump flask-cors in /augur/tasks/data_analysis/insight_worker Bumps [flask-cors](https://github.com/corydolphin/flask-cors) from 3.0.10 to 4.0.1. - [Release notes](https://github.com/corydolphin/flask-cors/releases) - [Changelog](https://github.com/corydolphin/flask-cors/blob/main/CHANGELOG.md) - [Commits](https://github.com/corydolphin/flask-cors/compare/3.0.10...4.0.1) --- updated-dependencies: - dependency-name: flask-cors dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- augur/tasks/data_analysis/insight_worker/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/data_analysis/insight_worker/setup.py b/augur/tasks/data_analysis/insight_worker/setup.py index 1ee6e8a4bd..aff72965bf 100644 --- a/augur/tasks/data_analysis/insight_worker/setup.py +++ b/augur/tasks/data_analysis/insight_worker/setup.py @@ -23,7 +23,7 @@ def read(filename): packages=find_packages(exclude=('tests',)), install_requires=[ 'Flask==2.0.2', - 'Flask-Cors==3.0.10', + 'Flask-Cors==4.0.1', 'Flask-Login==0.5.0', 'Flask-WTF==1.0.0', 'requests==2.28.0', From db15302bd8fbfa64ed41f71bb9443e5b1bb73b86 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 14 May 2024 00:28:22 +0000 Subject: [PATCH 14/88] Bump flask-cors in /augur/tasks/git/util/facade_worker Bumps [flask-cors](https://github.com/corydolphin/flask-cors) from 3.0.10 to 4.0.1. - [Release notes](https://github.com/corydolphin/flask-cors/releases) - [Changelog](https://github.com/corydolphin/flask-cors/blob/main/CHANGELOG.md) - [Commits](https://github.com/corydolphin/flask-cors/compare/3.0.10...4.0.1) --- updated-dependencies: - dependency-name: flask-cors dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- augur/tasks/git/util/facade_worker/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/git/util/facade_worker/setup.py b/augur/tasks/git/util/facade_worker/setup.py index 298baff49d..fa38cf0759 100644 --- a/augur/tasks/git/util/facade_worker/setup.py +++ b/augur/tasks/git/util/facade_worker/setup.py @@ -23,7 +23,7 @@ def read(filename): packages=find_packages(exclude=('tests',)), install_requires=[ 'Flask==2.0.2', - 'Flask-Cors==3.0.10', + 'Flask-Cors==4.0.1', 'Flask-Login==0.5.0', 'Flask-WTF==1.0.0', 'requests==2.28.0', From b25c6af2056ed3b6eb46b14b25bd83445729042d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 14 May 2024 00:28:22 +0000 Subject: [PATCH 15/88] Bump flask-cors Bumps [flask-cors](https://github.com/corydolphin/flask-cors) from 3.0.10 to 4.0.1. - [Release notes](https://github.com/corydolphin/flask-cors/releases) - [Changelog](https://github.com/corydolphin/flask-cors/blob/main/CHANGELOG.md) - [Commits](https://github.com/corydolphin/flask-cors/compare/3.0.10...4.0.1) --- updated-dependencies: - dependency-name: flask-cors dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- augur/tasks/data_analysis/pull_request_analysis_worker/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py b/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py index 3341f24ff1..d8a8f7e063 100644 --- a/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py +++ b/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py @@ -22,7 +22,7 @@ def read(filename): packages=find_packages(), install_requires=[ 'Flask==2.0.2', - 'Flask-Cors==3.0.10', + 'Flask-Cors==4.0.1', 'Flask-Login==0.5.0', 'Flask-WTF==1.0.0', 'requests==2.28.0', From 51ef7dc290cc69d4d1ca86bededcdcf0f4ac3152 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 14 May 2024 00:28:22 +0000 Subject: [PATCH 16/88] Bump flask-cors in /augur/tasks/data_analysis/contributor_breadth_worker Bumps [flask-cors](https://github.com/corydolphin/flask-cors) from 3.0.10 to 4.0.1. - [Release notes](https://github.com/corydolphin/flask-cors/releases) - [Changelog](https://github.com/corydolphin/flask-cors/blob/main/CHANGELOG.md) - [Commits](https://github.com/corydolphin/flask-cors/compare/3.0.10...4.0.1) --- updated-dependencies: - dependency-name: flask-cors dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- augur/tasks/data_analysis/contributor_breadth_worker/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/data_analysis/contributor_breadth_worker/setup.py b/augur/tasks/data_analysis/contributor_breadth_worker/setup.py index 86052e164c..4d40fe423d 100644 --- a/augur/tasks/data_analysis/contributor_breadth_worker/setup.py +++ b/augur/tasks/data_analysis/contributor_breadth_worker/setup.py @@ -23,7 +23,7 @@ def read(filename): packages=find_packages(), install_requires=[ 'Flask==2.0.2', - 'Flask-Cors==3.0.10', + 'Flask-Cors==4.0.1', 'Flask-Login==0.5.0', 'Flask-WTF==1.0.0', 'requests==2.28.0', From 2d26ed4a92273c4598b149a4d027f597102e6abd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 14 May 2024 00:28:22 +0000 Subject: [PATCH 17/88] Bump flask-cors in /augur/tasks/data_analysis/discourse_analysis Bumps [flask-cors](https://github.com/corydolphin/flask-cors) from 3.0.10 to 4.0.1. - [Release notes](https://github.com/corydolphin/flask-cors/releases) - [Changelog](https://github.com/corydolphin/flask-cors/blob/main/CHANGELOG.md) - [Commits](https://github.com/corydolphin/flask-cors/compare/3.0.10...4.0.1) --- updated-dependencies: - dependency-name: flask-cors dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- augur/tasks/data_analysis/discourse_analysis/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/data_analysis/discourse_analysis/setup.py b/augur/tasks/data_analysis/discourse_analysis/setup.py index 37d6557ec5..7e678936c6 100644 --- a/augur/tasks/data_analysis/discourse_analysis/setup.py +++ b/augur/tasks/data_analysis/discourse_analysis/setup.py @@ -22,7 +22,7 @@ def read(filename): packages=find_packages(), install_requires=[ 'Flask==2.0.2', - 'Flask-Cors==3.0.10', + 'Flask-Cors==4.0.1', 'Flask-Login==0.5.0', 'Flask-WTF==1.0.0', 'requests==2.28.0', From bcdb1b6e0aa4a8b950d2f29476ed9223b529b99f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 14 May 2024 00:28:22 +0000 Subject: [PATCH 18/88] Bump flask-cors in /augur/tasks/data_analysis/message_insights Bumps [flask-cors](https://github.com/corydolphin/flask-cors) from 3.0.10 to 4.0.1. - [Release notes](https://github.com/corydolphin/flask-cors/releases) - [Changelog](https://github.com/corydolphin/flask-cors/blob/main/CHANGELOG.md) - [Commits](https://github.com/corydolphin/flask-cors/compare/3.0.10...4.0.1) --- updated-dependencies: - dependency-name: flask-cors dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- augur/tasks/data_analysis/message_insights/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/data_analysis/message_insights/setup.py b/augur/tasks/data_analysis/message_insights/setup.py index a4f6a30c43..17a42f5940 100644 --- a/augur/tasks/data_analysis/message_insights/setup.py +++ b/augur/tasks/data_analysis/message_insights/setup.py @@ -24,7 +24,7 @@ def read(filename): packages=find_packages(), install_requires=[ 'Flask==2.0.2', - 'Flask-Cors==3.0.10', + 'Flask-Cors==4.0.1', 'Flask-Login==0.5.0', 'Flask-WTF==1.0.0', 'requests==2.28.0', From 768c340b4f00674463b3ab7510c7fda77d08ccf2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 14 May 2024 00:28:23 +0000 Subject: [PATCH 19/88] Bump flask-cors from 3.0.10 to 4.0.1 Bumps [flask-cors](https://github.com/corydolphin/flask-cors) from 3.0.10 to 4.0.1. - [Release notes](https://github.com/corydolphin/flask-cors/releases) - [Changelog](https://github.com/corydolphin/flask-cors/blob/main/CHANGELOG.md) - [Commits](https://github.com/corydolphin/flask-cors/compare/3.0.10...4.0.1) --- updated-dependencies: - dependency-name: flask-cors dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 279496bf82..ba23b307cb 100644 --- a/setup.py +++ b/setup.py @@ -45,7 +45,7 @@ "itsdangerous==2.0.1", # 2.1.2 'Jinja2~=3.0.3', "Flask==2.0.2", # 2.2.2 - "Flask-Cors==3.0.10", + "Flask-Cors==4.0.1", "Flask-Login==0.5.0", "Flask-WTF==1.0.0", "pandas==1.5.3", # 1.4.3 From 9167238fe6954459e4346135191f026645f572b3 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Thu, 16 May 2024 21:35:33 -0500 Subject: [PATCH 20/88] Optimize secondary task recollection Signed-off-by: Andrew Brain --- augur/application/db/lib.py | 17 ++++++++++- augur/tasks/data_analysis/__init__.py | 2 +- augur/tasks/git/facade_tasks.py | 2 +- .../pull_requests/commits_model/core.py | 30 ++++++++++++------- .../pull_requests/commits_model/tasks.py | 4 +-- .../github/pull_requests/files_model/core.py | 30 ++++++++++++------- .../github/pull_requests/files_model/tasks.py | 4 +-- augur/tasks/github/pull_requests/tasks.py | 12 ++++++-- augur/tasks/start_tasks.py | 26 ++++++++-------- augur/tasks/util/collection_util.py | 12 +++++--- 10 files changed, 90 insertions(+), 49 deletions(-) diff --git a/augur/application/db/lib.py b/augur/application/db/lib.py index c1da707dbf..88ff5169b7 100644 --- a/augur/application/db/lib.py +++ b/augur/application/db/lib.py @@ -1,7 +1,7 @@ import sqlalchemy as s import logging from typing import List, Any, Optional -from augur.application.db.models import Config +from augur.application.db.models import Config, CollectionStatus, PullRequest from augur.application.db import get_session from augur.application.db.util import execute_session_query @@ -95,3 +95,18 @@ def get_value(section_name: str, setting_name: str) -> Optional[Any]: setting_dict = convert_type_of_value(setting_dict, logger) return setting_dict["value"] + + +def get_secondary_data_last_collected(repo_id): + + with get_session() as session: + try: + return session.query(CollectionStatus).filter(CollectionStatus.repo_id == repo_id).one().secondary_data_last_collected + except s.orm.exc.NoResultFound: + return None + +def get_updated_prs(since): + + with get_session() as session: + return session.query(PullRequest).filter(PullRequest.pr_updated_at >= since).order_by(PullRequest.pr_src_number).all() + diff --git a/augur/tasks/data_analysis/__init__.py b/augur/tasks/data_analysis/__init__.py index b600bcac77..70b156140d 100644 --- a/augur/tasks/data_analysis/__init__.py +++ b/augur/tasks/data_analysis/__init__.py @@ -1,7 +1,7 @@ from celery import chain import logging -def machine_learning_phase(repo_git): +def machine_learning_phase(repo_git, full_collection): from augur.tasks.data_analysis.clustering_worker.tasks import clustering_task from augur.tasks.data_analysis.discourse_analysis.tasks import discourse_analysis_task from augur.tasks.data_analysis.insight_worker.tasks import insight_task diff --git a/augur/tasks/git/facade_tasks.py b/augur/tasks/git/facade_tasks.py index b96c596bc7..4fde5ea352 100644 --- a/augur/tasks/git/facade_tasks.py +++ b/augur/tasks/git/facade_tasks.py @@ -465,7 +465,7 @@ def generate_contributor_sequence(logger,repo_git, session): return insert_facade_contributors.si(repo_id) -def facade_phase(repo_git): +def facade_phase(repo_git, full_collection): logger = logging.getLogger(facade_phase.__name__) logger.info("Generating facade sequence") with FacadeSession(logger) as session: diff --git a/augur/tasks/github/pull_requests/commits_model/core.py b/augur/tasks/github/pull_requests/commits_model/core.py index ea91a597da..cbdae74ba6 100644 --- a/augur/tasks/github/pull_requests/commits_model/core.py +++ b/augur/tasks/github/pull_requests/commits_model/core.py @@ -3,20 +3,28 @@ from augur.application.db.models import * from augur.tasks.github.util.util import get_owner_repo from augur.application.db.util import execute_session_query +from augur.application.db.lib import get_secondary_data_last_collected, get_updated_prs -def pull_request_commits_model(repo_id,logger, augur_db, key_auth): +def pull_request_commits_model(repo_id,logger, augur_db, key_auth, full_collection=False): - # query existing PRs and the respective url we will append the commits url to - pr_url_sql = s.sql.text(""" - SELECT DISTINCT pr_url, pull_requests.pull_request_id - FROM pull_requests--, pull_request_meta - WHERE repo_id = :repo_id - """).bindparams(repo_id=repo_id) - pr_urls = [] - #pd.read_sql(pr_number_sql, self.db, params={}) - - pr_urls = augur_db.fetchall_data_from_sql_text(pr_url_sql)#session.execute_sql(pr_number_sql).fetchall() + if full_collection: + # query existing PRs and the respective url we will append the commits url to + pr_url_sql = s.sql.text(""" + SELECT DISTINCT pr_url, pull_requests.pull_request_id + FROM pull_requests--, pull_request_meta + WHERE repo_id = :repo_id + """).bindparams(repo_id=repo_id) + pr_urls = [] + #pd.read_sql(pr_number_sql, self.db, params={}) + + pr_urls = augur_db.fetchall_data_from_sql_text(pr_url_sql)#session.execute_sql(pr_number_sql).fetchall() + + else: + last_collected = get_secondary_data_last_collected(repo_id).date() + prs = get_updated_prs(last_collected) + pr_urls = [pr.pr_url for pr in prs] + query = augur_db.session.query(Repo).filter(Repo.repo_id == repo_id) repo = execute_session_query(query, 'one') diff --git a/augur/tasks/github/pull_requests/commits_model/tasks.py b/augur/tasks/github/pull_requests/commits_model/tasks.py index f0a065bdd1..4fa555b5f0 100644 --- a/augur/tasks/github/pull_requests/commits_model/tasks.py +++ b/augur/tasks/github/pull_requests/commits_model/tasks.py @@ -7,7 +7,7 @@ @celery.task(base=AugurSecondaryRepoCollectionTask) -def process_pull_request_commits(repo_git: str) -> None: +def process_pull_request_commits(repo_git: str, full_collection: bool) -> None: logger = logging.getLogger(process_pull_request_commits.__name__) @@ -18,4 +18,4 @@ def process_pull_request_commits(repo_git: str) -> None: query = augur_db.session.query(Repo).filter(Repo.repo_git == repo_git) repo = execute_session_query(query, 'one') - pull_request_commits_model(repo.repo_id, logger, augur_db, manifest.key_auth) + pull_request_commits_model(repo.repo_id, logger, augur_db, manifest.key_auth, full_collection) diff --git a/augur/tasks/github/pull_requests/files_model/core.py b/augur/tasks/github/pull_requests/files_model/core.py index 138aa61cb3..49aa6c3bba 100644 --- a/augur/tasks/github/pull_requests/files_model/core.py +++ b/augur/tasks/github/pull_requests/files_model/core.py @@ -3,20 +3,28 @@ from augur.application.db.models import * from augur.tasks.github.util.util import get_owner_repo from augur.application.db.util import execute_session_query +from augur.application.db.lib import get_secondary_data_last_collected, get_updated_prs -def pull_request_files_model(repo_id,logger, augur_db, key_auth): + +def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection=False): - # query existing PRs and the respective url we will append the commits url to - pr_number_sql = s.sql.text(""" - SELECT DISTINCT pr_src_number as pr_src_number, pull_requests.pull_request_id - FROM pull_requests--, pull_request_meta - WHERE repo_id = :repo_id - """).bindparams(repo_id=repo_id) - pr_numbers = [] - #pd.read_sql(pr_number_sql, self.db, params={}) + if full_collection: + # query existing PRs and the respective url we will append the commits url to + pr_number_sql = s.sql.text(""" + SELECT DISTINCT pr_src_number as pr_src_number, pull_requests.pull_request_id + FROM pull_requests--, pull_request_meta + WHERE repo_id = :repo_id + """).bindparams(repo_id=repo_id) + pr_numbers = [] + #pd.read_sql(pr_number_sql, self.db, params={}) + + result = augur_db.execute_sql(pr_number_sql)#.fetchall() + pr_numbers = [dict(row) for row in result.mappings()] - result = augur_db.execute_sql(pr_number_sql)#.fetchall() - pr_numbers = [dict(row) for row in result.mappings()] + else: + last_collected = get_secondary_data_last_collected(repo_id).date() + prs = get_updated_prs(last_collected) + pr_numbers = [pr.pr_src_number for pr in prs] query = augur_db.session.query(Repo).filter(Repo.repo_id == repo_id) repo = execute_session_query(query, 'one') diff --git a/augur/tasks/github/pull_requests/files_model/tasks.py b/augur/tasks/github/pull_requests/files_model/tasks.py index 988261f6c8..be75c88a9d 100644 --- a/augur/tasks/github/pull_requests/files_model/tasks.py +++ b/augur/tasks/github/pull_requests/files_model/tasks.py @@ -6,7 +6,7 @@ from augur.application.db.util import execute_session_query @celery.task(base=AugurSecondaryRepoCollectionTask) -def process_pull_request_files(repo_git: str) -> None: +def process_pull_request_files(repo_git: str, full_collection: bool) -> None: logger = logging.getLogger(process_pull_request_files.__name__) @@ -15,4 +15,4 @@ def process_pull_request_files(repo_git: str) -> None: query = augur_db.session.query(Repo).filter(Repo.repo_git == repo_git) repo = execute_session_query(query, 'one') - pull_request_files_model(repo.repo_id, logger, augur_db, manifest.key_auth) \ No newline at end of file + pull_request_files_model(repo.repo_id, logger, augur_db, manifest.key_auth, full_collection) \ No newline at end of file diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 69e40f6818..8557249911 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -11,6 +11,7 @@ from augur.application.db.models import PullRequest, Message, PullRequestReview, PullRequestLabel, PullRequestReviewer, PullRequestMeta, PullRequestAssignee, PullRequestReviewMessageRef, Contributor, Repo from augur.application.db.util import execute_session_query from ..messages.tasks import process_github_comment_contributors +from augur.application.db.lib import get_secondary_data_last_collected, get_updated_prs platform_id = 1 @@ -317,7 +318,7 @@ def collect_pull_request_review_comments(repo_git: str) -> None: @celery.task(base=AugurSecondaryRepoCollectionTask) -def collect_pull_request_reviews(repo_git: str) -> None: +def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: logger = logging.getLogger(collect_pull_request_reviews.__name__) @@ -334,8 +335,13 @@ def collect_pull_request_reviews(repo_git: str) -> None: query = augur_db.session.query(Repo).filter(Repo.repo_git == repo_git) repo_id = execute_session_query(query, 'one').repo_id - query = augur_db.session.query(PullRequest).filter(PullRequest.repo_id == repo_id).order_by(PullRequest.pr_src_number) - prs = execute_session_query(query, 'all') + if full_collection: + + query = augur_db.session.query(PullRequest).filter(PullRequest.repo_id == repo_id).order_by(PullRequest.pr_src_number) + prs = execute_session_query(query, 'all') + else: + last_collected = get_secondary_data_last_collected(repo_id).date() + prs = get_updated_prs(last_collected) pr_count = len(prs) diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index 866b7a0288..c0dcb1f81a 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -42,13 +42,13 @@ """ #Prelim phases are used to detect if where the repo has hosted has moved or not. -def prelim_phase(repo_git): +def prelim_phase(repo_git, full_collection): logger = logging.getLogger(prelim_phase.__name__) return detect_github_repo_move_core.si(repo_git) -def prelim_phase_secondary(repo_git): +def prelim_phase_secondary(repo_git, full_collection): logger = logging.getLogger(prelim_phase.__name__) return detect_github_repo_move_secondary.si(repo_git) @@ -56,7 +56,7 @@ def prelim_phase_secondary(repo_git): #This is the phase that defines the message for core augur collection #A chain is needed for each repo. -def primary_repo_collect_phase(repo_git): +def primary_repo_collect_phase(repo_git, full_collection): logger = logging.getLogger(primary_repo_collect_phase.__name__) @@ -85,7 +85,7 @@ def primary_repo_collect_phase(repo_git): return repo_task_group -def primary_repo_collect_phase_gitlab(repo_git): +def primary_repo_collect_phase_gitlab(repo_git, full_collection): logger = logging.getLogger(primary_repo_collect_phase_gitlab.__name__) @@ -109,13 +109,13 @@ def primary_repo_collect_phase_gitlab(repo_git): #This phase creates the message for secondary collection tasks. #These are less important and have their own worker. -def secondary_repo_collect_phase(repo_git): +def secondary_repo_collect_phase(repo_git, full_collection): logger = logging.getLogger(secondary_repo_collect_phase.__name__) repo_task_group = group( - process_pull_request_files.si(repo_git), - process_pull_request_commits.si(repo_git), - chain(collect_pull_request_reviews.si(repo_git), collect_pull_request_review_comments.si(repo_git)), + process_pull_request_files.si(repo_git, full_collection), + process_pull_request_commits.si(repo_git, full_collection), + chain(collect_pull_request_reviews.si(repo_git, full_collection), collect_pull_request_review_comments.si(repo_git)), process_ossf_dependency_metrics.si(repo_git) ) @@ -166,7 +166,7 @@ def build_primary_repo_collect_request(session,enabled_phase_names, days_until_c primary_gitlab_enabled_phases.append(primary_repo_collect_phase_gitlab) #task success is scheduled no matter what the config says. - def core_task_success_util_gen(repo_git): + def core_task_success_util_gen(repo_git, full_collection): return core_task_success_util.si(repo_git) primary_enabled_phases.append(core_task_success_util_gen) @@ -186,7 +186,7 @@ def build_secondary_repo_collect_request(session,enabled_phase_names, days_until secondary_enabled_phases.append(secondary_repo_collect_phase) - def secondary_task_success_util_gen(repo_git): + def secondary_task_success_util_gen(repo_git, full_collection): return secondary_task_success_util.si(repo_git) secondary_enabled_phases.append(secondary_task_success_util_gen) @@ -202,12 +202,12 @@ def build_facade_repo_collect_request(session,enabled_phase_names, days_until_co facade_enabled_phases.append(facade_phase) - def facade_task_success_util_gen(repo_git): + def facade_task_success_util_gen(repo_git, full_collection): return facade_task_success_util.si(repo_git) facade_enabled_phases.append(facade_task_success_util_gen) - def facade_task_update_weight_util_gen(repo_git): + def facade_task_update_weight_util_gen(repo_git, full_collection): return git_update_commit_count_weight.si(repo_git) facade_enabled_phases.append(facade_task_update_weight_util_gen) @@ -222,7 +222,7 @@ def build_ml_repo_collect_request(session,enabled_phase_names, days_until_collec ml_enabled_phases.append(machine_learning_phase) - def ml_task_success_util_gen(repo_git): + def ml_task_success_util_gen(repo_git, full_collection): return ml_task_success_util.si(repo_git) ml_enabled_phases.append(ml_task_success_util_gen) diff --git a/augur/tasks/util/collection_util.py b/augur/tasks/util/collection_util.py index 3561b19b40..70955de1ed 100644 --- a/augur/tasks/util/collection_util.py +++ b/augur/tasks/util/collection_util.py @@ -154,7 +154,9 @@ def get_valid_repos(self,session): if limit <= 0: return - collection_list = get_valid_repos_for_users(session,limit,tuple(quarter_list),hook=self.name, days_to_wait_until_next_collection=self.days_until_collect_again) + repo_git_list = get_valid_repos_for_users(session,limit,tuple(quarter_list),hook=self.name, days_to_wait_until_next_collection=self.days_until_collect_again) + + collection_list = [tuple(repo_git, True) for repo_git in repo_git_list] self.repo_list.extend(collection_list) #Update limit with amount of repos started @@ -180,7 +182,9 @@ def get_valid_repos(self,session): #only start repos older than the specified amount of days #Query a set of valid repositories sorted by weight, also making sure that the repos aren't new or errored #Order by the relevant weight for the collection hook - collection_list = get_valid_repos_for_users(session,limit,tuple(quarter_list),allow_old_repos=True,hook=self.name, days_to_wait_until_next_collection=self.days_until_collect_again) + repo_git_list = get_valid_repos_for_users(session,limit,tuple(quarter_list),allow_old_repos=True,hook=self.name, days_to_wait_until_next_collection=self.days_until_collect_again) + + collection_list = [tuple(repo_git, False) for repo_git in repo_git_list] self.repo_list.extend(collection_list) limit -= len(collection_list) @@ -572,8 +576,8 @@ def send_messages(self): for col_hook in self.collection_hooks: self.logger.info(f"Starting collection on {len(col_hook.repo_list)} {col_hook.name} repos") - - for repo_git in col_hook.repo_list: + + for repo_git, full_collection in col_hook.repo_list: repo = self.session.query(Repo).filter(Repo.repo_git == repo_git).one() if "github" in repo.repo_git: From 0e47f00cb92945e16dd94867d990c3d42af24a9c Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Thu, 16 May 2024 21:37:05 -0500 Subject: [PATCH 21/88] Pass full collection flag to phases Signed-off-by: Andrew Brain --- augur/tasks/util/collection_util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/augur/tasks/util/collection_util.py b/augur/tasks/util/collection_util.py index 70955de1ed..fe08c0f8c1 100644 --- a/augur/tasks/util/collection_util.py +++ b/augur/tasks/util/collection_util.py @@ -585,7 +585,7 @@ def send_messages(self): for job in col_hook.phases: #Add the phase to the sequence in order as a celery task. #The preliminary task creates the larger task chain - augur_collection_sequence.append(job(repo_git)) + augur_collection_sequence.append(job(repo_git, full_collection)) #augur_collection_sequence.append(core_task_success_util.si(repo_git)) #Link all phases in a chain and send to celery @@ -603,7 +603,7 @@ def send_messages(self): for job in col_hook.gitlab_phases: #Add the phase to the sequence in order as a celery task. #The preliminary task creates the larger task chain - augur_collection_sequence.append(job(repo_git)) + augur_collection_sequence.append(job(repo_git, full_collection)) #augur_collection_sequence.append(core_task_success_util.si(repo_git)) #Link all phases in a chain and send to celery From 4d0463301a9bc89649548ca97790f4da7b9a4e1b Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Fri, 17 May 2024 12:21:49 -0500 Subject: [PATCH 22/88] Filter updated prs by repo_id Signed-off-by: Andrew Brain --- augur/application/db/lib.py | 4 ++-- augur/tasks/github/pull_requests/commits_model/core.py | 2 +- augur/tasks/github/pull_requests/files_model/core.py | 2 +- augur/tasks/github/pull_requests/tasks.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/augur/application/db/lib.py b/augur/application/db/lib.py index 88ff5169b7..04ca54c2ee 100644 --- a/augur/application/db/lib.py +++ b/augur/application/db/lib.py @@ -105,8 +105,8 @@ def get_secondary_data_last_collected(repo_id): except s.orm.exc.NoResultFound: return None -def get_updated_prs(since): +def get_updated_prs(repo_id, since): with get_session() as session: - return session.query(PullRequest).filter(PullRequest.pr_updated_at >= since).order_by(PullRequest.pr_src_number).all() + return session.query(PullRequest).filter(PullRequest.repo_id == repo_id, PullRequest.pr_updated_at >= since).order_by(PullRequest.pr_src_number).all() diff --git a/augur/tasks/github/pull_requests/commits_model/core.py b/augur/tasks/github/pull_requests/commits_model/core.py index cbdae74ba6..73945a59c5 100644 --- a/augur/tasks/github/pull_requests/commits_model/core.py +++ b/augur/tasks/github/pull_requests/commits_model/core.py @@ -22,7 +22,7 @@ def pull_request_commits_model(repo_id,logger, augur_db, key_auth, full_collecti else: last_collected = get_secondary_data_last_collected(repo_id).date() - prs = get_updated_prs(last_collected) + prs = get_updated_prs(repo_id, last_collected) pr_urls = [pr.pr_url for pr in prs] diff --git a/augur/tasks/github/pull_requests/files_model/core.py b/augur/tasks/github/pull_requests/files_model/core.py index 49aa6c3bba..5966acc9cc 100644 --- a/augur/tasks/github/pull_requests/files_model/core.py +++ b/augur/tasks/github/pull_requests/files_model/core.py @@ -23,7 +23,7 @@ def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection else: last_collected = get_secondary_data_last_collected(repo_id).date() - prs = get_updated_prs(last_collected) + prs = get_updated_prs(repo_id, last_collected) pr_numbers = [pr.pr_src_number for pr in prs] query = augur_db.session.query(Repo).filter(Repo.repo_id == repo_id) diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 8557249911..57aca4a89a 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -341,7 +341,7 @@ def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: prs = execute_session_query(query, 'all') else: last_collected = get_secondary_data_last_collected(repo_id).date() - prs = get_updated_prs(last_collected) + prs = get_updated_prs(repo_id, last_collected) pr_count = len(prs) From 354a8334b69ccfaba8387d14d42baaa0da361e6b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 21 May 2024 02:40:50 +0000 Subject: [PATCH 23/88] --- updated-dependencies: - dependency-name: requests dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 279496bf82..7030fcc5e7 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,7 @@ "Flask-WTF==1.0.0", "pandas==1.5.3", # 1.4.3 "numpy==1.26.0", # 1.23.2 - "requests==2.28.0", # 2.28.1 + "requests==2.32.0", # 2.28.1 "psycopg2-binary==2.9.3", #2.9.3 what is pscopg-binary 3.0.16 "click==8.0.3", # 8.1.3 "psutil==5.8.0", # 5.9.1 From 9d7420e74ca56eee08ec6d9f67f18fdb668f6d91 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Fri, 31 May 2024 07:29:41 -0500 Subject: [PATCH 24/88] Define new github data access Signed-off-by: Andrew Brain --- augur/tasks/github/util/github_data_access.py | 146 ++++++++++++++++++ setup.py | 3 +- 2 files changed, 148 insertions(+), 1 deletion(-) create mode 100644 augur/tasks/github/util/github_data_access.py diff --git a/augur/tasks/github/util/github_data_access.py b/augur/tasks/github/util/github_data_access.py new file mode 100644 index 0000000000..9031772c82 --- /dev/null +++ b/augur/tasks/github/util/github_data_access.py @@ -0,0 +1,146 @@ +import logging +import time +import httpx +from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception, RetryError +from urllib.parse import urlparse, parse_qs + + +class RatelimitException(Exception): + pass + +class UrlNotFoundException(Exception): + pass + +class GithubDataAccess: + + def __init__(self, key_manager, logger: logging.Logger): + + self.logger = logger + self.key_manager = key_manager + + def paginate_resource(self, url): + + response = self.make_request_with_retries(url) + data = response.json() + if not isinstance(data, list): + raise Exception(f"GithubApiHandler.paginate_resource must be used with url that returns a list. Use GithubApiHandler.get_resource to retrieve data that is not paginated. The url of {url} returned a {type(data)}.") + + yield data + + while 'next' in response.links.keys(): + + next_page = response.links['next']['url'] + + response = self.make_request_with_retries(next_page) + data = response.json() + if not isinstance(data, list): + raise Exception(f"GithubApiHandler.paginate_resource must be used with url that returns a list. Use GithubApiHandler.get_resource to retrieve data that is not paginated. The url of {url} returned a {type(data)}. ") + + yield data + + return + + def is_pagination_limited_by_max_github_pages(self, url): + + page_count = self.get_resource_page_count(url) + + return page_count <= 299 + + def get_resource_page_count(self, url): + + response = self.make_request_with_retries(url, method="HEAD") + + if 'last' not in response.links.keys(): + return 1 + + try: + last_page_url = response.links['last']['url'] + + parsed_url = urlparse(last_page_url) + + return int(parse_qs(parsed_url.query)['page'][0]) + except (KeyError, ValueError): + raise Exception(f"Unable to parse 'last' url from response: {response.links['last']}") + + def get_resource(self, url): + + response = self.make_request_with_retries(url) + + return response.json() + + # TODO: Handle timeout exceptions better + def make_request(self, url, method="GET", timeout=100): + + with httpx.Client() as client: + + response = client.request(method=method, url=url, timeout=timeout, follow_redirects=True) + + if response.status_code in [403, 429]: + raise RatelimitException() + + elif response.status_code == 404: + raise UrlNotFoundException(f"Could not find {url}") + + response.raise_for_status() + + return response + + def make_request_with_retries(self, url, method="GET", timeout=100): + """ What method does? + 1. Catches RetryError and rethrows a nicely formatted OutOfRetriesException that includes that last exception thrown + """ + + try: + return self.__make_request_with_retries(url, method, timeout) + except RetryError as e: + raise e.last_attempt.exception() + + @retry(stop=stop_after_attempt(10), wait=wait_fixed(5), retry=retry_if_exception(lambda exc: not isinstance(exc, UrlNotFoundException))) + def __make_request_with_retries(self, url, method="GET", timeout=100): + """ What method does? + 1. Retires 10 times + 2. Waits 5 seconds between retires + 3. Does not rety UrlNotFoundException + 4. Catches RatelimitException and waits before raising exception + """ + + try: + return self.make_request(url, method, timeout) + except RatelimitException as e: + self.__handle_github_ratelimit_response(e.response) + raise e + + def __handle_github_ratelimit_response(self, response): + + headers = response.headers + + if "Retry-After" in headers: + + retry_after = int(headers["Retry-After"]) + self.logger.info( + f'\n\n\n\nSleeping for {retry_after} seconds due to secondary rate limit issue.\n\n\n\n') + time.sleep(retry_after) + + elif "X-RateLimit-Remaining" in headers and int(headers["X-RateLimit-Remaining"]) == 0: + current_epoch = int(time.time()) + epoch_when_key_resets = int(headers["X-RateLimit-Reset"]) + key_reset_time = epoch_when_key_resets - current_epoch + + if key_reset_time < 0: + self.logger.error(f"Key reset time was less than 0 setting it to 0.\nThe current epoch is {current_epoch} and the epoch that the key resets at is {epoch_when_key_resets}") + key_reset_time = 0 + + self.logger.info(f"\n\n\nAPI rate limit exceeded. Sleeping until the key resets ({key_reset_time} seconds)") + time.sleep(key_reset_time) + else: + time.sleep(60) + + + + + + + + + + diff --git a/setup.py b/setup.py index 279496bf82..b262b9c6da 100644 --- a/setup.py +++ b/setup.py @@ -86,7 +86,8 @@ 'Werkzeug~=2.0.0', "pylint==2.15.5", "mdpdf==0.0.18", - "typing-extensions==4.7.1" + "typing-extensions==4.7.1", + "tenacity==8.3.0", ], extras_require={ "dev": [ From 7fb513da62ee721ce9cf6fbccd24dbcf6a81cdab Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Fri, 31 May 2024 07:55:11 -0500 Subject: [PATCH 25/88] Add more functionality to github data access Signed-off-by: Andrew Brain --- augur/tasks/github/util/github_data_access.py | 45 +++++++++++++++++-- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/augur/tasks/github/util/github_data_access.py b/augur/tasks/github/util/github_data_access.py index 9031772c82..002995c9fe 100644 --- a/augur/tasks/github/util/github_data_access.py +++ b/augur/tasks/github/util/github_data_access.py @@ -2,7 +2,7 @@ import time import httpx from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception, RetryError -from urllib.parse import urlparse, parse_qs +from urllib.parse import urlparse, parse_qs, urlencode class RatelimitException(Exception): @@ -18,14 +18,32 @@ def __init__(self, key_manager, logger: logging.Logger): self.logger = logger self.key_manager = key_manager + def get_resource_count(self, url): + + # set per_page to 100 explicitly so we know each page is 100 long + params = {"per_page": 100} + url = self.__add_query_params(url, params) + + num_pages = self.get_resource_page_count(url) + + # get data for last page + params = {"page": num_pages} + url = self.__add_query_params(url, params) + + data = self.get_resource(url) + + return (100 * (num_pages -1)) + len(data) + def paginate_resource(self, url): response = self.make_request_with_retries(url) data = response.json() + + # need to ensure data is a list so yield from works properly if not isinstance(data, list): raise Exception(f"GithubApiHandler.paginate_resource must be used with url that returns a list. Use GithubApiHandler.get_resource to retrieve data that is not paginated. The url of {url} returned a {type(data)}.") - yield data + yield from data while 'next' in response.links.keys(): @@ -33,10 +51,12 @@ def paginate_resource(self, url): response = self.make_request_with_retries(next_page) data = response.json() + + # need to ensure data is a list so yield from works properly if not isinstance(data, list): raise Exception(f"GithubApiHandler.paginate_resource must be used with url that returns a list. Use GithubApiHandler.get_resource to retrieve data that is not paginated. The url of {url} returned a {type(data)}. ") - yield data + yield from data return @@ -135,6 +155,25 @@ def __handle_github_ratelimit_response(self, response): else: time.sleep(60) + def __add_query_params(self, url: str, additional_params: dict) -> str: + """Add query params to a url. + + Args: + url: the url that is being modified + additional_params: key value pairs specififying the paramaters to be added + + Returns: + The url with the key value pairs in additional_params added as query params + """ + url_components = urlparse(url) + original_params = parse_qs(url_components.query) + # Before Python 3.5 you could update original_params with + # additional_params, but here all the variables are immutable. + merged_params = {**original_params, **additional_params} + updated_query = urlencode(merged_params, doseq=True) + # _replace() is how you can create a new NamedTuple with a changed field + return url_components._replace(query=updated_query).geturl() + From 69cd4210434d0f5e55f5ed70a135ddbfc5ad9611 Mon Sep 17 00:00:00 2001 From: kaxada Date: Sun, 2 Jun 2024 18:23:56 -0500 Subject: [PATCH 26/88] update API description URLS Signed-off-by: kaxada --- docs/source/rest-api/spec.yml | 147 ++++++++++++++++------------------ 1 file changed, 71 insertions(+), 76 deletions(-) diff --git a/docs/source/rest-api/spec.yml b/docs/source/rest-api/spec.yml index c859325a7e..28a799e40b 100644 --- a/docs/source/rest-api/spec.yml +++ b/docs/source/rest-api/spec.yml @@ -2,7 +2,7 @@ basePath: /api/unstable/ externalDocs: description: CHAOSS Metric Definitions - url: https://chaoss.community/metrics/ + url: https://chaoss.community/kb-metrics-and-metrics-models/ host: ai.chaoss.io info: title: Augur REST API @@ -330,9 +330,6 @@ paths: /metadata/repo_info: get: description: 'Returns the metadata about all repositories in an Augur instance, using information from a git platform (GitHub, GitLab, etc.). Each record includes the default branch name, repository license file, forks, stars, watchers, and committers. Also includes metadata about current repository issue and pull request status and counts.' - externalDocs: - description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-risk/blob/main/focus-areas/business-risk.md operationId: Activity Metadata (Repo) responses: '200': @@ -399,7 +396,7 @@ paths: description: 'Returns a list of repositories contributed to by all the contributors in an Augur Instance: INCLUDING all repositories on a platform, *not* merely those repositories in the Augur Instance. Numerical totals represent total CONTRIBUTIONS.' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-risk/blob/main/focus-areas/business-risk.md + url: https://chaoss.community/kb/metric-contributors/ operationId: Contributions Count (Repo) responses: '200': @@ -421,7 +418,7 @@ paths: description: 'Returns a list of repositories contributed to by all the contributors in an Augur Instance: INCLUDING all repositories on a platform, *not* merely those repositories in the Augur Instance. Numerical totals represent total CONTRIBUTORS.' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-risk/blob/main/focus-areas/business-risk.md + url: https://chaoss.community/kb/metric-contributors/ operationId: Contributors Count (Repo) responses: '200': @@ -443,7 +440,7 @@ paths: description: 'The average issue resolution time. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-risk/blob/main/focus-areas/business-risk.md + url: https://chaoss.community/kb/metric-issue-resolution-duration/ operationId: Average Issue Resolution Time (Repo Group) parameters: - description: Repository Group ID @@ -475,7 +472,7 @@ paths: description: 'The average issue resolution time. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-risk/blob/main/focus-areas/business-risk.md + url: https://chaoss.community/kb/metric-issue-resolution-duration/ operationId: Average Issue Resolution Time (Repo) parameters: - description: Repository ID. @@ -504,7 +501,7 @@ paths: description: 'The CII Best Practices Badge level. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-risk/blob/main/focus-areas/security.md + url: https://chaoss.community/kb/metric-open-source-security-foundation-openssf-best-practices-badge/ operationId: CII Best Practices Badge (Repo Group) parameters: - description: Repository Group ID @@ -536,7 +533,7 @@ paths: description: 'The CII Best Practices Badge level. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-risk/blob/main/focus-areas/security.md + url: https://chaoss.community/kb/metric-open-source-security-foundation-openssf-best-practices-badge/ operationId: CII Best Practices Badge (Repo) parameters: - description: Repository ID. @@ -565,7 +562,7 @@ paths: description: 'Number of persons opening an issue for the first time. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-risk/blob/main/metrics/Committers.md + url: https://chaoss.community/kb/metric-committers/ operationId: Committers (Repo Group) parameters: - description: Repository Group ID @@ -612,11 +609,10 @@ paths: - risk /repos/:repo_id/committers: get: - description: 'Number of persons contributing with an accepted commit for the - first time. ' + description: 'Number of persons contributing with an accepted commit for the first time.' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-risk/blob/main/metrics/Committers.md + url: https://chaoss.community/kb/metric-committers/ operationId: Committers (Repo) parameters: - description: Repository ID. @@ -666,10 +662,10 @@ paths: - risk /repo-groups/:repo_group_id/fork-count: get: - description: 'Fork count. ' + description: 'A technical fork is a distributed version control copy of a project. The number of technical forks indicates the number of copies of a project on the same code development platform.' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-risk/blob/main/focus-areas/business-risk.md + url: https://chaoss.community/kb/metric-technical-fork/ operationId: Fork Count (Repo Group) parameters: - description: Repository Group ID @@ -698,10 +694,10 @@ paths: - risk /repos/:repo_id/fork-count: get: - description: 'Fork count. ' + description: 'A technical fork is a distributed version control copy of a project. The number of technical forks indicates the number of copies of a project on the same code development platform.' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-risk/blob/main/focus-areas/business-risk.md + url: https://chaoss.community/kb/metric-technical-fork/ operationId: Fork Count (Repo) parameters: - description: Repository ID. @@ -730,7 +726,7 @@ paths: description: 'A time series of fork count. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-risk/blob/main/focus-areas/business-risk.md + url: https://chaoss.community/kb/metric-technical-fork/ operationId: Forks (Repo Group) parameters: - description: Repository Group ID @@ -765,7 +761,7 @@ paths: description: 'A time series of fork count. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-risk/blob/main/focus-areas/business-risk.md + url: https://chaoss.community/kb/metric-technical-fork/ operationId: Forks (Repo) parameters: - description: Repository ID. @@ -794,10 +790,10 @@ paths: - risk /repo-groups/:repo_group_id/license-coverage: get: - description: 'Number of persons opening an issue for the first time. ' + description: 'How much of the code base has declared licenses that scanners can recognize which may not be just OSI-approved. This includes both software and documentation source files and is represented as a percentage of total coverage.' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-risk/blob/main/metrics/License_Coverage.md + url: https://chaoss.community/kb/metric-license-coverage/ operationId: License Coverage (Repo Group) parameters: - description: Repository Group ID @@ -829,11 +825,10 @@ paths: - risk /repos/:repo_id/license-coverage: get: - description: 'Number of persons contributing with an accepted commit for the - first time. ' + description: 'How much of the code base has declared licenses that scanners can recognize which may not be just OSI-approved. This includes both software and documentation source files and is represented as a percentage of total coverage.' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-risk/blob/main/metrics/License_Coverage.md + url: https://chaoss.community/kb/metric-license-coverage/ operationId: License Coverage (Repo) parameters: - description: Repository ID. @@ -865,10 +860,10 @@ paths: - risk /repo-groups/:repo_group_id/license-declared: get: - description: 'An enumeration of all the licenses declared in a repo group at the file level.' + description: 'The total number and specific licenses declared in a software package. This can include both software and documentation source files. This metric is an enumeration of licenses, and the number of files with that particular license declaration.' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-risk/blob/main/metrics/License_Coverage.md + url: https://chaoss.community/kb/metric-licenses-declared/ operationId: License Declared (Repo Group) parameters: - description: Repository Group ID @@ -897,10 +892,10 @@ paths: - risk /repos/:repo_id/license-declared: get: - description: 'An enumeration of all the licenses declared in a repo at the file level.' + description: 'The total number and specific licenses declared in a software package. This can include both software and documentation source files. This metric is an enumeration of licenses, and the number of files with that particular license declaration.' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-risk/blob/main/focus-areas/licensing/license-declared.md + url: https://chaoss.community/kb/metric-licenses-declared/ operationId: License Declared (Repo) parameters: - description: Repository ID. @@ -932,7 +927,7 @@ paths: description: 'The primary language of the repository. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-risk/blob/main/focus-areas/security.md + url: https://chaoss.community/kb/metric-programming-language-distribution/ operationId: Languages (Repo Group) parameters: - description: Repository Group ID @@ -961,7 +956,7 @@ paths: description: 'The primary language of the repository. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-risk/blob/main/focus-areas/security.md + url: https://chaoss.community/kb/metric-programming-language-distribution/ operationId: Languages (Repo) parameters: - description: Repository ID. @@ -991,7 +986,7 @@ paths: badging data). ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-risk/blob/main/focus-areas/licensing.md + url: https://chaoss.community/kb/metric-osi-approved-licenses/ operationId: License Count (Repo Group) parameters: - description: Repository Group ID @@ -1024,7 +1019,7 @@ paths: badging data). ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-risk/blob/main/focus-areas/licensing.md + url: https://chaoss.community/kb/metric-osi-approved-licenses/ operationId: License Count (Repo) parameters: - description: Repository ID. @@ -2034,10 +2029,10 @@ paths: - experimental /repo-groups/:repo_group_id/lines-changed-by-author: get: - description: 'Count of closed issues. ' + description: 'Code Changes Lines measures the sum of lines added and removed in all source code changes during a specified period by an author. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/contributors-new.md + url: https://chaoss.community/kb/metric-code-changes-lines/ operationId: Lines Changed by Author (Repo Group) parameters: - description: Repository Group ID @@ -2075,10 +2070,10 @@ paths: - experimental /repo-groups/:repo_id/lines-changed-by-author: get: - description: 'Count of closed issues. ' + description: 'Code Changes Lines measures the sum of lines added and removed in all source code changes during a specified period by an author. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/contributors-new.md + url: https://chaoss.community/kb/metric-code-changes-lines/ operationId: Lines Changed by Author (Repo) parameters: - description: Repository ID. @@ -2123,7 +2118,7 @@ paths: description: 'Time series of number of commits during a certain period. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/Code_Changes.md + url: https://chaoss.community/kb/metric-code-changes-commits/ operationId: Code Changes (Repo Group) parameters: - description: Repository Group ID @@ -2176,7 +2171,7 @@ paths: description: 'Time series number of commits during a certain period. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/Code_Changes.md + url: https://chaoss.community/kb/metric-code-changes-commits/ operationId: Code Changes (Repo) parameters: - description: Repository ID. @@ -2226,7 +2221,7 @@ paths: description: 'Time series of lines added and removed during a certain period. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/Code_Changes_Lines.md + url: https://chaoss.community/kb/metric-code-changes-lines/ operationId: Code Changes Lines (Repo Group) parameters: - description: Repository Group ID @@ -2282,7 +2277,7 @@ paths: description: 'Time series of lines added and removed during a certain period. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/Code_Changes_Lines.md + url: https://chaoss.community/kb/metric-code-changes-lines/ operationId: Code Changes Lines (Repo) parameters: - description: Repository ID. @@ -2335,7 +2330,7 @@ paths: description: 'List of contributors and their contributions. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/contributors.md + url: https://chaoss.community/kb/metric-contributors/ operationId: Contributors (Repo Group) parameters: - description: Repository Group ID @@ -2400,7 +2395,7 @@ paths: description: 'List of contributors and their contributions. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/contributors.md + url: https://chaoss.community/kb/metric-contributors/ operationId: Contributors (Repo) parameters: - description: Repository ID. @@ -2459,7 +2454,7 @@ paths: description: 'Time series of number of new contributors during a certain period. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/contributors-new.md + url: https://chaoss.community/kb/metric-new-contributors/ operationId: New Contributors (Repo Group) parameters: - description: Repository Group ID @@ -2512,7 +2507,7 @@ paths: description: 'Time series of number of new contributors during a certain period. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/contributors-new.md + url: https://chaoss.community/kb/metric-new-contributors/ operationId: New Contributors (Repo) parameters: - description: Repository ID. @@ -2565,7 +2560,7 @@ paths: description: 'Number of issues currently open. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/focus_areas/code_development.md + url: https://chaoss.community/kb/metric-issues-active/ operationId: Issue Backlog (Repo Group) parameters: - description: Repository Group ID @@ -2597,7 +2592,7 @@ paths: description: 'Time since an issue is proposed until it is closed. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/focus_areas/code_development.md + url: https://chaoss.community/kb/metric-issues-active/ operationId: Issue Backlog (Repo) parameters: - description: Repository ID. @@ -2732,7 +2727,7 @@ paths: description: 'Ratio of issues closed to total issues. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/focus_areas/code_development.md + url: https://chaoss.community/kb/metric-issues-closed/ operationId: Issue Throughput (Repo Group) parameters: - description: Repository Group ID @@ -2764,7 +2759,7 @@ paths: description: 'Ratio of issues closed to total issues. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/focus_areas/code_development.md + url: https://chaoss.community/kb/metric-issues-closed/ operationId: Issue Throughput (Repo) parameters: - description: Repository ID. @@ -2794,7 +2789,7 @@ paths: a certain period. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/Issues_Active.md + url: https://chaoss.community/kb/metric-issues-active/ operationId: Issues Active (Repo Group) parameters: - description: Repository Group ID @@ -2848,7 +2843,7 @@ paths: a certain period. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/Issues_Active.md + url: https://chaoss.community/kb/metric-issues-active/ operationId: Issues Active (Repo) parameters: - description: Repository ID. @@ -2951,7 +2946,7 @@ paths: description: 'Time series of number of issues closed during a certain period. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/Issues_New.md + url: https://chaoss.community/kb/metric-issues-closed/ operationId: Issues Closed (Repo) parameters: - description: Repository ID. @@ -3001,7 +2996,7 @@ paths: description: 'Duration of time for issues to be resolved. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/issues-closed-resolution-duration.md + url: https://chaoss.community/kb/metric-issue-resolution-duration/ operationId: Closed Issue Resolution Duration (Repo Group) parameters: - description: Repository Group ID @@ -3042,7 +3037,7 @@ paths: description: 'Duration of time for issues to be resolved. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/issues-closed-resolution-duration.md + url: https://chaoss.community/kb/metric-issue-resolution-duration/ operationId: Closed Issue Resolution Duration (Repo) parameters: - description: Repository ID. @@ -3086,7 +3081,7 @@ paths: description: 'Number of persons closing an issue for the first time. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/issues-first-time-closed.md + url: https://chaoss.community/kb/metric-new-contributors-closing-issues/ operationId: Closed Issues New Contributor (Repo Group) parameters: - description: Repository Group ID @@ -3139,7 +3134,7 @@ paths: description: 'Number of persons closing an issue for the first time. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/issues-first-time-closed.md + url: https://chaoss.community/kb/metric-new-contributors-closing-issues/ operationId: Closed Issues New Contributors (Repo) parameters: - description: Repository ID. @@ -3292,7 +3287,7 @@ paths: description: 'Duration of time for issues to be resolved. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/issues-maintainer-response-duration.md + url: https://chaoss.community/kb/metric-issue-resolution-duration/ operationId: Issue Response Time (Repo Group) parameters: - description: Repository Group ID @@ -3336,7 +3331,7 @@ paths: description: 'Duration of time for issues to be resolved. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/issues-maintainer-response-duration.md + url: https://chaoss.community/kb/metric-issue-resolution-duration/ operationId: Issue Response Time (Repo) parameters: - description: Repository ID. @@ -3380,7 +3375,7 @@ paths: description: 'Time series of number of new issues opened during a certain period. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/Issues_New.md + url: https://chaoss.community/kb/metric-issues-new/ operationId: Issues New (Repo Group) parameters: - description: Repository Group ID @@ -3433,7 +3428,7 @@ paths: description: 'Time series of number of new issues opened during a certain period. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/Issues_New.md + url: https://chaoss.community/kb/metric-issues-new/ operationId: Issues New (Repo) parameters: - description: Repository ID. @@ -3483,7 +3478,7 @@ paths: description: 'Age of open issues. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/issues-open-age.md + url: https://chaoss.community/kb/metric-issue-age/ operationId: Open Issue Age (Repo Group) parameters: - description: Repository Group ID @@ -3521,7 +3516,7 @@ paths: description: 'Age of open issues. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/issues-open-age.md + url: https://chaoss.community/kb/metric-issue-age/ operationId: Open Issue Age (Repo) parameters: - description: Repository ID. @@ -3664,7 +3659,7 @@ paths: description: 'Time since an review/pull request is proposed until it is accepted. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/Reviews_Duration.md + url: https://chaoss.community/kb/metric-change-requests-duration/ operationId: Review Duration (Repo Group) parameters: - description: Repository Group ID @@ -3717,7 +3712,7 @@ paths: description: 'Time since an review/pull request is proposed until it is accepted. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/Reviews_Duration.md + url: https://chaoss.community/kb/metric-change-requests-duration/ operationId: Review Duration (Repo) parameters: - description: Repository ID. @@ -3768,7 +3763,7 @@ paths: a certain period. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/reviews.md + url: https://chaoss.community/kb/metric-change-request-reviews/ operationId: Reviews (Repo Group) parameters: - description: Repository Group ID @@ -3822,7 +3817,7 @@ paths: a certain period. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/reviews.md + url: https://chaoss.community/kb/metric-change-request-reviews/ operationId: Reviews (Repo) parameters: - description: Repository ID. @@ -3873,7 +3868,7 @@ paths: within a certain period. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/Reviews_Accepted.md + url: https://chaoss.community/kb/metric-change-requests-accepted/ operationId: Reviews Accepted (Repo Group) parameters: - description: Repository Group ID @@ -3927,7 +3922,7 @@ paths: within a certain period. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/Reviews_Accepted.md + url: https://chaoss.community/kb/metric-change-requests-accepted/ operationId: Reviews Accepted (Repo) parameters: - description: Repository ID. @@ -3978,7 +3973,7 @@ paths: within a certain period. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/Reviews_Accepted.md + url: https://chaoss.community/kb/metric-change-requests-declined/ operationId: Reviews Declined (Repo Group) parameters: - description: Repository Group ID @@ -4032,7 +4027,7 @@ paths: within a certain period. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/Reviews_Accepted.md + url: https://chaoss.community/kb/metric-change-requests-declined/ operationId: Reviews Declined (Repo) parameters: - description: Repository ID. @@ -4158,7 +4153,7 @@ paths: description: 'Count of closed issues. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/contributors-new.md + url: https://chaoss.community/kb/metric-issues-closed/ operationId: Closed Issues Count (Repo Group) parameters: - description: Repository Group ID @@ -4196,7 +4191,7 @@ paths: description: 'Count of closed issues. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/contributors-new.md + url: https://chaoss.community/kb/metric-issues-closed/ operationId: Closed Issues Count (Repo) parameters: - description: Repository ID @@ -4228,7 +4223,7 @@ paths: description: 'Time since an issue is proposed until it is closed. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/focus_areas/code_development.md + url: https://chaoss.community/kb/metric-issue-resolution-duration/ operationId: Issue Duration (Repo Group) parameters: - description: Repository Group ID @@ -4281,7 +4276,7 @@ paths: description: 'Time since an issue is proposed until it is closed. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/focus_areas/code_development.md + url: https://chaoss.community/kb/metric-issue-resolution-duration/ operationId: Issue Duration (Repo) parameters: - description: Repository ID @@ -4331,7 +4326,7 @@ paths: description: 'Time since an review/pull request is proposed until it is accepted. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/Reviews_Duration.md + url: https://chaoss.community/kb/metric-release-frequency/ operationId: Number of Releases (Repo) parameters: - description: Repository Group ID. @@ -4366,7 +4361,7 @@ paths: description: 'Time since an review/pull request is proposed until it is accepted. ' externalDocs: description: CHAOSS Metric Definition - url: https://github.com/chaoss/wg-evolution/blob/main/metrics/Reviews_Duration.md + url: https://chaoss.community/kb/metric-release-frequency/ operationId: Number of Releases (Repo) parameters: - description: Repository ID. From edb56ea66a087b2ce4baeec7269ea619019ea6fe Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 3 Jun 2024 18:21:39 -0500 Subject: [PATCH 27/88] Use github data access in prs Signed-off-by: Andrew Brain --- augur/tasks/github/pull_requests/tasks.py | 26 ++++++++--------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 73ea1b025a..6ee451f361 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -4,7 +4,7 @@ from augur.tasks.init.celery_app import celery_app as celery from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask, AugurSecondaryRepoCollectionTask from augur.application.db.data_parse import * -from augur.tasks.github.util.github_paginator import GithubPaginator +from augur.tasks.github.util.github_data_access import GithubDataAccess from augur.tasks.github.util.github_task_session import GithubTaskManifest from augur.tasks.util.worker_util import remove_duplicate_dicts from augur.tasks.github.util.util import add_key_value_pair_to_dicts, get_owner_repo @@ -34,8 +34,9 @@ def collect_pull_requests(repo_git: str) -> int: total_count = 0 all_data = [] - for page in retrieve_all_pr_data(repo_git, logger, manifest.key_auth): - all_data += page + for pr in retrieve_all_pr_data(repo_git, logger, manifest.key_auth): + + all_data.append(pr) if len(all_data) >= 1000: process_pull_requests(all_data, f"{owner}/{repo}: Pr task", repo_id, logger, augur_db) @@ -63,24 +64,15 @@ def retrieve_all_pr_data(repo_git: str, logger, key_auth): #-> Generator[List[Di logger.info(f"Collecting pull requests for {owner}/{repo}") url = f"https://api.github.com/repos/{owner}/{repo}/pulls?state=all&direction=desc" - # returns an iterable of all prs at this url (this essentially means you can treat the prs variable as a list of the prs) - prs = GithubPaginator(url, key_auth, logger) - num_pages = prs.get_num_pages() - for page_data, page in prs.iter_pages(): + github_data_access = GithubDataAccess(key_auth, logger) - if page_data is None: - return + num_pages = github_data_access.get_resource_page_count(url) - if len(page_data) == 0: - logger.debug( - f"{owner}/{repo} Prs Page {page} contains no data...returning") - logger.info(f"{owner}/{repo} Prs Page {page} of {num_pages}") - return + logger.info(f"{owner}/{repo}: Retrieving {num_pages} pages of pull requests") - logger.info(f"{owner}/{repo} Prs Page {page} of {num_pages}") - - yield page_data + # returns a generator so this method can be used by doing for x in retrieve_all_pr_data() + return github_data_access.paginate_resource(url) def process_pull_requests(pull_requests, task_name, repo_id, logger, augur_db): From 64b859b2556cb568f75e4d9022470052fa7204d8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 6 Jun 2024 23:34:20 +0000 Subject: [PATCH 28/88] Bump tornado from 6.3.3 to 6.4.1 Bumps [tornado](https://github.com/tornadoweb/tornado) from 6.3.3 to 6.4.1. - [Changelog](https://github.com/tornadoweb/tornado/blob/master/docs/releases.rst) - [Commits](https://github.com/tornadoweb/tornado/compare/v6.3.3...v6.4.1) --- updated-dependencies: - dependency-name: tornado dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 279496bf82..a5aa039616 100644 --- a/setup.py +++ b/setup.py @@ -80,7 +80,7 @@ "httpx==0.23.0", # 0.23.0 "eventlet==0.33.3", "flower==2.0.1", - "tornado==6.3.3", # added because it sometimes errors when tornado is not 6.1 even though nothing we install depends on it + "tornado==6.4.1", # added because it sometimes errors when tornado is not 6.1 even though nothing we install depends on it "pylint==2.15.5", "dnspython==2.2.1", 'Werkzeug~=2.0.0', From cbb76f8d2e2227d040043530a138e18146969878 Mon Sep 17 00:00:00 2001 From: Ulincsys Date: Sun, 9 Jun 2024 21:32:32 -0500 Subject: [PATCH 29/88] clarify the setup end-condition Signed-off-by: Ulincsys --- augur/templates/first-time-config.j2 | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/augur/templates/first-time-config.j2 b/augur/templates/first-time-config.j2 index 06feaefa55..c9c2106d6c 100644 --- a/augur/templates/first-time-config.j2 +++ b/augur/templates/first-time-config.j2 @@ -223,7 +223,11 @@ fetch(url).then(async () => { modal = document.getElementById("modal-title"); modal.innerHTML = "Finished" - displayError("The first time setup server has been shut down.") + if(event.submitter.id == "Exit") { + displayError("The first-time setup server has been shut down. You can close this window.") + } else { + displayError("Augur has been started, and the first-time setup server has been shut down. You can now access the Augur front-end at the address and port previously configured."); + } }) } } From fdc18cb6a913eef9bc288b1d71141c460af7b316 Mon Sep 17 00:00:00 2001 From: Ulincsys Date: Sun, 9 Jun 2024 21:59:00 -0500 Subject: [PATCH 30/88] side-port important change dropped in merge Signed-off-by: Ulincsys --- augur/api/view/init.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/augur/api/view/init.py b/augur/api/view/init.py index e7c8386d30..b0b4b27446 100644 --- a/augur/api/view/init.py +++ b/augur/api/view/init.py @@ -1,9 +1,12 @@ from pathlib import Path +from .server import Environment from augur.application.logs import AugurLogger import secrets, yaml +env = Environment() + # load configuration files and initialize globals -configFile = Path("config.yml") +configFile = Path(env.setdefault("CONFIG_LOCATION", "config.yml")) report_requests = {} settings = {} From daced69adc2f68a0e9a4264260b4195fb7e124dc Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Mon, 10 Jun 2024 18:53:19 -0500 Subject: [PATCH 31/88] updates to docker testing Signed-off-by: Sean P. Goggins --- .github/workflows/build_docker.yml | 2 +- scripts/control/fix-views.sh | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 scripts/control/fix-views.sh diff --git a/.github/workflows/build_docker.yml b/.github/workflows/build_docker.yml index 7f93e599ae..b29ab2ed89 100644 --- a/.github/workflows/build_docker.yml +++ b/.github/workflows/build_docker.yml @@ -23,7 +23,7 @@ jobs: name: Build image runs-on: ubuntu-latest steps: - - name: Checkout startup-wizard + - name: Checkout main uses: actions/checkout@v2 - name: Run the build run: | diff --git a/scripts/control/fix-views.sh b/scripts/control/fix-views.sh new file mode 100644 index 0000000000..d870c0be0e --- /dev/null +++ b/scripts/control/fix-views.sh @@ -0,0 +1,20 @@ +#!/bin/sh +vacuumdb -h localhost -p 5432 -U augur -j 4 -z -v augur; +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW augur_data.api_get_all_repos_issues with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_commits_and_committers_daily_count with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW augur_data.api_get_all_repo_prs with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW augur_data.api_get_all_repos_commits with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW augur_data.augur_new_contributors with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_contributor_actions with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_new_contributors with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_entry_list with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_contributor_metrics with data; ' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_issue_assignments with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_pr_assignments with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_repo_languages with data; ' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_user_repos with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_pr_response_times with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_contributor_recent_actions with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_pr_response with data;' + + From be2b209f1cb5592de9da2e5aac6f6b9249828465 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Mon, 10 Jun 2024 19:07:33 -0500 Subject: [PATCH 32/88] update wizard timeout Signed-off-by: Sean P. Goggins --- scripts/install/wizard.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/install/wizard.py b/scripts/install/wizard.py index 8dcd91bcf2..84d336cc3f 100644 --- a/scripts/install/wizard.py +++ b/scripts/install/wizard.py @@ -188,7 +188,7 @@ def update_config_db(): env[key] = value result = subprocess.Popen(f"{config_script}", stdout=subprocess.PIPE, stderr=subprocess.PIPE) try: - result.wait(10) + result.wait(120) except: return "Timeout reached waiting for database update to complete", 500 From 42b0bc451df35f2994c4dd3cdba3b0813462696f Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Tue, 11 Jun 2024 12:22:20 -0500 Subject: [PATCH 33/88] move facade table refresh to refresh with the mat views Signed-off-by: Isaac Milarsky --- augur/tasks/db/refresh_materialized_views.py | 33 ++++++++ augur/tasks/git/facade_tasks.py | 86 +------------------- augur/tasks/start_tasks.py | 2 - 3 files changed, 34 insertions(+), 87 deletions(-) diff --git a/augur/tasks/db/refresh_materialized_views.py b/augur/tasks/db/refresh_materialized_views.py index 09faffe0cb..8a06ac7a61 100644 --- a/augur/tasks/db/refresh_materialized_views.py +++ b/augur/tasks/db/refresh_materialized_views.py @@ -4,6 +4,9 @@ from augur.tasks.init.celery_app import celery_app as celery from augur.application.db.lib import execute_sql +from augur.tasks.git.util.facade_worker.facade_worker.config import FacadeHelper +from augur.tasks.git.util.facade_worker.facade_worker.rebuildcache import invalidate_caches, rebuild_unknown_affiliation_and_web_caches + @celery.task(bind=True) def refresh_materialized_views(self): @@ -163,6 +166,36 @@ def refresh_materialized_views(self): logger.info(f"error is {e}") pass + #Now refresh facade tables + #Use this class to get all the settings and + #utility functions for facade + facade_helper = FacadeHelper(logger) + + if facade_helper.nuke_stored_affiliations: + logger.error("Nuke stored affiliations is deprecated!") + # deprecated because the UI component of facade where affiliations would be + # nuked upon change no longer exists, and this information can easily be derived + # from queries and materialized views in the current version of Augur. + # This method is also a major performance bottleneck with little value. + + if not facade_helper.limited_run or (facade_helper.limited_run and facade_helper.fix_affiliations): + logger.error("Fill empty affiliations is deprecated!") + # deprecated because the UI component of facade where affiliations would need + # to be fixed upon change no longer exists, and this information can easily be derived + # from queries and materialized views in the current version of Augur. + # This method is also a major performance bottleneck with little value. + + if facade_helper.force_invalidate_caches: + try: + invalidate_caches(facade_helper) + except Exception as e: + logger.info(f"error is {e}") + + if not facade_helper.limited_run or (facade_helper.limited_run and facade_helper.rebuild_caches): + try: + rebuild_unknown_affiliation_and_web_caches(facade_helper) + except Exception as e: + logger.info(f"error is {e}") diff --git a/augur/tasks/git/facade_tasks.py b/augur/tasks/git/facade_tasks.py index 97a69a7574..272c27afa2 100644 --- a/augur/tasks/git/facade_tasks.py +++ b/augur/tasks/git/facade_tasks.py @@ -10,8 +10,6 @@ from augur.tasks.git.util.facade_worker.facade_worker.utilitymethods import get_absolute_repo_path, get_parent_commits_set, get_existing_commits_set from augur.tasks.git.util.facade_worker.facade_worker.analyzecommit import analyze_commit from augur.tasks.git.util.facade_worker.facade_worker.utilitymethods import get_repo_commit_count, update_facade_scheduling_fields, get_facade_weight_with_commit_count -from augur.tasks.git.util.facade_worker.facade_worker.rebuildcache import fill_empty_affiliations, invalidate_caches, nuke_affiliations, rebuild_unknown_affiliation_and_web_caches - from augur.tasks.github.facade_github.tasks import * from augur.tasks.git.util.facade_worker.facade_worker.config import FacadeHelper @@ -235,37 +233,6 @@ def analyze_commits_in_parallel(repo_git, multithreaded: bool)-> None: logger.info("Analysis complete") return -@celery.task -def nuke_affiliations_facade_task(): - - logger = logging.getLogger(nuke_affiliations_facade_task.__name__) - - facade_helper = FacadeHelper(logger) - nuke_affiliations(facade_helper) - -@celery.task -def fill_empty_affiliations_facade_task(): - - logger = logging.getLogger(fill_empty_affiliations_facade_task.__name__) - facade_helper = FacadeHelper(logger) - fill_empty_affiliations(facade_helper) - -@celery.task -def invalidate_caches_facade_task(): - - logger = logging.getLogger(invalidate_caches_facade_task.__name__) - - facade_helper = FacadeHelper(logger) - invalidate_caches(facade_helper) - -@celery.task -def rebuild_unknown_affiliation_and_web_caches_facade_task(): - - logger = logging.getLogger(rebuild_unknown_affiliation_and_web_caches_facade_task.__name__) - - facade_helper = FacadeHelper(logger) - rebuild_unknown_affiliation_and_web_caches(facade_helper) - # retry this task indefinitely every 5 minutes if it errors. Since the only way it gets scheduled is by itself, so if it stops running no more clones will happen till the instance is restarted @celery.task(autoretry_for=(Exception,), retry_backoff=True, retry_backoff_max=300, retry_jitter=True, max_retries=None) def clone_repos(): @@ -464,55 +431,4 @@ def facade_phase(repo_git): ) logger.info(f"Facade sequence: {facade_sequence}") - return chain(*facade_sequence) - -def generate_non_repo_domain_facade_tasks(logger): - logger.info("Generating facade sequence") - facade_helper = FacadeHelper(logger) - - # Figure out what we need to do - limited_run = facade_helper.limited_run - delete_marked_repos = facade_helper.delete_marked_repos - pull_repos = facade_helper.pull_repos - # clone_repos = facade_helper.clone_repos - check_updates = facade_helper.check_updates - # force_updates = facade_helper.force_updates - run_analysis = facade_helper.run_analysis - # force_analysis = facade_helper.force_analysis - nuke_stored_affiliations = facade_helper.nuke_stored_affiliations - fix_affiliations = facade_helper.fix_affiliations - force_invalidate_caches = facade_helper.force_invalidate_caches - rebuild_caches = facade_helper.rebuild_caches - #if abs((datetime.datetime.strptime(session.cfg.get_setting('aliases_processed')[:-3], - # '%Y-%m-%d %I:%M:%S.%f') - datetime.datetime.now()).total_seconds()) // 3600 > int(session.cfg.get_setting( - # 'update_frequency')) else 0 - force_invalidate_caches = facade_helper.force_invalidate_caches - create_xlsx_summary_files = facade_helper.create_xlsx_summary_files - multithreaded = facade_helper.multithreaded - - facade_sequence = [] - - if nuke_stored_affiliations: - #facade_sequence.append(nuke_affiliations_facade_task.si().on_error(facade_error_handler.s()))#nuke_affiliations(session.cfg) - logger.info("Nuke stored affiliations is deprecated.") - # deprecated because the UI component of facade where affiliations would be - # nuked upon change no longer exists, and this information can easily be derived - # from queries and materialized views in the current version of Augur. - # This method is also a major performance bottleneck with little value. - - #logger.info(session.cfg) - if not limited_run or (limited_run and fix_affiliations): - #facade_sequence.append(fill_empty_affiliations_facade_task.si().on_error(facade_error_handler.s()))#fill_empty_affiliations(session) - logger.info("Fill empty affiliations is deprecated.") - # deprecated because the UI component of facade where affiliations would need - # to be fixed upon change no longer exists, and this information can easily be derived - # from queries and materialized views in the current version of Augur. - # This method is also a major performance bottleneck with little value. - - if force_invalidate_caches: - facade_sequence.append(invalidate_caches_facade_task.si().on_error(facade_error_handler.s()))#invalidate_caches(session.cfg) - - if not limited_run or (limited_run and rebuild_caches): - facade_sequence.append(rebuild_unknown_affiliation_and_web_caches_facade_task.si().on_error(facade_error_handler.s()))#rebuild_unknown_affiliation_and_web_caches(session.cfg) - - return facade_sequence + return chain(*facade_sequence) \ No newline at end of file diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index 6b35881d60..f0b3450dfb 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -140,8 +140,6 @@ def non_repo_domain_tasks(self): enabled_tasks = [] - enabled_tasks.extend(generate_non_repo_domain_facade_tasks(logger)) - if machine_learning_phase.__name__ in enabled_phase_names: #enabled_tasks.extend(machine_learning_phase()) from augur.tasks.data_analysis.contributor_breadth_worker.contributor_breadth_worker import contributor_breadth_model From 2a76f9cf13e14c1842b43793fb40b7e5819a02b6 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sun, 16 Jun 2024 19:06:20 -0500 Subject: [PATCH 34/88] Don't use tuple constructor Signed-off-by: Andrew Brain --- augur/tasks/util/collection_util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/augur/tasks/util/collection_util.py b/augur/tasks/util/collection_util.py index b009bcb928..1112a791b8 100644 --- a/augur/tasks/util/collection_util.py +++ b/augur/tasks/util/collection_util.py @@ -142,7 +142,7 @@ def get_valid_repos(self,session): return new_collection_git_list = get_newly_added_repos(session, limit, hook=self.name) - collection_list = [tuple(repo_git, True) for repo_git in new_collection_git_list] + collection_list = [(repo_git, True) for repo_git in new_collection_git_list] self.repo_list.extend(collection_list) limit -= len(collection_list) @@ -151,7 +151,7 @@ def get_valid_repos(self,session): return recollection_git_list = get_repos_for_recollection(session, limit, hook=self.name, days_until_collect_again=self.days_until_collect_again) - collection_list = [tuple(repo_git, False) for repo_git in recollection_git_list] + collection_list = [(repo_git, False) for repo_git in recollection_git_list] self.repo_list.extend(collection_list) From 6730a11adbfe5a69238128749f8e809534703d66 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sun, 16 Jun 2024 22:42:13 -0500 Subject: [PATCH 35/88] Fix syntax errors Signed-off-by: Andrew Brain --- augur/tasks/github/pull_requests/commits_model/core.py | 7 +++++++ augur/tasks/github/pull_requests/files_model/core.py | 8 +++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/augur/tasks/github/pull_requests/commits_model/core.py b/augur/tasks/github/pull_requests/commits_model/core.py index 73945a59c5..76e1a1e8ed 100644 --- a/augur/tasks/github/pull_requests/commits_model/core.py +++ b/augur/tasks/github/pull_requests/commits_model/core.py @@ -25,6 +25,13 @@ def pull_request_commits_model(repo_id,logger, augur_db, key_auth, full_collecti prs = get_updated_prs(repo_id, last_collected) pr_urls = [pr.pr_url for pr in prs] + pr_urls = [] + for pr in prs: + pr_urls.append({ + 'pr_url': pr.pr_url, + 'pull_request_id': pr.pull_request_id + }) + query = augur_db.session.query(Repo).filter(Repo.repo_id == repo_id) repo = execute_session_query(query, 'one') diff --git a/augur/tasks/github/pull_requests/files_model/core.py b/augur/tasks/github/pull_requests/files_model/core.py index 5966acc9cc..20b3720ce7 100644 --- a/augur/tasks/github/pull_requests/files_model/core.py +++ b/augur/tasks/github/pull_requests/files_model/core.py @@ -24,7 +24,13 @@ def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection else: last_collected = get_secondary_data_last_collected(repo_id).date() prs = get_updated_prs(repo_id, last_collected) - pr_numbers = [pr.pr_src_number for pr in prs] + + pr_numbers = [] + for pr in prs: + pr_numbers.append({ + 'pr_src_number': pr.pr_src_number, + 'pull_request_id': pr.pull_request_id + }) query = augur_db.session.query(Repo).filter(Repo.repo_id == repo_id) repo = execute_session_query(query, 'one') From 6035218a8b3fba08b00701bd72e0f187b5823e49 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sun, 16 Jun 2024 23:00:47 -0500 Subject: [PATCH 36/88] fixes after merging dev Signed-off-by: Andrew Brain --- .../pull_requests/commits_model/core.py | 1 - .../pull_requests/commits_model/tasks.py | 6 +- augur/tasks/github/pull_requests/tasks.py | 105 +++++++++--------- 3 files changed, 56 insertions(+), 56 deletions(-) diff --git a/augur/tasks/github/pull_requests/commits_model/core.py b/augur/tasks/github/pull_requests/commits_model/core.py index 44f8ef9b2a..f58d875503 100644 --- a/augur/tasks/github/pull_requests/commits_model/core.py +++ b/augur/tasks/github/pull_requests/commits_model/core.py @@ -72,7 +72,6 @@ def pull_request_commits_model(repo_id,logger, augur_db, key_auth, full_collecti if len(all_data) > 0: logger.info(f"{task_name}: Inserting {len(all_data)} rows") pr_commits_natural_keys = ["pull_request_id", "repo_id", "pr_cmt_sha"] - bulk_insert_dicts(logger, all_data,PullRequestCommit,pr_commits_natural_keys) augur_db.insert_data(all_data,PullRequestCommit,pr_commits_natural_keys) diff --git a/augur/tasks/github/pull_requests/commits_model/tasks.py b/augur/tasks/github/pull_requests/commits_model/tasks.py index 6eedc16ebd..e6acdfa90a 100644 --- a/augur/tasks/github/pull_requests/commits_model/tasks.py +++ b/augur/tasks/github/pull_requests/commits_model/tasks.py @@ -2,7 +2,7 @@ from augur.tasks.github.pull_requests.commits_model.core import * from augur.tasks.init.celery_app import celery_app as celery from augur.tasks.init.celery_app import AugurSecondaryRepoCollectionTask -from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth +from augur.tasks.github.util.github_task_session import GithubTaskManifest from augur.application.db.lib import get_repo_by_repo_git @@ -14,6 +14,6 @@ def process_pull_request_commits(repo_git: str, full_collection: bool) -> None: repo = get_repo_by_repo_git(repo_git) - key_auth = GithubRandomKeyAuth(logger) + with GithubTaskManifest(logger) as manifest: - pull_request_commits_model(repo, logger, key_auth, full_collection) + pull_request_commits_model(repo.repo_id, logger, manifest.augur_db, manifest.key_auth, full_collection) diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 40b3a63ff2..e8d19b5560 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -9,6 +9,7 @@ from augur.tasks.github.util.util import add_key_value_pair_to_dicts, get_owner_repo from augur.application.db.models import PullRequest, Message, PullRequestReview, PullRequestLabel, PullRequestReviewer, PullRequestMeta, PullRequestAssignee, PullRequestReviewMessageRef, Contributor, Repo from augur.tasks.github.util.github_task_session import GithubTaskManifest +from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth from augur.application.db.lib import get_session, get_repo_by_repo_git, bulk_insert_dicts, get_pull_request_reviews_by_repo_id from augur.application.db.util import execute_session_query from ..messages.tasks import process_github_comment_contributors @@ -352,73 +353,73 @@ def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: last_collected = get_secondary_data_last_collected(repo_id).date() prs = get_updated_prs(repo_id, last_collected) - pr_count = len(prs) + pr_count = len(prs) - all_pr_reviews = {} - for index, pr in enumerate(prs): + all_pr_reviews = {} + for index, pr in enumerate(prs): - pr_number = pr.pr_src_number - pull_request_id = pr.pull_request_id + pr_number = pr.pr_src_number + pull_request_id = pr.pull_request_id - logger.info(f"{owner}/{repo} Collecting Pr Reviews for pr {index + 1} of {pr_count}") + logger.info(f"{owner}/{repo} Collecting Pr Reviews for pr {index + 1} of {pr_count}") - pr_review_url = f"https://api.github.com/repos/{owner}/{repo}/pulls/{pr_number}/reviews" + pr_review_url = f"https://api.github.com/repos/{owner}/{repo}/pulls/{pr_number}/reviews" - pr_reviews = [] - pr_reviews_generator = GithubPaginator(pr_review_url, key_auth, logger) - for page_data, page in pr_reviews_generator.iter_pages(): - - if page_data is None: - break - - if len(page_data) == 0: - break - - if isinstance(page_data, list): - page_data = [ - element.decode('utf-8').replace('\x00', ' ') if isinstance(element, bytes) else element - for element in page_data - ] - logger.info(f"NUL characters were found in PR Reviews and replaced with spaces.") - elif isinstance(page_data, bytes): - page_data = page_data.decode('utf-8').replace('\x00', ' ') - logger.info(f"NUL characters were found in PR Reviews and replaced with spaces.") + pr_reviews = [] + pr_reviews_generator = GithubPaginator(pr_review_url, manifest.key_auth, logger) + for page_data, page in pr_reviews_generator.iter_pages(): + + if page_data is None: + break + + if len(page_data) == 0: + break + + if isinstance(page_data, list): + page_data = [ + element.decode('utf-8').replace('\x00', ' ') if isinstance(element, bytes) else element + for element in page_data + ] + logger.info(f"NUL characters were found in PR Reviews and replaced with spaces.") + elif isinstance(page_data, bytes): + page_data = page_data.decode('utf-8').replace('\x00', ' ') + logger.info(f"NUL characters were found in PR Reviews and replaced with spaces.") + + pr_reviews.extend(page_data) - pr_reviews.extend(page_data) - - if pr_reviews: - all_pr_reviews[pull_request_id] = pr_reviews + if pr_reviews: + all_pr_reviews[pull_request_id] = pr_reviews - if not list(all_pr_reviews.keys()): - logger.info(f"{owner}/{repo} No pr reviews for repo") - return + if not list(all_pr_reviews.keys()): + logger.info(f"{owner}/{repo} No pr reviews for repo") + return - contributors = [] - for pull_request_id in all_pr_reviews.keys(): + contributors = [] + for pull_request_id in all_pr_reviews.keys(): - reviews = all_pr_reviews[pull_request_id] - for review in reviews: - contributor = process_pull_request_review_contributor(review, tool_source, tool_version, data_source) - if contributor: - contributors.append(contributor) + reviews = all_pr_reviews[pull_request_id] + for review in reviews: + contributor = process_pull_request_review_contributor(review, tool_source, tool_version, data_source) + if contributor: + contributors.append(contributor) - logger.info(f"{owner}/{repo} Pr reviews: Inserting {len(contributors)} contributors") - augur_db.insert_data(contributors, Contributor, ["cntrb_id"]) + logger.info(f"{owner}/{repo} Pr reviews: Inserting {len(contributors)} contributors") + augur_db.insert_data(contributors, Contributor, ["cntrb_id"]) - pr_reviews = [] - for pull_request_id in all_pr_reviews.keys(): + pr_reviews = [] + for pull_request_id in all_pr_reviews.keys(): - reviews = all_pr_reviews[pull_request_id] - for review in reviews: - - if "cntrb_id" in review: - pr_reviews.append(extract_needed_pr_review_data(review, pull_request_id, repo_id, platform_id, tool_source, tool_version)) + reviews = all_pr_reviews[pull_request_id] + for review in reviews: + + if "cntrb_id" in review: + pr_reviews.append(extract_needed_pr_review_data(review, pull_request_id, repo_id, platform_id, tool_source, tool_version)) - logger.info(f"{owner}/{repo}: Inserting pr reviews of length: {len(pr_reviews)}") - pr_review_natural_keys = ["pr_review_src_id",] - augur_db.insert_data(pr_reviews, PullRequestReview, pr_review_natural_keys) + logger.info(f"{owner}/{repo}: Inserting pr reviews of length: {len(pr_reviews)}") + pr_review_natural_keys = ["pr_review_src_id",] + augur_db.insert_data(pr_reviews, PullRequestReview, pr_review_natural_keys) From 70fa87c019dab8d144fb84325d5f936b19d8e45c Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 17 Jun 2024 07:11:48 -0500 Subject: [PATCH 37/88] Fix missing import Signed-off-by: Andrew Brain --- augur/tasks/github/pull_requests/tasks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 6ee451f361..083abe4536 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -5,6 +5,7 @@ from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask, AugurSecondaryRepoCollectionTask from augur.application.db.data_parse import * from augur.tasks.github.util.github_data_access import GithubDataAccess +from augur.tasks.github.util.github_paginator import GithubPaginator from augur.tasks.github.util.github_task_session import GithubTaskManifest from augur.tasks.util.worker_util import remove_duplicate_dicts from augur.tasks.github.util.util import add_key_value_pair_to_dicts, get_owner_repo From dbed6aaa7333498e57ef979fe47e9a0baec3fa53 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 18 Jun 2024 00:15:05 +0000 Subject: [PATCH 38/88] Bump scikit-learn in /augur/tasks/data_analysis/clustering_worker Bumps [scikit-learn](https://github.com/scikit-learn/scikit-learn) from 1.1.3 to 1.5.0. - [Release notes](https://github.com/scikit-learn/scikit-learn/releases) - [Commits](https://github.com/scikit-learn/scikit-learn/compare/1.1.3...1.5.0) --- updated-dependencies: - dependency-name: scikit-learn dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- augur/tasks/data_analysis/clustering_worker/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/data_analysis/clustering_worker/setup.py b/augur/tasks/data_analysis/clustering_worker/setup.py index 78fb0b4b50..f917268f2c 100644 --- a/augur/tasks/data_analysis/clustering_worker/setup.py +++ b/augur/tasks/data_analysis/clustering_worker/setup.py @@ -28,7 +28,7 @@ def read(filename): 'requests==2.28.0', 'psycopg2-binary==2.9.3', #'sklearn==0.0.0', - 'scikit-learn==1.1.3', + 'scikit-learn==1.5.0', 'numpy==1.26.0', 'nltk==3.6.6', 'seaborn==0.11.1', From 95f37e6326edc537319529225851ac49987d6c63 Mon Sep 17 00:00:00 2001 From: Ulincsys Date: Thu, 20 Jun 2024 14:36:09 -0500 Subject: [PATCH 39/88] fix missing import error Signed-off-by: Ulincsys --- augur/api/view/server/Environment.py | 52 ++++++++++++++++++++++++++++ augur/api/view/server/__init__.py | 1 + 2 files changed, 53 insertions(+) create mode 100644 augur/api/view/server/Environment.py diff --git a/augur/api/view/server/Environment.py b/augur/api/view/server/Environment.py new file mode 100644 index 0000000000..76b8207ca5 --- /dev/null +++ b/augur/api/view/server/Environment.py @@ -0,0 +1,52 @@ +import os + +class Environment: + """ + This class is used to make dealing with environment variables easier. It + allows you to set multiple environment variables at once, and to get items + with subscript notation without needing to deal with the particularities of + non-existent values. + """ + def __init__(self, **kwargs): + for (key, value) in kwargs.items(): + self[key] = value + + def setdefault(self, key, value): + if not self[key]: + self[key] = value + return value + return self[key] + + def setall(self, **kwargs): + result = {} + for (key, value) in kwargs.items(): + if self[key]: + result[key] = self[key] + self[key] = value + + def getany(self, *args): + result = {} + for arg in args: + if self[arg]: + result[arg] = self[arg] + return result + + def as_type(self, type, key): + if self[key]: + return type(self[key]) + return None + + def __getitem__(self, key): + return os.getenv(key) + + def __setitem__(self, key, value): + os.environ[key] = str(value) + + def __len__(self)-> int: + return len(os.environ) + + def __str__(self)-> str: + return str(os.environ) + + def __iter__(self): + return (item for item in os.environ.items()) \ No newline at end of file diff --git a/augur/api/view/server/__init__.py b/augur/api/view/server/__init__.py index e75f8f9d23..e919a597a8 100644 --- a/augur/api/view/server/__init__.py +++ b/augur/api/view/server/__init__.py @@ -1 +1,2 @@ from .LoginException import LoginException +from .Environment import Environment \ No newline at end of file From 3a78d153199ab7128fb496c5f80ada40f0067d5d Mon Sep 17 00:00:00 2001 From: Ulincsys Date: Thu, 20 Jun 2024 16:13:41 -0500 Subject: [PATCH 40/88] Explicitly identify iconized buttons using role attribute Signed-off-by: Ulincsys --- augur/templates/groups-table.j2 | 2 +- augur/templates/login.j2 | 4 ++-- augur/templates/settings.j2 | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/augur/templates/groups-table.j2 b/augur/templates/groups-table.j2 index bb98cc383a..6601f17299 100644 --- a/augur/templates/groups-table.j2 +++ b/augur/templates/groups-table.j2 @@ -55,7 +55,7 @@ {{loop.index}} {{ group.name }} {{ group.count }} - + {% endfor %} diff --git a/augur/templates/login.j2 b/augur/templates/login.j2 index faaab620ea..a4bf228d6e 100644 --- a/augur/templates/login.j2 +++ b/augur/templates/login.j2 @@ -16,7 +16,7 @@ required>
@@ -26,7 +26,7 @@ placeholder="Confirm Password">
diff --git a/augur/templates/settings.j2 b/augur/templates/settings.j2 index c10a0c914c..a294ded1a7 100644 --- a/augur/templates/settings.j2 +++ b/augur/templates/settings.j2 @@ -204,8 +204,8 @@ href="{{ url_for('user_group_view', group=group.name) }}">{{ group.name }} {{ group.repos | length }} - + From 42b5840f7c9e117f63c407894ee1e471c4f70c5d Mon Sep 17 00:00:00 2001 From: Ulincsys Date: Fri, 21 Jun 2024 01:04:59 -0500 Subject: [PATCH 41/88] Remove redundant button classification Signed-off-by: Ulincsys --- augur/templates/login.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/augur/templates/login.j2 b/augur/templates/login.j2 index a4bf228d6e..faaab620ea 100644 --- a/augur/templates/login.j2 +++ b/augur/templates/login.j2 @@ -16,7 +16,7 @@ required>
@@ -26,7 +26,7 @@ placeholder="Confirm Password">
From 6f3f09b7f488203c1e5a35722be64d3e071ef573 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 21 Jun 2024 10:09:53 -0500 Subject: [PATCH 42/88] library version consistency fix Signed-off-by: Sean P. Goggins --- .../data_analysis/discourse_analysis/setup.py | 2 +- .../data_analysis/message_insights/setup.py | 2 +- .../mat_view_explore/vacuum-rebuild-views.sh | 17 +++++++++++++++++ 3 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 scripts/mat_view_explore/vacuum-rebuild-views.sh diff --git a/augur/tasks/data_analysis/discourse_analysis/setup.py b/augur/tasks/data_analysis/discourse_analysis/setup.py index 7e678936c6..9e2a9bcea6 100644 --- a/augur/tasks/data_analysis/discourse_analysis/setup.py +++ b/augur/tasks/data_analysis/discourse_analysis/setup.py @@ -31,7 +31,7 @@ def read(filename): 'scipy>=1.10.0', 'nltk==3.6.6', 'pandas==1.5.3', - 'scikit-learn==1.1.3', + 'scikit-learn==1.5.0', 'textblob==0.15.3', 'python-crfsuite>=0.9.8', 'sklearn-crfsuite>=0.3.6', diff --git a/augur/tasks/data_analysis/message_insights/setup.py b/augur/tasks/data_analysis/message_insights/setup.py index 17a42f5940..87d90cba64 100644 --- a/augur/tasks/data_analysis/message_insights/setup.py +++ b/augur/tasks/data_analysis/message_insights/setup.py @@ -31,7 +31,7 @@ def read(filename): 'psycopg2-binary==2.9.3', 'click==8.0.3', 'scipy>=1.10.0', - 'scikit-learn==1.1.3', #0.24.2', + 'scikit-learn==1.5.0', #0.24.2', 'numpy==1.26.0', 'nltk==3.6.6', 'pandas==1.5.3', diff --git a/scripts/mat_view_explore/vacuum-rebuild-views.sh b/scripts/mat_view_explore/vacuum-rebuild-views.sh new file mode 100644 index 0000000000..9088c8f856 --- /dev/null +++ b/scripts/mat_view_explore/vacuum-rebuild-views.sh @@ -0,0 +1,17 @@ +vacuumdb -h localhost -p 5432 -U augur -j 4 -z -v augur; +psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.api_get_all_repos_issues with data;' +psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_commits_and_committers_daily_count with data;' +psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.api_get_all_repo_prs with data;' +psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.api_get_all_repos_commits with data;' +psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.augur_new_contributors with data;' +psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_contributor_actions with data;' +psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_new_contributors with data;' +psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_entry_list with data;' +psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_contributor_metrics with data; ' +psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_issue_assignments with data;' +psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_pr_assignments with data;' +psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_repo_languages with data; ' +psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_user_repos with data;' +psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_pr_response_times with data;' +psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_contributor_recent_actions with data;' +psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_pr_response with data;' From f25f6957b5744a886c116f4b5f44c42f116e4549 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 21 Jun 2024 10:11:53 -0500 Subject: [PATCH 43/88] library version consistency updates Signed-off-by: Sean P. Goggins --- augur/tasks/data_analysis/clustering_worker/setup.py | 2 +- augur/tasks/data_analysis/contributor_breadth_worker/setup.py | 2 +- augur/tasks/data_analysis/discourse_analysis/setup.py | 2 +- augur/tasks/data_analysis/insight_worker/setup.py | 2 +- augur/tasks/data_analysis/message_insights/setup.py | 2 +- augur/tasks/data_analysis/pull_request_analysis_worker/setup.py | 2 +- augur/tasks/git/util/facade_worker/setup.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/augur/tasks/data_analysis/clustering_worker/setup.py b/augur/tasks/data_analysis/clustering_worker/setup.py index 03aa92c94a..a197b21568 100644 --- a/augur/tasks/data_analysis/clustering_worker/setup.py +++ b/augur/tasks/data_analysis/clustering_worker/setup.py @@ -25,7 +25,7 @@ def read(filename): 'Flask-Cors==4.0.1', 'Flask-Login==0.5.0', 'Flask-WTF==1.0.0', - 'requests==2.28.0', + 'requests==2.32.0', 'psycopg2-binary==2.9.3', #'sklearn==0.0.0', 'scikit-learn==1.5.0', diff --git a/augur/tasks/data_analysis/contributor_breadth_worker/setup.py b/augur/tasks/data_analysis/contributor_breadth_worker/setup.py index 4d40fe423d..805edfb36b 100644 --- a/augur/tasks/data_analysis/contributor_breadth_worker/setup.py +++ b/augur/tasks/data_analysis/contributor_breadth_worker/setup.py @@ -26,7 +26,7 @@ def read(filename): 'Flask-Cors==4.0.1', 'Flask-Login==0.5.0', 'Flask-WTF==1.0.0', - 'requests==2.28.0', + 'requests==2.32.0', 'psycopg2-binary==2.9.3' ], entry_points={ diff --git a/augur/tasks/data_analysis/discourse_analysis/setup.py b/augur/tasks/data_analysis/discourse_analysis/setup.py index 9e2a9bcea6..ca936a6000 100644 --- a/augur/tasks/data_analysis/discourse_analysis/setup.py +++ b/augur/tasks/data_analysis/discourse_analysis/setup.py @@ -25,7 +25,7 @@ def read(filename): 'Flask-Cors==4.0.1', 'Flask-Login==0.5.0', 'Flask-WTF==1.0.0', - 'requests==2.28.0', + 'requests==2.32.0', 'psycopg2-binary==2.9.3', 'click==8.0.3', 'scipy>=1.10.0', diff --git a/augur/tasks/data_analysis/insight_worker/setup.py b/augur/tasks/data_analysis/insight_worker/setup.py index aff72965bf..92d663e3ae 100644 --- a/augur/tasks/data_analysis/insight_worker/setup.py +++ b/augur/tasks/data_analysis/insight_worker/setup.py @@ -26,7 +26,7 @@ def read(filename): 'Flask-Cors==4.0.1', 'Flask-Login==0.5.0', 'Flask-WTF==1.0.0', - 'requests==2.28.0', + 'requests==2.32.0', 'psycopg2-binary==2.9.3', 'click==8.0.3', 'scipy>=1.10.0', diff --git a/augur/tasks/data_analysis/message_insights/setup.py b/augur/tasks/data_analysis/message_insights/setup.py index 87d90cba64..2f86701619 100644 --- a/augur/tasks/data_analysis/message_insights/setup.py +++ b/augur/tasks/data_analysis/message_insights/setup.py @@ -27,7 +27,7 @@ def read(filename): 'Flask-Cors==4.0.1', 'Flask-Login==0.5.0', 'Flask-WTF==1.0.0', - 'requests==2.28.0', + 'requests==2.32.0', 'psycopg2-binary==2.9.3', 'click==8.0.3', 'scipy>=1.10.0', diff --git a/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py b/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py index d8a8f7e063..63ccbec1de 100644 --- a/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py +++ b/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py @@ -25,7 +25,7 @@ def read(filename): 'Flask-Cors==4.0.1', 'Flask-Login==0.5.0', 'Flask-WTF==1.0.0', - 'requests==2.28.0', + 'requests==2.32.0', 'psycopg2-binary==2.9.3', 'sklearn==0.0', 'nltk==3.6.6', diff --git a/augur/tasks/git/util/facade_worker/setup.py b/augur/tasks/git/util/facade_worker/setup.py index fa38cf0759..e2a1af8b75 100644 --- a/augur/tasks/git/util/facade_worker/setup.py +++ b/augur/tasks/git/util/facade_worker/setup.py @@ -26,7 +26,7 @@ def read(filename): 'Flask-Cors==4.0.1', 'Flask-Login==0.5.0', 'Flask-WTF==1.0.0', - 'requests==2.28.0', + 'requests==2.32.0', 'psycopg2-binary==2.9.3', 'click==8.0.3', 'XlsxWriter==1.3.7' From 2229359c7f47f1456febc58aabba32d33a994438 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sat, 22 Jun 2024 12:16:28 -0500 Subject: [PATCH 44/88] materialized view refresh script Signed-off-by: Sean P. Goggins --- .../mat_view_explore/vacuum-rebuild-views.sh | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/scripts/mat_view_explore/vacuum-rebuild-views.sh b/scripts/mat_view_explore/vacuum-rebuild-views.sh index 9088c8f856..c7471b182f 100644 --- a/scripts/mat_view_explore/vacuum-rebuild-views.sh +++ b/scripts/mat_view_explore/vacuum-rebuild-views.sh @@ -1,17 +1,17 @@ vacuumdb -h localhost -p 5432 -U augur -j 4 -z -v augur; -psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.api_get_all_repos_issues with data;' -psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_commits_and_committers_daily_count with data;' -psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.api_get_all_repo_prs with data;' -psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.api_get_all_repos_commits with data;' -psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.augur_new_contributors with data;' -psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_contributor_actions with data;' -psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_new_contributors with data;' -psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_entry_list with data;' -psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_contributor_metrics with data; ' -psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_issue_assignments with data;' -psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_pr_assignments with data;' -psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_repo_languages with data; ' -psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_user_repos with data;' -psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_pr_response_times with data;' -psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_contributor_recent_actions with data;' -psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_pr_response with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW concurrently augur_data.api_get_all_repos_issues with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_commits_and_committers_daily_count with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW concurrently augur_data.api_get_all_repo_prs with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW concurrently augur_data.api_get_all_repos_commits with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW concurrently augur_data.augur_new_contributors with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_contributor_actions with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_new_contributors with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_entry_list with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_contributor_metrics with data; ' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_issue_assignments with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_pr_assignments with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_repo_languages with data; ' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_user_repos with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_pr_response_times with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_contributor_recent_actions with data;' +psql -U augur -h localhost -p 5432 -d augur -c 'REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_pr_response with data;' From 062fe363eb345c70998d8232baf066768b69474a Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sat, 22 Jun 2024 17:17:57 +0000 Subject: [PATCH 45/88] :materialized view refresh script update Signed-off-by: Ubuntu --- .../{vacuum-rebuild-views.sh => vacuum-rebuild-views.sh} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scripts/mat_view_explore/{vacuum-rebuild-views.sh => vacuum-rebuild-views.sh} (100%) mode change 100644 => 100755 diff --git a/scripts/mat_view_explore/vacuum-rebuild-views.sh b/scripts/mat_view_explore/vacuum-rebuild-views.sh old mode 100644 new mode 100755 similarity index 100% rename from scripts/mat_view_explore/vacuum-rebuild-views.sh rename to scripts/mat_view_explore/vacuum-rebuild-views.sh From 7cd77794a921e386cc81f84c210b4ff8a5dff6ea Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sat, 22 Jun 2024 12:24:33 -0500 Subject: [PATCH 46/88] update collection cadence Signed-off-by: Sean P. Goggins --- augur/application/cli/backend.py | 6 +++--- augur/application/cli/collection.py | 6 +++--- augur/application/cli/tasks.py | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/augur/application/cli/backend.py b/augur/application/cli/backend.py index 2a2deadd1d..ab8810a5a0 100644 --- a/augur/application/cli/backend.py +++ b/augur/application/cli/backend.py @@ -166,21 +166,21 @@ def determine_worker_processes(ratio,maximum): sleep_time += 6 #60% of estimate, Maximum value of 45 : Reduced because it can be lower - core_num_processes = determine_worker_processes(.15, 10) + core_num_processes = determine_worker_processes(.40, 50) logger.info(f"Starting core worker processes with concurrency={core_num_processes}") core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_num_processes} -n core:{uuid.uuid4().hex}@%h" process_list.append(subprocess.Popen(core_worker.split(" "))) sleep_time += 6 #20% of estimate, Maximum value of 25 - secondary_num_processes = determine_worker_processes(.70, 60) + secondary_num_processes = determine_worker_processes(.39, 50) logger.info(f"Starting secondary worker processes with concurrency={secondary_num_processes}") secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={secondary_num_processes} -n secondary:{uuid.uuid4().hex}@%h -Q secondary" process_list.append(subprocess.Popen(secondary_worker.split(" "))) sleep_time += 6 #15% of estimate, Maximum value of 20 - facade_num_processes = determine_worker_processes(.15, 20) + facade_num_processes = determine_worker_processes(.17, 20) logger.info(f"Starting facade worker processes with concurrency={facade_num_processes}") facade_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={facade_num_processes} -n facade:{uuid.uuid4().hex}@%h -Q facade" diff --git a/augur/application/cli/collection.py b/augur/application/cli/collection.py index c13e648322..3cb08ef1cd 100644 --- a/augur/application/cli/collection.py +++ b/augur/application/cli/collection.py @@ -125,21 +125,21 @@ def determine_worker_processes(ratio,maximum): sleep_time += 6 #60% of estimate, Maximum value of 45: Reduced because not needed - core_num_processes = determine_worker_processes(.15, 10) + core_num_processes = determine_worker_processes(.40, 50) logger.info(f"Starting core worker processes with concurrency={core_num_processes}") core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_num_processes} -n core:{uuid.uuid4().hex}@%h" process_list.append(subprocess.Popen(core_worker.split(" "))) sleep_time += 6 #20% of estimate, Maximum value of 25 - secondary_num_processes = determine_worker_processes(.70, 60) + secondary_num_processes = determine_worker_processes(.39, 50) logger.info(f"Starting secondary worker processes with concurrency={secondary_num_processes}") secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={secondary_num_processes} -n secondary:{uuid.uuid4().hex}@%h -Q secondary" process_list.append(subprocess.Popen(secondary_worker.split(" "))) sleep_time += 6 #15% of estimate, Maximum value of 20 - facade_num_processes = determine_worker_processes(.15, 20) + facade_num_processes = determine_worker_processes(.17, 20) logger.info(f"Starting facade worker processes with concurrency={facade_num_processes}") facade_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={facade_num_processes} -n facade:{uuid.uuid4().hex}@%h -Q facade" diff --git a/augur/application/cli/tasks.py b/augur/application/cli/tasks.py index d25f081ab6..d7ce4e4398 100644 --- a/augur/application/cli/tasks.py +++ b/augur/application/cli/tasks.py @@ -36,8 +36,8 @@ def start(): secondary_worker_process = None scheduling_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=1 -n scheduling:{uuid.uuid4().hex}@%h -Q scheduling" - core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=20 -n core:{uuid.uuid4().hex}@%h" - secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=60 -n secondary:{uuid.uuid4().hex}@%h -Q secondary" + core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=50 -n core:{uuid.uuid4().hex}@%h" + secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=50 -n secondary:{uuid.uuid4().hex}@%h -Q secondary" scheduling_worker_process = subprocess.Popen(scheduling_worker.split(" ")) core_worker_process = subprocess.Popen(core_worker.split(" ")) From 1dbb414e46be1daca8ec1d3685e5f48e1c75967a Mon Sep 17 00:00:00 2001 From: Ulincsys Date: Mon, 24 Jun 2024 03:41:28 -0500 Subject: [PATCH 47/88] Improve directory selection for facade worker Signed-off-by: Ulincsys --- scripts/install/config.sh | 73 ++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/scripts/install/config.sh b/scripts/install/config.sh index 6673accfde..09a80d51e1 100755 --- a/scripts/install/config.sh +++ b/scripts/install/config.sh @@ -46,42 +46,37 @@ function get_gitlab_api_key(){ function get_facade_repo_path() { echo "The Facade data collection worker will clone repositories to this machine to run its analysis." - echo "Would you like to clone to an existing directory, or create a new one?" - - select create_facade_repo in "Use an existing directory" "Create a new directory" - do - case $create_facade_repo in - "Use an existing directory" ) - echo "** You MUST use an absolute path. Variable expansion is currently not supported.**" - read -p "Facade repo path: " facade_repo_directory - echo - - - while [[ ! -d "$facade_repo_directory" ]]; do - echo "That directory does not exist." - read -p "Facade repo path: " facade_repo_directory - echo - done - - break - ;; - "Create a new directory" ) - echo "** You MUST use an absolute path. Variable expansion is currently not supported.**" - read -p "Desired directory name: " facade_repo_directory - echo - - if [[ -d "$facade_repo_directory" ]]; then - echo "That directory already exists. Using the given directory." - echo - else - mkdir "$facade_repo_directory" - echo "Directory created." - echo - fi - - break - ;; - esac + echo "Please select a new or existing directory for the Facade worker to use:" + echo + + while true; do + read -e -p "Facade worker directory: " facade_repo_directory + facade_repo_directory=$(realpath $facade_repo_directory) + echo + + if [[ -d "$facade_repo_directory" ]]; then + read -r -p "That directory already exists. Use it? [Y/n]: " facade_response + case "$facade_response" in + [nN][oO]|[nN]) + continue + ;; + *) + break + ;; + esac + else + read -r -p "That directory does not exist. Create it? [Y/n]: " facade_response + case "$facade_response" in + [nN][oO]|[nN]) + continue + ;; + *) + mkdir "$facade_repo_directory" + echo "Directory created." + break + ;; + esac + fi done [[ "${facade_repo_directory}" != */ ]] && facade_repo_directory="${facade_repo_directory}/" @@ -107,6 +102,7 @@ function create_config(){ echo "Using it in the config" echo "Please unset AUGUR_GITHUB_API_KEY if you would like to be prompted for a github api key" github_api_key=$AUGUR_GITHUB_API_KEY + echo fi if [[ -z "${AUGUR_GITHUB_USERNAME}" ]] @@ -118,6 +114,7 @@ function create_config(){ echo "Using it in the config" echo "Please unset AUGUR_GITHUB_USERNAME if you would like to be prompted for a github username" github_username=$AUGUR_GITHUB_USERNAME + echo fi if [[ -z "${AUGUR_GITLAB_API_KEY}" ]] @@ -129,6 +126,7 @@ function create_config(){ echo "Using it in the config" echo "Please unset AUGUR_GITLAB_API_KEY if you would like to be prompted for a gitlab api key" gitlab_api_key=$AUGUR_GITLAB_API_KEY + echo fi @@ -141,6 +139,7 @@ function create_config(){ echo "Using it in the config" echo "Please unset AUGUR_GITLAB_USERNAME if you would like to be prompted for a gitlab username" gitlab_username=$AUGUR_GITLAB_USERNAME + echo fi if [[ -z "${AUGUR_FACADE_REPO_DIRECTORY}" ]] @@ -153,6 +152,7 @@ function create_config(){ echo "IMPORTANT NOTE: This assumes that this directory already exists" echo "Please unset AUGUR_FACADE_REPO_DIRECTORY if you would like to be prompted for the facade repo directory" facade_repo_directory=$AUGUR_FACADE_REPO_DIRECTORY + echo fi if [[ -z "${RABBITMQ_CONN_STRING}" ]] @@ -164,6 +164,7 @@ function create_config(){ echo "Using it in the config" echo "Please unset RABBITMQ_CONN_STRING if you would like to be prompted for the rabbit MQ connection string" rabbitmq_conn_string=$RABBITMQ_CONN_STRING + echo fi #special case for docker entrypoint From ccd637969b3521e4213a11382cea7650eb3225d5 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Mon, 24 Jun 2024 17:32:31 -0500 Subject: [PATCH 48/88] missing import fixed Signed-off-by: Sean P. Goggins --- augur/tasks/github/pull_requests/files_model/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/augur/tasks/github/pull_requests/files_model/core.py b/augur/tasks/github/pull_requests/files_model/core.py index 601a0495e3..983ac67595 100644 --- a/augur/tasks/github/pull_requests/files_model/core.py +++ b/augur/tasks/github/pull_requests/files_model/core.py @@ -1,4 +1,5 @@ import sqlalchemy as s +import httpx from augur.tasks.github.util.gh_graphql_entities import GraphQlPageCollection from augur.application.db.models import * from augur.tasks.github.util.util import get_owner_repo From 9eb8e636abb1ff858feebceb79c1c868528b0467 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 24 Jun 2024 17:33:20 -0500 Subject: [PATCH 49/88] Add response as member of rate limit exception Signed-off-by: Andrew Brain --- augur/tasks/github/util/github_data_access.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/augur/tasks/github/util/github_data_access.py b/augur/tasks/github/util/github_data_access.py index 002995c9fe..074c64f251 100644 --- a/augur/tasks/github/util/github_data_access.py +++ b/augur/tasks/github/util/github_data_access.py @@ -6,7 +6,12 @@ class RatelimitException(Exception): - pass + + def __init__(self, response, message="Github Rate limit exceeded") -> None: + + self.response = response + + super().__init__(message) class UrlNotFoundException(Exception): pass @@ -96,7 +101,7 @@ def make_request(self, url, method="GET", timeout=100): response = client.request(method=method, url=url, timeout=timeout, follow_redirects=True) if response.status_code in [403, 429]: - raise RatelimitException() + raise RatelimitException(response) elif response.status_code == 404: raise UrlNotFoundException(f"Could not find {url}") From 417cd154577438635eb1cf6940116ee4260d9283 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 24 Jun 2024 17:40:26 -0500 Subject: [PATCH 50/88] Clean review comments before inserting Signed-off-by: Andrew Brain --- augur/tasks/github/pull_requests/tasks.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index d92893186d..12eac46482 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -284,7 +284,9 @@ def collect_pull_request_review_comments(repo_git: str) -> None: logger.info(f"Inserting {len(pr_review_comment_dicts)} pr review comments") message_natural_keys = ["platform_msg_id", "pltfrm_id"] message_return_columns = ["msg_id", "platform_msg_id"] - message_return_data = bulk_insert_dicts(logger, pr_review_comment_dicts, Message, message_natural_keys, message_return_columns) + message_string_fields = ["msg_text"] + message_return_data = bulk_insert_dicts(logger, pr_review_comment_dicts, Message, message_natural_keys, + return_columns=message_return_columns, string_fields=message_string_fields) if message_return_data is None: return From 8f2aaa3bd9b0ec30e8e44736bbd5187359dd6e08 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 24 Jun 2024 17:41:26 -0500 Subject: [PATCH 51/88] Use logger not session Signed-off-by: Andrew Brain --- augur/tasks/git/dependency_tasks/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/git/dependency_tasks/core.py b/augur/tasks/git/dependency_tasks/core.py index 979020f9cb..76c1b15098 100644 --- a/augur/tasks/git/dependency_tasks/core.py +++ b/augur/tasks/git/dependency_tasks/core.py @@ -95,7 +95,7 @@ def generate_scorecard(logger, repo_git): try: required_output = parse_json_from_subprocess_call(logger,['./scorecard', command, '--format=json'],cwd=path_to_scorecard) except Exception as e: - session.logger.error(f"Could not parse required output! Error: {e}") + logger.error(f"Could not parse required output! Error: {e}") raise e # end From d0d20aef9693e76fedc094a1ef95195b2faf0de6 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 24 Jun 2024 18:36:04 -0500 Subject: [PATCH 52/88] Order settings and sections Signed-off-by: Andrew Brain --- augur/application/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/augur/application/config.py b/augur/application/config.py index bfda4c8773..e3e93302eb 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -161,7 +161,7 @@ def get_section(self, section_name) -> dict: Returns: The section data as a dict """ - query = self.session.query(Config).filter_by(section_name=section_name) + query = self.session.query(Config).filter_by(section_name=section_name).order_by(Config.setting_name.asc()) section_data = execute_session_query(query, 'all') section_dict = {} @@ -213,7 +213,7 @@ def load_config(self) -> dict: The config from the database """ # get all the sections in the config table - query = self.session.query(Config.section_name) + query = self.session.query(Config.section_name).order_by(Config.section_name.asc()) section_names = execute_session_query(query, 'all') config = {} From 268d482ebc67f4b86823cfebd302f3e6e1f489e8 Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Mon, 24 Jun 2024 18:38:57 -0500 Subject: [PATCH 53/88] Add check to see if git credentials are writable Signed-off-by: Isaac Milarsky --- scripts/install/config.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/install/config.sh b/scripts/install/config.sh index 09a80d51e1..6edcec45b1 100755 --- a/scripts/install/config.sh +++ b/scripts/install/config.sh @@ -177,6 +177,10 @@ function create_config(){ + if ! [ -w $facade_repo_directory/.git-credentials ]; then + echo "User $(whoami) does not have permission to write git credentials!" + exit 1 + fi #Create and cache credentials for github and gitlab touch $facade_repo_directory/.git-credentials From 3a2188f3ab05ca6341e87d1d881dede0ae4e1a83 Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Mon, 24 Jun 2024 18:44:49 -0500 Subject: [PATCH 54/88] move check to inside loop Signed-off-by: Isaac Milarsky --- scripts/install/config.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/install/config.sh b/scripts/install/config.sh index 6edcec45b1..87cecf83a6 100755 --- a/scripts/install/config.sh +++ b/scripts/install/config.sh @@ -54,6 +54,11 @@ function get_facade_repo_path() { facade_repo_directory=$(realpath $facade_repo_directory) echo + if ! [ -w $facade_repo_directory/.git-credentials ]; then + echo "User $(whoami) does not have permission to write git credentials!" + exit 1 + fi + if [[ -d "$facade_repo_directory" ]]; then read -r -p "That directory already exists. Use it? [Y/n]: " facade_response case "$facade_response" in @@ -176,11 +181,6 @@ function create_config(){ fi - - if ! [ -w $facade_repo_directory/.git-credentials ]; then - echo "User $(whoami) does not have permission to write git credentials!" - exit 1 - fi #Create and cache credentials for github and gitlab touch $facade_repo_directory/.git-credentials From 8b8f90d9f8f3b0411009970bedbf5ffb6eed4918 Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Mon, 24 Jun 2024 18:53:33 -0500 Subject: [PATCH 55/88] Fix merge error Signed-off-by: Isaac Milarsky --- augur/tasks/git/facade_tasks.py | 23 +---------------------- augur/tasks/github/facade_github/tasks.py | 7 +++---- 2 files changed, 4 insertions(+), 26 deletions(-) diff --git a/augur/tasks/git/facade_tasks.py b/augur/tasks/git/facade_tasks.py index b7a68be542..b29b59cfbd 100644 --- a/augur/tasks/git/facade_tasks.py +++ b/augur/tasks/git/facade_tasks.py @@ -388,27 +388,6 @@ def generate_analysis_sequence(logger,repo_git, facade_helper): -def generate_contributor_sequence(logger,repo_git, session): - - contributor_sequence = [] - #all_repo_ids = [] - repo_id = None - - #contributor_sequence.append(facade_start_contrib_analysis_task.si()) - repo = get_repo_by_repo_git(repo_git) - repo_id = repo.repo_id - - #pdb.set_trace() - #breakpoint() - #for repo in all_repos: - # contributor_sequence.append(insert_facade_contributors.si(repo['repo_id'])) - #all_repo_ids = [repo['repo_id'] for repo in all_repos] - - #contrib_group = create_grouped_task_load(dataList=all_repo_ids,task=insert_facade_contributors)#group(contributor_sequence) - #contrib_group.link_error(facade_error_handler.s()) - #return contrib_group#chain(facade_start_contrib_analysis_task.si(), contrib_group) - return insert_facade_contributors.si(repo_id) - def facade_phase(repo_git, full_collection): logger = logging.getLogger(facade_phase.__name__) @@ -450,7 +429,7 @@ def facade_phase(repo_git, full_collection): #Generate contributor analysis task group. if not limited_run or (limited_run and run_facade_contributors): - facade_core_collection.append(generate_contributor_sequence(logger,repo_git,facade_helper)) + facade_core_collection.append(insert_facade_contributors.si(repo_git)) #These tasks need repos to be cloned by facade before they can work. diff --git a/augur/tasks/github/facade_github/tasks.py b/augur/tasks/github/facade_github/tasks.py index 4a3806d507..3ec4d54990 100644 --- a/augur/tasks/github/facade_github/tasks.py +++ b/augur/tasks/github/facade_github/tasks.py @@ -7,7 +7,7 @@ from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth from augur.application.db.models import Contributor from augur.tasks.github.facade_github.core import * -from augur.application.db.lib import execute_sql, get_contributor_aliases_by_email, get_unresolved_commit_emails_by_name, get_contributors_by_full_name +from augur.application.db.lib import execute_sql, get_contributor_aliases_by_email, get_unresolved_commit_emails_by_name, get_contributors_by_full_name, get_repo_by_repo_git from augur.tasks.git.util.facade_worker.facade_worker.facade00mainprogram import * @@ -195,10 +195,9 @@ def insert_facade_contributors(self, repo_git): # Set platform id to 1 since this task is github specific platform_id = 1 - engine = self.app.engine - logger = logging.getLogger(insert_facade_contributors.__name__) - repo_id = None + repo = get_repo_by_repo_git(repo_git) + repo_id = repo.repo_id # Get all of the commit data's emails and names from the commit table that do not appear # in the contributors table or the contributors_aliases table. From 98ff8e1836899f9aa943652c4d723128c629244d Mon Sep 17 00:00:00 2001 From: Ulincsys Date: Mon, 24 Jun 2024 18:54:27 -0500 Subject: [PATCH 56/88] Re-prompt on permission error Signed-off-by: Ulincsys --- scripts/install/config.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/install/config.sh b/scripts/install/config.sh index 87cecf83a6..fec4dfe767 100755 --- a/scripts/install/config.sh +++ b/scripts/install/config.sh @@ -55,8 +55,9 @@ function get_facade_repo_path() { echo if ! [ -w $facade_repo_directory/.git-credentials ]; then - echo "User $(whoami) does not have permission to write git credentials!" - exit 1 + echo "User $(whoami) does not have permission to write to that location" + echo "Please select another location" + continue fi if [[ -d "$facade_repo_directory" ]]; then From 523e1351a96e7baa4d06b34f5478a30245ea5750 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 24 Jun 2024 19:00:04 -0500 Subject: [PATCH 57/88] Define method to get core_data_last_collected Signed-off-by: Andrew Brain --- augur/application/db/lib.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/augur/application/db/lib.py b/augur/application/db/lib.py index 9bfd48d9d9..86def423f0 100644 --- a/augur/application/db/lib.py +++ b/augur/application/db/lib.py @@ -511,6 +511,14 @@ def update_issue_closed_cntrbs_by_repo_id(repo_id): """) connection.execute(update_stmt, update_data) +def get_core_data_last_collected(repo_id): + + with get_session() as session: + try: + return session.query(CollectionStatus).filter(CollectionStatus.repo_id == repo_id).one().core_data_last_collected + except s.orm.exc.NoResultFound: + return None + def get_secondary_data_last_collected(repo_id): with get_session() as session: From f0cd7f7a5c0ccb9f1141dcc1fb98a9870ea0ae11 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 24 Jun 2024 19:01:07 -0500 Subject: [PATCH 58/88] Define method to get updated issues Signed-off-by: Andrew Brain --- augur/application/db/lib.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/augur/application/db/lib.py b/augur/application/db/lib.py index 86def423f0..f5191b9920 100644 --- a/augur/application/db/lib.py +++ b/augur/application/db/lib.py @@ -531,4 +531,9 @@ def get_updated_prs(repo_id, since): with get_session() as session: return session.query(PullRequest).filter(PullRequest.repo_id == repo_id, PullRequest.pr_updated_at >= since).order_by(PullRequest.pr_src_number).all() + +def get_updated_issues(repo_id, since): + + with get_session() as session: + return session.query(Issue).filter(Issue.repo_id == repo_id, Issue.updated_at >= since).order_by(Issue.gh_issue_number).all() From d852a60ef9d069afedeec9c36eb4e548f90becb1 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 24 Jun 2024 19:15:12 -0500 Subject: [PATCH 59/88] Optimize prs and issues Signed-off-by: Andrew Brain --- augur/tasks/github/issues/tasks.py | 16 +++++++++++---- augur/tasks/github/pull_requests/tasks.py | 25 +++++++++++++++++------ 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/augur/tasks/github/issues/tasks.py b/augur/tasks/github/issues/tasks.py index 98a8067eb5..6a2a0172af 100644 --- a/augur/tasks/github/issues/tasks.py +++ b/augur/tasks/github/issues/tasks.py @@ -13,13 +13,13 @@ from augur.tasks.util.worker_util import remove_duplicate_dicts from augur.application.db.models import Issue, IssueLabel, IssueAssignee, Contributor from augur.application.config import get_development_flag -from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts +from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts, get_core_data_last_collected development = get_development_flag() @celery.task(base=AugurCoreRepoCollectionTask) -def collect_issues(repo_git : str) -> int: +def collect_issues(repo_git : str, full_collection: bool) -> int: logger = logging.getLogger(collect_issues.__name__) @@ -27,12 +27,17 @@ def collect_issues(repo_git : str) -> int: owner, repo = get_owner_repo(repo_git) + if full_collection: + core_data_last_collected = None + else: + core_data_last_collected = get_core_data_last_collected().date() + key_auth = GithubRandomKeyAuth(logger) logger.info(f'this is the manifest.key_auth value: {str(key_auth)}') try: - issue_data = retrieve_all_issue_data(repo_git, logger, key_auth) + issue_data = retrieve_all_issue_data(repo_git, logger, key_auth, core_data_last_collected) if issue_data: total_issues = len(issue_data) @@ -48,7 +53,7 @@ def collect_issues(repo_git : str) -> int: -def retrieve_all_issue_data(repo_git, logger, key_auth) -> None: +def retrieve_all_issue_data(repo_git, logger, key_auth, since) -> None: owner, repo = get_owner_repo(repo_git) @@ -56,6 +61,9 @@ def retrieve_all_issue_data(repo_git, logger, key_auth) -> None: url = f"https://api.github.com/repos/{owner}/{repo}/issues?state=all" + if since: + url += since.isoformat() + # returns an iterable of all issues at this url (this essentially means you can treat the issues variable as a list of the issues) # Reference the code documenation for GithubPaginator for more details issues = GithubPaginator(url, key_auth, logger) diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index d92893186d..aad8908db2 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -1,4 +1,5 @@ import logging +from datetime import datetime from augur.tasks.github.pull_requests.core import extract_data_from_pr_list from augur.tasks.init.celery_app import celery_app as celery @@ -14,7 +15,7 @@ from augur.application.db.lib import get_session, get_repo_by_repo_git, bulk_insert_dicts, get_pull_request_reviews_by_repo_id from augur.application.db.util import execute_session_query from ..messages.tasks import process_github_comment_contributors -from augur.application.db.lib import get_secondary_data_last_collected, get_updated_prs +from augur.application.db.lib import get_secondary_data_last_collected, get_updated_prs, get_core_data_last_collected from typing import Generator, List, Dict @@ -22,7 +23,7 @@ platform_id = 1 @celery.task(base=AugurCoreRepoCollectionTask) -def collect_pull_requests(repo_git: str) -> int: +def collect_pull_requests(repo_git: str, full_collection: bool) -> int: logger = logging.getLogger(collect_pull_requests.__name__) @@ -36,9 +37,14 @@ def collect_pull_requests(repo_git: str) -> int: owner, repo = get_owner_repo(repo_git) + if full_collection: + core_data_last_collected = None + else: + core_data_last_collected = get_core_data_last_collected().date() + total_count = 0 all_data = [] - for pr in retrieve_all_pr_data(repo_git, logger, manifest.key_auth): + for pr in retrieve_all_pr_data(repo_git, logger, manifest.key_auth, core_data_last_collected): all_data.append(pr) @@ -61,13 +67,13 @@ def collect_pull_requests(repo_git: str) -> int: # TODO: Rename pull_request_reviewers table to pull_request_requested_reviewers # TODO: Fix column names in pull request labels table -def retrieve_all_pr_data(repo_git: str, logger, key_auth): #-> Generator[List[Dict]]: +def retrieve_all_pr_data(repo_git: str, logger, key_auth, since): #-> Generator[List[Dict]]: owner, repo = get_owner_repo(repo_git) logger.info(f"Collecting pull requests for {owner}/{repo}") - url = f"https://api.github.com/repos/{owner}/{repo}/pulls?state=all&direction=desc" + url = f"https://api.github.com/repos/{owner}/{repo}/pulls?state=all&direction=desc&sort=updated" github_data_access = GithubDataAccess(key_auth, logger) @@ -76,7 +82,14 @@ def retrieve_all_pr_data(repo_git: str, logger, key_auth): #-> Generator[List[Di logger.info(f"{owner}/{repo}: Retrieving {num_pages} pages of pull requests") # returns a generator so this method can be used by doing for x in retrieve_all_pr_data() - return github_data_access.paginate_resource(url) + + data = github_data_access.paginate_resource(url) + + yield data + + # return if last pr on the page was updated before the since date + if since and datetime.fromisoformat(data["updated_at"].replace("Z", "+00:00")) < since: + return def process_pull_requests(pull_requests, task_name, repo_id, logger, augur_db): """ From ce102931a0ea92db7a794ea1ad5c9e024f9ccf3c Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 24 Jun 2024 19:35:46 -0500 Subject: [PATCH 60/88] Only use contributor name if we find one Signed-off-by: Andrew Brain --- augur/tasks/github/facade_github/tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/augur/tasks/github/facade_github/tasks.py b/augur/tasks/github/facade_github/tasks.py index 4a3806d507..accae4221e 100644 --- a/augur/tasks/github/facade_github/tasks.py +++ b/augur/tasks/github/facade_github/tasks.py @@ -46,10 +46,10 @@ def process_commit_metadata(logger, auth, contributorQueue, repo_id, platform_id contributors_with_matching_name = get_contributors_by_full_name(name) - if not contributors_with_matching_name: + if not contributors_with_matching_name or len(contributors_with_matching_name) > 1: logger.debug("Failed local login lookup") else: - login = contributors_with_matching_name.gh_login + login = contributors_with_matching_name[0].gh_login # Try to get the login from the commit sha From 20c3ab98bc7ccf4e7687c52dcb9873dfda076b0e Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 24 Jun 2024 19:53:36 -0500 Subject: [PATCH 61/88] Fix ml issue Signed-off-by: Andrew Brain --- augur/tasks/start_tasks.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index 92085f6d12..af0364f07a 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -29,6 +29,8 @@ from augur.tasks.git.util.facade_worker.facade_worker.utilitymethods import get_facade_weight_time_factor from augur.application.db.lib import execute_sql, get_session +RUNNING_DOCKER = os.environ.get('AUGUR_DOCKER_DEPLOY') == "1" + CELERY_GROUP_TYPE = type(group()) CELERY_CHAIN_TYPE = type(chain()) @@ -142,7 +144,7 @@ def non_repo_domain_tasks(self): enabled_tasks.extend(generate_non_repo_domain_facade_tasks(logger)) - if machine_learning_phase.__name__ in enabled_phase_names: + if not RUNNING_DOCKER and machine_learning_phase.__name__ in enabled_phase_names: #enabled_tasks.extend(machine_learning_phase()) from augur.tasks.data_analysis.contributor_breadth_worker.contributor_breadth_worker import contributor_breadth_model enabled_tasks.append(contributor_breadth_model.si()) @@ -260,7 +262,7 @@ def augur_collection_monitor(self): #start_facade_collection(session, max_repo=30) enabled_collection_hooks.append(build_facade_repo_collect_request(session, logger, enabled_phase_names)) - if machine_learning_phase.__name__ in enabled_phase_names: + if not RUNNING_DOCKER and machine_learning_phase.__name__ in enabled_phase_names: enabled_collection_hooks.append(build_ml_repo_collect_request(session, logger, enabled_phase_names)) #start_ml_collection(session,max_repo=5) From 64b82a2106a3ba3f40dfedecb283ee6123aeddd9 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 25 Jun 2024 19:37:01 -0500 Subject: [PATCH 62/88] Pass full collection flag to tasks Signed-off-by: Andrew Brain --- augur/tasks/start_tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index 92085f6d12..fb380e3a84 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -63,8 +63,8 @@ def primary_repo_collect_phase(repo_git, full_collection): #Define primary group of jobs for the primary collect phase: issues and pull requests. primary_repo_jobs = group( - collect_issues.si(repo_git), - collect_pull_requests.si(repo_git) + collect_issues.si(repo_git, full_collection), + collect_pull_requests.si(repo_git, full_collection) ) #Define secondary group that can't run until after primary jobs have finished. From 3ee5e4854187b1c7d2ac6563d5c766923d5d69fc Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 25 Jun 2024 19:50:29 -0500 Subject: [PATCH 63/88] Pass repo_id Signed-off-by: Andrew Brain --- augur/tasks/github/issues/tasks.py | 2 +- augur/tasks/github/pull_requests/tasks.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/augur/tasks/github/issues/tasks.py b/augur/tasks/github/issues/tasks.py index 6a2a0172af..43d174b01e 100644 --- a/augur/tasks/github/issues/tasks.py +++ b/augur/tasks/github/issues/tasks.py @@ -30,7 +30,7 @@ def collect_issues(repo_git : str, full_collection: bool) -> int: if full_collection: core_data_last_collected = None else: - core_data_last_collected = get_core_data_last_collected().date() + core_data_last_collected = get_core_data_last_collected(repo_id).date() key_auth = GithubRandomKeyAuth(logger) diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index aad8908db2..0611ed92e9 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -40,7 +40,7 @@ def collect_pull_requests(repo_git: str, full_collection: bool) -> int: if full_collection: core_data_last_collected = None else: - core_data_last_collected = get_core_data_last_collected().date() + core_data_last_collected = get_core_data_last_collected(repo_id).date() total_count = 0 all_data = [] From cd2c620c0ffa91d0b68500fc2e3a0f32061c0829 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 25 Jun 2024 19:56:46 -0500 Subject: [PATCH 64/88] Fix issues based on linter Signed-off-by: Andrew Brain --- augur/application/db/lib.py | 2 +- augur/tasks/github/issues/tasks.py | 13 +++++++------ augur/tasks/github/pull_requests/tasks.py | 10 +++++----- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/augur/application/db/lib.py b/augur/application/db/lib.py index f5191b9920..7fb5ce0598 100644 --- a/augur/application/db/lib.py +++ b/augur/application/db/lib.py @@ -25,7 +25,7 @@ def convert_type_of_value(config_dict, logger=None): if data_type == "str" or data_type is None: return config_dict - elif data_type == "int": + if data_type == "int": config_dict["value"] = int(config_dict["value"]) elif data_type == "bool": diff --git a/augur/tasks/github/issues/tasks.py b/augur/tasks/github/issues/tasks.py index 43d174b01e..083971e1bb 100644 --- a/augur/tasks/github/issues/tasks.py +++ b/augur/tasks/github/issues/tasks.py @@ -39,14 +39,15 @@ def collect_issues(repo_git : str, full_collection: bool) -> int: try: issue_data = retrieve_all_issue_data(repo_git, logger, key_auth, core_data_last_collected) - if issue_data: - total_issues = len(issue_data) - process_issues(issue_data, f"{owner}/{repo}: Issue task", repo_id, logger) - - return total_issues - else: + if not issue_data: logger.info(f"{owner}/{repo} has no issues") return 0 + + total_issues = len(issue_data) + process_issues(issue_data, f"{owner}/{repo}: Issue task", repo_id, logger) + + return total_issues + except Exception as e: logger.error(f"Could not collect issues for repo {repo_git}\n Reason: {e} \n Traceback: {''.join(traceback.format_exception(None, e, e.__traceback__))}") return -1 diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 0611ed92e9..5d90fd89e8 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -83,13 +83,13 @@ def retrieve_all_pr_data(repo_git: str, logger, key_auth, since): #-> Generator[ # returns a generator so this method can be used by doing for x in retrieve_all_pr_data() - data = github_data_access.paginate_resource(url) + for pr in github_data_access.paginate_resource(url): - yield data + yield pr - # return if last pr on the page was updated before the since date - if since and datetime.fromisoformat(data["updated_at"].replace("Z", "+00:00")) < since: - return + # return if last pr on the page was updated before the since date + if since and datetime.fromisoformat(pr["updated_at"].replace("Z", "+00:00")) < since: + return def process_pull_requests(pull_requests, task_name, repo_id, logger, augur_db): """ From 9ceb97fd303561bf87896790501c3d3d2a814080 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 25 Jun 2024 20:08:22 -0500 Subject: [PATCH 65/88] Update issues task to use github data access Signed-off-by: Andrew Brain --- augur/tasks/github/issues/tasks.py | 30 ++++++------------------------ 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/augur/tasks/github/issues/tasks.py b/augur/tasks/github/issues/tasks.py index 083971e1bb..63eb5479a9 100644 --- a/augur/tasks/github/issues/tasks.py +++ b/augur/tasks/github/issues/tasks.py @@ -7,7 +7,7 @@ from augur.tasks.init.celery_app import celery_app as celery from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask from augur.application.db.data_parse import * -from augur.tasks.github.util.github_paginator import GithubPaginator +from augur.tasks.github.util.github_data_access import GithubDataAccess from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth from augur.tasks.github.util.util import add_key_value_pair_to_dicts, get_owner_repo from augur.tasks.util.worker_util import remove_duplicate_dicts @@ -65,32 +65,14 @@ def retrieve_all_issue_data(repo_git, logger, key_auth, since) -> None: if since: url += since.isoformat() - # returns an iterable of all issues at this url (this essentially means you can treat the issues variable as a list of the issues) - # Reference the code documenation for GithubPaginator for more details - issues = GithubPaginator(url, key_auth, logger) + github_data_access = GithubDataAccess(key_auth, logger) - # this is defined so we can decrement it each time - # we come across a pr, so at the end we can log how - # many issues were collected - # loop through the issues - all_data = [] - num_pages = issues.get_num_pages() - for page_data, page in issues.iter_pages(): + num_pages = github_data_access.get_resource_page_count(url) + logger.info(f"{owner}/{repo}: Retrieving {num_pages} pages of issues") - if page_data is None: - return all_data + issues_paginator = github_data_access.paginate_resource(url) - if len(page_data) == 0: - logger.debug( - f"{owner}/{repo}: Issues Page {page} contains no data...returning") - logger.info(f"{owner}/{repo}: Issues Page {page} of {num_pages}") - return all_data - - logger.info(f"{owner}/{repo}: Issues Page {page} of {num_pages}") - - all_data += page_data - - return all_data + return list(issues_paginator) def process_issues(issues, task_name, repo_id, logger) -> None: From 7392635e0adf50755822ec12308f83c39e5af3ef Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 25 Jun 2024 20:14:37 -0500 Subject: [PATCH 66/88] Compare datetime with datetime Signed-off-by: Andrew Brain --- augur/tasks/github/pull_requests/tasks.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 5d90fd89e8..6824467d1c 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -38,13 +38,14 @@ def collect_pull_requests(repo_git: str, full_collection: bool) -> int: owner, repo = get_owner_repo(repo_git) if full_collection: - core_data_last_collected = None + core_data_last_collected_datetime = None else: - core_data_last_collected = get_core_data_last_collected(repo_id).date() + core_data_last_collected_date = get_core_data_last_collected(repo_id).date() + core_data_last_collected_datetime = datetime.combine(core_data_last_collected_date, datetime.min.time()) total_count = 0 all_data = [] - for pr in retrieve_all_pr_data(repo_git, logger, manifest.key_auth, core_data_last_collected): + for pr in retrieve_all_pr_data(repo_git, logger, manifest.key_auth, core_data_last_collected_datetime): all_data.append(pr) From 636ef8c5464a1dd49ff84b506acd23d730ce2dcb Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 25 Jun 2024 20:32:36 -0500 Subject: [PATCH 67/88] Fixes to datetime handling Signed-off-by: Andrew Brain --- augur/tasks/github/issues/tasks.py | 4 +++- augur/tasks/github/pull_requests/tasks.py | 12 ++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/augur/tasks/github/issues/tasks.py b/augur/tasks/github/issues/tasks.py index 63eb5479a9..2b0316fb2f 100644 --- a/augur/tasks/github/issues/tasks.py +++ b/augur/tasks/github/issues/tasks.py @@ -1,5 +1,6 @@ import logging import traceback +from datetime import timedelta, timezone from sqlalchemy.exc import IntegrityError @@ -30,7 +31,8 @@ def collect_issues(repo_git : str, full_collection: bool) -> int: if full_collection: core_data_last_collected = None else: - core_data_last_collected = get_core_data_last_collected(repo_id).date() + # subtract 2 days to ensure all data is collected + core_data_last_collected = (get_core_data_last_collected(repo_id) - timedelta(days=2)).replace(tzinfo=timezone.utc) key_auth = GithubRandomKeyAuth(logger) diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 45981eb354..a5311de079 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -1,5 +1,5 @@ import logging -from datetime import datetime +from datetime import datetime, timedelta, timezone from augur.tasks.github.pull_requests.core import extract_data_from_pr_list from augur.tasks.init.celery_app import celery_app as celery @@ -38,14 +38,14 @@ def collect_pull_requests(repo_git: str, full_collection: bool) -> int: owner, repo = get_owner_repo(repo_git) if full_collection: - core_data_last_collected_datetime = None + core_data_last_collected = None else: - core_data_last_collected_date = get_core_data_last_collected(repo_id).date() - core_data_last_collected_datetime = datetime.combine(core_data_last_collected_date, datetime.min.time()) + # subtract 2 days to ensure all data is collected + core_data_last_collected = (get_core_data_last_collected(repo_id) - timedelta(days=2)).replace(tzinfo=timezone.utc) total_count = 0 all_data = [] - for pr in retrieve_all_pr_data(repo_git, logger, manifest.key_auth, core_data_last_collected_datetime): + for pr in retrieve_all_pr_data(repo_git, logger, manifest.key_auth, core_data_last_collected): all_data.append(pr) @@ -89,7 +89,7 @@ def retrieve_all_pr_data(repo_git: str, logger, key_auth, since): #-> Generator[ yield pr # return if last pr on the page was updated before the since date - if since and datetime.fromisoformat(pr["updated_at"].replace("Z", "+00:00")) < since: + if since and datetime.fromisoformat(pr["updated_at"].replace("Z", "+00:00")).replace(tzinfo=timezone.utc) < since: return def process_pull_requests(pull_requests, task_name, repo_id, logger, augur_db): From 2149adf1ff8d60dccbd20da12d0d5a339046fefe Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Thu, 27 Jun 2024 12:46:43 -0500 Subject: [PATCH 68/88] Changes messages to use gihtub data access Signed-off-by: Andrew Brain --- augur/tasks/github/messages/tasks.py | 43 +++++++--------------------- 1 file changed, 11 insertions(+), 32 deletions(-) diff --git a/augur/tasks/github/messages/tasks.py b/augur/tasks/github/messages/tasks.py index 3e104fc6dc..f9e7e9d845 100644 --- a/augur/tasks/github/messages/tasks.py +++ b/augur/tasks/github/messages/tasks.py @@ -4,7 +4,7 @@ from augur.tasks.init.celery_app import celery_app as celery from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask from augur.application.db.data_parse import * -from augur.tasks.github.util.github_paginator import GithubPaginator +from augur.tasks.github.util.github_data_access import GithubDataAccess from augur.tasks.github.util.github_task_session import GithubTaskManifest from augur.tasks.util.worker_util import remove_duplicate_dicts from augur.tasks.github.util.util import get_owner_repo @@ -63,31 +63,14 @@ def fast_retrieve_all_pr_and_issue_messages(repo_git: str, logger, key_auth, tas # url to get issue and pull request comments url = f"https://api.github.com/repos/{owner}/{repo}/issues/comments" - - # define database task session, that also holds authentication keys the GithubPaginator needs - # returns an iterable of all issues at this url (this essentially means you can treat the issues variable as a list of the issues) - messages = GithubPaginator(url, key_auth, logger) - - num_pages = messages.get_num_pages() - all_data = [] - for page_data, page in messages.iter_pages(): + github_data_access = GithubDataAccess(key_auth, logger) - if page_data is None: - return all_data + num_pages = github_data_access.get_resource_count(url) - elif len(page_data) == 0: - logger.debug(f"{repo.capitalize()} Messages Page {page} contains no data...returning") - logger.info( - f"{task_name}: Page {page} of {num_pages}") - return all_data + logger.info(f"{task_name}: Collecting {num_pages} github messages") - logger.info(f"{task_name}: Page {page} of {num_pages}") - - all_data += page_data - - - return all_data + return list(github_data_access.paginate_resource(url)) def process_large_issue_and_pr_message_collection(repo_id, repo_git: str, logger, key_auth, task_name, augur_db) -> None: @@ -110,20 +93,16 @@ def process_large_issue_and_pr_message_collection(repo_id, repo_git: str, logger result = connection.execute(query).fetchall() comment_urls = [x[0] for x in result] - all_data = [] - for index, comment_url in enumerate(comment_urls): - - logger.info(f"{task_name}: Github messages index {index+1} of {len(comment_urls)}") + github_data_access = GithubDataAccess(key_auth, logger) - messages = GithubPaginator(comment_url, key_auth, logger) - for page_data, _ in messages.iter_pages(): + logger.info(f"{task_name}: Collecting github messages for {len(comment_urls)} prs/issues") - if page_data is None or len(page_data) == 0: - break + all_data = [] + for comment_url in comment_urls: - all_data += page_data + messages = list(github_data_access.paginate_resource(comment_url)) - logger.info(f"All data size: {len(all_data)}") + all_data += messages if len(all_data) >= 20: process_messages(all_data, task_name, repo_id, logger, augur_db) From 62c5d0dcfbed73aae89982621bff320c1f2f3199 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Thu, 27 Jun 2024 12:51:42 -0500 Subject: [PATCH 69/88] Update github events to use github data access Signed-off-by: Andrew Brain --- augur/tasks/github/events/tasks.py | 24 +++++------------------- augur/tasks/github/messages/tasks.py | 7 ++----- 2 files changed, 7 insertions(+), 24 deletions(-) diff --git a/augur/tasks/github/events/tasks.py b/augur/tasks/github/events/tasks.py index ee4f407616..44bb7e19ae 100644 --- a/augur/tasks/github/events/tasks.py +++ b/augur/tasks/github/events/tasks.py @@ -5,7 +5,7 @@ from augur.tasks.init.celery_app import celery_app as celery from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask from augur.application.db.data_parse import * -from augur.tasks.github.util.github_paginator import GithubPaginator +from augur.tasks.github.util.github_data_access import GithubDataAccess from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth from augur.tasks.github.util.util import get_owner_repo from augur.tasks.util.worker_util import remove_duplicate_dicts @@ -50,27 +50,13 @@ def retrieve_all_event_data(repo_git: str, logger, key_auth): url = f"https://api.github.com/repos/{owner}/{repo}/issues/events" - # returns an iterable of all issues at this url (this essentially means you can treat the issues variable as a list of the issues) - events = GithubPaginator(url, key_auth, logger) + github_data_access = GithubDataAccess(key_auth, logger) + event_count = github_data_access.get_resource_page_count(url) - num_pages = events.get_num_pages() - all_data = [] - for page_data, page in events.iter_pages(): + logger.info(f"{owner}/{repo}: Collecting {event_count} github events") - if page_data is None: - return all_data - - elif len(page_data) == 0: - logger.debug(f"{repo.capitalize()} Events Page {page} contains no data...returning") - logger.info(f"Events Page {page} of {num_pages}") - return all_data - - logger.info(f"{repo} Events Page {page} of {num_pages}") - - all_data += page_data - - return all_data + return list(github_data_access.paginate_resource(url)) def process_events(events, task_name, repo_id, logger): diff --git a/augur/tasks/github/messages/tasks.py b/augur/tasks/github/messages/tasks.py index f9e7e9d845..d47107c163 100644 --- a/augur/tasks/github/messages/tasks.py +++ b/augur/tasks/github/messages/tasks.py @@ -60,15 +60,12 @@ def fast_retrieve_all_pr_and_issue_messages(repo_git: str, logger, key_auth, tas # define logger for task logger.info(f"Collecting github comments for {owner}/{repo}") - - # url to get issue and pull request comments - url = f"https://api.github.com/repos/{owner}/{repo}/issues/comments" github_data_access = GithubDataAccess(key_auth, logger) - num_pages = github_data_access.get_resource_count(url) + message_count = github_data_access.get_resource_count(url) - logger.info(f"{task_name}: Collecting {num_pages} github messages") + logger.info(f"{task_name}: Collecting {message_count} github messages") return list(github_data_access.paginate_resource(url)) From 23f78925b08f0f363f6fb5030e3ed75cbe1b5745 Mon Sep 17 00:00:00 2001 From: Ulincsys Date: Fri, 28 Jun 2024 07:52:09 -0500 Subject: [PATCH 70/88] Add confirmation dialog for blank input in config script Signed-off-by: Ulincsys --- scripts/install/config.sh | 238 +++++++++++++++++++++----------------- 1 file changed, 130 insertions(+), 108 deletions(-) diff --git a/scripts/install/config.sh b/scripts/install/config.sh index fec4dfe767..f9fcba1646 100755 --- a/scripts/install/config.sh +++ b/scripts/install/config.sh @@ -6,173 +6,201 @@ Your choice: " target=$1 -function get_github_username(){ +function blank_confirm() { + if [ -z "${1}" ]; then + echo "Bad usage of blank_confirm at:" + caller + fi + + confirm_placeholder=${!1} + + while [ -z "${confirm_placeholder}" ]; do + echo "You entered a blank line, are you sure?" + read -p "enter 'yes' to continue, or enter the intended value: " confirm_placeholder + case "$confirm_placeholder" in + [yY][eE][sS] | [yY][eE] | [yY]) + return + ;; + *) + continue + ;; + esac + done + printf -v "$1" "%s" $confirm_placeholder +} + +function get_github_username() { echo echo "Please provide your username for Github." echo "** This is required for Augur to clone Github repos ***" read -p "GitHub username: " github_username + blank_confirm github_username echo } - -function get_github_api_key(){ +function get_github_api_key() { echo echo "Please provide a valid GitHub API key." echo "For more information on how to create the key, visit:" echo "https://oss-augur.readthedocs.io/en/dev/getting-started/installation.html#backend" echo "** This is required for Augur to gather data ***" read -p "GitHub API Key: " github_api_key + blank_confirm github_api_key echo } -function get_gitlab_username(){ +function get_gitlab_username() { echo echo "Please provide your username for GitLab." echo "** This is required for Augur to clone GitLab repos ***" read -p "GitLab username: " gitlab_username + blank_confirm gitlab_username echo } -function get_gitlab_api_key(){ +function get_gitlab_api_key() { echo echo "Please provide a valid GitLab API key." echo "For more information on how to create the key, visit:" echo "https://oss-augur.readthedocs.io/en/dev/getting-started/installation.html#backend" echo "** This is required for Augur to gather data ***" read -p "GitLab API Key: " gitlab_api_key + blank_confirm gitlab_api_key echo } function get_facade_repo_path() { - echo "The Facade data collection worker will clone repositories to this machine to run its analysis." - echo "Please select a new or existing directory for the Facade worker to use:" - echo - - while true; do - read -e -p "Facade worker directory: " facade_repo_directory - facade_repo_directory=$(realpath $facade_repo_directory) - echo - - if ! [ -w $facade_repo_directory/.git-credentials ]; then - echo "User $(whoami) does not have permission to write to that location" - echo "Please select another location" - continue - fi - - if [[ -d "$facade_repo_directory" ]]; then - read -r -p "That directory already exists. Use it? [Y/n]: " facade_response - case "$facade_response" in - [nN][oO]|[nN]) - continue - ;; - *) - break - ;; - esac - else - read -r -p "That directory does not exist. Create it? [Y/n]: " facade_response - case "$facade_response" in - [nN][oO]|[nN]) - continue - ;; - *) - mkdir "$facade_repo_directory" - echo "Directory created." - break - ;; - esac - fi - done - - [[ "${facade_repo_directory}" != */ ]] && facade_repo_directory="${facade_repo_directory}/" + echo "The Facade data collection worker will clone repositories to this machine to run its analysis." + echo "Please select a new or existing directory for the Facade worker to use:" + echo + + while true; do + read -e -p "Facade worker directory: " facade_repo_directory + blank_confirm facade_repo_directory + + facade_repo_directory=$(realpath $facade_repo_directory) + echo + + if ! [ -w $facade_repo_directory/.git-credentials ]; then + echo "User $(whoami) does not have permission to write to that location" + echo "Please select another location" + continue + fi + + if [[ -d "$facade_repo_directory" ]]; then + read -r -p "That directory already exists. Use it? [Y/n]: " facade_response + case "$facade_response" in + [nN][oO] | [nN]) + continue + ;; + *) + break + ;; + esac + else + read -r -p "That directory does not exist. Create it? [Y/n]: " facade_response + case "$facade_response" in + [nN][oO] | [nN]) + continue + ;; + *) + mkdir "$facade_repo_directory" + echo "Directory created." + break + ;; + esac + fi + done + + [[ "${facade_repo_directory}" != */ ]] && facade_repo_directory="${facade_repo_directory}/" } -function get_rabbitmq_broker_url(){ +function get_rabbitmq_broker_url() { echo echo "Please provide your rabbitmq broker url." echo "** This is required for Augur to run all collection tasks. ***" read -p "broker_url: " rabbitmq_conn_string + blank_confirm rabbitmq_conn_string echo } +function create_config() { -function create_config(){ - - if [[ -z "${AUGUR_GITHUB_API_KEY}" ]] - then + if [[ -z "${AUGUR_GITHUB_API_KEY}" ]]; then get_github_api_key else - echo - echo "Found AUGUR_GITHUB_API_KEY environment variable with value $AUGUR_GITHUB_API_KEY" - echo "Using it in the config" - echo "Please unset AUGUR_GITHUB_API_KEY if you would like to be prompted for a github api key" - github_api_key=$AUGUR_GITHUB_API_KEY - echo + echo + echo "Found AUGUR_GITHUB_API_KEY environment variable with value $AUGUR_GITHUB_API_KEY" + echo "Using it in the config" + echo "Please unset AUGUR_GITHUB_API_KEY if you would like to be prompted for a github api key" + github_api_key=$AUGUR_GITHUB_API_KEY + echo fi - if [[ -z "${AUGUR_GITHUB_USERNAME}" ]] - then + if [[ -z "${AUGUR_GITHUB_USERNAME}" ]]; then get_github_username else - echo - echo "Found AUGUR_GITHUB_USERNAME environment variable with value $AUGUR_GITHUB_USERNAME" - echo "Using it in the config" - echo "Please unset AUGUR_GITHUB_USERNAME if you would like to be prompted for a github username" - github_username=$AUGUR_GITHUB_USERNAME - echo + echo + echo "Found AUGUR_GITHUB_USERNAME environment variable with value $AUGUR_GITHUB_USERNAME" + echo "Using it in the config" + echo "Please unset AUGUR_GITHUB_USERNAME if you would like to be prompted for a github username" + github_username=$AUGUR_GITHUB_USERNAME + echo fi - if [[ -z "${AUGUR_GITLAB_API_KEY}" ]] - then + if [[ -z "${AUGUR_GITLAB_API_KEY}" ]]; then get_gitlab_api_key else - echo - echo "Found AUGUR_GITLAB_API_KEY environment variable with value $AUGUR_GITLAB_API_KEY" - echo "Using it in the config" - echo "Please unset AUGUR_GITLAB_API_KEY if you would like to be prompted for a gitlab api key" - gitlab_api_key=$AUGUR_GITLAB_API_KEY - echo + echo + echo "Found AUGUR_GITLAB_API_KEY environment variable with value $AUGUR_GITLAB_API_KEY" + echo "Using it in the config" + echo "Please unset AUGUR_GITLAB_API_KEY if you would like to be prompted for a gitlab api key" + gitlab_api_key=$AUGUR_GITLAB_API_KEY + echo fi - - if [[ -z "${AUGUR_GITLAB_USERNAME}" ]] - then + if [[ -z "${AUGUR_GITLAB_USERNAME}" ]]; then get_gitlab_username else - echo - echo "Found AUGUR_GITLAB_USERNAME environment variable with value $AUGUR_GITLAB_USERNAME" - echo "Using it in the config" - echo "Please unset AUGUR_GITLAB_USERNAME if you would like to be prompted for a gitlab username" - gitlab_username=$AUGUR_GITLAB_USERNAME - echo + echo + echo "Found AUGUR_GITLAB_USERNAME environment variable with value $AUGUR_GITLAB_USERNAME" + echo "Using it in the config" + echo "Please unset AUGUR_GITLAB_USERNAME if you would like to be prompted for a gitlab username" + gitlab_username=$AUGUR_GITLAB_USERNAME + echo fi - if [[ -z "${AUGUR_FACADE_REPO_DIRECTORY}" ]] - then + if [[ -z "${AUGUR_FACADE_REPO_DIRECTORY}" ]]; then get_facade_repo_path else - echo - echo "Found AUGUR_FACADE_REPO_DIRECTORY environment variable with value $AUGUR_FACADE_REPO_DIRECTORY" - echo "Using it in the config" - echo "IMPORTANT NOTE: This assumes that this directory already exists" - echo "Please unset AUGUR_FACADE_REPO_DIRECTORY if you would like to be prompted for the facade repo directory" - facade_repo_directory=$AUGUR_FACADE_REPO_DIRECTORY - echo + echo + echo "Found AUGUR_FACADE_REPO_DIRECTORY environment variable with value $AUGUR_FACADE_REPO_DIRECTORY" + echo "Using it in the config" + echo "IMPORTANT NOTE: This assumes that this directory already exists" + echo "Please unset AUGUR_FACADE_REPO_DIRECTORY if you would like to be prompted for the facade repo directory" + facade_repo_directory=$AUGUR_FACADE_REPO_DIRECTORY + echo fi - if [[ -z "${RABBITMQ_CONN_STRING}" ]] - then + if [[ -z "${RABBITMQ_CONN_STRING}" ]]; then get_rabbitmq_broker_url else - echo - echo "Found RABBITMQ_CONN_STRING environment variable with value $RABBITMQ_CONN_STRING" - echo "Using it in the config" - echo "Please unset RABBITMQ_CONN_STRING if you would like to be prompted for the rabbit MQ connection string" - rabbitmq_conn_string=$RABBITMQ_CONN_STRING - echo + echo + echo "Found RABBITMQ_CONN_STRING environment variable with value $RABBITMQ_CONN_STRING" + echo "Using it in the config" + echo "Please unset RABBITMQ_CONN_STRING if you would like to be prompted for the rabbit MQ connection string" + rabbitmq_conn_string=$RABBITMQ_CONN_STRING + echo fi - + + # echo $rabbitmq_conn_string + # echo $facade_repo_directory + # echo $gitlab_username + # echo $gitlab_api_key + # echo $github_username + # echo $github_api_key + #special case for docker entrypoint if [ $target = "docker" ]; then cmd=( augur config init --github-api-key $github_api_key --gitlab-api-key $gitlab_api_key --facade-repo-directory $facade_repo_directory --redis-conn-string $redis_conn_string --rabbitmq-conn-string $rabbitmq_conn_string ) @@ -181,16 +209,14 @@ function create_config(){ cmd=( augur config init --github-api-key $github_api_key --gitlab-api-key $gitlab_api_key --facade-repo-directory $facade_repo_directory --rabbitmq-conn-string $rabbitmq_conn_string ) fi - - #Create and cache credentials for github and gitlab touch $facade_repo_directory/.git-credentials - + echo "https://$github_username:$github_api_key@github.com" > $facade_repo_directory/.git-credentials echo "https://$gitlab_username:$gitlab_api_key@gitlab.com" >> $facade_repo_directory/.git-credentials git config --global credential.helper "store --file $facade_repo_directory/.git-credentials" - "${cmd[@]}" + "${cmd[@]}" } echo echo "Collecting data for config..." @@ -200,7 +226,3 @@ echo "Config created" echo # config_prompt - - - - From cd8bad5a067f220042a739cfbafeea285c1be4c8 Mon Sep 17 00:00:00 2001 From: Ulincsys Date: Fri, 28 Jun 2024 07:57:49 -0500 Subject: [PATCH 71/88] Add return from blank_confirm on bad usage Signed-off-by: Ulincsys --- scripts/install/config.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/install/config.sh b/scripts/install/config.sh index f9fcba1646..c4421550bc 100755 --- a/scripts/install/config.sh +++ b/scripts/install/config.sh @@ -10,6 +10,7 @@ function blank_confirm() { if [ -z "${1}" ]; then echo "Bad usage of blank_confirm at:" caller + return fi confirm_placeholder=${!1} From 249cf37e49099d514197e433bdb2739e35f7eaad Mon Sep 17 00:00:00 2001 From: Sean Goggins Date: Fri, 28 Jun 2024 15:02:33 -0500 Subject: [PATCH 72/88] updating ignore files Signed-off-by: Sean Goggins --- scripts/mat_view_explore/.gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 scripts/mat_view_explore/.gitignore diff --git a/scripts/mat_view_explore/.gitignore b/scripts/mat_view_explore/.gitignore new file mode 100644 index 0000000000..99270a10f7 --- /dev/null +++ b/scripts/mat_view_explore/.gitignore @@ -0,0 +1 @@ +.pgpass From ec99c3192da132e25a8a19e092080cfe127c9848 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 29 Jun 2024 14:17:07 -0500 Subject: [PATCH 73/88] Use key manager in requests Signed-off-by: Andrew Brain --- augur/tasks/github/util/github_data_access.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/github/util/github_data_access.py b/augur/tasks/github/util/github_data_access.py index 074c64f251..46843b86d2 100644 --- a/augur/tasks/github/util/github_data_access.py +++ b/augur/tasks/github/util/github_data_access.py @@ -98,7 +98,7 @@ def make_request(self, url, method="GET", timeout=100): with httpx.Client() as client: - response = client.request(method=method, url=url, timeout=timeout, follow_redirects=True) + response = client.request(method=method, url=url, auth=self.key_manager, timeout=timeout, follow_redirects=True) if response.status_code in [403, 429]: raise RatelimitException(response) From 59abdea9e7e790979325f383c2041ca5248a89b5 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 29 Jun 2024 14:18:15 -0500 Subject: [PATCH 74/88] Make api calls with key Signed-off-by: Andrew Brain --- augur/tasks/github/util/github_data_access.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/github/util/github_data_access.py b/augur/tasks/github/util/github_data_access.py index 074c64f251..46843b86d2 100644 --- a/augur/tasks/github/util/github_data_access.py +++ b/augur/tasks/github/util/github_data_access.py @@ -98,7 +98,7 @@ def make_request(self, url, method="GET", timeout=100): with httpx.Client() as client: - response = client.request(method=method, url=url, timeout=timeout, follow_redirects=True) + response = client.request(method=method, url=url, auth=self.key_manager, timeout=timeout, follow_redirects=True) if response.status_code in [403, 429]: raise RatelimitException(response) From 628a1a2c13ad5e82cafb0e35303f420448116d02 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 29 Jun 2024 14:25:13 -0500 Subject: [PATCH 75/88] Make changes recommend by linter Signed-off-by: Andrew Brain --- augur/tasks/github/util/github_data_access.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/github/util/github_data_access.py b/augur/tasks/github/util/github_data_access.py index 46843b86d2..2f4c988014 100644 --- a/augur/tasks/github/util/github_data_access.py +++ b/augur/tasks/github/util/github_data_access.py @@ -103,7 +103,7 @@ def make_request(self, url, method="GET", timeout=100): if response.status_code in [403, 429]: raise RatelimitException(response) - elif response.status_code == 404: + if response.status_code == 404: raise UrlNotFoundException(f"Could not find {url}") response.raise_for_status() From ab5b5a0496d6f49091b048ed1e5ea50b2a6f59db Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 29 Jun 2024 17:53:34 -0500 Subject: [PATCH 76/88] Remove empty files Signed-off-by: Andrew Brain --- augur/tasks/github/contributors/core.py | 0 augur/tasks/github/events/core.py | 0 augur/tasks/github/issues/core.py | 0 augur/tasks/github/messages/core.py | 0 augur/tasks/github/traffic/core.py | 0 5 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 augur/tasks/github/contributors/core.py delete mode 100644 augur/tasks/github/events/core.py delete mode 100644 augur/tasks/github/issues/core.py delete mode 100644 augur/tasks/github/messages/core.py delete mode 100644 augur/tasks/github/traffic/core.py diff --git a/augur/tasks/github/contributors/core.py b/augur/tasks/github/contributors/core.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/augur/tasks/github/events/core.py b/augur/tasks/github/events/core.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/augur/tasks/github/issues/core.py b/augur/tasks/github/issues/core.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/augur/tasks/github/messages/core.py b/augur/tasks/github/messages/core.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/augur/tasks/github/traffic/core.py b/augur/tasks/github/traffic/core.py deleted file mode 100644 index e69de29bb2..0000000000 From 86c8a68b8dbf7511d998d92cb36bb0ee5ce489be Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 29 Jun 2024 17:55:09 -0500 Subject: [PATCH 77/88] Move contributors tasks out of folder Signed-off-by: Andrew Brain --- augur/tasks/github/__init__.py | 2 +- augur/tasks/github/{contributors/tasks.py => contributors.py} | 0 augur/tasks/github/contributors/__init__.py | 0 augur/tasks/init/celery_app.py | 2 +- 4 files changed, 2 insertions(+), 2 deletions(-) rename augur/tasks/github/{contributors/tasks.py => contributors.py} (100%) delete mode 100644 augur/tasks/github/contributors/__init__.py diff --git a/augur/tasks/github/__init__.py b/augur/tasks/github/__init__.py index 29823eafe5..f10d9b68b7 100644 --- a/augur/tasks/github/__init__.py +++ b/augur/tasks/github/__init__.py @@ -1,4 +1,4 @@ -from augur.tasks.github.contributors.tasks import * +from augur.tasks.github.contributors import * from augur.tasks.github.events.tasks import * from augur.tasks.github.issues.tasks import * from augur.tasks.github.messages.tasks import * diff --git a/augur/tasks/github/contributors/tasks.py b/augur/tasks/github/contributors.py similarity index 100% rename from augur/tasks/github/contributors/tasks.py rename to augur/tasks/github/contributors.py diff --git a/augur/tasks/github/contributors/__init__.py b/augur/tasks/github/contributors/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/augur/tasks/init/celery_app.py b/augur/tasks/init/celery_app.py index e57fb674d2..c88eddffb8 100644 --- a/augur/tasks/init/celery_app.py +++ b/augur/tasks/init/celery_app.py @@ -26,7 +26,7 @@ 'augur.tasks.data_analysis', 'augur.tasks.util.collection_util'] -github_tasks = ['augur.tasks.github.contributors.tasks', +github_tasks = ['augur.tasks.github.contributors', 'augur.tasks.github.issues.tasks', 'augur.tasks.github.pull_requests.tasks', 'augur.tasks.github.events.tasks', From 3cf11de8682d799ecc4ed4ae3abdfd4b1947d02d Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 29 Jun 2024 17:56:43 -0500 Subject: [PATCH 78/88] Move events tasks out of folder Signed-off-by: Andrew Brain --- augur/tasks/github/__init__.py | 2 +- augur/tasks/github/{events/tasks.py => events.py} | 0 augur/tasks/github/events/__init__.py | 0 augur/tasks/init/celery_app.py | 2 +- 4 files changed, 2 insertions(+), 2 deletions(-) rename augur/tasks/github/{events/tasks.py => events.py} (100%) delete mode 100644 augur/tasks/github/events/__init__.py diff --git a/augur/tasks/github/__init__.py b/augur/tasks/github/__init__.py index f10d9b68b7..06a39a5cfb 100644 --- a/augur/tasks/github/__init__.py +++ b/augur/tasks/github/__init__.py @@ -1,5 +1,5 @@ from augur.tasks.github.contributors import * -from augur.tasks.github.events.tasks import * +from augur.tasks.github.events import * from augur.tasks.github.issues.tasks import * from augur.tasks.github.messages.tasks import * from augur.tasks.github.pull_requests.tasks import * diff --git a/augur/tasks/github/events/tasks.py b/augur/tasks/github/events.py similarity index 100% rename from augur/tasks/github/events/tasks.py rename to augur/tasks/github/events.py diff --git a/augur/tasks/github/events/__init__.py b/augur/tasks/github/events/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/augur/tasks/init/celery_app.py b/augur/tasks/init/celery_app.py index c88eddffb8..610ae46173 100644 --- a/augur/tasks/init/celery_app.py +++ b/augur/tasks/init/celery_app.py @@ -29,7 +29,7 @@ github_tasks = ['augur.tasks.github.contributors', 'augur.tasks.github.issues.tasks', 'augur.tasks.github.pull_requests.tasks', - 'augur.tasks.github.events.tasks', + 'augur.tasks.github.events', 'augur.tasks.github.messages.tasks', 'augur.tasks.github.facade_github.tasks', 'augur.tasks.github.releases.tasks', From da49d9ebf75b328d8003f3aa7d756a1dea266386 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 29 Jun 2024 17:58:11 -0500 Subject: [PATCH 79/88] Move issues tasks out of folder Signed-off-by: Andrew Brain --- augur/tasks/github/__init__.py | 2 +- augur/tasks/github/{issues/tasks.py => issues.py} | 0 augur/tasks/github/issues/__init__.py | 0 augur/tasks/init/celery_app.py | 2 +- 4 files changed, 2 insertions(+), 2 deletions(-) rename augur/tasks/github/{issues/tasks.py => issues.py} (100%) delete mode 100644 augur/tasks/github/issues/__init__.py diff --git a/augur/tasks/github/__init__.py b/augur/tasks/github/__init__.py index 06a39a5cfb..8f23bfd9f4 100644 --- a/augur/tasks/github/__init__.py +++ b/augur/tasks/github/__init__.py @@ -1,6 +1,6 @@ from augur.tasks.github.contributors import * from augur.tasks.github.events import * -from augur.tasks.github.issues.tasks import * +from augur.tasks.github.issues import * from augur.tasks.github.messages.tasks import * from augur.tasks.github.pull_requests.tasks import * from augur.tasks.github.repo_info.tasks import * diff --git a/augur/tasks/github/issues/tasks.py b/augur/tasks/github/issues.py similarity index 100% rename from augur/tasks/github/issues/tasks.py rename to augur/tasks/github/issues.py diff --git a/augur/tasks/github/issues/__init__.py b/augur/tasks/github/issues/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/augur/tasks/init/celery_app.py b/augur/tasks/init/celery_app.py index 610ae46173..21df30430e 100644 --- a/augur/tasks/init/celery_app.py +++ b/augur/tasks/init/celery_app.py @@ -27,7 +27,7 @@ 'augur.tasks.util.collection_util'] github_tasks = ['augur.tasks.github.contributors', - 'augur.tasks.github.issues.tasks', + 'augur.tasks.github.issues', 'augur.tasks.github.pull_requests.tasks', 'augur.tasks.github.events', 'augur.tasks.github.messages.tasks', From dd4175ec37459afb26116f2b03b1496780e94650 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 29 Jun 2024 18:00:31 -0500 Subject: [PATCH 80/88] Move messages tasks out of folder Signed-off-by: Andrew Brain --- augur/tasks/github/__init__.py | 2 +- augur/tasks/github/{messages/tasks.py => messages.py} | 0 augur/tasks/github/messages/__init__.py | 0 augur/tasks/github/pull_requests/tasks.py | 2 +- augur/tasks/init/celery_app.py | 2 +- 5 files changed, 3 insertions(+), 3 deletions(-) rename augur/tasks/github/{messages/tasks.py => messages.py} (100%) delete mode 100644 augur/tasks/github/messages/__init__.py diff --git a/augur/tasks/github/__init__.py b/augur/tasks/github/__init__.py index 8f23bfd9f4..63d68da41b 100644 --- a/augur/tasks/github/__init__.py +++ b/augur/tasks/github/__init__.py @@ -1,7 +1,7 @@ from augur.tasks.github.contributors import * from augur.tasks.github.events import * from augur.tasks.github.issues import * -from augur.tasks.github.messages.tasks import * +from augur.tasks.github.messages import * from augur.tasks.github.pull_requests.tasks import * from augur.tasks.github.repo_info.tasks import * from augur.tasks.github.releases.tasks import * diff --git a/augur/tasks/github/messages/tasks.py b/augur/tasks/github/messages.py similarity index 100% rename from augur/tasks/github/messages/tasks.py rename to augur/tasks/github/messages.py diff --git a/augur/tasks/github/messages/__init__.py b/augur/tasks/github/messages/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index a5311de079..dd31f726f4 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -14,7 +14,7 @@ from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth from augur.application.db.lib import get_session, get_repo_by_repo_git, bulk_insert_dicts, get_pull_request_reviews_by_repo_id from augur.application.db.util import execute_session_query -from ..messages.tasks import process_github_comment_contributors +from ..messages import process_github_comment_contributors from augur.application.db.lib import get_secondary_data_last_collected, get_updated_prs, get_core_data_last_collected from typing import Generator, List, Dict diff --git a/augur/tasks/init/celery_app.py b/augur/tasks/init/celery_app.py index 21df30430e..7c141a4a90 100644 --- a/augur/tasks/init/celery_app.py +++ b/augur/tasks/init/celery_app.py @@ -30,7 +30,7 @@ 'augur.tasks.github.issues', 'augur.tasks.github.pull_requests.tasks', 'augur.tasks.github.events', - 'augur.tasks.github.messages.tasks', + 'augur.tasks.github.messages', 'augur.tasks.github.facade_github.tasks', 'augur.tasks.github.releases.tasks', 'augur.tasks.github.repo_info.tasks', From 1f7723e74825ccd0c6365fa26f4acd2902bb25f1 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 29 Jun 2024 18:02:54 -0500 Subject: [PATCH 81/88] Move traffic tasks out of folder Signed-off-by: Andrew Brain --- augur/tasks/github/traffic/__init__.py | 0 augur/tasks/github/traffic/tasks.py | 68 -------------------------- augur/tasks/init/celery_app.py | 2 +- augur/tasks/start_tasks.py | 2 +- 4 files changed, 2 insertions(+), 70 deletions(-) delete mode 100644 augur/tasks/github/traffic/__init__.py delete mode 100644 augur/tasks/github/traffic/tasks.py diff --git a/augur/tasks/github/traffic/__init__.py b/augur/tasks/github/traffic/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/augur/tasks/github/traffic/tasks.py b/augur/tasks/github/traffic/tasks.py deleted file mode 100644 index 4101faa3ff..0000000000 --- a/augur/tasks/github/traffic/tasks.py +++ /dev/null @@ -1,68 +0,0 @@ -import logging - -from augur.tasks.init.celery_app import celery_app as celery -from augur.application.db.data_parse import extract_needed_clone_history_data -from augur.tasks.github.util.github_paginator import GithubPaginator -from augur.tasks.util.worker_util import remove_duplicate_dicts -from augur.tasks.github.util.util import get_owner_repo -from augur.application.db.models import RepoClone -from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts -from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth - - -@celery.task -def collect_github_repo_clones_data(repo_git: str) -> None: - - logger = logging.getLogger(collect_github_repo_clones_data.__name__) - - repo_obj = get_repo_by_repo_git(repo_git) - repo_id = repo_obj.repo_id - - owner, repo = get_owner_repo(repo_git) - - logger.info(f"Collecting Github repository clone data for {owner}/{repo}") - - key_auth = GithubRandomKeyAuth(logger) - - clones_data = retrieve_all_clones_data(repo_git, logger, key_auth) - - if clones_data: - process_clones_data(clones_data, f"{owner}/{repo}: Traffic task", repo_id) - else: - logger.info(f"{owner}/{repo} has no clones") - -def retrieve_all_clones_data(repo_git: str, logger, key_auth): - # owner, repo = get_owner_repo(repo_git) - - # url = f"https://api.github.com/repos/{owner}/{repo}/traffic/clones" - - # clones = GithubPaginator(url, key_auth, logger) - - # num_pages = clones.get_num_pages() - all_data = [] - # for page_data, page in clones.iter_pages(): - - # if page_data is None: - # return all_data - - # elif len(page_data) == 0: - # logger.debug(f"{repo.capitalize()} Traffic Page {page} contains no data...returning") - # logger.info(f"Traffic Page {page} of {num_pages}") - # return all_data - - # logger.info(f"{repo} Traffic Page {page} of {num_pages}") - - # all_data += page_data - - return all_data - - -def process_clones_data(clones_data, task_name, repo_id, logger) -> None: - clone_history_data = clones_data[0]['clones'] - - clone_history_data_dicts = extract_needed_clone_history_data(clone_history_data, repo_id) - - clone_history_data = remove_duplicate_dicts(clone_history_data_dicts, 'clone_data_timestamp') - logger.info(f"{task_name}: Inserting {len(clone_history_data_dicts)} clone history records") - - bulk_insert_dicts(logger, clone_history_data_dicts, RepoClone, ['repo_id']) diff --git a/augur/tasks/init/celery_app.py b/augur/tasks/init/celery_app.py index 7c141a4a90..da97751db9 100644 --- a/augur/tasks/init/celery_app.py +++ b/augur/tasks/init/celery_app.py @@ -37,7 +37,7 @@ 'augur.tasks.github.detect_move.tasks', 'augur.tasks.github.pull_requests.files_model.tasks', 'augur.tasks.github.pull_requests.commits_model.tasks', - 'augur.tasks.github.traffic.tasks'] + 'augur.tasks.github.traffic'] gitlab_tasks = ['augur.tasks.gitlab.merge_request_task', 'augur.tasks.gitlab.issues_task', diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index 0572d0a1d2..5900f899cb 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -15,7 +15,7 @@ from augur.tasks.github.pull_requests.files_model.tasks import process_pull_request_files from augur.tasks.github.pull_requests.commits_model.tasks import process_pull_request_commits from augur.tasks.git.dependency_tasks.tasks import process_ossf_dependency_metrics -from augur.tasks.github.traffic.tasks import collect_github_repo_clones_data +from augur.tasks.github.tasks import collect_github_repo_clones_data from augur.tasks.gitlab.merge_request_task import collect_gitlab_merge_requests, collect_merge_request_metadata, collect_merge_request_commits, collect_merge_request_files, collect_merge_request_comments from augur.tasks.gitlab.issues_task import collect_gitlab_issues, collect_gitlab_issue_comments from augur.tasks.gitlab.events_task import collect_gitlab_issue_events, collect_gitlab_merge_request_events From 95798402996c02edd3004f13b73003559c175c18 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 29 Jun 2024 18:12:46 -0500 Subject: [PATCH 82/88] Fix issues url Signed-off-by: Andrew Brain --- augur/tasks/github/issues/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/github/issues/tasks.py b/augur/tasks/github/issues/tasks.py index 2b0316fb2f..ae1fb07cd9 100644 --- a/augur/tasks/github/issues/tasks.py +++ b/augur/tasks/github/issues/tasks.py @@ -65,7 +65,7 @@ def retrieve_all_issue_data(repo_git, logger, key_auth, since) -> None: url = f"https://api.github.com/repos/{owner}/{repo}/issues?state=all" if since: - url += since.isoformat() + url += f"&since={since.isoformat()}" github_data_access = GithubDataAccess(key_auth, logger) From bfd6b23904120e3c26190c4c059b58a87ff0d5a1 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 29 Jun 2024 18:22:00 -0500 Subject: [PATCH 83/88] Fix import Signed-off-by: Andrew Brain --- augur/tasks/github/issues.py | 2 +- augur/tasks/start_tasks.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/augur/tasks/github/issues.py b/augur/tasks/github/issues.py index 2b0316fb2f..ae1fb07cd9 100644 --- a/augur/tasks/github/issues.py +++ b/augur/tasks/github/issues.py @@ -65,7 +65,7 @@ def retrieve_all_issue_data(repo_git, logger, key_auth, since) -> None: url = f"https://api.github.com/repos/{owner}/{repo}/issues?state=all" if since: - url += since.isoformat() + url += f"&since={since.isoformat()}" github_data_access = GithubDataAccess(key_auth, logger) diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index 5900f899cb..001a2d4f91 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -15,7 +15,7 @@ from augur.tasks.github.pull_requests.files_model.tasks import process_pull_request_files from augur.tasks.github.pull_requests.commits_model.tasks import process_pull_request_commits from augur.tasks.git.dependency_tasks.tasks import process_ossf_dependency_metrics -from augur.tasks.github.tasks import collect_github_repo_clones_data +from augur.tasks.github.traffic import collect_github_repo_clones_data from augur.tasks.gitlab.merge_request_task import collect_gitlab_merge_requests, collect_merge_request_metadata, collect_merge_request_commits, collect_merge_request_files, collect_merge_request_comments from augur.tasks.gitlab.issues_task import collect_gitlab_issues, collect_gitlab_issue_comments from augur.tasks.gitlab.events_task import collect_gitlab_issue_events, collect_gitlab_merge_request_events From d858c00cf4ede3eab93ce2d057b7623858b268e4 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sat, 29 Jun 2024 18:24:47 -0500 Subject: [PATCH 84/88] Add missing file Signed-off-by: Andrew Brain --- augur/tasks/github/traffic.py | 68 +++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 augur/tasks/github/traffic.py diff --git a/augur/tasks/github/traffic.py b/augur/tasks/github/traffic.py new file mode 100644 index 0000000000..4101faa3ff --- /dev/null +++ b/augur/tasks/github/traffic.py @@ -0,0 +1,68 @@ +import logging + +from augur.tasks.init.celery_app import celery_app as celery +from augur.application.db.data_parse import extract_needed_clone_history_data +from augur.tasks.github.util.github_paginator import GithubPaginator +from augur.tasks.util.worker_util import remove_duplicate_dicts +from augur.tasks.github.util.util import get_owner_repo +from augur.application.db.models import RepoClone +from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts +from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth + + +@celery.task +def collect_github_repo_clones_data(repo_git: str) -> None: + + logger = logging.getLogger(collect_github_repo_clones_data.__name__) + + repo_obj = get_repo_by_repo_git(repo_git) + repo_id = repo_obj.repo_id + + owner, repo = get_owner_repo(repo_git) + + logger.info(f"Collecting Github repository clone data for {owner}/{repo}") + + key_auth = GithubRandomKeyAuth(logger) + + clones_data = retrieve_all_clones_data(repo_git, logger, key_auth) + + if clones_data: + process_clones_data(clones_data, f"{owner}/{repo}: Traffic task", repo_id) + else: + logger.info(f"{owner}/{repo} has no clones") + +def retrieve_all_clones_data(repo_git: str, logger, key_auth): + # owner, repo = get_owner_repo(repo_git) + + # url = f"https://api.github.com/repos/{owner}/{repo}/traffic/clones" + + # clones = GithubPaginator(url, key_auth, logger) + + # num_pages = clones.get_num_pages() + all_data = [] + # for page_data, page in clones.iter_pages(): + + # if page_data is None: + # return all_data + + # elif len(page_data) == 0: + # logger.debug(f"{repo.capitalize()} Traffic Page {page} contains no data...returning") + # logger.info(f"Traffic Page {page} of {num_pages}") + # return all_data + + # logger.info(f"{repo} Traffic Page {page} of {num_pages}") + + # all_data += page_data + + return all_data + + +def process_clones_data(clones_data, task_name, repo_id, logger) -> None: + clone_history_data = clones_data[0]['clones'] + + clone_history_data_dicts = extract_needed_clone_history_data(clone_history_data, repo_id) + + clone_history_data = remove_duplicate_dicts(clone_history_data_dicts, 'clone_data_timestamp') + logger.info(f"{task_name}: Inserting {len(clone_history_data_dicts)} clone history records") + + bulk_insert_dicts(logger, clone_history_data_dicts, RepoClone, ['repo_id']) From a2a26e3ed4d65a5c052aa57a56798dfee1392854 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sun, 30 Jun 2024 19:07:10 +0000 Subject: [PATCH 85/88] version bump Signed-off-by: Sean P. Goggins --- README.md | 4 ++-- metadata.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index c0c99157cb..e5dde81d04 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Augur NEW Release v0.71.0 +# Augur NEW Release v0.76.0 Augur is primarily a data engineering tool that makes it possible for data scientists to gather open source software community data. Less data carpentry for everyone else! The primary way of looking at Augur data is through [8Knot](https://github.com/oss-aspen/8knot) ... A public instance of 8Knot is available at https://metrix.chaoss.io ... That is tied to a public instance of Augur at https://ai.chaoss.io @@ -10,7 +10,7 @@ The primary way of looking at Augur data is through [8Knot](https://github.com/o ## NEW RELEASE ALERT! ### [If you want to jump right in, updated docker build/compose and bare metal installation instructions are available here](docs/new-install.md) -Augur is now releasing a dramatically improved new version to the main branch. It is also available here: https://github.com/chaoss/augur/releases/tag/v0.71.0 +Augur is now releasing a dramatically improved new version to the main branch. It is also available here: https://github.com/chaoss/augur/releases/tag/v0.76.0 - The `main` branch is a stable version of our new architecture, which features: - Dramatic improvement in the speed of large scale data collection (100,000+ repos). All data is obtained for 100k+ repos within 2 weeks. diff --git a/metadata.py b/metadata.py index 497e74ad46..d7b4ac37a2 100644 --- a/metadata.py +++ b/metadata.py @@ -5,8 +5,8 @@ __short_description__ = "Python 3 package for free/libre and open-source software community metrics, models & data collection" -__version__ = "0.71.0" -__release__ = "v0.71.0 (Taylor Baby!)" +__version__ = "0.76.0" +__release__ = "v0.76.0 (England's Favorite Traitors)" __license__ = "MIT" __copyright__ = "University of Missouri, University of Nebraska-Omaha, CHAOSS, Brian Warner & Augurlabs 2024" From 2b8c819a9570a58b3bec83db87cf8d45ab2d2ce4 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 1 Jul 2024 17:38:19 -0500 Subject: [PATCH 86/88] Fix syntax error Signed-off-by: Andrew Brain --- augur/tasks/start_tasks.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index 001a2d4f91..562069ce84 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -142,8 +142,6 @@ def non_repo_domain_tasks(self): enabled_tasks = [] - enabled_tasks.extend(generate_non_repo_domain_facade_tasks(logger)) - if not RUNNING_DOCKER and machine_learning_phase.__name__ in enabled_phase_names: #enabled_tasks.extend(machine_learning_phase()) from augur.tasks.data_analysis.contributor_breadth_worker.contributor_breadth_worker import contributor_breadth_model From f07a971471f2133139ba89567d737a043207344b Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Mon, 1 Jul 2024 17:44:50 -0500 Subject: [PATCH 87/88] add empty output handle Signed-off-by: Isaac Milarsky --- augur/tasks/util/worker_util.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/augur/tasks/util/worker_util.py b/augur/tasks/util/worker_util.py index 6198f1ccdb..bb57f0dbfb 100644 --- a/augur/tasks/util/worker_util.py +++ b/augur/tasks/util/worker_util.py @@ -135,7 +135,10 @@ def parse_json_from_subprocess_call(logger, subprocess_arr, cwd=None): output = p.stdout try: - required_output = json.loads(output) + if output.strip(): + required_output = json.loads(output) + else: + required_output = {} except json.decoder.JSONDecodeError as e: logger.error(f"Could not parse required output! \n output: {output} \n Error: {e}") raise e From c7380c24acfac9abd10a32044ea623e5ba5775b8 Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Mon, 1 Jul 2024 17:51:05 -0500 Subject: [PATCH 88/88] check null Signed-off-by: Isaac Milarsky --- augur/tasks/util/worker_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/util/worker_util.py b/augur/tasks/util/worker_util.py index bb57f0dbfb..51b0109faa 100644 --- a/augur/tasks/util/worker_util.py +++ b/augur/tasks/util/worker_util.py @@ -135,7 +135,7 @@ def parse_json_from_subprocess_call(logger, subprocess_arr, cwd=None): output = p.stdout try: - if output.strip(): + if output and output.strip(): required_output = json.loads(output) else: required_output = {}