Merge pull request #2857 from chaoss/dev
Release 0.76.0
sgoggins authored Jul 2, 2024
2 parents 99ee81f + c1b7a84 commit 6bf35a5
Showing 73 changed files with 1,916 additions and 703 deletions.
18 changes: 18 additions & 0 deletions .github/workflows/docker-image.yml
@@ -0,0 +1,18 @@
name: Docker Image CI

on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]

jobs:

  build:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v3
    - name: Build the Docker image
      run: docker build . --file Dockerfile --tag my-image-name:$(date +%s)
4 changes: 4 additions & 0 deletions Makefile
@@ -3,6 +3,7 @@
default:
@ echo "Installation Commands:"
@ echo " install Installs Augur's full stack for production"
@ echo " wizard Install Augur and launch the graphical setup wizard"
@ echo " clean Removes potentially troublesome compiled files"
@ echo " rebuild Removes build/compiled files & binaries and reinstalls the project"
@ echo
@@ -34,6 +35,9 @@ default:
install:
@ ./scripts/install/install.sh dev

wizard:
@ ./scripts/install/install.sh graphical

install-spdx:
@ ./scripts/install/install-spdx.sh

4 changes: 2 additions & 2 deletions README.md
@@ -1,4 +1,4 @@
# Augur NEW Release v0.71.0
# Augur NEW Release v0.76.0

Augur is primarily a data engineering tool that makes it possible for data scientists to gather open source software community data. Less data carpentry for everyone else!
The primary way of looking at Augur data is through [8Knot](https://github.com/oss-aspen/8knot) ... A public instance of 8Knot is available at https://metrix.chaoss.io ... That is tied to a public instance of Augur at https://ai.chaoss.io
@@ -10,7 +10,7 @@ The primary way of looking at Augur data is through [8Knot](https://github.com/o
## NEW RELEASE ALERT!
### [If you want to jump right in, updated docker build/compose and bare metal installation instructions are available here](docs/new-install.md)

Augur is now releasing a dramatically improved new version to the main branch. It is also available here: https://github.com/chaoss/augur/releases/tag/v0.71.0
Augur is now releasing a dramatically improved new version to the main branch. It is also available here: https://github.com/chaoss/augur/releases/tag/v0.76.0

- The `main` branch is a stable version of our new architecture, which features:
- Dramatic improvement in the speed of large scale data collection (100,000+ repos). All data is obtained for 100k+ repos within 2 weeks.
2 changes: 1 addition & 1 deletion augur/api/view/server/Environment.py
@@ -49,4 +49,4 @@ def __str__(self)-> str:
return str(os.environ)

def __iter__(self):
return (item for item in os.environ.items)
return (item for item in os.environ.items())
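The one-character fix above matters because `os.environ.items` without parentheses is the bound method object, not the sequence of pairs, so iterating an Environment instance raised a TypeError instead of yielding environment entries. A minimal standalone sketch of the difference (not Augur code):

```python
import os

broken = os.environ.items          # a bound method object, not the (key, value) pairs
# iter(broken) raises TypeError, so `for item in broken:` fails immediately

for key, value in os.environ.items():   # calling items() returns an iterable view of pairs
    if key == "HOME":
        print(f"{key}={value}")
```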
3 changes: 1 addition & 2 deletions augur/api/view/server/__init__.py
@@ -1,3 +1,2 @@
from .Environment import Environment
from .ServerThread import ServerThread
from .LoginException import LoginException
from .Environment import Environment
6 changes: 3 additions & 3 deletions augur/application/cli/backend.py
@@ -166,21 +166,21 @@ def determine_worker_processes(ratio,maximum):
sleep_time += 6

#60% of estimate, Maximum value of 45 : Reduced because it can be lower
core_num_processes = determine_worker_processes(.15, 10)
core_num_processes = determine_worker_processes(.40, 50)
logger.info(f"Starting core worker processes with concurrency={core_num_processes}")
core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_num_processes} -n core:{uuid.uuid4().hex}@%h"
process_list.append(subprocess.Popen(core_worker.split(" ")))
sleep_time += 6

#20% of estimate, Maximum value of 25
secondary_num_processes = determine_worker_processes(.70, 60)
secondary_num_processes = determine_worker_processes(.39, 50)
logger.info(f"Starting secondary worker processes with concurrency={secondary_num_processes}")
secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={secondary_num_processes} -n secondary:{uuid.uuid4().hex}@%h -Q secondary"
process_list.append(subprocess.Popen(secondary_worker.split(" ")))
sleep_time += 6

#15% of estimate, Maximum value of 20
facade_num_processes = determine_worker_processes(.15, 20)
facade_num_processes = determine_worker_processes(.17, 20)
logger.info(f"Starting facade worker processes with concurrency={facade_num_processes}")
facade_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={facade_num_processes} -n facade:{uuid.uuid4().hex}@%h -Q facade"

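Both entry points (this file and augur/application/cli/collection.py below) rebalance the Celery worker split: the new ratios sum to 0.40 + 0.39 + 0.17 = 0.96 of the estimated process budget, versus 0.15 + 0.70 + 0.15 = 1.00 before, and the core and secondary caps move to 50 each. The body of determine_worker_processes is not part of this diff; the sketch below only illustrates how a ratio and a cap might combine, and the CPU-based estimate is an assumption rather than Augur's actual formula.

```python
import os

def determine_worker_processes_sketch(ratio: float, maximum: int, estimate: int | None = None) -> int:
    """Hypothetical illustration: scale an overall worker estimate by `ratio`, capped at `maximum`."""
    if estimate is None:
        estimate = (os.cpu_count() or 1) * 2   # assumed heuristic; Augur's real estimate is not shown here
    return max(1, min(int(estimate * ratio), maximum))

# With a 64-process estimate the new ratios yield 25 core, 24 secondary, and 10 facade workers.
for ratio, cap in [(.40, 50), (.39, 50), (.17, 20)]:
    print(determine_worker_processes_sketch(ratio, cap, estimate=64))
```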
6 changes: 3 additions & 3 deletions augur/application/cli/collection.py
@@ -125,21 +125,21 @@ def determine_worker_processes(ratio,maximum):
sleep_time += 6

#60% of estimate, Maximum value of 45: Reduced because not needed
core_num_processes = determine_worker_processes(.15, 10)
core_num_processes = determine_worker_processes(.40, 50)
logger.info(f"Starting core worker processes with concurrency={core_num_processes}")
core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_num_processes} -n core:{uuid.uuid4().hex}@%h"
process_list.append(subprocess.Popen(core_worker.split(" ")))
sleep_time += 6

#20% of estimate, Maximum value of 25
secondary_num_processes = determine_worker_processes(.70, 60)
secondary_num_processes = determine_worker_processes(.39, 50)
logger.info(f"Starting secondary worker processes with concurrency={secondary_num_processes}")
secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={secondary_num_processes} -n secondary:{uuid.uuid4().hex}@%h -Q secondary"
process_list.append(subprocess.Popen(secondary_worker.split(" ")))
sleep_time += 6

#15% of estimate, Maximum value of 20
facade_num_processes = determine_worker_processes(.15, 20)
facade_num_processes = determine_worker_processes(.17, 20)
logger.info(f"Starting facade worker processes with concurrency={facade_num_processes}")
facade_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={facade_num_processes} -n facade:{uuid.uuid4().hex}@%h -Q facade"

25 changes: 24 additions & 1 deletion augur/application/cli/config.py
@@ -7,6 +7,7 @@
import json
import logging

from augur.application.db.models import Config
from augur.application.db.session import DatabaseSession
from augur.application.config import AugurConfig
from augur.application.cli import DatabaseContext, test_connection, test_db_connection, with_database
@@ -160,7 +161,7 @@ def add_section(ctx, section_name, file):
@click.option('--section', required=True)
@click.option('--setting', required=True)
@click.option('--value', required=True)
@click.option('--data-type', required=True)
@click.option('--data-type')
@test_connection
@test_db_connection
@with_database
@@ -169,6 +170,12 @@ def config_set(ctx, section, setting, value, data_type):

with DatabaseSession(logger, engine=ctx.obj.engine) as session:
config = AugurConfig(logger, session)

if not data_type:
result = session.query(Config).filter(Config.section_name == section, Config.setting_name == setting).all()
if not result:
return click.echo("You must specify a data-type if the setting does not already exist")
data_type = result[0].type

if data_type not in config.accepted_types:
print(f"Error invalid type for config. Please use one of these types: {config.accepted_types}")
@@ -218,6 +225,22 @@ def config_get(ctx, section, setting):
else:
print(f"Error: {section} section not found in config")

@cli.command('get_all_json')
def config_get_all_json():
data = {}
try:
with DatabaseSession(logger) as session:
sections = session.query(Config.section_name).distinct().all()
for section in sections:
data[section[0]] = {}

for row in session.query(Config).all():
data[row.section_name][row.setting_name] = row.value
except:
pass

print(json.dumps(data, indent=4))

@cli.command('clear')
@test_connection
@test_db_connection
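Two usability changes land in this file: `config set` now infers the data type from the existing row when `--data-type` is omitted, and the new `get_all_json` subcommand (added above) dumps every section as nested JSON keyed by section and setting name. A sketch of the shape `get_all_json` prints, with section and setting names invented for illustration (real output depends on what is stored in the config table):

```python
# Hypothetical example of the nested structure emitted by `get_all_json`;
# the section and setting names below are illustrative, not guaranteed to exist.
example_output = {
    "Keys": {
        "github_api_key": "<redacted>",
    },
    "Logging": {
        "log_level": "INFO",
        "logs_directory": "/var/log/augur",
    },
}
```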
4 changes: 2 additions & 2 deletions augur/application/cli/tasks.py
@@ -36,8 +36,8 @@ def start():
secondary_worker_process = None

scheduling_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=1 -n scheduling:{uuid.uuid4().hex}@%h -Q scheduling"
core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=20 -n core:{uuid.uuid4().hex}@%h"
secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=60 -n secondary:{uuid.uuid4().hex}@%h -Q secondary"
core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=50 -n core:{uuid.uuid4().hex}@%h"
secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=50 -n secondary:{uuid.uuid4().hex}@%h -Q secondary"

scheduling_worker_process = subprocess.Popen(scheduling_worker.split(" "))
core_worker_process = subprocess.Popen(core_worker.split(" "))
4 changes: 2 additions & 2 deletions augur/application/config.py
@@ -161,7 +161,7 @@ def get_section(self, section_name) -> dict:
Returns:
The section data as a dict
"""
query = self.session.query(Config).filter_by(section_name=section_name)
query = self.session.query(Config).filter_by(section_name=section_name).order_by(Config.setting_name.asc())
section_data = execute_session_query(query, 'all')

section_dict = {}
@@ -213,7 +213,7 @@ def load_config(self) -> dict:
The config from the database
"""
# get all the sections in the config table
query = self.session.query(Config.section_name)
query = self.session.query(Config.section_name).order_by(Config.section_name.asc())
section_names = execute_session_query(query, 'all')

config = {}
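Ordering both queries by name makes get_section and load_config return sections and settings in a stable alphabetical order, so two dumps of the same configuration compare cleanly. A minimal sketch of the resulting load pattern, assuming only the Config columns already used above (section_name, setting_name, value); this is an illustration, not the project's exact implementation:

```python
from sqlalchemy import asc

def load_config_sketch(session, Config):
    """Build {section: {setting: value}} with deterministic, alphabetical ordering."""
    config = {}
    rows = (
        session.query(Config)
        .order_by(asc(Config.section_name), asc(Config.setting_name))
        .all()
    )
    for row in rows:
        config.setdefault(row.section_name, {})[row.setting_name] = row.value
    return config
```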
33 changes: 30 additions & 3 deletions augur/application/db/lib.py
@@ -9,7 +9,7 @@
from psycopg2.errors import DeadlockDetected
from typing import List, Any, Optional, Union

from augur.application.db.models import Config, Repo, Commit, WorkerOauth, Issue, PullRequest, PullRequestReview, ContributorsAlias, UnresolvedCommitEmail, Contributor, CollectionStatus
from augur.application.db.models import Config, Repo, Commit, WorkerOauth, Issue, PullRequest, PullRequestReview, ContributorsAlias,UnresolvedCommitEmail, Contributor, CollectionStatus
from augur.tasks.util.collection_state import CollectionState
from augur.application.db import get_session, get_engine
from augur.application.db.util import execute_session_query
@@ -25,7 +25,7 @@ def convert_type_of_value(config_dict, logger=None):
if data_type == "str" or data_type is None:
return config_dict

elif data_type == "int":
if data_type == "int":
config_dict["value"] = int(config_dict["value"])

elif data_type == "bool":
@@ -509,4 +509,31 @@ def update_issue_closed_cntrbs_by_repo_id(repo_id):
WHERE issue_id = :issue_id
AND repo_id = :repo_id
""")
connection.execute(update_stmt, update_data)
connection.execute(update_stmt, update_data)

def get_core_data_last_collected(repo_id):

with get_session() as session:
try:
return session.query(CollectionStatus).filter(CollectionStatus.repo_id == repo_id).one().core_data_last_collected
except s.orm.exc.NoResultFound:
return None

def get_secondary_data_last_collected(repo_id):

with get_session() as session:
try:
return session.query(CollectionStatus).filter(CollectionStatus.repo_id == repo_id).one().secondary_data_last_collected
except s.orm.exc.NoResultFound:
return None

def get_updated_prs(repo_id, since):

with get_session() as session:
return session.query(PullRequest).filter(PullRequest.repo_id == repo_id, PullRequest.pr_updated_at >= since).order_by(PullRequest.pr_src_number).all()

def get_updated_issues(repo_id, since):

with get_session() as session:
return session.query(Issue).filter(Issue.repo_id == repo_id, Issue.updated_at >= since).order_by(Issue.gh_issue_number).all()
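The four new helpers support incremental collection: a task can look up when a repository's core or secondary data was last gathered and then fetch only the pull requests or issues updated since that point. A hedged sketch of how a caller might combine them; the epoch fallback and the printing are assumptions for illustration, not code from this commit:

```python
from datetime import datetime, timezone

def collect_updated_prs_sketch(repo_id: int):
    """Illustrative only: process pull requests changed since the last core collection pass."""
    since = get_core_data_last_collected(repo_id)
    if since is None:
        # Assumed fallback when no prior collection is recorded: take everything.
        since = datetime(1970, 1, 1, tzinfo=timezone.utc)

    for pr in get_updated_prs(repo_id, since):
        print(pr.pr_src_number)   # pr_src_number is the ordering column used above
```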

64 changes: 58 additions & 6 deletions augur/static/css/first_time.css
@@ -1,50 +1,102 @@
:root {
--color-bg: #1A233A;
--color-bg-light: #272E48;
--color-bg-contrast: #646683;
--color-fg: white;
--color-fg-dark: #b0bdd6;
--color-fg-contrast: black;
--color-accent: #6f42c1;
--color-accent-dark: #6134b3;
--color-notice: #00ddff;
--color-notice-contrast: #006979;
}

body{
margin-top:20px;
color: #bcd0f7;
background: #1A233A;
background-color: var(--color-bg);
color: var(--color-fg);
}

h1 {
font-size: 2rem;
}

.sidebar .sidebar-top {
margin: 0 0 1rem 0;
padding-bottom: 1rem;
text-align: center;
}

.sidebar .sidebar-top .brand-logo {
margin: 0 0 1rem 0;
}

.sidebar .sidebar-top .brand-logo img {
height: 90px;
-webkit-border-radius: 100px;
-moz-border-radius: 100px;
border-radius: 100px;
}

.sidebar .about {
margin: 1rem 0 0 0;
font-size: 0.8rem;
text-align: center;
}

.subtitle {
color: var(--color-fg-dark);
margin-bottom: .5rem;
margin-left: 15px;
}

.no-margin-bottom {
margin-bottom: 0;
}

.card {
background: #272E48;
background: var(--color-bg-light);
-webkit-border-radius: 5px;
-moz-border-radius: 5px;
border-radius: 5px;
border: 0;
margin-bottom: 1rem;
}

.form-control {
border: 1px solid #596280;
-webkit-border-radius: 2px;
-moz-border-radius: 2px;
border-radius: 2px;
font-size: .825rem;
background: #1A233A;
color: #bcd0f7;
background: var(--color-bg-light);
color: var(--color-fg);
}

.input-textbox {
color: var(--color-fg);
background-color: var(--color-bg);
border-color: var(--color-accent-dark);
}

.input-textbox::placeholder {
color: var(--color-fg-dark);
}

.input-textbox:focus {
color: var(--color-fg);
background-color: var(--color-bg);
border-color: var(--color-accent-dark);
}

.input-textbox:focus::placeholder {
color: var(--color-fg-dark);
}

.modal-content {
color: black;
color: var(--color-fg-contrast);
}

.editor-container {
height: 300px !important;
}
4 changes: 2 additions & 2 deletions augur/tasks/data_analysis/__init__.py
@@ -1,7 +1,7 @@
from celery import chain
import logging

def machine_learning_phase(repo_git):
def machine_learning_phase(repo_git, full_collection):
from augur.tasks.data_analysis.clustering_worker.tasks import clustering_task
from augur.tasks.data_analysis.discourse_analysis.tasks import discourse_analysis_task
from augur.tasks.data_analysis.insight_worker.tasks import insight_task
@@ -15,7 +15,7 @@ def machine_learning_phase(repo_git):
ml_tasks.append(discourse_analysis_task.si(repo_git))
ml_tasks.append(insight_task.si(repo_git))
ml_tasks.append(message_insight_task.si(repo_git))
ml_tasks.append(pull_request_analysis_task.si(repo_git))
#ml_tasks.append(pull_request_analysis_task.si(repo_git))

logger.info(f"Machine learning sequence: {ml_tasks}")
return chain(*ml_tasks)
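machine_learning_phase now accepts a full_collection flag and returns a Celery chain with the pull-request analysis step disabled (commented out above). A hedged usage sketch; the repository URL and the asynchronous dispatch are assumptions about the call site, which is not part of this diff:

```python
from augur.tasks.data_analysis import machine_learning_phase

# Hypothetical dispatch of the machine-learning phase for a single repository.
ml_chain = machine_learning_phase("https://github.com/chaoss/augur", full_collection=True)
result = ml_chain.apply_async()   # standard Celery chain dispatch; result tracks the chained tasks
```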
6 changes: 3 additions & 3 deletions augur/tasks/data_analysis/clustering_worker/setup.py
@@ -22,13 +22,13 @@ def read(filename):
packages=find_packages(),
install_requires=[
'Flask==2.0.2',
'Flask-Cors==3.0.10',
'Flask-Cors==4.0.1',
'Flask-Login==0.5.0',
'Flask-WTF==1.0.0',
'requests==2.28.0',
'requests==2.32.0',
'psycopg2-binary==2.9.3',
#'sklearn==0.0.0',
'scikit-learn==1.1.3',
'scikit-learn==1.5.0',
'numpy==1.26.0',
'nltk==3.6.6',
'seaborn==0.11.1',
(The remaining changed files in this release are not shown here.)
