Skip to content

Commit

Permalink
Merge pull request #2898 from chaoss/augur-release-0.76.2
Browse files Browse the repository at this point in the history
Augur release 0.76.2
  • Loading branch information
sgoggins authored Sep 25, 2024
2 parents fe11b0e + ef39e84 commit c8eba65
Show file tree
Hide file tree
Showing 55 changed files with 15,473 additions and 2,391 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ augur_export_env.sh
config.yml
reports.yml
*.pid
*.sock

node_modules/
.idea/
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Augur NEW Release v0.76.1
# Augur NEW Release v0.76.2

Augur is primarily a data engineering tool that makes it possible for data scientists to gather open source software community data. Less data carpentry for everyone else!
The primary way of looking at Augur data is through [8Knot](https://github.com/oss-aspen/8knot) ... A public instance of 8Knot is available at https://metrix.chaoss.io ... That is tied to a public instance of Augur at https://ai.chaoss.io
Expand All @@ -10,7 +10,7 @@ The primary way of looking at Augur data is through [8Knot](https://github.com/o
## NEW RELEASE ALERT!
### [If you want to jump right in, updated docker build/compose and bare metal installation instructions are available here](docs/new-install.md)

Augur is now releasing a dramatically improved new version to the main branch. It is also available here: https://github.com/chaoss/augur/releases/tag/v0.76.1
Augur is now releasing a dramatically improved new version to the main branch. It is also available here: https://github.com/chaoss/augur/releases/tag/v0.76.2

- The `main` branch is a stable version of our new architecture, which features:
- Dramatic improvement in the speed of large scale data collection (100,000+ repos). All data is obtained for 100k+ repos within 2 weeks.
Expand Down
158 changes: 158 additions & 0 deletions augur/api/metrics/deps.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,162 @@ def deps(repo_group_id, repo_id=None, period='day', begin_date=None, end_date=No
return results


@register_metric()
def libyear(repo_group_id, repo_id=None, period='day', begin_date=None, end_date=None):
    """
    Returns the libyear value recorded for each dependency of a repo or of
    every repo in a repo group.

    One row is returned per (repo, dependency name, libyear) combination with
    the columns rg_name, repo_group_id, repo_name, repo_id, repo_git,
    forked_from, repo_archived, name, libyear and most_recent_collection
    (the latest data_collection_date found for that combination).

    :param repo_group_id: The repository's group id; used as the filter when
        repo_id is not given
    :param repo_id: The repository's id; when given, only that repo is queried
    :param period: Accepted for signature parity with the other metrics;
        not used by the queries below
    :param begin_date: Accepted for signature parity with the other metrics;
        not used by the queries below
    :param end_date: Accepted for signature parity with the other metrics;
        not used by the queries below
    :return: DataFrame with one row per repo/dependency/libyear combination
    """

    # NOTE(review): in both queries the inner join between `e` (latest
    # collection date per repo/dependency) and `f` matches on repo_id and
    # data_collection_date only, not on the dependency name. If dependencies
    # of one repo can have different latest collection dates this may return
    # older rows for some dependencies -- confirm against the collection task.

    if repo_id:

        # Single-repo variant: filter on :repo_id both while looking up the
        # latest collection date and in the outer join.
        libyearSQL = s.sql.text("""
            SELECT
                rg_name,
                repo_group_id,
                repo_name,
                d.repo_id,
                repo_git,
                forked_from,
                repo_archived,
                c.name,
                c.libyear,
                MAX ( C.data_collection_date ) AS most_recent_collection
            FROM
                (
                SELECT A.rg_name AS rg_name,
                    A.repo_group_id AS repo_group_id,
                    b.repo_name AS repo_name,
                    b.repo_id AS repo_id,
                    b.repo_git AS repo_git,
                    b.forked_from AS forked_from,
                    b.repo_archived AS repo_archived
                FROM
                    repo_groups A,
                    repo b
                WHERE
                    A.repo_group_id = b.repo_group_id
                ORDER BY
                    rg_name,
                    repo_name
                ) d,
                (
                SELECT DISTINCT
                    f.repo_id,
                    f.NAME,
                    f.libyear,
                    f.data_collection_date
                FROM
                    ( SELECT repo_id, NAME, MAX ( data_collection_date ) AS data_collection_date FROM augur_data.repo_deps_libyear WHERE repo_id = :repo_id GROUP BY repo_id, NAME ORDER BY NAME ) e,
                    augur_data.repo_deps_libyear f
                WHERE
                    e.data_collection_date = f.data_collection_date and
                    e.repo_id = f.repo_id
                ORDER BY
                    NAME
                ) C
            WHERE
                d.repo_id = C.repo_id
                AND C.repo_id = :repo_id
            GROUP BY
                rg_name,
                repo_git,
                repo_group_id,
                repo_name,
                d.repo_id,
                forked_from,
                repo_archived,
                c.name,
                c.libyear
            ORDER BY
                repo_id;
        """)

        with current_app.engine.connect() as conn:
            results = pd.read_sql(libyearSQL, conn, params={'repo_id': repo_id})

    else:

        # Repo-group variant: compute the same result for every repo, then
        # keep only the rows whose repo belongs to :repo_group_id.
        libyearSQL = s.sql.text("""
            Select w.* from
            (
            SELECT
                rg_name,
                repo_group_id,
                repo_name,
                d.repo_id,
                repo_git,
                forked_from,
                repo_archived,
                c.name,
                c.libyear,
                MAX ( C.data_collection_date ) AS most_recent_collection
            FROM
                (
                SELECT A.rg_name AS rg_name,
                    A.repo_group_id AS repo_group_id,
                    b.repo_name AS repo_name,
                    b.repo_id AS repo_id,
                    b.repo_git AS repo_git,
                    b.forked_from AS forked_from,
                    b.repo_archived AS repo_archived
                FROM
                    repo_groups A,
                    repo b
                WHERE
                    A.repo_group_id = b.repo_group_id
                ORDER BY
                    rg_name,
                    repo_name
                ) d,
                (
                SELECT DISTINCT
                    f.repo_id,
                    f.NAME,
                    f.libyear,
                    f.data_collection_date
                FROM
                    ( SELECT repo_id, NAME, MAX ( data_collection_date ) AS data_collection_date FROM augur_data.repo_deps_libyear GROUP BY repo_id, NAME ORDER BY NAME ) e,
                    augur_data.repo_deps_libyear f
                WHERE
                    e.data_collection_date = f.data_collection_date and
                    e.repo_id = f.repo_id
                ORDER BY
                    NAME
                ) C
            WHERE
                d.repo_id = C.repo_id
            GROUP BY
                rg_name,
                repo_git,
                repo_group_id,
                repo_name,
                d.repo_id,
                forked_from,
                repo_archived,
                c.name,
                c.libyear
            ORDER BY
                repo_id) w,
                repo_groups y,
                repo z
            where w.repo_id=z.repo_id and
            y.repo_group_id=z.repo_group_id
            and z.repo_group_id = :repo_group_id
        """)

        with current_app.engine.connect() as conn:
            results = pd.read_sql(libyearSQL, conn, params={'repo_group_id': repo_group_id})

    return results

7 changes: 7 additions & 0 deletions augur/api/routes/pull_request_reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@
from bokeh.models.glyphs import Rect
from bokeh.transform import dodge, factor_cmap, transform

# from selenium.webdriver import Firefox, FirefoxOptions
# options = FirefoxOptions()
# options.headless = True
# webdriver = Firefox(options=options)
#export_png(item, path, webdriver=webdriver)

warnings.filterwarnings('ignore')

from augur.api.routes import AUGUR_API_VERSION
Expand Down Expand Up @@ -604,6 +610,7 @@ def average_commits_per_PR():
# opts = FirefoxOptions()
# opts.add_argument("--headless")
# driver = webdriver.Firefox(firefox_options=opts)
# filename = export_png(grid, timeout=180, webdriver=webdriver)
filename = export_png(grid, timeout=180)

return send_file(filename)
Expand Down
2 changes: 1 addition & 1 deletion augur/api/view/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,4 +91,4 @@ def write_settings(current_settings):
# Initialize logging
def init_logging():
global logger
logger = AugurLogger("augur_view", reset_logfiles=True).get_logger()
logger = AugurLogger("augur_view", reset_logfiles=False).get_logger()
4 changes: 2 additions & 2 deletions augur/application/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def new_func(ctx, *args, **kwargs):
You are not connected to the internet.\n \
Please connect to the internet to run Augur\n \
Consider setting http_proxy variables for limited access installations.")
sys.exit()
sys.exit(-1)

return update_wrapper(new_func, function_internet_connection)

Expand Down Expand Up @@ -78,7 +78,7 @@ def new_func(ctx, *args, **kwargs):
print(f"\n\n{usage} command setup failed\nERROR: connecting to database\nHINT: The {incorrect_values} may be incorrectly specified in {location}\n")

engine.dispose()
sys.exit()
sys.exit(-2)

return update_wrapper(new_func, function_db_connection)

Expand Down
1 change: 0 additions & 1 deletion augur/application/cli/_multicommand.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ def get_command(self, ctx, name):

# Check that the command exists before importing
if not cmdfile.is_file():

return

# Prefer to raise exception instead of silencing it
Expand Down
9 changes: 5 additions & 4 deletions augur/application/cli/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,16 @@

from augur.application.db.session import DatabaseSession
from augur.application.logs import AugurLogger
from augur.application.cli import test_connection, test_db_connection, with_database
from augur.application.cli import test_connection, test_db_connection, with_database, DatabaseContext
from augur.application.cli._cli_util import _broadcast_signal_to_processes, raise_open_file_limit, clear_redis_caches, clear_rabbitmq_messages
from augur.application.db.lib import get_value

logger = AugurLogger("augur", reset_logfiles=True).get_logger()
logger = AugurLogger("augur", reset_logfiles=False).get_logger()

@click.group('api', short_help='Commands for controlling the backend API server')
def cli():
pass
@click.pass_context
def cli(ctx):
ctx.obj = DatabaseContext()

@cli.command("start")
@click.option("--development", is_flag=True, default=False, help="Enable development mode")
Expand Down
67 changes: 62 additions & 5 deletions augur/application/cli/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ def cli(ctx):
@click.pass_context
def start(ctx, disable_collection, development, pidfile, port):
"""Start Augur's backend server."""
with open(pidfile, "w") as pidfile:
pidfile.write(str(os.getpid()))
with open(pidfile, "w") as pidfile_io:
pidfile_io.write(str(os.getpid()))

try:
if os.environ.get('AUGUR_DOCKER_DEPLOY') != "1":
Expand All @@ -63,6 +63,8 @@ def start(ctx, disable_collection, development, pidfile, port):
if development:
os.environ["AUGUR_DEV"] = "1"
logger.info("Starting in development mode")

os.environ["AUGUR_PIDFILE"] = pidfile

try:
gunicorn_location = os.getcwd() + "/augur/api/gunicorn_conf.py"
Expand All @@ -74,6 +76,11 @@ def start(ctx, disable_collection, development, pidfile, port):
if not port:
port = get_value("Server", "port")

os.environ["AUGUR_PORT"] = str(port)

if disable_collection:
os.environ["AUGUR_DISABLE_COLLECTION"] = "1"

worker_vmem_cap = get_value("Celery", 'worker_process_vmem_cap')

gunicorn_command = f"gunicorn -c {gunicorn_location} -b {host}:{port} augur.api.server:app --log-file gunicorn.log"
Expand Down Expand Up @@ -128,7 +135,7 @@ def start(ctx, disable_collection, development, pidfile, port):
augur_collection_monitor.si().apply_async()

else:
logger.info("Collection disabled")
logger.info("Collection disabled")

try:
server.wait()
Expand All @@ -153,6 +160,8 @@ def start(ctx, disable_collection, development, pidfile, port):
cleanup_after_collection_halt(logger, ctx.obj.engine)
except RedisConnectionError:
pass

os.unlink(pidfile)

def start_celery_worker_processes(vmem_cap_ratio, disable_collection=False):

Expand Down Expand Up @@ -185,7 +194,7 @@ def determine_worker_processes(ratio,maximum):
sleep_time += 6

#40% of estimate, Maximum value of 90 : Reduced because it can be lower
core_num_processes = determine_worker_processes(.40, 50)
core_num_processes = determine_worker_processes(.40, 90)
logger.info(f"Starting core worker processes with concurrency={core_num_processes}")
core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_num_processes} -n core:{uuid.uuid4().hex}@%h"
process_list.append(subprocess.Popen(core_worker.split(" ")))
Expand Down Expand Up @@ -224,6 +233,54 @@ def stop(ctx):

augur_stop(signal.SIGTERM, logger, ctx.obj.engine)

@cli.command('stop-collection-blocking')
@test_connection
@test_db_connection
@with_database
@click.pass_context
def stop_collection(ctx):
    """
    Stop collection tasks if they are running, block until complete

    Sends terminate() to every Augur process named "celery", waits up to
    5 seconds for them to exit, then polls every 0.5 seconds until none of
    the remaining processes is still running. Processes found in the zombie
    state are sent kill() and dropped from the wait list. Once everything
    has stopped, cleanup_after_collection_halt() is called.

    :param ctx: click context; ctx.obj.engine is passed to the cleanup step
    """
    stopped = []

    p: psutil.Process
    for p in get_augur_processes():
        if p.name() == "celery":
            stopped.append(p)
            p.terminate()

    if not stopped:
        logger.info("No collection processes found")
        return

    _, alive = psutil.wait_procs(stopped, 5,
                                 lambda p: logger.info(f"STOPPED: {p.pid}"))

    while alive:
        # Rebuild the wait list on every pass instead of popping by index, so
        # stale indices from an earlier pass can never remove the wrong entry.
        still_alive = []
        for proc in alive:
            try:
                # Check for zombies before is_running(): the original code
                # relies on this order to handle zombies separately.
                if proc.status() == psutil.STATUS_ZOMBIE:
                    logger.info(f"KILLING ZOMBIE: {proc.pid}")
                    proc.kill()
                    continue
            except psutil.NoSuchProcess:
                # The process exited between polls, which is the goal here.
                logger.info(f"STOPPED: {proc.pid}")
                continue

            if not proc.is_running():
                logger.info(f"STOPPED: {proc.pid}")
                continue

            still_alive.append(proc)

        alive = still_alive
        if not alive:
            break

        logger.info(f"Waiting on [{', '.join(str(proc.pid) for proc in alive)}]")
        time.sleep(0.5)

    cleanup_after_collection_halt(logger, ctx.obj.engine)

@cli.command('kill')
@test_connection
@test_db_connection
Expand Down Expand Up @@ -388,7 +445,7 @@ def processes():
Outputs the name/PID of all Augur server & worker processes"""
augur_processes = get_augur_processes()
for process in augur_processes:
logger.info(f"Found process {process.pid}")
logger.info(f"Found process {process.pid} [{process.name()}] -> Parent: {process.parent().pid}")

def get_augur_processes():
augur_processes = []
Expand Down
Loading

0 comments on commit c8eba65

Please sign in to comment.