diff --git a/.github/scripts/convert_airtable_to_json.py b/.github/scripts/convert_airtable_to_json.py index 4817d306f..e4c78528e 100644 --- a/.github/scripts/convert_airtable_to_json.py +++ b/.github/scripts/convert_airtable_to_json.py @@ -6,35 +6,50 @@ import requests AIRTABLE_MODEL_HUB_BASE_ID = "appgxpCzCDNyGjWc8" -AIRTABLE_TABLE_ID = 'tblZGe2a2XeBxrEHP' +AIRTABLE_TABLE_ID = "tblZGe2a2XeBxrEHP" AWS_ACCOUNT_REGION = "eu-central-1" -ERSILIA_MODEL_HUB_S3_BUCKET= 'ersilia-model-hub' +ERSILIA_MODEL_HUB_S3_BUCKET = "ersilia-model-hub" -def convert_airtable_to_json(airtable_api_key, aws_access_key_id, aws_secret_access_key): - - headers = {'Authorization': f'Bearer {airtable_api_key}'} - response= requests.get(f'https://api.airtable.com/v0/{AIRTABLE_MODEL_HUB_BASE_ID}/{AIRTABLE_TABLE_ID}', headers=headers) - data=response.json() - records_models= [record['fields'] for record in data['records']] - models_json=json.dumps(records_models, indent=4) +def convert_airtable_to_json( + airtable_api_key, aws_access_key_id, aws_secret_access_key +): + headers = {"Authorization": f"Bearer {airtable_api_key}"} + response = requests.get( + f"https://api.airtable.com/v0/{AIRTABLE_MODEL_HUB_BASE_ID}/{AIRTABLE_TABLE_ID}", + headers=headers, + ) - #Load JSON in AWS S3 bucket - s3 = boto3.client('s3',aws_access_key_id=aws_access_key_id,aws_secret_access_key=aws_secret_access_key,region_name=AWS_ACCOUNT_REGION) + data = response.json() + records_models = [record["fields"] for record in data["records"]] + models_json = json.dumps(records_models, indent=4) + + # Load JSON in AWS S3 bucket + s3 = boto3.client( + "s3", + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + region_name=AWS_ACCOUNT_REGION, + ) try: - s3.put_object(Body=models_json, Bucket=ERSILIA_MODEL_HUB_S3_BUCKET, Key='models.json', ACL='public-read') + s3.put_object( + Body=models_json, + Bucket=ERSILIA_MODEL_HUB_S3_BUCKET, + Key="models.json", + ACL="public-read", + ) print("file 
models.json uploaded") except NoCredentialsError: logging.error("Unable to upload tracking data to AWS: Credentials not found") except ClientError as e: logging.error(e) - -if __name__ == "__main__": + +if __name__ == "__main__": print("Getting environmental variables") - airtable_api_key = os.environ.get('AIRTABLE_API_KEY') - aws_access_key_id = os.environ.get('AWS_ACCESS_KEY_ID') - aws_secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY') + airtable_api_key = os.environ.get("AIRTABLE_API_KEY") + aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID") + aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY") print("Converting AirTable base to JSON file") convert_airtable_to_json(airtable_api_key, aws_access_key_id, aws_secret_access_key) diff --git a/.github/scripts/place_a_dockerfile_in_current_eos_repo.py b/.github/scripts/place_a_dockerfile_in_current_eos_repo.py index 0e6f9bcb2..f27151c93 100644 --- a/.github/scripts/place_a_dockerfile_in_current_eos_repo.py +++ b/.github/scripts/place_a_dockerfile_in_current_eos_repo.py @@ -25,4 +25,4 @@ def download_file(url, filename): text = text.replace("eos_identifier", model_id) with open("Dockerfile", "w") as f: - f.write(text) \ No newline at end of file + f.write(text) diff --git a/.github/scripts/update_metadata.py b/.github/scripts/update_metadata.py index 259a6b573..ee24296b2 100644 --- a/.github/scripts/update_metadata.py +++ b/.github/scripts/update_metadata.py @@ -106,10 +106,12 @@ def populate_metadata(self): # Check if model_description is a list if isinstance(self.json_input["model_description"], list): # Join the list elements into a single string separated by commas - self.metadata["Description"] = ", ".join(self.json_input["model_description"]) + self.metadata["Description"] = ", ".join( + self.json_input["model_description"] + ) else: # If it's already a string, just assign it directly - self.metadata["Description"] = self.json_input["model_description"] + self.metadata["Description"] = 
self.json_input["model_description"] if self.metadata["Publication"] == "": self.metadata["Publication"] = self.json_input["publication"] if self.metadata["Source Code"] == "": diff --git a/ersilia/_version.py b/ersilia/_version.py index 9690b6ce2..90c4dfe99 100644 --- a/ersilia/_version.py +++ b/ersilia/_version.py @@ -33,11 +33,13 @@ def get_latest_semver_tag(): return tag return None + def increment_patch_version(version): version = version.split(".") version[2] = str(int(version[2]) + 1) return ".".join(version) + def get_version_for_setup(): # version = get_latest_semver_tag() version = increment_patch_version(get_version_from_static()) diff --git a/ersilia/auth/auth.py b/ersilia/auth/auth.py index 718c2c428..f83da77d5 100644 --- a/ersilia/auth/auth.py +++ b/ersilia/auth/auth.py @@ -2,6 +2,7 @@ Deal with privileges in Ersilia. Base on GitHub login. """ + from pathlib import Path import os import yaml diff --git a/ersilia/cli/commands/close.py b/ersilia/cli/commands/close.py index 86ffbc47d..4d18cb076 100644 --- a/ersilia/cli/commands/close.py +++ b/ersilia/cli/commands/close.py @@ -4,7 +4,7 @@ from .. import echo from ... 
import ErsiliaModel from ...core.session import Session -from ...core.tracking import check_file_exists, close_persistent_file +from ...core.tracking import check_file_exists, close_persistent_file def close_cmd(): @@ -23,4 +23,4 @@ def close(): # Close our persistent tracking file if check_file_exists(model_id): - close_persistent_file(mdl.model_id) + close_persistent_file(mdl.model_id) diff --git a/ersilia/cli/commands/example.py b/ersilia/cli/commands/example.py index 33e1b9420..861b1c3a8 100644 --- a/ersilia/cli/commands/example.py +++ b/ersilia/cli/commands/example.py @@ -20,7 +20,7 @@ def example_cmd(): @click.option("--n_samples", "-n", default=5, type=click.INT) @click.option("--file_name", "-f", default=None, type=click.STRING) @click.option("--simple/--complete", "-s/-c", default=True) - @click.option("--predefined/--random", "-p/-r", default=False) + @click.option("--predefined/--random", "-p/-r", default=True) def example(model, n_samples, file_name, simple, predefined): if model is not None: model_id = ModelBase(model).model_id diff --git a/ersilia/cli/commands/fetch.py b/ersilia/cli/commands/fetch.py index f8ef626a0..5f1ffae78 100644 --- a/ersilia/cli/commands/fetch.py +++ b/ersilia/cli/commands/fetch.py @@ -31,7 +31,7 @@ def _fetch(mf, model_id): "--from_dir", default=None, type=click.STRING, - help="Local path where the model is stored" + help="Local path where the model is stored", ) @click.option( "--from_github", @@ -60,6 +60,18 @@ def _fetch(mf, model_id): default=None, help="Fetch a model based on a URL. 
This only creates a basic folder structure for the model, the model is not actually downloaded.", ) + @click.option( + "--with_bentoml", + is_flag=True, + default=False, + help="Force fetch using BentoML", + ) + @click.option( + "--with_fastapi", + is_flag=True, + default=False, + help="Force fetch using FastAPI", + ) def fetch( model, repo_path, @@ -72,7 +84,11 @@ def fetch( from_s3, from_hosted, from_url, + with_bentoml, + with_fastapi, ): + if with_bentoml and with_fastapi: + raise Exception("Cannot use both BentoML and FastAPI") if repo_path is not None: mdl = ModelBase(repo_path=repo_path) elif from_dir is not None: @@ -94,6 +110,8 @@ def fetch( force_from_s3=from_s3, force_from_dockerhub=from_dockerhub, force_from_hosted=from_hosted, + force_with_bentoml=with_bentoml, + force_with_fastapi=with_fastapi, hosted_url=from_url, ) _fetch(mf, model_id) diff --git a/ersilia/cli/commands/run.py b/ersilia/cli/commands/run.py index c5c186777..19ff6efe8 100644 --- a/ersilia/cli/commands/run.py +++ b/ersilia/cli/commands/run.py @@ -41,7 +41,7 @@ def run(input, output, batch_size, standard): fg="red", ) return - + mdl = ErsiliaModel( model_id, service_class=service_class, @@ -64,17 +64,11 @@ def run(input, output, batch_size, standard): echo("Something went wrong", fg="red") else: echo(result) - + if track_runs: """ - Retrieve the time taken to run the model and update the total. + Retrieve the time taken to run the model and update the total. 
""" - time_tracker = RunTracker( - model_id=model_id, - config_json=None - ) - - time_tracker.update_total_time( - model_id=model_id, - start_time=start_time - ) + time_tracker = RunTracker(model_id=model_id, config_json=None) + + time_tracker.update_total_time(model_id=model_id, start_time=start_time) diff --git a/ersilia/cli/commands/serve.py b/ersilia/cli/commands/serve.py index b9bf5573a..3ed5c4723 100644 --- a/ersilia/cli/commands/serve.py +++ b/ersilia/cli/commands/serve.py @@ -47,8 +47,7 @@ def serve(model, lake, docker, port, track): ) if not mdl.is_valid(): ModelNotFound(mdl).echo() - - + mdl.serve() if mdl.url is None: echo("No URL found. Service unsuccessful.", fg="red") @@ -73,7 +72,7 @@ def serve(model, lake, docker, port, track): echo("") echo(":person_tipping_hand: Information:", fg="blue") echo(" - info", fg="blue") - + if track: """ Retrieve the time taken in seconds to serve the Model. diff --git a/ersilia/core/base.py b/ersilia/core/base.py index ffba43482..2992a46b2 100644 --- a/ersilia/core/base.py +++ b/ersilia/core/base.py @@ -2,6 +2,7 @@ import subprocess from pathlib import Path from ..utils.config import Config, Credentials +from ..utils.paths import resolve_pack_method from ..default import EOS from .. 
import logger @@ -89,8 +90,10 @@ def _get_bundle_location(self, model_id): else: return path - @staticmethod - def _get_bento_location(model_id): + def _get_bento_location(self, model_id): + bundle_path = self._get_bundle_location(model_id) + if resolve_pack_method(bundle_path) != "bentoml": + return None cmd = ["bentoml", "get", "%s:latest" % model_id, "--print-location", "--quiet"] result = subprocess.run(cmd, stdout=subprocess.PIPE) result = result.stdout.decode("utf-8").rstrip() diff --git a/ersilia/core/model.py b/ersilia/core/model.py index 65cecd4a6..b640d38a3 100644 --- a/ersilia/core/model.py +++ b/ersilia/core/model.py @@ -445,7 +445,6 @@ def get_apis(self): def _run( self, input=None, output=None, batch_size=DEFAULT_BATCH_SIZE, track_run=False ): - api_name = self.get_apis()[0] result = self.api( api_name=api_name, input=input, output=output, batch_size=batch_size @@ -498,9 +497,7 @@ def run( ) # Start tracking model run if track flag is used in serve if self._run_tracker is not None and track_run: - self._run_tracker.track( - input=input, result=result, meta=self._model_info - ) + self._run_tracker.track(input=input, result=result, meta=self._model_info) self._run_tracker.log(result=result, meta=self._model_info) return result diff --git a/ersilia/core/tracking.py b/ersilia/core/tracking.py index 3c65c6899..f82cdbf0b 100644 --- a/ersilia/core/tracking.py +++ b/ersilia/core/tracking.py @@ -26,13 +26,11 @@ from botocore.exceptions import ClientError, NoCredentialsError - - def flatten_dict(data): """ This will flatten the nested dictionaries from the generator into a single-level dictionary, where keys from all levels are merged into one dictionary. 
- + :flat_dict: Result returned in a dictionary """ flat_dict = {} @@ -40,7 +38,6 @@ def flatten_dict(data): for inner_key, value in inner_dict.items(): flat_dict[inner_key] = value return flat_dict - def log_files_metrics(file_log, model_id): @@ -81,7 +78,9 @@ def log_files_metrics(file_log, model_id): # encountering new logs # make sure error flags are closed if ersilia_error_flag: - errors["Unknown Ersilia exception class"] = errors.get("Unknown Ersilia exception class", 0) + 1 + errors["Unknown Ersilia exception class"] = ( + errors.get("Unknown Ersilia exception class", 0) + 1 + ) ersilia_error_flag = False if misc_error_flag: errors[error_name] = errors.get(error_name, 0) + 1 @@ -106,15 +105,15 @@ def log_files_metrics(file_log, model_id): errors["Unknown Ersilia exception class"] += 1 if misc_error_flag: errors[error_name] += 1 - + json_dict = {} json_dict["Error count"] = error_count - + if len(errors) > 0: json_dict["Breakdown by error types"] = {} for error in errors: - json_dict["Breakdown by error types"][error] = errors[error] - json_dict["Warning count"] = warning_count + json_dict["Breakdown by error types"][error] = errors[error] + json_dict["Warning count"] = warning_count json_object = json.dumps(json_dict, indent=4) write_persistent_file(json_object, model_id) except (IsADirectoryError, FileNotFoundError): @@ -180,22 +179,19 @@ def close_persistent_file(model_id): """ if check_file_exists(model_id): file_name = get_persistent_file_path(model_id) - file_log = os.path.join( - EOS, "console.log" - ) + file_log = os.path.join(EOS, "console.log") log_files_metrics(file_log, model_id) new_file_path = os.path.join( os.path.dirname(file_name), datetime.now().strftime("%Y-%m-%d_%H-%M-%S.txt"), ) os.rename(file_name, new_file_path) - + else: - raise FileNotFoundError( f"The persistent file for model {model_id} does not exist. Cannot close file." 
) - + def upload_to_s3(json_dict, bucket="ersilia-tracking", object_name=None): """Upload a file to an S3 bucket @@ -350,64 +346,61 @@ def __init__(self, model_id, config_json): self.tabular_result_logger = TabularResultLogger() - -# TODO: see the following link for more details -# https://github.com/ersilia-os/ersilia/issues/1165?notification_referrer_id=NT_kwDOAsB0trQxMTEyNTc5MDIxNzo0NjE2NzIyMg#issuecomment-2178596998 - -# def stats(self, result): -# """ -# Stats function: calculates the basic statistics of the output file from a model. This includes the -# mode (if applicable), minimum, maximum, and standard deviation. -# :param result: The path to the model's output file. -# :return: A dictionary containing the stats for each column of the result. -# """ - -# data = read_csv(result) - - # drop first two columns (key, input) -# for row in data: -# row.pop('key', None) -# row.pop('input', None) - - # Convert data to a column-oriented format -# columns = defaultdict(list) -# for row in data: -# for key, value in row.items(): -# columns[key].append(float(value)) - - # Calculate statistics -# stats = {} -# for column, values in columns.items(): -# column_stats = {} -# column_stats["mean"] = statistics.mean(values) -# try: -# column_stats["mode"] = statistics.mode(values) -# except statistics.StatisticsError: -# column_stats["mode"] = None -# column_stats["min"] = min(values) -# column_stats["max"] = max(values) -# column_stats["std"] = statistics.stdev(values) if len(values) > 1 else 0 -# -# stats[column] = column_stats - - -# return stats - + # TODO: see the following link for more details + # https://github.com/ersilia-os/ersilia/issues/1165?notification_referrer_id=NT_kwDOAsB0trQxMTEyNTc5MDIxNzo0NjE2NzIyMg#issuecomment-2178596998 + + # def stats(self, result): + # """ + # Stats function: calculates the basic statistics of the output file from a model. This includes the + # mode (if applicable), minimum, maximum, and standard deviation. 
+ # :param result: The path to the model's output file. + # :return: A dictionary containing the stats for each column of the result. + # """ + + # data = read_csv(result) + + # drop first two columns (key, input) + # for row in data: + # row.pop('key', None) + # row.pop('input', None) + + # Convert data to a column-oriented format + # columns = defaultdict(list) + # for row in data: + # for key, value in row.items(): + # columns[key].append(float(value)) + + # Calculate statistics + # stats = {} + # for column, values in columns.items(): + # column_stats = {} + # column_stats["mean"] = statistics.mean(values) + # try: + # column_stats["mode"] = statistics.mode(values) + # except statistics.StatisticsError: + # column_stats["mode"] = None + # column_stats["min"] = min(values) + # column_stats["max"] = max(values) + # column_stats["std"] = statistics.stdev(values) if len(values) > 1 else 0 + # + # stats[column] = column_stats + + # return stats def update_total_time(self, model_id, start_time): """ Method to track and update the Total time taken by model. :Param model_id: The currently running model. - :Param start_time: The start time of the running model. + :Param start_time: The start time of the running model. 
""" - + end_time = time.time() duration = end_time - start_time if check_file_exists(model_id): file_name = get_persistent_file_path(model_id) with open(file_name, "r") as f: lines = f.readlines() - + updated_lines = [] total_time_found = False @@ -424,7 +417,7 @@ def update_total_time(self, model_id, start_time): print(f"Error parsing 'Total time taken' value: {e}") else: updated_lines.append(line) - + if not total_time_found: updated_lines.append(f"Total time taken: {formatted_duration}\n") @@ -435,8 +428,7 @@ def update_total_time(self, model_id, start_time): new_content = f"Total time: {formatted_duration}\n" with open(file_name, "w") as f: f.write(f"{new_content}\n") - - + def get_file_sizes(self, input_file, output_file): """ Calculates the size of the input and output dataframes, as well as the average size of each row. @@ -512,7 +504,6 @@ def get_peak_memory(self): peak_memory = peak_memory_kb / 1024 return peak_memory - def get_memory_info(self, process="ersilia"): """ Retrieves the memory information of the current process @@ -545,8 +536,7 @@ def get_memory_info(self, process="ersilia"): return "No such process found." except Exception as e: return str(e) - - + def log_result(self, result): output_dir = os.path.join(self.lake_folder, self.model_id) if not os.path.exists(output_dir): @@ -575,26 +565,23 @@ def log_logs(self): file_name = os.path.join(output_dir, "{0}.log".format(self.model_id)) session_file = os.path.join(EOS, "session.json") shutil.copyfile(session_file, file_name) - - - + def track(self, input, result, meta): """ Tracks the results of a model run. 
""" - + self.docker_client = SimpleDocker() self.data = CsvDataLoader() json_dict = {} - + if os.path.isfile(input): input_data = self.data.read(input) else: input_data = [{"SMILES": input}] - + # Create a temporary file to store the result if it is a generator if isinstance(result, types.GeneratorType): - # Ensure EOS/tmp directory exists tmp_dir = os.path.join(EOS, "tmp") if not os.path.exists(tmp_dir): @@ -604,18 +591,18 @@ def track(self, input, result, meta): temp_output_file = tempfile.NamedTemporaryFile( delete=False, suffix=".csv", dir=tmp_dir ) - + flat_data_list = [flatten_dict(row) for row in result] if flat_data_list: - header = list(flat_data_list[0].keys()) - temp_output_path = temp_output_file.name + header = list(flat_data_list[0].keys()) + temp_output_path = temp_output_file.name with open(temp_output_path, "w", newline="") as csvfile: csvWriter = csv.DictWriter(csvfile, fieldnames=header) csvWriter.writeheader() for flat_data in flat_data_list: - csvWriter.writerow(flat_data) + csvWriter.writerow(flat_data) result_data = self.data.read(temp_output_path) - os.remove(temp_output_path) + os.remove(temp_output_path) else: result_data = self.data.read(result) @@ -630,13 +617,13 @@ def track(self, input, result, meta): json_dict["check_types"] = self.check_types(result_data, meta["metadata"]) json_dict["file_sizes"] = self.get_file_sizes(input_data, result_data) - - docker_info = (self.docker_client.container_memory(), - self.docker_client.container_cpu(), - self.docker_client.container_peak() + + docker_info = ( + self.docker_client.container_memory(), + self.docker_client.container_cpu(), + self.docker_client.container_peak(), ) - - + json_dict["Docker Container"] = docker_info # Get the memory stats of the run processs diff --git a/ersilia/db/environments/managers.py b/ersilia/db/environments/managers.py index 45c6e3090..53dfab29e 100644 --- a/ersilia/db/environments/managers.py +++ b/ersilia/db/environments/managers.py @@ -7,14 +7,14 @@ from 
...setup.requirements.docker import DockerRequirement from ...utils.paths import Paths from ...utils.terminal import run_command -from ...utils.docker import SimpleDocker, is_inside_docker, resolve_platform +from ...utils.docker import SimpleDocker, resolve_platform +from ...utils.system import is_inside_docker from ...utils.identifiers.short import ShortIdentifier from ...utils.ports import find_free_port from .localdb import EnvironmentDb from ...default import DOCKERHUB_ORG, DOCKERHUB_LATEST_TAG -import subprocess import sys BENTOML_DOCKERPORT = 5000 diff --git a/ersilia/db/hubdata/json_models_interface.py b/ersilia/db/hubdata/json_models_interface.py index 70d99a1fb..7243bc04c 100644 --- a/ersilia/db/hubdata/json_models_interface.py +++ b/ersilia/db/hubdata/json_models_interface.py @@ -8,12 +8,10 @@ class JsonModelsInterface: def __init__(self, json_file_name): - self.json_file_name = json_file_name self.s3_client = boto3.client("s3", region_name=AWS_ACCOUNT_REGION) def _read_json_file(self): - s3_response = self.s3_client.get_object( Bucket=ERSILIA_MODEL_HUB_S3_BUCKET, Key=self.json_file_name ) diff --git a/ersilia/default.py b/ersilia/default.py index 0db7f68fa..760fd5416 100644 --- a/ersilia/default.py +++ b/ersilia/default.py @@ -39,7 +39,12 @@ MODEL_CONFIG_FILENAME = "config.json" EXAMPLE_STANDARD_INPUT_CSV_FILENAME = "example_standard_input.csv" EXAMPLE_STANDARD_OUTPUT_CSV_FILENAME = "example_standard_output.csv" -PREDEFINED_EXAMPLE_FILENAME = "example.csv" +PREDEFINED_EXAMPLE_FILES = [ + "model/framework/examples/input.csv", + "model/framework/input.csv", + "model/framework/example.csv", + "example.csv", +] DEFAULT_ERSILIA_ERROR_EXIT_CODE = 1 METADATA_JSON_FILE = "metadata.json" SERVICE_CLASS_FILE = "service_class.txt" @@ -50,6 +55,9 @@ DEFAULT_UDOCKER_USERNAME = "udockerusername" DEFAULT_UDOCKER_PASSWORD = "udockerpassword" ERSILIA_RUNS_FOLDER = "ersilia_runs" +ALLOWED_API_NAMES = ["run", "train"] # This can grow in the future based on needs 
+PACK_METHOD_FASTAPI = "fastapi" +PACK_METHOD_BENTOML = "bentoml" # Isaura data lake H5_EXTENSION = ".h5" diff --git a/ersilia/hub/content/card.py b/ersilia/hub/content/card.py index ea3176205..ffe22c347 100644 --- a/ersilia/hub/content/card.py +++ b/ersilia/hub/content/card.py @@ -46,7 +46,12 @@ except: Hdf5Explorer = None -from ...default import EOS, INFORMATION_FILE, METADATA_JSON_FILE, SERVICE_CLASS_FILE +from ...default import ( + CARD_FILE, + METADATA_JSON_FILE, + SERVICE_CLASS_FILE, + INFORMATION_FILE, +) class BaseInformation(ErsiliaBase): @@ -61,7 +66,7 @@ def __init__(self, config_json): self._mode = None self._task = None self._input = None - + self._input_shape = None self._output = None self._output_type = None @@ -535,7 +540,7 @@ def read_information(self): ) am = AirtableMetadata(model_id=self.model_id) bi = am.read_information() - print(bi.as_dict()) + self.logger.info(bi.as_dict()) return bi def write_information(self, data: BaseInformation, readme_path=None): @@ -708,43 +713,51 @@ class LocalCard(ErsiliaBase): This class provides information on models that have been fetched and are available locally. It retrieves and caches information about the models. """ + def __init__(self, config_json): ErsiliaBase.__init__(self, config_json=config_json) - + @lru_cache(maxsize=32) def _load_data(self, model_id): """ Loads the JSON data from the model's information file. 
""" model_path = self._model_path(model_id) - file_path = os.path.join(model_path, INFORMATION_FILE) - - if os.path.exists(file_path): - try: - with open(file_path, "r") as f: - return json.load(f) - except json.JSONDecodeError: - return None - return None - - def get(self, model_id): + info_file = os.path.join(model_path, INFORMATION_FILE) + if os.path.exists(info_file): + card_path = info_file + else: + card_path = os.path.join(model_path, CARD_FILE) + if os.path.exists(card_path): + with open(card_path, "r") as f: + card = json.load(f) + return card + else: + return None + + def get_service_class(self, model_id): """ - Returns the 'card' information for the specified model. + This method returns information about how the model was fetched by reading + the service class file located in the model's bundle directory. If the service + class file does not exist, it returns None. """ - data = self._load_data(model_id) - if data: - return data.get("card") - return None - - def get_service_class(self, model_id): + service_class_path = os.path.join( + self._get_bundle_location(model_id), SERVICE_CLASS_FILE + ) + + if os.path.exists(service_class_path): + with open(service_class_path, "r") as f: + service_class = f.read().strip() + return service_class + else: + return None + + def get(self, model_id): """ - Returns the 'service class' information for the specified model. + This method returns the card for a model. If the model does not exist, it returns None. 
""" - - data = self._load_data(model_id) - if data: - return data.get("service_class") - return None + card = self._load_data(model_id) + return card class LakeCard(ErsiliaBase): @@ -764,6 +777,7 @@ def get(self, model_id, as_json=False): class ModelCard(object): def __init__(self, config_json=None): + self.lc = LocalCard(config_json=config_json) self.mc = MetadataCard(config_json=config_json) self.ac = AirtableCard(config_json=config_json) @@ -791,7 +805,7 @@ def get(self, model_id, as_json=False): return json.dumps(card, indent=4) else: return card - + def get_service_class(self, model_id, as_json=False): service = self.lc.get_service_class(model_id) if service is None: diff --git a/ersilia/hub/content/catalog.py b/ersilia/hub/content/catalog.py index adb133e52..652adf1b5 100644 --- a/ersilia/hub/content/catalog.py +++ b/ersilia/hub/content/catalog.py @@ -2,6 +2,7 @@ import subprocess import requests +import shutil import os import json import csv @@ -9,7 +10,7 @@ from ... import ErsiliaBase from ...utils.identifiers.model import ModelIdentifier from ...auth.auth import Auth -from ...default import GITHUB_ORG +from ...default import GITHUB_ORG, BENTOML_PATH from ... 
import logger try: @@ -93,21 +94,21 @@ def _get_status(self, card): if "Status" in card: return card["Status"] return None - + def _get_input(self, card): if "input" in card: return card["input"][0] if "Input" in card: return card["Input"][0] return None - + def _get_output(self, card): if "output" in card: return card["output"][0] if "Output" in card: return card["Output"][0] return None - + def airtable(self): """List models available in AirTable Ersilia Model Hub base""" if webbrowser: @@ -199,7 +200,15 @@ def local(self): output = self._get_output(card) service_class = mc.get_service_class(model_id) R += [[model_id, slug, title, status, inputs, output, service_class]] - columns = ["Identifier", "Slug", "Title", "Status", "Input", "Output", "Service Class"] + columns = [ + "Identifier", + "Slug", + "Title", + "Status", + "Input", + "Output", + "Service Class", + ] logger.info("Found {0} models".format(len(R))) if len(R) == 0: return CatalogTable(data=[], columns=columns) @@ -207,9 +216,13 @@ def local(self): def bentoml(self): """List models available as BentoServices""" - result = subprocess.run( - ["bentoml", "list"], stdout=subprocess.PIPE, env=os.environ - ) + try: + result = subprocess.run( + ["bentoml", "list"], stdout=subprocess.PIPE, env=os.environ, timeout=10 + ) + except Exception as e: + shutil.rmtree(BENTOML_PATH) + return None result = [r for r in result.stdout.decode("utf-8").split("\n") if r] if len(result) == 1: return diff --git a/ersilia/hub/content/information.py b/ersilia/hub/content/information.py index 7d083aee9..d7aa858f9 100644 --- a/ersilia/hub/content/information.py +++ b/ersilia/hub/content/information.py @@ -24,7 +24,7 @@ def __init__(self, model_id, config_json=None): ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) self.model_id = model_id self.repository_folder = os.path.join( - self._get_bento_location(model_id=self.model_id) + self._get_bundle_location(model_id=self.model_id) ) self.dest_folder = 
os.path.join(self._model_path(model_id=model_id)) diff --git a/ersilia/hub/content/search.py b/ersilia/hub/content/search.py index 824d4a96c..849af785c 100644 --- a/ersilia/hub/content/search.py +++ b/ersilia/hub/content/search.py @@ -1,4 +1,5 @@ """Search for models""" + from .catalog import CatalogTable import numpy as np import re diff --git a/ersilia/hub/delete/delete.py b/ersilia/hub/delete/delete.py index 1cbb05cfd..0f2c505d5 100644 --- a/ersilia/hub/delete/delete.py +++ b/ersilia/hub/delete/delete.py @@ -5,7 +5,7 @@ from ...utils.terminal import run_command from ...utils.environment import Environment from ...utils.conda import SimpleConda -from ...utils.docker import is_inside_docker +from ...utils.system import is_inside_docker from ..content.catalog import ModelCatalog from ...db.environments.localdb import EnvironmentDb from ...db.hubdata.localslugs import SlugDb diff --git a/ersilia/hub/fetch/actions/get.py b/ersilia/hub/fetch/actions/get.py index 60dfd4e4c..6a6e8ac28 100644 --- a/ersilia/hub/fetch/actions/get.py +++ b/ersilia/hub/fetch/actions/get.py @@ -9,15 +9,15 @@ from . import BaseAction from .... 
import ErsiliaBase from ....utils.download import GitHubDownloader, S3Downloader -from ....utils.paths import Paths from ...bundle.repo import PackFile, DockerfileFile from ....utils.exceptions_utils.throw_ersilia_exception import throw_ersilia_exception from ....utils.exceptions_utils.fetch_exceptions import ( FolderNotFoundError, S3DownloaderError, ) +from .template_resolver import TemplateResolver -from ....default import S3_BUCKET_URL_ZIP, PREDEFINED_EXAMPLE_FILENAME +from ....default import S3_BUCKET_URL_ZIP, PREDEFINED_EXAMPLE_FILES MODEL_DIR = "model" ROOT = os.path.basename(os.path.abspath(__file__)) @@ -91,7 +91,6 @@ def _append_commands_to_dockerfile(self, commands, dockerfile_content): run_section_end_line = i if line != "": sanitized.append(line) - print(sanitized, run_section_end_line) if sanitized[run_section_end_line + 2].startswith("RUN"): raise BaseException( "There are more instalation than the standard 'RUN pip install rdkit'" @@ -199,21 +198,6 @@ def __init__( self.force_from_s3 = force_from_s3 self.repo_path = repo_path - def _dev_model_path(self): - pt = Paths() - path = pt.models_development_path() - if path is not None: - path = os.path.join(path, self.model_id) - if pt.exists(path): - return path - else: - path = pt.ersilia_development_path() - if path is not None: - path = os.path.join(path, "test", "models", self.model_id) - if pt.exists(path): - return path - return None - @staticmethod def _copy_from_local(src, dst): shutil.copytree(src, dst) @@ -284,52 +268,46 @@ def _prepare_inner_template(self): TemplatePreparer(model_id=self.model_id, config_json=self.config_json).prepare() def _copy_example_file_if_available(self): - file_name = os.path.join( - self._model_path(self.model_id), - "model", - "framework", - PREDEFINED_EXAMPLE_FILENAME, - ) - dest_file = os.path.join( - self._model_path(self.model_id), PREDEFINED_EXAMPLE_FILENAME - ) - if os.path.exists(file_name): - self.logger.debug("Example file exists") - shutil.copy(file_name, 
dest_file) - else: - - self.logger.debug("Example file {0} does not exist".format(file_name)) + self.logger.debug("Copying example file if available") + for pf in PREDEFINED_EXAMPLE_FILES: + file_name = os.path.join(self._model_path(self.model_id), pf) + dest_file = os.path.join(self._model_path(self.model_id), "input.csv") + if os.path.exists(file_name): + self.logger.debug("Example file exists") + shutil.copy(file_name, dest_file) + return + else: + self.logger.debug("Example file {0} does not exist".format(file_name)) @throw_ersilia_exception def get(self): """Copy model repository from local or download from S3 or GitHub""" folder = self._model_path(self.model_id) - dev_model_path = self._dev_model_path() - if dev_model_path is not None: - self.logger.debug( - "Copying from local {0} to {1}".format(dev_model_path, folder) - ) - self._copy_from_local(dev_model_path, folder) + tr = TemplateResolver( + model_id=self.model_id, repo_path=folder, config_json=self.config_json + ) + if self.repo_path is not None: + self._copy_from_local(self.repo_path, folder) else: - if self.repo_path is not None: - self._copy_from_local(self.repo_path, folder) + if self.force_from_github: + self._copy_from_github(folder) else: - if self.force_from_github: - self._copy_from_github(folder) - else: - try: - self.logger.debug("Trying to download from S3") - self._copy_zip_from_s3(folder) - except: - self.logger.debug( - "Could not download in zip format in S3. Downloading from GitHub repository." - ) - if self.force_from_s3: - raise S3DownloaderError(model_id=self.model_id) - else: - self._copy_from_github(folder) - self._prepare_inner_template() - self._change_py_version_in_dockerfile_if_necessary() + try: + self.logger.debug("Trying to download from S3") + self._copy_zip_from_s3(folder) + except: + self.logger.debug( + "Could not download in zip format in S3. Downloading from GitHub repository." 
+ ) + if self.force_from_s3: + raise S3DownloaderError(model_id=self.model_id) + else: + self._copy_from_github(folder) + + if tr.is_bentoml(): + self._prepare_inner_template() + self._change_py_version_in_dockerfile_if_necessary() + self._remove_sudo_if_root() self._copy_example_file_if_available() @@ -355,6 +333,11 @@ def get(self): """Create a ./model folder in the model repository""" model_path = self._model_path(self.model_id) folder = self._get_destination() + tr = TemplateResolver( + model_id=self.model_id, repo_path=model_path, config_json=self.config_json + ) + if tr.is_fastapi(): + return None if not os.path.exists(folder): os.mkdir(folder) if not self._requires_parameters(model_path): @@ -389,8 +372,11 @@ def _get_model_parameters(self): @throw_ersilia_exception def get(self): + self.logger.debug("Getting repository") self._get_repository() if self.repo_path is None: + self.logger.debug("Getting model parameters") self._get_model_parameters() + self.logger.debug("Done getting model") if not os.path.exists(self._model_path(self.model_id)): raise FolderNotFoundError(os.path.exists(self._model_path(self.model_id))) diff --git a/ersilia/hub/fetch/actions/inform.py b/ersilia/hub/fetch/actions/inform.py index aec6d06cf..c2f373881 100644 --- a/ersilia/hub/fetch/actions/inform.py +++ b/ersilia/hub/fetch/actions/inform.py @@ -6,6 +6,7 @@ from . 
import BaseAction from ....default import INFORMATION_FILE +from ....utils.paths import resolve_pack_method class ModelInformer(BaseAction): @@ -26,8 +27,10 @@ def _add_info_api(self): sf = ServiceFile( path=os.path.join(self._get_bundle_location(self.model_id), self.model_id) ) - sf.add_info_api(information_file=self.information_file) + if os.path.exists(sf.get_file()): + sf.add_info_api(information_file=self.information_file) def inform(self): self._write_information_json() - self._add_info_api() + if resolve_pack_method(self._get_bundle_location(self.model_id)) == "bentoml": + self._add_info_api() diff --git a/ersilia/hub/fetch/actions/pack.py b/ersilia/hub/fetch/actions/pack_bentoml.py similarity index 93% rename from ersilia/hub/fetch/actions/pack.py rename to ersilia/hub/fetch/actions/pack_bentoml.py index f85be6602..60ba2cb63 100644 --- a/ersilia/hub/fetch/actions/pack.py +++ b/ersilia/hub/fetch/actions/pack_bentoml.py @@ -3,8 +3,8 @@ from . import BaseAction from .modify import ModelModifier -from ..pack.mode import PackModeDecision, AVAILABLE_MODES -from ..pack.runners import get_runner +from ..pack.bentoml_pack.mode import PackModeDecision, AVAILABLE_MODES +from ..pack.bentoml_pack.runners import get_runner from ...bundle.repo import ServiceFile, DockerfileFile from ....default import PACKMODE_FILE diff --git a/ersilia/hub/fetch/actions/pack_fastapi.py b/ersilia/hub/fetch/actions/pack_fastapi.py new file mode 100644 index 000000000..7a1d4e954 --- /dev/null +++ b/ersilia/hub/fetch/actions/pack_fastapi.py @@ -0,0 +1,41 @@ +import os +import sys + +from . 
import BaseAction +from ..pack.fastapi_pack.mode import PackModeDecision, AVAILABLE_MODES +from ..pack.fastapi_pack.runners import get_runner +from ....default import PACKMODE_FILE + + +class ModelPacker(BaseAction): + def __init__(self, model_id, mode, config_json): + BaseAction.__init__( + self, model_id=model_id, config_json=config_json, credentials_json=None + ) + if mode is not None: + assert mode in AVAILABLE_MODES + self.mode = mode + + def _setup(self): + self.folder = self._model_path(self.model_id) + + def _decide_pack_mode(self): + if self.mode is None: + pmd = PackModeDecision(self.model_id, config_json=self.config_json) + self.pack_mode = pmd.decide() + else: + self.pack_mode = self.mode + + with open(os.path.join(self.folder, PACKMODE_FILE), "w") as f: + f.write(self.pack_mode) + + def _run(self): + runner = get_runner(self.pack_mode)( + model_id=self.model_id, config_json=self.config_json + ) + runner.run() + + def pack(self): + self._setup() + self._decide_pack_mode() + self._run() diff --git a/ersilia/hub/fetch/actions/setup.py b/ersilia/hub/fetch/actions/setup.py index 86b6ecada..34f524c0e 100644 --- a/ersilia/hub/fetch/actions/setup.py +++ b/ersilia/hub/fetch/actions/setup.py @@ -17,7 +17,7 @@ def _gh_cli(self): self.logger.debug("GitHub CLI is installed") else: self.logger.info( - "GitHub CLI is not installed. Ersilia can work without it, but we highy recommend that you install this tool." + "GitHub CLI is not installed. Ersilia can work without it, but we highly recommend that you install this tool." 
) def _git_lfs(self): diff --git a/ersilia/hub/fetch/actions/sniff.py b/ersilia/hub/fetch/actions/sniff_bentoml.py similarity index 93% rename from ersilia/hub/fetch/actions/sniff.py rename to ersilia/hub/fetch/actions/sniff_bentoml.py index 21b00c349..ce131140d 100644 --- a/ersilia/hub/fetch/actions/sniff.py +++ b/ersilia/hub/fetch/actions/sniff_bentoml.py @@ -12,25 +12,35 @@ from ....io.input import ExampleGenerator from ....io.pure import PureDataTyper from ....io.annotated import AnnotatedDataTyper -from ....default import API_SCHEMA_FILE, MODEL_SIZE_FILE, METADATA_JSON_FILE, PREDEFINED_EXAMPLE_FILENAME +from ....default import ( + API_SCHEMA_FILE, + MODEL_SIZE_FILE, + METADATA_JSON_FILE, + PREDEFINED_EXAMPLE_FILES, +) from ....utils.exceptions_utils.exceptions import EmptyOutputError from ....utils.exceptions_utils.fetch_exceptions import ( OutputDataTypesNotConsistentError, ) + class BuiltinExampleReader(ErsiliaBase): def __init__(self, model_id, config_json): ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) self.model_id = model_id - self.example_file = os.path.join( - self._get_bundle_location(self.model_id), - self.model_id, - "artifacts", - "framework", - PREDEFINED_EXAMPLE_FILENAME, - ) + self.example_file = None + for pf in PREDEFINED_EXAMPLE_FILES: + example_file = os.path.join( + self._model_path(self.model_id), + pf, + ) + if os.path.exists(example_file): + self.example_file = example_file + break def has_builtin_example(self): + if self.example_file is None: + return False if os.path.exists(self.example_file): return True else: @@ -87,7 +97,7 @@ def _get_size_in_mb(self): dest_dir = self._model_path(self.model_id) repo_dir = self._get_bundle_location(self.model_id) size = self._get_directory_size(dest_dir) + self._get_directory_size(repo_dir) - mbytes = size / (1024 ** 2) + mbytes = size / (1024**2) return mbytes def _get_output_ann_type(self): @@ -190,10 +200,9 @@ def _get_schema(self, results): self.logger.debug("Schema: 
{0}".format(schema)) self.logger.debug("Done with the schema!") return schema - + @throw_ersilia_exception def _get_schema_type_for_simple_run_api_case(self): - # read metadata dest_dir = self._model_path(self.model_id) metadata_file = os.path.join(dest_dir, METADATA_JSON_FILE) @@ -217,7 +226,7 @@ def _get_schema_type_for_simple_run_api_case(self): return None if output_type not in ["Float", "String"]: return None - + # get output shape from metadata.json output_shape = metadata["Output Shape"] if output_shape not in ["Single", "List"]: @@ -249,7 +258,7 @@ def sniff(self): self.logger.debug("Sniffing model") self.logger.debug("Getting model size") size = self._get_size_in_mb() - self.logger.debug("Mode size is {0} MB".format(size)) + self.logger.debug("Model size is {0} MB".format(size)) path = os.path.join(self._model_path(self.model_id), MODEL_SIZE_FILE) with open(path, "w") as f: json.dump({"size": size, "units": "MB"}, f, indent=4) @@ -291,7 +300,10 @@ def get_results(api_name): if schema["output"]["outcome"]["type"] is None: schema["output"]["outcome"]["type"] = schema_type_backup if "shape" not in schema["output"]["outcome"]: - shape = self._try_to_resolve_output_shape(schema["output"]["outcome"]["meta"], schema["output"]["outcome"]["type"]) + shape = self._try_to_resolve_output_shape( + schema["output"]["outcome"]["meta"], + schema["output"]["outcome"]["type"], + ) if shape is not None: schema["output"]["outcome"]["shape"] = shape all_schemas[api_name] = schema diff --git a/ersilia/hub/fetch/actions/sniff_fastapi.py b/ersilia/hub/fetch/actions/sniff_fastapi.py new file mode 100644 index 000000000..5978cf05e --- /dev/null +++ b/ersilia/hub/fetch/actions/sniff_fastapi.py @@ -0,0 +1,85 @@ +import os +import csv +import json +from pathlib import Path + +from .... import throw_ersilia_exception + +from . import BaseAction +from .... 
import ErsiliaBase +from ....default import MODEL_SIZE_FILE + + +N = 3 + +BUILTIN_EXAMPLE_FILE_NAME = "example.csv" +BUILTIN_OUTPUT_FILE_NAME = "output.csv" + + +# TODO for now this is not used +class BuiltinExampleReader(ErsiliaBase): + def __init__(self, model_id, config_json): + ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) + self.model_id = model_id + self.input_example_file = os.path.join( + self._get_bundle_location(self.model_id), + "model", + "framework", + BUILTIN_EXAMPLE_FILE_NAME, + ) + self.output_example_file = os.path.join( + self._get_bundle_location(self.model_id), + "model", + "framework", + BUILTIN_OUTPUT_FILE_NAME, + ) + + def input_example(self): + data = [] + with open(self.input_example_file, "r") as f: + reader = csv.reader(f) + next(reader) + for r in reader: + data += [r[0]] + return data[:N] + + def output_example(self): + data = [] + with open(self.input_example_file, "r") as f: + reader = csv.reader(f) + next(reader) + for r in reader: + data += [r[0]] + return data[:N] + + +class ModelSniffer(BaseAction): + def __init__(self, model_id, config_json): + BaseAction.__init__( + self, model_id=model_id, config_json=config_json, credentials_json=None + ) + + @staticmethod + def _get_directory_size(dir): + root_directory = Path(dir) + bytes = sum( + f.stat().st_size for f in root_directory.glob("**/*") if f.is_file() + ) + return bytes + + def _get_size_in_mb(self): + dest_dir = self._model_path(self.model_id) + repo_dir = self._get_bundle_location(self.model_id) + size = self._get_directory_size(dest_dir) + self._get_directory_size(repo_dir) + mbytes = size / (1024**2) + return mbytes + + @throw_ersilia_exception + def sniff(self): + self.logger.debug("Sniffing model") + self.logger.debug("Getting model size") + size = self._get_size_in_mb() + self.logger.debug("Model size is {0} MB".format(size)) + path = os.path.join(self._model_path(self.model_id), MODEL_SIZE_FILE) + with open(path, "w") as f: + 
json.dump({"size": size, "units": "MB"}, f, indent=4) diff --git a/ersilia/hub/fetch/actions/template_resolver.py b/ersilia/hub/fetch/actions/template_resolver.py new file mode 100644 index 000000000..d66b00a59 --- /dev/null +++ b/ersilia/hub/fetch/actions/template_resolver.py @@ -0,0 +1,63 @@ +import os +import http.client +import urllib.parse + +from . import BaseAction + +from ....default import GITHUB_ORG, ALLOWED_API_NAMES + + +class TemplateResolver(BaseAction): + def __init__(self, model_id, repo_path=None, config_json=None): + BaseAction.__init__( + self, model_id=model_id, config_json=config_json, credentials_json=None + ) + self.repo_path = repo_path + + def _check_file_in_repo(self, file_path): + file_path = os.path.join(self.repo_path, file_path) + if os.path.exists(file_path): + return True + else: + return False + + def _check_file_in_github(self, file_path): + url = "https://raw.githubusercontent.com/{0}/{1}/main/{2}".format( + GITHUB_ORG, self.model_id, file_path + ) + parsed_url = urllib.parse.urlparse(url) + conn = http.client.HTTPSConnection(parsed_url.netloc) + try: + conn.request("HEAD", parsed_url.path) + response = conn.getresponse() + return response.status == 200 + except Exception as e: + return False + finally: + conn.close() + + def _check_file(self, file_path): + if self.repo_path is not None: + return self._check_file_in_repo(file_path) + else: + return self._check_file_in_github(file_path) + + def is_fastapi(self): + if not self._check_file("Dockerfile") and not self._check_file("install.yml"): + return False + has_sh = False + for allowed_api in ALLOWED_API_NAMES: + if self._check_file("model/framework/{0}.sh".format(allowed_api)): + has_sh = True + if not has_sh: + return False + return True + + def is_bentoml(self): + if not self._check_file("pack.py"): + return False + if not self._check_file("Dockerfile"): + return False + if not self._check_file("src/service.py"): + return False + return True diff --git 
a/ersilia/hub/fetch/fetch.py b/ersilia/hub/fetch/fetch.py index 86d36d7e8..0ad181e1a 100644 --- a/ersilia/hub/fetch/fetch.py +++ b/ersilia/hub/fetch/fetch.py @@ -1,26 +1,20 @@ -"""Fetch model from the Ersilia Model Hub""" +"""Fetch Model from the Ersilia Model Hub.""" -import json import os -from timeit import default_timer as timer -from datetime import timedelta - -from .actions.setup import SetupChecker -from .actions.prepare import ModelPreparer -from .actions.get import ModelGetter -from .actions.lake import LakeGetter -from .actions.pack import ModelPacker -from .actions.toolize import ModelToolizer -from .actions.content import CardGetter -from .actions.check import ModelChecker -from .actions.sniff import ModelSniffer -from .actions.inform import ModelInformer -from .register.register import ModelRegisterer +import json +import importlib + from .lazy_fetchers.dockerhub import ModelDockerHubFetcher from .lazy_fetchers.hosted import ModelHostedFetcher from .register.standard_example import ModelStandardExample - from ... import ErsiliaBase +from ...hub.fetch.actions.template_resolver import TemplateResolver +from ...utils.exceptions_utils.fetch_exceptions import ( + NotInstallableWithFastAPI, + NotInstallableWithBentoML, +) +from ...utils.exceptions_utils.throw_ersilia_exception import throw_ersilia_exception +from ...default import PACK_METHOD_BENTOML, PACK_METHOD_FASTAPI from . 
import STATUS_FILE, DONE_TAG @@ -39,6 +33,8 @@ def __init__( force_from_s3=False, force_from_dockerhub=False, force_from_hosted=False, + force_with_bentoml=False, + force_with_fastapi=False, hosted_url=None, ): ErsiliaBase.__init__( @@ -64,89 +60,83 @@ def __init__( self.force_from_s3 = force_from_s3 self.force_from_dockerhub = force_from_dockerhub self.force_from_hosted = force_from_hosted + self.force_with_bentoml = force_with_bentoml + self.force_with_fastapi = force_with_fastapi self.hosted_url = hosted_url - def _setup_check(self): - sc = SetupChecker(model_id=self.model_id, config_json=self.config_json) - sc.check() + @throw_ersilia_exception + def _decide_fetcher(self, model_id): + tr = TemplateResolver(model_id=model_id, repo_path=self.repo_path) + if tr.is_bentoml(): + return PACK_METHOD_BENTOML + elif tr.is_fastapi(): + return PACK_METHOD_FASTAPI + else: + raise Exception("No fetcher available") - def _prepare(self): - mp = ModelPreparer( - model_id=self.model_id, - overwrite=self.overwrite, + @throw_ersilia_exception + def _fetch_from_fastapi(self): + self.logger.debug("Fetching using Ersilia Pack (FastAPI)") + fetch = importlib.import_module("ersilia.hub.fetch.fetch_fastapi") + mf = fetch.ModelFetcherFromFastAPI( config_json=self.config_json, - ) - mp.prepare() - - def _get(self): - mg = ModelGetter( - model_id=self.model_id, + credentials_json=self.credentials_json, + overwrite=self.overwrite, repo_path=self.repo_path, - config_json=self.config_json, - force_from_gihtub=self.force_from_github, + mode=self.mode, + force_from_github=self.force_from_github, force_from_s3=self.force_from_s3, ) - mg.get() - - def _lake(self): - ml = LakeGetter(model_id=self.model_id, config_json=self.config_json) - ml.get() - - def _pack(self): - mp = ModelPacker( - model_id=self.model_id, mode=self.mode, config_json=self.config_json + if mf.seems_installable(model_id=self.model_id): + mf.fetch(model_id=self.model_id) + else: + self.logger.debug("Not installable with 
FastAPI") + raise NotInstallableWithFastAPI(model_id=self.model_id) + + @throw_ersilia_exception + def _fetch_from_bentoml(self): + self.logger.debug("Fetching using BentoML") + fetch = importlib.import_module("ersilia.hub.fetch.fetch_bentoml") + mf = fetch.ModelFetcherFromBentoML( + config_json=self.config_json, + credentials_json=self.credentials_json, + overwrite=self.overwrite, + repo_path=self.repo_path, + mode=self.mode, + pip=self.do_pip, + dockerize=self.do_docker, + force_from_github=self.force_from_github, + force_from_s3=self.force_from_s3, ) - mp.pack() - - def _toolize(self): - mt = ModelToolizer(model_id=self.model_id, config_json=self.config_json) - mt.toolize(do_pip=self.do_pip, do_docker=self.do_docker) - - def _content(self): - cg = CardGetter(self.model_id, self.config_json) - cg.get() - - def _check(self): - mc = ModelChecker(self.model_id, self.config_json) - mc.check() - - def _sniff(self): - sn = ModelSniffer(self.model_id, self.config_json) - sn.sniff() - - def _inform(self): - mi = ModelInformer(self.model_id, self.config_json) - mi.inform() - - def _success(self): - done = {DONE_TAG: True} - status_file = os.path.join(self._dest_dir, self.model_id, STATUS_FILE) - with open(status_file, "w") as f: - json.dump(done, f, indent=4) - mr = ModelRegisterer(self.model_id, config_json=self.config_json) - mr.register(is_from_dockerhub=False) + if mf.seems_installable(model_id=self.model_id): + mf.fetch(model_id=self.model_id) + else: + self.logger.debug("Not installable with BentoML") + raise NotInstallableWithBentoML(model_id=self.model_id) + @throw_ersilia_exception def _fetch_not_from_dockerhub(self, model_id): - start = timer() self.model_id = model_id - self._setup_check() - self._prepare() - self._get() - self._pack() - self._toolize() - self._content() - self._check() - self._sniff() - self._inform() - self._success() - end = timer() - elapsed_time = timedelta(seconds=end - start) - self.logger.debug( - "Fetching {0} done in time: 
{1}s".format(model_id, abs(elapsed_time)) - ) - self.logger.info( - "Fetching {0} done successfully: {1}".format(model_id, elapsed_time) - ) + is_fetched = False + if not self.exists(model_id): + self.logger.debug("Model doesn't exist in your local, fetching it now") + if self.force_with_fastapi: + self._fetch_from_fastapi() + is_fetched = True + if self.force_with_bentoml: + self._fetch_from_bentoml() + is_fetched = True + if is_fetched: + return + else: + self.logger.debug("Deciding fetcher (BentoML or FastAPI)") + fetcher_type = self._decide_fetcher(model_id) + if fetcher_type == PACK_METHOD_FASTAPI: + self._fetch_from_fastapi() + if fetcher_type == PACK_METHOD_BENTOML: + self._fetch_from_bentoml() + else: + self.logger.debug("Model already exists in your local, skipping fetching") def _fetch_from_dockerhub(self, model_id): self.logger.debug("Fetching from DockerHub") @@ -214,14 +204,19 @@ def _fetch(self, model_id): self.logger.debug("Starting fetching procedure") do_hosted = self._decide_if_use_hosted(model_id=model_id) if do_hosted: + self.logger.debug("Fetching from hosted") self._fetch_from_hosted(model_id=model_id) return do_dockerhub = self._decide_if_use_dockerhub(model_id=model_id) if do_dockerhub: + print("Fetching from DockerHub") + self.logger.debug("Fetching from DockerHub") self._fetch_from_dockerhub(model_id=model_id) return if self.overwrite is None: + self.logger.debug("Overwriting") self.overwrite = True + self.logger.debug("Fetching in your system, not from DockerHub") self._fetch_not_from_dockerhub(model_id=model_id) def fetch(self, model_id): diff --git a/ersilia/hub/fetch/fetch_bentoml.py b/ersilia/hub/fetch/fetch_bentoml.py new file mode 100644 index 000000000..4eed90999 --- /dev/null +++ b/ersilia/hub/fetch/fetch_bentoml.py @@ -0,0 +1,149 @@ +"""Fetch model from the Ersilia Model Hub using BentoML.""" + +import json +import os +from timeit import default_timer as timer +from datetime import timedelta + +from .actions.template_resolver 
import TemplateResolver +from .actions.setup import SetupChecker +from .actions.prepare import ModelPreparer +from .actions.get import ModelGetter +from .actions.lake import LakeGetter +from .actions.pack_bentoml import ModelPacker +from .actions.toolize import ModelToolizer +from .actions.content import CardGetter +from .actions.check import ModelChecker +from .actions.sniff_bentoml import ModelSniffer +from .actions.inform import ModelInformer +from .register.register import ModelRegisterer + +from ... import ErsiliaBase + +from . import STATUS_FILE, DONE_TAG + + +class ModelFetcherFromBentoML(ErsiliaBase): + def __init__( + self, + config_json=None, + credentials_json=None, + overwrite=None, + repo_path=None, + mode=None, + pip=False, + dockerize=False, + force_from_github=False, + force_from_s3=False, + ): + ErsiliaBase.__init__( + self, config_json=config_json, credentials_json=credentials_json + ) + self.overwrite = overwrite + self.mode = mode + self.do_pip = pip + self.repo_path = repo_path + if self.mode == "docker": + self.logger.debug("When packing mode is docker, dockerization is mandatory") + dockerize = True + self.do_docker = dockerize + self.force_from_github = force_from_github + self.force_from_s3 = force_from_s3 + + def _setup_check(self): + sc = SetupChecker(model_id=self.model_id, config_json=self.config_json) + sc.check() + + def _prepare(self): + mp = ModelPreparer( + model_id=self.model_id, + overwrite=self.overwrite, + config_json=self.config_json, + ) + mp.prepare() + + def _get(self): + mg = ModelGetter( + model_id=self.model_id, + repo_path=self.repo_path, + config_json=self.config_json, + force_from_gihtub=self.force_from_github, + force_from_s3=self.force_from_s3, + ) + mg.get() + + def _lake(self): + ml = LakeGetter(model_id=self.model_id, config_json=self.config_json) + ml.get() + + def _pack(self): + mp = ModelPacker( + model_id=self.model_id, mode=self.mode, config_json=self.config_json + ) + mp.pack() + + def _toolize(self): + mt 
= ModelToolizer(model_id=self.model_id, config_json=self.config_json) + mt.toolize(do_pip=self.do_pip, do_docker=self.do_docker) + + def _content(self): + cg = CardGetter(self.model_id, self.config_json) + cg.get() + + def _check(self): + mc = ModelChecker(self.model_id, self.config_json) + mc.check() + + def _sniff(self): + sn = ModelSniffer(self.model_id, self.config_json) + sn.sniff() + + def _inform(self): + mi = ModelInformer(self.model_id, self.config_json) + mi.inform() + + def _success(self): + done = {DONE_TAG: True} + status_file = os.path.join(self._dest_dir, self.model_id, STATUS_FILE) + with open(status_file, "w") as f: + json.dump(done, f, indent=4) + mr = ModelRegisterer(self.model_id, config_json=self.config_json) + mr.register(is_from_dockerhub=False) + + def _fetch_not_from_dockerhub(self, model_id): + start = timer() + self.model_id = model_id + self._setup_check() + self._prepare() + self._get() + self._pack() + self._toolize() + self._content() + self._check() + self._sniff() + self._inform() + self._success() + end = timer() + elapsed_time = timedelta(seconds=end - start) + self.logger.debug( + "Fetching {0} done in time: {1}s".format(model_id, abs(elapsed_time)) + ) + self.logger.info( + "Fetching {0} done successfully: {1}".format(model_id, elapsed_time) + ) + + def _fetch(self, model_id): + self.logger.debug("Starting fetching procedure") + if self.overwrite is None: + self.overwrite = True + self._fetch_not_from_dockerhub(model_id=model_id) + + def seems_installable(self, model_id): + tr = TemplateResolver( + model_id=model_id, repo_path=self.repo_path, config_json=self.config_json + ) + self.logger.debug("Checking if the model is installable with BentoML") + return tr.is_bentoml() + + def fetch(self, model_id): + self._fetch(model_id) diff --git a/ersilia/hub/fetch/fetch_fastapi.py b/ersilia/hub/fetch/fetch_fastapi.py new file mode 100644 index 000000000..ab6e37eb0 --- /dev/null +++ b/ersilia/hub/fetch/fetch_fastapi.py @@ -0,0 +1,126 @@ 
+"""Fetch model from the Ersilia Model Hub using FastAPI.""" + +import os +import json + +from timeit import default_timer as timer +from datetime import timedelta + +from .actions.template_resolver import TemplateResolver +from .actions.setup import SetupChecker +from .actions.prepare import ModelPreparer +from .actions.get import ModelGetter +from .actions.pack_fastapi import ModelPacker +from .actions.content import CardGetter +from .actions.check import ModelChecker +from .actions.sniff_fastapi import ModelSniffer +from .actions.inform import ModelInformer +from .register.register import ModelRegisterer + +from ... import ErsiliaBase + +from . import STATUS_FILE, DONE_TAG + + +class ModelFetcherFromFastAPI(ErsiliaBase): + def __init__( + self, + config_json=None, + credentials_json=None, + overwrite=None, + repo_path=None, + mode=None, + force_from_github=False, + force_from_s3=False, + ): + ErsiliaBase.__init__( + self, config_json=config_json, credentials_json=credentials_json + ) + self.repo_path = repo_path + self.overwrite = overwrite + self.mode = mode + self.force_from_github = force_from_github + self.force_from_s3 = force_from_s3 + + def _setup_check(self): + sc = SetupChecker(model_id=self.model_id, config_json=self.config_json) + sc.check() + + def _prepare(self): + mp = ModelPreparer( + model_id=self.model_id, + overwrite=self.overwrite, + config_json=self.config_json, + ) + mp.prepare() + + def _get(self): + mg = ModelGetter( + model_id=self.model_id, + repo_path=self.repo_path, + config_json=self.config_json, + force_from_gihtub=self.force_from_github, + force_from_s3=self.force_from_s3, + ) + mg.get() + + def _pack(self): + mp = ModelPacker( + model_id=self.model_id, mode=self.mode, config_json=self.config_json + ) + mp.pack() + + def _content(self): + cg = CardGetter(self.model_id, self.config_json) + cg.get() + + def _check(self): + mc = ModelChecker(self.model_id, self.config_json) + mc.check() + + def _sniff(self): + sn = 
ModelSniffer(self.model_id, self.config_json) + sn.sniff() + + def _inform(self): + mi = ModelInformer(self.model_id, self.config_json) + mi.inform() + + def _success(self): + done = {DONE_TAG: True} + status_file = os.path.join(self._dest_dir, self.model_id, STATUS_FILE) + with open(status_file, "w") as f: + json.dump(done, f, indent=4) + mr = ModelRegisterer(self.model_id, config_json=self.config_json) + mr.register(is_from_dockerhub=False) + + def _fetch(self, model_id): + start = timer() + self.model_id = model_id + self._setup_check() + self._prepare() + self._get() + self._pack() + self._content() + self._check() + self._sniff() + self._inform() + self._success() + end = timer() + elapsed_time = timedelta(seconds=end - start) + self.logger.debug( + "Fetching {0} done in time: {1}s".format(model_id, abs(elapsed_time)) + ) + self.logger.info( + "Fetching {0} done successfully: {1}".format(model_id, elapsed_time) + ) + + def seems_installable(self, model_id): + tr = TemplateResolver( + model_id=model_id, repo_path=self.repo_path, config_json=self.config_json + ) + return tr.is_fastapi() + + def fetch(self, model_id): + self.logger.debug("Fetching from FastAPI...") + self._fetch(model_id=model_id) diff --git a/ersilia/hub/fetch/lazy_fetchers/dockerhub.py b/ersilia/hub/fetch/lazy_fetchers/dockerhub.py index 899001bbe..147eb6666 100644 --- a/ersilia/hub/fetch/lazy_fetchers/dockerhub.py +++ b/ersilia/hub/fetch/lazy_fetchers/dockerhub.py @@ -2,7 +2,13 @@ from .... import ErsiliaBase, throw_ersilia_exception from .... 
import EOS -from ....default import DOCKERHUB_ORG, DOCKERHUB_LATEST_TAG, PREDEFINED_EXAMPLE_FILENAME, INFORMATION_FILE, API_SCHEMA_FILE +from ....default import ( + DOCKERHUB_ORG, + DOCKERHUB_LATEST_TAG, + PREDEFINED_EXAMPLE_FILES, + INFORMATION_FILE, + API_SCHEMA_FILE, +) from ...pull.pull import ModelPuller from ....serve.services import PulledDockerImageService @@ -61,7 +67,6 @@ def copy_metadata(self, model_id): local_path=to_file, org=DOCKERHUB_ORG, img=model_id, - tag=DOCKERHUB_LATEST_TAG, ) @@ -77,20 +82,20 @@ def copy_status(self, model_id): ) def copy_example_if_available(self, model_id): - fr_file = "/root/eos/dest/{0}/model/framework/{1}".format( - model_id, PREDEFINED_EXAMPLE_FILENAME - ) - to_file = "{0}/dest/{1}/{2}".format(EOS, model_id, PREDEFINED_EXAMPLE_FILENAME) - try: - self.simple_docker.cp_from_image( - img_path=fr_file, - local_path=to_file, - org=DOCKERHUB_ORG, - img=model_id, - tag=DOCKERHUB_LATEST_TAG, - ) - except: - self.logger.debug("Could not find example file in docker image") + for pf in PREDEFINED_EXAMPLE_FILES: + fr_file = "/root/eos/dest/{0}/{1}".format(model_id, pf) + to_file = "{0}/dest/{1}/{2}".format(EOS, model_id, "input.csv") + try: + self.simple_docker.cp_from_image( + img_path=fr_file, + local_path=to_file, + org=DOCKERHUB_ORG, + img=model_id, + tag=DOCKERHUB_LATEST_TAG, + ) + return + except: + self.logger.debug("Could not find example file in docker image") @throw_ersilia_exception def fetch(self, model_id): diff --git a/ersilia/hub/fetch/pack/__init__.py b/ersilia/hub/fetch/pack/__init__.py index cc504faf9..e69de29bb 100644 --- a/ersilia/hub/fetch/pack/__init__.py +++ b/ersilia/hub/fetch/pack/__init__.py @@ -1,149 +0,0 @@ -import os -import shutil - -from .... import ErsiliaBase -from ...bundle.repo import DockerfileFile -from ...delete.delete import ModelBentoDeleter - -from .. 
import MODEL_INSTALL_COMMANDS_FILE -from ....default import BENTOML_PATH, H5_DATA_FILE, ISAURA_FILE_TAG, H5_EXTENSION - - -class _Deleter(ErsiliaBase): - def __init__(self, model_id, config_json): - ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) - self.model_id = model_id - self.config_json = config_json - self._delete_bentoml_if_exists() - - def _delete_bentoml_if_exists(self): - bentoml_path = os.path.join(BENTOML_PATH, "repository", self.model_id) - if os.path.exists(bentoml_path): - self.logger.debug( - "BentoML path exists! Removing it: {0}".format(bentoml_path) - ) - deleter = ModelBentoDeleter(config_json=self.config_json) - deleter.delete(model_id=self.model_id) - self.logger.debug("Trying to remove path: {0}".format(bentoml_path)) - try: - self.logger.debug("...successfully") - shutil.rmtree(bentoml_path) - except: - self.logger.debug("...but path did not exist!") - - -class _Symlinker(ErsiliaBase): - def __init__(self, model_id, config_json): - ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) - self.model_id = model_id - - def _dest_bundle_symlink(self): - # TODO: improve function so that it treats other files. 
- # at the moment it only deals with the model folder - self.logger.debug("Creating model symlink bundle artifacts > dest") - model_id = self.model_id - path = self._model_path(model_id) - model_path = os.path.join(path, "model") - if os.path.exists(model_path): - shutil.rmtree(model_path) - bundle_dir = self._get_bundle_location(model_id) - src = os.path.join(bundle_dir, model_id, "artifacts") - os.symlink(src, model_path, target_is_directory=True) - # env - env_path = os.path.join(path, "env") - if os.path.exists(env_path): - self.logger.debug("Creating env symlink dest <> bundle") - trg = os.path.join(bundle_dir, "env") - self.logger.debug(trg) - shutil.move(env_path, trg) - os.symlink(trg, env_path, target_is_directory=True) - # model_install_commands - model_install_commands_path = os.path.join(path, MODEL_INSTALL_COMMANDS_FILE) - if not os.path.exists(model_install_commands_path): - with open(model_install_commands_path, "w") as f: - pass - trg = os.path.join(bundle_dir, MODEL_INSTALL_COMMANDS_FILE) - self.logger.debug("Creating model_install_commands.sh symlink dest <> bundle") - shutil.move(model_install_commands_path, trg) - os.symlink(trg, model_install_commands_path) - - def _bentoml_bundle_symlink(self): - model_id = self.model_id - src = self._get_bentoml_location(model_id) - self.logger.debug("BentoML location is {0}".format(src)) - dst_ = os.path.join(self._bundles_dir, model_id) - os.makedirs(dst_, exist_ok=True) - dst = os.path.join(dst_, os.path.basename(src)) - shutil.move(src, dst) - self.logger.debug("Ersilia Bento location is {0}".format(dst)) - self.logger.debug("Building symlinks between {0} and {1}".format(dst, src)) - os.symlink(dst, src, target_is_directory=True) - - def _dest_lake_symlink(self): - src = os.path.join(self._model_path(self.model_id), H5_DATA_FILE) - dst = os.path.join( - self._lake_dir, - "{0}{1}{2}".format(self.model_id, ISAURA_FILE_TAG, H5_EXTENSION), - ) - if os.path.exists(src) and os.path.exists(os.path.dirname(dst)): 
- self.logger.debug("Symbolic link from {0}".format(src)) - self.logger.debug("Symbolic link to {0}".format(dst)) - os.symlink(src, dst, target_is_directory=False) - else: - self.logger.info( - "Could not create symbolic link from {0} to {1}".format(src, dst) - ) - - def _symlinks(self): - self._bentoml_bundle_symlink() - self._dest_bundle_symlink() - self._dest_lake_symlink() - - -class _Writer(ErsiliaBase): - def __init__(self, model_id, config_json): - ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) - self.model_id = model_id - - def _write_model_install_commands(self): - self.logger.debug("Writing install commands") - dockerfile = DockerfileFile(self._model_path(self.model_id)) - dis_warn = "--disable-pip-version-check" - version = dockerfile.get_bentoml_version() - runs = dockerfile.get_install_commands()["commands"] - self.logger.debug("Run commands: {0}".format(runs)) - fn = os.path.join(self._model_path(self.model_id), MODEL_INSTALL_COMMANDS_FILE) - self.logger.debug("Writing install commands in {0}".format(fn)) - with open(fn, "w") as f: - for r in runs: - if r[:3] == "pip": - is_pip3 = None - if r[:4] == "pip ": - r = "python -m pip " + r[4:] - is_pip3 = False - if r[:4] == "pip3": - is_pip3 = True - assert is_pip3 is not None - r = r.split(" ") - if is_pip3: - r = " ".join([r[0]] + [dis_warn] + r[1:]) - else: - r = " ".join(r[:3] + [dis_warn] + r[3:]) - f.write("{0}{1}".format(r, os.linesep)) - if version["version"] == "0.11.0": - cmd = "python -m pip {1} install git+https://github.com/ersilia-os/bentoml-ersilia.git{0}".format( - os.linesep, dis_warn - ) - else: - cmd = "python -m pip {2} install bentoml=={0}{1}".format( - version["version"], os.linesep, dis_warn - ) - f.write(cmd) - return fn - - -class BasePack(_Deleter, _Symlinker, _Writer): - def __init__(self, model_id, config_json): - _Deleter.__init__(self, model_id, config_json) - _Symlinker.__init__(self, model_id, config_json) - _Writer.__init__(self, model_id, 
config_json) diff --git a/ersilia/hub/fetch/pack/bentoml_pack/__init__.py b/ersilia/hub/fetch/pack/bentoml_pack/__init__.py new file mode 100644 index 000000000..0da838c26 --- /dev/null +++ b/ersilia/hub/fetch/pack/bentoml_pack/__init__.py @@ -0,0 +1,149 @@ +import os +import shutil + +from ..... import ErsiliaBase +from ....bundle.repo import DockerfileFile +from ....delete.delete import ModelBentoDeleter + +from ... import MODEL_INSTALL_COMMANDS_FILE +from .....default import BENTOML_PATH, H5_DATA_FILE, ISAURA_FILE_TAG, H5_EXTENSION + + +class _Deleter(ErsiliaBase): + def __init__(self, model_id, config_json): + ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) + self.model_id = model_id + self.config_json = config_json + self._delete_bentoml_if_exists() + + def _delete_bentoml_if_exists(self): + bentoml_path = os.path.join(BENTOML_PATH, "repository", self.model_id) + if os.path.exists(bentoml_path): + self.logger.debug( + "BentoML path exists! Removing it: {0}".format(bentoml_path) + ) + deleter = ModelBentoDeleter(config_json=self.config_json) + deleter.delete(model_id=self.model_id) + self.logger.debug("Trying to remove path: {0}".format(bentoml_path)) + try: + self.logger.debug("...successfully") + shutil.rmtree(bentoml_path) + except: + self.logger.debug("...but path did not exist!") + + +class _Symlinker(ErsiliaBase): + def __init__(self, model_id, config_json): + ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) + self.model_id = model_id + + def _dest_bundle_symlink(self): + # TODO: improve function so that it treats other files. 
+ # at the moment it only deals with the model folder + self.logger.debug("Creating model symlink bundle artifacts > dest") + model_id = self.model_id + path = self._model_path(model_id) + model_path = os.path.join(path, "model") + if os.path.exists(model_path): + shutil.rmtree(model_path) + bundle_dir = self._get_bundle_location(model_id) + src = os.path.join(bundle_dir, model_id, "artifacts") + os.symlink(src, model_path, target_is_directory=True) + # env + env_path = os.path.join(path, "env") + if os.path.exists(env_path): + self.logger.debug("Creating env symlink dest <> bundle") + trg = os.path.join(bundle_dir, "env") + self.logger.debug(trg) + shutil.move(env_path, trg) + os.symlink(trg, env_path, target_is_directory=True) + # model_install_commands + model_install_commands_path = os.path.join(path, MODEL_INSTALL_COMMANDS_FILE) + if not os.path.exists(model_install_commands_path): + with open(model_install_commands_path, "w") as f: + pass + trg = os.path.join(bundle_dir, MODEL_INSTALL_COMMANDS_FILE) + self.logger.debug("Creating model_install_commands.sh symlink dest <> bundle") + shutil.move(model_install_commands_path, trg) + os.symlink(trg, model_install_commands_path) + + def _bentoml_bundle_symlink(self): + model_id = self.model_id + src = self._get_bentoml_location(model_id) + self.logger.debug("BentoML location is {0}".format(src)) + dst_ = os.path.join(self._bundles_dir, model_id) + os.makedirs(dst_, exist_ok=True) + dst = os.path.join(dst_, os.path.basename(src)) + shutil.move(src, dst) + self.logger.debug("Ersilia Bento location is {0}".format(dst)) + self.logger.debug("Building symlinks between {0} and {1}".format(dst, src)) + os.symlink(dst, src, target_is_directory=True) + + def _dest_lake_symlink(self): + src = os.path.join(self._model_path(self.model_id), H5_DATA_FILE) + dst = os.path.join( + self._lake_dir, + "{0}{1}{2}".format(self.model_id, ISAURA_FILE_TAG, H5_EXTENSION), + ) + if os.path.exists(src) and os.path.exists(os.path.dirname(dst)): 
+ self.logger.debug("Symbolic link from {0}".format(src)) + self.logger.debug("Symbolic link to {0}".format(dst)) + os.symlink(src, dst, target_is_directory=False) + else: + self.logger.info( + "Could not create symbolic link from {0} to {1}".format(src, dst) + ) + + def _symlinks(self): + self._bentoml_bundle_symlink() + self._dest_bundle_symlink() + self._dest_lake_symlink() + + +class _Writer(ErsiliaBase): + def __init__(self, model_id, config_json): + ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) + self.model_id = model_id + + def _write_model_install_commands(self): + self.logger.debug("Writing install commands") + dockerfile = DockerfileFile(self._model_path(self.model_id)) + dis_warn = "--disable-pip-version-check" + version = dockerfile.get_bentoml_version() + runs = dockerfile.get_install_commands()["commands"] + self.logger.debug("Run commands: {0}".format(runs)) + fn = os.path.join(self._model_path(self.model_id), MODEL_INSTALL_COMMANDS_FILE) + self.logger.debug("Writing install commands in {0}".format(fn)) + with open(fn, "w") as f: + for r in runs: + if r[:3] == "pip": + is_pip3 = None + if r[:4] == "pip ": + r = "python -m pip " + r[4:] + is_pip3 = False + if r[:4] == "pip3": + is_pip3 = True + assert is_pip3 is not None + r = r.split(" ") + if is_pip3: + r = " ".join([r[0]] + [dis_warn] + r[1:]) + else: + r = " ".join(r[:3] + [dis_warn] + r[3:]) + f.write("{0}{1}".format(r, os.linesep)) + if version["version"] == "0.11.0": + cmd = "python -m pip {1} install git+https://github.com/ersilia-os/bentoml-ersilia.git{0}".format( + os.linesep, dis_warn + ) + else: + cmd = "python -m pip {2} install bentoml=={0}{1}".format( + version["version"], os.linesep, dis_warn + ) + f.write(cmd) + return fn + + +class BasePack(_Deleter, _Symlinker, _Writer): + def __init__(self, model_id, config_json): + _Deleter.__init__(self, model_id, config_json) + _Symlinker.__init__(self, model_id, config_json) + _Writer.__init__(self, model_id, 
config_json) diff --git a/ersilia/hub/fetch/pack/mode.py b/ersilia/hub/fetch/pack/bentoml_pack/mode.py similarity index 92% rename from ersilia/hub/fetch/pack/mode.py rename to ersilia/hub/fetch/pack/bentoml_pack/mode.py index c58117d51..94e52eec4 100644 --- a/ersilia/hub/fetch/pack/mode.py +++ b/ersilia/hub/fetch/pack/bentoml_pack/mode.py @@ -1,13 +1,13 @@ import os import json -from .... import ErsiliaBase -from ...bundle.repo import DockerfileFile -from ....utils.versioning import Versioner -from ....utils.system import SystemChecker -from ....setup.requirements.conda import CondaRequirement -from ....setup.requirements.docker import DockerRequirement -from ....default import MODEL_CONFIG_FILENAME +from ..... import ErsiliaBase +from ....bundle.repo import DockerfileFile +from .....utils.versioning import Versioner +from .....utils.system import SystemChecker +from .....setup.requirements.conda import CondaRequirement +from .....setup.requirements.docker import DockerRequirement +from .....default import MODEL_CONFIG_FILENAME AVAILABLE_MODES = ["system", "venv", "conda", "docker"] diff --git a/ersilia/hub/fetch/pack/runners.py b/ersilia/hub/fetch/pack/bentoml_pack/runners.py similarity index 93% rename from ersilia/hub/fetch/pack/runners.py rename to ersilia/hub/fetch/pack/bentoml_pack/runners.py index 2b544e1e5..d7418e1ce 100644 --- a/ersilia/hub/fetch/pack/runners.py +++ b/ersilia/hub/fetch/pack/bentoml_pack/runners.py @@ -7,18 +7,18 @@ bentoml = None from . import BasePack -from ....utils.terminal import run_command -from ....db.environments.localdb import EnvironmentDb -from ....db.environments.managers import DockerManager -from ....utils.venv import SimpleVenv -from ....utils.conda import SimpleConda -from ....utils.docker import SimpleDocker -from ....setup.baseconda import SetupBaseConda - -from ....default import DEFAULT_VENV -from .. import MODEL_INSTALL_COMMANDS_FILE -from .... 
import throw_ersilia_exception -from ....utils.exceptions_utils.fetch_exceptions import CondaEnvironmentExistsError +from .....utils.terminal import run_command +from .....db.environments.localdb import EnvironmentDb +from .....db.environments.managers import DockerManager +from .....utils.venv import SimpleVenv +from .....utils.conda import SimpleConda +from .....utils.docker import SimpleDocker +from .....setup.baseconda import SetupBaseConda + +from .....default import DEFAULT_VENV +from ... import MODEL_INSTALL_COMMANDS_FILE +from ..... import throw_ersilia_exception +from .....utils.exceptions_utils.fetch_exceptions import CondaEnvironmentExistsError USE_CHECKSUM = False diff --git a/ersilia/hub/fetch/pack/fastapi_pack/__init__.py b/ersilia/hub/fetch/pack/fastapi_pack/__init__.py new file mode 100644 index 000000000..cef67b430 --- /dev/null +++ b/ersilia/hub/fetch/pack/fastapi_pack/__init__.py @@ -0,0 +1,48 @@ +import os +import shutil + +from .....default import H5_DATA_FILE, ISAURA_FILE_TAG, H5_EXTENSION +from ..... 
import ErsiliaBase + + +class _Symlinker(ErsiliaBase): + def __init__(self, model_id, config_json): + ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) + self.model_id = model_id + + def _dest_bundle_symlink(self): + self.logger.debug("Creating model symlink bundle > dest") + model_id = self.model_id + path = self._model_path(model_id) + model_path = os.path.join(path, "model") + if os.path.exists(model_path): + shutil.rmtree(model_path) + bundle_dir = self._get_bundle_location(model_id) + src = os.path.join(bundle_dir, "model") + self.logger.debug("Creating symlink from {0}".format(src)) + self.logger.debug("Creating symlink to {0}".format(model_path)) + os.symlink(src, model_path, target_is_directory=True) + + def _dest_lake_symlink(self): + src = os.path.join(self._model_path(self.model_id), H5_DATA_FILE) + dst = os.path.join( + self._lake_dir, + "{0}{1}{2}".format(self.model_id, ISAURA_FILE_TAG, H5_EXTENSION), + ) + if os.path.exists(src) and os.path.exists(os.path.dirname(dst)): + self.logger.debug("Symbolic link from {0}".format(src)) + self.logger.debug("Symbolic link to {0}".format(dst)) + os.symlink(src, dst, target_is_directory=False) + else: + self.logger.info( + "Could not create symbolic link from {0} to {1}".format(src, dst) + ) + + def _symlinks(self): + self._dest_bundle_symlink() + self._dest_lake_symlink() + + +class BasePack(_Symlinker): + def __init__(self, model_id, config_json=None): + _Symlinker.__init__(self, model_id, config_json) diff --git a/ersilia/hub/fetch/pack/fastapi_pack/mode.py b/ersilia/hub/fetch/pack/fastapi_pack/mode.py new file mode 100644 index 000000000..112d7d9e1 --- /dev/null +++ b/ersilia/hub/fetch/pack/fastapi_pack/mode.py @@ -0,0 +1,53 @@ +import os +import json + +from .....default import MODEL_CONFIG_FILENAME +from .....utils.system import SystemChecker +from ..... 
import ErsiliaBase + + +AVAILABLE_MODES = ["system", "conda"] + + +class PackModeDecision(ErsiliaBase): + def __init__(self, model_id, config_json): + ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) + self.model_id = model_id + + def decide_from_config_file_if_available(self): + folder = self._model_path(self.model_id) + if not os.path.exists(os.path.join(folder, MODEL_CONFIG_FILENAME)): + return None + with open(os.path.join(folder, MODEL_CONFIG_FILENAME), "r") as f: + model_config = json.load(f) + if "default_mode" in model_config: + default_mode = model_config["default_mode"] + if default_mode not in AVAILABLE_MODES: + raise Exception( + "The model default_mode specified in the config.json file of the model repo is not correct. It should be one of {0}".format( + " ".join(AVAILABLE_MODES) + ) + ) + else: + return default_mode + return None + + def decide(self): + sc = SystemChecker() + if sc.is_github_action(): + self.logger.debug( + "Code is being run inside a GitHub Actions workflow. Use conda as a by-default mode." + ) + return "conda" + mode = self.decide_from_config_file_if_available() + if mode is not None: + self.logger.debug("Mode is already specified in the model repository") + self.logger.debug("Mode: {0}".format(mode)) + return mode + self.logger.debug( + "Check if model can be run with vanilla (system) code. This is a default option when inside a docker container." + ) + if sc.is_inside_docker(): + return "system" + else: + return "conda" diff --git a/ersilia/hub/fetch/pack/fastapi_pack/runners.py b/ersilia/hub/fetch/pack/fastapi_pack/runners.py new file mode 100644 index 000000000..afc71209b --- /dev/null +++ b/ersilia/hub/fetch/pack/fastapi_pack/runners.py @@ -0,0 +1,115 @@ +import os +import yaml + +from . 
import BasePack +from .....utils.terminal import run_command +from .....db.environments.localdb import EnvironmentDb +from .....utils.conda import SimpleConda +from .....utils.docker import SimpleDockerfileParser + +from ..... import throw_ersilia_exception +from .....utils.exceptions_utils.fetch_exceptions import CondaEnvironmentExistsError + +from ..... import EOS + + +class SystemPack(BasePack): + def __init__(self, model_id, config_json): + BasePack.__init__(self, model_id, config_json) + self.logger.debug("Initializing system packer") + + def _run(self): + self.logger.debug("Packing model with system installation") + dest_dir = self._model_path(self.model_id) + bundle_dir = os.path.join(EOS, "repository") + cmd = "ersilia_model_pack --repo_path {0} --bundles_repo_path {1}".format( + dest_dir, bundle_dir + ) + self.logger.debug("Running packing command {0}".format(cmd)) + run_command(cmd) + self._symlinks() + + def run(self): + self._run() + + +class CondaPack(BasePack): + def __init__(self, model_id, config_json): + BasePack.__init__(self, model_id, config_json) + self.conda = SimpleConda() + self.logger.debug("Initializing conda packer") + + def _decide_python_version(self): + install_yml_file = os.path.join(self._model_path(self.model_id), "install.yml") + if os.path.exists(install_yml_file): + self.logger.debug("Reading python version from install.yml") + with open(install_yml_file, "r") as f: + data = yaml.safe_load(f) + return data["python"] + dockerfile_file = os.path.join(self._model_path(self.model_id), "Dockerfile") + if os.path.exists(dockerfile_file): + self.logger.debug("Reading python version from Dockerfile") + pyver = ( + SimpleDockerfileParser(dockerfile_file).get_baseimage().split("-py")[-1] + ) + pyver = pyver[0] + "." 
+ pyver[1:] + return pyver + self.logger.error("Could not find python version in install.yml or Dockerfile") + return None + + def _setup(self): + self.logger.debug("Setting up") + model_id = self.model_id + env = model_id + python_version = self._decide_python_version() + self.logger.debug("Conda environment {0}".format(env)) + if not self.conda.exists(env): + self.logger.debug("Environment {0} does not exist".format(env)) + self.conda.create(environment=env, python_version=python_version) + self.logger.debug("Creating base conda environment") + commandlines = [ + "python -m pip install git+https://github.com/ersilia-os/ersilia-pack.git" + ] + self.conda.run_commandlines(environment=env, commandlines=commandlines) + self.logger.debug( + "Storing Conda environment in the local environment database" + ) + db = EnvironmentDb(config_json=self.config_json) + db.table = "conda" + db.insert(model_id=model_id, env=env) + self.logger.debug("Done with the Conda setup") + else: + self.logger.debug("Environment {0} does exist".format(env)) + + if env is not None: + if not self.conda.exists(env): + raise CondaEnvironmentExistsError(env) + return env + + def _run(self): + env = self._setup() + self.logger.debug("Using environment {0}".format(env)) + dest_dir = self._model_path(self.model_id) + bundle_dir = os.path.join(EOS, "repository") + cmd = "ersilia_model_pack --repo_path {0} --bundles_repo_path {1} --conda_env_name {2}".format( + dest_dir, bundle_dir, env + ) + self.logger.debug("Running command: {0}".format(cmd)) + self.conda.run_commandlines(environment=env, commandlines=cmd) + self.logger.debug( + "Packing command successfully run inside {0} conda environment".format(env) + ) + self._symlinks() + self.logger.debug("Symlinks created") + + @throw_ersilia_exception + def run(self): + self.logger.debug("Packing model with Conda") + self._run() + + +def get_runner(pack_mode): + if pack_mode == "system": + return SystemPack + if pack_mode == "conda": + return CondaPack diff 
--git a/ersilia/hub/pull/pull.py b/ersilia/hub/pull/pull.py index fb7d52924..15ed82e11 100644 --- a/ersilia/hub/pull/pull.py +++ b/ersilia/hub/pull/pull.py @@ -7,7 +7,10 @@ from ... import ErsiliaBase from ...utils.terminal import yes_no_input, run_command from ... import throw_ersilia_exception -from ...utils.exceptions_utils.pull_exceptions import DockerImageNotAvailableError, DockerConventionalPullError +from ...utils.exceptions_utils.pull_exceptions import ( + DockerImageNotAvailableError, + DockerConventionalPullError, +) from ...utils.docker import SimpleDocker from ...default import DOCKERHUB_ORG, DOCKERHUB_LATEST_TAG @@ -120,7 +123,9 @@ def pull(self): pull_log = f.read() self.logger.debug(pull_log) if re.search(r"no match.*manifest", pull_log): - self.logger.warning("No matching manifest for image {0}".format(self.model_id)) + self.logger.warning( + "No matching manifest for image {0}".format(self.model_id) + ) raise DockerConventionalPullError(model=self.model_id) self.logger.debug("Image pulled succesfully!") except DockerConventionalPullError: diff --git a/ersilia/io/input.py b/ersilia/io/input.py index ff387600c..f50fc4378 100644 --- a/ersilia/io/input.py +++ b/ersilia/io/input.py @@ -16,7 +16,7 @@ from .readers.pyinput import PyInputReader from .readers.file import TabularFileReader, JsonFileReader -from ..default import PREDEFINED_EXAMPLE_FILENAME +from ..default import PREDEFINED_EXAMPLE_FILES class BaseIOGetter(ErsiliaBase): @@ -26,7 +26,11 @@ def __init__(self, config_json=None): def _read_input_from_card(self, model_id): self.logger.debug("Reading card from {0}".format(model_id)) - input_type = self.mc.get(model_id)["Input"] + # This is because ersilia-pack adds another level in the JSON with the key "card" + if "Input" not in self.mc.get(model_id): + input_type = self.mc.get(model_id)["card"]["Input"] + else: + input_type = self.mc.get(model_id)["Input"] if len(input_type) != 1: self.logger.error("Ersilia does not deal with multiple inputs 
yet..!")
         else:
@@ -35,6 +39,7 @@ def _read_input_from_card(self, model_id):
 
     def _read_shape_from_card(self, model_id):
         self.logger.debug("Reading shape from {0}".format(model_id))
+
         try:
             input_shape = self.mc.get(model_id)["Input Shape"]
         except:
@@ -253,12 +258,13 @@ def random_example(self, n_samples, file_name, simple):
 
     def predefined_example(self, file_name):
         dest_folder = self._model_path(self.model_id)
-        example_file = os.path.join(dest_folder, PREDEFINED_EXAMPLE_FILENAME)
-        if os.path.exists(example_file):
-            shutil.copy(example_file, file_name)
-            return True
-        else:
-            return False
+        for pf in PREDEFINED_EXAMPLE_FILES:
+            example_file = os.path.join(dest_folder, pf)
+            if os.path.exists(example_file):
+                shutil.copy(example_file, file_name)
+                return True
+        # Only give up after every candidate predefined file has been checked
+        return False
 
     def example(self, n_samples, file_name, simple, try_predefined):
         predefined_done = False
diff --git a/ersilia/io/output.py b/ersilia/io/output.py
index de729364e..3c9dde1f4 100644
--- a/ersilia/io/output.py
+++ b/ersilia/io/output.py
@@ -11,8 +11,9 @@
 from ..serve.schema import ApiSchema
 from ..
import ErsiliaBase from ..utils.hdf5 import Hdf5Data, Hdf5DataStacker -from ..default import FEATURE_MERGE_PATTERN from ..db.hubdata.interfaces import AirtableInterface +from ..default import FEATURE_MERGE_PATTERN, PACK_METHOD_FASTAPI +from ..utils.paths import resolve_pack_method class DataFrame(object): @@ -133,6 +134,10 @@ def __init__(self, model_id=None, config_json=None): ["array", "numeric_array", "string_array", "mixed_array"] ) self.model_id = model_id + self.was_fast_api = ( + resolve_pack_method(self._get_bundle_location(self.model_id)) + == PACK_METHOD_FASTAPI + ) @staticmethod def _is_string(output): @@ -322,7 +327,7 @@ def merge(self, subfiles, output_file): fo.write(l) use_header = False - def adapt(self, result, output, model_id=None, api_name=None): + def _adapt_generic(self, result, output, model_id=None, api_name=None): if model_id is not None and api_name is not None and self.api_schema is None: self.api_schema = ApiSchema(model_id=model_id, config_json=self.config_json) if self.api_schema is not None: @@ -331,7 +336,7 @@ def adapt(self, result, output, model_id=None, api_name=None): else: self.api_schema = None if output is not None and self._schema is None: - raise Exception + raise Exception("Schema not available") if self._has_extension(output, "json"): data = json.loads(result) with open(output, "w") as f: @@ -347,6 +352,63 @@ def adapt(self, result, output, model_id=None, api_name=None): df.write(output) return result + def _adapt_when_fastapi_was_used( + self, result, output, model_id=None, api_name=None + ): + if api_name != "run": + return None + if model_id is None: + return None + if output is None: + return None + if not self.was_fast_api: + return None + if self._has_extension(output, "csv"): + extension = "csv" + elif self._has_extension(output, "tsv"): + extension = "tsv" + elif self._has_extension(output, "h5"): + extension = "h5" + elif self._has_extension(output, "json"): + extension = "json" + else: + extension = None + 
delimiters = {"csv": ",", "tsv": "\t"} + if extension in ["csv", "tsv"]: + R = [] + for r in json.loads(result): + inp = r["input"] + out = r["output"] + vals = [out[k] for k in out.keys()] + R += [[inp["key"], inp["input"]] + vals] + header = ["key", "input"] + [k for k in out.keys()] + with open(output, "w") as f: + writer = csv.writer(f, delimiter=delimiters[extension]) + writer.writerow(header) + for r in R: + writer.writerow(r) + elif extension == "json": + data = json.loads(result) + with open(output, "w") as f: + json.dump(data, f, indent=4) + elif extension == "h5": + df = self._to_dataframe( + result, model_id + ) # TODO: we can potentially simplify this and get rid of the to_dataframe method for conversion to HDF5. + df.write(output) + else: + pass + return result + + def adapt(self, result, output, model_id=None, api_name=None): + adapted_result = self._adapt_when_fastapi_was_used( + result, output, model_id, api_name + ) + if adapted_result is None: + return self._adapt_generic(result, output, model_id, api_name) + else: + return adapted_result + class DictlistDataframeConverter(GenericOutputAdapter): def __init__(self, config_json): diff --git a/ersilia/publish/store.py b/ersilia/publish/store.py index 210b7fea1..0377170cb 100644 --- a/ersilia/publish/store.py +++ b/ersilia/publish/store.py @@ -3,6 +3,7 @@ This functionality is used when developing of a model is done. """ + import os import shutil from .. 
import ErsiliaBase diff --git a/ersilia/serve/autoservice.py b/ersilia/serve/autoservice.py index e9adbea58..1c96601b5 100644 --- a/ersilia/serve/autoservice.py +++ b/ersilia/serve/autoservice.py @@ -38,7 +38,7 @@ def __init__( url=None, ): ErsiliaBase.__init__(self, config_json=config_json) - self.logger.debug("Setting AutoService for {0}".format(model_id)) + self.logger.debug("Setting BentoML AutoService for {0}".format(model_id)) self.config_json = config_json self.model_id = model_id self._meta = None diff --git a/ersilia/serve/services.py b/ersilia/serve/services.py index 7a4ac452e..33f6c3d3b 100644 --- a/ersilia/serve/services.py +++ b/ersilia/serve/services.py @@ -9,6 +9,7 @@ from .. import ErsiliaBase, throw_ersilia_exception from ..utils.terminal import run_command from ..utils.ports import find_free_port +from ..utils.paths import resolve_pack_method from ..db.environments.localdb import EnvironmentDb from ..db.environments.managers import DockerManager from ..setup.requirements.docker import DockerRequirement @@ -20,6 +21,7 @@ from ..default import DOCKERHUB_ORG, DOCKERHUB_LATEST_TAG from ..default import IS_FETCHED_FROM_HOSTED_FILE from ..default import INFORMATION_FILE +from ..default import PACK_METHOD_BENTOML, PACK_METHOD_FASTAPI from ..utils.exceptions_utils.serve_exceptions import ( BadGatewayError, DockerNotActiveError, @@ -37,10 +39,26 @@ def __init__(self, model_id, config_json=None, preferred_port=None, url=None): self.bundle_tag = self._get_latest_bundle_tag(model_id=self.model_id) self.port = preferred_port + def _get_apis_from_apis_list(self): + self.logger.debug("Getting APIs from list file") + file_name = os.path.join( + self._get_bundle_location(self.model_id), APIS_LIST_FILE + ) + if not os.path.exists(file_name): + return None + with open(file_name, "r") as f: + apis_list = [] + for l in f: + apis_list += [l.rstrip()] + if len(apis_list) > 0: + return apis_list + else: + return None + def _get_info_from_bento(self): """Get info 
available from the Bento""" tmp_folder = tempfile.mkdtemp(prefix="ersilia-") - tmp_file = os.path.join(tmp_folder, "info.json") + tmp_file = os.path.join(tmp_folder, "information.json") cmd = "bentoml info --quiet {0}:{1} > {2}".format( self.model_id, self.bundle_tag, tmp_file ) @@ -62,27 +80,29 @@ def _get_apis_from_bento(self): apis_list += [item["name"]] return apis_list - def _get_apis_from_apis_list(self): - self.logger.debug("Getting APIs from list file") - file_name = os.path.join( - self._get_bundle_location(self.model_id), APIS_LIST_FILE - ) - if not os.path.exists(file_name): - return None - with open(file_name, "r") as f: - apis_list = [] - for l in f: - apis_list += [l.rstrip()] - print(apis_list) - if len(apis_list) > 0: - return apis_list - else: - return None + def _get_apis_from_fastapi(self): + bundle_path = self._model_path(self.model_id) + apis_list = [] + for fn in os.listdir(os.path.join(bundle_path, "model", "framework")): + if fn.endswith(".sh"): + api_name = fn.split(".")[0] + apis_list += [api_name] + return apis_list def _get_apis_from_where_available(self): apis_list = self._get_apis_from_apis_list() if apis_list is None: - apis_list = self._get_apis_from_bento() + pack_method = resolve_pack_method( + model_path=self._get_bundle_location(self.model_id) + ) + if pack_method == PACK_METHOD_FASTAPI: + self.logger.debug("Getting APIs from FastAPI") + apis_list = self._get_apis_from_fastapi() + elif pack_method == PACK_METHOD_BENTOML: + self.logger.debug("Getting APIs from BentoML") + apis_list = self._get_apis_from_bento() + else: + raise if apis_list is None: apis_list = [] for api in apis_list: @@ -108,7 +128,7 @@ def __init__(self, model_id, config_json=None, preferred_port=None): self.SEARCH_SUF_STRING = "Press CTRL+C to quit" self.ERROR_STRING = "error" - def _bentoml_serve(self, runcommand_func=None): + def serve(self, runcommand_func=None): self.logger.debug("Trying to serve model with BentoML locally") preferred_port = self.port 
self.port = find_free_port(preferred_port=preferred_port) @@ -194,16 +214,157 @@ def _bentoml_serve(self, runcommand_func=None): self.logger.debug("No URL found") self.url = None - def _close(self): + def close(self): + try: + os.kill(self.pid, 9) + except: + self.logger.info("PID {0} is unassigned".format(self.pid)) + + +class _FastApiService(BaseServing): + def __init__(self, model_id, config_json=None, preferred_port=None): + BaseServing.__init__( + self, + model_id=model_id, + config_json=config_json, + preferred_port=preferred_port, + ) + self.SEARCH_PRE_STRING = "Uvicorn running on " + self.SEARCH_SUF_STRING = "(Press CTRL+C to quit)" + self.ERROR_STRING = "error" + self.conda = SimpleConda() + + def serve(self, runcommand_func=None): + bundle_path = self._get_bundle_location(self.model_id) + self.logger.debug("Trying to serve model with FastAPI locally") + preferred_port = self.port + self.port = find_free_port(preferred_port=preferred_port) + if self.port != preferred_port: + self.logger.warning( + "Port {0} was already in use. Using {1} instead".format( + preferred_port, self.port + ) + ) + self.logger.debug("Free port: {0}".format(self.port)) + tmp_folder = tempfile.mkdtemp(prefix="ersilia-") + tmp_script = os.path.join(tmp_folder, "serve.sh") + tmp_file = os.path.join(tmp_folder, "serve.log") + tmp_pid = os.path.join(tmp_folder, "serve.pid") + sl = [ + "ersilia_model_serve --bundle_path {0} --port {1} &> {2} &".format( + bundle_path, self.port, tmp_file + ) + ] + sl += ["_pid=$!"] + sl += ['echo "$_pid" > {0}'.format(tmp_pid)] + self.logger.debug("Writing on {0}".format(tmp_script)) + self.conda.create_executable_bash_script( + environment=self.model_id, commandlines=sl, file_name=tmp_script + ) + cmd = "bash {0}".format(tmp_script) + if runcommand_func is None: + self.logger.debug("Run command function not available. 
Running from shell") + run_command(cmd) + else: + self.logger.debug("Run command function available") + runcommand_func(cmd) + with open(tmp_pid, "r") as f: + self.pid = int(f.read().strip()) + self.logger.debug("Process id: {0}".format(self.pid)) + _logged_file_done = False + _logged_server_done = False + for it in range(int(TIMEOUT_SECONDS / SLEEP_SECONDS)): + self.logger.debug("Trying to wake up. Iteration: {0}".format(it)) + self.logger.debug( + "Timeout: {0} Sleep time: {1}".format(TIMEOUT_SECONDS, SLEEP_SECONDS) + ) + if not os.path.exists(tmp_file): + if not _logged_file_done: + self.logger.debug("Waiting for file {0}".format(tmp_file)) + _logged_file_done = True + time.sleep(SLEEP_SECONDS) + continue + self.logger.debug("Temporary file available: {0}".format(tmp_file)) + # If error string is identified, finish + with open(tmp_file, "r") as f: + r = f.read() + if self.ERROR_STRING in r.lower(): + self.logger.warning("Error string found in: {0}".format(r)) + # TODO perhaps find a better error string. + # self.url = None + # return + self.logger.debug("No error strings found in temporary file") + # If everything looks good, wait until server is ready + with open(tmp_file, "r") as f: + r = f.read() + if self.SEARCH_PRE_STRING not in r or self.SEARCH_SUF_STRING not in r: + if not _logged_server_done: + self.logger.debug("Waiting for server") + else: + self.logger.debug("Server logging done") + time.sleep(SLEEP_SECONDS) + _logged_server_done = True + continue + self.logger.debug("Server is ready. 
Trying to get URL") + # When the search strings are found get url + with open(tmp_file, "r") as f: + for l in f: + if self.SEARCH_PRE_STRING in l: + self.url = ( + l.split(self.SEARCH_PRE_STRING)[1].split(" ")[0].rstrip() + ) + self.logger.debug("URL found: {0}".format(self.url)) + return + self.logger.debug("Search strings not found yet") + self.logger.debug("No URL found") + self.url = None + + def close(self): try: os.kill(self.pid, 9) except: self.logger.info("PID {0} is unassigned".format(self.pid)) -class SystemBundleService(_BentoMLService): +class _LocalService(ErsiliaBase): + def __init__(self, model_id, config_json=None, preferred_port=None, url=None): + self.model_id = model_id + ErsiliaBase.__init__(self, config_json=config_json) + pack_method = resolve_pack_method( + model_path=self._get_bundle_location(model_id) + ) + self.logger.debug("Pack method is: {0}".format(pack_method)) + if pack_method == PACK_METHOD_FASTAPI: + self.server = _FastApiService( + model_id, + config_json=config_json, + preferred_port=preferred_port, + ) + elif pack_method == PACK_METHOD_BENTOML: + self.server = _BentoMLService( + model_id, + config_json=config_json, + preferred_port=preferred_port, + ) + else: + raise Exception("Model is not a valid BentoML or FastAPI model") + + def _get_apis_from_where_available(self): + return self.server._get_apis_from_where_available() + + def local_serve(self, runcommand_func=None): + self.server.serve(runcommand_func=runcommand_func) + self.url = self.server.url + self.pid = self.server.pid + self.port = self.server.port + + def local_close(self): + self.server.close() + + +class SystemBundleService(_LocalService): def __init__(self, model_id, config_json=None, preferred_port=None, url=None): - _BentoMLService.__init__( + _LocalService.__init__( self, model_id=model_id, config_json=config_json, @@ -232,18 +393,18 @@ def is_available(self): return avail def serve(self): - self._bentoml_serve() + self.local_serve() def close(self): - 
self._close() + self.local_close() def api(self, api_name, input): return self._api_with_url(api_name, input) -class VenvEnvironmentService(_BentoMLService): +class VenvEnvironmentService(_LocalService): def __init__(self, model_id, config_json=None, preferred_port=None, url=None): - _BentoMLService.__init__( + _LocalService.__init__( self, model_id=model_id, config_json=config_json, @@ -269,18 +430,18 @@ def is_available(self): return False def serve(self): - self._bentoml_serve(self._run_command) + self.local_serve(self._run_command) def close(self): - self._close() + self.local_close() def api(self, api_name, input): return self._api_with_url(api_name, input) -class CondaEnvironmentService(_BentoMLService): +class CondaEnvironmentService(_LocalService): def __init__(self, model_id, config_json=None, preferred_port=None, url=None): - _BentoMLService.__init__( + _LocalService.__init__( self, model_id=model_id, config_json=config_json, @@ -331,10 +492,10 @@ def is_available(self): return False def serve(self): - self._bentoml_serve(self._run_command) + self.local_serve(self._run_command) def close(self): - self._close() + self.local_close() def api(self, api_name, input): return self._api_with_url(api_name, input) diff --git a/ersilia/setup/requirements/docker.py b/ersilia/setup/requirements/docker.py index ad84194c4..d0a8fc465 100644 --- a/ersilia/setup/requirements/docker.py +++ b/ersilia/setup/requirements/docker.py @@ -1,5 +1,6 @@ from ...utils.terminal import run_command_check_output -from ...utils.docker import is_inside_docker, resolve_platform +from ...utils.docker import resolve_platform +from ...utils.system import is_inside_docker class DockerRequirement(object): diff --git a/ersilia/setup/requirements/isaura.py b/ersilia/setup/requirements/isaura.py index ba2337835..77d79d0ae 100644 --- a/ersilia/setup/requirements/isaura.py +++ b/ersilia/setup/requirements/isaura.py @@ -1,6 +1,7 @@ import importlib from ...utils.terminal import run_command + # TODO 
class IsauraRequirement(object): def __init__(self): diff --git a/ersilia/setup/requirements/pyairtable.py b/ersilia/setup/requirements/pyairtable.py index cca83530c..2f2642a11 100644 --- a/ersilia/setup/requirements/pyairtable.py +++ b/ersilia/setup/requirements/pyairtable.py @@ -2,6 +2,7 @@ import sys from ...utils.terminal import run_command + class PyAirtableRequirement: def __init__(self): self.name = "pyairtable" @@ -14,5 +15,5 @@ def is_installed(self): return False def install(self): - version = '<2' if sys.version_info.minor == 7 else '<3' - run_command(f"python -m pip install 'pyairtable{version}'") \ No newline at end of file + version = "<2" if sys.version_info.minor == 7 else "<3" + run_command(f"python -m pip install 'pyairtable{version}'") diff --git a/ersilia/utils/conda.py b/ersilia/utils/conda.py index 8d9b95825..f658fdfae 100644 --- a/ersilia/utils/conda.py +++ b/ersilia/utils/conda.py @@ -253,6 +253,10 @@ def _env_list(self): envs += [l.rstrip()] return envs + def create(self, environment, python_version): + cmd = "conda create -n {0} python={1} -y".format(environment, python_version) + run_command(cmd) + def active_env(self): envs = self._env_list() for l in envs: @@ -368,11 +372,28 @@ def _catch_critical_errors_in_conda(self, log): critical_errors += [l] return critical_errors + def create_executable_bash_script(self, environment, commandlines, file_name): + if type(commandlines) is list: + commandlines = "\n".join(commandlines) + bash_script = self.activate_base() + bash_script += """ + source {0}/etc/profile.d/conda.sh + conda activate {1} + {2} + """.format( + self.conda_prefix(True), environment, commandlines + ) + with open(file_name, "w") as f: + f.write(bash_script) + return file_name + @throw_ersilia_exception def run_commandlines(self, environment, commandlines): """ Run commands in a given conda environment. 
""" + if type(commandlines) is list: + commandlines = " && ".join(commandlines) logger.debug("Run commandlines on {0}".format(environment)) logger.debug(commandlines) if not self.exists(environment): @@ -381,18 +402,7 @@ def run_commandlines(self, environment, commandlines): tmp_script = os.path.join(tmp_folder, "script.sh") logger.debug("Activating base environment") logger.debug("Current working directory: {0}".format(os.getcwd())) - bash_script = self.activate_base() - bash_script += """ - source {0}/etc/profile.d/conda.sh - conda activate {1} - conda env list - {2} - """.format( - self.conda_prefix(True), environment, commandlines - ) - with open(tmp_script, "w") as f: - f.write(bash_script) - + self.create_executable_bash_script(environment, commandlines, tmp_script) tmp_folder = tempfile.mkdtemp(prefix="ersilia-") tmp_log = os.path.join(tmp_folder, "command_outputs.log") cmd = "bash {0} 2>&1 | tee -a {1}".format(tmp_script, tmp_log) diff --git a/ersilia/utils/config.py b/ersilia/utils/config.py index cee288ae8..791afc46a 100644 --- a/ersilia/utils/config.py +++ b/ersilia/utils/config.py @@ -2,6 +2,7 @@ The Config provide access to all sort of useful parameters. """ + import os import json from ..default import ( @@ -206,19 +207,6 @@ def to_credentials(self, json_file): for k, v in sj.items(): secrets[k] = "'{0}'".format(v) cred["SECRETS"] = secrets - # Local paths - from .paths import Paths - - pt = Paths() - local = {} - # .. 
development models path - dev_mod_path = pt.models_development_path() - if dev_mod_path is None: - v = "None" - else: - v = "'{0}'".format(dev_mod_path) - local["DEVEL_MODELS_PATH"] = v - cred["LOCAL"] = local with open(json_file, "w") as f: json.dump(cred, f, indent=4, sort_keys=True) return True diff --git a/ersilia/utils/csvfile.py b/ersilia/utils/csvfile.py index ed3b53dc0..4ba839d0b 100644 --- a/ersilia/utils/csvfile.py +++ b/ersilia/utils/csvfile.py @@ -23,11 +23,9 @@ def load(self, csv_file): self.keys += [r[0]] self.inputs += [r[1]] self.values += [r[-len(self.features) :]] - - - + def _read_csv_tsv(self, file_path, delimiter): - with open(file_path, mode='r') as file: + with open(file_path, mode="r") as file: reader = csv.DictReader(file, delimiter=delimiter) data = [row for row in reader] return data @@ -36,7 +34,6 @@ def _read_json(self, file_path): with open(file_path, mode="r") as file: return json.load(file) - def read(self, file_path): """ Reads a file and returns the data as a list of dictionaries. @@ -44,12 +41,12 @@ def read(self, file_path): :param file_path: Path to the CSV file. :return: A list of dictionaries containing the CSV data. 
""" - + file_extension = os.path.splitext(file_path)[1].lower() - if file_extension == '.json': + if file_extension == ".json": return self._read_json(file_path) - elif file_extension in ['.csv', '.tsv']: - delimiter = '\t' if file_extension == '.tsv' else ',' + elif file_extension in [".csv", ".tsv"]: + delimiter = "\t" if file_extension == ".tsv" else "," return self._read_csv_tsv(file_path, delimiter) else: raise ValueError("Unsupported file format") diff --git a/ersilia/utils/docker.py b/ersilia/utils/docker.py index 020ce5160..bd3b56c46 100644 --- a/ersilia/utils/docker.py +++ b/ersilia/utils/docker.py @@ -12,13 +12,6 @@ from ..utils.system import SystemChecker -def is_inside_docker(): - if os.path.isfile("/.dockerenv"): - return True - else: - return False - - def resolve_platform(): if SystemChecker().is_arm64(): return "linux/arm64" @@ -286,7 +279,7 @@ def container_memory(self): return ( f"Total memory consumed by container '{container.name}': {mem_usage:.2f}MiB", ) - return + return def container_cpu(self): """ diff --git a/ersilia/utils/dvc.py b/ersilia/utils/dvc.py index a9faf30e5..599bd5116 100644 --- a/ersilia/utils/dvc.py +++ b/ersilia/utils/dvc.py @@ -17,9 +17,9 @@ def set_secrets_file(): secrets = Secrets() if not os.path.exists(secrets.gdrive_client_secrets_json): secrets.fetch_gdrive_secrets_from_github() - GoogleAuth.DEFAULT_SETTINGS[ - "client_config_file" - ] = secrets.gdrive_client_secrets_json + GoogleAuth.DEFAULT_SETTINGS["client_config_file"] = ( + secrets.gdrive_client_secrets_json + ) return GoogleAuth diff --git a/ersilia/utils/environment.py b/ersilia/utils/environment.py index b2a1e2917..f4ce8e7f5 100644 --- a/ersilia/utils/environment.py +++ b/ersilia/utils/environment.py @@ -1,6 +1,7 @@ """ Utility functions to get information about the working environment. 
""" + import pkg_resources diff --git a/ersilia/utils/exceptions_utils/fetch_exceptions.py b/ersilia/utils/exceptions_utils/fetch_exceptions.py index 6251647f8..82f338e30 100644 --- a/ersilia/utils/exceptions_utils/fetch_exceptions.py +++ b/ersilia/utils/exceptions_utils/fetch_exceptions.py @@ -1,3 +1,4 @@ +from typing import Any from .exceptions import ErsiliaError @@ -160,3 +161,30 @@ def _get_message(self): def _get_hints(self): text = "Make sure that Docker is running on your computer. We recommend to use Docker Desktop." return text + + +class NotInstallableError(ErsiliaError): + def __init__(self, model_id, packing_strategy): + self.packing_strategy = packing_strategy + self.model_id = model_id + self.message = self._get_message() + self.hints = self._get_hints() + ErsiliaError.__init__(self, self.message, self.hints) + + def _get_message(self): + text = f"Model {self.model_id} is not installable with {self.packing_strategy}" + return text + + def _get_hints(self): + text = f"This model is not compatible with {self.packing_strategy}. Please check the model structure or reach out to Ersilia directly to report this error." + return text + + +class NotInstallableWithFastAPI(NotInstallableError): + def __init__(self, model_id): + super.__init__(model_id, "FastAPI") + + +class NotInstallableWithBentoML(NotInstallableError): + def __init__(self, model_id): + super.__init__(model_id, "BentoML") diff --git a/ersilia/utils/exceptions_utils/pull_exceptions.py b/ersilia/utils/exceptions_utils/pull_exceptions.py index 358cbd21a..bfab68e3e 100644 --- a/ersilia/utils/exceptions_utils/pull_exceptions.py +++ b/ersilia/utils/exceptions_utils/pull_exceptions.py @@ -20,7 +20,8 @@ def __init__(self, model): self.hints = "If you are using an Apple M1/M2 chip, it is possible that this model is not supported for your architecture, unfortunately.\nOne possible alternative is to use GitHub Codespaces to run Ersilia on the cloud, and fetch the model from there. 
If you absolutely want this model to run on a Mac, please reach out to us and we will try to help." ErsiliaError.__init__(self, self.message, self.hints) + class DockerConventionalPullError(ErsiliaError): def __init__(self, model): self.message = "Conventional pull did not work for model {0}".format(model) - ErsiliaError.__init__(self, self.message) \ No newline at end of file + ErsiliaError.__init__(self, self.message) diff --git a/ersilia/utils/paths.py b/ersilia/utils/paths.py index dcbe6ccf7..75ed364be 100644 --- a/ersilia/utils/paths.py +++ b/ersilia/utils/paths.py @@ -2,6 +2,8 @@ import os import collections from pathlib import Path +from ersilia import logger +from ..default import PACK_METHOD_BENTOML, PACK_METHOD_FASTAPI MODELS_DEVEL_DIRNAME = "models" @@ -47,24 +49,6 @@ def ersilia_development_path(self): return None return path - def models_development_path(self): - """Try to guess the models path in the local computer. - The directory with more 'eos0xxx' subdirectories will be returned.""" - org_dev_path = self.org_development_path() - if org_dev_path is None: - return - regex = self._eos_regex() - cands = collections.defaultdict(int) - for dirpath, dirnames, filenames in os.walk(org_dev_path): - ap = os.path.abspath(dirpath) - bn = os.path.basename(ap) - if bn == MODELS_DEVEL_DIRNAME: - for dn in dirnames: - if regex.match(dn): - cands[ap] += 1 - path = sorted(cands.items(), key=lambda item: -item[1])[0][0] - return path - @staticmethod def exists(path): if path is None: @@ -73,3 +57,12 @@ def exists(path): return True else: return False + + +def resolve_pack_method(model_path): + if os.path.exists(os.path.join(model_path, "installs", "install.sh")): + return PACK_METHOD_FASTAPI + elif os.path.exists(os.path.join(model_path, "bentoml.yml")): + return PACK_METHOD_BENTOML + logger.warning("Could not resolve pack method") + return None diff --git a/ersilia/utils/system.py b/ersilia/utils/system.py index bed9a09ba..d006a7b64 100644 --- 
a/ersilia/utils/system.py +++ b/ersilia/utils/system.py @@ -2,6 +2,13 @@ import os +def is_inside_docker(): + if os.path.isfile("/.dockerenv"): + return True + else: + return False + + class SystemChecker(object): def __init__(self): self.uname = platform.uname() @@ -18,3 +25,6 @@ def is_github_action(self): return True else: return False + + def is_inside_docker(self): + return is_inside_docker() diff --git a/test/test_models.py b/test/test_models.py index 037121605..2fc0ea842 100644 --- a/test/test_models.py +++ b/test/test_models.py @@ -1,3 +1,4 @@ +import os from ersilia.hub.fetch.fetch import ModelFetcher from ersilia import ErsiliaModel @@ -20,7 +21,9 @@ def test_model_1(): def test_model_2(): MODEL_ID = MODELS[1] INPUT = "CCCC" - ModelFetcher().fetch(MODEL_ID) + ModelFetcher(repo_path=os.path.join(os.getcwd(), "test/models", MODEL_ID)).fetch( + MODEL_ID + ) em = ErsiliaModel(MODEL_ID) em.serve() em.predict(INPUT)