Skip to content

Commit

Permalink
full working pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
Mohamed-Mejri committed Jan 23, 2023
1 parent bc773bb commit fc9e368
Show file tree
Hide file tree
Showing 8 changed files with 82 additions and 24 deletions.
4 changes: 3 additions & 1 deletion check_data/conda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@ channels:
- conda-forge
- defaults
dependencies:
- python=3.8
- numpy=1.20
- pandas=1.2.3
- pip=20.3.3
- pytest=6.2.2
- scipy=1.6.1
- pip:
- wandb==0.10.21
- wandb==0.13.4
6 changes: 4 additions & 2 deletions conda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@ channels:
- conda-forge
- defaults
dependencies:
- python=3.8
- numpy=1.20
- requests=2.24.0
- pip=20.3.3
- mlflow=1.14.1
- mlflow=2.0.1
- hydra-core=1.0.6
- pip:
- wandb==0.10.21
- wandb==0.13.4
- hydra-joblib-launcher==1.1.2
4 changes: 3 additions & 1 deletion download/conda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ channels:
- conda-forge
- defaults
dependencies:
- python=3.8
- numpy=1.20
- requests=2.24.0
- pip=20.3.3
- pip:
- wandb==0.10.21
- wandb==0.13.4
6 changes: 4 additions & 2 deletions evaluate/conda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@ channels:
- conda-forge
- defaults
dependencies:
- python=3.8
- numpy=1.20
- pandas=1.2.3
- pip=20.3.3
- scikit-learn=0.24.1
- matplotlib==3.2.2
- plotly==4.4.1
- pillow=8.1.2
- mlflow=1.14.1
- mlflow=2.0.1
- pip:
- wandb==0.10.21
- wandb==0.13.4
72 changes: 58 additions & 14 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,12 @@ def go(config: DictConfig):
# This was passed on the command line as a comma-separated list of steps
steps_to_execute = config["main"]["execute_steps"].split(",")
else:
assert isinstance(config["main"]["execute_steps"], list)
steps_to_execute = config["main"]["execute_steps"]
print("###########")
# print(type(config["main"]["execute_steps"]))
# assert isinstance(config["main"]["execute_steps"], list)
# steps_to_execute = config["main"]["execute_steps"]
steps_to_execute = list(config["main"]["execute_steps"])
print(type(steps_to_execute))

# Download step
if "download" in steps_to_execute:
Expand All @@ -39,18 +43,43 @@ def go(config: DictConfig):

if "preprocess" in steps_to_execute:

## YOUR CODE HERE: call the preprocess step
pass
_ = mlflow.run(
os.path.join(root_path, "preprocess"),
"main",
parameters={
"input_artifact": "raw_data.parquet:latest",
"artifact_name": "preprocessed_data.csv",
"artifact_type": "preprocessed_data",
"artifact_description": "Data preprocessed"
},
)

if "check_data" in steps_to_execute:

## YOUR CODE HERE: call the check_data step
pass
_ = mlflow.run(
os.path.join(root_path, "check_data"),
"main",
parameters={
"reference_artifact": config["data"]["reference_dataset"],
"sample_artifact": "preprocessed_data.csv:latest",
"ks_alpha": config["data"]["ks_alpha"]
},
)

if "segregate" in steps_to_execute:

## YOUR CODE HERE: call the segregate step
pass
_ = mlflow.run(
os.path.join(root_path, "segregate"),
"main",
parameters={
"input_artifact": "preprocessed_data.csv:latest",
"artifact_root": "data",
"artifact_type": "segregated_data",
"test_size": config["data"]["test_size"],
"random_state": config["main"]["random_seed"],
"stratify": config["data"]["stratify"]
},
)

if "random_forest" in steps_to_execute:

Expand All @@ -60,14 +89,29 @@ def go(config: DictConfig):
with open(model_config, "w+") as fp:
fp.write(OmegaConf.to_yaml(config["random_forest_pipeline"]))

## YOUR CODE HERE: call the random_forest step
pass
_ = mlflow.run(
os.path.join(root_path, "random_forest"),
"main",
parameters={
"train_data": "data_train.csv:latest",
"model_config": model_config,
"export_artifact": config["random_forest_pipeline"]["export_artifact"],
"random_seed": config['random_forest_pipeline']['random_forest']['random_state'],
"val_size": config["data"]["test_size"],
"stratify": config["data"]["stratify"]
}
)

if "evaluate" in steps_to_execute:

## YOUR CODE HERE: call the evaluate step
pass

_ = mlflow.run(
os.path.join(root_path, "evaluate"),
"main",
parameters={
"model_export": f"{config['random_forest_pipeline']['export_artifact']}:latest",
"test_data": "data_test.csv:latest"
}
)


if __name__ == "__main__":
go()
4 changes: 3 additions & 1 deletion preprocess/conda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@ channels:
- conda-forge
- defaults
dependencies:
- python=3.8
- numpy=1.20
- pandas=1.2.3
- pip=20.3.3
- pyarrow=2.0
- pip:
- wandb==0.10.21
- wandb==0.13.4
6 changes: 4 additions & 2 deletions random_forest/conda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@ channels:
- conda-forge
- defaults
dependencies:
- python=3.8
- numpy=1.20
- pandas=1.2.3
- pip=20.3.3
- scikit-learn=0.24.1
- matplotlib==3.2.2
- plotly==4.4.1
- pillow=8.1.2
- mlflow=1.14.1
- mlflow=2.0.1
- pip:
- wandb==0.10.21
- wandb==0.13.4
- omegaconf==2.0.6
4 changes: 3 additions & 1 deletion segregate/conda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@ channels:
- conda-forge
- defaults
dependencies:
- python=3.8
- numpy=1.20
- pandas=1.2.3
- pip=20.3.3
- scikit-learn=0.24.1
- pip:
- wandb==0.10.21
- wandb==0.13.4

0 comments on commit fc9e368

Please sign in to comment.