diff --git a/mlmodel-cicd/ml_model.yaml b/mlmodel-cicd/ml_model.yaml index 7f18f83..ab55623 100644 --- a/mlmodel-cicd/ml_model.yaml +++ b/mlmodel-cicd/ml_model.yaml @@ -7,31 +7,53 @@ description: | algorithm: XGBoost target: expected_time - +tags: + - tagFQN : PII.None + description: Tag None + labelType: Manual + state: Confirmed + source: Classification + - tagFQN: PersonalData.Personal + description: Personal Data Tag + labelType: Manual + state: Confirmed + source: Classification + - tagFQN: SmartMed.Average + description: Smartmed Tag + labelType: Manual + state: Confirmed + source: Classification mlFeatures: - name: last_update_bucket dataType: categorical featureAlgorithm: Bucketing + tags: + - tagFQN: PII.None + description: Tag None + labelType: Manual + state: Confirmed + source: Classification + featureSources: - - name: orders + - name: last_update dataType: timestamp - dataSourceFqn: demo_pg.postgres.public.orders + dataSourceFqn: orders.postgres.public.orders - name: restaurant_dimension dataType: numerical featureAlgorithm: PCA featureSources: - name: rating dataType: integer - dataSourceFqn: demo_pg.postgres.public.restaurants + dataSourceFqn: orders.postgres.public.restaurants - name: average_price dataType: integer - dataSourceFqn: demo_pg.postgres.public.restaurants + dataSourceFqn: orders.postgres.public.restaurants - name: is_premium dataType: categorical featureSources: - name: premium dataType: boolean - dataSourceFqn: demo_pg.postgres.public.users + dataSourceFqn: orders.postgres.public.users mlHyperParameters: - name: regularisation @@ -41,3 +63,4 @@ mlHyperParameters: mlStore: storage: s3://path-to-pickle imageRepository: https://docker.hub.com/image + diff --git a/mlmodel-cicd/mlmodel_cicd.py b/mlmodel-cicd/mlmodel_cicd.py index 75511ac..3407b6c 100644 --- a/mlmodel-cicd/mlmodel_cicd.py +++ b/mlmodel-cicd/mlmodel_cicd.py @@ -36,11 +36,12 @@ from metadata.generated.schema.security.client.openMetadataJWTClientConfig import ( OpenMetadataJWTClientConfig, ) +from metadata.generated.schema.type.tagLabel import TagLabel from metadata.ingestion.ometa.ometa_api import OpenMetadata OM_HOST_PORT = "http://localhost:8585/api" -OM_JWT_TOKEN = "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg" -ML_MODEL_FILE = "ml_model.yaml" +OM_JWT_TOKEN = "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJvcGVuLW1ldGFkYXRhLm9yZyIsInN1YiI6ImluZ2VzdGlvbi1ib3QiLCJlbWFpbCI6ImluZ2VzdGlvbi1ib3RAb3Blbm1ldGFkYXRhLm9yZyIsImlzQm90Ijp0cnVlLCJ0b2tlblR5cGUiOiJCT1QiLCJpYXQiOjE2ODM4MDIyNjUsImV4cCI6bnVsbH0.abHiU2KnFnrtF7QrnygHohPjeu6FGmtyBCLnTlFnf_p-7-Xq8NNz6aDWoFRJf13oAE6z8Tg_CwWmOEbTGqfG4GvWyNYCrxrGlS6RO9Td5QmVxDX6YZUdP4rn2RdvYzOdjQfAzeBTTsPe3VKEFGJB9HpEBy_Uux6QN1K7CQKewm-DrDejqwrkfG5lozPUuFxSx17BE60wJ7Z3TzhK8qAI94touRRodsGHwhFi48vL0fLcTu_tEG_cmJyy12Bzmv25pqHsFombb37lhiHwi-4mJMt_tV7dVCf-_YU13GDFC8pRTxHsi_Srf6hC1Pt4ZY7wA0p88kCRiPbjxLxqR-vgMA" +ML_MODEL_FILE = "ml_model_tags.yaml" def read_ml_metadata(path: Path) -> dict: @@ -127,19 +128,48 @@ def update_openmetadata(raw_meta: dict) -> None: if not metadata.health_check(): raise RuntimeError("Error connecting to OpenMetadata") + + + + #create tags getting info form yaml file + createTagClassificationRaw(metadata, raw_meta) + + + print("Connected to OpenMetadata ✅") time.sleep(1) - + print (raw_meta) create_ml_model = CreateMlModelRequest( + name=raw_meta["name"], description=raw_meta["description"], algorithm=raw_meta["algorithm"], target=raw_meta["target"], + tags=[ + TagLabel( + tagFQN=ml_tag["tagFQN"], + description=ml_tag["description"], + labelType=ml_tag["labelType"], + state=ml_tag["state"], + source=ml_tag["source"] + ) + for ml_tag in raw_meta.get("tags") or [] + ], mlFeatures=[ MlFeature( name=ml_feature["name"], dataType=ml_feature["dataType"], featureAlgorithm=ml_feature.get("featureAlgorithm"), + tags=[ + TagLabel( + tagFQN=ml_tagFeature["tagFQN"], + description=ml_tagFeature["description"], + labelType=ml_tagFeature["labelType"], + state=ml_tagFeature["state"], + source=ml_tagFeature["source"] + ) + for ml_tagFeature in ml_feature.get("tags") or [] + ], featureSources=[ FeatureSource( name=feature_source["name"], @@ -200,6 +230,45 @@ def run_workflow() -> None: update_openmetadata(raw_meta) +def createTagClassificationRaw(metadata, raw_meta) -> None: + for ml_tag in raw_meta.get("tags"): + + #print( ml_tag["tagFQN"]) + class_tag=ml_tag["tagFQN"].split(".") + classificationName=class_tag[0] + tagName=class_tag[1] + createTagClassification(metadata,classificationName,tagName) + + + + + + + +def createTagClassification(metadata, classification_name, tag_name) -> None: + from metadata.generated.schema.api.classification.createClassification import ( + CreateClassificationRequest, + ) + from metadata.generated.schema.api.classification.createTag import CreateTagRequest + + classification_request = CreateClassificationRequest( + name=classification_name, + description="Sample classification.", + ) + + metadata.create_or_update(classification_request) + + tag_request = CreateTagRequest( + classification=classification_request.name, + name=tag_name, + description="Sample Tag.", + ) + + metadata.create_or_update(tag_request) + + + if __name__ == "__main__": run_workflow() +