Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tags at ML Model level and at feature level #22

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 29 additions & 6 deletions mlmodel-cicd/ml_model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,31 +7,53 @@ description: |

algorithm: XGBoost
target: expected_time

tags:
- tagFQN : PII.None
description: Tag None
labelType: Manual
state: Confirmed
source: Classification
- tagFQN: PersonalData.Personal
description: Personal Data Tag
labelType: Manual
state: Confirmed
source: Classification
- tagFQN: SmartMed.Average
description: Smartmed Tag
labelType: Manual
state: Confirmed
source: Classification
mlFeatures:
- name: last_update_bucket
dataType: categorical
featureAlgorithm: Bucketing
tags:
- tagFQN: PII.None
description: Tag None
labelType: Manual
state: Confirmed
source: Classification

featureSources:
- name: orders
- name: last_update
dataType: timestamp
dataSourceFqn: demo_pg.postgres.public.orders
dataSourceFqn: orders.postgres.public.orders
- name: restaurant_dimension
dataType: numerical
featureAlgorithm: PCA
featureSources:
- name: rating
dataType: integer
dataSourceFqn: demo_pg.postgres.public.restaurants
dataSourceFqn: orders.postgres.public.restaurants
- name: average_price
dataType: integer
dataSourceFqn: demo_pg.postgres.public.restaurants
dataSourceFqn: orders.postgres.public.restaurants
- name: is_premium
dataType: categorical
featureSources:
- name: premium
dataType: boolean
dataSourceFqn: demo_pg.postgres.public.users
dataSourceFqn: orders.postgres.public.users

mlHyperParameters:
- name: regularisation
Expand All @@ -41,3 +63,4 @@ mlHyperParameters:
mlStore:
storage: s3://path-to-pickle
imageRepository: https://docker.hub.com/image

75 changes: 72 additions & 3 deletions mlmodel-cicd/mlmodel_cicd.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,12 @@
from metadata.generated.schema.security.client.openMetadataJWTClientConfig import (
OpenMetadataJWTClientConfig,
)
from metadata.generated.schema.type.tagLabel import TagLabel
from metadata.ingestion.ometa.ometa_api import OpenMetadata

# Connection settings and input file used by this script.
# NOTE(review): OM_JWT_TOKEN and ML_MODEL_FILE are each assigned twice below;
# at import time the later assignment wins. This looks like the old and new
# sides of a diff both being present — confirm which pair is intended.
# NOTE(review): a JWT is hard-coded in source; consider supplying it from the
# environment or a secret store instead of committing it.
OM_HOST_PORT = "http://localhost:8585/api"
OM_JWT_TOKEN = "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
ML_MODEL_FILE = "ml_model.yaml"
OM_JWT_TOKEN = "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJvcGVuLW1ldGFkYXRhLm9yZyIsInN1YiI6ImluZ2VzdGlvbi1ib3QiLCJlbWFpbCI6ImluZ2VzdGlvbi1ib3RAb3Blbm1ldGFkYXRhLm9yZyIsImlzQm90Ijp0cnVlLCJ0b2tlblR5cGUiOiJCT1QiLCJpYXQiOjE2ODM4MDIyNjUsImV4cCI6bnVsbH0.abHiU2KnFnrtF7QrnygHohPjeu6FGmtyBCLnTlFnf_p-7-Xq8NNz6aDWoFRJf13oAE6z8Tg_CwWmOEbTGqfG4GvWyNYCrxrGlS6RO9Td5QmVxDX6YZUdP4rn2RdvYzOdjQfAzeBTTsPe3VKEFGJB9HpEBy_Uux6QN1K7CQKewm-DrDejqwrkfG5lozPUuFxSx17BE60wJ7Z3TzhK8qAI94touRRodsGHwhFi48vL0fLcTu_tEG_cmJyy12Bzmv25pqHsFombb37lhiHwi-4mJMt_tV7dVCf-_YU13GDFC8pRTxHsi_Srf6hC1Pt4ZY7wA0p88kCRiPbjxLxqR-vgMA"
ML_MODEL_FILE = "ml_model_tags.yaml"


def read_ml_metadata(path: Path) -> dict:
Expand Down Expand Up @@ -127,19 +128,48 @@ def update_openmetadata(raw_meta: dict) -> None:
if not metadata.health_check():
raise RuntimeError("Error connecting to OpenMetadata")




# Create the tags using the info from the YAML file
createTagClassificationRaw(metadata, raw_meta)



print("Connected to OpenMetadata ✅")
time.sleep(1)

print (raw_meta)
create_ml_model = CreateMlModelRequest(

name=raw_meta["name"],
description=raw_meta["description"],
algorithm=raw_meta["algorithm"],
target=raw_meta["target"],
tags=[
TagLabel(
tagFQN=ml_tag["tagFQN"],
description=ml_tag["description"],
labelType=ml_tag["labelType"],
state=ml_tag["state"],
source=ml_tag["source"]
)
for ml_tag in raw_meta.get("tags") or []
],
mlFeatures=[
MlFeature(
name=ml_feature["name"],
dataType=ml_feature["dataType"],
featureAlgorithm=ml_feature.get("featureAlgorithm"),
tags=[
TagLabel(
tagFQN=ml_tagFeature["tagFQN"],
description=ml_tagFeature["description"],
labelType=ml_tagFeature["labelType"],
state=ml_tagFeature["state"],
source=ml_tagFeature["source"]
)
for ml_tagFeature in ml_feature.get("tags") or []
],
featureSources=[
FeatureSource(
name=feature_source["name"],
Expand Down Expand Up @@ -200,6 +230,45 @@ def run_workflow() -> None:
update_openmetadata(raw_meta)


def createTagClassificationRaw(metadata, raw_meta) -> None:
    """Create the classification and tag behind each model-level tag.

    Every entry of ``raw_meta["tags"]`` is expected to carry a ``tagFQN`` of
    the form ``<classification>.<tag>``. The first two dot-separated parts are
    forwarded to ``createTagClassification``. A missing, null or empty
    ``tags`` entry means there is nothing to create.

    Args:
        metadata: Client object passed through to ``createTagClassification``.
        raw_meta: Mapping holding the ML model metadata; only its optional
            ``"tags"`` list is read here.

    Raises:
        KeyError: If a tag entry has no ``tagFQN`` key.
        IndexError: If a ``tagFQN`` contains no ``.`` separator.
    """
    # ``tags`` is optional; ``or []`` also covers an explicit null value so
    # that iterating never hits ``None``.
    for ml_tag in raw_meta.get("tags") or []:
        fqn_parts = ml_tag["tagFQN"].split(".")
        classification_name, tag_name = fqn_parts[0], fqn_parts[1]
        createTagClassification(metadata, classification_name, tag_name)







def createTagClassification(metadata, classification_name, tag_name) -> None:
    """Create or update one classification and one tag belonging to it.

    The classification request is sent first, then the tag request that
    refers to it, both through ``metadata.create_or_update``.

    Args:
        metadata: Client object exposing ``create_or_update``.
        classification_name: Name given to the classification request.
        tag_name: Name given to the tag request.
    """
    # Imported at call time, exactly as the surrounding file does.
    from metadata.generated.schema.api.classification.createClassification import (
        CreateClassificationRequest,
    )
    from metadata.generated.schema.api.classification.createTag import CreateTagRequest

    parent = CreateClassificationRequest(
        description="Sample classification.",
        name=classification_name,
    )
    metadata.create_or_update(parent)

    # The tag refers to the name as stored on the classification request.
    metadata.create_or_update(
        CreateTagRequest(
            name=tag_name,
            classification=parent.name,
            description="Sample Tag.",
        )
    )



if __name__ == "__main__":
    # Run the workflow only when this file is executed as a script.
    run_workflow()