Skip to content

Commit

Permalink
Merge pull request #150 from george0st/change
Browse files Browse the repository at this point in the history
Remove null values in parquet
  • Loading branch information
george0st authored Apr 16, 2024
2 parents 1ae4029 + 5506269 commit 55fd0a9
Show file tree
Hide file tree
Showing 6 changed files with 16 additions and 14 deletions.
2 changes: 1 addition & 1 deletion 01-model/model.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"description": "The machine learning meta-model with synthetic data (useful for MLOps/feature store), part of the quality gate concept.",
"kind": "model",
"spec": {
"version": "0.2.2",
"version": "0.2.3",
"CSV_SEPARATOR": ",",
"CSV_DECIMAL": "."
}
Expand Down
3 changes: 1 addition & 2 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ click~=8.1

numpy~=1.26
pandas==2.2.2
#pyarrow==15.0.2
#fastparquet==2024.2.0
pyarrow==15.0.2

# generation of synthetic data
faker~=24.2
Expand Down
15 changes: 9 additions & 6 deletions generator/basic_contact.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,20 @@ def generate(self, count):
# generate a different amount of contact information depending on the party type:
#   Customer     = email + phone
#   non-Customer = either email or phone, chosen at random
contact_detail=ContactEnum.Full if party['party-type']=="Customer" else ContactEnum.Email\
contact_detail=ContactEnum.Full if party['party-type']=="Customer" else ContactEnum.Email \
if self.rnd_bool() else ContactEnum.Phone

# "name": "contact-email"
if contact_detail & ContactEnum.Email:
# "name": "contact-email"
model['contact-email']=self.fake.email()
model['contact-email'] = self.fake.email()
else:
model['contact-email'] = ""

# "name": "contact-phone"
if contact_detail & ContactEnum.Phone:
# "name": "contact-phone"
model['contact-phone']=self.fake.phone_number()
model['contact-phone'] = self.fake.phone_number()
else:
model['contact-phone'] = ""

# "name": "contact-state"
if count==0:
Expand All @@ -72,4 +76,3 @@ def generate(self, count):
model['record-date']=self.gmodel["NOW"]

self.model.append(model)

5 changes: 3 additions & 2 deletions generator/basic_transaction.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ def generate(self, count):

# "name": "counterparty-other",
# "description": "Transaction counterparty other information",
# TODO: Add
# TODO: Add relevant value
model["counterparty-other"]=""

fraud=False
fraud_anomaly=0
Expand All @@ -118,7 +119,7 @@ def generate(self, count):
# "description": "Identification of fraud (True - fraud, False - without fraud)",
model["transaction-fraud"] = int(fraud)

# "name": "record-date",
# "name": "record-date",
# "description": "The date when the record was created",
model['record-date']=self.gmodel["NOW"]

Expand Down
2 changes: 1 addition & 1 deletion generator/version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# Store the version here so:

__version__ = '0.2.0'
__version__ = '0.2.1'
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ click~=8.1

numpy~=1.26
pandas==2.2.2
#pyarrow==15.0.2
#fastparquet==2024.2.0
pyarrow==15.0.2

# generation of synthetic data
faker~=24.2

0 comments on commit 55fd0a9

Please sign in to comment.