-
Notifications
You must be signed in to change notification settings - Fork 0
/
setup.py
64 lines (45 loc) · 2.25 KB
/
setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Databricks notebook source
# MAGIC %pip install faker
# COMMAND ----------
# Configure Spark to authenticate against the "oneenvadls" ADLS Gen2 storage
# account using Azure AD OAuth 2.0 client credentials. The service-principal
# application id, client secret, and tenant (directory) id are all pulled from
# the "mzeni-kv-tests" Databricks secret scope, so no credentials appear in
# the notebook source.
spark.conf.set("fs.azure.account.auth.type.oneenvadls.dfs.core.windows.net", "OAuth")
spark.conf.set("fs.azure.account.oauth.provider.type.oneenvadls.dfs.core.windows.net", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider")
spark.conf.set("fs.azure.account.oauth2.client.id.oneenvadls.dfs.core.windows.net", dbutils.secrets.get(scope="mzeni-kv-tests",key="applicationid"))
spark.conf.set("fs.azure.account.oauth2.client.secret.oneenvadls.dfs.core.windows.net", dbutils.secrets.get(scope="mzeni-kv-tests",key="secret"))
# The token endpoint embeds the tenant id, also fetched from the secret scope.
spark.conf.set("fs.azure.account.oauth2.client.endpoint.oneenvadls.dfs.core.windows.net", "https://login.microsoftonline.com/{}/oauth2/token".format(dbutils.secrets.get(scope="mzeni-kv-tests",key="directoryid")))
# COMMAND ----------
# Reset the demo: recursively delete any Delta files left over from a
# previous run (the second argument True enables recursive deletion).
dbutils.fs.rm('abfss://deltalake@oneenvadls.dfs.core.windows.net/mattia/demos/sql-udf/', True)
# COMMAND ----------
# MAGIC %sql
# MAGIC
# MAGIC -- Drop the demo database and all of its tables, if it already exists.
# MAGIC DROP DATABASE IF EXISTS database_mattia_demos_sql_udf CASCADE
# COMMAND ----------
# MAGIC %sql
# MAGIC
# MAGIC -- Recreate the demo database from scratch.
# MAGIC CREATE DATABASE database_mattia_demos_sql_udf
def generate_user_names(faker):
    """Return a (first_name, last_name) tuple drawn from the given Faker instance."""
    first = faker.first_name()
    last = faker.last_name()
    return first, last
# COMMAND ----------
import random
import string
from faker import Faker
def get_random_string(length):
    """Return a random string of `length` lowercase ASCII letters."""
    alphabet = string.ascii_lowercase
    # One independent uniform draw from the lowercase alphabet per position.
    chars = [random.choice(alphabet) for _ in range(length)]
    return ''.join(chars)
def create_data():
    """Build 1000 fake user rows and persist them as a Delta table.

    Each row is [id, full name, age, email, label], where the label flags
    users older than 50. The DataFrame is written in Delta format to the
    demo's ADLS input path. Requires the notebook globals `spark` and the
    sibling helper `generate_user_names`.
    """
    faker = Faker()
    rows = []
    for idx in range(1000):
        first, last = generate_user_names(faker)
        years = random.randint(1,100)
        rows.append([
            idx,
            "{0} {1}".format(first, last),
            years,
            "{0}.{1}@gmail.com".format(first, last),
            "OLD" if years > 50 else "NOT OLD",
        ])
    df = spark.createDataFrame(rows, ["ID", "NAME", "AGE", "EMAIL-PII", "LABEL"])
    return df.write.format("delta").save("abfss://deltalake@oneenvadls.dfs.core.windows.net/mattia/demos/sql-udf/delta-input/")
# COMMAND ----------
# Generate the fake user dataset and write it to the Delta input path.
create_data()
# COMMAND ----------
# MAGIC %sql
# MAGIC
# MAGIC -- Register the Delta files written above as an external table
# MAGIC -- in the demo database.
# MAGIC CREATE TABLE database_mattia_demos_sql_udf.dataset
# MAGIC USING DELTA
# MAGIC LOCATION 'abfss://deltalake@oneenvadls.dfs.core.windows.net/mattia/demos/sql-udf/delta-input/'