
Commit

Merge branch 'main' into feature/prompt-engineering-resource
kkiani authored Oct 31, 2024
2 parents 659b707 + 2259cfa commit 72ac6b0
Showing 12 changed files with 477 additions and 74 deletions.
20 changes: 20 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,23 @@
## v0.7.0 (2024-10-14)

### Feat

- add Java for running spark related applications
- add Job schedule and checkpoints bucket
- work on Glue component

### Fix

- Pulumi args
- application example

### Refactor

- force creating required buckets
- apply PR reviews
- Fix comments and types
- fix format

## v0.6.0 (2024-10-10)

### Feat
4 changes: 4 additions & 0 deletions devenv.nix
@@ -55,6 +55,10 @@
};
};

# Java is required for PySpark
languages.java.enable = true;
languages.java.jdk.package = pkgs.jdk8; # Java version running on AWS Glue

enterShell = ''
hello
pdm install
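The devenv.nix change pins JDK 8 so the local toolchain matches the Java runtime on AWS Glue. As a quick sanity check (a sketch, not part of this commit; it assumes pyspark is installed, e.g. via the `pdm install` in enterShell), you can ask a local Spark session which Java it is running on:

    # Report the Java version backing the local Spark JVM; with pkgs.jdk8
    # active in the devenv shell, this should print a 1.8.x version.
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.appName("jdk-check").getOrCreate()
    # _jvm is py4j's gateway into the driver JVM.
    java_version = spark.sparkContext._jvm.java.lang.System.getProperty("java.version")
    print(f"Spark is running on Java {java_version}")
    spark.stop()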
12 changes: 3 additions & 9 deletions examples/sparkle/Pulumi.yaml
@@ -1,11 +1,5 @@
name: object_storage
name: simple-spark-application
runtime:
name: python
options:
toolchain: pip
virtualenv: venv
description: A minimal Azure Native Python Pulumi program
config:
pulumi:tags:
value:
pulumi:template: azure-python
description: A minimal spark application that uses Sparkle
region: eu-west-1
8 changes: 3 additions & 5 deletions examples/sparkle/__main__.py
@@ -1,4 +1,3 @@
import os
from damavand.cloud.provider import AwsProvider
from damavand.factories import SparkControllerFactory

@@ -10,7 +9,7 @@ def main() -> None:
spark_factory = SparkControllerFactory(
provider=AwsProvider(
app_name="my-app",
region="us-west-2",
region="eu-west-1",
),
tags={"env": "dev"},
)
@@ -22,10 +21,9 @@
CustomerOrders(),
],
)
# app_name = os.getenv("APP_NAME", "products-app") # Get app name on runtime

app_name = os.getenv("APP_NAME", "default_app") # Get app name on runtime

spark_controller.run_application(app_name)
# spark_controller.run_application(app_name)
spark_controller.provision()


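Taken together, the __main__.py hunks switch the example from running an application to provisioning one. A condensed sketch of the resulting flow (assembled from the hunks above; the `spark_factory.new(...)` call and the `applications` imports are elided in the diff and assumed from the repository's example):

    from damavand.cloud.provider import AwsProvider
    from damavand.factories import SparkControllerFactory

    from applications.orders import CustomerOrders
    from applications.products import Products


    def main() -> None:
        # Target AWS in eu-west-1, tagging everything as dev.
        spark_factory = SparkControllerFactory(
            provider=AwsProvider(app_name="my-app", region="eu-west-1"),
            tags={"env": "dev"},
        )

        # Register both example applications with the controller (the exact
        # factory method is an assumption based on the surrounding context).
        spark_controller = spark_factory.new(
            name="my-spark",
            applications=[Products(), CustomerOrders()],
        )

        # Provision cloud resources; run_application stays commented out upstream.
        spark_controller.provision()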
15 changes: 14 additions & 1 deletion examples/sparkle/applications/orders.py
@@ -1,4 +1,5 @@
from sparkle.config import Config
from sparkle.config import Config, IcebergConfig, KafkaReaderConfig
from sparkle.config.kafka_config import KafkaConfig, Credentials
from sparkle.writer.iceberg_writer import IcebergWriter
from sparkle.application import Sparkle
from sparkle.reader.kafka_reader import KafkaReader
@@ -15,6 +16,18 @@ def __init__(self):
version="0.0.1",
database_bucket="s3://test-bucket",
checkpoints_bucket="s3://test-checkpoints",
iceberg_output=IcebergConfig(
database_name="all_products",
database_path="",
table_name="orders_v1",
),
kafka_input=KafkaReaderConfig(
KafkaConfig(
bootstrap_servers="localhost:9119",
credentials=Credentials("test", "test"),
),
kafka_topic="src_orders_v1",
),
),
readers={"orders": KafkaReader},
writers=[IcebergWriter],
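The interesting part of orders.py is the new Config wiring: a Kafka topic in, an Iceberg table out. A tiny smoke test makes that visible (hypothetical; it assumes Config exposes these fields as attributes, which the `app.config.app_id` access in spark.py below suggests):

    from applications.orders import CustomerOrders

    orders = CustomerOrders()

    # Source: local test broker on port 9119, topic src_orders_v1.
    print(orders.config.kafka_input)

    # Sink: Iceberg table orders_v1 in the all_products database.
    print(orders.config.iceberg_output)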
14 changes: 11 additions & 3 deletions examples/sparkle/applications/products.py
@@ -1,7 +1,6 @@
from sparkle.application import Sparkle
from sparkle.config import Config
from sparkle.config import Config, IcebergConfig, TableConfig
from sparkle.writer.iceberg_writer import IcebergWriter
from sparkle.writer.kafka_writer import KafkaStreamPublisher
from sparkle.reader.table_reader import TableReader

from pyspark.sql import DataFrame
@@ -16,11 +15,20 @@ def __init__(self):
version="0.0.1",
database_bucket="s3://test-bucket",
checkpoints_bucket="s3://test-checkpoints",
iceberg_output=IcebergConfig(
database_name="all_products",
database_path="",
table_name="products_v1",
),
hive_table_input=TableConfig(
database="source_database",
table="products_v1",
bucket="",
),
),
readers={"products": TableReader},
writers=[
IcebergWriter,
KafkaStreamPublisher,
],
)

19 changes: 19 additions & 0 deletions pdm.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion src/damavand/__init__.py
@@ -1 +1 @@
__version__ = "0.6.0"
__version__ = "0.7.0"
1 change: 0 additions & 1 deletion src/damavand/base/controllers/spark.py
@@ -54,7 +54,6 @@ def application_with_id(self, app_id: str) -> Sparkle:
Returns:
Sparkle: The Spark application.
"""

for app in self.applications:
if app.config.app_id == app_id:
return app
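For reference, application_with_id is a linear scan over the registered applications keyed on each app's config.app_id. A self-contained sketch of the pattern (what the real method does when nothing matches lies past the visible hunk, so the raise below is an assumption):

    class SparkController:
        """Sketch of the lookup shown above, reduced to the relevant parts."""

        def __init__(self, applications):
            self.applications = applications

        def application_with_id(self, app_id: str):
            # Return the registered Spark application matching app_id.
            for app in self.applications:
                if app.config.app_id == app_id:
                    return app
            # Miss behavior is outside the visible diff; raising is one option.
            raise ValueError(f"No Spark application with id {app_id!r}")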
(The remaining changed files are not shown.)
