Skip to content

Commit

Permalink
Fix engine choice in case of connection to serverless
Browse files Browse the repository at this point in the history
  • Loading branch information
aversey committed Dec 5, 2024
1 parent 47be364 commit 4e827bc
Showing 1 changed file with 11 additions and 11 deletions.
22 changes: 11 additions & 11 deletions python/hopsworks_common/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import weakref
from typing import Any, Optional

from hopsworks_common import client, usage, util, version
from hopsworks_common import client, constants, usage, util, version
from hopsworks_common.core import (
hosts_api,
project_api,
Expand Down Expand Up @@ -99,8 +99,8 @@ class Connection:
defaults to the project from where the client is run from.
Defaults to `None`.
engine: Specifies the engine to use. Possible options are "spark", "python", "training", "spark-no-metastore", or "spark-delta". The default value is None, which automatically selects the engine based on the environment:
"spark": Used if Spark is available, such as in Hopsworks or Databricks environments.
"python": Used in local Python environments or AWS SageMaker when Spark is not available.
"spark": Used if Spark is available, such as in Hopsworks or Databricks environments, and the connection is not to serverless Hopsworks.
"python": Used in local Python environments or AWS SageMaker when Spark is not available, or when the connection is made to serverless Hopsworks.
"training": Used when only feature store metadata is needed, such as for obtaining training dataset locations and label information during Hopsworks training experiments.
"spark-no-metastore": Functions like "spark" but does not rely on the Hive metastore.
"spark-delta": Minimizes dependencies further by avoiding both Hive metastore and HopsFS.
Expand Down Expand Up @@ -339,12 +339,15 @@ def connect(self) -> None:
try:
# determine engine, needed to init client
if (self._engine is not None and self._engine.lower() == "spark") or (
self._engine is None and importlib.util.find_spec("pyspark")
self._engine is None
and importlib.util.find_spec("pyspark")
and (
client.base.Client.REST_ENDPOINT in os.environ
or self._host != constants.HOSTS.APP_HOST
)
):
self._engine = "spark"
elif (self._engine is not None and self._engine.lower() == "python") or (
self._engine is None and not importlib.util.find_spec("pyspark")
):
elif self._engine is None or self._engine.lower() == "python":
self._engine = "python"
elif self._engine is not None and self._engine.lower() == "training":
self._engine = "training"
Expand All @@ -353,10 +356,7 @@ def connect(self) -> None:
and self._engine.lower() == "spark-no-metastore"
):
self._engine = "spark-no-metastore"
elif (
self._engine is not None
and self._engine.lower() == "spark-delta"
):
elif self._engine is not None and self._engine.lower() == "spark-delta":
self._engine = "spark-delta"
else:
raise ConnectionError(
Expand Down

0 comments on commit 4e827bc

Please sign in to comment.