-
Notifications
You must be signed in to change notification settings - Fork 307
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Make FlyteFile and FlyteDirectory pickleable #3030
Changes from 9 commits
8a0804e
2060962
dddde88
cc49802
6adf14b
37f61e2
6152ce4
3aaf9ac
a60450b
2e2e5d9
d0b1d3c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ | |
import typing | ||
from contextlib import contextmanager | ||
from dataclasses import dataclass, field | ||
from functools import partial | ||
from typing import Dict, cast | ||
from urllib.parse import unquote | ||
|
||
|
@@ -20,6 +21,7 @@ | |
|
||
from flytekit.core.constants import MESSAGEPACK | ||
from flytekit.core.context_manager import FlyteContext, FlyteContextManager | ||
from flytekit.core.data_persistence import FileAccessProvider | ||
from flytekit.core.type_engine import ( | ||
AsyncTypeTransformer, | ||
TypeEngine, | ||
|
@@ -307,7 +309,8 @@ def __init__( | |
if ctx.file_access.is_remote(self.path): | ||
self._remote_source = self.path | ||
self._local_path = ctx.file_access.get_random_local_path(self._remote_source) | ||
self._downloader = lambda: FlyteFilePathTransformer.downloader( | ||
self._downloader = partial( | ||
FlyteFilePathTransformer.downloader, | ||
ctx=ctx, | ||
remote_path=self._remote_source, # type: ignore | ||
local_path=self._local_path, | ||
|
@@ -732,25 +735,28 @@ async def async_to_python_value( | |
|
||
# For the remote case, return an FlyteFile object that can download | ||
local_path = ctx.file_access.get_random_local_path(uri) | ||
|
||
expected_format = FlyteFilePathTransformer.get_format(expected_python_type) | ||
ff = FlyteFile.__class_getitem__(expected_format)( | ||
path=local_path, downloader=lambda: self.downloader(ctx=ctx, remote_path=uri, local_path=local_path) | ||
path=local_path, | ||
downloader=partial(self.downloader, ctx.file_access, remote_path=uri, local_path=local_path), | ||
) | ||
ff._remote_source = uri | ||
|
||
return ff | ||
|
||
@staticmethod | ||
def downloader( | ||
ctx: FlyteContext, remote_path: typing.Union[str, os.PathLike], local_path: typing.Union[str, os.PathLike] | ||
file_access_provider: FileAccessProvider, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This breaks backward compatibility. Is it safe to change the signature here? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As far as I can tell, this staticmethod was added in a PR 4 days ago (https://github.com/flyteorg/flytekit/pull/2991/files), and the only call site was in this module itself. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd rather not have this be part of the public API. Can this be renamed to There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We can also revert this to the older implementation with the pure partial. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That works for me. |
||
remote_path: typing.Union[str, os.PathLike], | ||
local_path: typing.Union[str, os.PathLike], | ||
) -> None: | ||
""" | ||
Download data from remote_path to local_path. | ||
|
||
We design the downloader as a static method because its behavior is logically | ||
related to this class but don't need to interact with class or instance data. | ||
""" | ||
ctx.file_access.get_data(remote_path, local_path, is_multipart=False) | ||
file_access_provider.get_data(remote_path, local_path, is_multipart=False) | ||
|
||
def guess_python_type(self, literal_type: LiteralType) -> typing.Type[FlyteFile[typing.Any]]: | ||
if ( | ||
|
Original file line number | Diff line number | Diff line change | ||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -1,5 +1,6 @@ | ||||||||||||||||||||
import os | ||||||||||||||||||||
import pathlib | ||||||||||||||||||||
import pickle | ||||||||||||||||||||
import shutil | ||||||||||||||||||||
import tempfile | ||||||||||||||||||||
import typing | ||||||||||||||||||||
|
@@ -20,7 +21,7 @@ | |||||||||||||||||||
from flytekit.core.workflow import workflow | ||||||||||||||||||||
from flytekit.exceptions.user import FlyteAssertion | ||||||||||||||||||||
from flytekit.models.core.types import BlobType | ||||||||||||||||||||
from flytekit.models.literals import LiteralMap | ||||||||||||||||||||
from flytekit.models.literals import LiteralMap, Blob, BlobMetadata | ||||||||||||||||||||
from flytekit.types.directory.types import FlyteDirectory, FlyteDirToMultipartBlobTransformer | ||||||||||||||||||||
from google.protobuf import json_format as _json_format | ||||||||||||||||||||
from google.protobuf import struct_pb2 as _struct | ||||||||||||||||||||
|
@@ -407,8 +408,7 @@ def my_wf(path: SvgDirectory) -> DC: | |||||||||||||||||||
assert dc1 == dc2 | ||||||||||||||||||||
|
||||||||||||||||||||
|
||||||||||||||||||||
def test_input_from_flyte_console_attribute_access_flytefile( | ||||||||||||||||||||
local_dummy_directory): | ||||||||||||||||||||
def test_input_from_flyte_console_attribute_access_flytefile(local_dummy_directory): | ||||||||||||||||||||
# Flyte Console will send the input data as protobuf Struct | ||||||||||||||||||||
|
||||||||||||||||||||
dict_obj = {"path": local_dummy_directory} | ||||||||||||||||||||
|
@@ -422,3 +422,27 @@ def test_input_from_flyte_console_attribute_access_flytefile( | |||||||||||||||||||
FlyteContextManager.current_context(), upstream_output, FlyteDirectory) | ||||||||||||||||||||
assert isinstance(downstream_input, FlyteDirectory) | ||||||||||||||||||||
assert downstream_input == FlyteDirectory(local_dummy_directory) | ||||||||||||||||||||
|
||||||||||||||||||||
|
||||||||||||||||||||
def test_flyte_directory_is_pickleable(): | ||||||||||||||||||||
upstream_output = Literal( | ||||||||||||||||||||
scalar=Scalar( | ||||||||||||||||||||
blob=Blob( | ||||||||||||||||||||
uri="s3://sample-path/directory", | ||||||||||||||||||||
metadata=BlobMetadata( | ||||||||||||||||||||
type=BlobType( | ||||||||||||||||||||
dimensionality=BlobType.BlobDimensionality.MULTIPART, | ||||||||||||||||||||
format="" | ||||||||||||||||||||
) | ||||||||||||||||||||
) | ||||||||||||||||||||
) | ||||||||||||||||||||
) | ||||||||||||||||||||
) | ||||||||||||||||||||
downstream_input = TypeEngine.to_python_value( | ||||||||||||||||||||
FlyteContextManager.current_context(), upstream_output, FlyteDirectory | ||||||||||||||||||||
) | ||||||||||||||||||||
|
||||||||||||||||||||
# test round trip pickling | ||||||||||||||||||||
pickled_input = pickle.dumps(downstream_input) | ||||||||||||||||||||
unpickled_input = pickle.loads(pickled_input) | ||||||||||||||||||||
assert downstream_input == unpickled_input | ||||||||||||||||||||
Comment on lines +445 to +448
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Consider adding more pickle test assertions
Consider adding more assertions to verify the pickled/unpickled object. Code suggestion: check the AI-generated fix before applying.
Suggested change
Code Review Run #639caa Is this a valid issue, or was it incorrectly flagged by the Agent?
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider updating the `downloader` partial function call to include type hints for better code maintainability and IDE support. The `ctx.file_access` parameter type could be explicitly specified as `FileAccessProvider`.
Code suggestion
Code Review Run #d95336
Is this a valid issue, or was it incorrectly flagged by the Agent?