From 85268c8470f7aa279691eaa96caaaa8652d20204 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Mon, 16 May 2022 08:56:30 -0700 Subject: [PATCH] Enable pickling of CloudPath (#224) * pickling * Add to changelog --- HISTORY.md | 4 +++ cloudpathlib/cloudpath.py | 13 +++++++ docs/docs/authentication.md | 64 +++++++++++++++++++++++++++++++++ tests/test_cloudpath_file_io.py | 19 ++++++++++ 4 files changed, 100 insertions(+) diff --git a/HISTORY.md b/HISTORY.md index 2f4419ff..a5e8a226 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,9 @@ # cloudpathlib Changelog +## v0.7.2 (UNRELEASED) + + - Fixed pickling of `CloudPath` objects not working. ([Issue #223](https://github.com/drivendataorg/cloudpathlib/issues/223), [PR #224](https://github.com/drivendataorg/cloudpathlib/pull/224)) + ## v0.7.1 (2022-04-06) - Fixed inadvertent inclusion of tests module in package. ([Issue #173](https://github.com/drivendataorg/cloudpathlib/issues/173), [PR #219](https://github.com/drivendataorg/cloudpathlib/pull/219)) diff --git a/cloudpathlib/cloudpath.py b/cloudpathlib/cloudpath.py index dcb06e33..834ddbe9 100644 --- a/cloudpathlib/cloudpath.py +++ b/cloudpathlib/cloudpath.py @@ -184,6 +184,19 @@ def __del__(self): if self._handle is not None: self._handle.close() + def __getstate__(self): + state = self.__dict__.copy() + + # don't pickle client + del state["client"] + + return state + + def __setstate__(self, state): + client = self._cloud_meta.client_class.get_default_client() + state["client"] = client + return self.__dict__.update(state) + @property def _no_prefix(self) -> str: return self._str[len(self.cloud_prefix) :] diff --git a/docs/docs/authentication.md b/docs/docs/authentication.md index ad0bb51d..849c23a6 100644 --- a/docs/docs/authentication.md +++ b/docs/docs/authentication.md @@ -82,3 +82,67 @@ cp2 = CloudPath("s3://cloudpathlib-test-bucket/", client=client) client.set_as_default_client() cp3 = CloudPath("s3://cloudpathlib-test-bucket/") ``` + +## Pickling `CloudPath` 
objects + +You can pickle and unpickle `CloudPath` objects normally, for example: + +```python +from pathlib import Path +import pickle + +from cloudpathlib import CloudPath + + +with Path("cloud_path.pkl").open("wb") as f: + pickle.dump(CloudPath("s3://my-awesome-bucket/cool-file.txt"), f) + +with Path("cloud_path.pkl").open("rb") as f: + pickled = pickle.load(f) + +assert pickled.bucket == "my-awesome-bucket" +``` + +The associated `client`, however, is not pickled. When a `CloudPath` is +unpickled, the client on the unpickled object will be set to the default +client for that class. + +For example, this **will not work**: + +```python +from pathlib import Path +import pickle + +from cloudpathlib import S3Client, CloudPath + + +# create a custom client pointing to the endpoint +client = S3Client(endpoint_url="http://my.s3.server:1234") + +# use that client when creating a cloud path +p = CloudPath("s3://cloudpathlib-test-bucket/cool_file.txt", client=client) +p.write_text("hello!") + +with Path("cloud_path.pkl").open("wb") as f: + pickle.dump(p, f) + +with Path("cloud_path.pkl").open("rb") as f: + pickled = pickle.load(f) + +# this will be False, because it will use the default `S3Client` +assert pickled.exists() == False +``` + +To get this to work, you need to set the custom `client` to the default +before unpickling: + +```python +# set the custom client as the default before unpickling +client.set_as_default_client() + +with Path("cloud_path.pkl").open("rb") as f: + pickled2 = pickle.load(f) + +assert pickled2.exists() +assert pickled2.client == client +``` diff --git a/tests/test_cloudpath_file_io.py b/tests/test_cloudpath_file_io.py index 9fe0924c..61bf3ac1 100644 --- a/tests/test_cloudpath_file_io.py +++ b/tests/test_cloudpath_file_io.py @@ -1,6 +1,7 @@ from datetime import datetime import os from pathlib import PurePosixPath +import pickle from shutil import rmtree from time import sleep @@ -309,3 +310,21 @@ def test_os_open(rig): p = 
rig.create_cloud_path("dir_0/file0_0.txt") with open(p, "r") as f: assert f.readable() + + +def test_pickle(rig, tmpdir): + p = rig.create_cloud_path("dir_0/file0_0.txt") + + with (tmpdir / "test.pkl").open("wb") as f: + pickle.dump(p, f) + + with (tmpdir / "test.pkl").open("rb") as f: + pickled = pickle.load(f) + + # test a call to the network + assert pickled.exists() + + # check we unpickled, and that client is the default client + assert str(pickled) == str(p) + assert pickled.client == p.client + assert rig.client_class._default_client == pickled.client