Skip to content

Commit

Permalink
Revert "Add serialization support for SentencepieceResources."
Browse files Browse the repository at this point in the history
This reverts commit 52f9004.

The correct symbols are not exported from TF, so it will break Windows builds.
  • Loading branch information
broken committed Jul 1, 2021
1 parent a14c2d5 commit e9ba855
Show file tree
Hide file tree
Showing 3 changed files with 1 addition and 59 deletions.
1 change: 0 additions & 1 deletion tensorflow_text/core/kernels/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,6 @@ tf_text_kernel_library(
# absl/strings dep
# absl/types:span dep
"@com_google_sentencepiece//src:sentencepiece_cc_proto",
"@com_google_sentencepiece//src:sentencepiece_model_cc_proto",
"@com_google_sentencepiece//src:sentencepiece_processor",
] + tf_deps(deps = [
# tf:framework tensorflow dep,
Expand Down
23 changes: 1 addition & 22 deletions tensorflow_text/core/kernels/sentencepiece_kernels.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include "absl/strings/string_view.h"
#include "absl/synchronization/mutex.h"
#include "absl/types/span.h"
#include "src/sentencepiece_model.pb.h"
#include "src/sentencepiece.pb.h"
#include "src/sentencepiece_processor.h"
#include "tensorflow/core/framework/bounds_check.h"
Expand All @@ -33,7 +32,6 @@
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/graph/graph_def_builder.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/refcount.h"
#include "tensorflow/core/lib/core/status.h"
Expand All @@ -56,7 +54,7 @@ struct SentencepieceResource : public ResourceBase {
bool add_bos = false;
bool add_eos = false;
bool reverse = false;
mutable absl::Mutex mu;
absl::Mutex mu;

string DebugString() const override { return "Sentencepiece Resource"; }

Expand All @@ -66,25 +64,6 @@ struct SentencepieceResource : public ResourceBase {
return (add_bos == this->add_bos) && (add_eos == this->add_eos) &&
(reverse == this->reverse);
}

Status AsGraphDef(GraphDefBuilder* builder, Node** out) const override {
absl::ReaderMutexLock l(&mu);
// We set use_node_name_sharing with a unique node name so that the resource
// can outlive the kernel. This means that the lifetime of the re-created
// resource will be tied to the lifetime of the resource manager it is
// created in.
static std::atomic<int64> counter(0);
std::string unique_node_name = strings::StrCat(
"SentencepieceResourceFromGraphDef", "/", counter.fetch_add(1));
std::string model = processor.model_proto().SerializeAsString();
*out = ops::SourceOp(
"SentencepieceOp",
builder->opts()
.WithName(unique_node_name)
.WithAttr("model", model)
.WithAttr("use_node_name_sharing", true));
return Status::OK();
}
};

// According to .../tensorflow/core/util/work_sharder.cc, this values determines
Expand Down
36 changes: 0 additions & 36 deletions tensorflow_text/python/ops/sentencepiece_tokenizer_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
from tensorflow.python.framework import test_util
from tensorflow.python.lib.io import file_io
from tensorflow.python.module import module
from tensorflow.python.ops import gen_experimental_dataset_ops
from tensorflow.python.ops.ragged import ragged_factory_ops
from tensorflow.python.ops.ragged import ragged_gather_ops
from tensorflow.python.platform import gfile
Expand Down Expand Up @@ -500,40 +499,5 @@ def testInvalidModel(self):
result.eval()


# Test that datasets depending on a sentencepiece tokenizer resources can be
# serialized without external references.
# This test is separate from `SentencepieceTokenizerOpTest` below because
# context._reset_context() must be called from outside the context created by
# `@test_util.run_all_in_graph_and_eager_modes`.
class DatasetSerializationTest(test_util.TensorFlowTestCase):

def testSerialization(self):
with context.eager_mode():
sentencepiece_model_file = (
'tensorflow_text/python/ops/test_data/'
'test_oss_model.model')
model = gfile.GFile(sentencepiece_model_file, 'rb').read()
sp = SentencepieceTokenizer(model)
strings = ['hello', 'world']
dataset = dataset_ops.Dataset.from_tensor_slices(strings)
# Ensure we can map the tokenizer across the dataset.
dataset = dataset.map(sp.tokenize)
graph = dataset._as_serialized_graph()
element_spec = dataset.element_spec
dataset_graph_string = graph.numpy()
expected = sp.tokenize(strings)

# Reset the eager context to make sure that the serialized dataset graph
# is self-contained.
context._reset_context()

with context.eager_mode():
restored = dataset_ops.from_variant(
gen_experimental_dataset_ops.dataset_from_graph(dataset_graph_string),
element_spec)
for i, result in enumerate(restored):
self.assertAllEqual(result, expected[i])


if __name__ == '__main__':
test.main()

0 comments on commit e9ba855

Please sign in to comment.