From 6216c453b7b1fa48f40254d04eaf3afa0f44ecac Mon Sep 17 00:00:00 2001 From: Marina Date: Tue, 6 Jun 2023 21:53:53 +0000 Subject: [PATCH 1/5] Add support for auto-downloading retvec v1 model from keras applications --- retvec/tf/layers/embedding.py | 12 ++++- retvec/tf/layers/tokenizer.py | 4 +- retvec/tf/utils.py | 49 ++++++++++++++++++++ tests/tf/layers/test_embedding.py | 77 +++++++++++++++++++------------ tests/tf/layers/test_tokenizer.py | 44 +++++------------- 5 files changed, 120 insertions(+), 66 deletions(-) diff --git a/retvec/tf/layers/embedding.py b/retvec/tf/layers/embedding.py index 75b0cce..4b4e45a 100644 --- a/retvec/tf/layers/embedding.py +++ b/retvec/tf/layers/embedding.py @@ -20,6 +20,8 @@ import tensorflow as tf from tensorflow import Tensor, TensorShape +from ..utils import RETVEC_MODEL_URLS, download_retvec_saved_model + @tf.keras.utils.register_keras_serializable(package="retvec") class RETVecEmbedding(tf.keras.layers.Layer): @@ -36,7 +38,8 @@ def __init__( Args: model: Path to saved pretrained RETVec model, str or pathlib.Path - object. + object. 'retvec-v1' to use V1 of the pre-trained RETVec word + embedding model. trainable: Whether to make the pretrained RETVec model trainable or to freeze all weights. @@ -93,11 +96,16 @@ def _load_model( """Load pretrained RETVec model. Args: - path: Path to the saved REW* model. + model: Path to saved pretrained RETVec model. Either a pre-defined + RETVec model name, str or pathlib.Path. Returns: The pretrained RETVec model, trainable set to `self.trainable`. """ + path_str = str(path) + if path_str in RETVEC_MODEL_URLS: + path = download_retvec_saved_model(path_str) + model = tf.keras.models.load_model(path) model.trainable = self.trainable model.compile("adam", "mse") diff --git a/retvec/tf/layers/tokenizer.py b/retvec/tf/layers/tokenizer.py index d3c1823..0e88572 100644 --- a/retvec/tf/layers/tokenizer.py +++ b/retvec/tf/layers/tokenizer.py @@ -79,7 +79,9 @@ def __init__( `sequence_length` words. model: Path to saved pretrained RETVec model, str or pathlib.Path - object. + object. 'retvec-v1' to use V1 of the pre-trained RETVec word + embedding model, None to use the default RETVec character + encoding. trainable: Whether to make the pretrained RETVec model trainable or to freeze all weights. diff --git a/retvec/tf/utils.py b/retvec/tf/utils.py index 8d0b125..fbc8ba0 100644 --- a/retvec/tf/utils.py +++ b/retvec/tf/utils.py @@ -14,9 +14,16 @@ limitations under the License. """ +import os +from pathlib import Path +from typing import Optional import tensorflow as tf +RETVEC_MODEL_URLS = { + "retvec-v1": "https://storage.googleapis.com/tensorflow/keras-applications/retvec-v1" +} + def tf_cap_memory(): """Avoid TF to hog memory before needing it""" @@ -38,3 +45,45 @@ def clone_initializer(initializer: tf.keras.initializers.Initializer): ): return initializer.__class__.from_config(initializer.get_config()) return initializer + + +def download_retvec_saved_model( + model_name: str = "retvec-v1", + cache_dir: str = "~/.keras/", + model_cache_subdir: str = "retvec-v1", +): + if model_name not in RETVEC_MODEL_URLS: + raise ValueError(f"{model_name} is not a valid RETVec model name.") + + model_url = RETVEC_MODEL_URLS[model_name] + model_cache_subdir_variables = f"{model_cache_subdir}/variables" + + model_components = [ + "fingerprint.pb", + "keras_metadata.pb", + "saved_model.pb", + ] + + model_components_variables = [ + "variables.data-00000-of-00001", + "variables.index", + ] + + # download model components + for component_name in model_components: + tf.keras.utils.get_file( + origin=f"{model_url}/{component_name}", + extract=True, + cache_subdir=model_cache_subdir, + ) + + # download variables which are under a different folder + for component_name in model_components_variables: + tf.keras.utils.get_file( + origin=f"{model_url}/variables/{component_name}", + extract=True, + cache_subdir=model_cache_subdir_variables, + ) + + retvec_model_dir = cache_dir + model_cache_subdir + return Path(retvec_model_dir).expanduser() diff --git a/tests/tf/layers/test_embedding.py b/tests/tf/layers/test_embedding.py index fdf56da..bfd9648 100644 --- a/tests/tf/layers/test_embedding.py +++ b/tests/tf/layers/test_embedding.py @@ -18,11 +18,20 @@ from retvec.tf.layers import RETVecBinarizer, RETVecEmbedding -TEST_EMB_SIZE = 16 +TEST_EMB_SIZE = 256 +TEST_WORD_LENGTH = 16 +TEST_CHAR_ENCODING_SIZE = 24 +TEST_INPUTS = [ + tf.constant(["Testing😀"]), + tf.constant(["Testing😀", "Testing😀"]), + tf.constant(["Testing a very long string as input"]), +] def create_retvec_embedding(tmp_path): - i = tf.keras.layers.Input((16, 32), dtype=tf.float32) + i = tf.keras.layers.Input( + (TEST_WORD_LENGTH, TEST_CHAR_ENCODING_SIZE), dtype=tf.float32 + ) x = tf.keras.layers.Flatten()(i) o = tf.keras.layers.Dense(TEST_EMB_SIZE)(x) model = tf.keras.models.Model(i, o) @@ -36,32 +45,23 @@ def create_retvec_embedding(tmp_path): def test_rewnet_model(tmp_path): embedding_model = create_retvec_embedding(tmp_path) - binarizer = RETVecBinarizer(word_length=16, encoding_size=32) - - test_inputs = [ - tf.constant(["Testing😀"]), - tf.constant(["Testing😀", "Testing😀"]), - tf.constant(["Testing a very long string as input"]), - ] + binarizer = RETVecBinarizer( + word_length=TEST_WORD_LENGTH, encoding_size=TEST_CHAR_ENCODING_SIZE + ) - for test_input in test_inputs: + for test_input in TEST_INPUTS: embeddings = embedding_model(binarizer.binarize(test_input)) assert embeddings.shape == (test_input.shape[0], TEST_EMB_SIZE) def test_2d_inputs(tmp_path): - i = tf.keras.layers.Input((16, 32), dtype=tf.float32) - x = tf.keras.layers.Flatten()(i) - o = tf.keras.layers.Dense(16)(x) - model = tf.keras.models.Model(i, o) - - save_path = tmp_path / "test_retvec_embedding" - model.save(save_path) - - embedding_model = RETVecEmbedding(str(save_path)) + embedding_model = create_retvec_embedding(tmp_path) test_input = tf.random.uniform( - (2, 3, 16, 32), minval=0, maxval=2, dtype=tf.int32 + (2, 3, TEST_WORD_LENGTH, TEST_CHAR_ENCODING_SIZE), + minval=0, + maxval=2, + dtype=tf.int32, ) test_input = tf.cast(test_input, dtype=tf.float32) embeddings = embedding_model(test_input) @@ -70,24 +70,22 @@ def test_2d_inputs(tmp_path): def test_binarizer_embedding_model(tmp_path): i = tf.keras.layers.Input((1,), dtype=tf.string) - x = RETVecBinarizer(word_length=16, encoding_size=32)(i) + x = RETVecBinarizer( + word_length=TEST_WORD_LENGTH, encoding_size=TEST_CHAR_ENCODING_SIZE + )(i) o = create_retvec_embedding(tmp_path)(x) model = tf.keras.models.Model(i, o) - test_inputs = [ - tf.constant(["Testing😀"]), - tf.constant(["Testing😀", "Testing😀"]), - tf.constant(["Testing a very long string as input"]), - ] - - for test_input in test_inputs: + for test_input in TEST_INPUTS: embeddings = model(test_input) assert embeddings.shape == (test_input.shape[0], TEST_EMB_SIZE) def test_binarizer_embedding_model_2d(tmp_path): i = tf.keras.layers.Input((3,), dtype=tf.string) - x = RETVecBinarizer(word_length=16, encoding_size=32)(i) + x = RETVecBinarizer( + word_length=TEST_WORD_LENGTH, encoding_size=TEST_CHAR_ENCODING_SIZE + )(i) o = create_retvec_embedding(tmp_path)(x) model = tf.keras.models.Model(i, o) @@ -104,10 +102,29 @@ def test_binarizer_embedding_model_2d(tmp_path): def test_serialization(tmp_path): embedding_model = create_retvec_embedding(tmp_path) - i = tf.keras.layers.Input((16, 32), dtype=tf.float32) + i = tf.keras.layers.Input( + (TEST_WORD_LENGTH, TEST_CHAR_ENCODING_SIZE), dtype=tf.float32 + ) x = embedding_model(i) model = tf.keras.models.Model(i, x) save_path = tmp_path / "test_retvec_embedding_serialization" model.save(save_path) tf.keras.models.load_model(save_path) + + +def test_default_embedding_model(tmp_path): + embedding_size = 256 + binarizer = RETVecBinarizer( + word_length=TEST_WORD_LENGTH, encoding_size=TEST_CHAR_ENCODING_SIZE + ) + + i = tf.keras.layers.Input( + (TEST_WORD_LENGTH, TEST_CHAR_ENCODING_SIZE), dtype=tf.float32 + ) + x = RETVecEmbedding(model="retvec-v1")(i) + model = tf.keras.models.Model(i, x) + + for test_input in TEST_INPUTS: + embeddings = model(binarizer.binarize(test_input)) + assert embeddings.shape == (test_input.shape[0], embedding_size) diff --git a/tests/tf/layers/test_tokenizer.py b/tests/tf/layers/test_tokenizer.py index 0b0faec..4da35da 100644 --- a/tests/tf/layers/test_tokenizer.py +++ b/tests/tf/layers/test_tokenizer.py @@ -20,30 +20,15 @@ SEQUENCE_LENGTH = 128 WORD_LENGTH = 16 -CHAR_ENCODING_SIZE = 32 -EMBEDDING_SIZE = 128 - - -def create_and_save_retvec_embedding(tmp_path): - i = tf.keras.layers.Input( - (WORD_LENGTH, CHAR_ENCODING_SIZE), dtype=tf.float32 - ) - x = tf.keras.layers.Flatten()(i) - o = tf.keras.layers.Dense(EMBEDDING_SIZE)(x) - model = tf.keras.models.Model(i, o) - - save_path = tmp_path / "test_retvec_embedding" - model.save(save_path) - return str(save_path) +CHAR_ENCODING_SIZE = 24 +RETVEC_MODEL = "retvec-v1" def test_graph_mode_with_model(tmp_path): - model_path = create_and_save_retvec_embedding(tmp_path) - i = tf.keras.layers.Input((1,), dtype=tf.string) x = RETVecTokenizer( sequence_length=SEQUENCE_LENGTH, - model=model_path, + model=RETVEC_MODEL, word_length=WORD_LENGTH, char_encoding_size=CHAR_ENCODING_SIZE, )(i) @@ -59,28 +44,25 @@ def test_graph_mode_with_model(tmp_path): assert embeddings.shape == ( test_input.shape[0], SEQUENCE_LENGTH, - EMBEDDING_SIZE, + 256, ) def test_eager_mode_with_model(tmp_path): - model_path = create_and_save_retvec_embedding(tmp_path) - tokenizer = RETVecTokenizer( - model=model_path, + model=RETVEC_MODEL, sequence_length=SEQUENCE_LENGTH, word_length=WORD_LENGTH, char_encoding_size=CHAR_ENCODING_SIZE, ) - assert tokenizer.embedding_size == EMBEDDING_SIZE s = "Testing😀 a full sentence" embeddings = tokenizer.tokenize(tf.constant(s)) - assert embeddings.shape == [SEQUENCE_LENGTH, EMBEDDING_SIZE] + assert embeddings.shape == [SEQUENCE_LENGTH, tokenizer.embedding_size] embeddings = tokenizer.tokenize(tf.constant([s, s, s])) - assert embeddings.shape == [3, SEQUENCE_LENGTH, EMBEDDING_SIZE] + assert embeddings.shape == [3, SEQUENCE_LENGTH, tokenizer.embedding_size] def test_graph_mode_no_model(): @@ -146,11 +128,9 @@ def test_standardize(): def test_tfds_map_tokenize(tmp_path): - model_path = create_and_save_retvec_embedding(tmp_path) - - for model in [None, model_path]: + for model_path in [None, RETVEC_MODEL]: tokenizer = RETVecTokenizer( - model=model, + model=model_path, sequence_length=SEQUENCE_LENGTH, word_length=WORD_LENGTH, char_encoding_size=CHAR_ENCODING_SIZE, @@ -172,12 +152,10 @@ def test_tfds_map_tokenize(tmp_path): def test_serialization(tmp_path): - model_path = create_and_save_retvec_embedding(tmp_path) - - for model in [None, model_path]: + for model_path in [None, RETVEC_MODEL]: i = tf.keras.layers.Input((1,), dtype=tf.string) x = RETVecTokenizer( - model=model, + model=model_path, sequence_length=SEQUENCE_LENGTH, word_length=WORD_LENGTH, char_encoding_size=CHAR_ENCODING_SIZE, From f06e480239d00aedc7afffb80326acfed2ae4594 Mon Sep 17 00:00:00 2001 From: Marina Date: Tue, 6 Jun 2023 21:54:25 +0000 Subject: [PATCH 2/5] remove broken classifiers from setup.py --- setup.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/setup.py b/setup.py index 6475624..c742e6f 100644 --- a/setup.py +++ b/setup.py @@ -71,8 +71,6 @@ def get_version(rel_path): classifiers=[ "Development Status :: 3 - Alpha", "Environment :: Console", - "Framework :: TensorFlow", - "Framework :: Torch", "License :: OSI Approved :: Apache Software License", "Intended Audience :: Science/Research", "Programming Language :: Python :: 3", From 16426859c68d08572e3677e31de8ec7fe54b38b4 Mon Sep 17 00:00:00 2001 From: Marina Date: Tue, 6 Jun 2023 21:55:39 +0000 Subject: [PATCH 3/5] version bump to 1.0.0 --- retvec/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retvec/__init__.py b/retvec/__init__.py index 49d69b6..ae7d866 100644 --- a/retvec/__init__.py +++ b/retvec/__init__.py @@ -14,4 +14,4 @@ limitations under the License. """ -__version__ = "0.1.0" +__version__ = "1.0.0" From 349f9ee32b267c6b87c487dee8085d5cfad6c6e0 Mon Sep 17 00:00:00 2001 From: Marina Date: Thu, 29 Jun 2023 17:46:13 +0000 Subject: [PATCH 4/5] add sha256 hashes to download retvec saved model files --- retvec/tf/utils.py | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/retvec/tf/utils.py b/retvec/tf/utils.py index fbc8ba0..47b5966 100644 --- a/retvec/tf/utils.py +++ b/retvec/tf/utils.py @@ -24,6 +24,17 @@ "retvec-v1": "https://storage.googleapis.com/tensorflow/keras-applications/retvec-v1" } +# TODO (marinazh): we should download RETVec model weights instead of SavedModel files +RETVEC_COMPONENTS_HASHES = { + "retvec-v1": { + "fingerprint.pb": "5c3991599c293ba653c55e8cceae8e10815eeedea6aff75a64905cd71587d4c1", + "keras_metadata.pb": "e87e8b660ef66f8a058c4c0aa8bfaa8b683bcd4669c21e4bf71055148f8c6afc", + "saved_model.pb": "337c8e91c92946513d127b256f2872a497545186c4d2c2c09afc7d76b55454b7", + "variables.data-00000-of-00001": "22d4760b452fe8110ef2fa96b3d84186372f5259b8f6c4041a05c3ab58d93d37", + "variables.index": "431d19b7426b939c9834bb7d55d515a4ee7d7a6cda78ef0bf7b8ba03e67e480b", + } +} + def tf_cap_memory(): """Avoid TF to hog memory before needing it""" @@ -58,31 +69,21 @@ def download_retvec_saved_model( model_url = RETVEC_MODEL_URLS[model_name] model_cache_subdir_variables = f"{model_cache_subdir}/variables" - model_components = [ - "fingerprint.pb", - "keras_metadata.pb", - "saved_model.pb", - ] - - model_components_variables = [ - "variables.data-00000-of-00001", - "variables.index", - ] - # download model components - for component_name in model_components: - tf.keras.utils.get_file( - origin=f"{model_url}/{component_name}", - extract=True, - cache_subdir=model_cache_subdir, - ) + retvec_components = RETVEC_COMPONENTS_HASHES[model_name] + for component_name in retvec_components.keys(): + if "variables" in component_name: + origin = f"{model_url}/variables/{component_name}" + cache_subdir = model_cache_subdir_variables + else: + origin = f"{model_url}/{component_name}" + cache_subdir = model_cache_subdir - # download variables which are under a different folder - for component_name in model_components_variables: tf.keras.utils.get_file( - origin=f"{model_url}/variables/{component_name}", + origin=origin, extract=True, - cache_subdir=model_cache_subdir_variables, + cache_subdir=cache_subdir, + file_hash=retvec_components[component_name], ) retvec_model_dir = cache_dir + model_cache_subdir From 7a8fb9b689c478c1c8c49cefb2c5b1144455422b Mon Sep 17 00:00:00 2001 From: Marina Date: Thu, 29 Jun 2023 18:28:50 +0000 Subject: [PATCH 5/5] upgrade license header to 2023 --- retvec/__init__.py | 2 +- retvec/tf/__init__.py | 2 +- retvec/tf/dataset/__init__.py | 2 +- retvec/tf/dataset/io.py | 2 +- retvec/tf/layers/__init__.py | 2 +- retvec/tf/layers/binarizer.py | 2 +- retvec/tf/layers/embedding.py | 2 +- retvec/tf/layers/integerizer.py | 2 +- retvec/tf/layers/tokenizer.py | 2 +- retvec/tf/models/__init__.py | 2 +- retvec/tf/models/gau.py | 2 +- retvec/tf/models/layers.py | 2 +- retvec/tf/models/outputs.py | 2 +- retvec/tf/models/positional_embeddings.py | 2 +- retvec/tf/models/retvec_base.py | 2 +- retvec/tf/models/retvec_large.py | 2 +- retvec/tf/optimizers/__init__.py | 2 +- retvec/tf/optimizers/warmup_cosine_decay.py | 2 +- retvec/tf/utils.py | 2 +- tests/conftest.py | 2 +- tests/tf/layers/test_binarizer.py | 2 +- tests/tf/layers/test_embedding.py | 2 +- tests/tf/layers/test_integerizer.py | 2 +- tests/tf/layers/test_tokenizer.py | 2 +- tests/tf/models/test_models.py | 2 +- training/README.md | 3 +-- training/train_tf_retvec_models.py | 5 ++++- 27 files changed, 30 insertions(+), 28 deletions(-) diff --git a/retvec/__init__.py b/retvec/__init__.py index ae7d866..7c125ca 100644 --- a/retvec/__init__.py +++ b/retvec/__init__.py @@ -1,5 +1,5 @@ """ - Copyright 2021 Google LLC + Copyright 2023 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/retvec/tf/__init__.py b/retvec/tf/__init__.py index 473367c..6c01262 100644 --- a/retvec/tf/__init__.py +++ b/retvec/tf/__init__.py @@ -1,5 +1,5 @@ """ - Copyright 2021 Google LLC + Copyright 2023 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/retvec/tf/dataset/__init__.py b/retvec/tf/dataset/__init__.py index 662a4a4..83f918e 100644 --- a/retvec/tf/dataset/__init__.py +++ b/retvec/tf/dataset/__init__.py @@ -1,5 +1,5 @@ """ - Copyright 2021 Google LLC + Copyright 2023 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/retvec/tf/dataset/io.py b/retvec/tf/dataset/io.py index fc84dfd..c5644c8 100644 --- a/retvec/tf/dataset/io.py +++ b/retvec/tf/dataset/io.py @@ -1,5 +1,5 @@ """ - Copyright 2021 Google LLC + Copyright 2023 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/retvec/tf/layers/__init__.py b/retvec/tf/layers/__init__.py index aa74759..f6246e8 100644 --- a/retvec/tf/layers/__init__.py +++ b/retvec/tf/layers/__init__.py @@ -1,5 +1,5 @@ """ - Copyright 2021 Google LLC + Copyright 2023 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/retvec/tf/layers/binarizer.py b/retvec/tf/layers/binarizer.py index 11b73c2..ca141a6 100644 --- a/retvec/tf/layers/binarizer.py +++ b/retvec/tf/layers/binarizer.py @@ -1,5 +1,5 @@ """ - Copyright 2021 Google LLC + Copyright 2023 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/retvec/tf/layers/embedding.py b/retvec/tf/layers/embedding.py index 4b4e45a..a96f2cc 100644 --- a/retvec/tf/layers/embedding.py +++ b/retvec/tf/layers/embedding.py @@ -1,5 +1,5 @@ """ - Copyright 2021 Google LLC + Copyright 2023 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/retvec/tf/layers/integerizer.py b/retvec/tf/layers/integerizer.py index 2808c32..f1d3af8 100644 --- a/retvec/tf/layers/integerizer.py +++ b/retvec/tf/layers/integerizer.py @@ -1,5 +1,5 @@ """ - Copyright 2021 Google LLC + Copyright 2023 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/retvec/tf/layers/tokenizer.py b/retvec/tf/layers/tokenizer.py index 0e88572..1a4987f 100644 --- a/retvec/tf/layers/tokenizer.py +++ b/retvec/tf/layers/tokenizer.py @@ -1,5 +1,5 @@ """ - Copyright 2021 Google LLC + Copyright 2023 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/retvec/tf/models/__init__.py b/retvec/tf/models/__init__.py index 662a4a4..83f918e 100644 --- a/retvec/tf/models/__init__.py +++ b/retvec/tf/models/__init__.py @@ -1,5 +1,5 @@ """ - Copyright 2021 Google LLC + Copyright 2023 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/retvec/tf/models/gau.py b/retvec/tf/models/gau.py index f04bd5c..1a13a86 100644 --- a/retvec/tf/models/gau.py +++ b/retvec/tf/models/gau.py @@ -1,5 +1,5 @@ """ - Copyright 2021 Google LLC + Copyright 2023 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/retvec/tf/models/layers.py b/retvec/tf/models/layers.py index 184ffad..aafccb8 100644 --- a/retvec/tf/models/layers.py +++ b/retvec/tf/models/layers.py @@ -1,5 +1,5 @@ """ - Copyright 2021 Google LLC + Copyright 2023 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/retvec/tf/models/outputs.py b/retvec/tf/models/outputs.py index b9929b2..09e84ef 100644 --- a/retvec/tf/models/outputs.py +++ b/retvec/tf/models/outputs.py @@ -1,5 +1,5 @@ """ - Copyright 2021 Google LLC + Copyright 2023 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/retvec/tf/models/positional_embeddings.py b/retvec/tf/models/positional_embeddings.py index 50b3ce1..58ab0f3 100644 --- a/retvec/tf/models/positional_embeddings.py +++ b/retvec/tf/models/positional_embeddings.py @@ -1,5 +1,5 @@ """ - Copyright 2021 Google LLC + Copyright 2023 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/retvec/tf/models/retvec_base.py b/retvec/tf/models/retvec_base.py index b8aa34f..1b8fd50 100644 --- a/retvec/tf/models/retvec_base.py +++ b/retvec/tf/models/retvec_base.py @@ -1,5 +1,5 @@ """ - Copyright 2021 Google LLC + Copyright 2023 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/retvec/tf/models/retvec_large.py b/retvec/tf/models/retvec_large.py index 6dfd41a..9960ff6 100644 --- a/retvec/tf/models/retvec_large.py +++ b/retvec/tf/models/retvec_large.py @@ -1,5 +1,5 @@ """ - Copyright 2021 Google LLC + Copyright 2023 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/retvec/tf/optimizers/__init__.py b/retvec/tf/optimizers/__init__.py index e78f16e..5a8017f 100644 --- a/retvec/tf/optimizers/__init__.py +++ b/retvec/tf/optimizers/__init__.py @@ -1,5 +1,5 @@ """ - Copyright 2021 Google LLC + Copyright 2023 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/retvec/tf/optimizers/warmup_cosine_decay.py b/retvec/tf/optimizers/warmup_cosine_decay.py index 20342e1..b124179 100644 --- a/retvec/tf/optimizers/warmup_cosine_decay.py +++ b/retvec/tf/optimizers/warmup_cosine_decay.py @@ -1,5 +1,5 @@ """ - Copyright 2021 Google LLC + Copyright 2023 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/retvec/tf/utils.py b/retvec/tf/utils.py index 47b5966..49ffeee 100644 --- a/retvec/tf/utils.py +++ b/retvec/tf/utils.py @@ -1,5 +1,5 @@ """ - Copyright 2021 Google LLC + Copyright 2023 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/tests/conftest.py b/tests/conftest.py index 6377c54..dd6fa0c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,5 @@ """ - Copyright 2021 Google LLC + Copyright 2023 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/tests/tf/layers/test_binarizer.py b/tests/tf/layers/test_binarizer.py index 38f7f4e..66f51b5 100644 --- a/tests/tf/layers/test_binarizer.py +++ b/tests/tf/layers/test_binarizer.py @@ -1,5 +1,5 @@ """ - Copyright 2021 Google LLC + Copyright 2023 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/tests/tf/layers/test_embedding.py b/tests/tf/layers/test_embedding.py index bfd9648..5b8f50e 100644 --- a/tests/tf/layers/test_embedding.py +++ b/tests/tf/layers/test_embedding.py @@ -1,5 +1,5 @@ """ - Copyright 2021 Google LLC + Copyright 2023 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/tests/tf/layers/test_integerizer.py b/tests/tf/layers/test_integerizer.py index 6a824cd..6f5860a 100644 --- a/tests/tf/layers/test_integerizer.py +++ b/tests/tf/layers/test_integerizer.py @@ -1,5 +1,5 @@ """ - Copyright 2021 Google LLC + Copyright 2023 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/tests/tf/layers/test_tokenizer.py b/tests/tf/layers/test_tokenizer.py index 4da35da..9de5c9c 100644 --- a/tests/tf/layers/test_tokenizer.py +++ b/tests/tf/layers/test_tokenizer.py @@ -1,5 +1,5 @@ """ - Copyright 2021 Google LLC + Copyright 2023 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/tests/tf/models/test_models.py b/tests/tf/models/test_models.py index 78fef18..aa88a80 100644 --- a/tests/tf/models/test_models.py +++ b/tests/tf/models/test_models.py @@ -1,5 +1,5 @@ """ - Copyright 2021 Google LLC + Copyright 2023 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/training/README.md b/training/README.md index e0f7d54..fbcc16c 100644 --- a/training/README.md +++ b/training/README.md @@ -1,7 +1,6 @@ # RetVec Training -This directory contains the scripts needed to train RetVec models as described -in [Fixme] +This directory contains the scripts needed to train RETVec models. ## Usage diff --git a/training/train_tf_retvec_models.py b/training/train_tf_retvec_models.py index 63b91d8..5ce1ccd 100644 --- a/training/train_tf_retvec_models.py +++ b/training/train_tf_retvec_models.py @@ -1,9 +1,12 @@ """ - Copyright 2021 Google LLC + Copyright 2023 Google LLC + Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at + https://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.