diff --git a/examples/structured_data/img/tabtransformer/tabtransformer_24_1.png b/examples/structured_data/img/tabtransformer/tabtransformer_24_1.png deleted file mode 100644 index 1541a0a1e8..0000000000 Binary files a/examples/structured_data/img/tabtransformer/tabtransformer_24_1.png and /dev/null differ diff --git a/examples/structured_data/img/tabtransformer/tabtransformer_24_2.png b/examples/structured_data/img/tabtransformer/tabtransformer_24_2.png new file mode 100644 index 0000000000..b4f53f0d34 Binary files /dev/null and b/examples/structured_data/img/tabtransformer/tabtransformer_24_2.png differ diff --git a/examples/structured_data/img/tabtransformer/tabtransformer_29_1.png b/examples/structured_data/img/tabtransformer/tabtransformer_29_1.png index 829f870bf4..493d66c585 100644 Binary files a/examples/structured_data/img/tabtransformer/tabtransformer_29_1.png and b/examples/structured_data/img/tabtransformer/tabtransformer_29_1.png differ diff --git a/examples/structured_data/ipynb/tabtransformer.ipynb b/examples/structured_data/ipynb/tabtransformer.ipynb index 227fb6f01c..3a0778e27d 100644 --- a/examples/structured_data/ipynb/tabtransformer.ipynb +++ b/examples/structured_data/ipynb/tabtransformer.ipynb @@ -29,13 +29,7 @@ "The Transformer layers transform the embeddings of categorical features\n", "into robust contextual embeddings to achieve higher predictive accuracy.\n", "\n", - "This example should be run with TensorFlow 2.7 or higher,\n", - "as well as [TensorFlow Addons](https://www.tensorflow.org/addons/overview),\n", - "which can be installed using the following command:\n", "\n", - "```python\n", - "pip install -U tensorflow-addons\n", - "```\n", "\n", "## Setup" ] @@ -48,14 +42,16 @@ }, "outputs": [], "source": [ + "import keras\n", + "from keras import layers\n", + "from keras import ops\n", + "\n", "import math\n", "import numpy as np\n", "import pandas as pd\n", - "import tensorflow as tf\n", - "from tensorflow import keras\n", - "from tensorflow.keras import layers\n", - "import tensorflow_addons as tfa\n", - "import matplotlib.pyplot as plt" + "from tensorflow import data as tf_data\n", + "import matplotlib.pyplot as plt\n", + "from functools import partial" ] }, { @@ -289,19 +285,43 @@ " return features, target_index, weights\n", "\n", "\n", + "lookup_dict = {}\n", + "for feature_name in CATEGORICAL_FEATURE_NAMES:\n", + " vocabulary = CATEGORICAL_FEATURES_WITH_VOCABULARY[feature_name]\n", + " # Create a lookup to convert a string values to an integer indices.\n", + " # Since we are not using a mask token, nor expecting any out of vocabulary\n", + " # (oov) token, we set mask_token to None and num_oov_indices to 0.\n", + " lookup = layers.StringLookup(\n", + " vocabulary=vocabulary, mask_token=None, num_oov_indices=0\n", + " )\n", + " lookup_dict[feature_name] = lookup\n", + "\n", + "\n", + "def encode_categorical(batch_x, batch_y, weights):\n", + " for feature_name in CATEGORICAL_FEATURE_NAMES:\n", + " batch_x[feature_name] = lookup_dict[feature_name](batch_x[feature_name])\n", + "\n", + " return batch_x, batch_y, weights\n", + "\n", + "\n", "def get_dataset_from_csv(csv_file_path, batch_size=128, shuffle=False):\n", - " dataset = tf.data.experimental.make_csv_dataset(\n", - " csv_file_path,\n", - " batch_size=batch_size,\n", - " column_names=CSV_HEADER,\n", - " column_defaults=COLUMN_DEFAULTS,\n", - " label_name=TARGET_FEATURE_NAME,\n", - " num_epochs=1,\n", - " header=False,\n", - " na_value=\"?\",\n", - " shuffle=shuffle,\n", - " ).map(prepare_example, 
num_parallel_calls=tf.data.AUTOTUNE, deterministic=False)\n", - " return dataset.cache()\n" + " dataset = (\n", + " tf_data.experimental.make_csv_dataset(\n", + " csv_file_path,\n", + " batch_size=batch_size,\n", + " column_names=CSV_HEADER,\n", + " column_defaults=COLUMN_DEFAULTS,\n", + " label_name=TARGET_FEATURE_NAME,\n", + " num_epochs=1,\n", + " header=False,\n", + " na_value=\"?\",\n", + " shuffle=shuffle,\n", + " )\n", + " .map(prepare_example, num_parallel_calls=tf_data.AUTOTUNE, deterministic=False)\n", + " .map(encode_categorical)\n", + " )\n", + " return dataset.cache()\n", + "" ] }, { @@ -331,8 +351,7 @@ " weight_decay,\n", " batch_size,\n", "):\n", - "\n", - " optimizer = tfa.optimizers.AdamW(\n", + " optimizer = keras.optimizers.AdamW(\n", " learning_rate=learning_rate, weight_decay=weight_decay\n", " )\n", "\n", @@ -355,7 +374,8 @@ "\n", " print(f\"Validation accuracy: {round(accuracy * 100, 2)}%\")\n", "\n", - " return history\n" + " return history\n", + "" ] }, { @@ -385,13 +405,14 @@ " for feature_name in FEATURE_NAMES:\n", " if feature_name in NUMERIC_FEATURE_NAMES:\n", " inputs[feature_name] = layers.Input(\n", - " name=feature_name, shape=(), dtype=tf.float32\n", + " name=feature_name, shape=(), dtype=\"float32\"\n", " )\n", " else:\n", " inputs[feature_name] = layers.Input(\n", - " name=feature_name, shape=(), dtype=tf.string\n", + " name=feature_name, shape=(), dtype=\"float32\"\n", " )\n", - " return inputs\n" + " return inputs\n", + "" ] }, { @@ -417,28 +438,17 @@ "source": [ "\n", "def encode_inputs(inputs, embedding_dims):\n", - "\n", " encoded_categorical_feature_list = []\n", " numerical_feature_list = []\n", "\n", " for feature_name in inputs:\n", " if feature_name in CATEGORICAL_FEATURE_NAMES:\n", - "\n", - " # Get the vocabulary of the categorical feature.\n", " vocabulary = CATEGORICAL_FEATURES_WITH_VOCABULARY[feature_name]\n", - "\n", - " # Create a lookup to convert string values to an integer indices.\n", - " # Since we are not using a mask token nor expecting any out of vocabulary\n", - " # (oov) token, we set mask_token to None and num_oov_indices to 0.\n", - " lookup = layers.StringLookup(\n", - " vocabulary=vocabulary,\n", - " mask_token=None,\n", - " num_oov_indices=0,\n", - " output_mode=\"int\",\n", - " )\n", + " # Create a lookup to convert a string values to an integer indices.\n", + " # Since we are not using a mask token, nor expecting any out of vocabulary\n", + " # (oov) token, we set mask_token to None and num_oov_indices to 0.\n", "\n", " # Convert the string input values into integer indices.\n", - " encoded_feature = lookup(inputs[feature_name])\n", "\n", " # Create an embedding layer with the specified dimensions.\n", " embedding = layers.Embedding(\n", @@ -446,16 +456,16 @@ " )\n", "\n", " # Convert the index values to embedding representations.\n", - " encoded_categorical_feature = embedding(encoded_feature)\n", + " encoded_categorical_feature = embedding(inputs[feature_name])\n", " encoded_categorical_feature_list.append(encoded_categorical_feature)\n", "\n", " else:\n", - "\n", " # Use the numerical features as-is.\n", - " numerical_feature = tf.expand_dims(inputs[feature_name], -1)\n", + " numerical_feature = ops.expand_dims(inputs[feature_name], -1)\n", " numerical_feature_list.append(numerical_feature)\n", "\n", - " return encoded_categorical_feature_list, numerical_feature_list\n" + " return encoded_categorical_feature_list, numerical_feature_list\n", + "" ] }, { @@ -477,14 +487,14 @@ "source": [ "\n", "def 
create_mlp(hidden_units, dropout_rate, activation, normalization_layer, name=None):\n", - "\n", " mlp_layers = []\n", " for units in hidden_units:\n", - " mlp_layers.append(normalization_layer),\n", + " mlp_layers.append(normalization_layer()),\n", " mlp_layers.append(layers.Dense(units, activation=activation))\n", " mlp_layers.append(layers.Dropout(dropout_rate))\n", "\n", - " return keras.Sequential(mlp_layers, name=name)\n" + " return keras.Sequential(mlp_layers, name=name)\n", + "" ] }, { @@ -510,7 +520,6 @@ "def create_baseline_model(\n", " embedding_dims, num_mlp_blocks, mlp_hidden_units_factors, dropout_rate\n", "):\n", - "\n", " # Create model inputs.\n", " inputs = create_model_inputs()\n", " # encode features.\n", @@ -530,7 +539,7 @@ " hidden_units=feedforward_units,\n", " dropout_rate=dropout_rate,\n", " activation=keras.activations.gelu,\n", - " normalization_layer=layers.LayerNormalization(epsilon=1e-6),\n", + " normalization_layer=layers.LayerNormalization,\n", " name=f\"feedforward_{layer_idx}\",\n", " )(features)\n", "\n", @@ -543,7 +552,7 @@ " hidden_units=mlp_hidden_units,\n", " dropout_rate=dropout_rate,\n", " activation=keras.activations.selu,\n", - " normalization_layer=layers.BatchNormalization(),\n", + " normalization_layer=layers.BatchNormalization,\n", " name=\"MLP\",\n", " )(features)\n", "\n", @@ -644,7 +653,6 @@ " dropout_rate,\n", " use_column_embedding=False,\n", "):\n", - "\n", " # Create model inputs.\n", " inputs = create_model_inputs()\n", " # encode features.\n", @@ -652,7 +660,7 @@ " inputs, embedding_dims\n", " )\n", " # Stack categorical feature embeddings for the Tansformer.\n", - " encoded_categorical_features = tf.stack(encoded_categorical_feature_list, axis=1)\n", + " encoded_categorical_features = ops.stack(encoded_categorical_feature_list, axis=1)\n", " # Concatenate numerical features.\n", " numerical_features = layers.concatenate(numerical_feature_list)\n", "\n", @@ -662,7 +670,7 @@ " column_embedding = layers.Embedding(\n", " input_dim=num_columns, output_dim=embedding_dims\n", " )\n", - " column_indices = tf.range(start=0, limit=num_columns, delta=1)\n", + " column_indices = ops.arange(start=0, stop=num_columns, step=1)\n", " encoded_categorical_features = encoded_categorical_features + column_embedding(\n", " column_indices\n", " )\n", @@ -687,7 +695,9 @@ " hidden_units=[embedding_dims],\n", " dropout_rate=dropout_rate,\n", " activation=keras.activations.gelu,\n", - " normalization_layer=layers.LayerNormalization(epsilon=1e-6),\n", + " normalization_layer=partial(\n", + " layers.LayerNormalization, epsilon=1e-6\n", + " ), # using partial to provide keyword arguments before initialization\n", " name=f\"feedforward_{block_idx}\",\n", " )(x)\n", " # Skip connection 2.\n", @@ -713,7 +723,7 @@ " hidden_units=mlp_hidden_units,\n", " dropout_rate=dropout_rate,\n", " activation=keras.activations.selu,\n", - " normalization_layer=layers.BatchNormalization(),\n", + " normalization_layer=layers.BatchNormalization,\n", " name=\"MLP\",\n", " )(features)\n", "\n", @@ -794,7 +804,7 @@ "\n", "| Trained Model | Demo |\n", "| :--: | :--: |\n", - "| [![Generic badge](https://img.shields.io/badge/🤗%20Model-TabTransformer-black.svg)](https://huggingface.co/keras-io/tab_transformer) | [![Generic badge](https://img.shields.io/badge/🤗%20Spaces-TabTransformer-black.svg)](https://huggingface.co/spaces/keras-io/TabTransformer_Classification) |" + "| [![Generic 
badge](https://img.shields.io/badge/\ud83e\udd17%20Model-TabTransformer-black.svg)](https://huggingface.co/keras-io/tab_transformer) | [![Generic badge](https://img.shields.io/badge/\ud83e\udd17%20Spaces-TabTransformer-black.svg)](https://huggingface.co/spaces/keras-io/TabTransformer_Classification) |" ] } ], diff --git a/examples/structured_data/md/tabtransformer.md b/examples/structured_data/md/tabtransformer.md index 22623f898f..677e1c5fc6 100644 --- a/examples/structured_data/md/tabtransformer.md +++ b/examples/structured_data/md/tabtransformer.md @@ -20,27 +20,23 @@ The TabTransformer is built upon self-attention based Transformers. The Transformer layers transform the embeddings of categorical features into robust contextual embeddings to achieve higher predictive accuracy. -This example should be run with TensorFlow 2.7 or higher, -as well as [TensorFlow Addons](https://www.tensorflow.org/addons/overview), -which can be installed using the following command: -```python -pip install -U tensorflow-addons -``` --- ## Setup ```python +import keras +from keras import layers +from keras import ops + import math import numpy as np import pandas as pd -import tensorflow as tf -from tensorflow import keras -from tensorflow.keras import layers -import tensorflow_addons as tfa +from tensorflow import data as tf_data import matplotlib.pyplot as plt +from functools import partial ``` --- @@ -207,23 +203,45 @@ def prepare_example(features, target): return features, target_index, weights +lookup_dict = {} +for feature_name in CATEGORICAL_FEATURE_NAMES: + vocabulary = CATEGORICAL_FEATURES_WITH_VOCABULARY[feature_name] + # Create a lookup to convert a string values to an integer indices. + # Since we are not using a mask token, nor expecting any out of vocabulary + # (oov) token, we set mask_token to None and num_oov_indices to 0. 
+ lookup = layers.StringLookup( + vocabulary=vocabulary, mask_token=None, num_oov_indices=0 + ) + lookup_dict[feature_name] = lookup + + +def encode_categorical(batch_x, batch_y, weights): + for feature_name in CATEGORICAL_FEATURE_NAMES: + batch_x[feature_name] = lookup_dict[feature_name](batch_x[feature_name]) + + return batch_x, batch_y, weights + + def get_dataset_from_csv(csv_file_path, batch_size=128, shuffle=False): - dataset = tf.data.experimental.make_csv_dataset( - csv_file_path, - batch_size=batch_size, - column_names=CSV_HEADER, - column_defaults=COLUMN_DEFAULTS, - label_name=TARGET_FEATURE_NAME, - num_epochs=1, - header=False, - na_value="?", - shuffle=shuffle, - ).map(prepare_example, num_parallel_calls=tf.data.AUTOTUNE, deterministic=False) + dataset = ( + tf_data.experimental.make_csv_dataset( + csv_file_path, + batch_size=batch_size, + column_names=CSV_HEADER, + column_defaults=COLUMN_DEFAULTS, + label_name=TARGET_FEATURE_NAME, + num_epochs=1, + header=False, + na_value="?", + shuffle=shuffle, + ) + .map(prepare_example, num_parallel_calls=tf_data.AUTOTUNE, deterministic=False) + .map(encode_categorical) + ) return dataset.cache() ``` - --- ## Implement a training and evaluation procedure @@ -239,8 +257,7 @@ def run_experiment( weight_decay, batch_size, ): - - optimizer = tfa.optimizers.AdamW( + optimizer = keras.optimizers.AdamW( learning_rate=learning_rate, weight_decay=weight_decay ) @@ -282,11 +299,11 @@ def create_model_inputs(): for feature_name in FEATURE_NAMES: if feature_name in NUMERIC_FEATURE_NAMES: inputs[feature_name] = layers.Input( - name=feature_name, shape=(), dtype=tf.float32 + name=feature_name, shape=(), dtype="float32" ) else: inputs[feature_name] = layers.Input( - name=feature_name, shape=(), dtype=tf.string + name=feature_name, shape=(), dtype="float32" ) return inputs @@ -303,28 +320,17 @@ regardless their vocabulary sizes. This is required for the Transformer model. ```python def encode_inputs(inputs, embedding_dims): - encoded_categorical_feature_list = [] numerical_feature_list = [] for feature_name in inputs: if feature_name in CATEGORICAL_FEATURE_NAMES: - - # Get the vocabulary of the categorical feature. vocabulary = CATEGORICAL_FEATURES_WITH_VOCABULARY[feature_name] - - # Create a lookup to convert string values to an integer indices. - # Since we are not using a mask token nor expecting any out of vocabulary - # (oov) token, we set mask_token to None and num_oov_indices to 0. - lookup = layers.StringLookup( - vocabulary=vocabulary, - mask_token=None, - num_oov_indices=0, - output_mode="int", - ) + # Create a lookup to convert a string values to an integer indices. + # Since we are not using a mask token, nor expecting any out of vocabulary + # (oov) token, we set mask_token to None and num_oov_indices to 0. # Convert the string input values into integer indices. - encoded_feature = lookup(inputs[feature_name]) # Create an embedding layer with the specified dimensions. embedding = layers.Embedding( @@ -332,13 +338,12 @@ def encode_inputs(inputs, embedding_dims): ) # Convert the index values to embedding representations. - encoded_categorical_feature = embedding(encoded_feature) + encoded_categorical_feature = embedding(inputs[feature_name]) encoded_categorical_feature_list.append(encoded_categorical_feature) else: - # Use the numerical features as-is. 
- numerical_feature = tf.expand_dims(inputs[feature_name], -1) + numerical_feature = ops.expand_dims(inputs[feature_name], -1) numerical_feature_list.append(numerical_feature) return encoded_categorical_feature_list, numerical_feature_list @@ -352,10 +357,9 @@ def encode_inputs(inputs, embedding_dims): ```python def create_mlp(hidden_units, dropout_rate, activation, normalization_layer, name=None): - mlp_layers = [] for units in hidden_units: - mlp_layers.append(normalization_layer), + mlp_layers.append(normalization_layer()), mlp_layers.append(layers.Dense(units, activation=activation)) mlp_layers.append(layers.Dropout(dropout_rate)) @@ -374,7 +378,6 @@ In the first experiment, we create a simple multi-layer feed-forward network. def create_baseline_model( embedding_dims, num_mlp_blocks, mlp_hidden_units_factors, dropout_rate ): - # Create model inputs. inputs = create_model_inputs() # encode features. @@ -394,7 +397,7 @@ def create_baseline_model( hidden_units=feedforward_units, dropout_rate=dropout_rate, activation=keras.activations.gelu, - normalization_layer=layers.LayerNormalization(epsilon=1e-6), + normalization_layer=layers.LayerNormalization, name=f"feedforward_{layer_idx}", )(features) @@ -407,7 +410,7 @@ def create_baseline_model( hidden_units=mlp_hidden_units, dropout_rate=dropout_rate, activation=keras.activations.selu, - normalization_layer=layers.BatchNormalization(), + normalization_layer=layers.BatchNormalization, name="MLP", )(features) @@ -430,12 +433,14 @@ keras.utils.plot_model(baseline_model, show_shapes=True, rankdir="LR")
```
-Total model weights: 109629
+Total model weights: 110693
```
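One change above that is easy to miss: `create_mlp` now receives a normalization layer class (or a `functools.partial` over one) rather than a pre-built layer instance, and instantiates it with `normalization_layer()` inside the loop, so every Dense block gets its own normalization layer instead of sharing a single one. The trailing comma after that `append` call in the diff is harmless but unnecessary. A minimal standalone sketch of the pattern, assuming Keras 3; the hidden-unit sizes below are made up for illustration:

```python
# Sketch of the normalization_layer-as-factory pattern used by the updated create_mlp.
from functools import partial

import keras
from keras import layers


def create_mlp(hidden_units, dropout_rate, activation, normalization_layer, name=None):
    mlp_layers = []
    for units in hidden_units:
        # normalization_layer is a class or a partial, so calling it here builds a
        # fresh layer for every block instead of reusing one shared instance.
        mlp_layers.append(normalization_layer())
        mlp_layers.append(layers.Dense(units, activation=activation))
        mlp_layers.append(layers.Dropout(dropout_rate))
    return keras.Sequential(mlp_layers, name=name)


# Both factory forms seen in this diff work:
baseline_mlp = create_mlp(
    [64, 32], 0.1, keras.activations.selu, layers.BatchNormalization, name="MLP"
)
transformer_ffn = create_mlp(
    [32],
    0.1,
    keras.activations.gelu,
    partial(layers.LayerNormalization, epsilon=1e-6),
    name="feedforward_0",
)
```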
-![png](/img/examples/structured_data/tabtransformer/tabtransformer_24_1.png) +![png](/img/examples/structured_data/tabtransformer/tabtransformer_24_2.png) @@ -459,37 +464,37 @@ history = run_experiment( ``` Start training the model... Epoch 1/15 -123/123 [==============================] - 6s 25ms/step - loss: 110178.8203 - accuracy: 0.7478 - val_loss: 92703.0859 - val_accuracy: 0.7825 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 13s 70ms/step - accuracy: 0.6912 - loss: 127137.3984 - val_accuracy: 0.7623 - val_loss: 96156.1875 Epoch 2/15 -123/123 [==============================] - 2s 14ms/step - loss: 90979.8125 - accuracy: 0.7675 - val_loss: 71798.9219 - val_accuracy: 0.8001 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 2s 13ms/step - accuracy: 0.7626 - loss: 102946.6797 - val_accuracy: 0.7699 - val_loss: 77236.8828 Epoch 3/15 -123/123 [==============================] - 2s 14ms/step - loss: 77226.5547 - accuracy: 0.7902 - val_loss: 68581.0312 - val_accuracy: 0.8168 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 2s 13ms/step - accuracy: 0.7738 - loss: 82999.3281 - val_accuracy: 0.8154 - val_loss: 70085.9609 Epoch 4/15 -123/123 [==============================] - 2s 14ms/step - loss: 72652.2422 - accuracy: 0.8004 - val_loss: 70084.0469 - val_accuracy: 0.7974 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 2s 13ms/step - accuracy: 0.7981 - loss: 75569.4375 - val_accuracy: 0.8111 - val_loss: 69759.5547 Epoch 5/15 -123/123 [==============================] - 2s 14ms/step - loss: 71207.9375 - accuracy: 0.8033 - val_loss: 66552.1719 - val_accuracy: 0.8130 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 2s 13ms/step - accuracy: 0.8006 - loss: 74234.1641 - val_accuracy: 0.7968 - val_loss: 71532.2422 Epoch 6/15 -123/123 [==============================] - 2s 14ms/step - loss: 69321.4375 - accuracy: 0.8091 - val_loss: 65837.0469 - val_accuracy: 0.8149 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 2s 13ms/step - accuracy: 0.8074 - loss: 71770.2891 - val_accuracy: 0.8082 - val_loss: 69105.5078 Epoch 7/15 -123/123 [==============================] - 2s 14ms/step - loss: 68839.3359 - accuracy: 0.8099 - val_loss: 65613.0156 - val_accuracy: 0.8187 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 2s 13ms/step - accuracy: 0.8118 - loss: 70526.6797 - val_accuracy: 0.8094 - val_loss: 68746.7891 Epoch 8/15 -123/123 [==============================] - 2s 14ms/step - loss: 68126.7344 - accuracy: 0.8124 - val_loss: 66155.8594 - val_accuracy: 0.8108 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 2s 13ms/step - accuracy: 0.8110 - loss: 70309.3750 - val_accuracy: 0.8132 - val_loss: 68305.1328 Epoch 9/15 -123/123 [==============================] - 2s 14ms/step - loss: 67768.9844 - accuracy: 0.8147 - val_loss: 66705.8047 - val_accuracy: 0.8230 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 2s 13ms/step - accuracy: 0.8143 - loss: 69896.9141 - val_accuracy: 0.8046 - val_loss: 70013.1016 Epoch 10/15 -123/123 [==============================] - 2s 14ms/step - loss: 67482.5859 - accuracy: 0.8151 - val_loss: 65668.3672 - val_accuracy: 0.8143 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 2s 13ms/step - accuracy: 0.8124 - loss: 69885.8281 - val_accuracy: 0.8037 - val_loss: 70305.7969 Epoch 11/15 -123/123 [==============================] - 2s 14ms/step - loss: 66792.6875 - accuracy: 0.8181 - val_loss: 66536.3828 - val_accuracy: 0.8233 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 2s 13ms/step - accuracy: 0.8131 - loss: 69193.8203 - val_accuracy: 0.8075 - val_loss: 69615.5547 Epoch 12/15 -123/123 [==============================] - 2s 14ms/step - loss: 65610.4531 - accuracy: 0.8229 - val_loss: 70377.7266 - val_accuracy: 0.8256 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 2s 13ms/step - accuracy: 0.8148 - loss: 
68933.5703 - val_accuracy: 0.7997 - val_loss: 70789.2422 Epoch 13/15 -123/123 [==============================] - 2s 14ms/step - loss: 63930.2500 - accuracy: 0.8282 - val_loss: 68294.8516 - val_accuracy: 0.8289 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 2s 13ms/step - accuracy: 0.8146 - loss: 68929.5078 - val_accuracy: 0.8104 - val_loss: 68525.1016 Epoch 14/15 -123/123 [==============================] - 2s 14ms/step - loss: 63420.1562 - accuracy: 0.8323 - val_loss: 63050.5859 - val_accuracy: 0.8324 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 3s 26ms/step - accuracy: 0.8174 - loss: 68447.2500 - val_accuracy: 0.8119 - val_loss: 68787.0078 Epoch 15/15 -123/123 [==============================] - 2s 14ms/step - loss: 62619.4531 - accuracy: 0.8345 - val_loss: 66933.7500 - val_accuracy: 0.8277 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 2s 13ms/step - accuracy: 0.8184 - loss: 68346.5391 - val_accuracy: 0.8143 - val_loss: 68101.9531 Model training finished -Validation accuracy: 82.77% +Validation accuracy: 81.43% ``` @@ -526,7 +531,6 @@ def create_tabtransformer_classifier( dropout_rate, use_column_embedding=False, ): - # Create model inputs. inputs = create_model_inputs() # encode features. @@ -534,7 +538,7 @@ def create_tabtransformer_classifier( inputs, embedding_dims ) # Stack categorical feature embeddings for the Tansformer. - encoded_categorical_features = tf.stack(encoded_categorical_feature_list, axis=1) + encoded_categorical_features = ops.stack(encoded_categorical_feature_list, axis=1) # Concatenate numerical features. numerical_features = layers.concatenate(numerical_feature_list) @@ -544,7 +548,7 @@ def create_tabtransformer_classifier( column_embedding = layers.Embedding( input_dim=num_columns, output_dim=embedding_dims ) - column_indices = tf.range(start=0, limit=num_columns, delta=1) + column_indices = ops.arange(start=0, stop=num_columns, step=1) encoded_categorical_features = encoded_categorical_features + column_embedding( column_indices ) @@ -569,7 +573,9 @@ def create_tabtransformer_classifier( hidden_units=[embedding_dims], dropout_rate=dropout_rate, activation=keras.activations.gelu, - normalization_layer=layers.LayerNormalization(epsilon=1e-6), + normalization_layer=partial( + layers.LayerNormalization, epsilon=1e-6 + ), # using partial to provide keyword arguments before initialization name=f"feedforward_{block_idx}", )(x) # Skip connection 2. @@ -595,7 +601,7 @@ def create_tabtransformer_classifier( hidden_units=mlp_hidden_units, dropout_rate=dropout_rate, activation=keras.activations.selu, - normalization_layer=layers.BatchNormalization(), + normalization_layer=layers.BatchNormalization, name="MLP", )(features) @@ -619,7 +625,7 @@ keras.utils.plot_model(tabtransformer_model, show_shapes=True, rankdir="LR")
``` -Total model weights: 87479 +Total model weights: 88543 ```
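The classifier hunks above also swap the TensorFlow-specific tensor ops for their backend-agnostic `keras.ops` equivalents (`tf.stack` becomes `ops.stack`, `tf.range` becomes `ops.arange`, and earlier `tf.expand_dims` becomes `ops.expand_dims`), so the model code runs on any Keras 3 backend. A quick sketch of those equivalences, assuming Keras 3 is installed; the shapes are invented for illustration:

```python
# Illustrative check of the keras.ops calls that replace the tf.* ones in this diff.
import numpy as np
from keras import ops

# Three per-feature categorical embeddings of shape (batch, embedding_dims).
embeddings = [np.ones((4, 16), dtype="float32") for _ in range(3)]

stacked = ops.stack(embeddings, axis=1)  # was tf.stack(..., axis=1)
print(ops.shape(stacked))  # (4, 3, 16)

column_indices = ops.arange(start=0, stop=3, step=1)  # was tf.range(start=0, limit=3, delta=1)
print(column_indices)  # [0 1 2]

numerical = np.ones((4,), dtype="float32")
print(ops.shape(ops.expand_dims(numerical, -1)))  # was tf.expand_dims(...); gives (4, 1)
```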
@@ -648,37 +654,37 @@ history = run_experiment( ``` Start training the model... Epoch 1/15 -123/123 [==============================] - 13s 61ms/step - loss: 82503.1641 - accuracy: 0.7944 - val_loss: 64260.2305 - val_accuracy: 0.8421 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 46s 272ms/step - accuracy: 0.7504 - loss: 103329.7578 - val_accuracy: 0.7637 - val_loss: 122401.2188 Epoch 2/15 -123/123 [==============================] - 6s 51ms/step - loss: 68677.9375 - accuracy: 0.8251 - val_loss: 63819.8633 - val_accuracy: 0.8389 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 8s 62ms/step - accuracy: 0.8033 - loss: 79797.0469 - val_accuracy: 0.7712 - val_loss: 97510.0000 Epoch 3/15 -123/123 [==============================] - 6s 51ms/step - loss: 66703.8984 - accuracy: 0.8301 - val_loss: 63052.8789 - val_accuracy: 0.8428 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 6s 52ms/step - accuracy: 0.8202 - loss: 73736.2500 - val_accuracy: 0.8037 - val_loss: 79687.8906 Epoch 4/15 -123/123 [==============================] - 6s 51ms/step - loss: 65287.8672 - accuracy: 0.8342 - val_loss: 61593.1484 - val_accuracy: 0.8451 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 6s 52ms/step - accuracy: 0.8247 - loss: 70282.2031 - val_accuracy: 0.8355 - val_loss: 64703.9453 Epoch 5/15 -123/123 [==============================] - 6s 52ms/step - loss: 63968.8594 - accuracy: 0.8379 - val_loss: 61385.4531 - val_accuracy: 0.8442 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 6s 52ms/step - accuracy: 0.8317 - loss: 67661.8906 - val_accuracy: 0.8427 - val_loss: 64015.5156 Epoch 6/15 -123/123 [==============================] - 6s 51ms/step - loss: 63645.7812 - accuracy: 0.8394 - val_loss: 61332.3281 - val_accuracy: 0.8447 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 6s 52ms/step - accuracy: 0.8333 - loss: 67486.6562 - val_accuracy: 0.8402 - val_loss: 65543.7188 Epoch 7/15 -123/123 [==============================] - 6s 51ms/step - loss: 62778.6055 - accuracy: 0.8412 - val_loss: 61342.5352 - val_accuracy: 0.8461 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 6s 52ms/step - accuracy: 0.8359 - loss: 66328.3516 - val_accuracy: 0.8360 - val_loss: 68744.6484 Epoch 8/15 -123/123 [==============================] - 6s 51ms/step - loss: 62815.6992 - accuracy: 0.8398 - val_loss: 61220.8242 - val_accuracy: 0.8460 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 6s 52ms/step - accuracy: 0.8354 - loss: 66040.3906 - val_accuracy: 0.8209 - val_loss: 72937.5703 Epoch 9/15 -123/123 [==============================] - 6s 52ms/step - loss: 62191.1016 - accuracy: 0.8416 - val_loss: 61055.9102 - val_accuracy: 0.8452 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 6s 52ms/step - accuracy: 0.8376 - loss: 65606.2344 - val_accuracy: 0.8298 - val_loss: 72673.2031 Epoch 10/15 -123/123 [==============================] - 6s 51ms/step - loss: 61992.1602 - accuracy: 0.8439 - val_loss: 61251.8047 - val_accuracy: 0.8441 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 6s 52ms/step - accuracy: 0.8395 - loss: 65170.4375 - val_accuracy: 0.8259 - val_loss: 70717.4922 Epoch 11/15 -123/123 [==============================] - 6s 50ms/step - loss: 61745.1289 - accuracy: 0.8429 - val_loss: 61364.7695 - val_accuracy: 0.8445 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 8s 62ms/step - accuracy: 0.8395 - loss: 65003.5820 - val_accuracy: 0.8481 - val_loss: 62421.4102 Epoch 12/15 -123/123 [==============================] - 6s 51ms/step - loss: 61696.3477 - accuracy: 0.8445 - val_loss: 61074.3594 - val_accuracy: 0.8450 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 12s 94ms/step - accuracy: 0.8396 - loss: 64860.1797 - val_accuracy: 0.8482 - val_loss: 63217.3516 Epoch 13/15 -123/123 [==============================] - 6s 51ms/step - loss: 61569.1719 - accuracy: 
0.8436 - val_loss: 61844.9688 - val_accuracy: 0.8456 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 6s 52ms/step - accuracy: 0.8412 - loss: 64597.3945 - val_accuracy: 0.8256 - val_loss: 71274.4609 Epoch 14/15 -123/123 [==============================] - 6s 51ms/step - loss: 61343.0898 - accuracy: 0.8445 - val_loss: 61702.8828 - val_accuracy: 0.8455 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 11s 94ms/step - accuracy: 0.8419 - loss: 63789.4688 - val_accuracy: 0.8473 - val_loss: 63099.7422 Epoch 15/15 -123/123 [==============================] - 6s 51ms/step - loss: 61355.0547 - accuracy: 0.8454 - val_loss: 61272.2852 - val_accuracy: 0.8455 + 123/123 ━━━━━━━━━━━━━━━━━━━━ 11s 94ms/step - accuracy: 0.8427 - loss: 63856.9531 - val_accuracy: 0.8459 - val_loss: 64541.9688 Model training finished -Validation accuracy: 84.55% +Validation accuracy: 84.59% ``` diff --git a/examples/structured_data/tabtransformer.py b/examples/structured_data/tabtransformer.py index c70b86e9c7..62d9f2559a 100644 --- a/examples/structured_data/tabtransformer.py +++ b/examples/structured_data/tabtransformer.py @@ -17,25 +17,20 @@ The Transformer layers transform the embeddings of categorical features into robust contextual embeddings to achieve higher predictive accuracy. -This example should be run with TensorFlow 2.7 or higher, -as well as [TensorFlow Addons](https://www.tensorflow.org/addons/overview), -which can be installed using the following command: -```python -pip install -U tensorflow-addons -``` ## Setup """ +import keras +from keras import layers +from keras import ops import math import numpy as np import pandas as pd -import tensorflow as tf -from tensorflow import keras -from tensorflow.keras import layers -import tensorflow_addons as tfa +from tensorflow import data as tf_data import matplotlib.pyplot as plt +from functools import partial """ ## Prepare the data @@ -185,18 +180,41 @@ def prepare_example(features, target): return features, target_index, weights +lookup_dict = {} +for feature_name in CATEGORICAL_FEATURE_NAMES: + vocabulary = CATEGORICAL_FEATURES_WITH_VOCABULARY[feature_name] + # Create a lookup to convert a string values to an integer indices. + # Since we are not using a mask token, nor expecting any out of vocabulary + # (oov) token, we set mask_token to None and num_oov_indices to 0. 
+ lookup = layers.StringLookup( + vocabulary=vocabulary, mask_token=None, num_oov_indices=0 + ) + lookup_dict[feature_name] = lookup + + +def encode_categorical(batch_x, batch_y, weights): + for feature_name in CATEGORICAL_FEATURE_NAMES: + batch_x[feature_name] = lookup_dict[feature_name](batch_x[feature_name]) + + return batch_x, batch_y, weights + + def get_dataset_from_csv(csv_file_path, batch_size=128, shuffle=False): - dataset = tf.data.experimental.make_csv_dataset( - csv_file_path, - batch_size=batch_size, - column_names=CSV_HEADER, - column_defaults=COLUMN_DEFAULTS, - label_name=TARGET_FEATURE_NAME, - num_epochs=1, - header=False, - na_value="?", - shuffle=shuffle, - ).map(prepare_example, num_parallel_calls=tf.data.AUTOTUNE, deterministic=False) + dataset = ( + tf_data.experimental.make_csv_dataset( + csv_file_path, + batch_size=batch_size, + column_names=CSV_HEADER, + column_defaults=COLUMN_DEFAULTS, + label_name=TARGET_FEATURE_NAME, + num_epochs=1, + header=False, + na_value="?", + shuffle=shuffle, + ) + .map(prepare_example, num_parallel_calls=tf_data.AUTOTUNE, deterministic=False) + .map(encode_categorical) + ) return dataset.cache() @@ -214,7 +232,7 @@ def run_experiment( weight_decay, batch_size, ): - optimizer = tfa.optimizers.AdamW( + optimizer = keras.optimizers.AdamW( learning_rate=learning_rate, weight_decay=weight_decay ) @@ -254,11 +272,11 @@ def create_model_inputs(): for feature_name in FEATURE_NAMES: if feature_name in NUMERIC_FEATURE_NAMES: inputs[feature_name] = layers.Input( - name=feature_name, shape=(), dtype=tf.float32 + name=feature_name, shape=(), dtype="float32" ) else: inputs[feature_name] = layers.Input( - name=feature_name, shape=(), dtype=tf.string + name=feature_name, shape=(), dtype="float32" ) return inputs @@ -278,21 +296,12 @@ def encode_inputs(inputs, embedding_dims): for feature_name in inputs: if feature_name in CATEGORICAL_FEATURE_NAMES: - # Get the vocabulary of the categorical feature. vocabulary = CATEGORICAL_FEATURES_WITH_VOCABULARY[feature_name] - - # Create a lookup to convert string values to an integer indices. - # Since we are not using a mask token nor expecting any out of vocabulary - # (oov) token, we set mask_token to None and num_oov_indices to 0. - lookup = layers.StringLookup( - vocabulary=vocabulary, - mask_token=None, - num_oov_indices=0, - output_mode="int", - ) + # Create a lookup to convert a string values to an integer indices. + # Since we are not using a mask token, nor expecting any out of vocabulary + # (oov) token, we set mask_token to None and num_oov_indices to 0. # Convert the string input values into integer indices. - encoded_feature = lookup(inputs[feature_name]) # Create an embedding layer with the specified dimensions. embedding = layers.Embedding( @@ -300,12 +309,12 @@ def encode_inputs(inputs, embedding_dims): ) # Convert the index values to embedding representations. - encoded_categorical_feature = embedding(encoded_feature) + encoded_categorical_feature = embedding(inputs[feature_name]) encoded_categorical_feature_list.append(encoded_categorical_feature) else: # Use the numerical features as-is. 
- numerical_feature = tf.expand_dims(inputs[feature_name], -1) + numerical_feature = ops.expand_dims(inputs[feature_name], -1) numerical_feature_list.append(numerical_feature) return encoded_categorical_feature_list, numerical_feature_list @@ -319,7 +328,7 @@ def encode_inputs(inputs, embedding_dims): def create_mlp(hidden_units, dropout_rate, activation, normalization_layer, name=None): mlp_layers = [] for units in hidden_units: - mlp_layers.append(normalization_layer), + mlp_layers.append(normalization_layer()), mlp_layers.append(layers.Dense(units, activation=activation)) mlp_layers.append(layers.Dropout(dropout_rate)) @@ -355,7 +364,7 @@ def create_baseline_model( hidden_units=feedforward_units, dropout_rate=dropout_rate, activation=keras.activations.gelu, - normalization_layer=layers.LayerNormalization(epsilon=1e-6), + normalization_layer=layers.LayerNormalization, name=f"feedforward_{layer_idx}", )(features) @@ -368,7 +377,7 @@ def create_baseline_model( hidden_units=mlp_hidden_units, dropout_rate=dropout_rate, activation=keras.activations.selu, - normalization_layer=layers.BatchNormalization(), + normalization_layer=layers.BatchNormalization, name="MLP", )(features) @@ -443,7 +452,7 @@ def create_tabtransformer_classifier( inputs, embedding_dims ) # Stack categorical feature embeddings for the Tansformer. - encoded_categorical_features = tf.stack(encoded_categorical_feature_list, axis=1) + encoded_categorical_features = ops.stack(encoded_categorical_feature_list, axis=1) # Concatenate numerical features. numerical_features = layers.concatenate(numerical_feature_list) @@ -453,7 +462,7 @@ def create_tabtransformer_classifier( column_embedding = layers.Embedding( input_dim=num_columns, output_dim=embedding_dims ) - column_indices = tf.range(start=0, limit=num_columns, delta=1) + column_indices = ops.arange(start=0, stop=num_columns, step=1) encoded_categorical_features = encoded_categorical_features + column_embedding( column_indices ) @@ -478,7 +487,9 @@ def create_tabtransformer_classifier( hidden_units=[embedding_dims], dropout_rate=dropout_rate, activation=keras.activations.gelu, - normalization_layer=layers.LayerNormalization(epsilon=1e-6), + normalization_layer=partial( + layers.LayerNormalization, epsilon=1e-6 + ), # using partial to provide keyword arguments before initialization name=f"feedforward_{block_idx}", )(x) # Skip connection 2. @@ -504,7 +515,7 @@ def create_tabtransformer_classifier( hidden_units=mlp_hidden_units, dropout_rate=dropout_rate, activation=keras.activations.selu, - normalization_layer=layers.BatchNormalization(), + normalization_layer=layers.BatchNormalization, name="MLP", )(features)
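The biggest structural change across all three files is that `StringLookup` is now applied once per categorical feature inside the `tf.data` pipeline (the new `lookup_dict` plus the `encode_categorical` map) instead of inside `encode_inputs`, so the model only ever receives integer indices, which is also why the categorical `Input` dtype changes from `tf.string` to `"float32"`. A minimal, self-contained sketch of that pattern; the feature names and vocabularies below are hypothetical, not taken from the Census dataset:

```python
# Standalone sketch (not from the example) of moving StringLookup into the data
# pipeline. Feature names and vocabularies are made up for illustration.
from keras import layers
from tensorflow import data as tf_data

CATEGORICAL_FEATURES_WITH_VOCABULARY = {
    "color": ["red", "green", "blue"],
    "size": ["S", "M", "L"],
}

# Build one StringLookup per categorical feature, once, outside the model.
lookup_dict = {
    name: layers.StringLookup(vocabulary=vocab, mask_token=None, num_oov_indices=0)
    for name, vocab in CATEGORICAL_FEATURES_WITH_VOCABULARY.items()
}


def encode_categorical(batch_x, batch_y):
    # Replace string values with their integer indices before batches reach the model.
    for name, lookup in lookup_dict.items():
        batch_x[name] = lookup(batch_x[name])
    return batch_x, batch_y


dataset = (
    tf_data.Dataset.from_tensor_slices(
        ({"color": ["red", "blue"], "size": ["M", "S"]}, [0, 1])
    )
    .batch(2)
    .map(encode_categorical)
)

for batch_x, batch_y in dataset:
    print(batch_x["color"].numpy(), batch_x["size"].numpy(), batch_y.numpy())
    # -> [0 2] [1 0] [0 1]
```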