diff --git a/data/mnist/.gitignore b/data/mnist/.gitignore
new file mode 100644
index 000000000..6ffe2a1c6
--- /dev/null
+++ b/data/mnist/.gitignore
@@ -0,0 +1,2 @@
+mnist_images/
+grayscale_images/
diff --git a/data/mnist/README.md b/data/mnist/README.md
new file mode 100644
index 000000000..f3f284c46
--- /dev/null
+++ b/data/mnist/README.md
@@ -0,0 +1,69 @@
+## MNIST Dataset and Preprocessing Scripts
+
+This directory contains scripts compatible with the MNIST dataset, which is a
+classic dataset of handwritten digits commonly used for training various image
+processing systems and machine learning models, including Language Learning
+Models (LLMs).
+
+### Script `get_dataset.py`
+
+The `get_dataset.py` script is used to download the MNIST dataset and save the
+images to a specified directory.
+
+**Usage:**
+```bash
+python3 get_dataset.py
+```
+
+By default, the script will download the MNIST dataset and save the images in
+the `mnist_images` directory.
+
+### Script `gray.py`
+
+The `gray.py` script is used to convert images from the MNIST dataset into ASCII
+art, which can be used for training LLMs or for visualization purposes.
+
+**Usage:**
+
+```bash
+python3 gray.py --image-dir mnist_images --output-dimensions 8x8 --levels 2 --chars 01 --append-to-file
+```
+
+Options:
+- `--image-dir` (required): Directory containing images to convert.
+- `--output-dir` (default: `grayscale_images`): Directory to save ASCII art.
+- `--output-dimensions` (default: `16x16`): Output dimensions for ASCII art, e.g., 8x8, 16x16.
+- `--levels` (default: 2): Number of grayscale levels, currently 2 - 9 supported.
+- `--chars` (optional): Custom characters for ASCII art, ordered from darkest to lightest.
+- `--append-to-file` (optional): Append ASCII art to a single file instead of creating separate files.
+- `--output-file` (default: `input.txt`): File to append ASCII art to if `--append-to-file` is used.
+- `--number-placement` (default: `before`): Place the type of number before or after the ASCII image in the output file. Choices are `before` or `after`.
+
+The script will process each image in the `--image-dir` directory and save the
+resulting ASCII art to the `--output-dir` directory.
+
+## License Information for MNIST Dataset
+
+The MNIST dataset is made available under the terms of the [Creative Commons
+Attribution-Share Alike 3.0
+license](https://creativecommons.org/licenses/by-sa/3.0/). When using the MNIST
+dataset, you should attribute the source as provided by the original authors.
+
+Please note that while the MNIST dataset itself is licensed under the Creative
+Commons license, the `get_dataset.py` and `gray.py` scripts provided in this
+directory are subject to the license terms of the repository they reside in.
+
+## Citation Information for MNIST Dataset
+
+If you use the MNIST dataset in your research, please cite:
+
+```bibtex
+@article{lecun2010mnist,
+  title={MNIST handwritten digit database},
+  author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
+  journal={ATT Labs [Online]. Available: http://yann.lecun.com/exdb/mnist},
+  volume={2},
+  year={2010}
+}
+```
+
diff --git a/data/mnist/get_dataset.py b/data/mnist/get_dataset.py
new file mode 100644
index 000000000..1f7a594fb
--- /dev/null
+++ b/data/mnist/get_dataset.py
@@ -0,0 +1,26 @@
+import os
+from tqdm import tqdm
+from datasets import load_dataset
+
+def save_mnist_images(dataset, output_dir):
+    """Saves images from the MNIST dataset to a specified directory."""
+    os.makedirs(output_dir, exist_ok=True)
+    for idx, item in tqdm(enumerate(dataset), total=len(dataset), desc="Saving images"):
+        image = item['image']  # The image is already a PIL.Image object.
+        label = item['label']
+        image.save(os.path.join(output_dir, f'{idx}_{label}.png'))
+
+def main():
+    # Load the MNIST dataset from Hugging Face
+    dataset = load_dataset("mnist", split='train')
+
+    # Specify the output directory for images
+    output_dir = 'mnist_images'
+
+    # Save images
+    save_mnist_images(dataset, output_dir)
+    print(f"Images saved in {output_dir}")
+
+if __name__ == "__main__":
+    main()
+
diff --git a/data/mnist/gray.py b/data/mnist/gray.py
new file mode 100644
index 000000000..e20607cb3
--- /dev/null
+++ b/data/mnist/gray.py
@@ -0,0 +1,102 @@
+from PIL import Image
+import argparse
+import os
+import numpy as np
+import sys
+
+def convert_image_to_ascii(image_path, output_size, grayscale, levels, chars=None):
+    # Load the image from the specified path and resize it
+    img = Image.open(image_path).resize(output_size, Image.LANCZOS)
+
+    if grayscale:
+        img = img.convert("L")  # Convert to grayscale
+        if levels < 256:
+            img = img.point(lambda p: (p * levels) // 256 * (256 // levels))
+
+    # Define default characters for different levels
+    default_chars = "@%#*+=-:. "
+    if chars is None:
+        if levels == 2:
+            chars = "@ "
+        elif levels == 3:
+            chars = "@. "
+        elif levels == 4:
+            chars = "@*- "
+        elif levels == 5:
+            chars = "@#+- "
+        elif levels == 6:
+            chars = "@#+-. "
+        elif levels == 7:
+            chars = "@#+-:. "
+        elif levels == 8:
+            chars = "@#*+-:. "
+        elif levels == 9:
+            chars = "@%#*+-:. "
+        else:
+            sys.exit(f"number of levels {levels} not supported")
+
+    # Normalize the characters set based on the number of levels
+    char_array = np.array([c for c in chars])
+    n_chars = len(char_array)
+    scale_factor = 256 // levels
+
+    # Convert the image to a numpy array
+    img_np = np.array(img)
+
+    # Convert each pixel to the corresponding ASCII character
+    ascii_img = char_array[img_np // scale_factor]
+
+    # Join characters to form lines and then join lines to form the full ASCII image
+    ascii_img_lines = ["".join(row) for row in ascii_img]
+    ascii_result = "\n".join(ascii_img_lines)
+
+    return ascii_result
+
+# Usage example
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Convert images to ASCII art.')
+    parser.add_argument('--output-dimensions', type=str, default='16x16', help='Output dimensions for ASCII art, e.g., 8x8, 16x16')
+    parser.add_argument('--levels', type=int, default=2, help='Number of grayscale levels, currently 2 - 9 supported')
+    parser.add_argument('--image-dir', type=str, required=True, help='Directory containing images to convert.')
+    parser.add_argument('--output-dir', type=str, default='grayscale_images', help='Directory to save ASCII art.')
+    parser.add_argument('--append-to-file', action='store_true', help='Append ASCII art to a single file.')
+    parser.add_argument('--output-file', type=str, default='input.txt', help='File to append ASCII art to.')
+    parser.add_argument('--number-placement', type=str, default='before', choices=['before', 'after'], help='Place the type of number before or after the ASCII image in the output file.')
+    parser.add_argument('--chars', type=str, default=None, help='Custom characters for ASCII art, ordered from darkest to lightest.')
+    args = parser.parse_args()
+
+    # Parse output dimensions
+    output_dimensions = tuple(map(int, args.output_dimensions.split('x')))
+
+    # Create output directory if it doesn't exist
+    if not os.path.exists(args.output_dir):
+        os.makedirs(args.output_dir)
+
+    # Open the output file for appending if required
+    output_file = None
+    if args.append_to_file:
+        output_file = open(args.output_file, 'a')
+
+    # Process each image in the directory
+    for image_filename in os.listdir(args.image_dir):
+        if image_filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
+            image_path = os.path.join(args.image_dir, image_filename)
+            ascii_art = convert_image_to_ascii(image_path, output_size=output_dimensions, grayscale=True, levels=args.levels, chars=args.chars)
+            if args.append_to_file:
+                # Determine the number from the filename
+                number = os.path.splitext(image_filename)[0].split('_')[-1]
+                # Append the ASCII art to the output file with the number placement
+                if args.number_placement == 'before':
+                    output_file.write(f'{number}\n{ascii_art}\n')
+                else:
+                    output_file.write(f'{ascii_art}\n{number}\n')
+            else:
+                # Save the ASCII art to a text file
+                output_path = os.path.join(args.output_dir, os.path.splitext(image_filename)[0] + '.txt')
+                with open(output_path, 'w') as f:
+                    f.write(ascii_art)
+                print(f'ASCII art saved to {output_path}')
+
+    # Close the output file if it was opened
+    if output_file:
+        output_file.close()
diff --git a/data/mnist/prepare.py b/data/mnist/prepare.py
new file mode 120000
index 000000000..713f6b001
--- /dev/null
+++ b/data/mnist/prepare.py
@@ -0,0 +1 @@
+../template/prepare.py
\ No newline at end of file
diff --git a/data/mnist/utils/meta_util.py b/data/mnist/utils/meta_util.py
new file mode 120000
index 000000000..d25555789
--- /dev/null
+++ b/data/mnist/utils/meta_util.py
@@ -0,0 +1 @@
+../template/utils/meta_util.py
\ No newline at end of file
diff --git a/data/mnist/utils/txt_to_phonemes.sh b/data/mnist/utils/txt_to_phonemes.sh
new file mode 120000
index 000000000..baec9c434
--- /dev/null
+++ b/data/mnist/utils/txt_to_phonemes.sh
@@ -0,0 +1 @@
+../template/utils/txt_to_phonemes.sh
\ No newline at end of file
diff --git a/explorations/mnist.json b/explorations/mnist.json
new file mode 100644
index 000000000..5f2fbadcc
--- /dev/null
+++ b/explorations/mnist.json
@@ -0,0 +1,28 @@
+[
+    {
+        "max_iters": ["30000"],
+        "eval_iters": ["200"],
+        "eval_interval": ["250"],
+        "log_interval": ["10"],
+        "n_layer": ["8"],
+        "n_kv_group": ["4"],
+        "n_head": ["8"],
+        "n_embd": ["384"],
+        "block_size": ["256"],
+        "shared_mlp_size" : ["1"],
+        "shared_mlp_sym" : [true],
+        "shared_attn_size" : ["2"],
+        "shared_attn_sym" : [true],
+        "device": ["cuda"],
+        "dataset": ["mnist"],
+        "compile": [true],
+        "use_post_ln": [false],
+        "softmax_variant_attn": ["softmax"],
+        "use_abs_pos_embeddings": [false],
+        "use_rotary_embeddings": [false],
+        "use_fire_embeddings": [true],
+        "shared_fire_embeddings": [true],
+        "tensorboard_run_name": ["mnist_ascii"]
+    }
+]
+