diff --git a/examples/muon.py b/muon.py
similarity index 100%
rename from examples/muon.py
rename to muon.py
diff --git a/zeta/tokenizers/__init__.py b/zeta/tokenizers/__init__.py
deleted file mode 100644
index a2db2cc7..00000000
--- a/zeta/tokenizers/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# from zeta.tokenizers.gptx_tokenizer import LanguageTokenizerGPTX
-# from zeta.tokenizers.llama_sentencepiece import LLamaTokenizer
-# from zeta.tokenizers.multi_modal_tokenizer import MultiModalTokenizer
-# from zeta.tokenizers.sentence_piece import SentencePieceTokenizer
-# from zeta.tokenizers.tokenmonster import TokenMonster
-
-# __all__ = [
-#     "LanguageTokenizerGPTX",
-#     "MultiModalTokenizer",
-#     "SentencePieceTokenizer",
-#     "TokenMonster",
-#     "LLamaTokenizer",
-# ]
diff --git a/zeta/tokenizers/gptx_tokenizer.py b/zeta/tokenizers/gptx_tokenizer.py
deleted file mode 100644
index 60c54ce1..00000000
--- a/zeta/tokenizers/gptx_tokenizer.py
+++ /dev/null
@@ -1,52 +0,0 @@
-from transformers import AutoTokenizer
-
-
-class LanguageTokenizerGPTX:
-    """
-    LanguageTokenizerGPTX is a class that provides tokenization and decoding functionality using the GPT-Neox-20B model.
-    """
-
-    def __init__(self):
-        self.tokenizer = AutoTokenizer.from_pretrained(
-            "EleutherAI/gpt-neox-20b",
-            eos_token="<eos>",
-            pad_token="<pad>",
-            extra_ids=0,
-            model_max_length=8192,
-        )
-
-    def tokenize_texts(self, texts):
-        """
-        Tokenizes a list of texts using the GPT-Neox-20B tokenizer.
-
-        Args:
-            texts (List[str]): A list of texts to be tokenized.
-
-        Returns:
-            torch.Tensor: The tokenized input IDs as a PyTorch tensor.
-        """
-        return self.tokenizer(
-            texts, return_tensors="pt", padding=True, truncation=True
-        ).input_ids
-
-    def decode(self, texts):
-        """
-        Decodes a list of tokenized input IDs into text.
-
-        Args:
-            texts (torch.Tensor): The tokenized input IDs as a PyTorch tensor.
-
-        Returns:
-            str: The decoded text.
-        """
-        return self.tokenizer.decode(texts)
-
-    def __len__(self):
-        """
-        Returns the number of tokens in the tokenizer's vocabulary.
-
-        Returns:
-            int: The number of tokens in the vocabulary.
-        """
-        num_tokens = len(self.tokenizer)
-        return num_tokens
diff --git a/zeta/tokenizers/llama_sentencepiece.py b/zeta/tokenizers/llama_sentencepiece.py
deleted file mode 100644
index 1b5fc618..00000000
--- a/zeta/tokenizers/llama_sentencepiece.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# Using LLAMA tokenizer
-import os
-from logging import getLogger
-
-import requests
-from sentencepiece import SentencePieceProcessor
-
-logger = getLogger()
-
-PRETRAINED_VOCAB_FILES_MAP = {
-    "vocab_file": {
-        "hf-internal-testing/llama-tokenizer": "https://huggingface.co/hf-internal-testing/llama-tokenizer/resolve/main/tokenizer.model",
-    },
-    "tokenizer_file": {
-        "hf-internal-testing/llama-tokenizer": "https://huggingface.co/hf-internal-testing/llama-tokenizer/resolve/main/tokenizer_config.json",
-    },
-}
-
-
-class LLamaTokenizer:
-    """
-    A tokenizer that uses a pretrained SentencePiece model for text tokenization.
-
-    Args:
-        model_path: Path to a pretrained SentencePiece model file.
-        tokenizer_name: Name of a pretrained SentencePiece model hosted on HuggingFace Hub.
-
-    Examples:
-    >>> tokenizer_name = "hf-internal-testing/llama-tokenizer"
-    >>> tokenizer = Tokenizer(tokenizer_name=tokenizer_name)
-    >>> encoded_text = tokenizer.encode("This is a sample text")
-    >>> decoded_text = tokenizer.decode(encoded_text)
-    >>> print("Encoded text:", encoded_text)
-    >>> print("Decoded text:", decoded_text)
-    """
-
-    def __init__(self, model_path: str = None, tokenizer_name: str = None):
-        if model_path:
-            assert os.path.isfile(model_path), model_path
-        elif tokenizer_name:
-            model_path = self.download_tokenizer(tokenizer_name)
-        else:
-            raise ValueError(
-                "Either model_path or tokenizer_name must be provided."
-            )
-
-        self.sp_model = SentencePieceProcessor(model_file=model_path)
-        logger.info(f"Reloaded SentencePiece model from {model_path}")
-
-    @staticmethod
-    def download_tokenizer(tokenizer_name: str) -> str:
-        if tokenizer_name not in PRETRAINED_VOCAB_FILES_MAP["vocab_file"]:
-            raise ValueError(f"Tokenizer {tokenizer_name} is not available.")
-
-        model_url = PRETRAINED_VOCAB_FILES_MAP["vocab_file"][tokenizer_name]
-        model_path = os.path.join("data", "tokenizer.model")
-
-        if not os.path.exists("data"):
-            os.makedirs("data")
-
-        # Downloading the tokenizer model file
-        response = requests.get(model_url)
-        if response.status_code == 200:
-            with open(model_path, "wb") as file:
-                file.write(response.content)
-            logger.info(f"Downloaded SentencePiece model to {model_path}")
-        else:
-            raise Exception(f"Failed to download model from {model_url}")
-
-        return model_path
-
-    def encode(self, s: str) -> [int]:
-        """Encodes a string into a list of token ids.
-
-        Args:
-            s (str): _description_
-
-        Returns:
-            [int]: _description_
-        """
-        return self.sp_model.encode(s, out_type=int)
-
-    def decode(self, ids: [int]) -> str:
-        """decodes a list of token ids into a string.
-
-        Args:
-            ids (int]): _description_
-
-        Returns:
-            str: _description_
-        """
-        return self.sp_model.decode(ids)
diff --git a/zeta/tokenizers/multi_modal_tokenizer.py b/zeta/tokenizers/multi_modal_tokenizer.py
deleted file mode 100644
index 66327807..00000000
--- a/zeta/tokenizers/multi_modal_tokenizer.py
+++ /dev/null
@@ -1,119 +0,0 @@
-import logging
-
-import torch
-from transformers import AutoTokenizer, CLIPProcessor
-
-logging.basicConfig(
-    level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s"
-)
-
-
-class MultiModalTokenizer:
-    """
-    A tokenizer class for the kosmos model
-
-    Attributes:
-        processor(CLIPProcessor): The processor to tokenize images
-        tokenizer: (AutoTokenizer): The tokenizer to tokenize text
-        im_idx: (int): The Index of the "<image>" token.
-        im_end_idx (int): The index of the "</image>" token.
-    """
-
-    def __init__(self, max_length: int = 8192):
-        self.max_length = max_length
-
-        try:
-            self.processor = CLIPProcessor.from_pretrained(
-                "laion/CLIP-ViT-L-14-laion2B-s32B-b82K"
-            )
-            self.tokenizer = AutoTokenizer.from_pretrained(
-                "EleutherAI/gpt-neox-20b",
-                additional_special_tokens=["<image>", "</image>"],
-                eos_token="<eos>",
-                pad_token="<pad>",
-                extra_ids=0,
-                model_max_length=self.max_length,
-            )
-        except Exception as e:
-            logging.error(f"Failed to initialize KosmosTokenizer: {e}")
-            raise
-
-        self.im_idx, self.im_end_idx = self.tokenizer.convert_tokens_to_ids(
-            ["<image>", "</image>"]
-        )
-
-    def tokenize_texts(self, texts: str):
-        """
-        Tokenize given texts.
-
-        Args:
-            Texts (str): The Text to be tokenized
-
-
-        Returns:
-            A tuple containing the tokenized texts and only the text tokens.
-        """
-        try:
-            texts = self.tokenizer(
-                texts, return_tensors="pt", padding=True, truncation=True
-            ).input_ids
-            # Add image tokens to text as "<s> <image> </image> text </s>"
-            image_tokens = torch.tensor(
-                [[self.im_idx, self.im_end_idx]] * texts.shape[0]
-            )
-            return (
-                torch.cat([texts[:, 0:1], image_tokens, texts[:, 1:]], dim=1),
-                texts,
-            )
-        except Exception as e:
-            logging.error(f"Failed to tokenize texts: {e}")
-            raise
-
-    def tokenize_images(self, images):
-        """
-        Tokenizes given images.
-
-        Args:
-            images: The images to be tokenized
-
-        Returns:
-            The tokenized images.
-
-        """
-        try:
-            return self.processor(
-                images=images, return_tensors="pt"
-            ).pixel_values
-        except Exception as e:
-            logging.error(f"Failed to tokenize images: {e}")
-            raise
-
-    def tokenize(self, sample):
-        """
-        Tokenizes given sample.
-
-        Args:
-            Sample: The sample to be tokenized
-
-        Returns:
-            A dictionary containing the tokenized text tokens, images, labels, and attention mask.
-
-        """
-        try:
-            text_tokens, only_text_tokens = self.tokenize_texts(
-                sample["target_text"]
-            )
-            attention_mask = text_tokens != self.tokenizer.pad_token_id
-            dummy_image_features = torch.ones((text_tokens.shape[0], 64))
-            attention_mask = torch.cat(
-                [dummy_image_features, attention_mask], dim=1
-            )
-            return {
-                "text_tokens": text_tokens,
-                "images": self.tokenize_images(sample["image"]),
-                "labels": only_text_tokens,
-                "attention_mask": attention_mask,
-            }
-        except Exception as e:
-            logging.error(f"Failed to tokenize sample: {e}")
-            raise
diff --git a/zeta/tokenizers/sentence_piece.py b/zeta/tokenizers/sentence_piece.py
deleted file mode 100644
index b09de319..00000000
--- a/zeta/tokenizers/sentence_piece.py
+++ /dev/null
@@ -1,97 +0,0 @@
-import os
-from logging import getLogger
-from typing import List, Optional
-
-from sentencepiece import SentencePieceProcessor
-
-logger = getLogger()
-
-
-class SentencePieceTokenizer:
-    """
-    A SentencePieceTokenizer is a tokenizer that uses a pretrained SentencePiece model to convert text into tokens and vice versa.
-    It includes the ability to add special tokens for infilling tasks and provides functionality to encode and decode text with or without implicit leading spaces.
-    Parameters:
-    - model_path (str): Path to the pretrained SentencePiece model file.
-
-    Attributes:
-    - n_words (int): Vocabulary size of the SentencePiece model.
-    - bos_id (int): Token ID of the beginning-of-sentence (BOS) token.
-    - eos_id (int): Token ID of the end-of-sentence (EOS) token.
-    - pad_id (int): Token ID of the padding (PAD) token.
-    - prefix_id (int, optional): Token ID of the prefix token. Default: None.
-    - middle_id (int, optional): Token ID of the middle token. Default: None.
-    - suffix_id (int, optional): Token ID of the suffix token. Default: None.
-    - eot_id (int, optional): Token ID of the end-of-turn (EOT) token. Default: None.
-    """
-
-    def __init__(self, model_path: str):
-        # reload tokenizer
-        assert os.path.isfile(model_path), model_path
-        self.sp_model = SentencePieceProcessor(model_file=model_path)
-        logger.info(f"Reloaded SentencePiece model from {model_path}")
-
-        # BOS / EOS token IDs
-        self.n_words: int = self.sp_model.vocab_size()
-        self.bos_id: int = self.sp_model.bos_id()
-        self.eos_id: int = self.sp_model.eos_id()
-        self.pad_id: int = self.sp_model.pad_id()
-
-        # token IDs for special infilling tokens
-        self.prefix_id: Optional[int] = (
-            self.sp_model.piece_to_id("▁<PRE>") or None
-        )
-        self.middle_id: Optional[int] = (
-            self.sp_model.piece_to_id("▁<MID>") or None
-        )
-        self.suffix_id: Optional[int] = (
-            self.sp_model.piece_to_id("▁<SUF>") or None
-        )
-        self.eot_id: Optional[int] = self.sp_model.piece_to_id("▁<EOT>") or None
-        logger.info(
-            f"#words: {self.n_words} - BOS ID: {self.bos_id} - EOS ID:"
-            f" {self.eos_id} - PRE ID: {self.prefix_id} - MID ID:"
-            f" {self.middle_id} - SUF ID: {self.suffix_id} - EOT ID:"
-            f" {self.eot_id}"
-        )
-        assert self.sp_model.vocab_size() == self.sp_model.get_piece_size()
-
-    def encode(self, s: str, bos: bool, eos: bool) -> List[int]:
-        """
-        Encodes a given string using the SentencePiece tokenizer.
-
-        Args:
-            s (str): The input string to be encoded.
-            bos (bool): Whether to add a beginning of sentence token.
-            eos (bool): Whether to add an end of sentence token.
-
-        Returns:
-            List[int]: The list of encoded tokens.
-
-        """
-        assert isinstance(s, str)
-        t = self.sp_model.encode(s)
-        if bos:
-            t = [self.bos_id] + t
-        if eos:
-            t = t + [self.eos_id]
-        return t
-
-    def decode(self, t: List[int]) -> str:
-        """Decode a list of token IDs into a string.
-
-        Args:
-            t (List[int]): _description_
-
-        Returns:
-            str: _description_
-        """
-        return self.sp_model.decode(t)
-
-    def encode_infilling(self, s: str) -> List[int]:
-        """Encode a string without an implicit leading space."""
-        return self.sp_model.encode("☺" + s)[2:]
-
-    def decode_infilling(self, t: List[int]) -> str:
-        """Decode a string without an implicit leading space."""
-        return self.sp_model.decode([self.sp_model.piece_to_id("☺")] + t)[1:]
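Note for downstream users: SentencePieceTokenizer is removed above without an in-tree replacement, but it is a thin wrapper over the sentencepiece package, so callers can either vendor the deleted class or call the library directly. A minimal sketch of the direct route, assuming a local SentencePiece model at "llama/tokenizer.model" (a hypothetical path, not shipped with this repo):

    from sentencepiece import SentencePieceProcessor

    # Load the model file; equivalent to SentencePieceTokenizer(model_path=...).
    sp = SentencePieceProcessor(model_file="llama/tokenizer.model")

    # Equivalent of SentencePieceTokenizer.encode(text, bos=True, eos=True).
    ids = [sp.bos_id()] + sp.encode("This is a sample text") + [sp.eos_id()]

    # Equivalent of SentencePieceTokenizer.decode(ids).
    print(sp.decode(ids))
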
diff --git a/zeta/tokenizers/tokenmonster.py b/zeta/tokenizers/tokenmonster.py
deleted file mode 100644
index b6302b4a..00000000
--- a/zeta/tokenizers/tokenmonster.py
+++ /dev/null
@@ -1,347 +0,0 @@
-import tokenmonster
-
-
-class TokenMonster:
-    """
-    A class that encapsulates the functionality of the tokenmonster library.
-
-    >>> from zeta.tokenizers import TokenMonster
-    >>> tokenizer = TokenMonster("englishcode-32000-consistent-v1")
-    >>> tokenizer.tokenize("Hello world!")
-    """
-
-    def __init__(self, path):
-        """
-        Initializes the TokenMonster class and loads a vocabulary.
-
-        Args:
-            path (str): A filepath, URL or pre-built vocabulary name.
-        """
-        self.vocab = tokenmonster.load(path)
-
-    def set_local_directory(self, dir=None):
-        """
-        Sets the local directory for TokenMonster.
-
-        Args:
-            dir (str, optional): The local directory to use. Defaults to None.
-        """
-        tokenmonster.set_local_directory(dir)
-
-    def load(self, path):
-        """
-        Loads a TokenMonster vocabulary from file, URL or by name.
-
-        Args:
-            path (str): A filepath, URL or pre-built vocabulary name.
-        """
-        self.vocab = tokenmonster.load(path)
-
-    def load_multiprocess_safe(self, path):
-        """
-        Loads a TokenMonster vocabulary from file, URL or by name. It's safe for multiprocessing,
-        but vocabulary modification is disabled and tokenization is slightly slower.
-
-        Args:
-            path (str): A filepath, URL or pre-built vocabulary name.
-        """
-        self.vocab = tokenmonster.load_multiprocess_safe(path)
-
-    def new(self, yaml):
-        """
-        Creates a new vocabulary from a YAML string.
-
-        Args:
-            yaml (str): The YAML file.
-        """
-        self.vocab = tokenmonster.new(yaml)
-
-    def save(self, fname):
-        """
-        Saves the current vocabulary to a file.
-
-        Args:
-            fname (str): The filename to save the vocabulary to.
-        """
-        self.vocab.save(fname)
-
-    def export_yaml(self, order_by_score=False):
-        """
-        Exports the vocabulary as a YAML file, which is returned as a bytes string.
-
-        Args:
-            order_by_score (bool, optional): If true the tokens are order by score instead of alphabetically. Defaults to False.
-
-        Returns:
-            bytes: The vocabulary in YAML format.
-        """
-        return self.vocab.export_yaml(order_by_score)
-
-    def tokenize(self, text):
-        """
-        Tokenizes a string into tokens according to the vocabulary.
-
-        Args:
-            text (str): A string or bytes string, or list of strings or bytes strings.
-
-        Returns:
-            numpy array: The tokens IDs
-        """
-        return self.vocab.tokenize(text)
-
-    def tokenize_count(self, text):
-        """
-        Same as tokenize, but it returns only the number of tokens.
-
-        Args:
-            text (str): A string or bytes string, or list of strings or bytes strings.
-
-        Returns:
-            int: The number of tokens for each input string
-        """
-        return self.vocab.tokenize_count(text)
-
-    def decode(self, tokens):
-        """
-        Decodes tokens into a string.
-
-        Args:
-            tokens (int, list of int, or numpy array): The tokens to decode into a string.
-
-        Returns:
-            str: The composed string from the input tokens.
-        """
-        return self.vocab.decode(tokens)
-
-    def decoder(self):
-        """
-        Returns a new decoder instance used for decoding tokens into text.
-
-        Returns:
-            tokenmonster.DecoderInstance: A new decoder instance.
-        """
-        return self.vocab.decoder()
-
-    def get_dictionary(self):
-        """
-        Returns a dictionary of all tokens in the vocabulary.
-
-        Returns:
-            list: A list of dictionaries where the index is the token ID and each is a dictionary.
-        """
-        return self.vocab.get_dictionary()
-
-    def charset(self):
-        """
-        Returns the character set used by the vocabulary.
-
-        Returns:
-            str: The character set used by the vocabulary. Possible values are "UTF-8", "None".
-        """
-        return self.vocab.charset()
-
-    def normalization(self):
-        """
-        Returns the normalization of the vocabulary.
-
-        Returns:
-            str: The normalization of the vocabulary. Possible values are "None", "NFD", "Lowercase", "Accents", "Quotemarks", "Collapse", "Trim", "LeadingSpace", "UnixLines".
-        """
-        return self.vocab.normalization()
-
-    def capcode(self):
-        """
-        Returns the capcode level of the vocabulary.
-
-        Returns:
-            int: The capcode level (0-2).
-        """
-        return self.vocab.capcode()
-
-    def mode(self):
-        """
-        Returns the optimization mode of the vocabulary.
-
-        Returns:
-            int: The optimization mode (0-5).
-        """
-        return self.vocab.mode()
-
-    def id_to_token(self, id):
-        """
-        Get the token string from a single token ID, in its capcode-encoded form.
-
-        Args:
-            id (int): The token ID.
-
-        Returns:
-            str or None: The token string corresponding to the input ID. None if the ID is not in the vocabulary.
-        """
-        return self.vocab.id_to_token(id)
-
-    def id_to_token_decoded(self, id):
-        """
-        Get the token string from a single token ID, in its capcode-decoded form.
-
-        Args:
-            id (int): The token ID.
-
-        Returns:
-            str or None: The token string corresponding to the input ID. None if the ID is not in the vocabulary.
-        """
-        return self.vocab.id_to_token_decoded(id)
-
-    def token_to_id(self, token):
-        """
-        Returns the ID of a single token.
-
-        Args:
-            token (str): The token to get the ID for.
-
-        Returns:
-            int or None: The ID of the token. None if the token is not in the vocabulary.
-        """
-        return self.vocab.token_to_id(token)
-
-    def modify(
-        self,
-        add_special_tokens=None,
-        add_regular_tokens=None,
-        delete_tokens=None,
-        resize=None,
-        change_unk=None,
-    ):
-        """
-        Modifies the vocabulary.
-
-        Args:
-            add_special_tokens (str or list of str, optional): Special tokens to add to the vocabulary.
-            add_regular_tokens (str or list of str, optional): Regular tokens to add to the vocabulary.
-            delete_tokens (str or list of str, optional): Regular or Special tokens to delete.
-            resize (int, optional): Resizes the vocabulary to this size.
-            change_unk (bool, optional): If set, it enables or disables the UNK token.
-
-        Returns:
-            int: The new size of the vocabulary.
-        """
-        return self.vocab.modify(
-            add_special_tokens,
-            add_regular_tokens,
-            delete_tokens,
-            resize,
-            change_unk,
-        )
-
-    def add_token(self, token):
-        """
-        Add one or more regular tokens.
-
-        Args:
-            token (str or list of str): The regular tokens to add.
-
-        Returns:
-            int: The new size of the vocabulary.
-        """
-        return self.vocab.add_token(token)
-
-    def delete_token(self, token):
-        """
-        Delete one or more regular or special tokens.
-
-        Args:
-            token (str or list of str): The tokens to delete.
-
-        Returns:
-            int: The new size of the vocabulary.
-        """
-        return self.vocab.delete_token(token)
-
-    def delete_token_by_id(self, id):
-        """
-        Delete one or more regular or special token by specifying the token ID.
-
-        Args:
-            id (int or list of int): The IDs of the tokens to delete.
-
-        Returns:
-            int: The new size of the vocabulary.
-        """
-        return self.vocab.delete_token_by_id(id)
-
-    def add_special_token(self, token):
-        """
-        Add one or more special tokens.
-
-        Args:
-            token (str or list of str): The special tokens to add.
-
-        Returns:
-            int: The new size of the vocabulary.
-        """
-        return self.vocab.add_special_token(token)
-
-    def resize(self, size):
-        """
-        Changes the size of the vocabulary.
-
-        Args:
-            size (int): The new size of the vocabulary.
-
-        Returns:
-            int: The new size of the vocabulary.
-        """
-        return self.vocab.resize(size)
-
-    def reset_token_ids(self):
-        """
-        Resets the token IDs to be sequential beginning from zero.
-        """
-        self.vocab.reset_token_ids()
-
-    def enable_unk_token(self):
-        """
-        Enables the UNK token.
-
-        Returns:
-            int: The new size of the vocabulary.
-        """
-        return self.vocab.enable_unk_token()
-
-    def disable_unk_token(self):
-        """
-        Disables the UNK token.
-
-        Returns:
-            int: The new size of the vocabulary.
-        """
-        return self.vocab.disable_unk_token()
-
-    def disconnect(self):
-        """
-        Disconnects and closes tokenmonsterserver.
-        """
-        tokenmonster.disconnect()
-
-    def serialize_tokens(self, integer_list):
-        """
-        Serializes tokens from a list of ints or numpy array into a binary string.
-
-        Args:
-            integer_list (list of int or numpy array): The tokens to serialize.
-
-        Returns:
-            bytes: The serialized binary string.
-        """
-        return self.vocab.serialize_tokens(integer_list)
-
-    def deserialize_tokens(self, binary_string):
-        """
-        Deserializes a binary string into a numpy array of token IDs.
-
-        Args:
-            binary_string (bytes): The binary string to deserialize.
-
-        Returns:
-            np.array: The deserialized tokens.
-        """
-        return self.vocab.deserialize_tokens(binary_string)
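
With the wrapper above gone, code that relied on zeta.tokenizers.TokenMonster can depend on the tokenmonster package directly, since every method shown is a pass-through to the underlying vocabulary object. A minimal sketch, reusing the pre-built vocabulary name from the removed docstring ("englishcode-32000-consistent-v1"); continued availability of that vocabulary is an assumption:

    import tokenmonster

    # Load a pre-built vocabulary by name; tokenmonster fetches it into its
    # local directory on first use.
    vocab = tokenmonster.load("englishcode-32000-consistent-v1")

    tokens = vocab.tokenize("Hello world!")  # token IDs as a numpy array
    print(vocab.decode(tokens))              # reconstructs "Hello world!"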