# fix_unicode_filenames.py


"""
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***
     *** SAVE CODE CHANGES TO A .SAVETEXT FILE FIRST AND CHECK ENCODING BEFORE OVERWRITING!!! IT IS VERY EASY TO CORRUPT THIS FILE!!! TOO EASY!!! ***


    Converts Unicode/non-ASCII filenames into ASCII filenames -- "Romanizing-Plus"

    USAGE:

        SETUP: To suppress user prompting: set AUTOMATIC_UNICODE_CLEANING=1

        RECURSIVE: add "/s" to the end to recurse folders [in filemode only, obviously]

        MODE 1:  No      arguments  : Run with no arguments to cleanse everything in your existing folder of unicode characters
              :  "auto"  argument   : ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Do this, but suppress confirmation prompts
        MODE 2: "file   <arguments>": Use "file"   as your first argument to cleanse the rest of the command line of unicode, as if it were a windows filename
        MODE 3: "string <arguments>": Use "string" as your first argument to cleanse the rest of the command line of unicode, without restricting to only-valid-in-windows-filenames
        MODE 4: "test"              : to convert the internal testing string
        MODE 5: "script"            : experimental

    EXAMPLE PROGRAMMATIC USAGE:
        import fixUnicodeFilenames
        a_string_without_unicode = fixUnicodeFilenames.convert_a_string  (original_stringval_with_unicode,silent=False)
        filename_without_unicode = fixUnicodeFilenames.convert_a_filename(original_file_name_with_unicode,silent_if_unchanged=True,silent_if_changed=True)
         #silent=suppresses all output no matter what


    Uses Polyglot library to attempt a language-agnostic translation, which can easily fail
    Then several internal custom mapping tables for phonetically romanizing characters & emojis
    Then several lingual libraries for romanizing individual characters for some "weirder alphabet" languages
    Then an emoji library for converting unconverted emojis


"""


#pylint: disable=C0103,C0413,W0719,R1726
import os
os.system("")                                                               #necessary bugfix, believe it or not #GOAT but let's try taking it out to challenge ourselves and maybe speedup startup time
os.environ['PYTHAINLP_ZONEINFO_PACKAGE'] = 'tzdata'                         #necessary bugfix, believe it or not
import sys ; sys.setrecursionlimit(sys.getrecursionlimit() * 5)             #recursionlimit came up during EXE-build attempts
import shutil
import msvcrt
import builtins
#import unidecode                                                           #pip install Unidecode==1.2.0 - for the right one - capitalizing the U (or not) is (or isn't) important. this package sucks.
import unicodedata
from unidecode import unidecode
from colorama import Fore, Back, Style, just_fix_windows_console
#init()
just_fix_windows_console()
import clairecjs_utils as claire
import fix_unicode_filenames_every_char as everychar
original_print = print                                                      #Store the original print function before any potential overriding

############################ RUNTIME CONFIGURATION ############################
# Modes accepted by translate_one_or_more_chars_with_custom_character_mapping();
# that function exits the program when it is handed any other mode.
VALID_MODES = ["string", "file", "test", "script"]
# Characters that Windows does not allow in a filename.
# NOTE(review): not referenced in the visible part of the file - confirm where it is used.
INVALID_WINDOWS_FILENAME_CHARACTERS = r'<>:"/\|?*'
RECURSE=False                          #Whether we are in recursive mode or not
###############################################################################

########################## DEVELOPMENT CONFIGURATION ############################
# True : translate_character_with_language_libraries() raises when a character cannot be decoded.
# False: the same error message is sent to primt() and processing continues.
DIE_ON_UNDECODEABLE_UNICODE_CHARACTER = True
# NOTE(review): presumably suppresses the actual renames - not referenced in the visible code, confirm.
DRY_RUN                               = False
#################################################################################


################################## DEBUG CONFIGURATION ################################################
# Master switches: the derived DEBUG_CHAR / DEBUG_UNIDECODECHAR* flags below are OR-ed from these.
DEBUG_MOST_CHARS = False           #controls several debugs below
DEBUG_ALL_CHARS  = False           #controls several debugs below

DEBUG_ANNOUNCE_FILENAMES=True
DEBUG_MODE_ARGV=False
DEBUG_LANG_DETECT=False            #when True: polyglot logging is left enabled and the Detector result is printed
DEBUG_POLYGLOT=False               #when True: print the polyglot-romanized string before per-character processing
# The leading "False or" in each expression below is presumably a manual override slot:
# change it to True to force that single flag on without touching the master switches.
DEBUG_CHAR                        = bool(False or DEBUG_ALL_CHARS or DEBUG_MOST_CHARS)   # per-character trace
DEBUG_UNIDECODECHAR               = bool(False or DEBUG_ALL_CHARS or DEBUG_MOST_CHARS)   # trace of the emoji / unidecode stage
# Trace of every custom-mapping lookup; only DEBUG_ALL_CHARS turns this one on.
DEBUG_UNIDECODECHAR_TRANSLATECHAR = bool(False
or DEBUG_ALL_CHARS)                      # super verbose
DEBUG_INTERNAL_TESTING=False
#######################################################################################################


###################################### TESTING ######################################
## CREATE A GOOD TESTING STRING:
#
#     This string includes:
#
#          ASCII text ("Hello, world!")
#          Chinese text ("ä½ å¥½ï¼Œä¸–ç•Œï¼")
#          Japanese text ("ã“ã‚“ã«ã¡ã¯ã€ä¸–ç•Œï¼")
#          Korean text ("ì•ˆë…•í•˜ì„¸ìš”, ì„¸ê³„!")
#          Russian text ("ÐŸÑ€Ð¸Ð²ÐµÑ‚, Ð¼Ð¸Ñ€!")
#          Greek text ("ÎšÎ±Î»Î·Î¼Î­ÏÎ± ÎºÏŒÏƒÎ¼Îµ!")
#          Emoji ("ðŸ‘‹ðŸŒ")
#          Special symbols and numbers ("âµâ„“Â©Â®Â½Â¼Â¾â…“â…”â…›â…œâ…â…žâ°Â¹Â²Â³â´âµâ¶â·â¸â¹")
#          Mathematical Alphanumeric Symbols (ð‘¨ð’ƒð’„ð’…ð’†ð’‡ð’ˆð’‰ð’Šð’‹ð’Œð’ð’Žð’ð’ð’‘ð’’ð’“ð’”ð’•ð’–ð’—ð’˜ð’™ð’šð’› ð‘¨ð‘©ð‘ªð‘«ð‘¬ð‘­ð‘®ð‘¯ð‘°ð‘±ð‘²ð‘³ð‘´ð‘µð‘¶ð‘·ð‘¸ð‘¹ð‘ºð‘»ð‘¼ð‘½ð‘¾ð‘¿ð’€ð’)
#          Hebrew and Arabic scripts ("×Ö¸×œÖ¶×£ Ø¨ÙÙŠØª")
#          Various letters and symbols from different scripts (Ç„Ç…Ç†Ç‡ÇˆÇ‰ÇŠÇ‹ÇŒ á»’á»£á»”á»™ á»–á»—á»˜á»™ á»©á»¤á»±á»¦á»­ á»¨á»¯ á»ªá»­á»¬á»« á»®á»­ á»°á»±)

massive_testing_string_backup = "Â½Â¼Â¾â…“â…”â…›â…œâ…â…ž"
massive_testing_string = """
Hello, world! ä½ å¥½ï¼Œä¸–ç•Œï¼ã“ã‚“ã«ã¡ã¯ã€ä¸–ç•Œï¼ì•ˆë…•í•˜ì„¸ìš”, ì„¸ê³„! ÐŸÑ€Ð¸Ð²ÐµÑ‚, Ð¼Ð¸Ñ€! ÎšÎ±Î»Î·Î¼Î­ÏÎ± ÎºÏŒÏƒÎ¼Îµ!
HAND="ðŸ‘‹",WORLD="ðŸŒ" âµâ„“ COPYRIGHT="Â©",RESTRICT="Â®"
Â½Â¼Â¾â…“â…”â…›â…œâ…â…žâ°Â¹Â²Â³â´âµâ¶â·â¸â¹
ð‘¨ð’ƒð’„ð’…ð’†ð’‡ð’ˆð’‰ð’Šð’‹ð’Œð’ð’Žð’ð’ð’‘ð’’ð’“ð’”ð’•ð’–ð’—ð’˜ð’™ð’šð’› ð‘¨ð‘©ð‘ªð‘«ð‘¬ð‘­ð‘®ð‘¯ð‘°ð‘±ð‘²ð‘³ð‘´ð‘µð‘¶ð‘·ð‘¸ð‘¹ð‘ºð‘»ð‘¼ð‘½ð‘¾ð‘¿ð’€ð’
×Ö¸×œÖ¶×£ Ø¨ÙÙŠØª Ç„Ç…Ç†Ç‡ÇˆÇ‰ÇŠÇ‹ÇŒ á»’á»£á»”á»™ á»–á»—á»˜á»™ á»©á»¤á»±á»¦á»­ á»¨á»¯ á»ªá»­á»¬á»« á»®á»­ á»°á»±
"""


#####################################################################################


def print_error(*args, called_from_primt=False, **kwargs):  #pylint: disable=W0613
    """Guard against direct print() usage.

    Does nothing when ``called_from_primt`` is true; otherwise raises so that
    the offending call site is found and switched to primt(), which also
    writes to the logfile. Positional and other keyword arguments are ignored.
    (Presumably meant to be installed in place of the builtin print - confirm
    against the rest of the file.)

    Raises:
        Exception: whenever ``called_from_primt`` is false.
    """
    if called_from_primt:
        return
    raise Exception("A print statement was used in the code. Use primt instead, because we want everything to go to our logfile")  #pylint: disable=W0719


def primt(*args, **kwargs):     #custom_print "prim print" function to print, prim and proper, to screen & logfile at the same time
    """Print to the console and append the same text to the logfile.

    Every ``str`` argument is first passed through unidecode(); the arguments
    are then joined with single spaces into one string. That string is handed
    to the saved original print function together with ``kwargs``, and is also
    appended, always followed by a newline, to "fix-unicode-filenames.log".

    NOTE(review): LOGFILE is declared global but is neither read nor assigned
    here; the log path is the literal below.
    """
    global LOGFILE

    cleaned = [unidecode(item) if isinstance(item, str) else item for item in args]
    output = " ".join(str(piece) for piece in cleaned)

    # Screen first (honouring end=, file=, ... from the caller), then the log.
    original_print(output, **kwargs)
    with open("fix-unicode-filenames.log", "a", encoding='utf-8') as log_file:
        log_file.write(output + "\n")


def convert_to_ascii_filename_chracters(filename,mode):
    """Translate a string (typically a filename) to an ASCII/roman equivalent.

    Two passes are made:

    (1) The whole string goes through polyglot_language_agnostic_romanize(),
        a whole-string, language-detecting transliteration that hands back the
        original text whenever anything goes wrong.

    (2) Each character of that result is then run through
        translate_character_with_language_libraries(), which applies the
        custom mapping tables, per-language libraries and unidecode.

    ``mode`` is forwarded unchanged to the per-character translator, and
    ``filename`` is forwarded as well so it can appear in error messages.
    Returns the concatenation of the per-character results.
    """
    global DEBUG, DEBUG_POLYGLOT
    polyglot_pass = polyglot_language_agnostic_romanize(filename)
    if DEBUG_POLYGLOT:
        primt(f'DEBUG: string_romanized_with_polyglot({filename}) is "{polyglot_pass}"')

    romanized_pieces = []
    for one_char in polyglot_pass:
        romanized_pieces.append(translate_character_with_language_libraries(one_char, mode, filename=filename))
    return ''.join(romanized_pieces)


#pylint: disable=C0415                                                                                  #don't nag me about lazy-loading the libraries, pylint!
def polyglot_language_agnostic_romanize(text):
    """Return ``text`` transliterated to English by polyglot, or ``text`` itself on any failure.

    polyglot is imported lazily inside the try block, so a missing or broken
    installation, an undetectable language, or any other exception simply
    yields the original text unchanged.

    An empty transliteration result is also treated as a failure: returning it
    would wipe out the whole string before the per-character pass ever sees
    it, so the original text is returned instead.
    """
    global DEBUG, DEBUG_LANG_DETECT
    try:
        import logging
        if not DEBUG_LANG_DETECT: logging.getLogger('polyglot').setLevel(logging.ERROR)                             #Disable logging messages from Polyglot unless in debug mode
        from polyglot.detect import Detector
        from polyglot.transliteration import Transliterator
        detector = Detector(text)
        if DEBUG_LANG_DETECT: primt(f"* Detector: {str(detector)}")
        source_lang = detector.language.code
        transliterator = Transliterator(source_lang=source_lang, target_lang="en")
        romanized = transliterator.transliterate(text)
    except Exception:                                                                                   #pylint: disable=W0718
        return text
    # Never hand back a null string: fall back to the untouched input.
    return romanized if romanized else text


def get_unicode_hex(character):
    """Return a backslash-u style hex label for a single character.

    The code point is rendered in lowercase hex, zero-padded to at least four
    digits (code points above FFFF produce five or six digits), and prefixed
    with a literal backslash and "u". The empty string yields the dummy
    value "0".
    """
    if character == "":
        return "0"                                                                                      #just fill in a dummy value
    return f"\\u{ord(character):04x}"


def translate_one_or_more_chars_with_custom_character_mapping(chars, mode):                                                                                             #pylint: disable=R0912
    """Run each character of ``chars`` through the hand-built custom mapping table.

    For every character the table is consulted twice: first with the character
    itself as the key, then with the key "code " + its hex label (backslash
    removed). Characters found by neither lookup are passed through untouched.

    A table entry is indexed: entry [1] is used when ``mode`` is "file" and the
    entry has more than one element, otherwise entry [0].

    Returns:
        (translated_string, done) where ``done`` is True when at least one
        character was replaced via the table.

    Raises:
        SystemExit: (exit code 666) when ``mode`` is not in VALID_MODES.
        Exception: when a table entry is empty.
    """
    global DEBUG_UNIDECODECHAR_TRANSLATECHAR, VALID_MODES

    if mode not in VALID_MODES:
        primt(f"{Fore.RED}FATAL TRANSLATE ERROR: translate_one_or_more_chars_with_custom_character_mapping called with invalid mode of {mode} which is not in {VALID_MODES}")
        sys.exit(666)

    table = unicode_to_ascii_custom_character_mapping
    pieces = []
    any_mapped = False

    for char in chars:
        if DEBUG_UNIDECODECHAR_TRANSLATECHAR:
            code = get_unicode_hex(char)
            code2 = "code " + code.replace("\\","")
            primt(f"\t{Fore.CYAN}translate_one_or_more_chars_with_custom_character_mapping(char={char},code={code},code2={code2})",end="")

        if char in table:
            # First attempt: the character itself is a key.
            mapping = table[char]
            if DEBUG_UNIDECODECHAR_TRANSLATECHAR: primt(f"{Fore.GREEN}    Found in mapping!",end="")
        else:
            if DEBUG_UNIDECODECHAR_TRANSLATECHAR: primt(  f"{Fore.RED}Not found in mapping!",end="")
            # Second attempt: look the character up by its "code xxxx" key.
            code_key = "code " + get_unicode_hex(char).replace("\\","")
            if code_key not in table:
                # Not in our custom mapping at all: pass the character through unchanged.
                if DEBUG_UNIDECODECHAR_TRANSLATECHAR: primt(f"{Style.BRIGHT}(Twice!)(code2={code_key}){Style.NORMAL}",end="")
                pieces.append(char)
                continue
            if DEBUG_UNIDECODECHAR_TRANSLATECHAR: primt(f"{Fore.GREEN}{Style.BRIGHT}Found by 2nd-attempt code lookup!{Style.NORMAL}",end="")
            mapping = table[code_key]

        if len(mapping) == 0:
            raise Exception("FATAL ERROR: ZERO MAPPING LENGTH")
        use_second_entry = mode == "file" and len(mapping) > 1
        pieces.append(mapping[1 if use_second_entry else 0])
        any_mapped = True                                                       # If any character is mapped, mark it as done
        if DEBUG_UNIDECODECHAR_TRANSLATECHAR: primt("\n")

    return ''.join(pieces), any_mapped


# Inclusive (first, last) bounds of the code-point ranges treated as emoji.
# Bounds above U+FFFF MUST be written with the 8-digit \U escape: a 4-digit \u
# escape followed by a fifth hex digit is a TWO-character string (for example
# U+1F00 followed by the letter/digit), which silently turns the range into a
# slice of Greek Extended and never matches a real emoji.
_EMOJI_RANGES = (
    ('\u2600',     '\u26FF'),      # Miscellaneous Symbols
    ('\u2700',     '\u27BF'),      # Dingbats
    ('\uE000',     '\uF8FF'),      # Private Use Area
    ('\uFE00',     '\uFE0F'),      # Variation Selectors
    ('\U0001F000', '\U0001F02B'),  # Mahjong Tiles
    ('\U0001F030', '\U0001F093'),  # Domino Tiles
    ('\U0001F0A0', '\U0001F0AE'),  # Playing Cards (first part of the block)
    ('\U0001F100', '\U0001F1FF'),  # Enclosed Alphanumeric Supplement
    ('\U0001F200', '\U0001F2FF'),  # Enclosed Ideographic Supplement
    ('\U0001F1E6', '\U0001F1FF'),  # Regional Indicator Symbols
    ('\U0001F300', '\U0001F5FF'),  # Miscellaneous Symbols and Pictographs
    ('\U0001F600', '\U0001F64F'),  # Emoticons
    ('\U0001F680', '\U0001F6FF'),  # Transport and Map Symbols
    ('\U0001F700', '\U0001F77F'),  # Alchemical Symbols
    ('\U0001F780', '\U0001F7FF'),  # Geometric Shapes Extended
    ('\U0001F800', '\U0001F8FF'),  # Supplemental Arrows-C
    ('\U0001F900', '\U0001F9FF'),  # Supplemental Symbols and Pictographs
    ('\U0001FA00', '\U0001FA6F'),  # Chess Symbols
    ('\U0001FAB0', '\U0001FAB6'),  # (subset of Symbols and Pictographs Extended-A)
    ('\U0001FAC0', '\U0001FAC2'),  # (subset of Symbols and Pictographs Extended-A)
    ('\U0001FAD0', '\U0001FAD6'),  # (subset of Symbols and Pictographs Extended-A)
    ('\U0001FA70', '\U0001FAFF'),  # Symbols and Pictographs Extended-A
)


def is_emoji_character(char):
    """Return True when ``char`` falls inside one of the emoji code-point ranges.

    The check is a plain string comparison against each (first, last) pair in
    _EMOJI_RANGES, so for a single character it is a code-point range test.
    The empty string is never an emoji.
    """
    return any(first <= char <= last for first, last in _EMOJI_RANGES)
is_emoji = is_emoji_character

def is_unicode_character(char):
    """Return True when ``char`` sorts between U+0000 and U+10FFFF inclusive.

    The comparison is an ordinary string comparison, so every single
    character passes, while the empty string does not.
    """
    lowest, highest = '\u0000', '\U0010FFFF'
    return not (char < lowest or char > highest)


def translate_character_with_language_libraries(char,mode,filename="not given"):                                                                #pylint: disable=R0912,R0915
    """Translate a single character to its ASCII/roman equivalent.

        Each character is processed individually, in this order:

        (1) The hand-built custom mapping
            (translate_one_or_more_chars_with_custom_character_mapping).
            If it maps the character, that result is returned immediately.

        (2) The character's Unicode value is checked against the ranges of several
            specific languages. Depending on the range, either a language-specific
            library or a language-specific phonetic mapping table converts it
            (Arabic, Hindi, Bengali, Thai, Japanese, Chinese, Korean), or the
            emoji translator is used.

        (3) unidecode() is applied as a multi-lingual catch-all - to the result of
            step 2 when a translator handled the character, otherwise to the
            character itself. It is purported to remove accents from
            French/Spanish letters and to change Russian to phonetic equivalents.

        (4) If unidecode() produces an empty string, the Unicode character name is
            looked up with get_name_from_hex() and the result is wrapped in braces.

        LANGUAGE SUPPORT:

        Our list of addressed languages, even if only implicitly/partially addressed is, at the very least:

                Arabic, Bengali, Chinese, English, French, Hindi, Japanese, Korean, Spanish, Russian, Thai

        NEW LANGUAGES:

        We do not need to actually add every language in existence.
        We attempted to add the most common languages that have hard-to-romanize alphabets (usually non-"Western" languages)
        Common languages that have easy-to-romanize alphabets are likely covered by unidecode.

        Args:
            char:     the single character to translate
            mode:     forwarded to the custom-mapping lookup
            filename: only used inside the fatal-decode error message

        Returns:
            The translated text for this character (may be more than one character long).

        Raises:
            Exception: when the character cannot be decoded by any step and
                       DIE_ON_UNDECODEABLE_UNICODE_CHARACTER is true.
    """
    global DEBUG, DEBUG_CHAR, DEBUG_UNIDECODECHAR, DIE_ON_UNDECODEABLE_UNICODE_CHARACTER

    # Printable fallback for the debug line below: round-trips through UTF-16 and
    # drops anything that cannot be decoded.
    char_for_primt = char.encode('utf-16', 'surrogatepass').decode('utf-16','ignore')

    if DEBUG_CHAR:
        try:
            primt (f"- DEBUG: char is {Fore.YELLOW}{char}{Fore.WHITE}\tvalue {Fore.YELLOW}{get_unicode_hex(char)}{Fore.WHITE}{Style.NORMAL}",end="")
        except Exception:                                                                                   #pylint: disable=W0718
            primt (f"- DEBUG: char is {Fore.YELLOW}{char_for_primt}{Fore.WHITE}\tvalue {Fore.YELLOW}{get_unicode_hex(char)}{Fore.WHITE}{Style.NORMAL}",end="")


    # First we check our custom mapping, our highest priority. It is hand-created and thought out.
    # NOTE: char is overwritten with the mapping result (unchanged when nothing was mapped).
    char, done = translate_one_or_more_chars_with_custom_character_mapping(char,mode)
    if DEBUG_CHAR: primt (f" \t... custom mapping: {Fore.YELLOW}{char}{Fore.WHITE}\tdone={done:1}",end="")
    if done:
        if DEBUG_CHAR: primt("")
        return char

    # if a character is still untranslated, then we check our various lingual libraries and phonetic mapping tables:
    # NOTE: this local is_emoji shadows the module-level is_emoji alias for the rest of the function.
    is_emoji = False
    caught = False
    translate_return_value = ""
    # NOTE: the is_unicode value assigned in these branches is overwritten by "is_unicode = caught" further down.
    if   '\u0600' <= char <= '\u06FF': caught,is_unicode=True,True; translate_return_value = translate_arabic___to_ascii(char) # if Arabic
    elif '\u0900' <= char <= '\u097F': caught,is_unicode=True,True; translate_return_value = translate_hindi____to_ascii(char) # if Hindi
    elif '\u0980' <= char <= '\u09FF': caught,is_unicode=True,True; translate_return_value = translate_bengali__to_ascii(char) # if Bengali
    elif '\u0E01' <= char <= '\u0E5B': caught,is_unicode=True,True; translate_return_value = translate_thai_____to_ascii(char) # if Thai
    elif '\u3040' <= char <= '\u30ff': caught,is_unicode=True,True; translate_return_value = translate_japanese_to_ascii(char) # if Japanese
    elif '\u4e00' <= char <= '\u9fff': caught,is_unicode=True,True; translate_return_value = translate_chinese__to_ascii(char) # if Chinese
    elif '\uac00' <= char <= '\ud7af': caught,is_unicode=True,True; translate_return_value = translate_korean___to_ascii(char) # if Korean
    elif is_emoji_character(char)  :                                                                                           # if Emoji
        demojified = translate_emoji_to_ascii(char)
        # Only counts as caught when the emoji translator returned something non-empty.
        if demojified:
            caught, is_emoji = True, True
            translate_return_value = demojified
        else:
            caught, is_emoji = False, False
        if DEBUG_UNIDECODECHAR: primt(f" | is_emomji?={is_emoji_character(char):1} | caight?={caught} | {char} {Style.BRIGHT}de-{Style.NORMAL}emojied is '{Fore.YELLOW}{demojified}'{Fore.WHITE} | translate_return_value={translate_return_value}",end="")

    # if a character is even still untranslated, we need to use our catch-all code
    # this library purports to fix things all kinds of things like: Spanish n-with-tilde will become an N,
    # French c-with-a-hook will get hook removed, Russian is phonetically translated,
    # but we fear it may return nothing if it  doesn't have a great guess:
    is_unicode = caught
    if not caught:
        is_unicode = is_unicode_character(char)

    if not is_unicode:
        # Nothing recognised it: hand the character back unchanged.
        translate_return_value = char
    else:
        # When a translator handled the character, unidecode is applied to the translator's output.
        if caught: char = translate_return_value
        unidecodeChar = unidecode(char)
        if DEBUG_UNIDECODECHAR:
            if unidecodeChar == '':  style_adjustment = f"{Fore.RED}"
            else:                    style_adjustment = f"{Fore.WHITE}"
            primt(f" | emoji?={is_emoji:1} | unicode?={is_unicode:1} | {char}\t{style_adjustment}uni{Style.BRIGHT}de{style_adjustment}{Style.NORMAL}coded is '{Fore.YELLOW}{unidecodeChar}{style_adjustment}'{Fore.WHITE}",end="")
        if unidecodeChar == "":
            # unidecode had no answer: fall back to the official Unicode character name.
            translate_return_value = char

            # NOTE(review): this local name shadows the builtin hex() inside this function.
            # NOTE(review): if caught is True, char here is the translator's output and may be longer
            # than one character, in which case get_unicode_hex() would fail inside ord() - confirm
            # this path cannot be reached with a multi-character value.
            hex = get_unicode_hex(char)
            unicodedata_decode = get_name_from_hex(hex)

            if unicodedata_decode not in ["", char, translate_return_value]:           #assign new character if it's actually a new character
                translate_return_value = unicodedata_decode
            else:                                                                      #fairly unreachable code but comment out the if part and this can be a fun way to find un-manually-mapped characters to add more pleasant/customized mapping
                message = f"{Fore.RED}{Style.BRIGHT}\n!!! FATAL DECODE ERROR: COULD NOT DECODE UNICODE CHARACTER OF {char} (unicode hex={hex}) !!!\nFilename = {filename}\nPlease add to custom mapping table at the bottom of fixUnicodeFilenames.py\nYou may need to copy and paste this character into google to find out what it actually is:\n%EDITOR% {sys.argv[0]}{Fore.WHITE}{Style.NORMAL}"
                if DIE_ON_UNDECODEABLE_UNICODE_CHARACTER: raise Exception(message)
                primt(message)
            # Either the Unicode name or the raw character ends up wrapped in braces.
            translate_return_value = "{" + translate_return_value + "}"
        else:
            translate_return_value = unidecodeChar

    if DEBUG_CHAR or DEBUG_UNIDECODECHAR: primt("")

    #If we are in file mode, we need to make one more pass because the previous code could have turned it into something bad due to a bug:
    #First we check our custom mapping, our highest priority. It is hand-created and thought out.
    #translate_return_value, _ = translate_one_or_more_chars_with_custom_character_mapping(translate_return_value,mode) #TODO evaluate whether it is safe to disable this now that we have internal 'tegrity checks for key values that would be invalid filenames
    return translate_return_value


import emoji                                                                                                    # emoji    library
import romkan                                                                                                   # Japanese library
from pypinyin                   import lazy_pinyin, Style as PypinyinStyle                                      # Chinese  library
from korean_romanizer.romanizer import Romanizer          as KoreanRomanizer                                    # Korean   library
from pythainlp.transliterate    import romanize           as ThaiRomanize                                       # Thai     library

def translate_thai_____to_ascii(text):                                                                          # Thai
    """Romanize Thai text with the pythainlp romanize function (imported as ThaiRomanize)."""
    romanized = ThaiRomanize(text)
    return romanized
def translate_japanese_to_ascii(char):                                                                          # Japanese
    """Convert Japanese kana to romaji using romkan.to_roma."""
    romaji = romkan.to_roma(char)
    return romaji
def translate_chinese__to_ascii(char):                                                                          # Chinese
    """Convert Chinese text to pinyin (pypinyin lazy_pinyin, TONE3 style) joined without separators."""
    syllables = lazy_pinyin(char, style=PypinyinStyle.TONE3)
    return ''.join(syllables)
def translate_bengali__to_ascii(text):                                                                          # Bengali  (no library used)
    """Map each character through bengali_to_english_phonetic; unmapped characters become '_'."""
    phonetic = [bengali_to_english_phonetic.get(letter, '_') for letter in text]
    return ''.join(phonetic)
def translate_arabic___to_ascii(text):                                                                          # Arabic   (no library used)
    """Map each character through arabic_to_english_phonetic; unmapped characters become '_'."""
    phonetic = [arabic_to_english_phonetic.get(letter, '_') for letter in text]
    return ''.join(phonetic)
def translate_hindi____to_ascii(text):                                                                          # Hindi    (no library used)
    """Map each character through hindi_to_english_phonetic; unmapped characters become '_'."""
    phonetic = [hindi_to_english_phonetic.get(letter, '_') for letter in text]
    return ''.join(phonetic)
def translate_korean___to_ascii(text):                                                                          # Korean
    """Romanize Korean text with korean_romanizer; return ``text`` unchanged if that fails.

    This is deliberately best-effort: any ordinary error from the romanizer
    falls back to the input. Only ``Exception`` is caught, so Ctrl-C
    (KeyboardInterrupt) and SystemExit are no longer swallowed.
    """
    try:
        return KoreanRomanizer(text).romanize()
    except Exception:                                                                                   #pylint: disable=W0718
        return text
def translate_emoji_to_ascii(char):
    """Turn an emoji into its text name using emoji.demojize.

    When the demojized text both starts and ends with ':', those two colons
    are replaced by braces. Any other result is returned exactly as
    demojize produced it.
    """
    name = emoji.demojize(char)
    colon_wrapped = name.startswith(':') and name.endswith(':')
    if not colon_wrapped:
        return name
    return f"{{{name[1:-1]}}}"


def get_name_from_hex(unicode_hex):
    """Return the official Unicode name for a hex label such as the ones get_unicode_hex() produces.

    Any backslash-u / backslash-U prefix is stripped, the remainder is parsed
    as a hexadecimal code point, and unicodedata.name() is asked for its name.
    When the character has no name, a bracketed error string (not an
    exception) is returned instead.

    The previous version retried the lookup with "0", "00" and "000" prepended
    to the hex digits; leading zeros never change the parsed integer, so those
    retries always repeated the same failing lookup and have been removed.

    Raises:
        ValueError: if the remaining text is not valid hexadecimal, or is
                    outside the range chr() accepts.
    """
    primt(f"\n\nRunning get_name_from_hex({unicode_hex})")
    unicode_hex_original = unicode_hex

    unicode_hex = unicode_hex.replace('\\u', '').replace('\\U', '')  # Remove the Unicode escape sequence part     #added capital-U version for 2024/05/23 situation

    primt(f"unicode_hex is now {unicode_hex}")

    unicode_char = chr(int(unicode_hex, 16))      # Convert hex string to Unicode character
    try:
        return unicodedata.name(unicode_char)
    except ValueError:                            # Raised when the character does not have a name
        return f"            [ERROR: get_name_from_hex ___ fail_for_hex={unicode_hex_original},char={unicode_char}]               "


def ask_permission(old_name, new_name):
    """Ask the user, via a single keypress, whether a file may be renamed.

    Parameters:
        old_name (str): The current name, shown on the "From:" line.
        new_name (str): The proposed name, shown on the "To:" line.

    Returns:
        bool: True when the key pressed is "y"/"Y" or decodes to nothing after
              stripping whitespace (e.g. Enter); False for any other key.
    """
    primt(f"\n{Fore.YELLOW}{Style.BRIGHT}***** Rename:"                                                                   +
          f"\n{Fore.RED   }{Style.BRIGHT}From: {Style.NORMAL}{old_name}{Fore.CYAN}{Style.NORMAL}"                         +
          f"\n{Fore.GREEN }{Style.BRIGHT}  To: {Style.NORMAL}{new_name}{Fore.CYAN}{Style.NORMAL} "                        +
          f"\n{Fore.YELLOW}{Style.BRIGHT}***** Rename?"                                                                   +
          f" { Fore.BLUE  }{Style.BRIGHT}[{Fore.CYAN}Y{Fore.BLUE}/{Style.NORMAL}{Fore.CYAN}n{Style.BRIGHT}]{Style.NORMAL} ", end="")
    clear_keyboard_buffer()                                     # ignore anything typed before the prompt appeared

    # getch() returns raw bytes. Special keys (arrows, function keys) start with a 0xE0 prefix
    # byte, which is not valid UTF-8 on its own; a strict decode() would raise
    # UnicodeDecodeError. Decode leniently so such a key simply counts as "no".
    response = msvcrt.getch().decode(errors="replace").lower().strip()
    primt(Style.BRIGHT, end="")
    if response in ('y', 'yes', ''):
        primt(f"{Fore.GREEN}Yes!", end="")
        return True
    primt(f"{Fore.RED}No!", end="")
    return False

def clear_keyboard_buffer():
    """Discard every keypress that is already waiting to be read from the console."""
    while True:
        if not msvcrt.kbhit():
            break
        msvcrt.getch()          # read and throw away one pending key


#def rename_files_in_current_directory_last_ver_before_recursion(mode="file",automatic_mode=False,recursive_mode=False):                 #defaults to file mode
#    """Renames all files in a directory, replacing unicode characters."""
#    global DRY_RUN, DEBUG_ANNOUNCE_FILENAMES
#    any_files_found_to_rename_at_all = False
#    do_it_for_real = True
#    automatic      = False
#    DRY_RUN        = False
#    permission     = False
#    directory      = sys.argv[1] if len(sys.argv) > 1 else '.'                  #get all the files in the current dir...
#    for filename in os.listdir(directory):
#        filename_for_primt = filename.encode('utf-8','ignore')
#        if DEBUG_ANNOUNCE_FILENAMES: primt(f"{Fore.CYAN}{Style.BRIGHT}* Processing file {filename}...{Style.NORMAL}{Fore.WHITE}")
#        new_name = convert_to_ascii_filename_chracters(filename,mode)           #this is where all the magic happens
#
#        if filename != new_name:
#            any_files_found_to_rename_at_all = True
#            if automatic_mode:
#                automatic      = True
#                do_it_for_real = True
#                action_string  = "  Auto-Renamed"
#            else:
#                permission = ask_permission(filename, new_name)
#                do_it_for_real = permission
#                action_string  = "       Renamed" if permission is True else f"{Fore.RED}Did not rename"
#            if DRY_RUN:
#                do_it_for_real = False
#
#            old_file = os.path.join(directory, filename)
#            new_file = os.path.join(directory, new_name)
#
#            new_new_file = last_minute_filename_cleanser(new_file)              #if we've put invalid values in our mapping table without having run our tests, it can be possible to have to cleanse one more time.  Also, some emoji libraries may decode into something invalid for filenames, and since we didn't test if all the decodings were valid, we must run it through a 2nd time for that possibility as well. It's unfortunate, but not expensive.
#            if do_it_for_real:
#                #os.rename(old_file, new_new_file)                              #would error if new folder already existed
#                rename_folder_or_file_but_if_renamed_is_a_folder_that_already_exists_then_move_files_into_it_instead(old_file, new_new_file)
#
#            primt("\n")
#            if automatic: primt(f"\t{Fore.YELLOW} Automatic Run: {mode}")
#            if DRY_RUN:   primt(f"\t{Fore.YELLOW}" +   "Dry Run: ")
#            primt(f"{Fore.GREEN}{Style.NORMAL}\t{action_string}:" + f"\t{Fore.LIGHTBLACK_EX}{old_file} " +
#                  f"{Fore.CYAN}\n\t\t    to:" +  f"\t{Fore.GREEN}{new_new_file}{Style.NORMAL}\n\n\n")
#    if not any_files_found_to_rename_at_all:
#        primt(f"{Fore.RED}No files with unicode characters found.{Style.RESET_ALL}")


def rename_files_in_current_directory(mode="file",automatic_mode=False,recursive_mode=False):
    """Rename every entry in a directory whose converted name differs from its current name.

    The directory is sys.argv[1] when present, otherwise '.'.

    Parameters:
        mode (str):            Passed through to convert_to_ascii_filename_chracters().
        automatic_mode (bool): When True, rename without asking; otherwise ask_permission()
                               is called for every candidate.
        recursive_mode (bool): When True, every directory below the starting one is
                               processed as well.

    Side effects:
        Resets the global DRY_RUN to False, renames entries on disk, and prints a report
        for every candidate (or a single notice when nothing needed renaming).
    """
    global DRY_RUN, DEBUG_ANNOUNCE_FILENAMES
    any_files_found_to_rename_at_all = False
    automatic      = False
    DRY_RUN        = False                                                      # NOTE(review): this overrides any earlier DRY_RUN setting on every call -- kept as in the original, confirm it is intended
    directory      = sys.argv[1] if len(sys.argv) > 1 else '.'

    def process_directory(directory):
        """Convert, confirm and rename the entries of one directory (not its sub-directories)."""
        nonlocal any_files_found_to_rename_at_all, automatic
        for filename in os.listdir(directory):
            if DEBUG_ANNOUNCE_FILENAMES:
                ### without color-cycling:
                #primt(f"{Fore.CYAN}{Style.BRIGHT}* Processing file {filename}...{Style.NORMAL}{Fore.WHITE}")
                ### with color-cycling:
                original_print(f"* Processing file {filename}...")
                for i in range(100): claire.tick(mode="fg")                     #TODO maybe consider the range(100) thing bad form haha but we're also testing another library
            new_name = convert_to_ascii_filename_chracters(filename,mode)       #this is where all the magic happens

            if filename == new_name:
                continue                                                        #nothing to fix for this entry

            any_files_found_to_rename_at_all = True
            if automatic_mode:
                automatic      = True
                do_it_for_real = True
                action_string  = "  Auto-Renamed"
            else:
                automatic      = False
                permission     = ask_permission(filename, new_name)
                do_it_for_real = permission
                action_string  = "       Renamed" if permission is True else f"{Fore.RED}Did not rename"
            if DRY_RUN:
                do_it_for_real = False

            old_file = os.path.join(directory, filename)
            new_file = os.path.join(directory, new_name)

            new_new_file = last_minute_filename_cleanser(new_file)              #the converted name gets one more cleansing pass before it is used
            if do_it_for_real:
                rename_folder_or_file_but_if_renamed_is_a_folder_that_already_exists_then_move_files_into_it_instead(old_file, new_new_file)

            primt("\n")
            if automatic: primt(f"\t{Fore.YELLOW} Automatic Run: {mode}")
            if DRY_RUN:   primt(f"\t{Fore.YELLOW}" +   "Dry Run: ")
            primt(f"{Fore.GREEN}{Style.NORMAL}\t{action_string}:" + f"\t{Fore.LIGHTBLACK_EX}{old_file} " +
                  f"{Fore.CYAN}\n\t\t    to:" +  f"\t{Fore.GREEN}{new_new_file}{Style.NORMAL}\n\n\n")

    if recursive_mode:
        # Walk bottom-up: a directory's contents are handled before its parent renames the
        # directory itself. A top-down walk would try to descend into sub-directories under
        # their OLD names after they had been renamed, and os.walk silently skips
        # directories it cannot list.
        for root, dirs, files in os.walk(directory, topdown=False):
            process_directory(root)
    else:
        process_directory(directory)

    if not any_files_found_to_rename_at_all:
        primt(f"{Fore.RED}No files with unicode characters found.{Style.RESET_ALL}")


def rename_folder_or_file_but_if_renamed_is_a_folder_that_already_exists_then_move_files_into_it_instead(old_name, new_name):
    """Rename old_name to new_name, merging into new_name when both are existing folders.

    Parameters:
        old_name (str): Existing path of the file or folder to rename.
        new_name (str): Desired path. If it is longer than 253 characters it is shortened
                        step by step: braces, then parentheses, then square brackets, then
                        spaces are removed, and finally it is cut to 253 characters.

    Behavior:
        * new_name equals old_name (after shortening): nothing to do.
        * new_name does not exist: plain os.rename().
        * both are folders: every entry of old_name is moved into new_name, then the
          emptied old_name is removed.
        * anything else (e.g. a file whose new name is already taken): FileExistsError.

    Raises:
        FileExistsError: new_name exists and the two paths are not both folders.
        OSError:         anything raised by os.rename / shutil.move / os.rmdir.
    """
    if len(new_name) > 253: new_name = new_name.replace('{', '').replace('}', '')
    if len(new_name) > 253: new_name = new_name.replace('(', '').replace(')', '')
    if len(new_name) > 253: new_name = new_name.replace('[', '').replace(']', '')
    if len(new_name) > 253: new_name = new_name.replace(' ', '')
    if len(new_name) > 253: new_name = new_name[:253]

    if old_name == new_name:                                      # Already has the requested name
        return

    if not os.path.exists(new_name):                              # If the new name is free, simply rename
        os.rename(old_name, new_name)
        return

    # The destination exists. Merging only makes sense folder-into-folder; previously a plain
    # file ended up in os.listdir() and failed with a misleading NotADirectoryError.
    if not (os.path.isdir(old_name) and os.path.isdir(new_name)):
        raise FileExistsError(f"Cannot rename '{old_name}' to '{new_name}': the destination already exists")

    for filename in os.listdir(old_name):                         # Move everything from the old folder into the existing one
        old_file_path = os.path.join(old_name, filename)
        new_file_path = os.path.join(new_name, filename)

        # If a file with the same name exists in the new directory, it will be replaced
        # If you don't want this behavior, you can add a check here
        shutil.move(old_file_path, new_file_path)

    os.rmdir(old_name)                                            # The old folder is now empty; remove it


def last_minute_filename_cleanser_original(filename):
    """
    Earlier, simpler cleanser: convert the name if it contains any character from
    INVALID_WINDOWS_FILENAME_CHARACTERS, then drop leading "." and "-" characters.

    (Original author's note: this whole program could be just this one function, if we
    were not too picky.)
    """
    global INVALID_WINDOWS_FILENAME_CHARACTERS
    contains_invalid_character = False
    for character in filename:
        if character in INVALID_WINDOWS_FILENAME_CHARACTERS:
            contains_invalid_character = True
            break
    if contains_invalid_character:
        filename = convert_a_filename(filename,silent_if_unchanged=False)   #TODO true
    return filename.lstrip('.-')                                            # no "." or "-" at the very start


def last_minute_filename_cleanser(filename):
    """Final safety pass over a name that is about to be used for a rename.

    A leading relative-path marker (".\\", "./", "..\\", "../" or their doubled-separator
    variants) is set aside, the rest is converted if it contains any character from
    INVALID_WINDOWS_FILENAME_CHARACTERS, leading "." / "-" characters are dropped, and
    the marker is put back in front.

    Parameters:
        filename (str): A bare name, or a name prefixed with one of the markers above.

    Returns:
        str: The cleansed name, with the original leading marker (if any) preserved.

    NOTE(review): only the leading marker is protected. Separators deeper in the path
    (e.g. ".\\sub\\name" in recursive mode) are presumably treated as invalid characters
    and converted -- verify against INVALID_WINDOWS_FILENAME_CHARACTERS and the callers.
    """
    global INVALID_WINDOWS_FILENAME_CHARACTERS

    # Longest markers first, so that e.g. ".\\\\" is not shadowed by its own prefix ".\\".
    leading_patterns = ["..\\\\", "..//", ".\\\\", ".//", "..\\", "../", ".\\", "./"]
    leading_pattern = ""
    for pattern in leading_patterns:
        if filename.startswith(pattern):
            leading_pattern = pattern
            break
    stripped_filename = filename[len(leading_pattern):]                                               # The part that is actually cleansed

    if any(char in INVALID_WINDOWS_FILENAME_CHARACTERS for char in stripped_filename):                # Perform the necessary processing on the stripped filename
        stripped_filename = convert_a_filename(stripped_filename, silent_if_unchanged=True, silent_if_changed=True)

    without_leading_junk = stripped_filename.lstrip('.-')                                             # Strip "." or "-" from the beginning of the filename
    if without_leading_junk:                                                                          # ...unless that would leave no name at all
        stripped_filename = without_leading_junk

    # Re-attach the remembered marker explicitly. The previous length arithmetic
    # (filename[:len(filename) - len(result)]) put the stripped "."/"-" characters back
    # and produced a wrong prefix whenever the conversion changed the length of the name.
    return leading_pattern + stripped_filename


## Public calls:
def convert_a_string(string_to_convert, silent_if_unchanged=False, silent_if_changed=False, silent=False):
    """Public entry point: convert string_to_convert using the "string" mode.

    The three silent* flags are forwarded unchanged to just_convert_a_string().
    """
    return just_convert_a_string(
        string_to_convert,
        "string",
        silent_if_unchanged=silent_if_unchanged,
        silent_if_changed=silent_if_changed,
        silent=silent,
    )
def convert_a_filename(filename_to_convert, silent_if_unchanged=False, silent_if_changed=False, silent=False):
    """Public entry point: convert filename_to_convert using the "file" mode.

    The three silent* flags are forwarded unchanged to just_convert_a_string().
    """
    return just_convert_a_string(
        filename_to_convert,
        "file",
        silent_if_unchanged=silent_if_unchanged,
        silent_if_changed=silent_if_changed,
        silent=silent,
    )

def just_convert_a_string(string_to_convert,mode,silent_if_unchanged=False,silent_if_changed=False,silent=False):
    """Convert one string in the given mode and optionally print the before/after pair.

    Parameters:
        string_to_convert (str):    The text to convert.
        mode (str):                 "file" or "string" are passed to the converter;
                                    "test" runs the internal tests and converts the text
                                    in both of those modes; "script" prints the emoji
                                    script and exits the process.
        silent_if_unchanged (bool): Do not print when the conversion changed nothing.
        silent_if_changed (bool):   Do not print when the conversion changed something.
        silent (bool):              Never print.

    Returns:
        str: The converted string, or ":)" for the "test" mode.
    """
    global DIE_ON_UNDECODEABLE_UNICODE_CHARACTER
    if __name__ != "__main__":
        DIE_ON_UNDECODEABLE_UNICODE_CHARACTER = False           #only die when being run, not when being imported

    # special handling for testing mode
    if mode == "test":
        run_internal_tests()
        for temp_mode in ("file", "string"):
            primt (f"\n\n{Fore.YELLOW}{Style.BRIGHT}* Testing in mode {temp_mode}:{Style.NORMAL}\n")
            primt ("Test result: " + just_convert_a_string(string_to_convert,temp_mode))
        return ":)"

    # special handling for script mode
    if mode == "script":
        create_script_to_define_emoji_characters()
        sys.exit(0)
        return ":)"                                             #not reached in normal operation: sys.exit() raises SystemExit

    # actually convert the string
    romanized_string = convert_to_ascii_filename_chracters(string_to_convert,mode)     #...which we then fix the same way we would fix our filenames

    # print out the change, unless one of the silent flags applies
    was_changed = string_to_convert != romanized_string
    keep_quiet  = silent or (silent_if_unchanged and not was_changed) or (silent_if_changed and was_changed)
    if not keep_quiet:
        primt(f"{Fore.RED}Old string: {string_to_convert}")
        primt(f"{Fore.GREEN}New string: {romanized_string }")

    return romanized_string


#from emoji.unicode_codes import EMOJI_DATA
#
#def create_script_to_define_emoji_characters():
#    for emoji, emoji_data in EMOJI_DATA.items():
#        # Fetch the emoji name
#        emoji_name = emoji_data.get('en', '')
#        emoji_name = emoji_name.upper().replace(' ', '_').replace(':', '').replace('-', '_')
#
#        # Get the unicode code points and convert them to decimal
#        emoji_codes = emoji.encode('unicode_escape').decode('ASCII').split('\\')[1:]
#
#        # Create the output string
#        output_string = f"SET EMOJI_{emoji_name}="
#        for code in reversed(emoji_codes):  # reversed added here
#            if code.startswith('0'):
#                value = int(code, 16)
#                output_string += f"%@CHAR[{value}]"
#            elif code.startswith('U'):
#                value = int(code[1:], 16)
#                output_string += f"%@CHAR[{value}]"
#            elif code == 'ufe0f':
#                # This is a variation selector, handle it accordingly
#                output_string += "+%@CHAR[65039]"  # 65039 is decimal equivalent of 'U+FE0F'
#            elif code == 'u200d':
#                # This is a Zero Width Joiner, handle it accordingly
#                output_string += "+%@CHAR[8205]"  # 8205 is decimal equivalent of 'U+200D'
#            else:
#                # Unknown code, handle it as you see fit
#                primt(f"Unknown code encountered: {code}")
#
#        primt(output_string)


def create_script_to_define_emoji_characters_1():
    """Earlier variant of the emoji-variable generator, kept for reference.

    Prints a header line, then one "EMOJI_<NAME>=%@CHAR[a]%@CHAR[b]" line for each entry
    of EMOJI_DATA whose part before the first zero-width joiner has not been seen yet.
    Entries whose status is not 'fully_qualified' get an "_UNQUALIFIED" name suffix.
    Identical lines are printed once; lines are printed in set-iteration order.
    """
    primt("EMOJI_ENVIRONMENT_VARIABLES_CREATED_BY=fix_unicode_files.py script")
    import ctypes
    from emoji.unicode_codes import EMOJI_DATA
    name_substitutions = ((' ', '_'), (':', ''), ('-', '_'), ("'", ''), ('SKIN_TONE', 'SKIN'), ('&', '_AND_'))
    seen_bases = set()              # base emoji (text before the first ZWJ) already handled
    collected_lines = []            # every generated line, in generation order
    for emoji_text, emoji_info in EMOJI_DATA.items():
        base = emoji_text.split('\u200d')[0]
        if base in seen_bases:
            continue
        seen_bases.add(base)

        name_meat = emoji_info['en'].upper()
        for old, new in name_substitutions:
            name_meat = name_meat.replace(old, new)

        suffix = "" if emoji_info['status'] == 'fully_qualified' else "_UNQUALIFIED"

        # View the wide-character buffer of the emoji as 16-bit integers and take the first two
        code_units = ctypes.cast(ctypes.c_wchar_p(emoji_text), ctypes.POINTER(ctypes.c_uint16))
        right_side = "".join(f"%@CHAR[{code_units[i]}]" for i in range(2))

        collected_lines.append(f"EMOJI_{name_meat}{suffix}=" + right_side)

    # Collapse duplicates, then print whatever is left
    unique_lines = set()
    for line in collected_lines:
        unique_lines.add(line)
    for line in unique_lines:
        primt(line)


def create_script_to_define_emoji_charactersDECENTBUTPROBLEMATICAF():
    """Earlier variant of the emoji-variable generator, kept for reference.

    Prints a header line, then, for each entry of EMOJI_DATA whose part before the first
    zero-width joiner has not been seen yet, one "EMOJI_<NAME>=..." line. Entries whose
    status is not 'fully_qualified' are emitted as "EMOJI_<NAME>_UNQUALIFIED=..." and
    additionally as a plain "EMOJI_<NAME>=..." alias. Nothing is de-duplicated.
    """
    primt("EMOJI_ENVIRONMENT_VARIABLES_CREATED_BY=fix_unicode_files.py script")
    import ctypes
    from emoji.unicode_codes import EMOJI_DATA
    name_substitutions = ((' ', '_'), (':', ''), ('-', '_'), ("'", ''), ('SKIN_TONE', 'SKIN'), ('&', '_AND_'))
    seen_bases = set()              # base emoji (text before the first ZWJ) already handled
    lines = []                      # generated lines, in generation order
    for emoji_text, emoji_info in EMOJI_DATA.items():
        base = emoji_text.split('\u200d')[0]
        if base in seen_bases:
            continue
        seen_bases.add(base)

        name_meat = emoji_info['en'].upper()
        for old, new in name_substitutions:
            name_meat = name_meat.replace(old, new)

        is_fully_qualified = emoji_info['status'] == 'fully_qualified'

        # View the wide-character buffer of the emoji as 16-bit integers
        code_units = ctypes.cast(ctypes.c_wchar_p(emoji_text), ctypes.POINTER(ctypes.c_uint16))
        first_two = "".join(f"%@CHAR[{code_units[i]}]" for i in range(2))

        if is_fully_qualified:
            lines.append(f"EMOJI_{name_meat}=" + first_two)
        else:
            lines.append(f"EMOJI_{name_meat}_UNQUALIFIED=" + first_two)
            # also offer the value under the plain name
            lines.append(f"EMOJI_{name_meat}" + f"=%@CHAR[{code_units[0]}]%@CHAR[{code_units[1]}]")

    for line in lines:
        primt(line)


def create_script_to_define_emoji_characters_tried_without_gpt_got_5718():
    """Earlier variant of the emoji-variable generator, kept for reference.

    Like the variant that also emits plain-name aliases for non-fully-qualified entries,
    but with extra de-duplication: a name is used once per qualification class, a
    right-hand side ("%@CHAR[a]%@CHAR[b]") is used once overall, and identical lines are
    printed once, in generation order.
    """
    primt("EMOJI_ENVIRONMENT_VARIABLES_CREATED_BY=fix_unicode_files.py script")
    import ctypes
    from emoji.unicode_codes import EMOJI_DATA
    name_substitutions = ((' ', '_'), (':', ''), ('-', '_'), ("'", ''), ('SKIN_TONE', 'SKIN'), ('&', '_AND_'))
    seen_bases         = set()      # base emoji (text before the first ZWJ) already handled
    qualified_names    = set()      # names already used by fully-qualified entries
    unqualified_names  = set()      # names already used by all other entries
    used_right_sides   = set()      # right half of "=" already emitted
    lines = []                      # generated lines, in generation order
    for emoji_text, emoji_info in EMOJI_DATA.items():
        base = emoji_text.split('\u200d')[0]
        if base in seen_bases:
            continue
        seen_bases.add(base)

        name_meat = emoji_info['en'].upper()
        for old, new in name_substitutions:
            name_meat = name_meat.replace(old, new)

        is_fully_qualified = emoji_info['status'] == 'fully_qualified'
        names_of_this_class = qualified_names if is_fully_qualified else unqualified_names
        if name_meat in names_of_this_class:
            continue
        names_of_this_class.add(name_meat)

        # View the wide-character buffer of the emoji as 16-bit integers
        code_units = ctypes.cast(ctypes.c_wchar_p(emoji_text), ctypes.POINTER(ctypes.c_uint16))
        right_side = "".join(f"%@CHAR[{code_units[i]}]" for i in range(2))

        if is_fully_qualified:
            line = f"EMOJI_{name_meat}={right_side}"
        else:
            line = f"EMOJI_{name_meat}_UNQUALIFIED={right_side}"

        if right_side in used_right_sides:
            continue
        used_right_sides.add(right_side)

        if line in lines:
            continue
        lines.append(line)

        if not is_fully_qualified:
            # also offer the value under the plain name
            lines.append(f"EMOJI_{name_meat}" + f"=%@CHAR[{code_units[0]}]%@CHAR[{code_units[1]}]")

    # Print each distinct line once, keeping first-seen order
    for line in dict.fromkeys(lines):
        primt(line)


def create_script_to_define_emoji_characters_got_3106_much_better():
    """Earlier variant of the emoji-variable generator, kept for reference.

    Prints a header line, then one "EMOJI_<NAME>=%@CHAR[a]%@CHAR[b]" line per distinct
    right-hand side, considering only entries of EMOJI_DATA whose part before the first
    zero-width joiner has not been seen yet. Entries whose status is not
    'fully_qualified' get an "_UNQUALIFIED" name suffix. Lines are printed in
    set-iteration order.
    """
    primt("EMOJI_ENVIRONMENT_VARIABLES_CREATED_BY=fix_unicode_files.py script")
    import ctypes
    from emoji.unicode_codes import EMOJI_DATA
    name_substitutions = ((' ', '_'), (':', ''), ('-', '_'), ("'", ''), ('SKIN_TONE', 'SKIN'), ('&', '_AND_'))
    seen_bases       = set()        # base emoji (text before the first ZWJ) already handled
    used_right_sides = set()        # right half of "=" already emitted
    lines            = set()        # finished lines
    for emoji_text, emoji_info in EMOJI_DATA.items():
        base = emoji_text.split('\u200d')[0]
        if base in seen_bases:
            continue
        seen_bases.add(base)

        name_meat = emoji_info['en'].upper()
        for old, new in name_substitutions:
            name_meat = name_meat.replace(old, new)

        # View the wide-character buffer of the emoji as 16-bit integers and take the first two
        code_units = ctypes.cast(ctypes.c_wchar_p(emoji_text), ctypes.POINTER(ctypes.c_uint16))
        right_side = "".join(f"%@CHAR[{code_units[i]}]" for i in range(2))

        if right_side in used_right_sides:
            continue
        used_right_sides.add(right_side)

        suffix = "" if emoji_info['status'] == 'fully_qualified' else "_UNQUALIFIED"
        lines.add(f"EMOJI_{name_meat}{suffix}={right_side}")

    for line in lines:
        primt(line)


# thread about this: https://jpsoft.com/forums/threads/1431-emoji-environment-variables-for-your-echoing-convenience.11618/
def create_script_to_define_emoji_characters(): #2860, 1431 unique
    """Print "EMOJI_<NAME>=%@CHAR[a]%@CHAR[b]" definitions built from the emoji package.

    A header line is printed first. Each entry of EMOJI_DATA then contributes at most one
    line: its English name is upper-cased and sanitised into a variable name, and the
    value is made of the first two UTF-16 code units of the emoji (a 0 stands in for the
    second unit when the emoji has only one). A name is used only once and a right-hand
    side is used only once; later entries that would repeat either are skipped.

    Output order is the iteration order of a set.
    """
    primt("EMOJI_ENVIRONMENT_VARIABLES_CREATED_BY=fix_unicode_files.py script")
    from emoji.unicode_codes import EMOJI_DATA
    processed_emojis = set()  # Variable names already emitted
    rights = set()  # Set to track right half of = in output file so we don't make duplicates
    output_strings = set()  # Finished lines
    for emoji, emoji_data in EMOJI_DATA.items():
        emoji_name_meat = emoji_data['en'].upper().replace(' ', '_').replace(':', '').replace('-', '_').replace("'", '').replace('SKIN_TONE', 'SKIN').replace('&', '_AND_').replace('�','')

        # Construct the fully qualified and unqualified names
        fully_qualified_name = f"EMOJI_{emoji_name_meat}"
        unqualified_name = f"EMOJI_{emoji_name_meat}_UNQUALIFIED"

        # Determine the name to be used based on the status of the emoji
        # NOTE(review): per the original remark below, 3 is "minimally_qualified" and 4 is
        # "unqualified", so status 4 currently receives the plain name -- confirm intent.
        if emoji_data['status'] == 3:           #rem emoji.fully_qualified:component = 1 fully_qualified = 2 minimally_qualified = 3 unqualified = 4
            emoji_name = unqualified_name
        else:
            emoji_name = fully_qualified_name

        # Check if the current version of this emoji has already been processed
        if emoji_name in processed_emojis:
            continue
        processed_emojis.add(emoji_name)

        # First two UTF-16 code units of the emoji. Encoding explicitly (instead of reading
        # the ctypes wchar buffer through a 16-bit pointer) does not depend on how wide the
        # platform's wchar_t is. The zero padding reproduces the terminator that the old
        # buffer read returned for single-unit emoji.
        utf16_bytes = emoji.encode('utf-16-le', 'surrogatepass') + b'\x00\x00\x00\x00'
        code_units = [int.from_bytes(utf16_bytes[offset:offset + 2], 'little') for offset in (0, 2)]

        # Create the output string
        right = "".join(f"%@CHAR[{unit}]" for unit in code_units)

        if right in rights:
            continue
        rights.add(right)

        output_strings.add(f"{emoji_name}={right}")

    # Print the output strings (a set cannot hold duplicates, so no further filtering is needed)
    for output_string in output_strings:
        primt(output_string)


def run_internal_tests(emoji_only=False):
    """Run the mapping-table integrity test, with start/pass banners unless emoji_only is set."""
    show_banners = not emoji_only
    if show_banners:
        primt (f"{Fore.GREEN}{Style.BRIGHT}\nRunning internal mapping table integrity test for valid filename characters...{Style.NORMAL}")
    internal_mapping_table_integrity_test_check_for_invalid_filename_chars(emoji_only=emoji_only)
    if show_banners:
        primt (f"{Fore.GREEN}{Style.BRIGHT}Passed!{Style.NORMAL}")

def internal_mapping_table_integrity_test_check_for_invalid_filename_chars(emoji_only=False):
    """Check that no mapping-table entry would put an invalid character into a filename.

    For every entry of the hindi, arabic, bengali and custom tables the "file value" is
    determined (second element of the value when it has more than one element, otherwise
    the first element, otherwise the key itself). A file value containing any character
    from INVALID_WINDOWS_FILENAME_CHARACTERS is reported.

    Parameters:
        emoji_only (bool): When True the per-dictionary heading is not printed.

    Side effects:
        Sets the global DEBUG_INTERNAL_TESTING to True, prints progress, and calls
        sys.exit(6666) if any entry failed.
    """
    #NEW LANGUAGES might get added here
    global INVALID_WINDOWS_FILENAME_CHARACTERS, DEBUG_INTERNAL_TESTING
    global unicode_to_ascii_custom_character_mapping
    global hindi_to_english_phonetic, arabic_to_english_phonetic, bengali_to_english_phonetic
    dictionaries = {
        "hindi"   : hindi_to_english_phonetic,
        "arabic"  : arabic_to_english_phonetic,
        "bengali" : bengali_to_english_phonetic,
        "custom"  : unicode_to_ascii_custom_character_mapping,
    }
    anyFailed = False
    t = 0                                                       # running entry count across all dictionaries
    DEBUG_INTERNAL_TESTING=True #goat
    for d, (dictionary_name, dictionary) in enumerate(dictionaries.items(), start=1):
        if not emoji_only:
            primt(f"{Fore.GREEN}{Style.BRIGHT}- Testing dictionary #{d}: {dictionary_name}")
        for e, (key, value) in enumerate(dictionary.items(), start=1):
            t += 1
            stringValue = key
            if len(value) > 1:
                fileValue = value[1]
            elif value:
                fileValue = value[0]
            else:
                fileValue = stringValue
            if DEBUG_INTERNAL_TESTING:
                primt(f"{Fore.GREEN}{Style.NORMAL}- Testing entry #{t}: Dict #{d}, entry#{e}: [key={key},value={value}] [strVal={stringValue},fileVal={fileValue}]")
            #DEBUG: if DEBUG_INTERNAL_TESTING: primt(f"\t- File value found: '{fileValue}'")
            if fileValue != "" and any(char in fileValue for char in INVALID_WINDOWS_FILENAME_CHARACTERS):
                primt(f"\t{Fore.RED}{Style.BRIGHT}- stringValue={stringValue} in fileValue={Back.LIGHTBLACK_EX}{fileValue}{Back.BLACK} in dictionary d={d}, entry e={e}, contains invalid character! Cannot contain any character from INVALID_WINDOWS_FILENAME_CHARACTERS={Fore.YELLOW}{INVALID_WINDOWS_FILENAME_CHARACTERS}\n{Fore.YELLOW}{Style.NORMAL}This means you need to edit your code so that the first value in the value array is a valid windows filename. I.E. in our dictionary of Character:[translation1,translation2], the translation1 provided of '{fileValue}' has invalid windows filename characters in it (i.e. one of the following characters:{Fore.RED}{INVALID_WINDOWS_FILENAME_CHARACTERS}{Fore.YELLOW}) and must be changed in the source code!\n{Style.BRIGHT}Basically, copy and paste this: {Back.LIGHTBLACK_EX}{fileValue}{Back.BLACK} (the part with the weird grey background), search for that in the source code, and make it not include any of these red characters: {Fore.RED}{INVALID_WINDOWS_FILENAME_CHARACTERS}{Fore.YELLOW}\n{Fore.RED}\tkey='{key}',value='{value}'")  #pylint: disable=C0301
                anyFailed = True
    if anyFailed:
        sys.exit(6666)


def get_testing_string():
    """Return the module-level massive_testing_string used as input for test mode."""
    global massive_testing_string                               #a versatile string full of all kinds of characters used for testing
    #TODO programatically add values from our mapping tables to test them out
    return massive_testing_string


def get_mode(always_use_automatic_mode=False):
    """Work out the run mode from sys.argv and the environment.

    Recognised options are consumed, i.e. removed from sys.argv:
        * "auto" / "automatic" as the first argument turns automatic mode on.
        * "/s" anywhere after the script name turns recursive mode on.
    The environment variable AUTOMATIC_UNICODE_CLEANING=1 also turns automatic mode on
    and is deleted afterwards so that it only applies once.

    Parameters:
        always_use_automatic_mode (bool): Force automatic mode regardless of the above.

    Returns:
        tuple: (mode, recurse, automatic) where mode is 'string', 'file', 'test',
               'script', or 'unknown' when no mode argument remains; recurse and
               automatic are booleans.

    Exits:
        sys.exit(666) for an unrecognised mode, or when 'test' / 'script' is followed by
        further arguments.
    """
    global DEBUG_MODE_ARGV, VALID_MODES

    AUTOMATIC_MODE = False

    return_value = 'unknown'
    if DEBUG_MODE_ARGV: primt (f"sys.argv is {sys.argv}")

    # see if we are in automatic mode or not
    if len(sys.argv) > 1:                                                       #if first option is 'auto', set automatic_mode and pop that option off
        arg1 = sys.argv[1].lower()
        if arg1 in ['auto', 'automatic']:
            AUTOMATIC_MODE = True
            del sys.argv[1]

    if      os.getenv ('AUTOMATIC_UNICODE_CLEANING')  == "1":
        del os.environ['AUTOMATIC_UNICODE_CLEANING']                            #delete the environment variable so we only let this directive work once (we don't want to get stuck in automatic mode)     #might want to check if permissions actually allow this, though
        AUTOMATIC_MODE = True

    if always_use_automatic_mode: AUTOMATIC_MODE = True

    # see if we are in recurse mode or not
    # Build the filtered list first and then swap it in. The previous loop removed items
    # from sys.argv while iterating over it and re-assigned the flag for every argument,
    # so "/s" was only honoured depending on where it stood on the command line.
    given_arguments     = sys.argv[1:]
    remaining_arguments = [arg for arg in given_arguments if arg.lower() != '/s']
    recurse             = len(remaining_arguments) != len(given_arguments)
    sys.argv[1:]        = remaining_arguments

    # see if we are in stringmode or filemode or testmode or scriptmode or not
    if len(sys.argv) > 1:
        arg1 = sys.argv[1].lower()
        if   arg1 in ['stringmode'    ,'string']: return_value = 'string'
        elif arg1 in ['filename'      ,'file'  ]: return_value = 'file'
        elif arg1 in ['testing','test','script']:
            if len(sys.argv) > 2:
                primt (f'\n{Fore.RED}ERROR: Mode of {Style.BRIGHT}"{arg1}"{Style.NORMAL} cannot accept any other parameters as it uses an internal testing string. ')
                sys.exit(666)
            if arg1 in ['testing','test']: return_value = 'test'
            if arg1 in ['script']:         return_value = 'script'
        else:
            primt (f'\n{Fore.RED}ERROR: Mode of {Style.BRIGHT}"{arg1}"{Style.NORMAL} is not a valid mode from the possible valid modes of: {VALID_MODES}. ')
            sys.exit(666)

    if DEBUG_MODE_ARGV: primt (f"{Fore.BLUE}* Running in {return_value} mode with arguments {sys.argv}.\n\tAUTOMATIC_MODE is {AUTOMATIC_MODE}")
    return return_value, recurse, AUTOMATIC_MODE


def main():
    """
    Command-line entry point: ask get_mode() how we were invoked, then rename files or convert a string.

    What happens depends on what is left in sys.argv once get_mode() has returned:

        * only the script name -- rename the files in the current directory in "file" mode, run a
                                  second pass with recursive_mode=True if recursion was requested,
                                  then exit with status 0
        * mode 'test'          -- convert the internal testing string plus everychar.ALMOST_EVERY_CHARACTER
        * mode 'script'        -- call create_script_to_define_emoji_characters()
        * any other mode       -- convert sys.argv[2:] joined with single spaces

    Every branch except the first ends up in just_convert_a_string().
    """
    mode_name, mode_is_recursive, mode_is_automatic = get_mode(always_use_automatic_mode=False)

    text_to_convert = ""

    if len(sys.argv) == 1:
        # MODE 1: no arguments left -- fix every filename in the current folder.
        # The first pass may rename folders, so the recursive pass runs afterwards, over the new folder names.
        rename_files_in_current_directory(mode="file", automatic_mode=mode_is_automatic)
        if mode_is_recursive:
            rename_files_in_current_directory(mode="file", automatic_mode=mode_is_automatic, recursive_mode=True)
        sys.exit(0)
    elif mode_name == 'test':
        # MODE 4: translate the built-in testing strings
        text_to_convert = (
            get_testing_string()
            + "\n\n\n TESTING STRING #2: \n\n\n"
            + everychar.ALMOST_EVERY_CHARACTER
        )
    elif mode_name == 'script':
        # MODE 5: experimental
        # NOTE(review): the conversion below still runs afterwards, with an empty string -- confirm that is intended
        create_script_to_define_emoji_characters()
    else:
        # MODES 2 & 3: translate whatever followed the mode name on the command line
        text_to_convert = " ".join(sys.argv[2:])

    # MODES 2 - 4: run the proper translation
    just_convert_a_string(text_to_convert, mode_name)

    if mode_name == 'test':
        primt(f"{Style.BRIGHT}{Fore.GREEN}\n...Seems like all tests passed if we got this far!")


########################################################################################################################################################################################
########################################################################################################################################################################################
########################################################################################################################################################################################
########################################################################################################################################################################################
########################################################################################################################################################################################
########################################################################################################################################################################################
########################################################################################################################################################################################
########################################################################################################################################################################################


# Arabic script -> rough ASCII phonetic spelling, one entry per code point.
# Several distinct letters share a Latin spelling (e.g. both 'س' and 'ص' give 's'), so this is one-way only.
arabic_to_english_phonetic = {
    # letters
    'ا': 'a',  'ب': 'b',  'ت': 't',  'ث': 'th',
    'ج': 'j',  'ح': 'h',  'خ': 'kh', 'د': 'd',
    'ذ': 'dh', 'ر': 'r',  'ز': 'z',  'س': 's',
    'ش': 'sh', 'ص': 's',  'ض': 'd',  'ط': 't',
    'ظ': 'z',  'ع': 'a',  'غ': 'gh', 'ف': 'f',
    'ق': 'q',  'ك': 'k',  'ل': 'l',  'م': 'm',
    'ن': 'n',  'ه': 'h',  'و': 'w',  'ي': 'y',

    # hamza, ta marbuta, alef maksura and hamza carriers
    'ء': "'",  'ة': 'h',  'ى': 'a',  'ئ': 'a',  'ؤ': 'o',

    # short-vowel marks (these are combining characters, so they are awkward to see in an editor)
    'َ': 'a',  'ِ': 'i',  'ُ': 'u',

    # Arabic-Indic digits
    '٠': '0',  '١': '1',  '٢': '2',  '٣': '3',  '٤': '4',
    '٥': '5',  '٦': '6',  '٧': '7',  '٨': '8',  '٩': '9',
}
# Bengali script -> rough ASCII phonetic spelling.
# The nukta mark maps to the empty string, i.e. it is simply dropped.
bengali_to_english_phonetic = {
    # independent vowels
    'অ': 'o',  'আ': 'a',  'ই': 'i',  'ঈ': 'ee', 'উ': 'u',
    'ঊ': 'oo', 'এ': 'e',  'ঐ': 'oi', 'ও': 'o',  'ঔ': 'ou',

    # consonants
    'ক': 'k',  'খ': 'kh', 'গ': 'g',  'ঘ': 'gh', 'ঙ': 'ng',
    'চ': 'ch', 'ছ': 'chh', 'জ': 'j', 'ঝ': 'jh', 'ঞ': 'n',
    'ট': 't',  'ঠ': 'th', 'ড': 'd',  'ঢ': 'dh', 'ণ': 'n',
    'ত': 't',  'থ': 'th', 'দ': 'd',  'ধ': 'dh', 'ন': 'n',
    'প': 'p',  'ফ': 'ph', 'ব': 'b',  'ভ': 'bh', 'ম': 'm',
    'য': 'y',  'র': 'r',  'ল': 'l',
    'শ': 'sh', 'ষ': 'sh', 'স': 's',  'হ': 'h',

    # nukta (removed outright)
    '়': '',

    # dependent vowel signs
    'া': 'a',  'ি': 'i',  'ী': 'ee', 'ু': 'u',  'ূ': 'oo',
    'ৃ': 'ri', 'ে': 'e',  'ৈ': 'oi', 'ো': 'o',  'ৌ': 'ou',

    # khanda ta
    'ৎ': 't',

    # Bengali digits
    '০': '0',  '১': '1',  '২': '2',  '৩': '3',  '৪': '4',
    '৫': '5',  '৬': '6',  '৭': '7',  '৮': '8',  '৯': '9',
}
# Devanagari (Hindi) -> rough ASCII phonetic spelling.
# The last four keys are multi-character conjuncts, so not every key here is a single code point.
hindi_to_english_phonetic = {
    # independent vowels
    'अ': 'a',  'आ': 'aa', 'इ': 'i',  'ई': 'ii', 'उ': 'u',  'ऊ': 'uu',
    'ए': 'e',  'ऐ': 'ai', 'ओ': 'o',  'औ': 'au', 'ऋ': 'ri',

    # consonants
    'क': 'k',  'ख': 'kh', 'ग': 'g',  'घ': 'gh', 'ङ': 'ng',
    'च': 'ch', 'छ': 'chh', 'ज': 'j', 'झ': 'jh', 'ञ': 'n',
    'ट': 't',  'ठ': 'th', 'ड': 'd',  'ढ': 'dh', 'ण': 'n',
    'त': 't',  'थ': 'th', 'द': 'd',  'ध': 'dh', 'न': 'n',
    'प': 'p',  'फ': 'ph', 'ब': 'b',  'भ': 'bh', 'म': 'm',
    'य': 'y',  'र': 'r',  'ल': 'l',  'व': 'v',
    'श': 'sh', 'ष': 'sh', 'स': 's',  'ह': 'h',

    # conjuncts (each key is consonant + virama + consonant)
    'क्ष': 'ksh',
    'त्र': 'tr',
    'ज्ञ': 'gy',
    'श्र': 'shr',
}


# Mapping of unicode symbols to ASCII equivalents that are valid for filenames
#
# VALUES:  each value is either a plain string or a list of one or two strings.  Where two are given,
#          the second tends to be the tamer, filename-safe spelling (e.g. '%' -> ['%', 'pct']).
#          Presumably the code that reads this table picks between them based on string/file mode --
#          confirm against that code before relying on it.
#
# KEYS:    mostly the literal character.  Keys of the form 'code uXXXXX' are spelled-out hex code
#          points; presumably they are matched by a fallback lookup elsewhere in this file.
#
# ESCAPES: '\uXXXX' consumes EXACTLY four hex digits.  Anything above U+FFFF must be written as
#          '\UXXXXXXXX' (capital U, eight hex digits).  A five-digit '\u' escape is NOT an error:
#          it silently becomes a two-character string (the first four digits as one character, then
#          the fifth digit as a literal letter/number), which can never match the intended character.
#
# DUPLICATES: when a key is repeated inside one dict literal, the LAST value wins without any warning.
unicode_to_ascii_custom_character_mapping = {

    ## characters explicitly not valid in ascii filenames -- THESE NINE MUST BE LISTED FIRST IN THIS MAPPING FOR INTERNAL UNIT TESTING PURPOSES
    '*' :   ['*'  , 'x' ],  # ASCII asterisk
    '?' :   ['?'  , '_' ],  # ASCII question mark
    '|' :   ['|'  , '-' ],  # ASCII pipe
    ':' :   [':'  , '- '],  # ASCII     colon
    '/' :   ['/'  , '--'],  # ASCII     slash
    '\\':   ['\\/','--' ],  # ASCII backslash
    '<' :   ['<'  , '(' ],  # ASCII    less-than
    '>' :   ['>'  , ')' ],  # ASCII greater-than
    '"' :   ['"'  , "''" ], # ASCII quote          #converting to 2 apostrophes because of a theory of certain long filenames not being parseable if too many apostrophes and an odd number of apostrophes
    '^' :   ['-'],          # controversial, but this messes up Claire's personal TCC situation too much. If you want to allow carets in filenames, comment this line out.

    ## ones that look like the above but aren't, and are actually valid but we just don't like:
    '！':   '!' ,  # unicode exclamation mark
    '？':   ['?' , '_' ],  # unicode question mark
    '；' :   [';'] ,  # unicode semicolon
    '，' :   [','] ,  # unicode comma
    '。' :   ['.'] ,  # unicode full stop

    ## characters that are problematic with command line processors
    '%' :   ['%' , 'pct'],  # percent sign               [substitution only needed for filenames]        #TODO make this configurable with config deleting this key
    '`' :   ["'" , "'"],    # backtick                  [2 of these in a filename can make parsers think there is bad quoting]


    # Emojis with ASCII equivalents: faces:
    "😰": [":'(", "TT"],   # anxious face with sweat
    "😧": [":|", "TT"],    # anguished face
    "😠": ["):<", "]-["],   # angry face
    "😲": [":O", "O_O"],    # astonished face
    "😁": ["^_^", "{beaming face with smiley eyes}"],          # beaming face with smiling eyes
    "😖": [">.<", "{confounded face}"],    # confounded face
    "😕": [":/", "{confused face}"],    # confused face
    "😢": [")':", "{crying face}"],   # crying face
    "😭": [")':", "TT"],   # loudly crying face            NOTE: appears to be repeated further down in this section; if so, the later value wins
    "😓": ["^^;", "^^'"],   # downcast face with sweat      NOTE: appears to be repeated in the second faces section; if so, the later value wins
    "😈": [">;)", "{devil smiling}"],   # devil smiling
    "😞": ["):", "]-["],    # disappointed face
    "😑": ["-_-"],          # expressionless face
    "😮": [":o", "O_O"],    # face with open mouth
    "😤": [">:(", "]-["],   # face with steam from nose     NOTE: appears to be repeated in the second faces section; if so, the later value wins
    "😨": [":o", "O_O"],    # fearful face
    "😳": [":$", "O_O"],    # flushed face
    "😦": ["):", "]-["],    # frowning face with open mouth
    "😬": ["D:", "D-"],     # grimacing face
    "😀": [":)", "=)"],     # grinning face
    "😃": [":D", "=D"],     # grinning face with big eyes
    "😄": ["XD"],           # grinning face with smiling eyes
    "😅": ["^_^'","{grinning face with sweat}"],         # grinning face with sweat
    "😆": ["X'D", "XD"],    # grinning squinting face
    "😇": ["O:)", "O)"],   # halo face
    "😯": [":o", "O_O"],    # hushed face
    "😗": [":*", "{kissing face}"],    # kissing face
    "😘": [":*", "{blowing a kiss}"],    # kiss blowing face
    "😙": [":*", "{kiss with smiling eyes}"],    # kissing face with smiling eyes
    "😚": ["XOXO"],         # kissing face with closed eyes
    "😭": [")':", "{loudly crying face}"],   # loudly crying face
    "😔": ["):", "{pensive face}"],    # pensive face
    "😣": [">.<", "{persevering face}"],    # persevering face
    "😡": [">_<", "{pouting face}"],   # pouting face
    "😥": [")':", "{sad but relieved face}"],   # sad but relieved face
    "😱": [":O", "O_O"],    # screaming in fear
    "😪": ["X|", "-_-"],    # sleepy face
    "🙂": [":)", "=)"],     # slightly smiling face
    "😍": ["<3_<3", "{smiling face with heart eyes}D"],  # smiling face with heart eyes     NOTE(review): the trailing "D" after the brace looks like a typo -- confirm
    "😎": ["B-)", "B)"],    # smiling face with sunglasses


    # Emojis with ASCII equivalents: hearts
    "❤️":   ["<3"  , "(3"],     # {heart}
    '💔':   ['</3' , "(3_3"],   # Broken Heart
    '💕':   ['<3<3', "(3(3"],   # Two Hearts
    '💖':   ['<3'  , "(3"],     # Sparkling Heart
    "💗":   ["<3<3", "(3(3"],   # {growing heart}
    "💙":   ["<3"  , "(3"],     # {blue heart}
    "💚":   ["<3"  , "(3"],     # {green heart}
    "💛":   ["<3"  , "(3"],     # {yellow heart}
    "💜":   ["<3"  , "(3"],     # {purple heart}
    "🖤":   ["<3"  , "(3"],     # {black heart}
    "💝":   ["<3"  , "(3"],     # {heart with ribbon}
    "💞":   ["<3<3", "(3(3"],   # {revolving hearts}
    "💟":   ["<3"  , "(3"],     # {heart decoration}
    "💌":   ["<3"  , "(3"],     # {love letter}
    "❤️‍🩹": ["<3:)", "(3_)"],   # {mending heart}
    "❣️":    ["<3!", "(3!"],    # {heart exclamation}
    "❤️‍🔥":  ["<3" , "(3"],     # {heart on fire}


    # Emojis with ASCII equivalents: faces:
    "😝":    ["XP", "XP"],       # {squinting face with tongue}
    "😤":    [">:(", "{face with steam from nose}"],      # {steam from nose}
    "😛":    [":p", "{sticking out tongue}"],       # {sticking out tongue}
    "😊":    [":)", "=)"],       # {smiling face with smiling_eyes}
    "😏":    [";)", "{smirking}"],       # {smirking_face}
    "😓":    ["^_^;", "{downcast face with sweat},"],     # {sweat face aka downcast_face_with_sweat}     NOTE(review): the comma inside the string looks like a typo -- confirm
    "😂":    ["XD", "XD"],       # {tears of joy face}
    "😫":    [":/", "{tired face}"],       # {tired_face}
    "😒":    [":/", "{unamused face}"],       # {unamused_face}
    "😩":    ["):", "{weary face}"],       # {weary_face}
    "😜":    [";p", "{winking face with tongue}"],       # {winking face with tongue}
    "😟":    ["/:", "{worried face}"],       # {worried_face}
    "😉":    [";)", ";)"],       # {winking_face}


    # Two look-alike pairs of angle brackets.  U+2329/U+232A are canonically equivalent to U+3008/U+3009 and
    # impossible to tell apart by eye, so they are written as escapes to make sure both pairs are covered.
    '\u2329' :    ['<','['],   #  left-pointing angle bracket
    '\u232a' :    ['>',']'],   # right-pointing angle bracket
    '\u3008' :    ['<','['],   #  left angle bracket (CJK)
    '\u3009' :    ['>',']'],   # right angle bracket (CJK)

    '｟' :   '((',   #   unicode double left  paren
    '｠' :   '))',   #   unicode double right paren
    '〖':   '[(',   #   unicode left   combo-paren/bracket
    '〗':   ')]',   #   unicode right  combo-paren/bracket
    '〚':   '[[',   #   unicode double left  bracket
    '〛':   ']]',   #   unicode double right bracket
    '〘':   '[[',   #   unicode double left  bracket
    '〙':   ']]',   #   unicode double right bracket
    '《' :    ['<<','[['],   #   unicode double     less-than
    '》' :    ['>>',']]'],   #   unicode double  greater-than
    '᚛' :    ['>-',')-'],
    '᚜' :    ['-<','-('],
    'π' :   'Pi',   #decent
    'ⁱ' :   '^i',   #Superscript Latin Small Letter I
    'ⁿ' :   '^n',   #Superscript n, also present in "upper-ASCII" code pages   \____ might be the same character really
    'ⁿ' :   '^n',   #Superscript Latin Small Letter N                          /
    '⁰' :   '^0',   #Superscript Zero                       (was '^o' with a letter o -- typo)
    '²' :   '^2',   #Superscript Two (U+00B2, in the Latin-1 range)
    '⁴' :   '^4',   #Superscript Four
    '⁵' :   '^5',   #Superscript Five
    '⁶' :   '^6',   #Superscript Six
    '⁷' :   '^7',   #Superscript Seven
    '⁸' :   '^8',   #Superscript Eight
    '⁹' :   '^9',   #Superscript Nine
    '⁺' :    '+',   #Superscript Plus Sign
    '⁻' :    '-',   #Superscript Minus
    '⁼' :    '=',   #Superscript Equals Sign
    '⁽' :    '(',   #Superscript Left Parenthesis
    '⁾' :    ')',   #Superscript Right Parenthesis
    '₀' :  '(0)',   #Subscript Zero
    '₁' :  '(1)',   #Subscript One
    '₂' :  '(2)',   #Subscript Two
    '₃' :  '(3)',   #Subscript Three
    '₄' :  '(4)',   #Subscript Four
    '₅' :  '(5)',   #Subscript Five
    '₆' :  '(6)',   #Subscript Six
    '₇' :  '(7)',   #Subscript Seven
    '₈' :  '(8)',   #Subscript Eight
    '₉' :  '(9)',   #Subscript Nine
    '₊' :  '(+)',   #Subscript Plus Sign
    '₋' :  '(-)',   #Subscript Minus
    '₌' :  '(=)',   #Subscript Equals Sign
    'ₔ' :'(schwa)', #Latin Subscript Small Letter Schwa
    'ₐ' :   '(a)',  #Latin Subscript Small Letter A
    'ₑ' :   '(e)',  #Latin Subscript Small Letter E
    'ₒ' :   '(o)',  #Latin Subscript Small Letter O
    'ₓ' :   '(x)',  #Latin Subscript Small Letter X
    'ₕ' :   '(h)',  #Latin Subscript Small Letter H     [doesn't render in EditPlus right so i'm not positive this is it]
    'ₖ' :   '(k)',  #Latin Subscript Small Letter K     [doesn't render in EditPlus right so i'm not positive this is it]
    'ₗ' :   '(l)',  #Latin Subscript Small Letter L     [doesn't render in EditPlus right so i'm not positive this is it]
    'ₘ' :   '(m)',  #Latin Subscript Small Letter M     [doesn't render in EditPlus right so i'm not positive this is it]
    'ₙ' :   '(n)',  #Latin Subscript Small Letter N     [doesn't render in EditPlus right so i'm not positive this is it]
    'ₚ' :   '(p)',  #Latin Subscript Small Letter P     [doesn't render in EditPlus right so i'm not positive this is it]
    'ₛ' :   '(s)',  #Latin Subscript Small Letter S     [doesn't render in EditPlus right so i'm not positive this is it]
    'ₜ' :   '(t)',  #Latin Subscript Small Letter T     [doesn't render in EditPlus right so i'm not positive this is it]


    '༼ ༽':  ['/\\','{upside down v thingy}'] ,   #a fairly good approximation until you need valid filename chars

    '∞' :'[Inf]',   #tempted to make "8", but that would lose too much meaning

    '『' :   'F' ,   # this is a stretch
    '「' :   'F' ,   # this is a stretch
    '｢' :   'F' ,   # this is a stretch
    '』' :   'J' ,   # this is a stretch, it's almost more like an L but backwards
    '」' :   'J' ,   # this is a stretch, it's almost more like an L but backwards
    '｣' :   'J' ,   # this is a stretch, it's almost more like an L but backwards

    '∑' :   'E=',   #quite the stretch, maybe "sigma" would be better
    '∫' :   'S=',   #quite the stretch, maybe "sum"   would be better

    #let's just take these out and let them be processed normally
    #'༺':    ['@:'],  #a huge stretch, this barely even looks like that
    #'༻':    [':@'],  #a huge stretch, this barely even looks like that


    # Unicode "Tag" letters, U+E0041..U+E007A (invisible shadows of ASCII letters; they show up inside
    # some flag emoji sequences).  These are NOT regional indicators.
    # They used to be written as five-digit '\uE0048'-style escapes, which Python reads as the
    # two-character string U+E004 + '8', so none of them could ever match.  They are now written with
    # the eight-digit '\U' form.
    '\U000E0048' :["H"],            #Tag Latin Capital Letter H
    '\U000E0069' :["i"],            #Tag Latin Small Letter I
    '\U000E005A' :["Z"],            #Tag Latin Capital Letter Z
    '\U000E006C' :["l"],            #Tag Latin Small Letter L
    '\U000E004D' :["M"],            #Tag Latin Capital Letter M
    '\U000E004C' :["L"],            #Tag Latin Capital Letter L
    '\U000E0061' :["a"],            #Tag Latin Small Letter A
    '\U000E0057' :["W"],            #Tag Latin Capital Letter W
    '\U000E0063' :["c"],            #Tag Latin Small Letter C
    '\U000E0047' :["G"],            #Tag Latin Capital Letter G
    '\U000E006D' :["m"],            #Tag Latin Small Letter M
    '\U000E0064' :["d"],            #Tag Latin Small Letter D
    '\U000E0067' :["g"],            #Tag Latin Small Letter G
    '\U000E0051' :["Q"],            #Tag Latin Capital Letter Q
    '\U000E0045' :["E"],            #Tag Latin Capital Letter E
    '\U000E004A' :["J"],            #Tag Latin Capital Letter J
    '\U000E0070' :["p"],            #Tag Latin Small Letter P
    '\U000E0052' :["R"],            #Tag Latin Capital Letter R
    '\U000E0050' :["P"],            #Tag Latin Capital Letter P
    '\U000E0078' :["x"],            #Tag Latin Small Letter X
    '\U000E0056' :["V"],            #Tag Latin Capital Letter V
    '\U000E007A' :["z"],            #Tag Latin Small Letter Z
    '\U000E0066' :["f"],            #Tag Latin Small Letter F
    '\U000E0058' :["X"],            #Tag Latin Capital Letter X
    '\U000E0076' :["v"],            #Tag Latin Small Letter V
    '\U000E0059' :["Y"],            #Tag Latin Capital Letter Y
    '\U000E0065' :["e"],            #Tag Latin Small Letter E
    '\U000E0049' :["I"],            #Tag Latin Capital Letter I
    '\U000E0055' :["U"],            #Tag Latin Capital Letter U
    '\U000E0073' :["s"],            #Tag Latin Small Letter S
    '\U000E0053' :["S"],            #Tag Latin Capital Letter S
    '\U000E006F' :["o"],            #Tag Latin Small Letter O
    '\U000E0071' :["q"],            #Tag Latin Small Letter Q
    '\U000E006B' :["k"],            #Tag Latin Small Letter K
    '\U000E004E' :["N"],            #Tag Latin Capital Letter N
    '\U000E0077' :["w"],            #Tag Latin Small Letter W
    '\U000E0054' :["T"],            #Tag Latin Capital Letter T
    '\U000E004B' :["K"],            #Tag Latin Capital Letter K
    '\U000E0072' :["r"],            #Tag Latin Small Letter R
    '\U000E0044' :["D"],            #Tag Latin Capital Letter D
    '\U000E0068' :["h"],            #Tag Latin Small Letter H
    '\U000E004F' :["O"],            #Tag Latin Capital Letter O
    '\U000E006E' :["n"],            #Tag Latin Small Letter N
    '\U000E0079' :["y"],            #Tag Latin Small Letter Y
    '\U000E0075' :["u"],            #Tag Latin Small Letter U
    '\U000E0041' :["A"],            #Tag Latin Capital Letter A     (was mapped to lowercase "a", unlike every other capital)
    '\U000E0042' :["B"],            #Tag Latin Capital Letter B
    '\U000E0046' :["F"],            #Tag Latin Capital Letter F
    '\U000E0043' :["C"],            #Tag Latin Capital Letter C
    '\U000E0062' :["b"],            #Tag Latin Small Letter B
    '\U000E006A' :["j"],            #Tag Latin Small Letter J
    '\U000E0074' :["t"],            #Tag Latin Small Letter T
    'code ue0074':["t"],            #Tag Latin Small Letter T       (spelled-out workaround key, kept for compatibility)


    #the most puzzling thing i've found - this character was completely invisible in Windows Explorer file view (Windows 10, 2023/05/28)
    #as well as not copy-pastable (unlike all the others), so we couldn't even google it
    #it is called "ZERO WIDTH JOINER" and used in Indian languages: https://www.fileformat.info/info/unicode/char/200d/index.htm
    #NOTE(review): the five spaces are INSIDE the quotes, so this key is the joiner followed by five spaces and
    #              does not match a bare joiner. Left untouched because changing it could affect emoji
    #              sequences that contain a joiner -- confirm what was intended.
    '\u200d     ': ['|',' '],  #deciding what to do with this character was difficult


    #these were once believed to expose a python bug where certain characters aren't usable as dictionary keys.
    #the real cause was the five-digit '\u' escape described at the top of this table; the escapes are now fixed
    #and the spelled-out 'code u....' workaround keys are kept for compatibility.
    '\U0001f1fe': ["Y",],                           #Regional Indicator Symbol Letter Y
    'code u1f1fe': ["Y",],                          #workaround
    'code u008d' : ["{reverse line feed}",],        #workaround

    "'":          ["'"],                             #is this a unicode apostrophe?
    '\ue0067':    ["E","g"],                         #NOTE(review): five-digit escape, parsed as U+E006 followed by '7'. Intent unclear, so left exactly as it was
    '\u0081':     ["{control}",],
    '\u0090':     ["{device control}",],
    '\u008F':     ["3",],                                  #"single shift 3"
    '\ud83d':     ["{smiling face with open mouth}"],      #NOTE(review): this is a lone surrogate, not a complete character -- confirm it can ever match
    'code ud83d': ["{smiling face with open mouth}"],
    "\U0001f409": ["{dragon}",],
    "code u1f409":["{dragon}",],
    "code 1fa77": ["{unicorn}",],
    "code u1fa77": ["{unicorn}",],
    #"ðŸ¦‹":      ["{butterfly}",],             \
    #"ðŸ”—":      ["{chain link}",],             \
    #"ðŸ§¨":      ["{firecracker}",],             \
    #"ï¸â™¥ï¸":   ["<3","{heart}"],                \____ examples of how the encoding gets screwed up by EditPlus and leaves us
    #"â™•":       ["{crown}",],                    /     with destroyed code. Better to use codes than paste chars directly in.
    #"ðŸ¯":       ["{tiger face}",],              /
    #"ðŸ’¸":      ["{money with wings}",],       /
    "🎉":         ["{party popper}",],
    "⧸":          ["/","--"],
    "\u29F8":     ["/","--"],
    "／":         ["/","--"],                   #what strange new slash is this?

    "♬":      ["♫",],                          #unicode 'beamed music note' to the "upper-ASCII" music note

    #NOTE(review): the descriptions on the 'code u....' entries below have not been checked against the
    #              official Unicode character names -- verify before trusting them.
    "code u1fa01":      ["{Military Helmet}",],
    "code u1fa02":      ["{Accordion}",],
    "code u1fa03":      ["{Long Drum}",],
    "code u1fa04":      ["{Coin}",],
    "code u1fa05":      ["{Carpentry Saw}",],
    "code u1fa06":      ["{Screwdriver}",],
    "code u1fa07":      ["{Ladder}",],
    "code u1fa08":      ["{Hook}",],
    "code u1fa09":      ["{Mirror}",],
    "code u1fa0a":      ["{Window}",],
    "code u1fa0b":      ["{Plunger}",],
    "code u1fa0c":      ["{Sewing Needle}",],
    "code u1fa0d":      ["{Safety Pin}",],
    "code u1fa0e":      ["{Broom}",],
    "code u1fa0f":      ["{Bucket}",],
    "code u1fa10":      ["{Toothbrush}",],
    "code u1fa11":      ["{Hose}",],
    "code u1fa12":      ["{Mouse Trap}",],
    "code u1fa13":      ["{Skateboard}",],
    "code u1fa14":      ["{Roller Skate}",],
    "code u1fa15":      ["{Fishing Pole}",],
    "code u1fa16":      ["{Yo-Yo}",],
    "code u1fa17":      ["{Kite}",],
    "code u1fa18":      ["{Parachute}",],
    "code u1fa19":      ["{Boomerang}",],
    "code u1fa1a":      ["{Magic Wand}",],
    "code u1fa1b":      ["{Nazar Amulet}",],
    "code u1fa1c":      ["{Hamsa}",],
    "code u1fa1d":      ["{Red Envelope}",],
    "code u1fa1e":      ["{Carp Streamer}",],
    "code u1fa1f":      ["{Firecracker}",],
    "code u1fab0":      ["{Fly}",],
    "code u1fab1":      ["{Worm}",],
    "code u1fab2":      ["{Beetle}",],
    "code u1fab3":      ["{Cockroach}",],
    "code u1fab4":      ["{Potted Plant}",],
    "code u1fab5":      ["{Wood}",],
    "code u1fab6":      ["{Feather}",],
    "code u1fab7":      ["{Lotus}",],
    "code u1fab8":      ["{Coral}",],
    "code u1fab9":      ["{Empty Nest}",],
    "code u1faba":      ["{Nest with Eggs}",],
    "code u1fabb":      ["{Hyacinth}",],
    "code u1fabc":      ["{Jellyfish}",],
    "code u1fabd":      ["{Wing}",],
    "code u1fabe":      ["{Plant in Ground}",],
    "code u1fabf":      ["{Goose}",],
    "code u1fac0":      ["{Anatomical Heart}",],
    "code u1fac1":      ["{Lungs}",],
    "code u1fac2":      ["{People Hugging}",],
    "code u1fac3":      ["{Pregnant Man}",],
    "code u1fac4":      ["{Pregnant Person}",],
    "code u1fac5":      ["{Person with Crown}",],
    "code u1fac6":      ["{Person in Lotus Position}",],
    "code u1fac7":      ["{Hamsa}",],
    "code u1fac8":      ["{Empty Bowl}",],
    "code u1fac9":      ["{Nest with Eggs}",],
    "code u1faca":      ["{Bowl with Spoon}",],
    "code u1facb":      ["{Jar}",],
    "code u1facc":      ["{Empty Jar}",],
    "code u1facd":      ["{Meringue}",],
    "code u1face":      ["{Moose}",],
    "code u1facf":      ["{Donkey}",],
    "code u1fad0":      ["{Blueberries}",],
    "code u1fad1":      ["{Bell Pepper}",],
    "code u1fad2":      ["{Olive}",],
    "code u1fad3":      ["{Flatbread}",],
    "code u1fad4":      ["{Tamale}",],
    "code u1fad5":      ["{Fondue}",],
    "code u1fad6":      ["{Teapot}",],
    "code u1fad7":      ["{Pouring Liquid}",],
    "code u1fad8":      ["{Beans}",],
    "code u1fad9":      ["{Jar with a Lid}",],
    "code u1fada":      ["{Ginger Root}",],
    "code u1fadb":      ["{Pea Pod}",],
    "code u1fadc":      ["{Empty Bowl}",],
    "code u1fadd":      ["{Bowl with Spoon}",],
    "code u1fade":      ["{Tamale}",],
    "code u1fadf":      ["{Empty Plate}",],
    "code u1fae0":      ["{Melting Face}",],
    "code u1fae1":      ["{Saluting Face}",],
    "code u1fae2":      ["{Face with Open Eyes and Hand Over Mouth}",],
    "code u1fae3":      ["{Face with Peeking Eye}",],
    "code u1fae4":      ["{Face with Diagonal Mouth}",],
    "code u1fae5":      ["{Dotted Line Face}",],
    "code u1fae6":      ["{Biting Lip}",],
    "code u1fae7":      ["{Bubbles}",],
    "code u1fae8":      ["{Shaking Face}",],
    "code u1fae9":      ["{Pink Heart}",],
    "code u1faea":      ["{Light Blue Heart}",],
    "code u1faeb":      ["{Grey Heart}",],
    "code u1faec":      ["{Hand with Palm Facing Up}",],
    "code u1faed":      ["{Hand with Palm Facing Down}",],
    "code u1faee":      ["{Index Pointing at the Viewer}",],
    "code u1faef":      ["{Rightwards Hand}",],
    "code u1faf0":      ["{Leftwards Hand}",],
    "code u1faf1":      ["{Palm Down Hand}",],
    "code u1faf2":      ["{Palm Up Hand}",],
    "code u1faf3":      ["{Hand with Thumb and Index Finger Together}",],
    "code u1faf4":      ["{Leftwards Palm with Thumb and Index Finger}",],
    "code u1faf5":      ["{Palm with Index and Thumb Crossed}",],
     "\U0001faf6":      ["{Hands Holding Heart}",],
    "code u1faf6":      ["{Hands Holding Heart}",],
          "1faf6":      ["{Hands Holding Heart}",],
    "code u1faf7":      ["{Rightwards Hand}",],
    "code u1faf8":      ["{Leftwards Hand}",],
    "code u1faf9":      ["{Index Pointing at the Viewer}",],
    "code u1fafa":      ["{Rightwards Hand}",],
    "code u1fafb":      ["{Leftwards Hand}",],
    "code u1fafc":      ["{Palm Up Hand}",],
    "code u1fafd":      ["{Palm Down Hand}",],
    "code u1fafe":      ["{Palm with Thumb and Index Finger Together}",],
    "code u1faff":      ["{Rightwards Palm with Thumb and Index Finger}",],

    "code u1f3fb":      ["{Light Skin Tone}",],
    "code u1f3fc":      ["{Medium-Light Skin Tone}",],
    "code u1f3fd":      ["{Medium Skin Tone}",],
    "code u1f3fe":      ["{Medium-Dark Skin Tone}",],
    "code u1f3ff":      ["{Dark Skin Tone}",],
    "code u1f3fa":      ["{Amphora}",],
    "code u1f3f9":      ["{Bow and Arrow}",],
    "code u1f3f8":      ["{Drum with Drumsticks}",],
    "code u1f3f7":      ["{Label}",],

    # template for new entries:
    #"":      ["",],


    '\U0001fabd' :["_"],            #2024/05/23 ran into this, not sure what it is
    '\U0001fae7' :["_"],            #2024/05/23 ran into this, not sure what it is
    '\U0001fae6' :["_"],            #2024/06/12 ran into this, not sure what it is
    '\U0001fa75' :["_"],            #2024/06/12 ran into this, not sure what it is


}


if __name__ == "__main__":
    # Script entry point: replace the built-in print, then hand over to main().
    # The order matters: the real print has to be saved BEFORE builtins.print is overwritten,
    # otherwise there is no way left to reach it.
    #we do this only in main because otherwise it affects loading modules
    original_print = print                                      # Store the original print function before overriding (module-level name; presumably used by print_error -- confirm)
    builtins.print = print_error                               # Override the built-in print function with the custom one (affects every later print() call in this process)
    main()