-
Notifications
You must be signed in to change notification settings - Fork 0
/
custom_dict.py
34 lines (29 loc) · 26.1 KB
/
custom_dict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
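'''
Lookup tables for slang words and misspellings, plus helpers that use them.

`dictionary` maps a raw word (slang, misspelling, name, ...) to its replacement
in proper language. `reverseDictionary` maps a proper word or phrase to a list
of (variant, weight) pairs; translateWord and translateSentence sample from it
to rewrite proper text using those variants.
'''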
dictionary = {'mardi': 'mardi', 'jaja': 'haha', 'jason': 'jason', 'chinese': 'chinese', 'ali': 'ali', 'alg': 'alg', 'raiyans': 'raiyans', 'gf': 'girlfriend', 'tarra': 'tarra', 'asian': 'asian', 'liam': 'liam', 'hmmm': 'hm', 'somthing': 'something', 'aug': 'august', 'ty': 'thank you', 'imma': 'im going to', 'mcs': 'mcs', 'itll': 'itll', 'oclock': 'oclock', 'lucifer': 'lucifer', 'ahhaha': 'haha', 'td': 'td', 'te': 'yes', 'ta': 'to', 'jakob': 'jakob', 'schulic': 'schulic', 'sherbrooke': 'sherbrooke', 'haahah': 'haha', 'hannah': 'hannah', 'rc': '', 'fav': 'favorite', 'hahahahahahaha': 'haha', 'yoooo': 'hey', 'lololol': 'lol', 'mla': 'mla', 'facebook': 'facebook', 'moncton': 'moncton', 'jacob': 'jacob', 'memes': 'memes', 'iite': 'alright', 'macbeths': 'macbeths', 'salvador': 'salvador', 'angus': 'angus', 'ryan': 'ryan', 'herbert': 'herbert', 'mcallister': 'mcallister', 'soooo': 'so', 'trist': 'trist', 'oooh': 'oh', 'ahahahaha': 'haha', 'havent': 'havent', 'tru': 'true', 'dimitris': 'dimitris', 'rach': 'rach', 'gb': 'gb', 'cfl': 'cfl', 'velk': 'velk', 'instagram': 'instagram', 'seshu': 'seshu', 'ahah': 'haha', 'docuum': 'docuum', 'ted': 'ted ', 'haah': 'haha', 'quebec': 'quebec', 'nbm': 'nbm', 'chen': 'chen', 'elise': 'elise', 'ellas': 'ellas', 'cbc': 'cbc', 'chriss': 'chris', 'gl': 'good luck', 'abd': 'bad', 'christ': 'christ', 'yiss': 'yes', 'maine': 'maine', 'tanya': 'tanya', 'indian': 'indian', 'sus': '', 'fuckk': 'fuck', 'playa': 'player', 'emily': 'emily', 'ummmm': 'um', 'ahhhhh': 'ah', 'nazis': 'nazis', 'sux': 'suck', 'meg': 'meg', 'iran': 'iran', 'cass': 'cass', 'zach': 'zach', 'yurp': 'yes', 'heyy': 'hey', 'smelch': 'smelch', 'meh': 'uh', 'french': 'french', 'leo': 'leo', 'ryerson': 'ryerson', 'ubuntu': 'ubuntu', 'douchey': 'douchey', 'neal': 'neal', 'shoulda': 'should have', 'bff': 'best friend', 'rdu': 'richard', 'thxx': 'thanks', 'vladi': 'vladi', 'yessss': 'yes', 'sweeet': 'sweet', 'canadian': 'canadian', 'thatd': 'thatd', 'verry': 'very', 'turkish': 
'turkish', 'jackie': 'jackie', 'pics': 'pictures', 'itunes': 'itunes', 'tv': 'tv', 'ctf': 'ctf', 'arent': 'arent', 'hbu': 'how about you', 'stanko': 'stanko', 'minecraft': 'minecraft', 'roomie': 'roomate', 'montreal': 'montreal', 'aahha': 'aahha', 'ttyl': 'talk to you later', 'hahaah': 'haha', 'vid': 'video', 'clare': 'clare', 'app': 'app', 'wil': 'will', 'julia': 'julia', 'hahahahah': 'haha', 'insta': 'instagram', 'ugandan': 'ugandan', 'sneha': 'sneha', 'brb': 'be right bacj', 'sarah': 'sarah', 'wadda': 'what a', 'hahahahaha': 'haha', 'feb': 'feb', 'usb': 'usb', 'usa': 'usa', 'pree': 'pre', 'tmmrw': 'tomorrow', 'texting': 'texting', 'shite': 'shit', 'msn': 'msn', 'jphmun': 'jphmun', 'iphone': 'iphone', 'ahhh': 'ah', 'calgary': 'calgary', 'mathdsz': 'mathdsz', 'calc': 'calculus', 'smacfone': 'smacfone', 'ahha': 'haha', 'shitt': 'shit', 'scotia': 'scotia', 'woojong': 'woojong', 'oscar': 'oscar', 'suxz': 'sucks', 'haha': 'haha', 'soooooooo': 'so', 'skyped': 'skyped', 'skypee': 'skype', 'koreans': 'koreans', 'sickkk': 'sick', 'adnan': 'adnan', 'mc': 'mc', 'cisv': 'cisv', 'dev': 'development', 'judy': 'judy', 'dem': 'them', 'ull': 'you will', 'mr': 'mr', 'aight': 'alright', 'didnt': 'didnt', 'caf': 'cafetiere', 'simona': 'simona', 'tarantino': 'tarantino', 'interms': 'in terms', 'yeaaaa': 'yes', 'bffs': 'best friends', 'waaaay': 'way', 'heey': 'hey', 'arisz': 'arisz', 'matson': 'matson', 'lolol': 'lol', 'homie': 'homie', 'yoooooo': 'hey', 'lunchie': 'lunch', 'mcgills': 'mcgills', 'siu': 'siu', 'eng': 'engineering', 'ahahhah': 'haha', 'yfc': 'yfc', 'vancouver': 'vancouver', 'haahaha': 'haha', 'gurl': 'girl', 'hahahaa': 'haha', 'ingmaras': 'ingmaras', 'mah': 'my', 'shouldnt': 'shouldnt', 'snb': 'snb', 'bday': 'birthday', 'raiyan': 'raiyan', 'yeppp': 'yes', 'hahahahh': 'haha', 'fb': 'facebook', 'whatta': 'what a', 'hahahaha': 'haha', 'vaped': 'vaped', 'da': 'the', 'peacebus': 'peacebus', 'siked': 'siked', 'fo': 'for', 'danny': 'danny', 'hahahah': 'haha', 'jliu': 'josh 
liu', 'uoft': 'uoft', 'django': 'django', 'sj': 'sj', 'claire': 'claire', 'mcgill': 'mcgill', 'african': 'african', 'yeeeee': 'yes', 'probert': 'probert', 'skyping': 'skyping', 'manga': 'manga', 'se': 'see', 'sd': 'sd', 'photoshop': 'photoshop', 'ws': 'was', 'wut': 'what', 'itd': 'itd', 'fam': 'family', 'sooo': 'so', 'deca': 'deca', 'ahaah': 'ahaah', 'ahahahahhaha': 'haha', 'kony': 'kony', 'isnt': 'isnt', 'arnt': 'arnt', 'ahahahahahah': 'haha', 'ttc': 'ttc', 'cole': 'cole', 'le': 'le', 'wallace': 'wallace', 'gtg': 'got to go', 'ln': 'last night', 'hil': 'hil', 'thnx': 'thaks', 'imo': 'in my opinion', 'li': 'little', 'tyler': 'tyler', 'etransfer': 'etransfer', 'dooleys': 'dooleys', 'sig': 'sig', 'dokey': 'dokey', 'norway': 'norway', 'marshall': 'marshall', 'urself': 'yourself', 'deets': 'details', 'noo': 'no', 'ym': 'my', 'pegi': 'pegi', 'atwater': 'atwater', 'dal': 'dal', 'yyy': 'why', 'yessir': 'yes sir', 'vinson': 'vinson', 'uhh': 'uh', 'dat': 'that', 'lukes': 'lukes', 'srsly': 'seriously', 'alicias': 'alicias', 'hahahahahaha': 'haha', 'ahahh': 'haha', 'hali': 'hali', 'nov': 'november', 'yyz': 'yyz', 'ahaha': 'haha', 'ther': 'there', 'damian': 'damian', 'robbie': 'robbie', 'tues': 'tuesday', 'jmac': 'josh', 'whatd': 'whatd', 'ahahhahahaha': 'haha', 'sim': 'sim', 'valentin': 'valentin', 'ahahhahahah': 'haha', 'vaishu': 'vaishu', 'yeaa': 'yes', 'idek': 'i dont even know', 'youu': 'you', 'spanish': 'spanish', 'mharie': 'mharie', 'nigerian': 'nigerian', 'quiznos': 'quiznos', 'dawg': 'dog', 'yeap': 'yes', 'connor': 'connor', 'ahahha': 'haha', 'hawt': 'hot', 'meth': 'meth', 'dic': 'dick', 'ep': 'episode', 'ouu': 'oh', 'alward': 'alward', 'rez': 'rez', 'ru': 'are you', 'matt': 'matt', 'gon': 'going', 'yayaya': 'yes', 'pokemon': '', 'lolo': 'lol', 'ricky': 'ricky', 'noice': 'nice', 'ypu': 'you', 'nite': 'night', 'internet': 'internet', 'okey': 'okay', 'lolz': 'lol', 'rn': 'right now', 'ro': 'ro', 'hhahah': 'haha', 'jareds': 'jareds', 'lizzie': 'lizzie', 'ipod': 'ipod', 
'jmacs': 'joshs', 'fave': 'favourite', 'coel': 'coel', 'fred': 'fred', 'mmm': 'yes', 'shud': 'should', 'lmk': 'let me know', 'rly': 'really', 'argo': 'argo', 'bhutanese': 'bhutanese', 'seger': 'seger', 'spencer': 'spencer', 'opps': 'oops', 'trueee': 'true', 'holdens': 'holdens', 'yesh': 'yes', 'crn': '', 'atm': 'at the moment', 'american': 'american', 'ish': 'ish', 'fhs': 'fhs', 'webscrape': 'webscrape', 'yess': 'yes', 'cristian': 'cristian', 'kendall': 'kendall', 'dont': 'dont', 'whyy': 'why', 'rachel': 'rachel', 'ez': 'easy', 'xd': 'id', 'jesus': 'jesus', 'ahhahahaha': 'haha', 'americans': 'americans', 'comin': 'coming', 'hehe': 'haha', 'andor': 'andor', 'ganja': 'ganja', 'haaha': 'haha', 'gif': 'gif', 'gpa': 'gpa', 'carson': 'carson', 'fml': 'fuck my life', 'yooooo': 'hey', 'ahhahah': 'haha', 'rons': 'rons', 'nigeria': 'nigeria', 'simptek': 'simptek', 'coburn': 'coburn', 'meme': 'meme', 'thibk': 'think', 'shawn': 'shawn', 'tooo': 'too', 'brazilian': 'brazilian', 'cya': 'bye', 'github': 'github', 'mclennan': 'mclennan', 'skyrim': 'skyrim', 'hadnt': 'hadnt', 'shirley': 'shirley', 'obama': 'obama', 'ipad': 'ipad', 'jmer': 'josh', 'nathan': 'nathan', 'lmao': 'laughing my ass off', 'ew': 'ew', 'mannnn': 'man', 'matsons': 'matsons', 'riyan': 'riyan', 'sti': 'sti', 'stu': 'stu', 'sweden': 'sweden', 'greg': 'greg', 'suree': 'sure', 'whyyy': 'why', 'recieved': 'recieved', 'prescott': 'prescott', 'havnt': 'havent', 'hav': 'have', 'sean': 'sean', 'pre': 'pre', 'tht': 'that', 'outta': 'outta', 'ana': 'ana', 'tesss': 'tess', 'wasnt': 'wasnt', 'mflb': 'mflb', 'yeee': 'yes', 'lin': 'lin', 'ummm': 'um', 'minz': 'minutes', 'adele': 'adele', 'infront': 'infront', 'uganda': 'uganda', 'yees': 'yes', 'mins': 'minutes', 'artsci': 'artsci', 'tcamp': 'tcamp', 'ahaahah': 'haha', 'mia': 'mia', 'maxs': 'maxs', 'mic': 'microphone', 'liu': 'liu', 'rome': 'rome', 'yeahh': 'yes', 'online': 'online', 'korean': 'korean', 'liz': 'liz', 'sm': 'so much', 'wooow': 'wow', 'aww': 'aw', 'doin': 
'doing', 'doug': 'doug', 'dnt': 'dont', 'victoria': 'victoria', 'welll': 'well', 'awk': 'awkward', 'amar': 'amar', 'marie': 'marie', 'sall': 'its all', 'leblanc': 'leblanc', 'url': 'url', 'sg': 'sounds good', 'sbtf': 'sbtf', 'mrs': 'mrs', 'lul': 'lol', 'urs': 'yours', 'amir': 'amir', 'danielito': '', 'frances': 'frances', 'german': 'german', 'joshs': 'joshs', 'corse': 'corse', 'jared': 'jared', 'shitttt': 'shit', 'probs': 'probabily', 'samosa': 'samosa', 'aint': 'aint', 'sayin': 'saying', 'fredericton': 'fredericton', 'duncan': 'duncan', 'joli': 'joli', 'sista': 'sister', 'madeline': 'madeline', 'abt': 'about', 'caroline': 'caroline', 'monica': 'monica', 'weve': 'weve', 'asians': 'asians', 'ahahahh': 'haha', 'di': 'did', 'luke': 'luke', 'dd': 'designated driver', 'alicia': 'alicia', 'ahahaha': 'haha', 'tbh': 'to be honest', 'awww': 'aw', 'skewl': 'school', 'storey': 'storey', 'werent': 'werent', 'richards': 'richards', 'dt': 'downtown', 'dr': 'dr', 'alexander': 'alexander', 'wensday': 'wednesday', 'randalls': 'randalls', 'daniellutesmailmcgillca': '', 'pearson': 'pearson', 'denmark': 'denmark', 'webdev': 'webdev', 'gras': 'gras', 'qc': 'qc', 'ruiqidugmailcom': '', 'mercedes': 'mercedes', 'scott': 'scott', 'jakobs': 'jakobs', 'tix': 'tickets', 'amanda': 'amanda', 'xmas': 'christmas', 'btw': 'by the way', 'prague': 'prague', 'travis': 'travis', 'lyfe': 'life', 'persian': 'persian', 'lolololol': 'lol', 'yeye': 'yes', 'ari': 'ari', 'gmail': 'gmail', 'luv': 'love', 'thurs': 'thursday', 'cegep': 'cegep', 'mexicans': 'mexicans', 'luc': 'luc', 'deid': 'died', 'mel': 'mel', 'grampa': 'grampa', 'yepp': 'yes', 'youll': 'youll', 'wouldve': 'wouldve', 'ikr': 'i know right', 'reddit': 'reddit', 'omg': 'oh my god', 'uk': 'uk', 'marvin': 'marvin', 'jw': 'just wondering', 'upp': 'up', 'jr': 'juniour', 'wj': 'we', 'ww': 'wow', 'omw': 'on my way', 'wtf': 'what the fuck', 'jk': 'just kidding', 'yumy': 'yumy', 'cus': '', 'omy': 'on my way', 'ahahahahaha': 'haha', 'jb': 'junior board', 
'jc': 'junior councillpr', 'tnite': 'tonight', 'vua': 'vua', 'admin': 'admin', 'ruffalo': 'ruffalo', 'molson': 'molson', 'unb': 'unb', 'robie': 'robbie', 'hhaha': 'haha', 'alot': 'a lot', 'oovoo': 'oovoo', 'yeeee': 'yes', 'uni': 'university', 'brah': 'bro', 'seshing': 'seshing', 'minicamp': 'minicamp', 'uno': 'you know', 'whatsapp': 'whats app', 'jeff': 'jeff', 'jorya': 'jorya', 'matty': 'matty', 'cd': 'cd', 'wth': 'what the hell', 'linux': 'linux', 'yaaa': 'yes', 'sao': 'sao', 'nicola': 'nicola', 'deniz': 'deniz', 'nicole': 'nicole', 'yuppp': 'yes', 'daniellutesyahoocom': '', 'ps': 'by the way', 'snapchat': 'snapchat', 'yay': 'yes', 'bbg': 'baby girl', 'fri': 'friday', 'sara': 'sara', 'frm': 'from', 'whos': 'whos', 'pc': 'pc', 'whoo': 'who', 'roomate': 'roomate', 'yah': 'yes', 'hahahaah': 'haha', 'sesh': 'sesh', 'trottier': 'trottier', 'yaa': 'yes', 'bbq': 'bbq', 'walter': 'walter', 'username': 'username', 'addison': 'addison', 'mccully': 'mccully', 'sydney': 'sydney', 'urbain': 'urbain', 'hmu': 'hit me up', 'bmh': 'bmh', 'georgia': 'georgia', 'sooooo': 'so', 'andrew': 'andrew', 'al': 'al', 'hmm': 'hm', 'dave': 'dave', 'utube': 'youtube', 'aahaha': 'haha', 'msged': 'messaged', 'yuuuup': 'yes', 'theyve': 'theyve', 'rudolf': 'rudolf', 'woah': 'wow', 'iym': 'iym', 'samosas': 'samosas', 'osheaga': 'osheaga', 'convo': 'conversation', 'podcasts': 'podcasts', 'wyatt': 'wyatt', 'fuc': 'fuck', 'bransfield': 'bransfield', 'hahaha': 'haha', 'cancelled': 'cancelled', 'hmmmm': 'hm', 'tim': 'tim', 'skypr': 'skype', 'ive': 'ive', 'thatll': 'thatll', 'portugal': 'portugal', 'minerva': 'minerva', 'vape': 'vape', 'ir': 'it', 'vp': 'vice president', 'im': 'im', 'il': 'ill', 'skype': 'skype', 'ie': 'internet explorer', 'ohhhh': 'oh', 'smurf': 'smurf', 'hahahahha': 'haha', 'noooo': 'no', 'dhe': 'daniel he', 'sobeys': 'sobeys', 'thoo': 'though', 'goign': 'going', 'paulo': 'paulo', 'ahahhahah': 'haha', 'wesc': 'wesc', 'hahah': 'haha', 'yeahhh': 'yes', 'hahaa': 'haha', 'ot': 'it', 
'yooo': 'hey', 'yoon': 'yoon', 'px': 'px', 'toooo': 'too', 'syd': 'syd', 'ella': 'ella', 'umm': 'um', 'leger': 'leger', 'nig': 'night', 'jirka': 'jirka', 'phd': 'phd', 'jaclyn': 'jaclyn', 'yy': 'why', 'neuro': 'neuro', 'valerie': 'valerie', 'conor': 'connor', 'thx': 'thanks', 'quizup': 'quizup', 'youre': 'youre', 'thr': 'there', 'lil': 'little', 'cadusd': 'cad usd', 'anime': 'anime', 'mann': 'man', 'howd': 'howd', 'english': 'english', 'ahahhaha': 'haha', 'yeees': 'yes', 'ae': 'ae', 'riverdale': 'riverdale', 'nb': 'nb', 'starbucks': 'starbucks', 'yeh': 'yes', 'alvin': 'alvin', 'yolo': 'you only live once', 'noah': 'noah', 'ddc': 'ddc', 'yee': 'yes', 'yall': 'you all', 'hahahha': 'haha', 'yey': 'yes', 'rachels': 'rachels', 'woodsworth': 'woodsworth', 'casper': 'casper', 'seshed': 'seshed', 'hess': 'he is', 'qbit': 'qbit', 'mannn': 'man', 'hah': 'haha', 'jose': 'jose', 'hal': 'hal', 'daniellutes': '', 'rebecca': 'rebecca', 'prescotts': 'prescotts', 'thang': 'thing', 'cuz': 'because', 'ooooh': 'oh', 'ahhhh': 'ah', 'soooooo': 'sp', 'courtney': 'courtney', 'vids': 'videos', 'ohhh': 'oh', 'elp': 'elp', 'hahahhah': 'haha', 'mycourses': 'mycourses', 'nate': 'nate', 'naw': 'no', 'bio': 'bio', 'ahhah': 'haha', 'ight': 'alright', 'vanessa': 'vanessa', 'mcginn': 'mcginn', 'fyi': 'for your information', 'hhahaha': 'haha', 'tmrw': 'tomorrow', 'wbu': 'what about you', 'tmrs': 'tomorrows', 'mon': 'monday', 'mol': 'mol', 'nvm': 'never mind', 'nooooo': 'no', 'shittt': 'shit', 'richie': 'richie', 'lincoln': 'lincoln', 'nevermind': 'nevermind', 'sowwy': 'sorry', 'hahha': 'haha', 'liams': 'liams', 'zeland': 'zeland', 'theyre': 'theyre', 'seshus': 'seshus', 'sql': 'sql', 'aylmer': 'aylmer', 'yeeeeee': 'yes', 'fuckkk': 'fuck', 'webb': 'web', 'portuguese': 'portuguese', 'ijr': 'ijr', 'google': 'google', 'czech': 'czech', 'amy': 'amy', 'boi': 'boy', 'hahhaha': 'haha', 'tumblr': 'tumblr', 'njr': 'njr', 'vsb': 'vsb', 'smelchel': 'smelchel', 'thailand': 'thailand', 'halifax': 'halifax', 
'steph': 'steph', 'comon': 'come on', 'yesss': 'yes', 'uhm': 'um', 'tmr': 'tomorrow', 'karims': 'karims', 'sudan': 'sudan', 'nooo': 'no', 'js': 'just saying', 'ssmu': 'ssmu', 'cappa': 'cappa', 'poli': 'poli', 'frum': 'from', 'ahahahah': 'haha', 'christmas': 'christmas', 'eya': 'yes', 'sooon': 'soon', 'pls': 'please', 'bb': 'baby', 'bc': 'because', 'yeaah': 'yes', 'plz': 'please', 'bj': 'bj', 'mau': 'mau', 'bs': 'bull shit', 'annas': 'annas', 'java': 'java', 'thts': 'thats', 'oo': 'oh', 'src': 'src', 'ol': 'old', 'ok': 'okay', 'justin': 'justin', 'wouldnt': 'wouldnt', 'netflix': 'netflix', 'ashe': 'ashe', 'cafe': 'cafe', 'idk': 'i dont know', 'youve': 'youve', 'ndp': 'ndp', 'ou': 'oh', 'sry': 'sorry', 'os': 'is', 'idc': 'i dont care', 'legoland': 'legoland', 'ahahahahha': 'haha', 'fallis': 'fall is', 'ahh': 'ah', 'youd': 'youd', 'catherine': 'catherine', 'soo': 'so', 'ppl': 'people', 'hahahahahah': 'haha', 'doesnt': 'doesnt', 'inbox': 'inbox', 'theyd': 'theyd', 'couldnt': 'couldnt', 'ontario': 'ontario', 'dewd': 'dude', 'ahahah': 'haha', 'goin': 'going', 'hahahhaha': 'haha', 'daniels': 'daniels', 'apps': 'apps', 'lol': 'lol', 'hed': 'hed', 'christian': 'christian', 'wordpress': 'wordpress', 'taylyr': 'taylyr', 'bruh': 'bro', 'jane': 'jane', 'ayy': 'yes', 'wat': 'what', 'ahahhahaha': 'haha', 'listserv': 'listserv', 'tims': 'tims', 'jewish': 'jewish', 'yupp': 'yes', 'prez': 'president', 'shana': 'haha', 'hw': 'homework', 'yoyo': 'hey', 'msg': 'message', 'hm': 'hm', 'yeaaa': 'yes', 'podcast': 'podcast', 'nicee': 'nice', 'lynn': 'lynn', 'jonathan': 'jonathan', 'oscars': 'oscars', 'uu': 'you', 'ut': 'it', 'ahahahha': 'haha', 'ur': 'you are', 'lncc': 'lncc', 'cumberlandprescott': '', 'tonite': 'tonight', 'hahhah': '', 'cassidy': 'cassidy', 'mtl': 'montreal', 'lvl': 'level', 'txts': 'texts', 'dawa': 'dawa', 'rvc': 'rvc', 'ahahahhaha': 'haha', 'isaacs': 'isaacs', 'cmon': 'come on', 'ohh': 'oh', 'lololololol': 'lol', 'af': 'as fuck', 'ai': 'ai', 'pic': 'picture', 'macbeth': 
'macbeth', 'trib': 'tribune', 'erin': 'erin', 'ap': 'app', 'brazilians': 'brazilians', 'shediac': 'shediac', 'haahha': 'haha', 'triv': 'trivia', 'russel': 'russel', 'eric': 'eric', 'dejon': 'dejon', 'yoo': 'hey', 'hasnt': 'hasnt', 'theyll': 'theyll', 'jcs': 'jcs', 'niceeee': 'nice', 'nah': 'no', 'ng': 'ng', 'til': 'until', 'ny': 'new york', 'dunfield': 'dunfield', 'twerk': 'twerk', 'tuff': 'tough', 'np': 'no problem', 'esf': 'esf', 'ahhahaha': 'haha', 'ahaahha': 'haha', 'tess': 'tess', 'hahhaa': 'haha', 'danielplutesgmailcom': '', 'leacock': 'leacock', 'ilan': 'ilan', 'ftp': 'ftp', 'favourite': 'favourite', 'annes': 'annes', 'irving': 'irving', 'hahahahahaa': 'haha', 'mahdi': 'mahdi', 'evan': 'evan', 'cheque': 'check', 'chris': 'chris', 'leauge': 'leauge', 'niceee': 'nice', 'nxt': 'next', 'agm': 'agm', 'texted': 'texted', 'alvins': 'alvins', 'wifi': 'wifi', 'youtube': 'youtube', 'nikol': 'nikol', 'ottawa': 'ottawa', 'ahahahahah': 'haha', 'uhhh': 'uh', 'corey': 'corey', 'adam': 'adam', 'ahhahahah': 'haha', 'duuude': 'dude', 'nash': 'nash'}
reverseDictionary = {'saying': [('sayin', 3), ('saying', 18)], 'bull shit': [('bs', 1)], 'joshs': [('jmacs', 4), ('joshs', 9)], 'talk to you later': [('ttyl', 2)], 'should have': [('shoulda', 2)], 'internet explorer': [('ie', 5)], 'what about you': [('wbu', 20)], 'dick': [('dic', 5), ('dick', 34)], 'thursday': [('thurs', 2), ('thursday', 7)], 'to': [('ta', 1), ('to', 2117)], 'going': [('goign', 2), ('goin', 18), ('going', 459)], 'in terms': [('interms', 5)], 'very': [('verry', 3), ('very', 32)], 'cafetiere': [('caf', 4)], 'daniel he': [('dhe', 5)], 'facebook': [('fb', 28), ('facebook', 10)], 'as fuck': [('af', 4)], 'im going to': [('imma', 17)], 'tickets': [('tix', 1), ('tickets', 7)], 'school': [('skewl', 1), ('school', 120)], 'level': [('lvl', 12), ('level', 3)], 'did': [('di', 1), ('did', 426)], 'wednesday': [('wensday', 3), ('wednesday', 1)], 'bad': [('abd', 1), ('bad', 117)], 'havent': [('havnt', 5), ('havent', 63)], 'instagram': [('insta', 4), ('instagram', 2)], 'i dont know': [('idk', 78)], 'see': [('se', 4), ('see', 232)], 'video': [('vid', 18), ('video', 29)], 'yes': [('te', 2), ('yeeeee', 1), ('yess', 1), ('mmm', 6), ('yeee', 6), ('yeahh', 2), ('yeh', 4), ('yee', 8), ('yey', 3), ('eya', 4), ('yeeeeee', 1), ('yesh', 5), ('yeye', 6), ('yay', 1), ('yah', 2), ('yaa', 1), ('yesss', 1), ('yeaaa', 3), ('yeaah', 1), ('yes', 78)], 'what': [('wut', 34), ('wat', 4), ('what', 673)], 'richard': [('rdu', 2), ('richard', 31)], 'pictures': [('pics', 16), ('pictures', 7)], 'wow': [('ww', 2), ('woah', 1), ('wooow', 1), ('wow', 30)], 'right now': [('rn', 42)], 'please': [('plz', 12), ('please', 12)], 'i dont care': [('idc', 3)], 'august': [('aug', 1), ('august', 1)], 'november': [('nov', 1), ('november', 2)], 'boy': [('boi', 2), ('boy', 26)], 'ill': [('il', 1), ('ill', 198)], 'so much': [('sm', 1)], 'last night': [('ln', 8)], 'montreal': [('mtl', 34), ('montreal', 10)], 'thats': [('thts', 1), ('thats', 191)], 'tough': [('tuff', 10), ('tough', 13)], 'family': [('fam', 6), 
('family', 42)], 'sweet': [('sweeet', 3), ('sweet', 87)], 'app': [('ap', 2), ('app', 9)], 'hit me up': [('hmu', 1)], 'from': [('frm', 39), ('frum', 4), ('from', 250)], 'next': [('nxt', 18), ('next', 86)], 'sounds good': [('sg', 13)], 'to be honest': [('tbh', 12)], 'roomate': [('roomie', 6), ('roomate', 4)], 'until': [('til', 1), ('until', 12)], 'life': [('lyfe', 2), ('life', 64)], 'haha': [('jaja', 1), ('ahah', 223), ('haah', 10), ('hahaah', 10), ('hahahahah', 10), ('haahaha', 3), ('ahahhahahaha', 1), ('ahahha', 24), ('hhahah', 7), ('haaha', 8), ('ahaahah', 6), ('ahahahh', 1), ('ahahaha', 53), ('hahahaah', 3), ('hahahahha', 7), ('hahah', 221), ('hahaa', 4), ('ahahhaha', 3), ('hahahha', 23), ('hah', 168), ('hahahhah', 5), ('hhahaha', 12), ('hahha', 83), ('hahahah', 34), ('ahahahahha', 1), ('ahahah', 146), ('ahahhahaha', 4), ('ahaahha', 3), ('hahahahahah', 3), ('ahhaha', 13), ('haahah', 3), ('ahahahaha', 15), ('hahahahaha', 5), ('ahha', 25), ('ahahahah', 22), ('hahahaa', 1), ('hahahaha', 19), ('ahahahahahah', 5), ('ahahh', 4), ('ahaha', 236), ('ahahhahahah', 4), ('ahhahahaha', 2), ('hehe', 5), ('ahhahah', 2), ('shana', 2), ('ahahahahaha', 5), ('hhaha', 41), ('ahahahhaha', 2), ('aahaha', 5), ('hahaha', 139), ('haahha', 4), ('ahahhahah', 3), ('ahahahahhaha', 3), ('ahhah', 4), ('hahhaha', 22), ('hahahhaha', 9), ('ahahahha', 9), ('ahahahahah', 5), ('ahhahaha', 1), ('hahhaa', 2), ('ahhahahah', 4), ('haha', 399)], 'about': [('abt', 15), ('about', 291)], 'fuck': [('fuckk', 1), ('fuc', 4), ('fuckkk', 4), ('fuck', 197)], 'downtown': [('dt', 2), ('downtown', 5)], 'baby': [('bb', 9), ('baby', 6)], 'juniour': [('jr', 1)], 'thaks': [('thnx', 2)], 'sucks': [('suxz', 2), ('sucks', 39)], 'will': [('wil', 2), ('will', 411)], 'josh liu': [('jliu', 12)], 'you are': [('ur', 602)], 'by the way': [('ps', 3), ('btw', 38)], 'my': [('ym', 2), ('mah', 1), ('my', 581)], 'aw': [('aww', 2), ('aw', 133)], 'come on': [('cmon', 3)], 'is': [('os', 5), ('is', 1915)], 'it': [('ir', 15), ('ut', 289), 
('ot', 100), ('it', 1931)], 'player': [('playa', 1), ('player', 2)], 'he is': [('hess', 3)], 'something': [('somthing', 15), ('something', 56)], 'skype': [('skypr', 4), ('skype', 76)], 'for your information': [('fyi', 1)], 'awkward': [('awk', 1), ('awkward', 11)], 'conversation': [('convo', 3), ('conversation', 3)], 'got to go': [('gtg', 10)], 'hot': [('hawt', 3), ('hot', 19)], 'development': [('dev', 11), ('development', 2)], 'okay': [('ok', 2), ('okay', 92)], 'best friend': [('bff', 1)], 'birthday': [('bday', 4), ('birthday', 10)], 'coming': [('comin', 6), ('coming', 110)], 'president': [('prez', 9), ('president', 3)], 'suck': [('sux', 2), ('suck', 8)], 'man': [('mannn', 2), ('mannnn', 1), ('mann', 4), ('man', 392)], 'sp': [('soooooo', 1)], 'well': [('welll', 2), ('well', 123)], 'so': [('sooo', 27), ('sooooo', 2), ('soo', 45), ('soooo', 10), ('soooooooo', 2), ('so', 575)], 'oops': [('opps', 2)], 'the': [('da', 18), ('the', 3324)], 'yours': [('urs', 17), ('yours', 3)], 'you only live once': [('yolo', 1)], 'designated driver': [('dd', 4)], 'baby girl': [('bbg', 9)], 'soon': [('sooon', 3), ('soon', 42)], 'what a': [('wadda', 1)], 'thanks': [('thx', 51), ('thanks', 48)], 'on my way': [('omy', 2)], 'love': [('luv', 4), ('love', 53)], 'oh my god': [('omg', 4)], 'josh': [('jmac', 16), ('josh', 45)], 'easy': [('ez', 6), ('easy', 40)], 'bye': [('cya', 2), ('bye', 10)], 'tuesday': [('tues', 3), ('tuesday', 5)], 'homework': [('hw', 8), ('homework', 6)], 'probabily': [('probs', 16)], 'them': [('dem', 5), ('them', 199)], 'university': [('uni', 11), ('university', 23)], 'a lot': [('alot', 59)], 'night': [('nite', 21), ('nig', 1), ('night', 150)], 'tess': [('tesss', 5), ('tess', 19)], 'just wondering': [('jw', 3)], 'because': [('cuz', 50), ('bc', 1), ('because', 65)], 'old': [('ol', 203), ('old', 55)], 'people': [('ppl', 113), ('people', 74)], 'doing': [('doin', 6), ('doing', 137)], 'vice president': [('vp', 2)], 'junior councillpr': [('jc', 4)], 'girl': [('gurl', 6), ('girl', 
47)], 'just kidding': [('jk', 7)], 'for': [('fo', 4), ('for', 837)], 'its all': [('sall', 2)], 'christmas': [('xmas', 3), ('christmas', 12)], 'in my opinion': [('imo', 2)], 'be right bacj': [('brb', 1)], 'no problem': [('np', 25)], 'sister': [('sista', 3), ('sister', 12)], 'you will': [('ull', 10)], 'oh': [('ouu', 1), ('ohhh', 1), ('oo', 30), ('ou', 109), ('oh', 192)], 'favorite': [('fav', 3), ('favorite', 5)], 'thing': [('thang', 3), ('thing', 156)], 'messaged': [('msged', 1), ('messaged', 11)], 'think': [('thibk', 6), ('think', 444)], 'dont': [('dnt', 2), ('dont', 387)], 'yourself': [('urself', 2), ('yourself', 1)], 'tribune': [('trib', 12), ('tribune', 1)], 'tomorrow': [('tmr', 252), ('tomorrow', 41)], 'little': [('lil', 16), ('li', 4), ('little', 49)], 'there': [('ther', 8), ('thr', 3), ('there', 464)], 'hey': [('yooooo', 1), ('yooo', 2), ('heey', 2), ('yoo', 3), ('hey', 142)], 'lol': [('lololol', 1), ('lolol', 6), ('lolo', 2), ('lolz', 4), ('lul', 6), ('lolololol', 1), ('lol', 352)], 'new york': [('ny', 25)], 'too': [('tooo', 12), ('toooo', 2), ('too', 174)], 'was': [('ws', 4), ('was', 724)], 'you know': [('uno', 1)], 'let me know': [('lmk', 1)], 'that': [('dat', 9), ('that', 1131)], 'ted ': [('ted', 1)], 'hm': [('hmmm', 5), ('hmm', 13), ('hm', 1)], 'true': [('tru', 9), ('true', 63)], 'are you': [('ru', 7)], 'up': [('upp', 4), ('up', 489)], 'um': [('umm', 3), ('ummm', 1), ('um', 21)], 'uh': [('meh', 4), ('uhh', 2), ('uh', 3)], 'minutes': [('minz', 5), ('mins', 19), ('minutes', 21)], 'pre': [('pree', 3), ('pre', 31)], 'videos': [('vids', 1), ('videos', 3)], 'ah': [('ahhhh', 4), ('ahhhhh', 1), ('ahhh', 2), ('ahh', 4), ('ah', 32)], 'junior board': [('jb', 7)], 'tonight': [('tnite', 1), ('tonight', 103)], 'engineering': [('eng', 3), ('engineering', 4)], 'never mind': [('nvm', 22)], 'have': [('hav', 1), ('have', 1026)], 'girlfriend': [('gf', 7), ('girlfriend', 3)], 'shit': [('shite', 7), ('shitttt', 1), ('shit', 362)], 'microphone': [('mic', 5)], 'no': [('noooo', 
1), ('naw', 82), ('noo', 1), ('nooo', 2), ('no', 352)], 'sick': [('sickkk', 1), ('sick', 55)], 'you': [('ypu', 3), ('uu', 6), ('you', 1238)], 'trivia': [('triv', 21), ('trivia', 17)], 'nice': [('noice', 4), ('niceeee', 1), ('niceee', 1), ('nice', 75)], 'calculus': [('calc', 5), ('calculus', 1)], 'picture': [('pic', 53), ('picture', 16)], 'sure': [('suree', 2), ('sure', 191)], 'what the fuck': [('wtf', 27)], 'monday': [('mon', 2), ('monday', 14)], 'though': [('thoo', 3), ('though', 38)], 'favourite': [('fave', 3)], 'connor': [('conor', 1), ('connor', 3)], 'chris': [('chriss', 2), ('chris', 46)], 'why': [('yy', 8), ('yyy', 2), ('why', 128)], 'died': [('deid', 1), ('died', 5)], 'episode': [('ep', 5), ('episode', 8)], 'youtube': [('utube', 4), ('youtube', 5)], 'dog': [('dawg', 18), ('dog', 12)], 'you all': [('yall', 1)], 'alright': [('ight', 19), ('aight', 2), ('alright', 3)]}
import random
def translateWord(word, table=None):
    """Pick one variant for ``word`` at random, proportionally to its weight.

    Args:
        word: Key to look up in the variants table.
        table: Optional mapping of word -> list of ``(variant, weight)``
            pairs. Defaults to the module-level ``reverseDictionary``.

    Returns:
        One variant string, chosen with probability ``weight / total``.
        If the entry is empty or its weights sum to zero there is nothing
        to sample, and ``word`` is returned unchanged.

    Raises:
        KeyError: If ``word`` has no entry in the table.
    """
    if table is None:
        table = reverseDictionary
    variants = table[word]
    total = sum(weight for _, weight in variants)
    # Draw a point in [0, total) and walk the cumulative weights until the
    # running sum passes it.
    threshold = random.random() * total
    running = 0
    for variant, weight in variants:
        running += weight
        if threshold < running:
            return variant
    # Nothing to choose from (empty entry or all-zero weights).
    return word


def translateSentence(sentence, table=None):
    """Replace each space-delimited table phrase in ``sentence`` with a variant.

    Every occurrence of a table key that is delimited by spaces (or by the
    start/end of the sentence) is replaced with an independently sampled
    variant from ``translateWord``. Occurrences of a key inside a longer
    word are left alone, and text after a match is never dropped.

    Keys are processed in the table's iteration order, so text produced by
    an earlier replacement may still be matched by a later key.

    Args:
        sentence: Text whose words are separated by single spaces.
        table: Optional mapping of phrase -> list of ``(variant, weight)``
            pairs. Defaults to the module-level ``reverseDictionary``.

    Returns:
        The sentence with matching phrases replaced.
    """
    if table is None:
        table = reverseDictionary
    # Pad with spaces so phrases at either end are space-delimited as well.
    sentence = " " + sentence + " "
    for key in table:
        padded_key = " " + key + " "
        start = sentence.find(padded_key)
        while start != -1:
            replacement = translateWord(key, table)
            word_start = start + 1  # skip the leading delimiter space
            sentence = (sentence[:word_start] + replacement
                        + sentence[word_start + len(key):])
            # Resume at the trailing space: it may be the leading delimiter
            # of an adjacent match, and the replacement itself is never
            # rescanned for the same key.
            start = sentence.find(padded_key, word_start + len(replacement))
    # Return the sentence without the padding.
    return sentence[1:-1]