From 7f3dc6443e4ffb85af1db35cdb40303538dcdef2 Mon Sep 17 00:00:00 2001 From: Charles Ashby Date: Wed, 26 Jul 2017 15:56:39 -0400 Subject: [PATCH] Update data_utils.py --- lib/data_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/data_utils.py b/lib/data_utils.py index f897f9b..eff1562 100644 --- a/lib/data_utils.py +++ b/lib/data_utils.py @@ -25,7 +25,9 @@ # TODO: Add non-Ascii characters emb_alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:\'"/\\|_@#$%^&*~`+-=<>()[]{} ' -emb_alphabet_extra= 'äöüß“„”€❤😂♥😊😍😁😄😉♡☺♫😘☕😜😀😭😏😎😔😆😃😡😱😩😓😅😋😒😴😌😢' + +# Uncomment the next line to enable emoticons and other special characters (DICT and ALPHABET_SIZE below still reference emb_alphabet_extra) +# emb_alphabet_extra= 'äöüß“„”€❤😂♥😊😍😁😄😉♡☺♫😘☕😜😀😭😏😎😔😆😃😡😱😩😓😅😋😒😴😌😢' DICT = {ch: ix for ix, ch in enumerate(emb_alphabet + emb_alphabet_extra.decode("utf-8"))} ALPHABET_SIZE = len(emb_alphabet + emb_alphabet_extra.decode("utf-8"))