Escape literal braces in the template-stripping substitutions of wikifil.pl.

The two patterns that strip {{...}} and {...} markup used unescaped '{' and
'}'. Escaping them leaves the matched text unchanged and avoids the
"Unescaped left brace in regex" diagnostic that newer Perl releases emit
(see perldiag).

NOTE(review): runs of whitespace inside the hunk lines (indentation, comment
alignment) may not match the target file exactly; if the hunk is rejected,
apply with `git apply --ignore-whitespace` or regenerate the patch.

diff --git a/data/word_embeddings/wikifil.pl b/data/word_embeddings/wikifil.pl
index 14e9883..6d497f8 100644
--- a/data/word_embeddings/wikifil.pl
+++ b/data/word_embeddings/wikifil.pl
@@ -31,8 +31,8 @@
  s/\[\[category:([^|\]]*)[^]]*\]\]/[[$1]]/ig; # show categories without markup
  s/\[\[[a-z\-]*:[^\]]*\]\]//g; # remove links to other languages
  s/\[\[[^\|\]]*\|/[[/g; # remove wiki url, preserve visible text
- s/{{[^}]*}}//g; # remove {{icons}} and {tables}
- s/{[^}]*}//g;
+ s/\{\{[^\}]*\}\}//g; # remove {{icons}} and {tables}
+ s/\{[^\}]*\}//g;
  s/\[//g; # remove [ and ]
  s/\]//g;
  s/&[^;]*;/ /g; # remove URL encoded chars