Skip to content

Commit

Permalink
Allow changing the tokenize function
Browse files Browse the repository at this point in the history
  • Loading branch information
Inspirateur committed Jan 8, 2023
1 parent 49bc70c commit 7994b9e
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 9 deletions.
8 changes: 4 additions & 4 deletions src/handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use serenity::{
prelude::*, utils::Color
};
use wordcloud_rs::{Token, WordCloud, Colors};
use crate::idiom::Idioms;
use crate::idiom::{Idioms, tokenize};
const READ_PAST: u64 = 10000;

fn convert_color(color: Color) -> Rgb {
Expand All @@ -39,7 +39,7 @@ impl Handler {
}

/// Records one incoming message: tokenizes the raw text and feeds the
/// tokens into this guild's idiom tracker for the given channel and user.
pub fn message(&self, guild_id: GuildId, channel_id: ChannelId, member_id: UserId, message: String) {
    // NOTE(review): the diff rendering duplicated the pre- and post-commit
    // call; the post-commit form (tokenize before update) is kept here.
    // `unwrap()` assumes the guild entry already exists — presumably created
    // when the bot joins/loads the guild; TODO confirm against guild setup.
    self.idioms.get_mut(&guild_id).unwrap().update(channel_id, member_id, tokenize(message));
}

fn to_wc_tokens(&self, tokens: Vec<(String, f32)>) -> Vec<(Token, f32)> {
Expand Down Expand Up @@ -115,7 +115,7 @@ impl Handler {
).await {
for message in messages {
idioms.get_mut(&guild.id).unwrap().update(
channel_id, message.author.id, message.content
channel_id, message.author.id, tokenize(message.content)
);
}
info!(target: "Wordy", "Read {} past messages in {}/{}", READ_PAST, guild.name, channel.name())
Expand All @@ -127,7 +127,7 @@ impl Handler {
}

pub async fn register_commands(&self, http: Arc<Http>, guild_id: GuildId) {
println!("Registering slash commands for Guild {}", guild_id);
trace!("Registering slash commands for Guild {}", guild_id);
if let Err(why) =
GuildId::set_application_commands(&guild_id, http, |commands| {
commands
Expand Down
14 changes: 10 additions & 4 deletions src/idiom/idiom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,15 @@ lazy_static! {
static ref RE_TOKEN: Regex = Regex::new(r"\w+").unwrap();
}

fn tokenize(text: String) -> Vec<(String, f32)> {
pub fn tokenize(text: String) -> Vec<String> {
RE_TOKEN.find_iter(&text)
.map(|token| token.as_str().to_string())
.collect_vec()
}

/// Tallies occurrences of each token and returns `(token, count)` pairs
/// with the count as `f32` (the frequency tracker works in floats).
/// The pair order is unspecified, since it comes from a `HashMap`.
fn counts(tokens: Vec<String>) -> Vec<(String, f32)> {
    let mut counts: HashMap<String, usize> = HashMap::new();
    for token in tokens {
        // `entry(token)` consumes the owned String directly — the original
        // `token.as_str().to_string()` re-allocated a copy of a String we
        // already own (leftover from the pre-refactor regex-Match code).
        *counts.entry(token).or_default() += 1;
    }
    counts.into_iter().map(|(k, v)| (k, v as f32)).collect()
}
Expand All @@ -40,10 +46,10 @@ impl<P: Hash+Eq, U: Hash+Eq> Idioms<P, U> {
}
}

pub fn update(&mut self, place: P, person: U, message: String) {
pub fn update(&mut self, place: P, person: U, tokens: Vec<String>) {
let place_voc = self.places.entry(place).or_insert(TopFreqs::new());
let user_voc = self.people.entry(person).or_insert(TopFreqs::new());
let tokens = tokenize(message);
let tokens = counts(tokens);
for (token, value) in tokens {
let idx = match self.tokens.get_by_left(&token) {
Some(v) => *v,
Expand Down
3 changes: 2 additions & 1 deletion src/idiom/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
mod idiom;
mod top_freqs;
// Re-export the idiom API surface; `tokenize` is public so callers can
// tokenize text themselves before calling `Idioms::update`.
pub use idiom::Idioms;
pub use idiom::tokenize;

0 comments on commit 7994b9e

Please sign in to comment.