diff --git a/Cargo.lock b/Cargo.lock index 704a118..a2e9f58 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2342,9 +2342,9 @@ dependencies = [ [[package]] name = "wordcloud-rs" -version = "0.1.9" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6376e4377c9727d07621710f07db78b8dd629ef37c74f700bd9898609e99a47" +checksum = "147dd54380da9cd7704068c95b24c065415cfc765193556c1604c55d97aef7e6" dependencies = [ "anyhow", "fontdue", diff --git a/src/idiom/idiom.rs b/src/idiom/idiom.rs index 79beabc..841ce54 100644 --- a/src/idiom/idiom.rs +++ b/src/idiom/idiom.rs @@ -3,7 +3,6 @@ use std::hash::Hash; use itertools::Itertools; use bimap::BiMap; use super::top_freqs::TopFreqs; -use super::text_utils::counts; const PLACE_VOC_LEN: usize = 500; const PERSON_VOC_LEN: usize = 200; @@ -27,8 +26,7 @@ impl Idioms { pub fn update(&mut self, place: P, person: U, tokens: Vec) { let place_voc = self.places.entry(place).or_insert(TopFreqs::new()); let user_voc = self.people.entry(person).or_insert(TopFreqs::new()); - let tokens = counts(tokens); - for (token, value) in tokens { + for token in tokens { let idx = match self.tokens.get_by_left(&token) { Some(v) => *v, None => { @@ -37,8 +35,8 @@ impl Idioms { v } }; - place_voc.add(idx, value); - let inctx_value = (-place_voc.get(&idx)).exp()*50.; + place_voc.add(idx, 1.); + let inctx_value = (-place_voc.get(&idx)).exp()*20.; user_voc.add(idx, inctx_value); } } diff --git a/src/idiom/text_utils.rs b/src/idiom/text_utils.rs index cb418f0..5bda4d1 100644 --- a/src/idiom/text_utils.rs +++ b/src/idiom/text_utils.rs @@ -1,4 +1,3 @@ -use std::collections::HashMap; use itertools::Itertools; use lazy_static::lazy_static; use regex::Regex; @@ -31,11 +30,3 @@ pub fn tokenize(text: String) -> Vec { .map(|token| smart_lower(trim(token.as_str()))) .collect_vec() } - -pub(crate) fn counts(tokens: Vec) -> Vec<(String, f32)> { - let mut counts: HashMap = HashMap::new(); - for token in tokens { - *counts.entry(token.as_str().to_string()).or_default() += 1; - } - counts.into_iter().map(|(k, v)| (k, v as f32)).collect() -}