-
Notifications
You must be signed in to change notification settings - Fork 2
/
dictionary.go
executable file
·67 lines (57 loc) · 1.37 KB
/
dictionary.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
package jieba
import (
"io"
"math"
"sync"
"github.com/fumiama/jieba/dictionary"
)
// Dictionary is the word-frequency table used for word segmentation.
//
// The embedded sync.RWMutex guards the fields below; the exported methods of
// Dictionary acquire it themselves.
type Dictionary struct {
	sync.RWMutex

	// total is the running sum of the frequencies of all tokens added so far.
	total float64
	// logTotal caches math.Log(total); it is refreshed by updateLogTotal.
	logTotal float64
	// freqMap maps a word to its frequency. Prefixes of added words that are
	// not words themselves are stored with frequency 0.
	freqMap map[string]float64
}
// Load adds every given token to the dictionary and then refreshes the cached
// logarithm of the total frequency.
//
// The whole update, including the logTotal refresh, runs under the write lock
// so that concurrent Load/AddToken calls never read total or write logTotal
// unguarded. updateLogTotal takes no lock of its own, so calling it while the
// lock is held cannot deadlock.
func (d *Dictionary) Load(tokens ...dictionary.Token) {
	d.Lock()
	defer d.Unlock()

	for _, token := range tokens {
		d.addToken(token)
	}
	d.updateLogTotal()
}
// AddToken adds a single token to the dictionary and then refreshes the cached
// logarithm of the total frequency.
//
// Both steps run under the write lock: refreshing logTotal after releasing the
// lock would read total and write logTotal unguarded, racing with any other
// goroutine that is adding tokens at the same time. updateLogTotal takes no
// lock of its own, so calling it while the lock is held cannot deadlock.
func (d *Dictionary) AddToken(token dictionary.Token) {
	d.Lock()
	defer d.Unlock()

	d.addToken(token)
	d.updateLogTotal()
}
// addToken stores the token's frequency under its text, adds that frequency to
// the running total, and makes sure every rune-wise prefix of the text has an
// entry in the frequency map (frequency 0 when the prefix is not yet known).
//
// It acquires no lock itself; Load and AddToken call it while holding the
// write lock.
func (d *Dictionary) addToken(token dictionary.Token) {
	if d.freqMap == nil {
		// A zero-value Dictionary has a nil map, and assigning into a nil map
		// panics, so allocate it on first use.
		d.freqMap = make(map[string]float64)
	}

	text, freq := token.Text(), token.Frequency()
	d.freqMap[text] = freq
	// NOTE(review): re-adding an existing text overwrites its frequency but
	// still adds to total, so total can exceed the sum of stored frequencies.
	// Confirm whether that is intended.
	d.total += freq

	runes := []rune(text)
	// The final prefix is the whole text, which was stored above, so for valid
	// UTF-8 input the last iteration finds it already present and does nothing.
	for end := 1; end <= len(runes); end++ {
		prefix := string(runes[:end])
		if _, known := d.freqMap[prefix]; !known {
			d.freqMap[prefix] = 0.0
		}
	}
}
// updateLogTotal recomputes logTotal as the natural logarithm of total.
// It performs no locking of its own.
func (d *Dictionary) updateLogTotal() {
	sum := d.total
	d.logTotal = math.Log(sum)
}
// Frequency looks up key in the frequency map while holding the read lock.
// It returns the stored frequency and whether key has an entry at all; a
// missing key yields (0, false).
func (d *Dictionary) Frequency(key string) (float64, bool) {
	d.RLock()
	defer d.RUnlock()

	value, found := d.freqMap[key]
	return value, found
}
// loadDictionary delegates to dictionary.LoadDictionary, passing d and the
// given reader, and returns that call's error unchanged.
// NOTE(review): presumably the helper parses file and feeds the tokens back
// into d; confirm against the dictionary package.
func (d *Dictionary) loadDictionary(file io.Reader) error {
	err := dictionary.LoadDictionary(d, file)
	return err
}
// loadDictionaryAt delegates to dictionary.LoadDictionaryAt, passing d and the
// given string, and returns that call's error unchanged.
// NOTE(review): file is presumably a filesystem path to a dictionary file;
// confirm against the dictionary package.
func (d *Dictionary) loadDictionaryAt(file string) error {
	err := dictionary.LoadDictionaryAt(d, file)
	return err
}