-
Notifications
You must be signed in to change notification settings - Fork 0
/
tf_idf.go
41 lines (31 loc) · 1.15 KB
/
tf_idf.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
package main
import (
"fmt"
"math"
"strings"
)
// main computes and prints the TF-IDF score of one term in one example
// document.
//
// The collection is modelled as a single document that contains the term, so
// the IDF factor is log(1/1) = 0 and the printed score is 0 regardless of the
// term frequency. Change totalDocs and docsWithTerm to model a real collection.
func main() {
	// The document we want to calculate the TF-IDF for.
	document := "The quick brown fox jumps over the lazy dog."
	// The term we want to calculate the TF-IDF for.
	term := "fox"

	// In this example the term is assumed to appear in every document of a
	// one-document collection.
	const (
		totalDocs    = 1
		docsWithTerm = 1
	)

	tf := termFrequency(document, term)
	idf := inverseDocumentFrequency(totalDocs, docsWithTerm)

	// The TF-IDF is the product of the TF and IDF.
	tfidf := tf * idf
	fmt.Printf("TF-IDF of term '%s' in document '%s': %f\n", term, document, tfidf)
}

// termFrequency returns the number of occurrences of term in document divided
// by the total number of whitespace-separated words in document.
//
// Each word has common leading and trailing punctuation stripped and is
// compared to term case-insensitively, so "dog." and "The" match the terms
// "dog" and "the". A document with no words yields 0 rather than NaN.
func termFrequency(document, term string) float64 {
	// Characters trimmed from both ends of every word before comparison.
	const punctuation = ".,;:!?\"'()[]{}"

	words := strings.Fields(document)
	if len(words) == 0 {
		// Avoid 0/0, which would propagate NaN into the final score.
		return 0
	}

	count := 0
	for _, w := range words {
		if strings.EqualFold(strings.Trim(w, punctuation), term) {
			count++
		}
	}
	return float64(count) / float64(len(words))
}

// inverseDocumentFrequency returns log(D/N), where D (totalDocs) is the number
// of documents in the collection and N (docsWithTerm) is the number of those
// documents that contain the term.
//
// If either count is not positive the ratio is undefined; 0 is returned so the
// resulting TF-IDF is 0 instead of Inf or NaN.
func inverseDocumentFrequency(totalDocs, docsWithTerm int) float64 {
	if totalDocs <= 0 || docsWithTerm <= 0 {
		return 0
	}
	return math.Log(float64(totalDocs) / float64(docsWithTerm))
}