-
Notifications
You must be signed in to change notification settings - Fork 2
/
example_parallel_cut_test.go
executable file
·96 lines (79 loc) · 1.69 KB
/
example_parallel_cut_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
package jieba
import (
"bufio"
"fmt"
"log"
"os"
"runtime"
"strings"
"time"
)
// line is one line of text tagged with its position in the input, so that
// results arriving from the workers in any order can be stored back at the
// right index.
type line struct {
// number is the zero-based index of the line within the input.
number int
// text holds the raw line when sent as a task and the segmented,
// newline-terminated line when sent back as a result.
text string
}
// Package-level state shared between Example_parallelCut and worker.
var (
// segmenter is the segmenter every worker calls; Example_parallelCut
// assigns it before starting the workers.
segmenter *Segmenter
// numThreads is the number of worker goroutines to start, taken from
// runtime.NumCPU.
numThreads = runtime.NumCPU()
// task carries lines waiting to be segmented; its buffer holds numThreads
// entries.
task = make(chan line, numThreads)
// result carries segmented lines back to the collector; its buffer holds
// numThreads entries.
result = make(chan line, numThreads)
)
// worker receives lines from the task channel until that channel is closed.
// Each line is segmented with the shared segmenter, the segments are joined
// with " / " and terminated with a newline, and the result is sent on the
// result channel under the same line number.
func worker() {
	for {
		job, ok := <-task
		if !ok {
			// task was closed and drained: nothing left to do.
			return
		}
		// NOTE(review): the meaning of Cut's second argument is not visible
		// in this file; it is passed through unchanged.
		words := segmenter.Cut(job.text, true)
		joined := strings.Join(words, " / ")
		result <- line{
			number: job.number,
			text:   fmt.Sprintf("%s\n", joined),
		}
	}
}
// Example_parallelCut segments README.md line by line with a pool of
// numThreads worker goroutines and writes the segmented lines, in their
// original order, to parallelCut.log. The elapsed time and the throughput in
// MB/s are reported through the log package.
//
// Any failure to open, read, or write a file, or to load the dictionary,
// terminates the process via log.Fatal.
func Example_parallelCut() {
	// open file for segmentation
	file, err := os.Open("README.md")
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	// Load dictionary. This assigns the package-level segmenter that the
	// workers read, so it must happen before they are started.
	segmenter, err = LoadDictionaryAt("dict.txt")
	if err != nil {
		log.Fatal(err)
	}

	// start worker routines
	for i := 0; i < numThreads; i++ {
		go worker()
	}

	var (
		size  int      // total bytes of input text, excluding line terminators
		lines []string // input lines, later overwritten with segmented output
	)
	scanner := bufio.NewScanner(file)
	t0 := time.Now()

	// Read lines
	for scanner.Scan() {
		t := scanner.Text()
		size += len(t)
		lines = append(lines, t)
	}
	// Scan returns false both at EOF and on failure (read error, over-long
	// line); without this check a failure would silently truncate the input.
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
	length := len(lines)

	// Segmentation: feed the tasks from a separate goroutine so this
	// goroutine can collect results at the same time; feeding and collecting
	// sequentially would block once the channel buffers fill up.
	go func() {
		for i := 0; i < length; i++ {
			task <- line{number: i, text: lines[i]}
		}
		// Closing task lets the workers' receive loops terminate.
		close(task)
	}()

	// Make sure the segmentation result contains same line as original file:
	// results arrive in arbitrary order, so each is stored at its own index.
	for i := 0; i < length; i++ {
		l := <-result
		lines[l.number] = l.text
	}
	elapsed := time.Since(t0)
	// All length results have been received, so no worker sends again.
	close(result)

	// Write the segments into a file for verify. O_TRUNC discards the content
	// of an earlier run, which would otherwise remain after the new data if
	// the old file was longer.
	outputFile, err := os.OpenFile("parallelCut.log", os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0600)
	if err != nil {
		log.Fatal(err)
	}
	defer outputFile.Close()

	writer := bufio.NewWriter(outputFile)
	for _, l := range lines {
		// Errors are checked once at Flush: a bufio.Writer remembers the
		// first write error and returns it from every later call.
		writer.WriteString(l)
	}
	if err := writer.Flush(); err != nil {
		log.Fatal(err)
	}

	log.Printf("Time cousumed: %v", elapsed)
	log.Printf("Segmentation speed: %f MB/s", float64(size)/elapsed.Seconds()/(1024*1024))
}