-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwikibots.go
159 lines (131 loc) · 3.46 KB
/
wikibots.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
package wikibots
import (
"context"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"net/http"
"net/url"
"strings"
"sync"
"time"
"github.com/pkg/errors"
)
// New returns a function that maps a user ID to the name of the matching
// Wikipedia bot for the given language edition. The second result of that
// function reports whether the ID was found.
//
// The lookup function is cached per language in lang2UserID2User: the first
// call for a language fetches the data through users, later calls reuse the
// stored function. When several callers race on the same language, the
// function stored first is the one every caller receives.
func New(ctx context.Context, lang string) (ID2Bot func(uint32) (string, bool), err error) {
	if cached, found := lang2UserID2User.Load(lang); found {
		return cached.(func(uint32) (string, bool)), nil
	}

	id2name, err := users(ctx, lang)
	if err != nil {
		return nil, err
	}

	lookup := func(ID uint32) (string, bool) {
		name, ok := id2name[ID]
		return name, ok
	}
	// LoadOrStore keeps an entry that another goroutine may have stored meanwhile.
	stored, _ := lang2UserID2User.LoadOrStore(lang, lookup)
	return stored.(func(uint32) (string, bool)), nil
}
// lang2UserID2User caches, per language code, the lookup function built by New.
// Keys are the lang strings passed to New; values are func(uint32) (string, bool).
var lang2UserID2User sync.Map
// users queries the given language edition for the accounts listed in the
// package-level usernames slice (declared outside this section of the file)
// and returns a map from user ID to user name.
//
// Names are requested in batches, one URL per batch (see toURL). A failing
// batch is retried with an exponential backoff: the delay starts at one
// second and doubles for as long as it stays below one hour. Accounts that
// the reply marks as missing are skipped.
//
// A nil map and a non-nil error are returned when ctx is done while waiting
// between two attempts, or when a batch still fails on its last attempt.
func users(ctx context.Context, lang string) (userID2User map[uint32]string, err error) {
	userID2User = make(map[uint32]string, len(usernames))
	for _, query := range toURL(usernames, lang) {
		var ud usersData
		for t := time.Second; t < time.Hour; t *= 2 { // exponential backoff
			ud, err = usersDataFrom(ctx, query)
			if err == nil {
				break
			}
			if t*2 >= time.Hour {
				// That was the last attempt: report the failure right away
				// instead of sleeping one more full delay for nothing.
				break
			}
			// An explicit timer, unlike time.After, can be released as soon
			// as the context ends the wait.
			timer := time.NewTimer(t)
			select {
			case <-ctx.Done():
				timer.Stop()
				return nil, errors.Wrap(ctx.Err(), "Error: change in context state")
			case <-timer.C:
				// delay elapsed, try again
			}
		}
		if err != nil {
			return nil, err
		}
		for _, u := range ud.Query.Users {
			if u.Missing {
				continue
			}
			userID2User[u.UserID] = u.Name
		}
	}
	return userID2User, nil
}
// toURL turns names into API request URLs for the given language edition:
// one URL per batch produced by chunkerize, with the names of a batch joined
// by "|" and query-escaped before being inserted into the base template.
func toURL(names []string, lang string) (URLs []string) {
	for _, batch := range chunkerize(names) {
		joined := strings.Join(batch, "|")
		URLs = append(URLs, fmt.Sprintf(base, lang, url.QueryEscape(joined)))
	}
	return URLs
}
const (
	// uslimit is the maximum number of names put into a single chunk, and
	// therefore into a single request URL.
	// NOTE(review): presumably mirrors the API's per-request cap on ususers — confirm.
	uslimit = 50

	// base is the request URL template; toURL fills in the language code and
	// the escaped, "|"-joined list of user names, in that order.
	base = "https://%v.wikipedia.org/w/api.php?action=query&list=users&format=json&formatversion=2&ususers=%v"
)

// chunkerize splits names into consecutive chunks of at most uslimit
// elements, preserving order. The chunks are sub-slices of names, not copies.
// An empty input yields a nil result.
func chunkerize(names []string) (chunks [][]string) {
	for start := 0; start < len(names); start += uslimit {
		end := start + uslimit
		if end > len(names) {
			end = len(names)
		}
		chunks = append(chunks, names[start:end])
	}
	return chunks
}
// usersData holds the parts of a JSON reply that usersDataFrom decodes.
// The fields carry no struct tags, so encoding/json matches them to the
// reply's keys by field name, ignoring case.
type usersData struct {
// Batchcomplete stays nil when the reply has no such key (or a null one);
// usersDataFrom rejects that case as an incomplete batch.
Batchcomplete interface{}
// Warnings is non-nil when the reply carries a warnings entry;
// usersDataFrom turns that case into an error.
Warnings interface{}
// Query wraps the list of user entries read by users.
Query struct {
Users []mayMissingUser
}
}
// usersDataFrom fetches query through stream, reads the whole body and
// decodes it as JSON into a usersData value.
//
// A reply without a batchcomplete entry, or with a warnings entry, is treated
// as a failure. On any failure the returned usersData is the zero value and
// the error is wrapped with a message that includes the query.
func usersDataFrom(ctx context.Context, query string) (pd usersData, err error) {
	// Single exit point for failures: whatever error the body below returns
	// is wrapped here and the partially filled result is discarded.
	defer func() {
		if err != nil {
			pd = usersData{}
			err = errors.Wrapf(err, "BotId2Name: error with the following query: %v", query)
		}
	}()

	rc, err := stream(ctx, query)
	if err != nil {
		return pd, err
	}
	defer rc.Close()

	raw, err := ioutil.ReadAll(rc)
	if err != nil {
		return pd, err
	}
	if err = json.Unmarshal(raw, &pd); err != nil {
		return pd, err
	}

	switch {
	case pd.Batchcomplete == nil:
		err = errors.Errorf("BotId2Name: incomplete batch with the following query: %v", query)
	case pd.Warnings != nil:
		err = errors.Errorf("BotId2Name: warnings - %v - with the following query: %v", pd.Warnings, query)
	}
	return pd, err
}
// mayMissingUser is one entry of the users list in a decoded reply.
// The entry may describe an account that was not found, see Missing.
type mayMissingUser struct {
// UserID is the key under which users stores the account name.
UserID uint32
// Name is the value users stores for UserID.
Name string
// Missing marks an entry that users skips.
Missing bool
}
// stream sends a GET request for query, bound to ctx, through the shared
// package client and returns the response body. The caller must close the
// returned reader.
//
// An error is returned when the request cannot be built, when the request
// itself fails, or when the server answers with a non-2xx status. In the
// last case the body is closed here, so a failed call leaves nothing for the
// caller to release.
func stream(ctx context.Context, query string) (r io.ReadCloser, err error) {
	req, err := http.NewRequest("GET", query, nil)
	if err != nil {
		return nil, errors.Wrap(err, "Error: unable create a request with the following url: "+query)
	}

	resp, err := client.Do(req.WithContext(ctx))
	if err != nil {
		return nil, errors.Wrap(err, "Error: unable do a request with the following url: "+query)
	}

	// Without this check an error page (rate limiting, server failure) would
	// be handed to the JSON decoder and surface as a misleading parse error.
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		resp.Body.Close()
		return nil, errors.Errorf("Error: unexpected status %q with the following url: %v", resp.Status, query)
	}

	return resp.Body, nil
}
// client is the HTTP client shared by every request made through stream;
// its one-minute Timeout bounds each request as a whole.
var client = &http.Client{Timeout: time.Minute}