-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstream.go
441 lines (402 loc) · 15.5 KB
/
stream.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
package tusgo
import (
"bytes"
"context"
"encoding/base64"
"errors"
"fmt"
"hash"
"io"
"net/http"
"net/url"
"strconv"
"time"
"github.com/bdragon300/tusgo/checksum"
)
// NewUploadStream creates an UploadStream that uploads data for the given upload using the given client.
// The returned stream has a 2 MiB ChunkSize, sends its upload requests with the PATCH method and takes
// its context from the client. The upload object is modified while data is being uploaded, its
// RemoteOffset field in the first place. Panics if upload is nil.
func NewUploadStream(client *Client, upload *Upload) *UploadStream {
	const defaultChunkSize = 2 * 1024 * 1024

	if upload == nil {
		panic("upload is nil")
	}

	stream := new(UploadStream)
	stream.ChunkSize = defaultChunkSize
	stream.Upload = upload
	stream.client = client
	stream.uploadMethod = http.MethodPatch
	stream.ctx = client.ctx
	return stream
}
// NoChunked is the UploadStream.ChunkSize value that disables chunking: the dirty buffer is not used and
// the data is written to the request body directly.
const NoChunked = 0
// UploadStream is a write-only stream with TUS requests as the underlying implementation. On creation, the
// UploadStream receives a pointer to an Upload object, which holds the current server offset to write data to. This
// offset is continuously updated while data is being uploaded to the server. Note that the stream takes ownership
// of the upload, so the caller should treat the upload as read-only.
//
// By default, we upload data in chunks whose size is defined by the ChunkSize field. To disable chunking, set it to
// NoChunked -- the dirty buffer will not be used, and the data will be written to the request body directly.
//
// The approach to working with this stream is described in the appropriate methods, but in general it is the following:
//
// 1. Create a stream with an Upload with the offset we want to start writing from
//
// 2. Write the data to the stream
//
// 3. If some error has interrupted uploading, call the same method again to continue from the last successful offset
//
// The TUS server generally expects that we write the data at the concrete offset it manages. We use the
// Upload.RemoteOffset field to construct a request. If the UploadStream local offset and the server remote offset are
// not equal, then this stream is considered "not synced". To sync it with the remote offset, use the Sync method.
//
// To use the checksum data verification feature, use the WithChecksumAlgorithm method. Note that the server must
// support at least the 'checksum' extension and the hash algorithm you're using. If ChunkSize is set to NoChunked,
// the server must also support 'checksum-trailer', since we calculate the hash once the whole data has been read,
// and put the hash into the HTTP trailer.
//
// To use the "Deferred length" feature, before the first write, set Upload.RemoteSize to the particular size and
// set the SetUploadSize field to true. Generally, when using the "Deferred length" feature, we create an upload with
// unknown size, and the server expects that we will tell it the size on the first upload request.
// So the very first write to UploadStream for a concrete upload (i.e. when RemoteOffset == 0) generates a request
// with the upload size included.
//
// Errors which the stream methods may return, along with the Client methods, are:
//
// - ErrOffsetsNotSynced -- local offset and server offset are not equal. Call the Sync method to adjust the local offset.
//
// - ErrChecksumMismatch -- the server detected data corruption, if the checksum verification feature is used
//
// - ErrCannotUpload -- unable to write the data to the existing upload. Generally, it means that the upload is full,
// or this upload is a concatenated upload, or it does not accept the data for some reason
type UploadStream struct {
// ChunkSize determines the chunk size and the dirty buffer size for chunked uploading. You can set
// this value to NoChunked to disable chunking, which prevents using the dirty buffer. Default is 2MiB
ChunkSize int64
// LastResponse is a read-only field that contains the last response received from the server by this UploadStream.
// This is useful, for example, if it's needed to get the response that caused an error.
LastResponse *http.Response
// SetUploadSize relates to the "Deferred length" TUS protocol feature. When using this feature, we create an upload
// with unknown size, and the server expects that we will tell it the size on the first upload request.
//
// If SetUploadSize is true, then the very first request for an upload (i.e. when RemoteOffset == 0) will also
// contain the upload size, which is taken from the Upload.RemoteSize field.
SetUploadSize bool
// checksumHash is the hash used to compute the Upload-Checksum value; it is set by WithChecksumAlgorithm and
// is nil when the checksum feature is not used.
checksumHash hash.Hash
// rawChecksumHashName is the algorithm name exactly as it was passed to WithChecksumAlgorithm; it is sent to the
// server as a part of the Upload-Checksum value.
rawChecksumHashName string
// Upload is the upload the data is written to. Its RemoteOffset is updated as the data gets uploaded.
Upload *Upload
// client is the Client used to build and perform the requests.
client *Client
// dirtyBuffer holds the chunk being uploaded in chunked mode. A non-nil value means the stream is "dirty".
dirtyBuffer []byte
// uploadMethod is the HTTP method of the upload requests; NewUploadStream sets it to PATCH.
uploadMethod string
// ctx is the context attached to the upload requests, if not nil.
ctx context.Context
}
// WithContext returns a shallow copy of the stream which has ctx assigned as its context. The copy starts
// "clean" (without a dirty buffer) and with an empty LastResponse. The receiver is not modified.
func (us *UploadStream) WithContext(ctx context.Context) *UploadStream {
	clone := new(UploadStream)
	*clone = *us
	clone.ctx = ctx
	clone.dirtyBuffer = nil
	clone.LastResponse = nil
	return clone
}
// WithChecksumAlgorithm returns a shallow copy of the stream which uses the checksum algorithm with the
// given name. The copy gets a fresh hash instance, starts "clean" (without a dirty buffer) and with an
// empty LastResponse. Panics if the algorithm name is not known to the checksum package.
func (us *UploadStream) WithChecksumAlgorithm(name string) *UploadStream {
	alg, known := checksum.GetAlgorithm(name)
	if !known {
		panic(fmt.Sprintf("checksum algorithm %q does not supported", name))
	}

	clone := *us
	clone.LastResponse = nil
	clone.dirtyBuffer = nil

	newHash := checksum.Algorithms[alg]
	clone.checksumHash = newHash()
	clone.rawChecksumHashName = name
	return &clone
}
// ReadFrom uploads the data read from r, starting at offset Upload.RemoteOffset. Uploading stops when r is
// drained or the upload becomes full, whichever comes first. Upload.RemoteOffset is updated as the chunks
// get uploaded. The return value n is the number of bytes read from r.
//
// In chunked mode r is read into the dirty buffer chunk by chunk, which makes the stream "dirty". If an error
// interrupts the process, the failed chunk stays in the dirty buffer, the error is returned and the stream
// remains "dirty". The next ReadFrom call first tries to upload the dirty buffer and only then continues
// reading r; if that attempt fails, the error is returned with n == 0 and the dirty buffer is kept.
//
// Once everything has been uploaded successfully, the dirty buffer is dropped and the stream becomes "clean".
//
// If ChunkSize is NoChunked, the data is copied from r straight to the request body. The dirty buffer is not
// used in this case, so the stream never becomes "dirty". If the checksum feature is on, the hash is sent in
// the HTTP trailer, which requires the "checksum-trailer" server extension.
func (us *UploadStream) ReadFrom(r io.Reader) (n int64, err error) {
	if err = us.validate(); err != nil {
		return 0, err
	}

	// Retry the chunk left over from a previous failed call before touching r
	if pending := us.dirtyBuffer; pending != nil {
		_, err = us.uploadChunked(bytes.NewReader(pending))
		if err != nil {
			return 0, err
		}
	}

	us.setupDirtyBuffer()
	counted := &counterReader{Rd: r}
	_, err = us.uploadChunked(counted)
	if err == nil {
		// The whole data has been uploaded, so the stream is clean again
		us.dirtyBuffer = nil
	}
	return counted.BytesRead, err
}
// Write uploads p starting at offset Upload.RemoteOffset. Upload.RemoteOffset is updated as the chunks get
// uploaded. The return value n is the number of bytes successfully uploaded to the server.
//
// In chunked mode p is copied into the dirty buffer chunk by chunk. Since p can always be passed again by
// the caller, the stream is marked "clean" when the call returns, whether it has failed or not. A dirty
// chunk kept from an earlier failed ReadFrom is not uploaded and is discarded.
//
// If ChunkSize is NoChunked, the whole p becomes the request body. The dirty buffer is not used in this
// case. If the checksum feature is on, the hash is sent in the HTTP trailer, which requires the
// "checksum-trailer" server extension.
//
// If fewer than len(p) bytes have been uploaded without an error (for example, p does not fit into the
// space left in the upload), io.ErrShortWrite is returned.
func (us *UploadStream) Write(p []byte) (n int, err error) {
	if err = us.validate(); err != nil {
		return 0, err
	}

	us.setupDirtyBuffer()
	defer us.ForceClean() // p is seekable, so there is nothing worth keeping in the dirty buffer

	var sent int64
	sent, err = us.uploadChunked(bytes.NewReader(p))
	if err == nil && sent != int64(len(p)) {
		err = io.ErrShortWrite
	}
	return int(sent), err
}
// Sync requests the upload from the server and, on success, copies the server offset to
// Upload.RemoteOffset. It is usually called before starting a transfer or after the stream has returned
// ErrOffsetsNotSynced. The response is also stored in LastResponse, whether the request succeeded or not.
func (us *UploadStream) Sync() (response *http.Response, err error) {
	var remote Upload
	response, err = us.client.GetUpload(&remote, us.Upload.Location)
	us.LastResponse = response
	if err != nil {
		return response, err
	}
	us.Upload.RemoteOffset = remote.RemoteOffset
	return response, nil
}
// Seek moves Upload.RemoteOffset to the requested position and returns the new offset.
//
// The target position is computed from offset according to whence:
//
//   - io.SeekStart -- offset itself
//   - io.SeekCurrent -- Upload.RemoteOffset + offset
//   - io.SeekEnd -- Upload.RemoteSize - 1 + offset, i.e. relative to the last byte of the upload
//
// The resulting position must be within [0, Upload.RemoteSize). Otherwise, an error is returned together
// with the rejected position, and Upload.RemoteOffset is left unchanged.
//
// Panics if whence has an unknown value.
func (us *UploadStream) Seek(offset int64, whence int) (int64, error) {
	var newOffset int64
	switch whence {
	case io.SeekStart:
		newOffset = offset
	case io.SeekCurrent:
		newOffset = us.Upload.RemoteOffset + offset
	case io.SeekEnd:
		newOffset = us.Upload.RemoteSize - 1 + offset
	default:
		panic("unknown whence value")
	}

	// Validate the resulting position, not the raw argument: for relative seeks a negative argument
	// is perfectly valid, and a small positive one may still lead past the end.
	if newOffset >= us.Upload.RemoteSize {
		return newOffset, fmt.Errorf("offset %d exceeds the upload size %d bytes", newOffset, us.Upload.RemoteSize)
	}
	if newOffset < 0 {
		return newOffset, fmt.Errorf("offset %d is negative", newOffset)
	}

	us.Upload.RemoteOffset = newOffset
	return newOffset, nil
}
// Tell reports the current offset, which is the value of Upload.RemoteOffset.
func (us *UploadStream) Tell() int64 {
	upload := us.Upload
	return upload.RemoteOffset
}
// Len reports the size of the upload, which is the value of Upload.RemoteSize.
func (us *UploadStream) Len() int64 {
	upload := us.Upload
	return upload.RemoteSize
}
// Dirty reports whether the stream is marked "dirty", i.e. whether it holds a dirty buffer. After a failed
// ReadFrom the dirty buffer contains the data chunk which could not be uploaded to the server.
func (us *UploadStream) Dirty() bool {
	hasBuffer := us.dirtyBuffer != nil
	return hasBuffer
}
// ForceClean marks the stream as "clean" by dropping the dirty buffer together with the data it holds.
func (us *UploadStream) ForceClean() {
	var empty []byte // nil slice
	us.dirtyBuffer = empty
}
// uploadChunked uploads the data from r with one or more upload requests and returns the total number of
// bytes the server has accepted.
//
// In chunked mode the requests are repeated for as long as each of them uploads a full chunk (ChunkSize
// bytes); a shorter chunk means that either r has been drained or the upload has no space left. In
// non-chunked mode (ChunkSize == NoChunked) r is passed as the body of a single request, so exactly one
// request is made.
//
// After every successful request Upload.RemoteOffset is set to the offset reported by the server. The last
// received response, if any, is stored in LastResponse. On error, uploadedBytes contains what has been
// uploaded by the preceding successful requests.
func (us *UploadStream) uploadChunked(r io.Reader) (uploadedBytes int64, err error) {
	loc, err := url.Parse(us.Upload.Location)
	if err != nil {
		return 0, err
	}
	requestURL := us.client.BaseURL.ResolveReference(loc).String()

	for {
		uploaded, offset, response, chunkErr := us.uploadChunkImpl(requestURL, r, nil)
		if response != nil {
			us.LastResponse = response
		}
		if chunkErr != nil {
			return uploadedBytes, chunkErr
		}
		us.Upload.RemoteOffset = offset
		uploadedBytes += uploaded

		// Without chunking the single request has already consumed the whole reader. Stop here explicitly:
		// comparing uploaded with ChunkSize (which is 0 in this mode) would repeat the request forever
		// when the server accepts zero bytes, e.g. for an empty reader.
		if us.ChunkSize == NoChunked || uploaded != us.ChunkSize {
			return uploadedBytes, nil
		}
	}
}
// setupDirtyBuffer makes the dirty buffer match the current ChunkSize. A buffer whose length already equals
// ChunkSize is kept as is. Otherwise, the buffer is dropped and, unless chunking is disabled (ChunkSize is
// NoChunked), a new zeroed buffer of ChunkSize bytes is allocated.
func (us *UploadStream) setupDirtyBuffer() {
	if int64(len(us.dirtyBuffer)) == us.ChunkSize {
		return
	}
	if us.ChunkSize == NoChunked {
		us.dirtyBuffer = nil
		return
	}
	us.dirtyBuffer = make([]byte, us.ChunkSize)
}
// uploadChunkImpl performs at most one upload request to requestURL and interprets the server response.
//
// In chunked mode (ChunkSize != NoChunked) it fills the dirty buffer from data and sends the buffer as the
// request body with an explicit Content-Length. The buffer is shrunk beforehand if it is larger than the space
// left in the upload (RemoteSize - RemoteOffset), and shrunk again if data ends before the buffer is full.
// In non-chunked mode data itself is used as the request body and no Content-Length is set here.
//
// extraHeaders are applied after all the headers set by this method; an entry with an empty value removes
// the header.
//
// Return values:
//
//   - bytesUploaded -- the difference between the Upload-Offset reported by the server and the current
//     Upload.RemoteOffset, never negative. Zero with a nil error if no request has been made, because there
//     was nothing to send (no space left in the upload or data is empty).
//   - offset -- initially Upload.RemoteOffset; replaced by the parsed Upload-Offset response header on a
//     201/204 response.
//   - response -- the server response, if the request has been performed. Its body is already closed.
//   - err -- a validation, request building, reading, transport or protocol error.
//
// The method does not change Upload.RemoteOffset, this is up to the caller. The only Upload field it writes
// is UploadExpired, when the server sends the Upload-Expires header.
func (us *UploadStream) uploadChunkImpl(requestURL string, data io.Reader, extraHeaders map[string]string) (bytesUploaded int64, offset int64, response *http.Response, err error) {
const unknownSize int64 = -1
chunking := us.ChunkSize != NoChunked // Chunking enabled
offset = us.Upload.RemoteOffset
if err = us.validate(); err != nil {
return
}
// The body size stays unknown in non-chunked mode, since data is streamed as is
bytesToUpload := unknownSize
if chunking {
if int64(len(us.dirtyBuffer)) > us.ChunkSize {
panic("programming error: dirty buffer is larger than ChunkSize")
}
bytesToUpload = int64(len(us.dirtyBuffer))
remoteBytesLeft := us.Upload.RemoteSize - offset
if bytesToUpload > remoteBytesLeft { // Buffer size is larger than the space left in the remote upload
bytesToUpload = remoteBytesLeft
us.dirtyBuffer = us.dirtyBuffer[:bytesToUpload]
}
// Nothing to send: return without making a request
if bytesToUpload == 0 {
return
}
}
// Perform actions that can generate an error before invoking a reader
if us.checksumHash != nil && !chunking {
if err = us.client.ensureExtension("checksum-trailer"); err != nil {
return
}
}
// The request is created without a body; the body and the headers are filled in below
var req *http.Request
if req, err = us.client.GetRequest(us.uploadMethod, requestURL, nil, us.client, us.client.client); err != nil {
return
}
if chunking {
// At this point len(us.dirtyBuffer) == bytesToUpload, so this call tries to fill the whole buffer
t, e := io.ReadAtLeast(data, us.dirtyBuffer, int(bytesToUpload))
switch {
case errors.Is(e, io.EOF): // Reader is empty
return
case errors.Is(e, io.ErrUnexpectedEOF): // Reader has ended early
bytesToUpload = int64(t)
us.dirtyBuffer = us.dirtyBuffer[:bytesToUpload]
default:
if e != nil {
err = e
return
}
}
// From now on the request body is the buffered chunk, not the original reader
data = bytes.NewReader(us.dirtyBuffer)
}
if us.checksumHash != nil {
// The hash object is reused between requests, so drop the state left by the previous one
us.checksumHash.Reset()
if chunking {
// The chunk is fully in memory: the checksum can be calculated up front and sent in a header
us.checksumHash.Write(us.dirtyBuffer)
sum := us.checksumHash.Sum(make([]byte, 0))
req.Header.Set("Upload-Checksum", fmt.Sprintf("%s %s", us.rawChecksumHashName, base64.StdEncoding.EncodeToString(sum)))
} else {
// The data is hashed while it is being read by the HTTP client (TeeReader), so the checksum is known
// only after the body has been sent.
// NOTE(review): the checksum helpers presumably emit the Upload-Checksum value as an HTTP trailer of req
// once the body is consumed -- confirm against the checksum package.
trailers := map[string]io.Reader{"Upload-Checksum": checksum.NewHashBase64ReadWriter(us.checksumHash, us.rawChecksumHashName+" ")}
data = checksum.NewDeferTrailerReader(io.TeeReader(data, us.checksumHash), trailers, req)
}
}
req.Body = io.NopCloser(data)
// Content-Length is set explicitly only when the body size is known, i.e. in chunked mode
if bytesToUpload != unknownSize {
req.ContentLength = bytesToUpload
}
req.Header.Set("Content-Type", "application/offset+octet-stream")
req.Header.Set("Upload-Offset", strconv.FormatInt(offset, 10))
// "Deferred length" feature: tell the upload size to the server on the very first request
if us.SetUploadSize && offset == 0 {
req.Header.Set("Upload-Length", strconv.FormatInt(us.Upload.RemoteSize, 10))
}
// Extra headers go last, so they can override or remove (empty value) the headers set above
if len(extraHeaders) > 0 {
for k, v := range extraHeaders {
if v == "" {
req.Header.Del(k)
} else {
req.Header.Set(k, v)
}
}
}
if us.ctx != nil {
req = req.WithContext(us.ctx)
}
if response, err = us.client.tusRequest(us.ctx, req); err != nil {
return
}
// The body is not used here; the response is returned to the caller with the body closed
defer response.Body.Close()
switch response.StatusCode {
case http.StatusCreated: // For "Creation With Upload" feature
// 201 is an expected answer only if the data is sent with a POST request
if us.uploadMethod != http.MethodPost {
err = ErrUnexpectedResponse
return
}
fallthrough
case http.StatusNoContent:
// Success: the server reports the new offset in the Upload-Offset header
if offset, err = strconv.ParseInt(response.Header.Get("Upload-Offset"), 10, 64); err != nil {
err = ErrProtocol.WithErr(fmt.Errorf("cannot parse Upload-Offset header %q: %w", response.Header.Get("Upload-Offset"), err))
return
}
// The number of accepted bytes is how far the server offset has moved; never report a negative value
bytesUploaded = offset - us.Upload.RemoteOffset
if bytesUploaded < 0 {
bytesUploaded = 0
}
// Upload-Expires is optional; if present, it must be in the RFC1123 format
if v := response.Header.Get("Upload-Expires"); v != "" {
var t time.Time
if t, err = time.Parse(time.RFC1123, v); err != nil {
err = ErrProtocol.WithErr(fmt.Errorf("cannot parse Upload-Expires RFC1123 header %q: %w", v, err))
return
}
us.Upload.UploadExpired = &t
}
case http.StatusConflict:
err = ErrOffsetsNotSynced.WithResponse(response)
case http.StatusForbidden:
err = ErrCannotUpload.WithResponse(response)
case http.StatusNotFound, http.StatusGone:
err = ErrUploadDoesNotExist.WithResponse(response)
case http.StatusRequestEntityTooLarge:
err = ErrUploadTooLarge.WithResponse(response)
case 460: // Non-standard HTTP code '460 Checksum Mismatch'
// 460 is treated as a checksum mismatch only if a checksum has been sent; otherwise it is just an
// unexpected response
if us.checksumHash != nil {
err = ErrChecksumMismatch.WithResponse(response)
return
}
fallthrough
default:
err = ErrUnexpectedResponse
}
return
}
// validate checks that the stream is configured well enough to make upload requests.
//
// Misconfiguration is treated as a programming error and causes a panic: the upload size is unknown or
// negative, or ChunkSize is negative. An error is returned if the client reports that an extension required
// by the enabled features is missing: "creation-defer-length" when SetUploadSize is set, "checksum" when a
// checksum algorithm is set. The checks run in this order: upload size, extensions, ChunkSize.
func (us *UploadStream) validate() error {
	switch size := us.Upload.RemoteSize; {
	case size == SizeUnknown:
		panic("upload must have size before start the uploading")
	case size < 0:
		panic(fmt.Sprintf("upload size is negative %d", size))
	}

	requirements := [...]struct {
		enabled   bool
		extension string
	}{
		{us.SetUploadSize, "creation-defer-length"},
		{us.checksumHash != nil, "checksum"},
	}
	for _, req := range requirements {
		if !req.enabled {
			continue
		}
		if err := us.client.ensureExtension(req.extension); err != nil {
			return err
		}
	}

	if chunk := us.ChunkSize; chunk != NoChunked && chunk < 0 {
		panic("ChunkSize must be either a positive number or NoChunked")
	}
	return nil
}